{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 24.752475247524753, "eval_steps": 500, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0024752475247524753, "grad_norm": 1.395677924156189, "learning_rate": 1.5000000000000002e-07, "loss": 2.9798, "step": 10 }, { "epoch": 0.0049504950495049506, "grad_norm": 1.0540428161621094, "learning_rate": 3.1666666666666667e-07, "loss": 2.9577, "step": 20 }, { "epoch": 0.007425742574257425, "grad_norm": 0.5512261390686035, "learning_rate": 4.833333333333334e-07, "loss": 2.9245, "step": 30 }, { "epoch": 0.009900990099009901, "grad_norm": 0.5100216269493103, "learning_rate": 6.5e-07, "loss": 2.8979, "step": 40 }, { "epoch": 0.012376237623762377, "grad_norm": 0.4579915702342987, "learning_rate": 8.166666666666666e-07, "loss": 2.8867, "step": 50 }, { "epoch": 0.01485148514851485, "grad_norm": 0.3977307379245758, "learning_rate": 9.833333333333334e-07, "loss": 2.8664, "step": 60 }, { "epoch": 0.017326732673267328, "grad_norm": 0.3784933388233185, "learning_rate": 1.15e-06, "loss": 2.854, "step": 70 }, { "epoch": 0.019801980198019802, "grad_norm": 0.3765506446361542, "learning_rate": 1.3166666666666668e-06, "loss": 2.8457, "step": 80 }, { "epoch": 0.022277227722772276, "grad_norm": 0.3878174126148224, "learning_rate": 1.4833333333333335e-06, "loss": 2.8311, "step": 90 }, { "epoch": 0.024752475247524754, "grad_norm": 0.3838914632797241, "learning_rate": 1.65e-06, "loss": 2.8198, "step": 100 }, { "epoch": 0.027227722772277228, "grad_norm": 0.39541131258010864, "learning_rate": 1.816666666666667e-06, "loss": 2.8147, "step": 110 }, { "epoch": 0.0297029702970297, "grad_norm": 0.428875595331192, "learning_rate": 1.9833333333333335e-06, "loss": 2.7991, "step": 120 }, { "epoch": 0.03217821782178218, "grad_norm": 0.4117491841316223, "learning_rate": 2.1499999999999997e-06, "loss": 2.7897, "step": 130 }, { "epoch": 0.034653465346534656, "grad_norm": 0.44901010394096375, "learning_rate": 2.316666666666667e-06, "loss": 2.78, "step": 140 }, { "epoch": 0.03712871287128713, "grad_norm": 0.4373077154159546, "learning_rate": 2.4833333333333334e-06, "loss": 2.7671, "step": 150 }, { "epoch": 0.039603960396039604, "grad_norm": 0.45976722240448, "learning_rate": 2.65e-06, "loss": 2.7541, "step": 160 }, { "epoch": 0.04207920792079208, "grad_norm": 0.5550143122673035, "learning_rate": 2.8166666666666667e-06, "loss": 2.7327, "step": 170 }, { "epoch": 0.04455445544554455, "grad_norm": 0.5641716718673706, "learning_rate": 2.9833333333333333e-06, "loss": 2.7107, "step": 180 }, { "epoch": 0.04702970297029703, "grad_norm": 0.6010967493057251, "learning_rate": 3.1500000000000003e-06, "loss": 2.682, "step": 190 }, { "epoch": 0.04950495049504951, "grad_norm": 0.654480516910553, "learning_rate": 3.3166666666666665e-06, "loss": 2.6343, "step": 200 }, { "epoch": 0.05198019801980198, "grad_norm": 0.6578664183616638, "learning_rate": 3.4833333333333336e-06, "loss": 2.5947, "step": 210 }, { "epoch": 0.054455445544554455, "grad_norm": 1.5018725395202637, "learning_rate": 3.6499999999999998e-06, "loss": 2.5314, "step": 220 }, { "epoch": 0.05693069306930693, "grad_norm": 0.948902428150177, "learning_rate": 3.816666666666667e-06, "loss": 2.4774, "step": 230 }, { "epoch": 0.0594059405940594, "grad_norm": 1.2979158163070679, "learning_rate": 3.983333333333333e-06, "loss": 2.4208, "step": 240 }, { "epoch": 0.06188118811881188, "grad_norm": 1.0176002979278564, "learning_rate": 4.15e-06, "loss": 2.3634, "step": 250 }, { "epoch": 0.06435643564356436, "grad_norm": 0.8912397623062134, "learning_rate": 4.316666666666667e-06, "loss": 2.2898, "step": 260 }, { "epoch": 0.06683168316831684, "grad_norm": 0.9418484568595886, "learning_rate": 4.483333333333334e-06, "loss": 2.2184, "step": 270 }, { "epoch": 0.06930693069306931, "grad_norm": 0.9953831434249878, "learning_rate": 4.65e-06, "loss": 2.1435, "step": 280 }, { "epoch": 0.07178217821782178, "grad_norm": 1.0783240795135498, "learning_rate": 4.816666666666667e-06, "loss": 2.0791, "step": 290 }, { "epoch": 0.07425742574257425, "grad_norm": 1.4691274166107178, "learning_rate": 4.983333333333334e-06, "loss": 2.0175, "step": 300 }, { "epoch": 0.07673267326732673, "grad_norm": 1.390291452407837, "learning_rate": 5.15e-06, "loss": 1.9559, "step": 310 }, { "epoch": 0.07920792079207921, "grad_norm": 1.6206153631210327, "learning_rate": 5.316666666666667e-06, "loss": 1.9227, "step": 320 }, { "epoch": 0.08168316831683169, "grad_norm": 1.7950516939163208, "learning_rate": 5.483333333333333e-06, "loss": 1.8782, "step": 330 }, { "epoch": 0.08415841584158416, "grad_norm": 1.2521597146987915, "learning_rate": 5.65e-06, "loss": 1.8393, "step": 340 }, { "epoch": 0.08663366336633663, "grad_norm": 1.4187325239181519, "learning_rate": 5.816666666666667e-06, "loss": 1.805, "step": 350 }, { "epoch": 0.0891089108910891, "grad_norm": 1.771992802619934, "learning_rate": 5.983333333333334e-06, "loss": 1.7793, "step": 360 }, { "epoch": 0.09158415841584158, "grad_norm": 1.859925627708435, "learning_rate": 6.15e-06, "loss": 1.7446, "step": 370 }, { "epoch": 0.09405940594059406, "grad_norm": 1.452436923980713, "learning_rate": 6.316666666666667e-06, "loss": 1.7279, "step": 380 }, { "epoch": 0.09653465346534654, "grad_norm": 1.3393385410308838, "learning_rate": 6.4833333333333345e-06, "loss": 1.7102, "step": 390 }, { "epoch": 0.09900990099009901, "grad_norm": 1.3568516969680786, "learning_rate": 6.650000000000001e-06, "loss": 1.6928, "step": 400 }, { "epoch": 0.10148514851485149, "grad_norm": 1.7562252283096313, "learning_rate": 6.816666666666667e-06, "loss": 1.688, "step": 410 }, { "epoch": 0.10396039603960396, "grad_norm": 1.608577847480774, "learning_rate": 6.983333333333333e-06, "loss": 1.6862, "step": 420 }, { "epoch": 0.10643564356435643, "grad_norm": 1.4633078575134277, "learning_rate": 7.15e-06, "loss": 1.6657, "step": 430 }, { "epoch": 0.10891089108910891, "grad_norm": 1.3937551975250244, "learning_rate": 7.316666666666667e-06, "loss": 1.6429, "step": 440 }, { "epoch": 0.11138613861386139, "grad_norm": 1.2478042840957642, "learning_rate": 7.483333333333334e-06, "loss": 1.634, "step": 450 }, { "epoch": 0.11386138613861387, "grad_norm": 1.416061282157898, "learning_rate": 7.65e-06, "loss": 1.6208, "step": 460 }, { "epoch": 0.11633663366336634, "grad_norm": 1.0041433572769165, "learning_rate": 7.816666666666666e-06, "loss": 1.6113, "step": 470 }, { "epoch": 0.1188118811881188, "grad_norm": 1.0998833179473877, "learning_rate": 7.983333333333334e-06, "loss": 1.6055, "step": 480 }, { "epoch": 0.12128712871287128, "grad_norm": 0.8029158115386963, "learning_rate": 8.15e-06, "loss": 1.5907, "step": 490 }, { "epoch": 0.12376237623762376, "grad_norm": 1.618769884109497, "learning_rate": 8.316666666666668e-06, "loss": 1.5881, "step": 500 }, { "epoch": 0.12623762376237624, "grad_norm": 1.402997374534607, "learning_rate": 8.483333333333334e-06, "loss": 1.575, "step": 510 }, { "epoch": 0.12871287128712872, "grad_norm": 1.3618770837783813, "learning_rate": 8.65e-06, "loss": 1.5723, "step": 520 }, { "epoch": 0.1311881188118812, "grad_norm": 1.335325002670288, "learning_rate": 8.816666666666668e-06, "loss": 1.5729, "step": 530 }, { "epoch": 0.13366336633663367, "grad_norm": 1.1966466903686523, "learning_rate": 8.983333333333334e-06, "loss": 1.5603, "step": 540 }, { "epoch": 0.13613861386138615, "grad_norm": 1.0352104902267456, "learning_rate": 9.15e-06, "loss": 1.551, "step": 550 }, { "epoch": 0.13861386138613863, "grad_norm": 1.220436453819275, "learning_rate": 9.316666666666667e-06, "loss": 1.5385, "step": 560 }, { "epoch": 0.14108910891089108, "grad_norm": 1.121360182762146, "learning_rate": 9.483333333333335e-06, "loss": 1.5274, "step": 570 }, { "epoch": 0.14356435643564355, "grad_norm": 0.9769999384880066, "learning_rate": 9.65e-06, "loss": 1.5195, "step": 580 }, { "epoch": 0.14603960396039603, "grad_norm": 1.058780550956726, "learning_rate": 9.816666666666667e-06, "loss": 1.5116, "step": 590 }, { "epoch": 0.1485148514851485, "grad_norm": 1.2653067111968994, "learning_rate": 9.983333333333333e-06, "loss": 1.51, "step": 600 }, { "epoch": 0.15099009900990099, "grad_norm": 1.1361687183380127, "learning_rate": 1.0150000000000001e-05, "loss": 1.5082, "step": 610 }, { "epoch": 0.15346534653465346, "grad_norm": 1.0683202743530273, "learning_rate": 1.0316666666666667e-05, "loss": 1.4983, "step": 620 }, { "epoch": 0.15594059405940594, "grad_norm": 1.1581385135650635, "learning_rate": 1.0483333333333333e-05, "loss": 1.4977, "step": 630 }, { "epoch": 0.15841584158415842, "grad_norm": 1.2422620058059692, "learning_rate": 1.065e-05, "loss": 1.4852, "step": 640 }, { "epoch": 0.1608910891089109, "grad_norm": 1.0089102983474731, "learning_rate": 1.0816666666666666e-05, "loss": 1.4727, "step": 650 }, { "epoch": 0.16336633663366337, "grad_norm": 1.132785677909851, "learning_rate": 1.0983333333333334e-05, "loss": 1.4748, "step": 660 }, { "epoch": 0.16584158415841585, "grad_norm": 1.0035827159881592, "learning_rate": 1.115e-05, "loss": 1.4708, "step": 670 }, { "epoch": 0.16831683168316833, "grad_norm": 1.0395997762680054, "learning_rate": 1.1316666666666668e-05, "loss": 1.4631, "step": 680 }, { "epoch": 0.1707920792079208, "grad_norm": 1.0495045185089111, "learning_rate": 1.1483333333333334e-05, "loss": 1.4522, "step": 690 }, { "epoch": 0.17326732673267325, "grad_norm": 1.073765754699707, "learning_rate": 1.1650000000000002e-05, "loss": 1.4525, "step": 700 }, { "epoch": 0.17574257425742573, "grad_norm": 1.0139641761779785, "learning_rate": 1.1816666666666668e-05, "loss": 1.4522, "step": 710 }, { "epoch": 0.1782178217821782, "grad_norm": 0.9296018481254578, "learning_rate": 1.1983333333333334e-05, "loss": 1.4446, "step": 720 }, { "epoch": 0.1806930693069307, "grad_norm": 0.9662140011787415, "learning_rate": 1.215e-05, "loss": 1.4291, "step": 730 }, { "epoch": 0.18316831683168316, "grad_norm": 1.1718559265136719, "learning_rate": 1.2316666666666667e-05, "loss": 1.4295, "step": 740 }, { "epoch": 0.18564356435643564, "grad_norm": 1.2366734743118286, "learning_rate": 1.2483333333333335e-05, "loss": 1.4266, "step": 750 }, { "epoch": 0.18811881188118812, "grad_norm": 1.0506682395935059, "learning_rate": 1.2650000000000001e-05, "loss": 1.4183, "step": 760 }, { "epoch": 0.1905940594059406, "grad_norm": 1.342199444770813, "learning_rate": 1.2816666666666669e-05, "loss": 1.4258, "step": 770 }, { "epoch": 0.19306930693069307, "grad_norm": 1.1012799739837646, "learning_rate": 1.2983333333333333e-05, "loss": 1.4146, "step": 780 }, { "epoch": 0.19554455445544555, "grad_norm": 0.925972580909729, "learning_rate": 1.3150000000000001e-05, "loss": 1.4111, "step": 790 }, { "epoch": 0.19801980198019803, "grad_norm": 0.9802278280258179, "learning_rate": 1.3316666666666666e-05, "loss": 1.4031, "step": 800 }, { "epoch": 0.2004950495049505, "grad_norm": 0.9844738245010376, "learning_rate": 1.3483333333333334e-05, "loss": 1.3967, "step": 810 }, { "epoch": 0.20297029702970298, "grad_norm": 0.7295616865158081, "learning_rate": 1.3650000000000001e-05, "loss": 1.3965, "step": 820 }, { "epoch": 0.20544554455445543, "grad_norm": 0.7668136954307556, "learning_rate": 1.3816666666666666e-05, "loss": 1.3963, "step": 830 }, { "epoch": 0.2079207920792079, "grad_norm": 1.2452762126922607, "learning_rate": 1.3983333333333334e-05, "loss": 1.3889, "step": 840 }, { "epoch": 0.2103960396039604, "grad_norm": 0.9372097849845886, "learning_rate": 1.415e-05, "loss": 1.3805, "step": 850 }, { "epoch": 0.21287128712871287, "grad_norm": 0.9866706132888794, "learning_rate": 1.4316666666666668e-05, "loss": 1.3752, "step": 860 }, { "epoch": 0.21534653465346534, "grad_norm": 0.9075079560279846, "learning_rate": 1.4483333333333334e-05, "loss": 1.3758, "step": 870 }, { "epoch": 0.21782178217821782, "grad_norm": 1.3095142841339111, "learning_rate": 1.465e-05, "loss": 1.3754, "step": 880 }, { "epoch": 0.2202970297029703, "grad_norm": 1.000935673713684, "learning_rate": 1.4816666666666668e-05, "loss": 1.3687, "step": 890 }, { "epoch": 0.22277227722772278, "grad_norm": 1.0232343673706055, "learning_rate": 1.4983333333333336e-05, "loss": 1.3594, "step": 900 }, { "epoch": 0.22524752475247525, "grad_norm": 1.0880979299545288, "learning_rate": 1.515e-05, "loss": 1.3556, "step": 910 }, { "epoch": 0.22772277227722773, "grad_norm": 0.925398588180542, "learning_rate": 1.531666666666667e-05, "loss": 1.3544, "step": 920 }, { "epoch": 0.2301980198019802, "grad_norm": 1.0560212135314941, "learning_rate": 1.548333333333333e-05, "loss": 1.3525, "step": 930 }, { "epoch": 0.23267326732673269, "grad_norm": 0.8170042634010315, "learning_rate": 1.565e-05, "loss": 1.3452, "step": 940 }, { "epoch": 0.23514851485148514, "grad_norm": 0.885037362575531, "learning_rate": 1.5816666666666667e-05, "loss": 1.3378, "step": 950 }, { "epoch": 0.2376237623762376, "grad_norm": 0.9950456619262695, "learning_rate": 1.5983333333333333e-05, "loss": 1.3393, "step": 960 }, { "epoch": 0.2400990099009901, "grad_norm": 1.039986491203308, "learning_rate": 1.6150000000000003e-05, "loss": 1.3266, "step": 970 }, { "epoch": 0.24257425742574257, "grad_norm": 1.041464924812317, "learning_rate": 1.6316666666666666e-05, "loss": 1.3293, "step": 980 }, { "epoch": 0.24504950495049505, "grad_norm": 0.995135486125946, "learning_rate": 1.6483333333333335e-05, "loss": 1.3206, "step": 990 }, { "epoch": 0.24752475247524752, "grad_norm": 1.1469990015029907, "learning_rate": 1.665e-05, "loss": 1.3155, "step": 1000 }, { "epoch": 0.25, "grad_norm": 0.962248682975769, "learning_rate": 1.6816666666666668e-05, "loss": 1.3087, "step": 1010 }, { "epoch": 0.2524752475247525, "grad_norm": 0.9307863712310791, "learning_rate": 1.6983333333333334e-05, "loss": 1.3059, "step": 1020 }, { "epoch": 0.25495049504950495, "grad_norm": 1.0303689241409302, "learning_rate": 1.7150000000000004e-05, "loss": 1.2932, "step": 1030 }, { "epoch": 0.25742574257425743, "grad_norm": 0.9930600523948669, "learning_rate": 1.7316666666666666e-05, "loss": 1.2921, "step": 1040 }, { "epoch": 0.2599009900990099, "grad_norm": 1.113606572151184, "learning_rate": 1.7483333333333336e-05, "loss": 1.2882, "step": 1050 }, { "epoch": 0.2623762376237624, "grad_norm": 0.8283010125160217, "learning_rate": 1.765e-05, "loss": 1.2774, "step": 1060 }, { "epoch": 0.26485148514851486, "grad_norm": 0.7775421142578125, "learning_rate": 1.781666666666667e-05, "loss": 1.2791, "step": 1070 }, { "epoch": 0.26732673267326734, "grad_norm": 0.9408491849899292, "learning_rate": 1.7983333333333335e-05, "loss": 1.2668, "step": 1080 }, { "epoch": 0.2698019801980198, "grad_norm": 1.0214544534683228, "learning_rate": 1.815e-05, "loss": 1.2612, "step": 1090 }, { "epoch": 0.2722772277227723, "grad_norm": 0.9130519032478333, "learning_rate": 1.8316666666666667e-05, "loss": 1.2598, "step": 1100 }, { "epoch": 0.2747524752475248, "grad_norm": 1.298579454421997, "learning_rate": 1.8483333333333333e-05, "loss": 1.2512, "step": 1110 }, { "epoch": 0.27722772277227725, "grad_norm": 1.5179104804992676, "learning_rate": 1.865e-05, "loss": 1.2466, "step": 1120 }, { "epoch": 0.27970297029702973, "grad_norm": 1.1765742301940918, "learning_rate": 1.881666666666667e-05, "loss": 1.2399, "step": 1130 }, { "epoch": 0.28217821782178215, "grad_norm": 1.1457933187484741, "learning_rate": 1.8983333333333335e-05, "loss": 1.24, "step": 1140 }, { "epoch": 0.28465346534653463, "grad_norm": 0.9080646634101868, "learning_rate": 1.915e-05, "loss": 1.2292, "step": 1150 }, { "epoch": 0.2871287128712871, "grad_norm": 0.839780867099762, "learning_rate": 1.9316666666666668e-05, "loss": 1.226, "step": 1160 }, { "epoch": 0.2896039603960396, "grad_norm": 1.057437777519226, "learning_rate": 1.9483333333333334e-05, "loss": 1.2163, "step": 1170 }, { "epoch": 0.29207920792079206, "grad_norm": 1.0056885480880737, "learning_rate": 1.9650000000000003e-05, "loss": 1.219, "step": 1180 }, { "epoch": 0.29455445544554454, "grad_norm": 0.868206799030304, "learning_rate": 1.9816666666666666e-05, "loss": 1.2051, "step": 1190 }, { "epoch": 0.297029702970297, "grad_norm": 0.9771503806114197, "learning_rate": 1.9983333333333336e-05, "loss": 1.2008, "step": 1200 }, { "epoch": 0.2995049504950495, "grad_norm": 0.8787959218025208, "learning_rate": 2.0150000000000002e-05, "loss": 1.1956, "step": 1210 }, { "epoch": 0.30198019801980197, "grad_norm": 0.9565332531929016, "learning_rate": 2.0316666666666668e-05, "loss": 1.1907, "step": 1220 }, { "epoch": 0.30445544554455445, "grad_norm": 0.8905620574951172, "learning_rate": 2.0483333333333334e-05, "loss": 1.1905, "step": 1230 }, { "epoch": 0.3069306930693069, "grad_norm": 1.0729780197143555, "learning_rate": 2.065e-05, "loss": 1.1794, "step": 1240 }, { "epoch": 0.3094059405940594, "grad_norm": 0.9191785454750061, "learning_rate": 2.0816666666666667e-05, "loss": 1.1733, "step": 1250 }, { "epoch": 0.3118811881188119, "grad_norm": 0.9088793992996216, "learning_rate": 2.0983333333333336e-05, "loss": 1.1764, "step": 1260 }, { "epoch": 0.31435643564356436, "grad_norm": 0.9121751189231873, "learning_rate": 2.115e-05, "loss": 1.1697, "step": 1270 }, { "epoch": 0.31683168316831684, "grad_norm": 0.9851081967353821, "learning_rate": 2.131666666666667e-05, "loss": 1.1649, "step": 1280 }, { "epoch": 0.3193069306930693, "grad_norm": 0.9680418968200684, "learning_rate": 2.148333333333333e-05, "loss": 1.1621, "step": 1290 }, { "epoch": 0.3217821782178218, "grad_norm": 0.971192479133606, "learning_rate": 2.165e-05, "loss": 1.1503, "step": 1300 }, { "epoch": 0.32425742574257427, "grad_norm": 1.10955011844635, "learning_rate": 2.1816666666666667e-05, "loss": 1.1424, "step": 1310 }, { "epoch": 0.32673267326732675, "grad_norm": 1.0068061351776123, "learning_rate": 2.1983333333333333e-05, "loss": 1.1506, "step": 1320 }, { "epoch": 0.3292079207920792, "grad_norm": 0.959693968296051, "learning_rate": 2.215e-05, "loss": 1.1389, "step": 1330 }, { "epoch": 0.3316831683168317, "grad_norm": 0.9581379294395447, "learning_rate": 2.231666666666667e-05, "loss": 1.1387, "step": 1340 }, { "epoch": 0.3341584158415842, "grad_norm": 0.9816709756851196, "learning_rate": 2.2483333333333335e-05, "loss": 1.1269, "step": 1350 }, { "epoch": 0.33663366336633666, "grad_norm": 1.2685049772262573, "learning_rate": 2.265e-05, "loss": 1.1227, "step": 1360 }, { "epoch": 0.33910891089108913, "grad_norm": 0.9523789286613464, "learning_rate": 2.2816666666666668e-05, "loss": 1.1168, "step": 1370 }, { "epoch": 0.3415841584158416, "grad_norm": 0.9049601554870605, "learning_rate": 2.2983333333333334e-05, "loss": 1.1109, "step": 1380 }, { "epoch": 0.34405940594059403, "grad_norm": 0.9570116996765137, "learning_rate": 2.3150000000000004e-05, "loss": 1.1032, "step": 1390 }, { "epoch": 0.3465346534653465, "grad_norm": 1.0677274465560913, "learning_rate": 2.3316666666666666e-05, "loss": 1.103, "step": 1400 }, { "epoch": 0.349009900990099, "grad_norm": 1.005630612373352, "learning_rate": 2.3483333333333336e-05, "loss": 1.0978, "step": 1410 }, { "epoch": 0.35148514851485146, "grad_norm": 1.0137518644332886, "learning_rate": 2.365e-05, "loss": 1.0891, "step": 1420 }, { "epoch": 0.35396039603960394, "grad_norm": 0.9914647936820984, "learning_rate": 2.381666666666667e-05, "loss": 1.0851, "step": 1430 }, { "epoch": 0.3564356435643564, "grad_norm": 1.0303446054458618, "learning_rate": 2.3983333333333335e-05, "loss": 1.0823, "step": 1440 }, { "epoch": 0.3589108910891089, "grad_norm": 1.09994375705719, "learning_rate": 2.415e-05, "loss": 1.0755, "step": 1450 }, { "epoch": 0.3613861386138614, "grad_norm": 0.9747649431228638, "learning_rate": 2.4316666666666667e-05, "loss": 1.0734, "step": 1460 }, { "epoch": 0.36386138613861385, "grad_norm": 0.9992278814315796, "learning_rate": 2.4483333333333333e-05, "loss": 1.0761, "step": 1470 }, { "epoch": 0.36633663366336633, "grad_norm": 1.2236725091934204, "learning_rate": 2.465e-05, "loss": 1.0666, "step": 1480 }, { "epoch": 0.3688118811881188, "grad_norm": 1.163537859916687, "learning_rate": 2.481666666666667e-05, "loss": 1.0733, "step": 1490 }, { "epoch": 0.3712871287128713, "grad_norm": 1.242422342300415, "learning_rate": 2.4983333333333335e-05, "loss": 1.0651, "step": 1500 }, { "epoch": 0.37376237623762376, "grad_norm": 1.0669962167739868, "learning_rate": 2.515e-05, "loss": 1.0535, "step": 1510 }, { "epoch": 0.37623762376237624, "grad_norm": 0.9074938893318176, "learning_rate": 2.5316666666666668e-05, "loss": 1.0527, "step": 1520 }, { "epoch": 0.3787128712871287, "grad_norm": 1.06637442111969, "learning_rate": 2.5483333333333337e-05, "loss": 1.0533, "step": 1530 }, { "epoch": 0.3811881188118812, "grad_norm": 1.0266623497009277, "learning_rate": 2.5650000000000003e-05, "loss": 1.0418, "step": 1540 }, { "epoch": 0.38366336633663367, "grad_norm": 1.1305241584777832, "learning_rate": 2.5816666666666666e-05, "loss": 1.0408, "step": 1550 }, { "epoch": 0.38613861386138615, "grad_norm": 0.9863414168357849, "learning_rate": 2.5983333333333336e-05, "loss": 1.0327, "step": 1560 }, { "epoch": 0.3886138613861386, "grad_norm": 1.016839623451233, "learning_rate": 2.6150000000000002e-05, "loss": 1.0299, "step": 1570 }, { "epoch": 0.3910891089108911, "grad_norm": 0.9396893978118896, "learning_rate": 2.6316666666666668e-05, "loss": 1.0271, "step": 1580 }, { "epoch": 0.3935643564356436, "grad_norm": 1.0424878597259521, "learning_rate": 2.648333333333333e-05, "loss": 1.0286, "step": 1590 }, { "epoch": 0.39603960396039606, "grad_norm": 1.2309951782226562, "learning_rate": 2.6650000000000004e-05, "loss": 1.02, "step": 1600 }, { "epoch": 0.39851485148514854, "grad_norm": 1.036134958267212, "learning_rate": 2.6816666666666667e-05, "loss": 1.0213, "step": 1610 }, { "epoch": 0.400990099009901, "grad_norm": 1.2803884744644165, "learning_rate": 2.6983333333333333e-05, "loss": 1.0127, "step": 1620 }, { "epoch": 0.4034653465346535, "grad_norm": 1.088560938835144, "learning_rate": 2.7150000000000003e-05, "loss": 1.0169, "step": 1630 }, { "epoch": 0.40594059405940597, "grad_norm": 1.0597134828567505, "learning_rate": 2.731666666666667e-05, "loss": 1.0073, "step": 1640 }, { "epoch": 0.4084158415841584, "grad_norm": 1.133959412574768, "learning_rate": 2.748333333333333e-05, "loss": 0.9991, "step": 1650 }, { "epoch": 0.41089108910891087, "grad_norm": 1.2052741050720215, "learning_rate": 2.7650000000000005e-05, "loss": 1.0043, "step": 1660 }, { "epoch": 0.41336633663366334, "grad_norm": 1.0984522104263306, "learning_rate": 2.7816666666666667e-05, "loss": 0.9978, "step": 1670 }, { "epoch": 0.4158415841584158, "grad_norm": 1.1527941226959229, "learning_rate": 2.7983333333333334e-05, "loss": 0.9947, "step": 1680 }, { "epoch": 0.4183168316831683, "grad_norm": 1.1273350715637207, "learning_rate": 2.815e-05, "loss": 0.9911, "step": 1690 }, { "epoch": 0.4207920792079208, "grad_norm": 1.17258620262146, "learning_rate": 2.831666666666667e-05, "loss": 0.989, "step": 1700 }, { "epoch": 0.42326732673267325, "grad_norm": 1.1348628997802734, "learning_rate": 2.8483333333333336e-05, "loss": 0.9928, "step": 1710 }, { "epoch": 0.42574257425742573, "grad_norm": 1.1875810623168945, "learning_rate": 2.865e-05, "loss": 0.9847, "step": 1720 }, { "epoch": 0.4282178217821782, "grad_norm": 1.1458216905593872, "learning_rate": 2.8816666666666668e-05, "loss": 0.9801, "step": 1730 }, { "epoch": 0.4306930693069307, "grad_norm": 1.0313748121261597, "learning_rate": 2.8983333333333334e-05, "loss": 0.9762, "step": 1740 }, { "epoch": 0.43316831683168316, "grad_norm": 1.2675520181655884, "learning_rate": 2.915e-05, "loss": 0.9691, "step": 1750 }, { "epoch": 0.43564356435643564, "grad_norm": 1.2447614669799805, "learning_rate": 2.931666666666667e-05, "loss": 0.97, "step": 1760 }, { "epoch": 0.4381188118811881, "grad_norm": 1.2002557516098022, "learning_rate": 2.9483333333333336e-05, "loss": 0.9754, "step": 1770 }, { "epoch": 0.4405940594059406, "grad_norm": 1.218618392944336, "learning_rate": 2.965e-05, "loss": 0.9701, "step": 1780 }, { "epoch": 0.4430693069306931, "grad_norm": 1.174850344657898, "learning_rate": 2.9816666666666672e-05, "loss": 0.9625, "step": 1790 }, { "epoch": 0.44554455445544555, "grad_norm": 1.2552765607833862, "learning_rate": 2.9983333333333335e-05, "loss": 0.9602, "step": 1800 }, { "epoch": 0.44801980198019803, "grad_norm": 1.107476830482483, "learning_rate": 3.015e-05, "loss": 0.955, "step": 1810 }, { "epoch": 0.4504950495049505, "grad_norm": 1.4107182025909424, "learning_rate": 3.0316666666666664e-05, "loss": 0.957, "step": 1820 }, { "epoch": 0.452970297029703, "grad_norm": 1.2224382162094116, "learning_rate": 3.0483333333333337e-05, "loss": 0.9527, "step": 1830 }, { "epoch": 0.45544554455445546, "grad_norm": 1.3035762310028076, "learning_rate": 3.065e-05, "loss": 0.947, "step": 1840 }, { "epoch": 0.45792079207920794, "grad_norm": 1.2339040040969849, "learning_rate": 3.0816666666666666e-05, "loss": 0.9476, "step": 1850 }, { "epoch": 0.4603960396039604, "grad_norm": 1.238082766532898, "learning_rate": 3.098333333333334e-05, "loss": 0.9457, "step": 1860 }, { "epoch": 0.4628712871287129, "grad_norm": 1.377088189125061, "learning_rate": 3.115e-05, "loss": 0.9393, "step": 1870 }, { "epoch": 0.46534653465346537, "grad_norm": 1.317585825920105, "learning_rate": 3.1316666666666664e-05, "loss": 0.9364, "step": 1880 }, { "epoch": 0.46782178217821785, "grad_norm": 1.3071209192276, "learning_rate": 3.148333333333334e-05, "loss": 0.9276, "step": 1890 }, { "epoch": 0.47029702970297027, "grad_norm": 1.2015198469161987, "learning_rate": 3.1650000000000004e-05, "loss": 0.9357, "step": 1900 }, { "epoch": 0.47277227722772275, "grad_norm": 1.357736349105835, "learning_rate": 3.181666666666667e-05, "loss": 0.9275, "step": 1910 }, { "epoch": 0.4752475247524752, "grad_norm": 1.2555736303329468, "learning_rate": 3.1983333333333336e-05, "loss": 0.9289, "step": 1920 }, { "epoch": 0.4777227722772277, "grad_norm": 1.3781661987304688, "learning_rate": 3.215e-05, "loss": 0.9296, "step": 1930 }, { "epoch": 0.4801980198019802, "grad_norm": 1.3356397151947021, "learning_rate": 3.231666666666667e-05, "loss": 0.9227, "step": 1940 }, { "epoch": 0.48267326732673266, "grad_norm": 1.2806953191757202, "learning_rate": 3.2483333333333335e-05, "loss": 0.925, "step": 1950 }, { "epoch": 0.48514851485148514, "grad_norm": 1.211538553237915, "learning_rate": 3.265e-05, "loss": 0.917, "step": 1960 }, { "epoch": 0.4876237623762376, "grad_norm": 1.3094066381454468, "learning_rate": 3.281666666666667e-05, "loss": 0.9306, "step": 1970 }, { "epoch": 0.4900990099009901, "grad_norm": 1.364881157875061, "learning_rate": 3.298333333333333e-05, "loss": 0.9109, "step": 1980 }, { "epoch": 0.49257425742574257, "grad_norm": 1.2794462442398071, "learning_rate": 3.3150000000000006e-05, "loss": 0.9145, "step": 1990 }, { "epoch": 0.49504950495049505, "grad_norm": 1.5520931482315063, "learning_rate": 3.3316666666666666e-05, "loss": 0.911, "step": 2000 }, { "epoch": 0.4975247524752475, "grad_norm": 1.3350379467010498, "learning_rate": 3.348333333333333e-05, "loss": 0.9062, "step": 2010 }, { "epoch": 0.5, "grad_norm": 1.2820281982421875, "learning_rate": 3.3650000000000005e-05, "loss": 0.9062, "step": 2020 }, { "epoch": 0.5024752475247525, "grad_norm": 1.3206771612167358, "learning_rate": 3.381666666666667e-05, "loss": 0.8976, "step": 2030 }, { "epoch": 0.504950495049505, "grad_norm": 1.294630765914917, "learning_rate": 3.398333333333333e-05, "loss": 0.9036, "step": 2040 }, { "epoch": 0.5074257425742574, "grad_norm": 1.2097718715667725, "learning_rate": 3.415e-05, "loss": 0.902, "step": 2050 }, { "epoch": 0.5099009900990099, "grad_norm": 1.3366013765335083, "learning_rate": 3.431666666666667e-05, "loss": 0.9004, "step": 2060 }, { "epoch": 0.5123762376237624, "grad_norm": 1.3527308702468872, "learning_rate": 3.4483333333333336e-05, "loss": 0.894, "step": 2070 }, { "epoch": 0.5148514851485149, "grad_norm": 1.4250304698944092, "learning_rate": 3.465e-05, "loss": 0.8911, "step": 2080 }, { "epoch": 0.5173267326732673, "grad_norm": 1.3285603523254395, "learning_rate": 3.481666666666667e-05, "loss": 0.8891, "step": 2090 }, { "epoch": 0.5198019801980198, "grad_norm": 1.4772595167160034, "learning_rate": 3.4983333333333334e-05, "loss": 0.8878, "step": 2100 }, { "epoch": 0.5222772277227723, "grad_norm": 1.2338303327560425, "learning_rate": 3.515e-05, "loss": 0.896, "step": 2110 }, { "epoch": 0.5247524752475248, "grad_norm": 1.2173882722854614, "learning_rate": 3.531666666666667e-05, "loss": 0.8818, "step": 2120 }, { "epoch": 0.5272277227722773, "grad_norm": 1.2471299171447754, "learning_rate": 3.548333333333333e-05, "loss": 0.8907, "step": 2130 }, { "epoch": 0.5297029702970297, "grad_norm": 1.3052787780761719, "learning_rate": 3.565e-05, "loss": 0.88, "step": 2140 }, { "epoch": 0.5321782178217822, "grad_norm": 1.4670430421829224, "learning_rate": 3.581666666666667e-05, "loss": 0.8824, "step": 2150 }, { "epoch": 0.5346534653465347, "grad_norm": 1.3533958196640015, "learning_rate": 3.598333333333334e-05, "loss": 0.8786, "step": 2160 }, { "epoch": 0.5371287128712872, "grad_norm": 1.342527985572815, "learning_rate": 3.615e-05, "loss": 0.8719, "step": 2170 }, { "epoch": 0.5396039603960396, "grad_norm": 1.2494943141937256, "learning_rate": 3.631666666666667e-05, "loss": 0.8742, "step": 2180 }, { "epoch": 0.5420792079207921, "grad_norm": 1.5197850465774536, "learning_rate": 3.648333333333334e-05, "loss": 0.87, "step": 2190 }, { "epoch": 0.5445544554455446, "grad_norm": 1.505329966545105, "learning_rate": 3.665e-05, "loss": 0.871, "step": 2200 }, { "epoch": 0.5470297029702971, "grad_norm": 1.2481048107147217, "learning_rate": 3.681666666666667e-05, "loss": 0.8691, "step": 2210 }, { "epoch": 0.5495049504950495, "grad_norm": 1.172852873802185, "learning_rate": 3.6983333333333336e-05, "loss": 0.8693, "step": 2220 }, { "epoch": 0.551980198019802, "grad_norm": 1.4062068462371826, "learning_rate": 3.715e-05, "loss": 0.8593, "step": 2230 }, { "epoch": 0.5544554455445545, "grad_norm": 1.3376946449279785, "learning_rate": 3.731666666666667e-05, "loss": 0.8608, "step": 2240 }, { "epoch": 0.556930693069307, "grad_norm": 1.468385100364685, "learning_rate": 3.7483333333333334e-05, "loss": 0.8616, "step": 2250 }, { "epoch": 0.5594059405940595, "grad_norm": 1.3171411752700806, "learning_rate": 3.765e-05, "loss": 0.86, "step": 2260 }, { "epoch": 0.5618811881188119, "grad_norm": 1.387757658958435, "learning_rate": 3.7816666666666667e-05, "loss": 0.8612, "step": 2270 }, { "epoch": 0.5643564356435643, "grad_norm": 1.4769197702407837, "learning_rate": 3.798333333333334e-05, "loss": 0.8539, "step": 2280 }, { "epoch": 0.5668316831683168, "grad_norm": 1.4352400302886963, "learning_rate": 3.8150000000000006e-05, "loss": 0.8632, "step": 2290 }, { "epoch": 0.5693069306930693, "grad_norm": 1.332161784172058, "learning_rate": 3.8316666666666665e-05, "loss": 0.8502, "step": 2300 }, { "epoch": 0.5717821782178217, "grad_norm": 1.285348892211914, "learning_rate": 3.848333333333334e-05, "loss": 0.8502, "step": 2310 }, { "epoch": 0.5742574257425742, "grad_norm": 1.323593258857727, "learning_rate": 3.8650000000000004e-05, "loss": 0.8462, "step": 2320 }, { "epoch": 0.5767326732673267, "grad_norm": 1.3401758670806885, "learning_rate": 3.881666666666667e-05, "loss": 0.8467, "step": 2330 }, { "epoch": 0.5792079207920792, "grad_norm": 1.3217445611953735, "learning_rate": 3.898333333333333e-05, "loss": 0.8457, "step": 2340 }, { "epoch": 0.5816831683168316, "grad_norm": 1.3982020616531372, "learning_rate": 3.915e-05, "loss": 0.8394, "step": 2350 }, { "epoch": 0.5841584158415841, "grad_norm": 1.373146414756775, "learning_rate": 3.931666666666667e-05, "loss": 0.8464, "step": 2360 }, { "epoch": 0.5866336633663366, "grad_norm": 1.4554542303085327, "learning_rate": 3.9483333333333335e-05, "loss": 0.838, "step": 2370 }, { "epoch": 0.5891089108910891, "grad_norm": 1.430923342704773, "learning_rate": 3.965e-05, "loss": 0.8471, "step": 2380 }, { "epoch": 0.5915841584158416, "grad_norm": 1.294318675994873, "learning_rate": 3.981666666666667e-05, "loss": 0.839, "step": 2390 }, { "epoch": 0.594059405940594, "grad_norm": 1.4778071641921997, "learning_rate": 3.9983333333333334e-05, "loss": 0.8418, "step": 2400 }, { "epoch": 0.5965346534653465, "grad_norm": 1.4681545495986938, "learning_rate": 4.015000000000001e-05, "loss": 0.8343, "step": 2410 }, { "epoch": 0.599009900990099, "grad_norm": 1.452013611793518, "learning_rate": 4.0316666666666666e-05, "loss": 0.8346, "step": 2420 }, { "epoch": 0.6014851485148515, "grad_norm": 1.5004664659500122, "learning_rate": 4.048333333333333e-05, "loss": 0.8274, "step": 2430 }, { "epoch": 0.6039603960396039, "grad_norm": 1.3240424394607544, "learning_rate": 4.065e-05, "loss": 0.8322, "step": 2440 }, { "epoch": 0.6064356435643564, "grad_norm": 1.519425630569458, "learning_rate": 4.081666666666667e-05, "loss": 0.8262, "step": 2450 }, { "epoch": 0.6089108910891089, "grad_norm": 1.3802533149719238, "learning_rate": 4.098333333333334e-05, "loss": 0.8283, "step": 2460 }, { "epoch": 0.6113861386138614, "grad_norm": 1.5083903074264526, "learning_rate": 4.115e-05, "loss": 0.8221, "step": 2470 }, { "epoch": 0.6138613861386139, "grad_norm": 1.3876698017120361, "learning_rate": 4.131666666666667e-05, "loss": 0.8308, "step": 2480 }, { "epoch": 0.6163366336633663, "grad_norm": 1.4345647096633911, "learning_rate": 4.1483333333333337e-05, "loss": 0.8249, "step": 2490 }, { "epoch": 0.6188118811881188, "grad_norm": 1.4789422750473022, "learning_rate": 4.165e-05, "loss": 0.82, "step": 2500 }, { "epoch": 0.6212871287128713, "grad_norm": 1.6065053939819336, "learning_rate": 4.181666666666667e-05, "loss": 0.8219, "step": 2510 }, { "epoch": 0.6237623762376238, "grad_norm": 1.5004189014434814, "learning_rate": 4.1983333333333335e-05, "loss": 0.8144, "step": 2520 }, { "epoch": 0.6262376237623762, "grad_norm": 1.5605446100234985, "learning_rate": 4.215e-05, "loss": 0.8215, "step": 2530 }, { "epoch": 0.6287128712871287, "grad_norm": 1.3566524982452393, "learning_rate": 4.2316666666666674e-05, "loss": 0.812, "step": 2540 }, { "epoch": 0.6311881188118812, "grad_norm": 1.3298444747924805, "learning_rate": 4.2483333333333334e-05, "loss": 0.8104, "step": 2550 }, { "epoch": 0.6336633663366337, "grad_norm": 1.4033889770507812, "learning_rate": 4.265e-05, "loss": 0.8169, "step": 2560 }, { "epoch": 0.6361386138613861, "grad_norm": 1.4860914945602417, "learning_rate": 4.2816666666666666e-05, "loss": 0.8127, "step": 2570 }, { "epoch": 0.6386138613861386, "grad_norm": 1.538783311843872, "learning_rate": 4.298333333333334e-05, "loss": 0.8099, "step": 2580 }, { "epoch": 0.6410891089108911, "grad_norm": 1.4975072145462036, "learning_rate": 4.315e-05, "loss": 0.8087, "step": 2590 }, { "epoch": 0.6435643564356436, "grad_norm": 1.4738306999206543, "learning_rate": 4.3316666666666665e-05, "loss": 0.8095, "step": 2600 }, { "epoch": 0.6460396039603961, "grad_norm": 1.5917948484420776, "learning_rate": 4.348333333333334e-05, "loss": 0.8085, "step": 2610 }, { "epoch": 0.6485148514851485, "grad_norm": 1.439626693725586, "learning_rate": 4.3650000000000004e-05, "loss": 0.7992, "step": 2620 }, { "epoch": 0.650990099009901, "grad_norm": 1.4349663257598877, "learning_rate": 4.381666666666667e-05, "loss": 0.8038, "step": 2630 }, { "epoch": 0.6534653465346535, "grad_norm": 1.5834392309188843, "learning_rate": 4.3983333333333336e-05, "loss": 0.803, "step": 2640 }, { "epoch": 0.655940594059406, "grad_norm": 1.5815359354019165, "learning_rate": 4.415e-05, "loss": 0.8046, "step": 2650 }, { "epoch": 0.6584158415841584, "grad_norm": 1.621337890625, "learning_rate": 4.431666666666667e-05, "loss": 0.795, "step": 2660 }, { "epoch": 0.6608910891089109, "grad_norm": 1.4448026418685913, "learning_rate": 4.4483333333333335e-05, "loss": 0.7914, "step": 2670 }, { "epoch": 0.6633663366336634, "grad_norm": 1.4646351337432861, "learning_rate": 4.465e-05, "loss": 0.7946, "step": 2680 }, { "epoch": 0.6658415841584159, "grad_norm": 1.472959280014038, "learning_rate": 4.481666666666667e-05, "loss": 0.7903, "step": 2690 }, { "epoch": 0.6683168316831684, "grad_norm": 1.4932724237442017, "learning_rate": 4.4983333333333334e-05, "loss": 0.7907, "step": 2700 }, { "epoch": 0.6707920792079208, "grad_norm": 1.40550696849823, "learning_rate": 4.5150000000000006e-05, "loss": 0.7853, "step": 2710 }, { "epoch": 0.6732673267326733, "grad_norm": 1.521498203277588, "learning_rate": 4.5316666666666666e-05, "loss": 0.7874, "step": 2720 }, { "epoch": 0.6757425742574258, "grad_norm": 1.462113618850708, "learning_rate": 4.548333333333333e-05, "loss": 0.7871, "step": 2730 }, { "epoch": 0.6782178217821783, "grad_norm": 1.682627558708191, "learning_rate": 4.5650000000000005e-05, "loss": 0.7857, "step": 2740 }, { "epoch": 0.6806930693069307, "grad_norm": 1.5805023908615112, "learning_rate": 4.581666666666667e-05, "loss": 0.7895, "step": 2750 }, { "epoch": 0.6831683168316832, "grad_norm": 1.4807941913604736, "learning_rate": 4.598333333333333e-05, "loss": 0.7814, "step": 2760 }, { "epoch": 0.6856435643564357, "grad_norm": 1.5386345386505127, "learning_rate": 4.6150000000000004e-05, "loss": 0.7818, "step": 2770 }, { "epoch": 0.6881188118811881, "grad_norm": 1.5668855905532837, "learning_rate": 4.631666666666667e-05, "loss": 0.7821, "step": 2780 }, { "epoch": 0.6905940594059405, "grad_norm": 1.418047308921814, "learning_rate": 4.6483333333333336e-05, "loss": 0.7772, "step": 2790 }, { "epoch": 0.693069306930693, "grad_norm": 1.5024524927139282, "learning_rate": 4.665e-05, "loss": 0.7841, "step": 2800 }, { "epoch": 0.6955445544554455, "grad_norm": 1.3902467489242554, "learning_rate": 4.681666666666667e-05, "loss": 0.7735, "step": 2810 }, { "epoch": 0.698019801980198, "grad_norm": 1.4091075658798218, "learning_rate": 4.6983333333333335e-05, "loss": 0.7782, "step": 2820 }, { "epoch": 0.7004950495049505, "grad_norm": 1.461242914199829, "learning_rate": 4.715e-05, "loss": 0.7742, "step": 2830 }, { "epoch": 0.7029702970297029, "grad_norm": 1.3560080528259277, "learning_rate": 4.731666666666667e-05, "loss": 0.7817, "step": 2840 }, { "epoch": 0.7054455445544554, "grad_norm": 1.3964629173278809, "learning_rate": 4.748333333333333e-05, "loss": 0.7697, "step": 2850 }, { "epoch": 0.7079207920792079, "grad_norm": 1.6299197673797607, "learning_rate": 4.765e-05, "loss": 0.7793, "step": 2860 }, { "epoch": 0.7103960396039604, "grad_norm": 1.4180978536605835, "learning_rate": 4.781666666666667e-05, "loss": 0.769, "step": 2870 }, { "epoch": 0.7128712871287128, "grad_norm": 1.5892529487609863, "learning_rate": 4.798333333333334e-05, "loss": 0.7659, "step": 2880 }, { "epoch": 0.7153465346534653, "grad_norm": 1.484731674194336, "learning_rate": 4.815e-05, "loss": 0.7665, "step": 2890 }, { "epoch": 0.7178217821782178, "grad_norm": 1.6621496677398682, "learning_rate": 4.831666666666667e-05, "loss": 0.7673, "step": 2900 }, { "epoch": 0.7202970297029703, "grad_norm": 1.5680480003356934, "learning_rate": 4.848333333333334e-05, "loss": 0.7656, "step": 2910 }, { "epoch": 0.7227722772277227, "grad_norm": 1.524142861366272, "learning_rate": 4.8650000000000003e-05, "loss": 0.7654, "step": 2920 }, { "epoch": 0.7252475247524752, "grad_norm": 1.5192573070526123, "learning_rate": 4.881666666666667e-05, "loss": 0.7669, "step": 2930 }, { "epoch": 0.7277227722772277, "grad_norm": 1.4075260162353516, "learning_rate": 4.8983333333333336e-05, "loss": 0.7693, "step": 2940 }, { "epoch": 0.7301980198019802, "grad_norm": 1.478567361831665, "learning_rate": 4.915e-05, "loss": 0.7622, "step": 2950 }, { "epoch": 0.7326732673267327, "grad_norm": 1.517823338508606, "learning_rate": 4.931666666666667e-05, "loss": 0.7605, "step": 2960 }, { "epoch": 0.7351485148514851, "grad_norm": 1.441238522529602, "learning_rate": 4.9483333333333334e-05, "loss": 0.7677, "step": 2970 }, { "epoch": 0.7376237623762376, "grad_norm": 1.4414695501327515, "learning_rate": 4.965e-05, "loss": 0.7627, "step": 2980 }, { "epoch": 0.7400990099009901, "grad_norm": 1.4803917407989502, "learning_rate": 4.981666666666667e-05, "loss": 0.7634, "step": 2990 }, { "epoch": 0.7425742574257426, "grad_norm": 1.7469466924667358, "learning_rate": 4.998333333333334e-05, "loss": 0.7645, "step": 3000 }, { "epoch": 0.745049504950495, "grad_norm": 1.560592532157898, "learning_rate": 5.015e-05, "loss": 0.7559, "step": 3010 }, { "epoch": 0.7475247524752475, "grad_norm": 1.5608296394348145, "learning_rate": 5.0316666666666665e-05, "loss": 0.75, "step": 3020 }, { "epoch": 0.75, "grad_norm": 1.3769769668579102, "learning_rate": 5.048333333333334e-05, "loss": 0.7572, "step": 3030 }, { "epoch": 0.7524752475247525, "grad_norm": 1.5442588329315186, "learning_rate": 5.065e-05, "loss": 0.758, "step": 3040 }, { "epoch": 0.754950495049505, "grad_norm": 1.4457921981811523, "learning_rate": 5.081666666666667e-05, "loss": 0.7494, "step": 3050 }, { "epoch": 0.7574257425742574, "grad_norm": 1.4600157737731934, "learning_rate": 5.098333333333334e-05, "loss": 0.7452, "step": 3060 }, { "epoch": 0.7599009900990099, "grad_norm": 1.5494787693023682, "learning_rate": 5.1149999999999996e-05, "loss": 0.7486, "step": 3070 }, { "epoch": 0.7623762376237624, "grad_norm": 1.4645100831985474, "learning_rate": 5.131666666666667e-05, "loss": 0.7528, "step": 3080 }, { "epoch": 0.7648514851485149, "grad_norm": 1.5554059743881226, "learning_rate": 5.1483333333333336e-05, "loss": 0.7449, "step": 3090 }, { "epoch": 0.7673267326732673, "grad_norm": 1.6009458303451538, "learning_rate": 5.1649999999999995e-05, "loss": 0.7555, "step": 3100 }, { "epoch": 0.7698019801980198, "grad_norm": 1.537226915359497, "learning_rate": 5.181666666666667e-05, "loss": 0.7501, "step": 3110 }, { "epoch": 0.7722772277227723, "grad_norm": 1.4734143018722534, "learning_rate": 5.198333333333334e-05, "loss": 0.743, "step": 3120 }, { "epoch": 0.7747524752475248, "grad_norm": 1.4755868911743164, "learning_rate": 5.215e-05, "loss": 0.7417, "step": 3130 }, { "epoch": 0.7772277227722773, "grad_norm": 1.5243483781814575, "learning_rate": 5.231666666666667e-05, "loss": 0.7443, "step": 3140 }, { "epoch": 0.7797029702970297, "grad_norm": 1.419180989265442, "learning_rate": 5.248333333333334e-05, "loss": 0.7435, "step": 3150 }, { "epoch": 0.7821782178217822, "grad_norm": 1.4944037199020386, "learning_rate": 5.265e-05, "loss": 0.7454, "step": 3160 }, { "epoch": 0.7846534653465347, "grad_norm": 1.6440101861953735, "learning_rate": 5.281666666666667e-05, "loss": 0.7376, "step": 3170 }, { "epoch": 0.7871287128712872, "grad_norm": 1.4997076988220215, "learning_rate": 5.298333333333334e-05, "loss": 0.7457, "step": 3180 }, { "epoch": 0.7896039603960396, "grad_norm": 1.600842833518982, "learning_rate": 5.315e-05, "loss": 0.7435, "step": 3190 }, { "epoch": 0.7920792079207921, "grad_norm": 1.5473531484603882, "learning_rate": 5.331666666666667e-05, "loss": 0.7367, "step": 3200 }, { "epoch": 0.7945544554455446, "grad_norm": 1.570369839668274, "learning_rate": 5.3483333333333344e-05, "loss": 0.7357, "step": 3210 }, { "epoch": 0.7970297029702971, "grad_norm": 1.5396733283996582, "learning_rate": 5.365e-05, "loss": 0.7344, "step": 3220 }, { "epoch": 0.7995049504950495, "grad_norm": 1.4721146821975708, "learning_rate": 5.381666666666667e-05, "loss": 0.7378, "step": 3230 }, { "epoch": 0.801980198019802, "grad_norm": 1.5821391344070435, "learning_rate": 5.398333333333334e-05, "loss": 0.7309, "step": 3240 }, { "epoch": 0.8044554455445545, "grad_norm": 1.5902574062347412, "learning_rate": 5.415e-05, "loss": 0.7348, "step": 3250 }, { "epoch": 0.806930693069307, "grad_norm": 1.4253491163253784, "learning_rate": 5.4316666666666675e-05, "loss": 0.741, "step": 3260 }, { "epoch": 0.8094059405940595, "grad_norm": 1.3664137125015259, "learning_rate": 5.4483333333333334e-05, "loss": 0.7257, "step": 3270 }, { "epoch": 0.8118811881188119, "grad_norm": 1.5604255199432373, "learning_rate": 5.465e-05, "loss": 0.7313, "step": 3280 }, { "epoch": 0.8143564356435643, "grad_norm": 1.5041618347167969, "learning_rate": 5.481666666666667e-05, "loss": 0.7316, "step": 3290 }, { "epoch": 0.8168316831683168, "grad_norm": 1.4980888366699219, "learning_rate": 5.498333333333333e-05, "loss": 0.7266, "step": 3300 }, { "epoch": 0.8193069306930693, "grad_norm": 1.5725512504577637, "learning_rate": 5.515e-05, "loss": 0.718, "step": 3310 }, { "epoch": 0.8217821782178217, "grad_norm": 1.562142014503479, "learning_rate": 5.531666666666667e-05, "loss": 0.723, "step": 3320 }, { "epoch": 0.8242574257425742, "grad_norm": 1.5043144226074219, "learning_rate": 5.548333333333333e-05, "loss": 0.723, "step": 3330 }, { "epoch": 0.8267326732673267, "grad_norm": 1.4806387424468994, "learning_rate": 5.5650000000000004e-05, "loss": 0.7172, "step": 3340 }, { "epoch": 0.8292079207920792, "grad_norm": 1.4793579578399658, "learning_rate": 5.581666666666667e-05, "loss": 0.7214, "step": 3350 }, { "epoch": 0.8316831683168316, "grad_norm": 1.4649860858917236, "learning_rate": 5.598333333333333e-05, "loss": 0.7202, "step": 3360 }, { "epoch": 0.8341584158415841, "grad_norm": 1.5484026670455933, "learning_rate": 5.615e-05, "loss": 0.7138, "step": 3370 }, { "epoch": 0.8366336633663366, "grad_norm": 1.4887079000473022, "learning_rate": 5.6316666666666676e-05, "loss": 0.7175, "step": 3380 }, { "epoch": 0.8391089108910891, "grad_norm": 1.4386017322540283, "learning_rate": 5.6483333333333335e-05, "loss": 0.7157, "step": 3390 }, { "epoch": 0.8415841584158416, "grad_norm": 1.5633248090744019, "learning_rate": 5.665e-05, "loss": 0.7196, "step": 3400 }, { "epoch": 0.844059405940594, "grad_norm": 1.4451940059661865, "learning_rate": 5.6816666666666674e-05, "loss": 0.7247, "step": 3410 }, { "epoch": 0.8465346534653465, "grad_norm": 1.5586931705474854, "learning_rate": 5.6983333333333334e-05, "loss": 0.7117, "step": 3420 }, { "epoch": 0.849009900990099, "grad_norm": 1.4629358053207397, "learning_rate": 5.715000000000001e-05, "loss": 0.7153, "step": 3430 }, { "epoch": 0.8514851485148515, "grad_norm": 1.4399821758270264, "learning_rate": 5.731666666666667e-05, "loss": 0.7128, "step": 3440 }, { "epoch": 0.8539603960396039, "grad_norm": 1.5318872928619385, "learning_rate": 5.748333333333333e-05, "loss": 0.7136, "step": 3450 }, { "epoch": 0.8564356435643564, "grad_norm": 1.4728195667266846, "learning_rate": 5.7650000000000005e-05, "loss": 0.7083, "step": 3460 }, { "epoch": 0.8589108910891089, "grad_norm": 1.5553905963897705, "learning_rate": 5.781666666666667e-05, "loss": 0.7096, "step": 3470 }, { "epoch": 0.8613861386138614, "grad_norm": 1.3432985544204712, "learning_rate": 5.798333333333333e-05, "loss": 0.7071, "step": 3480 }, { "epoch": 0.8638613861386139, "grad_norm": 1.5949797630310059, "learning_rate": 5.8150000000000004e-05, "loss": 0.7136, "step": 3490 }, { "epoch": 0.8663366336633663, "grad_norm": 1.6628012657165527, "learning_rate": 5.831666666666668e-05, "loss": 0.7048, "step": 3500 }, { "epoch": 0.8688118811881188, "grad_norm": 1.6400208473205566, "learning_rate": 5.8483333333333336e-05, "loss": 0.7039, "step": 3510 }, { "epoch": 0.8712871287128713, "grad_norm": 1.402815818786621, "learning_rate": 5.865e-05, "loss": 0.7052, "step": 3520 }, { "epoch": 0.8737623762376238, "grad_norm": 1.543257474899292, "learning_rate": 5.881666666666666e-05, "loss": 0.7063, "step": 3530 }, { "epoch": 0.8762376237623762, "grad_norm": 1.4792520999908447, "learning_rate": 5.8983333333333335e-05, "loss": 0.7061, "step": 3540 }, { "epoch": 0.8787128712871287, "grad_norm": 1.4779129028320312, "learning_rate": 5.915000000000001e-05, "loss": 0.7038, "step": 3550 }, { "epoch": 0.8811881188118812, "grad_norm": 1.4667507410049438, "learning_rate": 5.931666666666667e-05, "loss": 0.7071, "step": 3560 }, { "epoch": 0.8836633663366337, "grad_norm": 1.5270806550979614, "learning_rate": 5.9483333333333334e-05, "loss": 0.7031, "step": 3570 }, { "epoch": 0.8861386138613861, "grad_norm": 1.5601025819778442, "learning_rate": 5.9650000000000007e-05, "loss": 0.7111, "step": 3580 }, { "epoch": 0.8886138613861386, "grad_norm": 1.4822452068328857, "learning_rate": 5.9816666666666666e-05, "loss": 0.7051, "step": 3590 }, { "epoch": 0.8910891089108911, "grad_norm": 1.4360407590866089, "learning_rate": 5.998333333333334e-05, "loss": 0.7034, "step": 3600 }, { "epoch": 0.8935643564356436, "grad_norm": 1.476906657218933, "learning_rate": 6.0150000000000005e-05, "loss": 0.6977, "step": 3610 }, { "epoch": 0.8960396039603961, "grad_norm": 1.5470445156097412, "learning_rate": 6.0316666666666665e-05, "loss": 0.7007, "step": 3620 }, { "epoch": 0.8985148514851485, "grad_norm": 1.4893015623092651, "learning_rate": 6.048333333333334e-05, "loss": 0.6939, "step": 3630 }, { "epoch": 0.900990099009901, "grad_norm": 1.4637203216552734, "learning_rate": 6.0650000000000004e-05, "loss": 0.697, "step": 3640 }, { "epoch": 0.9034653465346535, "grad_norm": 1.5203766822814941, "learning_rate": 6.081666666666666e-05, "loss": 0.6964, "step": 3650 }, { "epoch": 0.905940594059406, "grad_norm": 1.365180492401123, "learning_rate": 6.0983333333333336e-05, "loss": 0.6967, "step": 3660 }, { "epoch": 0.9084158415841584, "grad_norm": 1.5758309364318848, "learning_rate": 6.115000000000001e-05, "loss": 0.7004, "step": 3670 }, { "epoch": 0.9108910891089109, "grad_norm": 1.5733188390731812, "learning_rate": 6.131666666666666e-05, "loss": 0.6925, "step": 3680 }, { "epoch": 0.9133663366336634, "grad_norm": 1.5252972841262817, "learning_rate": 6.148333333333334e-05, "loss": 0.6935, "step": 3690 }, { "epoch": 0.9158415841584159, "grad_norm": 1.5073034763336182, "learning_rate": 6.165000000000001e-05, "loss": 0.6925, "step": 3700 }, { "epoch": 0.9183168316831684, "grad_norm": 1.3707022666931152, "learning_rate": 6.181666666666667e-05, "loss": 0.6937, "step": 3710 }, { "epoch": 0.9207920792079208, "grad_norm": 1.4607210159301758, "learning_rate": 6.198333333333334e-05, "loss": 0.6855, "step": 3720 }, { "epoch": 0.9232673267326733, "grad_norm": 1.7313876152038574, "learning_rate": 6.215e-05, "loss": 0.6913, "step": 3730 }, { "epoch": 0.9257425742574258, "grad_norm": 1.5466668605804443, "learning_rate": 6.231666666666667e-05, "loss": 0.6897, "step": 3740 }, { "epoch": 0.9282178217821783, "grad_norm": 1.4531956911087036, "learning_rate": 6.248333333333334e-05, "loss": 0.6879, "step": 3750 }, { "epoch": 0.9306930693069307, "grad_norm": 1.573498249053955, "learning_rate": 6.264999999999999e-05, "loss": 0.6777, "step": 3760 }, { "epoch": 0.9331683168316832, "grad_norm": 1.515224575996399, "learning_rate": 6.281666666666667e-05, "loss": 0.6854, "step": 3770 }, { "epoch": 0.9356435643564357, "grad_norm": 1.4251906871795654, "learning_rate": 6.298333333333334e-05, "loss": 0.676, "step": 3780 }, { "epoch": 0.9381188118811881, "grad_norm": 1.4154175519943237, "learning_rate": 6.315e-05, "loss": 0.6839, "step": 3790 }, { "epoch": 0.9405940594059405, "grad_norm": 1.5168192386627197, "learning_rate": 6.331666666666667e-05, "loss": 0.6889, "step": 3800 }, { "epoch": 0.943069306930693, "grad_norm": 1.5552061796188354, "learning_rate": 6.348333333333334e-05, "loss": 0.6803, "step": 3810 }, { "epoch": 0.9455445544554455, "grad_norm": 1.6521320343017578, "learning_rate": 6.365e-05, "loss": 0.6869, "step": 3820 }, { "epoch": 0.948019801980198, "grad_norm": 1.47165048122406, "learning_rate": 6.381666666666667e-05, "loss": 0.687, "step": 3830 }, { "epoch": 0.9504950495049505, "grad_norm": 1.5242950916290283, "learning_rate": 6.398333333333333e-05, "loss": 0.6802, "step": 3840 }, { "epoch": 0.9529702970297029, "grad_norm": 1.4449701309204102, "learning_rate": 6.415e-05, "loss": 0.6828, "step": 3850 }, { "epoch": 0.9554455445544554, "grad_norm": 1.588182806968689, "learning_rate": 6.431666666666667e-05, "loss": 0.6741, "step": 3860 }, { "epoch": 0.9579207920792079, "grad_norm": 1.5539838075637817, "learning_rate": 6.448333333333335e-05, "loss": 0.6761, "step": 3870 }, { "epoch": 0.9603960396039604, "grad_norm": 1.4565149545669556, "learning_rate": 6.465e-05, "loss": 0.6804, "step": 3880 }, { "epoch": 0.9628712871287128, "grad_norm": 1.5840476751327515, "learning_rate": 6.481666666666667e-05, "loss": 0.6766, "step": 3890 }, { "epoch": 0.9653465346534653, "grad_norm": 1.632237195968628, "learning_rate": 6.498333333333335e-05, "loss": 0.6749, "step": 3900 }, { "epoch": 0.9678217821782178, "grad_norm": 1.3464844226837158, "learning_rate": 6.515e-05, "loss": 0.6762, "step": 3910 }, { "epoch": 0.9702970297029703, "grad_norm": 1.581925630569458, "learning_rate": 6.531666666666666e-05, "loss": 0.6703, "step": 3920 }, { "epoch": 0.9727722772277227, "grad_norm": 1.414873719215393, "learning_rate": 6.548333333333334e-05, "loss": 0.6725, "step": 3930 }, { "epoch": 0.9752475247524752, "grad_norm": 1.5039047002792358, "learning_rate": 6.565e-05, "loss": 0.6691, "step": 3940 }, { "epoch": 0.9777227722772277, "grad_norm": 1.4870887994766235, "learning_rate": 6.581666666666668e-05, "loss": 0.67, "step": 3950 }, { "epoch": 0.9801980198019802, "grad_norm": 1.6180627346038818, "learning_rate": 6.598333333333334e-05, "loss": 0.6721, "step": 3960 }, { "epoch": 0.9826732673267327, "grad_norm": 1.5509743690490723, "learning_rate": 6.615e-05, "loss": 0.6724, "step": 3970 }, { "epoch": 0.9851485148514851, "grad_norm": 1.5836668014526367, "learning_rate": 6.631666666666667e-05, "loss": 0.677, "step": 3980 }, { "epoch": 0.9876237623762376, "grad_norm": 1.6193903684616089, "learning_rate": 6.648333333333334e-05, "loss": 0.6698, "step": 3990 }, { "epoch": 0.9900990099009901, "grad_norm": 1.6525123119354248, "learning_rate": 6.665000000000001e-05, "loss": 0.672, "step": 4000 }, { "epoch": 0.9925742574257426, "grad_norm": 1.4602067470550537, "learning_rate": 6.681666666666667e-05, "loss": 0.6667, "step": 4010 }, { "epoch": 0.995049504950495, "grad_norm": 1.378293514251709, "learning_rate": 6.698333333333333e-05, "loss": 0.6748, "step": 4020 }, { "epoch": 0.9975247524752475, "grad_norm": 1.4706934690475464, "learning_rate": 6.715e-05, "loss": 0.6795, "step": 4030 }, { "epoch": 1.0, "grad_norm": 1.5670477151870728, "learning_rate": 6.731666666666667e-05, "loss": 0.6633, "step": 4040 }, { "epoch": 1.0024752475247525, "grad_norm": 1.3544005155563354, "learning_rate": 6.748333333333334e-05, "loss": 0.665, "step": 4050 }, { "epoch": 1.004950495049505, "grad_norm": 1.5790141820907593, "learning_rate": 6.765e-05, "loss": 0.6718, "step": 4060 }, { "epoch": 1.0074257425742574, "grad_norm": 1.5518696308135986, "learning_rate": 6.781666666666667e-05, "loss": 0.6642, "step": 4070 }, { "epoch": 1.00990099009901, "grad_norm": 1.5726948976516724, "learning_rate": 6.798333333333334e-05, "loss": 0.6665, "step": 4080 }, { "epoch": 1.0123762376237624, "grad_norm": 1.5328075885772705, "learning_rate": 6.815e-05, "loss": 0.6573, "step": 4090 }, { "epoch": 1.0148514851485149, "grad_norm": 1.6444288492202759, "learning_rate": 6.831666666666667e-05, "loss": 0.6675, "step": 4100 }, { "epoch": 1.0173267326732673, "grad_norm": 1.5270886421203613, "learning_rate": 6.848333333333334e-05, "loss": 0.6625, "step": 4110 }, { "epoch": 1.0198019801980198, "grad_norm": 1.6271352767944336, "learning_rate": 6.865e-05, "loss": 0.6659, "step": 4120 }, { "epoch": 1.0222772277227723, "grad_norm": 1.414608120918274, "learning_rate": 6.881666666666667e-05, "loss": 0.6599, "step": 4130 }, { "epoch": 1.0247524752475248, "grad_norm": 1.3975894451141357, "learning_rate": 6.898333333333333e-05, "loss": 0.6637, "step": 4140 }, { "epoch": 1.0272277227722773, "grad_norm": 1.3748952150344849, "learning_rate": 6.915e-05, "loss": 0.6643, "step": 4150 }, { "epoch": 1.0297029702970297, "grad_norm": 1.4789704084396362, "learning_rate": 6.931666666666668e-05, "loss": 0.6592, "step": 4160 }, { "epoch": 1.0321782178217822, "grad_norm": 1.4048635959625244, "learning_rate": 6.948333333333333e-05, "loss": 0.6554, "step": 4170 }, { "epoch": 1.0346534653465347, "grad_norm": 1.6245118379592896, "learning_rate": 6.965e-05, "loss": 0.6547, "step": 4180 }, { "epoch": 1.0371287128712872, "grad_norm": 1.5101001262664795, "learning_rate": 6.981666666666668e-05, "loss": 0.6587, "step": 4190 }, { "epoch": 1.0396039603960396, "grad_norm": 1.389801263809204, "learning_rate": 6.998333333333333e-05, "loss": 0.6543, "step": 4200 }, { "epoch": 1.0420792079207921, "grad_norm": 1.4119632244110107, "learning_rate": 7.015000000000001e-05, "loss": 0.6619, "step": 4210 }, { "epoch": 1.0445544554455446, "grad_norm": 1.4919642210006714, "learning_rate": 7.031666666666668e-05, "loss": 0.6558, "step": 4220 }, { "epoch": 1.047029702970297, "grad_norm": 1.5376124382019043, "learning_rate": 7.048333333333333e-05, "loss": 0.6574, "step": 4230 }, { "epoch": 1.0495049504950495, "grad_norm": 1.4146764278411865, "learning_rate": 7.065000000000001e-05, "loss": 0.6604, "step": 4240 }, { "epoch": 1.051980198019802, "grad_norm": 1.3930944204330444, "learning_rate": 7.081666666666668e-05, "loss": 0.6539, "step": 4250 }, { "epoch": 1.0544554455445545, "grad_norm": 1.46146821975708, "learning_rate": 7.098333333333333e-05, "loss": 0.6515, "step": 4260 }, { "epoch": 1.056930693069307, "grad_norm": 1.4593629837036133, "learning_rate": 7.115000000000001e-05, "loss": 0.6505, "step": 4270 }, { "epoch": 1.0594059405940595, "grad_norm": 1.5977541208267212, "learning_rate": 7.131666666666666e-05, "loss": 0.6528, "step": 4280 }, { "epoch": 1.061881188118812, "grad_norm": 1.3792760372161865, "learning_rate": 7.148333333333334e-05, "loss": 0.658, "step": 4290 }, { "epoch": 1.0643564356435644, "grad_norm": 1.601241111755371, "learning_rate": 7.165000000000001e-05, "loss": 0.6498, "step": 4300 }, { "epoch": 1.066831683168317, "grad_norm": 1.4073344469070435, "learning_rate": 7.181666666666666e-05, "loss": 0.6529, "step": 4310 }, { "epoch": 1.0693069306930694, "grad_norm": 1.5261094570159912, "learning_rate": 7.198333333333334e-05, "loss": 0.653, "step": 4320 }, { "epoch": 1.0717821782178218, "grad_norm": 1.5155489444732666, "learning_rate": 7.215e-05, "loss": 0.6466, "step": 4330 }, { "epoch": 1.0742574257425743, "grad_norm": 1.409618616104126, "learning_rate": 7.231666666666667e-05, "loss": 0.6574, "step": 4340 }, { "epoch": 1.0767326732673268, "grad_norm": 1.5717377662658691, "learning_rate": 7.248333333333334e-05, "loss": 0.6534, "step": 4350 }, { "epoch": 1.0792079207920793, "grad_norm": 1.4725044965744019, "learning_rate": 7.265e-05, "loss": 0.6524, "step": 4360 }, { "epoch": 1.0816831683168318, "grad_norm": 1.4484816789627075, "learning_rate": 7.281666666666667e-05, "loss": 0.6453, "step": 4370 }, { "epoch": 1.0841584158415842, "grad_norm": 1.523217797279358, "learning_rate": 7.298333333333334e-05, "loss": 0.6453, "step": 4380 }, { "epoch": 1.0866336633663367, "grad_norm": 1.527332067489624, "learning_rate": 7.315e-05, "loss": 0.6428, "step": 4390 }, { "epoch": 1.0891089108910892, "grad_norm": 1.5956732034683228, "learning_rate": 7.331666666666667e-05, "loss": 0.6467, "step": 4400 }, { "epoch": 1.0915841584158417, "grad_norm": 1.7034837007522583, "learning_rate": 7.348333333333334e-05, "loss": 0.6431, "step": 4410 }, { "epoch": 1.0940594059405941, "grad_norm": 1.594659447669983, "learning_rate": 7.365e-05, "loss": 0.6459, "step": 4420 }, { "epoch": 1.0965346534653466, "grad_norm": 1.439086675643921, "learning_rate": 7.381666666666667e-05, "loss": 0.6452, "step": 4430 }, { "epoch": 1.099009900990099, "grad_norm": 1.5224448442459106, "learning_rate": 7.398333333333333e-05, "loss": 0.6423, "step": 4440 }, { "epoch": 1.1014851485148516, "grad_norm": 1.511752724647522, "learning_rate": 7.415000000000001e-05, "loss": 0.6457, "step": 4450 }, { "epoch": 1.103960396039604, "grad_norm": 1.4200714826583862, "learning_rate": 7.431666666666667e-05, "loss": 0.6394, "step": 4460 }, { "epoch": 1.1064356435643565, "grad_norm": 1.4950851202011108, "learning_rate": 7.448333333333333e-05, "loss": 0.6447, "step": 4470 }, { "epoch": 1.108910891089109, "grad_norm": 1.5417380332946777, "learning_rate": 7.465000000000001e-05, "loss": 0.643, "step": 4480 }, { "epoch": 1.1113861386138615, "grad_norm": 1.3338950872421265, "learning_rate": 7.481666666666666e-05, "loss": 0.6427, "step": 4490 }, { "epoch": 1.113861386138614, "grad_norm": 1.4853473901748657, "learning_rate": 7.498333333333334e-05, "loss": 0.6381, "step": 4500 }, { "epoch": 1.1163366336633664, "grad_norm": 1.4015023708343506, "learning_rate": 7.515e-05, "loss": 0.6391, "step": 4510 }, { "epoch": 1.118811881188119, "grad_norm": 1.502561092376709, "learning_rate": 7.531666666666666e-05, "loss": 0.6398, "step": 4520 }, { "epoch": 1.1212871287128712, "grad_norm": 1.5437299013137817, "learning_rate": 7.548333333333334e-05, "loss": 0.6382, "step": 4530 }, { "epoch": 1.1237623762376239, "grad_norm": 1.480706810951233, "learning_rate": 7.565e-05, "loss": 0.6416, "step": 4540 }, { "epoch": 1.1262376237623761, "grad_norm": 1.6073309183120728, "learning_rate": 7.581666666666668e-05, "loss": 0.6359, "step": 4550 }, { "epoch": 1.1287128712871288, "grad_norm": 1.4995564222335815, "learning_rate": 7.598333333333334e-05, "loss": 0.6353, "step": 4560 }, { "epoch": 1.131188118811881, "grad_norm": 1.409693956375122, "learning_rate": 7.615e-05, "loss": 0.6437, "step": 4570 }, { "epoch": 1.1336633663366338, "grad_norm": 1.4644403457641602, "learning_rate": 7.631666666666667e-05, "loss": 0.638, "step": 4580 }, { "epoch": 1.136138613861386, "grad_norm": 1.4105448722839355, "learning_rate": 7.648333333333334e-05, "loss": 0.6409, "step": 4590 }, { "epoch": 1.1386138613861387, "grad_norm": 1.5248273611068726, "learning_rate": 7.664999999999999e-05, "loss": 0.6482, "step": 4600 }, { "epoch": 1.141089108910891, "grad_norm": 1.50985848903656, "learning_rate": 7.681666666666667e-05, "loss": 0.6321, "step": 4610 }, { "epoch": 1.1435643564356435, "grad_norm": 1.4605082273483276, "learning_rate": 7.698333333333334e-05, "loss": 0.6379, "step": 4620 }, { "epoch": 1.146039603960396, "grad_norm": 1.3661079406738281, "learning_rate": 7.715e-05, "loss": 0.6314, "step": 4630 }, { "epoch": 1.1485148514851484, "grad_norm": 1.3783093690872192, "learning_rate": 7.731666666666667e-05, "loss": 0.6393, "step": 4640 }, { "epoch": 1.150990099009901, "grad_norm": 1.5596425533294678, "learning_rate": 7.748333333333334e-05, "loss": 0.6279, "step": 4650 }, { "epoch": 1.1534653465346534, "grad_norm": 1.5309678316116333, "learning_rate": 7.765e-05, "loss": 0.6354, "step": 4660 }, { "epoch": 1.1559405940594059, "grad_norm": 1.5842289924621582, "learning_rate": 7.781666666666667e-05, "loss": 0.6339, "step": 4670 }, { "epoch": 1.1584158415841583, "grad_norm": 1.5560898780822754, "learning_rate": 7.798333333333334e-05, "loss": 0.6369, "step": 4680 }, { "epoch": 1.1608910891089108, "grad_norm": 1.4855530261993408, "learning_rate": 7.815e-05, "loss": 0.6397, "step": 4690 }, { "epoch": 1.1633663366336633, "grad_norm": 1.3141154050827026, "learning_rate": 7.831666666666667e-05, "loss": 0.6355, "step": 4700 }, { "epoch": 1.1658415841584158, "grad_norm": 1.5888612270355225, "learning_rate": 7.848333333333335e-05, "loss": 0.6328, "step": 4710 }, { "epoch": 1.1683168316831682, "grad_norm": 1.4218658208847046, "learning_rate": 7.865e-05, "loss": 0.6331, "step": 4720 }, { "epoch": 1.1707920792079207, "grad_norm": 1.392624020576477, "learning_rate": 7.881666666666667e-05, "loss": 0.6272, "step": 4730 }, { "epoch": 1.1732673267326732, "grad_norm": 1.36640465259552, "learning_rate": 7.898333333333335e-05, "loss": 0.6252, "step": 4740 }, { "epoch": 1.1757425742574257, "grad_norm": 1.544000506401062, "learning_rate": 7.915e-05, "loss": 0.6333, "step": 4750 }, { "epoch": 1.1782178217821782, "grad_norm": 1.611627459526062, "learning_rate": 7.931666666666668e-05, "loss": 0.639, "step": 4760 }, { "epoch": 1.1806930693069306, "grad_norm": 1.4597886800765991, "learning_rate": 7.948333333333333e-05, "loss": 0.6311, "step": 4770 }, { "epoch": 1.183168316831683, "grad_norm": 1.331281304359436, "learning_rate": 7.965e-05, "loss": 0.631, "step": 4780 }, { "epoch": 1.1856435643564356, "grad_norm": 1.4474152326583862, "learning_rate": 7.981666666666668e-05, "loss": 0.629, "step": 4790 }, { "epoch": 1.188118811881188, "grad_norm": 1.4370228052139282, "learning_rate": 7.998333333333333e-05, "loss": 0.6242, "step": 4800 }, { "epoch": 1.1905940594059405, "grad_norm": 1.3662877082824707, "learning_rate": 8.015e-05, "loss": 0.628, "step": 4810 }, { "epoch": 1.193069306930693, "grad_norm": 1.4186474084854126, "learning_rate": 8.031666666666668e-05, "loss": 0.6232, "step": 4820 }, { "epoch": 1.1955445544554455, "grad_norm": 1.4166585206985474, "learning_rate": 8.048333333333333e-05, "loss": 0.6222, "step": 4830 }, { "epoch": 1.198019801980198, "grad_norm": 1.3832623958587646, "learning_rate": 8.065000000000001e-05, "loss": 0.632, "step": 4840 }, { "epoch": 1.2004950495049505, "grad_norm": 1.3867775201797485, "learning_rate": 8.081666666666667e-05, "loss": 0.6309, "step": 4850 }, { "epoch": 1.202970297029703, "grad_norm": 1.358099102973938, "learning_rate": 8.098333333333333e-05, "loss": 0.6265, "step": 4860 }, { "epoch": 1.2054455445544554, "grad_norm": 1.520691990852356, "learning_rate": 8.115000000000001e-05, "loss": 0.6239, "step": 4870 }, { "epoch": 1.2079207920792079, "grad_norm": 1.4524171352386475, "learning_rate": 8.131666666666667e-05, "loss": 0.6262, "step": 4880 }, { "epoch": 1.2103960396039604, "grad_norm": 1.3267614841461182, "learning_rate": 8.148333333333334e-05, "loss": 0.6293, "step": 4890 }, { "epoch": 1.2128712871287128, "grad_norm": 1.4496058225631714, "learning_rate": 8.165e-05, "loss": 0.6298, "step": 4900 }, { "epoch": 1.2153465346534653, "grad_norm": 1.4491539001464844, "learning_rate": 8.181666666666667e-05, "loss": 0.622, "step": 4910 }, { "epoch": 1.2178217821782178, "grad_norm": 1.4288558959960938, "learning_rate": 8.198333333333334e-05, "loss": 0.6244, "step": 4920 }, { "epoch": 1.2202970297029703, "grad_norm": 1.3825559616088867, "learning_rate": 8.215e-05, "loss": 0.6206, "step": 4930 }, { "epoch": 1.2227722772277227, "grad_norm": 1.4709683656692505, "learning_rate": 8.231666666666667e-05, "loss": 0.6279, "step": 4940 }, { "epoch": 1.2252475247524752, "grad_norm": 1.4429601430892944, "learning_rate": 8.248333333333334e-05, "loss": 0.6284, "step": 4950 }, { "epoch": 1.2277227722772277, "grad_norm": 1.4873162508010864, "learning_rate": 8.265e-05, "loss": 0.6213, "step": 4960 }, { "epoch": 1.2301980198019802, "grad_norm": 1.5300003290176392, "learning_rate": 8.281666666666667e-05, "loss": 0.6225, "step": 4970 }, { "epoch": 1.2326732673267327, "grad_norm": 1.2491811513900757, "learning_rate": 8.298333333333334e-05, "loss": 0.6142, "step": 4980 }, { "epoch": 1.2351485148514851, "grad_norm": 1.4044281244277954, "learning_rate": 8.315e-05, "loss": 0.6244, "step": 4990 }, { "epoch": 1.2376237623762376, "grad_norm": 1.53944730758667, "learning_rate": 8.331666666666668e-05, "loss": 0.6269, "step": 5000 }, { "epoch": 1.24009900990099, "grad_norm": 1.401929497718811, "learning_rate": 8.348333333333333e-05, "loss": 0.621, "step": 5010 }, { "epoch": 1.2425742574257426, "grad_norm": 1.3173753023147583, "learning_rate": 8.365e-05, "loss": 0.6221, "step": 5020 }, { "epoch": 1.245049504950495, "grad_norm": 1.463641881942749, "learning_rate": 8.381666666666667e-05, "loss": 0.6236, "step": 5030 }, { "epoch": 1.2475247524752475, "grad_norm": 1.3462008237838745, "learning_rate": 8.398333333333333e-05, "loss": 0.6151, "step": 5040 }, { "epoch": 1.25, "grad_norm": 1.4183306694030762, "learning_rate": 8.415000000000001e-05, "loss": 0.6168, "step": 5050 }, { "epoch": 1.2524752475247525, "grad_norm": 1.3818022012710571, "learning_rate": 8.431666666666667e-05, "loss": 0.6167, "step": 5060 }, { "epoch": 1.254950495049505, "grad_norm": 1.468330979347229, "learning_rate": 8.448333333333333e-05, "loss": 0.6173, "step": 5070 }, { "epoch": 1.2574257425742574, "grad_norm": 1.4088817834854126, "learning_rate": 8.465000000000001e-05, "loss": 0.6238, "step": 5080 }, { "epoch": 1.25990099009901, "grad_norm": 1.4960654973983765, "learning_rate": 8.481666666666666e-05, "loss": 0.6173, "step": 5090 }, { "epoch": 1.2623762376237624, "grad_norm": 1.414542317390442, "learning_rate": 8.498333333333334e-05, "loss": 0.6267, "step": 5100 }, { "epoch": 1.2648514851485149, "grad_norm": 1.400357723236084, "learning_rate": 8.515000000000001e-05, "loss": 0.6175, "step": 5110 }, { "epoch": 1.2673267326732673, "grad_norm": 1.3438225984573364, "learning_rate": 8.531666666666666e-05, "loss": 0.6171, "step": 5120 }, { "epoch": 1.2698019801980198, "grad_norm": 1.3690733909606934, "learning_rate": 8.548333333333334e-05, "loss": 0.615, "step": 5130 }, { "epoch": 1.2722772277227723, "grad_norm": 1.4241864681243896, "learning_rate": 8.565000000000001e-05, "loss": 0.6207, "step": 5140 }, { "epoch": 1.2747524752475248, "grad_norm": 1.4127118587493896, "learning_rate": 8.581666666666666e-05, "loss": 0.6225, "step": 5150 }, { "epoch": 1.2772277227722773, "grad_norm": 1.3927298784255981, "learning_rate": 8.598333333333334e-05, "loss": 0.6155, "step": 5160 }, { "epoch": 1.2797029702970297, "grad_norm": 1.3721197843551636, "learning_rate": 8.615000000000001e-05, "loss": 0.6158, "step": 5170 }, { "epoch": 1.2821782178217822, "grad_norm": 1.361688256263733, "learning_rate": 8.631666666666667e-05, "loss": 0.617, "step": 5180 }, { "epoch": 1.2846534653465347, "grad_norm": 1.3431077003479004, "learning_rate": 8.648333333333334e-05, "loss": 0.6205, "step": 5190 }, { "epoch": 1.2871287128712872, "grad_norm": 1.3267192840576172, "learning_rate": 8.665e-05, "loss": 0.6108, "step": 5200 }, { "epoch": 1.2896039603960396, "grad_norm": 1.4720817804336548, "learning_rate": 8.681666666666667e-05, "loss": 0.6235, "step": 5210 }, { "epoch": 1.2920792079207921, "grad_norm": 1.3847404718399048, "learning_rate": 8.698333333333334e-05, "loss": 0.613, "step": 5220 }, { "epoch": 1.2945544554455446, "grad_norm": 1.3698731660842896, "learning_rate": 8.715e-05, "loss": 0.614, "step": 5230 }, { "epoch": 1.297029702970297, "grad_norm": 1.5074175596237183, "learning_rate": 8.731666666666667e-05, "loss": 0.6122, "step": 5240 }, { "epoch": 1.2995049504950495, "grad_norm": 1.4077497720718384, "learning_rate": 8.748333333333334e-05, "loss": 0.6206, "step": 5250 }, { "epoch": 1.301980198019802, "grad_norm": 1.4716098308563232, "learning_rate": 8.765e-05, "loss": 0.6076, "step": 5260 }, { "epoch": 1.3044554455445545, "grad_norm": 1.3145185708999634, "learning_rate": 8.781666666666667e-05, "loss": 0.6127, "step": 5270 }, { "epoch": 1.306930693069307, "grad_norm": 1.420546054840088, "learning_rate": 8.798333333333334e-05, "loss": 0.62, "step": 5280 }, { "epoch": 1.3094059405940595, "grad_norm": 1.502901315689087, "learning_rate": 8.815e-05, "loss": 0.6153, "step": 5290 }, { "epoch": 1.311881188118812, "grad_norm": 1.4858070611953735, "learning_rate": 8.831666666666667e-05, "loss": 0.6183, "step": 5300 }, { "epoch": 1.3143564356435644, "grad_norm": 1.345834732055664, "learning_rate": 8.848333333333333e-05, "loss": 0.6115, "step": 5310 }, { "epoch": 1.316831683168317, "grad_norm": 1.386179804801941, "learning_rate": 8.865e-05, "loss": 0.6158, "step": 5320 }, { "epoch": 1.3193069306930694, "grad_norm": 1.317219614982605, "learning_rate": 8.881666666666667e-05, "loss": 0.6101, "step": 5330 }, { "epoch": 1.3217821782178218, "grad_norm": 1.3181451559066772, "learning_rate": 8.898333333333335e-05, "loss": 0.6093, "step": 5340 }, { "epoch": 1.3242574257425743, "grad_norm": 1.50509512424469, "learning_rate": 8.915e-05, "loss": 0.6058, "step": 5350 }, { "epoch": 1.3267326732673268, "grad_norm": 1.319951057434082, "learning_rate": 8.931666666666666e-05, "loss": 0.6097, "step": 5360 }, { "epoch": 1.3292079207920793, "grad_norm": 1.311625361442566, "learning_rate": 8.948333333333334e-05, "loss": 0.6102, "step": 5370 }, { "epoch": 1.3316831683168318, "grad_norm": 1.3994226455688477, "learning_rate": 8.965e-05, "loss": 0.6114, "step": 5380 }, { "epoch": 1.3341584158415842, "grad_norm": 1.4073545932769775, "learning_rate": 8.981666666666668e-05, "loss": 0.6193, "step": 5390 }, { "epoch": 1.3366336633663367, "grad_norm": 1.3543318510055542, "learning_rate": 8.998333333333334e-05, "loss": 0.615, "step": 5400 }, { "epoch": 1.3391089108910892, "grad_norm": 1.4004507064819336, "learning_rate": 9.015e-05, "loss": 0.6089, "step": 5410 }, { "epoch": 1.3415841584158417, "grad_norm": 1.3388333320617676, "learning_rate": 9.031666666666668e-05, "loss": 0.6126, "step": 5420 }, { "epoch": 1.3440594059405941, "grad_norm": 1.3139194250106812, "learning_rate": 9.048333333333334e-05, "loss": 0.6084, "step": 5430 }, { "epoch": 1.3465346534653464, "grad_norm": 1.3129392862319946, "learning_rate": 9.065000000000001e-05, "loss": 0.6132, "step": 5440 }, { "epoch": 1.349009900990099, "grad_norm": 1.3298810720443726, "learning_rate": 9.081666666666667e-05, "loss": 0.6068, "step": 5450 }, { "epoch": 1.3514851485148514, "grad_norm": 1.3879303932189941, "learning_rate": 9.098333333333334e-05, "loss": 0.6064, "step": 5460 }, { "epoch": 1.353960396039604, "grad_norm": 1.2163984775543213, "learning_rate": 9.115e-05, "loss": 0.6074, "step": 5470 }, { "epoch": 1.3564356435643563, "grad_norm": 1.4986796379089355, "learning_rate": 9.131666666666667e-05, "loss": 0.6081, "step": 5480 }, { "epoch": 1.358910891089109, "grad_norm": 1.2599892616271973, "learning_rate": 9.148333333333334e-05, "loss": 0.6103, "step": 5490 }, { "epoch": 1.3613861386138613, "grad_norm": 1.2380181550979614, "learning_rate": 9.165e-05, "loss": 0.6077, "step": 5500 }, { "epoch": 1.363861386138614, "grad_norm": 1.270316243171692, "learning_rate": 9.181666666666667e-05, "loss": 0.6058, "step": 5510 }, { "epoch": 1.3663366336633662, "grad_norm": 1.415602207183838, "learning_rate": 9.198333333333334e-05, "loss": 0.604, "step": 5520 }, { "epoch": 1.368811881188119, "grad_norm": 1.4204065799713135, "learning_rate": 9.215e-05, "loss": 0.6105, "step": 5530 }, { "epoch": 1.3712871287128712, "grad_norm": 1.4760897159576416, "learning_rate": 9.231666666666667e-05, "loss": 0.6073, "step": 5540 }, { "epoch": 1.3737623762376239, "grad_norm": 1.3115427494049072, "learning_rate": 9.248333333333334e-05, "loss": 0.6041, "step": 5550 }, { "epoch": 1.3762376237623761, "grad_norm": 1.4176841974258423, "learning_rate": 9.265e-05, "loss": 0.604, "step": 5560 }, { "epoch": 1.3787128712871288, "grad_norm": 1.290845274925232, "learning_rate": 9.281666666666667e-05, "loss": 0.6097, "step": 5570 }, { "epoch": 1.381188118811881, "grad_norm": 1.3227061033248901, "learning_rate": 9.298333333333333e-05, "loss": 0.6104, "step": 5580 }, { "epoch": 1.3836633663366338, "grad_norm": 1.1876250505447388, "learning_rate": 9.315e-05, "loss": 0.6065, "step": 5590 }, { "epoch": 1.386138613861386, "grad_norm": 1.229567050933838, "learning_rate": 9.331666666666668e-05, "loss": 0.6055, "step": 5600 }, { "epoch": 1.3886138613861387, "grad_norm": 1.2279318571090698, "learning_rate": 9.348333333333333e-05, "loss": 0.6088, "step": 5610 }, { "epoch": 1.391089108910891, "grad_norm": 1.2772787809371948, "learning_rate": 9.365e-05, "loss": 0.6017, "step": 5620 }, { "epoch": 1.3935643564356437, "grad_norm": 1.2492035627365112, "learning_rate": 9.381666666666668e-05, "loss": 0.5964, "step": 5630 }, { "epoch": 1.396039603960396, "grad_norm": 1.3297439813613892, "learning_rate": 9.398333333333333e-05, "loss": 0.6038, "step": 5640 }, { "epoch": 1.3985148514851486, "grad_norm": 1.428175687789917, "learning_rate": 9.415e-05, "loss": 0.6051, "step": 5650 }, { "epoch": 1.400990099009901, "grad_norm": 1.3009779453277588, "learning_rate": 9.431666666666668e-05, "loss": 0.6001, "step": 5660 }, { "epoch": 1.4034653465346536, "grad_norm": 1.3145456314086914, "learning_rate": 9.448333333333333e-05, "loss": 0.5982, "step": 5670 }, { "epoch": 1.4059405940594059, "grad_norm": 1.397083044052124, "learning_rate": 9.465000000000001e-05, "loss": 0.6026, "step": 5680 }, { "epoch": 1.4084158415841583, "grad_norm": 1.4355016946792603, "learning_rate": 9.481666666666668e-05, "loss": 0.6086, "step": 5690 }, { "epoch": 1.4108910891089108, "grad_norm": 1.4052308797836304, "learning_rate": 9.498333333333333e-05, "loss": 0.6069, "step": 5700 }, { "epoch": 1.4133663366336633, "grad_norm": 1.2916063070297241, "learning_rate": 9.515000000000001e-05, "loss": 0.6023, "step": 5710 }, { "epoch": 1.4158415841584158, "grad_norm": 1.255492925643921, "learning_rate": 9.531666666666668e-05, "loss": 0.6019, "step": 5720 }, { "epoch": 1.4183168316831682, "grad_norm": 1.3637275695800781, "learning_rate": 9.548333333333334e-05, "loss": 0.601, "step": 5730 }, { "epoch": 1.4207920792079207, "grad_norm": 1.2607249021530151, "learning_rate": 9.565000000000001e-05, "loss": 0.5993, "step": 5740 }, { "epoch": 1.4232673267326732, "grad_norm": 1.26339590549469, "learning_rate": 9.581666666666667e-05, "loss": 0.6043, "step": 5750 }, { "epoch": 1.4257425742574257, "grad_norm": 1.3551082611083984, "learning_rate": 9.598333333333334e-05, "loss": 0.5979, "step": 5760 }, { "epoch": 1.4282178217821782, "grad_norm": 1.319689154624939, "learning_rate": 9.615e-05, "loss": 0.6005, "step": 5770 }, { "epoch": 1.4306930693069306, "grad_norm": 1.2965096235275269, "learning_rate": 9.631666666666667e-05, "loss": 0.597, "step": 5780 }, { "epoch": 1.433168316831683, "grad_norm": 1.2606486082077026, "learning_rate": 9.648333333333334e-05, "loss": 0.5932, "step": 5790 }, { "epoch": 1.4356435643564356, "grad_norm": 1.3653829097747803, "learning_rate": 9.665e-05, "loss": 0.6098, "step": 5800 }, { "epoch": 1.438118811881188, "grad_norm": 1.23099684715271, "learning_rate": 9.681666666666667e-05, "loss": 0.5993, "step": 5810 }, { "epoch": 1.4405940594059405, "grad_norm": 1.210462212562561, "learning_rate": 9.698333333333334e-05, "loss": 0.6033, "step": 5820 }, { "epoch": 1.443069306930693, "grad_norm": 1.1717671155929565, "learning_rate": 9.715e-05, "loss": 0.6043, "step": 5830 }, { "epoch": 1.4455445544554455, "grad_norm": 1.2540385723114014, "learning_rate": 9.731666666666667e-05, "loss": 0.6006, "step": 5840 }, { "epoch": 1.448019801980198, "grad_norm": 1.370827317237854, "learning_rate": 9.748333333333334e-05, "loss": 0.6035, "step": 5850 }, { "epoch": 1.4504950495049505, "grad_norm": 1.3329496383666992, "learning_rate": 9.765e-05, "loss": 0.5965, "step": 5860 }, { "epoch": 1.452970297029703, "grad_norm": 1.3295925855636597, "learning_rate": 9.781666666666667e-05, "loss": 0.594, "step": 5870 }, { "epoch": 1.4554455445544554, "grad_norm": 1.3535147905349731, "learning_rate": 9.798333333333333e-05, "loss": 0.602, "step": 5880 }, { "epoch": 1.4579207920792079, "grad_norm": 1.3240364789962769, "learning_rate": 9.815000000000001e-05, "loss": 0.597, "step": 5890 }, { "epoch": 1.4603960396039604, "grad_norm": 1.2924927473068237, "learning_rate": 9.831666666666667e-05, "loss": 0.5973, "step": 5900 }, { "epoch": 1.4628712871287128, "grad_norm": 1.2923542261123657, "learning_rate": 9.848333333333333e-05, "loss": 0.6003, "step": 5910 }, { "epoch": 1.4653465346534653, "grad_norm": 1.3622397184371948, "learning_rate": 9.865000000000001e-05, "loss": 0.5957, "step": 5920 }, { "epoch": 1.4678217821782178, "grad_norm": 1.400071144104004, "learning_rate": 9.881666666666667e-05, "loss": 0.5979, "step": 5930 }, { "epoch": 1.4702970297029703, "grad_norm": 1.3653578758239746, "learning_rate": 9.898333333333335e-05, "loss": 0.5978, "step": 5940 }, { "epoch": 1.4727722772277227, "grad_norm": 1.435711145401001, "learning_rate": 9.915000000000001e-05, "loss": 0.5911, "step": 5950 }, { "epoch": 1.4752475247524752, "grad_norm": 1.2599726915359497, "learning_rate": 9.931666666666666e-05, "loss": 0.5994, "step": 5960 }, { "epoch": 1.4777227722772277, "grad_norm": 1.2731385231018066, "learning_rate": 9.948333333333334e-05, "loss": 0.5938, "step": 5970 }, { "epoch": 1.4801980198019802, "grad_norm": 1.4575822353363037, "learning_rate": 9.965000000000001e-05, "loss": 0.6019, "step": 5980 }, { "epoch": 1.4826732673267327, "grad_norm": 1.305307388305664, "learning_rate": 9.981666666666668e-05, "loss": 0.5979, "step": 5990 }, { "epoch": 1.4851485148514851, "grad_norm": 1.2577488422393799, "learning_rate": 9.998333333333334e-05, "loss": 0.5936, "step": 6000 }, { "epoch": 1.4876237623762376, "grad_norm": 1.2813396453857422, "learning_rate": 9.999999846214614e-05, "loss": 0.5988, "step": 6010 }, { "epoch": 1.49009900990099, "grad_norm": 1.3467111587524414, "learning_rate": 9.999999314610822e-05, "loss": 0.5974, "step": 6020 }, { "epoch": 1.4925742574257426, "grad_norm": 1.2857484817504883, "learning_rate": 9.999998403290078e-05, "loss": 0.5964, "step": 6030 }, { "epoch": 1.495049504950495, "grad_norm": 1.2820734977722168, "learning_rate": 9.999997112252452e-05, "loss": 0.5961, "step": 6040 }, { "epoch": 1.4975247524752475, "grad_norm": 1.316970705986023, "learning_rate": 9.999995441498044e-05, "loss": 0.5988, "step": 6050 }, { "epoch": 1.5, "grad_norm": 1.2472444772720337, "learning_rate": 9.99999339102698e-05, "loss": 0.5972, "step": 6060 }, { "epoch": 1.5024752475247525, "grad_norm": 1.221819519996643, "learning_rate": 9.999990960839414e-05, "loss": 0.5948, "step": 6070 }, { "epoch": 1.504950495049505, "grad_norm": 1.2253663539886475, "learning_rate": 9.999988150935531e-05, "loss": 0.5944, "step": 6080 }, { "epoch": 1.5074257425742574, "grad_norm": 1.3833445310592651, "learning_rate": 9.999984961315545e-05, "loss": 0.5894, "step": 6090 }, { "epoch": 1.50990099009901, "grad_norm": 1.3452733755111694, "learning_rate": 9.999981391979699e-05, "loss": 0.5961, "step": 6100 }, { "epoch": 1.5123762376237624, "grad_norm": 1.343449354171753, "learning_rate": 9.999977442928263e-05, "loss": 0.5912, "step": 6110 }, { "epoch": 1.5148514851485149, "grad_norm": 1.3065035343170166, "learning_rate": 9.999973114161537e-05, "loss": 0.5937, "step": 6120 }, { "epoch": 1.5173267326732673, "grad_norm": 1.1774924993515015, "learning_rate": 9.99996840567985e-05, "loss": 0.5884, "step": 6130 }, { "epoch": 1.5198019801980198, "grad_norm": 1.3330329656600952, "learning_rate": 9.999963317483559e-05, "loss": 0.59, "step": 6140 }, { "epoch": 1.5222772277227723, "grad_norm": 1.296000599861145, "learning_rate": 9.999957849573052e-05, "loss": 0.5913, "step": 6150 }, { "epoch": 1.5247524752475248, "grad_norm": 1.2796049118041992, "learning_rate": 9.999952001948742e-05, "loss": 0.5933, "step": 6160 }, { "epoch": 1.5272277227722773, "grad_norm": 1.2525136470794678, "learning_rate": 9.999945774611075e-05, "loss": 0.5933, "step": 6170 }, { "epoch": 1.5297029702970297, "grad_norm": 1.2475361824035645, "learning_rate": 9.999939167560524e-05, "loss": 0.5972, "step": 6180 }, { "epoch": 1.5321782178217822, "grad_norm": 1.2961610555648804, "learning_rate": 9.999932180797589e-05, "loss": 0.5879, "step": 6190 }, { "epoch": 1.5346534653465347, "grad_norm": 1.2884050607681274, "learning_rate": 9.999924814322801e-05, "loss": 0.5914, "step": 6200 }, { "epoch": 1.5371287128712872, "grad_norm": 1.2791858911514282, "learning_rate": 9.999917068136722e-05, "loss": 0.5921, "step": 6210 }, { "epoch": 1.5396039603960396, "grad_norm": 1.1633756160736084, "learning_rate": 9.999908942239937e-05, "loss": 0.5869, "step": 6220 }, { "epoch": 1.5420792079207921, "grad_norm": 1.2578169107437134, "learning_rate": 9.999900436633062e-05, "loss": 0.5909, "step": 6230 }, { "epoch": 1.5445544554455446, "grad_norm": 1.2752809524536133, "learning_rate": 9.999891551316749e-05, "loss": 0.5902, "step": 6240 }, { "epoch": 1.547029702970297, "grad_norm": 1.325258493423462, "learning_rate": 9.999882286291668e-05, "loss": 0.5864, "step": 6250 }, { "epoch": 1.5495049504950495, "grad_norm": 1.1826446056365967, "learning_rate": 9.999872641558524e-05, "loss": 0.5907, "step": 6260 }, { "epoch": 1.551980198019802, "grad_norm": 1.295058250427246, "learning_rate": 9.999862617118047e-05, "loss": 0.5888, "step": 6270 }, { "epoch": 1.5544554455445545, "grad_norm": 1.1951642036437988, "learning_rate": 9.999852212971002e-05, "loss": 0.5934, "step": 6280 }, { "epoch": 1.556930693069307, "grad_norm": 1.204977035522461, "learning_rate": 9.999841429118178e-05, "loss": 0.5928, "step": 6290 }, { "epoch": 1.5594059405940595, "grad_norm": 1.1945230960845947, "learning_rate": 9.999830265560392e-05, "loss": 0.5891, "step": 6300 }, { "epoch": 1.561881188118812, "grad_norm": 1.2144532203674316, "learning_rate": 9.999818722298495e-05, "loss": 0.5921, "step": 6310 }, { "epoch": 1.5643564356435644, "grad_norm": 1.15304696559906, "learning_rate": 9.99980679933336e-05, "loss": 0.5822, "step": 6320 }, { "epoch": 1.5668316831683167, "grad_norm": 1.1695317029953003, "learning_rate": 9.999794496665897e-05, "loss": 0.5867, "step": 6330 }, { "epoch": 1.5693069306930694, "grad_norm": 1.285791039466858, "learning_rate": 9.999781814297036e-05, "loss": 0.5884, "step": 6340 }, { "epoch": 1.5717821782178216, "grad_norm": 1.2972203493118286, "learning_rate": 9.999768752227743e-05, "loss": 0.5911, "step": 6350 }, { "epoch": 1.5742574257425743, "grad_norm": 1.1600912809371948, "learning_rate": 9.99975531045901e-05, "loss": 0.5943, "step": 6360 }, { "epoch": 1.5767326732673266, "grad_norm": 1.2202757596969604, "learning_rate": 9.999741488991853e-05, "loss": 0.5843, "step": 6370 }, { "epoch": 1.5792079207920793, "grad_norm": 1.2298775911331177, "learning_rate": 9.999727287827328e-05, "loss": 0.5866, "step": 6380 }, { "epoch": 1.5816831683168315, "grad_norm": 1.2620282173156738, "learning_rate": 9.999712706966509e-05, "loss": 0.5906, "step": 6390 }, { "epoch": 1.5841584158415842, "grad_norm": 1.2209243774414062, "learning_rate": 9.999697746410508e-05, "loss": 0.5865, "step": 6400 }, { "epoch": 1.5866336633663365, "grad_norm": 1.1858364343643188, "learning_rate": 9.999682406160455e-05, "loss": 0.5859, "step": 6410 }, { "epoch": 1.5891089108910892, "grad_norm": 1.3028243780136108, "learning_rate": 9.999666686217522e-05, "loss": 0.586, "step": 6420 }, { "epoch": 1.5915841584158414, "grad_norm": 1.2150377035140991, "learning_rate": 9.999650586582896e-05, "loss": 0.5883, "step": 6430 }, { "epoch": 1.5940594059405941, "grad_norm": 1.206921935081482, "learning_rate": 9.999634107257804e-05, "loss": 0.5819, "step": 6440 }, { "epoch": 1.5965346534653464, "grad_norm": 1.1420509815216064, "learning_rate": 9.999617248243496e-05, "loss": 0.5867, "step": 6450 }, { "epoch": 1.599009900990099, "grad_norm": 1.2084287405014038, "learning_rate": 9.999600009541252e-05, "loss": 0.5829, "step": 6460 }, { "epoch": 1.6014851485148514, "grad_norm": 1.2134138345718384, "learning_rate": 9.999582391152383e-05, "loss": 0.5795, "step": 6470 }, { "epoch": 1.603960396039604, "grad_norm": 1.1697007417678833, "learning_rate": 9.999564393078225e-05, "loss": 0.5852, "step": 6480 }, { "epoch": 1.6064356435643563, "grad_norm": 1.1970182657241821, "learning_rate": 9.999546015320145e-05, "loss": 0.5864, "step": 6490 }, { "epoch": 1.608910891089109, "grad_norm": 1.1668332815170288, "learning_rate": 9.999527257879541e-05, "loss": 0.5823, "step": 6500 }, { "epoch": 1.6113861386138613, "grad_norm": 1.1837056875228882, "learning_rate": 9.999508120757835e-05, "loss": 0.5889, "step": 6510 }, { "epoch": 1.613861386138614, "grad_norm": 1.1295278072357178, "learning_rate": 9.999488603956479e-05, "loss": 0.5909, "step": 6520 }, { "epoch": 1.6163366336633662, "grad_norm": 1.3270241022109985, "learning_rate": 9.99946870747696e-05, "loss": 0.5798, "step": 6530 }, { "epoch": 1.618811881188119, "grad_norm": 1.1784683465957642, "learning_rate": 9.999448431320786e-05, "loss": 0.5804, "step": 6540 }, { "epoch": 1.6212871287128712, "grad_norm": 1.1735986471176147, "learning_rate": 9.999427775489496e-05, "loss": 0.5849, "step": 6550 }, { "epoch": 1.6237623762376239, "grad_norm": 1.2937771081924438, "learning_rate": 9.99940673998466e-05, "loss": 0.5832, "step": 6560 }, { "epoch": 1.6262376237623761, "grad_norm": 1.2554686069488525, "learning_rate": 9.999385324807878e-05, "loss": 0.5785, "step": 6570 }, { "epoch": 1.6287128712871288, "grad_norm": 1.2449456453323364, "learning_rate": 9.99936352996077e-05, "loss": 0.5763, "step": 6580 }, { "epoch": 1.631188118811881, "grad_norm": 1.1425971984863281, "learning_rate": 9.999341355444995e-05, "loss": 0.585, "step": 6590 }, { "epoch": 1.6336633663366338, "grad_norm": 1.1228744983673096, "learning_rate": 9.99931880126224e-05, "loss": 0.5839, "step": 6600 }, { "epoch": 1.636138613861386, "grad_norm": 1.244328498840332, "learning_rate": 9.999295867414212e-05, "loss": 0.5828, "step": 6610 }, { "epoch": 1.6386138613861387, "grad_norm": 1.1428041458129883, "learning_rate": 9.999272553902656e-05, "loss": 0.5853, "step": 6620 }, { "epoch": 1.641089108910891, "grad_norm": 1.1896331310272217, "learning_rate": 9.999248860729342e-05, "loss": 0.5857, "step": 6630 }, { "epoch": 1.6435643564356437, "grad_norm": 1.2453763484954834, "learning_rate": 9.999224787896069e-05, "loss": 0.579, "step": 6640 }, { "epoch": 1.646039603960396, "grad_norm": 1.1755818128585815, "learning_rate": 9.999200335404666e-05, "loss": 0.5873, "step": 6650 }, { "epoch": 1.6485148514851486, "grad_norm": 1.0955256223678589, "learning_rate": 9.999175503256988e-05, "loss": 0.5832, "step": 6660 }, { "epoch": 1.650990099009901, "grad_norm": 1.094787836074829, "learning_rate": 9.999150291454924e-05, "loss": 0.5842, "step": 6670 }, { "epoch": 1.6534653465346536, "grad_norm": 1.0936529636383057, "learning_rate": 9.999124700000386e-05, "loss": 0.5766, "step": 6680 }, { "epoch": 1.6559405940594059, "grad_norm": 1.0841530561447144, "learning_rate": 9.999098728895317e-05, "loss": 0.5831, "step": 6690 }, { "epoch": 1.6584158415841586, "grad_norm": 1.2499017715454102, "learning_rate": 9.999072378141694e-05, "loss": 0.5767, "step": 6700 }, { "epoch": 1.6608910891089108, "grad_norm": 1.1823770999908447, "learning_rate": 9.999045647741512e-05, "loss": 0.5844, "step": 6710 }, { "epoch": 1.6633663366336635, "grad_norm": 1.1520519256591797, "learning_rate": 9.999018537696804e-05, "loss": 0.5803, "step": 6720 }, { "epoch": 1.6658415841584158, "grad_norm": 1.2111445665359497, "learning_rate": 9.99899104800963e-05, "loss": 0.5814, "step": 6730 }, { "epoch": 1.6683168316831685, "grad_norm": 1.1906375885009766, "learning_rate": 9.998963178682077e-05, "loss": 0.5749, "step": 6740 }, { "epoch": 1.6707920792079207, "grad_norm": 1.2272475957870483, "learning_rate": 9.99893492971626e-05, "loss": 0.5741, "step": 6750 }, { "epoch": 1.6732673267326734, "grad_norm": 1.0902884006500244, "learning_rate": 9.998906301114324e-05, "loss": 0.5813, "step": 6760 }, { "epoch": 1.6757425742574257, "grad_norm": 1.1913987398147583, "learning_rate": 9.998877292878446e-05, "loss": 0.5744, "step": 6770 }, { "epoch": 1.6782178217821784, "grad_norm": 1.1092631816864014, "learning_rate": 9.998847905010826e-05, "loss": 0.5771, "step": 6780 }, { "epoch": 1.6806930693069306, "grad_norm": 1.1795631647109985, "learning_rate": 9.998818137513697e-05, "loss": 0.572, "step": 6790 }, { "epoch": 1.6831683168316833, "grad_norm": 1.0798970460891724, "learning_rate": 9.998787990389323e-05, "loss": 0.586, "step": 6800 }, { "epoch": 1.6856435643564356, "grad_norm": 1.1576365232467651, "learning_rate": 9.998757463639987e-05, "loss": 0.5826, "step": 6810 }, { "epoch": 1.688118811881188, "grad_norm": 1.0900828838348389, "learning_rate": 9.998726557268012e-05, "loss": 0.5774, "step": 6820 }, { "epoch": 1.6905940594059405, "grad_norm": 1.2810553312301636, "learning_rate": 9.998695271275741e-05, "loss": 0.5791, "step": 6830 }, { "epoch": 1.693069306930693, "grad_norm": 1.172226905822754, "learning_rate": 9.998663605665556e-05, "loss": 0.5751, "step": 6840 }, { "epoch": 1.6955445544554455, "grad_norm": 1.102444052696228, "learning_rate": 9.998631560439857e-05, "loss": 0.5742, "step": 6850 }, { "epoch": 1.698019801980198, "grad_norm": 1.105329990386963, "learning_rate": 9.998599135601079e-05, "loss": 0.5735, "step": 6860 }, { "epoch": 1.7004950495049505, "grad_norm": 1.0888139009475708, "learning_rate": 9.998566331151685e-05, "loss": 0.5735, "step": 6870 }, { "epoch": 1.702970297029703, "grad_norm": 1.1137239933013916, "learning_rate": 9.998533147094166e-05, "loss": 0.5761, "step": 6880 }, { "epoch": 1.7054455445544554, "grad_norm": 1.0851613283157349, "learning_rate": 9.998499583431042e-05, "loss": 0.5727, "step": 6890 }, { "epoch": 1.7079207920792079, "grad_norm": 1.084807276725769, "learning_rate": 9.998465640164861e-05, "loss": 0.5735, "step": 6900 }, { "epoch": 1.7103960396039604, "grad_norm": 1.1454482078552246, "learning_rate": 9.998431317298202e-05, "loss": 0.5736, "step": 6910 }, { "epoch": 1.7128712871287128, "grad_norm": 1.1863515377044678, "learning_rate": 9.998396614833672e-05, "loss": 0.5711, "step": 6920 }, { "epoch": 1.7153465346534653, "grad_norm": 1.093381643295288, "learning_rate": 9.998361532773906e-05, "loss": 0.5729, "step": 6930 }, { "epoch": 1.7178217821782178, "grad_norm": 1.2075363397598267, "learning_rate": 9.998326071121566e-05, "loss": 0.5742, "step": 6940 }, { "epoch": 1.7202970297029703, "grad_norm": 1.1424221992492676, "learning_rate": 9.998290229879348e-05, "loss": 0.5754, "step": 6950 }, { "epoch": 1.7227722772277227, "grad_norm": 1.10857093334198, "learning_rate": 9.998254009049973e-05, "loss": 0.5749, "step": 6960 }, { "epoch": 1.7252475247524752, "grad_norm": 1.1486802101135254, "learning_rate": 9.99821740863619e-05, "loss": 0.5762, "step": 6970 }, { "epoch": 1.7277227722772277, "grad_norm": 1.2033345699310303, "learning_rate": 9.998180428640782e-05, "loss": 0.5678, "step": 6980 }, { "epoch": 1.7301980198019802, "grad_norm": 1.1708921194076538, "learning_rate": 9.998143069066555e-05, "loss": 0.5726, "step": 6990 }, { "epoch": 1.7326732673267327, "grad_norm": 1.162221908569336, "learning_rate": 9.998105329916348e-05, "loss": 0.5741, "step": 7000 }, { "epoch": 1.7351485148514851, "grad_norm": 1.0424407720565796, "learning_rate": 9.998067211193024e-05, "loss": 0.575, "step": 7010 }, { "epoch": 1.7376237623762376, "grad_norm": 1.132449984550476, "learning_rate": 9.99802871289948e-05, "loss": 0.5786, "step": 7020 }, { "epoch": 1.74009900990099, "grad_norm": 1.0770530700683594, "learning_rate": 9.99798983503864e-05, "loss": 0.5717, "step": 7030 }, { "epoch": 1.7425742574257426, "grad_norm": 1.0912792682647705, "learning_rate": 9.997950577613456e-05, "loss": 0.5797, "step": 7040 }, { "epoch": 1.745049504950495, "grad_norm": 1.0983744859695435, "learning_rate": 9.997910940626909e-05, "loss": 0.5688, "step": 7050 }, { "epoch": 1.7475247524752475, "grad_norm": 1.1116245985031128, "learning_rate": 9.99787092408201e-05, "loss": 0.5714, "step": 7060 }, { "epoch": 1.75, "grad_norm": 1.151600956916809, "learning_rate": 9.997830527981796e-05, "loss": 0.5783, "step": 7070 }, { "epoch": 1.7524752475247525, "grad_norm": 1.095258116722107, "learning_rate": 9.997789752329337e-05, "loss": 0.5722, "step": 7080 }, { "epoch": 1.754950495049505, "grad_norm": 1.1651055812835693, "learning_rate": 9.997748597127728e-05, "loss": 0.5791, "step": 7090 }, { "epoch": 1.7574257425742574, "grad_norm": 1.234158992767334, "learning_rate": 9.997707062380096e-05, "loss": 0.5687, "step": 7100 }, { "epoch": 1.75990099009901, "grad_norm": 1.0768558979034424, "learning_rate": 9.997665148089594e-05, "loss": 0.5775, "step": 7110 }, { "epoch": 1.7623762376237624, "grad_norm": 1.0965163707733154, "learning_rate": 9.997622854259408e-05, "loss": 0.5769, "step": 7120 }, { "epoch": 1.7648514851485149, "grad_norm": 1.1270995140075684, "learning_rate": 9.997580180892744e-05, "loss": 0.5706, "step": 7130 }, { "epoch": 1.7673267326732673, "grad_norm": 1.1439129114151, "learning_rate": 9.99753712799285e-05, "loss": 0.5718, "step": 7140 }, { "epoch": 1.7698019801980198, "grad_norm": 1.0496652126312256, "learning_rate": 9.99749369556299e-05, "loss": 0.5696, "step": 7150 }, { "epoch": 1.7722772277227723, "grad_norm": 1.0811141729354858, "learning_rate": 9.997449883606466e-05, "loss": 0.5704, "step": 7160 }, { "epoch": 1.7747524752475248, "grad_norm": 1.1209789514541626, "learning_rate": 9.997405692126603e-05, "loss": 0.5742, "step": 7170 }, { "epoch": 1.7772277227722773, "grad_norm": 1.1894720792770386, "learning_rate": 9.997361121126757e-05, "loss": 0.5652, "step": 7180 }, { "epoch": 1.7797029702970297, "grad_norm": 1.1879469156265259, "learning_rate": 9.997316170610314e-05, "loss": 0.5674, "step": 7190 }, { "epoch": 1.7821782178217822, "grad_norm": 1.096873164176941, "learning_rate": 9.997270840580686e-05, "loss": 0.5706, "step": 7200 }, { "epoch": 1.7846534653465347, "grad_norm": 1.1547988653182983, "learning_rate": 9.997225131041318e-05, "loss": 0.5707, "step": 7210 }, { "epoch": 1.7871287128712872, "grad_norm": 1.003650426864624, "learning_rate": 9.99717904199568e-05, "loss": 0.5777, "step": 7220 }, { "epoch": 1.7896039603960396, "grad_norm": 1.0272986888885498, "learning_rate": 9.997132573447272e-05, "loss": 0.568, "step": 7230 }, { "epoch": 1.7920792079207921, "grad_norm": 1.050298810005188, "learning_rate": 9.997085725399624e-05, "loss": 0.5695, "step": 7240 }, { "epoch": 1.7945544554455446, "grad_norm": 1.1352252960205078, "learning_rate": 9.997038497856292e-05, "loss": 0.5681, "step": 7250 }, { "epoch": 1.797029702970297, "grad_norm": 1.0821064710617065, "learning_rate": 9.996990890820864e-05, "loss": 0.5706, "step": 7260 }, { "epoch": 1.7995049504950495, "grad_norm": 1.0381243228912354, "learning_rate": 9.996942904296956e-05, "loss": 0.5709, "step": 7270 }, { "epoch": 1.801980198019802, "grad_norm": 1.0580945014953613, "learning_rate": 9.99689453828821e-05, "loss": 0.5685, "step": 7280 }, { "epoch": 1.8044554455445545, "grad_norm": 1.1007764339447021, "learning_rate": 9.996845792798301e-05, "loss": 0.5672, "step": 7290 }, { "epoch": 1.806930693069307, "grad_norm": 1.213291049003601, "learning_rate": 9.996796667830931e-05, "loss": 0.5656, "step": 7300 }, { "epoch": 1.8094059405940595, "grad_norm": 1.0904279947280884, "learning_rate": 9.99674716338983e-05, "loss": 0.5624, "step": 7310 }, { "epoch": 1.811881188118812, "grad_norm": 1.089015245437622, "learning_rate": 9.996697279478757e-05, "loss": 0.5703, "step": 7320 }, { "epoch": 1.8143564356435644, "grad_norm": 1.0983028411865234, "learning_rate": 9.996647016101502e-05, "loss": 0.5711, "step": 7330 }, { "epoch": 1.8168316831683167, "grad_norm": 1.0710829496383667, "learning_rate": 9.99659637326188e-05, "loss": 0.5668, "step": 7340 }, { "epoch": 1.8193069306930694, "grad_norm": 1.0246237516403198, "learning_rate": 9.996545350963738e-05, "loss": 0.5672, "step": 7350 }, { "epoch": 1.8217821782178216, "grad_norm": 1.0598158836364746, "learning_rate": 9.99649394921095e-05, "loss": 0.5653, "step": 7360 }, { "epoch": 1.8242574257425743, "grad_norm": 1.0569696426391602, "learning_rate": 9.996442168007422e-05, "loss": 0.5748, "step": 7370 }, { "epoch": 1.8267326732673266, "grad_norm": 1.0710878372192383, "learning_rate": 9.996390007357086e-05, "loss": 0.5706, "step": 7380 }, { "epoch": 1.8292079207920793, "grad_norm": 1.1050162315368652, "learning_rate": 9.996337467263902e-05, "loss": 0.569, "step": 7390 }, { "epoch": 1.8316831683168315, "grad_norm": 1.0832582712173462, "learning_rate": 9.996284547731858e-05, "loss": 0.5719, "step": 7400 }, { "epoch": 1.8341584158415842, "grad_norm": 1.003187656402588, "learning_rate": 9.996231248764976e-05, "loss": 0.5707, "step": 7410 }, { "epoch": 1.8366336633663365, "grad_norm": 1.1257885694503784, "learning_rate": 9.996177570367305e-05, "loss": 0.5756, "step": 7420 }, { "epoch": 1.8391089108910892, "grad_norm": 1.0969077348709106, "learning_rate": 9.996123512542916e-05, "loss": 0.5627, "step": 7430 }, { "epoch": 1.8415841584158414, "grad_norm": 1.056215524673462, "learning_rate": 9.99606907529592e-05, "loss": 0.5702, "step": 7440 }, { "epoch": 1.8440594059405941, "grad_norm": 1.0239588022232056, "learning_rate": 9.996014258630448e-05, "loss": 0.5683, "step": 7450 }, { "epoch": 1.8465346534653464, "grad_norm": 0.9801771640777588, "learning_rate": 9.995959062550666e-05, "loss": 0.5678, "step": 7460 }, { "epoch": 1.849009900990099, "grad_norm": 1.079970121383667, "learning_rate": 9.995903487060762e-05, "loss": 0.5716, "step": 7470 }, { "epoch": 1.8514851485148514, "grad_norm": 1.109399437904358, "learning_rate": 9.995847532164957e-05, "loss": 0.5675, "step": 7480 }, { "epoch": 1.853960396039604, "grad_norm": 1.1351181268692017, "learning_rate": 9.995791197867504e-05, "loss": 0.5694, "step": 7490 }, { "epoch": 1.8564356435643563, "grad_norm": 1.1019301414489746, "learning_rate": 9.995734484172678e-05, "loss": 0.5698, "step": 7500 }, { "epoch": 1.858910891089109, "grad_norm": 1.072972297668457, "learning_rate": 9.995677391084787e-05, "loss": 0.5673, "step": 7510 }, { "epoch": 1.8613861386138613, "grad_norm": 1.029335618019104, "learning_rate": 9.995619918608167e-05, "loss": 0.5615, "step": 7520 }, { "epoch": 1.863861386138614, "grad_norm": 1.0979657173156738, "learning_rate": 9.995562066747182e-05, "loss": 0.5616, "step": 7530 }, { "epoch": 1.8663366336633662, "grad_norm": 1.0617252588272095, "learning_rate": 9.995503835506226e-05, "loss": 0.5715, "step": 7540 }, { "epoch": 1.868811881188119, "grad_norm": 0.9605892896652222, "learning_rate": 9.995445224889721e-05, "loss": 0.5664, "step": 7550 }, { "epoch": 1.8712871287128712, "grad_norm": 1.0452322959899902, "learning_rate": 9.995386234902118e-05, "loss": 0.5634, "step": 7560 }, { "epoch": 1.8737623762376239, "grad_norm": 1.1008460521697998, "learning_rate": 9.995326865547898e-05, "loss": 0.5656, "step": 7570 }, { "epoch": 1.8762376237623761, "grad_norm": 1.1224753856658936, "learning_rate": 9.995267116831568e-05, "loss": 0.5572, "step": 7580 }, { "epoch": 1.8787128712871288, "grad_norm": 1.0780552625656128, "learning_rate": 9.995206988757666e-05, "loss": 0.5639, "step": 7590 }, { "epoch": 1.881188118811881, "grad_norm": 1.0960341691970825, "learning_rate": 9.99514648133076e-05, "loss": 0.5614, "step": 7600 }, { "epoch": 1.8836633663366338, "grad_norm": 1.0455305576324463, "learning_rate": 9.995085594555443e-05, "loss": 0.5586, "step": 7610 }, { "epoch": 1.886138613861386, "grad_norm": 1.0186258554458618, "learning_rate": 9.99502432843634e-05, "loss": 0.5619, "step": 7620 }, { "epoch": 1.8886138613861387, "grad_norm": 1.1060503721237183, "learning_rate": 9.994962682978103e-05, "loss": 0.5615, "step": 7630 }, { "epoch": 1.891089108910891, "grad_norm": 1.066631555557251, "learning_rate": 9.994900658185415e-05, "loss": 0.5598, "step": 7640 }, { "epoch": 1.8935643564356437, "grad_norm": 1.137848138809204, "learning_rate": 9.994838254062984e-05, "loss": 0.5619, "step": 7650 }, { "epoch": 1.896039603960396, "grad_norm": 1.0841819047927856, "learning_rate": 9.994775470615553e-05, "loss": 0.5619, "step": 7660 }, { "epoch": 1.8985148514851486, "grad_norm": 1.0107554197311401, "learning_rate": 9.994712307847887e-05, "loss": 0.5644, "step": 7670 }, { "epoch": 1.900990099009901, "grad_norm": 1.113227367401123, "learning_rate": 9.994648765764782e-05, "loss": 0.5671, "step": 7680 }, { "epoch": 1.9034653465346536, "grad_norm": 1.1107473373413086, "learning_rate": 9.994584844371067e-05, "loss": 0.5632, "step": 7690 }, { "epoch": 1.9059405940594059, "grad_norm": 1.027385950088501, "learning_rate": 9.994520543671593e-05, "loss": 0.5692, "step": 7700 }, { "epoch": 1.9084158415841586, "grad_norm": 1.0513461828231812, "learning_rate": 9.994455863671244e-05, "loss": 0.5675, "step": 7710 }, { "epoch": 1.9108910891089108, "grad_norm": 0.9821844100952148, "learning_rate": 9.994390804374934e-05, "loss": 0.5645, "step": 7720 }, { "epoch": 1.9133663366336635, "grad_norm": 1.021871566772461, "learning_rate": 9.994325365787602e-05, "loss": 0.5623, "step": 7730 }, { "epoch": 1.9158415841584158, "grad_norm": 1.0750788450241089, "learning_rate": 9.994259547914219e-05, "loss": 0.5649, "step": 7740 }, { "epoch": 1.9183168316831685, "grad_norm": 1.042016625404358, "learning_rate": 9.994193350759781e-05, "loss": 0.5692, "step": 7750 }, { "epoch": 1.9207920792079207, "grad_norm": 1.0868046283721924, "learning_rate": 9.994126774329319e-05, "loss": 0.5598, "step": 7760 }, { "epoch": 1.9232673267326734, "grad_norm": 1.0302661657333374, "learning_rate": 9.994059818627885e-05, "loss": 0.5627, "step": 7770 }, { "epoch": 1.9257425742574257, "grad_norm": 1.0593953132629395, "learning_rate": 9.993992483660567e-05, "loss": 0.5648, "step": 7780 }, { "epoch": 1.9282178217821784, "grad_norm": 1.0700217485427856, "learning_rate": 9.993924769432476e-05, "loss": 0.5631, "step": 7790 }, { "epoch": 1.9306930693069306, "grad_norm": 1.0674422979354858, "learning_rate": 9.993856675948757e-05, "loss": 0.5599, "step": 7800 }, { "epoch": 1.9331683168316833, "grad_norm": 1.0239421129226685, "learning_rate": 9.99378820321458e-05, "loss": 0.5632, "step": 7810 }, { "epoch": 1.9356435643564356, "grad_norm": 1.0719354152679443, "learning_rate": 9.993719351235145e-05, "loss": 0.5633, "step": 7820 }, { "epoch": 1.938118811881188, "grad_norm": 1.0294859409332275, "learning_rate": 9.99365012001568e-05, "loss": 0.5675, "step": 7830 }, { "epoch": 1.9405940594059405, "grad_norm": 1.047227382659912, "learning_rate": 9.993580509561444e-05, "loss": 0.5553, "step": 7840 }, { "epoch": 1.943069306930693, "grad_norm": 1.0286266803741455, "learning_rate": 9.993510519877723e-05, "loss": 0.5637, "step": 7850 }, { "epoch": 1.9455445544554455, "grad_norm": 1.0311840772628784, "learning_rate": 9.993440150969833e-05, "loss": 0.5605, "step": 7860 }, { "epoch": 1.948019801980198, "grad_norm": 1.0242570638656616, "learning_rate": 9.993369402843118e-05, "loss": 0.5605, "step": 7870 }, { "epoch": 1.9504950495049505, "grad_norm": 1.158584713935852, "learning_rate": 9.993298275502949e-05, "loss": 0.5602, "step": 7880 }, { "epoch": 1.952970297029703, "grad_norm": 0.9804286956787109, "learning_rate": 9.993226768954729e-05, "loss": 0.5614, "step": 7890 }, { "epoch": 1.9554455445544554, "grad_norm": 1.0899159908294678, "learning_rate": 9.993154883203887e-05, "loss": 0.5608, "step": 7900 }, { "epoch": 1.9579207920792079, "grad_norm": 1.0121886730194092, "learning_rate": 9.993082618255886e-05, "loss": 0.5623, "step": 7910 }, { "epoch": 1.9603960396039604, "grad_norm": 1.0008187294006348, "learning_rate": 9.993009974116211e-05, "loss": 0.5573, "step": 7920 }, { "epoch": 1.9628712871287128, "grad_norm": 0.956240713596344, "learning_rate": 9.992936950790379e-05, "loss": 0.5564, "step": 7930 }, { "epoch": 1.9653465346534653, "grad_norm": 1.0684715509414673, "learning_rate": 9.992863548283936e-05, "loss": 0.5642, "step": 7940 }, { "epoch": 1.9678217821782178, "grad_norm": 1.1166421175003052, "learning_rate": 9.992789766602457e-05, "loss": 0.5591, "step": 7950 }, { "epoch": 1.9702970297029703, "grad_norm": 1.084633231163025, "learning_rate": 9.992715605751544e-05, "loss": 0.5522, "step": 7960 }, { "epoch": 1.9727722772277227, "grad_norm": 1.0592410564422607, "learning_rate": 9.99264106573683e-05, "loss": 0.5601, "step": 7970 }, { "epoch": 1.9752475247524752, "grad_norm": 1.1404333114624023, "learning_rate": 9.992566146563977e-05, "loss": 0.564, "step": 7980 }, { "epoch": 1.9777227722772277, "grad_norm": 0.9832201600074768, "learning_rate": 9.992490848238672e-05, "loss": 0.5573, "step": 7990 }, { "epoch": 1.9801980198019802, "grad_norm": 1.000122308731079, "learning_rate": 9.992415170766634e-05, "loss": 0.5537, "step": 8000 }, { "epoch": 1.9826732673267327, "grad_norm": 1.01481032371521, "learning_rate": 9.992339114153612e-05, "loss": 0.5575, "step": 8010 }, { "epoch": 1.9851485148514851, "grad_norm": 1.1068933010101318, "learning_rate": 9.992262678405381e-05, "loss": 0.5604, "step": 8020 }, { "epoch": 1.9876237623762376, "grad_norm": 1.1125822067260742, "learning_rate": 9.992185863527746e-05, "loss": 0.5588, "step": 8030 }, { "epoch": 1.99009900990099, "grad_norm": 1.0788670778274536, "learning_rate": 9.992108669526538e-05, "loss": 0.5632, "step": 8040 }, { "epoch": 1.9925742574257426, "grad_norm": 0.9620088934898376, "learning_rate": 9.992031096407623e-05, "loss": 0.5495, "step": 8050 }, { "epoch": 1.995049504950495, "grad_norm": 0.9989855289459229, "learning_rate": 9.99195314417689e-05, "loss": 0.5569, "step": 8060 }, { "epoch": 1.9975247524752475, "grad_norm": 1.0243645906448364, "learning_rate": 9.991874812840261e-05, "loss": 0.5611, "step": 8070 }, { "epoch": 2.0, "grad_norm": 1.0053071975708008, "learning_rate": 9.991796102403684e-05, "loss": 0.556, "step": 8080 }, { "epoch": 2.0024752475247523, "grad_norm": 1.1227818727493286, "learning_rate": 9.991717012873135e-05, "loss": 0.5581, "step": 8090 }, { "epoch": 2.004950495049505, "grad_norm": 1.0639930963516235, "learning_rate": 9.99163754425462e-05, "loss": 0.555, "step": 8100 }, { "epoch": 2.007425742574257, "grad_norm": 1.0066819190979004, "learning_rate": 9.991557696554177e-05, "loss": 0.5599, "step": 8110 }, { "epoch": 2.00990099009901, "grad_norm": 0.9826246500015259, "learning_rate": 9.991477469777868e-05, "loss": 0.549, "step": 8120 }, { "epoch": 2.012376237623762, "grad_norm": 1.0151300430297852, "learning_rate": 9.991396863931786e-05, "loss": 0.5541, "step": 8130 }, { "epoch": 2.014851485148515, "grad_norm": 0.9546704292297363, "learning_rate": 9.991315879022055e-05, "loss": 0.5501, "step": 8140 }, { "epoch": 2.017326732673267, "grad_norm": 0.9961840510368347, "learning_rate": 9.991234515054819e-05, "loss": 0.5579, "step": 8150 }, { "epoch": 2.01980198019802, "grad_norm": 1.05398690700531, "learning_rate": 9.991152772036263e-05, "loss": 0.5566, "step": 8160 }, { "epoch": 2.022277227722772, "grad_norm": 1.0547617673873901, "learning_rate": 9.991070649972592e-05, "loss": 0.5576, "step": 8170 }, { "epoch": 2.0247524752475248, "grad_norm": 1.0549086332321167, "learning_rate": 9.990988148870043e-05, "loss": 0.5523, "step": 8180 }, { "epoch": 2.027227722772277, "grad_norm": 1.0511468648910522, "learning_rate": 9.990905268734882e-05, "loss": 0.568, "step": 8190 }, { "epoch": 2.0297029702970297, "grad_norm": 0.9944787621498108, "learning_rate": 9.990822009573404e-05, "loss": 0.5608, "step": 8200 }, { "epoch": 2.032178217821782, "grad_norm": 1.0113109350204468, "learning_rate": 9.99073837139193e-05, "loss": 0.5539, "step": 8210 }, { "epoch": 2.0346534653465347, "grad_norm": 1.015494465827942, "learning_rate": 9.990654354196812e-05, "loss": 0.5538, "step": 8220 }, { "epoch": 2.037128712871287, "grad_norm": 0.9962309002876282, "learning_rate": 9.990569957994432e-05, "loss": 0.55, "step": 8230 }, { "epoch": 2.0396039603960396, "grad_norm": 1.0063525438308716, "learning_rate": 9.990485182791198e-05, "loss": 0.5566, "step": 8240 }, { "epoch": 2.042079207920792, "grad_norm": 1.028746485710144, "learning_rate": 9.99040002859355e-05, "loss": 0.5521, "step": 8250 }, { "epoch": 2.0445544554455446, "grad_norm": 1.0179413557052612, "learning_rate": 9.990314495407954e-05, "loss": 0.5568, "step": 8260 }, { "epoch": 2.047029702970297, "grad_norm": 1.0901250839233398, "learning_rate": 9.990228583240903e-05, "loss": 0.5579, "step": 8270 }, { "epoch": 2.0495049504950495, "grad_norm": 1.0074125528335571, "learning_rate": 9.990142292098925e-05, "loss": 0.5534, "step": 8280 }, { "epoch": 2.051980198019802, "grad_norm": 1.021348237991333, "learning_rate": 9.990055621988572e-05, "loss": 0.556, "step": 8290 }, { "epoch": 2.0544554455445545, "grad_norm": 1.0783380270004272, "learning_rate": 9.989968572916426e-05, "loss": 0.557, "step": 8300 }, { "epoch": 2.0569306930693068, "grad_norm": 0.9645823836326599, "learning_rate": 9.989881144889098e-05, "loss": 0.5526, "step": 8310 }, { "epoch": 2.0594059405940595, "grad_norm": 0.9948710203170776, "learning_rate": 9.989793337913227e-05, "loss": 0.5599, "step": 8320 }, { "epoch": 2.0618811881188117, "grad_norm": 1.160089373588562, "learning_rate": 9.989705151995482e-05, "loss": 0.5502, "step": 8330 }, { "epoch": 2.0643564356435644, "grad_norm": 1.0790077447891235, "learning_rate": 9.98961658714256e-05, "loss": 0.5569, "step": 8340 }, { "epoch": 2.0668316831683167, "grad_norm": 1.0884102582931519, "learning_rate": 9.989527643361186e-05, "loss": 0.5579, "step": 8350 }, { "epoch": 2.0693069306930694, "grad_norm": 0.9901095628738403, "learning_rate": 9.989438320658115e-05, "loss": 0.5596, "step": 8360 }, { "epoch": 2.0717821782178216, "grad_norm": 1.0390748977661133, "learning_rate": 9.989348619040133e-05, "loss": 0.5506, "step": 8370 }, { "epoch": 2.0742574257425743, "grad_norm": 1.101702332496643, "learning_rate": 9.98925853851405e-05, "loss": 0.5575, "step": 8380 }, { "epoch": 2.0767326732673266, "grad_norm": 1.008196473121643, "learning_rate": 9.989168079086706e-05, "loss": 0.5509, "step": 8390 }, { "epoch": 2.0792079207920793, "grad_norm": 1.058265209197998, "learning_rate": 9.989077240764973e-05, "loss": 0.5504, "step": 8400 }, { "epoch": 2.0816831683168315, "grad_norm": 1.0937691926956177, "learning_rate": 9.988986023555749e-05, "loss": 0.5483, "step": 8410 }, { "epoch": 2.0841584158415842, "grad_norm": 1.0641192197799683, "learning_rate": 9.98889442746596e-05, "loss": 0.5549, "step": 8420 }, { "epoch": 2.0866336633663365, "grad_norm": 1.0843329429626465, "learning_rate": 9.988802452502562e-05, "loss": 0.5516, "step": 8430 }, { "epoch": 2.089108910891089, "grad_norm": 1.0127904415130615, "learning_rate": 9.988710098672543e-05, "loss": 0.5571, "step": 8440 }, { "epoch": 2.0915841584158414, "grad_norm": 0.9335194230079651, "learning_rate": 9.988617365982912e-05, "loss": 0.5492, "step": 8450 }, { "epoch": 2.094059405940594, "grad_norm": 0.9963361620903015, "learning_rate": 9.988524254440716e-05, "loss": 0.5525, "step": 8460 }, { "epoch": 2.0965346534653464, "grad_norm": 1.015546202659607, "learning_rate": 9.988430764053025e-05, "loss": 0.5522, "step": 8470 }, { "epoch": 2.099009900990099, "grad_norm": 0.9956268668174744, "learning_rate": 9.988336894826938e-05, "loss": 0.556, "step": 8480 }, { "epoch": 2.1014851485148514, "grad_norm": 1.056719422340393, "learning_rate": 9.988242646769584e-05, "loss": 0.5521, "step": 8490 }, { "epoch": 2.103960396039604, "grad_norm": 0.9518352746963501, "learning_rate": 9.988148019888119e-05, "loss": 0.5572, "step": 8500 }, { "epoch": 2.1064356435643563, "grad_norm": 1.113493800163269, "learning_rate": 9.988053014189731e-05, "loss": 0.5532, "step": 8510 }, { "epoch": 2.108910891089109, "grad_norm": 0.9954710006713867, "learning_rate": 9.987957629681636e-05, "loss": 0.5509, "step": 8520 }, { "epoch": 2.1113861386138613, "grad_norm": 0.9513129591941833, "learning_rate": 9.987861866371076e-05, "loss": 0.5551, "step": 8530 }, { "epoch": 2.113861386138614, "grad_norm": 1.0486010313034058, "learning_rate": 9.987765724265325e-05, "loss": 0.5555, "step": 8540 }, { "epoch": 2.116336633663366, "grad_norm": 1.0186017751693726, "learning_rate": 9.987669203371683e-05, "loss": 0.5486, "step": 8550 }, { "epoch": 2.118811881188119, "grad_norm": 0.9533988237380981, "learning_rate": 9.987572303697482e-05, "loss": 0.5509, "step": 8560 }, { "epoch": 2.121287128712871, "grad_norm": 1.0690447092056274, "learning_rate": 9.987475025250078e-05, "loss": 0.55, "step": 8570 }, { "epoch": 2.123762376237624, "grad_norm": 1.0838220119476318, "learning_rate": 9.987377368036862e-05, "loss": 0.5508, "step": 8580 }, { "epoch": 2.126237623762376, "grad_norm": 0.9294942021369934, "learning_rate": 9.987279332065245e-05, "loss": 0.5505, "step": 8590 }, { "epoch": 2.128712871287129, "grad_norm": 1.0276210308074951, "learning_rate": 9.98718091734268e-05, "loss": 0.5565, "step": 8600 }, { "epoch": 2.131188118811881, "grad_norm": 0.9782671928405762, "learning_rate": 9.987082123876634e-05, "loss": 0.5501, "step": 8610 }, { "epoch": 2.133663366336634, "grad_norm": 0.9523621797561646, "learning_rate": 9.986982951674615e-05, "loss": 0.551, "step": 8620 }, { "epoch": 2.136138613861386, "grad_norm": 1.007997751235962, "learning_rate": 9.986883400744152e-05, "loss": 0.5533, "step": 8630 }, { "epoch": 2.1386138613861387, "grad_norm": 0.9973574876785278, "learning_rate": 9.986783471092803e-05, "loss": 0.5551, "step": 8640 }, { "epoch": 2.141089108910891, "grad_norm": 0.9788517951965332, "learning_rate": 9.986683162728162e-05, "loss": 0.5535, "step": 8650 }, { "epoch": 2.1435643564356437, "grad_norm": 0.9652581810951233, "learning_rate": 9.98658247565784e-05, "loss": 0.5513, "step": 8660 }, { "epoch": 2.146039603960396, "grad_norm": 0.9125329256057739, "learning_rate": 9.986481409889491e-05, "loss": 0.5562, "step": 8670 }, { "epoch": 2.1485148514851486, "grad_norm": 0.9723113775253296, "learning_rate": 9.986379965430786e-05, "loss": 0.5483, "step": 8680 }, { "epoch": 2.150990099009901, "grad_norm": 0.9930548667907715, "learning_rate": 9.986278142289429e-05, "loss": 0.5527, "step": 8690 }, { "epoch": 2.1534653465346536, "grad_norm": 0.9184022545814514, "learning_rate": 9.986175940473155e-05, "loss": 0.5486, "step": 8700 }, { "epoch": 2.155940594059406, "grad_norm": 1.068154215812683, "learning_rate": 9.986073359989724e-05, "loss": 0.5461, "step": 8710 }, { "epoch": 2.1584158415841586, "grad_norm": 1.030332088470459, "learning_rate": 9.985970400846927e-05, "loss": 0.5549, "step": 8720 }, { "epoch": 2.160891089108911, "grad_norm": 0.9349484443664551, "learning_rate": 9.985867063052581e-05, "loss": 0.5515, "step": 8730 }, { "epoch": 2.1633663366336635, "grad_norm": 0.956765353679657, "learning_rate": 9.985763346614536e-05, "loss": 0.5513, "step": 8740 }, { "epoch": 2.1658415841584158, "grad_norm": 1.0816562175750732, "learning_rate": 9.98565925154067e-05, "loss": 0.5491, "step": 8750 }, { "epoch": 2.1683168316831685, "grad_norm": 0.9916239976882935, "learning_rate": 9.985554777838884e-05, "loss": 0.55, "step": 8760 }, { "epoch": 2.1707920792079207, "grad_norm": 0.9543967247009277, "learning_rate": 9.985449925517114e-05, "loss": 0.5475, "step": 8770 }, { "epoch": 2.1732673267326734, "grad_norm": 0.957206666469574, "learning_rate": 9.985344694583326e-05, "loss": 0.5411, "step": 8780 }, { "epoch": 2.1757425742574257, "grad_norm": 1.0245091915130615, "learning_rate": 9.985239085045506e-05, "loss": 0.547, "step": 8790 }, { "epoch": 2.1782178217821784, "grad_norm": 1.0842853784561157, "learning_rate": 9.985133096911678e-05, "loss": 0.5442, "step": 8800 }, { "epoch": 2.1806930693069306, "grad_norm": 0.9504092931747437, "learning_rate": 9.98502673018989e-05, "loss": 0.5483, "step": 8810 }, { "epoch": 2.1831683168316833, "grad_norm": 0.9558846354484558, "learning_rate": 9.984919984888222e-05, "loss": 0.5405, "step": 8820 }, { "epoch": 2.1856435643564356, "grad_norm": 0.8993899822235107, "learning_rate": 9.984812861014776e-05, "loss": 0.5404, "step": 8830 }, { "epoch": 2.1881188118811883, "grad_norm": 1.0062966346740723, "learning_rate": 9.984705358577692e-05, "loss": 0.5434, "step": 8840 }, { "epoch": 2.1905940594059405, "grad_norm": 0.9527705311775208, "learning_rate": 9.984597477585132e-05, "loss": 0.5407, "step": 8850 }, { "epoch": 2.1930693069306932, "grad_norm": 0.9625396728515625, "learning_rate": 9.98448921804529e-05, "loss": 0.5475, "step": 8860 }, { "epoch": 2.1955445544554455, "grad_norm": 0.95316481590271, "learning_rate": 9.984380579966385e-05, "loss": 0.5543, "step": 8870 }, { "epoch": 2.198019801980198, "grad_norm": 1.0280135869979858, "learning_rate": 9.98427156335667e-05, "loss": 0.5456, "step": 8880 }, { "epoch": 2.2004950495049505, "grad_norm": 1.004449486732483, "learning_rate": 9.984162168224423e-05, "loss": 0.548, "step": 8890 }, { "epoch": 2.202970297029703, "grad_norm": 0.9315162897109985, "learning_rate": 9.984052394577951e-05, "loss": 0.5484, "step": 8900 }, { "epoch": 2.2054455445544554, "grad_norm": 0.8844666481018066, "learning_rate": 9.983942242425593e-05, "loss": 0.545, "step": 8910 }, { "epoch": 2.207920792079208, "grad_norm": 0.9429298043251038, "learning_rate": 9.98383171177571e-05, "loss": 0.5499, "step": 8920 }, { "epoch": 2.2103960396039604, "grad_norm": 0.9449836611747742, "learning_rate": 9.983720802636702e-05, "loss": 0.5478, "step": 8930 }, { "epoch": 2.212871287128713, "grad_norm": 1.0081214904785156, "learning_rate": 9.983609515016987e-05, "loss": 0.5505, "step": 8940 }, { "epoch": 2.2153465346534653, "grad_norm": 0.908290445804596, "learning_rate": 9.983497848925019e-05, "loss": 0.5434, "step": 8950 }, { "epoch": 2.217821782178218, "grad_norm": 0.9269936680793762, "learning_rate": 9.983385804369278e-05, "loss": 0.5416, "step": 8960 }, { "epoch": 2.2202970297029703, "grad_norm": 1.0275261402130127, "learning_rate": 9.983273381358271e-05, "loss": 0.5461, "step": 8970 }, { "epoch": 2.222772277227723, "grad_norm": 0.9413238763809204, "learning_rate": 9.983160579900539e-05, "loss": 0.5464, "step": 8980 }, { "epoch": 2.2252475247524752, "grad_norm": 0.9907044768333435, "learning_rate": 9.983047400004647e-05, "loss": 0.5466, "step": 8990 }, { "epoch": 2.227722772277228, "grad_norm": 1.0454546213150024, "learning_rate": 9.98293384167919e-05, "loss": 0.5417, "step": 9000 }, { "epoch": 2.23019801980198, "grad_norm": 0.996301531791687, "learning_rate": 9.982819904932792e-05, "loss": 0.5474, "step": 9010 }, { "epoch": 2.232673267326733, "grad_norm": 1.0359091758728027, "learning_rate": 9.982705589774106e-05, "loss": 0.5434, "step": 9020 }, { "epoch": 2.235148514851485, "grad_norm": 0.9503955245018005, "learning_rate": 9.982590896211813e-05, "loss": 0.5451, "step": 9030 }, { "epoch": 2.237623762376238, "grad_norm": 0.9478923678398132, "learning_rate": 9.982475824254625e-05, "loss": 0.5508, "step": 9040 }, { "epoch": 2.24009900990099, "grad_norm": 1.0055909156799316, "learning_rate": 9.982360373911278e-05, "loss": 0.5401, "step": 9050 }, { "epoch": 2.2425742574257423, "grad_norm": 1.0460855960845947, "learning_rate": 9.982244545190542e-05, "loss": 0.5444, "step": 9060 }, { "epoch": 2.245049504950495, "grad_norm": 0.9824306964874268, "learning_rate": 9.982128338101213e-05, "loss": 0.543, "step": 9070 }, { "epoch": 2.2475247524752477, "grad_norm": 0.9985560178756714, "learning_rate": 9.982011752652115e-05, "loss": 0.5516, "step": 9080 }, { "epoch": 2.25, "grad_norm": 0.969997763633728, "learning_rate": 9.981894788852104e-05, "loss": 0.5477, "step": 9090 }, { "epoch": 2.2524752475247523, "grad_norm": 0.890031099319458, "learning_rate": 9.981777446710061e-05, "loss": 0.5369, "step": 9100 }, { "epoch": 2.254950495049505, "grad_norm": 1.0835801362991333, "learning_rate": 9.981659726234897e-05, "loss": 0.5516, "step": 9110 }, { "epoch": 2.2574257425742577, "grad_norm": 0.9166210293769836, "learning_rate": 9.981541627435553e-05, "loss": 0.5472, "step": 9120 }, { "epoch": 2.25990099009901, "grad_norm": 0.9552027583122253, "learning_rate": 9.981423150320998e-05, "loss": 0.5441, "step": 9130 }, { "epoch": 2.262376237623762, "grad_norm": 0.9953572154045105, "learning_rate": 9.981304294900231e-05, "loss": 0.5423, "step": 9140 }, { "epoch": 2.264851485148515, "grad_norm": 0.9279681444168091, "learning_rate": 9.981185061182274e-05, "loss": 0.5448, "step": 9150 }, { "epoch": 2.2673267326732676, "grad_norm": 0.913109302520752, "learning_rate": 9.981065449176187e-05, "loss": 0.5548, "step": 9160 }, { "epoch": 2.26980198019802, "grad_norm": 0.9512195587158203, "learning_rate": 9.980945458891049e-05, "loss": 0.5415, "step": 9170 }, { "epoch": 2.272277227722772, "grad_norm": 0.981461226940155, "learning_rate": 9.980825090335977e-05, "loss": 0.5464, "step": 9180 }, { "epoch": 2.2747524752475248, "grad_norm": 1.0635350942611694, "learning_rate": 9.980704343520109e-05, "loss": 0.5424, "step": 9190 }, { "epoch": 2.2772277227722775, "grad_norm": 0.9506457448005676, "learning_rate": 9.980583218452615e-05, "loss": 0.5466, "step": 9200 }, { "epoch": 2.2797029702970297, "grad_norm": 1.051260232925415, "learning_rate": 9.980461715142695e-05, "loss": 0.5479, "step": 9210 }, { "epoch": 2.282178217821782, "grad_norm": 0.8968655467033386, "learning_rate": 9.980339833599577e-05, "loss": 0.5464, "step": 9220 }, { "epoch": 2.2846534653465347, "grad_norm": 0.9654403328895569, "learning_rate": 9.980217573832514e-05, "loss": 0.5478, "step": 9230 }, { "epoch": 2.287128712871287, "grad_norm": 0.9200029969215393, "learning_rate": 9.980094935850794e-05, "loss": 0.5406, "step": 9240 }, { "epoch": 2.2896039603960396, "grad_norm": 0.9594417810440063, "learning_rate": 9.97997191966373e-05, "loss": 0.5391, "step": 9250 }, { "epoch": 2.292079207920792, "grad_norm": 0.9543333649635315, "learning_rate": 9.979848525280663e-05, "loss": 0.5397, "step": 9260 }, { "epoch": 2.2945544554455446, "grad_norm": 0.9714906811714172, "learning_rate": 9.979724752710964e-05, "loss": 0.5511, "step": 9270 }, { "epoch": 2.297029702970297, "grad_norm": 0.9833630323410034, "learning_rate": 9.979600601964034e-05, "loss": 0.5467, "step": 9280 }, { "epoch": 2.2995049504950495, "grad_norm": 1.0120737552642822, "learning_rate": 9.9794760730493e-05, "loss": 0.5434, "step": 9290 }, { "epoch": 2.301980198019802, "grad_norm": 0.9588707685470581, "learning_rate": 9.979351165976222e-05, "loss": 0.5419, "step": 9300 }, { "epoch": 2.3044554455445545, "grad_norm": 1.0058857202529907, "learning_rate": 9.979225880754282e-05, "loss": 0.5458, "step": 9310 }, { "epoch": 2.3069306930693068, "grad_norm": 0.9661387801170349, "learning_rate": 9.979100217392998e-05, "loss": 0.55, "step": 9320 }, { "epoch": 2.3094059405940595, "grad_norm": 0.9514153003692627, "learning_rate": 9.978974175901909e-05, "loss": 0.5428, "step": 9330 }, { "epoch": 2.3118811881188117, "grad_norm": 0.9292290806770325, "learning_rate": 9.978847756290593e-05, "loss": 0.5409, "step": 9340 }, { "epoch": 2.3143564356435644, "grad_norm": 0.9630512595176697, "learning_rate": 9.978720958568645e-05, "loss": 0.5374, "step": 9350 }, { "epoch": 2.3168316831683167, "grad_norm": 0.9181420803070068, "learning_rate": 9.978593782745698e-05, "loss": 0.5364, "step": 9360 }, { "epoch": 2.3193069306930694, "grad_norm": 0.9621136784553528, "learning_rate": 9.97846622883141e-05, "loss": 0.5388, "step": 9370 }, { "epoch": 2.3217821782178216, "grad_norm": 1.006995677947998, "learning_rate": 9.978338296835464e-05, "loss": 0.5404, "step": 9380 }, { "epoch": 2.3242574257425743, "grad_norm": 0.9220977425575256, "learning_rate": 9.97820998676758e-05, "loss": 0.5408, "step": 9390 }, { "epoch": 2.3267326732673266, "grad_norm": 0.9755834341049194, "learning_rate": 9.978081298637501e-05, "loss": 0.5421, "step": 9400 }, { "epoch": 2.3292079207920793, "grad_norm": 0.9199584722518921, "learning_rate": 9.977952232455001e-05, "loss": 0.5401, "step": 9410 }, { "epoch": 2.3316831683168315, "grad_norm": 0.8972710371017456, "learning_rate": 9.977822788229878e-05, "loss": 0.5444, "step": 9420 }, { "epoch": 2.3341584158415842, "grad_norm": 0.9920417666435242, "learning_rate": 9.977692965971968e-05, "loss": 0.5431, "step": 9430 }, { "epoch": 2.3366336633663365, "grad_norm": 0.8984516859054565, "learning_rate": 9.977562765691124e-05, "loss": 0.5379, "step": 9440 }, { "epoch": 2.339108910891089, "grad_norm": 0.905728280544281, "learning_rate": 9.977432187397239e-05, "loss": 0.5479, "step": 9450 }, { "epoch": 2.3415841584158414, "grad_norm": 0.9656771421432495, "learning_rate": 9.977301231100227e-05, "loss": 0.5326, "step": 9460 }, { "epoch": 2.344059405940594, "grad_norm": 0.928699254989624, "learning_rate": 9.977169896810035e-05, "loss": 0.5373, "step": 9470 }, { "epoch": 2.3465346534653464, "grad_norm": 0.9317365288734436, "learning_rate": 9.977038184536634e-05, "loss": 0.5471, "step": 9480 }, { "epoch": 2.349009900990099, "grad_norm": 0.9698694348335266, "learning_rate": 9.976906094290029e-05, "loss": 0.542, "step": 9490 }, { "epoch": 2.3514851485148514, "grad_norm": 1.00506591796875, "learning_rate": 9.976773626080251e-05, "loss": 0.5413, "step": 9500 }, { "epoch": 2.353960396039604, "grad_norm": 0.959452748298645, "learning_rate": 9.976640779917361e-05, "loss": 0.5394, "step": 9510 }, { "epoch": 2.3564356435643563, "grad_norm": 1.0374656915664673, "learning_rate": 9.976507555811446e-05, "loss": 0.5406, "step": 9520 }, { "epoch": 2.358910891089109, "grad_norm": 0.974475085735321, "learning_rate": 9.976373953772624e-05, "loss": 0.5362, "step": 9530 }, { "epoch": 2.3613861386138613, "grad_norm": 0.8799049258232117, "learning_rate": 9.976239973811041e-05, "loss": 0.5408, "step": 9540 }, { "epoch": 2.363861386138614, "grad_norm": 0.9358004927635193, "learning_rate": 9.976105615936872e-05, "loss": 0.5337, "step": 9550 }, { "epoch": 2.366336633663366, "grad_norm": 0.8778890371322632, "learning_rate": 9.975970880160323e-05, "loss": 0.5354, "step": 9560 }, { "epoch": 2.368811881188119, "grad_norm": 0.9894009828567505, "learning_rate": 9.975835766491623e-05, "loss": 0.5405, "step": 9570 }, { "epoch": 2.371287128712871, "grad_norm": 0.9464881420135498, "learning_rate": 9.975700274941033e-05, "loss": 0.5387, "step": 9580 }, { "epoch": 2.373762376237624, "grad_norm": 1.0021339654922485, "learning_rate": 9.975564405518846e-05, "loss": 0.5361, "step": 9590 }, { "epoch": 2.376237623762376, "grad_norm": 0.891792356967926, "learning_rate": 9.975428158235375e-05, "loss": 0.5459, "step": 9600 }, { "epoch": 2.378712871287129, "grad_norm": 0.9076243042945862, "learning_rate": 9.975291533100975e-05, "loss": 0.54, "step": 9610 }, { "epoch": 2.381188118811881, "grad_norm": 0.9935998916625977, "learning_rate": 9.975154530126013e-05, "loss": 0.5397, "step": 9620 }, { "epoch": 2.383663366336634, "grad_norm": 1.0145230293273926, "learning_rate": 9.9750171493209e-05, "loss": 0.5426, "step": 9630 }, { "epoch": 2.386138613861386, "grad_norm": 0.9842987656593323, "learning_rate": 9.974879390696066e-05, "loss": 0.5324, "step": 9640 }, { "epoch": 2.3886138613861387, "grad_norm": 0.8666898608207703, "learning_rate": 9.974741254261974e-05, "loss": 0.5381, "step": 9650 }, { "epoch": 2.391089108910891, "grad_norm": 0.9478835463523865, "learning_rate": 9.974602740029115e-05, "loss": 0.5431, "step": 9660 }, { "epoch": 2.3935643564356437, "grad_norm": 0.8953028321266174, "learning_rate": 9.974463848008006e-05, "loss": 0.5454, "step": 9670 }, { "epoch": 2.396039603960396, "grad_norm": 0.9821110963821411, "learning_rate": 9.974324578209195e-05, "loss": 0.548, "step": 9680 }, { "epoch": 2.3985148514851486, "grad_norm": 0.8866865634918213, "learning_rate": 9.974184930643262e-05, "loss": 0.5418, "step": 9690 }, { "epoch": 2.400990099009901, "grad_norm": 0.9684435129165649, "learning_rate": 9.97404490532081e-05, "loss": 0.5375, "step": 9700 }, { "epoch": 2.4034653465346536, "grad_norm": 0.9655838012695312, "learning_rate": 9.973904502252473e-05, "loss": 0.548, "step": 9710 }, { "epoch": 2.405940594059406, "grad_norm": 0.9080298542976379, "learning_rate": 9.973763721448914e-05, "loss": 0.5359, "step": 9720 }, { "epoch": 2.4084158415841586, "grad_norm": 0.893877387046814, "learning_rate": 9.973622562920825e-05, "loss": 0.5454, "step": 9730 }, { "epoch": 2.410891089108911, "grad_norm": 0.9619543552398682, "learning_rate": 9.973481026678926e-05, "loss": 0.5379, "step": 9740 }, { "epoch": 2.4133663366336635, "grad_norm": 0.9774010181427002, "learning_rate": 9.973339112733964e-05, "loss": 0.5413, "step": 9750 }, { "epoch": 2.4158415841584158, "grad_norm": 0.8861041069030762, "learning_rate": 9.973196821096718e-05, "loss": 0.5383, "step": 9760 }, { "epoch": 2.4183168316831685, "grad_norm": 0.876498818397522, "learning_rate": 9.973054151777991e-05, "loss": 0.5402, "step": 9770 }, { "epoch": 2.4207920792079207, "grad_norm": 0.9249369502067566, "learning_rate": 9.972911104788624e-05, "loss": 0.5386, "step": 9780 }, { "epoch": 2.4232673267326734, "grad_norm": 1.020761489868164, "learning_rate": 9.972767680139476e-05, "loss": 0.5344, "step": 9790 }, { "epoch": 2.4257425742574257, "grad_norm": 0.9034565091133118, "learning_rate": 9.97262387784144e-05, "loss": 0.5377, "step": 9800 }, { "epoch": 2.4282178217821784, "grad_norm": 0.9092475771903992, "learning_rate": 9.972479697905438e-05, "loss": 0.5405, "step": 9810 }, { "epoch": 2.4306930693069306, "grad_norm": 0.949803352355957, "learning_rate": 9.972335140342415e-05, "loss": 0.5357, "step": 9820 }, { "epoch": 2.4331683168316833, "grad_norm": 0.9013324975967407, "learning_rate": 9.972190205163356e-05, "loss": 0.538, "step": 9830 }, { "epoch": 2.4356435643564356, "grad_norm": 0.9452275633811951, "learning_rate": 9.972044892379262e-05, "loss": 0.5373, "step": 9840 }, { "epoch": 2.4381188118811883, "grad_norm": 0.9203241467475891, "learning_rate": 9.971899202001173e-05, "loss": 0.5351, "step": 9850 }, { "epoch": 2.4405940594059405, "grad_norm": 0.8690760135650635, "learning_rate": 9.971753134040152e-05, "loss": 0.5403, "step": 9860 }, { "epoch": 2.4430693069306932, "grad_norm": 0.9870879054069519, "learning_rate": 9.971606688507288e-05, "loss": 0.5331, "step": 9870 }, { "epoch": 2.4455445544554455, "grad_norm": 0.9596893787384033, "learning_rate": 9.971459865413709e-05, "loss": 0.5424, "step": 9880 }, { "epoch": 2.448019801980198, "grad_norm": 0.905780017375946, "learning_rate": 9.97131266477056e-05, "loss": 0.5412, "step": 9890 }, { "epoch": 2.4504950495049505, "grad_norm": 0.9292724132537842, "learning_rate": 9.971165086589023e-05, "loss": 0.5403, "step": 9900 }, { "epoch": 2.452970297029703, "grad_norm": 0.8626181483268738, "learning_rate": 9.971017130880303e-05, "loss": 0.5356, "step": 9910 }, { "epoch": 2.4554455445544554, "grad_norm": 0.924237072467804, "learning_rate": 9.97086879765564e-05, "loss": 0.5351, "step": 9920 }, { "epoch": 2.457920792079208, "grad_norm": 0.9117553234100342, "learning_rate": 9.970720086926295e-05, "loss": 0.5382, "step": 9930 }, { "epoch": 2.4603960396039604, "grad_norm": 0.8638733625411987, "learning_rate": 9.970570998703563e-05, "loss": 0.5385, "step": 9940 }, { "epoch": 2.462871287128713, "grad_norm": 0.9435857534408569, "learning_rate": 9.970421532998768e-05, "loss": 0.5391, "step": 9950 }, { "epoch": 2.4653465346534653, "grad_norm": 0.9233555197715759, "learning_rate": 9.970271689823259e-05, "loss": 0.539, "step": 9960 }, { "epoch": 2.467821782178218, "grad_norm": 0.9906812310218811, "learning_rate": 9.970121469188416e-05, "loss": 0.5305, "step": 9970 }, { "epoch": 2.4702970297029703, "grad_norm": 1.0027692317962646, "learning_rate": 9.969970871105647e-05, "loss": 0.537, "step": 9980 }, { "epoch": 2.4727722772277225, "grad_norm": 0.9134097099304199, "learning_rate": 9.96981989558639e-05, "loss": 0.5378, "step": 9990 }, { "epoch": 2.4752475247524752, "grad_norm": 0.9708094000816345, "learning_rate": 9.96966854264211e-05, "loss": 0.5367, "step": 10000 }, { "epoch": 2.477722772277228, "grad_norm": 0.9368119239807129, "learning_rate": 9.969516812284301e-05, "loss": 0.5475, "step": 10010 }, { "epoch": 2.48019801980198, "grad_norm": 0.9493786692619324, "learning_rate": 9.969364704524485e-05, "loss": 0.5372, "step": 10020 }, { "epoch": 2.4826732673267324, "grad_norm": 0.9126348495483398, "learning_rate": 9.969212219374215e-05, "loss": 0.5354, "step": 10030 }, { "epoch": 2.485148514851485, "grad_norm": 0.9522691369056702, "learning_rate": 9.969059356845071e-05, "loss": 0.5405, "step": 10040 }, { "epoch": 2.487623762376238, "grad_norm": 0.9178929924964905, "learning_rate": 9.968906116948663e-05, "loss": 0.5296, "step": 10050 }, { "epoch": 2.49009900990099, "grad_norm": 0.9470218420028687, "learning_rate": 9.968752499696627e-05, "loss": 0.5393, "step": 10060 }, { "epoch": 2.4925742574257423, "grad_norm": 1.0962098836898804, "learning_rate": 9.968598505100628e-05, "loss": 0.5433, "step": 10070 }, { "epoch": 2.495049504950495, "grad_norm": 0.932064950466156, "learning_rate": 9.968444133172364e-05, "loss": 0.5378, "step": 10080 }, { "epoch": 2.4975247524752477, "grad_norm": 0.9618189930915833, "learning_rate": 9.968289383923556e-05, "loss": 0.5375, "step": 10090 }, { "epoch": 2.5, "grad_norm": 0.9310656189918518, "learning_rate": 9.968134257365958e-05, "loss": 0.535, "step": 10100 }, { "epoch": 2.5024752475247523, "grad_norm": 0.8725021481513977, "learning_rate": 9.967978753511349e-05, "loss": 0.5361, "step": 10110 }, { "epoch": 2.504950495049505, "grad_norm": 1.0104823112487793, "learning_rate": 9.96782287237154e-05, "loss": 0.5381, "step": 10120 }, { "epoch": 2.5074257425742577, "grad_norm": 0.9642874598503113, "learning_rate": 9.967666613958369e-05, "loss": 0.5363, "step": 10130 }, { "epoch": 2.50990099009901, "grad_norm": 0.9121452569961548, "learning_rate": 9.967509978283702e-05, "loss": 0.5288, "step": 10140 }, { "epoch": 2.512376237623762, "grad_norm": 0.9191849827766418, "learning_rate": 9.967352965359436e-05, "loss": 0.5322, "step": 10150 }, { "epoch": 2.514851485148515, "grad_norm": 0.8391523957252502, "learning_rate": 9.967195575197492e-05, "loss": 0.5381, "step": 10160 }, { "epoch": 2.5173267326732676, "grad_norm": 0.8873317241668701, "learning_rate": 9.967037807809825e-05, "loss": 0.5367, "step": 10170 }, { "epoch": 2.51980198019802, "grad_norm": 0.8751257658004761, "learning_rate": 9.966879663208417e-05, "loss": 0.5365, "step": 10180 }, { "epoch": 2.522277227722772, "grad_norm": 0.9776911735534668, "learning_rate": 9.966721141405277e-05, "loss": 0.5376, "step": 10190 }, { "epoch": 2.5247524752475248, "grad_norm": 0.9594063758850098, "learning_rate": 9.966562242412442e-05, "loss": 0.5292, "step": 10200 }, { "epoch": 2.5272277227722775, "grad_norm": 0.9469699263572693, "learning_rate": 9.966402966241985e-05, "loss": 0.5338, "step": 10210 }, { "epoch": 2.5297029702970297, "grad_norm": 0.8863347172737122, "learning_rate": 9.966243312905996e-05, "loss": 0.5366, "step": 10220 }, { "epoch": 2.532178217821782, "grad_norm": 0.9193538427352905, "learning_rate": 9.966083282416601e-05, "loss": 0.536, "step": 10230 }, { "epoch": 2.5346534653465347, "grad_norm": 0.9349112510681152, "learning_rate": 9.965922874785955e-05, "loss": 0.5338, "step": 10240 }, { "epoch": 2.5371287128712874, "grad_norm": 0.9284337162971497, "learning_rate": 9.965762090026238e-05, "loss": 0.5293, "step": 10250 }, { "epoch": 2.5396039603960396, "grad_norm": 0.9591560363769531, "learning_rate": 9.965600928149661e-05, "loss": 0.5358, "step": 10260 }, { "epoch": 2.542079207920792, "grad_norm": 0.8925835490226746, "learning_rate": 9.965439389168466e-05, "loss": 0.5358, "step": 10270 }, { "epoch": 2.5445544554455446, "grad_norm": 0.9618791937828064, "learning_rate": 9.965277473094917e-05, "loss": 0.5356, "step": 10280 }, { "epoch": 2.5470297029702973, "grad_norm": 0.9101438522338867, "learning_rate": 9.965115179941311e-05, "loss": 0.534, "step": 10290 }, { "epoch": 2.5495049504950495, "grad_norm": 0.9728567004203796, "learning_rate": 9.964952509719977e-05, "loss": 0.5411, "step": 10300 }, { "epoch": 2.551980198019802, "grad_norm": 0.915654718875885, "learning_rate": 9.964789462443263e-05, "loss": 0.5331, "step": 10310 }, { "epoch": 2.5544554455445545, "grad_norm": 0.9174913763999939, "learning_rate": 9.964626038123554e-05, "loss": 0.5367, "step": 10320 }, { "epoch": 2.556930693069307, "grad_norm": 0.9912504553794861, "learning_rate": 9.964462236773262e-05, "loss": 0.5321, "step": 10330 }, { "epoch": 2.5594059405940595, "grad_norm": 0.9671111106872559, "learning_rate": 9.964298058404825e-05, "loss": 0.5351, "step": 10340 }, { "epoch": 2.5618811881188117, "grad_norm": 1.0082213878631592, "learning_rate": 9.964133503030713e-05, "loss": 0.5321, "step": 10350 }, { "epoch": 2.5643564356435644, "grad_norm": 0.9034382700920105, "learning_rate": 9.96396857066342e-05, "loss": 0.5287, "step": 10360 }, { "epoch": 2.5668316831683167, "grad_norm": 0.8582724928855896, "learning_rate": 9.963803261315475e-05, "loss": 0.5313, "step": 10370 }, { "epoch": 2.5693069306930694, "grad_norm": 0.8802998661994934, "learning_rate": 9.963637574999432e-05, "loss": 0.5313, "step": 10380 }, { "epoch": 2.5717821782178216, "grad_norm": 0.9802852272987366, "learning_rate": 9.963471511727868e-05, "loss": 0.531, "step": 10390 }, { "epoch": 2.5742574257425743, "grad_norm": 0.9879611134529114, "learning_rate": 9.963305071513402e-05, "loss": 0.5356, "step": 10400 }, { "epoch": 2.5767326732673266, "grad_norm": 0.8952611684799194, "learning_rate": 9.963138254368672e-05, "loss": 0.5312, "step": 10410 }, { "epoch": 2.5792079207920793, "grad_norm": 0.8467462658882141, "learning_rate": 9.962971060306343e-05, "loss": 0.5304, "step": 10420 }, { "epoch": 2.5816831683168315, "grad_norm": 0.9625754952430725, "learning_rate": 9.962803489339116e-05, "loss": 0.527, "step": 10430 }, { "epoch": 2.5841584158415842, "grad_norm": 0.9855440855026245, "learning_rate": 9.962635541479715e-05, "loss": 0.5333, "step": 10440 }, { "epoch": 2.5866336633663365, "grad_norm": 0.8836238384246826, "learning_rate": 9.962467216740894e-05, "loss": 0.5355, "step": 10450 }, { "epoch": 2.589108910891089, "grad_norm": 0.9500771760940552, "learning_rate": 9.962298515135439e-05, "loss": 0.5307, "step": 10460 }, { "epoch": 2.5915841584158414, "grad_norm": 0.9197276830673218, "learning_rate": 9.96212943667616e-05, "loss": 0.5338, "step": 10470 }, { "epoch": 2.594059405940594, "grad_norm": 0.8860569596290588, "learning_rate": 9.961959981375897e-05, "loss": 0.5318, "step": 10480 }, { "epoch": 2.5965346534653464, "grad_norm": 0.9508169293403625, "learning_rate": 9.961790149247518e-05, "loss": 0.5349, "step": 10490 }, { "epoch": 2.599009900990099, "grad_norm": 0.8910251259803772, "learning_rate": 9.961619940303925e-05, "loss": 0.5323, "step": 10500 }, { "epoch": 2.6014851485148514, "grad_norm": 0.942378044128418, "learning_rate": 9.961449354558041e-05, "loss": 0.5371, "step": 10510 }, { "epoch": 2.603960396039604, "grad_norm": 0.8445572257041931, "learning_rate": 9.961278392022819e-05, "loss": 0.538, "step": 10520 }, { "epoch": 2.6064356435643563, "grad_norm": 0.8689337372779846, "learning_rate": 9.961107052711246e-05, "loss": 0.5325, "step": 10530 }, { "epoch": 2.608910891089109, "grad_norm": 0.8639494776725769, "learning_rate": 9.960935336636333e-05, "loss": 0.5324, "step": 10540 }, { "epoch": 2.6113861386138613, "grad_norm": 0.9267062544822693, "learning_rate": 9.96076324381112e-05, "loss": 0.5355, "step": 10550 }, { "epoch": 2.613861386138614, "grad_norm": 0.9827973246574402, "learning_rate": 9.960590774248677e-05, "loss": 0.538, "step": 10560 }, { "epoch": 2.616336633663366, "grad_norm": 0.9674758911132812, "learning_rate": 9.960417927962102e-05, "loss": 0.538, "step": 10570 }, { "epoch": 2.618811881188119, "grad_norm": 0.9247533679008484, "learning_rate": 9.960244704964521e-05, "loss": 0.536, "step": 10580 }, { "epoch": 2.621287128712871, "grad_norm": 0.9249588847160339, "learning_rate": 9.960071105269091e-05, "loss": 0.5398, "step": 10590 }, { "epoch": 2.623762376237624, "grad_norm": 0.9636698365211487, "learning_rate": 9.959897128888992e-05, "loss": 0.5329, "step": 10600 }, { "epoch": 2.626237623762376, "grad_norm": 0.8841663599014282, "learning_rate": 9.959722775837439e-05, "loss": 0.5309, "step": 10610 }, { "epoch": 2.628712871287129, "grad_norm": 0.9206003546714783, "learning_rate": 9.959548046127673e-05, "loss": 0.54, "step": 10620 }, { "epoch": 2.631188118811881, "grad_norm": 0.913494348526001, "learning_rate": 9.959372939772963e-05, "loss": 0.5384, "step": 10630 }, { "epoch": 2.633663366336634, "grad_norm": 0.9689640998840332, "learning_rate": 9.959197456786608e-05, "loss": 0.5393, "step": 10640 }, { "epoch": 2.636138613861386, "grad_norm": 1.013797640800476, "learning_rate": 9.959021597181933e-05, "loss": 0.5354, "step": 10650 }, { "epoch": 2.6386138613861387, "grad_norm": 0.8727473616600037, "learning_rate": 9.958845360972293e-05, "loss": 0.534, "step": 10660 }, { "epoch": 2.641089108910891, "grad_norm": 0.8777872920036316, "learning_rate": 9.958668748171077e-05, "loss": 0.5349, "step": 10670 }, { "epoch": 2.6435643564356437, "grad_norm": 0.8457823395729065, "learning_rate": 9.95849175879169e-05, "loss": 0.5345, "step": 10680 }, { "epoch": 2.646039603960396, "grad_norm": 0.8662412762641907, "learning_rate": 9.95831439284758e-05, "loss": 0.5413, "step": 10690 }, { "epoch": 2.6485148514851486, "grad_norm": 0.8448507189750671, "learning_rate": 9.958136650352212e-05, "loss": 0.5267, "step": 10700 }, { "epoch": 2.650990099009901, "grad_norm": 0.8594169616699219, "learning_rate": 9.957958531319088e-05, "loss": 0.5378, "step": 10710 }, { "epoch": 2.6534653465346536, "grad_norm": 0.8902627229690552, "learning_rate": 9.957780035761732e-05, "loss": 0.5295, "step": 10720 }, { "epoch": 2.655940594059406, "grad_norm": 0.8842018246650696, "learning_rate": 9.957601163693702e-05, "loss": 0.526, "step": 10730 }, { "epoch": 2.6584158415841586, "grad_norm": 0.824047863483429, "learning_rate": 9.95742191512858e-05, "loss": 0.5324, "step": 10740 }, { "epoch": 2.660891089108911, "grad_norm": 0.8598667979240417, "learning_rate": 9.957242290079979e-05, "loss": 0.5327, "step": 10750 }, { "epoch": 2.6633663366336635, "grad_norm": 0.8513838648796082, "learning_rate": 9.957062288561541e-05, "loss": 0.5311, "step": 10760 }, { "epoch": 2.6658415841584158, "grad_norm": 0.9129284620285034, "learning_rate": 9.956881910586937e-05, "loss": 0.535, "step": 10770 }, { "epoch": 2.6683168316831685, "grad_norm": 0.965218722820282, "learning_rate": 9.956701156169865e-05, "loss": 0.5305, "step": 10780 }, { "epoch": 2.6707920792079207, "grad_norm": 0.8769111633300781, "learning_rate": 9.95652002532405e-05, "loss": 0.5279, "step": 10790 }, { "epoch": 2.6732673267326734, "grad_norm": 0.8931010961532593, "learning_rate": 9.95633851806325e-05, "loss": 0.5342, "step": 10800 }, { "epoch": 2.6757425742574257, "grad_norm": 0.8793346881866455, "learning_rate": 9.956156634401249e-05, "loss": 0.5321, "step": 10810 }, { "epoch": 2.6782178217821784, "grad_norm": 0.9564671516418457, "learning_rate": 9.95597437435186e-05, "loss": 0.5268, "step": 10820 }, { "epoch": 2.6806930693069306, "grad_norm": 0.9348644018173218, "learning_rate": 9.955791737928921e-05, "loss": 0.5311, "step": 10830 }, { "epoch": 2.6831683168316833, "grad_norm": 0.9611016511917114, "learning_rate": 9.955608725146309e-05, "loss": 0.5309, "step": 10840 }, { "epoch": 2.6856435643564356, "grad_norm": 0.9431951642036438, "learning_rate": 9.955425336017914e-05, "loss": 0.5309, "step": 10850 }, { "epoch": 2.6881188118811883, "grad_norm": 0.881892204284668, "learning_rate": 9.95524157055767e-05, "loss": 0.5289, "step": 10860 }, { "epoch": 2.6905940594059405, "grad_norm": 0.939906895160675, "learning_rate": 9.95505742877953e-05, "loss": 0.5321, "step": 10870 }, { "epoch": 2.693069306930693, "grad_norm": 0.9175068736076355, "learning_rate": 9.954872910697479e-05, "loss": 0.5267, "step": 10880 }, { "epoch": 2.6955445544554455, "grad_norm": 0.8523741960525513, "learning_rate": 9.95468801632553e-05, "loss": 0.5334, "step": 10890 }, { "epoch": 2.698019801980198, "grad_norm": 0.8366478085517883, "learning_rate": 9.954502745677725e-05, "loss": 0.5345, "step": 10900 }, { "epoch": 2.7004950495049505, "grad_norm": 0.9407424330711365, "learning_rate": 9.954317098768134e-05, "loss": 0.5312, "step": 10910 }, { "epoch": 2.7029702970297027, "grad_norm": 0.8585940599441528, "learning_rate": 9.954131075610852e-05, "loss": 0.5323, "step": 10920 }, { "epoch": 2.7054455445544554, "grad_norm": 0.9079265594482422, "learning_rate": 9.953944676220011e-05, "loss": 0.5334, "step": 10930 }, { "epoch": 2.707920792079208, "grad_norm": 0.8118189573287964, "learning_rate": 9.953757900609764e-05, "loss": 0.5279, "step": 10940 }, { "epoch": 2.7103960396039604, "grad_norm": 0.9268868565559387, "learning_rate": 9.953570748794297e-05, "loss": 0.525, "step": 10950 }, { "epoch": 2.7128712871287126, "grad_norm": 0.8967493176460266, "learning_rate": 9.953383220787824e-05, "loss": 0.5247, "step": 10960 }, { "epoch": 2.7153465346534653, "grad_norm": 0.9513668417930603, "learning_rate": 9.953195316604583e-05, "loss": 0.5315, "step": 10970 }, { "epoch": 2.717821782178218, "grad_norm": 0.879172682762146, "learning_rate": 9.953007036258845e-05, "loss": 0.5299, "step": 10980 }, { "epoch": 2.7202970297029703, "grad_norm": 0.8679616451263428, "learning_rate": 9.952818379764912e-05, "loss": 0.5317, "step": 10990 }, { "epoch": 2.7227722772277225, "grad_norm": 0.8383806943893433, "learning_rate": 9.952629347137107e-05, "loss": 0.5326, "step": 11000 }, { "epoch": 2.7252475247524752, "grad_norm": 0.8318424224853516, "learning_rate": 9.952439938389789e-05, "loss": 0.5288, "step": 11010 }, { "epoch": 2.727722772277228, "grad_norm": 1.0297945737838745, "learning_rate": 9.95225015353734e-05, "loss": 0.5306, "step": 11020 }, { "epoch": 2.73019801980198, "grad_norm": 0.8925171494483948, "learning_rate": 9.952059992594174e-05, "loss": 0.5284, "step": 11030 }, { "epoch": 2.7326732673267324, "grad_norm": 0.8980510234832764, "learning_rate": 9.951869455574733e-05, "loss": 0.5345, "step": 11040 }, { "epoch": 2.735148514851485, "grad_norm": 0.8598983287811279, "learning_rate": 9.951678542493485e-05, "loss": 0.5329, "step": 11050 }, { "epoch": 2.737623762376238, "grad_norm": 0.8611969947814941, "learning_rate": 9.95148725336493e-05, "loss": 0.5329, "step": 11060 }, { "epoch": 2.74009900990099, "grad_norm": 0.8842231035232544, "learning_rate": 9.951295588203598e-05, "loss": 0.5288, "step": 11070 }, { "epoch": 2.7425742574257423, "grad_norm": 0.9220582246780396, "learning_rate": 9.951103547024038e-05, "loss": 0.5243, "step": 11080 }, { "epoch": 2.745049504950495, "grad_norm": 0.9102240204811096, "learning_rate": 9.950911129840841e-05, "loss": 0.5328, "step": 11090 }, { "epoch": 2.7475247524752477, "grad_norm": 0.8799940347671509, "learning_rate": 9.950718336668614e-05, "loss": 0.5292, "step": 11100 }, { "epoch": 2.75, "grad_norm": 0.8498042225837708, "learning_rate": 9.950525167522003e-05, "loss": 0.5324, "step": 11110 }, { "epoch": 2.7524752475247523, "grad_norm": 0.8206253051757812, "learning_rate": 9.950331622415676e-05, "loss": 0.5299, "step": 11120 }, { "epoch": 2.754950495049505, "grad_norm": 0.8628694415092468, "learning_rate": 9.950137701364332e-05, "loss": 0.5294, "step": 11130 }, { "epoch": 2.7574257425742577, "grad_norm": 0.9563665986061096, "learning_rate": 9.949943404382698e-05, "loss": 0.5289, "step": 11140 }, { "epoch": 2.75990099009901, "grad_norm": 0.9524432420730591, "learning_rate": 9.949748731485527e-05, "loss": 0.5326, "step": 11150 }, { "epoch": 2.762376237623762, "grad_norm": 0.8502256870269775, "learning_rate": 9.949553682687607e-05, "loss": 0.5278, "step": 11160 }, { "epoch": 2.764851485148515, "grad_norm": 0.9585160613059998, "learning_rate": 9.949358258003749e-05, "loss": 0.5283, "step": 11170 }, { "epoch": 2.7673267326732676, "grad_norm": 0.9266567826271057, "learning_rate": 9.949162457448794e-05, "loss": 0.5303, "step": 11180 }, { "epoch": 2.76980198019802, "grad_norm": 1.0007803440093994, "learning_rate": 9.948966281037612e-05, "loss": 0.5368, "step": 11190 }, { "epoch": 2.772277227722772, "grad_norm": 0.8151941299438477, "learning_rate": 9.948769728785101e-05, "loss": 0.5236, "step": 11200 }, { "epoch": 2.7747524752475248, "grad_norm": 0.8498386740684509, "learning_rate": 9.948572800706189e-05, "loss": 0.5228, "step": 11210 }, { "epoch": 2.7772277227722775, "grad_norm": 0.8854384422302246, "learning_rate": 9.94837549681583e-05, "loss": 0.534, "step": 11220 }, { "epoch": 2.7797029702970297, "grad_norm": 0.8763745427131653, "learning_rate": 9.948177817129008e-05, "loss": 0.528, "step": 11230 }, { "epoch": 2.782178217821782, "grad_norm": 0.8726077079772949, "learning_rate": 9.947979761660735e-05, "loss": 0.5245, "step": 11240 }, { "epoch": 2.7846534653465347, "grad_norm": 0.8776301145553589, "learning_rate": 9.947781330426055e-05, "loss": 0.5284, "step": 11250 }, { "epoch": 2.7871287128712874, "grad_norm": 0.951884925365448, "learning_rate": 9.947582523440034e-05, "loss": 0.5301, "step": 11260 }, { "epoch": 2.7896039603960396, "grad_norm": 0.9467377066612244, "learning_rate": 9.947383340717773e-05, "loss": 0.5328, "step": 11270 }, { "epoch": 2.792079207920792, "grad_norm": 0.9575574994087219, "learning_rate": 9.947183782274396e-05, "loss": 0.5322, "step": 11280 }, { "epoch": 2.7945544554455446, "grad_norm": 0.8838969469070435, "learning_rate": 9.94698384812506e-05, "loss": 0.536, "step": 11290 }, { "epoch": 2.7970297029702973, "grad_norm": 0.849685549736023, "learning_rate": 9.946783538284947e-05, "loss": 0.5305, "step": 11300 }, { "epoch": 2.7995049504950495, "grad_norm": 0.8460737466812134, "learning_rate": 9.946582852769271e-05, "loss": 0.5349, "step": 11310 }, { "epoch": 2.801980198019802, "grad_norm": 1.0142765045166016, "learning_rate": 9.946381791593272e-05, "loss": 0.534, "step": 11320 }, { "epoch": 2.8044554455445545, "grad_norm": 0.9780712723731995, "learning_rate": 9.946180354772219e-05, "loss": 0.5342, "step": 11330 }, { "epoch": 2.806930693069307, "grad_norm": 0.8762306571006775, "learning_rate": 9.945978542321411e-05, "loss": 0.5318, "step": 11340 }, { "epoch": 2.8094059405940595, "grad_norm": 0.9307741522789001, "learning_rate": 9.945776354256172e-05, "loss": 0.5278, "step": 11350 }, { "epoch": 2.8118811881188117, "grad_norm": 0.8217676877975464, "learning_rate": 9.945573790591858e-05, "loss": 0.5322, "step": 11360 }, { "epoch": 2.8143564356435644, "grad_norm": 0.8613919615745544, "learning_rate": 9.945370851343854e-05, "loss": 0.5299, "step": 11370 }, { "epoch": 2.8168316831683167, "grad_norm": 0.8474620580673218, "learning_rate": 9.945167536527569e-05, "loss": 0.5274, "step": 11380 }, { "epoch": 2.8193069306930694, "grad_norm": 0.866621732711792, "learning_rate": 9.944963846158445e-05, "loss": 0.5277, "step": 11390 }, { "epoch": 2.8217821782178216, "grad_norm": 0.8401481509208679, "learning_rate": 9.944759780251951e-05, "loss": 0.5264, "step": 11400 }, { "epoch": 2.8242574257425743, "grad_norm": 0.8372300267219543, "learning_rate": 9.944555338823585e-05, "loss": 0.5224, "step": 11410 }, { "epoch": 2.8267326732673266, "grad_norm": 0.9075191020965576, "learning_rate": 9.944350521888871e-05, "loss": 0.5265, "step": 11420 }, { "epoch": 2.8292079207920793, "grad_norm": 0.8133928179740906, "learning_rate": 9.944145329463365e-05, "loss": 0.5254, "step": 11430 }, { "epoch": 2.8316831683168315, "grad_norm": 1.014939546585083, "learning_rate": 9.943939761562651e-05, "loss": 0.528, "step": 11440 }, { "epoch": 2.8341584158415842, "grad_norm": 0.9130150675773621, "learning_rate": 9.943733818202339e-05, "loss": 0.5265, "step": 11450 }, { "epoch": 2.8366336633663365, "grad_norm": 0.9034225940704346, "learning_rate": 9.943527499398068e-05, "loss": 0.5238, "step": 11460 }, { "epoch": 2.839108910891089, "grad_norm": 0.8585871458053589, "learning_rate": 9.943320805165508e-05, "loss": 0.5273, "step": 11470 }, { "epoch": 2.8415841584158414, "grad_norm": 0.8688744306564331, "learning_rate": 9.943113735520358e-05, "loss": 0.529, "step": 11480 }, { "epoch": 2.844059405940594, "grad_norm": 0.8064686059951782, "learning_rate": 9.94290629047834e-05, "loss": 0.5254, "step": 11490 }, { "epoch": 2.8465346534653464, "grad_norm": 0.7945666313171387, "learning_rate": 9.942698470055209e-05, "loss": 0.526, "step": 11500 }, { "epoch": 2.849009900990099, "grad_norm": 0.9819457530975342, "learning_rate": 9.942490274266749e-05, "loss": 0.5279, "step": 11510 }, { "epoch": 2.8514851485148514, "grad_norm": 0.8913330435752869, "learning_rate": 9.94228170312877e-05, "loss": 0.5313, "step": 11520 }, { "epoch": 2.853960396039604, "grad_norm": 0.8449063301086426, "learning_rate": 9.942072756657112e-05, "loss": 0.5244, "step": 11530 }, { "epoch": 2.8564356435643563, "grad_norm": 0.8575042486190796, "learning_rate": 9.941863434867643e-05, "loss": 0.5306, "step": 11540 }, { "epoch": 2.858910891089109, "grad_norm": 0.7748112678527832, "learning_rate": 9.94165373777626e-05, "loss": 0.5258, "step": 11550 }, { "epoch": 2.8613861386138613, "grad_norm": 0.8592458963394165, "learning_rate": 9.941443665398887e-05, "loss": 0.5287, "step": 11560 }, { "epoch": 2.863861386138614, "grad_norm": 0.8432192206382751, "learning_rate": 9.941233217751477e-05, "loss": 0.5286, "step": 11570 }, { "epoch": 2.866336633663366, "grad_norm": 0.9166144728660583, "learning_rate": 9.941022394850016e-05, "loss": 0.5226, "step": 11580 }, { "epoch": 2.868811881188119, "grad_norm": 0.9796750545501709, "learning_rate": 9.940811196710511e-05, "loss": 0.521, "step": 11590 }, { "epoch": 2.871287128712871, "grad_norm": 0.8700019717216492, "learning_rate": 9.940599623349002e-05, "loss": 0.5229, "step": 11600 }, { "epoch": 2.873762376237624, "grad_norm": 0.8966332077980042, "learning_rate": 9.940387674781557e-05, "loss": 0.5278, "step": 11610 }, { "epoch": 2.876237623762376, "grad_norm": 0.8756115436553955, "learning_rate": 9.94017535102427e-05, "loss": 0.5243, "step": 11620 }, { "epoch": 2.878712871287129, "grad_norm": 0.8847984075546265, "learning_rate": 9.939962652093271e-05, "loss": 0.5357, "step": 11630 }, { "epoch": 2.881188118811881, "grad_norm": 0.9114370942115784, "learning_rate": 9.939749578004707e-05, "loss": 0.5347, "step": 11640 }, { "epoch": 2.883663366336634, "grad_norm": 0.8946544528007507, "learning_rate": 9.939536128774762e-05, "loss": 0.5254, "step": 11650 }, { "epoch": 2.886138613861386, "grad_norm": 0.8137221336364746, "learning_rate": 9.939322304419648e-05, "loss": 0.5302, "step": 11660 }, { "epoch": 2.8886138613861387, "grad_norm": 1.0051974058151245, "learning_rate": 9.9391081049556e-05, "loss": 0.5276, "step": 11670 }, { "epoch": 2.891089108910891, "grad_norm": 0.8282648324966431, "learning_rate": 9.938893530398886e-05, "loss": 0.5311, "step": 11680 }, { "epoch": 2.8935643564356437, "grad_norm": 0.8491721153259277, "learning_rate": 9.938678580765804e-05, "loss": 0.5327, "step": 11690 }, { "epoch": 2.896039603960396, "grad_norm": 0.8561279773712158, "learning_rate": 9.938463256072675e-05, "loss": 0.5287, "step": 11700 }, { "epoch": 2.8985148514851486, "grad_norm": 0.9291508793830872, "learning_rate": 9.938247556335854e-05, "loss": 0.5286, "step": 11710 }, { "epoch": 2.900990099009901, "grad_norm": 0.8152692317962646, "learning_rate": 9.93803148157172e-05, "loss": 0.5262, "step": 11720 }, { "epoch": 2.9034653465346536, "grad_norm": 0.9215351939201355, "learning_rate": 9.937815031796682e-05, "loss": 0.5319, "step": 11730 }, { "epoch": 2.905940594059406, "grad_norm": 0.8355497121810913, "learning_rate": 9.937598207027181e-05, "loss": 0.5243, "step": 11740 }, { "epoch": 2.9084158415841586, "grad_norm": 0.9934191107749939, "learning_rate": 9.93738100727968e-05, "loss": 0.5217, "step": 11750 }, { "epoch": 2.910891089108911, "grad_norm": 0.82426917552948, "learning_rate": 9.937163432570677e-05, "loss": 0.5283, "step": 11760 }, { "epoch": 2.9133663366336635, "grad_norm": 0.8521701097488403, "learning_rate": 9.936945482916691e-05, "loss": 0.5288, "step": 11770 }, { "epoch": 2.9158415841584158, "grad_norm": 0.8360291719436646, "learning_rate": 9.93672715833428e-05, "loss": 0.5327, "step": 11780 }, { "epoch": 2.9183168316831685, "grad_norm": 0.8860406279563904, "learning_rate": 9.936508458840019e-05, "loss": 0.5204, "step": 11790 }, { "epoch": 2.9207920792079207, "grad_norm": 0.8273493051528931, "learning_rate": 9.93628938445052e-05, "loss": 0.5337, "step": 11800 }, { "epoch": 2.9232673267326734, "grad_norm": 0.8853658437728882, "learning_rate": 9.936069935182417e-05, "loss": 0.5306, "step": 11810 }, { "epoch": 2.9257425742574257, "grad_norm": 0.8037503361701965, "learning_rate": 9.935850111052378e-05, "loss": 0.5251, "step": 11820 }, { "epoch": 2.9282178217821784, "grad_norm": 0.9616329073905945, "learning_rate": 9.935629912077097e-05, "loss": 0.5278, "step": 11830 }, { "epoch": 2.9306930693069306, "grad_norm": 0.8569377064704895, "learning_rate": 9.935409338273297e-05, "loss": 0.5202, "step": 11840 }, { "epoch": 2.9331683168316833, "grad_norm": 0.848463237285614, "learning_rate": 9.935188389657729e-05, "loss": 0.5299, "step": 11850 }, { "epoch": 2.9356435643564356, "grad_norm": 0.8241478204727173, "learning_rate": 9.934967066247171e-05, "loss": 0.5252, "step": 11860 }, { "epoch": 2.9381188118811883, "grad_norm": 0.9014785885810852, "learning_rate": 9.934745368058433e-05, "loss": 0.5263, "step": 11870 }, { "epoch": 2.9405940594059405, "grad_norm": 0.8902969360351562, "learning_rate": 9.934523295108352e-05, "loss": 0.5261, "step": 11880 }, { "epoch": 2.943069306930693, "grad_norm": 0.8118503093719482, "learning_rate": 9.934300847413788e-05, "loss": 0.5255, "step": 11890 }, { "epoch": 2.9455445544554455, "grad_norm": 0.8347038626670837, "learning_rate": 9.934078024991642e-05, "loss": 0.5185, "step": 11900 }, { "epoch": 2.948019801980198, "grad_norm": 0.9010823369026184, "learning_rate": 9.93385482785883e-05, "loss": 0.5236, "step": 11910 }, { "epoch": 2.9504950495049505, "grad_norm": 0.9010630249977112, "learning_rate": 9.933631256032305e-05, "loss": 0.5318, "step": 11920 }, { "epoch": 2.9529702970297027, "grad_norm": 0.8887155652046204, "learning_rate": 9.933407309529046e-05, "loss": 0.5296, "step": 11930 }, { "epoch": 2.9554455445544554, "grad_norm": 0.8166369795799255, "learning_rate": 9.933182988366059e-05, "loss": 0.5218, "step": 11940 }, { "epoch": 2.957920792079208, "grad_norm": 0.8840643167495728, "learning_rate": 9.932958292560381e-05, "loss": 0.5215, "step": 11950 }, { "epoch": 2.9603960396039604, "grad_norm": 0.8491992354393005, "learning_rate": 9.932733222129074e-05, "loss": 0.5236, "step": 11960 }, { "epoch": 2.9628712871287126, "grad_norm": 0.829835832118988, "learning_rate": 9.932507777089232e-05, "loss": 0.5206, "step": 11970 }, { "epoch": 2.9653465346534653, "grad_norm": 0.7575082778930664, "learning_rate": 9.932281957457976e-05, "loss": 0.5238, "step": 11980 }, { "epoch": 2.967821782178218, "grad_norm": 0.8495590686798096, "learning_rate": 9.932055763252457e-05, "loss": 0.5256, "step": 11990 }, { "epoch": 2.9702970297029703, "grad_norm": 0.8906236886978149, "learning_rate": 9.93182919448985e-05, "loss": 0.5193, "step": 12000 }, { "epoch": 2.9727722772277225, "grad_norm": 0.8496561050415039, "learning_rate": 9.931602251187364e-05, "loss": 0.5274, "step": 12010 }, { "epoch": 2.9752475247524752, "grad_norm": 0.7984356880187988, "learning_rate": 9.931374933362232e-05, "loss": 0.5261, "step": 12020 }, { "epoch": 2.977722772277228, "grad_norm": 0.8242306709289551, "learning_rate": 9.931147241031719e-05, "loss": 0.5291, "step": 12030 }, { "epoch": 2.98019801980198, "grad_norm": 0.8241235017776489, "learning_rate": 9.930919174213115e-05, "loss": 0.5227, "step": 12040 }, { "epoch": 2.9826732673267324, "grad_norm": 1.0066996812820435, "learning_rate": 9.93069073292374e-05, "loss": 0.5184, "step": 12050 }, { "epoch": 2.985148514851485, "grad_norm": 0.8556184768676758, "learning_rate": 9.930461917180946e-05, "loss": 0.5206, "step": 12060 }, { "epoch": 2.987623762376238, "grad_norm": 0.870368480682373, "learning_rate": 9.930232727002105e-05, "loss": 0.5241, "step": 12070 }, { "epoch": 2.99009900990099, "grad_norm": 0.8688924312591553, "learning_rate": 9.930003162404626e-05, "loss": 0.5232, "step": 12080 }, { "epoch": 2.9925742574257423, "grad_norm": 0.911646842956543, "learning_rate": 9.929773223405943e-05, "loss": 0.5236, "step": 12090 }, { "epoch": 2.995049504950495, "grad_norm": 0.810248851776123, "learning_rate": 9.929542910023517e-05, "loss": 0.5201, "step": 12100 }, { "epoch": 2.9975247524752477, "grad_norm": 0.9954603910446167, "learning_rate": 9.929312222274839e-05, "loss": 0.5222, "step": 12110 }, { "epoch": 3.0, "grad_norm": 0.8446747064590454, "learning_rate": 9.929081160177428e-05, "loss": 0.5256, "step": 12120 }, { "epoch": 3.0024752475247523, "grad_norm": 0.8462623953819275, "learning_rate": 9.928849723748831e-05, "loss": 0.5232, "step": 12130 }, { "epoch": 3.004950495049505, "grad_norm": 0.8808247447013855, "learning_rate": 9.928617913006629e-05, "loss": 0.5268, "step": 12140 }, { "epoch": 3.007425742574257, "grad_norm": 0.8194703459739685, "learning_rate": 9.928385727968419e-05, "loss": 0.5236, "step": 12150 }, { "epoch": 3.00990099009901, "grad_norm": 0.8575142025947571, "learning_rate": 9.928153168651837e-05, "loss": 0.5304, "step": 12160 }, { "epoch": 3.012376237623762, "grad_norm": 0.8639472126960754, "learning_rate": 9.927920235074548e-05, "loss": 0.5241, "step": 12170 }, { "epoch": 3.014851485148515, "grad_norm": 0.9347007870674133, "learning_rate": 9.927686927254236e-05, "loss": 0.5234, "step": 12180 }, { "epoch": 3.017326732673267, "grad_norm": 0.9111061096191406, "learning_rate": 9.927453245208622e-05, "loss": 0.5207, "step": 12190 }, { "epoch": 3.01980198019802, "grad_norm": 0.9200930595397949, "learning_rate": 9.927219188955452e-05, "loss": 0.518, "step": 12200 }, { "epoch": 3.022277227722772, "grad_norm": 0.8534479737281799, "learning_rate": 9.926984758512503e-05, "loss": 0.5217, "step": 12210 }, { "epoch": 3.0247524752475248, "grad_norm": 1.0129228830337524, "learning_rate": 9.926749953897576e-05, "loss": 0.5257, "step": 12220 }, { "epoch": 3.027227722772277, "grad_norm": 0.9175012707710266, "learning_rate": 9.926514775128504e-05, "loss": 0.5201, "step": 12230 }, { "epoch": 3.0297029702970297, "grad_norm": 0.873621940612793, "learning_rate": 9.926279222223147e-05, "loss": 0.5213, "step": 12240 }, { "epoch": 3.032178217821782, "grad_norm": 0.8391276597976685, "learning_rate": 9.926043295199393e-05, "loss": 0.5246, "step": 12250 }, { "epoch": 3.0346534653465347, "grad_norm": 0.7948262691497803, "learning_rate": 9.925806994075161e-05, "loss": 0.5271, "step": 12260 }, { "epoch": 3.037128712871287, "grad_norm": 0.8454986810684204, "learning_rate": 9.925570318868393e-05, "loss": 0.5218, "step": 12270 }, { "epoch": 3.0396039603960396, "grad_norm": 0.8568494319915771, "learning_rate": 9.925333269597067e-05, "loss": 0.5232, "step": 12280 }, { "epoch": 3.042079207920792, "grad_norm": 0.8095263838768005, "learning_rate": 9.925095846279184e-05, "loss": 0.5205, "step": 12290 }, { "epoch": 3.0445544554455446, "grad_norm": 0.7975438833236694, "learning_rate": 9.924858048932774e-05, "loss": 0.5214, "step": 12300 }, { "epoch": 3.047029702970297, "grad_norm": 0.8669601082801819, "learning_rate": 9.924619877575896e-05, "loss": 0.5197, "step": 12310 }, { "epoch": 3.0495049504950495, "grad_norm": 0.887656569480896, "learning_rate": 9.924381332226637e-05, "loss": 0.5204, "step": 12320 }, { "epoch": 3.051980198019802, "grad_norm": 0.8492504954338074, "learning_rate": 9.924142412903117e-05, "loss": 0.5202, "step": 12330 }, { "epoch": 3.0544554455445545, "grad_norm": 0.8411674499511719, "learning_rate": 9.923903119623474e-05, "loss": 0.5216, "step": 12340 }, { "epoch": 3.0569306930693068, "grad_norm": 0.8740456104278564, "learning_rate": 9.923663452405886e-05, "loss": 0.5244, "step": 12350 }, { "epoch": 3.0594059405940595, "grad_norm": 0.7813109755516052, "learning_rate": 9.923423411268552e-05, "loss": 0.5257, "step": 12360 }, { "epoch": 3.0618811881188117, "grad_norm": 0.8124046921730042, "learning_rate": 9.923182996229701e-05, "loss": 0.5263, "step": 12370 }, { "epoch": 3.0643564356435644, "grad_norm": 0.8672689199447632, "learning_rate": 9.92294220730759e-05, "loss": 0.5228, "step": 12380 }, { "epoch": 3.0668316831683167, "grad_norm": 0.9339288473129272, "learning_rate": 9.922701044520509e-05, "loss": 0.524, "step": 12390 }, { "epoch": 3.0693069306930694, "grad_norm": 0.9247320890426636, "learning_rate": 9.92245950788677e-05, "loss": 0.5257, "step": 12400 }, { "epoch": 3.0717821782178216, "grad_norm": 0.8094199895858765, "learning_rate": 9.922217597424717e-05, "loss": 0.5242, "step": 12410 }, { "epoch": 3.0742574257425743, "grad_norm": 0.8548868894577026, "learning_rate": 9.921975313152722e-05, "loss": 0.5219, "step": 12420 }, { "epoch": 3.0767326732673266, "grad_norm": 0.8256779909133911, "learning_rate": 9.921732655089183e-05, "loss": 0.5171, "step": 12430 }, { "epoch": 3.0792079207920793, "grad_norm": 0.8262947797775269, "learning_rate": 9.92148962325253e-05, "loss": 0.5227, "step": 12440 }, { "epoch": 3.0816831683168315, "grad_norm": 0.74909508228302, "learning_rate": 9.921246217661219e-05, "loss": 0.5256, "step": 12450 }, { "epoch": 3.0841584158415842, "grad_norm": 0.8288972973823547, "learning_rate": 9.921002438333736e-05, "loss": 0.5206, "step": 12460 }, { "epoch": 3.0866336633663365, "grad_norm": 0.8465849757194519, "learning_rate": 9.920758285288591e-05, "loss": 0.5184, "step": 12470 }, { "epoch": 3.089108910891089, "grad_norm": 0.7990097999572754, "learning_rate": 9.920513758544332e-05, "loss": 0.5241, "step": 12480 }, { "epoch": 3.0915841584158414, "grad_norm": 0.8176372051239014, "learning_rate": 9.920268858119524e-05, "loss": 0.5193, "step": 12490 }, { "epoch": 3.094059405940594, "grad_norm": 0.7906609773635864, "learning_rate": 9.920023584032767e-05, "loss": 0.52, "step": 12500 }, { "epoch": 3.0965346534653464, "grad_norm": 0.8430858254432678, "learning_rate": 9.919777936302688e-05, "loss": 0.5241, "step": 12510 }, { "epoch": 3.099009900990099, "grad_norm": 0.9833130836486816, "learning_rate": 9.919531914947942e-05, "loss": 0.5228, "step": 12520 }, { "epoch": 3.1014851485148514, "grad_norm": 0.9247949719429016, "learning_rate": 9.919285519987214e-05, "loss": 0.523, "step": 12530 }, { "epoch": 3.103960396039604, "grad_norm": 0.8464856743812561, "learning_rate": 9.919038751439215e-05, "loss": 0.5241, "step": 12540 }, { "epoch": 3.1064356435643563, "grad_norm": 0.9437419176101685, "learning_rate": 9.918791609322687e-05, "loss": 0.5156, "step": 12550 }, { "epoch": 3.108910891089109, "grad_norm": 0.8609942197799683, "learning_rate": 9.918544093656395e-05, "loss": 0.5195, "step": 12560 }, { "epoch": 3.1113861386138613, "grad_norm": 0.8109003305435181, "learning_rate": 9.918296204459138e-05, "loss": 0.5232, "step": 12570 }, { "epoch": 3.113861386138614, "grad_norm": 0.7994266152381897, "learning_rate": 9.918047941749745e-05, "loss": 0.5203, "step": 12580 }, { "epoch": 3.116336633663366, "grad_norm": 0.8056386709213257, "learning_rate": 9.917799305547066e-05, "loss": 0.5226, "step": 12590 }, { "epoch": 3.118811881188119, "grad_norm": 0.8670958876609802, "learning_rate": 9.917550295869983e-05, "loss": 0.5239, "step": 12600 }, { "epoch": 3.121287128712871, "grad_norm": 0.9582583904266357, "learning_rate": 9.91730091273741e-05, "loss": 0.5232, "step": 12610 }, { "epoch": 3.123762376237624, "grad_norm": 0.8221135139465332, "learning_rate": 9.917051156168284e-05, "loss": 0.5221, "step": 12620 }, { "epoch": 3.126237623762376, "grad_norm": 0.8055850267410278, "learning_rate": 9.916801026181573e-05, "loss": 0.5223, "step": 12630 }, { "epoch": 3.128712871287129, "grad_norm": 0.8928009867668152, "learning_rate": 9.916550522796271e-05, "loss": 0.5191, "step": 12640 }, { "epoch": 3.131188118811881, "grad_norm": 0.8138318657875061, "learning_rate": 9.916299646031403e-05, "loss": 0.5197, "step": 12650 }, { "epoch": 3.133663366336634, "grad_norm": 0.9675793647766113, "learning_rate": 9.916048395906021e-05, "loss": 0.5194, "step": 12660 }, { "epoch": 3.136138613861386, "grad_norm": 0.7971094846725464, "learning_rate": 9.915796772439207e-05, "loss": 0.5257, "step": 12670 }, { "epoch": 3.1386138613861387, "grad_norm": 0.8505246043205261, "learning_rate": 9.915544775650072e-05, "loss": 0.5263, "step": 12680 }, { "epoch": 3.141089108910891, "grad_norm": 0.8723838925361633, "learning_rate": 9.915292405557747e-05, "loss": 0.5186, "step": 12690 }, { "epoch": 3.1435643564356437, "grad_norm": 0.9236480593681335, "learning_rate": 9.915039662181404e-05, "loss": 0.5188, "step": 12700 }, { "epoch": 3.146039603960396, "grad_norm": 0.8082103729248047, "learning_rate": 9.914786545540236e-05, "loss": 0.521, "step": 12710 }, { "epoch": 3.1485148514851486, "grad_norm": 0.8004226088523865, "learning_rate": 9.914533055653463e-05, "loss": 0.5169, "step": 12720 }, { "epoch": 3.150990099009901, "grad_norm": 0.7826417088508606, "learning_rate": 9.91427919254034e-05, "loss": 0.5217, "step": 12730 }, { "epoch": 3.1534653465346536, "grad_norm": 0.8904594779014587, "learning_rate": 9.91402495622014e-05, "loss": 0.5227, "step": 12740 }, { "epoch": 3.155940594059406, "grad_norm": 0.7793117761611938, "learning_rate": 9.913770346712176e-05, "loss": 0.52, "step": 12750 }, { "epoch": 3.1584158415841586, "grad_norm": 0.926296055316925, "learning_rate": 9.913515364035783e-05, "loss": 0.5182, "step": 12760 }, { "epoch": 3.160891089108911, "grad_norm": 0.8755995035171509, "learning_rate": 9.913260008210323e-05, "loss": 0.5256, "step": 12770 }, { "epoch": 3.1633663366336635, "grad_norm": 0.8595930933952332, "learning_rate": 9.913004279255191e-05, "loss": 0.5271, "step": 12780 }, { "epoch": 3.1658415841584158, "grad_norm": 0.7809982895851135, "learning_rate": 9.912748177189808e-05, "loss": 0.5252, "step": 12790 }, { "epoch": 3.1683168316831685, "grad_norm": 0.8479055762290955, "learning_rate": 9.912491702033622e-05, "loss": 0.518, "step": 12800 }, { "epoch": 3.1707920792079207, "grad_norm": 0.947904109954834, "learning_rate": 9.912234853806109e-05, "loss": 0.5198, "step": 12810 }, { "epoch": 3.1732673267326734, "grad_norm": 0.8285653591156006, "learning_rate": 9.91197763252678e-05, "loss": 0.5216, "step": 12820 }, { "epoch": 3.1757425742574257, "grad_norm": 0.884183943271637, "learning_rate": 9.911720038215162e-05, "loss": 0.5201, "step": 12830 }, { "epoch": 3.1782178217821784, "grad_norm": 0.8235648274421692, "learning_rate": 9.911462070890824e-05, "loss": 0.519, "step": 12840 }, { "epoch": 3.1806930693069306, "grad_norm": 0.8053085207939148, "learning_rate": 9.911203730573353e-05, "loss": 0.5182, "step": 12850 }, { "epoch": 3.1831683168316833, "grad_norm": 0.7886745929718018, "learning_rate": 9.910945017282372e-05, "loss": 0.5163, "step": 12860 }, { "epoch": 3.1856435643564356, "grad_norm": 0.7914426326751709, "learning_rate": 9.910685931037524e-05, "loss": 0.5248, "step": 12870 }, { "epoch": 3.1881188118811883, "grad_norm": 0.9595016837120056, "learning_rate": 9.91042647185849e-05, "loss": 0.5255, "step": 12880 }, { "epoch": 3.1905940594059405, "grad_norm": 0.840107798576355, "learning_rate": 9.910166639764968e-05, "loss": 0.5173, "step": 12890 }, { "epoch": 3.1930693069306932, "grad_norm": 0.8726903796195984, "learning_rate": 9.909906434776696e-05, "loss": 0.5204, "step": 12900 }, { "epoch": 3.1955445544554455, "grad_norm": 0.8126251697540283, "learning_rate": 9.909645856913434e-05, "loss": 0.5184, "step": 12910 }, { "epoch": 3.198019801980198, "grad_norm": 0.9025110006332397, "learning_rate": 9.909384906194969e-05, "loss": 0.5257, "step": 12920 }, { "epoch": 3.2004950495049505, "grad_norm": 0.8399587273597717, "learning_rate": 9.909123582641119e-05, "loss": 0.5154, "step": 12930 }, { "epoch": 3.202970297029703, "grad_norm": 0.7937573790550232, "learning_rate": 9.90886188627173e-05, "loss": 0.5238, "step": 12940 }, { "epoch": 3.2054455445544554, "grad_norm": 0.8218724727630615, "learning_rate": 9.908599817106677e-05, "loss": 0.5226, "step": 12950 }, { "epoch": 3.207920792079208, "grad_norm": 0.9382994771003723, "learning_rate": 9.908337375165861e-05, "loss": 0.5222, "step": 12960 }, { "epoch": 3.2103960396039604, "grad_norm": 0.8417764902114868, "learning_rate": 9.908074560469216e-05, "loss": 0.525, "step": 12970 }, { "epoch": 3.212871287128713, "grad_norm": 0.8406091928482056, "learning_rate": 9.907811373036696e-05, "loss": 0.5215, "step": 12980 }, { "epoch": 3.2153465346534653, "grad_norm": 0.8208351731300354, "learning_rate": 9.907547812888292e-05, "loss": 0.5172, "step": 12990 }, { "epoch": 3.217821782178218, "grad_norm": 0.84403395652771, "learning_rate": 9.907283880044019e-05, "loss": 0.5209, "step": 13000 }, { "epoch": 3.2202970297029703, "grad_norm": 0.7766345143318176, "learning_rate": 9.907019574523919e-05, "loss": 0.5258, "step": 13010 }, { "epoch": 3.222772277227723, "grad_norm": 0.825298011302948, "learning_rate": 9.906754896348068e-05, "loss": 0.5234, "step": 13020 }, { "epoch": 3.2252475247524752, "grad_norm": 0.909394383430481, "learning_rate": 9.906489845536563e-05, "loss": 0.5221, "step": 13030 }, { "epoch": 3.227722772277228, "grad_norm": 0.8675062656402588, "learning_rate": 9.906224422109534e-05, "loss": 0.5182, "step": 13040 }, { "epoch": 3.23019801980198, "grad_norm": 0.8175813555717468, "learning_rate": 9.90595862608714e-05, "loss": 0.5183, "step": 13050 }, { "epoch": 3.232673267326733, "grad_norm": 0.995711624622345, "learning_rate": 9.905692457489564e-05, "loss": 0.5211, "step": 13060 }, { "epoch": 3.235148514851485, "grad_norm": 0.9167292714118958, "learning_rate": 9.905425916337018e-05, "loss": 0.5206, "step": 13070 }, { "epoch": 3.237623762376238, "grad_norm": 0.7904573082923889, "learning_rate": 9.90515900264975e-05, "loss": 0.5207, "step": 13080 }, { "epoch": 3.24009900990099, "grad_norm": 0.8350172638893127, "learning_rate": 9.904891716448027e-05, "loss": 0.5209, "step": 13090 }, { "epoch": 3.2425742574257423, "grad_norm": 0.8231693506240845, "learning_rate": 9.904624057752146e-05, "loss": 0.5227, "step": 13100 }, { "epoch": 3.245049504950495, "grad_norm": 0.8755106925964355, "learning_rate": 9.904356026582437e-05, "loss": 0.5236, "step": 13110 }, { "epoch": 3.2475247524752477, "grad_norm": 0.8029777407646179, "learning_rate": 9.904087622959253e-05, "loss": 0.5224, "step": 13120 }, { "epoch": 3.25, "grad_norm": 0.9193119406700134, "learning_rate": 9.903818846902977e-05, "loss": 0.5178, "step": 13130 }, { "epoch": 3.2524752475247523, "grad_norm": 0.8949933052062988, "learning_rate": 9.903549698434024e-05, "loss": 0.5223, "step": 13140 }, { "epoch": 3.254950495049505, "grad_norm": 0.893151044845581, "learning_rate": 9.903280177572831e-05, "loss": 0.5116, "step": 13150 }, { "epoch": 3.2574257425742577, "grad_norm": 0.9047921895980835, "learning_rate": 9.903010284339867e-05, "loss": 0.5219, "step": 13160 }, { "epoch": 3.25990099009901, "grad_norm": 0.8009589314460754, "learning_rate": 9.90274001875563e-05, "loss": 0.5192, "step": 13170 }, { "epoch": 3.262376237623762, "grad_norm": 0.8230291604995728, "learning_rate": 9.902469380840642e-05, "loss": 0.5198, "step": 13180 }, { "epoch": 3.264851485148515, "grad_norm": 0.7973464727401733, "learning_rate": 9.902198370615459e-05, "loss": 0.5181, "step": 13190 }, { "epoch": 3.2673267326732676, "grad_norm": 0.913739025592804, "learning_rate": 9.901926988100663e-05, "loss": 0.5187, "step": 13200 }, { "epoch": 3.26980198019802, "grad_norm": 0.8609355688095093, "learning_rate": 9.90165523331686e-05, "loss": 0.5193, "step": 13210 }, { "epoch": 3.272277227722772, "grad_norm": 0.7754203677177429, "learning_rate": 9.901383106284691e-05, "loss": 0.5206, "step": 13220 }, { "epoch": 3.2747524752475248, "grad_norm": 0.80146324634552, "learning_rate": 9.901110607024821e-05, "loss": 0.5179, "step": 13230 }, { "epoch": 3.2772277227722775, "grad_norm": 0.8137198686599731, "learning_rate": 9.900837735557947e-05, "loss": 0.5185, "step": 13240 }, { "epoch": 3.2797029702970297, "grad_norm": 0.779784619808197, "learning_rate": 9.900564491904787e-05, "loss": 0.5148, "step": 13250 }, { "epoch": 3.282178217821782, "grad_norm": 0.8761454820632935, "learning_rate": 9.900290876086095e-05, "loss": 0.5143, "step": 13260 }, { "epoch": 3.2846534653465347, "grad_norm": 0.8213475346565247, "learning_rate": 9.900016888122652e-05, "loss": 0.5171, "step": 13270 }, { "epoch": 3.287128712871287, "grad_norm": 0.7928522229194641, "learning_rate": 9.899742528035262e-05, "loss": 0.5181, "step": 13280 }, { "epoch": 3.2896039603960396, "grad_norm": 0.7773932218551636, "learning_rate": 9.899467795844764e-05, "loss": 0.5143, "step": 13290 }, { "epoch": 3.292079207920792, "grad_norm": 0.8709316253662109, "learning_rate": 9.899192691572019e-05, "loss": 0.5188, "step": 13300 }, { "epoch": 3.2945544554455446, "grad_norm": 0.9341078400611877, "learning_rate": 9.898917215237921e-05, "loss": 0.5195, "step": 13310 }, { "epoch": 3.297029702970297, "grad_norm": 0.9026022553443909, "learning_rate": 9.89864136686339e-05, "loss": 0.528, "step": 13320 }, { "epoch": 3.2995049504950495, "grad_norm": 0.8222607970237732, "learning_rate": 9.898365146469377e-05, "loss": 0.5106, "step": 13330 }, { "epoch": 3.301980198019802, "grad_norm": 0.8026601076126099, "learning_rate": 9.898088554076857e-05, "loss": 0.5175, "step": 13340 }, { "epoch": 3.3044554455445545, "grad_norm": 0.9106888175010681, "learning_rate": 9.897811589706836e-05, "loss": 0.518, "step": 13350 }, { "epoch": 3.3069306930693068, "grad_norm": 0.8387976288795471, "learning_rate": 9.897534253380347e-05, "loss": 0.5194, "step": 13360 }, { "epoch": 3.3094059405940595, "grad_norm": 0.8064348697662354, "learning_rate": 9.897256545118452e-05, "loss": 0.5152, "step": 13370 }, { "epoch": 3.3118811881188117, "grad_norm": 0.8441630601882935, "learning_rate": 9.896978464942243e-05, "loss": 0.5212, "step": 13380 }, { "epoch": 3.3143564356435644, "grad_norm": 0.8085663318634033, "learning_rate": 9.896700012872836e-05, "loss": 0.519, "step": 13390 }, { "epoch": 3.3168316831683167, "grad_norm": 0.8386408686637878, "learning_rate": 9.896421188931378e-05, "loss": 0.5122, "step": 13400 }, { "epoch": 3.3193069306930694, "grad_norm": 0.8972533941268921, "learning_rate": 9.896141993139046e-05, "loss": 0.5175, "step": 13410 }, { "epoch": 3.3217821782178216, "grad_norm": 0.8199843168258667, "learning_rate": 9.89586242551704e-05, "loss": 0.5139, "step": 13420 }, { "epoch": 3.3242574257425743, "grad_norm": 0.7814997434616089, "learning_rate": 9.895582486086592e-05, "loss": 0.5117, "step": 13430 }, { "epoch": 3.3267326732673266, "grad_norm": 0.7724425196647644, "learning_rate": 9.895302174868963e-05, "loss": 0.5183, "step": 13440 }, { "epoch": 3.3292079207920793, "grad_norm": 0.7991220355033875, "learning_rate": 9.89502149188544e-05, "loss": 0.5151, "step": 13450 }, { "epoch": 3.3316831683168315, "grad_norm": 0.8478954434394836, "learning_rate": 9.894740437157338e-05, "loss": 0.5187, "step": 13460 }, { "epoch": 3.3341584158415842, "grad_norm": 0.8076708316802979, "learning_rate": 9.894459010706003e-05, "loss": 0.5134, "step": 13470 }, { "epoch": 3.3366336633663365, "grad_norm": 0.812915563583374, "learning_rate": 9.894177212552808e-05, "loss": 0.5077, "step": 13480 }, { "epoch": 3.339108910891089, "grad_norm": 0.8146803975105286, "learning_rate": 9.89389504271915e-05, "loss": 0.5207, "step": 13490 }, { "epoch": 3.3415841584158414, "grad_norm": 0.8001635074615479, "learning_rate": 9.893612501226462e-05, "loss": 0.5227, "step": 13500 }, { "epoch": 3.344059405940594, "grad_norm": 0.8114627599716187, "learning_rate": 9.893329588096201e-05, "loss": 0.5199, "step": 13510 }, { "epoch": 3.3465346534653464, "grad_norm": 0.912162721157074, "learning_rate": 9.893046303349848e-05, "loss": 0.5154, "step": 13520 }, { "epoch": 3.349009900990099, "grad_norm": 0.7861186265945435, "learning_rate": 9.892762647008922e-05, "loss": 0.5149, "step": 13530 }, { "epoch": 3.3514851485148514, "grad_norm": 0.7595396637916565, "learning_rate": 9.89247861909496e-05, "loss": 0.5153, "step": 13540 }, { "epoch": 3.353960396039604, "grad_norm": 0.7769120335578918, "learning_rate": 9.892194219629536e-05, "loss": 0.5209, "step": 13550 }, { "epoch": 3.3564356435643563, "grad_norm": 0.8069387078285217, "learning_rate": 9.891909448634246e-05, "loss": 0.516, "step": 13560 }, { "epoch": 3.358910891089109, "grad_norm": 0.8443123698234558, "learning_rate": 9.891624306130718e-05, "loss": 0.518, "step": 13570 }, { "epoch": 3.3613861386138613, "grad_norm": 0.9356722235679626, "learning_rate": 9.891338792140605e-05, "loss": 0.5149, "step": 13580 }, { "epoch": 3.363861386138614, "grad_norm": 0.8312118649482727, "learning_rate": 9.891052906685592e-05, "loss": 0.5159, "step": 13590 }, { "epoch": 3.366336633663366, "grad_norm": 0.7942307591438293, "learning_rate": 9.890766649787388e-05, "loss": 0.5184, "step": 13600 }, { "epoch": 3.368811881188119, "grad_norm": 0.8798043727874756, "learning_rate": 9.890480021467733e-05, "loss": 0.5189, "step": 13610 }, { "epoch": 3.371287128712871, "grad_norm": 0.8830194473266602, "learning_rate": 9.890193021748395e-05, "loss": 0.5149, "step": 13620 }, { "epoch": 3.373762376237624, "grad_norm": 0.804185688495636, "learning_rate": 9.88990565065117e-05, "loss": 0.5187, "step": 13630 }, { "epoch": 3.376237623762376, "grad_norm": 0.8310927152633667, "learning_rate": 9.88961790819788e-05, "loss": 0.5152, "step": 13640 }, { "epoch": 3.378712871287129, "grad_norm": 0.789948582649231, "learning_rate": 9.88932979441038e-05, "loss": 0.5196, "step": 13650 }, { "epoch": 3.381188118811881, "grad_norm": 0.8170382380485535, "learning_rate": 9.889041309310548e-05, "loss": 0.5142, "step": 13660 }, { "epoch": 3.383663366336634, "grad_norm": 0.7833046317100525, "learning_rate": 9.888752452920294e-05, "loss": 0.5174, "step": 13670 }, { "epoch": 3.386138613861386, "grad_norm": 1.0675151348114014, "learning_rate": 9.888463225261553e-05, "loss": 0.5168, "step": 13680 }, { "epoch": 3.3886138613861387, "grad_norm": 0.8534442782402039, "learning_rate": 9.888173626356293e-05, "loss": 0.5113, "step": 13690 }, { "epoch": 3.391089108910891, "grad_norm": 0.8413082957267761, "learning_rate": 9.887883656226504e-05, "loss": 0.5119, "step": 13700 }, { "epoch": 3.3935643564356437, "grad_norm": 0.8302295207977295, "learning_rate": 9.887593314894207e-05, "loss": 0.512, "step": 13710 }, { "epoch": 3.396039603960396, "grad_norm": 0.8063073754310608, "learning_rate": 9.887302602381456e-05, "loss": 0.5194, "step": 13720 }, { "epoch": 3.3985148514851486, "grad_norm": 0.8712301850318909, "learning_rate": 9.887011518710326e-05, "loss": 0.5162, "step": 13730 }, { "epoch": 3.400990099009901, "grad_norm": 0.8064782023429871, "learning_rate": 9.88672006390292e-05, "loss": 0.5143, "step": 13740 }, { "epoch": 3.4034653465346536, "grad_norm": 0.7638493180274963, "learning_rate": 9.886428237981375e-05, "loss": 0.5171, "step": 13750 }, { "epoch": 3.405940594059406, "grad_norm": 0.7575046420097351, "learning_rate": 9.886136040967854e-05, "loss": 0.5144, "step": 13760 }, { "epoch": 3.4084158415841586, "grad_norm": 0.7847615480422974, "learning_rate": 9.885843472884546e-05, "loss": 0.5227, "step": 13770 }, { "epoch": 3.410891089108911, "grad_norm": 0.8216129541397095, "learning_rate": 9.885550533753671e-05, "loss": 0.514, "step": 13780 }, { "epoch": 3.4133663366336635, "grad_norm": 0.8046090602874756, "learning_rate": 9.885257223597473e-05, "loss": 0.5214, "step": 13790 }, { "epoch": 3.4158415841584158, "grad_norm": 0.804520308971405, "learning_rate": 9.88496354243823e-05, "loss": 0.5168, "step": 13800 }, { "epoch": 3.4183168316831685, "grad_norm": 0.8101379871368408, "learning_rate": 9.884669490298244e-05, "loss": 0.5123, "step": 13810 }, { "epoch": 3.4207920792079207, "grad_norm": 0.822715699672699, "learning_rate": 9.884375067199845e-05, "loss": 0.5123, "step": 13820 }, { "epoch": 3.4232673267326734, "grad_norm": 0.777888298034668, "learning_rate": 9.884080273165394e-05, "loss": 0.5117, "step": 13830 }, { "epoch": 3.4257425742574257, "grad_norm": 0.8746023178100586, "learning_rate": 9.883785108217281e-05, "loss": 0.5151, "step": 13840 }, { "epoch": 3.4282178217821784, "grad_norm": 1.1394941806793213, "learning_rate": 9.883489572377916e-05, "loss": 0.5144, "step": 13850 }, { "epoch": 3.4306930693069306, "grad_norm": 0.8479703068733215, "learning_rate": 9.883193665669748e-05, "loss": 0.5189, "step": 13860 }, { "epoch": 3.4331683168316833, "grad_norm": 0.8376144170761108, "learning_rate": 9.882897388115246e-05, "loss": 0.5113, "step": 13870 }, { "epoch": 3.4356435643564356, "grad_norm": 0.8275471329689026, "learning_rate": 9.882600739736913e-05, "loss": 0.5127, "step": 13880 }, { "epoch": 3.4381188118811883, "grad_norm": 0.7731739282608032, "learning_rate": 9.882303720557276e-05, "loss": 0.5106, "step": 13890 }, { "epoch": 3.4405940594059405, "grad_norm": 0.7709509134292603, "learning_rate": 9.88200633059889e-05, "loss": 0.5112, "step": 13900 }, { "epoch": 3.4430693069306932, "grad_norm": 0.8207659125328064, "learning_rate": 9.881708569884343e-05, "loss": 0.5084, "step": 13910 }, { "epoch": 3.4455445544554455, "grad_norm": 0.8314772248268127, "learning_rate": 9.881410438436247e-05, "loss": 0.5152, "step": 13920 }, { "epoch": 3.448019801980198, "grad_norm": 0.8237103819847107, "learning_rate": 9.881111936277243e-05, "loss": 0.5162, "step": 13930 }, { "epoch": 3.4504950495049505, "grad_norm": 0.7820044159889221, "learning_rate": 9.880813063429999e-05, "loss": 0.5074, "step": 13940 }, { "epoch": 3.452970297029703, "grad_norm": 0.8339124321937561, "learning_rate": 9.880513819917214e-05, "loss": 0.5155, "step": 13950 }, { "epoch": 3.4554455445544554, "grad_norm": 0.8136736154556274, "learning_rate": 9.880214205761613e-05, "loss": 0.5138, "step": 13960 }, { "epoch": 3.457920792079208, "grad_norm": 0.8585912585258484, "learning_rate": 9.879914220985949e-05, "loss": 0.5178, "step": 13970 }, { "epoch": 3.4603960396039604, "grad_norm": 0.8436213135719299, "learning_rate": 9.879613865613004e-05, "loss": 0.518, "step": 13980 }, { "epoch": 3.462871287128713, "grad_norm": 0.7950912117958069, "learning_rate": 9.879313139665589e-05, "loss": 0.5156, "step": 13990 }, { "epoch": 3.4653465346534653, "grad_norm": 0.8326154947280884, "learning_rate": 9.879012043166542e-05, "loss": 0.5147, "step": 14000 }, { "epoch": 3.467821782178218, "grad_norm": 0.8144271373748779, "learning_rate": 9.878710576138729e-05, "loss": 0.5131, "step": 14010 }, { "epoch": 3.4702970297029703, "grad_norm": 0.9211321473121643, "learning_rate": 9.878408738605044e-05, "loss": 0.514, "step": 14020 }, { "epoch": 3.4727722772277225, "grad_norm": 0.8560377359390259, "learning_rate": 9.878106530588413e-05, "loss": 0.5187, "step": 14030 }, { "epoch": 3.4752475247524752, "grad_norm": 0.8790581226348877, "learning_rate": 9.87780395211178e-05, "loss": 0.5186, "step": 14040 }, { "epoch": 3.477722772277228, "grad_norm": 0.7705204486846924, "learning_rate": 9.877501003198128e-05, "loss": 0.5244, "step": 14050 }, { "epoch": 3.48019801980198, "grad_norm": 0.8808408379554749, "learning_rate": 9.877197683870464e-05, "loss": 0.515, "step": 14060 }, { "epoch": 3.4826732673267324, "grad_norm": 0.8766692876815796, "learning_rate": 9.876893994151824e-05, "loss": 0.5079, "step": 14070 }, { "epoch": 3.485148514851485, "grad_norm": 0.7536056637763977, "learning_rate": 9.87658993406527e-05, "loss": 0.5097, "step": 14080 }, { "epoch": 3.487623762376238, "grad_norm": 0.8761548399925232, "learning_rate": 9.87628550363389e-05, "loss": 0.519, "step": 14090 }, { "epoch": 3.49009900990099, "grad_norm": 0.8463135957717896, "learning_rate": 9.875980702880809e-05, "loss": 0.5144, "step": 14100 }, { "epoch": 3.4925742574257423, "grad_norm": 0.7617613673210144, "learning_rate": 9.875675531829173e-05, "loss": 0.5133, "step": 14110 }, { "epoch": 3.495049504950495, "grad_norm": 0.8069170713424683, "learning_rate": 9.875369990502158e-05, "loss": 0.5103, "step": 14120 }, { "epoch": 3.4975247524752477, "grad_norm": 0.8384374380111694, "learning_rate": 9.875064078922962e-05, "loss": 0.5128, "step": 14130 }, { "epoch": 3.5, "grad_norm": 0.8153098225593567, "learning_rate": 9.874757797114826e-05, "loss": 0.5117, "step": 14140 }, { "epoch": 3.5024752475247523, "grad_norm": 0.80722975730896, "learning_rate": 9.874451145101004e-05, "loss": 0.5187, "step": 14150 }, { "epoch": 3.504950495049505, "grad_norm": 0.8213874101638794, "learning_rate": 9.874144122904787e-05, "loss": 0.5106, "step": 14160 }, { "epoch": 3.5074257425742577, "grad_norm": 0.8141851425170898, "learning_rate": 9.87383673054949e-05, "loss": 0.5158, "step": 14170 }, { "epoch": 3.50990099009901, "grad_norm": 0.8635473847389221, "learning_rate": 9.873528968058458e-05, "loss": 0.5111, "step": 14180 }, { "epoch": 3.512376237623762, "grad_norm": 0.8235127925872803, "learning_rate": 9.873220835455064e-05, "loss": 0.5141, "step": 14190 }, { "epoch": 3.514851485148515, "grad_norm": 0.7821812033653259, "learning_rate": 9.872912332762706e-05, "loss": 0.5112, "step": 14200 }, { "epoch": 3.5173267326732676, "grad_norm": 0.8235549926757812, "learning_rate": 9.872603460004817e-05, "loss": 0.5173, "step": 14210 }, { "epoch": 3.51980198019802, "grad_norm": 0.8143296837806702, "learning_rate": 9.87229421720485e-05, "loss": 0.516, "step": 14220 }, { "epoch": 3.522277227722772, "grad_norm": 0.7821813225746155, "learning_rate": 9.871984604386293e-05, "loss": 0.5139, "step": 14230 }, { "epoch": 3.5247524752475248, "grad_norm": 0.7855284214019775, "learning_rate": 9.871674621572656e-05, "loss": 0.513, "step": 14240 }, { "epoch": 3.5272277227722775, "grad_norm": 0.823883593082428, "learning_rate": 9.871364268787483e-05, "loss": 0.5119, "step": 14250 }, { "epoch": 3.5297029702970297, "grad_norm": 0.797492504119873, "learning_rate": 9.871053546054341e-05, "loss": 0.5155, "step": 14260 }, { "epoch": 3.532178217821782, "grad_norm": 0.8020750880241394, "learning_rate": 9.870742453396828e-05, "loss": 0.5099, "step": 14270 }, { "epoch": 3.5346534653465347, "grad_norm": 0.7983999848365784, "learning_rate": 9.870430990838572e-05, "loss": 0.5137, "step": 14280 }, { "epoch": 3.5371287128712874, "grad_norm": 0.7925062775611877, "learning_rate": 9.870119158403222e-05, "loss": 0.5143, "step": 14290 }, { "epoch": 3.5396039603960396, "grad_norm": 0.8460506796836853, "learning_rate": 9.869806956114464e-05, "loss": 0.5147, "step": 14300 }, { "epoch": 3.542079207920792, "grad_norm": 0.7919933199882507, "learning_rate": 9.869494383996005e-05, "loss": 0.5161, "step": 14310 }, { "epoch": 3.5445544554455446, "grad_norm": 0.8763142824172974, "learning_rate": 9.869181442071582e-05, "loss": 0.5165, "step": 14320 }, { "epoch": 3.5470297029702973, "grad_norm": 0.7377356290817261, "learning_rate": 9.868868130364963e-05, "loss": 0.5162, "step": 14330 }, { "epoch": 3.5495049504950495, "grad_norm": 0.767162024974823, "learning_rate": 9.868554448899941e-05, "loss": 0.5137, "step": 14340 }, { "epoch": 3.551980198019802, "grad_norm": 0.7668728828430176, "learning_rate": 9.86824039770034e-05, "loss": 0.5147, "step": 14350 }, { "epoch": 3.5544554455445545, "grad_norm": 0.7514689564704895, "learning_rate": 9.867925976790007e-05, "loss": 0.5122, "step": 14360 }, { "epoch": 3.556930693069307, "grad_norm": 0.8003139495849609, "learning_rate": 9.86761118619282e-05, "loss": 0.5121, "step": 14370 }, { "epoch": 3.5594059405940595, "grad_norm": 0.7849332094192505, "learning_rate": 9.867296025932688e-05, "loss": 0.5174, "step": 14380 }, { "epoch": 3.5618811881188117, "grad_norm": 0.8908095359802246, "learning_rate": 9.866980496033546e-05, "loss": 0.5101, "step": 14390 }, { "epoch": 3.5643564356435644, "grad_norm": 0.8058547377586365, "learning_rate": 9.866664596519353e-05, "loss": 0.5178, "step": 14400 }, { "epoch": 3.5668316831683167, "grad_norm": 0.8069846034049988, "learning_rate": 9.866348327414102e-05, "loss": 0.5123, "step": 14410 }, { "epoch": 3.5693069306930694, "grad_norm": 0.8300783634185791, "learning_rate": 9.86603168874181e-05, "loss": 0.5156, "step": 14420 }, { "epoch": 3.5717821782178216, "grad_norm": 0.8116902112960815, "learning_rate": 9.865714680526524e-05, "loss": 0.5196, "step": 14430 }, { "epoch": 3.5742574257425743, "grad_norm": 0.842159628868103, "learning_rate": 9.865397302792319e-05, "loss": 0.5132, "step": 14440 }, { "epoch": 3.5767326732673266, "grad_norm": 0.7656996846199036, "learning_rate": 9.865079555563298e-05, "loss": 0.5118, "step": 14450 }, { "epoch": 3.5792079207920793, "grad_norm": 0.7732524275779724, "learning_rate": 9.86476143886359e-05, "loss": 0.5154, "step": 14460 }, { "epoch": 3.5816831683168315, "grad_norm": 0.796696126461029, "learning_rate": 9.864442952717358e-05, "loss": 0.513, "step": 14470 }, { "epoch": 3.5841584158415842, "grad_norm": 0.8954437971115112, "learning_rate": 9.864124097148785e-05, "loss": 0.5107, "step": 14480 }, { "epoch": 3.5866336633663365, "grad_norm": 0.8309118151664734, "learning_rate": 9.863804872182088e-05, "loss": 0.5146, "step": 14490 }, { "epoch": 3.589108910891089, "grad_norm": 0.7606772184371948, "learning_rate": 9.863485277841508e-05, "loss": 0.5143, "step": 14500 }, { "epoch": 3.5915841584158414, "grad_norm": 0.7757837772369385, "learning_rate": 9.863165314151318e-05, "loss": 0.5123, "step": 14510 }, { "epoch": 3.594059405940594, "grad_norm": 0.823287308216095, "learning_rate": 9.862844981135814e-05, "loss": 0.5155, "step": 14520 }, { "epoch": 3.5965346534653464, "grad_norm": 0.7751653790473938, "learning_rate": 9.862524278819329e-05, "loss": 0.5132, "step": 14530 }, { "epoch": 3.599009900990099, "grad_norm": 0.8309884071350098, "learning_rate": 9.862203207226216e-05, "loss": 0.5094, "step": 14540 }, { "epoch": 3.6014851485148514, "grad_norm": 0.8729249835014343, "learning_rate": 9.861881766380854e-05, "loss": 0.514, "step": 14550 }, { "epoch": 3.603960396039604, "grad_norm": 0.7819044589996338, "learning_rate": 9.861559956307657e-05, "loss": 0.5145, "step": 14560 }, { "epoch": 3.6064356435643563, "grad_norm": 0.7459197640419006, "learning_rate": 9.861237777031068e-05, "loss": 0.5089, "step": 14570 }, { "epoch": 3.608910891089109, "grad_norm": 0.7860618233680725, "learning_rate": 9.860915228575549e-05, "loss": 0.5142, "step": 14580 }, { "epoch": 3.6113861386138613, "grad_norm": 0.7934134602546692, "learning_rate": 9.860592310965597e-05, "loss": 0.5151, "step": 14590 }, { "epoch": 3.613861386138614, "grad_norm": 0.8125261664390564, "learning_rate": 9.86026902422574e-05, "loss": 0.5131, "step": 14600 }, { "epoch": 3.616336633663366, "grad_norm": 0.7611857056617737, "learning_rate": 9.859945368380521e-05, "loss": 0.5122, "step": 14610 }, { "epoch": 3.618811881188119, "grad_norm": 0.7994186878204346, "learning_rate": 9.859621343454525e-05, "loss": 0.5131, "step": 14620 }, { "epoch": 3.621287128712871, "grad_norm": 0.7809689044952393, "learning_rate": 9.85929694947236e-05, "loss": 0.5071, "step": 14630 }, { "epoch": 3.623762376237624, "grad_norm": 0.828849196434021, "learning_rate": 9.858972186458662e-05, "loss": 0.52, "step": 14640 }, { "epoch": 3.626237623762376, "grad_norm": 0.8019101023674011, "learning_rate": 9.85864705443809e-05, "loss": 0.5115, "step": 14650 }, { "epoch": 3.628712871287129, "grad_norm": 0.8290508389472961, "learning_rate": 9.858321553435339e-05, "loss": 0.5124, "step": 14660 }, { "epoch": 3.631188118811881, "grad_norm": 0.7994346618652344, "learning_rate": 9.857995683475129e-05, "loss": 0.5095, "step": 14670 }, { "epoch": 3.633663366336634, "grad_norm": 0.8237411975860596, "learning_rate": 9.857669444582205e-05, "loss": 0.5163, "step": 14680 }, { "epoch": 3.636138613861386, "grad_norm": 0.7806293368339539, "learning_rate": 9.857342836781346e-05, "loss": 0.5141, "step": 14690 }, { "epoch": 3.6386138613861387, "grad_norm": 0.7854220867156982, "learning_rate": 9.857015860097354e-05, "loss": 0.513, "step": 14700 }, { "epoch": 3.641089108910891, "grad_norm": 0.7879302501678467, "learning_rate": 9.856688514555061e-05, "loss": 0.5101, "step": 14710 }, { "epoch": 3.6435643564356437, "grad_norm": 0.7784743309020996, "learning_rate": 9.856360800179328e-05, "loss": 0.5167, "step": 14720 }, { "epoch": 3.646039603960396, "grad_norm": 0.8170332312583923, "learning_rate": 9.856032716995038e-05, "loss": 0.5042, "step": 14730 }, { "epoch": 3.6485148514851486, "grad_norm": 0.7615020275115967, "learning_rate": 9.855704265027113e-05, "loss": 0.507, "step": 14740 }, { "epoch": 3.650990099009901, "grad_norm": 0.8000773191452026, "learning_rate": 9.855375444300494e-05, "loss": 0.5176, "step": 14750 }, { "epoch": 3.6534653465346536, "grad_norm": 0.8017517924308777, "learning_rate": 9.855046254840151e-05, "loss": 0.5106, "step": 14760 }, { "epoch": 3.655940594059406, "grad_norm": 0.7896542549133301, "learning_rate": 9.854716696671087e-05, "loss": 0.5192, "step": 14770 }, { "epoch": 3.6584158415841586, "grad_norm": 0.7846716642379761, "learning_rate": 9.854386769818326e-05, "loss": 0.5147, "step": 14780 }, { "epoch": 3.660891089108911, "grad_norm": 0.8460705280303955, "learning_rate": 9.854056474306929e-05, "loss": 0.5045, "step": 14790 }, { "epoch": 3.6633663366336635, "grad_norm": 0.7572702169418335, "learning_rate": 9.853725810161976e-05, "loss": 0.5154, "step": 14800 }, { "epoch": 3.6658415841584158, "grad_norm": 0.8557485938072205, "learning_rate": 9.853394777408578e-05, "loss": 0.5123, "step": 14810 }, { "epoch": 3.6683168316831685, "grad_norm": 0.8067054748535156, "learning_rate": 9.853063376071876e-05, "loss": 0.5077, "step": 14820 }, { "epoch": 3.6707920792079207, "grad_norm": 0.824467658996582, "learning_rate": 9.85273160617704e-05, "loss": 0.5143, "step": 14830 }, { "epoch": 3.6732673267326734, "grad_norm": 0.8061984777450562, "learning_rate": 9.852399467749263e-05, "loss": 0.5126, "step": 14840 }, { "epoch": 3.6757425742574257, "grad_norm": 0.7683075666427612, "learning_rate": 9.852066960813768e-05, "loss": 0.5122, "step": 14850 }, { "epoch": 3.6782178217821784, "grad_norm": 0.8543092608451843, "learning_rate": 9.85173408539581e-05, "loss": 0.5114, "step": 14860 }, { "epoch": 3.6806930693069306, "grad_norm": 0.7859283685684204, "learning_rate": 9.851400841520667e-05, "loss": 0.5125, "step": 14870 }, { "epoch": 3.6831683168316833, "grad_norm": 0.7361510992050171, "learning_rate": 9.851067229213646e-05, "loss": 0.5056, "step": 14880 }, { "epoch": 3.6856435643564356, "grad_norm": 0.7825437188148499, "learning_rate": 9.850733248500083e-05, "loss": 0.5072, "step": 14890 }, { "epoch": 3.6881188118811883, "grad_norm": 0.9069567322731018, "learning_rate": 9.850398899405342e-05, "loss": 0.5135, "step": 14900 }, { "epoch": 3.6905940594059405, "grad_norm": 0.8282741904258728, "learning_rate": 9.850064181954815e-05, "loss": 0.5136, "step": 14910 }, { "epoch": 3.693069306930693, "grad_norm": 0.7547427415847778, "learning_rate": 9.84972909617392e-05, "loss": 0.513, "step": 14920 }, { "epoch": 3.6955445544554455, "grad_norm": 0.7405969500541687, "learning_rate": 9.849393642088106e-05, "loss": 0.5096, "step": 14930 }, { "epoch": 3.698019801980198, "grad_norm": 0.7999070882797241, "learning_rate": 9.849057819722847e-05, "loss": 0.5104, "step": 14940 }, { "epoch": 3.7004950495049505, "grad_norm": 1.0641473531723022, "learning_rate": 9.84872162910365e-05, "loss": 0.5177, "step": 14950 }, { "epoch": 3.7029702970297027, "grad_norm": 0.8301922082901001, "learning_rate": 9.848385070256042e-05, "loss": 0.5167, "step": 14960 }, { "epoch": 3.7054455445544554, "grad_norm": 0.7647877335548401, "learning_rate": 9.848048143205586e-05, "loss": 0.5125, "step": 14970 }, { "epoch": 3.707920792079208, "grad_norm": 0.7459143996238708, "learning_rate": 9.847710847977867e-05, "loss": 0.5079, "step": 14980 }, { "epoch": 3.7103960396039604, "grad_norm": 0.7648584842681885, "learning_rate": 9.847373184598501e-05, "loss": 0.516, "step": 14990 }, { "epoch": 3.7128712871287126, "grad_norm": 0.840184211730957, "learning_rate": 9.847035153093131e-05, "loss": 0.5112, "step": 15000 }, { "epoch": 3.7153465346534653, "grad_norm": 0.8261896371841431, "learning_rate": 9.84669675348743e-05, "loss": 0.513, "step": 15010 }, { "epoch": 3.717821782178218, "grad_norm": 0.7661440968513489, "learning_rate": 9.846357985807095e-05, "loss": 0.5157, "step": 15020 }, { "epoch": 3.7202970297029703, "grad_norm": 0.8120524287223816, "learning_rate": 9.846018850077856e-05, "loss": 0.5136, "step": 15030 }, { "epoch": 3.7227722772277225, "grad_norm": 0.752120852470398, "learning_rate": 9.845679346325465e-05, "loss": 0.5119, "step": 15040 }, { "epoch": 3.7252475247524752, "grad_norm": 0.7783523797988892, "learning_rate": 9.845339474575705e-05, "loss": 0.5118, "step": 15050 }, { "epoch": 3.727722772277228, "grad_norm": 0.8521688580513, "learning_rate": 9.84499923485439e-05, "loss": 0.5068, "step": 15060 }, { "epoch": 3.73019801980198, "grad_norm": 0.754096508026123, "learning_rate": 9.844658627187358e-05, "loss": 0.5071, "step": 15070 }, { "epoch": 3.7326732673267324, "grad_norm": 0.802636444568634, "learning_rate": 9.844317651600474e-05, "loss": 0.5161, "step": 15080 }, { "epoch": 3.735148514851485, "grad_norm": 0.7901771664619446, "learning_rate": 9.843976308119633e-05, "loss": 0.5074, "step": 15090 }, { "epoch": 3.737623762376238, "grad_norm": 0.817596971988678, "learning_rate": 9.843634596770761e-05, "loss": 0.5086, "step": 15100 }, { "epoch": 3.74009900990099, "grad_norm": 0.8374320268630981, "learning_rate": 9.843292517579806e-05, "loss": 0.5076, "step": 15110 }, { "epoch": 3.7425742574257423, "grad_norm": 0.8674448728561401, "learning_rate": 9.842950070572748e-05, "loss": 0.5116, "step": 15120 }, { "epoch": 3.745049504950495, "grad_norm": 0.7567639350891113, "learning_rate": 9.842607255775591e-05, "loss": 0.5091, "step": 15130 }, { "epoch": 3.7475247524752477, "grad_norm": 0.7541729807853699, "learning_rate": 9.842264073214371e-05, "loss": 0.511, "step": 15140 }, { "epoch": 3.75, "grad_norm": 0.7724788784980774, "learning_rate": 9.841920522915154e-05, "loss": 0.5095, "step": 15150 }, { "epoch": 3.7524752475247523, "grad_norm": 0.7412953972816467, "learning_rate": 9.841576604904024e-05, "loss": 0.5119, "step": 15160 }, { "epoch": 3.754950495049505, "grad_norm": 0.7381254434585571, "learning_rate": 9.841232319207103e-05, "loss": 0.5099, "step": 15170 }, { "epoch": 3.7574257425742577, "grad_norm": 0.8970340490341187, "learning_rate": 9.840887665850538e-05, "loss": 0.5081, "step": 15180 }, { "epoch": 3.75990099009901, "grad_norm": 0.7971183061599731, "learning_rate": 9.8405426448605e-05, "loss": 0.5133, "step": 15190 }, { "epoch": 3.762376237623762, "grad_norm": 0.8824784159660339, "learning_rate": 9.840197256263195e-05, "loss": 0.5128, "step": 15200 }, { "epoch": 3.764851485148515, "grad_norm": 0.782502293586731, "learning_rate": 9.83985150008485e-05, "loss": 0.5128, "step": 15210 }, { "epoch": 3.7673267326732676, "grad_norm": 0.736157238483429, "learning_rate": 9.839505376351725e-05, "loss": 0.5048, "step": 15220 }, { "epoch": 3.76980198019802, "grad_norm": 0.8293583989143372, "learning_rate": 9.839158885090102e-05, "loss": 0.5106, "step": 15230 }, { "epoch": 3.772277227722772, "grad_norm": 0.8032740354537964, "learning_rate": 9.838812026326298e-05, "loss": 0.5122, "step": 15240 }, { "epoch": 3.7747524752475248, "grad_norm": 0.7770896553993225, "learning_rate": 9.838464800086655e-05, "loss": 0.5121, "step": 15250 }, { "epoch": 3.7772277227722775, "grad_norm": 0.7731147408485413, "learning_rate": 9.838117206397542e-05, "loss": 0.5176, "step": 15260 }, { "epoch": 3.7797029702970297, "grad_norm": 0.7982733845710754, "learning_rate": 9.837769245285356e-05, "loss": 0.5077, "step": 15270 }, { "epoch": 3.782178217821782, "grad_norm": 0.8317473530769348, "learning_rate": 9.837420916776521e-05, "loss": 0.5109, "step": 15280 }, { "epoch": 3.7846534653465347, "grad_norm": 0.7988355755805969, "learning_rate": 9.837072220897492e-05, "loss": 0.5116, "step": 15290 }, { "epoch": 3.7871287128712874, "grad_norm": 0.7806276082992554, "learning_rate": 9.83672315767475e-05, "loss": 0.5221, "step": 15300 }, { "epoch": 3.7896039603960396, "grad_norm": 0.797279953956604, "learning_rate": 9.836373727134805e-05, "loss": 0.5141, "step": 15310 }, { "epoch": 3.792079207920792, "grad_norm": 0.8234958052635193, "learning_rate": 9.83602392930419e-05, "loss": 0.5094, "step": 15320 }, { "epoch": 3.7945544554455446, "grad_norm": 0.7974276542663574, "learning_rate": 9.835673764209474e-05, "loss": 0.5128, "step": 15330 }, { "epoch": 3.7970297029702973, "grad_norm": 0.7671923637390137, "learning_rate": 9.835323231877248e-05, "loss": 0.507, "step": 15340 }, { "epoch": 3.7995049504950495, "grad_norm": 0.7795227766036987, "learning_rate": 9.834972332334133e-05, "loss": 0.5129, "step": 15350 }, { "epoch": 3.801980198019802, "grad_norm": 0.7699807286262512, "learning_rate": 9.834621065606777e-05, "loss": 0.5036, "step": 15360 }, { "epoch": 3.8044554455445545, "grad_norm": 0.7866533398628235, "learning_rate": 9.834269431721859e-05, "loss": 0.5062, "step": 15370 }, { "epoch": 3.806930693069307, "grad_norm": 0.8210740685462952, "learning_rate": 9.833917430706079e-05, "loss": 0.5064, "step": 15380 }, { "epoch": 3.8094059405940595, "grad_norm": 0.7422927021980286, "learning_rate": 9.833565062586173e-05, "loss": 0.5103, "step": 15390 }, { "epoch": 3.8118811881188117, "grad_norm": 0.8793237209320068, "learning_rate": 9.833212327388896e-05, "loss": 0.5127, "step": 15400 }, { "epoch": 3.8143564356435644, "grad_norm": 0.7755078077316284, "learning_rate": 9.832859225141041e-05, "loss": 0.5106, "step": 15410 }, { "epoch": 3.8168316831683167, "grad_norm": 0.8411746025085449, "learning_rate": 9.832505755869421e-05, "loss": 0.5105, "step": 15420 }, { "epoch": 3.8193069306930694, "grad_norm": 0.7636188268661499, "learning_rate": 9.832151919600883e-05, "loss": 0.5051, "step": 15430 }, { "epoch": 3.8217821782178216, "grad_norm": 0.8035736680030823, "learning_rate": 9.831797716362294e-05, "loss": 0.5076, "step": 15440 }, { "epoch": 3.8242574257425743, "grad_norm": 0.7816355228424072, "learning_rate": 9.831443146180557e-05, "loss": 0.5063, "step": 15450 }, { "epoch": 3.8267326732673266, "grad_norm": 0.7970533967018127, "learning_rate": 9.831088209082596e-05, "loss": 0.5124, "step": 15460 }, { "epoch": 3.8292079207920793, "grad_norm": 0.7839850187301636, "learning_rate": 9.83073290509537e-05, "loss": 0.5118, "step": 15470 }, { "epoch": 3.8316831683168315, "grad_norm": 0.7450287938117981, "learning_rate": 9.830377234245858e-05, "loss": 0.506, "step": 15480 }, { "epoch": 3.8341584158415842, "grad_norm": 0.7598549127578735, "learning_rate": 9.830021196561074e-05, "loss": 0.5114, "step": 15490 }, { "epoch": 3.8366336633663365, "grad_norm": 0.7857577800750732, "learning_rate": 9.829664792068055e-05, "loss": 0.513, "step": 15500 }, { "epoch": 3.839108910891089, "grad_norm": 0.7275547385215759, "learning_rate": 9.829308020793868e-05, "loss": 0.5076, "step": 15510 }, { "epoch": 3.8415841584158414, "grad_norm": 0.8122267127037048, "learning_rate": 9.828950882765608e-05, "loss": 0.5106, "step": 15520 }, { "epoch": 3.844059405940594, "grad_norm": 0.7314027547836304, "learning_rate": 9.828593378010395e-05, "loss": 0.5123, "step": 15530 }, { "epoch": 3.8465346534653464, "grad_norm": 0.7362180352210999, "learning_rate": 9.828235506555383e-05, "loss": 0.507, "step": 15540 }, { "epoch": 3.849009900990099, "grad_norm": 0.7254633903503418, "learning_rate": 9.827877268427748e-05, "loss": 0.5079, "step": 15550 }, { "epoch": 3.8514851485148514, "grad_norm": 0.8346689939498901, "learning_rate": 9.827518663654695e-05, "loss": 0.5087, "step": 15560 }, { "epoch": 3.853960396039604, "grad_norm": 0.7881568670272827, "learning_rate": 9.827159692263457e-05, "loss": 0.508, "step": 15570 }, { "epoch": 3.8564356435643563, "grad_norm": 0.8398730158805847, "learning_rate": 9.826800354281298e-05, "loss": 0.51, "step": 15580 }, { "epoch": 3.858910891089109, "grad_norm": 0.8364596366882324, "learning_rate": 9.826440649735507e-05, "loss": 0.5146, "step": 15590 }, { "epoch": 3.8613861386138613, "grad_norm": 0.8029153347015381, "learning_rate": 9.8260805786534e-05, "loss": 0.5117, "step": 15600 }, { "epoch": 3.863861386138614, "grad_norm": 0.7169919013977051, "learning_rate": 9.825720141062321e-05, "loss": 0.5097, "step": 15610 }, { "epoch": 3.866336633663366, "grad_norm": 0.7588196396827698, "learning_rate": 9.825359336989645e-05, "loss": 0.5143, "step": 15620 }, { "epoch": 3.868811881188119, "grad_norm": 0.7468461990356445, "learning_rate": 9.824998166462773e-05, "loss": 0.5094, "step": 15630 }, { "epoch": 3.871287128712871, "grad_norm": 0.8654156923294067, "learning_rate": 9.824636629509131e-05, "loss": 0.5052, "step": 15640 }, { "epoch": 3.873762376237624, "grad_norm": 0.7674470543861389, "learning_rate": 9.824274726156179e-05, "loss": 0.5157, "step": 15650 }, { "epoch": 3.876237623762376, "grad_norm": 0.7774156332015991, "learning_rate": 9.823912456431396e-05, "loss": 0.5092, "step": 15660 }, { "epoch": 3.878712871287129, "grad_norm": 0.7491924166679382, "learning_rate": 9.823549820362299e-05, "loss": 0.5102, "step": 15670 }, { "epoch": 3.881188118811881, "grad_norm": 0.7577826976776123, "learning_rate": 9.823186817976426e-05, "loss": 0.514, "step": 15680 }, { "epoch": 3.883663366336634, "grad_norm": 0.7232824563980103, "learning_rate": 9.822823449301344e-05, "loss": 0.5099, "step": 15690 }, { "epoch": 3.886138613861386, "grad_norm": 0.8029784560203552, "learning_rate": 9.822459714364647e-05, "loss": 0.5031, "step": 15700 }, { "epoch": 3.8886138613861387, "grad_norm": 0.792658269405365, "learning_rate": 9.822095613193962e-05, "loss": 0.5158, "step": 15710 }, { "epoch": 3.891089108910891, "grad_norm": 0.7444489598274231, "learning_rate": 9.82173114581694e-05, "loss": 0.512, "step": 15720 }, { "epoch": 3.8935643564356437, "grad_norm": 0.7972973585128784, "learning_rate": 9.821366312261256e-05, "loss": 0.5085, "step": 15730 }, { "epoch": 3.896039603960396, "grad_norm": 0.7264560461044312, "learning_rate": 9.821001112554617e-05, "loss": 0.5043, "step": 15740 }, { "epoch": 3.8985148514851486, "grad_norm": 0.7838093638420105, "learning_rate": 9.820635546724762e-05, "loss": 0.5171, "step": 15750 }, { "epoch": 3.900990099009901, "grad_norm": 0.8272507786750793, "learning_rate": 9.820269614799451e-05, "loss": 0.5068, "step": 15760 }, { "epoch": 3.9034653465346536, "grad_norm": 0.7859307527542114, "learning_rate": 9.819903316806473e-05, "loss": 0.5027, "step": 15770 }, { "epoch": 3.905940594059406, "grad_norm": 0.7443055510520935, "learning_rate": 9.819536652773647e-05, "loss": 0.5126, "step": 15780 }, { "epoch": 3.9084158415841586, "grad_norm": 0.7762306928634644, "learning_rate": 9.819169622728817e-05, "loss": 0.5133, "step": 15790 }, { "epoch": 3.910891089108911, "grad_norm": 0.7880569696426392, "learning_rate": 9.818802226699859e-05, "loss": 0.5048, "step": 15800 }, { "epoch": 3.9133663366336635, "grad_norm": 0.7923110127449036, "learning_rate": 9.818434464714673e-05, "loss": 0.5091, "step": 15810 }, { "epoch": 3.9158415841584158, "grad_norm": 0.7804614305496216, "learning_rate": 9.818066336801189e-05, "loss": 0.5056, "step": 15820 }, { "epoch": 3.9183168316831685, "grad_norm": 0.7855991721153259, "learning_rate": 9.817697842987361e-05, "loss": 0.5119, "step": 15830 }, { "epoch": 3.9207920792079207, "grad_norm": 0.8143033981323242, "learning_rate": 9.817328983301177e-05, "loss": 0.5049, "step": 15840 }, { "epoch": 3.9232673267326734, "grad_norm": 0.8166285157203674, "learning_rate": 9.816959757770649e-05, "loss": 0.5101, "step": 15850 }, { "epoch": 3.9257425742574257, "grad_norm": 0.7653456330299377, "learning_rate": 9.816590166423815e-05, "loss": 0.5084, "step": 15860 }, { "epoch": 3.9282178217821784, "grad_norm": 0.7764001488685608, "learning_rate": 9.816220209288747e-05, "loss": 0.5124, "step": 15870 }, { "epoch": 3.9306930693069306, "grad_norm": 0.774054229259491, "learning_rate": 9.815849886393538e-05, "loss": 0.5152, "step": 15880 }, { "epoch": 3.9331683168316833, "grad_norm": 0.7936362624168396, "learning_rate": 9.81547919776631e-05, "loss": 0.5127, "step": 15890 }, { "epoch": 3.9356435643564356, "grad_norm": 0.8265398740768433, "learning_rate": 9.815108143435218e-05, "loss": 0.5152, "step": 15900 }, { "epoch": 3.9381188118811883, "grad_norm": 0.8250021934509277, "learning_rate": 9.81473672342844e-05, "loss": 0.5053, "step": 15910 }, { "epoch": 3.9405940594059405, "grad_norm": 0.8224237561225891, "learning_rate": 9.81436493777418e-05, "loss": 0.5073, "step": 15920 }, { "epoch": 3.943069306930693, "grad_norm": 0.7836650013923645, "learning_rate": 9.813992786500677e-05, "loss": 0.5057, "step": 15930 }, { "epoch": 3.9455445544554455, "grad_norm": 0.7439926862716675, "learning_rate": 9.81362026963619e-05, "loss": 0.5104, "step": 15940 }, { "epoch": 3.948019801980198, "grad_norm": 0.8358587622642517, "learning_rate": 9.813247387209013e-05, "loss": 0.5077, "step": 15950 }, { "epoch": 3.9504950495049505, "grad_norm": 0.7874100208282471, "learning_rate": 9.812874139247459e-05, "loss": 0.5088, "step": 15960 }, { "epoch": 3.9529702970297027, "grad_norm": 0.834838330745697, "learning_rate": 9.812500525779878e-05, "loss": 0.5044, "step": 15970 }, { "epoch": 3.9554455445544554, "grad_norm": 0.7653135061264038, "learning_rate": 9.81212654683464e-05, "loss": 0.5108, "step": 15980 }, { "epoch": 3.957920792079208, "grad_norm": 0.7900477647781372, "learning_rate": 9.81175220244015e-05, "loss": 0.5068, "step": 15990 }, { "epoch": 3.9603960396039604, "grad_norm": 0.7648694515228271, "learning_rate": 9.811377492624833e-05, "loss": 0.5077, "step": 16000 }, { "epoch": 3.9628712871287126, "grad_norm": 0.740915060043335, "learning_rate": 9.81100241741715e-05, "loss": 0.5088, "step": 16010 }, { "epoch": 3.9653465346534653, "grad_norm": 0.7497383952140808, "learning_rate": 9.810626976845582e-05, "loss": 0.5066, "step": 16020 }, { "epoch": 3.967821782178218, "grad_norm": 0.8776432275772095, "learning_rate": 9.810251170938643e-05, "loss": 0.5066, "step": 16030 }, { "epoch": 3.9702970297029703, "grad_norm": 0.7611225843429565, "learning_rate": 9.809874999724874e-05, "loss": 0.5052, "step": 16040 }, { "epoch": 3.9727722772277225, "grad_norm": 0.6870295405387878, "learning_rate": 9.809498463232838e-05, "loss": 0.5023, "step": 16050 }, { "epoch": 3.9752475247524752, "grad_norm": 0.7648963332176208, "learning_rate": 9.809121561491135e-05, "loss": 0.5062, "step": 16060 }, { "epoch": 3.977722772277228, "grad_norm": 0.7103619575500488, "learning_rate": 9.808744294528387e-05, "loss": 0.5107, "step": 16070 }, { "epoch": 3.98019801980198, "grad_norm": 0.8292820453643799, "learning_rate": 9.808366662373245e-05, "loss": 0.5065, "step": 16080 }, { "epoch": 3.9826732673267324, "grad_norm": 0.7735788822174072, "learning_rate": 9.807988665054386e-05, "loss": 0.5069, "step": 16090 }, { "epoch": 3.985148514851485, "grad_norm": 0.7679173350334167, "learning_rate": 9.80761030260052e-05, "loss": 0.5117, "step": 16100 }, { "epoch": 3.987623762376238, "grad_norm": 0.789439857006073, "learning_rate": 9.807231575040377e-05, "loss": 0.5081, "step": 16110 }, { "epoch": 3.99009900990099, "grad_norm": 0.8100751042366028, "learning_rate": 9.806852482402722e-05, "loss": 0.5119, "step": 16120 }, { "epoch": 3.9925742574257423, "grad_norm": 0.7984311580657959, "learning_rate": 9.806473024716343e-05, "loss": 0.5088, "step": 16130 }, { "epoch": 3.995049504950495, "grad_norm": 0.769944965839386, "learning_rate": 9.806093202010058e-05, "loss": 0.5118, "step": 16140 }, { "epoch": 3.9975247524752477, "grad_norm": 0.8768450617790222, "learning_rate": 9.805713014312712e-05, "loss": 0.505, "step": 16150 }, { "epoch": 4.0, "grad_norm": 0.7790595293045044, "learning_rate": 9.805332461653177e-05, "loss": 0.507, "step": 16160 }, { "epoch": 4.002475247524752, "grad_norm": 0.7342106699943542, "learning_rate": 9.804951544060355e-05, "loss": 0.5068, "step": 16170 }, { "epoch": 4.0049504950495045, "grad_norm": 0.7938467860221863, "learning_rate": 9.804570261563172e-05, "loss": 0.5065, "step": 16180 }, { "epoch": 4.007425742574258, "grad_norm": 0.7363917827606201, "learning_rate": 9.804188614190586e-05, "loss": 0.5039, "step": 16190 }, { "epoch": 4.00990099009901, "grad_norm": 0.8471347689628601, "learning_rate": 9.803806601971579e-05, "loss": 0.5083, "step": 16200 }, { "epoch": 4.012376237623762, "grad_norm": 0.778165340423584, "learning_rate": 9.803424224935163e-05, "loss": 0.5095, "step": 16210 }, { "epoch": 4.014851485148514, "grad_norm": 0.7727250456809998, "learning_rate": 9.803041483110376e-05, "loss": 0.5098, "step": 16220 }, { "epoch": 4.017326732673268, "grad_norm": 0.7259363532066345, "learning_rate": 9.802658376526287e-05, "loss": 0.5073, "step": 16230 }, { "epoch": 4.01980198019802, "grad_norm": 0.7794106006622314, "learning_rate": 9.80227490521199e-05, "loss": 0.5115, "step": 16240 }, { "epoch": 4.022277227722772, "grad_norm": 0.7356583476066589, "learning_rate": 9.801891069196603e-05, "loss": 0.5087, "step": 16250 }, { "epoch": 4.024752475247524, "grad_norm": 0.8233605027198792, "learning_rate": 9.801506868509281e-05, "loss": 0.5127, "step": 16260 }, { "epoch": 4.0272277227722775, "grad_norm": 0.7538130879402161, "learning_rate": 9.8011223031792e-05, "loss": 0.5051, "step": 16270 }, { "epoch": 4.02970297029703, "grad_norm": 0.7232651710510254, "learning_rate": 9.800737373235565e-05, "loss": 0.5107, "step": 16280 }, { "epoch": 4.032178217821782, "grad_norm": 0.7272235751152039, "learning_rate": 9.800352078707606e-05, "loss": 0.5089, "step": 16290 }, { "epoch": 4.034653465346534, "grad_norm": 0.8339650630950928, "learning_rate": 9.799966419624589e-05, "loss": 0.4973, "step": 16300 }, { "epoch": 4.037128712871287, "grad_norm": 0.7500566244125366, "learning_rate": 9.799580396015798e-05, "loss": 0.5078, "step": 16310 }, { "epoch": 4.03960396039604, "grad_norm": 0.7657846212387085, "learning_rate": 9.799194007910551e-05, "loss": 0.5089, "step": 16320 }, { "epoch": 4.042079207920792, "grad_norm": 0.7605831027030945, "learning_rate": 9.798807255338192e-05, "loss": 0.5114, "step": 16330 }, { "epoch": 4.044554455445544, "grad_norm": 0.791872501373291, "learning_rate": 9.798420138328089e-05, "loss": 0.5126, "step": 16340 }, { "epoch": 4.047029702970297, "grad_norm": 0.7851059436798096, "learning_rate": 9.798032656909645e-05, "loss": 0.5074, "step": 16350 }, { "epoch": 4.0495049504950495, "grad_norm": 0.7250646352767944, "learning_rate": 9.797644811112285e-05, "loss": 0.5095, "step": 16360 }, { "epoch": 4.051980198019802, "grad_norm": 0.8232829570770264, "learning_rate": 9.797256600965462e-05, "loss": 0.5035, "step": 16370 }, { "epoch": 4.054455445544554, "grad_norm": 0.7256481051445007, "learning_rate": 9.79686802649866e-05, "loss": 0.5039, "step": 16380 }, { "epoch": 4.056930693069307, "grad_norm": 0.8354199528694153, "learning_rate": 9.796479087741388e-05, "loss": 0.5077, "step": 16390 }, { "epoch": 4.0594059405940595, "grad_norm": 0.7873492240905762, "learning_rate": 9.796089784723184e-05, "loss": 0.5049, "step": 16400 }, { "epoch": 4.061881188118812, "grad_norm": 0.6772356033325195, "learning_rate": 9.79570011747361e-05, "loss": 0.5059, "step": 16410 }, { "epoch": 4.064356435643564, "grad_norm": 0.7302952408790588, "learning_rate": 9.795310086022264e-05, "loss": 0.5073, "step": 16420 }, { "epoch": 4.066831683168317, "grad_norm": 0.7654837369918823, "learning_rate": 9.79491969039876e-05, "loss": 0.5046, "step": 16430 }, { "epoch": 4.069306930693069, "grad_norm": 0.7457629442214966, "learning_rate": 9.794528930632751e-05, "loss": 0.5056, "step": 16440 }, { "epoch": 4.071782178217822, "grad_norm": 0.7506021857261658, "learning_rate": 9.794137806753911e-05, "loss": 0.51, "step": 16450 }, { "epoch": 4.074257425742574, "grad_norm": 0.9006524085998535, "learning_rate": 9.793746318791944e-05, "loss": 0.5019, "step": 16460 }, { "epoch": 4.076732673267327, "grad_norm": 0.7298718094825745, "learning_rate": 9.793354466776579e-05, "loss": 0.5036, "step": 16470 }, { "epoch": 4.079207920792079, "grad_norm": 0.7192530632019043, "learning_rate": 9.792962250737576e-05, "loss": 0.5057, "step": 16480 }, { "epoch": 4.0816831683168315, "grad_norm": 0.8188049793243408, "learning_rate": 9.79256967070472e-05, "loss": 0.5176, "step": 16490 }, { "epoch": 4.084158415841584, "grad_norm": 0.7109169363975525, "learning_rate": 9.792176726707827e-05, "loss": 0.5065, "step": 16500 }, { "epoch": 4.086633663366337, "grad_norm": 0.7294211983680725, "learning_rate": 9.791783418776737e-05, "loss": 0.5049, "step": 16510 }, { "epoch": 4.089108910891089, "grad_norm": 0.7684565186500549, "learning_rate": 9.791389746941318e-05, "loss": 0.5088, "step": 16520 }, { "epoch": 4.091584158415841, "grad_norm": 0.7804899215698242, "learning_rate": 9.79099571123147e-05, "loss": 0.5078, "step": 16530 }, { "epoch": 4.094059405940594, "grad_norm": 0.7922956347465515, "learning_rate": 9.790601311677115e-05, "loss": 0.5044, "step": 16540 }, { "epoch": 4.096534653465347, "grad_norm": 0.743564248085022, "learning_rate": 9.790206548308204e-05, "loss": 0.5044, "step": 16550 }, { "epoch": 4.099009900990099, "grad_norm": 0.7901009917259216, "learning_rate": 9.789811421154719e-05, "loss": 0.5083, "step": 16560 }, { "epoch": 4.101485148514851, "grad_norm": 0.8470569252967834, "learning_rate": 9.789415930246668e-05, "loss": 0.503, "step": 16570 }, { "epoch": 4.103960396039604, "grad_norm": 0.7141685485839844, "learning_rate": 9.789020075614082e-05, "loss": 0.5048, "step": 16580 }, { "epoch": 4.106435643564357, "grad_norm": 0.756739616394043, "learning_rate": 9.78862385728703e-05, "loss": 0.5091, "step": 16590 }, { "epoch": 4.108910891089109, "grad_norm": 0.8132949471473694, "learning_rate": 9.788227275295595e-05, "loss": 0.5054, "step": 16600 }, { "epoch": 4.111386138613861, "grad_norm": 0.7421947717666626, "learning_rate": 9.787830329669898e-05, "loss": 0.5094, "step": 16610 }, { "epoch": 4.1138613861386135, "grad_norm": 0.7336099743843079, "learning_rate": 9.787433020440084e-05, "loss": 0.5047, "step": 16620 }, { "epoch": 4.116336633663367, "grad_norm": 0.7107330560684204, "learning_rate": 9.787035347636329e-05, "loss": 0.5092, "step": 16630 }, { "epoch": 4.118811881188119, "grad_norm": 0.8209594488143921, "learning_rate": 9.786637311288828e-05, "loss": 0.5111, "step": 16640 }, { "epoch": 4.121287128712871, "grad_norm": 0.747581422328949, "learning_rate": 9.786238911427814e-05, "loss": 0.5109, "step": 16650 }, { "epoch": 4.123762376237623, "grad_norm": 0.7765509486198425, "learning_rate": 9.785840148083543e-05, "loss": 0.5046, "step": 16660 }, { "epoch": 4.126237623762377, "grad_norm": 0.6988978981971741, "learning_rate": 9.785441021286293e-05, "loss": 0.5082, "step": 16670 }, { "epoch": 4.128712871287129, "grad_norm": 0.7773998975753784, "learning_rate": 9.785041531066381e-05, "loss": 0.5044, "step": 16680 }, { "epoch": 4.131188118811881, "grad_norm": 0.7695739269256592, "learning_rate": 9.784641677454142e-05, "loss": 0.5079, "step": 16690 }, { "epoch": 4.133663366336633, "grad_norm": 0.7667250037193298, "learning_rate": 9.784241460479944e-05, "loss": 0.5081, "step": 16700 }, { "epoch": 4.1361386138613865, "grad_norm": 0.8063376545906067, "learning_rate": 9.78384088017418e-05, "loss": 0.5042, "step": 16710 }, { "epoch": 4.138613861386139, "grad_norm": 0.7191446423530579, "learning_rate": 9.783439936567272e-05, "loss": 0.5019, "step": 16720 }, { "epoch": 4.141089108910891, "grad_norm": 0.7322946786880493, "learning_rate": 9.783038629689668e-05, "loss": 0.5043, "step": 16730 }, { "epoch": 4.143564356435643, "grad_norm": 0.749659538269043, "learning_rate": 9.782636959571847e-05, "loss": 0.5122, "step": 16740 }, { "epoch": 4.146039603960396, "grad_norm": 0.7592344284057617, "learning_rate": 9.782234926244309e-05, "loss": 0.5113, "step": 16750 }, { "epoch": 4.148514851485149, "grad_norm": 0.7526717782020569, "learning_rate": 9.781832529737591e-05, "loss": 0.5052, "step": 16760 }, { "epoch": 4.150990099009901, "grad_norm": 0.7409035563468933, "learning_rate": 9.781429770082249e-05, "loss": 0.5041, "step": 16770 }, { "epoch": 4.153465346534653, "grad_norm": 0.8350714445114136, "learning_rate": 9.78102664730887e-05, "loss": 0.5084, "step": 16780 }, { "epoch": 4.155940594059406, "grad_norm": 0.7928711771965027, "learning_rate": 9.780623161448069e-05, "loss": 0.5043, "step": 16790 }, { "epoch": 4.158415841584159, "grad_norm": 0.810846209526062, "learning_rate": 9.78021931253049e-05, "loss": 0.5105, "step": 16800 }, { "epoch": 4.160891089108911, "grad_norm": 0.8427425026893616, "learning_rate": 9.7798151005868e-05, "loss": 0.5057, "step": 16810 }, { "epoch": 4.163366336633663, "grad_norm": 0.8043913245201111, "learning_rate": 9.779410525647696e-05, "loss": 0.505, "step": 16820 }, { "epoch": 4.165841584158416, "grad_norm": 0.784429669380188, "learning_rate": 9.779005587743905e-05, "loss": 0.5059, "step": 16830 }, { "epoch": 4.1683168316831685, "grad_norm": 0.7438424229621887, "learning_rate": 9.778600286906176e-05, "loss": 0.5105, "step": 16840 }, { "epoch": 4.170792079207921, "grad_norm": 0.7875086665153503, "learning_rate": 9.778194623165296e-05, "loss": 0.515, "step": 16850 }, { "epoch": 4.173267326732673, "grad_norm": 0.7200367450714111, "learning_rate": 9.777788596552064e-05, "loss": 0.5083, "step": 16860 }, { "epoch": 4.175742574257426, "grad_norm": 0.7093210220336914, "learning_rate": 9.777382207097318e-05, "loss": 0.5102, "step": 16870 }, { "epoch": 4.178217821782178, "grad_norm": 0.797606348991394, "learning_rate": 9.776975454831924e-05, "loss": 0.508, "step": 16880 }, { "epoch": 4.180693069306931, "grad_norm": 0.9362310171127319, "learning_rate": 9.77656833978677e-05, "loss": 0.502, "step": 16890 }, { "epoch": 4.183168316831683, "grad_norm": 0.7917871475219727, "learning_rate": 9.776160861992772e-05, "loss": 0.5092, "step": 16900 }, { "epoch": 4.185643564356436, "grad_norm": 0.7473326921463013, "learning_rate": 9.775753021480875e-05, "loss": 0.5078, "step": 16910 }, { "epoch": 4.188118811881188, "grad_norm": 0.7485139966011047, "learning_rate": 9.775344818282055e-05, "loss": 0.5027, "step": 16920 }, { "epoch": 4.1905940594059405, "grad_norm": 0.7501392364501953, "learning_rate": 9.77493625242731e-05, "loss": 0.5074, "step": 16930 }, { "epoch": 4.193069306930693, "grad_norm": 0.7523689270019531, "learning_rate": 9.774527323947669e-05, "loss": 0.5079, "step": 16940 }, { "epoch": 4.195544554455446, "grad_norm": 0.7287392020225525, "learning_rate": 9.774118032874186e-05, "loss": 0.5034, "step": 16950 }, { "epoch": 4.198019801980198, "grad_norm": 0.784089207649231, "learning_rate": 9.773708379237945e-05, "loss": 0.508, "step": 16960 }, { "epoch": 4.2004950495049505, "grad_norm": 0.7655258178710938, "learning_rate": 9.773298363070056e-05, "loss": 0.5049, "step": 16970 }, { "epoch": 4.202970297029703, "grad_norm": 0.7954780459403992, "learning_rate": 9.772887984401658e-05, "loss": 0.5098, "step": 16980 }, { "epoch": 4.205445544554456, "grad_norm": 0.7659585475921631, "learning_rate": 9.772477243263916e-05, "loss": 0.5109, "step": 16990 }, { "epoch": 4.207920792079208, "grad_norm": 0.7758752703666687, "learning_rate": 9.772066139688024e-05, "loss": 0.5113, "step": 17000 }, { "epoch": 4.21039603960396, "grad_norm": 0.7757425308227539, "learning_rate": 9.771654673705199e-05, "loss": 0.5056, "step": 17010 }, { "epoch": 4.212871287128713, "grad_norm": 0.7830859422683716, "learning_rate": 9.771242845346695e-05, "loss": 0.5112, "step": 17020 }, { "epoch": 4.215346534653466, "grad_norm": 0.8802123069763184, "learning_rate": 9.770830654643782e-05, "loss": 0.5094, "step": 17030 }, { "epoch": 4.217821782178218, "grad_norm": 0.749210000038147, "learning_rate": 9.770418101627765e-05, "loss": 0.5046, "step": 17040 }, { "epoch": 4.22029702970297, "grad_norm": 0.7800239324569702, "learning_rate": 9.770005186329976e-05, "loss": 0.5055, "step": 17050 }, { "epoch": 4.2227722772277225, "grad_norm": 0.6456896662712097, "learning_rate": 9.769591908781774e-05, "loss": 0.5043, "step": 17060 }, { "epoch": 4.225247524752476, "grad_norm": 0.7347676753997803, "learning_rate": 9.76917826901454e-05, "loss": 0.5072, "step": 17070 }, { "epoch": 4.227722772277228, "grad_norm": 0.7913143038749695, "learning_rate": 9.768764267059693e-05, "loss": 0.5095, "step": 17080 }, { "epoch": 4.23019801980198, "grad_norm": 0.7840316891670227, "learning_rate": 9.76834990294867e-05, "loss": 0.5031, "step": 17090 }, { "epoch": 4.232673267326732, "grad_norm": 0.7298895120620728, "learning_rate": 9.76793517671294e-05, "loss": 0.5048, "step": 17100 }, { "epoch": 4.235148514851485, "grad_norm": 0.7035727500915527, "learning_rate": 9.767520088384e-05, "loss": 0.5058, "step": 17110 }, { "epoch": 4.237623762376238, "grad_norm": 0.77611243724823, "learning_rate": 9.767104637993371e-05, "loss": 0.505, "step": 17120 }, { "epoch": 4.24009900990099, "grad_norm": 0.7532832026481628, "learning_rate": 9.766688825572605e-05, "loss": 0.5035, "step": 17130 }, { "epoch": 4.242574257425742, "grad_norm": 0.8150664567947388, "learning_rate": 9.76627265115328e-05, "loss": 0.502, "step": 17140 }, { "epoch": 4.2450495049504955, "grad_norm": 0.7373790740966797, "learning_rate": 9.765856114767003e-05, "loss": 0.5115, "step": 17150 }, { "epoch": 4.247524752475248, "grad_norm": 0.7289632558822632, "learning_rate": 9.765439216445405e-05, "loss": 0.5018, "step": 17160 }, { "epoch": 4.25, "grad_norm": 0.7232757210731506, "learning_rate": 9.765021956220148e-05, "loss": 0.5055, "step": 17170 }, { "epoch": 4.252475247524752, "grad_norm": 0.7590899467468262, "learning_rate": 9.76460433412292e-05, "loss": 0.5072, "step": 17180 }, { "epoch": 4.2549504950495045, "grad_norm": 0.7710133790969849, "learning_rate": 9.764186350185435e-05, "loss": 0.4989, "step": 17190 }, { "epoch": 4.257425742574258, "grad_norm": 0.7112373113632202, "learning_rate": 9.76376800443944e-05, "loss": 0.5036, "step": 17200 }, { "epoch": 4.25990099009901, "grad_norm": 0.8046426177024841, "learning_rate": 9.763349296916701e-05, "loss": 0.5034, "step": 17210 }, { "epoch": 4.262376237623762, "grad_norm": 0.7728721499443054, "learning_rate": 9.762930227649021e-05, "loss": 0.5048, "step": 17220 }, { "epoch": 4.264851485148515, "grad_norm": 0.7204287648200989, "learning_rate": 9.76251079666822e-05, "loss": 0.5044, "step": 17230 }, { "epoch": 4.267326732673268, "grad_norm": 0.7371503710746765, "learning_rate": 9.762091004006155e-05, "loss": 0.503, "step": 17240 }, { "epoch": 4.26980198019802, "grad_norm": 0.7618557810783386, "learning_rate": 9.761670849694706e-05, "loss": 0.507, "step": 17250 }, { "epoch": 4.272277227722772, "grad_norm": 0.7021378874778748, "learning_rate": 9.76125033376578e-05, "loss": 0.5026, "step": 17260 }, { "epoch": 4.274752475247524, "grad_norm": 0.7479745745658875, "learning_rate": 9.760829456251313e-05, "loss": 0.5009, "step": 17270 }, { "epoch": 4.2772277227722775, "grad_norm": 0.8116604089736938, "learning_rate": 9.760408217183266e-05, "loss": 0.509, "step": 17280 }, { "epoch": 4.27970297029703, "grad_norm": 0.7351595759391785, "learning_rate": 9.759986616593631e-05, "loss": 0.5082, "step": 17290 }, { "epoch": 4.282178217821782, "grad_norm": 0.8261539340019226, "learning_rate": 9.759564654514426e-05, "loss": 0.507, "step": 17300 }, { "epoch": 4.284653465346534, "grad_norm": 0.7743740081787109, "learning_rate": 9.759142330977697e-05, "loss": 0.4977, "step": 17310 }, { "epoch": 4.287128712871287, "grad_norm": 0.7534793019294739, "learning_rate": 9.758719646015514e-05, "loss": 0.5003, "step": 17320 }, { "epoch": 4.28960396039604, "grad_norm": 0.7572121620178223, "learning_rate": 9.75829659965998e-05, "loss": 0.5092, "step": 17330 }, { "epoch": 4.292079207920792, "grad_norm": 0.7481768727302551, "learning_rate": 9.757873191943221e-05, "loss": 0.5043, "step": 17340 }, { "epoch": 4.294554455445544, "grad_norm": 0.796904444694519, "learning_rate": 9.75744942289739e-05, "loss": 0.5054, "step": 17350 }, { "epoch": 4.297029702970297, "grad_norm": 0.7779701948165894, "learning_rate": 9.757025292554676e-05, "loss": 0.5012, "step": 17360 }, { "epoch": 4.2995049504950495, "grad_norm": 0.8066970705986023, "learning_rate": 9.756600800947283e-05, "loss": 0.4989, "step": 17370 }, { "epoch": 4.301980198019802, "grad_norm": 0.7413067817687988, "learning_rate": 9.756175948107448e-05, "loss": 0.5046, "step": 17380 }, { "epoch": 4.304455445544554, "grad_norm": 0.7311762571334839, "learning_rate": 9.75575073406744e-05, "loss": 0.5082, "step": 17390 }, { "epoch": 4.306930693069307, "grad_norm": 0.7646735310554504, "learning_rate": 9.755325158859548e-05, "loss": 0.505, "step": 17400 }, { "epoch": 4.3094059405940595, "grad_norm": 0.9166710376739502, "learning_rate": 9.754899222516092e-05, "loss": 0.5103, "step": 17410 }, { "epoch": 4.311881188118812, "grad_norm": 0.7647348642349243, "learning_rate": 9.75447292506942e-05, "loss": 0.5063, "step": 17420 }, { "epoch": 4.314356435643564, "grad_norm": 0.7633073925971985, "learning_rate": 9.754046266551906e-05, "loss": 0.5024, "step": 17430 }, { "epoch": 4.316831683168317, "grad_norm": 0.8146949410438538, "learning_rate": 9.753619246995954e-05, "loss": 0.5083, "step": 17440 }, { "epoch": 4.319306930693069, "grad_norm": 0.698602020740509, "learning_rate": 9.75319186643399e-05, "loss": 0.5038, "step": 17450 }, { "epoch": 4.321782178217822, "grad_norm": 0.7533316612243652, "learning_rate": 9.752764124898471e-05, "loss": 0.5051, "step": 17460 }, { "epoch": 4.324257425742574, "grad_norm": 0.7811251878738403, "learning_rate": 9.752336022421884e-05, "loss": 0.5032, "step": 17470 }, { "epoch": 4.326732673267327, "grad_norm": 0.7380408048629761, "learning_rate": 9.751907559036737e-05, "loss": 0.497, "step": 17480 }, { "epoch": 4.329207920792079, "grad_norm": 0.817848265171051, "learning_rate": 9.751478734775572e-05, "loss": 0.5046, "step": 17490 }, { "epoch": 4.3316831683168315, "grad_norm": 0.7408173084259033, "learning_rate": 9.751049549670955e-05, "loss": 0.5063, "step": 17500 }, { "epoch": 4.334158415841584, "grad_norm": 0.70205157995224, "learning_rate": 9.750620003755477e-05, "loss": 0.5043, "step": 17510 }, { "epoch": 4.336633663366337, "grad_norm": 0.746418833732605, "learning_rate": 9.750190097061762e-05, "loss": 0.5066, "step": 17520 }, { "epoch": 4.339108910891089, "grad_norm": 0.7732962965965271, "learning_rate": 9.749759829622458e-05, "loss": 0.5068, "step": 17530 }, { "epoch": 4.341584158415841, "grad_norm": 0.7818546295166016, "learning_rate": 9.749329201470239e-05, "loss": 0.5122, "step": 17540 }, { "epoch": 4.344059405940594, "grad_norm": 0.7504632472991943, "learning_rate": 9.748898212637812e-05, "loss": 0.5006, "step": 17550 }, { "epoch": 4.346534653465347, "grad_norm": 0.8571942448616028, "learning_rate": 9.748466863157905e-05, "loss": 0.5019, "step": 17560 }, { "epoch": 4.349009900990099, "grad_norm": 0.7210179567337036, "learning_rate": 9.748035153063276e-05, "loss": 0.506, "step": 17570 }, { "epoch": 4.351485148514851, "grad_norm": 0.7237450480461121, "learning_rate": 9.747603082386713e-05, "loss": 0.5048, "step": 17580 }, { "epoch": 4.353960396039604, "grad_norm": 0.6728461384773254, "learning_rate": 9.747170651161026e-05, "loss": 0.5026, "step": 17590 }, { "epoch": 4.356435643564357, "grad_norm": 0.7035638689994812, "learning_rate": 9.746737859419058e-05, "loss": 0.505, "step": 17600 }, { "epoch": 4.358910891089109, "grad_norm": 0.8318840265274048, "learning_rate": 9.746304707193675e-05, "loss": 0.5023, "step": 17610 }, { "epoch": 4.361386138613861, "grad_norm": 0.7482419610023499, "learning_rate": 9.74587119451777e-05, "loss": 0.5054, "step": 17620 }, { "epoch": 4.3638613861386135, "grad_norm": 0.7060530185699463, "learning_rate": 9.74543732142427e-05, "loss": 0.4994, "step": 17630 }, { "epoch": 4.366336633663367, "grad_norm": 0.7904645800590515, "learning_rate": 9.745003087946123e-05, "loss": 0.5026, "step": 17640 }, { "epoch": 4.368811881188119, "grad_norm": 0.7500373721122742, "learning_rate": 9.744568494116305e-05, "loss": 0.5022, "step": 17650 }, { "epoch": 4.371287128712871, "grad_norm": 0.8755686283111572, "learning_rate": 9.744133539967821e-05, "loss": 0.5068, "step": 17660 }, { "epoch": 4.373762376237623, "grad_norm": 0.8184613585472107, "learning_rate": 9.743698225533704e-05, "loss": 0.5035, "step": 17670 }, { "epoch": 4.376237623762377, "grad_norm": 0.7355740070343018, "learning_rate": 9.743262550847012e-05, "loss": 0.5108, "step": 17680 }, { "epoch": 4.378712871287129, "grad_norm": 0.729844331741333, "learning_rate": 9.742826515940832e-05, "loss": 0.5075, "step": 17690 }, { "epoch": 4.381188118811881, "grad_norm": 0.7589088678359985, "learning_rate": 9.742390120848278e-05, "loss": 0.5003, "step": 17700 }, { "epoch": 4.383663366336633, "grad_norm": 0.728496253490448, "learning_rate": 9.741953365602493e-05, "loss": 0.5104, "step": 17710 }, { "epoch": 4.3861386138613865, "grad_norm": 0.7363731861114502, "learning_rate": 9.741516250236641e-05, "loss": 0.4994, "step": 17720 }, { "epoch": 4.388613861386139, "grad_norm": 0.795325756072998, "learning_rate": 9.741078774783923e-05, "loss": 0.5057, "step": 17730 }, { "epoch": 4.391089108910891, "grad_norm": 0.7459508180618286, "learning_rate": 9.74064093927756e-05, "loss": 0.5057, "step": 17740 }, { "epoch": 4.393564356435643, "grad_norm": 0.7855289578437805, "learning_rate": 9.740202743750804e-05, "loss": 0.5019, "step": 17750 }, { "epoch": 4.396039603960396, "grad_norm": 0.80328768491745, "learning_rate": 9.739764188236931e-05, "loss": 0.4972, "step": 17760 }, { "epoch": 4.398514851485149, "grad_norm": 0.6947653293609619, "learning_rate": 9.739325272769247e-05, "loss": 0.5089, "step": 17770 }, { "epoch": 4.400990099009901, "grad_norm": 0.7403618097305298, "learning_rate": 9.738885997381087e-05, "loss": 0.5037, "step": 17780 }, { "epoch": 4.403465346534653, "grad_norm": 0.8251928091049194, "learning_rate": 9.738446362105809e-05, "loss": 0.5035, "step": 17790 }, { "epoch": 4.405940594059406, "grad_norm": 0.7539266347885132, "learning_rate": 9.7380063669768e-05, "loss": 0.505, "step": 17800 }, { "epoch": 4.408415841584159, "grad_norm": 0.7119894623756409, "learning_rate": 9.737566012027478e-05, "loss": 0.5041, "step": 17810 }, { "epoch": 4.410891089108911, "grad_norm": 0.8692534565925598, "learning_rate": 9.737125297291281e-05, "loss": 0.5042, "step": 17820 }, { "epoch": 4.413366336633663, "grad_norm": 0.7796722650527954, "learning_rate": 9.736684222801678e-05, "loss": 0.5089, "step": 17830 }, { "epoch": 4.415841584158416, "grad_norm": 0.7676243782043457, "learning_rate": 9.73624278859217e-05, "loss": 0.5012, "step": 17840 }, { "epoch": 4.4183168316831685, "grad_norm": 0.7071130275726318, "learning_rate": 9.735800994696278e-05, "loss": 0.5077, "step": 17850 }, { "epoch": 4.420792079207921, "grad_norm": 0.7950927019119263, "learning_rate": 9.735358841147555e-05, "loss": 0.502, "step": 17860 }, { "epoch": 4.423267326732673, "grad_norm": 0.7919541597366333, "learning_rate": 9.734916327979578e-05, "loss": 0.505, "step": 17870 }, { "epoch": 4.425742574257426, "grad_norm": 0.7492562532424927, "learning_rate": 9.734473455225954e-05, "loss": 0.4997, "step": 17880 }, { "epoch": 4.428217821782178, "grad_norm": 0.7640017867088318, "learning_rate": 9.734030222920314e-05, "loss": 0.4989, "step": 17890 }, { "epoch": 4.430693069306931, "grad_norm": 0.782122790813446, "learning_rate": 9.733586631096323e-05, "loss": 0.5001, "step": 17900 }, { "epoch": 4.433168316831683, "grad_norm": 0.7191017270088196, "learning_rate": 9.733142679787664e-05, "loss": 0.5097, "step": 17910 }, { "epoch": 4.435643564356436, "grad_norm": 0.7339482307434082, "learning_rate": 9.732698369028057e-05, "loss": 0.5066, "step": 17920 }, { "epoch": 4.438118811881188, "grad_norm": 0.6938663721084595, "learning_rate": 9.73225369885124e-05, "loss": 0.5067, "step": 17930 }, { "epoch": 4.4405940594059405, "grad_norm": 0.7483087182044983, "learning_rate": 9.731808669290986e-05, "loss": 0.5004, "step": 17940 }, { "epoch": 4.443069306930693, "grad_norm": 0.6843761205673218, "learning_rate": 9.731363280381089e-05, "loss": 0.4982, "step": 17950 }, { "epoch": 4.445544554455446, "grad_norm": 0.7304114103317261, "learning_rate": 9.730917532155377e-05, "loss": 0.5004, "step": 17960 }, { "epoch": 4.448019801980198, "grad_norm": 0.8097206354141235, "learning_rate": 9.730471424647699e-05, "loss": 0.4961, "step": 17970 }, { "epoch": 4.4504950495049505, "grad_norm": 0.7850520014762878, "learning_rate": 9.730024957891935e-05, "loss": 0.5024, "step": 17980 }, { "epoch": 4.452970297029703, "grad_norm": 0.6834002733230591, "learning_rate": 9.72957813192199e-05, "loss": 0.5118, "step": 17990 }, { "epoch": 4.455445544554456, "grad_norm": 0.7666674852371216, "learning_rate": 9.7291309467718e-05, "loss": 0.4994, "step": 18000 }, { "epoch": 4.457920792079208, "grad_norm": 0.7632752060890198, "learning_rate": 9.728683402475322e-05, "loss": 0.5084, "step": 18010 }, { "epoch": 4.46039603960396, "grad_norm": 0.7201710343360901, "learning_rate": 9.728235499066547e-05, "loss": 0.5013, "step": 18020 }, { "epoch": 4.462871287128713, "grad_norm": 0.733328640460968, "learning_rate": 9.727787236579491e-05, "loss": 0.502, "step": 18030 }, { "epoch": 4.465346534653466, "grad_norm": 0.7065821290016174, "learning_rate": 9.727338615048194e-05, "loss": 0.5026, "step": 18040 }, { "epoch": 4.467821782178218, "grad_norm": 0.7243065237998962, "learning_rate": 9.726889634506727e-05, "loss": 0.5019, "step": 18050 }, { "epoch": 4.47029702970297, "grad_norm": 0.7316200733184814, "learning_rate": 9.726440294989186e-05, "loss": 0.5026, "step": 18060 }, { "epoch": 4.4727722772277225, "grad_norm": 0.7230711579322815, "learning_rate": 9.725990596529698e-05, "loss": 0.5064, "step": 18070 }, { "epoch": 4.475247524752476, "grad_norm": 0.7391629815101624, "learning_rate": 9.725540539162412e-05, "loss": 0.5052, "step": 18080 }, { "epoch": 4.477722772277228, "grad_norm": 0.7235207557678223, "learning_rate": 9.725090122921507e-05, "loss": 0.5059, "step": 18090 }, { "epoch": 4.48019801980198, "grad_norm": 0.7149741649627686, "learning_rate": 9.724639347841192e-05, "loss": 0.5013, "step": 18100 }, { "epoch": 4.482673267326732, "grad_norm": 0.7223927974700928, "learning_rate": 9.724188213955698e-05, "loss": 0.5066, "step": 18110 }, { "epoch": 4.485148514851485, "grad_norm": 0.7184776663780212, "learning_rate": 9.723736721299286e-05, "loss": 0.5073, "step": 18120 }, { "epoch": 4.487623762376238, "grad_norm": 0.8005915880203247, "learning_rate": 9.723284869906242e-05, "loss": 0.5017, "step": 18130 }, { "epoch": 4.49009900990099, "grad_norm": 0.7903771996498108, "learning_rate": 9.722832659810885e-05, "loss": 0.5017, "step": 18140 }, { "epoch": 4.492574257425742, "grad_norm": 0.8083469271659851, "learning_rate": 9.722380091047555e-05, "loss": 0.5022, "step": 18150 }, { "epoch": 4.4950495049504955, "grad_norm": 0.7964977025985718, "learning_rate": 9.721927163650624e-05, "loss": 0.5049, "step": 18160 }, { "epoch": 4.497524752475248, "grad_norm": 0.7202230095863342, "learning_rate": 9.721473877654484e-05, "loss": 0.5043, "step": 18170 }, { "epoch": 4.5, "grad_norm": 0.7549861669540405, "learning_rate": 9.721020233093563e-05, "loss": 0.5052, "step": 18180 }, { "epoch": 4.502475247524752, "grad_norm": 0.706407904624939, "learning_rate": 9.720566230002312e-05, "loss": 0.5109, "step": 18190 }, { "epoch": 4.5049504950495045, "grad_norm": 0.7234919667243958, "learning_rate": 9.72011186841521e-05, "loss": 0.5006, "step": 18200 }, { "epoch": 4.507425742574258, "grad_norm": 0.7192426919937134, "learning_rate": 9.719657148366758e-05, "loss": 0.5046, "step": 18210 }, { "epoch": 4.50990099009901, "grad_norm": 0.831527829170227, "learning_rate": 9.719202069891495e-05, "loss": 0.5055, "step": 18220 }, { "epoch": 4.512376237623762, "grad_norm": 0.7323554754257202, "learning_rate": 9.718746633023978e-05, "loss": 0.5067, "step": 18230 }, { "epoch": 4.514851485148515, "grad_norm": 0.7601531744003296, "learning_rate": 9.718290837798796e-05, "loss": 0.5022, "step": 18240 }, { "epoch": 4.517326732673268, "grad_norm": 0.7762781381607056, "learning_rate": 9.717834684250563e-05, "loss": 0.5151, "step": 18250 }, { "epoch": 4.51980198019802, "grad_norm": 0.731751561164856, "learning_rate": 9.71737817241392e-05, "loss": 0.5019, "step": 18260 }, { "epoch": 4.522277227722772, "grad_norm": 0.7541120052337646, "learning_rate": 9.716921302323537e-05, "loss": 0.5058, "step": 18270 }, { "epoch": 4.524752475247524, "grad_norm": 0.7601823806762695, "learning_rate": 9.716464074014111e-05, "loss": 0.5038, "step": 18280 }, { "epoch": 4.5272277227722775, "grad_norm": 0.6877632141113281, "learning_rate": 9.716006487520364e-05, "loss": 0.4995, "step": 18290 }, { "epoch": 4.52970297029703, "grad_norm": 0.7526277303695679, "learning_rate": 9.715548542877046e-05, "loss": 0.5054, "step": 18300 }, { "epoch": 4.532178217821782, "grad_norm": 0.7196195721626282, "learning_rate": 9.715090240118938e-05, "loss": 0.5014, "step": 18310 }, { "epoch": 4.534653465346535, "grad_norm": 0.7412196397781372, "learning_rate": 9.714631579280841e-05, "loss": 0.5029, "step": 18320 }, { "epoch": 4.537128712871287, "grad_norm": 0.7342982888221741, "learning_rate": 9.714172560397591e-05, "loss": 0.5023, "step": 18330 }, { "epoch": 4.53960396039604, "grad_norm": 0.7665417790412903, "learning_rate": 9.713713183504044e-05, "loss": 0.5027, "step": 18340 }, { "epoch": 4.542079207920792, "grad_norm": 0.7626704573631287, "learning_rate": 9.713253448635091e-05, "loss": 0.5038, "step": 18350 }, { "epoch": 4.544554455445544, "grad_norm": 0.7159138321876526, "learning_rate": 9.712793355825642e-05, "loss": 0.501, "step": 18360 }, { "epoch": 4.547029702970297, "grad_norm": 0.7138391137123108, "learning_rate": 9.71233290511064e-05, "loss": 0.504, "step": 18370 }, { "epoch": 4.5495049504950495, "grad_norm": 0.7373387813568115, "learning_rate": 9.711872096525051e-05, "loss": 0.5021, "step": 18380 }, { "epoch": 4.551980198019802, "grad_norm": 0.769382655620575, "learning_rate": 9.711410930103875e-05, "loss": 0.5009, "step": 18390 }, { "epoch": 4.554455445544555, "grad_norm": 0.8232816457748413, "learning_rate": 9.710949405882129e-05, "loss": 0.508, "step": 18400 }, { "epoch": 4.556930693069307, "grad_norm": 0.7323755621910095, "learning_rate": 9.710487523894867e-05, "loss": 0.5006, "step": 18410 }, { "epoch": 4.5594059405940595, "grad_norm": 0.7038050293922424, "learning_rate": 9.710025284177162e-05, "loss": 0.5049, "step": 18420 }, { "epoch": 4.561881188118812, "grad_norm": 0.75328528881073, "learning_rate": 9.709562686764122e-05, "loss": 0.501, "step": 18430 }, { "epoch": 4.564356435643564, "grad_norm": 0.7655778527259827, "learning_rate": 9.709099731690874e-05, "loss": 0.5065, "step": 18440 }, { "epoch": 4.566831683168317, "grad_norm": 0.7386390566825867, "learning_rate": 9.70863641899258e-05, "loss": 0.5075, "step": 18450 }, { "epoch": 4.569306930693069, "grad_norm": 0.7264279723167419, "learning_rate": 9.708172748704425e-05, "loss": 0.5044, "step": 18460 }, { "epoch": 4.571782178217822, "grad_norm": 0.791502833366394, "learning_rate": 9.707708720861619e-05, "loss": 0.5001, "step": 18470 }, { "epoch": 4.574257425742574, "grad_norm": 0.7252889275550842, "learning_rate": 9.707244335499406e-05, "loss": 0.5004, "step": 18480 }, { "epoch": 4.576732673267327, "grad_norm": 0.7107462286949158, "learning_rate": 9.70677959265305e-05, "loss": 0.5044, "step": 18490 }, { "epoch": 4.579207920792079, "grad_norm": 0.6970977783203125, "learning_rate": 9.706314492357847e-05, "loss": 0.5006, "step": 18500 }, { "epoch": 4.5816831683168315, "grad_norm": 0.7439764142036438, "learning_rate": 9.705849034649114e-05, "loss": 0.5022, "step": 18510 }, { "epoch": 4.584158415841584, "grad_norm": 0.7198317646980286, "learning_rate": 9.705383219562205e-05, "loss": 0.5087, "step": 18520 }, { "epoch": 4.586633663366337, "grad_norm": 0.7442262172698975, "learning_rate": 9.704917047132493e-05, "loss": 0.5052, "step": 18530 }, { "epoch": 4.589108910891089, "grad_norm": 0.7201291918754578, "learning_rate": 9.704450517395383e-05, "loss": 0.5068, "step": 18540 }, { "epoch": 4.591584158415841, "grad_norm": 0.7192886471748352, "learning_rate": 9.7039836303863e-05, "loss": 0.5047, "step": 18550 }, { "epoch": 4.594059405940594, "grad_norm": 0.7271735668182373, "learning_rate": 9.703516386140705e-05, "loss": 0.5035, "step": 18560 }, { "epoch": 4.596534653465347, "grad_norm": 0.8821712136268616, "learning_rate": 9.703048784694081e-05, "loss": 0.507, "step": 18570 }, { "epoch": 4.599009900990099, "grad_norm": 0.7557384967803955, "learning_rate": 9.70258082608194e-05, "loss": 0.4967, "step": 18580 }, { "epoch": 4.601485148514851, "grad_norm": 0.7280278205871582, "learning_rate": 9.702112510339819e-05, "loss": 0.5025, "step": 18590 }, { "epoch": 4.603960396039604, "grad_norm": 0.7246922254562378, "learning_rate": 9.701643837503283e-05, "loss": 0.5069, "step": 18600 }, { "epoch": 4.606435643564357, "grad_norm": 0.6822279691696167, "learning_rate": 9.701174807607927e-05, "loss": 0.4973, "step": 18610 }, { "epoch": 4.608910891089109, "grad_norm": 0.7200310826301575, "learning_rate": 9.700705420689368e-05, "loss": 0.5076, "step": 18620 }, { "epoch": 4.611386138613861, "grad_norm": 0.7350950241088867, "learning_rate": 9.700235676783255e-05, "loss": 0.5057, "step": 18630 }, { "epoch": 4.6138613861386135, "grad_norm": 0.7594968676567078, "learning_rate": 9.699765575925261e-05, "loss": 0.5086, "step": 18640 }, { "epoch": 4.616336633663367, "grad_norm": 0.7830429673194885, "learning_rate": 9.699295118151088e-05, "loss": 0.5006, "step": 18650 }, { "epoch": 4.618811881188119, "grad_norm": 0.7551672458648682, "learning_rate": 9.698824303496463e-05, "loss": 0.501, "step": 18660 }, { "epoch": 4.621287128712871, "grad_norm": 0.748227059841156, "learning_rate": 9.698353131997142e-05, "loss": 0.5044, "step": 18670 }, { "epoch": 4.623762376237623, "grad_norm": 0.7669783234596252, "learning_rate": 9.697881603688907e-05, "loss": 0.4997, "step": 18680 }, { "epoch": 4.626237623762377, "grad_norm": 0.6970008611679077, "learning_rate": 9.697409718607569e-05, "loss": 0.5008, "step": 18690 }, { "epoch": 4.628712871287129, "grad_norm": 0.7448675036430359, "learning_rate": 9.696937476788961e-05, "loss": 0.4972, "step": 18700 }, { "epoch": 4.631188118811881, "grad_norm": 0.7114453911781311, "learning_rate": 9.69646487826895e-05, "loss": 0.4993, "step": 18710 }, { "epoch": 4.633663366336633, "grad_norm": 0.785087525844574, "learning_rate": 9.695991923083422e-05, "loss": 0.498, "step": 18720 }, { "epoch": 4.6361386138613865, "grad_norm": 0.7344344258308411, "learning_rate": 9.695518611268301e-05, "loss": 0.5049, "step": 18730 }, { "epoch": 4.638613861386139, "grad_norm": 0.7449827790260315, "learning_rate": 9.69504494285953e-05, "loss": 0.506, "step": 18740 }, { "epoch": 4.641089108910891, "grad_norm": 0.7092929482460022, "learning_rate": 9.69457091789308e-05, "loss": 0.505, "step": 18750 }, { "epoch": 4.643564356435643, "grad_norm": 0.7785451412200928, "learning_rate": 9.69409653640495e-05, "loss": 0.5025, "step": 18760 }, { "epoch": 4.646039603960396, "grad_norm": 0.8295140862464905, "learning_rate": 9.693621798431165e-05, "loss": 0.4994, "step": 18770 }, { "epoch": 4.648514851485149, "grad_norm": 0.7118940353393555, "learning_rate": 9.69314670400778e-05, "loss": 0.4996, "step": 18780 }, { "epoch": 4.650990099009901, "grad_norm": 0.7324670553207397, "learning_rate": 9.692671253170877e-05, "loss": 0.5069, "step": 18790 }, { "epoch": 4.653465346534653, "grad_norm": 0.6727412939071655, "learning_rate": 9.69219544595656e-05, "loss": 0.4998, "step": 18800 }, { "epoch": 4.655940594059406, "grad_norm": 0.7547934055328369, "learning_rate": 9.691719282400964e-05, "loss": 0.5025, "step": 18810 }, { "epoch": 4.658415841584159, "grad_norm": 0.8392107486724854, "learning_rate": 9.691242762540252e-05, "loss": 0.505, "step": 18820 }, { "epoch": 4.660891089108911, "grad_norm": 0.6921795606613159, "learning_rate": 9.690765886410611e-05, "loss": 0.4991, "step": 18830 }, { "epoch": 4.663366336633663, "grad_norm": 0.7571559548377991, "learning_rate": 9.690288654048257e-05, "loss": 0.4994, "step": 18840 }, { "epoch": 4.665841584158416, "grad_norm": 0.7293050289154053, "learning_rate": 9.689811065489434e-05, "loss": 0.503, "step": 18850 }, { "epoch": 4.6683168316831685, "grad_norm": 0.7052704095840454, "learning_rate": 9.68933312077041e-05, "loss": 0.503, "step": 18860 }, { "epoch": 4.670792079207921, "grad_norm": 0.7675442099571228, "learning_rate": 9.688854819927482e-05, "loss": 0.5026, "step": 18870 }, { "epoch": 4.673267326732673, "grad_norm": 0.7130471467971802, "learning_rate": 9.688376162996975e-05, "loss": 0.5003, "step": 18880 }, { "epoch": 4.675742574257426, "grad_norm": 0.736242949962616, "learning_rate": 9.68789715001524e-05, "loss": 0.5015, "step": 18890 }, { "epoch": 4.678217821782178, "grad_norm": 0.712556779384613, "learning_rate": 9.687417781018652e-05, "loss": 0.5035, "step": 18900 }, { "epoch": 4.680693069306931, "grad_norm": 0.7624417543411255, "learning_rate": 9.686938056043617e-05, "loss": 0.5007, "step": 18910 }, { "epoch": 4.683168316831683, "grad_norm": 0.7258579730987549, "learning_rate": 9.686457975126571e-05, "loss": 0.4997, "step": 18920 }, { "epoch": 4.685643564356436, "grad_norm": 0.7054364681243896, "learning_rate": 9.685977538303968e-05, "loss": 0.5066, "step": 18930 }, { "epoch": 4.688118811881188, "grad_norm": 0.7316915988922119, "learning_rate": 9.685496745612295e-05, "loss": 0.4969, "step": 18940 }, { "epoch": 4.6905940594059405, "grad_norm": 0.7386461496353149, "learning_rate": 9.685015597088066e-05, "loss": 0.5005, "step": 18950 }, { "epoch": 4.693069306930693, "grad_norm": 0.7511268258094788, "learning_rate": 9.68453409276782e-05, "loss": 0.5025, "step": 18960 }, { "epoch": 4.695544554455445, "grad_norm": 0.7234663963317871, "learning_rate": 9.684052232688128e-05, "loss": 0.5036, "step": 18970 }, { "epoch": 4.698019801980198, "grad_norm": 0.7071054577827454, "learning_rate": 9.683570016885579e-05, "loss": 0.5007, "step": 18980 }, { "epoch": 4.7004950495049505, "grad_norm": 0.7446286082267761, "learning_rate": 9.683087445396796e-05, "loss": 0.5025, "step": 18990 }, { "epoch": 4.702970297029703, "grad_norm": 0.763713538646698, "learning_rate": 9.682604518258427e-05, "loss": 0.501, "step": 19000 }, { "epoch": 4.705445544554456, "grad_norm": 0.7370991706848145, "learning_rate": 9.682121235507146e-05, "loss": 0.5033, "step": 19010 }, { "epoch": 4.707920792079208, "grad_norm": 0.7034388184547424, "learning_rate": 9.68163759717966e-05, "loss": 0.4992, "step": 19020 }, { "epoch": 4.71039603960396, "grad_norm": 0.6954047083854675, "learning_rate": 9.681153603312692e-05, "loss": 0.4959, "step": 19030 }, { "epoch": 4.712871287128713, "grad_norm": 0.6895144581794739, "learning_rate": 9.680669253943003e-05, "loss": 0.5049, "step": 19040 }, { "epoch": 4.715346534653465, "grad_norm": 0.691057562828064, "learning_rate": 9.680184549107372e-05, "loss": 0.5007, "step": 19050 }, { "epoch": 4.717821782178218, "grad_norm": 0.6940414309501648, "learning_rate": 9.67969948884261e-05, "loss": 0.5012, "step": 19060 }, { "epoch": 4.72029702970297, "grad_norm": 0.7468303442001343, "learning_rate": 9.679214073185558e-05, "loss": 0.4985, "step": 19070 }, { "epoch": 4.7227722772277225, "grad_norm": 0.7705143690109253, "learning_rate": 9.678728302173075e-05, "loss": 0.4996, "step": 19080 }, { "epoch": 4.725247524752476, "grad_norm": 0.7348966002464294, "learning_rate": 9.678242175842056e-05, "loss": 0.5, "step": 19090 }, { "epoch": 4.727722772277228, "grad_norm": 0.7246736884117126, "learning_rate": 9.677755694229416e-05, "loss": 0.4941, "step": 19100 }, { "epoch": 4.73019801980198, "grad_norm": 0.729275643825531, "learning_rate": 9.677268857372103e-05, "loss": 0.4993, "step": 19110 }, { "epoch": 4.732673267326732, "grad_norm": 0.7120395302772522, "learning_rate": 9.676781665307087e-05, "loss": 0.4985, "step": 19120 }, { "epoch": 4.735148514851485, "grad_norm": 0.7745620012283325, "learning_rate": 9.676294118071367e-05, "loss": 0.5047, "step": 19130 }, { "epoch": 4.737623762376238, "grad_norm": 0.6955490112304688, "learning_rate": 9.67580621570197e-05, "loss": 0.5005, "step": 19140 }, { "epoch": 4.74009900990099, "grad_norm": 0.7172293663024902, "learning_rate": 9.675317958235948e-05, "loss": 0.4956, "step": 19150 }, { "epoch": 4.742574257425742, "grad_norm": 0.6814705729484558, "learning_rate": 9.674829345710382e-05, "loss": 0.5007, "step": 19160 }, { "epoch": 4.7450495049504955, "grad_norm": 0.7222841382026672, "learning_rate": 9.674340378162378e-05, "loss": 0.4996, "step": 19170 }, { "epoch": 4.747524752475248, "grad_norm": 0.6526257991790771, "learning_rate": 9.673851055629071e-05, "loss": 0.499, "step": 19180 }, { "epoch": 4.75, "grad_norm": 0.6942418813705444, "learning_rate": 9.67336137814762e-05, "loss": 0.5043, "step": 19190 }, { "epoch": 4.752475247524752, "grad_norm": 0.7456153035163879, "learning_rate": 9.672871345755215e-05, "loss": 0.4978, "step": 19200 }, { "epoch": 4.7549504950495045, "grad_norm": 0.713028073310852, "learning_rate": 9.672380958489069e-05, "loss": 0.5063, "step": 19210 }, { "epoch": 4.757425742574258, "grad_norm": 0.7133201956748962, "learning_rate": 9.671890216386425e-05, "loss": 0.5021, "step": 19220 }, { "epoch": 4.75990099009901, "grad_norm": 0.7430087924003601, "learning_rate": 9.67139911948455e-05, "loss": 0.5002, "step": 19230 }, { "epoch": 4.762376237623762, "grad_norm": 0.7502160668373108, "learning_rate": 9.670907667820742e-05, "loss": 0.4991, "step": 19240 }, { "epoch": 4.764851485148515, "grad_norm": 0.6995401978492737, "learning_rate": 9.670415861432319e-05, "loss": 0.4961, "step": 19250 }, { "epoch": 4.767326732673268, "grad_norm": 0.71341872215271, "learning_rate": 9.669923700356635e-05, "loss": 0.5004, "step": 19260 }, { "epoch": 4.76980198019802, "grad_norm": 0.7126432061195374, "learning_rate": 9.669431184631065e-05, "loss": 0.5044, "step": 19270 }, { "epoch": 4.772277227722772, "grad_norm": 0.721342146396637, "learning_rate": 9.668938314293011e-05, "loss": 0.499, "step": 19280 }, { "epoch": 4.774752475247524, "grad_norm": 0.7496269941329956, "learning_rate": 9.668445089379905e-05, "loss": 0.5018, "step": 19290 }, { "epoch": 4.7772277227722775, "grad_norm": 0.7486146688461304, "learning_rate": 9.667951509929205e-05, "loss": 0.5023, "step": 19300 }, { "epoch": 4.77970297029703, "grad_norm": 0.7387816905975342, "learning_rate": 9.667457575978391e-05, "loss": 0.5008, "step": 19310 }, { "epoch": 4.782178217821782, "grad_norm": 0.6996147036552429, "learning_rate": 9.666963287564979e-05, "loss": 0.5053, "step": 19320 }, { "epoch": 4.784653465346535, "grad_norm": 0.6796615719795227, "learning_rate": 9.666468644726503e-05, "loss": 0.5005, "step": 19330 }, { "epoch": 4.787128712871287, "grad_norm": 0.7364220023155212, "learning_rate": 9.66597364750053e-05, "loss": 0.5024, "step": 19340 }, { "epoch": 4.78960396039604, "grad_norm": 0.7079371213912964, "learning_rate": 9.665478295924651e-05, "loss": 0.5015, "step": 19350 }, { "epoch": 4.792079207920792, "grad_norm": 0.6891046166419983, "learning_rate": 9.664982590036484e-05, "loss": 0.507, "step": 19360 }, { "epoch": 4.794554455445544, "grad_norm": 0.7252429127693176, "learning_rate": 9.664486529873678e-05, "loss": 0.5016, "step": 19370 }, { "epoch": 4.797029702970297, "grad_norm": 0.750275731086731, "learning_rate": 9.663990115473901e-05, "loss": 0.5013, "step": 19380 }, { "epoch": 4.7995049504950495, "grad_norm": 0.733636200428009, "learning_rate": 9.663493346874854e-05, "loss": 0.4927, "step": 19390 }, { "epoch": 4.801980198019802, "grad_norm": 0.763778805732727, "learning_rate": 9.662996224114265e-05, "loss": 0.4996, "step": 19400 }, { "epoch": 4.804455445544555, "grad_norm": 0.7760623097419739, "learning_rate": 9.662498747229886e-05, "loss": 0.5038, "step": 19410 }, { "epoch": 4.806930693069307, "grad_norm": 0.7560349106788635, "learning_rate": 9.662000916259497e-05, "loss": 0.4979, "step": 19420 }, { "epoch": 4.8094059405940595, "grad_norm": 0.7395477294921875, "learning_rate": 9.661502731240903e-05, "loss": 0.4995, "step": 19430 }, { "epoch": 4.811881188118812, "grad_norm": 0.6810795664787292, "learning_rate": 9.661004192211942e-05, "loss": 0.5026, "step": 19440 }, { "epoch": 4.814356435643564, "grad_norm": 0.7813180685043335, "learning_rate": 9.660505299210471e-05, "loss": 0.5049, "step": 19450 }, { "epoch": 4.816831683168317, "grad_norm": 0.6778058409690857, "learning_rate": 9.66000605227438e-05, "loss": 0.4979, "step": 19460 }, { "epoch": 4.819306930693069, "grad_norm": 0.708793580532074, "learning_rate": 9.659506451441583e-05, "loss": 0.5006, "step": 19470 }, { "epoch": 4.821782178217822, "grad_norm": 0.7392145395278931, "learning_rate": 9.659006496750022e-05, "loss": 0.4962, "step": 19480 }, { "epoch": 4.824257425742574, "grad_norm": 0.7178135514259338, "learning_rate": 9.658506188237662e-05, "loss": 0.5072, "step": 19490 }, { "epoch": 4.826732673267327, "grad_norm": 0.7096713781356812, "learning_rate": 9.658005525942502e-05, "loss": 0.504, "step": 19500 }, { "epoch": 4.829207920792079, "grad_norm": 0.7260270714759827, "learning_rate": 9.657504509902562e-05, "loss": 0.5016, "step": 19510 }, { "epoch": 4.8316831683168315, "grad_norm": 0.7590736746788025, "learning_rate": 9.657003140155892e-05, "loss": 0.4978, "step": 19520 }, { "epoch": 4.834158415841584, "grad_norm": 0.7401646375656128, "learning_rate": 9.656501416740566e-05, "loss": 0.5042, "step": 19530 }, { "epoch": 4.836633663366337, "grad_norm": 0.8104812502861023, "learning_rate": 9.655999339694688e-05, "loss": 0.5002, "step": 19540 }, { "epoch": 4.839108910891089, "grad_norm": 0.6992715001106262, "learning_rate": 9.655496909056387e-05, "loss": 0.4995, "step": 19550 }, { "epoch": 4.841584158415841, "grad_norm": 0.7281361222267151, "learning_rate": 9.654994124863818e-05, "loss": 0.4999, "step": 19560 }, { "epoch": 4.844059405940594, "grad_norm": 0.7155736088752747, "learning_rate": 9.654490987155168e-05, "loss": 0.5057, "step": 19570 }, { "epoch": 4.846534653465347, "grad_norm": 0.7077170610427856, "learning_rate": 9.653987495968644e-05, "loss": 0.4988, "step": 19580 }, { "epoch": 4.849009900990099, "grad_norm": 0.7377507090568542, "learning_rate": 9.653483651342482e-05, "loss": 0.5049, "step": 19590 }, { "epoch": 4.851485148514851, "grad_norm": 0.7289764881134033, "learning_rate": 9.652979453314947e-05, "loss": 0.4993, "step": 19600 }, { "epoch": 4.853960396039604, "grad_norm": 0.6879473328590393, "learning_rate": 9.652474901924332e-05, "loss": 0.4988, "step": 19610 }, { "epoch": 4.856435643564357, "grad_norm": 0.7213262915611267, "learning_rate": 9.651969997208949e-05, "loss": 0.4983, "step": 19620 }, { "epoch": 4.858910891089109, "grad_norm": 0.698025107383728, "learning_rate": 9.651464739207146e-05, "loss": 0.4985, "step": 19630 }, { "epoch": 4.861386138613861, "grad_norm": 0.748444139957428, "learning_rate": 9.650959127957293e-05, "loss": 0.5049, "step": 19640 }, { "epoch": 4.8638613861386135, "grad_norm": 0.7555550336837769, "learning_rate": 9.650453163497789e-05, "loss": 0.4987, "step": 19650 }, { "epoch": 4.866336633663367, "grad_norm": 0.7450690865516663, "learning_rate": 9.649946845867058e-05, "loss": 0.5052, "step": 19660 }, { "epoch": 4.868811881188119, "grad_norm": 0.6969115734100342, "learning_rate": 9.649440175103548e-05, "loss": 0.4937, "step": 19670 }, { "epoch": 4.871287128712871, "grad_norm": 0.711587131023407, "learning_rate": 9.648933151245742e-05, "loss": 0.4999, "step": 19680 }, { "epoch": 4.873762376237623, "grad_norm": 0.7162973880767822, "learning_rate": 9.648425774332144e-05, "loss": 0.4992, "step": 19690 }, { "epoch": 4.876237623762377, "grad_norm": 0.7172423005104065, "learning_rate": 9.647918044401285e-05, "loss": 0.49, "step": 19700 }, { "epoch": 4.878712871287129, "grad_norm": 0.703069806098938, "learning_rate": 9.647409961491724e-05, "loss": 0.5002, "step": 19710 }, { "epoch": 4.881188118811881, "grad_norm": 0.7221092581748962, "learning_rate": 9.646901525642046e-05, "loss": 0.4933, "step": 19720 }, { "epoch": 4.883663366336633, "grad_norm": 0.6974915862083435, "learning_rate": 9.646392736890865e-05, "loss": 0.501, "step": 19730 }, { "epoch": 4.8861386138613865, "grad_norm": 0.7692386507987976, "learning_rate": 9.64588359527682e-05, "loss": 0.4994, "step": 19740 }, { "epoch": 4.888613861386139, "grad_norm": 0.7459315061569214, "learning_rate": 9.645374100838573e-05, "loss": 0.5034, "step": 19750 }, { "epoch": 4.891089108910891, "grad_norm": 0.6789430379867554, "learning_rate": 9.644864253614823e-05, "loss": 0.5017, "step": 19760 }, { "epoch": 4.893564356435643, "grad_norm": 0.7147711515426636, "learning_rate": 9.644354053644285e-05, "loss": 0.495, "step": 19770 }, { "epoch": 4.896039603960396, "grad_norm": 0.7230862379074097, "learning_rate": 9.643843500965705e-05, "loss": 0.4996, "step": 19780 }, { "epoch": 4.898514851485149, "grad_norm": 0.7104310989379883, "learning_rate": 9.643332595617861e-05, "loss": 0.5056, "step": 19790 }, { "epoch": 4.900990099009901, "grad_norm": 0.7487738132476807, "learning_rate": 9.642821337639548e-05, "loss": 0.5049, "step": 19800 }, { "epoch": 4.903465346534653, "grad_norm": 0.7139663100242615, "learning_rate": 9.642309727069596e-05, "loss": 0.5005, "step": 19810 }, { "epoch": 4.905940594059406, "grad_norm": 0.7151985168457031, "learning_rate": 9.641797763946855e-05, "loss": 0.5015, "step": 19820 }, { "epoch": 4.908415841584159, "grad_norm": 0.7156112790107727, "learning_rate": 9.641285448310207e-05, "loss": 0.4988, "step": 19830 }, { "epoch": 4.910891089108911, "grad_norm": 0.6866828799247742, "learning_rate": 9.640772780198559e-05, "loss": 0.5018, "step": 19840 }, { "epoch": 4.913366336633663, "grad_norm": 0.6983682513237, "learning_rate": 9.640259759650845e-05, "loss": 0.4986, "step": 19850 }, { "epoch": 4.915841584158416, "grad_norm": 0.6800804734230042, "learning_rate": 9.639746386706026e-05, "loss": 0.4949, "step": 19860 }, { "epoch": 4.9183168316831685, "grad_norm": 0.6867223381996155, "learning_rate": 9.639232661403089e-05, "loss": 0.5016, "step": 19870 }, { "epoch": 4.920792079207921, "grad_norm": 0.7012162208557129, "learning_rate": 9.638718583781047e-05, "loss": 0.5002, "step": 19880 }, { "epoch": 4.923267326732673, "grad_norm": 0.7294097542762756, "learning_rate": 9.63820415387894e-05, "loss": 0.5007, "step": 19890 }, { "epoch": 4.925742574257426, "grad_norm": 0.7350699305534363, "learning_rate": 9.637689371735838e-05, "loss": 0.5027, "step": 19900 }, { "epoch": 4.928217821782178, "grad_norm": 0.7207146883010864, "learning_rate": 9.637174237390836e-05, "loss": 0.4995, "step": 19910 }, { "epoch": 4.930693069306931, "grad_norm": 0.7574730515480042, "learning_rate": 9.636658750883052e-05, "loss": 0.5041, "step": 19920 }, { "epoch": 4.933168316831683, "grad_norm": 0.7312963604927063, "learning_rate": 9.636142912251635e-05, "loss": 0.5023, "step": 19930 }, { "epoch": 4.935643564356436, "grad_norm": 0.7350485920906067, "learning_rate": 9.63562672153576e-05, "loss": 0.4936, "step": 19940 }, { "epoch": 4.938118811881188, "grad_norm": 0.7752833962440491, "learning_rate": 9.635110178774627e-05, "loss": 0.4917, "step": 19950 }, { "epoch": 4.9405940594059405, "grad_norm": 0.7827439904212952, "learning_rate": 9.634593284007468e-05, "loss": 0.4958, "step": 19960 }, { "epoch": 4.943069306930693, "grad_norm": 0.6767786145210266, "learning_rate": 9.63407603727353e-05, "loss": 0.4969, "step": 19970 }, { "epoch": 4.945544554455445, "grad_norm": 0.6648345589637756, "learning_rate": 9.633558438612103e-05, "loss": 0.5014, "step": 19980 }, { "epoch": 4.948019801980198, "grad_norm": 0.6840981245040894, "learning_rate": 9.63304048806249e-05, "loss": 0.4986, "step": 19990 }, { "epoch": 4.9504950495049505, "grad_norm": 0.7405098080635071, "learning_rate": 9.632522185664027e-05, "loss": 0.4979, "step": 20000 }, { "epoch": 4.952970297029703, "grad_norm": 0.7224273085594177, "learning_rate": 9.632003531456076e-05, "loss": 0.4973, "step": 20010 }, { "epoch": 4.955445544554456, "grad_norm": 0.6775679588317871, "learning_rate": 9.631484525478026e-05, "loss": 0.4947, "step": 20020 }, { "epoch": 4.957920792079208, "grad_norm": 0.6925530433654785, "learning_rate": 9.630965167769293e-05, "loss": 0.504, "step": 20030 }, { "epoch": 4.96039603960396, "grad_norm": 0.7113035917282104, "learning_rate": 9.630445458369313e-05, "loss": 0.4964, "step": 20040 }, { "epoch": 4.962871287128713, "grad_norm": 0.7328691482543945, "learning_rate": 9.629925397317563e-05, "loss": 0.4979, "step": 20050 }, { "epoch": 4.965346534653465, "grad_norm": 0.6960362195968628, "learning_rate": 9.629404984653532e-05, "loss": 0.4957, "step": 20060 }, { "epoch": 4.967821782178218, "grad_norm": 0.727414071559906, "learning_rate": 9.628884220416743e-05, "loss": 0.5027, "step": 20070 }, { "epoch": 4.97029702970297, "grad_norm": 0.7327008843421936, "learning_rate": 9.628363104646747e-05, "loss": 0.4983, "step": 20080 }, { "epoch": 4.9727722772277225, "grad_norm": 0.7393609285354614, "learning_rate": 9.627841637383117e-05, "loss": 0.4947, "step": 20090 }, { "epoch": 4.975247524752476, "grad_norm": 0.6847256422042847, "learning_rate": 9.627319818665455e-05, "loss": 0.4926, "step": 20100 }, { "epoch": 4.977722772277228, "grad_norm": 0.7245068550109863, "learning_rate": 9.626797648533392e-05, "loss": 0.4982, "step": 20110 }, { "epoch": 4.98019801980198, "grad_norm": 0.7218835949897766, "learning_rate": 9.62627512702658e-05, "loss": 0.5058, "step": 20120 }, { "epoch": 4.982673267326732, "grad_norm": 0.7117132544517517, "learning_rate": 9.625752254184706e-05, "loss": 0.4997, "step": 20130 }, { "epoch": 4.985148514851485, "grad_norm": 0.6889665126800537, "learning_rate": 9.625229030047473e-05, "loss": 0.5006, "step": 20140 }, { "epoch": 4.987623762376238, "grad_norm": 0.6462411880493164, "learning_rate": 9.624705454654618e-05, "loss": 0.4953, "step": 20150 }, { "epoch": 4.99009900990099, "grad_norm": 0.7160190343856812, "learning_rate": 9.624181528045906e-05, "loss": 0.4979, "step": 20160 }, { "epoch": 4.992574257425742, "grad_norm": 0.7252028584480286, "learning_rate": 9.623657250261124e-05, "loss": 0.5017, "step": 20170 }, { "epoch": 4.9950495049504955, "grad_norm": 0.6633305549621582, "learning_rate": 9.623132621340088e-05, "loss": 0.5029, "step": 20180 }, { "epoch": 4.997524752475248, "grad_norm": 0.7455281615257263, "learning_rate": 9.622607641322637e-05, "loss": 0.502, "step": 20190 }, { "epoch": 5.0, "grad_norm": 0.7459938526153564, "learning_rate": 9.622082310248643e-05, "loss": 0.496, "step": 20200 }, { "epoch": 5.002475247524752, "grad_norm": 0.705973744392395, "learning_rate": 9.621556628158002e-05, "loss": 0.4989, "step": 20210 }, { "epoch": 5.0049504950495045, "grad_norm": 0.7065191864967346, "learning_rate": 9.621030595090635e-05, "loss": 0.5023, "step": 20220 }, { "epoch": 5.007425742574258, "grad_norm": 0.7703894376754761, "learning_rate": 9.620504211086488e-05, "loss": 0.5033, "step": 20230 }, { "epoch": 5.00990099009901, "grad_norm": 0.7350071668624878, "learning_rate": 9.61997747618554e-05, "loss": 0.4964, "step": 20240 }, { "epoch": 5.012376237623762, "grad_norm": 0.7157539129257202, "learning_rate": 9.619450390427792e-05, "loss": 0.5017, "step": 20250 }, { "epoch": 5.014851485148514, "grad_norm": 0.6703144311904907, "learning_rate": 9.618922953853274e-05, "loss": 0.4975, "step": 20260 }, { "epoch": 5.017326732673268, "grad_norm": 0.6696843504905701, "learning_rate": 9.618395166502037e-05, "loss": 0.4993, "step": 20270 }, { "epoch": 5.01980198019802, "grad_norm": 0.7157815098762512, "learning_rate": 9.617867028414167e-05, "loss": 0.5009, "step": 20280 }, { "epoch": 5.022277227722772, "grad_norm": 0.6499803066253662, "learning_rate": 9.617338539629772e-05, "loss": 0.5052, "step": 20290 }, { "epoch": 5.024752475247524, "grad_norm": 0.7225094437599182, "learning_rate": 9.616809700188987e-05, "loss": 0.5006, "step": 20300 }, { "epoch": 5.0272277227722775, "grad_norm": 0.7135584950447083, "learning_rate": 9.616280510131974e-05, "loss": 0.507, "step": 20310 }, { "epoch": 5.02970297029703, "grad_norm": 0.7165998816490173, "learning_rate": 9.615750969498918e-05, "loss": 0.4988, "step": 20320 }, { "epoch": 5.032178217821782, "grad_norm": 0.6813362836837769, "learning_rate": 9.615221078330041e-05, "loss": 0.498, "step": 20330 }, { "epoch": 5.034653465346534, "grad_norm": 0.6462497711181641, "learning_rate": 9.614690836665579e-05, "loss": 0.4986, "step": 20340 }, { "epoch": 5.037128712871287, "grad_norm": 0.7038962244987488, "learning_rate": 9.614160244545802e-05, "loss": 0.4941, "step": 20350 }, { "epoch": 5.03960396039604, "grad_norm": 0.7168846130371094, "learning_rate": 9.613629302011006e-05, "loss": 0.5022, "step": 20360 }, { "epoch": 5.042079207920792, "grad_norm": 0.7257160544395447, "learning_rate": 9.613098009101512e-05, "loss": 0.493, "step": 20370 }, { "epoch": 5.044554455445544, "grad_norm": 0.7427330613136292, "learning_rate": 9.612566365857667e-05, "loss": 0.4984, "step": 20380 }, { "epoch": 5.047029702970297, "grad_norm": 0.7084333300590515, "learning_rate": 9.612034372319847e-05, "loss": 0.4987, "step": 20390 }, { "epoch": 5.0495049504950495, "grad_norm": 0.69693922996521, "learning_rate": 9.611502028528454e-05, "loss": 0.5027, "step": 20400 }, { "epoch": 5.051980198019802, "grad_norm": 0.7311951518058777, "learning_rate": 9.610969334523914e-05, "loss": 0.4993, "step": 20410 }, { "epoch": 5.054455445544554, "grad_norm": 0.6948270201683044, "learning_rate": 9.610436290346684e-05, "loss": 0.4943, "step": 20420 }, { "epoch": 5.056930693069307, "grad_norm": 0.7372270226478577, "learning_rate": 9.609902896037243e-05, "loss": 0.5004, "step": 20430 }, { "epoch": 5.0594059405940595, "grad_norm": 0.7404931783676147, "learning_rate": 9.609369151636101e-05, "loss": 0.501, "step": 20440 }, { "epoch": 5.061881188118812, "grad_norm": 0.7357115149497986, "learning_rate": 9.60883505718379e-05, "loss": 0.4946, "step": 20450 }, { "epoch": 5.064356435643564, "grad_norm": 0.6738870143890381, "learning_rate": 9.608300612720873e-05, "loss": 0.5032, "step": 20460 }, { "epoch": 5.066831683168317, "grad_norm": 0.7144834399223328, "learning_rate": 9.607765818287936e-05, "loss": 0.5003, "step": 20470 }, { "epoch": 5.069306930693069, "grad_norm": 0.7393369674682617, "learning_rate": 9.607230673925594e-05, "loss": 0.5009, "step": 20480 }, { "epoch": 5.071782178217822, "grad_norm": 0.6923773288726807, "learning_rate": 9.606695179674486e-05, "loss": 0.4962, "step": 20490 }, { "epoch": 5.074257425742574, "grad_norm": 0.754851758480072, "learning_rate": 9.606159335575282e-05, "loss": 0.4951, "step": 20500 }, { "epoch": 5.076732673267327, "grad_norm": 0.7168545126914978, "learning_rate": 9.605623141668675e-05, "loss": 0.5013, "step": 20510 }, { "epoch": 5.079207920792079, "grad_norm": 0.6912991404533386, "learning_rate": 9.605086597995383e-05, "loss": 0.4935, "step": 20520 }, { "epoch": 5.0816831683168315, "grad_norm": 0.6904762387275696, "learning_rate": 9.604549704596157e-05, "loss": 0.5023, "step": 20530 }, { "epoch": 5.084158415841584, "grad_norm": 0.6839058995246887, "learning_rate": 9.604012461511767e-05, "loss": 0.4992, "step": 20540 }, { "epoch": 5.086633663366337, "grad_norm": 0.7061988115310669, "learning_rate": 9.603474868783014e-05, "loss": 0.503, "step": 20550 }, { "epoch": 5.089108910891089, "grad_norm": 0.6709898710250854, "learning_rate": 9.602936926450726e-05, "loss": 0.5042, "step": 20560 }, { "epoch": 5.091584158415841, "grad_norm": 0.690093457698822, "learning_rate": 9.602398634555754e-05, "loss": 0.4984, "step": 20570 }, { "epoch": 5.094059405940594, "grad_norm": 0.7285270094871521, "learning_rate": 9.60185999313898e-05, "loss": 0.4978, "step": 20580 }, { "epoch": 5.096534653465347, "grad_norm": 0.6802079677581787, "learning_rate": 9.60132100224131e-05, "loss": 0.499, "step": 20590 }, { "epoch": 5.099009900990099, "grad_norm": 0.6831924319267273, "learning_rate": 9.600781661903675e-05, "loss": 0.4995, "step": 20600 }, { "epoch": 5.101485148514851, "grad_norm": 0.7116832733154297, "learning_rate": 9.600241972167035e-05, "loss": 0.4977, "step": 20610 }, { "epoch": 5.103960396039604, "grad_norm": 0.7086892127990723, "learning_rate": 9.599701933072376e-05, "loss": 0.4978, "step": 20620 }, { "epoch": 5.106435643564357, "grad_norm": 0.7052211761474609, "learning_rate": 9.599161544660711e-05, "loss": 0.4959, "step": 20630 }, { "epoch": 5.108910891089109, "grad_norm": 0.7327545881271362, "learning_rate": 9.598620806973078e-05, "loss": 0.4977, "step": 20640 }, { "epoch": 5.111386138613861, "grad_norm": 0.6666846871376038, "learning_rate": 9.598079720050544e-05, "loss": 0.4956, "step": 20650 }, { "epoch": 5.1138613861386135, "grad_norm": 0.6614166498184204, "learning_rate": 9.597538283934199e-05, "loss": 0.4968, "step": 20660 }, { "epoch": 5.116336633663367, "grad_norm": 0.7094413042068481, "learning_rate": 9.596996498665162e-05, "loss": 0.5015, "step": 20670 }, { "epoch": 5.118811881188119, "grad_norm": 0.6968229413032532, "learning_rate": 9.596454364284579e-05, "loss": 0.5019, "step": 20680 }, { "epoch": 5.121287128712871, "grad_norm": 0.6760798096656799, "learning_rate": 9.595911880833621e-05, "loss": 0.494, "step": 20690 }, { "epoch": 5.123762376237623, "grad_norm": 0.6998032331466675, "learning_rate": 9.595369048353486e-05, "loss": 0.4944, "step": 20700 }, { "epoch": 5.126237623762377, "grad_norm": 0.7091021537780762, "learning_rate": 9.594825866885399e-05, "loss": 0.4974, "step": 20710 }, { "epoch": 5.128712871287129, "grad_norm": 0.6804271340370178, "learning_rate": 9.594282336470611e-05, "loss": 0.4952, "step": 20720 }, { "epoch": 5.131188118811881, "grad_norm": 0.7419680953025818, "learning_rate": 9.593738457150398e-05, "loss": 0.4945, "step": 20730 }, { "epoch": 5.133663366336633, "grad_norm": 0.7084131836891174, "learning_rate": 9.593194228966067e-05, "loss": 0.4971, "step": 20740 }, { "epoch": 5.1361386138613865, "grad_norm": 0.7148073315620422, "learning_rate": 9.592649651958944e-05, "loss": 0.501, "step": 20750 }, { "epoch": 5.138613861386139, "grad_norm": 0.6939783096313477, "learning_rate": 9.592104726170393e-05, "loss": 0.4994, "step": 20760 }, { "epoch": 5.141089108910891, "grad_norm": 0.6900985836982727, "learning_rate": 9.59155945164179e-05, "loss": 0.4934, "step": 20770 }, { "epoch": 5.143564356435643, "grad_norm": 0.7520175576210022, "learning_rate": 9.591013828414548e-05, "loss": 0.4981, "step": 20780 }, { "epoch": 5.146039603960396, "grad_norm": 0.6907670497894287, "learning_rate": 9.590467856530106e-05, "loss": 0.5019, "step": 20790 }, { "epoch": 5.148514851485149, "grad_norm": 0.7003084421157837, "learning_rate": 9.589921536029923e-05, "loss": 0.5019, "step": 20800 }, { "epoch": 5.150990099009901, "grad_norm": 0.7166914343833923, "learning_rate": 9.58937486695549e-05, "loss": 0.5041, "step": 20810 }, { "epoch": 5.153465346534653, "grad_norm": 0.722444474697113, "learning_rate": 9.588827849348324e-05, "loss": 0.4976, "step": 20820 }, { "epoch": 5.155940594059406, "grad_norm": 0.7282170057296753, "learning_rate": 9.588280483249966e-05, "loss": 0.494, "step": 20830 }, { "epoch": 5.158415841584159, "grad_norm": 0.6761897802352905, "learning_rate": 9.587732768701986e-05, "loss": 0.503, "step": 20840 }, { "epoch": 5.160891089108911, "grad_norm": 0.684999406337738, "learning_rate": 9.587184705745977e-05, "loss": 0.4968, "step": 20850 }, { "epoch": 5.163366336633663, "grad_norm": 0.6723262667655945, "learning_rate": 9.586636294423564e-05, "loss": 0.5019, "step": 20860 }, { "epoch": 5.165841584158416, "grad_norm": 0.7180541157722473, "learning_rate": 9.586087534776394e-05, "loss": 0.4983, "step": 20870 }, { "epoch": 5.1683168316831685, "grad_norm": 0.6884577870368958, "learning_rate": 9.585538426846141e-05, "loss": 0.4972, "step": 20880 }, { "epoch": 5.170792079207921, "grad_norm": 0.69861900806427, "learning_rate": 9.584988970674504e-05, "loss": 0.5023, "step": 20890 }, { "epoch": 5.173267326732673, "grad_norm": 0.7242107391357422, "learning_rate": 9.584439166303217e-05, "loss": 0.4959, "step": 20900 }, { "epoch": 5.175742574257426, "grad_norm": 0.7097012400627136, "learning_rate": 9.583889013774026e-05, "loss": 0.4976, "step": 20910 }, { "epoch": 5.178217821782178, "grad_norm": 0.6893798112869263, "learning_rate": 9.583338513128717e-05, "loss": 0.4979, "step": 20920 }, { "epoch": 5.180693069306931, "grad_norm": 0.7006474733352661, "learning_rate": 9.582787664409097e-05, "loss": 0.4969, "step": 20930 }, { "epoch": 5.183168316831683, "grad_norm": 0.664228081703186, "learning_rate": 9.582236467656995e-05, "loss": 0.4948, "step": 20940 }, { "epoch": 5.185643564356436, "grad_norm": 0.7150835394859314, "learning_rate": 9.581684922914276e-05, "loss": 0.4995, "step": 20950 }, { "epoch": 5.188118811881188, "grad_norm": 0.6943761706352234, "learning_rate": 9.581133030222822e-05, "loss": 0.5004, "step": 20960 }, { "epoch": 5.1905940594059405, "grad_norm": 0.6769047975540161, "learning_rate": 9.580580789624548e-05, "loss": 0.4951, "step": 20970 }, { "epoch": 5.193069306930693, "grad_norm": 0.6909745931625366, "learning_rate": 9.58002820116139e-05, "loss": 0.4928, "step": 20980 }, { "epoch": 5.195544554455446, "grad_norm": 0.6374338865280151, "learning_rate": 9.579475264875317e-05, "loss": 0.4984, "step": 20990 }, { "epoch": 5.198019801980198, "grad_norm": 0.6630854606628418, "learning_rate": 9.578921980808321e-05, "loss": 0.4976, "step": 21000 }, { "epoch": 5.2004950495049505, "grad_norm": 0.6869702935218811, "learning_rate": 9.578368349002418e-05, "loss": 0.4986, "step": 21010 }, { "epoch": 5.202970297029703, "grad_norm": 0.7255537509918213, "learning_rate": 9.577814369499652e-05, "loss": 0.4943, "step": 21020 }, { "epoch": 5.205445544554456, "grad_norm": 0.6817488670349121, "learning_rate": 9.577260042342097e-05, "loss": 0.4982, "step": 21030 }, { "epoch": 5.207920792079208, "grad_norm": 0.7224024534225464, "learning_rate": 9.57670536757185e-05, "loss": 0.4989, "step": 21040 }, { "epoch": 5.21039603960396, "grad_norm": 0.7142822742462158, "learning_rate": 9.576150345231032e-05, "loss": 0.4959, "step": 21050 }, { "epoch": 5.212871287128713, "grad_norm": 0.6532909870147705, "learning_rate": 9.575594975361796e-05, "loss": 0.4966, "step": 21060 }, { "epoch": 5.215346534653466, "grad_norm": 0.6949794292449951, "learning_rate": 9.575039258006318e-05, "loss": 0.5029, "step": 21070 }, { "epoch": 5.217821782178218, "grad_norm": 0.7165389060974121, "learning_rate": 9.5744831932068e-05, "loss": 0.4986, "step": 21080 }, { "epoch": 5.22029702970297, "grad_norm": 0.6803778409957886, "learning_rate": 9.573926781005474e-05, "loss": 0.4945, "step": 21090 }, { "epoch": 5.2227722772277225, "grad_norm": 0.6965579390525818, "learning_rate": 9.573370021444594e-05, "loss": 0.4915, "step": 21100 }, { "epoch": 5.225247524752476, "grad_norm": 0.7161457538604736, "learning_rate": 9.572812914566442e-05, "loss": 0.4942, "step": 21110 }, { "epoch": 5.227722772277228, "grad_norm": 0.7800333499908447, "learning_rate": 9.572255460413328e-05, "loss": 0.5037, "step": 21120 }, { "epoch": 5.23019801980198, "grad_norm": 0.6929776072502136, "learning_rate": 9.571697659027585e-05, "loss": 0.4912, "step": 21130 }, { "epoch": 5.232673267326732, "grad_norm": 0.737945020198822, "learning_rate": 9.571139510451576e-05, "loss": 0.5027, "step": 21140 }, { "epoch": 5.235148514851485, "grad_norm": 0.6930323839187622, "learning_rate": 9.570581014727687e-05, "loss": 0.5008, "step": 21150 }, { "epoch": 5.237623762376238, "grad_norm": 0.6920428276062012, "learning_rate": 9.570022171898334e-05, "loss": 0.4992, "step": 21160 }, { "epoch": 5.24009900990099, "grad_norm": 0.6876739263534546, "learning_rate": 9.569462982005957e-05, "loss": 0.4986, "step": 21170 }, { "epoch": 5.242574257425742, "grad_norm": 0.7303794026374817, "learning_rate": 9.56890344509302e-05, "loss": 0.5007, "step": 21180 }, { "epoch": 5.2450495049504955, "grad_norm": 0.6951481103897095, "learning_rate": 9.568343561202022e-05, "loss": 0.496, "step": 21190 }, { "epoch": 5.247524752475248, "grad_norm": 0.701250433921814, "learning_rate": 9.567783330375477e-05, "loss": 0.4975, "step": 21200 }, { "epoch": 5.25, "grad_norm": 0.6786067485809326, "learning_rate": 9.567222752655934e-05, "loss": 0.4937, "step": 21210 }, { "epoch": 5.252475247524752, "grad_norm": 0.7319567799568176, "learning_rate": 9.56666182808596e-05, "loss": 0.5051, "step": 21220 }, { "epoch": 5.2549504950495045, "grad_norm": 0.6723818778991699, "learning_rate": 9.566100556708162e-05, "loss": 0.4947, "step": 21230 }, { "epoch": 5.257425742574258, "grad_norm": 0.6742265224456787, "learning_rate": 9.565538938565159e-05, "loss": 0.5023, "step": 21240 }, { "epoch": 5.25990099009901, "grad_norm": 0.7582823634147644, "learning_rate": 9.564976973699604e-05, "loss": 0.5009, "step": 21250 }, { "epoch": 5.262376237623762, "grad_norm": 0.6783936023712158, "learning_rate": 9.564414662154173e-05, "loss": 0.4936, "step": 21260 }, { "epoch": 5.264851485148515, "grad_norm": 0.7125350832939148, "learning_rate": 9.563852003971571e-05, "loss": 0.4969, "step": 21270 }, { "epoch": 5.267326732673268, "grad_norm": 0.7263048887252808, "learning_rate": 9.563288999194529e-05, "loss": 0.497, "step": 21280 }, { "epoch": 5.26980198019802, "grad_norm": 0.7074813842773438, "learning_rate": 9.562725647865803e-05, "loss": 0.4917, "step": 21290 }, { "epoch": 5.272277227722772, "grad_norm": 0.6997993588447571, "learning_rate": 9.562161950028175e-05, "loss": 0.5027, "step": 21300 }, { "epoch": 5.274752475247524, "grad_norm": 0.7429183721542358, "learning_rate": 9.561597905724454e-05, "loss": 0.4985, "step": 21310 }, { "epoch": 5.2772277227722775, "grad_norm": 0.6882157325744629, "learning_rate": 9.561033514997475e-05, "loss": 0.4975, "step": 21320 }, { "epoch": 5.27970297029703, "grad_norm": 0.709250807762146, "learning_rate": 9.560468777890101e-05, "loss": 0.4979, "step": 21330 }, { "epoch": 5.282178217821782, "grad_norm": 0.7183778285980225, "learning_rate": 9.559903694445221e-05, "loss": 0.5024, "step": 21340 }, { "epoch": 5.284653465346534, "grad_norm": 0.6835340261459351, "learning_rate": 9.559338264705748e-05, "loss": 0.495, "step": 21350 }, { "epoch": 5.287128712871287, "grad_norm": 0.7159419655799866, "learning_rate": 9.558772488714622e-05, "loss": 0.4956, "step": 21360 }, { "epoch": 5.28960396039604, "grad_norm": 0.6644235849380493, "learning_rate": 9.558206366514813e-05, "loss": 0.4946, "step": 21370 }, { "epoch": 5.292079207920792, "grad_norm": 0.7201200127601624, "learning_rate": 9.557639898149309e-05, "loss": 0.4958, "step": 21380 }, { "epoch": 5.294554455445544, "grad_norm": 0.6819207072257996, "learning_rate": 9.557073083661134e-05, "loss": 0.4886, "step": 21390 }, { "epoch": 5.297029702970297, "grad_norm": 0.6696884632110596, "learning_rate": 9.556505923093333e-05, "loss": 0.4932, "step": 21400 }, { "epoch": 5.2995049504950495, "grad_norm": 0.6951116919517517, "learning_rate": 9.555938416488977e-05, "loss": 0.497, "step": 21410 }, { "epoch": 5.301980198019802, "grad_norm": 0.6941846609115601, "learning_rate": 9.555370563891164e-05, "loss": 0.4941, "step": 21420 }, { "epoch": 5.304455445544554, "grad_norm": 0.7237010598182678, "learning_rate": 9.55480236534302e-05, "loss": 0.4961, "step": 21430 }, { "epoch": 5.306930693069307, "grad_norm": 0.727664053440094, "learning_rate": 9.554233820887695e-05, "loss": 0.4991, "step": 21440 }, { "epoch": 5.3094059405940595, "grad_norm": 0.7522806525230408, "learning_rate": 9.553664930568367e-05, "loss": 0.5014, "step": 21450 }, { "epoch": 5.311881188118812, "grad_norm": 0.7110211849212646, "learning_rate": 9.55309569442824e-05, "loss": 0.5015, "step": 21460 }, { "epoch": 5.314356435643564, "grad_norm": 0.7337501645088196, "learning_rate": 9.552526112510543e-05, "loss": 0.498, "step": 21470 }, { "epoch": 5.316831683168317, "grad_norm": 0.6774621605873108, "learning_rate": 9.55195618485853e-05, "loss": 0.4922, "step": 21480 }, { "epoch": 5.319306930693069, "grad_norm": 0.6796510219573975, "learning_rate": 9.551385911515485e-05, "loss": 0.4955, "step": 21490 }, { "epoch": 5.321782178217822, "grad_norm": 0.6607616543769836, "learning_rate": 9.550815292524718e-05, "loss": 0.4933, "step": 21500 }, { "epoch": 5.324257425742574, "grad_norm": 0.6519168019294739, "learning_rate": 9.550244327929562e-05, "loss": 0.4957, "step": 21510 }, { "epoch": 5.326732673267327, "grad_norm": 0.6699130535125732, "learning_rate": 9.549673017773377e-05, "loss": 0.5003, "step": 21520 }, { "epoch": 5.329207920792079, "grad_norm": 0.6634597778320312, "learning_rate": 9.549101362099552e-05, "loss": 0.4945, "step": 21530 }, { "epoch": 5.3316831683168315, "grad_norm": 0.6739826798439026, "learning_rate": 9.548529360951501e-05, "loss": 0.4968, "step": 21540 }, { "epoch": 5.334158415841584, "grad_norm": 0.648435115814209, "learning_rate": 9.547957014372664e-05, "loss": 0.496, "step": 21550 }, { "epoch": 5.336633663366337, "grad_norm": 0.6895898580551147, "learning_rate": 9.547384322406503e-05, "loss": 0.502, "step": 21560 }, { "epoch": 5.339108910891089, "grad_norm": 0.7034032940864563, "learning_rate": 9.546811285096515e-05, "loss": 0.5023, "step": 21570 }, { "epoch": 5.341584158415841, "grad_norm": 0.7356610298156738, "learning_rate": 9.546237902486214e-05, "loss": 0.5012, "step": 21580 }, { "epoch": 5.344059405940594, "grad_norm": 0.652067244052887, "learning_rate": 9.545664174619148e-05, "loss": 0.4937, "step": 21590 }, { "epoch": 5.346534653465347, "grad_norm": 0.6567900776863098, "learning_rate": 9.545090101538887e-05, "loss": 0.4903, "step": 21600 }, { "epoch": 5.349009900990099, "grad_norm": 0.683546781539917, "learning_rate": 9.544515683289027e-05, "loss": 0.4941, "step": 21610 }, { "epoch": 5.351485148514851, "grad_norm": 0.7145261764526367, "learning_rate": 9.543940919913193e-05, "loss": 0.5001, "step": 21620 }, { "epoch": 5.353960396039604, "grad_norm": 0.7302564978599548, "learning_rate": 9.543365811455032e-05, "loss": 0.4959, "step": 21630 }, { "epoch": 5.356435643564357, "grad_norm": 0.744692862033844, "learning_rate": 9.542790357958223e-05, "loss": 0.4954, "step": 21640 }, { "epoch": 5.358910891089109, "grad_norm": 0.6951557993888855, "learning_rate": 9.542214559466464e-05, "loss": 0.4956, "step": 21650 }, { "epoch": 5.361386138613861, "grad_norm": 0.7509949207305908, "learning_rate": 9.541638416023487e-05, "loss": 0.4912, "step": 21660 }, { "epoch": 5.3638613861386135, "grad_norm": 0.7010226845741272, "learning_rate": 9.541061927673045e-05, "loss": 0.4963, "step": 21670 }, { "epoch": 5.366336633663367, "grad_norm": 0.7186484336853027, "learning_rate": 9.540485094458918e-05, "loss": 0.4957, "step": 21680 }, { "epoch": 5.368811881188119, "grad_norm": 0.7073588371276855, "learning_rate": 9.53990791642491e-05, "loss": 0.4955, "step": 21690 }, { "epoch": 5.371287128712871, "grad_norm": 0.6948750019073486, "learning_rate": 9.539330393614859e-05, "loss": 0.4948, "step": 21700 }, { "epoch": 5.373762376237623, "grad_norm": 0.7113081216812134, "learning_rate": 9.53875252607262e-05, "loss": 0.502, "step": 21710 }, { "epoch": 5.376237623762377, "grad_norm": 0.7188633680343628, "learning_rate": 9.538174313842081e-05, "loss": 0.4975, "step": 21720 }, { "epoch": 5.378712871287129, "grad_norm": 0.7112079858779907, "learning_rate": 9.537595756967152e-05, "loss": 0.4935, "step": 21730 }, { "epoch": 5.381188118811881, "grad_norm": 0.6527764201164246, "learning_rate": 9.537016855491771e-05, "loss": 0.493, "step": 21740 }, { "epoch": 5.383663366336633, "grad_norm": 0.724233090877533, "learning_rate": 9.5364376094599e-05, "loss": 0.4948, "step": 21750 }, { "epoch": 5.3861386138613865, "grad_norm": 0.7339910864830017, "learning_rate": 9.535858018915532e-05, "loss": 0.4958, "step": 21760 }, { "epoch": 5.388613861386139, "grad_norm": 0.7092688083648682, "learning_rate": 9.535278083902681e-05, "loss": 0.4939, "step": 21770 }, { "epoch": 5.391089108910891, "grad_norm": 0.6646677851676941, "learning_rate": 9.534697804465391e-05, "loss": 0.4931, "step": 21780 }, { "epoch": 5.393564356435643, "grad_norm": 0.7016463279724121, "learning_rate": 9.534117180647728e-05, "loss": 0.4986, "step": 21790 }, { "epoch": 5.396039603960396, "grad_norm": 0.7402837872505188, "learning_rate": 9.533536212493787e-05, "loss": 0.4989, "step": 21800 }, { "epoch": 5.398514851485149, "grad_norm": 0.715894341468811, "learning_rate": 9.53295490004769e-05, "loss": 0.5003, "step": 21810 }, { "epoch": 5.400990099009901, "grad_norm": 0.6846730709075928, "learning_rate": 9.532373243353583e-05, "loss": 0.4978, "step": 21820 }, { "epoch": 5.403465346534653, "grad_norm": 0.6578965187072754, "learning_rate": 9.531791242455641e-05, "loss": 0.5061, "step": 21830 }, { "epoch": 5.405940594059406, "grad_norm": 0.6878682374954224, "learning_rate": 9.531208897398058e-05, "loss": 0.4964, "step": 21840 }, { "epoch": 5.408415841584159, "grad_norm": 0.6975110769271851, "learning_rate": 9.530626208225066e-05, "loss": 0.4986, "step": 21850 }, { "epoch": 5.410891089108911, "grad_norm": 0.7026899456977844, "learning_rate": 9.530043174980912e-05, "loss": 0.4995, "step": 21860 }, { "epoch": 5.413366336633663, "grad_norm": 0.699727475643158, "learning_rate": 9.529459797709874e-05, "loss": 0.4996, "step": 21870 }, { "epoch": 5.415841584158416, "grad_norm": 0.7121231555938721, "learning_rate": 9.528876076456255e-05, "loss": 0.4969, "step": 21880 }, { "epoch": 5.4183168316831685, "grad_norm": 0.6949172019958496, "learning_rate": 9.528292011264389e-05, "loss": 0.4914, "step": 21890 }, { "epoch": 5.420792079207921, "grad_norm": 0.7237693071365356, "learning_rate": 9.527707602178625e-05, "loss": 0.4958, "step": 21900 }, { "epoch": 5.423267326732673, "grad_norm": 0.6954572200775146, "learning_rate": 9.527122849243353e-05, "loss": 0.4927, "step": 21910 }, { "epoch": 5.425742574257426, "grad_norm": 0.6856471300125122, "learning_rate": 9.526537752502976e-05, "loss": 0.4945, "step": 21920 }, { "epoch": 5.428217821782178, "grad_norm": 0.6474347710609436, "learning_rate": 9.525952312001927e-05, "loss": 0.4923, "step": 21930 }, { "epoch": 5.430693069306931, "grad_norm": 0.6280668377876282, "learning_rate": 9.525366527784669e-05, "loss": 0.4944, "step": 21940 }, { "epoch": 5.433168316831683, "grad_norm": 0.6941468119621277, "learning_rate": 9.524780399895689e-05, "loss": 0.4981, "step": 21950 }, { "epoch": 5.435643564356436, "grad_norm": 0.7130104899406433, "learning_rate": 9.5241939283795e-05, "loss": 0.4976, "step": 21960 }, { "epoch": 5.438118811881188, "grad_norm": 0.6782022714614868, "learning_rate": 9.523607113280637e-05, "loss": 0.4938, "step": 21970 }, { "epoch": 5.4405940594059405, "grad_norm": 0.6435871124267578, "learning_rate": 9.523019954643669e-05, "loss": 0.4955, "step": 21980 }, { "epoch": 5.443069306930693, "grad_norm": 0.639030933380127, "learning_rate": 9.522432452513182e-05, "loss": 0.4946, "step": 21990 }, { "epoch": 5.445544554455446, "grad_norm": 0.6718349456787109, "learning_rate": 9.521844606933797e-05, "loss": 0.4964, "step": 22000 }, { "epoch": 5.448019801980198, "grad_norm": 0.6713849306106567, "learning_rate": 9.521256417950156e-05, "loss": 0.495, "step": 22010 }, { "epoch": 5.4504950495049505, "grad_norm": 0.6800833940505981, "learning_rate": 9.520667885606928e-05, "loss": 0.4926, "step": 22020 }, { "epoch": 5.452970297029703, "grad_norm": 0.7266783714294434, "learning_rate": 9.520079009948808e-05, "loss": 0.4961, "step": 22030 }, { "epoch": 5.455445544554456, "grad_norm": 0.7107278108596802, "learning_rate": 9.519489791020516e-05, "loss": 0.4967, "step": 22040 }, { "epoch": 5.457920792079208, "grad_norm": 0.6831000447273254, "learning_rate": 9.518900228866801e-05, "loss": 0.4966, "step": 22050 }, { "epoch": 5.46039603960396, "grad_norm": 0.6682476997375488, "learning_rate": 9.518310323532436e-05, "loss": 0.4948, "step": 22060 }, { "epoch": 5.462871287128713, "grad_norm": 0.7195137143135071, "learning_rate": 9.51772007506222e-05, "loss": 0.4931, "step": 22070 }, { "epoch": 5.465346534653466, "grad_norm": 0.734123945236206, "learning_rate": 9.517129483500979e-05, "loss": 0.4917, "step": 22080 }, { "epoch": 5.467821782178218, "grad_norm": 0.667396605014801, "learning_rate": 9.516538548893563e-05, "loss": 0.5019, "step": 22090 }, { "epoch": 5.47029702970297, "grad_norm": 0.6570314764976501, "learning_rate": 9.515947271284852e-05, "loss": 0.4933, "step": 22100 }, { "epoch": 5.4727722772277225, "grad_norm": 0.6618701219558716, "learning_rate": 9.515355650719748e-05, "loss": 0.496, "step": 22110 }, { "epoch": 5.475247524752476, "grad_norm": 0.6687915325164795, "learning_rate": 9.514763687243182e-05, "loss": 0.4971, "step": 22120 }, { "epoch": 5.477722772277228, "grad_norm": 0.717027485370636, "learning_rate": 9.514171380900107e-05, "loss": 0.4934, "step": 22130 }, { "epoch": 5.48019801980198, "grad_norm": 0.667878270149231, "learning_rate": 9.513578731735507e-05, "loss": 0.4937, "step": 22140 }, { "epoch": 5.482673267326732, "grad_norm": 0.6558663845062256, "learning_rate": 9.51298573979439e-05, "loss": 0.4908, "step": 22150 }, { "epoch": 5.485148514851485, "grad_norm": 0.7110480666160583, "learning_rate": 9.512392405121788e-05, "loss": 0.4982, "step": 22160 }, { "epoch": 5.487623762376238, "grad_norm": 0.7455015182495117, "learning_rate": 9.511798727762764e-05, "loss": 0.5036, "step": 22170 }, { "epoch": 5.49009900990099, "grad_norm": 0.6476817727088928, "learning_rate": 9.5112047077624e-05, "loss": 0.4949, "step": 22180 }, { "epoch": 5.492574257425742, "grad_norm": 0.6724919676780701, "learning_rate": 9.510610345165811e-05, "loss": 0.4906, "step": 22190 }, { "epoch": 5.4950495049504955, "grad_norm": 0.6778200268745422, "learning_rate": 9.510015640018135e-05, "loss": 0.4977, "step": 22200 }, { "epoch": 5.497524752475248, "grad_norm": 0.6485478281974792, "learning_rate": 9.509420592364533e-05, "loss": 0.4898, "step": 22210 }, { "epoch": 5.5, "grad_norm": 0.8019765019416809, "learning_rate": 9.508825202250196e-05, "loss": 0.4987, "step": 22220 }, { "epoch": 5.502475247524752, "grad_norm": 0.6574825048446655, "learning_rate": 9.508229469720342e-05, "loss": 0.493, "step": 22230 }, { "epoch": 5.5049504950495045, "grad_norm": 0.7304871678352356, "learning_rate": 9.50763339482021e-05, "loss": 0.495, "step": 22240 }, { "epoch": 5.507425742574258, "grad_norm": 0.664433479309082, "learning_rate": 9.50703697759507e-05, "loss": 0.4997, "step": 22250 }, { "epoch": 5.50990099009901, "grad_norm": 0.6628553867340088, "learning_rate": 9.506440218090216e-05, "loss": 0.4905, "step": 22260 }, { "epoch": 5.512376237623762, "grad_norm": 0.7100067138671875, "learning_rate": 9.505843116350966e-05, "loss": 0.4946, "step": 22270 }, { "epoch": 5.514851485148515, "grad_norm": 0.6816449761390686, "learning_rate": 9.505245672422669e-05, "loss": 0.4944, "step": 22280 }, { "epoch": 5.517326732673268, "grad_norm": 0.6850700974464417, "learning_rate": 9.504647886350695e-05, "loss": 0.491, "step": 22290 }, { "epoch": 5.51980198019802, "grad_norm": 0.7096611857414246, "learning_rate": 9.504049758180441e-05, "loss": 0.4975, "step": 22300 }, { "epoch": 5.522277227722772, "grad_norm": 0.6700732111930847, "learning_rate": 9.503451287957332e-05, "loss": 0.4935, "step": 22310 }, { "epoch": 5.524752475247524, "grad_norm": 0.6776368618011475, "learning_rate": 9.50285247572682e-05, "loss": 0.4965, "step": 22320 }, { "epoch": 5.5272277227722775, "grad_norm": 0.6837242841720581, "learning_rate": 9.502253321534376e-05, "loss": 0.4905, "step": 22330 }, { "epoch": 5.52970297029703, "grad_norm": 0.6867803335189819, "learning_rate": 9.501653825425506e-05, "loss": 0.4942, "step": 22340 }, { "epoch": 5.532178217821782, "grad_norm": 0.6928383708000183, "learning_rate": 9.501053987445735e-05, "loss": 0.4941, "step": 22350 }, { "epoch": 5.534653465346535, "grad_norm": 0.7060667276382446, "learning_rate": 9.50045380764062e-05, "loss": 0.49, "step": 22360 }, { "epoch": 5.537128712871287, "grad_norm": 0.6850438714027405, "learning_rate": 9.499853286055738e-05, "loss": 0.4951, "step": 22370 }, { "epoch": 5.53960396039604, "grad_norm": 0.6703614592552185, "learning_rate": 9.499252422736695e-05, "loss": 0.4924, "step": 22380 }, { "epoch": 5.542079207920792, "grad_norm": 0.6647247672080994, "learning_rate": 9.498651217729122e-05, "loss": 0.496, "step": 22390 }, { "epoch": 5.544554455445544, "grad_norm": 0.698291003704071, "learning_rate": 9.498049671078679e-05, "loss": 0.4929, "step": 22400 }, { "epoch": 5.547029702970297, "grad_norm": 0.7062404155731201, "learning_rate": 9.497447782831048e-05, "loss": 0.4974, "step": 22410 }, { "epoch": 5.5495049504950495, "grad_norm": 0.6938732266426086, "learning_rate": 9.496845553031939e-05, "loss": 0.4954, "step": 22420 }, { "epoch": 5.551980198019802, "grad_norm": 0.6970093250274658, "learning_rate": 9.496242981727086e-05, "loss": 0.4907, "step": 22430 }, { "epoch": 5.554455445544555, "grad_norm": 0.6763580441474915, "learning_rate": 9.49564006896225e-05, "loss": 0.4987, "step": 22440 }, { "epoch": 5.556930693069307, "grad_norm": 0.6552399396896362, "learning_rate": 9.49503681478322e-05, "loss": 0.4931, "step": 22450 }, { "epoch": 5.5594059405940595, "grad_norm": 0.6273687481880188, "learning_rate": 9.49443321923581e-05, "loss": 0.4918, "step": 22460 }, { "epoch": 5.561881188118812, "grad_norm": 0.6848121881484985, "learning_rate": 9.493829282365855e-05, "loss": 0.4962, "step": 22470 }, { "epoch": 5.564356435643564, "grad_norm": 0.6235942244529724, "learning_rate": 9.493225004219227e-05, "loss": 0.4864, "step": 22480 }, { "epoch": 5.566831683168317, "grad_norm": 0.6487409472465515, "learning_rate": 9.492620384841809e-05, "loss": 0.4944, "step": 22490 }, { "epoch": 5.569306930693069, "grad_norm": 0.6520559191703796, "learning_rate": 9.492015424279523e-05, "loss": 0.4934, "step": 22500 }, { "epoch": 5.571782178217822, "grad_norm": 0.6960709691047668, "learning_rate": 9.49141012257831e-05, "loss": 0.4873, "step": 22510 }, { "epoch": 5.574257425742574, "grad_norm": 0.7341060638427734, "learning_rate": 9.490804479784139e-05, "loss": 0.4932, "step": 22520 }, { "epoch": 5.576732673267327, "grad_norm": 0.6851258277893066, "learning_rate": 9.490198495943003e-05, "loss": 0.4958, "step": 22530 }, { "epoch": 5.579207920792079, "grad_norm": 0.6931005120277405, "learning_rate": 9.489592171100925e-05, "loss": 0.4926, "step": 22540 }, { "epoch": 5.5816831683168315, "grad_norm": 0.7124272584915161, "learning_rate": 9.488985505303951e-05, "loss": 0.4965, "step": 22550 }, { "epoch": 5.584158415841584, "grad_norm": 0.6776531338691711, "learning_rate": 9.488378498598152e-05, "loss": 0.4932, "step": 22560 }, { "epoch": 5.586633663366337, "grad_norm": 0.675773024559021, "learning_rate": 9.487771151029627e-05, "loss": 0.4985, "step": 22570 }, { "epoch": 5.589108910891089, "grad_norm": 0.6476688385009766, "learning_rate": 9.487163462644499e-05, "loss": 0.4972, "step": 22580 }, { "epoch": 5.591584158415841, "grad_norm": 0.7768530249595642, "learning_rate": 9.486555433488921e-05, "loss": 0.4981, "step": 22590 }, { "epoch": 5.594059405940594, "grad_norm": 0.6793107390403748, "learning_rate": 9.485947063609064e-05, "loss": 0.4967, "step": 22600 }, { "epoch": 5.596534653465347, "grad_norm": 0.673789918422699, "learning_rate": 9.485338353051134e-05, "loss": 0.5014, "step": 22610 }, { "epoch": 5.599009900990099, "grad_norm": 0.7083485722541809, "learning_rate": 9.484729301861355e-05, "loss": 0.4979, "step": 22620 }, { "epoch": 5.601485148514851, "grad_norm": 0.6804497241973877, "learning_rate": 9.484119910085982e-05, "loss": 0.4943, "step": 22630 }, { "epoch": 5.603960396039604, "grad_norm": 0.6648809313774109, "learning_rate": 9.483510177771296e-05, "loss": 0.4922, "step": 22640 }, { "epoch": 5.606435643564357, "grad_norm": 0.6528737545013428, "learning_rate": 9.482900104963601e-05, "loss": 0.4955, "step": 22650 }, { "epoch": 5.608910891089109, "grad_norm": 0.6857967972755432, "learning_rate": 9.482289691709227e-05, "loss": 0.4932, "step": 22660 }, { "epoch": 5.611386138613861, "grad_norm": 0.6043749451637268, "learning_rate": 9.48167893805453e-05, "loss": 0.4948, "step": 22670 }, { "epoch": 5.6138613861386135, "grad_norm": 0.6840988993644714, "learning_rate": 9.481067844045897e-05, "loss": 0.4968, "step": 22680 }, { "epoch": 5.616336633663367, "grad_norm": 0.6487518548965454, "learning_rate": 9.48045640972973e-05, "loss": 0.4975, "step": 22690 }, { "epoch": 5.618811881188119, "grad_norm": 0.6659702062606812, "learning_rate": 9.47984463515247e-05, "loss": 0.491, "step": 22700 }, { "epoch": 5.621287128712871, "grad_norm": 0.6774781942367554, "learning_rate": 9.479232520360573e-05, "loss": 0.492, "step": 22710 }, { "epoch": 5.623762376237623, "grad_norm": 0.6790499091148376, "learning_rate": 9.478620065400528e-05, "loss": 0.4924, "step": 22720 }, { "epoch": 5.626237623762377, "grad_norm": 0.7323364019393921, "learning_rate": 9.478007270318842e-05, "loss": 0.4933, "step": 22730 }, { "epoch": 5.628712871287129, "grad_norm": 0.6340842843055725, "learning_rate": 9.47739413516206e-05, "loss": 0.497, "step": 22740 }, { "epoch": 5.631188118811881, "grad_norm": 0.6485574841499329, "learning_rate": 9.476780659976739e-05, "loss": 0.4991, "step": 22750 }, { "epoch": 5.633663366336633, "grad_norm": 0.676596462726593, "learning_rate": 9.476166844809474e-05, "loss": 0.4961, "step": 22760 }, { "epoch": 5.6361386138613865, "grad_norm": 0.6827379465103149, "learning_rate": 9.475552689706875e-05, "loss": 0.4935, "step": 22770 }, { "epoch": 5.638613861386139, "grad_norm": 0.6990441679954529, "learning_rate": 9.474938194715585e-05, "loss": 0.4948, "step": 22780 }, { "epoch": 5.641089108910891, "grad_norm": 0.6456711292266846, "learning_rate": 9.474323359882271e-05, "loss": 0.5012, "step": 22790 }, { "epoch": 5.643564356435643, "grad_norm": 0.657173752784729, "learning_rate": 9.473708185253628e-05, "loss": 0.4944, "step": 22800 }, { "epoch": 5.646039603960396, "grad_norm": 0.6865933537483215, "learning_rate": 9.473092670876369e-05, "loss": 0.4914, "step": 22810 }, { "epoch": 5.648514851485149, "grad_norm": 0.6576148867607117, "learning_rate": 9.472476816797244e-05, "loss": 0.4931, "step": 22820 }, { "epoch": 5.650990099009901, "grad_norm": 0.6383556723594666, "learning_rate": 9.47186062306302e-05, "loss": 0.4925, "step": 22830 }, { "epoch": 5.653465346534653, "grad_norm": 0.7122588157653809, "learning_rate": 9.471244089720492e-05, "loss": 0.4961, "step": 22840 }, { "epoch": 5.655940594059406, "grad_norm": 0.7122207880020142, "learning_rate": 9.470627216816486e-05, "loss": 0.4925, "step": 22850 }, { "epoch": 5.658415841584159, "grad_norm": 0.7038292288780212, "learning_rate": 9.470010004397844e-05, "loss": 0.4934, "step": 22860 }, { "epoch": 5.660891089108911, "grad_norm": 0.6915731430053711, "learning_rate": 9.469392452511443e-05, "loss": 0.4932, "step": 22870 }, { "epoch": 5.663366336633663, "grad_norm": 0.6559720039367676, "learning_rate": 9.468774561204179e-05, "loss": 0.4959, "step": 22880 }, { "epoch": 5.665841584158416, "grad_norm": 0.667172908782959, "learning_rate": 9.46815633052298e-05, "loss": 0.5012, "step": 22890 }, { "epoch": 5.6683168316831685, "grad_norm": 0.695073664188385, "learning_rate": 9.467537760514794e-05, "loss": 0.4949, "step": 22900 }, { "epoch": 5.670792079207921, "grad_norm": 0.7197666168212891, "learning_rate": 9.466918851226599e-05, "loss": 0.4915, "step": 22910 }, { "epoch": 5.673267326732673, "grad_norm": 0.6852660179138184, "learning_rate": 9.466299602705395e-05, "loss": 0.4942, "step": 22920 }, { "epoch": 5.675742574257426, "grad_norm": 0.6369038820266724, "learning_rate": 9.465680014998213e-05, "loss": 0.4907, "step": 22930 }, { "epoch": 5.678217821782178, "grad_norm": 0.7091821432113647, "learning_rate": 9.465060088152105e-05, "loss": 0.4955, "step": 22940 }, { "epoch": 5.680693069306931, "grad_norm": 0.6545227766036987, "learning_rate": 9.464439822214148e-05, "loss": 0.493, "step": 22950 }, { "epoch": 5.683168316831683, "grad_norm": 0.6621568202972412, "learning_rate": 9.46381921723145e-05, "loss": 0.494, "step": 22960 }, { "epoch": 5.685643564356436, "grad_norm": 0.6696566939353943, "learning_rate": 9.463198273251141e-05, "loss": 0.495, "step": 22970 }, { "epoch": 5.688118811881188, "grad_norm": 0.6770690679550171, "learning_rate": 9.46257699032038e-05, "loss": 0.4991, "step": 22980 }, { "epoch": 5.6905940594059405, "grad_norm": 0.6486595273017883, "learning_rate": 9.461955368486344e-05, "loss": 0.4889, "step": 22990 }, { "epoch": 5.693069306930693, "grad_norm": 0.6709611415863037, "learning_rate": 9.461333407796246e-05, "loss": 0.4906, "step": 23000 }, { "epoch": 5.695544554455445, "grad_norm": 0.6880239248275757, "learning_rate": 9.460711108297318e-05, "loss": 0.4951, "step": 23010 }, { "epoch": 5.698019801980198, "grad_norm": 0.6329277753829956, "learning_rate": 9.460088470036818e-05, "loss": 0.4956, "step": 23020 }, { "epoch": 5.7004950495049505, "grad_norm": 0.6904314756393433, "learning_rate": 9.459465493062033e-05, "loss": 0.498, "step": 23030 }, { "epoch": 5.702970297029703, "grad_norm": 0.7322156429290771, "learning_rate": 9.458842177420275e-05, "loss": 0.4898, "step": 23040 }, { "epoch": 5.705445544554456, "grad_norm": 0.6593242883682251, "learning_rate": 9.458218523158877e-05, "loss": 0.4938, "step": 23050 }, { "epoch": 5.707920792079208, "grad_norm": 0.6736606359481812, "learning_rate": 9.457594530325207e-05, "loss": 0.4917, "step": 23060 }, { "epoch": 5.71039603960396, "grad_norm": 0.6934103965759277, "learning_rate": 9.456970198966646e-05, "loss": 0.4943, "step": 23070 }, { "epoch": 5.712871287128713, "grad_norm": 0.6760016679763794, "learning_rate": 9.456345529130616e-05, "loss": 0.4964, "step": 23080 }, { "epoch": 5.715346534653465, "grad_norm": 0.72941654920578, "learning_rate": 9.455720520864551e-05, "loss": 0.4973, "step": 23090 }, { "epoch": 5.717821782178218, "grad_norm": 0.7042532563209534, "learning_rate": 9.455095174215916e-05, "loss": 0.5015, "step": 23100 }, { "epoch": 5.72029702970297, "grad_norm": 0.6717808842658997, "learning_rate": 9.454469489232205e-05, "loss": 0.4904, "step": 23110 }, { "epoch": 5.7227722772277225, "grad_norm": 0.7432168126106262, "learning_rate": 9.453843465960933e-05, "loss": 0.4968, "step": 23120 }, { "epoch": 5.725247524752476, "grad_norm": 0.6759523749351501, "learning_rate": 9.453217104449643e-05, "loss": 0.4969, "step": 23130 }, { "epoch": 5.727722772277228, "grad_norm": 0.6551719307899475, "learning_rate": 9.452590404745903e-05, "loss": 0.4953, "step": 23140 }, { "epoch": 5.73019801980198, "grad_norm": 0.6608520746231079, "learning_rate": 9.451963366897305e-05, "loss": 0.4912, "step": 23150 }, { "epoch": 5.732673267326732, "grad_norm": 0.6478062272071838, "learning_rate": 9.451335990951469e-05, "loss": 0.4965, "step": 23160 }, { "epoch": 5.735148514851485, "grad_norm": 0.6659473180770874, "learning_rate": 9.450708276956042e-05, "loss": 0.4936, "step": 23170 }, { "epoch": 5.737623762376238, "grad_norm": 0.6823928356170654, "learning_rate": 9.450080224958695e-05, "loss": 0.4954, "step": 23180 }, { "epoch": 5.74009900990099, "grad_norm": 0.6869696974754333, "learning_rate": 9.44945183500712e-05, "loss": 0.4966, "step": 23190 }, { "epoch": 5.742574257425742, "grad_norm": 0.6838988065719604, "learning_rate": 9.448823107149043e-05, "loss": 0.4965, "step": 23200 }, { "epoch": 5.7450495049504955, "grad_norm": 0.7047802805900574, "learning_rate": 9.448194041432212e-05, "loss": 0.492, "step": 23210 }, { "epoch": 5.747524752475248, "grad_norm": 0.707290768623352, "learning_rate": 9.447564637904398e-05, "loss": 0.4956, "step": 23220 }, { "epoch": 5.75, "grad_norm": 0.6516978144645691, "learning_rate": 9.4469348966134e-05, "loss": 0.4972, "step": 23230 }, { "epoch": 5.752475247524752, "grad_norm": 0.7131555080413818, "learning_rate": 9.446304817607045e-05, "loss": 0.4899, "step": 23240 }, { "epoch": 5.7549504950495045, "grad_norm": 0.6643797755241394, "learning_rate": 9.445674400933182e-05, "loss": 0.4924, "step": 23250 }, { "epoch": 5.757425742574258, "grad_norm": 0.6997048854827881, "learning_rate": 9.445043646639688e-05, "loss": 0.4926, "step": 23260 }, { "epoch": 5.75990099009901, "grad_norm": 0.644343912601471, "learning_rate": 9.444412554774462e-05, "loss": 0.5003, "step": 23270 }, { "epoch": 5.762376237623762, "grad_norm": 0.7035076022148132, "learning_rate": 9.443781125385435e-05, "loss": 0.4968, "step": 23280 }, { "epoch": 5.764851485148515, "grad_norm": 0.679000735282898, "learning_rate": 9.443149358520558e-05, "loss": 0.4979, "step": 23290 }, { "epoch": 5.767326732673268, "grad_norm": 0.6851304769515991, "learning_rate": 9.442517254227808e-05, "loss": 0.4951, "step": 23300 }, { "epoch": 5.76980198019802, "grad_norm": 0.727546215057373, "learning_rate": 9.44188481255519e-05, "loss": 0.4958, "step": 23310 }, { "epoch": 5.772277227722772, "grad_norm": 0.7076166868209839, "learning_rate": 9.441252033550736e-05, "loss": 0.4954, "step": 23320 }, { "epoch": 5.774752475247524, "grad_norm": 0.6494976282119751, "learning_rate": 9.4406189172625e-05, "loss": 0.4984, "step": 23330 }, { "epoch": 5.7772277227722775, "grad_norm": 0.6786664724349976, "learning_rate": 9.439985463738562e-05, "loss": 0.4958, "step": 23340 }, { "epoch": 5.77970297029703, "grad_norm": 0.6420280337333679, "learning_rate": 9.439351673027028e-05, "loss": 0.4925, "step": 23350 }, { "epoch": 5.782178217821782, "grad_norm": 0.6206204295158386, "learning_rate": 9.438717545176034e-05, "loss": 0.4942, "step": 23360 }, { "epoch": 5.784653465346535, "grad_norm": 0.6703090071678162, "learning_rate": 9.438083080233734e-05, "loss": 0.4923, "step": 23370 }, { "epoch": 5.787128712871287, "grad_norm": 0.6583405137062073, "learning_rate": 9.437448278248313e-05, "loss": 0.4937, "step": 23380 }, { "epoch": 5.78960396039604, "grad_norm": 0.7366304993629456, "learning_rate": 9.43681313926798e-05, "loss": 0.4911, "step": 23390 }, { "epoch": 5.792079207920792, "grad_norm": 0.6967867612838745, "learning_rate": 9.436177663340971e-05, "loss": 0.4871, "step": 23400 }, { "epoch": 5.794554455445544, "grad_norm": 0.7028957605361938, "learning_rate": 9.435541850515543e-05, "loss": 0.4927, "step": 23410 }, { "epoch": 5.797029702970297, "grad_norm": 0.661806046962738, "learning_rate": 9.434905700839983e-05, "loss": 0.4961, "step": 23420 }, { "epoch": 5.7995049504950495, "grad_norm": 0.6732451915740967, "learning_rate": 9.434269214362604e-05, "loss": 0.4893, "step": 23430 }, { "epoch": 5.801980198019802, "grad_norm": 0.7412935495376587, "learning_rate": 9.433632391131742e-05, "loss": 0.4923, "step": 23440 }, { "epoch": 5.804455445544555, "grad_norm": 0.6685687899589539, "learning_rate": 9.432995231195759e-05, "loss": 0.4897, "step": 23450 }, { "epoch": 5.806930693069307, "grad_norm": 0.6716099381446838, "learning_rate": 9.432357734603042e-05, "loss": 0.4916, "step": 23460 }, { "epoch": 5.8094059405940595, "grad_norm": 0.7188799381256104, "learning_rate": 9.431719901402008e-05, "loss": 0.4911, "step": 23470 }, { "epoch": 5.811881188118812, "grad_norm": 0.6964933276176453, "learning_rate": 9.431081731641093e-05, "loss": 0.4909, "step": 23480 }, { "epoch": 5.814356435643564, "grad_norm": 0.6409781575202942, "learning_rate": 9.430443225368763e-05, "loss": 0.4975, "step": 23490 }, { "epoch": 5.816831683168317, "grad_norm": 0.6222018599510193, "learning_rate": 9.42980438263351e-05, "loss": 0.4997, "step": 23500 }, { "epoch": 5.819306930693069, "grad_norm": 0.6485810875892639, "learning_rate": 9.429165203483847e-05, "loss": 0.491, "step": 23510 }, { "epoch": 5.821782178217822, "grad_norm": 0.6780351996421814, "learning_rate": 9.428525687968317e-05, "loss": 0.4888, "step": 23520 }, { "epoch": 5.824257425742574, "grad_norm": 0.6861041784286499, "learning_rate": 9.427885836135488e-05, "loss": 0.492, "step": 23530 }, { "epoch": 5.826732673267327, "grad_norm": 0.7182576060295105, "learning_rate": 9.427245648033948e-05, "loss": 0.488, "step": 23540 }, { "epoch": 5.829207920792079, "grad_norm": 0.7620370984077454, "learning_rate": 9.426605123712322e-05, "loss": 0.4936, "step": 23550 }, { "epoch": 5.8316831683168315, "grad_norm": 0.6792449355125427, "learning_rate": 9.425964263219248e-05, "loss": 0.4975, "step": 23560 }, { "epoch": 5.834158415841584, "grad_norm": 0.6665513515472412, "learning_rate": 9.425323066603397e-05, "loss": 0.4971, "step": 23570 }, { "epoch": 5.836633663366337, "grad_norm": 0.6576762199401855, "learning_rate": 9.424681533913465e-05, "loss": 0.4909, "step": 23580 }, { "epoch": 5.839108910891089, "grad_norm": 0.6784145832061768, "learning_rate": 9.42403966519817e-05, "loss": 0.4975, "step": 23590 }, { "epoch": 5.841584158415841, "grad_norm": 0.6968861818313599, "learning_rate": 9.423397460506258e-05, "loss": 0.4874, "step": 23600 }, { "epoch": 5.844059405940594, "grad_norm": 0.6663863658905029, "learning_rate": 9.4227549198865e-05, "loss": 0.4938, "step": 23610 }, { "epoch": 5.846534653465347, "grad_norm": 0.6476473212242126, "learning_rate": 9.422112043387695e-05, "loss": 0.4932, "step": 23620 }, { "epoch": 5.849009900990099, "grad_norm": 0.6809970140457153, "learning_rate": 9.421468831058664e-05, "loss": 0.496, "step": 23630 }, { "epoch": 5.851485148514851, "grad_norm": 0.6869102716445923, "learning_rate": 9.420825282948253e-05, "loss": 0.4906, "step": 23640 }, { "epoch": 5.853960396039604, "grad_norm": 0.6428642868995667, "learning_rate": 9.420181399105338e-05, "loss": 0.4872, "step": 23650 }, { "epoch": 5.856435643564357, "grad_norm": 0.6601603031158447, "learning_rate": 9.419537179578816e-05, "loss": 0.499, "step": 23660 }, { "epoch": 5.858910891089109, "grad_norm": 0.6941671967506409, "learning_rate": 9.418892624417611e-05, "loss": 0.4943, "step": 23670 }, { "epoch": 5.861386138613861, "grad_norm": 0.6930562257766724, "learning_rate": 9.418247733670674e-05, "loss": 0.4927, "step": 23680 }, { "epoch": 5.8638613861386135, "grad_norm": 0.6516923904418945, "learning_rate": 9.417602507386981e-05, "loss": 0.4884, "step": 23690 }, { "epoch": 5.866336633663367, "grad_norm": 0.656984806060791, "learning_rate": 9.416956945615527e-05, "loss": 0.4976, "step": 23700 }, { "epoch": 5.868811881188119, "grad_norm": 0.6454426050186157, "learning_rate": 9.416311048405346e-05, "loss": 0.4961, "step": 23710 }, { "epoch": 5.871287128712871, "grad_norm": 0.677372395992279, "learning_rate": 9.415664815805485e-05, "loss": 0.4973, "step": 23720 }, { "epoch": 5.873762376237623, "grad_norm": 0.7020536065101624, "learning_rate": 9.415018247865022e-05, "loss": 0.4913, "step": 23730 }, { "epoch": 5.876237623762377, "grad_norm": 0.6166278123855591, "learning_rate": 9.41437134463306e-05, "loss": 0.5002, "step": 23740 }, { "epoch": 5.878712871287129, "grad_norm": 0.666875422000885, "learning_rate": 9.413724106158726e-05, "loss": 0.4898, "step": 23750 }, { "epoch": 5.881188118811881, "grad_norm": 0.6320541501045227, "learning_rate": 9.413076532491174e-05, "loss": 0.4998, "step": 23760 }, { "epoch": 5.883663366336633, "grad_norm": 0.6331081390380859, "learning_rate": 9.412428623679583e-05, "loss": 0.4896, "step": 23770 }, { "epoch": 5.8861386138613865, "grad_norm": 0.6856775283813477, "learning_rate": 9.411780379773159e-05, "loss": 0.4875, "step": 23780 }, { "epoch": 5.888613861386139, "grad_norm": 0.6812372207641602, "learning_rate": 9.41113180082113e-05, "loss": 0.4911, "step": 23790 }, { "epoch": 5.891089108910891, "grad_norm": 0.6925831437110901, "learning_rate": 9.410482886872751e-05, "loss": 0.497, "step": 23800 }, { "epoch": 5.893564356435643, "grad_norm": 0.7296769618988037, "learning_rate": 9.409833637977303e-05, "loss": 0.4924, "step": 23810 }, { "epoch": 5.896039603960396, "grad_norm": 0.6531125903129578, "learning_rate": 9.409184054184094e-05, "loss": 0.4861, "step": 23820 }, { "epoch": 5.898514851485149, "grad_norm": 0.7011905908584595, "learning_rate": 9.408534135542452e-05, "loss": 0.483, "step": 23830 }, { "epoch": 5.900990099009901, "grad_norm": 0.6976701617240906, "learning_rate": 9.407883882101738e-05, "loss": 0.4943, "step": 23840 }, { "epoch": 5.903465346534653, "grad_norm": 0.6474626660346985, "learning_rate": 9.407233293911333e-05, "loss": 0.5009, "step": 23850 }, { "epoch": 5.905940594059406, "grad_norm": 0.7295416593551636, "learning_rate": 9.406582371020642e-05, "loss": 0.489, "step": 23860 }, { "epoch": 5.908415841584159, "grad_norm": 0.660409688949585, "learning_rate": 9.405931113479104e-05, "loss": 0.4881, "step": 23870 }, { "epoch": 5.910891089108911, "grad_norm": 0.665587842464447, "learning_rate": 9.405279521336173e-05, "loss": 0.4962, "step": 23880 }, { "epoch": 5.913366336633663, "grad_norm": 0.6610579490661621, "learning_rate": 9.404627594641334e-05, "loss": 0.4907, "step": 23890 }, { "epoch": 5.915841584158416, "grad_norm": 0.6576491594314575, "learning_rate": 9.403975333444098e-05, "loss": 0.496, "step": 23900 }, { "epoch": 5.9183168316831685, "grad_norm": 0.6864446401596069, "learning_rate": 9.403322737794e-05, "loss": 0.4922, "step": 23910 }, { "epoch": 5.920792079207921, "grad_norm": 0.6313393712043762, "learning_rate": 9.402669807740598e-05, "loss": 0.4884, "step": 23920 }, { "epoch": 5.923267326732673, "grad_norm": 0.6663051843643188, "learning_rate": 9.402016543333481e-05, "loss": 0.4941, "step": 23930 }, { "epoch": 5.925742574257426, "grad_norm": 0.6199500560760498, "learning_rate": 9.401362944622256e-05, "loss": 0.493, "step": 23940 }, { "epoch": 5.928217821782178, "grad_norm": 0.6733233332633972, "learning_rate": 9.400709011656563e-05, "loss": 0.4947, "step": 23950 }, { "epoch": 5.930693069306931, "grad_norm": 0.6403918862342834, "learning_rate": 9.400054744486062e-05, "loss": 0.4874, "step": 23960 }, { "epoch": 5.933168316831683, "grad_norm": 0.7035472393035889, "learning_rate": 9.399400143160442e-05, "loss": 0.4915, "step": 23970 }, { "epoch": 5.935643564356436, "grad_norm": 0.6897085905075073, "learning_rate": 9.398745207729414e-05, "loss": 0.4898, "step": 23980 }, { "epoch": 5.938118811881188, "grad_norm": 0.6750791072845459, "learning_rate": 9.398089938242718e-05, "loss": 0.4967, "step": 23990 }, { "epoch": 5.9405940594059405, "grad_norm": 0.6912382245063782, "learning_rate": 9.397434334750115e-05, "loss": 0.4877, "step": 24000 }, { "epoch": 5.943069306930693, "grad_norm": 0.7024639844894409, "learning_rate": 9.396778397301394e-05, "loss": 0.488, "step": 24010 }, { "epoch": 5.945544554455445, "grad_norm": 0.6923540830612183, "learning_rate": 9.396122125946372e-05, "loss": 0.495, "step": 24020 }, { "epoch": 5.948019801980198, "grad_norm": 0.6754488348960876, "learning_rate": 9.395465520734884e-05, "loss": 0.4918, "step": 24030 }, { "epoch": 5.9504950495049505, "grad_norm": 0.6839036345481873, "learning_rate": 9.394808581716799e-05, "loss": 0.4928, "step": 24040 }, { "epoch": 5.952970297029703, "grad_norm": 0.7124633193016052, "learning_rate": 9.394151308942005e-05, "loss": 0.4923, "step": 24050 }, { "epoch": 5.955445544554456, "grad_norm": 0.6507765650749207, "learning_rate": 9.393493702460416e-05, "loss": 0.4893, "step": 24060 }, { "epoch": 5.957920792079208, "grad_norm": 0.656505286693573, "learning_rate": 9.392835762321977e-05, "loss": 0.4913, "step": 24070 }, { "epoch": 5.96039603960396, "grad_norm": 0.6479337215423584, "learning_rate": 9.392177488576653e-05, "loss": 0.4902, "step": 24080 }, { "epoch": 5.962871287128713, "grad_norm": 0.6815648674964905, "learning_rate": 9.391518881274433e-05, "loss": 0.4969, "step": 24090 }, { "epoch": 5.965346534653465, "grad_norm": 0.7079155445098877, "learning_rate": 9.390859940465335e-05, "loss": 0.4903, "step": 24100 }, { "epoch": 5.967821782178218, "grad_norm": 0.6621034741401672, "learning_rate": 9.390200666199402e-05, "loss": 0.4929, "step": 24110 }, { "epoch": 5.97029702970297, "grad_norm": 0.6282349228858948, "learning_rate": 9.389541058526703e-05, "loss": 0.4891, "step": 24120 }, { "epoch": 5.9727722772277225, "grad_norm": 0.6392198204994202, "learning_rate": 9.388881117497327e-05, "loss": 0.4974, "step": 24130 }, { "epoch": 5.975247524752476, "grad_norm": 0.6531199812889099, "learning_rate": 9.388220843161395e-05, "loss": 0.49, "step": 24140 }, { "epoch": 5.977722772277228, "grad_norm": 0.6396721005439758, "learning_rate": 9.387560235569049e-05, "loss": 0.5022, "step": 24150 }, { "epoch": 5.98019801980198, "grad_norm": 0.7001849412918091, "learning_rate": 9.38689929477046e-05, "loss": 0.4901, "step": 24160 }, { "epoch": 5.982673267326732, "grad_norm": 0.6312510371208191, "learning_rate": 9.386238020815819e-05, "loss": 0.4986, "step": 24170 }, { "epoch": 5.985148514851485, "grad_norm": 0.6960650682449341, "learning_rate": 9.385576413755349e-05, "loss": 0.4984, "step": 24180 }, { "epoch": 5.987623762376238, "grad_norm": 0.679093599319458, "learning_rate": 9.38491447363929e-05, "loss": 0.4926, "step": 24190 }, { "epoch": 5.99009900990099, "grad_norm": 0.7105864882469177, "learning_rate": 9.384252200517915e-05, "loss": 0.4909, "step": 24200 }, { "epoch": 5.992574257425742, "grad_norm": 0.6939651966094971, "learning_rate": 9.383589594441521e-05, "loss": 0.49, "step": 24210 }, { "epoch": 5.9950495049504955, "grad_norm": 0.6528273820877075, "learning_rate": 9.382926655460425e-05, "loss": 0.4916, "step": 24220 }, { "epoch": 5.997524752475248, "grad_norm": 0.7271142601966858, "learning_rate": 9.382263383624975e-05, "loss": 0.4869, "step": 24230 }, { "epoch": 6.0, "grad_norm": 0.6372568011283875, "learning_rate": 9.381599778985541e-05, "loss": 0.4892, "step": 24240 }, { "epoch": 6.002475247524752, "grad_norm": 0.6497102379798889, "learning_rate": 9.38093584159252e-05, "loss": 0.4875, "step": 24250 }, { "epoch": 6.0049504950495045, "grad_norm": 0.6621847152709961, "learning_rate": 9.380271571496334e-05, "loss": 0.4994, "step": 24260 }, { "epoch": 6.007425742574258, "grad_norm": 0.657985270023346, "learning_rate": 9.379606968747428e-05, "loss": 0.4987, "step": 24270 }, { "epoch": 6.00990099009901, "grad_norm": 0.6807758808135986, "learning_rate": 9.378942033396276e-05, "loss": 0.4931, "step": 24280 }, { "epoch": 6.012376237623762, "grad_norm": 0.6618520021438599, "learning_rate": 9.378276765493375e-05, "loss": 0.495, "step": 24290 }, { "epoch": 6.014851485148514, "grad_norm": 0.6710907816886902, "learning_rate": 9.377611165089249e-05, "loss": 0.4853, "step": 24300 }, { "epoch": 6.017326732673268, "grad_norm": 0.6685876250267029, "learning_rate": 9.376945232234445e-05, "loss": 0.4932, "step": 24310 }, { "epoch": 6.01980198019802, "grad_norm": 0.6695975661277771, "learning_rate": 9.376278966979535e-05, "loss": 0.4884, "step": 24320 }, { "epoch": 6.022277227722772, "grad_norm": 0.6232790350914001, "learning_rate": 9.375612369375118e-05, "loss": 0.4904, "step": 24330 }, { "epoch": 6.024752475247524, "grad_norm": 0.6870796084403992, "learning_rate": 9.374945439471818e-05, "loss": 0.49, "step": 24340 }, { "epoch": 6.0272277227722775, "grad_norm": 0.6910142302513123, "learning_rate": 9.374278177320284e-05, "loss": 0.4947, "step": 24350 }, { "epoch": 6.02970297029703, "grad_norm": 0.6701478958129883, "learning_rate": 9.373610582971191e-05, "loss": 0.4872, "step": 24360 }, { "epoch": 6.032178217821782, "grad_norm": 0.6414119601249695, "learning_rate": 9.372942656475236e-05, "loss": 0.4925, "step": 24370 }, { "epoch": 6.034653465346534, "grad_norm": 0.6722452044487, "learning_rate": 9.372274397883147e-05, "loss": 0.4908, "step": 24380 }, { "epoch": 6.037128712871287, "grad_norm": 0.6733793020248413, "learning_rate": 9.371605807245671e-05, "loss": 0.4963, "step": 24390 }, { "epoch": 6.03960396039604, "grad_norm": 0.6465936899185181, "learning_rate": 9.370936884613585e-05, "loss": 0.4927, "step": 24400 }, { "epoch": 6.042079207920792, "grad_norm": 0.6408087015151978, "learning_rate": 9.370267630037688e-05, "loss": 0.496, "step": 24410 }, { "epoch": 6.044554455445544, "grad_norm": 0.6562625169754028, "learning_rate": 9.369598043568804e-05, "loss": 0.4883, "step": 24420 }, { "epoch": 6.047029702970297, "grad_norm": 0.6428780555725098, "learning_rate": 9.368928125257787e-05, "loss": 0.4917, "step": 24430 }, { "epoch": 6.0495049504950495, "grad_norm": 0.7080708742141724, "learning_rate": 9.36825787515551e-05, "loss": 0.4858, "step": 24440 }, { "epoch": 6.051980198019802, "grad_norm": 0.68021160364151, "learning_rate": 9.367587293312878e-05, "loss": 0.4901, "step": 24450 }, { "epoch": 6.054455445544554, "grad_norm": 0.7010735869407654, "learning_rate": 9.366916379780812e-05, "loss": 0.493, "step": 24460 }, { "epoch": 6.056930693069307, "grad_norm": 0.6536318063735962, "learning_rate": 9.366245134610268e-05, "loss": 0.4939, "step": 24470 }, { "epoch": 6.0594059405940595, "grad_norm": 0.6788082718849182, "learning_rate": 9.36557355785222e-05, "loss": 0.4949, "step": 24480 }, { "epoch": 6.061881188118812, "grad_norm": 0.7044695615768433, "learning_rate": 9.36490164955767e-05, "loss": 0.4962, "step": 24490 }, { "epoch": 6.064356435643564, "grad_norm": 0.6214095950126648, "learning_rate": 9.364229409777646e-05, "loss": 0.4894, "step": 24500 }, { "epoch": 6.066831683168317, "grad_norm": 0.6542657017707825, "learning_rate": 9.3635568385632e-05, "loss": 0.4907, "step": 24510 }, { "epoch": 6.069306930693069, "grad_norm": 0.652982771396637, "learning_rate": 9.362883935965409e-05, "loss": 0.4893, "step": 24520 }, { "epoch": 6.071782178217822, "grad_norm": 0.6343420147895813, "learning_rate": 9.362210702035374e-05, "loss": 0.4961, "step": 24530 }, { "epoch": 6.074257425742574, "grad_norm": 0.6828444004058838, "learning_rate": 9.361537136824226e-05, "loss": 0.4988, "step": 24540 }, { "epoch": 6.076732673267327, "grad_norm": 0.6403016448020935, "learning_rate": 9.360863240383117e-05, "loss": 0.4938, "step": 24550 }, { "epoch": 6.079207920792079, "grad_norm": 0.6503271460533142, "learning_rate": 9.360189012763223e-05, "loss": 0.4952, "step": 24560 }, { "epoch": 6.0816831683168315, "grad_norm": 0.7098903059959412, "learning_rate": 9.359514454015747e-05, "loss": 0.4923, "step": 24570 }, { "epoch": 6.084158415841584, "grad_norm": 0.639106810092926, "learning_rate": 9.35883956419192e-05, "loss": 0.4966, "step": 24580 }, { "epoch": 6.086633663366337, "grad_norm": 0.6759112477302551, "learning_rate": 9.358164343342993e-05, "loss": 0.4953, "step": 24590 }, { "epoch": 6.089108910891089, "grad_norm": 0.6764036417007446, "learning_rate": 9.357488791520246e-05, "loss": 0.4912, "step": 24600 }, { "epoch": 6.091584158415841, "grad_norm": 0.6553007364273071, "learning_rate": 9.356812908774983e-05, "loss": 0.4891, "step": 24610 }, { "epoch": 6.094059405940594, "grad_norm": 0.6521545052528381, "learning_rate": 9.356136695158531e-05, "loss": 0.4947, "step": 24620 }, { "epoch": 6.096534653465347, "grad_norm": 0.6755918264389038, "learning_rate": 9.355460150722245e-05, "loss": 0.4939, "step": 24630 }, { "epoch": 6.099009900990099, "grad_norm": 0.6334181427955627, "learning_rate": 9.354783275517504e-05, "loss": 0.4927, "step": 24640 }, { "epoch": 6.101485148514851, "grad_norm": 0.6679133772850037, "learning_rate": 9.354106069595714e-05, "loss": 0.4994, "step": 24650 }, { "epoch": 6.103960396039604, "grad_norm": 0.6724262237548828, "learning_rate": 9.353428533008301e-05, "loss": 0.4913, "step": 24660 }, { "epoch": 6.106435643564357, "grad_norm": 0.6735829710960388, "learning_rate": 9.352750665806723e-05, "loss": 0.4894, "step": 24670 }, { "epoch": 6.108910891089109, "grad_norm": 0.6622719764709473, "learning_rate": 9.352072468042454e-05, "loss": 0.4916, "step": 24680 }, { "epoch": 6.111386138613861, "grad_norm": 0.6264487504959106, "learning_rate": 9.351393939767004e-05, "loss": 0.4924, "step": 24690 }, { "epoch": 6.1138613861386135, "grad_norm": 0.6623631715774536, "learning_rate": 9.350715081031902e-05, "loss": 0.4874, "step": 24700 }, { "epoch": 6.116336633663367, "grad_norm": 0.6762424111366272, "learning_rate": 9.3500358918887e-05, "loss": 0.4933, "step": 24710 }, { "epoch": 6.118811881188119, "grad_norm": 0.6717430949211121, "learning_rate": 9.349356372388981e-05, "loss": 0.4951, "step": 24720 }, { "epoch": 6.121287128712871, "grad_norm": 0.6746101379394531, "learning_rate": 9.348676522584349e-05, "loss": 0.498, "step": 24730 }, { "epoch": 6.123762376237623, "grad_norm": 0.655380368232727, "learning_rate": 9.347996342526432e-05, "loss": 0.4925, "step": 24740 }, { "epoch": 6.126237623762377, "grad_norm": 0.6535238027572632, "learning_rate": 9.347315832266889e-05, "loss": 0.4916, "step": 24750 }, { "epoch": 6.128712871287129, "grad_norm": 0.6670726537704468, "learning_rate": 9.346634991857398e-05, "loss": 0.4912, "step": 24760 }, { "epoch": 6.131188118811881, "grad_norm": 0.7049589157104492, "learning_rate": 9.345953821349664e-05, "loss": 0.4896, "step": 24770 }, { "epoch": 6.133663366336633, "grad_norm": 0.6766095161437988, "learning_rate": 9.34527232079542e-05, "loss": 0.4994, "step": 24780 }, { "epoch": 6.1361386138613865, "grad_norm": 0.6501331925392151, "learning_rate": 9.344590490246417e-05, "loss": 0.4927, "step": 24790 }, { "epoch": 6.138613861386139, "grad_norm": 0.6780247688293457, "learning_rate": 9.343908329754438e-05, "loss": 0.4919, "step": 24800 }, { "epoch": 6.141089108910891, "grad_norm": 0.6131954193115234, "learning_rate": 9.34322583937129e-05, "loss": 0.49, "step": 24810 }, { "epoch": 6.143564356435643, "grad_norm": 0.6923993229866028, "learning_rate": 9.342543019148803e-05, "loss": 0.4926, "step": 24820 }, { "epoch": 6.146039603960396, "grad_norm": 0.6479511260986328, "learning_rate": 9.341859869138831e-05, "loss": 0.4923, "step": 24830 }, { "epoch": 6.148514851485149, "grad_norm": 0.6252142786979675, "learning_rate": 9.341176389393256e-05, "loss": 0.4899, "step": 24840 }, { "epoch": 6.150990099009901, "grad_norm": 0.6617873311042786, "learning_rate": 9.340492579963982e-05, "loss": 0.4924, "step": 24850 }, { "epoch": 6.153465346534653, "grad_norm": 0.6561679244041443, "learning_rate": 9.339808440902942e-05, "loss": 0.488, "step": 24860 }, { "epoch": 6.155940594059406, "grad_norm": 0.6497090458869934, "learning_rate": 9.339123972262093e-05, "loss": 0.4848, "step": 24870 }, { "epoch": 6.158415841584159, "grad_norm": 0.683704674243927, "learning_rate": 9.338439174093412e-05, "loss": 0.4897, "step": 24880 }, { "epoch": 6.160891089108911, "grad_norm": 0.6555683612823486, "learning_rate": 9.337754046448908e-05, "loss": 0.496, "step": 24890 }, { "epoch": 6.163366336633663, "grad_norm": 0.6415319442749023, "learning_rate": 9.33706858938061e-05, "loss": 0.4934, "step": 24900 }, { "epoch": 6.165841584158416, "grad_norm": 0.6786409616470337, "learning_rate": 9.336382802940575e-05, "loss": 0.4878, "step": 24910 }, { "epoch": 6.1683168316831685, "grad_norm": 0.7164464592933655, "learning_rate": 9.335696687180886e-05, "loss": 0.5008, "step": 24920 }, { "epoch": 6.170792079207921, "grad_norm": 0.6412750482559204, "learning_rate": 9.335010242153644e-05, "loss": 0.4937, "step": 24930 }, { "epoch": 6.173267326732673, "grad_norm": 0.6680976748466492, "learning_rate": 9.334323467910984e-05, "loss": 0.4878, "step": 24940 }, { "epoch": 6.175742574257426, "grad_norm": 0.652101457118988, "learning_rate": 9.33363636450506e-05, "loss": 0.4951, "step": 24950 }, { "epoch": 6.178217821782178, "grad_norm": 0.6529828906059265, "learning_rate": 9.332948931988054e-05, "loss": 0.4932, "step": 24960 }, { "epoch": 6.180693069306931, "grad_norm": 0.647549569606781, "learning_rate": 9.332261170412171e-05, "loss": 0.4899, "step": 24970 }, { "epoch": 6.183168316831683, "grad_norm": 0.7107653021812439, "learning_rate": 9.331573079829643e-05, "loss": 0.4877, "step": 24980 }, { "epoch": 6.185643564356436, "grad_norm": 0.6961306929588318, "learning_rate": 9.330884660292724e-05, "loss": 0.4948, "step": 24990 }, { "epoch": 6.188118811881188, "grad_norm": 0.6503212451934814, "learning_rate": 9.330195911853699e-05, "loss": 0.4901, "step": 25000 }, { "epoch": 6.1905940594059405, "grad_norm": 0.6334972977638245, "learning_rate": 9.32950683456487e-05, "loss": 0.4903, "step": 25010 }, { "epoch": 6.193069306930693, "grad_norm": 0.6619696021080017, "learning_rate": 9.328817428478569e-05, "loss": 0.4886, "step": 25020 }, { "epoch": 6.195544554455446, "grad_norm": 0.6590623259544373, "learning_rate": 9.328127693647153e-05, "loss": 0.4896, "step": 25030 }, { "epoch": 6.198019801980198, "grad_norm": 0.6700637936592102, "learning_rate": 9.327437630123e-05, "loss": 0.4864, "step": 25040 }, { "epoch": 6.2004950495049505, "grad_norm": 0.6782771348953247, "learning_rate": 9.32674723795852e-05, "loss": 0.4926, "step": 25050 }, { "epoch": 6.202970297029703, "grad_norm": 0.6580089926719666, "learning_rate": 9.326056517206138e-05, "loss": 0.4904, "step": 25060 }, { "epoch": 6.205445544554456, "grad_norm": 0.6386672258377075, "learning_rate": 9.325365467918316e-05, "loss": 0.4928, "step": 25070 }, { "epoch": 6.207920792079208, "grad_norm": 0.68783038854599, "learning_rate": 9.324674090147531e-05, "loss": 0.4922, "step": 25080 }, { "epoch": 6.21039603960396, "grad_norm": 0.7212231755256653, "learning_rate": 9.323982383946288e-05, "loss": 0.496, "step": 25090 }, { "epoch": 6.212871287128713, "grad_norm": 0.6984595060348511, "learning_rate": 9.32329034936712e-05, "loss": 0.4921, "step": 25100 }, { "epoch": 6.215346534653466, "grad_norm": 0.6002871990203857, "learning_rate": 9.322597986462582e-05, "loss": 0.4909, "step": 25110 }, { "epoch": 6.217821782178218, "grad_norm": 0.6647087931632996, "learning_rate": 9.321905295285251e-05, "loss": 0.4919, "step": 25120 }, { "epoch": 6.22029702970297, "grad_norm": 0.6376528143882751, "learning_rate": 9.321212275887736e-05, "loss": 0.4871, "step": 25130 }, { "epoch": 6.2227722772277225, "grad_norm": 0.6301149129867554, "learning_rate": 9.320518928322668e-05, "loss": 0.4957, "step": 25140 }, { "epoch": 6.225247524752476, "grad_norm": 0.6285369396209717, "learning_rate": 9.319825252642697e-05, "loss": 0.4909, "step": 25150 }, { "epoch": 6.227722772277228, "grad_norm": 0.635596752166748, "learning_rate": 9.31913124890051e-05, "loss": 0.493, "step": 25160 }, { "epoch": 6.23019801980198, "grad_norm": 0.5980125069618225, "learning_rate": 9.318436917148806e-05, "loss": 0.4911, "step": 25170 }, { "epoch": 6.232673267326732, "grad_norm": 0.6543392539024353, "learning_rate": 9.317742257440318e-05, "loss": 0.4924, "step": 25180 }, { "epoch": 6.235148514851485, "grad_norm": 0.6598267555236816, "learning_rate": 9.317047269827801e-05, "loss": 0.4921, "step": 25190 }, { "epoch": 6.237623762376238, "grad_norm": 0.6703551411628723, "learning_rate": 9.316351954364033e-05, "loss": 0.4987, "step": 25200 }, { "epoch": 6.24009900990099, "grad_norm": 0.6835122108459473, "learning_rate": 9.31565631110182e-05, "loss": 0.4938, "step": 25210 }, { "epoch": 6.242574257425742, "grad_norm": 0.6419340968132019, "learning_rate": 9.31496034009399e-05, "loss": 0.4883, "step": 25220 }, { "epoch": 6.2450495049504955, "grad_norm": 0.679504930973053, "learning_rate": 9.314264041393398e-05, "loss": 0.4967, "step": 25230 }, { "epoch": 6.247524752475248, "grad_norm": 0.6661419868469238, "learning_rate": 9.313567415052926e-05, "loss": 0.4939, "step": 25240 }, { "epoch": 6.25, "grad_norm": 0.6338697671890259, "learning_rate": 9.312870461125475e-05, "loss": 0.4912, "step": 25250 }, { "epoch": 6.252475247524752, "grad_norm": 0.6390149593353271, "learning_rate": 9.312173179663974e-05, "loss": 0.4896, "step": 25260 }, { "epoch": 6.2549504950495045, "grad_norm": 0.6276742219924927, "learning_rate": 9.311475570721378e-05, "loss": 0.4851, "step": 25270 }, { "epoch": 6.257425742574258, "grad_norm": 0.6217899322509766, "learning_rate": 9.310777634350667e-05, "loss": 0.4964, "step": 25280 }, { "epoch": 6.25990099009901, "grad_norm": 0.6407887935638428, "learning_rate": 9.310079370604842e-05, "loss": 0.4882, "step": 25290 }, { "epoch": 6.262376237623762, "grad_norm": 0.6487254500389099, "learning_rate": 9.309380779536933e-05, "loss": 0.4883, "step": 25300 }, { "epoch": 6.264851485148515, "grad_norm": 0.6450498104095459, "learning_rate": 9.308681861199993e-05, "loss": 0.4939, "step": 25310 }, { "epoch": 6.267326732673268, "grad_norm": 0.6260886788368225, "learning_rate": 9.307982615647101e-05, "loss": 0.49, "step": 25320 }, { "epoch": 6.26980198019802, "grad_norm": 0.6892823576927185, "learning_rate": 9.30728304293136e-05, "loss": 0.4915, "step": 25330 }, { "epoch": 6.272277227722772, "grad_norm": 0.664807140827179, "learning_rate": 9.306583143105897e-05, "loss": 0.49, "step": 25340 }, { "epoch": 6.274752475247524, "grad_norm": 0.6513121724128723, "learning_rate": 9.305882916223865e-05, "loss": 0.493, "step": 25350 }, { "epoch": 6.2772277227722775, "grad_norm": 0.6260848641395569, "learning_rate": 9.305182362338443e-05, "loss": 0.4951, "step": 25360 }, { "epoch": 6.27970297029703, "grad_norm": 0.6537912487983704, "learning_rate": 9.304481481502831e-05, "loss": 0.4881, "step": 25370 }, { "epoch": 6.282178217821782, "grad_norm": 0.6462581753730774, "learning_rate": 9.30378027377026e-05, "loss": 0.4892, "step": 25380 }, { "epoch": 6.284653465346534, "grad_norm": 0.6405371427536011, "learning_rate": 9.303078739193977e-05, "loss": 0.4899, "step": 25390 }, { "epoch": 6.287128712871287, "grad_norm": 0.6394467949867249, "learning_rate": 9.302376877827263e-05, "loss": 0.4915, "step": 25400 }, { "epoch": 6.28960396039604, "grad_norm": 0.6791539192199707, "learning_rate": 9.301674689723417e-05, "loss": 0.4912, "step": 25410 }, { "epoch": 6.292079207920792, "grad_norm": 0.6501572132110596, "learning_rate": 9.300972174935767e-05, "loss": 0.4923, "step": 25420 }, { "epoch": 6.294554455445544, "grad_norm": 0.6202908158302307, "learning_rate": 9.300269333517667e-05, "loss": 0.4866, "step": 25430 }, { "epoch": 6.297029702970297, "grad_norm": 0.6892065405845642, "learning_rate": 9.299566165522486e-05, "loss": 0.4905, "step": 25440 }, { "epoch": 6.2995049504950495, "grad_norm": 0.6453456878662109, "learning_rate": 9.298862671003632e-05, "loss": 0.491, "step": 25450 }, { "epoch": 6.301980198019802, "grad_norm": 0.6597932577133179, "learning_rate": 9.298158850014529e-05, "loss": 0.4885, "step": 25460 }, { "epoch": 6.304455445544554, "grad_norm": 0.6508418321609497, "learning_rate": 9.297454702608625e-05, "loss": 0.49, "step": 25470 }, { "epoch": 6.306930693069307, "grad_norm": 0.6360312104225159, "learning_rate": 9.296750228839398e-05, "loss": 0.4983, "step": 25480 }, { "epoch": 6.3094059405940595, "grad_norm": 0.6611862778663635, "learning_rate": 9.296045428760347e-05, "loss": 0.4884, "step": 25490 }, { "epoch": 6.311881188118812, "grad_norm": 0.743607759475708, "learning_rate": 9.295340302424996e-05, "loss": 0.4932, "step": 25500 }, { "epoch": 6.314356435643564, "grad_norm": 0.6791080832481384, "learning_rate": 9.294634849886898e-05, "loss": 0.4963, "step": 25510 }, { "epoch": 6.316831683168317, "grad_norm": 0.6653355360031128, "learning_rate": 9.293929071199622e-05, "loss": 0.4954, "step": 25520 }, { "epoch": 6.319306930693069, "grad_norm": 0.6736785173416138, "learning_rate": 9.293222966416774e-05, "loss": 0.4919, "step": 25530 }, { "epoch": 6.321782178217822, "grad_norm": 0.6455187797546387, "learning_rate": 9.292516535591973e-05, "loss": 0.4864, "step": 25540 }, { "epoch": 6.324257425742574, "grad_norm": 0.636764645576477, "learning_rate": 9.29180977877887e-05, "loss": 0.4935, "step": 25550 }, { "epoch": 6.326732673267327, "grad_norm": 0.6389903426170349, "learning_rate": 9.291102696031136e-05, "loss": 0.4931, "step": 25560 }, { "epoch": 6.329207920792079, "grad_norm": 0.6986674070358276, "learning_rate": 9.290395287402472e-05, "loss": 0.4905, "step": 25570 }, { "epoch": 6.3316831683168315, "grad_norm": 0.6972480416297913, "learning_rate": 9.289687552946601e-05, "loss": 0.4874, "step": 25580 }, { "epoch": 6.334158415841584, "grad_norm": 0.6341923475265503, "learning_rate": 9.28897949271727e-05, "loss": 0.4928, "step": 25590 }, { "epoch": 6.336633663366337, "grad_norm": 0.6156107187271118, "learning_rate": 9.288271106768249e-05, "loss": 0.4833, "step": 25600 }, { "epoch": 6.339108910891089, "grad_norm": 0.682471513748169, "learning_rate": 9.287562395153339e-05, "loss": 0.4877, "step": 25610 }, { "epoch": 6.341584158415841, "grad_norm": 0.619806170463562, "learning_rate": 9.286853357926361e-05, "loss": 0.4914, "step": 25620 }, { "epoch": 6.344059405940594, "grad_norm": 0.6142368912696838, "learning_rate": 9.286143995141161e-05, "loss": 0.4876, "step": 25630 }, { "epoch": 6.346534653465347, "grad_norm": 0.6609285473823547, "learning_rate": 9.28543430685161e-05, "loss": 0.4805, "step": 25640 }, { "epoch": 6.349009900990099, "grad_norm": 0.625974178314209, "learning_rate": 9.284724293111605e-05, "loss": 0.4867, "step": 25650 }, { "epoch": 6.351485148514851, "grad_norm": 0.653078019618988, "learning_rate": 9.284013953975068e-05, "loss": 0.4908, "step": 25660 }, { "epoch": 6.353960396039604, "grad_norm": 0.6692869663238525, "learning_rate": 9.283303289495942e-05, "loss": 0.484, "step": 25670 }, { "epoch": 6.356435643564357, "grad_norm": 0.6666257381439209, "learning_rate": 9.282592299728198e-05, "loss": 0.4897, "step": 25680 }, { "epoch": 6.358910891089109, "grad_norm": 0.6990106105804443, "learning_rate": 9.281880984725832e-05, "loss": 0.488, "step": 25690 }, { "epoch": 6.361386138613861, "grad_norm": 0.6461585164070129, "learning_rate": 9.281169344542863e-05, "loss": 0.4884, "step": 25700 }, { "epoch": 6.3638613861386135, "grad_norm": 0.6252250671386719, "learning_rate": 9.280457379233336e-05, "loss": 0.4899, "step": 25710 }, { "epoch": 6.366336633663367, "grad_norm": 0.6767966747283936, "learning_rate": 9.27974508885132e-05, "loss": 0.4916, "step": 25720 }, { "epoch": 6.368811881188119, "grad_norm": 0.6934974193572998, "learning_rate": 9.279032473450907e-05, "loss": 0.492, "step": 25730 }, { "epoch": 6.371287128712871, "grad_norm": 0.6719532608985901, "learning_rate": 9.278319533086217e-05, "loss": 0.4859, "step": 25740 }, { "epoch": 6.373762376237623, "grad_norm": 0.6457440853118896, "learning_rate": 9.277606267811394e-05, "loss": 0.4897, "step": 25750 }, { "epoch": 6.376237623762377, "grad_norm": 0.7024503946304321, "learning_rate": 9.276892677680605e-05, "loss": 0.4884, "step": 25760 }, { "epoch": 6.378712871287129, "grad_norm": 0.6568511724472046, "learning_rate": 9.276178762748041e-05, "loss": 0.4935, "step": 25770 }, { "epoch": 6.381188118811881, "grad_norm": 0.604296088218689, "learning_rate": 9.27546452306792e-05, "loss": 0.4889, "step": 25780 }, { "epoch": 6.383663366336633, "grad_norm": 0.641829788684845, "learning_rate": 9.274749958694485e-05, "loss": 0.4901, "step": 25790 }, { "epoch": 6.3861386138613865, "grad_norm": 0.603900134563446, "learning_rate": 9.274035069682001e-05, "loss": 0.4893, "step": 25800 }, { "epoch": 6.388613861386139, "grad_norm": 0.6840150952339172, "learning_rate": 9.273319856084761e-05, "loss": 0.4893, "step": 25810 }, { "epoch": 6.391089108910891, "grad_norm": 0.6736915707588196, "learning_rate": 9.272604317957078e-05, "loss": 0.4945, "step": 25820 }, { "epoch": 6.393564356435643, "grad_norm": 0.6141803860664368, "learning_rate": 9.271888455353295e-05, "loss": 0.4956, "step": 25830 }, { "epoch": 6.396039603960396, "grad_norm": 0.6472374796867371, "learning_rate": 9.271172268327774e-05, "loss": 0.4954, "step": 25840 }, { "epoch": 6.398514851485149, "grad_norm": 0.6247477531433105, "learning_rate": 9.270455756934907e-05, "loss": 0.4898, "step": 25850 }, { "epoch": 6.400990099009901, "grad_norm": 0.6438142657279968, "learning_rate": 9.269738921229108e-05, "loss": 0.4916, "step": 25860 }, { "epoch": 6.403465346534653, "grad_norm": 0.6714075803756714, "learning_rate": 9.269021761264815e-05, "loss": 0.4918, "step": 25870 }, { "epoch": 6.405940594059406, "grad_norm": 0.6330602169036865, "learning_rate": 9.268304277096495e-05, "loss": 0.4878, "step": 25880 }, { "epoch": 6.408415841584159, "grad_norm": 0.6422138214111328, "learning_rate": 9.26758646877863e-05, "loss": 0.4922, "step": 25890 }, { "epoch": 6.410891089108911, "grad_norm": 0.6809672713279724, "learning_rate": 9.26686833636574e-05, "loss": 0.4864, "step": 25900 }, { "epoch": 6.413366336633663, "grad_norm": 0.7084232568740845, "learning_rate": 9.266149879912356e-05, "loss": 0.4983, "step": 25910 }, { "epoch": 6.415841584158416, "grad_norm": 0.6422600150108337, "learning_rate": 9.265431099473044e-05, "loss": 0.4912, "step": 25920 }, { "epoch": 6.4183168316831685, "grad_norm": 0.6345969438552856, "learning_rate": 9.264711995102388e-05, "loss": 0.4888, "step": 25930 }, { "epoch": 6.420792079207921, "grad_norm": 0.6977962851524353, "learning_rate": 9.263992566855004e-05, "loss": 0.4953, "step": 25940 }, { "epoch": 6.423267326732673, "grad_norm": 0.6385856866836548, "learning_rate": 9.26327281478552e-05, "loss": 0.4911, "step": 25950 }, { "epoch": 6.425742574257426, "grad_norm": 0.6841159462928772, "learning_rate": 9.262552738948605e-05, "loss": 0.4972, "step": 25960 }, { "epoch": 6.428217821782178, "grad_norm": 0.6395867466926575, "learning_rate": 9.261832339398938e-05, "loss": 0.4949, "step": 25970 }, { "epoch": 6.430693069306931, "grad_norm": 0.6232658624649048, "learning_rate": 9.26111161619123e-05, "loss": 0.4855, "step": 25980 }, { "epoch": 6.433168316831683, "grad_norm": 0.7298741936683655, "learning_rate": 9.260390569380215e-05, "loss": 0.4937, "step": 25990 }, { "epoch": 6.435643564356436, "grad_norm": 0.642652690410614, "learning_rate": 9.259669199020653e-05, "loss": 0.4941, "step": 26000 }, { "epoch": 6.438118811881188, "grad_norm": 0.6610262393951416, "learning_rate": 9.258947505167328e-05, "loss": 0.4905, "step": 26010 }, { "epoch": 6.4405940594059405, "grad_norm": 0.6203672885894775, "learning_rate": 9.258225487875045e-05, "loss": 0.4922, "step": 26020 }, { "epoch": 6.443069306930693, "grad_norm": 0.6515893340110779, "learning_rate": 9.257503147198638e-05, "loss": 0.4891, "step": 26030 }, { "epoch": 6.445544554455446, "grad_norm": 0.6358610391616821, "learning_rate": 9.256780483192965e-05, "loss": 0.487, "step": 26040 }, { "epoch": 6.448019801980198, "grad_norm": 0.6218585968017578, "learning_rate": 9.256057495912906e-05, "loss": 0.4889, "step": 26050 }, { "epoch": 6.4504950495049505, "grad_norm": 0.6546818614006042, "learning_rate": 9.255334185413368e-05, "loss": 0.4941, "step": 26060 }, { "epoch": 6.452970297029703, "grad_norm": 0.6436471343040466, "learning_rate": 9.254610551749282e-05, "loss": 0.4864, "step": 26070 }, { "epoch": 6.455445544554456, "grad_norm": 0.6662817001342773, "learning_rate": 9.253886594975602e-05, "loss": 0.4883, "step": 26080 }, { "epoch": 6.457920792079208, "grad_norm": 0.6165592670440674, "learning_rate": 9.253162315147308e-05, "loss": 0.4939, "step": 26090 }, { "epoch": 6.46039603960396, "grad_norm": 0.5739704966545105, "learning_rate": 9.252437712319405e-05, "loss": 0.4883, "step": 26100 }, { "epoch": 6.462871287128713, "grad_norm": 0.6447372436523438, "learning_rate": 9.251712786546923e-05, "loss": 0.4906, "step": 26110 }, { "epoch": 6.465346534653466, "grad_norm": 0.6316452622413635, "learning_rate": 9.250987537884911e-05, "loss": 0.4854, "step": 26120 }, { "epoch": 6.467821782178218, "grad_norm": 0.6485469937324524, "learning_rate": 9.25026196638845e-05, "loss": 0.4957, "step": 26130 }, { "epoch": 6.47029702970297, "grad_norm": 0.6707162857055664, "learning_rate": 9.249536072112643e-05, "loss": 0.4955, "step": 26140 }, { "epoch": 6.4727722772277225, "grad_norm": 0.6437131762504578, "learning_rate": 9.248809855112615e-05, "loss": 0.4901, "step": 26150 }, { "epoch": 6.475247524752476, "grad_norm": 0.6176459193229675, "learning_rate": 9.248083315443518e-05, "loss": 0.4858, "step": 26160 }, { "epoch": 6.477722772277228, "grad_norm": 0.6345722079277039, "learning_rate": 9.24735645316053e-05, "loss": 0.4879, "step": 26170 }, { "epoch": 6.48019801980198, "grad_norm": 0.6341410875320435, "learning_rate": 9.246629268318847e-05, "loss": 0.4881, "step": 26180 }, { "epoch": 6.482673267326732, "grad_norm": 0.6080365180969238, "learning_rate": 9.245901760973698e-05, "loss": 0.4878, "step": 26190 }, { "epoch": 6.485148514851485, "grad_norm": 0.6481447815895081, "learning_rate": 9.24517393118033e-05, "loss": 0.4866, "step": 26200 }, { "epoch": 6.487623762376238, "grad_norm": 0.6434682011604309, "learning_rate": 9.244445778994018e-05, "loss": 0.4891, "step": 26210 }, { "epoch": 6.49009900990099, "grad_norm": 0.6567414999008179, "learning_rate": 9.243717304470061e-05, "loss": 0.4942, "step": 26220 }, { "epoch": 6.492574257425742, "grad_norm": 0.6230173707008362, "learning_rate": 9.242988507663779e-05, "loss": 0.4945, "step": 26230 }, { "epoch": 6.4950495049504955, "grad_norm": 0.6748486161231995, "learning_rate": 9.242259388630522e-05, "loss": 0.4896, "step": 26240 }, { "epoch": 6.497524752475248, "grad_norm": 0.6592660546302795, "learning_rate": 9.24152994742566e-05, "loss": 0.4939, "step": 26250 }, { "epoch": 6.5, "grad_norm": 0.6679662466049194, "learning_rate": 9.240800184104592e-05, "loss": 0.4852, "step": 26260 }, { "epoch": 6.502475247524752, "grad_norm": 0.6732894778251648, "learning_rate": 9.240070098722735e-05, "loss": 0.4889, "step": 26270 }, { "epoch": 6.5049504950495045, "grad_norm": 0.6247438192367554, "learning_rate": 9.239339691335538e-05, "loss": 0.4941, "step": 26280 }, { "epoch": 6.507425742574258, "grad_norm": 0.607374906539917, "learning_rate": 9.238608961998467e-05, "loss": 0.4938, "step": 26290 }, { "epoch": 6.50990099009901, "grad_norm": 0.7897190451622009, "learning_rate": 9.237877910767019e-05, "loss": 0.4863, "step": 26300 }, { "epoch": 6.512376237623762, "grad_norm": 0.7005212903022766, "learning_rate": 9.23714653769671e-05, "loss": 0.4886, "step": 26310 }, { "epoch": 6.514851485148515, "grad_norm": 0.6553071141242981, "learning_rate": 9.236414842843084e-05, "loss": 0.4937, "step": 26320 }, { "epoch": 6.517326732673268, "grad_norm": 0.6475071310997009, "learning_rate": 9.23568282626171e-05, "loss": 0.4945, "step": 26330 }, { "epoch": 6.51980198019802, "grad_norm": 0.6870328783988953, "learning_rate": 9.234950488008179e-05, "loss": 0.4852, "step": 26340 }, { "epoch": 6.522277227722772, "grad_norm": 0.6530262231826782, "learning_rate": 9.234217828138104e-05, "loss": 0.4928, "step": 26350 }, { "epoch": 6.524752475247524, "grad_norm": 0.6316383481025696, "learning_rate": 9.23348484670713e-05, "loss": 0.4892, "step": 26360 }, { "epoch": 6.5272277227722775, "grad_norm": 0.6046133041381836, "learning_rate": 9.23275154377092e-05, "loss": 0.4941, "step": 26370 }, { "epoch": 6.52970297029703, "grad_norm": 0.7603687047958374, "learning_rate": 9.232017919385164e-05, "loss": 0.4925, "step": 26380 }, { "epoch": 6.532178217821782, "grad_norm": 0.6884335875511169, "learning_rate": 9.231283973605576e-05, "loss": 0.4945, "step": 26390 }, { "epoch": 6.534653465346535, "grad_norm": 0.6536504030227661, "learning_rate": 9.230549706487894e-05, "loss": 0.4852, "step": 26400 }, { "epoch": 6.537128712871287, "grad_norm": 0.6156725287437439, "learning_rate": 9.229815118087881e-05, "loss": 0.4923, "step": 26410 }, { "epoch": 6.53960396039604, "grad_norm": 0.6108664870262146, "learning_rate": 9.229080208461324e-05, "loss": 0.4869, "step": 26420 }, { "epoch": 6.542079207920792, "grad_norm": 0.6960330605506897, "learning_rate": 9.228344977664037e-05, "loss": 0.4888, "step": 26430 }, { "epoch": 6.544554455445544, "grad_norm": 0.6415514945983887, "learning_rate": 9.22760942575185e-05, "loss": 0.4916, "step": 26440 }, { "epoch": 6.547029702970297, "grad_norm": 0.6645393967628479, "learning_rate": 9.22687355278063e-05, "loss": 0.4892, "step": 26450 }, { "epoch": 6.5495049504950495, "grad_norm": 0.6339165568351746, "learning_rate": 9.226137358806257e-05, "loss": 0.4913, "step": 26460 }, { "epoch": 6.551980198019802, "grad_norm": 0.661178708076477, "learning_rate": 9.225400843884641e-05, "loss": 0.4944, "step": 26470 }, { "epoch": 6.554455445544555, "grad_norm": 0.6299194097518921, "learning_rate": 9.224664008071717e-05, "loss": 0.491, "step": 26480 }, { "epoch": 6.556930693069307, "grad_norm": 0.6603665947914124, "learning_rate": 9.223926851423443e-05, "loss": 0.4894, "step": 26490 }, { "epoch": 6.5594059405940595, "grad_norm": 0.6285718083381653, "learning_rate": 9.223189373995798e-05, "loss": 0.4885, "step": 26500 }, { "epoch": 6.561881188118812, "grad_norm": 0.6715551614761353, "learning_rate": 9.222451575844793e-05, "loss": 0.4914, "step": 26510 }, { "epoch": 6.564356435643564, "grad_norm": 0.6381449699401855, "learning_rate": 9.221713457026455e-05, "loss": 0.4966, "step": 26520 }, { "epoch": 6.566831683168317, "grad_norm": 0.6198201179504395, "learning_rate": 9.220975017596841e-05, "loss": 0.4925, "step": 26530 }, { "epoch": 6.569306930693069, "grad_norm": 0.6215469837188721, "learning_rate": 9.220236257612031e-05, "loss": 0.4915, "step": 26540 }, { "epoch": 6.571782178217822, "grad_norm": 0.6633801460266113, "learning_rate": 9.219497177128127e-05, "loss": 0.4939, "step": 26550 }, { "epoch": 6.574257425742574, "grad_norm": 0.6373787522315979, "learning_rate": 9.21875777620126e-05, "loss": 0.4896, "step": 26560 }, { "epoch": 6.576732673267327, "grad_norm": 0.6229812502861023, "learning_rate": 9.21801805488758e-05, "loss": 0.4947, "step": 26570 }, { "epoch": 6.579207920792079, "grad_norm": 0.6435636878013611, "learning_rate": 9.217278013243266e-05, "loss": 0.4885, "step": 26580 }, { "epoch": 6.5816831683168315, "grad_norm": 0.6370933651924133, "learning_rate": 9.216537651324519e-05, "loss": 0.4869, "step": 26590 }, { "epoch": 6.584158415841584, "grad_norm": 0.6444075703620911, "learning_rate": 9.215796969187564e-05, "loss": 0.4919, "step": 26600 }, { "epoch": 6.586633663366337, "grad_norm": 0.6861947178840637, "learning_rate": 9.215055966888652e-05, "loss": 0.4864, "step": 26610 }, { "epoch": 6.589108910891089, "grad_norm": 0.6314501762390137, "learning_rate": 9.214314644484055e-05, "loss": 0.4901, "step": 26620 }, { "epoch": 6.591584158415841, "grad_norm": 0.6551684141159058, "learning_rate": 9.213573002030072e-05, "loss": 0.4899, "step": 26630 }, { "epoch": 6.594059405940594, "grad_norm": 0.6607920527458191, "learning_rate": 9.212831039583028e-05, "loss": 0.4865, "step": 26640 }, { "epoch": 6.596534653465347, "grad_norm": 0.6525609493255615, "learning_rate": 9.212088757199269e-05, "loss": 0.486, "step": 26650 }, { "epoch": 6.599009900990099, "grad_norm": 0.6541835069656372, "learning_rate": 9.211346154935165e-05, "loss": 0.4884, "step": 26660 }, { "epoch": 6.601485148514851, "grad_norm": 0.6641485691070557, "learning_rate": 9.210603232847112e-05, "loss": 0.4885, "step": 26670 }, { "epoch": 6.603960396039604, "grad_norm": 0.6150855422019958, "learning_rate": 9.209859990991533e-05, "loss": 0.485, "step": 26680 }, { "epoch": 6.606435643564357, "grad_norm": 0.6495112180709839, "learning_rate": 9.209116429424871e-05, "loss": 0.4897, "step": 26690 }, { "epoch": 6.608910891089109, "grad_norm": 0.6795379519462585, "learning_rate": 9.208372548203592e-05, "loss": 0.4837, "step": 26700 }, { "epoch": 6.611386138613861, "grad_norm": 0.6165098547935486, "learning_rate": 9.207628347384191e-05, "loss": 0.4894, "step": 26710 }, { "epoch": 6.6138613861386135, "grad_norm": 0.6441438794136047, "learning_rate": 9.206883827023187e-05, "loss": 0.4946, "step": 26720 }, { "epoch": 6.616336633663367, "grad_norm": 0.6362638473510742, "learning_rate": 9.206138987177118e-05, "loss": 0.4938, "step": 26730 }, { "epoch": 6.618811881188119, "grad_norm": 0.6121320128440857, "learning_rate": 9.205393827902551e-05, "loss": 0.486, "step": 26740 }, { "epoch": 6.621287128712871, "grad_norm": 0.645736575126648, "learning_rate": 9.204648349256076e-05, "loss": 0.4844, "step": 26750 }, { "epoch": 6.623762376237623, "grad_norm": 0.7228016257286072, "learning_rate": 9.203902551294307e-05, "loss": 0.4883, "step": 26760 }, { "epoch": 6.626237623762377, "grad_norm": 0.6455650329589844, "learning_rate": 9.203156434073883e-05, "loss": 0.4827, "step": 26770 }, { "epoch": 6.628712871287129, "grad_norm": 0.6194100379943848, "learning_rate": 9.202409997651465e-05, "loss": 0.4927, "step": 26780 }, { "epoch": 6.631188118811881, "grad_norm": 0.6704721450805664, "learning_rate": 9.201663242083743e-05, "loss": 0.493, "step": 26790 }, { "epoch": 6.633663366336633, "grad_norm": 0.64791339635849, "learning_rate": 9.200916167427425e-05, "loss": 0.4906, "step": 26800 }, { "epoch": 6.6361386138613865, "grad_norm": 0.6906334161758423, "learning_rate": 9.200168773739248e-05, "loss": 0.4906, "step": 26810 }, { "epoch": 6.638613861386139, "grad_norm": 0.6629840731620789, "learning_rate": 9.199421061075972e-05, "loss": 0.4919, "step": 26820 }, { "epoch": 6.641089108910891, "grad_norm": 0.6668376922607422, "learning_rate": 9.19867302949438e-05, "loss": 0.4893, "step": 26830 }, { "epoch": 6.643564356435643, "grad_norm": 0.6219344139099121, "learning_rate": 9.19792467905128e-05, "loss": 0.4852, "step": 26840 }, { "epoch": 6.646039603960396, "grad_norm": 0.64267897605896, "learning_rate": 9.197176009803503e-05, "loss": 0.4858, "step": 26850 }, { "epoch": 6.648514851485149, "grad_norm": 0.5924212336540222, "learning_rate": 9.19642702180791e-05, "loss": 0.491, "step": 26860 }, { "epoch": 6.650990099009901, "grad_norm": 0.6284199953079224, "learning_rate": 9.195677715121376e-05, "loss": 0.4833, "step": 26870 }, { "epoch": 6.653465346534653, "grad_norm": 0.5928601026535034, "learning_rate": 9.19492808980081e-05, "loss": 0.4903, "step": 26880 }, { "epoch": 6.655940594059406, "grad_norm": 0.6321141123771667, "learning_rate": 9.194178145903139e-05, "loss": 0.4883, "step": 26890 }, { "epoch": 6.658415841584159, "grad_norm": 0.5929247140884399, "learning_rate": 9.193427883485317e-05, "loss": 0.4905, "step": 26900 }, { "epoch": 6.660891089108911, "grad_norm": 0.6395718455314636, "learning_rate": 9.192677302604323e-05, "loss": 0.4907, "step": 26910 }, { "epoch": 6.663366336633663, "grad_norm": 0.6146804690361023, "learning_rate": 9.191926403317155e-05, "loss": 0.4857, "step": 26920 }, { "epoch": 6.665841584158416, "grad_norm": 0.6045676469802856, "learning_rate": 9.191175185680843e-05, "loss": 0.4929, "step": 26930 }, { "epoch": 6.6683168316831685, "grad_norm": 0.6197580099105835, "learning_rate": 9.190423649752433e-05, "loss": 0.4911, "step": 26940 }, { "epoch": 6.670792079207921, "grad_norm": 0.6295978426933289, "learning_rate": 9.189671795589003e-05, "loss": 0.4886, "step": 26950 }, { "epoch": 6.673267326732673, "grad_norm": 0.6387589573860168, "learning_rate": 9.188919623247648e-05, "loss": 0.4913, "step": 26960 }, { "epoch": 6.675742574257426, "grad_norm": 0.6329408884048462, "learning_rate": 9.188167132785493e-05, "loss": 0.4906, "step": 26970 }, { "epoch": 6.678217821782178, "grad_norm": 0.6285831928253174, "learning_rate": 9.187414324259684e-05, "loss": 0.4868, "step": 26980 }, { "epoch": 6.680693069306931, "grad_norm": 0.6224207878112793, "learning_rate": 9.186661197727391e-05, "loss": 0.4907, "step": 26990 }, { "epoch": 6.683168316831683, "grad_norm": 0.6471794247627258, "learning_rate": 9.18590775324581e-05, "loss": 0.4877, "step": 27000 }, { "epoch": 6.685643564356436, "grad_norm": 0.6257094740867615, "learning_rate": 9.18515399087216e-05, "loss": 0.4915, "step": 27010 }, { "epoch": 6.688118811881188, "grad_norm": 0.7050691246986389, "learning_rate": 9.184399910663683e-05, "loss": 0.4903, "step": 27020 }, { "epoch": 6.6905940594059405, "grad_norm": 0.5909727811813354, "learning_rate": 9.183645512677648e-05, "loss": 0.484, "step": 27030 }, { "epoch": 6.693069306930693, "grad_norm": 0.6773028373718262, "learning_rate": 9.182890796971346e-05, "loss": 0.485, "step": 27040 }, { "epoch": 6.695544554455445, "grad_norm": 0.6805809736251831, "learning_rate": 9.182135763602091e-05, "loss": 0.4872, "step": 27050 }, { "epoch": 6.698019801980198, "grad_norm": 0.6950858235359192, "learning_rate": 9.181380412627228e-05, "loss": 0.489, "step": 27060 }, { "epoch": 6.7004950495049505, "grad_norm": 0.6589230895042419, "learning_rate": 9.180624744104114e-05, "loss": 0.491, "step": 27070 }, { "epoch": 6.702970297029703, "grad_norm": 0.6734944581985474, "learning_rate": 9.179868758090143e-05, "loss": 0.489, "step": 27080 }, { "epoch": 6.705445544554456, "grad_norm": 0.6410910487174988, "learning_rate": 9.179112454642722e-05, "loss": 0.4927, "step": 27090 }, { "epoch": 6.707920792079208, "grad_norm": 0.682886004447937, "learning_rate": 9.178355833819293e-05, "loss": 0.4894, "step": 27100 }, { "epoch": 6.71039603960396, "grad_norm": 0.6129786372184753, "learning_rate": 9.177598895677309e-05, "loss": 0.4874, "step": 27110 }, { "epoch": 6.712871287128713, "grad_norm": 0.622578501701355, "learning_rate": 9.176841640274262e-05, "loss": 0.4898, "step": 27120 }, { "epoch": 6.715346534653465, "grad_norm": 0.6407453417778015, "learning_rate": 9.176084067667656e-05, "loss": 0.4896, "step": 27130 }, { "epoch": 6.717821782178218, "grad_norm": 0.6417505145072937, "learning_rate": 9.175326177915025e-05, "loss": 0.4938, "step": 27140 }, { "epoch": 6.72029702970297, "grad_norm": 0.628373920917511, "learning_rate": 9.174567971073925e-05, "loss": 0.4935, "step": 27150 }, { "epoch": 6.7227722772277225, "grad_norm": 0.6565329432487488, "learning_rate": 9.173809447201938e-05, "loss": 0.4894, "step": 27160 }, { "epoch": 6.725247524752476, "grad_norm": 0.6729485988616943, "learning_rate": 9.173050606356668e-05, "loss": 0.4887, "step": 27170 }, { "epoch": 6.727722772277228, "grad_norm": 0.6470021605491638, "learning_rate": 9.172291448595745e-05, "loss": 0.4914, "step": 27180 }, { "epoch": 6.73019801980198, "grad_norm": 0.6663278937339783, "learning_rate": 9.171531973976819e-05, "loss": 0.4818, "step": 27190 }, { "epoch": 6.732673267326732, "grad_norm": 0.6332297921180725, "learning_rate": 9.170772182557572e-05, "loss": 0.4933, "step": 27200 }, { "epoch": 6.735148514851485, "grad_norm": 0.5940300226211548, "learning_rate": 9.1700120743957e-05, "loss": 0.4936, "step": 27210 }, { "epoch": 6.737623762376238, "grad_norm": 0.6373118162155151, "learning_rate": 9.169251649548932e-05, "loss": 0.4884, "step": 27220 }, { "epoch": 6.74009900990099, "grad_norm": 0.5925769805908203, "learning_rate": 9.168490908075016e-05, "loss": 0.4914, "step": 27230 }, { "epoch": 6.742574257425742, "grad_norm": 0.6170162558555603, "learning_rate": 9.167729850031726e-05, "loss": 0.4867, "step": 27240 }, { "epoch": 6.7450495049504955, "grad_norm": 0.6315613985061646, "learning_rate": 9.166968475476856e-05, "loss": 0.4888, "step": 27250 }, { "epoch": 6.747524752475248, "grad_norm": 0.6330080032348633, "learning_rate": 9.166206784468232e-05, "loss": 0.4843, "step": 27260 }, { "epoch": 6.75, "grad_norm": 0.6687268614768982, "learning_rate": 9.165444777063696e-05, "loss": 0.4873, "step": 27270 }, { "epoch": 6.752475247524752, "grad_norm": 0.6653782725334167, "learning_rate": 9.16468245332112e-05, "loss": 0.4893, "step": 27280 }, { "epoch": 6.7549504950495045, "grad_norm": 0.6355924010276794, "learning_rate": 9.163919813298395e-05, "loss": 0.4913, "step": 27290 }, { "epoch": 6.757425742574258, "grad_norm": 0.6331538558006287, "learning_rate": 9.16315685705344e-05, "loss": 0.4842, "step": 27300 }, { "epoch": 6.75990099009901, "grad_norm": 0.6247898936271667, "learning_rate": 9.162393584644195e-05, "loss": 0.4862, "step": 27310 }, { "epoch": 6.762376237623762, "grad_norm": 0.655279815196991, "learning_rate": 9.161629996128629e-05, "loss": 0.4921, "step": 27320 }, { "epoch": 6.764851485148515, "grad_norm": 0.6310033798217773, "learning_rate": 9.160866091564728e-05, "loss": 0.4981, "step": 27330 }, { "epoch": 6.767326732673268, "grad_norm": 0.6014136075973511, "learning_rate": 9.160101871010507e-05, "loss": 0.4968, "step": 27340 }, { "epoch": 6.76980198019802, "grad_norm": 0.6631246209144592, "learning_rate": 9.159337334524003e-05, "loss": 0.4943, "step": 27350 }, { "epoch": 6.772277227722772, "grad_norm": 0.6343838572502136, "learning_rate": 9.158572482163277e-05, "loss": 0.4919, "step": 27360 }, { "epoch": 6.774752475247524, "grad_norm": 0.6515481472015381, "learning_rate": 9.157807313986416e-05, "loss": 0.4938, "step": 27370 }, { "epoch": 6.7772277227722775, "grad_norm": 0.6658199429512024, "learning_rate": 9.15704183005153e-05, "loss": 0.4855, "step": 27380 }, { "epoch": 6.77970297029703, "grad_norm": 0.6389538049697876, "learning_rate": 9.156276030416749e-05, "loss": 0.487, "step": 27390 }, { "epoch": 6.782178217821782, "grad_norm": 0.6066263318061829, "learning_rate": 9.155509915140233e-05, "loss": 0.4915, "step": 27400 }, { "epoch": 6.784653465346535, "grad_norm": 0.6542907357215881, "learning_rate": 9.154743484280161e-05, "loss": 0.4919, "step": 27410 }, { "epoch": 6.787128712871287, "grad_norm": 0.6153706312179565, "learning_rate": 9.153976737894743e-05, "loss": 0.4867, "step": 27420 }, { "epoch": 6.78960396039604, "grad_norm": 0.6496124863624573, "learning_rate": 9.153209676042205e-05, "loss": 0.4869, "step": 27430 }, { "epoch": 6.792079207920792, "grad_norm": 0.647873044013977, "learning_rate": 9.152442298780801e-05, "loss": 0.4883, "step": 27440 }, { "epoch": 6.794554455445544, "grad_norm": 0.6212199330329895, "learning_rate": 9.151674606168808e-05, "loss": 0.4923, "step": 27450 }, { "epoch": 6.797029702970297, "grad_norm": 0.6167204976081848, "learning_rate": 9.150906598264527e-05, "loss": 0.4863, "step": 27460 }, { "epoch": 6.7995049504950495, "grad_norm": 0.5963036417961121, "learning_rate": 9.150138275126283e-05, "loss": 0.4834, "step": 27470 }, { "epoch": 6.801980198019802, "grad_norm": 0.6431376934051514, "learning_rate": 9.149369636812427e-05, "loss": 0.4941, "step": 27480 }, { "epoch": 6.804455445544555, "grad_norm": 0.5985509157180786, "learning_rate": 9.14860068338133e-05, "loss": 0.4896, "step": 27490 }, { "epoch": 6.806930693069307, "grad_norm": 0.6085381507873535, "learning_rate": 9.14783141489139e-05, "loss": 0.4843, "step": 27500 }, { "epoch": 6.8094059405940595, "grad_norm": 0.6410961151123047, "learning_rate": 9.147061831401025e-05, "loss": 0.4865, "step": 27510 }, { "epoch": 6.811881188118812, "grad_norm": 0.6825794577598572, "learning_rate": 9.146291932968685e-05, "loss": 0.4867, "step": 27520 }, { "epoch": 6.814356435643564, "grad_norm": 0.6348825693130493, "learning_rate": 9.145521719652834e-05, "loss": 0.4921, "step": 27530 }, { "epoch": 6.816831683168317, "grad_norm": 0.6501370072364807, "learning_rate": 9.144751191511966e-05, "loss": 0.4904, "step": 27540 }, { "epoch": 6.819306930693069, "grad_norm": 0.6459065675735474, "learning_rate": 9.143980348604601e-05, "loss": 0.4848, "step": 27550 }, { "epoch": 6.821782178217822, "grad_norm": 0.588538408279419, "learning_rate": 9.143209190989273e-05, "loss": 0.4903, "step": 27560 }, { "epoch": 6.824257425742574, "grad_norm": 0.6385704278945923, "learning_rate": 9.142437718724552e-05, "loss": 0.486, "step": 27570 }, { "epoch": 6.826732673267327, "grad_norm": 0.6341626644134521, "learning_rate": 9.141665931869024e-05, "loss": 0.4889, "step": 27580 }, { "epoch": 6.829207920792079, "grad_norm": 0.684799313545227, "learning_rate": 9.140893830481299e-05, "loss": 0.4928, "step": 27590 }, { "epoch": 6.8316831683168315, "grad_norm": 0.6671372652053833, "learning_rate": 9.140121414620016e-05, "loss": 0.4871, "step": 27600 }, { "epoch": 6.834158415841584, "grad_norm": 0.6356768608093262, "learning_rate": 9.139348684343835e-05, "loss": 0.4881, "step": 27610 }, { "epoch": 6.836633663366337, "grad_norm": 0.5992507934570312, "learning_rate": 9.138575639711437e-05, "loss": 0.4903, "step": 27620 }, { "epoch": 6.839108910891089, "grad_norm": 0.6359829306602478, "learning_rate": 9.137802280781533e-05, "loss": 0.4886, "step": 27630 }, { "epoch": 6.841584158415841, "grad_norm": 0.6127630472183228, "learning_rate": 9.137028607612853e-05, "loss": 0.4901, "step": 27640 }, { "epoch": 6.844059405940594, "grad_norm": 0.6238059401512146, "learning_rate": 9.13625462026415e-05, "loss": 0.4893, "step": 27650 }, { "epoch": 6.846534653465347, "grad_norm": 0.6485666632652283, "learning_rate": 9.135480318794208e-05, "loss": 0.4867, "step": 27660 }, { "epoch": 6.849009900990099, "grad_norm": 0.6458594799041748, "learning_rate": 9.134705703261828e-05, "loss": 0.4882, "step": 27670 }, { "epoch": 6.851485148514851, "grad_norm": 0.613246738910675, "learning_rate": 9.133930773725834e-05, "loss": 0.4924, "step": 27680 }, { "epoch": 6.853960396039604, "grad_norm": 0.5908764600753784, "learning_rate": 9.133155530245082e-05, "loss": 0.4866, "step": 27690 }, { "epoch": 6.856435643564357, "grad_norm": 0.6373879313468933, "learning_rate": 9.132379972878442e-05, "loss": 0.4885, "step": 27700 }, { "epoch": 6.858910891089109, "grad_norm": 0.5992022156715393, "learning_rate": 9.131604101684814e-05, "loss": 0.4865, "step": 27710 }, { "epoch": 6.861386138613861, "grad_norm": 0.620056688785553, "learning_rate": 9.130827916723122e-05, "loss": 0.4946, "step": 27720 }, { "epoch": 6.8638613861386135, "grad_norm": 0.6061729788780212, "learning_rate": 9.130051418052311e-05, "loss": 0.4854, "step": 27730 }, { "epoch": 6.866336633663367, "grad_norm": 0.6290449500083923, "learning_rate": 9.12927460573135e-05, "loss": 0.485, "step": 27740 }, { "epoch": 6.868811881188119, "grad_norm": 0.6395866870880127, "learning_rate": 9.128497479819234e-05, "loss": 0.4834, "step": 27750 }, { "epoch": 6.871287128712871, "grad_norm": 0.6342414021492004, "learning_rate": 9.127720040374979e-05, "loss": 0.4844, "step": 27760 }, { "epoch": 6.873762376237623, "grad_norm": 0.646604061126709, "learning_rate": 9.126942287457628e-05, "loss": 0.4884, "step": 27770 }, { "epoch": 6.876237623762377, "grad_norm": 0.6353398561477661, "learning_rate": 9.126164221126246e-05, "loss": 0.4857, "step": 27780 }, { "epoch": 6.878712871287129, "grad_norm": 0.6144662499427795, "learning_rate": 9.125385841439922e-05, "loss": 0.4869, "step": 27790 }, { "epoch": 6.881188118811881, "grad_norm": 0.6408681273460388, "learning_rate": 9.124607148457768e-05, "loss": 0.4877, "step": 27800 }, { "epoch": 6.883663366336633, "grad_norm": 0.6448779702186584, "learning_rate": 9.123828142238921e-05, "loss": 0.4899, "step": 27810 }, { "epoch": 6.8861386138613865, "grad_norm": 0.6070109009742737, "learning_rate": 9.123048822842543e-05, "loss": 0.4875, "step": 27820 }, { "epoch": 6.888613861386139, "grad_norm": 0.6326333284378052, "learning_rate": 9.122269190327815e-05, "loss": 0.4861, "step": 27830 }, { "epoch": 6.891089108910891, "grad_norm": 0.7474831938743591, "learning_rate": 9.121489244753946e-05, "loss": 0.49, "step": 27840 }, { "epoch": 6.893564356435643, "grad_norm": 0.6702480316162109, "learning_rate": 9.120708986180171e-05, "loss": 0.4919, "step": 27850 }, { "epoch": 6.896039603960396, "grad_norm": 0.6097540855407715, "learning_rate": 9.119928414665741e-05, "loss": 0.4902, "step": 27860 }, { "epoch": 6.898514851485149, "grad_norm": 0.6525324583053589, "learning_rate": 9.119147530269937e-05, "loss": 0.4848, "step": 27870 }, { "epoch": 6.900990099009901, "grad_norm": 0.5898137092590332, "learning_rate": 9.118366333052062e-05, "loss": 0.485, "step": 27880 }, { "epoch": 6.903465346534653, "grad_norm": 0.5936655402183533, "learning_rate": 9.117584823071443e-05, "loss": 0.4926, "step": 27890 }, { "epoch": 6.905940594059406, "grad_norm": 0.6412689685821533, "learning_rate": 9.116803000387431e-05, "loss": 0.4808, "step": 27900 }, { "epoch": 6.908415841584159, "grad_norm": 0.6478314399719238, "learning_rate": 9.1160208650594e-05, "loss": 0.4893, "step": 27910 }, { "epoch": 6.910891089108911, "grad_norm": 0.59137362241745, "learning_rate": 9.115238417146746e-05, "loss": 0.489, "step": 27920 }, { "epoch": 6.913366336633663, "grad_norm": 0.6423120498657227, "learning_rate": 9.114455656708893e-05, "loss": 0.4904, "step": 27930 }, { "epoch": 6.915841584158416, "grad_norm": 0.6107259392738342, "learning_rate": 9.113672583805287e-05, "loss": 0.4867, "step": 27940 }, { "epoch": 6.9183168316831685, "grad_norm": 0.6506685614585876, "learning_rate": 9.112889198495396e-05, "loss": 0.482, "step": 27950 }, { "epoch": 6.920792079207921, "grad_norm": 0.6569845080375671, "learning_rate": 9.112105500838712e-05, "loss": 0.4906, "step": 27960 }, { "epoch": 6.923267326732673, "grad_norm": 0.6482325196266174, "learning_rate": 9.111321490894752e-05, "loss": 0.492, "step": 27970 }, { "epoch": 6.925742574257426, "grad_norm": 0.6105068922042847, "learning_rate": 9.11053716872306e-05, "loss": 0.4856, "step": 27980 }, { "epoch": 6.928217821782178, "grad_norm": 0.640296220779419, "learning_rate": 9.109752534383195e-05, "loss": 0.4924, "step": 27990 }, { "epoch": 6.930693069306931, "grad_norm": 0.6562506556510925, "learning_rate": 9.108967587934748e-05, "loss": 0.4934, "step": 28000 }, { "epoch": 6.933168316831683, "grad_norm": 0.6950703859329224, "learning_rate": 9.108182329437329e-05, "loss": 0.4875, "step": 28010 }, { "epoch": 6.935643564356436, "grad_norm": 0.6256964802742004, "learning_rate": 9.107396758950575e-05, "loss": 0.4865, "step": 28020 }, { "epoch": 6.938118811881188, "grad_norm": 0.6262198686599731, "learning_rate": 9.106610876534142e-05, "loss": 0.4873, "step": 28030 }, { "epoch": 6.9405940594059405, "grad_norm": 0.6772892475128174, "learning_rate": 9.105824682247715e-05, "loss": 0.4899, "step": 28040 }, { "epoch": 6.943069306930693, "grad_norm": 0.6700449585914612, "learning_rate": 9.105038176150998e-05, "loss": 0.4943, "step": 28050 }, { "epoch": 6.945544554455445, "grad_norm": 0.6691095232963562, "learning_rate": 9.104251358303724e-05, "loss": 0.4903, "step": 28060 }, { "epoch": 6.948019801980198, "grad_norm": 0.6538016200065613, "learning_rate": 9.103464228765646e-05, "loss": 0.4866, "step": 28070 }, { "epoch": 6.9504950495049505, "grad_norm": 0.6462455987930298, "learning_rate": 9.102676787596537e-05, "loss": 0.4874, "step": 28080 }, { "epoch": 6.952970297029703, "grad_norm": 0.6355530023574829, "learning_rate": 9.101889034856205e-05, "loss": 0.4933, "step": 28090 }, { "epoch": 6.955445544554456, "grad_norm": 0.5861137509346008, "learning_rate": 9.101100970604468e-05, "loss": 0.4856, "step": 28100 }, { "epoch": 6.957920792079208, "grad_norm": 0.6421204805374146, "learning_rate": 9.10031259490118e-05, "loss": 0.4919, "step": 28110 }, { "epoch": 6.96039603960396, "grad_norm": 0.6367286443710327, "learning_rate": 9.099523907806207e-05, "loss": 0.4905, "step": 28120 }, { "epoch": 6.962871287128713, "grad_norm": 0.6264655590057373, "learning_rate": 9.09873490937945e-05, "loss": 0.4842, "step": 28130 }, { "epoch": 6.965346534653465, "grad_norm": 0.6324153542518616, "learning_rate": 9.097945599680825e-05, "loss": 0.4888, "step": 28140 }, { "epoch": 6.967821782178218, "grad_norm": 0.599586009979248, "learning_rate": 9.097155978770276e-05, "loss": 0.487, "step": 28150 }, { "epoch": 6.97029702970297, "grad_norm": 0.6100962162017822, "learning_rate": 9.096366046707769e-05, "loss": 0.487, "step": 28160 }, { "epoch": 6.9727722772277225, "grad_norm": 0.6385861039161682, "learning_rate": 9.095575803553294e-05, "loss": 0.4845, "step": 28170 }, { "epoch": 6.975247524752476, "grad_norm": 0.6193989515304565, "learning_rate": 9.094785249366866e-05, "loss": 0.4848, "step": 28180 }, { "epoch": 6.977722772277228, "grad_norm": 0.6238086819648743, "learning_rate": 9.093994384208522e-05, "loss": 0.4856, "step": 28190 }, { "epoch": 6.98019801980198, "grad_norm": 0.6384609937667847, "learning_rate": 9.093203208138321e-05, "loss": 0.4884, "step": 28200 }, { "epoch": 6.982673267326732, "grad_norm": 0.6605166792869568, "learning_rate": 9.09241172121635e-05, "loss": 0.484, "step": 28210 }, { "epoch": 6.985148514851485, "grad_norm": 0.6291802525520325, "learning_rate": 9.091619923502717e-05, "loss": 0.4851, "step": 28220 }, { "epoch": 6.987623762376238, "grad_norm": 0.6223302483558655, "learning_rate": 9.090827815057552e-05, "loss": 0.486, "step": 28230 }, { "epoch": 6.99009900990099, "grad_norm": 0.6903326511383057, "learning_rate": 9.090035395941012e-05, "loss": 0.4894, "step": 28240 }, { "epoch": 6.992574257425742, "grad_norm": 0.6619125604629517, "learning_rate": 9.089242666213276e-05, "loss": 0.4854, "step": 28250 }, { "epoch": 6.9950495049504955, "grad_norm": 0.6451362371444702, "learning_rate": 9.088449625934546e-05, "loss": 0.4837, "step": 28260 }, { "epoch": 6.997524752475248, "grad_norm": 0.6553184390068054, "learning_rate": 9.087656275165048e-05, "loss": 0.4869, "step": 28270 }, { "epoch": 7.0, "grad_norm": 0.6889145970344543, "learning_rate": 9.086862613965033e-05, "loss": 0.492, "step": 28280 }, { "epoch": 7.002475247524752, "grad_norm": 0.6463438272476196, "learning_rate": 9.086068642394772e-05, "loss": 0.4919, "step": 28290 }, { "epoch": 7.0049504950495045, "grad_norm": 0.6293911337852478, "learning_rate": 9.085274360514564e-05, "loss": 0.4923, "step": 28300 }, { "epoch": 7.007425742574258, "grad_norm": 0.6743390560150146, "learning_rate": 9.084479768384728e-05, "loss": 0.4874, "step": 28310 }, { "epoch": 7.00990099009901, "grad_norm": 0.626837432384491, "learning_rate": 9.08368486606561e-05, "loss": 0.4859, "step": 28320 }, { "epoch": 7.012376237623762, "grad_norm": 0.6649382710456848, "learning_rate": 9.082889653617576e-05, "loss": 0.4863, "step": 28330 }, { "epoch": 7.014851485148514, "grad_norm": 0.5936058759689331, "learning_rate": 9.082094131101018e-05, "loss": 0.4788, "step": 28340 }, { "epoch": 7.017326732673268, "grad_norm": 0.6305229067802429, "learning_rate": 9.081298298576349e-05, "loss": 0.4856, "step": 28350 }, { "epoch": 7.01980198019802, "grad_norm": 0.6158978939056396, "learning_rate": 9.08050215610401e-05, "loss": 0.4799, "step": 28360 }, { "epoch": 7.022277227722772, "grad_norm": 0.618631899356842, "learning_rate": 9.079705703744458e-05, "loss": 0.4839, "step": 28370 }, { "epoch": 7.024752475247524, "grad_norm": 0.6544027924537659, "learning_rate": 9.078908941558186e-05, "loss": 0.4839, "step": 28380 }, { "epoch": 7.0272277227722775, "grad_norm": 0.6305201053619385, "learning_rate": 9.078111869605697e-05, "loss": 0.4854, "step": 28390 }, { "epoch": 7.02970297029703, "grad_norm": 0.6350008249282837, "learning_rate": 9.077314487947523e-05, "loss": 0.4902, "step": 28400 }, { "epoch": 7.032178217821782, "grad_norm": 0.6089512705802917, "learning_rate": 9.076516796644222e-05, "loss": 0.4911, "step": 28410 }, { "epoch": 7.034653465346534, "grad_norm": 0.6298621892929077, "learning_rate": 9.075718795756374e-05, "loss": 0.4873, "step": 28420 }, { "epoch": 7.037128712871287, "grad_norm": 0.6552090644836426, "learning_rate": 9.07492048534458e-05, "loss": 0.4826, "step": 28430 }, { "epoch": 7.03960396039604, "grad_norm": 0.6545113921165466, "learning_rate": 9.074121865469467e-05, "loss": 0.49, "step": 28440 }, { "epoch": 7.042079207920792, "grad_norm": 0.6116355657577515, "learning_rate": 9.073322936191686e-05, "loss": 0.4899, "step": 28450 }, { "epoch": 7.044554455445544, "grad_norm": 0.6040586233139038, "learning_rate": 9.07252369757191e-05, "loss": 0.4941, "step": 28460 }, { "epoch": 7.047029702970297, "grad_norm": 0.6265354156494141, "learning_rate": 9.071724149670835e-05, "loss": 0.4863, "step": 28470 }, { "epoch": 7.0495049504950495, "grad_norm": 0.6231073141098022, "learning_rate": 9.070924292549182e-05, "loss": 0.4881, "step": 28480 }, { "epoch": 7.051980198019802, "grad_norm": 0.6100313663482666, "learning_rate": 9.070124126267694e-05, "loss": 0.4846, "step": 28490 }, { "epoch": 7.054455445544554, "grad_norm": 0.6139430403709412, "learning_rate": 9.069323650887139e-05, "loss": 0.4886, "step": 28500 }, { "epoch": 7.056930693069307, "grad_norm": 0.6451750993728638, "learning_rate": 9.068522866468308e-05, "loss": 0.4892, "step": 28510 }, { "epoch": 7.0594059405940595, "grad_norm": 0.6306769251823425, "learning_rate": 9.067721773072016e-05, "loss": 0.4897, "step": 28520 }, { "epoch": 7.061881188118812, "grad_norm": 0.6013025045394897, "learning_rate": 9.066920370759099e-05, "loss": 0.4829, "step": 28530 }, { "epoch": 7.064356435643564, "grad_norm": 0.6343783140182495, "learning_rate": 9.066118659590422e-05, "loss": 0.4872, "step": 28540 }, { "epoch": 7.066831683168317, "grad_norm": 0.6472684741020203, "learning_rate": 9.065316639626865e-05, "loss": 0.485, "step": 28550 }, { "epoch": 7.069306930693069, "grad_norm": 0.6550430655479431, "learning_rate": 9.064514310929336e-05, "loss": 0.4902, "step": 28560 }, { "epoch": 7.071782178217822, "grad_norm": 0.6312587261199951, "learning_rate": 9.06371167355877e-05, "loss": 0.4857, "step": 28570 }, { "epoch": 7.074257425742574, "grad_norm": 0.6007614135742188, "learning_rate": 9.06290872757612e-05, "loss": 0.4838, "step": 28580 }, { "epoch": 7.076732673267327, "grad_norm": 0.6619706749916077, "learning_rate": 9.062105473042367e-05, "loss": 0.4842, "step": 28590 }, { "epoch": 7.079207920792079, "grad_norm": 0.6385017037391663, "learning_rate": 9.061301910018509e-05, "loss": 0.4849, "step": 28600 }, { "epoch": 7.0816831683168315, "grad_norm": 0.6180340647697449, "learning_rate": 9.060498038565574e-05, "loss": 0.4881, "step": 28610 }, { "epoch": 7.084158415841584, "grad_norm": 0.5994004011154175, "learning_rate": 9.05969385874461e-05, "loss": 0.4872, "step": 28620 }, { "epoch": 7.086633663366337, "grad_norm": 0.6060298681259155, "learning_rate": 9.058889370616689e-05, "loss": 0.4894, "step": 28630 }, { "epoch": 7.089108910891089, "grad_norm": 0.6810355186462402, "learning_rate": 9.058084574242907e-05, "loss": 0.4862, "step": 28640 }, { "epoch": 7.091584158415841, "grad_norm": 0.6196557879447937, "learning_rate": 9.057279469684381e-05, "loss": 0.4884, "step": 28650 }, { "epoch": 7.094059405940594, "grad_norm": 0.6013728976249695, "learning_rate": 9.056474057002256e-05, "loss": 0.4838, "step": 28660 }, { "epoch": 7.096534653465347, "grad_norm": 0.6332305669784546, "learning_rate": 9.055668336257698e-05, "loss": 0.4853, "step": 28670 }, { "epoch": 7.099009900990099, "grad_norm": 0.6649233102798462, "learning_rate": 9.054862307511893e-05, "loss": 0.4828, "step": 28680 }, { "epoch": 7.101485148514851, "grad_norm": 0.6608260273933411, "learning_rate": 9.054055970826056e-05, "loss": 0.4851, "step": 28690 }, { "epoch": 7.103960396039604, "grad_norm": 0.6435127258300781, "learning_rate": 9.053249326261423e-05, "loss": 0.4873, "step": 28700 }, { "epoch": 7.106435643564357, "grad_norm": 0.6586940288543701, "learning_rate": 9.052442373879252e-05, "loss": 0.488, "step": 28710 }, { "epoch": 7.108910891089109, "grad_norm": 0.5935086011886597, "learning_rate": 9.051635113740828e-05, "loss": 0.4827, "step": 28720 }, { "epoch": 7.111386138613861, "grad_norm": 0.6284005641937256, "learning_rate": 9.050827545907454e-05, "loss": 0.4864, "step": 28730 }, { "epoch": 7.1138613861386135, "grad_norm": 0.6139787435531616, "learning_rate": 9.050019670440461e-05, "loss": 0.4891, "step": 28740 }, { "epoch": 7.116336633663367, "grad_norm": 0.6096609830856323, "learning_rate": 9.049211487401204e-05, "loss": 0.4851, "step": 28750 }, { "epoch": 7.118811881188119, "grad_norm": 0.6785223484039307, "learning_rate": 9.048402996851055e-05, "loss": 0.4868, "step": 28760 }, { "epoch": 7.121287128712871, "grad_norm": 0.6187548041343689, "learning_rate": 9.047594198851417e-05, "loss": 0.4896, "step": 28770 }, { "epoch": 7.123762376237623, "grad_norm": 0.6056559085845947, "learning_rate": 9.04678509346371e-05, "loss": 0.4903, "step": 28780 }, { "epoch": 7.126237623762377, "grad_norm": 0.6129810810089111, "learning_rate": 9.045975680749381e-05, "loss": 0.4824, "step": 28790 }, { "epoch": 7.128712871287129, "grad_norm": 0.6336151361465454, "learning_rate": 9.045165960769902e-05, "loss": 0.4886, "step": 28800 }, { "epoch": 7.131188118811881, "grad_norm": 0.6117311716079712, "learning_rate": 9.044355933586764e-05, "loss": 0.4829, "step": 28810 }, { "epoch": 7.133663366336633, "grad_norm": 0.5918445587158203, "learning_rate": 9.043545599261481e-05, "loss": 0.49, "step": 28820 }, { "epoch": 7.1361386138613865, "grad_norm": 0.6148037314414978, "learning_rate": 9.042734957855597e-05, "loss": 0.4825, "step": 28830 }, { "epoch": 7.138613861386139, "grad_norm": 0.6260772347450256, "learning_rate": 9.041924009430672e-05, "loss": 0.4883, "step": 28840 }, { "epoch": 7.141089108910891, "grad_norm": 0.615291953086853, "learning_rate": 9.041112754048293e-05, "loss": 0.4883, "step": 28850 }, { "epoch": 7.143564356435643, "grad_norm": 0.6307309865951538, "learning_rate": 9.040301191770068e-05, "loss": 0.4867, "step": 28860 }, { "epoch": 7.146039603960396, "grad_norm": 0.5795031785964966, "learning_rate": 9.039489322657632e-05, "loss": 0.4859, "step": 28870 }, { "epoch": 7.148514851485149, "grad_norm": 0.6094269156455994, "learning_rate": 9.038677146772641e-05, "loss": 0.4839, "step": 28880 }, { "epoch": 7.150990099009901, "grad_norm": 0.5878157615661621, "learning_rate": 9.037864664176774e-05, "loss": 0.4882, "step": 28890 }, { "epoch": 7.153465346534653, "grad_norm": 0.6257309913635254, "learning_rate": 9.037051874931732e-05, "loss": 0.484, "step": 28900 }, { "epoch": 7.155940594059406, "grad_norm": 0.6439648270606995, "learning_rate": 9.036238779099245e-05, "loss": 0.4876, "step": 28910 }, { "epoch": 7.158415841584159, "grad_norm": 0.6208531856536865, "learning_rate": 9.035425376741055e-05, "loss": 0.4833, "step": 28920 }, { "epoch": 7.160891089108911, "grad_norm": 0.5832598805427551, "learning_rate": 9.034611667918943e-05, "loss": 0.488, "step": 28930 }, { "epoch": 7.163366336633663, "grad_norm": 0.6507771611213684, "learning_rate": 9.0337976526947e-05, "loss": 0.4895, "step": 28940 }, { "epoch": 7.165841584158416, "grad_norm": 0.625377893447876, "learning_rate": 9.032983331130145e-05, "loss": 0.4845, "step": 28950 }, { "epoch": 7.1683168316831685, "grad_norm": 0.6599193215370178, "learning_rate": 9.032168703287122e-05, "loss": 0.4896, "step": 28960 }, { "epoch": 7.170792079207921, "grad_norm": 0.5933109521865845, "learning_rate": 9.031353769227496e-05, "loss": 0.4854, "step": 28970 }, { "epoch": 7.173267326732673, "grad_norm": 0.5840240716934204, "learning_rate": 9.030538529013156e-05, "loss": 0.489, "step": 28980 }, { "epoch": 7.175742574257426, "grad_norm": 0.6299643516540527, "learning_rate": 9.029722982706014e-05, "loss": 0.4838, "step": 28990 }, { "epoch": 7.178217821782178, "grad_norm": 0.6224486231803894, "learning_rate": 9.028907130368004e-05, "loss": 0.4899, "step": 29000 }, { "epoch": 7.180693069306931, "grad_norm": 0.6411852836608887, "learning_rate": 9.028090972061088e-05, "loss": 0.4855, "step": 29010 }, { "epoch": 7.183168316831683, "grad_norm": 0.6563718914985657, "learning_rate": 9.027274507847245e-05, "loss": 0.4837, "step": 29020 }, { "epoch": 7.185643564356436, "grad_norm": 0.5991533994674683, "learning_rate": 9.02645773778848e-05, "loss": 0.4879, "step": 29030 }, { "epoch": 7.188118811881188, "grad_norm": 0.6368295550346375, "learning_rate": 9.025640661946822e-05, "loss": 0.4904, "step": 29040 }, { "epoch": 7.1905940594059405, "grad_norm": 0.6059997081756592, "learning_rate": 9.024823280384324e-05, "loss": 0.4853, "step": 29050 }, { "epoch": 7.193069306930693, "grad_norm": 0.6325743794441223, "learning_rate": 9.024005593163059e-05, "loss": 0.4786, "step": 29060 }, { "epoch": 7.195544554455446, "grad_norm": 0.6368971467018127, "learning_rate": 9.023187600345124e-05, "loss": 0.4852, "step": 29070 }, { "epoch": 7.198019801980198, "grad_norm": 0.622479259967804, "learning_rate": 9.022369301992643e-05, "loss": 0.4929, "step": 29080 }, { "epoch": 7.2004950495049505, "grad_norm": 0.6583032608032227, "learning_rate": 9.021550698167759e-05, "loss": 0.4882, "step": 29090 }, { "epoch": 7.202970297029703, "grad_norm": 0.6111276745796204, "learning_rate": 9.020731788932639e-05, "loss": 0.4848, "step": 29100 }, { "epoch": 7.205445544554456, "grad_norm": 0.6301309466362, "learning_rate": 9.019912574349474e-05, "loss": 0.4876, "step": 29110 }, { "epoch": 7.207920792079208, "grad_norm": 0.5944390892982483, "learning_rate": 9.019093054480477e-05, "loss": 0.4895, "step": 29120 }, { "epoch": 7.21039603960396, "grad_norm": 0.576410710811615, "learning_rate": 9.018273229387888e-05, "loss": 0.4859, "step": 29130 }, { "epoch": 7.212871287128713, "grad_norm": 0.6178450584411621, "learning_rate": 9.017453099133965e-05, "loss": 0.4818, "step": 29140 }, { "epoch": 7.215346534653466, "grad_norm": 0.626180112361908, "learning_rate": 9.016632663780992e-05, "loss": 0.4816, "step": 29150 }, { "epoch": 7.217821782178218, "grad_norm": 0.631637454032898, "learning_rate": 9.015811923391275e-05, "loss": 0.4854, "step": 29160 }, { "epoch": 7.22029702970297, "grad_norm": 0.6660832762718201, "learning_rate": 9.014990878027145e-05, "loss": 0.4906, "step": 29170 }, { "epoch": 7.2227722772277225, "grad_norm": 0.6378343105316162, "learning_rate": 9.014169527750955e-05, "loss": 0.4868, "step": 29180 }, { "epoch": 7.225247524752476, "grad_norm": 0.5894958972930908, "learning_rate": 9.013347872625079e-05, "loss": 0.4808, "step": 29190 }, { "epoch": 7.227722772277228, "grad_norm": 0.626427173614502, "learning_rate": 9.012525912711918e-05, "loss": 0.486, "step": 29200 }, { "epoch": 7.23019801980198, "grad_norm": 0.6565743088722229, "learning_rate": 9.011703648073895e-05, "loss": 0.4837, "step": 29210 }, { "epoch": 7.232673267326732, "grad_norm": 0.6320059299468994, "learning_rate": 9.010881078773457e-05, "loss": 0.4818, "step": 29220 }, { "epoch": 7.235148514851485, "grad_norm": 0.6288885474205017, "learning_rate": 9.010058204873067e-05, "loss": 0.4861, "step": 29230 }, { "epoch": 7.237623762376238, "grad_norm": 0.6050553321838379, "learning_rate": 9.009235026435224e-05, "loss": 0.4811, "step": 29240 }, { "epoch": 7.24009900990099, "grad_norm": 0.6106290817260742, "learning_rate": 9.008411543522437e-05, "loss": 0.4912, "step": 29250 }, { "epoch": 7.242574257425742, "grad_norm": 0.6633431315422058, "learning_rate": 9.007587756197247e-05, "loss": 0.4852, "step": 29260 }, { "epoch": 7.2450495049504955, "grad_norm": 0.6075035929679871, "learning_rate": 9.006763664522213e-05, "loss": 0.4831, "step": 29270 }, { "epoch": 7.247524752475248, "grad_norm": 0.5866252183914185, "learning_rate": 9.005939268559923e-05, "loss": 0.4828, "step": 29280 }, { "epoch": 7.25, "grad_norm": 0.6195571422576904, "learning_rate": 9.005114568372982e-05, "loss": 0.4816, "step": 29290 }, { "epoch": 7.252475247524752, "grad_norm": 0.5855512022972107, "learning_rate": 9.004289564024021e-05, "loss": 0.492, "step": 29300 }, { "epoch": 7.2549504950495045, "grad_norm": 0.6143732666969299, "learning_rate": 9.003464255575693e-05, "loss": 0.4889, "step": 29310 }, { "epoch": 7.257425742574258, "grad_norm": 0.6335498690605164, "learning_rate": 9.002638643090677e-05, "loss": 0.4878, "step": 29320 }, { "epoch": 7.25990099009901, "grad_norm": 0.6027225255966187, "learning_rate": 9.00181272663167e-05, "loss": 0.4789, "step": 29330 }, { "epoch": 7.262376237623762, "grad_norm": 0.5741111040115356, "learning_rate": 9.000986506261395e-05, "loss": 0.4878, "step": 29340 }, { "epoch": 7.264851485148515, "grad_norm": 0.5714916586875916, "learning_rate": 9.000159982042602e-05, "loss": 0.4785, "step": 29350 }, { "epoch": 7.267326732673268, "grad_norm": 0.6097884774208069, "learning_rate": 8.999333154038055e-05, "loss": 0.4861, "step": 29360 }, { "epoch": 7.26980198019802, "grad_norm": 0.5951612591743469, "learning_rate": 8.998506022310549e-05, "loss": 0.491, "step": 29370 }, { "epoch": 7.272277227722772, "grad_norm": 0.6076367497444153, "learning_rate": 8.997678586922898e-05, "loss": 0.4852, "step": 29380 }, { "epoch": 7.274752475247524, "grad_norm": 0.6375004649162292, "learning_rate": 8.996850847937941e-05, "loss": 0.4858, "step": 29390 }, { "epoch": 7.2772277227722775, "grad_norm": 0.6149189472198486, "learning_rate": 8.996022805418539e-05, "loss": 0.4894, "step": 29400 }, { "epoch": 7.27970297029703, "grad_norm": 0.5903016924858093, "learning_rate": 8.995194459427576e-05, "loss": 0.485, "step": 29410 }, { "epoch": 7.282178217821782, "grad_norm": 0.6126664280891418, "learning_rate": 8.99436581002796e-05, "loss": 0.4843, "step": 29420 }, { "epoch": 7.284653465346534, "grad_norm": 0.608616292476654, "learning_rate": 8.99353685728262e-05, "loss": 0.4882, "step": 29430 }, { "epoch": 7.287128712871287, "grad_norm": 0.6169741153717041, "learning_rate": 8.992707601254513e-05, "loss": 0.4904, "step": 29440 }, { "epoch": 7.28960396039604, "grad_norm": 0.6218394041061401, "learning_rate": 8.991878042006611e-05, "loss": 0.4868, "step": 29450 }, { "epoch": 7.292079207920792, "grad_norm": 0.635213315486908, "learning_rate": 8.991048179601918e-05, "loss": 0.492, "step": 29460 }, { "epoch": 7.294554455445544, "grad_norm": 0.6170589923858643, "learning_rate": 8.990218014103453e-05, "loss": 0.4871, "step": 29470 }, { "epoch": 7.297029702970297, "grad_norm": 0.6444498300552368, "learning_rate": 8.989387545574264e-05, "loss": 0.4922, "step": 29480 }, { "epoch": 7.2995049504950495, "grad_norm": 0.6909785866737366, "learning_rate": 8.988556774077418e-05, "loss": 0.4843, "step": 29490 }, { "epoch": 7.301980198019802, "grad_norm": 0.6839187145233154, "learning_rate": 8.987725699676008e-05, "loss": 0.4855, "step": 29500 }, { "epoch": 7.304455445544554, "grad_norm": 0.633811891078949, "learning_rate": 8.986894322433146e-05, "loss": 0.4871, "step": 29510 }, { "epoch": 7.306930693069307, "grad_norm": 0.6266473531723022, "learning_rate": 8.986062642411973e-05, "loss": 0.4898, "step": 29520 }, { "epoch": 7.3094059405940595, "grad_norm": 0.598796010017395, "learning_rate": 8.985230659675648e-05, "loss": 0.4873, "step": 29530 }, { "epoch": 7.311881188118812, "grad_norm": 0.6091638803482056, "learning_rate": 8.984398374287353e-05, "loss": 0.4901, "step": 29540 }, { "epoch": 7.314356435643564, "grad_norm": 0.610660970211029, "learning_rate": 8.983565786310298e-05, "loss": 0.4897, "step": 29550 }, { "epoch": 7.316831683168317, "grad_norm": 0.5878229737281799, "learning_rate": 8.98273289580771e-05, "loss": 0.4859, "step": 29560 }, { "epoch": 7.319306930693069, "grad_norm": 0.5610226392745972, "learning_rate": 8.981899702842842e-05, "loss": 0.4849, "step": 29570 }, { "epoch": 7.321782178217822, "grad_norm": 0.5781628489494324, "learning_rate": 8.981066207478971e-05, "loss": 0.4849, "step": 29580 }, { "epoch": 7.324257425742574, "grad_norm": 0.5810943245887756, "learning_rate": 8.980232409779394e-05, "loss": 0.4871, "step": 29590 }, { "epoch": 7.326732673267327, "grad_norm": 0.6099168658256531, "learning_rate": 8.979398309807431e-05, "loss": 0.4822, "step": 29600 }, { "epoch": 7.329207920792079, "grad_norm": 0.6432210206985474, "learning_rate": 8.978563907626429e-05, "loss": 0.4905, "step": 29610 }, { "epoch": 7.3316831683168315, "grad_norm": 0.5947167277336121, "learning_rate": 8.977729203299756e-05, "loss": 0.492, "step": 29620 }, { "epoch": 7.334158415841584, "grad_norm": 0.6134138107299805, "learning_rate": 8.9768941968908e-05, "loss": 0.4823, "step": 29630 }, { "epoch": 7.336633663366337, "grad_norm": 0.6289623975753784, "learning_rate": 8.976058888462973e-05, "loss": 0.4858, "step": 29640 }, { "epoch": 7.339108910891089, "grad_norm": 0.6145509481430054, "learning_rate": 8.975223278079715e-05, "loss": 0.4819, "step": 29650 }, { "epoch": 7.341584158415841, "grad_norm": 0.611322283744812, "learning_rate": 8.974387365804485e-05, "loss": 0.484, "step": 29660 }, { "epoch": 7.344059405940594, "grad_norm": 0.622616171836853, "learning_rate": 8.973551151700759e-05, "loss": 0.4802, "step": 29670 }, { "epoch": 7.346534653465347, "grad_norm": 0.6000705361366272, "learning_rate": 8.972714635832049e-05, "loss": 0.4839, "step": 29680 }, { "epoch": 7.349009900990099, "grad_norm": 0.6503334045410156, "learning_rate": 8.971877818261879e-05, "loss": 0.4842, "step": 29690 }, { "epoch": 7.351485148514851, "grad_norm": 0.626315712928772, "learning_rate": 8.971040699053801e-05, "loss": 0.4848, "step": 29700 }, { "epoch": 7.353960396039604, "grad_norm": 0.6556193828582764, "learning_rate": 8.97020327827139e-05, "loss": 0.4916, "step": 29710 }, { "epoch": 7.356435643564357, "grad_norm": 0.6171558499336243, "learning_rate": 8.96936555597824e-05, "loss": 0.4869, "step": 29720 }, { "epoch": 7.358910891089109, "grad_norm": 0.6279577016830444, "learning_rate": 8.96852753223797e-05, "loss": 0.4886, "step": 29730 }, { "epoch": 7.361386138613861, "grad_norm": 0.6589211821556091, "learning_rate": 8.967689207114224e-05, "loss": 0.4866, "step": 29740 }, { "epoch": 7.3638613861386135, "grad_norm": 0.624148964881897, "learning_rate": 8.96685058067067e-05, "loss": 0.491, "step": 29750 }, { "epoch": 7.366336633663367, "grad_norm": 0.6197288632392883, "learning_rate": 8.966011652970992e-05, "loss": 0.4848, "step": 29760 }, { "epoch": 7.368811881188119, "grad_norm": 0.6212390661239624, "learning_rate": 8.965172424078902e-05, "loss": 0.4882, "step": 29770 }, { "epoch": 7.371287128712871, "grad_norm": 0.5974409580230713, "learning_rate": 8.964332894058134e-05, "loss": 0.4847, "step": 29780 }, { "epoch": 7.373762376237623, "grad_norm": 0.5934008955955505, "learning_rate": 8.963493062972446e-05, "loss": 0.4847, "step": 29790 }, { "epoch": 7.376237623762377, "grad_norm": 0.6245878338813782, "learning_rate": 8.962652930885616e-05, "loss": 0.4874, "step": 29800 }, { "epoch": 7.378712871287129, "grad_norm": 0.6261271238327026, "learning_rate": 8.961812497861446e-05, "loss": 0.4872, "step": 29810 }, { "epoch": 7.381188118811881, "grad_norm": 0.6151721477508545, "learning_rate": 8.960971763963763e-05, "loss": 0.4834, "step": 29820 }, { "epoch": 7.383663366336633, "grad_norm": 0.6462975144386292, "learning_rate": 8.960130729256417e-05, "loss": 0.4864, "step": 29830 }, { "epoch": 7.3861386138613865, "grad_norm": 0.6022083759307861, "learning_rate": 8.959289393803273e-05, "loss": 0.4858, "step": 29840 }, { "epoch": 7.388613861386139, "grad_norm": 0.6382448673248291, "learning_rate": 8.958447757668232e-05, "loss": 0.4901, "step": 29850 }, { "epoch": 7.391089108910891, "grad_norm": 0.6109929084777832, "learning_rate": 8.957605820915205e-05, "loss": 0.4887, "step": 29860 }, { "epoch": 7.393564356435643, "grad_norm": 0.617136538028717, "learning_rate": 8.956763583608137e-05, "loss": 0.4892, "step": 29870 }, { "epoch": 7.396039603960396, "grad_norm": 0.6131994128227234, "learning_rate": 8.955921045810984e-05, "loss": 0.4918, "step": 29880 }, { "epoch": 7.398514851485149, "grad_norm": 0.6096698641777039, "learning_rate": 8.955078207587735e-05, "loss": 0.4887, "step": 29890 }, { "epoch": 7.400990099009901, "grad_norm": 0.6374537348747253, "learning_rate": 8.954235069002397e-05, "loss": 0.4868, "step": 29900 }, { "epoch": 7.403465346534653, "grad_norm": 0.5907552242279053, "learning_rate": 8.953391630119002e-05, "loss": 0.4831, "step": 29910 }, { "epoch": 7.405940594059406, "grad_norm": 0.6363158822059631, "learning_rate": 8.952547891001602e-05, "loss": 0.4825, "step": 29920 }, { "epoch": 7.408415841584159, "grad_norm": 0.6404915452003479, "learning_rate": 8.951703851714275e-05, "loss": 0.4864, "step": 29930 }, { "epoch": 7.410891089108911, "grad_norm": 0.6241724491119385, "learning_rate": 8.950859512321119e-05, "loss": 0.4835, "step": 29940 }, { "epoch": 7.413366336633663, "grad_norm": 0.5963716506958008, "learning_rate": 8.950014872886258e-05, "loss": 0.4889, "step": 29950 }, { "epoch": 7.415841584158416, "grad_norm": 0.6359931230545044, "learning_rate": 8.949169933473833e-05, "loss": 0.4804, "step": 29960 }, { "epoch": 7.4183168316831685, "grad_norm": 0.6081670522689819, "learning_rate": 8.948324694148016e-05, "loss": 0.4798, "step": 29970 }, { "epoch": 7.420792079207921, "grad_norm": 0.631215512752533, "learning_rate": 8.947479154972993e-05, "loss": 0.4853, "step": 29980 }, { "epoch": 7.423267326732673, "grad_norm": 0.6116186380386353, "learning_rate": 8.94663331601298e-05, "loss": 0.484, "step": 29990 }, { "epoch": 7.425742574257426, "grad_norm": 0.593376100063324, "learning_rate": 8.945787177332212e-05, "loss": 0.4872, "step": 30000 }, { "epoch": 7.428217821782178, "grad_norm": 0.6700327396392822, "learning_rate": 8.94494073899495e-05, "loss": 0.4824, "step": 30010 }, { "epoch": 7.430693069306931, "grad_norm": 0.607354998588562, "learning_rate": 8.944094001065471e-05, "loss": 0.4876, "step": 30020 }, { "epoch": 7.433168316831683, "grad_norm": 0.5926987528800964, "learning_rate": 8.943246963608082e-05, "loss": 0.4841, "step": 30030 }, { "epoch": 7.435643564356436, "grad_norm": 0.6330152153968811, "learning_rate": 8.942399626687108e-05, "loss": 0.477, "step": 30040 }, { "epoch": 7.438118811881188, "grad_norm": 0.6274181604385376, "learning_rate": 8.941551990366901e-05, "loss": 0.4887, "step": 30050 }, { "epoch": 7.4405940594059405, "grad_norm": 0.620527982711792, "learning_rate": 8.940704054711832e-05, "loss": 0.4847, "step": 30060 }, { "epoch": 7.443069306930693, "grad_norm": 0.6276371479034424, "learning_rate": 8.939855819786299e-05, "loss": 0.4873, "step": 30070 }, { "epoch": 7.445544554455446, "grad_norm": 0.63272625207901, "learning_rate": 8.939007285654714e-05, "loss": 0.4808, "step": 30080 }, { "epoch": 7.448019801980198, "grad_norm": 0.6208006739616394, "learning_rate": 8.938158452381522e-05, "loss": 0.482, "step": 30090 }, { "epoch": 7.4504950495049505, "grad_norm": 0.6522141695022583, "learning_rate": 8.937309320031185e-05, "loss": 0.4859, "step": 30100 }, { "epoch": 7.452970297029703, "grad_norm": 0.6229251027107239, "learning_rate": 8.936459888668188e-05, "loss": 0.4819, "step": 30110 }, { "epoch": 7.455445544554456, "grad_norm": 0.6233272552490234, "learning_rate": 8.935610158357042e-05, "loss": 0.4862, "step": 30120 }, { "epoch": 7.457920792079208, "grad_norm": 0.5903387069702148, "learning_rate": 8.934760129162277e-05, "loss": 0.4835, "step": 30130 }, { "epoch": 7.46039603960396, "grad_norm": 0.5963030457496643, "learning_rate": 8.933909801148447e-05, "loss": 0.4864, "step": 30140 }, { "epoch": 7.462871287128713, "grad_norm": 0.5936185717582703, "learning_rate": 8.933059174380131e-05, "loss": 0.4867, "step": 30150 }, { "epoch": 7.465346534653466, "grad_norm": 0.6048686504364014, "learning_rate": 8.932208248921925e-05, "loss": 0.4868, "step": 30160 }, { "epoch": 7.467821782178218, "grad_norm": 0.5966758131980896, "learning_rate": 8.931357024838454e-05, "loss": 0.4901, "step": 30170 }, { "epoch": 7.47029702970297, "grad_norm": 0.5999704599380493, "learning_rate": 8.93050550219436e-05, "loss": 0.4862, "step": 30180 }, { "epoch": 7.4727722772277225, "grad_norm": 0.6140446662902832, "learning_rate": 8.929653681054313e-05, "loss": 0.4848, "step": 30190 }, { "epoch": 7.475247524752476, "grad_norm": 0.5641240477561951, "learning_rate": 8.928801561483003e-05, "loss": 0.4833, "step": 30200 }, { "epoch": 7.477722772277228, "grad_norm": 0.593532919883728, "learning_rate": 8.92794914354514e-05, "loss": 0.4867, "step": 30210 }, { "epoch": 7.48019801980198, "grad_norm": 0.6347424387931824, "learning_rate": 8.927096427305464e-05, "loss": 0.4796, "step": 30220 }, { "epoch": 7.482673267326732, "grad_norm": 0.649335503578186, "learning_rate": 8.92624341282873e-05, "loss": 0.4899, "step": 30230 }, { "epoch": 7.485148514851485, "grad_norm": 0.6375052332878113, "learning_rate": 8.92539010017972e-05, "loss": 0.4928, "step": 30240 }, { "epoch": 7.487623762376238, "grad_norm": 0.6719486117362976, "learning_rate": 8.924536489423237e-05, "loss": 0.4839, "step": 30250 }, { "epoch": 7.49009900990099, "grad_norm": 0.6227006316184998, "learning_rate": 8.923682580624108e-05, "loss": 0.4782, "step": 30260 }, { "epoch": 7.492574257425742, "grad_norm": 0.5910367965698242, "learning_rate": 8.922828373847181e-05, "loss": 0.4871, "step": 30270 }, { "epoch": 7.4950495049504955, "grad_norm": 0.6341922283172607, "learning_rate": 8.921973869157327e-05, "loss": 0.4894, "step": 30280 }, { "epoch": 7.497524752475248, "grad_norm": 0.653562605381012, "learning_rate": 8.92111906661944e-05, "loss": 0.4807, "step": 30290 }, { "epoch": 7.5, "grad_norm": 0.5877997279167175, "learning_rate": 8.920263966298438e-05, "loss": 0.4895, "step": 30300 }, { "epoch": 7.502475247524752, "grad_norm": 0.6333995461463928, "learning_rate": 8.919408568259257e-05, "loss": 0.4873, "step": 30310 }, { "epoch": 7.5049504950495045, "grad_norm": 0.6087074279785156, "learning_rate": 8.918552872566864e-05, "loss": 0.4895, "step": 30320 }, { "epoch": 7.507425742574258, "grad_norm": 0.6183295845985413, "learning_rate": 8.91769687928624e-05, "loss": 0.4843, "step": 30330 }, { "epoch": 7.50990099009901, "grad_norm": 0.6289190649986267, "learning_rate": 8.916840588482392e-05, "loss": 0.4846, "step": 30340 }, { "epoch": 7.512376237623762, "grad_norm": 0.6192044019699097, "learning_rate": 8.91598400022035e-05, "loss": 0.485, "step": 30350 }, { "epoch": 7.514851485148515, "grad_norm": 0.6174667477607727, "learning_rate": 8.915127114565165e-05, "loss": 0.4839, "step": 30360 }, { "epoch": 7.517326732673268, "grad_norm": 0.5963404178619385, "learning_rate": 8.914269931581916e-05, "loss": 0.4799, "step": 30370 }, { "epoch": 7.51980198019802, "grad_norm": 0.5714800357818604, "learning_rate": 8.913412451335696e-05, "loss": 0.4909, "step": 30380 }, { "epoch": 7.522277227722772, "grad_norm": 0.5945215821266174, "learning_rate": 8.912554673891626e-05, "loss": 0.4872, "step": 30390 }, { "epoch": 7.524752475247524, "grad_norm": 0.585436224937439, "learning_rate": 8.911696599314851e-05, "loss": 0.485, "step": 30400 }, { "epoch": 7.5272277227722775, "grad_norm": 0.5774543881416321, "learning_rate": 8.910838227670534e-05, "loss": 0.4812, "step": 30410 }, { "epoch": 7.52970297029703, "grad_norm": 0.6457281112670898, "learning_rate": 8.909979559023862e-05, "loss": 0.4907, "step": 30420 }, { "epoch": 7.532178217821782, "grad_norm": 0.6201040744781494, "learning_rate": 8.909120593440045e-05, "loss": 0.488, "step": 30430 }, { "epoch": 7.534653465346535, "grad_norm": 0.614446759223938, "learning_rate": 8.90826133098432e-05, "loss": 0.4827, "step": 30440 }, { "epoch": 7.537128712871287, "grad_norm": 0.6084551811218262, "learning_rate": 8.907401771721937e-05, "loss": 0.4854, "step": 30450 }, { "epoch": 7.53960396039604, "grad_norm": 0.6016777753829956, "learning_rate": 8.906541915718176e-05, "loss": 0.4862, "step": 30460 }, { "epoch": 7.542079207920792, "grad_norm": 0.5885539054870605, "learning_rate": 8.905681763038338e-05, "loss": 0.4848, "step": 30470 }, { "epoch": 7.544554455445544, "grad_norm": 0.6271745562553406, "learning_rate": 8.904821313747744e-05, "loss": 0.4862, "step": 30480 }, { "epoch": 7.547029702970297, "grad_norm": 0.6470777988433838, "learning_rate": 8.903960567911743e-05, "loss": 0.4869, "step": 30490 }, { "epoch": 7.5495049504950495, "grad_norm": 0.6219745874404907, "learning_rate": 8.9030995255957e-05, "loss": 0.4814, "step": 30500 }, { "epoch": 7.551980198019802, "grad_norm": 0.5915669798851013, "learning_rate": 8.902238186865007e-05, "loss": 0.4877, "step": 30510 }, { "epoch": 7.554455445544555, "grad_norm": 0.6347585320472717, "learning_rate": 8.901376551785074e-05, "loss": 0.4869, "step": 30520 }, { "epoch": 7.556930693069307, "grad_norm": 0.6091338992118835, "learning_rate": 8.90051462042134e-05, "loss": 0.4891, "step": 30530 }, { "epoch": 7.5594059405940595, "grad_norm": 0.5900543332099915, "learning_rate": 8.899652392839261e-05, "loss": 0.487, "step": 30540 }, { "epoch": 7.561881188118812, "grad_norm": 0.6055745482444763, "learning_rate": 8.898789869104321e-05, "loss": 0.4878, "step": 30550 }, { "epoch": 7.564356435643564, "grad_norm": 0.6124340891838074, "learning_rate": 8.897927049282018e-05, "loss": 0.4837, "step": 30560 }, { "epoch": 7.566831683168317, "grad_norm": 0.5956087112426758, "learning_rate": 8.89706393343788e-05, "loss": 0.4832, "step": 30570 }, { "epoch": 7.569306930693069, "grad_norm": 0.6332167387008667, "learning_rate": 8.896200521637454e-05, "loss": 0.4846, "step": 30580 }, { "epoch": 7.571782178217822, "grad_norm": 0.6122103333473206, "learning_rate": 8.895336813946311e-05, "loss": 0.4882, "step": 30590 }, { "epoch": 7.574257425742574, "grad_norm": 0.6023416519165039, "learning_rate": 8.894472810430045e-05, "loss": 0.4908, "step": 30600 }, { "epoch": 7.576732673267327, "grad_norm": 0.6266419291496277, "learning_rate": 8.89360851115427e-05, "loss": 0.4895, "step": 30610 }, { "epoch": 7.579207920792079, "grad_norm": 0.6191295981407166, "learning_rate": 8.892743916184625e-05, "loss": 0.4877, "step": 30620 }, { "epoch": 7.5816831683168315, "grad_norm": 0.5921767354011536, "learning_rate": 8.891879025586767e-05, "loss": 0.4875, "step": 30630 }, { "epoch": 7.584158415841584, "grad_norm": 0.6118314862251282, "learning_rate": 8.891013839426384e-05, "loss": 0.4939, "step": 30640 }, { "epoch": 7.586633663366337, "grad_norm": 0.5970680117607117, "learning_rate": 8.890148357769177e-05, "loss": 0.4839, "step": 30650 }, { "epoch": 7.589108910891089, "grad_norm": 0.6119961738586426, "learning_rate": 8.889282580680875e-05, "loss": 0.4795, "step": 30660 }, { "epoch": 7.591584158415841, "grad_norm": 0.6245187520980835, "learning_rate": 8.888416508227227e-05, "loss": 0.4839, "step": 30670 }, { "epoch": 7.594059405940594, "grad_norm": 0.6140758395195007, "learning_rate": 8.887550140474008e-05, "loss": 0.4852, "step": 30680 }, { "epoch": 7.596534653465347, "grad_norm": 0.6043074727058411, "learning_rate": 8.88668347748701e-05, "loss": 0.4825, "step": 30690 }, { "epoch": 7.599009900990099, "grad_norm": 0.6452438831329346, "learning_rate": 8.885816519332053e-05, "loss": 0.4833, "step": 30700 }, { "epoch": 7.601485148514851, "grad_norm": 0.5854122042655945, "learning_rate": 8.884949266074975e-05, "loss": 0.4852, "step": 30710 }, { "epoch": 7.603960396039604, "grad_norm": 0.6153538227081299, "learning_rate": 8.88408171778164e-05, "loss": 0.4814, "step": 30720 }, { "epoch": 7.606435643564357, "grad_norm": 0.6295205950737, "learning_rate": 8.88321387451793e-05, "loss": 0.4805, "step": 30730 }, { "epoch": 7.608910891089109, "grad_norm": 0.6256320476531982, "learning_rate": 8.882345736349752e-05, "loss": 0.4905, "step": 30740 }, { "epoch": 7.611386138613861, "grad_norm": 0.6395837068557739, "learning_rate": 8.881477303343039e-05, "loss": 0.4881, "step": 30750 }, { "epoch": 7.6138613861386135, "grad_norm": 0.5801824331283569, "learning_rate": 8.88060857556374e-05, "loss": 0.484, "step": 30760 }, { "epoch": 7.616336633663367, "grad_norm": 0.5897629261016846, "learning_rate": 8.879739553077828e-05, "loss": 0.4874, "step": 30770 }, { "epoch": 7.618811881188119, "grad_norm": 0.621910810470581, "learning_rate": 8.878870235951302e-05, "loss": 0.4861, "step": 30780 }, { "epoch": 7.621287128712871, "grad_norm": 0.619625985622406, "learning_rate": 8.87800062425018e-05, "loss": 0.4896, "step": 30790 }, { "epoch": 7.623762376237623, "grad_norm": 0.6652230024337769, "learning_rate": 8.877130718040502e-05, "loss": 0.4872, "step": 30800 }, { "epoch": 7.626237623762377, "grad_norm": 0.6422529816627502, "learning_rate": 8.876260517388335e-05, "loss": 0.488, "step": 30810 }, { "epoch": 7.628712871287129, "grad_norm": 0.609222412109375, "learning_rate": 8.875390022359762e-05, "loss": 0.4801, "step": 30820 }, { "epoch": 7.631188118811881, "grad_norm": 0.6011002063751221, "learning_rate": 8.87451923302089e-05, "loss": 0.4857, "step": 30830 }, { "epoch": 7.633663366336633, "grad_norm": 0.6619532108306885, "learning_rate": 8.873648149437855e-05, "loss": 0.487, "step": 30840 }, { "epoch": 7.6361386138613865, "grad_norm": 0.6124223470687866, "learning_rate": 8.872776771676805e-05, "loss": 0.4858, "step": 30850 }, { "epoch": 7.638613861386139, "grad_norm": 0.6054797768592834, "learning_rate": 8.871905099803917e-05, "loss": 0.4834, "step": 30860 }, { "epoch": 7.641089108910891, "grad_norm": 0.580331027507782, "learning_rate": 8.871033133885389e-05, "loss": 0.4886, "step": 30870 }, { "epoch": 7.643564356435643, "grad_norm": 0.5915795564651489, "learning_rate": 8.870160873987443e-05, "loss": 0.49, "step": 30880 }, { "epoch": 7.646039603960396, "grad_norm": 0.6426500082015991, "learning_rate": 8.869288320176317e-05, "loss": 0.4826, "step": 30890 }, { "epoch": 7.648514851485149, "grad_norm": 0.6068077087402344, "learning_rate": 8.86841547251828e-05, "loss": 0.4911, "step": 30900 }, { "epoch": 7.650990099009901, "grad_norm": 0.5934604406356812, "learning_rate": 8.867542331079617e-05, "loss": 0.4873, "step": 30910 }, { "epoch": 7.653465346534653, "grad_norm": 0.588775634765625, "learning_rate": 8.866668895926637e-05, "loss": 0.4807, "step": 30920 }, { "epoch": 7.655940594059406, "grad_norm": 0.5670357942581177, "learning_rate": 8.865795167125672e-05, "loss": 0.4874, "step": 30930 }, { "epoch": 7.658415841584159, "grad_norm": 0.5937129855155945, "learning_rate": 8.864921144743076e-05, "loss": 0.485, "step": 30940 }, { "epoch": 7.660891089108911, "grad_norm": 0.6366387009620667, "learning_rate": 8.864046828845225e-05, "loss": 0.4853, "step": 30950 }, { "epoch": 7.663366336633663, "grad_norm": 0.6118563413619995, "learning_rate": 8.86317221949852e-05, "loss": 0.4852, "step": 30960 }, { "epoch": 7.665841584158416, "grad_norm": 0.596583902835846, "learning_rate": 8.862297316769376e-05, "loss": 0.4818, "step": 30970 }, { "epoch": 7.6683168316831685, "grad_norm": 0.6055269837379456, "learning_rate": 8.861422120724243e-05, "loss": 0.4882, "step": 30980 }, { "epoch": 7.670792079207921, "grad_norm": 0.5847370028495789, "learning_rate": 8.860546631429582e-05, "loss": 0.4918, "step": 30990 }, { "epoch": 7.673267326732673, "grad_norm": 0.5887202024459839, "learning_rate": 8.859670848951882e-05, "loss": 0.482, "step": 31000 }, { "epoch": 7.675742574257426, "grad_norm": 0.6009629964828491, "learning_rate": 8.858794773357652e-05, "loss": 0.4785, "step": 31010 }, { "epoch": 7.678217821782178, "grad_norm": 0.6353895664215088, "learning_rate": 8.857918404713426e-05, "loss": 0.4792, "step": 31020 }, { "epoch": 7.680693069306931, "grad_norm": 0.6039911508560181, "learning_rate": 8.857041743085755e-05, "loss": 0.4813, "step": 31030 }, { "epoch": 7.683168316831683, "grad_norm": 0.6202948093414307, "learning_rate": 8.85616478854122e-05, "loss": 0.4856, "step": 31040 }, { "epoch": 7.685643564356436, "grad_norm": 0.6045747995376587, "learning_rate": 8.855287541146416e-05, "loss": 0.4804, "step": 31050 }, { "epoch": 7.688118811881188, "grad_norm": 0.6333339810371399, "learning_rate": 8.854410000967968e-05, "loss": 0.4911, "step": 31060 }, { "epoch": 7.6905940594059405, "grad_norm": 0.5923751592636108, "learning_rate": 8.853532168072515e-05, "loss": 0.4778, "step": 31070 }, { "epoch": 7.693069306930693, "grad_norm": 0.625190019607544, "learning_rate": 8.852654042526727e-05, "loss": 0.4825, "step": 31080 }, { "epoch": 7.695544554455445, "grad_norm": 0.6043386459350586, "learning_rate": 8.85177562439729e-05, "loss": 0.4879, "step": 31090 }, { "epoch": 7.698019801980198, "grad_norm": 0.5949040055274963, "learning_rate": 8.850896913750911e-05, "loss": 0.4799, "step": 31100 }, { "epoch": 7.7004950495049505, "grad_norm": 0.612433135509491, "learning_rate": 8.850017910654327e-05, "loss": 0.4805, "step": 31110 }, { "epoch": 7.702970297029703, "grad_norm": 0.6252827048301697, "learning_rate": 8.849138615174291e-05, "loss": 0.4922, "step": 31120 }, { "epoch": 7.705445544554456, "grad_norm": 0.6152768731117249, "learning_rate": 8.848259027377579e-05, "loss": 0.4816, "step": 31130 }, { "epoch": 7.707920792079208, "grad_norm": 0.5756309032440186, "learning_rate": 8.84737914733099e-05, "loss": 0.4814, "step": 31140 }, { "epoch": 7.71039603960396, "grad_norm": 0.5915231108665466, "learning_rate": 8.846498975101345e-05, "loss": 0.4831, "step": 31150 }, { "epoch": 7.712871287128713, "grad_norm": 0.5998745560646057, "learning_rate": 8.845618510755486e-05, "loss": 0.4798, "step": 31160 }, { "epoch": 7.715346534653465, "grad_norm": 0.5771780014038086, "learning_rate": 8.844737754360284e-05, "loss": 0.4826, "step": 31170 }, { "epoch": 7.717821782178218, "grad_norm": 0.5759151577949524, "learning_rate": 8.843856705982619e-05, "loss": 0.4787, "step": 31180 }, { "epoch": 7.72029702970297, "grad_norm": 0.5786028504371643, "learning_rate": 8.842975365689406e-05, "loss": 0.4808, "step": 31190 }, { "epoch": 7.7227722772277225, "grad_norm": 0.6166158318519592, "learning_rate": 8.842093733547575e-05, "loss": 0.4867, "step": 31200 }, { "epoch": 7.725247524752476, "grad_norm": 0.5866308808326721, "learning_rate": 8.84121180962408e-05, "loss": 0.4843, "step": 31210 }, { "epoch": 7.727722772277228, "grad_norm": 0.6214214563369751, "learning_rate": 8.840329593985899e-05, "loss": 0.4801, "step": 31220 }, { "epoch": 7.73019801980198, "grad_norm": 0.6182790994644165, "learning_rate": 8.839447086700029e-05, "loss": 0.4749, "step": 31230 }, { "epoch": 7.732673267326732, "grad_norm": 0.6060408353805542, "learning_rate": 8.83856428783349e-05, "loss": 0.4948, "step": 31240 }, { "epoch": 7.735148514851485, "grad_norm": 0.6029646992683411, "learning_rate": 8.837681197453327e-05, "loss": 0.4888, "step": 31250 }, { "epoch": 7.737623762376238, "grad_norm": 0.5880933403968811, "learning_rate": 8.836797815626603e-05, "loss": 0.4826, "step": 31260 }, { "epoch": 7.74009900990099, "grad_norm": 0.5991101861000061, "learning_rate": 8.835914142420405e-05, "loss": 0.481, "step": 31270 }, { "epoch": 7.742574257425742, "grad_norm": 0.6167779564857483, "learning_rate": 8.835030177901843e-05, "loss": 0.4863, "step": 31280 }, { "epoch": 7.7450495049504955, "grad_norm": 0.5688458681106567, "learning_rate": 8.834145922138049e-05, "loss": 0.485, "step": 31290 }, { "epoch": 7.747524752475248, "grad_norm": 0.5785276889801025, "learning_rate": 8.833261375196176e-05, "loss": 0.4817, "step": 31300 }, { "epoch": 7.75, "grad_norm": 0.5717102289199829, "learning_rate": 8.832376537143397e-05, "loss": 0.4848, "step": 31310 }, { "epoch": 7.752475247524752, "grad_norm": 0.6063447594642639, "learning_rate": 8.831491408046911e-05, "loss": 0.4829, "step": 31320 }, { "epoch": 7.7549504950495045, "grad_norm": 0.5987357497215271, "learning_rate": 8.830605987973938e-05, "loss": 0.4778, "step": 31330 }, { "epoch": 7.757425742574258, "grad_norm": 0.6025711894035339, "learning_rate": 8.829720276991722e-05, "loss": 0.4847, "step": 31340 }, { "epoch": 7.75990099009901, "grad_norm": 0.6285037398338318, "learning_rate": 8.828834275167525e-05, "loss": 0.4844, "step": 31350 }, { "epoch": 7.762376237623762, "grad_norm": 0.5843696594238281, "learning_rate": 8.827947982568632e-05, "loss": 0.492, "step": 31360 }, { "epoch": 7.764851485148515, "grad_norm": 0.6761636734008789, "learning_rate": 8.82706139926235e-05, "loss": 0.4846, "step": 31370 }, { "epoch": 7.767326732673268, "grad_norm": 0.6039426922798157, "learning_rate": 8.826174525316014e-05, "loss": 0.4872, "step": 31380 }, { "epoch": 7.76980198019802, "grad_norm": 0.594779908657074, "learning_rate": 8.825287360796973e-05, "loss": 0.4865, "step": 31390 }, { "epoch": 7.772277227722772, "grad_norm": 0.5554215908050537, "learning_rate": 8.824399905772601e-05, "loss": 0.4805, "step": 31400 }, { "epoch": 7.774752475247524, "grad_norm": 0.6350758671760559, "learning_rate": 8.823512160310294e-05, "loss": 0.482, "step": 31410 }, { "epoch": 7.7772277227722775, "grad_norm": 0.5825591683387756, "learning_rate": 8.822624124477475e-05, "loss": 0.4784, "step": 31420 }, { "epoch": 7.77970297029703, "grad_norm": 0.5980840921401978, "learning_rate": 8.821735798341578e-05, "loss": 0.4811, "step": 31430 }, { "epoch": 7.782178217821782, "grad_norm": 0.6103502511978149, "learning_rate": 8.820847181970068e-05, "loss": 0.4894, "step": 31440 }, { "epoch": 7.784653465346535, "grad_norm": 0.5914917588233948, "learning_rate": 8.819958275430431e-05, "loss": 0.4853, "step": 31450 }, { "epoch": 7.787128712871287, "grad_norm": 0.6096323132514954, "learning_rate": 8.819069078790171e-05, "loss": 0.4819, "step": 31460 }, { "epoch": 7.78960396039604, "grad_norm": 0.6341272592544556, "learning_rate": 8.81817959211682e-05, "loss": 0.4857, "step": 31470 }, { "epoch": 7.792079207920792, "grad_norm": 0.6227834224700928, "learning_rate": 8.817289815477928e-05, "loss": 0.4879, "step": 31480 }, { "epoch": 7.794554455445544, "grad_norm": 0.603002667427063, "learning_rate": 8.816399748941063e-05, "loss": 0.485, "step": 31490 }, { "epoch": 7.797029702970297, "grad_norm": 0.5725676417350769, "learning_rate": 8.815509392573824e-05, "loss": 0.4827, "step": 31500 }, { "epoch": 7.7995049504950495, "grad_norm": 0.5935652852058411, "learning_rate": 8.814618746443828e-05, "loss": 0.4826, "step": 31510 }, { "epoch": 7.801980198019802, "grad_norm": 0.6240319609642029, "learning_rate": 8.813727810618711e-05, "loss": 0.4881, "step": 31520 }, { "epoch": 7.804455445544555, "grad_norm": 0.6017290353775024, "learning_rate": 8.812836585166138e-05, "loss": 0.4844, "step": 31530 }, { "epoch": 7.806930693069307, "grad_norm": 0.6110914945602417, "learning_rate": 8.811945070153786e-05, "loss": 0.4891, "step": 31540 }, { "epoch": 7.8094059405940595, "grad_norm": 0.5746936798095703, "learning_rate": 8.811053265649364e-05, "loss": 0.4816, "step": 31550 }, { "epoch": 7.811881188118812, "grad_norm": 0.6022284626960754, "learning_rate": 8.810161171720597e-05, "loss": 0.4819, "step": 31560 }, { "epoch": 7.814356435643564, "grad_norm": 0.6154090166091919, "learning_rate": 8.809268788435233e-05, "loss": 0.4802, "step": 31570 }, { "epoch": 7.816831683168317, "grad_norm": 0.6191981434822083, "learning_rate": 8.808376115861044e-05, "loss": 0.4845, "step": 31580 }, { "epoch": 7.819306930693069, "grad_norm": 0.5625369548797607, "learning_rate": 8.807483154065823e-05, "loss": 0.4833, "step": 31590 }, { "epoch": 7.821782178217822, "grad_norm": 0.5941482782363892, "learning_rate": 8.806589903117384e-05, "loss": 0.4896, "step": 31600 }, { "epoch": 7.824257425742574, "grad_norm": 0.6038246750831604, "learning_rate": 8.805696363083562e-05, "loss": 0.4834, "step": 31610 }, { "epoch": 7.826732673267327, "grad_norm": 0.5711467862129211, "learning_rate": 8.804802534032216e-05, "loss": 0.4828, "step": 31620 }, { "epoch": 7.829207920792079, "grad_norm": 0.6106674671173096, "learning_rate": 8.803908416031228e-05, "loss": 0.4877, "step": 31630 }, { "epoch": 7.8316831683168315, "grad_norm": 0.5810605883598328, "learning_rate": 8.803014009148498e-05, "loss": 0.4829, "step": 31640 }, { "epoch": 7.834158415841584, "grad_norm": 0.6207130551338196, "learning_rate": 8.802119313451953e-05, "loss": 0.481, "step": 31650 }, { "epoch": 7.836633663366337, "grad_norm": 0.582776665687561, "learning_rate": 8.801224329009538e-05, "loss": 0.4904, "step": 31660 }, { "epoch": 7.839108910891089, "grad_norm": 0.594566285610199, "learning_rate": 8.800329055889223e-05, "loss": 0.4835, "step": 31670 }, { "epoch": 7.841584158415841, "grad_norm": 0.6165512800216675, "learning_rate": 8.799433494158992e-05, "loss": 0.4823, "step": 31680 }, { "epoch": 7.844059405940594, "grad_norm": 0.6469323039054871, "learning_rate": 8.798537643886863e-05, "loss": 0.4851, "step": 31690 }, { "epoch": 7.846534653465347, "grad_norm": 0.6109750270843506, "learning_rate": 8.79764150514087e-05, "loss": 0.4816, "step": 31700 }, { "epoch": 7.849009900990099, "grad_norm": 0.5814009308815002, "learning_rate": 8.796745077989066e-05, "loss": 0.4837, "step": 31710 }, { "epoch": 7.851485148514851, "grad_norm": 0.5878190398216248, "learning_rate": 8.795848362499528e-05, "loss": 0.4837, "step": 31720 }, { "epoch": 7.853960396039604, "grad_norm": 0.6091416478157043, "learning_rate": 8.794951358740358e-05, "loss": 0.4863, "step": 31730 }, { "epoch": 7.856435643564357, "grad_norm": 0.5920549035072327, "learning_rate": 8.794054066779678e-05, "loss": 0.4792, "step": 31740 }, { "epoch": 7.858910891089109, "grad_norm": 0.629000186920166, "learning_rate": 8.79315648668563e-05, "loss": 0.4836, "step": 31750 }, { "epoch": 7.861386138613861, "grad_norm": 0.5790669322013855, "learning_rate": 8.792258618526378e-05, "loss": 0.487, "step": 31760 }, { "epoch": 7.8638613861386135, "grad_norm": 0.6109678149223328, "learning_rate": 8.791360462370113e-05, "loss": 0.4856, "step": 31770 }, { "epoch": 7.866336633663367, "grad_norm": 0.6583126783370972, "learning_rate": 8.790462018285039e-05, "loss": 0.4841, "step": 31780 }, { "epoch": 7.868811881188119, "grad_norm": 0.6227678060531616, "learning_rate": 8.78956328633939e-05, "loss": 0.4833, "step": 31790 }, { "epoch": 7.871287128712871, "grad_norm": 0.5988368391990662, "learning_rate": 8.78866426660142e-05, "loss": 0.4801, "step": 31800 }, { "epoch": 7.873762376237623, "grad_norm": 0.6012871265411377, "learning_rate": 8.787764959139402e-05, "loss": 0.4889, "step": 31810 }, { "epoch": 7.876237623762377, "grad_norm": 0.588335394859314, "learning_rate": 8.786865364021631e-05, "loss": 0.4809, "step": 31820 }, { "epoch": 7.878712871287129, "grad_norm": 0.5995628833770752, "learning_rate": 8.785965481316428e-05, "loss": 0.4832, "step": 31830 }, { "epoch": 7.881188118811881, "grad_norm": 0.6256629824638367, "learning_rate": 8.785065311092131e-05, "loss": 0.4836, "step": 31840 }, { "epoch": 7.883663366336633, "grad_norm": 0.6022384762763977, "learning_rate": 8.784164853417103e-05, "loss": 0.4839, "step": 31850 }, { "epoch": 7.8861386138613865, "grad_norm": 0.5454156398773193, "learning_rate": 8.783264108359728e-05, "loss": 0.4868, "step": 31860 }, { "epoch": 7.888613861386139, "grad_norm": 0.5845124125480652, "learning_rate": 8.782363075988412e-05, "loss": 0.4807, "step": 31870 }, { "epoch": 7.891089108910891, "grad_norm": 0.61656254529953, "learning_rate": 8.78146175637158e-05, "loss": 0.486, "step": 31880 }, { "epoch": 7.893564356435643, "grad_norm": 0.6319636702537537, "learning_rate": 8.780560149577683e-05, "loss": 0.4825, "step": 31890 }, { "epoch": 7.896039603960396, "grad_norm": 0.5542703866958618, "learning_rate": 8.779658255675194e-05, "loss": 0.4866, "step": 31900 }, { "epoch": 7.898514851485149, "grad_norm": 0.601127564907074, "learning_rate": 8.778756074732603e-05, "loss": 0.4809, "step": 31910 }, { "epoch": 7.900990099009901, "grad_norm": 0.5669345259666443, "learning_rate": 8.777853606818425e-05, "loss": 0.4829, "step": 31920 }, { "epoch": 7.903465346534653, "grad_norm": 0.5753805041313171, "learning_rate": 8.7769508520012e-05, "loss": 0.4893, "step": 31930 }, { "epoch": 7.905940594059406, "grad_norm": 0.6069372296333313, "learning_rate": 8.776047810349481e-05, "loss": 0.4867, "step": 31940 }, { "epoch": 7.908415841584159, "grad_norm": 0.5937450528144836, "learning_rate": 8.775144481931851e-05, "loss": 0.4792, "step": 31950 }, { "epoch": 7.910891089108911, "grad_norm": 0.597410261631012, "learning_rate": 8.774240866816912e-05, "loss": 0.4822, "step": 31960 }, { "epoch": 7.913366336633663, "grad_norm": 0.6449050307273865, "learning_rate": 8.773336965073287e-05, "loss": 0.4816, "step": 31970 }, { "epoch": 7.915841584158416, "grad_norm": 0.6010563373565674, "learning_rate": 8.77243277676962e-05, "loss": 0.4837, "step": 31980 }, { "epoch": 7.9183168316831685, "grad_norm": 0.579889178276062, "learning_rate": 8.77152830197458e-05, "loss": 0.4823, "step": 31990 }, { "epoch": 7.920792079207921, "grad_norm": 0.6010478138923645, "learning_rate": 8.770623540756858e-05, "loss": 0.4814, "step": 32000 }, { "epoch": 7.923267326732673, "grad_norm": 0.5702736377716064, "learning_rate": 8.769718493185158e-05, "loss": 0.4832, "step": 32010 }, { "epoch": 7.925742574257426, "grad_norm": 0.6106717586517334, "learning_rate": 8.76881315932822e-05, "loss": 0.4849, "step": 32020 }, { "epoch": 7.928217821782178, "grad_norm": 0.5972067713737488, "learning_rate": 8.767907539254792e-05, "loss": 0.4837, "step": 32030 }, { "epoch": 7.930693069306931, "grad_norm": 0.5918044447898865, "learning_rate": 8.767001633033653e-05, "loss": 0.4864, "step": 32040 }, { "epoch": 7.933168316831683, "grad_norm": 0.5852305889129639, "learning_rate": 8.766095440733601e-05, "loss": 0.4877, "step": 32050 }, { "epoch": 7.935643564356436, "grad_norm": 0.582021951675415, "learning_rate": 8.765188962423453e-05, "loss": 0.4828, "step": 32060 }, { "epoch": 7.938118811881188, "grad_norm": 0.5953163504600525, "learning_rate": 8.764282198172051e-05, "loss": 0.4776, "step": 32070 }, { "epoch": 7.9405940594059405, "grad_norm": 0.5861048102378845, "learning_rate": 8.763375148048261e-05, "loss": 0.4814, "step": 32080 }, { "epoch": 7.943069306930693, "grad_norm": 0.6002243757247925, "learning_rate": 8.762467812120963e-05, "loss": 0.4889, "step": 32090 }, { "epoch": 7.945544554455445, "grad_norm": 0.6289429664611816, "learning_rate": 8.761560190459064e-05, "loss": 0.4822, "step": 32100 }, { "epoch": 7.948019801980198, "grad_norm": 0.5987977385520935, "learning_rate": 8.760652283131493e-05, "loss": 0.4856, "step": 32110 }, { "epoch": 7.9504950495049505, "grad_norm": 0.6380545496940613, "learning_rate": 8.7597440902072e-05, "loss": 0.4807, "step": 32120 }, { "epoch": 7.952970297029703, "grad_norm": 0.5740141868591309, "learning_rate": 8.758835611755153e-05, "loss": 0.4833, "step": 32130 }, { "epoch": 7.955445544554456, "grad_norm": 0.5569949746131897, "learning_rate": 8.757926847844351e-05, "loss": 0.4768, "step": 32140 }, { "epoch": 7.957920792079208, "grad_norm": 0.5746740698814392, "learning_rate": 8.757017798543802e-05, "loss": 0.478, "step": 32150 }, { "epoch": 7.96039603960396, "grad_norm": 0.6481715440750122, "learning_rate": 8.756108463922548e-05, "loss": 0.4838, "step": 32160 }, { "epoch": 7.962871287128713, "grad_norm": 0.5865947604179382, "learning_rate": 8.755198844049643e-05, "loss": 0.4864, "step": 32170 }, { "epoch": 7.965346534653465, "grad_norm": 0.6013561487197876, "learning_rate": 8.754288938994168e-05, "loss": 0.484, "step": 32180 }, { "epoch": 7.967821782178218, "grad_norm": 0.5811010003089905, "learning_rate": 8.753378748825225e-05, "loss": 0.4866, "step": 32190 }, { "epoch": 7.97029702970297, "grad_norm": 0.5805180072784424, "learning_rate": 8.752468273611936e-05, "loss": 0.4832, "step": 32200 }, { "epoch": 7.9727722772277225, "grad_norm": 0.5940776467323303, "learning_rate": 8.751557513423445e-05, "loss": 0.4842, "step": 32210 }, { "epoch": 7.975247524752476, "grad_norm": 0.5969387292861938, "learning_rate": 8.750646468328919e-05, "loss": 0.4826, "step": 32220 }, { "epoch": 7.977722772277228, "grad_norm": 0.5654346942901611, "learning_rate": 8.749735138397546e-05, "loss": 0.4831, "step": 32230 }, { "epoch": 7.98019801980198, "grad_norm": 0.5692369937896729, "learning_rate": 8.748823523698535e-05, "loss": 0.4836, "step": 32240 }, { "epoch": 7.982673267326732, "grad_norm": 0.616218626499176, "learning_rate": 8.74791162430112e-05, "loss": 0.4888, "step": 32250 }, { "epoch": 7.985148514851485, "grad_norm": 0.6163007616996765, "learning_rate": 8.746999440274548e-05, "loss": 0.4813, "step": 32260 }, { "epoch": 7.987623762376238, "grad_norm": 0.5921193957328796, "learning_rate": 8.746086971688097e-05, "loss": 0.4818, "step": 32270 }, { "epoch": 7.99009900990099, "grad_norm": 0.6083107590675354, "learning_rate": 8.745174218611062e-05, "loss": 0.4829, "step": 32280 }, { "epoch": 7.992574257425742, "grad_norm": 0.629410982131958, "learning_rate": 8.744261181112765e-05, "loss": 0.4823, "step": 32290 }, { "epoch": 7.9950495049504955, "grad_norm": 0.6301906108856201, "learning_rate": 8.743347859262538e-05, "loss": 0.4774, "step": 32300 }, { "epoch": 7.997524752475248, "grad_norm": 0.6336489915847778, "learning_rate": 8.742434253129746e-05, "loss": 0.4906, "step": 32310 }, { "epoch": 8.0, "grad_norm": 0.6276483535766602, "learning_rate": 8.74152036278377e-05, "loss": 0.4846, "step": 32320 }, { "epoch": 8.002475247524753, "grad_norm": 0.6299055218696594, "learning_rate": 8.740606188294013e-05, "loss": 0.4805, "step": 32330 }, { "epoch": 8.004950495049505, "grad_norm": 0.5854093432426453, "learning_rate": 8.739691729729905e-05, "loss": 0.4796, "step": 32340 }, { "epoch": 8.007425742574258, "grad_norm": 0.5928853154182434, "learning_rate": 8.738776987160887e-05, "loss": 0.4822, "step": 32350 }, { "epoch": 8.009900990099009, "grad_norm": 0.5800935626029968, "learning_rate": 8.737861960656431e-05, "loss": 0.4848, "step": 32360 }, { "epoch": 8.012376237623762, "grad_norm": 0.6004745364189148, "learning_rate": 8.736946650286029e-05, "loss": 0.4851, "step": 32370 }, { "epoch": 8.014851485148515, "grad_norm": 0.5707614421844482, "learning_rate": 8.736031056119188e-05, "loss": 0.4859, "step": 32380 }, { "epoch": 8.017326732673267, "grad_norm": 0.5745618939399719, "learning_rate": 8.735115178225446e-05, "loss": 0.4851, "step": 32390 }, { "epoch": 8.01980198019802, "grad_norm": 0.557974100112915, "learning_rate": 8.734199016674355e-05, "loss": 0.4814, "step": 32400 }, { "epoch": 8.022277227722773, "grad_norm": 0.6027185916900635, "learning_rate": 8.733282571535492e-05, "loss": 0.4848, "step": 32410 }, { "epoch": 8.024752475247524, "grad_norm": 0.5998392701148987, "learning_rate": 8.732365842878455e-05, "loss": 0.496, "step": 32420 }, { "epoch": 8.027227722772277, "grad_norm": 0.5849689841270447, "learning_rate": 8.731448830772864e-05, "loss": 0.4854, "step": 32430 }, { "epoch": 8.029702970297029, "grad_norm": 0.5900030136108398, "learning_rate": 8.73053153528836e-05, "loss": 0.4861, "step": 32440 }, { "epoch": 8.032178217821782, "grad_norm": 0.5724604725837708, "learning_rate": 8.729613956494605e-05, "loss": 0.4799, "step": 32450 }, { "epoch": 8.034653465346535, "grad_norm": 0.607262909412384, "learning_rate": 8.728696094461284e-05, "loss": 0.4806, "step": 32460 }, { "epoch": 8.037128712871286, "grad_norm": 0.5603898763656616, "learning_rate": 8.727777949258101e-05, "loss": 0.4857, "step": 32470 }, { "epoch": 8.03960396039604, "grad_norm": 0.6350128650665283, "learning_rate": 8.726859520954785e-05, "loss": 0.4827, "step": 32480 }, { "epoch": 8.042079207920793, "grad_norm": 0.5755431056022644, "learning_rate": 8.725940809621082e-05, "loss": 0.489, "step": 32490 }, { "epoch": 8.044554455445544, "grad_norm": 0.6003754138946533, "learning_rate": 8.725021815326763e-05, "loss": 0.4848, "step": 32500 }, { "epoch": 8.047029702970297, "grad_norm": 0.5944796204566956, "learning_rate": 8.724102538141623e-05, "loss": 0.4849, "step": 32510 }, { "epoch": 8.049504950495049, "grad_norm": 0.6095154285430908, "learning_rate": 8.72318297813547e-05, "loss": 0.4846, "step": 32520 }, { "epoch": 8.051980198019802, "grad_norm": 0.6020580530166626, "learning_rate": 8.722263135378142e-05, "loss": 0.4825, "step": 32530 }, { "epoch": 8.054455445544555, "grad_norm": 0.568230926990509, "learning_rate": 8.721343009939494e-05, "loss": 0.482, "step": 32540 }, { "epoch": 8.056930693069306, "grad_norm": 0.5593418478965759, "learning_rate": 8.7204226018894e-05, "loss": 0.4815, "step": 32550 }, { "epoch": 8.05940594059406, "grad_norm": 0.599988579750061, "learning_rate": 8.719501911297765e-05, "loss": 0.4875, "step": 32560 }, { "epoch": 8.061881188118813, "grad_norm": 0.6031679511070251, "learning_rate": 8.718580938234507e-05, "loss": 0.4879, "step": 32570 }, { "epoch": 8.064356435643564, "grad_norm": 0.5846736431121826, "learning_rate": 8.717659682769564e-05, "loss": 0.4844, "step": 32580 }, { "epoch": 8.066831683168317, "grad_norm": 0.558612585067749, "learning_rate": 8.716738144972905e-05, "loss": 0.4836, "step": 32590 }, { "epoch": 8.069306930693068, "grad_norm": 0.5876027941703796, "learning_rate": 8.715816324914511e-05, "loss": 0.4812, "step": 32600 }, { "epoch": 8.071782178217822, "grad_norm": 0.6445446610450745, "learning_rate": 8.71489422266439e-05, "loss": 0.4822, "step": 32610 }, { "epoch": 8.074257425742575, "grad_norm": 0.6000902056694031, "learning_rate": 8.713971838292569e-05, "loss": 0.4811, "step": 32620 }, { "epoch": 8.076732673267326, "grad_norm": 0.5514535903930664, "learning_rate": 8.713049171869097e-05, "loss": 0.4848, "step": 32630 }, { "epoch": 8.07920792079208, "grad_norm": 0.5829468369483948, "learning_rate": 8.712126223464044e-05, "loss": 0.4829, "step": 32640 }, { "epoch": 8.081683168316832, "grad_norm": 0.5782060027122498, "learning_rate": 8.711202993147503e-05, "loss": 0.4834, "step": 32650 }, { "epoch": 8.084158415841584, "grad_norm": 0.597693681716919, "learning_rate": 8.710279480989584e-05, "loss": 0.479, "step": 32660 }, { "epoch": 8.086633663366337, "grad_norm": 0.6293655633926392, "learning_rate": 8.709355687060428e-05, "loss": 0.4831, "step": 32670 }, { "epoch": 8.089108910891088, "grad_norm": 0.5583171844482422, "learning_rate": 8.708431611430186e-05, "loss": 0.4872, "step": 32680 }, { "epoch": 8.091584158415841, "grad_norm": 0.6032898426055908, "learning_rate": 8.707507254169035e-05, "loss": 0.4734, "step": 32690 }, { "epoch": 8.094059405940595, "grad_norm": 0.5708300471305847, "learning_rate": 8.706582615347176e-05, "loss": 0.4854, "step": 32700 }, { "epoch": 8.096534653465346, "grad_norm": 0.6051654815673828, "learning_rate": 8.70565769503483e-05, "loss": 0.481, "step": 32710 }, { "epoch": 8.099009900990099, "grad_norm": 0.6045175194740295, "learning_rate": 8.704732493302235e-05, "loss": 0.4852, "step": 32720 }, { "epoch": 8.101485148514852, "grad_norm": 0.6015220880508423, "learning_rate": 8.703807010219658e-05, "loss": 0.4828, "step": 32730 }, { "epoch": 8.103960396039604, "grad_norm": 0.5844754576683044, "learning_rate": 8.702881245857381e-05, "loss": 0.4851, "step": 32740 }, { "epoch": 8.106435643564357, "grad_norm": 0.5808501839637756, "learning_rate": 8.701955200285711e-05, "loss": 0.4843, "step": 32750 }, { "epoch": 8.108910891089108, "grad_norm": 0.5863643884658813, "learning_rate": 8.701028873574973e-05, "loss": 0.4798, "step": 32760 }, { "epoch": 8.111386138613861, "grad_norm": 0.5772769451141357, "learning_rate": 8.700102265795517e-05, "loss": 0.4787, "step": 32770 }, { "epoch": 8.113861386138614, "grad_norm": 0.6269751787185669, "learning_rate": 8.699175377017714e-05, "loss": 0.4879, "step": 32780 }, { "epoch": 8.116336633663366, "grad_norm": 0.5465086102485657, "learning_rate": 8.698248207311952e-05, "loss": 0.482, "step": 32790 }, { "epoch": 8.118811881188119, "grad_norm": 0.6040816903114319, "learning_rate": 8.697320756748646e-05, "loss": 0.484, "step": 32800 }, { "epoch": 8.121287128712872, "grad_norm": 0.5697169899940491, "learning_rate": 8.696393025398229e-05, "loss": 0.4825, "step": 32810 }, { "epoch": 8.123762376237623, "grad_norm": 0.566987931728363, "learning_rate": 8.695465013331155e-05, "loss": 0.4803, "step": 32820 }, { "epoch": 8.126237623762377, "grad_norm": 0.5693660378456116, "learning_rate": 8.6945367206179e-05, "loss": 0.4807, "step": 32830 }, { "epoch": 8.128712871287128, "grad_norm": 0.5912297368049622, "learning_rate": 8.693608147328964e-05, "loss": 0.4855, "step": 32840 }, { "epoch": 8.131188118811881, "grad_norm": 0.6061752438545227, "learning_rate": 8.692679293534865e-05, "loss": 0.4779, "step": 32850 }, { "epoch": 8.133663366336634, "grad_norm": 0.6253434419631958, "learning_rate": 8.691750159306142e-05, "loss": 0.4809, "step": 32860 }, { "epoch": 8.136138613861386, "grad_norm": 0.5865617990493774, "learning_rate": 8.69082074471336e-05, "loss": 0.4805, "step": 32870 }, { "epoch": 8.138613861386139, "grad_norm": 0.5756465792655945, "learning_rate": 8.6898910498271e-05, "loss": 0.4809, "step": 32880 }, { "epoch": 8.141089108910892, "grad_norm": 0.6021357774734497, "learning_rate": 8.688961074717962e-05, "loss": 0.4886, "step": 32890 }, { "epoch": 8.143564356435643, "grad_norm": 0.5753828883171082, "learning_rate": 8.688030819456578e-05, "loss": 0.4794, "step": 32900 }, { "epoch": 8.146039603960396, "grad_norm": 0.6147698760032654, "learning_rate": 8.687100284113594e-05, "loss": 0.483, "step": 32910 }, { "epoch": 8.148514851485148, "grad_norm": 0.6263213753700256, "learning_rate": 8.686169468759673e-05, "loss": 0.4827, "step": 32920 }, { "epoch": 8.150990099009901, "grad_norm": 0.5805141925811768, "learning_rate": 8.68523837346551e-05, "loss": 0.4772, "step": 32930 }, { "epoch": 8.153465346534654, "grad_norm": 0.5589654445648193, "learning_rate": 8.68430699830181e-05, "loss": 0.4795, "step": 32940 }, { "epoch": 8.155940594059405, "grad_norm": 0.596337080001831, "learning_rate": 8.683375343339311e-05, "loss": 0.485, "step": 32950 }, { "epoch": 8.158415841584159, "grad_norm": 0.5874701738357544, "learning_rate": 8.682443408648762e-05, "loss": 0.4862, "step": 32960 }, { "epoch": 8.160891089108912, "grad_norm": 0.630111813545227, "learning_rate": 8.681511194300936e-05, "loss": 0.4855, "step": 32970 }, { "epoch": 8.163366336633663, "grad_norm": 0.6109662055969238, "learning_rate": 8.680578700366633e-05, "loss": 0.4845, "step": 32980 }, { "epoch": 8.165841584158416, "grad_norm": 0.5882648825645447, "learning_rate": 8.679645926916667e-05, "loss": 0.4821, "step": 32990 }, { "epoch": 8.168316831683168, "grad_norm": 0.6001142263412476, "learning_rate": 8.678712874021874e-05, "loss": 0.4816, "step": 33000 }, { "epoch": 8.17079207920792, "grad_norm": 0.6406720280647278, "learning_rate": 8.67777954175312e-05, "loss": 0.4881, "step": 33010 }, { "epoch": 8.173267326732674, "grad_norm": 0.5853290557861328, "learning_rate": 8.676845930181278e-05, "loss": 0.48, "step": 33020 }, { "epoch": 8.175742574257425, "grad_norm": 0.5820586085319519, "learning_rate": 8.675912039377253e-05, "loss": 0.487, "step": 33030 }, { "epoch": 8.178217821782178, "grad_norm": 0.5598057508468628, "learning_rate": 8.674977869411968e-05, "loss": 0.4837, "step": 33040 }, { "epoch": 8.180693069306932, "grad_norm": 0.6156356930732727, "learning_rate": 8.674043420356366e-05, "loss": 0.4825, "step": 33050 }, { "epoch": 8.183168316831683, "grad_norm": 0.6540920734405518, "learning_rate": 8.673108692281413e-05, "loss": 0.4895, "step": 33060 }, { "epoch": 8.185643564356436, "grad_norm": 0.6086702346801758, "learning_rate": 8.672173685258096e-05, "loss": 0.4833, "step": 33070 }, { "epoch": 8.188118811881187, "grad_norm": 0.5957081913948059, "learning_rate": 8.671238399357421e-05, "loss": 0.4843, "step": 33080 }, { "epoch": 8.19059405940594, "grad_norm": 0.6323955059051514, "learning_rate": 8.670302834650418e-05, "loss": 0.477, "step": 33090 }, { "epoch": 8.193069306930694, "grad_norm": 0.6012907028198242, "learning_rate": 8.669366991208137e-05, "loss": 0.4907, "step": 33100 }, { "epoch": 8.195544554455445, "grad_norm": 0.6224728226661682, "learning_rate": 8.668430869101647e-05, "loss": 0.4826, "step": 33110 }, { "epoch": 8.198019801980198, "grad_norm": 0.6003115177154541, "learning_rate": 8.667494468402044e-05, "loss": 0.4842, "step": 33120 }, { "epoch": 8.200495049504951, "grad_norm": 0.574729859828949, "learning_rate": 8.666557789180438e-05, "loss": 0.4825, "step": 33130 }, { "epoch": 8.202970297029703, "grad_norm": 0.6222413182258606, "learning_rate": 8.665620831507966e-05, "loss": 0.4854, "step": 33140 }, { "epoch": 8.205445544554456, "grad_norm": 0.579138457775116, "learning_rate": 8.664683595455782e-05, "loss": 0.4815, "step": 33150 }, { "epoch": 8.207920792079207, "grad_norm": 0.5768486857414246, "learning_rate": 8.663746081095065e-05, "loss": 0.4828, "step": 33160 }, { "epoch": 8.21039603960396, "grad_norm": 0.5881670713424683, "learning_rate": 8.662808288497011e-05, "loss": 0.4851, "step": 33170 }, { "epoch": 8.212871287128714, "grad_norm": 0.6303059458732605, "learning_rate": 8.661870217732841e-05, "loss": 0.4821, "step": 33180 }, { "epoch": 8.215346534653465, "grad_norm": 0.5958699584007263, "learning_rate": 8.660931868873793e-05, "loss": 0.4864, "step": 33190 }, { "epoch": 8.217821782178218, "grad_norm": 0.5899435877799988, "learning_rate": 8.659993241991129e-05, "loss": 0.4856, "step": 33200 }, { "epoch": 8.220297029702971, "grad_norm": 0.6032819747924805, "learning_rate": 8.659054337156134e-05, "loss": 0.4844, "step": 33210 }, { "epoch": 8.222772277227723, "grad_norm": 0.5885598063468933, "learning_rate": 8.658115154440108e-05, "loss": 0.4856, "step": 33220 }, { "epoch": 8.225247524752476, "grad_norm": 0.5850486755371094, "learning_rate": 8.657175693914379e-05, "loss": 0.4836, "step": 33230 }, { "epoch": 8.227722772277227, "grad_norm": 0.6056098937988281, "learning_rate": 8.65623595565029e-05, "loss": 0.4856, "step": 33240 }, { "epoch": 8.23019801980198, "grad_norm": 0.6206510663032532, "learning_rate": 8.65529593971921e-05, "loss": 0.4807, "step": 33250 }, { "epoch": 8.232673267326733, "grad_norm": 0.5846316814422607, "learning_rate": 8.654355646192526e-05, "loss": 0.4813, "step": 33260 }, { "epoch": 8.235148514851485, "grad_norm": 0.5634509921073914, "learning_rate": 8.653415075141646e-05, "loss": 0.4831, "step": 33270 }, { "epoch": 8.237623762376238, "grad_norm": 0.5785375833511353, "learning_rate": 8.652474226638005e-05, "loss": 0.4826, "step": 33280 }, { "epoch": 8.240099009900991, "grad_norm": 0.5559127330780029, "learning_rate": 8.651533100753049e-05, "loss": 0.4863, "step": 33290 }, { "epoch": 8.242574257425742, "grad_norm": 0.5647929906845093, "learning_rate": 8.65059169755825e-05, "loss": 0.4812, "step": 33300 }, { "epoch": 8.245049504950495, "grad_norm": 0.5649881362915039, "learning_rate": 8.649650017125105e-05, "loss": 0.4839, "step": 33310 }, { "epoch": 8.247524752475247, "grad_norm": 0.5859998464584351, "learning_rate": 8.648708059525127e-05, "loss": 0.4878, "step": 33320 }, { "epoch": 8.25, "grad_norm": 0.5773548483848572, "learning_rate": 8.647765824829851e-05, "loss": 0.4806, "step": 33330 }, { "epoch": 8.252475247524753, "grad_norm": 0.5975430607795715, "learning_rate": 8.646823313110834e-05, "loss": 0.4819, "step": 33340 }, { "epoch": 8.254950495049505, "grad_norm": 0.6181514263153076, "learning_rate": 8.645880524439653e-05, "loss": 0.4785, "step": 33350 }, { "epoch": 8.257425742574258, "grad_norm": 0.6329936981201172, "learning_rate": 8.644937458887908e-05, "loss": 0.4861, "step": 33360 }, { "epoch": 8.259900990099009, "grad_norm": 0.5902003049850464, "learning_rate": 8.643994116527214e-05, "loss": 0.4801, "step": 33370 }, { "epoch": 8.262376237623762, "grad_norm": 0.5629798173904419, "learning_rate": 8.64305049742922e-05, "loss": 0.4767, "step": 33380 }, { "epoch": 8.264851485148515, "grad_norm": 0.5913378000259399, "learning_rate": 8.642106601665578e-05, "loss": 0.4863, "step": 33390 }, { "epoch": 8.267326732673267, "grad_norm": 0.5933917760848999, "learning_rate": 8.641162429307978e-05, "loss": 0.4823, "step": 33400 }, { "epoch": 8.26980198019802, "grad_norm": 0.5899003148078918, "learning_rate": 8.640217980428121e-05, "loss": 0.4818, "step": 33410 }, { "epoch": 8.272277227722773, "grad_norm": 0.6195704936981201, "learning_rate": 8.639273255097731e-05, "loss": 0.4843, "step": 33420 }, { "epoch": 8.274752475247524, "grad_norm": 0.5896918177604675, "learning_rate": 8.638328253388556e-05, "loss": 0.4814, "step": 33430 }, { "epoch": 8.277227722772277, "grad_norm": 0.6223909258842468, "learning_rate": 8.63738297537236e-05, "loss": 0.4821, "step": 33440 }, { "epoch": 8.27970297029703, "grad_norm": 0.5892067551612854, "learning_rate": 8.63643742112093e-05, "loss": 0.4873, "step": 33450 }, { "epoch": 8.282178217821782, "grad_norm": 0.6002938151359558, "learning_rate": 8.63549159070608e-05, "loss": 0.4883, "step": 33460 }, { "epoch": 8.284653465346535, "grad_norm": 0.5893726348876953, "learning_rate": 8.634545484199633e-05, "loss": 0.4739, "step": 33470 }, { "epoch": 8.287128712871286, "grad_norm": 0.5667422413825989, "learning_rate": 8.633599101673445e-05, "loss": 0.4819, "step": 33480 }, { "epoch": 8.28960396039604, "grad_norm": 0.6200572848320007, "learning_rate": 8.632652443199382e-05, "loss": 0.484, "step": 33490 }, { "epoch": 8.292079207920793, "grad_norm": 0.5919725298881531, "learning_rate": 8.63170550884934e-05, "loss": 0.481, "step": 33500 }, { "epoch": 8.294554455445544, "grad_norm": 0.6020261645317078, "learning_rate": 8.630758298695232e-05, "loss": 0.4866, "step": 33510 }, { "epoch": 8.297029702970297, "grad_norm": 0.6028928756713867, "learning_rate": 8.629810812808992e-05, "loss": 0.4769, "step": 33520 }, { "epoch": 8.299504950495049, "grad_norm": 0.6369118094444275, "learning_rate": 8.628863051262575e-05, "loss": 0.4829, "step": 33530 }, { "epoch": 8.301980198019802, "grad_norm": 0.631158173084259, "learning_rate": 8.627915014127958e-05, "loss": 0.4789, "step": 33540 }, { "epoch": 8.304455445544555, "grad_norm": 0.5786572098731995, "learning_rate": 8.626966701477139e-05, "loss": 0.4826, "step": 33550 }, { "epoch": 8.306930693069306, "grad_norm": 0.5808649063110352, "learning_rate": 8.626018113382135e-05, "loss": 0.4802, "step": 33560 }, { "epoch": 8.30940594059406, "grad_norm": 0.5869502425193787, "learning_rate": 8.625069249914983e-05, "loss": 0.486, "step": 33570 }, { "epoch": 8.311881188118813, "grad_norm": 0.6212719678878784, "learning_rate": 8.624120111147746e-05, "loss": 0.4833, "step": 33580 }, { "epoch": 8.314356435643564, "grad_norm": 0.5701782703399658, "learning_rate": 8.623170697152504e-05, "loss": 0.4772, "step": 33590 }, { "epoch": 8.316831683168317, "grad_norm": 0.5677880048751831, "learning_rate": 8.622221008001358e-05, "loss": 0.4818, "step": 33600 }, { "epoch": 8.319306930693068, "grad_norm": 0.5601966381072998, "learning_rate": 8.621271043766431e-05, "loss": 0.4841, "step": 33610 }, { "epoch": 8.321782178217822, "grad_norm": 0.5703628063201904, "learning_rate": 8.620320804519866e-05, "loss": 0.4791, "step": 33620 }, { "epoch": 8.324257425742575, "grad_norm": 0.6001012325286865, "learning_rate": 8.61937029033383e-05, "loss": 0.4806, "step": 33630 }, { "epoch": 8.326732673267326, "grad_norm": 0.5557839274406433, "learning_rate": 8.618419501280503e-05, "loss": 0.481, "step": 33640 }, { "epoch": 8.32920792079208, "grad_norm": 0.5928793549537659, "learning_rate": 8.617468437432097e-05, "loss": 0.4778, "step": 33650 }, { "epoch": 8.331683168316832, "grad_norm": 0.6121658086776733, "learning_rate": 8.616517098860835e-05, "loss": 0.4814, "step": 33660 }, { "epoch": 8.334158415841584, "grad_norm": 0.5759149193763733, "learning_rate": 8.615565485638967e-05, "loss": 0.484, "step": 33670 }, { "epoch": 8.336633663366337, "grad_norm": 0.5968608856201172, "learning_rate": 8.61461359783876e-05, "loss": 0.4853, "step": 33680 }, { "epoch": 8.339108910891088, "grad_norm": 0.5539597272872925, "learning_rate": 8.613661435532505e-05, "loss": 0.4799, "step": 33690 }, { "epoch": 8.341584158415841, "grad_norm": 0.5768359303474426, "learning_rate": 8.612708998792514e-05, "loss": 0.483, "step": 33700 }, { "epoch": 8.344059405940595, "grad_norm": 0.5989137291908264, "learning_rate": 8.611756287691114e-05, "loss": 0.4816, "step": 33710 }, { "epoch": 8.346534653465346, "grad_norm": 0.601003885269165, "learning_rate": 8.610803302300661e-05, "loss": 0.4787, "step": 33720 }, { "epoch": 8.349009900990099, "grad_norm": 0.5825487971305847, "learning_rate": 8.609850042693524e-05, "loss": 0.4874, "step": 33730 }, { "epoch": 8.351485148514852, "grad_norm": 0.5947411060333252, "learning_rate": 8.608896508942101e-05, "loss": 0.4859, "step": 33740 }, { "epoch": 8.353960396039604, "grad_norm": 0.574971079826355, "learning_rate": 8.607942701118805e-05, "loss": 0.4824, "step": 33750 }, { "epoch": 8.356435643564357, "grad_norm": 0.6390419602394104, "learning_rate": 8.606988619296071e-05, "loss": 0.4863, "step": 33760 }, { "epoch": 8.358910891089108, "grad_norm": 0.58185875415802, "learning_rate": 8.606034263546355e-05, "loss": 0.4808, "step": 33770 }, { "epoch": 8.361386138613861, "grad_norm": 0.6064139604568481, "learning_rate": 8.605079633942134e-05, "loss": 0.4804, "step": 33780 }, { "epoch": 8.363861386138614, "grad_norm": 0.5752036571502686, "learning_rate": 8.604124730555909e-05, "loss": 0.4828, "step": 33790 }, { "epoch": 8.366336633663366, "grad_norm": 0.5804149508476257, "learning_rate": 8.603169553460194e-05, "loss": 0.4833, "step": 33800 }, { "epoch": 8.368811881188119, "grad_norm": 0.6187556385993958, "learning_rate": 8.602214102727528e-05, "loss": 0.4826, "step": 33810 }, { "epoch": 8.371287128712872, "grad_norm": 0.5630355477333069, "learning_rate": 8.601258378430477e-05, "loss": 0.4818, "step": 33820 }, { "epoch": 8.373762376237623, "grad_norm": 0.5766634941101074, "learning_rate": 8.600302380641616e-05, "loss": 0.4794, "step": 33830 }, { "epoch": 8.376237623762377, "grad_norm": 0.5700511336326599, "learning_rate": 8.59934610943355e-05, "loss": 0.4856, "step": 33840 }, { "epoch": 8.378712871287128, "grad_norm": 0.5575721263885498, "learning_rate": 8.598389564878901e-05, "loss": 0.4809, "step": 33850 }, { "epoch": 8.381188118811881, "grad_norm": 0.6113061308860779, "learning_rate": 8.597432747050311e-05, "loss": 0.4808, "step": 33860 }, { "epoch": 8.383663366336634, "grad_norm": 0.6348544955253601, "learning_rate": 8.596475656020446e-05, "loss": 0.4843, "step": 33870 }, { "epoch": 8.386138613861386, "grad_norm": 0.5505293607711792, "learning_rate": 8.59551829186199e-05, "loss": 0.4786, "step": 33880 }, { "epoch": 8.388613861386139, "grad_norm": 0.5925195217132568, "learning_rate": 8.594560654647645e-05, "loss": 0.4809, "step": 33890 }, { "epoch": 8.391089108910892, "grad_norm": 0.5755539536476135, "learning_rate": 8.593602744450144e-05, "loss": 0.4826, "step": 33900 }, { "epoch": 8.393564356435643, "grad_norm": 0.588625967502594, "learning_rate": 8.592644561342228e-05, "loss": 0.4739, "step": 33910 }, { "epoch": 8.396039603960396, "grad_norm": 0.5565048456192017, "learning_rate": 8.591686105396668e-05, "loss": 0.4792, "step": 33920 }, { "epoch": 8.398514851485148, "grad_norm": 0.5571756958961487, "learning_rate": 8.590727376686251e-05, "loss": 0.4826, "step": 33930 }, { "epoch": 8.400990099009901, "grad_norm": 0.558279812335968, "learning_rate": 8.589768375283786e-05, "loss": 0.4829, "step": 33940 }, { "epoch": 8.403465346534654, "grad_norm": 0.6024053692817688, "learning_rate": 8.588809101262103e-05, "loss": 0.4828, "step": 33950 }, { "epoch": 8.405940594059405, "grad_norm": 0.5634346008300781, "learning_rate": 8.587849554694054e-05, "loss": 0.4783, "step": 33960 }, { "epoch": 8.408415841584159, "grad_norm": 0.5835654139518738, "learning_rate": 8.586889735652509e-05, "loss": 0.4833, "step": 33970 }, { "epoch": 8.410891089108912, "grad_norm": 0.5781476497650146, "learning_rate": 8.58592964421036e-05, "loss": 0.4827, "step": 33980 }, { "epoch": 8.413366336633663, "grad_norm": 0.6059443354606628, "learning_rate": 8.584969280440518e-05, "loss": 0.4838, "step": 33990 }, { "epoch": 8.415841584158416, "grad_norm": 0.5639762282371521, "learning_rate": 8.58400864441592e-05, "loss": 0.4807, "step": 34000 }, { "epoch": 8.418316831683168, "grad_norm": 0.6128673553466797, "learning_rate": 8.583047736209518e-05, "loss": 0.4829, "step": 34010 }, { "epoch": 8.42079207920792, "grad_norm": 0.5743808746337891, "learning_rate": 8.582086555894284e-05, "loss": 0.4825, "step": 34020 }, { "epoch": 8.423267326732674, "grad_norm": 0.5724934935569763, "learning_rate": 8.581125103543217e-05, "loss": 0.482, "step": 34030 }, { "epoch": 8.425742574257425, "grad_norm": 0.5799967646598816, "learning_rate": 8.580163379229332e-05, "loss": 0.4863, "step": 34040 }, { "epoch": 8.428217821782178, "grad_norm": 0.5764375925064087, "learning_rate": 8.579201383025665e-05, "loss": 0.4762, "step": 34050 }, { "epoch": 8.430693069306932, "grad_norm": 0.5777288675308228, "learning_rate": 8.578239115005275e-05, "loss": 0.4746, "step": 34060 }, { "epoch": 8.433168316831683, "grad_norm": 0.5680776238441467, "learning_rate": 8.577276575241237e-05, "loss": 0.4815, "step": 34070 }, { "epoch": 8.435643564356436, "grad_norm": 0.613068699836731, "learning_rate": 8.576313763806651e-05, "loss": 0.4878, "step": 34080 }, { "epoch": 8.438118811881187, "grad_norm": 0.624660313129425, "learning_rate": 8.575350680774638e-05, "loss": 0.4791, "step": 34090 }, { "epoch": 8.44059405940594, "grad_norm": 0.5985419154167175, "learning_rate": 8.574387326218335e-05, "loss": 0.4892, "step": 34100 }, { "epoch": 8.443069306930694, "grad_norm": 0.5710894465446472, "learning_rate": 8.573423700210904e-05, "loss": 0.4764, "step": 34110 }, { "epoch": 8.445544554455445, "grad_norm": 0.567279577255249, "learning_rate": 8.572459802825525e-05, "loss": 0.4827, "step": 34120 }, { "epoch": 8.448019801980198, "grad_norm": 0.6065353155136108, "learning_rate": 8.571495634135401e-05, "loss": 0.4825, "step": 34130 }, { "epoch": 8.450495049504951, "grad_norm": 0.616134762763977, "learning_rate": 8.57053119421375e-05, "loss": 0.4793, "step": 34140 }, { "epoch": 8.452970297029703, "grad_norm": 0.5788555145263672, "learning_rate": 8.569566483133821e-05, "loss": 0.4814, "step": 34150 }, { "epoch": 8.455445544554456, "grad_norm": 0.5875390768051147, "learning_rate": 8.568601500968876e-05, "loss": 0.4824, "step": 34160 }, { "epoch": 8.457920792079207, "grad_norm": 0.5739628672599792, "learning_rate": 8.567636247792194e-05, "loss": 0.4791, "step": 34170 }, { "epoch": 8.46039603960396, "grad_norm": 0.5647267699241638, "learning_rate": 8.566670723677087e-05, "loss": 0.4786, "step": 34180 }, { "epoch": 8.462871287128714, "grad_norm": 0.6087656617164612, "learning_rate": 8.565704928696875e-05, "loss": 0.485, "step": 34190 }, { "epoch": 8.465346534653465, "grad_norm": 0.5549506545066833, "learning_rate": 8.564738862924906e-05, "loss": 0.48, "step": 34200 }, { "epoch": 8.467821782178218, "grad_norm": 0.5820557475090027, "learning_rate": 8.563772526434544e-05, "loss": 0.4826, "step": 34210 }, { "epoch": 8.47029702970297, "grad_norm": 0.547821581363678, "learning_rate": 8.562805919299178e-05, "loss": 0.4808, "step": 34220 }, { "epoch": 8.472772277227723, "grad_norm": 0.6164019107818604, "learning_rate": 8.561839041592216e-05, "loss": 0.4776, "step": 34230 }, { "epoch": 8.475247524752476, "grad_norm": 0.5703572034835815, "learning_rate": 8.560871893387083e-05, "loss": 0.4796, "step": 34240 }, { "epoch": 8.477722772277227, "grad_norm": 0.5894030332565308, "learning_rate": 8.559904474757231e-05, "loss": 0.4809, "step": 34250 }, { "epoch": 8.48019801980198, "grad_norm": 0.5608757138252258, "learning_rate": 8.558936785776127e-05, "loss": 0.4776, "step": 34260 }, { "epoch": 8.482673267326733, "grad_norm": 0.5878713130950928, "learning_rate": 8.557968826517263e-05, "loss": 0.4767, "step": 34270 }, { "epoch": 8.485148514851485, "grad_norm": 0.5874069333076477, "learning_rate": 8.557000597054144e-05, "loss": 0.4861, "step": 34280 }, { "epoch": 8.487623762376238, "grad_norm": 0.5569306015968323, "learning_rate": 8.556032097460304e-05, "loss": 0.4819, "step": 34290 }, { "epoch": 8.490099009900991, "grad_norm": 0.5798599720001221, "learning_rate": 8.555063327809296e-05, "loss": 0.4821, "step": 34300 }, { "epoch": 8.492574257425742, "grad_norm": 0.5722635984420776, "learning_rate": 8.554094288174688e-05, "loss": 0.4768, "step": 34310 }, { "epoch": 8.495049504950495, "grad_norm": 0.5698662996292114, "learning_rate": 8.553124978630075e-05, "loss": 0.4833, "step": 34320 }, { "epoch": 8.497524752475247, "grad_norm": 0.6047122478485107, "learning_rate": 8.552155399249067e-05, "loss": 0.4806, "step": 34330 }, { "epoch": 8.5, "grad_norm": 0.5565168857574463, "learning_rate": 8.5511855501053e-05, "loss": 0.4844, "step": 34340 }, { "epoch": 8.502475247524753, "grad_norm": 0.5918898582458496, "learning_rate": 8.550215431272426e-05, "loss": 0.4867, "step": 34350 }, { "epoch": 8.504950495049505, "grad_norm": 0.5572783946990967, "learning_rate": 8.549245042824119e-05, "loss": 0.4805, "step": 34360 }, { "epoch": 8.507425742574258, "grad_norm": 0.5802083015441895, "learning_rate": 8.548274384834074e-05, "loss": 0.481, "step": 34370 }, { "epoch": 8.509900990099009, "grad_norm": 0.6110864281654358, "learning_rate": 8.547303457376006e-05, "loss": 0.4783, "step": 34380 }, { "epoch": 8.512376237623762, "grad_norm": 0.593909502029419, "learning_rate": 8.54633226052365e-05, "loss": 0.4821, "step": 34390 }, { "epoch": 8.514851485148515, "grad_norm": 0.5554890036582947, "learning_rate": 8.545360794350765e-05, "loss": 0.4804, "step": 34400 }, { "epoch": 8.517326732673267, "grad_norm": 0.5509055852890015, "learning_rate": 8.544389058931123e-05, "loss": 0.4795, "step": 34410 }, { "epoch": 8.51980198019802, "grad_norm": 0.5366295576095581, "learning_rate": 8.543417054338522e-05, "loss": 0.4807, "step": 34420 }, { "epoch": 8.522277227722773, "grad_norm": 0.5827937722206116, "learning_rate": 8.542444780646784e-05, "loss": 0.4788, "step": 34430 }, { "epoch": 8.524752475247524, "grad_norm": 0.6343212723731995, "learning_rate": 8.541472237929739e-05, "loss": 0.4773, "step": 34440 }, { "epoch": 8.527227722772277, "grad_norm": 0.5838939547538757, "learning_rate": 8.54049942626125e-05, "loss": 0.4864, "step": 34450 }, { "epoch": 8.52970297029703, "grad_norm": 0.5798102021217346, "learning_rate": 8.539526345715196e-05, "loss": 0.4845, "step": 34460 }, { "epoch": 8.532178217821782, "grad_norm": 0.5634593367576599, "learning_rate": 8.538552996365474e-05, "loss": 0.4869, "step": 34470 }, { "epoch": 8.534653465346535, "grad_norm": 0.6457482576370239, "learning_rate": 8.537579378286005e-05, "loss": 0.4831, "step": 34480 }, { "epoch": 8.537128712871286, "grad_norm": 0.6252023577690125, "learning_rate": 8.536605491550729e-05, "loss": 0.4785, "step": 34490 }, { "epoch": 8.53960396039604, "grad_norm": 0.6110609769821167, "learning_rate": 8.535631336233604e-05, "loss": 0.4832, "step": 34500 }, { "epoch": 8.542079207920793, "grad_norm": 0.5941082239151001, "learning_rate": 8.534656912408613e-05, "loss": 0.4784, "step": 34510 }, { "epoch": 8.544554455445544, "grad_norm": 0.5805951356887817, "learning_rate": 8.533682220149756e-05, "loss": 0.4798, "step": 34520 }, { "epoch": 8.547029702970297, "grad_norm": 0.5988072156906128, "learning_rate": 8.532707259531055e-05, "loss": 0.4806, "step": 34530 }, { "epoch": 8.549504950495049, "grad_norm": 0.5915064811706543, "learning_rate": 8.531732030626549e-05, "loss": 0.4893, "step": 34540 }, { "epoch": 8.551980198019802, "grad_norm": 0.5699374079704285, "learning_rate": 8.530756533510306e-05, "loss": 0.4809, "step": 34550 }, { "epoch": 8.554455445544555, "grad_norm": 0.6090176701545715, "learning_rate": 8.529780768256404e-05, "loss": 0.4882, "step": 34560 }, { "epoch": 8.556930693069306, "grad_norm": 0.5634792447090149, "learning_rate": 8.528804734938947e-05, "loss": 0.4833, "step": 34570 }, { "epoch": 8.55940594059406, "grad_norm": 0.5646975040435791, "learning_rate": 8.527828433632058e-05, "loss": 0.4859, "step": 34580 }, { "epoch": 8.561881188118813, "grad_norm": 0.5482086539268494, "learning_rate": 8.526851864409882e-05, "loss": 0.484, "step": 34590 }, { "epoch": 8.564356435643564, "grad_norm": 0.5794457197189331, "learning_rate": 8.525875027346582e-05, "loss": 0.4813, "step": 34600 }, { "epoch": 8.566831683168317, "grad_norm": 0.5499856472015381, "learning_rate": 8.524897922516342e-05, "loss": 0.4768, "step": 34610 }, { "epoch": 8.569306930693068, "grad_norm": 0.5382864475250244, "learning_rate": 8.523920549993367e-05, "loss": 0.4797, "step": 34620 }, { "epoch": 8.571782178217822, "grad_norm": 0.5600929856300354, "learning_rate": 8.522942909851882e-05, "loss": 0.4806, "step": 34630 }, { "epoch": 8.574257425742575, "grad_norm": 0.5672695636749268, "learning_rate": 8.521965002166132e-05, "loss": 0.4805, "step": 34640 }, { "epoch": 8.576732673267326, "grad_norm": 0.5838015079498291, "learning_rate": 8.520986827010385e-05, "loss": 0.4828, "step": 34650 }, { "epoch": 8.57920792079208, "grad_norm": 0.572490930557251, "learning_rate": 8.520008384458924e-05, "loss": 0.4803, "step": 34660 }, { "epoch": 8.581683168316832, "grad_norm": 0.5420886278152466, "learning_rate": 8.519029674586055e-05, "loss": 0.4847, "step": 34670 }, { "epoch": 8.584158415841584, "grad_norm": 0.5432479381561279, "learning_rate": 8.518050697466107e-05, "loss": 0.4748, "step": 34680 }, { "epoch": 8.586633663366337, "grad_norm": 0.5581375956535339, "learning_rate": 8.517071453173426e-05, "loss": 0.4845, "step": 34690 }, { "epoch": 8.589108910891088, "grad_norm": 0.572810709476471, "learning_rate": 8.516091941782378e-05, "loss": 0.4856, "step": 34700 }, { "epoch": 8.591584158415841, "grad_norm": 0.5405216813087463, "learning_rate": 8.515112163367351e-05, "loss": 0.4785, "step": 34710 }, { "epoch": 8.594059405940595, "grad_norm": 0.6200893521308899, "learning_rate": 8.514132118002754e-05, "loss": 0.4831, "step": 34720 }, { "epoch": 8.596534653465346, "grad_norm": 0.5881765484809875, "learning_rate": 8.513151805763013e-05, "loss": 0.488, "step": 34730 }, { "epoch": 8.599009900990099, "grad_norm": 0.5895168781280518, "learning_rate": 8.512171226722578e-05, "loss": 0.4777, "step": 34740 }, { "epoch": 8.601485148514852, "grad_norm": 0.5881725549697876, "learning_rate": 8.511190380955915e-05, "loss": 0.4819, "step": 34750 }, { "epoch": 8.603960396039604, "grad_norm": 0.5676320195198059, "learning_rate": 8.510209268537515e-05, "loss": 0.4816, "step": 34760 }, { "epoch": 8.606435643564357, "grad_norm": 0.5708620548248291, "learning_rate": 8.509227889541888e-05, "loss": 0.4835, "step": 34770 }, { "epoch": 8.608910891089108, "grad_norm": 0.5514865517616272, "learning_rate": 8.508246244043559e-05, "loss": 0.4817, "step": 34780 }, { "epoch": 8.611386138613861, "grad_norm": 0.5828816294670105, "learning_rate": 8.507264332117082e-05, "loss": 0.4825, "step": 34790 }, { "epoch": 8.613861386138614, "grad_norm": 0.5551228523254395, "learning_rate": 8.506282153837024e-05, "loss": 0.4758, "step": 34800 }, { "epoch": 8.616336633663366, "grad_norm": 0.5576096177101135, "learning_rate": 8.505299709277977e-05, "loss": 0.4795, "step": 34810 }, { "epoch": 8.618811881188119, "grad_norm": 0.6175692081451416, "learning_rate": 8.50431699851455e-05, "loss": 0.4833, "step": 34820 }, { "epoch": 8.621287128712872, "grad_norm": 0.6375464797019958, "learning_rate": 8.503334021621372e-05, "loss": 0.4813, "step": 34830 }, { "epoch": 8.623762376237623, "grad_norm": 0.5965413451194763, "learning_rate": 8.502350778673096e-05, "loss": 0.4804, "step": 34840 }, { "epoch": 8.626237623762377, "grad_norm": 0.5967750549316406, "learning_rate": 8.501367269744393e-05, "loss": 0.4774, "step": 34850 }, { "epoch": 8.628712871287128, "grad_norm": 0.5532430410385132, "learning_rate": 8.500383494909951e-05, "loss": 0.4813, "step": 34860 }, { "epoch": 8.631188118811881, "grad_norm": 0.5237023234367371, "learning_rate": 8.499399454244484e-05, "loss": 0.4777, "step": 34870 }, { "epoch": 8.633663366336634, "grad_norm": 0.5532442331314087, "learning_rate": 8.498415147822723e-05, "loss": 0.488, "step": 34880 }, { "epoch": 8.636138613861386, "grad_norm": 0.5909122824668884, "learning_rate": 8.497430575719418e-05, "loss": 0.4858, "step": 34890 }, { "epoch": 8.638613861386139, "grad_norm": 0.5903182029724121, "learning_rate": 8.496445738009342e-05, "loss": 0.4758, "step": 34900 }, { "epoch": 8.641089108910892, "grad_norm": 0.5433390140533447, "learning_rate": 8.495460634767286e-05, "loss": 0.4822, "step": 34910 }, { "epoch": 8.643564356435643, "grad_norm": 0.5747028589248657, "learning_rate": 8.494475266068065e-05, "loss": 0.4842, "step": 34920 }, { "epoch": 8.646039603960396, "grad_norm": 0.5702351331710815, "learning_rate": 8.493489631986506e-05, "loss": 0.4774, "step": 34930 }, { "epoch": 8.648514851485148, "grad_norm": 0.5397145748138428, "learning_rate": 8.492503732597467e-05, "loss": 0.4798, "step": 34940 }, { "epoch": 8.650990099009901, "grad_norm": 0.5826185345649719, "learning_rate": 8.491517567975817e-05, "loss": 0.484, "step": 34950 }, { "epoch": 8.653465346534654, "grad_norm": 0.6038839817047119, "learning_rate": 8.49053113819645e-05, "loss": 0.4792, "step": 34960 }, { "epoch": 8.655940594059405, "grad_norm": 0.5717385411262512, "learning_rate": 8.489544443334278e-05, "loss": 0.484, "step": 34970 }, { "epoch": 8.658415841584159, "grad_norm": 0.5828934907913208, "learning_rate": 8.488557483464235e-05, "loss": 0.4763, "step": 34980 }, { "epoch": 8.660891089108912, "grad_norm": 0.6056138873100281, "learning_rate": 8.487570258661275e-05, "loss": 0.4764, "step": 34990 }, { "epoch": 8.663366336633663, "grad_norm": 0.5785578489303589, "learning_rate": 8.486582769000369e-05, "loss": 0.4816, "step": 35000 }, { "epoch": 8.665841584158416, "grad_norm": 0.5995500683784485, "learning_rate": 8.48559501455651e-05, "loss": 0.4767, "step": 35010 }, { "epoch": 8.668316831683168, "grad_norm": 0.5786498188972473, "learning_rate": 8.484606995404713e-05, "loss": 0.4795, "step": 35020 }, { "epoch": 8.67079207920792, "grad_norm": 0.5866264700889587, "learning_rate": 8.483618711620011e-05, "loss": 0.4786, "step": 35030 }, { "epoch": 8.673267326732674, "grad_norm": 0.558085560798645, "learning_rate": 8.482630163277459e-05, "loss": 0.4784, "step": 35040 }, { "epoch": 8.675742574257425, "grad_norm": 0.5913869142532349, "learning_rate": 8.481641350452127e-05, "loss": 0.4801, "step": 35050 }, { "epoch": 8.678217821782178, "grad_norm": 0.598893404006958, "learning_rate": 8.480652273219114e-05, "loss": 0.4786, "step": 35060 }, { "epoch": 8.680693069306932, "grad_norm": 0.5566623210906982, "learning_rate": 8.479662931653529e-05, "loss": 0.4763, "step": 35070 }, { "epoch": 8.683168316831683, "grad_norm": 0.603617787361145, "learning_rate": 8.478673325830509e-05, "loss": 0.4789, "step": 35080 }, { "epoch": 8.685643564356436, "grad_norm": 0.5829029083251953, "learning_rate": 8.477683455825207e-05, "loss": 0.4822, "step": 35090 }, { "epoch": 8.688118811881187, "grad_norm": 0.5492386817932129, "learning_rate": 8.476693321712797e-05, "loss": 0.4821, "step": 35100 }, { "epoch": 8.69059405940594, "grad_norm": 0.5697230696678162, "learning_rate": 8.475702923568474e-05, "loss": 0.4807, "step": 35110 }, { "epoch": 8.693069306930694, "grad_norm": 0.5547667145729065, "learning_rate": 8.474712261467452e-05, "loss": 0.4829, "step": 35120 }, { "epoch": 8.695544554455445, "grad_norm": 0.5642207264900208, "learning_rate": 8.473721335484964e-05, "loss": 0.4793, "step": 35130 }, { "epoch": 8.698019801980198, "grad_norm": 0.5560306906700134, "learning_rate": 8.472730145696265e-05, "loss": 0.486, "step": 35140 }, { "epoch": 8.700495049504951, "grad_norm": 0.563907265663147, "learning_rate": 8.471738692176631e-05, "loss": 0.4806, "step": 35150 }, { "epoch": 8.702970297029703, "grad_norm": 0.609128475189209, "learning_rate": 8.470746975001353e-05, "loss": 0.4815, "step": 35160 }, { "epoch": 8.705445544554456, "grad_norm": 0.5488462448120117, "learning_rate": 8.469754994245749e-05, "loss": 0.4814, "step": 35170 }, { "epoch": 8.707920792079207, "grad_norm": 0.5352857112884521, "learning_rate": 8.46876274998515e-05, "loss": 0.4764, "step": 35180 }, { "epoch": 8.71039603960396, "grad_norm": 0.5801853537559509, "learning_rate": 8.467770242294913e-05, "loss": 0.4814, "step": 35190 }, { "epoch": 8.712871287128714, "grad_norm": 0.5749750733375549, "learning_rate": 8.46677747125041e-05, "loss": 0.4812, "step": 35200 }, { "epoch": 8.715346534653465, "grad_norm": 0.5710667371749878, "learning_rate": 8.465784436927038e-05, "loss": 0.481, "step": 35210 }, { "epoch": 8.717821782178218, "grad_norm": 0.576847493648529, "learning_rate": 8.46479113940021e-05, "loss": 0.4765, "step": 35220 }, { "epoch": 8.72029702970297, "grad_norm": 0.6062059998512268, "learning_rate": 8.463797578745359e-05, "loss": 0.4838, "step": 35230 }, { "epoch": 8.722772277227723, "grad_norm": 0.5991623997688293, "learning_rate": 8.462803755037942e-05, "loss": 0.4775, "step": 35240 }, { "epoch": 8.725247524752476, "grad_norm": 0.5630866289138794, "learning_rate": 8.461809668353433e-05, "loss": 0.4791, "step": 35250 }, { "epoch": 8.727722772277227, "grad_norm": 0.5298961400985718, "learning_rate": 8.460815318767325e-05, "loss": 0.485, "step": 35260 }, { "epoch": 8.73019801980198, "grad_norm": 0.5893084406852722, "learning_rate": 8.459820706355136e-05, "loss": 0.4795, "step": 35270 }, { "epoch": 8.732673267326733, "grad_norm": 0.655903160572052, "learning_rate": 8.458825831192392e-05, "loss": 0.4853, "step": 35280 }, { "epoch": 8.735148514851485, "grad_norm": 0.5771281123161316, "learning_rate": 8.457830693354656e-05, "loss": 0.4798, "step": 35290 }, { "epoch": 8.737623762376238, "grad_norm": 0.5646564364433289, "learning_rate": 8.456835292917496e-05, "loss": 0.4756, "step": 35300 }, { "epoch": 8.740099009900991, "grad_norm": 0.5537102222442627, "learning_rate": 8.455839629956512e-05, "loss": 0.4822, "step": 35310 }, { "epoch": 8.742574257425742, "grad_norm": 0.5754542350769043, "learning_rate": 8.454843704547311e-05, "loss": 0.4816, "step": 35320 }, { "epoch": 8.745049504950495, "grad_norm": 0.5751810073852539, "learning_rate": 8.453847516765533e-05, "loss": 0.4796, "step": 35330 }, { "epoch": 8.747524752475247, "grad_norm": 0.5547278523445129, "learning_rate": 8.452851066686829e-05, "loss": 0.4834, "step": 35340 }, { "epoch": 8.75, "grad_norm": 0.6389200091362, "learning_rate": 8.451854354386872e-05, "loss": 0.4759, "step": 35350 }, { "epoch": 8.752475247524753, "grad_norm": 0.5661778450012207, "learning_rate": 8.45085737994136e-05, "loss": 0.4811, "step": 35360 }, { "epoch": 8.754950495049505, "grad_norm": 0.534976065158844, "learning_rate": 8.449860143426001e-05, "loss": 0.4863, "step": 35370 }, { "epoch": 8.757425742574258, "grad_norm": 0.6132557988166809, "learning_rate": 8.448862644916531e-05, "loss": 0.4767, "step": 35380 }, { "epoch": 8.759900990099009, "grad_norm": 0.5795742869377136, "learning_rate": 8.447864884488705e-05, "loss": 0.4841, "step": 35390 }, { "epoch": 8.762376237623762, "grad_norm": 0.5616191625595093, "learning_rate": 8.446866862218295e-05, "loss": 0.4815, "step": 35400 }, { "epoch": 8.764851485148515, "grad_norm": 0.578159511089325, "learning_rate": 8.445868578181095e-05, "loss": 0.4813, "step": 35410 }, { "epoch": 8.767326732673267, "grad_norm": 0.5567719340324402, "learning_rate": 8.444870032452914e-05, "loss": 0.4805, "step": 35420 }, { "epoch": 8.76980198019802, "grad_norm": 0.5628126263618469, "learning_rate": 8.443871225109592e-05, "loss": 0.4811, "step": 35430 }, { "epoch": 8.772277227722773, "grad_norm": 0.5514281392097473, "learning_rate": 8.442872156226976e-05, "loss": 0.4799, "step": 35440 }, { "epoch": 8.774752475247524, "grad_norm": 0.5748667120933533, "learning_rate": 8.44187282588094e-05, "loss": 0.4806, "step": 35450 }, { "epoch": 8.777227722772277, "grad_norm": 0.6081640720367432, "learning_rate": 8.44087323414738e-05, "loss": 0.4807, "step": 35460 }, { "epoch": 8.77970297029703, "grad_norm": 0.574088990688324, "learning_rate": 8.439873381102203e-05, "loss": 0.4813, "step": 35470 }, { "epoch": 8.782178217821782, "grad_norm": 0.5675016641616821, "learning_rate": 8.438873266821344e-05, "loss": 0.4743, "step": 35480 }, { "epoch": 8.784653465346535, "grad_norm": 0.5879091620445251, "learning_rate": 8.437872891380757e-05, "loss": 0.4792, "step": 35490 }, { "epoch": 8.787128712871286, "grad_norm": 0.6172441244125366, "learning_rate": 8.436872254856409e-05, "loss": 0.4776, "step": 35500 }, { "epoch": 8.78960396039604, "grad_norm": 0.5551273822784424, "learning_rate": 8.435871357324297e-05, "loss": 0.4816, "step": 35510 }, { "epoch": 8.792079207920793, "grad_norm": 0.5802384614944458, "learning_rate": 8.434870198860429e-05, "loss": 0.4808, "step": 35520 }, { "epoch": 8.794554455445544, "grad_norm": 0.552532434463501, "learning_rate": 8.433868779540839e-05, "loss": 0.4744, "step": 35530 }, { "epoch": 8.797029702970297, "grad_norm": 0.5392792224884033, "learning_rate": 8.432867099441575e-05, "loss": 0.4774, "step": 35540 }, { "epoch": 8.799504950495049, "grad_norm": 0.5306934714317322, "learning_rate": 8.43186515863871e-05, "loss": 0.4809, "step": 35550 }, { "epoch": 8.801980198019802, "grad_norm": 0.5507979393005371, "learning_rate": 8.430862957208337e-05, "loss": 0.4829, "step": 35560 }, { "epoch": 8.804455445544555, "grad_norm": 0.5883195996284485, "learning_rate": 8.429860495226561e-05, "loss": 0.4806, "step": 35570 }, { "epoch": 8.806930693069306, "grad_norm": 0.5553721785545349, "learning_rate": 8.428857772769517e-05, "loss": 0.4766, "step": 35580 }, { "epoch": 8.80940594059406, "grad_norm": 0.5541555285453796, "learning_rate": 8.427854789913352e-05, "loss": 0.4773, "step": 35590 }, { "epoch": 8.811881188118813, "grad_norm": 0.6040430068969727, "learning_rate": 8.42685154673424e-05, "loss": 0.4795, "step": 35600 }, { "epoch": 8.814356435643564, "grad_norm": 0.578383207321167, "learning_rate": 8.425848043308366e-05, "loss": 0.4812, "step": 35610 }, { "epoch": 8.816831683168317, "grad_norm": 0.5992740392684937, "learning_rate": 8.424844279711943e-05, "loss": 0.4769, "step": 35620 }, { "epoch": 8.819306930693068, "grad_norm": 0.6136582493782043, "learning_rate": 8.423840256021199e-05, "loss": 0.4814, "step": 35630 }, { "epoch": 8.821782178217822, "grad_norm": 0.5903441905975342, "learning_rate": 8.422835972312384e-05, "loss": 0.4745, "step": 35640 }, { "epoch": 8.824257425742575, "grad_norm": 0.5581113696098328, "learning_rate": 8.421831428661764e-05, "loss": 0.4848, "step": 35650 }, { "epoch": 8.826732673267326, "grad_norm": 0.5494892597198486, "learning_rate": 8.42082662514563e-05, "loss": 0.4863, "step": 35660 }, { "epoch": 8.82920792079208, "grad_norm": 0.5701847672462463, "learning_rate": 8.419821561840289e-05, "loss": 0.4823, "step": 35670 }, { "epoch": 8.831683168316832, "grad_norm": 0.5345900654792786, "learning_rate": 8.41881623882207e-05, "loss": 0.4735, "step": 35680 }, { "epoch": 8.834158415841584, "grad_norm": 0.5649595260620117, "learning_rate": 8.417810656167317e-05, "loss": 0.4772, "step": 35690 }, { "epoch": 8.836633663366337, "grad_norm": 0.5881116390228271, "learning_rate": 8.416804813952401e-05, "loss": 0.479, "step": 35700 }, { "epoch": 8.839108910891088, "grad_norm": 0.5407607555389404, "learning_rate": 8.415798712253711e-05, "loss": 0.4846, "step": 35710 }, { "epoch": 8.841584158415841, "grad_norm": 0.5377489328384399, "learning_rate": 8.414792351147649e-05, "loss": 0.4761, "step": 35720 }, { "epoch": 8.844059405940595, "grad_norm": 0.6022247672080994, "learning_rate": 8.413785730710646e-05, "loss": 0.4774, "step": 35730 }, { "epoch": 8.846534653465346, "grad_norm": 0.5451621413230896, "learning_rate": 8.412778851019143e-05, "loss": 0.4762, "step": 35740 }, { "epoch": 8.849009900990099, "grad_norm": 0.5422550439834595, "learning_rate": 8.41177171214961e-05, "loss": 0.4801, "step": 35750 }, { "epoch": 8.851485148514852, "grad_norm": 0.5532928705215454, "learning_rate": 8.410764314178532e-05, "loss": 0.4847, "step": 35760 }, { "epoch": 8.853960396039604, "grad_norm": 0.5527356266975403, "learning_rate": 8.40975665718241e-05, "loss": 0.4786, "step": 35770 }, { "epoch": 8.856435643564357, "grad_norm": 0.567287027835846, "learning_rate": 8.408748741237776e-05, "loss": 0.482, "step": 35780 }, { "epoch": 8.858910891089108, "grad_norm": 0.5817974209785461, "learning_rate": 8.40774056642117e-05, "loss": 0.4799, "step": 35790 }, { "epoch": 8.861386138613861, "grad_norm": 0.591167151927948, "learning_rate": 8.406732132809157e-05, "loss": 0.4807, "step": 35800 }, { "epoch": 8.863861386138614, "grad_norm": 0.5714485049247742, "learning_rate": 8.40572344047832e-05, "loss": 0.4738, "step": 35810 }, { "epoch": 8.866336633663366, "grad_norm": 0.6106954216957092, "learning_rate": 8.404714489505264e-05, "loss": 0.4817, "step": 35820 }, { "epoch": 8.868811881188119, "grad_norm": 0.5554224848747253, "learning_rate": 8.403705279966611e-05, "loss": 0.4844, "step": 35830 }, { "epoch": 8.871287128712872, "grad_norm": 0.5751283168792725, "learning_rate": 8.402695811939006e-05, "loss": 0.482, "step": 35840 }, { "epoch": 8.873762376237623, "grad_norm": 0.5422369837760925, "learning_rate": 8.40168608549911e-05, "loss": 0.4762, "step": 35850 }, { "epoch": 8.876237623762377, "grad_norm": 0.5607504844665527, "learning_rate": 8.400676100723604e-05, "loss": 0.4741, "step": 35860 }, { "epoch": 8.878712871287128, "grad_norm": 0.5935603976249695, "learning_rate": 8.39966585768919e-05, "loss": 0.4834, "step": 35870 }, { "epoch": 8.881188118811881, "grad_norm": 0.5545059442520142, "learning_rate": 8.398655356472591e-05, "loss": 0.4781, "step": 35880 }, { "epoch": 8.883663366336634, "grad_norm": 0.5658924579620361, "learning_rate": 8.397644597150546e-05, "loss": 0.4826, "step": 35890 }, { "epoch": 8.886138613861386, "grad_norm": 0.5743481516838074, "learning_rate": 8.39663357979982e-05, "loss": 0.4788, "step": 35900 }, { "epoch": 8.888613861386139, "grad_norm": 0.5365190505981445, "learning_rate": 8.395622304497185e-05, "loss": 0.4753, "step": 35910 }, { "epoch": 8.891089108910892, "grad_norm": 0.5726655125617981, "learning_rate": 8.394610771319447e-05, "loss": 0.484, "step": 35920 }, { "epoch": 8.893564356435643, "grad_norm": 0.5946376323699951, "learning_rate": 8.393598980343422e-05, "loss": 0.4794, "step": 35930 }, { "epoch": 8.896039603960396, "grad_norm": 0.5535935163497925, "learning_rate": 8.392586931645951e-05, "loss": 0.4786, "step": 35940 }, { "epoch": 8.898514851485148, "grad_norm": 0.5992769002914429, "learning_rate": 8.391574625303894e-05, "loss": 0.479, "step": 35950 }, { "epoch": 8.900990099009901, "grad_norm": 0.5826238393783569, "learning_rate": 8.390562061394122e-05, "loss": 0.4785, "step": 35960 }, { "epoch": 8.903465346534654, "grad_norm": 0.5740154981613159, "learning_rate": 8.389549239993541e-05, "loss": 0.4769, "step": 35970 }, { "epoch": 8.905940594059405, "grad_norm": 0.529008150100708, "learning_rate": 8.388536161179064e-05, "loss": 0.4784, "step": 35980 }, { "epoch": 8.908415841584159, "grad_norm": 0.5622596144676208, "learning_rate": 8.387522825027626e-05, "loss": 0.4829, "step": 35990 }, { "epoch": 8.910891089108912, "grad_norm": 0.5630442500114441, "learning_rate": 8.386509231616186e-05, "loss": 0.4812, "step": 36000 }, { "epoch": 8.913366336633663, "grad_norm": 0.5496388673782349, "learning_rate": 8.38549538102172e-05, "loss": 0.4809, "step": 36010 }, { "epoch": 8.915841584158416, "grad_norm": 0.5957456827163696, "learning_rate": 8.384481273321221e-05, "loss": 0.4832, "step": 36020 }, { "epoch": 8.918316831683168, "grad_norm": 0.5981072187423706, "learning_rate": 8.383466908591705e-05, "loss": 0.4769, "step": 36030 }, { "epoch": 8.92079207920792, "grad_norm": 0.5956576466560364, "learning_rate": 8.382452286910206e-05, "loss": 0.4809, "step": 36040 }, { "epoch": 8.923267326732674, "grad_norm": 0.5902495384216309, "learning_rate": 8.38143740835378e-05, "loss": 0.4834, "step": 36050 }, { "epoch": 8.925742574257425, "grad_norm": 0.6038113832473755, "learning_rate": 8.380422272999496e-05, "loss": 0.4765, "step": 36060 }, { "epoch": 8.928217821782178, "grad_norm": 0.5684229731559753, "learning_rate": 8.37940688092445e-05, "loss": 0.4845, "step": 36070 }, { "epoch": 8.930693069306932, "grad_norm": 0.5743018388748169, "learning_rate": 8.378391232205753e-05, "loss": 0.4804, "step": 36080 }, { "epoch": 8.933168316831683, "grad_norm": 0.6090752482414246, "learning_rate": 8.377375326920538e-05, "loss": 0.48, "step": 36090 }, { "epoch": 8.935643564356436, "grad_norm": 0.5799227952957153, "learning_rate": 8.376359165145956e-05, "loss": 0.4787, "step": 36100 }, { "epoch": 8.938118811881187, "grad_norm": 0.5542719960212708, "learning_rate": 8.375342746959176e-05, "loss": 0.4812, "step": 36110 }, { "epoch": 8.94059405940594, "grad_norm": 0.580870509147644, "learning_rate": 8.374326072437391e-05, "loss": 0.4803, "step": 36120 }, { "epoch": 8.943069306930694, "grad_norm": 0.603941798210144, "learning_rate": 8.37330914165781e-05, "loss": 0.4821, "step": 36130 }, { "epoch": 8.945544554455445, "grad_norm": 0.5835846662521362, "learning_rate": 8.372291954697659e-05, "loss": 0.4807, "step": 36140 }, { "epoch": 8.948019801980198, "grad_norm": 0.5754632949829102, "learning_rate": 8.371274511634193e-05, "loss": 0.485, "step": 36150 }, { "epoch": 8.950495049504951, "grad_norm": 0.5622389912605286, "learning_rate": 8.370256812544673e-05, "loss": 0.4743, "step": 36160 }, { "epoch": 8.952970297029703, "grad_norm": 0.5761988162994385, "learning_rate": 8.369238857506391e-05, "loss": 0.4798, "step": 36170 }, { "epoch": 8.955445544554456, "grad_norm": 0.594950258731842, "learning_rate": 8.368220646596655e-05, "loss": 0.4792, "step": 36180 }, { "epoch": 8.957920792079207, "grad_norm": 0.6150926947593689, "learning_rate": 8.367202179892785e-05, "loss": 0.4821, "step": 36190 }, { "epoch": 8.96039603960396, "grad_norm": 0.6059890985488892, "learning_rate": 8.366183457472134e-05, "loss": 0.4797, "step": 36200 }, { "epoch": 8.962871287128714, "grad_norm": 0.5737486481666565, "learning_rate": 8.365164479412063e-05, "loss": 0.4764, "step": 36210 }, { "epoch": 8.965346534653465, "grad_norm": 0.5480334758758545, "learning_rate": 8.364145245789958e-05, "loss": 0.4839, "step": 36220 }, { "epoch": 8.967821782178218, "grad_norm": 0.5844752192497253, "learning_rate": 8.363125756683223e-05, "loss": 0.4802, "step": 36230 }, { "epoch": 8.97029702970297, "grad_norm": 0.5477576851844788, "learning_rate": 8.362106012169283e-05, "loss": 0.4768, "step": 36240 }, { "epoch": 8.972772277227723, "grad_norm": 0.5309290289878845, "learning_rate": 8.361086012325579e-05, "loss": 0.477, "step": 36250 }, { "epoch": 8.975247524752476, "grad_norm": 0.5530285835266113, "learning_rate": 8.360065757229571e-05, "loss": 0.4783, "step": 36260 }, { "epoch": 8.977722772277227, "grad_norm": 0.578603208065033, "learning_rate": 8.359045246958745e-05, "loss": 0.4814, "step": 36270 }, { "epoch": 8.98019801980198, "grad_norm": 0.5529236197471619, "learning_rate": 8.3580244815906e-05, "loss": 0.4793, "step": 36280 }, { "epoch": 8.982673267326733, "grad_norm": 0.5913735032081604, "learning_rate": 8.357003461202658e-05, "loss": 0.4788, "step": 36290 }, { "epoch": 8.985148514851485, "grad_norm": 0.5328550338745117, "learning_rate": 8.355982185872458e-05, "loss": 0.4783, "step": 36300 }, { "epoch": 8.987623762376238, "grad_norm": 0.5424646735191345, "learning_rate": 8.354960655677555e-05, "loss": 0.4684, "step": 36310 }, { "epoch": 8.990099009900991, "grad_norm": 0.5500154495239258, "learning_rate": 8.353938870695535e-05, "loss": 0.4798, "step": 36320 }, { "epoch": 8.992574257425742, "grad_norm": 0.5268204808235168, "learning_rate": 8.352916831003989e-05, "loss": 0.4814, "step": 36330 }, { "epoch": 8.995049504950495, "grad_norm": 0.5784480571746826, "learning_rate": 8.351894536680537e-05, "loss": 0.4845, "step": 36340 }, { "epoch": 8.997524752475247, "grad_norm": 0.6158437728881836, "learning_rate": 8.350871987802817e-05, "loss": 0.4769, "step": 36350 }, { "epoch": 9.0, "grad_norm": 0.6017197966575623, "learning_rate": 8.349849184448481e-05, "loss": 0.4792, "step": 36360 }, { "epoch": 9.002475247524753, "grad_norm": 0.5773706436157227, "learning_rate": 8.34882612669521e-05, "loss": 0.4847, "step": 36370 }, { "epoch": 9.004950495049505, "grad_norm": 0.5834161639213562, "learning_rate": 8.347802814620694e-05, "loss": 0.4867, "step": 36380 }, { "epoch": 9.007425742574258, "grad_norm": 0.601547122001648, "learning_rate": 8.346779248302645e-05, "loss": 0.4802, "step": 36390 }, { "epoch": 9.009900990099009, "grad_norm": 0.6099653244018555, "learning_rate": 8.345755427818801e-05, "loss": 0.4798, "step": 36400 }, { "epoch": 9.012376237623762, "grad_norm": 0.5834208130836487, "learning_rate": 8.344731353246913e-05, "loss": 0.4791, "step": 36410 }, { "epoch": 9.014851485148515, "grad_norm": 0.5920343995094299, "learning_rate": 8.343707024664751e-05, "loss": 0.4789, "step": 36420 }, { "epoch": 9.017326732673267, "grad_norm": 0.5740978717803955, "learning_rate": 8.342682442150108e-05, "loss": 0.489, "step": 36430 }, { "epoch": 9.01980198019802, "grad_norm": 0.5691344738006592, "learning_rate": 8.341657605780792e-05, "loss": 0.4828, "step": 36440 }, { "epoch": 9.022277227722773, "grad_norm": 0.5492933988571167, "learning_rate": 8.340632515634634e-05, "loss": 0.4784, "step": 36450 }, { "epoch": 9.024752475247524, "grad_norm": 0.5662841200828552, "learning_rate": 8.339607171789483e-05, "loss": 0.4743, "step": 36460 }, { "epoch": 9.027227722772277, "grad_norm": 0.5586300492286682, "learning_rate": 8.338581574323208e-05, "loss": 0.4774, "step": 36470 }, { "epoch": 9.029702970297029, "grad_norm": 0.574013352394104, "learning_rate": 8.337555723313693e-05, "loss": 0.4804, "step": 36480 }, { "epoch": 9.032178217821782, "grad_norm": 0.5766432285308838, "learning_rate": 8.336529618838848e-05, "loss": 0.4772, "step": 36490 }, { "epoch": 9.034653465346535, "grad_norm": 0.5728526711463928, "learning_rate": 8.335503260976598e-05, "loss": 0.4793, "step": 36500 }, { "epoch": 9.037128712871286, "grad_norm": 0.5538159608840942, "learning_rate": 8.334476649804888e-05, "loss": 0.4825, "step": 36510 }, { "epoch": 9.03960396039604, "grad_norm": 0.5843670964241028, "learning_rate": 8.333449785401681e-05, "loss": 0.4788, "step": 36520 }, { "epoch": 9.042079207920793, "grad_norm": 0.5474295020103455, "learning_rate": 8.332422667844961e-05, "loss": 0.4778, "step": 36530 }, { "epoch": 9.044554455445544, "grad_norm": 0.5543109178543091, "learning_rate": 8.331395297212732e-05, "loss": 0.4772, "step": 36540 }, { "epoch": 9.047029702970297, "grad_norm": 0.5738042593002319, "learning_rate": 8.330367673583017e-05, "loss": 0.4724, "step": 36550 }, { "epoch": 9.049504950495049, "grad_norm": 0.5419076085090637, "learning_rate": 8.329339797033855e-05, "loss": 0.4797, "step": 36560 }, { "epoch": 9.051980198019802, "grad_norm": 0.5964462161064148, "learning_rate": 8.328311667643306e-05, "loss": 0.4745, "step": 36570 }, { "epoch": 9.054455445544555, "grad_norm": 0.5417765974998474, "learning_rate": 8.32728328548945e-05, "loss": 0.4827, "step": 36580 }, { "epoch": 9.056930693069306, "grad_norm": 0.5485579967498779, "learning_rate": 8.326254650650389e-05, "loss": 0.4795, "step": 36590 }, { "epoch": 9.05940594059406, "grad_norm": 0.5720028877258301, "learning_rate": 8.325225763204239e-05, "loss": 0.4738, "step": 36600 }, { "epoch": 9.061881188118813, "grad_norm": 0.5508458018302917, "learning_rate": 8.324196623229135e-05, "loss": 0.4798, "step": 36610 }, { "epoch": 9.064356435643564, "grad_norm": 0.5582034587860107, "learning_rate": 8.323167230803237e-05, "loss": 0.4821, "step": 36620 }, { "epoch": 9.066831683168317, "grad_norm": 0.5452266931533813, "learning_rate": 8.322137586004717e-05, "loss": 0.4808, "step": 36630 }, { "epoch": 9.069306930693068, "grad_norm": 0.5796079635620117, "learning_rate": 8.321107688911773e-05, "loss": 0.478, "step": 36640 }, { "epoch": 9.071782178217822, "grad_norm": 0.5413262844085693, "learning_rate": 8.320077539602617e-05, "loss": 0.4852, "step": 36650 }, { "epoch": 9.074257425742575, "grad_norm": 0.5904659032821655, "learning_rate": 8.319047138155481e-05, "loss": 0.4793, "step": 36660 }, { "epoch": 9.076732673267326, "grad_norm": 0.5907582640647888, "learning_rate": 8.31801648464862e-05, "loss": 0.4806, "step": 36670 }, { "epoch": 9.07920792079208, "grad_norm": 0.5422338843345642, "learning_rate": 8.316985579160303e-05, "loss": 0.4813, "step": 36680 }, { "epoch": 9.081683168316832, "grad_norm": 0.5826931595802307, "learning_rate": 8.315954421768823e-05, "loss": 0.4767, "step": 36690 }, { "epoch": 9.084158415841584, "grad_norm": 0.5461636781692505, "learning_rate": 8.314923012552486e-05, "loss": 0.4765, "step": 36700 }, { "epoch": 9.086633663366337, "grad_norm": 0.5399064421653748, "learning_rate": 8.313891351589626e-05, "loss": 0.4848, "step": 36710 }, { "epoch": 9.089108910891088, "grad_norm": 0.5667297840118408, "learning_rate": 8.312859438958586e-05, "loss": 0.4783, "step": 36720 }, { "epoch": 9.091584158415841, "grad_norm": 0.5776655077934265, "learning_rate": 8.311827274737734e-05, "loss": 0.4758, "step": 36730 }, { "epoch": 9.094059405940595, "grad_norm": 0.535247802734375, "learning_rate": 8.310794859005457e-05, "loss": 0.4789, "step": 36740 }, { "epoch": 9.096534653465346, "grad_norm": 0.5384183526039124, "learning_rate": 8.30976219184016e-05, "loss": 0.4791, "step": 36750 }, { "epoch": 9.099009900990099, "grad_norm": 0.522470235824585, "learning_rate": 8.308729273320265e-05, "loss": 0.4813, "step": 36760 }, { "epoch": 9.101485148514852, "grad_norm": 0.5220049619674683, "learning_rate": 8.30769610352422e-05, "loss": 0.4757, "step": 36770 }, { "epoch": 9.103960396039604, "grad_norm": 0.5618427395820618, "learning_rate": 8.306662682530484e-05, "loss": 0.4757, "step": 36780 }, { "epoch": 9.106435643564357, "grad_norm": 0.5913798809051514, "learning_rate": 8.305629010417541e-05, "loss": 0.4796, "step": 36790 }, { "epoch": 9.108910891089108, "grad_norm": 0.5953611135482788, "learning_rate": 8.304595087263889e-05, "loss": 0.4803, "step": 36800 }, { "epoch": 9.111386138613861, "grad_norm": 0.5620314478874207, "learning_rate": 8.303560913148048e-05, "loss": 0.4797, "step": 36810 }, { "epoch": 9.113861386138614, "grad_norm": 0.5516108870506287, "learning_rate": 8.302526488148556e-05, "loss": 0.4825, "step": 36820 }, { "epoch": 9.116336633663366, "grad_norm": 0.5587001442909241, "learning_rate": 8.301491812343973e-05, "loss": 0.4801, "step": 36830 }, { "epoch": 9.118811881188119, "grad_norm": 0.5760681629180908, "learning_rate": 8.300456885812875e-05, "loss": 0.4787, "step": 36840 }, { "epoch": 9.121287128712872, "grad_norm": 0.5664629936218262, "learning_rate": 8.299421708633858e-05, "loss": 0.4841, "step": 36850 }, { "epoch": 9.123762376237623, "grad_norm": 0.5433114767074585, "learning_rate": 8.298386280885536e-05, "loss": 0.4733, "step": 36860 }, { "epoch": 9.126237623762377, "grad_norm": 0.559585452079773, "learning_rate": 8.297350602646543e-05, "loss": 0.4752, "step": 36870 }, { "epoch": 9.128712871287128, "grad_norm": 0.5623950958251953, "learning_rate": 8.296314673995534e-05, "loss": 0.4776, "step": 36880 }, { "epoch": 9.131188118811881, "grad_norm": 0.5599463582038879, "learning_rate": 8.295278495011179e-05, "loss": 0.4777, "step": 36890 }, { "epoch": 9.133663366336634, "grad_norm": 0.5815693736076355, "learning_rate": 8.294242065772166e-05, "loss": 0.4793, "step": 36900 }, { "epoch": 9.136138613861386, "grad_norm": 0.5949814319610596, "learning_rate": 8.293205386357212e-05, "loss": 0.483, "step": 36910 }, { "epoch": 9.138613861386139, "grad_norm": 0.5760217308998108, "learning_rate": 8.29216845684504e-05, "loss": 0.4751, "step": 36920 }, { "epoch": 9.141089108910892, "grad_norm": 0.5448182225227356, "learning_rate": 8.2911312773144e-05, "loss": 0.4822, "step": 36930 }, { "epoch": 9.143564356435643, "grad_norm": 0.596537709236145, "learning_rate": 8.290093847844061e-05, "loss": 0.4832, "step": 36940 }, { "epoch": 9.146039603960396, "grad_norm": 0.5806779265403748, "learning_rate": 8.289056168512807e-05, "loss": 0.4802, "step": 36950 }, { "epoch": 9.148514851485148, "grad_norm": 0.5772958993911743, "learning_rate": 8.28801823939944e-05, "loss": 0.4732, "step": 36960 }, { "epoch": 9.150990099009901, "grad_norm": 0.6134209632873535, "learning_rate": 8.28698006058279e-05, "loss": 0.4825, "step": 36970 }, { "epoch": 9.153465346534654, "grad_norm": 0.6442786455154419, "learning_rate": 8.285941632141694e-05, "loss": 0.482, "step": 36980 }, { "epoch": 9.155940594059405, "grad_norm": 0.5713022351264954, "learning_rate": 8.284902954155019e-05, "loss": 0.482, "step": 36990 }, { "epoch": 9.158415841584159, "grad_norm": 0.5689682364463806, "learning_rate": 8.283864026701642e-05, "loss": 0.4775, "step": 37000 }, { "epoch": 9.160891089108912, "grad_norm": 0.5852217078208923, "learning_rate": 8.282824849860462e-05, "loss": 0.4757, "step": 37010 }, { "epoch": 9.163366336633663, "grad_norm": 0.5630500316619873, "learning_rate": 8.281785423710402e-05, "loss": 0.4767, "step": 37020 }, { "epoch": 9.165841584158416, "grad_norm": 0.5592277646064758, "learning_rate": 8.280745748330395e-05, "loss": 0.4733, "step": 37030 }, { "epoch": 9.168316831683168, "grad_norm": 0.5943350791931152, "learning_rate": 8.279705823799402e-05, "loss": 0.4793, "step": 37040 }, { "epoch": 9.17079207920792, "grad_norm": 0.5412008762359619, "learning_rate": 8.278665650196393e-05, "loss": 0.4861, "step": 37050 }, { "epoch": 9.173267326732674, "grad_norm": 0.512381374835968, "learning_rate": 8.277625227600367e-05, "loss": 0.4811, "step": 37060 }, { "epoch": 9.175742574257425, "grad_norm": 0.5602396130561829, "learning_rate": 8.276584556090334e-05, "loss": 0.4781, "step": 37070 }, { "epoch": 9.178217821782178, "grad_norm": 0.5625749230384827, "learning_rate": 8.27554363574533e-05, "loss": 0.4781, "step": 37080 }, { "epoch": 9.180693069306932, "grad_norm": 0.5409704446792603, "learning_rate": 8.2745024666444e-05, "loss": 0.4764, "step": 37090 }, { "epoch": 9.183168316831683, "grad_norm": 0.5699033141136169, "learning_rate": 8.27346104886662e-05, "loss": 0.4776, "step": 37100 }, { "epoch": 9.185643564356436, "grad_norm": 0.5577231049537659, "learning_rate": 8.272419382491075e-05, "loss": 0.4743, "step": 37110 }, { "epoch": 9.188118811881187, "grad_norm": 0.5690266489982605, "learning_rate": 8.271377467596875e-05, "loss": 0.483, "step": 37120 }, { "epoch": 9.19059405940594, "grad_norm": 0.5539564490318298, "learning_rate": 8.270335304263145e-05, "loss": 0.4719, "step": 37130 }, { "epoch": 9.193069306930694, "grad_norm": 0.5535292029380798, "learning_rate": 8.269292892569032e-05, "loss": 0.4796, "step": 37140 }, { "epoch": 9.195544554455445, "grad_norm": 0.5765973925590515, "learning_rate": 8.268250232593697e-05, "loss": 0.4835, "step": 37150 }, { "epoch": 9.198019801980198, "grad_norm": 0.5658343434333801, "learning_rate": 8.267207324416328e-05, "loss": 0.4821, "step": 37160 }, { "epoch": 9.200495049504951, "grad_norm": 0.55417799949646, "learning_rate": 8.266164168116122e-05, "loss": 0.4749, "step": 37170 }, { "epoch": 9.202970297029703, "grad_norm": 0.5610687732696533, "learning_rate": 8.265120763772303e-05, "loss": 0.4796, "step": 37180 }, { "epoch": 9.205445544554456, "grad_norm": 0.5594534277915955, "learning_rate": 8.264077111464113e-05, "loss": 0.4839, "step": 37190 }, { "epoch": 9.207920792079207, "grad_norm": 0.5404337048530579, "learning_rate": 8.263033211270804e-05, "loss": 0.4756, "step": 37200 }, { "epoch": 9.21039603960396, "grad_norm": 0.5690591335296631, "learning_rate": 8.261989063271657e-05, "loss": 0.4793, "step": 37210 }, { "epoch": 9.212871287128714, "grad_norm": 0.5451310276985168, "learning_rate": 8.260944667545969e-05, "loss": 0.4721, "step": 37220 }, { "epoch": 9.215346534653465, "grad_norm": 0.534453272819519, "learning_rate": 8.259900024173053e-05, "loss": 0.472, "step": 37230 }, { "epoch": 9.217821782178218, "grad_norm": 0.5682867765426636, "learning_rate": 8.258855133232244e-05, "loss": 0.4839, "step": 37240 }, { "epoch": 9.220297029702971, "grad_norm": 0.563931405544281, "learning_rate": 8.257809994802895e-05, "loss": 0.4807, "step": 37250 }, { "epoch": 9.222772277227723, "grad_norm": 0.5801688432693481, "learning_rate": 8.256764608964375e-05, "loss": 0.4763, "step": 37260 }, { "epoch": 9.225247524752476, "grad_norm": 0.5701327919960022, "learning_rate": 8.255718975796076e-05, "loss": 0.4813, "step": 37270 }, { "epoch": 9.227722772277227, "grad_norm": 0.5348809957504272, "learning_rate": 8.254673095377408e-05, "loss": 0.4793, "step": 37280 }, { "epoch": 9.23019801980198, "grad_norm": 0.5664178133010864, "learning_rate": 8.253626967787795e-05, "loss": 0.4789, "step": 37290 }, { "epoch": 9.232673267326733, "grad_norm": 0.5627885460853577, "learning_rate": 8.252580593106688e-05, "loss": 0.4787, "step": 37300 }, { "epoch": 9.235148514851485, "grad_norm": 0.5788392424583435, "learning_rate": 8.25153397141355e-05, "loss": 0.4773, "step": 37310 }, { "epoch": 9.237623762376238, "grad_norm": 0.5684924125671387, "learning_rate": 8.250487102787864e-05, "loss": 0.479, "step": 37320 }, { "epoch": 9.240099009900991, "grad_norm": 0.5993744134902954, "learning_rate": 8.249439987309134e-05, "loss": 0.4789, "step": 37330 }, { "epoch": 9.242574257425742, "grad_norm": 0.5670738220214844, "learning_rate": 8.248392625056883e-05, "loss": 0.4821, "step": 37340 }, { "epoch": 9.245049504950495, "grad_norm": 0.565301775932312, "learning_rate": 8.247345016110648e-05, "loss": 0.4837, "step": 37350 }, { "epoch": 9.247524752475247, "grad_norm": 0.5895040035247803, "learning_rate": 8.246297160549991e-05, "loss": 0.4814, "step": 37360 }, { "epoch": 9.25, "grad_norm": 0.5587558150291443, "learning_rate": 8.245249058454487e-05, "loss": 0.4781, "step": 37370 }, { "epoch": 9.252475247524753, "grad_norm": 0.5632523894309998, "learning_rate": 8.244200709903735e-05, "loss": 0.4855, "step": 37380 }, { "epoch": 9.254950495049505, "grad_norm": 0.558887243270874, "learning_rate": 8.243152114977348e-05, "loss": 0.48, "step": 37390 }, { "epoch": 9.257425742574258, "grad_norm": 0.5702630877494812, "learning_rate": 8.24210327375496e-05, "loss": 0.4769, "step": 37400 }, { "epoch": 9.259900990099009, "grad_norm": 0.559061586856842, "learning_rate": 8.241054186316226e-05, "loss": 0.4719, "step": 37410 }, { "epoch": 9.262376237623762, "grad_norm": 0.5738494992256165, "learning_rate": 8.240004852740815e-05, "loss": 0.476, "step": 37420 }, { "epoch": 9.264851485148515, "grad_norm": 0.5766007900238037, "learning_rate": 8.238955273108417e-05, "loss": 0.4805, "step": 37430 }, { "epoch": 9.267326732673267, "grad_norm": 0.5356243848800659, "learning_rate": 8.237905447498743e-05, "loss": 0.4763, "step": 37440 }, { "epoch": 9.26980198019802, "grad_norm": 0.5692233443260193, "learning_rate": 8.236855375991517e-05, "loss": 0.4755, "step": 37450 }, { "epoch": 9.272277227722773, "grad_norm": 0.5570148229598999, "learning_rate": 8.235805058666488e-05, "loss": 0.48, "step": 37460 }, { "epoch": 9.274752475247524, "grad_norm": 0.5376051664352417, "learning_rate": 8.234754495603419e-05, "loss": 0.4719, "step": 37470 }, { "epoch": 9.277227722772277, "grad_norm": 0.5330332517623901, "learning_rate": 8.233703686882092e-05, "loss": 0.4782, "step": 37480 }, { "epoch": 9.27970297029703, "grad_norm": 0.5515782833099365, "learning_rate": 8.232652632582312e-05, "loss": 0.4785, "step": 37490 }, { "epoch": 9.282178217821782, "grad_norm": 0.5835399627685547, "learning_rate": 8.231601332783898e-05, "loss": 0.4778, "step": 37500 }, { "epoch": 9.284653465346535, "grad_norm": 0.5431005954742432, "learning_rate": 8.230549787566689e-05, "loss": 0.4779, "step": 37510 }, { "epoch": 9.287128712871286, "grad_norm": 0.5629658102989197, "learning_rate": 8.229497997010544e-05, "loss": 0.4781, "step": 37520 }, { "epoch": 9.28960396039604, "grad_norm": 0.5349637269973755, "learning_rate": 8.22844596119534e-05, "loss": 0.4854, "step": 37530 }, { "epoch": 9.292079207920793, "grad_norm": 0.5592132806777954, "learning_rate": 8.227393680200968e-05, "loss": 0.4755, "step": 37540 }, { "epoch": 9.294554455445544, "grad_norm": 0.5483583807945251, "learning_rate": 8.226341154107347e-05, "loss": 0.4766, "step": 37550 }, { "epoch": 9.297029702970297, "grad_norm": 0.5771092772483826, "learning_rate": 8.225288382994407e-05, "loss": 0.4809, "step": 37560 }, { "epoch": 9.299504950495049, "grad_norm": 0.5998315811157227, "learning_rate": 8.2242353669421e-05, "loss": 0.4784, "step": 37570 }, { "epoch": 9.301980198019802, "grad_norm": 0.604118824005127, "learning_rate": 8.223182106030394e-05, "loss": 0.4771, "step": 37580 }, { "epoch": 9.304455445544555, "grad_norm": 0.5566378831863403, "learning_rate": 8.222128600339279e-05, "loss": 0.4765, "step": 37590 }, { "epoch": 9.306930693069306, "grad_norm": 0.5846917033195496, "learning_rate": 8.221074849948758e-05, "loss": 0.4792, "step": 37600 }, { "epoch": 9.30940594059406, "grad_norm": 0.5566610097885132, "learning_rate": 8.220020854938863e-05, "loss": 0.477, "step": 37610 }, { "epoch": 9.311881188118813, "grad_norm": 0.5689854621887207, "learning_rate": 8.218966615389634e-05, "loss": 0.4783, "step": 37620 }, { "epoch": 9.314356435643564, "grad_norm": 0.582952082157135, "learning_rate": 8.217912131381133e-05, "loss": 0.4846, "step": 37630 }, { "epoch": 9.316831683168317, "grad_norm": 0.5232787132263184, "learning_rate": 8.216857402993442e-05, "loss": 0.4742, "step": 37640 }, { "epoch": 9.319306930693068, "grad_norm": 0.5540486574172974, "learning_rate": 8.215802430306661e-05, "loss": 0.485, "step": 37650 }, { "epoch": 9.321782178217822, "grad_norm": 0.5474264621734619, "learning_rate": 8.214747213400908e-05, "loss": 0.4706, "step": 37660 }, { "epoch": 9.324257425742575, "grad_norm": 0.5698445439338684, "learning_rate": 8.213691752356319e-05, "loss": 0.4842, "step": 37670 }, { "epoch": 9.326732673267326, "grad_norm": 0.5272986888885498, "learning_rate": 8.212636047253051e-05, "loss": 0.4737, "step": 37680 }, { "epoch": 9.32920792079208, "grad_norm": 0.5753678679466248, "learning_rate": 8.211580098171276e-05, "loss": 0.4718, "step": 37690 }, { "epoch": 9.331683168316832, "grad_norm": 0.5659273862838745, "learning_rate": 8.210523905191185e-05, "loss": 0.4792, "step": 37700 }, { "epoch": 9.334158415841584, "grad_norm": 0.5716117024421692, "learning_rate": 8.209467468392996e-05, "loss": 0.4767, "step": 37710 }, { "epoch": 9.336633663366337, "grad_norm": 0.5556563138961792, "learning_rate": 8.208410787856929e-05, "loss": 0.4734, "step": 37720 }, { "epoch": 9.339108910891088, "grad_norm": 0.5708191394805908, "learning_rate": 8.207353863663238e-05, "loss": 0.4767, "step": 37730 }, { "epoch": 9.341584158415841, "grad_norm": 0.5688718557357788, "learning_rate": 8.206296695892188e-05, "loss": 0.4775, "step": 37740 }, { "epoch": 9.344059405940595, "grad_norm": 0.5566805005073547, "learning_rate": 8.205239284624062e-05, "loss": 0.481, "step": 37750 }, { "epoch": 9.346534653465346, "grad_norm": 0.6034247875213623, "learning_rate": 8.204181629939167e-05, "loss": 0.4781, "step": 37760 }, { "epoch": 9.349009900990099, "grad_norm": 0.512592077255249, "learning_rate": 8.203123731917824e-05, "loss": 0.4792, "step": 37770 }, { "epoch": 9.351485148514852, "grad_norm": 0.529316782951355, "learning_rate": 8.202065590640371e-05, "loss": 0.4776, "step": 37780 }, { "epoch": 9.353960396039604, "grad_norm": 0.5803072452545166, "learning_rate": 8.201007206187167e-05, "loss": 0.4782, "step": 37790 }, { "epoch": 9.356435643564357, "grad_norm": 0.58062744140625, "learning_rate": 8.199948578638592e-05, "loss": 0.4818, "step": 37800 }, { "epoch": 9.358910891089108, "grad_norm": 0.5157124996185303, "learning_rate": 8.19888970807504e-05, "loss": 0.4789, "step": 37810 }, { "epoch": 9.361386138613861, "grad_norm": 0.5390070080757141, "learning_rate": 8.197830594576924e-05, "loss": 0.477, "step": 37820 }, { "epoch": 9.363861386138614, "grad_norm": 0.5329713821411133, "learning_rate": 8.196771238224679e-05, "loss": 0.4776, "step": 37830 }, { "epoch": 9.366336633663366, "grad_norm": 0.5966445207595825, "learning_rate": 8.195711639098755e-05, "loss": 0.48, "step": 37840 }, { "epoch": 9.368811881188119, "grad_norm": 0.5268508195877075, "learning_rate": 8.194651797279624e-05, "loss": 0.4815, "step": 37850 }, { "epoch": 9.371287128712872, "grad_norm": 0.5483677983283997, "learning_rate": 8.19359171284777e-05, "loss": 0.4788, "step": 37860 }, { "epoch": 9.373762376237623, "grad_norm": 0.5424846410751343, "learning_rate": 8.192531385883702e-05, "loss": 0.4782, "step": 37870 }, { "epoch": 9.376237623762377, "grad_norm": 0.5726257562637329, "learning_rate": 8.191470816467943e-05, "loss": 0.4817, "step": 37880 }, { "epoch": 9.378712871287128, "grad_norm": 0.6068394780158997, "learning_rate": 8.190410004681039e-05, "loss": 0.4752, "step": 37890 }, { "epoch": 9.381188118811881, "grad_norm": 0.5621321201324463, "learning_rate": 8.189348950603549e-05, "loss": 0.4787, "step": 37900 }, { "epoch": 9.383663366336634, "grad_norm": 0.5603567361831665, "learning_rate": 8.188287654316054e-05, "loss": 0.4825, "step": 37910 }, { "epoch": 9.386138613861386, "grad_norm": 0.5481693744659424, "learning_rate": 8.187226115899153e-05, "loss": 0.4772, "step": 37920 }, { "epoch": 9.388613861386139, "grad_norm": 0.5678537487983704, "learning_rate": 8.186164335433463e-05, "loss": 0.4795, "step": 37930 }, { "epoch": 9.391089108910892, "grad_norm": 0.5425179600715637, "learning_rate": 8.185102312999617e-05, "loss": 0.4786, "step": 37940 }, { "epoch": 9.393564356435643, "grad_norm": 0.564591646194458, "learning_rate": 8.184040048678271e-05, "loss": 0.4761, "step": 37950 }, { "epoch": 9.396039603960396, "grad_norm": 0.5509834885597229, "learning_rate": 8.182977542550096e-05, "loss": 0.4835, "step": 37960 }, { "epoch": 9.398514851485148, "grad_norm": 0.544191837310791, "learning_rate": 8.181914794695782e-05, "loss": 0.4785, "step": 37970 }, { "epoch": 9.400990099009901, "grad_norm": 0.5453941226005554, "learning_rate": 8.180851805196039e-05, "loss": 0.4731, "step": 37980 }, { "epoch": 9.403465346534654, "grad_norm": 0.5598349571228027, "learning_rate": 8.179788574131592e-05, "loss": 0.4789, "step": 37990 }, { "epoch": 9.405940594059405, "grad_norm": 0.5615328550338745, "learning_rate": 8.17872510158319e-05, "loss": 0.4791, "step": 38000 }, { "epoch": 9.408415841584159, "grad_norm": 0.5894520282745361, "learning_rate": 8.177661387631592e-05, "loss": 0.4753, "step": 38010 }, { "epoch": 9.410891089108912, "grad_norm": 0.5755113363265991, "learning_rate": 8.176597432357583e-05, "loss": 0.4803, "step": 38020 }, { "epoch": 9.413366336633663, "grad_norm": 0.5850576758384705, "learning_rate": 8.175533235841963e-05, "loss": 0.4757, "step": 38030 }, { "epoch": 9.415841584158416, "grad_norm": 0.5723497867584229, "learning_rate": 8.17446879816555e-05, "loss": 0.4812, "step": 38040 }, { "epoch": 9.418316831683168, "grad_norm": 0.5298948884010315, "learning_rate": 8.173404119409183e-05, "loss": 0.4774, "step": 38050 }, { "epoch": 9.42079207920792, "grad_norm": 0.5264853835105896, "learning_rate": 8.172339199653714e-05, "loss": 0.4831, "step": 38060 }, { "epoch": 9.423267326732674, "grad_norm": 0.5515694618225098, "learning_rate": 8.171274038980018e-05, "loss": 0.4803, "step": 38070 }, { "epoch": 9.425742574257425, "grad_norm": 0.551981508731842, "learning_rate": 8.170208637468988e-05, "loss": 0.4758, "step": 38080 }, { "epoch": 9.428217821782178, "grad_norm": 0.5943790078163147, "learning_rate": 8.169142995201534e-05, "loss": 0.4812, "step": 38090 }, { "epoch": 9.430693069306932, "grad_norm": 0.538484513759613, "learning_rate": 8.168077112258585e-05, "loss": 0.4768, "step": 38100 }, { "epoch": 9.433168316831683, "grad_norm": 0.5556617975234985, "learning_rate": 8.167010988721084e-05, "loss": 0.4776, "step": 38110 }, { "epoch": 9.435643564356436, "grad_norm": 0.5478696823120117, "learning_rate": 8.16594462467e-05, "loss": 0.4821, "step": 38120 }, { "epoch": 9.438118811881187, "grad_norm": 0.5519699454307556, "learning_rate": 8.164878020186317e-05, "loss": 0.4755, "step": 38130 }, { "epoch": 9.44059405940594, "grad_norm": 0.5390586256980896, "learning_rate": 8.163811175351033e-05, "loss": 0.4737, "step": 38140 }, { "epoch": 9.443069306930694, "grad_norm": 0.5684095025062561, "learning_rate": 8.16274409024517e-05, "loss": 0.478, "step": 38150 }, { "epoch": 9.445544554455445, "grad_norm": 0.5785136222839355, "learning_rate": 8.161676764949766e-05, "loss": 0.4762, "step": 38160 }, { "epoch": 9.448019801980198, "grad_norm": 0.5464916825294495, "learning_rate": 8.160609199545875e-05, "loss": 0.4774, "step": 38170 }, { "epoch": 9.450495049504951, "grad_norm": 0.5751018524169922, "learning_rate": 8.159541394114576e-05, "loss": 0.4774, "step": 38180 }, { "epoch": 9.452970297029703, "grad_norm": 0.5837265849113464, "learning_rate": 8.158473348736958e-05, "loss": 0.4806, "step": 38190 }, { "epoch": 9.455445544554456, "grad_norm": 0.5685909390449524, "learning_rate": 8.157405063494135e-05, "loss": 0.4766, "step": 38200 }, { "epoch": 9.457920792079207, "grad_norm": 0.5565740466117859, "learning_rate": 8.156336538467233e-05, "loss": 0.482, "step": 38210 }, { "epoch": 9.46039603960396, "grad_norm": 0.5149017572402954, "learning_rate": 8.155267773737399e-05, "loss": 0.4799, "step": 38220 }, { "epoch": 9.462871287128714, "grad_norm": 0.5434813499450684, "learning_rate": 8.154198769385804e-05, "loss": 0.4846, "step": 38230 }, { "epoch": 9.465346534653465, "grad_norm": 0.5267906188964844, "learning_rate": 8.153129525493625e-05, "loss": 0.4806, "step": 38240 }, { "epoch": 9.467821782178218, "grad_norm": 0.5984814763069153, "learning_rate": 8.152060042142072e-05, "loss": 0.4742, "step": 38250 }, { "epoch": 9.47029702970297, "grad_norm": 0.5417039394378662, "learning_rate": 8.150990319412356e-05, "loss": 0.4794, "step": 38260 }, { "epoch": 9.472772277227723, "grad_norm": 0.5379604697227478, "learning_rate": 8.149920357385723e-05, "loss": 0.4749, "step": 38270 }, { "epoch": 9.475247524752476, "grad_norm": 0.547810435295105, "learning_rate": 8.148850156143423e-05, "loss": 0.4761, "step": 38280 }, { "epoch": 9.477722772277227, "grad_norm": 0.6038037538528442, "learning_rate": 8.147779715766738e-05, "loss": 0.4776, "step": 38290 }, { "epoch": 9.48019801980198, "grad_norm": 0.5409196615219116, "learning_rate": 8.146709036336956e-05, "loss": 0.4772, "step": 38300 }, { "epoch": 9.482673267326733, "grad_norm": 0.5464827418327332, "learning_rate": 8.145638117935388e-05, "loss": 0.475, "step": 38310 }, { "epoch": 9.485148514851485, "grad_norm": 0.5494663119316101, "learning_rate": 8.144566960643367e-05, "loss": 0.4764, "step": 38320 }, { "epoch": 9.487623762376238, "grad_norm": 0.5836407542228699, "learning_rate": 8.143495564542235e-05, "loss": 0.4779, "step": 38330 }, { "epoch": 9.490099009900991, "grad_norm": 0.5488136410713196, "learning_rate": 8.142423929713361e-05, "loss": 0.4796, "step": 38340 }, { "epoch": 9.492574257425742, "grad_norm": 0.5463764071464539, "learning_rate": 8.141352056238127e-05, "loss": 0.4803, "step": 38350 }, { "epoch": 9.495049504950495, "grad_norm": 0.5430252552032471, "learning_rate": 8.140279944197938e-05, "loss": 0.4757, "step": 38360 }, { "epoch": 9.497524752475247, "grad_norm": 0.5524135828018188, "learning_rate": 8.139207593674208e-05, "loss": 0.477, "step": 38370 }, { "epoch": 9.5, "grad_norm": 0.5565645694732666, "learning_rate": 8.138135004748381e-05, "loss": 0.4747, "step": 38380 }, { "epoch": 9.502475247524753, "grad_norm": 0.5481319427490234, "learning_rate": 8.137062177501909e-05, "loss": 0.4719, "step": 38390 }, { "epoch": 9.504950495049505, "grad_norm": 0.543061375617981, "learning_rate": 8.135989112016266e-05, "loss": 0.4741, "step": 38400 }, { "epoch": 9.507425742574258, "grad_norm": 0.5835063457489014, "learning_rate": 8.134915808372947e-05, "loss": 0.4756, "step": 38410 }, { "epoch": 9.509900990099009, "grad_norm": 0.5516184568405151, "learning_rate": 8.133842266653462e-05, "loss": 0.4733, "step": 38420 }, { "epoch": 9.512376237623762, "grad_norm": 0.5871446132659912, "learning_rate": 8.132768486939338e-05, "loss": 0.4759, "step": 38430 }, { "epoch": 9.514851485148515, "grad_norm": 0.5400346517562866, "learning_rate": 8.131694469312121e-05, "loss": 0.4837, "step": 38440 }, { "epoch": 9.517326732673267, "grad_norm": 0.5201931595802307, "learning_rate": 8.130620213853377e-05, "loss": 0.479, "step": 38450 }, { "epoch": 9.51980198019802, "grad_norm": 0.5481247305870056, "learning_rate": 8.129545720644687e-05, "loss": 0.4816, "step": 38460 }, { "epoch": 9.522277227722773, "grad_norm": 0.5923134684562683, "learning_rate": 8.128470989767655e-05, "loss": 0.4725, "step": 38470 }, { "epoch": 9.524752475247524, "grad_norm": 0.532662034034729, "learning_rate": 8.127396021303894e-05, "loss": 0.4767, "step": 38480 }, { "epoch": 9.527227722772277, "grad_norm": 0.5552313923835754, "learning_rate": 8.126320815335047e-05, "loss": 0.4788, "step": 38490 }, { "epoch": 9.52970297029703, "grad_norm": 0.5738527178764343, "learning_rate": 8.125245371942767e-05, "loss": 0.4751, "step": 38500 }, { "epoch": 9.532178217821782, "grad_norm": 0.5699824690818787, "learning_rate": 8.124169691208723e-05, "loss": 0.4732, "step": 38510 }, { "epoch": 9.534653465346535, "grad_norm": 0.5449746251106262, "learning_rate": 8.123093773214609e-05, "loss": 0.4729, "step": 38520 }, { "epoch": 9.537128712871286, "grad_norm": 0.5270848274230957, "learning_rate": 8.122017618042135e-05, "loss": 0.4795, "step": 38530 }, { "epoch": 9.53960396039604, "grad_norm": 0.5213679075241089, "learning_rate": 8.120941225773026e-05, "loss": 0.4782, "step": 38540 }, { "epoch": 9.542079207920793, "grad_norm": 0.5359364151954651, "learning_rate": 8.119864596489027e-05, "loss": 0.475, "step": 38550 }, { "epoch": 9.544554455445544, "grad_norm": 0.5895758271217346, "learning_rate": 8.1187877302719e-05, "loss": 0.4783, "step": 38560 }, { "epoch": 9.547029702970297, "grad_norm": 0.5800294876098633, "learning_rate": 8.11771062720343e-05, "loss": 0.4769, "step": 38570 }, { "epoch": 9.549504950495049, "grad_norm": 0.580371618270874, "learning_rate": 8.11663328736541e-05, "loss": 0.4762, "step": 38580 }, { "epoch": 9.551980198019802, "grad_norm": 0.55634605884552, "learning_rate": 8.115555710839662e-05, "loss": 0.4793, "step": 38590 }, { "epoch": 9.554455445544555, "grad_norm": 0.5428804755210876, "learning_rate": 8.114477897708017e-05, "loss": 0.4768, "step": 38600 }, { "epoch": 9.556930693069306, "grad_norm": 0.5378689765930176, "learning_rate": 8.11339984805233e-05, "loss": 0.4741, "step": 38610 }, { "epoch": 9.55940594059406, "grad_norm": 0.5775004029273987, "learning_rate": 8.112321561954472e-05, "loss": 0.4766, "step": 38620 }, { "epoch": 9.561881188118813, "grad_norm": 0.558182954788208, "learning_rate": 8.11124303949633e-05, "loss": 0.4734, "step": 38630 }, { "epoch": 9.564356435643564, "grad_norm": 0.5541952848434448, "learning_rate": 8.110164280759812e-05, "loss": 0.4788, "step": 38640 }, { "epoch": 9.566831683168317, "grad_norm": 0.5427102446556091, "learning_rate": 8.109085285826843e-05, "loss": 0.478, "step": 38650 }, { "epoch": 9.569306930693068, "grad_norm": 0.5346441268920898, "learning_rate": 8.108006054779363e-05, "loss": 0.4803, "step": 38660 }, { "epoch": 9.571782178217822, "grad_norm": 0.5513975620269775, "learning_rate": 8.106926587699335e-05, "loss": 0.4816, "step": 38670 }, { "epoch": 9.574257425742575, "grad_norm": 0.5471779108047485, "learning_rate": 8.105846884668738e-05, "loss": 0.482, "step": 38680 }, { "epoch": 9.576732673267326, "grad_norm": 0.581703782081604, "learning_rate": 8.104766945769564e-05, "loss": 0.4782, "step": 38690 }, { "epoch": 9.57920792079208, "grad_norm": 0.5766664147377014, "learning_rate": 8.103686771083831e-05, "loss": 0.473, "step": 38700 }, { "epoch": 9.581683168316832, "grad_norm": 0.5490952730178833, "learning_rate": 8.102606360693572e-05, "loss": 0.4753, "step": 38710 }, { "epoch": 9.584158415841584, "grad_norm": 0.5414565801620483, "learning_rate": 8.101525714680835e-05, "loss": 0.4789, "step": 38720 }, { "epoch": 9.586633663366337, "grad_norm": 0.5552153587341309, "learning_rate": 8.100444833127687e-05, "loss": 0.4815, "step": 38730 }, { "epoch": 9.589108910891088, "grad_norm": 0.5500094890594482, "learning_rate": 8.099363716116214e-05, "loss": 0.4767, "step": 38740 }, { "epoch": 9.591584158415841, "grad_norm": 0.5790029168128967, "learning_rate": 8.098282363728524e-05, "loss": 0.4782, "step": 38750 }, { "epoch": 9.594059405940595, "grad_norm": 0.5372900366783142, "learning_rate": 8.097200776046734e-05, "loss": 0.4745, "step": 38760 }, { "epoch": 9.596534653465346, "grad_norm": 0.5563099384307861, "learning_rate": 8.096118953152985e-05, "loss": 0.4754, "step": 38770 }, { "epoch": 9.599009900990099, "grad_norm": 0.5928172469139099, "learning_rate": 8.095036895129434e-05, "loss": 0.4773, "step": 38780 }, { "epoch": 9.601485148514852, "grad_norm": 0.5140332579612732, "learning_rate": 8.093954602058255e-05, "loss": 0.4694, "step": 38790 }, { "epoch": 9.603960396039604, "grad_norm": 0.5508284568786621, "learning_rate": 8.092872074021642e-05, "loss": 0.4802, "step": 38800 }, { "epoch": 9.606435643564357, "grad_norm": 0.581159770488739, "learning_rate": 8.091789311101807e-05, "loss": 0.4782, "step": 38810 }, { "epoch": 9.608910891089108, "grad_norm": 0.553162157535553, "learning_rate": 8.090706313380979e-05, "loss": 0.4789, "step": 38820 }, { "epoch": 9.611386138613861, "grad_norm": 0.5486097931861877, "learning_rate": 8.089623080941403e-05, "loss": 0.4728, "step": 38830 }, { "epoch": 9.613861386138614, "grad_norm": 0.5615237951278687, "learning_rate": 8.088539613865344e-05, "loss": 0.4831, "step": 38840 }, { "epoch": 9.616336633663366, "grad_norm": 0.5677065849304199, "learning_rate": 8.087455912235082e-05, "loss": 0.4758, "step": 38850 }, { "epoch": 9.618811881188119, "grad_norm": 0.5681304335594177, "learning_rate": 8.08637197613292e-05, "loss": 0.4774, "step": 38860 }, { "epoch": 9.621287128712872, "grad_norm": 0.5610587000846863, "learning_rate": 8.085287805641175e-05, "loss": 0.4759, "step": 38870 }, { "epoch": 9.623762376237623, "grad_norm": 0.5573794841766357, "learning_rate": 8.084203400842183e-05, "loss": 0.4753, "step": 38880 }, { "epoch": 9.626237623762377, "grad_norm": 0.5524847507476807, "learning_rate": 8.083118761818295e-05, "loss": 0.4756, "step": 38890 }, { "epoch": 9.628712871287128, "grad_norm": 0.5483666658401489, "learning_rate": 8.082033888651885e-05, "loss": 0.4792, "step": 38900 }, { "epoch": 9.631188118811881, "grad_norm": 0.5710830688476562, "learning_rate": 8.08094878142534e-05, "loss": 0.4805, "step": 38910 }, { "epoch": 9.633663366336634, "grad_norm": 0.5496314167976379, "learning_rate": 8.079863440221069e-05, "loss": 0.4784, "step": 38920 }, { "epoch": 9.636138613861386, "grad_norm": 0.5522844195365906, "learning_rate": 8.078777865121494e-05, "loss": 0.476, "step": 38930 }, { "epoch": 9.638613861386139, "grad_norm": 0.5494941473007202, "learning_rate": 8.077692056209059e-05, "loss": 0.4785, "step": 38940 }, { "epoch": 9.641089108910892, "grad_norm": 0.5496250987052917, "learning_rate": 8.076606013566223e-05, "loss": 0.477, "step": 38950 }, { "epoch": 9.643564356435643, "grad_norm": 0.5804115533828735, "learning_rate": 8.075519737275464e-05, "loss": 0.4752, "step": 38960 }, { "epoch": 9.646039603960396, "grad_norm": 0.5735257267951965, "learning_rate": 8.074433227419276e-05, "loss": 0.4734, "step": 38970 }, { "epoch": 9.648514851485148, "grad_norm": 0.5190016627311707, "learning_rate": 8.073346484080176e-05, "loss": 0.4745, "step": 38980 }, { "epoch": 9.650990099009901, "grad_norm": 0.5983937382698059, "learning_rate": 8.072259507340691e-05, "loss": 0.4803, "step": 38990 }, { "epoch": 9.653465346534654, "grad_norm": 0.5522649884223938, "learning_rate": 8.071172297283373e-05, "loss": 0.4757, "step": 39000 }, { "epoch": 9.655940594059405, "grad_norm": 0.5483918786048889, "learning_rate": 8.070084853990786e-05, "loss": 0.4757, "step": 39010 }, { "epoch": 9.658415841584159, "grad_norm": 0.545775830745697, "learning_rate": 8.068997177545518e-05, "loss": 0.4777, "step": 39020 }, { "epoch": 9.660891089108912, "grad_norm": 0.5444362759590149, "learning_rate": 8.067909268030164e-05, "loss": 0.4725, "step": 39030 }, { "epoch": 9.663366336633663, "grad_norm": 0.5384758710861206, "learning_rate": 8.066821125527351e-05, "loss": 0.4782, "step": 39040 }, { "epoch": 9.665841584158416, "grad_norm": 0.5821107625961304, "learning_rate": 8.06573275011971e-05, "loss": 0.472, "step": 39050 }, { "epoch": 9.668316831683168, "grad_norm": 0.5576624274253845, "learning_rate": 8.0646441418899e-05, "loss": 0.4791, "step": 39060 }, { "epoch": 9.67079207920792, "grad_norm": 0.5576016306877136, "learning_rate": 8.063555300920594e-05, "loss": 0.4747, "step": 39070 }, { "epoch": 9.673267326732674, "grad_norm": 0.5260089635848999, "learning_rate": 8.062466227294477e-05, "loss": 0.4857, "step": 39080 }, { "epoch": 9.675742574257425, "grad_norm": 0.5887234807014465, "learning_rate": 8.061376921094263e-05, "loss": 0.4757, "step": 39090 }, { "epoch": 9.678217821782178, "grad_norm": 0.5095102190971375, "learning_rate": 8.060287382402675e-05, "loss": 0.4778, "step": 39100 }, { "epoch": 9.680693069306932, "grad_norm": 0.5628702044487, "learning_rate": 8.059197611302456e-05, "loss": 0.4777, "step": 39110 }, { "epoch": 9.683168316831683, "grad_norm": 0.5541406869888306, "learning_rate": 8.058107607876366e-05, "loss": 0.4735, "step": 39120 }, { "epoch": 9.685643564356436, "grad_norm": 0.6150193810462952, "learning_rate": 8.057017372207188e-05, "loss": 0.4778, "step": 39130 }, { "epoch": 9.688118811881187, "grad_norm": 0.5558690428733826, "learning_rate": 8.055926904377712e-05, "loss": 0.4808, "step": 39140 }, { "epoch": 9.69059405940594, "grad_norm": 0.5870663523674011, "learning_rate": 8.054836204470757e-05, "loss": 0.4806, "step": 39150 }, { "epoch": 9.693069306930694, "grad_norm": 0.5508984923362732, "learning_rate": 8.053745272569152e-05, "loss": 0.4789, "step": 39160 }, { "epoch": 9.695544554455445, "grad_norm": 0.5712631940841675, "learning_rate": 8.052654108755746e-05, "loss": 0.4815, "step": 39170 }, { "epoch": 9.698019801980198, "grad_norm": 0.5488578081130981, "learning_rate": 8.051562713113406e-05, "loss": 0.4753, "step": 39180 }, { "epoch": 9.700495049504951, "grad_norm": 0.5829219222068787, "learning_rate": 8.050471085725018e-05, "loss": 0.4762, "step": 39190 }, { "epoch": 9.702970297029703, "grad_norm": 0.5830057263374329, "learning_rate": 8.049379226673482e-05, "loss": 0.4812, "step": 39200 }, { "epoch": 9.705445544554456, "grad_norm": 0.559853732585907, "learning_rate": 8.048287136041716e-05, "loss": 0.4783, "step": 39210 }, { "epoch": 9.707920792079207, "grad_norm": 0.5264537930488586, "learning_rate": 8.04719481391266e-05, "loss": 0.4732, "step": 39220 }, { "epoch": 9.71039603960396, "grad_norm": 0.5690034627914429, "learning_rate": 8.046102260369268e-05, "loss": 0.4751, "step": 39230 }, { "epoch": 9.712871287128714, "grad_norm": 0.5592724680900574, "learning_rate": 8.045009475494512e-05, "loss": 0.4827, "step": 39240 }, { "epoch": 9.715346534653465, "grad_norm": 0.505450963973999, "learning_rate": 8.043916459371381e-05, "loss": 0.4771, "step": 39250 }, { "epoch": 9.717821782178218, "grad_norm": 0.5836094617843628, "learning_rate": 8.042823212082883e-05, "loss": 0.4711, "step": 39260 }, { "epoch": 9.72029702970297, "grad_norm": 0.5836050510406494, "learning_rate": 8.041729733712045e-05, "loss": 0.4803, "step": 39270 }, { "epoch": 9.722772277227723, "grad_norm": 0.522892951965332, "learning_rate": 8.040636024341905e-05, "loss": 0.4868, "step": 39280 }, { "epoch": 9.725247524752476, "grad_norm": 0.5537158846855164, "learning_rate": 8.039542084055527e-05, "loss": 0.4752, "step": 39290 }, { "epoch": 9.727722772277227, "grad_norm": 0.5417633056640625, "learning_rate": 8.038447912935985e-05, "loss": 0.4853, "step": 39300 }, { "epoch": 9.73019801980198, "grad_norm": 0.5477379560470581, "learning_rate": 8.037353511066377e-05, "loss": 0.4756, "step": 39310 }, { "epoch": 9.732673267326733, "grad_norm": 0.5494825839996338, "learning_rate": 8.036258878529816e-05, "loss": 0.4798, "step": 39320 }, { "epoch": 9.735148514851485, "grad_norm": 0.611785352230072, "learning_rate": 8.035164015409429e-05, "loss": 0.4808, "step": 39330 }, { "epoch": 9.737623762376238, "grad_norm": 0.5642830729484558, "learning_rate": 8.034068921788366e-05, "loss": 0.473, "step": 39340 }, { "epoch": 9.740099009900991, "grad_norm": 0.5435744524002075, "learning_rate": 8.032973597749791e-05, "loss": 0.4812, "step": 39350 }, { "epoch": 9.742574257425742, "grad_norm": 0.5582489967346191, "learning_rate": 8.031878043376888e-05, "loss": 0.4802, "step": 39360 }, { "epoch": 9.745049504950495, "grad_norm": 0.5533810257911682, "learning_rate": 8.030782258752856e-05, "loss": 0.4755, "step": 39370 }, { "epoch": 9.747524752475247, "grad_norm": 0.5679976940155029, "learning_rate": 8.029686243960913e-05, "loss": 0.478, "step": 39380 }, { "epoch": 9.75, "grad_norm": 0.5597028136253357, "learning_rate": 8.028589999084293e-05, "loss": 0.4766, "step": 39390 }, { "epoch": 9.752475247524753, "grad_norm": 0.5544673800468445, "learning_rate": 8.027493524206249e-05, "loss": 0.4765, "step": 39400 }, { "epoch": 9.754950495049505, "grad_norm": 0.5336848497390747, "learning_rate": 8.026396819410054e-05, "loss": 0.4796, "step": 39410 }, { "epoch": 9.757425742574258, "grad_norm": 0.5326798558235168, "learning_rate": 8.025299884778991e-05, "loss": 0.4727, "step": 39420 }, { "epoch": 9.759900990099009, "grad_norm": 0.5314643979072571, "learning_rate": 8.024202720396367e-05, "loss": 0.4752, "step": 39430 }, { "epoch": 9.762376237623762, "grad_norm": 0.5452172160148621, "learning_rate": 8.023105326345506e-05, "loss": 0.4839, "step": 39440 }, { "epoch": 9.764851485148515, "grad_norm": 0.5858799815177917, "learning_rate": 8.022007702709744e-05, "loss": 0.4784, "step": 39450 }, { "epoch": 9.767326732673267, "grad_norm": 0.5487896203994751, "learning_rate": 8.020909849572444e-05, "loss": 0.4795, "step": 39460 }, { "epoch": 9.76980198019802, "grad_norm": 0.5552473068237305, "learning_rate": 8.019811767016973e-05, "loss": 0.472, "step": 39470 }, { "epoch": 9.772277227722773, "grad_norm": 0.5802110433578491, "learning_rate": 8.01871345512673e-05, "loss": 0.4754, "step": 39480 }, { "epoch": 9.774752475247524, "grad_norm": 0.5719156265258789, "learning_rate": 8.017614913985121e-05, "loss": 0.4808, "step": 39490 }, { "epoch": 9.777227722772277, "grad_norm": 0.5488607883453369, "learning_rate": 8.016516143675574e-05, "loss": 0.4756, "step": 39500 }, { "epoch": 9.77970297029703, "grad_norm": 0.5766350030899048, "learning_rate": 8.015417144281534e-05, "loss": 0.4729, "step": 39510 }, { "epoch": 9.782178217821782, "grad_norm": 0.5741209387779236, "learning_rate": 8.01431791588646e-05, "loss": 0.4758, "step": 39520 }, { "epoch": 9.784653465346535, "grad_norm": 0.5515168905258179, "learning_rate": 8.013218458573835e-05, "loss": 0.4738, "step": 39530 }, { "epoch": 9.787128712871286, "grad_norm": 0.5778453946113586, "learning_rate": 8.012118772427153e-05, "loss": 0.4817, "step": 39540 }, { "epoch": 9.78960396039604, "grad_norm": 0.5699706673622131, "learning_rate": 8.011018857529927e-05, "loss": 0.474, "step": 39550 }, { "epoch": 9.792079207920793, "grad_norm": 0.5448683500289917, "learning_rate": 8.009918713965691e-05, "loss": 0.4823, "step": 39560 }, { "epoch": 9.794554455445544, "grad_norm": 0.568464994430542, "learning_rate": 8.008818341817992e-05, "loss": 0.4782, "step": 39570 }, { "epoch": 9.797029702970297, "grad_norm": 0.5532470941543579, "learning_rate": 8.007717741170398e-05, "loss": 0.4717, "step": 39580 }, { "epoch": 9.799504950495049, "grad_norm": 0.5297067761421204, "learning_rate": 8.006616912106489e-05, "loss": 0.4725, "step": 39590 }, { "epoch": 9.801980198019802, "grad_norm": 0.5607874393463135, "learning_rate": 8.005515854709869e-05, "loss": 0.4798, "step": 39600 }, { "epoch": 9.804455445544555, "grad_norm": 0.5757636427879333, "learning_rate": 8.004414569064154e-05, "loss": 0.4805, "step": 39610 }, { "epoch": 9.806930693069306, "grad_norm": 0.5411014556884766, "learning_rate": 8.003313055252978e-05, "loss": 0.4833, "step": 39620 }, { "epoch": 9.80940594059406, "grad_norm": 0.5570172667503357, "learning_rate": 8.002211313359997e-05, "loss": 0.4805, "step": 39630 }, { "epoch": 9.811881188118813, "grad_norm": 0.5261518359184265, "learning_rate": 8.00110934346888e-05, "loss": 0.4808, "step": 39640 }, { "epoch": 9.814356435643564, "grad_norm": 0.5380529165267944, "learning_rate": 8.000007145663312e-05, "loss": 0.479, "step": 39650 }, { "epoch": 9.816831683168317, "grad_norm": 0.5058093070983887, "learning_rate": 7.998904720027003e-05, "loss": 0.4771, "step": 39660 }, { "epoch": 9.819306930693068, "grad_norm": 0.5232227444648743, "learning_rate": 7.997802066643669e-05, "loss": 0.4745, "step": 39670 }, { "epoch": 9.821782178217822, "grad_norm": 0.5422173738479614, "learning_rate": 7.996699185597052e-05, "loss": 0.4787, "step": 39680 }, { "epoch": 9.824257425742575, "grad_norm": 0.5505812764167786, "learning_rate": 7.99559607697091e-05, "loss": 0.4813, "step": 39690 }, { "epoch": 9.826732673267326, "grad_norm": 0.4985778033733368, "learning_rate": 7.994492740849014e-05, "loss": 0.4738, "step": 39700 }, { "epoch": 9.82920792079208, "grad_norm": 0.518971860408783, "learning_rate": 7.993389177315156e-05, "loss": 0.4742, "step": 39710 }, { "epoch": 9.831683168316832, "grad_norm": 0.5362893342971802, "learning_rate": 7.992285386453145e-05, "loss": 0.4792, "step": 39720 }, { "epoch": 9.834158415841584, "grad_norm": 0.5469619035720825, "learning_rate": 7.991181368346807e-05, "loss": 0.4796, "step": 39730 }, { "epoch": 9.836633663366337, "grad_norm": 0.5630517601966858, "learning_rate": 7.990077123079982e-05, "loss": 0.4783, "step": 39740 }, { "epoch": 9.839108910891088, "grad_norm": 0.5958415865898132, "learning_rate": 7.988972650736534e-05, "loss": 0.4723, "step": 39750 }, { "epoch": 9.841584158415841, "grad_norm": 0.605800211429596, "learning_rate": 7.987867951400338e-05, "loss": 0.4764, "step": 39760 }, { "epoch": 9.844059405940595, "grad_norm": 0.5531159043312073, "learning_rate": 7.98676302515529e-05, "loss": 0.4763, "step": 39770 }, { "epoch": 9.846534653465346, "grad_norm": 0.5631864070892334, "learning_rate": 7.985657872085302e-05, "loss": 0.4761, "step": 39780 }, { "epoch": 9.849009900990099, "grad_norm": 0.5314578413963318, "learning_rate": 7.984552492274301e-05, "loss": 0.4744, "step": 39790 }, { "epoch": 9.851485148514852, "grad_norm": 0.5472233891487122, "learning_rate": 7.983446885806234e-05, "loss": 0.479, "step": 39800 }, { "epoch": 9.853960396039604, "grad_norm": 0.546445906162262, "learning_rate": 7.982341052765065e-05, "loss": 0.4733, "step": 39810 }, { "epoch": 9.856435643564357, "grad_norm": 0.5378692150115967, "learning_rate": 7.981234993234776e-05, "loss": 0.4796, "step": 39820 }, { "epoch": 9.858910891089108, "grad_norm": 0.5913535952568054, "learning_rate": 7.980128707299364e-05, "loss": 0.4763, "step": 39830 }, { "epoch": 9.861386138613861, "grad_norm": 0.5397394299507141, "learning_rate": 7.979022195042842e-05, "loss": 0.4822, "step": 39840 }, { "epoch": 9.863861386138614, "grad_norm": 0.48743894696235657, "learning_rate": 7.977915456549245e-05, "loss": 0.474, "step": 39850 }, { "epoch": 9.866336633663366, "grad_norm": 0.5215013027191162, "learning_rate": 7.976808491902624e-05, "loss": 0.487, "step": 39860 }, { "epoch": 9.868811881188119, "grad_norm": 0.5132582187652588, "learning_rate": 7.975701301187039e-05, "loss": 0.4749, "step": 39870 }, { "epoch": 9.871287128712872, "grad_norm": 0.5157925486564636, "learning_rate": 7.974593884486581e-05, "loss": 0.4741, "step": 39880 }, { "epoch": 9.873762376237623, "grad_norm": 0.5576191544532776, "learning_rate": 7.973486241885346e-05, "loss": 0.4711, "step": 39890 }, { "epoch": 9.876237623762377, "grad_norm": 0.5331785082817078, "learning_rate": 7.972378373467457e-05, "loss": 0.475, "step": 39900 }, { "epoch": 9.878712871287128, "grad_norm": 0.5636085271835327, "learning_rate": 7.971270279317045e-05, "loss": 0.4764, "step": 39910 }, { "epoch": 9.881188118811881, "grad_norm": 0.5482370257377625, "learning_rate": 7.970161959518263e-05, "loss": 0.4784, "step": 39920 }, { "epoch": 9.883663366336634, "grad_norm": 0.5411211848258972, "learning_rate": 7.969053414155282e-05, "loss": 0.4755, "step": 39930 }, { "epoch": 9.886138613861386, "grad_norm": 0.5497320294380188, "learning_rate": 7.967944643312289e-05, "loss": 0.4754, "step": 39940 }, { "epoch": 9.888613861386139, "grad_norm": 0.5515309572219849, "learning_rate": 7.966835647073487e-05, "loss": 0.4793, "step": 39950 }, { "epoch": 9.891089108910892, "grad_norm": 0.5060154795646667, "learning_rate": 7.965726425523096e-05, "loss": 0.4795, "step": 39960 }, { "epoch": 9.893564356435643, "grad_norm": 0.5162692070007324, "learning_rate": 7.964616978745356e-05, "loss": 0.4752, "step": 39970 }, { "epoch": 9.896039603960396, "grad_norm": 0.5161259174346924, "learning_rate": 7.963507306824521e-05, "loss": 0.4744, "step": 39980 }, { "epoch": 9.898514851485148, "grad_norm": 0.5652627944946289, "learning_rate": 7.962397409844864e-05, "loss": 0.4707, "step": 39990 }, { "epoch": 9.900990099009901, "grad_norm": 0.5666871070861816, "learning_rate": 7.961287287890672e-05, "loss": 0.4792, "step": 40000 }, { "epoch": 9.903465346534654, "grad_norm": 0.5654459595680237, "learning_rate": 7.960176941046256e-05, "loss": 0.4736, "step": 40010 }, { "epoch": 9.905940594059405, "grad_norm": 0.5511441230773926, "learning_rate": 7.959066369395936e-05, "loss": 0.4742, "step": 40020 }, { "epoch": 9.908415841584159, "grad_norm": 0.5161470770835876, "learning_rate": 7.957955573024052e-05, "loss": 0.4744, "step": 40030 }, { "epoch": 9.910891089108912, "grad_norm": 0.5317502021789551, "learning_rate": 7.956844552014964e-05, "loss": 0.482, "step": 40040 }, { "epoch": 9.913366336633663, "grad_norm": 0.5686597228050232, "learning_rate": 7.955733306453046e-05, "loss": 0.4696, "step": 40050 }, { "epoch": 9.915841584158416, "grad_norm": 0.5373650789260864, "learning_rate": 7.954621836422689e-05, "loss": 0.4741, "step": 40060 }, { "epoch": 9.918316831683168, "grad_norm": 0.5439413785934448, "learning_rate": 7.953510142008303e-05, "loss": 0.477, "step": 40070 }, { "epoch": 9.92079207920792, "grad_norm": 0.5124827027320862, "learning_rate": 7.952398223294312e-05, "loss": 0.4711, "step": 40080 }, { "epoch": 9.923267326732674, "grad_norm": 0.545612096786499, "learning_rate": 7.95128608036516e-05, "loss": 0.4754, "step": 40090 }, { "epoch": 9.925742574257425, "grad_norm": 0.5687117576599121, "learning_rate": 7.95017371330531e-05, "loss": 0.4721, "step": 40100 }, { "epoch": 9.928217821782178, "grad_norm": 0.5369729995727539, "learning_rate": 7.949061122199233e-05, "loss": 0.4736, "step": 40110 }, { "epoch": 9.930693069306932, "grad_norm": 0.5462371110916138, "learning_rate": 7.947948307131425e-05, "loss": 0.4764, "step": 40120 }, { "epoch": 9.933168316831683, "grad_norm": 0.5716204643249512, "learning_rate": 7.946835268186401e-05, "loss": 0.4841, "step": 40130 }, { "epoch": 9.935643564356436, "grad_norm": 0.549381673336029, "learning_rate": 7.945722005448683e-05, "loss": 0.4813, "step": 40140 }, { "epoch": 9.938118811881187, "grad_norm": 0.5702754855155945, "learning_rate": 7.94460851900282e-05, "loss": 0.4789, "step": 40150 }, { "epoch": 9.94059405940594, "grad_norm": 0.5458805561065674, "learning_rate": 7.943494808933374e-05, "loss": 0.4803, "step": 40160 }, { "epoch": 9.943069306930694, "grad_norm": 0.5848128795623779, "learning_rate": 7.94238087532492e-05, "loss": 0.4786, "step": 40170 }, { "epoch": 9.945544554455445, "grad_norm": 0.520071804523468, "learning_rate": 7.94126671826206e-05, "loss": 0.4763, "step": 40180 }, { "epoch": 9.948019801980198, "grad_norm": 0.5099064111709595, "learning_rate": 7.9401523378294e-05, "loss": 0.4786, "step": 40190 }, { "epoch": 9.950495049504951, "grad_norm": 0.5690998435020447, "learning_rate": 7.939037734111573e-05, "loss": 0.4811, "step": 40200 }, { "epoch": 9.952970297029703, "grad_norm": 0.5369478464126587, "learning_rate": 7.93792290719323e-05, "loss": 0.4824, "step": 40210 }, { "epoch": 9.955445544554456, "grad_norm": 0.5176922678947449, "learning_rate": 7.936807857159026e-05, "loss": 0.4746, "step": 40220 }, { "epoch": 9.957920792079207, "grad_norm": 0.54506516456604, "learning_rate": 7.93569258409365e-05, "loss": 0.475, "step": 40230 }, { "epoch": 9.96039603960396, "grad_norm": 0.5668498277664185, "learning_rate": 7.934577088081795e-05, "loss": 0.4803, "step": 40240 }, { "epoch": 9.962871287128714, "grad_norm": 0.5850801467895508, "learning_rate": 7.933461369208177e-05, "loss": 0.4766, "step": 40250 }, { "epoch": 9.965346534653465, "grad_norm": 0.5786815881729126, "learning_rate": 7.932345427557526e-05, "loss": 0.4764, "step": 40260 }, { "epoch": 9.967821782178218, "grad_norm": 0.5890253782272339, "learning_rate": 7.931229263214592e-05, "loss": 0.4803, "step": 40270 }, { "epoch": 9.97029702970297, "grad_norm": 0.5707659125328064, "learning_rate": 7.930112876264142e-05, "loss": 0.4761, "step": 40280 }, { "epoch": 9.972772277227723, "grad_norm": 0.5725079774856567, "learning_rate": 7.928996266790953e-05, "loss": 0.4761, "step": 40290 }, { "epoch": 9.975247524752476, "grad_norm": 0.5748230814933777, "learning_rate": 7.92787943487983e-05, "loss": 0.4832, "step": 40300 }, { "epoch": 9.977722772277227, "grad_norm": 0.5541805624961853, "learning_rate": 7.926762380615584e-05, "loss": 0.4711, "step": 40310 }, { "epoch": 9.98019801980198, "grad_norm": 0.5754391551017761, "learning_rate": 7.925645104083051e-05, "loss": 0.4747, "step": 40320 }, { "epoch": 9.982673267326733, "grad_norm": 0.5563113689422607, "learning_rate": 7.92452760536708e-05, "loss": 0.4753, "step": 40330 }, { "epoch": 9.985148514851485, "grad_norm": 0.5672330856323242, "learning_rate": 7.923409884552537e-05, "loss": 0.4803, "step": 40340 }, { "epoch": 9.987623762376238, "grad_norm": 0.5651419758796692, "learning_rate": 7.922291941724307e-05, "loss": 0.4766, "step": 40350 }, { "epoch": 9.990099009900991, "grad_norm": 0.5212990045547485, "learning_rate": 7.921173776967291e-05, "loss": 0.4789, "step": 40360 }, { "epoch": 9.992574257425742, "grad_norm": 0.5716111063957214, "learning_rate": 7.920055390366401e-05, "loss": 0.4825, "step": 40370 }, { "epoch": 9.995049504950495, "grad_norm": 0.5237804651260376, "learning_rate": 7.918936782006576e-05, "loss": 0.4758, "step": 40380 }, { "epoch": 9.997524752475247, "grad_norm": 0.48501089215278625, "learning_rate": 7.917817951972766e-05, "loss": 0.4785, "step": 40390 }, { "epoch": 10.0, "grad_norm": 0.533288300037384, "learning_rate": 7.91669890034994e-05, "loss": 0.4799, "step": 40400 }, { "epoch": 10.002475247524753, "grad_norm": 0.5452917814254761, "learning_rate": 7.915579627223079e-05, "loss": 0.4756, "step": 40410 }, { "epoch": 10.004950495049505, "grad_norm": 0.5728321671485901, "learning_rate": 7.914460132677188e-05, "loss": 0.4715, "step": 40420 }, { "epoch": 10.007425742574258, "grad_norm": 0.5458479523658752, "learning_rate": 7.913340416797284e-05, "loss": 0.483, "step": 40430 }, { "epoch": 10.009900990099009, "grad_norm": 0.5625064969062805, "learning_rate": 7.9122204796684e-05, "loss": 0.4809, "step": 40440 }, { "epoch": 10.012376237623762, "grad_norm": 0.536502480506897, "learning_rate": 7.91110032137559e-05, "loss": 0.4773, "step": 40450 }, { "epoch": 10.014851485148515, "grad_norm": 0.5393995046615601, "learning_rate": 7.909979942003922e-05, "loss": 0.4785, "step": 40460 }, { "epoch": 10.017326732673267, "grad_norm": 0.5401214957237244, "learning_rate": 7.908859341638484e-05, "loss": 0.4757, "step": 40470 }, { "epoch": 10.01980198019802, "grad_norm": 0.5263001322746277, "learning_rate": 7.907738520364371e-05, "loss": 0.4757, "step": 40480 }, { "epoch": 10.022277227722773, "grad_norm": 0.5509888529777527, "learning_rate": 7.906617478266712e-05, "loss": 0.4767, "step": 40490 }, { "epoch": 10.024752475247524, "grad_norm": 0.5915299654006958, "learning_rate": 7.905496215430635e-05, "loss": 0.4769, "step": 40500 }, { "epoch": 10.027227722772277, "grad_norm": 0.5598218441009521, "learning_rate": 7.904374731941295e-05, "loss": 0.4735, "step": 40510 }, { "epoch": 10.029702970297029, "grad_norm": 0.5302037596702576, "learning_rate": 7.90325302788386e-05, "loss": 0.4753, "step": 40520 }, { "epoch": 10.032178217821782, "grad_norm": 0.5290553569793701, "learning_rate": 7.90213110334352e-05, "loss": 0.4746, "step": 40530 }, { "epoch": 10.034653465346535, "grad_norm": 0.5619435906410217, "learning_rate": 7.901008958405474e-05, "loss": 0.4709, "step": 40540 }, { "epoch": 10.037128712871286, "grad_norm": 0.5260089039802551, "learning_rate": 7.899886593154941e-05, "loss": 0.4804, "step": 40550 }, { "epoch": 10.03960396039604, "grad_norm": 0.5392195582389832, "learning_rate": 7.898764007677162e-05, "loss": 0.4771, "step": 40560 }, { "epoch": 10.042079207920793, "grad_norm": 0.5478048920631409, "learning_rate": 7.897641202057385e-05, "loss": 0.4785, "step": 40570 }, { "epoch": 10.044554455445544, "grad_norm": 0.5634122490882874, "learning_rate": 7.896518176380881e-05, "loss": 0.4791, "step": 40580 }, { "epoch": 10.047029702970297, "grad_norm": 0.5338401794433594, "learning_rate": 7.895394930732937e-05, "loss": 0.4767, "step": 40590 }, { "epoch": 10.049504950495049, "grad_norm": 0.5424391627311707, "learning_rate": 7.894271465198857e-05, "loss": 0.4777, "step": 40600 }, { "epoch": 10.051980198019802, "grad_norm": 0.5177606344223022, "learning_rate": 7.89314777986396e-05, "loss": 0.4727, "step": 40610 }, { "epoch": 10.054455445544555, "grad_norm": 0.5206916928291321, "learning_rate": 7.892023874813581e-05, "loss": 0.4739, "step": 40620 }, { "epoch": 10.056930693069306, "grad_norm": 0.5248638987541199, "learning_rate": 7.890899750133075e-05, "loss": 0.4763, "step": 40630 }, { "epoch": 10.05940594059406, "grad_norm": 0.5140098929405212, "learning_rate": 7.88977540590781e-05, "loss": 0.478, "step": 40640 }, { "epoch": 10.061881188118813, "grad_norm": 0.5442147254943848, "learning_rate": 7.888650842223177e-05, "loss": 0.4761, "step": 40650 }, { "epoch": 10.064356435643564, "grad_norm": 0.5328007936477661, "learning_rate": 7.887526059164572e-05, "loss": 0.4765, "step": 40660 }, { "epoch": 10.066831683168317, "grad_norm": 0.5140489935874939, "learning_rate": 7.886401056817423e-05, "loss": 0.4798, "step": 40670 }, { "epoch": 10.069306930693068, "grad_norm": 0.5469507575035095, "learning_rate": 7.885275835267159e-05, "loss": 0.4743, "step": 40680 }, { "epoch": 10.071782178217822, "grad_norm": 0.5507546067237854, "learning_rate": 7.884150394599237e-05, "loss": 0.4768, "step": 40690 }, { "epoch": 10.074257425742575, "grad_norm": 0.5300948023796082, "learning_rate": 7.883024734899127e-05, "loss": 0.4732, "step": 40700 }, { "epoch": 10.076732673267326, "grad_norm": 0.5391700863838196, "learning_rate": 7.881898856252314e-05, "loss": 0.4794, "step": 40710 }, { "epoch": 10.07920792079208, "grad_norm": 0.5528740286827087, "learning_rate": 7.880772758744302e-05, "loss": 0.4765, "step": 40720 }, { "epoch": 10.081683168316832, "grad_norm": 0.5387889742851257, "learning_rate": 7.879646442460611e-05, "loss": 0.4816, "step": 40730 }, { "epoch": 10.084158415841584, "grad_norm": 0.5489816665649414, "learning_rate": 7.878519907486777e-05, "loss": 0.4833, "step": 40740 }, { "epoch": 10.086633663366337, "grad_norm": 0.5483787655830383, "learning_rate": 7.877393153908353e-05, "loss": 0.4788, "step": 40750 }, { "epoch": 10.089108910891088, "grad_norm": 0.5576854944229126, "learning_rate": 7.876266181810905e-05, "loss": 0.4767, "step": 40760 }, { "epoch": 10.091584158415841, "grad_norm": 0.5554502010345459, "learning_rate": 7.875138991280024e-05, "loss": 0.481, "step": 40770 }, { "epoch": 10.094059405940595, "grad_norm": 0.5808145403862, "learning_rate": 7.874011582401312e-05, "loss": 0.482, "step": 40780 }, { "epoch": 10.096534653465346, "grad_norm": 0.5387398600578308, "learning_rate": 7.872883955260387e-05, "loss": 0.4705, "step": 40790 }, { "epoch": 10.099009900990099, "grad_norm": 0.5430834293365479, "learning_rate": 7.871756109942883e-05, "loss": 0.4764, "step": 40800 }, { "epoch": 10.101485148514852, "grad_norm": 0.5312401652336121, "learning_rate": 7.870628046534457e-05, "loss": 0.4773, "step": 40810 }, { "epoch": 10.103960396039604, "grad_norm": 0.5284324288368225, "learning_rate": 7.869499765120774e-05, "loss": 0.4753, "step": 40820 }, { "epoch": 10.106435643564357, "grad_norm": 0.5471721291542053, "learning_rate": 7.86837126578752e-05, "loss": 0.4767, "step": 40830 }, { "epoch": 10.108910891089108, "grad_norm": 0.5220054984092712, "learning_rate": 7.867242548620399e-05, "loss": 0.4744, "step": 40840 }, { "epoch": 10.111386138613861, "grad_norm": 0.5410345792770386, "learning_rate": 7.866113613705128e-05, "loss": 0.4764, "step": 40850 }, { "epoch": 10.113861386138614, "grad_norm": 0.548473060131073, "learning_rate": 7.864984461127443e-05, "loss": 0.4754, "step": 40860 }, { "epoch": 10.116336633663366, "grad_norm": 0.5760980248451233, "learning_rate": 7.863855090973096e-05, "loss": 0.4765, "step": 40870 }, { "epoch": 10.118811881188119, "grad_norm": 0.5555658936500549, "learning_rate": 7.862725503327854e-05, "loss": 0.4785, "step": 40880 }, { "epoch": 10.121287128712872, "grad_norm": 0.5203664302825928, "learning_rate": 7.861595698277503e-05, "loss": 0.4779, "step": 40890 }, { "epoch": 10.123762376237623, "grad_norm": 0.5055129528045654, "learning_rate": 7.860465675907842e-05, "loss": 0.4777, "step": 40900 }, { "epoch": 10.126237623762377, "grad_norm": 0.5380151271820068, "learning_rate": 7.859335436304692e-05, "loss": 0.4739, "step": 40910 }, { "epoch": 10.128712871287128, "grad_norm": 0.5638600587844849, "learning_rate": 7.858204979553885e-05, "loss": 0.4747, "step": 40920 }, { "epoch": 10.131188118811881, "grad_norm": 0.5259812474250793, "learning_rate": 7.857074305741273e-05, "loss": 0.4733, "step": 40930 }, { "epoch": 10.133663366336634, "grad_norm": 0.5224277377128601, "learning_rate": 7.855943414952722e-05, "loss": 0.4744, "step": 40940 }, { "epoch": 10.136138613861386, "grad_norm": 0.5016710162162781, "learning_rate": 7.854812307274116e-05, "loss": 0.4756, "step": 40950 }, { "epoch": 10.138613861386139, "grad_norm": 0.5056447982788086, "learning_rate": 7.853680982791356e-05, "loss": 0.4725, "step": 40960 }, { "epoch": 10.141089108910892, "grad_norm": 0.5187898278236389, "learning_rate": 7.852549441590357e-05, "loss": 0.476, "step": 40970 }, { "epoch": 10.143564356435643, "grad_norm": 0.5136454105377197, "learning_rate": 7.851417683757053e-05, "loss": 0.4772, "step": 40980 }, { "epoch": 10.146039603960396, "grad_norm": 0.5225532054901123, "learning_rate": 7.850285709377397e-05, "loss": 0.4734, "step": 40990 }, { "epoch": 10.148514851485148, "grad_norm": 0.5345913171768188, "learning_rate": 7.849153518537347e-05, "loss": 0.4725, "step": 41000 }, { "epoch": 10.150990099009901, "grad_norm": 0.5260655879974365, "learning_rate": 7.848021111322892e-05, "loss": 0.4742, "step": 41010 }, { "epoch": 10.153465346534654, "grad_norm": 0.5632960200309753, "learning_rate": 7.84688848782003e-05, "loss": 0.4728, "step": 41020 }, { "epoch": 10.155940594059405, "grad_norm": 0.5289061665534973, "learning_rate": 7.845755648114773e-05, "loss": 0.4759, "step": 41030 }, { "epoch": 10.158415841584159, "grad_norm": 0.5473327040672302, "learning_rate": 7.844622592293155e-05, "loss": 0.4815, "step": 41040 }, { "epoch": 10.160891089108912, "grad_norm": 0.5408180952072144, "learning_rate": 7.843489320441226e-05, "loss": 0.4836, "step": 41050 }, { "epoch": 10.163366336633663, "grad_norm": 0.5138399600982666, "learning_rate": 7.842355832645048e-05, "loss": 0.4722, "step": 41060 }, { "epoch": 10.165841584158416, "grad_norm": 0.5302850604057312, "learning_rate": 7.841222128990702e-05, "loss": 0.4715, "step": 41070 }, { "epoch": 10.168316831683168, "grad_norm": 0.5197048783302307, "learning_rate": 7.840088209564285e-05, "loss": 0.4751, "step": 41080 }, { "epoch": 10.17079207920792, "grad_norm": 0.5329477787017822, "learning_rate": 7.838954074451913e-05, "loss": 0.4811, "step": 41090 }, { "epoch": 10.173267326732674, "grad_norm": 0.5155958533287048, "learning_rate": 7.837819723739714e-05, "loss": 0.4741, "step": 41100 }, { "epoch": 10.175742574257425, "grad_norm": 0.5328543782234192, "learning_rate": 7.836685157513834e-05, "loss": 0.4782, "step": 41110 }, { "epoch": 10.178217821782178, "grad_norm": 0.5678436160087585, "learning_rate": 7.835550375860436e-05, "loss": 0.479, "step": 41120 }, { "epoch": 10.180693069306932, "grad_norm": 0.5381492972373962, "learning_rate": 7.834415378865703e-05, "loss": 0.4711, "step": 41130 }, { "epoch": 10.183168316831683, "grad_norm": 0.5650216341018677, "learning_rate": 7.833280166615826e-05, "loss": 0.4794, "step": 41140 }, { "epoch": 10.185643564356436, "grad_norm": 0.5396777391433716, "learning_rate": 7.832144739197016e-05, "loss": 0.4794, "step": 41150 }, { "epoch": 10.188118811881187, "grad_norm": 0.5676851868629456, "learning_rate": 7.831009096695508e-05, "loss": 0.4807, "step": 41160 }, { "epoch": 10.19059405940594, "grad_norm": 0.5564223527908325, "learning_rate": 7.829873239197538e-05, "loss": 0.4759, "step": 41170 }, { "epoch": 10.193069306930694, "grad_norm": 0.5675831437110901, "learning_rate": 7.828737166789373e-05, "loss": 0.4768, "step": 41180 }, { "epoch": 10.195544554455445, "grad_norm": 0.5162363648414612, "learning_rate": 7.827600879557288e-05, "loss": 0.476, "step": 41190 }, { "epoch": 10.198019801980198, "grad_norm": 0.5829764604568481, "learning_rate": 7.826464377587577e-05, "loss": 0.4793, "step": 41200 }, { "epoch": 10.200495049504951, "grad_norm": 0.6050111055374146, "learning_rate": 7.825327660966551e-05, "loss": 0.4819, "step": 41210 }, { "epoch": 10.202970297029703, "grad_norm": 0.5353959202766418, "learning_rate": 7.824190729780531e-05, "loss": 0.4803, "step": 41220 }, { "epoch": 10.205445544554456, "grad_norm": 0.5169737339019775, "learning_rate": 7.823053584115866e-05, "loss": 0.474, "step": 41230 }, { "epoch": 10.207920792079207, "grad_norm": 0.5459899306297302, "learning_rate": 7.821916224058912e-05, "loss": 0.4784, "step": 41240 }, { "epoch": 10.21039603960396, "grad_norm": 0.5092642307281494, "learning_rate": 7.820778649696042e-05, "loss": 0.4799, "step": 41250 }, { "epoch": 10.212871287128714, "grad_norm": 0.5145877003669739, "learning_rate": 7.819640861113651e-05, "loss": 0.4751, "step": 41260 }, { "epoch": 10.215346534653465, "grad_norm": 0.5453490018844604, "learning_rate": 7.818502858398143e-05, "loss": 0.4784, "step": 41270 }, { "epoch": 10.217821782178218, "grad_norm": 0.5278588533401489, "learning_rate": 7.817364641635943e-05, "loss": 0.4777, "step": 41280 }, { "epoch": 10.220297029702971, "grad_norm": 0.5392113327980042, "learning_rate": 7.816226210913493e-05, "loss": 0.4759, "step": 41290 }, { "epoch": 10.222772277227723, "grad_norm": 0.5283282399177551, "learning_rate": 7.815087566317247e-05, "loss": 0.4771, "step": 41300 }, { "epoch": 10.225247524752476, "grad_norm": 0.5453376173973083, "learning_rate": 7.813948707933678e-05, "loss": 0.4726, "step": 41310 }, { "epoch": 10.227722772277227, "grad_norm": 0.543670654296875, "learning_rate": 7.812809635849277e-05, "loss": 0.4743, "step": 41320 }, { "epoch": 10.23019801980198, "grad_norm": 0.5620225667953491, "learning_rate": 7.811670350150545e-05, "loss": 0.4724, "step": 41330 }, { "epoch": 10.232673267326733, "grad_norm": 0.5210427641868591, "learning_rate": 7.810530850924004e-05, "loss": 0.4744, "step": 41340 }, { "epoch": 10.235148514851485, "grad_norm": 0.5174251794815063, "learning_rate": 7.809391138256196e-05, "loss": 0.4802, "step": 41350 }, { "epoch": 10.237623762376238, "grad_norm": 0.5703702569007874, "learning_rate": 7.80825121223367e-05, "loss": 0.4756, "step": 41360 }, { "epoch": 10.240099009900991, "grad_norm": 0.546469509601593, "learning_rate": 7.807111072942997e-05, "loss": 0.4813, "step": 41370 }, { "epoch": 10.242574257425742, "grad_norm": 0.5257555842399597, "learning_rate": 7.805970720470763e-05, "loss": 0.4758, "step": 41380 }, { "epoch": 10.245049504950495, "grad_norm": 0.5470194220542908, "learning_rate": 7.804830154903572e-05, "loss": 0.4752, "step": 41390 }, { "epoch": 10.247524752475247, "grad_norm": 0.545159101486206, "learning_rate": 7.803689376328041e-05, "loss": 0.4758, "step": 41400 }, { "epoch": 10.25, "grad_norm": 0.5751661658287048, "learning_rate": 7.802548384830803e-05, "loss": 0.4756, "step": 41410 }, { "epoch": 10.252475247524753, "grad_norm": 0.5303000211715698, "learning_rate": 7.801407180498512e-05, "loss": 0.4784, "step": 41420 }, { "epoch": 10.254950495049505, "grad_norm": 0.5371502041816711, "learning_rate": 7.800265763417832e-05, "loss": 0.4746, "step": 41430 }, { "epoch": 10.257425742574258, "grad_norm": 0.5518530607223511, "learning_rate": 7.799124133675449e-05, "loss": 0.4812, "step": 41440 }, { "epoch": 10.259900990099009, "grad_norm": 0.5180864334106445, "learning_rate": 7.797982291358059e-05, "loss": 0.4691, "step": 41450 }, { "epoch": 10.262376237623762, "grad_norm": 0.5435307025909424, "learning_rate": 7.79684023655238e-05, "loss": 0.4734, "step": 41460 }, { "epoch": 10.264851485148515, "grad_norm": 0.5394755005836487, "learning_rate": 7.795697969345142e-05, "loss": 0.4759, "step": 41470 }, { "epoch": 10.267326732673267, "grad_norm": 0.545628011226654, "learning_rate": 7.794555489823093e-05, "loss": 0.4724, "step": 41480 }, { "epoch": 10.26980198019802, "grad_norm": 0.5815857648849487, "learning_rate": 7.793412798073e-05, "loss": 0.4726, "step": 41490 }, { "epoch": 10.272277227722773, "grad_norm": 0.5125067830085754, "learning_rate": 7.792269894181635e-05, "loss": 0.4751, "step": 41500 }, { "epoch": 10.274752475247524, "grad_norm": 0.5375165939331055, "learning_rate": 7.791126778235802e-05, "loss": 0.4754, "step": 41510 }, { "epoch": 10.277227722772277, "grad_norm": 0.5298402905464172, "learning_rate": 7.78998345032231e-05, "loss": 0.4764, "step": 41520 }, { "epoch": 10.27970297029703, "grad_norm": 0.5146468281745911, "learning_rate": 7.788839910527987e-05, "loss": 0.4782, "step": 41530 }, { "epoch": 10.282178217821782, "grad_norm": 0.5246890783309937, "learning_rate": 7.787696158939677e-05, "loss": 0.4729, "step": 41540 }, { "epoch": 10.284653465346535, "grad_norm": 0.5441969633102417, "learning_rate": 7.78655219564424e-05, "loss": 0.4769, "step": 41550 }, { "epoch": 10.287128712871286, "grad_norm": 0.5321877002716064, "learning_rate": 7.785408020728555e-05, "loss": 0.4769, "step": 41560 }, { "epoch": 10.28960396039604, "grad_norm": 0.4880256950855255, "learning_rate": 7.784263634279511e-05, "loss": 0.4772, "step": 41570 }, { "epoch": 10.292079207920793, "grad_norm": 0.549908459186554, "learning_rate": 7.783119036384022e-05, "loss": 0.4755, "step": 41580 }, { "epoch": 10.294554455445544, "grad_norm": 0.5764902234077454, "learning_rate": 7.781974227129005e-05, "loss": 0.4768, "step": 41590 }, { "epoch": 10.297029702970297, "grad_norm": 0.5470278263092041, "learning_rate": 7.780829206601407e-05, "loss": 0.4701, "step": 41600 }, { "epoch": 10.299504950495049, "grad_norm": 0.557974636554718, "learning_rate": 7.779683974888184e-05, "loss": 0.478, "step": 41610 }, { "epoch": 10.301980198019802, "grad_norm": 0.5632579326629639, "learning_rate": 7.778538532076305e-05, "loss": 0.4771, "step": 41620 }, { "epoch": 10.304455445544555, "grad_norm": 0.5238341093063354, "learning_rate": 7.777392878252763e-05, "loss": 0.4707, "step": 41630 }, { "epoch": 10.306930693069306, "grad_norm": 0.5469651222229004, "learning_rate": 7.77624701350456e-05, "loss": 0.4805, "step": 41640 }, { "epoch": 10.30940594059406, "grad_norm": 0.5551521182060242, "learning_rate": 7.775100937918717e-05, "loss": 0.4723, "step": 41650 }, { "epoch": 10.311881188118813, "grad_norm": 0.5387797355651855, "learning_rate": 7.773954651582275e-05, "loss": 0.4767, "step": 41660 }, { "epoch": 10.314356435643564, "grad_norm": 0.5594796538352966, "learning_rate": 7.77280815458228e-05, "loss": 0.4782, "step": 41670 }, { "epoch": 10.316831683168317, "grad_norm": 0.5906696915626526, "learning_rate": 7.771661447005807e-05, "loss": 0.4864, "step": 41680 }, { "epoch": 10.319306930693068, "grad_norm": 0.5875437259674072, "learning_rate": 7.770514528939938e-05, "loss": 0.4812, "step": 41690 }, { "epoch": 10.321782178217822, "grad_norm": 0.5453423857688904, "learning_rate": 7.769367400471774e-05, "loss": 0.4733, "step": 41700 }, { "epoch": 10.324257425742575, "grad_norm": 0.516240656375885, "learning_rate": 7.768220061688434e-05, "loss": 0.4767, "step": 41710 }, { "epoch": 10.326732673267326, "grad_norm": 0.5504379868507385, "learning_rate": 7.767072512677047e-05, "loss": 0.4753, "step": 41720 }, { "epoch": 10.32920792079208, "grad_norm": 0.5519547462463379, "learning_rate": 7.765924753524765e-05, "loss": 0.4718, "step": 41730 }, { "epoch": 10.331683168316832, "grad_norm": 0.5359838008880615, "learning_rate": 7.764776784318751e-05, "loss": 0.4763, "step": 41740 }, { "epoch": 10.334158415841584, "grad_norm": 0.5501367449760437, "learning_rate": 7.763628605146186e-05, "loss": 0.4714, "step": 41750 }, { "epoch": 10.336633663366337, "grad_norm": 0.5377628803253174, "learning_rate": 7.762480216094265e-05, "loss": 0.4732, "step": 41760 }, { "epoch": 10.339108910891088, "grad_norm": 0.5415878891944885, "learning_rate": 7.761331617250204e-05, "loss": 0.4758, "step": 41770 }, { "epoch": 10.341584158415841, "grad_norm": 0.5502418279647827, "learning_rate": 7.760182808701231e-05, "loss": 0.4738, "step": 41780 }, { "epoch": 10.344059405940595, "grad_norm": 0.515643298625946, "learning_rate": 7.759033790534587e-05, "loss": 0.4723, "step": 41790 }, { "epoch": 10.346534653465346, "grad_norm": 0.5579708218574524, "learning_rate": 7.757884562837537e-05, "loss": 0.478, "step": 41800 }, { "epoch": 10.349009900990099, "grad_norm": 0.5305255651473999, "learning_rate": 7.756735125697352e-05, "loss": 0.475, "step": 41810 }, { "epoch": 10.351485148514852, "grad_norm": 0.5473690032958984, "learning_rate": 7.75558547920133e-05, "loss": 0.4783, "step": 41820 }, { "epoch": 10.353960396039604, "grad_norm": 0.5708884596824646, "learning_rate": 7.754435623436776e-05, "loss": 0.4829, "step": 41830 }, { "epoch": 10.356435643564357, "grad_norm": 0.5216203927993774, "learning_rate": 7.753285558491012e-05, "loss": 0.4756, "step": 41840 }, { "epoch": 10.358910891089108, "grad_norm": 0.5487781763076782, "learning_rate": 7.752135284451381e-05, "loss": 0.471, "step": 41850 }, { "epoch": 10.361386138613861, "grad_norm": 0.5184012651443481, "learning_rate": 7.750984801405236e-05, "loss": 0.4746, "step": 41860 }, { "epoch": 10.363861386138614, "grad_norm": 0.5264614224433899, "learning_rate": 7.749834109439953e-05, "loss": 0.475, "step": 41870 }, { "epoch": 10.366336633663366, "grad_norm": 0.5521541833877563, "learning_rate": 7.748683208642915e-05, "loss": 0.4798, "step": 41880 }, { "epoch": 10.368811881188119, "grad_norm": 0.5347840785980225, "learning_rate": 7.747532099101529e-05, "loss": 0.4755, "step": 41890 }, { "epoch": 10.371287128712872, "grad_norm": 0.5383399724960327, "learning_rate": 7.74638078090321e-05, "loss": 0.4727, "step": 41900 }, { "epoch": 10.373762376237623, "grad_norm": 0.5718905925750732, "learning_rate": 7.745229254135395e-05, "loss": 0.473, "step": 41910 }, { "epoch": 10.376237623762377, "grad_norm": 0.5291493535041809, "learning_rate": 7.744077518885537e-05, "loss": 0.4738, "step": 41920 }, { "epoch": 10.378712871287128, "grad_norm": 0.534907877445221, "learning_rate": 7.7429255752411e-05, "loss": 0.478, "step": 41930 }, { "epoch": 10.381188118811881, "grad_norm": 0.5293603539466858, "learning_rate": 7.741773423289567e-05, "loss": 0.4783, "step": 41940 }, { "epoch": 10.383663366336634, "grad_norm": 0.49417543411254883, "learning_rate": 7.740621063118438e-05, "loss": 0.4773, "step": 41950 }, { "epoch": 10.386138613861386, "grad_norm": 0.5286256074905396, "learning_rate": 7.739468494815224e-05, "loss": 0.4713, "step": 41960 }, { "epoch": 10.388613861386139, "grad_norm": 0.5323520302772522, "learning_rate": 7.738315718467457e-05, "loss": 0.4753, "step": 41970 }, { "epoch": 10.391089108910892, "grad_norm": 0.5376715660095215, "learning_rate": 7.737162734162682e-05, "loss": 0.4801, "step": 41980 }, { "epoch": 10.393564356435643, "grad_norm": 0.5289390087127686, "learning_rate": 7.736009541988463e-05, "loss": 0.4767, "step": 41990 }, { "epoch": 10.396039603960396, "grad_norm": 0.5139478445053101, "learning_rate": 7.734856142032374e-05, "loss": 0.4781, "step": 42000 }, { "epoch": 10.398514851485148, "grad_norm": 0.5239613652229309, "learning_rate": 7.73370253438201e-05, "loss": 0.4804, "step": 42010 }, { "epoch": 10.400990099009901, "grad_norm": 0.5071950554847717, "learning_rate": 7.73254871912498e-05, "loss": 0.4753, "step": 42020 }, { "epoch": 10.403465346534654, "grad_norm": 0.5260604619979858, "learning_rate": 7.731394696348907e-05, "loss": 0.4784, "step": 42030 }, { "epoch": 10.405940594059405, "grad_norm": 0.5646606087684631, "learning_rate": 7.730240466141432e-05, "loss": 0.4703, "step": 42040 }, { "epoch": 10.408415841584159, "grad_norm": 0.5443940162658691, "learning_rate": 7.729086028590213e-05, "loss": 0.4776, "step": 42050 }, { "epoch": 10.410891089108912, "grad_norm": 0.5382000207901001, "learning_rate": 7.72793138378292e-05, "loss": 0.4757, "step": 42060 }, { "epoch": 10.413366336633663, "grad_norm": 0.5423523187637329, "learning_rate": 7.72677653180724e-05, "loss": 0.478, "step": 42070 }, { "epoch": 10.415841584158416, "grad_norm": 0.5296427011489868, "learning_rate": 7.725621472750878e-05, "loss": 0.4739, "step": 42080 }, { "epoch": 10.418316831683168, "grad_norm": 0.5043834447860718, "learning_rate": 7.724466206701554e-05, "loss": 0.4808, "step": 42090 }, { "epoch": 10.42079207920792, "grad_norm": 0.5509619116783142, "learning_rate": 7.723310733747002e-05, "loss": 0.4731, "step": 42100 }, { "epoch": 10.423267326732674, "grad_norm": 0.5086851119995117, "learning_rate": 7.722155053974969e-05, "loss": 0.4764, "step": 42110 }, { "epoch": 10.425742574257425, "grad_norm": 0.5319629907608032, "learning_rate": 7.720999167473227e-05, "loss": 0.4729, "step": 42120 }, { "epoch": 10.428217821782178, "grad_norm": 0.575815737247467, "learning_rate": 7.719843074329554e-05, "loss": 0.4756, "step": 42130 }, { "epoch": 10.430693069306932, "grad_norm": 0.588313639163971, "learning_rate": 7.71868677463175e-05, "loss": 0.4808, "step": 42140 }, { "epoch": 10.433168316831683, "grad_norm": 0.5551357865333557, "learning_rate": 7.717530268467625e-05, "loss": 0.4779, "step": 42150 }, { "epoch": 10.435643564356436, "grad_norm": 0.5379621386528015, "learning_rate": 7.716373555925011e-05, "loss": 0.4774, "step": 42160 }, { "epoch": 10.438118811881187, "grad_norm": 0.5349875092506409, "learning_rate": 7.715216637091754e-05, "loss": 0.4715, "step": 42170 }, { "epoch": 10.44059405940594, "grad_norm": 0.497869610786438, "learning_rate": 7.714059512055712e-05, "loss": 0.4711, "step": 42180 }, { "epoch": 10.443069306930694, "grad_norm": 0.5820611715316772, "learning_rate": 7.71290218090476e-05, "loss": 0.4766, "step": 42190 }, { "epoch": 10.445544554455445, "grad_norm": 0.525193989276886, "learning_rate": 7.71174464372679e-05, "loss": 0.4802, "step": 42200 }, { "epoch": 10.448019801980198, "grad_norm": 0.5289748311042786, "learning_rate": 7.710586900609712e-05, "loss": 0.4682, "step": 42210 }, { "epoch": 10.450495049504951, "grad_norm": 0.5400595664978027, "learning_rate": 7.709428951641447e-05, "loss": 0.4753, "step": 42220 }, { "epoch": 10.452970297029703, "grad_norm": 0.5321012139320374, "learning_rate": 7.708270796909934e-05, "loss": 0.4849, "step": 42230 }, { "epoch": 10.455445544554456, "grad_norm": 0.505759060382843, "learning_rate": 7.707112436503126e-05, "loss": 0.4754, "step": 42240 }, { "epoch": 10.457920792079207, "grad_norm": 0.5540057420730591, "learning_rate": 7.705953870508994e-05, "loss": 0.4787, "step": 42250 }, { "epoch": 10.46039603960396, "grad_norm": 0.5315214991569519, "learning_rate": 7.704795099015523e-05, "loss": 0.4763, "step": 42260 }, { "epoch": 10.462871287128714, "grad_norm": 0.5282173752784729, "learning_rate": 7.703636122110716e-05, "loss": 0.4726, "step": 42270 }, { "epoch": 10.465346534653465, "grad_norm": 0.4905223548412323, "learning_rate": 7.702476939882586e-05, "loss": 0.4722, "step": 42280 }, { "epoch": 10.467821782178218, "grad_norm": 0.5277026891708374, "learning_rate": 7.701317552419169e-05, "loss": 0.4743, "step": 42290 }, { "epoch": 10.47029702970297, "grad_norm": 0.5237539410591125, "learning_rate": 7.700157959808509e-05, "loss": 0.4716, "step": 42300 }, { "epoch": 10.472772277227723, "grad_norm": 0.5818616151809692, "learning_rate": 7.698998162138673e-05, "loss": 0.4746, "step": 42310 }, { "epoch": 10.475247524752476, "grad_norm": 0.5605331659317017, "learning_rate": 7.697838159497739e-05, "loss": 0.4793, "step": 42320 }, { "epoch": 10.477722772277227, "grad_norm": 0.5460197329521179, "learning_rate": 7.696677951973799e-05, "loss": 0.4721, "step": 42330 }, { "epoch": 10.48019801980198, "grad_norm": 0.5377420783042908, "learning_rate": 7.695517539654966e-05, "loss": 0.4744, "step": 42340 }, { "epoch": 10.482673267326733, "grad_norm": 0.5441377758979797, "learning_rate": 7.694356922629365e-05, "loss": 0.476, "step": 42350 }, { "epoch": 10.485148514851485, "grad_norm": 0.5918534398078918, "learning_rate": 7.693196100985137e-05, "loss": 0.4777, "step": 42360 }, { "epoch": 10.487623762376238, "grad_norm": 0.528217613697052, "learning_rate": 7.692035074810439e-05, "loss": 0.4725, "step": 42370 }, { "epoch": 10.490099009900991, "grad_norm": 0.5718857049942017, "learning_rate": 7.690873844193442e-05, "loss": 0.478, "step": 42380 }, { "epoch": 10.492574257425742, "grad_norm": 0.5206720232963562, "learning_rate": 7.689712409222333e-05, "loss": 0.4789, "step": 42390 }, { "epoch": 10.495049504950495, "grad_norm": 0.49472159147262573, "learning_rate": 7.688550769985321e-05, "loss": 0.4696, "step": 42400 }, { "epoch": 10.497524752475247, "grad_norm": 0.5308453440666199, "learning_rate": 7.687388926570619e-05, "loss": 0.4734, "step": 42410 }, { "epoch": 10.5, "grad_norm": 0.5479339361190796, "learning_rate": 7.686226879066465e-05, "loss": 0.4744, "step": 42420 }, { "epoch": 10.502475247524753, "grad_norm": 0.5060535669326782, "learning_rate": 7.685064627561104e-05, "loss": 0.4774, "step": 42430 }, { "epoch": 10.504950495049505, "grad_norm": 0.5419988632202148, "learning_rate": 7.683902172142806e-05, "loss": 0.4783, "step": 42440 }, { "epoch": 10.507425742574258, "grad_norm": 0.5225809216499329, "learning_rate": 7.68273951289985e-05, "loss": 0.4722, "step": 42450 }, { "epoch": 10.509900990099009, "grad_norm": 0.5286439657211304, "learning_rate": 7.681576649920534e-05, "loss": 0.4742, "step": 42460 }, { "epoch": 10.512376237623762, "grad_norm": 0.534591794013977, "learning_rate": 7.680413583293167e-05, "loss": 0.4717, "step": 42470 }, { "epoch": 10.514851485148515, "grad_norm": 0.5422487258911133, "learning_rate": 7.679250313106077e-05, "loss": 0.478, "step": 42480 }, { "epoch": 10.517326732673267, "grad_norm": 0.5058059692382812, "learning_rate": 7.678086839447607e-05, "loss": 0.4759, "step": 42490 }, { "epoch": 10.51980198019802, "grad_norm": 0.5274704098701477, "learning_rate": 7.676923162406115e-05, "loss": 0.4739, "step": 42500 }, { "epoch": 10.522277227722773, "grad_norm": 0.516622006893158, "learning_rate": 7.675759282069977e-05, "loss": 0.4724, "step": 42510 }, { "epoch": 10.524752475247524, "grad_norm": 0.5333063006401062, "learning_rate": 7.674595198527579e-05, "loss": 0.4795, "step": 42520 }, { "epoch": 10.527227722772277, "grad_norm": 0.5615268349647522, "learning_rate": 7.673430911867327e-05, "loss": 0.4738, "step": 42530 }, { "epoch": 10.52970297029703, "grad_norm": 0.5318700075149536, "learning_rate": 7.672266422177639e-05, "loss": 0.4778, "step": 42540 }, { "epoch": 10.532178217821782, "grad_norm": 0.5147452354431152, "learning_rate": 7.671101729546953e-05, "loss": 0.4711, "step": 42550 }, { "epoch": 10.534653465346535, "grad_norm": 0.5478195548057556, "learning_rate": 7.669936834063719e-05, "loss": 0.4808, "step": 42560 }, { "epoch": 10.537128712871286, "grad_norm": 0.4997195899486542, "learning_rate": 7.6687717358164e-05, "loss": 0.471, "step": 42570 }, { "epoch": 10.53960396039604, "grad_norm": 0.520603358745575, "learning_rate": 7.667606434893481e-05, "loss": 0.4769, "step": 42580 }, { "epoch": 10.542079207920793, "grad_norm": 0.5292888283729553, "learning_rate": 7.66644093138346e-05, "loss": 0.474, "step": 42590 }, { "epoch": 10.544554455445544, "grad_norm": 0.5182358026504517, "learning_rate": 7.665275225374845e-05, "loss": 0.4734, "step": 42600 }, { "epoch": 10.547029702970297, "grad_norm": 0.5377001762390137, "learning_rate": 7.664109316956168e-05, "loss": 0.473, "step": 42610 }, { "epoch": 10.549504950495049, "grad_norm": 0.5803555250167847, "learning_rate": 7.662943206215966e-05, "loss": 0.4847, "step": 42620 }, { "epoch": 10.551980198019802, "grad_norm": 0.5883097648620605, "learning_rate": 7.661776893242805e-05, "loss": 0.4721, "step": 42630 }, { "epoch": 10.554455445544555, "grad_norm": 0.5472361445426941, "learning_rate": 7.660610378125253e-05, "loss": 0.4716, "step": 42640 }, { "epoch": 10.556930693069306, "grad_norm": 0.5942164063453674, "learning_rate": 7.659443660951904e-05, "loss": 0.4804, "step": 42650 }, { "epoch": 10.55940594059406, "grad_norm": 0.5096465349197388, "learning_rate": 7.658276741811357e-05, "loss": 0.4744, "step": 42660 }, { "epoch": 10.561881188118813, "grad_norm": 0.5320366024971008, "learning_rate": 7.657109620792236e-05, "loss": 0.4743, "step": 42670 }, { "epoch": 10.564356435643564, "grad_norm": 0.5264715552330017, "learning_rate": 7.655942297983174e-05, "loss": 0.4751, "step": 42680 }, { "epoch": 10.566831683168317, "grad_norm": 0.49025702476501465, "learning_rate": 7.654774773472823e-05, "loss": 0.4686, "step": 42690 }, { "epoch": 10.569306930693068, "grad_norm": 0.5399741530418396, "learning_rate": 7.653607047349848e-05, "loss": 0.4751, "step": 42700 }, { "epoch": 10.571782178217822, "grad_norm": 0.5364018082618713, "learning_rate": 7.65243911970293e-05, "loss": 0.4771, "step": 42710 }, { "epoch": 10.574257425742575, "grad_norm": 0.5308107733726501, "learning_rate": 7.651270990620765e-05, "loss": 0.4831, "step": 42720 }, { "epoch": 10.576732673267326, "grad_norm": 0.4767110347747803, "learning_rate": 7.650102660192065e-05, "loss": 0.4716, "step": 42730 }, { "epoch": 10.57920792079208, "grad_norm": 0.5279591679573059, "learning_rate": 7.648934128505557e-05, "loss": 0.4747, "step": 42740 }, { "epoch": 10.581683168316832, "grad_norm": 0.5161558985710144, "learning_rate": 7.647765395649983e-05, "loss": 0.4762, "step": 42750 }, { "epoch": 10.584158415841584, "grad_norm": 0.5171566605567932, "learning_rate": 7.646596461714101e-05, "loss": 0.4733, "step": 42760 }, { "epoch": 10.586633663366337, "grad_norm": 0.5455465912818909, "learning_rate": 7.645427326786685e-05, "loss": 0.477, "step": 42770 }, { "epoch": 10.589108910891088, "grad_norm": 0.5507078170776367, "learning_rate": 7.64425799095652e-05, "loss": 0.4753, "step": 42780 }, { "epoch": 10.591584158415841, "grad_norm": 0.5282396674156189, "learning_rate": 7.643088454312414e-05, "loss": 0.4791, "step": 42790 }, { "epoch": 10.594059405940595, "grad_norm": 0.5286968946456909, "learning_rate": 7.64191871694318e-05, "loss": 0.477, "step": 42800 }, { "epoch": 10.596534653465346, "grad_norm": 0.549230694770813, "learning_rate": 7.640748778937655e-05, "loss": 0.4757, "step": 42810 }, { "epoch": 10.599009900990099, "grad_norm": 0.5305756330490112, "learning_rate": 7.63957864038469e-05, "loss": 0.48, "step": 42820 }, { "epoch": 10.601485148514852, "grad_norm": 0.510913610458374, "learning_rate": 7.638408301373145e-05, "loss": 0.4683, "step": 42830 }, { "epoch": 10.603960396039604, "grad_norm": 0.5231693387031555, "learning_rate": 7.637237761991901e-05, "loss": 0.4775, "step": 42840 }, { "epoch": 10.606435643564357, "grad_norm": 0.5133885741233826, "learning_rate": 7.636067022329854e-05, "loss": 0.4725, "step": 42850 }, { "epoch": 10.608910891089108, "grad_norm": 0.515579104423523, "learning_rate": 7.634896082475913e-05, "loss": 0.4681, "step": 42860 }, { "epoch": 10.611386138613861, "grad_norm": 0.5158997178077698, "learning_rate": 7.633724942519004e-05, "loss": 0.4738, "step": 42870 }, { "epoch": 10.613861386138614, "grad_norm": 0.5091545581817627, "learning_rate": 7.632553602548065e-05, "loss": 0.474, "step": 42880 }, { "epoch": 10.616336633663366, "grad_norm": 0.5156344771385193, "learning_rate": 7.631382062652057e-05, "loss": 0.4726, "step": 42890 }, { "epoch": 10.618811881188119, "grad_norm": 0.5236343741416931, "learning_rate": 7.630210322919942e-05, "loss": 0.4783, "step": 42900 }, { "epoch": 10.621287128712872, "grad_norm": 0.523165225982666, "learning_rate": 7.629038383440715e-05, "loss": 0.4733, "step": 42910 }, { "epoch": 10.623762376237623, "grad_norm": 0.5028265118598938, "learning_rate": 7.62786624430337e-05, "loss": 0.4746, "step": 42920 }, { "epoch": 10.626237623762377, "grad_norm": 0.5138021111488342, "learning_rate": 7.626693905596926e-05, "loss": 0.4781, "step": 42930 }, { "epoch": 10.628712871287128, "grad_norm": 0.5497408509254456, "learning_rate": 7.625521367410417e-05, "loss": 0.4752, "step": 42940 }, { "epoch": 10.631188118811881, "grad_norm": 0.5386633276939392, "learning_rate": 7.624348629832884e-05, "loss": 0.4776, "step": 42950 }, { "epoch": 10.633663366336634, "grad_norm": 0.5417851805686951, "learning_rate": 7.623175692953395e-05, "loss": 0.4756, "step": 42960 }, { "epoch": 10.636138613861386, "grad_norm": 0.5451120138168335, "learning_rate": 7.622002556861023e-05, "loss": 0.4757, "step": 42970 }, { "epoch": 10.638613861386139, "grad_norm": 0.5511590838432312, "learning_rate": 7.62082922164486e-05, "loss": 0.4768, "step": 42980 }, { "epoch": 10.641089108910892, "grad_norm": 0.5385121703147888, "learning_rate": 7.619655687394012e-05, "loss": 0.4757, "step": 42990 }, { "epoch": 10.643564356435643, "grad_norm": 0.537176787853241, "learning_rate": 7.618481954197605e-05, "loss": 0.4737, "step": 43000 }, { "epoch": 10.646039603960396, "grad_norm": 0.4956647455692291, "learning_rate": 7.617308022144772e-05, "loss": 0.4762, "step": 43010 }, { "epoch": 10.648514851485148, "grad_norm": 0.5943235754966736, "learning_rate": 7.61613389132467e-05, "loss": 0.4752, "step": 43020 }, { "epoch": 10.650990099009901, "grad_norm": 0.5041568875312805, "learning_rate": 7.614959561826462e-05, "loss": 0.4761, "step": 43030 }, { "epoch": 10.653465346534654, "grad_norm": 0.5372958183288574, "learning_rate": 7.613785033739333e-05, "loss": 0.4695, "step": 43040 }, { "epoch": 10.655940594059405, "grad_norm": 0.5579813122749329, "learning_rate": 7.61261030715248e-05, "loss": 0.4732, "step": 43050 }, { "epoch": 10.658415841584159, "grad_norm": 0.517181396484375, "learning_rate": 7.611435382155117e-05, "loss": 0.4717, "step": 43060 }, { "epoch": 10.660891089108912, "grad_norm": 0.5373238921165466, "learning_rate": 7.61026025883647e-05, "loss": 0.4742, "step": 43070 }, { "epoch": 10.663366336633663, "grad_norm": 0.5769635438919067, "learning_rate": 7.609084937285783e-05, "loss": 0.4789, "step": 43080 }, { "epoch": 10.665841584158416, "grad_norm": 0.5175982117652893, "learning_rate": 7.607909417592311e-05, "loss": 0.4717, "step": 43090 }, { "epoch": 10.668316831683168, "grad_norm": 0.5337866544723511, "learning_rate": 7.606733699845331e-05, "loss": 0.4727, "step": 43100 }, { "epoch": 10.67079207920792, "grad_norm": 0.5081475973129272, "learning_rate": 7.60555778413413e-05, "loss": 0.4716, "step": 43110 }, { "epoch": 10.673267326732674, "grad_norm": 0.5414690375328064, "learning_rate": 7.60438167054801e-05, "loss": 0.474, "step": 43120 }, { "epoch": 10.675742574257425, "grad_norm": 0.527337908744812, "learning_rate": 7.603205359176291e-05, "loss": 0.4706, "step": 43130 }, { "epoch": 10.678217821782178, "grad_norm": 0.5239275693893433, "learning_rate": 7.602028850108303e-05, "loss": 0.4732, "step": 43140 }, { "epoch": 10.680693069306932, "grad_norm": 0.5337688326835632, "learning_rate": 7.600852143433397e-05, "loss": 0.4754, "step": 43150 }, { "epoch": 10.683168316831683, "grad_norm": 0.5215974450111389, "learning_rate": 7.599675239240933e-05, "loss": 0.4716, "step": 43160 }, { "epoch": 10.685643564356436, "grad_norm": 0.5150344967842102, "learning_rate": 7.598498137620293e-05, "loss": 0.4771, "step": 43170 }, { "epoch": 10.688118811881187, "grad_norm": 0.4964323937892914, "learning_rate": 7.597320838660865e-05, "loss": 0.4738, "step": 43180 }, { "epoch": 10.69059405940594, "grad_norm": 0.510347306728363, "learning_rate": 7.596143342452064e-05, "loss": 0.4782, "step": 43190 }, { "epoch": 10.693069306930694, "grad_norm": 0.4762367308139801, "learning_rate": 7.594965649083305e-05, "loss": 0.4719, "step": 43200 }, { "epoch": 10.695544554455445, "grad_norm": 0.5520671606063843, "learning_rate": 7.593787758644033e-05, "loss": 0.4716, "step": 43210 }, { "epoch": 10.698019801980198, "grad_norm": 0.5385645627975464, "learning_rate": 7.592609671223698e-05, "loss": 0.4757, "step": 43220 }, { "epoch": 10.700495049504951, "grad_norm": 0.5435175895690918, "learning_rate": 7.591431386911769e-05, "loss": 0.4709, "step": 43230 }, { "epoch": 10.702970297029703, "grad_norm": 0.5291683077812195, "learning_rate": 7.590252905797727e-05, "loss": 0.474, "step": 43240 }, { "epoch": 10.705445544554456, "grad_norm": 0.5182040929794312, "learning_rate": 7.589074227971069e-05, "loss": 0.4743, "step": 43250 }, { "epoch": 10.707920792079207, "grad_norm": 0.5120971202850342, "learning_rate": 7.587895353521314e-05, "loss": 0.4726, "step": 43260 }, { "epoch": 10.71039603960396, "grad_norm": 0.5494726300239563, "learning_rate": 7.586716282537982e-05, "loss": 0.4751, "step": 43270 }, { "epoch": 10.712871287128714, "grad_norm": 0.505828320980072, "learning_rate": 7.58553701511062e-05, "loss": 0.47, "step": 43280 }, { "epoch": 10.715346534653465, "grad_norm": 0.5406468510627747, "learning_rate": 7.584357551328786e-05, "loss": 0.4781, "step": 43290 }, { "epoch": 10.717821782178218, "grad_norm": 0.5690456032752991, "learning_rate": 7.583177891282051e-05, "loss": 0.4728, "step": 43300 }, { "epoch": 10.72029702970297, "grad_norm": 0.5368716716766357, "learning_rate": 7.581998035060003e-05, "loss": 0.4696, "step": 43310 }, { "epoch": 10.722772277227723, "grad_norm": 0.5076979994773865, "learning_rate": 7.580817982752242e-05, "loss": 0.4721, "step": 43320 }, { "epoch": 10.725247524752476, "grad_norm": 0.509958803653717, "learning_rate": 7.579637734448388e-05, "loss": 0.4738, "step": 43330 }, { "epoch": 10.727722772277227, "grad_norm": 0.473467081785202, "learning_rate": 7.578457290238075e-05, "loss": 0.472, "step": 43340 }, { "epoch": 10.73019801980198, "grad_norm": 0.5272178649902344, "learning_rate": 7.577276650210943e-05, "loss": 0.4759, "step": 43350 }, { "epoch": 10.732673267326733, "grad_norm": 0.5038413405418396, "learning_rate": 7.57609581445666e-05, "loss": 0.4745, "step": 43360 }, { "epoch": 10.735148514851485, "grad_norm": 0.5199773907661438, "learning_rate": 7.574914783064899e-05, "loss": 0.4719, "step": 43370 }, { "epoch": 10.737623762376238, "grad_norm": 0.5009911060333252, "learning_rate": 7.573733556125354e-05, "loss": 0.4709, "step": 43380 }, { "epoch": 10.740099009900991, "grad_norm": 0.5365986227989197, "learning_rate": 7.572552133727733e-05, "loss": 0.4714, "step": 43390 }, { "epoch": 10.742574257425742, "grad_norm": 0.4813023805618286, "learning_rate": 7.57137051596175e-05, "loss": 0.4716, "step": 43400 }, { "epoch": 10.745049504950495, "grad_norm": 0.5293781161308289, "learning_rate": 7.570188702917151e-05, "loss": 0.4747, "step": 43410 }, { "epoch": 10.747524752475247, "grad_norm": 0.5208889245986938, "learning_rate": 7.569006694683678e-05, "loss": 0.4718, "step": 43420 }, { "epoch": 10.75, "grad_norm": 0.5189951062202454, "learning_rate": 7.567824491351104e-05, "loss": 0.4733, "step": 43430 }, { "epoch": 10.752475247524753, "grad_norm": 0.5410559773445129, "learning_rate": 7.566642093009203e-05, "loss": 0.4738, "step": 43440 }, { "epoch": 10.754950495049505, "grad_norm": 0.5369808673858643, "learning_rate": 7.565459499747775e-05, "loss": 0.4753, "step": 43450 }, { "epoch": 10.757425742574258, "grad_norm": 0.5570014715194702, "learning_rate": 7.564276711656628e-05, "loss": 0.4735, "step": 43460 }, { "epoch": 10.759900990099009, "grad_norm": 0.5550642013549805, "learning_rate": 7.563093728825587e-05, "loss": 0.4778, "step": 43470 }, { "epoch": 10.762376237623762, "grad_norm": 0.5543648600578308, "learning_rate": 7.561910551344493e-05, "loss": 0.474, "step": 43480 }, { "epoch": 10.764851485148515, "grad_norm": 0.5273292064666748, "learning_rate": 7.560727179303199e-05, "loss": 0.4764, "step": 43490 }, { "epoch": 10.767326732673267, "grad_norm": 0.5114328265190125, "learning_rate": 7.559543612791575e-05, "loss": 0.4808, "step": 43500 }, { "epoch": 10.76980198019802, "grad_norm": 0.5233956575393677, "learning_rate": 7.558359851899506e-05, "loss": 0.4731, "step": 43510 }, { "epoch": 10.772277227722773, "grad_norm": 0.5139607191085815, "learning_rate": 7.557175896716891e-05, "loss": 0.4798, "step": 43520 }, { "epoch": 10.774752475247524, "grad_norm": 0.5205358266830444, "learning_rate": 7.55599174733364e-05, "loss": 0.4736, "step": 43530 }, { "epoch": 10.777227722772277, "grad_norm": 0.52515709400177, "learning_rate": 7.554807403839685e-05, "loss": 0.4749, "step": 43540 }, { "epoch": 10.77970297029703, "grad_norm": 0.5525435209274292, "learning_rate": 7.553622866324967e-05, "loss": 0.4744, "step": 43550 }, { "epoch": 10.782178217821782, "grad_norm": 0.5124194622039795, "learning_rate": 7.552438134879447e-05, "loss": 0.473, "step": 43560 }, { "epoch": 10.784653465346535, "grad_norm": 0.4807736873626709, "learning_rate": 7.551253209593093e-05, "loss": 0.4738, "step": 43570 }, { "epoch": 10.787128712871286, "grad_norm": 0.4985877275466919, "learning_rate": 7.550068090555898e-05, "loss": 0.474, "step": 43580 }, { "epoch": 10.78960396039604, "grad_norm": 0.5177544951438904, "learning_rate": 7.548882777857856e-05, "loss": 0.4748, "step": 43590 }, { "epoch": 10.792079207920793, "grad_norm": 0.49187716841697693, "learning_rate": 7.54769727158899e-05, "loss": 0.4751, "step": 43600 }, { "epoch": 10.794554455445544, "grad_norm": 0.5258334279060364, "learning_rate": 7.546511571839329e-05, "loss": 0.4713, "step": 43610 }, { "epoch": 10.797029702970297, "grad_norm": 0.5381864309310913, "learning_rate": 7.545325678698923e-05, "loss": 0.4757, "step": 43620 }, { "epoch": 10.799504950495049, "grad_norm": 0.5028476715087891, "learning_rate": 7.544139592257826e-05, "loss": 0.4748, "step": 43630 }, { "epoch": 10.801980198019802, "grad_norm": 0.5538946390151978, "learning_rate": 7.542953312606117e-05, "loss": 0.4799, "step": 43640 }, { "epoch": 10.804455445544555, "grad_norm": 0.5526912212371826, "learning_rate": 7.541766839833886e-05, "loss": 0.4783, "step": 43650 }, { "epoch": 10.806930693069306, "grad_norm": 0.5126709938049316, "learning_rate": 7.540580174031239e-05, "loss": 0.4736, "step": 43660 }, { "epoch": 10.80940594059406, "grad_norm": 0.574478268623352, "learning_rate": 7.539393315288293e-05, "loss": 0.4734, "step": 43670 }, { "epoch": 10.811881188118813, "grad_norm": 0.49650266766548157, "learning_rate": 7.538206263695185e-05, "loss": 0.4736, "step": 43680 }, { "epoch": 10.814356435643564, "grad_norm": 0.5222116708755493, "learning_rate": 7.53701901934206e-05, "loss": 0.4772, "step": 43690 }, { "epoch": 10.816831683168317, "grad_norm": 0.509602427482605, "learning_rate": 7.535831582319083e-05, "loss": 0.4709, "step": 43700 }, { "epoch": 10.819306930693068, "grad_norm": 0.5148838758468628, "learning_rate": 7.534643952716435e-05, "loss": 0.4747, "step": 43710 }, { "epoch": 10.821782178217822, "grad_norm": 0.5441877245903015, "learning_rate": 7.533456130624303e-05, "loss": 0.4754, "step": 43720 }, { "epoch": 10.824257425742575, "grad_norm": 0.5193275213241577, "learning_rate": 7.5322681161329e-05, "loss": 0.4719, "step": 43730 }, { "epoch": 10.826732673267326, "grad_norm": 0.498119592666626, "learning_rate": 7.531079909332444e-05, "loss": 0.4764, "step": 43740 }, { "epoch": 10.82920792079208, "grad_norm": 0.5438342690467834, "learning_rate": 7.529891510313172e-05, "loss": 0.4717, "step": 43750 }, { "epoch": 10.831683168316832, "grad_norm": 0.5310576558113098, "learning_rate": 7.528702919165335e-05, "loss": 0.477, "step": 43760 }, { "epoch": 10.834158415841584, "grad_norm": 0.5580030083656311, "learning_rate": 7.527514135979201e-05, "loss": 0.4695, "step": 43770 }, { "epoch": 10.836633663366337, "grad_norm": 0.5331580638885498, "learning_rate": 7.526325160845047e-05, "loss": 0.473, "step": 43780 }, { "epoch": 10.839108910891088, "grad_norm": 0.5131069421768188, "learning_rate": 7.52513599385317e-05, "loss": 0.4722, "step": 43790 }, { "epoch": 10.841584158415841, "grad_norm": 0.5385400056838989, "learning_rate": 7.523946635093878e-05, "loss": 0.4732, "step": 43800 }, { "epoch": 10.844059405940595, "grad_norm": 0.5001948475837708, "learning_rate": 7.522757084657497e-05, "loss": 0.4725, "step": 43810 }, { "epoch": 10.846534653465346, "grad_norm": 0.5294607877731323, "learning_rate": 7.521567342634364e-05, "loss": 0.4765, "step": 43820 }, { "epoch": 10.849009900990099, "grad_norm": 0.4923548400402069, "learning_rate": 7.520377409114831e-05, "loss": 0.4703, "step": 43830 }, { "epoch": 10.851485148514852, "grad_norm": 0.5278708338737488, "learning_rate": 7.519187284189268e-05, "loss": 0.4776, "step": 43840 }, { "epoch": 10.853960396039604, "grad_norm": 0.5155786275863647, "learning_rate": 7.517996967948056e-05, "loss": 0.4694, "step": 43850 }, { "epoch": 10.856435643564357, "grad_norm": 0.5225658416748047, "learning_rate": 7.516806460481592e-05, "loss": 0.4733, "step": 43860 }, { "epoch": 10.858910891089108, "grad_norm": 0.5168792605400085, "learning_rate": 7.515615761880284e-05, "loss": 0.4736, "step": 43870 }, { "epoch": 10.861386138613861, "grad_norm": 0.5638657808303833, "learning_rate": 7.514424872234564e-05, "loss": 0.4715, "step": 43880 }, { "epoch": 10.863861386138614, "grad_norm": 0.5376777052879333, "learning_rate": 7.513233791634866e-05, "loss": 0.4713, "step": 43890 }, { "epoch": 10.866336633663366, "grad_norm": 0.5401655435562134, "learning_rate": 7.512042520171649e-05, "loss": 0.4721, "step": 43900 }, { "epoch": 10.868811881188119, "grad_norm": 0.5247501730918884, "learning_rate": 7.51085105793538e-05, "loss": 0.4752, "step": 43910 }, { "epoch": 10.871287128712872, "grad_norm": 0.5659456253051758, "learning_rate": 7.509659405016544e-05, "loss": 0.4749, "step": 43920 }, { "epoch": 10.873762376237623, "grad_norm": 0.5148149132728577, "learning_rate": 7.508467561505639e-05, "loss": 0.476, "step": 43930 }, { "epoch": 10.876237623762377, "grad_norm": 0.528764009475708, "learning_rate": 7.507275527493174e-05, "loss": 0.473, "step": 43940 }, { "epoch": 10.878712871287128, "grad_norm": 0.529670774936676, "learning_rate": 7.506083303069683e-05, "loss": 0.4786, "step": 43950 }, { "epoch": 10.881188118811881, "grad_norm": 0.5230060815811157, "learning_rate": 7.504890888325702e-05, "loss": 0.471, "step": 43960 }, { "epoch": 10.883663366336634, "grad_norm": 0.5510755777359009, "learning_rate": 7.503698283351789e-05, "loss": 0.4731, "step": 43970 }, { "epoch": 10.886138613861386, "grad_norm": 0.5509275794029236, "learning_rate": 7.502505488238514e-05, "loss": 0.4726, "step": 43980 }, { "epoch": 10.888613861386139, "grad_norm": 0.5547990202903748, "learning_rate": 7.501312503076463e-05, "loss": 0.4815, "step": 43990 }, { "epoch": 10.891089108910892, "grad_norm": 0.5213921070098877, "learning_rate": 7.500119327956234e-05, "loss": 0.4713, "step": 44000 }, { "epoch": 10.893564356435643, "grad_norm": 0.5025900602340698, "learning_rate": 7.498925962968443e-05, "loss": 0.4731, "step": 44010 }, { "epoch": 10.896039603960396, "grad_norm": 0.49957484006881714, "learning_rate": 7.497732408203715e-05, "loss": 0.472, "step": 44020 }, { "epoch": 10.898514851485148, "grad_norm": 0.5019752383232117, "learning_rate": 7.496538663752695e-05, "loss": 0.4765, "step": 44030 }, { "epoch": 10.900990099009901, "grad_norm": 0.5241532325744629, "learning_rate": 7.495344729706038e-05, "loss": 0.4763, "step": 44040 }, { "epoch": 10.903465346534654, "grad_norm": 0.5171858668327332, "learning_rate": 7.494150606154418e-05, "loss": 0.471, "step": 44050 }, { "epoch": 10.905940594059405, "grad_norm": 0.5519861578941345, "learning_rate": 7.492956293188519e-05, "loss": 0.4745, "step": 44060 }, { "epoch": 10.908415841584159, "grad_norm": 0.5416135787963867, "learning_rate": 7.491761790899041e-05, "loss": 0.4741, "step": 44070 }, { "epoch": 10.910891089108912, "grad_norm": 0.5038743615150452, "learning_rate": 7.4905670993767e-05, "loss": 0.4718, "step": 44080 }, { "epoch": 10.913366336633663, "grad_norm": 0.5522492527961731, "learning_rate": 7.489372218712223e-05, "loss": 0.477, "step": 44090 }, { "epoch": 10.915841584158416, "grad_norm": 0.4893510043621063, "learning_rate": 7.488177148996356e-05, "loss": 0.4774, "step": 44100 }, { "epoch": 10.918316831683168, "grad_norm": 0.5473765730857849, "learning_rate": 7.486981890319852e-05, "loss": 0.4792, "step": 44110 }, { "epoch": 10.92079207920792, "grad_norm": 0.5453010201454163, "learning_rate": 7.48578644277349e-05, "loss": 0.4727, "step": 44120 }, { "epoch": 10.923267326732674, "grad_norm": 0.5112728476524353, "learning_rate": 7.484590806448051e-05, "loss": 0.471, "step": 44130 }, { "epoch": 10.925742574257425, "grad_norm": 0.5086286664009094, "learning_rate": 7.483394981434338e-05, "loss": 0.4759, "step": 44140 }, { "epoch": 10.928217821782178, "grad_norm": 0.5148332118988037, "learning_rate": 7.482198967823163e-05, "loss": 0.4795, "step": 44150 }, { "epoch": 10.930693069306932, "grad_norm": 0.5168773531913757, "learning_rate": 7.481002765705358e-05, "loss": 0.4737, "step": 44160 }, { "epoch": 10.933168316831683, "grad_norm": 0.552987813949585, "learning_rate": 7.479806375171766e-05, "loss": 0.4787, "step": 44170 }, { "epoch": 10.935643564356436, "grad_norm": 0.5199519991874695, "learning_rate": 7.478609796313248e-05, "loss": 0.4696, "step": 44180 }, { "epoch": 10.938118811881187, "grad_norm": 0.5089733004570007, "learning_rate": 7.47741302922067e-05, "loss": 0.4677, "step": 44190 }, { "epoch": 10.94059405940594, "grad_norm": 0.5283535718917847, "learning_rate": 7.476216073984923e-05, "loss": 0.4786, "step": 44200 }, { "epoch": 10.943069306930694, "grad_norm": 0.5099077224731445, "learning_rate": 7.475018930696907e-05, "loss": 0.4706, "step": 44210 }, { "epoch": 10.945544554455445, "grad_norm": 0.5360881686210632, "learning_rate": 7.473821599447537e-05, "loss": 0.471, "step": 44220 }, { "epoch": 10.948019801980198, "grad_norm": 0.5248762965202332, "learning_rate": 7.472624080327741e-05, "loss": 0.472, "step": 44230 }, { "epoch": 10.950495049504951, "grad_norm": 0.51844722032547, "learning_rate": 7.471426373428465e-05, "loss": 0.4713, "step": 44240 }, { "epoch": 10.952970297029703, "grad_norm": 0.4902518391609192, "learning_rate": 7.470228478840667e-05, "loss": 0.4744, "step": 44250 }, { "epoch": 10.955445544554456, "grad_norm": 0.5303402543067932, "learning_rate": 7.469030396655316e-05, "loss": 0.4751, "step": 44260 }, { "epoch": 10.957920792079207, "grad_norm": 0.5394173264503479, "learning_rate": 7.467832126963402e-05, "loss": 0.4748, "step": 44270 }, { "epoch": 10.96039603960396, "grad_norm": 0.5213643908500671, "learning_rate": 7.466633669855924e-05, "loss": 0.4713, "step": 44280 }, { "epoch": 10.962871287128714, "grad_norm": 0.5086885094642639, "learning_rate": 7.465435025423898e-05, "loss": 0.4789, "step": 44290 }, { "epoch": 10.965346534653465, "grad_norm": 0.5049903392791748, "learning_rate": 7.464236193758352e-05, "loss": 0.4789, "step": 44300 }, { "epoch": 10.967821782178218, "grad_norm": 0.5312647223472595, "learning_rate": 7.463037174950329e-05, "loss": 0.471, "step": 44310 }, { "epoch": 10.97029702970297, "grad_norm": 0.5116689205169678, "learning_rate": 7.461837969090886e-05, "loss": 0.4735, "step": 44320 }, { "epoch": 10.972772277227723, "grad_norm": 0.5480138063430786, "learning_rate": 7.460638576271097e-05, "loss": 0.4745, "step": 44330 }, { "epoch": 10.975247524752476, "grad_norm": 0.5133987665176392, "learning_rate": 7.459438996582047e-05, "loss": 0.4716, "step": 44340 }, { "epoch": 10.977722772277227, "grad_norm": 0.5059705376625061, "learning_rate": 7.458239230114837e-05, "loss": 0.4737, "step": 44350 }, { "epoch": 10.98019801980198, "grad_norm": 0.5303767919540405, "learning_rate": 7.457039276960579e-05, "loss": 0.4803, "step": 44360 }, { "epoch": 10.982673267326733, "grad_norm": 0.5089905858039856, "learning_rate": 7.455839137210404e-05, "loss": 0.4748, "step": 44370 }, { "epoch": 10.985148514851485, "grad_norm": 0.529710054397583, "learning_rate": 7.454638810955453e-05, "loss": 0.4698, "step": 44380 }, { "epoch": 10.987623762376238, "grad_norm": 0.5186175107955933, "learning_rate": 7.453438298286884e-05, "loss": 0.4765, "step": 44390 }, { "epoch": 10.990099009900991, "grad_norm": 0.5117546916007996, "learning_rate": 7.452237599295867e-05, "loss": 0.4738, "step": 44400 }, { "epoch": 10.992574257425742, "grad_norm": 0.5298922061920166, "learning_rate": 7.451036714073587e-05, "loss": 0.4713, "step": 44410 }, { "epoch": 10.995049504950495, "grad_norm": 0.5335118174552917, "learning_rate": 7.449835642711247e-05, "loss": 0.4746, "step": 44420 }, { "epoch": 10.997524752475247, "grad_norm": 0.5279130935668945, "learning_rate": 7.448634385300053e-05, "loss": 0.4775, "step": 44430 }, { "epoch": 11.0, "grad_norm": 0.5040439367294312, "learning_rate": 7.44743294193124e-05, "loss": 0.4722, "step": 44440 }, { "epoch": 11.002475247524753, "grad_norm": 0.5205048322677612, "learning_rate": 7.446231312696047e-05, "loss": 0.4727, "step": 44450 }, { "epoch": 11.004950495049505, "grad_norm": 0.5151523351669312, "learning_rate": 7.445029497685729e-05, "loss": 0.4751, "step": 44460 }, { "epoch": 11.007425742574258, "grad_norm": 0.5210133194923401, "learning_rate": 7.443827496991555e-05, "loss": 0.4728, "step": 44470 }, { "epoch": 11.009900990099009, "grad_norm": 0.48023003339767456, "learning_rate": 7.442625310704813e-05, "loss": 0.4765, "step": 44480 }, { "epoch": 11.012376237623762, "grad_norm": 0.494823157787323, "learning_rate": 7.441422938916796e-05, "loss": 0.4724, "step": 44490 }, { "epoch": 11.014851485148515, "grad_norm": 0.5308281183242798, "learning_rate": 7.44022038171882e-05, "loss": 0.4762, "step": 44500 }, { "epoch": 11.017326732673267, "grad_norm": 0.5515936017036438, "learning_rate": 7.439017639202211e-05, "loss": 0.467, "step": 44510 }, { "epoch": 11.01980198019802, "grad_norm": 0.5251182913780212, "learning_rate": 7.437814711458306e-05, "loss": 0.4749, "step": 44520 }, { "epoch": 11.022277227722773, "grad_norm": 0.5252676606178284, "learning_rate": 7.436611598578464e-05, "loss": 0.4773, "step": 44530 }, { "epoch": 11.024752475247524, "grad_norm": 0.537446916103363, "learning_rate": 7.43540830065405e-05, "loss": 0.4768, "step": 44540 }, { "epoch": 11.027227722772277, "grad_norm": 0.5407991409301758, "learning_rate": 7.434204817776449e-05, "loss": 0.4757, "step": 44550 }, { "epoch": 11.029702970297029, "grad_norm": 0.5181182026863098, "learning_rate": 7.433001150037055e-05, "loss": 0.4711, "step": 44560 }, { "epoch": 11.032178217821782, "grad_norm": 0.5577152967453003, "learning_rate": 7.431797297527282e-05, "loss": 0.4717, "step": 44570 }, { "epoch": 11.034653465346535, "grad_norm": 0.5060154795646667, "learning_rate": 7.430593260338551e-05, "loss": 0.4745, "step": 44580 }, { "epoch": 11.037128712871286, "grad_norm": 0.5065187811851501, "learning_rate": 7.429389038562303e-05, "loss": 0.4767, "step": 44590 }, { "epoch": 11.03960396039604, "grad_norm": 0.5326281785964966, "learning_rate": 7.42818463228999e-05, "loss": 0.4783, "step": 44600 }, { "epoch": 11.042079207920793, "grad_norm": 0.5409626960754395, "learning_rate": 7.42698004161308e-05, "loss": 0.4709, "step": 44610 }, { "epoch": 11.044554455445544, "grad_norm": 0.5238310098648071, "learning_rate": 7.425775266623052e-05, "loss": 0.4729, "step": 44620 }, { "epoch": 11.047029702970297, "grad_norm": 0.5139340162277222, "learning_rate": 7.4245703074114e-05, "loss": 0.4747, "step": 44630 }, { "epoch": 11.049504950495049, "grad_norm": 0.5143783092498779, "learning_rate": 7.423365164069634e-05, "loss": 0.4759, "step": 44640 }, { "epoch": 11.051980198019802, "grad_norm": 0.5218406915664673, "learning_rate": 7.422159836689277e-05, "loss": 0.4794, "step": 44650 }, { "epoch": 11.054455445544555, "grad_norm": 0.5034372806549072, "learning_rate": 7.420954325361867e-05, "loss": 0.4731, "step": 44660 }, { "epoch": 11.056930693069306, "grad_norm": 0.5387675762176514, "learning_rate": 7.41974863017895e-05, "loss": 0.4732, "step": 44670 }, { "epoch": 11.05940594059406, "grad_norm": 0.4944473206996918, "learning_rate": 7.418542751232095e-05, "loss": 0.4691, "step": 44680 }, { "epoch": 11.061881188118813, "grad_norm": 0.5361150503158569, "learning_rate": 7.417336688612879e-05, "loss": 0.4715, "step": 44690 }, { "epoch": 11.064356435643564, "grad_norm": 0.5021975636482239, "learning_rate": 7.416130442412894e-05, "loss": 0.4687, "step": 44700 }, { "epoch": 11.066831683168317, "grad_norm": 0.5309233665466309, "learning_rate": 7.414924012723747e-05, "loss": 0.4735, "step": 44710 }, { "epoch": 11.069306930693068, "grad_norm": 0.5371498465538025, "learning_rate": 7.41371739963706e-05, "loss": 0.4749, "step": 44720 }, { "epoch": 11.071782178217822, "grad_norm": 0.4919978380203247, "learning_rate": 7.412510603244464e-05, "loss": 0.4737, "step": 44730 }, { "epoch": 11.074257425742575, "grad_norm": 0.550860583782196, "learning_rate": 7.411303623637608e-05, "loss": 0.4736, "step": 44740 }, { "epoch": 11.076732673267326, "grad_norm": 0.4946020245552063, "learning_rate": 7.410096460908157e-05, "loss": 0.4697, "step": 44750 }, { "epoch": 11.07920792079208, "grad_norm": 0.48764172196388245, "learning_rate": 7.408889115147785e-05, "loss": 0.4738, "step": 44760 }, { "epoch": 11.081683168316832, "grad_norm": 0.5134451389312744, "learning_rate": 7.407681586448181e-05, "loss": 0.4757, "step": 44770 }, { "epoch": 11.084158415841584, "grad_norm": 0.5019859671592712, "learning_rate": 7.40647387490105e-05, "loss": 0.4764, "step": 44780 }, { "epoch": 11.086633663366337, "grad_norm": 0.5073714256286621, "learning_rate": 7.405265980598111e-05, "loss": 0.4799, "step": 44790 }, { "epoch": 11.089108910891088, "grad_norm": 0.5199302434921265, "learning_rate": 7.404057903631091e-05, "loss": 0.4692, "step": 44800 }, { "epoch": 11.091584158415841, "grad_norm": 0.5218994617462158, "learning_rate": 7.40284964409174e-05, "loss": 0.4736, "step": 44810 }, { "epoch": 11.094059405940595, "grad_norm": 0.4976055920124054, "learning_rate": 7.401641202071818e-05, "loss": 0.4727, "step": 44820 }, { "epoch": 11.096534653465346, "grad_norm": 0.5232361555099487, "learning_rate": 7.400432577663095e-05, "loss": 0.4722, "step": 44830 }, { "epoch": 11.099009900990099, "grad_norm": 0.5233492255210876, "learning_rate": 7.399223770957357e-05, "loss": 0.475, "step": 44840 }, { "epoch": 11.101485148514852, "grad_norm": 0.5456637740135193, "learning_rate": 7.39801478204641e-05, "loss": 0.4745, "step": 44850 }, { "epoch": 11.103960396039604, "grad_norm": 0.5641486644744873, "learning_rate": 7.396805611022064e-05, "loss": 0.4732, "step": 44860 }, { "epoch": 11.106435643564357, "grad_norm": 0.5250827670097351, "learning_rate": 7.395596257976152e-05, "loss": 0.4709, "step": 44870 }, { "epoch": 11.108910891089108, "grad_norm": 0.5514734387397766, "learning_rate": 7.39438672300051e-05, "loss": 0.4758, "step": 44880 }, { "epoch": 11.111386138613861, "grad_norm": 0.5072020888328552, "learning_rate": 7.393177006187e-05, "loss": 0.4704, "step": 44890 }, { "epoch": 11.113861386138614, "grad_norm": 0.5295908451080322, "learning_rate": 7.39196710762749e-05, "loss": 0.4742, "step": 44900 }, { "epoch": 11.116336633663366, "grad_norm": 0.4804224967956543, "learning_rate": 7.390757027413864e-05, "loss": 0.4739, "step": 44910 }, { "epoch": 11.118811881188119, "grad_norm": 0.502675473690033, "learning_rate": 7.38954676563802e-05, "loss": 0.4741, "step": 44920 }, { "epoch": 11.121287128712872, "grad_norm": 0.5144959688186646, "learning_rate": 7.388336322391867e-05, "loss": 0.4736, "step": 44930 }, { "epoch": 11.123762376237623, "grad_norm": 0.507091760635376, "learning_rate": 7.387125697767332e-05, "loss": 0.4703, "step": 44940 }, { "epoch": 11.126237623762377, "grad_norm": 0.5121309757232666, "learning_rate": 7.385914891856354e-05, "loss": 0.4698, "step": 44950 }, { "epoch": 11.128712871287128, "grad_norm": 0.4949159026145935, "learning_rate": 7.384703904750886e-05, "loss": 0.4746, "step": 44960 }, { "epoch": 11.131188118811881, "grad_norm": 0.4994420111179352, "learning_rate": 7.383492736542895e-05, "loss": 0.472, "step": 44970 }, { "epoch": 11.133663366336634, "grad_norm": 0.5221295952796936, "learning_rate": 7.38228138732436e-05, "loss": 0.4681, "step": 44980 }, { "epoch": 11.136138613861386, "grad_norm": 0.5778297781944275, "learning_rate": 7.381069857187275e-05, "loss": 0.4711, "step": 44990 }, { "epoch": 11.138613861386139, "grad_norm": 0.5467153191566467, "learning_rate": 7.379858146223649e-05, "loss": 0.4717, "step": 45000 }, { "epoch": 11.141089108910892, "grad_norm": 0.5166712999343872, "learning_rate": 7.3786462545255e-05, "loss": 0.4773, "step": 45010 }, { "epoch": 11.143564356435643, "grad_norm": 0.5146257877349854, "learning_rate": 7.377434182184867e-05, "loss": 0.4774, "step": 45020 }, { "epoch": 11.146039603960396, "grad_norm": 0.5066910982131958, "learning_rate": 7.376221929293798e-05, "loss": 0.4786, "step": 45030 }, { "epoch": 11.148514851485148, "grad_norm": 0.5174668431282043, "learning_rate": 7.375009495944356e-05, "loss": 0.4747, "step": 45040 }, { "epoch": 11.150990099009901, "grad_norm": 0.5136123299598694, "learning_rate": 7.373796882228615e-05, "loss": 0.4768, "step": 45050 }, { "epoch": 11.153465346534654, "grad_norm": 0.491397887468338, "learning_rate": 7.372584088238666e-05, "loss": 0.4749, "step": 45060 }, { "epoch": 11.155940594059405, "grad_norm": 0.49725341796875, "learning_rate": 7.371371114066615e-05, "loss": 0.473, "step": 45070 }, { "epoch": 11.158415841584159, "grad_norm": 0.5051853656768799, "learning_rate": 7.370157959804576e-05, "loss": 0.4754, "step": 45080 }, { "epoch": 11.160891089108912, "grad_norm": 0.4835870862007141, "learning_rate": 7.368944625544684e-05, "loss": 0.4717, "step": 45090 }, { "epoch": 11.163366336633663, "grad_norm": 0.5318127870559692, "learning_rate": 7.367731111379078e-05, "loss": 0.4732, "step": 45100 }, { "epoch": 11.165841584158416, "grad_norm": 0.5140201449394226, "learning_rate": 7.366517417399922e-05, "loss": 0.4728, "step": 45110 }, { "epoch": 11.168316831683168, "grad_norm": 0.5307651162147522, "learning_rate": 7.365303543699386e-05, "loss": 0.4695, "step": 45120 }, { "epoch": 11.17079207920792, "grad_norm": 0.4871198534965515, "learning_rate": 7.364089490369655e-05, "loss": 0.4742, "step": 45130 }, { "epoch": 11.173267326732674, "grad_norm": 0.500160276889801, "learning_rate": 7.362875257502928e-05, "loss": 0.4721, "step": 45140 }, { "epoch": 11.175742574257425, "grad_norm": 0.48987072706222534, "learning_rate": 7.36166084519142e-05, "loss": 0.4738, "step": 45150 }, { "epoch": 11.178217821782178, "grad_norm": 0.48045283555984497, "learning_rate": 7.360446253527355e-05, "loss": 0.4751, "step": 45160 }, { "epoch": 11.180693069306932, "grad_norm": 0.4914335012435913, "learning_rate": 7.359231482602976e-05, "loss": 0.4707, "step": 45170 }, { "epoch": 11.183168316831683, "grad_norm": 0.5412306785583496, "learning_rate": 7.358016532510534e-05, "loss": 0.4715, "step": 45180 }, { "epoch": 11.185643564356436, "grad_norm": 0.5247246623039246, "learning_rate": 7.3568014033423e-05, "loss": 0.4762, "step": 45190 }, { "epoch": 11.188118811881187, "grad_norm": 0.5015886425971985, "learning_rate": 7.355586095190551e-05, "loss": 0.4707, "step": 45200 }, { "epoch": 11.19059405940594, "grad_norm": 0.5038384199142456, "learning_rate": 7.354370608147585e-05, "loss": 0.4725, "step": 45210 }, { "epoch": 11.193069306930694, "grad_norm": 0.5474409461021423, "learning_rate": 7.353154942305708e-05, "loss": 0.4763, "step": 45220 }, { "epoch": 11.195544554455445, "grad_norm": 0.5027785897254944, "learning_rate": 7.351939097757242e-05, "loss": 0.4743, "step": 45230 }, { "epoch": 11.198019801980198, "grad_norm": 0.5236126184463501, "learning_rate": 7.350723074594525e-05, "loss": 0.4788, "step": 45240 }, { "epoch": 11.200495049504951, "grad_norm": 0.5157689452171326, "learning_rate": 7.349506872909902e-05, "loss": 0.4708, "step": 45250 }, { "epoch": 11.202970297029703, "grad_norm": 0.5075019598007202, "learning_rate": 7.348290492795738e-05, "loss": 0.4729, "step": 45260 }, { "epoch": 11.205445544554456, "grad_norm": 0.49508121609687805, "learning_rate": 7.347073934344408e-05, "loss": 0.4748, "step": 45270 }, { "epoch": 11.207920792079207, "grad_norm": 0.5407790541648865, "learning_rate": 7.345857197648302e-05, "loss": 0.4753, "step": 45280 }, { "epoch": 11.21039603960396, "grad_norm": 0.49031901359558105, "learning_rate": 7.344640282799824e-05, "loss": 0.47, "step": 45290 }, { "epoch": 11.212871287128714, "grad_norm": 0.5279802083969116, "learning_rate": 7.343423189891389e-05, "loss": 0.471, "step": 45300 }, { "epoch": 11.215346534653465, "grad_norm": 0.514664351940155, "learning_rate": 7.34220591901543e-05, "loss": 0.477, "step": 45310 }, { "epoch": 11.217821782178218, "grad_norm": 0.5017650723457336, "learning_rate": 7.340988470264387e-05, "loss": 0.4728, "step": 45320 }, { "epoch": 11.220297029702971, "grad_norm": 0.5014967918395996, "learning_rate": 7.339770843730718e-05, "loss": 0.4687, "step": 45330 }, { "epoch": 11.222772277227723, "grad_norm": 0.4955092668533325, "learning_rate": 7.338553039506897e-05, "loss": 0.4742, "step": 45340 }, { "epoch": 11.225247524752476, "grad_norm": 0.5198754668235779, "learning_rate": 7.337335057685404e-05, "loss": 0.4722, "step": 45350 }, { "epoch": 11.227722772277227, "grad_norm": 0.5071022510528564, "learning_rate": 7.336116898358738e-05, "loss": 0.4721, "step": 45360 }, { "epoch": 11.23019801980198, "grad_norm": 0.5241120457649231, "learning_rate": 7.334898561619412e-05, "loss": 0.4727, "step": 45370 }, { "epoch": 11.232673267326733, "grad_norm": 0.5439292788505554, "learning_rate": 7.333680047559948e-05, "loss": 0.4691, "step": 45380 }, { "epoch": 11.235148514851485, "grad_norm": 0.49288010597229004, "learning_rate": 7.332461356272887e-05, "loss": 0.4708, "step": 45390 }, { "epoch": 11.237623762376238, "grad_norm": 0.541960597038269, "learning_rate": 7.331242487850776e-05, "loss": 0.4761, "step": 45400 }, { "epoch": 11.240099009900991, "grad_norm": 0.5127241015434265, "learning_rate": 7.330023442386185e-05, "loss": 0.4716, "step": 45410 }, { "epoch": 11.242574257425742, "grad_norm": 0.5006467700004578, "learning_rate": 7.32880421997169e-05, "loss": 0.4712, "step": 45420 }, { "epoch": 11.245049504950495, "grad_norm": 0.5264111161231995, "learning_rate": 7.327584820699884e-05, "loss": 0.4739, "step": 45430 }, { "epoch": 11.247524752475247, "grad_norm": 0.5292328000068665, "learning_rate": 7.326365244663371e-05, "loss": 0.4757, "step": 45440 }, { "epoch": 11.25, "grad_norm": 0.5319954752922058, "learning_rate": 7.32514549195477e-05, "loss": 0.4764, "step": 45450 }, { "epoch": 11.252475247524753, "grad_norm": 0.5169252157211304, "learning_rate": 7.323925562666714e-05, "loss": 0.4683, "step": 45460 }, { "epoch": 11.254950495049505, "grad_norm": 0.49759963154792786, "learning_rate": 7.322705456891848e-05, "loss": 0.4723, "step": 45470 }, { "epoch": 11.257425742574258, "grad_norm": 0.5277042388916016, "learning_rate": 7.321485174722831e-05, "loss": 0.4702, "step": 45480 }, { "epoch": 11.259900990099009, "grad_norm": 0.4944283366203308, "learning_rate": 7.320264716252337e-05, "loss": 0.4767, "step": 45490 }, { "epoch": 11.262376237623762, "grad_norm": 0.4999336302280426, "learning_rate": 7.319044081573047e-05, "loss": 0.472, "step": 45500 }, { "epoch": 11.264851485148515, "grad_norm": 0.5174810886383057, "learning_rate": 7.317823270777664e-05, "loss": 0.4693, "step": 45510 }, { "epoch": 11.267326732673267, "grad_norm": 0.5227034091949463, "learning_rate": 7.316602283958902e-05, "loss": 0.4706, "step": 45520 }, { "epoch": 11.26980198019802, "grad_norm": 0.4764997661113739, "learning_rate": 7.315381121209484e-05, "loss": 0.4782, "step": 45530 }, { "epoch": 11.272277227722773, "grad_norm": 0.5016588568687439, "learning_rate": 7.314159782622149e-05, "loss": 0.4765, "step": 45540 }, { "epoch": 11.274752475247524, "grad_norm": 0.5130719542503357, "learning_rate": 7.31293826828965e-05, "loss": 0.467, "step": 45550 }, { "epoch": 11.277227722772277, "grad_norm": 0.5389007925987244, "learning_rate": 7.311716578304757e-05, "loss": 0.4778, "step": 45560 }, { "epoch": 11.27970297029703, "grad_norm": 0.5384014844894409, "learning_rate": 7.310494712760243e-05, "loss": 0.4754, "step": 45570 }, { "epoch": 11.282178217821782, "grad_norm": 0.5205277800559998, "learning_rate": 7.309272671748904e-05, "loss": 0.4742, "step": 45580 }, { "epoch": 11.284653465346535, "grad_norm": 0.5607534646987915, "learning_rate": 7.308050455363547e-05, "loss": 0.4741, "step": 45590 }, { "epoch": 11.287128712871286, "grad_norm": 0.5359607934951782, "learning_rate": 7.306828063696988e-05, "loss": 0.4732, "step": 45600 }, { "epoch": 11.28960396039604, "grad_norm": 0.5094784498214722, "learning_rate": 7.30560549684206e-05, "loss": 0.4761, "step": 45610 }, { "epoch": 11.292079207920793, "grad_norm": 0.5217230319976807, "learning_rate": 7.304382754891614e-05, "loss": 0.4772, "step": 45620 }, { "epoch": 11.294554455445544, "grad_norm": 0.5244030952453613, "learning_rate": 7.303159837938502e-05, "loss": 0.4733, "step": 45630 }, { "epoch": 11.297029702970297, "grad_norm": 0.5131979584693909, "learning_rate": 7.301936746075601e-05, "loss": 0.4669, "step": 45640 }, { "epoch": 11.299504950495049, "grad_norm": 0.5019719004631042, "learning_rate": 7.300713479395795e-05, "loss": 0.4763, "step": 45650 }, { "epoch": 11.301980198019802, "grad_norm": 0.49153581261634827, "learning_rate": 7.299490037991985e-05, "loss": 0.4664, "step": 45660 }, { "epoch": 11.304455445544555, "grad_norm": 0.49761536717414856, "learning_rate": 7.298266421957081e-05, "loss": 0.4734, "step": 45670 }, { "epoch": 11.306930693069306, "grad_norm": 0.5370696187019348, "learning_rate": 7.29704263138401e-05, "loss": 0.4742, "step": 45680 }, { "epoch": 11.30940594059406, "grad_norm": 0.5305495262145996, "learning_rate": 7.295818666365711e-05, "loss": 0.4688, "step": 45690 }, { "epoch": 11.311881188118813, "grad_norm": 0.5119937062263489, "learning_rate": 7.294594526995133e-05, "loss": 0.4781, "step": 45700 }, { "epoch": 11.314356435643564, "grad_norm": 0.516751766204834, "learning_rate": 7.293370213365248e-05, "loss": 0.4788, "step": 45710 }, { "epoch": 11.316831683168317, "grad_norm": 0.5370246171951294, "learning_rate": 7.292145725569026e-05, "loss": 0.476, "step": 45720 }, { "epoch": 11.319306930693068, "grad_norm": 0.5778476595878601, "learning_rate": 7.290921063699465e-05, "loss": 0.4755, "step": 45730 }, { "epoch": 11.321782178217822, "grad_norm": 0.5069987773895264, "learning_rate": 7.289696227849566e-05, "loss": 0.4721, "step": 45740 }, { "epoch": 11.324257425742575, "grad_norm": 0.5435312390327454, "learning_rate": 7.288471218112351e-05, "loss": 0.4761, "step": 45750 }, { "epoch": 11.326732673267326, "grad_norm": 0.533811628818512, "learning_rate": 7.287246034580849e-05, "loss": 0.4727, "step": 45760 }, { "epoch": 11.32920792079208, "grad_norm": 0.5162680149078369, "learning_rate": 7.286020677348106e-05, "loss": 0.4809, "step": 45770 }, { "epoch": 11.331683168316832, "grad_norm": 0.4995698928833008, "learning_rate": 7.284795146507179e-05, "loss": 0.4757, "step": 45780 }, { "epoch": 11.334158415841584, "grad_norm": 0.5230041146278381, "learning_rate": 7.283569442151137e-05, "loss": 0.4737, "step": 45790 }, { "epoch": 11.336633663366337, "grad_norm": 0.5341967344284058, "learning_rate": 7.282343564373066e-05, "loss": 0.481, "step": 45800 }, { "epoch": 11.339108910891088, "grad_norm": 0.504395067691803, "learning_rate": 7.281117513266066e-05, "loss": 0.473, "step": 45810 }, { "epoch": 11.341584158415841, "grad_norm": 0.48298296332359314, "learning_rate": 7.279891288923245e-05, "loss": 0.4738, "step": 45820 }, { "epoch": 11.344059405940595, "grad_norm": 0.5292891263961792, "learning_rate": 7.278664891437725e-05, "loss": 0.4736, "step": 45830 }, { "epoch": 11.346534653465346, "grad_norm": 0.5073050856590271, "learning_rate": 7.277438320902645e-05, "loss": 0.4727, "step": 45840 }, { "epoch": 11.349009900990099, "grad_norm": 0.4869324266910553, "learning_rate": 7.276211577411152e-05, "loss": 0.4735, "step": 45850 }, { "epoch": 11.351485148514852, "grad_norm": 0.5238689184188843, "learning_rate": 7.274984661056414e-05, "loss": 0.4747, "step": 45860 }, { "epoch": 11.353960396039604, "grad_norm": 0.5342527627944946, "learning_rate": 7.273757571931603e-05, "loss": 0.4795, "step": 45870 }, { "epoch": 11.356435643564357, "grad_norm": 0.5298271775245667, "learning_rate": 7.272530310129912e-05, "loss": 0.4767, "step": 45880 }, { "epoch": 11.358910891089108, "grad_norm": 0.5174386501312256, "learning_rate": 7.27130287574454e-05, "loss": 0.4751, "step": 45890 }, { "epoch": 11.361386138613861, "grad_norm": 0.5532853603363037, "learning_rate": 7.270075268868705e-05, "loss": 0.4733, "step": 45900 }, { "epoch": 11.363861386138614, "grad_norm": 0.5001070499420166, "learning_rate": 7.268847489595633e-05, "loss": 0.4702, "step": 45910 }, { "epoch": 11.366336633663366, "grad_norm": 0.48131024837493896, "learning_rate": 7.267619538018568e-05, "loss": 0.4681, "step": 45920 }, { "epoch": 11.368811881188119, "grad_norm": 0.5086459517478943, "learning_rate": 7.266391414230765e-05, "loss": 0.4778, "step": 45930 }, { "epoch": 11.371287128712872, "grad_norm": 0.5301048159599304, "learning_rate": 7.265163118325489e-05, "loss": 0.4751, "step": 45940 }, { "epoch": 11.373762376237623, "grad_norm": 0.520033061504364, "learning_rate": 7.263934650396023e-05, "loss": 0.4763, "step": 45950 }, { "epoch": 11.376237623762377, "grad_norm": 0.4913501441478729, "learning_rate": 7.262706010535663e-05, "loss": 0.4713, "step": 45960 }, { "epoch": 11.378712871287128, "grad_norm": 0.5142541527748108, "learning_rate": 7.261477198837712e-05, "loss": 0.4749, "step": 45970 }, { "epoch": 11.381188118811881, "grad_norm": 0.5118765830993652, "learning_rate": 7.260248215395493e-05, "loss": 0.4717, "step": 45980 }, { "epoch": 11.383663366336634, "grad_norm": 0.4698222279548645, "learning_rate": 7.259019060302339e-05, "loss": 0.4727, "step": 45990 }, { "epoch": 11.386138613861386, "grad_norm": 0.4954284131526947, "learning_rate": 7.257789733651596e-05, "loss": 0.4741, "step": 46000 }, { "epoch": 11.388613861386139, "grad_norm": 0.526918351650238, "learning_rate": 7.256560235536621e-05, "loss": 0.4734, "step": 46010 }, { "epoch": 11.391089108910892, "grad_norm": 0.5034594535827637, "learning_rate": 7.255330566050789e-05, "loss": 0.4712, "step": 46020 }, { "epoch": 11.393564356435643, "grad_norm": 0.49385538697242737, "learning_rate": 7.254100725287485e-05, "loss": 0.4773, "step": 46030 }, { "epoch": 11.396039603960396, "grad_norm": 0.5052968859672546, "learning_rate": 7.252870713340105e-05, "loss": 0.4816, "step": 46040 }, { "epoch": 11.398514851485148, "grad_norm": 0.4922032356262207, "learning_rate": 7.251640530302065e-05, "loss": 0.4682, "step": 46050 }, { "epoch": 11.400990099009901, "grad_norm": 0.48756837844848633, "learning_rate": 7.250410176266781e-05, "loss": 0.4708, "step": 46060 }, { "epoch": 11.403465346534654, "grad_norm": 0.5203338265419006, "learning_rate": 7.249179651327701e-05, "loss": 0.4741, "step": 46070 }, { "epoch": 11.405940594059405, "grad_norm": 0.4723367989063263, "learning_rate": 7.247948955578268e-05, "loss": 0.4755, "step": 46080 }, { "epoch": 11.408415841584159, "grad_norm": 0.5686448216438293, "learning_rate": 7.246718089111946e-05, "loss": 0.4704, "step": 46090 }, { "epoch": 11.410891089108912, "grad_norm": 0.5631465911865234, "learning_rate": 7.245487052022213e-05, "loss": 0.4749, "step": 46100 }, { "epoch": 11.413366336633663, "grad_norm": 0.5403419137001038, "learning_rate": 7.244255844402557e-05, "loss": 0.475, "step": 46110 }, { "epoch": 11.415841584158416, "grad_norm": 0.5252127647399902, "learning_rate": 7.243024466346479e-05, "loss": 0.47, "step": 46120 }, { "epoch": 11.418316831683168, "grad_norm": 0.5316773056983948, "learning_rate": 7.241792917947499e-05, "loss": 0.4742, "step": 46130 }, { "epoch": 11.42079207920792, "grad_norm": 0.4967900216579437, "learning_rate": 7.240561199299138e-05, "loss": 0.4698, "step": 46140 }, { "epoch": 11.423267326732674, "grad_norm": 0.5309997200965881, "learning_rate": 7.239329310494942e-05, "loss": 0.4699, "step": 46150 }, { "epoch": 11.425742574257425, "grad_norm": 0.5050274133682251, "learning_rate": 7.238097251628463e-05, "loss": 0.4691, "step": 46160 }, { "epoch": 11.428217821782178, "grad_norm": 0.52110755443573, "learning_rate": 7.236865022793268e-05, "loss": 0.4727, "step": 46170 }, { "epoch": 11.430693069306932, "grad_norm": 0.5157992839813232, "learning_rate": 7.235632624082938e-05, "loss": 0.4814, "step": 46180 }, { "epoch": 11.433168316831683, "grad_norm": 0.49540144205093384, "learning_rate": 7.234400055591061e-05, "loss": 0.4782, "step": 46190 }, { "epoch": 11.435643564356436, "grad_norm": 0.49221089482307434, "learning_rate": 7.233167317411248e-05, "loss": 0.4672, "step": 46200 }, { "epoch": 11.438118811881187, "grad_norm": 0.48536181449890137, "learning_rate": 7.231934409637113e-05, "loss": 0.4717, "step": 46210 }, { "epoch": 11.44059405940594, "grad_norm": 0.4871537387371063, "learning_rate": 7.23070133236229e-05, "loss": 0.4692, "step": 46220 }, { "epoch": 11.443069306930694, "grad_norm": 0.4841443598270416, "learning_rate": 7.229468085680421e-05, "loss": 0.4747, "step": 46230 }, { "epoch": 11.445544554455445, "grad_norm": 0.5213012099266052, "learning_rate": 7.228234669685167e-05, "loss": 0.476, "step": 46240 }, { "epoch": 11.448019801980198, "grad_norm": 0.5131776332855225, "learning_rate": 7.227001084470192e-05, "loss": 0.4705, "step": 46250 }, { "epoch": 11.450495049504951, "grad_norm": 0.4911411702632904, "learning_rate": 7.225767330129183e-05, "loss": 0.4756, "step": 46260 }, { "epoch": 11.452970297029703, "grad_norm": 0.4659181237220764, "learning_rate": 7.224533406755834e-05, "loss": 0.4772, "step": 46270 }, { "epoch": 11.455445544554456, "grad_norm": 0.5034845471382141, "learning_rate": 7.223299314443852e-05, "loss": 0.4693, "step": 46280 }, { "epoch": 11.457920792079207, "grad_norm": 0.5253212451934814, "learning_rate": 7.222065053286962e-05, "loss": 0.4742, "step": 46290 }, { "epoch": 11.46039603960396, "grad_norm": 0.5341618657112122, "learning_rate": 7.220830623378893e-05, "loss": 0.4747, "step": 46300 }, { "epoch": 11.462871287128714, "grad_norm": 0.5003791451454163, "learning_rate": 7.219596024813396e-05, "loss": 0.4728, "step": 46310 }, { "epoch": 11.465346534653465, "grad_norm": 0.5119994282722473, "learning_rate": 7.218361257684228e-05, "loss": 0.4767, "step": 46320 }, { "epoch": 11.467821782178218, "grad_norm": 0.4702320098876953, "learning_rate": 7.217126322085163e-05, "loss": 0.4731, "step": 46330 }, { "epoch": 11.47029702970297, "grad_norm": 0.5022491216659546, "learning_rate": 7.215891218109986e-05, "loss": 0.4742, "step": 46340 }, { "epoch": 11.472772277227723, "grad_norm": 0.5102605223655701, "learning_rate": 7.214655945852495e-05, "loss": 0.4634, "step": 46350 }, { "epoch": 11.475247524752476, "grad_norm": 0.5101298689842224, "learning_rate": 7.213420505406498e-05, "loss": 0.472, "step": 46360 }, { "epoch": 11.477722772277227, "grad_norm": 0.5118571519851685, "learning_rate": 7.212184896865823e-05, "loss": 0.4734, "step": 46370 }, { "epoch": 11.48019801980198, "grad_norm": 0.5054214000701904, "learning_rate": 7.210949120324303e-05, "loss": 0.4739, "step": 46380 }, { "epoch": 11.482673267326733, "grad_norm": 0.5590181946754456, "learning_rate": 7.20971317587579e-05, "loss": 0.4746, "step": 46390 }, { "epoch": 11.485148514851485, "grad_norm": 0.4994538426399231, "learning_rate": 7.208477063614143e-05, "loss": 0.4682, "step": 46400 }, { "epoch": 11.487623762376238, "grad_norm": 0.49647805094718933, "learning_rate": 7.207240783633238e-05, "loss": 0.4767, "step": 46410 }, { "epoch": 11.490099009900991, "grad_norm": 0.4989602267742157, "learning_rate": 7.206004336026962e-05, "loss": 0.4756, "step": 46420 }, { "epoch": 11.492574257425742, "grad_norm": 0.4882189631462097, "learning_rate": 7.204767720889214e-05, "loss": 0.4769, "step": 46430 }, { "epoch": 11.495049504950495, "grad_norm": 0.501232922077179, "learning_rate": 7.20353093831391e-05, "loss": 0.4722, "step": 46440 }, { "epoch": 11.497524752475247, "grad_norm": 0.5341388583183289, "learning_rate": 7.202293988394972e-05, "loss": 0.4732, "step": 46450 }, { "epoch": 11.5, "grad_norm": 0.5418168902397156, "learning_rate": 7.20105687122634e-05, "loss": 0.4752, "step": 46460 }, { "epoch": 11.502475247524753, "grad_norm": 0.5330966114997864, "learning_rate": 7.199819586901962e-05, "loss": 0.4749, "step": 46470 }, { "epoch": 11.504950495049505, "grad_norm": 0.5266857147216797, "learning_rate": 7.198582135515806e-05, "loss": 0.4724, "step": 46480 }, { "epoch": 11.507425742574258, "grad_norm": 0.5191126465797424, "learning_rate": 7.197344517161846e-05, "loss": 0.47, "step": 46490 }, { "epoch": 11.509900990099009, "grad_norm": 0.4970894157886505, "learning_rate": 7.196106731934072e-05, "loss": 0.4746, "step": 46500 }, { "epoch": 11.512376237623762, "grad_norm": 0.5254104733467102, "learning_rate": 7.194868779926482e-05, "loss": 0.4658, "step": 46510 }, { "epoch": 11.514851485148515, "grad_norm": 0.47587716579437256, "learning_rate": 7.193630661233095e-05, "loss": 0.4707, "step": 46520 }, { "epoch": 11.517326732673267, "grad_norm": 0.5025705099105835, "learning_rate": 7.192392375947936e-05, "loss": 0.478, "step": 46530 }, { "epoch": 11.51980198019802, "grad_norm": 0.49994149804115295, "learning_rate": 7.191153924165043e-05, "loss": 0.4702, "step": 46540 }, { "epoch": 11.522277227722773, "grad_norm": 0.5087057948112488, "learning_rate": 7.189915305978473e-05, "loss": 0.476, "step": 46550 }, { "epoch": 11.524752475247524, "grad_norm": 0.49447470903396606, "learning_rate": 7.188676521482284e-05, "loss": 0.4741, "step": 46560 }, { "epoch": 11.527227722772277, "grad_norm": 0.514257550239563, "learning_rate": 7.187437570770561e-05, "loss": 0.4682, "step": 46570 }, { "epoch": 11.52970297029703, "grad_norm": 0.5218388438224792, "learning_rate": 7.186198453937388e-05, "loss": 0.4716, "step": 46580 }, { "epoch": 11.532178217821782, "grad_norm": 0.5068994760513306, "learning_rate": 7.18495917107687e-05, "loss": 0.4726, "step": 46590 }, { "epoch": 11.534653465346535, "grad_norm": 0.5209563374519348, "learning_rate": 7.183719722283123e-05, "loss": 0.4733, "step": 46600 }, { "epoch": 11.537128712871286, "grad_norm": 0.48101916909217834, "learning_rate": 7.182480107650275e-05, "loss": 0.4714, "step": 46610 }, { "epoch": 11.53960396039604, "grad_norm": 0.5363243818283081, "learning_rate": 7.181240327272464e-05, "loss": 0.4763, "step": 46620 }, { "epoch": 11.542079207920793, "grad_norm": 0.5280161499977112, "learning_rate": 7.180000381243848e-05, "loss": 0.471, "step": 46630 }, { "epoch": 11.544554455445544, "grad_norm": 0.48843899369239807, "learning_rate": 7.178760269658587e-05, "loss": 0.4718, "step": 46640 }, { "epoch": 11.547029702970297, "grad_norm": 0.508329451084137, "learning_rate": 7.177519992610864e-05, "loss": 0.467, "step": 46650 }, { "epoch": 11.549504950495049, "grad_norm": 0.5069247484207153, "learning_rate": 7.176279550194865e-05, "loss": 0.4702, "step": 46660 }, { "epoch": 11.551980198019802, "grad_norm": 0.4929649829864502, "learning_rate": 7.175038942504799e-05, "loss": 0.4742, "step": 46670 }, { "epoch": 11.554455445544555, "grad_norm": 0.4878731667995453, "learning_rate": 7.17379816963488e-05, "loss": 0.4719, "step": 46680 }, { "epoch": 11.556930693069306, "grad_norm": 0.5069460272789001, "learning_rate": 7.172557231679334e-05, "loss": 0.4736, "step": 46690 }, { "epoch": 11.55940594059406, "grad_norm": 0.5213707089424133, "learning_rate": 7.171316128732405e-05, "loss": 0.473, "step": 46700 }, { "epoch": 11.561881188118813, "grad_norm": 0.49862489104270935, "learning_rate": 7.170074860888344e-05, "loss": 0.4733, "step": 46710 }, { "epoch": 11.564356435643564, "grad_norm": 0.5188993215560913, "learning_rate": 7.16883342824142e-05, "loss": 0.4739, "step": 46720 }, { "epoch": 11.566831683168317, "grad_norm": 0.5110024809837341, "learning_rate": 7.16759183088591e-05, "loss": 0.4766, "step": 46730 }, { "epoch": 11.569306930693068, "grad_norm": 0.5078525543212891, "learning_rate": 7.166350068916104e-05, "loss": 0.4721, "step": 46740 }, { "epoch": 11.571782178217822, "grad_norm": 0.489694744348526, "learning_rate": 7.165108142426309e-05, "loss": 0.4719, "step": 46750 }, { "epoch": 11.574257425742575, "grad_norm": 0.5083659291267395, "learning_rate": 7.163866051510838e-05, "loss": 0.4765, "step": 46760 }, { "epoch": 11.576732673267326, "grad_norm": 0.4795656204223633, "learning_rate": 7.16262379626402e-05, "loss": 0.4781, "step": 46770 }, { "epoch": 11.57920792079208, "grad_norm": 0.5012110471725464, "learning_rate": 7.161381376780198e-05, "loss": 0.4666, "step": 46780 }, { "epoch": 11.581683168316832, "grad_norm": 0.49917176365852356, "learning_rate": 7.160138793153723e-05, "loss": 0.473, "step": 46790 }, { "epoch": 11.584158415841584, "grad_norm": 0.47502270340919495, "learning_rate": 7.158896045478964e-05, "loss": 0.4787, "step": 46800 }, { "epoch": 11.586633663366337, "grad_norm": 0.4851313829421997, "learning_rate": 7.157653133850296e-05, "loss": 0.465, "step": 46810 }, { "epoch": 11.589108910891088, "grad_norm": 0.48768746852874756, "learning_rate": 7.156410058362112e-05, "loss": 0.4718, "step": 46820 }, { "epoch": 11.591584158415841, "grad_norm": 0.5089943408966064, "learning_rate": 7.155166819108816e-05, "loss": 0.4722, "step": 46830 }, { "epoch": 11.594059405940595, "grad_norm": 0.5030878186225891, "learning_rate": 7.153923416184823e-05, "loss": 0.4706, "step": 46840 }, { "epoch": 11.596534653465346, "grad_norm": 0.5366117358207703, "learning_rate": 7.152679849684561e-05, "loss": 0.477, "step": 46850 }, { "epoch": 11.599009900990099, "grad_norm": 0.4937104880809784, "learning_rate": 7.15143611970247e-05, "loss": 0.4744, "step": 46860 }, { "epoch": 11.601485148514852, "grad_norm": 0.5105372667312622, "learning_rate": 7.150192226333007e-05, "loss": 0.4762, "step": 46870 }, { "epoch": 11.603960396039604, "grad_norm": 0.5163099765777588, "learning_rate": 7.14894816967063e-05, "loss": 0.473, "step": 46880 }, { "epoch": 11.606435643564357, "grad_norm": 0.5404608249664307, "learning_rate": 7.147703949809825e-05, "loss": 0.4722, "step": 46890 }, { "epoch": 11.608910891089108, "grad_norm": 0.6715019941329956, "learning_rate": 7.146459566845077e-05, "loss": 0.4726, "step": 46900 }, { "epoch": 11.611386138613861, "grad_norm": 0.5321164131164551, "learning_rate": 7.14521502087089e-05, "loss": 0.4697, "step": 46910 }, { "epoch": 11.613861386138614, "grad_norm": 0.5065787434577942, "learning_rate": 7.14397031198178e-05, "loss": 0.4809, "step": 46920 }, { "epoch": 11.616336633663366, "grad_norm": 0.5026915073394775, "learning_rate": 7.142725440272273e-05, "loss": 0.477, "step": 46930 }, { "epoch": 11.618811881188119, "grad_norm": 0.49817249178886414, "learning_rate": 7.141480405836911e-05, "loss": 0.4703, "step": 46940 }, { "epoch": 11.621287128712872, "grad_norm": 0.5256800651550293, "learning_rate": 7.140235208770244e-05, "loss": 0.4714, "step": 46950 }, { "epoch": 11.623762376237623, "grad_norm": 0.5267153978347778, "learning_rate": 7.138989849166836e-05, "loss": 0.4726, "step": 46960 }, { "epoch": 11.626237623762377, "grad_norm": 0.49886688590049744, "learning_rate": 7.137744327121267e-05, "loss": 0.4718, "step": 46970 }, { "epoch": 11.628712871287128, "grad_norm": 0.500005304813385, "learning_rate": 7.136498642728124e-05, "loss": 0.4723, "step": 46980 }, { "epoch": 11.631188118811881, "grad_norm": 0.5136427879333496, "learning_rate": 7.135252796082007e-05, "loss": 0.472, "step": 46990 }, { "epoch": 11.633663366336634, "grad_norm": 0.5313377380371094, "learning_rate": 7.134006787277533e-05, "loss": 0.4696, "step": 47000 }, { "epoch": 11.636138613861386, "grad_norm": 0.4854505658149719, "learning_rate": 7.132760616409326e-05, "loss": 0.4725, "step": 47010 }, { "epoch": 11.638613861386139, "grad_norm": 0.494296669960022, "learning_rate": 7.131514283572027e-05, "loss": 0.4692, "step": 47020 }, { "epoch": 11.641089108910892, "grad_norm": 0.48791760206222534, "learning_rate": 7.130267788860283e-05, "loss": 0.4721, "step": 47030 }, { "epoch": 11.643564356435643, "grad_norm": 0.4901012182235718, "learning_rate": 7.12902113236876e-05, "loss": 0.4669, "step": 47040 }, { "epoch": 11.646039603960396, "grad_norm": 0.5031297206878662, "learning_rate": 7.12777431419213e-05, "loss": 0.4729, "step": 47050 }, { "epoch": 11.648514851485148, "grad_norm": 0.5126808285713196, "learning_rate": 7.126527334425086e-05, "loss": 0.4685, "step": 47060 }, { "epoch": 11.650990099009901, "grad_norm": 0.5329969525337219, "learning_rate": 7.125280193162324e-05, "loss": 0.4761, "step": 47070 }, { "epoch": 11.653465346534654, "grad_norm": 0.5083256363868713, "learning_rate": 7.124032890498555e-05, "loss": 0.4728, "step": 47080 }, { "epoch": 11.655940594059405, "grad_norm": 0.4946599006652832, "learning_rate": 7.122785426528508e-05, "loss": 0.4719, "step": 47090 }, { "epoch": 11.658415841584159, "grad_norm": 0.5043632388114929, "learning_rate": 7.121537801346915e-05, "loss": 0.4765, "step": 47100 }, { "epoch": 11.660891089108912, "grad_norm": 0.5223202705383301, "learning_rate": 7.120290015048528e-05, "loss": 0.4751, "step": 47110 }, { "epoch": 11.663366336633663, "grad_norm": 0.5109861493110657, "learning_rate": 7.119042067728108e-05, "loss": 0.4751, "step": 47120 }, { "epoch": 11.665841584158416, "grad_norm": 0.5375095009803772, "learning_rate": 7.117793959480425e-05, "loss": 0.4761, "step": 47130 }, { "epoch": 11.668316831683168, "grad_norm": 0.5015677809715271, "learning_rate": 7.11654569040027e-05, "loss": 0.4739, "step": 47140 }, { "epoch": 11.67079207920792, "grad_norm": 0.4892488718032837, "learning_rate": 7.115297260582436e-05, "loss": 0.4738, "step": 47150 }, { "epoch": 11.673267326732674, "grad_norm": 0.48882609605789185, "learning_rate": 7.114048670121733e-05, "loss": 0.4761, "step": 47160 }, { "epoch": 11.675742574257425, "grad_norm": 0.47371333837509155, "learning_rate": 7.112799919112989e-05, "loss": 0.4742, "step": 47170 }, { "epoch": 11.678217821782178, "grad_norm": 0.5001059770584106, "learning_rate": 7.111551007651031e-05, "loss": 0.4723, "step": 47180 }, { "epoch": 11.680693069306932, "grad_norm": 0.5181347131729126, "learning_rate": 7.110301935830711e-05, "loss": 0.468, "step": 47190 }, { "epoch": 11.683168316831683, "grad_norm": 0.4832153022289276, "learning_rate": 7.109052703746884e-05, "loss": 0.4672, "step": 47200 }, { "epoch": 11.685643564356436, "grad_norm": 0.4877460300922394, "learning_rate": 7.107803311494424e-05, "loss": 0.4682, "step": 47210 }, { "epoch": 11.688118811881187, "grad_norm": 0.5013202428817749, "learning_rate": 7.106553759168213e-05, "loss": 0.4668, "step": 47220 }, { "epoch": 11.69059405940594, "grad_norm": 0.46984830498695374, "learning_rate": 7.105304046863146e-05, "loss": 0.471, "step": 47230 }, { "epoch": 11.693069306930694, "grad_norm": 0.5263655185699463, "learning_rate": 7.10405417467413e-05, "loss": 0.4736, "step": 47240 }, { "epoch": 11.695544554455445, "grad_norm": 0.49807974696159363, "learning_rate": 7.102804142696085e-05, "loss": 0.471, "step": 47250 }, { "epoch": 11.698019801980198, "grad_norm": 0.504102349281311, "learning_rate": 7.101553951023943e-05, "loss": 0.467, "step": 47260 }, { "epoch": 11.700495049504951, "grad_norm": 0.49476465582847595, "learning_rate": 7.100303599752646e-05, "loss": 0.4713, "step": 47270 }, { "epoch": 11.702970297029703, "grad_norm": 0.48474743962287903, "learning_rate": 7.099053088977153e-05, "loss": 0.474, "step": 47280 }, { "epoch": 11.705445544554456, "grad_norm": 0.4965733587741852, "learning_rate": 7.09780241879243e-05, "loss": 0.471, "step": 47290 }, { "epoch": 11.707920792079207, "grad_norm": 0.4834735095500946, "learning_rate": 7.096551589293458e-05, "loss": 0.4702, "step": 47300 }, { "epoch": 11.71039603960396, "grad_norm": 0.5080304741859436, "learning_rate": 7.095300600575227e-05, "loss": 0.4692, "step": 47310 }, { "epoch": 11.712871287128714, "grad_norm": 0.4894322454929352, "learning_rate": 7.094049452732746e-05, "loss": 0.4694, "step": 47320 }, { "epoch": 11.715346534653465, "grad_norm": 0.5113309621810913, "learning_rate": 7.092798145861026e-05, "loss": 0.4734, "step": 47330 }, { "epoch": 11.717821782178218, "grad_norm": 0.5011597871780396, "learning_rate": 7.0915466800551e-05, "loss": 0.4737, "step": 47340 }, { "epoch": 11.72029702970297, "grad_norm": 0.512959361076355, "learning_rate": 7.090295055410005e-05, "loss": 0.4823, "step": 47350 }, { "epoch": 11.722772277227723, "grad_norm": 0.5152877569198608, "learning_rate": 7.089043272020797e-05, "loss": 0.4754, "step": 47360 }, { "epoch": 11.725247524752476, "grad_norm": 0.5220128893852234, "learning_rate": 7.087791329982538e-05, "loss": 0.4681, "step": 47370 }, { "epoch": 11.727722772277227, "grad_norm": 0.492777019739151, "learning_rate": 7.086539229390306e-05, "loss": 0.4656, "step": 47380 }, { "epoch": 11.73019801980198, "grad_norm": 0.5022106766700745, "learning_rate": 7.085286970339188e-05, "loss": 0.4744, "step": 47390 }, { "epoch": 11.732673267326733, "grad_norm": 0.5004703402519226, "learning_rate": 7.084034552924288e-05, "loss": 0.474, "step": 47400 }, { "epoch": 11.735148514851485, "grad_norm": 0.4947972297668457, "learning_rate": 7.082781977240717e-05, "loss": 0.4743, "step": 47410 }, { "epoch": 11.737623762376238, "grad_norm": 0.5286426544189453, "learning_rate": 7.0815292433836e-05, "loss": 0.4744, "step": 47420 }, { "epoch": 11.740099009900991, "grad_norm": 0.5342784523963928, "learning_rate": 7.080276351448074e-05, "loss": 0.4756, "step": 47430 }, { "epoch": 11.742574257425742, "grad_norm": 0.5613086819648743, "learning_rate": 7.079023301529287e-05, "loss": 0.471, "step": 47440 }, { "epoch": 11.745049504950495, "grad_norm": 0.5717728734016418, "learning_rate": 7.077770093722401e-05, "loss": 0.4696, "step": 47450 }, { "epoch": 11.747524752475247, "grad_norm": 0.503473699092865, "learning_rate": 7.076516728122589e-05, "loss": 0.4721, "step": 47460 }, { "epoch": 11.75, "grad_norm": 0.48550066351890564, "learning_rate": 7.075263204825036e-05, "loss": 0.4747, "step": 47470 }, { "epoch": 11.752475247524753, "grad_norm": 0.5025471448898315, "learning_rate": 7.074009523924935e-05, "loss": 0.4757, "step": 47480 }, { "epoch": 11.754950495049505, "grad_norm": 0.48869743943214417, "learning_rate": 7.072755685517501e-05, "loss": 0.4693, "step": 47490 }, { "epoch": 11.757425742574258, "grad_norm": 0.5143890976905823, "learning_rate": 7.07150168969795e-05, "loss": 0.4739, "step": 47500 }, { "epoch": 11.759900990099009, "grad_norm": 0.48729407787323, "learning_rate": 7.070247536561516e-05, "loss": 0.4729, "step": 47510 }, { "epoch": 11.762376237623762, "grad_norm": 0.5059307217597961, "learning_rate": 7.068993226203447e-05, "loss": 0.4759, "step": 47520 }, { "epoch": 11.764851485148515, "grad_norm": 0.48342233896255493, "learning_rate": 7.067738758718994e-05, "loss": 0.4741, "step": 47530 }, { "epoch": 11.767326732673267, "grad_norm": 0.48038536310195923, "learning_rate": 7.066484134203428e-05, "loss": 0.4771, "step": 47540 }, { "epoch": 11.76980198019802, "grad_norm": 0.49915120005607605, "learning_rate": 7.06522935275203e-05, "loss": 0.4702, "step": 47550 }, { "epoch": 11.772277227722773, "grad_norm": 0.4672190248966217, "learning_rate": 7.063974414460093e-05, "loss": 0.4713, "step": 47560 }, { "epoch": 11.774752475247524, "grad_norm": 0.5033764243125916, "learning_rate": 7.06271931942292e-05, "loss": 0.4781, "step": 47570 }, { "epoch": 11.777227722772277, "grad_norm": 0.5001113414764404, "learning_rate": 7.061464067735826e-05, "loss": 0.4725, "step": 47580 }, { "epoch": 11.77970297029703, "grad_norm": 0.4772246778011322, "learning_rate": 7.060208659494142e-05, "loss": 0.4727, "step": 47590 }, { "epoch": 11.782178217821782, "grad_norm": 0.48449456691741943, "learning_rate": 7.058953094793206e-05, "loss": 0.4773, "step": 47600 }, { "epoch": 11.784653465346535, "grad_norm": 0.4997667372226715, "learning_rate": 7.057697373728371e-05, "loss": 0.4665, "step": 47610 }, { "epoch": 11.787128712871286, "grad_norm": 0.5010185837745667, "learning_rate": 7.056441496395e-05, "loss": 0.4735, "step": 47620 }, { "epoch": 11.78960396039604, "grad_norm": 0.48737895488739014, "learning_rate": 7.055185462888468e-05, "loss": 0.4755, "step": 47630 }, { "epoch": 11.792079207920793, "grad_norm": 0.4687788784503937, "learning_rate": 7.053929273304163e-05, "loss": 0.4724, "step": 47640 }, { "epoch": 11.794554455445544, "grad_norm": 0.49586841464042664, "learning_rate": 7.052672927737485e-05, "loss": 0.4741, "step": 47650 }, { "epoch": 11.797029702970297, "grad_norm": 0.5234423279762268, "learning_rate": 7.051416426283845e-05, "loss": 0.4714, "step": 47660 }, { "epoch": 11.799504950495049, "grad_norm": 0.4825381338596344, "learning_rate": 7.050159769038664e-05, "loss": 0.4721, "step": 47670 }, { "epoch": 11.801980198019802, "grad_norm": 0.5156236290931702, "learning_rate": 7.04890295609738e-05, "loss": 0.4739, "step": 47680 }, { "epoch": 11.804455445544555, "grad_norm": 0.5052488446235657, "learning_rate": 7.047645987555438e-05, "loss": 0.4693, "step": 47690 }, { "epoch": 11.806930693069306, "grad_norm": 0.5201445817947388, "learning_rate": 7.046388863508295e-05, "loss": 0.4731, "step": 47700 }, { "epoch": 11.80940594059406, "grad_norm": 0.4977570176124573, "learning_rate": 7.045131584051426e-05, "loss": 0.4759, "step": 47710 }, { "epoch": 11.811881188118813, "grad_norm": 0.5090262293815613, "learning_rate": 7.043874149280305e-05, "loss": 0.4694, "step": 47720 }, { "epoch": 11.814356435643564, "grad_norm": 0.5143716931343079, "learning_rate": 7.042616559290436e-05, "loss": 0.4733, "step": 47730 }, { "epoch": 11.816831683168317, "grad_norm": 0.4848558008670807, "learning_rate": 7.041358814177316e-05, "loss": 0.4786, "step": 47740 }, { "epoch": 11.819306930693068, "grad_norm": 0.4727092385292053, "learning_rate": 7.040100914036466e-05, "loss": 0.4685, "step": 47750 }, { "epoch": 11.821782178217822, "grad_norm": 0.5363078713417053, "learning_rate": 7.038842858963416e-05, "loss": 0.4759, "step": 47760 }, { "epoch": 11.824257425742575, "grad_norm": 0.5161546468734741, "learning_rate": 7.037584649053707e-05, "loss": 0.4738, "step": 47770 }, { "epoch": 11.826732673267326, "grad_norm": 0.5100323557853699, "learning_rate": 7.036326284402888e-05, "loss": 0.469, "step": 47780 }, { "epoch": 11.82920792079208, "grad_norm": 0.5454686880111694, "learning_rate": 7.035067765106528e-05, "loss": 0.4723, "step": 47790 }, { "epoch": 11.831683168316832, "grad_norm": 0.5041705965995789, "learning_rate": 7.033809091260202e-05, "loss": 0.474, "step": 47800 }, { "epoch": 11.834158415841584, "grad_norm": 0.5092796087265015, "learning_rate": 7.032550262959497e-05, "loss": 0.4722, "step": 47810 }, { "epoch": 11.836633663366337, "grad_norm": 0.5112662315368652, "learning_rate": 7.031291280300012e-05, "loss": 0.4713, "step": 47820 }, { "epoch": 11.839108910891088, "grad_norm": 0.4705776572227478, "learning_rate": 7.030032143377361e-05, "loss": 0.4687, "step": 47830 }, { "epoch": 11.841584158415841, "grad_norm": 0.4924173057079315, "learning_rate": 7.028772852287167e-05, "loss": 0.4733, "step": 47840 }, { "epoch": 11.844059405940595, "grad_norm": 0.4827168583869934, "learning_rate": 7.027513407125062e-05, "loss": 0.4699, "step": 47850 }, { "epoch": 11.846534653465346, "grad_norm": 0.4756148159503937, "learning_rate": 7.026253807986694e-05, "loss": 0.4786, "step": 47860 }, { "epoch": 11.849009900990099, "grad_norm": 0.4925374984741211, "learning_rate": 7.024994054967722e-05, "loss": 0.4733, "step": 47870 }, { "epoch": 11.851485148514852, "grad_norm": 0.5149680972099304, "learning_rate": 7.023734148163816e-05, "loss": 0.474, "step": 47880 }, { "epoch": 11.853960396039604, "grad_norm": 0.5121704339981079, "learning_rate": 7.022474087670655e-05, "loss": 0.4703, "step": 47890 }, { "epoch": 11.856435643564357, "grad_norm": 0.47762641310691833, "learning_rate": 7.021213873583936e-05, "loss": 0.4726, "step": 47900 }, { "epoch": 11.858910891089108, "grad_norm": 0.4431845247745514, "learning_rate": 7.019953505999361e-05, "loss": 0.4696, "step": 47910 }, { "epoch": 11.861386138613861, "grad_norm": 0.47454833984375, "learning_rate": 7.018692985012648e-05, "loss": 0.471, "step": 47920 }, { "epoch": 11.863861386138614, "grad_norm": 0.4991554617881775, "learning_rate": 7.017432310719525e-05, "loss": 0.4722, "step": 47930 }, { "epoch": 11.866336633663366, "grad_norm": 0.506272554397583, "learning_rate": 7.016171483215732e-05, "loss": 0.4768, "step": 47940 }, { "epoch": 11.868811881188119, "grad_norm": 0.5016335844993591, "learning_rate": 7.01491050259702e-05, "loss": 0.4743, "step": 47950 }, { "epoch": 11.871287128712872, "grad_norm": 0.4787018895149231, "learning_rate": 7.013649368959153e-05, "loss": 0.4702, "step": 47960 }, { "epoch": 11.873762376237623, "grad_norm": 0.49112439155578613, "learning_rate": 7.012388082397905e-05, "loss": 0.4745, "step": 47970 }, { "epoch": 11.876237623762377, "grad_norm": 0.5207687020301819, "learning_rate": 7.011126643009064e-05, "loss": 0.4775, "step": 47980 }, { "epoch": 11.878712871287128, "grad_norm": 0.5070419311523438, "learning_rate": 7.009865050888426e-05, "loss": 0.4742, "step": 47990 }, { "epoch": 11.881188118811881, "grad_norm": 0.49030664563179016, "learning_rate": 7.0086033061318e-05, "loss": 0.4744, "step": 48000 }, { "epoch": 11.883663366336634, "grad_norm": 0.5024755001068115, "learning_rate": 7.007341408835011e-05, "loss": 0.4774, "step": 48010 }, { "epoch": 11.886138613861386, "grad_norm": 0.48509150743484497, "learning_rate": 7.006079359093887e-05, "loss": 0.4764, "step": 48020 }, { "epoch": 11.888613861386139, "grad_norm": 0.500302255153656, "learning_rate": 7.004817157004276e-05, "loss": 0.4684, "step": 48030 }, { "epoch": 11.891089108910892, "grad_norm": 0.5007067918777466, "learning_rate": 7.003554802662032e-05, "loss": 0.4734, "step": 48040 }, { "epoch": 11.893564356435643, "grad_norm": 0.49333345890045166, "learning_rate": 7.002292296163025e-05, "loss": 0.4663, "step": 48050 }, { "epoch": 11.896039603960396, "grad_norm": 0.49229440093040466, "learning_rate": 7.001029637603129e-05, "loss": 0.4722, "step": 48060 }, { "epoch": 11.898514851485148, "grad_norm": 0.49651965498924255, "learning_rate": 6.99976682707824e-05, "loss": 0.4767, "step": 48070 }, { "epoch": 11.900990099009901, "grad_norm": 0.4992873966693878, "learning_rate": 6.998503864684256e-05, "loss": 0.4739, "step": 48080 }, { "epoch": 11.903465346534654, "grad_norm": 0.5171707272529602, "learning_rate": 6.997240750517094e-05, "loss": 0.4729, "step": 48090 }, { "epoch": 11.905940594059405, "grad_norm": 0.49174049496650696, "learning_rate": 6.995977484672675e-05, "loss": 0.4721, "step": 48100 }, { "epoch": 11.908415841584159, "grad_norm": 0.5110551118850708, "learning_rate": 6.99471406724694e-05, "loss": 0.4741, "step": 48110 }, { "epoch": 11.910891089108912, "grad_norm": 0.5452681183815002, "learning_rate": 6.993450498335836e-05, "loss": 0.4762, "step": 48120 }, { "epoch": 11.913366336633663, "grad_norm": 0.5415369868278503, "learning_rate": 6.992186778035323e-05, "loss": 0.4776, "step": 48130 }, { "epoch": 11.915841584158416, "grad_norm": 0.4991104006767273, "learning_rate": 6.99092290644137e-05, "loss": 0.4738, "step": 48140 }, { "epoch": 11.918316831683168, "grad_norm": 0.5027674436569214, "learning_rate": 6.989658883649962e-05, "loss": 0.4784, "step": 48150 }, { "epoch": 11.92079207920792, "grad_norm": 0.5196136236190796, "learning_rate": 6.988394709757094e-05, "loss": 0.4682, "step": 48160 }, { "epoch": 11.923267326732674, "grad_norm": 0.4825960695743561, "learning_rate": 6.987130384858767e-05, "loss": 0.4734, "step": 48170 }, { "epoch": 11.925742574257425, "grad_norm": 0.5115978121757507, "learning_rate": 6.985865909051003e-05, "loss": 0.4776, "step": 48180 }, { "epoch": 11.928217821782178, "grad_norm": 0.4840361773967743, "learning_rate": 6.984601282429827e-05, "loss": 0.4688, "step": 48190 }, { "epoch": 11.930693069306932, "grad_norm": 0.4986479580402374, "learning_rate": 6.983336505091283e-05, "loss": 0.4768, "step": 48200 }, { "epoch": 11.933168316831683, "grad_norm": 0.5037546753883362, "learning_rate": 6.982071577131419e-05, "loss": 0.4718, "step": 48210 }, { "epoch": 11.935643564356436, "grad_norm": 0.46178582310676575, "learning_rate": 6.9808064986463e-05, "loss": 0.4671, "step": 48220 }, { "epoch": 11.938118811881187, "grad_norm": 0.4571698307991028, "learning_rate": 6.979541269732e-05, "loss": 0.4722, "step": 48230 }, { "epoch": 11.94059405940594, "grad_norm": 0.5408113598823547, "learning_rate": 6.978275890484602e-05, "loss": 0.4741, "step": 48240 }, { "epoch": 11.943069306930694, "grad_norm": 0.5050601959228516, "learning_rate": 6.977010361000208e-05, "loss": 0.4719, "step": 48250 }, { "epoch": 11.945544554455445, "grad_norm": 0.5134822130203247, "learning_rate": 6.975744681374922e-05, "loss": 0.4732, "step": 48260 }, { "epoch": 11.948019801980198, "grad_norm": 0.5062435269355774, "learning_rate": 6.974478851704868e-05, "loss": 0.4801, "step": 48270 }, { "epoch": 11.950495049504951, "grad_norm": 0.4684373736381531, "learning_rate": 6.973212872086176e-05, "loss": 0.4754, "step": 48280 }, { "epoch": 11.952970297029703, "grad_norm": 0.519059956073761, "learning_rate": 6.971946742614987e-05, "loss": 0.4765, "step": 48290 }, { "epoch": 11.955445544554456, "grad_norm": 0.4744786024093628, "learning_rate": 6.970680463387458e-05, "loss": 0.4717, "step": 48300 }, { "epoch": 11.957920792079207, "grad_norm": 0.48169076442718506, "learning_rate": 6.969414034499753e-05, "loss": 0.4721, "step": 48310 }, { "epoch": 11.96039603960396, "grad_norm": 0.46598073840141296, "learning_rate": 6.968147456048047e-05, "loss": 0.4714, "step": 48320 }, { "epoch": 11.962871287128714, "grad_norm": 0.5391489863395691, "learning_rate": 6.966880728128533e-05, "loss": 0.467, "step": 48330 }, { "epoch": 11.965346534653465, "grad_norm": 0.5161120891571045, "learning_rate": 6.965613850837407e-05, "loss": 0.4722, "step": 48340 }, { "epoch": 11.967821782178218, "grad_norm": 0.5369539856910706, "learning_rate": 6.964346824270882e-05, "loss": 0.4733, "step": 48350 }, { "epoch": 11.97029702970297, "grad_norm": 0.4856313467025757, "learning_rate": 6.963079648525177e-05, "loss": 0.4801, "step": 48360 }, { "epoch": 11.972772277227723, "grad_norm": 0.4872223138809204, "learning_rate": 6.96181232369653e-05, "loss": 0.4714, "step": 48370 }, { "epoch": 11.975247524752476, "grad_norm": 0.5027946829795837, "learning_rate": 6.960544849881181e-05, "loss": 0.4693, "step": 48380 }, { "epoch": 11.977722772277227, "grad_norm": 0.5175607204437256, "learning_rate": 6.959277227175393e-05, "loss": 0.4675, "step": 48390 }, { "epoch": 11.98019801980198, "grad_norm": 0.5042790770530701, "learning_rate": 6.958009455675427e-05, "loss": 0.4774, "step": 48400 }, { "epoch": 11.982673267326733, "grad_norm": 0.510139524936676, "learning_rate": 6.956741535477565e-05, "loss": 0.4744, "step": 48410 }, { "epoch": 11.985148514851485, "grad_norm": 0.48636728525161743, "learning_rate": 6.955473466678097e-05, "loss": 0.4704, "step": 48420 }, { "epoch": 11.987623762376238, "grad_norm": 0.47824087738990784, "learning_rate": 6.954205249373324e-05, "loss": 0.4686, "step": 48430 }, { "epoch": 11.990099009900991, "grad_norm": 0.4959395229816437, "learning_rate": 6.95293688365956e-05, "loss": 0.4734, "step": 48440 }, { "epoch": 11.992574257425742, "grad_norm": 0.5269767642021179, "learning_rate": 6.951668369633127e-05, "loss": 0.4763, "step": 48450 }, { "epoch": 11.995049504950495, "grad_norm": 0.48723796010017395, "learning_rate": 6.950399707390361e-05, "loss": 0.4708, "step": 48460 }, { "epoch": 11.997524752475247, "grad_norm": 0.4917437732219696, "learning_rate": 6.94913089702761e-05, "loss": 0.4757, "step": 48470 }, { "epoch": 12.0, "grad_norm": 0.48487797379493713, "learning_rate": 6.947861938641229e-05, "loss": 0.4645, "step": 48480 }, { "epoch": 12.002475247524753, "grad_norm": 0.49266305565834045, "learning_rate": 6.946592832327589e-05, "loss": 0.4724, "step": 48490 }, { "epoch": 12.004950495049505, "grad_norm": 0.48208677768707275, "learning_rate": 6.94532357818307e-05, "loss": 0.4644, "step": 48500 }, { "epoch": 12.007425742574258, "grad_norm": 0.49479198455810547, "learning_rate": 6.944054176304064e-05, "loss": 0.4705, "step": 48510 }, { "epoch": 12.009900990099009, "grad_norm": 0.5130391716957092, "learning_rate": 6.942784626786972e-05, "loss": 0.4711, "step": 48520 }, { "epoch": 12.012376237623762, "grad_norm": 0.5141984820365906, "learning_rate": 6.94151492972821e-05, "loss": 0.469, "step": 48530 }, { "epoch": 12.014851485148515, "grad_norm": 0.5123237371444702, "learning_rate": 6.9402450852242e-05, "loss": 0.4737, "step": 48540 }, { "epoch": 12.017326732673267, "grad_norm": 0.49169445037841797, "learning_rate": 6.938975093371382e-05, "loss": 0.4773, "step": 48550 }, { "epoch": 12.01980198019802, "grad_norm": 0.5103230476379395, "learning_rate": 6.9377049542662e-05, "loss": 0.4719, "step": 48560 }, { "epoch": 12.022277227722773, "grad_norm": 0.502217173576355, "learning_rate": 6.936434668005116e-05, "loss": 0.4711, "step": 48570 }, { "epoch": 12.024752475247524, "grad_norm": 0.5570521950721741, "learning_rate": 6.935164234684597e-05, "loss": 0.4725, "step": 48580 }, { "epoch": 12.027227722772277, "grad_norm": 0.5172271132469177, "learning_rate": 6.933893654401127e-05, "loss": 0.4722, "step": 48590 }, { "epoch": 12.029702970297029, "grad_norm": 0.4771610498428345, "learning_rate": 6.932622927251195e-05, "loss": 0.4757, "step": 48600 }, { "epoch": 12.032178217821782, "grad_norm": 0.4854738712310791, "learning_rate": 6.931352053331307e-05, "loss": 0.4686, "step": 48610 }, { "epoch": 12.034653465346535, "grad_norm": 0.4610885977745056, "learning_rate": 6.930081032737976e-05, "loss": 0.4695, "step": 48620 }, { "epoch": 12.037128712871286, "grad_norm": 0.47993558645248413, "learning_rate": 6.928809865567727e-05, "loss": 0.4678, "step": 48630 }, { "epoch": 12.03960396039604, "grad_norm": 0.5169703960418701, "learning_rate": 6.927538551917099e-05, "loss": 0.4758, "step": 48640 }, { "epoch": 12.042079207920793, "grad_norm": 0.5023720264434814, "learning_rate": 6.926267091882638e-05, "loss": 0.4745, "step": 48650 }, { "epoch": 12.044554455445544, "grad_norm": 0.5098037719726562, "learning_rate": 6.924995485560905e-05, "loss": 0.4712, "step": 48660 }, { "epoch": 12.047029702970297, "grad_norm": 0.4946846067905426, "learning_rate": 6.923723733048467e-05, "loss": 0.4703, "step": 48670 }, { "epoch": 12.049504950495049, "grad_norm": 0.4478660821914673, "learning_rate": 6.922451834441908e-05, "loss": 0.4723, "step": 48680 }, { "epoch": 12.051980198019802, "grad_norm": 0.501508355140686, "learning_rate": 6.92117978983782e-05, "loss": 0.4685, "step": 48690 }, { "epoch": 12.054455445544555, "grad_norm": 0.5025461912155151, "learning_rate": 6.919907599332803e-05, "loss": 0.4713, "step": 48700 }, { "epoch": 12.056930693069306, "grad_norm": 0.46780452132225037, "learning_rate": 6.918635263023476e-05, "loss": 0.4722, "step": 48710 }, { "epoch": 12.05940594059406, "grad_norm": 0.46657055616378784, "learning_rate": 6.917362781006462e-05, "loss": 0.4745, "step": 48720 }, { "epoch": 12.061881188118813, "grad_norm": 0.4863523840904236, "learning_rate": 6.916090153378398e-05, "loss": 0.4687, "step": 48730 }, { "epoch": 12.064356435643564, "grad_norm": 0.5074535608291626, "learning_rate": 6.914817380235933e-05, "loss": 0.4661, "step": 48740 }, { "epoch": 12.066831683168317, "grad_norm": 0.4805072546005249, "learning_rate": 6.913544461675725e-05, "loss": 0.4659, "step": 48750 }, { "epoch": 12.069306930693068, "grad_norm": 0.5013250112533569, "learning_rate": 6.912271397794441e-05, "loss": 0.4716, "step": 48760 }, { "epoch": 12.071782178217822, "grad_norm": 0.4629349112510681, "learning_rate": 6.910998188688767e-05, "loss": 0.4695, "step": 48770 }, { "epoch": 12.074257425742575, "grad_norm": 0.5075501203536987, "learning_rate": 6.909724834455391e-05, "loss": 0.4679, "step": 48780 }, { "epoch": 12.076732673267326, "grad_norm": 0.47426989674568176, "learning_rate": 6.908451335191017e-05, "loss": 0.4724, "step": 48790 }, { "epoch": 12.07920792079208, "grad_norm": 0.48612460494041443, "learning_rate": 6.907177690992359e-05, "loss": 0.4734, "step": 48800 }, { "epoch": 12.081683168316832, "grad_norm": 0.47560223937034607, "learning_rate": 6.90590390195614e-05, "loss": 0.4707, "step": 48810 }, { "epoch": 12.084158415841584, "grad_norm": 0.477830708026886, "learning_rate": 6.9046299681791e-05, "loss": 0.4642, "step": 48820 }, { "epoch": 12.086633663366337, "grad_norm": 0.5011861324310303, "learning_rate": 6.90335588975798e-05, "loss": 0.467, "step": 48830 }, { "epoch": 12.089108910891088, "grad_norm": 0.4725037217140198, "learning_rate": 6.902081666789545e-05, "loss": 0.4698, "step": 48840 }, { "epoch": 12.091584158415841, "grad_norm": 0.45929375290870667, "learning_rate": 6.900807299370559e-05, "loss": 0.4704, "step": 48850 }, { "epoch": 12.094059405940595, "grad_norm": 0.4808672368526459, "learning_rate": 6.899532787597801e-05, "loss": 0.4805, "step": 48860 }, { "epoch": 12.096534653465346, "grad_norm": 0.4861266613006592, "learning_rate": 6.898258131568066e-05, "loss": 0.4719, "step": 48870 }, { "epoch": 12.099009900990099, "grad_norm": 0.4933287501335144, "learning_rate": 6.89698333137815e-05, "loss": 0.478, "step": 48880 }, { "epoch": 12.101485148514852, "grad_norm": 0.5144807696342468, "learning_rate": 6.895708387124873e-05, "loss": 0.4701, "step": 48890 }, { "epoch": 12.103960396039604, "grad_norm": 0.49831971526145935, "learning_rate": 6.894433298905051e-05, "loss": 0.4683, "step": 48900 }, { "epoch": 12.106435643564357, "grad_norm": 0.4956330358982086, "learning_rate": 6.893158066815526e-05, "loss": 0.4725, "step": 48910 }, { "epoch": 12.108910891089108, "grad_norm": 0.5342705249786377, "learning_rate": 6.891882690953137e-05, "loss": 0.4708, "step": 48920 }, { "epoch": 12.111386138613861, "grad_norm": 0.5186063647270203, "learning_rate": 6.890607171414743e-05, "loss": 0.4722, "step": 48930 }, { "epoch": 12.113861386138614, "grad_norm": 0.4743349254131317, "learning_rate": 6.889331508297212e-05, "loss": 0.4738, "step": 48940 }, { "epoch": 12.116336633663366, "grad_norm": 0.471545547246933, "learning_rate": 6.888055701697422e-05, "loss": 0.4665, "step": 48950 }, { "epoch": 12.118811881188119, "grad_norm": 0.48133018612861633, "learning_rate": 6.88677975171226e-05, "loss": 0.469, "step": 48960 }, { "epoch": 12.121287128712872, "grad_norm": 0.49193641543388367, "learning_rate": 6.885503658438628e-05, "loss": 0.4745, "step": 48970 }, { "epoch": 12.123762376237623, "grad_norm": 0.5164262652397156, "learning_rate": 6.884227421973437e-05, "loss": 0.4717, "step": 48980 }, { "epoch": 12.126237623762377, "grad_norm": 0.5069225430488586, "learning_rate": 6.882951042413608e-05, "loss": 0.4713, "step": 48990 }, { "epoch": 12.128712871287128, "grad_norm": 0.500255823135376, "learning_rate": 6.881674519856072e-05, "loss": 0.4807, "step": 49000 }, { "epoch": 12.131188118811881, "grad_norm": 0.5235340595245361, "learning_rate": 6.880397854397775e-05, "loss": 0.4742, "step": 49010 }, { "epoch": 12.133663366336634, "grad_norm": 0.5086034536361694, "learning_rate": 6.879121046135673e-05, "loss": 0.4657, "step": 49020 }, { "epoch": 12.136138613861386, "grad_norm": 0.47754424810409546, "learning_rate": 6.877844095166725e-05, "loss": 0.4718, "step": 49030 }, { "epoch": 12.138613861386139, "grad_norm": 0.4759502410888672, "learning_rate": 6.876567001587915e-05, "loss": 0.4722, "step": 49040 }, { "epoch": 12.141089108910892, "grad_norm": 0.48508724570274353, "learning_rate": 6.875289765496221e-05, "loss": 0.4696, "step": 49050 }, { "epoch": 12.143564356435643, "grad_norm": 0.5047056078910828, "learning_rate": 6.874012386988647e-05, "loss": 0.4731, "step": 49060 }, { "epoch": 12.146039603960396, "grad_norm": 0.5220636129379272, "learning_rate": 6.8727348661622e-05, "loss": 0.4712, "step": 49070 }, { "epoch": 12.148514851485148, "grad_norm": 0.46153807640075684, "learning_rate": 6.871457203113899e-05, "loss": 0.4721, "step": 49080 }, { "epoch": 12.150990099009901, "grad_norm": 0.504180908203125, "learning_rate": 6.870179397940775e-05, "loss": 0.4705, "step": 49090 }, { "epoch": 12.153465346534654, "grad_norm": 0.5238760709762573, "learning_rate": 6.868901450739865e-05, "loss": 0.476, "step": 49100 }, { "epoch": 12.155940594059405, "grad_norm": 0.49525952339172363, "learning_rate": 6.867623361608224e-05, "loss": 0.4689, "step": 49110 }, { "epoch": 12.158415841584159, "grad_norm": 0.48823121190071106, "learning_rate": 6.866345130642915e-05, "loss": 0.4677, "step": 49120 }, { "epoch": 12.160891089108912, "grad_norm": 0.4887367784976959, "learning_rate": 6.86506675794101e-05, "loss": 0.4754, "step": 49130 }, { "epoch": 12.163366336633663, "grad_norm": 0.48950162529945374, "learning_rate": 6.863788243599594e-05, "loss": 0.4676, "step": 49140 }, { "epoch": 12.165841584158416, "grad_norm": 0.4982987940311432, "learning_rate": 6.86250958771576e-05, "loss": 0.4776, "step": 49150 }, { "epoch": 12.168316831683168, "grad_norm": 0.4976232945919037, "learning_rate": 6.861230790386613e-05, "loss": 0.4703, "step": 49160 }, { "epoch": 12.17079207920792, "grad_norm": 0.4890320301055908, "learning_rate": 6.859951851709273e-05, "loss": 0.4717, "step": 49170 }, { "epoch": 12.173267326732674, "grad_norm": 0.47518685460090637, "learning_rate": 6.858672771780862e-05, "loss": 0.4693, "step": 49180 }, { "epoch": 12.175742574257425, "grad_norm": 0.48016732931137085, "learning_rate": 6.857393550698523e-05, "loss": 0.4694, "step": 49190 }, { "epoch": 12.178217821782178, "grad_norm": 0.5181581974029541, "learning_rate": 6.8561141885594e-05, "loss": 0.4722, "step": 49200 }, { "epoch": 12.180693069306932, "grad_norm": 0.4872973561286926, "learning_rate": 6.854834685460653e-05, "loss": 0.4679, "step": 49210 }, { "epoch": 12.183168316831683, "grad_norm": 0.5193012356758118, "learning_rate": 6.853555041499453e-05, "loss": 0.4741, "step": 49220 }, { "epoch": 12.185643564356436, "grad_norm": 0.4946136176586151, "learning_rate": 6.852275256772979e-05, "loss": 0.4733, "step": 49230 }, { "epoch": 12.188118811881187, "grad_norm": 0.4844743609428406, "learning_rate": 6.850995331378425e-05, "loss": 0.4723, "step": 49240 }, { "epoch": 12.19059405940594, "grad_norm": 0.47901788353919983, "learning_rate": 6.84971526541299e-05, "loss": 0.465, "step": 49250 }, { "epoch": 12.193069306930694, "grad_norm": 0.5176918506622314, "learning_rate": 6.848435058973888e-05, "loss": 0.4731, "step": 49260 }, { "epoch": 12.195544554455445, "grad_norm": 0.4984610378742218, "learning_rate": 6.84715471215834e-05, "loss": 0.4704, "step": 49270 }, { "epoch": 12.198019801980198, "grad_norm": 0.49188748002052307, "learning_rate": 6.845874225063586e-05, "loss": 0.4697, "step": 49280 }, { "epoch": 12.200495049504951, "grad_norm": 0.4952690005302429, "learning_rate": 6.844593597786864e-05, "loss": 0.4669, "step": 49290 }, { "epoch": 12.202970297029703, "grad_norm": 0.5008203983306885, "learning_rate": 6.84331283042543e-05, "loss": 0.4726, "step": 49300 }, { "epoch": 12.205445544554456, "grad_norm": 0.470784455537796, "learning_rate": 6.842031923076555e-05, "loss": 0.4745, "step": 49310 }, { "epoch": 12.207920792079207, "grad_norm": 0.46724870800971985, "learning_rate": 6.84075087583751e-05, "loss": 0.4752, "step": 49320 }, { "epoch": 12.21039603960396, "grad_norm": 0.5257015228271484, "learning_rate": 6.839469688805582e-05, "loss": 0.4729, "step": 49330 }, { "epoch": 12.212871287128714, "grad_norm": 0.4929926097393036, "learning_rate": 6.838188362078073e-05, "loss": 0.473, "step": 49340 }, { "epoch": 12.215346534653465, "grad_norm": 0.4922361969947815, "learning_rate": 6.836906895752286e-05, "loss": 0.4772, "step": 49350 }, { "epoch": 12.217821782178218, "grad_norm": 0.5152124762535095, "learning_rate": 6.835625289925547e-05, "loss": 0.4687, "step": 49360 }, { "epoch": 12.220297029702971, "grad_norm": 0.49349701404571533, "learning_rate": 6.834343544695178e-05, "loss": 0.4714, "step": 49370 }, { "epoch": 12.222772277227723, "grad_norm": 0.4823210835456848, "learning_rate": 6.833061660158524e-05, "loss": 0.4719, "step": 49380 }, { "epoch": 12.225247524752476, "grad_norm": 0.5141571760177612, "learning_rate": 6.831779636412932e-05, "loss": 0.4703, "step": 49390 }, { "epoch": 12.227722772277227, "grad_norm": 0.5112707018852234, "learning_rate": 6.830497473555768e-05, "loss": 0.4737, "step": 49400 }, { "epoch": 12.23019801980198, "grad_norm": 0.49042460322380066, "learning_rate": 6.8292151716844e-05, "loss": 0.4746, "step": 49410 }, { "epoch": 12.232673267326733, "grad_norm": 0.4866285026073456, "learning_rate": 6.827932730896211e-05, "loss": 0.4693, "step": 49420 }, { "epoch": 12.235148514851485, "grad_norm": 0.4795570373535156, "learning_rate": 6.826650151288593e-05, "loss": 0.4766, "step": 49430 }, { "epoch": 12.237623762376238, "grad_norm": 0.512580931186676, "learning_rate": 6.825367432958955e-05, "loss": 0.4651, "step": 49440 }, { "epoch": 12.240099009900991, "grad_norm": 0.5385483503341675, "learning_rate": 6.824084576004704e-05, "loss": 0.4741, "step": 49450 }, { "epoch": 12.242574257425742, "grad_norm": 0.5034129619598389, "learning_rate": 6.822801580523268e-05, "loss": 0.465, "step": 49460 }, { "epoch": 12.245049504950495, "grad_norm": 0.530184805393219, "learning_rate": 6.82151844661208e-05, "loss": 0.4712, "step": 49470 }, { "epoch": 12.247524752475247, "grad_norm": 0.4957630932331085, "learning_rate": 6.820235174368589e-05, "loss": 0.4701, "step": 49480 }, { "epoch": 12.25, "grad_norm": 0.48749226331710815, "learning_rate": 6.818951763890249e-05, "loss": 0.4731, "step": 49490 }, { "epoch": 12.252475247524753, "grad_norm": 0.4868044853210449, "learning_rate": 6.817668215274525e-05, "loss": 0.467, "step": 49500 }, { "epoch": 12.254950495049505, "grad_norm": 0.5100067853927612, "learning_rate": 6.816384528618898e-05, "loss": 0.4747, "step": 49510 }, { "epoch": 12.257425742574258, "grad_norm": 0.4861205220222473, "learning_rate": 6.815100704020851e-05, "loss": 0.4757, "step": 49520 }, { "epoch": 12.259900990099009, "grad_norm": 0.5222535729408264, "learning_rate": 6.813816741577885e-05, "loss": 0.4711, "step": 49530 }, { "epoch": 12.262376237623762, "grad_norm": 0.523205578327179, "learning_rate": 6.812532641387507e-05, "loss": 0.4661, "step": 49540 }, { "epoch": 12.264851485148515, "grad_norm": 0.48249852657318115, "learning_rate": 6.811248403547236e-05, "loss": 0.472, "step": 49550 }, { "epoch": 12.267326732673267, "grad_norm": 0.4557303190231323, "learning_rate": 6.809964028154604e-05, "loss": 0.4669, "step": 49560 }, { "epoch": 12.26980198019802, "grad_norm": 0.47011932730674744, "learning_rate": 6.808679515307146e-05, "loss": 0.4752, "step": 49570 }, { "epoch": 12.272277227722773, "grad_norm": 0.5231656432151794, "learning_rate": 6.807394865102416e-05, "loss": 0.4758, "step": 49580 }, { "epoch": 12.274752475247524, "grad_norm": 0.4751845896244049, "learning_rate": 6.806110077637977e-05, "loss": 0.4689, "step": 49590 }, { "epoch": 12.277227722772277, "grad_norm": 0.5100659132003784, "learning_rate": 6.804825153011394e-05, "loss": 0.472, "step": 49600 }, { "epoch": 12.27970297029703, "grad_norm": 0.46387779712677, "learning_rate": 6.80354009132025e-05, "loss": 0.4688, "step": 49610 }, { "epoch": 12.282178217821782, "grad_norm": 0.47295573353767395, "learning_rate": 6.802254892662141e-05, "loss": 0.4678, "step": 49620 }, { "epoch": 12.284653465346535, "grad_norm": 0.4976544678211212, "learning_rate": 6.800969557134666e-05, "loss": 0.4647, "step": 49630 }, { "epoch": 12.287128712871286, "grad_norm": 0.47478628158569336, "learning_rate": 6.79968408483544e-05, "loss": 0.4713, "step": 49640 }, { "epoch": 12.28960396039604, "grad_norm": 0.49204763770103455, "learning_rate": 6.798398475862081e-05, "loss": 0.4688, "step": 49650 }, { "epoch": 12.292079207920793, "grad_norm": 0.4704623222351074, "learning_rate": 6.79711273031223e-05, "loss": 0.4704, "step": 49660 }, { "epoch": 12.294554455445544, "grad_norm": 0.5021716356277466, "learning_rate": 6.795826848283524e-05, "loss": 0.4715, "step": 49670 }, { "epoch": 12.297029702970297, "grad_norm": 0.5330753922462463, "learning_rate": 6.794540829873621e-05, "loss": 0.4695, "step": 49680 }, { "epoch": 12.299504950495049, "grad_norm": 0.4842248260974884, "learning_rate": 6.793254675180186e-05, "loss": 0.473, "step": 49690 }, { "epoch": 12.301980198019802, "grad_norm": 0.541347324848175, "learning_rate": 6.791968384300892e-05, "loss": 0.4708, "step": 49700 }, { "epoch": 12.304455445544555, "grad_norm": 0.4927033483982086, "learning_rate": 6.790681957333427e-05, "loss": 0.4689, "step": 49710 }, { "epoch": 12.306930693069306, "grad_norm": 0.49224212765693665, "learning_rate": 6.789395394375482e-05, "loss": 0.469, "step": 49720 }, { "epoch": 12.30940594059406, "grad_norm": 0.49425604939460754, "learning_rate": 6.788108695524769e-05, "loss": 0.4712, "step": 49730 }, { "epoch": 12.311881188118813, "grad_norm": 0.4712781012058258, "learning_rate": 6.786821860878997e-05, "loss": 0.4703, "step": 49740 }, { "epoch": 12.314356435643564, "grad_norm": 0.5133546590805054, "learning_rate": 6.7855348905359e-05, "loss": 0.4755, "step": 49750 }, { "epoch": 12.316831683168317, "grad_norm": 0.5081899762153625, "learning_rate": 6.78424778459321e-05, "loss": 0.4676, "step": 49760 }, { "epoch": 12.319306930693068, "grad_norm": 0.5358014702796936, "learning_rate": 6.782960543148677e-05, "loss": 0.4663, "step": 49770 }, { "epoch": 12.321782178217822, "grad_norm": 0.49797600507736206, "learning_rate": 6.781673166300056e-05, "loss": 0.4699, "step": 49780 }, { "epoch": 12.324257425742575, "grad_norm": 0.4709063172340393, "learning_rate": 6.780385654145116e-05, "loss": 0.4672, "step": 49790 }, { "epoch": 12.326732673267326, "grad_norm": 0.4851759672164917, "learning_rate": 6.779098006781636e-05, "loss": 0.472, "step": 49800 }, { "epoch": 12.32920792079208, "grad_norm": 0.5073134899139404, "learning_rate": 6.777810224307403e-05, "loss": 0.4691, "step": 49810 }, { "epoch": 12.331683168316832, "grad_norm": 0.48818495869636536, "learning_rate": 6.776522306820216e-05, "loss": 0.4705, "step": 49820 }, { "epoch": 12.334158415841584, "grad_norm": 0.5019403100013733, "learning_rate": 6.775234254417884e-05, "loss": 0.4723, "step": 49830 }, { "epoch": 12.336633663366337, "grad_norm": 0.4844907224178314, "learning_rate": 6.773946067198226e-05, "loss": 0.4693, "step": 49840 }, { "epoch": 12.339108910891088, "grad_norm": 0.5011253356933594, "learning_rate": 6.77265774525907e-05, "loss": 0.4684, "step": 49850 }, { "epoch": 12.341584158415841, "grad_norm": 0.4948078989982605, "learning_rate": 6.771369288698258e-05, "loss": 0.478, "step": 49860 }, { "epoch": 12.344059405940595, "grad_norm": 0.4885108172893524, "learning_rate": 6.770080697613638e-05, "loss": 0.4653, "step": 49870 }, { "epoch": 12.346534653465346, "grad_norm": 0.47775647044181824, "learning_rate": 6.768791972103072e-05, "loss": 0.4705, "step": 49880 }, { "epoch": 12.349009900990099, "grad_norm": 0.5187346339225769, "learning_rate": 6.767503112264426e-05, "loss": 0.4713, "step": 49890 }, { "epoch": 12.351485148514852, "grad_norm": 0.46915921568870544, "learning_rate": 6.766214118195585e-05, "loss": 0.4721, "step": 49900 }, { "epoch": 12.353960396039604, "grad_norm": 0.4685302972793579, "learning_rate": 6.764924989994438e-05, "loss": 0.4703, "step": 49910 }, { "epoch": 12.356435643564357, "grad_norm": 0.5081121325492859, "learning_rate": 6.763635727758885e-05, "loss": 0.4735, "step": 49920 }, { "epoch": 12.358910891089108, "grad_norm": 0.47752320766448975, "learning_rate": 6.762346331586838e-05, "loss": 0.4759, "step": 49930 }, { "epoch": 12.361386138613861, "grad_norm": 0.5477039813995361, "learning_rate": 6.761056801576215e-05, "loss": 0.4731, "step": 49940 }, { "epoch": 12.363861386138614, "grad_norm": 0.49216270446777344, "learning_rate": 6.759767137824951e-05, "loss": 0.4707, "step": 49950 }, { "epoch": 12.366336633663366, "grad_norm": 0.4878896474838257, "learning_rate": 6.758477340430988e-05, "loss": 0.465, "step": 49960 }, { "epoch": 12.368811881188119, "grad_norm": 0.4902288317680359, "learning_rate": 6.757187409492277e-05, "loss": 0.4718, "step": 49970 }, { "epoch": 12.371287128712872, "grad_norm": 0.51814866065979, "learning_rate": 6.755897345106775e-05, "loss": 0.467, "step": 49980 }, { "epoch": 12.373762376237623, "grad_norm": 0.5139514803886414, "learning_rate": 6.754607147372461e-05, "loss": 0.4647, "step": 49990 }, { "epoch": 12.376237623762377, "grad_norm": 0.5176911354064941, "learning_rate": 6.75331681638731e-05, "loss": 0.4689, "step": 50000 }, { "epoch": 12.378712871287128, "grad_norm": 0.4941324293613434, "learning_rate": 6.75202635224932e-05, "loss": 0.4708, "step": 50010 }, { "epoch": 12.381188118811881, "grad_norm": 0.48226499557495117, "learning_rate": 6.75073575505649e-05, "loss": 0.4715, "step": 50020 }, { "epoch": 12.383663366336634, "grad_norm": 0.48927533626556396, "learning_rate": 6.749445024906835e-05, "loss": 0.4712, "step": 50030 }, { "epoch": 12.386138613861386, "grad_norm": 0.4521268606185913, "learning_rate": 6.748154161898374e-05, "loss": 0.4712, "step": 50040 }, { "epoch": 12.388613861386139, "grad_norm": 0.4780959188938141, "learning_rate": 6.74686316612914e-05, "loss": 0.4712, "step": 50050 }, { "epoch": 12.391089108910892, "grad_norm": 0.5009123086929321, "learning_rate": 6.74557203769718e-05, "loss": 0.4753, "step": 50060 }, { "epoch": 12.393564356435643, "grad_norm": 0.5125042200088501, "learning_rate": 6.744280776700542e-05, "loss": 0.4683, "step": 50070 }, { "epoch": 12.396039603960396, "grad_norm": 0.4796009659767151, "learning_rate": 6.74298938323729e-05, "loss": 0.4734, "step": 50080 }, { "epoch": 12.398514851485148, "grad_norm": 0.5146180987358093, "learning_rate": 6.741697857405498e-05, "loss": 0.4729, "step": 50090 }, { "epoch": 12.400990099009901, "grad_norm": 0.5111371278762817, "learning_rate": 6.740406199303246e-05, "loss": 0.4752, "step": 50100 }, { "epoch": 12.403465346534654, "grad_norm": 0.4779842793941498, "learning_rate": 6.739114409028631e-05, "loss": 0.4668, "step": 50110 }, { "epoch": 12.405940594059405, "grad_norm": 0.48137521743774414, "learning_rate": 6.737822486679752e-05, "loss": 0.471, "step": 50120 }, { "epoch": 12.408415841584159, "grad_norm": 0.4928349256515503, "learning_rate": 6.736530432354724e-05, "loss": 0.4738, "step": 50130 }, { "epoch": 12.410891089108912, "grad_norm": 0.46119529008865356, "learning_rate": 6.735238246151672e-05, "loss": 0.4706, "step": 50140 }, { "epoch": 12.413366336633663, "grad_norm": 0.49335163831710815, "learning_rate": 6.733945928168724e-05, "loss": 0.4713, "step": 50150 }, { "epoch": 12.415841584158416, "grad_norm": 0.5137199759483337, "learning_rate": 6.732653478504028e-05, "loss": 0.4765, "step": 50160 }, { "epoch": 12.418316831683168, "grad_norm": 0.510932207107544, "learning_rate": 6.731360897255731e-05, "loss": 0.4663, "step": 50170 }, { "epoch": 12.42079207920792, "grad_norm": 0.49967992305755615, "learning_rate": 6.730068184522005e-05, "loss": 0.4652, "step": 50180 }, { "epoch": 12.423267326732674, "grad_norm": 0.4800235331058502, "learning_rate": 6.728775340401014e-05, "loss": 0.4674, "step": 50190 }, { "epoch": 12.425742574257425, "grad_norm": 0.5210447311401367, "learning_rate": 6.727482364990949e-05, "loss": 0.4666, "step": 50200 }, { "epoch": 12.428217821782178, "grad_norm": 0.5060445070266724, "learning_rate": 6.726189258389994e-05, "loss": 0.4707, "step": 50210 }, { "epoch": 12.430693069306932, "grad_norm": 0.47783759236335754, "learning_rate": 6.72489602069636e-05, "loss": 0.4691, "step": 50220 }, { "epoch": 12.433168316831683, "grad_norm": 0.48306265473365784, "learning_rate": 6.723602652008255e-05, "loss": 0.47, "step": 50230 }, { "epoch": 12.435643564356436, "grad_norm": 0.5154641270637512, "learning_rate": 6.722309152423904e-05, "loss": 0.4687, "step": 50240 }, { "epoch": 12.438118811881187, "grad_norm": 0.5059535503387451, "learning_rate": 6.721015522041538e-05, "loss": 0.4707, "step": 50250 }, { "epoch": 12.44059405940594, "grad_norm": 0.5007003545761108, "learning_rate": 6.719721760959403e-05, "loss": 0.4716, "step": 50260 }, { "epoch": 12.443069306930694, "grad_norm": 0.47053495049476624, "learning_rate": 6.718427869275747e-05, "loss": 0.4678, "step": 50270 }, { "epoch": 12.445544554455445, "grad_norm": 0.5014821290969849, "learning_rate": 6.717133847088836e-05, "loss": 0.4697, "step": 50280 }, { "epoch": 12.448019801980198, "grad_norm": 0.45549276471138, "learning_rate": 6.715839694496942e-05, "loss": 0.4723, "step": 50290 }, { "epoch": 12.450495049504951, "grad_norm": 0.465765118598938, "learning_rate": 6.714545411598346e-05, "loss": 0.4733, "step": 50300 }, { "epoch": 12.452970297029703, "grad_norm": 0.4734641909599304, "learning_rate": 6.713250998491341e-05, "loss": 0.4784, "step": 50310 }, { "epoch": 12.455445544554456, "grad_norm": 0.4913042187690735, "learning_rate": 6.711956455274229e-05, "loss": 0.4669, "step": 50320 }, { "epoch": 12.457920792079207, "grad_norm": 0.48014992475509644, "learning_rate": 6.710661782045323e-05, "loss": 0.4674, "step": 50330 }, { "epoch": 12.46039603960396, "grad_norm": 0.48328810930252075, "learning_rate": 6.709366978902942e-05, "loss": 0.471, "step": 50340 }, { "epoch": 12.462871287128714, "grad_norm": 0.4919702410697937, "learning_rate": 6.708072045945422e-05, "loss": 0.4721, "step": 50350 }, { "epoch": 12.465346534653465, "grad_norm": 0.4678889811038971, "learning_rate": 6.706776983271102e-05, "loss": 0.4716, "step": 50360 }, { "epoch": 12.467821782178218, "grad_norm": 0.4995007812976837, "learning_rate": 6.705481790978334e-05, "loss": 0.467, "step": 50370 }, { "epoch": 12.47029702970297, "grad_norm": 0.4659787714481354, "learning_rate": 6.704186469165478e-05, "loss": 0.4691, "step": 50380 }, { "epoch": 12.472772277227723, "grad_norm": 0.48980003595352173, "learning_rate": 6.702891017930907e-05, "loss": 0.4718, "step": 50390 }, { "epoch": 12.475247524752476, "grad_norm": 0.4877959191799164, "learning_rate": 6.701595437373002e-05, "loss": 0.4702, "step": 50400 }, { "epoch": 12.477722772277227, "grad_norm": 0.4734112620353699, "learning_rate": 6.700299727590153e-05, "loss": 0.4675, "step": 50410 }, { "epoch": 12.48019801980198, "grad_norm": 0.4846065640449524, "learning_rate": 6.69900388868076e-05, "loss": 0.4687, "step": 50420 }, { "epoch": 12.482673267326733, "grad_norm": 0.4684927463531494, "learning_rate": 6.697707920743234e-05, "loss": 0.4743, "step": 50430 }, { "epoch": 12.485148514851485, "grad_norm": 0.4962174594402313, "learning_rate": 6.696411823875995e-05, "loss": 0.4661, "step": 50440 }, { "epoch": 12.487623762376238, "grad_norm": 0.48434826731681824, "learning_rate": 6.695115598177474e-05, "loss": 0.4744, "step": 50450 }, { "epoch": 12.490099009900991, "grad_norm": 0.4603107273578644, "learning_rate": 6.69381924374611e-05, "loss": 0.4707, "step": 50460 }, { "epoch": 12.492574257425742, "grad_norm": 0.523501455783844, "learning_rate": 6.692522760680354e-05, "loss": 0.4701, "step": 50470 }, { "epoch": 12.495049504950495, "grad_norm": 0.5029081106185913, "learning_rate": 6.691226149078662e-05, "loss": 0.4779, "step": 50480 }, { "epoch": 12.497524752475247, "grad_norm": 0.5066601634025574, "learning_rate": 6.689929409039505e-05, "loss": 0.4686, "step": 50490 }, { "epoch": 12.5, "grad_norm": 0.4834675192832947, "learning_rate": 6.688632540661364e-05, "loss": 0.4688, "step": 50500 }, { "epoch": 12.502475247524753, "grad_norm": 0.46884942054748535, "learning_rate": 6.687335544042724e-05, "loss": 0.4676, "step": 50510 }, { "epoch": 12.504950495049505, "grad_norm": 0.4755229651927948, "learning_rate": 6.686038419282086e-05, "loss": 0.4702, "step": 50520 }, { "epoch": 12.507425742574258, "grad_norm": 0.4745275676250458, "learning_rate": 6.684741166477954e-05, "loss": 0.4692, "step": 50530 }, { "epoch": 12.509900990099009, "grad_norm": 0.482341468334198, "learning_rate": 6.683443785728852e-05, "loss": 0.4697, "step": 50540 }, { "epoch": 12.512376237623762, "grad_norm": 0.45899537205696106, "learning_rate": 6.682146277133302e-05, "loss": 0.4728, "step": 50550 }, { "epoch": 12.514851485148515, "grad_norm": 0.4591987729072571, "learning_rate": 6.680848640789844e-05, "loss": 0.4764, "step": 50560 }, { "epoch": 12.517326732673267, "grad_norm": 0.47576385736465454, "learning_rate": 6.679550876797026e-05, "loss": 0.4667, "step": 50570 }, { "epoch": 12.51980198019802, "grad_norm": 0.48487389087677, "learning_rate": 6.678252985253401e-05, "loss": 0.4739, "step": 50580 }, { "epoch": 12.522277227722773, "grad_norm": 0.4887629449367523, "learning_rate": 6.676954966257538e-05, "loss": 0.4723, "step": 50590 }, { "epoch": 12.524752475247524, "grad_norm": 0.4893248379230499, "learning_rate": 6.675656819908012e-05, "loss": 0.4675, "step": 50600 }, { "epoch": 12.527227722772277, "grad_norm": 0.5157448053359985, "learning_rate": 6.67435854630341e-05, "loss": 0.4803, "step": 50610 }, { "epoch": 12.52970297029703, "grad_norm": 0.5006124377250671, "learning_rate": 6.673060145542326e-05, "loss": 0.4713, "step": 50620 }, { "epoch": 12.532178217821782, "grad_norm": 0.47026559710502625, "learning_rate": 6.671761617723364e-05, "loss": 0.4759, "step": 50630 }, { "epoch": 12.534653465346535, "grad_norm": 0.5233968496322632, "learning_rate": 6.67046296294514e-05, "loss": 0.4716, "step": 50640 }, { "epoch": 12.537128712871286, "grad_norm": 0.45258527994155884, "learning_rate": 6.66916418130628e-05, "loss": 0.4692, "step": 50650 }, { "epoch": 12.53960396039604, "grad_norm": 0.45673471689224243, "learning_rate": 6.667865272905413e-05, "loss": 0.4697, "step": 50660 }, { "epoch": 12.542079207920793, "grad_norm": 0.4854966700077057, "learning_rate": 6.666566237841187e-05, "loss": 0.4704, "step": 50670 }, { "epoch": 12.544554455445544, "grad_norm": 0.48507145047187805, "learning_rate": 6.665267076212253e-05, "loss": 0.468, "step": 50680 }, { "epoch": 12.547029702970297, "grad_norm": 0.4676271975040436, "learning_rate": 6.663967788117275e-05, "loss": 0.4681, "step": 50690 }, { "epoch": 12.549504950495049, "grad_norm": 0.4635549485683441, "learning_rate": 6.662668373654925e-05, "loss": 0.4755, "step": 50700 }, { "epoch": 12.551980198019802, "grad_norm": 0.47320103645324707, "learning_rate": 6.661368832923884e-05, "loss": 0.471, "step": 50710 }, { "epoch": 12.554455445544555, "grad_norm": 0.479062557220459, "learning_rate": 6.660069166022846e-05, "loss": 0.4696, "step": 50720 }, { "epoch": 12.556930693069306, "grad_norm": 0.5063884258270264, "learning_rate": 6.658769373050508e-05, "loss": 0.4696, "step": 50730 }, { "epoch": 12.55940594059406, "grad_norm": 0.4769791066646576, "learning_rate": 6.657469454105584e-05, "loss": 0.4631, "step": 50740 }, { "epoch": 12.561881188118813, "grad_norm": 0.46686699986457825, "learning_rate": 6.656169409286793e-05, "loss": 0.4758, "step": 50750 }, { "epoch": 12.564356435643564, "grad_norm": 0.47487083077430725, "learning_rate": 6.654869238692866e-05, "loss": 0.4691, "step": 50760 }, { "epoch": 12.566831683168317, "grad_norm": 0.488626092672348, "learning_rate": 6.653568942422542e-05, "loss": 0.4735, "step": 50770 }, { "epoch": 12.569306930693068, "grad_norm": 0.4406539499759674, "learning_rate": 6.652268520574567e-05, "loss": 0.4738, "step": 50780 }, { "epoch": 12.571782178217822, "grad_norm": 0.4662708640098572, "learning_rate": 6.650967973247702e-05, "loss": 0.4746, "step": 50790 }, { "epoch": 12.574257425742575, "grad_norm": 0.47464486956596375, "learning_rate": 6.649667300540719e-05, "loss": 0.4715, "step": 50800 }, { "epoch": 12.576732673267326, "grad_norm": 0.4844857156276703, "learning_rate": 6.648366502552387e-05, "loss": 0.4691, "step": 50810 }, { "epoch": 12.57920792079208, "grad_norm": 0.5063576102256775, "learning_rate": 6.647065579381498e-05, "loss": 0.4732, "step": 50820 }, { "epoch": 12.581683168316832, "grad_norm": 0.4716656506061554, "learning_rate": 6.64576453112685e-05, "loss": 0.4683, "step": 50830 }, { "epoch": 12.584158415841584, "grad_norm": 0.4975482225418091, "learning_rate": 6.644463357887245e-05, "loss": 0.4666, "step": 50840 }, { "epoch": 12.586633663366337, "grad_norm": 0.5241139531135559, "learning_rate": 6.643162059761502e-05, "loss": 0.4727, "step": 50850 }, { "epoch": 12.589108910891088, "grad_norm": 0.48459184169769287, "learning_rate": 6.641860636848442e-05, "loss": 0.4736, "step": 50860 }, { "epoch": 12.591584158415841, "grad_norm": 0.4831238389015198, "learning_rate": 6.640559089246905e-05, "loss": 0.4706, "step": 50870 }, { "epoch": 12.594059405940595, "grad_norm": 0.5038567781448364, "learning_rate": 6.63925741705573e-05, "loss": 0.4796, "step": 50880 }, { "epoch": 12.596534653465346, "grad_norm": 0.4714480936527252, "learning_rate": 6.637955620373775e-05, "loss": 0.4722, "step": 50890 }, { "epoch": 12.599009900990099, "grad_norm": 0.4846528172492981, "learning_rate": 6.636653699299896e-05, "loss": 0.4754, "step": 50900 }, { "epoch": 12.601485148514852, "grad_norm": 0.4644559323787689, "learning_rate": 6.635351653932973e-05, "loss": 0.468, "step": 50910 }, { "epoch": 12.603960396039604, "grad_norm": 0.5330882668495178, "learning_rate": 6.634049484371882e-05, "loss": 0.4707, "step": 50920 }, { "epoch": 12.606435643564357, "grad_norm": 0.48040375113487244, "learning_rate": 6.632747190715518e-05, "loss": 0.4716, "step": 50930 }, { "epoch": 12.608910891089108, "grad_norm": 0.479574978351593, "learning_rate": 6.631444773062779e-05, "loss": 0.474, "step": 50940 }, { "epoch": 12.611386138613861, "grad_norm": 0.49884432554244995, "learning_rate": 6.630142231512577e-05, "loss": 0.4783, "step": 50950 }, { "epoch": 12.613861386138614, "grad_norm": 0.4528210163116455, "learning_rate": 6.628839566163829e-05, "loss": 0.4737, "step": 50960 }, { "epoch": 12.616336633663366, "grad_norm": 0.466295063495636, "learning_rate": 6.627536777115466e-05, "loss": 0.4652, "step": 50970 }, { "epoch": 12.618811881188119, "grad_norm": 0.455353707075119, "learning_rate": 6.626233864466427e-05, "loss": 0.4695, "step": 50980 }, { "epoch": 12.621287128712872, "grad_norm": 0.46016043424606323, "learning_rate": 6.624930828315658e-05, "loss": 0.4683, "step": 50990 }, { "epoch": 12.623762376237623, "grad_norm": 0.4830321967601776, "learning_rate": 6.623627668762115e-05, "loss": 0.468, "step": 51000 }, { "epoch": 12.626237623762377, "grad_norm": 0.48234111070632935, "learning_rate": 6.622324385904764e-05, "loss": 0.4675, "step": 51010 }, { "epoch": 12.628712871287128, "grad_norm": 0.48938897252082825, "learning_rate": 6.621020979842586e-05, "loss": 0.4733, "step": 51020 }, { "epoch": 12.631188118811881, "grad_norm": 0.5087903141975403, "learning_rate": 6.619717450674559e-05, "loss": 0.4736, "step": 51030 }, { "epoch": 12.633663366336634, "grad_norm": 0.4948287010192871, "learning_rate": 6.618413798499686e-05, "loss": 0.4699, "step": 51040 }, { "epoch": 12.636138613861386, "grad_norm": 0.49970224499702454, "learning_rate": 6.61711002341696e-05, "loss": 0.4762, "step": 51050 }, { "epoch": 12.638613861386139, "grad_norm": 0.529681384563446, "learning_rate": 6.615806125525401e-05, "loss": 0.4705, "step": 51060 }, { "epoch": 12.641089108910892, "grad_norm": 0.4787222445011139, "learning_rate": 6.614502104924032e-05, "loss": 0.4716, "step": 51070 }, { "epoch": 12.643564356435643, "grad_norm": 0.45964938402175903, "learning_rate": 6.613197961711881e-05, "loss": 0.4649, "step": 51080 }, { "epoch": 12.646039603960396, "grad_norm": 0.48768919706344604, "learning_rate": 6.611893695987993e-05, "loss": 0.4683, "step": 51090 }, { "epoch": 12.648514851485148, "grad_norm": 0.4806927442550659, "learning_rate": 6.610589307851414e-05, "loss": 0.473, "step": 51100 }, { "epoch": 12.650990099009901, "grad_norm": 0.4640361964702606, "learning_rate": 6.609284797401207e-05, "loss": 0.4764, "step": 51110 }, { "epoch": 12.653465346534654, "grad_norm": 0.467386394739151, "learning_rate": 6.60798016473644e-05, "loss": 0.4702, "step": 51120 }, { "epoch": 12.655940594059405, "grad_norm": 0.4898168444633484, "learning_rate": 6.606675409956192e-05, "loss": 0.4728, "step": 51130 }, { "epoch": 12.658415841584159, "grad_norm": 0.4561639428138733, "learning_rate": 6.60537053315955e-05, "loss": 0.4738, "step": 51140 }, { "epoch": 12.660891089108912, "grad_norm": 0.4752826690673828, "learning_rate": 6.60406553444561e-05, "loss": 0.4666, "step": 51150 }, { "epoch": 12.663366336633663, "grad_norm": 0.46557581424713135, "learning_rate": 6.602760413913477e-05, "loss": 0.4712, "step": 51160 }, { "epoch": 12.665841584158416, "grad_norm": 0.4746409058570862, "learning_rate": 6.601455171662271e-05, "loss": 0.4686, "step": 51170 }, { "epoch": 12.668316831683168, "grad_norm": 0.48293137550354004, "learning_rate": 6.600149807791109e-05, "loss": 0.4696, "step": 51180 }, { "epoch": 12.67079207920792, "grad_norm": 0.5040169358253479, "learning_rate": 6.598844322399134e-05, "loss": 0.4629, "step": 51190 }, { "epoch": 12.673267326732674, "grad_norm": 0.482746422290802, "learning_rate": 6.597538715585481e-05, "loss": 0.4755, "step": 51200 }, { "epoch": 12.675742574257425, "grad_norm": 0.45326584577560425, "learning_rate": 6.596232987449307e-05, "loss": 0.4705, "step": 51210 }, { "epoch": 12.678217821782178, "grad_norm": 0.48888498544692993, "learning_rate": 6.594927138089771e-05, "loss": 0.4671, "step": 51220 }, { "epoch": 12.680693069306932, "grad_norm": 0.4950842261314392, "learning_rate": 6.593621167606044e-05, "loss": 0.4676, "step": 51230 }, { "epoch": 12.683168316831683, "grad_norm": 0.47751036286354065, "learning_rate": 6.592315076097307e-05, "loss": 0.474, "step": 51240 }, { "epoch": 12.685643564356436, "grad_norm": 0.49840351939201355, "learning_rate": 6.591008863662748e-05, "loss": 0.4689, "step": 51250 }, { "epoch": 12.688118811881187, "grad_norm": 0.49990957975387573, "learning_rate": 6.589702530401566e-05, "loss": 0.4685, "step": 51260 }, { "epoch": 12.69059405940594, "grad_norm": 0.5047640204429626, "learning_rate": 6.588396076412968e-05, "loss": 0.4682, "step": 51270 }, { "epoch": 12.693069306930694, "grad_norm": 0.48168885707855225, "learning_rate": 6.58708950179617e-05, "loss": 0.4709, "step": 51280 }, { "epoch": 12.695544554455445, "grad_norm": 0.47155314683914185, "learning_rate": 6.585782806650397e-05, "loss": 0.4704, "step": 51290 }, { "epoch": 12.698019801980198, "grad_norm": 0.5067349076271057, "learning_rate": 6.584475991074887e-05, "loss": 0.4679, "step": 51300 }, { "epoch": 12.700495049504951, "grad_norm": 0.47530147433280945, "learning_rate": 6.583169055168883e-05, "loss": 0.4709, "step": 51310 }, { "epoch": 12.702970297029703, "grad_norm": 0.5126628875732422, "learning_rate": 6.581861999031636e-05, "loss": 0.4684, "step": 51320 }, { "epoch": 12.705445544554456, "grad_norm": 0.5028371214866638, "learning_rate": 6.580554822762408e-05, "loss": 0.475, "step": 51330 }, { "epoch": 12.707920792079207, "grad_norm": 0.4813506305217743, "learning_rate": 6.579247526460475e-05, "loss": 0.4646, "step": 51340 }, { "epoch": 12.71039603960396, "grad_norm": 0.47719526290893555, "learning_rate": 6.577940110225113e-05, "loss": 0.4674, "step": 51350 }, { "epoch": 12.712871287128714, "grad_norm": 0.4845878779888153, "learning_rate": 6.576632574155613e-05, "loss": 0.4646, "step": 51360 }, { "epoch": 12.715346534653465, "grad_norm": 0.47501063346862793, "learning_rate": 6.575324918351274e-05, "loss": 0.4698, "step": 51370 }, { "epoch": 12.717821782178218, "grad_norm": 0.5041617751121521, "learning_rate": 6.574017142911405e-05, "loss": 0.474, "step": 51380 }, { "epoch": 12.72029702970297, "grad_norm": 0.4831080138683319, "learning_rate": 6.572709247935318e-05, "loss": 0.4663, "step": 51390 }, { "epoch": 12.722772277227723, "grad_norm": 0.5475236177444458, "learning_rate": 6.571401233522345e-05, "loss": 0.4714, "step": 51400 }, { "epoch": 12.725247524752476, "grad_norm": 0.4577425420284271, "learning_rate": 6.570093099771819e-05, "loss": 0.4707, "step": 51410 }, { "epoch": 12.727722772277227, "grad_norm": 0.49457767605781555, "learning_rate": 6.568784846783082e-05, "loss": 0.4682, "step": 51420 }, { "epoch": 12.73019801980198, "grad_norm": 0.49432969093322754, "learning_rate": 6.567476474655491e-05, "loss": 0.4711, "step": 51430 }, { "epoch": 12.732673267326733, "grad_norm": 0.4743878245353699, "learning_rate": 6.566167983488405e-05, "loss": 0.4762, "step": 51440 }, { "epoch": 12.735148514851485, "grad_norm": 0.4748997688293457, "learning_rate": 6.564859373381199e-05, "loss": 0.4681, "step": 51450 }, { "epoch": 12.737623762376238, "grad_norm": 0.507167637348175, "learning_rate": 6.563550644433248e-05, "loss": 0.4699, "step": 51460 }, { "epoch": 12.740099009900991, "grad_norm": 0.4983545243740082, "learning_rate": 6.562241796743944e-05, "loss": 0.4699, "step": 51470 }, { "epoch": 12.742574257425742, "grad_norm": 0.5130771994590759, "learning_rate": 6.560932830412685e-05, "loss": 0.4732, "step": 51480 }, { "epoch": 12.745049504950495, "grad_norm": 0.45693421363830566, "learning_rate": 6.559623745538881e-05, "loss": 0.4659, "step": 51490 }, { "epoch": 12.747524752475247, "grad_norm": 0.4658326506614685, "learning_rate": 6.558314542221944e-05, "loss": 0.4701, "step": 51500 }, { "epoch": 12.75, "grad_norm": 0.4983939826488495, "learning_rate": 6.557005220561304e-05, "loss": 0.4737, "step": 51510 }, { "epoch": 12.752475247524753, "grad_norm": 0.5125213861465454, "learning_rate": 6.555695780656391e-05, "loss": 0.4724, "step": 51520 }, { "epoch": 12.754950495049505, "grad_norm": 0.43958622217178345, "learning_rate": 6.554386222606651e-05, "loss": 0.4713, "step": 51530 }, { "epoch": 12.757425742574258, "grad_norm": 0.4523855447769165, "learning_rate": 6.553076546511536e-05, "loss": 0.4675, "step": 51540 }, { "epoch": 12.759900990099009, "grad_norm": 0.4479128420352936, "learning_rate": 6.551766752470506e-05, "loss": 0.4736, "step": 51550 }, { "epoch": 12.762376237623762, "grad_norm": 0.4777299165725708, "learning_rate": 6.55045684058303e-05, "loss": 0.4655, "step": 51560 }, { "epoch": 12.764851485148515, "grad_norm": 0.458258718252182, "learning_rate": 6.54914681094859e-05, "loss": 0.4671, "step": 51570 }, { "epoch": 12.767326732673267, "grad_norm": 0.4302176535129547, "learning_rate": 6.547836663666674e-05, "loss": 0.4718, "step": 51580 }, { "epoch": 12.76980198019802, "grad_norm": 0.4540048837661743, "learning_rate": 6.546526398836778e-05, "loss": 0.467, "step": 51590 }, { "epoch": 12.772277227722773, "grad_norm": 0.4808363616466522, "learning_rate": 6.545216016558409e-05, "loss": 0.4687, "step": 51600 }, { "epoch": 12.774752475247524, "grad_norm": 0.4762725532054901, "learning_rate": 6.543905516931082e-05, "loss": 0.4685, "step": 51610 }, { "epoch": 12.777227722772277, "grad_norm": 0.5191477537155151, "learning_rate": 6.542594900054318e-05, "loss": 0.4704, "step": 51620 }, { "epoch": 12.77970297029703, "grad_norm": 0.4692723751068115, "learning_rate": 6.54128416602765e-05, "loss": 0.4674, "step": 51630 }, { "epoch": 12.782178217821782, "grad_norm": 0.4665762186050415, "learning_rate": 6.539973314950625e-05, "loss": 0.4681, "step": 51640 }, { "epoch": 12.784653465346535, "grad_norm": 0.4652264416217804, "learning_rate": 6.538662346922786e-05, "loss": 0.4743, "step": 51650 }, { "epoch": 12.787128712871286, "grad_norm": 0.474397212266922, "learning_rate": 6.5373512620437e-05, "loss": 0.466, "step": 51660 }, { "epoch": 12.78960396039604, "grad_norm": 0.4747855067253113, "learning_rate": 6.536040060412928e-05, "loss": 0.4694, "step": 51670 }, { "epoch": 12.792079207920793, "grad_norm": 0.4759570360183716, "learning_rate": 6.534728742130051e-05, "loss": 0.472, "step": 51680 }, { "epoch": 12.794554455445544, "grad_norm": 0.4699491858482361, "learning_rate": 6.533417307294655e-05, "loss": 0.4716, "step": 51690 }, { "epoch": 12.797029702970297, "grad_norm": 0.47584909200668335, "learning_rate": 6.532105756006334e-05, "loss": 0.4706, "step": 51700 }, { "epoch": 12.799504950495049, "grad_norm": 0.5292508006095886, "learning_rate": 6.530794088364692e-05, "loss": 0.4698, "step": 51710 }, { "epoch": 12.801980198019802, "grad_norm": 0.5163838863372803, "learning_rate": 6.529482304469341e-05, "loss": 0.4633, "step": 51720 }, { "epoch": 12.804455445544555, "grad_norm": 0.5026618838310242, "learning_rate": 6.528170404419904e-05, "loss": 0.4673, "step": 51730 }, { "epoch": 12.806930693069306, "grad_norm": 0.483222633600235, "learning_rate": 6.526858388316007e-05, "loss": 0.4642, "step": 51740 }, { "epoch": 12.80940594059406, "grad_norm": 0.48721933364868164, "learning_rate": 6.525546256257295e-05, "loss": 0.4666, "step": 51750 }, { "epoch": 12.811881188118813, "grad_norm": 0.488550066947937, "learning_rate": 6.524234008343411e-05, "loss": 0.4672, "step": 51760 }, { "epoch": 12.814356435643564, "grad_norm": 0.45085620880126953, "learning_rate": 6.522921644674012e-05, "loss": 0.4695, "step": 51770 }, { "epoch": 12.816831683168317, "grad_norm": 0.47165971994400024, "learning_rate": 6.521609165348766e-05, "loss": 0.4692, "step": 51780 }, { "epoch": 12.819306930693068, "grad_norm": 0.4840282201766968, "learning_rate": 6.520296570467346e-05, "loss": 0.4643, "step": 51790 }, { "epoch": 12.821782178217822, "grad_norm": 0.487420916557312, "learning_rate": 6.518983860129432e-05, "loss": 0.4696, "step": 51800 }, { "epoch": 12.824257425742575, "grad_norm": 0.4970153570175171, "learning_rate": 6.517671034434723e-05, "loss": 0.4741, "step": 51810 }, { "epoch": 12.826732673267326, "grad_norm": 0.5161300897598267, "learning_rate": 6.51635809348291e-05, "loss": 0.4627, "step": 51820 }, { "epoch": 12.82920792079208, "grad_norm": 0.4906443655490875, "learning_rate": 6.51504503737371e-05, "loss": 0.4719, "step": 51830 }, { "epoch": 12.831683168316832, "grad_norm": 0.46789050102233887, "learning_rate": 6.513731866206835e-05, "loss": 0.4713, "step": 51840 }, { "epoch": 12.834158415841584, "grad_norm": 0.5094728469848633, "learning_rate": 6.512418580082019e-05, "loss": 0.4657, "step": 51850 }, { "epoch": 12.836633663366337, "grad_norm": 0.5042576193809509, "learning_rate": 6.511105179098988e-05, "loss": 0.4646, "step": 51860 }, { "epoch": 12.839108910891088, "grad_norm": 0.46671798825263977, "learning_rate": 6.509791663357493e-05, "loss": 0.4754, "step": 51870 }, { "epoch": 12.841584158415841, "grad_norm": 0.462789386510849, "learning_rate": 6.508478032957287e-05, "loss": 0.4734, "step": 51880 }, { "epoch": 12.844059405940595, "grad_norm": 0.4657549262046814, "learning_rate": 6.507164287998125e-05, "loss": 0.4654, "step": 51890 }, { "epoch": 12.846534653465346, "grad_norm": 0.4778996407985687, "learning_rate": 6.505850428579785e-05, "loss": 0.4655, "step": 51900 }, { "epoch": 12.849009900990099, "grad_norm": 0.47199273109436035, "learning_rate": 6.504536454802043e-05, "loss": 0.4704, "step": 51910 }, { "epoch": 12.851485148514852, "grad_norm": 0.5160977840423584, "learning_rate": 6.503222366764685e-05, "loss": 0.4645, "step": 51920 }, { "epoch": 12.853960396039604, "grad_norm": 0.48086124658584595, "learning_rate": 6.501908164567509e-05, "loss": 0.4698, "step": 51930 }, { "epoch": 12.856435643564357, "grad_norm": 0.49894675612449646, "learning_rate": 6.500593848310319e-05, "loss": 0.4691, "step": 51940 }, { "epoch": 12.858910891089108, "grad_norm": 0.49830421805381775, "learning_rate": 6.499279418092929e-05, "loss": 0.4736, "step": 51950 }, { "epoch": 12.861386138613861, "grad_norm": 0.4910685420036316, "learning_rate": 6.497964874015165e-05, "loss": 0.4767, "step": 51960 }, { "epoch": 12.863861386138614, "grad_norm": 0.47853824496269226, "learning_rate": 6.496650216176851e-05, "loss": 0.4698, "step": 51970 }, { "epoch": 12.866336633663366, "grad_norm": 0.4636903703212738, "learning_rate": 6.49533544467783e-05, "loss": 0.4687, "step": 51980 }, { "epoch": 12.868811881188119, "grad_norm": 0.4719599485397339, "learning_rate": 6.494020559617952e-05, "loss": 0.4684, "step": 51990 }, { "epoch": 12.871287128712872, "grad_norm": 0.4794366955757141, "learning_rate": 6.492705561097073e-05, "loss": 0.4725, "step": 52000 }, { "epoch": 12.873762376237623, "grad_norm": 0.5396580696105957, "learning_rate": 6.491390449215055e-05, "loss": 0.4715, "step": 52010 }, { "epoch": 12.876237623762377, "grad_norm": 0.4727857708930969, "learning_rate": 6.490075224071776e-05, "loss": 0.4702, "step": 52020 }, { "epoch": 12.878712871287128, "grad_norm": 0.5049969553947449, "learning_rate": 6.488759885767117e-05, "loss": 0.4678, "step": 52030 }, { "epoch": 12.881188118811881, "grad_norm": 0.4559984803199768, "learning_rate": 6.48744443440097e-05, "loss": 0.4699, "step": 52040 }, { "epoch": 12.883663366336634, "grad_norm": 0.45987921953201294, "learning_rate": 6.486128870073233e-05, "loss": 0.4701, "step": 52050 }, { "epoch": 12.886138613861386, "grad_norm": 0.4779287576675415, "learning_rate": 6.484813192883819e-05, "loss": 0.467, "step": 52060 }, { "epoch": 12.888613861386139, "grad_norm": 0.49080511927604675, "learning_rate": 6.48349740293264e-05, "loss": 0.4717, "step": 52070 }, { "epoch": 12.891089108910892, "grad_norm": 0.4519059658050537, "learning_rate": 6.482181500319625e-05, "loss": 0.4681, "step": 52080 }, { "epoch": 12.893564356435643, "grad_norm": 0.4672601819038391, "learning_rate": 6.480865485144704e-05, "loss": 0.47, "step": 52090 }, { "epoch": 12.896039603960396, "grad_norm": 0.49581968784332275, "learning_rate": 6.479549357507823e-05, "loss": 0.4691, "step": 52100 }, { "epoch": 12.898514851485148, "grad_norm": 0.46031197905540466, "learning_rate": 6.478233117508934e-05, "loss": 0.4717, "step": 52110 }, { "epoch": 12.900990099009901, "grad_norm": 0.4565674066543579, "learning_rate": 6.476916765247993e-05, "loss": 0.471, "step": 52120 }, { "epoch": 12.903465346534654, "grad_norm": 0.4817519783973694, "learning_rate": 6.475600300824974e-05, "loss": 0.4679, "step": 52130 }, { "epoch": 12.905940594059405, "grad_norm": 0.525933563709259, "learning_rate": 6.474283724339847e-05, "loss": 0.4667, "step": 52140 }, { "epoch": 12.908415841584159, "grad_norm": 0.4771799147129059, "learning_rate": 6.472967035892603e-05, "loss": 0.4715, "step": 52150 }, { "epoch": 12.910891089108912, "grad_norm": 0.4906778037548065, "learning_rate": 6.471650235583232e-05, "loss": 0.4735, "step": 52160 }, { "epoch": 12.913366336633663, "grad_norm": 0.5053895711898804, "learning_rate": 6.470333323511736e-05, "loss": 0.4705, "step": 52170 }, { "epoch": 12.915841584158416, "grad_norm": 0.5137230753898621, "learning_rate": 6.469016299778131e-05, "loss": 0.4722, "step": 52180 }, { "epoch": 12.918316831683168, "grad_norm": 0.491201251745224, "learning_rate": 6.467699164482428e-05, "loss": 0.4679, "step": 52190 }, { "epoch": 12.92079207920792, "grad_norm": 0.46958115696907043, "learning_rate": 6.466381917724664e-05, "loss": 0.4663, "step": 52200 }, { "epoch": 12.923267326732674, "grad_norm": 0.465714693069458, "learning_rate": 6.465064559604868e-05, "loss": 0.4693, "step": 52210 }, { "epoch": 12.925742574257425, "grad_norm": 0.4633512794971466, "learning_rate": 6.463747090223088e-05, "loss": 0.4713, "step": 52220 }, { "epoch": 12.928217821782178, "grad_norm": 0.4828576445579529, "learning_rate": 6.462429509679376e-05, "loss": 0.4757, "step": 52230 }, { "epoch": 12.930693069306932, "grad_norm": 0.49307137727737427, "learning_rate": 6.461111818073795e-05, "loss": 0.4729, "step": 52240 }, { "epoch": 12.933168316831683, "grad_norm": 0.4342295825481415, "learning_rate": 6.459794015506414e-05, "loss": 0.4699, "step": 52250 }, { "epoch": 12.935643564356436, "grad_norm": 0.48528602719306946, "learning_rate": 6.458476102077311e-05, "loss": 0.4646, "step": 52260 }, { "epoch": 12.938118811881187, "grad_norm": 0.4483453035354614, "learning_rate": 6.457158077886572e-05, "loss": 0.4729, "step": 52270 }, { "epoch": 12.94059405940594, "grad_norm": 0.4531427025794983, "learning_rate": 6.455839943034294e-05, "loss": 0.4662, "step": 52280 }, { "epoch": 12.943069306930694, "grad_norm": 0.4355674088001251, "learning_rate": 6.45452169762058e-05, "loss": 0.4701, "step": 52290 }, { "epoch": 12.945544554455445, "grad_norm": 0.46486300230026245, "learning_rate": 6.453203341745543e-05, "loss": 0.4669, "step": 52300 }, { "epoch": 12.948019801980198, "grad_norm": 0.4481797218322754, "learning_rate": 6.451884875509302e-05, "loss": 0.4663, "step": 52310 }, { "epoch": 12.950495049504951, "grad_norm": 0.45659247040748596, "learning_rate": 6.450566299011984e-05, "loss": 0.4691, "step": 52320 }, { "epoch": 12.952970297029703, "grad_norm": 0.49492529034614563, "learning_rate": 6.449247612353733e-05, "loss": 0.4703, "step": 52330 }, { "epoch": 12.955445544554456, "grad_norm": 0.5016133785247803, "learning_rate": 6.447928815634686e-05, "loss": 0.4739, "step": 52340 }, { "epoch": 12.957920792079207, "grad_norm": 0.4984807074069977, "learning_rate": 6.446609908955003e-05, "loss": 0.4722, "step": 52350 }, { "epoch": 12.96039603960396, "grad_norm": 0.4527847468852997, "learning_rate": 6.445290892414844e-05, "loss": 0.472, "step": 52360 }, { "epoch": 12.962871287128714, "grad_norm": 0.49998894333839417, "learning_rate": 6.44397176611438e-05, "loss": 0.4694, "step": 52370 }, { "epoch": 12.965346534653465, "grad_norm": 0.4713253080844879, "learning_rate": 6.442652530153789e-05, "loss": 0.4695, "step": 52380 }, { "epoch": 12.967821782178218, "grad_norm": 0.4433077275753021, "learning_rate": 6.441333184633261e-05, "loss": 0.4665, "step": 52390 }, { "epoch": 12.97029702970297, "grad_norm": 0.46584293246269226, "learning_rate": 6.440013729652988e-05, "loss": 0.4664, "step": 52400 }, { "epoch": 12.972772277227723, "grad_norm": 0.4552637040615082, "learning_rate": 6.438694165313175e-05, "loss": 0.4679, "step": 52410 }, { "epoch": 12.975247524752476, "grad_norm": 0.5189120173454285, "learning_rate": 6.437374491714033e-05, "loss": 0.473, "step": 52420 }, { "epoch": 12.977722772277227, "grad_norm": 0.47552254796028137, "learning_rate": 6.436054708955787e-05, "loss": 0.4728, "step": 52430 }, { "epoch": 12.98019801980198, "grad_norm": 0.4853539764881134, "learning_rate": 6.434734817138662e-05, "loss": 0.471, "step": 52440 }, { "epoch": 12.982673267326733, "grad_norm": 0.4643867015838623, "learning_rate": 6.433414816362897e-05, "loss": 0.4733, "step": 52450 }, { "epoch": 12.985148514851485, "grad_norm": 0.4743245244026184, "learning_rate": 6.432094706728737e-05, "loss": 0.4687, "step": 52460 }, { "epoch": 12.987623762376238, "grad_norm": 0.5348871946334839, "learning_rate": 6.430774488336432e-05, "loss": 0.4657, "step": 52470 }, { "epoch": 12.990099009900991, "grad_norm": 0.504967451095581, "learning_rate": 6.42945416128625e-05, "loss": 0.4688, "step": 52480 }, { "epoch": 12.992574257425742, "grad_norm": 0.5061749815940857, "learning_rate": 6.428133725678456e-05, "loss": 0.4734, "step": 52490 }, { "epoch": 12.995049504950495, "grad_norm": 0.4753495454788208, "learning_rate": 6.426813181613333e-05, "loss": 0.4728, "step": 52500 }, { "epoch": 12.997524752475247, "grad_norm": 0.4610406160354614, "learning_rate": 6.425492529191161e-05, "loss": 0.4734, "step": 52510 }, { "epoch": 13.0, "grad_norm": 0.45067694783210754, "learning_rate": 6.424171768512243e-05, "loss": 0.4703, "step": 52520 }, { "epoch": 13.002475247524753, "grad_norm": 0.4936497211456299, "learning_rate": 6.422850899676876e-05, "loss": 0.4648, "step": 52530 }, { "epoch": 13.004950495049505, "grad_norm": 0.4597608745098114, "learning_rate": 6.421529922785373e-05, "loss": 0.468, "step": 52540 }, { "epoch": 13.007425742574258, "grad_norm": 0.45992425084114075, "learning_rate": 6.420208837938053e-05, "loss": 0.4696, "step": 52550 }, { "epoch": 13.009900990099009, "grad_norm": 0.5167187452316284, "learning_rate": 6.418887645235246e-05, "loss": 0.4657, "step": 52560 }, { "epoch": 13.012376237623762, "grad_norm": 0.47264906764030457, "learning_rate": 6.417566344777285e-05, "loss": 0.4688, "step": 52570 }, { "epoch": 13.014851485148515, "grad_norm": 0.493240088224411, "learning_rate": 6.416244936664514e-05, "loss": 0.47, "step": 52580 }, { "epoch": 13.017326732673267, "grad_norm": 0.4820718467235565, "learning_rate": 6.414923420997288e-05, "loss": 0.472, "step": 52590 }, { "epoch": 13.01980198019802, "grad_norm": 0.4827084541320801, "learning_rate": 6.413601797875966e-05, "loss": 0.4694, "step": 52600 }, { "epoch": 13.022277227722773, "grad_norm": 0.482381671667099, "learning_rate": 6.412280067400917e-05, "loss": 0.4782, "step": 52610 }, { "epoch": 13.024752475247524, "grad_norm": 0.4516582489013672, "learning_rate": 6.410958229672517e-05, "loss": 0.4676, "step": 52620 }, { "epoch": 13.027227722772277, "grad_norm": 0.485466331243515, "learning_rate": 6.40963628479115e-05, "loss": 0.4678, "step": 52630 }, { "epoch": 13.029702970297029, "grad_norm": 0.49469318985939026, "learning_rate": 6.40831423285721e-05, "loss": 0.4746, "step": 52640 }, { "epoch": 13.032178217821782, "grad_norm": 0.47022902965545654, "learning_rate": 6.406992073971098e-05, "loss": 0.4693, "step": 52650 }, { "epoch": 13.034653465346535, "grad_norm": 0.4828602075576782, "learning_rate": 6.405669808233224e-05, "loss": 0.4658, "step": 52660 }, { "epoch": 13.037128712871286, "grad_norm": 0.45929601788520813, "learning_rate": 6.404347435744004e-05, "loss": 0.4666, "step": 52670 }, { "epoch": 13.03960396039604, "grad_norm": 0.45154091715812683, "learning_rate": 6.403024956603865e-05, "loss": 0.4663, "step": 52680 }, { "epoch": 13.042079207920793, "grad_norm": 0.4892207384109497, "learning_rate": 6.401702370913239e-05, "loss": 0.4704, "step": 52690 }, { "epoch": 13.044554455445544, "grad_norm": 0.46108222007751465, "learning_rate": 6.400379678772569e-05, "loss": 0.4694, "step": 52700 }, { "epoch": 13.047029702970297, "grad_norm": 0.44023165106773376, "learning_rate": 6.399056880282304e-05, "loss": 0.4728, "step": 52710 }, { "epoch": 13.049504950495049, "grad_norm": 0.48762327432632446, "learning_rate": 6.3977339755429e-05, "loss": 0.4697, "step": 52720 }, { "epoch": 13.051980198019802, "grad_norm": 0.47786858677864075, "learning_rate": 6.396410964654827e-05, "loss": 0.4656, "step": 52730 }, { "epoch": 13.054455445544555, "grad_norm": 0.4956514537334442, "learning_rate": 6.395087847718556e-05, "loss": 0.4669, "step": 52740 }, { "epoch": 13.056930693069306, "grad_norm": 0.4887917637825012, "learning_rate": 6.393764624834571e-05, "loss": 0.4708, "step": 52750 }, { "epoch": 13.05940594059406, "grad_norm": 0.5147301554679871, "learning_rate": 6.392441296103358e-05, "loss": 0.4682, "step": 52760 }, { "epoch": 13.061881188118813, "grad_norm": 0.4932542145252228, "learning_rate": 6.391117861625421e-05, "loss": 0.4708, "step": 52770 }, { "epoch": 13.064356435643564, "grad_norm": 0.4637851417064667, "learning_rate": 6.38979432150126e-05, "loss": 0.4685, "step": 52780 }, { "epoch": 13.066831683168317, "grad_norm": 0.4754749834537506, "learning_rate": 6.388470675831394e-05, "loss": 0.4659, "step": 52790 }, { "epoch": 13.069306930693068, "grad_norm": 0.5058633685112, "learning_rate": 6.387146924716344e-05, "loss": 0.4687, "step": 52800 }, { "epoch": 13.071782178217822, "grad_norm": 0.4779832363128662, "learning_rate": 6.385823068256638e-05, "loss": 0.4665, "step": 52810 }, { "epoch": 13.074257425742575, "grad_norm": 0.5012484788894653, "learning_rate": 6.384499106552817e-05, "loss": 0.467, "step": 52820 }, { "epoch": 13.076732673267326, "grad_norm": 0.4673083424568176, "learning_rate": 6.383175039705423e-05, "loss": 0.4698, "step": 52830 }, { "epoch": 13.07920792079208, "grad_norm": 0.4931451082229614, "learning_rate": 6.381850867815016e-05, "loss": 0.4726, "step": 52840 }, { "epoch": 13.081683168316832, "grad_norm": 0.4651638865470886, "learning_rate": 6.380526590982154e-05, "loss": 0.4685, "step": 52850 }, { "epoch": 13.084158415841584, "grad_norm": 0.4800644516944885, "learning_rate": 6.379202209307406e-05, "loss": 0.4673, "step": 52860 }, { "epoch": 13.086633663366337, "grad_norm": 0.4728248715400696, "learning_rate": 6.377877722891354e-05, "loss": 0.471, "step": 52870 }, { "epoch": 13.089108910891088, "grad_norm": 0.48753219842910767, "learning_rate": 6.376553131834582e-05, "loss": 0.4674, "step": 52880 }, { "epoch": 13.091584158415841, "grad_norm": 0.45714667439460754, "learning_rate": 6.375228436237683e-05, "loss": 0.4692, "step": 52890 }, { "epoch": 13.094059405940595, "grad_norm": 0.4868590831756592, "learning_rate": 6.373903636201262e-05, "loss": 0.4718, "step": 52900 }, { "epoch": 13.096534653465346, "grad_norm": 0.46267661452293396, "learning_rate": 6.372578731825927e-05, "loss": 0.4731, "step": 52910 }, { "epoch": 13.099009900990099, "grad_norm": 0.4798213243484497, "learning_rate": 6.371253723212294e-05, "loss": 0.4655, "step": 52920 }, { "epoch": 13.101485148514852, "grad_norm": 0.4910077154636383, "learning_rate": 6.369928610460991e-05, "loss": 0.4656, "step": 52930 }, { "epoch": 13.103960396039604, "grad_norm": 0.4586970806121826, "learning_rate": 6.36860339367265e-05, "loss": 0.4729, "step": 52940 }, { "epoch": 13.106435643564357, "grad_norm": 0.4657965898513794, "learning_rate": 6.367278072947914e-05, "loss": 0.4736, "step": 52950 }, { "epoch": 13.108910891089108, "grad_norm": 0.49795883893966675, "learning_rate": 6.365952648387431e-05, "loss": 0.4669, "step": 52960 }, { "epoch": 13.111386138613861, "grad_norm": 0.48594164848327637, "learning_rate": 6.36462712009186e-05, "loss": 0.4703, "step": 52970 }, { "epoch": 13.113861386138614, "grad_norm": 0.4529670476913452, "learning_rate": 6.363301488161863e-05, "loss": 0.4678, "step": 52980 }, { "epoch": 13.116336633663366, "grad_norm": 0.4621922969818115, "learning_rate": 6.361975752698117e-05, "loss": 0.4662, "step": 52990 }, { "epoch": 13.118811881188119, "grad_norm": 0.439414918422699, "learning_rate": 6.3606499138013e-05, "loss": 0.4649, "step": 53000 }, { "epoch": 13.121287128712872, "grad_norm": 0.46249693632125854, "learning_rate": 6.359323971572101e-05, "loss": 0.4693, "step": 53010 }, { "epoch": 13.123762376237623, "grad_norm": 0.4642597734928131, "learning_rate": 6.357997926111219e-05, "loss": 0.469, "step": 53020 }, { "epoch": 13.126237623762377, "grad_norm": 0.5124688744544983, "learning_rate": 6.356671777519354e-05, "loss": 0.4685, "step": 53030 }, { "epoch": 13.128712871287128, "grad_norm": 0.46130847930908203, "learning_rate": 6.355345525897222e-05, "loss": 0.47, "step": 53040 }, { "epoch": 13.131188118811881, "grad_norm": 0.4831077754497528, "learning_rate": 6.35401917134554e-05, "loss": 0.467, "step": 53050 }, { "epoch": 13.133663366336634, "grad_norm": 0.48014554381370544, "learning_rate": 6.35269271396504e-05, "loss": 0.4706, "step": 53060 }, { "epoch": 13.136138613861386, "grad_norm": 0.46501004695892334, "learning_rate": 6.351366153856456e-05, "loss": 0.4704, "step": 53070 }, { "epoch": 13.138613861386139, "grad_norm": 0.47090646624565125, "learning_rate": 6.35003949112053e-05, "loss": 0.476, "step": 53080 }, { "epoch": 13.141089108910892, "grad_norm": 0.4873649477958679, "learning_rate": 6.348712725858014e-05, "loss": 0.4668, "step": 53090 }, { "epoch": 13.143564356435643, "grad_norm": 0.47511792182922363, "learning_rate": 6.347385858169667e-05, "loss": 0.4701, "step": 53100 }, { "epoch": 13.146039603960396, "grad_norm": 0.4601670503616333, "learning_rate": 6.346058888156254e-05, "loss": 0.4733, "step": 53110 }, { "epoch": 13.148514851485148, "grad_norm": 0.4518653452396393, "learning_rate": 6.344731815918555e-05, "loss": 0.4705, "step": 53120 }, { "epoch": 13.150990099009901, "grad_norm": 0.4970157742500305, "learning_rate": 6.343404641557348e-05, "loss": 0.4746, "step": 53130 }, { "epoch": 13.153465346534654, "grad_norm": 0.49724292755126953, "learning_rate": 6.342077365173423e-05, "loss": 0.47, "step": 53140 }, { "epoch": 13.155940594059405, "grad_norm": 0.4701465368270874, "learning_rate": 6.34074998686758e-05, "loss": 0.4676, "step": 53150 }, { "epoch": 13.158415841584159, "grad_norm": 0.46279704570770264, "learning_rate": 6.339422506740624e-05, "loss": 0.4692, "step": 53160 }, { "epoch": 13.160891089108912, "grad_norm": 0.49440252780914307, "learning_rate": 6.338094924893367e-05, "loss": 0.4666, "step": 53170 }, { "epoch": 13.163366336633663, "grad_norm": 0.4507003128528595, "learning_rate": 6.336767241426632e-05, "loss": 0.471, "step": 53180 }, { "epoch": 13.165841584158416, "grad_norm": 0.47753554582595825, "learning_rate": 6.335439456441248e-05, "loss": 0.4632, "step": 53190 }, { "epoch": 13.168316831683168, "grad_norm": 0.457916796207428, "learning_rate": 6.334111570038049e-05, "loss": 0.4679, "step": 53200 }, { "epoch": 13.17079207920792, "grad_norm": 0.48553603887557983, "learning_rate": 6.33278358231788e-05, "loss": 0.4664, "step": 53210 }, { "epoch": 13.173267326732674, "grad_norm": 0.4706002175807953, "learning_rate": 6.331455493381594e-05, "loss": 0.471, "step": 53220 }, { "epoch": 13.175742574257425, "grad_norm": 0.4546787738800049, "learning_rate": 6.330127303330051e-05, "loss": 0.4736, "step": 53230 }, { "epoch": 13.178217821782178, "grad_norm": 0.43539485335350037, "learning_rate": 6.328799012264118e-05, "loss": 0.4657, "step": 53240 }, { "epoch": 13.180693069306932, "grad_norm": 0.46779629588127136, "learning_rate": 6.327470620284667e-05, "loss": 0.4733, "step": 53250 }, { "epoch": 13.183168316831683, "grad_norm": 0.4775826334953308, "learning_rate": 6.326142127492582e-05, "loss": 0.474, "step": 53260 }, { "epoch": 13.185643564356436, "grad_norm": 0.47768884897232056, "learning_rate": 6.324813533988759e-05, "loss": 0.471, "step": 53270 }, { "epoch": 13.188118811881187, "grad_norm": 0.4716109335422516, "learning_rate": 6.323484839874087e-05, "loss": 0.4717, "step": 53280 }, { "epoch": 13.19059405940594, "grad_norm": 0.4461488127708435, "learning_rate": 6.322156045249477e-05, "loss": 0.4656, "step": 53290 }, { "epoch": 13.193069306930694, "grad_norm": 0.4810808002948761, "learning_rate": 6.320827150215841e-05, "loss": 0.4677, "step": 53300 }, { "epoch": 13.195544554455445, "grad_norm": 0.5128520131111145, "learning_rate": 6.3194981548741e-05, "loss": 0.473, "step": 53310 }, { "epoch": 13.198019801980198, "grad_norm": 0.4928038418292999, "learning_rate": 6.31816905932518e-05, "loss": 0.4709, "step": 53320 }, { "epoch": 13.200495049504951, "grad_norm": 0.4979044497013092, "learning_rate": 6.31683986367002e-05, "loss": 0.4738, "step": 53330 }, { "epoch": 13.202970297029703, "grad_norm": 0.4634977877140045, "learning_rate": 6.315510568009564e-05, "loss": 0.4675, "step": 53340 }, { "epoch": 13.205445544554456, "grad_norm": 0.48832330107688904, "learning_rate": 6.314181172444762e-05, "loss": 0.4603, "step": 53350 }, { "epoch": 13.207920792079207, "grad_norm": 0.4733043611049652, "learning_rate": 6.312851677076573e-05, "loss": 0.4659, "step": 53360 }, { "epoch": 13.21039603960396, "grad_norm": 0.44581320881843567, "learning_rate": 6.311522082005962e-05, "loss": 0.4684, "step": 53370 }, { "epoch": 13.212871287128714, "grad_norm": 0.45498934388160706, "learning_rate": 6.310192387333904e-05, "loss": 0.464, "step": 53380 }, { "epoch": 13.215346534653465, "grad_norm": 0.44606176018714905, "learning_rate": 6.308862593161381e-05, "loss": 0.4706, "step": 53390 }, { "epoch": 13.217821782178218, "grad_norm": 0.4591412842273712, "learning_rate": 6.307532699589382e-05, "loss": 0.4682, "step": 53400 }, { "epoch": 13.220297029702971, "grad_norm": 0.45387589931488037, "learning_rate": 6.306202706718904e-05, "loss": 0.4698, "step": 53410 }, { "epoch": 13.222772277227723, "grad_norm": 0.4664760231971741, "learning_rate": 6.30487261465095e-05, "loss": 0.4707, "step": 53420 }, { "epoch": 13.225247524752476, "grad_norm": 0.4971325993537903, "learning_rate": 6.303542423486532e-05, "loss": 0.4727, "step": 53430 }, { "epoch": 13.227722772277227, "grad_norm": 0.5026951432228088, "learning_rate": 6.30221213332667e-05, "loss": 0.4696, "step": 53440 }, { "epoch": 13.23019801980198, "grad_norm": 0.4901789128780365, "learning_rate": 6.300881744272389e-05, "loss": 0.4738, "step": 53450 }, { "epoch": 13.232673267326733, "grad_norm": 0.4775807559490204, "learning_rate": 6.299551256424727e-05, "loss": 0.4745, "step": 53460 }, { "epoch": 13.235148514851485, "grad_norm": 0.46561235189437866, "learning_rate": 6.298220669884722e-05, "loss": 0.4712, "step": 53470 }, { "epoch": 13.237623762376238, "grad_norm": 0.4674564301967621, "learning_rate": 6.296889984753425e-05, "loss": 0.4687, "step": 53480 }, { "epoch": 13.240099009900991, "grad_norm": 0.4876158535480499, "learning_rate": 6.295559201131894e-05, "loss": 0.4725, "step": 53490 }, { "epoch": 13.242574257425742, "grad_norm": 0.4741060137748718, "learning_rate": 6.294228319121188e-05, "loss": 0.468, "step": 53500 }, { "epoch": 13.245049504950495, "grad_norm": 0.46822983026504517, "learning_rate": 6.292897338822386e-05, "loss": 0.4683, "step": 53510 }, { "epoch": 13.247524752475247, "grad_norm": 0.4624643623828888, "learning_rate": 6.29156626033656e-05, "loss": 0.4662, "step": 53520 }, { "epoch": 13.25, "grad_norm": 0.4571066200733185, "learning_rate": 6.290235083764801e-05, "loss": 0.4699, "step": 53530 }, { "epoch": 13.252475247524753, "grad_norm": 0.4672732651233673, "learning_rate": 6.288903809208205e-05, "loss": 0.4717, "step": 53540 }, { "epoch": 13.254950495049505, "grad_norm": 0.4974534213542938, "learning_rate": 6.287572436767868e-05, "loss": 0.468, "step": 53550 }, { "epoch": 13.257425742574258, "grad_norm": 0.4726722538471222, "learning_rate": 6.286240966544902e-05, "loss": 0.4688, "step": 53560 }, { "epoch": 13.259900990099009, "grad_norm": 0.46893712878227234, "learning_rate": 6.284909398640424e-05, "loss": 0.4712, "step": 53570 }, { "epoch": 13.262376237623762, "grad_norm": 0.48598021268844604, "learning_rate": 6.283577733155555e-05, "loss": 0.4712, "step": 53580 }, { "epoch": 13.264851485148515, "grad_norm": 0.4661098122596741, "learning_rate": 6.282245970191429e-05, "loss": 0.4723, "step": 53590 }, { "epoch": 13.267326732673267, "grad_norm": 0.4680236577987671, "learning_rate": 6.280914109849185e-05, "loss": 0.4667, "step": 53600 }, { "epoch": 13.26980198019802, "grad_norm": 0.5056700110435486, "learning_rate": 6.279582152229966e-05, "loss": 0.4695, "step": 53610 }, { "epoch": 13.272277227722773, "grad_norm": 0.48704105615615845, "learning_rate": 6.278250097434928e-05, "loss": 0.4696, "step": 53620 }, { "epoch": 13.274752475247524, "grad_norm": 0.5125346183776855, "learning_rate": 6.276917945565229e-05, "loss": 0.467, "step": 53630 }, { "epoch": 13.277227722772277, "grad_norm": 0.472478449344635, "learning_rate": 6.275585696722042e-05, "loss": 0.4672, "step": 53640 }, { "epoch": 13.27970297029703, "grad_norm": 0.5017993450164795, "learning_rate": 6.274253351006537e-05, "loss": 0.4696, "step": 53650 }, { "epoch": 13.282178217821782, "grad_norm": 0.4635014533996582, "learning_rate": 6.272920908519901e-05, "loss": 0.4708, "step": 53660 }, { "epoch": 13.284653465346535, "grad_norm": 0.4756684899330139, "learning_rate": 6.271588369363321e-05, "loss": 0.4669, "step": 53670 }, { "epoch": 13.287128712871286, "grad_norm": 0.44851821660995483, "learning_rate": 6.270255733637998e-05, "loss": 0.4739, "step": 53680 }, { "epoch": 13.28960396039604, "grad_norm": 0.4368901550769806, "learning_rate": 6.268923001445133e-05, "loss": 0.4671, "step": 53690 }, { "epoch": 13.292079207920793, "grad_norm": 0.4678645133972168, "learning_rate": 6.267590172885942e-05, "loss": 0.4686, "step": 53700 }, { "epoch": 13.294554455445544, "grad_norm": 0.4827016592025757, "learning_rate": 6.266257248061641e-05, "loss": 0.4717, "step": 53710 }, { "epoch": 13.297029702970297, "grad_norm": 0.49228084087371826, "learning_rate": 6.26492422707346e-05, "loss": 0.47, "step": 53720 }, { "epoch": 13.299504950495049, "grad_norm": 0.49218815565109253, "learning_rate": 6.263591110022631e-05, "loss": 0.4665, "step": 53730 }, { "epoch": 13.301980198019802, "grad_norm": 0.47797390818595886, "learning_rate": 6.262257897010395e-05, "loss": 0.4687, "step": 53740 }, { "epoch": 13.304455445544555, "grad_norm": 0.4662219285964966, "learning_rate": 6.260924588138005e-05, "loss": 0.4683, "step": 53750 }, { "epoch": 13.306930693069306, "grad_norm": 0.4747028052806854, "learning_rate": 6.259591183506711e-05, "loss": 0.4631, "step": 53760 }, { "epoch": 13.30940594059406, "grad_norm": 0.4794829487800598, "learning_rate": 6.258257683217781e-05, "loss": 0.4644, "step": 53770 }, { "epoch": 13.311881188118813, "grad_norm": 0.48709729313850403, "learning_rate": 6.256924087372482e-05, "loss": 0.4737, "step": 53780 }, { "epoch": 13.314356435643564, "grad_norm": 0.4566553831100464, "learning_rate": 6.255590396072095e-05, "loss": 0.4631, "step": 53790 }, { "epoch": 13.316831683168317, "grad_norm": 0.47148922085762024, "learning_rate": 6.254256609417902e-05, "loss": 0.4669, "step": 53800 }, { "epoch": 13.319306930693068, "grad_norm": 0.45744580030441284, "learning_rate": 6.252922727511199e-05, "loss": 0.4677, "step": 53810 }, { "epoch": 13.321782178217822, "grad_norm": 0.46828317642211914, "learning_rate": 6.251588750453282e-05, "loss": 0.4656, "step": 53820 }, { "epoch": 13.324257425742575, "grad_norm": 0.45330101251602173, "learning_rate": 6.25025467834546e-05, "loss": 0.4685, "step": 53830 }, { "epoch": 13.326732673267326, "grad_norm": 0.45085132122039795, "learning_rate": 6.248920511289046e-05, "loss": 0.468, "step": 53840 }, { "epoch": 13.32920792079208, "grad_norm": 0.4621696472167969, "learning_rate": 6.247586249385361e-05, "loss": 0.4688, "step": 53850 }, { "epoch": 13.331683168316832, "grad_norm": 0.4606682360172272, "learning_rate": 6.246251892735734e-05, "loss": 0.4702, "step": 53860 }, { "epoch": 13.334158415841584, "grad_norm": 0.459055632352829, "learning_rate": 6.2449174414415e-05, "loss": 0.4699, "step": 53870 }, { "epoch": 13.336633663366337, "grad_norm": 0.47116053104400635, "learning_rate": 6.243582895604002e-05, "loss": 0.4703, "step": 53880 }, { "epoch": 13.339108910891088, "grad_norm": 0.4778042733669281, "learning_rate": 6.24224825532459e-05, "loss": 0.4669, "step": 53890 }, { "epoch": 13.341584158415841, "grad_norm": 0.46863871812820435, "learning_rate": 6.240913520704621e-05, "loss": 0.4674, "step": 53900 }, { "epoch": 13.344059405940595, "grad_norm": 0.4251959025859833, "learning_rate": 6.239578691845461e-05, "loss": 0.4663, "step": 53910 }, { "epoch": 13.346534653465346, "grad_norm": 0.4769328832626343, "learning_rate": 6.238243768848478e-05, "loss": 0.4684, "step": 53920 }, { "epoch": 13.349009900990099, "grad_norm": 0.4414314031600952, "learning_rate": 6.236908751815052e-05, "loss": 0.4663, "step": 53930 }, { "epoch": 13.351485148514852, "grad_norm": 0.4766632616519928, "learning_rate": 6.235573640846571e-05, "loss": 0.4706, "step": 53940 }, { "epoch": 13.353960396039604, "grad_norm": 0.4330506920814514, "learning_rate": 6.234238436044424e-05, "loss": 0.4677, "step": 53950 }, { "epoch": 13.356435643564357, "grad_norm": 0.45482563972473145, "learning_rate": 6.232903137510016e-05, "loss": 0.4709, "step": 53960 }, { "epoch": 13.358910891089108, "grad_norm": 0.4964986741542816, "learning_rate": 6.231567745344748e-05, "loss": 0.4711, "step": 53970 }, { "epoch": 13.361386138613861, "grad_norm": 0.4506460726261139, "learning_rate": 6.23023225965004e-05, "loss": 0.4693, "step": 53980 }, { "epoch": 13.363861386138614, "grad_norm": 0.48298409581184387, "learning_rate": 6.22889668052731e-05, "loss": 0.4683, "step": 53990 }, { "epoch": 13.366336633663366, "grad_norm": 0.4442572295665741, "learning_rate": 6.227561008077985e-05, "loss": 0.4642, "step": 54000 }, { "epoch": 13.368811881188119, "grad_norm": 0.4594038426876068, "learning_rate": 6.226225242403505e-05, "loss": 0.4744, "step": 54010 }, { "epoch": 13.371287128712872, "grad_norm": 0.49944984912872314, "learning_rate": 6.22488938360531e-05, "loss": 0.4688, "step": 54020 }, { "epoch": 13.373762376237623, "grad_norm": 0.49536171555519104, "learning_rate": 6.22355343178485e-05, "loss": 0.4614, "step": 54030 }, { "epoch": 13.376237623762377, "grad_norm": 0.45888271927833557, "learning_rate": 6.222217387043579e-05, "loss": 0.4764, "step": 54040 }, { "epoch": 13.378712871287128, "grad_norm": 0.4632095992565155, "learning_rate": 6.220881249482966e-05, "loss": 0.4685, "step": 54050 }, { "epoch": 13.381188118811881, "grad_norm": 0.4679730236530304, "learning_rate": 6.219545019204479e-05, "loss": 0.4665, "step": 54060 }, { "epoch": 13.383663366336634, "grad_norm": 0.4614689350128174, "learning_rate": 6.218208696309597e-05, "loss": 0.4735, "step": 54070 }, { "epoch": 13.386138613861386, "grad_norm": 0.49275466799736023, "learning_rate": 6.216872280899802e-05, "loss": 0.4659, "step": 54080 }, { "epoch": 13.388613861386139, "grad_norm": 0.49863436818122864, "learning_rate": 6.215535773076588e-05, "loss": 0.464, "step": 54090 }, { "epoch": 13.391089108910892, "grad_norm": 0.49893373250961304, "learning_rate": 6.214199172941453e-05, "loss": 0.4731, "step": 54100 }, { "epoch": 13.393564356435643, "grad_norm": 0.4602048099040985, "learning_rate": 6.212862480595909e-05, "loss": 0.4671, "step": 54110 }, { "epoch": 13.396039603960396, "grad_norm": 0.49333202838897705, "learning_rate": 6.211525696141459e-05, "loss": 0.4632, "step": 54120 }, { "epoch": 13.398514851485148, "grad_norm": 0.47976887226104736, "learning_rate": 6.210188819679628e-05, "loss": 0.4667, "step": 54130 }, { "epoch": 13.400990099009901, "grad_norm": 0.49916595220565796, "learning_rate": 6.208851851311944e-05, "loss": 0.4642, "step": 54140 }, { "epoch": 13.403465346534654, "grad_norm": 0.4787651002407074, "learning_rate": 6.207514791139939e-05, "loss": 0.4667, "step": 54150 }, { "epoch": 13.405940594059405, "grad_norm": 0.475373774766922, "learning_rate": 6.206177639265155e-05, "loss": 0.4671, "step": 54160 }, { "epoch": 13.408415841584159, "grad_norm": 0.44849446415901184, "learning_rate": 6.204840395789137e-05, "loss": 0.4642, "step": 54170 }, { "epoch": 13.410891089108912, "grad_norm": 0.47925204038619995, "learning_rate": 6.203503060813447e-05, "loss": 0.4714, "step": 54180 }, { "epoch": 13.413366336633663, "grad_norm": 0.45538368821144104, "learning_rate": 6.202165634439639e-05, "loss": 0.4672, "step": 54190 }, { "epoch": 13.415841584158416, "grad_norm": 0.47807571291923523, "learning_rate": 6.200828116769285e-05, "loss": 0.4704, "step": 54200 }, { "epoch": 13.418316831683168, "grad_norm": 0.4952605068683624, "learning_rate": 6.19949050790396e-05, "loss": 0.4665, "step": 54210 }, { "epoch": 13.42079207920792, "grad_norm": 0.4958434998989105, "learning_rate": 6.198152807945247e-05, "loss": 0.4669, "step": 54220 }, { "epoch": 13.423267326732674, "grad_norm": 0.47014713287353516, "learning_rate": 6.196815016994735e-05, "loss": 0.4673, "step": 54230 }, { "epoch": 13.425742574257425, "grad_norm": 0.46020713448524475, "learning_rate": 6.195477135154021e-05, "loss": 0.4673, "step": 54240 }, { "epoch": 13.428217821782178, "grad_norm": 0.4595475494861603, "learning_rate": 6.194139162524709e-05, "loss": 0.4656, "step": 54250 }, { "epoch": 13.430693069306932, "grad_norm": 0.4652479887008667, "learning_rate": 6.192801099208407e-05, "loss": 0.472, "step": 54260 }, { "epoch": 13.433168316831683, "grad_norm": 0.4540005028247833, "learning_rate": 6.191462945306734e-05, "loss": 0.4592, "step": 54270 }, { "epoch": 13.435643564356436, "grad_norm": 0.48641905188560486, "learning_rate": 6.190124700921312e-05, "loss": 0.4668, "step": 54280 }, { "epoch": 13.438118811881187, "grad_norm": 0.48250266909599304, "learning_rate": 6.188786366153775e-05, "loss": 0.4699, "step": 54290 }, { "epoch": 13.44059405940594, "grad_norm": 0.46270647644996643, "learning_rate": 6.187447941105759e-05, "loss": 0.4711, "step": 54300 }, { "epoch": 13.443069306930694, "grad_norm": 0.47316503524780273, "learning_rate": 6.186109425878907e-05, "loss": 0.4692, "step": 54310 }, { "epoch": 13.445544554455445, "grad_norm": 0.4689427316188812, "learning_rate": 6.184770820574872e-05, "loss": 0.4658, "step": 54320 }, { "epoch": 13.448019801980198, "grad_norm": 0.4744347035884857, "learning_rate": 6.183432125295312e-05, "loss": 0.4703, "step": 54330 }, { "epoch": 13.450495049504951, "grad_norm": 0.4360598921775818, "learning_rate": 6.182093340141892e-05, "loss": 0.4651, "step": 54340 }, { "epoch": 13.452970297029703, "grad_norm": 0.44091659784317017, "learning_rate": 6.180754465216285e-05, "loss": 0.4657, "step": 54350 }, { "epoch": 13.455445544554456, "grad_norm": 0.4471241533756256, "learning_rate": 6.179415500620167e-05, "loss": 0.4714, "step": 54360 }, { "epoch": 13.457920792079207, "grad_norm": 0.437473326921463, "learning_rate": 6.178076446455226e-05, "loss": 0.4716, "step": 54370 }, { "epoch": 13.46039603960396, "grad_norm": 0.45462650060653687, "learning_rate": 6.176737302823153e-05, "loss": 0.4697, "step": 54380 }, { "epoch": 13.462871287128714, "grad_norm": 0.45957550406455994, "learning_rate": 6.175398069825648e-05, "loss": 0.4648, "step": 54390 }, { "epoch": 13.465346534653465, "grad_norm": 0.4507170617580414, "learning_rate": 6.174058747564417e-05, "loss": 0.4739, "step": 54400 }, { "epoch": 13.467821782178218, "grad_norm": 0.4885860085487366, "learning_rate": 6.172719336141172e-05, "loss": 0.4703, "step": 54410 }, { "epoch": 13.47029702970297, "grad_norm": 0.4651173949241638, "learning_rate": 6.171379835657632e-05, "loss": 0.4704, "step": 54420 }, { "epoch": 13.472772277227723, "grad_norm": 0.4881846010684967, "learning_rate": 6.170040246215525e-05, "loss": 0.4659, "step": 54430 }, { "epoch": 13.475247524752476, "grad_norm": 0.48703324794769287, "learning_rate": 6.168700567916582e-05, "loss": 0.4707, "step": 54440 }, { "epoch": 13.477722772277227, "grad_norm": 0.46802663803100586, "learning_rate": 6.167360800862544e-05, "loss": 0.4725, "step": 54450 }, { "epoch": 13.48019801980198, "grad_norm": 0.4773053824901581, "learning_rate": 6.166020945155158e-05, "loss": 0.4688, "step": 54460 }, { "epoch": 13.482673267326733, "grad_norm": 0.49133414030075073, "learning_rate": 6.164681000896175e-05, "loss": 0.4733, "step": 54470 }, { "epoch": 13.485148514851485, "grad_norm": 0.4676755368709564, "learning_rate": 6.163340968187356e-05, "loss": 0.47, "step": 54480 }, { "epoch": 13.487623762376238, "grad_norm": 0.4769458472728729, "learning_rate": 6.162000847130468e-05, "loss": 0.4674, "step": 54490 }, { "epoch": 13.490099009900991, "grad_norm": 0.4533303678035736, "learning_rate": 6.160660637827286e-05, "loss": 0.4658, "step": 54500 }, { "epoch": 13.492574257425742, "grad_norm": 0.4579811990261078, "learning_rate": 6.159320340379586e-05, "loss": 0.4674, "step": 54510 }, { "epoch": 13.495049504950495, "grad_norm": 0.4432674050331116, "learning_rate": 6.157979954889159e-05, "loss": 0.4691, "step": 54520 }, { "epoch": 13.497524752475247, "grad_norm": 0.4471476972103119, "learning_rate": 6.156639481457795e-05, "loss": 0.4702, "step": 54530 }, { "epoch": 13.5, "grad_norm": 0.4570443034172058, "learning_rate": 6.155298920187296e-05, "loss": 0.4708, "step": 54540 }, { "epoch": 13.502475247524753, "grad_norm": 0.44637542963027954, "learning_rate": 6.153958271179468e-05, "loss": 0.4649, "step": 54550 }, { "epoch": 13.504950495049505, "grad_norm": 0.4308849573135376, "learning_rate": 6.152617534536124e-05, "loss": 0.4632, "step": 54560 }, { "epoch": 13.507425742574258, "grad_norm": 0.4685789942741394, "learning_rate": 6.151276710359087e-05, "loss": 0.4709, "step": 54570 }, { "epoch": 13.509900990099009, "grad_norm": 0.46851101517677307, "learning_rate": 6.14993579875018e-05, "loss": 0.466, "step": 54580 }, { "epoch": 13.512376237623762, "grad_norm": 0.4684322774410248, "learning_rate": 6.148594799811238e-05, "loss": 0.4679, "step": 54590 }, { "epoch": 13.514851485148515, "grad_norm": 0.4912455976009369, "learning_rate": 6.147253713644101e-05, "loss": 0.4671, "step": 54600 }, { "epoch": 13.517326732673267, "grad_norm": 0.45490092039108276, "learning_rate": 6.145912540350616e-05, "loss": 0.4615, "step": 54610 }, { "epoch": 13.51980198019802, "grad_norm": 0.4840298593044281, "learning_rate": 6.144571280032638e-05, "loss": 0.4719, "step": 54620 }, { "epoch": 13.522277227722773, "grad_norm": 0.46094006299972534, "learning_rate": 6.143229932792022e-05, "loss": 0.4644, "step": 54630 }, { "epoch": 13.524752475247524, "grad_norm": 0.47056934237480164, "learning_rate": 6.141888498730637e-05, "loss": 0.4659, "step": 54640 }, { "epoch": 13.527227722772277, "grad_norm": 0.43976572155952454, "learning_rate": 6.140546977950359e-05, "loss": 0.4662, "step": 54650 }, { "epoch": 13.52970297029703, "grad_norm": 0.480892539024353, "learning_rate": 6.139205370553063e-05, "loss": 0.4662, "step": 54660 }, { "epoch": 13.532178217821782, "grad_norm": 0.5132603645324707, "learning_rate": 6.137863676640639e-05, "loss": 0.4677, "step": 54670 }, { "epoch": 13.534653465346535, "grad_norm": 0.4732940196990967, "learning_rate": 6.136521896314976e-05, "loss": 0.4632, "step": 54680 }, { "epoch": 13.537128712871286, "grad_norm": 0.47900620102882385, "learning_rate": 6.135180029677975e-05, "loss": 0.4653, "step": 54690 }, { "epoch": 13.53960396039604, "grad_norm": 0.49117282032966614, "learning_rate": 6.133838076831543e-05, "loss": 0.4722, "step": 54700 }, { "epoch": 13.542079207920793, "grad_norm": 0.4819001853466034, "learning_rate": 6.132496037877593e-05, "loss": 0.4657, "step": 54710 }, { "epoch": 13.544554455445544, "grad_norm": 0.4572666585445404, "learning_rate": 6.131153912918042e-05, "loss": 0.47, "step": 54720 }, { "epoch": 13.547029702970297, "grad_norm": 0.45433005690574646, "learning_rate": 6.129811702054814e-05, "loss": 0.4659, "step": 54730 }, { "epoch": 13.549504950495049, "grad_norm": 0.44926688075065613, "learning_rate": 6.128469405389846e-05, "loss": 0.4664, "step": 54740 }, { "epoch": 13.551980198019802, "grad_norm": 0.4459124207496643, "learning_rate": 6.127127023025074e-05, "loss": 0.4646, "step": 54750 }, { "epoch": 13.554455445544555, "grad_norm": 0.44012489914894104, "learning_rate": 6.125784555062442e-05, "loss": 0.4635, "step": 54760 }, { "epoch": 13.556930693069306, "grad_norm": 0.4690788984298706, "learning_rate": 6.124442001603902e-05, "loss": 0.4655, "step": 54770 }, { "epoch": 13.55940594059406, "grad_norm": 0.4474780559539795, "learning_rate": 6.123099362751414e-05, "loss": 0.4693, "step": 54780 }, { "epoch": 13.561881188118813, "grad_norm": 0.5043419003486633, "learning_rate": 6.12175663860694e-05, "loss": 0.4703, "step": 54790 }, { "epoch": 13.564356435643564, "grad_norm": 0.45316052436828613, "learning_rate": 6.120413829272454e-05, "loss": 0.4729, "step": 54800 }, { "epoch": 13.566831683168317, "grad_norm": 0.43446919322013855, "learning_rate": 6.11907093484993e-05, "loss": 0.4688, "step": 54810 }, { "epoch": 13.569306930693068, "grad_norm": 0.4194030463695526, "learning_rate": 6.117727955441355e-05, "loss": 0.4635, "step": 54820 }, { "epoch": 13.571782178217822, "grad_norm": 0.45683181285858154, "learning_rate": 6.116384891148718e-05, "loss": 0.4666, "step": 54830 }, { "epoch": 13.574257425742575, "grad_norm": 0.4519588053226471, "learning_rate": 6.115041742074015e-05, "loss": 0.469, "step": 54840 }, { "epoch": 13.576732673267326, "grad_norm": 0.46724146604537964, "learning_rate": 6.113698508319251e-05, "loss": 0.4652, "step": 54850 }, { "epoch": 13.57920792079208, "grad_norm": 0.4486507177352905, "learning_rate": 6.112355189986436e-05, "loss": 0.4689, "step": 54860 }, { "epoch": 13.581683168316832, "grad_norm": 0.43503788113594055, "learning_rate": 6.111011787177585e-05, "loss": 0.4669, "step": 54870 }, { "epoch": 13.584158415841584, "grad_norm": 0.4481828808784485, "learning_rate": 6.109668299994718e-05, "loss": 0.4706, "step": 54880 }, { "epoch": 13.586633663366337, "grad_norm": 0.4665251076221466, "learning_rate": 6.10832472853987e-05, "loss": 0.4646, "step": 54890 }, { "epoch": 13.589108910891088, "grad_norm": 0.4589391350746155, "learning_rate": 6.106981072915072e-05, "loss": 0.4695, "step": 54900 }, { "epoch": 13.591584158415841, "grad_norm": 0.47897759079933167, "learning_rate": 6.105637333222366e-05, "loss": 0.4627, "step": 54910 }, { "epoch": 13.594059405940595, "grad_norm": 0.4629999101161957, "learning_rate": 6.104293509563802e-05, "loss": 0.468, "step": 54920 }, { "epoch": 13.596534653465346, "grad_norm": 0.45631998777389526, "learning_rate": 6.1029496020414346e-05, "loss": 0.4689, "step": 54930 }, { "epoch": 13.599009900990099, "grad_norm": 0.46108320355415344, "learning_rate": 6.1016056107573225e-05, "loss": 0.4673, "step": 54940 }, { "epoch": 13.601485148514852, "grad_norm": 0.4532075822353363, "learning_rate": 6.1002615358135354e-05, "loss": 0.4744, "step": 54950 }, { "epoch": 13.603960396039604, "grad_norm": 0.4826335310935974, "learning_rate": 6.098917377312143e-05, "loss": 0.47, "step": 54960 }, { "epoch": 13.606435643564357, "grad_norm": 0.46990835666656494, "learning_rate": 6.097573135355231e-05, "loss": 0.4697, "step": 54970 }, { "epoch": 13.608910891089108, "grad_norm": 0.47455549240112305, "learning_rate": 6.0962288100448815e-05, "loss": 0.471, "step": 54980 }, { "epoch": 13.611386138613861, "grad_norm": 0.4925731122493744, "learning_rate": 6.094884401483189e-05, "loss": 0.4711, "step": 54990 }, { "epoch": 13.613861386138614, "grad_norm": 0.46828654408454895, "learning_rate": 6.093539909772251e-05, "loss": 0.4664, "step": 55000 }, { "epoch": 13.616336633663366, "grad_norm": 0.466122567653656, "learning_rate": 6.092195335014174e-05, "loss": 0.4689, "step": 55010 }, { "epoch": 13.618811881188119, "grad_norm": 0.48068711161613464, "learning_rate": 6.09085067731107e-05, "loss": 0.4675, "step": 55020 }, { "epoch": 13.621287128712872, "grad_norm": 0.45771947503089905, "learning_rate": 6.089505936765053e-05, "loss": 0.4677, "step": 55030 }, { "epoch": 13.623762376237623, "grad_norm": 0.4716132581233978, "learning_rate": 6.0881611134782546e-05, "loss": 0.4729, "step": 55040 }, { "epoch": 13.626237623762377, "grad_norm": 0.4473971128463745, "learning_rate": 6.086816207552797e-05, "loss": 0.4641, "step": 55050 }, { "epoch": 13.628712871287128, "grad_norm": 0.4665331244468689, "learning_rate": 6.085471219090823e-05, "loss": 0.4672, "step": 55060 }, { "epoch": 13.631188118811881, "grad_norm": 0.476480633020401, "learning_rate": 6.084126148194472e-05, "loss": 0.4657, "step": 55070 }, { "epoch": 13.633663366336634, "grad_norm": 0.5052682161331177, "learning_rate": 6.082780994965896e-05, "loss": 0.4679, "step": 55080 }, { "epoch": 13.636138613861386, "grad_norm": 0.4567249119281769, "learning_rate": 6.081435759507249e-05, "loss": 0.4732, "step": 55090 }, { "epoch": 13.638613861386139, "grad_norm": 0.48283645510673523, "learning_rate": 6.0800904419206926e-05, "loss": 0.4717, "step": 55100 }, { "epoch": 13.641089108910892, "grad_norm": 0.4721844792366028, "learning_rate": 6.078745042308395e-05, "loss": 0.4633, "step": 55110 }, { "epoch": 13.643564356435643, "grad_norm": 0.4865453243255615, "learning_rate": 6.077399560772531e-05, "loss": 0.4658, "step": 55120 }, { "epoch": 13.646039603960396, "grad_norm": 0.4598110318183899, "learning_rate": 6.0760539974152795e-05, "loss": 0.47, "step": 55130 }, { "epoch": 13.648514851485148, "grad_norm": 0.48107537627220154, "learning_rate": 6.074708352338829e-05, "loss": 0.4649, "step": 55140 }, { "epoch": 13.650990099009901, "grad_norm": 0.47800981998443604, "learning_rate": 6.0733626256453714e-05, "loss": 0.4657, "step": 55150 }, { "epoch": 13.653465346534654, "grad_norm": 0.4711437225341797, "learning_rate": 6.0720168174371064e-05, "loss": 0.4624, "step": 55160 }, { "epoch": 13.655940594059405, "grad_norm": 0.4524095356464386, "learning_rate": 6.0706709278162385e-05, "loss": 0.4641, "step": 55170 }, { "epoch": 13.658415841584159, "grad_norm": 0.4336671233177185, "learning_rate": 6.069324956884979e-05, "loss": 0.4639, "step": 55180 }, { "epoch": 13.660891089108912, "grad_norm": 0.4636041820049286, "learning_rate": 6.067978904745547e-05, "loss": 0.4683, "step": 55190 }, { "epoch": 13.663366336633663, "grad_norm": 0.4568415582180023, "learning_rate": 6.066632771500164e-05, "loss": 0.4681, "step": 55200 }, { "epoch": 13.665841584158416, "grad_norm": 0.4421442747116089, "learning_rate": 6.065286557251062e-05, "loss": 0.4727, "step": 55210 }, { "epoch": 13.668316831683168, "grad_norm": 0.4784179925918579, "learning_rate": 6.063940262100476e-05, "loss": 0.4682, "step": 55220 }, { "epoch": 13.67079207920792, "grad_norm": 0.44078412652015686, "learning_rate": 6.062593886150649e-05, "loss": 0.4708, "step": 55230 }, { "epoch": 13.673267326732674, "grad_norm": 0.46274468302726746, "learning_rate": 6.061247429503828e-05, "loss": 0.4636, "step": 55240 }, { "epoch": 13.675742574257425, "grad_norm": 0.48632529377937317, "learning_rate": 6.059900892262269e-05, "loss": 0.4699, "step": 55250 }, { "epoch": 13.678217821782178, "grad_norm": 0.47191405296325684, "learning_rate": 6.058554274528231e-05, "loss": 0.4689, "step": 55260 }, { "epoch": 13.680693069306932, "grad_norm": 0.47986137866973877, "learning_rate": 6.057207576403984e-05, "loss": 0.465, "step": 55270 }, { "epoch": 13.683168316831683, "grad_norm": 0.46522992849349976, "learning_rate": 6.0558607979917955e-05, "loss": 0.4659, "step": 55280 }, { "epoch": 13.685643564356436, "grad_norm": 0.44807571172714233, "learning_rate": 6.054513939393949e-05, "loss": 0.4652, "step": 55290 }, { "epoch": 13.688118811881187, "grad_norm": 0.45222148299217224, "learning_rate": 6.053167000712728e-05, "loss": 0.4657, "step": 55300 }, { "epoch": 13.69059405940594, "grad_norm": 0.47523778676986694, "learning_rate": 6.051819982050424e-05, "loss": 0.4714, "step": 55310 }, { "epoch": 13.693069306930694, "grad_norm": 0.4527718424797058, "learning_rate": 6.050472883509334e-05, "loss": 0.4686, "step": 55320 }, { "epoch": 13.695544554455445, "grad_norm": 0.44433653354644775, "learning_rate": 6.049125705191758e-05, "loss": 0.4686, "step": 55330 }, { "epoch": 13.698019801980198, "grad_norm": 0.46011292934417725, "learning_rate": 6.047778447200012e-05, "loss": 0.4696, "step": 55340 }, { "epoch": 13.700495049504951, "grad_norm": 0.4566459357738495, "learning_rate": 6.046431109636406e-05, "loss": 0.4633, "step": 55350 }, { "epoch": 13.702970297029703, "grad_norm": 0.49228402972221375, "learning_rate": 6.045083692603264e-05, "loss": 0.4666, "step": 55360 }, { "epoch": 13.705445544554456, "grad_norm": 0.48308268189430237, "learning_rate": 6.043736196202911e-05, "loss": 0.4675, "step": 55370 }, { "epoch": 13.707920792079207, "grad_norm": 0.4964218735694885, "learning_rate": 6.042388620537684e-05, "loss": 0.4672, "step": 55380 }, { "epoch": 13.71039603960396, "grad_norm": 0.4683830738067627, "learning_rate": 6.041040965709919e-05, "loss": 0.467, "step": 55390 }, { "epoch": 13.712871287128714, "grad_norm": 0.4558008909225464, "learning_rate": 6.039693231821963e-05, "loss": 0.4629, "step": 55400 }, { "epoch": 13.715346534653465, "grad_norm": 0.46179625391960144, "learning_rate": 6.038345418976168e-05, "loss": 0.4673, "step": 55410 }, { "epoch": 13.717821782178218, "grad_norm": 0.4404899775981903, "learning_rate": 6.03699752727489e-05, "loss": 0.4637, "step": 55420 }, { "epoch": 13.72029702970297, "grad_norm": 0.46036776900291443, "learning_rate": 6.035649556820493e-05, "loss": 0.4664, "step": 55430 }, { "epoch": 13.722772277227723, "grad_norm": 0.4374261200428009, "learning_rate": 6.034301507715349e-05, "loss": 0.4653, "step": 55440 }, { "epoch": 13.725247524752476, "grad_norm": 0.45732593536376953, "learning_rate": 6.0329533800618296e-05, "loss": 0.4636, "step": 55450 }, { "epoch": 13.727722772277227, "grad_norm": 0.47203436493873596, "learning_rate": 6.031605173962318e-05, "loss": 0.4655, "step": 55460 }, { "epoch": 13.73019801980198, "grad_norm": 0.46461787819862366, "learning_rate": 6.030256889519202e-05, "loss": 0.4681, "step": 55470 }, { "epoch": 13.732673267326733, "grad_norm": 0.4634492099285126, "learning_rate": 6.0289085268348736e-05, "loss": 0.4675, "step": 55480 }, { "epoch": 13.735148514851485, "grad_norm": 0.4529115557670593, "learning_rate": 6.0275600860117346e-05, "loss": 0.4694, "step": 55490 }, { "epoch": 13.737623762376238, "grad_norm": 0.5108538269996643, "learning_rate": 6.026211567152186e-05, "loss": 0.4648, "step": 55500 }, { "epoch": 13.740099009900991, "grad_norm": 0.47880271077156067, "learning_rate": 6.024862970358644e-05, "loss": 0.471, "step": 55510 }, { "epoch": 13.742574257425742, "grad_norm": 0.463512122631073, "learning_rate": 6.02351429573352e-05, "loss": 0.4718, "step": 55520 }, { "epoch": 13.745049504950495, "grad_norm": 0.4571281969547272, "learning_rate": 6.022165543379241e-05, "loss": 0.4607, "step": 55530 }, { "epoch": 13.747524752475247, "grad_norm": 0.44783815741539, "learning_rate": 6.020816713398235e-05, "loss": 0.4607, "step": 55540 }, { "epoch": 13.75, "grad_norm": 0.479839563369751, "learning_rate": 6.0194678058929364e-05, "loss": 0.4672, "step": 55550 }, { "epoch": 13.752475247524753, "grad_norm": 0.4824866056442261, "learning_rate": 6.018118820965786e-05, "loss": 0.4685, "step": 55560 }, { "epoch": 13.754950495049505, "grad_norm": 0.4580458402633667, "learning_rate": 6.01676975871923e-05, "loss": 0.4629, "step": 55570 }, { "epoch": 13.757425742574258, "grad_norm": 0.45097142457962036, "learning_rate": 6.0154206192557196e-05, "loss": 0.4701, "step": 55580 }, { "epoch": 13.759900990099009, "grad_norm": 0.43838444352149963, "learning_rate": 6.0140714026777154e-05, "loss": 0.4695, "step": 55590 }, { "epoch": 13.762376237623762, "grad_norm": 0.4493144750595093, "learning_rate": 6.012722109087681e-05, "loss": 0.4699, "step": 55600 }, { "epoch": 13.764851485148515, "grad_norm": 0.4392685294151306, "learning_rate": 6.0113727385880856e-05, "loss": 0.4678, "step": 55610 }, { "epoch": 13.767326732673267, "grad_norm": 0.46181827783584595, "learning_rate": 6.0100232912814046e-05, "loss": 0.4679, "step": 55620 }, { "epoch": 13.76980198019802, "grad_norm": 0.4484651982784271, "learning_rate": 6.0086737672701196e-05, "loss": 0.4679, "step": 55630 }, { "epoch": 13.772277227722773, "grad_norm": 0.49254655838012695, "learning_rate": 6.00732416665672e-05, "loss": 0.4712, "step": 55640 }, { "epoch": 13.774752475247524, "grad_norm": 0.4393973648548126, "learning_rate": 6.0059744895436974e-05, "loss": 0.468, "step": 55650 }, { "epoch": 13.777227722772277, "grad_norm": 0.4641468822956085, "learning_rate": 6.004624736033552e-05, "loss": 0.4723, "step": 55660 }, { "epoch": 13.77970297029703, "grad_norm": 0.4347970485687256, "learning_rate": 6.003274906228786e-05, "loss": 0.4696, "step": 55670 }, { "epoch": 13.782178217821782, "grad_norm": 0.4381587505340576, "learning_rate": 6.001925000231913e-05, "loss": 0.4655, "step": 55680 }, { "epoch": 13.784653465346535, "grad_norm": 0.4502268135547638, "learning_rate": 6.000575018145448e-05, "loss": 0.4721, "step": 55690 }, { "epoch": 13.787128712871286, "grad_norm": 0.4560410678386688, "learning_rate": 5.999224960071914e-05, "loss": 0.4648, "step": 55700 }, { "epoch": 13.78960396039604, "grad_norm": 0.46558648347854614, "learning_rate": 5.997874826113839e-05, "loss": 0.473, "step": 55710 }, { "epoch": 13.792079207920793, "grad_norm": 0.4460858702659607, "learning_rate": 5.996524616373755e-05, "loss": 0.468, "step": 55720 }, { "epoch": 13.794554455445544, "grad_norm": 0.4602980315685272, "learning_rate": 5.995174330954203e-05, "loss": 0.4689, "step": 55730 }, { "epoch": 13.797029702970297, "grad_norm": 0.45427268743515015, "learning_rate": 5.993823969957729e-05, "loss": 0.4686, "step": 55740 }, { "epoch": 13.799504950495049, "grad_norm": 0.4799495339393616, "learning_rate": 5.992473533486883e-05, "loss": 0.4732, "step": 55750 }, { "epoch": 13.801980198019802, "grad_norm": 0.5078845620155334, "learning_rate": 5.991123021644221e-05, "loss": 0.4714, "step": 55760 }, { "epoch": 13.804455445544555, "grad_norm": 0.4756689965724945, "learning_rate": 5.989772434532308e-05, "loss": 0.4688, "step": 55770 }, { "epoch": 13.806930693069306, "grad_norm": 0.44758275151252747, "learning_rate": 5.988421772253709e-05, "loss": 0.4652, "step": 55780 }, { "epoch": 13.80940594059406, "grad_norm": 0.44774848222732544, "learning_rate": 5.987071034911001e-05, "loss": 0.4674, "step": 55790 }, { "epoch": 13.811881188118813, "grad_norm": 0.44121384620666504, "learning_rate": 5.98572022260676e-05, "loss": 0.4624, "step": 55800 }, { "epoch": 13.814356435643564, "grad_norm": 0.46949532628059387, "learning_rate": 5.984369335443576e-05, "loss": 0.4629, "step": 55810 }, { "epoch": 13.816831683168317, "grad_norm": 0.46719294786453247, "learning_rate": 5.983018373524034e-05, "loss": 0.4645, "step": 55820 }, { "epoch": 13.819306930693068, "grad_norm": 0.43171119689941406, "learning_rate": 5.981667336950737e-05, "loss": 0.4665, "step": 55830 }, { "epoch": 13.821782178217822, "grad_norm": 0.4516667425632477, "learning_rate": 5.980316225826281e-05, "loss": 0.468, "step": 55840 }, { "epoch": 13.824257425742575, "grad_norm": 0.4706740379333496, "learning_rate": 5.9789650402532795e-05, "loss": 0.467, "step": 55850 }, { "epoch": 13.826732673267326, "grad_norm": 0.46262702345848083, "learning_rate": 5.9776137803343435e-05, "loss": 0.4693, "step": 55860 }, { "epoch": 13.82920792079208, "grad_norm": 0.4738786816596985, "learning_rate": 5.976262446172093e-05, "loss": 0.4648, "step": 55870 }, { "epoch": 13.831683168316832, "grad_norm": 0.43770498037338257, "learning_rate": 5.974911037869151e-05, "loss": 0.4706, "step": 55880 }, { "epoch": 13.834158415841584, "grad_norm": 0.46115073561668396, "learning_rate": 5.97355955552815e-05, "loss": 0.4632, "step": 55890 }, { "epoch": 13.836633663366337, "grad_norm": 0.44057220220565796, "learning_rate": 5.972207999251726e-05, "loss": 0.4671, "step": 55900 }, { "epoch": 13.839108910891088, "grad_norm": 0.46064552664756775, "learning_rate": 5.97085636914252e-05, "loss": 0.4626, "step": 55910 }, { "epoch": 13.841584158415841, "grad_norm": 0.44702988862991333, "learning_rate": 5.9695046653031803e-05, "loss": 0.4648, "step": 55920 }, { "epoch": 13.844059405940595, "grad_norm": 0.4681459963321686, "learning_rate": 5.96815288783636e-05, "loss": 0.4704, "step": 55930 }, { "epoch": 13.846534653465346, "grad_norm": 0.461515337228775, "learning_rate": 5.966801036844716e-05, "loss": 0.4647, "step": 55940 }, { "epoch": 13.849009900990099, "grad_norm": 0.47963666915893555, "learning_rate": 5.965449112430913e-05, "loss": 0.4672, "step": 55950 }, { "epoch": 13.851485148514852, "grad_norm": 0.43754130601882935, "learning_rate": 5.964097114697624e-05, "loss": 0.4753, "step": 55960 }, { "epoch": 13.853960396039604, "grad_norm": 0.47248774766921997, "learning_rate": 5.96274504374752e-05, "loss": 0.4712, "step": 55970 }, { "epoch": 13.856435643564357, "grad_norm": 0.44885000586509705, "learning_rate": 5.9613928996832843e-05, "loss": 0.4634, "step": 55980 }, { "epoch": 13.858910891089108, "grad_norm": 0.4757695496082306, "learning_rate": 5.9600406826076006e-05, "loss": 0.4633, "step": 55990 }, { "epoch": 13.861386138613861, "grad_norm": 0.4386000335216522, "learning_rate": 5.958688392623164e-05, "loss": 0.4635, "step": 56000 }, { "epoch": 13.863861386138614, "grad_norm": 0.4595034718513489, "learning_rate": 5.957336029832671e-05, "loss": 0.4692, "step": 56010 }, { "epoch": 13.866336633663366, "grad_norm": 0.45194974541664124, "learning_rate": 5.9559835943388245e-05, "loss": 0.4661, "step": 56020 }, { "epoch": 13.868811881188119, "grad_norm": 0.4755452275276184, "learning_rate": 5.954631086244332e-05, "loss": 0.4671, "step": 56030 }, { "epoch": 13.871287128712872, "grad_norm": 0.44121330976486206, "learning_rate": 5.953278505651908e-05, "loss": 0.4638, "step": 56040 }, { "epoch": 13.873762376237623, "grad_norm": 0.435325562953949, "learning_rate": 5.951925852664274e-05, "loss": 0.4674, "step": 56050 }, { "epoch": 13.876237623762377, "grad_norm": 0.4500017464160919, "learning_rate": 5.950573127384154e-05, "loss": 0.4716, "step": 56060 }, { "epoch": 13.878712871287128, "grad_norm": 0.4719405174255371, "learning_rate": 5.949220329914277e-05, "loss": 0.4639, "step": 56070 }, { "epoch": 13.881188118811881, "grad_norm": 0.4687858819961548, "learning_rate": 5.947867460357382e-05, "loss": 0.4672, "step": 56080 }, { "epoch": 13.883663366336634, "grad_norm": 0.4450179636478424, "learning_rate": 5.9465145188162065e-05, "loss": 0.4676, "step": 56090 }, { "epoch": 13.886138613861386, "grad_norm": 0.44858574867248535, "learning_rate": 5.9451615053935005e-05, "loss": 0.4684, "step": 56100 }, { "epoch": 13.888613861386139, "grad_norm": 0.4347352683544159, "learning_rate": 5.9438084201920165e-05, "loss": 0.4655, "step": 56110 }, { "epoch": 13.891089108910892, "grad_norm": 0.4832305610179901, "learning_rate": 5.94245526331451e-05, "loss": 0.4702, "step": 56120 }, { "epoch": 13.893564356435643, "grad_norm": 0.44136956334114075, "learning_rate": 5.941102034863748e-05, "loss": 0.4709, "step": 56130 }, { "epoch": 13.896039603960396, "grad_norm": 0.4342210292816162, "learning_rate": 5.939748734942496e-05, "loss": 0.4662, "step": 56140 }, { "epoch": 13.898514851485148, "grad_norm": 0.4627821147441864, "learning_rate": 5.9383953636535296e-05, "loss": 0.4685, "step": 56150 }, { "epoch": 13.900990099009901, "grad_norm": 0.4747895300388336, "learning_rate": 5.937041921099629e-05, "loss": 0.4698, "step": 56160 }, { "epoch": 13.903465346534654, "grad_norm": 0.45203807950019836, "learning_rate": 5.935688407383577e-05, "loss": 0.4632, "step": 56170 }, { "epoch": 13.905940594059405, "grad_norm": 0.4786052107810974, "learning_rate": 5.934334822608166e-05, "loss": 0.4683, "step": 56180 }, { "epoch": 13.908415841584159, "grad_norm": 0.4362395703792572, "learning_rate": 5.9329811668761906e-05, "loss": 0.467, "step": 56190 }, { "epoch": 13.910891089108912, "grad_norm": 0.4618537127971649, "learning_rate": 5.9316274402904546e-05, "loss": 0.4667, "step": 56200 }, { "epoch": 13.913366336633663, "grad_norm": 0.4478905200958252, "learning_rate": 5.9302736429537606e-05, "loss": 0.4763, "step": 56210 }, { "epoch": 13.915841584158416, "grad_norm": 0.4702622592449188, "learning_rate": 5.928919774968924e-05, "loss": 0.4671, "step": 56220 }, { "epoch": 13.918316831683168, "grad_norm": 0.4656516909599304, "learning_rate": 5.92756583643876e-05, "loss": 0.471, "step": 56230 }, { "epoch": 13.92079207920792, "grad_norm": 0.4296852648258209, "learning_rate": 5.926211827466093e-05, "loss": 0.4694, "step": 56240 }, { "epoch": 13.923267326732674, "grad_norm": 0.4299020767211914, "learning_rate": 5.92485774815375e-05, "loss": 0.4653, "step": 56250 }, { "epoch": 13.925742574257425, "grad_norm": 0.4323998987674713, "learning_rate": 5.923503598604565e-05, "loss": 0.4669, "step": 56260 }, { "epoch": 13.928217821782178, "grad_norm": 0.45267775654792786, "learning_rate": 5.922149378921374e-05, "loss": 0.4709, "step": 56270 }, { "epoch": 13.930693069306932, "grad_norm": 0.49151554703712463, "learning_rate": 5.920795089207026e-05, "loss": 0.4686, "step": 56280 }, { "epoch": 13.933168316831683, "grad_norm": 0.43473413586616516, "learning_rate": 5.919440729564366e-05, "loss": 0.4744, "step": 56290 }, { "epoch": 13.935643564356436, "grad_norm": 0.480712354183197, "learning_rate": 5.918086300096251e-05, "loss": 0.4676, "step": 56300 }, { "epoch": 13.938118811881187, "grad_norm": 0.47395604848861694, "learning_rate": 5.916731800905541e-05, "loss": 0.469, "step": 56310 }, { "epoch": 13.94059405940594, "grad_norm": 0.4935346245765686, "learning_rate": 5.9153772320950995e-05, "loss": 0.467, "step": 56320 }, { "epoch": 13.943069306930694, "grad_norm": 0.45044293999671936, "learning_rate": 5.914022593767798e-05, "loss": 0.4684, "step": 56330 }, { "epoch": 13.945544554455445, "grad_norm": 0.45829102396965027, "learning_rate": 5.912667886026513e-05, "loss": 0.4645, "step": 56340 }, { "epoch": 13.948019801980198, "grad_norm": 0.43587613105773926, "learning_rate": 5.911313108974127e-05, "loss": 0.464, "step": 56350 }, { "epoch": 13.950495049504951, "grad_norm": 0.42776477336883545, "learning_rate": 5.909958262713522e-05, "loss": 0.4628, "step": 56360 }, { "epoch": 13.952970297029703, "grad_norm": 0.4389893412590027, "learning_rate": 5.9086033473475934e-05, "loss": 0.4657, "step": 56370 }, { "epoch": 13.955445544554456, "grad_norm": 0.4274655878543854, "learning_rate": 5.907248362979236e-05, "loss": 0.464, "step": 56380 }, { "epoch": 13.957920792079207, "grad_norm": 0.43113332986831665, "learning_rate": 5.9058933097113535e-05, "loss": 0.4618, "step": 56390 }, { "epoch": 13.96039603960396, "grad_norm": 0.47454696893692017, "learning_rate": 5.904538187646852e-05, "loss": 0.4617, "step": 56400 }, { "epoch": 13.962871287128714, "grad_norm": 0.4346463978290558, "learning_rate": 5.9031829968886455e-05, "loss": 0.4639, "step": 56410 }, { "epoch": 13.965346534653465, "grad_norm": 0.4745256006717682, "learning_rate": 5.901827737539649e-05, "loss": 0.4658, "step": 56420 }, { "epoch": 13.967821782178218, "grad_norm": 0.4770919382572174, "learning_rate": 5.9004724097027895e-05, "loss": 0.471, "step": 56430 }, { "epoch": 13.97029702970297, "grad_norm": 0.5017343759536743, "learning_rate": 5.899117013480991e-05, "loss": 0.4721, "step": 56440 }, { "epoch": 13.972772277227723, "grad_norm": 0.4430561363697052, "learning_rate": 5.8977615489771896e-05, "loss": 0.4663, "step": 56450 }, { "epoch": 13.975247524752476, "grad_norm": 0.4573807120323181, "learning_rate": 5.8964060162943244e-05, "loss": 0.4731, "step": 56460 }, { "epoch": 13.977722772277227, "grad_norm": 0.4514230787754059, "learning_rate": 5.895050415535338e-05, "loss": 0.4667, "step": 56470 }, { "epoch": 13.98019801980198, "grad_norm": 0.4377761483192444, "learning_rate": 5.893694746803179e-05, "loss": 0.4666, "step": 56480 }, { "epoch": 13.982673267326733, "grad_norm": 0.4552178978919983, "learning_rate": 5.8923390102008e-05, "loss": 0.4647, "step": 56490 }, { "epoch": 13.985148514851485, "grad_norm": 0.45848390460014343, "learning_rate": 5.8909832058311656e-05, "loss": 0.4654, "step": 56500 }, { "epoch": 13.987623762376238, "grad_norm": 0.4588562846183777, "learning_rate": 5.889627333797233e-05, "loss": 0.4656, "step": 56510 }, { "epoch": 13.990099009900991, "grad_norm": 0.4661352038383484, "learning_rate": 5.888271394201977e-05, "loss": 0.464, "step": 56520 }, { "epoch": 13.992574257425742, "grad_norm": 0.45233458280563354, "learning_rate": 5.886915387148372e-05, "loss": 0.4657, "step": 56530 }, { "epoch": 13.995049504950495, "grad_norm": 0.443431556224823, "learning_rate": 5.885559312739395e-05, "loss": 0.4727, "step": 56540 }, { "epoch": 13.997524752475247, "grad_norm": 0.46655622124671936, "learning_rate": 5.8842031710780333e-05, "loss": 0.4638, "step": 56550 }, { "epoch": 14.0, "grad_norm": 0.49265924096107483, "learning_rate": 5.8828469622672754e-05, "loss": 0.4705, "step": 56560 }, { "epoch": 14.002475247524753, "grad_norm": 0.45562684535980225, "learning_rate": 5.881490686410117e-05, "loss": 0.4689, "step": 56570 }, { "epoch": 14.004950495049505, "grad_norm": 0.4851182997226715, "learning_rate": 5.880134343609559e-05, "loss": 0.4717, "step": 56580 }, { "epoch": 14.007425742574258, "grad_norm": 0.44145816564559937, "learning_rate": 5.8787779339686045e-05, "loss": 0.4649, "step": 56590 }, { "epoch": 14.009900990099009, "grad_norm": 0.45717546343803406, "learning_rate": 5.877421457590266e-05, "loss": 0.4625, "step": 56600 }, { "epoch": 14.012376237623762, "grad_norm": 0.49888285994529724, "learning_rate": 5.876064914577558e-05, "loss": 0.4667, "step": 56610 }, { "epoch": 14.014851485148515, "grad_norm": 0.45340219140052795, "learning_rate": 5.8747083050335027e-05, "loss": 0.4605, "step": 56620 }, { "epoch": 14.017326732673267, "grad_norm": 0.48549506068229675, "learning_rate": 5.873351629061124e-05, "loss": 0.4635, "step": 56630 }, { "epoch": 14.01980198019802, "grad_norm": 0.4631431996822357, "learning_rate": 5.87199488676345e-05, "loss": 0.4706, "step": 56640 }, { "epoch": 14.022277227722773, "grad_norm": 0.4537290632724762, "learning_rate": 5.870638078243522e-05, "loss": 0.4664, "step": 56650 }, { "epoch": 14.024752475247524, "grad_norm": 0.44314223527908325, "learning_rate": 5.8692812036043753e-05, "loss": 0.4633, "step": 56660 }, { "epoch": 14.027227722772277, "grad_norm": 0.4541832506656647, "learning_rate": 5.867924262949061e-05, "loss": 0.4674, "step": 56670 }, { "epoch": 14.029702970297029, "grad_norm": 0.4619991183280945, "learning_rate": 5.866567256380624e-05, "loss": 0.4705, "step": 56680 }, { "epoch": 14.032178217821782, "grad_norm": 0.4470980167388916, "learning_rate": 5.865210184002123e-05, "loss": 0.4649, "step": 56690 }, { "epoch": 14.034653465346535, "grad_norm": 0.4480043053627014, "learning_rate": 5.86385304591662e-05, "loss": 0.467, "step": 56700 }, { "epoch": 14.037128712871286, "grad_norm": 0.45833340287208557, "learning_rate": 5.862495842227178e-05, "loss": 0.4649, "step": 56710 }, { "epoch": 14.03960396039604, "grad_norm": 0.46614500880241394, "learning_rate": 5.861138573036869e-05, "loss": 0.4662, "step": 56720 }, { "epoch": 14.042079207920793, "grad_norm": 0.4756738245487213, "learning_rate": 5.859781238448769e-05, "loss": 0.4695, "step": 56730 }, { "epoch": 14.044554455445544, "grad_norm": 0.43991851806640625, "learning_rate": 5.858423838565956e-05, "loss": 0.4631, "step": 56740 }, { "epoch": 14.047029702970297, "grad_norm": 0.454190731048584, "learning_rate": 5.85706637349152e-05, "loss": 0.4642, "step": 56750 }, { "epoch": 14.049504950495049, "grad_norm": 0.46113333106040955, "learning_rate": 5.855708843328548e-05, "loss": 0.4699, "step": 56760 }, { "epoch": 14.051980198019802, "grad_norm": 0.4508342146873474, "learning_rate": 5.8543512481801365e-05, "loss": 0.4688, "step": 56770 }, { "epoch": 14.054455445544555, "grad_norm": 0.45955032110214233, "learning_rate": 5.852993588149386e-05, "loss": 0.4618, "step": 56780 }, { "epoch": 14.056930693069306, "grad_norm": 0.4347342848777771, "learning_rate": 5.8516358633394005e-05, "loss": 0.4639, "step": 56790 }, { "epoch": 14.05940594059406, "grad_norm": 0.4886294901371002, "learning_rate": 5.8502780738532936e-05, "loss": 0.4718, "step": 56800 }, { "epoch": 14.061881188118813, "grad_norm": 0.4541705548763275, "learning_rate": 5.8489202197941775e-05, "loss": 0.4645, "step": 56810 }, { "epoch": 14.064356435643564, "grad_norm": 0.45247071981430054, "learning_rate": 5.8475623012651734e-05, "loss": 0.4682, "step": 56820 }, { "epoch": 14.066831683168317, "grad_norm": 0.44424667954444885, "learning_rate": 5.846204318369405e-05, "loss": 0.4636, "step": 56830 }, { "epoch": 14.069306930693068, "grad_norm": 0.4656534194946289, "learning_rate": 5.844846271210004e-05, "loss": 0.4697, "step": 56840 }, { "epoch": 14.071782178217822, "grad_norm": 0.46159911155700684, "learning_rate": 5.8434881598901035e-05, "loss": 0.4655, "step": 56850 }, { "epoch": 14.074257425742575, "grad_norm": 0.43306195735931396, "learning_rate": 5.8421299845128444e-05, "loss": 0.4697, "step": 56860 }, { "epoch": 14.076732673267326, "grad_norm": 0.44128525257110596, "learning_rate": 5.84077174518137e-05, "loss": 0.4686, "step": 56870 }, { "epoch": 14.07920792079208, "grad_norm": 0.4301503598690033, "learning_rate": 5.8394134419988314e-05, "loss": 0.4659, "step": 56880 }, { "epoch": 14.081683168316832, "grad_norm": 0.44510945677757263, "learning_rate": 5.8380550750683794e-05, "loss": 0.4659, "step": 56890 }, { "epoch": 14.084158415841584, "grad_norm": 0.4293767809867859, "learning_rate": 5.8366966444931756e-05, "loss": 0.4635, "step": 56900 }, { "epoch": 14.086633663366337, "grad_norm": 0.4518324136734009, "learning_rate": 5.835338150376385e-05, "loss": 0.4695, "step": 56910 }, { "epoch": 14.089108910891088, "grad_norm": 0.4482659101486206, "learning_rate": 5.833979592821174e-05, "loss": 0.4671, "step": 56920 }, { "epoch": 14.091584158415841, "grad_norm": 0.45048677921295166, "learning_rate": 5.832620971930717e-05, "loss": 0.4643, "step": 56930 }, { "epoch": 14.094059405940595, "grad_norm": 0.46535781025886536, "learning_rate": 5.8312622878081904e-05, "loss": 0.4681, "step": 56940 }, { "epoch": 14.096534653465346, "grad_norm": 0.44677218794822693, "learning_rate": 5.8299035405567806e-05, "loss": 0.4644, "step": 56950 }, { "epoch": 14.099009900990099, "grad_norm": 0.44186216592788696, "learning_rate": 5.8285447302796726e-05, "loss": 0.4662, "step": 56960 }, { "epoch": 14.101485148514852, "grad_norm": 0.4491756856441498, "learning_rate": 5.827185857080062e-05, "loss": 0.4603, "step": 56970 }, { "epoch": 14.103960396039604, "grad_norm": 0.4393271803855896, "learning_rate": 5.8258269210611415e-05, "loss": 0.4668, "step": 56980 }, { "epoch": 14.106435643564357, "grad_norm": 0.4399644136428833, "learning_rate": 5.82446792232612e-05, "loss": 0.4691, "step": 56990 }, { "epoch": 14.108910891089108, "grad_norm": 0.47083136439323425, "learning_rate": 5.823108860978198e-05, "loss": 0.4687, "step": 57000 }, { "epoch": 14.111386138613861, "grad_norm": 0.4441166818141937, "learning_rate": 5.821749737120591e-05, "loss": 0.4672, "step": 57010 }, { "epoch": 14.113861386138614, "grad_norm": 0.4788978099822998, "learning_rate": 5.820390550856515e-05, "loss": 0.4653, "step": 57020 }, { "epoch": 14.116336633663366, "grad_norm": 0.4625248610973358, "learning_rate": 5.8190313022891905e-05, "loss": 0.4661, "step": 57030 }, { "epoch": 14.118811881188119, "grad_norm": 0.458743155002594, "learning_rate": 5.8176719915218436e-05, "loss": 0.4705, "step": 57040 }, { "epoch": 14.121287128712872, "grad_norm": 0.4439573884010315, "learning_rate": 5.8163126186577035e-05, "loss": 0.4668, "step": 57050 }, { "epoch": 14.123762376237623, "grad_norm": 0.45162567496299744, "learning_rate": 5.8149531838000094e-05, "loss": 0.4673, "step": 57060 }, { "epoch": 14.126237623762377, "grad_norm": 0.42839929461479187, "learning_rate": 5.8135936870519984e-05, "loss": 0.4697, "step": 57070 }, { "epoch": 14.128712871287128, "grad_norm": 0.4704226851463318, "learning_rate": 5.812234128516917e-05, "loss": 0.4629, "step": 57080 }, { "epoch": 14.131188118811881, "grad_norm": 0.436655193567276, "learning_rate": 5.810874508298012e-05, "loss": 0.4661, "step": 57090 }, { "epoch": 14.133663366336634, "grad_norm": 0.4649132192134857, "learning_rate": 5.809514826498541e-05, "loss": 0.4687, "step": 57100 }, { "epoch": 14.136138613861386, "grad_norm": 0.45689675211906433, "learning_rate": 5.80815508322176e-05, "loss": 0.4668, "step": 57110 }, { "epoch": 14.138613861386139, "grad_norm": 0.4607427716255188, "learning_rate": 5.806795278570936e-05, "loss": 0.4721, "step": 57120 }, { "epoch": 14.141089108910892, "grad_norm": 0.4667443037033081, "learning_rate": 5.8054354126493324e-05, "loss": 0.4661, "step": 57130 }, { "epoch": 14.143564356435643, "grad_norm": 0.4425988793373108, "learning_rate": 5.8040754855602274e-05, "loss": 0.4612, "step": 57140 }, { "epoch": 14.146039603960396, "grad_norm": 0.4471902549266815, "learning_rate": 5.802715497406892e-05, "loss": 0.4655, "step": 57150 }, { "epoch": 14.148514851485148, "grad_norm": 0.4441864490509033, "learning_rate": 5.801355448292615e-05, "loss": 0.4622, "step": 57160 }, { "epoch": 14.150990099009901, "grad_norm": 0.4517591893672943, "learning_rate": 5.79999533832068e-05, "loss": 0.4677, "step": 57170 }, { "epoch": 14.153465346534654, "grad_norm": 0.4751587510108948, "learning_rate": 5.7986351675943775e-05, "loss": 0.4662, "step": 57180 }, { "epoch": 14.155940594059405, "grad_norm": 0.4367099106311798, "learning_rate": 5.797274936217004e-05, "loss": 0.4708, "step": 57190 }, { "epoch": 14.158415841584159, "grad_norm": 0.48863688111305237, "learning_rate": 5.795914644291861e-05, "loss": 0.4636, "step": 57200 }, { "epoch": 14.160891089108912, "grad_norm": 0.46340152621269226, "learning_rate": 5.794554291922254e-05, "loss": 0.4652, "step": 57210 }, { "epoch": 14.163366336633663, "grad_norm": 0.4536518156528473, "learning_rate": 5.7931938792114926e-05, "loss": 0.4653, "step": 57220 }, { "epoch": 14.165841584158416, "grad_norm": 0.48466023802757263, "learning_rate": 5.79183340626289e-05, "loss": 0.465, "step": 57230 }, { "epoch": 14.168316831683168, "grad_norm": 0.45981860160827637, "learning_rate": 5.7904728731797664e-05, "loss": 0.4663, "step": 57240 }, { "epoch": 14.17079207920792, "grad_norm": 0.46123558282852173, "learning_rate": 5.789112280065444e-05, "loss": 0.4725, "step": 57250 }, { "epoch": 14.173267326732674, "grad_norm": 0.45585158467292786, "learning_rate": 5.787751627023251e-05, "loss": 0.466, "step": 57260 }, { "epoch": 14.175742574257425, "grad_norm": 0.4572192132472992, "learning_rate": 5.786390914156523e-05, "loss": 0.4711, "step": 57270 }, { "epoch": 14.178217821782178, "grad_norm": 0.4507255256175995, "learning_rate": 5.7850301415685925e-05, "loss": 0.4677, "step": 57280 }, { "epoch": 14.180693069306932, "grad_norm": 0.4604237675666809, "learning_rate": 5.7836693093628056e-05, "loss": 0.4654, "step": 57290 }, { "epoch": 14.183168316831683, "grad_norm": 0.44219741225242615, "learning_rate": 5.782308417642504e-05, "loss": 0.4642, "step": 57300 }, { "epoch": 14.185643564356436, "grad_norm": 0.4519168734550476, "learning_rate": 5.780947466511042e-05, "loss": 0.4638, "step": 57310 }, { "epoch": 14.188118811881187, "grad_norm": 0.4610651731491089, "learning_rate": 5.779586456071774e-05, "loss": 0.467, "step": 57320 }, { "epoch": 14.19059405940594, "grad_norm": 0.465526282787323, "learning_rate": 5.77822538642806e-05, "loss": 0.4689, "step": 57330 }, { "epoch": 14.193069306930694, "grad_norm": 0.43938836455345154, "learning_rate": 5.776864257683262e-05, "loss": 0.4642, "step": 57340 }, { "epoch": 14.195544554455445, "grad_norm": 0.4442647695541382, "learning_rate": 5.7755030699407506e-05, "loss": 0.4743, "step": 57350 }, { "epoch": 14.198019801980198, "grad_norm": 0.4424201250076294, "learning_rate": 5.7741418233039e-05, "loss": 0.4651, "step": 57360 }, { "epoch": 14.200495049504951, "grad_norm": 0.45135337114334106, "learning_rate": 5.7727805178760876e-05, "loss": 0.4689, "step": 57370 }, { "epoch": 14.202970297029703, "grad_norm": 0.4699576199054718, "learning_rate": 5.771419153760693e-05, "loss": 0.4659, "step": 57380 }, { "epoch": 14.205445544554456, "grad_norm": 0.4803159236907959, "learning_rate": 5.770057731061106e-05, "loss": 0.4657, "step": 57390 }, { "epoch": 14.207920792079207, "grad_norm": 0.48474153876304626, "learning_rate": 5.768696249880716e-05, "loss": 0.4703, "step": 57400 }, { "epoch": 14.21039603960396, "grad_norm": 0.4529893696308136, "learning_rate": 5.7673347103229183e-05, "loss": 0.4649, "step": 57410 }, { "epoch": 14.212871287128714, "grad_norm": 0.45413199067115784, "learning_rate": 5.765973112491113e-05, "loss": 0.4672, "step": 57420 }, { "epoch": 14.215346534653465, "grad_norm": 0.43320944905281067, "learning_rate": 5.764611456488705e-05, "loss": 0.4657, "step": 57430 }, { "epoch": 14.217821782178218, "grad_norm": 0.4562806189060211, "learning_rate": 5.763249742419104e-05, "loss": 0.4665, "step": 57440 }, { "epoch": 14.220297029702971, "grad_norm": 0.4559164047241211, "learning_rate": 5.76188797038572e-05, "loss": 0.4649, "step": 57450 }, { "epoch": 14.222772277227723, "grad_norm": 0.45738381147384644, "learning_rate": 5.760526140491974e-05, "loss": 0.4749, "step": 57460 }, { "epoch": 14.225247524752476, "grad_norm": 0.4493579566478729, "learning_rate": 5.759164252841287e-05, "loss": 0.4599, "step": 57470 }, { "epoch": 14.227722772277227, "grad_norm": 0.4505433738231659, "learning_rate": 5.757802307537085e-05, "loss": 0.4682, "step": 57480 }, { "epoch": 14.23019801980198, "grad_norm": 0.46707162261009216, "learning_rate": 5.7564403046827975e-05, "loss": 0.4633, "step": 57490 }, { "epoch": 14.232673267326733, "grad_norm": 0.45050498843193054, "learning_rate": 5.7550782443818615e-05, "loss": 0.4679, "step": 57500 }, { "epoch": 14.235148514851485, "grad_norm": 0.44429537653923035, "learning_rate": 5.753716126737717e-05, "loss": 0.4714, "step": 57510 }, { "epoch": 14.237623762376238, "grad_norm": 0.42655402421951294, "learning_rate": 5.752353951853805e-05, "loss": 0.4654, "step": 57520 }, { "epoch": 14.240099009900991, "grad_norm": 0.48209500312805176, "learning_rate": 5.750991719833577e-05, "loss": 0.4635, "step": 57530 }, { "epoch": 14.242574257425742, "grad_norm": 0.4688476026058197, "learning_rate": 5.749629430780483e-05, "loss": 0.4684, "step": 57540 }, { "epoch": 14.245049504950495, "grad_norm": 0.45579972863197327, "learning_rate": 5.7482670847979824e-05, "loss": 0.469, "step": 57550 }, { "epoch": 14.247524752475247, "grad_norm": 0.43793797492980957, "learning_rate": 5.7469046819895334e-05, "loss": 0.4704, "step": 57560 }, { "epoch": 14.25, "grad_norm": 0.4542066752910614, "learning_rate": 5.745542222458603e-05, "loss": 0.4579, "step": 57570 }, { "epoch": 14.252475247524753, "grad_norm": 0.4497096538543701, "learning_rate": 5.744179706308661e-05, "loss": 0.4612, "step": 57580 }, { "epoch": 14.254950495049505, "grad_norm": 0.4512421488761902, "learning_rate": 5.7428171336431825e-05, "loss": 0.4607, "step": 57590 }, { "epoch": 14.257425742574258, "grad_norm": 0.45070627331733704, "learning_rate": 5.741454504565643e-05, "loss": 0.4635, "step": 57600 }, { "epoch": 14.259900990099009, "grad_norm": 0.45174744725227356, "learning_rate": 5.740091819179528e-05, "loss": 0.4647, "step": 57610 }, { "epoch": 14.262376237623762, "grad_norm": 0.4779855012893677, "learning_rate": 5.738729077588324e-05, "loss": 0.4687, "step": 57620 }, { "epoch": 14.264851485148515, "grad_norm": 0.46965745091438293, "learning_rate": 5.737366279895521e-05, "loss": 0.4704, "step": 57630 }, { "epoch": 14.267326732673267, "grad_norm": 0.476120263338089, "learning_rate": 5.736003426204617e-05, "loss": 0.4715, "step": 57640 }, { "epoch": 14.26980198019802, "grad_norm": 0.4453626275062561, "learning_rate": 5.7346405166191074e-05, "loss": 0.465, "step": 57650 }, { "epoch": 14.272277227722773, "grad_norm": 0.4533250033855438, "learning_rate": 5.733277551242501e-05, "loss": 0.4681, "step": 57660 }, { "epoch": 14.274752475247524, "grad_norm": 0.4325959384441376, "learning_rate": 5.7319145301783014e-05, "loss": 0.467, "step": 57670 }, { "epoch": 14.277227722772277, "grad_norm": 0.44107431173324585, "learning_rate": 5.730551453530025e-05, "loss": 0.4682, "step": 57680 }, { "epoch": 14.27970297029703, "grad_norm": 0.46468302607536316, "learning_rate": 5.7291883214011866e-05, "loss": 0.4689, "step": 57690 }, { "epoch": 14.282178217821782, "grad_norm": 0.4531310200691223, "learning_rate": 5.7278251338953084e-05, "loss": 0.4631, "step": 57700 }, { "epoch": 14.284653465346535, "grad_norm": 0.43585604429244995, "learning_rate": 5.726461891115913e-05, "loss": 0.4732, "step": 57710 }, { "epoch": 14.287128712871286, "grad_norm": 0.4364577531814575, "learning_rate": 5.7250985931665325e-05, "loss": 0.4676, "step": 57720 }, { "epoch": 14.28960396039604, "grad_norm": 0.4695614278316498, "learning_rate": 5.723735240150697e-05, "loss": 0.4705, "step": 57730 }, { "epoch": 14.292079207920793, "grad_norm": 0.4515240788459778, "learning_rate": 5.722371832171948e-05, "loss": 0.4616, "step": 57740 }, { "epoch": 14.294554455445544, "grad_norm": 0.46347635984420776, "learning_rate": 5.721008369333826e-05, "loss": 0.4633, "step": 57750 }, { "epoch": 14.297029702970297, "grad_norm": 0.44717079401016235, "learning_rate": 5.719644851739876e-05, "loss": 0.4634, "step": 57760 }, { "epoch": 14.299504950495049, "grad_norm": 0.43067625164985657, "learning_rate": 5.718281279493648e-05, "loss": 0.4712, "step": 57770 }, { "epoch": 14.301980198019802, "grad_norm": 0.4657990634441376, "learning_rate": 5.7169176526986965e-05, "loss": 0.4638, "step": 57780 }, { "epoch": 14.304455445544555, "grad_norm": 0.4398679733276367, "learning_rate": 5.715553971458584e-05, "loss": 0.4703, "step": 57790 }, { "epoch": 14.306930693069306, "grad_norm": 0.44869932532310486, "learning_rate": 5.714190235876866e-05, "loss": 0.4691, "step": 57800 }, { "epoch": 14.30940594059406, "grad_norm": 0.4439176023006439, "learning_rate": 5.7128264460571144e-05, "loss": 0.4671, "step": 57810 }, { "epoch": 14.311881188118813, "grad_norm": 0.45798277854919434, "learning_rate": 5.7114626021028984e-05, "loss": 0.4681, "step": 57820 }, { "epoch": 14.314356435643564, "grad_norm": 0.47368887066841125, "learning_rate": 5.7100987041177936e-05, "loss": 0.4698, "step": 57830 }, { "epoch": 14.316831683168317, "grad_norm": 0.460286945104599, "learning_rate": 5.708734752205377e-05, "loss": 0.4621, "step": 57840 }, { "epoch": 14.319306930693068, "grad_norm": 0.5000451803207397, "learning_rate": 5.707370746469234e-05, "loss": 0.4626, "step": 57850 }, { "epoch": 14.321782178217822, "grad_norm": 0.42757102847099304, "learning_rate": 5.70600668701295e-05, "loss": 0.4671, "step": 57860 }, { "epoch": 14.324257425742575, "grad_norm": 0.44506505131721497, "learning_rate": 5.704642573940119e-05, "loss": 0.4669, "step": 57870 }, { "epoch": 14.326732673267326, "grad_norm": 0.4472278952598572, "learning_rate": 5.703278407354334e-05, "loss": 0.4662, "step": 57880 }, { "epoch": 14.32920792079208, "grad_norm": 0.4560011923313141, "learning_rate": 5.701914187359194e-05, "loss": 0.4635, "step": 57890 }, { "epoch": 14.331683168316832, "grad_norm": 0.4288666248321533, "learning_rate": 5.700549914058305e-05, "loss": 0.4689, "step": 57900 }, { "epoch": 14.334158415841584, "grad_norm": 0.44089481234550476, "learning_rate": 5.699185587555273e-05, "loss": 0.4679, "step": 57910 }, { "epoch": 14.336633663366337, "grad_norm": 0.4552619457244873, "learning_rate": 5.697821207953711e-05, "loss": 0.4675, "step": 57920 }, { "epoch": 14.339108910891088, "grad_norm": 0.42920851707458496, "learning_rate": 5.696456775357233e-05, "loss": 0.4697, "step": 57930 }, { "epoch": 14.341584158415841, "grad_norm": 0.42724481225013733, "learning_rate": 5.6950922898694595e-05, "loss": 0.4605, "step": 57940 }, { "epoch": 14.344059405940595, "grad_norm": 0.44983163475990295, "learning_rate": 5.693727751594011e-05, "loss": 0.4682, "step": 57950 }, { "epoch": 14.346534653465346, "grad_norm": 0.4316558241844177, "learning_rate": 5.692363160634522e-05, "loss": 0.4692, "step": 57960 }, { "epoch": 14.349009900990099, "grad_norm": 0.42644235491752625, "learning_rate": 5.6909985170946165e-05, "loss": 0.4688, "step": 57970 }, { "epoch": 14.351485148514852, "grad_norm": 0.4485628306865692, "learning_rate": 5.689633821077937e-05, "loss": 0.4623, "step": 57980 }, { "epoch": 14.353960396039604, "grad_norm": 0.45895594358444214, "learning_rate": 5.688269072688117e-05, "loss": 0.4706, "step": 57990 }, { "epoch": 14.356435643564357, "grad_norm": 0.4307098984718323, "learning_rate": 5.686904272028805e-05, "loss": 0.4646, "step": 58000 }, { "epoch": 14.358910891089108, "grad_norm": 0.4452184736728668, "learning_rate": 5.685539419203646e-05, "loss": 0.4636, "step": 58010 }, { "epoch": 14.361386138613861, "grad_norm": 0.4338264763355255, "learning_rate": 5.6841745143162925e-05, "loss": 0.4626, "step": 58020 }, { "epoch": 14.363861386138614, "grad_norm": 0.45166245102882385, "learning_rate": 5.6828095574704e-05, "loss": 0.4655, "step": 58030 }, { "epoch": 14.366336633663366, "grad_norm": 0.42310845851898193, "learning_rate": 5.681444548769627e-05, "loss": 0.4667, "step": 58040 }, { "epoch": 14.368811881188119, "grad_norm": 0.4442639946937561, "learning_rate": 5.6800794883176375e-05, "loss": 0.4622, "step": 58050 }, { "epoch": 14.371287128712872, "grad_norm": 0.4130527377128601, "learning_rate": 5.6787143762180996e-05, "loss": 0.4642, "step": 58060 }, { "epoch": 14.373762376237623, "grad_norm": 0.4487362802028656, "learning_rate": 5.677349212574684e-05, "loss": 0.4655, "step": 58070 }, { "epoch": 14.376237623762377, "grad_norm": 0.43822094798088074, "learning_rate": 5.675983997491067e-05, "loss": 0.4617, "step": 58080 }, { "epoch": 14.378712871287128, "grad_norm": 0.43323948979377747, "learning_rate": 5.6746187310709256e-05, "loss": 0.4672, "step": 58090 }, { "epoch": 14.381188118811881, "grad_norm": 0.4664696753025055, "learning_rate": 5.6732534134179417e-05, "loss": 0.4675, "step": 58100 }, { "epoch": 14.383663366336634, "grad_norm": 0.4389886260032654, "learning_rate": 5.671888044635807e-05, "loss": 0.4664, "step": 58110 }, { "epoch": 14.386138613861386, "grad_norm": 0.4447878897190094, "learning_rate": 5.670522624828207e-05, "loss": 0.4634, "step": 58120 }, { "epoch": 14.388613861386139, "grad_norm": 0.47157713770866394, "learning_rate": 5.669157154098842e-05, "loss": 0.4736, "step": 58130 }, { "epoch": 14.391089108910892, "grad_norm": 0.4516012966632843, "learning_rate": 5.6677916325514035e-05, "loss": 0.4637, "step": 58140 }, { "epoch": 14.393564356435643, "grad_norm": 0.4138747453689575, "learning_rate": 5.6664260602896005e-05, "loss": 0.4713, "step": 58150 }, { "epoch": 14.396039603960396, "grad_norm": 0.456405371427536, "learning_rate": 5.665060437417134e-05, "loss": 0.4628, "step": 58160 }, { "epoch": 14.398514851485148, "grad_norm": 0.46736598014831543, "learning_rate": 5.663694764037718e-05, "loss": 0.4701, "step": 58170 }, { "epoch": 14.400990099009901, "grad_norm": 0.46073687076568604, "learning_rate": 5.6623290402550636e-05, "loss": 0.4664, "step": 58180 }, { "epoch": 14.403465346534654, "grad_norm": 0.4257688820362091, "learning_rate": 5.660963266172889e-05, "loss": 0.465, "step": 58190 }, { "epoch": 14.405940594059405, "grad_norm": 0.4336605370044708, "learning_rate": 5.6595974418949184e-05, "loss": 0.4695, "step": 58200 }, { "epoch": 14.408415841584159, "grad_norm": 0.4688839614391327, "learning_rate": 5.658231567524873e-05, "loss": 0.4717, "step": 58210 }, { "epoch": 14.410891089108912, "grad_norm": 0.4745732545852661, "learning_rate": 5.656865643166485e-05, "loss": 0.4656, "step": 58220 }, { "epoch": 14.413366336633663, "grad_norm": 0.4323647618293762, "learning_rate": 5.655499668923486e-05, "loss": 0.4747, "step": 58230 }, { "epoch": 14.415841584158416, "grad_norm": 0.4596117436885834, "learning_rate": 5.654133644899614e-05, "loss": 0.465, "step": 58240 }, { "epoch": 14.418316831683168, "grad_norm": 0.4351334869861603, "learning_rate": 5.652767571198607e-05, "loss": 0.4634, "step": 58250 }, { "epoch": 14.42079207920792, "grad_norm": 0.4431022107601166, "learning_rate": 5.651401447924213e-05, "loss": 0.4656, "step": 58260 }, { "epoch": 14.423267326732674, "grad_norm": 0.47213131189346313, "learning_rate": 5.650035275180175e-05, "loss": 0.4676, "step": 58270 }, { "epoch": 14.425742574257425, "grad_norm": 0.45941153168678284, "learning_rate": 5.6486690530702505e-05, "loss": 0.4665, "step": 58280 }, { "epoch": 14.428217821782178, "grad_norm": 0.4493877589702606, "learning_rate": 5.6473027816981906e-05, "loss": 0.4659, "step": 58290 }, { "epoch": 14.430693069306932, "grad_norm": 0.45807453989982605, "learning_rate": 5.645936461167757e-05, "loss": 0.4659, "step": 58300 }, { "epoch": 14.433168316831683, "grad_norm": 0.44461098313331604, "learning_rate": 5.6445700915827114e-05, "loss": 0.4735, "step": 58310 }, { "epoch": 14.435643564356436, "grad_norm": 0.4433344304561615, "learning_rate": 5.643203673046822e-05, "loss": 0.4646, "step": 58320 }, { "epoch": 14.438118811881187, "grad_norm": 0.41633081436157227, "learning_rate": 5.641837205663858e-05, "loss": 0.4637, "step": 58330 }, { "epoch": 14.44059405940594, "grad_norm": 0.4645678997039795, "learning_rate": 5.6404706895375925e-05, "loss": 0.4613, "step": 58340 }, { "epoch": 14.443069306930694, "grad_norm": 0.44903847575187683, "learning_rate": 5.639104124771808e-05, "loss": 0.4649, "step": 58350 }, { "epoch": 14.445544554455445, "grad_norm": 0.4426872134208679, "learning_rate": 5.63773751147028e-05, "loss": 0.4655, "step": 58360 }, { "epoch": 14.448019801980198, "grad_norm": 0.4450054168701172, "learning_rate": 5.6363708497367974e-05, "loss": 0.4701, "step": 58370 }, { "epoch": 14.450495049504951, "grad_norm": 0.42910975217819214, "learning_rate": 5.63500413967515e-05, "loss": 0.4626, "step": 58380 }, { "epoch": 14.452970297029703, "grad_norm": 0.4448002874851227, "learning_rate": 5.6336373813891277e-05, "loss": 0.4685, "step": 58390 }, { "epoch": 14.455445544554456, "grad_norm": 0.444141149520874, "learning_rate": 5.632270574982528e-05, "loss": 0.4676, "step": 58400 }, { "epoch": 14.457920792079207, "grad_norm": 0.4403518736362457, "learning_rate": 5.6309037205591505e-05, "loss": 0.4682, "step": 58410 }, { "epoch": 14.46039603960396, "grad_norm": 0.4649355709552765, "learning_rate": 5.629536818222798e-05, "loss": 0.4679, "step": 58420 }, { "epoch": 14.462871287128714, "grad_norm": 0.44711625576019287, "learning_rate": 5.628169868077281e-05, "loss": 0.4668, "step": 58430 }, { "epoch": 14.465346534653465, "grad_norm": 0.4379241168498993, "learning_rate": 5.626802870226407e-05, "loss": 0.4625, "step": 58440 }, { "epoch": 14.467821782178218, "grad_norm": 0.48730000853538513, "learning_rate": 5.625435824773991e-05, "loss": 0.47, "step": 58450 }, { "epoch": 14.47029702970297, "grad_norm": 0.47484806180000305, "learning_rate": 5.624068731823853e-05, "loss": 0.4663, "step": 58460 }, { "epoch": 14.472772277227723, "grad_norm": 0.4556747376918793, "learning_rate": 5.622701591479812e-05, "loss": 0.4682, "step": 58470 }, { "epoch": 14.475247524752476, "grad_norm": 0.4534676969051361, "learning_rate": 5.621334403845696e-05, "loss": 0.4617, "step": 58480 }, { "epoch": 14.477722772277227, "grad_norm": 0.4508077800273895, "learning_rate": 5.6199671690253306e-05, "loss": 0.4721, "step": 58490 }, { "epoch": 14.48019801980198, "grad_norm": 0.43248018622398376, "learning_rate": 5.618599887122552e-05, "loss": 0.4698, "step": 58500 }, { "epoch": 14.482673267326733, "grad_norm": 0.4381757080554962, "learning_rate": 5.617232558241193e-05, "loss": 0.4676, "step": 58510 }, { "epoch": 14.485148514851485, "grad_norm": 0.4251505434513092, "learning_rate": 5.615865182485095e-05, "loss": 0.4695, "step": 58520 }, { "epoch": 14.487623762376238, "grad_norm": 0.4763842821121216, "learning_rate": 5.6144977599581016e-05, "loss": 0.4691, "step": 58530 }, { "epoch": 14.490099009900991, "grad_norm": 0.44949498772621155, "learning_rate": 5.613130290764058e-05, "loss": 0.4661, "step": 58540 }, { "epoch": 14.492574257425742, "grad_norm": 0.4660920798778534, "learning_rate": 5.6117627750068146e-05, "loss": 0.4675, "step": 58550 }, { "epoch": 14.495049504950495, "grad_norm": 0.42120710015296936, "learning_rate": 5.610395212790227e-05, "loss": 0.4647, "step": 58560 }, { "epoch": 14.497524752475247, "grad_norm": 0.44432592391967773, "learning_rate": 5.609027604218149e-05, "loss": 0.4688, "step": 58570 }, { "epoch": 14.5, "grad_norm": 0.4187135398387909, "learning_rate": 5.607659949394446e-05, "loss": 0.4679, "step": 58580 }, { "epoch": 14.502475247524753, "grad_norm": 0.43744024634361267, "learning_rate": 5.6062922484229784e-05, "loss": 0.4662, "step": 58590 }, { "epoch": 14.504950495049505, "grad_norm": 0.43643712997436523, "learning_rate": 5.604924501407617e-05, "loss": 0.4598, "step": 58600 }, { "epoch": 14.507425742574258, "grad_norm": 0.44809436798095703, "learning_rate": 5.603556708452231e-05, "loss": 0.4661, "step": 58610 }, { "epoch": 14.509900990099009, "grad_norm": 0.43289610743522644, "learning_rate": 5.602188869660697e-05, "loss": 0.4606, "step": 58620 }, { "epoch": 14.512376237623762, "grad_norm": 0.43297502398490906, "learning_rate": 5.600820985136892e-05, "loss": 0.4668, "step": 58630 }, { "epoch": 14.514851485148515, "grad_norm": 0.44622308015823364, "learning_rate": 5.599453054984698e-05, "loss": 0.47, "step": 58640 }, { "epoch": 14.517326732673267, "grad_norm": 0.46226510405540466, "learning_rate": 5.598085079308002e-05, "loss": 0.464, "step": 58650 }, { "epoch": 14.51980198019802, "grad_norm": 0.43905526399612427, "learning_rate": 5.5967170582106886e-05, "loss": 0.4654, "step": 58660 }, { "epoch": 14.522277227722773, "grad_norm": 0.46524158120155334, "learning_rate": 5.595348991796655e-05, "loss": 0.4717, "step": 58670 }, { "epoch": 14.524752475247524, "grad_norm": 0.4827989637851715, "learning_rate": 5.5939808801697926e-05, "loss": 0.4665, "step": 58680 }, { "epoch": 14.527227722772277, "grad_norm": 0.4822310209274292, "learning_rate": 5.5926127234340034e-05, "loss": 0.4667, "step": 58690 }, { "epoch": 14.52970297029703, "grad_norm": 0.4768467843532562, "learning_rate": 5.591244521693189e-05, "loss": 0.465, "step": 58700 }, { "epoch": 14.532178217821782, "grad_norm": 0.47861507534980774, "learning_rate": 5.589876275051254e-05, "loss": 0.4714, "step": 58710 }, { "epoch": 14.534653465346535, "grad_norm": 0.45432740449905396, "learning_rate": 5.588507983612109e-05, "loss": 0.4662, "step": 58720 }, { "epoch": 14.537128712871286, "grad_norm": 0.4553093910217285, "learning_rate": 5.5871396474796666e-05, "loss": 0.4725, "step": 58730 }, { "epoch": 14.53960396039604, "grad_norm": 0.4383953809738159, "learning_rate": 5.5857712667578424e-05, "loss": 0.47, "step": 58740 }, { "epoch": 14.542079207920793, "grad_norm": 0.42381203174591064, "learning_rate": 5.584402841550557e-05, "loss": 0.4703, "step": 58750 }, { "epoch": 14.544554455445544, "grad_norm": 0.4438800811767578, "learning_rate": 5.583034371961733e-05, "loss": 0.4642, "step": 58760 }, { "epoch": 14.547029702970297, "grad_norm": 0.4306689500808716, "learning_rate": 5.581665858095295e-05, "loss": 0.462, "step": 58770 }, { "epoch": 14.549504950495049, "grad_norm": 0.45132115483283997, "learning_rate": 5.580297300055174e-05, "loss": 0.4641, "step": 58780 }, { "epoch": 14.551980198019802, "grad_norm": 0.4328160285949707, "learning_rate": 5.5789286979453016e-05, "loss": 0.4673, "step": 58790 }, { "epoch": 14.554455445544555, "grad_norm": 0.45303189754486084, "learning_rate": 5.577560051869618e-05, "loss": 0.4653, "step": 58800 }, { "epoch": 14.556930693069306, "grad_norm": 0.42323341965675354, "learning_rate": 5.5761913619320575e-05, "loss": 0.465, "step": 58810 }, { "epoch": 14.55940594059406, "grad_norm": 0.43427756428718567, "learning_rate": 5.574822628236568e-05, "loss": 0.4648, "step": 58820 }, { "epoch": 14.561881188118813, "grad_norm": 0.4369516372680664, "learning_rate": 5.57345385088709e-05, "loss": 0.4652, "step": 58830 }, { "epoch": 14.564356435643564, "grad_norm": 0.4115322530269623, "learning_rate": 5.572085029987579e-05, "loss": 0.4665, "step": 58840 }, { "epoch": 14.566831683168317, "grad_norm": 0.44158411026000977, "learning_rate": 5.5707161656419845e-05, "loss": 0.4668, "step": 58850 }, { "epoch": 14.569306930693068, "grad_norm": 0.4159918427467346, "learning_rate": 5.569347257954265e-05, "loss": 0.4658, "step": 58860 }, { "epoch": 14.571782178217822, "grad_norm": 0.47180548310279846, "learning_rate": 5.5679783070283774e-05, "loss": 0.4688, "step": 58870 }, { "epoch": 14.574257425742575, "grad_norm": 0.4466851055622101, "learning_rate": 5.566609312968286e-05, "loss": 0.4678, "step": 58880 }, { "epoch": 14.576732673267326, "grad_norm": 0.4366268217563629, "learning_rate": 5.5652402758779556e-05, "loss": 0.4607, "step": 58890 }, { "epoch": 14.57920792079208, "grad_norm": 0.4192240536212921, "learning_rate": 5.5638711958613576e-05, "loss": 0.4672, "step": 58900 }, { "epoch": 14.581683168316832, "grad_norm": 0.4195210039615631, "learning_rate": 5.562502073022463e-05, "loss": 0.466, "step": 58910 }, { "epoch": 14.584158415841584, "grad_norm": 0.4310092628002167, "learning_rate": 5.561132907465248e-05, "loss": 0.4646, "step": 58920 }, { "epoch": 14.586633663366337, "grad_norm": 0.44845709204673767, "learning_rate": 5.559763699293693e-05, "loss": 0.4732, "step": 58930 }, { "epoch": 14.589108910891088, "grad_norm": 0.44719836115837097, "learning_rate": 5.558394448611777e-05, "loss": 0.463, "step": 58940 }, { "epoch": 14.591584158415841, "grad_norm": 0.44917717576026917, "learning_rate": 5.5570251555234906e-05, "loss": 0.4635, "step": 58950 }, { "epoch": 14.594059405940595, "grad_norm": 0.42836621403694153, "learning_rate": 5.5556558201328166e-05, "loss": 0.4657, "step": 58960 }, { "epoch": 14.596534653465346, "grad_norm": 0.43703895807266235, "learning_rate": 5.554286442543753e-05, "loss": 0.4692, "step": 58970 }, { "epoch": 14.599009900990099, "grad_norm": 0.43309491872787476, "learning_rate": 5.55291702286029e-05, "loss": 0.4633, "step": 58980 }, { "epoch": 14.601485148514852, "grad_norm": 0.43054142594337463, "learning_rate": 5.5515475611864284e-05, "loss": 0.4732, "step": 58990 }, { "epoch": 14.603960396039604, "grad_norm": 0.44230446219444275, "learning_rate": 5.550178057626171e-05, "loss": 0.4677, "step": 59000 }, { "epoch": 14.606435643564357, "grad_norm": 0.4771728813648224, "learning_rate": 5.54880851228352e-05, "loss": 0.4686, "step": 59010 }, { "epoch": 14.608910891089108, "grad_norm": 0.46205446124076843, "learning_rate": 5.5474389252624834e-05, "loss": 0.4715, "step": 59020 }, { "epoch": 14.611386138613861, "grad_norm": 0.45838621258735657, "learning_rate": 5.546069296667075e-05, "loss": 0.4651, "step": 59030 }, { "epoch": 14.613861386138614, "grad_norm": 0.4734512269496918, "learning_rate": 5.544699626601305e-05, "loss": 0.4654, "step": 59040 }, { "epoch": 14.616336633663366, "grad_norm": 0.4280191659927368, "learning_rate": 5.543329915169194e-05, "loss": 0.4675, "step": 59050 }, { "epoch": 14.618811881188119, "grad_norm": 0.43648555874824524, "learning_rate": 5.541960162474762e-05, "loss": 0.4654, "step": 59060 }, { "epoch": 14.621287128712872, "grad_norm": 0.4723166525363922, "learning_rate": 5.540590368622032e-05, "loss": 0.465, "step": 59070 }, { "epoch": 14.623762376237623, "grad_norm": 0.4444965720176697, "learning_rate": 5.539220533715031e-05, "loss": 0.4673, "step": 59080 }, { "epoch": 14.626237623762377, "grad_norm": 0.4220719635486603, "learning_rate": 5.537850657857789e-05, "loss": 0.4713, "step": 59090 }, { "epoch": 14.628712871287128, "grad_norm": 0.42394599318504333, "learning_rate": 5.536480741154339e-05, "loss": 0.4661, "step": 59100 }, { "epoch": 14.631188118811881, "grad_norm": 0.45004531741142273, "learning_rate": 5.5351107837087166e-05, "loss": 0.4601, "step": 59110 }, { "epoch": 14.633663366336634, "grad_norm": 0.41515347361564636, "learning_rate": 5.5337407856249644e-05, "loss": 0.4643, "step": 59120 }, { "epoch": 14.636138613861386, "grad_norm": 0.4331709146499634, "learning_rate": 5.5323707470071186e-05, "loss": 0.4708, "step": 59130 }, { "epoch": 14.638613861386139, "grad_norm": 0.4348033368587494, "learning_rate": 5.531000667959231e-05, "loss": 0.4652, "step": 59140 }, { "epoch": 14.641089108910892, "grad_norm": 0.44060638546943665, "learning_rate": 5.529630548585344e-05, "loss": 0.4645, "step": 59150 }, { "epoch": 14.643564356435643, "grad_norm": 0.42972609400749207, "learning_rate": 5.528260388989513e-05, "loss": 0.463, "step": 59160 }, { "epoch": 14.646039603960396, "grad_norm": 0.4627300500869751, "learning_rate": 5.526890189275793e-05, "loss": 0.4652, "step": 59170 }, { "epoch": 14.648514851485148, "grad_norm": 0.4212910234928131, "learning_rate": 5.525519949548239e-05, "loss": 0.4709, "step": 59180 }, { "epoch": 14.650990099009901, "grad_norm": 0.4608825743198395, "learning_rate": 5.524149669910913e-05, "loss": 0.4617, "step": 59190 }, { "epoch": 14.653465346534654, "grad_norm": 0.4744596481323242, "learning_rate": 5.5227793504678785e-05, "loss": 0.4644, "step": 59200 }, { "epoch": 14.655940594059405, "grad_norm": 0.43246138095855713, "learning_rate": 5.521408991323203e-05, "loss": 0.4684, "step": 59210 }, { "epoch": 14.658415841584159, "grad_norm": 0.4400622546672821, "learning_rate": 5.520038592580955e-05, "loss": 0.4663, "step": 59220 }, { "epoch": 14.660891089108912, "grad_norm": 0.43526560068130493, "learning_rate": 5.5186681543452076e-05, "loss": 0.4635, "step": 59230 }, { "epoch": 14.663366336633663, "grad_norm": 0.4462193548679352, "learning_rate": 5.517297676720038e-05, "loss": 0.4649, "step": 59240 }, { "epoch": 14.665841584158416, "grad_norm": 0.4670877754688263, "learning_rate": 5.515927159809524e-05, "loss": 0.4652, "step": 59250 }, { "epoch": 14.668316831683168, "grad_norm": 0.49223965406417847, "learning_rate": 5.5145566037177445e-05, "loss": 0.4664, "step": 59260 }, { "epoch": 14.67079207920792, "grad_norm": 0.44909656047821045, "learning_rate": 5.5131860085487895e-05, "loss": 0.4617, "step": 59270 }, { "epoch": 14.673267326732674, "grad_norm": 0.41665440797805786, "learning_rate": 5.511815374406743e-05, "loss": 0.4668, "step": 59280 }, { "epoch": 14.675742574257425, "grad_norm": 0.4278295338153839, "learning_rate": 5.510444701395697e-05, "loss": 0.4669, "step": 59290 }, { "epoch": 14.678217821782178, "grad_norm": 0.42172375321388245, "learning_rate": 5.509073989619743e-05, "loss": 0.4672, "step": 59300 }, { "epoch": 14.680693069306932, "grad_norm": 0.4336238205432892, "learning_rate": 5.50770323918298e-05, "loss": 0.4672, "step": 59310 }, { "epoch": 14.683168316831683, "grad_norm": 0.4278545081615448, "learning_rate": 5.5063324501895066e-05, "loss": 0.4681, "step": 59320 }, { "epoch": 14.685643564356436, "grad_norm": 0.4486401081085205, "learning_rate": 5.5049616227434255e-05, "loss": 0.4622, "step": 59330 }, { "epoch": 14.688118811881187, "grad_norm": 0.44434669613838196, "learning_rate": 5.503590756948841e-05, "loss": 0.4646, "step": 59340 }, { "epoch": 14.69059405940594, "grad_norm": 0.42804157733917236, "learning_rate": 5.50221985290986e-05, "loss": 0.4642, "step": 59350 }, { "epoch": 14.693069306930694, "grad_norm": 0.41085872054100037, "learning_rate": 5.500848910730598e-05, "loss": 0.4673, "step": 59360 }, { "epoch": 14.695544554455445, "grad_norm": 0.42803120613098145, "learning_rate": 5.499477930515167e-05, "loss": 0.466, "step": 59370 }, { "epoch": 14.698019801980198, "grad_norm": 0.4211589992046356, "learning_rate": 5.498106912367682e-05, "loss": 0.4625, "step": 59380 }, { "epoch": 14.700495049504951, "grad_norm": 0.4394269287586212, "learning_rate": 5.496735856392266e-05, "loss": 0.4619, "step": 59390 }, { "epoch": 14.702970297029703, "grad_norm": 0.4272172152996063, "learning_rate": 5.495364762693038e-05, "loss": 0.4659, "step": 59400 }, { "epoch": 14.705445544554456, "grad_norm": 0.41623687744140625, "learning_rate": 5.4939936313741245e-05, "loss": 0.4655, "step": 59410 }, { "epoch": 14.707920792079207, "grad_norm": 0.4408927857875824, "learning_rate": 5.492622462539658e-05, "loss": 0.4608, "step": 59420 }, { "epoch": 14.71039603960396, "grad_norm": 0.4508751630783081, "learning_rate": 5.4912512562937635e-05, "loss": 0.4684, "step": 59430 }, { "epoch": 14.712871287128714, "grad_norm": 0.4689216911792755, "learning_rate": 5.48988001274058e-05, "loss": 0.4638, "step": 59440 }, { "epoch": 14.715346534653465, "grad_norm": 0.47953397035598755, "learning_rate": 5.488508731984241e-05, "loss": 0.4695, "step": 59450 }, { "epoch": 14.717821782178218, "grad_norm": 0.4259132444858551, "learning_rate": 5.487137414128889e-05, "loss": 0.4612, "step": 59460 }, { "epoch": 14.72029702970297, "grad_norm": 0.41609689593315125, "learning_rate": 5.4857660592786656e-05, "loss": 0.4653, "step": 59470 }, { "epoch": 14.722772277227723, "grad_norm": 0.42620670795440674, "learning_rate": 5.4843946675377144e-05, "loss": 0.464, "step": 59480 }, { "epoch": 14.725247524752476, "grad_norm": 0.4116571247577667, "learning_rate": 5.4830232390101854e-05, "loss": 0.4679, "step": 59490 }, { "epoch": 14.727722772277227, "grad_norm": 0.4466497302055359, "learning_rate": 5.481651773800228e-05, "loss": 0.4667, "step": 59500 }, { "epoch": 14.73019801980198, "grad_norm": 0.44624748826026917, "learning_rate": 5.480280272012e-05, "loss": 0.471, "step": 59510 }, { "epoch": 14.732673267326733, "grad_norm": 0.43142303824424744, "learning_rate": 5.4789087337496515e-05, "loss": 0.4656, "step": 59520 }, { "epoch": 14.735148514851485, "grad_norm": 0.4185386896133423, "learning_rate": 5.477537159117348e-05, "loss": 0.467, "step": 59530 }, { "epoch": 14.737623762376238, "grad_norm": 0.43063756823539734, "learning_rate": 5.476165548219246e-05, "loss": 0.4658, "step": 59540 }, { "epoch": 14.740099009900991, "grad_norm": 0.4183155596256256, "learning_rate": 5.4747939011595154e-05, "loss": 0.4638, "step": 59550 }, { "epoch": 14.742574257425742, "grad_norm": 0.432296484708786, "learning_rate": 5.47342221804232e-05, "loss": 0.4636, "step": 59560 }, { "epoch": 14.745049504950495, "grad_norm": 0.43242138624191284, "learning_rate": 5.472050498971833e-05, "loss": 0.4678, "step": 59570 }, { "epoch": 14.747524752475247, "grad_norm": 0.44761404395103455, "learning_rate": 5.470678744052223e-05, "loss": 0.4719, "step": 59580 }, { "epoch": 14.75, "grad_norm": 0.44681495428085327, "learning_rate": 5.4693069533876717e-05, "loss": 0.4646, "step": 59590 }, { "epoch": 14.752475247524753, "grad_norm": 0.4613163471221924, "learning_rate": 5.467935127082352e-05, "loss": 0.4653, "step": 59600 }, { "epoch": 14.754950495049505, "grad_norm": 0.4257645308971405, "learning_rate": 5.466563265240448e-05, "loss": 0.4664, "step": 59610 }, { "epoch": 14.757425742574258, "grad_norm": 0.41859012842178345, "learning_rate": 5.4651913679661446e-05, "loss": 0.4599, "step": 59620 }, { "epoch": 14.759900990099009, "grad_norm": 0.44250261783599854, "learning_rate": 5.463819435363626e-05, "loss": 0.4622, "step": 59630 }, { "epoch": 14.762376237623762, "grad_norm": 0.412847638130188, "learning_rate": 5.462447467537083e-05, "loss": 0.4696, "step": 59640 }, { "epoch": 14.764851485148515, "grad_norm": 0.4258665144443512, "learning_rate": 5.461075464590705e-05, "loss": 0.4637, "step": 59650 }, { "epoch": 14.767326732673267, "grad_norm": 0.4321511387825012, "learning_rate": 5.4597034266286896e-05, "loss": 0.4703, "step": 59660 }, { "epoch": 14.76980198019802, "grad_norm": 0.42865893244743347, "learning_rate": 5.4583313537552315e-05, "loss": 0.4653, "step": 59670 }, { "epoch": 14.772277227722773, "grad_norm": 0.41646909713745117, "learning_rate": 5.4569592460745325e-05, "loss": 0.4665, "step": 59680 }, { "epoch": 14.774752475247524, "grad_norm": 0.4304431676864624, "learning_rate": 5.455587103690795e-05, "loss": 0.4629, "step": 59690 }, { "epoch": 14.777227722772277, "grad_norm": 0.43845605850219727, "learning_rate": 5.454214926708223e-05, "loss": 0.4685, "step": 59700 }, { "epoch": 14.77970297029703, "grad_norm": 0.4395665228366852, "learning_rate": 5.4528427152310244e-05, "loss": 0.4686, "step": 59710 }, { "epoch": 14.782178217821782, "grad_norm": 0.4373319745063782, "learning_rate": 5.4514704693634114e-05, "loss": 0.4658, "step": 59720 }, { "epoch": 14.784653465346535, "grad_norm": 0.45163387060165405, "learning_rate": 5.4500981892095934e-05, "loss": 0.4646, "step": 59730 }, { "epoch": 14.787128712871286, "grad_norm": 0.4014970362186432, "learning_rate": 5.448725874873791e-05, "loss": 0.4662, "step": 59740 }, { "epoch": 14.78960396039604, "grad_norm": 0.4319639503955841, "learning_rate": 5.4473535264602185e-05, "loss": 0.4649, "step": 59750 }, { "epoch": 14.792079207920793, "grad_norm": 0.42415761947631836, "learning_rate": 5.445981144073098e-05, "loss": 0.4638, "step": 59760 }, { "epoch": 14.794554455445544, "grad_norm": 0.4222150444984436, "learning_rate": 5.4446087278166526e-05, "loss": 0.4703, "step": 59770 }, { "epoch": 14.797029702970297, "grad_norm": 0.44088295102119446, "learning_rate": 5.4432362777951095e-05, "loss": 0.4606, "step": 59780 }, { "epoch": 14.799504950495049, "grad_norm": 0.43010714650154114, "learning_rate": 5.4418637941126946e-05, "loss": 0.4625, "step": 59790 }, { "epoch": 14.801980198019802, "grad_norm": 0.418795645236969, "learning_rate": 5.4404912768736404e-05, "loss": 0.4665, "step": 59800 }, { "epoch": 14.804455445544555, "grad_norm": 0.44333159923553467, "learning_rate": 5.439118726182184e-05, "loss": 0.471, "step": 59810 }, { "epoch": 14.806930693069306, "grad_norm": 0.4381115138530731, "learning_rate": 5.437746142142555e-05, "loss": 0.4671, "step": 59820 }, { "epoch": 14.80940594059406, "grad_norm": 0.4277040362358093, "learning_rate": 5.436373524858997e-05, "loss": 0.4594, "step": 59830 }, { "epoch": 14.811881188118813, "grad_norm": 0.42326614260673523, "learning_rate": 5.4350008744357485e-05, "loss": 0.4646, "step": 59840 }, { "epoch": 14.814356435643564, "grad_norm": 0.4246821403503418, "learning_rate": 5.433628190977055e-05, "loss": 0.4716, "step": 59850 }, { "epoch": 14.816831683168317, "grad_norm": 0.43500471115112305, "learning_rate": 5.432255474587162e-05, "loss": 0.4635, "step": 59860 }, { "epoch": 14.819306930693068, "grad_norm": 0.4498732089996338, "learning_rate": 5.4308827253703184e-05, "loss": 0.4709, "step": 59870 }, { "epoch": 14.821782178217822, "grad_norm": 0.4367036521434784, "learning_rate": 5.429509943430775e-05, "loss": 0.4634, "step": 59880 }, { "epoch": 14.824257425742575, "grad_norm": 0.4366390109062195, "learning_rate": 5.428137128872787e-05, "loss": 0.4682, "step": 59890 }, { "epoch": 14.826732673267326, "grad_norm": 0.42235687375068665, "learning_rate": 5.4267642818006074e-05, "loss": 0.4574, "step": 59900 }, { "epoch": 14.82920792079208, "grad_norm": 0.4423868656158447, "learning_rate": 5.425391402318498e-05, "loss": 0.4635, "step": 59910 }, { "epoch": 14.831683168316832, "grad_norm": 0.4093429148197174, "learning_rate": 5.42401849053072e-05, "loss": 0.4582, "step": 59920 }, { "epoch": 14.834158415841584, "grad_norm": 0.4517213702201843, "learning_rate": 5.422645546541535e-05, "loss": 0.4647, "step": 59930 }, { "epoch": 14.836633663366337, "grad_norm": 0.40761807560920715, "learning_rate": 5.4212725704552104e-05, "loss": 0.4601, "step": 59940 }, { "epoch": 14.839108910891088, "grad_norm": 0.46008890867233276, "learning_rate": 5.419899562376013e-05, "loss": 0.4679, "step": 59950 }, { "epoch": 14.841584158415841, "grad_norm": 0.4325108826160431, "learning_rate": 5.418526522408217e-05, "loss": 0.4623, "step": 59960 }, { "epoch": 14.844059405940595, "grad_norm": 0.43396276235580444, "learning_rate": 5.417153450656092e-05, "loss": 0.4651, "step": 59970 }, { "epoch": 14.846534653465346, "grad_norm": 0.4341215193271637, "learning_rate": 5.4157803472239164e-05, "loss": 0.4681, "step": 59980 }, { "epoch": 14.849009900990099, "grad_norm": 0.43436652421951294, "learning_rate": 5.414407212215966e-05, "loss": 0.472, "step": 59990 }, { "epoch": 14.851485148514852, "grad_norm": 0.4134324789047241, "learning_rate": 5.4130340457365233e-05, "loss": 0.4645, "step": 60000 }, { "epoch": 14.853960396039604, "grad_norm": 0.42642590403556824, "learning_rate": 5.411660847889871e-05, "loss": 0.4628, "step": 60010 }, { "epoch": 14.856435643564357, "grad_norm": 0.4344082176685333, "learning_rate": 5.410287618780294e-05, "loss": 0.467, "step": 60020 }, { "epoch": 14.858910891089108, "grad_norm": 0.43609002232551575, "learning_rate": 5.408914358512081e-05, "loss": 0.4683, "step": 60030 }, { "epoch": 14.861386138613861, "grad_norm": 0.4331074059009552, "learning_rate": 5.40754106718952e-05, "loss": 0.4663, "step": 60040 }, { "epoch": 14.863861386138614, "grad_norm": 0.42441853880882263, "learning_rate": 5.406167744916904e-05, "loss": 0.4674, "step": 60050 }, { "epoch": 14.866336633663366, "grad_norm": 0.45026034116744995, "learning_rate": 5.404794391798528e-05, "loss": 0.4713, "step": 60060 }, { "epoch": 14.868811881188119, "grad_norm": 0.4104576110839844, "learning_rate": 5.40342100793869e-05, "loss": 0.4631, "step": 60070 }, { "epoch": 14.871287128712872, "grad_norm": 0.4360179603099823, "learning_rate": 5.402047593441689e-05, "loss": 0.4693, "step": 60080 }, { "epoch": 14.873762376237623, "grad_norm": 0.45726221799850464, "learning_rate": 5.4006741484118264e-05, "loss": 0.4663, "step": 60090 }, { "epoch": 14.876237623762377, "grad_norm": 0.41974231600761414, "learning_rate": 5.3993006729534054e-05, "loss": 0.4654, "step": 60100 }, { "epoch": 14.878712871287128, "grad_norm": 0.41761600971221924, "learning_rate": 5.397927167170735e-05, "loss": 0.4644, "step": 60110 }, { "epoch": 14.881188118811881, "grad_norm": 0.4323881268501282, "learning_rate": 5.396553631168122e-05, "loss": 0.4718, "step": 60120 }, { "epoch": 14.883663366336634, "grad_norm": 0.4274595379829407, "learning_rate": 5.395180065049879e-05, "loss": 0.4681, "step": 60130 }, { "epoch": 14.886138613861386, "grad_norm": 0.4420296847820282, "learning_rate": 5.393806468920315e-05, "loss": 0.4586, "step": 60140 }, { "epoch": 14.888613861386139, "grad_norm": 0.44229230284690857, "learning_rate": 5.39243284288375e-05, "loss": 0.4631, "step": 60150 }, { "epoch": 14.891089108910892, "grad_norm": 0.44786337018013, "learning_rate": 5.391059187044501e-05, "loss": 0.4688, "step": 60160 }, { "epoch": 14.893564356435643, "grad_norm": 0.404521644115448, "learning_rate": 5.389685501506887e-05, "loss": 0.4656, "step": 60170 }, { "epoch": 14.896039603960396, "grad_norm": 0.42261335253715515, "learning_rate": 5.388311786375232e-05, "loss": 0.4652, "step": 60180 }, { "epoch": 14.898514851485148, "grad_norm": 0.4223041534423828, "learning_rate": 5.3869380417538584e-05, "loss": 0.4634, "step": 60190 }, { "epoch": 14.900990099009901, "grad_norm": 0.4198046624660492, "learning_rate": 5.385564267747093e-05, "loss": 0.4674, "step": 60200 }, { "epoch": 14.903465346534654, "grad_norm": 0.42142751812934875, "learning_rate": 5.3841904644592664e-05, "loss": 0.4568, "step": 60210 }, { "epoch": 14.905940594059405, "grad_norm": 0.4578849673271179, "learning_rate": 5.3828166319947113e-05, "loss": 0.4614, "step": 60220 }, { "epoch": 14.908415841584159, "grad_norm": 0.4431530833244324, "learning_rate": 5.381442770457759e-05, "loss": 0.4575, "step": 60230 }, { "epoch": 14.910891089108912, "grad_norm": 0.46166762709617615, "learning_rate": 5.3800688799527445e-05, "loss": 0.4636, "step": 60240 }, { "epoch": 14.913366336633663, "grad_norm": 0.4612281918525696, "learning_rate": 5.3786949605840064e-05, "loss": 0.4624, "step": 60250 }, { "epoch": 14.915841584158416, "grad_norm": 0.45534151792526245, "learning_rate": 5.377321012455887e-05, "loss": 0.4686, "step": 60260 }, { "epoch": 14.918316831683168, "grad_norm": 0.43000465631484985, "learning_rate": 5.375947035672724e-05, "loss": 0.4651, "step": 60270 }, { "epoch": 14.92079207920792, "grad_norm": 0.42153531312942505, "learning_rate": 5.3745730303388674e-05, "loss": 0.4687, "step": 60280 }, { "epoch": 14.923267326732674, "grad_norm": 0.45484665036201477, "learning_rate": 5.3731989965586596e-05, "loss": 0.4615, "step": 60290 }, { "epoch": 14.925742574257425, "grad_norm": 0.41243717074394226, "learning_rate": 5.371824934436452e-05, "loss": 0.4698, "step": 60300 }, { "epoch": 14.928217821782178, "grad_norm": 0.428608238697052, "learning_rate": 5.370450844076593e-05, "loss": 0.4633, "step": 60310 }, { "epoch": 14.930693069306932, "grad_norm": 0.42500948905944824, "learning_rate": 5.369076725583438e-05, "loss": 0.4651, "step": 60320 }, { "epoch": 14.933168316831683, "grad_norm": 0.4466593563556671, "learning_rate": 5.367702579061342e-05, "loss": 0.4681, "step": 60330 }, { "epoch": 14.935643564356436, "grad_norm": 0.4194672405719757, "learning_rate": 5.366328404614662e-05, "loss": 0.4609, "step": 60340 }, { "epoch": 14.938118811881187, "grad_norm": 0.43284061551094055, "learning_rate": 5.364954202347756e-05, "loss": 0.4643, "step": 60350 }, { "epoch": 14.94059405940594, "grad_norm": 0.44580596685409546, "learning_rate": 5.363579972364987e-05, "loss": 0.4666, "step": 60360 }, { "epoch": 14.943069306930694, "grad_norm": 0.4458353817462921, "learning_rate": 5.362205714770718e-05, "loss": 0.4656, "step": 60370 }, { "epoch": 14.945544554455445, "grad_norm": 0.4201640188694, "learning_rate": 5.360831429669316e-05, "loss": 0.4674, "step": 60380 }, { "epoch": 14.948019801980198, "grad_norm": 0.4234355092048645, "learning_rate": 5.359457117165149e-05, "loss": 0.4701, "step": 60390 }, { "epoch": 14.950495049504951, "grad_norm": 0.430165559053421, "learning_rate": 5.3580827773625854e-05, "loss": 0.4633, "step": 60400 }, { "epoch": 14.952970297029703, "grad_norm": 0.4237815737724304, "learning_rate": 5.356708410365998e-05, "loss": 0.4704, "step": 60410 }, { "epoch": 14.955445544554456, "grad_norm": 0.40903809666633606, "learning_rate": 5.35533401627976e-05, "loss": 0.4667, "step": 60420 }, { "epoch": 14.957920792079207, "grad_norm": 0.4568192958831787, "learning_rate": 5.353959595208251e-05, "loss": 0.4683, "step": 60430 }, { "epoch": 14.96039603960396, "grad_norm": 0.45594099164009094, "learning_rate": 5.352585147255844e-05, "loss": 0.4616, "step": 60440 }, { "epoch": 14.962871287128714, "grad_norm": 0.45777571201324463, "learning_rate": 5.351210672526924e-05, "loss": 0.4656, "step": 60450 }, { "epoch": 14.965346534653465, "grad_norm": 0.40915733575820923, "learning_rate": 5.349836171125869e-05, "loss": 0.4744, "step": 60460 }, { "epoch": 14.967821782178218, "grad_norm": 0.43969711661338806, "learning_rate": 5.3484616431570676e-05, "loss": 0.4663, "step": 60470 }, { "epoch": 14.97029702970297, "grad_norm": 0.43060818314552307, "learning_rate": 5.347087088724902e-05, "loss": 0.467, "step": 60480 }, { "epoch": 14.972772277227723, "grad_norm": 0.4333745837211609, "learning_rate": 5.345712507933764e-05, "loss": 0.4623, "step": 60490 }, { "epoch": 14.975247524752476, "grad_norm": 0.4216449558734894, "learning_rate": 5.3443379008880436e-05, "loss": 0.4627, "step": 60500 }, { "epoch": 14.977722772277227, "grad_norm": 0.44756194949150085, "learning_rate": 5.3429632676921296e-05, "loss": 0.4705, "step": 60510 }, { "epoch": 14.98019801980198, "grad_norm": 0.4394982159137726, "learning_rate": 5.3415886084504194e-05, "loss": 0.4718, "step": 60520 }, { "epoch": 14.982673267326733, "grad_norm": 0.4224836528301239, "learning_rate": 5.34021392326731e-05, "loss": 0.4699, "step": 60530 }, { "epoch": 14.985148514851485, "grad_norm": 0.4499940872192383, "learning_rate": 5.3388392122471976e-05, "loss": 0.4698, "step": 60540 }, { "epoch": 14.987623762376238, "grad_norm": 0.41553524136543274, "learning_rate": 5.3374644754944836e-05, "loss": 0.4642, "step": 60550 }, { "epoch": 14.990099009900991, "grad_norm": 0.4380952715873718, "learning_rate": 5.336089713113569e-05, "loss": 0.4664, "step": 60560 }, { "epoch": 14.992574257425742, "grad_norm": 0.4419282376766205, "learning_rate": 5.334714925208858e-05, "loss": 0.4648, "step": 60570 }, { "epoch": 14.995049504950495, "grad_norm": 0.4440552592277527, "learning_rate": 5.3333401118847595e-05, "loss": 0.4699, "step": 60580 }, { "epoch": 14.997524752475247, "grad_norm": 0.45296865701675415, "learning_rate": 5.331965273245677e-05, "loss": 0.4693, "step": 60590 }, { "epoch": 15.0, "grad_norm": 0.43074023723602295, "learning_rate": 5.330590409396026e-05, "loss": 0.4655, "step": 60600 }, { "epoch": 15.002475247524753, "grad_norm": 0.4217928946018219, "learning_rate": 5.3292155204402116e-05, "loss": 0.4623, "step": 60610 }, { "epoch": 15.004950495049505, "grad_norm": 0.4266216456890106, "learning_rate": 5.3278406064826526e-05, "loss": 0.4678, "step": 60620 }, { "epoch": 15.007425742574258, "grad_norm": 0.43646377325057983, "learning_rate": 5.3264656676277635e-05, "loss": 0.4633, "step": 60630 }, { "epoch": 15.009900990099009, "grad_norm": 0.42281386256217957, "learning_rate": 5.32509070397996e-05, "loss": 0.4665, "step": 60640 }, { "epoch": 15.012376237623762, "grad_norm": 0.43095332384109497, "learning_rate": 5.3237157156436646e-05, "loss": 0.4586, "step": 60650 }, { "epoch": 15.014851485148515, "grad_norm": 0.45917561650276184, "learning_rate": 5.322340702723295e-05, "loss": 0.4625, "step": 60660 }, { "epoch": 15.017326732673267, "grad_norm": 0.4344160854816437, "learning_rate": 5.3209656653232784e-05, "loss": 0.4633, "step": 60670 }, { "epoch": 15.01980198019802, "grad_norm": 0.44564348459243774, "learning_rate": 5.319590603548037e-05, "loss": 0.4617, "step": 60680 }, { "epoch": 15.022277227722773, "grad_norm": 0.4396517276763916, "learning_rate": 5.3182155175019976e-05, "loss": 0.4667, "step": 60690 }, { "epoch": 15.024752475247524, "grad_norm": 0.4368729591369629, "learning_rate": 5.316840407289591e-05, "loss": 0.4681, "step": 60700 }, { "epoch": 15.027227722772277, "grad_norm": 0.4204849600791931, "learning_rate": 5.315465273015245e-05, "loss": 0.4705, "step": 60710 }, { "epoch": 15.029702970297029, "grad_norm": 0.434926837682724, "learning_rate": 5.314090114783394e-05, "loss": 0.4616, "step": 60720 }, { "epoch": 15.032178217821782, "grad_norm": 0.41326600313186646, "learning_rate": 5.3127149326984725e-05, "loss": 0.4591, "step": 60730 }, { "epoch": 15.034653465346535, "grad_norm": 0.44197553396224976, "learning_rate": 5.311339726864915e-05, "loss": 0.4623, "step": 60740 }, { "epoch": 15.037128712871286, "grad_norm": 0.4363371431827545, "learning_rate": 5.30996449738716e-05, "loss": 0.469, "step": 60750 }, { "epoch": 15.03960396039604, "grad_norm": 0.42576515674591064, "learning_rate": 5.308589244369646e-05, "loss": 0.463, "step": 60760 }, { "epoch": 15.042079207920793, "grad_norm": 0.43761736154556274, "learning_rate": 5.3072139679168164e-05, "loss": 0.4681, "step": 60770 }, { "epoch": 15.044554455445544, "grad_norm": 0.43552374839782715, "learning_rate": 5.3058386681331126e-05, "loss": 0.4688, "step": 60780 }, { "epoch": 15.047029702970297, "grad_norm": 0.4168100357055664, "learning_rate": 5.3044633451229807e-05, "loss": 0.4649, "step": 60790 }, { "epoch": 15.049504950495049, "grad_norm": 0.43861687183380127, "learning_rate": 5.303087998990868e-05, "loss": 0.4699, "step": 60800 }, { "epoch": 15.051980198019802, "grad_norm": 0.42367351055145264, "learning_rate": 5.30171262984122e-05, "loss": 0.4684, "step": 60810 }, { "epoch": 15.054455445544555, "grad_norm": 0.43279123306274414, "learning_rate": 5.300337237778491e-05, "loss": 0.4606, "step": 60820 }, { "epoch": 15.056930693069306, "grad_norm": 0.45012637972831726, "learning_rate": 5.298961822907129e-05, "loss": 0.4677, "step": 60830 }, { "epoch": 15.05940594059406, "grad_norm": 0.41835132241249084, "learning_rate": 5.297586385331591e-05, "loss": 0.4659, "step": 60840 }, { "epoch": 15.061881188118813, "grad_norm": 0.44451117515563965, "learning_rate": 5.296210925156331e-05, "loss": 0.4702, "step": 60850 }, { "epoch": 15.064356435643564, "grad_norm": 0.417241632938385, "learning_rate": 5.294835442485805e-05, "loss": 0.4611, "step": 60860 }, { "epoch": 15.066831683168317, "grad_norm": 0.44532865285873413, "learning_rate": 5.293459937424473e-05, "loss": 0.4704, "step": 60870 }, { "epoch": 15.069306930693068, "grad_norm": 0.46430057287216187, "learning_rate": 5.292084410076796e-05, "loss": 0.4635, "step": 60880 }, { "epoch": 15.071782178217822, "grad_norm": 0.45223405957221985, "learning_rate": 5.2907088605472345e-05, "loss": 0.4647, "step": 60890 }, { "epoch": 15.074257425742575, "grad_norm": 0.4402162432670593, "learning_rate": 5.2893332889402557e-05, "loss": 0.4633, "step": 60900 }, { "epoch": 15.076732673267326, "grad_norm": 0.43994131684303284, "learning_rate": 5.2879576953603206e-05, "loss": 0.4704, "step": 60910 }, { "epoch": 15.07920792079208, "grad_norm": 0.42091262340545654, "learning_rate": 5.2865820799119e-05, "loss": 0.4593, "step": 60920 }, { "epoch": 15.081683168316832, "grad_norm": 0.39041775465011597, "learning_rate": 5.285206442699462e-05, "loss": 0.4614, "step": 60930 }, { "epoch": 15.084158415841584, "grad_norm": 0.4217328131198883, "learning_rate": 5.2838307838274767e-05, "loss": 0.4642, "step": 60940 }, { "epoch": 15.086633663366337, "grad_norm": 0.4186389148235321, "learning_rate": 5.2824551034004166e-05, "loss": 0.4648, "step": 60950 }, { "epoch": 15.089108910891088, "grad_norm": 0.44742947816848755, "learning_rate": 5.281079401522754e-05, "loss": 0.4641, "step": 60960 }, { "epoch": 15.091584158415841, "grad_norm": 0.41158026456832886, "learning_rate": 5.279703678298968e-05, "loss": 0.464, "step": 60970 }, { "epoch": 15.094059405940595, "grad_norm": 0.4300667345523834, "learning_rate": 5.2783279338335326e-05, "loss": 0.462, "step": 60980 }, { "epoch": 15.096534653465346, "grad_norm": 0.44059109687805176, "learning_rate": 5.276952168230927e-05, "loss": 0.4664, "step": 60990 }, { "epoch": 15.099009900990099, "grad_norm": 0.4539371728897095, "learning_rate": 5.2755763815956324e-05, "loss": 0.4663, "step": 61000 }, { "epoch": 15.101485148514852, "grad_norm": 0.4297918975353241, "learning_rate": 5.274200574032131e-05, "loss": 0.468, "step": 61010 }, { "epoch": 15.103960396039604, "grad_norm": 0.4276983439922333, "learning_rate": 5.272824745644904e-05, "loss": 0.4619, "step": 61020 }, { "epoch": 15.106435643564357, "grad_norm": 0.4569064676761627, "learning_rate": 5.271448896538439e-05, "loss": 0.465, "step": 61030 }, { "epoch": 15.108910891089108, "grad_norm": 0.4407162070274353, "learning_rate": 5.270073026817221e-05, "loss": 0.4662, "step": 61040 }, { "epoch": 15.111386138613861, "grad_norm": 0.42028388381004333, "learning_rate": 5.268697136585741e-05, "loss": 0.4677, "step": 61050 }, { "epoch": 15.113861386138614, "grad_norm": 0.413433700799942, "learning_rate": 5.267321225948484e-05, "loss": 0.4642, "step": 61060 }, { "epoch": 15.116336633663366, "grad_norm": 0.4254726469516754, "learning_rate": 5.265945295009945e-05, "loss": 0.4683, "step": 61070 }, { "epoch": 15.118811881188119, "grad_norm": 0.4156513214111328, "learning_rate": 5.264569343874617e-05, "loss": 0.4674, "step": 61080 }, { "epoch": 15.121287128712872, "grad_norm": 0.4472174346446991, "learning_rate": 5.263193372646992e-05, "loss": 0.4662, "step": 61090 }, { "epoch": 15.123762376237623, "grad_norm": 0.49070391058921814, "learning_rate": 5.2618173814315686e-05, "loss": 0.4634, "step": 61100 }, { "epoch": 15.126237623762377, "grad_norm": 0.4399355947971344, "learning_rate": 5.26044137033284e-05, "loss": 0.4684, "step": 61110 }, { "epoch": 15.128712871287128, "grad_norm": 0.45934346318244934, "learning_rate": 5.2590653394553127e-05, "loss": 0.4631, "step": 61120 }, { "epoch": 15.131188118811881, "grad_norm": 0.4370071589946747, "learning_rate": 5.257689288903479e-05, "loss": 0.4639, "step": 61130 }, { "epoch": 15.133663366336634, "grad_norm": 0.4084262549877167, "learning_rate": 5.256313218781846e-05, "loss": 0.4596, "step": 61140 }, { "epoch": 15.136138613861386, "grad_norm": 0.4424203634262085, "learning_rate": 5.2549371291949146e-05, "loss": 0.4606, "step": 61150 }, { "epoch": 15.138613861386139, "grad_norm": 0.419411838054657, "learning_rate": 5.253561020247191e-05, "loss": 0.4639, "step": 61160 }, { "epoch": 15.141089108910892, "grad_norm": 0.4443139135837555, "learning_rate": 5.2521848920431815e-05, "loss": 0.4577, "step": 61170 }, { "epoch": 15.143564356435643, "grad_norm": 0.4251137971878052, "learning_rate": 5.2508087446873947e-05, "loss": 0.4634, "step": 61180 }, { "epoch": 15.146039603960396, "grad_norm": 0.44249427318573, "learning_rate": 5.2494325782843366e-05, "loss": 0.4623, "step": 61190 }, { "epoch": 15.148514851485148, "grad_norm": 0.454041451215744, "learning_rate": 5.2480563929385216e-05, "loss": 0.4626, "step": 61200 }, { "epoch": 15.150990099009901, "grad_norm": 0.43805694580078125, "learning_rate": 5.24668018875446e-05, "loss": 0.4647, "step": 61210 }, { "epoch": 15.153465346534654, "grad_norm": 0.4314875304698944, "learning_rate": 5.2453039658366666e-05, "loss": 0.4654, "step": 61220 }, { "epoch": 15.155940594059405, "grad_norm": 0.4202844798564911, "learning_rate": 5.243927724289656e-05, "loss": 0.4685, "step": 61230 }, { "epoch": 15.158415841584159, "grad_norm": 0.44710367918014526, "learning_rate": 5.242551464217944e-05, "loss": 0.4655, "step": 61240 }, { "epoch": 15.160891089108912, "grad_norm": 0.44451791048049927, "learning_rate": 5.24117518572605e-05, "loss": 0.4627, "step": 61250 }, { "epoch": 15.163366336633663, "grad_norm": 0.49076414108276367, "learning_rate": 5.23979888891849e-05, "loss": 0.4607, "step": 61260 }, { "epoch": 15.165841584158416, "grad_norm": 0.4199361801147461, "learning_rate": 5.238422573899791e-05, "loss": 0.4705, "step": 61270 }, { "epoch": 15.168316831683168, "grad_norm": 0.4634381830692291, "learning_rate": 5.237046240774467e-05, "loss": 0.4702, "step": 61280 }, { "epoch": 15.17079207920792, "grad_norm": 0.4210541248321533, "learning_rate": 5.235669889647048e-05, "loss": 0.4672, "step": 61290 }, { "epoch": 15.173267326732674, "grad_norm": 0.4418328106403351, "learning_rate": 5.234293520622053e-05, "loss": 0.4602, "step": 61300 }, { "epoch": 15.175742574257425, "grad_norm": 0.39919814467430115, "learning_rate": 5.232917133804014e-05, "loss": 0.4639, "step": 61310 }, { "epoch": 15.178217821782178, "grad_norm": 0.42153117060661316, "learning_rate": 5.231540729297455e-05, "loss": 0.4655, "step": 61320 }, { "epoch": 15.180693069306932, "grad_norm": 0.4155046045780182, "learning_rate": 5.2301643072069065e-05, "loss": 0.4623, "step": 61330 }, { "epoch": 15.183168316831683, "grad_norm": 0.4123366177082062, "learning_rate": 5.228787867636897e-05, "loss": 0.4675, "step": 61340 }, { "epoch": 15.185643564356436, "grad_norm": 0.4177636206150055, "learning_rate": 5.227411410691958e-05, "loss": 0.4625, "step": 61350 }, { "epoch": 15.188118811881187, "grad_norm": 0.44712916016578674, "learning_rate": 5.226034936476625e-05, "loss": 0.4599, "step": 61360 }, { "epoch": 15.19059405940594, "grad_norm": 0.4094912111759186, "learning_rate": 5.224658445095428e-05, "loss": 0.4651, "step": 61370 }, { "epoch": 15.193069306930694, "grad_norm": 0.4257690906524658, "learning_rate": 5.223281936652907e-05, "loss": 0.4638, "step": 61380 }, { "epoch": 15.195544554455445, "grad_norm": 0.40778642892837524, "learning_rate": 5.221905411253596e-05, "loss": 0.4703, "step": 61390 }, { "epoch": 15.198019801980198, "grad_norm": 0.4255067706108093, "learning_rate": 5.220528869002033e-05, "loss": 0.4662, "step": 61400 }, { "epoch": 15.200495049504951, "grad_norm": 0.43451300263404846, "learning_rate": 5.2191523100027564e-05, "loss": 0.4626, "step": 61410 }, { "epoch": 15.202970297029703, "grad_norm": 0.4354059398174286, "learning_rate": 5.2177757343603106e-05, "loss": 0.4643, "step": 61420 }, { "epoch": 15.205445544554456, "grad_norm": 0.4132711887359619, "learning_rate": 5.216399142179233e-05, "loss": 0.4653, "step": 61430 }, { "epoch": 15.207920792079207, "grad_norm": 0.42594480514526367, "learning_rate": 5.2150225335640704e-05, "loss": 0.4721, "step": 61440 }, { "epoch": 15.21039603960396, "grad_norm": 0.4246843457221985, "learning_rate": 5.213645908619363e-05, "loss": 0.4607, "step": 61450 }, { "epoch": 15.212871287128714, "grad_norm": 0.4584197402000427, "learning_rate": 5.212269267449661e-05, "loss": 0.4669, "step": 61460 }, { "epoch": 15.215346534653465, "grad_norm": 0.4394252896308899, "learning_rate": 5.210892610159508e-05, "loss": 0.4722, "step": 61470 }, { "epoch": 15.217821782178218, "grad_norm": 0.4478314220905304, "learning_rate": 5.209515936853454e-05, "loss": 0.4613, "step": 61480 }, { "epoch": 15.220297029702971, "grad_norm": 0.4181347191333771, "learning_rate": 5.208139247636047e-05, "loss": 0.465, "step": 61490 }, { "epoch": 15.222772277227723, "grad_norm": 0.4492931663990021, "learning_rate": 5.206762542611836e-05, "loss": 0.4659, "step": 61500 }, { "epoch": 15.225247524752476, "grad_norm": 0.4171941876411438, "learning_rate": 5.205385821885377e-05, "loss": 0.4647, "step": 61510 }, { "epoch": 15.227722772277227, "grad_norm": 0.4302346408367157, "learning_rate": 5.204009085561218e-05, "loss": 0.4662, "step": 61520 }, { "epoch": 15.23019801980198, "grad_norm": 0.4361395537853241, "learning_rate": 5.2026323337439166e-05, "loss": 0.4643, "step": 61530 }, { "epoch": 15.232673267326733, "grad_norm": 0.42957088351249695, "learning_rate": 5.201255566538026e-05, "loss": 0.4544, "step": 61540 }, { "epoch": 15.235148514851485, "grad_norm": 0.4118240475654602, "learning_rate": 5.199878784048103e-05, "loss": 0.4649, "step": 61550 }, { "epoch": 15.237623762376238, "grad_norm": 0.4460446834564209, "learning_rate": 5.1985019863787066e-05, "loss": 0.4637, "step": 61560 }, { "epoch": 15.240099009900991, "grad_norm": 0.4639696776866913, "learning_rate": 5.197125173634394e-05, "loss": 0.467, "step": 61570 }, { "epoch": 15.242574257425742, "grad_norm": 0.4139614403247833, "learning_rate": 5.195748345919724e-05, "loss": 0.457, "step": 61580 }, { "epoch": 15.245049504950495, "grad_norm": 0.40380388498306274, "learning_rate": 5.194371503339262e-05, "loss": 0.4676, "step": 61590 }, { "epoch": 15.247524752475247, "grad_norm": 0.4022931456565857, "learning_rate": 5.192994645997564e-05, "loss": 0.4656, "step": 61600 }, { "epoch": 15.25, "grad_norm": 0.41067612171173096, "learning_rate": 5.191617773999198e-05, "loss": 0.467, "step": 61610 }, { "epoch": 15.252475247524753, "grad_norm": 0.41698122024536133, "learning_rate": 5.1902408874487265e-05, "loss": 0.4698, "step": 61620 }, { "epoch": 15.254950495049505, "grad_norm": 0.41228729486465454, "learning_rate": 5.188863986450715e-05, "loss": 0.4634, "step": 61630 }, { "epoch": 15.257425742574258, "grad_norm": 0.4331871271133423, "learning_rate": 5.187487071109729e-05, "loss": 0.4624, "step": 61640 }, { "epoch": 15.259900990099009, "grad_norm": 0.4109867215156555, "learning_rate": 5.1861101415303385e-05, "loss": 0.4637, "step": 61650 }, { "epoch": 15.262376237623762, "grad_norm": 0.42469239234924316, "learning_rate": 5.184733197817111e-05, "loss": 0.4628, "step": 61660 }, { "epoch": 15.264851485148515, "grad_norm": 0.43536943197250366, "learning_rate": 5.183356240074615e-05, "loss": 0.4658, "step": 61670 }, { "epoch": 15.267326732673267, "grad_norm": 0.4103938937187195, "learning_rate": 5.181979268407423e-05, "loss": 0.464, "step": 61680 }, { "epoch": 15.26980198019802, "grad_norm": 0.4277317523956299, "learning_rate": 5.180602282920107e-05, "loss": 0.4691, "step": 61690 }, { "epoch": 15.272277227722773, "grad_norm": 0.4398157298564911, "learning_rate": 5.17922528371724e-05, "loss": 0.4592, "step": 61700 }, { "epoch": 15.274752475247524, "grad_norm": 0.4117205739021301, "learning_rate": 5.177848270903395e-05, "loss": 0.467, "step": 61710 }, { "epoch": 15.277227722772277, "grad_norm": 0.44378066062927246, "learning_rate": 5.176471244583148e-05, "loss": 0.4639, "step": 61720 }, { "epoch": 15.27970297029703, "grad_norm": 0.42009660601615906, "learning_rate": 5.175094204861073e-05, "loss": 0.4621, "step": 61730 }, { "epoch": 15.282178217821782, "grad_norm": 0.435194194316864, "learning_rate": 5.173717151841751e-05, "loss": 0.4651, "step": 61740 }, { "epoch": 15.284653465346535, "grad_norm": 0.42959219217300415, "learning_rate": 5.172340085629756e-05, "loss": 0.4625, "step": 61750 }, { "epoch": 15.287128712871286, "grad_norm": 0.4403650164604187, "learning_rate": 5.170963006329669e-05, "loss": 0.4665, "step": 61760 }, { "epoch": 15.28960396039604, "grad_norm": 0.43920043110847473, "learning_rate": 5.16958591404607e-05, "loss": 0.4658, "step": 61770 }, { "epoch": 15.292079207920793, "grad_norm": 0.42927441000938416, "learning_rate": 5.1682088088835404e-05, "loss": 0.4642, "step": 61780 }, { "epoch": 15.294554455445544, "grad_norm": 0.4194762706756592, "learning_rate": 5.166831690946662e-05, "loss": 0.4641, "step": 61790 }, { "epoch": 15.297029702970297, "grad_norm": 0.4260956645011902, "learning_rate": 5.1654545603400164e-05, "loss": 0.4658, "step": 61800 }, { "epoch": 15.299504950495049, "grad_norm": 0.45099568367004395, "learning_rate": 5.164077417168191e-05, "loss": 0.464, "step": 61810 }, { "epoch": 15.301980198019802, "grad_norm": 0.43808332085609436, "learning_rate": 5.162700261535766e-05, "loss": 0.4666, "step": 61820 }, { "epoch": 15.304455445544555, "grad_norm": 0.4294263422489166, "learning_rate": 5.161323093547331e-05, "loss": 0.4687, "step": 61830 }, { "epoch": 15.306930693069306, "grad_norm": 0.4482755959033966, "learning_rate": 5.1599459133074725e-05, "loss": 0.4674, "step": 61840 }, { "epoch": 15.30940594059406, "grad_norm": 0.43859678506851196, "learning_rate": 5.158568720920777e-05, "loss": 0.4672, "step": 61850 }, { "epoch": 15.311881188118813, "grad_norm": 0.41076499223709106, "learning_rate": 5.157191516491834e-05, "loss": 0.4643, "step": 61860 }, { "epoch": 15.314356435643564, "grad_norm": 0.43906694650650024, "learning_rate": 5.1558143001252316e-05, "loss": 0.4584, "step": 61870 }, { "epoch": 15.316831683168317, "grad_norm": 0.4502849578857422, "learning_rate": 5.154437071925562e-05, "loss": 0.4645, "step": 61880 }, { "epoch": 15.319306930693068, "grad_norm": 0.4377893805503845, "learning_rate": 5.153059831997417e-05, "loss": 0.4656, "step": 61890 }, { "epoch": 15.321782178217822, "grad_norm": 0.42566749453544617, "learning_rate": 5.151682580445385e-05, "loss": 0.4645, "step": 61900 }, { "epoch": 15.324257425742575, "grad_norm": 0.4183887243270874, "learning_rate": 5.150305317374064e-05, "loss": 0.465, "step": 61910 }, { "epoch": 15.326732673267326, "grad_norm": 0.41337156295776367, "learning_rate": 5.148928042888046e-05, "loss": 0.4654, "step": 61920 }, { "epoch": 15.32920792079208, "grad_norm": 0.42572665214538574, "learning_rate": 5.147550757091926e-05, "loss": 0.4622, "step": 61930 }, { "epoch": 15.331683168316832, "grad_norm": 0.4297121465206146, "learning_rate": 5.1461734600903e-05, "loss": 0.4624, "step": 61940 }, { "epoch": 15.334158415841584, "grad_norm": 0.44403064250946045, "learning_rate": 5.144796151987763e-05, "loss": 0.4633, "step": 61950 }, { "epoch": 15.336633663366337, "grad_norm": 0.4282536804676056, "learning_rate": 5.1434188328889155e-05, "loss": 0.4609, "step": 61960 }, { "epoch": 15.339108910891088, "grad_norm": 0.41948598623275757, "learning_rate": 5.1420415028983524e-05, "loss": 0.4644, "step": 61970 }, { "epoch": 15.341584158415841, "grad_norm": 0.45002681016921997, "learning_rate": 5.1406641621206776e-05, "loss": 0.4603, "step": 61980 }, { "epoch": 15.344059405940595, "grad_norm": 0.42964357137680054, "learning_rate": 5.139286810660484e-05, "loss": 0.461, "step": 61990 }, { "epoch": 15.346534653465346, "grad_norm": 0.4279215931892395, "learning_rate": 5.137909448622379e-05, "loss": 0.4631, "step": 62000 }, { "epoch": 15.349009900990099, "grad_norm": 0.4075809419155121, "learning_rate": 5.1365320761109616e-05, "loss": 0.4618, "step": 62010 }, { "epoch": 15.351485148514852, "grad_norm": 0.4084044396877289, "learning_rate": 5.1351546932308325e-05, "loss": 0.4633, "step": 62020 }, { "epoch": 15.353960396039604, "grad_norm": 0.43656936287879944, "learning_rate": 5.1337773000865974e-05, "loss": 0.4679, "step": 62030 }, { "epoch": 15.356435643564357, "grad_norm": 0.4246675372123718, "learning_rate": 5.1323998967828593e-05, "loss": 0.4599, "step": 62040 }, { "epoch": 15.358910891089108, "grad_norm": 0.42828965187072754, "learning_rate": 5.1310224834242216e-05, "loss": 0.4629, "step": 62050 }, { "epoch": 15.361386138613861, "grad_norm": 0.43893033266067505, "learning_rate": 5.1296450601152926e-05, "loss": 0.4706, "step": 62060 }, { "epoch": 15.363861386138614, "grad_norm": 0.4133976399898529, "learning_rate": 5.1282676269606756e-05, "loss": 0.4695, "step": 62070 }, { "epoch": 15.366336633663366, "grad_norm": 0.4443417191505432, "learning_rate": 5.12689018406498e-05, "loss": 0.4605, "step": 62080 }, { "epoch": 15.368811881188119, "grad_norm": 0.4373583495616913, "learning_rate": 5.125512731532811e-05, "loss": 0.4668, "step": 62090 }, { "epoch": 15.371287128712872, "grad_norm": 0.4783879220485687, "learning_rate": 5.1241352694687786e-05, "loss": 0.4674, "step": 62100 }, { "epoch": 15.373762376237623, "grad_norm": 0.3997972011566162, "learning_rate": 5.122757797977493e-05, "loss": 0.4593, "step": 62110 }, { "epoch": 15.376237623762377, "grad_norm": 0.4600655734539032, "learning_rate": 5.121380317163561e-05, "loss": 0.4617, "step": 62120 }, { "epoch": 15.378712871287128, "grad_norm": 0.47756603360176086, "learning_rate": 5.120002827131597e-05, "loss": 0.466, "step": 62130 }, { "epoch": 15.381188118811881, "grad_norm": 0.4327103793621063, "learning_rate": 5.1186253279862084e-05, "loss": 0.4722, "step": 62140 }, { "epoch": 15.383663366336634, "grad_norm": 0.43807321786880493, "learning_rate": 5.11724781983201e-05, "loss": 0.4621, "step": 62150 }, { "epoch": 15.386138613861386, "grad_norm": 0.4133993685245514, "learning_rate": 5.115870302773613e-05, "loss": 0.4649, "step": 62160 }, { "epoch": 15.388613861386139, "grad_norm": 0.4332495331764221, "learning_rate": 5.114492776915632e-05, "loss": 0.4674, "step": 62170 }, { "epoch": 15.391089108910892, "grad_norm": 0.4301159977912903, "learning_rate": 5.1131152423626796e-05, "loss": 0.4695, "step": 62180 }, { "epoch": 15.393564356435643, "grad_norm": 0.446226567029953, "learning_rate": 5.11173769921937e-05, "loss": 0.4625, "step": 62190 }, { "epoch": 15.396039603960396, "grad_norm": 0.4052012860774994, "learning_rate": 5.11036014759032e-05, "loss": 0.4656, "step": 62200 }, { "epoch": 15.398514851485148, "grad_norm": 0.4472113251686096, "learning_rate": 5.1089825875801454e-05, "loss": 0.4648, "step": 62210 }, { "epoch": 15.400990099009901, "grad_norm": 0.4409220814704895, "learning_rate": 5.107605019293463e-05, "loss": 0.4684, "step": 62220 }, { "epoch": 15.403465346534654, "grad_norm": 0.43154892325401306, "learning_rate": 5.106227442834889e-05, "loss": 0.4644, "step": 62230 }, { "epoch": 15.405940594059405, "grad_norm": 0.416980117559433, "learning_rate": 5.104849858309042e-05, "loss": 0.4652, "step": 62240 }, { "epoch": 15.408415841584159, "grad_norm": 0.41798916459083557, "learning_rate": 5.103472265820539e-05, "loss": 0.4646, "step": 62250 }, { "epoch": 15.410891089108912, "grad_norm": 0.4453297555446625, "learning_rate": 5.102094665474003e-05, "loss": 0.4667, "step": 62260 }, { "epoch": 15.413366336633663, "grad_norm": 0.4417930245399475, "learning_rate": 5.1007170573740484e-05, "loss": 0.473, "step": 62270 }, { "epoch": 15.415841584158416, "grad_norm": 0.44533398747444153, "learning_rate": 5.0993394416253006e-05, "loss": 0.4618, "step": 62280 }, { "epoch": 15.418316831683168, "grad_norm": 0.4695917069911957, "learning_rate": 5.097961818332375e-05, "loss": 0.4636, "step": 62290 }, { "epoch": 15.42079207920792, "grad_norm": 0.4227719008922577, "learning_rate": 5.096584187599898e-05, "loss": 0.4616, "step": 62300 }, { "epoch": 15.423267326732674, "grad_norm": 0.4414965808391571, "learning_rate": 5.095206549532487e-05, "loss": 0.4605, "step": 62310 }, { "epoch": 15.425742574257425, "grad_norm": 0.41025885939598083, "learning_rate": 5.0938289042347686e-05, "loss": 0.4638, "step": 62320 }, { "epoch": 15.428217821782178, "grad_norm": 0.4185539782047272, "learning_rate": 5.092451251811363e-05, "loss": 0.4661, "step": 62330 }, { "epoch": 15.430693069306932, "grad_norm": 0.4353887736797333, "learning_rate": 5.091073592366896e-05, "loss": 0.4633, "step": 62340 }, { "epoch": 15.433168316831683, "grad_norm": 0.4400327205657959, "learning_rate": 5.089695926005991e-05, "loss": 0.4678, "step": 62350 }, { "epoch": 15.435643564356436, "grad_norm": 0.3967083990573883, "learning_rate": 5.0883182528332706e-05, "loss": 0.4626, "step": 62360 }, { "epoch": 15.438118811881187, "grad_norm": 0.3895415663719177, "learning_rate": 5.0869405729533626e-05, "loss": 0.4637, "step": 62370 }, { "epoch": 15.44059405940594, "grad_norm": 0.3912866413593292, "learning_rate": 5.0855628864708924e-05, "loss": 0.4666, "step": 62380 }, { "epoch": 15.443069306930694, "grad_norm": 0.4220433533191681, "learning_rate": 5.084185193490485e-05, "loss": 0.4621, "step": 62390 }, { "epoch": 15.445544554455445, "grad_norm": 0.471871018409729, "learning_rate": 5.082807494116768e-05, "loss": 0.4666, "step": 62400 }, { "epoch": 15.448019801980198, "grad_norm": 0.433987557888031, "learning_rate": 5.081429788454369e-05, "loss": 0.4614, "step": 62410 }, { "epoch": 15.450495049504951, "grad_norm": 0.4313748776912689, "learning_rate": 5.080052076607913e-05, "loss": 0.465, "step": 62420 }, { "epoch": 15.452970297029703, "grad_norm": 0.4222522974014282, "learning_rate": 5.078674358682033e-05, "loss": 0.4614, "step": 62430 }, { "epoch": 15.455445544554456, "grad_norm": 0.43467026948928833, "learning_rate": 5.0772966347813524e-05, "loss": 0.4585, "step": 62440 }, { "epoch": 15.457920792079207, "grad_norm": 0.4218433201313019, "learning_rate": 5.075918905010504e-05, "loss": 0.4632, "step": 62450 }, { "epoch": 15.46039603960396, "grad_norm": 0.41259199380874634, "learning_rate": 5.074541169474114e-05, "loss": 0.4611, "step": 62460 }, { "epoch": 15.462871287128714, "grad_norm": 0.41320207715034485, "learning_rate": 5.0731634282768146e-05, "loss": 0.4679, "step": 62470 }, { "epoch": 15.465346534653465, "grad_norm": 0.41693174839019775, "learning_rate": 5.0717856815232366e-05, "loss": 0.4674, "step": 62480 }, { "epoch": 15.467821782178218, "grad_norm": 0.42537063360214233, "learning_rate": 5.0704079293180096e-05, "loss": 0.4649, "step": 62490 }, { "epoch": 15.47029702970297, "grad_norm": 0.4160230755805969, "learning_rate": 5.069030171765765e-05, "loss": 0.4607, "step": 62500 }, { "epoch": 15.472772277227723, "grad_norm": 0.42175477743148804, "learning_rate": 5.067652408971133e-05, "loss": 0.4607, "step": 62510 }, { "epoch": 15.475247524752476, "grad_norm": 0.4194185435771942, "learning_rate": 5.066274641038749e-05, "loss": 0.4683, "step": 62520 }, { "epoch": 15.477722772277227, "grad_norm": 0.4562740623950958, "learning_rate": 5.064896868073242e-05, "loss": 0.4666, "step": 62530 }, { "epoch": 15.48019801980198, "grad_norm": 0.4331519603729248, "learning_rate": 5.063519090179246e-05, "loss": 0.469, "step": 62540 }, { "epoch": 15.482673267326733, "grad_norm": 0.43802154064178467, "learning_rate": 5.062141307461395e-05, "loss": 0.4659, "step": 62550 }, { "epoch": 15.485148514851485, "grad_norm": 0.4303302466869354, "learning_rate": 5.060763520024323e-05, "loss": 0.4603, "step": 62560 }, { "epoch": 15.487623762376238, "grad_norm": 0.44214680790901184, "learning_rate": 5.05938572797266e-05, "loss": 0.4664, "step": 62570 }, { "epoch": 15.490099009900991, "grad_norm": 0.45103752613067627, "learning_rate": 5.058007931411045e-05, "loss": 0.4609, "step": 62580 }, { "epoch": 15.492574257425742, "grad_norm": 0.4306606352329254, "learning_rate": 5.05663013044411e-05, "loss": 0.4642, "step": 62590 }, { "epoch": 15.495049504950495, "grad_norm": 0.4591819941997528, "learning_rate": 5.055252325176491e-05, "loss": 0.4626, "step": 62600 }, { "epoch": 15.497524752475247, "grad_norm": 0.40254372358322144, "learning_rate": 5.05387451571282e-05, "loss": 0.4628, "step": 62610 }, { "epoch": 15.5, "grad_norm": 0.43953296542167664, "learning_rate": 5.0524967021577376e-05, "loss": 0.4617, "step": 62620 }, { "epoch": 15.502475247524753, "grad_norm": 0.4250246584415436, "learning_rate": 5.051118884615876e-05, "loss": 0.4658, "step": 62630 }, { "epoch": 15.504950495049505, "grad_norm": 0.42145779728889465, "learning_rate": 5.049741063191873e-05, "loss": 0.4789, "step": 62640 }, { "epoch": 15.507425742574258, "grad_norm": 0.41579779982566833, "learning_rate": 5.0483632379903635e-05, "loss": 0.4617, "step": 62650 }, { "epoch": 15.509900990099009, "grad_norm": 0.41656294465065, "learning_rate": 5.046985409115985e-05, "loss": 0.4664, "step": 62660 }, { "epoch": 15.512376237623762, "grad_norm": 0.4415099322795868, "learning_rate": 5.0456075766733754e-05, "loss": 0.4615, "step": 62670 }, { "epoch": 15.514851485148515, "grad_norm": 0.4254520833492279, "learning_rate": 5.044229740767172e-05, "loss": 0.4637, "step": 62680 }, { "epoch": 15.517326732673267, "grad_norm": 0.4396282732486725, "learning_rate": 5.0428519015020115e-05, "loss": 0.4692, "step": 62690 }, { "epoch": 15.51980198019802, "grad_norm": 0.41932451725006104, "learning_rate": 5.0414740589825314e-05, "loss": 0.4686, "step": 62700 }, { "epoch": 15.522277227722773, "grad_norm": 0.421754390001297, "learning_rate": 5.040096213313371e-05, "loss": 0.467, "step": 62710 }, { "epoch": 15.524752475247524, "grad_norm": 0.4084194600582123, "learning_rate": 5.0387183645991674e-05, "loss": 0.4642, "step": 62720 }, { "epoch": 15.527227722772277, "grad_norm": 0.43285396695137024, "learning_rate": 5.0373405129445596e-05, "loss": 0.4658, "step": 62730 }, { "epoch": 15.52970297029703, "grad_norm": 0.4538920223712921, "learning_rate": 5.035962658454185e-05, "loss": 0.4613, "step": 62740 }, { "epoch": 15.532178217821782, "grad_norm": 0.4110185503959656, "learning_rate": 5.034584801232687e-05, "loss": 0.4661, "step": 62750 }, { "epoch": 15.534653465346535, "grad_norm": 0.4307270348072052, "learning_rate": 5.033206941384698e-05, "loss": 0.4609, "step": 62760 }, { "epoch": 15.537128712871286, "grad_norm": 0.43010449409484863, "learning_rate": 5.031829079014863e-05, "loss": 0.4617, "step": 62770 }, { "epoch": 15.53960396039604, "grad_norm": 0.4094620645046234, "learning_rate": 5.030451214227818e-05, "loss": 0.4651, "step": 62780 }, { "epoch": 15.542079207920793, "grad_norm": 0.41544732451438904, "learning_rate": 5.029073347128206e-05, "loss": 0.4686, "step": 62790 }, { "epoch": 15.544554455445544, "grad_norm": 0.4089542329311371, "learning_rate": 5.027695477820664e-05, "loss": 0.4646, "step": 62800 }, { "epoch": 15.547029702970297, "grad_norm": 0.4417215883731842, "learning_rate": 5.026317606409833e-05, "loss": 0.4628, "step": 62810 }, { "epoch": 15.549504950495049, "grad_norm": 0.4278249740600586, "learning_rate": 5.0249397330003554e-05, "loss": 0.4622, "step": 62820 }, { "epoch": 15.551980198019802, "grad_norm": 0.4049536883831024, "learning_rate": 5.023561857696867e-05, "loss": 0.4626, "step": 62830 }, { "epoch": 15.554455445544555, "grad_norm": 0.44680413603782654, "learning_rate": 5.0221839806040125e-05, "loss": 0.4666, "step": 62840 }, { "epoch": 15.556930693069306, "grad_norm": 0.4457240402698517, "learning_rate": 5.0208061018264305e-05, "loss": 0.4618, "step": 62850 }, { "epoch": 15.55940594059406, "grad_norm": 0.4303438365459442, "learning_rate": 5.0194282214687635e-05, "loss": 0.4648, "step": 62860 }, { "epoch": 15.561881188118813, "grad_norm": 0.43000879883766174, "learning_rate": 5.0180503396356504e-05, "loss": 0.4651, "step": 62870 }, { "epoch": 15.564356435643564, "grad_norm": 0.4204363524913788, "learning_rate": 5.016672456431734e-05, "loss": 0.4625, "step": 62880 }, { "epoch": 15.566831683168317, "grad_norm": 0.4123478829860687, "learning_rate": 5.015294571961653e-05, "loss": 0.4624, "step": 62890 }, { "epoch": 15.569306930693068, "grad_norm": 0.42013275623321533, "learning_rate": 5.013916686330052e-05, "loss": 0.4593, "step": 62900 }, { "epoch": 15.571782178217822, "grad_norm": 0.43509915471076965, "learning_rate": 5.012538799641568e-05, "loss": 0.4688, "step": 62910 }, { "epoch": 15.574257425742575, "grad_norm": 0.4159391522407532, "learning_rate": 5.0111609120008476e-05, "loss": 0.4566, "step": 62920 }, { "epoch": 15.576732673267326, "grad_norm": 0.43388277292251587, "learning_rate": 5.009783023512529e-05, "loss": 0.4714, "step": 62930 }, { "epoch": 15.57920792079208, "grad_norm": 0.4069518446922302, "learning_rate": 5.008405134281253e-05, "loss": 0.4604, "step": 62940 }, { "epoch": 15.581683168316832, "grad_norm": 0.42347368597984314, "learning_rate": 5.007027244411664e-05, "loss": 0.4663, "step": 62950 }, { "epoch": 15.584158415841584, "grad_norm": 0.4102078676223755, "learning_rate": 5.0056493540084e-05, "loss": 0.4641, "step": 62960 }, { "epoch": 15.586633663366337, "grad_norm": 0.3988952338695526, "learning_rate": 5.004271463176108e-05, "loss": 0.4651, "step": 62970 }, { "epoch": 15.589108910891088, "grad_norm": 0.38814210891723633, "learning_rate": 5.0028935720194236e-05, "loss": 0.4654, "step": 62980 }, { "epoch": 15.591584158415841, "grad_norm": 0.3899674117565155, "learning_rate": 5.001515680642993e-05, "loss": 0.4616, "step": 62990 }, { "epoch": 15.594059405940595, "grad_norm": 0.39176371693611145, "learning_rate": 5.0001377891514566e-05, "loss": 0.4605, "step": 63000 }, { "epoch": 15.596534653465346, "grad_norm": 0.4435890018939972, "learning_rate": 4.998759897649456e-05, "loss": 0.4624, "step": 63010 }, { "epoch": 15.599009900990099, "grad_norm": 0.4176597595214844, "learning_rate": 4.997382006241632e-05, "loss": 0.4651, "step": 63020 }, { "epoch": 15.601485148514852, "grad_norm": 0.42919981479644775, "learning_rate": 4.996004115032628e-05, "loss": 0.4583, "step": 63030 }, { "epoch": 15.603960396039604, "grad_norm": 0.4078577160835266, "learning_rate": 4.9946262241270846e-05, "loss": 0.4656, "step": 63040 }, { "epoch": 15.606435643564357, "grad_norm": 0.4201822876930237, "learning_rate": 4.993248333629645e-05, "loss": 0.4608, "step": 63050 }, { "epoch": 15.608910891089108, "grad_norm": 0.39953580498695374, "learning_rate": 4.9918704436449506e-05, "loss": 0.4629, "step": 63060 }, { "epoch": 15.611386138613861, "grad_norm": 0.44442614912986755, "learning_rate": 4.99049255427764e-05, "loss": 0.4667, "step": 63070 }, { "epoch": 15.613861386138614, "grad_norm": 0.42190903425216675, "learning_rate": 4.989114665632358e-05, "loss": 0.46, "step": 63080 }, { "epoch": 15.616336633663366, "grad_norm": 0.3954247832298279, "learning_rate": 4.987736777813746e-05, "loss": 0.4648, "step": 63090 }, { "epoch": 15.618811881188119, "grad_norm": 0.41144677996635437, "learning_rate": 4.986358890926443e-05, "loss": 0.462, "step": 63100 }, { "epoch": 15.621287128712872, "grad_norm": 0.40678396821022034, "learning_rate": 4.984981005075094e-05, "loss": 0.4623, "step": 63110 }, { "epoch": 15.623762376237623, "grad_norm": 0.4076082408428192, "learning_rate": 4.98360312036434e-05, "loss": 0.4652, "step": 63120 }, { "epoch": 15.626237623762377, "grad_norm": 0.41196101903915405, "learning_rate": 4.9822252368988187e-05, "loss": 0.4657, "step": 63130 }, { "epoch": 15.628712871287128, "grad_norm": 0.43964120745658875, "learning_rate": 4.980847354783173e-05, "loss": 0.4648, "step": 63140 }, { "epoch": 15.631188118811881, "grad_norm": 0.4374467134475708, "learning_rate": 4.979469474122045e-05, "loss": 0.4676, "step": 63150 }, { "epoch": 15.633663366336634, "grad_norm": 0.4382932782173157, "learning_rate": 4.978091595020076e-05, "loss": 0.4626, "step": 63160 }, { "epoch": 15.636138613861386, "grad_norm": 0.43880435824394226, "learning_rate": 4.976713717581902e-05, "loss": 0.4617, "step": 63170 }, { "epoch": 15.638613861386139, "grad_norm": 0.42162445187568665, "learning_rate": 4.975335841912172e-05, "loss": 0.462, "step": 63180 }, { "epoch": 15.641089108910892, "grad_norm": 0.4141017496585846, "learning_rate": 4.97395796811552e-05, "loss": 0.4651, "step": 63190 }, { "epoch": 15.643564356435643, "grad_norm": 0.4191303849220276, "learning_rate": 4.972580096296588e-05, "loss": 0.4646, "step": 63200 }, { "epoch": 15.646039603960396, "grad_norm": 0.3728099465370178, "learning_rate": 4.971202226560017e-05, "loss": 0.4592, "step": 63210 }, { "epoch": 15.648514851485148, "grad_norm": 0.40211397409439087, "learning_rate": 4.969824359010448e-05, "loss": 0.4599, "step": 63220 }, { "epoch": 15.650990099009901, "grad_norm": 0.4180207848548889, "learning_rate": 4.96844649375252e-05, "loss": 0.4612, "step": 63230 }, { "epoch": 15.653465346534654, "grad_norm": 0.42005354166030884, "learning_rate": 4.96706863089087e-05, "loss": 0.4679, "step": 63240 }, { "epoch": 15.655940594059405, "grad_norm": 0.4356705844402313, "learning_rate": 4.9656907705301426e-05, "loss": 0.4645, "step": 63250 }, { "epoch": 15.658415841584159, "grad_norm": 0.41770634055137634, "learning_rate": 4.964312912774974e-05, "loss": 0.4703, "step": 63260 }, { "epoch": 15.660891089108912, "grad_norm": 0.4268690347671509, "learning_rate": 4.962935057730003e-05, "loss": 0.4649, "step": 63270 }, { "epoch": 15.663366336633663, "grad_norm": 0.4391199052333832, "learning_rate": 4.961557205499871e-05, "loss": 0.4637, "step": 63280 }, { "epoch": 15.665841584158416, "grad_norm": 0.4109588861465454, "learning_rate": 4.9601793561892164e-05, "loss": 0.4637, "step": 63290 }, { "epoch": 15.668316831683168, "grad_norm": 0.41347670555114746, "learning_rate": 4.958801509902676e-05, "loss": 0.465, "step": 63300 }, { "epoch": 15.67079207920792, "grad_norm": 0.43177667260169983, "learning_rate": 4.957423666744888e-05, "loss": 0.4644, "step": 63310 }, { "epoch": 15.673267326732674, "grad_norm": 0.41068634390830994, "learning_rate": 4.9560458268204925e-05, "loss": 0.464, "step": 63320 }, { "epoch": 15.675742574257425, "grad_norm": 0.3970552682876587, "learning_rate": 4.9546679902341265e-05, "loss": 0.4611, "step": 63330 }, { "epoch": 15.678217821782178, "grad_norm": 0.4200807213783264, "learning_rate": 4.953290157090426e-05, "loss": 0.4656, "step": 63340 }, { "epoch": 15.680693069306932, "grad_norm": 0.4302622377872467, "learning_rate": 4.951912327494033e-05, "loss": 0.466, "step": 63350 }, { "epoch": 15.683168316831683, "grad_norm": 0.4277346432209015, "learning_rate": 4.950534501549579e-05, "loss": 0.4657, "step": 63360 }, { "epoch": 15.685643564356436, "grad_norm": 0.4172593653202057, "learning_rate": 4.949156679361703e-05, "loss": 0.4685, "step": 63370 }, { "epoch": 15.688118811881187, "grad_norm": 0.4090403914451599, "learning_rate": 4.9477788610350423e-05, "loss": 0.4625, "step": 63380 }, { "epoch": 15.69059405940594, "grad_norm": 0.43813028931617737, "learning_rate": 4.946401046674232e-05, "loss": 0.464, "step": 63390 }, { "epoch": 15.693069306930694, "grad_norm": 0.4190915524959564, "learning_rate": 4.94502323638391e-05, "loss": 0.4649, "step": 63400 }, { "epoch": 15.695544554455445, "grad_norm": 0.4243529438972473, "learning_rate": 4.943645430268708e-05, "loss": 0.4596, "step": 63410 }, { "epoch": 15.698019801980198, "grad_norm": 0.3997313380241394, "learning_rate": 4.942267628433266e-05, "loss": 0.4671, "step": 63420 }, { "epoch": 15.700495049504951, "grad_norm": 0.4034735858440399, "learning_rate": 4.9408898309822163e-05, "loss": 0.4595, "step": 63430 }, { "epoch": 15.702970297029703, "grad_norm": 0.4049980938434601, "learning_rate": 4.939512038020192e-05, "loss": 0.4634, "step": 63440 }, { "epoch": 15.705445544554456, "grad_norm": 0.4184381663799286, "learning_rate": 4.93813424965183e-05, "loss": 0.4691, "step": 63450 }, { "epoch": 15.707920792079207, "grad_norm": 0.41651028394699097, "learning_rate": 4.936756465981765e-05, "loss": 0.4615, "step": 63460 }, { "epoch": 15.71039603960396, "grad_norm": 0.4227321445941925, "learning_rate": 4.9353786871146264e-05, "loss": 0.4675, "step": 63470 }, { "epoch": 15.712871287128714, "grad_norm": 0.4221028983592987, "learning_rate": 4.934000913155052e-05, "loss": 0.4629, "step": 63480 }, { "epoch": 15.715346534653465, "grad_norm": 0.3980560600757599, "learning_rate": 4.9326231442076734e-05, "loss": 0.4564, "step": 63490 }, { "epoch": 15.717821782178218, "grad_norm": 0.39921095967292786, "learning_rate": 4.931245380377121e-05, "loss": 0.4639, "step": 63500 }, { "epoch": 15.72029702970297, "grad_norm": 0.41241171956062317, "learning_rate": 4.9298676217680275e-05, "loss": 0.4655, "step": 63510 }, { "epoch": 15.722772277227723, "grad_norm": 0.41517794132232666, "learning_rate": 4.928489868485026e-05, "loss": 0.4592, "step": 63520 }, { "epoch": 15.725247524752476, "grad_norm": 0.4069756865501404, "learning_rate": 4.927112120632749e-05, "loss": 0.465, "step": 63530 }, { "epoch": 15.727722772277227, "grad_norm": 0.4036789834499359, "learning_rate": 4.9257343783158224e-05, "loss": 0.4645, "step": 63540 }, { "epoch": 15.73019801980198, "grad_norm": 0.4375847280025482, "learning_rate": 4.924356641638881e-05, "loss": 0.4697, "step": 63550 }, { "epoch": 15.732673267326733, "grad_norm": 0.4280169904232025, "learning_rate": 4.922978910706553e-05, "loss": 0.4674, "step": 63560 }, { "epoch": 15.735148514851485, "grad_norm": 0.39940592646598816, "learning_rate": 4.921601185623468e-05, "loss": 0.4619, "step": 63570 }, { "epoch": 15.737623762376238, "grad_norm": 0.4460451602935791, "learning_rate": 4.920223466494256e-05, "loss": 0.4607, "step": 63580 }, { "epoch": 15.740099009900991, "grad_norm": 0.4129227101802826, "learning_rate": 4.918845753423548e-05, "loss": 0.4598, "step": 63590 }, { "epoch": 15.742574257425742, "grad_norm": 0.4182070791721344, "learning_rate": 4.917468046515967e-05, "loss": 0.4656, "step": 63600 }, { "epoch": 15.745049504950495, "grad_norm": 0.44060683250427246, "learning_rate": 4.916090345876142e-05, "loss": 0.4632, "step": 63610 }, { "epoch": 15.747524752475247, "grad_norm": 0.41737839579582214, "learning_rate": 4.914712651608703e-05, "loss": 0.4617, "step": 63620 }, { "epoch": 15.75, "grad_norm": 0.39642858505249023, "learning_rate": 4.913334963818275e-05, "loss": 0.4637, "step": 63630 }, { "epoch": 15.752475247524753, "grad_norm": 0.40073490142822266, "learning_rate": 4.911957282609484e-05, "loss": 0.4643, "step": 63640 }, { "epoch": 15.754950495049505, "grad_norm": 0.3806579113006592, "learning_rate": 4.910579608086958e-05, "loss": 0.465, "step": 63650 }, { "epoch": 15.757425742574258, "grad_norm": 0.40312278270721436, "learning_rate": 4.90920194035532e-05, "loss": 0.4677, "step": 63660 }, { "epoch": 15.759900990099009, "grad_norm": 0.4065137505531311, "learning_rate": 4.907824279519194e-05, "loss": 0.4618, "step": 63670 }, { "epoch": 15.762376237623762, "grad_norm": 0.41120055317878723, "learning_rate": 4.906446625683206e-05, "loss": 0.4642, "step": 63680 }, { "epoch": 15.764851485148515, "grad_norm": 0.42254891991615295, "learning_rate": 4.905068978951981e-05, "loss": 0.4644, "step": 63690 }, { "epoch": 15.767326732673267, "grad_norm": 0.3853707015514374, "learning_rate": 4.9036913394301406e-05, "loss": 0.4646, "step": 63700 }, { "epoch": 15.76980198019802, "grad_norm": 0.4004983603954315, "learning_rate": 4.902313707222304e-05, "loss": 0.4659, "step": 63710 }, { "epoch": 15.772277227722773, "grad_norm": 0.4292733669281006, "learning_rate": 4.9009360824331e-05, "loss": 0.4614, "step": 63720 }, { "epoch": 15.774752475247524, "grad_norm": 0.4379642903804779, "learning_rate": 4.8995584651671464e-05, "loss": 0.4643, "step": 63730 }, { "epoch": 15.777227722772277, "grad_norm": 0.47549840807914734, "learning_rate": 4.898180855529063e-05, "loss": 0.464, "step": 63740 }, { "epoch": 15.77970297029703, "grad_norm": 0.39283719658851624, "learning_rate": 4.896803253623472e-05, "loss": 0.4596, "step": 63750 }, { "epoch": 15.782178217821782, "grad_norm": 0.39702361822128296, "learning_rate": 4.895425659554995e-05, "loss": 0.4646, "step": 63760 }, { "epoch": 15.784653465346535, "grad_norm": 0.4276711046695709, "learning_rate": 4.8940480734282476e-05, "loss": 0.4636, "step": 63770 }, { "epoch": 15.787128712871286, "grad_norm": 0.4182164967060089, "learning_rate": 4.892670495347849e-05, "loss": 0.4639, "step": 63780 }, { "epoch": 15.78960396039604, "grad_norm": 0.40217116475105286, "learning_rate": 4.891292925418419e-05, "loss": 0.4572, "step": 63790 }, { "epoch": 15.792079207920793, "grad_norm": 0.42095890641212463, "learning_rate": 4.8899153637445736e-05, "loss": 0.462, "step": 63800 }, { "epoch": 15.794554455445544, "grad_norm": 0.42034968733787537, "learning_rate": 4.8885378104309286e-05, "loss": 0.4619, "step": 63810 }, { "epoch": 15.797029702970297, "grad_norm": 0.4117017984390259, "learning_rate": 4.887160265582103e-05, "loss": 0.4598, "step": 63820 }, { "epoch": 15.799504950495049, "grad_norm": 0.41253662109375, "learning_rate": 4.885782729302712e-05, "loss": 0.4658, "step": 63830 }, { "epoch": 15.801980198019802, "grad_norm": 0.4566859006881714, "learning_rate": 4.884405201697365e-05, "loss": 0.4627, "step": 63840 }, { "epoch": 15.804455445544555, "grad_norm": 0.3975274860858917, "learning_rate": 4.883027682870682e-05, "loss": 0.4606, "step": 63850 }, { "epoch": 15.806930693069306, "grad_norm": 0.4175892174243927, "learning_rate": 4.8816501729272754e-05, "loss": 0.4652, "step": 63860 }, { "epoch": 15.80940594059406, "grad_norm": 0.4303349256515503, "learning_rate": 4.880272671971757e-05, "loss": 0.4616, "step": 63870 }, { "epoch": 15.811881188118813, "grad_norm": 0.4089392125606537, "learning_rate": 4.878895180108737e-05, "loss": 0.4593, "step": 63880 }, { "epoch": 15.814356435643564, "grad_norm": 0.4171774089336395, "learning_rate": 4.877517697442831e-05, "loss": 0.4612, "step": 63890 }, { "epoch": 15.816831683168317, "grad_norm": 0.4160580337047577, "learning_rate": 4.876140224078647e-05, "loss": 0.4621, "step": 63900 }, { "epoch": 15.819306930693068, "grad_norm": 0.41096803545951843, "learning_rate": 4.874762760120795e-05, "loss": 0.4676, "step": 63910 }, { "epoch": 15.821782178217822, "grad_norm": 0.4202718436717987, "learning_rate": 4.873385305673885e-05, "loss": 0.4665, "step": 63920 }, { "epoch": 15.824257425742575, "grad_norm": 0.3980984687805176, "learning_rate": 4.872007860842526e-05, "loss": 0.4605, "step": 63930 }, { "epoch": 15.826732673267326, "grad_norm": 0.42708832025527954, "learning_rate": 4.8706304257313237e-05, "loss": 0.4697, "step": 63940 }, { "epoch": 15.82920792079208, "grad_norm": 0.42953363060951233, "learning_rate": 4.8692530004448885e-05, "loss": 0.4614, "step": 63950 }, { "epoch": 15.831683168316832, "grad_norm": 0.43551719188690186, "learning_rate": 4.8678755850878256e-05, "loss": 0.4678, "step": 63960 }, { "epoch": 15.834158415841584, "grad_norm": 0.4488072693347931, "learning_rate": 4.866498179764739e-05, "loss": 0.4637, "step": 63970 }, { "epoch": 15.836633663366337, "grad_norm": 0.4175417125225067, "learning_rate": 4.8651207845802336e-05, "loss": 0.4702, "step": 63980 }, { "epoch": 15.839108910891088, "grad_norm": 0.4012461006641388, "learning_rate": 4.863743399638916e-05, "loss": 0.4656, "step": 63990 }, { "epoch": 15.841584158415841, "grad_norm": 0.4201701581478119, "learning_rate": 4.862366025045389e-05, "loss": 0.4666, "step": 64000 }, { "epoch": 15.844059405940595, "grad_norm": 0.4465920031070709, "learning_rate": 4.8609886609042505e-05, "loss": 0.4659, "step": 64010 }, { "epoch": 15.846534653465346, "grad_norm": 0.44124606251716614, "learning_rate": 4.859611307320109e-05, "loss": 0.464, "step": 64020 }, { "epoch": 15.849009900990099, "grad_norm": 0.4078739285469055, "learning_rate": 4.858233964397561e-05, "loss": 0.4676, "step": 64030 }, { "epoch": 15.851485148514852, "grad_norm": 0.41768473386764526, "learning_rate": 4.8568566322412075e-05, "loss": 0.465, "step": 64040 }, { "epoch": 15.853960396039604, "grad_norm": 0.4261740744113922, "learning_rate": 4.855479310955649e-05, "loss": 0.4698, "step": 64050 }, { "epoch": 15.856435643564357, "grad_norm": 0.43219444155693054, "learning_rate": 4.8541020006454835e-05, "loss": 0.4639, "step": 64060 }, { "epoch": 15.858910891089108, "grad_norm": 0.42710134387016296, "learning_rate": 4.8527247014153074e-05, "loss": 0.4682, "step": 64070 }, { "epoch": 15.861386138613861, "grad_norm": 0.43906158208847046, "learning_rate": 4.8513474133697176e-05, "loss": 0.4642, "step": 64080 }, { "epoch": 15.863861386138614, "grad_norm": 0.43396899104118347, "learning_rate": 4.849970136613312e-05, "loss": 0.4722, "step": 64090 }, { "epoch": 15.866336633663366, "grad_norm": 0.39092230796813965, "learning_rate": 4.848592871250684e-05, "loss": 0.4636, "step": 64100 }, { "epoch": 15.868811881188119, "grad_norm": 0.4174165725708008, "learning_rate": 4.847215617386427e-05, "loss": 0.4623, "step": 64110 }, { "epoch": 15.871287128712872, "grad_norm": 0.4014437794685364, "learning_rate": 4.8458383751251366e-05, "loss": 0.4594, "step": 64120 }, { "epoch": 15.873762376237623, "grad_norm": 0.41752585768699646, "learning_rate": 4.844461144571405e-05, "loss": 0.4618, "step": 64130 }, { "epoch": 15.876237623762377, "grad_norm": 0.3870440125465393, "learning_rate": 4.8430839258298236e-05, "loss": 0.4652, "step": 64140 }, { "epoch": 15.878712871287128, "grad_norm": 0.4134899973869324, "learning_rate": 4.8417067190049796e-05, "loss": 0.4627, "step": 64150 }, { "epoch": 15.881188118811881, "grad_norm": 0.438856840133667, "learning_rate": 4.840329524201467e-05, "loss": 0.4626, "step": 64160 }, { "epoch": 15.883663366336634, "grad_norm": 0.43093860149383545, "learning_rate": 4.838952341523875e-05, "loss": 0.4584, "step": 64170 }, { "epoch": 15.886138613861386, "grad_norm": 0.4535503089427948, "learning_rate": 4.837575171076786e-05, "loss": 0.4604, "step": 64180 }, { "epoch": 15.888613861386139, "grad_norm": 0.3921302258968353, "learning_rate": 4.836198012964795e-05, "loss": 0.4612, "step": 64190 }, { "epoch": 15.891089108910892, "grad_norm": 0.4067109525203705, "learning_rate": 4.8348208672924824e-05, "loss": 0.4601, "step": 64200 }, { "epoch": 15.893564356435643, "grad_norm": 0.4053335189819336, "learning_rate": 4.833443734164433e-05, "loss": 0.4573, "step": 64210 }, { "epoch": 15.896039603960396, "grad_norm": 0.40212079882621765, "learning_rate": 4.832066613685235e-05, "loss": 0.4635, "step": 64220 }, { "epoch": 15.898514851485148, "grad_norm": 0.4398483335971832, "learning_rate": 4.830689505959469e-05, "loss": 0.4628, "step": 64230 }, { "epoch": 15.900990099009901, "grad_norm": 0.41900476813316345, "learning_rate": 4.829312411091719e-05, "loss": 0.4659, "step": 64240 }, { "epoch": 15.903465346534654, "grad_norm": 0.3956958055496216, "learning_rate": 4.827935329186562e-05, "loss": 0.4587, "step": 64250 }, { "epoch": 15.905940594059405, "grad_norm": 0.4201498031616211, "learning_rate": 4.826558260348583e-05, "loss": 0.4626, "step": 64260 }, { "epoch": 15.908415841584159, "grad_norm": 0.452551931142807, "learning_rate": 4.825181204682359e-05, "loss": 0.4588, "step": 64270 }, { "epoch": 15.910891089108912, "grad_norm": 0.41849881410598755, "learning_rate": 4.8238041622924686e-05, "loss": 0.4651, "step": 64280 }, { "epoch": 15.913366336633663, "grad_norm": 0.4263664186000824, "learning_rate": 4.8224271332834894e-05, "loss": 0.4621, "step": 64290 }, { "epoch": 15.915841584158416, "grad_norm": 0.40420013666152954, "learning_rate": 4.821050117759999e-05, "loss": 0.4603, "step": 64300 }, { "epoch": 15.918316831683168, "grad_norm": 0.4799107015132904, "learning_rate": 4.819673115826568e-05, "loss": 0.4596, "step": 64310 }, { "epoch": 15.92079207920792, "grad_norm": 0.4276679754257202, "learning_rate": 4.8182961275877766e-05, "loss": 0.4656, "step": 64320 }, { "epoch": 15.923267326732674, "grad_norm": 0.3849667012691498, "learning_rate": 4.816919153148195e-05, "loss": 0.4613, "step": 64330 }, { "epoch": 15.925742574257425, "grad_norm": 0.4096757769584656, "learning_rate": 4.815542192612395e-05, "loss": 0.4594, "step": 64340 }, { "epoch": 15.928217821782178, "grad_norm": 0.43181347846984863, "learning_rate": 4.8141652460849467e-05, "loss": 0.4595, "step": 64350 }, { "epoch": 15.930693069306932, "grad_norm": 0.4060688614845276, "learning_rate": 4.8127883136704244e-05, "loss": 0.4621, "step": 64360 }, { "epoch": 15.933168316831683, "grad_norm": 0.4066707193851471, "learning_rate": 4.811411395473393e-05, "loss": 0.4606, "step": 64370 }, { "epoch": 15.935643564356436, "grad_norm": 0.4123203158378601, "learning_rate": 4.81003449159842e-05, "loss": 0.4576, "step": 64380 }, { "epoch": 15.938118811881187, "grad_norm": 0.42766305804252625, "learning_rate": 4.808657602150075e-05, "loss": 0.4621, "step": 64390 }, { "epoch": 15.94059405940594, "grad_norm": 0.41528576612472534, "learning_rate": 4.807280727232922e-05, "loss": 0.4637, "step": 64400 }, { "epoch": 15.943069306930694, "grad_norm": 0.4381345808506012, "learning_rate": 4.805903866951526e-05, "loss": 0.4594, "step": 64410 }, { "epoch": 15.945544554455445, "grad_norm": 0.42696627974510193, "learning_rate": 4.80452702141045e-05, "loss": 0.4624, "step": 64420 }, { "epoch": 15.948019801980198, "grad_norm": 0.42797377705574036, "learning_rate": 4.803150190714259e-05, "loss": 0.4643, "step": 64430 }, { "epoch": 15.950495049504951, "grad_norm": 0.4043055772781372, "learning_rate": 4.8017733749675105e-05, "loss": 0.4575, "step": 64440 }, { "epoch": 15.952970297029703, "grad_norm": 0.3877270221710205, "learning_rate": 4.800396574274764e-05, "loss": 0.4627, "step": 64450 }, { "epoch": 15.955445544554456, "grad_norm": 0.41179710626602173, "learning_rate": 4.799019788740582e-05, "loss": 0.4626, "step": 64460 }, { "epoch": 15.957920792079207, "grad_norm": 0.4061444401741028, "learning_rate": 4.797643018469523e-05, "loss": 0.4638, "step": 64470 }, { "epoch": 15.96039603960396, "grad_norm": 0.3998942971229553, "learning_rate": 4.796266263566137e-05, "loss": 0.4616, "step": 64480 }, { "epoch": 15.962871287128714, "grad_norm": 0.3836098611354828, "learning_rate": 4.7948895241349865e-05, "loss": 0.4662, "step": 64490 }, { "epoch": 15.965346534653465, "grad_norm": 0.4058927595615387, "learning_rate": 4.793512800280623e-05, "loss": 0.4597, "step": 64500 }, { "epoch": 15.967821782178218, "grad_norm": 0.4119036793708801, "learning_rate": 4.792136092107599e-05, "loss": 0.4628, "step": 64510 }, { "epoch": 15.97029702970297, "grad_norm": 0.39665353298187256, "learning_rate": 4.790759399720466e-05, "loss": 0.4638, "step": 64520 }, { "epoch": 15.972772277227723, "grad_norm": 0.3959481120109558, "learning_rate": 4.789382723223777e-05, "loss": 0.4657, "step": 64530 }, { "epoch": 15.975247524752476, "grad_norm": 0.3982611298561096, "learning_rate": 4.788006062722081e-05, "loss": 0.4658, "step": 64540 }, { "epoch": 15.977722772277227, "grad_norm": 0.40007638931274414, "learning_rate": 4.7866294183199237e-05, "loss": 0.4696, "step": 64550 }, { "epoch": 15.98019801980198, "grad_norm": 0.43914854526519775, "learning_rate": 4.785252790121855e-05, "loss": 0.4636, "step": 64560 }, { "epoch": 15.982673267326733, "grad_norm": 0.45542481541633606, "learning_rate": 4.783876178232419e-05, "loss": 0.4629, "step": 64570 }, { "epoch": 15.985148514851485, "grad_norm": 0.41049203276634216, "learning_rate": 4.782499582756161e-05, "loss": 0.4588, "step": 64580 }, { "epoch": 15.987623762376238, "grad_norm": 0.43544185161590576, "learning_rate": 4.7811230037976244e-05, "loss": 0.4604, "step": 64590 }, { "epoch": 15.990099009900991, "grad_norm": 0.4326061010360718, "learning_rate": 4.779746441461353e-05, "loss": 0.4686, "step": 64600 }, { "epoch": 15.992574257425742, "grad_norm": 0.3953317403793335, "learning_rate": 4.7783698958518843e-05, "loss": 0.4636, "step": 64610 }, { "epoch": 15.995049504950495, "grad_norm": 0.41454723477363586, "learning_rate": 4.776993367073758e-05, "loss": 0.4643, "step": 64620 }, { "epoch": 15.997524752475247, "grad_norm": 0.42295897006988525, "learning_rate": 4.775616855231515e-05, "loss": 0.4631, "step": 64630 }, { "epoch": 16.0, "grad_norm": 0.4198838472366333, "learning_rate": 4.774240360429692e-05, "loss": 0.4611, "step": 64640 }, { "epoch": 16.002475247524753, "grad_norm": 0.40924182534217834, "learning_rate": 4.772863882772822e-05, "loss": 0.4638, "step": 64650 }, { "epoch": 16.004950495049506, "grad_norm": 0.4097885489463806, "learning_rate": 4.771487422365443e-05, "loss": 0.4629, "step": 64660 }, { "epoch": 16.007425742574256, "grad_norm": 0.41755884885787964, "learning_rate": 4.770110979312086e-05, "loss": 0.4625, "step": 64670 }, { "epoch": 16.00990099009901, "grad_norm": 0.40694698691368103, "learning_rate": 4.768734553717281e-05, "loss": 0.4629, "step": 64680 }, { "epoch": 16.012376237623762, "grad_norm": 0.4217650592327118, "learning_rate": 4.7673581456855614e-05, "loss": 0.4624, "step": 64690 }, { "epoch": 16.014851485148515, "grad_norm": 0.4098777770996094, "learning_rate": 4.765981755321456e-05, "loss": 0.4637, "step": 64700 }, { "epoch": 16.01732673267327, "grad_norm": 0.3999331593513489, "learning_rate": 4.764605382729493e-05, "loss": 0.4614, "step": 64710 }, { "epoch": 16.019801980198018, "grad_norm": 0.4035802185535431, "learning_rate": 4.763229028014194e-05, "loss": 0.4634, "step": 64720 }, { "epoch": 16.02227722772277, "grad_norm": 0.42027658224105835, "learning_rate": 4.761852691280092e-05, "loss": 0.4681, "step": 64730 }, { "epoch": 16.024752475247524, "grad_norm": 0.42326033115386963, "learning_rate": 4.760476372631704e-05, "loss": 0.4608, "step": 64740 }, { "epoch": 16.027227722772277, "grad_norm": 0.4171302020549774, "learning_rate": 4.759100072173554e-05, "loss": 0.4618, "step": 64750 }, { "epoch": 16.02970297029703, "grad_norm": 0.4133657217025757, "learning_rate": 4.757723790010165e-05, "loss": 0.4623, "step": 64760 }, { "epoch": 16.032178217821784, "grad_norm": 0.423877477645874, "learning_rate": 4.756347526246056e-05, "loss": 0.4662, "step": 64770 }, { "epoch": 16.034653465346533, "grad_norm": 0.42808374762535095, "learning_rate": 4.754971280985742e-05, "loss": 0.4668, "step": 64780 }, { "epoch": 16.037128712871286, "grad_norm": 0.4185500144958496, "learning_rate": 4.753595054333745e-05, "loss": 0.4676, "step": 64790 }, { "epoch": 16.03960396039604, "grad_norm": 0.402682900428772, "learning_rate": 4.752218846394577e-05, "loss": 0.4593, "step": 64800 }, { "epoch": 16.042079207920793, "grad_norm": 0.4105837047100067, "learning_rate": 4.750842657272751e-05, "loss": 0.4659, "step": 64810 }, { "epoch": 16.044554455445546, "grad_norm": 0.3998730778694153, "learning_rate": 4.749466487072781e-05, "loss": 0.4691, "step": 64820 }, { "epoch": 16.047029702970296, "grad_norm": 0.42019471526145935, "learning_rate": 4.748090335899179e-05, "loss": 0.4662, "step": 64830 }, { "epoch": 16.04950495049505, "grad_norm": 0.4264671802520752, "learning_rate": 4.746714203856454e-05, "loss": 0.4615, "step": 64840 }, { "epoch": 16.051980198019802, "grad_norm": 0.39489835500717163, "learning_rate": 4.7453380910491105e-05, "loss": 0.4601, "step": 64850 }, { "epoch": 16.054455445544555, "grad_norm": 0.43830615282058716, "learning_rate": 4.743961997581663e-05, "loss": 0.4591, "step": 64860 }, { "epoch": 16.056930693069308, "grad_norm": 0.3819473385810852, "learning_rate": 4.742585923558609e-05, "loss": 0.4615, "step": 64870 }, { "epoch": 16.059405940594058, "grad_norm": 0.39786162972450256, "learning_rate": 4.7412098690844545e-05, "loss": 0.4625, "step": 64880 }, { "epoch": 16.06188118811881, "grad_norm": 0.41205790638923645, "learning_rate": 4.7398338342637045e-05, "loss": 0.4697, "step": 64890 }, { "epoch": 16.064356435643564, "grad_norm": 0.4109930992126465, "learning_rate": 4.7384578192008586e-05, "loss": 0.462, "step": 64900 }, { "epoch": 16.066831683168317, "grad_norm": 0.4334339499473572, "learning_rate": 4.737081824000414e-05, "loss": 0.4639, "step": 64910 }, { "epoch": 16.06930693069307, "grad_norm": 0.40938135981559753, "learning_rate": 4.7357058487668695e-05, "loss": 0.4609, "step": 64920 }, { "epoch": 16.071782178217823, "grad_norm": 0.42680689692497253, "learning_rate": 4.734329893604722e-05, "loss": 0.4638, "step": 64930 }, { "epoch": 16.074257425742573, "grad_norm": 0.42236071825027466, "learning_rate": 4.732953958618466e-05, "loss": 0.4633, "step": 64940 }, { "epoch": 16.076732673267326, "grad_norm": 0.40666261315345764, "learning_rate": 4.731578043912593e-05, "loss": 0.4648, "step": 64950 }, { "epoch": 16.07920792079208, "grad_norm": 0.43534618616104126, "learning_rate": 4.730202149591598e-05, "loss": 0.4665, "step": 64960 }, { "epoch": 16.081683168316832, "grad_norm": 0.39360183477401733, "learning_rate": 4.7288262757599694e-05, "loss": 0.4606, "step": 64970 }, { "epoch": 16.084158415841586, "grad_norm": 0.40156757831573486, "learning_rate": 4.727450422522195e-05, "loss": 0.4627, "step": 64980 }, { "epoch": 16.086633663366335, "grad_norm": 0.42481130361557007, "learning_rate": 4.726074589982761e-05, "loss": 0.4633, "step": 64990 }, { "epoch": 16.08910891089109, "grad_norm": 0.43334755301475525, "learning_rate": 4.7246987782461553e-05, "loss": 0.4583, "step": 65000 }, { "epoch": 16.09158415841584, "grad_norm": 0.37220287322998047, "learning_rate": 4.723322987416862e-05, "loss": 0.4602, "step": 65010 }, { "epoch": 16.094059405940595, "grad_norm": 0.3951917290687561, "learning_rate": 4.7219472175993586e-05, "loss": 0.4592, "step": 65020 }, { "epoch": 16.096534653465348, "grad_norm": 0.41402751207351685, "learning_rate": 4.7205714688981315e-05, "loss": 0.4631, "step": 65030 }, { "epoch": 16.099009900990097, "grad_norm": 0.4604015350341797, "learning_rate": 4.7191957414176574e-05, "loss": 0.464, "step": 65040 }, { "epoch": 16.10148514851485, "grad_norm": 0.39615538716316223, "learning_rate": 4.717820035262412e-05, "loss": 0.4664, "step": 65050 }, { "epoch": 16.103960396039604, "grad_norm": 0.4086991250514984, "learning_rate": 4.716444350536873e-05, "loss": 0.4562, "step": 65060 }, { "epoch": 16.106435643564357, "grad_norm": 0.4042205810546875, "learning_rate": 4.7150686873455155e-05, "loss": 0.463, "step": 65070 }, { "epoch": 16.10891089108911, "grad_norm": 0.4133627116680145, "learning_rate": 4.713693045792812e-05, "loss": 0.4674, "step": 65080 }, { "epoch": 16.111386138613863, "grad_norm": 0.402741014957428, "learning_rate": 4.712317425983229e-05, "loss": 0.4589, "step": 65090 }, { "epoch": 16.113861386138613, "grad_norm": 0.39939936995506287, "learning_rate": 4.71094182802124e-05, "loss": 0.4645, "step": 65100 }, { "epoch": 16.116336633663366, "grad_norm": 0.40549612045288086, "learning_rate": 4.7095662520113114e-05, "loss": 0.462, "step": 65110 }, { "epoch": 16.11881188118812, "grad_norm": 0.40322819352149963, "learning_rate": 4.708190698057909e-05, "loss": 0.4666, "step": 65120 }, { "epoch": 16.121287128712872, "grad_norm": 0.39441829919815063, "learning_rate": 4.706815166265498e-05, "loss": 0.4653, "step": 65130 }, { "epoch": 16.123762376237625, "grad_norm": 0.40984395146369934, "learning_rate": 4.7054396567385416e-05, "loss": 0.4613, "step": 65140 }, { "epoch": 16.126237623762375, "grad_norm": 0.4250398278236389, "learning_rate": 4.704064169581496e-05, "loss": 0.46, "step": 65150 }, { "epoch": 16.128712871287128, "grad_norm": 0.41148945689201355, "learning_rate": 4.702688704898827e-05, "loss": 0.4645, "step": 65160 }, { "epoch": 16.13118811881188, "grad_norm": 0.4277973771095276, "learning_rate": 4.7013132627949874e-05, "loss": 0.465, "step": 65170 }, { "epoch": 16.133663366336634, "grad_norm": 0.3986806869506836, "learning_rate": 4.6999378433744345e-05, "loss": 0.4611, "step": 65180 }, { "epoch": 16.136138613861387, "grad_norm": 0.43806639313697815, "learning_rate": 4.698562446741621e-05, "loss": 0.4692, "step": 65190 }, { "epoch": 16.138613861386137, "grad_norm": 0.4278442859649658, "learning_rate": 4.6971870730010034e-05, "loss": 0.46, "step": 65200 }, { "epoch": 16.14108910891089, "grad_norm": 0.41151097416877747, "learning_rate": 4.695811722257027e-05, "loss": 0.4589, "step": 65210 }, { "epoch": 16.143564356435643, "grad_norm": 0.42783215641975403, "learning_rate": 4.694436394614142e-05, "loss": 0.4639, "step": 65220 }, { "epoch": 16.146039603960396, "grad_norm": 0.3990986943244934, "learning_rate": 4.693061090176798e-05, "loss": 0.4591, "step": 65230 }, { "epoch": 16.14851485148515, "grad_norm": 0.4194772243499756, "learning_rate": 4.6916858090494374e-05, "loss": 0.4615, "step": 65240 }, { "epoch": 16.150990099009903, "grad_norm": 0.42480748891830444, "learning_rate": 4.690310551336505e-05, "loss": 0.4635, "step": 65250 }, { "epoch": 16.153465346534652, "grad_norm": 0.4327613413333893, "learning_rate": 4.688935317142443e-05, "loss": 0.4638, "step": 65260 }, { "epoch": 16.155940594059405, "grad_norm": 0.43208426237106323, "learning_rate": 4.687560106571692e-05, "loss": 0.4636, "step": 65270 }, { "epoch": 16.15841584158416, "grad_norm": 0.3951106667518616, "learning_rate": 4.6861849197286875e-05, "loss": 0.4587, "step": 65280 }, { "epoch": 16.16089108910891, "grad_norm": 0.397322416305542, "learning_rate": 4.684809756717867e-05, "loss": 0.4677, "step": 65290 }, { "epoch": 16.163366336633665, "grad_norm": 0.39666861295700073, "learning_rate": 4.6834346176436664e-05, "loss": 0.4616, "step": 65300 }, { "epoch": 16.165841584158414, "grad_norm": 0.4023500978946686, "learning_rate": 4.682059502610517e-05, "loss": 0.4614, "step": 65310 }, { "epoch": 16.168316831683168, "grad_norm": 0.42838141322135925, "learning_rate": 4.680684411722849e-05, "loss": 0.4673, "step": 65320 }, { "epoch": 16.17079207920792, "grad_norm": 0.4002232253551483, "learning_rate": 4.6793093450850944e-05, "loss": 0.466, "step": 65330 }, { "epoch": 16.173267326732674, "grad_norm": 0.3974602520465851, "learning_rate": 4.6779343028016786e-05, "loss": 0.4614, "step": 65340 }, { "epoch": 16.175742574257427, "grad_norm": 0.4054079055786133, "learning_rate": 4.676559284977027e-05, "loss": 0.4629, "step": 65350 }, { "epoch": 16.178217821782177, "grad_norm": 0.42228373885154724, "learning_rate": 4.675184291715562e-05, "loss": 0.4641, "step": 65360 }, { "epoch": 16.18069306930693, "grad_norm": 0.399770051240921, "learning_rate": 4.673809323121707e-05, "loss": 0.4606, "step": 65370 }, { "epoch": 16.183168316831683, "grad_norm": 0.4029637575149536, "learning_rate": 4.672434379299882e-05, "loss": 0.4649, "step": 65380 }, { "epoch": 16.185643564356436, "grad_norm": 0.43890753388404846, "learning_rate": 4.671059460354503e-05, "loss": 0.4616, "step": 65390 }, { "epoch": 16.18811881188119, "grad_norm": 0.3986510932445526, "learning_rate": 4.669684566389988e-05, "loss": 0.4665, "step": 65400 }, { "epoch": 16.190594059405942, "grad_norm": 0.44396719336509705, "learning_rate": 4.6683096975107496e-05, "loss": 0.4603, "step": 65410 }, { "epoch": 16.193069306930692, "grad_norm": 0.41859644651412964, "learning_rate": 4.6669348538212e-05, "loss": 0.4651, "step": 65420 }, { "epoch": 16.195544554455445, "grad_norm": 0.42379701137542725, "learning_rate": 4.665560035425752e-05, "loss": 0.4639, "step": 65430 }, { "epoch": 16.198019801980198, "grad_norm": 0.3985842764377594, "learning_rate": 4.664185242428812e-05, "loss": 0.4647, "step": 65440 }, { "epoch": 16.20049504950495, "grad_norm": 0.41469258069992065, "learning_rate": 4.662810474934787e-05, "loss": 0.4675, "step": 65450 }, { "epoch": 16.202970297029704, "grad_norm": 0.40965092182159424, "learning_rate": 4.661435733048079e-05, "loss": 0.4661, "step": 65460 }, { "epoch": 16.205445544554454, "grad_norm": 0.3890034854412079, "learning_rate": 4.660061016873094e-05, "loss": 0.4595, "step": 65470 }, { "epoch": 16.207920792079207, "grad_norm": 0.38030996918678284, "learning_rate": 4.658686326514232e-05, "loss": 0.454, "step": 65480 }, { "epoch": 16.21039603960396, "grad_norm": 0.44398069381713867, "learning_rate": 4.657311662075889e-05, "loss": 0.4676, "step": 65490 }, { "epoch": 16.212871287128714, "grad_norm": 0.4591060280799866, "learning_rate": 4.655937023662467e-05, "loss": 0.4646, "step": 65500 }, { "epoch": 16.215346534653467, "grad_norm": 0.39845356345176697, "learning_rate": 4.654562411378355e-05, "loss": 0.4642, "step": 65510 }, { "epoch": 16.217821782178216, "grad_norm": 0.40344205498695374, "learning_rate": 4.653187825327948e-05, "loss": 0.4608, "step": 65520 }, { "epoch": 16.22029702970297, "grad_norm": 0.4395723342895508, "learning_rate": 4.6518132656156375e-05, "loss": 0.4618, "step": 65530 }, { "epoch": 16.222772277227723, "grad_norm": 0.38727137446403503, "learning_rate": 4.650438732345812e-05, "loss": 0.4665, "step": 65540 }, { "epoch": 16.225247524752476, "grad_norm": 0.412761390209198, "learning_rate": 4.649064225622859e-05, "loss": 0.4634, "step": 65550 }, { "epoch": 16.22772277227723, "grad_norm": 0.3992636203765869, "learning_rate": 4.6476897455511585e-05, "loss": 0.4666, "step": 65560 }, { "epoch": 16.230198019801982, "grad_norm": 0.42552050948143005, "learning_rate": 4.6463152922351e-05, "loss": 0.4633, "step": 65570 }, { "epoch": 16.23267326732673, "grad_norm": 0.40617135167121887, "learning_rate": 4.644940865779061e-05, "loss": 0.4652, "step": 65580 }, { "epoch": 16.235148514851485, "grad_norm": 0.4045540690422058, "learning_rate": 4.6435664662874176e-05, "loss": 0.4598, "step": 65590 }, { "epoch": 16.237623762376238, "grad_norm": 0.409507155418396, "learning_rate": 4.642192093864551e-05, "loss": 0.4614, "step": 65600 }, { "epoch": 16.24009900990099, "grad_norm": 0.4314989447593689, "learning_rate": 4.6408177486148344e-05, "loss": 0.4641, "step": 65610 }, { "epoch": 16.242574257425744, "grad_norm": 0.3786556124687195, "learning_rate": 4.639443430642635e-05, "loss": 0.4581, "step": 65620 }, { "epoch": 16.245049504950494, "grad_norm": 0.41153448820114136, "learning_rate": 4.638069140052332e-05, "loss": 0.4593, "step": 65630 }, { "epoch": 16.247524752475247, "grad_norm": 0.42350471019744873, "learning_rate": 4.6366948769482876e-05, "loss": 0.467, "step": 65640 }, { "epoch": 16.25, "grad_norm": 0.4262145161628723, "learning_rate": 4.6353206414348696e-05, "loss": 0.4649, "step": 65650 }, { "epoch": 16.252475247524753, "grad_norm": 0.4504544138908386, "learning_rate": 4.633946433616442e-05, "loss": 0.4606, "step": 65660 }, { "epoch": 16.254950495049506, "grad_norm": 0.42394810914993286, "learning_rate": 4.632572253597367e-05, "loss": 0.4631, "step": 65670 }, { "epoch": 16.257425742574256, "grad_norm": 0.38412797451019287, "learning_rate": 4.631198101482007e-05, "loss": 0.4659, "step": 65680 }, { "epoch": 16.25990099009901, "grad_norm": 0.4055861830711365, "learning_rate": 4.629823977374714e-05, "loss": 0.4633, "step": 65690 }, { "epoch": 16.262376237623762, "grad_norm": 0.4187222123146057, "learning_rate": 4.628449881379848e-05, "loss": 0.4652, "step": 65700 }, { "epoch": 16.264851485148515, "grad_norm": 0.42619606852531433, "learning_rate": 4.627075813601762e-05, "loss": 0.4582, "step": 65710 }, { "epoch": 16.26732673267327, "grad_norm": 0.40956997871398926, "learning_rate": 4.6257017741448055e-05, "loss": 0.4668, "step": 65720 }, { "epoch": 16.269801980198018, "grad_norm": 0.4053620994091034, "learning_rate": 4.6243277631133304e-05, "loss": 0.4618, "step": 65730 }, { "epoch": 16.27227722772277, "grad_norm": 0.4048449695110321, "learning_rate": 4.6229537806116834e-05, "loss": 0.4589, "step": 65740 }, { "epoch": 16.274752475247524, "grad_norm": 0.4093003273010254, "learning_rate": 4.6215798267442074e-05, "loss": 0.4673, "step": 65750 }, { "epoch": 16.277227722772277, "grad_norm": 0.4206947088241577, "learning_rate": 4.6202059016152455e-05, "loss": 0.4646, "step": 65760 }, { "epoch": 16.27970297029703, "grad_norm": 0.41724810004234314, "learning_rate": 4.61883200532914e-05, "loss": 0.458, "step": 65770 }, { "epoch": 16.282178217821784, "grad_norm": 0.40669217705726624, "learning_rate": 4.617458137990227e-05, "loss": 0.4643, "step": 65780 }, { "epoch": 16.284653465346533, "grad_norm": 0.42735129594802856, "learning_rate": 4.6160842997028444e-05, "loss": 0.4677, "step": 65790 }, { "epoch": 16.287128712871286, "grad_norm": 0.39921993017196655, "learning_rate": 4.614710490571327e-05, "loss": 0.4632, "step": 65800 }, { "epoch": 16.28960396039604, "grad_norm": 0.39083340764045715, "learning_rate": 4.613336710700004e-05, "loss": 0.4618, "step": 65810 }, { "epoch": 16.292079207920793, "grad_norm": 0.41677504777908325, "learning_rate": 4.611962960193206e-05, "loss": 0.4638, "step": 65820 }, { "epoch": 16.294554455445546, "grad_norm": 0.39816954731941223, "learning_rate": 4.6105892391552594e-05, "loss": 0.4621, "step": 65830 }, { "epoch": 16.297029702970296, "grad_norm": 0.41328510642051697, "learning_rate": 4.6092155476904915e-05, "loss": 0.4626, "step": 65840 }, { "epoch": 16.29950495049505, "grad_norm": 0.43746069073677063, "learning_rate": 4.6078418859032245e-05, "loss": 0.4661, "step": 65850 }, { "epoch": 16.301980198019802, "grad_norm": 0.3938531279563904, "learning_rate": 4.6064682538977754e-05, "loss": 0.457, "step": 65860 }, { "epoch": 16.304455445544555, "grad_norm": 0.3948988616466522, "learning_rate": 4.605094651778469e-05, "loss": 0.4648, "step": 65870 }, { "epoch": 16.306930693069308, "grad_norm": 0.4030776023864746, "learning_rate": 4.603721079649616e-05, "loss": 0.4606, "step": 65880 }, { "epoch": 16.309405940594058, "grad_norm": 0.43707460165023804, "learning_rate": 4.60234753761553e-05, "loss": 0.4657, "step": 65890 }, { "epoch": 16.31188118811881, "grad_norm": 0.3994789123535156, "learning_rate": 4.600974025780526e-05, "loss": 0.4676, "step": 65900 }, { "epoch": 16.314356435643564, "grad_norm": 0.4264979362487793, "learning_rate": 4.599600544248913e-05, "loss": 0.4637, "step": 65910 }, { "epoch": 16.316831683168317, "grad_norm": 0.3936634659767151, "learning_rate": 4.598227093124994e-05, "loss": 0.4616, "step": 65920 }, { "epoch": 16.31930693069307, "grad_norm": 0.40697479248046875, "learning_rate": 4.5968536725130754e-05, "loss": 0.4638, "step": 65930 }, { "epoch": 16.321782178217823, "grad_norm": 0.4107305109500885, "learning_rate": 4.595480282517461e-05, "loss": 0.4689, "step": 65940 }, { "epoch": 16.324257425742573, "grad_norm": 0.40028145909309387, "learning_rate": 4.5941069232424494e-05, "loss": 0.4631, "step": 65950 }, { "epoch": 16.326732673267326, "grad_norm": 0.4084720015525818, "learning_rate": 4.592733594792338e-05, "loss": 0.4649, "step": 65960 }, { "epoch": 16.32920792079208, "grad_norm": 0.42552557587623596, "learning_rate": 4.591360297271422e-05, "loss": 0.4632, "step": 65970 }, { "epoch": 16.331683168316832, "grad_norm": 0.4199501872062683, "learning_rate": 4.5899870307839964e-05, "loss": 0.4663, "step": 65980 }, { "epoch": 16.334158415841586, "grad_norm": 0.3790273070335388, "learning_rate": 4.5886137954343486e-05, "loss": 0.4574, "step": 65990 }, { "epoch": 16.336633663366335, "grad_norm": 0.40320396423339844, "learning_rate": 4.587240591326768e-05, "loss": 0.4653, "step": 66000 }, { "epoch": 16.33910891089109, "grad_norm": 0.42114564776420593, "learning_rate": 4.585867418565541e-05, "loss": 0.4609, "step": 66010 }, { "epoch": 16.34158415841584, "grad_norm": 0.3863641619682312, "learning_rate": 4.584494277254951e-05, "loss": 0.454, "step": 66020 }, { "epoch": 16.344059405940595, "grad_norm": 0.3838490843772888, "learning_rate": 4.583121167499276e-05, "loss": 0.4628, "step": 66030 }, { "epoch": 16.346534653465348, "grad_norm": 0.414209246635437, "learning_rate": 4.581748089402801e-05, "loss": 0.46, "step": 66040 }, { "epoch": 16.349009900990097, "grad_norm": 0.39605632424354553, "learning_rate": 4.5803750430697965e-05, "loss": 0.4582, "step": 66050 }, { "epoch": 16.35148514851485, "grad_norm": 0.3945838212966919, "learning_rate": 4.579002028604537e-05, "loss": 0.4651, "step": 66060 }, { "epoch": 16.353960396039604, "grad_norm": 0.3775377571582794, "learning_rate": 4.577629046111296e-05, "loss": 0.4616, "step": 66070 }, { "epoch": 16.356435643564357, "grad_norm": 0.38640958070755005, "learning_rate": 4.5762560956943426e-05, "loss": 0.4623, "step": 66080 }, { "epoch": 16.35891089108911, "grad_norm": 0.42439961433410645, "learning_rate": 4.5748831774579405e-05, "loss": 0.4603, "step": 66090 }, { "epoch": 16.361386138613863, "grad_norm": 0.43099936842918396, "learning_rate": 4.573510291506357e-05, "loss": 0.4639, "step": 66100 }, { "epoch": 16.363861386138613, "grad_norm": 0.3949735164642334, "learning_rate": 4.572137437943852e-05, "loss": 0.4593, "step": 66110 }, { "epoch": 16.366336633663366, "grad_norm": 0.4214049279689789, "learning_rate": 4.5707646168746854e-05, "loss": 0.4625, "step": 66120 }, { "epoch": 16.36881188118812, "grad_norm": 0.4189988970756531, "learning_rate": 4.5693918284031115e-05, "loss": 0.4589, "step": 66130 }, { "epoch": 16.371287128712872, "grad_norm": 0.3845464885234833, "learning_rate": 4.568019072633388e-05, "loss": 0.4611, "step": 66140 }, { "epoch": 16.373762376237625, "grad_norm": 0.38376355171203613, "learning_rate": 4.566646349669766e-05, "loss": 0.4608, "step": 66150 }, { "epoch": 16.376237623762375, "grad_norm": 0.3977300226688385, "learning_rate": 4.5652736596164905e-05, "loss": 0.4609, "step": 66160 }, { "epoch": 16.378712871287128, "grad_norm": 0.3785818815231323, "learning_rate": 4.563901002577815e-05, "loss": 0.4649, "step": 66170 }, { "epoch": 16.38118811881188, "grad_norm": 0.39752158522605896, "learning_rate": 4.562528378657979e-05, "loss": 0.4682, "step": 66180 }, { "epoch": 16.383663366336634, "grad_norm": 0.39438074827194214, "learning_rate": 4.561155787961226e-05, "loss": 0.4666, "step": 66190 }, { "epoch": 16.386138613861387, "grad_norm": 0.38985395431518555, "learning_rate": 4.5597832305917936e-05, "loss": 0.465, "step": 66200 }, { "epoch": 16.388613861386137, "grad_norm": 0.39755475521087646, "learning_rate": 4.558410706653922e-05, "loss": 0.4613, "step": 66210 }, { "epoch": 16.39108910891089, "grad_norm": 0.3923608064651489, "learning_rate": 4.557038216251841e-05, "loss": 0.4611, "step": 66220 }, { "epoch": 16.393564356435643, "grad_norm": 0.4095759391784668, "learning_rate": 4.555665759489784e-05, "loss": 0.4613, "step": 66230 }, { "epoch": 16.396039603960396, "grad_norm": 0.38426077365875244, "learning_rate": 4.5542933364719815e-05, "loss": 0.4633, "step": 66240 }, { "epoch": 16.39851485148515, "grad_norm": 0.3892022669315338, "learning_rate": 4.552920947302658e-05, "loss": 0.4646, "step": 66250 }, { "epoch": 16.400990099009903, "grad_norm": 0.38320139050483704, "learning_rate": 4.551548592086037e-05, "loss": 0.4564, "step": 66260 }, { "epoch": 16.403465346534652, "grad_norm": 0.3943519592285156, "learning_rate": 4.5501762709263426e-05, "loss": 0.4637, "step": 66270 }, { "epoch": 16.405940594059405, "grad_norm": 0.385306715965271, "learning_rate": 4.548803983927792e-05, "loss": 0.4607, "step": 66280 }, { "epoch": 16.40841584158416, "grad_norm": 0.39885711669921875, "learning_rate": 4.547431731194601e-05, "loss": 0.4599, "step": 66290 }, { "epoch": 16.41089108910891, "grad_norm": 0.39506009221076965, "learning_rate": 4.546059512830982e-05, "loss": 0.4626, "step": 66300 }, { "epoch": 16.413366336633665, "grad_norm": 0.4104597568511963, "learning_rate": 4.5446873289411486e-05, "loss": 0.4622, "step": 66310 }, { "epoch": 16.415841584158414, "grad_norm": 0.4074114263057709, "learning_rate": 4.543315179629308e-05, "loss": 0.4595, "step": 66320 }, { "epoch": 16.418316831683168, "grad_norm": 0.4031493365764618, "learning_rate": 4.541943064999663e-05, "loss": 0.4633, "step": 66330 }, { "epoch": 16.42079207920792, "grad_norm": 0.4044497609138489, "learning_rate": 4.5405709851564224e-05, "loss": 0.4584, "step": 66340 }, { "epoch": 16.423267326732674, "grad_norm": 0.4195460081100464, "learning_rate": 4.5391989402037824e-05, "loss": 0.4668, "step": 66350 }, { "epoch": 16.425742574257427, "grad_norm": 0.3990340232849121, "learning_rate": 4.537826930245941e-05, "loss": 0.4681, "step": 66360 }, { "epoch": 16.428217821782177, "grad_norm": 0.4015171527862549, "learning_rate": 4.536454955387096e-05, "loss": 0.4614, "step": 66370 }, { "epoch": 16.43069306930693, "grad_norm": 0.4037661552429199, "learning_rate": 4.535083015731437e-05, "loss": 0.4646, "step": 66380 }, { "epoch": 16.433168316831683, "grad_norm": 0.3970927298069, "learning_rate": 4.533711111383155e-05, "loss": 0.4639, "step": 66390 }, { "epoch": 16.435643564356436, "grad_norm": 0.40477612614631653, "learning_rate": 4.532339242446436e-05, "loss": 0.4585, "step": 66400 }, { "epoch": 16.43811881188119, "grad_norm": 0.4070747494697571, "learning_rate": 4.530967409025466e-05, "loss": 0.4628, "step": 66410 }, { "epoch": 16.440594059405942, "grad_norm": 0.38677120208740234, "learning_rate": 4.529595611224427e-05, "loss": 0.4619, "step": 66420 }, { "epoch": 16.443069306930692, "grad_norm": 0.4167737662792206, "learning_rate": 4.528223849147494e-05, "loss": 0.457, "step": 66430 }, { "epoch": 16.445544554455445, "grad_norm": 0.4235958755016327, "learning_rate": 4.526852122898848e-05, "loss": 0.4695, "step": 66440 }, { "epoch": 16.448019801980198, "grad_norm": 0.43758150935173035, "learning_rate": 4.525480432582662e-05, "loss": 0.4592, "step": 66450 }, { "epoch": 16.45049504950495, "grad_norm": 0.39418184757232666, "learning_rate": 4.524108778303102e-05, "loss": 0.4649, "step": 66460 }, { "epoch": 16.452970297029704, "grad_norm": 0.39299798011779785, "learning_rate": 4.522737160164344e-05, "loss": 0.4626, "step": 66470 }, { "epoch": 16.455445544554454, "grad_norm": 0.3986590802669525, "learning_rate": 4.5213655782705464e-05, "loss": 0.4623, "step": 66480 }, { "epoch": 16.457920792079207, "grad_norm": 0.39646607637405396, "learning_rate": 4.519994032725876e-05, "loss": 0.4682, "step": 66490 }, { "epoch": 16.46039603960396, "grad_norm": 0.4018208980560303, "learning_rate": 4.5186225236344886e-05, "loss": 0.4596, "step": 66500 }, { "epoch": 16.462871287128714, "grad_norm": 0.41131144762039185, "learning_rate": 4.517251051100546e-05, "loss": 0.4602, "step": 66510 }, { "epoch": 16.465346534653467, "grad_norm": 0.3885403573513031, "learning_rate": 4.5158796152282e-05, "loss": 0.4645, "step": 66520 }, { "epoch": 16.467821782178216, "grad_norm": 0.40930843353271484, "learning_rate": 4.5145082161216004e-05, "loss": 0.4625, "step": 66530 }, { "epoch": 16.47029702970297, "grad_norm": 0.41184407472610474, "learning_rate": 4.5131368538848993e-05, "loss": 0.4682, "step": 66540 }, { "epoch": 16.472772277227723, "grad_norm": 0.4060671925544739, "learning_rate": 4.511765528622241e-05, "loss": 0.4681, "step": 66550 }, { "epoch": 16.475247524752476, "grad_norm": 0.40459713339805603, "learning_rate": 4.5103942404377705e-05, "loss": 0.4606, "step": 66560 }, { "epoch": 16.47772277227723, "grad_norm": 0.4126116633415222, "learning_rate": 4.509022989435622e-05, "loss": 0.4642, "step": 66570 }, { "epoch": 16.480198019801982, "grad_norm": 0.4367951452732086, "learning_rate": 4.507651775719941e-05, "loss": 0.4689, "step": 66580 }, { "epoch": 16.48267326732673, "grad_norm": 0.3992130160331726, "learning_rate": 4.5062805993948574e-05, "loss": 0.4577, "step": 66590 }, { "epoch": 16.485148514851485, "grad_norm": 0.4171733260154724, "learning_rate": 4.504909460564502e-05, "loss": 0.4616, "step": 66600 }, { "epoch": 16.487623762376238, "grad_norm": 0.40172362327575684, "learning_rate": 4.5035383593330085e-05, "loss": 0.4633, "step": 66610 }, { "epoch": 16.49009900990099, "grad_norm": 0.41096991300582886, "learning_rate": 4.502167295804501e-05, "loss": 0.4614, "step": 66620 }, { "epoch": 16.492574257425744, "grad_norm": 0.40164971351623535, "learning_rate": 4.500796270083098e-05, "loss": 0.4554, "step": 66630 }, { "epoch": 16.495049504950494, "grad_norm": 0.3922439217567444, "learning_rate": 4.4994252822729274e-05, "loss": 0.46, "step": 66640 }, { "epoch": 16.497524752475247, "grad_norm": 0.39117559790611267, "learning_rate": 4.498054332478101e-05, "loss": 0.4573, "step": 66650 }, { "epoch": 16.5, "grad_norm": 0.3952556848526001, "learning_rate": 4.496683420802737e-05, "loss": 0.4693, "step": 66660 }, { "epoch": 16.502475247524753, "grad_norm": 0.43701642751693726, "learning_rate": 4.495312547350944e-05, "loss": 0.4621, "step": 66670 }, { "epoch": 16.504950495049506, "grad_norm": 0.401186466217041, "learning_rate": 4.493941712226834e-05, "loss": 0.4668, "step": 66680 }, { "epoch": 16.507425742574256, "grad_norm": 0.40698400139808655, "learning_rate": 4.492570915534511e-05, "loss": 0.4623, "step": 66690 }, { "epoch": 16.50990099009901, "grad_norm": 0.4063383638858795, "learning_rate": 4.491200157378077e-05, "loss": 0.4631, "step": 66700 }, { "epoch": 16.512376237623762, "grad_norm": 0.413252055644989, "learning_rate": 4.489829437861635e-05, "loss": 0.4607, "step": 66710 }, { "epoch": 16.514851485148515, "grad_norm": 0.4160444438457489, "learning_rate": 4.488458757089279e-05, "loss": 0.4585, "step": 66720 }, { "epoch": 16.51732673267327, "grad_norm": 0.3887125551700592, "learning_rate": 4.4870881151651036e-05, "loss": 0.4643, "step": 66730 }, { "epoch": 16.519801980198018, "grad_norm": 0.40861743688583374, "learning_rate": 4.4857175121932026e-05, "loss": 0.461, "step": 66740 }, { "epoch": 16.52227722772277, "grad_norm": 0.40916457772254944, "learning_rate": 4.4843469482776634e-05, "loss": 0.4594, "step": 66750 }, { "epoch": 16.524752475247524, "grad_norm": 0.3940983712673187, "learning_rate": 4.4829764235225695e-05, "loss": 0.4663, "step": 66760 }, { "epoch": 16.527227722772277, "grad_norm": 0.39472559094429016, "learning_rate": 4.481605938032003e-05, "loss": 0.4603, "step": 66770 }, { "epoch": 16.52970297029703, "grad_norm": 0.39433321356773376, "learning_rate": 4.480235491910045e-05, "loss": 0.4653, "step": 66780 }, { "epoch": 16.532178217821784, "grad_norm": 0.4037609398365021, "learning_rate": 4.478865085260772e-05, "loss": 0.46, "step": 66790 }, { "epoch": 16.534653465346533, "grad_norm": 0.4119497537612915, "learning_rate": 4.477494718188256e-05, "loss": 0.4601, "step": 66800 }, { "epoch": 16.537128712871286, "grad_norm": 0.4193311631679535, "learning_rate": 4.476124390796569e-05, "loss": 0.463, "step": 66810 }, { "epoch": 16.53960396039604, "grad_norm": 0.38819411396980286, "learning_rate": 4.474754103189777e-05, "loss": 0.4613, "step": 66820 }, { "epoch": 16.542079207920793, "grad_norm": 0.38596466183662415, "learning_rate": 4.473383855471942e-05, "loss": 0.4657, "step": 66830 }, { "epoch": 16.544554455445546, "grad_norm": 0.40186551213264465, "learning_rate": 4.4720136477471304e-05, "loss": 0.4628, "step": 66840 }, { "epoch": 16.547029702970296, "grad_norm": 0.4142504930496216, "learning_rate": 4.4706434801193966e-05, "loss": 0.4624, "step": 66850 }, { "epoch": 16.54950495049505, "grad_norm": 0.4088940918445587, "learning_rate": 4.469273352692799e-05, "loss": 0.4598, "step": 66860 }, { "epoch": 16.551980198019802, "grad_norm": 0.3989747166633606, "learning_rate": 4.467903265571385e-05, "loss": 0.4547, "step": 66870 }, { "epoch": 16.554455445544555, "grad_norm": 0.392657607793808, "learning_rate": 4.4665332188592084e-05, "loss": 0.4652, "step": 66880 }, { "epoch": 16.556930693069308, "grad_norm": 0.412692666053772, "learning_rate": 4.4651632126603133e-05, "loss": 0.4606, "step": 66890 }, { "epoch": 16.55940594059406, "grad_norm": 0.39365947246551514, "learning_rate": 4.4637932470787406e-05, "loss": 0.4604, "step": 66900 }, { "epoch": 16.56188118811881, "grad_norm": 0.38402408361434937, "learning_rate": 4.462423322218534e-05, "loss": 0.4625, "step": 66910 }, { "epoch": 16.564356435643564, "grad_norm": 0.4177553355693817, "learning_rate": 4.4610534381837285e-05, "loss": 0.4595, "step": 66920 }, { "epoch": 16.566831683168317, "grad_norm": 0.4279825687408447, "learning_rate": 4.4596835950783555e-05, "loss": 0.4635, "step": 66930 }, { "epoch": 16.56930693069307, "grad_norm": 0.41899025440216064, "learning_rate": 4.45831379300645e-05, "loss": 0.4591, "step": 66940 }, { "epoch": 16.571782178217823, "grad_norm": 0.390260249376297, "learning_rate": 4.456944032072037e-05, "loss": 0.4634, "step": 66950 }, { "epoch": 16.574257425742573, "grad_norm": 0.3902193605899811, "learning_rate": 4.45557431237914e-05, "loss": 0.4664, "step": 66960 }, { "epoch": 16.576732673267326, "grad_norm": 0.4094404876232147, "learning_rate": 4.45420463403178e-05, "loss": 0.4603, "step": 66970 }, { "epoch": 16.57920792079208, "grad_norm": 0.3796340823173523, "learning_rate": 4.4528349971339775e-05, "loss": 0.4646, "step": 66980 }, { "epoch": 16.581683168316832, "grad_norm": 0.4019617736339569, "learning_rate": 4.451465401789747e-05, "loss": 0.4607, "step": 66990 }, { "epoch": 16.584158415841586, "grad_norm": 0.413560152053833, "learning_rate": 4.450095848103097e-05, "loss": 0.4663, "step": 67000 }, { "epoch": 16.586633663366335, "grad_norm": 0.3973037302494049, "learning_rate": 4.44872633617804e-05, "loss": 0.4613, "step": 67010 }, { "epoch": 16.58910891089109, "grad_norm": 0.406139075756073, "learning_rate": 4.447356866118579e-05, "loss": 0.4606, "step": 67020 }, { "epoch": 16.59158415841584, "grad_norm": 0.4195728600025177, "learning_rate": 4.445987438028718e-05, "loss": 0.4652, "step": 67030 }, { "epoch": 16.594059405940595, "grad_norm": 0.38939160108566284, "learning_rate": 4.444618052012452e-05, "loss": 0.462, "step": 67040 }, { "epoch": 16.596534653465348, "grad_norm": 0.39475560188293457, "learning_rate": 4.443248708173783e-05, "loss": 0.4597, "step": 67050 }, { "epoch": 16.599009900990097, "grad_norm": 0.39964932203292847, "learning_rate": 4.4418794066166994e-05, "loss": 0.4661, "step": 67060 }, { "epoch": 16.60148514851485, "grad_norm": 0.3942404091358185, "learning_rate": 4.44051014744519e-05, "loss": 0.4687, "step": 67070 }, { "epoch": 16.603960396039604, "grad_norm": 0.3762909173965454, "learning_rate": 4.4391409307632434e-05, "loss": 0.4641, "step": 67080 }, { "epoch": 16.606435643564357, "grad_norm": 0.39532315731048584, "learning_rate": 4.4377717566748424e-05, "loss": 0.4642, "step": 67090 }, { "epoch": 16.60891089108911, "grad_norm": 0.4174122214317322, "learning_rate": 4.436402625283965e-05, "loss": 0.4646, "step": 67100 }, { "epoch": 16.611386138613863, "grad_norm": 0.4228416681289673, "learning_rate": 4.4350335366945905e-05, "loss": 0.4598, "step": 67110 }, { "epoch": 16.613861386138613, "grad_norm": 0.410400927066803, "learning_rate": 4.43366449101069e-05, "loss": 0.4683, "step": 67120 }, { "epoch": 16.616336633663366, "grad_norm": 0.3887132704257965, "learning_rate": 4.4322954883362336e-05, "loss": 0.4614, "step": 67130 }, { "epoch": 16.61881188118812, "grad_norm": 0.40299931168556213, "learning_rate": 4.4309265287751875e-05, "loss": 0.4664, "step": 67140 }, { "epoch": 16.621287128712872, "grad_norm": 0.3986513912677765, "learning_rate": 4.429557612431516e-05, "loss": 0.4667, "step": 67150 }, { "epoch": 16.623762376237625, "grad_norm": 0.40694093704223633, "learning_rate": 4.4281887394091814e-05, "loss": 0.4647, "step": 67160 }, { "epoch": 16.626237623762375, "grad_norm": 0.3912188708782196, "learning_rate": 4.426819909812135e-05, "loss": 0.4669, "step": 67170 }, { "epoch": 16.628712871287128, "grad_norm": 0.4104647934436798, "learning_rate": 4.425451123744337e-05, "loss": 0.4651, "step": 67180 }, { "epoch": 16.63118811881188, "grad_norm": 0.40362676978111267, "learning_rate": 4.424082381309733e-05, "loss": 0.4636, "step": 67190 }, { "epoch": 16.633663366336634, "grad_norm": 0.4363099932670593, "learning_rate": 4.422713682612271e-05, "loss": 0.4593, "step": 67200 }, { "epoch": 16.636138613861387, "grad_norm": 0.4059889018535614, "learning_rate": 4.421345027755896e-05, "loss": 0.4683, "step": 67210 }, { "epoch": 16.638613861386137, "grad_norm": 0.4206155836582184, "learning_rate": 4.4199764168445476e-05, "loss": 0.4668, "step": 67220 }, { "epoch": 16.64108910891089, "grad_norm": 0.39053499698638916, "learning_rate": 4.4186078499821627e-05, "loss": 0.4616, "step": 67230 }, { "epoch": 16.643564356435643, "grad_norm": 0.42431002855300903, "learning_rate": 4.4172393272726725e-05, "loss": 0.4631, "step": 67240 }, { "epoch": 16.646039603960396, "grad_norm": 0.41337692737579346, "learning_rate": 4.4158708488200114e-05, "loss": 0.4726, "step": 67250 }, { "epoch": 16.64851485148515, "grad_norm": 0.38507309556007385, "learning_rate": 4.4145024147281036e-05, "loss": 0.4648, "step": 67260 }, { "epoch": 16.650990099009903, "grad_norm": 0.3856373727321625, "learning_rate": 4.413134025100873e-05, "loss": 0.4616, "step": 67270 }, { "epoch": 16.653465346534652, "grad_norm": 0.38947808742523193, "learning_rate": 4.411765680042241e-05, "loss": 0.4593, "step": 67280 }, { "epoch": 16.655940594059405, "grad_norm": 0.41345134377479553, "learning_rate": 4.410397379656125e-05, "loss": 0.4655, "step": 67290 }, { "epoch": 16.65841584158416, "grad_norm": 0.3943387567996979, "learning_rate": 4.409029124046435e-05, "loss": 0.4735, "step": 67300 }, { "epoch": 16.66089108910891, "grad_norm": 0.40485644340515137, "learning_rate": 4.407660913317084e-05, "loss": 0.4669, "step": 67310 }, { "epoch": 16.663366336633665, "grad_norm": 0.4088256061077118, "learning_rate": 4.4062927475719766e-05, "loss": 0.4572, "step": 67320 }, { "epoch": 16.665841584158414, "grad_norm": 0.40303295850753784, "learning_rate": 4.404924626915019e-05, "loss": 0.4661, "step": 67330 }, { "epoch": 16.668316831683168, "grad_norm": 0.4068496823310852, "learning_rate": 4.403556551450105e-05, "loss": 0.4582, "step": 67340 }, { "epoch": 16.67079207920792, "grad_norm": 0.38456976413726807, "learning_rate": 4.402188521281138e-05, "loss": 0.4708, "step": 67350 }, { "epoch": 16.673267326732674, "grad_norm": 0.38850486278533936, "learning_rate": 4.400820536512007e-05, "loss": 0.4623, "step": 67360 }, { "epoch": 16.675742574257427, "grad_norm": 0.41626405715942383, "learning_rate": 4.3994525972466006e-05, "loss": 0.4645, "step": 67370 }, { "epoch": 16.678217821782177, "grad_norm": 0.37796011567115784, "learning_rate": 4.3980847035888074e-05, "loss": 0.4616, "step": 67380 }, { "epoch": 16.68069306930693, "grad_norm": 0.39247748255729675, "learning_rate": 4.3967168556425085e-05, "loss": 0.4605, "step": 67390 }, { "epoch": 16.683168316831683, "grad_norm": 0.3858839273452759, "learning_rate": 4.395349053511584e-05, "loss": 0.4663, "step": 67400 }, { "epoch": 16.685643564356436, "grad_norm": 0.38522869348526, "learning_rate": 4.393981297299907e-05, "loss": 0.4607, "step": 67410 }, { "epoch": 16.68811881188119, "grad_norm": 0.39789363741874695, "learning_rate": 4.392613587111351e-05, "loss": 0.4607, "step": 67420 }, { "epoch": 16.69059405940594, "grad_norm": 0.40151116251945496, "learning_rate": 4.391245923049786e-05, "loss": 0.4637, "step": 67430 }, { "epoch": 16.693069306930692, "grad_norm": 0.4130855202674866, "learning_rate": 4.389878305219074e-05, "loss": 0.4645, "step": 67440 }, { "epoch": 16.695544554455445, "grad_norm": 0.43082883954048157, "learning_rate": 4.388510733723079e-05, "loss": 0.4609, "step": 67450 }, { "epoch": 16.698019801980198, "grad_norm": 0.4295928478240967, "learning_rate": 4.38714320866566e-05, "loss": 0.468, "step": 67460 }, { "epoch": 16.70049504950495, "grad_norm": 0.4183633625507355, "learning_rate": 4.3857757301506664e-05, "loss": 0.4598, "step": 67470 }, { "epoch": 16.702970297029704, "grad_norm": 0.4078315198421478, "learning_rate": 4.384408298281956e-05, "loss": 0.4631, "step": 67480 }, { "epoch": 16.705445544554454, "grad_norm": 0.41253599524497986, "learning_rate": 4.3830409131633715e-05, "loss": 0.4659, "step": 67490 }, { "epoch": 16.707920792079207, "grad_norm": 0.3987802565097809, "learning_rate": 4.381673574898758e-05, "loss": 0.4645, "step": 67500 }, { "epoch": 16.71039603960396, "grad_norm": 0.3970479965209961, "learning_rate": 4.3803062835919547e-05, "loss": 0.4603, "step": 67510 }, { "epoch": 16.712871287128714, "grad_norm": 0.41399189829826355, "learning_rate": 4.378939039346802e-05, "loss": 0.464, "step": 67520 }, { "epoch": 16.715346534653467, "grad_norm": 0.3792743980884552, "learning_rate": 4.377571842267129e-05, "loss": 0.4608, "step": 67530 }, { "epoch": 16.717821782178216, "grad_norm": 0.3766933083534241, "learning_rate": 4.3762046924567665e-05, "loss": 0.47, "step": 67540 }, { "epoch": 16.72029702970297, "grad_norm": 0.38025879859924316, "learning_rate": 4.374837590019541e-05, "loss": 0.4588, "step": 67550 }, { "epoch": 16.722772277227723, "grad_norm": 0.4057876169681549, "learning_rate": 4.3734705350592765e-05, "loss": 0.4714, "step": 67560 }, { "epoch": 16.725247524752476, "grad_norm": 0.3983391225337982, "learning_rate": 4.372103527679788e-05, "loss": 0.4676, "step": 67570 }, { "epoch": 16.72772277227723, "grad_norm": 0.4208782911300659, "learning_rate": 4.370736567984894e-05, "loss": 0.4623, "step": 67580 }, { "epoch": 16.730198019801982, "grad_norm": 0.4006892442703247, "learning_rate": 4.369369656078406e-05, "loss": 0.4704, "step": 67590 }, { "epoch": 16.73267326732673, "grad_norm": 0.44494882225990295, "learning_rate": 4.368002792064129e-05, "loss": 0.4649, "step": 67600 }, { "epoch": 16.735148514851485, "grad_norm": 0.3991153836250305, "learning_rate": 4.366635976045869e-05, "loss": 0.4641, "step": 67610 }, { "epoch": 16.737623762376238, "grad_norm": 0.3813631236553192, "learning_rate": 4.3652692081274276e-05, "loss": 0.4601, "step": 67620 }, { "epoch": 16.74009900990099, "grad_norm": 0.400333434343338, "learning_rate": 4.3639024884126023e-05, "loss": 0.4587, "step": 67630 }, { "epoch": 16.742574257425744, "grad_norm": 0.389137864112854, "learning_rate": 4.3625358170051814e-05, "loss": 0.4617, "step": 67640 }, { "epoch": 16.745049504950494, "grad_norm": 0.4050292670726776, "learning_rate": 4.3611691940089616e-05, "loss": 0.4622, "step": 67650 }, { "epoch": 16.747524752475247, "grad_norm": 0.39449208974838257, "learning_rate": 4.359802619527725e-05, "loss": 0.4624, "step": 67660 }, { "epoch": 16.75, "grad_norm": 0.40358155965805054, "learning_rate": 4.358436093665253e-05, "loss": 0.4613, "step": 67670 }, { "epoch": 16.752475247524753, "grad_norm": 0.39196375012397766, "learning_rate": 4.3570696165253264e-05, "loss": 0.4607, "step": 67680 }, { "epoch": 16.754950495049506, "grad_norm": 0.38742297887802124, "learning_rate": 4.35570318821172e-05, "loss": 0.4653, "step": 67690 }, { "epoch": 16.757425742574256, "grad_norm": 0.4104333817958832, "learning_rate": 4.354336808828206e-05, "loss": 0.4633, "step": 67700 }, { "epoch": 16.75990099009901, "grad_norm": 0.38343384861946106, "learning_rate": 4.352970478478548e-05, "loss": 0.4623, "step": 67710 }, { "epoch": 16.762376237623762, "grad_norm": 0.40379977226257324, "learning_rate": 4.3516041972665125e-05, "loss": 0.4618, "step": 67720 }, { "epoch": 16.764851485148515, "grad_norm": 0.3914257884025574, "learning_rate": 4.35023796529586e-05, "loss": 0.4587, "step": 67730 }, { "epoch": 16.76732673267327, "grad_norm": 0.40854576230049133, "learning_rate": 4.348871782670345e-05, "loss": 0.4625, "step": 67740 }, { "epoch": 16.769801980198018, "grad_norm": 0.3662225902080536, "learning_rate": 4.3475056494937214e-05, "loss": 0.4647, "step": 67750 }, { "epoch": 16.77227722772277, "grad_norm": 0.39817318320274353, "learning_rate": 4.3461395658697384e-05, "loss": 0.4682, "step": 67760 }, { "epoch": 16.774752475247524, "grad_norm": 0.42035388946533203, "learning_rate": 4.34477353190214e-05, "loss": 0.4628, "step": 67770 }, { "epoch": 16.777227722772277, "grad_norm": 0.4184127748012543, "learning_rate": 4.3434075476946665e-05, "loss": 0.457, "step": 67780 }, { "epoch": 16.77970297029703, "grad_norm": 0.3922857642173767, "learning_rate": 4.342041613351058e-05, "loss": 0.4639, "step": 67790 }, { "epoch": 16.782178217821784, "grad_norm": 0.4018504023551941, "learning_rate": 4.340675728975046e-05, "loss": 0.4646, "step": 67800 }, { "epoch": 16.784653465346533, "grad_norm": 0.3924741744995117, "learning_rate": 4.33930989467036e-05, "loss": 0.4639, "step": 67810 }, { "epoch": 16.787128712871286, "grad_norm": 0.38840925693511963, "learning_rate": 4.3379441105407306e-05, "loss": 0.4572, "step": 67820 }, { "epoch": 16.78960396039604, "grad_norm": 0.4046109914779663, "learning_rate": 4.336578376689875e-05, "loss": 0.4603, "step": 67830 }, { "epoch": 16.792079207920793, "grad_norm": 0.39819538593292236, "learning_rate": 4.3352126932215126e-05, "loss": 0.4669, "step": 67840 }, { "epoch": 16.794554455445546, "grad_norm": 0.3949044644832611, "learning_rate": 4.33384706023936e-05, "loss": 0.4581, "step": 67850 }, { "epoch": 16.797029702970296, "grad_norm": 0.39236512780189514, "learning_rate": 4.332481477847127e-05, "loss": 0.4655, "step": 67860 }, { "epoch": 16.79950495049505, "grad_norm": 0.3735702633857727, "learning_rate": 4.331115946148522e-05, "loss": 0.4657, "step": 67870 }, { "epoch": 16.801980198019802, "grad_norm": 0.3900485038757324, "learning_rate": 4.329750465247243e-05, "loss": 0.4587, "step": 67880 }, { "epoch": 16.804455445544555, "grad_norm": 0.4158833622932434, "learning_rate": 4.3283850352469956e-05, "loss": 0.4619, "step": 67890 }, { "epoch": 16.806930693069308, "grad_norm": 0.38883018493652344, "learning_rate": 4.327019656251472e-05, "loss": 0.4602, "step": 67900 }, { "epoch": 16.80940594059406, "grad_norm": 0.39094507694244385, "learning_rate": 4.325654328364363e-05, "loss": 0.4569, "step": 67910 }, { "epoch": 16.81188118811881, "grad_norm": 0.3998451828956604, "learning_rate": 4.324289051689359e-05, "loss": 0.4663, "step": 67920 }, { "epoch": 16.814356435643564, "grad_norm": 0.3897075653076172, "learning_rate": 4.3229238263301425e-05, "loss": 0.4576, "step": 67930 }, { "epoch": 16.816831683168317, "grad_norm": 0.3977862298488617, "learning_rate": 4.3215586523903907e-05, "loss": 0.4626, "step": 67940 }, { "epoch": 16.81930693069307, "grad_norm": 0.4060142934322357, "learning_rate": 4.320193529973785e-05, "loss": 0.4608, "step": 67950 }, { "epoch": 16.821782178217823, "grad_norm": 0.4176364243030548, "learning_rate": 4.318828459183992e-05, "loss": 0.4593, "step": 67960 }, { "epoch": 16.824257425742573, "grad_norm": 0.37170708179473877, "learning_rate": 4.317463440124684e-05, "loss": 0.4606, "step": 67970 }, { "epoch": 16.826732673267326, "grad_norm": 0.3951549828052521, "learning_rate": 4.316098472899521e-05, "loss": 0.4654, "step": 67980 }, { "epoch": 16.82920792079208, "grad_norm": 0.4144608676433563, "learning_rate": 4.314733557612167e-05, "loss": 0.4641, "step": 67990 }, { "epoch": 16.831683168316832, "grad_norm": 0.3947773873806, "learning_rate": 4.313368694366278e-05, "loss": 0.4666, "step": 68000 }, { "epoch": 16.834158415841586, "grad_norm": 0.41391026973724365, "learning_rate": 4.3120038832655035e-05, "loss": 0.4647, "step": 68010 }, { "epoch": 16.836633663366335, "grad_norm": 0.4289913475513458, "learning_rate": 4.3106391244134946e-05, "loss": 0.4659, "step": 68020 }, { "epoch": 16.83910891089109, "grad_norm": 0.4197275936603546, "learning_rate": 4.309274417913895e-05, "loss": 0.4688, "step": 68030 }, { "epoch": 16.84158415841584, "grad_norm": 0.42780542373657227, "learning_rate": 4.3079097638703436e-05, "loss": 0.4628, "step": 68040 }, { "epoch": 16.844059405940595, "grad_norm": 0.41990697383880615, "learning_rate": 4.306545162386479e-05, "loss": 0.4626, "step": 68050 }, { "epoch": 16.846534653465348, "grad_norm": 0.42597559094429016, "learning_rate": 4.305180613565935e-05, "loss": 0.4627, "step": 68060 }, { "epoch": 16.849009900990097, "grad_norm": 0.39893215894699097, "learning_rate": 4.303816117512336e-05, "loss": 0.4662, "step": 68070 }, { "epoch": 16.85148514851485, "grad_norm": 0.4355306029319763, "learning_rate": 4.302451674329308e-05, "loss": 0.4642, "step": 68080 }, { "epoch": 16.853960396039604, "grad_norm": 0.4401584565639496, "learning_rate": 4.3010872841204726e-05, "loss": 0.4643, "step": 68090 }, { "epoch": 16.856435643564357, "grad_norm": 0.40544548630714417, "learning_rate": 4.2997229469894456e-05, "loss": 0.4669, "step": 68100 }, { "epoch": 16.85891089108911, "grad_norm": 0.38995182514190674, "learning_rate": 4.2983586630398384e-05, "loss": 0.4608, "step": 68110 }, { "epoch": 16.861386138613863, "grad_norm": 0.4071565270423889, "learning_rate": 4.296994432375262e-05, "loss": 0.4599, "step": 68120 }, { "epoch": 16.863861386138613, "grad_norm": 0.4023283123970032, "learning_rate": 4.295630255099319e-05, "loss": 0.4625, "step": 68130 }, { "epoch": 16.866336633663366, "grad_norm": 0.3608205020427704, "learning_rate": 4.2942661313156075e-05, "loss": 0.4601, "step": 68140 }, { "epoch": 16.86881188118812, "grad_norm": 0.3813001811504364, "learning_rate": 4.2929020611277274e-05, "loss": 0.4597, "step": 68150 }, { "epoch": 16.871287128712872, "grad_norm": 0.3738459348678589, "learning_rate": 4.29153804463927e-05, "loss": 0.4672, "step": 68160 }, { "epoch": 16.873762376237625, "grad_norm": 0.38650012016296387, "learning_rate": 4.290174081953823e-05, "loss": 0.4591, "step": 68170 }, { "epoch": 16.876237623762375, "grad_norm": 0.39680251479148865, "learning_rate": 4.2888101731749666e-05, "loss": 0.4669, "step": 68180 }, { "epoch": 16.878712871287128, "grad_norm": 0.3629950284957886, "learning_rate": 4.287446318406287e-05, "loss": 0.4593, "step": 68190 }, { "epoch": 16.88118811881188, "grad_norm": 0.3820855915546417, "learning_rate": 4.286082517751357e-05, "loss": 0.4638, "step": 68200 }, { "epoch": 16.883663366336634, "grad_norm": 0.41004833579063416, "learning_rate": 4.284718771313747e-05, "loss": 0.4597, "step": 68210 }, { "epoch": 16.886138613861387, "grad_norm": 0.3998149633407593, "learning_rate": 4.283355079197027e-05, "loss": 0.463, "step": 68220 }, { "epoch": 16.888613861386137, "grad_norm": 0.3972175121307373, "learning_rate": 4.28199144150476e-05, "loss": 0.4638, "step": 68230 }, { "epoch": 16.89108910891089, "grad_norm": 0.4094051718711853, "learning_rate": 4.280627858340503e-05, "loss": 0.4641, "step": 68240 }, { "epoch": 16.893564356435643, "grad_norm": 0.39885959029197693, "learning_rate": 4.279264329807813e-05, "loss": 0.4602, "step": 68250 }, { "epoch": 16.896039603960396, "grad_norm": 0.3780154883861542, "learning_rate": 4.277900856010241e-05, "loss": 0.4596, "step": 68260 }, { "epoch": 16.89851485148515, "grad_norm": 0.3949078619480133, "learning_rate": 4.276537437051333e-05, "loss": 0.4586, "step": 68270 }, { "epoch": 16.900990099009903, "grad_norm": 0.38566485047340393, "learning_rate": 4.275174073034632e-05, "loss": 0.4597, "step": 68280 }, { "epoch": 16.903465346534652, "grad_norm": 0.38879379630088806, "learning_rate": 4.273810764063678e-05, "loss": 0.4651, "step": 68290 }, { "epoch": 16.905940594059405, "grad_norm": 0.38734737038612366, "learning_rate": 4.2724475102420045e-05, "loss": 0.4646, "step": 68300 }, { "epoch": 16.90841584158416, "grad_norm": 0.4144049286842346, "learning_rate": 4.271084311673138e-05, "loss": 0.4635, "step": 68310 }, { "epoch": 16.91089108910891, "grad_norm": 0.40336620807647705, "learning_rate": 4.269721168460611e-05, "loss": 0.4595, "step": 68320 }, { "epoch": 16.913366336633665, "grad_norm": 0.4063796401023865, "learning_rate": 4.268358080707941e-05, "loss": 0.4612, "step": 68330 }, { "epoch": 16.915841584158414, "grad_norm": 0.41113874316215515, "learning_rate": 4.266995048518647e-05, "loss": 0.4629, "step": 68340 }, { "epoch": 16.918316831683168, "grad_norm": 0.4189111888408661, "learning_rate": 4.265632071996238e-05, "loss": 0.4604, "step": 68350 }, { "epoch": 16.92079207920792, "grad_norm": 0.4098854660987854, "learning_rate": 4.2642691512442315e-05, "loss": 0.4618, "step": 68360 }, { "epoch": 16.923267326732674, "grad_norm": 0.39104679226875305, "learning_rate": 4.262906286366125e-05, "loss": 0.4608, "step": 68370 }, { "epoch": 16.925742574257427, "grad_norm": 0.38821709156036377, "learning_rate": 4.261543477465421e-05, "loss": 0.459, "step": 68380 }, { "epoch": 16.928217821782177, "grad_norm": 0.35850492119789124, "learning_rate": 4.260180724645618e-05, "loss": 0.4584, "step": 68390 }, { "epoch": 16.93069306930693, "grad_norm": 0.4010511636734009, "learning_rate": 4.2588180280102064e-05, "loss": 0.4617, "step": 68400 }, { "epoch": 16.933168316831683, "grad_norm": 0.374199241399765, "learning_rate": 4.257455387662673e-05, "loss": 0.4617, "step": 68410 }, { "epoch": 16.935643564356436, "grad_norm": 0.40655821561813354, "learning_rate": 4.2560928037065035e-05, "loss": 0.4675, "step": 68420 }, { "epoch": 16.93811881188119, "grad_norm": 0.38269519805908203, "learning_rate": 4.254730276245177e-05, "loss": 0.4615, "step": 68430 }, { "epoch": 16.94059405940594, "grad_norm": 0.39609912037849426, "learning_rate": 4.253367805382167e-05, "loss": 0.4648, "step": 68440 }, { "epoch": 16.943069306930692, "grad_norm": 0.419944167137146, "learning_rate": 4.252005391220943e-05, "loss": 0.4649, "step": 68450 }, { "epoch": 16.945544554455445, "grad_norm": 0.37825801968574524, "learning_rate": 4.250643033864976e-05, "loss": 0.4645, "step": 68460 }, { "epoch": 16.948019801980198, "grad_norm": 0.397494912147522, "learning_rate": 4.249280733417725e-05, "loss": 0.4639, "step": 68470 }, { "epoch": 16.95049504950495, "grad_norm": 0.3710395097732544, "learning_rate": 4.247918489982645e-05, "loss": 0.4579, "step": 68480 }, { "epoch": 16.952970297029704, "grad_norm": 0.38998138904571533, "learning_rate": 4.246556303663196e-05, "loss": 0.4629, "step": 68490 }, { "epoch": 16.955445544554454, "grad_norm": 0.40519022941589355, "learning_rate": 4.245194174562822e-05, "loss": 0.4665, "step": 68500 }, { "epoch": 16.957920792079207, "grad_norm": 0.38306495547294617, "learning_rate": 4.243832102784968e-05, "loss": 0.4658, "step": 68510 }, { "epoch": 16.96039603960396, "grad_norm": 0.40602999925613403, "learning_rate": 4.242470088433077e-05, "loss": 0.4627, "step": 68520 }, { "epoch": 16.962871287128714, "grad_norm": 0.37951573729515076, "learning_rate": 4.2411081316105824e-05, "loss": 0.4566, "step": 68530 }, { "epoch": 16.965346534653467, "grad_norm": 0.4123370051383972, "learning_rate": 4.239746232420919e-05, "loss": 0.458, "step": 68540 }, { "epoch": 16.967821782178216, "grad_norm": 0.41247716546058655, "learning_rate": 4.23838439096751e-05, "loss": 0.4599, "step": 68550 }, { "epoch": 16.97029702970297, "grad_norm": 0.4072439670562744, "learning_rate": 4.237022607353781e-05, "loss": 0.4633, "step": 68560 }, { "epoch": 16.972772277227723, "grad_norm": 0.40695762634277344, "learning_rate": 4.23566088168315e-05, "loss": 0.4654, "step": 68570 }, { "epoch": 16.975247524752476, "grad_norm": 0.4155096411705017, "learning_rate": 4.23429921405903e-05, "loss": 0.4641, "step": 68580 }, { "epoch": 16.97772277227723, "grad_norm": 0.4080570638179779, "learning_rate": 4.232937604584832e-05, "loss": 0.4586, "step": 68590 }, { "epoch": 16.980198019801982, "grad_norm": 0.44924187660217285, "learning_rate": 4.231576053363962e-05, "loss": 0.4596, "step": 68600 }, { "epoch": 16.98267326732673, "grad_norm": 0.3955889344215393, "learning_rate": 4.230214560499818e-05, "loss": 0.4618, "step": 68610 }, { "epoch": 16.985148514851485, "grad_norm": 0.3941110074520111, "learning_rate": 4.2288531260957974e-05, "loss": 0.4603, "step": 68620 }, { "epoch": 16.987623762376238, "grad_norm": 0.4056500494480133, "learning_rate": 4.2274917502552946e-05, "loss": 0.4632, "step": 68630 }, { "epoch": 16.99009900990099, "grad_norm": 0.40137338638305664, "learning_rate": 4.226130433081694e-05, "loss": 0.4586, "step": 68640 }, { "epoch": 16.992574257425744, "grad_norm": 0.3990088105201721, "learning_rate": 4.224769174678379e-05, "loss": 0.4615, "step": 68650 }, { "epoch": 16.995049504950494, "grad_norm": 0.3957236707210541, "learning_rate": 4.223407975148732e-05, "loss": 0.4622, "step": 68660 }, { "epoch": 16.997524752475247, "grad_norm": 0.38212233781814575, "learning_rate": 4.222046834596123e-05, "loss": 0.4603, "step": 68670 }, { "epoch": 17.0, "grad_norm": 0.3927862346172333, "learning_rate": 4.2206857531239217e-05, "loss": 0.4585, "step": 68680 }, { "epoch": 17.002475247524753, "grad_norm": 0.3976561725139618, "learning_rate": 4.2193247308354955e-05, "loss": 0.4584, "step": 68690 }, { "epoch": 17.004950495049506, "grad_norm": 0.37638112902641296, "learning_rate": 4.217963767834204e-05, "loss": 0.4579, "step": 68700 }, { "epoch": 17.007425742574256, "grad_norm": 0.36468690633773804, "learning_rate": 4.216602864223404e-05, "loss": 0.4647, "step": 68710 }, { "epoch": 17.00990099009901, "grad_norm": 0.36610788106918335, "learning_rate": 4.2152420201064434e-05, "loss": 0.4628, "step": 68720 }, { "epoch": 17.012376237623762, "grad_norm": 0.39146241545677185, "learning_rate": 4.213881235586676e-05, "loss": 0.4577, "step": 68730 }, { "epoch": 17.014851485148515, "grad_norm": 0.39273306727409363, "learning_rate": 4.212520510767439e-05, "loss": 0.4614, "step": 68740 }, { "epoch": 17.01732673267327, "grad_norm": 0.38707292079925537, "learning_rate": 4.211159845752072e-05, "loss": 0.4603, "step": 68750 }, { "epoch": 17.019801980198018, "grad_norm": 0.3933263123035431, "learning_rate": 4.20979924064391e-05, "loss": 0.4565, "step": 68760 }, { "epoch": 17.02227722772277, "grad_norm": 0.3778023421764374, "learning_rate": 4.2084386955462815e-05, "loss": 0.4642, "step": 68770 }, { "epoch": 17.024752475247524, "grad_norm": 0.3936576545238495, "learning_rate": 4.2070782105625064e-05, "loss": 0.4578, "step": 68780 }, { "epoch": 17.027227722772277, "grad_norm": 0.3994792699813843, "learning_rate": 4.205717785795912e-05, "loss": 0.4656, "step": 68790 }, { "epoch": 17.02970297029703, "grad_norm": 0.3826717138290405, "learning_rate": 4.204357421349809e-05, "loss": 0.4603, "step": 68800 }, { "epoch": 17.032178217821784, "grad_norm": 0.41775280237197876, "learning_rate": 4.2029971173275094e-05, "loss": 0.4626, "step": 68810 }, { "epoch": 17.034653465346533, "grad_norm": 0.3951377272605896, "learning_rate": 4.201636873832318e-05, "loss": 0.4642, "step": 68820 }, { "epoch": 17.037128712871286, "grad_norm": 0.3980152904987335, "learning_rate": 4.200276690967538e-05, "loss": 0.4619, "step": 68830 }, { "epoch": 17.03960396039604, "grad_norm": 0.4127873182296753, "learning_rate": 4.198916568836467e-05, "loss": 0.4582, "step": 68840 }, { "epoch": 17.042079207920793, "grad_norm": 0.380984365940094, "learning_rate": 4.1975565075423936e-05, "loss": 0.4678, "step": 68850 }, { "epoch": 17.044554455445546, "grad_norm": 0.39408543705940247, "learning_rate": 4.196196507188608e-05, "loss": 0.4663, "step": 68860 }, { "epoch": 17.047029702970296, "grad_norm": 0.4006730914115906, "learning_rate": 4.194836567878394e-05, "loss": 0.4634, "step": 68870 }, { "epoch": 17.04950495049505, "grad_norm": 0.3829180598258972, "learning_rate": 4.193476689715028e-05, "loss": 0.4599, "step": 68880 }, { "epoch": 17.051980198019802, "grad_norm": 0.39388296008110046, "learning_rate": 4.192116872801786e-05, "loss": 0.4618, "step": 68890 }, { "epoch": 17.054455445544555, "grad_norm": 0.39576274156570435, "learning_rate": 4.190757117241936e-05, "loss": 0.4603, "step": 68900 }, { "epoch": 17.056930693069308, "grad_norm": 0.4078618586063385, "learning_rate": 4.1893974231387424e-05, "loss": 0.4628, "step": 68910 }, { "epoch": 17.059405940594058, "grad_norm": 0.3816097676753998, "learning_rate": 4.1880377905954636e-05, "loss": 0.4609, "step": 68920 }, { "epoch": 17.06188118811881, "grad_norm": 0.3840703070163727, "learning_rate": 4.186678219715357e-05, "loss": 0.4643, "step": 68930 }, { "epoch": 17.064356435643564, "grad_norm": 0.4040839374065399, "learning_rate": 4.185318710601672e-05, "loss": 0.4592, "step": 68940 }, { "epoch": 17.066831683168317, "grad_norm": 0.3978244662284851, "learning_rate": 4.183959263357654e-05, "loss": 0.4607, "step": 68950 }, { "epoch": 17.06930693069307, "grad_norm": 0.3875484764575958, "learning_rate": 4.182599878086547e-05, "loss": 0.4641, "step": 68960 }, { "epoch": 17.071782178217823, "grad_norm": 0.3925911784172058, "learning_rate": 4.1812405548915825e-05, "loss": 0.4666, "step": 68970 }, { "epoch": 17.074257425742573, "grad_norm": 0.39115428924560547, "learning_rate": 4.1798812938759944e-05, "loss": 0.4602, "step": 68980 }, { "epoch": 17.076732673267326, "grad_norm": 0.3875017762184143, "learning_rate": 4.1785220951430106e-05, "loss": 0.4556, "step": 68990 }, { "epoch": 17.07920792079208, "grad_norm": 0.4180142879486084, "learning_rate": 4.177162958795852e-05, "loss": 0.4662, "step": 69000 }, { "epoch": 17.081683168316832, "grad_norm": 0.4325101971626282, "learning_rate": 4.175803884937738e-05, "loss": 0.464, "step": 69010 }, { "epoch": 17.084158415841586, "grad_norm": 0.4062599837779999, "learning_rate": 4.174444873671877e-05, "loss": 0.4587, "step": 69020 }, { "epoch": 17.086633663366335, "grad_norm": 0.3913719356060028, "learning_rate": 4.173085925101482e-05, "loss": 0.46, "step": 69030 }, { "epoch": 17.08910891089109, "grad_norm": 0.38506919145584106, "learning_rate": 4.171727039329752e-05, "loss": 0.4681, "step": 69040 }, { "epoch": 17.09158415841584, "grad_norm": 0.3817078769207001, "learning_rate": 4.170368216459888e-05, "loss": 0.4642, "step": 69050 }, { "epoch": 17.094059405940595, "grad_norm": 0.39888229966163635, "learning_rate": 4.169009456595083e-05, "loss": 0.4628, "step": 69060 }, { "epoch": 17.096534653465348, "grad_norm": 0.38884395360946655, "learning_rate": 4.1676507598385266e-05, "loss": 0.4633, "step": 69070 }, { "epoch": 17.099009900990097, "grad_norm": 0.396334171295166, "learning_rate": 4.166292126293401e-05, "loss": 0.4659, "step": 69080 }, { "epoch": 17.10148514851485, "grad_norm": 0.3632986843585968, "learning_rate": 4.164933556062886e-05, "loss": 0.4616, "step": 69090 }, { "epoch": 17.103960396039604, "grad_norm": 0.40601325035095215, "learning_rate": 4.163575049250157e-05, "loss": 0.4652, "step": 69100 }, { "epoch": 17.106435643564357, "grad_norm": 0.4334351420402527, "learning_rate": 4.162216605958383e-05, "loss": 0.4636, "step": 69110 }, { "epoch": 17.10891089108911, "grad_norm": 0.39566218852996826, "learning_rate": 4.160858226290728e-05, "loss": 0.4641, "step": 69120 }, { "epoch": 17.111386138613863, "grad_norm": 0.4074453115463257, "learning_rate": 4.159499910350354e-05, "loss": 0.4635, "step": 69130 }, { "epoch": 17.113861386138613, "grad_norm": 0.40394389629364014, "learning_rate": 4.158141658240416e-05, "loss": 0.4602, "step": 69140 }, { "epoch": 17.116336633663366, "grad_norm": 0.4040292501449585, "learning_rate": 4.156783470064061e-05, "loss": 0.4622, "step": 69150 }, { "epoch": 17.11881188118812, "grad_norm": 0.375695139169693, "learning_rate": 4.155425345924437e-05, "loss": 0.458, "step": 69160 }, { "epoch": 17.121287128712872, "grad_norm": 0.3793119192123413, "learning_rate": 4.154067285924686e-05, "loss": 0.4694, "step": 69170 }, { "epoch": 17.123762376237625, "grad_norm": 0.40200158953666687, "learning_rate": 4.152709290167942e-05, "loss": 0.4654, "step": 69180 }, { "epoch": 17.126237623762375, "grad_norm": 0.384209543466568, "learning_rate": 4.151351358757333e-05, "loss": 0.4593, "step": 69190 }, { "epoch": 17.128712871287128, "grad_norm": 0.41183584928512573, "learning_rate": 4.149993491795991e-05, "loss": 0.4595, "step": 69200 }, { "epoch": 17.13118811881188, "grad_norm": 0.3942430913448334, "learning_rate": 4.1486356893870314e-05, "loss": 0.4617, "step": 69210 }, { "epoch": 17.133663366336634, "grad_norm": 0.37778693437576294, "learning_rate": 4.147277951633572e-05, "loss": 0.4626, "step": 69220 }, { "epoch": 17.136138613861387, "grad_norm": 0.39874231815338135, "learning_rate": 4.145920278638727e-05, "loss": 0.4617, "step": 69230 }, { "epoch": 17.138613861386137, "grad_norm": 0.37844550609588623, "learning_rate": 4.1445626705055995e-05, "loss": 0.4643, "step": 69240 }, { "epoch": 17.14108910891089, "grad_norm": 0.3978525698184967, "learning_rate": 4.143205127337291e-05, "loss": 0.4591, "step": 69250 }, { "epoch": 17.143564356435643, "grad_norm": 0.38589757680892944, "learning_rate": 4.141847649236901e-05, "loss": 0.4573, "step": 69260 }, { "epoch": 17.146039603960396, "grad_norm": 0.3832060396671295, "learning_rate": 4.140490236307517e-05, "loss": 0.4636, "step": 69270 }, { "epoch": 17.14851485148515, "grad_norm": 0.39943066239356995, "learning_rate": 4.139132888652228e-05, "loss": 0.4637, "step": 69280 }, { "epoch": 17.150990099009903, "grad_norm": 0.37280142307281494, "learning_rate": 4.1377756063741135e-05, "loss": 0.4637, "step": 69290 }, { "epoch": 17.153465346534652, "grad_norm": 0.37377968430519104, "learning_rate": 4.136418389576253e-05, "loss": 0.4571, "step": 69300 }, { "epoch": 17.155940594059405, "grad_norm": 0.4111306965351105, "learning_rate": 4.135061238361717e-05, "loss": 0.4637, "step": 69310 }, { "epoch": 17.15841584158416, "grad_norm": 0.3992733061313629, "learning_rate": 4.133704152833568e-05, "loss": 0.4637, "step": 69320 }, { "epoch": 17.16089108910891, "grad_norm": 0.3798258602619171, "learning_rate": 4.132347133094877e-05, "loss": 0.4633, "step": 69330 }, { "epoch": 17.163366336633665, "grad_norm": 0.37847819924354553, "learning_rate": 4.1309901792486924e-05, "loss": 0.4612, "step": 69340 }, { "epoch": 17.165841584158414, "grad_norm": 0.36951813101768494, "learning_rate": 4.129633291398068e-05, "loss": 0.4585, "step": 69350 }, { "epoch": 17.168316831683168, "grad_norm": 0.37456947565078735, "learning_rate": 4.128276469646053e-05, "loss": 0.4605, "step": 69360 }, { "epoch": 17.17079207920792, "grad_norm": 0.36101579666137695, "learning_rate": 4.126919714095687e-05, "loss": 0.4611, "step": 69370 }, { "epoch": 17.173267326732674, "grad_norm": 0.3844078779220581, "learning_rate": 4.125563024850007e-05, "loss": 0.4674, "step": 69380 }, { "epoch": 17.175742574257427, "grad_norm": 0.40000849962234497, "learning_rate": 4.1242064020120425e-05, "loss": 0.4585, "step": 69390 }, { "epoch": 17.178217821782177, "grad_norm": 0.386534184217453, "learning_rate": 4.122849845684824e-05, "loss": 0.4585, "step": 69400 }, { "epoch": 17.18069306930693, "grad_norm": 0.393439918756485, "learning_rate": 4.12149335597137e-05, "loss": 0.4617, "step": 69410 }, { "epoch": 17.183168316831683, "grad_norm": 0.3750164210796356, "learning_rate": 4.1201369329746975e-05, "loss": 0.4639, "step": 69420 }, { "epoch": 17.185643564356436, "grad_norm": 0.40950527787208557, "learning_rate": 4.1187805767978196e-05, "loss": 0.4595, "step": 69430 }, { "epoch": 17.18811881188119, "grad_norm": 0.3660174608230591, "learning_rate": 4.1174242875437416e-05, "loss": 0.4594, "step": 69440 }, { "epoch": 17.190594059405942, "grad_norm": 0.37707433104515076, "learning_rate": 4.116068065315465e-05, "loss": 0.4609, "step": 69450 }, { "epoch": 17.193069306930692, "grad_norm": 0.36990851163864136, "learning_rate": 4.114711910215983e-05, "loss": 0.4601, "step": 69460 }, { "epoch": 17.195544554455445, "grad_norm": 0.37794724106788635, "learning_rate": 4.11335582234829e-05, "loss": 0.4623, "step": 69470 }, { "epoch": 17.198019801980198, "grad_norm": 0.4300885498523712, "learning_rate": 4.1119998018153726e-05, "loss": 0.4622, "step": 69480 }, { "epoch": 17.20049504950495, "grad_norm": 0.39680108428001404, "learning_rate": 4.110643848720207e-05, "loss": 0.4598, "step": 69490 }, { "epoch": 17.202970297029704, "grad_norm": 0.4049360752105713, "learning_rate": 4.109287963165774e-05, "loss": 0.4611, "step": 69500 }, { "epoch": 17.205445544554454, "grad_norm": 0.3702179789543152, "learning_rate": 4.107932145255042e-05, "loss": 0.4601, "step": 69510 }, { "epoch": 17.207920792079207, "grad_norm": 0.40357762575149536, "learning_rate": 4.1065763950909756e-05, "loss": 0.4608, "step": 69520 }, { "epoch": 17.21039603960396, "grad_norm": 0.38478243350982666, "learning_rate": 4.105220712776536e-05, "loss": 0.4641, "step": 69530 }, { "epoch": 17.212871287128714, "grad_norm": 0.3962326645851135, "learning_rate": 4.103865098414678e-05, "loss": 0.466, "step": 69540 }, { "epoch": 17.215346534653467, "grad_norm": 0.3845520317554474, "learning_rate": 4.102509552108354e-05, "loss": 0.4621, "step": 69550 }, { "epoch": 17.217821782178216, "grad_norm": 0.38350650668144226, "learning_rate": 4.1011540739605034e-05, "loss": 0.463, "step": 69560 }, { "epoch": 17.22029702970297, "grad_norm": 0.36544543504714966, "learning_rate": 4.099798664074071e-05, "loss": 0.461, "step": 69570 }, { "epoch": 17.222772277227723, "grad_norm": 0.3893640637397766, "learning_rate": 4.098443322551988e-05, "loss": 0.4545, "step": 69580 }, { "epoch": 17.225247524752476, "grad_norm": 0.35903868079185486, "learning_rate": 4.097088049497185e-05, "loss": 0.4606, "step": 69590 }, { "epoch": 17.22772277227723, "grad_norm": 0.37401866912841797, "learning_rate": 4.095732845012587e-05, "loss": 0.4632, "step": 69600 }, { "epoch": 17.230198019801982, "grad_norm": 0.39909449219703674, "learning_rate": 4.0943777092011125e-05, "loss": 0.4609, "step": 69610 }, { "epoch": 17.23267326732673, "grad_norm": 0.36744487285614014, "learning_rate": 4.0930226421656716e-05, "loss": 0.461, "step": 69620 }, { "epoch": 17.235148514851485, "grad_norm": 0.39031293988227844, "learning_rate": 4.091667644009178e-05, "loss": 0.4594, "step": 69630 }, { "epoch": 17.237623762376238, "grad_norm": 0.3857913315296173, "learning_rate": 4.090312714834532e-05, "loss": 0.4609, "step": 69640 }, { "epoch": 17.24009900990099, "grad_norm": 0.3917371928691864, "learning_rate": 4.0889578547446314e-05, "loss": 0.4625, "step": 69650 }, { "epoch": 17.242574257425744, "grad_norm": 0.3764437437057495, "learning_rate": 4.0876030638423676e-05, "loss": 0.462, "step": 69660 }, { "epoch": 17.245049504950494, "grad_norm": 0.37259119749069214, "learning_rate": 4.086248342230633e-05, "loss": 0.4569, "step": 69670 }, { "epoch": 17.247524752475247, "grad_norm": 0.40463608503341675, "learning_rate": 4.084893690012306e-05, "loss": 0.4591, "step": 69680 }, { "epoch": 17.25, "grad_norm": 0.3705921471118927, "learning_rate": 4.0835391072902615e-05, "loss": 0.4597, "step": 69690 }, { "epoch": 17.252475247524753, "grad_norm": 0.3819584250450134, "learning_rate": 4.082184594167376e-05, "loss": 0.4663, "step": 69700 }, { "epoch": 17.254950495049506, "grad_norm": 0.36834171414375305, "learning_rate": 4.080830150746513e-05, "loss": 0.4589, "step": 69710 }, { "epoch": 17.257425742574256, "grad_norm": 0.38746416568756104, "learning_rate": 4.079475777130533e-05, "loss": 0.4638, "step": 69720 }, { "epoch": 17.25990099009901, "grad_norm": 0.40191757678985596, "learning_rate": 4.078121473422294e-05, "loss": 0.4637, "step": 69730 }, { "epoch": 17.262376237623762, "grad_norm": 0.4055062234401703, "learning_rate": 4.076767239724646e-05, "loss": 0.4619, "step": 69740 }, { "epoch": 17.264851485148515, "grad_norm": 0.39248353242874146, "learning_rate": 4.0754130761404325e-05, "loss": 0.4649, "step": 69750 }, { "epoch": 17.26732673267327, "grad_norm": 0.36288386583328247, "learning_rate": 4.074058982772493e-05, "loss": 0.4615, "step": 69760 }, { "epoch": 17.269801980198018, "grad_norm": 0.38087937235832214, "learning_rate": 4.072704959723664e-05, "loss": 0.4648, "step": 69770 }, { "epoch": 17.27227722772277, "grad_norm": 0.3824830949306488, "learning_rate": 4.071351007096775e-05, "loss": 0.4696, "step": 69780 }, { "epoch": 17.274752475247524, "grad_norm": 0.3820497989654541, "learning_rate": 4.0699971249946454e-05, "loss": 0.4599, "step": 69790 }, { "epoch": 17.277227722772277, "grad_norm": 0.3956044912338257, "learning_rate": 4.0686433135200994e-05, "loss": 0.4619, "step": 69800 }, { "epoch": 17.27970297029703, "grad_norm": 0.40679770708084106, "learning_rate": 4.067289572775946e-05, "loss": 0.4696, "step": 69810 }, { "epoch": 17.282178217821784, "grad_norm": 0.389217734336853, "learning_rate": 4.0659359028649955e-05, "loss": 0.4627, "step": 69820 }, { "epoch": 17.284653465346533, "grad_norm": 0.3744863271713257, "learning_rate": 4.064582303890047e-05, "loss": 0.4615, "step": 69830 }, { "epoch": 17.287128712871286, "grad_norm": 0.3914305567741394, "learning_rate": 4.0632287759539e-05, "loss": 0.4671, "step": 69840 }, { "epoch": 17.28960396039604, "grad_norm": 0.418298602104187, "learning_rate": 4.0618753191593475e-05, "loss": 0.4622, "step": 69850 }, { "epoch": 17.292079207920793, "grad_norm": 0.38331252336502075, "learning_rate": 4.06052193360917e-05, "loss": 0.464, "step": 69860 }, { "epoch": 17.294554455445546, "grad_norm": 0.4186030626296997, "learning_rate": 4.059168619406153e-05, "loss": 0.466, "step": 69870 }, { "epoch": 17.297029702970296, "grad_norm": 0.42251449823379517, "learning_rate": 4.057815376653071e-05, "loss": 0.4646, "step": 69880 }, { "epoch": 17.29950495049505, "grad_norm": 0.3961377739906311, "learning_rate": 4.056462205452692e-05, "loss": 0.4583, "step": 69890 }, { "epoch": 17.301980198019802, "grad_norm": 0.44795823097229004, "learning_rate": 4.0551091059077825e-05, "loss": 0.4635, "step": 69900 }, { "epoch": 17.304455445544555, "grad_norm": 0.41210484504699707, "learning_rate": 4.053756078121101e-05, "loss": 0.4647, "step": 69910 }, { "epoch": 17.306930693069308, "grad_norm": 0.3748868405818939, "learning_rate": 4.052403122195401e-05, "loss": 0.459, "step": 69920 }, { "epoch": 17.309405940594058, "grad_norm": 0.3931578993797302, "learning_rate": 4.051050238233429e-05, "loss": 0.4668, "step": 69930 }, { "epoch": 17.31188118811881, "grad_norm": 0.38659679889678955, "learning_rate": 4.0496974263379295e-05, "loss": 0.4616, "step": 69940 }, { "epoch": 17.314356435643564, "grad_norm": 0.3748052716255188, "learning_rate": 4.0483446866116395e-05, "loss": 0.4601, "step": 69950 }, { "epoch": 17.316831683168317, "grad_norm": 0.37853801250457764, "learning_rate": 4.0469920191572884e-05, "loss": 0.4618, "step": 69960 }, { "epoch": 17.31930693069307, "grad_norm": 0.4062868654727936, "learning_rate": 4.0456394240776066e-05, "loss": 0.4585, "step": 69970 }, { "epoch": 17.321782178217823, "grad_norm": 0.4150836169719696, "learning_rate": 4.044286901475311e-05, "loss": 0.4661, "step": 69980 }, { "epoch": 17.324257425742573, "grad_norm": 0.38991811871528625, "learning_rate": 4.0429344514531177e-05, "loss": 0.4587, "step": 69990 }, { "epoch": 17.326732673267326, "grad_norm": 0.38065752387046814, "learning_rate": 4.041582074113737e-05, "loss": 0.4617, "step": 70000 }, { "epoch": 17.32920792079208, "grad_norm": 0.38521608710289, "learning_rate": 4.040229769559873e-05, "loss": 0.4619, "step": 70010 }, { "epoch": 17.331683168316832, "grad_norm": 0.38528597354888916, "learning_rate": 4.038877537894226e-05, "loss": 0.4602, "step": 70020 }, { "epoch": 17.334158415841586, "grad_norm": 0.3999534249305725, "learning_rate": 4.037525379219483e-05, "loss": 0.4597, "step": 70030 }, { "epoch": 17.336633663366335, "grad_norm": 0.4040047228336334, "learning_rate": 4.036173293638339e-05, "loss": 0.4672, "step": 70040 }, { "epoch": 17.33910891089109, "grad_norm": 0.38777831196784973, "learning_rate": 4.034821281253472e-05, "loss": 0.4559, "step": 70050 }, { "epoch": 17.34158415841584, "grad_norm": 0.39044445753097534, "learning_rate": 4.033469342167557e-05, "loss": 0.4618, "step": 70060 }, { "epoch": 17.344059405940595, "grad_norm": 0.37435388565063477, "learning_rate": 4.0321174764832694e-05, "loss": 0.4557, "step": 70070 }, { "epoch": 17.346534653465348, "grad_norm": 0.3905079960823059, "learning_rate": 4.030765684303272e-05, "loss": 0.4606, "step": 70080 }, { "epoch": 17.349009900990097, "grad_norm": 0.3945111334323883, "learning_rate": 4.029413965730221e-05, "loss": 0.4626, "step": 70090 }, { "epoch": 17.35148514851485, "grad_norm": 0.3924538791179657, "learning_rate": 4.0280623208667784e-05, "loss": 0.4536, "step": 70100 }, { "epoch": 17.353960396039604, "grad_norm": 0.3875526487827301, "learning_rate": 4.0267107498155864e-05, "loss": 0.4579, "step": 70110 }, { "epoch": 17.356435643564357, "grad_norm": 0.3752746284008026, "learning_rate": 4.0253592526792906e-05, "loss": 0.4557, "step": 70120 }, { "epoch": 17.35891089108911, "grad_norm": 0.3902117908000946, "learning_rate": 4.024007829560526e-05, "loss": 0.4627, "step": 70130 }, { "epoch": 17.361386138613863, "grad_norm": 0.41048896312713623, "learning_rate": 4.022656480561926e-05, "loss": 0.4629, "step": 70140 }, { "epoch": 17.363861386138613, "grad_norm": 0.3877231478691101, "learning_rate": 4.0213052057861185e-05, "loss": 0.4677, "step": 70150 }, { "epoch": 17.366336633663366, "grad_norm": 0.3737093508243561, "learning_rate": 4.019954005335719e-05, "loss": 0.4631, "step": 70160 }, { "epoch": 17.36881188118812, "grad_norm": 0.4054954946041107, "learning_rate": 4.018602879313347e-05, "loss": 0.4555, "step": 70170 }, { "epoch": 17.371287128712872, "grad_norm": 0.3977574110031128, "learning_rate": 4.017251827821609e-05, "loss": 0.4562, "step": 70180 }, { "epoch": 17.373762376237625, "grad_norm": 0.4044245779514313, "learning_rate": 4.015900850963108e-05, "loss": 0.4637, "step": 70190 }, { "epoch": 17.376237623762375, "grad_norm": 0.39535605907440186, "learning_rate": 4.0145499488404436e-05, "loss": 0.4598, "step": 70200 }, { "epoch": 17.378712871287128, "grad_norm": 0.3988327383995056, "learning_rate": 4.0131991215562096e-05, "loss": 0.4617, "step": 70210 }, { "epoch": 17.38118811881188, "grad_norm": 0.3691483438014984, "learning_rate": 4.011848369212987e-05, "loss": 0.461, "step": 70220 }, { "epoch": 17.383663366336634, "grad_norm": 0.39533013105392456, "learning_rate": 4.0104976919133595e-05, "loss": 0.4592, "step": 70230 }, { "epoch": 17.386138613861387, "grad_norm": 0.37548843026161194, "learning_rate": 4.009147089759904e-05, "loss": 0.4629, "step": 70240 }, { "epoch": 17.388613861386137, "grad_norm": 0.38851460814476013, "learning_rate": 4.0077965628551867e-05, "loss": 0.4638, "step": 70250 }, { "epoch": 17.39108910891089, "grad_norm": 0.37950146198272705, "learning_rate": 4.006446111301772e-05, "loss": 0.4597, "step": 70260 }, { "epoch": 17.393564356435643, "grad_norm": 0.38605520129203796, "learning_rate": 4.00509573520222e-05, "loss": 0.4558, "step": 70270 }, { "epoch": 17.396039603960396, "grad_norm": 0.36712366342544556, "learning_rate": 4.003745434659081e-05, "loss": 0.4637, "step": 70280 }, { "epoch": 17.39851485148515, "grad_norm": 0.3896017074584961, "learning_rate": 4.002395209774901e-05, "loss": 0.4648, "step": 70290 }, { "epoch": 17.400990099009903, "grad_norm": 0.39584505558013916, "learning_rate": 4.00104506065222e-05, "loss": 0.4619, "step": 70300 }, { "epoch": 17.403465346534652, "grad_norm": 0.38249868154525757, "learning_rate": 3.999694987393575e-05, "loss": 0.4676, "step": 70310 }, { "epoch": 17.405940594059405, "grad_norm": 0.3769229054450989, "learning_rate": 3.9983449901014955e-05, "loss": 0.4585, "step": 70320 }, { "epoch": 17.40841584158416, "grad_norm": 0.37833794951438904, "learning_rate": 3.9969950688785e-05, "loss": 0.4611, "step": 70330 }, { "epoch": 17.41089108910891, "grad_norm": 0.3665030896663666, "learning_rate": 3.995645223827114e-05, "loss": 0.4574, "step": 70340 }, { "epoch": 17.413366336633665, "grad_norm": 0.38000503182411194, "learning_rate": 3.994295455049843e-05, "loss": 0.4633, "step": 70350 }, { "epoch": 17.415841584158414, "grad_norm": 0.36652693152427673, "learning_rate": 3.992945762649195e-05, "loss": 0.4649, "step": 70360 }, { "epoch": 17.418316831683168, "grad_norm": 0.3989081084728241, "learning_rate": 3.9915961467276705e-05, "loss": 0.4634, "step": 70370 }, { "epoch": 17.42079207920792, "grad_norm": 0.4000416100025177, "learning_rate": 3.990246607387765e-05, "loss": 0.4618, "step": 70380 }, { "epoch": 17.423267326732674, "grad_norm": 0.37839365005493164, "learning_rate": 3.9888971447319656e-05, "loss": 0.4632, "step": 70390 }, { "epoch": 17.425742574257427, "grad_norm": 0.3819405436515808, "learning_rate": 3.9875477588627534e-05, "loss": 0.4617, "step": 70400 }, { "epoch": 17.428217821782177, "grad_norm": 0.3752555847167969, "learning_rate": 3.986198449882609e-05, "loss": 0.4675, "step": 70410 }, { "epoch": 17.43069306930693, "grad_norm": 0.4135698974132538, "learning_rate": 3.9848492178940014e-05, "loss": 0.4657, "step": 70420 }, { "epoch": 17.433168316831683, "grad_norm": 0.4018266499042511, "learning_rate": 3.9835000629993955e-05, "loss": 0.4571, "step": 70430 }, { "epoch": 17.435643564356436, "grad_norm": 0.3812427520751953, "learning_rate": 3.982150985301252e-05, "loss": 0.4592, "step": 70440 }, { "epoch": 17.43811881188119, "grad_norm": 0.37452834844589233, "learning_rate": 3.9808019849020264e-05, "loss": 0.4628, "step": 70450 }, { "epoch": 17.440594059405942, "grad_norm": 0.3728240728378296, "learning_rate": 3.9794530619041604e-05, "loss": 0.4638, "step": 70460 }, { "epoch": 17.443069306930692, "grad_norm": 0.36705008149147034, "learning_rate": 3.9781042164101004e-05, "loss": 0.4602, "step": 70470 }, { "epoch": 17.445544554455445, "grad_norm": 0.3823069632053375, "learning_rate": 3.976755448522282e-05, "loss": 0.4593, "step": 70480 }, { "epoch": 17.448019801980198, "grad_norm": 0.39394691586494446, "learning_rate": 3.975406758343136e-05, "loss": 0.4616, "step": 70490 }, { "epoch": 17.45049504950495, "grad_norm": 0.37960129976272583, "learning_rate": 3.97405814597508e-05, "loss": 0.4616, "step": 70500 }, { "epoch": 17.452970297029704, "grad_norm": 0.36009156703948975, "learning_rate": 3.9727096115205416e-05, "loss": 0.4565, "step": 70510 }, { "epoch": 17.455445544554454, "grad_norm": 0.38005003333091736, "learning_rate": 3.971361155081927e-05, "loss": 0.4609, "step": 70520 }, { "epoch": 17.457920792079207, "grad_norm": 0.39524510502815247, "learning_rate": 3.970012776761644e-05, "loss": 0.4617, "step": 70530 }, { "epoch": 17.46039603960396, "grad_norm": 0.384438693523407, "learning_rate": 3.968664476662094e-05, "loss": 0.4624, "step": 70540 }, { "epoch": 17.462871287128714, "grad_norm": 0.3973504304885864, "learning_rate": 3.967316254885671e-05, "loss": 0.4624, "step": 70550 }, { "epoch": 17.465346534653467, "grad_norm": 0.39330968260765076, "learning_rate": 3.9659681115347616e-05, "loss": 0.4581, "step": 70560 }, { "epoch": 17.467821782178216, "grad_norm": 0.3667357563972473, "learning_rate": 3.964620046711752e-05, "loss": 0.4612, "step": 70570 }, { "epoch": 17.47029702970297, "grad_norm": 0.376436710357666, "learning_rate": 3.963272060519017e-05, "loss": 0.4618, "step": 70580 }, { "epoch": 17.472772277227723, "grad_norm": 0.3857611417770386, "learning_rate": 3.9619241530589265e-05, "loss": 0.4634, "step": 70590 }, { "epoch": 17.475247524752476, "grad_norm": 0.39891350269317627, "learning_rate": 3.960576324433845e-05, "loss": 0.46, "step": 70600 }, { "epoch": 17.47772277227723, "grad_norm": 0.3900618851184845, "learning_rate": 3.9592285747461334e-05, "loss": 0.4627, "step": 70610 }, { "epoch": 17.480198019801982, "grad_norm": 0.3696044087409973, "learning_rate": 3.957880904098143e-05, "loss": 0.4579, "step": 70620 }, { "epoch": 17.48267326732673, "grad_norm": 0.3809184432029724, "learning_rate": 3.956533312592219e-05, "loss": 0.4628, "step": 70630 }, { "epoch": 17.485148514851485, "grad_norm": 0.38262081146240234, "learning_rate": 3.9551858003307055e-05, "loss": 0.4584, "step": 70640 }, { "epoch": 17.487623762376238, "grad_norm": 0.40011486411094666, "learning_rate": 3.953838367415935e-05, "loss": 0.4646, "step": 70650 }, { "epoch": 17.49009900990099, "grad_norm": 0.3861837685108185, "learning_rate": 3.952491013950236e-05, "loss": 0.462, "step": 70660 }, { "epoch": 17.492574257425744, "grad_norm": 0.38519811630249023, "learning_rate": 3.951143740035931e-05, "loss": 0.4599, "step": 70670 }, { "epoch": 17.495049504950494, "grad_norm": 0.40270692110061646, "learning_rate": 3.949796545775338e-05, "loss": 0.4608, "step": 70680 }, { "epoch": 17.497524752475247, "grad_norm": 0.3980374038219452, "learning_rate": 3.9484494312707666e-05, "loss": 0.4662, "step": 70690 }, { "epoch": 17.5, "grad_norm": 0.39328256249427795, "learning_rate": 3.94710239662452e-05, "loss": 0.4594, "step": 70700 }, { "epoch": 17.502475247524753, "grad_norm": 0.37811478972435, "learning_rate": 3.945755441938899e-05, "loss": 0.4603, "step": 70710 }, { "epoch": 17.504950495049506, "grad_norm": 0.3634980022907257, "learning_rate": 3.944408567316195e-05, "loss": 0.4617, "step": 70720 }, { "epoch": 17.507425742574256, "grad_norm": 0.3786051273345947, "learning_rate": 3.9430617728586924e-05, "loss": 0.4608, "step": 70730 }, { "epoch": 17.50990099009901, "grad_norm": 0.4049520194530487, "learning_rate": 3.941715058668674e-05, "loss": 0.4597, "step": 70740 }, { "epoch": 17.512376237623762, "grad_norm": 0.36572128534317017, "learning_rate": 3.940368424848414e-05, "loss": 0.4598, "step": 70750 }, { "epoch": 17.514851485148515, "grad_norm": 0.3878881633281708, "learning_rate": 3.9390218715001785e-05, "loss": 0.4672, "step": 70760 }, { "epoch": 17.51732673267327, "grad_norm": 0.36969268321990967, "learning_rate": 3.937675398726229e-05, "loss": 0.4586, "step": 70770 }, { "epoch": 17.519801980198018, "grad_norm": 0.3910587430000305, "learning_rate": 3.9363290066288225e-05, "loss": 0.4656, "step": 70780 }, { "epoch": 17.52227722772277, "grad_norm": 0.3881164491176605, "learning_rate": 3.93498269531021e-05, "loss": 0.4619, "step": 70790 }, { "epoch": 17.524752475247524, "grad_norm": 0.39451658725738525, "learning_rate": 3.93363646487263e-05, "loss": 0.4614, "step": 70800 }, { "epoch": 17.527227722772277, "grad_norm": 0.4047802984714508, "learning_rate": 3.9322903154183263e-05, "loss": 0.4607, "step": 70810 }, { "epoch": 17.52970297029703, "grad_norm": 0.3811718821525574, "learning_rate": 3.930944247049526e-05, "loss": 0.4654, "step": 70820 }, { "epoch": 17.532178217821784, "grad_norm": 0.3669409155845642, "learning_rate": 3.9295982598684536e-05, "loss": 0.4582, "step": 70830 }, { "epoch": 17.534653465346533, "grad_norm": 0.3945077657699585, "learning_rate": 3.92825235397733e-05, "loss": 0.4605, "step": 70840 }, { "epoch": 17.537128712871286, "grad_norm": 0.39995187520980835, "learning_rate": 3.926906529478368e-05, "loss": 0.4632, "step": 70850 }, { "epoch": 17.53960396039604, "grad_norm": 0.3804313540458679, "learning_rate": 3.925560786473774e-05, "loss": 0.4595, "step": 70860 }, { "epoch": 17.542079207920793, "grad_norm": 0.36063289642333984, "learning_rate": 3.9242151250657454e-05, "loss": 0.4556, "step": 70870 }, { "epoch": 17.544554455445546, "grad_norm": 0.3946571946144104, "learning_rate": 3.9228695453564804e-05, "loss": 0.4607, "step": 70880 }, { "epoch": 17.547029702970296, "grad_norm": 0.3653043806552887, "learning_rate": 3.921524047448164e-05, "loss": 0.4617, "step": 70890 }, { "epoch": 17.54950495049505, "grad_norm": 0.3873591125011444, "learning_rate": 3.920178631442978e-05, "loss": 0.4592, "step": 70900 }, { "epoch": 17.551980198019802, "grad_norm": 0.38121986389160156, "learning_rate": 3.9188332974431e-05, "loss": 0.4671, "step": 70910 }, { "epoch": 17.554455445544555, "grad_norm": 0.378648042678833, "learning_rate": 3.9174880455506986e-05, "loss": 0.4597, "step": 70920 }, { "epoch": 17.556930693069308, "grad_norm": 0.3769017457962036, "learning_rate": 3.916142875867933e-05, "loss": 0.4584, "step": 70930 }, { "epoch": 17.55940594059406, "grad_norm": 0.39540526270866394, "learning_rate": 3.9147977884969665e-05, "loss": 0.459, "step": 70940 }, { "epoch": 17.56188118811881, "grad_norm": 0.37913820147514343, "learning_rate": 3.913452783539946e-05, "loss": 0.46, "step": 70950 }, { "epoch": 17.564356435643564, "grad_norm": 0.3977522552013397, "learning_rate": 3.912107861099015e-05, "loss": 0.4583, "step": 70960 }, { "epoch": 17.566831683168317, "grad_norm": 0.3893582224845886, "learning_rate": 3.9107630212763116e-05, "loss": 0.4582, "step": 70970 }, { "epoch": 17.56930693069307, "grad_norm": 0.38570189476013184, "learning_rate": 3.9094182641739706e-05, "loss": 0.466, "step": 70980 }, { "epoch": 17.571782178217823, "grad_norm": 0.4295145869255066, "learning_rate": 3.908073589894115e-05, "loss": 0.4562, "step": 70990 }, { "epoch": 17.574257425742573, "grad_norm": 0.4211367666721344, "learning_rate": 3.906728998538862e-05, "loss": 0.462, "step": 71000 }, { "epoch": 17.576732673267326, "grad_norm": 0.4075818955898285, "learning_rate": 3.905384490210328e-05, "loss": 0.4612, "step": 71010 }, { "epoch": 17.57920792079208, "grad_norm": 0.37322255969047546, "learning_rate": 3.9040400650106176e-05, "loss": 0.4636, "step": 71020 }, { "epoch": 17.581683168316832, "grad_norm": 0.3820130228996277, "learning_rate": 3.902695723041834e-05, "loss": 0.4669, "step": 71030 }, { "epoch": 17.584158415841586, "grad_norm": 0.3808313310146332, "learning_rate": 3.901351464406064e-05, "loss": 0.4573, "step": 71040 }, { "epoch": 17.586633663366335, "grad_norm": 0.3819969594478607, "learning_rate": 3.900007289205403e-05, "loss": 0.4601, "step": 71050 }, { "epoch": 17.58910891089109, "grad_norm": 0.3675610423088074, "learning_rate": 3.898663197541929e-05, "loss": 0.4647, "step": 71060 }, { "epoch": 17.59158415841584, "grad_norm": 0.3836857080459595, "learning_rate": 3.897319189517716e-05, "loss": 0.4623, "step": 71070 }, { "epoch": 17.594059405940595, "grad_norm": 0.4021874666213989, "learning_rate": 3.8959752652348335e-05, "loss": 0.465, "step": 71080 }, { "epoch": 17.596534653465348, "grad_norm": 0.37641793489456177, "learning_rate": 3.894631424795346e-05, "loss": 0.4615, "step": 71090 }, { "epoch": 17.599009900990097, "grad_norm": 0.36568817496299744, "learning_rate": 3.893287668301302e-05, "loss": 0.4583, "step": 71100 }, { "epoch": 17.60148514851485, "grad_norm": 0.3850829005241394, "learning_rate": 3.891943995854761e-05, "loss": 0.4601, "step": 71110 }, { "epoch": 17.603960396039604, "grad_norm": 0.3608248829841614, "learning_rate": 3.890600407557759e-05, "loss": 0.4609, "step": 71120 }, { "epoch": 17.606435643564357, "grad_norm": 0.3798404335975647, "learning_rate": 3.889256903512335e-05, "loss": 0.4614, "step": 71130 }, { "epoch": 17.60891089108911, "grad_norm": 0.38317379355430603, "learning_rate": 3.8879134838205175e-05, "loss": 0.4632, "step": 71140 }, { "epoch": 17.611386138613863, "grad_norm": 0.3694987893104553, "learning_rate": 3.886570148584333e-05, "loss": 0.4691, "step": 71150 }, { "epoch": 17.613861386138613, "grad_norm": 0.3938247561454773, "learning_rate": 3.885226897905799e-05, "loss": 0.4583, "step": 71160 }, { "epoch": 17.616336633663366, "grad_norm": 0.38270285725593567, "learning_rate": 3.8838837318869225e-05, "loss": 0.4634, "step": 71170 }, { "epoch": 17.61881188118812, "grad_norm": 0.37148958444595337, "learning_rate": 3.8825406506297125e-05, "loss": 0.4582, "step": 71180 }, { "epoch": 17.621287128712872, "grad_norm": 0.3781049847602844, "learning_rate": 3.881197654236165e-05, "loss": 0.4593, "step": 71190 }, { "epoch": 17.623762376237625, "grad_norm": 0.3724437952041626, "learning_rate": 3.879854742808271e-05, "loss": 0.456, "step": 71200 }, { "epoch": 17.626237623762375, "grad_norm": 0.37046942114830017, "learning_rate": 3.8785119164480186e-05, "loss": 0.4551, "step": 71210 }, { "epoch": 17.628712871287128, "grad_norm": 0.38151490688323975, "learning_rate": 3.877169175257386e-05, "loss": 0.4633, "step": 71220 }, { "epoch": 17.63118811881188, "grad_norm": 0.36745622754096985, "learning_rate": 3.875826519338344e-05, "loss": 0.4624, "step": 71230 }, { "epoch": 17.633663366336634, "grad_norm": 0.3804628849029541, "learning_rate": 3.874483948792857e-05, "loss": 0.4578, "step": 71240 }, { "epoch": 17.636138613861387, "grad_norm": 0.3563994765281677, "learning_rate": 3.8731414637228875e-05, "loss": 0.4589, "step": 71250 }, { "epoch": 17.638613861386137, "grad_norm": 0.38480275869369507, "learning_rate": 3.871799064230387e-05, "loss": 0.4591, "step": 71260 }, { "epoch": 17.64108910891089, "grad_norm": 0.369177907705307, "learning_rate": 3.8704567504173016e-05, "loss": 0.4597, "step": 71270 }, { "epoch": 17.643564356435643, "grad_norm": 0.39047691226005554, "learning_rate": 3.8691145223855734e-05, "loss": 0.4602, "step": 71280 }, { "epoch": 17.646039603960396, "grad_norm": 0.3645438253879547, "learning_rate": 3.867772380237133e-05, "loss": 0.4612, "step": 71290 }, { "epoch": 17.64851485148515, "grad_norm": 0.3796091675758362, "learning_rate": 3.866430324073907e-05, "loss": 0.4598, "step": 71300 }, { "epoch": 17.650990099009903, "grad_norm": 0.3685668706893921, "learning_rate": 3.865088353997819e-05, "loss": 0.4594, "step": 71310 }, { "epoch": 17.653465346534652, "grad_norm": 0.3999274969100952, "learning_rate": 3.8637464701107804e-05, "loss": 0.4673, "step": 71320 }, { "epoch": 17.655940594059405, "grad_norm": 0.36946746706962585, "learning_rate": 3.862404672514699e-05, "loss": 0.4603, "step": 71330 }, { "epoch": 17.65841584158416, "grad_norm": 0.36426591873168945, "learning_rate": 3.861062961311472e-05, "loss": 0.4652, "step": 71340 }, { "epoch": 17.66089108910891, "grad_norm": 0.37908250093460083, "learning_rate": 3.8597213366030014e-05, "loss": 0.4563, "step": 71350 }, { "epoch": 17.663366336633665, "grad_norm": 0.36614790558815, "learning_rate": 3.8583797984911676e-05, "loss": 0.4701, "step": 71360 }, { "epoch": 17.665841584158414, "grad_norm": 0.36928415298461914, "learning_rate": 3.857038347077854e-05, "loss": 0.4628, "step": 71370 }, { "epoch": 17.668316831683168, "grad_norm": 0.36077389121055603, "learning_rate": 3.8556969824649355e-05, "loss": 0.4554, "step": 71380 }, { "epoch": 17.67079207920792, "grad_norm": 0.3867587149143219, "learning_rate": 3.854355704754281e-05, "loss": 0.4582, "step": 71390 }, { "epoch": 17.673267326732674, "grad_norm": 0.4045966565608978, "learning_rate": 3.853014514047746e-05, "loss": 0.4658, "step": 71400 }, { "epoch": 17.675742574257427, "grad_norm": 0.395753413438797, "learning_rate": 3.8516734104471936e-05, "loss": 0.4637, "step": 71410 }, { "epoch": 17.678217821782177, "grad_norm": 0.3886242210865021, "learning_rate": 3.850332394054466e-05, "loss": 0.4629, "step": 71420 }, { "epoch": 17.68069306930693, "grad_norm": 0.41839784383773804, "learning_rate": 3.848991464971405e-05, "loss": 0.4596, "step": 71430 }, { "epoch": 17.683168316831683, "grad_norm": 0.3767780363559723, "learning_rate": 3.847650623299846e-05, "loss": 0.4606, "step": 71440 }, { "epoch": 17.685643564356436, "grad_norm": 0.3820485770702362, "learning_rate": 3.8463098691416185e-05, "loss": 0.4601, "step": 71450 }, { "epoch": 17.68811881188119, "grad_norm": 0.3748888373374939, "learning_rate": 3.8449692025985436e-05, "loss": 0.463, "step": 71460 }, { "epoch": 17.69059405940594, "grad_norm": 0.38060247898101807, "learning_rate": 3.843628623772433e-05, "loss": 0.4593, "step": 71470 }, { "epoch": 17.693069306930692, "grad_norm": 0.3687135875225067, "learning_rate": 3.842288132765098e-05, "loss": 0.4582, "step": 71480 }, { "epoch": 17.695544554455445, "grad_norm": 0.4014057517051697, "learning_rate": 3.840947729678339e-05, "loss": 0.4629, "step": 71490 }, { "epoch": 17.698019801980198, "grad_norm": 0.3714596629142761, "learning_rate": 3.839607414613953e-05, "loss": 0.4636, "step": 71500 }, { "epoch": 17.70049504950495, "grad_norm": 0.3757873475551605, "learning_rate": 3.8382671876737215e-05, "loss": 0.4637, "step": 71510 }, { "epoch": 17.702970297029704, "grad_norm": 0.3897061347961426, "learning_rate": 3.836927048959434e-05, "loss": 0.4589, "step": 71520 }, { "epoch": 17.705445544554454, "grad_norm": 0.37456846237182617, "learning_rate": 3.83558699857286e-05, "loss": 0.4636, "step": 71530 }, { "epoch": 17.707920792079207, "grad_norm": 0.36283373832702637, "learning_rate": 3.8342470366157685e-05, "loss": 0.461, "step": 71540 }, { "epoch": 17.71039603960396, "grad_norm": 0.38378554582595825, "learning_rate": 3.832907163189922e-05, "loss": 0.4595, "step": 71550 }, { "epoch": 17.712871287128714, "grad_norm": 0.3624092638492584, "learning_rate": 3.8315673783970744e-05, "loss": 0.4621, "step": 71560 }, { "epoch": 17.715346534653467, "grad_norm": 0.3832320272922516, "learning_rate": 3.8302276823389725e-05, "loss": 0.4641, "step": 71570 }, { "epoch": 17.717821782178216, "grad_norm": 0.39288660883903503, "learning_rate": 3.82888807511736e-05, "loss": 0.4628, "step": 71580 }, { "epoch": 17.72029702970297, "grad_norm": 0.36961543560028076, "learning_rate": 3.827548556833969e-05, "loss": 0.4662, "step": 71590 }, { "epoch": 17.722772277227723, "grad_norm": 0.3691694140434265, "learning_rate": 3.826209127590528e-05, "loss": 0.4584, "step": 71600 }, { "epoch": 17.725247524752476, "grad_norm": 0.4148341417312622, "learning_rate": 3.824869787488755e-05, "loss": 0.4598, "step": 71610 }, { "epoch": 17.72772277227723, "grad_norm": 0.38958460092544556, "learning_rate": 3.8235305366303684e-05, "loss": 0.465, "step": 71620 }, { "epoch": 17.730198019801982, "grad_norm": 0.3931077718734741, "learning_rate": 3.822191375117075e-05, "loss": 0.4578, "step": 71630 }, { "epoch": 17.73267326732673, "grad_norm": 0.42027172446250916, "learning_rate": 3.8208523030505697e-05, "loss": 0.4616, "step": 71640 }, { "epoch": 17.735148514851485, "grad_norm": 0.39664822816848755, "learning_rate": 3.8195133205325536e-05, "loss": 0.4636, "step": 71650 }, { "epoch": 17.737623762376238, "grad_norm": 0.3823932111263275, "learning_rate": 3.8181744276647094e-05, "loss": 0.4642, "step": 71660 }, { "epoch": 17.74009900990099, "grad_norm": 0.38131338357925415, "learning_rate": 3.816835624548717e-05, "loss": 0.4576, "step": 71670 }, { "epoch": 17.742574257425744, "grad_norm": 0.4012521207332611, "learning_rate": 3.8154969112862524e-05, "loss": 0.4607, "step": 71680 }, { "epoch": 17.745049504950494, "grad_norm": 0.3850025534629822, "learning_rate": 3.8141582879789803e-05, "loss": 0.4619, "step": 71690 }, { "epoch": 17.747524752475247, "grad_norm": 0.38611799478530884, "learning_rate": 3.8128197547285596e-05, "loss": 0.461, "step": 71700 }, { "epoch": 17.75, "grad_norm": 0.38720956444740295, "learning_rate": 3.8114813116366435e-05, "loss": 0.4608, "step": 71710 }, { "epoch": 17.752475247524753, "grad_norm": 0.361794650554657, "learning_rate": 3.810142958804879e-05, "loss": 0.4638, "step": 71720 }, { "epoch": 17.754950495049506, "grad_norm": 0.3801381289958954, "learning_rate": 3.8088046963349034e-05, "loss": 0.4567, "step": 71730 }, { "epoch": 17.757425742574256, "grad_norm": 0.3926541805267334, "learning_rate": 3.8074665243283496e-05, "loss": 0.4572, "step": 71740 }, { "epoch": 17.75990099009901, "grad_norm": 0.409004271030426, "learning_rate": 3.806128442886845e-05, "loss": 0.46, "step": 71750 }, { "epoch": 17.762376237623762, "grad_norm": 0.3944215774536133, "learning_rate": 3.804790452112006e-05, "loss": 0.4611, "step": 71760 }, { "epoch": 17.764851485148515, "grad_norm": 0.3916487395763397, "learning_rate": 3.803452552105442e-05, "loss": 0.4612, "step": 71770 }, { "epoch": 17.76732673267327, "grad_norm": 0.39447614550590515, "learning_rate": 3.802114742968764e-05, "loss": 0.458, "step": 71780 }, { "epoch": 17.769801980198018, "grad_norm": 0.3676811456680298, "learning_rate": 3.800777024803563e-05, "loss": 0.4595, "step": 71790 }, { "epoch": 17.77227722772277, "grad_norm": 0.3818962574005127, "learning_rate": 3.7994393977114356e-05, "loss": 0.467, "step": 71800 }, { "epoch": 17.774752475247524, "grad_norm": 0.3797551393508911, "learning_rate": 3.798101861793959e-05, "loss": 0.4674, "step": 71810 }, { "epoch": 17.777227722772277, "grad_norm": 0.38406550884246826, "learning_rate": 3.796764417152718e-05, "loss": 0.4634, "step": 71820 }, { "epoch": 17.77970297029703, "grad_norm": 0.3789431154727936, "learning_rate": 3.795427063889277e-05, "loss": 0.4588, "step": 71830 }, { "epoch": 17.782178217821784, "grad_norm": 0.3759351074695587, "learning_rate": 3.7940898021052003e-05, "loss": 0.4572, "step": 71840 }, { "epoch": 17.784653465346533, "grad_norm": 0.3697070777416229, "learning_rate": 3.7927526319020455e-05, "loss": 0.4596, "step": 71850 }, { "epoch": 17.787128712871286, "grad_norm": 0.36920520663261414, "learning_rate": 3.7914155533813615e-05, "loss": 0.4646, "step": 71860 }, { "epoch": 17.78960396039604, "grad_norm": 0.37294793128967285, "learning_rate": 3.790078566644691e-05, "loss": 0.4609, "step": 71870 }, { "epoch": 17.792079207920793, "grad_norm": 0.36875206232070923, "learning_rate": 3.7887416717935656e-05, "loss": 0.4611, "step": 71880 }, { "epoch": 17.794554455445546, "grad_norm": 0.36934617161750793, "learning_rate": 3.787404868929521e-05, "loss": 0.4573, "step": 71890 }, { "epoch": 17.797029702970296, "grad_norm": 0.3644259572029114, "learning_rate": 3.786068158154071e-05, "loss": 0.4572, "step": 71900 }, { "epoch": 17.79950495049505, "grad_norm": 0.371050626039505, "learning_rate": 3.7847315395687335e-05, "loss": 0.4638, "step": 71910 }, { "epoch": 17.801980198019802, "grad_norm": 0.37337374687194824, "learning_rate": 3.783395013275016e-05, "loss": 0.4618, "step": 71920 }, { "epoch": 17.804455445544555, "grad_norm": 0.3955356478691101, "learning_rate": 3.7820585793744194e-05, "loss": 0.4597, "step": 71930 }, { "epoch": 17.806930693069308, "grad_norm": 0.4033055603504181, "learning_rate": 3.780722237968433e-05, "loss": 0.4574, "step": 71940 }, { "epoch": 17.80940594059406, "grad_norm": 0.40221256017684937, "learning_rate": 3.779385989158549e-05, "loss": 0.4602, "step": 71950 }, { "epoch": 17.81188118811881, "grad_norm": 0.3795357346534729, "learning_rate": 3.778049833046243e-05, "loss": 0.4616, "step": 71960 }, { "epoch": 17.814356435643564, "grad_norm": 0.3832695782184601, "learning_rate": 3.7767137697329876e-05, "loss": 0.4654, "step": 71970 }, { "epoch": 17.816831683168317, "grad_norm": 0.37431976199150085, "learning_rate": 3.775377799320248e-05, "loss": 0.4588, "step": 71980 }, { "epoch": 17.81930693069307, "grad_norm": 0.37400779128074646, "learning_rate": 3.774041921909484e-05, "loss": 0.4691, "step": 71990 }, { "epoch": 17.821782178217823, "grad_norm": 0.375160276889801, "learning_rate": 3.772706137602145e-05, "loss": 0.4639, "step": 72000 }, { "epoch": 17.824257425742573, "grad_norm": 0.3683346211910248, "learning_rate": 3.771370446499674e-05, "loss": 0.4619, "step": 72010 }, { "epoch": 17.826732673267326, "grad_norm": 0.3630719482898712, "learning_rate": 3.7700348487035115e-05, "loss": 0.4614, "step": 72020 }, { "epoch": 17.82920792079208, "grad_norm": 0.38861021399497986, "learning_rate": 3.768699344315084e-05, "loss": 0.4629, "step": 72030 }, { "epoch": 17.831683168316832, "grad_norm": 0.3545626401901245, "learning_rate": 3.767363933435815e-05, "loss": 0.4619, "step": 72040 }, { "epoch": 17.834158415841586, "grad_norm": 0.363385945558548, "learning_rate": 3.766028616167121e-05, "loss": 0.4556, "step": 72050 }, { "epoch": 17.836633663366335, "grad_norm": 0.3990083336830139, "learning_rate": 3.7646933926104114e-05, "loss": 0.4638, "step": 72060 }, { "epoch": 17.83910891089109, "grad_norm": 0.4044201374053955, "learning_rate": 3.763358262867086e-05, "loss": 0.4679, "step": 72070 }, { "epoch": 17.84158415841584, "grad_norm": 0.36584043502807617, "learning_rate": 3.762023227038537e-05, "loss": 0.4571, "step": 72080 }, { "epoch": 17.844059405940595, "grad_norm": 0.3945075273513794, "learning_rate": 3.760688285226156e-05, "loss": 0.4588, "step": 72090 }, { "epoch": 17.846534653465348, "grad_norm": 0.369067519903183, "learning_rate": 3.75935343753132e-05, "loss": 0.4649, "step": 72100 }, { "epoch": 17.849009900990097, "grad_norm": 0.3678344488143921, "learning_rate": 3.7580186840554024e-05, "loss": 0.463, "step": 72110 }, { "epoch": 17.85148514851485, "grad_norm": 0.36675265431404114, "learning_rate": 3.756684024899772e-05, "loss": 0.4602, "step": 72120 }, { "epoch": 17.853960396039604, "grad_norm": 0.37058520317077637, "learning_rate": 3.755349460165783e-05, "loss": 0.4628, "step": 72130 }, { "epoch": 17.856435643564357, "grad_norm": 0.3693937659263611, "learning_rate": 3.754014989954788e-05, "loss": 0.4611, "step": 72140 }, { "epoch": 17.85891089108911, "grad_norm": 0.36832791566848755, "learning_rate": 3.752680614368132e-05, "loss": 0.4655, "step": 72150 }, { "epoch": 17.861386138613863, "grad_norm": 0.3863028287887573, "learning_rate": 3.751346333507153e-05, "loss": 0.4625, "step": 72160 }, { "epoch": 17.863861386138613, "grad_norm": 0.40555328130722046, "learning_rate": 3.75001214747318e-05, "loss": 0.4593, "step": 72170 }, { "epoch": 17.866336633663366, "grad_norm": 0.4155922532081604, "learning_rate": 3.7486780563675314e-05, "loss": 0.4593, "step": 72180 }, { "epoch": 17.86881188118812, "grad_norm": 0.3692428469657898, "learning_rate": 3.74734406029153e-05, "loss": 0.4618, "step": 72190 }, { "epoch": 17.871287128712872, "grad_norm": 0.37454888224601746, "learning_rate": 3.74601015934648e-05, "loss": 0.4577, "step": 72200 }, { "epoch": 17.873762376237625, "grad_norm": 0.365033358335495, "learning_rate": 3.744676353633681e-05, "loss": 0.4599, "step": 72210 }, { "epoch": 17.876237623762375, "grad_norm": 0.3774564564228058, "learning_rate": 3.74334264325443e-05, "loss": 0.4613, "step": 72220 }, { "epoch": 17.878712871287128, "grad_norm": 0.3794775903224945, "learning_rate": 3.7420090283100126e-05, "loss": 0.4625, "step": 72230 }, { "epoch": 17.88118811881188, "grad_norm": 0.3945351541042328, "learning_rate": 3.740675508901704e-05, "loss": 0.4633, "step": 72240 }, { "epoch": 17.883663366336634, "grad_norm": 0.3631151020526886, "learning_rate": 3.7393420851307836e-05, "loss": 0.4554, "step": 72250 }, { "epoch": 17.886138613861387, "grad_norm": 0.38153085112571716, "learning_rate": 3.738008757098511e-05, "loss": 0.4595, "step": 72260 }, { "epoch": 17.888613861386137, "grad_norm": 0.36071470379829407, "learning_rate": 3.736675524906145e-05, "loss": 0.4563, "step": 72270 }, { "epoch": 17.89108910891089, "grad_norm": 0.38888832926750183, "learning_rate": 3.735342388654934e-05, "loss": 0.4596, "step": 72280 }, { "epoch": 17.893564356435643, "grad_norm": 0.37255287170410156, "learning_rate": 3.7340093484461246e-05, "loss": 0.4587, "step": 72290 }, { "epoch": 17.896039603960396, "grad_norm": 0.36024221777915955, "learning_rate": 3.732676404380951e-05, "loss": 0.4581, "step": 72300 }, { "epoch": 17.89851485148515, "grad_norm": 0.368453711271286, "learning_rate": 3.731343556560639e-05, "loss": 0.4644, "step": 72310 }, { "epoch": 17.900990099009903, "grad_norm": 0.37065941095352173, "learning_rate": 3.730010805086412e-05, "loss": 0.4586, "step": 72320 }, { "epoch": 17.903465346534652, "grad_norm": 0.35244110226631165, "learning_rate": 3.728678150059484e-05, "loss": 0.4544, "step": 72330 }, { "epoch": 17.905940594059405, "grad_norm": 0.3569662868976593, "learning_rate": 3.72734559158106e-05, "loss": 0.4565, "step": 72340 }, { "epoch": 17.90841584158416, "grad_norm": 0.36641329526901245, "learning_rate": 3.726013129752337e-05, "loss": 0.4624, "step": 72350 }, { "epoch": 17.91089108910891, "grad_norm": 0.38518550992012024, "learning_rate": 3.724680764674513e-05, "loss": 0.4616, "step": 72360 }, { "epoch": 17.913366336633665, "grad_norm": 0.393460214138031, "learning_rate": 3.723348496448768e-05, "loss": 0.4594, "step": 72370 }, { "epoch": 17.915841584158414, "grad_norm": 0.3868362307548523, "learning_rate": 3.722016325176277e-05, "loss": 0.4611, "step": 72380 }, { "epoch": 17.918316831683168, "grad_norm": 0.37274643778800964, "learning_rate": 3.720684250958214e-05, "loss": 0.4622, "step": 72390 }, { "epoch": 17.92079207920792, "grad_norm": 0.3508493900299072, "learning_rate": 3.719352273895739e-05, "loss": 0.4604, "step": 72400 }, { "epoch": 17.923267326732674, "grad_norm": 0.38166382908821106, "learning_rate": 3.718020394090006e-05, "loss": 0.4609, "step": 72410 }, { "epoch": 17.925742574257427, "grad_norm": 0.3838481903076172, "learning_rate": 3.716688611642167e-05, "loss": 0.4587, "step": 72420 }, { "epoch": 17.928217821782177, "grad_norm": 0.3753851056098938, "learning_rate": 3.715356926653357e-05, "loss": 0.4512, "step": 72430 }, { "epoch": 17.93069306930693, "grad_norm": 0.38973939418792725, "learning_rate": 3.7140253392247106e-05, "loss": 0.4592, "step": 72440 }, { "epoch": 17.933168316831683, "grad_norm": 0.38764268159866333, "learning_rate": 3.7126938494573514e-05, "loss": 0.4652, "step": 72450 }, { "epoch": 17.935643564356436, "grad_norm": 0.37845227122306824, "learning_rate": 3.7113624574524005e-05, "loss": 0.457, "step": 72460 }, { "epoch": 17.93811881188119, "grad_norm": 0.3469946086406708, "learning_rate": 3.710031163310968e-05, "loss": 0.4571, "step": 72470 }, { "epoch": 17.94059405940594, "grad_norm": 0.3812539577484131, "learning_rate": 3.708699967134152e-05, "loss": 0.4572, "step": 72480 }, { "epoch": 17.943069306930692, "grad_norm": 0.3737436830997467, "learning_rate": 3.7073688690230544e-05, "loss": 0.4587, "step": 72490 }, { "epoch": 17.945544554455445, "grad_norm": 0.39312881231307983, "learning_rate": 3.7060378690787605e-05, "loss": 0.4609, "step": 72500 }, { "epoch": 17.948019801980198, "grad_norm": 0.38799959421157837, "learning_rate": 3.7047069674023495e-05, "loss": 0.4643, "step": 72510 }, { "epoch": 17.95049504950495, "grad_norm": 0.3771146535873413, "learning_rate": 3.7033761640948975e-05, "loss": 0.4561, "step": 72520 }, { "epoch": 17.952970297029704, "grad_norm": 0.36993104219436646, "learning_rate": 3.702045459257469e-05, "loss": 0.4572, "step": 72530 }, { "epoch": 17.955445544554454, "grad_norm": 0.3574545979499817, "learning_rate": 3.7007148529911216e-05, "loss": 0.465, "step": 72540 }, { "epoch": 17.957920792079207, "grad_norm": 0.36094483733177185, "learning_rate": 3.699384345396906e-05, "loss": 0.46, "step": 72550 }, { "epoch": 17.96039603960396, "grad_norm": 0.3619650900363922, "learning_rate": 3.698053936575867e-05, "loss": 0.4615, "step": 72560 }, { "epoch": 17.962871287128714, "grad_norm": 0.36222317814826965, "learning_rate": 3.6967236266290387e-05, "loss": 0.4632, "step": 72570 }, { "epoch": 17.965346534653467, "grad_norm": 0.3630262613296509, "learning_rate": 3.69539341565745e-05, "loss": 0.4582, "step": 72580 }, { "epoch": 17.967821782178216, "grad_norm": 0.37107008695602417, "learning_rate": 3.694063303762122e-05, "loss": 0.4638, "step": 72590 }, { "epoch": 17.97029702970297, "grad_norm": 0.3502562940120697, "learning_rate": 3.6927332910440684e-05, "loss": 0.4586, "step": 72600 }, { "epoch": 17.972772277227723, "grad_norm": 0.36066293716430664, "learning_rate": 3.6914033776042936e-05, "loss": 0.4606, "step": 72610 }, { "epoch": 17.975247524752476, "grad_norm": 0.36945921182632446, "learning_rate": 3.690073563543796e-05, "loss": 0.4625, "step": 72620 }, { "epoch": 17.97772277227723, "grad_norm": 0.37915146350860596, "learning_rate": 3.688743848963566e-05, "loss": 0.4615, "step": 72630 }, { "epoch": 17.980198019801982, "grad_norm": 0.3632241487503052, "learning_rate": 3.6874142339645895e-05, "loss": 0.4583, "step": 72640 }, { "epoch": 17.98267326732673, "grad_norm": 0.353508859872818, "learning_rate": 3.686084718647836e-05, "loss": 0.4593, "step": 72650 }, { "epoch": 17.985148514851485, "grad_norm": 0.36296573281288147, "learning_rate": 3.68475530311428e-05, "loss": 0.4565, "step": 72660 }, { "epoch": 17.987623762376238, "grad_norm": 0.37778350710868835, "learning_rate": 3.683425987464878e-05, "loss": 0.4599, "step": 72670 }, { "epoch": 17.99009900990099, "grad_norm": 0.3499910533428192, "learning_rate": 3.682096771800583e-05, "loss": 0.4628, "step": 72680 }, { "epoch": 17.992574257425744, "grad_norm": 0.35765042901039124, "learning_rate": 3.680767656222341e-05, "loss": 0.4598, "step": 72690 }, { "epoch": 17.995049504950494, "grad_norm": 0.3879956901073456, "learning_rate": 3.6794386408310894e-05, "loss": 0.4574, "step": 72700 }, { "epoch": 17.997524752475247, "grad_norm": 0.3709898889064789, "learning_rate": 3.6781097257277595e-05, "loss": 0.4622, "step": 72710 }, { "epoch": 18.0, "grad_norm": 0.39342862367630005, "learning_rate": 3.67678091101327e-05, "loss": 0.4586, "step": 72720 }, { "epoch": 18.002475247524753, "grad_norm": 0.38975152373313904, "learning_rate": 3.675452196788539e-05, "loss": 0.4623, "step": 72730 }, { "epoch": 18.004950495049506, "grad_norm": 0.39965853095054626, "learning_rate": 3.674123583154472e-05, "loss": 0.4585, "step": 72740 }, { "epoch": 18.007425742574256, "grad_norm": 0.3749074339866638, "learning_rate": 3.6727950702119686e-05, "loss": 0.4609, "step": 72750 }, { "epoch": 18.00990099009901, "grad_norm": 0.39615142345428467, "learning_rate": 3.6714666580619215e-05, "loss": 0.4622, "step": 72760 }, { "epoch": 18.012376237623762, "grad_norm": 0.360237717628479, "learning_rate": 3.6701383468052145e-05, "loss": 0.4641, "step": 72770 }, { "epoch": 18.014851485148515, "grad_norm": 0.39313414692878723, "learning_rate": 3.668810136542721e-05, "loss": 0.4621, "step": 72780 }, { "epoch": 18.01732673267327, "grad_norm": 0.3691244423389435, "learning_rate": 3.6674820273753166e-05, "loss": 0.4605, "step": 72790 }, { "epoch": 18.019801980198018, "grad_norm": 0.37790346145629883, "learning_rate": 3.6661540194038556e-05, "loss": 0.4612, "step": 72800 }, { "epoch": 18.02227722772277, "grad_norm": 0.3586122989654541, "learning_rate": 3.664826112729196e-05, "loss": 0.4573, "step": 72810 }, { "epoch": 18.024752475247524, "grad_norm": 0.3700590133666992, "learning_rate": 3.6634983074521794e-05, "loss": 0.4639, "step": 72820 }, { "epoch": 18.027227722772277, "grad_norm": 0.3681800365447998, "learning_rate": 3.6621706036736495e-05, "loss": 0.4633, "step": 72830 }, { "epoch": 18.02970297029703, "grad_norm": 0.3712395131587982, "learning_rate": 3.660843001494431e-05, "loss": 0.4601, "step": 72840 }, { "epoch": 18.032178217821784, "grad_norm": 0.36776456236839294, "learning_rate": 3.659515501015348e-05, "loss": 0.4654, "step": 72850 }, { "epoch": 18.034653465346533, "grad_norm": 0.36333972215652466, "learning_rate": 3.658188102337218e-05, "loss": 0.4647, "step": 72860 }, { "epoch": 18.037128712871286, "grad_norm": 0.39050719141960144, "learning_rate": 3.656860805560847e-05, "loss": 0.4572, "step": 72870 }, { "epoch": 18.03960396039604, "grad_norm": 0.3787689208984375, "learning_rate": 3.6555336107870314e-05, "loss": 0.46, "step": 72880 }, { "epoch": 18.042079207920793, "grad_norm": 0.3789113759994507, "learning_rate": 3.654206518116567e-05, "loss": 0.4612, "step": 72890 }, { "epoch": 18.044554455445546, "grad_norm": 0.37994977831840515, "learning_rate": 3.652879527650237e-05, "loss": 0.4546, "step": 72900 }, { "epoch": 18.047029702970296, "grad_norm": 0.388140469789505, "learning_rate": 3.651552639488815e-05, "loss": 0.4631, "step": 72910 }, { "epoch": 18.04950495049505, "grad_norm": 0.3936406075954437, "learning_rate": 3.6502258537330715e-05, "loss": 0.4591, "step": 72920 }, { "epoch": 18.051980198019802, "grad_norm": 0.3760058581829071, "learning_rate": 3.648899170483767e-05, "loss": 0.4573, "step": 72930 }, { "epoch": 18.054455445544555, "grad_norm": 0.3646100163459778, "learning_rate": 3.647572589841654e-05, "loss": 0.4579, "step": 72940 }, { "epoch": 18.056930693069308, "grad_norm": 0.37975600361824036, "learning_rate": 3.6462461119074765e-05, "loss": 0.459, "step": 72950 }, { "epoch": 18.059405940594058, "grad_norm": 0.40223029255867004, "learning_rate": 3.644919736781974e-05, "loss": 0.4624, "step": 72960 }, { "epoch": 18.06188118811881, "grad_norm": 0.361607164144516, "learning_rate": 3.643593464565875e-05, "loss": 0.457, "step": 72970 }, { "epoch": 18.064356435643564, "grad_norm": 0.3578018546104431, "learning_rate": 3.6422672953599e-05, "loss": 0.4607, "step": 72980 }, { "epoch": 18.066831683168317, "grad_norm": 0.37194618582725525, "learning_rate": 3.640941229264764e-05, "loss": 0.4613, "step": 72990 }, { "epoch": 18.06930693069307, "grad_norm": 0.3906879723072052, "learning_rate": 3.639615266381172e-05, "loss": 0.4626, "step": 73000 }, { "epoch": 18.071782178217823, "grad_norm": 0.3643864095211029, "learning_rate": 3.638289406809825e-05, "loss": 0.4626, "step": 73010 }, { "epoch": 18.074257425742573, "grad_norm": 0.39365869760513306, "learning_rate": 3.6369636506514085e-05, "loss": 0.4616, "step": 73020 }, { "epoch": 18.076732673267326, "grad_norm": 0.39603954553604126, "learning_rate": 3.6356379980066084e-05, "loss": 0.4616, "step": 73030 }, { "epoch": 18.07920792079208, "grad_norm": 0.37931910157203674, "learning_rate": 3.634312448976098e-05, "loss": 0.465, "step": 73040 }, { "epoch": 18.081683168316832, "grad_norm": 0.3572339415550232, "learning_rate": 3.632987003660544e-05, "loss": 0.459, "step": 73050 }, { "epoch": 18.084158415841586, "grad_norm": 0.3778248429298401, "learning_rate": 3.6316616621606066e-05, "loss": 0.4605, "step": 73060 }, { "epoch": 18.086633663366335, "grad_norm": 0.36237767338752747, "learning_rate": 3.6303364245769364e-05, "loss": 0.4605, "step": 73070 }, { "epoch": 18.08910891089109, "grad_norm": 0.35801470279693604, "learning_rate": 3.6290112910101746e-05, "loss": 0.46, "step": 73080 }, { "epoch": 18.09158415841584, "grad_norm": 0.36692366003990173, "learning_rate": 3.627686261560957e-05, "loss": 0.4615, "step": 73090 }, { "epoch": 18.094059405940595, "grad_norm": 0.35499781370162964, "learning_rate": 3.626361336329912e-05, "loss": 0.4585, "step": 73100 }, { "epoch": 18.096534653465348, "grad_norm": 0.3640507757663727, "learning_rate": 3.625036515417658e-05, "loss": 0.4636, "step": 73110 }, { "epoch": 18.099009900990097, "grad_norm": 0.346591591835022, "learning_rate": 3.6237117989248064e-05, "loss": 0.463, "step": 73120 }, { "epoch": 18.10148514851485, "grad_norm": 0.36239728331565857, "learning_rate": 3.6223871869519623e-05, "loss": 0.463, "step": 73130 }, { "epoch": 18.103960396039604, "grad_norm": 0.3614811301231384, "learning_rate": 3.621062679599719e-05, "loss": 0.4544, "step": 73140 }, { "epoch": 18.106435643564357, "grad_norm": 0.36587417125701904, "learning_rate": 3.619738276968665e-05, "loss": 0.4582, "step": 73150 }, { "epoch": 18.10891089108911, "grad_norm": 0.38050609827041626, "learning_rate": 3.61841397915938e-05, "loss": 0.4608, "step": 73160 }, { "epoch": 18.111386138613863, "grad_norm": 0.35333624482154846, "learning_rate": 3.6170897862724356e-05, "loss": 0.4581, "step": 73170 }, { "epoch": 18.113861386138613, "grad_norm": 0.3672831058502197, "learning_rate": 3.615765698408396e-05, "loss": 0.4598, "step": 73180 }, { "epoch": 18.116336633663366, "grad_norm": 0.359668493270874, "learning_rate": 3.614441715667814e-05, "loss": 0.4696, "step": 73190 }, { "epoch": 18.11881188118812, "grad_norm": 0.3600573241710663, "learning_rate": 3.613117838151244e-05, "loss": 0.4608, "step": 73200 }, { "epoch": 18.121287128712872, "grad_norm": 0.3587244749069214, "learning_rate": 3.611794065959219e-05, "loss": 0.4621, "step": 73210 }, { "epoch": 18.123762376237625, "grad_norm": 0.3785676956176758, "learning_rate": 3.6104703991922725e-05, "loss": 0.4614, "step": 73220 }, { "epoch": 18.126237623762375, "grad_norm": 0.3555796444416046, "learning_rate": 3.609146837950931e-05, "loss": 0.4567, "step": 73230 }, { "epoch": 18.128712871287128, "grad_norm": 0.39165711402893066, "learning_rate": 3.607823382335709e-05, "loss": 0.4633, "step": 73240 }, { "epoch": 18.13118811881188, "grad_norm": 0.36280444264411926, "learning_rate": 3.6065000324471096e-05, "loss": 0.4588, "step": 73250 }, { "epoch": 18.133663366336634, "grad_norm": 0.36660516262054443, "learning_rate": 3.60517678838564e-05, "loss": 0.4643, "step": 73260 }, { "epoch": 18.136138613861387, "grad_norm": 0.38129720091819763, "learning_rate": 3.603853650251788e-05, "loss": 0.4662, "step": 73270 }, { "epoch": 18.138613861386137, "grad_norm": 0.3875581920146942, "learning_rate": 3.602530618146037e-05, "loss": 0.4566, "step": 73280 }, { "epoch": 18.14108910891089, "grad_norm": 0.38505932688713074, "learning_rate": 3.601207692168862e-05, "loss": 0.4628, "step": 73290 }, { "epoch": 18.143564356435643, "grad_norm": 0.3701353073120117, "learning_rate": 3.5998848724207325e-05, "loss": 0.4608, "step": 73300 }, { "epoch": 18.146039603960396, "grad_norm": 0.39437931776046753, "learning_rate": 3.598562159002108e-05, "loss": 0.4628, "step": 73310 }, { "epoch": 18.14851485148515, "grad_norm": 0.36357569694519043, "learning_rate": 3.597239552013436e-05, "loss": 0.4585, "step": 73320 }, { "epoch": 18.150990099009903, "grad_norm": 0.37373384833335876, "learning_rate": 3.595917051555165e-05, "loss": 0.4563, "step": 73330 }, { "epoch": 18.153465346534652, "grad_norm": 0.3522190451622009, "learning_rate": 3.594594657727727e-05, "loss": 0.4576, "step": 73340 }, { "epoch": 18.155940594059405, "grad_norm": 0.3665500283241272, "learning_rate": 3.593272370631549e-05, "loss": 0.4614, "step": 73350 }, { "epoch": 18.15841584158416, "grad_norm": 0.3421677350997925, "learning_rate": 3.591950190367052e-05, "loss": 0.4596, "step": 73360 }, { "epoch": 18.16089108910891, "grad_norm": 0.37943702936172485, "learning_rate": 3.5906281170346464e-05, "loss": 0.4622, "step": 73370 }, { "epoch": 18.163366336633665, "grad_norm": 0.3753732442855835, "learning_rate": 3.589306150734734e-05, "loss": 0.4627, "step": 73380 }, { "epoch": 18.165841584158414, "grad_norm": 0.3787451684474945, "learning_rate": 3.587984291567708e-05, "loss": 0.4525, "step": 73390 }, { "epoch": 18.168316831683168, "grad_norm": 0.35108891129493713, "learning_rate": 3.586662539633958e-05, "loss": 0.463, "step": 73400 }, { "epoch": 18.17079207920792, "grad_norm": 0.36026236414909363, "learning_rate": 3.585340895033861e-05, "loss": 0.4656, "step": 73410 }, { "epoch": 18.173267326732674, "grad_norm": 0.3685458302497864, "learning_rate": 3.5840193578677856e-05, "loss": 0.4607, "step": 73420 }, { "epoch": 18.175742574257427, "grad_norm": 0.3614843785762787, "learning_rate": 3.582697928236097e-05, "loss": 0.4618, "step": 73430 }, { "epoch": 18.178217821782177, "grad_norm": 0.37007442116737366, "learning_rate": 3.581376606239147e-05, "loss": 0.4607, "step": 73440 }, { "epoch": 18.18069306930693, "grad_norm": 0.37611573934555054, "learning_rate": 3.5800553919772805e-05, "loss": 0.4641, "step": 73450 }, { "epoch": 18.183168316831683, "grad_norm": 0.36039993166923523, "learning_rate": 3.5787342855508376e-05, "loss": 0.4587, "step": 73460 }, { "epoch": 18.185643564356436, "grad_norm": 0.36333271861076355, "learning_rate": 3.577413287060146e-05, "loss": 0.4583, "step": 73470 }, { "epoch": 18.18811881188119, "grad_norm": 0.3670811057090759, "learning_rate": 3.5760923966055274e-05, "loss": 0.4582, "step": 73480 }, { "epoch": 18.190594059405942, "grad_norm": 0.36322730779647827, "learning_rate": 3.574771614287292e-05, "loss": 0.455, "step": 73490 }, { "epoch": 18.193069306930692, "grad_norm": 0.3736707866191864, "learning_rate": 3.573450940205749e-05, "loss": 0.4619, "step": 73500 }, { "epoch": 18.195544554455445, "grad_norm": 0.3972836434841156, "learning_rate": 3.572130374461192e-05, "loss": 0.4648, "step": 73510 }, { "epoch": 18.198019801980198, "grad_norm": 0.390063613653183, "learning_rate": 3.570809917153909e-05, "loss": 0.4625, "step": 73520 }, { "epoch": 18.20049504950495, "grad_norm": 0.37633755803108215, "learning_rate": 3.569489568384182e-05, "loss": 0.4611, "step": 73530 }, { "epoch": 18.202970297029704, "grad_norm": 0.37700268626213074, "learning_rate": 3.5681693282522834e-05, "loss": 0.4663, "step": 73540 }, { "epoch": 18.205445544554454, "grad_norm": 0.3536418378353119, "learning_rate": 3.566849196858473e-05, "loss": 0.4538, "step": 73550 }, { "epoch": 18.207920792079207, "grad_norm": 0.382554292678833, "learning_rate": 3.565529174303008e-05, "loss": 0.4611, "step": 73560 }, { "epoch": 18.21039603960396, "grad_norm": 0.36507168412208557, "learning_rate": 3.5642092606861356e-05, "loss": 0.4622, "step": 73570 }, { "epoch": 18.212871287128714, "grad_norm": 0.3789760172367096, "learning_rate": 3.562889456108095e-05, "loss": 0.4591, "step": 73580 }, { "epoch": 18.215346534653467, "grad_norm": 0.41698840260505676, "learning_rate": 3.5615697606691154e-05, "loss": 0.4597, "step": 73590 }, { "epoch": 18.217821782178216, "grad_norm": 0.3999786078929901, "learning_rate": 3.56025017446942e-05, "loss": 0.4664, "step": 73600 }, { "epoch": 18.22029702970297, "grad_norm": 0.3940012454986572, "learning_rate": 3.558930697609224e-05, "loss": 0.4616, "step": 73610 }, { "epoch": 18.222772277227723, "grad_norm": 0.37724533677101135, "learning_rate": 3.557611330188729e-05, "loss": 0.4597, "step": 73620 }, { "epoch": 18.225247524752476, "grad_norm": 0.3500216603279114, "learning_rate": 3.556292072308135e-05, "loss": 0.4604, "step": 73630 }, { "epoch": 18.22772277227723, "grad_norm": 0.3643939197063446, "learning_rate": 3.554972924067631e-05, "loss": 0.463, "step": 73640 }, { "epoch": 18.230198019801982, "grad_norm": 0.3851644694805145, "learning_rate": 3.553653885567398e-05, "loss": 0.464, "step": 73650 }, { "epoch": 18.23267326732673, "grad_norm": 0.34754490852355957, "learning_rate": 3.552334956907604e-05, "loss": 0.4586, "step": 73660 }, { "epoch": 18.235148514851485, "grad_norm": 0.3629036843776703, "learning_rate": 3.55101613818842e-05, "loss": 0.4625, "step": 73670 }, { "epoch": 18.237623762376238, "grad_norm": 0.36097654700279236, "learning_rate": 3.549697429509996e-05, "loss": 0.4628, "step": 73680 }, { "epoch": 18.24009900990099, "grad_norm": 0.3669235408306122, "learning_rate": 3.548378830972481e-05, "loss": 0.4616, "step": 73690 }, { "epoch": 18.242574257425744, "grad_norm": 0.3647232949733734, "learning_rate": 3.547060342676014e-05, "loss": 0.4625, "step": 73700 }, { "epoch": 18.245049504950494, "grad_norm": 0.3709026575088501, "learning_rate": 3.5457419647207266e-05, "loss": 0.4646, "step": 73710 }, { "epoch": 18.247524752475247, "grad_norm": 0.36539798974990845, "learning_rate": 3.544423697206737e-05, "loss": 0.4583, "step": 73720 }, { "epoch": 18.25, "grad_norm": 0.36325469613075256, "learning_rate": 3.543105540234165e-05, "loss": 0.4602, "step": 73730 }, { "epoch": 18.252475247524753, "grad_norm": 0.3641827702522278, "learning_rate": 3.5417874939031114e-05, "loss": 0.4576, "step": 73740 }, { "epoch": 18.254950495049506, "grad_norm": 0.36742135882377625, "learning_rate": 3.540469558313674e-05, "loss": 0.4647, "step": 73750 }, { "epoch": 18.257425742574256, "grad_norm": 0.36760133504867554, "learning_rate": 3.5391517335659406e-05, "loss": 0.4588, "step": 73760 }, { "epoch": 18.25990099009901, "grad_norm": 0.35079675912857056, "learning_rate": 3.537834019759993e-05, "loss": 0.4592, "step": 73770 }, { "epoch": 18.262376237623762, "grad_norm": 0.3640747368335724, "learning_rate": 3.536516416995903e-05, "loss": 0.4587, "step": 73780 }, { "epoch": 18.264851485148515, "grad_norm": 0.37289711833000183, "learning_rate": 3.5351989253737304e-05, "loss": 0.4577, "step": 73790 }, { "epoch": 18.26732673267327, "grad_norm": 0.36679893732070923, "learning_rate": 3.533881544993536e-05, "loss": 0.4571, "step": 73800 }, { "epoch": 18.269801980198018, "grad_norm": 0.3717658817768097, "learning_rate": 3.5325642759553615e-05, "loss": 0.4674, "step": 73810 }, { "epoch": 18.27227722772277, "grad_norm": 0.3792077600955963, "learning_rate": 3.5312471183592445e-05, "loss": 0.4617, "step": 73820 }, { "epoch": 18.274752475247524, "grad_norm": 0.3870130777359009, "learning_rate": 3.5299300723052175e-05, "loss": 0.4584, "step": 73830 }, { "epoch": 18.277227722772277, "grad_norm": 0.36365509033203125, "learning_rate": 3.528613137893301e-05, "loss": 0.4602, "step": 73840 }, { "epoch": 18.27970297029703, "grad_norm": 0.362435907125473, "learning_rate": 3.527296315223505e-05, "loss": 0.4604, "step": 73850 }, { "epoch": 18.282178217821784, "grad_norm": 0.37105098366737366, "learning_rate": 3.525979604395833e-05, "loss": 0.4609, "step": 73860 }, { "epoch": 18.284653465346533, "grad_norm": 0.3868013322353363, "learning_rate": 3.524663005510284e-05, "loss": 0.4576, "step": 73870 }, { "epoch": 18.287128712871286, "grad_norm": 0.36583852767944336, "learning_rate": 3.5233465186668435e-05, "loss": 0.4583, "step": 73880 }, { "epoch": 18.28960396039604, "grad_norm": 0.36598750948905945, "learning_rate": 3.522030143965489e-05, "loss": 0.4639, "step": 73890 }, { "epoch": 18.292079207920793, "grad_norm": 0.37726685404777527, "learning_rate": 3.520713881506193e-05, "loss": 0.4663, "step": 73900 }, { "epoch": 18.294554455445546, "grad_norm": 0.3562542498111725, "learning_rate": 3.5193977313889146e-05, "loss": 0.4592, "step": 73910 }, { "epoch": 18.297029702970296, "grad_norm": 0.36136695742607117, "learning_rate": 3.5180816937136074e-05, "loss": 0.4636, "step": 73920 }, { "epoch": 18.29950495049505, "grad_norm": 0.37717175483703613, "learning_rate": 3.5167657685802145e-05, "loss": 0.4588, "step": 73930 }, { "epoch": 18.301980198019802, "grad_norm": 0.3759666085243225, "learning_rate": 3.5154499560886746e-05, "loss": 0.4615, "step": 73940 }, { "epoch": 18.304455445544555, "grad_norm": 0.3650987148284912, "learning_rate": 3.5141342563389135e-05, "loss": 0.4587, "step": 73950 }, { "epoch": 18.306930693069308, "grad_norm": 0.34525054693222046, "learning_rate": 3.512818669430847e-05, "loss": 0.4569, "step": 73960 }, { "epoch": 18.309405940594058, "grad_norm": 0.3692311942577362, "learning_rate": 3.511503195464391e-05, "loss": 0.4555, "step": 73970 }, { "epoch": 18.31188118811881, "grad_norm": 0.352586030960083, "learning_rate": 3.510187834539443e-05, "loss": 0.4631, "step": 73980 }, { "epoch": 18.314356435643564, "grad_norm": 0.3704007863998413, "learning_rate": 3.508872586755897e-05, "loss": 0.4577, "step": 73990 }, { "epoch": 18.316831683168317, "grad_norm": 0.358549028635025, "learning_rate": 3.507557452213637e-05, "loss": 0.4593, "step": 74000 }, { "epoch": 18.31930693069307, "grad_norm": 0.34639573097229004, "learning_rate": 3.5062424310125403e-05, "loss": 0.4615, "step": 74010 }, { "epoch": 18.321782178217823, "grad_norm": 0.3556521534919739, "learning_rate": 3.5049275232524735e-05, "loss": 0.4624, "step": 74020 }, { "epoch": 18.324257425742573, "grad_norm": 0.35757774114608765, "learning_rate": 3.503612729033293e-05, "loss": 0.4598, "step": 74030 }, { "epoch": 18.326732673267326, "grad_norm": 0.37923988699913025, "learning_rate": 3.502298048454851e-05, "loss": 0.4601, "step": 74040 }, { "epoch": 18.32920792079208, "grad_norm": 0.35944730043411255, "learning_rate": 3.500983481616989e-05, "loss": 0.4616, "step": 74050 }, { "epoch": 18.331683168316832, "grad_norm": 0.35774528980255127, "learning_rate": 3.499669028619537e-05, "loss": 0.4575, "step": 74060 }, { "epoch": 18.334158415841586, "grad_norm": 0.3589507043361664, "learning_rate": 3.498354689562322e-05, "loss": 0.4596, "step": 74070 }, { "epoch": 18.336633663366335, "grad_norm": 0.3722967505455017, "learning_rate": 3.4970404645451605e-05, "loss": 0.4601, "step": 74080 }, { "epoch": 18.33910891089109, "grad_norm": 0.3774113655090332, "learning_rate": 3.495726353667853e-05, "loss": 0.4621, "step": 74090 }, { "epoch": 18.34158415841584, "grad_norm": 0.37330901622772217, "learning_rate": 3.494412357030205e-05, "loss": 0.4585, "step": 74100 }, { "epoch": 18.344059405940595, "grad_norm": 0.3687647581100464, "learning_rate": 3.4930984747320016e-05, "loss": 0.4617, "step": 74110 }, { "epoch": 18.346534653465348, "grad_norm": 0.3695968687534332, "learning_rate": 3.491784706873024e-05, "loss": 0.4544, "step": 74120 }, { "epoch": 18.349009900990097, "grad_norm": 0.3769010901451111, "learning_rate": 3.490471053553044e-05, "loss": 0.4613, "step": 74130 }, { "epoch": 18.35148514851485, "grad_norm": 0.36674895882606506, "learning_rate": 3.489157514871827e-05, "loss": 0.4563, "step": 74140 }, { "epoch": 18.353960396039604, "grad_norm": 0.37835219502449036, "learning_rate": 3.4878440909291255e-05, "loss": 0.4593, "step": 74150 }, { "epoch": 18.356435643564357, "grad_norm": 0.3628920614719391, "learning_rate": 3.4865307818246836e-05, "loss": 0.4584, "step": 74160 }, { "epoch": 18.35891089108911, "grad_norm": 0.37247639894485474, "learning_rate": 3.485217587658243e-05, "loss": 0.4633, "step": 74170 }, { "epoch": 18.361386138613863, "grad_norm": 0.3549656569957733, "learning_rate": 3.48390450852953e-05, "loss": 0.4561, "step": 74180 }, { "epoch": 18.363861386138613, "grad_norm": 0.3728756308555603, "learning_rate": 3.482591544538263e-05, "loss": 0.4624, "step": 74190 }, { "epoch": 18.366336633663366, "grad_norm": 0.3698674440383911, "learning_rate": 3.481278695784155e-05, "loss": 0.462, "step": 74200 }, { "epoch": 18.36881188118812, "grad_norm": 0.3854811489582062, "learning_rate": 3.479965962366909e-05, "loss": 0.4586, "step": 74210 }, { "epoch": 18.371287128712872, "grad_norm": 0.35035455226898193, "learning_rate": 3.4786533443862144e-05, "loss": 0.4572, "step": 74220 }, { "epoch": 18.373762376237625, "grad_norm": 0.35714903473854065, "learning_rate": 3.477340841941758e-05, "loss": 0.4589, "step": 74230 }, { "epoch": 18.376237623762375, "grad_norm": 0.360434889793396, "learning_rate": 3.4760284551332176e-05, "loss": 0.4601, "step": 74240 }, { "epoch": 18.378712871287128, "grad_norm": 0.3755660951137543, "learning_rate": 3.47471618406026e-05, "loss": 0.4587, "step": 74250 }, { "epoch": 18.38118811881188, "grad_norm": 0.37879735231399536, "learning_rate": 3.4734040288225375e-05, "loss": 0.4659, "step": 74260 }, { "epoch": 18.383663366336634, "grad_norm": 0.3672618865966797, "learning_rate": 3.472091989519709e-05, "loss": 0.4617, "step": 74270 }, { "epoch": 18.386138613861387, "grad_norm": 0.3694382607936859, "learning_rate": 3.470780066251409e-05, "loss": 0.4595, "step": 74280 }, { "epoch": 18.388613861386137, "grad_norm": 0.36262887716293335, "learning_rate": 3.46946825911727e-05, "loss": 0.4626, "step": 74290 }, { "epoch": 18.39108910891089, "grad_norm": 0.3470180630683899, "learning_rate": 3.468156568216917e-05, "loss": 0.4613, "step": 74300 }, { "epoch": 18.393564356435643, "grad_norm": 0.3770878314971924, "learning_rate": 3.466844993649963e-05, "loss": 0.463, "step": 74310 }, { "epoch": 18.396039603960396, "grad_norm": 0.3684820532798767, "learning_rate": 3.465533535516015e-05, "loss": 0.4612, "step": 74320 }, { "epoch": 18.39851485148515, "grad_norm": 0.3735346496105194, "learning_rate": 3.464222193914667e-05, "loss": 0.4567, "step": 74330 }, { "epoch": 18.400990099009903, "grad_norm": 0.35495510697364807, "learning_rate": 3.4629109689455086e-05, "loss": 0.4602, "step": 74340 }, { "epoch": 18.403465346534652, "grad_norm": 0.3701956868171692, "learning_rate": 3.461599860708118e-05, "loss": 0.464, "step": 74350 }, { "epoch": 18.405940594059405, "grad_norm": 0.3314210772514343, "learning_rate": 3.460288869302065e-05, "loss": 0.4635, "step": 74360 }, { "epoch": 18.40841584158416, "grad_norm": 0.37171387672424316, "learning_rate": 3.4589779948269113e-05, "loss": 0.4651, "step": 74370 }, { "epoch": 18.41089108910891, "grad_norm": 0.34833571314811707, "learning_rate": 3.457667237382211e-05, "loss": 0.454, "step": 74380 }, { "epoch": 18.413366336633665, "grad_norm": 0.36507976055145264, "learning_rate": 3.456356597067504e-05, "loss": 0.4596, "step": 74390 }, { "epoch": 18.415841584158414, "grad_norm": 0.36781731247901917, "learning_rate": 3.455046073982325e-05, "loss": 0.456, "step": 74400 }, { "epoch": 18.418316831683168, "grad_norm": 0.3468751907348633, "learning_rate": 3.4537356682262025e-05, "loss": 0.4623, "step": 74410 }, { "epoch": 18.42079207920792, "grad_norm": 0.33845874667167664, "learning_rate": 3.452425379898651e-05, "loss": 0.4581, "step": 74420 }, { "epoch": 18.423267326732674, "grad_norm": 0.36462652683258057, "learning_rate": 3.451115209099178e-05, "loss": 0.4599, "step": 74430 }, { "epoch": 18.425742574257427, "grad_norm": 0.3527473211288452, "learning_rate": 3.449805155927286e-05, "loss": 0.4624, "step": 74440 }, { "epoch": 18.428217821782177, "grad_norm": 0.36862385272979736, "learning_rate": 3.4484952204824596e-05, "loss": 0.4575, "step": 74450 }, { "epoch": 18.43069306930693, "grad_norm": 0.3809501826763153, "learning_rate": 3.4471854028641815e-05, "loss": 0.4592, "step": 74460 }, { "epoch": 18.433168316831683, "grad_norm": 0.37775036692619324, "learning_rate": 3.4458757031719255e-05, "loss": 0.4641, "step": 74470 }, { "epoch": 18.435643564356436, "grad_norm": 0.36083823442459106, "learning_rate": 3.4445661215051536e-05, "loss": 0.4586, "step": 74480 }, { "epoch": 18.43811881188119, "grad_norm": 0.3655446469783783, "learning_rate": 3.443256657963321e-05, "loss": 0.4597, "step": 74490 }, { "epoch": 18.440594059405942, "grad_norm": 0.36730071902275085, "learning_rate": 3.441947312645869e-05, "loss": 0.4609, "step": 74500 }, { "epoch": 18.443069306930692, "grad_norm": 0.37083297967910767, "learning_rate": 3.440638085652239e-05, "loss": 0.4607, "step": 74510 }, { "epoch": 18.445544554455445, "grad_norm": 0.35293906927108765, "learning_rate": 3.439328977081854e-05, "loss": 0.4619, "step": 74520 }, { "epoch": 18.448019801980198, "grad_norm": 0.379232257604599, "learning_rate": 3.4380199870341326e-05, "loss": 0.463, "step": 74530 }, { "epoch": 18.45049504950495, "grad_norm": 0.36006930470466614, "learning_rate": 3.4367111156084864e-05, "loss": 0.4636, "step": 74540 }, { "epoch": 18.452970297029704, "grad_norm": 0.34839335083961487, "learning_rate": 3.435402362904315e-05, "loss": 0.4606, "step": 74550 }, { "epoch": 18.455445544554454, "grad_norm": 0.33313044905662537, "learning_rate": 3.4340937290210066e-05, "loss": 0.456, "step": 74560 }, { "epoch": 18.457920792079207, "grad_norm": 0.3314104974269867, "learning_rate": 3.432785214057947e-05, "loss": 0.4608, "step": 74570 }, { "epoch": 18.46039603960396, "grad_norm": 0.3612028956413269, "learning_rate": 3.4314768181145075e-05, "loss": 0.4586, "step": 74580 }, { "epoch": 18.462871287128714, "grad_norm": 0.3764086365699768, "learning_rate": 3.430168541290052e-05, "loss": 0.4636, "step": 74590 }, { "epoch": 18.465346534653467, "grad_norm": 0.35074594616889954, "learning_rate": 3.428860383683935e-05, "loss": 0.4597, "step": 74600 }, { "epoch": 18.467821782178216, "grad_norm": 0.3701923191547394, "learning_rate": 3.427552345395505e-05, "loss": 0.4565, "step": 74610 }, { "epoch": 18.47029702970297, "grad_norm": 0.3402465283870697, "learning_rate": 3.426244426524097e-05, "loss": 0.4598, "step": 74620 }, { "epoch": 18.472772277227723, "grad_norm": 0.3864842355251312, "learning_rate": 3.424936627169037e-05, "loss": 0.4555, "step": 74630 }, { "epoch": 18.475247524752476, "grad_norm": 0.36310073733329773, "learning_rate": 3.423628947429647e-05, "loss": 0.4608, "step": 74640 }, { "epoch": 18.47772277227723, "grad_norm": 0.4011482000350952, "learning_rate": 3.422321387405235e-05, "loss": 0.4603, "step": 74650 }, { "epoch": 18.480198019801982, "grad_norm": 0.37858012318611145, "learning_rate": 3.421013947195101e-05, "loss": 0.4585, "step": 74660 }, { "epoch": 18.48267326732673, "grad_norm": 0.363052636384964, "learning_rate": 3.419706626898538e-05, "loss": 0.4556, "step": 74670 }, { "epoch": 18.485148514851485, "grad_norm": 0.3762802183628082, "learning_rate": 3.418399426614829e-05, "loss": 0.4598, "step": 74680 }, { "epoch": 18.487623762376238, "grad_norm": 0.4000522196292877, "learning_rate": 3.417092346443245e-05, "loss": 0.4614, "step": 74690 }, { "epoch": 18.49009900990099, "grad_norm": 0.37001726031303406, "learning_rate": 3.4157853864830504e-05, "loss": 0.4595, "step": 74700 }, { "epoch": 18.492574257425744, "grad_norm": 0.3485468626022339, "learning_rate": 3.4144785468335004e-05, "loss": 0.4575, "step": 74710 }, { "epoch": 18.495049504950494, "grad_norm": 0.365945041179657, "learning_rate": 3.413171827593843e-05, "loss": 0.4569, "step": 74720 }, { "epoch": 18.497524752475247, "grad_norm": 0.36673876643180847, "learning_rate": 3.411865228863311e-05, "loss": 0.4579, "step": 74730 }, { "epoch": 18.5, "grad_norm": 0.35206544399261475, "learning_rate": 3.4105587507411366e-05, "loss": 0.4591, "step": 74740 }, { "epoch": 18.502475247524753, "grad_norm": 0.3412816822528839, "learning_rate": 3.409252393326534e-05, "loss": 0.4546, "step": 74750 }, { "epoch": 18.504950495049506, "grad_norm": 0.3636153042316437, "learning_rate": 3.4079461567187144e-05, "loss": 0.46, "step": 74760 }, { "epoch": 18.507425742574256, "grad_norm": 0.34888824820518494, "learning_rate": 3.406640041016877e-05, "loss": 0.4631, "step": 74770 }, { "epoch": 18.50990099009901, "grad_norm": 0.3579597771167755, "learning_rate": 3.405334046320214e-05, "loss": 0.4593, "step": 74780 }, { "epoch": 18.512376237623762, "grad_norm": 0.37299972772598267, "learning_rate": 3.4040281727279065e-05, "loss": 0.4536, "step": 74790 }, { "epoch": 18.514851485148515, "grad_norm": 0.3856549859046936, "learning_rate": 3.402722420339125e-05, "loss": 0.4644, "step": 74800 }, { "epoch": 18.51732673267327, "grad_norm": 0.3808298408985138, "learning_rate": 3.401416789253037e-05, "loss": 0.4572, "step": 74810 }, { "epoch": 18.519801980198018, "grad_norm": 0.3686024248600006, "learning_rate": 3.4001112795687926e-05, "loss": 0.4613, "step": 74820 }, { "epoch": 18.52227722772277, "grad_norm": 0.35405778884887695, "learning_rate": 3.398805891385538e-05, "loss": 0.4586, "step": 74830 }, { "epoch": 18.524752475247524, "grad_norm": 0.39152830839157104, "learning_rate": 3.39750062480241e-05, "loss": 0.4592, "step": 74840 }, { "epoch": 18.527227722772277, "grad_norm": 0.36331915855407715, "learning_rate": 3.396195479918535e-05, "loss": 0.4636, "step": 74850 }, { "epoch": 18.52970297029703, "grad_norm": 0.3754497170448303, "learning_rate": 3.3948904568330275e-05, "loss": 0.459, "step": 74860 }, { "epoch": 18.532178217821784, "grad_norm": 0.40142613649368286, "learning_rate": 3.3935855556449965e-05, "loss": 0.4597, "step": 74870 }, { "epoch": 18.534653465346533, "grad_norm": 0.3555051386356354, "learning_rate": 3.392280776453542e-05, "loss": 0.4581, "step": 74880 }, { "epoch": 18.537128712871286, "grad_norm": 0.34304532408714294, "learning_rate": 3.390976119357753e-05, "loss": 0.4619, "step": 74890 }, { "epoch": 18.53960396039604, "grad_norm": 0.34765470027923584, "learning_rate": 3.389671584456707e-05, "loss": 0.4595, "step": 74900 }, { "epoch": 18.542079207920793, "grad_norm": 0.36166808009147644, "learning_rate": 3.388367171849478e-05, "loss": 0.4599, "step": 74910 }, { "epoch": 18.544554455445546, "grad_norm": 0.3884485363960266, "learning_rate": 3.3870628816351277e-05, "loss": 0.46, "step": 74920 }, { "epoch": 18.547029702970296, "grad_norm": 0.3662203848361969, "learning_rate": 3.3857587139127056e-05, "loss": 0.4581, "step": 74930 }, { "epoch": 18.54950495049505, "grad_norm": 0.37588152289390564, "learning_rate": 3.384454668781257e-05, "loss": 0.4585, "step": 74940 }, { "epoch": 18.551980198019802, "grad_norm": 0.36810562014579773, "learning_rate": 3.383150746339815e-05, "loss": 0.4623, "step": 74950 }, { "epoch": 18.554455445544555, "grad_norm": 0.3427535891532898, "learning_rate": 3.3818469466874034e-05, "loss": 0.4582, "step": 74960 }, { "epoch": 18.556930693069308, "grad_norm": 0.3609691858291626, "learning_rate": 3.380543269923035e-05, "loss": 0.4624, "step": 74970 }, { "epoch": 18.55940594059406, "grad_norm": 0.362819105386734, "learning_rate": 3.379239716145721e-05, "loss": 0.4582, "step": 74980 }, { "epoch": 18.56188118811881, "grad_norm": 0.3569414019584656, "learning_rate": 3.377936285454453e-05, "loss": 0.4605, "step": 74990 }, { "epoch": 18.564356435643564, "grad_norm": 0.364665687084198, "learning_rate": 3.3766329779482187e-05, "loss": 0.456, "step": 75000 }, { "epoch": 18.566831683168317, "grad_norm": 0.36162084341049194, "learning_rate": 3.375329793725997e-05, "loss": 0.4612, "step": 75010 }, { "epoch": 18.56930693069307, "grad_norm": 0.37187013030052185, "learning_rate": 3.374026732886756e-05, "loss": 0.4601, "step": 75020 }, { "epoch": 18.571782178217823, "grad_norm": 0.3850307762622833, "learning_rate": 3.372723795529453e-05, "loss": 0.4577, "step": 75030 }, { "epoch": 18.574257425742573, "grad_norm": 0.3788365125656128, "learning_rate": 3.3714209817530405e-05, "loss": 0.4597, "step": 75040 }, { "epoch": 18.576732673267326, "grad_norm": 0.3619806170463562, "learning_rate": 3.370118291656456e-05, "loss": 0.4517, "step": 75050 }, { "epoch": 18.57920792079208, "grad_norm": 0.36763718724250793, "learning_rate": 3.368815725338631e-05, "loss": 0.4603, "step": 75060 }, { "epoch": 18.581683168316832, "grad_norm": 0.34447816014289856, "learning_rate": 3.367513282898486e-05, "loss": 0.4544, "step": 75070 }, { "epoch": 18.584158415841586, "grad_norm": 0.3664329946041107, "learning_rate": 3.3662109644349336e-05, "loss": 0.4645, "step": 75080 }, { "epoch": 18.586633663366335, "grad_norm": 0.374601274728775, "learning_rate": 3.3649087700468784e-05, "loss": 0.4641, "step": 75090 }, { "epoch": 18.58910891089109, "grad_norm": 0.35776835680007935, "learning_rate": 3.363606699833208e-05, "loss": 0.4586, "step": 75100 }, { "epoch": 18.59158415841584, "grad_norm": 0.37265849113464355, "learning_rate": 3.362304753892813e-05, "loss": 0.4579, "step": 75110 }, { "epoch": 18.594059405940595, "grad_norm": 0.35756728053092957, "learning_rate": 3.3610029323245626e-05, "loss": 0.4682, "step": 75120 }, { "epoch": 18.596534653465348, "grad_norm": 0.40802639722824097, "learning_rate": 3.359701235227324e-05, "loss": 0.4615, "step": 75130 }, { "epoch": 18.599009900990097, "grad_norm": 0.3854217827320099, "learning_rate": 3.3583996626999495e-05, "loss": 0.4588, "step": 75140 }, { "epoch": 18.60148514851485, "grad_norm": 0.3630434274673462, "learning_rate": 3.357098214841289e-05, "loss": 0.4596, "step": 75150 }, { "epoch": 18.603960396039604, "grad_norm": 0.3463594615459442, "learning_rate": 3.3557968917501764e-05, "loss": 0.4629, "step": 75160 }, { "epoch": 18.606435643564357, "grad_norm": 0.34222888946533203, "learning_rate": 3.354495693525438e-05, "loss": 0.4582, "step": 75170 }, { "epoch": 18.60891089108911, "grad_norm": 0.3360988199710846, "learning_rate": 3.3531946202658923e-05, "loss": 0.4549, "step": 75180 }, { "epoch": 18.611386138613863, "grad_norm": 0.34975752234458923, "learning_rate": 3.3518936720703486e-05, "loss": 0.4601, "step": 75190 }, { "epoch": 18.613861386138613, "grad_norm": 0.3633991479873657, "learning_rate": 3.350592849037603e-05, "loss": 0.4634, "step": 75200 }, { "epoch": 18.616336633663366, "grad_norm": 0.36995062232017517, "learning_rate": 3.349292151266445e-05, "loss": 0.4541, "step": 75210 }, { "epoch": 18.61881188118812, "grad_norm": 0.3870871067047119, "learning_rate": 3.347991578855656e-05, "loss": 0.4619, "step": 75220 }, { "epoch": 18.621287128712872, "grad_norm": 0.3536844253540039, "learning_rate": 3.346691131904004e-05, "loss": 0.4567, "step": 75230 }, { "epoch": 18.623762376237625, "grad_norm": 0.346021443605423, "learning_rate": 3.3453908105102474e-05, "loss": 0.4574, "step": 75240 }, { "epoch": 18.626237623762375, "grad_norm": 0.3725137710571289, "learning_rate": 3.344090614773141e-05, "loss": 0.4625, "step": 75250 }, { "epoch": 18.628712871287128, "grad_norm": 0.3364815413951874, "learning_rate": 3.3427905447914254e-05, "loss": 0.461, "step": 75260 }, { "epoch": 18.63118811881188, "grad_norm": 0.358842134475708, "learning_rate": 3.3414906006638286e-05, "loss": 0.4581, "step": 75270 }, { "epoch": 18.633663366336634, "grad_norm": 0.3507729470729828, "learning_rate": 3.3401907824890776e-05, "loss": 0.4593, "step": 75280 }, { "epoch": 18.636138613861387, "grad_norm": 0.35078907012939453, "learning_rate": 3.338891090365883e-05, "loss": 0.4609, "step": 75290 }, { "epoch": 18.638613861386137, "grad_norm": 0.353664755821228, "learning_rate": 3.337591524392946e-05, "loss": 0.4595, "step": 75300 }, { "epoch": 18.64108910891089, "grad_norm": 0.3527042865753174, "learning_rate": 3.336292084668963e-05, "loss": 0.4632, "step": 75310 }, { "epoch": 18.643564356435643, "grad_norm": 0.3603135049343109, "learning_rate": 3.334992771292616e-05, "loss": 0.4586, "step": 75320 }, { "epoch": 18.646039603960396, "grad_norm": 0.3762860894203186, "learning_rate": 3.333693584362581e-05, "loss": 0.4607, "step": 75330 }, { "epoch": 18.64851485148515, "grad_norm": 0.34991031885147095, "learning_rate": 3.3323945239775204e-05, "loss": 0.457, "step": 75340 }, { "epoch": 18.650990099009903, "grad_norm": 0.3580498695373535, "learning_rate": 3.331095590236091e-05, "loss": 0.4657, "step": 75350 }, { "epoch": 18.653465346534652, "grad_norm": 0.3405778706073761, "learning_rate": 3.329796783236938e-05, "loss": 0.459, "step": 75360 }, { "epoch": 18.655940594059405, "grad_norm": 0.35049256682395935, "learning_rate": 3.328498103078696e-05, "loss": 0.4559, "step": 75370 }, { "epoch": 18.65841584158416, "grad_norm": 0.3376536965370178, "learning_rate": 3.327199549859993e-05, "loss": 0.4611, "step": 75380 }, { "epoch": 18.66089108910891, "grad_norm": 0.3553427457809448, "learning_rate": 3.3259011236794455e-05, "loss": 0.4598, "step": 75390 }, { "epoch": 18.663366336633665, "grad_norm": 0.3616202473640442, "learning_rate": 3.324602824635656e-05, "loss": 0.4605, "step": 75400 }, { "epoch": 18.665841584158414, "grad_norm": 0.3764841854572296, "learning_rate": 3.3233046528272295e-05, "loss": 0.463, "step": 75410 }, { "epoch": 18.668316831683168, "grad_norm": 0.3617147207260132, "learning_rate": 3.322006608352748e-05, "loss": 0.4593, "step": 75420 }, { "epoch": 18.67079207920792, "grad_norm": 0.353447824716568, "learning_rate": 3.3207086913107905e-05, "loss": 0.4639, "step": 75430 }, { "epoch": 18.673267326732674, "grad_norm": 0.3673190772533417, "learning_rate": 3.319410901799924e-05, "loss": 0.4608, "step": 75440 }, { "epoch": 18.675742574257427, "grad_norm": 0.3888559341430664, "learning_rate": 3.318113239918712e-05, "loss": 0.4633, "step": 75450 }, { "epoch": 18.678217821782177, "grad_norm": 0.37158140540122986, "learning_rate": 3.316815705765698e-05, "loss": 0.4601, "step": 75460 }, { "epoch": 18.68069306930693, "grad_norm": 0.3437500298023224, "learning_rate": 3.315518299439422e-05, "loss": 0.459, "step": 75470 }, { "epoch": 18.683168316831683, "grad_norm": 0.34532687067985535, "learning_rate": 3.314221021038416e-05, "loss": 0.4534, "step": 75480 }, { "epoch": 18.685643564356436, "grad_norm": 0.3592110872268677, "learning_rate": 3.312923870661198e-05, "loss": 0.4616, "step": 75490 }, { "epoch": 18.68811881188119, "grad_norm": 0.35937461256980896, "learning_rate": 3.3116268484062774e-05, "loss": 0.4604, "step": 75500 }, { "epoch": 18.69059405940594, "grad_norm": 0.3558747470378876, "learning_rate": 3.3103299543721565e-05, "loss": 0.4586, "step": 75510 }, { "epoch": 18.693069306930692, "grad_norm": 0.3488064408302307, "learning_rate": 3.3090331886573254e-05, "loss": 0.4616, "step": 75520 }, { "epoch": 18.695544554455445, "grad_norm": 0.36545678973197937, "learning_rate": 3.307736551360263e-05, "loss": 0.4599, "step": 75530 }, { "epoch": 18.698019801980198, "grad_norm": 0.34674036502838135, "learning_rate": 3.306440042579441e-05, "loss": 0.4553, "step": 75540 }, { "epoch": 18.70049504950495, "grad_norm": 0.3455877900123596, "learning_rate": 3.3051436624133224e-05, "loss": 0.4574, "step": 75550 }, { "epoch": 18.702970297029704, "grad_norm": 0.36299970746040344, "learning_rate": 3.3038474109603584e-05, "loss": 0.4579, "step": 75560 }, { "epoch": 18.705445544554454, "grad_norm": 0.35311171412467957, "learning_rate": 3.302551288318987e-05, "loss": 0.4643, "step": 75570 }, { "epoch": 18.707920792079207, "grad_norm": 0.37916138768196106, "learning_rate": 3.301255294587646e-05, "loss": 0.4639, "step": 75580 }, { "epoch": 18.71039603960396, "grad_norm": 0.3598775267601013, "learning_rate": 3.299959429864753e-05, "loss": 0.464, "step": 75590 }, { "epoch": 18.712871287128714, "grad_norm": 0.35110050439834595, "learning_rate": 3.298663694248722e-05, "loss": 0.4591, "step": 75600 }, { "epoch": 18.715346534653467, "grad_norm": 0.3499232530593872, "learning_rate": 3.297368087837956e-05, "loss": 0.4577, "step": 75610 }, { "epoch": 18.717821782178216, "grad_norm": 0.36518394947052, "learning_rate": 3.296072610730847e-05, "loss": 0.4596, "step": 75620 }, { "epoch": 18.72029702970297, "grad_norm": 0.3692692518234253, "learning_rate": 3.29477726302578e-05, "loss": 0.4607, "step": 75630 }, { "epoch": 18.722772277227723, "grad_norm": 0.34041813015937805, "learning_rate": 3.2934820448211235e-05, "loss": 0.4585, "step": 75640 }, { "epoch": 18.725247524752476, "grad_norm": 0.367617130279541, "learning_rate": 3.2921869562152466e-05, "loss": 0.4625, "step": 75650 }, { "epoch": 18.72772277227723, "grad_norm": 0.3418990969657898, "learning_rate": 3.2908919973064976e-05, "loss": 0.461, "step": 75660 }, { "epoch": 18.730198019801982, "grad_norm": 0.3526822328567505, "learning_rate": 3.2895971681932225e-05, "loss": 0.4628, "step": 75670 }, { "epoch": 18.73267326732673, "grad_norm": 0.36270612478256226, "learning_rate": 3.288302468973755e-05, "loss": 0.4613, "step": 75680 }, { "epoch": 18.735148514851485, "grad_norm": 0.35510003566741943, "learning_rate": 3.2870078997464205e-05, "loss": 0.4586, "step": 75690 }, { "epoch": 18.737623762376238, "grad_norm": 0.350153386592865, "learning_rate": 3.28571346060953e-05, "loss": 0.462, "step": 75700 }, { "epoch": 18.74009900990099, "grad_norm": 0.35928794741630554, "learning_rate": 3.2844191516613873e-05, "loss": 0.4571, "step": 75710 }, { "epoch": 18.742574257425744, "grad_norm": 0.37098726630210876, "learning_rate": 3.28312497300029e-05, "loss": 0.4582, "step": 75720 }, { "epoch": 18.745049504950494, "grad_norm": 0.38677093386650085, "learning_rate": 3.2818309247245206e-05, "loss": 0.4597, "step": 75730 }, { "epoch": 18.747524752475247, "grad_norm": 0.36409682035446167, "learning_rate": 3.280537006932353e-05, "loss": 0.4641, "step": 75740 }, { "epoch": 18.75, "grad_norm": 0.36161836981773376, "learning_rate": 3.279243219722052e-05, "loss": 0.4592, "step": 75750 }, { "epoch": 18.752475247524753, "grad_norm": 0.352592796087265, "learning_rate": 3.277949563191874e-05, "loss": 0.4599, "step": 75760 }, { "epoch": 18.754950495049506, "grad_norm": 0.3671746850013733, "learning_rate": 3.27665603744006e-05, "loss": 0.4548, "step": 75770 }, { "epoch": 18.757425742574256, "grad_norm": 0.3754304349422455, "learning_rate": 3.275362642564848e-05, "loss": 0.4623, "step": 75780 }, { "epoch": 18.75990099009901, "grad_norm": 0.36576518416404724, "learning_rate": 3.274069378664462e-05, "loss": 0.4615, "step": 75790 }, { "epoch": 18.762376237623762, "grad_norm": 0.3613584637641907, "learning_rate": 3.272776245837117e-05, "loss": 0.4575, "step": 75800 }, { "epoch": 18.764851485148515, "grad_norm": 0.35119491815567017, "learning_rate": 3.271483244181014e-05, "loss": 0.4637, "step": 75810 }, { "epoch": 18.76732673267327, "grad_norm": 0.35724347829818726, "learning_rate": 3.270190373794355e-05, "loss": 0.4658, "step": 75820 }, { "epoch": 18.769801980198018, "grad_norm": 0.3629991412162781, "learning_rate": 3.268897634775321e-05, "loss": 0.4641, "step": 75830 }, { "epoch": 18.77227722772277, "grad_norm": 0.40287157893180847, "learning_rate": 3.267605027222085e-05, "loss": 0.4628, "step": 75840 }, { "epoch": 18.774752475247524, "grad_norm": 0.3469175696372986, "learning_rate": 3.266312551232816e-05, "loss": 0.4609, "step": 75850 }, { "epoch": 18.777227722772277, "grad_norm": 0.35833311080932617, "learning_rate": 3.265020206905668e-05, "loss": 0.4622, "step": 75860 }, { "epoch": 18.77970297029703, "grad_norm": 0.3498152196407318, "learning_rate": 3.263727994338784e-05, "loss": 0.4586, "step": 75870 }, { "epoch": 18.782178217821784, "grad_norm": 0.34043076634407043, "learning_rate": 3.2624359136303024e-05, "loss": 0.456, "step": 75880 }, { "epoch": 18.784653465346533, "grad_norm": 0.34573623538017273, "learning_rate": 3.261143964878345e-05, "loss": 0.4627, "step": 75890 }, { "epoch": 18.787128712871286, "grad_norm": 0.3602239787578583, "learning_rate": 3.2598521481810284e-05, "loss": 0.458, "step": 75900 }, { "epoch": 18.78960396039604, "grad_norm": 0.3531194031238556, "learning_rate": 3.258560463636456e-05, "loss": 0.456, "step": 75910 }, { "epoch": 18.792079207920793, "grad_norm": 0.3383200764656067, "learning_rate": 3.257268911342724e-05, "loss": 0.4609, "step": 75920 }, { "epoch": 18.794554455445546, "grad_norm": 0.3346165418624878, "learning_rate": 3.2559774913979194e-05, "loss": 0.461, "step": 75930 }, { "epoch": 18.797029702970296, "grad_norm": 0.35944679379463196, "learning_rate": 3.254686203900111e-05, "loss": 0.4633, "step": 75940 }, { "epoch": 18.79950495049505, "grad_norm": 0.3670659363269806, "learning_rate": 3.253395048947372e-05, "loss": 0.4615, "step": 75950 }, { "epoch": 18.801980198019802, "grad_norm": 0.3576223850250244, "learning_rate": 3.2521040266377495e-05, "loss": 0.4646, "step": 75960 }, { "epoch": 18.804455445544555, "grad_norm": 0.3736066222190857, "learning_rate": 3.250813137069292e-05, "loss": 0.4589, "step": 75970 }, { "epoch": 18.806930693069308, "grad_norm": 0.353499174118042, "learning_rate": 3.249522380340032e-05, "loss": 0.4591, "step": 75980 }, { "epoch": 18.80940594059406, "grad_norm": 0.37359127402305603, "learning_rate": 3.248231756547997e-05, "loss": 0.4644, "step": 75990 }, { "epoch": 18.81188118811881, "grad_norm": 0.3523930609226227, "learning_rate": 3.246941265791198e-05, "loss": 0.4632, "step": 76000 }, { "epoch": 18.814356435643564, "grad_norm": 0.3589574098587036, "learning_rate": 3.24565090816764e-05, "loss": 0.4614, "step": 76010 }, { "epoch": 18.816831683168317, "grad_norm": 0.34847623109817505, "learning_rate": 3.2443606837753185e-05, "loss": 0.4545, "step": 76020 }, { "epoch": 18.81930693069307, "grad_norm": 0.35176464915275574, "learning_rate": 3.243070592712217e-05, "loss": 0.4606, "step": 76030 }, { "epoch": 18.821782178217823, "grad_norm": 0.3553978502750397, "learning_rate": 3.241780635076309e-05, "loss": 0.4595, "step": 76040 }, { "epoch": 18.824257425742573, "grad_norm": 0.3612617552280426, "learning_rate": 3.2404908109655585e-05, "loss": 0.4633, "step": 76050 }, { "epoch": 18.826732673267326, "grad_norm": 0.3783946931362152, "learning_rate": 3.239201120477921e-05, "loss": 0.4623, "step": 76060 }, { "epoch": 18.82920792079208, "grad_norm": 0.34445318579673767, "learning_rate": 3.2379115637113366e-05, "loss": 0.4542, "step": 76070 }, { "epoch": 18.831683168316832, "grad_norm": 0.34895122051239014, "learning_rate": 3.23662214076374e-05, "loss": 0.4593, "step": 76080 }, { "epoch": 18.834158415841586, "grad_norm": 0.35022351145744324, "learning_rate": 3.235332851733055e-05, "loss": 0.4588, "step": 76090 }, { "epoch": 18.836633663366335, "grad_norm": 0.36189571022987366, "learning_rate": 3.2340436967171945e-05, "loss": 0.4627, "step": 76100 }, { "epoch": 18.83910891089109, "grad_norm": 0.38130611181259155, "learning_rate": 3.2327546758140584e-05, "loss": 0.4642, "step": 76110 }, { "epoch": 18.84158415841584, "grad_norm": 0.37864795327186584, "learning_rate": 3.231465789121545e-05, "loss": 0.4568, "step": 76120 }, { "epoch": 18.844059405940595, "grad_norm": 0.37594276666641235, "learning_rate": 3.230177036737533e-05, "loss": 0.4604, "step": 76130 }, { "epoch": 18.846534653465348, "grad_norm": 0.3555859923362732, "learning_rate": 3.228888418759893e-05, "loss": 0.456, "step": 76140 }, { "epoch": 18.849009900990097, "grad_norm": 0.39019879698753357, "learning_rate": 3.227599935286492e-05, "loss": 0.463, "step": 76150 }, { "epoch": 18.85148514851485, "grad_norm": 0.349791556596756, "learning_rate": 3.2263115864151775e-05, "loss": 0.455, "step": 76160 }, { "epoch": 18.853960396039604, "grad_norm": 0.3597946763038635, "learning_rate": 3.225023372243794e-05, "loss": 0.458, "step": 76170 }, { "epoch": 18.856435643564357, "grad_norm": 0.3368768095970154, "learning_rate": 3.22373529287017e-05, "loss": 0.4603, "step": 76180 }, { "epoch": 18.85891089108911, "grad_norm": 0.35997170209884644, "learning_rate": 3.222447348392129e-05, "loss": 0.4558, "step": 76190 }, { "epoch": 18.861386138613863, "grad_norm": 0.34397614002227783, "learning_rate": 3.2211595389074814e-05, "loss": 0.4576, "step": 76200 }, { "epoch": 18.863861386138613, "grad_norm": 0.347932904958725, "learning_rate": 3.2198718645140255e-05, "loss": 0.4586, "step": 76210 }, { "epoch": 18.866336633663366, "grad_norm": 0.3456214964389801, "learning_rate": 3.218584325309556e-05, "loss": 0.4599, "step": 76220 }, { "epoch": 18.86881188118812, "grad_norm": 0.3751014173030853, "learning_rate": 3.217296921391852e-05, "loss": 0.4596, "step": 76230 }, { "epoch": 18.871287128712872, "grad_norm": 0.3337511122226715, "learning_rate": 3.216009652858677e-05, "loss": 0.4627, "step": 76240 }, { "epoch": 18.873762376237625, "grad_norm": 0.3498731851577759, "learning_rate": 3.2147225198077995e-05, "loss": 0.4655, "step": 76250 }, { "epoch": 18.876237623762375, "grad_norm": 0.34361109137535095, "learning_rate": 3.213435522336964e-05, "loss": 0.4622, "step": 76260 }, { "epoch": 18.878712871287128, "grad_norm": 0.3649521768093109, "learning_rate": 3.21214866054391e-05, "loss": 0.4551, "step": 76270 }, { "epoch": 18.88118811881188, "grad_norm": 0.3717203140258789, "learning_rate": 3.210861934526366e-05, "loss": 0.4597, "step": 76280 }, { "epoch": 18.883663366336634, "grad_norm": 0.3652522563934326, "learning_rate": 3.209575344382052e-05, "loss": 0.4595, "step": 76290 }, { "epoch": 18.886138613861387, "grad_norm": 0.34829777479171753, "learning_rate": 3.2082888902086746e-05, "loss": 0.4594, "step": 76300 }, { "epoch": 18.888613861386137, "grad_norm": 0.3444229066371918, "learning_rate": 3.20700257210393e-05, "loss": 0.4641, "step": 76310 }, { "epoch": 18.89108910891089, "grad_norm": 0.3549734354019165, "learning_rate": 3.205716390165509e-05, "loss": 0.4642, "step": 76320 }, { "epoch": 18.893564356435643, "grad_norm": 0.3682183623313904, "learning_rate": 3.204430344491086e-05, "loss": 0.459, "step": 76330 }, { "epoch": 18.896039603960396, "grad_norm": 0.34826818108558655, "learning_rate": 3.2031444351783307e-05, "loss": 0.4591, "step": 76340 }, { "epoch": 18.89851485148515, "grad_norm": 0.38614723086357117, "learning_rate": 3.201858662324894e-05, "loss": 0.4581, "step": 76350 }, { "epoch": 18.900990099009903, "grad_norm": 0.36732593178749084, "learning_rate": 3.200573026028428e-05, "loss": 0.4703, "step": 76360 }, { "epoch": 18.903465346534652, "grad_norm": 0.34434476494789124, "learning_rate": 3.199287526386564e-05, "loss": 0.459, "step": 76370 }, { "epoch": 18.905940594059405, "grad_norm": 0.34408777952194214, "learning_rate": 3.1980021634969283e-05, "loss": 0.4647, "step": 76380 }, { "epoch": 18.90841584158416, "grad_norm": 0.329745352268219, "learning_rate": 3.196716937457137e-05, "loss": 0.4546, "step": 76390 }, { "epoch": 18.91089108910891, "grad_norm": 0.3486716151237488, "learning_rate": 3.195431848364795e-05, "loss": 0.456, "step": 76400 }, { "epoch": 18.913366336633665, "grad_norm": 0.335649311542511, "learning_rate": 3.194146896317491e-05, "loss": 0.4619, "step": 76410 }, { "epoch": 18.915841584158414, "grad_norm": 0.35278892517089844, "learning_rate": 3.1928620814128176e-05, "loss": 0.4656, "step": 76420 }, { "epoch": 18.918316831683168, "grad_norm": 0.3621905446052551, "learning_rate": 3.19157740374834e-05, "loss": 0.4531, "step": 76430 }, { "epoch": 18.92079207920792, "grad_norm": 0.3427746593952179, "learning_rate": 3.190292863421625e-05, "loss": 0.4552, "step": 76440 }, { "epoch": 18.923267326732674, "grad_norm": 0.3471815288066864, "learning_rate": 3.189008460530223e-05, "loss": 0.4618, "step": 76450 }, { "epoch": 18.925742574257427, "grad_norm": 0.35620346665382385, "learning_rate": 3.187724195171677e-05, "loss": 0.4538, "step": 76460 }, { "epoch": 18.928217821782177, "grad_norm": 0.3524448275566101, "learning_rate": 3.1864400674435205e-05, "loss": 0.4527, "step": 76470 }, { "epoch": 18.93069306930693, "grad_norm": 0.3626071810722351, "learning_rate": 3.1851560774432705e-05, "loss": 0.4585, "step": 76480 }, { "epoch": 18.933168316831683, "grad_norm": 0.3688206672668457, "learning_rate": 3.18387222526844e-05, "loss": 0.4612, "step": 76490 }, { "epoch": 18.935643564356436, "grad_norm": 0.3655429780483246, "learning_rate": 3.182588511016529e-05, "loss": 0.4573, "step": 76500 }, { "epoch": 18.93811881188119, "grad_norm": 0.414021760225296, "learning_rate": 3.181304934785025e-05, "loss": 0.459, "step": 76510 }, { "epoch": 18.94059405940594, "grad_norm": 0.37040066719055176, "learning_rate": 3.18002149667141e-05, "loss": 0.4616, "step": 76520 }, { "epoch": 18.943069306930692, "grad_norm": 0.3638975918292999, "learning_rate": 3.1787381967731525e-05, "loss": 0.4633, "step": 76530 }, { "epoch": 18.945544554455445, "grad_norm": 0.3832348883152008, "learning_rate": 3.17745503518771e-05, "loss": 0.46, "step": 76540 }, { "epoch": 18.948019801980198, "grad_norm": 0.35277512669563293, "learning_rate": 3.176172012012527e-05, "loss": 0.4594, "step": 76550 }, { "epoch": 18.95049504950495, "grad_norm": 0.35409361124038696, "learning_rate": 3.174889127345045e-05, "loss": 0.4601, "step": 76560 }, { "epoch": 18.952970297029704, "grad_norm": 0.3652227818965912, "learning_rate": 3.173606381282689e-05, "loss": 0.4526, "step": 76570 }, { "epoch": 18.955445544554454, "grad_norm": 0.35817834734916687, "learning_rate": 3.172323773922875e-05, "loss": 0.4616, "step": 76580 }, { "epoch": 18.957920792079207, "grad_norm": 0.34626486897468567, "learning_rate": 3.1710413053630104e-05, "loss": 0.4681, "step": 76590 }, { "epoch": 18.96039603960396, "grad_norm": 0.3730912506580353, "learning_rate": 3.1697589757004874e-05, "loss": 0.4596, "step": 76600 }, { "epoch": 18.962871287128714, "grad_norm": 0.3774896562099457, "learning_rate": 3.1684767850326904e-05, "loss": 0.4567, "step": 76610 }, { "epoch": 18.965346534653467, "grad_norm": 0.37531113624572754, "learning_rate": 3.167194733456996e-05, "loss": 0.4621, "step": 76620 }, { "epoch": 18.967821782178216, "grad_norm": 0.37200844287872314, "learning_rate": 3.165912821070766e-05, "loss": 0.4579, "step": 76630 }, { "epoch": 18.97029702970297, "grad_norm": 0.34426024556159973, "learning_rate": 3.164631047971354e-05, "loss": 0.4542, "step": 76640 }, { "epoch": 18.972772277227723, "grad_norm": 0.36771994829177856, "learning_rate": 3.1633494142560996e-05, "loss": 0.4629, "step": 76650 }, { "epoch": 18.975247524752476, "grad_norm": 0.38174012303352356, "learning_rate": 3.162067920022338e-05, "loss": 0.4664, "step": 76660 }, { "epoch": 18.97772277227723, "grad_norm": 0.37848880887031555, "learning_rate": 3.160786565367388e-05, "loss": 0.4546, "step": 76670 }, { "epoch": 18.980198019801982, "grad_norm": 0.350805401802063, "learning_rate": 3.15950535038856e-05, "loss": 0.4549, "step": 76680 }, { "epoch": 18.98267326732673, "grad_norm": 0.3634800612926483, "learning_rate": 3.158224275183155e-05, "loss": 0.4616, "step": 76690 }, { "epoch": 18.985148514851485, "grad_norm": 0.358889639377594, "learning_rate": 3.156943339848463e-05, "loss": 0.4642, "step": 76700 }, { "epoch": 18.987623762376238, "grad_norm": 0.32970118522644043, "learning_rate": 3.155662544481758e-05, "loss": 0.4592, "step": 76710 }, { "epoch": 18.99009900990099, "grad_norm": 0.3296988606452942, "learning_rate": 3.154381889180315e-05, "loss": 0.466, "step": 76720 }, { "epoch": 18.992574257425744, "grad_norm": 0.3352426290512085, "learning_rate": 3.153101374041385e-05, "loss": 0.4548, "step": 76730 }, { "epoch": 18.995049504950494, "grad_norm": 0.3340758681297302, "learning_rate": 3.151820999162218e-05, "loss": 0.4639, "step": 76740 }, { "epoch": 18.997524752475247, "grad_norm": 0.36154186725616455, "learning_rate": 3.150540764640048e-05, "loss": 0.4558, "step": 76750 }, { "epoch": 19.0, "grad_norm": 0.4180958569049835, "learning_rate": 3.149260670572103e-05, "loss": 0.4597, "step": 76760 }, { "epoch": 19.002475247524753, "grad_norm": 0.349069207906723, "learning_rate": 3.147980717055596e-05, "loss": 0.4558, "step": 76770 }, { "epoch": 19.004950495049506, "grad_norm": 0.338290274143219, "learning_rate": 3.14670090418773e-05, "loss": 0.4594, "step": 76780 }, { "epoch": 19.007425742574256, "grad_norm": 0.36730435490608215, "learning_rate": 3.1454212320657015e-05, "loss": 0.4629, "step": 76790 }, { "epoch": 19.00990099009901, "grad_norm": 0.3475325107574463, "learning_rate": 3.144141700786691e-05, "loss": 0.4599, "step": 76800 }, { "epoch": 19.012376237623762, "grad_norm": 0.34617552161216736, "learning_rate": 3.142862310447871e-05, "loss": 0.4598, "step": 76810 }, { "epoch": 19.014851485148515, "grad_norm": 0.34601953625679016, "learning_rate": 3.1415830611464e-05, "loss": 0.4548, "step": 76820 }, { "epoch": 19.01732673267327, "grad_norm": 0.35931453108787537, "learning_rate": 3.140303952979434e-05, "loss": 0.4614, "step": 76830 }, { "epoch": 19.019801980198018, "grad_norm": 0.3576635718345642, "learning_rate": 3.1390249860441076e-05, "loss": 0.4593, "step": 76840 }, { "epoch": 19.02227722772277, "grad_norm": 0.3338426649570465, "learning_rate": 3.137746160437553e-05, "loss": 0.4587, "step": 76850 }, { "epoch": 19.024752475247524, "grad_norm": 0.3269326090812683, "learning_rate": 3.136467476256888e-05, "loss": 0.4593, "step": 76860 }, { "epoch": 19.027227722772277, "grad_norm": 0.3572040796279907, "learning_rate": 3.13518893359922e-05, "loss": 0.4606, "step": 76870 }, { "epoch": 19.02970297029703, "grad_norm": 0.34378379583358765, "learning_rate": 3.133910532561644e-05, "loss": 0.4583, "step": 76880 }, { "epoch": 19.032178217821784, "grad_norm": 0.3780091404914856, "learning_rate": 3.132632273241251e-05, "loss": 0.4643, "step": 76890 }, { "epoch": 19.034653465346533, "grad_norm": 0.3626440763473511, "learning_rate": 3.131354155735112e-05, "loss": 0.4578, "step": 76900 }, { "epoch": 19.037128712871286, "grad_norm": 0.36085644364356995, "learning_rate": 3.130076180140292e-05, "loss": 0.4602, "step": 76910 }, { "epoch": 19.03960396039604, "grad_norm": 0.35836413502693176, "learning_rate": 3.128798346553846e-05, "loss": 0.4555, "step": 76920 }, { "epoch": 19.042079207920793, "grad_norm": 0.3434407114982605, "learning_rate": 3.127520655072817e-05, "loss": 0.4603, "step": 76930 }, { "epoch": 19.044554455445546, "grad_norm": 0.3539215624332428, "learning_rate": 3.126243105794238e-05, "loss": 0.4607, "step": 76940 }, { "epoch": 19.047029702970296, "grad_norm": 0.3617514669895172, "learning_rate": 3.1249656988151265e-05, "loss": 0.4564, "step": 76950 }, { "epoch": 19.04950495049505, "grad_norm": 0.3472101390361786, "learning_rate": 3.123688434232499e-05, "loss": 0.4552, "step": 76960 }, { "epoch": 19.051980198019802, "grad_norm": 0.34013018012046814, "learning_rate": 3.122411312143352e-05, "loss": 0.4607, "step": 76970 }, { "epoch": 19.054455445544555, "grad_norm": 0.34675025939941406, "learning_rate": 3.121134332644673e-05, "loss": 0.461, "step": 76980 }, { "epoch": 19.056930693069308, "grad_norm": 0.3446463346481323, "learning_rate": 3.119857495833443e-05, "loss": 0.4588, "step": 76990 }, { "epoch": 19.059405940594058, "grad_norm": 0.3393343985080719, "learning_rate": 3.1185808018066295e-05, "loss": 0.4547, "step": 77000 }, { "epoch": 19.06188118811881, "grad_norm": 0.35166165232658386, "learning_rate": 3.117304250661187e-05, "loss": 0.4549, "step": 77010 }, { "epoch": 19.064356435643564, "grad_norm": 0.3414174020290375, "learning_rate": 3.11602784249406e-05, "loss": 0.4609, "step": 77020 }, { "epoch": 19.066831683168317, "grad_norm": 0.359482079744339, "learning_rate": 3.114751577402188e-05, "loss": 0.4621, "step": 77030 }, { "epoch": 19.06930693069307, "grad_norm": 0.3611217439174652, "learning_rate": 3.113475455482491e-05, "loss": 0.4479, "step": 77040 }, { "epoch": 19.071782178217823, "grad_norm": 0.3497098982334137, "learning_rate": 3.112199476831882e-05, "loss": 0.4562, "step": 77050 }, { "epoch": 19.074257425742573, "grad_norm": 0.3678768575191498, "learning_rate": 3.110923641547266e-05, "loss": 0.4602, "step": 77060 }, { "epoch": 19.076732673267326, "grad_norm": 0.37694358825683594, "learning_rate": 3.109647949725534e-05, "loss": 0.4577, "step": 77070 }, { "epoch": 19.07920792079208, "grad_norm": 0.36849522590637207, "learning_rate": 3.108372401463562e-05, "loss": 0.4531, "step": 77080 }, { "epoch": 19.081683168316832, "grad_norm": 0.34305673837661743, "learning_rate": 3.107096996858223e-05, "loss": 0.4563, "step": 77090 }, { "epoch": 19.084158415841586, "grad_norm": 0.34082916378974915, "learning_rate": 3.105821736006376e-05, "loss": 0.4547, "step": 77100 }, { "epoch": 19.086633663366335, "grad_norm": 0.3466210663318634, "learning_rate": 3.104546619004869e-05, "loss": 0.4559, "step": 77110 }, { "epoch": 19.08910891089109, "grad_norm": 0.35258835554122925, "learning_rate": 3.1032716459505334e-05, "loss": 0.4615, "step": 77120 }, { "epoch": 19.09158415841584, "grad_norm": 0.3577996492385864, "learning_rate": 3.1019968169402026e-05, "loss": 0.4611, "step": 77130 }, { "epoch": 19.094059405940595, "grad_norm": 0.32526493072509766, "learning_rate": 3.100722132070686e-05, "loss": 0.46, "step": 77140 }, { "epoch": 19.096534653465348, "grad_norm": 0.33847281336784363, "learning_rate": 3.099447591438789e-05, "loss": 0.4574, "step": 77150 }, { "epoch": 19.099009900990097, "grad_norm": 0.32679256796836853, "learning_rate": 3.098173195141306e-05, "loss": 0.4566, "step": 77160 }, { "epoch": 19.10148514851485, "grad_norm": 0.33706334233283997, "learning_rate": 3.0968989432750174e-05, "loss": 0.4599, "step": 77170 }, { "epoch": 19.103960396039604, "grad_norm": 0.3233673572540283, "learning_rate": 3.095624835936696e-05, "loss": 0.4596, "step": 77180 }, { "epoch": 19.106435643564357, "grad_norm": 0.34409332275390625, "learning_rate": 3.094350873223099e-05, "loss": 0.4584, "step": 77190 }, { "epoch": 19.10891089108911, "grad_norm": 0.33770912885665894, "learning_rate": 3.0930770552309775e-05, "loss": 0.46, "step": 77200 }, { "epoch": 19.111386138613863, "grad_norm": 0.41683223843574524, "learning_rate": 3.091803382057069e-05, "loss": 0.4579, "step": 77210 }, { "epoch": 19.113861386138613, "grad_norm": 0.36008667945861816, "learning_rate": 3.0905298537980994e-05, "loss": 0.457, "step": 77220 }, { "epoch": 19.116336633663366, "grad_norm": 0.34504199028015137, "learning_rate": 3.089256470550787e-05, "loss": 0.4563, "step": 77230 }, { "epoch": 19.11881188118812, "grad_norm": 0.3589993119239807, "learning_rate": 3.087983232411837e-05, "loss": 0.4548, "step": 77240 }, { "epoch": 19.121287128712872, "grad_norm": 0.3603547513484955, "learning_rate": 3.0867101394779394e-05, "loss": 0.4586, "step": 77250 }, { "epoch": 19.123762376237625, "grad_norm": 0.357075959444046, "learning_rate": 3.0854371918457836e-05, "loss": 0.457, "step": 77260 }, { "epoch": 19.126237623762375, "grad_norm": 0.35846927762031555, "learning_rate": 3.084164389612037e-05, "loss": 0.4618, "step": 77270 }, { "epoch": 19.128712871287128, "grad_norm": 0.34983590245246887, "learning_rate": 3.08289173287336e-05, "loss": 0.459, "step": 77280 }, { "epoch": 19.13118811881188, "grad_norm": 0.34119588136672974, "learning_rate": 3.081619221726404e-05, "loss": 0.4538, "step": 77290 }, { "epoch": 19.133663366336634, "grad_norm": 0.3227631449699402, "learning_rate": 3.080346856267811e-05, "loss": 0.4593, "step": 77300 }, { "epoch": 19.136138613861387, "grad_norm": 0.33111587166786194, "learning_rate": 3.0790746365942034e-05, "loss": 0.4561, "step": 77310 }, { "epoch": 19.138613861386137, "grad_norm": 0.35694023966789246, "learning_rate": 3.0778025628022e-05, "loss": 0.4613, "step": 77320 }, { "epoch": 19.14108910891089, "grad_norm": 0.3923538625240326, "learning_rate": 3.0765306349884074e-05, "loss": 0.4598, "step": 77330 }, { "epoch": 19.143564356435643, "grad_norm": 0.3379540741443634, "learning_rate": 3.075258853249419e-05, "loss": 0.457, "step": 77340 }, { "epoch": 19.146039603960396, "grad_norm": 0.34618955850601196, "learning_rate": 3.0739872176818165e-05, "loss": 0.4593, "step": 77350 }, { "epoch": 19.14851485148515, "grad_norm": 0.35361236333847046, "learning_rate": 3.072715728382176e-05, "loss": 0.4597, "step": 77360 }, { "epoch": 19.150990099009903, "grad_norm": 0.3576050102710724, "learning_rate": 3.0714443854470584e-05, "loss": 0.4612, "step": 77370 }, { "epoch": 19.153465346534652, "grad_norm": 0.3453976809978485, "learning_rate": 3.070173188973011e-05, "loss": 0.4638, "step": 77380 }, { "epoch": 19.155940594059405, "grad_norm": 0.34878653287887573, "learning_rate": 3.0689021390565734e-05, "loss": 0.46, "step": 77390 }, { "epoch": 19.15841584158416, "grad_norm": 0.34343284368515015, "learning_rate": 3.067631235794275e-05, "loss": 0.4532, "step": 77400 }, { "epoch": 19.16089108910891, "grad_norm": 0.33050259947776794, "learning_rate": 3.066360479282633e-05, "loss": 0.461, "step": 77410 }, { "epoch": 19.163366336633665, "grad_norm": 0.34337568283081055, "learning_rate": 3.0650898696181485e-05, "loss": 0.4617, "step": 77420 }, { "epoch": 19.165841584158414, "grad_norm": 0.352726548910141, "learning_rate": 3.063819406897322e-05, "loss": 0.4599, "step": 77430 }, { "epoch": 19.168316831683168, "grad_norm": 0.3718431293964386, "learning_rate": 3.062549091216633e-05, "loss": 0.4565, "step": 77440 }, { "epoch": 19.17079207920792, "grad_norm": 0.33070504665374756, "learning_rate": 3.0612789226725535e-05, "loss": 0.4634, "step": 77450 }, { "epoch": 19.173267326732674, "grad_norm": 0.41082891821861267, "learning_rate": 3.060008901361546e-05, "loss": 0.4545, "step": 77460 }, { "epoch": 19.175742574257427, "grad_norm": 0.368224561214447, "learning_rate": 3.05873902738006e-05, "loss": 0.4561, "step": 77470 }, { "epoch": 19.178217821782177, "grad_norm": 0.34532707929611206, "learning_rate": 3.057469300824535e-05, "loss": 0.4611, "step": 77480 }, { "epoch": 19.18069306930693, "grad_norm": 0.3496035635471344, "learning_rate": 3.056199721791394e-05, "loss": 0.457, "step": 77490 }, { "epoch": 19.183168316831683, "grad_norm": 0.3517247140407562, "learning_rate": 3.054930290377058e-05, "loss": 0.4596, "step": 77500 }, { "epoch": 19.185643564356436, "grad_norm": 0.33333781361579895, "learning_rate": 3.05366100667793e-05, "loss": 0.4572, "step": 77510 }, { "epoch": 19.18811881188119, "grad_norm": 0.33748114109039307, "learning_rate": 3.052391870790403e-05, "loss": 0.4594, "step": 77520 }, { "epoch": 19.190594059405942, "grad_norm": 0.33093973994255066, "learning_rate": 3.0511228828108605e-05, "loss": 0.4594, "step": 77530 }, { "epoch": 19.193069306930692, "grad_norm": 0.339750736951828, "learning_rate": 3.0498540428356747e-05, "loss": 0.4606, "step": 77540 }, { "epoch": 19.195544554455445, "grad_norm": 0.3385768234729767, "learning_rate": 3.048585350961204e-05, "loss": 0.4536, "step": 77550 }, { "epoch": 19.198019801980198, "grad_norm": 0.3466474711894989, "learning_rate": 3.0473168072837966e-05, "loss": 0.4563, "step": 77560 }, { "epoch": 19.20049504950495, "grad_norm": 0.40061643719673157, "learning_rate": 3.0460484118997924e-05, "loss": 0.4657, "step": 77570 }, { "epoch": 19.202970297029704, "grad_norm": 0.3466661870479584, "learning_rate": 3.0447801649055157e-05, "loss": 0.4573, "step": 77580 }, { "epoch": 19.205445544554454, "grad_norm": 0.33418676257133484, "learning_rate": 3.0435120663972814e-05, "loss": 0.4562, "step": 77590 }, { "epoch": 19.207920792079207, "grad_norm": 0.3528425097465515, "learning_rate": 3.0422441164713967e-05, "loss": 0.4579, "step": 77600 }, { "epoch": 19.21039603960396, "grad_norm": 0.35485395789146423, "learning_rate": 3.0409763152241487e-05, "loss": 0.4595, "step": 77610 }, { "epoch": 19.212871287128714, "grad_norm": 0.3494068384170532, "learning_rate": 3.0397086627518212e-05, "loss": 0.4539, "step": 77620 }, { "epoch": 19.215346534653467, "grad_norm": 0.3447993993759155, "learning_rate": 3.038441159150685e-05, "loss": 0.4617, "step": 77630 }, { "epoch": 19.217821782178216, "grad_norm": 0.38700369000434875, "learning_rate": 3.0371738045169973e-05, "loss": 0.4607, "step": 77640 }, { "epoch": 19.22029702970297, "grad_norm": 0.3505297005176544, "learning_rate": 3.0359065989470072e-05, "loss": 0.4568, "step": 77650 }, { "epoch": 19.222772277227723, "grad_norm": 0.3484156131744385, "learning_rate": 3.0346395425369457e-05, "loss": 0.4606, "step": 77660 }, { "epoch": 19.225247524752476, "grad_norm": 0.35514703392982483, "learning_rate": 3.0333726353830428e-05, "loss": 0.4582, "step": 77670 }, { "epoch": 19.22772277227723, "grad_norm": 0.3475865423679352, "learning_rate": 3.0321058775815093e-05, "loss": 0.4593, "step": 77680 }, { "epoch": 19.230198019801982, "grad_norm": 0.33294177055358887, "learning_rate": 3.030839269228547e-05, "loss": 0.4563, "step": 77690 }, { "epoch": 19.23267326732673, "grad_norm": 0.4163728952407837, "learning_rate": 3.029572810420347e-05, "loss": 0.4607, "step": 77700 }, { "epoch": 19.235148514851485, "grad_norm": 0.34176352620124817, "learning_rate": 3.02830650125309e-05, "loss": 0.4607, "step": 77710 }, { "epoch": 19.237623762376238, "grad_norm": 0.3358851671218872, "learning_rate": 3.0270403418229388e-05, "loss": 0.4581, "step": 77720 }, { "epoch": 19.24009900990099, "grad_norm": 0.3468569219112396, "learning_rate": 3.0257743322260564e-05, "loss": 0.4567, "step": 77730 }, { "epoch": 19.242574257425744, "grad_norm": 0.32398322224617004, "learning_rate": 3.0245084725585837e-05, "loss": 0.4614, "step": 77740 }, { "epoch": 19.245049504950494, "grad_norm": 0.33916032314300537, "learning_rate": 3.023242762916655e-05, "loss": 0.4608, "step": 77750 }, { "epoch": 19.247524752475247, "grad_norm": 0.3345005512237549, "learning_rate": 3.0219772033963922e-05, "loss": 0.4565, "step": 77760 }, { "epoch": 19.25, "grad_norm": 0.3382745683193207, "learning_rate": 3.020711794093909e-05, "loss": 0.4622, "step": 77770 }, { "epoch": 19.252475247524753, "grad_norm": 0.3616936206817627, "learning_rate": 3.0194465351053026e-05, "loss": 0.4608, "step": 77780 }, { "epoch": 19.254950495049506, "grad_norm": 0.35972726345062256, "learning_rate": 3.0181814265266593e-05, "loss": 0.4568, "step": 77790 }, { "epoch": 19.257425742574256, "grad_norm": 0.3345932364463806, "learning_rate": 3.0169164684540597e-05, "loss": 0.4555, "step": 77800 }, { "epoch": 19.25990099009901, "grad_norm": 0.3506533205509186, "learning_rate": 3.0156516609835667e-05, "loss": 0.4594, "step": 77810 }, { "epoch": 19.262376237623762, "grad_norm": 0.3696208894252777, "learning_rate": 3.0143870042112333e-05, "loss": 0.4593, "step": 77820 }, { "epoch": 19.264851485148515, "grad_norm": 0.3473582863807678, "learning_rate": 3.013122498233104e-05, "loss": 0.4585, "step": 77830 }, { "epoch": 19.26732673267327, "grad_norm": 0.3515170216560364, "learning_rate": 3.0118581431452096e-05, "loss": 0.4608, "step": 77840 }, { "epoch": 19.269801980198018, "grad_norm": 0.33867892622947693, "learning_rate": 3.0105939390435682e-05, "loss": 0.4567, "step": 77850 }, { "epoch": 19.27227722772277, "grad_norm": 0.34409308433532715, "learning_rate": 3.009329886024187e-05, "loss": 0.4634, "step": 77860 }, { "epoch": 19.274752475247524, "grad_norm": 0.345540851354599, "learning_rate": 3.008065984183065e-05, "loss": 0.4621, "step": 77870 }, { "epoch": 19.277227722772277, "grad_norm": 0.372707337141037, "learning_rate": 3.0068022336161848e-05, "loss": 0.4595, "step": 77880 }, { "epoch": 19.27970297029703, "grad_norm": 0.3284643590450287, "learning_rate": 3.0055386344195215e-05, "loss": 0.4538, "step": 77890 }, { "epoch": 19.282178217821784, "grad_norm": 0.3489438593387604, "learning_rate": 3.0042751866890373e-05, "loss": 0.4612, "step": 77900 }, { "epoch": 19.284653465346533, "grad_norm": 0.35534530878067017, "learning_rate": 3.0030118905206816e-05, "loss": 0.4538, "step": 77910 }, { "epoch": 19.287128712871286, "grad_norm": 0.3420029580593109, "learning_rate": 3.0017487460103926e-05, "loss": 0.4621, "step": 77920 }, { "epoch": 19.28960396039604, "grad_norm": 0.36925047636032104, "learning_rate": 3.0004857532541002e-05, "loss": 0.4583, "step": 77930 }, { "epoch": 19.292079207920793, "grad_norm": 0.38361856341362, "learning_rate": 2.999222912347719e-05, "loss": 0.4598, "step": 77940 }, { "epoch": 19.294554455445546, "grad_norm": 0.3585953712463379, "learning_rate": 2.9979602233871552e-05, "loss": 0.4592, "step": 77950 }, { "epoch": 19.297029702970296, "grad_norm": 0.35454896092414856, "learning_rate": 2.9966976864682966e-05, "loss": 0.4613, "step": 77960 }, { "epoch": 19.29950495049505, "grad_norm": 0.3390122950077057, "learning_rate": 2.995435301687031e-05, "loss": 0.4625, "step": 77970 }, { "epoch": 19.301980198019802, "grad_norm": 0.36236098408699036, "learning_rate": 2.9941730691392246e-05, "loss": 0.4555, "step": 77980 }, { "epoch": 19.304455445544555, "grad_norm": 0.3450169265270233, "learning_rate": 2.992910988920735e-05, "loss": 0.4606, "step": 77990 }, { "epoch": 19.306930693069308, "grad_norm": 0.3553237020969391, "learning_rate": 2.9916490611274117e-05, "loss": 0.4547, "step": 78000 }, { "epoch": 19.309405940594058, "grad_norm": 0.3556491732597351, "learning_rate": 2.9903872858550885e-05, "loss": 0.4578, "step": 78010 }, { "epoch": 19.31188118811881, "grad_norm": 0.34488359093666077, "learning_rate": 2.989125663199588e-05, "loss": 0.4595, "step": 78020 }, { "epoch": 19.314356435643564, "grad_norm": 0.33244380354881287, "learning_rate": 2.987864193256722e-05, "loss": 0.4578, "step": 78030 }, { "epoch": 19.316831683168317, "grad_norm": 0.3268295228481293, "learning_rate": 2.986602876122292e-05, "loss": 0.4563, "step": 78040 }, { "epoch": 19.31930693069307, "grad_norm": 0.3537699282169342, "learning_rate": 2.985341711892087e-05, "loss": 0.4607, "step": 78050 }, { "epoch": 19.321782178217823, "grad_norm": 0.34881532192230225, "learning_rate": 2.984080700661882e-05, "loss": 0.4672, "step": 78060 }, { "epoch": 19.324257425742573, "grad_norm": 0.3472887873649597, "learning_rate": 2.9828198425274444e-05, "loss": 0.4604, "step": 78070 }, { "epoch": 19.326732673267326, "grad_norm": 0.3411482274532318, "learning_rate": 2.981559137584529e-05, "loss": 0.4601, "step": 78080 }, { "epoch": 19.32920792079208, "grad_norm": 0.33559536933898926, "learning_rate": 2.9802985859288746e-05, "loss": 0.4608, "step": 78090 }, { "epoch": 19.331683168316832, "grad_norm": 0.3352382779121399, "learning_rate": 2.9790381876562152e-05, "loss": 0.4607, "step": 78100 }, { "epoch": 19.334158415841586, "grad_norm": 0.34450310468673706, "learning_rate": 2.9777779428622677e-05, "loss": 0.4609, "step": 78110 }, { "epoch": 19.336633663366335, "grad_norm": 0.3673448860645294, "learning_rate": 2.9765178516427407e-05, "loss": 0.4562, "step": 78120 }, { "epoch": 19.33910891089109, "grad_norm": 0.36490893363952637, "learning_rate": 2.9752579140933268e-05, "loss": 0.4621, "step": 78130 }, { "epoch": 19.34158415841584, "grad_norm": 0.3365243077278137, "learning_rate": 2.973998130309714e-05, "loss": 0.4556, "step": 78140 }, { "epoch": 19.344059405940595, "grad_norm": 0.32585856318473816, "learning_rate": 2.972738500387573e-05, "loss": 0.4572, "step": 78150 }, { "epoch": 19.346534653465348, "grad_norm": 0.3351055681705475, "learning_rate": 2.9714790244225628e-05, "loss": 0.4607, "step": 78160 }, { "epoch": 19.349009900990097, "grad_norm": 0.3325420618057251, "learning_rate": 2.970219702510334e-05, "loss": 0.4586, "step": 78170 }, { "epoch": 19.35148514851485, "grad_norm": 0.3099993169307709, "learning_rate": 2.9689605347465242e-05, "loss": 0.457, "step": 78180 }, { "epoch": 19.353960396039604, "grad_norm": 0.3489314317703247, "learning_rate": 2.9677015212267566e-05, "loss": 0.4608, "step": 78190 }, { "epoch": 19.356435643564357, "grad_norm": 0.3379907011985779, "learning_rate": 2.966442662046649e-05, "loss": 0.4614, "step": 78200 }, { "epoch": 19.35891089108911, "grad_norm": 0.3533095419406891, "learning_rate": 2.9651839573017992e-05, "loss": 0.4607, "step": 78210 }, { "epoch": 19.361386138613863, "grad_norm": 0.36166107654571533, "learning_rate": 2.9639254070877996e-05, "loss": 0.4628, "step": 78220 }, { "epoch": 19.363861386138613, "grad_norm": 0.34316152334213257, "learning_rate": 2.9626670115002277e-05, "loss": 0.4592, "step": 78230 }, { "epoch": 19.366336633663366, "grad_norm": 0.3407367169857025, "learning_rate": 2.9614087706346517e-05, "loss": 0.4603, "step": 78240 }, { "epoch": 19.36881188118812, "grad_norm": 0.37594765424728394, "learning_rate": 2.960150684586627e-05, "loss": 0.4569, "step": 78250 }, { "epoch": 19.371287128712872, "grad_norm": 0.3593400716781616, "learning_rate": 2.9588927534516924e-05, "loss": 0.4579, "step": 78260 }, { "epoch": 19.373762376237625, "grad_norm": 0.34377163648605347, "learning_rate": 2.957634977325387e-05, "loss": 0.4595, "step": 78270 }, { "epoch": 19.376237623762375, "grad_norm": 0.3626748323440552, "learning_rate": 2.956377356303225e-05, "loss": 0.46, "step": 78280 }, { "epoch": 19.378712871287128, "grad_norm": 0.3442298471927643, "learning_rate": 2.9551198904807154e-05, "loss": 0.462, "step": 78290 }, { "epoch": 19.38118811881188, "grad_norm": 0.36291155219078064, "learning_rate": 2.953862579953356e-05, "loss": 0.4534, "step": 78300 }, { "epoch": 19.383663366336634, "grad_norm": 0.36009538173675537, "learning_rate": 2.9526054248166314e-05, "loss": 0.4566, "step": 78310 }, { "epoch": 19.386138613861387, "grad_norm": 0.36779916286468506, "learning_rate": 2.9513484251660123e-05, "loss": 0.4568, "step": 78320 }, { "epoch": 19.388613861386137, "grad_norm": 0.3735370635986328, "learning_rate": 2.9500915810969586e-05, "loss": 0.4595, "step": 78330 }, { "epoch": 19.39108910891089, "grad_norm": 0.3532697558403015, "learning_rate": 2.948834892704923e-05, "loss": 0.4576, "step": 78340 }, { "epoch": 19.393564356435643, "grad_norm": 0.36638009548187256, "learning_rate": 2.9475783600853406e-05, "loss": 0.4619, "step": 78350 }, { "epoch": 19.396039603960396, "grad_norm": 0.34425315260887146, "learning_rate": 2.9463219833336358e-05, "loss": 0.456, "step": 78360 }, { "epoch": 19.39851485148515, "grad_norm": 0.3613905906677246, "learning_rate": 2.9450657625452243e-05, "loss": 0.4594, "step": 78370 }, { "epoch": 19.400990099009903, "grad_norm": 0.3606683909893036, "learning_rate": 2.9438096978155078e-05, "loss": 0.4536, "step": 78380 }, { "epoch": 19.403465346534652, "grad_norm": 0.3340111970901489, "learning_rate": 2.9425537892398737e-05, "loss": 0.4585, "step": 78390 }, { "epoch": 19.405940594059405, "grad_norm": 0.32319459319114685, "learning_rate": 2.9412980369137012e-05, "loss": 0.4577, "step": 78400 }, { "epoch": 19.40841584158416, "grad_norm": 0.33605730533599854, "learning_rate": 2.940042440932357e-05, "loss": 0.4563, "step": 78410 }, { "epoch": 19.41089108910891, "grad_norm": 0.3286687731742859, "learning_rate": 2.9387870013911955e-05, "loss": 0.4561, "step": 78420 }, { "epoch": 19.413366336633665, "grad_norm": 0.33756768703460693, "learning_rate": 2.9375317183855555e-05, "loss": 0.4597, "step": 78430 }, { "epoch": 19.415841584158414, "grad_norm": 0.3318008482456207, "learning_rate": 2.9362765920107737e-05, "loss": 0.4624, "step": 78440 }, { "epoch": 19.418316831683168, "grad_norm": 0.34587427973747253, "learning_rate": 2.9350216223621648e-05, "loss": 0.4571, "step": 78450 }, { "epoch": 19.42079207920792, "grad_norm": 0.3328779637813568, "learning_rate": 2.9337668095350344e-05, "loss": 0.4647, "step": 78460 }, { "epoch": 19.423267326732674, "grad_norm": 0.37308451533317566, "learning_rate": 2.9325121536246803e-05, "loss": 0.4584, "step": 78470 }, { "epoch": 19.425742574257427, "grad_norm": 0.34891417622566223, "learning_rate": 2.9312576547263836e-05, "loss": 0.4533, "step": 78480 }, { "epoch": 19.428217821782177, "grad_norm": 0.34190884232521057, "learning_rate": 2.9300033129354167e-05, "loss": 0.4561, "step": 78490 }, { "epoch": 19.43069306930693, "grad_norm": 0.3449145257472992, "learning_rate": 2.9287491283470346e-05, "loss": 0.4573, "step": 78500 }, { "epoch": 19.433168316831683, "grad_norm": 0.3470601737499237, "learning_rate": 2.927495101056489e-05, "loss": 0.4543, "step": 78510 }, { "epoch": 19.435643564356436, "grad_norm": 0.32498690485954285, "learning_rate": 2.9262412311590125e-05, "loss": 0.4595, "step": 78520 }, { "epoch": 19.43811881188119, "grad_norm": 0.35643574595451355, "learning_rate": 2.924987518749829e-05, "loss": 0.4607, "step": 78530 }, { "epoch": 19.440594059405942, "grad_norm": 0.35092535614967346, "learning_rate": 2.923733963924149e-05, "loss": 0.4578, "step": 78540 }, { "epoch": 19.443069306930692, "grad_norm": 0.33974114060401917, "learning_rate": 2.922480566777174e-05, "loss": 0.4555, "step": 78550 }, { "epoch": 19.445544554455445, "grad_norm": 0.3462543189525604, "learning_rate": 2.921227327404087e-05, "loss": 0.4573, "step": 78560 }, { "epoch": 19.448019801980198, "grad_norm": 0.37957504391670227, "learning_rate": 2.9199742459000685e-05, "loss": 0.4567, "step": 78570 }, { "epoch": 19.45049504950495, "grad_norm": 0.35431694984436035, "learning_rate": 2.9187213223602776e-05, "loss": 0.459, "step": 78580 }, { "epoch": 19.452970297029704, "grad_norm": 0.36613214015960693, "learning_rate": 2.9174685568798687e-05, "loss": 0.4592, "step": 78590 }, { "epoch": 19.455445544554454, "grad_norm": 0.3620545268058777, "learning_rate": 2.916215949553977e-05, "loss": 0.4567, "step": 78600 }, { "epoch": 19.457920792079207, "grad_norm": 0.3446720838546753, "learning_rate": 2.9149635004777322e-05, "loss": 0.4588, "step": 78610 }, { "epoch": 19.46039603960396, "grad_norm": 0.3276040256023407, "learning_rate": 2.9137112097462517e-05, "loss": 0.4629, "step": 78620 }, { "epoch": 19.462871287128714, "grad_norm": 0.3499205708503723, "learning_rate": 2.912459077454634e-05, "loss": 0.4633, "step": 78630 }, { "epoch": 19.465346534653467, "grad_norm": 0.32818982005119324, "learning_rate": 2.9112071036979733e-05, "loss": 0.4569, "step": 78640 }, { "epoch": 19.467821782178216, "grad_norm": 0.3578893840312958, "learning_rate": 2.90995528857135e-05, "loss": 0.4533, "step": 78650 }, { "epoch": 19.47029702970297, "grad_norm": 0.37108689546585083, "learning_rate": 2.9087036321698275e-05, "loss": 0.4592, "step": 78660 }, { "epoch": 19.472772277227723, "grad_norm": 0.3750417232513428, "learning_rate": 2.9074521345884642e-05, "loss": 0.4593, "step": 78670 }, { "epoch": 19.475247524752476, "grad_norm": 0.3466266393661499, "learning_rate": 2.9062007959223003e-05, "loss": 0.4535, "step": 78680 }, { "epoch": 19.47772277227723, "grad_norm": 0.3453145921230316, "learning_rate": 2.90494961626637e-05, "loss": 0.4584, "step": 78690 }, { "epoch": 19.480198019801982, "grad_norm": 0.34575197100639343, "learning_rate": 2.9036985957156882e-05, "loss": 0.4586, "step": 78700 }, { "epoch": 19.48267326732673, "grad_norm": 0.3555418848991394, "learning_rate": 2.9024477343652658e-05, "loss": 0.4591, "step": 78710 }, { "epoch": 19.485148514851485, "grad_norm": 0.3620103895664215, "learning_rate": 2.9011970323100935e-05, "loss": 0.4621, "step": 78720 }, { "epoch": 19.487623762376238, "grad_norm": 0.33692818880081177, "learning_rate": 2.8999464896451557e-05, "loss": 0.4583, "step": 78730 }, { "epoch": 19.49009900990099, "grad_norm": 0.3419765830039978, "learning_rate": 2.898696106465424e-05, "loss": 0.4615, "step": 78740 }, { "epoch": 19.492574257425744, "grad_norm": 0.3752687871456146, "learning_rate": 2.8974458828658546e-05, "loss": 0.4576, "step": 78750 }, { "epoch": 19.495049504950494, "grad_norm": 0.33076679706573486, "learning_rate": 2.896195818941396e-05, "loss": 0.4663, "step": 78760 }, { "epoch": 19.497524752475247, "grad_norm": 0.34246739745140076, "learning_rate": 2.8949459147869797e-05, "loss": 0.4585, "step": 78770 }, { "epoch": 19.5, "grad_norm": 0.3425825536251068, "learning_rate": 2.8936961704975308e-05, "loss": 0.4595, "step": 78780 }, { "epoch": 19.502475247524753, "grad_norm": 0.3420809507369995, "learning_rate": 2.892446586167955e-05, "loss": 0.4619, "step": 78790 }, { "epoch": 19.504950495049506, "grad_norm": 0.3501681685447693, "learning_rate": 2.8911971618931526e-05, "loss": 0.4579, "step": 78800 }, { "epoch": 19.507425742574256, "grad_norm": 0.3651089072227478, "learning_rate": 2.8899478977680112e-05, "loss": 0.4563, "step": 78810 }, { "epoch": 19.50990099009901, "grad_norm": 0.3494352400302887, "learning_rate": 2.888698793887401e-05, "loss": 0.4553, "step": 78820 }, { "epoch": 19.512376237623762, "grad_norm": 0.34833869338035583, "learning_rate": 2.8874498503461812e-05, "loss": 0.457, "step": 78830 }, { "epoch": 19.514851485148515, "grad_norm": 0.34823718667030334, "learning_rate": 2.886201067239207e-05, "loss": 0.4604, "step": 78840 }, { "epoch": 19.51732673267327, "grad_norm": 0.3448064923286438, "learning_rate": 2.884952444661312e-05, "loss": 0.4614, "step": 78850 }, { "epoch": 19.519801980198018, "grad_norm": 0.34733399748802185, "learning_rate": 2.883703982707318e-05, "loss": 0.4586, "step": 78860 }, { "epoch": 19.52227722772277, "grad_norm": 0.36027806997299194, "learning_rate": 2.882455681472041e-05, "loss": 0.4599, "step": 78870 }, { "epoch": 19.524752475247524, "grad_norm": 0.3695180416107178, "learning_rate": 2.8812075410502825e-05, "loss": 0.459, "step": 78880 }, { "epoch": 19.527227722772277, "grad_norm": 0.32832619547843933, "learning_rate": 2.879959561536828e-05, "loss": 0.4585, "step": 78890 }, { "epoch": 19.52970297029703, "grad_norm": 0.3400978446006775, "learning_rate": 2.878711743026452e-05, "loss": 0.4614, "step": 78900 }, { "epoch": 19.532178217821784, "grad_norm": 0.34555989503860474, "learning_rate": 2.877464085613919e-05, "loss": 0.4524, "step": 78910 }, { "epoch": 19.534653465346533, "grad_norm": 0.34964683651924133, "learning_rate": 2.876216589393984e-05, "loss": 0.4579, "step": 78920 }, { "epoch": 19.537128712871286, "grad_norm": 0.36120203137397766, "learning_rate": 2.8749692544613815e-05, "loss": 0.4623, "step": 78930 }, { "epoch": 19.53960396039604, "grad_norm": 0.3491654694080353, "learning_rate": 2.87372208091084e-05, "loss": 0.4566, "step": 78940 }, { "epoch": 19.542079207920793, "grad_norm": 0.3325408697128296, "learning_rate": 2.872475068837076e-05, "loss": 0.4593, "step": 78950 }, { "epoch": 19.544554455445546, "grad_norm": 0.3263935446739197, "learning_rate": 2.8712282183347898e-05, "loss": 0.4552, "step": 78960 }, { "epoch": 19.547029702970296, "grad_norm": 0.340201735496521, "learning_rate": 2.8699815294986705e-05, "loss": 0.459, "step": 78970 }, { "epoch": 19.54950495049505, "grad_norm": 0.33766576647758484, "learning_rate": 2.8687350024233967e-05, "loss": 0.4586, "step": 78980 }, { "epoch": 19.551980198019802, "grad_norm": 0.34251829981803894, "learning_rate": 2.8674886372036363e-05, "loss": 0.4572, "step": 78990 }, { "epoch": 19.554455445544555, "grad_norm": 0.3462251126766205, "learning_rate": 2.8662424339340387e-05, "loss": 0.4595, "step": 79000 }, { "epoch": 19.556930693069308, "grad_norm": 0.3372035622596741, "learning_rate": 2.8649963927092483e-05, "loss": 0.4639, "step": 79010 }, { "epoch": 19.55940594059406, "grad_norm": 0.32094815373420715, "learning_rate": 2.8637505136238905e-05, "loss": 0.4589, "step": 79020 }, { "epoch": 19.56188118811881, "grad_norm": 0.3510250151157379, "learning_rate": 2.862504796772583e-05, "loss": 0.4605, "step": 79030 }, { "epoch": 19.564356435643564, "grad_norm": 0.3675455152988434, "learning_rate": 2.861259242249932e-05, "loss": 0.4557, "step": 79040 }, { "epoch": 19.566831683168317, "grad_norm": 0.33869150280952454, "learning_rate": 2.860013850150525e-05, "loss": 0.4551, "step": 79050 }, { "epoch": 19.56930693069307, "grad_norm": 0.36913371086120605, "learning_rate": 2.8587686205689458e-05, "loss": 0.4594, "step": 79060 }, { "epoch": 19.571782178217823, "grad_norm": 0.36490073800086975, "learning_rate": 2.857523553599757e-05, "loss": 0.4596, "step": 79070 }, { "epoch": 19.574257425742573, "grad_norm": 0.3417479991912842, "learning_rate": 2.856278649337516e-05, "loss": 0.4648, "step": 79080 }, { "epoch": 19.576732673267326, "grad_norm": 0.3549274802207947, "learning_rate": 2.8550339078767633e-05, "loss": 0.4627, "step": 79090 }, { "epoch": 19.57920792079208, "grad_norm": 0.3732544481754303, "learning_rate": 2.85378932931203e-05, "loss": 0.4632, "step": 79100 }, { "epoch": 19.581683168316832, "grad_norm": 0.32790517807006836, "learning_rate": 2.8525449137378346e-05, "loss": 0.4548, "step": 79110 }, { "epoch": 19.584158415841586, "grad_norm": 0.36429232358932495, "learning_rate": 2.8513006612486813e-05, "loss": 0.4555, "step": 79120 }, { "epoch": 19.586633663366335, "grad_norm": 0.3350070118904114, "learning_rate": 2.8500565719390588e-05, "loss": 0.4588, "step": 79130 }, { "epoch": 19.58910891089109, "grad_norm": 0.38554811477661133, "learning_rate": 2.8488126459034547e-05, "loss": 0.4633, "step": 79140 }, { "epoch": 19.59158415841584, "grad_norm": 0.3328046500682831, "learning_rate": 2.8475688832363346e-05, "loss": 0.4588, "step": 79150 }, { "epoch": 19.594059405940595, "grad_norm": 0.3214871883392334, "learning_rate": 2.8463252840321496e-05, "loss": 0.4539, "step": 79160 }, { "epoch": 19.596534653465348, "grad_norm": 0.3407209515571594, "learning_rate": 2.8450818483853474e-05, "loss": 0.4552, "step": 79170 }, { "epoch": 19.599009900990097, "grad_norm": 0.3505622446537018, "learning_rate": 2.8438385763903597e-05, "loss": 0.4585, "step": 79180 }, { "epoch": 19.60148514851485, "grad_norm": 0.3455846905708313, "learning_rate": 2.8425954681416023e-05, "loss": 0.462, "step": 79190 }, { "epoch": 19.603960396039604, "grad_norm": 0.32678893208503723, "learning_rate": 2.8413525237334793e-05, "loss": 0.4578, "step": 79200 }, { "epoch": 19.606435643564357, "grad_norm": 0.34322062134742737, "learning_rate": 2.8401097432603863e-05, "loss": 0.4599, "step": 79210 }, { "epoch": 19.60891089108911, "grad_norm": 0.33050671219825745, "learning_rate": 2.8388671268167062e-05, "loss": 0.4564, "step": 79220 }, { "epoch": 19.611386138613863, "grad_norm": 0.33799436688423157, "learning_rate": 2.8376246744968056e-05, "loss": 0.4574, "step": 79230 }, { "epoch": 19.613861386138613, "grad_norm": 0.35380035638809204, "learning_rate": 2.8363823863950368e-05, "loss": 0.4587, "step": 79240 }, { "epoch": 19.616336633663366, "grad_norm": 0.34975123405456543, "learning_rate": 2.835140262605751e-05, "loss": 0.4631, "step": 79250 }, { "epoch": 19.61881188118812, "grad_norm": 0.3672381639480591, "learning_rate": 2.8338983032232752e-05, "loss": 0.4658, "step": 79260 }, { "epoch": 19.621287128712872, "grad_norm": 0.3271930515766144, "learning_rate": 2.8326565083419266e-05, "loss": 0.4608, "step": 79270 }, { "epoch": 19.623762376237625, "grad_norm": 0.35748612880706787, "learning_rate": 2.8314148780560124e-05, "loss": 0.4618, "step": 79280 }, { "epoch": 19.626237623762375, "grad_norm": 0.3632723093032837, "learning_rate": 2.8301734124598296e-05, "loss": 0.4649, "step": 79290 }, { "epoch": 19.628712871287128, "grad_norm": 0.3391232192516327, "learning_rate": 2.8289321116476542e-05, "loss": 0.4583, "step": 79300 }, { "epoch": 19.63118811881188, "grad_norm": 0.33866047859191895, "learning_rate": 2.8276909757137586e-05, "loss": 0.4597, "step": 79310 }, { "epoch": 19.633663366336634, "grad_norm": 0.35798078775405884, "learning_rate": 2.8264500047523963e-05, "loss": 0.4566, "step": 79320 }, { "epoch": 19.636138613861387, "grad_norm": 0.34011268615722656, "learning_rate": 2.825209198857813e-05, "loss": 0.4583, "step": 79330 }, { "epoch": 19.638613861386137, "grad_norm": 0.34036126732826233, "learning_rate": 2.8239685581242376e-05, "loss": 0.4549, "step": 79340 }, { "epoch": 19.64108910891089, "grad_norm": 0.33445072174072266, "learning_rate": 2.822728082645889e-05, "loss": 0.46, "step": 79350 }, { "epoch": 19.643564356435643, "grad_norm": 0.340475469827652, "learning_rate": 2.8214877725169765e-05, "loss": 0.457, "step": 79360 }, { "epoch": 19.646039603960396, "grad_norm": 0.327628493309021, "learning_rate": 2.820247627831688e-05, "loss": 0.4555, "step": 79370 }, { "epoch": 19.64851485148515, "grad_norm": 0.3417118787765503, "learning_rate": 2.8190076486842094e-05, "loss": 0.4574, "step": 79380 }, { "epoch": 19.650990099009903, "grad_norm": 0.34019792079925537, "learning_rate": 2.8177678351687042e-05, "loss": 0.4622, "step": 79390 }, { "epoch": 19.653465346534652, "grad_norm": 0.3396281599998474, "learning_rate": 2.8165281873793307e-05, "loss": 0.4619, "step": 79400 }, { "epoch": 19.655940594059405, "grad_norm": 0.3537454605102539, "learning_rate": 2.815288705410234e-05, "loss": 0.461, "step": 79410 }, { "epoch": 19.65841584158416, "grad_norm": 0.34312236309051514, "learning_rate": 2.814049389355542e-05, "loss": 0.4596, "step": 79420 }, { "epoch": 19.66089108910891, "grad_norm": 0.33189862966537476, "learning_rate": 2.8128102393093708e-05, "loss": 0.4624, "step": 79430 }, { "epoch": 19.663366336633665, "grad_norm": 0.34706175327301025, "learning_rate": 2.811571255365828e-05, "loss": 0.4608, "step": 79440 }, { "epoch": 19.665841584158414, "grad_norm": 0.3540433943271637, "learning_rate": 2.8103324376190078e-05, "loss": 0.4551, "step": 79450 }, { "epoch": 19.668316831683168, "grad_norm": 0.3213331699371338, "learning_rate": 2.8090937861629863e-05, "loss": 0.4639, "step": 79460 }, { "epoch": 19.67079207920792, "grad_norm": 0.34824979305267334, "learning_rate": 2.807855301091833e-05, "loss": 0.4567, "step": 79470 }, { "epoch": 19.673267326732674, "grad_norm": 0.3406839668750763, "learning_rate": 2.8066169824996048e-05, "loss": 0.4551, "step": 79480 }, { "epoch": 19.675742574257427, "grad_norm": 0.3497585654258728, "learning_rate": 2.805378830480342e-05, "loss": 0.4638, "step": 79490 }, { "epoch": 19.678217821782177, "grad_norm": 0.3312886655330658, "learning_rate": 2.8041408451280713e-05, "loss": 0.4613, "step": 79500 }, { "epoch": 19.68069306930693, "grad_norm": 0.32964807748794556, "learning_rate": 2.8029030265368122e-05, "loss": 0.4576, "step": 79510 }, { "epoch": 19.683168316831683, "grad_norm": 0.3329312801361084, "learning_rate": 2.8016653748005706e-05, "loss": 0.4522, "step": 79520 }, { "epoch": 19.685643564356436, "grad_norm": 0.34490686655044556, "learning_rate": 2.8004278900133363e-05, "loss": 0.4571, "step": 79530 }, { "epoch": 19.68811881188119, "grad_norm": 0.3459480404853821, "learning_rate": 2.7991905722690837e-05, "loss": 0.4609, "step": 79540 }, { "epoch": 19.69059405940594, "grad_norm": 0.38420748710632324, "learning_rate": 2.7979534216617863e-05, "loss": 0.4565, "step": 79550 }, { "epoch": 19.693069306930692, "grad_norm": 0.3556305468082428, "learning_rate": 2.7967164382853956e-05, "loss": 0.4545, "step": 79560 }, { "epoch": 19.695544554455445, "grad_norm": 0.3436272442340851, "learning_rate": 2.7954796222338486e-05, "loss": 0.4548, "step": 79570 }, { "epoch": 19.698019801980198, "grad_norm": 0.34290963411331177, "learning_rate": 2.7942429736010755e-05, "loss": 0.4552, "step": 79580 }, { "epoch": 19.70049504950495, "grad_norm": 0.3444775938987732, "learning_rate": 2.7930064924809935e-05, "loss": 0.4591, "step": 79590 }, { "epoch": 19.702970297029704, "grad_norm": 0.35135963559150696, "learning_rate": 2.791770178967504e-05, "loss": 0.4581, "step": 79600 }, { "epoch": 19.705445544554454, "grad_norm": 0.3434872329235077, "learning_rate": 2.790534033154494e-05, "loss": 0.4601, "step": 79610 }, { "epoch": 19.707920792079207, "grad_norm": 0.344655305147171, "learning_rate": 2.7892980551358432e-05, "loss": 0.455, "step": 79620 }, { "epoch": 19.71039603960396, "grad_norm": 0.32583367824554443, "learning_rate": 2.7880622450054177e-05, "loss": 0.4572, "step": 79630 }, { "epoch": 19.712871287128714, "grad_norm": 0.31571003794670105, "learning_rate": 2.7868266028570654e-05, "loss": 0.4566, "step": 79640 }, { "epoch": 19.715346534653467, "grad_norm": 0.3358837366104126, "learning_rate": 2.785591128784627e-05, "loss": 0.4559, "step": 79650 }, { "epoch": 19.717821782178216, "grad_norm": 0.35841381549835205, "learning_rate": 2.78435582288193e-05, "loss": 0.4615, "step": 79660 }, { "epoch": 19.72029702970297, "grad_norm": 0.35376715660095215, "learning_rate": 2.7831206852427848e-05, "loss": 0.4595, "step": 79670 }, { "epoch": 19.722772277227723, "grad_norm": 0.3298497498035431, "learning_rate": 2.7818857159609947e-05, "loss": 0.4606, "step": 79680 }, { "epoch": 19.725247524752476, "grad_norm": 0.34296420216560364, "learning_rate": 2.780650915130345e-05, "loss": 0.4595, "step": 79690 }, { "epoch": 19.72772277227723, "grad_norm": 0.3378806412220001, "learning_rate": 2.7794162828446136e-05, "loss": 0.4594, "step": 79700 }, { "epoch": 19.730198019801982, "grad_norm": 0.32478681206703186, "learning_rate": 2.7781818191975584e-05, "loss": 0.4593, "step": 79710 }, { "epoch": 19.73267326732673, "grad_norm": 0.3390493392944336, "learning_rate": 2.776947524282934e-05, "loss": 0.4518, "step": 79720 }, { "epoch": 19.735148514851485, "grad_norm": 0.33103644847869873, "learning_rate": 2.7757133981944722e-05, "loss": 0.456, "step": 79730 }, { "epoch": 19.737623762376238, "grad_norm": 0.35982993245124817, "learning_rate": 2.774479441025899e-05, "loss": 0.4551, "step": 79740 }, { "epoch": 19.74009900990099, "grad_norm": 0.34880343079566956, "learning_rate": 2.773245652870926e-05, "loss": 0.4592, "step": 79750 }, { "epoch": 19.742574257425744, "grad_norm": 0.34804877638816833, "learning_rate": 2.772012033823249e-05, "loss": 0.4525, "step": 79760 }, { "epoch": 19.745049504950494, "grad_norm": 0.3610893189907074, "learning_rate": 2.7707785839765555e-05, "loss": 0.4579, "step": 79770 }, { "epoch": 19.747524752475247, "grad_norm": 0.3655250668525696, "learning_rate": 2.7695453034245177e-05, "loss": 0.4648, "step": 79780 }, { "epoch": 19.75, "grad_norm": 0.33999624848365784, "learning_rate": 2.7683121922607948e-05, "loss": 0.4603, "step": 79790 }, { "epoch": 19.752475247524753, "grad_norm": 0.33541667461395264, "learning_rate": 2.767079250579031e-05, "loss": 0.4566, "step": 79800 }, { "epoch": 19.754950495049506, "grad_norm": 0.3312552571296692, "learning_rate": 2.765846478472862e-05, "loss": 0.4653, "step": 79810 }, { "epoch": 19.757425742574256, "grad_norm": 0.3242460787296295, "learning_rate": 2.7646138760359115e-05, "loss": 0.4582, "step": 79820 }, { "epoch": 19.75990099009901, "grad_norm": 0.32922548055648804, "learning_rate": 2.7633814433617844e-05, "loss": 0.4601, "step": 79830 }, { "epoch": 19.762376237623762, "grad_norm": 0.35747215151786804, "learning_rate": 2.7621491805440724e-05, "loss": 0.458, "step": 79840 }, { "epoch": 19.764851485148515, "grad_norm": 0.33247876167297363, "learning_rate": 2.7609170876763656e-05, "loss": 0.4651, "step": 79850 }, { "epoch": 19.76732673267327, "grad_norm": 0.3718802034854889, "learning_rate": 2.759685164852229e-05, "loss": 0.4569, "step": 79860 }, { "epoch": 19.769801980198018, "grad_norm": 0.3443586826324463, "learning_rate": 2.7584534121652173e-05, "loss": 0.4573, "step": 79870 }, { "epoch": 19.77227722772277, "grad_norm": 0.37553396821022034, "learning_rate": 2.7572218297088765e-05, "loss": 0.4603, "step": 79880 }, { "epoch": 19.774752475247524, "grad_norm": 0.33517029881477356, "learning_rate": 2.755990417576738e-05, "loss": 0.46, "step": 79890 }, { "epoch": 19.777227722772277, "grad_norm": 0.35126498341560364, "learning_rate": 2.754759175862319e-05, "loss": 0.461, "step": 79900 }, { "epoch": 19.77970297029703, "grad_norm": 0.32041242718696594, "learning_rate": 2.7535281046591212e-05, "loss": 0.4558, "step": 79910 }, { "epoch": 19.782178217821784, "grad_norm": 0.3211100995540619, "learning_rate": 2.7522972040606377e-05, "loss": 0.4545, "step": 79920 }, { "epoch": 19.784653465346533, "grad_norm": 0.3072395324707031, "learning_rate": 2.7510664741603504e-05, "loss": 0.4616, "step": 79930 }, { "epoch": 19.787128712871286, "grad_norm": 0.3137519657611847, "learning_rate": 2.7498359150517205e-05, "loss": 0.4573, "step": 79940 }, { "epoch": 19.78960396039604, "grad_norm": 0.3325648009777069, "learning_rate": 2.7486055268282034e-05, "loss": 0.4543, "step": 79950 }, { "epoch": 19.792079207920793, "grad_norm": 0.3457896411418915, "learning_rate": 2.7473753095832406e-05, "loss": 0.458, "step": 79960 }, { "epoch": 19.794554455445546, "grad_norm": 0.3380264341831207, "learning_rate": 2.7461452634102546e-05, "loss": 0.4584, "step": 79970 }, { "epoch": 19.797029702970296, "grad_norm": 0.32906076312065125, "learning_rate": 2.7449153884026647e-05, "loss": 0.4625, "step": 79980 }, { "epoch": 19.79950495049505, "grad_norm": 0.33824053406715393, "learning_rate": 2.7436856846538654e-05, "loss": 0.4583, "step": 79990 }, { "epoch": 19.801980198019802, "grad_norm": 0.32215166091918945, "learning_rate": 2.7424561522572513e-05, "loss": 0.4584, "step": 80000 }, { "epoch": 19.804455445544555, "grad_norm": 0.34340327978134155, "learning_rate": 2.7412267913061908e-05, "loss": 0.4592, "step": 80010 }, { "epoch": 19.806930693069308, "grad_norm": 0.33645591139793396, "learning_rate": 2.7399976018940517e-05, "loss": 0.4588, "step": 80020 }, { "epoch": 19.80940594059406, "grad_norm": 0.3519756495952606, "learning_rate": 2.7387685841141773e-05, "loss": 0.4558, "step": 80030 }, { "epoch": 19.81188118811881, "grad_norm": 0.32214757800102234, "learning_rate": 2.7375397380599066e-05, "loss": 0.4546, "step": 80040 }, { "epoch": 19.814356435643564, "grad_norm": 0.3227890729904175, "learning_rate": 2.7363110638245637e-05, "loss": 0.4563, "step": 80050 }, { "epoch": 19.816831683168317, "grad_norm": 0.34073659777641296, "learning_rate": 2.7350825615014547e-05, "loss": 0.4614, "step": 80060 }, { "epoch": 19.81930693069307, "grad_norm": 0.3344051241874695, "learning_rate": 2.73385423118388e-05, "loss": 0.4585, "step": 80070 }, { "epoch": 19.821782178217823, "grad_norm": 0.36353468894958496, "learning_rate": 2.7326260729651186e-05, "loss": 0.4583, "step": 80080 }, { "epoch": 19.824257425742573, "grad_norm": 0.31626757979393005, "learning_rate": 2.7313980869384458e-05, "loss": 0.461, "step": 80090 }, { "epoch": 19.826732673267326, "grad_norm": 0.3513270914554596, "learning_rate": 2.7301702731971157e-05, "loss": 0.46, "step": 80100 }, { "epoch": 19.82920792079208, "grad_norm": 0.3424987494945526, "learning_rate": 2.7289426318343724e-05, "loss": 0.4606, "step": 80110 }, { "epoch": 19.831683168316832, "grad_norm": 0.33950236439704895, "learning_rate": 2.7277151629434516e-05, "loss": 0.456, "step": 80120 }, { "epoch": 19.834158415841586, "grad_norm": 0.35791799426078796, "learning_rate": 2.7264878666175682e-05, "loss": 0.4579, "step": 80130 }, { "epoch": 19.836633663366335, "grad_norm": 0.3384437561035156, "learning_rate": 2.7252607429499233e-05, "loss": 0.4573, "step": 80140 }, { "epoch": 19.83910891089109, "grad_norm": 0.34903767704963684, "learning_rate": 2.7240337920337166e-05, "loss": 0.4555, "step": 80150 }, { "epoch": 19.84158415841584, "grad_norm": 0.34511977434158325, "learning_rate": 2.722807013962124e-05, "loss": 0.4597, "step": 80160 }, { "epoch": 19.844059405940595, "grad_norm": 0.35604962706565857, "learning_rate": 2.7215804088283082e-05, "loss": 0.4578, "step": 80170 }, { "epoch": 19.846534653465348, "grad_norm": 0.3366512358188629, "learning_rate": 2.7203539767254242e-05, "loss": 0.4554, "step": 80180 }, { "epoch": 19.849009900990097, "grad_norm": 0.34144341945648193, "learning_rate": 2.7191277177466134e-05, "loss": 0.4594, "step": 80190 }, { "epoch": 19.85148514851485, "grad_norm": 0.3536304831504822, "learning_rate": 2.7179016319849997e-05, "loss": 0.4544, "step": 80200 }, { "epoch": 19.853960396039604, "grad_norm": 0.3364953100681305, "learning_rate": 2.7166757195336946e-05, "loss": 0.4625, "step": 80210 }, { "epoch": 19.856435643564357, "grad_norm": 0.31028327345848083, "learning_rate": 2.7154499804858e-05, "loss": 0.4515, "step": 80220 }, { "epoch": 19.85891089108911, "grad_norm": 0.34725823998451233, "learning_rate": 2.7142244149344048e-05, "loss": 0.4613, "step": 80230 }, { "epoch": 19.861386138613863, "grad_norm": 0.34159931540489197, "learning_rate": 2.7129990229725786e-05, "loss": 0.4583, "step": 80240 }, { "epoch": 19.863861386138613, "grad_norm": 0.34521010518074036, "learning_rate": 2.711773804693383e-05, "loss": 0.4575, "step": 80250 }, { "epoch": 19.866336633663366, "grad_norm": 0.33372679352760315, "learning_rate": 2.710548760189869e-05, "loss": 0.4583, "step": 80260 }, { "epoch": 19.86881188118812, "grad_norm": 0.3519323170185089, "learning_rate": 2.7093238895550667e-05, "loss": 0.4568, "step": 80270 }, { "epoch": 19.871287128712872, "grad_norm": 0.32634809613227844, "learning_rate": 2.7080991928819964e-05, "loss": 0.4564, "step": 80280 }, { "epoch": 19.873762376237625, "grad_norm": 0.33097192645072937, "learning_rate": 2.7068746702636672e-05, "loss": 0.4561, "step": 80290 }, { "epoch": 19.876237623762375, "grad_norm": 0.3317004442214966, "learning_rate": 2.705650321793075e-05, "loss": 0.4603, "step": 80300 }, { "epoch": 19.878712871287128, "grad_norm": 0.3569756746292114, "learning_rate": 2.7044261475631976e-05, "loss": 0.4592, "step": 80310 }, { "epoch": 19.88118811881188, "grad_norm": 0.34271085262298584, "learning_rate": 2.7032021476670067e-05, "loss": 0.4533, "step": 80320 }, { "epoch": 19.883663366336634, "grad_norm": 0.35287192463874817, "learning_rate": 2.7019783221974537e-05, "loss": 0.4652, "step": 80330 }, { "epoch": 19.886138613861387, "grad_norm": 0.3558550477027893, "learning_rate": 2.700754671247481e-05, "loss": 0.46, "step": 80340 }, { "epoch": 19.888613861386137, "grad_norm": 0.33240392804145813, "learning_rate": 2.6995311949100193e-05, "loss": 0.4605, "step": 80350 }, { "epoch": 19.89108910891089, "grad_norm": 0.33943668007850647, "learning_rate": 2.69830789327798e-05, "loss": 0.4617, "step": 80360 }, { "epoch": 19.893564356435643, "grad_norm": 0.3535802662372589, "learning_rate": 2.6970847664442677e-05, "loss": 0.4586, "step": 80370 }, { "epoch": 19.896039603960396, "grad_norm": 0.352471262216568, "learning_rate": 2.6958618145017684e-05, "loss": 0.4549, "step": 80380 }, { "epoch": 19.89851485148515, "grad_norm": 0.3272910416126251, "learning_rate": 2.6946390375433594e-05, "loss": 0.4641, "step": 80390 }, { "epoch": 19.900990099009903, "grad_norm": 0.33490559458732605, "learning_rate": 2.6934164356619003e-05, "loss": 0.4579, "step": 80400 }, { "epoch": 19.903465346534652, "grad_norm": 0.3418443500995636, "learning_rate": 2.69219400895024e-05, "loss": 0.4605, "step": 80410 }, { "epoch": 19.905940594059405, "grad_norm": 0.36363136768341064, "learning_rate": 2.6909717575012173e-05, "loss": 0.4597, "step": 80420 }, { "epoch": 19.90841584158416, "grad_norm": 0.341061532497406, "learning_rate": 2.6897496814076505e-05, "loss": 0.4642, "step": 80430 }, { "epoch": 19.91089108910891, "grad_norm": 0.33901411294937134, "learning_rate": 2.6885277807623478e-05, "loss": 0.4592, "step": 80440 }, { "epoch": 19.913366336633665, "grad_norm": 0.323801726102829, "learning_rate": 2.6873060556581053e-05, "loss": 0.457, "step": 80450 }, { "epoch": 19.915841584158414, "grad_norm": 0.34133854508399963, "learning_rate": 2.686084506187707e-05, "loss": 0.4612, "step": 80460 }, { "epoch": 19.918316831683168, "grad_norm": 0.3491821587085724, "learning_rate": 2.684863132443919e-05, "loss": 0.4623, "step": 80470 }, { "epoch": 19.92079207920792, "grad_norm": 0.3982532322406769, "learning_rate": 2.6836419345194967e-05, "loss": 0.4583, "step": 80480 }, { "epoch": 19.923267326732674, "grad_norm": 0.35149508714675903, "learning_rate": 2.6824209125071853e-05, "loss": 0.4609, "step": 80490 }, { "epoch": 19.925742574257427, "grad_norm": 0.37526264786720276, "learning_rate": 2.6812000664997107e-05, "loss": 0.4678, "step": 80500 }, { "epoch": 19.928217821782177, "grad_norm": 0.34718653559684753, "learning_rate": 2.6799793965897868e-05, "loss": 0.4592, "step": 80510 }, { "epoch": 19.93069306930693, "grad_norm": 0.31923115253448486, "learning_rate": 2.6787589028701167e-05, "loss": 0.4574, "step": 80520 }, { "epoch": 19.933168316831683, "grad_norm": 0.33985158801078796, "learning_rate": 2.67753858543339e-05, "loss": 0.4633, "step": 80530 }, { "epoch": 19.935643564356436, "grad_norm": 0.34397128224372864, "learning_rate": 2.6763184443722822e-05, "loss": 0.457, "step": 80540 }, { "epoch": 19.93811881188119, "grad_norm": 0.3558051884174347, "learning_rate": 2.6750984797794486e-05, "loss": 0.4567, "step": 80550 }, { "epoch": 19.94059405940594, "grad_norm": 0.3642856776714325, "learning_rate": 2.6738786917475474e-05, "loss": 0.4606, "step": 80560 }, { "epoch": 19.943069306930692, "grad_norm": 0.3190125524997711, "learning_rate": 2.6726590803692076e-05, "loss": 0.455, "step": 80570 }, { "epoch": 19.945544554455445, "grad_norm": 0.3286765515804291, "learning_rate": 2.67143964573705e-05, "loss": 0.4552, "step": 80580 }, { "epoch": 19.948019801980198, "grad_norm": 0.34433308243751526, "learning_rate": 2.6702203879436838e-05, "loss": 0.4575, "step": 80590 }, { "epoch": 19.95049504950495, "grad_norm": 0.32025864720344543, "learning_rate": 2.6690013070817066e-05, "loss": 0.457, "step": 80600 }, { "epoch": 19.952970297029704, "grad_norm": 0.32960009574890137, "learning_rate": 2.667782403243695e-05, "loss": 0.4591, "step": 80610 }, { "epoch": 19.955445544554454, "grad_norm": 0.3403775095939636, "learning_rate": 2.6665636765222195e-05, "loss": 0.4609, "step": 80620 }, { "epoch": 19.957920792079207, "grad_norm": 0.34383344650268555, "learning_rate": 2.6653451270098316e-05, "loss": 0.4573, "step": 80630 }, { "epoch": 19.96039603960396, "grad_norm": 0.3260113596916199, "learning_rate": 2.6641267547990765e-05, "loss": 0.4546, "step": 80640 }, { "epoch": 19.962871287128714, "grad_norm": 0.33216795325279236, "learning_rate": 2.662908559982477e-05, "loss": 0.455, "step": 80650 }, { "epoch": 19.965346534653467, "grad_norm": 0.32894670963287354, "learning_rate": 2.6616905426525483e-05, "loss": 0.4552, "step": 80660 }, { "epoch": 19.967821782178216, "grad_norm": 0.33891499042510986, "learning_rate": 2.660472702901794e-05, "loss": 0.4629, "step": 80670 }, { "epoch": 19.97029702970297, "grad_norm": 0.34339243173599243, "learning_rate": 2.6592550408226963e-05, "loss": 0.4568, "step": 80680 }, { "epoch": 19.972772277227723, "grad_norm": 0.34326526522636414, "learning_rate": 2.6580375565077325e-05, "loss": 0.4597, "step": 80690 }, { "epoch": 19.975247524752476, "grad_norm": 0.33022624254226685, "learning_rate": 2.6568202500493587e-05, "loss": 0.4633, "step": 80700 }, { "epoch": 19.97772277227723, "grad_norm": 0.3343759775161743, "learning_rate": 2.6556031215400234e-05, "loss": 0.4613, "step": 80710 }, { "epoch": 19.980198019801982, "grad_norm": 0.3407602608203888, "learning_rate": 2.6543861710721607e-05, "loss": 0.4548, "step": 80720 }, { "epoch": 19.98267326732673, "grad_norm": 0.3327662944793701, "learning_rate": 2.6531693987381895e-05, "loss": 0.4629, "step": 80730 }, { "epoch": 19.985148514851485, "grad_norm": 0.344658762216568, "learning_rate": 2.651952804630512e-05, "loss": 0.4603, "step": 80740 }, { "epoch": 19.987623762376238, "grad_norm": 0.35425299406051636, "learning_rate": 2.650736388841524e-05, "loss": 0.4613, "step": 80750 }, { "epoch": 19.99009900990099, "grad_norm": 0.31310123205184937, "learning_rate": 2.649520151463605e-05, "loss": 0.4543, "step": 80760 }, { "epoch": 19.992574257425744, "grad_norm": 0.3629249930381775, "learning_rate": 2.6483040925891166e-05, "loss": 0.4646, "step": 80770 }, { "epoch": 19.995049504950494, "grad_norm": 0.376954048871994, "learning_rate": 2.6470882123104125e-05, "loss": 0.4617, "step": 80780 }, { "epoch": 19.997524752475247, "grad_norm": 0.34572893381118774, "learning_rate": 2.6458725107198335e-05, "loss": 0.4557, "step": 80790 }, { "epoch": 20.0, "grad_norm": 0.34319400787353516, "learning_rate": 2.6446569879097004e-05, "loss": 0.4628, "step": 80800 }, { "epoch": 20.002475247524753, "grad_norm": 0.3454239070415497, "learning_rate": 2.6434416439723235e-05, "loss": 0.4553, "step": 80810 }, { "epoch": 20.004950495049506, "grad_norm": 0.36208128929138184, "learning_rate": 2.6422264790000018e-05, "loss": 0.4571, "step": 80820 }, { "epoch": 20.007425742574256, "grad_norm": 0.3427159786224365, "learning_rate": 2.6410114930850216e-05, "loss": 0.4582, "step": 80830 }, { "epoch": 20.00990099009901, "grad_norm": 0.33018749952316284, "learning_rate": 2.6397966863196498e-05, "loss": 0.4599, "step": 80840 }, { "epoch": 20.012376237623762, "grad_norm": 0.35347071290016174, "learning_rate": 2.63858205879614e-05, "loss": 0.4567, "step": 80850 }, { "epoch": 20.014851485148515, "grad_norm": 0.33582207560539246, "learning_rate": 2.6373676106067436e-05, "loss": 0.4607, "step": 80860 }, { "epoch": 20.01732673267327, "grad_norm": 0.3317604959011078, "learning_rate": 2.636153341843684e-05, "loss": 0.4611, "step": 80870 }, { "epoch": 20.019801980198018, "grad_norm": 0.3286827504634857, "learning_rate": 2.6349392525991767e-05, "loss": 0.4558, "step": 80880 }, { "epoch": 20.02227722772277, "grad_norm": 0.33123084902763367, "learning_rate": 2.6337253429654245e-05, "loss": 0.4581, "step": 80890 }, { "epoch": 20.024752475247524, "grad_norm": 0.3590695858001709, "learning_rate": 2.632511613034619e-05, "loss": 0.4528, "step": 80900 }, { "epoch": 20.027227722772277, "grad_norm": 0.3654429614543915, "learning_rate": 2.6312980628989325e-05, "loss": 0.4624, "step": 80910 }, { "epoch": 20.02970297029703, "grad_norm": 0.32806646823883057, "learning_rate": 2.6300846926505234e-05, "loss": 0.4609, "step": 80920 }, { "epoch": 20.032178217821784, "grad_norm": 0.33062416315078735, "learning_rate": 2.6288715023815425e-05, "loss": 0.4608, "step": 80930 }, { "epoch": 20.034653465346533, "grad_norm": 0.35056746006011963, "learning_rate": 2.6276584921841242e-05, "loss": 0.4577, "step": 80940 }, { "epoch": 20.037128712871286, "grad_norm": 0.31592288613319397, "learning_rate": 2.6264456621503864e-05, "loss": 0.456, "step": 80950 }, { "epoch": 20.03960396039604, "grad_norm": 0.3384642004966736, "learning_rate": 2.6252330123724356e-05, "loss": 0.4588, "step": 80960 }, { "epoch": 20.042079207920793, "grad_norm": 0.34158533811569214, "learning_rate": 2.6240205429423675e-05, "loss": 0.4601, "step": 80970 }, { "epoch": 20.044554455445546, "grad_norm": 0.36478784680366516, "learning_rate": 2.622808253952257e-05, "loss": 0.4594, "step": 80980 }, { "epoch": 20.047029702970296, "grad_norm": 0.32785630226135254, "learning_rate": 2.621596145494174e-05, "loss": 0.4546, "step": 80990 }, { "epoch": 20.04950495049505, "grad_norm": 0.32480332255363464, "learning_rate": 2.6203842176601656e-05, "loss": 0.4547, "step": 81000 }, { "epoch": 20.051980198019802, "grad_norm": 0.30328673124313354, "learning_rate": 2.619172470542273e-05, "loss": 0.4589, "step": 81010 }, { "epoch": 20.054455445544555, "grad_norm": 0.3212184011936188, "learning_rate": 2.6179609042325183e-05, "loss": 0.4609, "step": 81020 }, { "epoch": 20.056930693069308, "grad_norm": 0.31688377261161804, "learning_rate": 2.616749518822914e-05, "loss": 0.4597, "step": 81030 }, { "epoch": 20.059405940594058, "grad_norm": 0.33602774143218994, "learning_rate": 2.6155383144054535e-05, "loss": 0.4591, "step": 81040 }, { "epoch": 20.06188118811881, "grad_norm": 0.338998019695282, "learning_rate": 2.6143272910721227e-05, "loss": 0.4596, "step": 81050 }, { "epoch": 20.064356435643564, "grad_norm": 0.33311790227890015, "learning_rate": 2.613116448914892e-05, "loss": 0.4568, "step": 81060 }, { "epoch": 20.066831683168317, "grad_norm": 0.36065998673439026, "learning_rate": 2.6119057880257125e-05, "loss": 0.4579, "step": 81070 }, { "epoch": 20.06930693069307, "grad_norm": 0.3416443169116974, "learning_rate": 2.610695308496528e-05, "loss": 0.4564, "step": 81080 }, { "epoch": 20.071782178217823, "grad_norm": 0.32642775774002075, "learning_rate": 2.6094850104192696e-05, "loss": 0.4621, "step": 81090 }, { "epoch": 20.074257425742573, "grad_norm": 0.3734050691127777, "learning_rate": 2.6082748938858483e-05, "loss": 0.4569, "step": 81100 }, { "epoch": 20.076732673267326, "grad_norm": 0.3130946457386017, "learning_rate": 2.607064958988163e-05, "loss": 0.4609, "step": 81110 }, { "epoch": 20.07920792079208, "grad_norm": 0.34367433190345764, "learning_rate": 2.605855205818102e-05, "loss": 0.4574, "step": 81120 }, { "epoch": 20.081683168316832, "grad_norm": 0.355969101190567, "learning_rate": 2.6046456344675397e-05, "loss": 0.4586, "step": 81130 }, { "epoch": 20.084158415841586, "grad_norm": 0.31919506192207336, "learning_rate": 2.603436245028334e-05, "loss": 0.4573, "step": 81140 }, { "epoch": 20.086633663366335, "grad_norm": 0.4009447991847992, "learning_rate": 2.6022270375923252e-05, "loss": 0.4566, "step": 81150 }, { "epoch": 20.08910891089109, "grad_norm": 0.4012027978897095, "learning_rate": 2.6010180122513527e-05, "loss": 0.4606, "step": 81160 }, { "epoch": 20.09158415841584, "grad_norm": 0.35113325715065, "learning_rate": 2.5998091690972314e-05, "loss": 0.4579, "step": 81170 }, { "epoch": 20.094059405940595, "grad_norm": 0.35570016503334045, "learning_rate": 2.5986005082217614e-05, "loss": 0.4574, "step": 81180 }, { "epoch": 20.096534653465348, "grad_norm": 0.34720364212989807, "learning_rate": 2.5973920297167344e-05, "loss": 0.4599, "step": 81190 }, { "epoch": 20.099009900990097, "grad_norm": 0.33957579731941223, "learning_rate": 2.596183733673929e-05, "loss": 0.4608, "step": 81200 }, { "epoch": 20.10148514851485, "grad_norm": 0.33406153321266174, "learning_rate": 2.594975620185105e-05, "loss": 0.4536, "step": 81210 }, { "epoch": 20.103960396039604, "grad_norm": 0.3279368579387665, "learning_rate": 2.5937676893420092e-05, "loss": 0.4658, "step": 81220 }, { "epoch": 20.106435643564357, "grad_norm": 0.3216570019721985, "learning_rate": 2.5925599412363782e-05, "loss": 0.4592, "step": 81230 }, { "epoch": 20.10891089108911, "grad_norm": 0.3549259901046753, "learning_rate": 2.5913523759599335e-05, "loss": 0.4602, "step": 81240 }, { "epoch": 20.111386138613863, "grad_norm": 0.3381674289703369, "learning_rate": 2.590144993604378e-05, "loss": 0.4615, "step": 81250 }, { "epoch": 20.113861386138613, "grad_norm": 0.32523825764656067, "learning_rate": 2.588937794261407e-05, "loss": 0.4529, "step": 81260 }, { "epoch": 20.116336633663366, "grad_norm": 0.31976327300071716, "learning_rate": 2.5877307780227e-05, "loss": 0.4602, "step": 81270 }, { "epoch": 20.11881188118812, "grad_norm": 0.3117688298225403, "learning_rate": 2.5865239449799216e-05, "loss": 0.4604, "step": 81280 }, { "epoch": 20.121287128712872, "grad_norm": 0.30947211384773254, "learning_rate": 2.5853172952247194e-05, "loss": 0.4527, "step": 81290 }, { "epoch": 20.123762376237625, "grad_norm": 0.31912362575531006, "learning_rate": 2.5841108288487337e-05, "loss": 0.4593, "step": 81300 }, { "epoch": 20.126237623762375, "grad_norm": 0.3224978446960449, "learning_rate": 2.582904545943589e-05, "loss": 0.4523, "step": 81310 }, { "epoch": 20.128712871287128, "grad_norm": 0.33195799589157104, "learning_rate": 2.5816984466008908e-05, "loss": 0.4597, "step": 81320 }, { "epoch": 20.13118811881188, "grad_norm": 0.3440788686275482, "learning_rate": 2.5804925309122373e-05, "loss": 0.4565, "step": 81330 }, { "epoch": 20.133663366336634, "grad_norm": 0.31987473368644714, "learning_rate": 2.5792867989692077e-05, "loss": 0.4616, "step": 81340 }, { "epoch": 20.136138613861387, "grad_norm": 0.3383030593395233, "learning_rate": 2.57808125086337e-05, "loss": 0.4592, "step": 81350 }, { "epoch": 20.138613861386137, "grad_norm": 0.3328649699687958, "learning_rate": 2.5768758866862803e-05, "loss": 0.4598, "step": 81360 }, { "epoch": 20.14108910891089, "grad_norm": 0.3307620584964752, "learning_rate": 2.5756707065294734e-05, "loss": 0.4497, "step": 81370 }, { "epoch": 20.143564356435643, "grad_norm": 0.3206620514392853, "learning_rate": 2.5744657104844793e-05, "loss": 0.4557, "step": 81380 }, { "epoch": 20.146039603960396, "grad_norm": 0.3328161835670471, "learning_rate": 2.573260898642805e-05, "loss": 0.464, "step": 81390 }, { "epoch": 20.14851485148515, "grad_norm": 0.3301093876361847, "learning_rate": 2.572056271095953e-05, "loss": 0.4548, "step": 81400 }, { "epoch": 20.150990099009903, "grad_norm": 0.3330940008163452, "learning_rate": 2.5708518279354016e-05, "loss": 0.4589, "step": 81410 }, { "epoch": 20.153465346534652, "grad_norm": 0.3326241672039032, "learning_rate": 2.5696475692526235e-05, "loss": 0.453, "step": 81420 }, { "epoch": 20.155940594059405, "grad_norm": 0.3418187201023102, "learning_rate": 2.5684434951390744e-05, "loss": 0.4551, "step": 81430 }, { "epoch": 20.15841584158416, "grad_norm": 0.3402256965637207, "learning_rate": 2.5672396056861963e-05, "loss": 0.4593, "step": 81440 }, { "epoch": 20.16089108910891, "grad_norm": 0.34466853737831116, "learning_rate": 2.5660359009854107e-05, "loss": 0.4623, "step": 81450 }, { "epoch": 20.163366336633665, "grad_norm": 0.3425311744213104, "learning_rate": 2.56483238112814e-05, "loss": 0.4602, "step": 81460 }, { "epoch": 20.165841584158414, "grad_norm": 0.367744117975235, "learning_rate": 2.5636290462057787e-05, "loss": 0.4597, "step": 81470 }, { "epoch": 20.168316831683168, "grad_norm": 0.3318507671356201, "learning_rate": 2.5624258963097115e-05, "loss": 0.463, "step": 81480 }, { "epoch": 20.17079207920792, "grad_norm": 0.3085669279098511, "learning_rate": 2.56122293153131e-05, "loss": 0.4547, "step": 81490 }, { "epoch": 20.173267326732674, "grad_norm": 0.3604252338409424, "learning_rate": 2.560020151961935e-05, "loss": 0.4589, "step": 81500 }, { "epoch": 20.175742574257427, "grad_norm": 0.3235148787498474, "learning_rate": 2.5588175576929263e-05, "loss": 0.4574, "step": 81510 }, { "epoch": 20.178217821782177, "grad_norm": 0.31292086839675903, "learning_rate": 2.557615148815612e-05, "loss": 0.455, "step": 81520 }, { "epoch": 20.18069306930693, "grad_norm": 0.36466720700263977, "learning_rate": 2.5564129254213094e-05, "loss": 0.456, "step": 81530 }, { "epoch": 20.183168316831683, "grad_norm": 0.3234642744064331, "learning_rate": 2.5552108876013205e-05, "loss": 0.463, "step": 81540 }, { "epoch": 20.185643564356436, "grad_norm": 0.33011528849601746, "learning_rate": 2.554009035446928e-05, "loss": 0.4576, "step": 81550 }, { "epoch": 20.18811881188119, "grad_norm": 0.3218376040458679, "learning_rate": 2.5528073690494076e-05, "loss": 0.4553, "step": 81560 }, { "epoch": 20.190594059405942, "grad_norm": 0.30636829137802124, "learning_rate": 2.5516058885000192e-05, "loss": 0.4556, "step": 81570 }, { "epoch": 20.193069306930692, "grad_norm": 0.31300103664398193, "learning_rate": 2.5504045938900055e-05, "loss": 0.4572, "step": 81580 }, { "epoch": 20.195544554455445, "grad_norm": 0.3332136869430542, "learning_rate": 2.5492034853105952e-05, "loss": 0.461, "step": 81590 }, { "epoch": 20.198019801980198, "grad_norm": 0.3356695771217346, "learning_rate": 2.5480025628530063e-05, "loss": 0.4589, "step": 81600 }, { "epoch": 20.20049504950495, "grad_norm": 0.30507221817970276, "learning_rate": 2.546801826608443e-05, "loss": 0.4579, "step": 81610 }, { "epoch": 20.202970297029704, "grad_norm": 0.3403671085834503, "learning_rate": 2.5456012766680892e-05, "loss": 0.4564, "step": 81620 }, { "epoch": 20.205445544554454, "grad_norm": 0.3051093816757202, "learning_rate": 2.5444009131231228e-05, "loss": 0.4571, "step": 81630 }, { "epoch": 20.207920792079207, "grad_norm": 0.320944219827652, "learning_rate": 2.5432007360646997e-05, "loss": 0.4641, "step": 81640 }, { "epoch": 20.21039603960396, "grad_norm": 0.32055556774139404, "learning_rate": 2.542000745583969e-05, "loss": 0.457, "step": 81650 }, { "epoch": 20.212871287128714, "grad_norm": 0.32719799876213074, "learning_rate": 2.5408009417720586e-05, "loss": 0.4584, "step": 81660 }, { "epoch": 20.215346534653467, "grad_norm": 0.32434922456741333, "learning_rate": 2.5396013247200868e-05, "loss": 0.463, "step": 81670 }, { "epoch": 20.217821782178216, "grad_norm": 0.32838478684425354, "learning_rate": 2.53840189451916e-05, "loss": 0.4584, "step": 81680 }, { "epoch": 20.22029702970297, "grad_norm": 0.31931421160697937, "learning_rate": 2.5372026512603613e-05, "loss": 0.4579, "step": 81690 }, { "epoch": 20.222772277227723, "grad_norm": 0.332080602645874, "learning_rate": 2.5360035950347704e-05, "loss": 0.4573, "step": 81700 }, { "epoch": 20.225247524752476, "grad_norm": 0.32440099120140076, "learning_rate": 2.534804725933444e-05, "loss": 0.4511, "step": 81710 }, { "epoch": 20.22772277227723, "grad_norm": 0.34757477045059204, "learning_rate": 2.5336060440474297e-05, "loss": 0.4566, "step": 81720 }, { "epoch": 20.230198019801982, "grad_norm": 0.320122092962265, "learning_rate": 2.5324075494677614e-05, "loss": 0.4581, "step": 81730 }, { "epoch": 20.23267326732673, "grad_norm": 0.32260188460350037, "learning_rate": 2.531209242285455e-05, "loss": 0.4571, "step": 81740 }, { "epoch": 20.235148514851485, "grad_norm": 0.3209070861339569, "learning_rate": 2.5300111225915114e-05, "loss": 0.4573, "step": 81750 }, { "epoch": 20.237623762376238, "grad_norm": 0.3415176272392273, "learning_rate": 2.5288131904769235e-05, "loss": 0.459, "step": 81760 }, { "epoch": 20.24009900990099, "grad_norm": 0.3472953140735626, "learning_rate": 2.5276154460326673e-05, "loss": 0.4565, "step": 81770 }, { "epoch": 20.242574257425744, "grad_norm": 0.3843872547149658, "learning_rate": 2.526417889349699e-05, "loss": 0.4622, "step": 81780 }, { "epoch": 20.245049504950494, "grad_norm": 0.3440203368663788, "learning_rate": 2.525220520518968e-05, "loss": 0.456, "step": 81790 }, { "epoch": 20.247524752475247, "grad_norm": 0.3178268074989319, "learning_rate": 2.5240233396314085e-05, "loss": 0.4604, "step": 81800 }, { "epoch": 20.25, "grad_norm": 0.32739636301994324, "learning_rate": 2.522826346777937e-05, "loss": 0.4569, "step": 81810 }, { "epoch": 20.252475247524753, "grad_norm": 0.3323131799697876, "learning_rate": 2.5216295420494535e-05, "loss": 0.459, "step": 81820 }, { "epoch": 20.254950495049506, "grad_norm": 0.3261423110961914, "learning_rate": 2.520432925536851e-05, "loss": 0.4553, "step": 81830 }, { "epoch": 20.257425742574256, "grad_norm": 0.35139524936676025, "learning_rate": 2.5192364973310057e-05, "loss": 0.4583, "step": 81840 }, { "epoch": 20.25990099009901, "grad_norm": 0.3411571681499481, "learning_rate": 2.5180402575227768e-05, "loss": 0.4569, "step": 81850 }, { "epoch": 20.262376237623762, "grad_norm": 0.3504261374473572, "learning_rate": 2.5168442062030072e-05, "loss": 0.4581, "step": 81860 }, { "epoch": 20.264851485148515, "grad_norm": 0.3136325478553772, "learning_rate": 2.5156483434625367e-05, "loss": 0.4601, "step": 81870 }, { "epoch": 20.26732673267327, "grad_norm": 0.30997058749198914, "learning_rate": 2.514452669392179e-05, "loss": 0.4597, "step": 81880 }, { "epoch": 20.269801980198018, "grad_norm": 0.33237308263778687, "learning_rate": 2.5132571840827358e-05, "loss": 0.4554, "step": 81890 }, { "epoch": 20.27227722772277, "grad_norm": 0.3252522051334381, "learning_rate": 2.512061887624999e-05, "loss": 0.4629, "step": 81900 }, { "epoch": 20.274752475247524, "grad_norm": 0.3330554962158203, "learning_rate": 2.510866780109744e-05, "loss": 0.4563, "step": 81910 }, { "epoch": 20.277227722772277, "grad_norm": 0.3418138325214386, "learning_rate": 2.5096718616277292e-05, "loss": 0.4629, "step": 81920 }, { "epoch": 20.27970297029703, "grad_norm": 0.32141074538230896, "learning_rate": 2.508477132269703e-05, "loss": 0.4574, "step": 81930 }, { "epoch": 20.282178217821784, "grad_norm": 0.3288855254650116, "learning_rate": 2.5072825921263943e-05, "loss": 0.4591, "step": 81940 }, { "epoch": 20.284653465346533, "grad_norm": 0.3065589368343353, "learning_rate": 2.5060882412885238e-05, "loss": 0.4606, "step": 81950 }, { "epoch": 20.287128712871286, "grad_norm": 0.33058586716651917, "learning_rate": 2.5048940798467913e-05, "loss": 0.4582, "step": 81960 }, { "epoch": 20.28960396039604, "grad_norm": 0.33086973428726196, "learning_rate": 2.5037001078918877e-05, "loss": 0.4663, "step": 81970 }, { "epoch": 20.292079207920793, "grad_norm": 0.308482825756073, "learning_rate": 2.5025063255144875e-05, "loss": 0.4574, "step": 81980 }, { "epoch": 20.294554455445546, "grad_norm": 0.3280128538608551, "learning_rate": 2.501312732805249e-05, "loss": 0.4553, "step": 81990 }, { "epoch": 20.297029702970296, "grad_norm": 0.32548725605010986, "learning_rate": 2.50011932985482e-05, "loss": 0.4529, "step": 82000 }, { "epoch": 20.29950495049505, "grad_norm": 0.33064985275268555, "learning_rate": 2.4989261167538287e-05, "loss": 0.4564, "step": 82010 }, { "epoch": 20.301980198019802, "grad_norm": 0.35022878646850586, "learning_rate": 2.4977330935928944e-05, "loss": 0.4568, "step": 82020 }, { "epoch": 20.304455445544555, "grad_norm": 0.3625031113624573, "learning_rate": 2.4965402604626164e-05, "loss": 0.4628, "step": 82030 }, { "epoch": 20.306930693069308, "grad_norm": 0.33017459511756897, "learning_rate": 2.495347617453586e-05, "loss": 0.4574, "step": 82040 }, { "epoch": 20.309405940594058, "grad_norm": 0.35091182589530945, "learning_rate": 2.4941551646563736e-05, "loss": 0.4572, "step": 82050 }, { "epoch": 20.31188118811881, "grad_norm": 0.34261229634284973, "learning_rate": 2.4929629021615385e-05, "loss": 0.454, "step": 82060 }, { "epoch": 20.314356435643564, "grad_norm": 0.35241004824638367, "learning_rate": 2.491770830059628e-05, "loss": 0.4515, "step": 82070 }, { "epoch": 20.316831683168317, "grad_norm": 0.32119956612586975, "learning_rate": 2.490578948441169e-05, "loss": 0.4635, "step": 82080 }, { "epoch": 20.31930693069307, "grad_norm": 0.33462512493133545, "learning_rate": 2.4893872573966776e-05, "loss": 0.4609, "step": 82090 }, { "epoch": 20.321782178217823, "grad_norm": 0.3392103910446167, "learning_rate": 2.488195757016657e-05, "loss": 0.4562, "step": 82100 }, { "epoch": 20.324257425742573, "grad_norm": 0.33425459265708923, "learning_rate": 2.487004447391592e-05, "loss": 0.4587, "step": 82110 }, { "epoch": 20.326732673267326, "grad_norm": 0.3411332666873932, "learning_rate": 2.485813328611954e-05, "loss": 0.4606, "step": 82120 }, { "epoch": 20.32920792079208, "grad_norm": 0.3348357677459717, "learning_rate": 2.4846224007682008e-05, "loss": 0.4537, "step": 82130 }, { "epoch": 20.331683168316832, "grad_norm": 0.32353752851486206, "learning_rate": 2.4834316639507782e-05, "loss": 0.4631, "step": 82140 }, { "epoch": 20.334158415841586, "grad_norm": 0.3343771696090698, "learning_rate": 2.4822411182501127e-05, "loss": 0.4586, "step": 82150 }, { "epoch": 20.336633663366335, "grad_norm": 0.33793994784355164, "learning_rate": 2.481050763756615e-05, "loss": 0.4608, "step": 82160 }, { "epoch": 20.33910891089109, "grad_norm": 0.3016563057899475, "learning_rate": 2.4798606005606917e-05, "loss": 0.4614, "step": 82170 }, { "epoch": 20.34158415841584, "grad_norm": 0.3176313042640686, "learning_rate": 2.4786706287527246e-05, "loss": 0.459, "step": 82180 }, { "epoch": 20.344059405940595, "grad_norm": 0.3163524866104126, "learning_rate": 2.4774808484230817e-05, "loss": 0.4586, "step": 82190 }, { "epoch": 20.346534653465348, "grad_norm": 0.37074512243270874, "learning_rate": 2.476291259662121e-05, "loss": 0.4553, "step": 82200 }, { "epoch": 20.349009900990097, "grad_norm": 0.3237336277961731, "learning_rate": 2.475101862560187e-05, "loss": 0.456, "step": 82210 }, { "epoch": 20.35148514851485, "grad_norm": 0.3169906437397003, "learning_rate": 2.4739126572076027e-05, "loss": 0.4624, "step": 82220 }, { "epoch": 20.353960396039604, "grad_norm": 0.33461570739746094, "learning_rate": 2.47272364369468e-05, "loss": 0.4575, "step": 82230 }, { "epoch": 20.356435643564357, "grad_norm": 0.332351952791214, "learning_rate": 2.4715348221117173e-05, "loss": 0.4569, "step": 82240 }, { "epoch": 20.35891089108911, "grad_norm": 0.3180263042449951, "learning_rate": 2.4703461925490006e-05, "loss": 0.4555, "step": 82250 }, { "epoch": 20.361386138613863, "grad_norm": 0.31821590662002563, "learning_rate": 2.469157755096795e-05, "loss": 0.4603, "step": 82260 }, { "epoch": 20.363861386138613, "grad_norm": 0.31425797939300537, "learning_rate": 2.4679695098453553e-05, "loss": 0.4611, "step": 82270 }, { "epoch": 20.366336633663366, "grad_norm": 0.330145001411438, "learning_rate": 2.4667814568849234e-05, "loss": 0.4551, "step": 82280 }, { "epoch": 20.36881188118812, "grad_norm": 0.32386624813079834, "learning_rate": 2.4655935963057204e-05, "loss": 0.4539, "step": 82290 }, { "epoch": 20.371287128712872, "grad_norm": 0.331983357667923, "learning_rate": 2.4644059281979594e-05, "loss": 0.4524, "step": 82300 }, { "epoch": 20.373762376237625, "grad_norm": 0.3578823506832123, "learning_rate": 2.463218452651833e-05, "loss": 0.4562, "step": 82310 }, { "epoch": 20.376237623762375, "grad_norm": 0.3404834270477295, "learning_rate": 2.462031169757526e-05, "loss": 0.4607, "step": 82320 }, { "epoch": 20.378712871287128, "grad_norm": 0.3360453248023987, "learning_rate": 2.460844079605201e-05, "loss": 0.4593, "step": 82330 }, { "epoch": 20.38118811881188, "grad_norm": 0.36075514554977417, "learning_rate": 2.4596571822850124e-05, "loss": 0.4648, "step": 82340 }, { "epoch": 20.383663366336634, "grad_norm": 0.3420717418193817, "learning_rate": 2.458470477887094e-05, "loss": 0.4554, "step": 82350 }, { "epoch": 20.386138613861387, "grad_norm": 0.3379521667957306, "learning_rate": 2.4572839665015707e-05, "loss": 0.4625, "step": 82360 }, { "epoch": 20.388613861386137, "grad_norm": 0.32363030314445496, "learning_rate": 2.456097648218551e-05, "loss": 0.4576, "step": 82370 }, { "epoch": 20.39108910891089, "grad_norm": 0.32343190908432007, "learning_rate": 2.454911523128125e-05, "loss": 0.455, "step": 82380 }, { "epoch": 20.393564356435643, "grad_norm": 0.3165019452571869, "learning_rate": 2.4537255913203727e-05, "loss": 0.4575, "step": 82390 }, { "epoch": 20.396039603960396, "grad_norm": 0.31510671973228455, "learning_rate": 2.4525398528853598e-05, "loss": 0.461, "step": 82400 }, { "epoch": 20.39851485148515, "grad_norm": 0.3152640163898468, "learning_rate": 2.4513543079131335e-05, "loss": 0.4597, "step": 82410 }, { "epoch": 20.400990099009903, "grad_norm": 0.323141485452652, "learning_rate": 2.4501689564937257e-05, "loss": 0.4641, "step": 82420 }, { "epoch": 20.403465346534652, "grad_norm": 0.3185863196849823, "learning_rate": 2.4489837987171582e-05, "loss": 0.4551, "step": 82430 }, { "epoch": 20.405940594059405, "grad_norm": 0.32287490367889404, "learning_rate": 2.4477988346734376e-05, "loss": 0.4536, "step": 82440 }, { "epoch": 20.40841584158416, "grad_norm": 0.3389348089694977, "learning_rate": 2.4466140644525532e-05, "loss": 0.4642, "step": 82450 }, { "epoch": 20.41089108910891, "grad_norm": 0.3173821270465851, "learning_rate": 2.4454294881444756e-05, "loss": 0.4525, "step": 82460 }, { "epoch": 20.413366336633665, "grad_norm": 0.32416877150535583, "learning_rate": 2.444245105839173e-05, "loss": 0.459, "step": 82470 }, { "epoch": 20.415841584158414, "grad_norm": 0.31801900267601013, "learning_rate": 2.4430609176265883e-05, "loss": 0.4565, "step": 82480 }, { "epoch": 20.418316831683168, "grad_norm": 0.3349255323410034, "learning_rate": 2.441876923596651e-05, "loss": 0.4647, "step": 82490 }, { "epoch": 20.42079207920792, "grad_norm": 0.35078567266464233, "learning_rate": 2.4406931238392795e-05, "loss": 0.459, "step": 82500 }, { "epoch": 20.423267326732674, "grad_norm": 0.31212472915649414, "learning_rate": 2.4395095184443768e-05, "loss": 0.4569, "step": 82510 }, { "epoch": 20.425742574257427, "grad_norm": 0.3204166293144226, "learning_rate": 2.4383261075018284e-05, "loss": 0.458, "step": 82520 }, { "epoch": 20.428217821782177, "grad_norm": 0.33034640550613403, "learning_rate": 2.4371428911015043e-05, "loss": 0.4517, "step": 82530 }, { "epoch": 20.43069306930693, "grad_norm": 0.34183281660079956, "learning_rate": 2.4359598693332647e-05, "loss": 0.4639, "step": 82540 }, { "epoch": 20.433168316831683, "grad_norm": 0.3374174237251282, "learning_rate": 2.4347770422869532e-05, "loss": 0.4567, "step": 82550 }, { "epoch": 20.435643564356436, "grad_norm": 0.32566773891448975, "learning_rate": 2.4335944100523948e-05, "loss": 0.4616, "step": 82560 }, { "epoch": 20.43811881188119, "grad_norm": 0.33529627323150635, "learning_rate": 2.4324119727194043e-05, "loss": 0.4519, "step": 82570 }, { "epoch": 20.440594059405942, "grad_norm": 0.326928973197937, "learning_rate": 2.4312297303777815e-05, "loss": 0.4624, "step": 82580 }, { "epoch": 20.443069306930692, "grad_norm": 0.30588242411613464, "learning_rate": 2.4300476831173085e-05, "loss": 0.4595, "step": 82590 }, { "epoch": 20.445544554455445, "grad_norm": 0.31665048003196716, "learning_rate": 2.4288658310277518e-05, "loss": 0.4571, "step": 82600 }, { "epoch": 20.448019801980198, "grad_norm": 0.3274935781955719, "learning_rate": 2.427684174198867e-05, "loss": 0.4581, "step": 82610 }, { "epoch": 20.45049504950495, "grad_norm": 0.33090993762016296, "learning_rate": 2.426502712720396e-05, "loss": 0.4577, "step": 82620 }, { "epoch": 20.452970297029704, "grad_norm": 0.34682542085647583, "learning_rate": 2.4253214466820577e-05, "loss": 0.4574, "step": 82630 }, { "epoch": 20.455445544554454, "grad_norm": 0.327426552772522, "learning_rate": 2.424140376173566e-05, "loss": 0.4619, "step": 82640 }, { "epoch": 20.457920792079207, "grad_norm": 0.31893643736839294, "learning_rate": 2.4229595012846117e-05, "loss": 0.4561, "step": 82650 }, { "epoch": 20.46039603960396, "grad_norm": 0.3301022946834564, "learning_rate": 2.421778822104877e-05, "loss": 0.4625, "step": 82660 }, { "epoch": 20.462871287128714, "grad_norm": 0.3037640154361725, "learning_rate": 2.4205983387240274e-05, "loss": 0.4525, "step": 82670 }, { "epoch": 20.465346534653467, "grad_norm": 0.32621708512306213, "learning_rate": 2.4194180512317095e-05, "loss": 0.4553, "step": 82680 }, { "epoch": 20.467821782178216, "grad_norm": 0.3499085009098053, "learning_rate": 2.418237959717562e-05, "loss": 0.4589, "step": 82690 }, { "epoch": 20.47029702970297, "grad_norm": 0.3272117078304291, "learning_rate": 2.4170580642712016e-05, "loss": 0.4564, "step": 82700 }, { "epoch": 20.472772277227723, "grad_norm": 0.3113751709461212, "learning_rate": 2.415878364982237e-05, "loss": 0.4599, "step": 82710 }, { "epoch": 20.475247524752476, "grad_norm": 0.3404912054538727, "learning_rate": 2.414698861940255e-05, "loss": 0.4657, "step": 82720 }, { "epoch": 20.47772277227723, "grad_norm": 0.30344656109809875, "learning_rate": 2.4135195552348327e-05, "loss": 0.459, "step": 82730 }, { "epoch": 20.480198019801982, "grad_norm": 0.3660331070423126, "learning_rate": 2.412340444955533e-05, "loss": 0.4546, "step": 82740 }, { "epoch": 20.48267326732673, "grad_norm": 0.3124750256538391, "learning_rate": 2.4111615311919e-05, "loss": 0.4537, "step": 82750 }, { "epoch": 20.485148514851485, "grad_norm": 0.33871370553970337, "learning_rate": 2.4099828140334595e-05, "loss": 0.4564, "step": 82760 }, { "epoch": 20.487623762376238, "grad_norm": 0.3038397431373596, "learning_rate": 2.4088042935697353e-05, "loss": 0.4605, "step": 82770 }, { "epoch": 20.49009900990099, "grad_norm": 0.31947460770606995, "learning_rate": 2.407625969890225e-05, "loss": 0.4579, "step": 82780 }, { "epoch": 20.492574257425744, "grad_norm": 0.31310921907424927, "learning_rate": 2.4064478430844118e-05, "loss": 0.4534, "step": 82790 }, { "epoch": 20.495049504950494, "grad_norm": 0.30814027786254883, "learning_rate": 2.4052699132417696e-05, "loss": 0.453, "step": 82800 }, { "epoch": 20.497524752475247, "grad_norm": 0.30352410674095154, "learning_rate": 2.404092180451755e-05, "loss": 0.4546, "step": 82810 }, { "epoch": 20.5, "grad_norm": 0.3269056975841522, "learning_rate": 2.402914644803808e-05, "loss": 0.4557, "step": 82820 }, { "epoch": 20.502475247524753, "grad_norm": 0.32221925258636475, "learning_rate": 2.4017373063873526e-05, "loss": 0.4601, "step": 82830 }, { "epoch": 20.504950495049506, "grad_norm": 0.3359532654285431, "learning_rate": 2.400560165291802e-05, "loss": 0.4517, "step": 82840 }, { "epoch": 20.507425742574256, "grad_norm": 0.31540313363075256, "learning_rate": 2.3993832216065536e-05, "loss": 0.4609, "step": 82850 }, { "epoch": 20.50990099009901, "grad_norm": 0.3334510326385498, "learning_rate": 2.398206475420987e-05, "loss": 0.4607, "step": 82860 }, { "epoch": 20.512376237623762, "grad_norm": 0.334190309047699, "learning_rate": 2.3970299268244645e-05, "loss": 0.4567, "step": 82870 }, { "epoch": 20.514851485148515, "grad_norm": 0.32693901658058167, "learning_rate": 2.3958535759063443e-05, "loss": 0.4573, "step": 82880 }, { "epoch": 20.51732673267327, "grad_norm": 0.313210666179657, "learning_rate": 2.3946774227559597e-05, "loss": 0.4546, "step": 82890 }, { "epoch": 20.519801980198018, "grad_norm": 0.33132442831993103, "learning_rate": 2.3935014674626295e-05, "loss": 0.4577, "step": 82900 }, { "epoch": 20.52227722772277, "grad_norm": 0.3522726893424988, "learning_rate": 2.392325710115661e-05, "loss": 0.452, "step": 82910 }, { "epoch": 20.524752475247524, "grad_norm": 0.33384472131729126, "learning_rate": 2.3911501508043476e-05, "loss": 0.4599, "step": 82920 }, { "epoch": 20.527227722772277, "grad_norm": 0.31242358684539795, "learning_rate": 2.3899747896179614e-05, "loss": 0.4547, "step": 82930 }, { "epoch": 20.52970297029703, "grad_norm": 0.32135123014450073, "learning_rate": 2.3887996266457674e-05, "loss": 0.4551, "step": 82940 }, { "epoch": 20.532178217821784, "grad_norm": 0.3051261901855469, "learning_rate": 2.3876246619770077e-05, "loss": 0.4534, "step": 82950 }, { "epoch": 20.534653465346533, "grad_norm": 0.33462148904800415, "learning_rate": 2.3864498957009164e-05, "loss": 0.4545, "step": 82960 }, { "epoch": 20.537128712871286, "grad_norm": 0.3280618488788605, "learning_rate": 2.3852753279067054e-05, "loss": 0.4583, "step": 82970 }, { "epoch": 20.53960396039604, "grad_norm": 0.33726587891578674, "learning_rate": 2.384100958683578e-05, "loss": 0.4524, "step": 82980 }, { "epoch": 20.542079207920793, "grad_norm": 0.31293052434921265, "learning_rate": 2.3829267881207205e-05, "loss": 0.4596, "step": 82990 }, { "epoch": 20.544554455445546, "grad_norm": 0.33222296833992004, "learning_rate": 2.381752816307301e-05, "loss": 0.4612, "step": 83000 }, { "epoch": 20.547029702970296, "grad_norm": 0.32528457045555115, "learning_rate": 2.3805790433324777e-05, "loss": 0.4602, "step": 83010 }, { "epoch": 20.54950495049505, "grad_norm": 0.34600338339805603, "learning_rate": 2.379405469285388e-05, "loss": 0.4562, "step": 83020 }, { "epoch": 20.551980198019802, "grad_norm": 0.3159213662147522, "learning_rate": 2.3782320942551578e-05, "loss": 0.4549, "step": 83030 }, { "epoch": 20.554455445544555, "grad_norm": 0.32502615451812744, "learning_rate": 2.3770589183309005e-05, "loss": 0.4616, "step": 83040 }, { "epoch": 20.556930693069308, "grad_norm": 0.3209330439567566, "learning_rate": 2.3758859416017076e-05, "loss": 0.4533, "step": 83050 }, { "epoch": 20.55940594059406, "grad_norm": 0.33050617575645447, "learning_rate": 2.374713164156659e-05, "loss": 0.4613, "step": 83060 }, { "epoch": 20.56188118811881, "grad_norm": 0.3317410349845886, "learning_rate": 2.3735405860848194e-05, "loss": 0.4616, "step": 83070 }, { "epoch": 20.564356435643564, "grad_norm": 0.3280683755874634, "learning_rate": 2.372368207475241e-05, "loss": 0.4608, "step": 83080 }, { "epoch": 20.566831683168317, "grad_norm": 0.3129073977470398, "learning_rate": 2.371196028416955e-05, "loss": 0.4601, "step": 83090 }, { "epoch": 20.56930693069307, "grad_norm": 0.3145526349544525, "learning_rate": 2.3700240489989817e-05, "loss": 0.4617, "step": 83100 }, { "epoch": 20.571782178217823, "grad_norm": 0.32869839668273926, "learning_rate": 2.3688522693103278e-05, "loss": 0.4605, "step": 83110 }, { "epoch": 20.574257425742573, "grad_norm": 0.3180861473083496, "learning_rate": 2.3676806894399795e-05, "loss": 0.4574, "step": 83120 }, { "epoch": 20.576732673267326, "grad_norm": 0.31232383847236633, "learning_rate": 2.366509309476909e-05, "loss": 0.458, "step": 83130 }, { "epoch": 20.57920792079208, "grad_norm": 0.32472315430641174, "learning_rate": 2.3653381295100774e-05, "loss": 0.4584, "step": 83140 }, { "epoch": 20.581683168316832, "grad_norm": 0.32693639397621155, "learning_rate": 2.3641671496284297e-05, "loss": 0.462, "step": 83150 }, { "epoch": 20.584158415841586, "grad_norm": 0.34212443232536316, "learning_rate": 2.3629963699208918e-05, "loss": 0.461, "step": 83160 }, { "epoch": 20.586633663366335, "grad_norm": 0.32884514331817627, "learning_rate": 2.361825790476373e-05, "loss": 0.4571, "step": 83170 }, { "epoch": 20.58910891089109, "grad_norm": 0.3144597113132477, "learning_rate": 2.3606554113837785e-05, "loss": 0.4543, "step": 83180 }, { "epoch": 20.59158415841584, "grad_norm": 0.30928030610084534, "learning_rate": 2.359485232731988e-05, "loss": 0.4633, "step": 83190 }, { "epoch": 20.594059405940595, "grad_norm": 0.33591780066490173, "learning_rate": 2.358315254609866e-05, "loss": 0.458, "step": 83200 }, { "epoch": 20.596534653465348, "grad_norm": 0.3153681755065918, "learning_rate": 2.357145477106266e-05, "loss": 0.4588, "step": 83210 }, { "epoch": 20.599009900990097, "grad_norm": 0.3254019021987915, "learning_rate": 2.3559759003100285e-05, "loss": 0.4552, "step": 83220 }, { "epoch": 20.60148514851485, "grad_norm": 0.3110259175300598, "learning_rate": 2.3548065243099703e-05, "loss": 0.4584, "step": 83230 }, { "epoch": 20.603960396039604, "grad_norm": 0.3310171067714691, "learning_rate": 2.3536373491949014e-05, "loss": 0.455, "step": 83240 }, { "epoch": 20.606435643564357, "grad_norm": 0.324693500995636, "learning_rate": 2.3524683750536096e-05, "loss": 0.456, "step": 83250 }, { "epoch": 20.60891089108911, "grad_norm": 0.3240879774093628, "learning_rate": 2.3512996019748752e-05, "loss": 0.4581, "step": 83260 }, { "epoch": 20.611386138613863, "grad_norm": 0.3400302827358246, "learning_rate": 2.3501310300474532e-05, "loss": 0.4545, "step": 83270 }, { "epoch": 20.613861386138613, "grad_norm": 0.31578248739242554, "learning_rate": 2.348962659360092e-05, "loss": 0.4626, "step": 83280 }, { "epoch": 20.616336633663366, "grad_norm": 0.3442944288253784, "learning_rate": 2.3477944900015236e-05, "loss": 0.4615, "step": 83290 }, { "epoch": 20.61881188118812, "grad_norm": 0.31674912571907043, "learning_rate": 2.3466265220604588e-05, "loss": 0.4567, "step": 83300 }, { "epoch": 20.621287128712872, "grad_norm": 0.30270448327064514, "learning_rate": 2.3454587556256008e-05, "loss": 0.4589, "step": 83310 }, { "epoch": 20.623762376237625, "grad_norm": 0.3185817003250122, "learning_rate": 2.3442911907856302e-05, "loss": 0.4576, "step": 83320 }, { "epoch": 20.626237623762375, "grad_norm": 0.3094819188117981, "learning_rate": 2.343123827629219e-05, "loss": 0.4526, "step": 83330 }, { "epoch": 20.628712871287128, "grad_norm": 0.3123879134654999, "learning_rate": 2.3419566662450176e-05, "loss": 0.4551, "step": 83340 }, { "epoch": 20.63118811881188, "grad_norm": 0.31312626600265503, "learning_rate": 2.3407897067216677e-05, "loss": 0.4581, "step": 83350 }, { "epoch": 20.633663366336634, "grad_norm": 0.3127352297306061, "learning_rate": 2.3396229491477885e-05, "loss": 0.4566, "step": 83360 }, { "epoch": 20.636138613861387, "grad_norm": 0.3161003589630127, "learning_rate": 2.338456393611989e-05, "loss": 0.4607, "step": 83370 }, { "epoch": 20.638613861386137, "grad_norm": 0.31436827778816223, "learning_rate": 2.3372900402028635e-05, "loss": 0.4565, "step": 83380 }, { "epoch": 20.64108910891089, "grad_norm": 0.340269535779953, "learning_rate": 2.3361238890089858e-05, "loss": 0.462, "step": 83390 }, { "epoch": 20.643564356435643, "grad_norm": 0.3375588357448578, "learning_rate": 2.334957940118918e-05, "loss": 0.4576, "step": 83400 }, { "epoch": 20.646039603960396, "grad_norm": 0.33224835991859436, "learning_rate": 2.33379219362121e-05, "loss": 0.4574, "step": 83410 }, { "epoch": 20.64851485148515, "grad_norm": 0.3409093916416168, "learning_rate": 2.332626649604388e-05, "loss": 0.4583, "step": 83420 }, { "epoch": 20.650990099009903, "grad_norm": 0.3019517958164215, "learning_rate": 2.331461308156968e-05, "loss": 0.4608, "step": 83430 }, { "epoch": 20.653465346534652, "grad_norm": 0.3030034601688385, "learning_rate": 2.3302961693674502e-05, "loss": 0.4574, "step": 83440 }, { "epoch": 20.655940594059405, "grad_norm": 0.3643341064453125, "learning_rate": 2.329131233324322e-05, "loss": 0.4619, "step": 83450 }, { "epoch": 20.65841584158416, "grad_norm": 0.3788546025753021, "learning_rate": 2.32796650011605e-05, "loss": 0.4564, "step": 83460 }, { "epoch": 20.66089108910891, "grad_norm": 0.34069305658340454, "learning_rate": 2.326801969831085e-05, "loss": 0.4612, "step": 83470 }, { "epoch": 20.663366336633665, "grad_norm": 0.3363094627857208, "learning_rate": 2.325637642557873e-05, "loss": 0.4563, "step": 83480 }, { "epoch": 20.665841584158414, "grad_norm": 0.32588374614715576, "learning_rate": 2.3244735183848322e-05, "loss": 0.4599, "step": 83490 }, { "epoch": 20.668316831683168, "grad_norm": 0.3285391330718994, "learning_rate": 2.3233095974003693e-05, "loss": 0.4581, "step": 83500 }, { "epoch": 20.67079207920792, "grad_norm": 0.3523057699203491, "learning_rate": 2.3221458796928773e-05, "loss": 0.4554, "step": 83510 }, { "epoch": 20.673267326732674, "grad_norm": 0.333565354347229, "learning_rate": 2.3209823653507358e-05, "loss": 0.4618, "step": 83520 }, { "epoch": 20.675742574257427, "grad_norm": 0.32638323307037354, "learning_rate": 2.3198190544623032e-05, "loss": 0.4556, "step": 83530 }, { "epoch": 20.678217821782177, "grad_norm": 0.31246688961982727, "learning_rate": 2.318655947115924e-05, "loss": 0.4573, "step": 83540 }, { "epoch": 20.68069306930693, "grad_norm": 0.3077695965766907, "learning_rate": 2.317493043399931e-05, "loss": 0.4576, "step": 83550 }, { "epoch": 20.683168316831683, "grad_norm": 0.3183000385761261, "learning_rate": 2.3163303434026395e-05, "loss": 0.4578, "step": 83560 }, { "epoch": 20.685643564356436, "grad_norm": 0.3188183009624481, "learning_rate": 2.315167847212346e-05, "loss": 0.4583, "step": 83570 }, { "epoch": 20.68811881188119, "grad_norm": 0.2965545058250427, "learning_rate": 2.3140055549173363e-05, "loss": 0.4554, "step": 83580 }, { "epoch": 20.69059405940594, "grad_norm": 0.30483996868133545, "learning_rate": 2.312843466605881e-05, "loss": 0.46, "step": 83590 }, { "epoch": 20.693069306930692, "grad_norm": 0.30476436018943787, "learning_rate": 2.311681582366228e-05, "loss": 0.4561, "step": 83600 }, { "epoch": 20.695544554455445, "grad_norm": 0.3132075071334839, "learning_rate": 2.310519902286619e-05, "loss": 0.4574, "step": 83610 }, { "epoch": 20.698019801980198, "grad_norm": 0.3071272671222687, "learning_rate": 2.3093584264552738e-05, "loss": 0.4554, "step": 83620 }, { "epoch": 20.70049504950495, "grad_norm": 0.31987500190734863, "learning_rate": 2.3081971549604003e-05, "loss": 0.4588, "step": 83630 }, { "epoch": 20.702970297029704, "grad_norm": 0.30617424845695496, "learning_rate": 2.307036087890187e-05, "loss": 0.4603, "step": 83640 }, { "epoch": 20.705445544554454, "grad_norm": 0.3098512291908264, "learning_rate": 2.305875225332813e-05, "loss": 0.4523, "step": 83650 }, { "epoch": 20.707920792079207, "grad_norm": 0.32032284140586853, "learning_rate": 2.3047145673764336e-05, "loss": 0.4579, "step": 83660 }, { "epoch": 20.71039603960396, "grad_norm": 0.32340165972709656, "learning_rate": 2.303554114109196e-05, "loss": 0.4617, "step": 83670 }, { "epoch": 20.712871287128714, "grad_norm": 0.3048284947872162, "learning_rate": 2.3023938656192296e-05, "loss": 0.455, "step": 83680 }, { "epoch": 20.715346534653467, "grad_norm": 0.340138703584671, "learning_rate": 2.301233821994645e-05, "loss": 0.4571, "step": 83690 }, { "epoch": 20.717821782178216, "grad_norm": 0.3256269097328186, "learning_rate": 2.300073983323543e-05, "loss": 0.4601, "step": 83700 }, { "epoch": 20.72029702970297, "grad_norm": 0.3054274916648865, "learning_rate": 2.2989143496940025e-05, "loss": 0.4611, "step": 83710 }, { "epoch": 20.722772277227723, "grad_norm": 0.32706180214881897, "learning_rate": 2.297754921194093e-05, "loss": 0.4547, "step": 83720 }, { "epoch": 20.725247524752476, "grad_norm": 0.29923662543296814, "learning_rate": 2.2965956979118624e-05, "loss": 0.4604, "step": 83730 }, { "epoch": 20.72772277227723, "grad_norm": 0.31547465920448303, "learning_rate": 2.2954366799353478e-05, "loss": 0.4608, "step": 83740 }, { "epoch": 20.730198019801982, "grad_norm": 0.3232886493206024, "learning_rate": 2.29427786735257e-05, "loss": 0.4587, "step": 83750 }, { "epoch": 20.73267326732673, "grad_norm": 0.3089028596878052, "learning_rate": 2.293119260251532e-05, "loss": 0.4567, "step": 83760 }, { "epoch": 20.735148514851485, "grad_norm": 0.32402288913726807, "learning_rate": 2.291960858720219e-05, "loss": 0.4551, "step": 83770 }, { "epoch": 20.737623762376238, "grad_norm": 0.340626060962677, "learning_rate": 2.2908026628466113e-05, "loss": 0.4589, "step": 83780 }, { "epoch": 20.74009900990099, "grad_norm": 0.3107551038265228, "learning_rate": 2.2896446727186617e-05, "loss": 0.4604, "step": 83790 }, { "epoch": 20.742574257425744, "grad_norm": 0.3312685191631317, "learning_rate": 2.2884868884243105e-05, "loss": 0.4567, "step": 83800 }, { "epoch": 20.745049504950494, "grad_norm": 0.3366631269454956, "learning_rate": 2.287329310051486e-05, "loss": 0.4589, "step": 83810 }, { "epoch": 20.747524752475247, "grad_norm": 0.37838876247406006, "learning_rate": 2.2861719376881e-05, "loss": 0.462, "step": 83820 }, { "epoch": 20.75, "grad_norm": 0.32119518518447876, "learning_rate": 2.285014771422046e-05, "loss": 0.4619, "step": 83830 }, { "epoch": 20.752475247524753, "grad_norm": 0.31526270508766174, "learning_rate": 2.2838578113412008e-05, "loss": 0.457, "step": 83840 }, { "epoch": 20.754950495049506, "grad_norm": 0.31096509099006653, "learning_rate": 2.282701057533429e-05, "loss": 0.4578, "step": 83850 }, { "epoch": 20.757425742574256, "grad_norm": 0.32531142234802246, "learning_rate": 2.2815445100865823e-05, "loss": 0.4571, "step": 83860 }, { "epoch": 20.75990099009901, "grad_norm": 0.3171260356903076, "learning_rate": 2.280388169088487e-05, "loss": 0.4583, "step": 83870 }, { "epoch": 20.762376237623762, "grad_norm": 0.30660203099250793, "learning_rate": 2.279232034626963e-05, "loss": 0.459, "step": 83880 }, { "epoch": 20.764851485148515, "grad_norm": 0.33027374744415283, "learning_rate": 2.278076106789812e-05, "loss": 0.4596, "step": 83890 }, { "epoch": 20.76732673267327, "grad_norm": 0.3119077980518341, "learning_rate": 2.276920385664818e-05, "loss": 0.4567, "step": 83900 }, { "epoch": 20.769801980198018, "grad_norm": 0.3196277618408203, "learning_rate": 2.275764871339748e-05, "loss": 0.4566, "step": 83910 }, { "epoch": 20.77227722772277, "grad_norm": 0.3033137321472168, "learning_rate": 2.274609563902357e-05, "loss": 0.4579, "step": 83920 }, { "epoch": 20.774752475247524, "grad_norm": 0.33783918619155884, "learning_rate": 2.273454463440386e-05, "loss": 0.461, "step": 83930 }, { "epoch": 20.777227722772277, "grad_norm": 0.3394819498062134, "learning_rate": 2.2722995700415527e-05, "loss": 0.4553, "step": 83940 }, { "epoch": 20.77970297029703, "grad_norm": 0.323654443025589, "learning_rate": 2.2711448837935677e-05, "loss": 0.4609, "step": 83950 }, { "epoch": 20.782178217821784, "grad_norm": 0.3141268789768219, "learning_rate": 2.2699904047841187e-05, "loss": 0.4562, "step": 83960 }, { "epoch": 20.784653465346533, "grad_norm": 0.3304513990879059, "learning_rate": 2.268836133100881e-05, "loss": 0.4572, "step": 83970 }, { "epoch": 20.787128712871286, "grad_norm": 0.3101342022418976, "learning_rate": 2.2676820688315176e-05, "loss": 0.4574, "step": 83980 }, { "epoch": 20.78960396039604, "grad_norm": 0.3038616180419922, "learning_rate": 2.266528212063667e-05, "loss": 0.4553, "step": 83990 }, { "epoch": 20.792079207920793, "grad_norm": 0.30607470870018005, "learning_rate": 2.265374562884961e-05, "loss": 0.4547, "step": 84000 }, { "epoch": 20.794554455445546, "grad_norm": 0.32626059651374817, "learning_rate": 2.2642211213830084e-05, "loss": 0.4543, "step": 84010 }, { "epoch": 20.797029702970296, "grad_norm": 0.32187339663505554, "learning_rate": 2.263067887645409e-05, "loss": 0.4596, "step": 84020 }, { "epoch": 20.79950495049505, "grad_norm": 0.34992897510528564, "learning_rate": 2.2619148617597397e-05, "loss": 0.4612, "step": 84030 }, { "epoch": 20.801980198019802, "grad_norm": 0.31812742352485657, "learning_rate": 2.2607620438135664e-05, "loss": 0.4544, "step": 84040 }, { "epoch": 20.804455445544555, "grad_norm": 0.3142867088317871, "learning_rate": 2.2596094338944407e-05, "loss": 0.4552, "step": 84050 }, { "epoch": 20.806930693069308, "grad_norm": 0.30933597683906555, "learning_rate": 2.2584570320898935e-05, "loss": 0.4551, "step": 84060 }, { "epoch": 20.80940594059406, "grad_norm": 0.3355408012866974, "learning_rate": 2.2573048384874397e-05, "loss": 0.4547, "step": 84070 }, { "epoch": 20.81188118811881, "grad_norm": 0.3222861886024475, "learning_rate": 2.2561528531745835e-05, "loss": 0.4558, "step": 84080 }, { "epoch": 20.814356435643564, "grad_norm": 0.31400761008262634, "learning_rate": 2.2550010762388125e-05, "loss": 0.4544, "step": 84090 }, { "epoch": 20.816831683168317, "grad_norm": 0.3166002929210663, "learning_rate": 2.253849507767592e-05, "loss": 0.461, "step": 84100 }, { "epoch": 20.81930693069307, "grad_norm": 0.3030269742012024, "learning_rate": 2.252698147848379e-05, "loss": 0.4603, "step": 84110 }, { "epoch": 20.821782178217823, "grad_norm": 0.32059013843536377, "learning_rate": 2.251546996568612e-05, "loss": 0.4558, "step": 84120 }, { "epoch": 20.824257425742573, "grad_norm": 0.3106051981449127, "learning_rate": 2.2503960540157133e-05, "loss": 0.4547, "step": 84130 }, { "epoch": 20.826732673267326, "grad_norm": 0.306813508272171, "learning_rate": 2.2492453202770858e-05, "loss": 0.4571, "step": 84140 }, { "epoch": 20.82920792079208, "grad_norm": 0.31588295102119446, "learning_rate": 2.2480947954401233e-05, "loss": 0.4606, "step": 84150 }, { "epoch": 20.831683168316832, "grad_norm": 0.314955472946167, "learning_rate": 2.246944479592202e-05, "loss": 0.4576, "step": 84160 }, { "epoch": 20.834158415841586, "grad_norm": 0.3066965937614441, "learning_rate": 2.2457943728206786e-05, "loss": 0.4603, "step": 84170 }, { "epoch": 20.836633663366335, "grad_norm": 0.31845638155937195, "learning_rate": 2.2446444752128932e-05, "loss": 0.458, "step": 84180 }, { "epoch": 20.83910891089109, "grad_norm": 0.3321104645729065, "learning_rate": 2.243494786856179e-05, "loss": 0.4558, "step": 84190 }, { "epoch": 20.84158415841584, "grad_norm": 0.3251655399799347, "learning_rate": 2.2423453078378453e-05, "loss": 0.4555, "step": 84200 }, { "epoch": 20.844059405940595, "grad_norm": 0.31702756881713867, "learning_rate": 2.241196038245184e-05, "loss": 0.455, "step": 84210 }, { "epoch": 20.846534653465348, "grad_norm": 0.3061336278915405, "learning_rate": 2.2400469781654776e-05, "loss": 0.4596, "step": 84220 }, { "epoch": 20.849009900990097, "grad_norm": 0.3340380787849426, "learning_rate": 2.2388981276859905e-05, "loss": 0.458, "step": 84230 }, { "epoch": 20.85148514851485, "grad_norm": 0.30981355905532837, "learning_rate": 2.2377494868939673e-05, "loss": 0.461, "step": 84240 }, { "epoch": 20.853960396039604, "grad_norm": 0.3172255754470825, "learning_rate": 2.2366010558766432e-05, "loss": 0.4567, "step": 84250 }, { "epoch": 20.856435643564357, "grad_norm": 0.34511324763298035, "learning_rate": 2.23545283472123e-05, "loss": 0.4566, "step": 84260 }, { "epoch": 20.85891089108911, "grad_norm": 0.3650362491607666, "learning_rate": 2.2343048235149317e-05, "loss": 0.461, "step": 84270 }, { "epoch": 20.861386138613863, "grad_norm": 0.3280099630355835, "learning_rate": 2.2331570223449283e-05, "loss": 0.4573, "step": 84280 }, { "epoch": 20.863861386138613, "grad_norm": 0.3112821578979492, "learning_rate": 2.2320094312983892e-05, "loss": 0.4595, "step": 84290 }, { "epoch": 20.866336633663366, "grad_norm": 0.31264030933380127, "learning_rate": 2.2308620504624683e-05, "loss": 0.4553, "step": 84300 }, { "epoch": 20.86881188118812, "grad_norm": 0.31348416209220886, "learning_rate": 2.2297148799242984e-05, "loss": 0.457, "step": 84310 }, { "epoch": 20.871287128712872, "grad_norm": 0.3264022171497345, "learning_rate": 2.228567919771003e-05, "loss": 0.4587, "step": 84320 }, { "epoch": 20.873762376237625, "grad_norm": 0.32387205958366394, "learning_rate": 2.2274211700896817e-05, "loss": 0.4566, "step": 84330 }, { "epoch": 20.876237623762375, "grad_norm": 0.31463906168937683, "learning_rate": 2.2262746309674258e-05, "loss": 0.4592, "step": 84340 }, { "epoch": 20.878712871287128, "grad_norm": 0.30236825346946716, "learning_rate": 2.2251283024913087e-05, "loss": 0.4564, "step": 84350 }, { "epoch": 20.88118811881188, "grad_norm": 0.30053243041038513, "learning_rate": 2.2239821847483844e-05, "loss": 0.4533, "step": 84360 }, { "epoch": 20.883663366336634, "grad_norm": 0.31896132230758667, "learning_rate": 2.2228362778256906e-05, "loss": 0.4546, "step": 84370 }, { "epoch": 20.886138613861387, "grad_norm": 0.34427258372306824, "learning_rate": 2.221690581810254e-05, "loss": 0.4578, "step": 84380 }, { "epoch": 20.888613861386137, "grad_norm": 0.31658196449279785, "learning_rate": 2.2205450967890847e-05, "loss": 0.4615, "step": 84390 }, { "epoch": 20.89108910891089, "grad_norm": 0.3050771951675415, "learning_rate": 2.2193998228491704e-05, "loss": 0.4569, "step": 84400 }, { "epoch": 20.893564356435643, "grad_norm": 0.3081858158111572, "learning_rate": 2.2182547600774894e-05, "loss": 0.458, "step": 84410 }, { "epoch": 20.896039603960396, "grad_norm": 0.35845935344696045, "learning_rate": 2.2171099085610032e-05, "loss": 0.456, "step": 84420 }, { "epoch": 20.89851485148515, "grad_norm": 0.2939111888408661, "learning_rate": 2.2159652683866537e-05, "loss": 0.4565, "step": 84430 }, { "epoch": 20.900990099009903, "grad_norm": 0.3071082532405853, "learning_rate": 2.2148208396413657e-05, "loss": 0.4559, "step": 84440 }, { "epoch": 20.903465346534652, "grad_norm": 0.3551214933395386, "learning_rate": 2.2136766224120576e-05, "loss": 0.4542, "step": 84450 }, { "epoch": 20.905940594059405, "grad_norm": 0.32743266224861145, "learning_rate": 2.2125326167856225e-05, "loss": 0.4569, "step": 84460 }, { "epoch": 20.90841584158416, "grad_norm": 0.3147239685058594, "learning_rate": 2.2113888228489386e-05, "loss": 0.4555, "step": 84470 }, { "epoch": 20.91089108910891, "grad_norm": 0.31416553258895874, "learning_rate": 2.2102452406888674e-05, "loss": 0.4641, "step": 84480 }, { "epoch": 20.913366336633665, "grad_norm": 0.29073548316955566, "learning_rate": 2.209101870392263e-05, "loss": 0.4573, "step": 84490 }, { "epoch": 20.915841584158414, "grad_norm": 0.3080594837665558, "learning_rate": 2.2079587120459533e-05, "loss": 0.4608, "step": 84500 }, { "epoch": 20.918316831683168, "grad_norm": 0.328267365694046, "learning_rate": 2.206815765736751e-05, "loss": 0.457, "step": 84510 }, { "epoch": 20.92079207920792, "grad_norm": 0.35034260153770447, "learning_rate": 2.205673031551459e-05, "loss": 0.4577, "step": 84520 }, { "epoch": 20.923267326732674, "grad_norm": 0.3104664087295532, "learning_rate": 2.2045305095768602e-05, "loss": 0.4603, "step": 84530 }, { "epoch": 20.925742574257427, "grad_norm": 0.3047553598880768, "learning_rate": 2.2033881998997215e-05, "loss": 0.4599, "step": 84540 }, { "epoch": 20.928217821782177, "grad_norm": 0.3149332106113434, "learning_rate": 2.2022461026067887e-05, "loss": 0.4541, "step": 84550 }, { "epoch": 20.93069306930693, "grad_norm": 0.33116835355758667, "learning_rate": 2.2011042177848047e-05, "loss": 0.4547, "step": 84560 }, { "epoch": 20.933168316831683, "grad_norm": 0.31742268800735474, "learning_rate": 2.199962545520484e-05, "loss": 0.4569, "step": 84570 }, { "epoch": 20.935643564356436, "grad_norm": 0.3293824791908264, "learning_rate": 2.1988210859005275e-05, "loss": 0.4548, "step": 84580 }, { "epoch": 20.93811881188119, "grad_norm": 0.35563337802886963, "learning_rate": 2.1976798390116237e-05, "loss": 0.4639, "step": 84590 }, { "epoch": 20.94059405940594, "grad_norm": 0.30239564180374146, "learning_rate": 2.196538804940444e-05, "loss": 0.4566, "step": 84600 }, { "epoch": 20.943069306930692, "grad_norm": 0.35270747542381287, "learning_rate": 2.1953979837736394e-05, "loss": 0.4567, "step": 84610 }, { "epoch": 20.945544554455445, "grad_norm": 0.31651630997657776, "learning_rate": 2.1942573755978506e-05, "loss": 0.4596, "step": 84620 }, { "epoch": 20.948019801980198, "grad_norm": 0.3335171639919281, "learning_rate": 2.1931169804996958e-05, "loss": 0.4577, "step": 84630 }, { "epoch": 20.95049504950495, "grad_norm": 0.31310904026031494, "learning_rate": 2.191976798565785e-05, "loss": 0.4647, "step": 84640 }, { "epoch": 20.952970297029704, "grad_norm": 0.3197125792503357, "learning_rate": 2.1908368298827024e-05, "loss": 0.4623, "step": 84650 }, { "epoch": 20.955445544554454, "grad_norm": 0.34191587567329407, "learning_rate": 2.1896970745370242e-05, "loss": 0.4564, "step": 84660 }, { "epoch": 20.957920792079207, "grad_norm": 0.31918594241142273, "learning_rate": 2.188557532615309e-05, "loss": 0.4578, "step": 84670 }, { "epoch": 20.96039603960396, "grad_norm": 0.341836154460907, "learning_rate": 2.187418204204093e-05, "loss": 0.4555, "step": 84680 }, { "epoch": 20.962871287128714, "grad_norm": 0.29798638820648193, "learning_rate": 2.186279089389905e-05, "loss": 0.4578, "step": 84690 }, { "epoch": 20.965346534653467, "grad_norm": 0.31156125664711, "learning_rate": 2.1851401882592493e-05, "loss": 0.459, "step": 84700 }, { "epoch": 20.967821782178216, "grad_norm": 0.30520662665367126, "learning_rate": 2.1840015008986198e-05, "loss": 0.4567, "step": 84710 }, { "epoch": 20.97029702970297, "grad_norm": 0.31846052408218384, "learning_rate": 2.1828630273944938e-05, "loss": 0.4564, "step": 84720 }, { "epoch": 20.972772277227723, "grad_norm": 0.31270766258239746, "learning_rate": 2.1817247678333295e-05, "loss": 0.4535, "step": 84730 }, { "epoch": 20.975247524752476, "grad_norm": 0.30446818470954895, "learning_rate": 2.1805867223015685e-05, "loss": 0.4566, "step": 84740 }, { "epoch": 20.97772277227723, "grad_norm": 0.3095996677875519, "learning_rate": 2.1794488908856398e-05, "loss": 0.4578, "step": 84750 }, { "epoch": 20.980198019801982, "grad_norm": 0.30938294529914856, "learning_rate": 2.178311273671955e-05, "loss": 0.4591, "step": 84760 }, { "epoch": 20.98267326732673, "grad_norm": 0.30667197704315186, "learning_rate": 2.1771738707469057e-05, "loss": 0.4558, "step": 84770 }, { "epoch": 20.985148514851485, "grad_norm": 0.3574204444885254, "learning_rate": 2.1760366821968724e-05, "loss": 0.4599, "step": 84780 }, { "epoch": 20.987623762376238, "grad_norm": 0.30074945092201233, "learning_rate": 2.1748997081082178e-05, "loss": 0.4575, "step": 84790 }, { "epoch": 20.99009900990099, "grad_norm": 0.32159623503685, "learning_rate": 2.1737629485672865e-05, "loss": 0.4542, "step": 84800 }, { "epoch": 20.992574257425744, "grad_norm": 0.3475413918495178, "learning_rate": 2.1726264036604062e-05, "loss": 0.4552, "step": 84810 }, { "epoch": 20.995049504950494, "grad_norm": 0.3047974407672882, "learning_rate": 2.1714900734738914e-05, "loss": 0.4542, "step": 84820 }, { "epoch": 20.997524752475247, "grad_norm": 0.2963158190250397, "learning_rate": 2.1703539580940406e-05, "loss": 0.4584, "step": 84830 }, { "epoch": 21.0, "grad_norm": 0.31065648794174194, "learning_rate": 2.1692180576071326e-05, "loss": 0.4584, "step": 84840 }, { "epoch": 21.002475247524753, "grad_norm": 0.3123495280742645, "learning_rate": 2.1680823720994283e-05, "loss": 0.4518, "step": 84850 }, { "epoch": 21.004950495049506, "grad_norm": 0.3261151611804962, "learning_rate": 2.166946901657182e-05, "loss": 0.4569, "step": 84860 }, { "epoch": 21.007425742574256, "grad_norm": 0.3243480920791626, "learning_rate": 2.1658116463666224e-05, "loss": 0.4564, "step": 84870 }, { "epoch": 21.00990099009901, "grad_norm": 0.30894193053245544, "learning_rate": 2.1646766063139616e-05, "loss": 0.4595, "step": 84880 }, { "epoch": 21.012376237623762, "grad_norm": 0.3316606283187866, "learning_rate": 2.163541781585402e-05, "loss": 0.453, "step": 84890 }, { "epoch": 21.014851485148515, "grad_norm": 0.30377405881881714, "learning_rate": 2.1624071722671268e-05, "loss": 0.4604, "step": 84900 }, { "epoch": 21.01732673267327, "grad_norm": 0.3192685842514038, "learning_rate": 2.1612727784453002e-05, "loss": 0.4584, "step": 84910 }, { "epoch": 21.019801980198018, "grad_norm": 0.2998015284538269, "learning_rate": 2.1601386002060704e-05, "loss": 0.456, "step": 84920 }, { "epoch": 21.02227722772277, "grad_norm": 0.32069072127342224, "learning_rate": 2.1590046376355723e-05, "loss": 0.4606, "step": 84930 }, { "epoch": 21.024752475247524, "grad_norm": 0.3040386438369751, "learning_rate": 2.1578708908199245e-05, "loss": 0.4589, "step": 84940 }, { "epoch": 21.027227722772277, "grad_norm": 0.32685497403144836, "learning_rate": 2.1567373598452246e-05, "loss": 0.4557, "step": 84950 }, { "epoch": 21.02970297029703, "grad_norm": 0.32984864711761475, "learning_rate": 2.1556040447975584e-05, "loss": 0.4562, "step": 84960 }, { "epoch": 21.032178217821784, "grad_norm": 0.32768040895462036, "learning_rate": 2.154470945762995e-05, "loss": 0.4603, "step": 84970 }, { "epoch": 21.034653465346533, "grad_norm": 0.2960442304611206, "learning_rate": 2.1533380628275835e-05, "loss": 0.4541, "step": 84980 }, { "epoch": 21.037128712871286, "grad_norm": 0.3164452910423279, "learning_rate": 2.152205396077361e-05, "loss": 0.4573, "step": 84990 }, { "epoch": 21.03960396039604, "grad_norm": 0.28768810629844666, "learning_rate": 2.151072945598343e-05, "loss": 0.4575, "step": 85000 }, { "epoch": 21.042079207920793, "grad_norm": 0.3059053122997284, "learning_rate": 2.1499407114765357e-05, "loss": 0.4598, "step": 85010 }, { "epoch": 21.044554455445546, "grad_norm": 0.3274344503879547, "learning_rate": 2.1488086937979207e-05, "loss": 0.4514, "step": 85020 }, { "epoch": 21.047029702970296, "grad_norm": 0.315519243478775, "learning_rate": 2.147676892648471e-05, "loss": 0.4565, "step": 85030 }, { "epoch": 21.04950495049505, "grad_norm": 0.326934278011322, "learning_rate": 2.146545308114135e-05, "loss": 0.46, "step": 85040 }, { "epoch": 21.051980198019802, "grad_norm": 0.28769582509994507, "learning_rate": 2.1454139402808526e-05, "loss": 0.4567, "step": 85050 }, { "epoch": 21.054455445544555, "grad_norm": 0.31833991408348083, "learning_rate": 2.1442827892345447e-05, "loss": 0.4588, "step": 85060 }, { "epoch": 21.056930693069308, "grad_norm": 0.32226869463920593, "learning_rate": 2.1431518550611107e-05, "loss": 0.46, "step": 85070 }, { "epoch": 21.059405940594058, "grad_norm": 0.33134138584136963, "learning_rate": 2.1420211378464396e-05, "loss": 0.4606, "step": 85080 }, { "epoch": 21.06188118811881, "grad_norm": 0.300838440656662, "learning_rate": 2.1408906376764044e-05, "loss": 0.4532, "step": 85090 }, { "epoch": 21.064356435643564, "grad_norm": 0.31649601459503174, "learning_rate": 2.139760354636856e-05, "loss": 0.4567, "step": 85100 }, { "epoch": 21.066831683168317, "grad_norm": 0.2923707067966461, "learning_rate": 2.1386302888136313e-05, "loss": 0.4563, "step": 85110 }, { "epoch": 21.06930693069307, "grad_norm": 0.29970136284828186, "learning_rate": 2.1375004402925532e-05, "loss": 0.4542, "step": 85120 }, { "epoch": 21.071782178217823, "grad_norm": 0.3195132315158844, "learning_rate": 2.136370809159427e-05, "loss": 0.4533, "step": 85130 }, { "epoch": 21.074257425742573, "grad_norm": 0.3308466076850891, "learning_rate": 2.1352413955000393e-05, "loss": 0.4572, "step": 85140 }, { "epoch": 21.076732673267326, "grad_norm": 0.3411208391189575, "learning_rate": 2.134112199400159e-05, "loss": 0.4601, "step": 85150 }, { "epoch": 21.07920792079208, "grad_norm": 0.31702592968940735, "learning_rate": 2.1329832209455474e-05, "loss": 0.4564, "step": 85160 }, { "epoch": 21.081683168316832, "grad_norm": 0.32021912932395935, "learning_rate": 2.131854460221939e-05, "loss": 0.4555, "step": 85170 }, { "epoch": 21.084158415841586, "grad_norm": 0.3162181079387665, "learning_rate": 2.1307259173150553e-05, "loss": 0.4625, "step": 85180 }, { "epoch": 21.086633663366335, "grad_norm": 0.3445245623588562, "learning_rate": 2.1295975923106022e-05, "loss": 0.456, "step": 85190 }, { "epoch": 21.08910891089109, "grad_norm": 0.31176409125328064, "learning_rate": 2.1284694852942706e-05, "loss": 0.4605, "step": 85200 }, { "epoch": 21.09158415841584, "grad_norm": 0.32491084933280945, "learning_rate": 2.127341596351731e-05, "loss": 0.4547, "step": 85210 }, { "epoch": 21.094059405940595, "grad_norm": 0.3069522976875305, "learning_rate": 2.1262139255686376e-05, "loss": 0.46, "step": 85220 }, { "epoch": 21.096534653465348, "grad_norm": 0.2842050790786743, "learning_rate": 2.1250864730306313e-05, "loss": 0.4561, "step": 85230 }, { "epoch": 21.099009900990097, "grad_norm": 0.3061066269874573, "learning_rate": 2.1239592388233364e-05, "loss": 0.4592, "step": 85240 }, { "epoch": 21.10148514851485, "grad_norm": 0.3185037076473236, "learning_rate": 2.1228322230323554e-05, "loss": 0.453, "step": 85250 }, { "epoch": 21.103960396039604, "grad_norm": 0.3092673420906067, "learning_rate": 2.121705425743279e-05, "loss": 0.4568, "step": 85260 }, { "epoch": 21.106435643564357, "grad_norm": 0.31026193499565125, "learning_rate": 2.1205788470416827e-05, "loss": 0.4622, "step": 85270 }, { "epoch": 21.10891089108911, "grad_norm": 0.31248313188552856, "learning_rate": 2.1194524870131198e-05, "loss": 0.4556, "step": 85280 }, { "epoch": 21.111386138613863, "grad_norm": 0.31800377368927, "learning_rate": 2.1183263457431296e-05, "loss": 0.4608, "step": 85290 }, { "epoch": 21.113861386138613, "grad_norm": 0.30093711614608765, "learning_rate": 2.1172004233172355e-05, "loss": 0.4552, "step": 85300 }, { "epoch": 21.116336633663366, "grad_norm": 0.30904385447502136, "learning_rate": 2.1160747198209464e-05, "loss": 0.4537, "step": 85310 }, { "epoch": 21.11881188118812, "grad_norm": 0.2901950478553772, "learning_rate": 2.1149492353397483e-05, "loss": 0.4541, "step": 85320 }, { "epoch": 21.121287128712872, "grad_norm": 0.3224174976348877, "learning_rate": 2.113823969959118e-05, "loss": 0.4561, "step": 85330 }, { "epoch": 21.123762376237625, "grad_norm": 0.3028750717639923, "learning_rate": 2.1126989237645078e-05, "loss": 0.4542, "step": 85340 }, { "epoch": 21.126237623762375, "grad_norm": 0.2961932420730591, "learning_rate": 2.1115740968413607e-05, "loss": 0.4573, "step": 85350 }, { "epoch": 21.128712871287128, "grad_norm": 0.29628926515579224, "learning_rate": 2.1104494892750996e-05, "loss": 0.459, "step": 85360 }, { "epoch": 21.13118811881188, "grad_norm": 0.3059825897216797, "learning_rate": 2.1093251011511296e-05, "loss": 0.4508, "step": 85370 }, { "epoch": 21.133663366336634, "grad_norm": 0.30206015706062317, "learning_rate": 2.1082009325548423e-05, "loss": 0.4598, "step": 85380 }, { "epoch": 21.136138613861387, "grad_norm": 0.32309967279434204, "learning_rate": 2.1070769835716093e-05, "loss": 0.4546, "step": 85390 }, { "epoch": 21.138613861386137, "grad_norm": 0.317705363035202, "learning_rate": 2.105953254286789e-05, "loss": 0.4582, "step": 85400 }, { "epoch": 21.14108910891089, "grad_norm": 0.3260877728462219, "learning_rate": 2.104829744785718e-05, "loss": 0.4588, "step": 85410 }, { "epoch": 21.143564356435643, "grad_norm": 0.33003270626068115, "learning_rate": 2.103706455153721e-05, "loss": 0.4623, "step": 85420 }, { "epoch": 21.146039603960396, "grad_norm": 0.31332433223724365, "learning_rate": 2.1025833854761074e-05, "loss": 0.461, "step": 85430 }, { "epoch": 21.14851485148515, "grad_norm": 0.32210296392440796, "learning_rate": 2.1014605358381646e-05, "loss": 0.4545, "step": 85440 }, { "epoch": 21.150990099009903, "grad_norm": 0.3062106966972351, "learning_rate": 2.100337906325161e-05, "loss": 0.4604, "step": 85450 }, { "epoch": 21.153465346534652, "grad_norm": 0.30522486567497253, "learning_rate": 2.0992154970223616e-05, "loss": 0.4572, "step": 85460 }, { "epoch": 21.155940594059405, "grad_norm": 0.29957786202430725, "learning_rate": 2.0980933080150013e-05, "loss": 0.4592, "step": 85470 }, { "epoch": 21.15841584158416, "grad_norm": 0.315795361995697, "learning_rate": 2.0969713393883018e-05, "loss": 0.4556, "step": 85480 }, { "epoch": 21.16089108910891, "grad_norm": 0.3049813508987427, "learning_rate": 2.0958495912274707e-05, "loss": 0.4604, "step": 85490 }, { "epoch": 21.163366336633665, "grad_norm": 0.31490856409072876, "learning_rate": 2.0947280636176987e-05, "loss": 0.4562, "step": 85500 }, { "epoch": 21.165841584158414, "grad_norm": 0.31892886757850647, "learning_rate": 2.0936067566441575e-05, "loss": 0.459, "step": 85510 }, { "epoch": 21.168316831683168, "grad_norm": 0.3067571222782135, "learning_rate": 2.092485670392001e-05, "loss": 0.4561, "step": 85520 }, { "epoch": 21.17079207920792, "grad_norm": 0.3165014088153839, "learning_rate": 2.0913648049463696e-05, "loss": 0.4559, "step": 85530 }, { "epoch": 21.173267326732674, "grad_norm": 0.3753540813922882, "learning_rate": 2.0902441603923884e-05, "loss": 0.4624, "step": 85540 }, { "epoch": 21.175742574257427, "grad_norm": 0.3261050283908844, "learning_rate": 2.089123736815158e-05, "loss": 0.4558, "step": 85550 }, { "epoch": 21.178217821782177, "grad_norm": 0.3335440158843994, "learning_rate": 2.08800353429977e-05, "loss": 0.4621, "step": 85560 }, { "epoch": 21.18069306930693, "grad_norm": 0.32535839080810547, "learning_rate": 2.0868835529312974e-05, "loss": 0.4592, "step": 85570 }, { "epoch": 21.183168316831683, "grad_norm": 0.3240704834461212, "learning_rate": 2.0857637927947948e-05, "loss": 0.4539, "step": 85580 }, { "epoch": 21.185643564356436, "grad_norm": 0.3088417947292328, "learning_rate": 2.0846442539752974e-05, "loss": 0.4538, "step": 85590 }, { "epoch": 21.18811881188119, "grad_norm": 0.3343394994735718, "learning_rate": 2.0835249365578296e-05, "loss": 0.4531, "step": 85600 }, { "epoch": 21.190594059405942, "grad_norm": 0.3275616765022278, "learning_rate": 2.0824058406273977e-05, "loss": 0.4581, "step": 85610 }, { "epoch": 21.193069306930692, "grad_norm": 0.3031502962112427, "learning_rate": 2.0812869662689854e-05, "loss": 0.4554, "step": 85620 }, { "epoch": 21.195544554455445, "grad_norm": 0.3226848542690277, "learning_rate": 2.0801683135675686e-05, "loss": 0.4603, "step": 85630 }, { "epoch": 21.198019801980198, "grad_norm": 0.3059500753879547, "learning_rate": 2.079049882608097e-05, "loss": 0.4573, "step": 85640 }, { "epoch": 21.20049504950495, "grad_norm": 0.31680718064308167, "learning_rate": 2.0779316734755105e-05, "loss": 0.452, "step": 85650 }, { "epoch": 21.202970297029704, "grad_norm": 0.31626537442207336, "learning_rate": 2.076813686254731e-05, "loss": 0.4554, "step": 85660 }, { "epoch": 21.205445544554454, "grad_norm": 0.3329479992389679, "learning_rate": 2.0756959210306588e-05, "loss": 0.4582, "step": 85670 }, { "epoch": 21.207920792079207, "grad_norm": 0.34747713804244995, "learning_rate": 2.0745783778881846e-05, "loss": 0.4567, "step": 85680 }, { "epoch": 21.21039603960396, "grad_norm": 0.3041094243526459, "learning_rate": 2.073461056912175e-05, "loss": 0.4573, "step": 85690 }, { "epoch": 21.212871287128714, "grad_norm": 0.295543372631073, "learning_rate": 2.072343958187486e-05, "loss": 0.4557, "step": 85700 }, { "epoch": 21.215346534653467, "grad_norm": 0.3096039295196533, "learning_rate": 2.071227081798951e-05, "loss": 0.4573, "step": 85710 }, { "epoch": 21.217821782178216, "grad_norm": 0.31771332025527954, "learning_rate": 2.070110427831391e-05, "loss": 0.4546, "step": 85720 }, { "epoch": 21.22029702970297, "grad_norm": 0.29919713735580444, "learning_rate": 2.06899399636961e-05, "loss": 0.4564, "step": 85730 }, { "epoch": 21.222772277227723, "grad_norm": 0.3140414357185364, "learning_rate": 2.067877787498392e-05, "loss": 0.4525, "step": 85740 }, { "epoch": 21.225247524752476, "grad_norm": 0.31018537282943726, "learning_rate": 2.0667618013025037e-05, "loss": 0.4554, "step": 85750 }, { "epoch": 21.22772277227723, "grad_norm": 0.35618460178375244, "learning_rate": 2.0656460378666992e-05, "loss": 0.4546, "step": 85760 }, { "epoch": 21.230198019801982, "grad_norm": 0.3619539439678192, "learning_rate": 2.064530497275715e-05, "loss": 0.4617, "step": 85770 }, { "epoch": 21.23267326732673, "grad_norm": 0.36292433738708496, "learning_rate": 2.0634151796142648e-05, "loss": 0.4569, "step": 85780 }, { "epoch": 21.235148514851485, "grad_norm": 0.31179362535476685, "learning_rate": 2.0623000849670525e-05, "loss": 0.4567, "step": 85790 }, { "epoch": 21.237623762376238, "grad_norm": 0.32046669721603394, "learning_rate": 2.061185213418763e-05, "loss": 0.4552, "step": 85800 }, { "epoch": 21.24009900990099, "grad_norm": 0.3262799382209778, "learning_rate": 2.0600705650540624e-05, "loss": 0.4579, "step": 85810 }, { "epoch": 21.242574257425744, "grad_norm": 0.3017880320549011, "learning_rate": 2.0589561399575985e-05, "loss": 0.4583, "step": 85820 }, { "epoch": 21.245049504950494, "grad_norm": 0.3316352367401123, "learning_rate": 2.0578419382140067e-05, "loss": 0.4621, "step": 85830 }, { "epoch": 21.247524752475247, "grad_norm": 0.2881028652191162, "learning_rate": 2.056727959907905e-05, "loss": 0.4555, "step": 85840 }, { "epoch": 21.25, "grad_norm": 0.3072699010372162, "learning_rate": 2.055614205123891e-05, "loss": 0.4597, "step": 85850 }, { "epoch": 21.252475247524753, "grad_norm": 0.32754650712013245, "learning_rate": 2.0545006739465428e-05, "loss": 0.4617, "step": 85860 }, { "epoch": 21.254950495049506, "grad_norm": 0.3594135046005249, "learning_rate": 2.0533873664604337e-05, "loss": 0.4602, "step": 85870 }, { "epoch": 21.257425742574256, "grad_norm": 0.299475759267807, "learning_rate": 2.052274282750108e-05, "loss": 0.4612, "step": 85880 }, { "epoch": 21.25990099009901, "grad_norm": 0.3274804651737213, "learning_rate": 2.051161422900095e-05, "loss": 0.4511, "step": 85890 }, { "epoch": 21.262376237623762, "grad_norm": 0.35597753524780273, "learning_rate": 2.0500487869949125e-05, "loss": 0.4557, "step": 85900 }, { "epoch": 21.264851485148515, "grad_norm": 0.30966654419898987, "learning_rate": 2.0489363751190576e-05, "loss": 0.4562, "step": 85910 }, { "epoch": 21.26732673267327, "grad_norm": 0.3343530595302582, "learning_rate": 2.047824187357008e-05, "loss": 0.4629, "step": 85920 }, { "epoch": 21.269801980198018, "grad_norm": 0.2953992784023285, "learning_rate": 2.0467122237932306e-05, "loss": 0.4538, "step": 85930 }, { "epoch": 21.27227722772277, "grad_norm": 0.32363617420196533, "learning_rate": 2.0456004845121678e-05, "loss": 0.4517, "step": 85940 }, { "epoch": 21.274752475247524, "grad_norm": 0.3061356246471405, "learning_rate": 2.044488969598253e-05, "loss": 0.4521, "step": 85950 }, { "epoch": 21.277227722772277, "grad_norm": 0.3427048921585083, "learning_rate": 2.0433776791358944e-05, "loss": 0.4573, "step": 85960 }, { "epoch": 21.27970297029703, "grad_norm": 0.3375462591648102, "learning_rate": 2.0422666132094887e-05, "loss": 0.4532, "step": 85970 }, { "epoch": 21.282178217821784, "grad_norm": 0.3038133382797241, "learning_rate": 2.041155771903416e-05, "loss": 0.4619, "step": 85980 }, { "epoch": 21.284653465346533, "grad_norm": 0.3051995635032654, "learning_rate": 2.040045155302035e-05, "loss": 0.4539, "step": 85990 }, { "epoch": 21.287128712871286, "grad_norm": 0.29301029443740845, "learning_rate": 2.0389347634896917e-05, "loss": 0.4577, "step": 86000 }, { "epoch": 21.28960396039604, "grad_norm": 0.3029516339302063, "learning_rate": 2.0378245965507097e-05, "loss": 0.4592, "step": 86010 }, { "epoch": 21.292079207920793, "grad_norm": 0.3147197961807251, "learning_rate": 2.036714654569401e-05, "loss": 0.4606, "step": 86020 }, { "epoch": 21.294554455445546, "grad_norm": 0.31022879481315613, "learning_rate": 2.0356049376300608e-05, "loss": 0.4574, "step": 86030 }, { "epoch": 21.297029702970296, "grad_norm": 0.3189852237701416, "learning_rate": 2.034495445816961e-05, "loss": 0.4571, "step": 86040 }, { "epoch": 21.29950495049505, "grad_norm": 0.30605778098106384, "learning_rate": 2.0333861792143605e-05, "loss": 0.4534, "step": 86050 }, { "epoch": 21.301980198019802, "grad_norm": 0.29370349645614624, "learning_rate": 2.032277137906502e-05, "loss": 0.4527, "step": 86060 }, { "epoch": 21.304455445544555, "grad_norm": 0.2907230854034424, "learning_rate": 2.0311683219776106e-05, "loss": 0.4581, "step": 86070 }, { "epoch": 21.306930693069308, "grad_norm": 0.3138841390609741, "learning_rate": 2.0300597315118915e-05, "loss": 0.4589, "step": 86080 }, { "epoch": 21.309405940594058, "grad_norm": 0.3291234076023102, "learning_rate": 2.0289513665935357e-05, "loss": 0.4562, "step": 86090 }, { "epoch": 21.31188118811881, "grad_norm": 0.3168216347694397, "learning_rate": 2.0278432273067183e-05, "loss": 0.4532, "step": 86100 }, { "epoch": 21.314356435643564, "grad_norm": 0.30605167150497437, "learning_rate": 2.0267353137355926e-05, "loss": 0.4565, "step": 86110 }, { "epoch": 21.316831683168317, "grad_norm": 0.2977610230445862, "learning_rate": 2.025627625964297e-05, "loss": 0.457, "step": 86120 }, { "epoch": 21.31930693069307, "grad_norm": 0.3159129321575165, "learning_rate": 2.0245201640769535e-05, "loss": 0.4593, "step": 86130 }, { "epoch": 21.321782178217823, "grad_norm": 0.2967125177383423, "learning_rate": 2.0234129281576692e-05, "loss": 0.4579, "step": 86140 }, { "epoch": 21.324257425742573, "grad_norm": 0.29505637288093567, "learning_rate": 2.022305918290529e-05, "loss": 0.4588, "step": 86150 }, { "epoch": 21.326732673267326, "grad_norm": 0.28018122911453247, "learning_rate": 2.0211991345596e-05, "loss": 0.4556, "step": 86160 }, { "epoch": 21.32920792079208, "grad_norm": 0.30316129326820374, "learning_rate": 2.0200925770489416e-05, "loss": 0.4612, "step": 86170 }, { "epoch": 21.331683168316832, "grad_norm": 0.33158594369888306, "learning_rate": 2.0189862458425863e-05, "loss": 0.4572, "step": 86180 }, { "epoch": 21.334158415841586, "grad_norm": 0.33943310379981995, "learning_rate": 2.0178801410245507e-05, "loss": 0.4539, "step": 86190 }, { "epoch": 21.336633663366335, "grad_norm": 0.3079715073108673, "learning_rate": 2.0167742626788384e-05, "loss": 0.454, "step": 86200 }, { "epoch": 21.33910891089109, "grad_norm": 0.33427107334136963, "learning_rate": 2.015668610889435e-05, "loss": 0.4549, "step": 86210 }, { "epoch": 21.34158415841584, "grad_norm": 0.29869717359542847, "learning_rate": 2.0145631857403057e-05, "loss": 0.4584, "step": 86220 }, { "epoch": 21.344059405940595, "grad_norm": 0.31918761134147644, "learning_rate": 2.0134579873153984e-05, "loss": 0.4596, "step": 86230 }, { "epoch": 21.346534653465348, "grad_norm": 0.30885061621665955, "learning_rate": 2.012353015698648e-05, "loss": 0.4601, "step": 86240 }, { "epoch": 21.349009900990097, "grad_norm": 0.30568400025367737, "learning_rate": 2.0112482709739706e-05, "loss": 0.4525, "step": 86250 }, { "epoch": 21.35148514851485, "grad_norm": 0.30502548813819885, "learning_rate": 2.0101437532252615e-05, "loss": 0.453, "step": 86260 }, { "epoch": 21.353960396039604, "grad_norm": 0.3182929754257202, "learning_rate": 2.009039462536403e-05, "loss": 0.4516, "step": 86270 }, { "epoch": 21.356435643564357, "grad_norm": 0.3241312801837921, "learning_rate": 2.0079353989912604e-05, "loss": 0.4575, "step": 86280 }, { "epoch": 21.35891089108911, "grad_norm": 0.34859171509742737, "learning_rate": 2.006831562673676e-05, "loss": 0.4571, "step": 86290 }, { "epoch": 21.361386138613863, "grad_norm": 0.31172287464141846, "learning_rate": 2.005727953667484e-05, "loss": 0.4597, "step": 86300 }, { "epoch": 21.363861386138613, "grad_norm": 0.2995964288711548, "learning_rate": 2.004624572056491e-05, "loss": 0.4552, "step": 86310 }, { "epoch": 21.366336633663366, "grad_norm": 0.29646599292755127, "learning_rate": 2.0035214179244954e-05, "loss": 0.455, "step": 86320 }, { "epoch": 21.36881188118812, "grad_norm": 0.32776203751564026, "learning_rate": 2.0024184913552718e-05, "loss": 0.4599, "step": 86330 }, { "epoch": 21.371287128712872, "grad_norm": 0.3021012246608734, "learning_rate": 2.0013157924325838e-05, "loss": 0.458, "step": 86340 }, { "epoch": 21.373762376237625, "grad_norm": 0.3125920295715332, "learning_rate": 2.000213321240169e-05, "loss": 0.4605, "step": 86350 }, { "epoch": 21.376237623762375, "grad_norm": 0.3104737102985382, "learning_rate": 1.999111077861756e-05, "loss": 0.4612, "step": 86360 }, { "epoch": 21.378712871287128, "grad_norm": 0.3206599950790405, "learning_rate": 1.9980090623810544e-05, "loss": 0.4597, "step": 86370 }, { "epoch": 21.38118811881188, "grad_norm": 0.30036461353302, "learning_rate": 1.9969072748817514e-05, "loss": 0.4571, "step": 86380 }, { "epoch": 21.383663366336634, "grad_norm": 0.30654358863830566, "learning_rate": 1.9958057154475218e-05, "loss": 0.4541, "step": 86390 }, { "epoch": 21.386138613861387, "grad_norm": 0.3116571009159088, "learning_rate": 1.9947043841620246e-05, "loss": 0.4602, "step": 86400 }, { "epoch": 21.388613861386137, "grad_norm": 0.31314602494239807, "learning_rate": 1.993603281108895e-05, "loss": 0.4492, "step": 86410 }, { "epoch": 21.39108910891089, "grad_norm": 0.30184850096702576, "learning_rate": 1.992502406371755e-05, "loss": 0.4625, "step": 86420 }, { "epoch": 21.393564356435643, "grad_norm": 0.29316332936286926, "learning_rate": 1.991401760034209e-05, "loss": 0.4612, "step": 86430 }, { "epoch": 21.396039603960396, "grad_norm": 0.3449891209602356, "learning_rate": 1.990301342179846e-05, "loss": 0.4508, "step": 86440 }, { "epoch": 21.39851485148515, "grad_norm": 0.3035705089569092, "learning_rate": 1.9892011528922343e-05, "loss": 0.4581, "step": 86450 }, { "epoch": 21.400990099009903, "grad_norm": 0.2957937717437744, "learning_rate": 1.9881011922549213e-05, "loss": 0.4535, "step": 86460 }, { "epoch": 21.403465346534652, "grad_norm": 0.3063206672668457, "learning_rate": 1.98700146035145e-05, "loss": 0.4552, "step": 86470 }, { "epoch": 21.405940594059405, "grad_norm": 0.29459846019744873, "learning_rate": 1.9859019572653337e-05, "loss": 0.4575, "step": 86480 }, { "epoch": 21.40841584158416, "grad_norm": 0.3026101291179657, "learning_rate": 1.9848026830800704e-05, "loss": 0.4586, "step": 86490 }, { "epoch": 21.41089108910891, "grad_norm": 0.31028127670288086, "learning_rate": 1.9837036378791452e-05, "loss": 0.457, "step": 86500 }, { "epoch": 21.413366336633665, "grad_norm": 0.32216528058052063, "learning_rate": 1.982604821746024e-05, "loss": 0.4648, "step": 86510 }, { "epoch": 21.415841584158414, "grad_norm": 0.2859138548374176, "learning_rate": 1.9815062347641538e-05, "loss": 0.4584, "step": 86520 }, { "epoch": 21.418316831683168, "grad_norm": 0.3332684338092804, "learning_rate": 1.9804078770169632e-05, "loss": 0.4606, "step": 86530 }, { "epoch": 21.42079207920792, "grad_norm": 0.31339532136917114, "learning_rate": 1.9793097485878665e-05, "loss": 0.4581, "step": 86540 }, { "epoch": 21.423267326732674, "grad_norm": 0.3096730411052704, "learning_rate": 1.9782118495602614e-05, "loss": 0.4512, "step": 86550 }, { "epoch": 21.425742574257427, "grad_norm": 0.31824129819869995, "learning_rate": 1.9771141800175223e-05, "loss": 0.4546, "step": 86560 }, { "epoch": 21.428217821782177, "grad_norm": 0.29011765122413635, "learning_rate": 1.9760167400430124e-05, "loss": 0.4568, "step": 86570 }, { "epoch": 21.43069306930693, "grad_norm": 0.31064388155937195, "learning_rate": 1.9749195297200758e-05, "loss": 0.4553, "step": 86580 }, { "epoch": 21.433168316831683, "grad_norm": 0.31281211972236633, "learning_rate": 1.9738225491320372e-05, "loss": 0.4601, "step": 86590 }, { "epoch": 21.435643564356436, "grad_norm": 0.3003929853439331, "learning_rate": 1.9727257983622037e-05, "loss": 0.4541, "step": 86600 }, { "epoch": 21.43811881188119, "grad_norm": 0.3081766366958618, "learning_rate": 1.971629277493867e-05, "loss": 0.457, "step": 86610 }, { "epoch": 21.440594059405942, "grad_norm": 0.30130907893180847, "learning_rate": 1.9705329866103035e-05, "loss": 0.4594, "step": 86620 }, { "epoch": 21.443069306930692, "grad_norm": 0.30981290340423584, "learning_rate": 1.9694369257947642e-05, "loss": 0.4575, "step": 86630 }, { "epoch": 21.445544554455445, "grad_norm": 0.32764095067977905, "learning_rate": 1.9683410951304932e-05, "loss": 0.4557, "step": 86640 }, { "epoch": 21.448019801980198, "grad_norm": 0.353294312953949, "learning_rate": 1.9672454947007063e-05, "loss": 0.4551, "step": 86650 }, { "epoch": 21.45049504950495, "grad_norm": 0.33022305369377136, "learning_rate": 1.96615012458861e-05, "loss": 0.4581, "step": 86660 }, { "epoch": 21.452970297029704, "grad_norm": 0.30045607686042786, "learning_rate": 1.965054984877392e-05, "loss": 0.4598, "step": 86670 }, { "epoch": 21.455445544554454, "grad_norm": 0.290896475315094, "learning_rate": 1.963960075650217e-05, "loss": 0.4569, "step": 86680 }, { "epoch": 21.457920792079207, "grad_norm": 0.3393512964248657, "learning_rate": 1.96286539699024e-05, "loss": 0.4556, "step": 86690 }, { "epoch": 21.46039603960396, "grad_norm": 0.3103494942188263, "learning_rate": 1.9617709489805907e-05, "loss": 0.4569, "step": 86700 }, { "epoch": 21.462871287128714, "grad_norm": 0.2954244613647461, "learning_rate": 1.960676731704389e-05, "loss": 0.4595, "step": 86710 }, { "epoch": 21.465346534653467, "grad_norm": 0.3286442756652832, "learning_rate": 1.95958274524473e-05, "loss": 0.4586, "step": 86720 }, { "epoch": 21.467821782178216, "grad_norm": 0.29272589087486267, "learning_rate": 1.958488989684697e-05, "loss": 0.4554, "step": 86730 }, { "epoch": 21.47029702970297, "grad_norm": 0.31941863894462585, "learning_rate": 1.9573954651073545e-05, "loss": 0.4547, "step": 86740 }, { "epoch": 21.472772277227723, "grad_norm": 0.3078796863555908, "learning_rate": 1.9563021715957468e-05, "loss": 0.4597, "step": 86750 }, { "epoch": 21.475247524752476, "grad_norm": 0.30179736018180847, "learning_rate": 1.9552091092328994e-05, "loss": 0.4589, "step": 86760 }, { "epoch": 21.47772277227723, "grad_norm": 0.3405032157897949, "learning_rate": 1.9541162781018297e-05, "loss": 0.4522, "step": 86770 }, { "epoch": 21.480198019801982, "grad_norm": 0.31217795610427856, "learning_rate": 1.953023678285527e-05, "loss": 0.4593, "step": 86780 }, { "epoch": 21.48267326732673, "grad_norm": 0.297579824924469, "learning_rate": 1.9519313098669665e-05, "loss": 0.4556, "step": 86790 }, { "epoch": 21.485148514851485, "grad_norm": 0.30325278639793396, "learning_rate": 1.9508391729291075e-05, "loss": 0.4594, "step": 86800 }, { "epoch": 21.487623762376238, "grad_norm": 0.29992353916168213, "learning_rate": 1.9497472675548922e-05, "loss": 0.4533, "step": 86810 }, { "epoch": 21.49009900990099, "grad_norm": 0.30682650208473206, "learning_rate": 1.9486555938272417e-05, "loss": 0.4604, "step": 86820 }, { "epoch": 21.492574257425744, "grad_norm": 0.3105921447277069, "learning_rate": 1.9475641518290606e-05, "loss": 0.4588, "step": 86830 }, { "epoch": 21.495049504950494, "grad_norm": 0.30905210971832275, "learning_rate": 1.9464729416432374e-05, "loss": 0.4547, "step": 86840 }, { "epoch": 21.497524752475247, "grad_norm": 0.30217665433883667, "learning_rate": 1.9453819633526443e-05, "loss": 0.4574, "step": 86850 }, { "epoch": 21.5, "grad_norm": 0.2958139479160309, "learning_rate": 1.944291217040131e-05, "loss": 0.464, "step": 86860 }, { "epoch": 21.502475247524753, "grad_norm": 0.31171026825904846, "learning_rate": 1.943200702788534e-05, "loss": 0.4568, "step": 86870 }, { "epoch": 21.504950495049506, "grad_norm": 0.28105399012565613, "learning_rate": 1.9421104206806716e-05, "loss": 0.4581, "step": 86880 }, { "epoch": 21.507425742574256, "grad_norm": 0.31026411056518555, "learning_rate": 1.9410203707993435e-05, "loss": 0.4523, "step": 86890 }, { "epoch": 21.50990099009901, "grad_norm": 0.305429071187973, "learning_rate": 1.939930553227328e-05, "loss": 0.4567, "step": 86900 }, { "epoch": 21.512376237623762, "grad_norm": 0.3059220016002655, "learning_rate": 1.938840968047393e-05, "loss": 0.4575, "step": 86910 }, { "epoch": 21.514851485148515, "grad_norm": 0.3173791766166687, "learning_rate": 1.937751615342286e-05, "loss": 0.4512, "step": 86920 }, { "epoch": 21.51732673267327, "grad_norm": 0.32188889384269714, "learning_rate": 1.9366624951947327e-05, "loss": 0.4586, "step": 86930 }, { "epoch": 21.519801980198018, "grad_norm": 0.3492930233478546, "learning_rate": 1.9355736076874488e-05, "loss": 0.4572, "step": 86940 }, { "epoch": 21.52227722772277, "grad_norm": 0.2971484959125519, "learning_rate": 1.9344849529031238e-05, "loss": 0.4533, "step": 86950 }, { "epoch": 21.524752475247524, "grad_norm": 0.28686052560806274, "learning_rate": 1.933396530924438e-05, "loss": 0.4566, "step": 86960 }, { "epoch": 21.527227722772277, "grad_norm": 0.32755741477012634, "learning_rate": 1.932308341834046e-05, "loss": 0.4651, "step": 86970 }, { "epoch": 21.52970297029703, "grad_norm": 0.32639217376708984, "learning_rate": 1.9312203857145907e-05, "loss": 0.4566, "step": 86980 }, { "epoch": 21.532178217821784, "grad_norm": 0.3032589852809906, "learning_rate": 1.930132662648696e-05, "loss": 0.4548, "step": 86990 }, { "epoch": 21.534653465346533, "grad_norm": 0.2974906265735626, "learning_rate": 1.929045172718965e-05, "loss": 0.4579, "step": 87000 }, { "epoch": 21.537128712871286, "grad_norm": 0.27269411087036133, "learning_rate": 1.9279579160079886e-05, "loss": 0.4511, "step": 87010 }, { "epoch": 21.53960396039604, "grad_norm": 0.2964635193347931, "learning_rate": 1.9268708925983326e-05, "loss": 0.4564, "step": 87020 }, { "epoch": 21.542079207920793, "grad_norm": 0.2935657203197479, "learning_rate": 1.9257841025725515e-05, "loss": 0.4593, "step": 87030 }, { "epoch": 21.544554455445546, "grad_norm": 0.32459118962287903, "learning_rate": 1.924697546013181e-05, "loss": 0.4628, "step": 87040 }, { "epoch": 21.547029702970296, "grad_norm": 0.34072187542915344, "learning_rate": 1.9236112230027375e-05, "loss": 0.4571, "step": 87050 }, { "epoch": 21.54950495049505, "grad_norm": 0.29458123445510864, "learning_rate": 1.9225251336237172e-05, "loss": 0.4558, "step": 87060 }, { "epoch": 21.551980198019802, "grad_norm": 0.2994418144226074, "learning_rate": 1.9214392779586033e-05, "loss": 0.4537, "step": 87070 }, { "epoch": 21.554455445544555, "grad_norm": 0.2996836304664612, "learning_rate": 1.920353656089861e-05, "loss": 0.4608, "step": 87080 }, { "epoch": 21.556930693069308, "grad_norm": 0.3186798095703125, "learning_rate": 1.9192682680999335e-05, "loss": 0.4564, "step": 87090 }, { "epoch": 21.55940594059406, "grad_norm": 0.30866917967796326, "learning_rate": 1.9181831140712496e-05, "loss": 0.4546, "step": 87100 }, { "epoch": 21.56188118811881, "grad_norm": 0.31298649311065674, "learning_rate": 1.9170981940862214e-05, "loss": 0.4569, "step": 87110 }, { "epoch": 21.564356435643564, "grad_norm": 0.2978231608867645, "learning_rate": 1.9160135082272407e-05, "loss": 0.4527, "step": 87120 }, { "epoch": 21.566831683168317, "grad_norm": 0.34461650252342224, "learning_rate": 1.9149290565766793e-05, "loss": 0.46, "step": 87130 }, { "epoch": 21.56930693069307, "grad_norm": 0.37862610816955566, "learning_rate": 1.9138448392168968e-05, "loss": 0.4592, "step": 87140 }, { "epoch": 21.571782178217823, "grad_norm": 0.3366420269012451, "learning_rate": 1.912760856230233e-05, "loss": 0.4593, "step": 87150 }, { "epoch": 21.574257425742573, "grad_norm": 0.31470757722854614, "learning_rate": 1.9116771076990088e-05, "loss": 0.4582, "step": 87160 }, { "epoch": 21.576732673267326, "grad_norm": 0.319855660200119, "learning_rate": 1.910593593705523e-05, "loss": 0.4584, "step": 87170 }, { "epoch": 21.57920792079208, "grad_norm": 0.3204684257507324, "learning_rate": 1.9095103143320693e-05, "loss": 0.4589, "step": 87180 }, { "epoch": 21.581683168316832, "grad_norm": 0.28798821568489075, "learning_rate": 1.9084272696609118e-05, "loss": 0.4521, "step": 87190 }, { "epoch": 21.584158415841586, "grad_norm": 0.3110942840576172, "learning_rate": 1.9073444597742985e-05, "loss": 0.459, "step": 87200 }, { "epoch": 21.586633663366335, "grad_norm": 0.339761346578598, "learning_rate": 1.906261884754464e-05, "loss": 0.4579, "step": 87210 }, { "epoch": 21.58910891089109, "grad_norm": 0.3101853132247925, "learning_rate": 1.9051795446836242e-05, "loss": 0.4569, "step": 87220 }, { "epoch": 21.59158415841584, "grad_norm": 0.29176947474479675, "learning_rate": 1.904097439643972e-05, "loss": 0.4573, "step": 87230 }, { "epoch": 21.594059405940595, "grad_norm": 0.3215395510196686, "learning_rate": 1.9030155697176898e-05, "loss": 0.46, "step": 87240 }, { "epoch": 21.596534653465348, "grad_norm": 0.32088813185691833, "learning_rate": 1.9019339349869354e-05, "loss": 0.4524, "step": 87250 }, { "epoch": 21.599009900990097, "grad_norm": 0.3544797897338867, "learning_rate": 1.9008525355338545e-05, "loss": 0.455, "step": 87260 }, { "epoch": 21.60148514851485, "grad_norm": 0.29322806000709534, "learning_rate": 1.8997713714405696e-05, "loss": 0.457, "step": 87270 }, { "epoch": 21.603960396039604, "grad_norm": 0.3015206754207611, "learning_rate": 1.8986904427891894e-05, "loss": 0.4541, "step": 87280 }, { "epoch": 21.606435643564357, "grad_norm": 0.3039247691631317, "learning_rate": 1.8976097496618044e-05, "loss": 0.4608, "step": 87290 }, { "epoch": 21.60891089108911, "grad_norm": 0.30722567439079285, "learning_rate": 1.8965292921404836e-05, "loss": 0.4605, "step": 87300 }, { "epoch": 21.611386138613863, "grad_norm": 0.305207222700119, "learning_rate": 1.8954490703072836e-05, "loss": 0.453, "step": 87310 }, { "epoch": 21.613861386138613, "grad_norm": 0.3181057274341583, "learning_rate": 1.8943690842442363e-05, "loss": 0.4574, "step": 87320 }, { "epoch": 21.616336633663366, "grad_norm": 0.3029590845108032, "learning_rate": 1.893289334033364e-05, "loss": 0.4652, "step": 87330 }, { "epoch": 21.61881188118812, "grad_norm": 0.31048864126205444, "learning_rate": 1.8922098197566624e-05, "loss": 0.455, "step": 87340 }, { "epoch": 21.621287128712872, "grad_norm": 0.30695319175720215, "learning_rate": 1.8911305414961166e-05, "loss": 0.4551, "step": 87350 }, { "epoch": 21.623762376237625, "grad_norm": 0.312252938747406, "learning_rate": 1.890051499333688e-05, "loss": 0.4592, "step": 87360 }, { "epoch": 21.626237623762375, "grad_norm": 0.3213062584400177, "learning_rate": 1.8889726933513247e-05, "loss": 0.4579, "step": 87370 }, { "epoch": 21.628712871287128, "grad_norm": 0.3023599088191986, "learning_rate": 1.8878941236309557e-05, "loss": 0.4572, "step": 87380 }, { "epoch": 21.63118811881188, "grad_norm": 0.305774450302124, "learning_rate": 1.886815790254488e-05, "loss": 0.4625, "step": 87390 }, { "epoch": 21.633663366336634, "grad_norm": 0.30348047614097595, "learning_rate": 1.8857376933038156e-05, "loss": 0.4571, "step": 87400 }, { "epoch": 21.636138613861387, "grad_norm": 0.28058966994285583, "learning_rate": 1.884659832860815e-05, "loss": 0.4547, "step": 87410 }, { "epoch": 21.638613861386137, "grad_norm": 0.2920067608356476, "learning_rate": 1.8835822090073406e-05, "loss": 0.4549, "step": 87420 }, { "epoch": 21.64108910891089, "grad_norm": 0.2924214005470276, "learning_rate": 1.8825048218252294e-05, "loss": 0.4564, "step": 87430 }, { "epoch": 21.643564356435643, "grad_norm": 0.2946043312549591, "learning_rate": 1.881427671396303e-05, "loss": 0.4562, "step": 87440 }, { "epoch": 21.646039603960396, "grad_norm": 0.3060379922389984, "learning_rate": 1.880350757802366e-05, "loss": 0.456, "step": 87450 }, { "epoch": 21.64851485148515, "grad_norm": 0.3358705937862396, "learning_rate": 1.8792740811252012e-05, "loss": 0.4542, "step": 87460 }, { "epoch": 21.650990099009903, "grad_norm": 0.2982158958911896, "learning_rate": 1.878197641446571e-05, "loss": 0.4563, "step": 87470 }, { "epoch": 21.653465346534652, "grad_norm": 0.2963380515575409, "learning_rate": 1.8771214388482323e-05, "loss": 0.455, "step": 87480 }, { "epoch": 21.655940594059405, "grad_norm": 0.29402098059654236, "learning_rate": 1.8760454734119104e-05, "loss": 0.4588, "step": 87490 }, { "epoch": 21.65841584158416, "grad_norm": 0.299283504486084, "learning_rate": 1.8749697452193167e-05, "loss": 0.4532, "step": 87500 }, { "epoch": 21.66089108910891, "grad_norm": 0.283674955368042, "learning_rate": 1.8738942543521475e-05, "loss": 0.4573, "step": 87510 }, { "epoch": 21.663366336633665, "grad_norm": 0.30250421166419983, "learning_rate": 1.872819000892081e-05, "loss": 0.4572, "step": 87520 }, { "epoch": 21.665841584158414, "grad_norm": 0.2904397249221802, "learning_rate": 1.871743984920774e-05, "loss": 0.4551, "step": 87530 }, { "epoch": 21.668316831683168, "grad_norm": 0.31445595622062683, "learning_rate": 1.8706692065198643e-05, "loss": 0.4588, "step": 87540 }, { "epoch": 21.67079207920792, "grad_norm": 0.31163063645362854, "learning_rate": 1.869594665770977e-05, "loss": 0.461, "step": 87550 }, { "epoch": 21.673267326732674, "grad_norm": 0.3009238839149475, "learning_rate": 1.868520362755718e-05, "loss": 0.4544, "step": 87560 }, { "epoch": 21.675742574257427, "grad_norm": 0.2765917479991913, "learning_rate": 1.867446297555669e-05, "loss": 0.4585, "step": 87570 }, { "epoch": 21.678217821782177, "grad_norm": 0.3205582797527313, "learning_rate": 1.866372470252401e-05, "loss": 0.4551, "step": 87580 }, { "epoch": 21.68069306930693, "grad_norm": 0.3037179410457611, "learning_rate": 1.8652988809274658e-05, "loss": 0.4572, "step": 87590 }, { "epoch": 21.683168316831683, "grad_norm": 0.30897125601768494, "learning_rate": 1.864225529662391e-05, "loss": 0.4545, "step": 87600 }, { "epoch": 21.685643564356436, "grad_norm": 0.302101731300354, "learning_rate": 1.8631524165386956e-05, "loss": 0.4591, "step": 87610 }, { "epoch": 21.68811881188119, "grad_norm": 0.33644700050354004, "learning_rate": 1.86207954163787e-05, "loss": 0.4507, "step": 87620 }, { "epoch": 21.69059405940594, "grad_norm": 0.31693461537361145, "learning_rate": 1.861006905041397e-05, "loss": 0.4578, "step": 87630 }, { "epoch": 21.693069306930692, "grad_norm": 0.3126542270183563, "learning_rate": 1.8599345068307322e-05, "loss": 0.4545, "step": 87640 }, { "epoch": 21.695544554455445, "grad_norm": 0.29426443576812744, "learning_rate": 1.8588623470873208e-05, "loss": 0.4625, "step": 87650 }, { "epoch": 21.698019801980198, "grad_norm": 0.30100369453430176, "learning_rate": 1.8577904258925827e-05, "loss": 0.4531, "step": 87660 }, { "epoch": 21.70049504950495, "grad_norm": 0.3062558174133301, "learning_rate": 1.856718743327925e-05, "loss": 0.4617, "step": 87670 }, { "epoch": 21.702970297029704, "grad_norm": 0.31467533111572266, "learning_rate": 1.855647299474736e-05, "loss": 0.4568, "step": 87680 }, { "epoch": 21.705445544554454, "grad_norm": 0.3169610798358917, "learning_rate": 1.8545760944143826e-05, "loss": 0.4595, "step": 87690 }, { "epoch": 21.707920792079207, "grad_norm": 0.30006858706474304, "learning_rate": 1.8535051282282167e-05, "loss": 0.4615, "step": 87700 }, { "epoch": 21.71039603960396, "grad_norm": 0.3023248612880707, "learning_rate": 1.852434400997573e-05, "loss": 0.4576, "step": 87710 }, { "epoch": 21.712871287128714, "grad_norm": 0.2906584143638611, "learning_rate": 1.851363912803764e-05, "loss": 0.4547, "step": 87720 }, { "epoch": 21.715346534653467, "grad_norm": 0.3267797529697418, "learning_rate": 1.8502936637280854e-05, "loss": 0.4543, "step": 87730 }, { "epoch": 21.717821782178216, "grad_norm": 0.2827913165092468, "learning_rate": 1.849223653851817e-05, "loss": 0.4604, "step": 87740 }, { "epoch": 21.72029702970297, "grad_norm": 0.29712051153182983, "learning_rate": 1.8481538832562202e-05, "loss": 0.4533, "step": 87750 }, { "epoch": 21.722772277227723, "grad_norm": 0.2882883846759796, "learning_rate": 1.8470843520225356e-05, "loss": 0.4558, "step": 87760 }, { "epoch": 21.725247524752476, "grad_norm": 0.28740954399108887, "learning_rate": 1.846015060231983e-05, "loss": 0.4568, "step": 87770 }, { "epoch": 21.72772277227723, "grad_norm": 0.2964887022972107, "learning_rate": 1.8449460079657764e-05, "loss": 0.4632, "step": 87780 }, { "epoch": 21.730198019801982, "grad_norm": 0.2997540235519409, "learning_rate": 1.8438771953050987e-05, "loss": 0.4613, "step": 87790 }, { "epoch": 21.73267326732673, "grad_norm": 0.2954403758049011, "learning_rate": 1.8428086223311176e-05, "loss": 0.4601, "step": 87800 }, { "epoch": 21.735148514851485, "grad_norm": 0.28617364168167114, "learning_rate": 1.841740289124986e-05, "loss": 0.4566, "step": 87810 }, { "epoch": 21.737623762376238, "grad_norm": 0.2894216775894165, "learning_rate": 1.8406721957678386e-05, "loss": 0.4521, "step": 87820 }, { "epoch": 21.74009900990099, "grad_norm": 0.3180599808692932, "learning_rate": 1.8396043423407876e-05, "loss": 0.4589, "step": 87830 }, { "epoch": 21.742574257425744, "grad_norm": 0.29023975133895874, "learning_rate": 1.8385367289249288e-05, "loss": 0.4574, "step": 87840 }, { "epoch": 21.745049504950494, "grad_norm": 0.2989390194416046, "learning_rate": 1.8374693556013416e-05, "loss": 0.461, "step": 87850 }, { "epoch": 21.747524752475247, "grad_norm": 0.28895851969718933, "learning_rate": 1.836402222451088e-05, "loss": 0.454, "step": 87860 }, { "epoch": 21.75, "grad_norm": 0.2980140745639801, "learning_rate": 1.8353353295552056e-05, "loss": 0.4581, "step": 87870 }, { "epoch": 21.752475247524753, "grad_norm": 0.3242093324661255, "learning_rate": 1.8342686769947197e-05, "loss": 0.458, "step": 87880 }, { "epoch": 21.754950495049506, "grad_norm": 0.28792762756347656, "learning_rate": 1.8332022648506375e-05, "loss": 0.4552, "step": 87890 }, { "epoch": 21.757425742574256, "grad_norm": 0.29751163721084595, "learning_rate": 1.8321360932039444e-05, "loss": 0.4596, "step": 87900 }, { "epoch": 21.75990099009901, "grad_norm": 0.3017401695251465, "learning_rate": 1.8310701621356075e-05, "loss": 0.4576, "step": 87910 }, { "epoch": 21.762376237623762, "grad_norm": 0.29947930574417114, "learning_rate": 1.8300044717265787e-05, "loss": 0.4566, "step": 87920 }, { "epoch": 21.764851485148515, "grad_norm": 0.30487093329429626, "learning_rate": 1.828939022057791e-05, "loss": 0.4591, "step": 87930 }, { "epoch": 21.76732673267327, "grad_norm": 0.31044235825538635, "learning_rate": 1.8278738132101563e-05, "loss": 0.4566, "step": 87940 }, { "epoch": 21.769801980198018, "grad_norm": 0.33154401183128357, "learning_rate": 1.8268088452645733e-05, "loss": 0.4542, "step": 87950 }, { "epoch": 21.77227722772277, "grad_norm": 0.29837724566459656, "learning_rate": 1.8257441183019152e-05, "loss": 0.456, "step": 87960 }, { "epoch": 21.774752475247524, "grad_norm": 0.29304078221321106, "learning_rate": 1.824679632403043e-05, "loss": 0.4548, "step": 87970 }, { "epoch": 21.777227722772277, "grad_norm": 0.2977586090564728, "learning_rate": 1.8236153876487994e-05, "loss": 0.4572, "step": 87980 }, { "epoch": 21.77970297029703, "grad_norm": 0.2945115268230438, "learning_rate": 1.8225513841200027e-05, "loss": 0.4529, "step": 87990 }, { "epoch": 21.782178217821784, "grad_norm": 0.2818536162376404, "learning_rate": 1.8214876218974607e-05, "loss": 0.4581, "step": 88000 }, { "epoch": 21.784653465346533, "grad_norm": 0.2931840121746063, "learning_rate": 1.820424101061956e-05, "loss": 0.4608, "step": 88010 }, { "epoch": 21.787128712871286, "grad_norm": 0.3016495406627655, "learning_rate": 1.8193608216942593e-05, "loss": 0.4567, "step": 88020 }, { "epoch": 21.78960396039604, "grad_norm": 0.3153245747089386, "learning_rate": 1.818297783875116e-05, "loss": 0.4483, "step": 88030 }, { "epoch": 21.792079207920793, "grad_norm": 0.299856960773468, "learning_rate": 1.8172349876852586e-05, "loss": 0.4613, "step": 88040 }, { "epoch": 21.794554455445546, "grad_norm": 0.2921539545059204, "learning_rate": 1.816172433205401e-05, "loss": 0.4589, "step": 88050 }, { "epoch": 21.797029702970296, "grad_norm": 0.3041907548904419, "learning_rate": 1.8151101205162357e-05, "loss": 0.4562, "step": 88060 }, { "epoch": 21.79950495049505, "grad_norm": 0.30313342809677124, "learning_rate": 1.8140480496984346e-05, "loss": 0.4568, "step": 88070 }, { "epoch": 21.801980198019802, "grad_norm": 0.3479236662387848, "learning_rate": 1.8129862208326625e-05, "loss": 0.4574, "step": 88080 }, { "epoch": 21.804455445544555, "grad_norm": 0.2851215898990631, "learning_rate": 1.8119246339995542e-05, "loss": 0.459, "step": 88090 }, { "epoch": 21.806930693069308, "grad_norm": 0.288215696811676, "learning_rate": 1.8108632892797296e-05, "loss": 0.4561, "step": 88100 }, { "epoch": 21.80940594059406, "grad_norm": 0.29879140853881836, "learning_rate": 1.8098021867537907e-05, "loss": 0.4589, "step": 88110 }, { "epoch": 21.81188118811881, "grad_norm": 0.3624948561191559, "learning_rate": 1.8087413265023244e-05, "loss": 0.4592, "step": 88120 }, { "epoch": 21.814356435643564, "grad_norm": 0.29049575328826904, "learning_rate": 1.807680708605894e-05, "loss": 0.4605, "step": 88130 }, { "epoch": 21.816831683168317, "grad_norm": 0.27938196063041687, "learning_rate": 1.8066203331450445e-05, "loss": 0.4529, "step": 88140 }, { "epoch": 21.81930693069307, "grad_norm": 0.28405654430389404, "learning_rate": 1.805560200200306e-05, "loss": 0.4546, "step": 88150 }, { "epoch": 21.821782178217823, "grad_norm": 0.29535090923309326, "learning_rate": 1.8045003098521908e-05, "loss": 0.4566, "step": 88160 }, { "epoch": 21.824257425742573, "grad_norm": 0.2842867374420166, "learning_rate": 1.8034406621811882e-05, "loss": 0.4608, "step": 88170 }, { "epoch": 21.826732673267326, "grad_norm": 0.31715428829193115, "learning_rate": 1.8023812572677683e-05, "loss": 0.4587, "step": 88180 }, { "epoch": 21.82920792079208, "grad_norm": 0.29793649911880493, "learning_rate": 1.8013220951923938e-05, "loss": 0.4524, "step": 88190 }, { "epoch": 21.831683168316832, "grad_norm": 0.2930491268634796, "learning_rate": 1.800263176035496e-05, "loss": 0.4544, "step": 88200 }, { "epoch": 21.834158415841586, "grad_norm": 0.3353261351585388, "learning_rate": 1.7992044998774922e-05, "loss": 0.4575, "step": 88210 }, { "epoch": 21.836633663366335, "grad_norm": 0.2771942913532257, "learning_rate": 1.7981460667987832e-05, "loss": 0.4502, "step": 88220 }, { "epoch": 21.83910891089109, "grad_norm": 0.29497143626213074, "learning_rate": 1.7970878768797516e-05, "loss": 0.4539, "step": 88230 }, { "epoch": 21.84158415841584, "grad_norm": 0.2890934944152832, "learning_rate": 1.7960299302007567e-05, "loss": 0.4552, "step": 88240 }, { "epoch": 21.844059405940595, "grad_norm": 0.2792344093322754, "learning_rate": 1.794972226842146e-05, "loss": 0.4474, "step": 88250 }, { "epoch": 21.846534653465348, "grad_norm": 0.3027302920818329, "learning_rate": 1.7939147668842417e-05, "loss": 0.4597, "step": 88260 }, { "epoch": 21.849009900990097, "grad_norm": 0.31833818554878235, "learning_rate": 1.7928575504073537e-05, "loss": 0.4586, "step": 88270 }, { "epoch": 21.85148514851485, "grad_norm": 0.2971346974372864, "learning_rate": 1.7918005774917677e-05, "loss": 0.4531, "step": 88280 }, { "epoch": 21.853960396039604, "grad_norm": 0.29482176899909973, "learning_rate": 1.7907438482177562e-05, "loss": 0.4552, "step": 88290 }, { "epoch": 21.856435643564357, "grad_norm": 0.27311238646507263, "learning_rate": 1.789687362665572e-05, "loss": 0.4551, "step": 88300 }, { "epoch": 21.85891089108911, "grad_norm": 0.30787956714630127, "learning_rate": 1.7886311209154443e-05, "loss": 0.4577, "step": 88310 }, { "epoch": 21.861386138613863, "grad_norm": 0.3122032582759857, "learning_rate": 1.787575123047592e-05, "loss": 0.4615, "step": 88320 }, { "epoch": 21.863861386138613, "grad_norm": 0.2925533652305603, "learning_rate": 1.7865193691422062e-05, "loss": 0.4584, "step": 88330 }, { "epoch": 21.866336633663366, "grad_norm": 0.29000386595726013, "learning_rate": 1.785463859279468e-05, "loss": 0.4511, "step": 88340 }, { "epoch": 21.86881188118812, "grad_norm": 0.2908448576927185, "learning_rate": 1.784408593539537e-05, "loss": 0.4541, "step": 88350 }, { "epoch": 21.871287128712872, "grad_norm": 0.29777854681015015, "learning_rate": 1.7833535720025524e-05, "loss": 0.4545, "step": 88360 }, { "epoch": 21.873762376237625, "grad_norm": 0.29574915766716003, "learning_rate": 1.782298794748634e-05, "loss": 0.4528, "step": 88370 }, { "epoch": 21.876237623762375, "grad_norm": 0.29086512327194214, "learning_rate": 1.7812442618578872e-05, "loss": 0.4568, "step": 88380 }, { "epoch": 21.878712871287128, "grad_norm": 0.2921440005302429, "learning_rate": 1.7801899734103988e-05, "loss": 0.4561, "step": 88390 }, { "epoch": 21.88118811881188, "grad_norm": 0.30901044607162476, "learning_rate": 1.7791359294862315e-05, "loss": 0.4589, "step": 88400 }, { "epoch": 21.883663366336634, "grad_norm": 0.2909184694290161, "learning_rate": 1.7780821301654337e-05, "loss": 0.458, "step": 88410 }, { "epoch": 21.886138613861387, "grad_norm": 0.30073487758636475, "learning_rate": 1.777028575528038e-05, "loss": 0.4544, "step": 88420 }, { "epoch": 21.888613861386137, "grad_norm": 0.3095621168613434, "learning_rate": 1.7759752656540517e-05, "loss": 0.4576, "step": 88430 }, { "epoch": 21.89108910891089, "grad_norm": 0.2939528524875641, "learning_rate": 1.7749222006234657e-05, "loss": 0.4561, "step": 88440 }, { "epoch": 21.893564356435643, "grad_norm": 0.322380393743515, "learning_rate": 1.7738693805162544e-05, "loss": 0.4599, "step": 88450 }, { "epoch": 21.896039603960396, "grad_norm": 0.3046919107437134, "learning_rate": 1.7728168054123752e-05, "loss": 0.4502, "step": 88460 }, { "epoch": 21.89851485148515, "grad_norm": 0.30949676036834717, "learning_rate": 1.7717644753917624e-05, "loss": 0.4566, "step": 88470 }, { "epoch": 21.900990099009903, "grad_norm": 0.30098217725753784, "learning_rate": 1.7707123905343293e-05, "loss": 0.4618, "step": 88480 }, { "epoch": 21.903465346534652, "grad_norm": 0.3356541395187378, "learning_rate": 1.769660550919982e-05, "loss": 0.4557, "step": 88490 }, { "epoch": 21.905940594059405, "grad_norm": 0.2859033942222595, "learning_rate": 1.7686089566285974e-05, "loss": 0.4557, "step": 88500 }, { "epoch": 21.90841584158416, "grad_norm": 0.3096464276313782, "learning_rate": 1.7675576077400358e-05, "loss": 0.4578, "step": 88510 }, { "epoch": 21.91089108910891, "grad_norm": 0.27836647629737854, "learning_rate": 1.7665065043341418e-05, "loss": 0.4617, "step": 88520 }, { "epoch": 21.913366336633665, "grad_norm": 0.30281275510787964, "learning_rate": 1.7654556464907408e-05, "loss": 0.4569, "step": 88530 }, { "epoch": 21.915841584158414, "grad_norm": 0.29210779070854187, "learning_rate": 1.764405034289638e-05, "loss": 0.4586, "step": 88540 }, { "epoch": 21.918316831683168, "grad_norm": 0.30267634987831116, "learning_rate": 1.763354667810617e-05, "loss": 0.4543, "step": 88550 }, { "epoch": 21.92079207920792, "grad_norm": 0.28116586804389954, "learning_rate": 1.7623045471334503e-05, "loss": 0.4583, "step": 88560 }, { "epoch": 21.923267326732674, "grad_norm": 0.28213125467300415, "learning_rate": 1.761254672337887e-05, "loss": 0.4506, "step": 88570 }, { "epoch": 21.925742574257427, "grad_norm": 0.2904404103755951, "learning_rate": 1.7602050435036564e-05, "loss": 0.4522, "step": 88580 }, { "epoch": 21.928217821782177, "grad_norm": 0.28293460607528687, "learning_rate": 1.7591556607104715e-05, "loss": 0.4562, "step": 88590 }, { "epoch": 21.93069306930693, "grad_norm": 0.2944314777851105, "learning_rate": 1.7581065240380284e-05, "loss": 0.4545, "step": 88600 }, { "epoch": 21.933168316831683, "grad_norm": 0.2941323220729828, "learning_rate": 1.757057633565999e-05, "loss": 0.4567, "step": 88610 }, { "epoch": 21.935643564356436, "grad_norm": 0.3165309727191925, "learning_rate": 1.7560089893740416e-05, "loss": 0.4658, "step": 88620 }, { "epoch": 21.93811881188119, "grad_norm": 0.29697689414024353, "learning_rate": 1.7549605915417918e-05, "loss": 0.4537, "step": 88630 }, { "epoch": 21.94059405940594, "grad_norm": 0.2757877707481384, "learning_rate": 1.753912440148871e-05, "loss": 0.4597, "step": 88640 }, { "epoch": 21.943069306930692, "grad_norm": 0.2952753007411957, "learning_rate": 1.752864535274876e-05, "loss": 0.4565, "step": 88650 }, { "epoch": 21.945544554455445, "grad_norm": 0.30282726883888245, "learning_rate": 1.7518168769993932e-05, "loss": 0.4592, "step": 88660 }, { "epoch": 21.948019801980198, "grad_norm": 0.2968950569629669, "learning_rate": 1.7507694654019796e-05, "loss": 0.4559, "step": 88670 }, { "epoch": 21.95049504950495, "grad_norm": 0.28705865144729614, "learning_rate": 1.7497223005621828e-05, "loss": 0.454, "step": 88680 }, { "epoch": 21.952970297029704, "grad_norm": 0.34885531663894653, "learning_rate": 1.7486753825595287e-05, "loss": 0.4528, "step": 88690 }, { "epoch": 21.955445544554454, "grad_norm": 0.29269322752952576, "learning_rate": 1.7476287114735205e-05, "loss": 0.458, "step": 88700 }, { "epoch": 21.957920792079207, "grad_norm": 0.28458231687545776, "learning_rate": 1.7465822873836487e-05, "loss": 0.4534, "step": 88710 }, { "epoch": 21.96039603960396, "grad_norm": 0.3017105460166931, "learning_rate": 1.7455361103693823e-05, "loss": 0.4591, "step": 88720 }, { "epoch": 21.962871287128714, "grad_norm": 0.3060215711593628, "learning_rate": 1.744490180510171e-05, "loss": 0.4537, "step": 88730 }, { "epoch": 21.965346534653467, "grad_norm": 0.2988560199737549, "learning_rate": 1.7434444978854443e-05, "loss": 0.4591, "step": 88740 }, { "epoch": 21.967821782178216, "grad_norm": 0.2980991005897522, "learning_rate": 1.7423990625746166e-05, "loss": 0.4592, "step": 88750 }, { "epoch": 21.97029702970297, "grad_norm": 0.29990696907043457, "learning_rate": 1.7413538746570834e-05, "loss": 0.4557, "step": 88760 }, { "epoch": 21.972772277227723, "grad_norm": 0.33794617652893066, "learning_rate": 1.7403089342122187e-05, "loss": 0.4546, "step": 88770 }, { "epoch": 21.975247524752476, "grad_norm": 0.3250364661216736, "learning_rate": 1.7392642413193742e-05, "loss": 0.4588, "step": 88780 }, { "epoch": 21.97772277227723, "grad_norm": 0.31385350227355957, "learning_rate": 1.7382197960578957e-05, "loss": 0.4536, "step": 88790 }, { "epoch": 21.980198019801982, "grad_norm": 0.2993066608905792, "learning_rate": 1.7371755985070975e-05, "loss": 0.4576, "step": 88800 }, { "epoch": 21.98267326732673, "grad_norm": 0.2861303687095642, "learning_rate": 1.7361316487462782e-05, "loss": 0.463, "step": 88810 }, { "epoch": 21.985148514851485, "grad_norm": 0.289351224899292, "learning_rate": 1.7350879468547204e-05, "loss": 0.4564, "step": 88820 }, { "epoch": 21.987623762376238, "grad_norm": 0.3117884397506714, "learning_rate": 1.7340444929116882e-05, "loss": 0.4585, "step": 88830 }, { "epoch": 21.99009900990099, "grad_norm": 0.2802339494228363, "learning_rate": 1.7330012869964234e-05, "loss": 0.4518, "step": 88840 }, { "epoch": 21.992574257425744, "grad_norm": 0.28880083560943604, "learning_rate": 1.7319583291881485e-05, "loss": 0.4528, "step": 88850 }, { "epoch": 21.995049504950494, "grad_norm": 0.33908963203430176, "learning_rate": 1.730915619566072e-05, "loss": 0.458, "step": 88860 }, { "epoch": 21.997524752475247, "grad_norm": 0.2985749840736389, "learning_rate": 1.7298731582093814e-05, "loss": 0.4576, "step": 88870 }, { "epoch": 22.0, "grad_norm": 0.31366005539894104, "learning_rate": 1.7288309451972416e-05, "loss": 0.4542, "step": 88880 }, { "epoch": 22.002475247524753, "grad_norm": 0.29634180665016174, "learning_rate": 1.7277889806088045e-05, "loss": 0.4578, "step": 88890 }, { "epoch": 22.004950495049506, "grad_norm": 0.3180277347564697, "learning_rate": 1.7267472645232003e-05, "loss": 0.4598, "step": 88900 }, { "epoch": 22.007425742574256, "grad_norm": 0.33301234245300293, "learning_rate": 1.7257057970195384e-05, "loss": 0.4538, "step": 88910 }, { "epoch": 22.00990099009901, "grad_norm": 0.30295896530151367, "learning_rate": 1.7246645781769145e-05, "loss": 0.4556, "step": 88920 }, { "epoch": 22.012376237623762, "grad_norm": 0.28270259499549866, "learning_rate": 1.723623608074399e-05, "loss": 0.45, "step": 88930 }, { "epoch": 22.014851485148515, "grad_norm": 0.29381436109542847, "learning_rate": 1.7225828867910497e-05, "loss": 0.4589, "step": 88940 }, { "epoch": 22.01732673267327, "grad_norm": 0.2703405022621155, "learning_rate": 1.7215424144058993e-05, "loss": 0.4578, "step": 88950 }, { "epoch": 22.019801980198018, "grad_norm": 0.31121447682380676, "learning_rate": 1.7205021909979692e-05, "loss": 0.4624, "step": 88960 }, { "epoch": 22.02227722772277, "grad_norm": 0.2939441204071045, "learning_rate": 1.719462216646252e-05, "loss": 0.4525, "step": 88970 }, { "epoch": 22.024752475247524, "grad_norm": 0.2607184052467346, "learning_rate": 1.7184224914297304e-05, "loss": 0.4535, "step": 88980 }, { "epoch": 22.027227722772277, "grad_norm": 0.29853448271751404, "learning_rate": 1.7173830154273658e-05, "loss": 0.453, "step": 88990 }, { "epoch": 22.02970297029703, "grad_norm": 0.3062483072280884, "learning_rate": 1.716343788718096e-05, "loss": 0.4581, "step": 89000 }, { "epoch": 22.032178217821784, "grad_norm": 0.2946467101573944, "learning_rate": 1.7153048113808466e-05, "loss": 0.4546, "step": 89010 }, { "epoch": 22.034653465346533, "grad_norm": 0.28607410192489624, "learning_rate": 1.7142660834945186e-05, "loss": 0.4567, "step": 89020 }, { "epoch": 22.037128712871286, "grad_norm": 0.2926862835884094, "learning_rate": 1.7132276051379985e-05, "loss": 0.4549, "step": 89030 }, { "epoch": 22.03960396039604, "grad_norm": 0.28597426414489746, "learning_rate": 1.71218937639015e-05, "loss": 0.4537, "step": 89040 }, { "epoch": 22.042079207920793, "grad_norm": 0.3050517439842224, "learning_rate": 1.71115139732982e-05, "loss": 0.4617, "step": 89050 }, { "epoch": 22.044554455445546, "grad_norm": 0.2941342294216156, "learning_rate": 1.710113668035839e-05, "loss": 0.4579, "step": 89060 }, { "epoch": 22.047029702970296, "grad_norm": 0.3026047348976135, "learning_rate": 1.7090761885870132e-05, "loss": 0.4578, "step": 89070 }, { "epoch": 22.04950495049505, "grad_norm": 0.299101322889328, "learning_rate": 1.70803895906213e-05, "loss": 0.4523, "step": 89080 }, { "epoch": 22.051980198019802, "grad_norm": 0.286354124546051, "learning_rate": 1.7070019795399655e-05, "loss": 0.4575, "step": 89090 }, { "epoch": 22.054455445544555, "grad_norm": 0.28686949610710144, "learning_rate": 1.705965250099269e-05, "loss": 0.4526, "step": 89100 }, { "epoch": 22.056930693069308, "grad_norm": 0.3309682607650757, "learning_rate": 1.7049287708187717e-05, "loss": 0.46, "step": 89110 }, { "epoch": 22.059405940594058, "grad_norm": 0.2918776273727417, "learning_rate": 1.7038925417771885e-05, "loss": 0.4575, "step": 89120 }, { "epoch": 22.06188118811881, "grad_norm": 0.28694310784339905, "learning_rate": 1.7028565630532155e-05, "loss": 0.4567, "step": 89130 }, { "epoch": 22.064356435643564, "grad_norm": 0.3071777820587158, "learning_rate": 1.7018208347255276e-05, "loss": 0.4546, "step": 89140 }, { "epoch": 22.066831683168317, "grad_norm": 0.3214167654514313, "learning_rate": 1.7007853568727796e-05, "loss": 0.4615, "step": 89150 }, { "epoch": 22.06930693069307, "grad_norm": 0.3077830672264099, "learning_rate": 1.6997501295736106e-05, "loss": 0.4542, "step": 89160 }, { "epoch": 22.071782178217823, "grad_norm": 0.2969280779361725, "learning_rate": 1.6987151529066413e-05, "loss": 0.4535, "step": 89170 }, { "epoch": 22.074257425742573, "grad_norm": 0.29208990931510925, "learning_rate": 1.697680426950468e-05, "loss": 0.4502, "step": 89180 }, { "epoch": 22.076732673267326, "grad_norm": 0.28953665494918823, "learning_rate": 1.6966459517836732e-05, "loss": 0.4613, "step": 89190 }, { "epoch": 22.07920792079208, "grad_norm": 0.2841518819332123, "learning_rate": 1.6956117274848193e-05, "loss": 0.4519, "step": 89200 }, { "epoch": 22.081683168316832, "grad_norm": 0.32337725162506104, "learning_rate": 1.6945777541324476e-05, "loss": 0.4538, "step": 89210 }, { "epoch": 22.084158415841586, "grad_norm": 0.2849646210670471, "learning_rate": 1.69354403180508e-05, "loss": 0.4524, "step": 89220 }, { "epoch": 22.086633663366335, "grad_norm": 0.2780199348926544, "learning_rate": 1.6925105605812223e-05, "loss": 0.4577, "step": 89230 }, { "epoch": 22.08910891089109, "grad_norm": 0.31256425380706787, "learning_rate": 1.691477340539362e-05, "loss": 0.4538, "step": 89240 }, { "epoch": 22.09158415841584, "grad_norm": 0.2936531901359558, "learning_rate": 1.6904443717579616e-05, "loss": 0.4573, "step": 89250 }, { "epoch": 22.094059405940595, "grad_norm": 0.3332618474960327, "learning_rate": 1.689411654315472e-05, "loss": 0.4559, "step": 89260 }, { "epoch": 22.096534653465348, "grad_norm": 0.30439651012420654, "learning_rate": 1.688379188290317e-05, "loss": 0.4536, "step": 89270 }, { "epoch": 22.099009900990097, "grad_norm": 0.27099519968032837, "learning_rate": 1.6873469737609083e-05, "loss": 0.4616, "step": 89280 }, { "epoch": 22.10148514851485, "grad_norm": 0.2843390703201294, "learning_rate": 1.6863150108056364e-05, "loss": 0.4591, "step": 89290 }, { "epoch": 22.103960396039604, "grad_norm": 0.27914831042289734, "learning_rate": 1.68528329950287e-05, "loss": 0.4545, "step": 89300 }, { "epoch": 22.106435643564357, "grad_norm": 0.30101820826530457, "learning_rate": 1.684251839930963e-05, "loss": 0.4511, "step": 89310 }, { "epoch": 22.10891089108911, "grad_norm": 0.3119588792324066, "learning_rate": 1.6832206321682454e-05, "loss": 0.4573, "step": 89320 }, { "epoch": 22.111386138613863, "grad_norm": 0.31450554728507996, "learning_rate": 1.6821896762930332e-05, "loss": 0.4506, "step": 89330 }, { "epoch": 22.113861386138613, "grad_norm": 0.2759707272052765, "learning_rate": 1.681158972383618e-05, "loss": 0.4634, "step": 89340 }, { "epoch": 22.116336633663366, "grad_norm": 0.3064236342906952, "learning_rate": 1.6801285205182764e-05, "loss": 0.4551, "step": 89350 }, { "epoch": 22.11881188118812, "grad_norm": 0.29270145297050476, "learning_rate": 1.6790983207752665e-05, "loss": 0.4586, "step": 89360 }, { "epoch": 22.121287128712872, "grad_norm": 0.2909778356552124, "learning_rate": 1.6780683732328224e-05, "loss": 0.4584, "step": 89370 }, { "epoch": 22.123762376237625, "grad_norm": 0.2885589599609375, "learning_rate": 1.6770386779691612e-05, "loss": 0.4611, "step": 89380 }, { "epoch": 22.126237623762375, "grad_norm": 0.28315654397010803, "learning_rate": 1.676009235062483e-05, "loss": 0.4528, "step": 89390 }, { "epoch": 22.128712871287128, "grad_norm": 0.3002675473690033, "learning_rate": 1.674980044590968e-05, "loss": 0.4538, "step": 89400 }, { "epoch": 22.13118811881188, "grad_norm": 0.28070127964019775, "learning_rate": 1.673951106632774e-05, "loss": 0.4491, "step": 89410 }, { "epoch": 22.133663366336634, "grad_norm": 0.29485273361206055, "learning_rate": 1.6729224212660437e-05, "loss": 0.4566, "step": 89420 }, { "epoch": 22.136138613861387, "grad_norm": 0.2998288869857788, "learning_rate": 1.6718939885689e-05, "loss": 0.4554, "step": 89430 }, { "epoch": 22.138613861386137, "grad_norm": 0.3148513436317444, "learning_rate": 1.6708658086194445e-05, "loss": 0.4567, "step": 89440 }, { "epoch": 22.14108910891089, "grad_norm": 0.27894988656044006, "learning_rate": 1.6698378814957587e-05, "loss": 0.454, "step": 89450 }, { "epoch": 22.143564356435643, "grad_norm": 0.2885909378528595, "learning_rate": 1.668810207275909e-05, "loss": 0.4518, "step": 89460 }, { "epoch": 22.146039603960396, "grad_norm": 0.29961907863616943, "learning_rate": 1.6677827860379414e-05, "loss": 0.458, "step": 89470 }, { "epoch": 22.14851485148515, "grad_norm": 0.3112356960773468, "learning_rate": 1.6667556178598804e-05, "loss": 0.461, "step": 89480 }, { "epoch": 22.150990099009903, "grad_norm": 0.2929479479789734, "learning_rate": 1.6657287028197295e-05, "loss": 0.4572, "step": 89490 }, { "epoch": 22.153465346534652, "grad_norm": 0.3112681806087494, "learning_rate": 1.664702040995483e-05, "loss": 0.4552, "step": 89500 }, { "epoch": 22.155940594059405, "grad_norm": 0.30182722210884094, "learning_rate": 1.6636756324651048e-05, "loss": 0.4573, "step": 89510 }, { "epoch": 22.15841584158416, "grad_norm": 0.28328800201416016, "learning_rate": 1.662649477306543e-05, "loss": 0.4532, "step": 89520 }, { "epoch": 22.16089108910891, "grad_norm": 0.32412487268447876, "learning_rate": 1.6616235755977293e-05, "loss": 0.4581, "step": 89530 }, { "epoch": 22.163366336633665, "grad_norm": 0.28270184993743896, "learning_rate": 1.660597927416575e-05, "loss": 0.4533, "step": 89540 }, { "epoch": 22.165841584158414, "grad_norm": 0.30987194180488586, "learning_rate": 1.6595725328409682e-05, "loss": 0.4571, "step": 89550 }, { "epoch": 22.168316831683168, "grad_norm": 0.2904319763183594, "learning_rate": 1.6585473919487842e-05, "loss": 0.4603, "step": 89560 }, { "epoch": 22.17079207920792, "grad_norm": 0.2863529622554779, "learning_rate": 1.6575225048178734e-05, "loss": 0.4535, "step": 89570 }, { "epoch": 22.173267326732674, "grad_norm": 0.29805615544319153, "learning_rate": 1.6564978715260704e-05, "loss": 0.4567, "step": 89580 }, { "epoch": 22.175742574257427, "grad_norm": 0.2684760093688965, "learning_rate": 1.655473492151188e-05, "loss": 0.4607, "step": 89590 }, { "epoch": 22.178217821782177, "grad_norm": 0.272915780544281, "learning_rate": 1.6544493667710216e-05, "loss": 0.4534, "step": 89600 }, { "epoch": 22.18069306930693, "grad_norm": 0.29539960622787476, "learning_rate": 1.6534254954633483e-05, "loss": 0.4561, "step": 89610 }, { "epoch": 22.183168316831683, "grad_norm": 0.3114522397518158, "learning_rate": 1.652401878305922e-05, "loss": 0.4519, "step": 89620 }, { "epoch": 22.185643564356436, "grad_norm": 0.2926904261112213, "learning_rate": 1.651378515376483e-05, "loss": 0.4641, "step": 89630 }, { "epoch": 22.18811881188119, "grad_norm": 0.29256612062454224, "learning_rate": 1.6503554067527438e-05, "loss": 0.4584, "step": 89640 }, { "epoch": 22.190594059405942, "grad_norm": 0.3062370717525482, "learning_rate": 1.6493325525124064e-05, "loss": 0.4521, "step": 89650 }, { "epoch": 22.193069306930692, "grad_norm": 0.3002052903175354, "learning_rate": 1.6483099527331507e-05, "loss": 0.4564, "step": 89660 }, { "epoch": 22.195544554455445, "grad_norm": 0.287408709526062, "learning_rate": 1.6472876074926353e-05, "loss": 0.4568, "step": 89670 }, { "epoch": 22.198019801980198, "grad_norm": 0.29100432991981506, "learning_rate": 1.6462655168684983e-05, "loss": 0.4576, "step": 89680 }, { "epoch": 22.20049504950495, "grad_norm": 0.31872084736824036, "learning_rate": 1.6452436809383624e-05, "loss": 0.4544, "step": 89690 }, { "epoch": 22.202970297029704, "grad_norm": 0.30819058418273926, "learning_rate": 1.6442220997798315e-05, "loss": 0.4591, "step": 89700 }, { "epoch": 22.205445544554454, "grad_norm": 0.2784774899482727, "learning_rate": 1.6432007734704835e-05, "loss": 0.4541, "step": 89710 }, { "epoch": 22.207920792079207, "grad_norm": 0.2933376431465149, "learning_rate": 1.642179702087884e-05, "loss": 0.4537, "step": 89720 }, { "epoch": 22.21039603960396, "grad_norm": 0.3064402937889099, "learning_rate": 1.6411588857095788e-05, "loss": 0.4517, "step": 89730 }, { "epoch": 22.212871287128714, "grad_norm": 0.3280593454837799, "learning_rate": 1.6401383244130893e-05, "loss": 0.4503, "step": 89740 }, { "epoch": 22.215346534653467, "grad_norm": 0.2943814992904663, "learning_rate": 1.639118018275919e-05, "loss": 0.4571, "step": 89750 }, { "epoch": 22.217821782178216, "grad_norm": 0.33427801728248596, "learning_rate": 1.638097967375556e-05, "loss": 0.4573, "step": 89760 }, { "epoch": 22.22029702970297, "grad_norm": 0.2951095700263977, "learning_rate": 1.6370781717894667e-05, "loss": 0.4545, "step": 89770 }, { "epoch": 22.222772277227723, "grad_norm": 0.32060137391090393, "learning_rate": 1.6360586315950976e-05, "loss": 0.4528, "step": 89780 }, { "epoch": 22.225247524752476, "grad_norm": 0.353079229593277, "learning_rate": 1.6350393468698715e-05, "loss": 0.4534, "step": 89790 }, { "epoch": 22.22772277227723, "grad_norm": 0.3158054053783417, "learning_rate": 1.6340203176912038e-05, "loss": 0.4597, "step": 89800 }, { "epoch": 22.230198019801982, "grad_norm": 0.28183215856552124, "learning_rate": 1.633001544136479e-05, "loss": 0.4528, "step": 89810 }, { "epoch": 22.23267326732673, "grad_norm": 0.2972293794155121, "learning_rate": 1.6319830262830655e-05, "loss": 0.4525, "step": 89820 }, { "epoch": 22.235148514851485, "grad_norm": 0.3145465850830078, "learning_rate": 1.6309647642083143e-05, "loss": 0.4555, "step": 89830 }, { "epoch": 22.237623762376238, "grad_norm": 0.2979932427406311, "learning_rate": 1.6299467579895566e-05, "loss": 0.4574, "step": 89840 }, { "epoch": 22.24009900990099, "grad_norm": 0.297413170337677, "learning_rate": 1.6289290077041024e-05, "loss": 0.4588, "step": 89850 }, { "epoch": 22.242574257425744, "grad_norm": 0.30567416548728943, "learning_rate": 1.6279115134292418e-05, "loss": 0.4524, "step": 89860 }, { "epoch": 22.245049504950494, "grad_norm": 0.318991482257843, "learning_rate": 1.626894275242247e-05, "loss": 0.4571, "step": 89870 }, { "epoch": 22.247524752475247, "grad_norm": 0.29469534754753113, "learning_rate": 1.625877293220374e-05, "loss": 0.4604, "step": 89880 }, { "epoch": 22.25, "grad_norm": 0.3070632219314575, "learning_rate": 1.6248605674408513e-05, "loss": 0.4582, "step": 89890 }, { "epoch": 22.252475247524753, "grad_norm": 0.2735156714916229, "learning_rate": 1.6238440979808943e-05, "loss": 0.4539, "step": 89900 }, { "epoch": 22.254950495049506, "grad_norm": 0.2994857728481293, "learning_rate": 1.6228278849176993e-05, "loss": 0.4617, "step": 89910 }, { "epoch": 22.257425742574256, "grad_norm": 0.27988913655281067, "learning_rate": 1.621811928328437e-05, "loss": 0.4624, "step": 89920 }, { "epoch": 22.25990099009901, "grad_norm": 0.315143883228302, "learning_rate": 1.6207962282902666e-05, "loss": 0.4594, "step": 89930 }, { "epoch": 22.262376237623762, "grad_norm": 0.29916495084762573, "learning_rate": 1.6197807848803204e-05, "loss": 0.4494, "step": 89940 }, { "epoch": 22.264851485148515, "grad_norm": 0.30664247274398804, "learning_rate": 1.6187655981757177e-05, "loss": 0.4583, "step": 89950 }, { "epoch": 22.26732673267327, "grad_norm": 0.3023900091648102, "learning_rate": 1.6177506682535515e-05, "loss": 0.4563, "step": 89960 }, { "epoch": 22.269801980198018, "grad_norm": 0.27796489000320435, "learning_rate": 1.6167359951909035e-05, "loss": 0.4521, "step": 89970 }, { "epoch": 22.27227722772277, "grad_norm": 0.2869395613670349, "learning_rate": 1.6157215790648267e-05, "loss": 0.449, "step": 89980 }, { "epoch": 22.274752475247524, "grad_norm": 0.3544795513153076, "learning_rate": 1.614707419952362e-05, "loss": 0.4569, "step": 89990 }, { "epoch": 22.277227722772277, "grad_norm": 0.33452966809272766, "learning_rate": 1.6136935179305295e-05, "loss": 0.4554, "step": 90000 }, { "epoch": 22.27970297029703, "grad_norm": 0.2645222842693329, "learning_rate": 1.612679873076325e-05, "loss": 0.4528, "step": 90010 }, { "epoch": 22.282178217821784, "grad_norm": 0.28547096252441406, "learning_rate": 1.6116664854667295e-05, "loss": 0.4602, "step": 90020 }, { "epoch": 22.284653465346533, "grad_norm": 0.27737465500831604, "learning_rate": 1.610653355178705e-05, "loss": 0.4575, "step": 90030 }, { "epoch": 22.287128712871286, "grad_norm": 0.27198687195777893, "learning_rate": 1.609640482289191e-05, "loss": 0.4526, "step": 90040 }, { "epoch": 22.28960396039604, "grad_norm": 0.29397112131118774, "learning_rate": 1.6086278668751054e-05, "loss": 0.4597, "step": 90050 }, { "epoch": 22.292079207920793, "grad_norm": 0.26081717014312744, "learning_rate": 1.6076155090133525e-05, "loss": 0.4627, "step": 90060 }, { "epoch": 22.294554455445546, "grad_norm": 0.2815098166465759, "learning_rate": 1.6066034087808163e-05, "loss": 0.4517, "step": 90070 }, { "epoch": 22.297029702970296, "grad_norm": 0.2687840461730957, "learning_rate": 1.6055915662543558e-05, "loss": 0.4595, "step": 90080 }, { "epoch": 22.29950495049505, "grad_norm": 0.28422990441322327, "learning_rate": 1.604579981510812e-05, "loss": 0.4558, "step": 90090 }, { "epoch": 22.301980198019802, "grad_norm": 0.30101293325424194, "learning_rate": 1.603568654627014e-05, "loss": 0.4551, "step": 90100 }, { "epoch": 22.304455445544555, "grad_norm": 0.299277126789093, "learning_rate": 1.602557585679762e-05, "loss": 0.4566, "step": 90110 }, { "epoch": 22.306930693069308, "grad_norm": 0.30030888319015503, "learning_rate": 1.6015467747458386e-05, "loss": 0.453, "step": 90120 }, { "epoch": 22.309405940594058, "grad_norm": 0.298689067363739, "learning_rate": 1.6005362219020098e-05, "loss": 0.4589, "step": 90130 }, { "epoch": 22.31188118811881, "grad_norm": 0.2922825813293457, "learning_rate": 1.5995259272250217e-05, "loss": 0.4571, "step": 90140 }, { "epoch": 22.314356435643564, "grad_norm": 0.3330461382865906, "learning_rate": 1.5985158907915994e-05, "loss": 0.4569, "step": 90150 }, { "epoch": 22.316831683168317, "grad_norm": 0.3083480894565582, "learning_rate": 1.5975061126784446e-05, "loss": 0.457, "step": 90160 }, { "epoch": 22.31930693069307, "grad_norm": 0.3045145571231842, "learning_rate": 1.5964965929622466e-05, "loss": 0.4541, "step": 90170 }, { "epoch": 22.321782178217823, "grad_norm": 0.29008620977401733, "learning_rate": 1.5954873317196723e-05, "loss": 0.4558, "step": 90180 }, { "epoch": 22.324257425742573, "grad_norm": 0.31080353260040283, "learning_rate": 1.594478329027366e-05, "loss": 0.4569, "step": 90190 }, { "epoch": 22.326732673267326, "grad_norm": 0.3176664412021637, "learning_rate": 1.5934695849619564e-05, "loss": 0.4551, "step": 90200 }, { "epoch": 22.32920792079208, "grad_norm": 0.31855177879333496, "learning_rate": 1.592461099600052e-05, "loss": 0.4557, "step": 90210 }, { "epoch": 22.331683168316832, "grad_norm": 0.26518017053604126, "learning_rate": 1.5914528730182392e-05, "loss": 0.4574, "step": 90220 }, { "epoch": 22.334158415841586, "grad_norm": 0.27677103877067566, "learning_rate": 1.590444905293085e-05, "loss": 0.4586, "step": 90230 }, { "epoch": 22.336633663366335, "grad_norm": 0.2961769104003906, "learning_rate": 1.589437196501139e-05, "loss": 0.4565, "step": 90240 }, { "epoch": 22.33910891089109, "grad_norm": 0.27106770873069763, "learning_rate": 1.5884297467189314e-05, "loss": 0.4556, "step": 90250 }, { "epoch": 22.34158415841584, "grad_norm": 0.30269163846969604, "learning_rate": 1.5874225560229688e-05, "loss": 0.4578, "step": 90260 }, { "epoch": 22.344059405940595, "grad_norm": 0.2853265106678009, "learning_rate": 1.5864156244897438e-05, "loss": 0.4575, "step": 90270 }, { "epoch": 22.346534653465348, "grad_norm": 0.28232625126838684, "learning_rate": 1.5854089521957228e-05, "loss": 0.4584, "step": 90280 }, { "epoch": 22.349009900990097, "grad_norm": 0.2846035957336426, "learning_rate": 1.5844025392173572e-05, "loss": 0.4536, "step": 90290 }, { "epoch": 22.35148514851485, "grad_norm": 0.2788126468658447, "learning_rate": 1.5833963856310797e-05, "loss": 0.4548, "step": 90300 }, { "epoch": 22.353960396039604, "grad_norm": 0.3127968907356262, "learning_rate": 1.582390491513297e-05, "loss": 0.4519, "step": 90310 }, { "epoch": 22.356435643564357, "grad_norm": 0.2973272204399109, "learning_rate": 1.5813848569404044e-05, "loss": 0.4568, "step": 90320 }, { "epoch": 22.35891089108911, "grad_norm": 0.29070430994033813, "learning_rate": 1.580379481988769e-05, "loss": 0.4549, "step": 90330 }, { "epoch": 22.361386138613863, "grad_norm": 0.2914305031299591, "learning_rate": 1.579374366734746e-05, "loss": 0.451, "step": 90340 }, { "epoch": 22.363861386138613, "grad_norm": 0.2614460289478302, "learning_rate": 1.5783695112546642e-05, "loss": 0.4498, "step": 90350 }, { "epoch": 22.366336633663366, "grad_norm": 0.29224878549575806, "learning_rate": 1.5773649156248378e-05, "loss": 0.4562, "step": 90360 }, { "epoch": 22.36881188118812, "grad_norm": 0.2943849563598633, "learning_rate": 1.5763605799215592e-05, "loss": 0.4592, "step": 90370 }, { "epoch": 22.371287128712872, "grad_norm": 0.28831231594085693, "learning_rate": 1.575356504221101e-05, "loss": 0.4533, "step": 90380 }, { "epoch": 22.373762376237625, "grad_norm": 0.3304946720600128, "learning_rate": 1.5743526885997127e-05, "loss": 0.4551, "step": 90390 }, { "epoch": 22.376237623762375, "grad_norm": 0.31051182746887207, "learning_rate": 1.573349133133633e-05, "loss": 0.4583, "step": 90400 }, { "epoch": 22.378712871287128, "grad_norm": 0.2748316824436188, "learning_rate": 1.5723458378990735e-05, "loss": 0.4533, "step": 90410 }, { "epoch": 22.38118811881188, "grad_norm": 0.28801631927490234, "learning_rate": 1.5713428029722248e-05, "loss": 0.4503, "step": 90420 }, { "epoch": 22.383663366336634, "grad_norm": 0.3125493824481964, "learning_rate": 1.5703400284292636e-05, "loss": 0.4599, "step": 90430 }, { "epoch": 22.386138613861387, "grad_norm": 0.2939353883266449, "learning_rate": 1.5693375143463447e-05, "loss": 0.4515, "step": 90440 }, { "epoch": 22.388613861386137, "grad_norm": 0.2786017954349518, "learning_rate": 1.568335260799601e-05, "loss": 0.4545, "step": 90450 }, { "epoch": 22.39108910891089, "grad_norm": 0.2772767245769501, "learning_rate": 1.567333267865146e-05, "loss": 0.4536, "step": 90460 }, { "epoch": 22.393564356435643, "grad_norm": 0.2826213836669922, "learning_rate": 1.5663315356190748e-05, "loss": 0.4525, "step": 90470 }, { "epoch": 22.396039603960396, "grad_norm": 0.26824378967285156, "learning_rate": 1.5653300641374657e-05, "loss": 0.4578, "step": 90480 }, { "epoch": 22.39851485148515, "grad_norm": 0.2953321039676666, "learning_rate": 1.5643288534963696e-05, "loss": 0.4553, "step": 90490 }, { "epoch": 22.400990099009903, "grad_norm": 0.29612356424331665, "learning_rate": 1.563327903771824e-05, "loss": 0.4564, "step": 90500 }, { "epoch": 22.403465346534652, "grad_norm": 0.3013019859790802, "learning_rate": 1.562327215039845e-05, "loss": 0.4549, "step": 90510 }, { "epoch": 22.405940594059405, "grad_norm": 0.3018209636211395, "learning_rate": 1.5613267873764275e-05, "loss": 0.4566, "step": 90520 }, { "epoch": 22.40841584158416, "grad_norm": 0.3124528229236603, "learning_rate": 1.5603266208575464e-05, "loss": 0.4574, "step": 90530 }, { "epoch": 22.41089108910891, "grad_norm": 0.2960216701030731, "learning_rate": 1.5593267155591572e-05, "loss": 0.4591, "step": 90540 }, { "epoch": 22.413366336633665, "grad_norm": 0.2887892425060272, "learning_rate": 1.5583270715572002e-05, "loss": 0.4574, "step": 90550 }, { "epoch": 22.415841584158414, "grad_norm": 0.28839296102523804, "learning_rate": 1.5573276889275873e-05, "loss": 0.4515, "step": 90560 }, { "epoch": 22.418316831683168, "grad_norm": 0.28862881660461426, "learning_rate": 1.5563285677462182e-05, "loss": 0.4608, "step": 90570 }, { "epoch": 22.42079207920792, "grad_norm": 0.3011038303375244, "learning_rate": 1.5553297080889662e-05, "loss": 0.4538, "step": 90580 }, { "epoch": 22.423267326732674, "grad_norm": 0.28768521547317505, "learning_rate": 1.554331110031692e-05, "loss": 0.458, "step": 90590 }, { "epoch": 22.425742574257427, "grad_norm": 0.28265252709388733, "learning_rate": 1.5533327736502286e-05, "loss": 0.4565, "step": 90600 }, { "epoch": 22.428217821782177, "grad_norm": 0.28808829188346863, "learning_rate": 1.552334699020395e-05, "loss": 0.4537, "step": 90610 }, { "epoch": 22.43069306930693, "grad_norm": 0.31968873739242554, "learning_rate": 1.5513368862179904e-05, "loss": 0.455, "step": 90620 }, { "epoch": 22.433168316831683, "grad_norm": 0.3013305962085724, "learning_rate": 1.5503393353187883e-05, "loss": 0.4558, "step": 90630 }, { "epoch": 22.435643564356436, "grad_norm": 0.2762969732284546, "learning_rate": 1.5493420463985497e-05, "loss": 0.4564, "step": 90640 }, { "epoch": 22.43811881188119, "grad_norm": 0.29283228516578674, "learning_rate": 1.5483450195330085e-05, "loss": 0.4583, "step": 90650 }, { "epoch": 22.440594059405942, "grad_norm": 0.293859601020813, "learning_rate": 1.5473482547978845e-05, "loss": 0.4534, "step": 90660 }, { "epoch": 22.443069306930692, "grad_norm": 0.3213336765766144, "learning_rate": 1.5463517522688763e-05, "loss": 0.4563, "step": 90670 }, { "epoch": 22.445544554455445, "grad_norm": 0.35042592883110046, "learning_rate": 1.5453555120216602e-05, "loss": 0.4568, "step": 90680 }, { "epoch": 22.448019801980198, "grad_norm": 0.28067636489868164, "learning_rate": 1.5443595341318928e-05, "loss": 0.4581, "step": 90690 }, { "epoch": 22.45049504950495, "grad_norm": 0.28794917464256287, "learning_rate": 1.5433638186752135e-05, "loss": 0.4585, "step": 90700 }, { "epoch": 22.452970297029704, "grad_norm": 0.29421061277389526, "learning_rate": 1.5423683657272427e-05, "loss": 0.4525, "step": 90710 }, { "epoch": 22.455445544554454, "grad_norm": 0.27351197600364685, "learning_rate": 1.541373175363574e-05, "loss": 0.4537, "step": 90720 }, { "epoch": 22.457920792079207, "grad_norm": 0.28810790181159973, "learning_rate": 1.540378247659787e-05, "loss": 0.4578, "step": 90730 }, { "epoch": 22.46039603960396, "grad_norm": 0.274728924036026, "learning_rate": 1.539383582691443e-05, "loss": 0.4549, "step": 90740 }, { "epoch": 22.462871287128714, "grad_norm": 0.2924592196941376, "learning_rate": 1.538389180534078e-05, "loss": 0.4524, "step": 90750 }, { "epoch": 22.465346534653467, "grad_norm": 0.2767695188522339, "learning_rate": 1.5373950412632082e-05, "loss": 0.4521, "step": 90760 }, { "epoch": 22.467821782178216, "grad_norm": 0.2536010146141052, "learning_rate": 1.5364011649543337e-05, "loss": 0.4579, "step": 90770 }, { "epoch": 22.47029702970297, "grad_norm": 0.2761642038822174, "learning_rate": 1.5354075516829348e-05, "loss": 0.4603, "step": 90780 }, { "epoch": 22.472772277227723, "grad_norm": 0.2815895974636078, "learning_rate": 1.5344142015244678e-05, "loss": 0.453, "step": 90790 }, { "epoch": 22.475247524752476, "grad_norm": 0.26119089126586914, "learning_rate": 1.5334211145543682e-05, "loss": 0.4618, "step": 90800 }, { "epoch": 22.47772277227723, "grad_norm": 0.27741050720214844, "learning_rate": 1.5324282908480602e-05, "loss": 0.4583, "step": 90810 }, { "epoch": 22.480198019801982, "grad_norm": 0.36477312445640564, "learning_rate": 1.53143573048094e-05, "loss": 0.4569, "step": 90820 }, { "epoch": 22.48267326732673, "grad_norm": 0.3239222764968872, "learning_rate": 1.530443433528384e-05, "loss": 0.4575, "step": 90830 }, { "epoch": 22.485148514851485, "grad_norm": 0.28034016489982605, "learning_rate": 1.5294514000657524e-05, "loss": 0.4559, "step": 90840 }, { "epoch": 22.487623762376238, "grad_norm": 0.2826177775859833, "learning_rate": 1.5284596301683847e-05, "loss": 0.4543, "step": 90850 }, { "epoch": 22.49009900990099, "grad_norm": 0.321482390165329, "learning_rate": 1.5274681239115957e-05, "loss": 0.4587, "step": 90860 }, { "epoch": 22.492574257425744, "grad_norm": 0.3096616268157959, "learning_rate": 1.5264768813706882e-05, "loss": 0.4583, "step": 90870 }, { "epoch": 22.495049504950494, "grad_norm": 0.27780625224113464, "learning_rate": 1.5254859026209368e-05, "loss": 0.4548, "step": 90880 }, { "epoch": 22.497524752475247, "grad_norm": 0.2748188376426697, "learning_rate": 1.5244951877376024e-05, "loss": 0.4522, "step": 90890 }, { "epoch": 22.5, "grad_norm": 0.29153192043304443, "learning_rate": 1.5235047367959209e-05, "loss": 0.4524, "step": 90900 }, { "epoch": 22.502475247524753, "grad_norm": 0.2963409721851349, "learning_rate": 1.5225145498711118e-05, "loss": 0.4558, "step": 90910 }, { "epoch": 22.504950495049506, "grad_norm": 0.2984151542186737, "learning_rate": 1.5215246270383749e-05, "loss": 0.4586, "step": 90920 }, { "epoch": 22.507425742574256, "grad_norm": 0.2897805869579315, "learning_rate": 1.5205349683728848e-05, "loss": 0.4568, "step": 90930 }, { "epoch": 22.50990099009901, "grad_norm": 0.28967660665512085, "learning_rate": 1.5195455739498033e-05, "loss": 0.4551, "step": 90940 }, { "epoch": 22.512376237623762, "grad_norm": 0.3077452480792999, "learning_rate": 1.5185564438442645e-05, "loss": 0.4562, "step": 90950 }, { "epoch": 22.514851485148515, "grad_norm": 0.3207610547542572, "learning_rate": 1.5175675781313886e-05, "loss": 0.4603, "step": 90960 }, { "epoch": 22.51732673267327, "grad_norm": 0.2810305953025818, "learning_rate": 1.5165789768862743e-05, "loss": 0.4572, "step": 90970 }, { "epoch": 22.519801980198018, "grad_norm": 0.2859381437301636, "learning_rate": 1.515590640183998e-05, "loss": 0.4557, "step": 90980 }, { "epoch": 22.52227722772277, "grad_norm": 0.2841491997241974, "learning_rate": 1.5146025680996162e-05, "loss": 0.461, "step": 90990 }, { "epoch": 22.524752475247524, "grad_norm": 0.28487545251846313, "learning_rate": 1.513614760708168e-05, "loss": 0.4561, "step": 91000 }, { "epoch": 22.527227722772277, "grad_norm": 0.3059730529785156, "learning_rate": 1.5126272180846718e-05, "loss": 0.4575, "step": 91010 }, { "epoch": 22.52970297029703, "grad_norm": 0.3057188391685486, "learning_rate": 1.5116399403041221e-05, "loss": 0.4539, "step": 91020 }, { "epoch": 22.532178217821784, "grad_norm": 0.29198747873306274, "learning_rate": 1.5106529274414983e-05, "loss": 0.4607, "step": 91030 }, { "epoch": 22.534653465346533, "grad_norm": 0.3159915804862976, "learning_rate": 1.5096661795717581e-05, "loss": 0.4557, "step": 91040 }, { "epoch": 22.537128712871286, "grad_norm": 0.30231931805610657, "learning_rate": 1.5086796967698374e-05, "loss": 0.4609, "step": 91050 }, { "epoch": 22.53960396039604, "grad_norm": 0.2909601628780365, "learning_rate": 1.5076934791106512e-05, "loss": 0.4547, "step": 91060 }, { "epoch": 22.542079207920793, "grad_norm": 0.2788833975791931, "learning_rate": 1.5067075266690983e-05, "loss": 0.4595, "step": 91070 }, { "epoch": 22.544554455445546, "grad_norm": 0.2789647579193115, "learning_rate": 1.5057218395200567e-05, "loss": 0.457, "step": 91080 }, { "epoch": 22.547029702970296, "grad_norm": 0.2927896976470947, "learning_rate": 1.5047364177383811e-05, "loss": 0.4576, "step": 91090 }, { "epoch": 22.54950495049505, "grad_norm": 0.33278337121009827, "learning_rate": 1.5037512613989046e-05, "loss": 0.4547, "step": 91100 }, { "epoch": 22.551980198019802, "grad_norm": 0.2733975648880005, "learning_rate": 1.50276637057645e-05, "loss": 0.4585, "step": 91110 }, { "epoch": 22.554455445544555, "grad_norm": 0.29183652997016907, "learning_rate": 1.5017817453458094e-05, "loss": 0.4579, "step": 91120 }, { "epoch": 22.556930693069308, "grad_norm": 0.28387191891670227, "learning_rate": 1.5007973857817576e-05, "loss": 0.452, "step": 91130 }, { "epoch": 22.55940594059406, "grad_norm": 0.28690052032470703, "learning_rate": 1.4998132919590518e-05, "loss": 0.4536, "step": 91140 }, { "epoch": 22.56188118811881, "grad_norm": 0.2813664376735687, "learning_rate": 1.4988294639524287e-05, "loss": 0.4577, "step": 91150 }, { "epoch": 22.564356435643564, "grad_norm": 0.2938475012779236, "learning_rate": 1.497845901836602e-05, "loss": 0.4613, "step": 91160 }, { "epoch": 22.566831683168317, "grad_norm": 0.2934369742870331, "learning_rate": 1.496862605686265e-05, "loss": 0.4564, "step": 91170 }, { "epoch": 22.56930693069307, "grad_norm": 0.27708137035369873, "learning_rate": 1.4958795755760952e-05, "loss": 0.4595, "step": 91180 }, { "epoch": 22.571782178217823, "grad_norm": 0.29358768463134766, "learning_rate": 1.494896811580747e-05, "loss": 0.4484, "step": 91190 }, { "epoch": 22.574257425742573, "grad_norm": 0.30600690841674805, "learning_rate": 1.4939143137748535e-05, "loss": 0.4576, "step": 91200 }, { "epoch": 22.576732673267326, "grad_norm": 0.26628586649894714, "learning_rate": 1.4929320822330294e-05, "loss": 0.4541, "step": 91210 }, { "epoch": 22.57920792079208, "grad_norm": 0.27127835154533386, "learning_rate": 1.4919501170298711e-05, "loss": 0.4543, "step": 91220 }, { "epoch": 22.581683168316832, "grad_norm": 0.277919739484787, "learning_rate": 1.490968418239948e-05, "loss": 0.4531, "step": 91230 }, { "epoch": 22.584158415841586, "grad_norm": 0.28274863958358765, "learning_rate": 1.489986985937818e-05, "loss": 0.4565, "step": 91240 }, { "epoch": 22.586633663366335, "grad_norm": 0.29646536707878113, "learning_rate": 1.4890058201980105e-05, "loss": 0.4522, "step": 91250 }, { "epoch": 22.58910891089109, "grad_norm": 0.28865760564804077, "learning_rate": 1.4880249210950419e-05, "loss": 0.4569, "step": 91260 }, { "epoch": 22.59158415841584, "grad_norm": 0.2722833752632141, "learning_rate": 1.4870442887034025e-05, "loss": 0.4583, "step": 91270 }, { "epoch": 22.594059405940595, "grad_norm": 0.2897284924983978, "learning_rate": 1.4860639230975671e-05, "loss": 0.455, "step": 91280 }, { "epoch": 22.596534653465348, "grad_norm": 0.31671494245529175, "learning_rate": 1.4850838243519849e-05, "loss": 0.4536, "step": 91290 }, { "epoch": 22.599009900990097, "grad_norm": 0.27748724818229675, "learning_rate": 1.4841039925410904e-05, "loss": 0.4543, "step": 91300 }, { "epoch": 22.60148514851485, "grad_norm": 0.33132264018058777, "learning_rate": 1.4831244277392958e-05, "loss": 0.4527, "step": 91310 }, { "epoch": 22.603960396039604, "grad_norm": 0.31505492329597473, "learning_rate": 1.4821451300209904e-05, "loss": 0.4534, "step": 91320 }, { "epoch": 22.606435643564357, "grad_norm": 0.28965073823928833, "learning_rate": 1.4811660994605465e-05, "loss": 0.4532, "step": 91330 }, { "epoch": 22.60891089108911, "grad_norm": 0.278996080160141, "learning_rate": 1.4801873361323165e-05, "loss": 0.4538, "step": 91340 }, { "epoch": 22.611386138613863, "grad_norm": 0.28477489948272705, "learning_rate": 1.4792088401106297e-05, "loss": 0.4596, "step": 91350 }, { "epoch": 22.613861386138613, "grad_norm": 0.28029876947402954, "learning_rate": 1.4782306114697947e-05, "loss": 0.4582, "step": 91360 }, { "epoch": 22.616336633663366, "grad_norm": 0.31631365418434143, "learning_rate": 1.4772526502841027e-05, "loss": 0.4575, "step": 91370 }, { "epoch": 22.61881188118812, "grad_norm": 0.3138810396194458, "learning_rate": 1.4762749566278262e-05, "loss": 0.456, "step": 91380 }, { "epoch": 22.621287128712872, "grad_norm": 0.27788832783699036, "learning_rate": 1.4752975305752115e-05, "loss": 0.4542, "step": 91390 }, { "epoch": 22.623762376237625, "grad_norm": 0.28710949420928955, "learning_rate": 1.4743203722004856e-05, "loss": 0.4583, "step": 91400 }, { "epoch": 22.626237623762375, "grad_norm": 0.2745669484138489, "learning_rate": 1.4733434815778624e-05, "loss": 0.4562, "step": 91410 }, { "epoch": 22.628712871287128, "grad_norm": 0.28018656373023987, "learning_rate": 1.4723668587815287e-05, "loss": 0.4547, "step": 91420 }, { "epoch": 22.63118811881188, "grad_norm": 0.28634926676750183, "learning_rate": 1.4713905038856497e-05, "loss": 0.4524, "step": 91430 }, { "epoch": 22.633663366336634, "grad_norm": 0.28870925307273865, "learning_rate": 1.4704144169643753e-05, "loss": 0.455, "step": 91440 }, { "epoch": 22.636138613861387, "grad_norm": 0.2955121695995331, "learning_rate": 1.4694385980918341e-05, "loss": 0.4543, "step": 91450 }, { "epoch": 22.638613861386137, "grad_norm": 0.2959040701389313, "learning_rate": 1.4684630473421317e-05, "loss": 0.4566, "step": 91460 }, { "epoch": 22.64108910891089, "grad_norm": 0.2667786777019501, "learning_rate": 1.467487764789352e-05, "loss": 0.4582, "step": 91470 }, { "epoch": 22.643564356435643, "grad_norm": 0.2586413025856018, "learning_rate": 1.466512750507567e-05, "loss": 0.4549, "step": 91480 }, { "epoch": 22.646039603960396, "grad_norm": 0.3172668516635895, "learning_rate": 1.4655380045708194e-05, "loss": 0.4567, "step": 91490 }, { "epoch": 22.64851485148515, "grad_norm": 0.2657909095287323, "learning_rate": 1.4645635270531332e-05, "loss": 0.4518, "step": 91500 }, { "epoch": 22.650990099009903, "grad_norm": 0.32700085639953613, "learning_rate": 1.463589318028516e-05, "loss": 0.4543, "step": 91510 }, { "epoch": 22.653465346534652, "grad_norm": 0.28135228157043457, "learning_rate": 1.4626153775709528e-05, "loss": 0.4547, "step": 91520 }, { "epoch": 22.655940594059405, "grad_norm": 0.26959964632987976, "learning_rate": 1.4616417057544069e-05, "loss": 0.4604, "step": 91530 }, { "epoch": 22.65841584158416, "grad_norm": 0.28587159514427185, "learning_rate": 1.4606683026528207e-05, "loss": 0.4571, "step": 91540 }, { "epoch": 22.66089108910891, "grad_norm": 0.32055097818374634, "learning_rate": 1.45969516834012e-05, "loss": 0.4527, "step": 91550 }, { "epoch": 22.663366336633665, "grad_norm": 0.31116029620170593, "learning_rate": 1.458722302890208e-05, "loss": 0.4606, "step": 91560 }, { "epoch": 22.665841584158414, "grad_norm": 0.27422404289245605, "learning_rate": 1.4577497063769657e-05, "loss": 0.4535, "step": 91570 }, { "epoch": 22.668316831683168, "grad_norm": 0.2762835919857025, "learning_rate": 1.4567773788742561e-05, "loss": 0.4564, "step": 91580 }, { "epoch": 22.67079207920792, "grad_norm": 0.27530360221862793, "learning_rate": 1.455805320455923e-05, "loss": 0.4568, "step": 91590 }, { "epoch": 22.673267326732674, "grad_norm": 0.2729698717594147, "learning_rate": 1.454833531195785e-05, "loss": 0.4588, "step": 91600 }, { "epoch": 22.675742574257427, "grad_norm": 0.2834424674510956, "learning_rate": 1.4538620111676454e-05, "loss": 0.4594, "step": 91610 }, { "epoch": 22.678217821782177, "grad_norm": 0.2700245976448059, "learning_rate": 1.4528907604452818e-05, "loss": 0.4573, "step": 91620 }, { "epoch": 22.68069306930693, "grad_norm": 0.29646745324134827, "learning_rate": 1.4519197791024581e-05, "loss": 0.4582, "step": 91630 }, { "epoch": 22.683168316831683, "grad_norm": 0.2779526114463806, "learning_rate": 1.45094906721291e-05, "loss": 0.4528, "step": 91640 }, { "epoch": 22.685643564356436, "grad_norm": 0.28947165608406067, "learning_rate": 1.44997862485036e-05, "loss": 0.4538, "step": 91650 }, { "epoch": 22.68811881188119, "grad_norm": 0.27092108130455017, "learning_rate": 1.4490084520885044e-05, "loss": 0.453, "step": 91660 }, { "epoch": 22.69059405940594, "grad_norm": 0.26339229941368103, "learning_rate": 1.4480385490010217e-05, "loss": 0.457, "step": 91670 }, { "epoch": 22.693069306930692, "grad_norm": 0.2885223925113678, "learning_rate": 1.4470689156615725e-05, "loss": 0.4559, "step": 91680 }, { "epoch": 22.695544554455445, "grad_norm": 0.31536585092544556, "learning_rate": 1.4460995521437898e-05, "loss": 0.4572, "step": 91690 }, { "epoch": 22.698019801980198, "grad_norm": 0.2776119112968445, "learning_rate": 1.4451304585212921e-05, "loss": 0.459, "step": 91700 }, { "epoch": 22.70049504950495, "grad_norm": 0.2723505198955536, "learning_rate": 1.4441616348676779e-05, "loss": 0.4583, "step": 91710 }, { "epoch": 22.702970297029704, "grad_norm": 0.2815389633178711, "learning_rate": 1.4431930812565213e-05, "loss": 0.4538, "step": 91720 }, { "epoch": 22.705445544554454, "grad_norm": 0.2863255441188812, "learning_rate": 1.4422247977613756e-05, "loss": 0.452, "step": 91730 }, { "epoch": 22.707920792079207, "grad_norm": 0.2750052809715271, "learning_rate": 1.4412567844557773e-05, "loss": 0.4589, "step": 91740 }, { "epoch": 22.71039603960396, "grad_norm": 0.2787324786186218, "learning_rate": 1.4402890414132419e-05, "loss": 0.4542, "step": 91750 }, { "epoch": 22.712871287128714, "grad_norm": 0.28458845615386963, "learning_rate": 1.4393215687072619e-05, "loss": 0.4543, "step": 91760 }, { "epoch": 22.715346534653467, "grad_norm": 0.2938506007194519, "learning_rate": 1.438354366411307e-05, "loss": 0.4605, "step": 91770 }, { "epoch": 22.717821782178216, "grad_norm": 0.27739638090133667, "learning_rate": 1.4373874345988365e-05, "loss": 0.4577, "step": 91780 }, { "epoch": 22.72029702970297, "grad_norm": 0.288512647151947, "learning_rate": 1.4364207733432788e-05, "loss": 0.4559, "step": 91790 }, { "epoch": 22.722772277227723, "grad_norm": 0.2952694892883301, "learning_rate": 1.4354543827180444e-05, "loss": 0.4547, "step": 91800 }, { "epoch": 22.725247524752476, "grad_norm": 0.27767065167427063, "learning_rate": 1.4344882627965261e-05, "loss": 0.4488, "step": 91810 }, { "epoch": 22.72772277227723, "grad_norm": 0.28807660937309265, "learning_rate": 1.4335224136520952e-05, "loss": 0.4624, "step": 91820 }, { "epoch": 22.730198019801982, "grad_norm": 0.2997969388961792, "learning_rate": 1.4325568353580998e-05, "loss": 0.4598, "step": 91830 }, { "epoch": 22.73267326732673, "grad_norm": 0.3032352924346924, "learning_rate": 1.4315915279878689e-05, "loss": 0.4556, "step": 91840 }, { "epoch": 22.735148514851485, "grad_norm": 0.2786375880241394, "learning_rate": 1.4306264916147116e-05, "loss": 0.4582, "step": 91850 }, { "epoch": 22.737623762376238, "grad_norm": 0.29703766107559204, "learning_rate": 1.4296617263119177e-05, "loss": 0.4583, "step": 91860 }, { "epoch": 22.74009900990099, "grad_norm": 0.2896001636981964, "learning_rate": 1.4286972321527526e-05, "loss": 0.4587, "step": 91870 }, { "epoch": 22.742574257425744, "grad_norm": 0.3058946430683136, "learning_rate": 1.427733009210464e-05, "loss": 0.4551, "step": 91880 }, { "epoch": 22.745049504950494, "grad_norm": 0.28017187118530273, "learning_rate": 1.4267690575582798e-05, "loss": 0.4549, "step": 91890 }, { "epoch": 22.747524752475247, "grad_norm": 0.3404656946659088, "learning_rate": 1.425805377269404e-05, "loss": 0.4523, "step": 91900 }, { "epoch": 22.75, "grad_norm": 0.29061615467071533, "learning_rate": 1.4248419684170217e-05, "loss": 0.4541, "step": 91910 }, { "epoch": 22.752475247524753, "grad_norm": 0.3475334644317627, "learning_rate": 1.4238788310742968e-05, "loss": 0.4616, "step": 91920 }, { "epoch": 22.754950495049506, "grad_norm": 0.3007899820804596, "learning_rate": 1.4229159653143764e-05, "loss": 0.4576, "step": 91930 }, { "epoch": 22.757425742574256, "grad_norm": 0.2752327024936676, "learning_rate": 1.4219533712103806e-05, "loss": 0.4557, "step": 91940 }, { "epoch": 22.75990099009901, "grad_norm": 0.2779841125011444, "learning_rate": 1.4209910488354139e-05, "loss": 0.4624, "step": 91950 }, { "epoch": 22.762376237623762, "grad_norm": 0.2837803363800049, "learning_rate": 1.4200289982625565e-05, "loss": 0.4539, "step": 91960 }, { "epoch": 22.764851485148515, "grad_norm": 0.27118930220603943, "learning_rate": 1.4190672195648708e-05, "loss": 0.448, "step": 91970 }, { "epoch": 22.76732673267327, "grad_norm": 0.2891641855239868, "learning_rate": 1.4181057128153996e-05, "loss": 0.4551, "step": 91980 }, { "epoch": 22.769801980198018, "grad_norm": 0.3074309229850769, "learning_rate": 1.417144478087159e-05, "loss": 0.4609, "step": 91990 }, { "epoch": 22.77227722772277, "grad_norm": 0.2938230037689209, "learning_rate": 1.4161835154531528e-05, "loss": 0.4549, "step": 92000 }, { "epoch": 22.774752475247524, "grad_norm": 0.2782328724861145, "learning_rate": 1.4152228249863554e-05, "loss": 0.4624, "step": 92010 }, { "epoch": 22.777227722772277, "grad_norm": 0.303148090839386, "learning_rate": 1.4142624067597283e-05, "loss": 0.4535, "step": 92020 }, { "epoch": 22.77970297029703, "grad_norm": 0.26946592330932617, "learning_rate": 1.4133022608462066e-05, "loss": 0.4571, "step": 92030 }, { "epoch": 22.782178217821784, "grad_norm": 0.28725066781044006, "learning_rate": 1.4123423873187076e-05, "loss": 0.4552, "step": 92040 }, { "epoch": 22.784653465346533, "grad_norm": 0.29427996277809143, "learning_rate": 1.4113827862501294e-05, "loss": 0.4538, "step": 92050 }, { "epoch": 22.787128712871286, "grad_norm": 0.2870928943157196, "learning_rate": 1.4104234577133457e-05, "loss": 0.4576, "step": 92060 }, { "epoch": 22.78960396039604, "grad_norm": 0.2689356207847595, "learning_rate": 1.4094644017812086e-05, "loss": 0.4575, "step": 92070 }, { "epoch": 22.792079207920793, "grad_norm": 0.2929190397262573, "learning_rate": 1.4085056185265572e-05, "loss": 0.4548, "step": 92080 }, { "epoch": 22.794554455445546, "grad_norm": 0.273712158203125, "learning_rate": 1.407547108022203e-05, "loss": 0.4509, "step": 92090 }, { "epoch": 22.797029702970296, "grad_norm": 0.27621614933013916, "learning_rate": 1.4065888703409357e-05, "loss": 0.453, "step": 92100 }, { "epoch": 22.79950495049505, "grad_norm": 0.3100840151309967, "learning_rate": 1.4056309055555294e-05, "loss": 0.4645, "step": 92110 }, { "epoch": 22.801980198019802, "grad_norm": 0.26787108182907104, "learning_rate": 1.4046732137387363e-05, "loss": 0.4616, "step": 92120 }, { "epoch": 22.804455445544555, "grad_norm": 0.2711201310157776, "learning_rate": 1.4037157949632861e-05, "loss": 0.4527, "step": 92130 }, { "epoch": 22.806930693069308, "grad_norm": 0.2926148772239685, "learning_rate": 1.4027586493018862e-05, "loss": 0.4586, "step": 92140 }, { "epoch": 22.80940594059406, "grad_norm": 0.2730569541454315, "learning_rate": 1.4018017768272268e-05, "loss": 0.4534, "step": 92150 }, { "epoch": 22.81188118811881, "grad_norm": 0.28374817967414856, "learning_rate": 1.400845177611978e-05, "loss": 0.4486, "step": 92160 }, { "epoch": 22.814356435643564, "grad_norm": 0.2651922106742859, "learning_rate": 1.3998888517287845e-05, "loss": 0.4611, "step": 92170 }, { "epoch": 22.816831683168317, "grad_norm": 0.2637994885444641, "learning_rate": 1.3989327992502737e-05, "loss": 0.4552, "step": 92180 }, { "epoch": 22.81930693069307, "grad_norm": 0.2784987986087799, "learning_rate": 1.3979770202490533e-05, "loss": 0.4577, "step": 92190 }, { "epoch": 22.821782178217823, "grad_norm": 0.2785949409008026, "learning_rate": 1.3970215147977072e-05, "loss": 0.4501, "step": 92200 }, { "epoch": 22.824257425742573, "grad_norm": 0.27097994089126587, "learning_rate": 1.3960662829687977e-05, "loss": 0.4591, "step": 92210 }, { "epoch": 22.826732673267326, "grad_norm": 0.2679927349090576, "learning_rate": 1.3951113248348701e-05, "loss": 0.4587, "step": 92220 }, { "epoch": 22.82920792079208, "grad_norm": 0.29725298285484314, "learning_rate": 1.3941566404684493e-05, "loss": 0.4506, "step": 92230 }, { "epoch": 22.831683168316832, "grad_norm": 0.2620542049407959, "learning_rate": 1.3932022299420328e-05, "loss": 0.4511, "step": 92240 }, { "epoch": 22.834158415841586, "grad_norm": 0.3131125569343567, "learning_rate": 1.3922480933281062e-05, "loss": 0.4542, "step": 92250 }, { "epoch": 22.836633663366335, "grad_norm": 0.27855363488197327, "learning_rate": 1.391294230699126e-05, "loss": 0.4538, "step": 92260 }, { "epoch": 22.83910891089109, "grad_norm": 0.2841041684150696, "learning_rate": 1.390340642127535e-05, "loss": 0.4574, "step": 92270 }, { "epoch": 22.84158415841584, "grad_norm": 0.2826424241065979, "learning_rate": 1.3893873276857495e-05, "loss": 0.4577, "step": 92280 }, { "epoch": 22.844059405940595, "grad_norm": 0.2893073260784149, "learning_rate": 1.3884342874461682e-05, "loss": 0.4683, "step": 92290 }, { "epoch": 22.846534653465348, "grad_norm": 0.28969261050224304, "learning_rate": 1.3874815214811698e-05, "loss": 0.4521, "step": 92300 }, { "epoch": 22.849009900990097, "grad_norm": 0.3064957559108734, "learning_rate": 1.3865290298631085e-05, "loss": 0.4599, "step": 92310 }, { "epoch": 22.85148514851485, "grad_norm": 0.2802755832672119, "learning_rate": 1.3855768126643214e-05, "loss": 0.4543, "step": 92320 }, { "epoch": 22.853960396039604, "grad_norm": 0.25513172149658203, "learning_rate": 1.3846248699571212e-05, "loss": 0.4561, "step": 92330 }, { "epoch": 22.856435643564357, "grad_norm": 0.27123621106147766, "learning_rate": 1.3836732018138021e-05, "loss": 0.4553, "step": 92340 }, { "epoch": 22.85891089108911, "grad_norm": 0.2734934389591217, "learning_rate": 1.3827218083066396e-05, "loss": 0.461, "step": 92350 }, { "epoch": 22.861386138613863, "grad_norm": 0.2818988561630249, "learning_rate": 1.3817706895078841e-05, "loss": 0.4539, "step": 92360 }, { "epoch": 22.863861386138613, "grad_norm": 0.25541621446609497, "learning_rate": 1.3808198454897642e-05, "loss": 0.4533, "step": 92370 }, { "epoch": 22.866336633663366, "grad_norm": 0.31693655252456665, "learning_rate": 1.379869276324493e-05, "loss": 0.451, "step": 92380 }, { "epoch": 22.86881188118812, "grad_norm": 0.28493475914001465, "learning_rate": 1.3789189820842612e-05, "loss": 0.4532, "step": 92390 }, { "epoch": 22.871287128712872, "grad_norm": 0.2597072124481201, "learning_rate": 1.377968962841234e-05, "loss": 0.4522, "step": 92400 }, { "epoch": 22.873762376237625, "grad_norm": 0.28482869267463684, "learning_rate": 1.3770192186675606e-05, "loss": 0.4551, "step": 92410 }, { "epoch": 22.876237623762375, "grad_norm": 0.26448988914489746, "learning_rate": 1.3760697496353691e-05, "loss": 0.4612, "step": 92420 }, { "epoch": 22.878712871287128, "grad_norm": 0.31263992190361023, "learning_rate": 1.3751205558167651e-05, "loss": 0.4545, "step": 92430 }, { "epoch": 22.88118811881188, "grad_norm": 0.27898654341697693, "learning_rate": 1.3741716372838304e-05, "loss": 0.4531, "step": 92440 }, { "epoch": 22.883663366336634, "grad_norm": 0.27495846152305603, "learning_rate": 1.3732229941086322e-05, "loss": 0.4578, "step": 92450 }, { "epoch": 22.886138613861387, "grad_norm": 0.3052781820297241, "learning_rate": 1.3722746263632141e-05, "loss": 0.4513, "step": 92460 }, { "epoch": 22.888613861386137, "grad_norm": 0.275034099817276, "learning_rate": 1.3713265341195975e-05, "loss": 0.4545, "step": 92470 }, { "epoch": 22.89108910891089, "grad_norm": 0.26704853773117065, "learning_rate": 1.3703787174497802e-05, "loss": 0.4561, "step": 92480 }, { "epoch": 22.893564356435643, "grad_norm": 0.2689867913722992, "learning_rate": 1.3694311764257483e-05, "loss": 0.4606, "step": 92490 }, { "epoch": 22.896039603960396, "grad_norm": 0.2887977361679077, "learning_rate": 1.3684839111194597e-05, "loss": 0.4515, "step": 92500 }, { "epoch": 22.89851485148515, "grad_norm": 0.2883581519126892, "learning_rate": 1.3675369216028506e-05, "loss": 0.4585, "step": 92510 }, { "epoch": 22.900990099009903, "grad_norm": 0.268919974565506, "learning_rate": 1.3665902079478399e-05, "loss": 0.4549, "step": 92520 }, { "epoch": 22.903465346534652, "grad_norm": 0.2707456648349762, "learning_rate": 1.3656437702263258e-05, "loss": 0.4586, "step": 92530 }, { "epoch": 22.905940594059405, "grad_norm": 0.27755993604660034, "learning_rate": 1.3646976085101815e-05, "loss": 0.4581, "step": 92540 }, { "epoch": 22.90841584158416, "grad_norm": 0.3188958764076233, "learning_rate": 1.3637517228712642e-05, "loss": 0.4464, "step": 92550 }, { "epoch": 22.91089108910891, "grad_norm": 0.276246041059494, "learning_rate": 1.3628061133814052e-05, "loss": 0.4542, "step": 92560 }, { "epoch": 22.913366336633665, "grad_norm": 0.28031131625175476, "learning_rate": 1.36186078011242e-05, "loss": 0.4553, "step": 92570 }, { "epoch": 22.915841584158414, "grad_norm": 0.2845943570137024, "learning_rate": 1.3609157231360975e-05, "loss": 0.4593, "step": 92580 }, { "epoch": 22.918316831683168, "grad_norm": 0.29377639293670654, "learning_rate": 1.3599709425242096e-05, "loss": 0.4555, "step": 92590 }, { "epoch": 22.92079207920792, "grad_norm": 0.27312538027763367, "learning_rate": 1.3590264383485079e-05, "loss": 0.4581, "step": 92600 }, { "epoch": 22.923267326732674, "grad_norm": 0.2817402184009552, "learning_rate": 1.358082210680719e-05, "loss": 0.4527, "step": 92610 }, { "epoch": 22.925742574257427, "grad_norm": 0.2847234308719635, "learning_rate": 1.3571382595925525e-05, "loss": 0.4532, "step": 92620 }, { "epoch": 22.928217821782177, "grad_norm": 0.28738418221473694, "learning_rate": 1.3561945851556934e-05, "loss": 0.4599, "step": 92630 }, { "epoch": 22.93069306930693, "grad_norm": 0.2803628742694855, "learning_rate": 1.3552511874418095e-05, "loss": 0.4548, "step": 92640 }, { "epoch": 22.933168316831683, "grad_norm": 0.2844915986061096, "learning_rate": 1.3543080665225437e-05, "loss": 0.4508, "step": 92650 }, { "epoch": 22.935643564356436, "grad_norm": 0.30465033650398254, "learning_rate": 1.3533652224695221e-05, "loss": 0.4602, "step": 92660 }, { "epoch": 22.93811881188119, "grad_norm": 0.30287379026412964, "learning_rate": 1.3524226553543445e-05, "loss": 0.4571, "step": 92670 }, { "epoch": 22.94059405940594, "grad_norm": 0.2765454053878784, "learning_rate": 1.3514803652485942e-05, "loss": 0.4539, "step": 92680 }, { "epoch": 22.943069306930692, "grad_norm": 0.2885938882827759, "learning_rate": 1.3505383522238334e-05, "loss": 0.4579, "step": 92690 }, { "epoch": 22.945544554455445, "grad_norm": 0.31125906109809875, "learning_rate": 1.3495966163515993e-05, "loss": 0.4576, "step": 92700 }, { "epoch": 22.948019801980198, "grad_norm": 0.29051318764686584, "learning_rate": 1.3486551577034117e-05, "loss": 0.4571, "step": 92710 }, { "epoch": 22.95049504950495, "grad_norm": 0.26723071932792664, "learning_rate": 1.3477139763507696e-05, "loss": 0.4556, "step": 92720 }, { "epoch": 22.952970297029704, "grad_norm": 0.2769916355609894, "learning_rate": 1.3467730723651479e-05, "loss": 0.4512, "step": 92730 }, { "epoch": 22.955445544554454, "grad_norm": 0.2812923491001129, "learning_rate": 1.3458324458180005e-05, "loss": 0.4585, "step": 92740 }, { "epoch": 22.957920792079207, "grad_norm": 0.2784523367881775, "learning_rate": 1.3448920967807632e-05, "loss": 0.457, "step": 92750 }, { "epoch": 22.96039603960396, "grad_norm": 0.26995253562927246, "learning_rate": 1.3439520253248516e-05, "loss": 0.4625, "step": 92760 }, { "epoch": 22.962871287128714, "grad_norm": 0.3073599636554718, "learning_rate": 1.343012231521656e-05, "loss": 0.4542, "step": 92770 }, { "epoch": 22.965346534653467, "grad_norm": 0.27923280000686646, "learning_rate": 1.3420727154425439e-05, "loss": 0.4564, "step": 92780 }, { "epoch": 22.967821782178216, "grad_norm": 0.2736437916755676, "learning_rate": 1.3411334771588718e-05, "loss": 0.4578, "step": 92790 }, { "epoch": 22.97029702970297, "grad_norm": 0.29077228903770447, "learning_rate": 1.3401945167419654e-05, "loss": 0.4595, "step": 92800 }, { "epoch": 22.972772277227723, "grad_norm": 0.27793827652931213, "learning_rate": 1.3392558342631323e-05, "loss": 0.4538, "step": 92810 }, { "epoch": 22.975247524752476, "grad_norm": 0.2599557340145111, "learning_rate": 1.338317429793659e-05, "loss": 0.455, "step": 92820 }, { "epoch": 22.97772277227723, "grad_norm": 0.3013426661491394, "learning_rate": 1.3373793034048137e-05, "loss": 0.453, "step": 92830 }, { "epoch": 22.980198019801982, "grad_norm": 0.28750914335250854, "learning_rate": 1.336441455167839e-05, "loss": 0.4517, "step": 92840 }, { "epoch": 22.98267326732673, "grad_norm": 0.2641003429889679, "learning_rate": 1.335503885153957e-05, "loss": 0.4561, "step": 92850 }, { "epoch": 22.985148514851485, "grad_norm": 0.29188570380210876, "learning_rate": 1.3345665934343715e-05, "loss": 0.4521, "step": 92860 }, { "epoch": 22.987623762376238, "grad_norm": 0.29967954754829407, "learning_rate": 1.3336295800802646e-05, "loss": 0.4522, "step": 92870 }, { "epoch": 22.99009900990099, "grad_norm": 0.2822265625, "learning_rate": 1.332692845162794e-05, "loss": 0.4584, "step": 92880 }, { "epoch": 22.992574257425744, "grad_norm": 0.31248101592063904, "learning_rate": 1.3317563887530998e-05, "loss": 0.4605, "step": 92890 }, { "epoch": 22.995049504950494, "grad_norm": 0.274570494890213, "learning_rate": 1.3308202109223006e-05, "loss": 0.4546, "step": 92900 }, { "epoch": 22.997524752475247, "grad_norm": 0.27764251828193665, "learning_rate": 1.3298843117414906e-05, "loss": 0.4601, "step": 92910 }, { "epoch": 23.0, "grad_norm": 0.2740662693977356, "learning_rate": 1.3289486912817479e-05, "loss": 0.4506, "step": 92920 }, { "epoch": 23.002475247524753, "grad_norm": 0.29922494292259216, "learning_rate": 1.328013349614124e-05, "loss": 0.4534, "step": 92930 }, { "epoch": 23.004950495049506, "grad_norm": 0.2765539884567261, "learning_rate": 1.3270782868096543e-05, "loss": 0.453, "step": 92940 }, { "epoch": 23.007425742574256, "grad_norm": 0.3158343434333801, "learning_rate": 1.3261435029393482e-05, "loss": 0.4581, "step": 92950 }, { "epoch": 23.00990099009901, "grad_norm": 0.2876065671443939, "learning_rate": 1.3252089980741993e-05, "loss": 0.4489, "step": 92960 }, { "epoch": 23.012376237623762, "grad_norm": 0.26830101013183594, "learning_rate": 1.3242747722851745e-05, "loss": 0.4597, "step": 92970 }, { "epoch": 23.014851485148515, "grad_norm": 0.3342415988445282, "learning_rate": 1.3233408256432223e-05, "loss": 0.46, "step": 92980 }, { "epoch": 23.01732673267327, "grad_norm": 0.27177363634109497, "learning_rate": 1.3224071582192721e-05, "loss": 0.4541, "step": 92990 }, { "epoch": 23.019801980198018, "grad_norm": 0.2681385576725006, "learning_rate": 1.3214737700842272e-05, "loss": 0.4568, "step": 93000 }, { "epoch": 23.02227722772277, "grad_norm": 0.3008459806442261, "learning_rate": 1.3205406613089744e-05, "loss": 0.4534, "step": 93010 }, { "epoch": 23.024752475247524, "grad_norm": 0.2793739438056946, "learning_rate": 1.3196078319643745e-05, "loss": 0.4607, "step": 93020 }, { "epoch": 23.027227722772277, "grad_norm": 0.31118983030319214, "learning_rate": 1.3186752821212733e-05, "loss": 0.4542, "step": 93030 }, { "epoch": 23.02970297029703, "grad_norm": 0.2761774957180023, "learning_rate": 1.317743011850488e-05, "loss": 0.4545, "step": 93040 }, { "epoch": 23.032178217821784, "grad_norm": 0.27503490447998047, "learning_rate": 1.3168110212228202e-05, "loss": 0.4572, "step": 93050 }, { "epoch": 23.034653465346533, "grad_norm": 0.28273117542266846, "learning_rate": 1.31587931030905e-05, "loss": 0.459, "step": 93060 }, { "epoch": 23.037128712871286, "grad_norm": 0.30444812774658203, "learning_rate": 1.3149478791799325e-05, "loss": 0.4541, "step": 93070 }, { "epoch": 23.03960396039604, "grad_norm": 0.29093262553215027, "learning_rate": 1.3140167279062021e-05, "loss": 0.4544, "step": 93080 }, { "epoch": 23.042079207920793, "grad_norm": 0.2576877176761627, "learning_rate": 1.3130858565585785e-05, "loss": 0.4541, "step": 93090 }, { "epoch": 23.044554455445546, "grad_norm": 0.2840493321418762, "learning_rate": 1.3121552652077523e-05, "loss": 0.4553, "step": 93100 }, { "epoch": 23.047029702970296, "grad_norm": 0.2705690264701843, "learning_rate": 1.3112249539243948e-05, "loss": 0.4556, "step": 93110 }, { "epoch": 23.04950495049505, "grad_norm": 0.29980945587158203, "learning_rate": 1.3102949227791588e-05, "loss": 0.4597, "step": 93120 }, { "epoch": 23.051980198019802, "grad_norm": 0.26997488737106323, "learning_rate": 1.3093651718426741e-05, "loss": 0.4579, "step": 93130 }, { "epoch": 23.054455445544555, "grad_norm": 0.2800385355949402, "learning_rate": 1.3084357011855492e-05, "loss": 0.4573, "step": 93140 }, { "epoch": 23.056930693069308, "grad_norm": 0.28534719347953796, "learning_rate": 1.3075065108783685e-05, "loss": 0.4546, "step": 93150 }, { "epoch": 23.059405940594058, "grad_norm": 0.28426551818847656, "learning_rate": 1.3065776009917008e-05, "loss": 0.4549, "step": 93160 }, { "epoch": 23.06188118811881, "grad_norm": 0.27529677748680115, "learning_rate": 1.3056489715960907e-05, "loss": 0.4531, "step": 93170 }, { "epoch": 23.064356435643564, "grad_norm": 0.30324098467826843, "learning_rate": 1.304720622762059e-05, "loss": 0.457, "step": 93180 }, { "epoch": 23.066831683168317, "grad_norm": 0.28698450326919556, "learning_rate": 1.3037925545601099e-05, "loss": 0.4562, "step": 93190 }, { "epoch": 23.06930693069307, "grad_norm": 0.2970496714115143, "learning_rate": 1.3028647670607241e-05, "loss": 0.4588, "step": 93200 }, { "epoch": 23.071782178217823, "grad_norm": 0.2558928430080414, "learning_rate": 1.301937260334361e-05, "loss": 0.457, "step": 93210 }, { "epoch": 23.074257425742573, "grad_norm": 0.28065282106399536, "learning_rate": 1.3010100344514559e-05, "loss": 0.4573, "step": 93220 }, { "epoch": 23.076732673267326, "grad_norm": 0.28969234228134155, "learning_rate": 1.3000830894824279e-05, "loss": 0.4539, "step": 93230 }, { "epoch": 23.07920792079208, "grad_norm": 0.2994759678840637, "learning_rate": 1.2991564254976728e-05, "loss": 0.4598, "step": 93240 }, { "epoch": 23.081683168316832, "grad_norm": 0.2849537432193756, "learning_rate": 1.2982300425675631e-05, "loss": 0.451, "step": 93250 }, { "epoch": 23.084158415841586, "grad_norm": 0.27828431129455566, "learning_rate": 1.2973039407624532e-05, "loss": 0.4608, "step": 93260 }, { "epoch": 23.086633663366335, "grad_norm": 0.2710697054862976, "learning_rate": 1.2963781201526715e-05, "loss": 0.4527, "step": 93270 }, { "epoch": 23.08910891089109, "grad_norm": 0.275319904088974, "learning_rate": 1.29545258080853e-05, "loss": 0.4616, "step": 93280 }, { "epoch": 23.09158415841584, "grad_norm": 0.2763826549053192, "learning_rate": 1.2945273228003191e-05, "loss": 0.4493, "step": 93290 }, { "epoch": 23.094059405940595, "grad_norm": 0.28691548109054565, "learning_rate": 1.293602346198302e-05, "loss": 0.4521, "step": 93300 }, { "epoch": 23.096534653465348, "grad_norm": 0.27013665437698364, "learning_rate": 1.2926776510727278e-05, "loss": 0.4588, "step": 93310 }, { "epoch": 23.099009900990097, "grad_norm": 0.2736288607120514, "learning_rate": 1.2917532374938191e-05, "loss": 0.4556, "step": 93320 }, { "epoch": 23.10148514851485, "grad_norm": 0.27806729078292847, "learning_rate": 1.2908291055317806e-05, "loss": 0.4544, "step": 93330 }, { "epoch": 23.103960396039604, "grad_norm": 0.27393561601638794, "learning_rate": 1.289905255256792e-05, "loss": 0.4547, "step": 93340 }, { "epoch": 23.106435643564357, "grad_norm": 0.2879808247089386, "learning_rate": 1.2889816867390148e-05, "loss": 0.4598, "step": 93350 }, { "epoch": 23.10891089108911, "grad_norm": 0.26963427662849426, "learning_rate": 1.288058400048589e-05, "loss": 0.4569, "step": 93360 }, { "epoch": 23.111386138613863, "grad_norm": 0.28993675112724304, "learning_rate": 1.2871353952556315e-05, "loss": 0.4489, "step": 93370 }, { "epoch": 23.113861386138613, "grad_norm": 0.2944217920303345, "learning_rate": 1.2862126724302348e-05, "loss": 0.4549, "step": 93380 }, { "epoch": 23.116336633663366, "grad_norm": 0.2650025188922882, "learning_rate": 1.2852902316424798e-05, "loss": 0.4532, "step": 93390 }, { "epoch": 23.11881188118812, "grad_norm": 0.26218971610069275, "learning_rate": 1.2843680729624163e-05, "loss": 0.4544, "step": 93400 }, { "epoch": 23.121287128712872, "grad_norm": 0.25865697860717773, "learning_rate": 1.2834461964600758e-05, "loss": 0.4544, "step": 93410 }, { "epoch": 23.123762376237625, "grad_norm": 0.30467352271080017, "learning_rate": 1.2825246022054693e-05, "loss": 0.4524, "step": 93420 }, { "epoch": 23.126237623762375, "grad_norm": 0.26597461104393005, "learning_rate": 1.2816032902685876e-05, "loss": 0.4523, "step": 93430 }, { "epoch": 23.128712871287128, "grad_norm": 0.27609458565711975, "learning_rate": 1.2806822607193963e-05, "loss": 0.4534, "step": 93440 }, { "epoch": 23.13118811881188, "grad_norm": 0.293660044670105, "learning_rate": 1.2797615136278406e-05, "loss": 0.4584, "step": 93450 }, { "epoch": 23.133663366336634, "grad_norm": 0.2860117256641388, "learning_rate": 1.278841049063847e-05, "loss": 0.4522, "step": 93460 }, { "epoch": 23.136138613861387, "grad_norm": 0.2686750888824463, "learning_rate": 1.2779208670973192e-05, "loss": 0.4529, "step": 93470 }, { "epoch": 23.138613861386137, "grad_norm": 0.2676382064819336, "learning_rate": 1.277000967798138e-05, "loss": 0.4574, "step": 93480 }, { "epoch": 23.14108910891089, "grad_norm": 0.29325899481773376, "learning_rate": 1.2760813512361614e-05, "loss": 0.4515, "step": 93490 }, { "epoch": 23.143564356435643, "grad_norm": 0.2673764228820801, "learning_rate": 1.2751620174812323e-05, "loss": 0.4574, "step": 93500 }, { "epoch": 23.146039603960396, "grad_norm": 0.28857991099357605, "learning_rate": 1.2742429666031669e-05, "loss": 0.4562, "step": 93510 }, { "epoch": 23.14851485148515, "grad_norm": 0.2819797396659851, "learning_rate": 1.2733241986717587e-05, "loss": 0.4516, "step": 93520 }, { "epoch": 23.150990099009903, "grad_norm": 0.2931899130344391, "learning_rate": 1.2724057137567835e-05, "loss": 0.4595, "step": 93530 }, { "epoch": 23.153465346534652, "grad_norm": 0.3037790358066559, "learning_rate": 1.2714875119279961e-05, "loss": 0.4621, "step": 93540 }, { "epoch": 23.155940594059405, "grad_norm": 0.2573161721229553, "learning_rate": 1.2705695932551243e-05, "loss": 0.4581, "step": 93550 }, { "epoch": 23.15841584158416, "grad_norm": 0.2918238937854767, "learning_rate": 1.2696519578078819e-05, "loss": 0.4564, "step": 93560 }, { "epoch": 23.16089108910891, "grad_norm": 0.2642275393009186, "learning_rate": 1.268734605655953e-05, "loss": 0.4609, "step": 93570 }, { "epoch": 23.163366336633665, "grad_norm": 0.2749636471271515, "learning_rate": 1.2678175368690082e-05, "loss": 0.453, "step": 93580 }, { "epoch": 23.165841584158414, "grad_norm": 0.2534898817539215, "learning_rate": 1.2669007515166903e-05, "loss": 0.4573, "step": 93590 }, { "epoch": 23.168316831683168, "grad_norm": 0.26054444909095764, "learning_rate": 1.2659842496686236e-05, "loss": 0.451, "step": 93600 }, { "epoch": 23.17079207920792, "grad_norm": 0.2576928436756134, "learning_rate": 1.2650680313944118e-05, "loss": 0.4581, "step": 93610 }, { "epoch": 23.173267326732674, "grad_norm": 0.2907864451408386, "learning_rate": 1.2641520967636338e-05, "loss": 0.4556, "step": 93620 }, { "epoch": 23.175742574257427, "grad_norm": 0.27472054958343506, "learning_rate": 1.2632364458458507e-05, "loss": 0.4547, "step": 93630 }, { "epoch": 23.178217821782177, "grad_norm": 0.25004109740257263, "learning_rate": 1.2623210787105976e-05, "loss": 0.4545, "step": 93640 }, { "epoch": 23.18069306930693, "grad_norm": 0.2842794954776764, "learning_rate": 1.2614059954273926e-05, "loss": 0.4551, "step": 93650 }, { "epoch": 23.183168316831683, "grad_norm": 0.26787465810775757, "learning_rate": 1.2604911960657306e-05, "loss": 0.4599, "step": 93660 }, { "epoch": 23.185643564356436, "grad_norm": 0.2833523750305176, "learning_rate": 1.2595766806950838e-05, "loss": 0.4575, "step": 93670 }, { "epoch": 23.18811881188119, "grad_norm": 0.2842061519622803, "learning_rate": 1.2586624493849019e-05, "loss": 0.4556, "step": 93680 }, { "epoch": 23.190594059405942, "grad_norm": 0.28047487139701843, "learning_rate": 1.2577485022046165e-05, "loss": 0.4498, "step": 93690 }, { "epoch": 23.193069306930692, "grad_norm": 0.27838051319122314, "learning_rate": 1.2568348392236368e-05, "loss": 0.4587, "step": 93700 }, { "epoch": 23.195544554455445, "grad_norm": 0.27215129137039185, "learning_rate": 1.2559214605113468e-05, "loss": 0.4527, "step": 93710 }, { "epoch": 23.198019801980198, "grad_norm": 0.32718995213508606, "learning_rate": 1.2550083661371132e-05, "loss": 0.4546, "step": 93720 }, { "epoch": 23.20049504950495, "grad_norm": 0.33289891481399536, "learning_rate": 1.254095556170281e-05, "loss": 0.4554, "step": 93730 }, { "epoch": 23.202970297029704, "grad_norm": 0.2795799672603607, "learning_rate": 1.2531830306801706e-05, "loss": 0.4531, "step": 93740 }, { "epoch": 23.205445544554454, "grad_norm": 0.2720200717449188, "learning_rate": 1.2522707897360808e-05, "loss": 0.4575, "step": 93750 }, { "epoch": 23.207920792079207, "grad_norm": 0.26854002475738525, "learning_rate": 1.2513588334072912e-05, "loss": 0.4551, "step": 93760 }, { "epoch": 23.21039603960396, "grad_norm": 0.26702654361724854, "learning_rate": 1.2504471617630609e-05, "loss": 0.46, "step": 93770 }, { "epoch": 23.212871287128714, "grad_norm": 0.2701220214366913, "learning_rate": 1.2495357748726239e-05, "loss": 0.4563, "step": 93780 }, { "epoch": 23.215346534653467, "grad_norm": 0.27297958731651306, "learning_rate": 1.248624672805191e-05, "loss": 0.4545, "step": 93790 }, { "epoch": 23.217821782178216, "grad_norm": 0.27528807520866394, "learning_rate": 1.24771385562996e-05, "loss": 0.4533, "step": 93800 }, { "epoch": 23.22029702970297, "grad_norm": 0.27046865224838257, "learning_rate": 1.2468033234160991e-05, "loss": 0.4534, "step": 93810 }, { "epoch": 23.222772277227723, "grad_norm": 0.26488572359085083, "learning_rate": 1.2458930762327553e-05, "loss": 0.4578, "step": 93820 }, { "epoch": 23.225247524752476, "grad_norm": 0.2661105692386627, "learning_rate": 1.244983114149057e-05, "loss": 0.4587, "step": 93830 }, { "epoch": 23.22772277227723, "grad_norm": 0.26372790336608887, "learning_rate": 1.2440734372341122e-05, "loss": 0.4548, "step": 93840 }, { "epoch": 23.230198019801982, "grad_norm": 0.2695234417915344, "learning_rate": 1.2431640455570026e-05, "loss": 0.4581, "step": 93850 }, { "epoch": 23.23267326732673, "grad_norm": 0.3242933452129364, "learning_rate": 1.2422549391867893e-05, "loss": 0.4576, "step": 93860 }, { "epoch": 23.235148514851485, "grad_norm": 0.2590925991535187, "learning_rate": 1.2413461181925146e-05, "loss": 0.4545, "step": 93870 }, { "epoch": 23.237623762376238, "grad_norm": 0.2919066846370697, "learning_rate": 1.2404375826431985e-05, "loss": 0.4572, "step": 93880 }, { "epoch": 23.24009900990099, "grad_norm": 0.280329167842865, "learning_rate": 1.2395293326078356e-05, "loss": 0.4532, "step": 93890 }, { "epoch": 23.242574257425744, "grad_norm": 0.29052019119262695, "learning_rate": 1.2386213681554032e-05, "loss": 0.4561, "step": 93900 }, { "epoch": 23.245049504950494, "grad_norm": 0.2720940411090851, "learning_rate": 1.2377136893548563e-05, "loss": 0.4604, "step": 93910 }, { "epoch": 23.247524752475247, "grad_norm": 0.27756643295288086, "learning_rate": 1.2368062962751243e-05, "loss": 0.455, "step": 93920 }, { "epoch": 23.25, "grad_norm": 0.27704301476478577, "learning_rate": 1.2358991889851201e-05, "loss": 0.4527, "step": 93930 }, { "epoch": 23.252475247524753, "grad_norm": 0.25495174527168274, "learning_rate": 1.2349923675537307e-05, "loss": 0.4469, "step": 93940 }, { "epoch": 23.254950495049506, "grad_norm": 0.2786136865615845, "learning_rate": 1.2340858320498256e-05, "loss": 0.4578, "step": 93950 }, { "epoch": 23.257425742574256, "grad_norm": 0.26209452748298645, "learning_rate": 1.2331795825422471e-05, "loss": 0.4569, "step": 93960 }, { "epoch": 23.25990099009901, "grad_norm": 0.2703257203102112, "learning_rate": 1.2322736190998218e-05, "loss": 0.4553, "step": 93970 }, { "epoch": 23.262376237623762, "grad_norm": 0.30127501487731934, "learning_rate": 1.2313679417913487e-05, "loss": 0.456, "step": 93980 }, { "epoch": 23.264851485148515, "grad_norm": 0.26305171847343445, "learning_rate": 1.2304625506856099e-05, "loss": 0.4559, "step": 93990 }, { "epoch": 23.26732673267327, "grad_norm": 0.26986485719680786, "learning_rate": 1.2295574458513653e-05, "loss": 0.4497, "step": 94000 }, { "epoch": 23.269801980198018, "grad_norm": 0.26854008436203003, "learning_rate": 1.228652627357348e-05, "loss": 0.4555, "step": 94010 }, { "epoch": 23.27227722772277, "grad_norm": 0.2609918713569641, "learning_rate": 1.2277480952722753e-05, "loss": 0.4592, "step": 94020 }, { "epoch": 23.274752475247524, "grad_norm": 0.26857736706733704, "learning_rate": 1.2268438496648416e-05, "loss": 0.4545, "step": 94030 }, { "epoch": 23.277227722772277, "grad_norm": 0.27145257592201233, "learning_rate": 1.2259398906037162e-05, "loss": 0.4543, "step": 94040 }, { "epoch": 23.27970297029703, "grad_norm": 0.27932193875312805, "learning_rate": 1.2250362181575486e-05, "loss": 0.4555, "step": 94050 }, { "epoch": 23.282178217821784, "grad_norm": 0.2920565605163574, "learning_rate": 1.2241328323949674e-05, "loss": 0.4552, "step": 94060 }, { "epoch": 23.284653465346533, "grad_norm": 0.2759397327899933, "learning_rate": 1.2232297333845805e-05, "loss": 0.4548, "step": 94070 }, { "epoch": 23.287128712871286, "grad_norm": 0.2696145176887512, "learning_rate": 1.2223269211949706e-05, "loss": 0.4533, "step": 94080 }, { "epoch": 23.28960396039604, "grad_norm": 0.2807368338108063, "learning_rate": 1.221424395894698e-05, "loss": 0.4535, "step": 94090 }, { "epoch": 23.292079207920793, "grad_norm": 0.25628596544265747, "learning_rate": 1.220522157552309e-05, "loss": 0.4559, "step": 94100 }, { "epoch": 23.294554455445546, "grad_norm": 0.2862738370895386, "learning_rate": 1.21962020623632e-05, "loss": 0.4604, "step": 94110 }, { "epoch": 23.297029702970296, "grad_norm": 0.27381089329719543, "learning_rate": 1.2187185420152258e-05, "loss": 0.454, "step": 94120 }, { "epoch": 23.29950495049505, "grad_norm": 0.26914626359939575, "learning_rate": 1.2178171649575048e-05, "loss": 0.4565, "step": 94130 }, { "epoch": 23.301980198019802, "grad_norm": 0.2767283022403717, "learning_rate": 1.2169160751316111e-05, "loss": 0.4551, "step": 94140 }, { "epoch": 23.304455445544555, "grad_norm": 0.3170936405658722, "learning_rate": 1.2160152726059749e-05, "loss": 0.4487, "step": 94150 }, { "epoch": 23.306930693069308, "grad_norm": 0.2674403488636017, "learning_rate": 1.2151147574490057e-05, "loss": 0.4504, "step": 94160 }, { "epoch": 23.309405940594058, "grad_norm": 0.24640868604183197, "learning_rate": 1.2142145297290924e-05, "loss": 0.4586, "step": 94170 }, { "epoch": 23.31188118811881, "grad_norm": 0.25715747475624084, "learning_rate": 1.213314589514603e-05, "loss": 0.4534, "step": 94180 }, { "epoch": 23.314356435643564, "grad_norm": 0.25387588143348694, "learning_rate": 1.212414936873879e-05, "loss": 0.4533, "step": 94190 }, { "epoch": 23.316831683168317, "grad_norm": 0.26045936346054077, "learning_rate": 1.2115155718752447e-05, "loss": 0.4622, "step": 94200 }, { "epoch": 23.31930693069307, "grad_norm": 0.2736327350139618, "learning_rate": 1.2106164945870024e-05, "loss": 0.4555, "step": 94210 }, { "epoch": 23.321782178217823, "grad_norm": 0.2785238027572632, "learning_rate": 1.2097177050774283e-05, "loss": 0.4565, "step": 94220 }, { "epoch": 23.324257425742573, "grad_norm": 0.27376899123191833, "learning_rate": 1.2088192034147822e-05, "loss": 0.461, "step": 94230 }, { "epoch": 23.326732673267326, "grad_norm": 0.30455243587493896, "learning_rate": 1.2079209896672967e-05, "loss": 0.4574, "step": 94240 }, { "epoch": 23.32920792079208, "grad_norm": 0.2837681770324707, "learning_rate": 1.2070230639031871e-05, "loss": 0.4538, "step": 94250 }, { "epoch": 23.331683168316832, "grad_norm": 0.27031537890434265, "learning_rate": 1.2061254261906435e-05, "loss": 0.4552, "step": 94260 }, { "epoch": 23.334158415841586, "grad_norm": 0.26908713579177856, "learning_rate": 1.205228076597838e-05, "loss": 0.4588, "step": 94270 }, { "epoch": 23.336633663366335, "grad_norm": 0.2692922055721283, "learning_rate": 1.204331015192915e-05, "loss": 0.4534, "step": 94280 }, { "epoch": 23.33910891089109, "grad_norm": 0.29532018303871155, "learning_rate": 1.2034342420440025e-05, "loss": 0.4585, "step": 94290 }, { "epoch": 23.34158415841584, "grad_norm": 0.28611764311790466, "learning_rate": 1.2025377572192054e-05, "loss": 0.4578, "step": 94300 }, { "epoch": 23.344059405940595, "grad_norm": 0.26295986771583557, "learning_rate": 1.2016415607866032e-05, "loss": 0.4542, "step": 94310 }, { "epoch": 23.346534653465348, "grad_norm": 0.25704729557037354, "learning_rate": 1.2007456528142586e-05, "loss": 0.4557, "step": 94320 }, { "epoch": 23.349009900990097, "grad_norm": 0.2661350667476654, "learning_rate": 1.199850033370208e-05, "loss": 0.4535, "step": 94330 }, { "epoch": 23.35148514851485, "grad_norm": 0.2873525619506836, "learning_rate": 1.198954702522469e-05, "loss": 0.4517, "step": 94340 }, { "epoch": 23.353960396039604, "grad_norm": 0.2686917781829834, "learning_rate": 1.1980596603390348e-05, "loss": 0.4548, "step": 94350 }, { "epoch": 23.356435643564357, "grad_norm": 0.26026853919029236, "learning_rate": 1.1971649068878788e-05, "loss": 0.4507, "step": 94360 }, { "epoch": 23.35891089108911, "grad_norm": 0.27740997076034546, "learning_rate": 1.196270442236953e-05, "loss": 0.456, "step": 94370 }, { "epoch": 23.361386138613863, "grad_norm": 0.27790331840515137, "learning_rate": 1.1953762664541845e-05, "loss": 0.4592, "step": 94380 }, { "epoch": 23.363861386138613, "grad_norm": 0.2646804749965668, "learning_rate": 1.1944823796074779e-05, "loss": 0.4573, "step": 94390 }, { "epoch": 23.366336633663366, "grad_norm": 0.2657192647457123, "learning_rate": 1.193588781764723e-05, "loss": 0.4555, "step": 94400 }, { "epoch": 23.36881188118812, "grad_norm": 0.34385523200035095, "learning_rate": 1.1926954729937795e-05, "loss": 0.4542, "step": 94410 }, { "epoch": 23.371287128712872, "grad_norm": 0.26852425932884216, "learning_rate": 1.1918024533624877e-05, "loss": 0.4475, "step": 94420 }, { "epoch": 23.373762376237625, "grad_norm": 0.2467423379421234, "learning_rate": 1.1909097229386674e-05, "loss": 0.4577, "step": 94430 }, { "epoch": 23.376237623762375, "grad_norm": 0.25374138355255127, "learning_rate": 1.1900172817901179e-05, "loss": 0.4554, "step": 94440 }, { "epoch": 23.378712871287128, "grad_norm": 0.275444358587265, "learning_rate": 1.1891251299846118e-05, "loss": 0.4584, "step": 94450 }, { "epoch": 23.38118811881188, "grad_norm": 0.2793377637863159, "learning_rate": 1.1882332675899006e-05, "loss": 0.4603, "step": 94460 }, { "epoch": 23.383663366336634, "grad_norm": 0.27347123622894287, "learning_rate": 1.1873416946737176e-05, "loss": 0.4466, "step": 94470 }, { "epoch": 23.386138613861387, "grad_norm": 0.3121391534805298, "learning_rate": 1.1864504113037733e-05, "loss": 0.4535, "step": 94480 }, { "epoch": 23.388613861386137, "grad_norm": 0.25482064485549927, "learning_rate": 1.1855594175477514e-05, "loss": 0.4612, "step": 94490 }, { "epoch": 23.39108910891089, "grad_norm": 0.25301551818847656, "learning_rate": 1.1846687134733181e-05, "loss": 0.4553, "step": 94500 }, { "epoch": 23.393564356435643, "grad_norm": 0.2819260060787201, "learning_rate": 1.1837782991481189e-05, "loss": 0.4572, "step": 94510 }, { "epoch": 23.396039603960396, "grad_norm": 0.31215986609458923, "learning_rate": 1.1828881746397734e-05, "loss": 0.4549, "step": 94520 }, { "epoch": 23.39851485148515, "grad_norm": 0.2793262302875519, "learning_rate": 1.1819983400158785e-05, "loss": 0.4581, "step": 94530 }, { "epoch": 23.400990099009903, "grad_norm": 0.2809358835220337, "learning_rate": 1.181108795344013e-05, "loss": 0.4618, "step": 94540 }, { "epoch": 23.403465346534652, "grad_norm": 0.2616093158721924, "learning_rate": 1.1802195406917333e-05, "loss": 0.4558, "step": 94550 }, { "epoch": 23.405940594059405, "grad_norm": 0.27718594670295715, "learning_rate": 1.17933057612657e-05, "loss": 0.4612, "step": 94560 }, { "epoch": 23.40841584158416, "grad_norm": 0.25724855065345764, "learning_rate": 1.1784419017160365e-05, "loss": 0.4555, "step": 94570 }, { "epoch": 23.41089108910891, "grad_norm": 0.27120885252952576, "learning_rate": 1.1775535175276187e-05, "loss": 0.4533, "step": 94580 }, { "epoch": 23.413366336633665, "grad_norm": 0.28888097405433655, "learning_rate": 1.1766654236287855e-05, "loss": 0.4551, "step": 94590 }, { "epoch": 23.415841584158414, "grad_norm": 0.26993924379348755, "learning_rate": 1.1757776200869835e-05, "loss": 0.4533, "step": 94600 }, { "epoch": 23.418316831683168, "grad_norm": 0.30333948135375977, "learning_rate": 1.1748901069696317e-05, "loss": 0.4584, "step": 94610 }, { "epoch": 23.42079207920792, "grad_norm": 0.315559983253479, "learning_rate": 1.1740028843441342e-05, "loss": 0.4617, "step": 94620 }, { "epoch": 23.423267326732674, "grad_norm": 0.2608976364135742, "learning_rate": 1.1731159522778667e-05, "loss": 0.4544, "step": 94630 }, { "epoch": 23.425742574257427, "grad_norm": 0.2618882954120636, "learning_rate": 1.1722293108381893e-05, "loss": 0.4557, "step": 94640 }, { "epoch": 23.428217821782177, "grad_norm": 0.24774517118930817, "learning_rate": 1.1713429600924326e-05, "loss": 0.4577, "step": 94650 }, { "epoch": 23.43069306930693, "grad_norm": 0.2598988115787506, "learning_rate": 1.1704569001079118e-05, "loss": 0.4552, "step": 94660 }, { "epoch": 23.433168316831683, "grad_norm": 0.25112950801849365, "learning_rate": 1.1695711309519175e-05, "loss": 0.4574, "step": 94670 }, { "epoch": 23.435643564356436, "grad_norm": 0.26379793882369995, "learning_rate": 1.1686856526917173e-05, "loss": 0.4575, "step": 94680 }, { "epoch": 23.43811881188119, "grad_norm": 0.26231271028518677, "learning_rate": 1.1678004653945563e-05, "loss": 0.4523, "step": 94690 }, { "epoch": 23.440594059405942, "grad_norm": 0.2626574635505676, "learning_rate": 1.166915569127659e-05, "loss": 0.4532, "step": 94700 }, { "epoch": 23.443069306930692, "grad_norm": 0.2706606388092041, "learning_rate": 1.16603096395823e-05, "loss": 0.4548, "step": 94710 }, { "epoch": 23.445544554455445, "grad_norm": 0.2880083918571472, "learning_rate": 1.1651466499534457e-05, "loss": 0.454, "step": 94720 }, { "epoch": 23.448019801980198, "grad_norm": 0.27269089221954346, "learning_rate": 1.1642626271804652e-05, "loss": 0.4579, "step": 94730 }, { "epoch": 23.45049504950495, "grad_norm": 0.26979756355285645, "learning_rate": 1.1633788957064263e-05, "loss": 0.4517, "step": 94740 }, { "epoch": 23.452970297029704, "grad_norm": 0.26491525769233704, "learning_rate": 1.1624954555984407e-05, "loss": 0.4542, "step": 94750 }, { "epoch": 23.455445544554454, "grad_norm": 0.25128984451293945, "learning_rate": 1.1616123069235985e-05, "loss": 0.453, "step": 94760 }, { "epoch": 23.457920792079207, "grad_norm": 0.25472262501716614, "learning_rate": 1.1607294497489708e-05, "loss": 0.4581, "step": 94770 }, { "epoch": 23.46039603960396, "grad_norm": 0.3206019401550293, "learning_rate": 1.1598468841416055e-05, "loss": 0.4532, "step": 94780 }, { "epoch": 23.462871287128714, "grad_norm": 0.2788274586200714, "learning_rate": 1.1589646101685264e-05, "loss": 0.4554, "step": 94790 }, { "epoch": 23.465346534653467, "grad_norm": 0.2664632499217987, "learning_rate": 1.1580826278967339e-05, "loss": 0.4524, "step": 94800 }, { "epoch": 23.467821782178216, "grad_norm": 0.2544911503791809, "learning_rate": 1.1572009373932136e-05, "loss": 0.452, "step": 94810 }, { "epoch": 23.47029702970297, "grad_norm": 0.3075639605522156, "learning_rate": 1.1563195387249226e-05, "loss": 0.4582, "step": 94820 }, { "epoch": 23.472772277227723, "grad_norm": 0.2717576026916504, "learning_rate": 1.1554384319587947e-05, "loss": 0.4517, "step": 94830 }, { "epoch": 23.475247524752476, "grad_norm": 0.2605363428592682, "learning_rate": 1.1545576171617462e-05, "loss": 0.4542, "step": 94840 }, { "epoch": 23.47772277227723, "grad_norm": 0.2562829256057739, "learning_rate": 1.1536770944006702e-05, "loss": 0.454, "step": 94850 }, { "epoch": 23.480198019801982, "grad_norm": 0.2764088213443756, "learning_rate": 1.152796863742434e-05, "loss": 0.4634, "step": 94860 }, { "epoch": 23.48267326732673, "grad_norm": 0.2926144599914551, "learning_rate": 1.1519169252538885e-05, "loss": 0.4553, "step": 94870 }, { "epoch": 23.485148514851485, "grad_norm": 0.2584834396839142, "learning_rate": 1.1510372790018554e-05, "loss": 0.4553, "step": 94880 }, { "epoch": 23.487623762376238, "grad_norm": 0.2476368099451065, "learning_rate": 1.1501579250531418e-05, "loss": 0.4576, "step": 94890 }, { "epoch": 23.49009900990099, "grad_norm": 0.26033252477645874, "learning_rate": 1.1492788634745256e-05, "loss": 0.4533, "step": 94900 }, { "epoch": 23.492574257425744, "grad_norm": 0.28448981046676636, "learning_rate": 1.1484000943327672e-05, "loss": 0.4611, "step": 94910 }, { "epoch": 23.495049504950494, "grad_norm": 0.25956615805625916, "learning_rate": 1.147521617694604e-05, "loss": 0.4516, "step": 94920 }, { "epoch": 23.497524752475247, "grad_norm": 0.3064250349998474, "learning_rate": 1.1466434336267495e-05, "loss": 0.4528, "step": 94930 }, { "epoch": 23.5, "grad_norm": 0.2654056251049042, "learning_rate": 1.1457655421958969e-05, "loss": 0.4592, "step": 94940 }, { "epoch": 23.502475247524753, "grad_norm": 0.2724801003932953, "learning_rate": 1.1448879434687143e-05, "loss": 0.4564, "step": 94950 }, { "epoch": 23.504950495049506, "grad_norm": 0.2999362051486969, "learning_rate": 1.1440106375118514e-05, "loss": 0.455, "step": 94960 }, { "epoch": 23.507425742574256, "grad_norm": 0.27317264676094055, "learning_rate": 1.1431336243919345e-05, "loss": 0.4529, "step": 94970 }, { "epoch": 23.50990099009901, "grad_norm": 0.2759837210178375, "learning_rate": 1.1422569041755655e-05, "loss": 0.4538, "step": 94980 }, { "epoch": 23.512376237623762, "grad_norm": 0.26786044239997864, "learning_rate": 1.141380476929324e-05, "loss": 0.4603, "step": 94990 }, { "epoch": 23.514851485148515, "grad_norm": 0.26585152745246887, "learning_rate": 1.1405043427197709e-05, "loss": 0.4547, "step": 95000 }, { "epoch": 23.51732673267327, "grad_norm": 0.25748246908187866, "learning_rate": 1.139628501613444e-05, "loss": 0.4571, "step": 95010 }, { "epoch": 23.519801980198018, "grad_norm": 0.27749067544937134, "learning_rate": 1.1387529536768548e-05, "loss": 0.4531, "step": 95020 }, { "epoch": 23.52227722772277, "grad_norm": 0.26016759872436523, "learning_rate": 1.1378776989764966e-05, "loss": 0.4527, "step": 95030 }, { "epoch": 23.524752475247524, "grad_norm": 0.26244524121284485, "learning_rate": 1.1370027375788412e-05, "loss": 0.4561, "step": 95040 }, { "epoch": 23.527227722772277, "grad_norm": 0.2820667624473572, "learning_rate": 1.136128069550334e-05, "loss": 0.4579, "step": 95050 }, { "epoch": 23.52970297029703, "grad_norm": 0.3045015037059784, "learning_rate": 1.1352536949573989e-05, "loss": 0.4537, "step": 95060 }, { "epoch": 23.532178217821784, "grad_norm": 0.2694218158721924, "learning_rate": 1.1343796138664408e-05, "loss": 0.4536, "step": 95070 }, { "epoch": 23.534653465346533, "grad_norm": 0.29231858253479004, "learning_rate": 1.1335058263438413e-05, "loss": 0.451, "step": 95080 }, { "epoch": 23.537128712871286, "grad_norm": 0.2770455479621887, "learning_rate": 1.1326323324559584e-05, "loss": 0.4569, "step": 95090 }, { "epoch": 23.53960396039604, "grad_norm": 0.2537807822227478, "learning_rate": 1.131759132269124e-05, "loss": 0.4522, "step": 95100 }, { "epoch": 23.542079207920793, "grad_norm": 0.25696346163749695, "learning_rate": 1.1308862258496584e-05, "loss": 0.4568, "step": 95110 }, { "epoch": 23.544554455445546, "grad_norm": 0.2684898376464844, "learning_rate": 1.1300136132638501e-05, "loss": 0.4522, "step": 95120 }, { "epoch": 23.547029702970296, "grad_norm": 0.2733089029788971, "learning_rate": 1.1291412945779667e-05, "loss": 0.4591, "step": 95130 }, { "epoch": 23.54950495049505, "grad_norm": 0.25263985991477966, "learning_rate": 1.1282692698582564e-05, "loss": 0.4537, "step": 95140 }, { "epoch": 23.551980198019802, "grad_norm": 0.2663271427154541, "learning_rate": 1.1273975391709452e-05, "loss": 0.4564, "step": 95150 }, { "epoch": 23.554455445544555, "grad_norm": 0.26273056864738464, "learning_rate": 1.1265261025822338e-05, "loss": 0.4563, "step": 95160 }, { "epoch": 23.556930693069308, "grad_norm": 0.2499534785747528, "learning_rate": 1.1256549601583011e-05, "loss": 0.4564, "step": 95170 }, { "epoch": 23.55940594059406, "grad_norm": 0.26061099767684937, "learning_rate": 1.1247841119653058e-05, "loss": 0.4601, "step": 95180 }, { "epoch": 23.56188118811881, "grad_norm": 0.266534686088562, "learning_rate": 1.1239135580693843e-05, "loss": 0.4536, "step": 95190 }, { "epoch": 23.564356435643564, "grad_norm": 0.24643263220787048, "learning_rate": 1.1230432985366468e-05, "loss": 0.4578, "step": 95200 }, { "epoch": 23.566831683168317, "grad_norm": 0.2936232388019562, "learning_rate": 1.1221733334331846e-05, "loss": 0.4566, "step": 95210 }, { "epoch": 23.56930693069307, "grad_norm": 0.27941110730171204, "learning_rate": 1.1213036628250684e-05, "loss": 0.4601, "step": 95220 }, { "epoch": 23.571782178217823, "grad_norm": 0.271324098110199, "learning_rate": 1.1204342867783401e-05, "loss": 0.4577, "step": 95230 }, { "epoch": 23.574257425742573, "grad_norm": 0.26069098711013794, "learning_rate": 1.119565205359026e-05, "loss": 0.4571, "step": 95240 }, { "epoch": 23.576732673267326, "grad_norm": 0.2544002830982208, "learning_rate": 1.1186964186331251e-05, "loss": 0.4486, "step": 95250 }, { "epoch": 23.57920792079208, "grad_norm": 0.29075589776039124, "learning_rate": 1.1178279266666175e-05, "loss": 0.4548, "step": 95260 }, { "epoch": 23.581683168316832, "grad_norm": 0.24326744675636292, "learning_rate": 1.116959729525458e-05, "loss": 0.4549, "step": 95270 }, { "epoch": 23.584158415841586, "grad_norm": 0.24896496534347534, "learning_rate": 1.1160918272755821e-05, "loss": 0.4571, "step": 95280 }, { "epoch": 23.586633663366335, "grad_norm": 0.2617568373680115, "learning_rate": 1.1152242199828994e-05, "loss": 0.4552, "step": 95290 }, { "epoch": 23.58910891089109, "grad_norm": 0.2579233646392822, "learning_rate": 1.1143569077132998e-05, "loss": 0.4598, "step": 95300 }, { "epoch": 23.59158415841584, "grad_norm": 0.24767418205738068, "learning_rate": 1.1134898905326512e-05, "loss": 0.4527, "step": 95310 }, { "epoch": 23.594059405940595, "grad_norm": 0.28965848684310913, "learning_rate": 1.1126231685067956e-05, "loss": 0.4515, "step": 95320 }, { "epoch": 23.596534653465348, "grad_norm": 0.25164559483528137, "learning_rate": 1.1117567417015557e-05, "loss": 0.4518, "step": 95330 }, { "epoch": 23.599009900990097, "grad_norm": 0.26921021938323975, "learning_rate": 1.1108906101827327e-05, "loss": 0.4565, "step": 95340 }, { "epoch": 23.60148514851485, "grad_norm": 0.2602376639842987, "learning_rate": 1.1100247740161012e-05, "loss": 0.4533, "step": 95350 }, { "epoch": 23.603960396039604, "grad_norm": 0.2467442899942398, "learning_rate": 1.109159233267416e-05, "loss": 0.4517, "step": 95360 }, { "epoch": 23.606435643564357, "grad_norm": 0.25974583625793457, "learning_rate": 1.1082939880024096e-05, "loss": 0.4546, "step": 95370 }, { "epoch": 23.60891089108911, "grad_norm": 0.2717379927635193, "learning_rate": 1.107429038286793e-05, "loss": 0.4554, "step": 95380 }, { "epoch": 23.611386138613863, "grad_norm": 0.26117199659347534, "learning_rate": 1.1065643841862521e-05, "loss": 0.4593, "step": 95390 }, { "epoch": 23.613861386138613, "grad_norm": 0.267156183719635, "learning_rate": 1.1057000257664491e-05, "loss": 0.4595, "step": 95400 }, { "epoch": 23.616336633663366, "grad_norm": 0.26336976885795593, "learning_rate": 1.1048359630930316e-05, "loss": 0.4571, "step": 95410 }, { "epoch": 23.61881188118812, "grad_norm": 0.26660794019699097, "learning_rate": 1.1039721962316174e-05, "loss": 0.4547, "step": 95420 }, { "epoch": 23.621287128712872, "grad_norm": 0.36222949624061584, "learning_rate": 1.1031087252478012e-05, "loss": 0.4568, "step": 95430 }, { "epoch": 23.623762376237625, "grad_norm": 0.2731405198574066, "learning_rate": 1.1022455502071605e-05, "loss": 0.4531, "step": 95440 }, { "epoch": 23.626237623762375, "grad_norm": 0.2522147595882416, "learning_rate": 1.101382671175249e-05, "loss": 0.454, "step": 95450 }, { "epoch": 23.628712871287128, "grad_norm": 0.2516346275806427, "learning_rate": 1.1005200882175947e-05, "loss": 0.4627, "step": 95460 }, { "epoch": 23.63118811881188, "grad_norm": 0.2553238570690155, "learning_rate": 1.0996578013997039e-05, "loss": 0.4563, "step": 95470 }, { "epoch": 23.633663366336634, "grad_norm": 0.2520558834075928, "learning_rate": 1.0987958107870632e-05, "loss": 0.4569, "step": 95480 }, { "epoch": 23.636138613861387, "grad_norm": 0.26169392466545105, "learning_rate": 1.0979341164451363e-05, "loss": 0.4552, "step": 95490 }, { "epoch": 23.638613861386137, "grad_norm": 0.2721419632434845, "learning_rate": 1.0970727184393603e-05, "loss": 0.4519, "step": 95500 }, { "epoch": 23.64108910891089, "grad_norm": 0.2608218193054199, "learning_rate": 1.0962116168351544e-05, "loss": 0.4568, "step": 95510 }, { "epoch": 23.643564356435643, "grad_norm": 0.2553229331970215, "learning_rate": 1.095350811697915e-05, "loss": 0.4574, "step": 95520 }, { "epoch": 23.646039603960396, "grad_norm": 0.2846487760543823, "learning_rate": 1.0944903030930125e-05, "loss": 0.4573, "step": 95530 }, { "epoch": 23.64851485148515, "grad_norm": 0.24524137377738953, "learning_rate": 1.0936300910857966e-05, "loss": 0.4601, "step": 95540 }, { "epoch": 23.650990099009903, "grad_norm": 0.25209447741508484, "learning_rate": 1.0927701757415948e-05, "loss": 0.4513, "step": 95550 }, { "epoch": 23.653465346534652, "grad_norm": 0.2721219062805176, "learning_rate": 1.0919105571257148e-05, "loss": 0.4578, "step": 95560 }, { "epoch": 23.655940594059405, "grad_norm": 0.2401401698589325, "learning_rate": 1.0910512353034352e-05, "loss": 0.4467, "step": 95570 }, { "epoch": 23.65841584158416, "grad_norm": 0.2483706772327423, "learning_rate": 1.0901922103400187e-05, "loss": 0.457, "step": 95580 }, { "epoch": 23.66089108910891, "grad_norm": 0.2740303575992584, "learning_rate": 1.0893334823007001e-05, "loss": 0.4558, "step": 95590 }, { "epoch": 23.663366336633665, "grad_norm": 0.26972806453704834, "learning_rate": 1.0884750512506952e-05, "loss": 0.4567, "step": 95600 }, { "epoch": 23.665841584158414, "grad_norm": 0.2590489983558655, "learning_rate": 1.0876169172551975e-05, "loss": 0.4536, "step": 95610 }, { "epoch": 23.668316831683168, "grad_norm": 0.2940957844257355, "learning_rate": 1.0867590803793748e-05, "loss": 0.4524, "step": 95620 }, { "epoch": 23.67079207920792, "grad_norm": 0.2616775631904602, "learning_rate": 1.0859015406883755e-05, "loss": 0.4553, "step": 95630 }, { "epoch": 23.673267326732674, "grad_norm": 0.2537645697593689, "learning_rate": 1.0850442982473224e-05, "loss": 0.4588, "step": 95640 }, { "epoch": 23.675742574257427, "grad_norm": 0.2725614607334137, "learning_rate": 1.0841873531213197e-05, "loss": 0.4533, "step": 95650 }, { "epoch": 23.678217821782177, "grad_norm": 0.27950364351272583, "learning_rate": 1.0833307053754438e-05, "loss": 0.4556, "step": 95660 }, { "epoch": 23.68069306930693, "grad_norm": 0.24686011672019958, "learning_rate": 1.0824743550747535e-05, "loss": 0.4553, "step": 95670 }, { "epoch": 23.683168316831683, "grad_norm": 0.2673272490501404, "learning_rate": 1.0816183022842835e-05, "loss": 0.4579, "step": 95680 }, { "epoch": 23.685643564356436, "grad_norm": 0.25770822167396545, "learning_rate": 1.0807625470690442e-05, "loss": 0.4544, "step": 95690 }, { "epoch": 23.68811881188119, "grad_norm": 0.27177557349205017, "learning_rate": 1.0799070894940222e-05, "loss": 0.4556, "step": 95700 }, { "epoch": 23.69059405940594, "grad_norm": 0.27444273233413696, "learning_rate": 1.0790519296241897e-05, "loss": 0.4594, "step": 95710 }, { "epoch": 23.693069306930692, "grad_norm": 0.2783656418323517, "learning_rate": 1.0781970675244863e-05, "loss": 0.4528, "step": 95720 }, { "epoch": 23.695544554455445, "grad_norm": 0.2782231569290161, "learning_rate": 1.0773425032598333e-05, "loss": 0.4574, "step": 95730 }, { "epoch": 23.698019801980198, "grad_norm": 0.2721543610095978, "learning_rate": 1.0764882368951296e-05, "loss": 0.4502, "step": 95740 }, { "epoch": 23.70049504950495, "grad_norm": 0.2741278111934662, "learning_rate": 1.0756342684952526e-05, "loss": 0.4533, "step": 95750 }, { "epoch": 23.702970297029704, "grad_norm": 0.2553066611289978, "learning_rate": 1.0747805981250542e-05, "loss": 0.45, "step": 95760 }, { "epoch": 23.705445544554454, "grad_norm": 0.2516101896762848, "learning_rate": 1.0739272258493643e-05, "loss": 0.452, "step": 95770 }, { "epoch": 23.707920792079207, "grad_norm": 0.28731927275657654, "learning_rate": 1.0730741517329912e-05, "loss": 0.4553, "step": 95780 }, { "epoch": 23.71039603960396, "grad_norm": 0.2609249949455261, "learning_rate": 1.0722213758407229e-05, "loss": 0.4619, "step": 95790 }, { "epoch": 23.712871287128714, "grad_norm": 0.25780656933784485, "learning_rate": 1.0713688982373182e-05, "loss": 0.4531, "step": 95800 }, { "epoch": 23.715346534653467, "grad_norm": 0.2795276641845703, "learning_rate": 1.0705167189875187e-05, "loss": 0.4547, "step": 95810 }, { "epoch": 23.717821782178216, "grad_norm": 0.2821468114852905, "learning_rate": 1.0696648381560436e-05, "loss": 0.4543, "step": 95820 }, { "epoch": 23.72029702970297, "grad_norm": 0.2575977146625519, "learning_rate": 1.0688132558075864e-05, "loss": 0.4558, "step": 95830 }, { "epoch": 23.722772277227723, "grad_norm": 0.29671257734298706, "learning_rate": 1.0679619720068174e-05, "loss": 0.4554, "step": 95840 }, { "epoch": 23.725247524752476, "grad_norm": 0.25232523679733276, "learning_rate": 1.0671109868183876e-05, "loss": 0.4521, "step": 95850 }, { "epoch": 23.72772277227723, "grad_norm": 0.294870525598526, "learning_rate": 1.0662603003069244e-05, "loss": 0.4557, "step": 95860 }, { "epoch": 23.730198019801982, "grad_norm": 0.2905539870262146, "learning_rate": 1.0654099125370298e-05, "loss": 0.4521, "step": 95870 }, { "epoch": 23.73267326732673, "grad_norm": 0.2632767856121063, "learning_rate": 1.0645598235732879e-05, "loss": 0.4584, "step": 95880 }, { "epoch": 23.735148514851485, "grad_norm": 0.27231720089912415, "learning_rate": 1.063710033480254e-05, "loss": 0.4562, "step": 95890 }, { "epoch": 23.737623762376238, "grad_norm": 0.2711007297039032, "learning_rate": 1.0628605423224675e-05, "loss": 0.4535, "step": 95900 }, { "epoch": 23.74009900990099, "grad_norm": 0.3018523156642914, "learning_rate": 1.0620113501644386e-05, "loss": 0.4577, "step": 95910 }, { "epoch": 23.742574257425744, "grad_norm": 0.29148203134536743, "learning_rate": 1.061162457070659e-05, "loss": 0.4499, "step": 95920 }, { "epoch": 23.745049504950494, "grad_norm": 0.2948766350746155, "learning_rate": 1.060313863105598e-05, "loss": 0.4571, "step": 95930 }, { "epoch": 23.747524752475247, "grad_norm": 0.26534304022789, "learning_rate": 1.0594655683336985e-05, "loss": 0.4489, "step": 95940 }, { "epoch": 23.75, "grad_norm": 0.2756655216217041, "learning_rate": 1.0586175728193847e-05, "loss": 0.4555, "step": 95950 }, { "epoch": 23.752475247524753, "grad_norm": 0.32867345213890076, "learning_rate": 1.0577698766270544e-05, "loss": 0.452, "step": 95960 }, { "epoch": 23.754950495049506, "grad_norm": 0.23317164182662964, "learning_rate": 1.0569224798210854e-05, "loss": 0.4561, "step": 95970 }, { "epoch": 23.757425742574256, "grad_norm": 0.24174058437347412, "learning_rate": 1.0560753824658332e-05, "loss": 0.4543, "step": 95980 }, { "epoch": 23.75990099009901, "grad_norm": 0.2801617980003357, "learning_rate": 1.0552285846256282e-05, "loss": 0.4575, "step": 95990 }, { "epoch": 23.762376237623762, "grad_norm": 0.25083720684051514, "learning_rate": 1.054382086364778e-05, "loss": 0.451, "step": 96000 }, { "epoch": 23.764851485148515, "grad_norm": 0.2739637494087219, "learning_rate": 1.0535358877475687e-05, "loss": 0.4583, "step": 96010 }, { "epoch": 23.76732673267327, "grad_norm": 0.2483636438846588, "learning_rate": 1.0526899888382663e-05, "loss": 0.4582, "step": 96020 }, { "epoch": 23.769801980198018, "grad_norm": 0.2672099173069, "learning_rate": 1.0518443897011076e-05, "loss": 0.4555, "step": 96030 }, { "epoch": 23.77227722772277, "grad_norm": 0.3104897141456604, "learning_rate": 1.050999090400312e-05, "loss": 0.459, "step": 96040 }, { "epoch": 23.774752475247524, "grad_norm": 0.29895493388175964, "learning_rate": 1.0501540910000762e-05, "loss": 0.4582, "step": 96050 }, { "epoch": 23.777227722772277, "grad_norm": 0.3131973445415497, "learning_rate": 1.0493093915645696e-05, "loss": 0.4557, "step": 96060 }, { "epoch": 23.77970297029703, "grad_norm": 0.2506769299507141, "learning_rate": 1.0484649921579421e-05, "loss": 0.4567, "step": 96070 }, { "epoch": 23.782178217821784, "grad_norm": 0.25968825817108154, "learning_rate": 1.0476208928443198e-05, "loss": 0.4508, "step": 96080 }, { "epoch": 23.784653465346533, "grad_norm": 0.2805582582950592, "learning_rate": 1.0467770936878086e-05, "loss": 0.4541, "step": 96090 }, { "epoch": 23.787128712871286, "grad_norm": 0.2581479549407959, "learning_rate": 1.0459335947524884e-05, "loss": 0.4562, "step": 96100 }, { "epoch": 23.78960396039604, "grad_norm": 0.2687745988368988, "learning_rate": 1.0450903961024144e-05, "loss": 0.4582, "step": 96110 }, { "epoch": 23.792079207920793, "grad_norm": 0.24678254127502441, "learning_rate": 1.044247497801627e-05, "loss": 0.459, "step": 96120 }, { "epoch": 23.794554455445546, "grad_norm": 0.24063508212566376, "learning_rate": 1.0434048999141372e-05, "loss": 0.4521, "step": 96130 }, { "epoch": 23.797029702970296, "grad_norm": 0.27326032519340515, "learning_rate": 1.042562602503932e-05, "loss": 0.4572, "step": 96140 }, { "epoch": 23.79950495049505, "grad_norm": 0.2528499960899353, "learning_rate": 1.0417206056349804e-05, "loss": 0.4577, "step": 96150 }, { "epoch": 23.801980198019802, "grad_norm": 0.26638346910476685, "learning_rate": 1.0408789093712279e-05, "loss": 0.4567, "step": 96160 }, { "epoch": 23.804455445544555, "grad_norm": 0.2660953104496002, "learning_rate": 1.0400375137765932e-05, "loss": 0.46, "step": 96170 }, { "epoch": 23.806930693069308, "grad_norm": 0.2850979268550873, "learning_rate": 1.0391964189149772e-05, "loss": 0.4568, "step": 96180 }, { "epoch": 23.80940594059406, "grad_norm": 0.2811453938484192, "learning_rate": 1.0383556248502524e-05, "loss": 0.4528, "step": 96190 }, { "epoch": 23.81188118811881, "grad_norm": 0.2622501254081726, "learning_rate": 1.037515131646275e-05, "loss": 0.4538, "step": 96200 }, { "epoch": 23.814356435643564, "grad_norm": 0.26423078775405884, "learning_rate": 1.0366749393668717e-05, "loss": 0.4578, "step": 96210 }, { "epoch": 23.816831683168317, "grad_norm": 0.29691949486732483, "learning_rate": 1.035835048075851e-05, "loss": 0.4541, "step": 96220 }, { "epoch": 23.81930693069307, "grad_norm": 0.26701852679252625, "learning_rate": 1.0349954578369985e-05, "loss": 0.4587, "step": 96230 }, { "epoch": 23.821782178217823, "grad_norm": 0.2758401334285736, "learning_rate": 1.0341561687140733e-05, "loss": 0.4521, "step": 96240 }, { "epoch": 23.824257425742573, "grad_norm": 0.2509419322013855, "learning_rate": 1.0333171807708159e-05, "loss": 0.4554, "step": 96250 }, { "epoch": 23.826732673267326, "grad_norm": 0.26432520151138306, "learning_rate": 1.0324784940709392e-05, "loss": 0.457, "step": 96260 }, { "epoch": 23.82920792079208, "grad_norm": 0.27269095182418823, "learning_rate": 1.0316401086781391e-05, "loss": 0.4512, "step": 96270 }, { "epoch": 23.831683168316832, "grad_norm": 0.27890631556510925, "learning_rate": 1.0308020246560823e-05, "loss": 0.4568, "step": 96280 }, { "epoch": 23.834158415841586, "grad_norm": 0.29170238971710205, "learning_rate": 1.029964242068419e-05, "loss": 0.4502, "step": 96290 }, { "epoch": 23.836633663366335, "grad_norm": 0.24757902324199677, "learning_rate": 1.0291267609787703e-05, "loss": 0.4539, "step": 96300 }, { "epoch": 23.83910891089109, "grad_norm": 0.25094595551490784, "learning_rate": 1.0282895814507381e-05, "loss": 0.4524, "step": 96310 }, { "epoch": 23.84158415841584, "grad_norm": 0.2546205222606659, "learning_rate": 1.027452703547903e-05, "loss": 0.4568, "step": 96320 }, { "epoch": 23.844059405940595, "grad_norm": 0.3039623200893402, "learning_rate": 1.026616127333817e-05, "loss": 0.4521, "step": 96330 }, { "epoch": 23.846534653465348, "grad_norm": 0.30026721954345703, "learning_rate": 1.0257798528720142e-05, "loss": 0.4575, "step": 96340 }, { "epoch": 23.849009900990097, "grad_norm": 0.2804669141769409, "learning_rate": 1.0249438802260047e-05, "loss": 0.4562, "step": 96350 }, { "epoch": 23.85148514851485, "grad_norm": 0.2752875089645386, "learning_rate": 1.0241082094592752e-05, "loss": 0.4498, "step": 96360 }, { "epoch": 23.853960396039604, "grad_norm": 0.2642636299133301, "learning_rate": 1.023272840635287e-05, "loss": 0.4533, "step": 96370 }, { "epoch": 23.856435643564357, "grad_norm": 0.23471684753894806, "learning_rate": 1.0224377738174823e-05, "loss": 0.4513, "step": 96380 }, { "epoch": 23.85891089108911, "grad_norm": 0.26238730549812317, "learning_rate": 1.0216030090692802e-05, "loss": 0.4532, "step": 96390 }, { "epoch": 23.861386138613863, "grad_norm": 0.26003503799438477, "learning_rate": 1.020768546454075e-05, "loss": 0.4568, "step": 96400 }, { "epoch": 23.863861386138613, "grad_norm": 0.30198201537132263, "learning_rate": 1.019934386035235e-05, "loss": 0.4564, "step": 96410 }, { "epoch": 23.866336633663366, "grad_norm": 0.2913205027580261, "learning_rate": 1.0191005278761152e-05, "loss": 0.4538, "step": 96420 }, { "epoch": 23.86881188118812, "grad_norm": 0.25674182176589966, "learning_rate": 1.0182669720400383e-05, "loss": 0.4487, "step": 96430 }, { "epoch": 23.871287128712872, "grad_norm": 0.2694770395755768, "learning_rate": 1.0174337185903066e-05, "loss": 0.4531, "step": 96440 }, { "epoch": 23.873762376237625, "grad_norm": 0.25873681902885437, "learning_rate": 1.0166007675902012e-05, "loss": 0.4555, "step": 96450 }, { "epoch": 23.876237623762375, "grad_norm": 0.2514834702014923, "learning_rate": 1.0157681191029805e-05, "loss": 0.453, "step": 96460 }, { "epoch": 23.878712871287128, "grad_norm": 0.28356078267097473, "learning_rate": 1.014935773191878e-05, "loss": 0.4583, "step": 96470 }, { "epoch": 23.88118811881188, "grad_norm": 0.2430100440979004, "learning_rate": 1.0141037299201029e-05, "loss": 0.4569, "step": 96480 }, { "epoch": 23.883663366336634, "grad_norm": 0.2663801312446594, "learning_rate": 1.0132719893508446e-05, "loss": 0.4589, "step": 96490 }, { "epoch": 23.886138613861387, "grad_norm": 0.3260082006454468, "learning_rate": 1.0124405515472701e-05, "loss": 0.4549, "step": 96500 }, { "epoch": 23.888613861386137, "grad_norm": 0.2473663091659546, "learning_rate": 1.0116094165725188e-05, "loss": 0.4551, "step": 96510 }, { "epoch": 23.89108910891089, "grad_norm": 0.24560067057609558, "learning_rate": 1.0107785844897117e-05, "loss": 0.4572, "step": 96520 }, { "epoch": 23.893564356435643, "grad_norm": 0.2643875479698181, "learning_rate": 1.0099480553619451e-05, "loss": 0.4539, "step": 96530 }, { "epoch": 23.896039603960396, "grad_norm": 0.31505852937698364, "learning_rate": 1.0091178292522907e-05, "loss": 0.4533, "step": 96540 }, { "epoch": 23.89851485148515, "grad_norm": 0.25497767329216003, "learning_rate": 1.008287906223801e-05, "loss": 0.4538, "step": 96550 }, { "epoch": 23.900990099009903, "grad_norm": 0.29256895184516907, "learning_rate": 1.0074582863394999e-05, "loss": 0.4505, "step": 96560 }, { "epoch": 23.903465346534652, "grad_norm": 0.2591795027256012, "learning_rate": 1.0066289696623954e-05, "loss": 0.4589, "step": 96570 }, { "epoch": 23.905940594059405, "grad_norm": 0.27410653233528137, "learning_rate": 1.0057999562554655e-05, "loss": 0.4566, "step": 96580 }, { "epoch": 23.90841584158416, "grad_norm": 0.27256661653518677, "learning_rate": 1.0049712461816702e-05, "loss": 0.4533, "step": 96590 }, { "epoch": 23.91089108910891, "grad_norm": 0.2529489994049072, "learning_rate": 1.0041428395039431e-05, "loss": 0.4536, "step": 96600 }, { "epoch": 23.913366336633665, "grad_norm": 0.2677614688873291, "learning_rate": 1.003314736285197e-05, "loss": 0.4548, "step": 96610 }, { "epoch": 23.915841584158414, "grad_norm": 0.25222525000572205, "learning_rate": 1.0024869365883217e-05, "loss": 0.4509, "step": 96620 }, { "epoch": 23.918316831683168, "grad_norm": 0.2656833827495575, "learning_rate": 1.0016594404761814e-05, "loss": 0.4568, "step": 96630 }, { "epoch": 23.92079207920792, "grad_norm": 0.27164697647094727, "learning_rate": 1.0008322480116195e-05, "loss": 0.4548, "step": 96640 }, { "epoch": 23.923267326732674, "grad_norm": 0.29977890849113464, "learning_rate": 1.0000053592574576e-05, "loss": 0.4521, "step": 96650 }, { "epoch": 23.925742574257427, "grad_norm": 0.2946946620941162, "learning_rate": 9.991787742764907e-06, "loss": 0.451, "step": 96660 }, { "epoch": 23.928217821782177, "grad_norm": 0.25653839111328125, "learning_rate": 9.983524931314919e-06, "loss": 0.4516, "step": 96670 }, { "epoch": 23.93069306930693, "grad_norm": 0.2472803294658661, "learning_rate": 9.975265158852125e-06, "loss": 0.4561, "step": 96680 }, { "epoch": 23.933168316831683, "grad_norm": 0.24894113838672638, "learning_rate": 9.967008426003816e-06, "loss": 0.4555, "step": 96690 }, { "epoch": 23.935643564356436, "grad_norm": 0.23991861939430237, "learning_rate": 9.958754733397024e-06, "loss": 0.4527, "step": 96700 }, { "epoch": 23.93811881188119, "grad_norm": 0.2612514793872833, "learning_rate": 9.95050408165853e-06, "loss": 0.452, "step": 96710 }, { "epoch": 23.94059405940594, "grad_norm": 0.2727908790111542, "learning_rate": 9.942256471414973e-06, "loss": 0.4512, "step": 96720 }, { "epoch": 23.943069306930692, "grad_norm": 0.24556109309196472, "learning_rate": 9.934011903292684e-06, "loss": 0.456, "step": 96730 }, { "epoch": 23.945544554455445, "grad_norm": 0.2345927357673645, "learning_rate": 9.925770377917765e-06, "loss": 0.4555, "step": 96740 }, { "epoch": 23.948019801980198, "grad_norm": 0.32132306694984436, "learning_rate": 9.917531895916116e-06, "loss": 0.4533, "step": 96750 }, { "epoch": 23.95049504950495, "grad_norm": 0.2640189528465271, "learning_rate": 9.909296457913414e-06, "loss": 0.4521, "step": 96760 }, { "epoch": 23.952970297029704, "grad_norm": 0.25840306282043457, "learning_rate": 9.901064064535065e-06, "loss": 0.4563, "step": 96770 }, { "epoch": 23.955445544554454, "grad_norm": 0.2829767167568207, "learning_rate": 9.892834716406263e-06, "loss": 0.4566, "step": 96780 }, { "epoch": 23.957920792079207, "grad_norm": 0.24448975920677185, "learning_rate": 9.884608414151974e-06, "loss": 0.4549, "step": 96790 }, { "epoch": 23.96039603960396, "grad_norm": 0.25216352939605713, "learning_rate": 9.876385158396955e-06, "loss": 0.4551, "step": 96800 }, { "epoch": 23.962871287128714, "grad_norm": 0.2533431649208069, "learning_rate": 9.868164949765678e-06, "loss": 0.4556, "step": 96810 }, { "epoch": 23.965346534653467, "grad_norm": 0.28231164813041687, "learning_rate": 9.859947788882423e-06, "loss": 0.453, "step": 96820 }, { "epoch": 23.967821782178216, "grad_norm": 0.2683058977127075, "learning_rate": 9.85173367637125e-06, "loss": 0.4569, "step": 96830 }, { "epoch": 23.97029702970297, "grad_norm": 0.24758702516555786, "learning_rate": 9.843522612855944e-06, "loss": 0.4534, "step": 96840 }, { "epoch": 23.972772277227723, "grad_norm": 0.24413640797138214, "learning_rate": 9.835314598960077e-06, "loss": 0.4545, "step": 96850 }, { "epoch": 23.975247524752476, "grad_norm": 0.23791135847568512, "learning_rate": 9.827109635306997e-06, "loss": 0.4575, "step": 96860 }, { "epoch": 23.97772277227723, "grad_norm": 0.24351327121257782, "learning_rate": 9.818907722519838e-06, "loss": 0.4548, "step": 96870 }, { "epoch": 23.980198019801982, "grad_norm": 0.27419644594192505, "learning_rate": 9.810708861221451e-06, "loss": 0.4578, "step": 96880 }, { "epoch": 23.98267326732673, "grad_norm": 0.2625954747200012, "learning_rate": 9.802513052034518e-06, "loss": 0.4589, "step": 96890 }, { "epoch": 23.985148514851485, "grad_norm": 0.24078455567359924, "learning_rate": 9.794320295581422e-06, "loss": 0.456, "step": 96900 }, { "epoch": 23.987623762376238, "grad_norm": 0.2482687383890152, "learning_rate": 9.78613059248436e-06, "loss": 0.4525, "step": 96910 }, { "epoch": 23.99009900990099, "grad_norm": 0.28351032733917236, "learning_rate": 9.777943943365314e-06, "loss": 0.4551, "step": 96920 }, { "epoch": 23.992574257425744, "grad_norm": 0.2554527819156647, "learning_rate": 9.769760348845964e-06, "loss": 0.4531, "step": 96930 }, { "epoch": 23.995049504950494, "grad_norm": 0.24600927531719208, "learning_rate": 9.761579809547834e-06, "loss": 0.4542, "step": 96940 }, { "epoch": 23.997524752475247, "grad_norm": 0.24897563457489014, "learning_rate": 9.753402326092148e-06, "loss": 0.4512, "step": 96950 }, { "epoch": 24.0, "grad_norm": 0.2542131543159485, "learning_rate": 9.745227899099973e-06, "loss": 0.4542, "step": 96960 }, { "epoch": 24.002475247524753, "grad_norm": 0.24151024222373962, "learning_rate": 9.737056529192062e-06, "loss": 0.4602, "step": 96970 }, { "epoch": 24.004950495049506, "grad_norm": 0.2652761936187744, "learning_rate": 9.728888216988995e-06, "loss": 0.454, "step": 96980 }, { "epoch": 24.007425742574256, "grad_norm": 0.3250737488269806, "learning_rate": 9.720722963111117e-06, "loss": 0.4584, "step": 96990 }, { "epoch": 24.00990099009901, "grad_norm": 0.2726956903934479, "learning_rate": 9.712560768178508e-06, "loss": 0.4543, "step": 97000 }, { "epoch": 24.012376237623762, "grad_norm": 0.23965704441070557, "learning_rate": 9.704401632811011e-06, "loss": 0.4575, "step": 97010 }, { "epoch": 24.014851485148515, "grad_norm": 0.35353630781173706, "learning_rate": 9.696245557628308e-06, "loss": 0.4484, "step": 97020 }, { "epoch": 24.01732673267327, "grad_norm": 0.2660057544708252, "learning_rate": 9.68809254324977e-06, "loss": 0.4492, "step": 97030 }, { "epoch": 24.019801980198018, "grad_norm": 0.25000157952308655, "learning_rate": 9.67994259029456e-06, "loss": 0.4541, "step": 97040 }, { "epoch": 24.02227722772277, "grad_norm": 0.2408396452665329, "learning_rate": 9.671795699381619e-06, "loss": 0.4554, "step": 97050 }, { "epoch": 24.024752475247524, "grad_norm": 0.2256695181131363, "learning_rate": 9.663651871129664e-06, "loss": 0.4541, "step": 97060 }, { "epoch": 24.027227722772277, "grad_norm": 0.24949371814727783, "learning_rate": 9.65551110615715e-06, "loss": 0.4546, "step": 97070 }, { "epoch": 24.02970297029703, "grad_norm": 0.24872960150241852, "learning_rate": 9.647373405082305e-06, "loss": 0.4581, "step": 97080 }, { "epoch": 24.032178217821784, "grad_norm": 0.2487279772758484, "learning_rate": 9.63923876852315e-06, "loss": 0.4551, "step": 97090 }, { "epoch": 24.034653465346533, "grad_norm": 0.26150456070899963, "learning_rate": 9.631107197097461e-06, "loss": 0.4514, "step": 97100 }, { "epoch": 24.037128712871286, "grad_norm": 0.2983907163143158, "learning_rate": 9.622978691422774e-06, "loss": 0.4512, "step": 97110 }, { "epoch": 24.03960396039604, "grad_norm": 0.26120510697364807, "learning_rate": 9.614853252116356e-06, "loss": 0.4531, "step": 97120 }, { "epoch": 24.042079207920793, "grad_norm": 0.2527562975883484, "learning_rate": 9.606730879795351e-06, "loss": 0.4522, "step": 97130 }, { "epoch": 24.044554455445546, "grad_norm": 0.27327367663383484, "learning_rate": 9.598611575076555e-06, "loss": 0.4566, "step": 97140 }, { "epoch": 24.047029702970296, "grad_norm": 0.2709987163543701, "learning_rate": 9.590495338576578e-06, "loss": 0.4561, "step": 97150 }, { "epoch": 24.04950495049505, "grad_norm": 0.2459821254014969, "learning_rate": 9.582382170911791e-06, "loss": 0.4561, "step": 97160 }, { "epoch": 24.051980198019802, "grad_norm": 0.25782957673072815, "learning_rate": 9.574272072698365e-06, "loss": 0.454, "step": 97170 }, { "epoch": 24.054455445544555, "grad_norm": 0.25176072120666504, "learning_rate": 9.566165044552177e-06, "loss": 0.4567, "step": 97180 }, { "epoch": 24.056930693069308, "grad_norm": 0.25959542393684387, "learning_rate": 9.558061087088922e-06, "loss": 0.455, "step": 97190 }, { "epoch": 24.059405940594058, "grad_norm": 0.25839313864707947, "learning_rate": 9.549960200924024e-06, "loss": 0.4553, "step": 97200 }, { "epoch": 24.06188118811881, "grad_norm": 0.25644758343696594, "learning_rate": 9.541862386672713e-06, "loss": 0.4556, "step": 97210 }, { "epoch": 24.064356435643564, "grad_norm": 0.255871057510376, "learning_rate": 9.533767644949942e-06, "loss": 0.4529, "step": 97220 }, { "epoch": 24.066831683168317, "grad_norm": 0.2778407335281372, "learning_rate": 9.525675976370463e-06, "loss": 0.4514, "step": 97230 }, { "epoch": 24.06930693069307, "grad_norm": 0.2676478326320648, "learning_rate": 9.517587381548798e-06, "loss": 0.4511, "step": 97240 }, { "epoch": 24.071782178217823, "grad_norm": 0.27020716667175293, "learning_rate": 9.509501861099202e-06, "loss": 0.4519, "step": 97250 }, { "epoch": 24.074257425742573, "grad_norm": 0.2745780050754547, "learning_rate": 9.501419415635732e-06, "loss": 0.4528, "step": 97260 }, { "epoch": 24.076732673267326, "grad_norm": 0.2594093680381775, "learning_rate": 9.49334004577218e-06, "loss": 0.457, "step": 97270 }, { "epoch": 24.07920792079208, "grad_norm": 0.2571711838245392, "learning_rate": 9.48526375212213e-06, "loss": 0.452, "step": 97280 }, { "epoch": 24.081683168316832, "grad_norm": 0.25537601113319397, "learning_rate": 9.477190535298941e-06, "loss": 0.4555, "step": 97290 }, { "epoch": 24.084158415841586, "grad_norm": 0.2576839029788971, "learning_rate": 9.469120395915704e-06, "loss": 0.4569, "step": 97300 }, { "epoch": 24.086633663366335, "grad_norm": 0.246210515499115, "learning_rate": 9.461053334585273e-06, "loss": 0.449, "step": 97310 }, { "epoch": 24.08910891089109, "grad_norm": 0.27375897765159607, "learning_rate": 9.45298935192031e-06, "loss": 0.454, "step": 97320 }, { "epoch": 24.09158415841584, "grad_norm": 0.25779280066490173, "learning_rate": 9.444928448533236e-06, "loss": 0.4584, "step": 97330 }, { "epoch": 24.094059405940595, "grad_norm": 0.3162015378475189, "learning_rate": 9.436870625036187e-06, "loss": 0.459, "step": 97340 }, { "epoch": 24.096534653465348, "grad_norm": 0.2528685927391052, "learning_rate": 9.428815882041125e-06, "loss": 0.4485, "step": 97350 }, { "epoch": 24.099009900990097, "grad_norm": 0.26630088686943054, "learning_rate": 9.420764220159761e-06, "loss": 0.4516, "step": 97360 }, { "epoch": 24.10148514851485, "grad_norm": 0.23838607966899872, "learning_rate": 9.412715640003555e-06, "loss": 0.451, "step": 97370 }, { "epoch": 24.103960396039604, "grad_norm": 0.24317686259746552, "learning_rate": 9.404670142183736e-06, "loss": 0.458, "step": 97380 }, { "epoch": 24.106435643564357, "grad_norm": 0.24628712236881256, "learning_rate": 9.396627727311308e-06, "loss": 0.4573, "step": 97390 }, { "epoch": 24.10891089108911, "grad_norm": 0.2633766829967499, "learning_rate": 9.388588395997055e-06, "loss": 0.4477, "step": 97400 }, { "epoch": 24.111386138613863, "grad_norm": 0.2534455358982086, "learning_rate": 9.380552148851507e-06, "loss": 0.4569, "step": 97410 }, { "epoch": 24.113861386138613, "grad_norm": 0.25414371490478516, "learning_rate": 9.37251898648493e-06, "loss": 0.4573, "step": 97420 }, { "epoch": 24.116336633663366, "grad_norm": 0.2412179708480835, "learning_rate": 9.364488909507447e-06, "loss": 0.4597, "step": 97430 }, { "epoch": 24.11881188118812, "grad_norm": 0.2543964087963104, "learning_rate": 9.356461918528858e-06, "loss": 0.455, "step": 97440 }, { "epoch": 24.121287128712872, "grad_norm": 0.2571719288825989, "learning_rate": 9.34843801415875e-06, "loss": 0.4576, "step": 97450 }, { "epoch": 24.123762376237625, "grad_norm": 0.2706945240497589, "learning_rate": 9.340417197006495e-06, "loss": 0.453, "step": 97460 }, { "epoch": 24.126237623762375, "grad_norm": 0.2695094048976898, "learning_rate": 9.33239946768124e-06, "loss": 0.4584, "step": 97470 }, { "epoch": 24.128712871287128, "grad_norm": 0.27301856875419617, "learning_rate": 9.32438482679185e-06, "loss": 0.4524, "step": 97480 }, { "epoch": 24.13118811881188, "grad_norm": 0.2586551904678345, "learning_rate": 9.316373274947004e-06, "loss": 0.4536, "step": 97490 }, { "epoch": 24.133663366336634, "grad_norm": 0.25698262453079224, "learning_rate": 9.30836481275511e-06, "loss": 0.4547, "step": 97500 }, { "epoch": 24.136138613861387, "grad_norm": 0.25392672419548035, "learning_rate": 9.30035944082438e-06, "loss": 0.4536, "step": 97510 }, { "epoch": 24.138613861386137, "grad_norm": 0.25297999382019043, "learning_rate": 9.292357159762748e-06, "loss": 0.4532, "step": 97520 }, { "epoch": 24.14108910891089, "grad_norm": 0.25081244111061096, "learning_rate": 9.284357970177937e-06, "loss": 0.4528, "step": 97530 }, { "epoch": 24.143564356435643, "grad_norm": 0.24883121252059937, "learning_rate": 9.276361872677452e-06, "loss": 0.4519, "step": 97540 }, { "epoch": 24.146039603960396, "grad_norm": 0.2516750991344452, "learning_rate": 9.268368867868521e-06, "loss": 0.4513, "step": 97550 }, { "epoch": 24.14851485148515, "grad_norm": 0.23579196631908417, "learning_rate": 9.260378956358179e-06, "loss": 0.4507, "step": 97560 }, { "epoch": 24.150990099009903, "grad_norm": 0.24718986451625824, "learning_rate": 9.25239213875318e-06, "loss": 0.455, "step": 97570 }, { "epoch": 24.153465346534652, "grad_norm": 0.2515830993652344, "learning_rate": 9.244408415660105e-06, "loss": 0.4577, "step": 97580 }, { "epoch": 24.155940594059405, "grad_norm": 0.2979368269443512, "learning_rate": 9.236427787685237e-06, "loss": 0.4566, "step": 97590 }, { "epoch": 24.15841584158416, "grad_norm": 0.2637249529361725, "learning_rate": 9.228450255434667e-06, "loss": 0.46, "step": 97600 }, { "epoch": 24.16089108910891, "grad_norm": 0.26201844215393066, "learning_rate": 9.220475819514223e-06, "loss": 0.4557, "step": 97610 }, { "epoch": 24.163366336633665, "grad_norm": 0.2431420534849167, "learning_rate": 9.21250448052952e-06, "loss": 0.4551, "step": 97620 }, { "epoch": 24.165841584158414, "grad_norm": 0.24434472620487213, "learning_rate": 9.204536239085942e-06, "loss": 0.4548, "step": 97630 }, { "epoch": 24.168316831683168, "grad_norm": 0.2545508146286011, "learning_rate": 9.196571095788598e-06, "loss": 0.45, "step": 97640 }, { "epoch": 24.17079207920792, "grad_norm": 0.23977050185203552, "learning_rate": 9.188609051242397e-06, "loss": 0.4601, "step": 97650 }, { "epoch": 24.173267326732674, "grad_norm": 0.28983014822006226, "learning_rate": 9.180650106052018e-06, "loss": 0.4576, "step": 97660 }, { "epoch": 24.175742574257427, "grad_norm": 0.25051820278167725, "learning_rate": 9.17269426082188e-06, "loss": 0.4574, "step": 97670 }, { "epoch": 24.178217821782177, "grad_norm": 0.29068249464035034, "learning_rate": 9.164741516156167e-06, "loss": 0.4564, "step": 97680 }, { "epoch": 24.18069306930693, "grad_norm": 0.24906396865844727, "learning_rate": 9.156791872658844e-06, "loss": 0.4503, "step": 97690 }, { "epoch": 24.183168316831683, "grad_norm": 0.27861642837524414, "learning_rate": 9.148845330933647e-06, "loss": 0.4532, "step": 97700 }, { "epoch": 24.185643564356436, "grad_norm": 0.24566294252872467, "learning_rate": 9.140901891584053e-06, "loss": 0.4535, "step": 97710 }, { "epoch": 24.18811881188119, "grad_norm": 0.33589157462120056, "learning_rate": 9.132961555213292e-06, "loss": 0.4534, "step": 97720 }, { "epoch": 24.190594059405942, "grad_norm": 0.243168443441391, "learning_rate": 9.125024322424425e-06, "loss": 0.4546, "step": 97730 }, { "epoch": 24.193069306930692, "grad_norm": 0.24626843631267548, "learning_rate": 9.11709019382021e-06, "loss": 0.4499, "step": 97740 }, { "epoch": 24.195544554455445, "grad_norm": 0.2737409472465515, "learning_rate": 9.109159170003179e-06, "loss": 0.458, "step": 97750 }, { "epoch": 24.198019801980198, "grad_norm": 0.2510537803173065, "learning_rate": 9.101231251575653e-06, "loss": 0.4556, "step": 97760 }, { "epoch": 24.20049504950495, "grad_norm": 0.24292100965976715, "learning_rate": 9.09330643913972e-06, "loss": 0.4584, "step": 97770 }, { "epoch": 24.202970297029704, "grad_norm": 0.26932862401008606, "learning_rate": 9.085384733297197e-06, "loss": 0.4601, "step": 97780 }, { "epoch": 24.205445544554454, "grad_norm": 0.2498496025800705, "learning_rate": 9.077466134649681e-06, "loss": 0.451, "step": 97790 }, { "epoch": 24.207920792079207, "grad_norm": 0.3136528730392456, "learning_rate": 9.069550643798547e-06, "loss": 0.4581, "step": 97800 }, { "epoch": 24.21039603960396, "grad_norm": 0.2445586919784546, "learning_rate": 9.061638261344935e-06, "loss": 0.4506, "step": 97810 }, { "epoch": 24.212871287128714, "grad_norm": 0.25602710247039795, "learning_rate": 9.053728987889715e-06, "loss": 0.455, "step": 97820 }, { "epoch": 24.215346534653467, "grad_norm": 0.24785280227661133, "learning_rate": 9.045822824033556e-06, "loss": 0.4544, "step": 97830 }, { "epoch": 24.217821782178216, "grad_norm": 0.2403358817100525, "learning_rate": 9.037919770376896e-06, "loss": 0.4639, "step": 97840 }, { "epoch": 24.22029702970297, "grad_norm": 0.2567109167575836, "learning_rate": 9.030019827519887e-06, "loss": 0.4537, "step": 97850 }, { "epoch": 24.222772277227723, "grad_norm": 0.25102248787879944, "learning_rate": 9.02212299606251e-06, "loss": 0.4573, "step": 97860 }, { "epoch": 24.225247524752476, "grad_norm": 0.24307236075401306, "learning_rate": 9.014229276604442e-06, "loss": 0.4564, "step": 97870 }, { "epoch": 24.22772277227723, "grad_norm": 0.2502610385417938, "learning_rate": 9.006338669745195e-06, "loss": 0.4606, "step": 97880 }, { "epoch": 24.230198019801982, "grad_norm": 0.25698912143707275, "learning_rate": 8.998451176083977e-06, "loss": 0.4592, "step": 97890 }, { "epoch": 24.23267326732673, "grad_norm": 0.24907168745994568, "learning_rate": 8.990566796219819e-06, "loss": 0.4591, "step": 97900 }, { "epoch": 24.235148514851485, "grad_norm": 0.30747923254966736, "learning_rate": 8.982685530751462e-06, "loss": 0.4565, "step": 97910 }, { "epoch": 24.237623762376238, "grad_norm": 0.25945231318473816, "learning_rate": 8.97480738027745e-06, "loss": 0.4579, "step": 97920 }, { "epoch": 24.24009900990099, "grad_norm": 0.26704463362693787, "learning_rate": 8.966932345396085e-06, "loss": 0.4541, "step": 97930 }, { "epoch": 24.242574257425744, "grad_norm": 0.24265794456005096, "learning_rate": 8.959060426705401e-06, "loss": 0.448, "step": 97940 }, { "epoch": 24.245049504950494, "grad_norm": 0.2518724203109741, "learning_rate": 8.951191624803246e-06, "loss": 0.4526, "step": 97950 }, { "epoch": 24.247524752475247, "grad_norm": 0.2429775446653366, "learning_rate": 8.943325940287173e-06, "loss": 0.4524, "step": 97960 }, { "epoch": 24.25, "grad_norm": 0.22436389327049255, "learning_rate": 8.935463373754555e-06, "loss": 0.4577, "step": 97970 }, { "epoch": 24.252475247524753, "grad_norm": 0.24637731909751892, "learning_rate": 8.92760392580248e-06, "loss": 0.4601, "step": 97980 }, { "epoch": 24.254950495049506, "grad_norm": 0.2744891941547394, "learning_rate": 8.91974759702784e-06, "loss": 0.4551, "step": 97990 }, { "epoch": 24.257425742574256, "grad_norm": 0.2596324384212494, "learning_rate": 8.911894388027269e-06, "loss": 0.454, "step": 98000 }, { "epoch": 24.25990099009901, "grad_norm": 0.24510431289672852, "learning_rate": 8.904044299397163e-06, "loss": 0.4558, "step": 98010 }, { "epoch": 24.262376237623762, "grad_norm": 0.2396531105041504, "learning_rate": 8.896197331733657e-06, "loss": 0.4573, "step": 98020 }, { "epoch": 24.264851485148515, "grad_norm": 0.28598344326019287, "learning_rate": 8.888353485632728e-06, "loss": 0.4554, "step": 98030 }, { "epoch": 24.26732673267327, "grad_norm": 0.22943371534347534, "learning_rate": 8.880512761690034e-06, "loss": 0.4555, "step": 98040 }, { "epoch": 24.269801980198018, "grad_norm": 0.2726864814758301, "learning_rate": 8.872675160501026e-06, "loss": 0.4588, "step": 98050 }, { "epoch": 24.27227722772277, "grad_norm": 0.2405385971069336, "learning_rate": 8.864840682660918e-06, "loss": 0.4524, "step": 98060 }, { "epoch": 24.274752475247524, "grad_norm": 0.25294071435928345, "learning_rate": 8.85700932876471e-06, "loss": 0.4545, "step": 98070 }, { "epoch": 24.277227722772277, "grad_norm": 0.23657119274139404, "learning_rate": 8.849181099407117e-06, "loss": 0.451, "step": 98080 }, { "epoch": 24.27970297029703, "grad_norm": 0.24479089677333832, "learning_rate": 8.841355995182637e-06, "loss": 0.4544, "step": 98090 }, { "epoch": 24.282178217821784, "grad_norm": 0.25769203901290894, "learning_rate": 8.833534016685547e-06, "loss": 0.4514, "step": 98100 }, { "epoch": 24.284653465346533, "grad_norm": 0.3083924353122711, "learning_rate": 8.825715164509884e-06, "loss": 0.4488, "step": 98110 }, { "epoch": 24.287128712871286, "grad_norm": 0.255249559879303, "learning_rate": 8.817899439249416e-06, "loss": 0.4531, "step": 98120 }, { "epoch": 24.28960396039604, "grad_norm": 0.3133590817451477, "learning_rate": 8.810086841497706e-06, "loss": 0.4603, "step": 98130 }, { "epoch": 24.292079207920793, "grad_norm": 0.2504406273365021, "learning_rate": 8.802277371848083e-06, "loss": 0.4537, "step": 98140 }, { "epoch": 24.294554455445546, "grad_norm": 0.24881885945796967, "learning_rate": 8.794471030893613e-06, "loss": 0.4498, "step": 98150 }, { "epoch": 24.297029702970296, "grad_norm": 0.2545291483402252, "learning_rate": 8.786667819227129e-06, "loss": 0.4509, "step": 98160 }, { "epoch": 24.29950495049505, "grad_norm": 0.22774270176887512, "learning_rate": 8.778867737441232e-06, "loss": 0.455, "step": 98170 }, { "epoch": 24.301980198019802, "grad_norm": 0.29589197039604187, "learning_rate": 8.77107078612831e-06, "loss": 0.4594, "step": 98180 }, { "epoch": 24.304455445544555, "grad_norm": 0.24834421277046204, "learning_rate": 8.763276965880463e-06, "loss": 0.4547, "step": 98190 }, { "epoch": 24.306930693069308, "grad_norm": 0.2621721923351288, "learning_rate": 8.7554862772896e-06, "loss": 0.4517, "step": 98200 }, { "epoch": 24.309405940594058, "grad_norm": 0.2705640494823456, "learning_rate": 8.747698720947355e-06, "loss": 0.4529, "step": 98210 }, { "epoch": 24.31188118811881, "grad_norm": 0.23456047475337982, "learning_rate": 8.739914297445151e-06, "loss": 0.4573, "step": 98220 }, { "epoch": 24.314356435643564, "grad_norm": 0.2520632743835449, "learning_rate": 8.732133007374172e-06, "loss": 0.4514, "step": 98230 }, { "epoch": 24.316831683168317, "grad_norm": 0.251886785030365, "learning_rate": 8.724354851325339e-06, "loss": 0.4564, "step": 98240 }, { "epoch": 24.31930693069307, "grad_norm": 0.2932831048965454, "learning_rate": 8.716579829889371e-06, "loss": 0.4586, "step": 98250 }, { "epoch": 24.321782178217823, "grad_norm": 0.25280502438545227, "learning_rate": 8.708807943656706e-06, "loss": 0.453, "step": 98260 }, { "epoch": 24.324257425742573, "grad_norm": 0.27650186419487, "learning_rate": 8.701039193217585e-06, "loss": 0.453, "step": 98270 }, { "epoch": 24.326732673267326, "grad_norm": 0.2804175019264221, "learning_rate": 8.693273579161986e-06, "loss": 0.4558, "step": 98280 }, { "epoch": 24.32920792079208, "grad_norm": 0.24734008312225342, "learning_rate": 8.685511102079652e-06, "loss": 0.4522, "step": 98290 }, { "epoch": 24.331683168316832, "grad_norm": 0.22946929931640625, "learning_rate": 8.67775176256011e-06, "loss": 0.4538, "step": 98300 }, { "epoch": 24.334158415841586, "grad_norm": 0.24683837592601776, "learning_rate": 8.669995561192622e-06, "loss": 0.4546, "step": 98310 }, { "epoch": 24.336633663366335, "grad_norm": 0.29228803515434265, "learning_rate": 8.662242498566203e-06, "loss": 0.4557, "step": 98320 }, { "epoch": 24.33910891089109, "grad_norm": 0.24144023656845093, "learning_rate": 8.654492575269658e-06, "loss": 0.4576, "step": 98330 }, { "epoch": 24.34158415841584, "grad_norm": 0.23474173247814178, "learning_rate": 8.646745791891558e-06, "loss": 0.4559, "step": 98340 }, { "epoch": 24.344059405940595, "grad_norm": 0.3042367100715637, "learning_rate": 8.639002149020197e-06, "loss": 0.4567, "step": 98350 }, { "epoch": 24.346534653465348, "grad_norm": 0.24872730672359467, "learning_rate": 8.631261647243665e-06, "loss": 0.4552, "step": 98360 }, { "epoch": 24.349009900990097, "grad_norm": 0.24990755319595337, "learning_rate": 8.623524287149814e-06, "loss": 0.454, "step": 98370 }, { "epoch": 24.35148514851485, "grad_norm": 0.2298586517572403, "learning_rate": 8.615790069326229e-06, "loss": 0.4566, "step": 98380 }, { "epoch": 24.353960396039604, "grad_norm": 0.28381475806236267, "learning_rate": 8.60805899436027e-06, "loss": 0.4566, "step": 98390 }, { "epoch": 24.356435643564357, "grad_norm": 0.24229520559310913, "learning_rate": 8.600331062839056e-06, "loss": 0.4513, "step": 98400 }, { "epoch": 24.35891089108911, "grad_norm": 0.26705336570739746, "learning_rate": 8.592606275349508e-06, "loss": 0.456, "step": 98410 }, { "epoch": 24.361386138613863, "grad_norm": 0.2486540824174881, "learning_rate": 8.58488463247824e-06, "loss": 0.4588, "step": 98420 }, { "epoch": 24.363861386138613, "grad_norm": 0.28111985325813293, "learning_rate": 8.577166134811648e-06, "loss": 0.453, "step": 98430 }, { "epoch": 24.366336633663366, "grad_norm": 0.3036789298057556, "learning_rate": 8.569450782935945e-06, "loss": 0.4521, "step": 98440 }, { "epoch": 24.36881188118812, "grad_norm": 0.33593374490737915, "learning_rate": 8.561738577437034e-06, "loss": 0.453, "step": 98450 }, { "epoch": 24.371287128712872, "grad_norm": 0.2640785872936249, "learning_rate": 8.554029518900602e-06, "loss": 0.4505, "step": 98460 }, { "epoch": 24.373762376237625, "grad_norm": 0.2812563478946686, "learning_rate": 8.546323607912105e-06, "loss": 0.4575, "step": 98470 }, { "epoch": 24.376237623762375, "grad_norm": 0.2661490738391876, "learning_rate": 8.538620845056772e-06, "loss": 0.4573, "step": 98480 }, { "epoch": 24.378712871287128, "grad_norm": 0.24993816018104553, "learning_rate": 8.530921230919553e-06, "loss": 0.4592, "step": 98490 }, { "epoch": 24.38118811881188, "grad_norm": 0.2333524078130722, "learning_rate": 8.523224766085208e-06, "loss": 0.4509, "step": 98500 }, { "epoch": 24.383663366336634, "grad_norm": 0.2627670168876648, "learning_rate": 8.51553145113821e-06, "loss": 0.4502, "step": 98510 }, { "epoch": 24.386138613861387, "grad_norm": 0.2672160267829895, "learning_rate": 8.507841286662826e-06, "loss": 0.45, "step": 98520 }, { "epoch": 24.388613861386137, "grad_norm": 0.24787066876888275, "learning_rate": 8.500154273243072e-06, "loss": 0.4549, "step": 98530 }, { "epoch": 24.39108910891089, "grad_norm": 0.23600831627845764, "learning_rate": 8.492470411462717e-06, "loss": 0.4535, "step": 98540 }, { "epoch": 24.393564356435643, "grad_norm": 0.23064105212688446, "learning_rate": 8.484789701905321e-06, "loss": 0.4542, "step": 98550 }, { "epoch": 24.396039603960396, "grad_norm": 0.2651861608028412, "learning_rate": 8.477112145154164e-06, "loss": 0.4528, "step": 98560 }, { "epoch": 24.39851485148515, "grad_norm": 0.2572256624698639, "learning_rate": 8.469437741792319e-06, "loss": 0.4537, "step": 98570 }, { "epoch": 24.400990099009903, "grad_norm": 0.24520033597946167, "learning_rate": 8.46176649240259e-06, "loss": 0.458, "step": 98580 }, { "epoch": 24.403465346534652, "grad_norm": 0.2706906199455261, "learning_rate": 8.454098397567567e-06, "loss": 0.4524, "step": 98590 }, { "epoch": 24.405940594059405, "grad_norm": 0.24596938490867615, "learning_rate": 8.446433457869607e-06, "loss": 0.4582, "step": 98600 }, { "epoch": 24.40841584158416, "grad_norm": 0.2645319700241089, "learning_rate": 8.438771673890794e-06, "loss": 0.4599, "step": 98610 }, { "epoch": 24.41089108910891, "grad_norm": 0.26046502590179443, "learning_rate": 8.43111304621298e-06, "loss": 0.4527, "step": 98620 }, { "epoch": 24.413366336633665, "grad_norm": 0.2694169878959656, "learning_rate": 8.423457575417798e-06, "loss": 0.452, "step": 98630 }, { "epoch": 24.415841584158414, "grad_norm": 0.23643366992473602, "learning_rate": 8.415805262086646e-06, "loss": 0.4493, "step": 98640 }, { "epoch": 24.418316831683168, "grad_norm": 0.2533527612686157, "learning_rate": 8.408156106800636e-06, "loss": 0.4558, "step": 98650 }, { "epoch": 24.42079207920792, "grad_norm": 0.36603856086730957, "learning_rate": 8.400510110140691e-06, "loss": 0.4571, "step": 98660 }, { "epoch": 24.423267326732674, "grad_norm": 0.2497468888759613, "learning_rate": 8.392867272687482e-06, "loss": 0.451, "step": 98670 }, { "epoch": 24.425742574257427, "grad_norm": 0.2610436677932739, "learning_rate": 8.385227595021422e-06, "loss": 0.4525, "step": 98680 }, { "epoch": 24.428217821782177, "grad_norm": 0.24170316755771637, "learning_rate": 8.377591077722669e-06, "loss": 0.4515, "step": 98690 }, { "epoch": 24.43069306930693, "grad_norm": 0.23323765397071838, "learning_rate": 8.369957721371209e-06, "loss": 0.4535, "step": 98700 }, { "epoch": 24.433168316831683, "grad_norm": 0.24650761485099792, "learning_rate": 8.362327526546726e-06, "loss": 0.4569, "step": 98710 }, { "epoch": 24.435643564356436, "grad_norm": 0.24361607432365417, "learning_rate": 8.354700493828687e-06, "loss": 0.4561, "step": 98720 }, { "epoch": 24.43811881188119, "grad_norm": 0.2561355233192444, "learning_rate": 8.347076623796285e-06, "loss": 0.4546, "step": 98730 }, { "epoch": 24.440594059405942, "grad_norm": 0.24786466360092163, "learning_rate": 8.339455917028549e-06, "loss": 0.4575, "step": 98740 }, { "epoch": 24.443069306930692, "grad_norm": 0.2236291766166687, "learning_rate": 8.331838374104206e-06, "loss": 0.4503, "step": 98750 }, { "epoch": 24.445544554455445, "grad_norm": 0.2417009323835373, "learning_rate": 8.324223995601737e-06, "loss": 0.4524, "step": 98760 }, { "epoch": 24.448019801980198, "grad_norm": 0.26962611079216003, "learning_rate": 8.31661278209942e-06, "loss": 0.4552, "step": 98770 }, { "epoch": 24.45049504950495, "grad_norm": 0.2519167959690094, "learning_rate": 8.309004734175286e-06, "loss": 0.4544, "step": 98780 }, { "epoch": 24.452970297029704, "grad_norm": 0.2438962161540985, "learning_rate": 8.301399852407111e-06, "loss": 0.4576, "step": 98790 }, { "epoch": 24.455445544554454, "grad_norm": 0.22777120769023895, "learning_rate": 8.2937981373724e-06, "loss": 0.4512, "step": 98800 }, { "epoch": 24.457920792079207, "grad_norm": 0.2316112220287323, "learning_rate": 8.286199589648508e-06, "loss": 0.4549, "step": 98810 }, { "epoch": 24.46039603960396, "grad_norm": 0.2506992816925049, "learning_rate": 8.278604209812473e-06, "loss": 0.4521, "step": 98820 }, { "epoch": 24.462871287128714, "grad_norm": 0.26318302750587463, "learning_rate": 8.271011998441097e-06, "loss": 0.4509, "step": 98830 }, { "epoch": 24.465346534653467, "grad_norm": 0.25237250328063965, "learning_rate": 8.263422956110978e-06, "loss": 0.4527, "step": 98840 }, { "epoch": 24.467821782178216, "grad_norm": 0.2521318793296814, "learning_rate": 8.255837083398455e-06, "loss": 0.4557, "step": 98850 }, { "epoch": 24.47029702970297, "grad_norm": 0.23296232521533966, "learning_rate": 8.248254380879606e-06, "loss": 0.4564, "step": 98860 }, { "epoch": 24.472772277227723, "grad_norm": 0.24918150901794434, "learning_rate": 8.240674849130309e-06, "loss": 0.4592, "step": 98870 }, { "epoch": 24.475247524752476, "grad_norm": 0.2975027561187744, "learning_rate": 8.233098488726166e-06, "loss": 0.4522, "step": 98880 }, { "epoch": 24.47772277227723, "grad_norm": 0.27264755964279175, "learning_rate": 8.225525300242559e-06, "loss": 0.4576, "step": 98890 }, { "epoch": 24.480198019801982, "grad_norm": 0.28068992495536804, "learning_rate": 8.217955284254608e-06, "loss": 0.4546, "step": 98900 }, { "epoch": 24.48267326732673, "grad_norm": 0.2669121325016022, "learning_rate": 8.21038844133722e-06, "loss": 0.4559, "step": 98910 }, { "epoch": 24.485148514851485, "grad_norm": 0.24857620894908905, "learning_rate": 8.202824772065049e-06, "loss": 0.4585, "step": 98920 }, { "epoch": 24.487623762376238, "grad_norm": 0.25563547015190125, "learning_rate": 8.195264277012487e-06, "loss": 0.4529, "step": 98930 }, { "epoch": 24.49009900990099, "grad_norm": 0.24148117005825043, "learning_rate": 8.187706956753732e-06, "loss": 0.459, "step": 98940 }, { "epoch": 24.492574257425744, "grad_norm": 0.26189711689949036, "learning_rate": 8.18015281186268e-06, "loss": 0.4498, "step": 98950 }, { "epoch": 24.495049504950494, "grad_norm": 0.24306491017341614, "learning_rate": 8.172601842913036e-06, "loss": 0.458, "step": 98960 }, { "epoch": 24.497524752475247, "grad_norm": 0.28746461868286133, "learning_rate": 8.165054050478261e-06, "loss": 0.4531, "step": 98970 }, { "epoch": 24.5, "grad_norm": 0.2831116020679474, "learning_rate": 8.157509435131544e-06, "loss": 0.452, "step": 98980 }, { "epoch": 24.502475247524753, "grad_norm": 0.2520565092563629, "learning_rate": 8.149967997445834e-06, "loss": 0.4593, "step": 98990 }, { "epoch": 24.504950495049506, "grad_norm": 0.24818870425224304, "learning_rate": 8.142429737993867e-06, "loss": 0.4443, "step": 99000 }, { "epoch": 24.507425742574256, "grad_norm": 0.27107667922973633, "learning_rate": 8.134894657348141e-06, "loss": 0.4521, "step": 99010 }, { "epoch": 24.50990099009901, "grad_norm": 0.28272491693496704, "learning_rate": 8.127362756080864e-06, "loss": 0.4574, "step": 99020 }, { "epoch": 24.512376237623762, "grad_norm": 0.24464750289916992, "learning_rate": 8.119834034764056e-06, "loss": 0.4568, "step": 99030 }, { "epoch": 24.514851485148515, "grad_norm": 0.2879779040813446, "learning_rate": 8.112308493969478e-06, "loss": 0.4546, "step": 99040 }, { "epoch": 24.51732673267327, "grad_norm": 0.2303396314382553, "learning_rate": 8.104786134268633e-06, "loss": 0.4577, "step": 99050 }, { "epoch": 24.519801980198018, "grad_norm": 0.2628869116306305, "learning_rate": 8.097266956232785e-06, "loss": 0.4519, "step": 99060 }, { "epoch": 24.52227722772277, "grad_norm": 0.25516432523727417, "learning_rate": 8.08975096043298e-06, "loss": 0.4584, "step": 99070 }, { "epoch": 24.524752475247524, "grad_norm": 0.25836366415023804, "learning_rate": 8.082238147440019e-06, "loss": 0.4511, "step": 99080 }, { "epoch": 24.527227722772277, "grad_norm": 0.26257088780403137, "learning_rate": 8.074728517824432e-06, "loss": 0.4526, "step": 99090 }, { "epoch": 24.52970297029703, "grad_norm": 0.25551337003707886, "learning_rate": 8.06722207215651e-06, "loss": 0.4526, "step": 99100 }, { "epoch": 24.532178217821784, "grad_norm": 0.27191218733787537, "learning_rate": 8.059718811006367e-06, "loss": 0.4578, "step": 99110 }, { "epoch": 24.534653465346533, "grad_norm": 0.27214938402175903, "learning_rate": 8.052218734943795e-06, "loss": 0.4546, "step": 99120 }, { "epoch": 24.537128712871286, "grad_norm": 0.24840770661830902, "learning_rate": 8.044721844538366e-06, "loss": 0.4586, "step": 99130 }, { "epoch": 24.53960396039604, "grad_norm": 0.23361872136592865, "learning_rate": 8.037228140359432e-06, "loss": 0.4541, "step": 99140 }, { "epoch": 24.542079207920793, "grad_norm": 0.2779480814933777, "learning_rate": 8.029737622976107e-06, "loss": 0.4524, "step": 99150 }, { "epoch": 24.544554455445546, "grad_norm": 0.2582630217075348, "learning_rate": 8.022250292957223e-06, "loss": 0.455, "step": 99160 }, { "epoch": 24.547029702970296, "grad_norm": 0.2562495470046997, "learning_rate": 8.014766150871395e-06, "loss": 0.4548, "step": 99170 }, { "epoch": 24.54950495049505, "grad_norm": 0.2790917456150055, "learning_rate": 8.007285197286995e-06, "loss": 0.45, "step": 99180 }, { "epoch": 24.551980198019802, "grad_norm": 0.2321254014968872, "learning_rate": 7.999807432772172e-06, "loss": 0.4502, "step": 99190 }, { "epoch": 24.554455445544555, "grad_norm": 0.372453510761261, "learning_rate": 7.992332857894785e-06, "loss": 0.4554, "step": 99200 }, { "epoch": 24.556930693069308, "grad_norm": 0.24824118614196777, "learning_rate": 7.984861473222488e-06, "loss": 0.4515, "step": 99210 }, { "epoch": 24.55940594059406, "grad_norm": 0.23592455685138702, "learning_rate": 7.977393279322697e-06, "loss": 0.4499, "step": 99220 }, { "epoch": 24.56188118811881, "grad_norm": 0.2424858808517456, "learning_rate": 7.969928276762557e-06, "loss": 0.4562, "step": 99230 }, { "epoch": 24.564356435643564, "grad_norm": 0.2500881850719452, "learning_rate": 7.962466466108993e-06, "loss": 0.459, "step": 99240 }, { "epoch": 24.566831683168317, "grad_norm": 0.24969816207885742, "learning_rate": 7.95500784792867e-06, "loss": 0.4532, "step": 99250 }, { "epoch": 24.56930693069307, "grad_norm": 0.24482257664203644, "learning_rate": 7.94755242278804e-06, "loss": 0.4551, "step": 99260 }, { "epoch": 24.571782178217823, "grad_norm": 0.2752855718135834, "learning_rate": 7.940100191253269e-06, "loss": 0.4548, "step": 99270 }, { "epoch": 24.574257425742573, "grad_norm": 0.24733085930347443, "learning_rate": 7.93265115389033e-06, "loss": 0.4497, "step": 99280 }, { "epoch": 24.576732673267326, "grad_norm": 0.2599397897720337, "learning_rate": 7.9252053112649e-06, "loss": 0.4539, "step": 99290 }, { "epoch": 24.57920792079208, "grad_norm": 0.2543126344680786, "learning_rate": 7.917762663942463e-06, "loss": 0.4544, "step": 99300 }, { "epoch": 24.581683168316832, "grad_norm": 0.31152814626693726, "learning_rate": 7.910323212488241e-06, "loss": 0.455, "step": 99310 }, { "epoch": 24.584158415841586, "grad_norm": 0.28632333874702454, "learning_rate": 7.902886957467192e-06, "loss": 0.4545, "step": 99320 }, { "epoch": 24.586633663366335, "grad_norm": 0.2456323504447937, "learning_rate": 7.89545389944406e-06, "loss": 0.456, "step": 99330 }, { "epoch": 24.58910891089109, "grad_norm": 0.25274091958999634, "learning_rate": 7.888024038983355e-06, "loss": 0.4524, "step": 99340 }, { "epoch": 24.59158415841584, "grad_norm": 0.2347196787595749, "learning_rate": 7.880597376649306e-06, "loss": 0.4539, "step": 99350 }, { "epoch": 24.594059405940595, "grad_norm": 0.2637004554271698, "learning_rate": 7.87317391300591e-06, "loss": 0.4562, "step": 99360 }, { "epoch": 24.596534653465348, "grad_norm": 0.2388538122177124, "learning_rate": 7.865753648616946e-06, "loss": 0.4567, "step": 99370 }, { "epoch": 24.599009900990097, "grad_norm": 0.24480406939983368, "learning_rate": 7.858336584045938e-06, "loss": 0.4526, "step": 99380 }, { "epoch": 24.60148514851485, "grad_norm": 0.2509201169013977, "learning_rate": 7.850922719856163e-06, "loss": 0.4553, "step": 99390 }, { "epoch": 24.603960396039604, "grad_norm": 0.22035999596118927, "learning_rate": 7.843512056610618e-06, "loss": 0.4544, "step": 99400 }, { "epoch": 24.606435643564357, "grad_norm": 0.24770976603031158, "learning_rate": 7.836104594872146e-06, "loss": 0.4551, "step": 99410 }, { "epoch": 24.60891089108911, "grad_norm": 0.23899996280670166, "learning_rate": 7.828700335203275e-06, "loss": 0.4525, "step": 99420 }, { "epoch": 24.611386138613863, "grad_norm": 0.26585862040519714, "learning_rate": 7.821299278166294e-06, "loss": 0.4509, "step": 99430 }, { "epoch": 24.613861386138613, "grad_norm": 0.24216824769973755, "learning_rate": 7.813901424323284e-06, "loss": 0.4563, "step": 99440 }, { "epoch": 24.616336633663366, "grad_norm": 0.26122012734413147, "learning_rate": 7.806506774236061e-06, "loss": 0.4539, "step": 99450 }, { "epoch": 24.61881188118812, "grad_norm": 0.2517588436603546, "learning_rate": 7.7991153284662e-06, "loss": 0.4594, "step": 99460 }, { "epoch": 24.621287128712872, "grad_norm": 0.24997125566005707, "learning_rate": 7.791727087575013e-06, "loss": 0.4521, "step": 99470 }, { "epoch": 24.623762376237625, "grad_norm": 0.23983216285705566, "learning_rate": 7.784342052123605e-06, "loss": 0.454, "step": 99480 }, { "epoch": 24.626237623762375, "grad_norm": 0.22535674273967743, "learning_rate": 7.77696022267283e-06, "loss": 0.4512, "step": 99490 }, { "epoch": 24.628712871287128, "grad_norm": 0.2848794460296631, "learning_rate": 7.769581599783265e-06, "loss": 0.4487, "step": 99500 }, { "epoch": 24.63118811881188, "grad_norm": 0.2549726963043213, "learning_rate": 7.762206184015287e-06, "loss": 0.4526, "step": 99510 }, { "epoch": 24.633663366336634, "grad_norm": 0.2397550642490387, "learning_rate": 7.75483397592901e-06, "loss": 0.4584, "step": 99520 }, { "epoch": 24.636138613861387, "grad_norm": 0.2451825886964798, "learning_rate": 7.747464976084302e-06, "loss": 0.4545, "step": 99530 }, { "epoch": 24.638613861386137, "grad_norm": 0.22929736971855164, "learning_rate": 7.740099185040772e-06, "loss": 0.4612, "step": 99540 }, { "epoch": 24.64108910891089, "grad_norm": 0.25600624084472656, "learning_rate": 7.732736603357821e-06, "loss": 0.449, "step": 99550 }, { "epoch": 24.643564356435643, "grad_norm": 0.24442049860954285, "learning_rate": 7.725377231594594e-06, "loss": 0.4531, "step": 99560 }, { "epoch": 24.646039603960396, "grad_norm": 0.2618473470211029, "learning_rate": 7.718021070309966e-06, "loss": 0.4542, "step": 99570 }, { "epoch": 24.64851485148515, "grad_norm": 0.2352476269006729, "learning_rate": 7.71066812006262e-06, "loss": 0.4548, "step": 99580 }, { "epoch": 24.650990099009903, "grad_norm": 0.2474265992641449, "learning_rate": 7.703318381410924e-06, "loss": 0.4565, "step": 99590 }, { "epoch": 24.653465346534652, "grad_norm": 0.2633042633533478, "learning_rate": 7.69597185491307e-06, "loss": 0.4512, "step": 99600 }, { "epoch": 24.655940594059405, "grad_norm": 0.2413942962884903, "learning_rate": 7.68862854112698e-06, "loss": 0.4558, "step": 99610 }, { "epoch": 24.65841584158416, "grad_norm": 0.2545469105243683, "learning_rate": 7.681288440610308e-06, "loss": 0.4561, "step": 99620 }, { "epoch": 24.66089108910891, "grad_norm": 0.2729696035385132, "learning_rate": 7.67395155392051e-06, "loss": 0.4529, "step": 99630 }, { "epoch": 24.663366336633665, "grad_norm": 0.2554851770401001, "learning_rate": 7.666617881614757e-06, "loss": 0.4538, "step": 99640 }, { "epoch": 24.665841584158414, "grad_norm": 0.24911436438560486, "learning_rate": 7.659287424250012e-06, "loss": 0.4545, "step": 99650 }, { "epoch": 24.668316831683168, "grad_norm": 0.2359461933374405, "learning_rate": 7.651960182382944e-06, "loss": 0.4525, "step": 99660 }, { "epoch": 24.67079207920792, "grad_norm": 0.23412935435771942, "learning_rate": 7.644636156570034e-06, "loss": 0.4578, "step": 99670 }, { "epoch": 24.673267326732674, "grad_norm": 0.2321546971797943, "learning_rate": 7.637315347367497e-06, "loss": 0.4574, "step": 99680 }, { "epoch": 24.675742574257427, "grad_norm": 0.31112536787986755, "learning_rate": 7.629997755331292e-06, "loss": 0.455, "step": 99690 }, { "epoch": 24.678217821782177, "grad_norm": 0.255327045917511, "learning_rate": 7.622683381017115e-06, "loss": 0.4517, "step": 99700 }, { "epoch": 24.68069306930693, "grad_norm": 0.23733055591583252, "learning_rate": 7.615372224980488e-06, "loss": 0.4566, "step": 99710 }, { "epoch": 24.683168316831683, "grad_norm": 0.28745031356811523, "learning_rate": 7.608064287776634e-06, "loss": 0.4513, "step": 99720 }, { "epoch": 24.685643564356436, "grad_norm": 0.2688659131526947, "learning_rate": 7.60075956996052e-06, "loss": 0.4483, "step": 99730 }, { "epoch": 24.68811881188119, "grad_norm": 0.2260126918554306, "learning_rate": 7.593458072086901e-06, "loss": 0.4523, "step": 99740 }, { "epoch": 24.69059405940594, "grad_norm": 0.2541813552379608, "learning_rate": 7.5861597947103014e-06, "loss": 0.4571, "step": 99750 }, { "epoch": 24.693069306930692, "grad_norm": 0.23653212189674377, "learning_rate": 7.5788647383849555e-06, "loss": 0.4546, "step": 99760 }, { "epoch": 24.695544554455445, "grad_norm": 0.23014169931411743, "learning_rate": 7.571572903664864e-06, "loss": 0.4533, "step": 99770 }, { "epoch": 24.698019801980198, "grad_norm": 0.2462392896413803, "learning_rate": 7.564284291103813e-06, "loss": 0.46, "step": 99780 }, { "epoch": 24.70049504950495, "grad_norm": 0.22504070401191711, "learning_rate": 7.5569989012553245e-06, "loss": 0.457, "step": 99790 }, { "epoch": 24.702970297029704, "grad_norm": 0.28128916025161743, "learning_rate": 7.5497167346726626e-06, "loss": 0.4528, "step": 99800 }, { "epoch": 24.705445544554454, "grad_norm": 0.22356857359409332, "learning_rate": 7.5424377919088625e-06, "loss": 0.456, "step": 99810 }, { "epoch": 24.707920792079207, "grad_norm": 0.24509066343307495, "learning_rate": 7.535162073516733e-06, "loss": 0.4505, "step": 99820 }, { "epoch": 24.71039603960396, "grad_norm": 0.24559862911701202, "learning_rate": 7.527889580048797e-06, "loss": 0.448, "step": 99830 }, { "epoch": 24.712871287128714, "grad_norm": 0.22683586180210114, "learning_rate": 7.520620312057347e-06, "loss": 0.4467, "step": 99840 }, { "epoch": 24.715346534653467, "grad_norm": 0.21077723801136017, "learning_rate": 7.513354270094442e-06, "loss": 0.4495, "step": 99850 }, { "epoch": 24.717821782178216, "grad_norm": 0.2380923628807068, "learning_rate": 7.5060914547119055e-06, "loss": 0.453, "step": 99860 }, { "epoch": 24.72029702970297, "grad_norm": 0.22705519199371338, "learning_rate": 7.498831866461276e-06, "loss": 0.4528, "step": 99870 }, { "epoch": 24.722772277227723, "grad_norm": 0.22308886051177979, "learning_rate": 7.491575505893894e-06, "loss": 0.4546, "step": 99880 }, { "epoch": 24.725247524752476, "grad_norm": 0.24865902960300446, "learning_rate": 7.484322373560815e-06, "loss": 0.4571, "step": 99890 }, { "epoch": 24.72772277227723, "grad_norm": 0.2234695851802826, "learning_rate": 7.477072470012863e-06, "loss": 0.4543, "step": 99900 }, { "epoch": 24.730198019801982, "grad_norm": 0.2594826817512512, "learning_rate": 7.46982579580065e-06, "loss": 0.4522, "step": 99910 }, { "epoch": 24.73267326732673, "grad_norm": 0.2518175542354584, "learning_rate": 7.462582351474479e-06, "loss": 0.4563, "step": 99920 }, { "epoch": 24.735148514851485, "grad_norm": 0.24625736474990845, "learning_rate": 7.455342137584465e-06, "loss": 0.4572, "step": 99930 }, { "epoch": 24.737623762376238, "grad_norm": 0.30707162618637085, "learning_rate": 7.448105154680435e-06, "loss": 0.452, "step": 99940 }, { "epoch": 24.74009900990099, "grad_norm": 0.26814088225364685, "learning_rate": 7.44087140331201e-06, "loss": 0.4531, "step": 99950 }, { "epoch": 24.742574257425744, "grad_norm": 0.28869664669036865, "learning_rate": 7.433640884028531e-06, "loss": 0.4572, "step": 99960 }, { "epoch": 24.745049504950494, "grad_norm": 0.24286076426506042, "learning_rate": 7.426413597379106e-06, "loss": 0.4551, "step": 99970 }, { "epoch": 24.747524752475247, "grad_norm": 0.23109498620033264, "learning_rate": 7.4191895439126246e-06, "loss": 0.4556, "step": 99980 }, { "epoch": 24.75, "grad_norm": 0.23221245408058167, "learning_rate": 7.411968724177687e-06, "loss": 0.4492, "step": 99990 }, { "epoch": 24.752475247524753, "grad_norm": 0.26120322942733765, "learning_rate": 7.404751138722654e-06, "loss": 0.4547, "step": 100000 } ], "logging_steps": 10, "max_steps": 120000, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 120, "trial_name": null, "trial_params": null }