{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 245625, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002035623409669211, "grad_norm": 190.0157753986322, "learning_rate": 4.071163945772097e-09, "loss": 3.5109, "step": 10 }, { "epoch": 0.0004071246819338422, "grad_norm": 185.93646252169503, "learning_rate": 8.142327891544194e-09, "loss": 3.5717, "step": 20 }, { "epoch": 0.0006106870229007634, "grad_norm": 204.70538623922909, "learning_rate": 1.221349183731629e-08, "loss": 3.9258, "step": 30 }, { "epoch": 0.0008142493638676844, "grad_norm": 194.73217242056964, "learning_rate": 1.6284655783088388e-08, "loss": 3.7759, "step": 40 }, { "epoch": 0.0010178117048346056, "grad_norm": 152.5453712584695, "learning_rate": 2.035581972886048e-08, "loss": 3.5974, "step": 50 }, { "epoch": 0.0012213740458015267, "grad_norm": 185.34416217567363, "learning_rate": 2.442698367463258e-08, "loss": 3.7904, "step": 60 }, { "epoch": 0.001424936386768448, "grad_norm": 198.50613359624123, "learning_rate": 2.8498147620404675e-08, "loss": 3.3961, "step": 70 }, { "epoch": 0.0016284987277353689, "grad_norm": 177.25705198351415, "learning_rate": 3.2569311566176775e-08, "loss": 3.1707, "step": 80 }, { "epoch": 0.00183206106870229, "grad_norm": 179.91289914959228, "learning_rate": 3.664047551194887e-08, "loss": 3.3409, "step": 90 }, { "epoch": 0.002035623409669211, "grad_norm": 253.3773185155617, "learning_rate": 4.071163945772096e-08, "loss": 3.2748, "step": 100 }, { "epoch": 0.002239185750636132, "grad_norm": 185.8051308347097, "learning_rate": 4.4782803403493056e-08, "loss": 3.2331, "step": 110 }, { "epoch": 0.0024427480916030535, "grad_norm": 165.55850262659754, "learning_rate": 4.885396734926516e-08, "loss": 2.8274, "step": 120 }, { "epoch": 0.0026463104325699744, "grad_norm": 171.9710253268187, "learning_rate": 5.292513129503726e-08, "loss": 2.8032, "step": 130 }, { "epoch": 0.002849872773536896, "grad_norm": 169.08962941899986, "learning_rate": 5.699629524080935e-08, "loss": 2.9423, "step": 140 }, { "epoch": 0.0030534351145038168, "grad_norm": 148.52280872652193, "learning_rate": 6.106745918658145e-08, "loss": 2.2736, "step": 150 }, { "epoch": 0.0032569974554707377, "grad_norm": 139.34284321296232, "learning_rate": 6.513862313235355e-08, "loss": 1.7608, "step": 160 }, { "epoch": 0.003460559796437659, "grad_norm": 153.44512312136686, "learning_rate": 6.920978707812564e-08, "loss": 1.4573, "step": 170 }, { "epoch": 0.00366412213740458, "grad_norm": 105.34746409353411, "learning_rate": 7.328095102389774e-08, "loss": 1.3563, "step": 180 }, { "epoch": 0.0038676844783715014, "grad_norm": 72.36734704127049, "learning_rate": 7.735211496966984e-08, "loss": 0.7554, "step": 190 }, { "epoch": 0.004071246819338422, "grad_norm": 57.1406477153231, "learning_rate": 8.142327891544193e-08, "loss": 0.5197, "step": 200 }, { "epoch": 0.004274809160305344, "grad_norm": 33.89165803336025, "learning_rate": 8.549444286121403e-08, "loss": 0.5513, "step": 210 }, { "epoch": 0.004478371501272264, "grad_norm": 22.236065859554593, "learning_rate": 8.956560680698611e-08, "loss": 0.3495, "step": 220 }, { "epoch": 0.004681933842239186, "grad_norm": 24.956009968299988, "learning_rate": 9.363677075275821e-08, "loss": 0.3694, "step": 230 }, { "epoch": 0.004885496183206107, "grad_norm": 32.030893624463786, "learning_rate": 9.770793469853033e-08, "loss": 0.3489, "step": 240 }, { "epoch": 0.005089058524173028, "grad_norm": 16.480117050596068, "learning_rate": 1.017790986443024e-07, "loss": 0.256, "step": 250 }, { "epoch": 0.005292620865139949, "grad_norm": 32.73346846467184, "learning_rate": 1.0585026259007451e-07, "loss": 0.3554, "step": 260 }, { "epoch": 0.00549618320610687, "grad_norm": 35.73233555887921, "learning_rate": 1.0992142653584661e-07, "loss": 0.3258, "step": 270 }, { "epoch": 0.005699745547073792, "grad_norm": 29.38736535109923, "learning_rate": 1.139925904816187e-07, "loss": 0.2721, "step": 280 }, { "epoch": 0.005903307888040712, "grad_norm": 33.465662765543996, "learning_rate": 1.180637544273908e-07, "loss": 0.2967, "step": 290 }, { "epoch": 0.0061068702290076335, "grad_norm": 30.86433198432593, "learning_rate": 1.221349183731629e-07, "loss": 0.3164, "step": 300 }, { "epoch": 0.006310432569974555, "grad_norm": 11.307721790308358, "learning_rate": 1.2620608231893497e-07, "loss": 0.2778, "step": 310 }, { "epoch": 0.006513994910941475, "grad_norm": 10.741612536865372, "learning_rate": 1.302772462647071e-07, "loss": 0.3136, "step": 320 }, { "epoch": 0.006717557251908397, "grad_norm": 31.23028383019122, "learning_rate": 1.343484102104792e-07, "loss": 0.2118, "step": 330 }, { "epoch": 0.006921119592875318, "grad_norm": 46.08755030852673, "learning_rate": 1.3841957415625128e-07, "loss": 0.2933, "step": 340 }, { "epoch": 0.0071246819338422395, "grad_norm": 28.469250337951546, "learning_rate": 1.4249073810202338e-07, "loss": 0.2391, "step": 350 }, { "epoch": 0.00732824427480916, "grad_norm": 20.561839827880277, "learning_rate": 1.4656190204779548e-07, "loss": 0.2654, "step": 360 }, { "epoch": 0.007531806615776081, "grad_norm": 64.98528985190775, "learning_rate": 1.5063306599356758e-07, "loss": 0.3535, "step": 370 }, { "epoch": 0.007735368956743003, "grad_norm": 22.029183505854977, "learning_rate": 1.5470422993933968e-07, "loss": 0.3116, "step": 380 }, { "epoch": 0.007938931297709924, "grad_norm": 31.141214718519127, "learning_rate": 1.5877539388511175e-07, "loss": 0.2094, "step": 390 }, { "epoch": 0.008142493638676845, "grad_norm": 36.054864652762525, "learning_rate": 1.6284655783088385e-07, "loss": 0.2762, "step": 400 }, { "epoch": 0.008346055979643765, "grad_norm": 20.05751131734724, "learning_rate": 1.6691772177665595e-07, "loss": 0.3885, "step": 410 }, { "epoch": 0.008549618320610687, "grad_norm": 65.13607461341003, "learning_rate": 1.7098888572242805e-07, "loss": 0.3907, "step": 420 }, { "epoch": 0.008753180661577608, "grad_norm": 27.00480548556214, "learning_rate": 1.7506004966820018e-07, "loss": 0.2526, "step": 430 }, { "epoch": 0.008956743002544528, "grad_norm": 23.47585581904029, "learning_rate": 1.7913121361397223e-07, "loss": 0.3029, "step": 440 }, { "epoch": 0.00916030534351145, "grad_norm": 36.96187543345107, "learning_rate": 1.8320237755974433e-07, "loss": 0.2711, "step": 450 }, { "epoch": 0.009363867684478371, "grad_norm": 14.117719536939104, "learning_rate": 1.8727354150551643e-07, "loss": 0.1834, "step": 460 }, { "epoch": 0.009567430025445293, "grad_norm": 36.783287699995945, "learning_rate": 1.9134470545128855e-07, "loss": 0.3216, "step": 470 }, { "epoch": 0.009770992366412214, "grad_norm": 16.86087746037254, "learning_rate": 1.9541586939706065e-07, "loss": 0.1956, "step": 480 }, { "epoch": 0.009974554707379134, "grad_norm": 52.014464271761476, "learning_rate": 1.9948703334283275e-07, "loss": 0.2808, "step": 490 }, { "epoch": 0.010178117048346057, "grad_norm": 41.47789729619163, "learning_rate": 2.035581972886048e-07, "loss": 0.2846, "step": 500 }, { "epoch": 0.010381679389312977, "grad_norm": 30.34190124558441, "learning_rate": 2.0762936123437693e-07, "loss": 0.3009, "step": 510 }, { "epoch": 0.010585241730279898, "grad_norm": 43.331831419098016, "learning_rate": 2.1170052518014903e-07, "loss": 0.2405, "step": 520 }, { "epoch": 0.01078880407124682, "grad_norm": 35.90838499900882, "learning_rate": 2.1577168912592113e-07, "loss": 0.155, "step": 530 }, { "epoch": 0.01099236641221374, "grad_norm": 18.416429553787122, "learning_rate": 2.1984285307169323e-07, "loss": 0.1798, "step": 540 }, { "epoch": 0.011195928753180661, "grad_norm": 34.21548711845646, "learning_rate": 2.2391401701746533e-07, "loss": 0.2464, "step": 550 }, { "epoch": 0.011399491094147583, "grad_norm": 45.035955571778956, "learning_rate": 2.279851809632374e-07, "loss": 0.2394, "step": 560 }, { "epoch": 0.011603053435114504, "grad_norm": 45.895003856751295, "learning_rate": 2.320563449090095e-07, "loss": 0.2908, "step": 570 }, { "epoch": 0.011806615776081424, "grad_norm": 53.6641620919155, "learning_rate": 2.361275088547816e-07, "loss": 0.2811, "step": 580 }, { "epoch": 0.012010178117048347, "grad_norm": 23.017817895427015, "learning_rate": 2.401986728005537e-07, "loss": 0.1856, "step": 590 }, { "epoch": 0.012213740458015267, "grad_norm": 22.591461460278513, "learning_rate": 2.442698367463258e-07, "loss": 0.2217, "step": 600 }, { "epoch": 0.012417302798982188, "grad_norm": 23.98242525424637, "learning_rate": 2.483410006920979e-07, "loss": 0.2571, "step": 610 }, { "epoch": 0.01262086513994911, "grad_norm": 42.40588759212475, "learning_rate": 2.5241216463786995e-07, "loss": 0.287, "step": 620 }, { "epoch": 0.01282442748091603, "grad_norm": 22.18237626100966, "learning_rate": 2.5648332858364205e-07, "loss": 0.2208, "step": 630 }, { "epoch": 0.01302798982188295, "grad_norm": 14.362687610621256, "learning_rate": 2.605544925294142e-07, "loss": 0.1965, "step": 640 }, { "epoch": 0.013231552162849873, "grad_norm": 43.65115214474868, "learning_rate": 2.646256564751863e-07, "loss": 0.2847, "step": 650 }, { "epoch": 0.013435114503816794, "grad_norm": 54.68692931228039, "learning_rate": 2.686968204209584e-07, "loss": 0.2, "step": 660 }, { "epoch": 0.013638676844783716, "grad_norm": 44.086044174113184, "learning_rate": 2.727679843667305e-07, "loss": 0.2868, "step": 670 }, { "epoch": 0.013842239185750636, "grad_norm": 56.276928073260144, "learning_rate": 2.7683914831250255e-07, "loss": 0.3394, "step": 680 }, { "epoch": 0.014045801526717557, "grad_norm": 24.330761149184298, "learning_rate": 2.8091031225827465e-07, "loss": 0.2443, "step": 690 }, { "epoch": 0.014249363867684479, "grad_norm": 23.949136818317946, "learning_rate": 2.8498147620404675e-07, "loss": 0.1371, "step": 700 }, { "epoch": 0.0144529262086514, "grad_norm": 29.614752591661315, "learning_rate": 2.8905264014981885e-07, "loss": 0.2155, "step": 710 }, { "epoch": 0.01465648854961832, "grad_norm": 44.905463609867404, "learning_rate": 2.9312380409559095e-07, "loss": 0.1865, "step": 720 }, { "epoch": 0.014860050890585242, "grad_norm": 11.687314311025649, "learning_rate": 2.9719496804136305e-07, "loss": 0.2441, "step": 730 }, { "epoch": 0.015063613231552163, "grad_norm": 52.12532481915468, "learning_rate": 3.0126613198713515e-07, "loss": 0.2016, "step": 740 }, { "epoch": 0.015267175572519083, "grad_norm": 25.194001130485592, "learning_rate": 3.0533729593290725e-07, "loss": 0.3253, "step": 750 }, { "epoch": 0.015470737913486006, "grad_norm": 13.059092197754019, "learning_rate": 3.0940845987867935e-07, "loss": 0.4053, "step": 760 }, { "epoch": 0.015674300254452926, "grad_norm": 47.48382722251534, "learning_rate": 3.134796238244514e-07, "loss": 0.2919, "step": 770 }, { "epoch": 0.01587786259541985, "grad_norm": 34.93950777487901, "learning_rate": 3.175507877702235e-07, "loss": 0.221, "step": 780 }, { "epoch": 0.016081424936386767, "grad_norm": 25.422591063339198, "learning_rate": 3.216219517159956e-07, "loss": 0.2118, "step": 790 }, { "epoch": 0.01628498727735369, "grad_norm": 40.868064947041425, "learning_rate": 3.256931156617677e-07, "loss": 0.2008, "step": 800 }, { "epoch": 0.01648854961832061, "grad_norm": 32.13130014874036, "learning_rate": 3.297642796075398e-07, "loss": 0.2498, "step": 810 }, { "epoch": 0.01669211195928753, "grad_norm": 48.279752795249465, "learning_rate": 3.338354435533119e-07, "loss": 0.244, "step": 820 }, { "epoch": 0.016895674300254453, "grad_norm": 55.24271207687501, "learning_rate": 3.37906607499084e-07, "loss": 0.2584, "step": 830 }, { "epoch": 0.017099236641221375, "grad_norm": 23.314197828383335, "learning_rate": 3.419777714448561e-07, "loss": 0.2588, "step": 840 }, { "epoch": 0.017302798982188294, "grad_norm": 28.279381655693467, "learning_rate": 3.460489353906282e-07, "loss": 0.2185, "step": 850 }, { "epoch": 0.017506361323155216, "grad_norm": 59.57766902280486, "learning_rate": 3.5012009933640036e-07, "loss": 0.3916, "step": 860 }, { "epoch": 0.017709923664122138, "grad_norm": 49.3423935933153, "learning_rate": 3.5419126328217246e-07, "loss": 0.2614, "step": 870 }, { "epoch": 0.017913486005089057, "grad_norm": 23.916951255415423, "learning_rate": 3.5826242722794445e-07, "loss": 0.1979, "step": 880 }, { "epoch": 0.01811704834605598, "grad_norm": 25.87025216388524, "learning_rate": 3.6233359117371655e-07, "loss": 0.2793, "step": 890 }, { "epoch": 0.0183206106870229, "grad_norm": 16.630356458791365, "learning_rate": 3.6640475511948865e-07, "loss": 0.2567, "step": 900 }, { "epoch": 0.01852417302798982, "grad_norm": 25.722883877809164, "learning_rate": 3.7047591906526075e-07, "loss": 0.1939, "step": 910 }, { "epoch": 0.018727735368956742, "grad_norm": 11.568125973781871, "learning_rate": 3.7454708301103285e-07, "loss": 0.2239, "step": 920 }, { "epoch": 0.018931297709923665, "grad_norm": 35.04707352120332, "learning_rate": 3.7861824695680495e-07, "loss": 0.2101, "step": 930 }, { "epoch": 0.019134860050890587, "grad_norm": 36.449279730305754, "learning_rate": 3.826894109025771e-07, "loss": 0.324, "step": 940 }, { "epoch": 0.019338422391857506, "grad_norm": 37.240907208406625, "learning_rate": 3.867605748483492e-07, "loss": 0.3205, "step": 950 }, { "epoch": 0.019541984732824428, "grad_norm": 23.589919960613106, "learning_rate": 3.908317387941213e-07, "loss": 0.2299, "step": 960 }, { "epoch": 0.01974554707379135, "grad_norm": 37.50436901021628, "learning_rate": 3.949029027398934e-07, "loss": 0.2963, "step": 970 }, { "epoch": 0.01994910941475827, "grad_norm": 11.612736147441511, "learning_rate": 3.989740666856655e-07, "loss": 0.1927, "step": 980 }, { "epoch": 0.02015267175572519, "grad_norm": 18.590511598419955, "learning_rate": 4.030452306314376e-07, "loss": 0.2316, "step": 990 }, { "epoch": 0.020356234096692113, "grad_norm": 31.877029028866925, "learning_rate": 4.071163945772096e-07, "loss": 0.1831, "step": 1000 }, { "epoch": 0.020559796437659032, "grad_norm": 4.588371631852851, "learning_rate": 4.1118755852298175e-07, "loss": 0.2231, "step": 1010 }, { "epoch": 0.020763358778625954, "grad_norm": 31.446938379833448, "learning_rate": 4.1525872246875385e-07, "loss": 0.248, "step": 1020 }, { "epoch": 0.020966921119592877, "grad_norm": 41.85416259178439, "learning_rate": 4.1932988641452595e-07, "loss": 0.2043, "step": 1030 }, { "epoch": 0.021170483460559796, "grad_norm": 63.233479979145045, "learning_rate": 4.2340105036029805e-07, "loss": 0.256, "step": 1040 }, { "epoch": 0.021374045801526718, "grad_norm": 35.52175014975416, "learning_rate": 4.2747221430607015e-07, "loss": 0.2934, "step": 1050 }, { "epoch": 0.02157760814249364, "grad_norm": 24.51678456772389, "learning_rate": 4.3154337825184225e-07, "loss": 0.2443, "step": 1060 }, { "epoch": 0.02178117048346056, "grad_norm": 49.82659670743485, "learning_rate": 4.3561454219761435e-07, "loss": 0.1817, "step": 1070 }, { "epoch": 0.02198473282442748, "grad_norm": 29.834400495745996, "learning_rate": 4.3968570614338645e-07, "loss": 0.201, "step": 1080 }, { "epoch": 0.022188295165394403, "grad_norm": 20.891874052906818, "learning_rate": 4.4375687008915856e-07, "loss": 0.1768, "step": 1090 }, { "epoch": 0.022391857506361322, "grad_norm": 21.380542449151594, "learning_rate": 4.4782803403493066e-07, "loss": 0.2944, "step": 1100 }, { "epoch": 0.022595419847328244, "grad_norm": 13.93353881761503, "learning_rate": 4.5189919798070276e-07, "loss": 0.2211, "step": 1110 }, { "epoch": 0.022798982188295167, "grad_norm": 39.125803825638656, "learning_rate": 4.559703619264748e-07, "loss": 0.3192, "step": 1120 }, { "epoch": 0.023002544529262085, "grad_norm": 28.93263354552015, "learning_rate": 4.600415258722469e-07, "loss": 0.2404, "step": 1130 }, { "epoch": 0.023206106870229008, "grad_norm": 15.620849203628062, "learning_rate": 4.64112689818019e-07, "loss": 0.2614, "step": 1140 }, { "epoch": 0.02340966921119593, "grad_norm": 20.663926939018495, "learning_rate": 4.681838537637911e-07, "loss": 0.2376, "step": 1150 }, { "epoch": 0.02361323155216285, "grad_norm": 72.84424687555304, "learning_rate": 4.722550177095632e-07, "loss": 0.3722, "step": 1160 }, { "epoch": 0.02381679389312977, "grad_norm": 16.59605703241561, "learning_rate": 4.763261816553353e-07, "loss": 0.2321, "step": 1170 }, { "epoch": 0.024020356234096693, "grad_norm": 31.73208132665829, "learning_rate": 4.803973456011074e-07, "loss": 0.2342, "step": 1180 }, { "epoch": 0.024223918575063612, "grad_norm": 10.946855791768417, "learning_rate": 4.844685095468795e-07, "loss": 0.1737, "step": 1190 }, { "epoch": 0.024427480916030534, "grad_norm": 61.25176749507701, "learning_rate": 4.885396734926516e-07, "loss": 0.2155, "step": 1200 }, { "epoch": 0.024631043256997456, "grad_norm": 8.188690984569861, "learning_rate": 4.926108374384237e-07, "loss": 0.1557, "step": 1210 }, { "epoch": 0.024834605597964375, "grad_norm": 42.65110861248474, "learning_rate": 4.966820013841958e-07, "loss": 0.2681, "step": 1220 }, { "epoch": 0.025038167938931297, "grad_norm": 26.620372815592564, "learning_rate": 5.007531653299678e-07, "loss": 0.2893, "step": 1230 }, { "epoch": 0.02524173027989822, "grad_norm": 35.49757787391337, "learning_rate": 5.048243292757399e-07, "loss": 0.2229, "step": 1240 }, { "epoch": 0.02544529262086514, "grad_norm": 30.245005576653504, "learning_rate": 5.08895493221512e-07, "loss": 0.2079, "step": 1250 }, { "epoch": 0.02564885496183206, "grad_norm": 69.09735520921434, "learning_rate": 5.129666571672841e-07, "loss": 0.2686, "step": 1260 }, { "epoch": 0.025852417302798983, "grad_norm": 0.06342663353004313, "learning_rate": 5.170378211130563e-07, "loss": 0.2166, "step": 1270 }, { "epoch": 0.0260559796437659, "grad_norm": 32.343199673654766, "learning_rate": 5.211089850588284e-07, "loss": 0.3097, "step": 1280 }, { "epoch": 0.026259541984732824, "grad_norm": 20.308161660170136, "learning_rate": 5.251801490046005e-07, "loss": 0.1795, "step": 1290 }, { "epoch": 0.026463104325699746, "grad_norm": 7.408602299027068, "learning_rate": 5.292513129503726e-07, "loss": 0.2395, "step": 1300 }, { "epoch": 0.02666666666666667, "grad_norm": 30.175808910001837, "learning_rate": 5.333224768961447e-07, "loss": 0.2203, "step": 1310 }, { "epoch": 0.026870229007633587, "grad_norm": 11.654018090631208, "learning_rate": 5.373936408419168e-07, "loss": 0.2246, "step": 1320 }, { "epoch": 0.02707379134860051, "grad_norm": 10.253052749559158, "learning_rate": 5.414648047876889e-07, "loss": 0.2823, "step": 1330 }, { "epoch": 0.02727735368956743, "grad_norm": 35.88146305162712, "learning_rate": 5.45535968733461e-07, "loss": 0.2484, "step": 1340 }, { "epoch": 0.02748091603053435, "grad_norm": 42.33452012217831, "learning_rate": 5.49607132679233e-07, "loss": 0.2202, "step": 1350 }, { "epoch": 0.027684478371501273, "grad_norm": 23.816939834910254, "learning_rate": 5.536782966250051e-07, "loss": 0.185, "step": 1360 }, { "epoch": 0.027888040712468195, "grad_norm": 24.164596140843702, "learning_rate": 5.577494605707772e-07, "loss": 0.2899, "step": 1370 }, { "epoch": 0.028091603053435114, "grad_norm": 51.159054251466465, "learning_rate": 5.618206245165493e-07, "loss": 0.1805, "step": 1380 }, { "epoch": 0.028295165394402036, "grad_norm": 31.569942269970884, "learning_rate": 5.658917884623214e-07, "loss": 0.368, "step": 1390 }, { "epoch": 0.028498727735368958, "grad_norm": 37.35874758317211, "learning_rate": 5.699629524080935e-07, "loss": 0.2198, "step": 1400 }, { "epoch": 0.028702290076335877, "grad_norm": 57.88096476979791, "learning_rate": 5.740341163538656e-07, "loss": 0.2374, "step": 1410 }, { "epoch": 0.0289058524173028, "grad_norm": 15.305241601666875, "learning_rate": 5.781052802996377e-07, "loss": 0.2134, "step": 1420 }, { "epoch": 0.02910941475826972, "grad_norm": 30.899094509529082, "learning_rate": 5.821764442454098e-07, "loss": 0.209, "step": 1430 }, { "epoch": 0.02931297709923664, "grad_norm": 18.83131154717868, "learning_rate": 5.862476081911819e-07, "loss": 0.1638, "step": 1440 }, { "epoch": 0.029516539440203562, "grad_norm": 34.467746362323844, "learning_rate": 5.90318772136954e-07, "loss": 0.23, "step": 1450 }, { "epoch": 0.029720101781170485, "grad_norm": 15.452544975099453, "learning_rate": 5.943899360827261e-07, "loss": 0.1926, "step": 1460 }, { "epoch": 0.029923664122137403, "grad_norm": 42.05206620409973, "learning_rate": 5.984611000284982e-07, "loss": 0.2602, "step": 1470 }, { "epoch": 0.030127226463104326, "grad_norm": 38.75958304613648, "learning_rate": 6.025322639742703e-07, "loss": 0.2798, "step": 1480 }, { "epoch": 0.030330788804071248, "grad_norm": 21.844683104050656, "learning_rate": 6.066034279200424e-07, "loss": 0.2621, "step": 1490 }, { "epoch": 0.030534351145038167, "grad_norm": 40.36942411726007, "learning_rate": 6.106745918658145e-07, "loss": 0.1311, "step": 1500 }, { "epoch": 0.03073791348600509, "grad_norm": 34.11538619826671, "learning_rate": 6.147457558115866e-07, "loss": 0.2009, "step": 1510 }, { "epoch": 0.03094147582697201, "grad_norm": 67.40042365167002, "learning_rate": 6.188169197573587e-07, "loss": 0.1553, "step": 1520 }, { "epoch": 0.03114503816793893, "grad_norm": 10.61590436380032, "learning_rate": 6.228880837031308e-07, "loss": 0.1552, "step": 1530 }, { "epoch": 0.03134860050890585, "grad_norm": 53.49886496335603, "learning_rate": 6.269592476489028e-07, "loss": 0.2794, "step": 1540 }, { "epoch": 0.03155216284987277, "grad_norm": 61.803966247713504, "learning_rate": 6.31030411594675e-07, "loss": 0.3193, "step": 1550 }, { "epoch": 0.0317557251908397, "grad_norm": 33.975372671768525, "learning_rate": 6.35101575540447e-07, "loss": 0.2031, "step": 1560 }, { "epoch": 0.031959287531806616, "grad_norm": 33.416567994831496, "learning_rate": 6.391727394862192e-07, "loss": 0.1551, "step": 1570 }, { "epoch": 0.032162849872773534, "grad_norm": 31.19121374770952, "learning_rate": 6.432439034319912e-07, "loss": 0.2478, "step": 1580 }, { "epoch": 0.03236641221374046, "grad_norm": 54.27229113600801, "learning_rate": 6.473150673777634e-07, "loss": 0.2088, "step": 1590 }, { "epoch": 0.03256997455470738, "grad_norm": 17.967070458190133, "learning_rate": 6.513862313235354e-07, "loss": 0.2542, "step": 1600 }, { "epoch": 0.0327735368956743, "grad_norm": 15.910812692481837, "learning_rate": 6.554573952693076e-07, "loss": 0.3401, "step": 1610 }, { "epoch": 0.03297709923664122, "grad_norm": 21.81078984878033, "learning_rate": 6.595285592150796e-07, "loss": 0.2489, "step": 1620 }, { "epoch": 0.03318066157760814, "grad_norm": 16.52106560041347, "learning_rate": 6.635997231608518e-07, "loss": 0.195, "step": 1630 }, { "epoch": 0.03338422391857506, "grad_norm": 0.69021174724638, "learning_rate": 6.676708871066238e-07, "loss": 0.2776, "step": 1640 }, { "epoch": 0.03358778625954199, "grad_norm": 28.02008629084781, "learning_rate": 6.717420510523959e-07, "loss": 0.2089, "step": 1650 }, { "epoch": 0.033791348600508905, "grad_norm": 12.5497823333358, "learning_rate": 6.75813214998168e-07, "loss": 0.1823, "step": 1660 }, { "epoch": 0.033994910941475824, "grad_norm": 5.140339927245315, "learning_rate": 6.798843789439401e-07, "loss": 0.2153, "step": 1670 }, { "epoch": 0.03419847328244275, "grad_norm": 47.438391998123244, "learning_rate": 6.839555428897122e-07, "loss": 0.2907, "step": 1680 }, { "epoch": 0.03440203562340967, "grad_norm": 14.56824226140893, "learning_rate": 6.880267068354843e-07, "loss": 0.1263, "step": 1690 }, { "epoch": 0.03460559796437659, "grad_norm": 24.723851946236007, "learning_rate": 6.920978707812564e-07, "loss": 0.2353, "step": 1700 }, { "epoch": 0.03480916030534351, "grad_norm": 15.51091669272874, "learning_rate": 6.961690347270285e-07, "loss": 0.2501, "step": 1710 }, { "epoch": 0.03501272264631043, "grad_norm": 50.82105761595492, "learning_rate": 7.002401986728007e-07, "loss": 0.2745, "step": 1720 }, { "epoch": 0.03521628498727735, "grad_norm": 3.3228615949694555, "learning_rate": 7.043113626185727e-07, "loss": 0.1653, "step": 1730 }, { "epoch": 0.035419847328244276, "grad_norm": 20.523314429292338, "learning_rate": 7.083825265643449e-07, "loss": 0.2804, "step": 1740 }, { "epoch": 0.035623409669211195, "grad_norm": 19.582912577603324, "learning_rate": 7.124536905101169e-07, "loss": 0.1466, "step": 1750 }, { "epoch": 0.035826972010178114, "grad_norm": 58.77780220659113, "learning_rate": 7.165248544558889e-07, "loss": 0.2301, "step": 1760 }, { "epoch": 0.03603053435114504, "grad_norm": 18.1361808590394, "learning_rate": 7.205960184016611e-07, "loss": 0.2295, "step": 1770 }, { "epoch": 0.03623409669211196, "grad_norm": 21.13837719619139, "learning_rate": 7.246671823474331e-07, "loss": 0.2546, "step": 1780 }, { "epoch": 0.03643765903307888, "grad_norm": 44.07123919945341, "learning_rate": 7.287383462932053e-07, "loss": 0.3489, "step": 1790 }, { "epoch": 0.0366412213740458, "grad_norm": 9.31302518393176, "learning_rate": 7.328095102389773e-07, "loss": 0.2061, "step": 1800 }, { "epoch": 0.03684478371501272, "grad_norm": 19.50565042627264, "learning_rate": 7.368806741847495e-07, "loss": 0.3023, "step": 1810 }, { "epoch": 0.03704834605597964, "grad_norm": 35.00734161860886, "learning_rate": 7.409518381305215e-07, "loss": 0.2682, "step": 1820 }, { "epoch": 0.037251908396946566, "grad_norm": 45.032499639699715, "learning_rate": 7.450230020762937e-07, "loss": 0.2441, "step": 1830 }, { "epoch": 0.037455470737913485, "grad_norm": 15.55587631035051, "learning_rate": 7.490941660220657e-07, "loss": 0.1167, "step": 1840 }, { "epoch": 0.037659033078880404, "grad_norm": 12.207618078581165, "learning_rate": 7.531653299678379e-07, "loss": 0.3691, "step": 1850 }, { "epoch": 0.03786259541984733, "grad_norm": 31.327303969594162, "learning_rate": 7.572364939136099e-07, "loss": 0.2431, "step": 1860 }, { "epoch": 0.03806615776081425, "grad_norm": 11.966867757678422, "learning_rate": 7.613076578593821e-07, "loss": 0.1449, "step": 1870 }, { "epoch": 0.038269720101781174, "grad_norm": 35.76321749412805, "learning_rate": 7.653788218051542e-07, "loss": 0.25, "step": 1880 }, { "epoch": 0.03847328244274809, "grad_norm": 36.660268217627916, "learning_rate": 7.694499857509262e-07, "loss": 0.1879, "step": 1890 }, { "epoch": 0.03867684478371501, "grad_norm": 19.067415469878245, "learning_rate": 7.735211496966984e-07, "loss": 0.3155, "step": 1900 }, { "epoch": 0.03888040712468194, "grad_norm": 25.296141360040973, "learning_rate": 7.775923136424704e-07, "loss": 0.2381, "step": 1910 }, { "epoch": 0.039083969465648856, "grad_norm": 20.7693921129373, "learning_rate": 7.816634775882426e-07, "loss": 0.1358, "step": 1920 }, { "epoch": 0.039287531806615775, "grad_norm": 48.587749048066954, "learning_rate": 7.857346415340146e-07, "loss": 0.3406, "step": 1930 }, { "epoch": 0.0394910941475827, "grad_norm": 14.020234098395614, "learning_rate": 7.898058054797868e-07, "loss": 0.2673, "step": 1940 }, { "epoch": 0.03969465648854962, "grad_norm": 22.36671781344876, "learning_rate": 7.938769694255588e-07, "loss": 0.2326, "step": 1950 }, { "epoch": 0.03989821882951654, "grad_norm": 19.884419467604676, "learning_rate": 7.97948133371331e-07, "loss": 0.2478, "step": 1960 }, { "epoch": 0.040101781170483464, "grad_norm": 30.923192314330667, "learning_rate": 8.02019297317103e-07, "loss": 0.2351, "step": 1970 }, { "epoch": 0.04030534351145038, "grad_norm": 26.532371606293605, "learning_rate": 8.060904612628752e-07, "loss": 0.1456, "step": 1980 }, { "epoch": 0.0405089058524173, "grad_norm": 31.12540955299195, "learning_rate": 8.101616252086472e-07, "loss": 0.1976, "step": 1990 }, { "epoch": 0.04071246819338423, "grad_norm": 30.21196156887797, "learning_rate": 8.142327891544192e-07, "loss": 0.2341, "step": 2000 }, { "epoch": 0.040916030534351146, "grad_norm": 26.27677911476743, "learning_rate": 8.183039531001914e-07, "loss": 0.3063, "step": 2010 }, { "epoch": 0.041119592875318065, "grad_norm": 20.782899060715636, "learning_rate": 8.223751170459635e-07, "loss": 0.3691, "step": 2020 }, { "epoch": 0.04132315521628499, "grad_norm": 17.932625949374874, "learning_rate": 8.264462809917356e-07, "loss": 0.1988, "step": 2030 }, { "epoch": 0.04152671755725191, "grad_norm": 28.208487050551568, "learning_rate": 8.305174449375077e-07, "loss": 0.1666, "step": 2040 }, { "epoch": 0.04173027989821883, "grad_norm": 33.60227381722497, "learning_rate": 8.345886088832798e-07, "loss": 0.205, "step": 2050 }, { "epoch": 0.041933842239185753, "grad_norm": 59.489986371873584, "learning_rate": 8.386597728290519e-07, "loss": 0.2096, "step": 2060 }, { "epoch": 0.04213740458015267, "grad_norm": 38.708909205456195, "learning_rate": 8.42730936774824e-07, "loss": 0.2957, "step": 2070 }, { "epoch": 0.04234096692111959, "grad_norm": 20.085195440459945, "learning_rate": 8.468021007205961e-07, "loss": 0.2859, "step": 2080 }, { "epoch": 0.04254452926208652, "grad_norm": 34.0335543677694, "learning_rate": 8.508732646663682e-07, "loss": 0.1958, "step": 2090 }, { "epoch": 0.042748091603053436, "grad_norm": 20.86200977228214, "learning_rate": 8.549444286121403e-07, "loss": 0.3115, "step": 2100 }, { "epoch": 0.042951653944020354, "grad_norm": 31.73425954731872, "learning_rate": 8.590155925579123e-07, "loss": 0.217, "step": 2110 }, { "epoch": 0.04315521628498728, "grad_norm": 23.580161200693592, "learning_rate": 8.630867565036845e-07, "loss": 0.2493, "step": 2120 }, { "epoch": 0.0433587786259542, "grad_norm": 78.8807371606334, "learning_rate": 8.671579204494565e-07, "loss": 0.237, "step": 2130 }, { "epoch": 0.04356234096692112, "grad_norm": 13.575065744903101, "learning_rate": 8.712290843952287e-07, "loss": 0.1964, "step": 2140 }, { "epoch": 0.04376590330788804, "grad_norm": 32.407727282292136, "learning_rate": 8.753002483410007e-07, "loss": 0.1385, "step": 2150 }, { "epoch": 0.04396946564885496, "grad_norm": 43.17392728180007, "learning_rate": 8.793714122867729e-07, "loss": 0.3732, "step": 2160 }, { "epoch": 0.04417302798982188, "grad_norm": 83.28278296219855, "learning_rate": 8.834425762325449e-07, "loss": 0.2862, "step": 2170 }, { "epoch": 0.04437659033078881, "grad_norm": 11.535101187071634, "learning_rate": 8.875137401783171e-07, "loss": 0.164, "step": 2180 }, { "epoch": 0.044580152671755725, "grad_norm": 10.305006058390964, "learning_rate": 8.915849041240891e-07, "loss": 0.162, "step": 2190 }, { "epoch": 0.044783715012722644, "grad_norm": 33.796420982715006, "learning_rate": 8.956560680698613e-07, "loss": 0.1777, "step": 2200 }, { "epoch": 0.04498727735368957, "grad_norm": 2.189488756914724, "learning_rate": 8.997272320156333e-07, "loss": 0.2268, "step": 2210 }, { "epoch": 0.04519083969465649, "grad_norm": 20.31669610746357, "learning_rate": 9.037983959614055e-07, "loss": 0.2301, "step": 2220 }, { "epoch": 0.04539440203562341, "grad_norm": 16.79337619314055, "learning_rate": 9.078695599071775e-07, "loss": 0.1469, "step": 2230 }, { "epoch": 0.04559796437659033, "grad_norm": 29.586438821524897, "learning_rate": 9.119407238529496e-07, "loss": 0.2994, "step": 2240 }, { "epoch": 0.04580152671755725, "grad_norm": 20.070861376916625, "learning_rate": 9.160118877987217e-07, "loss": 0.267, "step": 2250 }, { "epoch": 0.04600508905852417, "grad_norm": 41.75579194561241, "learning_rate": 9.200830517444938e-07, "loss": 0.2897, "step": 2260 }, { "epoch": 0.046208651399491096, "grad_norm": 28.244419095341275, "learning_rate": 9.241542156902659e-07, "loss": 0.2792, "step": 2270 }, { "epoch": 0.046412213740458015, "grad_norm": 24.8395635407895, "learning_rate": 9.28225379636038e-07, "loss": 0.2507, "step": 2280 }, { "epoch": 0.046615776081424934, "grad_norm": 55.662521012216374, "learning_rate": 9.322965435818101e-07, "loss": 0.2372, "step": 2290 }, { "epoch": 0.04681933842239186, "grad_norm": 22.40678579884331, "learning_rate": 9.363677075275822e-07, "loss": 0.1766, "step": 2300 }, { "epoch": 0.04702290076335878, "grad_norm": 13.501762031564267, "learning_rate": 9.404388714733543e-07, "loss": 0.2154, "step": 2310 }, { "epoch": 0.0472264631043257, "grad_norm": 17.19766085283428, "learning_rate": 9.445100354191264e-07, "loss": 0.2873, "step": 2320 }, { "epoch": 0.04743002544529262, "grad_norm": 25.288567670097358, "learning_rate": 9.485811993648986e-07, "loss": 0.1828, "step": 2330 }, { "epoch": 0.04763358778625954, "grad_norm": 11.088709606066583, "learning_rate": 9.526523633106706e-07, "loss": 0.2417, "step": 2340 }, { "epoch": 0.04783715012722646, "grad_norm": 13.984648947726102, "learning_rate": 9.567235272564426e-07, "loss": 0.1338, "step": 2350 }, { "epoch": 0.048040712468193386, "grad_norm": 29.56661609264282, "learning_rate": 9.607946912022148e-07, "loss": 0.2461, "step": 2360 }, { "epoch": 0.048244274809160305, "grad_norm": 121.59986027236017, "learning_rate": 9.648658551479868e-07, "loss": 0.2425, "step": 2370 }, { "epoch": 0.048447837150127224, "grad_norm": 24.32185095321848, "learning_rate": 9.68937019093759e-07, "loss": 0.2459, "step": 2380 }, { "epoch": 0.04865139949109415, "grad_norm": 5.114644852360373, "learning_rate": 9.73008183039531e-07, "loss": 0.2369, "step": 2390 }, { "epoch": 0.04885496183206107, "grad_norm": 46.68333316016171, "learning_rate": 9.770793469853032e-07, "loss": 0.2678, "step": 2400 }, { "epoch": 0.04905852417302799, "grad_norm": 57.16385985422543, "learning_rate": 9.811505109310752e-07, "loss": 0.2776, "step": 2410 }, { "epoch": 0.04926208651399491, "grad_norm": 28.83671374766842, "learning_rate": 9.852216748768474e-07, "loss": 0.3201, "step": 2420 }, { "epoch": 0.04946564885496183, "grad_norm": 11.917633126049852, "learning_rate": 9.892928388226194e-07, "loss": 0.2435, "step": 2430 }, { "epoch": 0.04966921119592875, "grad_norm": 27.303670794417098, "learning_rate": 9.933640027683916e-07, "loss": 0.2097, "step": 2440 }, { "epoch": 0.049872773536895676, "grad_norm": 14.56462501068326, "learning_rate": 9.974351667141636e-07, "loss": 0.1891, "step": 2450 }, { "epoch": 0.050076335877862595, "grad_norm": 2.257512349381676, "learning_rate": 1.0015063306599356e-06, "loss": 0.2028, "step": 2460 }, { "epoch": 0.050279898218829513, "grad_norm": 21.72131242837976, "learning_rate": 1.0055774946057078e-06, "loss": 0.2653, "step": 2470 }, { "epoch": 0.05048346055979644, "grad_norm": 2.0547994851978477, "learning_rate": 1.0096486585514798e-06, "loss": 0.2616, "step": 2480 }, { "epoch": 0.05068702290076336, "grad_norm": 19.762123229240387, "learning_rate": 1.013719822497252e-06, "loss": 0.2029, "step": 2490 }, { "epoch": 0.05089058524173028, "grad_norm": 34.772883255199446, "learning_rate": 1.017790986443024e-06, "loss": 0.1514, "step": 2500 }, { "epoch": 0.0510941475826972, "grad_norm": 55.08362931403814, "learning_rate": 1.0218621503887962e-06, "loss": 0.1977, "step": 2510 }, { "epoch": 0.05129770992366412, "grad_norm": 25.82336023618739, "learning_rate": 1.0259333143345682e-06, "loss": 0.163, "step": 2520 }, { "epoch": 0.05150127226463104, "grad_norm": 34.006632585451506, "learning_rate": 1.0300044782803404e-06, "loss": 0.4077, "step": 2530 }, { "epoch": 0.051704834605597966, "grad_norm": 33.74472655800399, "learning_rate": 1.0340756422261126e-06, "loss": 0.1448, "step": 2540 }, { "epoch": 0.051908396946564885, "grad_norm": 25.457008718159226, "learning_rate": 1.0381468061718846e-06, "loss": 0.1668, "step": 2550 }, { "epoch": 0.0521119592875318, "grad_norm": 22.580304418763088, "learning_rate": 1.0422179701176568e-06, "loss": 0.1577, "step": 2560 }, { "epoch": 0.05231552162849873, "grad_norm": 6.009649635820894, "learning_rate": 1.0462891340634288e-06, "loss": 0.3525, "step": 2570 }, { "epoch": 0.05251908396946565, "grad_norm": 36.793921188570984, "learning_rate": 1.050360298009201e-06, "loss": 0.258, "step": 2580 }, { "epoch": 0.05272264631043257, "grad_norm": 35.57644165005436, "learning_rate": 1.054431461954973e-06, "loss": 0.2962, "step": 2590 }, { "epoch": 0.05292620865139949, "grad_norm": 55.67181560737901, "learning_rate": 1.0585026259007452e-06, "loss": 0.2229, "step": 2600 }, { "epoch": 0.05312977099236641, "grad_norm": 21.36574962189088, "learning_rate": 1.0625737898465172e-06, "loss": 0.2205, "step": 2610 }, { "epoch": 0.05333333333333334, "grad_norm": 33.977266673283054, "learning_rate": 1.0666449537922894e-06, "loss": 0.2697, "step": 2620 }, { "epoch": 0.053536895674300256, "grad_norm": 9.846240596586647, "learning_rate": 1.0707161177380614e-06, "loss": 0.2135, "step": 2630 }, { "epoch": 0.053740458015267174, "grad_norm": 36.14643690765187, "learning_rate": 1.0747872816838336e-06, "loss": 0.2638, "step": 2640 }, { "epoch": 0.0539440203562341, "grad_norm": 61.744675186788484, "learning_rate": 1.0788584456296056e-06, "loss": 0.202, "step": 2650 }, { "epoch": 0.05414758269720102, "grad_norm": 19.63822506579224, "learning_rate": 1.0829296095753778e-06, "loss": 0.2685, "step": 2660 }, { "epoch": 0.05435114503816794, "grad_norm": 45.79673577531547, "learning_rate": 1.0870007735211498e-06, "loss": 0.1704, "step": 2670 }, { "epoch": 0.05455470737913486, "grad_norm": 24.756661215828878, "learning_rate": 1.091071937466922e-06, "loss": 0.2283, "step": 2680 }, { "epoch": 0.05475826972010178, "grad_norm": 134.6899746441954, "learning_rate": 1.095143101412694e-06, "loss": 0.2233, "step": 2690 }, { "epoch": 0.0549618320610687, "grad_norm": 57.59583251818345, "learning_rate": 1.099214265358466e-06, "loss": 0.3972, "step": 2700 }, { "epoch": 0.05516539440203563, "grad_norm": 22.010841227069218, "learning_rate": 1.1032854293042382e-06, "loss": 0.2392, "step": 2710 }, { "epoch": 0.055368956743002545, "grad_norm": 35.45475172244916, "learning_rate": 1.1073565932500102e-06, "loss": 0.2681, "step": 2720 }, { "epoch": 0.055572519083969464, "grad_norm": 26.60438848205804, "learning_rate": 1.1114277571957824e-06, "loss": 0.2765, "step": 2730 }, { "epoch": 0.05577608142493639, "grad_norm": 29.70514154756114, "learning_rate": 1.1154989211415544e-06, "loss": 0.2749, "step": 2740 }, { "epoch": 0.05597964376590331, "grad_norm": 13.863930801608499, "learning_rate": 1.1195700850873266e-06, "loss": 0.1894, "step": 2750 }, { "epoch": 0.05618320610687023, "grad_norm": 32.64222745038447, "learning_rate": 1.1236412490330986e-06, "loss": 0.2184, "step": 2760 }, { "epoch": 0.05638676844783715, "grad_norm": 27.254276845947828, "learning_rate": 1.1277124129788708e-06, "loss": 0.2985, "step": 2770 }, { "epoch": 0.05659033078880407, "grad_norm": 27.620879934293814, "learning_rate": 1.1317835769246428e-06, "loss": 0.2054, "step": 2780 }, { "epoch": 0.05679389312977099, "grad_norm": 53.89958139957937, "learning_rate": 1.135854740870415e-06, "loss": 0.2253, "step": 2790 }, { "epoch": 0.056997455470737916, "grad_norm": 23.001622103102303, "learning_rate": 1.139925904816187e-06, "loss": 0.2437, "step": 2800 }, { "epoch": 0.057201017811704835, "grad_norm": 12.404846046280122, "learning_rate": 1.1439970687619592e-06, "loss": 0.2275, "step": 2810 }, { "epoch": 0.057404580152671754, "grad_norm": 46.61761547907678, "learning_rate": 1.1480682327077312e-06, "loss": 0.3426, "step": 2820 }, { "epoch": 0.05760814249363868, "grad_norm": 3.271245807104625, "learning_rate": 1.1521393966535032e-06, "loss": 0.1731, "step": 2830 }, { "epoch": 0.0578117048346056, "grad_norm": 27.03886883453908, "learning_rate": 1.1562105605992754e-06, "loss": 0.2772, "step": 2840 }, { "epoch": 0.05801526717557252, "grad_norm": 22.98585291381041, "learning_rate": 1.1602817245450474e-06, "loss": 0.2138, "step": 2850 }, { "epoch": 0.05821882951653944, "grad_norm": 39.838567928807514, "learning_rate": 1.1643528884908196e-06, "loss": 0.2476, "step": 2860 }, { "epoch": 0.05842239185750636, "grad_norm": 33.21478191794923, "learning_rate": 1.1684240524365916e-06, "loss": 0.2728, "step": 2870 }, { "epoch": 0.05862595419847328, "grad_norm": 27.811549215212192, "learning_rate": 1.1724952163823638e-06, "loss": 0.1527, "step": 2880 }, { "epoch": 0.058829516539440206, "grad_norm": 39.59601438490822, "learning_rate": 1.1765663803281358e-06, "loss": 0.2213, "step": 2890 }, { "epoch": 0.059033078880407125, "grad_norm": 36.30377193446514, "learning_rate": 1.180637544273908e-06, "loss": 0.2632, "step": 2900 }, { "epoch": 0.059236641221374044, "grad_norm": 25.497906550292644, "learning_rate": 1.18470870821968e-06, "loss": 0.3615, "step": 2910 }, { "epoch": 0.05944020356234097, "grad_norm": 47.59598733649967, "learning_rate": 1.1887798721654522e-06, "loss": 0.2075, "step": 2920 }, { "epoch": 0.05964376590330789, "grad_norm": 12.321172317668278, "learning_rate": 1.1928510361112242e-06, "loss": 0.2443, "step": 2930 }, { "epoch": 0.05984732824427481, "grad_norm": 21.567477611123074, "learning_rate": 1.1969222000569964e-06, "loss": 0.23, "step": 2940 }, { "epoch": 0.06005089058524173, "grad_norm": 14.060807612913962, "learning_rate": 1.2009933640027684e-06, "loss": 0.2091, "step": 2950 }, { "epoch": 0.06025445292620865, "grad_norm": 41.18955530653078, "learning_rate": 1.2050645279485406e-06, "loss": 0.2588, "step": 2960 }, { "epoch": 0.06045801526717557, "grad_norm": 17.44300526523061, "learning_rate": 1.2091356918943126e-06, "loss": 0.2737, "step": 2970 }, { "epoch": 0.060661577608142496, "grad_norm": 33.21434809274464, "learning_rate": 1.2132068558400848e-06, "loss": 0.3291, "step": 2980 }, { "epoch": 0.060865139949109415, "grad_norm": 35.88745333971158, "learning_rate": 1.217278019785857e-06, "loss": 0.2406, "step": 2990 }, { "epoch": 0.061068702290076333, "grad_norm": 19.672581416220684, "learning_rate": 1.221349183731629e-06, "loss": 0.2039, "step": 3000 }, { "epoch": 0.06127226463104326, "grad_norm": 35.00855458146197, "learning_rate": 1.2254203476774012e-06, "loss": 0.2205, "step": 3010 }, { "epoch": 0.06147582697201018, "grad_norm": 16.04979318184698, "learning_rate": 1.2294915116231732e-06, "loss": 0.2905, "step": 3020 }, { "epoch": 0.0616793893129771, "grad_norm": 14.694944525533026, "learning_rate": 1.2335626755689454e-06, "loss": 0.1776, "step": 3030 }, { "epoch": 0.06188295165394402, "grad_norm": 15.130424522292822, "learning_rate": 1.2376338395147174e-06, "loss": 0.1817, "step": 3040 }, { "epoch": 0.06208651399491094, "grad_norm": 20.730628051354838, "learning_rate": 1.2417050034604894e-06, "loss": 0.2241, "step": 3050 }, { "epoch": 0.06229007633587786, "grad_norm": 37.708101415017474, "learning_rate": 1.2457761674062616e-06, "loss": 0.2559, "step": 3060 }, { "epoch": 0.062493638676844786, "grad_norm": 21.054738192249513, "learning_rate": 1.2498473313520336e-06, "loss": 0.2959, "step": 3070 }, { "epoch": 0.0626972010178117, "grad_norm": 32.380029061566866, "learning_rate": 1.2539184952978056e-06, "loss": 0.2642, "step": 3080 }, { "epoch": 0.06290076335877863, "grad_norm": 39.34125671455686, "learning_rate": 1.2579896592435778e-06, "loss": 0.2053, "step": 3090 }, { "epoch": 0.06310432569974554, "grad_norm": 38.22979204270955, "learning_rate": 1.26206082318935e-06, "loss": 0.256, "step": 3100 }, { "epoch": 0.06330788804071247, "grad_norm": 26.72321049888365, "learning_rate": 1.2661319871351222e-06, "loss": 0.2769, "step": 3110 }, { "epoch": 0.0635114503816794, "grad_norm": 49.852049495786815, "learning_rate": 1.270203151080894e-06, "loss": 0.2001, "step": 3120 }, { "epoch": 0.0637150127226463, "grad_norm": 21.944777947649538, "learning_rate": 1.2742743150266662e-06, "loss": 0.2408, "step": 3130 }, { "epoch": 0.06391857506361323, "grad_norm": 117.82465885863125, "learning_rate": 1.2783454789724384e-06, "loss": 0.3385, "step": 3140 }, { "epoch": 0.06412213740458016, "grad_norm": 29.12458071665958, "learning_rate": 1.2824166429182106e-06, "loss": 0.1982, "step": 3150 }, { "epoch": 0.06432569974554707, "grad_norm": 34.71861588863645, "learning_rate": 1.2864878068639824e-06, "loss": 0.271, "step": 3160 }, { "epoch": 0.064529262086514, "grad_norm": 15.901412313088187, "learning_rate": 1.2905589708097546e-06, "loss": 0.2263, "step": 3170 }, { "epoch": 0.06473282442748092, "grad_norm": 59.23708474554474, "learning_rate": 1.2946301347555268e-06, "loss": 0.2624, "step": 3180 }, { "epoch": 0.06493638676844783, "grad_norm": 28.460134794267887, "learning_rate": 1.2987012987012986e-06, "loss": 0.2261, "step": 3190 }, { "epoch": 0.06513994910941476, "grad_norm": 28.34674571606028, "learning_rate": 1.3027724626470708e-06, "loss": 0.2375, "step": 3200 }, { "epoch": 0.06534351145038168, "grad_norm": 38.362671269015145, "learning_rate": 1.306843626592843e-06, "loss": 0.2504, "step": 3210 }, { "epoch": 0.0655470737913486, "grad_norm": 42.476468593591356, "learning_rate": 1.3109147905386152e-06, "loss": 0.1966, "step": 3220 }, { "epoch": 0.06575063613231552, "grad_norm": 29.08533343972635, "learning_rate": 1.314985954484387e-06, "loss": 0.2028, "step": 3230 }, { "epoch": 0.06595419847328245, "grad_norm": 24.913410408364552, "learning_rate": 1.3190571184301592e-06, "loss": 0.2253, "step": 3240 }, { "epoch": 0.06615776081424936, "grad_norm": 69.24075579908127, "learning_rate": 1.3231282823759314e-06, "loss": 0.2249, "step": 3250 }, { "epoch": 0.06636132315521628, "grad_norm": 21.004083570863337, "learning_rate": 1.3271994463217036e-06, "loss": 0.2381, "step": 3260 }, { "epoch": 0.06656488549618321, "grad_norm": 1.8828673123893922, "learning_rate": 1.3312706102674754e-06, "loss": 0.2813, "step": 3270 }, { "epoch": 0.06676844783715012, "grad_norm": 29.64836557288031, "learning_rate": 1.3353417742132476e-06, "loss": 0.2148, "step": 3280 }, { "epoch": 0.06697201017811705, "grad_norm": 18.11412208013246, "learning_rate": 1.3394129381590198e-06, "loss": 0.2161, "step": 3290 }, { "epoch": 0.06717557251908397, "grad_norm": 28.06033996693574, "learning_rate": 1.3434841021047918e-06, "loss": 0.1992, "step": 3300 }, { "epoch": 0.06737913486005088, "grad_norm": 12.793753115836152, "learning_rate": 1.347555266050564e-06, "loss": 0.2333, "step": 3310 }, { "epoch": 0.06758269720101781, "grad_norm": 29.185245263300974, "learning_rate": 1.351626429996336e-06, "loss": 0.2395, "step": 3320 }, { "epoch": 0.06778625954198474, "grad_norm": 46.71622790164024, "learning_rate": 1.3556975939421082e-06, "loss": 0.2927, "step": 3330 }, { "epoch": 0.06798982188295165, "grad_norm": 34.09485331964585, "learning_rate": 1.3597687578878802e-06, "loss": 0.2153, "step": 3340 }, { "epoch": 0.06819338422391857, "grad_norm": 48.00627105310634, "learning_rate": 1.3638399218336524e-06, "loss": 0.1826, "step": 3350 }, { "epoch": 0.0683969465648855, "grad_norm": 7.360724443910155, "learning_rate": 1.3679110857794244e-06, "loss": 0.2206, "step": 3360 }, { "epoch": 0.06860050890585241, "grad_norm": 32.30159106701799, "learning_rate": 1.3719822497251966e-06, "loss": 0.2222, "step": 3370 }, { "epoch": 0.06880407124681934, "grad_norm": 26.732789287251286, "learning_rate": 1.3760534136709686e-06, "loss": 0.2886, "step": 3380 }, { "epoch": 0.06900763358778626, "grad_norm": 9.525930149837418, "learning_rate": 1.3801245776167408e-06, "loss": 0.2639, "step": 3390 }, { "epoch": 0.06921119592875317, "grad_norm": 19.959001337966285, "learning_rate": 1.3841957415625128e-06, "loss": 0.2718, "step": 3400 }, { "epoch": 0.0694147582697201, "grad_norm": 32.94067268476076, "learning_rate": 1.3882669055082848e-06, "loss": 0.2563, "step": 3410 }, { "epoch": 0.06961832061068703, "grad_norm": 48.2918963689343, "learning_rate": 1.392338069454057e-06, "loss": 0.2248, "step": 3420 }, { "epoch": 0.06982188295165394, "grad_norm": 41.76675777007916, "learning_rate": 1.3964092333998292e-06, "loss": 0.2675, "step": 3430 }, { "epoch": 0.07002544529262086, "grad_norm": 15.129605998325552, "learning_rate": 1.4004803973456014e-06, "loss": 0.2453, "step": 3440 }, { "epoch": 0.07022900763358779, "grad_norm": 23.641275560225345, "learning_rate": 1.4045515612913732e-06, "loss": 0.214, "step": 3450 }, { "epoch": 0.0704325699745547, "grad_norm": 5.9732098911278575, "learning_rate": 1.4086227252371454e-06, "loss": 0.2471, "step": 3460 }, { "epoch": 0.07063613231552163, "grad_norm": 16.51221097856778, "learning_rate": 1.4126938891829176e-06, "loss": 0.1733, "step": 3470 }, { "epoch": 0.07083969465648855, "grad_norm": 0.3852191107064337, "learning_rate": 1.4167650531286898e-06, "loss": 0.1919, "step": 3480 }, { "epoch": 0.07104325699745546, "grad_norm": 52.709801281023125, "learning_rate": 1.4208362170744616e-06, "loss": 0.3082, "step": 3490 }, { "epoch": 0.07124681933842239, "grad_norm": 72.70235179968026, "learning_rate": 1.4249073810202338e-06, "loss": 0.3005, "step": 3500 }, { "epoch": 0.07145038167938932, "grad_norm": 22.373835383552716, "learning_rate": 1.428978544966006e-06, "loss": 0.1128, "step": 3510 }, { "epoch": 0.07165394402035623, "grad_norm": 10.73602379571793, "learning_rate": 1.4330497089117778e-06, "loss": 0.1211, "step": 3520 }, { "epoch": 0.07185750636132315, "grad_norm": 46.668916816377774, "learning_rate": 1.43712087285755e-06, "loss": 0.2203, "step": 3530 }, { "epoch": 0.07206106870229008, "grad_norm": 10.539753034605189, "learning_rate": 1.4411920368033222e-06, "loss": 0.2591, "step": 3540 }, { "epoch": 0.07226463104325699, "grad_norm": 36.253488058445456, "learning_rate": 1.4452632007490944e-06, "loss": 0.1867, "step": 3550 }, { "epoch": 0.07246819338422392, "grad_norm": 0.22909360534797543, "learning_rate": 1.4493343646948662e-06, "loss": 0.3051, "step": 3560 }, { "epoch": 0.07267175572519084, "grad_norm": 21.848494878978467, "learning_rate": 1.4534055286406384e-06, "loss": 0.2276, "step": 3570 }, { "epoch": 0.07287531806615775, "grad_norm": 13.333918073075713, "learning_rate": 1.4574766925864106e-06, "loss": 0.2433, "step": 3580 }, { "epoch": 0.07307888040712468, "grad_norm": 13.79035474041306, "learning_rate": 1.4615478565321828e-06, "loss": 0.2006, "step": 3590 }, { "epoch": 0.0732824427480916, "grad_norm": 20.160704501661602, "learning_rate": 1.4656190204779546e-06, "loss": 0.2387, "step": 3600 }, { "epoch": 0.07348600508905852, "grad_norm": 83.51037042915304, "learning_rate": 1.4696901844237268e-06, "loss": 0.2539, "step": 3610 }, { "epoch": 0.07368956743002544, "grad_norm": 18.40485698301673, "learning_rate": 1.473761348369499e-06, "loss": 0.2964, "step": 3620 }, { "epoch": 0.07389312977099237, "grad_norm": 53.525855002218286, "learning_rate": 1.4778325123152712e-06, "loss": 0.2606, "step": 3630 }, { "epoch": 0.07409669211195928, "grad_norm": 30.79692481008714, "learning_rate": 1.481903676261043e-06, "loss": 0.2738, "step": 3640 }, { "epoch": 0.0743002544529262, "grad_norm": 20.71327796192302, "learning_rate": 1.4859748402068152e-06, "loss": 0.3519, "step": 3650 }, { "epoch": 0.07450381679389313, "grad_norm": 90.90891674820286, "learning_rate": 1.4900460041525874e-06, "loss": 0.2395, "step": 3660 }, { "epoch": 0.07470737913486004, "grad_norm": 12.715023848045751, "learning_rate": 1.4941171680983594e-06, "loss": 0.2119, "step": 3670 }, { "epoch": 0.07491094147582697, "grad_norm": 59.68090658222929, "learning_rate": 1.4981883320441314e-06, "loss": 0.2113, "step": 3680 }, { "epoch": 0.0751145038167939, "grad_norm": 23.80874782425253, "learning_rate": 1.5022594959899036e-06, "loss": 0.1979, "step": 3690 }, { "epoch": 0.07531806615776081, "grad_norm": 37.58078257103819, "learning_rate": 1.5063306599356758e-06, "loss": 0.2073, "step": 3700 }, { "epoch": 0.07552162849872773, "grad_norm": 13.207265425320864, "learning_rate": 1.5104018238814478e-06, "loss": 0.2285, "step": 3710 }, { "epoch": 0.07572519083969466, "grad_norm": 24.790713370212586, "learning_rate": 1.5144729878272198e-06, "loss": 0.2371, "step": 3720 }, { "epoch": 0.07592875318066158, "grad_norm": 47.37639493142363, "learning_rate": 1.518544151772992e-06, "loss": 0.275, "step": 3730 }, { "epoch": 0.0761323155216285, "grad_norm": 33.94037124100434, "learning_rate": 1.5226153157187642e-06, "loss": 0.2156, "step": 3740 }, { "epoch": 0.07633587786259542, "grad_norm": 15.41188193501799, "learning_rate": 1.5266864796645362e-06, "loss": 0.2649, "step": 3750 }, { "epoch": 0.07653944020356235, "grad_norm": 51.14438200845242, "learning_rate": 1.5307576436103084e-06, "loss": 0.161, "step": 3760 }, { "epoch": 0.07674300254452926, "grad_norm": 19.39265661368504, "learning_rate": 1.5348288075560804e-06, "loss": 0.1615, "step": 3770 }, { "epoch": 0.07694656488549619, "grad_norm": 36.220119662144064, "learning_rate": 1.5388999715018524e-06, "loss": 0.1905, "step": 3780 }, { "epoch": 0.07715012722646311, "grad_norm": 63.8942066238071, "learning_rate": 1.5429711354476246e-06, "loss": 0.2447, "step": 3790 }, { "epoch": 0.07735368956743002, "grad_norm": 52.05208453984146, "learning_rate": 1.5470422993933968e-06, "loss": 0.3228, "step": 3800 }, { "epoch": 0.07755725190839695, "grad_norm": 27.927557868084723, "learning_rate": 1.5511134633391688e-06, "loss": 0.2117, "step": 3810 }, { "epoch": 0.07776081424936387, "grad_norm": 54.25292668850879, "learning_rate": 1.5551846272849408e-06, "loss": 0.1305, "step": 3820 }, { "epoch": 0.07796437659033079, "grad_norm": 13.025620920599621, "learning_rate": 1.559255791230713e-06, "loss": 0.3321, "step": 3830 }, { "epoch": 0.07816793893129771, "grad_norm": 110.64627464741893, "learning_rate": 1.5633269551764852e-06, "loss": 0.2652, "step": 3840 }, { "epoch": 0.07837150127226464, "grad_norm": 29.59322008696465, "learning_rate": 1.5673981191222572e-06, "loss": 0.3119, "step": 3850 }, { "epoch": 0.07857506361323155, "grad_norm": 3.11057083959016, "learning_rate": 1.5714692830680292e-06, "loss": 0.2732, "step": 3860 }, { "epoch": 0.07877862595419848, "grad_norm": 15.795144505425696, "learning_rate": 1.5755404470138014e-06, "loss": 0.2377, "step": 3870 }, { "epoch": 0.0789821882951654, "grad_norm": 30.554489633847624, "learning_rate": 1.5796116109595736e-06, "loss": 0.2118, "step": 3880 }, { "epoch": 0.07918575063613231, "grad_norm": 23.041701846117657, "learning_rate": 1.5836827749053454e-06, "loss": 0.2742, "step": 3890 }, { "epoch": 0.07938931297709924, "grad_norm": 42.96775604621834, "learning_rate": 1.5877539388511176e-06, "loss": 0.2368, "step": 3900 }, { "epoch": 0.07959287531806616, "grad_norm": 15.403098158078125, "learning_rate": 1.5918251027968898e-06, "loss": 0.2777, "step": 3910 }, { "epoch": 0.07979643765903308, "grad_norm": 42.62864856531689, "learning_rate": 1.595896266742662e-06, "loss": 0.1715, "step": 3920 }, { "epoch": 0.08, "grad_norm": 47.3656727483421, "learning_rate": 1.5999674306884338e-06, "loss": 0.2431, "step": 3930 }, { "epoch": 0.08020356234096693, "grad_norm": 37.29540000453043, "learning_rate": 1.604038594634206e-06, "loss": 0.1636, "step": 3940 }, { "epoch": 0.08040712468193384, "grad_norm": 24.712113181652533, "learning_rate": 1.6081097585799782e-06, "loss": 0.2092, "step": 3950 }, { "epoch": 0.08061068702290076, "grad_norm": 20.816331517651683, "learning_rate": 1.6121809225257504e-06, "loss": 0.2846, "step": 3960 }, { "epoch": 0.08081424936386769, "grad_norm": 31.347467536765993, "learning_rate": 1.6162520864715222e-06, "loss": 0.2658, "step": 3970 }, { "epoch": 0.0810178117048346, "grad_norm": 35.15221732229293, "learning_rate": 1.6203232504172944e-06, "loss": 0.2171, "step": 3980 }, { "epoch": 0.08122137404580153, "grad_norm": 19.70565351718827, "learning_rate": 1.6243944143630666e-06, "loss": 0.1965, "step": 3990 }, { "epoch": 0.08142493638676845, "grad_norm": 36.53962437016624, "learning_rate": 1.6284655783088384e-06, "loss": 0.2682, "step": 4000 }, { "epoch": 0.08162849872773537, "grad_norm": 19.13933299961507, "learning_rate": 1.6325367422546106e-06, "loss": 0.3674, "step": 4010 }, { "epoch": 0.08183206106870229, "grad_norm": 18.04509177143568, "learning_rate": 1.6366079062003828e-06, "loss": 0.3709, "step": 4020 }, { "epoch": 0.08203562340966922, "grad_norm": 20.618943301598243, "learning_rate": 1.640679070146155e-06, "loss": 0.2255, "step": 4030 }, { "epoch": 0.08223918575063613, "grad_norm": 32.048002950670856, "learning_rate": 1.644750234091927e-06, "loss": 0.2442, "step": 4040 }, { "epoch": 0.08244274809160305, "grad_norm": 20.074613301162806, "learning_rate": 1.648821398037699e-06, "loss": 0.2195, "step": 4050 }, { "epoch": 0.08264631043256998, "grad_norm": 15.845786188946015, "learning_rate": 1.6528925619834712e-06, "loss": 0.3056, "step": 4060 }, { "epoch": 0.08284987277353689, "grad_norm": 9.90821349446082, "learning_rate": 1.6569637259292434e-06, "loss": 0.2501, "step": 4070 }, { "epoch": 0.08305343511450382, "grad_norm": 25.291113232652727, "learning_rate": 1.6610348898750154e-06, "loss": 0.1821, "step": 4080 }, { "epoch": 0.08325699745547074, "grad_norm": 2.2618578749863847, "learning_rate": 1.6651060538207874e-06, "loss": 0.1508, "step": 4090 }, { "epoch": 0.08346055979643766, "grad_norm": 26.736424600585188, "learning_rate": 1.6691772177665596e-06, "loss": 0.223, "step": 4100 }, { "epoch": 0.08366412213740458, "grad_norm": 31.573155828470846, "learning_rate": 1.6732483817123316e-06, "loss": 0.2235, "step": 4110 }, { "epoch": 0.08386768447837151, "grad_norm": 13.494892164952528, "learning_rate": 1.6773195456581038e-06, "loss": 0.2949, "step": 4120 }, { "epoch": 0.08407124681933842, "grad_norm": 27.76338588264567, "learning_rate": 1.6813907096038758e-06, "loss": 0.3003, "step": 4130 }, { "epoch": 0.08427480916030534, "grad_norm": 14.411297516569125, "learning_rate": 1.685461873549648e-06, "loss": 0.1692, "step": 4140 }, { "epoch": 0.08447837150127227, "grad_norm": 28.17770468736691, "learning_rate": 1.68953303749542e-06, "loss": 0.2692, "step": 4150 }, { "epoch": 0.08468193384223918, "grad_norm": 24.26717750382395, "learning_rate": 1.6936042014411922e-06, "loss": 0.1801, "step": 4160 }, { "epoch": 0.08488549618320611, "grad_norm": 14.107540097859545, "learning_rate": 1.6976753653869642e-06, "loss": 0.292, "step": 4170 }, { "epoch": 0.08508905852417303, "grad_norm": 24.12381541978215, "learning_rate": 1.7017465293327364e-06, "loss": 0.3153, "step": 4180 }, { "epoch": 0.08529262086513995, "grad_norm": 21.965673356853515, "learning_rate": 1.7058176932785084e-06, "loss": 0.2877, "step": 4190 }, { "epoch": 0.08549618320610687, "grad_norm": 13.790988165710136, "learning_rate": 1.7098888572242806e-06, "loss": 0.2074, "step": 4200 }, { "epoch": 0.0856997455470738, "grad_norm": 15.429975887663188, "learning_rate": 1.7139600211700528e-06, "loss": 0.1971, "step": 4210 }, { "epoch": 0.08590330788804071, "grad_norm": 29.65521028650621, "learning_rate": 1.7180311851158246e-06, "loss": 0.3181, "step": 4220 }, { "epoch": 0.08610687022900763, "grad_norm": 25.7800915560964, "learning_rate": 1.7221023490615968e-06, "loss": 0.238, "step": 4230 }, { "epoch": 0.08631043256997456, "grad_norm": 14.09102826232206, "learning_rate": 1.726173513007369e-06, "loss": 0.1768, "step": 4240 }, { "epoch": 0.08651399491094147, "grad_norm": 8.710343040909647, "learning_rate": 1.7302446769531412e-06, "loss": 0.1519, "step": 4250 }, { "epoch": 0.0867175572519084, "grad_norm": 24.833025702199457, "learning_rate": 1.734315840898913e-06, "loss": 0.267, "step": 4260 }, { "epoch": 0.08692111959287532, "grad_norm": 23.70050396079689, "learning_rate": 1.7383870048446852e-06, "loss": 0.2921, "step": 4270 }, { "epoch": 0.08712468193384224, "grad_norm": 21.02506498653098, "learning_rate": 1.7424581687904574e-06, "loss": 0.258, "step": 4280 }, { "epoch": 0.08732824427480916, "grad_norm": 22.186732792875766, "learning_rate": 1.7465293327362296e-06, "loss": 0.3483, "step": 4290 }, { "epoch": 0.08753180661577609, "grad_norm": 40.36286953777187, "learning_rate": 1.7506004966820014e-06, "loss": 0.1711, "step": 4300 }, { "epoch": 0.087735368956743, "grad_norm": 19.498474567626275, "learning_rate": 1.7546716606277736e-06, "loss": 0.2052, "step": 4310 }, { "epoch": 0.08793893129770992, "grad_norm": 6.443590369239075, "learning_rate": 1.7587428245735458e-06, "loss": 0.2238, "step": 4320 }, { "epoch": 0.08814249363867685, "grad_norm": 50.05746423580708, "learning_rate": 1.762813988519318e-06, "loss": 0.2588, "step": 4330 }, { "epoch": 0.08834605597964376, "grad_norm": 18.183743319042062, "learning_rate": 1.7668851524650898e-06, "loss": 0.2466, "step": 4340 }, { "epoch": 0.08854961832061069, "grad_norm": 7.442697668140508, "learning_rate": 1.770956316410862e-06, "loss": 0.1992, "step": 4350 }, { "epoch": 0.08875318066157761, "grad_norm": 24.2569846112018, "learning_rate": 1.7750274803566342e-06, "loss": 0.3088, "step": 4360 }, { "epoch": 0.08895674300254452, "grad_norm": 12.65044188315599, "learning_rate": 1.779098644302406e-06, "loss": 0.1821, "step": 4370 }, { "epoch": 0.08916030534351145, "grad_norm": 31.17665583453152, "learning_rate": 1.7831698082481782e-06, "loss": 0.2581, "step": 4380 }, { "epoch": 0.08936386768447838, "grad_norm": 17.477636264873155, "learning_rate": 1.7872409721939504e-06, "loss": 0.2573, "step": 4390 }, { "epoch": 0.08956743002544529, "grad_norm": 22.899930986696596, "learning_rate": 1.7913121361397226e-06, "loss": 0.3481, "step": 4400 }, { "epoch": 0.08977099236641221, "grad_norm": 14.656707276104799, "learning_rate": 1.7953833000854944e-06, "loss": 0.2727, "step": 4410 }, { "epoch": 0.08997455470737914, "grad_norm": 21.167618461265572, "learning_rate": 1.7994544640312666e-06, "loss": 0.2128, "step": 4420 }, { "epoch": 0.09017811704834605, "grad_norm": 25.074630903277903, "learning_rate": 1.8035256279770388e-06, "loss": 0.2583, "step": 4430 }, { "epoch": 0.09038167938931298, "grad_norm": 13.074914731752353, "learning_rate": 1.807596791922811e-06, "loss": 0.1879, "step": 4440 }, { "epoch": 0.0905852417302799, "grad_norm": 12.17519706061902, "learning_rate": 1.8116679558685828e-06, "loss": 0.1965, "step": 4450 }, { "epoch": 0.09078880407124681, "grad_norm": 23.633025678650753, "learning_rate": 1.815739119814355e-06, "loss": 0.2068, "step": 4460 }, { "epoch": 0.09099236641221374, "grad_norm": 24.906592702734113, "learning_rate": 1.8198102837601272e-06, "loss": 0.1723, "step": 4470 }, { "epoch": 0.09119592875318067, "grad_norm": 1.635690415181701, "learning_rate": 1.8238814477058992e-06, "loss": 0.3019, "step": 4480 }, { "epoch": 0.09139949109414758, "grad_norm": 37.16708547632727, "learning_rate": 1.8279526116516714e-06, "loss": 0.2391, "step": 4490 }, { "epoch": 0.0916030534351145, "grad_norm": 26.526812491996736, "learning_rate": 1.8320237755974434e-06, "loss": 0.2666, "step": 4500 }, { "epoch": 0.09180661577608143, "grad_norm": 32.653158291490755, "learning_rate": 1.8360949395432156e-06, "loss": 0.1588, "step": 4510 }, { "epoch": 0.09201017811704834, "grad_norm": 32.23933643649592, "learning_rate": 1.8401661034889876e-06, "loss": 0.2877, "step": 4520 }, { "epoch": 0.09221374045801527, "grad_norm": 3.044027272500336, "learning_rate": 1.8442372674347598e-06, "loss": 0.1953, "step": 4530 }, { "epoch": 0.09241730279898219, "grad_norm": 9.390419463748042, "learning_rate": 1.8483084313805318e-06, "loss": 0.2302, "step": 4540 }, { "epoch": 0.0926208651399491, "grad_norm": 12.268932112524555, "learning_rate": 1.852379595326304e-06, "loss": 0.2032, "step": 4550 }, { "epoch": 0.09282442748091603, "grad_norm": 31.255297644896544, "learning_rate": 1.856450759272076e-06, "loss": 0.2415, "step": 4560 }, { "epoch": 0.09302798982188296, "grad_norm": 16.9044544986004, "learning_rate": 1.8605219232178482e-06, "loss": 0.1846, "step": 4570 }, { "epoch": 0.09323155216284987, "grad_norm": 26.708478125223913, "learning_rate": 1.8645930871636202e-06, "loss": 0.2962, "step": 4580 }, { "epoch": 0.0934351145038168, "grad_norm": 4.745980096352669, "learning_rate": 1.8686642511093922e-06, "loss": 0.1912, "step": 4590 }, { "epoch": 0.09363867684478372, "grad_norm": 62.82584751032593, "learning_rate": 1.8727354150551644e-06, "loss": 0.2578, "step": 4600 }, { "epoch": 0.09384223918575063, "grad_norm": 22.49320724662823, "learning_rate": 1.8768065790009366e-06, "loss": 0.2465, "step": 4610 }, { "epoch": 0.09404580152671756, "grad_norm": 6.913155831430545, "learning_rate": 1.8808777429467086e-06, "loss": 0.1933, "step": 4620 }, { "epoch": 0.09424936386768448, "grad_norm": 21.410196602806522, "learning_rate": 1.8849489068924806e-06, "loss": 0.267, "step": 4630 }, { "epoch": 0.0944529262086514, "grad_norm": 27.52449560814439, "learning_rate": 1.8890200708382528e-06, "loss": 0.2451, "step": 4640 }, { "epoch": 0.09465648854961832, "grad_norm": 16.796842521069635, "learning_rate": 1.893091234784025e-06, "loss": 0.2533, "step": 4650 }, { "epoch": 0.09486005089058525, "grad_norm": 29.4364752817446, "learning_rate": 1.8971623987297972e-06, "loss": 0.2927, "step": 4660 }, { "epoch": 0.09506361323155216, "grad_norm": 14.671279166500625, "learning_rate": 1.901233562675569e-06, "loss": 0.2282, "step": 4670 }, { "epoch": 0.09526717557251908, "grad_norm": 27.84951631194869, "learning_rate": 1.9053047266213412e-06, "loss": 0.2493, "step": 4680 }, { "epoch": 0.09547073791348601, "grad_norm": 10.513082204539502, "learning_rate": 1.9093758905671134e-06, "loss": 0.2481, "step": 4690 }, { "epoch": 0.09567430025445292, "grad_norm": 34.94604904729507, "learning_rate": 1.913447054512885e-06, "loss": 0.3062, "step": 4700 }, { "epoch": 0.09587786259541985, "grad_norm": 23.014371213022486, "learning_rate": 1.9175182184586574e-06, "loss": 0.2834, "step": 4710 }, { "epoch": 0.09608142493638677, "grad_norm": 13.837981278456272, "learning_rate": 1.9215893824044296e-06, "loss": 0.2505, "step": 4720 }, { "epoch": 0.09628498727735368, "grad_norm": 37.441926482962124, "learning_rate": 1.925660546350202e-06, "loss": 0.2683, "step": 4730 }, { "epoch": 0.09648854961832061, "grad_norm": 30.51383476852255, "learning_rate": 1.9297317102959736e-06, "loss": 0.2025, "step": 4740 }, { "epoch": 0.09669211195928754, "grad_norm": 63.389868538690784, "learning_rate": 1.933802874241746e-06, "loss": 0.2798, "step": 4750 }, { "epoch": 0.09689567430025445, "grad_norm": 41.28859003233439, "learning_rate": 1.937874038187518e-06, "loss": 0.3894, "step": 4760 }, { "epoch": 0.09709923664122137, "grad_norm": 7.444045787340816, "learning_rate": 1.9419452021332902e-06, "loss": 0.3046, "step": 4770 }, { "epoch": 0.0973027989821883, "grad_norm": 15.871155684983197, "learning_rate": 1.946016366079062e-06, "loss": 0.2248, "step": 4780 }, { "epoch": 0.09750636132315521, "grad_norm": 19.087894590992864, "learning_rate": 1.9500875300248342e-06, "loss": 0.261, "step": 4790 }, { "epoch": 0.09770992366412214, "grad_norm": 24.855089124063433, "learning_rate": 1.9541586939706064e-06, "loss": 0.2149, "step": 4800 }, { "epoch": 0.09791348600508906, "grad_norm": 2.4736207101397194, "learning_rate": 1.958229857916378e-06, "loss": 0.2514, "step": 4810 }, { "epoch": 0.09811704834605597, "grad_norm": 58.58175713476653, "learning_rate": 1.9623010218621504e-06, "loss": 0.2126, "step": 4820 }, { "epoch": 0.0983206106870229, "grad_norm": 43.09499054722852, "learning_rate": 1.9663721858079226e-06, "loss": 0.2732, "step": 4830 }, { "epoch": 0.09852417302798983, "grad_norm": 17.829348229210456, "learning_rate": 1.970443349753695e-06, "loss": 0.2212, "step": 4840 }, { "epoch": 0.09872773536895674, "grad_norm": 18.368932642953066, "learning_rate": 1.9745145136994666e-06, "loss": 0.2432, "step": 4850 }, { "epoch": 0.09893129770992366, "grad_norm": 36.42838261764852, "learning_rate": 1.978585677645239e-06, "loss": 0.2719, "step": 4860 }, { "epoch": 0.09913486005089059, "grad_norm": 10.51538829190822, "learning_rate": 1.982656841591011e-06, "loss": 0.1984, "step": 4870 }, { "epoch": 0.0993384223918575, "grad_norm": 14.139626795201314, "learning_rate": 1.9867280055367832e-06, "loss": 0.1586, "step": 4880 }, { "epoch": 0.09954198473282443, "grad_norm": 31.251449027469217, "learning_rate": 1.990799169482555e-06, "loss": 0.2353, "step": 4890 }, { "epoch": 0.09974554707379135, "grad_norm": 45.67353788504766, "learning_rate": 1.994870333428327e-06, "loss": 0.3134, "step": 4900 }, { "epoch": 0.09994910941475826, "grad_norm": 24.559569697442562, "learning_rate": 1.9989414973740994e-06, "loss": 0.1808, "step": 4910 }, { "epoch": 0.10015267175572519, "grad_norm": 29.30200881985161, "learning_rate": 2.003012661319871e-06, "loss": 0.1692, "step": 4920 }, { "epoch": 0.10035623409669212, "grad_norm": 1.5346056364880887, "learning_rate": 2.0070838252656434e-06, "loss": 0.2596, "step": 4930 }, { "epoch": 0.10055979643765903, "grad_norm": 45.3288330919448, "learning_rate": 2.0111549892114156e-06, "loss": 0.3275, "step": 4940 }, { "epoch": 0.10076335877862595, "grad_norm": 26.487136303756845, "learning_rate": 2.015226153157188e-06, "loss": 0.3062, "step": 4950 }, { "epoch": 0.10096692111959288, "grad_norm": 6.291043542811259, "learning_rate": 2.0192973171029596e-06, "loss": 0.1377, "step": 4960 }, { "epoch": 0.10117048346055979, "grad_norm": 35.443474792386155, "learning_rate": 2.023368481048732e-06, "loss": 0.2987, "step": 4970 }, { "epoch": 0.10137404580152672, "grad_norm": 31.494276086412672, "learning_rate": 2.027439644994504e-06, "loss": 0.24, "step": 4980 }, { "epoch": 0.10157760814249364, "grad_norm": 15.615336417806848, "learning_rate": 2.0315108089402762e-06, "loss": 0.2402, "step": 4990 }, { "epoch": 0.10178117048346055, "grad_norm": 23.154992243519164, "learning_rate": 2.035581972886048e-06, "loss": 0.2073, "step": 5000 }, { "epoch": 0.10198473282442748, "grad_norm": 36.55222417286224, "learning_rate": 2.03965313683182e-06, "loss": 0.3102, "step": 5010 }, { "epoch": 0.1021882951653944, "grad_norm": 9.925092732036083, "learning_rate": 2.0437243007775924e-06, "loss": 0.2604, "step": 5020 }, { "epoch": 0.10239185750636132, "grad_norm": 28.697561333878216, "learning_rate": 2.0477954647233646e-06, "loss": 0.3379, "step": 5030 }, { "epoch": 0.10259541984732824, "grad_norm": 20.280832557537252, "learning_rate": 2.0518666286691364e-06, "loss": 0.2313, "step": 5040 }, { "epoch": 0.10279898218829517, "grad_norm": 14.655418774533189, "learning_rate": 2.0559377926149086e-06, "loss": 0.2178, "step": 5050 }, { "epoch": 0.10300254452926208, "grad_norm": 11.487369109129338, "learning_rate": 2.060008956560681e-06, "loss": 0.1367, "step": 5060 }, { "epoch": 0.103206106870229, "grad_norm": 31.794876780227018, "learning_rate": 2.064080120506453e-06, "loss": 0.3632, "step": 5070 }, { "epoch": 0.10340966921119593, "grad_norm": 60.23423444260063, "learning_rate": 2.0681512844522252e-06, "loss": 0.2621, "step": 5080 }, { "epoch": 0.10361323155216284, "grad_norm": 14.416539208733528, "learning_rate": 2.072222448397997e-06, "loss": 0.3016, "step": 5090 }, { "epoch": 0.10381679389312977, "grad_norm": 15.479877029025785, "learning_rate": 2.0762936123437692e-06, "loss": 0.2556, "step": 5100 }, { "epoch": 0.1040203562340967, "grad_norm": 12.816810982445304, "learning_rate": 2.0803647762895414e-06, "loss": 0.2084, "step": 5110 }, { "epoch": 0.1042239185750636, "grad_norm": 34.03873492936592, "learning_rate": 2.0844359402353136e-06, "loss": 0.2843, "step": 5120 }, { "epoch": 0.10442748091603053, "grad_norm": 14.514011200072654, "learning_rate": 2.0885071041810854e-06, "loss": 0.2056, "step": 5130 }, { "epoch": 0.10463104325699746, "grad_norm": 18.230291144644344, "learning_rate": 2.0925782681268576e-06, "loss": 0.1871, "step": 5140 }, { "epoch": 0.10483460559796437, "grad_norm": 43.225050272170954, "learning_rate": 2.09664943207263e-06, "loss": 0.3011, "step": 5150 }, { "epoch": 0.1050381679389313, "grad_norm": 57.064435092039716, "learning_rate": 2.100720596018402e-06, "loss": 0.3351, "step": 5160 }, { "epoch": 0.10524173027989822, "grad_norm": 31.497978507743927, "learning_rate": 2.104791759964174e-06, "loss": 0.2315, "step": 5170 }, { "epoch": 0.10544529262086513, "grad_norm": 11.009272380962988, "learning_rate": 2.108862923909946e-06, "loss": 0.2705, "step": 5180 }, { "epoch": 0.10564885496183206, "grad_norm": 14.742657313849392, "learning_rate": 2.1129340878557182e-06, "loss": 0.1297, "step": 5190 }, { "epoch": 0.10585241730279898, "grad_norm": 50.23757120826895, "learning_rate": 2.1170052518014904e-06, "loss": 0.2086, "step": 5200 }, { "epoch": 0.1060559796437659, "grad_norm": 25.254343553167033, "learning_rate": 2.121076415747262e-06, "loss": 0.2964, "step": 5210 }, { "epoch": 0.10625954198473282, "grad_norm": 40.13798359914051, "learning_rate": 2.1251475796930344e-06, "loss": 0.2263, "step": 5220 }, { "epoch": 0.10646310432569975, "grad_norm": 64.07473594400997, "learning_rate": 2.1292187436388066e-06, "loss": 0.2773, "step": 5230 }, { "epoch": 0.10666666666666667, "grad_norm": 13.28159276162973, "learning_rate": 2.133289907584579e-06, "loss": 0.1678, "step": 5240 }, { "epoch": 0.10687022900763359, "grad_norm": 25.190962892777847, "learning_rate": 2.137361071530351e-06, "loss": 0.2769, "step": 5250 }, { "epoch": 0.10707379134860051, "grad_norm": 31.99503355050341, "learning_rate": 2.141432235476123e-06, "loss": 0.2456, "step": 5260 }, { "epoch": 0.10727735368956744, "grad_norm": 21.534833467840162, "learning_rate": 2.145503399421895e-06, "loss": 0.3114, "step": 5270 }, { "epoch": 0.10748091603053435, "grad_norm": 15.190776979414608, "learning_rate": 2.1495745633676672e-06, "loss": 0.1665, "step": 5280 }, { "epoch": 0.10768447837150127, "grad_norm": 17.893592636976912, "learning_rate": 2.153645727313439e-06, "loss": 0.2722, "step": 5290 }, { "epoch": 0.1078880407124682, "grad_norm": 15.39746552982832, "learning_rate": 2.1577168912592112e-06, "loss": 0.2183, "step": 5300 }, { "epoch": 0.10809160305343511, "grad_norm": 29.11319225595952, "learning_rate": 2.1617880552049834e-06, "loss": 0.292, "step": 5310 }, { "epoch": 0.10829516539440204, "grad_norm": 6.548711663473555, "learning_rate": 2.1658592191507556e-06, "loss": 0.2447, "step": 5320 }, { "epoch": 0.10849872773536896, "grad_norm": 20.880742968385146, "learning_rate": 2.1699303830965274e-06, "loss": 0.2312, "step": 5330 }, { "epoch": 0.10870229007633588, "grad_norm": 36.39195467013411, "learning_rate": 2.1740015470422996e-06, "loss": 0.3524, "step": 5340 }, { "epoch": 0.1089058524173028, "grad_norm": 28.504852070450557, "learning_rate": 2.178072710988072e-06, "loss": 0.2199, "step": 5350 }, { "epoch": 0.10910941475826973, "grad_norm": 25.407091046648368, "learning_rate": 2.182143874933844e-06, "loss": 0.3418, "step": 5360 }, { "epoch": 0.10931297709923664, "grad_norm": 19.235518608470205, "learning_rate": 2.186215038879616e-06, "loss": 0.1836, "step": 5370 }, { "epoch": 0.10951653944020356, "grad_norm": 40.65289780517672, "learning_rate": 2.190286202825388e-06, "loss": 0.2523, "step": 5380 }, { "epoch": 0.10972010178117049, "grad_norm": 20.09588933581342, "learning_rate": 2.1943573667711602e-06, "loss": 0.2063, "step": 5390 }, { "epoch": 0.1099236641221374, "grad_norm": 23.549009415488996, "learning_rate": 2.198428530716932e-06, "loss": 0.3501, "step": 5400 }, { "epoch": 0.11012722646310433, "grad_norm": 29.703779656618, "learning_rate": 2.2024996946627042e-06, "loss": 0.3956, "step": 5410 }, { "epoch": 0.11033078880407125, "grad_norm": 34.25018041524416, "learning_rate": 2.2065708586084764e-06, "loss": 0.2632, "step": 5420 }, { "epoch": 0.11053435114503816, "grad_norm": 17.470288719285186, "learning_rate": 2.2106420225542486e-06, "loss": 0.2087, "step": 5430 }, { "epoch": 0.11073791348600509, "grad_norm": 21.902194608985056, "learning_rate": 2.2147131865000204e-06, "loss": 0.2763, "step": 5440 }, { "epoch": 0.11094147582697202, "grad_norm": 17.279754706849772, "learning_rate": 2.2187843504457926e-06, "loss": 0.2819, "step": 5450 }, { "epoch": 0.11114503816793893, "grad_norm": 18.301365477022692, "learning_rate": 2.222855514391565e-06, "loss": 0.2423, "step": 5460 }, { "epoch": 0.11134860050890585, "grad_norm": 11.428837221784416, "learning_rate": 2.226926678337337e-06, "loss": 0.203, "step": 5470 }, { "epoch": 0.11155216284987278, "grad_norm": 44.83288518821388, "learning_rate": 2.230997842283109e-06, "loss": 0.2945, "step": 5480 }, { "epoch": 0.11175572519083969, "grad_norm": 18.251067365529646, "learning_rate": 2.235069006228881e-06, "loss": 0.2514, "step": 5490 }, { "epoch": 0.11195928753180662, "grad_norm": 14.698049951948132, "learning_rate": 2.2391401701746532e-06, "loss": 0.178, "step": 5500 }, { "epoch": 0.11216284987277354, "grad_norm": 12.508646215812291, "learning_rate": 2.243211334120425e-06, "loss": 0.2965, "step": 5510 }, { "epoch": 0.11236641221374045, "grad_norm": 42.96848814549963, "learning_rate": 2.2472824980661972e-06, "loss": 0.2781, "step": 5520 }, { "epoch": 0.11256997455470738, "grad_norm": 14.749839884734339, "learning_rate": 2.2513536620119694e-06, "loss": 0.2235, "step": 5530 }, { "epoch": 0.1127735368956743, "grad_norm": 22.73573130638087, "learning_rate": 2.2554248259577416e-06, "loss": 0.3119, "step": 5540 }, { "epoch": 0.11297709923664122, "grad_norm": 16.519733991258445, "learning_rate": 2.2594959899035134e-06, "loss": 0.2155, "step": 5550 }, { "epoch": 0.11318066157760814, "grad_norm": 22.98582268287872, "learning_rate": 2.2635671538492856e-06, "loss": 0.1932, "step": 5560 }, { "epoch": 0.11338422391857507, "grad_norm": 28.2944999458975, "learning_rate": 2.267638317795058e-06, "loss": 0.226, "step": 5570 }, { "epoch": 0.11358778625954198, "grad_norm": 20.30655176374684, "learning_rate": 2.27170948174083e-06, "loss": 0.2334, "step": 5580 }, { "epoch": 0.11379134860050891, "grad_norm": 21.284089683366457, "learning_rate": 2.275780645686602e-06, "loss": 0.3099, "step": 5590 }, { "epoch": 0.11399491094147583, "grad_norm": 14.616846470953334, "learning_rate": 2.279851809632374e-06, "loss": 0.2473, "step": 5600 }, { "epoch": 0.11419847328244274, "grad_norm": 9.37064699436701, "learning_rate": 2.2839229735781462e-06, "loss": 0.249, "step": 5610 }, { "epoch": 0.11440203562340967, "grad_norm": 31.62444019692543, "learning_rate": 2.2879941375239184e-06, "loss": 0.2421, "step": 5620 }, { "epoch": 0.1146055979643766, "grad_norm": 10.175408423983198, "learning_rate": 2.29206530146969e-06, "loss": 0.3329, "step": 5630 }, { "epoch": 0.11480916030534351, "grad_norm": 26.304867835629533, "learning_rate": 2.2961364654154624e-06, "loss": 0.3113, "step": 5640 }, { "epoch": 0.11501272264631043, "grad_norm": 47.30209887944391, "learning_rate": 2.3002076293612346e-06, "loss": 0.2385, "step": 5650 }, { "epoch": 0.11521628498727736, "grad_norm": 17.048043659909204, "learning_rate": 2.3042787933070064e-06, "loss": 0.2165, "step": 5660 }, { "epoch": 0.11541984732824427, "grad_norm": 34.958664794439784, "learning_rate": 2.3083499572527786e-06, "loss": 0.2515, "step": 5670 }, { "epoch": 0.1156234096692112, "grad_norm": 41.863736964955095, "learning_rate": 2.312421121198551e-06, "loss": 0.2466, "step": 5680 }, { "epoch": 0.11582697201017812, "grad_norm": 24.13255293032183, "learning_rate": 2.316492285144323e-06, "loss": 0.1945, "step": 5690 }, { "epoch": 0.11603053435114503, "grad_norm": 29.40499018925057, "learning_rate": 2.320563449090095e-06, "loss": 0.227, "step": 5700 }, { "epoch": 0.11623409669211196, "grad_norm": 11.911579035519937, "learning_rate": 2.324634613035867e-06, "loss": 0.2664, "step": 5710 }, { "epoch": 0.11643765903307889, "grad_norm": 6.160723373033507, "learning_rate": 2.3287057769816392e-06, "loss": 0.2982, "step": 5720 }, { "epoch": 0.1166412213740458, "grad_norm": 14.117158502971492, "learning_rate": 2.3327769409274114e-06, "loss": 0.2661, "step": 5730 }, { "epoch": 0.11684478371501272, "grad_norm": 8.474076958991649, "learning_rate": 2.336848104873183e-06, "loss": 0.2121, "step": 5740 }, { "epoch": 0.11704834605597965, "grad_norm": 15.235299548971831, "learning_rate": 2.3409192688189554e-06, "loss": 0.2504, "step": 5750 }, { "epoch": 0.11725190839694656, "grad_norm": 41.69907663481582, "learning_rate": 2.3449904327647276e-06, "loss": 0.2932, "step": 5760 }, { "epoch": 0.11745547073791349, "grad_norm": 16.59736929601554, "learning_rate": 2.3490615967104994e-06, "loss": 0.2275, "step": 5770 }, { "epoch": 0.11765903307888041, "grad_norm": 31.77956378620989, "learning_rate": 2.3531327606562716e-06, "loss": 0.2873, "step": 5780 }, { "epoch": 0.11786259541984732, "grad_norm": 36.479145762959654, "learning_rate": 2.357203924602044e-06, "loss": 0.2072, "step": 5790 }, { "epoch": 0.11806615776081425, "grad_norm": 70.55610347043253, "learning_rate": 2.361275088547816e-06, "loss": 0.363, "step": 5800 }, { "epoch": 0.11826972010178118, "grad_norm": 35.073567210210264, "learning_rate": 2.3653462524935882e-06, "loss": 0.2638, "step": 5810 }, { "epoch": 0.11847328244274809, "grad_norm": 0.4918459683287509, "learning_rate": 2.36941741643936e-06, "loss": 0.2173, "step": 5820 }, { "epoch": 0.11867684478371501, "grad_norm": 17.348714626661263, "learning_rate": 2.3734885803851322e-06, "loss": 0.3196, "step": 5830 }, { "epoch": 0.11888040712468194, "grad_norm": 15.006856361081967, "learning_rate": 2.3775597443309044e-06, "loss": 0.2164, "step": 5840 }, { "epoch": 0.11908396946564885, "grad_norm": 9.45994078503315, "learning_rate": 2.3816309082766766e-06, "loss": 0.2249, "step": 5850 }, { "epoch": 0.11928753180661578, "grad_norm": 15.245146762565366, "learning_rate": 2.3857020722224484e-06, "loss": 0.2363, "step": 5860 }, { "epoch": 0.1194910941475827, "grad_norm": 23.63272388246353, "learning_rate": 2.3897732361682206e-06, "loss": 0.2104, "step": 5870 }, { "epoch": 0.11969465648854961, "grad_norm": 4.736332498675408, "learning_rate": 2.393844400113993e-06, "loss": 0.1745, "step": 5880 }, { "epoch": 0.11989821882951654, "grad_norm": 38.31373347013575, "learning_rate": 2.397915564059765e-06, "loss": 0.3145, "step": 5890 }, { "epoch": 0.12010178117048347, "grad_norm": 42.882828141457225, "learning_rate": 2.401986728005537e-06, "loss": 0.3682, "step": 5900 }, { "epoch": 0.12030534351145038, "grad_norm": 33.375240255137555, "learning_rate": 2.406057891951309e-06, "loss": 0.2329, "step": 5910 }, { "epoch": 0.1205089058524173, "grad_norm": 13.593997072883782, "learning_rate": 2.4101290558970812e-06, "loss": 0.2784, "step": 5920 }, { "epoch": 0.12071246819338423, "grad_norm": 40.775998334600544, "learning_rate": 2.4142002198428534e-06, "loss": 0.3232, "step": 5930 }, { "epoch": 0.12091603053435114, "grad_norm": 11.466200727198792, "learning_rate": 2.418271383788625e-06, "loss": 0.2696, "step": 5940 }, { "epoch": 0.12111959287531807, "grad_norm": 19.85401537539843, "learning_rate": 2.4223425477343974e-06, "loss": 0.2718, "step": 5950 }, { "epoch": 0.12132315521628499, "grad_norm": 43.2351125021208, "learning_rate": 2.4264137116801696e-06, "loss": 0.2098, "step": 5960 }, { "epoch": 0.1215267175572519, "grad_norm": 16.44354526920455, "learning_rate": 2.430484875625942e-06, "loss": 0.2248, "step": 5970 }, { "epoch": 0.12173027989821883, "grad_norm": 27.065039184409525, "learning_rate": 2.434556039571714e-06, "loss": 0.2783, "step": 5980 }, { "epoch": 0.12193384223918576, "grad_norm": 31.66790881612453, "learning_rate": 2.438627203517486e-06, "loss": 0.2529, "step": 5990 }, { "epoch": 0.12213740458015267, "grad_norm": 18.196269070556358, "learning_rate": 2.442698367463258e-06, "loss": 0.1937, "step": 6000 }, { "epoch": 0.12234096692111959, "grad_norm": 10.30546281626025, "learning_rate": 2.4467695314090302e-06, "loss": 0.2238, "step": 6010 }, { "epoch": 0.12254452926208652, "grad_norm": 23.532275327150817, "learning_rate": 2.4508406953548024e-06, "loss": 0.3562, "step": 6020 }, { "epoch": 0.12274809160305343, "grad_norm": 36.32460047442023, "learning_rate": 2.4549118593005742e-06, "loss": 0.2032, "step": 6030 }, { "epoch": 0.12295165394402036, "grad_norm": 17.132813324009902, "learning_rate": 2.4589830232463464e-06, "loss": 0.2172, "step": 6040 }, { "epoch": 0.12315521628498728, "grad_norm": 26.35100935250295, "learning_rate": 2.4630541871921186e-06, "loss": 0.267, "step": 6050 }, { "epoch": 0.1233587786259542, "grad_norm": 40.23688106289657, "learning_rate": 2.467125351137891e-06, "loss": 0.2953, "step": 6060 }, { "epoch": 0.12356234096692112, "grad_norm": 13.817545328088379, "learning_rate": 2.4711965150836626e-06, "loss": 0.2182, "step": 6070 }, { "epoch": 0.12376590330788804, "grad_norm": 15.254867669425527, "learning_rate": 2.475267679029435e-06, "loss": 0.1698, "step": 6080 }, { "epoch": 0.12396946564885496, "grad_norm": 33.76325778202484, "learning_rate": 2.479338842975207e-06, "loss": 0.2489, "step": 6090 }, { "epoch": 0.12417302798982188, "grad_norm": 35.41981179205174, "learning_rate": 2.483410006920979e-06, "loss": 0.3111, "step": 6100 }, { "epoch": 0.12437659033078881, "grad_norm": 14.898919439423208, "learning_rate": 2.487481170866751e-06, "loss": 0.2666, "step": 6110 }, { "epoch": 0.12458015267175572, "grad_norm": 13.533098209355673, "learning_rate": 2.4915523348125232e-06, "loss": 0.33, "step": 6120 }, { "epoch": 0.12478371501272265, "grad_norm": 46.19084904874355, "learning_rate": 2.4956234987582954e-06, "loss": 0.2531, "step": 6130 }, { "epoch": 0.12498727735368957, "grad_norm": 41.32595626044249, "learning_rate": 2.4996946627040672e-06, "loss": 0.2525, "step": 6140 }, { "epoch": 0.1251908396946565, "grad_norm": 19.030372655768094, "learning_rate": 2.5037658266498394e-06, "loss": 0.1841, "step": 6150 }, { "epoch": 0.1253944020356234, "grad_norm": 9.14026656668428, "learning_rate": 2.507836990595611e-06, "loss": 0.2057, "step": 6160 }, { "epoch": 0.12559796437659032, "grad_norm": 29.16893929819986, "learning_rate": 2.511908154541384e-06, "loss": 0.3065, "step": 6170 }, { "epoch": 0.12580152671755726, "grad_norm": 49.812099136795965, "learning_rate": 2.5159793184871556e-06, "loss": 0.2283, "step": 6180 }, { "epoch": 0.12600508905852417, "grad_norm": 26.140196215342424, "learning_rate": 2.5200504824329282e-06, "loss": 0.2719, "step": 6190 }, { "epoch": 0.12620865139949108, "grad_norm": 39.00739296708633, "learning_rate": 2.5241216463787e-06, "loss": 0.2254, "step": 6200 }, { "epoch": 0.12641221374045802, "grad_norm": 28.850604580877484, "learning_rate": 2.528192810324472e-06, "loss": 0.2823, "step": 6210 }, { "epoch": 0.12661577608142494, "grad_norm": 63.653205841807214, "learning_rate": 2.5322639742702444e-06, "loss": 0.3013, "step": 6220 }, { "epoch": 0.12681933842239185, "grad_norm": 25.418070128705768, "learning_rate": 2.5363351382160162e-06, "loss": 0.2989, "step": 6230 }, { "epoch": 0.1270229007633588, "grad_norm": 82.65803817888654, "learning_rate": 2.540406302161788e-06, "loss": 0.2764, "step": 6240 }, { "epoch": 0.1272264631043257, "grad_norm": 5.519942348711346, "learning_rate": 2.5444774661075606e-06, "loss": 0.2195, "step": 6250 }, { "epoch": 0.1274300254452926, "grad_norm": 34.2643227826083, "learning_rate": 2.5485486300533324e-06, "loss": 0.2578, "step": 6260 }, { "epoch": 0.12763358778625955, "grad_norm": 47.292732584621874, "learning_rate": 2.552619793999104e-06, "loss": 0.3188, "step": 6270 }, { "epoch": 0.12783715012722646, "grad_norm": 23.014485466047912, "learning_rate": 2.556690957944877e-06, "loss": 0.2162, "step": 6280 }, { "epoch": 0.12804071246819337, "grad_norm": 20.346854956599415, "learning_rate": 2.5607621218906486e-06, "loss": 0.2156, "step": 6290 }, { "epoch": 0.1282442748091603, "grad_norm": 30.24593606677829, "learning_rate": 2.5648332858364212e-06, "loss": 0.3277, "step": 6300 }, { "epoch": 0.12844783715012723, "grad_norm": 18.134588752981557, "learning_rate": 2.568904449782193e-06, "loss": 0.192, "step": 6310 }, { "epoch": 0.12865139949109414, "grad_norm": 57.72278974689008, "learning_rate": 2.572975613727965e-06, "loss": 0.2709, "step": 6320 }, { "epoch": 0.12885496183206108, "grad_norm": 20.239619463254428, "learning_rate": 2.5770467776737374e-06, "loss": 0.251, "step": 6330 }, { "epoch": 0.129058524173028, "grad_norm": 16.63174850557366, "learning_rate": 2.5811179416195092e-06, "loss": 0.2891, "step": 6340 }, { "epoch": 0.1292620865139949, "grad_norm": 9.559192040853663, "learning_rate": 2.585189105565281e-06, "loss": 0.2044, "step": 6350 }, { "epoch": 0.12946564885496184, "grad_norm": 26.899564282860446, "learning_rate": 2.5892602695110536e-06, "loss": 0.2677, "step": 6360 }, { "epoch": 0.12966921119592875, "grad_norm": 46.60399699871743, "learning_rate": 2.5933314334568254e-06, "loss": 0.2617, "step": 6370 }, { "epoch": 0.12987277353689566, "grad_norm": 27.55244513939173, "learning_rate": 2.597402597402597e-06, "loss": 0.2213, "step": 6380 }, { "epoch": 0.1300763358778626, "grad_norm": 10.782734509471812, "learning_rate": 2.60147376134837e-06, "loss": 0.2776, "step": 6390 }, { "epoch": 0.13027989821882952, "grad_norm": 29.42045100856112, "learning_rate": 2.6055449252941416e-06, "loss": 0.2683, "step": 6400 }, { "epoch": 0.13048346055979643, "grad_norm": 38.452651797422064, "learning_rate": 2.6096160892399142e-06, "loss": 0.288, "step": 6410 }, { "epoch": 0.13068702290076337, "grad_norm": 41.49417209946768, "learning_rate": 2.613687253185686e-06, "loss": 0.2758, "step": 6420 }, { "epoch": 0.13089058524173028, "grad_norm": 18.639477277031965, "learning_rate": 2.617758417131458e-06, "loss": 0.3903, "step": 6430 }, { "epoch": 0.1310941475826972, "grad_norm": 31.587498038967887, "learning_rate": 2.6218295810772304e-06, "loss": 0.2101, "step": 6440 }, { "epoch": 0.13129770992366413, "grad_norm": 13.663587210624584, "learning_rate": 2.6259007450230022e-06, "loss": 0.31, "step": 6450 }, { "epoch": 0.13150127226463104, "grad_norm": 16.8554663204173, "learning_rate": 2.629971908968774e-06, "loss": 0.281, "step": 6460 }, { "epoch": 0.13170483460559795, "grad_norm": 7.040685880669882, "learning_rate": 2.6340430729145466e-06, "loss": 0.2687, "step": 6470 }, { "epoch": 0.1319083969465649, "grad_norm": 21.560107112162985, "learning_rate": 2.6381142368603184e-06, "loss": 0.2649, "step": 6480 }, { "epoch": 0.1321119592875318, "grad_norm": 8.899226104482096, "learning_rate": 2.6421854008060906e-06, "loss": 0.2794, "step": 6490 }, { "epoch": 0.13231552162849872, "grad_norm": 26.870783586025883, "learning_rate": 2.646256564751863e-06, "loss": 0.247, "step": 6500 }, { "epoch": 0.13251908396946566, "grad_norm": 29.49175556034283, "learning_rate": 2.6503277286976346e-06, "loss": 0.277, "step": 6510 }, { "epoch": 0.13272264631043257, "grad_norm": 7.934355065337781, "learning_rate": 2.6543988926434072e-06, "loss": 0.2741, "step": 6520 }, { "epoch": 0.13292620865139948, "grad_norm": 53.34164050266494, "learning_rate": 2.658470056589179e-06, "loss": 0.2027, "step": 6530 }, { "epoch": 0.13312977099236642, "grad_norm": 75.67983718430143, "learning_rate": 2.662541220534951e-06, "loss": 0.2599, "step": 6540 }, { "epoch": 0.13333333333333333, "grad_norm": 20.703340085805195, "learning_rate": 2.6666123844807234e-06, "loss": 0.2381, "step": 6550 }, { "epoch": 0.13353689567430024, "grad_norm": 9.387362594979615, "learning_rate": 2.6706835484264952e-06, "loss": 0.1947, "step": 6560 }, { "epoch": 0.13374045801526718, "grad_norm": 28.633874600743553, "learning_rate": 2.6747547123722674e-06, "loss": 0.2266, "step": 6570 }, { "epoch": 0.1339440203562341, "grad_norm": 19.632485353307377, "learning_rate": 2.6788258763180396e-06, "loss": 0.2639, "step": 6580 }, { "epoch": 0.134147582697201, "grad_norm": 1.3830012035195178, "learning_rate": 2.6828970402638114e-06, "loss": 0.189, "step": 6590 }, { "epoch": 0.13435114503816795, "grad_norm": 22.58816527286994, "learning_rate": 2.6869682042095836e-06, "loss": 0.2239, "step": 6600 }, { "epoch": 0.13455470737913486, "grad_norm": 12.215738762794587, "learning_rate": 2.691039368155356e-06, "loss": 0.1978, "step": 6610 }, { "epoch": 0.13475826972010177, "grad_norm": 24.263580202042878, "learning_rate": 2.695110532101128e-06, "loss": 0.363, "step": 6620 }, { "epoch": 0.1349618320610687, "grad_norm": 39.29651789972685, "learning_rate": 2.6991816960469002e-06, "loss": 0.2113, "step": 6630 }, { "epoch": 0.13516539440203562, "grad_norm": 25.329064492447696, "learning_rate": 2.703252859992672e-06, "loss": 0.2731, "step": 6640 }, { "epoch": 0.13536895674300253, "grad_norm": 13.098072785988316, "learning_rate": 2.7073240239384442e-06, "loss": 0.3231, "step": 6650 }, { "epoch": 0.13557251908396947, "grad_norm": 9.604144558338062, "learning_rate": 2.7113951878842164e-06, "loss": 0.2612, "step": 6660 }, { "epoch": 0.13577608142493638, "grad_norm": 20.645972340795396, "learning_rate": 2.715466351829988e-06, "loss": 0.1794, "step": 6670 }, { "epoch": 0.1359796437659033, "grad_norm": 21.865552449399342, "learning_rate": 2.7195375157757604e-06, "loss": 0.2161, "step": 6680 }, { "epoch": 0.13618320610687024, "grad_norm": 47.85489724511346, "learning_rate": 2.7236086797215326e-06, "loss": 0.2558, "step": 6690 }, { "epoch": 0.13638676844783715, "grad_norm": 37.08382527266949, "learning_rate": 2.727679843667305e-06, "loss": 0.3157, "step": 6700 }, { "epoch": 0.13659033078880406, "grad_norm": 3.5682074159366732, "learning_rate": 2.7317510076130766e-06, "loss": 0.2031, "step": 6710 }, { "epoch": 0.136793893129771, "grad_norm": 33.02549077755872, "learning_rate": 2.735822171558849e-06, "loss": 0.231, "step": 6720 }, { "epoch": 0.1369974554707379, "grad_norm": 16.515033067469574, "learning_rate": 2.739893335504621e-06, "loss": 0.2467, "step": 6730 }, { "epoch": 0.13720101781170482, "grad_norm": 14.611624249720899, "learning_rate": 2.7439644994503932e-06, "loss": 0.292, "step": 6740 }, { "epoch": 0.13740458015267176, "grad_norm": 10.35124137272135, "learning_rate": 2.7480356633961654e-06, "loss": 0.2552, "step": 6750 }, { "epoch": 0.13760814249363867, "grad_norm": 7.486960720655562, "learning_rate": 2.7521068273419372e-06, "loss": 0.2852, "step": 6760 }, { "epoch": 0.1378117048346056, "grad_norm": 24.739162021745756, "learning_rate": 2.7561779912877094e-06, "loss": 0.2893, "step": 6770 }, { "epoch": 0.13801526717557253, "grad_norm": 8.29586198302137, "learning_rate": 2.7602491552334816e-06, "loss": 0.242, "step": 6780 }, { "epoch": 0.13821882951653944, "grad_norm": 161.1092399553621, "learning_rate": 2.7643203191792534e-06, "loss": 0.1605, "step": 6790 }, { "epoch": 0.13842239185750635, "grad_norm": 86.38674493719381, "learning_rate": 2.7683914831250256e-06, "loss": 0.313, "step": 6800 }, { "epoch": 0.1386259541984733, "grad_norm": 15.237435017006192, "learning_rate": 2.772462647070798e-06, "loss": 0.3949, "step": 6810 }, { "epoch": 0.1388295165394402, "grad_norm": 13.599566818627455, "learning_rate": 2.7765338110165696e-06, "loss": 0.1841, "step": 6820 }, { "epoch": 0.1390330788804071, "grad_norm": 24.51341097982621, "learning_rate": 2.7806049749623422e-06, "loss": 0.2546, "step": 6830 }, { "epoch": 0.13923664122137405, "grad_norm": 7.932083907422943, "learning_rate": 2.784676138908114e-06, "loss": 0.1788, "step": 6840 }, { "epoch": 0.13944020356234096, "grad_norm": 12.20393216764211, "learning_rate": 2.7887473028538862e-06, "loss": 0.2777, "step": 6850 }, { "epoch": 0.13964376590330788, "grad_norm": 15.713905081529878, "learning_rate": 2.7928184667996584e-06, "loss": 0.2985, "step": 6860 }, { "epoch": 0.13984732824427482, "grad_norm": 15.41733981090303, "learning_rate": 2.7968896307454302e-06, "loss": 0.2568, "step": 6870 }, { "epoch": 0.14005089058524173, "grad_norm": 19.710614693013166, "learning_rate": 2.800960794691203e-06, "loss": 0.2732, "step": 6880 }, { "epoch": 0.14025445292620864, "grad_norm": 8.914628178504074, "learning_rate": 2.8050319586369746e-06, "loss": 0.2697, "step": 6890 }, { "epoch": 0.14045801526717558, "grad_norm": 14.697570034820465, "learning_rate": 2.8091031225827464e-06, "loss": 0.3184, "step": 6900 }, { "epoch": 0.1406615776081425, "grad_norm": 29.429378577430466, "learning_rate": 2.813174286528519e-06, "loss": 0.2626, "step": 6910 }, { "epoch": 0.1408651399491094, "grad_norm": 30.99090227977476, "learning_rate": 2.817245450474291e-06, "loss": 0.2735, "step": 6920 }, { "epoch": 0.14106870229007634, "grad_norm": 47.50960910699505, "learning_rate": 2.8213166144200626e-06, "loss": 0.266, "step": 6930 }, { "epoch": 0.14127226463104325, "grad_norm": 14.274471616691304, "learning_rate": 2.8253877783658352e-06, "loss": 0.3149, "step": 6940 }, { "epoch": 0.14147582697201017, "grad_norm": 20.462088008246806, "learning_rate": 2.829458942311607e-06, "loss": 0.2843, "step": 6950 }, { "epoch": 0.1416793893129771, "grad_norm": 10.098831036950328, "learning_rate": 2.8335301062573796e-06, "loss": 0.2528, "step": 6960 }, { "epoch": 0.14188295165394402, "grad_norm": 13.58083887265476, "learning_rate": 2.8376012702031514e-06, "loss": 0.3056, "step": 6970 }, { "epoch": 0.14208651399491093, "grad_norm": 29.733984661251355, "learning_rate": 2.841672434148923e-06, "loss": 0.281, "step": 6980 }, { "epoch": 0.14229007633587787, "grad_norm": 65.25099404469952, "learning_rate": 2.845743598094696e-06, "loss": 0.2099, "step": 6990 }, { "epoch": 0.14249363867684478, "grad_norm": 0.794593222394192, "learning_rate": 2.8498147620404676e-06, "loss": 0.2581, "step": 7000 }, { "epoch": 0.1426972010178117, "grad_norm": 18.05691803321156, "learning_rate": 2.8538859259862394e-06, "loss": 0.3344, "step": 7010 }, { "epoch": 0.14290076335877863, "grad_norm": 33.66855700851005, "learning_rate": 2.857957089932012e-06, "loss": 0.3173, "step": 7020 }, { "epoch": 0.14310432569974554, "grad_norm": 13.980421206239576, "learning_rate": 2.862028253877784e-06, "loss": 0.2944, "step": 7030 }, { "epoch": 0.14330788804071246, "grad_norm": 11.369204019315493, "learning_rate": 2.8660994178235556e-06, "loss": 0.3014, "step": 7040 }, { "epoch": 0.1435114503816794, "grad_norm": 20.25927589838859, "learning_rate": 2.8701705817693282e-06, "loss": 0.2908, "step": 7050 }, { "epoch": 0.1437150127226463, "grad_norm": 14.24102319205919, "learning_rate": 2.8742417457151e-06, "loss": 0.298, "step": 7060 }, { "epoch": 0.14391857506361322, "grad_norm": 14.588291527854471, "learning_rate": 2.8783129096608726e-06, "loss": 0.2559, "step": 7070 }, { "epoch": 0.14412213740458016, "grad_norm": 28.9412447481218, "learning_rate": 2.8823840736066444e-06, "loss": 0.2324, "step": 7080 }, { "epoch": 0.14432569974554707, "grad_norm": 30.51760205710315, "learning_rate": 2.886455237552416e-06, "loss": 0.2523, "step": 7090 }, { "epoch": 0.14452926208651398, "grad_norm": 23.959222563337004, "learning_rate": 2.890526401498189e-06, "loss": 0.268, "step": 7100 }, { "epoch": 0.14473282442748092, "grad_norm": 1.9036187603922496, "learning_rate": 2.8945975654439606e-06, "loss": 0.2543, "step": 7110 }, { "epoch": 0.14493638676844783, "grad_norm": 29.146252116695273, "learning_rate": 2.8986687293897324e-06, "loss": 0.2493, "step": 7120 }, { "epoch": 0.14513994910941475, "grad_norm": 9.514271848365654, "learning_rate": 2.902739893335505e-06, "loss": 0.2615, "step": 7130 }, { "epoch": 0.14534351145038168, "grad_norm": 68.58658084962184, "learning_rate": 2.906811057281277e-06, "loss": 0.2319, "step": 7140 }, { "epoch": 0.1455470737913486, "grad_norm": 41.78605784564809, "learning_rate": 2.9108822212270486e-06, "loss": 0.3751, "step": 7150 }, { "epoch": 0.1457506361323155, "grad_norm": 16.682877783366383, "learning_rate": 2.9149533851728212e-06, "loss": 0.3033, "step": 7160 }, { "epoch": 0.14595419847328245, "grad_norm": 15.937110222294724, "learning_rate": 2.919024549118593e-06, "loss": 0.1735, "step": 7170 }, { "epoch": 0.14615776081424936, "grad_norm": 14.014326176753643, "learning_rate": 2.9230957130643656e-06, "loss": 0.3763, "step": 7180 }, { "epoch": 0.14636132315521627, "grad_norm": 14.456745598445893, "learning_rate": 2.9271668770101374e-06, "loss": 0.2513, "step": 7190 }, { "epoch": 0.1465648854961832, "grad_norm": 9.587156497737693, "learning_rate": 2.931238040955909e-06, "loss": 0.261, "step": 7200 }, { "epoch": 0.14676844783715012, "grad_norm": 25.663129719107246, "learning_rate": 2.935309204901682e-06, "loss": 0.2182, "step": 7210 }, { "epoch": 0.14697201017811704, "grad_norm": 5.733519892409962, "learning_rate": 2.9393803688474536e-06, "loss": 0.2108, "step": 7220 }, { "epoch": 0.14717557251908397, "grad_norm": 14.302022660270989, "learning_rate": 2.9434515327932254e-06, "loss": 0.2805, "step": 7230 }, { "epoch": 0.1473791348600509, "grad_norm": 7.96811449867533, "learning_rate": 2.947522696738998e-06, "loss": 0.2531, "step": 7240 }, { "epoch": 0.1475826972010178, "grad_norm": 12.029816325052234, "learning_rate": 2.95159386068477e-06, "loss": 0.3519, "step": 7250 }, { "epoch": 0.14778625954198474, "grad_norm": 22.569232604940595, "learning_rate": 2.9556650246305424e-06, "loss": 0.2063, "step": 7260 }, { "epoch": 0.14798982188295165, "grad_norm": 8.993303663556873, "learning_rate": 2.9597361885763142e-06, "loss": 0.2157, "step": 7270 }, { "epoch": 0.14819338422391856, "grad_norm": 33.37455687166376, "learning_rate": 2.963807352522086e-06, "loss": 0.2157, "step": 7280 }, { "epoch": 0.1483969465648855, "grad_norm": 36.18158722031923, "learning_rate": 2.9678785164678586e-06, "loss": 0.2795, "step": 7290 }, { "epoch": 0.1486005089058524, "grad_norm": 60.674072865933624, "learning_rate": 2.9719496804136304e-06, "loss": 0.3208, "step": 7300 }, { "epoch": 0.14880407124681932, "grad_norm": 14.51167410190112, "learning_rate": 2.9760208443594026e-06, "loss": 0.3027, "step": 7310 }, { "epoch": 0.14900763358778626, "grad_norm": 19.87235567471656, "learning_rate": 2.980092008305175e-06, "loss": 0.2855, "step": 7320 }, { "epoch": 0.14921119592875318, "grad_norm": 30.35775237973352, "learning_rate": 2.9841631722509466e-06, "loss": 0.244, "step": 7330 }, { "epoch": 0.1494147582697201, "grad_norm": 16.14345640200835, "learning_rate": 2.988234336196719e-06, "loss": 0.2069, "step": 7340 }, { "epoch": 0.14961832061068703, "grad_norm": 10.921047588557586, "learning_rate": 2.992305500142491e-06, "loss": 0.307, "step": 7350 }, { "epoch": 0.14982188295165394, "grad_norm": 10.115234637416062, "learning_rate": 2.996376664088263e-06, "loss": 0.2158, "step": 7360 }, { "epoch": 0.15002544529262085, "grad_norm": 16.014600414589488, "learning_rate": 3.0004478280340354e-06, "loss": 0.2785, "step": 7370 }, { "epoch": 0.1502290076335878, "grad_norm": 10.42739238233724, "learning_rate": 3.0045189919798072e-06, "loss": 0.2684, "step": 7380 }, { "epoch": 0.1504325699745547, "grad_norm": 12.51199031914803, "learning_rate": 3.0085901559255794e-06, "loss": 0.2657, "step": 7390 }, { "epoch": 0.15063613231552161, "grad_norm": 15.895293455242703, "learning_rate": 3.0126613198713516e-06, "loss": 0.2542, "step": 7400 }, { "epoch": 0.15083969465648855, "grad_norm": 8.381814667052756, "learning_rate": 3.0167324838171234e-06, "loss": 0.2541, "step": 7410 }, { "epoch": 0.15104325699745547, "grad_norm": 10.615501123921106, "learning_rate": 3.0208036477628956e-06, "loss": 0.3125, "step": 7420 }, { "epoch": 0.15124681933842238, "grad_norm": 42.43345769756911, "learning_rate": 3.024874811708668e-06, "loss": 0.2334, "step": 7430 }, { "epoch": 0.15145038167938932, "grad_norm": 25.63309787720963, "learning_rate": 3.0289459756544396e-06, "loss": 0.3103, "step": 7440 }, { "epoch": 0.15165394402035623, "grad_norm": 11.561623226556849, "learning_rate": 3.033017139600212e-06, "loss": 0.2535, "step": 7450 }, { "epoch": 0.15185750636132317, "grad_norm": 54.20913206912375, "learning_rate": 3.037088303545984e-06, "loss": 0.2471, "step": 7460 }, { "epoch": 0.15206106870229008, "grad_norm": 22.9101656784454, "learning_rate": 3.0411594674917562e-06, "loss": 0.2032, "step": 7470 }, { "epoch": 0.152264631043257, "grad_norm": 24.505787162020955, "learning_rate": 3.0452306314375284e-06, "loss": 0.2906, "step": 7480 }, { "epoch": 0.15246819338422393, "grad_norm": 17.896722960116996, "learning_rate": 3.0493017953833002e-06, "loss": 0.2203, "step": 7490 }, { "epoch": 0.15267175572519084, "grad_norm": 11.172653719431326, "learning_rate": 3.0533729593290724e-06, "loss": 0.238, "step": 7500 }, { "epoch": 0.15287531806615776, "grad_norm": 16.43095832595499, "learning_rate": 3.0574441232748446e-06, "loss": 0.209, "step": 7510 }, { "epoch": 0.1530788804071247, "grad_norm": 38.85032284034495, "learning_rate": 3.061515287220617e-06, "loss": 0.3645, "step": 7520 }, { "epoch": 0.1532824427480916, "grad_norm": 20.69942554340257, "learning_rate": 3.0655864511663886e-06, "loss": 0.3597, "step": 7530 }, { "epoch": 0.15348600508905852, "grad_norm": 13.827573770506826, "learning_rate": 3.069657615112161e-06, "loss": 0.2474, "step": 7540 }, { "epoch": 0.15368956743002546, "grad_norm": 12.260284552391026, "learning_rate": 3.073728779057933e-06, "loss": 0.2492, "step": 7550 }, { "epoch": 0.15389312977099237, "grad_norm": 21.605534554243544, "learning_rate": 3.077799943003705e-06, "loss": 0.2913, "step": 7560 }, { "epoch": 0.15409669211195928, "grad_norm": 28.37463398159592, "learning_rate": 3.081871106949477e-06, "loss": 0.2411, "step": 7570 }, { "epoch": 0.15430025445292622, "grad_norm": 22.83502460984263, "learning_rate": 3.0859422708952492e-06, "loss": 0.2526, "step": 7580 }, { "epoch": 0.15450381679389313, "grad_norm": 15.513708831583743, "learning_rate": 3.0900134348410214e-06, "loss": 0.2421, "step": 7590 }, { "epoch": 0.15470737913486005, "grad_norm": 20.18047571801603, "learning_rate": 3.0940845987867936e-06, "loss": 0.2543, "step": 7600 }, { "epoch": 0.15491094147582699, "grad_norm": 15.546230408504437, "learning_rate": 3.0981557627325654e-06, "loss": 0.2024, "step": 7610 }, { "epoch": 0.1551145038167939, "grad_norm": 10.426020116452069, "learning_rate": 3.1022269266783376e-06, "loss": 0.234, "step": 7620 }, { "epoch": 0.1553180661577608, "grad_norm": 20.311569282538603, "learning_rate": 3.10629809062411e-06, "loss": 0.3196, "step": 7630 }, { "epoch": 0.15552162849872775, "grad_norm": 11.17926500724303, "learning_rate": 3.1103692545698816e-06, "loss": 0.2265, "step": 7640 }, { "epoch": 0.15572519083969466, "grad_norm": 18.835468079605256, "learning_rate": 3.1144404185156542e-06, "loss": 0.3347, "step": 7650 }, { "epoch": 0.15592875318066157, "grad_norm": 9.874400597107202, "learning_rate": 3.118511582461426e-06, "loss": 0.1486, "step": 7660 }, { "epoch": 0.1561323155216285, "grad_norm": 11.975916187329233, "learning_rate": 3.122582746407198e-06, "loss": 0.2651, "step": 7670 }, { "epoch": 0.15633587786259542, "grad_norm": 27.903230931472716, "learning_rate": 3.1266539103529704e-06, "loss": 0.2616, "step": 7680 }, { "epoch": 0.15653944020356234, "grad_norm": 7.489845717259584, "learning_rate": 3.1307250742987422e-06, "loss": 0.1861, "step": 7690 }, { "epoch": 0.15674300254452928, "grad_norm": 10.239592761671588, "learning_rate": 3.1347962382445144e-06, "loss": 0.3135, "step": 7700 }, { "epoch": 0.1569465648854962, "grad_norm": 27.3094542559221, "learning_rate": 3.1388674021902866e-06, "loss": 0.3041, "step": 7710 }, { "epoch": 0.1571501272264631, "grad_norm": 14.890488138872188, "learning_rate": 3.1429385661360584e-06, "loss": 0.2688, "step": 7720 }, { "epoch": 0.15735368956743004, "grad_norm": 11.10223069650607, "learning_rate": 3.147009730081831e-06, "loss": 0.3056, "step": 7730 }, { "epoch": 0.15755725190839695, "grad_norm": 7.050129812917152, "learning_rate": 3.151080894027603e-06, "loss": 0.2709, "step": 7740 }, { "epoch": 0.15776081424936386, "grad_norm": 16.75121952945811, "learning_rate": 3.1551520579733746e-06, "loss": 0.2292, "step": 7750 }, { "epoch": 0.1579643765903308, "grad_norm": 0.48360809490260864, "learning_rate": 3.1592232219191472e-06, "loss": 0.2151, "step": 7760 }, { "epoch": 0.1581679389312977, "grad_norm": 6.845035612670533, "learning_rate": 3.163294385864919e-06, "loss": 0.2889, "step": 7770 }, { "epoch": 0.15837150127226463, "grad_norm": 20.630685897854416, "learning_rate": 3.167365549810691e-06, "loss": 0.2307, "step": 7780 }, { "epoch": 0.15857506361323156, "grad_norm": 14.136244253074016, "learning_rate": 3.1714367137564634e-06, "loss": 0.29, "step": 7790 }, { "epoch": 0.15877862595419848, "grad_norm": 18.507421853113268, "learning_rate": 3.1755078777022352e-06, "loss": 0.291, "step": 7800 }, { "epoch": 0.1589821882951654, "grad_norm": 24.60750579345035, "learning_rate": 3.179579041648008e-06, "loss": 0.2225, "step": 7810 }, { "epoch": 0.15918575063613233, "grad_norm": 2.9766865005532326, "learning_rate": 3.1836502055937796e-06, "loss": 0.2736, "step": 7820 }, { "epoch": 0.15938931297709924, "grad_norm": 18.359090688885214, "learning_rate": 3.1877213695395514e-06, "loss": 0.2704, "step": 7830 }, { "epoch": 0.15959287531806615, "grad_norm": 25.883504550761742, "learning_rate": 3.191792533485324e-06, "loss": 0.2958, "step": 7840 }, { "epoch": 0.1597964376590331, "grad_norm": 37.388460502004115, "learning_rate": 3.195863697431096e-06, "loss": 0.2111, "step": 7850 }, { "epoch": 0.16, "grad_norm": 37.589720160036535, "learning_rate": 3.1999348613768676e-06, "loss": 0.1992, "step": 7860 }, { "epoch": 0.16020356234096692, "grad_norm": 9.143257769616797, "learning_rate": 3.2040060253226402e-06, "loss": 0.2601, "step": 7870 }, { "epoch": 0.16040712468193385, "grad_norm": 20.68001513780142, "learning_rate": 3.208077189268412e-06, "loss": 0.2711, "step": 7880 }, { "epoch": 0.16061068702290077, "grad_norm": 6.888159952786166, "learning_rate": 3.212148353214184e-06, "loss": 0.2752, "step": 7890 }, { "epoch": 0.16081424936386768, "grad_norm": 9.236457044313275, "learning_rate": 3.2162195171599564e-06, "loss": 0.1888, "step": 7900 }, { "epoch": 0.16101781170483462, "grad_norm": 13.464257608090312, "learning_rate": 3.2202906811057282e-06, "loss": 0.252, "step": 7910 }, { "epoch": 0.16122137404580153, "grad_norm": 21.537781997950006, "learning_rate": 3.224361845051501e-06, "loss": 0.2675, "step": 7920 }, { "epoch": 0.16142493638676844, "grad_norm": 12.424685396211125, "learning_rate": 3.2284330089972726e-06, "loss": 0.2597, "step": 7930 }, { "epoch": 0.16162849872773538, "grad_norm": 44.81178018429336, "learning_rate": 3.2325041729430444e-06, "loss": 0.2141, "step": 7940 }, { "epoch": 0.1618320610687023, "grad_norm": 26.257147973711632, "learning_rate": 3.236575336888817e-06, "loss": 0.2421, "step": 7950 }, { "epoch": 0.1620356234096692, "grad_norm": 19.386691613539714, "learning_rate": 3.240646500834589e-06, "loss": 0.285, "step": 7960 }, { "epoch": 0.16223918575063614, "grad_norm": 7.820419043937768, "learning_rate": 3.2447176647803606e-06, "loss": 0.2741, "step": 7970 }, { "epoch": 0.16244274809160306, "grad_norm": 9.966053301865927, "learning_rate": 3.2487888287261332e-06, "loss": 0.239, "step": 7980 }, { "epoch": 0.16264631043256997, "grad_norm": 10.500445288450582, "learning_rate": 3.252859992671905e-06, "loss": 0.2222, "step": 7990 }, { "epoch": 0.1628498727735369, "grad_norm": 13.275826540891343, "learning_rate": 3.256931156617677e-06, "loss": 0.2173, "step": 8000 }, { "epoch": 0.16305343511450382, "grad_norm": 11.656269826319836, "learning_rate": 3.2610023205634494e-06, "loss": 0.2921, "step": 8010 }, { "epoch": 0.16325699745547073, "grad_norm": 7.982311015957967, "learning_rate": 3.2650734845092212e-06, "loss": 0.1904, "step": 8020 }, { "epoch": 0.16346055979643767, "grad_norm": 3.6852327689358457, "learning_rate": 3.269144648454994e-06, "loss": 0.3029, "step": 8030 }, { "epoch": 0.16366412213740458, "grad_norm": 19.895552109136982, "learning_rate": 3.2732158124007656e-06, "loss": 0.2924, "step": 8040 }, { "epoch": 0.1638676844783715, "grad_norm": 11.154919310617085, "learning_rate": 3.2772869763465374e-06, "loss": 0.2428, "step": 8050 }, { "epoch": 0.16407124681933843, "grad_norm": 24.521655344436137, "learning_rate": 3.28135814029231e-06, "loss": 0.3238, "step": 8060 }, { "epoch": 0.16427480916030535, "grad_norm": 29.048074521035105, "learning_rate": 3.285429304238082e-06, "loss": 0.2312, "step": 8070 }, { "epoch": 0.16447837150127226, "grad_norm": 13.419236273467419, "learning_rate": 3.289500468183854e-06, "loss": 0.3281, "step": 8080 }, { "epoch": 0.1646819338422392, "grad_norm": 20.7973647268404, "learning_rate": 3.2935716321296262e-06, "loss": 0.2874, "step": 8090 }, { "epoch": 0.1648854961832061, "grad_norm": 11.330562275173481, "learning_rate": 3.297642796075398e-06, "loss": 0.3134, "step": 8100 }, { "epoch": 0.16508905852417302, "grad_norm": 3.1512960471085307, "learning_rate": 3.3017139600211702e-06, "loss": 0.2352, "step": 8110 }, { "epoch": 0.16529262086513996, "grad_norm": 10.053443099964124, "learning_rate": 3.3057851239669424e-06, "loss": 0.1874, "step": 8120 }, { "epoch": 0.16549618320610687, "grad_norm": 29.16349099442916, "learning_rate": 3.309856287912714e-06, "loss": 0.3068, "step": 8130 }, { "epoch": 0.16569974554707378, "grad_norm": 17.066625662317897, "learning_rate": 3.313927451858487e-06, "loss": 0.2376, "step": 8140 }, { "epoch": 0.16590330788804072, "grad_norm": 11.109327303309389, "learning_rate": 3.3179986158042586e-06, "loss": 0.3446, "step": 8150 }, { "epoch": 0.16610687022900764, "grad_norm": 122.24263716289907, "learning_rate": 3.322069779750031e-06, "loss": 0.2135, "step": 8160 }, { "epoch": 0.16631043256997455, "grad_norm": 23.802058187969717, "learning_rate": 3.326140943695803e-06, "loss": 0.3382, "step": 8170 }, { "epoch": 0.1665139949109415, "grad_norm": 17.0863423764715, "learning_rate": 3.330212107641575e-06, "loss": 0.2377, "step": 8180 }, { "epoch": 0.1667175572519084, "grad_norm": 12.987654732984554, "learning_rate": 3.334283271587347e-06, "loss": 0.3578, "step": 8190 }, { "epoch": 0.1669211195928753, "grad_norm": 7.174961528594817, "learning_rate": 3.3383544355331192e-06, "loss": 0.2455, "step": 8200 }, { "epoch": 0.16712468193384225, "grad_norm": 12.841733719062953, "learning_rate": 3.3424255994788914e-06, "loss": 0.2629, "step": 8210 }, { "epoch": 0.16732824427480916, "grad_norm": 24.09948596394017, "learning_rate": 3.3464967634246632e-06, "loss": 0.2193, "step": 8220 }, { "epoch": 0.16753180661577607, "grad_norm": 19.366444186619464, "learning_rate": 3.3505679273704354e-06, "loss": 0.2993, "step": 8230 }, { "epoch": 0.16773536895674301, "grad_norm": 21.97052444881093, "learning_rate": 3.3546390913162076e-06, "loss": 0.208, "step": 8240 }, { "epoch": 0.16793893129770993, "grad_norm": 14.358021418713014, "learning_rate": 3.35871025526198e-06, "loss": 0.2212, "step": 8250 }, { "epoch": 0.16814249363867684, "grad_norm": 10.971480338608274, "learning_rate": 3.3627814192077516e-06, "loss": 0.2223, "step": 8260 }, { "epoch": 0.16834605597964378, "grad_norm": 19.594477911564496, "learning_rate": 3.366852583153524e-06, "loss": 0.3261, "step": 8270 }, { "epoch": 0.1685496183206107, "grad_norm": 28.758896938540158, "learning_rate": 3.370923747099296e-06, "loss": 0.208, "step": 8280 }, { "epoch": 0.1687531806615776, "grad_norm": 30.556937863433834, "learning_rate": 3.3749949110450682e-06, "loss": 0.2211, "step": 8290 }, { "epoch": 0.16895674300254454, "grad_norm": 21.9634898960589, "learning_rate": 3.37906607499084e-06, "loss": 0.2662, "step": 8300 }, { "epoch": 0.16916030534351145, "grad_norm": 3.8603510518129056, "learning_rate": 3.3831372389366122e-06, "loss": 0.2309, "step": 8310 }, { "epoch": 0.16936386768447836, "grad_norm": 3.7863464307156027, "learning_rate": 3.3872084028823844e-06, "loss": 0.1942, "step": 8320 }, { "epoch": 0.1695674300254453, "grad_norm": 8.700973324704236, "learning_rate": 3.3912795668281562e-06, "loss": 0.1768, "step": 8330 }, { "epoch": 0.16977099236641222, "grad_norm": 13.75562726142615, "learning_rate": 3.3953507307739284e-06, "loss": 0.2668, "step": 8340 }, { "epoch": 0.16997455470737913, "grad_norm": 59.44923408242978, "learning_rate": 3.3994218947197006e-06, "loss": 0.2709, "step": 8350 }, { "epoch": 0.17017811704834607, "grad_norm": 8.147450666444222, "learning_rate": 3.403493058665473e-06, "loss": 0.3325, "step": 8360 }, { "epoch": 0.17038167938931298, "grad_norm": 21.589430133841738, "learning_rate": 3.407564222611245e-06, "loss": 0.1925, "step": 8370 }, { "epoch": 0.1705852417302799, "grad_norm": 42.33040236245334, "learning_rate": 3.411635386557017e-06, "loss": 0.2632, "step": 8380 }, { "epoch": 0.17078880407124683, "grad_norm": 35.01666771377554, "learning_rate": 3.415706550502789e-06, "loss": 0.2875, "step": 8390 }, { "epoch": 0.17099236641221374, "grad_norm": 1.475213034200709, "learning_rate": 3.4197777144485612e-06, "loss": 0.2181, "step": 8400 }, { "epoch": 0.17119592875318065, "grad_norm": 29.44645123297371, "learning_rate": 3.423848878394333e-06, "loss": 0.3598, "step": 8410 }, { "epoch": 0.1713994910941476, "grad_norm": 23.695635509261567, "learning_rate": 3.4279200423401056e-06, "loss": 0.2778, "step": 8420 }, { "epoch": 0.1716030534351145, "grad_norm": 15.405885837259218, "learning_rate": 3.4319912062858774e-06, "loss": 0.2391, "step": 8430 }, { "epoch": 0.17180661577608142, "grad_norm": 15.851189062248315, "learning_rate": 3.436062370231649e-06, "loss": 0.2154, "step": 8440 }, { "epoch": 0.17201017811704836, "grad_norm": 11.551832274959141, "learning_rate": 3.440133534177422e-06, "loss": 0.257, "step": 8450 }, { "epoch": 0.17221374045801527, "grad_norm": 31.289467187507555, "learning_rate": 3.4442046981231936e-06, "loss": 0.3091, "step": 8460 }, { "epoch": 0.17241730279898218, "grad_norm": 37.87758457854313, "learning_rate": 3.448275862068966e-06, "loss": 0.2453, "step": 8470 }, { "epoch": 0.17262086513994912, "grad_norm": 32.21416286304782, "learning_rate": 3.452347026014738e-06, "loss": 0.4005, "step": 8480 }, { "epoch": 0.17282442748091603, "grad_norm": 20.369780244617324, "learning_rate": 3.45641818996051e-06, "loss": 0.3385, "step": 8490 }, { "epoch": 0.17302798982188294, "grad_norm": 26.3472221693956, "learning_rate": 3.4604893539062824e-06, "loss": 0.2367, "step": 8500 }, { "epoch": 0.17323155216284988, "grad_norm": 20.064176483961752, "learning_rate": 3.4645605178520542e-06, "loss": 0.3194, "step": 8510 }, { "epoch": 0.1734351145038168, "grad_norm": 22.862961844835038, "learning_rate": 3.468631681797826e-06, "loss": 0.2325, "step": 8520 }, { "epoch": 0.1736386768447837, "grad_norm": 9.93984619989582, "learning_rate": 3.4727028457435986e-06, "loss": 0.2249, "step": 8530 }, { "epoch": 0.17384223918575065, "grad_norm": 33.74750524359904, "learning_rate": 3.4767740096893704e-06, "loss": 0.3131, "step": 8540 }, { "epoch": 0.17404580152671756, "grad_norm": 14.75875016322053, "learning_rate": 3.480845173635143e-06, "loss": 0.2593, "step": 8550 }, { "epoch": 0.17424936386768447, "grad_norm": 12.042262149166962, "learning_rate": 3.484916337580915e-06, "loss": 0.1857, "step": 8560 }, { "epoch": 0.1744529262086514, "grad_norm": 63.132654134790684, "learning_rate": 3.4889875015266866e-06, "loss": 0.2695, "step": 8570 }, { "epoch": 0.17465648854961832, "grad_norm": 39.29092653362255, "learning_rate": 3.4930586654724592e-06, "loss": 0.2702, "step": 8580 }, { "epoch": 0.17486005089058523, "grad_norm": 20.28043031797764, "learning_rate": 3.497129829418231e-06, "loss": 0.2522, "step": 8590 }, { "epoch": 0.17506361323155217, "grad_norm": 46.63538475375591, "learning_rate": 3.501200993364003e-06, "loss": 0.2559, "step": 8600 }, { "epoch": 0.17526717557251908, "grad_norm": 16.364270946054514, "learning_rate": 3.5052721573097754e-06, "loss": 0.2569, "step": 8610 }, { "epoch": 0.175470737913486, "grad_norm": 14.241667517858586, "learning_rate": 3.5093433212555472e-06, "loss": 0.3414, "step": 8620 }, { "epoch": 0.17567430025445294, "grad_norm": 16.798491297404446, "learning_rate": 3.513414485201319e-06, "loss": 0.2714, "step": 8630 }, { "epoch": 0.17587786259541985, "grad_norm": 27.104055512161853, "learning_rate": 3.5174856491470916e-06, "loss": 0.2765, "step": 8640 }, { "epoch": 0.17608142493638676, "grad_norm": 15.12223316810803, "learning_rate": 3.5215568130928634e-06, "loss": 0.2586, "step": 8650 }, { "epoch": 0.1762849872773537, "grad_norm": 12.469094495477357, "learning_rate": 3.525627977038636e-06, "loss": 0.284, "step": 8660 }, { "epoch": 0.1764885496183206, "grad_norm": 9.516875796582443, "learning_rate": 3.529699140984408e-06, "loss": 0.3515, "step": 8670 }, { "epoch": 0.17669211195928752, "grad_norm": 6.525815421054568, "learning_rate": 3.5337703049301796e-06, "loss": 0.1995, "step": 8680 }, { "epoch": 0.17689567430025446, "grad_norm": 21.13741302498005, "learning_rate": 3.5378414688759522e-06, "loss": 0.1636, "step": 8690 }, { "epoch": 0.17709923664122137, "grad_norm": 11.477885276965411, "learning_rate": 3.541912632821724e-06, "loss": 0.2583, "step": 8700 }, { "epoch": 0.1773027989821883, "grad_norm": 42.383815068169774, "learning_rate": 3.545983796767496e-06, "loss": 0.2356, "step": 8710 }, { "epoch": 0.17750636132315523, "grad_norm": 14.391197437039066, "learning_rate": 3.5500549607132684e-06, "loss": 0.3586, "step": 8720 }, { "epoch": 0.17770992366412214, "grad_norm": 24.136068237225313, "learning_rate": 3.5541261246590402e-06, "loss": 0.3504, "step": 8730 }, { "epoch": 0.17791348600508905, "grad_norm": 9.666877822821355, "learning_rate": 3.558197288604812e-06, "loss": 0.1913, "step": 8740 }, { "epoch": 0.178117048346056, "grad_norm": 12.225986957827287, "learning_rate": 3.5622684525505846e-06, "loss": 0.2434, "step": 8750 }, { "epoch": 0.1783206106870229, "grad_norm": 19.983397260728832, "learning_rate": 3.5663396164963564e-06, "loss": 0.2216, "step": 8760 }, { "epoch": 0.1785241730279898, "grad_norm": 30.389418796888908, "learning_rate": 3.570410780442129e-06, "loss": 0.3091, "step": 8770 }, { "epoch": 0.17872773536895675, "grad_norm": 17.588581984848922, "learning_rate": 3.574481944387901e-06, "loss": 0.2151, "step": 8780 }, { "epoch": 0.17893129770992366, "grad_norm": 16.965260592808665, "learning_rate": 3.5785531083336726e-06, "loss": 0.2279, "step": 8790 }, { "epoch": 0.17913486005089058, "grad_norm": 21.478675884975015, "learning_rate": 3.5826242722794452e-06, "loss": 0.3519, "step": 8800 }, { "epoch": 0.17933842239185752, "grad_norm": 30.433981257434656, "learning_rate": 3.586695436225217e-06, "loss": 0.2574, "step": 8810 }, { "epoch": 0.17954198473282443, "grad_norm": 9.293555950177888, "learning_rate": 3.590766600170989e-06, "loss": 0.2198, "step": 8820 }, { "epoch": 0.17974554707379134, "grad_norm": 20.093156760177177, "learning_rate": 3.5948377641167614e-06, "loss": 0.2026, "step": 8830 }, { "epoch": 0.17994910941475828, "grad_norm": 8.936405290871903, "learning_rate": 3.5989089280625332e-06, "loss": 0.22, "step": 8840 }, { "epoch": 0.1801526717557252, "grad_norm": 0.14118442650608776, "learning_rate": 3.6029800920083054e-06, "loss": 0.2594, "step": 8850 }, { "epoch": 0.1803562340966921, "grad_norm": 13.304393446488568, "learning_rate": 3.6070512559540776e-06, "loss": 0.2992, "step": 8860 }, { "epoch": 0.18055979643765904, "grad_norm": 18.19407745126758, "learning_rate": 3.6111224198998494e-06, "loss": 0.193, "step": 8870 }, { "epoch": 0.18076335877862595, "grad_norm": 20.380362726265226, "learning_rate": 3.615193583845622e-06, "loss": 0.2394, "step": 8880 }, { "epoch": 0.18096692111959287, "grad_norm": 8.3842849293771, "learning_rate": 3.619264747791394e-06, "loss": 0.2429, "step": 8890 }, { "epoch": 0.1811704834605598, "grad_norm": 16.987579180827932, "learning_rate": 3.6233359117371656e-06, "loss": 0.287, "step": 8900 }, { "epoch": 0.18137404580152672, "grad_norm": 71.78717392365876, "learning_rate": 3.6274070756829382e-06, "loss": 0.309, "step": 8910 }, { "epoch": 0.18157760814249363, "grad_norm": 7.001127308597625, "learning_rate": 3.63147823962871e-06, "loss": 0.2809, "step": 8920 }, { "epoch": 0.18178117048346057, "grad_norm": 24.530118321678565, "learning_rate": 3.6355494035744822e-06, "loss": 0.3743, "step": 8930 }, { "epoch": 0.18198473282442748, "grad_norm": 16.590194695629766, "learning_rate": 3.6396205675202544e-06, "loss": 0.3181, "step": 8940 }, { "epoch": 0.1821882951653944, "grad_norm": 12.168516370517754, "learning_rate": 3.6436917314660262e-06, "loss": 0.1862, "step": 8950 }, { "epoch": 0.18239185750636133, "grad_norm": 16.549630704977144, "learning_rate": 3.6477628954117984e-06, "loss": 0.2817, "step": 8960 }, { "epoch": 0.18259541984732824, "grad_norm": 29.539047499917167, "learning_rate": 3.6518340593575706e-06, "loss": 0.2555, "step": 8970 }, { "epoch": 0.18279898218829516, "grad_norm": 20.53183180696864, "learning_rate": 3.655905223303343e-06, "loss": 0.2648, "step": 8980 }, { "epoch": 0.1830025445292621, "grad_norm": 32.80910659192196, "learning_rate": 3.659976387249115e-06, "loss": 0.325, "step": 8990 }, { "epoch": 0.183206106870229, "grad_norm": 39.90620752071427, "learning_rate": 3.664047551194887e-06, "loss": 0.3328, "step": 9000 }, { "epoch": 0.18340966921119592, "grad_norm": 7.302896380007877, "learning_rate": 3.668118715140659e-06, "loss": 0.2878, "step": 9010 }, { "epoch": 0.18361323155216286, "grad_norm": 9.500148296894796, "learning_rate": 3.6721898790864312e-06, "loss": 0.2776, "step": 9020 }, { "epoch": 0.18381679389312977, "grad_norm": 32.43104548768725, "learning_rate": 3.676261043032203e-06, "loss": 0.2286, "step": 9030 }, { "epoch": 0.18402035623409668, "grad_norm": 24.827772528101143, "learning_rate": 3.6803322069779752e-06, "loss": 0.298, "step": 9040 }, { "epoch": 0.18422391857506362, "grad_norm": 21.55991297780102, "learning_rate": 3.6844033709237474e-06, "loss": 0.3255, "step": 9050 }, { "epoch": 0.18442748091603053, "grad_norm": 9.29502115533431, "learning_rate": 3.6884745348695196e-06, "loss": 0.3139, "step": 9060 }, { "epoch": 0.18463104325699745, "grad_norm": 33.25313764774006, "learning_rate": 3.6925456988152914e-06, "loss": 0.3611, "step": 9070 }, { "epoch": 0.18483460559796439, "grad_norm": 14.148177819435306, "learning_rate": 3.6966168627610636e-06, "loss": 0.2631, "step": 9080 }, { "epoch": 0.1850381679389313, "grad_norm": 10.414303080823666, "learning_rate": 3.700688026706836e-06, "loss": 0.3475, "step": 9090 }, { "epoch": 0.1852417302798982, "grad_norm": 14.920801393929152, "learning_rate": 3.704759190652608e-06, "loss": 0.3722, "step": 9100 }, { "epoch": 0.18544529262086515, "grad_norm": 25.992253041243114, "learning_rate": 3.7088303545983802e-06, "loss": 0.1969, "step": 9110 }, { "epoch": 0.18564885496183206, "grad_norm": 7.9031359070864795, "learning_rate": 3.712901518544152e-06, "loss": 0.2986, "step": 9120 }, { "epoch": 0.18585241730279897, "grad_norm": 15.95060677989781, "learning_rate": 3.7169726824899242e-06, "loss": 0.2936, "step": 9130 }, { "epoch": 0.1860559796437659, "grad_norm": 25.735057608908303, "learning_rate": 3.7210438464356964e-06, "loss": 0.299, "step": 9140 }, { "epoch": 0.18625954198473282, "grad_norm": 12.142441236420956, "learning_rate": 3.7251150103814682e-06, "loss": 0.3084, "step": 9150 }, { "epoch": 0.18646310432569974, "grad_norm": 14.332977351611765, "learning_rate": 3.7291861743272404e-06, "loss": 0.2092, "step": 9160 }, { "epoch": 0.18666666666666668, "grad_norm": 21.718123619307033, "learning_rate": 3.7332573382730126e-06, "loss": 0.2408, "step": 9170 }, { "epoch": 0.1868702290076336, "grad_norm": 21.20160067991175, "learning_rate": 3.7373285022187844e-06, "loss": 0.2939, "step": 9180 }, { "epoch": 0.1870737913486005, "grad_norm": 17.023679984171743, "learning_rate": 3.741399666164557e-06, "loss": 0.2119, "step": 9190 }, { "epoch": 0.18727735368956744, "grad_norm": 21.937563614967452, "learning_rate": 3.745470830110329e-06, "loss": 0.2159, "step": 9200 }, { "epoch": 0.18748091603053435, "grad_norm": 40.94805508368382, "learning_rate": 3.749541994056101e-06, "loss": 0.2146, "step": 9210 }, { "epoch": 0.18768447837150126, "grad_norm": 35.987358615882655, "learning_rate": 3.7536131580018732e-06, "loss": 0.3717, "step": 9220 }, { "epoch": 0.1878880407124682, "grad_norm": 19.536284480452835, "learning_rate": 3.757684321947645e-06, "loss": 0.3246, "step": 9230 }, { "epoch": 0.1880916030534351, "grad_norm": 15.648475921495903, "learning_rate": 3.7617554858934172e-06, "loss": 0.2829, "step": 9240 }, { "epoch": 0.18829516539440203, "grad_norm": 17.821357204589198, "learning_rate": 3.7658266498391894e-06, "loss": 0.3408, "step": 9250 }, { "epoch": 0.18849872773536896, "grad_norm": 11.933691094550333, "learning_rate": 3.7698978137849612e-06, "loss": 0.297, "step": 9260 }, { "epoch": 0.18870229007633588, "grad_norm": 16.633175049049257, "learning_rate": 3.773968977730734e-06, "loss": 0.2481, "step": 9270 }, { "epoch": 0.1889058524173028, "grad_norm": 13.530871305971317, "learning_rate": 3.7780401416765056e-06, "loss": 0.2327, "step": 9280 }, { "epoch": 0.18910941475826973, "grad_norm": 33.13397196127856, "learning_rate": 3.7821113056222774e-06, "loss": 0.1774, "step": 9290 }, { "epoch": 0.18931297709923664, "grad_norm": 46.6430278317388, "learning_rate": 3.78618246956805e-06, "loss": 0.2831, "step": 9300 }, { "epoch": 0.18951653944020355, "grad_norm": 14.197676162074815, "learning_rate": 3.790253633513822e-06, "loss": 0.2658, "step": 9310 }, { "epoch": 0.1897201017811705, "grad_norm": 14.57762514081696, "learning_rate": 3.7943247974595945e-06, "loss": 0.3151, "step": 9320 }, { "epoch": 0.1899236641221374, "grad_norm": 8.08719631706662, "learning_rate": 3.7983959614053662e-06, "loss": 0.3004, "step": 9330 }, { "epoch": 0.19012722646310432, "grad_norm": 29.789575252975975, "learning_rate": 3.802467125351138e-06, "loss": 0.2544, "step": 9340 }, { "epoch": 0.19033078880407125, "grad_norm": 19.45298182532706, "learning_rate": 3.8065382892969106e-06, "loss": 0.2226, "step": 9350 }, { "epoch": 0.19053435114503817, "grad_norm": 13.335440430037096, "learning_rate": 3.8106094532426824e-06, "loss": 0.2626, "step": 9360 }, { "epoch": 0.19073791348600508, "grad_norm": 20.506663681939415, "learning_rate": 3.8146806171884542e-06, "loss": 0.2986, "step": 9370 }, { "epoch": 0.19094147582697202, "grad_norm": 12.592936428051521, "learning_rate": 3.818751781134227e-06, "loss": 0.2237, "step": 9380 }, { "epoch": 0.19114503816793893, "grad_norm": 60.12164086748771, "learning_rate": 3.822822945079999e-06, "loss": 0.3239, "step": 9390 }, { "epoch": 0.19134860050890584, "grad_norm": 3.6150235083836484, "learning_rate": 3.82689410902577e-06, "loss": 0.3059, "step": 9400 }, { "epoch": 0.19155216284987278, "grad_norm": 7.079015799872929, "learning_rate": 3.830965272971543e-06, "loss": 0.2933, "step": 9410 }, { "epoch": 0.1917557251908397, "grad_norm": 12.94243660415339, "learning_rate": 3.835036436917315e-06, "loss": 0.2326, "step": 9420 }, { "epoch": 0.1919592875318066, "grad_norm": 8.550260055298553, "learning_rate": 3.8391076008630875e-06, "loss": 0.2965, "step": 9430 }, { "epoch": 0.19216284987277354, "grad_norm": 5.841506771826476, "learning_rate": 3.843178764808859e-06, "loss": 0.2918, "step": 9440 }, { "epoch": 0.19236641221374046, "grad_norm": 12.666535059501202, "learning_rate": 3.847249928754631e-06, "loss": 0.2546, "step": 9450 }, { "epoch": 0.19256997455470737, "grad_norm": 13.56875378469262, "learning_rate": 3.851321092700404e-06, "loss": 0.2765, "step": 9460 }, { "epoch": 0.1927735368956743, "grad_norm": 36.2822506073764, "learning_rate": 3.8553922566461754e-06, "loss": 0.2874, "step": 9470 }, { "epoch": 0.19297709923664122, "grad_norm": 36.96781894600417, "learning_rate": 3.859463420591947e-06, "loss": 0.2417, "step": 9480 }, { "epoch": 0.19318066157760813, "grad_norm": 20.975622343497317, "learning_rate": 3.86353458453772e-06, "loss": 0.3019, "step": 9490 }, { "epoch": 0.19338422391857507, "grad_norm": 21.157161247329025, "learning_rate": 3.867605748483492e-06, "loss": 0.28, "step": 9500 }, { "epoch": 0.19358778625954198, "grad_norm": 14.99287413410765, "learning_rate": 3.871676912429263e-06, "loss": 0.3016, "step": 9510 }, { "epoch": 0.1937913486005089, "grad_norm": 14.166774898079224, "learning_rate": 3.875748076375036e-06, "loss": 0.2242, "step": 9520 }, { "epoch": 0.19399491094147583, "grad_norm": 12.31473594282165, "learning_rate": 3.879819240320808e-06, "loss": 0.1955, "step": 9530 }, { "epoch": 0.19419847328244275, "grad_norm": 51.5477560555615, "learning_rate": 3.8838904042665804e-06, "loss": 0.3163, "step": 9540 }, { "epoch": 0.19440203562340966, "grad_norm": 13.935000694321984, "learning_rate": 3.887961568212352e-06, "loss": 0.2613, "step": 9550 }, { "epoch": 0.1946055979643766, "grad_norm": 38.475351002388834, "learning_rate": 3.892032732158124e-06, "loss": 0.2166, "step": 9560 }, { "epoch": 0.1948091603053435, "grad_norm": 17.93786456992988, "learning_rate": 3.896103896103897e-06, "loss": 0.3507, "step": 9570 }, { "epoch": 0.19501272264631042, "grad_norm": 4.740739621380432, "learning_rate": 3.9001750600496684e-06, "loss": 0.283, "step": 9580 }, { "epoch": 0.19521628498727736, "grad_norm": 21.39833997808443, "learning_rate": 3.90424622399544e-06, "loss": 0.2785, "step": 9590 }, { "epoch": 0.19541984732824427, "grad_norm": 6.613778282677281, "learning_rate": 3.908317387941213e-06, "loss": 0.2465, "step": 9600 }, { "epoch": 0.19562340966921118, "grad_norm": 21.966460135552857, "learning_rate": 3.912388551886985e-06, "loss": 0.2181, "step": 9610 }, { "epoch": 0.19582697201017812, "grad_norm": 19.247216732667848, "learning_rate": 3.916459715832756e-06, "loss": 0.271, "step": 9620 }, { "epoch": 0.19603053435114504, "grad_norm": 91.95204731314111, "learning_rate": 3.920530879778529e-06, "loss": 0.2837, "step": 9630 }, { "epoch": 0.19623409669211195, "grad_norm": 9.803556058263535, "learning_rate": 3.924602043724301e-06, "loss": 0.2686, "step": 9640 }, { "epoch": 0.1964376590330789, "grad_norm": 20.425429942188007, "learning_rate": 3.9286732076700734e-06, "loss": 0.2663, "step": 9650 }, { "epoch": 0.1966412213740458, "grad_norm": 20.449539092966617, "learning_rate": 3.932744371615845e-06, "loss": 0.2867, "step": 9660 }, { "epoch": 0.1968447837150127, "grad_norm": 16.315741706915144, "learning_rate": 3.936815535561617e-06, "loss": 0.2137, "step": 9670 }, { "epoch": 0.19704834605597965, "grad_norm": 36.03117153577165, "learning_rate": 3.94088669950739e-06, "loss": 0.265, "step": 9680 }, { "epoch": 0.19725190839694656, "grad_norm": 11.3179551211606, "learning_rate": 3.944957863453161e-06, "loss": 0.2873, "step": 9690 }, { "epoch": 0.19745547073791347, "grad_norm": 11.424921209344088, "learning_rate": 3.949029027398933e-06, "loss": 0.263, "step": 9700 }, { "epoch": 0.19765903307888041, "grad_norm": 18.602657984347385, "learning_rate": 3.953100191344706e-06, "loss": 0.3313, "step": 9710 }, { "epoch": 0.19786259541984733, "grad_norm": 6.236227254895919, "learning_rate": 3.957171355290478e-06, "loss": 0.2964, "step": 9720 }, { "epoch": 0.19806615776081424, "grad_norm": 16.304332033750203, "learning_rate": 3.961242519236249e-06, "loss": 0.1541, "step": 9730 }, { "epoch": 0.19826972010178118, "grad_norm": 16.077689649680444, "learning_rate": 3.965313683182022e-06, "loss": 0.1588, "step": 9740 }, { "epoch": 0.1984732824427481, "grad_norm": 9.837643414174535, "learning_rate": 3.969384847127794e-06, "loss": 0.2684, "step": 9750 }, { "epoch": 0.198676844783715, "grad_norm": 16.563741919724663, "learning_rate": 3.9734560110735664e-06, "loss": 0.2449, "step": 9760 }, { "epoch": 0.19888040712468194, "grad_norm": 77.49642258374402, "learning_rate": 3.977527175019338e-06, "loss": 0.2204, "step": 9770 }, { "epoch": 0.19908396946564885, "grad_norm": 18.554311769535687, "learning_rate": 3.98159833896511e-06, "loss": 0.2922, "step": 9780 }, { "epoch": 0.19928753180661576, "grad_norm": 37.39007605367398, "learning_rate": 3.985669502910883e-06, "loss": 0.3156, "step": 9790 }, { "epoch": 0.1994910941475827, "grad_norm": 26.879774900559173, "learning_rate": 3.989740666856654e-06, "loss": 0.3134, "step": 9800 }, { "epoch": 0.19969465648854962, "grad_norm": 10.56436606478266, "learning_rate": 3.993811830802426e-06, "loss": 0.2479, "step": 9810 }, { "epoch": 0.19989821882951653, "grad_norm": 7.886411254917431, "learning_rate": 3.997882994748199e-06, "loss": 0.2055, "step": 9820 }, { "epoch": 0.20010178117048347, "grad_norm": 14.809435599613622, "learning_rate": 4.001954158693971e-06, "loss": 0.2647, "step": 9830 }, { "epoch": 0.20030534351145038, "grad_norm": 22.9330052018332, "learning_rate": 4.006025322639742e-06, "loss": 0.1665, "step": 9840 }, { "epoch": 0.2005089058524173, "grad_norm": 34.33720314678791, "learning_rate": 4.010096486585515e-06, "loss": 0.351, "step": 9850 }, { "epoch": 0.20071246819338423, "grad_norm": 53.813459173261776, "learning_rate": 4.014167650531287e-06, "loss": 0.2242, "step": 9860 }, { "epoch": 0.20091603053435114, "grad_norm": 29.61860773920872, "learning_rate": 4.0182388144770594e-06, "loss": 0.2613, "step": 9870 }, { "epoch": 0.20111959287531805, "grad_norm": 7.9118199846128805, "learning_rate": 4.022309978422831e-06, "loss": 0.2591, "step": 9880 }, { "epoch": 0.201323155216285, "grad_norm": 7.282655239956603, "learning_rate": 4.026381142368603e-06, "loss": 0.3231, "step": 9890 }, { "epoch": 0.2015267175572519, "grad_norm": 8.82252762549662, "learning_rate": 4.030452306314376e-06, "loss": 0.2713, "step": 9900 }, { "epoch": 0.20173027989821882, "grad_norm": 8.344421606290535, "learning_rate": 4.034523470260147e-06, "loss": 0.1961, "step": 9910 }, { "epoch": 0.20193384223918576, "grad_norm": 26.38234275816899, "learning_rate": 4.038594634205919e-06, "loss": 0.1668, "step": 9920 }, { "epoch": 0.20213740458015267, "grad_norm": 28.91644381995542, "learning_rate": 4.042665798151692e-06, "loss": 0.2809, "step": 9930 }, { "epoch": 0.20234096692111958, "grad_norm": 23.729179043818174, "learning_rate": 4.046736962097464e-06, "loss": 0.2631, "step": 9940 }, { "epoch": 0.20254452926208652, "grad_norm": 16.767103459794626, "learning_rate": 4.050808126043236e-06, "loss": 0.3306, "step": 9950 }, { "epoch": 0.20274809160305343, "grad_norm": 8.461170304178957, "learning_rate": 4.054879289989008e-06, "loss": 0.312, "step": 9960 }, { "epoch": 0.20295165394402034, "grad_norm": 38.97090766330789, "learning_rate": 4.05895045393478e-06, "loss": 0.2403, "step": 9970 }, { "epoch": 0.20315521628498728, "grad_norm": 13.265668907155915, "learning_rate": 4.0630216178805524e-06, "loss": 0.2614, "step": 9980 }, { "epoch": 0.2033587786259542, "grad_norm": 2.0952526802830183, "learning_rate": 4.067092781826324e-06, "loss": 0.3084, "step": 9990 }, { "epoch": 0.2035623409669211, "grad_norm": 9.065845915340836, "learning_rate": 4.071163945772096e-06, "loss": 0.2826, "step": 10000 }, { "epoch": 0.20376590330788805, "grad_norm": 13.438038522377276, "learning_rate": 4.075235109717869e-06, "loss": 0.395, "step": 10010 }, { "epoch": 0.20396946564885496, "grad_norm": 9.350014717756908, "learning_rate": 4.07930627366364e-06, "loss": 0.2288, "step": 10020 }, { "epoch": 0.20417302798982187, "grad_norm": 50.32090321696821, "learning_rate": 4.083377437609413e-06, "loss": 0.274, "step": 10030 }, { "epoch": 0.2043765903307888, "grad_norm": 11.831088568055634, "learning_rate": 4.087448601555185e-06, "loss": 0.3237, "step": 10040 }, { "epoch": 0.20458015267175572, "grad_norm": 50.621346007245954, "learning_rate": 4.091519765500957e-06, "loss": 0.2108, "step": 10050 }, { "epoch": 0.20478371501272263, "grad_norm": 34.934427165575016, "learning_rate": 4.095590929446729e-06, "loss": 0.2369, "step": 10060 }, { "epoch": 0.20498727735368957, "grad_norm": 38.92195384210837, "learning_rate": 4.099662093392501e-06, "loss": 0.2564, "step": 10070 }, { "epoch": 0.20519083969465648, "grad_norm": 12.445076409130317, "learning_rate": 4.103733257338273e-06, "loss": 0.2064, "step": 10080 }, { "epoch": 0.2053944020356234, "grad_norm": 40.98624497921973, "learning_rate": 4.1078044212840454e-06, "loss": 0.3032, "step": 10090 }, { "epoch": 0.20559796437659034, "grad_norm": 9.893929503824468, "learning_rate": 4.111875585229817e-06, "loss": 0.2614, "step": 10100 }, { "epoch": 0.20580152671755725, "grad_norm": 44.98287939848992, "learning_rate": 4.11594674917559e-06, "loss": 0.2626, "step": 10110 }, { "epoch": 0.20600508905852416, "grad_norm": 20.012464730678023, "learning_rate": 4.120017913121362e-06, "loss": 0.3674, "step": 10120 }, { "epoch": 0.2062086513994911, "grad_norm": 11.090869395424601, "learning_rate": 4.124089077067133e-06, "loss": 0.2559, "step": 10130 }, { "epoch": 0.206412213740458, "grad_norm": 21.823704780514298, "learning_rate": 4.128160241012906e-06, "loss": 0.3131, "step": 10140 }, { "epoch": 0.20661577608142492, "grad_norm": 4.549870100428031, "learning_rate": 4.132231404958678e-06, "loss": 0.3644, "step": 10150 }, { "epoch": 0.20681933842239186, "grad_norm": 11.52599161189086, "learning_rate": 4.1363025689044505e-06, "loss": 0.2882, "step": 10160 }, { "epoch": 0.20702290076335877, "grad_norm": 7.209137264737146, "learning_rate": 4.140373732850222e-06, "loss": 0.3038, "step": 10170 }, { "epoch": 0.2072264631043257, "grad_norm": 22.07692223349841, "learning_rate": 4.144444896795994e-06, "loss": 0.3034, "step": 10180 }, { "epoch": 0.20743002544529263, "grad_norm": 15.423334218629664, "learning_rate": 4.148516060741767e-06, "loss": 0.2708, "step": 10190 }, { "epoch": 0.20763358778625954, "grad_norm": 21.584593064916074, "learning_rate": 4.1525872246875384e-06, "loss": 0.2679, "step": 10200 }, { "epoch": 0.20783715012722645, "grad_norm": 15.272087875739295, "learning_rate": 4.15665838863331e-06, "loss": 0.3371, "step": 10210 }, { "epoch": 0.2080407124681934, "grad_norm": 32.97424525343153, "learning_rate": 4.160729552579083e-06, "loss": 0.271, "step": 10220 }, { "epoch": 0.2082442748091603, "grad_norm": 0.33553224798934, "learning_rate": 4.164800716524855e-06, "loss": 0.3022, "step": 10230 }, { "epoch": 0.2084478371501272, "grad_norm": 27.209435992683055, "learning_rate": 4.168871880470627e-06, "loss": 0.2809, "step": 10240 }, { "epoch": 0.20865139949109415, "grad_norm": 11.053880233027083, "learning_rate": 4.172943044416399e-06, "loss": 0.3668, "step": 10250 }, { "epoch": 0.20885496183206106, "grad_norm": 8.26232421033672, "learning_rate": 4.177014208362171e-06, "loss": 0.2224, "step": 10260 }, { "epoch": 0.20905852417302798, "grad_norm": 23.70527140536451, "learning_rate": 4.1810853723079434e-06, "loss": 0.2797, "step": 10270 }, { "epoch": 0.20926208651399492, "grad_norm": 14.936594134007816, "learning_rate": 4.185156536253715e-06, "loss": 0.4195, "step": 10280 }, { "epoch": 0.20946564885496183, "grad_norm": 9.876890635745653, "learning_rate": 4.189227700199487e-06, "loss": 0.3242, "step": 10290 }, { "epoch": 0.20966921119592874, "grad_norm": 12.784699073347186, "learning_rate": 4.19329886414526e-06, "loss": 0.2672, "step": 10300 }, { "epoch": 0.20987277353689568, "grad_norm": 12.306575176970993, "learning_rate": 4.1973700280910314e-06, "loss": 0.282, "step": 10310 }, { "epoch": 0.2100763358778626, "grad_norm": 10.641295226990207, "learning_rate": 4.201441192036804e-06, "loss": 0.2341, "step": 10320 }, { "epoch": 0.2102798982188295, "grad_norm": 21.64850119256873, "learning_rate": 4.205512355982576e-06, "loss": 0.295, "step": 10330 }, { "epoch": 0.21048346055979644, "grad_norm": 15.371019639814449, "learning_rate": 4.209583519928348e-06, "loss": 0.2874, "step": 10340 }, { "epoch": 0.21068702290076335, "grad_norm": 5.800351149632085, "learning_rate": 4.21365468387412e-06, "loss": 0.2243, "step": 10350 }, { "epoch": 0.21089058524173027, "grad_norm": 16.48413778515728, "learning_rate": 4.217725847819892e-06, "loss": 0.2206, "step": 10360 }, { "epoch": 0.2110941475826972, "grad_norm": 13.265444428526296, "learning_rate": 4.221797011765665e-06, "loss": 0.3319, "step": 10370 }, { "epoch": 0.21129770992366412, "grad_norm": 20.296254097832833, "learning_rate": 4.2258681757114364e-06, "loss": 0.2547, "step": 10380 }, { "epoch": 0.21150127226463103, "grad_norm": 3.3844137537216716, "learning_rate": 4.229939339657208e-06, "loss": 0.2642, "step": 10390 }, { "epoch": 0.21170483460559797, "grad_norm": 17.540355402137042, "learning_rate": 4.234010503602981e-06, "loss": 0.2071, "step": 10400 }, { "epoch": 0.21190839694656488, "grad_norm": 11.398589017819972, "learning_rate": 4.238081667548753e-06, "loss": 0.3032, "step": 10410 }, { "epoch": 0.2121119592875318, "grad_norm": 52.06444587485278, "learning_rate": 4.242152831494524e-06, "loss": 0.28, "step": 10420 }, { "epoch": 0.21231552162849873, "grad_norm": 0.8464120073090566, "learning_rate": 4.246223995440297e-06, "loss": 0.317, "step": 10430 }, { "epoch": 0.21251908396946564, "grad_norm": 21.268110750908647, "learning_rate": 4.250295159386069e-06, "loss": 0.2593, "step": 10440 }, { "epoch": 0.21272264631043258, "grad_norm": 29.08339280538832, "learning_rate": 4.2543663233318415e-06, "loss": 0.3219, "step": 10450 }, { "epoch": 0.2129262086513995, "grad_norm": 14.758974636777811, "learning_rate": 4.258437487277613e-06, "loss": 0.2849, "step": 10460 }, { "epoch": 0.2131297709923664, "grad_norm": 14.898120198430489, "learning_rate": 4.262508651223385e-06, "loss": 0.3334, "step": 10470 }, { "epoch": 0.21333333333333335, "grad_norm": 7.772633119032867, "learning_rate": 4.266579815169158e-06, "loss": 0.2835, "step": 10480 }, { "epoch": 0.21353689567430026, "grad_norm": 12.682453395261497, "learning_rate": 4.2706509791149294e-06, "loss": 0.3226, "step": 10490 }, { "epoch": 0.21374045801526717, "grad_norm": 16.847932490114044, "learning_rate": 4.274722143060702e-06, "loss": 0.248, "step": 10500 }, { "epoch": 0.2139440203562341, "grad_norm": 20.68948995188208, "learning_rate": 4.278793307006474e-06, "loss": 0.3357, "step": 10510 }, { "epoch": 0.21414758269720102, "grad_norm": 24.47424548663904, "learning_rate": 4.282864470952246e-06, "loss": 0.3003, "step": 10520 }, { "epoch": 0.21435114503816793, "grad_norm": 11.34213934278875, "learning_rate": 4.286935634898018e-06, "loss": 0.2768, "step": 10530 }, { "epoch": 0.21455470737913487, "grad_norm": 14.080748278132068, "learning_rate": 4.29100679884379e-06, "loss": 0.2177, "step": 10540 }, { "epoch": 0.21475826972010179, "grad_norm": 24.927642400350773, "learning_rate": 4.295077962789562e-06, "loss": 0.2819, "step": 10550 }, { "epoch": 0.2149618320610687, "grad_norm": 15.882435458849258, "learning_rate": 4.2991491267353345e-06, "loss": 0.2452, "step": 10560 }, { "epoch": 0.21516539440203564, "grad_norm": 22.91491624876595, "learning_rate": 4.303220290681106e-06, "loss": 0.1828, "step": 10570 }, { "epoch": 0.21536895674300255, "grad_norm": 15.937189269656699, "learning_rate": 4.307291454626878e-06, "loss": 0.3137, "step": 10580 }, { "epoch": 0.21557251908396946, "grad_norm": 11.11453803370187, "learning_rate": 4.311362618572651e-06, "loss": 0.3796, "step": 10590 }, { "epoch": 0.2157760814249364, "grad_norm": 23.066566555192058, "learning_rate": 4.3154337825184224e-06, "loss": 0.3292, "step": 10600 }, { "epoch": 0.2159796437659033, "grad_norm": 12.461951492017011, "learning_rate": 4.319504946464195e-06, "loss": 0.2889, "step": 10610 }, { "epoch": 0.21618320610687022, "grad_norm": 18.49231840565138, "learning_rate": 4.323576110409967e-06, "loss": 0.2277, "step": 10620 }, { "epoch": 0.21638676844783716, "grad_norm": 16.36879181045994, "learning_rate": 4.327647274355739e-06, "loss": 0.2734, "step": 10630 }, { "epoch": 0.21659033078880408, "grad_norm": 20.096182765987542, "learning_rate": 4.331718438301511e-06, "loss": 0.3289, "step": 10640 }, { "epoch": 0.216793893129771, "grad_norm": 21.65067053743384, "learning_rate": 4.335789602247283e-06, "loss": 0.3231, "step": 10650 }, { "epoch": 0.21699745547073793, "grad_norm": 12.828642353584245, "learning_rate": 4.339860766193055e-06, "loss": 0.32, "step": 10660 }, { "epoch": 0.21720101781170484, "grad_norm": 26.995077114645216, "learning_rate": 4.3439319301388275e-06, "loss": 0.2497, "step": 10670 }, { "epoch": 0.21740458015267175, "grad_norm": 6.909043183913654, "learning_rate": 4.348003094084599e-06, "loss": 0.2139, "step": 10680 }, { "epoch": 0.2176081424936387, "grad_norm": 15.722909864420876, "learning_rate": 4.352074258030371e-06, "loss": 0.1815, "step": 10690 }, { "epoch": 0.2178117048346056, "grad_norm": 55.74323549168644, "learning_rate": 4.356145421976144e-06, "loss": 0.2609, "step": 10700 }, { "epoch": 0.2180152671755725, "grad_norm": 11.225780304692018, "learning_rate": 4.3602165859219154e-06, "loss": 0.3278, "step": 10710 }, { "epoch": 0.21821882951653945, "grad_norm": 22.987899269722565, "learning_rate": 4.364287749867688e-06, "loss": 0.1851, "step": 10720 }, { "epoch": 0.21842239185750636, "grad_norm": 5.692323692917414, "learning_rate": 4.36835891381346e-06, "loss": 0.3538, "step": 10730 }, { "epoch": 0.21862595419847328, "grad_norm": 13.870323200935102, "learning_rate": 4.372430077759232e-06, "loss": 0.2855, "step": 10740 }, { "epoch": 0.21882951653944022, "grad_norm": 27.392195625986194, "learning_rate": 4.376501241705004e-06, "loss": 0.306, "step": 10750 }, { "epoch": 0.21903307888040713, "grad_norm": 18.58183332376263, "learning_rate": 4.380572405650776e-06, "loss": 0.2866, "step": 10760 }, { "epoch": 0.21923664122137404, "grad_norm": 16.95104373054376, "learning_rate": 4.384643569596548e-06, "loss": 0.3112, "step": 10770 }, { "epoch": 0.21944020356234098, "grad_norm": 8.582104289698053, "learning_rate": 4.3887147335423205e-06, "loss": 0.3055, "step": 10780 }, { "epoch": 0.2196437659033079, "grad_norm": 17.647742128219278, "learning_rate": 4.392785897488092e-06, "loss": 0.3119, "step": 10790 }, { "epoch": 0.2198473282442748, "grad_norm": 18.680874620232643, "learning_rate": 4.396857061433864e-06, "loss": 0.2751, "step": 10800 }, { "epoch": 0.22005089058524174, "grad_norm": 5.444002409784441, "learning_rate": 4.400928225379637e-06, "loss": 0.1869, "step": 10810 }, { "epoch": 0.22025445292620865, "grad_norm": 19.173119318084947, "learning_rate": 4.4049993893254084e-06, "loss": 0.2331, "step": 10820 }, { "epoch": 0.22045801526717557, "grad_norm": 0.2443799655392276, "learning_rate": 4.409070553271181e-06, "loss": 0.2692, "step": 10830 }, { "epoch": 0.2206615776081425, "grad_norm": 10.88822880615587, "learning_rate": 4.413141717216953e-06, "loss": 0.2243, "step": 10840 }, { "epoch": 0.22086513994910942, "grad_norm": 16.516927677079412, "learning_rate": 4.417212881162725e-06, "loss": 0.3251, "step": 10850 }, { "epoch": 0.22106870229007633, "grad_norm": 12.970100699506194, "learning_rate": 4.421284045108497e-06, "loss": 0.2102, "step": 10860 }, { "epoch": 0.22127226463104327, "grad_norm": 27.898074280009155, "learning_rate": 4.425355209054269e-06, "loss": 0.2218, "step": 10870 }, { "epoch": 0.22147582697201018, "grad_norm": 12.492068173695182, "learning_rate": 4.429426373000041e-06, "loss": 0.2946, "step": 10880 }, { "epoch": 0.2216793893129771, "grad_norm": 7.879682569878567, "learning_rate": 4.4334975369458135e-06, "loss": 0.1778, "step": 10890 }, { "epoch": 0.22188295165394403, "grad_norm": 17.795180869907554, "learning_rate": 4.437568700891585e-06, "loss": 0.2924, "step": 10900 }, { "epoch": 0.22208651399491094, "grad_norm": 10.491930978471396, "learning_rate": 4.441639864837357e-06, "loss": 0.3261, "step": 10910 }, { "epoch": 0.22229007633587786, "grad_norm": 19.047879366515843, "learning_rate": 4.44571102878313e-06, "loss": 0.2652, "step": 10920 }, { "epoch": 0.2224936386768448, "grad_norm": 10.081855946333695, "learning_rate": 4.4497821927289014e-06, "loss": 0.217, "step": 10930 }, { "epoch": 0.2226972010178117, "grad_norm": 28.312795412028873, "learning_rate": 4.453853356674674e-06, "loss": 0.3399, "step": 10940 }, { "epoch": 0.22290076335877862, "grad_norm": 11.763704189246178, "learning_rate": 4.457924520620446e-06, "loss": 0.3072, "step": 10950 }, { "epoch": 0.22310432569974556, "grad_norm": 10.824270845067515, "learning_rate": 4.461995684566218e-06, "loss": 0.3234, "step": 10960 }, { "epoch": 0.22330788804071247, "grad_norm": 33.65606417088694, "learning_rate": 4.46606684851199e-06, "loss": 0.3047, "step": 10970 }, { "epoch": 0.22351145038167938, "grad_norm": 11.361475829933635, "learning_rate": 4.470138012457762e-06, "loss": 0.2052, "step": 10980 }, { "epoch": 0.22371501272264632, "grad_norm": 13.425725412015845, "learning_rate": 4.474209176403534e-06, "loss": 0.2985, "step": 10990 }, { "epoch": 0.22391857506361323, "grad_norm": 21.273351558808134, "learning_rate": 4.4782803403493064e-06, "loss": 0.2921, "step": 11000 }, { "epoch": 0.22412213740458015, "grad_norm": 28.89325813707783, "learning_rate": 4.482351504295078e-06, "loss": 0.2666, "step": 11010 }, { "epoch": 0.22432569974554709, "grad_norm": 36.84743707168173, "learning_rate": 4.48642266824085e-06, "loss": 0.2176, "step": 11020 }, { "epoch": 0.224529262086514, "grad_norm": 17.812934351529464, "learning_rate": 4.490493832186623e-06, "loss": 0.374, "step": 11030 }, { "epoch": 0.2247328244274809, "grad_norm": 43.731373995614454, "learning_rate": 4.4945649961323944e-06, "loss": 0.2435, "step": 11040 }, { "epoch": 0.22493638676844785, "grad_norm": 44.288075621725504, "learning_rate": 4.498636160078167e-06, "loss": 0.2537, "step": 11050 }, { "epoch": 0.22513994910941476, "grad_norm": 28.459234337525235, "learning_rate": 4.502707324023939e-06, "loss": 0.3078, "step": 11060 }, { "epoch": 0.22534351145038167, "grad_norm": 3.147608553493612, "learning_rate": 4.506778487969711e-06, "loss": 0.2199, "step": 11070 }, { "epoch": 0.2255470737913486, "grad_norm": 8.706066281724375, "learning_rate": 4.510849651915483e-06, "loss": 0.2634, "step": 11080 }, { "epoch": 0.22575063613231552, "grad_norm": 11.165002238550157, "learning_rate": 4.514920815861255e-06, "loss": 0.3295, "step": 11090 }, { "epoch": 0.22595419847328244, "grad_norm": 6.347478783276214, "learning_rate": 4.518991979807027e-06, "loss": 0.2293, "step": 11100 }, { "epoch": 0.22615776081424938, "grad_norm": 7.380004472991267, "learning_rate": 4.5230631437527994e-06, "loss": 0.2803, "step": 11110 }, { "epoch": 0.2263613231552163, "grad_norm": 19.338068401258553, "learning_rate": 4.527134307698571e-06, "loss": 0.2695, "step": 11120 }, { "epoch": 0.2265648854961832, "grad_norm": 23.84855905253736, "learning_rate": 4.531205471644343e-06, "loss": 0.2527, "step": 11130 }, { "epoch": 0.22676844783715014, "grad_norm": 19.901625322848428, "learning_rate": 4.535276635590116e-06, "loss": 0.2764, "step": 11140 }, { "epoch": 0.22697201017811705, "grad_norm": 15.561112245456078, "learning_rate": 4.539347799535887e-06, "loss": 0.1898, "step": 11150 }, { "epoch": 0.22717557251908396, "grad_norm": 21.701793530130402, "learning_rate": 4.54341896348166e-06, "loss": 0.3081, "step": 11160 }, { "epoch": 0.2273791348600509, "grad_norm": 8.734965580802982, "learning_rate": 4.547490127427432e-06, "loss": 0.3661, "step": 11170 }, { "epoch": 0.22758269720101781, "grad_norm": 18.373498041679404, "learning_rate": 4.551561291373204e-06, "loss": 0.2918, "step": 11180 }, { "epoch": 0.22778625954198473, "grad_norm": 6.94351917016469, "learning_rate": 4.555632455318976e-06, "loss": 0.2845, "step": 11190 }, { "epoch": 0.22798982188295167, "grad_norm": 10.214222589747157, "learning_rate": 4.559703619264748e-06, "loss": 0.1935, "step": 11200 }, { "epoch": 0.22819338422391858, "grad_norm": 36.42207488668868, "learning_rate": 4.56377478321052e-06, "loss": 0.2467, "step": 11210 }, { "epoch": 0.2283969465648855, "grad_norm": 9.326433605820108, "learning_rate": 4.5678459471562924e-06, "loss": 0.3279, "step": 11220 }, { "epoch": 0.22860050890585243, "grad_norm": 47.99944361467907, "learning_rate": 4.571917111102064e-06, "loss": 0.3387, "step": 11230 }, { "epoch": 0.22880407124681934, "grad_norm": 36.15425208347892, "learning_rate": 4.575988275047837e-06, "loss": 0.2072, "step": 11240 }, { "epoch": 0.22900763358778625, "grad_norm": 9.576475853248365, "learning_rate": 4.580059438993609e-06, "loss": 0.3535, "step": 11250 }, { "epoch": 0.2292111959287532, "grad_norm": 26.76776842806661, "learning_rate": 4.58413060293938e-06, "loss": 0.2908, "step": 11260 }, { "epoch": 0.2294147582697201, "grad_norm": 61.61029496461979, "learning_rate": 4.588201766885153e-06, "loss": 0.2417, "step": 11270 }, { "epoch": 0.22961832061068702, "grad_norm": 7.251601278835738, "learning_rate": 4.592272930830925e-06, "loss": 0.2542, "step": 11280 }, { "epoch": 0.22982188295165396, "grad_norm": 18.02485912080566, "learning_rate": 4.596344094776697e-06, "loss": 0.307, "step": 11290 }, { "epoch": 0.23002544529262087, "grad_norm": 15.706375893250891, "learning_rate": 4.600415258722469e-06, "loss": 0.3109, "step": 11300 }, { "epoch": 0.23022900763358778, "grad_norm": 51.23773751690457, "learning_rate": 4.604486422668241e-06, "loss": 0.2587, "step": 11310 }, { "epoch": 0.23043256997455472, "grad_norm": 13.240733401213305, "learning_rate": 4.608557586614013e-06, "loss": 0.3319, "step": 11320 }, { "epoch": 0.23063613231552163, "grad_norm": 33.12585280313642, "learning_rate": 4.6126287505597854e-06, "loss": 0.1994, "step": 11330 }, { "epoch": 0.23083969465648854, "grad_norm": 10.461505092837557, "learning_rate": 4.616699914505557e-06, "loss": 0.3041, "step": 11340 }, { "epoch": 0.23104325699745548, "grad_norm": 34.16313129027048, "learning_rate": 4.62077107845133e-06, "loss": 0.2278, "step": 11350 }, { "epoch": 0.2312468193384224, "grad_norm": 19.285405421090836, "learning_rate": 4.624842242397102e-06, "loss": 0.2738, "step": 11360 }, { "epoch": 0.2314503816793893, "grad_norm": 12.718210873451465, "learning_rate": 4.628913406342873e-06, "loss": 0.2729, "step": 11370 }, { "epoch": 0.23165394402035624, "grad_norm": 7.483418580643134, "learning_rate": 4.632984570288646e-06, "loss": 0.3472, "step": 11380 }, { "epoch": 0.23185750636132316, "grad_norm": 18.150528871929698, "learning_rate": 4.637055734234418e-06, "loss": 0.3658, "step": 11390 }, { "epoch": 0.23206106870229007, "grad_norm": 32.822276480179, "learning_rate": 4.64112689818019e-06, "loss": 0.2116, "step": 11400 }, { "epoch": 0.232264631043257, "grad_norm": 9.489439055999652, "learning_rate": 4.645198062125962e-06, "loss": 0.237, "step": 11410 }, { "epoch": 0.23246819338422392, "grad_norm": 14.981322945803345, "learning_rate": 4.649269226071734e-06, "loss": 0.3151, "step": 11420 }, { "epoch": 0.23267175572519083, "grad_norm": 19.725009895359218, "learning_rate": 4.653340390017506e-06, "loss": 0.3751, "step": 11430 }, { "epoch": 0.23287531806615777, "grad_norm": 6.894554237725404, "learning_rate": 4.6574115539632784e-06, "loss": 0.2341, "step": 11440 }, { "epoch": 0.23307888040712468, "grad_norm": 9.931059945958468, "learning_rate": 4.66148271790905e-06, "loss": 0.1834, "step": 11450 }, { "epoch": 0.2332824427480916, "grad_norm": 40.11612879205926, "learning_rate": 4.665553881854823e-06, "loss": 0.2813, "step": 11460 }, { "epoch": 0.23348600508905853, "grad_norm": 15.449671015801753, "learning_rate": 4.669625045800595e-06, "loss": 0.3186, "step": 11470 }, { "epoch": 0.23368956743002545, "grad_norm": 7.002085523388378, "learning_rate": 4.673696209746366e-06, "loss": 0.1981, "step": 11480 }, { "epoch": 0.23389312977099236, "grad_norm": 19.429549904316815, "learning_rate": 4.677767373692139e-06, "loss": 0.2436, "step": 11490 }, { "epoch": 0.2340966921119593, "grad_norm": 29.521979942045416, "learning_rate": 4.681838537637911e-06, "loss": 0.3504, "step": 11500 }, { "epoch": 0.2343002544529262, "grad_norm": 12.95943934930659, "learning_rate": 4.685909701583683e-06, "loss": 0.1868, "step": 11510 }, { "epoch": 0.23450381679389312, "grad_norm": 10.914584670037575, "learning_rate": 4.689980865529455e-06, "loss": 0.2165, "step": 11520 }, { "epoch": 0.23470737913486006, "grad_norm": 8.163762591670801, "learning_rate": 4.694052029475227e-06, "loss": 0.249, "step": 11530 }, { "epoch": 0.23491094147582697, "grad_norm": 17.582351181220407, "learning_rate": 4.698123193420999e-06, "loss": 0.2349, "step": 11540 }, { "epoch": 0.23511450381679388, "grad_norm": 22.870890986711785, "learning_rate": 4.7021943573667714e-06, "loss": 0.3125, "step": 11550 }, { "epoch": 0.23531806615776082, "grad_norm": 5.425028772517632, "learning_rate": 4.706265521312543e-06, "loss": 0.2713, "step": 11560 }, { "epoch": 0.23552162849872774, "grad_norm": 4.792903518240284, "learning_rate": 4.710336685258316e-06, "loss": 0.2069, "step": 11570 }, { "epoch": 0.23572519083969465, "grad_norm": 4.139208251259894, "learning_rate": 4.714407849204088e-06, "loss": 0.388, "step": 11580 }, { "epoch": 0.2359287531806616, "grad_norm": 10.716854882209407, "learning_rate": 4.718479013149859e-06, "loss": 0.3619, "step": 11590 }, { "epoch": 0.2361323155216285, "grad_norm": 20.541170605968134, "learning_rate": 4.722550177095632e-06, "loss": 0.3104, "step": 11600 }, { "epoch": 0.2363358778625954, "grad_norm": 7.7634486533799665, "learning_rate": 4.726621341041404e-06, "loss": 0.2133, "step": 11610 }, { "epoch": 0.23653944020356235, "grad_norm": 10.445632521399444, "learning_rate": 4.7306925049871764e-06, "loss": 0.2385, "step": 11620 }, { "epoch": 0.23674300254452926, "grad_norm": 23.473985842062646, "learning_rate": 4.734763668932948e-06, "loss": 0.2469, "step": 11630 }, { "epoch": 0.23694656488549617, "grad_norm": 50.80835430142418, "learning_rate": 4.73883483287872e-06, "loss": 0.3529, "step": 11640 }, { "epoch": 0.23715012722646311, "grad_norm": 40.369378256216656, "learning_rate": 4.742905996824493e-06, "loss": 0.299, "step": 11650 }, { "epoch": 0.23735368956743003, "grad_norm": 31.091188433976775, "learning_rate": 4.7469771607702644e-06, "loss": 0.1848, "step": 11660 }, { "epoch": 0.23755725190839694, "grad_norm": 25.157558925577984, "learning_rate": 4.751048324716036e-06, "loss": 0.2552, "step": 11670 }, { "epoch": 0.23776081424936388, "grad_norm": 7.082567687527428, "learning_rate": 4.755119488661809e-06, "loss": 0.3005, "step": 11680 }, { "epoch": 0.2379643765903308, "grad_norm": 7.706132773533783, "learning_rate": 4.759190652607581e-06, "loss": 0.3353, "step": 11690 }, { "epoch": 0.2381679389312977, "grad_norm": 10.240820554007557, "learning_rate": 4.763261816553353e-06, "loss": 0.2127, "step": 11700 }, { "epoch": 0.23837150127226464, "grad_norm": 8.041675050103473, "learning_rate": 4.767332980499125e-06, "loss": 0.295, "step": 11710 }, { "epoch": 0.23857506361323155, "grad_norm": 23.594887138113055, "learning_rate": 4.771404144444897e-06, "loss": 0.4118, "step": 11720 }, { "epoch": 0.23877862595419846, "grad_norm": 16.640711433301863, "learning_rate": 4.7754753083906694e-06, "loss": 0.2658, "step": 11730 }, { "epoch": 0.2389821882951654, "grad_norm": 14.960975800390864, "learning_rate": 4.779546472336441e-06, "loss": 0.3102, "step": 11740 }, { "epoch": 0.23918575063613232, "grad_norm": 19.25985763253272, "learning_rate": 4.783617636282213e-06, "loss": 0.3071, "step": 11750 }, { "epoch": 0.23938931297709923, "grad_norm": 7.860337264017097, "learning_rate": 4.787688800227986e-06, "loss": 0.2556, "step": 11760 }, { "epoch": 0.23959287531806617, "grad_norm": 13.960881293274637, "learning_rate": 4.7917599641737574e-06, "loss": 0.2703, "step": 11770 }, { "epoch": 0.23979643765903308, "grad_norm": 20.101197664657985, "learning_rate": 4.79583112811953e-06, "loss": 0.3271, "step": 11780 }, { "epoch": 0.24, "grad_norm": 7.929578567122723, "learning_rate": 4.799902292065302e-06, "loss": 0.3262, "step": 11790 }, { "epoch": 0.24020356234096693, "grad_norm": 17.576861940325852, "learning_rate": 4.803973456011074e-06, "loss": 0.2479, "step": 11800 }, { "epoch": 0.24040712468193384, "grad_norm": 8.61896255096177, "learning_rate": 4.808044619956846e-06, "loss": 0.1803, "step": 11810 }, { "epoch": 0.24061068702290075, "grad_norm": 33.43673918279717, "learning_rate": 4.812115783902618e-06, "loss": 0.2348, "step": 11820 }, { "epoch": 0.2408142493638677, "grad_norm": 34.851610286262044, "learning_rate": 4.816186947848391e-06, "loss": 0.3572, "step": 11830 }, { "epoch": 0.2410178117048346, "grad_norm": 0.1481582535537009, "learning_rate": 4.8202581117941624e-06, "loss": 0.3351, "step": 11840 }, { "epoch": 0.24122137404580152, "grad_norm": 39.19541610561803, "learning_rate": 4.824329275739934e-06, "loss": 0.3962, "step": 11850 }, { "epoch": 0.24142493638676846, "grad_norm": 25.75217470993721, "learning_rate": 4.828400439685707e-06, "loss": 0.2185, "step": 11860 }, { "epoch": 0.24162849872773537, "grad_norm": 35.35830494127416, "learning_rate": 4.832471603631479e-06, "loss": 0.3618, "step": 11870 }, { "epoch": 0.24183206106870228, "grad_norm": 8.049495872514187, "learning_rate": 4.83654276757725e-06, "loss": 0.2942, "step": 11880 }, { "epoch": 0.24203562340966922, "grad_norm": 34.28065164764424, "learning_rate": 4.840613931523023e-06, "loss": 0.2721, "step": 11890 }, { "epoch": 0.24223918575063613, "grad_norm": 3.882766233445948, "learning_rate": 4.844685095468795e-06, "loss": 0.2454, "step": 11900 }, { "epoch": 0.24244274809160304, "grad_norm": 9.492445678615603, "learning_rate": 4.8487562594145675e-06, "loss": 0.1866, "step": 11910 }, { "epoch": 0.24264631043256998, "grad_norm": 7.812101538311583, "learning_rate": 4.852827423360339e-06, "loss": 0.2662, "step": 11920 }, { "epoch": 0.2428498727735369, "grad_norm": 15.958550651472814, "learning_rate": 4.856898587306111e-06, "loss": 0.2913, "step": 11930 }, { "epoch": 0.2430534351145038, "grad_norm": 35.44463250841149, "learning_rate": 4.860969751251884e-06, "loss": 0.3458, "step": 11940 }, { "epoch": 0.24325699745547075, "grad_norm": 16.396841098750155, "learning_rate": 4.8650409151976554e-06, "loss": 0.1942, "step": 11950 }, { "epoch": 0.24346055979643766, "grad_norm": 10.635160856048088, "learning_rate": 4.869112079143428e-06, "loss": 0.2503, "step": 11960 }, { "epoch": 0.24366412213740457, "grad_norm": 11.173833379277598, "learning_rate": 4.8731832430892e-06, "loss": 0.2659, "step": 11970 }, { "epoch": 0.2438676844783715, "grad_norm": 27.770791021729863, "learning_rate": 4.877254407034972e-06, "loss": 0.225, "step": 11980 }, { "epoch": 0.24407124681933842, "grad_norm": 6.831947715754739, "learning_rate": 4.881325570980744e-06, "loss": 0.3452, "step": 11990 }, { "epoch": 0.24427480916030533, "grad_norm": 44.983709547111715, "learning_rate": 4.885396734926516e-06, "loss": 0.2933, "step": 12000 }, { "epoch": 0.24447837150127227, "grad_norm": 43.452175338508326, "learning_rate": 4.889467898872288e-06, "loss": 0.2766, "step": 12010 }, { "epoch": 0.24468193384223919, "grad_norm": 27.8417668951676, "learning_rate": 4.8935390628180605e-06, "loss": 0.3244, "step": 12020 }, { "epoch": 0.2448854961832061, "grad_norm": 29.232464179058965, "learning_rate": 4.897610226763832e-06, "loss": 0.2468, "step": 12030 }, { "epoch": 0.24508905852417304, "grad_norm": 9.129608216302886, "learning_rate": 4.901681390709605e-06, "loss": 0.279, "step": 12040 }, { "epoch": 0.24529262086513995, "grad_norm": 4.22977554729135, "learning_rate": 4.905752554655377e-06, "loss": 0.2998, "step": 12050 }, { "epoch": 0.24549618320610686, "grad_norm": 18.768648028350473, "learning_rate": 4.9098237186011484e-06, "loss": 0.2979, "step": 12060 }, { "epoch": 0.2456997455470738, "grad_norm": 4.777127310335756, "learning_rate": 4.913894882546921e-06, "loss": 0.2254, "step": 12070 }, { "epoch": 0.2459033078880407, "grad_norm": 13.221458979989682, "learning_rate": 4.917966046492693e-06, "loss": 0.3366, "step": 12080 }, { "epoch": 0.24610687022900762, "grad_norm": 10.404470363701193, "learning_rate": 4.922037210438465e-06, "loss": 0.2997, "step": 12090 }, { "epoch": 0.24631043256997456, "grad_norm": 15.616640743436413, "learning_rate": 4.926108374384237e-06, "loss": 0.2614, "step": 12100 }, { "epoch": 0.24651399491094148, "grad_norm": 16.645046979764484, "learning_rate": 4.930179538330009e-06, "loss": 0.3364, "step": 12110 }, { "epoch": 0.2467175572519084, "grad_norm": 22.306379927370582, "learning_rate": 4.934250702275782e-06, "loss": 0.2253, "step": 12120 }, { "epoch": 0.24692111959287533, "grad_norm": 12.238360690604493, "learning_rate": 4.9383218662215535e-06, "loss": 0.3379, "step": 12130 }, { "epoch": 0.24712468193384224, "grad_norm": 6.259787594219983, "learning_rate": 4.942393030167325e-06, "loss": 0.2565, "step": 12140 }, { "epoch": 0.24732824427480915, "grad_norm": 11.115842766744652, "learning_rate": 4.946464194113098e-06, "loss": 0.3042, "step": 12150 }, { "epoch": 0.2475318066157761, "grad_norm": 9.558453400291151, "learning_rate": 4.95053535805887e-06, "loss": 0.2944, "step": 12160 }, { "epoch": 0.247735368956743, "grad_norm": 5.115921139362901, "learning_rate": 4.9546065220046414e-06, "loss": 0.3065, "step": 12170 }, { "epoch": 0.2479389312977099, "grad_norm": 6.900450834679178, "learning_rate": 4.958677685950414e-06, "loss": 0.2926, "step": 12180 }, { "epoch": 0.24814249363867685, "grad_norm": 13.681894479073142, "learning_rate": 4.962748849896186e-06, "loss": 0.2073, "step": 12190 }, { "epoch": 0.24834605597964376, "grad_norm": 6.767167043197811, "learning_rate": 4.966820013841958e-06, "loss": 0.3351, "step": 12200 }, { "epoch": 0.24854961832061068, "grad_norm": 39.618151921568554, "learning_rate": 4.97089117778773e-06, "loss": 0.2685, "step": 12210 }, { "epoch": 0.24875318066157762, "grad_norm": 15.88648185734808, "learning_rate": 4.974962341733502e-06, "loss": 0.3325, "step": 12220 }, { "epoch": 0.24895674300254453, "grad_norm": 9.462060056896977, "learning_rate": 4.979033505679275e-06, "loss": 0.3906, "step": 12230 }, { "epoch": 0.24916030534351144, "grad_norm": 10.91184373966527, "learning_rate": 4.9831046696250465e-06, "loss": 0.2138, "step": 12240 }, { "epoch": 0.24936386768447838, "grad_norm": 19.371374065508803, "learning_rate": 4.987175833570818e-06, "loss": 0.2536, "step": 12250 }, { "epoch": 0.2495674300254453, "grad_norm": 35.87802701555425, "learning_rate": 4.991246997516591e-06, "loss": 0.2126, "step": 12260 }, { "epoch": 0.2497709923664122, "grad_norm": 14.285229536740465, "learning_rate": 4.995318161462363e-06, "loss": 0.3669, "step": 12270 }, { "epoch": 0.24997455470737914, "grad_norm": 21.51154319462196, "learning_rate": 4.9993893254081344e-06, "loss": 0.3116, "step": 12280 }, { "epoch": 0.2501781170483461, "grad_norm": 2.0608640998901087, "learning_rate": 5.003460489353906e-06, "loss": 0.3296, "step": 12290 }, { "epoch": 0.250381679389313, "grad_norm": 10.986796239708987, "learning_rate": 5.007531653299679e-06, "loss": 0.3236, "step": 12300 }, { "epoch": 0.2505852417302799, "grad_norm": 13.208264409938126, "learning_rate": 5.0116028172454515e-06, "loss": 0.2248, "step": 12310 }, { "epoch": 0.2507888040712468, "grad_norm": 5.491723287307241, "learning_rate": 5.015673981191222e-06, "loss": 0.2381, "step": 12320 }, { "epoch": 0.25099236641221373, "grad_norm": 13.183896347514452, "learning_rate": 5.019745145136995e-06, "loss": 0.3081, "step": 12330 }, { "epoch": 0.25119592875318064, "grad_norm": 16.384246811708916, "learning_rate": 5.023816309082768e-06, "loss": 0.3148, "step": 12340 }, { "epoch": 0.2513994910941476, "grad_norm": 9.926898255947258, "learning_rate": 5.027887473028539e-06, "loss": 0.2079, "step": 12350 }, { "epoch": 0.2516030534351145, "grad_norm": 21.684803268003485, "learning_rate": 5.031958636974311e-06, "loss": 0.2404, "step": 12360 }, { "epoch": 0.25180661577608143, "grad_norm": 10.683688867639166, "learning_rate": 5.036029800920084e-06, "loss": 0.2385, "step": 12370 }, { "epoch": 0.25201017811704834, "grad_norm": 9.280929321880052, "learning_rate": 5.0401009648658565e-06, "loss": 0.321, "step": 12380 }, { "epoch": 0.25221374045801526, "grad_norm": 8.466130293854079, "learning_rate": 5.0441721288116274e-06, "loss": 0.3512, "step": 12390 }, { "epoch": 0.25241730279898217, "grad_norm": 5.20692969110676, "learning_rate": 5.0482432927574e-06, "loss": 0.2976, "step": 12400 }, { "epoch": 0.25262086513994914, "grad_norm": 17.520350125722327, "learning_rate": 5.052314456703173e-06, "loss": 0.3159, "step": 12410 }, { "epoch": 0.25282442748091605, "grad_norm": 7.196936436602528, "learning_rate": 5.056385620648944e-06, "loss": 0.2181, "step": 12420 }, { "epoch": 0.25302798982188296, "grad_norm": 8.80697858449337, "learning_rate": 5.060456784594716e-06, "loss": 0.2499, "step": 12430 }, { "epoch": 0.25323155216284987, "grad_norm": 18.62387218322379, "learning_rate": 5.064527948540489e-06, "loss": 0.2355, "step": 12440 }, { "epoch": 0.2534351145038168, "grad_norm": 19.49231655273745, "learning_rate": 5.06859911248626e-06, "loss": 0.3513, "step": 12450 }, { "epoch": 0.2536386768447837, "grad_norm": 6.141389731204219, "learning_rate": 5.0726702764320324e-06, "loss": 0.2929, "step": 12460 }, { "epoch": 0.25384223918575066, "grad_norm": 10.837317345488716, "learning_rate": 5.076741440377805e-06, "loss": 0.3283, "step": 12470 }, { "epoch": 0.2540458015267176, "grad_norm": 10.537400088310434, "learning_rate": 5.080812604323576e-06, "loss": 0.2657, "step": 12480 }, { "epoch": 0.2542493638676845, "grad_norm": 9.211948536056667, "learning_rate": 5.084883768269349e-06, "loss": 0.3737, "step": 12490 }, { "epoch": 0.2544529262086514, "grad_norm": 14.137319743116986, "learning_rate": 5.088954932215121e-06, "loss": 0.2782, "step": 12500 }, { "epoch": 0.2546564885496183, "grad_norm": 8.067506520159727, "learning_rate": 5.093026096160892e-06, "loss": 0.2793, "step": 12510 }, { "epoch": 0.2548600508905852, "grad_norm": 47.50652989378909, "learning_rate": 5.097097260106665e-06, "loss": 0.2573, "step": 12520 }, { "epoch": 0.2550636132315522, "grad_norm": 34.06237700671897, "learning_rate": 5.1011684240524375e-06, "loss": 0.2999, "step": 12530 }, { "epoch": 0.2552671755725191, "grad_norm": 23.55669976770615, "learning_rate": 5.105239587998208e-06, "loss": 0.3699, "step": 12540 }, { "epoch": 0.255470737913486, "grad_norm": 18.76808995370149, "learning_rate": 5.109310751943981e-06, "loss": 0.3432, "step": 12550 }, { "epoch": 0.2556743002544529, "grad_norm": 21.84322071425197, "learning_rate": 5.113381915889754e-06, "loss": 0.3097, "step": 12560 }, { "epoch": 0.25587786259541984, "grad_norm": 19.98569266494116, "learning_rate": 5.117453079835525e-06, "loss": 0.3688, "step": 12570 }, { "epoch": 0.25608142493638675, "grad_norm": 11.498436235741675, "learning_rate": 5.121524243781297e-06, "loss": 0.3196, "step": 12580 }, { "epoch": 0.2562849872773537, "grad_norm": 25.004842510743497, "learning_rate": 5.12559540772707e-06, "loss": 0.3403, "step": 12590 }, { "epoch": 0.2564885496183206, "grad_norm": 13.634005653548694, "learning_rate": 5.1296665716728425e-06, "loss": 0.3128, "step": 12600 }, { "epoch": 0.25669211195928754, "grad_norm": 19.863289303081963, "learning_rate": 5.133737735618613e-06, "loss": 0.2657, "step": 12610 }, { "epoch": 0.25689567430025445, "grad_norm": 2.846927985868772, "learning_rate": 5.137808899564386e-06, "loss": 0.1932, "step": 12620 }, { "epoch": 0.25709923664122136, "grad_norm": 14.143316960888917, "learning_rate": 5.141880063510159e-06, "loss": 0.3881, "step": 12630 }, { "epoch": 0.2573027989821883, "grad_norm": 15.120656453011465, "learning_rate": 5.14595122745593e-06, "loss": 0.2676, "step": 12640 }, { "epoch": 0.25750636132315524, "grad_norm": 13.142091577864687, "learning_rate": 5.150022391401702e-06, "loss": 0.2316, "step": 12650 }, { "epoch": 0.25770992366412215, "grad_norm": 13.936452740261577, "learning_rate": 5.154093555347475e-06, "loss": 0.2722, "step": 12660 }, { "epoch": 0.25791348600508907, "grad_norm": 3.3151387967255217, "learning_rate": 5.158164719293246e-06, "loss": 0.2743, "step": 12670 }, { "epoch": 0.258117048346056, "grad_norm": 15.92949778890395, "learning_rate": 5.1622358832390184e-06, "loss": 0.3329, "step": 12680 }, { "epoch": 0.2583206106870229, "grad_norm": 27.47922612025451, "learning_rate": 5.166307047184791e-06, "loss": 0.3135, "step": 12690 }, { "epoch": 0.2585241730279898, "grad_norm": 21.25411252219949, "learning_rate": 5.170378211130562e-06, "loss": 0.2412, "step": 12700 }, { "epoch": 0.25872773536895677, "grad_norm": 16.411502236479215, "learning_rate": 5.174449375076335e-06, "loss": 0.2686, "step": 12710 }, { "epoch": 0.2589312977099237, "grad_norm": 28.937883634113206, "learning_rate": 5.178520539022107e-06, "loss": 0.3016, "step": 12720 }, { "epoch": 0.2591348600508906, "grad_norm": 22.317756529023367, "learning_rate": 5.182591702967878e-06, "loss": 0.3267, "step": 12730 }, { "epoch": 0.2593384223918575, "grad_norm": 42.035079574246346, "learning_rate": 5.186662866913651e-06, "loss": 0.2831, "step": 12740 }, { "epoch": 0.2595419847328244, "grad_norm": 6.461513090310909, "learning_rate": 5.1907340308594235e-06, "loss": 0.3017, "step": 12750 }, { "epoch": 0.2597455470737913, "grad_norm": 29.299116661999797, "learning_rate": 5.194805194805194e-06, "loss": 0.244, "step": 12760 }, { "epoch": 0.2599491094147583, "grad_norm": 5.743822979670701, "learning_rate": 5.198876358750967e-06, "loss": 0.2115, "step": 12770 }, { "epoch": 0.2601526717557252, "grad_norm": 7.2506130230738295, "learning_rate": 5.20294752269674e-06, "loss": 0.2102, "step": 12780 }, { "epoch": 0.2603562340966921, "grad_norm": 21.517783428892592, "learning_rate": 5.207018686642511e-06, "loss": 0.3329, "step": 12790 }, { "epoch": 0.26055979643765903, "grad_norm": 23.92603047038119, "learning_rate": 5.211089850588283e-06, "loss": 0.28, "step": 12800 }, { "epoch": 0.26076335877862594, "grad_norm": 23.74740568407295, "learning_rate": 5.215161014534056e-06, "loss": 0.2223, "step": 12810 }, { "epoch": 0.26096692111959285, "grad_norm": 9.14707879721343, "learning_rate": 5.2192321784798285e-06, "loss": 0.2465, "step": 12820 }, { "epoch": 0.2611704834605598, "grad_norm": 15.235331580318508, "learning_rate": 5.223303342425599e-06, "loss": 0.2456, "step": 12830 }, { "epoch": 0.26137404580152673, "grad_norm": 46.19963896320543, "learning_rate": 5.227374506371372e-06, "loss": 0.3265, "step": 12840 }, { "epoch": 0.26157760814249365, "grad_norm": 12.338534688791944, "learning_rate": 5.231445670317145e-06, "loss": 0.3524, "step": 12850 }, { "epoch": 0.26178117048346056, "grad_norm": 14.520329356773997, "learning_rate": 5.235516834262916e-06, "loss": 0.3245, "step": 12860 }, { "epoch": 0.26198473282442747, "grad_norm": 4.201890587218774, "learning_rate": 5.239587998208688e-06, "loss": 0.2454, "step": 12870 }, { "epoch": 0.2621882951653944, "grad_norm": 23.673781182624797, "learning_rate": 5.243659162154461e-06, "loss": 0.3234, "step": 12880 }, { "epoch": 0.26239185750636135, "grad_norm": 9.527367236359265, "learning_rate": 5.247730326100232e-06, "loss": 0.2046, "step": 12890 }, { "epoch": 0.26259541984732826, "grad_norm": 11.233179292215343, "learning_rate": 5.2518014900460044e-06, "loss": 0.3727, "step": 12900 }, { "epoch": 0.26279898218829517, "grad_norm": 13.756577051811933, "learning_rate": 5.255872653991777e-06, "loss": 0.2539, "step": 12910 }, { "epoch": 0.2630025445292621, "grad_norm": 21.398106910549554, "learning_rate": 5.259943817937548e-06, "loss": 0.2782, "step": 12920 }, { "epoch": 0.263206106870229, "grad_norm": 24.471475486772462, "learning_rate": 5.264014981883321e-06, "loss": 0.3249, "step": 12930 }, { "epoch": 0.2634096692111959, "grad_norm": 9.330266730140211, "learning_rate": 5.268086145829093e-06, "loss": 0.2218, "step": 12940 }, { "epoch": 0.2636132315521629, "grad_norm": 19.7811760231199, "learning_rate": 5.272157309774865e-06, "loss": 0.2918, "step": 12950 }, { "epoch": 0.2638167938931298, "grad_norm": 14.049452859869914, "learning_rate": 5.276228473720637e-06, "loss": 0.3079, "step": 12960 }, { "epoch": 0.2640203562340967, "grad_norm": 7.895598266219487, "learning_rate": 5.2802996376664095e-06, "loss": 0.2378, "step": 12970 }, { "epoch": 0.2642239185750636, "grad_norm": 14.193869262935461, "learning_rate": 5.284370801612181e-06, "loss": 0.3359, "step": 12980 }, { "epoch": 0.2644274809160305, "grad_norm": 12.430339770490308, "learning_rate": 5.288441965557953e-06, "loss": 0.278, "step": 12990 }, { "epoch": 0.26463104325699743, "grad_norm": 18.222082655162247, "learning_rate": 5.292513129503726e-06, "loss": 0.2689, "step": 13000 }, { "epoch": 0.2648346055979644, "grad_norm": 8.379697431141027, "learning_rate": 5.2965842934494974e-06, "loss": 0.2688, "step": 13010 }, { "epoch": 0.2650381679389313, "grad_norm": 20.658823740629902, "learning_rate": 5.300655457395269e-06, "loss": 0.2868, "step": 13020 }, { "epoch": 0.2652417302798982, "grad_norm": 23.616557514353314, "learning_rate": 5.304726621341042e-06, "loss": 0.3228, "step": 13030 }, { "epoch": 0.26544529262086514, "grad_norm": 12.170003277398003, "learning_rate": 5.3087977852868145e-06, "loss": 0.1991, "step": 13040 }, { "epoch": 0.26564885496183205, "grad_norm": 27.274755477388677, "learning_rate": 5.312868949232585e-06, "loss": 0.3609, "step": 13050 }, { "epoch": 0.26585241730279896, "grad_norm": 20.26137791162282, "learning_rate": 5.316940113178358e-06, "loss": 0.226, "step": 13060 }, { "epoch": 0.2660559796437659, "grad_norm": 13.681284164275606, "learning_rate": 5.321011277124131e-06, "loss": 0.2946, "step": 13070 }, { "epoch": 0.26625954198473284, "grad_norm": 7.011001713498057, "learning_rate": 5.325082441069902e-06, "loss": 0.2922, "step": 13080 }, { "epoch": 0.26646310432569975, "grad_norm": 5.761972378995442, "learning_rate": 5.329153605015674e-06, "loss": 0.2011, "step": 13090 }, { "epoch": 0.26666666666666666, "grad_norm": 12.947639977683048, "learning_rate": 5.333224768961447e-06, "loss": 0.3611, "step": 13100 }, { "epoch": 0.2668702290076336, "grad_norm": 7.637654909913038, "learning_rate": 5.337295932907219e-06, "loss": 0.288, "step": 13110 }, { "epoch": 0.2670737913486005, "grad_norm": 11.189028624115558, "learning_rate": 5.3413670968529904e-06, "loss": 0.234, "step": 13120 }, { "epoch": 0.26727735368956745, "grad_norm": 20.532550344155986, "learning_rate": 5.345438260798763e-06, "loss": 0.3372, "step": 13130 }, { "epoch": 0.26748091603053437, "grad_norm": 15.155891800730238, "learning_rate": 5.349509424744535e-06, "loss": 0.3505, "step": 13140 }, { "epoch": 0.2676844783715013, "grad_norm": 16.309991093094027, "learning_rate": 5.353580588690307e-06, "loss": 0.2815, "step": 13150 }, { "epoch": 0.2678880407124682, "grad_norm": 18.711018321896628, "learning_rate": 5.357651752636079e-06, "loss": 0.3136, "step": 13160 }, { "epoch": 0.2680916030534351, "grad_norm": 14.9015742021763, "learning_rate": 5.361722916581851e-06, "loss": 0.2761, "step": 13170 }, { "epoch": 0.268295165394402, "grad_norm": 10.850894082997034, "learning_rate": 5.365794080527623e-06, "loss": 0.2497, "step": 13180 }, { "epoch": 0.268498727735369, "grad_norm": 6.733723595164772, "learning_rate": 5.3698652444733954e-06, "loss": 0.2634, "step": 13190 }, { "epoch": 0.2687022900763359, "grad_norm": 13.115244630482646, "learning_rate": 5.373936408419167e-06, "loss": 0.2826, "step": 13200 }, { "epoch": 0.2689058524173028, "grad_norm": 8.0225748516599, "learning_rate": 5.378007572364939e-06, "loss": 0.2175, "step": 13210 }, { "epoch": 0.2691094147582697, "grad_norm": 9.732739243411318, "learning_rate": 5.382078736310712e-06, "loss": 0.3308, "step": 13220 }, { "epoch": 0.26931297709923663, "grad_norm": 48.14926890746906, "learning_rate": 5.386149900256484e-06, "loss": 0.2669, "step": 13230 }, { "epoch": 0.26951653944020354, "grad_norm": 12.541803777307715, "learning_rate": 5.390221064202256e-06, "loss": 0.3252, "step": 13240 }, { "epoch": 0.2697201017811705, "grad_norm": 28.181194300448702, "learning_rate": 5.394292228148028e-06, "loss": 0.3813, "step": 13250 }, { "epoch": 0.2699236641221374, "grad_norm": 16.27447256078624, "learning_rate": 5.3983633920938005e-06, "loss": 0.2493, "step": 13260 }, { "epoch": 0.27012722646310433, "grad_norm": 37.03181178964024, "learning_rate": 5.402434556039572e-06, "loss": 0.2175, "step": 13270 }, { "epoch": 0.27033078880407124, "grad_norm": 4.556087154351204, "learning_rate": 5.406505719985344e-06, "loss": 0.264, "step": 13280 }, { "epoch": 0.27053435114503815, "grad_norm": 19.468037920921795, "learning_rate": 5.410576883931117e-06, "loss": 0.2672, "step": 13290 }, { "epoch": 0.27073791348600507, "grad_norm": 14.654004630850777, "learning_rate": 5.4146480478768884e-06, "loss": 0.238, "step": 13300 }, { "epoch": 0.27094147582697203, "grad_norm": 18.850503285445964, "learning_rate": 5.41871921182266e-06, "loss": 0.2615, "step": 13310 }, { "epoch": 0.27114503816793895, "grad_norm": 9.805605803821559, "learning_rate": 5.422790375768433e-06, "loss": 0.3078, "step": 13320 }, { "epoch": 0.27134860050890586, "grad_norm": 8.541926360059113, "learning_rate": 5.426861539714205e-06, "loss": 0.2333, "step": 13330 }, { "epoch": 0.27155216284987277, "grad_norm": 9.762599126958786, "learning_rate": 5.430932703659976e-06, "loss": 0.341, "step": 13340 }, { "epoch": 0.2717557251908397, "grad_norm": 39.5996782143268, "learning_rate": 5.435003867605749e-06, "loss": 0.3648, "step": 13350 }, { "epoch": 0.2719592875318066, "grad_norm": 31.45639827945936, "learning_rate": 5.439075031551521e-06, "loss": 0.3209, "step": 13360 }, { "epoch": 0.27216284987277356, "grad_norm": 9.30919691045719, "learning_rate": 5.4431461954972935e-06, "loss": 0.1828, "step": 13370 }, { "epoch": 0.27236641221374047, "grad_norm": 25.05277662640774, "learning_rate": 5.447217359443065e-06, "loss": 0.2437, "step": 13380 }, { "epoch": 0.2725699745547074, "grad_norm": 22.478492772646003, "learning_rate": 5.451288523388837e-06, "loss": 0.3421, "step": 13390 }, { "epoch": 0.2727735368956743, "grad_norm": 25.089570309735386, "learning_rate": 5.45535968733461e-06, "loss": 0.3507, "step": 13400 }, { "epoch": 0.2729770992366412, "grad_norm": 20.72307139096729, "learning_rate": 5.4594308512803814e-06, "loss": 0.2933, "step": 13410 }, { "epoch": 0.2731806615776081, "grad_norm": 13.278884858046993, "learning_rate": 5.463502015226153e-06, "loss": 0.277, "step": 13420 }, { "epoch": 0.2733842239185751, "grad_norm": 27.835964274961302, "learning_rate": 5.467573179171926e-06, "loss": 0.3144, "step": 13430 }, { "epoch": 0.273587786259542, "grad_norm": 26.635268299696275, "learning_rate": 5.471644343117698e-06, "loss": 0.356, "step": 13440 }, { "epoch": 0.2737913486005089, "grad_norm": 7.142417147765978, "learning_rate": 5.47571550706347e-06, "loss": 0.3361, "step": 13450 }, { "epoch": 0.2739949109414758, "grad_norm": 13.260449884451996, "learning_rate": 5.479786671009242e-06, "loss": 0.3103, "step": 13460 }, { "epoch": 0.27419847328244273, "grad_norm": 5.671573872082901, "learning_rate": 5.483857834955014e-06, "loss": 0.2289, "step": 13470 }, { "epoch": 0.27440203562340965, "grad_norm": 17.179157599145235, "learning_rate": 5.4879289989007865e-06, "loss": 0.2997, "step": 13480 }, { "epoch": 0.2746055979643766, "grad_norm": 7.94923528648639, "learning_rate": 5.492000162846558e-06, "loss": 0.2653, "step": 13490 }, { "epoch": 0.2748091603053435, "grad_norm": 25.230340303408404, "learning_rate": 5.496071326792331e-06, "loss": 0.2778, "step": 13500 }, { "epoch": 0.27501272264631044, "grad_norm": 5.672356619639747, "learning_rate": 5.500142490738103e-06, "loss": 0.3386, "step": 13510 }, { "epoch": 0.27521628498727735, "grad_norm": 30.238395653459772, "learning_rate": 5.5042136546838744e-06, "loss": 0.2473, "step": 13520 }, { "epoch": 0.27541984732824426, "grad_norm": 2.7750669591321966, "learning_rate": 5.508284818629647e-06, "loss": 0.2379, "step": 13530 }, { "epoch": 0.2756234096692112, "grad_norm": 12.142290840517271, "learning_rate": 5.512355982575419e-06, "loss": 0.3007, "step": 13540 }, { "epoch": 0.27582697201017814, "grad_norm": 27.01468971908195, "learning_rate": 5.516427146521191e-06, "loss": 0.443, "step": 13550 }, { "epoch": 0.27603053435114505, "grad_norm": 41.347181192020265, "learning_rate": 5.520498310466963e-06, "loss": 0.2636, "step": 13560 }, { "epoch": 0.27623409669211196, "grad_norm": 6.828719089053188, "learning_rate": 5.524569474412735e-06, "loss": 0.2682, "step": 13570 }, { "epoch": 0.2764376590330789, "grad_norm": 12.029763666463598, "learning_rate": 5.528640638358507e-06, "loss": 0.3407, "step": 13580 }, { "epoch": 0.2766412213740458, "grad_norm": 14.45652200348726, "learning_rate": 5.5327118023042795e-06, "loss": 0.3159, "step": 13590 }, { "epoch": 0.2768447837150127, "grad_norm": 19.559539809934563, "learning_rate": 5.536782966250051e-06, "loss": 0.3052, "step": 13600 }, { "epoch": 0.27704834605597967, "grad_norm": 5.977212176648015, "learning_rate": 5.540854130195823e-06, "loss": 0.2122, "step": 13610 }, { "epoch": 0.2772519083969466, "grad_norm": 10.834330868599626, "learning_rate": 5.544925294141596e-06, "loss": 0.2412, "step": 13620 }, { "epoch": 0.2774554707379135, "grad_norm": 9.09155180485333, "learning_rate": 5.548996458087368e-06, "loss": 0.2707, "step": 13630 }, { "epoch": 0.2776590330788804, "grad_norm": 16.297473196582192, "learning_rate": 5.553067622033139e-06, "loss": 0.2721, "step": 13640 }, { "epoch": 0.2778625954198473, "grad_norm": 28.253775405664463, "learning_rate": 5.557138785978912e-06, "loss": 0.3082, "step": 13650 }, { "epoch": 0.2780661577608142, "grad_norm": 24.967488380597853, "learning_rate": 5.5612099499246845e-06, "loss": 0.2501, "step": 13660 }, { "epoch": 0.2782697201017812, "grad_norm": 9.145499398521252, "learning_rate": 5.565281113870456e-06, "loss": 0.2336, "step": 13670 }, { "epoch": 0.2784732824427481, "grad_norm": 10.795599550219059, "learning_rate": 5.569352277816228e-06, "loss": 0.2701, "step": 13680 }, { "epoch": 0.278676844783715, "grad_norm": 31.402928586642442, "learning_rate": 5.573423441762001e-06, "loss": 0.2371, "step": 13690 }, { "epoch": 0.27888040712468193, "grad_norm": 12.643808974536936, "learning_rate": 5.5774946057077725e-06, "loss": 0.2761, "step": 13700 }, { "epoch": 0.27908396946564884, "grad_norm": 13.202854351531577, "learning_rate": 5.581565769653544e-06, "loss": 0.2812, "step": 13710 }, { "epoch": 0.27928753180661575, "grad_norm": 4.568053408873063, "learning_rate": 5.585636933599317e-06, "loss": 0.2944, "step": 13720 }, { "epoch": 0.2794910941475827, "grad_norm": 6.490346476819247, "learning_rate": 5.589708097545089e-06, "loss": 0.2831, "step": 13730 }, { "epoch": 0.27969465648854963, "grad_norm": 13.893296285105219, "learning_rate": 5.5937792614908604e-06, "loss": 0.3025, "step": 13740 }, { "epoch": 0.27989821882951654, "grad_norm": 15.579785960783148, "learning_rate": 5.597850425436633e-06, "loss": 0.2729, "step": 13750 }, { "epoch": 0.28010178117048345, "grad_norm": 31.16042689628481, "learning_rate": 5.601921589382406e-06, "loss": 0.3674, "step": 13760 }, { "epoch": 0.28030534351145037, "grad_norm": 13.196573411158301, "learning_rate": 5.605992753328177e-06, "loss": 0.3035, "step": 13770 }, { "epoch": 0.2805089058524173, "grad_norm": 14.373036576511385, "learning_rate": 5.610063917273949e-06, "loss": 0.2642, "step": 13780 }, { "epoch": 0.28071246819338425, "grad_norm": 10.755929390886898, "learning_rate": 5.614135081219722e-06, "loss": 0.2078, "step": 13790 }, { "epoch": 0.28091603053435116, "grad_norm": 33.58328272128699, "learning_rate": 5.618206245165493e-06, "loss": 0.342, "step": 13800 }, { "epoch": 0.28111959287531807, "grad_norm": 21.090921580191356, "learning_rate": 5.6222774091112654e-06, "loss": 0.3201, "step": 13810 }, { "epoch": 0.281323155216285, "grad_norm": 28.813815494620773, "learning_rate": 5.626348573057038e-06, "loss": 0.2928, "step": 13820 }, { "epoch": 0.2815267175572519, "grad_norm": 8.26613066567604, "learning_rate": 5.630419737002809e-06, "loss": 0.3086, "step": 13830 }, { "epoch": 0.2817302798982188, "grad_norm": 14.078035200874739, "learning_rate": 5.634490900948582e-06, "loss": 0.2379, "step": 13840 }, { "epoch": 0.2819338422391858, "grad_norm": 5.372490217606332, "learning_rate": 5.638562064894354e-06, "loss": 0.1838, "step": 13850 }, { "epoch": 0.2821374045801527, "grad_norm": 19.46825913774154, "learning_rate": 5.642633228840125e-06, "loss": 0.2882, "step": 13860 }, { "epoch": 0.2823409669211196, "grad_norm": 19.737763211301516, "learning_rate": 5.646704392785898e-06, "loss": 0.4191, "step": 13870 }, { "epoch": 0.2825445292620865, "grad_norm": 14.47707134535483, "learning_rate": 5.6507755567316705e-06, "loss": 0.2736, "step": 13880 }, { "epoch": 0.2827480916030534, "grad_norm": 22.019415564477747, "learning_rate": 5.654846720677442e-06, "loss": 0.2943, "step": 13890 }, { "epoch": 0.28295165394402033, "grad_norm": 11.367725596736966, "learning_rate": 5.658917884623214e-06, "loss": 0.2871, "step": 13900 }, { "epoch": 0.2831552162849873, "grad_norm": 14.222661681630733, "learning_rate": 5.662989048568987e-06, "loss": 0.3561, "step": 13910 }, { "epoch": 0.2833587786259542, "grad_norm": 10.19812662193727, "learning_rate": 5.667060212514759e-06, "loss": 0.2197, "step": 13920 }, { "epoch": 0.2835623409669211, "grad_norm": 13.309292151772706, "learning_rate": 5.67113137646053e-06, "loss": 0.2127, "step": 13930 }, { "epoch": 0.28376590330788803, "grad_norm": 16.220413988745143, "learning_rate": 5.675202540406303e-06, "loss": 0.2188, "step": 13940 }, { "epoch": 0.28396946564885495, "grad_norm": 23.36059936767486, "learning_rate": 5.6792737043520755e-06, "loss": 0.313, "step": 13950 }, { "epoch": 0.28417302798982186, "grad_norm": 29.504981845704098, "learning_rate": 5.683344868297846e-06, "loss": 0.3654, "step": 13960 }, { "epoch": 0.2843765903307888, "grad_norm": 5.4026559319836585, "learning_rate": 5.687416032243619e-06, "loss": 0.3137, "step": 13970 }, { "epoch": 0.28458015267175574, "grad_norm": 6.2462079057686095, "learning_rate": 5.691487196189392e-06, "loss": 0.2917, "step": 13980 }, { "epoch": 0.28478371501272265, "grad_norm": 11.173892520140178, "learning_rate": 5.695558360135163e-06, "loss": 0.2791, "step": 13990 }, { "epoch": 0.28498727735368956, "grad_norm": 9.888863866890835, "learning_rate": 5.699629524080935e-06, "loss": 0.2949, "step": 14000 }, { "epoch": 0.2851908396946565, "grad_norm": 12.89238396411845, "learning_rate": 5.703700688026708e-06, "loss": 0.3789, "step": 14010 }, { "epoch": 0.2853944020356234, "grad_norm": 9.672072019333104, "learning_rate": 5.707771851972479e-06, "loss": 0.2736, "step": 14020 }, { "epoch": 0.28559796437659035, "grad_norm": 12.575666074709694, "learning_rate": 5.7118430159182514e-06, "loss": 0.2637, "step": 14030 }, { "epoch": 0.28580152671755726, "grad_norm": 18.397408108716007, "learning_rate": 5.715914179864024e-06, "loss": 0.2821, "step": 14040 }, { "epoch": 0.2860050890585242, "grad_norm": 3.85390317794873, "learning_rate": 5.719985343809795e-06, "loss": 0.3228, "step": 14050 }, { "epoch": 0.2862086513994911, "grad_norm": 22.230882237792766, "learning_rate": 5.724056507755568e-06, "loss": 0.3068, "step": 14060 }, { "epoch": 0.286412213740458, "grad_norm": 9.404381150669, "learning_rate": 5.72812767170134e-06, "loss": 0.2257, "step": 14070 }, { "epoch": 0.2866157760814249, "grad_norm": 18.427646670128485, "learning_rate": 5.732198835647111e-06, "loss": 0.3071, "step": 14080 }, { "epoch": 0.2868193384223919, "grad_norm": 20.109056198214777, "learning_rate": 5.736269999592884e-06, "loss": 0.272, "step": 14090 }, { "epoch": 0.2870229007633588, "grad_norm": 10.50735459328375, "learning_rate": 5.7403411635386565e-06, "loss": 0.3641, "step": 14100 }, { "epoch": 0.2872264631043257, "grad_norm": 9.323954973266048, "learning_rate": 5.744412327484429e-06, "loss": 0.2933, "step": 14110 }, { "epoch": 0.2874300254452926, "grad_norm": 17.209459003615752, "learning_rate": 5.7484834914302e-06, "loss": 0.2649, "step": 14120 }, { "epoch": 0.2876335877862595, "grad_norm": 19.57905606400601, "learning_rate": 5.752554655375973e-06, "loss": 0.2912, "step": 14130 }, { "epoch": 0.28783715012722644, "grad_norm": 14.891811128064594, "learning_rate": 5.756625819321745e-06, "loss": 0.2859, "step": 14140 }, { "epoch": 0.2880407124681934, "grad_norm": 30.008220101490796, "learning_rate": 5.760696983267516e-06, "loss": 0.2546, "step": 14150 }, { "epoch": 0.2882442748091603, "grad_norm": 10.441622890439469, "learning_rate": 5.764768147213289e-06, "loss": 0.2, "step": 14160 }, { "epoch": 0.28844783715012723, "grad_norm": 14.806309520332201, "learning_rate": 5.7688393111590615e-06, "loss": 0.2875, "step": 14170 }, { "epoch": 0.28865139949109414, "grad_norm": 13.47779246780832, "learning_rate": 5.772910475104832e-06, "loss": 0.3011, "step": 14180 }, { "epoch": 0.28885496183206105, "grad_norm": 30.134005297114722, "learning_rate": 5.776981639050605e-06, "loss": 0.2659, "step": 14190 }, { "epoch": 0.28905852417302796, "grad_norm": 17.14098212206586, "learning_rate": 5.781052802996378e-06, "loss": 0.3475, "step": 14200 }, { "epoch": 0.28926208651399493, "grad_norm": 8.918280317296723, "learning_rate": 5.785123966942149e-06, "loss": 0.2539, "step": 14210 }, { "epoch": 0.28946564885496184, "grad_norm": 15.77790182182856, "learning_rate": 5.789195130887921e-06, "loss": 0.2995, "step": 14220 }, { "epoch": 0.28966921119592876, "grad_norm": 14.54936497865066, "learning_rate": 5.793266294833694e-06, "loss": 0.2677, "step": 14230 }, { "epoch": 0.28987277353689567, "grad_norm": 13.794543866089645, "learning_rate": 5.797337458779465e-06, "loss": 0.2742, "step": 14240 }, { "epoch": 0.2900763358778626, "grad_norm": 2.995340114811141, "learning_rate": 5.8014086227252374e-06, "loss": 0.2832, "step": 14250 }, { "epoch": 0.2902798982188295, "grad_norm": 9.000083847419173, "learning_rate": 5.80547978667101e-06, "loss": 0.2728, "step": 14260 }, { "epoch": 0.29048346055979646, "grad_norm": 14.857367787493871, "learning_rate": 5.809550950616781e-06, "loss": 0.2824, "step": 14270 }, { "epoch": 0.29068702290076337, "grad_norm": 8.639136774286012, "learning_rate": 5.813622114562554e-06, "loss": 0.3426, "step": 14280 }, { "epoch": 0.2908905852417303, "grad_norm": 15.533549602240967, "learning_rate": 5.817693278508326e-06, "loss": 0.2216, "step": 14290 }, { "epoch": 0.2910941475826972, "grad_norm": 22.587949109882075, "learning_rate": 5.821764442454097e-06, "loss": 0.2864, "step": 14300 }, { "epoch": 0.2912977099236641, "grad_norm": 15.05127153197699, "learning_rate": 5.82583560639987e-06, "loss": 0.3162, "step": 14310 }, { "epoch": 0.291501272264631, "grad_norm": 7.095462987873233, "learning_rate": 5.8299067703456425e-06, "loss": 0.2446, "step": 14320 }, { "epoch": 0.291704834605598, "grad_norm": 10.181954339080642, "learning_rate": 5.833977934291415e-06, "loss": 0.3361, "step": 14330 }, { "epoch": 0.2919083969465649, "grad_norm": 13.494432955912703, "learning_rate": 5.838049098237186e-06, "loss": 0.1863, "step": 14340 }, { "epoch": 0.2921119592875318, "grad_norm": 16.48966579751534, "learning_rate": 5.842120262182959e-06, "loss": 0.3319, "step": 14350 }, { "epoch": 0.2923155216284987, "grad_norm": 32.00293325407916, "learning_rate": 5.846191426128731e-06, "loss": 0.2973, "step": 14360 }, { "epoch": 0.29251908396946563, "grad_norm": 30.0477643844165, "learning_rate": 5.850262590074502e-06, "loss": 0.3517, "step": 14370 }, { "epoch": 0.29272264631043254, "grad_norm": 7.491275183952736, "learning_rate": 5.854333754020275e-06, "loss": 0.2418, "step": 14380 }, { "epoch": 0.2929262086513995, "grad_norm": 16.043475328684178, "learning_rate": 5.8584049179660475e-06, "loss": 0.3121, "step": 14390 }, { "epoch": 0.2931297709923664, "grad_norm": 12.632566392623108, "learning_rate": 5.862476081911818e-06, "loss": 0.2525, "step": 14400 }, { "epoch": 0.29333333333333333, "grad_norm": 13.126465218668962, "learning_rate": 5.866547245857591e-06, "loss": 0.254, "step": 14410 }, { "epoch": 0.29353689567430025, "grad_norm": 11.785763937669454, "learning_rate": 5.870618409803364e-06, "loss": 0.2897, "step": 14420 }, { "epoch": 0.29374045801526716, "grad_norm": 6.486663425717412, "learning_rate": 5.874689573749135e-06, "loss": 0.2145, "step": 14430 }, { "epoch": 0.29394402035623407, "grad_norm": 5.045196402381943, "learning_rate": 5.878760737694907e-06, "loss": 0.2765, "step": 14440 }, { "epoch": 0.29414758269720104, "grad_norm": 16.85395887420515, "learning_rate": 5.88283190164068e-06, "loss": 0.2174, "step": 14450 }, { "epoch": 0.29435114503816795, "grad_norm": 24.143938608956905, "learning_rate": 5.886903065586451e-06, "loss": 0.2393, "step": 14460 }, { "epoch": 0.29455470737913486, "grad_norm": 19.3306231490812, "learning_rate": 5.8909742295322234e-06, "loss": 0.215, "step": 14470 }, { "epoch": 0.2947582697201018, "grad_norm": 16.802170712938697, "learning_rate": 5.895045393477996e-06, "loss": 0.32, "step": 14480 }, { "epoch": 0.2949618320610687, "grad_norm": 8.045860060173895, "learning_rate": 5.899116557423768e-06, "loss": 0.2866, "step": 14490 }, { "epoch": 0.2951653944020356, "grad_norm": 3.3341737369734328, "learning_rate": 5.90318772136954e-06, "loss": 0.2111, "step": 14500 }, { "epoch": 0.29536895674300256, "grad_norm": 7.248068602024194, "learning_rate": 5.907258885315312e-06, "loss": 0.3231, "step": 14510 }, { "epoch": 0.2955725190839695, "grad_norm": 39.867720230261256, "learning_rate": 5.911330049261085e-06, "loss": 0.2643, "step": 14520 }, { "epoch": 0.2957760814249364, "grad_norm": 7.91271739896479, "learning_rate": 5.915401213206856e-06, "loss": 0.2999, "step": 14530 }, { "epoch": 0.2959796437659033, "grad_norm": 14.867541560792963, "learning_rate": 5.9194723771526284e-06, "loss": 0.2549, "step": 14540 }, { "epoch": 0.2961832061068702, "grad_norm": 14.746340291523852, "learning_rate": 5.923543541098401e-06, "loss": 0.2662, "step": 14550 }, { "epoch": 0.2963867684478371, "grad_norm": 16.39374209535279, "learning_rate": 5.927614705044172e-06, "loss": 0.2625, "step": 14560 }, { "epoch": 0.2965903307888041, "grad_norm": 11.273975923003366, "learning_rate": 5.931685868989945e-06, "loss": 0.2539, "step": 14570 }, { "epoch": 0.296793893129771, "grad_norm": 23.756039963415617, "learning_rate": 5.935757032935717e-06, "loss": 0.2819, "step": 14580 }, { "epoch": 0.2969974554707379, "grad_norm": 14.644793630296048, "learning_rate": 5.939828196881488e-06, "loss": 0.3084, "step": 14590 }, { "epoch": 0.2972010178117048, "grad_norm": 13.129248651818425, "learning_rate": 5.943899360827261e-06, "loss": 0.3066, "step": 14600 }, { "epoch": 0.29740458015267174, "grad_norm": 13.60854042216258, "learning_rate": 5.9479705247730335e-06, "loss": 0.34, "step": 14610 }, { "epoch": 0.29760814249363865, "grad_norm": 0.007402855038016974, "learning_rate": 5.952041688718805e-06, "loss": 0.1977, "step": 14620 }, { "epoch": 0.2978117048346056, "grad_norm": 9.639638914129625, "learning_rate": 5.956112852664577e-06, "loss": 0.2992, "step": 14630 }, { "epoch": 0.29801526717557253, "grad_norm": 10.347194857024043, "learning_rate": 5.96018401661035e-06, "loss": 0.3788, "step": 14640 }, { "epoch": 0.29821882951653944, "grad_norm": 6.2074363829880195, "learning_rate": 5.9642551805561214e-06, "loss": 0.3443, "step": 14650 }, { "epoch": 0.29842239185750635, "grad_norm": 10.323589471804869, "learning_rate": 5.968326344501893e-06, "loss": 0.2278, "step": 14660 }, { "epoch": 0.29862595419847326, "grad_norm": 19.40256806009853, "learning_rate": 5.972397508447666e-06, "loss": 0.3483, "step": 14670 }, { "epoch": 0.2988295165394402, "grad_norm": 14.881409861238605, "learning_rate": 5.976468672393438e-06, "loss": 0.2986, "step": 14680 }, { "epoch": 0.29903307888040714, "grad_norm": 10.172726435719202, "learning_rate": 5.980539836339209e-06, "loss": 0.2563, "step": 14690 }, { "epoch": 0.29923664122137406, "grad_norm": 4.597640084845402, "learning_rate": 5.984611000284982e-06, "loss": 0.2164, "step": 14700 }, { "epoch": 0.29944020356234097, "grad_norm": 16.483739204031387, "learning_rate": 5.988682164230754e-06, "loss": 0.4006, "step": 14710 }, { "epoch": 0.2996437659033079, "grad_norm": 15.45200990882687, "learning_rate": 5.992753328176526e-06, "loss": 0.3316, "step": 14720 }, { "epoch": 0.2998473282442748, "grad_norm": 11.608429787990458, "learning_rate": 5.996824492122298e-06, "loss": 0.287, "step": 14730 }, { "epoch": 0.3000508905852417, "grad_norm": 8.092622569312187, "learning_rate": 6.000895656068071e-06, "loss": 0.2596, "step": 14740 }, { "epoch": 0.30025445292620867, "grad_norm": 14.555028767504364, "learning_rate": 6.004966820013843e-06, "loss": 0.2705, "step": 14750 }, { "epoch": 0.3004580152671756, "grad_norm": 11.008492744032242, "learning_rate": 6.0090379839596144e-06, "loss": 0.3523, "step": 14760 }, { "epoch": 0.3006615776081425, "grad_norm": 32.7084881237034, "learning_rate": 6.013109147905387e-06, "loss": 0.2982, "step": 14770 }, { "epoch": 0.3008651399491094, "grad_norm": 15.150641397235015, "learning_rate": 6.017180311851159e-06, "loss": 0.3304, "step": 14780 }, { "epoch": 0.3010687022900763, "grad_norm": 19.779595038545157, "learning_rate": 6.021251475796931e-06, "loss": 0.3598, "step": 14790 }, { "epoch": 0.30127226463104323, "grad_norm": 16.8433817796413, "learning_rate": 6.025322639742703e-06, "loss": 0.2556, "step": 14800 }, { "epoch": 0.3014758269720102, "grad_norm": 42.212773159581346, "learning_rate": 6.029393803688475e-06, "loss": 0.2557, "step": 14810 }, { "epoch": 0.3016793893129771, "grad_norm": 21.906438539829924, "learning_rate": 6.033464967634247e-06, "loss": 0.3247, "step": 14820 }, { "epoch": 0.301882951653944, "grad_norm": 5.666799273974501, "learning_rate": 6.0375361315800195e-06, "loss": 0.294, "step": 14830 }, { "epoch": 0.30208651399491093, "grad_norm": 4.836824016411176, "learning_rate": 6.041607295525791e-06, "loss": 0.348, "step": 14840 }, { "epoch": 0.30229007633587784, "grad_norm": 17.657010768755708, "learning_rate": 6.045678459471563e-06, "loss": 0.2837, "step": 14850 }, { "epoch": 0.30249363867684476, "grad_norm": 24.190908503882927, "learning_rate": 6.049749623417336e-06, "loss": 0.1873, "step": 14860 }, { "epoch": 0.3026972010178117, "grad_norm": 16.83481048962213, "learning_rate": 6.0538207873631074e-06, "loss": 0.2546, "step": 14870 }, { "epoch": 0.30290076335877864, "grad_norm": 8.854224569913047, "learning_rate": 6.057891951308879e-06, "loss": 0.2185, "step": 14880 }, { "epoch": 0.30310432569974555, "grad_norm": 4.394587759245087, "learning_rate": 6.061963115254652e-06, "loss": 0.3769, "step": 14890 }, { "epoch": 0.30330788804071246, "grad_norm": 1.9061206459391162, "learning_rate": 6.066034279200424e-06, "loss": 0.2868, "step": 14900 }, { "epoch": 0.30351145038167937, "grad_norm": 33.26458602785583, "learning_rate": 6.070105443146196e-06, "loss": 0.3619, "step": 14910 }, { "epoch": 0.30371501272264634, "grad_norm": 10.809841388718237, "learning_rate": 6.074176607091968e-06, "loss": 0.3459, "step": 14920 }, { "epoch": 0.30391857506361325, "grad_norm": 14.066198623251154, "learning_rate": 6.07824777103774e-06, "loss": 0.3094, "step": 14930 }, { "epoch": 0.30412213740458016, "grad_norm": 5.712886066318989, "learning_rate": 6.0823189349835125e-06, "loss": 0.2571, "step": 14940 }, { "epoch": 0.3043256997455471, "grad_norm": 9.771746857497815, "learning_rate": 6.086390098929284e-06, "loss": 0.3057, "step": 14950 }, { "epoch": 0.304529262086514, "grad_norm": 14.564536386268761, "learning_rate": 6.090461262875057e-06, "loss": 0.2654, "step": 14960 }, { "epoch": 0.3047328244274809, "grad_norm": 15.83590366040503, "learning_rate": 6.094532426820829e-06, "loss": 0.2306, "step": 14970 }, { "epoch": 0.30493638676844786, "grad_norm": 12.043682050278383, "learning_rate": 6.0986035907666004e-06, "loss": 0.2449, "step": 14980 }, { "epoch": 0.3051399491094148, "grad_norm": 12.149112099953959, "learning_rate": 6.102674754712373e-06, "loss": 0.2569, "step": 14990 }, { "epoch": 0.3053435114503817, "grad_norm": 11.492798401194166, "learning_rate": 6.106745918658145e-06, "loss": 0.3272, "step": 15000 }, { "epoch": 0.3055470737913486, "grad_norm": 5.432513363229565, "learning_rate": 6.110817082603917e-06, "loss": 0.3012, "step": 15010 }, { "epoch": 0.3057506361323155, "grad_norm": 17.78728420688418, "learning_rate": 6.114888246549689e-06, "loss": 0.291, "step": 15020 }, { "epoch": 0.3059541984732824, "grad_norm": 6.598800101936732, "learning_rate": 6.118959410495461e-06, "loss": 0.3066, "step": 15030 }, { "epoch": 0.3061577608142494, "grad_norm": 21.111561326503846, "learning_rate": 6.123030574441234e-06, "loss": 0.3364, "step": 15040 }, { "epoch": 0.3063613231552163, "grad_norm": 3.9880042350121863, "learning_rate": 6.1271017383870055e-06, "loss": 0.2399, "step": 15050 }, { "epoch": 0.3065648854961832, "grad_norm": 9.297942372581614, "learning_rate": 6.131172902332777e-06, "loss": 0.2685, "step": 15060 }, { "epoch": 0.3067684478371501, "grad_norm": 11.814388029689564, "learning_rate": 6.13524406627855e-06, "loss": 0.3471, "step": 15070 }, { "epoch": 0.30697201017811704, "grad_norm": 6.440337056665507, "learning_rate": 6.139315230224322e-06, "loss": 0.2523, "step": 15080 }, { "epoch": 0.30717557251908395, "grad_norm": 28.724995281169964, "learning_rate": 6.1433863941700934e-06, "loss": 0.2941, "step": 15090 }, { "epoch": 0.3073791348600509, "grad_norm": 14.42119692532089, "learning_rate": 6.147457558115866e-06, "loss": 0.3262, "step": 15100 }, { "epoch": 0.30758269720101783, "grad_norm": 12.16101168450281, "learning_rate": 6.151528722061638e-06, "loss": 0.2465, "step": 15110 }, { "epoch": 0.30778625954198474, "grad_norm": 3.553115065121232, "learning_rate": 6.15559988600741e-06, "loss": 0.2453, "step": 15120 }, { "epoch": 0.30798982188295165, "grad_norm": 15.895141482544458, "learning_rate": 6.159671049953182e-06, "loss": 0.2585, "step": 15130 }, { "epoch": 0.30819338422391857, "grad_norm": 23.784649768261744, "learning_rate": 6.163742213898954e-06, "loss": 0.1934, "step": 15140 }, { "epoch": 0.3083969465648855, "grad_norm": 10.127312931590943, "learning_rate": 6.167813377844726e-06, "loss": 0.4125, "step": 15150 }, { "epoch": 0.30860050890585244, "grad_norm": 10.458288146891546, "learning_rate": 6.1718845417904985e-06, "loss": 0.2807, "step": 15160 }, { "epoch": 0.30880407124681936, "grad_norm": 8.657401836897709, "learning_rate": 6.175955705736271e-06, "loss": 0.3441, "step": 15170 }, { "epoch": 0.30900763358778627, "grad_norm": 10.31022886990181, "learning_rate": 6.180026869682043e-06, "loss": 0.272, "step": 15180 }, { "epoch": 0.3092111959287532, "grad_norm": 17.35500323758872, "learning_rate": 6.184098033627815e-06, "loss": 0.306, "step": 15190 }, { "epoch": 0.3094147582697201, "grad_norm": 9.11604462194753, "learning_rate": 6.188169197573587e-06, "loss": 0.4157, "step": 15200 }, { "epoch": 0.309618320610687, "grad_norm": 10.658203564490751, "learning_rate": 6.192240361519359e-06, "loss": 0.2818, "step": 15210 }, { "epoch": 0.30982188295165397, "grad_norm": 15.216309703505493, "learning_rate": 6.196311525465131e-06, "loss": 0.2724, "step": 15220 }, { "epoch": 0.3100254452926209, "grad_norm": 9.705382381135456, "learning_rate": 6.2003826894109035e-06, "loss": 0.2607, "step": 15230 }, { "epoch": 0.3102290076335878, "grad_norm": 11.904228714115778, "learning_rate": 6.204453853356675e-06, "loss": 0.3518, "step": 15240 }, { "epoch": 0.3104325699745547, "grad_norm": 12.6051089974717, "learning_rate": 6.208525017302447e-06, "loss": 0.2899, "step": 15250 }, { "epoch": 0.3106361323155216, "grad_norm": 10.577242161216901, "learning_rate": 6.21259618124822e-06, "loss": 0.3068, "step": 15260 }, { "epoch": 0.31083969465648853, "grad_norm": 15.701997386230897, "learning_rate": 6.2166673451939914e-06, "loss": 0.3061, "step": 15270 }, { "epoch": 0.3110432569974555, "grad_norm": 12.656423745743346, "learning_rate": 6.220738509139763e-06, "loss": 0.3077, "step": 15280 }, { "epoch": 0.3112468193384224, "grad_norm": 41.68517070075623, "learning_rate": 6.224809673085536e-06, "loss": 0.3181, "step": 15290 }, { "epoch": 0.3114503816793893, "grad_norm": 14.23882579833358, "learning_rate": 6.2288808370313085e-06, "loss": 0.2722, "step": 15300 }, { "epoch": 0.31165394402035623, "grad_norm": 6.443726338021155, "learning_rate": 6.2329520009770794e-06, "loss": 0.3701, "step": 15310 }, { "epoch": 0.31185750636132314, "grad_norm": 4.843637825767856, "learning_rate": 6.237023164922852e-06, "loss": 0.3038, "step": 15320 }, { "epoch": 0.31206106870229006, "grad_norm": 5.986397155401847, "learning_rate": 6.241094328868625e-06, "loss": 0.3112, "step": 15330 }, { "epoch": 0.312264631043257, "grad_norm": 12.920954999507511, "learning_rate": 6.245165492814396e-06, "loss": 0.2758, "step": 15340 }, { "epoch": 0.31246819338422394, "grad_norm": 34.7147593201713, "learning_rate": 6.249236656760168e-06, "loss": 0.3403, "step": 15350 }, { "epoch": 0.31267175572519085, "grad_norm": 7.112832896073242, "learning_rate": 6.253307820705941e-06, "loss": 0.3629, "step": 15360 }, { "epoch": 0.31287531806615776, "grad_norm": 15.45994586403425, "learning_rate": 6.257378984651712e-06, "loss": 0.2529, "step": 15370 }, { "epoch": 0.31307888040712467, "grad_norm": 5.941809069028679, "learning_rate": 6.2614501485974844e-06, "loss": 0.299, "step": 15380 }, { "epoch": 0.3132824427480916, "grad_norm": 6.8179097794380645, "learning_rate": 6.265521312543257e-06, "loss": 0.1977, "step": 15390 }, { "epoch": 0.31348600508905855, "grad_norm": 26.22196623604495, "learning_rate": 6.269592476489029e-06, "loss": 0.2456, "step": 15400 }, { "epoch": 0.31368956743002546, "grad_norm": 0.7972662622785884, "learning_rate": 6.273663640434801e-06, "loss": 0.2913, "step": 15410 }, { "epoch": 0.3138931297709924, "grad_norm": 15.603677795159175, "learning_rate": 6.277734804380573e-06, "loss": 0.4532, "step": 15420 }, { "epoch": 0.3140966921119593, "grad_norm": 18.522802901559977, "learning_rate": 6.281805968326346e-06, "loss": 0.3251, "step": 15430 }, { "epoch": 0.3143002544529262, "grad_norm": 12.404287482791858, "learning_rate": 6.285877132272117e-06, "loss": 0.2867, "step": 15440 }, { "epoch": 0.3145038167938931, "grad_norm": 6.598345809626119, "learning_rate": 6.2899482962178895e-06, "loss": 0.2532, "step": 15450 }, { "epoch": 0.3147073791348601, "grad_norm": 12.832690101550408, "learning_rate": 6.294019460163662e-06, "loss": 0.2562, "step": 15460 }, { "epoch": 0.314910941475827, "grad_norm": 14.408400892112434, "learning_rate": 6.298090624109433e-06, "loss": 0.2729, "step": 15470 }, { "epoch": 0.3151145038167939, "grad_norm": 9.598499219148488, "learning_rate": 6.302161788055206e-06, "loss": 0.2672, "step": 15480 }, { "epoch": 0.3153180661577608, "grad_norm": 15.775231791870194, "learning_rate": 6.306232952000978e-06, "loss": 0.3559, "step": 15490 }, { "epoch": 0.3155216284987277, "grad_norm": 9.320499243417913, "learning_rate": 6.310304115946749e-06, "loss": 0.2403, "step": 15500 }, { "epoch": 0.31572519083969464, "grad_norm": 15.192022262352975, "learning_rate": 6.314375279892522e-06, "loss": 0.2918, "step": 15510 }, { "epoch": 0.3159287531806616, "grad_norm": 9.088965949587317, "learning_rate": 6.3184464438382945e-06, "loss": 0.2515, "step": 15520 }, { "epoch": 0.3161323155216285, "grad_norm": 38.494851440772784, "learning_rate": 6.322517607784065e-06, "loss": 0.2778, "step": 15530 }, { "epoch": 0.3163358778625954, "grad_norm": 6.71017765742439, "learning_rate": 6.326588771729838e-06, "loss": 0.3273, "step": 15540 }, { "epoch": 0.31653944020356234, "grad_norm": 20.2092618265239, "learning_rate": 6.330659935675611e-06, "loss": 0.2918, "step": 15550 }, { "epoch": 0.31674300254452925, "grad_norm": 4.878462944703424, "learning_rate": 6.334731099621382e-06, "loss": 0.2787, "step": 15560 }, { "epoch": 0.31694656488549616, "grad_norm": 16.220809673197447, "learning_rate": 6.338802263567154e-06, "loss": 0.2669, "step": 15570 }, { "epoch": 0.31715012722646313, "grad_norm": 29.633073566692918, "learning_rate": 6.342873427512927e-06, "loss": 0.2609, "step": 15580 }, { "epoch": 0.31735368956743004, "grad_norm": 4.7981395674998994, "learning_rate": 6.346944591458698e-06, "loss": 0.2209, "step": 15590 }, { "epoch": 0.31755725190839695, "grad_norm": 23.11519196031633, "learning_rate": 6.3510157554044704e-06, "loss": 0.3319, "step": 15600 }, { "epoch": 0.31776081424936387, "grad_norm": 49.45238630072592, "learning_rate": 6.355086919350243e-06, "loss": 0.242, "step": 15610 }, { "epoch": 0.3179643765903308, "grad_norm": 19.4972912341996, "learning_rate": 6.359158083296016e-06, "loss": 0.3216, "step": 15620 }, { "epoch": 0.3181679389312977, "grad_norm": 30.149790837853384, "learning_rate": 6.363229247241787e-06, "loss": 0.2003, "step": 15630 }, { "epoch": 0.31837150127226466, "grad_norm": 28.901478139139382, "learning_rate": 6.367300411187559e-06, "loss": 0.3008, "step": 15640 }, { "epoch": 0.31857506361323157, "grad_norm": 16.93475635961996, "learning_rate": 6.371371575133332e-06, "loss": 0.3356, "step": 15650 }, { "epoch": 0.3187786259541985, "grad_norm": 6.145648370929582, "learning_rate": 6.375442739079103e-06, "loss": 0.208, "step": 15660 }, { "epoch": 0.3189821882951654, "grad_norm": 24.31583749935642, "learning_rate": 6.3795139030248755e-06, "loss": 0.2867, "step": 15670 }, { "epoch": 0.3191857506361323, "grad_norm": 18.08455571791079, "learning_rate": 6.383585066970648e-06, "loss": 0.3846, "step": 15680 }, { "epoch": 0.3193893129770992, "grad_norm": 10.559930230081209, "learning_rate": 6.387656230916419e-06, "loss": 0.3253, "step": 15690 }, { "epoch": 0.3195928753180662, "grad_norm": 6.989767317343461, "learning_rate": 6.391727394862192e-06, "loss": 0.3409, "step": 15700 }, { "epoch": 0.3197964376590331, "grad_norm": 11.474916254091951, "learning_rate": 6.395798558807964e-06, "loss": 0.2486, "step": 15710 }, { "epoch": 0.32, "grad_norm": 7.003657902262818, "learning_rate": 6.399869722753735e-06, "loss": 0.3531, "step": 15720 }, { "epoch": 0.3202035623409669, "grad_norm": 13.685222885119043, "learning_rate": 6.403940886699508e-06, "loss": 0.3892, "step": 15730 }, { "epoch": 0.32040712468193383, "grad_norm": 5.306419344977999, "learning_rate": 6.4080120506452805e-06, "loss": 0.2537, "step": 15740 }, { "epoch": 0.32061068702290074, "grad_norm": 9.002743222179415, "learning_rate": 6.412083214591051e-06, "loss": 0.3239, "step": 15750 }, { "epoch": 0.3208142493638677, "grad_norm": 12.821654227225672, "learning_rate": 6.416154378536824e-06, "loss": 0.2311, "step": 15760 }, { "epoch": 0.3210178117048346, "grad_norm": 10.81817160825353, "learning_rate": 6.420225542482597e-06, "loss": 0.3118, "step": 15770 }, { "epoch": 0.32122137404580153, "grad_norm": 13.516351437970116, "learning_rate": 6.424296706428368e-06, "loss": 0.3314, "step": 15780 }, { "epoch": 0.32142493638676845, "grad_norm": 8.032918883437882, "learning_rate": 6.42836787037414e-06, "loss": 0.3567, "step": 15790 }, { "epoch": 0.32162849872773536, "grad_norm": 10.54201317502914, "learning_rate": 6.432439034319913e-06, "loss": 0.2931, "step": 15800 }, { "epoch": 0.32183206106870227, "grad_norm": 17.325204812855088, "learning_rate": 6.4365101982656855e-06, "loss": 0.3851, "step": 15810 }, { "epoch": 0.32203562340966924, "grad_norm": 10.27077361350431, "learning_rate": 6.4405813622114564e-06, "loss": 0.3217, "step": 15820 }, { "epoch": 0.32223918575063615, "grad_norm": 6.729721310478147, "learning_rate": 6.444652526157229e-06, "loss": 0.2286, "step": 15830 }, { "epoch": 0.32244274809160306, "grad_norm": 21.80788896212191, "learning_rate": 6.448723690103002e-06, "loss": 0.2971, "step": 15840 }, { "epoch": 0.32264631043256997, "grad_norm": 7.922487418579684, "learning_rate": 6.452794854048773e-06, "loss": 0.2396, "step": 15850 }, { "epoch": 0.3228498727735369, "grad_norm": 25.65982356904004, "learning_rate": 6.456866017994545e-06, "loss": 0.1796, "step": 15860 }, { "epoch": 0.3230534351145038, "grad_norm": 7.027200939347338, "learning_rate": 6.460937181940318e-06, "loss": 0.2192, "step": 15870 }, { "epoch": 0.32325699745547076, "grad_norm": 34.56372962250984, "learning_rate": 6.465008345886089e-06, "loss": 0.4107, "step": 15880 }, { "epoch": 0.3234605597964377, "grad_norm": 0.7207564500707709, "learning_rate": 6.4690795098318615e-06, "loss": 0.316, "step": 15890 }, { "epoch": 0.3236641221374046, "grad_norm": 3.49910921654238, "learning_rate": 6.473150673777634e-06, "loss": 0.251, "step": 15900 }, { "epoch": 0.3238676844783715, "grad_norm": 7.461607074523808, "learning_rate": 6.477221837723405e-06, "loss": 0.3354, "step": 15910 }, { "epoch": 0.3240712468193384, "grad_norm": 22.499020129999497, "learning_rate": 6.481293001669178e-06, "loss": 0.2656, "step": 15920 }, { "epoch": 0.3242748091603053, "grad_norm": 8.746199714849352, "learning_rate": 6.48536416561495e-06, "loss": 0.3134, "step": 15930 }, { "epoch": 0.3244783715012723, "grad_norm": 18.547875005620288, "learning_rate": 6.489435329560721e-06, "loss": 0.2411, "step": 15940 }, { "epoch": 0.3246819338422392, "grad_norm": 12.777917061385644, "learning_rate": 6.493506493506494e-06, "loss": 0.2741, "step": 15950 }, { "epoch": 0.3248854961832061, "grad_norm": 20.16260781678022, "learning_rate": 6.4975776574522665e-06, "loss": 0.3823, "step": 15960 }, { "epoch": 0.325089058524173, "grad_norm": 8.943887277915417, "learning_rate": 6.501648821398037e-06, "loss": 0.3216, "step": 15970 }, { "epoch": 0.32529262086513994, "grad_norm": 9.42732154350159, "learning_rate": 6.50571998534381e-06, "loss": 0.2501, "step": 15980 }, { "epoch": 0.32549618320610685, "grad_norm": 30.388861353836884, "learning_rate": 6.509791149289583e-06, "loss": 0.3229, "step": 15990 }, { "epoch": 0.3256997455470738, "grad_norm": 9.761426953663037, "learning_rate": 6.513862313235354e-06, "loss": 0.321, "step": 16000 }, { "epoch": 0.3259033078880407, "grad_norm": 15.932167515778792, "learning_rate": 6.517933477181126e-06, "loss": 0.3602, "step": 16010 }, { "epoch": 0.32610687022900764, "grad_norm": 10.86793135562432, "learning_rate": 6.522004641126899e-06, "loss": 0.2758, "step": 16020 }, { "epoch": 0.32631043256997455, "grad_norm": 4.327034951798381, "learning_rate": 6.5260758050726715e-06, "loss": 0.2681, "step": 16030 }, { "epoch": 0.32651399491094146, "grad_norm": 7.110617884430371, "learning_rate": 6.5301469690184424e-06, "loss": 0.2929, "step": 16040 }, { "epoch": 0.3267175572519084, "grad_norm": 19.46696023492074, "learning_rate": 6.534218132964215e-06, "loss": 0.3303, "step": 16050 }, { "epoch": 0.32692111959287534, "grad_norm": 14.431624682498281, "learning_rate": 6.538289296909988e-06, "loss": 0.2273, "step": 16060 }, { "epoch": 0.32712468193384225, "grad_norm": 16.357842603308292, "learning_rate": 6.542360460855759e-06, "loss": 0.262, "step": 16070 }, { "epoch": 0.32732824427480917, "grad_norm": 8.529559105410684, "learning_rate": 6.546431624801531e-06, "loss": 0.326, "step": 16080 }, { "epoch": 0.3275318066157761, "grad_norm": 8.562850476828267, "learning_rate": 6.550502788747304e-06, "loss": 0.1672, "step": 16090 }, { "epoch": 0.327735368956743, "grad_norm": 6.144525045855764, "learning_rate": 6.554573952693075e-06, "loss": 0.3024, "step": 16100 }, { "epoch": 0.3279389312977099, "grad_norm": 5.245670277254238, "learning_rate": 6.5586451166388474e-06, "loss": 0.2841, "step": 16110 }, { "epoch": 0.32814249363867687, "grad_norm": 17.59248571703176, "learning_rate": 6.56271628058462e-06, "loss": 0.3427, "step": 16120 }, { "epoch": 0.3283460559796438, "grad_norm": 5.397925490907769, "learning_rate": 6.566787444530391e-06, "loss": 0.2468, "step": 16130 }, { "epoch": 0.3285496183206107, "grad_norm": 15.754922913269318, "learning_rate": 6.570858608476164e-06, "loss": 0.2408, "step": 16140 }, { "epoch": 0.3287531806615776, "grad_norm": 6.309212881566268, "learning_rate": 6.574929772421936e-06, "loss": 0.3047, "step": 16150 }, { "epoch": 0.3289567430025445, "grad_norm": 13.562018938308633, "learning_rate": 6.579000936367708e-06, "loss": 0.2441, "step": 16160 }, { "epoch": 0.32916030534351143, "grad_norm": 25.10511088177541, "learning_rate": 6.58307210031348e-06, "loss": 0.3337, "step": 16170 }, { "epoch": 0.3293638676844784, "grad_norm": 14.037987839498573, "learning_rate": 6.5871432642592525e-06, "loss": 0.2772, "step": 16180 }, { "epoch": 0.3295674300254453, "grad_norm": 29.167244606706237, "learning_rate": 6.591214428205024e-06, "loss": 0.2488, "step": 16190 }, { "epoch": 0.3297709923664122, "grad_norm": 88.1444441991025, "learning_rate": 6.595285592150796e-06, "loss": 0.4747, "step": 16200 }, { "epoch": 0.32997455470737913, "grad_norm": 2.993974595381677, "learning_rate": 6.599356756096569e-06, "loss": 0.2945, "step": 16210 }, { "epoch": 0.33017811704834604, "grad_norm": 17.27437438275398, "learning_rate": 6.6034279200423404e-06, "loss": 0.3031, "step": 16220 }, { "epoch": 0.33038167938931295, "grad_norm": 9.57984381497023, "learning_rate": 6.607499083988112e-06, "loss": 0.3005, "step": 16230 }, { "epoch": 0.3305852417302799, "grad_norm": 5.3007740460069295, "learning_rate": 6.611570247933885e-06, "loss": 0.269, "step": 16240 }, { "epoch": 0.33078880407124683, "grad_norm": 6.415227505178379, "learning_rate": 6.6156414118796575e-06, "loss": 0.3316, "step": 16250 }, { "epoch": 0.33099236641221375, "grad_norm": 8.356570575449842, "learning_rate": 6.619712575825428e-06, "loss": 0.3307, "step": 16260 }, { "epoch": 0.33119592875318066, "grad_norm": 9.708247858927905, "learning_rate": 6.623783739771201e-06, "loss": 0.2896, "step": 16270 }, { "epoch": 0.33139949109414757, "grad_norm": 16.717124196676735, "learning_rate": 6.627854903716974e-06, "loss": 0.3133, "step": 16280 }, { "epoch": 0.3316030534351145, "grad_norm": 6.103028743263798, "learning_rate": 6.6319260676627455e-06, "loss": 0.3349, "step": 16290 }, { "epoch": 0.33180661577608145, "grad_norm": 11.628890809184538, "learning_rate": 6.635997231608517e-06, "loss": 0.3064, "step": 16300 }, { "epoch": 0.33201017811704836, "grad_norm": 15.455757673804085, "learning_rate": 6.64006839555429e-06, "loss": 0.2911, "step": 16310 }, { "epoch": 0.33221374045801527, "grad_norm": 13.320894376668562, "learning_rate": 6.644139559500062e-06, "loss": 0.2493, "step": 16320 }, { "epoch": 0.3324173027989822, "grad_norm": 19.295306242631426, "learning_rate": 6.6482107234458334e-06, "loss": 0.3326, "step": 16330 }, { "epoch": 0.3326208651399491, "grad_norm": 14.33451373594531, "learning_rate": 6.652281887391606e-06, "loss": 0.2356, "step": 16340 }, { "epoch": 0.332824427480916, "grad_norm": 20.306479306770996, "learning_rate": 6.656353051337378e-06, "loss": 0.3014, "step": 16350 }, { "epoch": 0.333027989821883, "grad_norm": 10.136723295113365, "learning_rate": 6.66042421528315e-06, "loss": 0.3614, "step": 16360 }, { "epoch": 0.3332315521628499, "grad_norm": 13.679590952331072, "learning_rate": 6.664495379228922e-06, "loss": 0.3118, "step": 16370 }, { "epoch": 0.3334351145038168, "grad_norm": 3.679464768492899, "learning_rate": 6.668566543174694e-06, "loss": 0.2449, "step": 16380 }, { "epoch": 0.3336386768447837, "grad_norm": 4.886280828747826, "learning_rate": 6.672637707120466e-06, "loss": 0.1556, "step": 16390 }, { "epoch": 0.3338422391857506, "grad_norm": 4.722024660007564, "learning_rate": 6.6767088710662385e-06, "loss": 0.3514, "step": 16400 }, { "epoch": 0.33404580152671753, "grad_norm": 8.583181873880603, "learning_rate": 6.68078003501201e-06, "loss": 0.3351, "step": 16410 }, { "epoch": 0.3342493638676845, "grad_norm": 12.94657315488292, "learning_rate": 6.684851198957783e-06, "loss": 0.275, "step": 16420 }, { "epoch": 0.3344529262086514, "grad_norm": 6.67968806274347, "learning_rate": 6.688922362903555e-06, "loss": 0.3895, "step": 16430 }, { "epoch": 0.3346564885496183, "grad_norm": 7.650382965424581, "learning_rate": 6.6929935268493264e-06, "loss": 0.2328, "step": 16440 }, { "epoch": 0.33486005089058524, "grad_norm": 16.132286867185, "learning_rate": 6.697064690795099e-06, "loss": 0.3273, "step": 16450 }, { "epoch": 0.33506361323155215, "grad_norm": 10.5616466193759, "learning_rate": 6.701135854740871e-06, "loss": 0.336, "step": 16460 }, { "epoch": 0.33526717557251906, "grad_norm": 10.737873641370738, "learning_rate": 6.7052070186866435e-06, "loss": 0.309, "step": 16470 }, { "epoch": 0.33547073791348603, "grad_norm": 10.65112888115699, "learning_rate": 6.709278182632415e-06, "loss": 0.3287, "step": 16480 }, { "epoch": 0.33567430025445294, "grad_norm": 16.623868316116415, "learning_rate": 6.713349346578187e-06, "loss": 0.2698, "step": 16490 }, { "epoch": 0.33587786259541985, "grad_norm": 5.800483609814451, "learning_rate": 6.71742051052396e-06, "loss": 0.3789, "step": 16500 }, { "epoch": 0.33608142493638676, "grad_norm": 18.176243866472976, "learning_rate": 6.7214916744697315e-06, "loss": 0.3583, "step": 16510 }, { "epoch": 0.3362849872773537, "grad_norm": 13.262880919648483, "learning_rate": 6.725562838415503e-06, "loss": 0.3376, "step": 16520 }, { "epoch": 0.3364885496183206, "grad_norm": 13.150441281961253, "learning_rate": 6.729634002361276e-06, "loss": 0.3218, "step": 16530 }, { "epoch": 0.33669211195928755, "grad_norm": 15.840871042254415, "learning_rate": 6.733705166307048e-06, "loss": 0.3051, "step": 16540 }, { "epoch": 0.33689567430025447, "grad_norm": 20.65434936218952, "learning_rate": 6.7377763302528194e-06, "loss": 0.3268, "step": 16550 }, { "epoch": 0.3370992366412214, "grad_norm": 11.674371175216569, "learning_rate": 6.741847494198592e-06, "loss": 0.3056, "step": 16560 }, { "epoch": 0.3373027989821883, "grad_norm": 12.875011731934805, "learning_rate": 6.745918658144364e-06, "loss": 0.2802, "step": 16570 }, { "epoch": 0.3375063613231552, "grad_norm": 11.67739540662132, "learning_rate": 6.7499898220901365e-06, "loss": 0.2257, "step": 16580 }, { "epoch": 0.3377099236641221, "grad_norm": 12.483372845389054, "learning_rate": 6.754060986035908e-06, "loss": 0.3425, "step": 16590 }, { "epoch": 0.3379134860050891, "grad_norm": 7.482734009155246, "learning_rate": 6.75813214998168e-06, "loss": 0.287, "step": 16600 }, { "epoch": 0.338117048346056, "grad_norm": 18.152630540856673, "learning_rate": 6.762203313927453e-06, "loss": 0.2874, "step": 16610 }, { "epoch": 0.3383206106870229, "grad_norm": 10.588689811443981, "learning_rate": 6.7662744778732244e-06, "loss": 0.3043, "step": 16620 }, { "epoch": 0.3385241730279898, "grad_norm": 19.54858556367738, "learning_rate": 6.770345641818996e-06, "loss": 0.2865, "step": 16630 }, { "epoch": 0.33872773536895673, "grad_norm": 17.72100852494698, "learning_rate": 6.774416805764769e-06, "loss": 0.2689, "step": 16640 }, { "epoch": 0.33893129770992364, "grad_norm": 30.09048966525668, "learning_rate": 6.778487969710541e-06, "loss": 0.2392, "step": 16650 }, { "epoch": 0.3391348600508906, "grad_norm": 19.00742166071514, "learning_rate": 6.7825591336563124e-06, "loss": 0.3083, "step": 16660 }, { "epoch": 0.3393384223918575, "grad_norm": 7.65420267311438, "learning_rate": 6.786630297602085e-06, "loss": 0.3149, "step": 16670 }, { "epoch": 0.33954198473282443, "grad_norm": 7.635936696634724, "learning_rate": 6.790701461547857e-06, "loss": 0.2959, "step": 16680 }, { "epoch": 0.33974554707379134, "grad_norm": 15.292626498064608, "learning_rate": 6.7947726254936295e-06, "loss": 0.2632, "step": 16690 }, { "epoch": 0.33994910941475825, "grad_norm": 9.36977152210099, "learning_rate": 6.798843789439401e-06, "loss": 0.2471, "step": 16700 }, { "epoch": 0.34015267175572517, "grad_norm": 24.25054104514472, "learning_rate": 6.802914953385174e-06, "loss": 0.2784, "step": 16710 }, { "epoch": 0.34035623409669213, "grad_norm": 6.746854421343407, "learning_rate": 6.806986117330946e-06, "loss": 0.2435, "step": 16720 }, { "epoch": 0.34055979643765905, "grad_norm": 8.789627825809646, "learning_rate": 6.8110572812767174e-06, "loss": 0.3637, "step": 16730 }, { "epoch": 0.34076335877862596, "grad_norm": 21.70020796452607, "learning_rate": 6.81512844522249e-06, "loss": 0.3806, "step": 16740 }, { "epoch": 0.34096692111959287, "grad_norm": 6.7761770432914, "learning_rate": 6.819199609168262e-06, "loss": 0.2674, "step": 16750 }, { "epoch": 0.3411704834605598, "grad_norm": 16.373981066718166, "learning_rate": 6.823270773114034e-06, "loss": 0.2437, "step": 16760 }, { "epoch": 0.3413740458015267, "grad_norm": 11.925468648121576, "learning_rate": 6.827341937059806e-06, "loss": 0.2987, "step": 16770 }, { "epoch": 0.34157760814249366, "grad_norm": 13.460588075502422, "learning_rate": 6.831413101005578e-06, "loss": 0.2533, "step": 16780 }, { "epoch": 0.3417811704834606, "grad_norm": 10.93399514221324, "learning_rate": 6.83548426495135e-06, "loss": 0.2372, "step": 16790 }, { "epoch": 0.3419847328244275, "grad_norm": 8.96782489252942, "learning_rate": 6.8395554288971225e-06, "loss": 0.28, "step": 16800 }, { "epoch": 0.3421882951653944, "grad_norm": 18.489096291919548, "learning_rate": 6.843626592842894e-06, "loss": 0.2617, "step": 16810 }, { "epoch": 0.3423918575063613, "grad_norm": 14.433230077021328, "learning_rate": 6.847697756788666e-06, "loss": 0.3002, "step": 16820 }, { "epoch": 0.3425954198473282, "grad_norm": 15.581739926427757, "learning_rate": 6.851768920734439e-06, "loss": 0.2856, "step": 16830 }, { "epoch": 0.3427989821882952, "grad_norm": 16.337822912048953, "learning_rate": 6.855840084680211e-06, "loss": 0.3384, "step": 16840 }, { "epoch": 0.3430025445292621, "grad_norm": 26.673982252003622, "learning_rate": 6.859911248625982e-06, "loss": 0.4458, "step": 16850 }, { "epoch": 0.343206106870229, "grad_norm": 3.392224563082346, "learning_rate": 6.863982412571755e-06, "loss": 0.2896, "step": 16860 }, { "epoch": 0.3434096692111959, "grad_norm": 25.121978097762888, "learning_rate": 6.8680535765175275e-06, "loss": 0.2989, "step": 16870 }, { "epoch": 0.34361323155216283, "grad_norm": 27.36211866995432, "learning_rate": 6.872124740463298e-06, "loss": 0.2932, "step": 16880 }, { "epoch": 0.34381679389312975, "grad_norm": 9.517746360820746, "learning_rate": 6.876195904409071e-06, "loss": 0.2438, "step": 16890 }, { "epoch": 0.3440203562340967, "grad_norm": 9.41208383013727, "learning_rate": 6.880267068354844e-06, "loss": 0.2201, "step": 16900 }, { "epoch": 0.3442239185750636, "grad_norm": 9.715055314615057, "learning_rate": 6.8843382323006155e-06, "loss": 0.2363, "step": 16910 }, { "epoch": 0.34442748091603054, "grad_norm": 12.039307289740945, "learning_rate": 6.888409396246387e-06, "loss": 0.3423, "step": 16920 }, { "epoch": 0.34463104325699745, "grad_norm": 13.551965790408298, "learning_rate": 6.89248056019216e-06, "loss": 0.2573, "step": 16930 }, { "epoch": 0.34483460559796436, "grad_norm": 22.580346491311712, "learning_rate": 6.896551724137932e-06, "loss": 0.2648, "step": 16940 }, { "epoch": 0.3450381679389313, "grad_norm": 9.879111665528114, "learning_rate": 6.9006228880837034e-06, "loss": 0.3144, "step": 16950 }, { "epoch": 0.34524173027989824, "grad_norm": 11.312924881318247, "learning_rate": 6.904694052029476e-06, "loss": 0.2576, "step": 16960 }, { "epoch": 0.34544529262086515, "grad_norm": 3.0581391424429194, "learning_rate": 6.908765215975249e-06, "loss": 0.3506, "step": 16970 }, { "epoch": 0.34564885496183206, "grad_norm": 7.763051258309887, "learning_rate": 6.91283637992102e-06, "loss": 0.3179, "step": 16980 }, { "epoch": 0.345852417302799, "grad_norm": 11.320541746222169, "learning_rate": 6.916907543866792e-06, "loss": 0.2816, "step": 16990 }, { "epoch": 0.3460559796437659, "grad_norm": 11.836404924104572, "learning_rate": 6.920978707812565e-06, "loss": 0.3096, "step": 17000 }, { "epoch": 0.3462595419847328, "grad_norm": 7.095105149082762, "learning_rate": 6.925049871758336e-06, "loss": 0.2068, "step": 17010 }, { "epoch": 0.34646310432569977, "grad_norm": 12.839478615984937, "learning_rate": 6.9291210357041085e-06, "loss": 0.3486, "step": 17020 }, { "epoch": 0.3466666666666667, "grad_norm": 11.527741233620146, "learning_rate": 6.933192199649881e-06, "loss": 0.2806, "step": 17030 }, { "epoch": 0.3468702290076336, "grad_norm": 10.832054885555067, "learning_rate": 6.937263363595652e-06, "loss": 0.3795, "step": 17040 }, { "epoch": 0.3470737913486005, "grad_norm": 5.928882092992003, "learning_rate": 6.941334527541425e-06, "loss": 0.2442, "step": 17050 }, { "epoch": 0.3472773536895674, "grad_norm": 17.25423866719426, "learning_rate": 6.945405691487197e-06, "loss": 0.2926, "step": 17060 }, { "epoch": 0.3474809160305343, "grad_norm": 13.990880680284533, "learning_rate": 6.949476855432968e-06, "loss": 0.2459, "step": 17070 }, { "epoch": 0.3476844783715013, "grad_norm": 12.166332302095979, "learning_rate": 6.953548019378741e-06, "loss": 0.412, "step": 17080 }, { "epoch": 0.3478880407124682, "grad_norm": 27.412021240925384, "learning_rate": 6.9576191833245135e-06, "loss": 0.2359, "step": 17090 }, { "epoch": 0.3480916030534351, "grad_norm": 10.514201647079577, "learning_rate": 6.961690347270286e-06, "loss": 0.2557, "step": 17100 }, { "epoch": 0.34829516539440203, "grad_norm": 18.359078130528832, "learning_rate": 6.965761511216057e-06, "loss": 0.4242, "step": 17110 }, { "epoch": 0.34849872773536894, "grad_norm": 9.773068766276486, "learning_rate": 6.96983267516183e-06, "loss": 0.4314, "step": 17120 }, { "epoch": 0.34870229007633585, "grad_norm": 2.84494807207681, "learning_rate": 6.973903839107602e-06, "loss": 0.2428, "step": 17130 }, { "epoch": 0.3489058524173028, "grad_norm": 15.650168216914292, "learning_rate": 6.977975003053373e-06, "loss": 0.2814, "step": 17140 }, { "epoch": 0.34910941475826973, "grad_norm": 17.48925593224514, "learning_rate": 6.982046166999146e-06, "loss": 0.48, "step": 17150 }, { "epoch": 0.34931297709923664, "grad_norm": 7.884463097795677, "learning_rate": 6.9861173309449185e-06, "loss": 0.2246, "step": 17160 }, { "epoch": 0.34951653944020356, "grad_norm": 12.035135741320564, "learning_rate": 6.9901884948906894e-06, "loss": 0.2609, "step": 17170 }, { "epoch": 0.34972010178117047, "grad_norm": 7.740618226712183, "learning_rate": 6.994259658836462e-06, "loss": 0.2102, "step": 17180 }, { "epoch": 0.3499236641221374, "grad_norm": 8.629578593478355, "learning_rate": 6.998330822782235e-06, "loss": 0.3099, "step": 17190 }, { "epoch": 0.35012722646310435, "grad_norm": 21.606921536015772, "learning_rate": 7.002401986728006e-06, "loss": 0.2895, "step": 17200 }, { "epoch": 0.35033078880407126, "grad_norm": 22.123314831352197, "learning_rate": 7.006473150673778e-06, "loss": 0.3741, "step": 17210 }, { "epoch": 0.35053435114503817, "grad_norm": 12.716293452908447, "learning_rate": 7.010544314619551e-06, "loss": 0.2468, "step": 17220 }, { "epoch": 0.3507379134860051, "grad_norm": 15.062660106340756, "learning_rate": 7.014615478565322e-06, "loss": 0.2877, "step": 17230 }, { "epoch": 0.350941475826972, "grad_norm": 8.993537027250929, "learning_rate": 7.0186866425110945e-06, "loss": 0.3003, "step": 17240 }, { "epoch": 0.3511450381679389, "grad_norm": 5.12253367540522, "learning_rate": 7.022757806456867e-06, "loss": 0.3107, "step": 17250 }, { "epoch": 0.3513486005089059, "grad_norm": 7.678349517271129, "learning_rate": 7.026828970402638e-06, "loss": 0.3527, "step": 17260 }, { "epoch": 0.3515521628498728, "grad_norm": 37.080402786823996, "learning_rate": 7.030900134348411e-06, "loss": 0.3172, "step": 17270 }, { "epoch": 0.3517557251908397, "grad_norm": 8.566344243288638, "learning_rate": 7.034971298294183e-06, "loss": 0.2524, "step": 17280 }, { "epoch": 0.3519592875318066, "grad_norm": 14.280252752170593, "learning_rate": 7.039042462239954e-06, "loss": 0.3202, "step": 17290 }, { "epoch": 0.3521628498727735, "grad_norm": 13.620416862227932, "learning_rate": 7.043113626185727e-06, "loss": 0.239, "step": 17300 }, { "epoch": 0.35236641221374043, "grad_norm": 13.03462301954114, "learning_rate": 7.0471847901314995e-06, "loss": 0.3094, "step": 17310 }, { "epoch": 0.3525699745547074, "grad_norm": 12.819161657034206, "learning_rate": 7.051255954077272e-06, "loss": 0.278, "step": 17320 }, { "epoch": 0.3527735368956743, "grad_norm": 20.15969629180658, "learning_rate": 7.055327118023043e-06, "loss": 0.2561, "step": 17330 }, { "epoch": 0.3529770992366412, "grad_norm": 33.82149467119727, "learning_rate": 7.059398281968816e-06, "loss": 0.2742, "step": 17340 }, { "epoch": 0.35318066157760813, "grad_norm": 14.79455293874363, "learning_rate": 7.063469445914588e-06, "loss": 0.2902, "step": 17350 }, { "epoch": 0.35338422391857505, "grad_norm": 7.329557669438122, "learning_rate": 7.067540609860359e-06, "loss": 0.3007, "step": 17360 }, { "epoch": 0.35358778625954196, "grad_norm": 7.212666267263884, "learning_rate": 7.071611773806132e-06, "loss": 0.2133, "step": 17370 }, { "epoch": 0.3537913486005089, "grad_norm": 6.44153345471054, "learning_rate": 7.0756829377519045e-06, "loss": 0.2277, "step": 17380 }, { "epoch": 0.35399491094147584, "grad_norm": 5.390731680792655, "learning_rate": 7.0797541016976754e-06, "loss": 0.3688, "step": 17390 }, { "epoch": 0.35419847328244275, "grad_norm": 14.221594090939213, "learning_rate": 7.083825265643448e-06, "loss": 0.2426, "step": 17400 }, { "epoch": 0.35440203562340966, "grad_norm": 15.474389143105993, "learning_rate": 7.087896429589221e-06, "loss": 0.2888, "step": 17410 }, { "epoch": 0.3546055979643766, "grad_norm": 11.707066453286961, "learning_rate": 7.091967593534992e-06, "loss": 0.2728, "step": 17420 }, { "epoch": 0.3548091603053435, "grad_norm": 4.958901759762281, "learning_rate": 7.096038757480764e-06, "loss": 0.2572, "step": 17430 }, { "epoch": 0.35501272264631045, "grad_norm": 25.19105064474854, "learning_rate": 7.100109921426537e-06, "loss": 0.3152, "step": 17440 }, { "epoch": 0.35521628498727736, "grad_norm": 13.992440162760358, "learning_rate": 7.104181085372308e-06, "loss": 0.2818, "step": 17450 }, { "epoch": 0.3554198473282443, "grad_norm": 20.895381343117197, "learning_rate": 7.1082522493180804e-06, "loss": 0.2808, "step": 17460 }, { "epoch": 0.3556234096692112, "grad_norm": 20.976146722300314, "learning_rate": 7.112323413263853e-06, "loss": 0.3102, "step": 17470 }, { "epoch": 0.3558269720101781, "grad_norm": 4.71812270643489, "learning_rate": 7.116394577209624e-06, "loss": 0.5678, "step": 17480 }, { "epoch": 0.356030534351145, "grad_norm": 9.401730500456495, "learning_rate": 7.120465741155397e-06, "loss": 0.3593, "step": 17490 }, { "epoch": 0.356234096692112, "grad_norm": 10.27726574002072, "learning_rate": 7.124536905101169e-06, "loss": 0.3305, "step": 17500 }, { "epoch": 0.3564376590330789, "grad_norm": 10.32968836305491, "learning_rate": 7.12860806904694e-06, "loss": 0.2673, "step": 17510 }, { "epoch": 0.3566412213740458, "grad_norm": 6.137192361205209, "learning_rate": 7.132679232992713e-06, "loss": 0.3478, "step": 17520 }, { "epoch": 0.3568447837150127, "grad_norm": 12.076503466806347, "learning_rate": 7.1367503969384855e-06, "loss": 0.3135, "step": 17530 }, { "epoch": 0.3570483460559796, "grad_norm": 14.180529292199278, "learning_rate": 7.140821560884258e-06, "loss": 0.3967, "step": 17540 }, { "epoch": 0.35725190839694654, "grad_norm": 7.939073897334162, "learning_rate": 7.144892724830029e-06, "loss": 0.2608, "step": 17550 }, { "epoch": 0.3574554707379135, "grad_norm": 15.385910729903195, "learning_rate": 7.148963888775802e-06, "loss": 0.2535, "step": 17560 }, { "epoch": 0.3576590330788804, "grad_norm": 10.139867649429336, "learning_rate": 7.153035052721574e-06, "loss": 0.2918, "step": 17570 }, { "epoch": 0.35786259541984733, "grad_norm": 19.65801131294389, "learning_rate": 7.157106216667345e-06, "loss": 0.3314, "step": 17580 }, { "epoch": 0.35806615776081424, "grad_norm": 10.941678940379205, "learning_rate": 7.161177380613118e-06, "loss": 0.3104, "step": 17590 }, { "epoch": 0.35826972010178115, "grad_norm": 18.431759585672513, "learning_rate": 7.1652485445588905e-06, "loss": 0.2858, "step": 17600 }, { "epoch": 0.35847328244274806, "grad_norm": 10.712063450412492, "learning_rate": 7.169319708504661e-06, "loss": 0.3489, "step": 17610 }, { "epoch": 0.35867684478371503, "grad_norm": 9.710399692380799, "learning_rate": 7.173390872450434e-06, "loss": 0.3144, "step": 17620 }, { "epoch": 0.35888040712468194, "grad_norm": 3.140169846382267, "learning_rate": 7.177462036396207e-06, "loss": 0.3075, "step": 17630 }, { "epoch": 0.35908396946564886, "grad_norm": 11.077641136825005, "learning_rate": 7.181533200341978e-06, "loss": 0.2738, "step": 17640 }, { "epoch": 0.35928753180661577, "grad_norm": 17.78546034637191, "learning_rate": 7.18560436428775e-06, "loss": 0.2524, "step": 17650 }, { "epoch": 0.3594910941475827, "grad_norm": 9.806310682375416, "learning_rate": 7.189675528233523e-06, "loss": 0.3002, "step": 17660 }, { "epoch": 0.3596946564885496, "grad_norm": 8.86322783756342, "learning_rate": 7.193746692179294e-06, "loss": 0.3082, "step": 17670 }, { "epoch": 0.35989821882951656, "grad_norm": 11.979978076048164, "learning_rate": 7.1978178561250664e-06, "loss": 0.2947, "step": 17680 }, { "epoch": 0.36010178117048347, "grad_norm": 6.98324056643704, "learning_rate": 7.201889020070839e-06, "loss": 0.2624, "step": 17690 }, { "epoch": 0.3603053435114504, "grad_norm": 18.747935258140952, "learning_rate": 7.205960184016611e-06, "loss": 0.2837, "step": 17700 }, { "epoch": 0.3605089058524173, "grad_norm": 6.39019236957314, "learning_rate": 7.210031347962383e-06, "loss": 0.3274, "step": 17710 }, { "epoch": 0.3607124681933842, "grad_norm": 11.05143495296856, "learning_rate": 7.214102511908155e-06, "loss": 0.3188, "step": 17720 }, { "epoch": 0.3609160305343511, "grad_norm": 12.051437378153768, "learning_rate": 7.218173675853927e-06, "loss": 0.3714, "step": 17730 }, { "epoch": 0.3611195928753181, "grad_norm": 4.766423761103581, "learning_rate": 7.222244839799699e-06, "loss": 0.3337, "step": 17740 }, { "epoch": 0.361323155216285, "grad_norm": 14.914123280880938, "learning_rate": 7.2263160037454715e-06, "loss": 0.2679, "step": 17750 }, { "epoch": 0.3615267175572519, "grad_norm": 46.65517939752631, "learning_rate": 7.230387167691244e-06, "loss": 0.1568, "step": 17760 }, { "epoch": 0.3617302798982188, "grad_norm": 13.653810870683992, "learning_rate": 7.234458331637015e-06, "loss": 0.2506, "step": 17770 }, { "epoch": 0.36193384223918573, "grad_norm": 7.738831464794485, "learning_rate": 7.238529495582788e-06, "loss": 0.3708, "step": 17780 }, { "epoch": 0.36213740458015264, "grad_norm": 13.55671294511718, "learning_rate": 7.24260065952856e-06, "loss": 0.3495, "step": 17790 }, { "epoch": 0.3623409669211196, "grad_norm": 23.761173616464035, "learning_rate": 7.246671823474331e-06, "loss": 0.3094, "step": 17800 }, { "epoch": 0.3625445292620865, "grad_norm": 5.0148290287586565, "learning_rate": 7.250742987420104e-06, "loss": 0.1768, "step": 17810 }, { "epoch": 0.36274809160305344, "grad_norm": 7.682634020035224, "learning_rate": 7.2548141513658765e-06, "loss": 0.2972, "step": 17820 }, { "epoch": 0.36295165394402035, "grad_norm": 7.027884547891906, "learning_rate": 7.258885315311648e-06, "loss": 0.2693, "step": 17830 }, { "epoch": 0.36315521628498726, "grad_norm": 13.542424315439817, "learning_rate": 7.26295647925742e-06, "loss": 0.333, "step": 17840 }, { "epoch": 0.36335877862595417, "grad_norm": 24.391553816266534, "learning_rate": 7.267027643203193e-06, "loss": 0.2538, "step": 17850 }, { "epoch": 0.36356234096692114, "grad_norm": 9.09218831200478, "learning_rate": 7.2710988071489645e-06, "loss": 0.3742, "step": 17860 }, { "epoch": 0.36376590330788805, "grad_norm": 18.642931336967603, "learning_rate": 7.275169971094736e-06, "loss": 0.2956, "step": 17870 }, { "epoch": 0.36396946564885496, "grad_norm": 7.128926356360957, "learning_rate": 7.279241135040509e-06, "loss": 0.35, "step": 17880 }, { "epoch": 0.3641730279898219, "grad_norm": 16.216187887979434, "learning_rate": 7.283312298986281e-06, "loss": 0.2848, "step": 17890 }, { "epoch": 0.3643765903307888, "grad_norm": 9.506824544835592, "learning_rate": 7.2873834629320524e-06, "loss": 0.2436, "step": 17900 }, { "epoch": 0.36458015267175575, "grad_norm": 18.781547030245367, "learning_rate": 7.291454626877825e-06, "loss": 0.3303, "step": 17910 }, { "epoch": 0.36478371501272266, "grad_norm": 12.587931577881148, "learning_rate": 7.295525790823597e-06, "loss": 0.2902, "step": 17920 }, { "epoch": 0.3649872773536896, "grad_norm": 12.843487626002801, "learning_rate": 7.299596954769369e-06, "loss": 0.2875, "step": 17930 }, { "epoch": 0.3651908396946565, "grad_norm": 4.930314415384867, "learning_rate": 7.303668118715141e-06, "loss": 0.2785, "step": 17940 }, { "epoch": 0.3653944020356234, "grad_norm": 8.760897064807693, "learning_rate": 7.307739282660913e-06, "loss": 0.2931, "step": 17950 }, { "epoch": 0.3655979643765903, "grad_norm": 12.491274187108475, "learning_rate": 7.311810446606686e-06, "loss": 0.3038, "step": 17960 }, { "epoch": 0.3658015267175573, "grad_norm": 13.563262131755398, "learning_rate": 7.3158816105524575e-06, "loss": 0.3024, "step": 17970 }, { "epoch": 0.3660050890585242, "grad_norm": 3.82743005023599, "learning_rate": 7.31995277449823e-06, "loss": 0.2987, "step": 17980 }, { "epoch": 0.3662086513994911, "grad_norm": 15.01611357080071, "learning_rate": 7.324023938444002e-06, "loss": 0.3136, "step": 17990 }, { "epoch": 0.366412213740458, "grad_norm": 8.957616971096101, "learning_rate": 7.328095102389774e-06, "loss": 0.3133, "step": 18000 }, { "epoch": 0.3666157760814249, "grad_norm": 2.5753606070689647, "learning_rate": 7.332166266335546e-06, "loss": 0.236, "step": 18010 }, { "epoch": 0.36681933842239184, "grad_norm": 11.407571857418233, "learning_rate": 7.336237430281318e-06, "loss": 0.3064, "step": 18020 }, { "epoch": 0.3670229007633588, "grad_norm": 7.005959472846945, "learning_rate": 7.34030859422709e-06, "loss": 0.2609, "step": 18030 }, { "epoch": 0.3672264631043257, "grad_norm": 10.361498032035941, "learning_rate": 7.3443797581728625e-06, "loss": 0.2485, "step": 18040 }, { "epoch": 0.36743002544529263, "grad_norm": 21.937004308041487, "learning_rate": 7.348450922118634e-06, "loss": 0.296, "step": 18050 }, { "epoch": 0.36763358778625954, "grad_norm": 15.297063106951436, "learning_rate": 7.352522086064406e-06, "loss": 0.3263, "step": 18060 }, { "epoch": 0.36783715012722645, "grad_norm": 15.237825815662257, "learning_rate": 7.356593250010179e-06, "loss": 0.3262, "step": 18070 }, { "epoch": 0.36804071246819337, "grad_norm": 11.866311663779335, "learning_rate": 7.3606644139559504e-06, "loss": 0.3048, "step": 18080 }, { "epoch": 0.36824427480916033, "grad_norm": 17.458200502447642, "learning_rate": 7.364735577901723e-06, "loss": 0.2951, "step": 18090 }, { "epoch": 0.36844783715012724, "grad_norm": 14.700237064327666, "learning_rate": 7.368806741847495e-06, "loss": 0.2632, "step": 18100 }, { "epoch": 0.36865139949109416, "grad_norm": 14.221827365096123, "learning_rate": 7.372877905793267e-06, "loss": 0.2576, "step": 18110 }, { "epoch": 0.36885496183206107, "grad_norm": 4.7068070197734935, "learning_rate": 7.376949069739039e-06, "loss": 0.327, "step": 18120 }, { "epoch": 0.369058524173028, "grad_norm": 11.308143466261045, "learning_rate": 7.381020233684811e-06, "loss": 0.3511, "step": 18130 }, { "epoch": 0.3692620865139949, "grad_norm": 18.85701079580159, "learning_rate": 7.385091397630583e-06, "loss": 0.326, "step": 18140 }, { "epoch": 0.36946564885496186, "grad_norm": 15.142430497644849, "learning_rate": 7.3891625615763555e-06, "loss": 0.2419, "step": 18150 }, { "epoch": 0.36966921119592877, "grad_norm": 35.66647171165649, "learning_rate": 7.393233725522127e-06, "loss": 0.2704, "step": 18160 }, { "epoch": 0.3698727735368957, "grad_norm": 24.03597457960898, "learning_rate": 7.397304889467899e-06, "loss": 0.3239, "step": 18170 }, { "epoch": 0.3700763358778626, "grad_norm": 6.966512133841212, "learning_rate": 7.401376053413672e-06, "loss": 0.2791, "step": 18180 }, { "epoch": 0.3702798982188295, "grad_norm": 5.214234167613982, "learning_rate": 7.4054472173594434e-06, "loss": 0.2555, "step": 18190 }, { "epoch": 0.3704834605597964, "grad_norm": 19.417211803529028, "learning_rate": 7.409518381305216e-06, "loss": 0.4036, "step": 18200 }, { "epoch": 0.3706870229007634, "grad_norm": 25.186197597747597, "learning_rate": 7.413589545250988e-06, "loss": 0.2937, "step": 18210 }, { "epoch": 0.3708905852417303, "grad_norm": 11.027995033038469, "learning_rate": 7.4176607091967605e-06, "loss": 0.2281, "step": 18220 }, { "epoch": 0.3710941475826972, "grad_norm": 8.999564332256563, "learning_rate": 7.421731873142532e-06, "loss": 0.2215, "step": 18230 }, { "epoch": 0.3712977099236641, "grad_norm": 19.14929152534258, "learning_rate": 7.425803037088304e-06, "loss": 0.3703, "step": 18240 }, { "epoch": 0.37150127226463103, "grad_norm": 5.81384960189522, "learning_rate": 7.429874201034077e-06, "loss": 0.3763, "step": 18250 }, { "epoch": 0.37170483460559794, "grad_norm": 14.189925178418848, "learning_rate": 7.4339453649798485e-06, "loss": 0.2691, "step": 18260 }, { "epoch": 0.3719083969465649, "grad_norm": 16.35416716057049, "learning_rate": 7.43801652892562e-06, "loss": 0.2727, "step": 18270 }, { "epoch": 0.3721119592875318, "grad_norm": 10.413580978226342, "learning_rate": 7.442087692871393e-06, "loss": 0.227, "step": 18280 }, { "epoch": 0.37231552162849874, "grad_norm": 9.023138737766747, "learning_rate": 7.446158856817165e-06, "loss": 0.3754, "step": 18290 }, { "epoch": 0.37251908396946565, "grad_norm": 7.2936126441332325, "learning_rate": 7.4502300207629364e-06, "loss": 0.2952, "step": 18300 }, { "epoch": 0.37272264631043256, "grad_norm": 14.235508668880678, "learning_rate": 7.454301184708709e-06, "loss": 0.2167, "step": 18310 }, { "epoch": 0.37292620865139947, "grad_norm": 28.55763095047475, "learning_rate": 7.458372348654481e-06, "loss": 0.3072, "step": 18320 }, { "epoch": 0.37312977099236644, "grad_norm": 12.015329878436972, "learning_rate": 7.462443512600253e-06, "loss": 0.5438, "step": 18330 }, { "epoch": 0.37333333333333335, "grad_norm": 9.580874394130811, "learning_rate": 7.466514676546025e-06, "loss": 0.2661, "step": 18340 }, { "epoch": 0.37353689567430026, "grad_norm": 15.87581297743427, "learning_rate": 7.470585840491797e-06, "loss": 0.2894, "step": 18350 }, { "epoch": 0.3737404580152672, "grad_norm": 13.783561167908589, "learning_rate": 7.474657004437569e-06, "loss": 0.2754, "step": 18360 }, { "epoch": 0.3739440203562341, "grad_norm": 9.40209627467605, "learning_rate": 7.4787281683833415e-06, "loss": 0.3111, "step": 18370 }, { "epoch": 0.374147582697201, "grad_norm": 11.107628732134348, "learning_rate": 7.482799332329114e-06, "loss": 0.2828, "step": 18380 }, { "epoch": 0.37435114503816797, "grad_norm": 10.531354930323257, "learning_rate": 7.486870496274886e-06, "loss": 0.2479, "step": 18390 }, { "epoch": 0.3745547073791349, "grad_norm": 18.15677548796038, "learning_rate": 7.490941660220658e-06, "loss": 0.2941, "step": 18400 }, { "epoch": 0.3747582697201018, "grad_norm": 16.62585161236612, "learning_rate": 7.49501282416643e-06, "loss": 0.2248, "step": 18410 }, { "epoch": 0.3749618320610687, "grad_norm": 6.129944536992217, "learning_rate": 7.499083988112202e-06, "loss": 0.2791, "step": 18420 }, { "epoch": 0.3751653944020356, "grad_norm": 9.234122295967055, "learning_rate": 7.503155152057974e-06, "loss": 0.2711, "step": 18430 }, { "epoch": 0.3753689567430025, "grad_norm": 10.754375006689473, "learning_rate": 7.5072263160037465e-06, "loss": 0.2994, "step": 18440 }, { "epoch": 0.3755725190839695, "grad_norm": 5.410078105100662, "learning_rate": 7.511297479949518e-06, "loss": 0.2473, "step": 18450 }, { "epoch": 0.3757760814249364, "grad_norm": 6.0853420053778535, "learning_rate": 7.51536864389529e-06, "loss": 0.282, "step": 18460 }, { "epoch": 0.3759796437659033, "grad_norm": 5.377430419039069, "learning_rate": 7.519439807841063e-06, "loss": 0.3031, "step": 18470 }, { "epoch": 0.3761832061068702, "grad_norm": 8.767827158941872, "learning_rate": 7.5235109717868345e-06, "loss": 0.291, "step": 18480 }, { "epoch": 0.37638676844783714, "grad_norm": 22.477152421480035, "learning_rate": 7.527582135732606e-06, "loss": 0.3118, "step": 18490 }, { "epoch": 0.37659033078880405, "grad_norm": 6.051751418190506, "learning_rate": 7.531653299678379e-06, "loss": 0.2834, "step": 18500 }, { "epoch": 0.376793893129771, "grad_norm": 13.068283561216033, "learning_rate": 7.5357244636241515e-06, "loss": 0.5061, "step": 18510 }, { "epoch": 0.37699745547073793, "grad_norm": 15.689248565522645, "learning_rate": 7.5397956275699224e-06, "loss": 0.3822, "step": 18520 }, { "epoch": 0.37720101781170484, "grad_norm": 40.823400778349885, "learning_rate": 7.543866791515695e-06, "loss": 0.3631, "step": 18530 }, { "epoch": 0.37740458015267175, "grad_norm": 9.313213444435618, "learning_rate": 7.547937955461468e-06, "loss": 0.2173, "step": 18540 }, { "epoch": 0.37760814249363867, "grad_norm": 14.152932034253237, "learning_rate": 7.552009119407239e-06, "loss": 0.2923, "step": 18550 }, { "epoch": 0.3778117048346056, "grad_norm": 13.00853965899215, "learning_rate": 7.556080283353011e-06, "loss": 0.3728, "step": 18560 }, { "epoch": 0.37801526717557254, "grad_norm": 13.874985072874324, "learning_rate": 7.560151447298784e-06, "loss": 0.2034, "step": 18570 }, { "epoch": 0.37821882951653946, "grad_norm": 13.43029385895408, "learning_rate": 7.564222611244555e-06, "loss": 0.3107, "step": 18580 }, { "epoch": 0.37842239185750637, "grad_norm": 17.77025299998178, "learning_rate": 7.5682937751903275e-06, "loss": 0.2392, "step": 18590 }, { "epoch": 0.3786259541984733, "grad_norm": 19.51190844853955, "learning_rate": 7.5723649391361e-06, "loss": 0.3404, "step": 18600 }, { "epoch": 0.3788295165394402, "grad_norm": 7.994330700161622, "learning_rate": 7.576436103081872e-06, "loss": 0.2627, "step": 18610 }, { "epoch": 0.3790330788804071, "grad_norm": 8.045569805769354, "learning_rate": 7.580507267027644e-06, "loss": 0.2792, "step": 18620 }, { "epoch": 0.37923664122137407, "grad_norm": 10.164887634937232, "learning_rate": 7.584578430973416e-06, "loss": 0.2408, "step": 18630 }, { "epoch": 0.379440203562341, "grad_norm": 16.245922526113695, "learning_rate": 7.588649594919189e-06, "loss": 0.3423, "step": 18640 }, { "epoch": 0.3796437659033079, "grad_norm": 10.703803595916263, "learning_rate": 7.59272075886496e-06, "loss": 0.282, "step": 18650 }, { "epoch": 0.3798473282442748, "grad_norm": 10.429837384493236, "learning_rate": 7.5967919228107325e-06, "loss": 0.3312, "step": 18660 }, { "epoch": 0.3800508905852417, "grad_norm": 5.090913940353683, "learning_rate": 7.600863086756505e-06, "loss": 0.2717, "step": 18670 }, { "epoch": 0.38025445292620863, "grad_norm": 12.566081933952397, "learning_rate": 7.604934250702276e-06, "loss": 0.3341, "step": 18680 }, { "epoch": 0.3804580152671756, "grad_norm": 12.22012897697494, "learning_rate": 7.609005414648049e-06, "loss": 0.3753, "step": 18690 }, { "epoch": 0.3806615776081425, "grad_norm": 7.535020605780411, "learning_rate": 7.613076578593821e-06, "loss": 0.3074, "step": 18700 }, { "epoch": 0.3808651399491094, "grad_norm": 11.932914715144433, "learning_rate": 7.617147742539592e-06, "loss": 0.2812, "step": 18710 }, { "epoch": 0.38106870229007633, "grad_norm": 10.954633704146936, "learning_rate": 7.621218906485365e-06, "loss": 0.2275, "step": 18720 }, { "epoch": 0.38127226463104325, "grad_norm": 11.904101120889283, "learning_rate": 7.6252900704311375e-06, "loss": 0.2845, "step": 18730 }, { "epoch": 0.38147582697201016, "grad_norm": 8.578840558428356, "learning_rate": 7.6293612343769084e-06, "loss": 0.4309, "step": 18740 }, { "epoch": 0.3816793893129771, "grad_norm": 9.495146358837088, "learning_rate": 7.633432398322681e-06, "loss": 0.3675, "step": 18750 }, { "epoch": 0.38188295165394404, "grad_norm": 13.851101176577524, "learning_rate": 7.637503562268454e-06, "loss": 0.3936, "step": 18760 }, { "epoch": 0.38208651399491095, "grad_norm": 11.162882465793128, "learning_rate": 7.641574726214225e-06, "loss": 0.2864, "step": 18770 }, { "epoch": 0.38229007633587786, "grad_norm": 13.909541356600391, "learning_rate": 7.645645890159997e-06, "loss": 0.3224, "step": 18780 }, { "epoch": 0.38249363867684477, "grad_norm": 12.553611647717492, "learning_rate": 7.64971705410577e-06, "loss": 0.3318, "step": 18790 }, { "epoch": 0.3826972010178117, "grad_norm": 7.580211574569078, "learning_rate": 7.65378821805154e-06, "loss": 0.2619, "step": 18800 }, { "epoch": 0.38290076335877865, "grad_norm": 3.3515516895832964, "learning_rate": 7.657859381997313e-06, "loss": 0.3099, "step": 18810 }, { "epoch": 0.38310432569974556, "grad_norm": 6.016675888640732, "learning_rate": 7.661930545943086e-06, "loss": 0.3251, "step": 18820 }, { "epoch": 0.3833078880407125, "grad_norm": 22.415350203133965, "learning_rate": 7.666001709888859e-06, "loss": 0.3505, "step": 18830 }, { "epoch": 0.3835114503816794, "grad_norm": 16.718487847741894, "learning_rate": 7.67007287383463e-06, "loss": 0.2715, "step": 18840 }, { "epoch": 0.3837150127226463, "grad_norm": 18.466345715925822, "learning_rate": 7.674144037780402e-06, "loss": 0.2835, "step": 18850 }, { "epoch": 0.3839185750636132, "grad_norm": 28.1096027120529, "learning_rate": 7.678215201726175e-06, "loss": 0.412, "step": 18860 }, { "epoch": 0.3841221374045802, "grad_norm": 9.693814494674431, "learning_rate": 7.682286365671946e-06, "loss": 0.3915, "step": 18870 }, { "epoch": 0.3843256997455471, "grad_norm": 7.001720530422488, "learning_rate": 7.686357529617718e-06, "loss": 0.302, "step": 18880 }, { "epoch": 0.384529262086514, "grad_norm": 11.374731364873062, "learning_rate": 7.690428693563491e-06, "loss": 0.3556, "step": 18890 }, { "epoch": 0.3847328244274809, "grad_norm": 23.880894053067045, "learning_rate": 7.694499857509262e-06, "loss": 0.2922, "step": 18900 }, { "epoch": 0.3849363867684478, "grad_norm": 5.748672183831052, "learning_rate": 7.698571021455035e-06, "loss": 0.3258, "step": 18910 }, { "epoch": 0.38513994910941474, "grad_norm": 10.669301999608255, "learning_rate": 7.702642185400807e-06, "loss": 0.3224, "step": 18920 }, { "epoch": 0.3853435114503817, "grad_norm": 8.955451801484985, "learning_rate": 7.706713349346578e-06, "loss": 0.3035, "step": 18930 }, { "epoch": 0.3855470737913486, "grad_norm": 19.239147836073187, "learning_rate": 7.710784513292351e-06, "loss": 0.4508, "step": 18940 }, { "epoch": 0.3857506361323155, "grad_norm": 12.981624656732933, "learning_rate": 7.714855677238123e-06, "loss": 0.3536, "step": 18950 }, { "epoch": 0.38595419847328244, "grad_norm": 6.927339764605552, "learning_rate": 7.718926841183894e-06, "loss": 0.2645, "step": 18960 }, { "epoch": 0.38615776081424935, "grad_norm": 13.234747766670237, "learning_rate": 7.722998005129667e-06, "loss": 0.4083, "step": 18970 }, { "epoch": 0.38636132315521626, "grad_norm": 13.795937816077991, "learning_rate": 7.72706916907544e-06, "loss": 0.2707, "step": 18980 }, { "epoch": 0.38656488549618323, "grad_norm": 17.02217869082739, "learning_rate": 7.73114033302121e-06, "loss": 0.3396, "step": 18990 }, { "epoch": 0.38676844783715014, "grad_norm": 18.194103399187266, "learning_rate": 7.735211496966983e-06, "loss": 0.3658, "step": 19000 }, { "epoch": 0.38697201017811705, "grad_norm": 8.25002916769267, "learning_rate": 7.739282660912756e-06, "loss": 0.2673, "step": 19010 }, { "epoch": 0.38717557251908397, "grad_norm": 7.808708132156991, "learning_rate": 7.743353824858527e-06, "loss": 0.3004, "step": 19020 }, { "epoch": 0.3873791348600509, "grad_norm": 11.281342664937267, "learning_rate": 7.7474249888043e-06, "loss": 0.2091, "step": 19030 }, { "epoch": 0.3875826972010178, "grad_norm": 13.444279810840083, "learning_rate": 7.751496152750072e-06, "loss": 0.3161, "step": 19040 }, { "epoch": 0.38778625954198476, "grad_norm": 19.112502864877012, "learning_rate": 7.755567316695845e-06, "loss": 0.3026, "step": 19050 }, { "epoch": 0.38798982188295167, "grad_norm": 4.673130726523178, "learning_rate": 7.759638480641616e-06, "loss": 0.2888, "step": 19060 }, { "epoch": 0.3881933842239186, "grad_norm": 13.632443274089567, "learning_rate": 7.763709644587388e-06, "loss": 0.306, "step": 19070 }, { "epoch": 0.3883969465648855, "grad_norm": 8.324337574665321, "learning_rate": 7.767780808533161e-06, "loss": 0.2077, "step": 19080 }, { "epoch": 0.3886005089058524, "grad_norm": 8.728217869221483, "learning_rate": 7.771851972478932e-06, "loss": 0.4235, "step": 19090 }, { "epoch": 0.3888040712468193, "grad_norm": 4.4505149751422515, "learning_rate": 7.775923136424704e-06, "loss": 0.2356, "step": 19100 }, { "epoch": 0.3890076335877863, "grad_norm": 7.656481339181804, "learning_rate": 7.779994300370477e-06, "loss": 0.2583, "step": 19110 }, { "epoch": 0.3892111959287532, "grad_norm": 22.192440619291972, "learning_rate": 7.784065464316248e-06, "loss": 0.2986, "step": 19120 }, { "epoch": 0.3894147582697201, "grad_norm": 11.339936456551708, "learning_rate": 7.78813662826202e-06, "loss": 0.22, "step": 19130 }, { "epoch": 0.389618320610687, "grad_norm": 13.202226153064384, "learning_rate": 7.792207792207793e-06, "loss": 0.3354, "step": 19140 }, { "epoch": 0.38982188295165393, "grad_norm": 23.73516185227623, "learning_rate": 7.796278956153564e-06, "loss": 0.3586, "step": 19150 }, { "epoch": 0.39002544529262084, "grad_norm": 8.018054107681882, "learning_rate": 7.800350120099337e-06, "loss": 0.34, "step": 19160 }, { "epoch": 0.3902290076335878, "grad_norm": 17.62158794002857, "learning_rate": 7.80442128404511e-06, "loss": 0.2186, "step": 19170 }, { "epoch": 0.3904325699745547, "grad_norm": 11.607925323904295, "learning_rate": 7.80849244799088e-06, "loss": 0.2258, "step": 19180 }, { "epoch": 0.39063613231552163, "grad_norm": 5.638911438194639, "learning_rate": 7.812563611936653e-06, "loss": 0.3121, "step": 19190 }, { "epoch": 0.39083969465648855, "grad_norm": 15.530902305683957, "learning_rate": 7.816634775882426e-06, "loss": 0.3209, "step": 19200 }, { "epoch": 0.39104325699745546, "grad_norm": 18.326632622090028, "learning_rate": 7.820705939828197e-06, "loss": 0.3254, "step": 19210 }, { "epoch": 0.39124681933842237, "grad_norm": 8.201925355185487, "learning_rate": 7.82477710377397e-06, "loss": 0.3967, "step": 19220 }, { "epoch": 0.39145038167938934, "grad_norm": 9.826269543293412, "learning_rate": 7.828848267719742e-06, "loss": 0.2874, "step": 19230 }, { "epoch": 0.39165394402035625, "grad_norm": 10.159645068220852, "learning_rate": 7.832919431665513e-06, "loss": 0.2225, "step": 19240 }, { "epoch": 0.39185750636132316, "grad_norm": 23.847831014512188, "learning_rate": 7.836990595611285e-06, "loss": 0.2386, "step": 19250 }, { "epoch": 0.39206106870229007, "grad_norm": 16.4624981800684, "learning_rate": 7.841061759557058e-06, "loss": 0.2204, "step": 19260 }, { "epoch": 0.392264631043257, "grad_norm": 25.613567997337327, "learning_rate": 7.84513292350283e-06, "loss": 0.4282, "step": 19270 }, { "epoch": 0.3924681933842239, "grad_norm": 9.766878904008154, "learning_rate": 7.849204087448602e-06, "loss": 0.4131, "step": 19280 }, { "epoch": 0.39267175572519086, "grad_norm": 16.409615506472093, "learning_rate": 7.853275251394374e-06, "loss": 0.331, "step": 19290 }, { "epoch": 0.3928753180661578, "grad_norm": 8.82293005455463, "learning_rate": 7.857346415340147e-06, "loss": 0.3279, "step": 19300 }, { "epoch": 0.3930788804071247, "grad_norm": 9.661368132938598, "learning_rate": 7.861417579285918e-06, "loss": 0.2541, "step": 19310 }, { "epoch": 0.3932824427480916, "grad_norm": 22.646057681357703, "learning_rate": 7.86548874323169e-06, "loss": 0.3167, "step": 19320 }, { "epoch": 0.3934860050890585, "grad_norm": 4.578118494210807, "learning_rate": 7.869559907177463e-06, "loss": 0.3284, "step": 19330 }, { "epoch": 0.3936895674300254, "grad_norm": 4.360799309538848, "learning_rate": 7.873631071123234e-06, "loss": 0.313, "step": 19340 }, { "epoch": 0.3938931297709924, "grad_norm": 3.985669298701369, "learning_rate": 7.877702235069007e-06, "loss": 0.2941, "step": 19350 }, { "epoch": 0.3940966921119593, "grad_norm": 10.519839886911694, "learning_rate": 7.88177339901478e-06, "loss": 0.298, "step": 19360 }, { "epoch": 0.3943002544529262, "grad_norm": 4.285253739390765, "learning_rate": 7.88584456296055e-06, "loss": 0.2618, "step": 19370 }, { "epoch": 0.3945038167938931, "grad_norm": 17.61188650444881, "learning_rate": 7.889915726906323e-06, "loss": 0.3247, "step": 19380 }, { "epoch": 0.39470737913486004, "grad_norm": 6.3874180771419775, "learning_rate": 7.893986890852095e-06, "loss": 0.2814, "step": 19390 }, { "epoch": 0.39491094147582695, "grad_norm": 8.789245313975227, "learning_rate": 7.898058054797866e-06, "loss": 0.2503, "step": 19400 }, { "epoch": 0.3951145038167939, "grad_norm": 14.863516460857674, "learning_rate": 7.902129218743639e-06, "loss": 0.3481, "step": 19410 }, { "epoch": 0.39531806615776083, "grad_norm": 3.1157370308408017, "learning_rate": 7.906200382689412e-06, "loss": 0.2116, "step": 19420 }, { "epoch": 0.39552162849872774, "grad_norm": 23.23417068049122, "learning_rate": 7.910271546635183e-06, "loss": 0.3723, "step": 19430 }, { "epoch": 0.39572519083969465, "grad_norm": 13.769013318923847, "learning_rate": 7.914342710580955e-06, "loss": 0.3323, "step": 19440 }, { "epoch": 0.39592875318066156, "grad_norm": 8.777486344127018, "learning_rate": 7.918413874526728e-06, "loss": 0.2951, "step": 19450 }, { "epoch": 0.3961323155216285, "grad_norm": 22.80290107744024, "learning_rate": 7.922485038472499e-06, "loss": 0.3565, "step": 19460 }, { "epoch": 0.39633587786259544, "grad_norm": 9.394024518444922, "learning_rate": 7.926556202418271e-06, "loss": 0.4101, "step": 19470 }, { "epoch": 0.39653944020356235, "grad_norm": 13.141212016853112, "learning_rate": 7.930627366364044e-06, "loss": 0.3246, "step": 19480 }, { "epoch": 0.39674300254452927, "grad_norm": 8.52377230477212, "learning_rate": 7.934698530309817e-06, "loss": 0.2471, "step": 19490 }, { "epoch": 0.3969465648854962, "grad_norm": 41.51953694569134, "learning_rate": 7.938769694255588e-06, "loss": 0.3563, "step": 19500 }, { "epoch": 0.3971501272264631, "grad_norm": 15.923220264945233, "learning_rate": 7.94284085820136e-06, "loss": 0.2285, "step": 19510 }, { "epoch": 0.39735368956743, "grad_norm": 12.794777920043959, "learning_rate": 7.946912022147133e-06, "loss": 0.3317, "step": 19520 }, { "epoch": 0.39755725190839697, "grad_norm": 12.01113217080618, "learning_rate": 7.950983186092904e-06, "loss": 0.3281, "step": 19530 }, { "epoch": 0.3977608142493639, "grad_norm": 8.047535049806806, "learning_rate": 7.955054350038676e-06, "loss": 0.2797, "step": 19540 }, { "epoch": 0.3979643765903308, "grad_norm": 13.195324858763561, "learning_rate": 7.959125513984449e-06, "loss": 0.2397, "step": 19550 }, { "epoch": 0.3981679389312977, "grad_norm": 9.238148962799546, "learning_rate": 7.96319667793022e-06, "loss": 0.4536, "step": 19560 }, { "epoch": 0.3983715012722646, "grad_norm": 11.18254367937658, "learning_rate": 7.967267841875993e-06, "loss": 0.3616, "step": 19570 }, { "epoch": 0.39857506361323153, "grad_norm": 12.643112983212832, "learning_rate": 7.971339005821765e-06, "loss": 0.3105, "step": 19580 }, { "epoch": 0.3987786259541985, "grad_norm": 8.487163301973702, "learning_rate": 7.975410169767536e-06, "loss": 0.3738, "step": 19590 }, { "epoch": 0.3989821882951654, "grad_norm": 15.758331427433864, "learning_rate": 7.979481333713309e-06, "loss": 0.2588, "step": 19600 }, { "epoch": 0.3991857506361323, "grad_norm": 22.131356084118043, "learning_rate": 7.983552497659081e-06, "loss": 0.3221, "step": 19610 }, { "epoch": 0.39938931297709923, "grad_norm": 6.406376573430185, "learning_rate": 7.987623661604852e-06, "loss": 0.2798, "step": 19620 }, { "epoch": 0.39959287531806614, "grad_norm": 12.364154036219999, "learning_rate": 7.991694825550625e-06, "loss": 0.3197, "step": 19630 }, { "epoch": 0.39979643765903305, "grad_norm": 7.1450685040072575, "learning_rate": 7.995765989496398e-06, "loss": 0.274, "step": 19640 }, { "epoch": 0.4, "grad_norm": 6.44041657979467, "learning_rate": 7.999837153442169e-06, "loss": 0.3624, "step": 19650 }, { "epoch": 0.40020356234096693, "grad_norm": 9.072964658639494, "learning_rate": 8.003908317387941e-06, "loss": 0.2672, "step": 19660 }, { "epoch": 0.40040712468193385, "grad_norm": 3.7622941256306595, "learning_rate": 8.007979481333714e-06, "loss": 0.2545, "step": 19670 }, { "epoch": 0.40061068702290076, "grad_norm": 11.975132311184904, "learning_rate": 8.012050645279485e-06, "loss": 0.2922, "step": 19680 }, { "epoch": 0.40081424936386767, "grad_norm": 31.655594889832592, "learning_rate": 8.016121809225257e-06, "loss": 0.2494, "step": 19690 }, { "epoch": 0.4010178117048346, "grad_norm": 22.101835000552224, "learning_rate": 8.02019297317103e-06, "loss": 0.2159, "step": 19700 }, { "epoch": 0.40122137404580155, "grad_norm": 38.33107381681635, "learning_rate": 8.024264137116803e-06, "loss": 0.3288, "step": 19710 }, { "epoch": 0.40142493638676846, "grad_norm": 13.433320651949911, "learning_rate": 8.028335301062574e-06, "loss": 0.3672, "step": 19720 }, { "epoch": 0.4016284987277354, "grad_norm": 8.124459669497798, "learning_rate": 8.032406465008346e-06, "loss": 0.2514, "step": 19730 }, { "epoch": 0.4018320610687023, "grad_norm": 4.581815907964372, "learning_rate": 8.036477628954119e-06, "loss": 0.331, "step": 19740 }, { "epoch": 0.4020356234096692, "grad_norm": 8.62082943114584, "learning_rate": 8.04054879289989e-06, "loss": 0.3539, "step": 19750 }, { "epoch": 0.4022391857506361, "grad_norm": 11.222613925266897, "learning_rate": 8.044619956845662e-06, "loss": 0.3193, "step": 19760 }, { "epoch": 0.4024427480916031, "grad_norm": 11.34782931215857, "learning_rate": 8.048691120791435e-06, "loss": 0.1807, "step": 19770 }, { "epoch": 0.40264631043257, "grad_norm": 7.832276303507367, "learning_rate": 8.052762284737206e-06, "loss": 0.2511, "step": 19780 }, { "epoch": 0.4028498727735369, "grad_norm": 19.614983025609845, "learning_rate": 8.056833448682979e-06, "loss": 0.3559, "step": 19790 }, { "epoch": 0.4030534351145038, "grad_norm": 17.126315298084155, "learning_rate": 8.060904612628751e-06, "loss": 0.2942, "step": 19800 }, { "epoch": 0.4032569974554707, "grad_norm": 15.418169244722066, "learning_rate": 8.064975776574522e-06, "loss": 0.3271, "step": 19810 }, { "epoch": 0.40346055979643763, "grad_norm": 14.892202961261527, "learning_rate": 8.069046940520295e-06, "loss": 0.3195, "step": 19820 }, { "epoch": 0.4036641221374046, "grad_norm": 11.530703332420725, "learning_rate": 8.073118104466067e-06, "loss": 0.294, "step": 19830 }, { "epoch": 0.4038676844783715, "grad_norm": 5.490277340158469, "learning_rate": 8.077189268411838e-06, "loss": 0.284, "step": 19840 }, { "epoch": 0.4040712468193384, "grad_norm": 18.62205356823778, "learning_rate": 8.081260432357611e-06, "loss": 0.2889, "step": 19850 }, { "epoch": 0.40427480916030534, "grad_norm": 24.176523443484403, "learning_rate": 8.085331596303384e-06, "loss": 0.3429, "step": 19860 }, { "epoch": 0.40447837150127225, "grad_norm": 4.148454559880532, "learning_rate": 8.089402760249155e-06, "loss": 0.2995, "step": 19870 }, { "epoch": 0.40468193384223916, "grad_norm": 11.2098509906081, "learning_rate": 8.093473924194927e-06, "loss": 0.3379, "step": 19880 }, { "epoch": 0.40488549618320613, "grad_norm": 22.77995201644793, "learning_rate": 8.0975450881407e-06, "loss": 0.4087, "step": 19890 }, { "epoch": 0.40508905852417304, "grad_norm": 11.564276467356777, "learning_rate": 8.101616252086472e-06, "loss": 0.3223, "step": 19900 }, { "epoch": 0.40529262086513995, "grad_norm": 12.305368386780009, "learning_rate": 8.105687416032243e-06, "loss": 0.3354, "step": 19910 }, { "epoch": 0.40549618320610686, "grad_norm": 7.148706632229406, "learning_rate": 8.109758579978016e-06, "loss": 0.3399, "step": 19920 }, { "epoch": 0.4056997455470738, "grad_norm": 11.943605694222018, "learning_rate": 8.113829743923789e-06, "loss": 0.3502, "step": 19930 }, { "epoch": 0.4059033078880407, "grad_norm": 1.5384654191797666, "learning_rate": 8.11790090786956e-06, "loss": 0.2685, "step": 19940 }, { "epoch": 0.40610687022900765, "grad_norm": 9.23061790189204, "learning_rate": 8.121972071815332e-06, "loss": 0.3686, "step": 19950 }, { "epoch": 0.40631043256997457, "grad_norm": 23.255810334524515, "learning_rate": 8.126043235761105e-06, "loss": 0.2917, "step": 19960 }, { "epoch": 0.4065139949109415, "grad_norm": 8.995193966209612, "learning_rate": 8.130114399706876e-06, "loss": 0.3194, "step": 19970 }, { "epoch": 0.4067175572519084, "grad_norm": 6.3041107463999415, "learning_rate": 8.134185563652648e-06, "loss": 0.2179, "step": 19980 }, { "epoch": 0.4069211195928753, "grad_norm": 2.9776430186434317, "learning_rate": 8.138256727598421e-06, "loss": 0.3983, "step": 19990 }, { "epoch": 0.4071246819338422, "grad_norm": 2.67868660269264, "learning_rate": 8.142327891544192e-06, "loss": 0.2631, "step": 20000 }, { "epoch": 0.4073282442748092, "grad_norm": 11.157828362213436, "learning_rate": 8.146399055489965e-06, "loss": 0.2786, "step": 20010 }, { "epoch": 0.4075318066157761, "grad_norm": 11.792983990448436, "learning_rate": 8.150470219435737e-06, "loss": 0.4213, "step": 20020 }, { "epoch": 0.407735368956743, "grad_norm": 3.901041350702766, "learning_rate": 8.154541383381508e-06, "loss": 0.2971, "step": 20030 }, { "epoch": 0.4079389312977099, "grad_norm": 9.525461269260708, "learning_rate": 8.15861254732728e-06, "loss": 0.3105, "step": 20040 }, { "epoch": 0.40814249363867683, "grad_norm": 5.906329341540563, "learning_rate": 8.162683711273053e-06, "loss": 0.2801, "step": 20050 }, { "epoch": 0.40834605597964374, "grad_norm": 4.536550465996101, "learning_rate": 8.166754875218826e-06, "loss": 0.2903, "step": 20060 }, { "epoch": 0.4085496183206107, "grad_norm": 6.310653963175333, "learning_rate": 8.170826039164597e-06, "loss": 0.2574, "step": 20070 }, { "epoch": 0.4087531806615776, "grad_norm": 19.691630990871666, "learning_rate": 8.17489720311037e-06, "loss": 0.3093, "step": 20080 }, { "epoch": 0.40895674300254453, "grad_norm": 6.802160129913813, "learning_rate": 8.178968367056142e-06, "loss": 0.2627, "step": 20090 }, { "epoch": 0.40916030534351144, "grad_norm": 12.350301563195154, "learning_rate": 8.183039531001913e-06, "loss": 0.3155, "step": 20100 }, { "epoch": 0.40936386768447836, "grad_norm": 3.937643822631882, "learning_rate": 8.187110694947686e-06, "loss": 0.2891, "step": 20110 }, { "epoch": 0.40956743002544527, "grad_norm": 14.295568248442457, "learning_rate": 8.191181858893458e-06, "loss": 0.3333, "step": 20120 }, { "epoch": 0.40977099236641223, "grad_norm": 12.096038461133167, "learning_rate": 8.19525302283923e-06, "loss": 0.2803, "step": 20130 }, { "epoch": 0.40997455470737915, "grad_norm": 11.519698925993998, "learning_rate": 8.199324186785002e-06, "loss": 0.3459, "step": 20140 }, { "epoch": 0.41017811704834606, "grad_norm": 5.99788693885721, "learning_rate": 8.203395350730775e-06, "loss": 0.2518, "step": 20150 }, { "epoch": 0.41038167938931297, "grad_norm": 8.134556429051484, "learning_rate": 8.207466514676546e-06, "loss": 0.1943, "step": 20160 }, { "epoch": 0.4105852417302799, "grad_norm": 11.011356182330651, "learning_rate": 8.211537678622318e-06, "loss": 0.2053, "step": 20170 }, { "epoch": 0.4107888040712468, "grad_norm": 14.090134616151946, "learning_rate": 8.215608842568091e-06, "loss": 0.3027, "step": 20180 }, { "epoch": 0.41099236641221376, "grad_norm": 21.166734165762925, "learning_rate": 8.219680006513863e-06, "loss": 0.187, "step": 20190 }, { "epoch": 0.4111959287531807, "grad_norm": 16.446873422754095, "learning_rate": 8.223751170459634e-06, "loss": 0.3693, "step": 20200 }, { "epoch": 0.4113994910941476, "grad_norm": 12.917191193306117, "learning_rate": 8.227822334405407e-06, "loss": 0.2484, "step": 20210 }, { "epoch": 0.4116030534351145, "grad_norm": 4.376664747523803, "learning_rate": 8.23189349835118e-06, "loss": 0.3638, "step": 20220 }, { "epoch": 0.4118066157760814, "grad_norm": 8.154733792131179, "learning_rate": 8.23596466229695e-06, "loss": 0.2912, "step": 20230 }, { "epoch": 0.4120101781170483, "grad_norm": 7.776045696306847, "learning_rate": 8.240035826242723e-06, "loss": 0.2937, "step": 20240 }, { "epoch": 0.4122137404580153, "grad_norm": 22.31489628440051, "learning_rate": 8.244106990188496e-06, "loss": 0.3994, "step": 20250 }, { "epoch": 0.4124173027989822, "grad_norm": 11.135079690360753, "learning_rate": 8.248178154134267e-06, "loss": 0.2543, "step": 20260 }, { "epoch": 0.4126208651399491, "grad_norm": 3.349435062782111, "learning_rate": 8.25224931808004e-06, "loss": 0.2797, "step": 20270 }, { "epoch": 0.412824427480916, "grad_norm": 15.619125269662378, "learning_rate": 8.256320482025812e-06, "loss": 0.3867, "step": 20280 }, { "epoch": 0.41302798982188293, "grad_norm": 5.496058618230268, "learning_rate": 8.260391645971583e-06, "loss": 0.2612, "step": 20290 }, { "epoch": 0.41323155216284985, "grad_norm": 6.010612081859991, "learning_rate": 8.264462809917356e-06, "loss": 0.2223, "step": 20300 }, { "epoch": 0.4134351145038168, "grad_norm": 13.988177902450614, "learning_rate": 8.268533973863128e-06, "loss": 0.2902, "step": 20310 }, { "epoch": 0.4136386768447837, "grad_norm": 4.626974827979815, "learning_rate": 8.272605137808901e-06, "loss": 0.3118, "step": 20320 }, { "epoch": 0.41384223918575064, "grad_norm": 10.56969882968407, "learning_rate": 8.276676301754672e-06, "loss": 0.233, "step": 20330 }, { "epoch": 0.41404580152671755, "grad_norm": 22.829758370231414, "learning_rate": 8.280747465700444e-06, "loss": 0.354, "step": 20340 }, { "epoch": 0.41424936386768446, "grad_norm": 6.227215474290992, "learning_rate": 8.284818629646217e-06, "loss": 0.3058, "step": 20350 }, { "epoch": 0.4144529262086514, "grad_norm": 34.320688142851004, "learning_rate": 8.288889793591988e-06, "loss": 0.2653, "step": 20360 }, { "epoch": 0.41465648854961834, "grad_norm": 31.007296314715557, "learning_rate": 8.29296095753776e-06, "loss": 0.3011, "step": 20370 }, { "epoch": 0.41486005089058525, "grad_norm": 79.10239600354551, "learning_rate": 8.297032121483533e-06, "loss": 0.3296, "step": 20380 }, { "epoch": 0.41506361323155216, "grad_norm": 9.345964063423965, "learning_rate": 8.301103285429304e-06, "loss": 0.2995, "step": 20390 }, { "epoch": 0.4152671755725191, "grad_norm": 9.716343249413683, "learning_rate": 8.305174449375077e-06, "loss": 0.3554, "step": 20400 }, { "epoch": 0.415470737913486, "grad_norm": 7.590291723172948, "learning_rate": 8.30924561332085e-06, "loss": 0.3558, "step": 20410 }, { "epoch": 0.4156743002544529, "grad_norm": 8.596437921665727, "learning_rate": 8.31331677726662e-06, "loss": 0.3274, "step": 20420 }, { "epoch": 0.41587786259541987, "grad_norm": 9.806604830375937, "learning_rate": 8.317387941212393e-06, "loss": 0.2642, "step": 20430 }, { "epoch": 0.4160814249363868, "grad_norm": 10.121688055887383, "learning_rate": 8.321459105158166e-06, "loss": 0.3302, "step": 20440 }, { "epoch": 0.4162849872773537, "grad_norm": 6.943046173955618, "learning_rate": 8.325530269103938e-06, "loss": 0.3164, "step": 20450 }, { "epoch": 0.4164885496183206, "grad_norm": 13.248930003790118, "learning_rate": 8.32960143304971e-06, "loss": 0.2501, "step": 20460 }, { "epoch": 0.4166921119592875, "grad_norm": 13.65300131025983, "learning_rate": 8.333672596995482e-06, "loss": 0.3071, "step": 20470 }, { "epoch": 0.4168956743002544, "grad_norm": 11.174468664946668, "learning_rate": 8.337743760941255e-06, "loss": 0.3566, "step": 20480 }, { "epoch": 0.4170992366412214, "grad_norm": 12.528845780152656, "learning_rate": 8.341814924887025e-06, "loss": 0.3081, "step": 20490 }, { "epoch": 0.4173027989821883, "grad_norm": 11.259960258732477, "learning_rate": 8.345886088832798e-06, "loss": 0.2849, "step": 20500 }, { "epoch": 0.4175063613231552, "grad_norm": 11.761314409588664, "learning_rate": 8.34995725277857e-06, "loss": 0.3226, "step": 20510 }, { "epoch": 0.41770992366412213, "grad_norm": 11.791084453936447, "learning_rate": 8.354028416724342e-06, "loss": 0.2495, "step": 20520 }, { "epoch": 0.41791348600508904, "grad_norm": 17.09308587391443, "learning_rate": 8.358099580670114e-06, "loss": 0.3079, "step": 20530 }, { "epoch": 0.41811704834605595, "grad_norm": 15.049499691342323, "learning_rate": 8.362170744615887e-06, "loss": 0.3441, "step": 20540 }, { "epoch": 0.4183206106870229, "grad_norm": 5.416841201528175, "learning_rate": 8.366241908561658e-06, "loss": 0.3604, "step": 20550 }, { "epoch": 0.41852417302798983, "grad_norm": 12.961369135251816, "learning_rate": 8.37031307250743e-06, "loss": 0.2473, "step": 20560 }, { "epoch": 0.41872773536895674, "grad_norm": 12.117320379543324, "learning_rate": 8.374384236453203e-06, "loss": 0.2958, "step": 20570 }, { "epoch": 0.41893129770992366, "grad_norm": 15.71138473593007, "learning_rate": 8.378455400398974e-06, "loss": 0.3057, "step": 20580 }, { "epoch": 0.41913486005089057, "grad_norm": 18.167948510259006, "learning_rate": 8.382526564344747e-06, "loss": 0.3823, "step": 20590 }, { "epoch": 0.4193384223918575, "grad_norm": 20.929954103396632, "learning_rate": 8.38659772829052e-06, "loss": 0.4408, "step": 20600 }, { "epoch": 0.41954198473282445, "grad_norm": 15.571143311278725, "learning_rate": 8.390668892236292e-06, "loss": 0.252, "step": 20610 }, { "epoch": 0.41974554707379136, "grad_norm": 6.105561947261141, "learning_rate": 8.394740056182063e-06, "loss": 0.357, "step": 20620 }, { "epoch": 0.41994910941475827, "grad_norm": 32.37028652494332, "learning_rate": 8.398811220127835e-06, "loss": 0.3206, "step": 20630 }, { "epoch": 0.4201526717557252, "grad_norm": 12.823680239850336, "learning_rate": 8.402882384073608e-06, "loss": 0.3098, "step": 20640 }, { "epoch": 0.4203562340966921, "grad_norm": 9.638503200400775, "learning_rate": 8.406953548019379e-06, "loss": 0.27, "step": 20650 }, { "epoch": 0.420559796437659, "grad_norm": 11.91390112582762, "learning_rate": 8.411024711965152e-06, "loss": 0.3478, "step": 20660 }, { "epoch": 0.420763358778626, "grad_norm": 9.29288908098258, "learning_rate": 8.415095875910924e-06, "loss": 0.2844, "step": 20670 }, { "epoch": 0.4209669211195929, "grad_norm": 8.754909579025803, "learning_rate": 8.419167039856695e-06, "loss": 0.3381, "step": 20680 }, { "epoch": 0.4211704834605598, "grad_norm": 16.07548731350849, "learning_rate": 8.423238203802468e-06, "loss": 0.3599, "step": 20690 }, { "epoch": 0.4213740458015267, "grad_norm": 16.25449379428233, "learning_rate": 8.42730936774824e-06, "loss": 0.3006, "step": 20700 }, { "epoch": 0.4215776081424936, "grad_norm": 7.50799335298279, "learning_rate": 8.431380531694011e-06, "loss": 0.2625, "step": 20710 }, { "epoch": 0.42178117048346053, "grad_norm": 7.111398790885121, "learning_rate": 8.435451695639784e-06, "loss": 0.3099, "step": 20720 }, { "epoch": 0.4219847328244275, "grad_norm": 6.830717533349099, "learning_rate": 8.439522859585557e-06, "loss": 0.3446, "step": 20730 }, { "epoch": 0.4221882951653944, "grad_norm": 15.934034836143532, "learning_rate": 8.44359402353133e-06, "loss": 0.3051, "step": 20740 }, { "epoch": 0.4223918575063613, "grad_norm": 11.650643326732958, "learning_rate": 8.4476651874771e-06, "loss": 0.3304, "step": 20750 }, { "epoch": 0.42259541984732824, "grad_norm": 8.142509438512239, "learning_rate": 8.451736351422873e-06, "loss": 0.2837, "step": 20760 }, { "epoch": 0.42279898218829515, "grad_norm": 3.509071257493878, "learning_rate": 8.455807515368646e-06, "loss": 0.3816, "step": 20770 }, { "epoch": 0.42300254452926206, "grad_norm": 9.070540031662837, "learning_rate": 8.459878679314416e-06, "loss": 0.3102, "step": 20780 }, { "epoch": 0.423206106870229, "grad_norm": 6.586387957881454, "learning_rate": 8.463949843260189e-06, "loss": 0.2542, "step": 20790 }, { "epoch": 0.42340966921119594, "grad_norm": 7.6695047316228315, "learning_rate": 8.468021007205962e-06, "loss": 0.3183, "step": 20800 }, { "epoch": 0.42361323155216285, "grad_norm": 11.388403273820996, "learning_rate": 8.472092171151733e-06, "loss": 0.2353, "step": 20810 }, { "epoch": 0.42381679389312976, "grad_norm": 16.068408597261456, "learning_rate": 8.476163335097505e-06, "loss": 0.3457, "step": 20820 }, { "epoch": 0.4240203562340967, "grad_norm": 10.535607561177379, "learning_rate": 8.480234499043278e-06, "loss": 0.3969, "step": 20830 }, { "epoch": 0.4242239185750636, "grad_norm": 5.386530579455733, "learning_rate": 8.484305662989049e-06, "loss": 0.2595, "step": 20840 }, { "epoch": 0.42442748091603055, "grad_norm": 17.795632952159554, "learning_rate": 8.488376826934821e-06, "loss": 0.3092, "step": 20850 }, { "epoch": 0.42463104325699746, "grad_norm": 7.999285714491396, "learning_rate": 8.492447990880594e-06, "loss": 0.2548, "step": 20860 }, { "epoch": 0.4248346055979644, "grad_norm": 24.158133445938148, "learning_rate": 8.496519154826367e-06, "loss": 0.3375, "step": 20870 }, { "epoch": 0.4250381679389313, "grad_norm": 12.489755069787028, "learning_rate": 8.500590318772138e-06, "loss": 0.2805, "step": 20880 }, { "epoch": 0.4252417302798982, "grad_norm": 10.005032371290506, "learning_rate": 8.50466148271791e-06, "loss": 0.3226, "step": 20890 }, { "epoch": 0.42544529262086517, "grad_norm": 14.014302636459831, "learning_rate": 8.508732646663683e-06, "loss": 0.313, "step": 20900 }, { "epoch": 0.4256488549618321, "grad_norm": 14.55483365929503, "learning_rate": 8.512803810609454e-06, "loss": 0.3736, "step": 20910 }, { "epoch": 0.425852417302799, "grad_norm": 11.809566165116545, "learning_rate": 8.516874974555226e-06, "loss": 0.3642, "step": 20920 }, { "epoch": 0.4260559796437659, "grad_norm": 8.113832220106618, "learning_rate": 8.520946138500999e-06, "loss": 0.3526, "step": 20930 }, { "epoch": 0.4262595419847328, "grad_norm": 11.951648288705407, "learning_rate": 8.52501730244677e-06, "loss": 0.2866, "step": 20940 }, { "epoch": 0.4264631043256997, "grad_norm": 17.402719986505566, "learning_rate": 8.529088466392543e-06, "loss": 0.3453, "step": 20950 }, { "epoch": 0.4266666666666667, "grad_norm": 14.667589798520561, "learning_rate": 8.533159630338315e-06, "loss": 0.183, "step": 20960 }, { "epoch": 0.4268702290076336, "grad_norm": 10.690005717010528, "learning_rate": 8.537230794284086e-06, "loss": 0.2949, "step": 20970 }, { "epoch": 0.4270737913486005, "grad_norm": 34.17841276251408, "learning_rate": 8.541301958229859e-06, "loss": 0.3053, "step": 20980 }, { "epoch": 0.42727735368956743, "grad_norm": 30.57087458170668, "learning_rate": 8.545373122175632e-06, "loss": 0.4487, "step": 20990 }, { "epoch": 0.42748091603053434, "grad_norm": 12.916655146148491, "learning_rate": 8.549444286121404e-06, "loss": 0.3874, "step": 21000 }, { "epoch": 0.42768447837150125, "grad_norm": 9.698196759940043, "learning_rate": 8.553515450067175e-06, "loss": 0.3396, "step": 21010 }, { "epoch": 0.4278880407124682, "grad_norm": 7.024828726031575, "learning_rate": 8.557586614012948e-06, "loss": 0.352, "step": 21020 }, { "epoch": 0.42809160305343513, "grad_norm": 8.350505848793581, "learning_rate": 8.56165777795872e-06, "loss": 0.319, "step": 21030 }, { "epoch": 0.42829516539440204, "grad_norm": 8.37068148244082, "learning_rate": 8.565728941904491e-06, "loss": 0.1853, "step": 21040 }, { "epoch": 0.42849872773536896, "grad_norm": 9.934761937404657, "learning_rate": 8.569800105850264e-06, "loss": 0.3418, "step": 21050 }, { "epoch": 0.42870229007633587, "grad_norm": 10.875796542018925, "learning_rate": 8.573871269796037e-06, "loss": 0.2997, "step": 21060 }, { "epoch": 0.4289058524173028, "grad_norm": 9.696362327471498, "learning_rate": 8.577942433741807e-06, "loss": 0.3548, "step": 21070 }, { "epoch": 0.42910941475826975, "grad_norm": 7.454086650675364, "learning_rate": 8.58201359768758e-06, "loss": 0.3183, "step": 21080 }, { "epoch": 0.42931297709923666, "grad_norm": 7.797714401465931, "learning_rate": 8.586084761633353e-06, "loss": 0.4329, "step": 21090 }, { "epoch": 0.42951653944020357, "grad_norm": 4.833330057711078, "learning_rate": 8.590155925579124e-06, "loss": 0.2993, "step": 21100 }, { "epoch": 0.4297201017811705, "grad_norm": 7.638381368984011, "learning_rate": 8.594227089524896e-06, "loss": 0.2933, "step": 21110 }, { "epoch": 0.4299236641221374, "grad_norm": 14.598738258616343, "learning_rate": 8.598298253470669e-06, "loss": 0.3156, "step": 21120 }, { "epoch": 0.4301272264631043, "grad_norm": 7.554313318788991, "learning_rate": 8.60236941741644e-06, "loss": 0.2624, "step": 21130 }, { "epoch": 0.4303307888040713, "grad_norm": 9.927363096293506, "learning_rate": 8.606440581362212e-06, "loss": 0.31, "step": 21140 }, { "epoch": 0.4305343511450382, "grad_norm": 9.852862880195786, "learning_rate": 8.610511745307985e-06, "loss": 0.3407, "step": 21150 }, { "epoch": 0.4307379134860051, "grad_norm": 16.193990572902596, "learning_rate": 8.614582909253756e-06, "loss": 0.2482, "step": 21160 }, { "epoch": 0.430941475826972, "grad_norm": 9.972999840805041, "learning_rate": 8.618654073199529e-06, "loss": 0.2928, "step": 21170 }, { "epoch": 0.4311450381679389, "grad_norm": 8.133444080496115, "learning_rate": 8.622725237145301e-06, "loss": 0.3128, "step": 21180 }, { "epoch": 0.43134860050890583, "grad_norm": 11.428030398815581, "learning_rate": 8.626796401091074e-06, "loss": 0.2898, "step": 21190 }, { "epoch": 0.4315521628498728, "grad_norm": 15.162388159227252, "learning_rate": 8.630867565036845e-06, "loss": 0.2884, "step": 21200 }, { "epoch": 0.4317557251908397, "grad_norm": 14.056115976843659, "learning_rate": 8.634938728982618e-06, "loss": 0.3496, "step": 21210 }, { "epoch": 0.4319592875318066, "grad_norm": 10.564615348091968, "learning_rate": 8.63900989292839e-06, "loss": 0.2313, "step": 21220 }, { "epoch": 0.43216284987277354, "grad_norm": 14.675109673321762, "learning_rate": 8.643081056874161e-06, "loss": 0.3071, "step": 21230 }, { "epoch": 0.43236641221374045, "grad_norm": 19.24346270555677, "learning_rate": 8.647152220819934e-06, "loss": 0.1927, "step": 21240 }, { "epoch": 0.43256997455470736, "grad_norm": 15.01512936449106, "learning_rate": 8.651223384765706e-06, "loss": 0.3116, "step": 21250 }, { "epoch": 0.4327735368956743, "grad_norm": 82.53659475522221, "learning_rate": 8.655294548711477e-06, "loss": 0.3352, "step": 21260 }, { "epoch": 0.43297709923664124, "grad_norm": 4.695542128828311, "learning_rate": 8.65936571265725e-06, "loss": 0.2754, "step": 21270 }, { "epoch": 0.43318066157760815, "grad_norm": 0.044883088868169344, "learning_rate": 8.663436876603023e-06, "loss": 0.2548, "step": 21280 }, { "epoch": 0.43338422391857506, "grad_norm": 6.440983282169948, "learning_rate": 8.667508040548793e-06, "loss": 0.3832, "step": 21290 }, { "epoch": 0.433587786259542, "grad_norm": 6.266529840363766, "learning_rate": 8.671579204494566e-06, "loss": 0.2775, "step": 21300 }, { "epoch": 0.4337913486005089, "grad_norm": 13.187287656377997, "learning_rate": 8.675650368440339e-06, "loss": 0.3675, "step": 21310 }, { "epoch": 0.43399491094147585, "grad_norm": 9.182043413469984, "learning_rate": 8.67972153238611e-06, "loss": 0.3017, "step": 21320 }, { "epoch": 0.43419847328244277, "grad_norm": 10.111206971152255, "learning_rate": 8.683792696331882e-06, "loss": 0.2735, "step": 21330 }, { "epoch": 0.4344020356234097, "grad_norm": 10.939564261269389, "learning_rate": 8.687863860277655e-06, "loss": 0.3064, "step": 21340 }, { "epoch": 0.4346055979643766, "grad_norm": 10.830970630454729, "learning_rate": 8.691935024223426e-06, "loss": 0.3143, "step": 21350 }, { "epoch": 0.4348091603053435, "grad_norm": 27.088057773565, "learning_rate": 8.696006188169198e-06, "loss": 0.2779, "step": 21360 }, { "epoch": 0.4350127226463104, "grad_norm": 8.664657389053035, "learning_rate": 8.700077352114971e-06, "loss": 0.3569, "step": 21370 }, { "epoch": 0.4352162849872774, "grad_norm": 16.20006066904558, "learning_rate": 8.704148516060742e-06, "loss": 0.2876, "step": 21380 }, { "epoch": 0.4354198473282443, "grad_norm": 5.7149216928716156, "learning_rate": 8.708219680006515e-06, "loss": 0.3601, "step": 21390 }, { "epoch": 0.4356234096692112, "grad_norm": 5.861941827282097, "learning_rate": 8.712290843952287e-06, "loss": 0.2613, "step": 21400 }, { "epoch": 0.4358269720101781, "grad_norm": 5.66188623676066, "learning_rate": 8.71636200789806e-06, "loss": 0.2502, "step": 21410 }, { "epoch": 0.436030534351145, "grad_norm": 27.011053928097116, "learning_rate": 8.720433171843831e-06, "loss": 0.387, "step": 21420 }, { "epoch": 0.43623409669211194, "grad_norm": 11.17156748401683, "learning_rate": 8.724504335789603e-06, "loss": 0.3536, "step": 21430 }, { "epoch": 0.4364376590330789, "grad_norm": 8.637416042717282, "learning_rate": 8.728575499735376e-06, "loss": 0.2546, "step": 21440 }, { "epoch": 0.4366412213740458, "grad_norm": 16.793110552585784, "learning_rate": 8.732646663681147e-06, "loss": 0.3378, "step": 21450 }, { "epoch": 0.43684478371501273, "grad_norm": 13.176996713260905, "learning_rate": 8.73671782762692e-06, "loss": 0.2742, "step": 21460 }, { "epoch": 0.43704834605597964, "grad_norm": 4.108304174191928, "learning_rate": 8.740788991572692e-06, "loss": 0.35, "step": 21470 }, { "epoch": 0.43725190839694655, "grad_norm": 6.876069173105913, "learning_rate": 8.744860155518463e-06, "loss": 0.335, "step": 21480 }, { "epoch": 0.43745547073791347, "grad_norm": 13.402474513543563, "learning_rate": 8.748931319464236e-06, "loss": 0.2729, "step": 21490 }, { "epoch": 0.43765903307888043, "grad_norm": 7.455830308000421, "learning_rate": 8.753002483410009e-06, "loss": 0.2908, "step": 21500 }, { "epoch": 0.43786259541984734, "grad_norm": 9.951185000524394, "learning_rate": 8.75707364735578e-06, "loss": 0.3674, "step": 21510 }, { "epoch": 0.43806615776081426, "grad_norm": 7.793784924226591, "learning_rate": 8.761144811301552e-06, "loss": 0.2625, "step": 21520 }, { "epoch": 0.43826972010178117, "grad_norm": 12.180880911456612, "learning_rate": 8.765215975247325e-06, "loss": 0.2679, "step": 21530 }, { "epoch": 0.4384732824427481, "grad_norm": 3.9983326893154247, "learning_rate": 8.769287139193096e-06, "loss": 0.3298, "step": 21540 }, { "epoch": 0.438676844783715, "grad_norm": 10.2196860966226, "learning_rate": 8.773358303138868e-06, "loss": 0.2526, "step": 21550 }, { "epoch": 0.43888040712468196, "grad_norm": 5.944274161438905, "learning_rate": 8.777429467084641e-06, "loss": 0.2886, "step": 21560 }, { "epoch": 0.43908396946564887, "grad_norm": 9.225528842602387, "learning_rate": 8.781500631030412e-06, "loss": 0.3149, "step": 21570 }, { "epoch": 0.4392875318066158, "grad_norm": 6.242135442849134, "learning_rate": 8.785571794976184e-06, "loss": 0.3235, "step": 21580 }, { "epoch": 0.4394910941475827, "grad_norm": 7.415000396911991, "learning_rate": 8.789642958921957e-06, "loss": 0.3116, "step": 21590 }, { "epoch": 0.4396946564885496, "grad_norm": 5.573770740724847, "learning_rate": 8.793714122867728e-06, "loss": 0.3402, "step": 21600 }, { "epoch": 0.4398982188295165, "grad_norm": 5.5291220576439875, "learning_rate": 8.7977852868135e-06, "loss": 0.255, "step": 21610 }, { "epoch": 0.4401017811704835, "grad_norm": 6.039715877277345, "learning_rate": 8.801856450759273e-06, "loss": 0.427, "step": 21620 }, { "epoch": 0.4403053435114504, "grad_norm": 16.237181070151113, "learning_rate": 8.805927614705046e-06, "loss": 0.398, "step": 21630 }, { "epoch": 0.4405089058524173, "grad_norm": 6.25208715635676, "learning_rate": 8.809998778650817e-06, "loss": 0.2965, "step": 21640 }, { "epoch": 0.4407124681933842, "grad_norm": 7.524716282568579, "learning_rate": 8.81406994259659e-06, "loss": 0.3201, "step": 21650 }, { "epoch": 0.44091603053435113, "grad_norm": 10.036076836622785, "learning_rate": 8.818141106542362e-06, "loss": 0.2712, "step": 21660 }, { "epoch": 0.44111959287531805, "grad_norm": 20.3318327205338, "learning_rate": 8.822212270488133e-06, "loss": 0.2586, "step": 21670 }, { "epoch": 0.441323155216285, "grad_norm": 16.534421519471973, "learning_rate": 8.826283434433906e-06, "loss": 0.3562, "step": 21680 }, { "epoch": 0.4415267175572519, "grad_norm": 3.2520288937555906, "learning_rate": 8.830354598379678e-06, "loss": 0.3793, "step": 21690 }, { "epoch": 0.44173027989821884, "grad_norm": 11.880641863389544, "learning_rate": 8.83442576232545e-06, "loss": 0.3483, "step": 21700 }, { "epoch": 0.44193384223918575, "grad_norm": 10.187667654147031, "learning_rate": 8.838496926271222e-06, "loss": 0.3137, "step": 21710 }, { "epoch": 0.44213740458015266, "grad_norm": 15.92527376960711, "learning_rate": 8.842568090216995e-06, "loss": 0.3497, "step": 21720 }, { "epoch": 0.44234096692111957, "grad_norm": 8.980035593891209, "learning_rate": 8.846639254162765e-06, "loss": 0.2064, "step": 21730 }, { "epoch": 0.44254452926208654, "grad_norm": 9.919558242798878, "learning_rate": 8.850710418108538e-06, "loss": 0.3898, "step": 21740 }, { "epoch": 0.44274809160305345, "grad_norm": 13.886588122528421, "learning_rate": 8.85478158205431e-06, "loss": 0.251, "step": 21750 }, { "epoch": 0.44295165394402036, "grad_norm": 10.550073005666484, "learning_rate": 8.858852746000082e-06, "loss": 0.3924, "step": 21760 }, { "epoch": 0.4431552162849873, "grad_norm": 13.47600866094909, "learning_rate": 8.862923909945854e-06, "loss": 0.3201, "step": 21770 }, { "epoch": 0.4433587786259542, "grad_norm": 11.107620024845838, "learning_rate": 8.866995073891627e-06, "loss": 0.3131, "step": 21780 }, { "epoch": 0.4435623409669211, "grad_norm": 5.965503442941918, "learning_rate": 8.871066237837398e-06, "loss": 0.3493, "step": 21790 }, { "epoch": 0.44376590330788807, "grad_norm": 4.4156201623677145, "learning_rate": 8.87513740178317e-06, "loss": 0.2322, "step": 21800 }, { "epoch": 0.443969465648855, "grad_norm": 6.41997722755593, "learning_rate": 8.879208565728943e-06, "loss": 0.373, "step": 21810 }, { "epoch": 0.4441730279898219, "grad_norm": 10.036153459106671, "learning_rate": 8.883279729674714e-06, "loss": 0.2975, "step": 21820 }, { "epoch": 0.4443765903307888, "grad_norm": 14.87523940614708, "learning_rate": 8.887350893620487e-06, "loss": 0.303, "step": 21830 }, { "epoch": 0.4445801526717557, "grad_norm": 7.411865677895027, "learning_rate": 8.89142205756626e-06, "loss": 0.3208, "step": 21840 }, { "epoch": 0.4447837150127226, "grad_norm": 13.581006191919224, "learning_rate": 8.895493221512032e-06, "loss": 0.3099, "step": 21850 }, { "epoch": 0.4449872773536896, "grad_norm": 12.381086832205051, "learning_rate": 8.899564385457803e-06, "loss": 0.2161, "step": 21860 }, { "epoch": 0.4451908396946565, "grad_norm": 18.053571304927196, "learning_rate": 8.903635549403575e-06, "loss": 0.4534, "step": 21870 }, { "epoch": 0.4453944020356234, "grad_norm": 5.84890102347474, "learning_rate": 8.907706713349348e-06, "loss": 0.3573, "step": 21880 }, { "epoch": 0.4455979643765903, "grad_norm": 5.65854128161854, "learning_rate": 8.911777877295119e-06, "loss": 0.2522, "step": 21890 }, { "epoch": 0.44580152671755724, "grad_norm": 10.185127125710956, "learning_rate": 8.915849041240892e-06, "loss": 0.2896, "step": 21900 }, { "epoch": 0.44600508905852415, "grad_norm": 5.10439322260064, "learning_rate": 8.919920205186664e-06, "loss": 0.2942, "step": 21910 }, { "epoch": 0.4462086513994911, "grad_norm": 14.66379790192866, "learning_rate": 8.923991369132435e-06, "loss": 0.3871, "step": 21920 }, { "epoch": 0.44641221374045803, "grad_norm": 9.86226769230837, "learning_rate": 8.928062533078208e-06, "loss": 0.3211, "step": 21930 }, { "epoch": 0.44661577608142494, "grad_norm": 6.28348639318644, "learning_rate": 8.93213369702398e-06, "loss": 0.2875, "step": 21940 }, { "epoch": 0.44681933842239185, "grad_norm": 13.388710335389947, "learning_rate": 8.936204860969751e-06, "loss": 0.3737, "step": 21950 }, { "epoch": 0.44702290076335877, "grad_norm": 7.007306561012167, "learning_rate": 8.940276024915524e-06, "loss": 0.2411, "step": 21960 }, { "epoch": 0.4472264631043257, "grad_norm": 4.056756509217545, "learning_rate": 8.944347188861297e-06, "loss": 0.2736, "step": 21970 }, { "epoch": 0.44743002544529265, "grad_norm": 22.997563411230402, "learning_rate": 8.948418352807068e-06, "loss": 0.3325, "step": 21980 }, { "epoch": 0.44763358778625956, "grad_norm": 7.27038948798107, "learning_rate": 8.95248951675284e-06, "loss": 0.3747, "step": 21990 }, { "epoch": 0.44783715012722647, "grad_norm": 5.124797493172762, "learning_rate": 8.956560680698613e-06, "loss": 0.2452, "step": 22000 }, { "epoch": 0.4480407124681934, "grad_norm": 11.846873270534024, "learning_rate": 8.960631844644384e-06, "loss": 0.2288, "step": 22010 }, { "epoch": 0.4482442748091603, "grad_norm": 11.936784687212738, "learning_rate": 8.964703008590156e-06, "loss": 0.3839, "step": 22020 }, { "epoch": 0.4484478371501272, "grad_norm": 10.732923766376329, "learning_rate": 8.968774172535929e-06, "loss": 0.3876, "step": 22030 }, { "epoch": 0.44865139949109417, "grad_norm": 57.99505723888292, "learning_rate": 8.9728453364817e-06, "loss": 0.326, "step": 22040 }, { "epoch": 0.4488549618320611, "grad_norm": 25.698868377458503, "learning_rate": 8.976916500427473e-06, "loss": 0.3239, "step": 22050 }, { "epoch": 0.449058524173028, "grad_norm": 39.18998121186888, "learning_rate": 8.980987664373245e-06, "loss": 0.3256, "step": 22060 }, { "epoch": 0.4492620865139949, "grad_norm": 10.999732296914527, "learning_rate": 8.985058828319018e-06, "loss": 0.2898, "step": 22070 }, { "epoch": 0.4494656488549618, "grad_norm": 9.926882958886637, "learning_rate": 8.989129992264789e-06, "loss": 0.2286, "step": 22080 }, { "epoch": 0.44966921119592873, "grad_norm": 21.213987012182507, "learning_rate": 8.993201156210561e-06, "loss": 0.3106, "step": 22090 }, { "epoch": 0.4498727735368957, "grad_norm": 8.474085213993126, "learning_rate": 8.997272320156334e-06, "loss": 0.3519, "step": 22100 }, { "epoch": 0.4500763358778626, "grad_norm": 10.657923684564231, "learning_rate": 9.001343484102105e-06, "loss": 0.2262, "step": 22110 }, { "epoch": 0.4502798982188295, "grad_norm": 9.661956563310039, "learning_rate": 9.005414648047878e-06, "loss": 0.3369, "step": 22120 }, { "epoch": 0.45048346055979643, "grad_norm": 14.291420708836174, "learning_rate": 9.00948581199365e-06, "loss": 0.3491, "step": 22130 }, { "epoch": 0.45068702290076335, "grad_norm": 10.247982052597179, "learning_rate": 9.013556975939421e-06, "loss": 0.2947, "step": 22140 }, { "epoch": 0.45089058524173026, "grad_norm": 9.040823892297981, "learning_rate": 9.017628139885194e-06, "loss": 0.3408, "step": 22150 }, { "epoch": 0.4510941475826972, "grad_norm": 9.150824386926077, "learning_rate": 9.021699303830966e-06, "loss": 0.3053, "step": 22160 }, { "epoch": 0.45129770992366414, "grad_norm": 9.03877455751849, "learning_rate": 9.025770467776737e-06, "loss": 0.3558, "step": 22170 }, { "epoch": 0.45150127226463105, "grad_norm": 4.904289384194198, "learning_rate": 9.02984163172251e-06, "loss": 0.2508, "step": 22180 }, { "epoch": 0.45170483460559796, "grad_norm": 10.235828638560163, "learning_rate": 9.033912795668283e-06, "loss": 0.3623, "step": 22190 }, { "epoch": 0.45190839694656487, "grad_norm": 12.079857123861276, "learning_rate": 9.037983959614054e-06, "loss": 0.3186, "step": 22200 }, { "epoch": 0.4521119592875318, "grad_norm": 3.7809398338997644, "learning_rate": 9.042055123559826e-06, "loss": 0.2865, "step": 22210 }, { "epoch": 0.45231552162849875, "grad_norm": 8.54851922318094, "learning_rate": 9.046126287505599e-06, "loss": 0.234, "step": 22220 }, { "epoch": 0.45251908396946566, "grad_norm": 11.467887363237555, "learning_rate": 9.05019745145137e-06, "loss": 0.3245, "step": 22230 }, { "epoch": 0.4527226463104326, "grad_norm": 4.024253178499568, "learning_rate": 9.054268615397142e-06, "loss": 0.1497, "step": 22240 }, { "epoch": 0.4529262086513995, "grad_norm": 5.558814656320289, "learning_rate": 9.058339779342915e-06, "loss": 0.2822, "step": 22250 }, { "epoch": 0.4531297709923664, "grad_norm": 9.95523109749595, "learning_rate": 9.062410943288686e-06, "loss": 0.3995, "step": 22260 }, { "epoch": 0.4533333333333333, "grad_norm": 12.54687561413772, "learning_rate": 9.066482107234459e-06, "loss": 0.4176, "step": 22270 }, { "epoch": 0.4535368956743003, "grad_norm": 10.197796054682318, "learning_rate": 9.070553271180231e-06, "loss": 0.2839, "step": 22280 }, { "epoch": 0.4537404580152672, "grad_norm": 9.374662283519536, "learning_rate": 9.074624435126004e-06, "loss": 0.2445, "step": 22290 }, { "epoch": 0.4539440203562341, "grad_norm": 22.240601060694704, "learning_rate": 9.078695599071775e-06, "loss": 0.2575, "step": 22300 }, { "epoch": 0.454147582697201, "grad_norm": 16.33931967416285, "learning_rate": 9.082766763017547e-06, "loss": 0.3442, "step": 22310 }, { "epoch": 0.4543511450381679, "grad_norm": 9.375579867186346, "learning_rate": 9.08683792696332e-06, "loss": 0.3246, "step": 22320 }, { "epoch": 0.45455470737913484, "grad_norm": 6.235033672722698, "learning_rate": 9.090909090909091e-06, "loss": 0.3118, "step": 22330 }, { "epoch": 0.4547582697201018, "grad_norm": 7.031230349606831, "learning_rate": 9.094980254854864e-06, "loss": 0.2659, "step": 22340 }, { "epoch": 0.4549618320610687, "grad_norm": 3.195908673920082, "learning_rate": 9.099051418800636e-06, "loss": 0.3082, "step": 22350 }, { "epoch": 0.45516539440203563, "grad_norm": 12.793285239352151, "learning_rate": 9.103122582746407e-06, "loss": 0.2789, "step": 22360 }, { "epoch": 0.45536895674300254, "grad_norm": 7.452174993317703, "learning_rate": 9.10719374669218e-06, "loss": 0.295, "step": 22370 }, { "epoch": 0.45557251908396945, "grad_norm": 5.604414022820796, "learning_rate": 9.111264910637952e-06, "loss": 0.372, "step": 22380 }, { "epoch": 0.45577608142493636, "grad_norm": 16.477445458584366, "learning_rate": 9.115336074583723e-06, "loss": 0.2967, "step": 22390 }, { "epoch": 0.45597964376590333, "grad_norm": 4.652786870390715, "learning_rate": 9.119407238529496e-06, "loss": 0.3289, "step": 22400 }, { "epoch": 0.45618320610687024, "grad_norm": 3.233075391473242, "learning_rate": 9.123478402475269e-06, "loss": 0.2658, "step": 22410 }, { "epoch": 0.45638676844783715, "grad_norm": 11.718468506508248, "learning_rate": 9.12754956642104e-06, "loss": 0.2883, "step": 22420 }, { "epoch": 0.45659033078880407, "grad_norm": 8.34309517114579, "learning_rate": 9.131620730366812e-06, "loss": 0.3329, "step": 22430 }, { "epoch": 0.456793893129771, "grad_norm": 9.523654198320523, "learning_rate": 9.135691894312585e-06, "loss": 0.2867, "step": 22440 }, { "epoch": 0.4569974554707379, "grad_norm": 18.41481443131134, "learning_rate": 9.139763058258356e-06, "loss": 0.4014, "step": 22450 }, { "epoch": 0.45720101781170486, "grad_norm": 18.79430335278427, "learning_rate": 9.143834222204128e-06, "loss": 0.3211, "step": 22460 }, { "epoch": 0.45740458015267177, "grad_norm": 6.4176892937864904, "learning_rate": 9.147905386149901e-06, "loss": 0.3398, "step": 22470 }, { "epoch": 0.4576081424936387, "grad_norm": 27.37112565241254, "learning_rate": 9.151976550095674e-06, "loss": 0.3428, "step": 22480 }, { "epoch": 0.4578117048346056, "grad_norm": 4.8917586168173255, "learning_rate": 9.156047714041445e-06, "loss": 0.2803, "step": 22490 }, { "epoch": 0.4580152671755725, "grad_norm": 16.875383008856964, "learning_rate": 9.160118877987217e-06, "loss": 0.2954, "step": 22500 }, { "epoch": 0.4582188295165394, "grad_norm": 28.216153166957742, "learning_rate": 9.16419004193299e-06, "loss": 0.3228, "step": 22510 }, { "epoch": 0.4584223918575064, "grad_norm": 8.402239400689217, "learning_rate": 9.16826120587876e-06, "loss": 0.2674, "step": 22520 }, { "epoch": 0.4586259541984733, "grad_norm": 9.09556749317849, "learning_rate": 9.172332369824533e-06, "loss": 0.3072, "step": 22530 }, { "epoch": 0.4588295165394402, "grad_norm": 7.534678167206316, "learning_rate": 9.176403533770306e-06, "loss": 0.2902, "step": 22540 }, { "epoch": 0.4590330788804071, "grad_norm": 15.714785006618357, "learning_rate": 9.180474697716077e-06, "loss": 0.2672, "step": 22550 }, { "epoch": 0.45923664122137403, "grad_norm": 54.50100394271528, "learning_rate": 9.18454586166185e-06, "loss": 0.241, "step": 22560 }, { "epoch": 0.45944020356234094, "grad_norm": 6.245608923897279, "learning_rate": 9.188617025607622e-06, "loss": 0.386, "step": 22570 }, { "epoch": 0.4596437659033079, "grad_norm": 5.7529018439650805, "learning_rate": 9.192688189553393e-06, "loss": 0.2174, "step": 22580 }, { "epoch": 0.4598473282442748, "grad_norm": 5.918486284946158, "learning_rate": 9.196759353499166e-06, "loss": 0.2657, "step": 22590 }, { "epoch": 0.46005089058524173, "grad_norm": 9.806859831463473, "learning_rate": 9.200830517444938e-06, "loss": 0.2981, "step": 22600 }, { "epoch": 0.46025445292620865, "grad_norm": 13.26238969006261, "learning_rate": 9.20490168139071e-06, "loss": 0.2785, "step": 22610 }, { "epoch": 0.46045801526717556, "grad_norm": 21.09932662138466, "learning_rate": 9.208972845336482e-06, "loss": 0.3139, "step": 22620 }, { "epoch": 0.46066157760814247, "grad_norm": 10.641301217661965, "learning_rate": 9.213044009282255e-06, "loss": 0.2936, "step": 22630 }, { "epoch": 0.46086513994910944, "grad_norm": 5.04645484928766, "learning_rate": 9.217115173228026e-06, "loss": 0.3435, "step": 22640 }, { "epoch": 0.46106870229007635, "grad_norm": 25.726027088018938, "learning_rate": 9.221186337173798e-06, "loss": 0.3107, "step": 22650 }, { "epoch": 0.46127226463104326, "grad_norm": 14.669728609139346, "learning_rate": 9.225257501119571e-06, "loss": 0.3537, "step": 22660 }, { "epoch": 0.4614758269720102, "grad_norm": 6.489588678550387, "learning_rate": 9.229328665065342e-06, "loss": 0.3702, "step": 22670 }, { "epoch": 0.4616793893129771, "grad_norm": 9.032286648002199, "learning_rate": 9.233399829011114e-06, "loss": 0.3719, "step": 22680 }, { "epoch": 0.461882951653944, "grad_norm": 8.470059023234347, "learning_rate": 9.237470992956887e-06, "loss": 0.3476, "step": 22690 }, { "epoch": 0.46208651399491096, "grad_norm": 18.38174266483505, "learning_rate": 9.24154215690266e-06, "loss": 0.2639, "step": 22700 }, { "epoch": 0.4622900763358779, "grad_norm": 8.359394490935687, "learning_rate": 9.24561332084843e-06, "loss": 0.3449, "step": 22710 }, { "epoch": 0.4624936386768448, "grad_norm": 2.5088625664297353, "learning_rate": 9.249684484794203e-06, "loss": 0.3743, "step": 22720 }, { "epoch": 0.4626972010178117, "grad_norm": 7.629891576524426, "learning_rate": 9.253755648739976e-06, "loss": 0.3217, "step": 22730 }, { "epoch": 0.4629007633587786, "grad_norm": 16.097153504623492, "learning_rate": 9.257826812685747e-06, "loss": 0.3551, "step": 22740 }, { "epoch": 0.4631043256997455, "grad_norm": 2.807750275319827, "learning_rate": 9.26189797663152e-06, "loss": 0.2819, "step": 22750 }, { "epoch": 0.4633078880407125, "grad_norm": 9.301737630946098, "learning_rate": 9.265969140577292e-06, "loss": 0.3542, "step": 22760 }, { "epoch": 0.4635114503816794, "grad_norm": 17.196482724792013, "learning_rate": 9.270040304523063e-06, "loss": 0.2747, "step": 22770 }, { "epoch": 0.4637150127226463, "grad_norm": 14.945622556096476, "learning_rate": 9.274111468468836e-06, "loss": 0.2998, "step": 22780 }, { "epoch": 0.4639185750636132, "grad_norm": 14.739464888569017, "learning_rate": 9.278182632414608e-06, "loss": 0.3297, "step": 22790 }, { "epoch": 0.46412213740458014, "grad_norm": 34.99002855257928, "learning_rate": 9.28225379636038e-06, "loss": 0.2467, "step": 22800 }, { "epoch": 0.46432569974554705, "grad_norm": 10.655407974457495, "learning_rate": 9.286324960306152e-06, "loss": 0.3358, "step": 22810 }, { "epoch": 0.464529262086514, "grad_norm": 3.74447657889122, "learning_rate": 9.290396124251924e-06, "loss": 0.2253, "step": 22820 }, { "epoch": 0.46473282442748093, "grad_norm": 4.118964424549907, "learning_rate": 9.294467288197695e-06, "loss": 0.1823, "step": 22830 }, { "epoch": 0.46493638676844784, "grad_norm": 5.046136199823253, "learning_rate": 9.298538452143468e-06, "loss": 0.3297, "step": 22840 }, { "epoch": 0.46513994910941475, "grad_norm": 4.742503804151099, "learning_rate": 9.30260961608924e-06, "loss": 0.3151, "step": 22850 }, { "epoch": 0.46534351145038166, "grad_norm": 17.032458639574916, "learning_rate": 9.306680780035012e-06, "loss": 0.354, "step": 22860 }, { "epoch": 0.4655470737913486, "grad_norm": 13.993236053453721, "learning_rate": 9.310751943980784e-06, "loss": 0.3387, "step": 22870 }, { "epoch": 0.46575063613231554, "grad_norm": 7.561025484609954, "learning_rate": 9.314823107926557e-06, "loss": 0.2121, "step": 22880 }, { "epoch": 0.46595419847328245, "grad_norm": 2.694203579153757, "learning_rate": 9.318894271872328e-06, "loss": 0.2866, "step": 22890 }, { "epoch": 0.46615776081424937, "grad_norm": 13.020937006751696, "learning_rate": 9.3229654358181e-06, "loss": 0.3173, "step": 22900 }, { "epoch": 0.4663613231552163, "grad_norm": 8.478199574695552, "learning_rate": 9.327036599763873e-06, "loss": 0.2164, "step": 22910 }, { "epoch": 0.4665648854961832, "grad_norm": 9.476954777500982, "learning_rate": 9.331107763709646e-06, "loss": 0.3539, "step": 22920 }, { "epoch": 0.4667684478371501, "grad_norm": 9.746523422952952, "learning_rate": 9.335178927655417e-06, "loss": 0.354, "step": 22930 }, { "epoch": 0.46697201017811707, "grad_norm": 7.482757218546934, "learning_rate": 9.33925009160119e-06, "loss": 0.2659, "step": 22940 }, { "epoch": 0.467175572519084, "grad_norm": 12.730381262031711, "learning_rate": 9.343321255546962e-06, "loss": 0.2563, "step": 22950 }, { "epoch": 0.4673791348600509, "grad_norm": 9.287164405738102, "learning_rate": 9.347392419492733e-06, "loss": 0.3788, "step": 22960 }, { "epoch": 0.4675826972010178, "grad_norm": 1.6728032997308977, "learning_rate": 9.351463583438505e-06, "loss": 0.2866, "step": 22970 }, { "epoch": 0.4677862595419847, "grad_norm": 8.678067723899872, "learning_rate": 9.355534747384278e-06, "loss": 0.3139, "step": 22980 }, { "epoch": 0.46798982188295163, "grad_norm": 15.164532850443544, "learning_rate": 9.359605911330049e-06, "loss": 0.2758, "step": 22990 }, { "epoch": 0.4681933842239186, "grad_norm": 6.12757346538323, "learning_rate": 9.363677075275822e-06, "loss": 0.2947, "step": 23000 }, { "epoch": 0.4683969465648855, "grad_norm": 15.538297170895428, "learning_rate": 9.367748239221594e-06, "loss": 0.3192, "step": 23010 }, { "epoch": 0.4686005089058524, "grad_norm": 7.064239758157333, "learning_rate": 9.371819403167365e-06, "loss": 0.2786, "step": 23020 }, { "epoch": 0.46880407124681933, "grad_norm": 18.936204941285606, "learning_rate": 9.375890567113138e-06, "loss": 0.3503, "step": 23030 }, { "epoch": 0.46900763358778624, "grad_norm": 12.852313977411573, "learning_rate": 9.37996173105891e-06, "loss": 0.4252, "step": 23040 }, { "epoch": 0.46921119592875316, "grad_norm": 6.314721622131349, "learning_rate": 9.384032895004681e-06, "loss": 0.3717, "step": 23050 }, { "epoch": 0.4694147582697201, "grad_norm": 12.530137803887179, "learning_rate": 9.388104058950454e-06, "loss": 0.3264, "step": 23060 }, { "epoch": 0.46961832061068703, "grad_norm": 15.491785012022513, "learning_rate": 9.392175222896227e-06, "loss": 0.2896, "step": 23070 }, { "epoch": 0.46982188295165395, "grad_norm": 5.4367959327347135, "learning_rate": 9.396246386841998e-06, "loss": 0.2944, "step": 23080 }, { "epoch": 0.47002544529262086, "grad_norm": 17.8676671572025, "learning_rate": 9.40031755078777e-06, "loss": 0.3432, "step": 23090 }, { "epoch": 0.47022900763358777, "grad_norm": 10.589092749596118, "learning_rate": 9.404388714733543e-06, "loss": 0.3137, "step": 23100 }, { "epoch": 0.4704325699745547, "grad_norm": 13.454239855658448, "learning_rate": 9.408459878679315e-06, "loss": 0.3628, "step": 23110 }, { "epoch": 0.47063613231552165, "grad_norm": 12.038186552848744, "learning_rate": 9.412531042625086e-06, "loss": 0.2033, "step": 23120 }, { "epoch": 0.47083969465648856, "grad_norm": 5.2217059161290225, "learning_rate": 9.416602206570859e-06, "loss": 0.3308, "step": 23130 }, { "epoch": 0.4710432569974555, "grad_norm": 15.537343270205112, "learning_rate": 9.420673370516632e-06, "loss": 0.318, "step": 23140 }, { "epoch": 0.4712468193384224, "grad_norm": 1.2479997018091509, "learning_rate": 9.424744534462403e-06, "loss": 0.2412, "step": 23150 }, { "epoch": 0.4714503816793893, "grad_norm": 5.79819599765217, "learning_rate": 9.428815698408175e-06, "loss": 0.2755, "step": 23160 }, { "epoch": 0.4716539440203562, "grad_norm": 21.65697600582194, "learning_rate": 9.432886862353948e-06, "loss": 0.3054, "step": 23170 }, { "epoch": 0.4718575063613232, "grad_norm": 4.8663566645623275, "learning_rate": 9.436958026299719e-06, "loss": 0.2673, "step": 23180 }, { "epoch": 0.4720610687022901, "grad_norm": 7.43372436957221, "learning_rate": 9.441029190245491e-06, "loss": 0.3043, "step": 23190 }, { "epoch": 0.472264631043257, "grad_norm": 14.389361850961292, "learning_rate": 9.445100354191264e-06, "loss": 0.22, "step": 23200 }, { "epoch": 0.4724681933842239, "grad_norm": 10.75996200735265, "learning_rate": 9.449171518137035e-06, "loss": 0.3272, "step": 23210 }, { "epoch": 0.4726717557251908, "grad_norm": 10.806484977093787, "learning_rate": 9.453242682082808e-06, "loss": 0.3723, "step": 23220 }, { "epoch": 0.47287531806615773, "grad_norm": 6.442764915203338, "learning_rate": 9.45731384602858e-06, "loss": 0.3067, "step": 23230 }, { "epoch": 0.4730788804071247, "grad_norm": 10.714543579799884, "learning_rate": 9.461385009974353e-06, "loss": 0.2959, "step": 23240 }, { "epoch": 0.4732824427480916, "grad_norm": 8.122294861432596, "learning_rate": 9.465456173920124e-06, "loss": 0.3187, "step": 23250 }, { "epoch": 0.4734860050890585, "grad_norm": 5.080049532188632, "learning_rate": 9.469527337865896e-06, "loss": 0.3195, "step": 23260 }, { "epoch": 0.47368956743002544, "grad_norm": 7.325641034628724, "learning_rate": 9.473598501811669e-06, "loss": 0.2059, "step": 23270 }, { "epoch": 0.47389312977099235, "grad_norm": 11.858892889338607, "learning_rate": 9.47766966575744e-06, "loss": 0.2267, "step": 23280 }, { "epoch": 0.47409669211195926, "grad_norm": 9.149457515542059, "learning_rate": 9.481740829703213e-06, "loss": 0.2472, "step": 23290 }, { "epoch": 0.47430025445292623, "grad_norm": 8.780815138046647, "learning_rate": 9.485811993648985e-06, "loss": 0.2978, "step": 23300 }, { "epoch": 0.47450381679389314, "grad_norm": 23.923040831835507, "learning_rate": 9.489883157594756e-06, "loss": 0.3583, "step": 23310 }, { "epoch": 0.47470737913486005, "grad_norm": 3.4226287169627776, "learning_rate": 9.493954321540529e-06, "loss": 0.3585, "step": 23320 }, { "epoch": 0.47491094147582696, "grad_norm": 6.093369130721073, "learning_rate": 9.498025485486301e-06, "loss": 0.2967, "step": 23330 }, { "epoch": 0.4751145038167939, "grad_norm": 5.304663848984446, "learning_rate": 9.502096649432072e-06, "loss": 0.3399, "step": 23340 }, { "epoch": 0.4753180661577608, "grad_norm": 0.14512536092671544, "learning_rate": 9.506167813377845e-06, "loss": 0.3611, "step": 23350 }, { "epoch": 0.47552162849872776, "grad_norm": 7.71121675517594, "learning_rate": 9.510238977323618e-06, "loss": 0.2617, "step": 23360 }, { "epoch": 0.47572519083969467, "grad_norm": 10.461316138025461, "learning_rate": 9.514310141269389e-06, "loss": 0.3248, "step": 23370 }, { "epoch": 0.4759287531806616, "grad_norm": 15.16275132576852, "learning_rate": 9.518381305215161e-06, "loss": 0.3066, "step": 23380 }, { "epoch": 0.4761323155216285, "grad_norm": 6.930599478229839, "learning_rate": 9.522452469160934e-06, "loss": 0.2259, "step": 23390 }, { "epoch": 0.4763358778625954, "grad_norm": 8.901412600501658, "learning_rate": 9.526523633106707e-06, "loss": 0.2881, "step": 23400 }, { "epoch": 0.4765394402035623, "grad_norm": 7.381561070974167, "learning_rate": 9.530594797052477e-06, "loss": 0.3181, "step": 23410 }, { "epoch": 0.4767430025445293, "grad_norm": 7.452253712587121, "learning_rate": 9.53466596099825e-06, "loss": 0.3243, "step": 23420 }, { "epoch": 0.4769465648854962, "grad_norm": 10.050194080912055, "learning_rate": 9.538737124944023e-06, "loss": 0.4057, "step": 23430 }, { "epoch": 0.4771501272264631, "grad_norm": 4.110624193075769, "learning_rate": 9.542808288889794e-06, "loss": 0.259, "step": 23440 }, { "epoch": 0.47735368956743, "grad_norm": 21.678115177735297, "learning_rate": 9.546879452835566e-06, "loss": 0.3368, "step": 23450 }, { "epoch": 0.47755725190839693, "grad_norm": 5.5339634042727415, "learning_rate": 9.550950616781339e-06, "loss": 0.2599, "step": 23460 }, { "epoch": 0.47776081424936384, "grad_norm": 8.387780641952919, "learning_rate": 9.55502178072711e-06, "loss": 0.355, "step": 23470 }, { "epoch": 0.4779643765903308, "grad_norm": 12.602028373206338, "learning_rate": 9.559092944672882e-06, "loss": 0.3409, "step": 23480 }, { "epoch": 0.4781679389312977, "grad_norm": 3.5712662447273846, "learning_rate": 9.563164108618655e-06, "loss": 0.1982, "step": 23490 }, { "epoch": 0.47837150127226463, "grad_norm": 8.74192450688208, "learning_rate": 9.567235272564426e-06, "loss": 0.3041, "step": 23500 }, { "epoch": 0.47857506361323154, "grad_norm": 9.691503225067212, "learning_rate": 9.571306436510199e-06, "loss": 0.2867, "step": 23510 }, { "epoch": 0.47877862595419846, "grad_norm": 21.005359593871244, "learning_rate": 9.575377600455971e-06, "loss": 0.3164, "step": 23520 }, { "epoch": 0.47898218829516537, "grad_norm": 7.048574557328933, "learning_rate": 9.579448764401744e-06, "loss": 0.3309, "step": 23530 }, { "epoch": 0.47918575063613233, "grad_norm": 7.320044427376094, "learning_rate": 9.583519928347515e-06, "loss": 0.3961, "step": 23540 }, { "epoch": 0.47938931297709925, "grad_norm": 5.426404193387773, "learning_rate": 9.587591092293287e-06, "loss": 0.3429, "step": 23550 }, { "epoch": 0.47959287531806616, "grad_norm": 4.609125511157022, "learning_rate": 9.59166225623906e-06, "loss": 0.3084, "step": 23560 }, { "epoch": 0.47979643765903307, "grad_norm": 4.963529074013996, "learning_rate": 9.595733420184831e-06, "loss": 0.2873, "step": 23570 }, { "epoch": 0.48, "grad_norm": 6.827900596315227, "learning_rate": 9.599804584130604e-06, "loss": 0.3718, "step": 23580 }, { "epoch": 0.4802035623409669, "grad_norm": 12.816561793431427, "learning_rate": 9.603875748076376e-06, "loss": 0.3721, "step": 23590 }, { "epoch": 0.48040712468193386, "grad_norm": 10.042560942098401, "learning_rate": 9.607946912022147e-06, "loss": 0.3679, "step": 23600 }, { "epoch": 0.4806106870229008, "grad_norm": 12.399633674935155, "learning_rate": 9.61201807596792e-06, "loss": 0.3084, "step": 23610 }, { "epoch": 0.4808142493638677, "grad_norm": 10.745564582865333, "learning_rate": 9.616089239913692e-06, "loss": 0.2057, "step": 23620 }, { "epoch": 0.4810178117048346, "grad_norm": 11.658669863865539, "learning_rate": 9.620160403859463e-06, "loss": 0.2628, "step": 23630 }, { "epoch": 0.4812213740458015, "grad_norm": 16.44158525066778, "learning_rate": 9.624231567805236e-06, "loss": 0.327, "step": 23640 }, { "epoch": 0.4814249363867684, "grad_norm": 8.820150194470852, "learning_rate": 9.628302731751009e-06, "loss": 0.2863, "step": 23650 }, { "epoch": 0.4816284987277354, "grad_norm": 9.837380375397784, "learning_rate": 9.632373895696781e-06, "loss": 0.3972, "step": 23660 }, { "epoch": 0.4818320610687023, "grad_norm": 14.018486144346607, "learning_rate": 9.636445059642552e-06, "loss": 0.265, "step": 23670 }, { "epoch": 0.4820356234096692, "grad_norm": 5.208490522787397, "learning_rate": 9.640516223588325e-06, "loss": 0.37, "step": 23680 }, { "epoch": 0.4822391857506361, "grad_norm": 3.0518426989441196, "learning_rate": 9.644587387534098e-06, "loss": 0.3556, "step": 23690 }, { "epoch": 0.48244274809160304, "grad_norm": 8.675209152025753, "learning_rate": 9.648658551479868e-06, "loss": 0.2923, "step": 23700 }, { "epoch": 0.48264631043256995, "grad_norm": 21.383844114868413, "learning_rate": 9.652729715425641e-06, "loss": 0.3033, "step": 23710 }, { "epoch": 0.4828498727735369, "grad_norm": 16.177495800116475, "learning_rate": 9.656800879371414e-06, "loss": 0.2785, "step": 23720 }, { "epoch": 0.4830534351145038, "grad_norm": 7.632972150443165, "learning_rate": 9.660872043317185e-06, "loss": 0.4163, "step": 23730 }, { "epoch": 0.48325699745547074, "grad_norm": 10.465093407624487, "learning_rate": 9.664943207262957e-06, "loss": 0.3155, "step": 23740 }, { "epoch": 0.48346055979643765, "grad_norm": 11.712562055923456, "learning_rate": 9.66901437120873e-06, "loss": 0.3444, "step": 23750 }, { "epoch": 0.48366412213740456, "grad_norm": 5.699513629519725, "learning_rate": 9.6730855351545e-06, "loss": 0.4356, "step": 23760 }, { "epoch": 0.4838676844783715, "grad_norm": 10.750907366071162, "learning_rate": 9.677156699100273e-06, "loss": 0.3717, "step": 23770 }, { "epoch": 0.48407124681933844, "grad_norm": 8.796675604984527, "learning_rate": 9.681227863046046e-06, "loss": 0.377, "step": 23780 }, { "epoch": 0.48427480916030535, "grad_norm": 11.027614781222075, "learning_rate": 9.685299026991819e-06, "loss": 0.3521, "step": 23790 }, { "epoch": 0.48447837150127226, "grad_norm": 10.613224568505185, "learning_rate": 9.68937019093759e-06, "loss": 0.3335, "step": 23800 }, { "epoch": 0.4846819338422392, "grad_norm": 9.158092108679115, "learning_rate": 9.693441354883362e-06, "loss": 0.4283, "step": 23810 }, { "epoch": 0.4848854961832061, "grad_norm": 6.682825958257876, "learning_rate": 9.697512518829135e-06, "loss": 0.3532, "step": 23820 }, { "epoch": 0.48508905852417306, "grad_norm": 13.031698307549478, "learning_rate": 9.701583682774906e-06, "loss": 0.2885, "step": 23830 }, { "epoch": 0.48529262086513997, "grad_norm": 7.292092791967358, "learning_rate": 9.705654846720678e-06, "loss": 0.2583, "step": 23840 }, { "epoch": 0.4854961832061069, "grad_norm": 13.848051369259975, "learning_rate": 9.709726010666451e-06, "loss": 0.3177, "step": 23850 }, { "epoch": 0.4856997455470738, "grad_norm": 11.612704215880104, "learning_rate": 9.713797174612222e-06, "loss": 0.3003, "step": 23860 }, { "epoch": 0.4859033078880407, "grad_norm": 6.171649128915216, "learning_rate": 9.717868338557995e-06, "loss": 0.3675, "step": 23870 }, { "epoch": 0.4861068702290076, "grad_norm": 10.541486905739491, "learning_rate": 9.721939502503767e-06, "loss": 0.3156, "step": 23880 }, { "epoch": 0.4863104325699746, "grad_norm": 11.058882704179878, "learning_rate": 9.726010666449538e-06, "loss": 0.34, "step": 23890 }, { "epoch": 0.4865139949109415, "grad_norm": 15.631239440819913, "learning_rate": 9.730081830395311e-06, "loss": 0.4223, "step": 23900 }, { "epoch": 0.4867175572519084, "grad_norm": 86.59137445911055, "learning_rate": 9.734152994341084e-06, "loss": 0.4098, "step": 23910 }, { "epoch": 0.4869211195928753, "grad_norm": 19.111846060680236, "learning_rate": 9.738224158286856e-06, "loss": 0.4437, "step": 23920 }, { "epoch": 0.48712468193384223, "grad_norm": 5.197880216711475, "learning_rate": 9.742295322232627e-06, "loss": 0.4174, "step": 23930 }, { "epoch": 0.48732824427480914, "grad_norm": 11.297657550129056, "learning_rate": 9.7463664861784e-06, "loss": 0.3229, "step": 23940 }, { "epoch": 0.4875318066157761, "grad_norm": 10.824327598107226, "learning_rate": 9.750437650124172e-06, "loss": 0.3365, "step": 23950 }, { "epoch": 0.487735368956743, "grad_norm": 9.532990919101255, "learning_rate": 9.754508814069943e-06, "loss": 0.2969, "step": 23960 }, { "epoch": 0.48793893129770993, "grad_norm": 11.158310755539796, "learning_rate": 9.758579978015716e-06, "loss": 0.4022, "step": 23970 }, { "epoch": 0.48814249363867684, "grad_norm": 10.936448639505047, "learning_rate": 9.762651141961489e-06, "loss": 0.3004, "step": 23980 }, { "epoch": 0.48834605597964376, "grad_norm": 9.10044996011828, "learning_rate": 9.76672230590726e-06, "loss": 0.367, "step": 23990 }, { "epoch": 0.48854961832061067, "grad_norm": 5.9867896895529515, "learning_rate": 9.770793469853032e-06, "loss": 0.3046, "step": 24000 }, { "epoch": 0.48875318066157764, "grad_norm": 13.89406284735113, "learning_rate": 9.774864633798805e-06, "loss": 0.3354, "step": 24010 }, { "epoch": 0.48895674300254455, "grad_norm": 36.447353822410584, "learning_rate": 9.778935797744576e-06, "loss": 0.316, "step": 24020 }, { "epoch": 0.48916030534351146, "grad_norm": 7.475459080467179, "learning_rate": 9.783006961690348e-06, "loss": 0.3158, "step": 24030 }, { "epoch": 0.48936386768447837, "grad_norm": 13.056133377172944, "learning_rate": 9.787078125636121e-06, "loss": 0.3015, "step": 24040 }, { "epoch": 0.4895674300254453, "grad_norm": 17.969784067823642, "learning_rate": 9.791149289581894e-06, "loss": 0.302, "step": 24050 }, { "epoch": 0.4897709923664122, "grad_norm": 7.7015308494050165, "learning_rate": 9.795220453527664e-06, "loss": 0.4065, "step": 24060 }, { "epoch": 0.48997455470737916, "grad_norm": 9.683364977304848, "learning_rate": 9.799291617473437e-06, "loss": 0.2901, "step": 24070 }, { "epoch": 0.4901781170483461, "grad_norm": 10.7221972776903, "learning_rate": 9.80336278141921e-06, "loss": 0.3809, "step": 24080 }, { "epoch": 0.490381679389313, "grad_norm": 3.3153739916174523, "learning_rate": 9.80743394536498e-06, "loss": 0.3242, "step": 24090 }, { "epoch": 0.4905852417302799, "grad_norm": 4.276639001834302, "learning_rate": 9.811505109310753e-06, "loss": 0.2381, "step": 24100 }, { "epoch": 0.4907888040712468, "grad_norm": 10.350165988222349, "learning_rate": 9.815576273256526e-06, "loss": 0.3987, "step": 24110 }, { "epoch": 0.4909923664122137, "grad_norm": 14.777068568625005, "learning_rate": 9.819647437202297e-06, "loss": 0.2563, "step": 24120 }, { "epoch": 0.4911959287531807, "grad_norm": 13.091816699085271, "learning_rate": 9.82371860114807e-06, "loss": 0.3644, "step": 24130 }, { "epoch": 0.4913994910941476, "grad_norm": 7.26156181018163, "learning_rate": 9.827789765093842e-06, "loss": 0.3397, "step": 24140 }, { "epoch": 0.4916030534351145, "grad_norm": 10.946204831017843, "learning_rate": 9.831860929039613e-06, "loss": 0.3015, "step": 24150 }, { "epoch": 0.4918066157760814, "grad_norm": 5.758163494191654, "learning_rate": 9.835932092985386e-06, "loss": 0.3163, "step": 24160 }, { "epoch": 0.49201017811704834, "grad_norm": 8.860076223262421, "learning_rate": 9.840003256931158e-06, "loss": 0.3236, "step": 24170 }, { "epoch": 0.49221374045801525, "grad_norm": 7.4349317161214765, "learning_rate": 9.84407442087693e-06, "loss": 0.3098, "step": 24180 }, { "epoch": 0.4924173027989822, "grad_norm": 6.9610060491858725, "learning_rate": 9.848145584822702e-06, "loss": 0.2828, "step": 24190 }, { "epoch": 0.4926208651399491, "grad_norm": 9.087142108001425, "learning_rate": 9.852216748768475e-06, "loss": 0.3393, "step": 24200 }, { "epoch": 0.49282442748091604, "grad_norm": 6.697751592710569, "learning_rate": 9.856287912714247e-06, "loss": 0.2877, "step": 24210 }, { "epoch": 0.49302798982188295, "grad_norm": 8.855685059159994, "learning_rate": 9.860359076660018e-06, "loss": 0.3458, "step": 24220 }, { "epoch": 0.49323155216284986, "grad_norm": 9.38776932731301, "learning_rate": 9.86443024060579e-06, "loss": 0.2946, "step": 24230 }, { "epoch": 0.4934351145038168, "grad_norm": 5.152115874776855, "learning_rate": 9.868501404551563e-06, "loss": 0.2702, "step": 24240 }, { "epoch": 0.49363867684478374, "grad_norm": 11.16628704858639, "learning_rate": 9.872572568497334e-06, "loss": 0.3079, "step": 24250 }, { "epoch": 0.49384223918575065, "grad_norm": 8.35709656331712, "learning_rate": 9.876643732443107e-06, "loss": 0.3235, "step": 24260 }, { "epoch": 0.49404580152671757, "grad_norm": 7.236019180582064, "learning_rate": 9.88071489638888e-06, "loss": 0.2982, "step": 24270 }, { "epoch": 0.4942493638676845, "grad_norm": 18.689698154968823, "learning_rate": 9.88478606033465e-06, "loss": 0.3986, "step": 24280 }, { "epoch": 0.4944529262086514, "grad_norm": 23.260104842380095, "learning_rate": 9.888857224280423e-06, "loss": 0.3058, "step": 24290 }, { "epoch": 0.4946564885496183, "grad_norm": 7.252078604636102, "learning_rate": 9.892928388226196e-06, "loss": 0.4435, "step": 24300 }, { "epoch": 0.49486005089058527, "grad_norm": 8.177977748941348, "learning_rate": 9.896999552171967e-06, "loss": 0.2403, "step": 24310 }, { "epoch": 0.4950636132315522, "grad_norm": 11.48452733587121, "learning_rate": 9.90107071611774e-06, "loss": 0.2459, "step": 24320 }, { "epoch": 0.4952671755725191, "grad_norm": 18.7860148709528, "learning_rate": 9.905141880063512e-06, "loss": 0.4842, "step": 24330 }, { "epoch": 0.495470737913486, "grad_norm": 9.446733751231948, "learning_rate": 9.909213044009283e-06, "loss": 0.2836, "step": 24340 }, { "epoch": 0.4956743002544529, "grad_norm": 6.037384021828571, "learning_rate": 9.913284207955055e-06, "loss": 0.3113, "step": 24350 }, { "epoch": 0.4958778625954198, "grad_norm": 8.051551855504185, "learning_rate": 9.917355371900828e-06, "loss": 0.292, "step": 24360 }, { "epoch": 0.4960814249363868, "grad_norm": 9.64419954890421, "learning_rate": 9.921426535846599e-06, "loss": 0.2973, "step": 24370 }, { "epoch": 0.4962849872773537, "grad_norm": 7.539512219593909, "learning_rate": 9.925497699792372e-06, "loss": 0.3055, "step": 24380 }, { "epoch": 0.4964885496183206, "grad_norm": 9.010173049401011, "learning_rate": 9.929568863738144e-06, "loss": 0.3817, "step": 24390 }, { "epoch": 0.49669211195928753, "grad_norm": 7.236262118345373, "learning_rate": 9.933640027683915e-06, "loss": 0.2729, "step": 24400 }, { "epoch": 0.49689567430025444, "grad_norm": 5.847510159965281, "learning_rate": 9.937711191629688e-06, "loss": 0.388, "step": 24410 }, { "epoch": 0.49709923664122135, "grad_norm": 6.399664311584873, "learning_rate": 9.94178235557546e-06, "loss": 0.2834, "step": 24420 }, { "epoch": 0.4973027989821883, "grad_norm": 3.9014773883584146, "learning_rate": 9.945853519521233e-06, "loss": 0.308, "step": 24430 }, { "epoch": 0.49750636132315523, "grad_norm": 9.744828394574334, "learning_rate": 9.949924683467004e-06, "loss": 0.2896, "step": 24440 }, { "epoch": 0.49770992366412214, "grad_norm": 12.64041379525705, "learning_rate": 9.953995847412777e-06, "loss": 0.3079, "step": 24450 }, { "epoch": 0.49791348600508906, "grad_norm": 8.740929799855705, "learning_rate": 9.95806701135855e-06, "loss": 0.3091, "step": 24460 }, { "epoch": 0.49811704834605597, "grad_norm": 9.001538899059529, "learning_rate": 9.96213817530432e-06, "loss": 0.3019, "step": 24470 }, { "epoch": 0.4983206106870229, "grad_norm": 7.229951737251867, "learning_rate": 9.966209339250093e-06, "loss": 0.2999, "step": 24480 }, { "epoch": 0.49852417302798985, "grad_norm": 6.538372503990473, "learning_rate": 9.970280503195866e-06, "loss": 0.3483, "step": 24490 }, { "epoch": 0.49872773536895676, "grad_norm": 26.31551691513476, "learning_rate": 9.974351667141636e-06, "loss": 0.3375, "step": 24500 }, { "epoch": 0.49893129770992367, "grad_norm": 6.7397306685357075, "learning_rate": 9.978422831087409e-06, "loss": 0.4384, "step": 24510 }, { "epoch": 0.4991348600508906, "grad_norm": 16.00099455955659, "learning_rate": 9.982493995033182e-06, "loss": 0.3057, "step": 24520 }, { "epoch": 0.4993384223918575, "grad_norm": 10.152929037651516, "learning_rate": 9.986565158978953e-06, "loss": 0.3316, "step": 24530 }, { "epoch": 0.4995419847328244, "grad_norm": 16.019077394190504, "learning_rate": 9.990636322924725e-06, "loss": 0.3137, "step": 24540 }, { "epoch": 0.4997455470737914, "grad_norm": 10.544827368794905, "learning_rate": 9.994707486870498e-06, "loss": 0.3078, "step": 24550 }, { "epoch": 0.4999491094147583, "grad_norm": 16.492104983873904, "learning_rate": 9.998778650816269e-06, "loss": 0.3593, "step": 24560 }, { "epoch": 0.5001526717557252, "grad_norm": 5.813789061186107, "learning_rate": 9.999999975259547e-06, "loss": 0.3435, "step": 24570 }, { "epoch": 0.5003562340966922, "grad_norm": 14.77016455423458, "learning_rate": 9.999999854081821e-06, "loss": 0.3297, "step": 24580 }, { "epoch": 0.500559796437659, "grad_norm": 13.05026243447246, "learning_rate": 9.999999631922657e-06, "loss": 0.4469, "step": 24590 }, { "epoch": 0.500763358778626, "grad_norm": 7.869846082890039, "learning_rate": 9.99999930878206e-06, "loss": 0.3011, "step": 24600 }, { "epoch": 0.5009669211195928, "grad_norm": 12.166104978872001, "learning_rate": 9.999998884660037e-06, "loss": 0.3774, "step": 24610 }, { "epoch": 0.5011704834605598, "grad_norm": 6.581145311115501, "learning_rate": 9.999998359556596e-06, "loss": 0.2661, "step": 24620 }, { "epoch": 0.5013740458015267, "grad_norm": 3.9326406710248025, "learning_rate": 9.99999773347175e-06, "loss": 0.3123, "step": 24630 }, { "epoch": 0.5015776081424936, "grad_norm": 12.217641549807938, "learning_rate": 9.999997006405506e-06, "loss": 0.3213, "step": 24640 }, { "epoch": 0.5017811704834606, "grad_norm": 5.07691450629543, "learning_rate": 9.999996178357886e-06, "loss": 0.3174, "step": 24650 }, { "epoch": 0.5019847328244275, "grad_norm": 5.069896735561958, "learning_rate": 9.999995249328902e-06, "loss": 0.3445, "step": 24660 }, { "epoch": 0.5021882951653944, "grad_norm": 14.59567349618866, "learning_rate": 9.999994219318574e-06, "loss": 0.2797, "step": 24670 }, { "epoch": 0.5023918575063613, "grad_norm": 15.100433964211213, "learning_rate": 9.999993088326925e-06, "loss": 0.3982, "step": 24680 }, { "epoch": 0.5025954198473283, "grad_norm": 16.52832918826217, "learning_rate": 9.999991856353972e-06, "loss": 0.2818, "step": 24690 }, { "epoch": 0.5027989821882952, "grad_norm": 12.481082672302396, "learning_rate": 9.999990523399746e-06, "loss": 0.2981, "step": 24700 }, { "epoch": 0.5030025445292621, "grad_norm": 8.790311496513514, "learning_rate": 9.999989089464271e-06, "loss": 0.4077, "step": 24710 }, { "epoch": 0.503206106870229, "grad_norm": 6.158847655548371, "learning_rate": 9.999987554547577e-06, "loss": 0.2739, "step": 24720 }, { "epoch": 0.5034096692111959, "grad_norm": 16.755786518111872, "learning_rate": 9.999985918649693e-06, "loss": 0.4457, "step": 24730 }, { "epoch": 0.5036132315521629, "grad_norm": 4.837876405192685, "learning_rate": 9.999984181770654e-06, "loss": 0.3358, "step": 24740 }, { "epoch": 0.5038167938931297, "grad_norm": 8.807614530547527, "learning_rate": 9.999982343910494e-06, "loss": 0.4225, "step": 24750 }, { "epoch": 0.5040203562340967, "grad_norm": 8.636493843237657, "learning_rate": 9.999980405069253e-06, "loss": 0.317, "step": 24760 }, { "epoch": 0.5042239185750637, "grad_norm": 12.98562236895374, "learning_rate": 9.999978365246964e-06, "loss": 0.3562, "step": 24770 }, { "epoch": 0.5044274809160305, "grad_norm": 18.71142544130426, "learning_rate": 9.999976224443673e-06, "loss": 0.3107, "step": 24780 }, { "epoch": 0.5046310432569975, "grad_norm": 10.818780762201616, "learning_rate": 9.999973982659423e-06, "loss": 0.3874, "step": 24790 }, { "epoch": 0.5048346055979643, "grad_norm": 5.438716574504853, "learning_rate": 9.999971639894259e-06, "loss": 0.2905, "step": 24800 }, { "epoch": 0.5050381679389313, "grad_norm": 8.803039509322723, "learning_rate": 9.999969196148229e-06, "loss": 0.3541, "step": 24810 }, { "epoch": 0.5052417302798983, "grad_norm": 8.050943572470121, "learning_rate": 9.999966651421379e-06, "loss": 0.3086, "step": 24820 }, { "epoch": 0.5054452926208651, "grad_norm": 6.140194812155157, "learning_rate": 9.999964005713763e-06, "loss": 0.3795, "step": 24830 }, { "epoch": 0.5056488549618321, "grad_norm": 5.4398323542333715, "learning_rate": 9.999961259025434e-06, "loss": 0.2601, "step": 24840 }, { "epoch": 0.505852417302799, "grad_norm": 11.009719459925545, "learning_rate": 9.999958411356448e-06, "loss": 0.3337, "step": 24850 }, { "epoch": 0.5060559796437659, "grad_norm": 5.429098094108643, "learning_rate": 9.999955462706861e-06, "loss": 0.3455, "step": 24860 }, { "epoch": 0.5062595419847328, "grad_norm": 5.83999709686785, "learning_rate": 9.999952413076734e-06, "loss": 0.2892, "step": 24870 }, { "epoch": 0.5064631043256997, "grad_norm": 14.38370687809036, "learning_rate": 9.999949262466128e-06, "loss": 0.2917, "step": 24880 }, { "epoch": 0.5066666666666667, "grad_norm": 12.08355569443701, "learning_rate": 9.999946010875108e-06, "loss": 0.3325, "step": 24890 }, { "epoch": 0.5068702290076336, "grad_norm": 10.630768942913571, "learning_rate": 9.999942658303738e-06, "loss": 0.3397, "step": 24900 }, { "epoch": 0.5070737913486005, "grad_norm": 6.401846072131725, "learning_rate": 9.999939204752086e-06, "loss": 0.2581, "step": 24910 }, { "epoch": 0.5072773536895674, "grad_norm": 25.749512591051488, "learning_rate": 9.99993565022022e-06, "loss": 0.3552, "step": 24920 }, { "epoch": 0.5074809160305344, "grad_norm": 7.217491289593392, "learning_rate": 9.999931994708216e-06, "loss": 0.307, "step": 24930 }, { "epoch": 0.5076844783715013, "grad_norm": 11.530977547047907, "learning_rate": 9.999928238216145e-06, "loss": 0.3131, "step": 24940 }, { "epoch": 0.5078880407124682, "grad_norm": 10.763527420227373, "learning_rate": 9.999924380744083e-06, "loss": 0.3366, "step": 24950 }, { "epoch": 0.5080916030534351, "grad_norm": 4.792711254676377, "learning_rate": 9.999920422292108e-06, "loss": 0.2543, "step": 24960 }, { "epoch": 0.508295165394402, "grad_norm": 9.081276348679351, "learning_rate": 9.9999163628603e-06, "loss": 0.3691, "step": 24970 }, { "epoch": 0.508498727735369, "grad_norm": 11.013682832319375, "learning_rate": 9.999912202448743e-06, "loss": 0.3809, "step": 24980 }, { "epoch": 0.5087022900763358, "grad_norm": 6.226388318452471, "learning_rate": 9.999907941057517e-06, "loss": 0.3035, "step": 24990 }, { "epoch": 0.5089058524173028, "grad_norm": 9.948551566240257, "learning_rate": 9.999903578686711e-06, "loss": 0.3773, "step": 25000 }, { "epoch": 0.5091094147582698, "grad_norm": 13.990247278623048, "learning_rate": 9.999899115336414e-06, "loss": 0.3979, "step": 25010 }, { "epoch": 0.5093129770992366, "grad_norm": 4.431293515082209, "learning_rate": 9.999894551006712e-06, "loss": 0.2441, "step": 25020 }, { "epoch": 0.5095165394402036, "grad_norm": 9.560832356440034, "learning_rate": 9.999889885697703e-06, "loss": 0.2247, "step": 25030 }, { "epoch": 0.5097201017811704, "grad_norm": 28.162558385465996, "learning_rate": 9.999885119409473e-06, "loss": 0.3212, "step": 25040 }, { "epoch": 0.5099236641221374, "grad_norm": 13.557456535601109, "learning_rate": 9.999880252142128e-06, "loss": 0.3466, "step": 25050 }, { "epoch": 0.5101272264631044, "grad_norm": 6.932952598598618, "learning_rate": 9.99987528389576e-06, "loss": 0.2892, "step": 25060 }, { "epoch": 0.5103307888040712, "grad_norm": 5.474946414681837, "learning_rate": 9.99987021467047e-06, "loss": 0.3176, "step": 25070 }, { "epoch": 0.5105343511450382, "grad_norm": 10.574948983336023, "learning_rate": 9.999865044466362e-06, "loss": 0.299, "step": 25080 }, { "epoch": 0.5107379134860051, "grad_norm": 15.968640373224602, "learning_rate": 9.999859773283539e-06, "loss": 0.3294, "step": 25090 }, { "epoch": 0.510941475826972, "grad_norm": 9.804330066685198, "learning_rate": 9.999854401122108e-06, "loss": 0.3165, "step": 25100 }, { "epoch": 0.5111450381679389, "grad_norm": 7.237689872452756, "learning_rate": 9.999848927982178e-06, "loss": 0.3617, "step": 25110 }, { "epoch": 0.5113486005089058, "grad_norm": 6.549440626305227, "learning_rate": 9.99984335386386e-06, "loss": 0.3229, "step": 25120 }, { "epoch": 0.5115521628498728, "grad_norm": 3.3759729804995575, "learning_rate": 9.999837678767265e-06, "loss": 0.3862, "step": 25130 }, { "epoch": 0.5117557251908397, "grad_norm": 5.196574716888794, "learning_rate": 9.999831902692507e-06, "loss": 0.3209, "step": 25140 }, { "epoch": 0.5119592875318066, "grad_norm": 9.391143560777268, "learning_rate": 9.999826025639706e-06, "loss": 0.338, "step": 25150 }, { "epoch": 0.5121628498727735, "grad_norm": 5.150912623501877, "learning_rate": 9.999820047608978e-06, "loss": 0.2761, "step": 25160 }, { "epoch": 0.5123664122137405, "grad_norm": 10.763415957461131, "learning_rate": 9.999813968600445e-06, "loss": 0.3798, "step": 25170 }, { "epoch": 0.5125699745547074, "grad_norm": 6.605503747476924, "learning_rate": 9.999807788614228e-06, "loss": 0.2758, "step": 25180 }, { "epoch": 0.5127735368956743, "grad_norm": 9.118833513678268, "learning_rate": 9.999801507650454e-06, "loss": 0.3, "step": 25190 }, { "epoch": 0.5129770992366413, "grad_norm": 22.03932412681489, "learning_rate": 9.999795125709249e-06, "loss": 0.2694, "step": 25200 }, { "epoch": 0.5131806615776081, "grad_norm": 3.8318871111825836, "learning_rate": 9.99978864279074e-06, "loss": 0.2927, "step": 25210 }, { "epoch": 0.5133842239185751, "grad_norm": 7.802315349861222, "learning_rate": 9.999782058895061e-06, "loss": 0.2841, "step": 25220 }, { "epoch": 0.5135877862595419, "grad_norm": 9.891128237628457, "learning_rate": 9.999775374022343e-06, "loss": 0.2651, "step": 25230 }, { "epoch": 0.5137913486005089, "grad_norm": 1.7593914942069269, "learning_rate": 9.999768588172722e-06, "loss": 0.3133, "step": 25240 }, { "epoch": 0.5139949109414759, "grad_norm": 27.15479156939775, "learning_rate": 9.999761701346335e-06, "loss": 0.3429, "step": 25250 }, { "epoch": 0.5141984732824427, "grad_norm": 13.482403591862917, "learning_rate": 9.99975471354332e-06, "loss": 0.2739, "step": 25260 }, { "epoch": 0.5144020356234097, "grad_norm": 18.254804324614405, "learning_rate": 9.99974762476382e-06, "loss": 0.3586, "step": 25270 }, { "epoch": 0.5146055979643765, "grad_norm": 11.043530097700842, "learning_rate": 9.999740435007978e-06, "loss": 0.3045, "step": 25280 }, { "epoch": 0.5148091603053435, "grad_norm": 6.216065989836105, "learning_rate": 9.999733144275935e-06, "loss": 0.2653, "step": 25290 }, { "epoch": 0.5150127226463105, "grad_norm": 7.399433385267858, "learning_rate": 9.999725752567845e-06, "loss": 0.2661, "step": 25300 }, { "epoch": 0.5152162849872773, "grad_norm": 4.627469013883098, "learning_rate": 9.999718259883853e-06, "loss": 0.3094, "step": 25310 }, { "epoch": 0.5154198473282443, "grad_norm": 11.44683766671465, "learning_rate": 9.99971066622411e-06, "loss": 0.3933, "step": 25320 }, { "epoch": 0.5156234096692112, "grad_norm": 18.020660860971834, "learning_rate": 9.999702971588772e-06, "loss": 0.2943, "step": 25330 }, { "epoch": 0.5158269720101781, "grad_norm": 16.612508854611857, "learning_rate": 9.999695175977991e-06, "loss": 0.2887, "step": 25340 }, { "epoch": 0.516030534351145, "grad_norm": 9.046903823166076, "learning_rate": 9.999687279391928e-06, "loss": 0.3136, "step": 25350 }, { "epoch": 0.516234096692112, "grad_norm": 16.958661027588175, "learning_rate": 9.99967928183074e-06, "loss": 0.3302, "step": 25360 }, { "epoch": 0.5164376590330789, "grad_norm": 11.193004932513842, "learning_rate": 9.99967118329459e-06, "loss": 0.3496, "step": 25370 }, { "epoch": 0.5166412213740458, "grad_norm": 9.044474800278937, "learning_rate": 9.99966298378364e-06, "loss": 0.2993, "step": 25380 }, { "epoch": 0.5168447837150127, "grad_norm": 16.429326153722673, "learning_rate": 9.999654683298058e-06, "loss": 0.31, "step": 25390 }, { "epoch": 0.5170483460559796, "grad_norm": 0.09517565889601629, "learning_rate": 9.999646281838007e-06, "loss": 0.2838, "step": 25400 }, { "epoch": 0.5172519083969466, "grad_norm": 9.729420648857177, "learning_rate": 9.999637779403663e-06, "loss": 0.3249, "step": 25410 }, { "epoch": 0.5174554707379135, "grad_norm": 5.8282506144821795, "learning_rate": 9.999629175995194e-06, "loss": 0.3799, "step": 25420 }, { "epoch": 0.5176590330788804, "grad_norm": 7.33331809044595, "learning_rate": 9.999620471612773e-06, "loss": 0.3616, "step": 25430 }, { "epoch": 0.5178625954198474, "grad_norm": 5.39754026597469, "learning_rate": 9.999611666256579e-06, "loss": 0.3456, "step": 25440 }, { "epoch": 0.5180661577608142, "grad_norm": 3.643237319014668, "learning_rate": 9.999602759926785e-06, "loss": 0.279, "step": 25450 }, { "epoch": 0.5182697201017812, "grad_norm": 8.323843929522228, "learning_rate": 9.999593752623576e-06, "loss": 0.2664, "step": 25460 }, { "epoch": 0.518473282442748, "grad_norm": 7.988791025636498, "learning_rate": 9.99958464434713e-06, "loss": 0.3229, "step": 25470 }, { "epoch": 0.518676844783715, "grad_norm": 7.110951602074766, "learning_rate": 9.999575435097637e-06, "loss": 0.4042, "step": 25480 }, { "epoch": 0.518880407124682, "grad_norm": 40.073658073071684, "learning_rate": 9.999566124875276e-06, "loss": 0.2903, "step": 25490 }, { "epoch": 0.5190839694656488, "grad_norm": 5.547437141035592, "learning_rate": 9.999556713680237e-06, "loss": 0.2796, "step": 25500 }, { "epoch": 0.5192875318066158, "grad_norm": 7.128869330557911, "learning_rate": 9.999547201512711e-06, "loss": 0.2638, "step": 25510 }, { "epoch": 0.5194910941475827, "grad_norm": 7.9422692763202365, "learning_rate": 9.99953758837289e-06, "loss": 0.327, "step": 25520 }, { "epoch": 0.5196946564885496, "grad_norm": 10.034226971819155, "learning_rate": 9.999527874260967e-06, "loss": 0.2721, "step": 25530 }, { "epoch": 0.5198982188295166, "grad_norm": 9.56243945833858, "learning_rate": 9.999518059177143e-06, "loss": 0.3203, "step": 25540 }, { "epoch": 0.5201017811704834, "grad_norm": 5.488746996756639, "learning_rate": 9.999508143121609e-06, "loss": 0.331, "step": 25550 }, { "epoch": 0.5203053435114504, "grad_norm": 3.07559093343711, "learning_rate": 9.999498126094569e-06, "loss": 0.3122, "step": 25560 }, { "epoch": 0.5205089058524173, "grad_norm": 6.40926384752535, "learning_rate": 9.999488008096226e-06, "loss": 0.3765, "step": 25570 }, { "epoch": 0.5207124681933842, "grad_norm": 7.092266413259816, "learning_rate": 9.999477789126783e-06, "loss": 0.2883, "step": 25580 }, { "epoch": 0.5209160305343511, "grad_norm": 4.619798197776308, "learning_rate": 9.999467469186448e-06, "loss": 0.3175, "step": 25590 }, { "epoch": 0.5211195928753181, "grad_norm": 10.435846132818206, "learning_rate": 9.999457048275427e-06, "loss": 0.3139, "step": 25600 }, { "epoch": 0.521323155216285, "grad_norm": 13.374636062639697, "learning_rate": 9.999446526393931e-06, "loss": 0.2837, "step": 25610 }, { "epoch": 0.5215267175572519, "grad_norm": 8.001187214496651, "learning_rate": 9.999435903542175e-06, "loss": 0.2486, "step": 25620 }, { "epoch": 0.5217302798982189, "grad_norm": 8.908122974048549, "learning_rate": 9.99942517972037e-06, "loss": 0.2639, "step": 25630 }, { "epoch": 0.5219338422391857, "grad_norm": 6.495513252190244, "learning_rate": 9.999414354928734e-06, "loss": 0.2998, "step": 25640 }, { "epoch": 0.5221374045801527, "grad_norm": 2.7787013408770105, "learning_rate": 9.999403429167487e-06, "loss": 0.2726, "step": 25650 }, { "epoch": 0.5223409669211196, "grad_norm": 10.808603952328713, "learning_rate": 9.99939240243685e-06, "loss": 0.2304, "step": 25660 }, { "epoch": 0.5225445292620865, "grad_norm": 6.334344708228831, "learning_rate": 9.999381274737041e-06, "loss": 0.3779, "step": 25670 }, { "epoch": 0.5227480916030535, "grad_norm": 12.085838052737124, "learning_rate": 9.999370046068292e-06, "loss": 0.3048, "step": 25680 }, { "epoch": 0.5229516539440203, "grad_norm": 13.573346335546697, "learning_rate": 9.999358716430822e-06, "loss": 0.3909, "step": 25690 }, { "epoch": 0.5231552162849873, "grad_norm": 5.715698181801909, "learning_rate": 9.999347285824866e-06, "loss": 0.323, "step": 25700 }, { "epoch": 0.5233587786259541, "grad_norm": 4.738263201014404, "learning_rate": 9.999335754250649e-06, "loss": 0.3579, "step": 25710 }, { "epoch": 0.5235623409669211, "grad_norm": 26.9441963611518, "learning_rate": 9.99932412170841e-06, "loss": 0.2658, "step": 25720 }, { "epoch": 0.5237659033078881, "grad_norm": 7.054003114749113, "learning_rate": 9.999312388198382e-06, "loss": 0.2612, "step": 25730 }, { "epoch": 0.5239694656488549, "grad_norm": 11.332519203479192, "learning_rate": 9.9993005537208e-06, "loss": 0.3047, "step": 25740 }, { "epoch": 0.5241730279898219, "grad_norm": 16.893949082376057, "learning_rate": 9.999288618275904e-06, "loss": 0.3483, "step": 25750 }, { "epoch": 0.5243765903307888, "grad_norm": 5.484306490548045, "learning_rate": 9.999276581863937e-06, "loss": 0.3887, "step": 25760 }, { "epoch": 0.5245801526717557, "grad_norm": 9.254206614743582, "learning_rate": 9.99926444448514e-06, "loss": 0.2727, "step": 25770 }, { "epoch": 0.5247837150127227, "grad_norm": 4.3114391206964395, "learning_rate": 9.999252206139757e-06, "loss": 0.3068, "step": 25780 }, { "epoch": 0.5249872773536896, "grad_norm": 5.94408472874759, "learning_rate": 9.999239866828039e-06, "loss": 0.3219, "step": 25790 }, { "epoch": 0.5251908396946565, "grad_norm": 18.071835783453906, "learning_rate": 9.99922742655023e-06, "loss": 0.3733, "step": 25800 }, { "epoch": 0.5253944020356234, "grad_norm": 10.707731772908202, "learning_rate": 9.999214885306586e-06, "loss": 0.3851, "step": 25810 }, { "epoch": 0.5255979643765903, "grad_norm": 9.405531917304465, "learning_rate": 9.999202243097358e-06, "loss": 0.2647, "step": 25820 }, { "epoch": 0.5258015267175572, "grad_norm": 8.172580459432869, "learning_rate": 9.999189499922802e-06, "loss": 0.3549, "step": 25830 }, { "epoch": 0.5260050890585242, "grad_norm": 13.016572820469227, "learning_rate": 9.999176655783176e-06, "loss": 0.3146, "step": 25840 }, { "epoch": 0.5262086513994911, "grad_norm": 10.40658795635845, "learning_rate": 9.999163710678737e-06, "loss": 0.3476, "step": 25850 }, { "epoch": 0.526412213740458, "grad_norm": 8.921632284553839, "learning_rate": 9.999150664609747e-06, "loss": 0.3011, "step": 25860 }, { "epoch": 0.526615776081425, "grad_norm": 7.001990177468973, "learning_rate": 9.999137517576474e-06, "loss": 0.3037, "step": 25870 }, { "epoch": 0.5268193384223918, "grad_norm": 6.2131412262874575, "learning_rate": 9.999124269579177e-06, "loss": 0.23, "step": 25880 }, { "epoch": 0.5270229007633588, "grad_norm": 7.906396587617039, "learning_rate": 9.999110920618128e-06, "loss": 0.4577, "step": 25890 }, { "epoch": 0.5272264631043257, "grad_norm": 12.089805411567928, "learning_rate": 9.999097470693596e-06, "loss": 0.2949, "step": 25900 }, { "epoch": 0.5274300254452926, "grad_norm": 12.129827190538474, "learning_rate": 9.999083919805848e-06, "loss": 0.2758, "step": 25910 }, { "epoch": 0.5276335877862596, "grad_norm": 10.9956640099601, "learning_rate": 9.999070267955165e-06, "loss": 0.3475, "step": 25920 }, { "epoch": 0.5278371501272264, "grad_norm": 7.311294787395927, "learning_rate": 9.999056515141818e-06, "loss": 0.2832, "step": 25930 }, { "epoch": 0.5280407124681934, "grad_norm": 7.0494514263397186, "learning_rate": 9.999042661366087e-06, "loss": 0.2697, "step": 25940 }, { "epoch": 0.5282442748091603, "grad_norm": 3.1083433405271177, "learning_rate": 9.99902870662825e-06, "loss": 0.3103, "step": 25950 }, { "epoch": 0.5284478371501272, "grad_norm": 5.366146857650942, "learning_rate": 9.99901465092859e-06, "loss": 0.317, "step": 25960 }, { "epoch": 0.5286513994910942, "grad_norm": 12.019133381474292, "learning_rate": 9.999000494267388e-06, "loss": 0.3304, "step": 25970 }, { "epoch": 0.528854961832061, "grad_norm": 10.889768957595834, "learning_rate": 9.998986236644934e-06, "loss": 0.314, "step": 25980 }, { "epoch": 0.529058524173028, "grad_norm": 7.771149821086755, "learning_rate": 9.998971878061515e-06, "loss": 0.3091, "step": 25990 }, { "epoch": 0.5292620865139949, "grad_norm": 17.453746568412893, "learning_rate": 9.99895741851742e-06, "loss": 0.3558, "step": 26000 }, { "epoch": 0.5294656488549618, "grad_norm": 12.913201137564759, "learning_rate": 9.99894285801294e-06, "loss": 0.3415, "step": 26010 }, { "epoch": 0.5296692111959288, "grad_norm": 14.016339615948707, "learning_rate": 9.998928196548373e-06, "loss": 0.2672, "step": 26020 }, { "epoch": 0.5298727735368957, "grad_norm": 10.000378696103539, "learning_rate": 9.998913434124012e-06, "loss": 0.2917, "step": 26030 }, { "epoch": 0.5300763358778626, "grad_norm": 8.936888727396122, "learning_rate": 9.998898570740152e-06, "loss": 0.3316, "step": 26040 }, { "epoch": 0.5302798982188295, "grad_norm": 10.081593253997308, "learning_rate": 9.998883606397101e-06, "loss": 0.2743, "step": 26050 }, { "epoch": 0.5304834605597964, "grad_norm": 11.8189561136633, "learning_rate": 9.998868541095154e-06, "loss": 0.3496, "step": 26060 }, { "epoch": 0.5306870229007633, "grad_norm": 5.050282823656299, "learning_rate": 9.99885337483462e-06, "loss": 0.2558, "step": 26070 }, { "epoch": 0.5308905852417303, "grad_norm": 26.68001481075265, "learning_rate": 9.998838107615804e-06, "loss": 0.3362, "step": 26080 }, { "epoch": 0.5310941475826972, "grad_norm": 6.172947949880142, "learning_rate": 9.998822739439014e-06, "loss": 0.286, "step": 26090 }, { "epoch": 0.5312977099236641, "grad_norm": 23.579655685256835, "learning_rate": 9.99880727030456e-06, "loss": 0.2848, "step": 26100 }, { "epoch": 0.5315012722646311, "grad_norm": 15.150186018329373, "learning_rate": 9.998791700212756e-06, "loss": 0.3428, "step": 26110 }, { "epoch": 0.5317048346055979, "grad_norm": 10.187461303277807, "learning_rate": 9.998776029163913e-06, "loss": 0.4029, "step": 26120 }, { "epoch": 0.5319083969465649, "grad_norm": 13.006666090385252, "learning_rate": 9.99876025715835e-06, "loss": 0.3286, "step": 26130 }, { "epoch": 0.5321119592875319, "grad_norm": 7.410658337558456, "learning_rate": 9.998744384196386e-06, "loss": 0.2214, "step": 26140 }, { "epoch": 0.5323155216284987, "grad_norm": 14.243947870910182, "learning_rate": 9.99872841027834e-06, "loss": 0.4785, "step": 26150 }, { "epoch": 0.5325190839694657, "grad_norm": 5.238258449396081, "learning_rate": 9.998712335404536e-06, "loss": 0.3263, "step": 26160 }, { "epoch": 0.5327226463104325, "grad_norm": 10.332934437446939, "learning_rate": 9.998696159575299e-06, "loss": 0.3008, "step": 26170 }, { "epoch": 0.5329262086513995, "grad_norm": 5.979453351296116, "learning_rate": 9.998679882790954e-06, "loss": 0.233, "step": 26180 }, { "epoch": 0.5331297709923664, "grad_norm": 6.6885793592288705, "learning_rate": 9.99866350505183e-06, "loss": 0.3014, "step": 26190 }, { "epoch": 0.5333333333333333, "grad_norm": 16.435752203165897, "learning_rate": 9.998647026358259e-06, "loss": 0.2927, "step": 26200 }, { "epoch": 0.5335368956743003, "grad_norm": 15.398189067285214, "learning_rate": 9.998630446710572e-06, "loss": 0.3832, "step": 26210 }, { "epoch": 0.5337404580152671, "grad_norm": 10.4348021385295, "learning_rate": 9.998613766109107e-06, "loss": 0.457, "step": 26220 }, { "epoch": 0.5339440203562341, "grad_norm": 10.944603018921274, "learning_rate": 9.998596984554196e-06, "loss": 0.3089, "step": 26230 }, { "epoch": 0.534147582697201, "grad_norm": 10.743080551221736, "learning_rate": 9.998580102046182e-06, "loss": 0.4004, "step": 26240 }, { "epoch": 0.5343511450381679, "grad_norm": 7.867365392947583, "learning_rate": 9.998563118585405e-06, "loss": 0.2969, "step": 26250 }, { "epoch": 0.5345547073791349, "grad_norm": 11.893996934960862, "learning_rate": 9.998546034172208e-06, "loss": 0.3596, "step": 26260 }, { "epoch": 0.5347582697201018, "grad_norm": 0.8394560398659683, "learning_rate": 9.998528848806937e-06, "loss": 0.3509, "step": 26270 }, { "epoch": 0.5349618320610687, "grad_norm": 4.628490984455547, "learning_rate": 9.998511562489936e-06, "loss": 0.2463, "step": 26280 }, { "epoch": 0.5351653944020356, "grad_norm": 18.975463344676907, "learning_rate": 9.998494175221557e-06, "loss": 0.2682, "step": 26290 }, { "epoch": 0.5353689567430026, "grad_norm": 7.504979549249476, "learning_rate": 9.998476687002147e-06, "loss": 0.2519, "step": 26300 }, { "epoch": 0.5355725190839694, "grad_norm": 3.941895481290921, "learning_rate": 9.998459097832067e-06, "loss": 0.2802, "step": 26310 }, { "epoch": 0.5357760814249364, "grad_norm": 10.314965912623755, "learning_rate": 9.998441407711665e-06, "loss": 0.2511, "step": 26320 }, { "epoch": 0.5359796437659033, "grad_norm": 13.057900368930069, "learning_rate": 9.998423616641299e-06, "loss": 0.3748, "step": 26330 }, { "epoch": 0.5361832061068702, "grad_norm": 4.439630046789313, "learning_rate": 9.998405724621334e-06, "loss": 0.3775, "step": 26340 }, { "epoch": 0.5363867684478372, "grad_norm": 12.271731509448472, "learning_rate": 9.998387731652127e-06, "loss": 0.3549, "step": 26350 }, { "epoch": 0.536590330788804, "grad_norm": 4.068582509700017, "learning_rate": 9.99836963773404e-06, "loss": 0.2375, "step": 26360 }, { "epoch": 0.536793893129771, "grad_norm": 12.88874428229158, "learning_rate": 9.99835144286744e-06, "loss": 0.3262, "step": 26370 }, { "epoch": 0.536997455470738, "grad_norm": 13.445136482938064, "learning_rate": 9.998333147052696e-06, "loss": 0.2933, "step": 26380 }, { "epoch": 0.5372010178117048, "grad_norm": 16.92846903912309, "learning_rate": 9.998314750290176e-06, "loss": 0.356, "step": 26390 }, { "epoch": 0.5374045801526718, "grad_norm": 6.6129613388241335, "learning_rate": 9.998296252580252e-06, "loss": 0.3153, "step": 26400 }, { "epoch": 0.5376081424936386, "grad_norm": 9.968603635904799, "learning_rate": 9.998277653923297e-06, "loss": 0.2747, "step": 26410 }, { "epoch": 0.5378117048346056, "grad_norm": 8.837771407235381, "learning_rate": 9.998258954319688e-06, "loss": 0.3656, "step": 26420 }, { "epoch": 0.5380152671755725, "grad_norm": 11.756850388316252, "learning_rate": 9.9982401537698e-06, "loss": 0.2489, "step": 26430 }, { "epoch": 0.5382188295165394, "grad_norm": 16.109238134268853, "learning_rate": 9.998221252274016e-06, "loss": 0.3577, "step": 26440 }, { "epoch": 0.5384223918575064, "grad_norm": 5.916562101150206, "learning_rate": 9.998202249832716e-06, "loss": 0.2972, "step": 26450 }, { "epoch": 0.5386259541984733, "grad_norm": 11.906924117564749, "learning_rate": 9.998183146446283e-06, "loss": 0.2669, "step": 26460 }, { "epoch": 0.5388295165394402, "grad_norm": 7.469647003090889, "learning_rate": 9.998163942115105e-06, "loss": 0.3226, "step": 26470 }, { "epoch": 0.5390330788804071, "grad_norm": 14.808919238221668, "learning_rate": 9.998144636839567e-06, "loss": 0.4118, "step": 26480 }, { "epoch": 0.539236641221374, "grad_norm": 5.019811399963404, "learning_rate": 9.998125230620061e-06, "loss": 0.3086, "step": 26490 }, { "epoch": 0.539440203562341, "grad_norm": 11.955414980846346, "learning_rate": 9.998105723456978e-06, "loss": 0.3336, "step": 26500 }, { "epoch": 0.5396437659033079, "grad_norm": 11.255508034763524, "learning_rate": 9.998086115350715e-06, "loss": 0.3078, "step": 26510 }, { "epoch": 0.5398473282442748, "grad_norm": 5.856829966042512, "learning_rate": 9.998066406301663e-06, "loss": 0.3466, "step": 26520 }, { "epoch": 0.5400508905852417, "grad_norm": 8.839884045670507, "learning_rate": 9.998046596310223e-06, "loss": 0.3039, "step": 26530 }, { "epoch": 0.5402544529262087, "grad_norm": 5.879456809201964, "learning_rate": 9.998026685376795e-06, "loss": 0.2819, "step": 26540 }, { "epoch": 0.5404580152671755, "grad_norm": 7.402928922017705, "learning_rate": 9.99800667350178e-06, "loss": 0.3522, "step": 26550 }, { "epoch": 0.5406615776081425, "grad_norm": 11.348830634399675, "learning_rate": 9.997986560685584e-06, "loss": 0.301, "step": 26560 }, { "epoch": 0.5408651399491095, "grad_norm": 10.634063563985995, "learning_rate": 9.99796634692861e-06, "loss": 0.3938, "step": 26570 }, { "epoch": 0.5410687022900763, "grad_norm": 3.6390645759895928, "learning_rate": 9.99794603223127e-06, "loss": 0.3149, "step": 26580 }, { "epoch": 0.5412722646310433, "grad_norm": 6.706643281464107, "learning_rate": 9.997925616593973e-06, "loss": 0.3058, "step": 26590 }, { "epoch": 0.5414758269720101, "grad_norm": 3.0233388816142694, "learning_rate": 9.997905100017129e-06, "loss": 0.3624, "step": 26600 }, { "epoch": 0.5416793893129771, "grad_norm": 10.501427541170056, "learning_rate": 9.997884482501154e-06, "loss": 0.3425, "step": 26610 }, { "epoch": 0.5418829516539441, "grad_norm": 5.285144033188634, "learning_rate": 9.997863764046466e-06, "loss": 0.3667, "step": 26620 }, { "epoch": 0.5420865139949109, "grad_norm": 4.248043192378411, "learning_rate": 9.997842944653484e-06, "loss": 0.3219, "step": 26630 }, { "epoch": 0.5422900763358779, "grad_norm": 10.665231728371248, "learning_rate": 9.997822024322623e-06, "loss": 0.3395, "step": 26640 }, { "epoch": 0.5424936386768447, "grad_norm": 12.918727438441417, "learning_rate": 9.99780100305431e-06, "loss": 0.3344, "step": 26650 }, { "epoch": 0.5426972010178117, "grad_norm": 27.3290118898133, "learning_rate": 9.99777988084897e-06, "loss": 0.2811, "step": 26660 }, { "epoch": 0.5429007633587786, "grad_norm": 5.0218931299346785, "learning_rate": 9.997758657707029e-06, "loss": 0.3277, "step": 26670 }, { "epoch": 0.5431043256997455, "grad_norm": 9.128682024575152, "learning_rate": 9.997737333628912e-06, "loss": 0.3816, "step": 26680 }, { "epoch": 0.5433078880407125, "grad_norm": 10.544261667941395, "learning_rate": 9.997715908615053e-06, "loss": 0.3328, "step": 26690 }, { "epoch": 0.5435114503816794, "grad_norm": 13.477375027942674, "learning_rate": 9.997694382665885e-06, "loss": 0.369, "step": 26700 }, { "epoch": 0.5437150127226463, "grad_norm": 7.037414995837304, "learning_rate": 9.997672755781841e-06, "loss": 0.3542, "step": 26710 }, { "epoch": 0.5439185750636132, "grad_norm": 17.81219594637997, "learning_rate": 9.997651027963361e-06, "loss": 0.3469, "step": 26720 }, { "epoch": 0.5441221374045802, "grad_norm": 9.019773376983546, "learning_rate": 9.99762919921088e-06, "loss": 0.2665, "step": 26730 }, { "epoch": 0.5443256997455471, "grad_norm": 3.7935534502693407, "learning_rate": 9.997607269524842e-06, "loss": 0.3052, "step": 26740 }, { "epoch": 0.544529262086514, "grad_norm": 4.519874522918317, "learning_rate": 9.997585238905686e-06, "loss": 0.3333, "step": 26750 }, { "epoch": 0.5447328244274809, "grad_norm": 12.868640608989642, "learning_rate": 9.99756310735386e-06, "loss": 0.3528, "step": 26760 }, { "epoch": 0.5449363867684478, "grad_norm": 10.502672101044139, "learning_rate": 9.99754087486981e-06, "loss": 0.2808, "step": 26770 }, { "epoch": 0.5451399491094148, "grad_norm": 20.39072219557206, "learning_rate": 9.997518541453987e-06, "loss": 0.2709, "step": 26780 }, { "epoch": 0.5453435114503816, "grad_norm": 8.167237962099232, "learning_rate": 9.99749610710684e-06, "loss": 0.3465, "step": 26790 }, { "epoch": 0.5455470737913486, "grad_norm": 17.166326391274016, "learning_rate": 9.99747357182882e-06, "loss": 0.3109, "step": 26800 }, { "epoch": 0.5457506361323156, "grad_norm": 3.3779023684885536, "learning_rate": 9.997450935620386e-06, "loss": 0.3299, "step": 26810 }, { "epoch": 0.5459541984732824, "grad_norm": 8.81816616013547, "learning_rate": 9.997428198481993e-06, "loss": 0.3204, "step": 26820 }, { "epoch": 0.5461577608142494, "grad_norm": 21.180928578137568, "learning_rate": 9.997405360414104e-06, "loss": 0.2611, "step": 26830 }, { "epoch": 0.5463613231552162, "grad_norm": 16.3261845059903, "learning_rate": 9.997382421417173e-06, "loss": 0.3379, "step": 26840 }, { "epoch": 0.5465648854961832, "grad_norm": 104.44813633622792, "learning_rate": 9.997359381491669e-06, "loss": 0.375, "step": 26850 }, { "epoch": 0.5467684478371502, "grad_norm": 7.6978262857656, "learning_rate": 9.997336240638056e-06, "loss": 0.3775, "step": 26860 }, { "epoch": 0.546972010178117, "grad_norm": 12.159077070507264, "learning_rate": 9.9973129988568e-06, "loss": 0.3292, "step": 26870 }, { "epoch": 0.547175572519084, "grad_norm": 3.8771645534752004, "learning_rate": 9.997289656148374e-06, "loss": 0.3654, "step": 26880 }, { "epoch": 0.5473791348600509, "grad_norm": 19.88764989550802, "learning_rate": 9.997266212513245e-06, "loss": 0.3119, "step": 26890 }, { "epoch": 0.5475826972010178, "grad_norm": 10.455606416840993, "learning_rate": 9.99724266795189e-06, "loss": 0.327, "step": 26900 }, { "epoch": 0.5477862595419848, "grad_norm": 8.469112557740868, "learning_rate": 9.997219022464781e-06, "loss": 0.3122, "step": 26910 }, { "epoch": 0.5479898218829516, "grad_norm": 6.573352672298155, "learning_rate": 9.997195276052397e-06, "loss": 0.3247, "step": 26920 }, { "epoch": 0.5481933842239186, "grad_norm": 4.12851207735945, "learning_rate": 9.99717142871522e-06, "loss": 0.2429, "step": 26930 }, { "epoch": 0.5483969465648855, "grad_norm": 8.075196109049235, "learning_rate": 9.997147480453728e-06, "loss": 0.287, "step": 26940 }, { "epoch": 0.5486005089058524, "grad_norm": 11.040544878862145, "learning_rate": 9.997123431268408e-06, "loss": 0.2603, "step": 26950 }, { "epoch": 0.5488040712468193, "grad_norm": 6.822874635543163, "learning_rate": 9.997099281159743e-06, "loss": 0.2576, "step": 26960 }, { "epoch": 0.5490076335877863, "grad_norm": 9.505399569207322, "learning_rate": 9.997075030128223e-06, "loss": 0.3513, "step": 26970 }, { "epoch": 0.5492111959287532, "grad_norm": 7.172840458925999, "learning_rate": 9.997050678174335e-06, "loss": 0.4167, "step": 26980 }, { "epoch": 0.5494147582697201, "grad_norm": 55.6543183329035, "learning_rate": 9.997026225298572e-06, "loss": 0.3944, "step": 26990 }, { "epoch": 0.549618320610687, "grad_norm": 10.51531697521533, "learning_rate": 9.997001671501429e-06, "loss": 0.407, "step": 27000 }, { "epoch": 0.5498218829516539, "grad_norm": 10.589240847623222, "learning_rate": 9.996977016783402e-06, "loss": 0.3126, "step": 27010 }, { "epoch": 0.5500254452926209, "grad_norm": 7.594501306003159, "learning_rate": 9.996952261144987e-06, "loss": 0.3675, "step": 27020 }, { "epoch": 0.5502290076335878, "grad_norm": 5.488455888686041, "learning_rate": 9.996927404586688e-06, "loss": 0.2902, "step": 27030 }, { "epoch": 0.5504325699745547, "grad_norm": 3.505176418666095, "learning_rate": 9.996902447109e-06, "loss": 0.3456, "step": 27040 }, { "epoch": 0.5506361323155217, "grad_norm": 13.027736177421739, "learning_rate": 9.996877388712433e-06, "loss": 0.3305, "step": 27050 }, { "epoch": 0.5508396946564885, "grad_norm": 4.248613835850188, "learning_rate": 9.99685222939749e-06, "loss": 0.3191, "step": 27060 }, { "epoch": 0.5510432569974555, "grad_norm": 8.956974734902024, "learning_rate": 9.996826969164683e-06, "loss": 0.3085, "step": 27070 }, { "epoch": 0.5512468193384223, "grad_norm": 13.316245420027668, "learning_rate": 9.996801608014519e-06, "loss": 0.3151, "step": 27080 }, { "epoch": 0.5514503816793893, "grad_norm": 6.674619655036869, "learning_rate": 9.996776145947509e-06, "loss": 0.3477, "step": 27090 }, { "epoch": 0.5516539440203563, "grad_norm": 5.3112849384348415, "learning_rate": 9.99675058296417e-06, "loss": 0.2904, "step": 27100 }, { "epoch": 0.5518575063613231, "grad_norm": 8.693784891921561, "learning_rate": 9.996724919065015e-06, "loss": 0.2418, "step": 27110 }, { "epoch": 0.5520610687022901, "grad_norm": 19.38701740936757, "learning_rate": 9.996699154250567e-06, "loss": 0.3979, "step": 27120 }, { "epoch": 0.552264631043257, "grad_norm": 10.317435771220604, "learning_rate": 9.996673288521343e-06, "loss": 0.39, "step": 27130 }, { "epoch": 0.5524681933842239, "grad_norm": 7.535929079574432, "learning_rate": 9.996647321877865e-06, "loss": 0.3444, "step": 27140 }, { "epoch": 0.5526717557251909, "grad_norm": 11.442164982389553, "learning_rate": 9.996621254320662e-06, "loss": 0.2969, "step": 27150 }, { "epoch": 0.5528753180661578, "grad_norm": 9.591518736814997, "learning_rate": 9.996595085850255e-06, "loss": 0.4278, "step": 27160 }, { "epoch": 0.5530788804071247, "grad_norm": 7.796335478224229, "learning_rate": 9.996568816467175e-06, "loss": 0.1876, "step": 27170 }, { "epoch": 0.5532824427480916, "grad_norm": 19.07207635192141, "learning_rate": 9.996542446171952e-06, "loss": 0.2642, "step": 27180 }, { "epoch": 0.5534860050890585, "grad_norm": 20.826723066177053, "learning_rate": 9.996515974965118e-06, "loss": 0.3002, "step": 27190 }, { "epoch": 0.5536895674300254, "grad_norm": 11.026356817762553, "learning_rate": 9.996489402847211e-06, "loss": 0.3478, "step": 27200 }, { "epoch": 0.5538931297709924, "grad_norm": 6.8560944716355925, "learning_rate": 9.996462729818764e-06, "loss": 0.3619, "step": 27210 }, { "epoch": 0.5540966921119593, "grad_norm": 10.732530148291458, "learning_rate": 9.996435955880315e-06, "loss": 0.2886, "step": 27220 }, { "epoch": 0.5543002544529262, "grad_norm": 5.236255809945093, "learning_rate": 9.996409081032407e-06, "loss": 0.3301, "step": 27230 }, { "epoch": 0.5545038167938932, "grad_norm": 15.933514275862064, "learning_rate": 9.996382105275584e-06, "loss": 0.3275, "step": 27240 }, { "epoch": 0.55470737913486, "grad_norm": 12.70044526616985, "learning_rate": 9.996355028610388e-06, "loss": 0.3468, "step": 27250 }, { "epoch": 0.554910941475827, "grad_norm": 4.1923762558919355, "learning_rate": 9.996327851037367e-06, "loss": 0.2794, "step": 27260 }, { "epoch": 0.555114503816794, "grad_norm": 34.35862939164424, "learning_rate": 9.99630057255707e-06, "loss": 0.2703, "step": 27270 }, { "epoch": 0.5553180661577608, "grad_norm": 13.27601234401627, "learning_rate": 9.996273193170047e-06, "loss": 0.2824, "step": 27280 }, { "epoch": 0.5555216284987278, "grad_norm": 9.246262412956439, "learning_rate": 9.996245712876853e-06, "loss": 0.2789, "step": 27290 }, { "epoch": 0.5557251908396946, "grad_norm": 23.645725484116287, "learning_rate": 9.99621813167804e-06, "loss": 0.3866, "step": 27300 }, { "epoch": 0.5559287531806616, "grad_norm": 12.972341852148316, "learning_rate": 9.996190449574169e-06, "loss": 0.3088, "step": 27310 }, { "epoch": 0.5561323155216285, "grad_norm": 27.092752530770156, "learning_rate": 9.996162666565795e-06, "loss": 0.3824, "step": 27320 }, { "epoch": 0.5563358778625954, "grad_norm": 12.728302486962454, "learning_rate": 9.996134782653482e-06, "loss": 0.3499, "step": 27330 }, { "epoch": 0.5565394402035624, "grad_norm": 8.035233794110555, "learning_rate": 9.99610679783779e-06, "loss": 0.3083, "step": 27340 }, { "epoch": 0.5567430025445292, "grad_norm": 14.013991952012098, "learning_rate": 9.99607871211929e-06, "loss": 0.3592, "step": 27350 }, { "epoch": 0.5569465648854962, "grad_norm": 10.366851876589157, "learning_rate": 9.996050525498543e-06, "loss": 0.3083, "step": 27360 }, { "epoch": 0.5571501272264631, "grad_norm": 21.387372409599173, "learning_rate": 9.996022237976121e-06, "loss": 0.3536, "step": 27370 }, { "epoch": 0.55735368956743, "grad_norm": 23.974059784363547, "learning_rate": 9.995993849552596e-06, "loss": 0.3462, "step": 27380 }, { "epoch": 0.557557251908397, "grad_norm": 3.1883160224128724, "learning_rate": 9.995965360228538e-06, "loss": 0.2898, "step": 27390 }, { "epoch": 0.5577608142493639, "grad_norm": 10.404497053073303, "learning_rate": 9.995936770004527e-06, "loss": 0.3087, "step": 27400 }, { "epoch": 0.5579643765903308, "grad_norm": 6.260592368401059, "learning_rate": 9.995908078881138e-06, "loss": 0.2442, "step": 27410 }, { "epoch": 0.5581679389312977, "grad_norm": 11.608740632868761, "learning_rate": 9.99587928685895e-06, "loss": 0.2896, "step": 27420 }, { "epoch": 0.5583715012722646, "grad_norm": 4.34799190652117, "learning_rate": 9.995850393938546e-06, "loss": 0.2871, "step": 27430 }, { "epoch": 0.5585750636132315, "grad_norm": 7.141078050373307, "learning_rate": 9.99582140012051e-06, "loss": 0.3413, "step": 27440 }, { "epoch": 0.5587786259541985, "grad_norm": 11.816559534979918, "learning_rate": 9.995792305405423e-06, "loss": 0.4207, "step": 27450 }, { "epoch": 0.5589821882951654, "grad_norm": 11.027369483922374, "learning_rate": 9.995763109793877e-06, "loss": 0.3216, "step": 27460 }, { "epoch": 0.5591857506361323, "grad_norm": 7.660223825533956, "learning_rate": 9.995733813286462e-06, "loss": 0.3474, "step": 27470 }, { "epoch": 0.5593893129770993, "grad_norm": 12.627588529923763, "learning_rate": 9.995704415883767e-06, "loss": 0.2944, "step": 27480 }, { "epoch": 0.5595928753180661, "grad_norm": 1.2792056657099953, "learning_rate": 9.995674917586386e-06, "loss": 0.2218, "step": 27490 }, { "epoch": 0.5597964376590331, "grad_norm": 19.65481496331218, "learning_rate": 9.995645318394917e-06, "loss": 0.3679, "step": 27500 }, { "epoch": 0.56, "grad_norm": 9.286991265636638, "learning_rate": 9.995615618309956e-06, "loss": 0.2624, "step": 27510 }, { "epoch": 0.5602035623409669, "grad_norm": 8.295600729892795, "learning_rate": 9.995585817332103e-06, "loss": 0.3461, "step": 27520 }, { "epoch": 0.5604071246819339, "grad_norm": 8.902680903968609, "learning_rate": 9.99555591546196e-06, "loss": 0.2363, "step": 27530 }, { "epoch": 0.5606106870229007, "grad_norm": 11.287231416745806, "learning_rate": 9.995525912700133e-06, "loss": 0.333, "step": 27540 }, { "epoch": 0.5608142493638677, "grad_norm": 7.1262948094664775, "learning_rate": 9.995495809047224e-06, "loss": 0.2608, "step": 27550 }, { "epoch": 0.5610178117048346, "grad_norm": 10.369211992624658, "learning_rate": 9.995465604503842e-06, "loss": 0.3504, "step": 27560 }, { "epoch": 0.5612213740458015, "grad_norm": 8.409338402749562, "learning_rate": 9.995435299070601e-06, "loss": 0.3697, "step": 27570 }, { "epoch": 0.5614249363867685, "grad_norm": 17.681480352465805, "learning_rate": 9.995404892748108e-06, "loss": 0.3562, "step": 27580 }, { "epoch": 0.5616284987277353, "grad_norm": 6.363679370455624, "learning_rate": 9.99537438553698e-06, "loss": 0.3114, "step": 27590 }, { "epoch": 0.5618320610687023, "grad_norm": 5.612563613654637, "learning_rate": 9.995343777437832e-06, "loss": 0.3644, "step": 27600 }, { "epoch": 0.5620356234096692, "grad_norm": 8.21003738995888, "learning_rate": 9.995313068451282e-06, "loss": 0.4059, "step": 27610 }, { "epoch": 0.5622391857506361, "grad_norm": 16.740314563682453, "learning_rate": 9.995282258577952e-06, "loss": 0.2894, "step": 27620 }, { "epoch": 0.5624427480916031, "grad_norm": 15.971444917393008, "learning_rate": 9.995251347818462e-06, "loss": 0.4545, "step": 27630 }, { "epoch": 0.56264631043257, "grad_norm": 8.821170574701823, "learning_rate": 9.995220336173436e-06, "loss": 0.2558, "step": 27640 }, { "epoch": 0.5628498727735369, "grad_norm": 11.340437963290816, "learning_rate": 9.995189223643504e-06, "loss": 0.4128, "step": 27650 }, { "epoch": 0.5630534351145038, "grad_norm": 8.2848635318174, "learning_rate": 9.99515801022929e-06, "loss": 0.3667, "step": 27660 }, { "epoch": 0.5632569974554708, "grad_norm": 8.091042934547998, "learning_rate": 9.995126695931427e-06, "loss": 0.3327, "step": 27670 }, { "epoch": 0.5634605597964376, "grad_norm": 4.775948750805187, "learning_rate": 9.995095280750546e-06, "loss": 0.2807, "step": 27680 }, { "epoch": 0.5636641221374046, "grad_norm": 13.80408793174825, "learning_rate": 9.995063764687285e-06, "loss": 0.272, "step": 27690 }, { "epoch": 0.5638676844783715, "grad_norm": 3.5996014542782278, "learning_rate": 9.995032147742274e-06, "loss": 0.2871, "step": 27700 }, { "epoch": 0.5640712468193384, "grad_norm": 7.448212046435248, "learning_rate": 9.995000429916157e-06, "loss": 0.3118, "step": 27710 }, { "epoch": 0.5642748091603054, "grad_norm": 6.198192809047737, "learning_rate": 9.994968611209572e-06, "loss": 0.3178, "step": 27720 }, { "epoch": 0.5644783715012722, "grad_norm": 7.839812910970371, "learning_rate": 9.994936691623164e-06, "loss": 0.3262, "step": 27730 }, { "epoch": 0.5646819338422392, "grad_norm": 6.967594319394793, "learning_rate": 9.994904671157575e-06, "loss": 0.3066, "step": 27740 }, { "epoch": 0.5648854961832062, "grad_norm": 6.66502483148521, "learning_rate": 9.994872549813453e-06, "loss": 0.2513, "step": 27750 }, { "epoch": 0.565089058524173, "grad_norm": 17.83667098066532, "learning_rate": 9.994840327591447e-06, "loss": 0.3691, "step": 27760 }, { "epoch": 0.56529262086514, "grad_norm": 12.742627969191727, "learning_rate": 9.994808004492205e-06, "loss": 0.3081, "step": 27770 }, { "epoch": 0.5654961832061068, "grad_norm": 7.402443584944328, "learning_rate": 9.994775580516383e-06, "loss": 0.3639, "step": 27780 }, { "epoch": 0.5656997455470738, "grad_norm": 12.126502671679251, "learning_rate": 9.994743055664636e-06, "loss": 0.3295, "step": 27790 }, { "epoch": 0.5659033078880407, "grad_norm": 34.22402990419798, "learning_rate": 9.99471042993762e-06, "loss": 0.2666, "step": 27800 }, { "epoch": 0.5661068702290076, "grad_norm": 10.179390727642671, "learning_rate": 9.994677703335993e-06, "loss": 0.2867, "step": 27810 }, { "epoch": 0.5663104325699746, "grad_norm": 6.372129255331602, "learning_rate": 9.994644875860417e-06, "loss": 0.2572, "step": 27820 }, { "epoch": 0.5665139949109415, "grad_norm": 15.519137466950607, "learning_rate": 9.994611947511554e-06, "loss": 0.3889, "step": 27830 }, { "epoch": 0.5667175572519084, "grad_norm": 18.332648117058426, "learning_rate": 9.99457891829007e-06, "loss": 0.3978, "step": 27840 }, { "epoch": 0.5669211195928753, "grad_norm": 0.674393061782239, "learning_rate": 9.994545788196633e-06, "loss": 0.2246, "step": 27850 }, { "epoch": 0.5671246819338422, "grad_norm": 14.31161058587345, "learning_rate": 9.99451255723191e-06, "loss": 0.347, "step": 27860 }, { "epoch": 0.5673282442748092, "grad_norm": 11.184367441989052, "learning_rate": 9.994479225396572e-06, "loss": 0.2845, "step": 27870 }, { "epoch": 0.5675318066157761, "grad_norm": 11.489155666558275, "learning_rate": 9.994445792691295e-06, "loss": 0.2736, "step": 27880 }, { "epoch": 0.567735368956743, "grad_norm": 9.143878528950014, "learning_rate": 9.99441225911675e-06, "loss": 0.4017, "step": 27890 }, { "epoch": 0.5679389312977099, "grad_norm": 9.362641552125432, "learning_rate": 9.994378624673619e-06, "loss": 0.3759, "step": 27900 }, { "epoch": 0.5681424936386769, "grad_norm": 5.292210818967707, "learning_rate": 9.994344889362579e-06, "loss": 0.2759, "step": 27910 }, { "epoch": 0.5683460559796437, "grad_norm": 9.131463563087863, "learning_rate": 9.99431105318431e-06, "loss": 0.354, "step": 27920 }, { "epoch": 0.5685496183206107, "grad_norm": 5.715166646928032, "learning_rate": 9.994277116139497e-06, "loss": 0.3019, "step": 27930 }, { "epoch": 0.5687531806615777, "grad_norm": 12.386162328982575, "learning_rate": 9.994243078228825e-06, "loss": 0.3011, "step": 27940 }, { "epoch": 0.5689567430025445, "grad_norm": 12.517032497682063, "learning_rate": 9.994208939452982e-06, "loss": 0.3385, "step": 27950 }, { "epoch": 0.5691603053435115, "grad_norm": 13.190983743786362, "learning_rate": 9.994174699812655e-06, "loss": 0.4251, "step": 27960 }, { "epoch": 0.5693638676844783, "grad_norm": 12.488161462650892, "learning_rate": 9.99414035930854e-06, "loss": 0.3673, "step": 27970 }, { "epoch": 0.5695674300254453, "grad_norm": 4.8220091502237645, "learning_rate": 9.994105917941328e-06, "loss": 0.35, "step": 27980 }, { "epoch": 0.5697709923664123, "grad_norm": 9.327458832797237, "learning_rate": 9.994071375711713e-06, "loss": 0.3671, "step": 27990 }, { "epoch": 0.5699745547073791, "grad_norm": 5.286263172289298, "learning_rate": 9.994036732620395e-06, "loss": 0.3225, "step": 28000 }, { "epoch": 0.5701781170483461, "grad_norm": 3.9107861902684276, "learning_rate": 9.994001988668075e-06, "loss": 0.2975, "step": 28010 }, { "epoch": 0.570381679389313, "grad_norm": 14.427037496343946, "learning_rate": 9.993967143855449e-06, "loss": 0.3265, "step": 28020 }, { "epoch": 0.5705852417302799, "grad_norm": 6.175642084301568, "learning_rate": 9.993932198183225e-06, "loss": 0.2745, "step": 28030 }, { "epoch": 0.5707888040712468, "grad_norm": 6.534900058995748, "learning_rate": 9.993897151652109e-06, "loss": 0.3401, "step": 28040 }, { "epoch": 0.5709923664122137, "grad_norm": 5.614311954339361, "learning_rate": 9.993862004262809e-06, "loss": 0.2854, "step": 28050 }, { "epoch": 0.5711959287531807, "grad_norm": 8.031031712921848, "learning_rate": 9.993826756016032e-06, "loss": 0.2685, "step": 28060 }, { "epoch": 0.5713994910941476, "grad_norm": 17.906603391808098, "learning_rate": 9.993791406912491e-06, "loss": 0.297, "step": 28070 }, { "epoch": 0.5716030534351145, "grad_norm": 9.82595672513545, "learning_rate": 9.993755956952903e-06, "loss": 0.3047, "step": 28080 }, { "epoch": 0.5718066157760814, "grad_norm": 17.192815927408464, "learning_rate": 9.993720406137982e-06, "loss": 0.3145, "step": 28090 }, { "epoch": 0.5720101781170484, "grad_norm": 13.598133344004728, "learning_rate": 9.993684754468443e-06, "loss": 0.2872, "step": 28100 }, { "epoch": 0.5722137404580153, "grad_norm": 8.67337408130967, "learning_rate": 9.993649001945009e-06, "loss": 0.4405, "step": 28110 }, { "epoch": 0.5724173027989822, "grad_norm": 5.316885131603889, "learning_rate": 9.993613148568402e-06, "loss": 0.3202, "step": 28120 }, { "epoch": 0.5726208651399491, "grad_norm": 4.497601908009462, "learning_rate": 9.993577194339347e-06, "loss": 0.4216, "step": 28130 }, { "epoch": 0.572824427480916, "grad_norm": 9.383942763148069, "learning_rate": 9.993541139258568e-06, "loss": 0.3041, "step": 28140 }, { "epoch": 0.573027989821883, "grad_norm": 7.5418118270250005, "learning_rate": 9.993504983326794e-06, "loss": 0.3154, "step": 28150 }, { "epoch": 0.5732315521628498, "grad_norm": 6.643332563746182, "learning_rate": 9.993468726544754e-06, "loss": 0.3125, "step": 28160 }, { "epoch": 0.5734351145038168, "grad_norm": 5.387619145199882, "learning_rate": 9.993432368913183e-06, "loss": 0.278, "step": 28170 }, { "epoch": 0.5736386768447838, "grad_norm": 22.317508389356195, "learning_rate": 9.993395910432814e-06, "loss": 0.3478, "step": 28180 }, { "epoch": 0.5738422391857506, "grad_norm": 11.049425373886331, "learning_rate": 9.993359351104384e-06, "loss": 0.3022, "step": 28190 }, { "epoch": 0.5740458015267176, "grad_norm": 10.891068239412096, "learning_rate": 9.993322690928629e-06, "loss": 0.3952, "step": 28200 }, { "epoch": 0.5742493638676844, "grad_norm": 13.174210149378073, "learning_rate": 9.993285929906292e-06, "loss": 0.2978, "step": 28210 }, { "epoch": 0.5744529262086514, "grad_norm": 13.24993108048018, "learning_rate": 9.993249068038111e-06, "loss": 0.3371, "step": 28220 }, { "epoch": 0.5746564885496184, "grad_norm": 10.096561066090171, "learning_rate": 9.993212105324836e-06, "loss": 0.33, "step": 28230 }, { "epoch": 0.5748600508905852, "grad_norm": 13.263893787126477, "learning_rate": 9.993175041767214e-06, "loss": 0.2516, "step": 28240 }, { "epoch": 0.5750636132315522, "grad_norm": 12.779411247368111, "learning_rate": 9.993137877365987e-06, "loss": 0.3181, "step": 28250 }, { "epoch": 0.575267175572519, "grad_norm": 6.7154292330317515, "learning_rate": 9.993100612121911e-06, "loss": 0.2919, "step": 28260 }, { "epoch": 0.575470737913486, "grad_norm": 13.854522044152139, "learning_rate": 9.993063246035737e-06, "loss": 0.3144, "step": 28270 }, { "epoch": 0.5756743002544529, "grad_norm": 13.999414196197714, "learning_rate": 9.993025779108219e-06, "loss": 0.2992, "step": 28280 }, { "epoch": 0.5758778625954198, "grad_norm": 8.880352224382039, "learning_rate": 9.992988211340115e-06, "loss": 0.3656, "step": 28290 }, { "epoch": 0.5760814249363868, "grad_norm": 8.429002378214825, "learning_rate": 9.992950542732186e-06, "loss": 0.3002, "step": 28300 }, { "epoch": 0.5762849872773537, "grad_norm": 7.215677750491849, "learning_rate": 9.992912773285187e-06, "loss": 0.2959, "step": 28310 }, { "epoch": 0.5764885496183206, "grad_norm": 9.231998464755288, "learning_rate": 9.992874902999883e-06, "loss": 0.3472, "step": 28320 }, { "epoch": 0.5766921119592875, "grad_norm": 8.991779745672508, "learning_rate": 9.99283693187704e-06, "loss": 0.2897, "step": 28330 }, { "epoch": 0.5768956743002545, "grad_norm": 12.161885044372804, "learning_rate": 9.992798859917426e-06, "loss": 0.3671, "step": 28340 }, { "epoch": 0.5770992366412214, "grad_norm": 6.051226207624662, "learning_rate": 9.992760687121805e-06, "loss": 0.3101, "step": 28350 }, { "epoch": 0.5773027989821883, "grad_norm": 4.465146651230553, "learning_rate": 9.992722413490955e-06, "loss": 0.2894, "step": 28360 }, { "epoch": 0.5775063613231552, "grad_norm": 8.864252633691523, "learning_rate": 9.992684039025643e-06, "loss": 0.3389, "step": 28370 }, { "epoch": 0.5777099236641221, "grad_norm": 16.65662917456108, "learning_rate": 9.992645563726647e-06, "loss": 0.2967, "step": 28380 }, { "epoch": 0.5779134860050891, "grad_norm": 8.547175581293008, "learning_rate": 9.992606987594744e-06, "loss": 0.3214, "step": 28390 }, { "epoch": 0.5781170483460559, "grad_norm": 8.585542813734618, "learning_rate": 9.992568310630713e-06, "loss": 0.4013, "step": 28400 }, { "epoch": 0.5783206106870229, "grad_norm": 9.417166826503097, "learning_rate": 9.992529532835332e-06, "loss": 0.3998, "step": 28410 }, { "epoch": 0.5785241730279899, "grad_norm": 6.841942465841858, "learning_rate": 9.992490654209387e-06, "loss": 0.3315, "step": 28420 }, { "epoch": 0.5787277353689567, "grad_norm": 6.579429400827683, "learning_rate": 9.992451674753666e-06, "loss": 0.3282, "step": 28430 }, { "epoch": 0.5789312977099237, "grad_norm": 8.317002252670877, "learning_rate": 9.992412594468951e-06, "loss": 0.3348, "step": 28440 }, { "epoch": 0.5791348600508905, "grad_norm": 17.945537994563427, "learning_rate": 9.992373413356034e-06, "loss": 0.2966, "step": 28450 }, { "epoch": 0.5793384223918575, "grad_norm": 7.008223525179547, "learning_rate": 9.992334131415707e-06, "loss": 0.2598, "step": 28460 }, { "epoch": 0.5795419847328245, "grad_norm": 9.994968620653866, "learning_rate": 9.99229474864876e-06, "loss": 0.3529, "step": 28470 }, { "epoch": 0.5797455470737913, "grad_norm": 4.250647639685154, "learning_rate": 9.99225526505599e-06, "loss": 0.3664, "step": 28480 }, { "epoch": 0.5799491094147583, "grad_norm": 9.409557776777255, "learning_rate": 9.992215680638197e-06, "loss": 0.3097, "step": 28490 }, { "epoch": 0.5801526717557252, "grad_norm": 9.254011467453944, "learning_rate": 9.992175995396178e-06, "loss": 0.3154, "step": 28500 }, { "epoch": 0.5803562340966921, "grad_norm": 15.83766506803305, "learning_rate": 9.992136209330736e-06, "loss": 0.3059, "step": 28510 }, { "epoch": 0.580559796437659, "grad_norm": 11.694072692663191, "learning_rate": 9.992096322442671e-06, "loss": 0.3349, "step": 28520 }, { "epoch": 0.580763358778626, "grad_norm": 13.564581541250686, "learning_rate": 9.992056334732791e-06, "loss": 0.329, "step": 28530 }, { "epoch": 0.5809669211195929, "grad_norm": 24.258091866751926, "learning_rate": 9.992016246201906e-06, "loss": 0.321, "step": 28540 }, { "epoch": 0.5811704834605598, "grad_norm": 11.808212662579756, "learning_rate": 9.991976056850821e-06, "loss": 0.2343, "step": 28550 }, { "epoch": 0.5813740458015267, "grad_norm": 10.84063170809689, "learning_rate": 9.991935766680352e-06, "loss": 0.3548, "step": 28560 }, { "epoch": 0.5815776081424936, "grad_norm": 8.7864129714196, "learning_rate": 9.99189537569131e-06, "loss": 0.2852, "step": 28570 }, { "epoch": 0.5817811704834606, "grad_norm": 10.442915931856586, "learning_rate": 9.991854883884509e-06, "loss": 0.2936, "step": 28580 }, { "epoch": 0.5819847328244275, "grad_norm": 13.721934644483099, "learning_rate": 9.991814291260771e-06, "loss": 0.3094, "step": 28590 }, { "epoch": 0.5821882951653944, "grad_norm": 10.047822724364849, "learning_rate": 9.991773597820915e-06, "loss": 0.4016, "step": 28600 }, { "epoch": 0.5823918575063614, "grad_norm": 4.732042623623624, "learning_rate": 9.991732803565761e-06, "loss": 0.3465, "step": 28610 }, { "epoch": 0.5825954198473282, "grad_norm": 8.86645228283427, "learning_rate": 9.991691908496133e-06, "loss": 0.3095, "step": 28620 }, { "epoch": 0.5827989821882952, "grad_norm": 11.104454730857654, "learning_rate": 9.991650912612858e-06, "loss": 0.314, "step": 28630 }, { "epoch": 0.583002544529262, "grad_norm": 15.10693086886297, "learning_rate": 9.991609815916764e-06, "loss": 0.355, "step": 28640 }, { "epoch": 0.583206106870229, "grad_norm": 10.890451424797815, "learning_rate": 9.99156861840868e-06, "loss": 0.416, "step": 28650 }, { "epoch": 0.583409669211196, "grad_norm": 3.840162966903972, "learning_rate": 9.991527320089439e-06, "loss": 0.3792, "step": 28660 }, { "epoch": 0.5836132315521628, "grad_norm": 6.388972296746801, "learning_rate": 9.991485920959875e-06, "loss": 0.3453, "step": 28670 }, { "epoch": 0.5838167938931298, "grad_norm": 4.58103969096431, "learning_rate": 9.991444421020824e-06, "loss": 0.3092, "step": 28680 }, { "epoch": 0.5840203562340966, "grad_norm": 6.8171108243280125, "learning_rate": 9.991402820273124e-06, "loss": 0.2949, "step": 28690 }, { "epoch": 0.5842239185750636, "grad_norm": 11.137020621689048, "learning_rate": 9.991361118717614e-06, "loss": 0.2865, "step": 28700 }, { "epoch": 0.5844274809160306, "grad_norm": 9.811530836022822, "learning_rate": 9.99131931635514e-06, "loss": 0.2825, "step": 28710 }, { "epoch": 0.5846310432569974, "grad_norm": 14.635983453598515, "learning_rate": 9.99127741318654e-06, "loss": 0.2311, "step": 28720 }, { "epoch": 0.5848346055979644, "grad_norm": 5.967492502057878, "learning_rate": 9.991235409212666e-06, "loss": 0.3117, "step": 28730 }, { "epoch": 0.5850381679389313, "grad_norm": 14.129955624141191, "learning_rate": 9.991193304434364e-06, "loss": 0.3892, "step": 28740 }, { "epoch": 0.5852417302798982, "grad_norm": 15.520546678509124, "learning_rate": 9.991151098852485e-06, "loss": 0.2632, "step": 28750 }, { "epoch": 0.5854452926208651, "grad_norm": 13.383529665813482, "learning_rate": 9.991108792467881e-06, "loss": 0.3062, "step": 28760 }, { "epoch": 0.585648854961832, "grad_norm": 9.86969544333436, "learning_rate": 9.991066385281405e-06, "loss": 0.2921, "step": 28770 }, { "epoch": 0.585852417302799, "grad_norm": 10.32952571812138, "learning_rate": 9.991023877293917e-06, "loss": 0.2998, "step": 28780 }, { "epoch": 0.5860559796437659, "grad_norm": 13.131431708077768, "learning_rate": 9.99098126850627e-06, "loss": 0.3418, "step": 28790 }, { "epoch": 0.5862595419847328, "grad_norm": 8.151944186725302, "learning_rate": 9.990938558919332e-06, "loss": 0.3669, "step": 28800 }, { "epoch": 0.5864631043256997, "grad_norm": 13.712272570825535, "learning_rate": 9.99089574853396e-06, "loss": 0.2047, "step": 28810 }, { "epoch": 0.5866666666666667, "grad_norm": 10.374704777312632, "learning_rate": 9.99085283735102e-06, "loss": 0.3524, "step": 28820 }, { "epoch": 0.5868702290076336, "grad_norm": 6.035239813602374, "learning_rate": 9.990809825371378e-06, "loss": 0.2796, "step": 28830 }, { "epoch": 0.5870737913486005, "grad_norm": 11.188136450659803, "learning_rate": 9.990766712595906e-06, "loss": 0.2751, "step": 28840 }, { "epoch": 0.5872773536895675, "grad_norm": 10.371708151255268, "learning_rate": 9.990723499025468e-06, "loss": 0.285, "step": 28850 }, { "epoch": 0.5874809160305343, "grad_norm": 5.337584437259512, "learning_rate": 9.990680184660944e-06, "loss": 0.2586, "step": 28860 }, { "epoch": 0.5876844783715013, "grad_norm": 10.579859976539288, "learning_rate": 9.990636769503206e-06, "loss": 0.3107, "step": 28870 }, { "epoch": 0.5878880407124681, "grad_norm": 11.279458883230298, "learning_rate": 9.990593253553129e-06, "loss": 0.3133, "step": 28880 }, { "epoch": 0.5880916030534351, "grad_norm": 5.337160303487867, "learning_rate": 9.990549636811593e-06, "loss": 0.2524, "step": 28890 }, { "epoch": 0.5882951653944021, "grad_norm": 10.439136955514329, "learning_rate": 9.990505919279481e-06, "loss": 0.3606, "step": 28900 }, { "epoch": 0.5884987277353689, "grad_norm": 7.259311465875829, "learning_rate": 9.990462100957674e-06, "loss": 0.2765, "step": 28910 }, { "epoch": 0.5887022900763359, "grad_norm": 15.881529599917569, "learning_rate": 9.990418181847057e-06, "loss": 0.2996, "step": 28920 }, { "epoch": 0.5889058524173028, "grad_norm": 10.142713837992838, "learning_rate": 9.990374161948516e-06, "loss": 0.274, "step": 28930 }, { "epoch": 0.5891094147582697, "grad_norm": 6.442342681311342, "learning_rate": 9.990330041262943e-06, "loss": 0.2907, "step": 28940 }, { "epoch": 0.5893129770992367, "grad_norm": 6.594332981550736, "learning_rate": 9.990285819791226e-06, "loss": 0.3581, "step": 28950 }, { "epoch": 0.5895165394402035, "grad_norm": 7.1186829152173035, "learning_rate": 9.99024149753426e-06, "loss": 0.4268, "step": 28960 }, { "epoch": 0.5897201017811705, "grad_norm": 9.23182080682586, "learning_rate": 9.990197074492939e-06, "loss": 0.4294, "step": 28970 }, { "epoch": 0.5899236641221374, "grad_norm": 6.711694060531839, "learning_rate": 9.990152550668162e-06, "loss": 0.3458, "step": 28980 }, { "epoch": 0.5901272264631043, "grad_norm": 3.766983911418734, "learning_rate": 9.990107926060825e-06, "loss": 0.2846, "step": 28990 }, { "epoch": 0.5903307888040712, "grad_norm": 8.683086637151206, "learning_rate": 9.990063200671834e-06, "loss": 0.2919, "step": 29000 }, { "epoch": 0.5905343511450382, "grad_norm": 9.062354646656598, "learning_rate": 9.990018374502087e-06, "loss": 0.2372, "step": 29010 }, { "epoch": 0.5907379134860051, "grad_norm": 10.352036380538863, "learning_rate": 9.989973447552492e-06, "loss": 0.4096, "step": 29020 }, { "epoch": 0.590941475826972, "grad_norm": 9.90522781994592, "learning_rate": 9.989928419823958e-06, "loss": 0.3382, "step": 29030 }, { "epoch": 0.591145038167939, "grad_norm": 10.146260475865695, "learning_rate": 9.989883291317391e-06, "loss": 0.2579, "step": 29040 }, { "epoch": 0.5913486005089058, "grad_norm": 9.188433081048744, "learning_rate": 9.989838062033706e-06, "loss": 0.3519, "step": 29050 }, { "epoch": 0.5915521628498728, "grad_norm": 14.7442484057442, "learning_rate": 9.989792731973811e-06, "loss": 0.2933, "step": 29060 }, { "epoch": 0.5917557251908397, "grad_norm": 5.573426732504599, "learning_rate": 9.989747301138628e-06, "loss": 0.3418, "step": 29070 }, { "epoch": 0.5919592875318066, "grad_norm": 11.664865055611356, "learning_rate": 9.98970176952907e-06, "loss": 0.4253, "step": 29080 }, { "epoch": 0.5921628498727736, "grad_norm": 9.378208097999973, "learning_rate": 9.98965613714606e-06, "loss": 0.3006, "step": 29090 }, { "epoch": 0.5923664122137404, "grad_norm": 6.733482432235968, "learning_rate": 9.989610403990515e-06, "loss": 0.3051, "step": 29100 }, { "epoch": 0.5925699745547074, "grad_norm": 8.335682084542375, "learning_rate": 9.989564570063363e-06, "loss": 0.3233, "step": 29110 }, { "epoch": 0.5927735368956742, "grad_norm": 5.159147299307624, "learning_rate": 9.989518635365527e-06, "loss": 0.2738, "step": 29120 }, { "epoch": 0.5929770992366412, "grad_norm": 10.908233433810642, "learning_rate": 9.989472599897937e-06, "loss": 0.2916, "step": 29130 }, { "epoch": 0.5931806615776082, "grad_norm": 7.847338354118386, "learning_rate": 9.98942646366152e-06, "loss": 0.2752, "step": 29140 }, { "epoch": 0.593384223918575, "grad_norm": 3.4580077083063587, "learning_rate": 9.98938022665721e-06, "loss": 0.2555, "step": 29150 }, { "epoch": 0.593587786259542, "grad_norm": 16.159305762556865, "learning_rate": 9.989333888885941e-06, "loss": 0.2539, "step": 29160 }, { "epoch": 0.5937913486005089, "grad_norm": 15.149729399349788, "learning_rate": 9.989287450348647e-06, "loss": 0.4198, "step": 29170 }, { "epoch": 0.5939949109414758, "grad_norm": 14.279106882442084, "learning_rate": 9.989240911046266e-06, "loss": 0.354, "step": 29180 }, { "epoch": 0.5941984732824428, "grad_norm": 11.020858734101251, "learning_rate": 9.989194270979739e-06, "loss": 0.2796, "step": 29190 }, { "epoch": 0.5944020356234097, "grad_norm": 4.984958167762558, "learning_rate": 9.98914753015001e-06, "loss": 0.2287, "step": 29200 }, { "epoch": 0.5946055979643766, "grad_norm": 14.18701331714114, "learning_rate": 9.989100688558018e-06, "loss": 0.4106, "step": 29210 }, { "epoch": 0.5948091603053435, "grad_norm": 7.814748590281031, "learning_rate": 9.989053746204712e-06, "loss": 0.3266, "step": 29220 }, { "epoch": 0.5950127226463104, "grad_norm": 6.287169367400569, "learning_rate": 9.98900670309104e-06, "loss": 0.236, "step": 29230 }, { "epoch": 0.5952162849872773, "grad_norm": 11.065033760197363, "learning_rate": 9.988959559217952e-06, "loss": 0.2602, "step": 29240 }, { "epoch": 0.5954198473282443, "grad_norm": 19.824025924368527, "learning_rate": 9.988912314586402e-06, "loss": 0.2713, "step": 29250 }, { "epoch": 0.5956234096692112, "grad_norm": 14.802229415337056, "learning_rate": 9.988864969197338e-06, "loss": 0.3273, "step": 29260 }, { "epoch": 0.5958269720101781, "grad_norm": 19.62668127974908, "learning_rate": 9.988817523051723e-06, "loss": 0.293, "step": 29270 }, { "epoch": 0.5960305343511451, "grad_norm": 7.940318220279672, "learning_rate": 9.988769976150511e-06, "loss": 0.3295, "step": 29280 }, { "epoch": 0.5962340966921119, "grad_norm": 10.21257388436552, "learning_rate": 9.988722328494666e-06, "loss": 0.2901, "step": 29290 }, { "epoch": 0.5964376590330789, "grad_norm": 8.888028632580358, "learning_rate": 9.988674580085147e-06, "loss": 0.3387, "step": 29300 }, { "epoch": 0.5966412213740458, "grad_norm": 6.616948446166427, "learning_rate": 9.988626730922918e-06, "loss": 0.348, "step": 29310 }, { "epoch": 0.5968447837150127, "grad_norm": 10.745289843200519, "learning_rate": 9.98857878100895e-06, "loss": 0.3222, "step": 29320 }, { "epoch": 0.5970483460559797, "grad_norm": 7.534200160807014, "learning_rate": 9.988530730344206e-06, "loss": 0.3476, "step": 29330 }, { "epoch": 0.5972519083969465, "grad_norm": 16.381128489088606, "learning_rate": 9.988482578929659e-06, "loss": 0.3178, "step": 29340 }, { "epoch": 0.5974554707379135, "grad_norm": 12.70420157434454, "learning_rate": 9.988434326766282e-06, "loss": 0.2731, "step": 29350 }, { "epoch": 0.5976590330788804, "grad_norm": 7.261905176250559, "learning_rate": 9.988385973855048e-06, "loss": 0.3287, "step": 29360 }, { "epoch": 0.5978625954198473, "grad_norm": 9.65148396971482, "learning_rate": 9.988337520196936e-06, "loss": 0.596, "step": 29370 }, { "epoch": 0.5980661577608143, "grad_norm": 6.323764111037288, "learning_rate": 9.98828896579292e-06, "loss": 0.2197, "step": 29380 }, { "epoch": 0.5982697201017811, "grad_norm": 6.7249038330809165, "learning_rate": 9.988240310643985e-06, "loss": 0.3283, "step": 29390 }, { "epoch": 0.5984732824427481, "grad_norm": 10.866908342305537, "learning_rate": 9.988191554751112e-06, "loss": 0.283, "step": 29400 }, { "epoch": 0.598676844783715, "grad_norm": 12.994460356169443, "learning_rate": 9.988142698115287e-06, "loss": 0.3524, "step": 29410 }, { "epoch": 0.5988804071246819, "grad_norm": 8.097855382795448, "learning_rate": 9.988093740737493e-06, "loss": 0.386, "step": 29420 }, { "epoch": 0.5990839694656489, "grad_norm": 13.64871511035223, "learning_rate": 9.988044682618723e-06, "loss": 0.255, "step": 29430 }, { "epoch": 0.5992875318066158, "grad_norm": 17.851102586087155, "learning_rate": 9.987995523759966e-06, "loss": 0.2984, "step": 29440 }, { "epoch": 0.5994910941475827, "grad_norm": 5.599443532673127, "learning_rate": 9.987946264162214e-06, "loss": 0.408, "step": 29450 }, { "epoch": 0.5996946564885496, "grad_norm": 20.356184702968196, "learning_rate": 9.987896903826464e-06, "loss": 0.2688, "step": 29460 }, { "epoch": 0.5998982188295165, "grad_norm": 8.661496302706553, "learning_rate": 9.987847442753711e-06, "loss": 0.2367, "step": 29470 }, { "epoch": 0.6001017811704834, "grad_norm": 4.977573404956761, "learning_rate": 9.987797880944954e-06, "loss": 0.407, "step": 29480 }, { "epoch": 0.6003053435114504, "grad_norm": 10.75139769072193, "learning_rate": 9.987748218401197e-06, "loss": 0.2679, "step": 29490 }, { "epoch": 0.6005089058524173, "grad_norm": 24.536846788218792, "learning_rate": 9.987698455123437e-06, "loss": 0.3195, "step": 29500 }, { "epoch": 0.6007124681933842, "grad_norm": 10.523194211521599, "learning_rate": 9.987648591112685e-06, "loss": 0.3841, "step": 29510 }, { "epoch": 0.6009160305343512, "grad_norm": 7.976389678986524, "learning_rate": 9.987598626369945e-06, "loss": 0.3051, "step": 29520 }, { "epoch": 0.601119592875318, "grad_norm": 3.2272703566487775, "learning_rate": 9.987548560896226e-06, "loss": 0.2842, "step": 29530 }, { "epoch": 0.601323155216285, "grad_norm": 8.154146016567788, "learning_rate": 9.98749839469254e-06, "loss": 0.3499, "step": 29540 }, { "epoch": 0.601526717557252, "grad_norm": 3.434408068465831, "learning_rate": 9.987448127759902e-06, "loss": 0.3377, "step": 29550 }, { "epoch": 0.6017302798982188, "grad_norm": 5.050354590486427, "learning_rate": 9.987397760099324e-06, "loss": 0.274, "step": 29560 }, { "epoch": 0.6019338422391858, "grad_norm": 7.307160720977397, "learning_rate": 9.987347291711823e-06, "loss": 0.2572, "step": 29570 }, { "epoch": 0.6021374045801526, "grad_norm": 10.551133389480919, "learning_rate": 9.987296722598423e-06, "loss": 0.2414, "step": 29580 }, { "epoch": 0.6023409669211196, "grad_norm": 10.637093355090071, "learning_rate": 9.98724605276014e-06, "loss": 0.3351, "step": 29590 }, { "epoch": 0.6025445292620865, "grad_norm": 6.770409715841297, "learning_rate": 9.987195282198e-06, "loss": 0.3254, "step": 29600 }, { "epoch": 0.6027480916030534, "grad_norm": 8.478630729746335, "learning_rate": 9.987144410913028e-06, "loss": 0.3387, "step": 29610 }, { "epoch": 0.6029516539440204, "grad_norm": 11.161458121464737, "learning_rate": 9.98709343890625e-06, "loss": 0.3851, "step": 29620 }, { "epoch": 0.6031552162849873, "grad_norm": 6.683019578762853, "learning_rate": 9.987042366178699e-06, "loss": 0.3293, "step": 29630 }, { "epoch": 0.6033587786259542, "grad_norm": 3.2875044696747624, "learning_rate": 9.986991192731402e-06, "loss": 0.3256, "step": 29640 }, { "epoch": 0.6035623409669211, "grad_norm": 5.65100569887734, "learning_rate": 9.986939918565396e-06, "loss": 0.2978, "step": 29650 }, { "epoch": 0.603765903307888, "grad_norm": 12.621606027821073, "learning_rate": 9.986888543681714e-06, "loss": 0.2181, "step": 29660 }, { "epoch": 0.603969465648855, "grad_norm": 0.11322375990077888, "learning_rate": 9.986837068081395e-06, "loss": 0.2932, "step": 29670 }, { "epoch": 0.6041730279898219, "grad_norm": 7.352475568680053, "learning_rate": 9.986785491765479e-06, "loss": 0.3047, "step": 29680 }, { "epoch": 0.6043765903307888, "grad_norm": 2.907026968789622, "learning_rate": 9.986733814735007e-06, "loss": 0.2346, "step": 29690 }, { "epoch": 0.6045801526717557, "grad_norm": 5.12972084226861, "learning_rate": 9.986682036991023e-06, "loss": 0.2229, "step": 29700 }, { "epoch": 0.6047837150127227, "grad_norm": 12.088744579054547, "learning_rate": 9.986630158534572e-06, "loss": 0.3657, "step": 29710 }, { "epoch": 0.6049872773536895, "grad_norm": 4.413156245071853, "learning_rate": 9.986578179366701e-06, "loss": 0.2791, "step": 29720 }, { "epoch": 0.6051908396946565, "grad_norm": 12.02372368042438, "learning_rate": 9.986526099488463e-06, "loss": 0.4031, "step": 29730 }, { "epoch": 0.6053944020356234, "grad_norm": 4.357264341925328, "learning_rate": 9.986473918900907e-06, "loss": 0.3207, "step": 29740 }, { "epoch": 0.6055979643765903, "grad_norm": 10.143983127866232, "learning_rate": 9.986421637605088e-06, "loss": 0.3658, "step": 29750 }, { "epoch": 0.6058015267175573, "grad_norm": 7.684157260956869, "learning_rate": 9.986369255602061e-06, "loss": 0.2821, "step": 29760 }, { "epoch": 0.6060050890585241, "grad_norm": 7.096523974718735, "learning_rate": 9.986316772892885e-06, "loss": 0.3339, "step": 29770 }, { "epoch": 0.6062086513994911, "grad_norm": 5.905590034090211, "learning_rate": 9.98626418947862e-06, "loss": 0.2919, "step": 29780 }, { "epoch": 0.6064122137404581, "grad_norm": 8.464839317238471, "learning_rate": 9.986211505360325e-06, "loss": 0.2636, "step": 29790 }, { "epoch": 0.6066157760814249, "grad_norm": 2.6782745103867245, "learning_rate": 9.986158720539069e-06, "loss": 0.253, "step": 29800 }, { "epoch": 0.6068193384223919, "grad_norm": 27.155313860086093, "learning_rate": 9.986105835015916e-06, "loss": 0.2351, "step": 29810 }, { "epoch": 0.6070229007633587, "grad_norm": 9.535132542594306, "learning_rate": 9.986052848791931e-06, "loss": 0.3832, "step": 29820 }, { "epoch": 0.6072264631043257, "grad_norm": 6.975198730628698, "learning_rate": 9.985999761868189e-06, "loss": 0.3898, "step": 29830 }, { "epoch": 0.6074300254452927, "grad_norm": 4.668508909149552, "learning_rate": 9.985946574245759e-06, "loss": 0.3309, "step": 29840 }, { "epoch": 0.6076335877862595, "grad_norm": 14.827308493673709, "learning_rate": 9.985893285925717e-06, "loss": 0.3218, "step": 29850 }, { "epoch": 0.6078371501272265, "grad_norm": 3.403223330920845, "learning_rate": 9.985839896909136e-06, "loss": 0.2425, "step": 29860 }, { "epoch": 0.6080407124681934, "grad_norm": 4.388139337581769, "learning_rate": 9.985786407197098e-06, "loss": 0.2861, "step": 29870 }, { "epoch": 0.6082442748091603, "grad_norm": 15.316322763549765, "learning_rate": 9.985732816790682e-06, "loss": 0.3029, "step": 29880 }, { "epoch": 0.6084478371501272, "grad_norm": 11.426287247846936, "learning_rate": 9.985679125690971e-06, "loss": 0.2804, "step": 29890 }, { "epoch": 0.6086513994910941, "grad_norm": 10.73803034247028, "learning_rate": 9.985625333899046e-06, "loss": 0.2764, "step": 29900 }, { "epoch": 0.6088549618320611, "grad_norm": 8.907601805002693, "learning_rate": 9.985571441415998e-06, "loss": 0.3517, "step": 29910 }, { "epoch": 0.609058524173028, "grad_norm": 17.8723577867545, "learning_rate": 9.985517448242911e-06, "loss": 0.3636, "step": 29920 }, { "epoch": 0.6092620865139949, "grad_norm": 7.545905949692126, "learning_rate": 9.98546335438088e-06, "loss": 0.2915, "step": 29930 }, { "epoch": 0.6094656488549618, "grad_norm": 12.994503933052743, "learning_rate": 9.985409159830996e-06, "loss": 0.2774, "step": 29940 }, { "epoch": 0.6096692111959288, "grad_norm": 7.244515240363463, "learning_rate": 9.985354864594353e-06, "loss": 0.3146, "step": 29950 }, { "epoch": 0.6098727735368957, "grad_norm": 12.915864266747752, "learning_rate": 9.985300468672046e-06, "loss": 0.4085, "step": 29960 }, { "epoch": 0.6100763358778626, "grad_norm": 5.601893443280762, "learning_rate": 9.985245972065173e-06, "loss": 0.2612, "step": 29970 }, { "epoch": 0.6102798982188296, "grad_norm": 6.942976773273162, "learning_rate": 9.985191374774841e-06, "loss": 0.3544, "step": 29980 }, { "epoch": 0.6104834605597964, "grad_norm": 27.21639522021877, "learning_rate": 9.985136676802144e-06, "loss": 0.2605, "step": 29990 }, { "epoch": 0.6106870229007634, "grad_norm": 11.93592920472994, "learning_rate": 9.985081878148194e-06, "loss": 0.3203, "step": 30000 }, { "epoch": 0.6108905852417302, "grad_norm": 22.64647858319508, "learning_rate": 9.985026978814094e-06, "loss": 0.4689, "step": 30010 }, { "epoch": 0.6110941475826972, "grad_norm": 4.275923111241072, "learning_rate": 9.984971978800954e-06, "loss": 0.2062, "step": 30020 }, { "epoch": 0.6112977099236642, "grad_norm": 3.6536973197856537, "learning_rate": 9.984916878109883e-06, "loss": 0.3522, "step": 30030 }, { "epoch": 0.611501272264631, "grad_norm": 2.4749466679922567, "learning_rate": 9.984861676741998e-06, "loss": 0.3144, "step": 30040 }, { "epoch": 0.611704834605598, "grad_norm": 9.034409350160562, "learning_rate": 9.984806374698408e-06, "loss": 0.3635, "step": 30050 }, { "epoch": 0.6119083969465648, "grad_norm": 6.226665242308525, "learning_rate": 9.984750971980232e-06, "loss": 0.3716, "step": 30060 }, { "epoch": 0.6121119592875318, "grad_norm": 14.217747822707603, "learning_rate": 9.984695468588592e-06, "loss": 0.3089, "step": 30070 }, { "epoch": 0.6123155216284988, "grad_norm": 19.07214794661358, "learning_rate": 9.984639864524607e-06, "loss": 0.3391, "step": 30080 }, { "epoch": 0.6125190839694656, "grad_norm": 9.128141680372288, "learning_rate": 9.984584159789398e-06, "loss": 0.3562, "step": 30090 }, { "epoch": 0.6127226463104326, "grad_norm": 8.352316750860453, "learning_rate": 9.984528354384095e-06, "loss": 0.2536, "step": 30100 }, { "epoch": 0.6129262086513995, "grad_norm": 10.325615464111246, "learning_rate": 9.984472448309818e-06, "loss": 0.3449, "step": 30110 }, { "epoch": 0.6131297709923664, "grad_norm": 12.099972437808123, "learning_rate": 9.984416441567702e-06, "loss": 0.3553, "step": 30120 }, { "epoch": 0.6133333333333333, "grad_norm": 7.204648393018359, "learning_rate": 9.984360334158876e-06, "loss": 0.2927, "step": 30130 }, { "epoch": 0.6135368956743003, "grad_norm": 11.899333878101906, "learning_rate": 9.984304126084472e-06, "loss": 0.3636, "step": 30140 }, { "epoch": 0.6137404580152672, "grad_norm": 4.48720819821888, "learning_rate": 9.984247817345629e-06, "loss": 0.2541, "step": 30150 }, { "epoch": 0.6139440203562341, "grad_norm": 13.085324296343817, "learning_rate": 9.98419140794348e-06, "loss": 0.3254, "step": 30160 }, { "epoch": 0.614147582697201, "grad_norm": 11.120908327058643, "learning_rate": 9.984134897879166e-06, "loss": 0.3137, "step": 30170 }, { "epoch": 0.6143511450381679, "grad_norm": 7.851674872588839, "learning_rate": 9.984078287153828e-06, "loss": 0.3546, "step": 30180 }, { "epoch": 0.6145547073791349, "grad_norm": 14.061727062545588, "learning_rate": 9.98402157576861e-06, "loss": 0.2293, "step": 30190 }, { "epoch": 0.6147582697201018, "grad_norm": 7.110578079132875, "learning_rate": 9.983964763724656e-06, "loss": 0.3978, "step": 30200 }, { "epoch": 0.6149618320610687, "grad_norm": 59.008195857764335, "learning_rate": 9.983907851023116e-06, "loss": 0.3847, "step": 30210 }, { "epoch": 0.6151653944020357, "grad_norm": 12.052455042812097, "learning_rate": 9.983850837665137e-06, "loss": 0.2881, "step": 30220 }, { "epoch": 0.6153689567430025, "grad_norm": 4.150951942337495, "learning_rate": 9.983793723651872e-06, "loss": 0.3168, "step": 30230 }, { "epoch": 0.6155725190839695, "grad_norm": 5.4435755132393435, "learning_rate": 9.983736508984472e-06, "loss": 0.3041, "step": 30240 }, { "epoch": 0.6157760814249363, "grad_norm": 14.06326014373176, "learning_rate": 9.983679193664093e-06, "loss": 0.3028, "step": 30250 }, { "epoch": 0.6159796437659033, "grad_norm": 11.465926059635999, "learning_rate": 9.983621777691896e-06, "loss": 0.3738, "step": 30260 }, { "epoch": 0.6161832061068703, "grad_norm": 26.703986453526813, "learning_rate": 9.98356426106904e-06, "loss": 0.3095, "step": 30270 }, { "epoch": 0.6163867684478371, "grad_norm": 8.47198982763943, "learning_rate": 9.983506643796682e-06, "loss": 0.3165, "step": 30280 }, { "epoch": 0.6165903307888041, "grad_norm": 22.311482351498, "learning_rate": 9.98344892587599e-06, "loss": 0.3129, "step": 30290 }, { "epoch": 0.616793893129771, "grad_norm": 34.17571357482321, "learning_rate": 9.983391107308126e-06, "loss": 0.3064, "step": 30300 }, { "epoch": 0.6169974554707379, "grad_norm": 7.186394521138109, "learning_rate": 9.983333188094262e-06, "loss": 0.2531, "step": 30310 }, { "epoch": 0.6172010178117049, "grad_norm": 8.152572814985431, "learning_rate": 9.983275168235566e-06, "loss": 0.234, "step": 30320 }, { "epoch": 0.6174045801526717, "grad_norm": 10.076612573394785, "learning_rate": 9.98321704773321e-06, "loss": 0.2882, "step": 30330 }, { "epoch": 0.6176081424936387, "grad_norm": 16.956056411842578, "learning_rate": 9.983158826588367e-06, "loss": 0.2621, "step": 30340 }, { "epoch": 0.6178117048346056, "grad_norm": 8.449400835000214, "learning_rate": 9.983100504802212e-06, "loss": 0.2528, "step": 30350 }, { "epoch": 0.6180152671755725, "grad_norm": 6.5577799132663745, "learning_rate": 9.983042082375925e-06, "loss": 0.2641, "step": 30360 }, { "epoch": 0.6182188295165394, "grad_norm": 9.449971995172563, "learning_rate": 9.982983559310684e-06, "loss": 0.3087, "step": 30370 }, { "epoch": 0.6184223918575064, "grad_norm": 5.2611519405858225, "learning_rate": 9.982924935607673e-06, "loss": 0.3843, "step": 30380 }, { "epoch": 0.6186259541984733, "grad_norm": 7.525383248868187, "learning_rate": 9.982866211268073e-06, "loss": 0.3735, "step": 30390 }, { "epoch": 0.6188295165394402, "grad_norm": 9.150725783592982, "learning_rate": 9.982807386293077e-06, "loss": 0.3152, "step": 30400 }, { "epoch": 0.6190330788804072, "grad_norm": 5.658688070435756, "learning_rate": 9.982748460683864e-06, "loss": 0.2644, "step": 30410 }, { "epoch": 0.619236641221374, "grad_norm": 6.858352506739306, "learning_rate": 9.982689434441628e-06, "loss": 0.356, "step": 30420 }, { "epoch": 0.619440203562341, "grad_norm": 38.994548900117884, "learning_rate": 9.982630307567562e-06, "loss": 0.2138, "step": 30430 }, { "epoch": 0.6196437659033079, "grad_norm": 5.5564886960045445, "learning_rate": 9.98257108006286e-06, "loss": 0.3198, "step": 30440 }, { "epoch": 0.6198473282442748, "grad_norm": 11.788246123783969, "learning_rate": 9.982511751928718e-06, "loss": 0.2974, "step": 30450 }, { "epoch": 0.6200508905852418, "grad_norm": 8.70305178134698, "learning_rate": 9.982452323166332e-06, "loss": 0.1994, "step": 30460 }, { "epoch": 0.6202544529262086, "grad_norm": 11.794276305704479, "learning_rate": 9.982392793776905e-06, "loss": 0.2808, "step": 30470 }, { "epoch": 0.6204580152671756, "grad_norm": 12.996512052929523, "learning_rate": 9.982333163761638e-06, "loss": 0.4333, "step": 30480 }, { "epoch": 0.6206615776081424, "grad_norm": 11.132313550415446, "learning_rate": 9.982273433121736e-06, "loss": 0.3022, "step": 30490 }, { "epoch": 0.6208651399491094, "grad_norm": 5.627404382839865, "learning_rate": 9.982213601858403e-06, "loss": 0.29, "step": 30500 }, { "epoch": 0.6210687022900764, "grad_norm": 8.326931292819603, "learning_rate": 9.982153669972852e-06, "loss": 0.3334, "step": 30510 }, { "epoch": 0.6212722646310432, "grad_norm": 9.832162993179672, "learning_rate": 9.982093637466291e-06, "loss": 0.2751, "step": 30520 }, { "epoch": 0.6214758269720102, "grad_norm": 19.165104060864653, "learning_rate": 9.98203350433993e-06, "loss": 0.3958, "step": 30530 }, { "epoch": 0.6216793893129771, "grad_norm": 10.033356313563981, "learning_rate": 9.981973270594985e-06, "loss": 0.3253, "step": 30540 }, { "epoch": 0.621882951653944, "grad_norm": 10.46615835915853, "learning_rate": 9.981912936232674e-06, "loss": 0.3, "step": 30550 }, { "epoch": 0.622086513994911, "grad_norm": 5.7974554260640385, "learning_rate": 9.981852501254215e-06, "loss": 0.2997, "step": 30560 }, { "epoch": 0.6222900763358779, "grad_norm": 15.649949243477543, "learning_rate": 9.981791965660828e-06, "loss": 0.3673, "step": 30570 }, { "epoch": 0.6224936386768448, "grad_norm": 12.303929890952018, "learning_rate": 9.981731329453737e-06, "loss": 0.3432, "step": 30580 }, { "epoch": 0.6226972010178117, "grad_norm": 4.813762253503457, "learning_rate": 9.981670592634163e-06, "loss": 0.344, "step": 30590 }, { "epoch": 0.6229007633587786, "grad_norm": 5.890156018165763, "learning_rate": 9.981609755203336e-06, "loss": 0.3051, "step": 30600 }, { "epoch": 0.6231043256997455, "grad_norm": 8.887645791961104, "learning_rate": 9.981548817162483e-06, "loss": 0.3855, "step": 30610 }, { "epoch": 0.6233078880407125, "grad_norm": 7.986537373479875, "learning_rate": 9.981487778512837e-06, "loss": 0.3942, "step": 30620 }, { "epoch": 0.6235114503816794, "grad_norm": 36.36856212937505, "learning_rate": 9.98142663925563e-06, "loss": 0.2585, "step": 30630 }, { "epoch": 0.6237150127226463, "grad_norm": 6.839826782374806, "learning_rate": 9.981365399392093e-06, "loss": 0.3335, "step": 30640 }, { "epoch": 0.6239185750636133, "grad_norm": 15.447571884847129, "learning_rate": 9.981304058923467e-06, "loss": 0.3255, "step": 30650 }, { "epoch": 0.6241221374045801, "grad_norm": 6.732497464027775, "learning_rate": 9.98124261785099e-06, "loss": 0.3181, "step": 30660 }, { "epoch": 0.6243256997455471, "grad_norm": 8.715734246712918, "learning_rate": 9.981181076175904e-06, "loss": 0.283, "step": 30670 }, { "epoch": 0.624529262086514, "grad_norm": 11.321813192858071, "learning_rate": 9.98111943389945e-06, "loss": 0.3303, "step": 30680 }, { "epoch": 0.6247328244274809, "grad_norm": 12.79483005422245, "learning_rate": 9.98105769102287e-06, "loss": 0.2597, "step": 30690 }, { "epoch": 0.6249363867684479, "grad_norm": 5.572468884941287, "learning_rate": 9.980995847547418e-06, "loss": 0.3705, "step": 30700 }, { "epoch": 0.6251399491094147, "grad_norm": 5.896963162543664, "learning_rate": 9.980933903474337e-06, "loss": 0.3355, "step": 30710 }, { "epoch": 0.6253435114503817, "grad_norm": 6.717353533685782, "learning_rate": 9.980871858804881e-06, "loss": 0.3119, "step": 30720 }, { "epoch": 0.6255470737913486, "grad_norm": 6.771142811868008, "learning_rate": 9.980809713540305e-06, "loss": 0.3547, "step": 30730 }, { "epoch": 0.6257506361323155, "grad_norm": 5.481734381217271, "learning_rate": 9.98074746768186e-06, "loss": 0.2626, "step": 30740 }, { "epoch": 0.6259541984732825, "grad_norm": 6.918190793658424, "learning_rate": 9.980685121230803e-06, "loss": 0.3993, "step": 30750 }, { "epoch": 0.6261577608142493, "grad_norm": 5.970001050104251, "learning_rate": 9.980622674188396e-06, "loss": 0.3115, "step": 30760 }, { "epoch": 0.6263613231552163, "grad_norm": 13.622908986225465, "learning_rate": 9.9805601265559e-06, "loss": 0.2579, "step": 30770 }, { "epoch": 0.6265648854961832, "grad_norm": 7.271346951123264, "learning_rate": 9.980497478334576e-06, "loss": 0.294, "step": 30780 }, { "epoch": 0.6267684478371501, "grad_norm": 9.104959018412291, "learning_rate": 9.980434729525693e-06, "loss": 0.2782, "step": 30790 }, { "epoch": 0.6269720101781171, "grad_norm": 19.559464018674532, "learning_rate": 9.980371880130513e-06, "loss": 0.3417, "step": 30800 }, { "epoch": 0.627175572519084, "grad_norm": 9.788990331347915, "learning_rate": 9.98030893015031e-06, "loss": 0.3532, "step": 30810 }, { "epoch": 0.6273791348600509, "grad_norm": 6.580508637279997, "learning_rate": 9.980245879586352e-06, "loss": 0.3929, "step": 30820 }, { "epoch": 0.6275826972010178, "grad_norm": 4.713558467303411, "learning_rate": 9.980182728439916e-06, "loss": 0.3234, "step": 30830 }, { "epoch": 0.6277862595419847, "grad_norm": 12.705912319594303, "learning_rate": 9.980119476712274e-06, "loss": 0.3327, "step": 30840 }, { "epoch": 0.6279898218829516, "grad_norm": 12.997122900025804, "learning_rate": 9.980056124404704e-06, "loss": 0.3048, "step": 30850 }, { "epoch": 0.6281933842239186, "grad_norm": 9.748370334118164, "learning_rate": 9.979992671518489e-06, "loss": 0.3243, "step": 30860 }, { "epoch": 0.6283969465648855, "grad_norm": 9.559722728905971, "learning_rate": 9.979929118054905e-06, "loss": 0.3768, "step": 30870 }, { "epoch": 0.6286005089058524, "grad_norm": 5.33168787158943, "learning_rate": 9.97986546401524e-06, "loss": 0.3183, "step": 30880 }, { "epoch": 0.6288040712468194, "grad_norm": 3.795601767799698, "learning_rate": 9.979801709400778e-06, "loss": 0.3538, "step": 30890 }, { "epoch": 0.6290076335877862, "grad_norm": 10.168982300709928, "learning_rate": 9.979737854212805e-06, "loss": 0.3602, "step": 30900 }, { "epoch": 0.6292111959287532, "grad_norm": 7.759834384235235, "learning_rate": 9.979673898452614e-06, "loss": 0.3902, "step": 30910 }, { "epoch": 0.6294147582697202, "grad_norm": 12.964035895096687, "learning_rate": 9.979609842121494e-06, "loss": 0.2818, "step": 30920 }, { "epoch": 0.629618320610687, "grad_norm": 7.732248339901282, "learning_rate": 9.97954568522074e-06, "loss": 0.4098, "step": 30930 }, { "epoch": 0.629821882951654, "grad_norm": 5.240465280115115, "learning_rate": 9.979481427751646e-06, "loss": 0.3312, "step": 30940 }, { "epoch": 0.6300254452926208, "grad_norm": 2.0875907577341963, "learning_rate": 9.979417069715514e-06, "loss": 0.1973, "step": 30950 }, { "epoch": 0.6302290076335878, "grad_norm": 7.319188677823046, "learning_rate": 9.979352611113638e-06, "loss": 0.279, "step": 30960 }, { "epoch": 0.6304325699745547, "grad_norm": 14.003374382232609, "learning_rate": 9.979288051947323e-06, "loss": 0.4475, "step": 30970 }, { "epoch": 0.6306361323155216, "grad_norm": 6.016541171668757, "learning_rate": 9.979223392217873e-06, "loss": 0.3993, "step": 30980 }, { "epoch": 0.6308396946564886, "grad_norm": 5.905232268942691, "learning_rate": 9.979158631926594e-06, "loss": 0.2548, "step": 30990 }, { "epoch": 0.6310432569974554, "grad_norm": 2.6778822904288724, "learning_rate": 9.979093771074793e-06, "loss": 0.3172, "step": 31000 }, { "epoch": 0.6312468193384224, "grad_norm": 5.7740278309233055, "learning_rate": 9.97902880966378e-06, "loss": 0.3327, "step": 31010 }, { "epoch": 0.6314503816793893, "grad_norm": 10.485837610358175, "learning_rate": 9.978963747694867e-06, "loss": 0.3507, "step": 31020 }, { "epoch": 0.6316539440203562, "grad_norm": 8.661308603981487, "learning_rate": 9.978898585169367e-06, "loss": 0.2876, "step": 31030 }, { "epoch": 0.6318575063613232, "grad_norm": 6.669774836907936, "learning_rate": 9.978833322088598e-06, "loss": 0.2564, "step": 31040 }, { "epoch": 0.6320610687022901, "grad_norm": 7.169992137022164, "learning_rate": 9.97876795845388e-06, "loss": 0.3564, "step": 31050 }, { "epoch": 0.632264631043257, "grad_norm": 13.126621506070864, "learning_rate": 9.978702494266527e-06, "loss": 0.3535, "step": 31060 }, { "epoch": 0.6324681933842239, "grad_norm": 6.955657751273387, "learning_rate": 9.978636929527868e-06, "loss": 0.2713, "step": 31070 }, { "epoch": 0.6326717557251909, "grad_norm": 5.864506457184651, "learning_rate": 9.978571264239221e-06, "loss": 0.3456, "step": 31080 }, { "epoch": 0.6328753180661577, "grad_norm": 4.893145966503635, "learning_rate": 9.978505498401918e-06, "loss": 0.3141, "step": 31090 }, { "epoch": 0.6330788804071247, "grad_norm": 6.107238681448697, "learning_rate": 9.97843963201728e-06, "loss": 0.3407, "step": 31100 }, { "epoch": 0.6332824427480916, "grad_norm": 10.555729137459275, "learning_rate": 9.978373665086645e-06, "loss": 0.3745, "step": 31110 }, { "epoch": 0.6334860050890585, "grad_norm": 9.72849378667359, "learning_rate": 9.978307597611341e-06, "loss": 0.3426, "step": 31120 }, { "epoch": 0.6336895674300255, "grad_norm": 3.2235728544592517, "learning_rate": 9.978241429592701e-06, "loss": 0.3502, "step": 31130 }, { "epoch": 0.6338931297709923, "grad_norm": 4.370233217577965, "learning_rate": 9.978175161032065e-06, "loss": 0.2617, "step": 31140 }, { "epoch": 0.6340966921119593, "grad_norm": 9.338205595071527, "learning_rate": 9.97810879193077e-06, "loss": 0.2473, "step": 31150 }, { "epoch": 0.6343002544529263, "grad_norm": 17.9408094848306, "learning_rate": 9.978042322290156e-06, "loss": 0.3666, "step": 31160 }, { "epoch": 0.6345038167938931, "grad_norm": 9.249293393291728, "learning_rate": 9.977975752111565e-06, "loss": 0.2928, "step": 31170 }, { "epoch": 0.6347073791348601, "grad_norm": 12.1806417327135, "learning_rate": 9.977909081396342e-06, "loss": 0.3163, "step": 31180 }, { "epoch": 0.6349109414758269, "grad_norm": 9.518731263882062, "learning_rate": 9.977842310145836e-06, "loss": 0.352, "step": 31190 }, { "epoch": 0.6351145038167939, "grad_norm": 16.555392272481814, "learning_rate": 9.97777543836139e-06, "loss": 0.3941, "step": 31200 }, { "epoch": 0.6353180661577608, "grad_norm": 21.434397168095785, "learning_rate": 9.97770846604436e-06, "loss": 0.361, "step": 31210 }, { "epoch": 0.6355216284987277, "grad_norm": 14.326938864002214, "learning_rate": 9.977641393196094e-06, "loss": 0.2732, "step": 31220 }, { "epoch": 0.6357251908396947, "grad_norm": 16.310648335312216, "learning_rate": 9.97757421981795e-06, "loss": 0.2792, "step": 31230 }, { "epoch": 0.6359287531806616, "grad_norm": 18.29574708655947, "learning_rate": 9.977506945911284e-06, "loss": 0.3461, "step": 31240 }, { "epoch": 0.6361323155216285, "grad_norm": 5.896499525863289, "learning_rate": 9.977439571477455e-06, "loss": 0.2507, "step": 31250 }, { "epoch": 0.6363358778625954, "grad_norm": 6.760518612970228, "learning_rate": 9.977372096517821e-06, "loss": 0.2663, "step": 31260 }, { "epoch": 0.6365394402035623, "grad_norm": 8.077233539243187, "learning_rate": 9.977304521033748e-06, "loss": 0.3891, "step": 31270 }, { "epoch": 0.6367430025445293, "grad_norm": 13.654676256110564, "learning_rate": 9.977236845026599e-06, "loss": 0.3757, "step": 31280 }, { "epoch": 0.6369465648854962, "grad_norm": 12.139462666780458, "learning_rate": 9.97716906849774e-06, "loss": 0.3054, "step": 31290 }, { "epoch": 0.6371501272264631, "grad_norm": 10.166795998345235, "learning_rate": 9.977101191448544e-06, "loss": 0.271, "step": 31300 }, { "epoch": 0.63735368956743, "grad_norm": 13.364442123055264, "learning_rate": 9.977033213880378e-06, "loss": 0.3358, "step": 31310 }, { "epoch": 0.637557251908397, "grad_norm": 17.228807385310425, "learning_rate": 9.976965135794613e-06, "loss": 0.3947, "step": 31320 }, { "epoch": 0.6377608142493638, "grad_norm": 6.717923184110116, "learning_rate": 9.97689695719263e-06, "loss": 0.3141, "step": 31330 }, { "epoch": 0.6379643765903308, "grad_norm": 18.495036321745378, "learning_rate": 9.976828678075803e-06, "loss": 0.3501, "step": 31340 }, { "epoch": 0.6381679389312978, "grad_norm": 4.263380741986926, "learning_rate": 9.97676029844551e-06, "loss": 0.3068, "step": 31350 }, { "epoch": 0.6383715012722646, "grad_norm": 7.380974659645039, "learning_rate": 9.976691818303132e-06, "loss": 0.3723, "step": 31360 }, { "epoch": 0.6385750636132316, "grad_norm": 11.145243990226334, "learning_rate": 9.976623237650052e-06, "loss": 0.2862, "step": 31370 }, { "epoch": 0.6387786259541984, "grad_norm": 9.200000571255318, "learning_rate": 9.976554556487657e-06, "loss": 0.3374, "step": 31380 }, { "epoch": 0.6389821882951654, "grad_norm": 5.430855507894496, "learning_rate": 9.976485774817335e-06, "loss": 0.2605, "step": 31390 }, { "epoch": 0.6391857506361324, "grad_norm": 6.724537617459814, "learning_rate": 9.976416892640471e-06, "loss": 0.3, "step": 31400 }, { "epoch": 0.6393893129770992, "grad_norm": 16.2430897566785, "learning_rate": 9.976347909958457e-06, "loss": 0.3102, "step": 31410 }, { "epoch": 0.6395928753180662, "grad_norm": 7.109928995648941, "learning_rate": 9.97627882677269e-06, "loss": 0.2742, "step": 31420 }, { "epoch": 0.639796437659033, "grad_norm": 9.287519912650225, "learning_rate": 9.976209643084562e-06, "loss": 0.4136, "step": 31430 }, { "epoch": 0.64, "grad_norm": 24.090963025343186, "learning_rate": 9.97614035889547e-06, "loss": 0.4051, "step": 31440 }, { "epoch": 0.6402035623409669, "grad_norm": 8.169241103462333, "learning_rate": 9.976070974206813e-06, "loss": 0.3007, "step": 31450 }, { "epoch": 0.6404071246819338, "grad_norm": 10.0976455903023, "learning_rate": 9.976001489019998e-06, "loss": 0.3097, "step": 31460 }, { "epoch": 0.6406106870229008, "grad_norm": 6.798629483761851, "learning_rate": 9.97593190333642e-06, "loss": 0.3388, "step": 31470 }, { "epoch": 0.6408142493638677, "grad_norm": 17.973601029174894, "learning_rate": 9.975862217157489e-06, "loss": 0.4076, "step": 31480 }, { "epoch": 0.6410178117048346, "grad_norm": 3.5582379470926164, "learning_rate": 9.975792430484612e-06, "loss": 0.2234, "step": 31490 }, { "epoch": 0.6412213740458015, "grad_norm": 9.910343577018045, "learning_rate": 9.975722543319198e-06, "loss": 0.2703, "step": 31500 }, { "epoch": 0.6414249363867685, "grad_norm": 6.379439937162827, "learning_rate": 9.97565255566266e-06, "loss": 0.2631, "step": 31510 }, { "epoch": 0.6416284987277354, "grad_norm": 8.83334532361592, "learning_rate": 9.975582467516409e-06, "loss": 0.4549, "step": 31520 }, { "epoch": 0.6418320610687023, "grad_norm": 15.92134879566913, "learning_rate": 9.97551227888186e-06, "loss": 0.2753, "step": 31530 }, { "epoch": 0.6420356234096692, "grad_norm": 5.6441276410636885, "learning_rate": 9.975441989760435e-06, "loss": 0.2944, "step": 31540 }, { "epoch": 0.6422391857506361, "grad_norm": 4.2638496552476814, "learning_rate": 9.975371600153548e-06, "loss": 0.294, "step": 31550 }, { "epoch": 0.6424427480916031, "grad_norm": 7.906671715193284, "learning_rate": 9.975301110062625e-06, "loss": 0.3672, "step": 31560 }, { "epoch": 0.6426463104325699, "grad_norm": 9.729212905389176, "learning_rate": 9.975230519489087e-06, "loss": 0.2943, "step": 31570 }, { "epoch": 0.6428498727735369, "grad_norm": 8.708922676586482, "learning_rate": 9.975159828434362e-06, "loss": 0.213, "step": 31580 }, { "epoch": 0.6430534351145039, "grad_norm": 11.294851385863701, "learning_rate": 9.975089036899874e-06, "loss": 0.2858, "step": 31590 }, { "epoch": 0.6432569974554707, "grad_norm": 6.572320675254605, "learning_rate": 9.975018144887057e-06, "loss": 0.2406, "step": 31600 }, { "epoch": 0.6434605597964377, "grad_norm": 26.0994295720103, "learning_rate": 9.974947152397341e-06, "loss": 0.4131, "step": 31610 }, { "epoch": 0.6436641221374045, "grad_norm": 10.781463549673733, "learning_rate": 9.974876059432158e-06, "loss": 0.3877, "step": 31620 }, { "epoch": 0.6438676844783715, "grad_norm": 8.114862630648886, "learning_rate": 9.974804865992947e-06, "loss": 0.3193, "step": 31630 }, { "epoch": 0.6440712468193385, "grad_norm": 5.565099440478315, "learning_rate": 9.974733572081143e-06, "loss": 0.3863, "step": 31640 }, { "epoch": 0.6442748091603053, "grad_norm": 3.753207984426454, "learning_rate": 9.974662177698187e-06, "loss": 0.2761, "step": 31650 }, { "epoch": 0.6444783715012723, "grad_norm": 10.824477239603324, "learning_rate": 9.974590682845523e-06, "loss": 0.2895, "step": 31660 }, { "epoch": 0.6446819338422392, "grad_norm": 9.941230286995264, "learning_rate": 9.97451908752459e-06, "loss": 0.3086, "step": 31670 }, { "epoch": 0.6448854961832061, "grad_norm": 2.8769353875761094, "learning_rate": 9.974447391736838e-06, "loss": 0.3785, "step": 31680 }, { "epoch": 0.645089058524173, "grad_norm": 12.744226886723352, "learning_rate": 9.974375595483714e-06, "loss": 0.4057, "step": 31690 }, { "epoch": 0.6452926208651399, "grad_norm": 7.331277810235875, "learning_rate": 9.974303698766669e-06, "loss": 0.3291, "step": 31700 }, { "epoch": 0.6454961832061069, "grad_norm": 4.388002274535159, "learning_rate": 9.974231701587153e-06, "loss": 0.3257, "step": 31710 }, { "epoch": 0.6456997455470738, "grad_norm": 13.619178053095123, "learning_rate": 9.97415960394662e-06, "loss": 0.4196, "step": 31720 }, { "epoch": 0.6459033078880407, "grad_norm": 8.063384782897582, "learning_rate": 9.974087405846529e-06, "loss": 0.3446, "step": 31730 }, { "epoch": 0.6461068702290076, "grad_norm": 4.880413393541671, "learning_rate": 9.974015107288335e-06, "loss": 0.3297, "step": 31740 }, { "epoch": 0.6463104325699746, "grad_norm": 2.988181228992644, "learning_rate": 9.9739427082735e-06, "loss": 0.2815, "step": 31750 }, { "epoch": 0.6465139949109415, "grad_norm": 6.432504905604014, "learning_rate": 9.973870208803487e-06, "loss": 0.2667, "step": 31760 }, { "epoch": 0.6467175572519084, "grad_norm": 9.433215678237758, "learning_rate": 9.973797608879755e-06, "loss": 0.3123, "step": 31770 }, { "epoch": 0.6469211195928753, "grad_norm": 5.813791696293848, "learning_rate": 9.973724908503778e-06, "loss": 0.3215, "step": 31780 }, { "epoch": 0.6471246819338422, "grad_norm": 4.863658713211632, "learning_rate": 9.973652107677018e-06, "loss": 0.393, "step": 31790 }, { "epoch": 0.6473282442748092, "grad_norm": 10.608595240290734, "learning_rate": 9.97357920640095e-06, "loss": 0.3007, "step": 31800 }, { "epoch": 0.647531806615776, "grad_norm": 8.023453789084368, "learning_rate": 9.973506204677041e-06, "loss": 0.2873, "step": 31810 }, { "epoch": 0.647735368956743, "grad_norm": 6.901391091101113, "learning_rate": 9.97343310250677e-06, "loss": 0.3454, "step": 31820 }, { "epoch": 0.64793893129771, "grad_norm": 4.182687334099888, "learning_rate": 9.973359899891613e-06, "loss": 0.2217, "step": 31830 }, { "epoch": 0.6481424936386768, "grad_norm": 5.474666608333147, "learning_rate": 9.973286596833045e-06, "loss": 0.26, "step": 31840 }, { "epoch": 0.6483460559796438, "grad_norm": 4.729983323355626, "learning_rate": 9.97321319333255e-06, "loss": 0.3242, "step": 31850 }, { "epoch": 0.6485496183206106, "grad_norm": 6.3315985756372495, "learning_rate": 9.973139689391608e-06, "loss": 0.282, "step": 31860 }, { "epoch": 0.6487531806615776, "grad_norm": 8.717609043923055, "learning_rate": 9.973066085011704e-06, "loss": 0.3164, "step": 31870 }, { "epoch": 0.6489567430025446, "grad_norm": 9.869343009119486, "learning_rate": 9.972992380194327e-06, "loss": 0.2794, "step": 31880 }, { "epoch": 0.6491603053435114, "grad_norm": 9.249844408407615, "learning_rate": 9.972918574940963e-06, "loss": 0.2428, "step": 31890 }, { "epoch": 0.6493638676844784, "grad_norm": 4.374388998636297, "learning_rate": 9.972844669253103e-06, "loss": 0.2264, "step": 31900 }, { "epoch": 0.6495674300254453, "grad_norm": 6.995939065890242, "learning_rate": 9.97277066313224e-06, "loss": 0.3692, "step": 31910 }, { "epoch": 0.6497709923664122, "grad_norm": 7.656156298785338, "learning_rate": 9.972696556579869e-06, "loss": 0.3271, "step": 31920 }, { "epoch": 0.6499745547073791, "grad_norm": 8.881079129968823, "learning_rate": 9.972622349597487e-06, "loss": 0.3083, "step": 31930 }, { "epoch": 0.650178117048346, "grad_norm": 5.148494607776003, "learning_rate": 9.97254804218659e-06, "loss": 0.3434, "step": 31940 }, { "epoch": 0.650381679389313, "grad_norm": 11.464618241576213, "learning_rate": 9.972473634348682e-06, "loss": 0.3838, "step": 31950 }, { "epoch": 0.6505852417302799, "grad_norm": 16.378556914936492, "learning_rate": 9.972399126085264e-06, "loss": 0.3656, "step": 31960 }, { "epoch": 0.6507888040712468, "grad_norm": 18.806116821140524, "learning_rate": 9.972324517397843e-06, "loss": 0.3124, "step": 31970 }, { "epoch": 0.6509923664122137, "grad_norm": 4.183217466683692, "learning_rate": 9.972249808287922e-06, "loss": 0.2387, "step": 31980 }, { "epoch": 0.6511959287531807, "grad_norm": 9.429401221578706, "learning_rate": 9.972174998757012e-06, "loss": 0.4183, "step": 31990 }, { "epoch": 0.6513994910941476, "grad_norm": 14.64231308873197, "learning_rate": 9.972100088806624e-06, "loss": 0.3037, "step": 32000 }, { "epoch": 0.6516030534351145, "grad_norm": 8.210808212632793, "learning_rate": 9.972025078438272e-06, "loss": 0.3687, "step": 32010 }, { "epoch": 0.6518066157760815, "grad_norm": 7.8324790965939, "learning_rate": 9.971949967653467e-06, "loss": 0.368, "step": 32020 }, { "epoch": 0.6520101781170483, "grad_norm": 10.59635501436031, "learning_rate": 9.97187475645373e-06, "loss": 0.3035, "step": 32030 }, { "epoch": 0.6522137404580153, "grad_norm": 9.739733347998284, "learning_rate": 9.97179944484058e-06, "loss": 0.3052, "step": 32040 }, { "epoch": 0.6524173027989821, "grad_norm": 6.481526037142017, "learning_rate": 9.971724032815536e-06, "loss": 0.1922, "step": 32050 }, { "epoch": 0.6526208651399491, "grad_norm": 16.64919695545345, "learning_rate": 9.97164852038012e-06, "loss": 0.3103, "step": 32060 }, { "epoch": 0.6528244274809161, "grad_norm": 7.760159169653541, "learning_rate": 9.971572907535858e-06, "loss": 0.3857, "step": 32070 }, { "epoch": 0.6530279898218829, "grad_norm": 5.295034452269176, "learning_rate": 9.97149719428428e-06, "loss": 0.2404, "step": 32080 }, { "epoch": 0.6532315521628499, "grad_norm": 6.560239811347352, "learning_rate": 9.971421380626913e-06, "loss": 0.2872, "step": 32090 }, { "epoch": 0.6534351145038167, "grad_norm": 9.417450148011255, "learning_rate": 9.971345466565287e-06, "loss": 0.3475, "step": 32100 }, { "epoch": 0.6536386768447837, "grad_norm": 7.44419577482175, "learning_rate": 9.971269452100936e-06, "loss": 0.2906, "step": 32110 }, { "epoch": 0.6538422391857507, "grad_norm": 2.6269396918582784, "learning_rate": 9.971193337235397e-06, "loss": 0.3033, "step": 32120 }, { "epoch": 0.6540458015267175, "grad_norm": 20.474170899103687, "learning_rate": 9.971117121970203e-06, "loss": 0.4223, "step": 32130 }, { "epoch": 0.6542493638676845, "grad_norm": 10.016998250733867, "learning_rate": 9.9710408063069e-06, "loss": 0.3099, "step": 32140 }, { "epoch": 0.6544529262086514, "grad_norm": 8.524412712973689, "learning_rate": 9.970964390247022e-06, "loss": 0.3485, "step": 32150 }, { "epoch": 0.6546564885496183, "grad_norm": 11.953298453550998, "learning_rate": 9.970887873792116e-06, "loss": 0.3089, "step": 32160 }, { "epoch": 0.6548600508905852, "grad_norm": 5.633399858468917, "learning_rate": 9.970811256943729e-06, "loss": 0.3081, "step": 32170 }, { "epoch": 0.6550636132315522, "grad_norm": 10.565311255653004, "learning_rate": 9.970734539703404e-06, "loss": 0.3362, "step": 32180 }, { "epoch": 0.6552671755725191, "grad_norm": 5.05617430807495, "learning_rate": 9.970657722072694e-06, "loss": 0.2542, "step": 32190 }, { "epoch": 0.655470737913486, "grad_norm": 8.494390487558524, "learning_rate": 9.970580804053148e-06, "loss": 0.3337, "step": 32200 }, { "epoch": 0.655674300254453, "grad_norm": 6.675916537959827, "learning_rate": 9.970503785646322e-06, "loss": 0.3171, "step": 32210 }, { "epoch": 0.6558778625954198, "grad_norm": 8.183331617349591, "learning_rate": 9.970426666853769e-06, "loss": 0.2984, "step": 32220 }, { "epoch": 0.6560814249363868, "grad_norm": 7.866253283411712, "learning_rate": 9.970349447677047e-06, "loss": 0.326, "step": 32230 }, { "epoch": 0.6562849872773537, "grad_norm": 10.350351566068314, "learning_rate": 9.970272128117718e-06, "loss": 0.3421, "step": 32240 }, { "epoch": 0.6564885496183206, "grad_norm": 0.18767577248588896, "learning_rate": 9.97019470817734e-06, "loss": 0.2096, "step": 32250 }, { "epoch": 0.6566921119592876, "grad_norm": 8.67651243302861, "learning_rate": 9.97011718785748e-06, "loss": 0.3097, "step": 32260 }, { "epoch": 0.6568956743002544, "grad_norm": 4.597924333487834, "learning_rate": 9.970039567159702e-06, "loss": 0.3884, "step": 32270 }, { "epoch": 0.6570992366412214, "grad_norm": 19.146716297279106, "learning_rate": 9.969961846085572e-06, "loss": 0.2807, "step": 32280 }, { "epoch": 0.6573027989821882, "grad_norm": 11.682341076226905, "learning_rate": 9.969884024636662e-06, "loss": 0.3208, "step": 32290 }, { "epoch": 0.6575063613231552, "grad_norm": 12.484605616461552, "learning_rate": 9.969806102814542e-06, "loss": 0.2822, "step": 32300 }, { "epoch": 0.6577099236641222, "grad_norm": 15.184986876350445, "learning_rate": 9.969728080620788e-06, "loss": 0.3963, "step": 32310 }, { "epoch": 0.657913486005089, "grad_norm": 6.954879476738139, "learning_rate": 9.969649958056974e-06, "loss": 0.2531, "step": 32320 }, { "epoch": 0.658117048346056, "grad_norm": 11.477596303074423, "learning_rate": 9.969571735124678e-06, "loss": 0.2972, "step": 32330 }, { "epoch": 0.6583206106870229, "grad_norm": 17.524836085898897, "learning_rate": 9.96949341182548e-06, "loss": 0.3587, "step": 32340 }, { "epoch": 0.6585241730279898, "grad_norm": 8.464370689515711, "learning_rate": 9.969414988160962e-06, "loss": 0.3393, "step": 32350 }, { "epoch": 0.6587277353689568, "grad_norm": 8.15466032737003, "learning_rate": 9.969336464132708e-06, "loss": 0.2803, "step": 32360 }, { "epoch": 0.6589312977099236, "grad_norm": 23.292453241088968, "learning_rate": 9.969257839742303e-06, "loss": 0.3289, "step": 32370 }, { "epoch": 0.6591348600508906, "grad_norm": 8.588128326723433, "learning_rate": 9.969179114991338e-06, "loss": 0.3104, "step": 32380 }, { "epoch": 0.6593384223918575, "grad_norm": 18.514342046658225, "learning_rate": 9.969100289881398e-06, "loss": 0.3515, "step": 32390 }, { "epoch": 0.6595419847328244, "grad_norm": 10.547822343418124, "learning_rate": 9.969021364414077e-06, "loss": 0.3164, "step": 32400 }, { "epoch": 0.6597455470737913, "grad_norm": 3.9019949183442795, "learning_rate": 9.968942338590972e-06, "loss": 0.2533, "step": 32410 }, { "epoch": 0.6599491094147583, "grad_norm": 9.816875307257868, "learning_rate": 9.968863212413675e-06, "loss": 0.2985, "step": 32420 }, { "epoch": 0.6601526717557252, "grad_norm": 14.909048829607956, "learning_rate": 9.968783985883786e-06, "loss": 0.2628, "step": 32430 }, { "epoch": 0.6603562340966921, "grad_norm": 8.020473476179113, "learning_rate": 9.968704659002903e-06, "loss": 0.3712, "step": 32440 }, { "epoch": 0.660559796437659, "grad_norm": 7.277019112724053, "learning_rate": 9.968625231772632e-06, "loss": 0.3845, "step": 32450 }, { "epoch": 0.6607633587786259, "grad_norm": 29.70701229494267, "learning_rate": 9.968545704194574e-06, "loss": 0.4038, "step": 32460 }, { "epoch": 0.6609669211195929, "grad_norm": 13.10950066358131, "learning_rate": 9.968466076270336e-06, "loss": 0.3037, "step": 32470 }, { "epoch": 0.6611704834605598, "grad_norm": 8.94571584567027, "learning_rate": 9.968386348001526e-06, "loss": 0.3159, "step": 32480 }, { "epoch": 0.6613740458015267, "grad_norm": 21.999885232479254, "learning_rate": 9.968306519389757e-06, "loss": 0.3423, "step": 32490 }, { "epoch": 0.6615776081424937, "grad_norm": 9.859659214030692, "learning_rate": 9.968226590436637e-06, "loss": 0.2911, "step": 32500 }, { "epoch": 0.6617811704834605, "grad_norm": 9.335215253529729, "learning_rate": 9.96814656114378e-06, "loss": 0.3461, "step": 32510 }, { "epoch": 0.6619847328244275, "grad_norm": 13.917528455915983, "learning_rate": 9.968066431512806e-06, "loss": 0.3937, "step": 32520 }, { "epoch": 0.6621882951653943, "grad_norm": 5.568185478731658, "learning_rate": 9.96798620154533e-06, "loss": 0.2732, "step": 32530 }, { "epoch": 0.6623918575063613, "grad_norm": 8.188664204268123, "learning_rate": 9.967905871242977e-06, "loss": 0.3178, "step": 32540 }, { "epoch": 0.6625954198473283, "grad_norm": 7.573830770572348, "learning_rate": 9.967825440607364e-06, "loss": 0.2989, "step": 32550 }, { "epoch": 0.6627989821882951, "grad_norm": 5.7417042026835405, "learning_rate": 9.967744909640119e-06, "loss": 0.2651, "step": 32560 }, { "epoch": 0.6630025445292621, "grad_norm": 9.517586160454748, "learning_rate": 9.967664278342866e-06, "loss": 0.3288, "step": 32570 }, { "epoch": 0.663206106870229, "grad_norm": 10.217637658389686, "learning_rate": 9.967583546717236e-06, "loss": 0.3102, "step": 32580 }, { "epoch": 0.6634096692111959, "grad_norm": 22.921720721936378, "learning_rate": 9.967502714764856e-06, "loss": 0.2969, "step": 32590 }, { "epoch": 0.6636132315521629, "grad_norm": 6.54167387552212, "learning_rate": 9.967421782487363e-06, "loss": 0.3072, "step": 32600 }, { "epoch": 0.6638167938931298, "grad_norm": 7.861313836723483, "learning_rate": 9.967340749886388e-06, "loss": 0.3752, "step": 32610 }, { "epoch": 0.6640203562340967, "grad_norm": 4.430425652109748, "learning_rate": 9.967259616963567e-06, "loss": 0.2421, "step": 32620 }, { "epoch": 0.6642239185750636, "grad_norm": 4.303563067575692, "learning_rate": 9.967178383720543e-06, "loss": 0.2644, "step": 32630 }, { "epoch": 0.6644274809160305, "grad_norm": 8.923472072215423, "learning_rate": 9.967097050158953e-06, "loss": 0.3052, "step": 32640 }, { "epoch": 0.6646310432569974, "grad_norm": 12.267327610780308, "learning_rate": 9.96701561628044e-06, "loss": 0.4434, "step": 32650 }, { "epoch": 0.6648346055979644, "grad_norm": 5.969927874570747, "learning_rate": 9.96693408208665e-06, "loss": 0.2692, "step": 32660 }, { "epoch": 0.6650381679389313, "grad_norm": 5.083937510939492, "learning_rate": 9.966852447579228e-06, "loss": 0.2466, "step": 32670 }, { "epoch": 0.6652417302798982, "grad_norm": 3.837151777184038, "learning_rate": 9.966770712759823e-06, "loss": 0.268, "step": 32680 }, { "epoch": 0.6654452926208652, "grad_norm": 15.500537212890622, "learning_rate": 9.966688877630086e-06, "loss": 0.3137, "step": 32690 }, { "epoch": 0.665648854961832, "grad_norm": 5.914159469505285, "learning_rate": 9.966606942191673e-06, "loss": 0.3856, "step": 32700 }, { "epoch": 0.665852417302799, "grad_norm": 6.451642963123516, "learning_rate": 9.966524906446233e-06, "loss": 0.3237, "step": 32710 }, { "epoch": 0.666055979643766, "grad_norm": 9.607041175247794, "learning_rate": 9.966442770395427e-06, "loss": 0.1915, "step": 32720 }, { "epoch": 0.6662595419847328, "grad_norm": 18.881264610646145, "learning_rate": 9.966360534040913e-06, "loss": 0.2884, "step": 32730 }, { "epoch": 0.6664631043256998, "grad_norm": 6.663445193194422, "learning_rate": 9.96627819738435e-06, "loss": 0.3722, "step": 32740 }, { "epoch": 0.6666666666666666, "grad_norm": 6.95611319949749, "learning_rate": 9.966195760427402e-06, "loss": 0.3813, "step": 32750 }, { "epoch": 0.6668702290076336, "grad_norm": 9.612559132880438, "learning_rate": 9.966113223171736e-06, "loss": 0.4053, "step": 32760 }, { "epoch": 0.6670737913486006, "grad_norm": 4.081373516538464, "learning_rate": 9.966030585619016e-06, "loss": 0.3504, "step": 32770 }, { "epoch": 0.6672773536895674, "grad_norm": 10.110899126669105, "learning_rate": 9.965947847770912e-06, "loss": 0.2537, "step": 32780 }, { "epoch": 0.6674809160305344, "grad_norm": 8.80671184673712, "learning_rate": 9.965865009629096e-06, "loss": 0.3312, "step": 32790 }, { "epoch": 0.6676844783715012, "grad_norm": 4.920843854903835, "learning_rate": 9.96578207119524e-06, "loss": 0.2798, "step": 32800 }, { "epoch": 0.6678880407124682, "grad_norm": 6.642020440972727, "learning_rate": 9.965699032471017e-06, "loss": 0.321, "step": 32810 }, { "epoch": 0.6680916030534351, "grad_norm": 8.081340927725192, "learning_rate": 9.96561589345811e-06, "loss": 0.2786, "step": 32820 }, { "epoch": 0.668295165394402, "grad_norm": 8.125430692000238, "learning_rate": 9.965532654158193e-06, "loss": 0.3042, "step": 32830 }, { "epoch": 0.668498727735369, "grad_norm": 2.856366621408674, "learning_rate": 9.965449314572946e-06, "loss": 0.3793, "step": 32840 }, { "epoch": 0.6687022900763359, "grad_norm": 16.65295626884736, "learning_rate": 9.965365874704057e-06, "loss": 0.3154, "step": 32850 }, { "epoch": 0.6689058524173028, "grad_norm": 4.13684784074822, "learning_rate": 9.965282334553208e-06, "loss": 0.3184, "step": 32860 }, { "epoch": 0.6691094147582697, "grad_norm": 3.254568566760943, "learning_rate": 9.965198694122088e-06, "loss": 0.3886, "step": 32870 }, { "epoch": 0.6693129770992367, "grad_norm": 11.586279567236815, "learning_rate": 9.965114953412385e-06, "loss": 0.2942, "step": 32880 }, { "epoch": 0.6695165394402036, "grad_norm": 5.55601909657881, "learning_rate": 9.965031112425789e-06, "loss": 0.3756, "step": 32890 }, { "epoch": 0.6697201017811705, "grad_norm": 8.061880383504805, "learning_rate": 9.964947171163997e-06, "loss": 0.3133, "step": 32900 }, { "epoch": 0.6699236641221374, "grad_norm": 6.361868078231776, "learning_rate": 9.9648631296287e-06, "loss": 0.3254, "step": 32910 }, { "epoch": 0.6701272264631043, "grad_norm": 4.099221032546046, "learning_rate": 9.9647789878216e-06, "loss": 0.2386, "step": 32920 }, { "epoch": 0.6703307888040713, "grad_norm": 7.75771832938536, "learning_rate": 9.964694745744392e-06, "loss": 0.328, "step": 32930 }, { "epoch": 0.6705343511450381, "grad_norm": 12.111285595790338, "learning_rate": 9.964610403398777e-06, "loss": 0.2918, "step": 32940 }, { "epoch": 0.6707379134860051, "grad_norm": 4.615915627458313, "learning_rate": 9.964525960786464e-06, "loss": 0.2796, "step": 32950 }, { "epoch": 0.6709414758269721, "grad_norm": 5.828283267704951, "learning_rate": 9.964441417909154e-06, "loss": 0.4341, "step": 32960 }, { "epoch": 0.6711450381679389, "grad_norm": 6.858153436011166, "learning_rate": 9.964356774768555e-06, "loss": 0.281, "step": 32970 }, { "epoch": 0.6713486005089059, "grad_norm": 13.697609018182696, "learning_rate": 9.964272031366378e-06, "loss": 0.3124, "step": 32980 }, { "epoch": 0.6715521628498727, "grad_norm": 5.7265221640006985, "learning_rate": 9.964187187704332e-06, "loss": 0.3062, "step": 32990 }, { "epoch": 0.6717557251908397, "grad_norm": 13.04107658827338, "learning_rate": 9.964102243784131e-06, "loss": 0.3524, "step": 33000 }, { "epoch": 0.6719592875318067, "grad_norm": 16.003322895713513, "learning_rate": 9.964017199607495e-06, "loss": 0.2484, "step": 33010 }, { "epoch": 0.6721628498727735, "grad_norm": 9.243537224190131, "learning_rate": 9.963932055176135e-06, "loss": 0.259, "step": 33020 }, { "epoch": 0.6723664122137405, "grad_norm": 25.36316805533096, "learning_rate": 9.963846810491773e-06, "loss": 0.3716, "step": 33030 }, { "epoch": 0.6725699745547074, "grad_norm": 18.72135136087342, "learning_rate": 9.963761465556134e-06, "loss": 0.3585, "step": 33040 }, { "epoch": 0.6727735368956743, "grad_norm": 11.5779428794192, "learning_rate": 9.963676020370938e-06, "loss": 0.361, "step": 33050 }, { "epoch": 0.6729770992366412, "grad_norm": 9.830061894170482, "learning_rate": 9.963590474937913e-06, "loss": 0.3684, "step": 33060 }, { "epoch": 0.6731806615776081, "grad_norm": 10.661082338997938, "learning_rate": 9.963504829258782e-06, "loss": 0.2623, "step": 33070 }, { "epoch": 0.6733842239185751, "grad_norm": 10.025417726450437, "learning_rate": 9.96341908333528e-06, "loss": 0.3119, "step": 33080 }, { "epoch": 0.673587786259542, "grad_norm": 5.582458491204332, "learning_rate": 9.963333237169136e-06, "loss": 0.2981, "step": 33090 }, { "epoch": 0.6737913486005089, "grad_norm": 9.226824456949025, "learning_rate": 9.963247290762086e-06, "loss": 0.3257, "step": 33100 }, { "epoch": 0.6739949109414758, "grad_norm": 6.787203265570386, "learning_rate": 9.963161244115862e-06, "loss": 0.2503, "step": 33110 }, { "epoch": 0.6741984732824428, "grad_norm": 9.738203680173408, "learning_rate": 9.963075097232206e-06, "loss": 0.431, "step": 33120 }, { "epoch": 0.6744020356234097, "grad_norm": 13.077579569510856, "learning_rate": 9.962988850112855e-06, "loss": 0.3848, "step": 33130 }, { "epoch": 0.6746055979643766, "grad_norm": 7.265431914097796, "learning_rate": 9.962902502759553e-06, "loss": 0.3528, "step": 33140 }, { "epoch": 0.6748091603053435, "grad_norm": 8.345739339396827, "learning_rate": 9.962816055174043e-06, "loss": 0.3241, "step": 33150 }, { "epoch": 0.6750127226463104, "grad_norm": 3.2872566558987066, "learning_rate": 9.96272950735807e-06, "loss": 0.2526, "step": 33160 }, { "epoch": 0.6752162849872774, "grad_norm": 11.512150496612477, "learning_rate": 9.962642859313382e-06, "loss": 0.2987, "step": 33170 }, { "epoch": 0.6754198473282442, "grad_norm": 14.410992518734195, "learning_rate": 9.96255611104173e-06, "loss": 0.3455, "step": 33180 }, { "epoch": 0.6756234096692112, "grad_norm": 5.13848506523614, "learning_rate": 9.962469262544865e-06, "loss": 0.3197, "step": 33190 }, { "epoch": 0.6758269720101782, "grad_norm": 11.070136143023563, "learning_rate": 9.962382313824544e-06, "loss": 0.3851, "step": 33200 }, { "epoch": 0.676030534351145, "grad_norm": 9.411714154331756, "learning_rate": 9.962295264882518e-06, "loss": 0.2531, "step": 33210 }, { "epoch": 0.676234096692112, "grad_norm": 7.752575160703839, "learning_rate": 9.962208115720548e-06, "loss": 0.2676, "step": 33220 }, { "epoch": 0.6764376590330788, "grad_norm": 5.166594454032948, "learning_rate": 9.962120866340396e-06, "loss": 0.2291, "step": 33230 }, { "epoch": 0.6766412213740458, "grad_norm": 55.89028588769655, "learning_rate": 9.96203351674382e-06, "loss": 0.3241, "step": 33240 }, { "epoch": 0.6768447837150128, "grad_norm": 5.6272713366941804, "learning_rate": 9.961946066932587e-06, "loss": 0.325, "step": 33250 }, { "epoch": 0.6770483460559796, "grad_norm": 12.314126679810563, "learning_rate": 9.961858516908463e-06, "loss": 0.3069, "step": 33260 }, { "epoch": 0.6772519083969466, "grad_norm": 3.576194401867221, "learning_rate": 9.961770866673213e-06, "loss": 0.3672, "step": 33270 }, { "epoch": 0.6774554707379135, "grad_norm": 9.547340227753265, "learning_rate": 9.96168311622861e-06, "loss": 0.3163, "step": 33280 }, { "epoch": 0.6776590330788804, "grad_norm": 6.468402567818651, "learning_rate": 9.961595265576428e-06, "loss": 0.3005, "step": 33290 }, { "epoch": 0.6778625954198473, "grad_norm": 5.420547581361786, "learning_rate": 9.961507314718438e-06, "loss": 0.3543, "step": 33300 }, { "epoch": 0.6780661577608142, "grad_norm": 22.106976318555137, "learning_rate": 9.961419263656418e-06, "loss": 0.3348, "step": 33310 }, { "epoch": 0.6782697201017812, "grad_norm": 8.927788765478818, "learning_rate": 9.961331112392145e-06, "loss": 0.3547, "step": 33320 }, { "epoch": 0.6784732824427481, "grad_norm": 8.88034811670591, "learning_rate": 9.9612428609274e-06, "loss": 0.3025, "step": 33330 }, { "epoch": 0.678676844783715, "grad_norm": 13.2583058800451, "learning_rate": 9.961154509263966e-06, "loss": 0.468, "step": 33340 }, { "epoch": 0.6788804071246819, "grad_norm": 341.839785807824, "learning_rate": 9.961066057403625e-06, "loss": 0.2833, "step": 33350 }, { "epoch": 0.6790839694656489, "grad_norm": 12.082338208418308, "learning_rate": 9.960977505348168e-06, "loss": 0.3607, "step": 33360 }, { "epoch": 0.6792875318066158, "grad_norm": 14.624917816349134, "learning_rate": 9.960888853099379e-06, "loss": 0.3396, "step": 33370 }, { "epoch": 0.6794910941475827, "grad_norm": 9.808393949327558, "learning_rate": 9.960800100659049e-06, "loss": 0.3132, "step": 33380 }, { "epoch": 0.6796946564885497, "grad_norm": 5.96723508488222, "learning_rate": 9.960711248028973e-06, "loss": 0.4547, "step": 33390 }, { "epoch": 0.6798982188295165, "grad_norm": 12.910666521999257, "learning_rate": 9.960622295210944e-06, "loss": 0.2413, "step": 33400 }, { "epoch": 0.6801017811704835, "grad_norm": 10.468541668331588, "learning_rate": 9.960533242206758e-06, "loss": 0.319, "step": 33410 }, { "epoch": 0.6803053435114503, "grad_norm": 13.174657160713142, "learning_rate": 9.960444089018213e-06, "loss": 0.2623, "step": 33420 }, { "epoch": 0.6805089058524173, "grad_norm": 8.574773234002516, "learning_rate": 9.960354835647113e-06, "loss": 0.2626, "step": 33430 }, { "epoch": 0.6807124681933843, "grad_norm": 12.831150932908955, "learning_rate": 9.960265482095257e-06, "loss": 0.4234, "step": 33440 }, { "epoch": 0.6809160305343511, "grad_norm": 9.56026011623206, "learning_rate": 9.96017602836445e-06, "loss": 0.3194, "step": 33450 }, { "epoch": 0.6811195928753181, "grad_norm": 5.933414989929221, "learning_rate": 9.960086474456501e-06, "loss": 0.3415, "step": 33460 }, { "epoch": 0.681323155216285, "grad_norm": 7.774945049872774, "learning_rate": 9.959996820373217e-06, "loss": 0.3345, "step": 33470 }, { "epoch": 0.6815267175572519, "grad_norm": 8.84642787001665, "learning_rate": 9.959907066116407e-06, "loss": 0.369, "step": 33480 }, { "epoch": 0.6817302798982189, "grad_norm": 9.176400767975416, "learning_rate": 9.959817211687886e-06, "loss": 0.2854, "step": 33490 }, { "epoch": 0.6819338422391857, "grad_norm": 10.392795565545459, "learning_rate": 9.95972725708947e-06, "loss": 0.2698, "step": 33500 }, { "epoch": 0.6821374045801527, "grad_norm": 19.66427353362889, "learning_rate": 9.959637202322972e-06, "loss": 0.3217, "step": 33510 }, { "epoch": 0.6823409669211196, "grad_norm": 7.2049173012324665, "learning_rate": 9.959547047390215e-06, "loss": 0.3335, "step": 33520 }, { "epoch": 0.6825445292620865, "grad_norm": 17.714216091484154, "learning_rate": 9.959456792293016e-06, "loss": 0.3623, "step": 33530 }, { "epoch": 0.6827480916030534, "grad_norm": 7.761802355166234, "learning_rate": 9.959366437033197e-06, "loss": 0.3967, "step": 33540 }, { "epoch": 0.6829516539440204, "grad_norm": 10.068549717388814, "learning_rate": 9.959275981612589e-06, "loss": 0.3884, "step": 33550 }, { "epoch": 0.6831552162849873, "grad_norm": 11.69426469900877, "learning_rate": 9.959185426033014e-06, "loss": 0.3223, "step": 33560 }, { "epoch": 0.6833587786259542, "grad_norm": 7.5393471329238455, "learning_rate": 9.959094770296302e-06, "loss": 0.3231, "step": 33570 }, { "epoch": 0.6835623409669211, "grad_norm": 6.61746119917283, "learning_rate": 9.959004014404283e-06, "loss": 0.23, "step": 33580 }, { "epoch": 0.683765903307888, "grad_norm": 15.36198809046589, "learning_rate": 9.958913158358792e-06, "loss": 0.3536, "step": 33590 }, { "epoch": 0.683969465648855, "grad_norm": 7.151428527162702, "learning_rate": 9.95882220216166e-06, "loss": 0.3151, "step": 33600 }, { "epoch": 0.6841730279898219, "grad_norm": 10.738750151247906, "learning_rate": 9.95873114581473e-06, "loss": 0.3462, "step": 33610 }, { "epoch": 0.6843765903307888, "grad_norm": 10.264481363491445, "learning_rate": 9.958639989319835e-06, "loss": 0.2767, "step": 33620 }, { "epoch": 0.6845801526717558, "grad_norm": 5.895717397301154, "learning_rate": 9.958548732678821e-06, "loss": 0.3369, "step": 33630 }, { "epoch": 0.6847837150127226, "grad_norm": 12.711552515156477, "learning_rate": 9.958457375893529e-06, "loss": 0.2977, "step": 33640 }, { "epoch": 0.6849872773536896, "grad_norm": 7.209815940241719, "learning_rate": 9.958365918965803e-06, "loss": 0.2664, "step": 33650 }, { "epoch": 0.6851908396946564, "grad_norm": 17.346276810139933, "learning_rate": 9.958274361897488e-06, "loss": 0.4478, "step": 33660 }, { "epoch": 0.6853944020356234, "grad_norm": 6.197599096580078, "learning_rate": 9.958182704690441e-06, "loss": 0.3958, "step": 33670 }, { "epoch": 0.6855979643765904, "grad_norm": 8.517477570863003, "learning_rate": 9.958090947346505e-06, "loss": 0.2557, "step": 33680 }, { "epoch": 0.6858015267175572, "grad_norm": 1.9017450821206439, "learning_rate": 9.957999089867537e-06, "loss": 0.2619, "step": 33690 }, { "epoch": 0.6860050890585242, "grad_norm": 4.12346540151351, "learning_rate": 9.957907132255391e-06, "loss": 0.2677, "step": 33700 }, { "epoch": 0.686208651399491, "grad_norm": 6.272016127804434, "learning_rate": 9.957815074511927e-06, "loss": 0.3284, "step": 33710 }, { "epoch": 0.686412213740458, "grad_norm": 6.885112842082621, "learning_rate": 9.957722916639e-06, "loss": 0.2115, "step": 33720 }, { "epoch": 0.686615776081425, "grad_norm": 6.970315180411113, "learning_rate": 9.957630658638472e-06, "loss": 0.3115, "step": 33730 }, { "epoch": 0.6868193384223918, "grad_norm": 9.17208764271076, "learning_rate": 9.95753830051221e-06, "loss": 0.4197, "step": 33740 }, { "epoch": 0.6870229007633588, "grad_norm": 5.586325632902035, "learning_rate": 9.957445842262077e-06, "loss": 0.543, "step": 33750 }, { "epoch": 0.6872264631043257, "grad_norm": 11.340608416257693, "learning_rate": 9.957353283889938e-06, "loss": 0.3394, "step": 33760 }, { "epoch": 0.6874300254452926, "grad_norm": 9.045879156973042, "learning_rate": 9.957260625397667e-06, "loss": 0.3022, "step": 33770 }, { "epoch": 0.6876335877862595, "grad_norm": 6.271913840631403, "learning_rate": 9.95716786678713e-06, "loss": 0.3383, "step": 33780 }, { "epoch": 0.6878371501272265, "grad_norm": 6.051600673177307, "learning_rate": 9.957075008060204e-06, "loss": 0.2997, "step": 33790 }, { "epoch": 0.6880407124681934, "grad_norm": 9.71506855228422, "learning_rate": 9.956982049218764e-06, "loss": 0.244, "step": 33800 }, { "epoch": 0.6882442748091603, "grad_norm": 33.98066492590616, "learning_rate": 9.956888990264687e-06, "loss": 0.2993, "step": 33810 }, { "epoch": 0.6884478371501273, "grad_norm": 6.999162673278364, "learning_rate": 9.956795831199852e-06, "loss": 0.3292, "step": 33820 }, { "epoch": 0.6886513994910941, "grad_norm": 7.561155005693313, "learning_rate": 9.956702572026143e-06, "loss": 0.2745, "step": 33830 }, { "epoch": 0.6888549618320611, "grad_norm": 10.258537181976974, "learning_rate": 9.956609212745439e-06, "loss": 0.3198, "step": 33840 }, { "epoch": 0.689058524173028, "grad_norm": 10.573372188646664, "learning_rate": 9.956515753359627e-06, "loss": 0.2712, "step": 33850 }, { "epoch": 0.6892620865139949, "grad_norm": 5.494445344785882, "learning_rate": 9.956422193870597e-06, "loss": 0.3871, "step": 33860 }, { "epoch": 0.6894656488549619, "grad_norm": 2.334656870453514, "learning_rate": 9.956328534280238e-06, "loss": 0.3553, "step": 33870 }, { "epoch": 0.6896692111959287, "grad_norm": 9.267925737654618, "learning_rate": 9.956234774590439e-06, "loss": 0.3363, "step": 33880 }, { "epoch": 0.6898727735368957, "grad_norm": 8.66514258841906, "learning_rate": 9.956140914803093e-06, "loss": 0.2563, "step": 33890 }, { "epoch": 0.6900763358778625, "grad_norm": 12.857246698162548, "learning_rate": 9.956046954920103e-06, "loss": 0.2906, "step": 33900 }, { "epoch": 0.6902798982188295, "grad_norm": 4.11264946533385, "learning_rate": 9.955952894943356e-06, "loss": 0.3216, "step": 33910 }, { "epoch": 0.6904834605597965, "grad_norm": 12.884318150695856, "learning_rate": 9.955858734874761e-06, "loss": 0.3161, "step": 33920 }, { "epoch": 0.6906870229007633, "grad_norm": 9.526812103767147, "learning_rate": 9.955764474716214e-06, "loss": 0.3582, "step": 33930 }, { "epoch": 0.6908905852417303, "grad_norm": 10.727107858970873, "learning_rate": 9.955670114469621e-06, "loss": 0.3905, "step": 33940 }, { "epoch": 0.6910941475826972, "grad_norm": 5.440068518832389, "learning_rate": 9.955575654136887e-06, "loss": 0.224, "step": 33950 }, { "epoch": 0.6912977099236641, "grad_norm": 7.324568808692092, "learning_rate": 9.955481093719921e-06, "loss": 0.2517, "step": 33960 }, { "epoch": 0.6915012722646311, "grad_norm": 6.278790998422847, "learning_rate": 9.95538643322063e-06, "loss": 0.3305, "step": 33970 }, { "epoch": 0.691704834605598, "grad_norm": 4.680541983012006, "learning_rate": 9.955291672640928e-06, "loss": 0.3215, "step": 33980 }, { "epoch": 0.6919083969465649, "grad_norm": 10.043422013656595, "learning_rate": 9.955196811982729e-06, "loss": 0.3507, "step": 33990 }, { "epoch": 0.6921119592875318, "grad_norm": 8.448128119876273, "learning_rate": 9.955101851247948e-06, "loss": 0.2277, "step": 34000 }, { "epoch": 0.6923155216284987, "grad_norm": 6.704519523037601, "learning_rate": 9.955006790438502e-06, "loss": 0.4273, "step": 34010 }, { "epoch": 0.6925190839694656, "grad_norm": 4.87701282517015, "learning_rate": 9.954911629556312e-06, "loss": 0.2942, "step": 34020 }, { "epoch": 0.6927226463104326, "grad_norm": 6.072303726741752, "learning_rate": 9.954816368603301e-06, "loss": 0.3749, "step": 34030 }, { "epoch": 0.6929262086513995, "grad_norm": 8.235523490226338, "learning_rate": 9.954721007581391e-06, "loss": 0.2563, "step": 34040 }, { "epoch": 0.6931297709923664, "grad_norm": 7.502874629162316, "learning_rate": 9.95462554649251e-06, "loss": 0.261, "step": 34050 }, { "epoch": 0.6933333333333334, "grad_norm": 8.144145961532427, "learning_rate": 9.954529985338581e-06, "loss": 0.3916, "step": 34060 }, { "epoch": 0.6935368956743002, "grad_norm": 12.357505407203377, "learning_rate": 9.95443432412154e-06, "loss": 0.258, "step": 34070 }, { "epoch": 0.6937404580152672, "grad_norm": 5.323473478649191, "learning_rate": 9.954338562843317e-06, "loss": 0.2722, "step": 34080 }, { "epoch": 0.6939440203562341, "grad_norm": 7.407733306345096, "learning_rate": 9.954242701505845e-06, "loss": 0.4235, "step": 34090 }, { "epoch": 0.694147582697201, "grad_norm": 7.1407708569397, "learning_rate": 9.95414674011106e-06, "loss": 0.3919, "step": 34100 }, { "epoch": 0.694351145038168, "grad_norm": 6.94770242403363, "learning_rate": 9.9540506786609e-06, "loss": 0.3169, "step": 34110 }, { "epoch": 0.6945547073791348, "grad_norm": 18.862477295207018, "learning_rate": 9.953954517157307e-06, "loss": 0.3177, "step": 34120 }, { "epoch": 0.6947582697201018, "grad_norm": 3.73774876541692, "learning_rate": 9.953858255602222e-06, "loss": 0.3394, "step": 34130 }, { "epoch": 0.6949618320610687, "grad_norm": 9.555816285519056, "learning_rate": 9.953761893997588e-06, "loss": 0.3336, "step": 34140 }, { "epoch": 0.6951653944020356, "grad_norm": 4.302896181571982, "learning_rate": 9.953665432345354e-06, "loss": 0.3625, "step": 34150 }, { "epoch": 0.6953689567430026, "grad_norm": 4.847118898336821, "learning_rate": 9.953568870647465e-06, "loss": 0.3468, "step": 34160 }, { "epoch": 0.6955725190839694, "grad_norm": 9.63155972519885, "learning_rate": 9.953472208905873e-06, "loss": 0.2321, "step": 34170 }, { "epoch": 0.6957760814249364, "grad_norm": 16.74915144410753, "learning_rate": 9.953375447122532e-06, "loss": 0.3328, "step": 34180 }, { "epoch": 0.6959796437659033, "grad_norm": 6.765221037677187, "learning_rate": 9.95327858529939e-06, "loss": 0.3395, "step": 34190 }, { "epoch": 0.6961832061068702, "grad_norm": 6.580644127405783, "learning_rate": 9.953181623438409e-06, "loss": 0.2459, "step": 34200 }, { "epoch": 0.6963867684478372, "grad_norm": 35.60898304802803, "learning_rate": 9.953084561541544e-06, "loss": 0.4851, "step": 34210 }, { "epoch": 0.6965903307888041, "grad_norm": 5.611872592471468, "learning_rate": 9.952987399610758e-06, "loss": 0.3774, "step": 34220 }, { "epoch": 0.696793893129771, "grad_norm": 7.0589211211567635, "learning_rate": 9.952890137648013e-06, "loss": 0.3228, "step": 34230 }, { "epoch": 0.6969974554707379, "grad_norm": 3.8119651611799084, "learning_rate": 9.952792775655272e-06, "loss": 0.2986, "step": 34240 }, { "epoch": 0.6972010178117048, "grad_norm": 12.211044780122355, "learning_rate": 9.9526953136345e-06, "loss": 0.3607, "step": 34250 }, { "epoch": 0.6974045801526717, "grad_norm": 8.304732147060534, "learning_rate": 9.952597751587667e-06, "loss": 0.2922, "step": 34260 }, { "epoch": 0.6976081424936387, "grad_norm": 6.223916369681085, "learning_rate": 9.952500089516746e-06, "loss": 0.3316, "step": 34270 }, { "epoch": 0.6978117048346056, "grad_norm": 8.134213749579095, "learning_rate": 9.952402327423705e-06, "loss": 0.3417, "step": 34280 }, { "epoch": 0.6980152671755725, "grad_norm": 20.155885793322362, "learning_rate": 9.95230446531052e-06, "loss": 0.2788, "step": 34290 }, { "epoch": 0.6982188295165395, "grad_norm": 4.777181369574274, "learning_rate": 9.95220650317917e-06, "loss": 0.2977, "step": 34300 }, { "epoch": 0.6984223918575063, "grad_norm": 7.376039399633981, "learning_rate": 9.952108441031629e-06, "loss": 0.3106, "step": 34310 }, { "epoch": 0.6986259541984733, "grad_norm": 4.980595884687508, "learning_rate": 9.952010278869882e-06, "loss": 0.3849, "step": 34320 }, { "epoch": 0.6988295165394403, "grad_norm": 11.804732271257802, "learning_rate": 9.951912016695909e-06, "loss": 0.2707, "step": 34330 }, { "epoch": 0.6990330788804071, "grad_norm": 5.75661294492922, "learning_rate": 9.951813654511693e-06, "loss": 0.3532, "step": 34340 }, { "epoch": 0.6992366412213741, "grad_norm": 9.309366242955022, "learning_rate": 9.951715192319223e-06, "loss": 0.3447, "step": 34350 }, { "epoch": 0.6994402035623409, "grad_norm": 3.343089729528955, "learning_rate": 9.951616630120486e-06, "loss": 0.2507, "step": 34360 }, { "epoch": 0.6996437659033079, "grad_norm": 11.618671798705712, "learning_rate": 9.951517967917476e-06, "loss": 0.27, "step": 34370 }, { "epoch": 0.6998473282442748, "grad_norm": 8.145671667415002, "learning_rate": 9.951419205712181e-06, "loss": 0.326, "step": 34380 }, { "epoch": 0.7000508905852417, "grad_norm": 4.905472295282587, "learning_rate": 9.951320343506599e-06, "loss": 0.3185, "step": 34390 }, { "epoch": 0.7002544529262087, "grad_norm": 5.802118701359266, "learning_rate": 9.951221381302725e-06, "loss": 0.3155, "step": 34400 }, { "epoch": 0.7004580152671755, "grad_norm": 7.219987196660877, "learning_rate": 9.951122319102558e-06, "loss": 0.3597, "step": 34410 }, { "epoch": 0.7006615776081425, "grad_norm": 2.8281399425316582, "learning_rate": 9.9510231569081e-06, "loss": 0.2859, "step": 34420 }, { "epoch": 0.7008651399491094, "grad_norm": 9.546765712526708, "learning_rate": 9.95092389472135e-06, "loss": 0.2955, "step": 34430 }, { "epoch": 0.7010687022900763, "grad_norm": 30.855737413540425, "learning_rate": 9.950824532544317e-06, "loss": 0.3853, "step": 34440 }, { "epoch": 0.7012722646310433, "grad_norm": 18.52633299370407, "learning_rate": 9.950725070379007e-06, "loss": 0.2806, "step": 34450 }, { "epoch": 0.7014758269720102, "grad_norm": 15.020352047162202, "learning_rate": 9.950625508227425e-06, "loss": 0.2922, "step": 34460 }, { "epoch": 0.7016793893129771, "grad_norm": 6.7137735611720935, "learning_rate": 9.950525846091586e-06, "loss": 0.3435, "step": 34470 }, { "epoch": 0.701882951653944, "grad_norm": 6.286116062438395, "learning_rate": 9.950426083973501e-06, "loss": 0.2797, "step": 34480 }, { "epoch": 0.702086513994911, "grad_norm": 3.3902255091597375, "learning_rate": 9.950326221875184e-06, "loss": 0.2802, "step": 34490 }, { "epoch": 0.7022900763358778, "grad_norm": 15.503396624150335, "learning_rate": 9.950226259798654e-06, "loss": 0.2877, "step": 34500 }, { "epoch": 0.7024936386768448, "grad_norm": 9.15758449033445, "learning_rate": 9.95012619774593e-06, "loss": 0.309, "step": 34510 }, { "epoch": 0.7026972010178117, "grad_norm": 9.370206997121405, "learning_rate": 9.950026035719031e-06, "loss": 0.3337, "step": 34520 }, { "epoch": 0.7029007633587786, "grad_norm": 13.018042996288848, "learning_rate": 9.94992577371998e-06, "loss": 0.3811, "step": 34530 }, { "epoch": 0.7031043256997456, "grad_norm": 9.94931743520857, "learning_rate": 9.949825411750802e-06, "loss": 0.2629, "step": 34540 }, { "epoch": 0.7033078880407124, "grad_norm": 4.479210242403849, "learning_rate": 9.949724949813526e-06, "loss": 0.3543, "step": 34550 }, { "epoch": 0.7035114503816794, "grad_norm": 9.088100245211523, "learning_rate": 9.949624387910178e-06, "loss": 0.3226, "step": 34560 }, { "epoch": 0.7037150127226464, "grad_norm": 13.0857834555064, "learning_rate": 9.949523726042791e-06, "loss": 0.2529, "step": 34570 }, { "epoch": 0.7039185750636132, "grad_norm": 7.093021699762761, "learning_rate": 9.949422964213398e-06, "loss": 0.3113, "step": 34580 }, { "epoch": 0.7041221374045802, "grad_norm": 28.523198559015952, "learning_rate": 9.949322102424032e-06, "loss": 0.4713, "step": 34590 }, { "epoch": 0.704325699745547, "grad_norm": 7.550815194409901, "learning_rate": 9.949221140676735e-06, "loss": 0.2955, "step": 34600 }, { "epoch": 0.704529262086514, "grad_norm": 30.45689086807, "learning_rate": 9.94912007897354e-06, "loss": 0.2878, "step": 34610 }, { "epoch": 0.7047328244274809, "grad_norm": 15.408622401333352, "learning_rate": 9.94901891731649e-06, "loss": 0.2781, "step": 34620 }, { "epoch": 0.7049363867684478, "grad_norm": 6.914525129294526, "learning_rate": 9.948917655707629e-06, "loss": 0.3736, "step": 34630 }, { "epoch": 0.7051399491094148, "grad_norm": 6.322264918099636, "learning_rate": 9.948816294149002e-06, "loss": 0.3062, "step": 34640 }, { "epoch": 0.7053435114503817, "grad_norm": 9.741188140225832, "learning_rate": 9.948714832642657e-06, "loss": 0.331, "step": 34650 }, { "epoch": 0.7055470737913486, "grad_norm": 18.72140690130849, "learning_rate": 9.94861327119064e-06, "loss": 0.3327, "step": 34660 }, { "epoch": 0.7057506361323155, "grad_norm": 10.03285209742982, "learning_rate": 9.948511609795005e-06, "loss": 0.36, "step": 34670 }, { "epoch": 0.7059541984732824, "grad_norm": 10.108499144643037, "learning_rate": 9.948409848457805e-06, "loss": 0.3777, "step": 34680 }, { "epoch": 0.7061577608142494, "grad_norm": 3.239311690200959, "learning_rate": 9.948307987181094e-06, "loss": 0.3964, "step": 34690 }, { "epoch": 0.7063613231552163, "grad_norm": 3.758346627309522, "learning_rate": 9.94820602596693e-06, "loss": 0.3834, "step": 34700 }, { "epoch": 0.7065648854961832, "grad_norm": 9.598925888378126, "learning_rate": 9.948103964817373e-06, "loss": 0.3533, "step": 34710 }, { "epoch": 0.7067684478371501, "grad_norm": 10.448805301555293, "learning_rate": 9.948001803734481e-06, "loss": 0.2968, "step": 34720 }, { "epoch": 0.7069720101781171, "grad_norm": 9.533255175608144, "learning_rate": 9.947899542720322e-06, "loss": 0.3331, "step": 34730 }, { "epoch": 0.7071755725190839, "grad_norm": 18.711764276155463, "learning_rate": 9.947797181776958e-06, "loss": 0.3918, "step": 34740 }, { "epoch": 0.7073791348600509, "grad_norm": 29.447347272940075, "learning_rate": 9.947694720906458e-06, "loss": 0.3621, "step": 34750 }, { "epoch": 0.7075826972010179, "grad_norm": 11.41630410357721, "learning_rate": 9.947592160110888e-06, "loss": 0.3047, "step": 34760 }, { "epoch": 0.7077862595419847, "grad_norm": 8.247313151751614, "learning_rate": 9.947489499392324e-06, "loss": 0.2493, "step": 34770 }, { "epoch": 0.7079898218829517, "grad_norm": 5.1012919418783405, "learning_rate": 9.947386738752838e-06, "loss": 0.3306, "step": 34780 }, { "epoch": 0.7081933842239185, "grad_norm": 7.167182445538702, "learning_rate": 9.947283878194502e-06, "loss": 0.3283, "step": 34790 }, { "epoch": 0.7083969465648855, "grad_norm": 14.623489118322931, "learning_rate": 9.947180917719397e-06, "loss": 0.3654, "step": 34800 }, { "epoch": 0.7086005089058525, "grad_norm": 8.975019548387348, "learning_rate": 9.947077857329604e-06, "loss": 0.2792, "step": 34810 }, { "epoch": 0.7088040712468193, "grad_norm": 8.097633437362862, "learning_rate": 9.946974697027199e-06, "loss": 0.3465, "step": 34820 }, { "epoch": 0.7090076335877863, "grad_norm": 10.970176549211873, "learning_rate": 9.94687143681427e-06, "loss": 0.2949, "step": 34830 }, { "epoch": 0.7092111959287531, "grad_norm": 13.827334984612426, "learning_rate": 9.9467680766929e-06, "loss": 0.2516, "step": 34840 }, { "epoch": 0.7094147582697201, "grad_norm": 5.754060198064298, "learning_rate": 9.946664616665175e-06, "loss": 0.2358, "step": 34850 }, { "epoch": 0.709618320610687, "grad_norm": 5.710520279188427, "learning_rate": 9.94656105673319e-06, "loss": 0.3169, "step": 34860 }, { "epoch": 0.7098218829516539, "grad_norm": 16.533610400755986, "learning_rate": 9.946457396899032e-06, "loss": 0.2622, "step": 34870 }, { "epoch": 0.7100254452926209, "grad_norm": 9.676823845554713, "learning_rate": 9.946353637164797e-06, "loss": 0.2795, "step": 34880 }, { "epoch": 0.7102290076335878, "grad_norm": 6.362039892131455, "learning_rate": 9.946249777532578e-06, "loss": 0.2452, "step": 34890 }, { "epoch": 0.7104325699745547, "grad_norm": 17.742760195579457, "learning_rate": 9.946145818004475e-06, "loss": 0.3324, "step": 34900 }, { "epoch": 0.7106361323155216, "grad_norm": 11.46630695743399, "learning_rate": 9.946041758582587e-06, "loss": 0.3351, "step": 34910 }, { "epoch": 0.7108396946564886, "grad_norm": 20.17726187853993, "learning_rate": 9.945937599269014e-06, "loss": 0.3047, "step": 34920 }, { "epoch": 0.7110432569974555, "grad_norm": 6.400800558111722, "learning_rate": 9.945833340065862e-06, "loss": 0.2748, "step": 34930 }, { "epoch": 0.7112468193384224, "grad_norm": 9.777336392884857, "learning_rate": 9.945728980975235e-06, "loss": 0.2967, "step": 34940 }, { "epoch": 0.7114503816793893, "grad_norm": 11.34250992699146, "learning_rate": 9.94562452199924e-06, "loss": 0.3291, "step": 34950 }, { "epoch": 0.7116539440203562, "grad_norm": 8.847813855513644, "learning_rate": 9.945519963139989e-06, "loss": 0.4262, "step": 34960 }, { "epoch": 0.7118575063613232, "grad_norm": 3.125864834089372, "learning_rate": 9.945415304399593e-06, "loss": 0.3116, "step": 34970 }, { "epoch": 0.71206106870229, "grad_norm": 4.840043734949961, "learning_rate": 9.945310545780164e-06, "loss": 0.2774, "step": 34980 }, { "epoch": 0.712264631043257, "grad_norm": 12.452122932442013, "learning_rate": 9.94520568728382e-06, "loss": 0.2947, "step": 34990 }, { "epoch": 0.712468193384224, "grad_norm": 8.376446801001686, "learning_rate": 9.945100728912677e-06, "loss": 0.3328, "step": 35000 }, { "epoch": 0.7126717557251908, "grad_norm": 18.408052766982284, "learning_rate": 9.944995670668858e-06, "loss": 0.3399, "step": 35010 }, { "epoch": 0.7128753180661578, "grad_norm": 4.828782870985588, "learning_rate": 9.94489051255448e-06, "loss": 0.3467, "step": 35020 }, { "epoch": 0.7130788804071246, "grad_norm": 6.754905907290877, "learning_rate": 9.944785254571669e-06, "loss": 0.2914, "step": 35030 }, { "epoch": 0.7132824427480916, "grad_norm": 8.490785739358294, "learning_rate": 9.944679896722552e-06, "loss": 0.2803, "step": 35040 }, { "epoch": 0.7134860050890586, "grad_norm": 9.919096524658553, "learning_rate": 9.944574439009255e-06, "loss": 0.2104, "step": 35050 }, { "epoch": 0.7136895674300254, "grad_norm": 13.763659769025326, "learning_rate": 9.944468881433908e-06, "loss": 0.4681, "step": 35060 }, { "epoch": 0.7138931297709924, "grad_norm": 8.82308332357831, "learning_rate": 9.944363223998645e-06, "loss": 0.2393, "step": 35070 }, { "epoch": 0.7140966921119593, "grad_norm": 6.506379832238105, "learning_rate": 9.944257466705597e-06, "loss": 0.3499, "step": 35080 }, { "epoch": 0.7143002544529262, "grad_norm": 4.8951190415136505, "learning_rate": 9.944151609556903e-06, "loss": 0.2151, "step": 35090 }, { "epoch": 0.7145038167938931, "grad_norm": 8.770862844674134, "learning_rate": 9.944045652554699e-06, "loss": 0.3527, "step": 35100 }, { "epoch": 0.71470737913486, "grad_norm": 11.904281986456676, "learning_rate": 9.943939595701124e-06, "loss": 0.2982, "step": 35110 }, { "epoch": 0.714910941475827, "grad_norm": 11.027429670386889, "learning_rate": 9.943833438998323e-06, "loss": 0.3068, "step": 35120 }, { "epoch": 0.7151145038167939, "grad_norm": 27.69337889176831, "learning_rate": 9.943727182448435e-06, "loss": 0.3487, "step": 35130 }, { "epoch": 0.7153180661577608, "grad_norm": 5.761800463730807, "learning_rate": 9.943620826053612e-06, "loss": 0.3042, "step": 35140 }, { "epoch": 0.7155216284987277, "grad_norm": 9.436737232052803, "learning_rate": 9.943514369815998e-06, "loss": 0.2998, "step": 35150 }, { "epoch": 0.7157251908396947, "grad_norm": 6.519620292737373, "learning_rate": 9.943407813737743e-06, "loss": 0.2577, "step": 35160 }, { "epoch": 0.7159287531806616, "grad_norm": 4.828490070070882, "learning_rate": 9.943301157821001e-06, "loss": 0.321, "step": 35170 }, { "epoch": 0.7161323155216285, "grad_norm": 8.99765311823397, "learning_rate": 9.943194402067926e-06, "loss": 0.3344, "step": 35180 }, { "epoch": 0.7163358778625954, "grad_norm": 13.001003939905402, "learning_rate": 9.943087546480673e-06, "loss": 0.342, "step": 35190 }, { "epoch": 0.7165394402035623, "grad_norm": 3.884766587954656, "learning_rate": 9.9429805910614e-06, "loss": 0.2912, "step": 35200 }, { "epoch": 0.7167430025445293, "grad_norm": 24.4979984802844, "learning_rate": 9.942873535812267e-06, "loss": 0.2686, "step": 35210 }, { "epoch": 0.7169465648854961, "grad_norm": 10.071776626850687, "learning_rate": 9.942766380735438e-06, "loss": 0.3968, "step": 35220 }, { "epoch": 0.7171501272264631, "grad_norm": 13.124799279919987, "learning_rate": 9.942659125833073e-06, "loss": 0.3531, "step": 35230 }, { "epoch": 0.7173536895674301, "grad_norm": 6.846457221727596, "learning_rate": 9.942551771107344e-06, "loss": 0.3201, "step": 35240 }, { "epoch": 0.7175572519083969, "grad_norm": 9.345572761540614, "learning_rate": 9.942444316560414e-06, "loss": 0.2692, "step": 35250 }, { "epoch": 0.7177608142493639, "grad_norm": 14.833824179039828, "learning_rate": 9.942336762194456e-06, "loss": 0.3124, "step": 35260 }, { "epoch": 0.7179643765903307, "grad_norm": 10.431372028942313, "learning_rate": 9.942229108011641e-06, "loss": 0.2797, "step": 35270 }, { "epoch": 0.7181679389312977, "grad_norm": 7.9696867198117864, "learning_rate": 9.942121354014144e-06, "loss": 0.3996, "step": 35280 }, { "epoch": 0.7183715012722647, "grad_norm": 29.85240650181453, "learning_rate": 9.942013500204141e-06, "loss": 0.3618, "step": 35290 }, { "epoch": 0.7185750636132315, "grad_norm": 7.880178042502139, "learning_rate": 9.94190554658381e-06, "loss": 0.4026, "step": 35300 }, { "epoch": 0.7187786259541985, "grad_norm": 5.220213319611779, "learning_rate": 9.94179749315533e-06, "loss": 0.2388, "step": 35310 }, { "epoch": 0.7189821882951654, "grad_norm": 7.711963542277313, "learning_rate": 9.941689339920886e-06, "loss": 0.2777, "step": 35320 }, { "epoch": 0.7191857506361323, "grad_norm": 4.8633623984223115, "learning_rate": 9.941581086882661e-06, "loss": 0.2963, "step": 35330 }, { "epoch": 0.7193893129770992, "grad_norm": 8.009862848273753, "learning_rate": 9.941472734042841e-06, "loss": 0.4144, "step": 35340 }, { "epoch": 0.7195928753180661, "grad_norm": 4.82528754672046, "learning_rate": 9.941364281403614e-06, "loss": 0.2328, "step": 35350 }, { "epoch": 0.7197964376590331, "grad_norm": 4.042825084877374, "learning_rate": 9.941255728967172e-06, "loss": 0.317, "step": 35360 }, { "epoch": 0.72, "grad_norm": 5.434193697927413, "learning_rate": 9.941147076735707e-06, "loss": 0.3067, "step": 35370 }, { "epoch": 0.7202035623409669, "grad_norm": 8.654962183219393, "learning_rate": 9.94103832471141e-06, "loss": 0.2576, "step": 35380 }, { "epoch": 0.7204071246819338, "grad_norm": 10.648412231167704, "learning_rate": 9.940929472896481e-06, "loss": 0.3005, "step": 35390 }, { "epoch": 0.7206106870229008, "grad_norm": 5.839308819313152, "learning_rate": 9.940820521293118e-06, "loss": 0.3106, "step": 35400 }, { "epoch": 0.7208142493638677, "grad_norm": 4.511828657914898, "learning_rate": 9.940711469903521e-06, "loss": 0.2965, "step": 35410 }, { "epoch": 0.7210178117048346, "grad_norm": 1.7942729982704413, "learning_rate": 9.940602318729892e-06, "loss": 0.334, "step": 35420 }, { "epoch": 0.7212213740458016, "grad_norm": 9.559827288489851, "learning_rate": 9.940493067774435e-06, "loss": 0.2656, "step": 35430 }, { "epoch": 0.7214249363867684, "grad_norm": 11.3646593520481, "learning_rate": 9.940383717039359e-06, "loss": 0.3265, "step": 35440 }, { "epoch": 0.7216284987277354, "grad_norm": 9.786656303955814, "learning_rate": 9.940274266526868e-06, "loss": 0.3237, "step": 35450 }, { "epoch": 0.7218320610687022, "grad_norm": 5.373595646122553, "learning_rate": 9.940164716239178e-06, "loss": 0.445, "step": 35460 }, { "epoch": 0.7220356234096692, "grad_norm": 1.6953574901232271, "learning_rate": 9.940055066178496e-06, "loss": 0.2417, "step": 35470 }, { "epoch": 0.7222391857506362, "grad_norm": 6.624037747238679, "learning_rate": 9.939945316347042e-06, "loss": 0.2895, "step": 35480 }, { "epoch": 0.722442748091603, "grad_norm": 19.14676552502193, "learning_rate": 9.939835466747028e-06, "loss": 0.4401, "step": 35490 }, { "epoch": 0.72264631043257, "grad_norm": 5.701082931939347, "learning_rate": 9.939725517380674e-06, "loss": 0.3169, "step": 35500 }, { "epoch": 0.7228498727735369, "grad_norm": 6.734579866634173, "learning_rate": 9.939615468250201e-06, "loss": 0.2606, "step": 35510 }, { "epoch": 0.7230534351145038, "grad_norm": 9.62313209348508, "learning_rate": 9.939505319357832e-06, "loss": 0.2839, "step": 35520 }, { "epoch": 0.7232569974554708, "grad_norm": 8.509916295105993, "learning_rate": 9.93939507070579e-06, "loss": 0.3105, "step": 35530 }, { "epoch": 0.7234605597964376, "grad_norm": 8.212743769772747, "learning_rate": 9.939284722296305e-06, "loss": 0.418, "step": 35540 }, { "epoch": 0.7236641221374046, "grad_norm": 12.169369422014073, "learning_rate": 9.939174274131602e-06, "loss": 0.2985, "step": 35550 }, { "epoch": 0.7238676844783715, "grad_norm": 10.73675089299286, "learning_rate": 9.939063726213912e-06, "loss": 0.3574, "step": 35560 }, { "epoch": 0.7240712468193384, "grad_norm": 13.076771377500526, "learning_rate": 9.938953078545471e-06, "loss": 0.3397, "step": 35570 }, { "epoch": 0.7242748091603053, "grad_norm": 7.961187578540907, "learning_rate": 9.938842331128508e-06, "loss": 0.4295, "step": 35580 }, { "epoch": 0.7244783715012723, "grad_norm": 11.676675953209608, "learning_rate": 9.938731483965265e-06, "loss": 0.3343, "step": 35590 }, { "epoch": 0.7246819338422392, "grad_norm": 7.065184214049965, "learning_rate": 9.938620537057977e-06, "loss": 0.313, "step": 35600 }, { "epoch": 0.7248854961832061, "grad_norm": 9.249577595029919, "learning_rate": 9.938509490408887e-06, "loss": 0.3808, "step": 35610 }, { "epoch": 0.725089058524173, "grad_norm": 0.3235623222826286, "learning_rate": 9.938398344020239e-06, "loss": 0.2739, "step": 35620 }, { "epoch": 0.7252926208651399, "grad_norm": 8.323592380574526, "learning_rate": 9.938287097894275e-06, "loss": 0.3244, "step": 35630 }, { "epoch": 0.7254961832061069, "grad_norm": 23.118919352576196, "learning_rate": 9.938175752033241e-06, "loss": 0.3649, "step": 35640 }, { "epoch": 0.7256997455470738, "grad_norm": 7.250231509483235, "learning_rate": 9.938064306439387e-06, "loss": 0.3056, "step": 35650 }, { "epoch": 0.7259033078880407, "grad_norm": 7.707130342488144, "learning_rate": 9.937952761114967e-06, "loss": 0.3102, "step": 35660 }, { "epoch": 0.7261068702290077, "grad_norm": 4.458548040625752, "learning_rate": 9.93784111606223e-06, "loss": 0.3989, "step": 35670 }, { "epoch": 0.7263104325699745, "grad_norm": 12.288312961696583, "learning_rate": 9.93772937128343e-06, "loss": 0.3026, "step": 35680 }, { "epoch": 0.7265139949109415, "grad_norm": 11.705559139965299, "learning_rate": 9.937617526780828e-06, "loss": 0.369, "step": 35690 }, { "epoch": 0.7267175572519083, "grad_norm": 7.6484008552307206, "learning_rate": 9.937505582556679e-06, "loss": 0.3098, "step": 35700 }, { "epoch": 0.7269211195928753, "grad_norm": 7.1155208715290295, "learning_rate": 9.937393538613245e-06, "loss": 0.3435, "step": 35710 }, { "epoch": 0.7271246819338423, "grad_norm": 14.065484484420494, "learning_rate": 9.93728139495279e-06, "loss": 0.4504, "step": 35720 }, { "epoch": 0.7273282442748091, "grad_norm": 7.404424563991404, "learning_rate": 9.937169151577578e-06, "loss": 0.2513, "step": 35730 }, { "epoch": 0.7275318066157761, "grad_norm": 7.799001667066032, "learning_rate": 9.937056808489875e-06, "loss": 0.2713, "step": 35740 }, { "epoch": 0.727735368956743, "grad_norm": 2.894421856316931, "learning_rate": 9.936944365691953e-06, "loss": 0.2645, "step": 35750 }, { "epoch": 0.7279389312977099, "grad_norm": 10.729038027943025, "learning_rate": 9.936831823186079e-06, "loss": 0.3683, "step": 35760 }, { "epoch": 0.7281424936386769, "grad_norm": 5.419845467206008, "learning_rate": 9.936719180974528e-06, "loss": 0.2753, "step": 35770 }, { "epoch": 0.7283460559796437, "grad_norm": 11.637714479833221, "learning_rate": 9.936606439059574e-06, "loss": 0.2957, "step": 35780 }, { "epoch": 0.7285496183206107, "grad_norm": 7.854661856343174, "learning_rate": 9.936493597443498e-06, "loss": 0.3543, "step": 35790 }, { "epoch": 0.7287531806615776, "grad_norm": 5.759319232224979, "learning_rate": 9.936380656128572e-06, "loss": 0.2932, "step": 35800 }, { "epoch": 0.7289567430025445, "grad_norm": 7.89209530877027, "learning_rate": 9.936267615117081e-06, "loss": 0.3033, "step": 35810 }, { "epoch": 0.7291603053435115, "grad_norm": 7.924473163373028, "learning_rate": 9.93615447441131e-06, "loss": 0.3199, "step": 35820 }, { "epoch": 0.7293638676844784, "grad_norm": 10.065640549859562, "learning_rate": 9.93604123401354e-06, "loss": 0.2566, "step": 35830 }, { "epoch": 0.7295674300254453, "grad_norm": 8.090130827252748, "learning_rate": 9.93592789392606e-06, "loss": 0.3448, "step": 35840 }, { "epoch": 0.7297709923664122, "grad_norm": 7.063256755143038, "learning_rate": 9.935814454151159e-06, "loss": 0.2549, "step": 35850 }, { "epoch": 0.7299745547073792, "grad_norm": 16.595613760644497, "learning_rate": 9.935700914691127e-06, "loss": 0.2691, "step": 35860 }, { "epoch": 0.730178117048346, "grad_norm": 8.793851727459892, "learning_rate": 9.935587275548257e-06, "loss": 0.4536, "step": 35870 }, { "epoch": 0.730381679389313, "grad_norm": 4.766769297584607, "learning_rate": 9.935473536724847e-06, "loss": 0.2811, "step": 35880 }, { "epoch": 0.7305852417302799, "grad_norm": 12.100323073614561, "learning_rate": 9.935359698223191e-06, "loss": 0.3639, "step": 35890 }, { "epoch": 0.7307888040712468, "grad_norm": 5.062309894762423, "learning_rate": 9.93524576004559e-06, "loss": 0.2762, "step": 35900 }, { "epoch": 0.7309923664122138, "grad_norm": 7.091806760096546, "learning_rate": 9.935131722194344e-06, "loss": 0.2737, "step": 35910 }, { "epoch": 0.7311959287531806, "grad_norm": 7.946187695723094, "learning_rate": 9.935017584671756e-06, "loss": 0.2652, "step": 35920 }, { "epoch": 0.7313994910941476, "grad_norm": 5.349574866176582, "learning_rate": 9.934903347480132e-06, "loss": 0.3145, "step": 35930 }, { "epoch": 0.7316030534351146, "grad_norm": 15.847198118844641, "learning_rate": 9.93478901062178e-06, "loss": 0.335, "step": 35940 }, { "epoch": 0.7318066157760814, "grad_norm": 6.917322987114926, "learning_rate": 9.934674574099007e-06, "loss": 0.315, "step": 35950 }, { "epoch": 0.7320101781170484, "grad_norm": 5.090541259703083, "learning_rate": 9.934560037914123e-06, "loss": 0.2236, "step": 35960 }, { "epoch": 0.7322137404580152, "grad_norm": 16.131106593167175, "learning_rate": 9.934445402069446e-06, "loss": 0.2971, "step": 35970 }, { "epoch": 0.7324173027989822, "grad_norm": 7.029427217388481, "learning_rate": 9.934330666567288e-06, "loss": 0.3359, "step": 35980 }, { "epoch": 0.7326208651399491, "grad_norm": 11.950562574445236, "learning_rate": 9.934215831409967e-06, "loss": 0.363, "step": 35990 }, { "epoch": 0.732824427480916, "grad_norm": 8.890464180873888, "learning_rate": 9.934100896599801e-06, "loss": 0.3024, "step": 36000 }, { "epoch": 0.733027989821883, "grad_norm": 3.7039898710558017, "learning_rate": 9.933985862139113e-06, "loss": 0.3409, "step": 36010 }, { "epoch": 0.7332315521628499, "grad_norm": 7.764146177910539, "learning_rate": 9.933870728030227e-06, "loss": 0.2842, "step": 36020 }, { "epoch": 0.7334351145038168, "grad_norm": 13.303985265979106, "learning_rate": 9.933755494275465e-06, "loss": 0.2858, "step": 36030 }, { "epoch": 0.7336386768447837, "grad_norm": 13.538503502205206, "learning_rate": 9.933640160877156e-06, "loss": 0.3306, "step": 36040 }, { "epoch": 0.7338422391857506, "grad_norm": 16.613955232798588, "learning_rate": 9.933524727837632e-06, "loss": 0.3257, "step": 36050 }, { "epoch": 0.7340458015267176, "grad_norm": 18.136004166267977, "learning_rate": 9.933409195159219e-06, "loss": 0.339, "step": 36060 }, { "epoch": 0.7342493638676845, "grad_norm": 10.743571208827598, "learning_rate": 9.933293562844253e-06, "loss": 0.263, "step": 36070 }, { "epoch": 0.7344529262086514, "grad_norm": 18.915442061168072, "learning_rate": 9.933177830895071e-06, "loss": 0.3518, "step": 36080 }, { "epoch": 0.7346564885496183, "grad_norm": 14.155873160419857, "learning_rate": 9.93306199931401e-06, "loss": 0.3397, "step": 36090 }, { "epoch": 0.7348600508905853, "grad_norm": 7.881797826508009, "learning_rate": 9.932946068103406e-06, "loss": 0.2522, "step": 36100 }, { "epoch": 0.7350636132315521, "grad_norm": 4.816126507051365, "learning_rate": 9.932830037265602e-06, "loss": 0.3456, "step": 36110 }, { "epoch": 0.7352671755725191, "grad_norm": 3.9405113618452177, "learning_rate": 9.932713906802942e-06, "loss": 0.4594, "step": 36120 }, { "epoch": 0.735470737913486, "grad_norm": 4.033610007874875, "learning_rate": 9.932597676717773e-06, "loss": 0.2881, "step": 36130 }, { "epoch": 0.7356743002544529, "grad_norm": 10.298783775089998, "learning_rate": 9.93248134701244e-06, "loss": 0.3157, "step": 36140 }, { "epoch": 0.7358778625954199, "grad_norm": 6.692661855789226, "learning_rate": 9.932364917689295e-06, "loss": 0.3821, "step": 36150 }, { "epoch": 0.7360814249363867, "grad_norm": 7.2919808581902945, "learning_rate": 9.932248388750685e-06, "loss": 0.3148, "step": 36160 }, { "epoch": 0.7362849872773537, "grad_norm": 11.12480507078115, "learning_rate": 9.932131760198967e-06, "loss": 0.3514, "step": 36170 }, { "epoch": 0.7364885496183207, "grad_norm": 11.383682923256703, "learning_rate": 9.932015032036496e-06, "loss": 0.3187, "step": 36180 }, { "epoch": 0.7366921119592875, "grad_norm": 10.347553278987581, "learning_rate": 9.931898204265629e-06, "loss": 0.2865, "step": 36190 }, { "epoch": 0.7368956743002545, "grad_norm": 9.068114157123059, "learning_rate": 9.931781276888726e-06, "loss": 0.3065, "step": 36200 }, { "epoch": 0.7370992366412213, "grad_norm": 7.104899874000364, "learning_rate": 9.931664249908147e-06, "loss": 0.2491, "step": 36210 }, { "epoch": 0.7373027989821883, "grad_norm": 3.8015288004676253, "learning_rate": 9.931547123326257e-06, "loss": 0.3418, "step": 36220 }, { "epoch": 0.7375063613231552, "grad_norm": 5.012577035144505, "learning_rate": 9.931429897145421e-06, "loss": 0.2607, "step": 36230 }, { "epoch": 0.7377099236641221, "grad_norm": 33.67901126654367, "learning_rate": 9.931312571368006e-06, "loss": 0.3158, "step": 36240 }, { "epoch": 0.7379134860050891, "grad_norm": 20.097679796242115, "learning_rate": 9.931195145996383e-06, "loss": 0.2956, "step": 36250 }, { "epoch": 0.738117048346056, "grad_norm": 9.616676891769119, "learning_rate": 9.931077621032923e-06, "loss": 0.2461, "step": 36260 }, { "epoch": 0.7383206106870229, "grad_norm": 6.976348723226902, "learning_rate": 9.930959996479998e-06, "loss": 0.2941, "step": 36270 }, { "epoch": 0.7385241730279898, "grad_norm": 6.735924809310204, "learning_rate": 9.930842272339985e-06, "loss": 0.3159, "step": 36280 }, { "epoch": 0.7387277353689568, "grad_norm": 13.405420354450328, "learning_rate": 9.930724448615261e-06, "loss": 0.2671, "step": 36290 }, { "epoch": 0.7389312977099237, "grad_norm": 6.955152336977707, "learning_rate": 9.930606525308208e-06, "loss": 0.2773, "step": 36300 }, { "epoch": 0.7391348600508906, "grad_norm": 10.064688863595896, "learning_rate": 9.930488502421206e-06, "loss": 0.3095, "step": 36310 }, { "epoch": 0.7393384223918575, "grad_norm": 5.88703741749042, "learning_rate": 9.930370379956636e-06, "loss": 0.245, "step": 36320 }, { "epoch": 0.7395419847328244, "grad_norm": 6.814479314620109, "learning_rate": 9.930252157916886e-06, "loss": 0.3246, "step": 36330 }, { "epoch": 0.7397455470737914, "grad_norm": 8.920747531036222, "learning_rate": 9.930133836304345e-06, "loss": 0.38, "step": 36340 }, { "epoch": 0.7399491094147582, "grad_norm": 5.110089342920217, "learning_rate": 9.9300154151214e-06, "loss": 0.3185, "step": 36350 }, { "epoch": 0.7401526717557252, "grad_norm": 10.938829894145615, "learning_rate": 9.929896894370444e-06, "loss": 0.3627, "step": 36360 }, { "epoch": 0.7403562340966922, "grad_norm": 4.413032775647485, "learning_rate": 9.929778274053872e-06, "loss": 0.3126, "step": 36370 }, { "epoch": 0.740559796437659, "grad_norm": 6.290723490287815, "learning_rate": 9.929659554174076e-06, "loss": 0.2763, "step": 36380 }, { "epoch": 0.740763358778626, "grad_norm": 3.4128787225408694, "learning_rate": 9.929540734733458e-06, "loss": 0.323, "step": 36390 }, { "epoch": 0.7409669211195928, "grad_norm": 0.34242899947988326, "learning_rate": 9.929421815734415e-06, "loss": 0.2988, "step": 36400 }, { "epoch": 0.7411704834605598, "grad_norm": 6.291238374984629, "learning_rate": 9.929302797179348e-06, "loss": 0.2814, "step": 36410 }, { "epoch": 0.7413740458015268, "grad_norm": 10.89770177665639, "learning_rate": 9.929183679070664e-06, "loss": 0.2816, "step": 36420 }, { "epoch": 0.7415776081424936, "grad_norm": 13.96224081379309, "learning_rate": 9.929064461410767e-06, "loss": 0.2828, "step": 36430 }, { "epoch": 0.7417811704834606, "grad_norm": 7.372366598257311, "learning_rate": 9.928945144202064e-06, "loss": 0.3265, "step": 36440 }, { "epoch": 0.7419847328244275, "grad_norm": 8.793285733061865, "learning_rate": 9.928825727446964e-06, "loss": 0.2741, "step": 36450 }, { "epoch": 0.7421882951653944, "grad_norm": 20.88749565963586, "learning_rate": 9.928706211147882e-06, "loss": 0.384, "step": 36460 }, { "epoch": 0.7423918575063613, "grad_norm": 7.02089859513358, "learning_rate": 9.928586595307229e-06, "loss": 0.2941, "step": 36470 }, { "epoch": 0.7425954198473282, "grad_norm": 4.573118649169563, "learning_rate": 9.928466879927421e-06, "loss": 0.1928, "step": 36480 }, { "epoch": 0.7427989821882952, "grad_norm": 18.96362481806737, "learning_rate": 9.92834706501088e-06, "loss": 0.3015, "step": 36490 }, { "epoch": 0.7430025445292621, "grad_norm": 14.768878148922447, "learning_rate": 9.92822715056002e-06, "loss": 0.3454, "step": 36500 }, { "epoch": 0.743206106870229, "grad_norm": 5.4975943263493985, "learning_rate": 9.928107136577263e-06, "loss": 0.3655, "step": 36510 }, { "epoch": 0.7434096692111959, "grad_norm": 22.987788455641354, "learning_rate": 9.927987023065037e-06, "loss": 0.3413, "step": 36520 }, { "epoch": 0.7436132315521629, "grad_norm": 9.84783365256503, "learning_rate": 9.927866810025767e-06, "loss": 0.3932, "step": 36530 }, { "epoch": 0.7438167938931298, "grad_norm": 0.1275887564008058, "learning_rate": 9.927746497461878e-06, "loss": 0.2585, "step": 36540 }, { "epoch": 0.7440203562340967, "grad_norm": 8.719333515873158, "learning_rate": 9.927626085375804e-06, "loss": 0.3428, "step": 36550 }, { "epoch": 0.7442239185750636, "grad_norm": 5.1249320242128205, "learning_rate": 9.92750557376997e-06, "loss": 0.3876, "step": 36560 }, { "epoch": 0.7444274809160305, "grad_norm": 7.8290022970502795, "learning_rate": 9.92738496264682e-06, "loss": 0.3338, "step": 36570 }, { "epoch": 0.7446310432569975, "grad_norm": 11.587555082747299, "learning_rate": 9.927264252008779e-06, "loss": 0.3568, "step": 36580 }, { "epoch": 0.7448346055979643, "grad_norm": 3.9243511143914356, "learning_rate": 9.927143441858294e-06, "loss": 0.2739, "step": 36590 }, { "epoch": 0.7450381679389313, "grad_norm": 6.704510852967905, "learning_rate": 9.927022532197799e-06, "loss": 0.3081, "step": 36600 }, { "epoch": 0.7452417302798983, "grad_norm": 9.973760780271613, "learning_rate": 9.926901523029739e-06, "loss": 0.2592, "step": 36610 }, { "epoch": 0.7454452926208651, "grad_norm": 8.836061287939765, "learning_rate": 9.926780414356556e-06, "loss": 0.3032, "step": 36620 }, { "epoch": 0.7456488549618321, "grad_norm": 9.765786102875158, "learning_rate": 9.926659206180698e-06, "loss": 0.3198, "step": 36630 }, { "epoch": 0.7458524173027989, "grad_norm": 10.208831534319039, "learning_rate": 9.926537898504612e-06, "loss": 0.2613, "step": 36640 }, { "epoch": 0.7460559796437659, "grad_norm": 15.098500199469779, "learning_rate": 9.926416491330745e-06, "loss": 0.2793, "step": 36650 }, { "epoch": 0.7462595419847329, "grad_norm": 31.36914712478676, "learning_rate": 9.926294984661554e-06, "loss": 0.2786, "step": 36660 }, { "epoch": 0.7464631043256997, "grad_norm": 11.189830412934334, "learning_rate": 9.926173378499492e-06, "loss": 0.2839, "step": 36670 }, { "epoch": 0.7466666666666667, "grad_norm": 4.524973764026456, "learning_rate": 9.926051672847012e-06, "loss": 0.4357, "step": 36680 }, { "epoch": 0.7468702290076336, "grad_norm": 8.47641410298455, "learning_rate": 9.925929867706574e-06, "loss": 0.3361, "step": 36690 }, { "epoch": 0.7470737913486005, "grad_norm": 13.145793810133254, "learning_rate": 9.925807963080638e-06, "loss": 0.2673, "step": 36700 }, { "epoch": 0.7472773536895674, "grad_norm": 6.792980533209154, "learning_rate": 9.925685958971666e-06, "loss": 0.3632, "step": 36710 }, { "epoch": 0.7474809160305343, "grad_norm": 11.301728300856357, "learning_rate": 9.925563855382121e-06, "loss": 0.2632, "step": 36720 }, { "epoch": 0.7476844783715013, "grad_norm": 11.5877575348288, "learning_rate": 9.925441652314471e-06, "loss": 0.3512, "step": 36730 }, { "epoch": 0.7478880407124682, "grad_norm": 4.867107426775528, "learning_rate": 9.925319349771183e-06, "loss": 0.3185, "step": 36740 }, { "epoch": 0.7480916030534351, "grad_norm": 18.804415683593398, "learning_rate": 9.925196947754724e-06, "loss": 0.2713, "step": 36750 }, { "epoch": 0.748295165394402, "grad_norm": 9.961031244341186, "learning_rate": 9.925074446267573e-06, "loss": 0.3324, "step": 36760 }, { "epoch": 0.748498727735369, "grad_norm": 2.712759803840097, "learning_rate": 9.924951845312198e-06, "loss": 0.2365, "step": 36770 }, { "epoch": 0.7487022900763359, "grad_norm": 3.8820391652943824, "learning_rate": 9.924829144891079e-06, "loss": 0.3119, "step": 36780 }, { "epoch": 0.7489058524173028, "grad_norm": 3.9205770364841124, "learning_rate": 9.924706345006689e-06, "loss": 0.3673, "step": 36790 }, { "epoch": 0.7491094147582698, "grad_norm": 4.588566041964266, "learning_rate": 9.924583445661515e-06, "loss": 0.2738, "step": 36800 }, { "epoch": 0.7493129770992366, "grad_norm": 11.079372046536808, "learning_rate": 9.924460446858034e-06, "loss": 0.3886, "step": 36810 }, { "epoch": 0.7495165394402036, "grad_norm": 6.076403148430672, "learning_rate": 9.92433734859873e-06, "loss": 0.2742, "step": 36820 }, { "epoch": 0.7497201017811704, "grad_norm": 6.798486587666354, "learning_rate": 9.924214150886093e-06, "loss": 0.3604, "step": 36830 }, { "epoch": 0.7499236641221374, "grad_norm": 7.821763092537897, "learning_rate": 9.924090853722608e-06, "loss": 0.2619, "step": 36840 }, { "epoch": 0.7501272264631044, "grad_norm": 13.36061471947159, "learning_rate": 9.923967457110766e-06, "loss": 0.3031, "step": 36850 }, { "epoch": 0.7503307888040712, "grad_norm": 7.088712617773825, "learning_rate": 9.92384396105306e-06, "loss": 0.2576, "step": 36860 }, { "epoch": 0.7505343511450382, "grad_norm": 9.628606354351366, "learning_rate": 9.92372036555198e-06, "loss": 0.2896, "step": 36870 }, { "epoch": 0.750737913486005, "grad_norm": 7.940465905729374, "learning_rate": 9.92359667061003e-06, "loss": 0.3288, "step": 36880 }, { "epoch": 0.750941475826972, "grad_norm": 7.810803528990647, "learning_rate": 9.923472876229701e-06, "loss": 0.3315, "step": 36890 }, { "epoch": 0.751145038167939, "grad_norm": 7.123063293158341, "learning_rate": 9.923348982413496e-06, "loss": 0.3061, "step": 36900 }, { "epoch": 0.7513486005089058, "grad_norm": 12.512165857755024, "learning_rate": 9.92322498916392e-06, "loss": 0.3038, "step": 36910 }, { "epoch": 0.7515521628498728, "grad_norm": 13.460187162275552, "learning_rate": 9.923100896483471e-06, "loss": 0.3537, "step": 36920 }, { "epoch": 0.7517557251908397, "grad_norm": 6.7410590544024425, "learning_rate": 9.92297670437466e-06, "loss": 0.3902, "step": 36930 }, { "epoch": 0.7519592875318066, "grad_norm": 8.078214113053264, "learning_rate": 9.922852412839992e-06, "loss": 0.255, "step": 36940 }, { "epoch": 0.7521628498727735, "grad_norm": 4.424684256674827, "learning_rate": 9.92272802188198e-06, "loss": 0.2542, "step": 36950 }, { "epoch": 0.7523664122137405, "grad_norm": 11.455997701926504, "learning_rate": 9.922603531503137e-06, "loss": 0.3358, "step": 36960 }, { "epoch": 0.7525699745547074, "grad_norm": 14.31239033166801, "learning_rate": 9.922478941705972e-06, "loss": 0.2545, "step": 36970 }, { "epoch": 0.7527735368956743, "grad_norm": 11.452609854522473, "learning_rate": 9.922354252493008e-06, "loss": 0.3607, "step": 36980 }, { "epoch": 0.7529770992366412, "grad_norm": 14.924442560835493, "learning_rate": 9.922229463866757e-06, "loss": 0.3452, "step": 36990 }, { "epoch": 0.7531806615776081, "grad_norm": 22.213524152884137, "learning_rate": 9.922104575829745e-06, "loss": 0.2895, "step": 37000 }, { "epoch": 0.7533842239185751, "grad_norm": 7.292855143601442, "learning_rate": 9.92197958838449e-06, "loss": 0.3884, "step": 37010 }, { "epoch": 0.753587786259542, "grad_norm": 115.25979768978411, "learning_rate": 9.921854501533517e-06, "loss": 0.3412, "step": 37020 }, { "epoch": 0.7537913486005089, "grad_norm": 6.810216000683764, "learning_rate": 9.921729315279354e-06, "loss": 0.2702, "step": 37030 }, { "epoch": 0.7539949109414759, "grad_norm": 9.751291071429105, "learning_rate": 9.921604029624528e-06, "loss": 0.2934, "step": 37040 }, { "epoch": 0.7541984732824427, "grad_norm": 11.919414422305074, "learning_rate": 9.92147864457157e-06, "loss": 0.2504, "step": 37050 }, { "epoch": 0.7544020356234097, "grad_norm": 9.127263548046178, "learning_rate": 9.921353160123011e-06, "loss": 0.287, "step": 37060 }, { "epoch": 0.7546055979643765, "grad_norm": 7.052478983954081, "learning_rate": 9.921227576281388e-06, "loss": 0.3544, "step": 37070 }, { "epoch": 0.7548091603053435, "grad_norm": 12.617426557064837, "learning_rate": 9.921101893049234e-06, "loss": 0.3632, "step": 37080 }, { "epoch": 0.7550127226463105, "grad_norm": 9.120186197540498, "learning_rate": 9.92097611042909e-06, "loss": 0.3475, "step": 37090 }, { "epoch": 0.7552162849872773, "grad_norm": 9.64859978118463, "learning_rate": 9.920850228423495e-06, "loss": 0.3178, "step": 37100 }, { "epoch": 0.7554198473282443, "grad_norm": 9.169536300676805, "learning_rate": 9.920724247034991e-06, "loss": 0.3186, "step": 37110 }, { "epoch": 0.7556234096692112, "grad_norm": 4.577363088628529, "learning_rate": 9.920598166266124e-06, "loss": 0.3445, "step": 37120 }, { "epoch": 0.7558269720101781, "grad_norm": 12.303277424447405, "learning_rate": 9.920471986119438e-06, "loss": 0.3534, "step": 37130 }, { "epoch": 0.7560305343511451, "grad_norm": 6.552022384135955, "learning_rate": 9.920345706597484e-06, "loss": 0.2733, "step": 37140 }, { "epoch": 0.756234096692112, "grad_norm": 7.314061583750825, "learning_rate": 9.920219327702811e-06, "loss": 0.3444, "step": 37150 }, { "epoch": 0.7564376590330789, "grad_norm": 8.284106691707464, "learning_rate": 9.92009284943797e-06, "loss": 0.3252, "step": 37160 }, { "epoch": 0.7566412213740458, "grad_norm": 36.296819632516254, "learning_rate": 9.919966271805519e-06, "loss": 0.2721, "step": 37170 }, { "epoch": 0.7568447837150127, "grad_norm": 5.690210203183673, "learning_rate": 9.919839594808012e-06, "loss": 0.3568, "step": 37180 }, { "epoch": 0.7570483460559796, "grad_norm": 9.10218687138183, "learning_rate": 9.919712818448007e-06, "loss": 0.3373, "step": 37190 }, { "epoch": 0.7572519083969466, "grad_norm": 8.616816748713633, "learning_rate": 9.919585942728065e-06, "loss": 0.2446, "step": 37200 }, { "epoch": 0.7574554707379135, "grad_norm": 12.752600127158255, "learning_rate": 9.919458967650748e-06, "loss": 0.3192, "step": 37210 }, { "epoch": 0.7576590330788804, "grad_norm": 6.223054056041009, "learning_rate": 9.919331893218621e-06, "loss": 0.3203, "step": 37220 }, { "epoch": 0.7578625954198474, "grad_norm": 9.81859954041872, "learning_rate": 9.91920471943425e-06, "loss": 0.3517, "step": 37230 }, { "epoch": 0.7580661577608142, "grad_norm": 7.413320721073621, "learning_rate": 9.919077446300205e-06, "loss": 0.3644, "step": 37240 }, { "epoch": 0.7582697201017812, "grad_norm": 10.153575992697094, "learning_rate": 9.918950073819056e-06, "loss": 0.2995, "step": 37250 }, { "epoch": 0.7584732824427481, "grad_norm": 4.564999538577245, "learning_rate": 9.918822601993372e-06, "loss": 0.3293, "step": 37260 }, { "epoch": 0.758676844783715, "grad_norm": 31.24135189574934, "learning_rate": 9.918695030825733e-06, "loss": 0.409, "step": 37270 }, { "epoch": 0.758880407124682, "grad_norm": 6.008023870327404, "learning_rate": 9.918567360318712e-06, "loss": 0.3022, "step": 37280 }, { "epoch": 0.7590839694656488, "grad_norm": 8.538691054949663, "learning_rate": 9.918439590474887e-06, "loss": 0.3956, "step": 37290 }, { "epoch": 0.7592875318066158, "grad_norm": 6.751636439612432, "learning_rate": 9.91831172129684e-06, "loss": 0.3563, "step": 37300 }, { "epoch": 0.7594910941475826, "grad_norm": 6.021562839539698, "learning_rate": 9.918183752787152e-06, "loss": 0.2693, "step": 37310 }, { "epoch": 0.7596946564885496, "grad_norm": 5.529577418215079, "learning_rate": 9.91805568494841e-06, "loss": 0.3255, "step": 37320 }, { "epoch": 0.7598982188295166, "grad_norm": 8.73673302102513, "learning_rate": 9.9179275177832e-06, "loss": 0.2477, "step": 37330 }, { "epoch": 0.7601017811704834, "grad_norm": 19.14058063136255, "learning_rate": 9.917799251294107e-06, "loss": 0.3523, "step": 37340 }, { "epoch": 0.7603053435114504, "grad_norm": 4.483984912007952, "learning_rate": 9.917670885483726e-06, "loss": 0.2466, "step": 37350 }, { "epoch": 0.7605089058524173, "grad_norm": 13.304779396677212, "learning_rate": 9.917542420354645e-06, "loss": 0.2889, "step": 37360 }, { "epoch": 0.7607124681933842, "grad_norm": 11.433266839799142, "learning_rate": 9.917413855909463e-06, "loss": 0.3226, "step": 37370 }, { "epoch": 0.7609160305343512, "grad_norm": 11.03220548335237, "learning_rate": 9.917285192150776e-06, "loss": 0.3414, "step": 37380 }, { "epoch": 0.761119592875318, "grad_norm": 7.183376815872533, "learning_rate": 9.917156429081179e-06, "loss": 0.3501, "step": 37390 }, { "epoch": 0.761323155216285, "grad_norm": 7.045863373754397, "learning_rate": 9.917027566703274e-06, "loss": 0.3061, "step": 37400 }, { "epoch": 0.7615267175572519, "grad_norm": 6.253934098775225, "learning_rate": 9.916898605019667e-06, "loss": 0.2992, "step": 37410 }, { "epoch": 0.7617302798982188, "grad_norm": 13.604363037709305, "learning_rate": 9.916769544032958e-06, "loss": 0.3191, "step": 37420 }, { "epoch": 0.7619338422391857, "grad_norm": 3.7757183080726255, "learning_rate": 9.916640383745755e-06, "loss": 0.1876, "step": 37430 }, { "epoch": 0.7621374045801527, "grad_norm": 4.226075051461771, "learning_rate": 9.916511124160667e-06, "loss": 0.3207, "step": 37440 }, { "epoch": 0.7623409669211196, "grad_norm": 11.212865904340779, "learning_rate": 9.916381765280306e-06, "loss": 0.294, "step": 37450 }, { "epoch": 0.7625445292620865, "grad_norm": 12.725246084312534, "learning_rate": 9.916252307107281e-06, "loss": 0.3293, "step": 37460 }, { "epoch": 0.7627480916030535, "grad_norm": 9.61525268088379, "learning_rate": 9.91612274964421e-06, "loss": 0.3287, "step": 37470 }, { "epoch": 0.7629516539440203, "grad_norm": 3.1689692083991052, "learning_rate": 9.915993092893706e-06, "loss": 0.3066, "step": 37480 }, { "epoch": 0.7631552162849873, "grad_norm": 8.172456012360552, "learning_rate": 9.915863336858391e-06, "loss": 0.3109, "step": 37490 }, { "epoch": 0.7633587786259542, "grad_norm": 10.961016691164362, "learning_rate": 9.915733481540886e-06, "loss": 0.3199, "step": 37500 }, { "epoch": 0.7635623409669211, "grad_norm": 12.347522749863936, "learning_rate": 9.91560352694381e-06, "loss": 0.3573, "step": 37510 }, { "epoch": 0.7637659033078881, "grad_norm": 12.108763872958018, "learning_rate": 9.91547347306979e-06, "loss": 0.2914, "step": 37520 }, { "epoch": 0.7639694656488549, "grad_norm": 7.209775317451046, "learning_rate": 9.915343319921451e-06, "loss": 0.2937, "step": 37530 }, { "epoch": 0.7641730279898219, "grad_norm": 9.332929099261424, "learning_rate": 9.915213067501426e-06, "loss": 0.2703, "step": 37540 }, { "epoch": 0.7643765903307888, "grad_norm": 4.506567896405693, "learning_rate": 9.915082715812338e-06, "loss": 0.2792, "step": 37550 }, { "epoch": 0.7645801526717557, "grad_norm": 9.994195130583226, "learning_rate": 9.914952264856828e-06, "loss": 0.4491, "step": 37560 }, { "epoch": 0.7647837150127227, "grad_norm": 9.018584793477077, "learning_rate": 9.914821714637523e-06, "loss": 0.2546, "step": 37570 }, { "epoch": 0.7649872773536895, "grad_norm": 1.8218495348026786, "learning_rate": 9.914691065157066e-06, "loss": 0.283, "step": 37580 }, { "epoch": 0.7651908396946565, "grad_norm": 8.446524434315535, "learning_rate": 9.914560316418092e-06, "loss": 0.2657, "step": 37590 }, { "epoch": 0.7653944020356234, "grad_norm": 8.390149390664094, "learning_rate": 9.914429468423244e-06, "loss": 0.2967, "step": 37600 }, { "epoch": 0.7655979643765903, "grad_norm": 6.402891467129521, "learning_rate": 9.914298521175161e-06, "loss": 0.3169, "step": 37610 }, { "epoch": 0.7658015267175573, "grad_norm": 9.857489813606206, "learning_rate": 9.91416747467649e-06, "loss": 0.2948, "step": 37620 }, { "epoch": 0.7660050890585242, "grad_norm": 5.676098265228622, "learning_rate": 9.914036328929877e-06, "loss": 0.2752, "step": 37630 }, { "epoch": 0.7662086513994911, "grad_norm": 8.098924659335792, "learning_rate": 9.913905083937972e-06, "loss": 0.25, "step": 37640 }, { "epoch": 0.766412213740458, "grad_norm": 17.08277596048285, "learning_rate": 9.913773739703425e-06, "loss": 0.3159, "step": 37650 }, { "epoch": 0.766615776081425, "grad_norm": 7.111563310702554, "learning_rate": 9.913642296228888e-06, "loss": 0.2686, "step": 37660 }, { "epoch": 0.7668193384223918, "grad_norm": 5.257683418833783, "learning_rate": 9.913510753517017e-06, "loss": 0.3136, "step": 37670 }, { "epoch": 0.7670229007633588, "grad_norm": 9.975633489432127, "learning_rate": 9.913379111570466e-06, "loss": 0.2786, "step": 37680 }, { "epoch": 0.7672264631043257, "grad_norm": 10.040508741783137, "learning_rate": 9.913247370391896e-06, "loss": 0.3434, "step": 37690 }, { "epoch": 0.7674300254452926, "grad_norm": 6.803640603808881, "learning_rate": 9.913115529983968e-06, "loss": 0.2909, "step": 37700 }, { "epoch": 0.7676335877862596, "grad_norm": 8.15229530229086, "learning_rate": 9.912983590349343e-06, "loss": 0.2432, "step": 37710 }, { "epoch": 0.7678371501272264, "grad_norm": 7.3032623153932406, "learning_rate": 9.912851551490688e-06, "loss": 0.2775, "step": 37720 }, { "epoch": 0.7680407124681934, "grad_norm": 11.510765903513485, "learning_rate": 9.912719413410667e-06, "loss": 0.2943, "step": 37730 }, { "epoch": 0.7682442748091604, "grad_norm": 6.9232819905926775, "learning_rate": 9.91258717611195e-06, "loss": 0.3951, "step": 37740 }, { "epoch": 0.7684478371501272, "grad_norm": 9.998525903758125, "learning_rate": 9.912454839597207e-06, "loss": 0.2696, "step": 37750 }, { "epoch": 0.7686513994910942, "grad_norm": 13.711360394030033, "learning_rate": 9.912322403869113e-06, "loss": 0.3125, "step": 37760 }, { "epoch": 0.768854961832061, "grad_norm": 9.920218058267551, "learning_rate": 9.91218986893034e-06, "loss": 0.3136, "step": 37770 }, { "epoch": 0.769058524173028, "grad_norm": 9.715902839718927, "learning_rate": 9.912057234783566e-06, "loss": 0.2269, "step": 37780 }, { "epoch": 0.7692620865139949, "grad_norm": 2.984334177179209, "learning_rate": 9.91192450143147e-06, "loss": 0.3322, "step": 37790 }, { "epoch": 0.7694656488549618, "grad_norm": 10.48436559339725, "learning_rate": 9.91179166887673e-06, "loss": 0.2394, "step": 37800 }, { "epoch": 0.7696692111959288, "grad_norm": 3.1814514050380476, "learning_rate": 9.911658737122032e-06, "loss": 0.4082, "step": 37810 }, { "epoch": 0.7698727735368956, "grad_norm": 7.9062827041158945, "learning_rate": 9.91152570617006e-06, "loss": 0.4022, "step": 37820 }, { "epoch": 0.7700763358778626, "grad_norm": 6.654826792324845, "learning_rate": 9.911392576023502e-06, "loss": 0.3406, "step": 37830 }, { "epoch": 0.7702798982188295, "grad_norm": 8.26787946566421, "learning_rate": 9.911259346685043e-06, "loss": 0.2668, "step": 37840 }, { "epoch": 0.7704834605597964, "grad_norm": 9.640887310102586, "learning_rate": 9.911126018157376e-06, "loss": 0.3868, "step": 37850 }, { "epoch": 0.7706870229007634, "grad_norm": 6.584490621086436, "learning_rate": 9.910992590443193e-06, "loss": 0.3205, "step": 37860 }, { "epoch": 0.7708905852417303, "grad_norm": 10.120840749025946, "learning_rate": 9.91085906354519e-06, "loss": 0.299, "step": 37870 }, { "epoch": 0.7710941475826972, "grad_norm": 7.0049206664196895, "learning_rate": 9.910725437466063e-06, "loss": 0.2815, "step": 37880 }, { "epoch": 0.7712977099236641, "grad_norm": 5.272173362762478, "learning_rate": 9.91059171220851e-06, "loss": 0.3239, "step": 37890 }, { "epoch": 0.771501272264631, "grad_norm": 6.6856087147372, "learning_rate": 9.910457887775236e-06, "loss": 0.3002, "step": 37900 }, { "epoch": 0.7717048346055979, "grad_norm": 10.56695226440265, "learning_rate": 9.910323964168937e-06, "loss": 0.3841, "step": 37910 }, { "epoch": 0.7719083969465649, "grad_norm": 7.127663828212846, "learning_rate": 9.910189941392323e-06, "loss": 0.3006, "step": 37920 }, { "epoch": 0.7721119592875318, "grad_norm": 13.770271928037484, "learning_rate": 9.910055819448097e-06, "loss": 0.3308, "step": 37930 }, { "epoch": 0.7723155216284987, "grad_norm": 13.769583799422808, "learning_rate": 9.90992159833897e-06, "loss": 0.3811, "step": 37940 }, { "epoch": 0.7725190839694657, "grad_norm": 14.976235070210457, "learning_rate": 9.909787278067653e-06, "loss": 0.335, "step": 37950 }, { "epoch": 0.7727226463104325, "grad_norm": 11.951910597225401, "learning_rate": 9.90965285863686e-06, "loss": 0.4191, "step": 37960 }, { "epoch": 0.7729262086513995, "grad_norm": 10.807627938068995, "learning_rate": 9.9095183400493e-06, "loss": 0.3971, "step": 37970 }, { "epoch": 0.7731297709923665, "grad_norm": 9.439783002225363, "learning_rate": 9.909383722307697e-06, "loss": 0.2519, "step": 37980 }, { "epoch": 0.7733333333333333, "grad_norm": 8.93816536751371, "learning_rate": 9.909249005414765e-06, "loss": 0.3019, "step": 37990 }, { "epoch": 0.7735368956743003, "grad_norm": 13.646649405166484, "learning_rate": 9.909114189373226e-06, "loss": 0.3643, "step": 38000 }, { "epoch": 0.7737404580152671, "grad_norm": 6.9611322386833745, "learning_rate": 9.908979274185804e-06, "loss": 0.3306, "step": 38010 }, { "epoch": 0.7739440203562341, "grad_norm": 2.6775486638967276, "learning_rate": 9.908844259855222e-06, "loss": 0.2755, "step": 38020 }, { "epoch": 0.774147582697201, "grad_norm": 7.402666546848631, "learning_rate": 9.908709146384207e-06, "loss": 0.2694, "step": 38030 }, { "epoch": 0.7743511450381679, "grad_norm": 6.452750687340792, "learning_rate": 9.90857393377549e-06, "loss": 0.2558, "step": 38040 }, { "epoch": 0.7745547073791349, "grad_norm": 19.3651311189519, "learning_rate": 9.9084386220318e-06, "loss": 0.2093, "step": 38050 }, { "epoch": 0.7747582697201018, "grad_norm": 1.297851002895949, "learning_rate": 9.908303211155869e-06, "loss": 0.2823, "step": 38060 }, { "epoch": 0.7749618320610687, "grad_norm": 11.286146655525114, "learning_rate": 9.908167701150435e-06, "loss": 0.3314, "step": 38070 }, { "epoch": 0.7751653944020356, "grad_norm": 7.518044612902989, "learning_rate": 9.90803209201823e-06, "loss": 0.2085, "step": 38080 }, { "epoch": 0.7753689567430025, "grad_norm": 8.513996456170755, "learning_rate": 9.907896383761996e-06, "loss": 0.3106, "step": 38090 }, { "epoch": 0.7755725190839695, "grad_norm": 28.246473327364168, "learning_rate": 9.907760576384475e-06, "loss": 0.3158, "step": 38100 }, { "epoch": 0.7757760814249364, "grad_norm": 4.950814556944682, "learning_rate": 9.907624669888407e-06, "loss": 0.3183, "step": 38110 }, { "epoch": 0.7759796437659033, "grad_norm": 9.700751863921532, "learning_rate": 9.907488664276536e-06, "loss": 0.2303, "step": 38120 }, { "epoch": 0.7761832061068702, "grad_norm": 24.867977035068733, "learning_rate": 9.907352559551613e-06, "loss": 0.2519, "step": 38130 }, { "epoch": 0.7763867684478372, "grad_norm": 14.56047663466368, "learning_rate": 9.907216355716383e-06, "loss": 0.3581, "step": 38140 }, { "epoch": 0.776590330788804, "grad_norm": 18.312736230454973, "learning_rate": 9.907080052773599e-06, "loss": 0.3735, "step": 38150 }, { "epoch": 0.776793893129771, "grad_norm": 6.361219734994256, "learning_rate": 9.906943650726014e-06, "loss": 0.2374, "step": 38160 }, { "epoch": 0.776997455470738, "grad_norm": 5.733910605110358, "learning_rate": 9.906807149576382e-06, "loss": 0.2685, "step": 38170 }, { "epoch": 0.7772010178117048, "grad_norm": 14.150066947962806, "learning_rate": 9.906670549327455e-06, "loss": 0.4099, "step": 38180 }, { "epoch": 0.7774045801526718, "grad_norm": 10.379971923315706, "learning_rate": 9.906533849982e-06, "loss": 0.3801, "step": 38190 }, { "epoch": 0.7776081424936386, "grad_norm": 3.4944022352313606, "learning_rate": 9.906397051542775e-06, "loss": 0.316, "step": 38200 }, { "epoch": 0.7778117048346056, "grad_norm": 11.795279833426994, "learning_rate": 9.90626015401254e-06, "loss": 0.3688, "step": 38210 }, { "epoch": 0.7780152671755726, "grad_norm": 10.046890937684203, "learning_rate": 9.906123157394066e-06, "loss": 0.2888, "step": 38220 }, { "epoch": 0.7782188295165394, "grad_norm": 5.043032774296467, "learning_rate": 9.905986061690112e-06, "loss": 0.3293, "step": 38230 }, { "epoch": 0.7784223918575064, "grad_norm": 7.889251024627019, "learning_rate": 9.905848866903453e-06, "loss": 0.3823, "step": 38240 }, { "epoch": 0.7786259541984732, "grad_norm": 6.996179012037283, "learning_rate": 9.905711573036856e-06, "loss": 0.3654, "step": 38250 }, { "epoch": 0.7788295165394402, "grad_norm": 6.184356125435579, "learning_rate": 9.905574180093095e-06, "loss": 0.3978, "step": 38260 }, { "epoch": 0.7790330788804071, "grad_norm": 5.326299677968778, "learning_rate": 9.905436688074945e-06, "loss": 0.3043, "step": 38270 }, { "epoch": 0.779236641221374, "grad_norm": 3.9187507002300745, "learning_rate": 9.905299096985185e-06, "loss": 0.296, "step": 38280 }, { "epoch": 0.779440203562341, "grad_norm": 9.2117513142765, "learning_rate": 9.90516140682659e-06, "loss": 0.3408, "step": 38290 }, { "epoch": 0.7796437659033079, "grad_norm": 3.643443359671578, "learning_rate": 9.905023617601943e-06, "loss": 0.2702, "step": 38300 }, { "epoch": 0.7798473282442748, "grad_norm": 10.843435651339853, "learning_rate": 9.904885729314027e-06, "loss": 0.3986, "step": 38310 }, { "epoch": 0.7800508905852417, "grad_norm": 5.979784459359326, "learning_rate": 9.904747741965624e-06, "loss": 0.3181, "step": 38320 }, { "epoch": 0.7802544529262087, "grad_norm": 6.821744694826569, "learning_rate": 9.904609655559525e-06, "loss": 0.263, "step": 38330 }, { "epoch": 0.7804580152671756, "grad_norm": 14.797039111742363, "learning_rate": 9.904471470098517e-06, "loss": 0.281, "step": 38340 }, { "epoch": 0.7806615776081425, "grad_norm": 7.042035938898944, "learning_rate": 9.904333185585391e-06, "loss": 0.3641, "step": 38350 }, { "epoch": 0.7808651399491094, "grad_norm": 8.700454561410115, "learning_rate": 9.904194802022939e-06, "loss": 0.36, "step": 38360 }, { "epoch": 0.7810687022900763, "grad_norm": 6.680933902057667, "learning_rate": 9.904056319413955e-06, "loss": 0.3631, "step": 38370 }, { "epoch": 0.7812722646310433, "grad_norm": 6.458950370356263, "learning_rate": 9.90391773776124e-06, "loss": 0.2973, "step": 38380 }, { "epoch": 0.7814758269720101, "grad_norm": 5.448109718356176, "learning_rate": 9.903779057067589e-06, "loss": 0.2211, "step": 38390 }, { "epoch": 0.7816793893129771, "grad_norm": 7.59261219297353, "learning_rate": 9.903640277335803e-06, "loss": 0.2753, "step": 38400 }, { "epoch": 0.7818829516539441, "grad_norm": 14.11966932701587, "learning_rate": 9.903501398568687e-06, "loss": 0.3568, "step": 38410 }, { "epoch": 0.7820865139949109, "grad_norm": 15.28845726067016, "learning_rate": 9.903362420769043e-06, "loss": 0.3697, "step": 38420 }, { "epoch": 0.7822900763358779, "grad_norm": 5.265543116203267, "learning_rate": 9.903223343939681e-06, "loss": 0.3496, "step": 38430 }, { "epoch": 0.7824936386768447, "grad_norm": 7.768570035415367, "learning_rate": 9.903084168083408e-06, "loss": 0.3184, "step": 38440 }, { "epoch": 0.7826972010178117, "grad_norm": 9.731285918596628, "learning_rate": 9.902944893203034e-06, "loss": 0.3805, "step": 38450 }, { "epoch": 0.7829007633587787, "grad_norm": 8.407550263485929, "learning_rate": 9.902805519301373e-06, "loss": 0.3299, "step": 38460 }, { "epoch": 0.7831043256997455, "grad_norm": 12.370666135365003, "learning_rate": 9.90266604638124e-06, "loss": 0.3142, "step": 38470 }, { "epoch": 0.7833078880407125, "grad_norm": 7.885878281175809, "learning_rate": 9.902526474445453e-06, "loss": 0.2993, "step": 38480 }, { "epoch": 0.7835114503816794, "grad_norm": 13.052642786282323, "learning_rate": 9.902386803496828e-06, "loss": 0.3697, "step": 38490 }, { "epoch": 0.7837150127226463, "grad_norm": 7.046917189855229, "learning_rate": 9.902247033538188e-06, "loss": 0.3275, "step": 38500 }, { "epoch": 0.7839185750636132, "grad_norm": 8.667426933929335, "learning_rate": 9.902107164572356e-06, "loss": 0.2531, "step": 38510 }, { "epoch": 0.7841221374045801, "grad_norm": 6.442193597825729, "learning_rate": 9.901967196602153e-06, "loss": 0.275, "step": 38520 }, { "epoch": 0.7843256997455471, "grad_norm": 11.155152680605367, "learning_rate": 9.901827129630413e-06, "loss": 0.2754, "step": 38530 }, { "epoch": 0.784529262086514, "grad_norm": 8.321008458847178, "learning_rate": 9.901686963659957e-06, "loss": 0.2746, "step": 38540 }, { "epoch": 0.7847328244274809, "grad_norm": 10.295077948539632, "learning_rate": 9.90154669869362e-06, "loss": 0.4, "step": 38550 }, { "epoch": 0.7849363867684478, "grad_norm": 12.324414604025186, "learning_rate": 9.901406334734235e-06, "loss": 0.4398, "step": 38560 }, { "epoch": 0.7851399491094148, "grad_norm": 8.971807010516219, "learning_rate": 9.901265871784636e-06, "loss": 0.3405, "step": 38570 }, { "epoch": 0.7853435114503817, "grad_norm": 8.886823685835212, "learning_rate": 9.901125309847662e-06, "loss": 0.3229, "step": 38580 }, { "epoch": 0.7855470737913486, "grad_norm": 8.84582172516386, "learning_rate": 9.900984648926146e-06, "loss": 0.3013, "step": 38590 }, { "epoch": 0.7857506361323155, "grad_norm": 7.341923485817122, "learning_rate": 9.900843889022935e-06, "loss": 0.3124, "step": 38600 }, { "epoch": 0.7859541984732824, "grad_norm": 14.770126791439807, "learning_rate": 9.900703030140868e-06, "loss": 0.2591, "step": 38610 }, { "epoch": 0.7861577608142494, "grad_norm": 17.300146882506873, "learning_rate": 9.900562072282794e-06, "loss": 0.2924, "step": 38620 }, { "epoch": 0.7863613231552162, "grad_norm": 6.515013976583222, "learning_rate": 9.900421015451553e-06, "loss": 0.2782, "step": 38630 }, { "epoch": 0.7865648854961832, "grad_norm": 3.5874916596262914, "learning_rate": 9.900279859650001e-06, "loss": 0.3386, "step": 38640 }, { "epoch": 0.7867684478371502, "grad_norm": 20.176311927925507, "learning_rate": 9.900138604880984e-06, "loss": 0.3082, "step": 38650 }, { "epoch": 0.786972010178117, "grad_norm": 19.645851880545393, "learning_rate": 9.899997251147358e-06, "loss": 0.3619, "step": 38660 }, { "epoch": 0.787175572519084, "grad_norm": 15.93030722999231, "learning_rate": 9.899855798451976e-06, "loss": 0.3817, "step": 38670 }, { "epoch": 0.7873791348600508, "grad_norm": 5.160662111760048, "learning_rate": 9.899714246797694e-06, "loss": 0.3491, "step": 38680 }, { "epoch": 0.7875826972010178, "grad_norm": 7.201696780601135, "learning_rate": 9.899572596187371e-06, "loss": 0.3229, "step": 38690 }, { "epoch": 0.7877862595419848, "grad_norm": 9.686293932269248, "learning_rate": 9.899430846623872e-06, "loss": 0.3518, "step": 38700 }, { "epoch": 0.7879898218829516, "grad_norm": 6.945264978593751, "learning_rate": 9.899288998110056e-06, "loss": 0.2807, "step": 38710 }, { "epoch": 0.7881933842239186, "grad_norm": 4.297990701861889, "learning_rate": 9.899147050648787e-06, "loss": 0.3169, "step": 38720 }, { "epoch": 0.7883969465648855, "grad_norm": 13.066637779027825, "learning_rate": 9.899005004242934e-06, "loss": 0.3619, "step": 38730 }, { "epoch": 0.7886005089058524, "grad_norm": 5.163891793321988, "learning_rate": 9.898862858895366e-06, "loss": 0.3717, "step": 38740 }, { "epoch": 0.7888040712468194, "grad_norm": 13.372939341817666, "learning_rate": 9.898720614608952e-06, "loss": 0.2921, "step": 38750 }, { "epoch": 0.7890076335877863, "grad_norm": 6.629685900540075, "learning_rate": 9.898578271386565e-06, "loss": 0.2927, "step": 38760 }, { "epoch": 0.7892111959287532, "grad_norm": 3.2259429533885875, "learning_rate": 9.898435829231082e-06, "loss": 0.2593, "step": 38770 }, { "epoch": 0.7894147582697201, "grad_norm": 6.77700657075273, "learning_rate": 9.898293288145376e-06, "loss": 0.2989, "step": 38780 }, { "epoch": 0.789618320610687, "grad_norm": 5.1353673624909835, "learning_rate": 9.89815064813233e-06, "loss": 0.2871, "step": 38790 }, { "epoch": 0.7898218829516539, "grad_norm": 7.144817653664147, "learning_rate": 9.898007909194822e-06, "loss": 0.4448, "step": 38800 }, { "epoch": 0.7900254452926209, "grad_norm": 4.482389854376913, "learning_rate": 9.897865071335736e-06, "loss": 0.3048, "step": 38810 }, { "epoch": 0.7902290076335878, "grad_norm": 9.490692455313777, "learning_rate": 9.897722134557957e-06, "loss": 0.302, "step": 38820 }, { "epoch": 0.7904325699745547, "grad_norm": 5.307691352629594, "learning_rate": 9.89757909886437e-06, "loss": 0.2212, "step": 38830 }, { "epoch": 0.7906361323155217, "grad_norm": 7.094091955115213, "learning_rate": 9.897435964257867e-06, "loss": 0.2883, "step": 38840 }, { "epoch": 0.7908396946564885, "grad_norm": 19.040566535636117, "learning_rate": 9.897292730741335e-06, "loss": 0.3022, "step": 38850 }, { "epoch": 0.7910432569974555, "grad_norm": 16.508039696927238, "learning_rate": 9.89714939831767e-06, "loss": 0.365, "step": 38860 }, { "epoch": 0.7912468193384224, "grad_norm": 12.702592987225861, "learning_rate": 9.897005966989764e-06, "loss": 0.2572, "step": 38870 }, { "epoch": 0.7914503816793893, "grad_norm": 7.296603183935482, "learning_rate": 9.896862436760516e-06, "loss": 0.2594, "step": 38880 }, { "epoch": 0.7916539440203563, "grad_norm": 13.602140621031454, "learning_rate": 9.896718807632824e-06, "loss": 0.3777, "step": 38890 }, { "epoch": 0.7918575063613231, "grad_norm": 6.384774047095729, "learning_rate": 9.896575079609589e-06, "loss": 0.3682, "step": 38900 }, { "epoch": 0.7920610687022901, "grad_norm": 4.646760601941956, "learning_rate": 9.896431252693712e-06, "loss": 0.2519, "step": 38910 }, { "epoch": 0.792264631043257, "grad_norm": 8.29583124093012, "learning_rate": 9.896287326888102e-06, "loss": 0.274, "step": 38920 }, { "epoch": 0.7924681933842239, "grad_norm": 8.812140024834175, "learning_rate": 9.896143302195661e-06, "loss": 0.1957, "step": 38930 }, { "epoch": 0.7926717557251909, "grad_norm": 24.59868182137366, "learning_rate": 9.8959991786193e-06, "loss": 0.4213, "step": 38940 }, { "epoch": 0.7928753180661577, "grad_norm": 5.02908895422055, "learning_rate": 9.89585495616193e-06, "loss": 0.289, "step": 38950 }, { "epoch": 0.7930788804071247, "grad_norm": 11.451499303586182, "learning_rate": 9.895710634826464e-06, "loss": 0.3071, "step": 38960 }, { "epoch": 0.7932824427480916, "grad_norm": 11.305083310951309, "learning_rate": 9.895566214615815e-06, "loss": 0.2898, "step": 38970 }, { "epoch": 0.7934860050890585, "grad_norm": 6.579645426900936, "learning_rate": 9.895421695532901e-06, "loss": 0.2861, "step": 38980 }, { "epoch": 0.7936895674300255, "grad_norm": 10.929760429469964, "learning_rate": 9.895277077580642e-06, "loss": 0.3208, "step": 38990 }, { "epoch": 0.7938931297709924, "grad_norm": 7.503412856611652, "learning_rate": 9.895132360761956e-06, "loss": 0.2878, "step": 39000 }, { "epoch": 0.7940966921119593, "grad_norm": 9.008494530961773, "learning_rate": 9.894987545079768e-06, "loss": 0.2336, "step": 39010 }, { "epoch": 0.7943002544529262, "grad_norm": 5.72499433424437, "learning_rate": 9.894842630537e-06, "loss": 0.3453, "step": 39020 }, { "epoch": 0.7945038167938931, "grad_norm": 8.265270598384339, "learning_rate": 9.894697617136581e-06, "loss": 0.1767, "step": 39030 }, { "epoch": 0.79470737913486, "grad_norm": 9.319131013496424, "learning_rate": 9.894552504881441e-06, "loss": 0.3948, "step": 39040 }, { "epoch": 0.794910941475827, "grad_norm": 9.865933262939897, "learning_rate": 9.894407293774508e-06, "loss": 0.2986, "step": 39050 }, { "epoch": 0.7951145038167939, "grad_norm": 7.725111769089984, "learning_rate": 9.894261983818717e-06, "loss": 0.315, "step": 39060 }, { "epoch": 0.7953180661577608, "grad_norm": 6.121034091290769, "learning_rate": 9.894116575017e-06, "loss": 0.2908, "step": 39070 }, { "epoch": 0.7955216284987278, "grad_norm": 19.50661830076292, "learning_rate": 9.893971067372294e-06, "loss": 0.3956, "step": 39080 }, { "epoch": 0.7957251908396946, "grad_norm": 14.794451152840384, "learning_rate": 9.89382546088754e-06, "loss": 0.3766, "step": 39090 }, { "epoch": 0.7959287531806616, "grad_norm": 9.467472939963113, "learning_rate": 9.89367975556568e-06, "loss": 0.2952, "step": 39100 }, { "epoch": 0.7961323155216286, "grad_norm": 9.207198945406942, "learning_rate": 9.89353395140965e-06, "loss": 0.342, "step": 39110 }, { "epoch": 0.7963358778625954, "grad_norm": 6.169590700963754, "learning_rate": 9.893388048422403e-06, "loss": 0.2352, "step": 39120 }, { "epoch": 0.7965394402035624, "grad_norm": 18.0502092198031, "learning_rate": 9.893242046606879e-06, "loss": 0.422, "step": 39130 }, { "epoch": 0.7967430025445292, "grad_norm": 10.96204601508052, "learning_rate": 9.893095945966031e-06, "loss": 0.316, "step": 39140 }, { "epoch": 0.7969465648854962, "grad_norm": 4.787811952551181, "learning_rate": 9.892949746502809e-06, "loss": 0.3559, "step": 39150 }, { "epoch": 0.7971501272264631, "grad_norm": 9.128957059279493, "learning_rate": 9.892803448220164e-06, "loss": 0.234, "step": 39160 }, { "epoch": 0.79735368956743, "grad_norm": 11.35722749230726, "learning_rate": 9.892657051121049e-06, "loss": 0.2454, "step": 39170 }, { "epoch": 0.797557251908397, "grad_norm": 12.93474718835405, "learning_rate": 9.892510555208426e-06, "loss": 0.3812, "step": 39180 }, { "epoch": 0.7977608142493638, "grad_norm": 15.030496315552382, "learning_rate": 9.892363960485249e-06, "loss": 0.3819, "step": 39190 }, { "epoch": 0.7979643765903308, "grad_norm": 6.598583038089509, "learning_rate": 9.892217266954481e-06, "loss": 0.3226, "step": 39200 }, { "epoch": 0.7981679389312977, "grad_norm": 11.347280189764877, "learning_rate": 9.892070474619085e-06, "loss": 0.2511, "step": 39210 }, { "epoch": 0.7983715012722646, "grad_norm": 5.1577405767653275, "learning_rate": 9.891923583482024e-06, "loss": 0.3606, "step": 39220 }, { "epoch": 0.7985750636132316, "grad_norm": 8.443252557235809, "learning_rate": 9.891776593546266e-06, "loss": 0.3706, "step": 39230 }, { "epoch": 0.7987786259541985, "grad_norm": 6.921950486640489, "learning_rate": 9.891629504814779e-06, "loss": 0.2693, "step": 39240 }, { "epoch": 0.7989821882951654, "grad_norm": 2.7074389381627313, "learning_rate": 9.891482317290533e-06, "loss": 0.3002, "step": 39250 }, { "epoch": 0.7991857506361323, "grad_norm": 12.020422426228945, "learning_rate": 9.891335030976503e-06, "loss": 0.2713, "step": 39260 }, { "epoch": 0.7993893129770993, "grad_norm": 8.901107453902082, "learning_rate": 9.891187645875662e-06, "loss": 0.2984, "step": 39270 }, { "epoch": 0.7995928753180661, "grad_norm": 0.5063122233991338, "learning_rate": 9.891040161990985e-06, "loss": 0.2827, "step": 39280 }, { "epoch": 0.7997964376590331, "grad_norm": 11.718570629277359, "learning_rate": 9.890892579325454e-06, "loss": 0.3076, "step": 39290 }, { "epoch": 0.8, "grad_norm": 12.153343300383206, "learning_rate": 9.890744897882048e-06, "loss": 0.2891, "step": 39300 }, { "epoch": 0.8002035623409669, "grad_norm": 16.16623877778622, "learning_rate": 9.890597117663747e-06, "loss": 0.4794, "step": 39310 }, { "epoch": 0.8004071246819339, "grad_norm": 7.910505939836907, "learning_rate": 9.89044923867354e-06, "loss": 0.3309, "step": 39320 }, { "epoch": 0.8006106870229007, "grad_norm": 6.992885292099223, "learning_rate": 9.890301260914412e-06, "loss": 0.3398, "step": 39330 }, { "epoch": 0.8008142493638677, "grad_norm": 11.649723002606143, "learning_rate": 9.89015318438935e-06, "loss": 0.3471, "step": 39340 }, { "epoch": 0.8010178117048347, "grad_norm": 5.3559820723671345, "learning_rate": 9.89000500910135e-06, "loss": 0.3444, "step": 39350 }, { "epoch": 0.8012213740458015, "grad_norm": 6.059112099797759, "learning_rate": 9.889856735053396e-06, "loss": 0.2356, "step": 39360 }, { "epoch": 0.8014249363867685, "grad_norm": 9.74382282999558, "learning_rate": 9.88970836224849e-06, "loss": 0.3169, "step": 39370 }, { "epoch": 0.8016284987277353, "grad_norm": 6.116503525691733, "learning_rate": 9.889559890689622e-06, "loss": 0.3607, "step": 39380 }, { "epoch": 0.8018320610687023, "grad_norm": 8.376672358320661, "learning_rate": 9.889411320379798e-06, "loss": 0.3275, "step": 39390 }, { "epoch": 0.8020356234096692, "grad_norm": 6.150784193523734, "learning_rate": 9.889262651322014e-06, "loss": 0.3332, "step": 39400 }, { "epoch": 0.8022391857506361, "grad_norm": 7.6645418369008445, "learning_rate": 9.889113883519273e-06, "loss": 0.3462, "step": 39410 }, { "epoch": 0.8024427480916031, "grad_norm": 12.500291612450678, "learning_rate": 9.888965016974578e-06, "loss": 0.2729, "step": 39420 }, { "epoch": 0.80264631043257, "grad_norm": 7.292630328704965, "learning_rate": 9.88881605169094e-06, "loss": 0.2209, "step": 39430 }, { "epoch": 0.8028498727735369, "grad_norm": 6.21310983765661, "learning_rate": 9.888666987671366e-06, "loss": 0.283, "step": 39440 }, { "epoch": 0.8030534351145038, "grad_norm": 8.90420612705716, "learning_rate": 9.888517824918864e-06, "loss": 0.2678, "step": 39450 }, { "epoch": 0.8032569974554707, "grad_norm": 8.437216373379407, "learning_rate": 9.888368563436448e-06, "loss": 0.3105, "step": 39460 }, { "epoch": 0.8034605597964377, "grad_norm": 5.08638588242629, "learning_rate": 9.888219203227134e-06, "loss": 0.2973, "step": 39470 }, { "epoch": 0.8036641221374046, "grad_norm": 9.648216962508739, "learning_rate": 9.888069744293935e-06, "loss": 0.383, "step": 39480 }, { "epoch": 0.8038676844783715, "grad_norm": 15.014538639320293, "learning_rate": 9.887920186639875e-06, "loss": 0.3227, "step": 39490 }, { "epoch": 0.8040712468193384, "grad_norm": 6.0763370843054245, "learning_rate": 9.887770530267968e-06, "loss": 0.249, "step": 39500 }, { "epoch": 0.8042748091603054, "grad_norm": 8.366092201717704, "learning_rate": 9.887620775181241e-06, "loss": 0.34, "step": 39510 }, { "epoch": 0.8044783715012722, "grad_norm": 5.4219192117732655, "learning_rate": 9.887470921382719e-06, "loss": 0.2711, "step": 39520 }, { "epoch": 0.8046819338422392, "grad_norm": 9.905881848829699, "learning_rate": 9.887320968875424e-06, "loss": 0.3867, "step": 39530 }, { "epoch": 0.8048854961832062, "grad_norm": 4.336663912245824, "learning_rate": 9.887170917662387e-06, "loss": 0.3364, "step": 39540 }, { "epoch": 0.805089058524173, "grad_norm": 10.247201666658103, "learning_rate": 9.887020767746642e-06, "loss": 0.3085, "step": 39550 }, { "epoch": 0.80529262086514, "grad_norm": 5.740736765245323, "learning_rate": 9.886870519131216e-06, "loss": 0.286, "step": 39560 }, { "epoch": 0.8054961832061068, "grad_norm": 5.375632293022026, "learning_rate": 9.886720171819145e-06, "loss": 0.2731, "step": 39570 }, { "epoch": 0.8056997455470738, "grad_norm": 0.42514983747146284, "learning_rate": 9.886569725813466e-06, "loss": 0.2731, "step": 39580 }, { "epoch": 0.8059033078880408, "grad_norm": 4.976210648379692, "learning_rate": 9.886419181117219e-06, "loss": 0.2548, "step": 39590 }, { "epoch": 0.8061068702290076, "grad_norm": 8.908582479990597, "learning_rate": 9.886268537733444e-06, "loss": 0.3508, "step": 39600 }, { "epoch": 0.8063104325699746, "grad_norm": 7.228452270979971, "learning_rate": 9.88611779566518e-06, "loss": 0.2389, "step": 39610 }, { "epoch": 0.8065139949109414, "grad_norm": 6.232263605523468, "learning_rate": 9.885966954915473e-06, "loss": 0.3343, "step": 39620 }, { "epoch": 0.8067175572519084, "grad_norm": 8.065410391805193, "learning_rate": 9.885816015487372e-06, "loss": 0.366, "step": 39630 }, { "epoch": 0.8069211195928753, "grad_norm": 7.727191623144984, "learning_rate": 9.885664977383924e-06, "loss": 0.4617, "step": 39640 }, { "epoch": 0.8071246819338422, "grad_norm": 7.561748502534497, "learning_rate": 9.885513840608178e-06, "loss": 0.2855, "step": 39650 }, { "epoch": 0.8073282442748092, "grad_norm": 5.951401460628014, "learning_rate": 9.885362605163189e-06, "loss": 0.3131, "step": 39660 }, { "epoch": 0.8075318066157761, "grad_norm": 11.806135555858, "learning_rate": 9.885211271052008e-06, "loss": 0.3184, "step": 39670 }, { "epoch": 0.807735368956743, "grad_norm": 5.43499181009953, "learning_rate": 9.885059838277695e-06, "loss": 0.2581, "step": 39680 }, { "epoch": 0.8079389312977099, "grad_norm": 9.246958868918842, "learning_rate": 9.884908306843305e-06, "loss": 0.3381, "step": 39690 }, { "epoch": 0.8081424936386769, "grad_norm": 5.967890958119301, "learning_rate": 9.884756676751902e-06, "loss": 0.2622, "step": 39700 }, { "epoch": 0.8083460559796438, "grad_norm": 5.47413873425914, "learning_rate": 9.884604948006545e-06, "loss": 0.212, "step": 39710 }, { "epoch": 0.8085496183206107, "grad_norm": 13.783634274361646, "learning_rate": 9.8844531206103e-06, "loss": 0.3462, "step": 39720 }, { "epoch": 0.8087531806615776, "grad_norm": 4.964298732765868, "learning_rate": 9.884301194566233e-06, "loss": 0.3479, "step": 39730 }, { "epoch": 0.8089567430025445, "grad_norm": 6.906556797364551, "learning_rate": 9.884149169877413e-06, "loss": 0.3729, "step": 39740 }, { "epoch": 0.8091603053435115, "grad_norm": 10.206978478062789, "learning_rate": 9.883997046546909e-06, "loss": 0.4085, "step": 39750 }, { "epoch": 0.8093638676844783, "grad_norm": 9.255886750864851, "learning_rate": 9.883844824577794e-06, "loss": 0.2667, "step": 39760 }, { "epoch": 0.8095674300254453, "grad_norm": 4.224247901666152, "learning_rate": 9.883692503973145e-06, "loss": 0.2605, "step": 39770 }, { "epoch": 0.8097709923664123, "grad_norm": 4.375519844087032, "learning_rate": 9.883540084736033e-06, "loss": 0.3082, "step": 39780 }, { "epoch": 0.8099745547073791, "grad_norm": 11.207716895209408, "learning_rate": 9.88338756686954e-06, "loss": 0.3882, "step": 39790 }, { "epoch": 0.8101781170483461, "grad_norm": 5.5972997454326965, "learning_rate": 9.883234950376745e-06, "loss": 0.3317, "step": 39800 }, { "epoch": 0.8103816793893129, "grad_norm": 4.1792192484602655, "learning_rate": 9.88308223526073e-06, "loss": 0.3467, "step": 39810 }, { "epoch": 0.8105852417302799, "grad_norm": 7.695048263393507, "learning_rate": 9.88292942152458e-06, "loss": 0.2898, "step": 39820 }, { "epoch": 0.8107888040712469, "grad_norm": 8.880275385947009, "learning_rate": 9.882776509171383e-06, "loss": 0.3292, "step": 39830 }, { "epoch": 0.8109923664122137, "grad_norm": 12.10643943063623, "learning_rate": 9.882623498204224e-06, "loss": 0.3103, "step": 39840 }, { "epoch": 0.8111959287531807, "grad_norm": 17.531633097136233, "learning_rate": 9.882470388626196e-06, "loss": 0.2485, "step": 39850 }, { "epoch": 0.8113994910941476, "grad_norm": 7.59470163237029, "learning_rate": 9.882317180440388e-06, "loss": 0.4226, "step": 39860 }, { "epoch": 0.8116030534351145, "grad_norm": 21.78338092841653, "learning_rate": 9.882163873649898e-06, "loss": 0.2675, "step": 39870 }, { "epoch": 0.8118066157760814, "grad_norm": 4.76404636594171, "learning_rate": 9.882010468257818e-06, "loss": 0.2868, "step": 39880 }, { "epoch": 0.8120101781170483, "grad_norm": 10.330810254974354, "learning_rate": 9.88185696426725e-06, "loss": 0.3131, "step": 39890 }, { "epoch": 0.8122137404580153, "grad_norm": 7.332475522688846, "learning_rate": 9.881703361681292e-06, "loss": 0.3719, "step": 39900 }, { "epoch": 0.8124173027989822, "grad_norm": 15.949003190058903, "learning_rate": 9.881549660503047e-06, "loss": 0.2972, "step": 39910 }, { "epoch": 0.8126208651399491, "grad_norm": 9.800708103002417, "learning_rate": 9.88139586073562e-06, "loss": 0.3589, "step": 39920 }, { "epoch": 0.812824427480916, "grad_norm": 9.841938297844697, "learning_rate": 9.881241962382118e-06, "loss": 0.3145, "step": 39930 }, { "epoch": 0.813027989821883, "grad_norm": 9.154199622046953, "learning_rate": 9.881087965445646e-06, "loss": 0.3381, "step": 39940 }, { "epoch": 0.8132315521628499, "grad_norm": 6.070390359156888, "learning_rate": 9.880933869929316e-06, "loss": 0.369, "step": 39950 }, { "epoch": 0.8134351145038168, "grad_norm": 10.274244908092456, "learning_rate": 9.880779675836238e-06, "loss": 0.3377, "step": 39960 }, { "epoch": 0.8136386768447837, "grad_norm": 9.208055737786994, "learning_rate": 9.880625383169531e-06, "loss": 0.3182, "step": 39970 }, { "epoch": 0.8138422391857506, "grad_norm": 9.38496983995532, "learning_rate": 9.880470991932308e-06, "loss": 0.3154, "step": 39980 }, { "epoch": 0.8140458015267176, "grad_norm": 9.947867593342455, "learning_rate": 9.880316502127685e-06, "loss": 0.3488, "step": 39990 }, { "epoch": 0.8142493638676844, "grad_norm": 3.2574397643868394, "learning_rate": 9.880161913758785e-06, "loss": 0.3161, "step": 40000 }, { "epoch": 0.8144529262086514, "grad_norm": 10.276703769241703, "learning_rate": 9.880007226828731e-06, "loss": 0.2575, "step": 40010 }, { "epoch": 0.8146564885496184, "grad_norm": 8.854468193439525, "learning_rate": 9.879852441340644e-06, "loss": 0.317, "step": 40020 }, { "epoch": 0.8148600508905852, "grad_norm": 13.389299547368083, "learning_rate": 9.879697557297653e-06, "loss": 0.2931, "step": 40030 }, { "epoch": 0.8150636132315522, "grad_norm": 8.615219022517758, "learning_rate": 9.879542574702885e-06, "loss": 0.2923, "step": 40040 }, { "epoch": 0.815267175572519, "grad_norm": 5.9420287634382, "learning_rate": 9.87938749355947e-06, "loss": 0.3659, "step": 40050 }, { "epoch": 0.815470737913486, "grad_norm": 13.606369115824181, "learning_rate": 9.879232313870538e-06, "loss": 0.2557, "step": 40060 }, { "epoch": 0.815674300254453, "grad_norm": 13.91541204811299, "learning_rate": 9.879077035639227e-06, "loss": 0.3375, "step": 40070 }, { "epoch": 0.8158778625954198, "grad_norm": 2.465564954551788, "learning_rate": 9.87892165886867e-06, "loss": 0.293, "step": 40080 }, { "epoch": 0.8160814249363868, "grad_norm": 23.255979152437334, "learning_rate": 9.878766183562006e-06, "loss": 0.4216, "step": 40090 }, { "epoch": 0.8162849872773537, "grad_norm": 9.30599796125056, "learning_rate": 9.878610609722376e-06, "loss": 0.282, "step": 40100 }, { "epoch": 0.8164885496183206, "grad_norm": 6.180603161551195, "learning_rate": 9.87845493735292e-06, "loss": 0.3145, "step": 40110 }, { "epoch": 0.8166921119592875, "grad_norm": 8.413315546073102, "learning_rate": 9.878299166456784e-06, "loss": 0.3014, "step": 40120 }, { "epoch": 0.8168956743002544, "grad_norm": 5.337333396898729, "learning_rate": 9.878143297037113e-06, "loss": 0.2972, "step": 40130 }, { "epoch": 0.8170992366412214, "grad_norm": 8.94113795453275, "learning_rate": 9.877987329097056e-06, "loss": 0.3252, "step": 40140 }, { "epoch": 0.8173027989821883, "grad_norm": 9.303619726552615, "learning_rate": 9.87783126263976e-06, "loss": 0.3746, "step": 40150 }, { "epoch": 0.8175063613231552, "grad_norm": 13.870415095904235, "learning_rate": 9.87767509766838e-06, "loss": 0.3328, "step": 40160 }, { "epoch": 0.8177099236641221, "grad_norm": 4.603648251874185, "learning_rate": 9.877518834186069e-06, "loss": 0.3727, "step": 40170 }, { "epoch": 0.8179134860050891, "grad_norm": 13.00540358752359, "learning_rate": 9.877362472195983e-06, "loss": 0.2735, "step": 40180 }, { "epoch": 0.818117048346056, "grad_norm": 8.24216822159013, "learning_rate": 9.877206011701281e-06, "loss": 0.3659, "step": 40190 }, { "epoch": 0.8183206106870229, "grad_norm": 10.198735030084508, "learning_rate": 9.87704945270512e-06, "loss": 0.3102, "step": 40200 }, { "epoch": 0.8185241730279899, "grad_norm": 8.189109525879843, "learning_rate": 9.876892795210665e-06, "loss": 0.3461, "step": 40210 }, { "epoch": 0.8187277353689567, "grad_norm": 5.891130593371692, "learning_rate": 9.876736039221078e-06, "loss": 0.3827, "step": 40220 }, { "epoch": 0.8189312977099237, "grad_norm": 9.35870486864774, "learning_rate": 9.876579184739526e-06, "loss": 0.3065, "step": 40230 }, { "epoch": 0.8191348600508905, "grad_norm": 6.833119852781618, "learning_rate": 9.876422231769176e-06, "loss": 0.3636, "step": 40240 }, { "epoch": 0.8193384223918575, "grad_norm": 4.9583412623962495, "learning_rate": 9.876265180313197e-06, "loss": 0.2703, "step": 40250 }, { "epoch": 0.8195419847328245, "grad_norm": 15.869622735722754, "learning_rate": 9.876108030374765e-06, "loss": 0.2139, "step": 40260 }, { "epoch": 0.8197455470737913, "grad_norm": 0.6928718310007096, "learning_rate": 9.875950781957049e-06, "loss": 0.3264, "step": 40270 }, { "epoch": 0.8199491094147583, "grad_norm": 1.7566224366224474, "learning_rate": 9.875793435063227e-06, "loss": 0.3042, "step": 40280 }, { "epoch": 0.8201526717557251, "grad_norm": 3.906661102652854, "learning_rate": 9.875635989696477e-06, "loss": 0.2919, "step": 40290 }, { "epoch": 0.8203562340966921, "grad_norm": 8.669400086305467, "learning_rate": 9.875478445859978e-06, "loss": 0.3239, "step": 40300 }, { "epoch": 0.8205597964376591, "grad_norm": 10.933993763887083, "learning_rate": 9.875320803556911e-06, "loss": 0.2913, "step": 40310 }, { "epoch": 0.8207633587786259, "grad_norm": 13.79703976807115, "learning_rate": 9.875163062790463e-06, "loss": 0.2619, "step": 40320 }, { "epoch": 0.8209669211195929, "grad_norm": 16.999100355453546, "learning_rate": 9.875005223563818e-06, "loss": 0.2706, "step": 40330 }, { "epoch": 0.8211704834605598, "grad_norm": 3.9159075451183982, "learning_rate": 9.874847285880163e-06, "loss": 0.2665, "step": 40340 }, { "epoch": 0.8213740458015267, "grad_norm": 10.799256823737116, "learning_rate": 9.874689249742688e-06, "loss": 0.3063, "step": 40350 }, { "epoch": 0.8215776081424936, "grad_norm": 10.434954882016983, "learning_rate": 9.874531115154585e-06, "loss": 0.403, "step": 40360 }, { "epoch": 0.8217811704834606, "grad_norm": 3.4253159344830855, "learning_rate": 9.874372882119047e-06, "loss": 0.2597, "step": 40370 }, { "epoch": 0.8219847328244275, "grad_norm": 14.654132582385534, "learning_rate": 9.874214550639271e-06, "loss": 0.2802, "step": 40380 }, { "epoch": 0.8221882951653944, "grad_norm": 0.3113792905105509, "learning_rate": 9.874056120718455e-06, "loss": 0.2812, "step": 40390 }, { "epoch": 0.8223918575063613, "grad_norm": 10.910903770781209, "learning_rate": 9.873897592359799e-06, "loss": 0.3842, "step": 40400 }, { "epoch": 0.8225954198473282, "grad_norm": 22.555061193609113, "learning_rate": 9.873738965566501e-06, "loss": 0.2856, "step": 40410 }, { "epoch": 0.8227989821882952, "grad_norm": 11.76132970152167, "learning_rate": 9.873580240341769e-06, "loss": 0.2625, "step": 40420 }, { "epoch": 0.8230025445292621, "grad_norm": 3.4260916123742855, "learning_rate": 9.873421416688804e-06, "loss": 0.2984, "step": 40430 }, { "epoch": 0.823206106870229, "grad_norm": 22.015331243069994, "learning_rate": 9.87326249461082e-06, "loss": 0.3379, "step": 40440 }, { "epoch": 0.823409669211196, "grad_norm": 8.16197423939045, "learning_rate": 9.87310347411102e-06, "loss": 0.2768, "step": 40450 }, { "epoch": 0.8236132315521628, "grad_norm": 7.645497298454068, "learning_rate": 9.872944355192621e-06, "loss": 0.2938, "step": 40460 }, { "epoch": 0.8238167938931298, "grad_norm": 0.028331327728851944, "learning_rate": 9.872785137858836e-06, "loss": 0.2652, "step": 40470 }, { "epoch": 0.8240203562340966, "grad_norm": 16.529613520522023, "learning_rate": 9.872625822112876e-06, "loss": 0.2864, "step": 40480 }, { "epoch": 0.8242239185750636, "grad_norm": 9.550247692115251, "learning_rate": 9.872466407957963e-06, "loss": 0.2415, "step": 40490 }, { "epoch": 0.8244274809160306, "grad_norm": 8.785351894557554, "learning_rate": 9.872306895397317e-06, "loss": 0.3133, "step": 40500 }, { "epoch": 0.8246310432569974, "grad_norm": 4.7558380367368525, "learning_rate": 9.872147284434155e-06, "loss": 0.3558, "step": 40510 }, { "epoch": 0.8248346055979644, "grad_norm": 2.896778607007635, "learning_rate": 9.871987575071704e-06, "loss": 0.3091, "step": 40520 }, { "epoch": 0.8250381679389313, "grad_norm": 2.888831488522856, "learning_rate": 9.87182776731319e-06, "loss": 0.3108, "step": 40530 }, { "epoch": 0.8252417302798982, "grad_norm": 4.596906738820908, "learning_rate": 9.871667861161838e-06, "loss": 0.2688, "step": 40540 }, { "epoch": 0.8254452926208652, "grad_norm": 4.128484479908781, "learning_rate": 9.871507856620879e-06, "loss": 0.273, "step": 40550 }, { "epoch": 0.825648854961832, "grad_norm": 6.315662934918145, "learning_rate": 9.871347753693544e-06, "loss": 0.2945, "step": 40560 }, { "epoch": 0.825852417302799, "grad_norm": 4.252236330421429, "learning_rate": 9.871187552383067e-06, "loss": 0.2136, "step": 40570 }, { "epoch": 0.8260559796437659, "grad_norm": 0.9277880970357927, "learning_rate": 9.871027252692684e-06, "loss": 0.3042, "step": 40580 }, { "epoch": 0.8262595419847328, "grad_norm": 8.315261276751933, "learning_rate": 9.870866854625633e-06, "loss": 0.2208, "step": 40590 }, { "epoch": 0.8264631043256997, "grad_norm": 11.453813786420667, "learning_rate": 9.870706358185151e-06, "loss": 0.2892, "step": 40600 }, { "epoch": 0.8266666666666667, "grad_norm": 13.218651294987211, "learning_rate": 9.870545763374479e-06, "loss": 0.2095, "step": 40610 }, { "epoch": 0.8268702290076336, "grad_norm": 20.00814479583112, "learning_rate": 9.870385070196865e-06, "loss": 0.3101, "step": 40620 }, { "epoch": 0.8270737913486005, "grad_norm": 17.508009130377904, "learning_rate": 9.87022427865555e-06, "loss": 0.3704, "step": 40630 }, { "epoch": 0.8272773536895675, "grad_norm": 5.855921621648579, "learning_rate": 9.870063388753783e-06, "loss": 0.3158, "step": 40640 }, { "epoch": 0.8274809160305343, "grad_norm": 8.169101765704161, "learning_rate": 9.869902400494813e-06, "loss": 0.3645, "step": 40650 }, { "epoch": 0.8276844783715013, "grad_norm": 5.5543165876638945, "learning_rate": 9.869741313881893e-06, "loss": 0.274, "step": 40660 }, { "epoch": 0.8278880407124682, "grad_norm": 10.615040025197096, "learning_rate": 9.869580128918274e-06, "loss": 0.2447, "step": 40670 }, { "epoch": 0.8280916030534351, "grad_norm": 13.470924758989028, "learning_rate": 9.869418845607211e-06, "loss": 0.3312, "step": 40680 }, { "epoch": 0.8282951653944021, "grad_norm": 27.716597952310845, "learning_rate": 9.869257463951964e-06, "loss": 0.3092, "step": 40690 }, { "epoch": 0.8284987277353689, "grad_norm": 5.162953654360446, "learning_rate": 9.869095983955792e-06, "loss": 0.3091, "step": 40700 }, { "epoch": 0.8287022900763359, "grad_norm": 9.539634416815316, "learning_rate": 9.868934405621953e-06, "loss": 0.2992, "step": 40710 }, { "epoch": 0.8289058524173027, "grad_norm": 3.7810553622270984, "learning_rate": 9.868772728953715e-06, "loss": 0.2507, "step": 40720 }, { "epoch": 0.8291094147582697, "grad_norm": 11.311305568008331, "learning_rate": 9.868610953954338e-06, "loss": 0.3129, "step": 40730 }, { "epoch": 0.8293129770992367, "grad_norm": 11.209109923762338, "learning_rate": 9.868449080627094e-06, "loss": 0.3479, "step": 40740 }, { "epoch": 0.8295165394402035, "grad_norm": 6.977922425054789, "learning_rate": 9.86828710897525e-06, "loss": 0.3291, "step": 40750 }, { "epoch": 0.8297201017811705, "grad_norm": 8.7113893974018, "learning_rate": 9.868125039002077e-06, "loss": 0.2956, "step": 40760 }, { "epoch": 0.8299236641221374, "grad_norm": 8.096389937839787, "learning_rate": 9.867962870710849e-06, "loss": 0.305, "step": 40770 }, { "epoch": 0.8301272264631043, "grad_norm": 11.398801369371604, "learning_rate": 9.867800604104842e-06, "loss": 0.2483, "step": 40780 }, { "epoch": 0.8303307888040713, "grad_norm": 12.505252984775202, "learning_rate": 9.867638239187331e-06, "loss": 0.2803, "step": 40790 }, { "epoch": 0.8305343511450382, "grad_norm": 11.831776998323695, "learning_rate": 9.867475775961597e-06, "loss": 0.318, "step": 40800 }, { "epoch": 0.8307379134860051, "grad_norm": 9.899814074437968, "learning_rate": 9.867313214430919e-06, "loss": 0.3524, "step": 40810 }, { "epoch": 0.830941475826972, "grad_norm": 2.070263044809452, "learning_rate": 9.867150554598584e-06, "loss": 0.2638, "step": 40820 }, { "epoch": 0.8311450381679389, "grad_norm": 8.144281855994977, "learning_rate": 9.866987796467872e-06, "loss": 0.2738, "step": 40830 }, { "epoch": 0.8313486005089058, "grad_norm": 6.7172932430443835, "learning_rate": 9.866824940042075e-06, "loss": 0.3281, "step": 40840 }, { "epoch": 0.8315521628498728, "grad_norm": 4.656290827555206, "learning_rate": 9.866661985324478e-06, "loss": 0.3112, "step": 40850 }, { "epoch": 0.8317557251908397, "grad_norm": 7.1809192825970705, "learning_rate": 9.866498932318375e-06, "loss": 0.3344, "step": 40860 }, { "epoch": 0.8319592875318066, "grad_norm": 7.235413908767848, "learning_rate": 9.866335781027058e-06, "loss": 0.2647, "step": 40870 }, { "epoch": 0.8321628498727736, "grad_norm": 4.988448963702021, "learning_rate": 9.866172531453822e-06, "loss": 0.3037, "step": 40880 }, { "epoch": 0.8323664122137404, "grad_norm": 7.9455311138397215, "learning_rate": 9.866009183601963e-06, "loss": 0.2774, "step": 40890 }, { "epoch": 0.8325699745547074, "grad_norm": 12.039597417535317, "learning_rate": 9.865845737474785e-06, "loss": 0.2841, "step": 40900 }, { "epoch": 0.8327735368956743, "grad_norm": 7.367663154684688, "learning_rate": 9.86568219307558e-06, "loss": 0.4817, "step": 40910 }, { "epoch": 0.8329770992366412, "grad_norm": 16.737641303137114, "learning_rate": 9.86551855040766e-06, "loss": 0.3427, "step": 40920 }, { "epoch": 0.8331806615776082, "grad_norm": 2.503462689627406, "learning_rate": 9.865354809474325e-06, "loss": 0.2086, "step": 40930 }, { "epoch": 0.833384223918575, "grad_norm": 8.929447426423224, "learning_rate": 9.865190970278881e-06, "loss": 0.2841, "step": 40940 }, { "epoch": 0.833587786259542, "grad_norm": 5.943810284476106, "learning_rate": 9.865027032824643e-06, "loss": 0.3821, "step": 40950 }, { "epoch": 0.8337913486005089, "grad_norm": 9.77828150564337, "learning_rate": 9.864862997114916e-06, "loss": 0.3492, "step": 40960 }, { "epoch": 0.8339949109414758, "grad_norm": 4.943001825977508, "learning_rate": 9.864698863153014e-06, "loss": 0.329, "step": 40970 }, { "epoch": 0.8341984732824428, "grad_norm": 11.424749377059607, "learning_rate": 9.864534630942254e-06, "loss": 0.2457, "step": 40980 }, { "epoch": 0.8344020356234096, "grad_norm": 4.919443742758433, "learning_rate": 9.86437030048595e-06, "loss": 0.1924, "step": 40990 }, { "epoch": 0.8346055979643766, "grad_norm": 9.17769301927163, "learning_rate": 9.864205871787424e-06, "loss": 0.2344, "step": 41000 }, { "epoch": 0.8348091603053435, "grad_norm": 6.05531526079812, "learning_rate": 9.864041344849996e-06, "loss": 0.3768, "step": 41010 }, { "epoch": 0.8350127226463104, "grad_norm": 15.863003705919665, "learning_rate": 9.863876719676988e-06, "loss": 0.2843, "step": 41020 }, { "epoch": 0.8352162849872774, "grad_norm": 12.892521450923642, "learning_rate": 9.863711996271723e-06, "loss": 0.3222, "step": 41030 }, { "epoch": 0.8354198473282443, "grad_norm": 10.16002704993557, "learning_rate": 9.863547174637533e-06, "loss": 0.337, "step": 41040 }, { "epoch": 0.8356234096692112, "grad_norm": 22.493094890062295, "learning_rate": 9.863382254777741e-06, "loss": 0.322, "step": 41050 }, { "epoch": 0.8358269720101781, "grad_norm": 3.676351684087691, "learning_rate": 9.863217236695682e-06, "loss": 0.3379, "step": 41060 }, { "epoch": 0.836030534351145, "grad_norm": 10.309183975460689, "learning_rate": 9.863052120394686e-06, "loss": 0.291, "step": 41070 }, { "epoch": 0.8362340966921119, "grad_norm": 10.629764577591708, "learning_rate": 9.862886905878088e-06, "loss": 0.301, "step": 41080 }, { "epoch": 0.8364376590330789, "grad_norm": 9.481208656193026, "learning_rate": 9.862721593149226e-06, "loss": 0.3871, "step": 41090 }, { "epoch": 0.8366412213740458, "grad_norm": 5.218805790061177, "learning_rate": 9.86255618221144e-06, "loss": 0.3569, "step": 41100 }, { "epoch": 0.8368447837150127, "grad_norm": 10.852370939753065, "learning_rate": 9.862390673068067e-06, "loss": 0.3368, "step": 41110 }, { "epoch": 0.8370483460559797, "grad_norm": 7.6420090430164205, "learning_rate": 9.862225065722454e-06, "loss": 0.2866, "step": 41120 }, { "epoch": 0.8372519083969465, "grad_norm": 9.672395001982784, "learning_rate": 9.862059360177943e-06, "loss": 0.2974, "step": 41130 }, { "epoch": 0.8374554707379135, "grad_norm": 11.940034132323376, "learning_rate": 9.861893556437878e-06, "loss": 0.2959, "step": 41140 }, { "epoch": 0.8376590330788805, "grad_norm": 6.7311618073015165, "learning_rate": 9.861727654505612e-06, "loss": 0.2767, "step": 41150 }, { "epoch": 0.8378625954198473, "grad_norm": 15.608790386179226, "learning_rate": 9.861561654384494e-06, "loss": 0.3427, "step": 41160 }, { "epoch": 0.8380661577608143, "grad_norm": 6.459188572307004, "learning_rate": 9.86139555607788e-06, "loss": 0.2222, "step": 41170 }, { "epoch": 0.8382697201017811, "grad_norm": 8.208028724709864, "learning_rate": 9.861229359589117e-06, "loss": 0.3494, "step": 41180 }, { "epoch": 0.8384732824427481, "grad_norm": 7.801534379743374, "learning_rate": 9.861063064921567e-06, "loss": 0.4139, "step": 41190 }, { "epoch": 0.838676844783715, "grad_norm": 14.164377329227326, "learning_rate": 9.86089667207859e-06, "loss": 0.3891, "step": 41200 }, { "epoch": 0.8388804071246819, "grad_norm": 4.195885194522715, "learning_rate": 9.86073018106354e-06, "loss": 0.3233, "step": 41210 }, { "epoch": 0.8390839694656489, "grad_norm": 3.553474768457429, "learning_rate": 9.860563591879787e-06, "loss": 0.271, "step": 41220 }, { "epoch": 0.8392875318066157, "grad_norm": 26.718756423625226, "learning_rate": 9.86039690453069e-06, "loss": 0.3553, "step": 41230 }, { "epoch": 0.8394910941475827, "grad_norm": 8.242891912855315, "learning_rate": 9.860230119019618e-06, "loss": 0.2892, "step": 41240 }, { "epoch": 0.8396946564885496, "grad_norm": 6.581691569520119, "learning_rate": 9.860063235349937e-06, "loss": 0.2928, "step": 41250 }, { "epoch": 0.8398982188295165, "grad_norm": 4.4361978150043315, "learning_rate": 9.85989625352502e-06, "loss": 0.2407, "step": 41260 }, { "epoch": 0.8401017811704835, "grad_norm": 4.032014704722261, "learning_rate": 9.859729173548241e-06, "loss": 0.2734, "step": 41270 }, { "epoch": 0.8403053435114504, "grad_norm": 3.334020954717441, "learning_rate": 9.85956199542297e-06, "loss": 0.2364, "step": 41280 }, { "epoch": 0.8405089058524173, "grad_norm": 6.757586781229188, "learning_rate": 9.859394719152585e-06, "loss": 0.2757, "step": 41290 }, { "epoch": 0.8407124681933842, "grad_norm": 11.747321327322636, "learning_rate": 9.859227344740466e-06, "loss": 0.3072, "step": 41300 }, { "epoch": 0.8409160305343512, "grad_norm": 11.542588657521895, "learning_rate": 9.859059872189991e-06, "loss": 0.3901, "step": 41310 }, { "epoch": 0.841119592875318, "grad_norm": 8.941388557189931, "learning_rate": 9.858892301504545e-06, "loss": 0.2923, "step": 41320 }, { "epoch": 0.841323155216285, "grad_norm": 6.287983545191491, "learning_rate": 9.85872463268751e-06, "loss": 0.3475, "step": 41330 }, { "epoch": 0.841526717557252, "grad_norm": 5.295438053579413, "learning_rate": 9.858556865742272e-06, "loss": 0.3371, "step": 41340 }, { "epoch": 0.8417302798982188, "grad_norm": 4.894026894701354, "learning_rate": 9.85838900067222e-06, "loss": 0.3189, "step": 41350 }, { "epoch": 0.8419338422391858, "grad_norm": 3.039682232420038, "learning_rate": 9.858221037480748e-06, "loss": 0.3351, "step": 41360 }, { "epoch": 0.8421374045801526, "grad_norm": 7.7576820797251616, "learning_rate": 9.858052976171241e-06, "loss": 0.3617, "step": 41370 }, { "epoch": 0.8423409669211196, "grad_norm": 8.20207410342447, "learning_rate": 9.8578848167471e-06, "loss": 0.2537, "step": 41380 }, { "epoch": 0.8425445292620866, "grad_norm": 6.742121165930639, "learning_rate": 9.857716559211717e-06, "loss": 0.3402, "step": 41390 }, { "epoch": 0.8427480916030534, "grad_norm": 8.778988670797574, "learning_rate": 9.857548203568492e-06, "loss": 0.3799, "step": 41400 }, { "epoch": 0.8429516539440204, "grad_norm": 9.038569135794413, "learning_rate": 9.857379749820823e-06, "loss": 0.4251, "step": 41410 }, { "epoch": 0.8431552162849872, "grad_norm": 7.3440009333176715, "learning_rate": 9.857211197972114e-06, "loss": 0.2925, "step": 41420 }, { "epoch": 0.8433587786259542, "grad_norm": 3.8832345013027907, "learning_rate": 9.85704254802577e-06, "loss": 0.2682, "step": 41430 }, { "epoch": 0.8435623409669211, "grad_norm": 6.104091418877754, "learning_rate": 9.856873799985195e-06, "loss": 0.3037, "step": 41440 }, { "epoch": 0.843765903307888, "grad_norm": 5.293264697708153, "learning_rate": 9.856704953853798e-06, "loss": 0.3175, "step": 41450 }, { "epoch": 0.843969465648855, "grad_norm": 3.384617456251725, "learning_rate": 9.85653600963499e-06, "loss": 0.274, "step": 41460 }, { "epoch": 0.8441730279898219, "grad_norm": 4.925378154286671, "learning_rate": 9.856366967332181e-06, "loss": 0.2314, "step": 41470 }, { "epoch": 0.8443765903307888, "grad_norm": 13.393814564601449, "learning_rate": 9.856197826948787e-06, "loss": 0.3935, "step": 41480 }, { "epoch": 0.8445801526717557, "grad_norm": 5.087342507750495, "learning_rate": 9.856028588488222e-06, "loss": 0.3648, "step": 41490 }, { "epoch": 0.8447837150127226, "grad_norm": 9.648301116927513, "learning_rate": 9.855859251953907e-06, "loss": 0.2449, "step": 41500 }, { "epoch": 0.8449872773536896, "grad_norm": 2.7535830328905813, "learning_rate": 9.855689817349258e-06, "loss": 0.2403, "step": 41510 }, { "epoch": 0.8451908396946565, "grad_norm": 4.043185051170275, "learning_rate": 9.8555202846777e-06, "loss": 0.3688, "step": 41520 }, { "epoch": 0.8453944020356234, "grad_norm": 11.989824294911823, "learning_rate": 9.855350653942656e-06, "loss": 0.3729, "step": 41530 }, { "epoch": 0.8455979643765903, "grad_norm": 12.401733193278464, "learning_rate": 9.855180925147551e-06, "loss": 0.3074, "step": 41540 }, { "epoch": 0.8458015267175573, "grad_norm": 8.50397183444343, "learning_rate": 9.855011098295817e-06, "loss": 0.2505, "step": 41550 }, { "epoch": 0.8460050890585241, "grad_norm": 14.233616050463791, "learning_rate": 9.854841173390878e-06, "loss": 0.4725, "step": 41560 }, { "epoch": 0.8462086513994911, "grad_norm": 7.380308000135858, "learning_rate": 9.854671150436168e-06, "loss": 0.3258, "step": 41570 }, { "epoch": 0.846412213740458, "grad_norm": 10.334767701634718, "learning_rate": 9.854501029435123e-06, "loss": 0.3004, "step": 41580 }, { "epoch": 0.8466157760814249, "grad_norm": 11.333800624017346, "learning_rate": 9.854330810391176e-06, "loss": 0.3702, "step": 41590 }, { "epoch": 0.8468193384223919, "grad_norm": 9.556394533913638, "learning_rate": 9.854160493307765e-06, "loss": 0.3572, "step": 41600 }, { "epoch": 0.8470229007633587, "grad_norm": 9.312279480797331, "learning_rate": 9.853990078188334e-06, "loss": 0.2701, "step": 41610 }, { "epoch": 0.8472264631043257, "grad_norm": 6.732182489289858, "learning_rate": 9.853819565036318e-06, "loss": 0.2947, "step": 41620 }, { "epoch": 0.8474300254452927, "grad_norm": 8.373143540277049, "learning_rate": 9.853648953855165e-06, "loss": 0.336, "step": 41630 }, { "epoch": 0.8476335877862595, "grad_norm": 3.6626999233830815, "learning_rate": 9.853478244648322e-06, "loss": 0.3134, "step": 41640 }, { "epoch": 0.8478371501272265, "grad_norm": 28.826872644741442, "learning_rate": 9.853307437419234e-06, "loss": 0.392, "step": 41650 }, { "epoch": 0.8480407124681933, "grad_norm": 12.01355140496626, "learning_rate": 9.85313653217135e-06, "loss": 0.4199, "step": 41660 }, { "epoch": 0.8482442748091603, "grad_norm": 5.4041930048108515, "learning_rate": 9.852965528908124e-06, "loss": 0.3188, "step": 41670 }, { "epoch": 0.8484478371501272, "grad_norm": 11.123510475672116, "learning_rate": 9.852794427633008e-06, "loss": 0.2657, "step": 41680 }, { "epoch": 0.8486513994910941, "grad_norm": 14.63696539762647, "learning_rate": 9.852623228349458e-06, "loss": 0.4236, "step": 41690 }, { "epoch": 0.8488549618320611, "grad_norm": 9.186593202300381, "learning_rate": 9.852451931060932e-06, "loss": 0.2847, "step": 41700 }, { "epoch": 0.849058524173028, "grad_norm": 5.5439150277662765, "learning_rate": 9.852280535770891e-06, "loss": 0.3293, "step": 41710 }, { "epoch": 0.8492620865139949, "grad_norm": 7.7612066694608, "learning_rate": 9.852109042482791e-06, "loss": 0.2877, "step": 41720 }, { "epoch": 0.8494656488549618, "grad_norm": 6.868795248365345, "learning_rate": 9.851937451200103e-06, "loss": 0.3213, "step": 41730 }, { "epoch": 0.8496692111959288, "grad_norm": 8.161741069518522, "learning_rate": 9.851765761926287e-06, "loss": 0.3087, "step": 41740 }, { "epoch": 0.8498727735368957, "grad_norm": 4.38811421289846, "learning_rate": 9.851593974664813e-06, "loss": 0.2538, "step": 41750 }, { "epoch": 0.8500763358778626, "grad_norm": 8.609331699944317, "learning_rate": 9.851422089419151e-06, "loss": 0.3353, "step": 41760 }, { "epoch": 0.8502798982188295, "grad_norm": 5.515660371461399, "learning_rate": 9.85125010619277e-06, "loss": 0.2759, "step": 41770 }, { "epoch": 0.8504834605597964, "grad_norm": 9.086061305652855, "learning_rate": 9.851078024989145e-06, "loss": 0.2684, "step": 41780 }, { "epoch": 0.8506870229007634, "grad_norm": 10.233283816516549, "learning_rate": 9.85090584581175e-06, "loss": 0.4283, "step": 41790 }, { "epoch": 0.8508905852417303, "grad_norm": 12.512127260371608, "learning_rate": 9.850733568664066e-06, "loss": 0.3795, "step": 41800 }, { "epoch": 0.8510941475826972, "grad_norm": 10.566968740031784, "learning_rate": 9.850561193549568e-06, "loss": 0.3409, "step": 41810 }, { "epoch": 0.8512977099236642, "grad_norm": 8.658819664141014, "learning_rate": 9.85038872047174e-06, "loss": 0.3234, "step": 41820 }, { "epoch": 0.851501272264631, "grad_norm": 6.50631521313851, "learning_rate": 9.850216149434064e-06, "loss": 0.2858, "step": 41830 }, { "epoch": 0.851704834605598, "grad_norm": 8.358636768013179, "learning_rate": 9.850043480440025e-06, "loss": 0.3215, "step": 41840 }, { "epoch": 0.8519083969465648, "grad_norm": 13.93814862708593, "learning_rate": 9.849870713493114e-06, "loss": 0.3173, "step": 41850 }, { "epoch": 0.8521119592875318, "grad_norm": 14.109562197705221, "learning_rate": 9.849697848596816e-06, "loss": 0.2678, "step": 41860 }, { "epoch": 0.8523155216284988, "grad_norm": 14.091907898943697, "learning_rate": 9.849524885754621e-06, "loss": 0.3541, "step": 41870 }, { "epoch": 0.8525190839694656, "grad_norm": 8.015249122997702, "learning_rate": 9.849351824970028e-06, "loss": 0.3284, "step": 41880 }, { "epoch": 0.8527226463104326, "grad_norm": 10.385462860179311, "learning_rate": 9.849178666246528e-06, "loss": 0.3239, "step": 41890 }, { "epoch": 0.8529262086513995, "grad_norm": 10.567474139945915, "learning_rate": 9.84900540958762e-06, "loss": 0.3109, "step": 41900 }, { "epoch": 0.8531297709923664, "grad_norm": 10.616443664693591, "learning_rate": 9.8488320549968e-06, "loss": 0.2453, "step": 41910 }, { "epoch": 0.8533333333333334, "grad_norm": 8.428323002911855, "learning_rate": 9.848658602477574e-06, "loss": 0.3393, "step": 41920 }, { "epoch": 0.8535368956743002, "grad_norm": 14.251528519699205, "learning_rate": 9.84848505203344e-06, "loss": 0.3456, "step": 41930 }, { "epoch": 0.8537404580152672, "grad_norm": 5.911411447472813, "learning_rate": 9.848311403667907e-06, "loss": 0.3502, "step": 41940 }, { "epoch": 0.8539440203562341, "grad_norm": 7.759022580797712, "learning_rate": 9.84813765738448e-06, "loss": 0.3695, "step": 41950 }, { "epoch": 0.854147582697201, "grad_norm": 1.9080247175967138, "learning_rate": 9.84796381318667e-06, "loss": 0.2797, "step": 41960 }, { "epoch": 0.8543511450381679, "grad_norm": 2.5680460670876673, "learning_rate": 9.847789871077986e-06, "loss": 0.2532, "step": 41970 }, { "epoch": 0.8545547073791349, "grad_norm": 14.348175823003908, "learning_rate": 9.847615831061941e-06, "loss": 0.3058, "step": 41980 }, { "epoch": 0.8547582697201018, "grad_norm": 8.696977139767025, "learning_rate": 9.847441693142052e-06, "loss": 0.3288, "step": 41990 }, { "epoch": 0.8549618320610687, "grad_norm": 8.77034007432613, "learning_rate": 9.847267457321832e-06, "loss": 0.4076, "step": 42000 }, { "epoch": 0.8551653944020357, "grad_norm": 9.587772721378878, "learning_rate": 9.847093123604804e-06, "loss": 0.3278, "step": 42010 }, { "epoch": 0.8553689567430025, "grad_norm": 4.567952082829658, "learning_rate": 9.846918691994488e-06, "loss": 0.3167, "step": 42020 }, { "epoch": 0.8555725190839695, "grad_norm": 6.463981347668532, "learning_rate": 9.846744162494406e-06, "loss": 0.3113, "step": 42030 }, { "epoch": 0.8557760814249364, "grad_norm": 5.347610152962338, "learning_rate": 9.846569535108082e-06, "loss": 0.2661, "step": 42040 }, { "epoch": 0.8559796437659033, "grad_norm": 8.92849940688936, "learning_rate": 9.846394809839044e-06, "loss": 0.3384, "step": 42050 }, { "epoch": 0.8561832061068703, "grad_norm": 8.244915261155223, "learning_rate": 9.846219986690822e-06, "loss": 0.2875, "step": 42060 }, { "epoch": 0.8563867684478371, "grad_norm": 6.644887689840423, "learning_rate": 9.846045065666944e-06, "loss": 0.3087, "step": 42070 }, { "epoch": 0.8565903307888041, "grad_norm": 0.44698891842207894, "learning_rate": 9.845870046770947e-06, "loss": 0.2418, "step": 42080 }, { "epoch": 0.856793893129771, "grad_norm": 8.66962824254746, "learning_rate": 9.84569493000636e-06, "loss": 0.2765, "step": 42090 }, { "epoch": 0.8569974554707379, "grad_norm": 8.164212443278943, "learning_rate": 9.845519715376723e-06, "loss": 0.3022, "step": 42100 }, { "epoch": 0.8572010178117049, "grad_norm": 7.719587810207891, "learning_rate": 9.845344402885576e-06, "loss": 0.2605, "step": 42110 }, { "epoch": 0.8574045801526717, "grad_norm": 12.921503058681635, "learning_rate": 9.845168992536458e-06, "loss": 0.3671, "step": 42120 }, { "epoch": 0.8576081424936387, "grad_norm": 7.095709888018747, "learning_rate": 9.84499348433291e-06, "loss": 0.2544, "step": 42130 }, { "epoch": 0.8578117048346056, "grad_norm": 8.458778298461985, "learning_rate": 9.844817878278479e-06, "loss": 0.3582, "step": 42140 }, { "epoch": 0.8580152671755725, "grad_norm": 7.844873844200513, "learning_rate": 9.84464217437671e-06, "loss": 0.2354, "step": 42150 }, { "epoch": 0.8582188295165395, "grad_norm": 9.5406165835876, "learning_rate": 9.844466372631155e-06, "loss": 0.2711, "step": 42160 }, { "epoch": 0.8584223918575064, "grad_norm": 9.151316720027427, "learning_rate": 9.844290473045359e-06, "loss": 0.348, "step": 42170 }, { "epoch": 0.8586259541984733, "grad_norm": 7.812042816613748, "learning_rate": 9.844114475622879e-06, "loss": 0.2399, "step": 42180 }, { "epoch": 0.8588295165394402, "grad_norm": 3.5226765583781714, "learning_rate": 9.843938380367268e-06, "loss": 0.2856, "step": 42190 }, { "epoch": 0.8590330788804071, "grad_norm": 5.340035349156226, "learning_rate": 9.843762187282081e-06, "loss": 0.3476, "step": 42200 }, { "epoch": 0.859236641221374, "grad_norm": 7.007880114370497, "learning_rate": 9.843585896370879e-06, "loss": 0.277, "step": 42210 }, { "epoch": 0.859440203562341, "grad_norm": 10.083723025577711, "learning_rate": 9.843409507637222e-06, "loss": 0.3075, "step": 42220 }, { "epoch": 0.8596437659033079, "grad_norm": 7.5869119113658785, "learning_rate": 9.843233021084672e-06, "loss": 0.2367, "step": 42230 }, { "epoch": 0.8598473282442748, "grad_norm": 4.918892492993603, "learning_rate": 9.843056436716793e-06, "loss": 0.3394, "step": 42240 }, { "epoch": 0.8600508905852418, "grad_norm": 6.509066235501212, "learning_rate": 9.84287975453715e-06, "loss": 0.2786, "step": 42250 }, { "epoch": 0.8602544529262086, "grad_norm": 7.032733303934035, "learning_rate": 9.842702974549314e-06, "loss": 0.3271, "step": 42260 }, { "epoch": 0.8604580152671756, "grad_norm": 9.357701542748208, "learning_rate": 9.842526096756852e-06, "loss": 0.3138, "step": 42270 }, { "epoch": 0.8606615776081425, "grad_norm": 3.6206031450056138, "learning_rate": 9.842349121163341e-06, "loss": 0.1899, "step": 42280 }, { "epoch": 0.8608651399491094, "grad_norm": 4.300541009524754, "learning_rate": 9.842172047772352e-06, "loss": 0.3365, "step": 42290 }, { "epoch": 0.8610687022900764, "grad_norm": 4.984897541210419, "learning_rate": 9.841994876587462e-06, "loss": 0.2832, "step": 42300 }, { "epoch": 0.8612722646310432, "grad_norm": 14.226081461225537, "learning_rate": 9.841817607612247e-06, "loss": 0.2254, "step": 42310 }, { "epoch": 0.8614758269720102, "grad_norm": 11.117519515351312, "learning_rate": 9.841640240850292e-06, "loss": 0.3145, "step": 42320 }, { "epoch": 0.861679389312977, "grad_norm": 6.6763673127381535, "learning_rate": 9.841462776305174e-06, "loss": 0.3065, "step": 42330 }, { "epoch": 0.861882951653944, "grad_norm": 3.660180180550442, "learning_rate": 9.841285213980481e-06, "loss": 0.3136, "step": 42340 }, { "epoch": 0.862086513994911, "grad_norm": 11.680590699033111, "learning_rate": 9.841107553879796e-06, "loss": 0.3885, "step": 42350 }, { "epoch": 0.8622900763358778, "grad_norm": 9.60951051437501, "learning_rate": 9.84092979600671e-06, "loss": 0.3087, "step": 42360 }, { "epoch": 0.8624936386768448, "grad_norm": 7.390775553961468, "learning_rate": 9.840751940364812e-06, "loss": 0.2722, "step": 42370 }, { "epoch": 0.8626972010178117, "grad_norm": 4.336963039894663, "learning_rate": 9.840573986957692e-06, "loss": 0.3373, "step": 42380 }, { "epoch": 0.8629007633587786, "grad_norm": 7.647388568616615, "learning_rate": 9.840395935788949e-06, "loss": 0.3248, "step": 42390 }, { "epoch": 0.8631043256997456, "grad_norm": 4.426213925941982, "learning_rate": 9.840217786862172e-06, "loss": 0.2784, "step": 42400 }, { "epoch": 0.8633078880407125, "grad_norm": 8.95061630958387, "learning_rate": 9.840039540180965e-06, "loss": 0.1953, "step": 42410 }, { "epoch": 0.8635114503816794, "grad_norm": 8.893118510467666, "learning_rate": 9.839861195748925e-06, "loss": 0.2747, "step": 42420 }, { "epoch": 0.8637150127226463, "grad_norm": 14.748153570111011, "learning_rate": 9.839682753569652e-06, "loss": 0.2886, "step": 42430 }, { "epoch": 0.8639185750636132, "grad_norm": 16.924964309851596, "learning_rate": 9.839504213646755e-06, "loss": 0.2963, "step": 42440 }, { "epoch": 0.8641221374045801, "grad_norm": 3.9206553408671585, "learning_rate": 9.839325575983838e-06, "loss": 0.3512, "step": 42450 }, { "epoch": 0.8643256997455471, "grad_norm": 3.7207828855956233, "learning_rate": 9.839146840584505e-06, "loss": 0.2298, "step": 42460 }, { "epoch": 0.864529262086514, "grad_norm": 7.61128406493728, "learning_rate": 9.838968007452371e-06, "loss": 0.3611, "step": 42470 }, { "epoch": 0.8647328244274809, "grad_norm": 5.729378643810227, "learning_rate": 9.838789076591045e-06, "loss": 0.2715, "step": 42480 }, { "epoch": 0.8649363867684479, "grad_norm": 11.357905213311199, "learning_rate": 9.838610048004142e-06, "loss": 0.3562, "step": 42490 }, { "epoch": 0.8651399491094147, "grad_norm": 3.95092834928144, "learning_rate": 9.838430921695277e-06, "loss": 0.3499, "step": 42500 }, { "epoch": 0.8653435114503817, "grad_norm": 9.716503256256635, "learning_rate": 9.838251697668067e-06, "loss": 0.2539, "step": 42510 }, { "epoch": 0.8655470737913487, "grad_norm": 11.53538362623949, "learning_rate": 9.83807237592613e-06, "loss": 0.2578, "step": 42520 }, { "epoch": 0.8657506361323155, "grad_norm": 18.631837975367272, "learning_rate": 9.837892956473095e-06, "loss": 0.3914, "step": 42530 }, { "epoch": 0.8659541984732825, "grad_norm": 8.332978565391517, "learning_rate": 9.837713439312577e-06, "loss": 0.3057, "step": 42540 }, { "epoch": 0.8661577608142493, "grad_norm": 10.7770408084891, "learning_rate": 9.837533824448205e-06, "loss": 0.2778, "step": 42550 }, { "epoch": 0.8663613231552163, "grad_norm": 4.280766528012531, "learning_rate": 9.837354111883609e-06, "loss": 0.2876, "step": 42560 }, { "epoch": 0.8665648854961832, "grad_norm": 8.461644889463063, "learning_rate": 9.837174301622414e-06, "loss": 0.3812, "step": 42570 }, { "epoch": 0.8667684478371501, "grad_norm": 5.554792560030091, "learning_rate": 9.836994393668255e-06, "loss": 0.3761, "step": 42580 }, { "epoch": 0.8669720101781171, "grad_norm": 8.340010631811825, "learning_rate": 9.836814388024765e-06, "loss": 0.2746, "step": 42590 }, { "epoch": 0.867175572519084, "grad_norm": 4.999135765908935, "learning_rate": 9.836634284695576e-06, "loss": 0.4231, "step": 42600 }, { "epoch": 0.8673791348600509, "grad_norm": 8.468019794286015, "learning_rate": 9.83645408368433e-06, "loss": 0.2818, "step": 42610 }, { "epoch": 0.8675826972010178, "grad_norm": 7.522435850125892, "learning_rate": 9.836273784994664e-06, "loss": 0.2687, "step": 42620 }, { "epoch": 0.8677862595419847, "grad_norm": 8.378304427099419, "learning_rate": 9.83609338863022e-06, "loss": 0.3122, "step": 42630 }, { "epoch": 0.8679898218829517, "grad_norm": 10.858777931277833, "learning_rate": 9.835912894594641e-06, "loss": 0.3297, "step": 42640 }, { "epoch": 0.8681933842239186, "grad_norm": 14.073698755686197, "learning_rate": 9.835732302891573e-06, "loss": 0.3732, "step": 42650 }, { "epoch": 0.8683969465648855, "grad_norm": 13.964047414784728, "learning_rate": 9.835551613524663e-06, "loss": 0.3285, "step": 42660 }, { "epoch": 0.8686005089058524, "grad_norm": 5.806265061925912, "learning_rate": 9.835370826497559e-06, "loss": 0.3796, "step": 42670 }, { "epoch": 0.8688040712468194, "grad_norm": 4.711887561820873, "learning_rate": 9.835189941813913e-06, "loss": 0.3032, "step": 42680 }, { "epoch": 0.8690076335877862, "grad_norm": 5.801408156007652, "learning_rate": 9.83500895947738e-06, "loss": 0.3499, "step": 42690 }, { "epoch": 0.8692111959287532, "grad_norm": 5.661291361232757, "learning_rate": 9.834827879491612e-06, "loss": 0.3107, "step": 42700 }, { "epoch": 0.8694147582697201, "grad_norm": 3.3630420012809252, "learning_rate": 9.834646701860268e-06, "loss": 0.3182, "step": 42710 }, { "epoch": 0.869618320610687, "grad_norm": 6.213924659876646, "learning_rate": 9.834465426587008e-06, "loss": 0.3329, "step": 42720 }, { "epoch": 0.869821882951654, "grad_norm": 11.425032530407508, "learning_rate": 9.834284053675491e-06, "loss": 0.3393, "step": 42730 }, { "epoch": 0.8700254452926208, "grad_norm": 7.873612329199707, "learning_rate": 9.834102583129382e-06, "loss": 0.2678, "step": 42740 }, { "epoch": 0.8702290076335878, "grad_norm": 10.143528710721114, "learning_rate": 9.833921014952346e-06, "loss": 0.3234, "step": 42750 }, { "epoch": 0.8704325699745548, "grad_norm": 4.729339798672291, "learning_rate": 9.833739349148045e-06, "loss": 0.2715, "step": 42760 }, { "epoch": 0.8706361323155216, "grad_norm": 9.584930458660127, "learning_rate": 9.833557585720157e-06, "loss": 0.298, "step": 42770 }, { "epoch": 0.8708396946564886, "grad_norm": 4.666429810377614, "learning_rate": 9.833375724672344e-06, "loss": 0.3764, "step": 42780 }, { "epoch": 0.8710432569974554, "grad_norm": 7.9763113774064065, "learning_rate": 9.833193766008285e-06, "loss": 0.3388, "step": 42790 }, { "epoch": 0.8712468193384224, "grad_norm": 14.795999755624361, "learning_rate": 9.833011709731653e-06, "loss": 0.369, "step": 42800 }, { "epoch": 0.8714503816793893, "grad_norm": 5.304234635880186, "learning_rate": 9.832829555846123e-06, "loss": 0.2925, "step": 42810 }, { "epoch": 0.8716539440203562, "grad_norm": 25.172899625573464, "learning_rate": 9.832647304355375e-06, "loss": 0.2499, "step": 42820 }, { "epoch": 0.8718575063613232, "grad_norm": 8.606058915360038, "learning_rate": 9.832464955263093e-06, "loss": 0.3491, "step": 42830 }, { "epoch": 0.87206106870229, "grad_norm": 9.630537618296728, "learning_rate": 9.832282508572955e-06, "loss": 0.27, "step": 42840 }, { "epoch": 0.872264631043257, "grad_norm": 4.117084441387831, "learning_rate": 9.832099964288649e-06, "loss": 0.254, "step": 42850 }, { "epoch": 0.8724681933842239, "grad_norm": 6.8824572527025465, "learning_rate": 9.83191732241386e-06, "loss": 0.3018, "step": 42860 }, { "epoch": 0.8726717557251908, "grad_norm": 17.24007761768278, "learning_rate": 9.831734582952276e-06, "loss": 0.2456, "step": 42870 }, { "epoch": 0.8728753180661578, "grad_norm": 7.106854046563965, "learning_rate": 9.831551745907591e-06, "loss": 0.2578, "step": 42880 }, { "epoch": 0.8730788804071247, "grad_norm": 3.324270461182237, "learning_rate": 9.831368811283494e-06, "loss": 0.2824, "step": 42890 }, { "epoch": 0.8732824427480916, "grad_norm": 10.221906524968315, "learning_rate": 9.831185779083684e-06, "loss": 0.3384, "step": 42900 }, { "epoch": 0.8734860050890585, "grad_norm": 9.360669092899876, "learning_rate": 9.831002649311852e-06, "loss": 0.2569, "step": 42910 }, { "epoch": 0.8736895674300255, "grad_norm": 3.323434703808394, "learning_rate": 9.830819421971698e-06, "loss": 0.1966, "step": 42920 }, { "epoch": 0.8738931297709923, "grad_norm": 8.958344788547707, "learning_rate": 9.830636097066925e-06, "loss": 0.3344, "step": 42930 }, { "epoch": 0.8740966921119593, "grad_norm": 4.59526802826952, "learning_rate": 9.830452674601234e-06, "loss": 0.2551, "step": 42940 }, { "epoch": 0.8743002544529263, "grad_norm": 4.826386957873381, "learning_rate": 9.83026915457833e-06, "loss": 0.2038, "step": 42950 }, { "epoch": 0.8745038167938931, "grad_norm": 8.943049000066075, "learning_rate": 9.83008553700192e-06, "loss": 0.3479, "step": 42960 }, { "epoch": 0.8747073791348601, "grad_norm": 8.266915387025469, "learning_rate": 9.829901821875711e-06, "loss": 0.266, "step": 42970 }, { "epoch": 0.8749109414758269, "grad_norm": 16.325008297830692, "learning_rate": 9.829718009203414e-06, "loss": 0.3803, "step": 42980 }, { "epoch": 0.8751145038167939, "grad_norm": 3.881242531395163, "learning_rate": 9.829534098988742e-06, "loss": 0.306, "step": 42990 }, { "epoch": 0.8753180661577609, "grad_norm": 8.928338560381343, "learning_rate": 9.829350091235405e-06, "loss": 0.3082, "step": 43000 }, { "epoch": 0.8755216284987277, "grad_norm": 10.864074622254908, "learning_rate": 9.829165985947126e-06, "loss": 0.2967, "step": 43010 }, { "epoch": 0.8757251908396947, "grad_norm": 5.429900121909837, "learning_rate": 9.82898178312762e-06, "loss": 0.3549, "step": 43020 }, { "epoch": 0.8759287531806615, "grad_norm": 9.756908842386414, "learning_rate": 9.828797482780606e-06, "loss": 0.2073, "step": 43030 }, { "epoch": 0.8761323155216285, "grad_norm": 6.189832636356775, "learning_rate": 9.828613084909806e-06, "loss": 0.239, "step": 43040 }, { "epoch": 0.8763358778625954, "grad_norm": 8.73376938054268, "learning_rate": 9.828428589518948e-06, "loss": 0.3273, "step": 43050 }, { "epoch": 0.8765394402035623, "grad_norm": 13.1669580904101, "learning_rate": 9.828243996611755e-06, "loss": 0.3837, "step": 43060 }, { "epoch": 0.8767430025445293, "grad_norm": 5.099801065203991, "learning_rate": 9.828059306191954e-06, "loss": 0.2971, "step": 43070 }, { "epoch": 0.8769465648854962, "grad_norm": 15.065752976647437, "learning_rate": 9.82787451826328e-06, "loss": 0.2816, "step": 43080 }, { "epoch": 0.8771501272264631, "grad_norm": 8.589708023953236, "learning_rate": 9.827689632829461e-06, "loss": 0.2941, "step": 43090 }, { "epoch": 0.87735368956743, "grad_norm": 12.404870644401807, "learning_rate": 9.827504649894231e-06, "loss": 0.3481, "step": 43100 }, { "epoch": 0.877557251908397, "grad_norm": 6.202501548550979, "learning_rate": 9.827319569461325e-06, "loss": 0.2272, "step": 43110 }, { "epoch": 0.8777608142493639, "grad_norm": 4.130084163566859, "learning_rate": 9.827134391534486e-06, "loss": 0.2513, "step": 43120 }, { "epoch": 0.8779643765903308, "grad_norm": 10.132912481017366, "learning_rate": 9.826949116117448e-06, "loss": 0.2971, "step": 43130 }, { "epoch": 0.8781679389312977, "grad_norm": 11.308841133657024, "learning_rate": 9.826763743213958e-06, "loss": 0.2543, "step": 43140 }, { "epoch": 0.8783715012722646, "grad_norm": 11.296432982226818, "learning_rate": 9.826578272827755e-06, "loss": 0.3434, "step": 43150 }, { "epoch": 0.8785750636132316, "grad_norm": 45.3810317288427, "learning_rate": 9.826392704962589e-06, "loss": 0.323, "step": 43160 }, { "epoch": 0.8787786259541984, "grad_norm": 8.411303835186988, "learning_rate": 9.826207039622206e-06, "loss": 0.1885, "step": 43170 }, { "epoch": 0.8789821882951654, "grad_norm": 7.720995438591909, "learning_rate": 9.826021276810353e-06, "loss": 0.408, "step": 43180 }, { "epoch": 0.8791857506361324, "grad_norm": 3.631803177000356, "learning_rate": 9.825835416530789e-06, "loss": 0.2107, "step": 43190 }, { "epoch": 0.8793893129770992, "grad_norm": 6.018757597819073, "learning_rate": 9.82564945878726e-06, "loss": 0.2757, "step": 43200 }, { "epoch": 0.8795928753180662, "grad_norm": 9.76551581660658, "learning_rate": 9.825463403583525e-06, "loss": 0.3427, "step": 43210 }, { "epoch": 0.879796437659033, "grad_norm": 8.041365014568546, "learning_rate": 9.825277250923342e-06, "loss": 0.3913, "step": 43220 }, { "epoch": 0.88, "grad_norm": 11.68225443570152, "learning_rate": 9.825091000810469e-06, "loss": 0.3376, "step": 43230 }, { "epoch": 0.880203562340967, "grad_norm": 13.456214913079542, "learning_rate": 9.82490465324867e-06, "loss": 0.2524, "step": 43240 }, { "epoch": 0.8804071246819338, "grad_norm": 10.484625072936359, "learning_rate": 9.824718208241707e-06, "loss": 0.272, "step": 43250 }, { "epoch": 0.8806106870229008, "grad_norm": 9.00058400611165, "learning_rate": 9.824531665793343e-06, "loss": 0.2428, "step": 43260 }, { "epoch": 0.8808142493638677, "grad_norm": 12.511749449025697, "learning_rate": 9.824345025907351e-06, "loss": 0.2365, "step": 43270 }, { "epoch": 0.8810178117048346, "grad_norm": 9.992205483020024, "learning_rate": 9.824158288587498e-06, "loss": 0.3575, "step": 43280 }, { "epoch": 0.8812213740458015, "grad_norm": 4.478606050868119, "learning_rate": 9.823971453837553e-06, "loss": 0.3315, "step": 43290 }, { "epoch": 0.8814249363867684, "grad_norm": 3.430270773754727, "learning_rate": 9.823784521661292e-06, "loss": 0.2948, "step": 43300 }, { "epoch": 0.8816284987277354, "grad_norm": 10.356062929354966, "learning_rate": 9.823597492062488e-06, "loss": 0.2427, "step": 43310 }, { "epoch": 0.8818320610687023, "grad_norm": 7.230015872604013, "learning_rate": 9.823410365044922e-06, "loss": 0.2671, "step": 43320 }, { "epoch": 0.8820356234096692, "grad_norm": 8.049322664841254, "learning_rate": 9.82322314061237e-06, "loss": 0.2986, "step": 43330 }, { "epoch": 0.8822391857506361, "grad_norm": 6.790779021782749, "learning_rate": 9.823035818768616e-06, "loss": 0.3045, "step": 43340 }, { "epoch": 0.8824427480916031, "grad_norm": 9.26721402908779, "learning_rate": 9.822848399517442e-06, "loss": 0.338, "step": 43350 }, { "epoch": 0.88264631043257, "grad_norm": 7.014031904788932, "learning_rate": 9.82266088286263e-06, "loss": 0.2325, "step": 43360 }, { "epoch": 0.8828498727735369, "grad_norm": 10.586299053873985, "learning_rate": 9.822473268807972e-06, "loss": 0.2556, "step": 43370 }, { "epoch": 0.8830534351145038, "grad_norm": 3.1424183248843756, "learning_rate": 9.822285557357254e-06, "loss": 0.2952, "step": 43380 }, { "epoch": 0.8832569974554707, "grad_norm": 9.391708851608968, "learning_rate": 9.822097748514268e-06, "loss": 0.3089, "step": 43390 }, { "epoch": 0.8834605597964377, "grad_norm": 7.961600499919846, "learning_rate": 9.821909842282808e-06, "loss": 0.3265, "step": 43400 }, { "epoch": 0.8836641221374045, "grad_norm": 10.401342217236008, "learning_rate": 9.821721838666668e-06, "loss": 0.2792, "step": 43410 }, { "epoch": 0.8838676844783715, "grad_norm": 4.793561587753332, "learning_rate": 9.821533737669645e-06, "loss": 0.2188, "step": 43420 }, { "epoch": 0.8840712468193385, "grad_norm": 7.000365802168821, "learning_rate": 9.821345539295537e-06, "loss": 0.3345, "step": 43430 }, { "epoch": 0.8842748091603053, "grad_norm": 6.670437249080872, "learning_rate": 9.821157243548146e-06, "loss": 0.2735, "step": 43440 }, { "epoch": 0.8844783715012723, "grad_norm": 8.33421733103291, "learning_rate": 9.820968850431278e-06, "loss": 0.2805, "step": 43450 }, { "epoch": 0.8846819338422391, "grad_norm": 4.111766286907754, "learning_rate": 9.820780359948732e-06, "loss": 0.2954, "step": 43460 }, { "epoch": 0.8848854961832061, "grad_norm": 16.337098541192404, "learning_rate": 9.820591772104317e-06, "loss": 0.3098, "step": 43470 }, { "epoch": 0.8850890585241731, "grad_norm": 5.32094256426203, "learning_rate": 9.820403086901845e-06, "loss": 0.3915, "step": 43480 }, { "epoch": 0.8852926208651399, "grad_norm": 14.545299686745293, "learning_rate": 9.82021430434512e-06, "loss": 0.3591, "step": 43490 }, { "epoch": 0.8854961832061069, "grad_norm": 2.0655979195743894, "learning_rate": 9.820025424437962e-06, "loss": 0.2025, "step": 43500 }, { "epoch": 0.8856997455470738, "grad_norm": 12.415376323717705, "learning_rate": 9.81983644718418e-06, "loss": 0.3626, "step": 43510 }, { "epoch": 0.8859033078880407, "grad_norm": 3.9966930290615794, "learning_rate": 9.819647372587596e-06, "loss": 0.3298, "step": 43520 }, { "epoch": 0.8861068702290076, "grad_norm": 14.340464541073304, "learning_rate": 9.819458200652024e-06, "loss": 0.2808, "step": 43530 }, { "epoch": 0.8863104325699745, "grad_norm": 10.870522339181015, "learning_rate": 9.819268931381288e-06, "loss": 0.378, "step": 43540 }, { "epoch": 0.8865139949109415, "grad_norm": 33.59118178013167, "learning_rate": 9.819079564779206e-06, "loss": 0.3428, "step": 43550 }, { "epoch": 0.8867175572519084, "grad_norm": 9.320055919213146, "learning_rate": 9.818890100849606e-06, "loss": 0.2515, "step": 43560 }, { "epoch": 0.8869211195928753, "grad_norm": 7.029635976571627, "learning_rate": 9.818700539596315e-06, "loss": 0.3388, "step": 43570 }, { "epoch": 0.8871246819338422, "grad_norm": 5.889939699705099, "learning_rate": 9.81851088102316e-06, "loss": 0.2961, "step": 43580 }, { "epoch": 0.8873282442748092, "grad_norm": 30.408839890752706, "learning_rate": 9.81832112513397e-06, "loss": 0.2495, "step": 43590 }, { "epoch": 0.8875318066157761, "grad_norm": 4.293947733416406, "learning_rate": 9.818131271932579e-06, "loss": 0.3942, "step": 43600 }, { "epoch": 0.887735368956743, "grad_norm": 6.283234398822455, "learning_rate": 9.817941321422822e-06, "loss": 0.3393, "step": 43610 }, { "epoch": 0.88793893129771, "grad_norm": 14.44912537516421, "learning_rate": 9.817751273608536e-06, "loss": 0.3253, "step": 43620 }, { "epoch": 0.8881424936386768, "grad_norm": 3.597035295514431, "learning_rate": 9.817561128493555e-06, "loss": 0.2892, "step": 43630 }, { "epoch": 0.8883460559796438, "grad_norm": 10.740774855875596, "learning_rate": 9.817370886081723e-06, "loss": 0.3583, "step": 43640 }, { "epoch": 0.8885496183206106, "grad_norm": 18.405541985749075, "learning_rate": 9.817180546376883e-06, "loss": 0.2393, "step": 43650 }, { "epoch": 0.8887531806615776, "grad_norm": 13.190063796111621, "learning_rate": 9.816990109382876e-06, "loss": 0.2241, "step": 43660 }, { "epoch": 0.8889567430025446, "grad_norm": 5.466325613005206, "learning_rate": 9.816799575103548e-06, "loss": 0.2371, "step": 43670 }, { "epoch": 0.8891603053435114, "grad_norm": 18.700105331968555, "learning_rate": 9.816608943542752e-06, "loss": 0.3584, "step": 43680 }, { "epoch": 0.8893638676844784, "grad_norm": 6.132349764614917, "learning_rate": 9.816418214704332e-06, "loss": 0.292, "step": 43690 }, { "epoch": 0.8895674300254452, "grad_norm": 7.222276688207293, "learning_rate": 9.816227388592143e-06, "loss": 0.2977, "step": 43700 }, { "epoch": 0.8897709923664122, "grad_norm": 8.782119391450546, "learning_rate": 9.81603646521004e-06, "loss": 0.2776, "step": 43710 }, { "epoch": 0.8899745547073792, "grad_norm": 10.590768762638344, "learning_rate": 9.815845444561876e-06, "loss": 0.4501, "step": 43720 }, { "epoch": 0.890178117048346, "grad_norm": 7.095606976060428, "learning_rate": 9.815654326651511e-06, "loss": 0.264, "step": 43730 }, { "epoch": 0.890381679389313, "grad_norm": 6.52864566078125, "learning_rate": 9.815463111482805e-06, "loss": 0.2656, "step": 43740 }, { "epoch": 0.8905852417302799, "grad_norm": 10.580171884761192, "learning_rate": 9.815271799059618e-06, "loss": 0.3686, "step": 43750 }, { "epoch": 0.8907888040712468, "grad_norm": 6.735830652143752, "learning_rate": 9.815080389385815e-06, "loss": 0.1903, "step": 43760 }, { "epoch": 0.8909923664122137, "grad_norm": 11.200013884359125, "learning_rate": 9.814888882465262e-06, "loss": 0.3, "step": 43770 }, { "epoch": 0.8911959287531807, "grad_norm": 15.50207990953945, "learning_rate": 9.814697278301827e-06, "loss": 0.294, "step": 43780 }, { "epoch": 0.8913994910941476, "grad_norm": 7.897366137354236, "learning_rate": 9.814505576899381e-06, "loss": 0.2997, "step": 43790 }, { "epoch": 0.8916030534351145, "grad_norm": 2.347555274310695, "learning_rate": 9.81431377826179e-06, "loss": 0.2471, "step": 43800 }, { "epoch": 0.8918066157760814, "grad_norm": 15.295941058716203, "learning_rate": 9.814121882392935e-06, "loss": 0.3593, "step": 43810 }, { "epoch": 0.8920101781170483, "grad_norm": 15.964082450706982, "learning_rate": 9.813929889296686e-06, "loss": 0.3059, "step": 43820 }, { "epoch": 0.8922137404580153, "grad_norm": 11.037141569124364, "learning_rate": 9.813737798976923e-06, "loss": 0.3588, "step": 43830 }, { "epoch": 0.8924173027989822, "grad_norm": 3.6249430375475127, "learning_rate": 9.813545611437524e-06, "loss": 0.3082, "step": 43840 }, { "epoch": 0.8926208651399491, "grad_norm": 6.649121867702029, "learning_rate": 9.813353326682374e-06, "loss": 0.2638, "step": 43850 }, { "epoch": 0.8928244274809161, "grad_norm": 4.1748663798789725, "learning_rate": 9.813160944715353e-06, "loss": 0.2375, "step": 43860 }, { "epoch": 0.8930279898218829, "grad_norm": 9.874994188980285, "learning_rate": 9.812968465540346e-06, "loss": 0.2681, "step": 43870 }, { "epoch": 0.8932315521628499, "grad_norm": 5.5255771005445995, "learning_rate": 9.812775889161244e-06, "loss": 0.193, "step": 43880 }, { "epoch": 0.8934351145038167, "grad_norm": 11.319683032801747, "learning_rate": 9.812583215581932e-06, "loss": 0.283, "step": 43890 }, { "epoch": 0.8936386768447837, "grad_norm": 14.137164339617415, "learning_rate": 9.812390444806305e-06, "loss": 0.2967, "step": 43900 }, { "epoch": 0.8938422391857507, "grad_norm": 5.669554281609652, "learning_rate": 9.812197576838254e-06, "loss": 0.249, "step": 43910 }, { "epoch": 0.8940458015267175, "grad_norm": 10.860766621193125, "learning_rate": 9.812004611681676e-06, "loss": 0.3238, "step": 43920 }, { "epoch": 0.8942493638676845, "grad_norm": 13.031189693079817, "learning_rate": 9.811811549340465e-06, "loss": 0.2358, "step": 43930 }, { "epoch": 0.8944529262086514, "grad_norm": 6.579480573100906, "learning_rate": 9.811618389818523e-06, "loss": 0.3287, "step": 43940 }, { "epoch": 0.8946564885496183, "grad_norm": 6.548063649458269, "learning_rate": 9.811425133119751e-06, "loss": 0.2268, "step": 43950 }, { "epoch": 0.8948600508905853, "grad_norm": 13.124727374365701, "learning_rate": 9.811231779248051e-06, "loss": 0.3548, "step": 43960 }, { "epoch": 0.8950636132315521, "grad_norm": 20.89045506282126, "learning_rate": 9.811038328207329e-06, "loss": 0.3627, "step": 43970 }, { "epoch": 0.8952671755725191, "grad_norm": 11.148627063298154, "learning_rate": 9.810844780001489e-06, "loss": 0.3219, "step": 43980 }, { "epoch": 0.895470737913486, "grad_norm": 3.78143963823841, "learning_rate": 9.810651134634445e-06, "loss": 0.2943, "step": 43990 }, { "epoch": 0.8956743002544529, "grad_norm": 9.62493632774705, "learning_rate": 9.810457392110104e-06, "loss": 0.2714, "step": 44000 }, { "epoch": 0.8958778625954198, "grad_norm": 15.063580508150391, "learning_rate": 9.810263552432381e-06, "loss": 0.3085, "step": 44010 }, { "epoch": 0.8960814249363868, "grad_norm": 6.227444485203813, "learning_rate": 9.810069615605189e-06, "loss": 0.2761, "step": 44020 }, { "epoch": 0.8962849872773537, "grad_norm": 2.764240504538992, "learning_rate": 9.809875581632447e-06, "loss": 0.3014, "step": 44030 }, { "epoch": 0.8964885496183206, "grad_norm": 5.993514546209353, "learning_rate": 9.809681450518071e-06, "loss": 0.3245, "step": 44040 }, { "epoch": 0.8966921119592876, "grad_norm": 8.26480444927941, "learning_rate": 9.809487222265985e-06, "loss": 0.3124, "step": 44050 }, { "epoch": 0.8968956743002544, "grad_norm": 9.667133671971822, "learning_rate": 9.809292896880109e-06, "loss": 0.262, "step": 44060 }, { "epoch": 0.8970992366412214, "grad_norm": 3.5942379592540674, "learning_rate": 9.809098474364366e-06, "loss": 0.2766, "step": 44070 }, { "epoch": 0.8973027989821883, "grad_norm": 5.569040616793265, "learning_rate": 9.80890395472269e-06, "loss": 0.3258, "step": 44080 }, { "epoch": 0.8975063613231552, "grad_norm": 9.089226455963873, "learning_rate": 9.808709337959001e-06, "loss": 0.3497, "step": 44090 }, { "epoch": 0.8977099236641222, "grad_norm": 6.269775373846951, "learning_rate": 9.808514624077237e-06, "loss": 0.2123, "step": 44100 }, { "epoch": 0.897913486005089, "grad_norm": 6.145689051021762, "learning_rate": 9.808319813081323e-06, "loss": 0.2622, "step": 44110 }, { "epoch": 0.898117048346056, "grad_norm": 6.926265046491042, "learning_rate": 9.8081249049752e-06, "loss": 0.4013, "step": 44120 }, { "epoch": 0.8983206106870228, "grad_norm": 6.473660407020217, "learning_rate": 9.807929899762802e-06, "loss": 0.3285, "step": 44130 }, { "epoch": 0.8985241730279898, "grad_norm": 5.337580489603805, "learning_rate": 9.807734797448064e-06, "loss": 0.2079, "step": 44140 }, { "epoch": 0.8987277353689568, "grad_norm": 9.183671687016012, "learning_rate": 9.807539598034931e-06, "loss": 0.3531, "step": 44150 }, { "epoch": 0.8989312977099236, "grad_norm": 26.727415451684294, "learning_rate": 9.807344301527346e-06, "loss": 0.2353, "step": 44160 }, { "epoch": 0.8991348600508906, "grad_norm": 5.142953204082906, "learning_rate": 9.807148907929249e-06, "loss": 0.303, "step": 44170 }, { "epoch": 0.8993384223918575, "grad_norm": 15.471565187134669, "learning_rate": 9.806953417244589e-06, "loss": 0.2879, "step": 44180 }, { "epoch": 0.8995419847328244, "grad_norm": 5.275006630633413, "learning_rate": 9.806757829477313e-06, "loss": 0.3365, "step": 44190 }, { "epoch": 0.8997455470737914, "grad_norm": 8.53126451199774, "learning_rate": 9.80656214463137e-06, "loss": 0.288, "step": 44200 }, { "epoch": 0.8999491094147583, "grad_norm": 7.493016990681573, "learning_rate": 9.806366362710716e-06, "loss": 0.3891, "step": 44210 }, { "epoch": 0.9001526717557252, "grad_norm": 8.110504610702193, "learning_rate": 9.806170483719302e-06, "loss": 0.3147, "step": 44220 }, { "epoch": 0.9003562340966921, "grad_norm": 5.146867605693692, "learning_rate": 9.805974507661085e-06, "loss": 0.2837, "step": 44230 }, { "epoch": 0.900559796437659, "grad_norm": 8.570140928044902, "learning_rate": 9.805778434540021e-06, "loss": 0.3771, "step": 44240 }, { "epoch": 0.9007633587786259, "grad_norm": 9.968998134691976, "learning_rate": 9.805582264360074e-06, "loss": 0.2548, "step": 44250 }, { "epoch": 0.9009669211195929, "grad_norm": 10.265826909882835, "learning_rate": 9.805385997125203e-06, "loss": 0.2555, "step": 44260 }, { "epoch": 0.9011704834605598, "grad_norm": 8.769931482429106, "learning_rate": 9.805189632839373e-06, "loss": 0.1841, "step": 44270 }, { "epoch": 0.9013740458015267, "grad_norm": 5.586434739529011, "learning_rate": 9.804993171506548e-06, "loss": 0.3248, "step": 44280 }, { "epoch": 0.9015776081424937, "grad_norm": 11.65187023378275, "learning_rate": 9.804796613130698e-06, "loss": 0.2518, "step": 44290 }, { "epoch": 0.9017811704834605, "grad_norm": 4.192522990449908, "learning_rate": 9.804599957715791e-06, "loss": 0.3092, "step": 44300 }, { "epoch": 0.9019847328244275, "grad_norm": 9.168903956096456, "learning_rate": 9.8044032052658e-06, "loss": 0.1597, "step": 44310 }, { "epoch": 0.9021882951653944, "grad_norm": 8.246299386034222, "learning_rate": 9.804206355784698e-06, "loss": 0.4173, "step": 44320 }, { "epoch": 0.9023918575063613, "grad_norm": 10.631079288869445, "learning_rate": 9.804009409276463e-06, "loss": 0.2786, "step": 44330 }, { "epoch": 0.9025954198473283, "grad_norm": 6.330485220093835, "learning_rate": 9.803812365745068e-06, "loss": 0.2403, "step": 44340 }, { "epoch": 0.9027989821882951, "grad_norm": 10.594030558189178, "learning_rate": 9.803615225194497e-06, "loss": 0.2774, "step": 44350 }, { "epoch": 0.9030025445292621, "grad_norm": 6.826340778118438, "learning_rate": 9.803417987628727e-06, "loss": 0.3221, "step": 44360 }, { "epoch": 0.903206106870229, "grad_norm": 14.31117420395953, "learning_rate": 9.803220653051746e-06, "loss": 0.2747, "step": 44370 }, { "epoch": 0.9034096692111959, "grad_norm": 9.064258327412617, "learning_rate": 9.803023221467535e-06, "loss": 0.3746, "step": 44380 }, { "epoch": 0.9036132315521629, "grad_norm": 7.968441358594166, "learning_rate": 9.802825692880087e-06, "loss": 0.2811, "step": 44390 }, { "epoch": 0.9038167938931297, "grad_norm": 12.856146535081079, "learning_rate": 9.802628067293388e-06, "loss": 0.3173, "step": 44400 }, { "epoch": 0.9040203562340967, "grad_norm": 9.70501946106496, "learning_rate": 9.802430344711427e-06, "loss": 0.2598, "step": 44410 }, { "epoch": 0.9042239185750636, "grad_norm": 13.914389851603135, "learning_rate": 9.8022325251382e-06, "loss": 0.3279, "step": 44420 }, { "epoch": 0.9044274809160305, "grad_norm": 7.39586818040356, "learning_rate": 9.802034608577702e-06, "loss": 0.2593, "step": 44430 }, { "epoch": 0.9046310432569975, "grad_norm": 6.526192809011853, "learning_rate": 9.801836595033932e-06, "loss": 0.3075, "step": 44440 }, { "epoch": 0.9048346055979644, "grad_norm": 8.375291167863361, "learning_rate": 9.801638484510886e-06, "loss": 0.3509, "step": 44450 }, { "epoch": 0.9050381679389313, "grad_norm": 14.292089444050331, "learning_rate": 9.801440277012566e-06, "loss": 0.3126, "step": 44460 }, { "epoch": 0.9052417302798982, "grad_norm": 2.606441725143211, "learning_rate": 9.801241972542977e-06, "loss": 0.2925, "step": 44470 }, { "epoch": 0.9054452926208651, "grad_norm": 4.711044618271051, "learning_rate": 9.801043571106121e-06, "loss": 0.4179, "step": 44480 }, { "epoch": 0.905648854961832, "grad_norm": 5.186025686509132, "learning_rate": 9.800845072706006e-06, "loss": 0.3901, "step": 44490 }, { "epoch": 0.905852417302799, "grad_norm": 4.5297420578416965, "learning_rate": 9.800646477346642e-06, "loss": 0.2827, "step": 44500 }, { "epoch": 0.9060559796437659, "grad_norm": 4.884352268298898, "learning_rate": 9.800447785032038e-06, "loss": 0.3617, "step": 44510 }, { "epoch": 0.9062595419847328, "grad_norm": 8.437562186120838, "learning_rate": 9.800248995766211e-06, "loss": 0.2328, "step": 44520 }, { "epoch": 0.9064631043256998, "grad_norm": 6.754877292153999, "learning_rate": 9.800050109553169e-06, "loss": 0.2543, "step": 44530 }, { "epoch": 0.9066666666666666, "grad_norm": 9.01798551560426, "learning_rate": 9.799851126396935e-06, "loss": 0.3113, "step": 44540 }, { "epoch": 0.9068702290076336, "grad_norm": 10.208204979435566, "learning_rate": 9.799652046301525e-06, "loss": 0.2815, "step": 44550 }, { "epoch": 0.9070737913486006, "grad_norm": 7.83646679905131, "learning_rate": 9.79945286927096e-06, "loss": 0.2291, "step": 44560 }, { "epoch": 0.9072773536895674, "grad_norm": 5.113627957836717, "learning_rate": 9.799253595309264e-06, "loss": 0.3484, "step": 44570 }, { "epoch": 0.9074809160305344, "grad_norm": 7.995688252983435, "learning_rate": 9.799054224420458e-06, "loss": 0.2726, "step": 44580 }, { "epoch": 0.9076844783715012, "grad_norm": 7.036498684308881, "learning_rate": 9.798854756608571e-06, "loss": 0.3448, "step": 44590 }, { "epoch": 0.9078880407124682, "grad_norm": 4.491202418976339, "learning_rate": 9.798655191877633e-06, "loss": 0.2657, "step": 44600 }, { "epoch": 0.9080916030534351, "grad_norm": 6.998948418655157, "learning_rate": 9.798455530231673e-06, "loss": 0.2262, "step": 44610 }, { "epoch": 0.908295165394402, "grad_norm": 8.792463512091354, "learning_rate": 9.79825577167472e-06, "loss": 0.2511, "step": 44620 }, { "epoch": 0.908498727735369, "grad_norm": 3.517285056953488, "learning_rate": 9.798055916210813e-06, "loss": 0.3731, "step": 44630 }, { "epoch": 0.9087022900763359, "grad_norm": 14.249102290994617, "learning_rate": 9.797855963843988e-06, "loss": 0.3611, "step": 44640 }, { "epoch": 0.9089058524173028, "grad_norm": 7.4488571340975795, "learning_rate": 9.797655914578283e-06, "loss": 0.3612, "step": 44650 }, { "epoch": 0.9091094147582697, "grad_norm": 10.89970630721114, "learning_rate": 9.797455768417735e-06, "loss": 0.2883, "step": 44660 }, { "epoch": 0.9093129770992366, "grad_norm": 4.139198955917063, "learning_rate": 9.79725552536639e-06, "loss": 0.4059, "step": 44670 }, { "epoch": 0.9095165394402036, "grad_norm": 4.616387446626373, "learning_rate": 9.79705518542829e-06, "loss": 0.3696, "step": 44680 }, { "epoch": 0.9097201017811705, "grad_norm": 8.788793703761758, "learning_rate": 9.796854748607483e-06, "loss": 0.3107, "step": 44690 }, { "epoch": 0.9099236641221374, "grad_norm": 10.947868042518133, "learning_rate": 9.796654214908017e-06, "loss": 0.3861, "step": 44700 }, { "epoch": 0.9101272264631043, "grad_norm": 4.6883782797558755, "learning_rate": 9.796453584333939e-06, "loss": 0.2479, "step": 44710 }, { "epoch": 0.9103307888040713, "grad_norm": 6.152590373448225, "learning_rate": 9.796252856889304e-06, "loss": 0.2909, "step": 44720 }, { "epoch": 0.9105343511450382, "grad_norm": 12.191318650032287, "learning_rate": 9.796052032578164e-06, "loss": 0.3228, "step": 44730 }, { "epoch": 0.9107379134860051, "grad_norm": 7.424671699623647, "learning_rate": 9.795851111404577e-06, "loss": 0.3317, "step": 44740 }, { "epoch": 0.910941475826972, "grad_norm": 22.464027036343676, "learning_rate": 9.795650093372599e-06, "loss": 0.2512, "step": 44750 }, { "epoch": 0.9111450381679389, "grad_norm": 13.715778214322674, "learning_rate": 9.795448978486293e-06, "loss": 0.3641, "step": 44760 }, { "epoch": 0.9113486005089059, "grad_norm": 17.86126849282873, "learning_rate": 9.795247766749715e-06, "loss": 0.4606, "step": 44770 }, { "epoch": 0.9115521628498727, "grad_norm": 13.727418466135775, "learning_rate": 9.795046458166934e-06, "loss": 0.3628, "step": 44780 }, { "epoch": 0.9117557251908397, "grad_norm": 4.3017761390883855, "learning_rate": 9.794845052742015e-06, "loss": 0.3416, "step": 44790 }, { "epoch": 0.9119592875318067, "grad_norm": 6.484309668089973, "learning_rate": 9.794643550479023e-06, "loss": 0.3673, "step": 44800 }, { "epoch": 0.9121628498727735, "grad_norm": 8.052441771179756, "learning_rate": 9.794441951382029e-06, "loss": 0.2733, "step": 44810 }, { "epoch": 0.9123664122137405, "grad_norm": 5.9602702764432625, "learning_rate": 9.794240255455104e-06, "loss": 0.2132, "step": 44820 }, { "epoch": 0.9125699745547073, "grad_norm": 11.287391745263353, "learning_rate": 9.794038462702324e-06, "loss": 0.273, "step": 44830 }, { "epoch": 0.9127735368956743, "grad_norm": 10.046095728287009, "learning_rate": 9.79383657312776e-06, "loss": 0.2169, "step": 44840 }, { "epoch": 0.9129770992366413, "grad_norm": 8.6357739043558, "learning_rate": 9.793634586735494e-06, "loss": 0.2897, "step": 44850 }, { "epoch": 0.9131806615776081, "grad_norm": 13.673459759159003, "learning_rate": 9.793432503529602e-06, "loss": 0.3145, "step": 44860 }, { "epoch": 0.9133842239185751, "grad_norm": 8.548455902235498, "learning_rate": 9.793230323514168e-06, "loss": 0.2733, "step": 44870 }, { "epoch": 0.913587786259542, "grad_norm": 12.67771751557083, "learning_rate": 9.793028046693273e-06, "loss": 0.2963, "step": 44880 }, { "epoch": 0.9137913486005089, "grad_norm": 9.784311182510738, "learning_rate": 9.792825673071003e-06, "loss": 0.3215, "step": 44890 }, { "epoch": 0.9139949109414758, "grad_norm": 14.647078571895605, "learning_rate": 9.792623202651446e-06, "loss": 0.3909, "step": 44900 }, { "epoch": 0.9141984732824427, "grad_norm": 8.549651812269719, "learning_rate": 9.79242063543869e-06, "loss": 0.2774, "step": 44910 }, { "epoch": 0.9144020356234097, "grad_norm": 13.992829919409001, "learning_rate": 9.792217971436828e-06, "loss": 0.2338, "step": 44920 }, { "epoch": 0.9146055979643766, "grad_norm": 5.766913016820909, "learning_rate": 9.792015210649952e-06, "loss": 0.3936, "step": 44930 }, { "epoch": 0.9148091603053435, "grad_norm": 9.910817700111823, "learning_rate": 9.791812353082155e-06, "loss": 0.3651, "step": 44940 }, { "epoch": 0.9150127226463104, "grad_norm": 4.5353392399754675, "learning_rate": 9.791609398737535e-06, "loss": 0.2579, "step": 44950 }, { "epoch": 0.9152162849872774, "grad_norm": 15.465540930785892, "learning_rate": 9.791406347620194e-06, "loss": 0.2898, "step": 44960 }, { "epoch": 0.9154198473282443, "grad_norm": 7.938881071733179, "learning_rate": 9.79120319973423e-06, "loss": 0.2932, "step": 44970 }, { "epoch": 0.9156234096692112, "grad_norm": 15.170406511844234, "learning_rate": 9.790999955083747e-06, "loss": 0.2629, "step": 44980 }, { "epoch": 0.9158269720101782, "grad_norm": 6.830560215715959, "learning_rate": 9.790796613672848e-06, "loss": 0.2728, "step": 44990 }, { "epoch": 0.916030534351145, "grad_norm": 24.65612684687104, "learning_rate": 9.79059317550564e-06, "loss": 0.4075, "step": 45000 }, { "epoch": 0.916234096692112, "grad_norm": 3.278848674179013, "learning_rate": 9.790389640586234e-06, "loss": 0.3551, "step": 45010 }, { "epoch": 0.9164376590330788, "grad_norm": 2.3629745470758654, "learning_rate": 9.790186008918738e-06, "loss": 0.3015, "step": 45020 }, { "epoch": 0.9166412213740458, "grad_norm": 6.151196490236993, "learning_rate": 9.789982280507268e-06, "loss": 0.2467, "step": 45030 }, { "epoch": 0.9168447837150128, "grad_norm": 8.467854433100184, "learning_rate": 9.789778455355934e-06, "loss": 0.2565, "step": 45040 }, { "epoch": 0.9170483460559796, "grad_norm": 4.059606391339944, "learning_rate": 9.789574533468857e-06, "loss": 0.3419, "step": 45050 }, { "epoch": 0.9172519083969466, "grad_norm": 10.810240134153936, "learning_rate": 9.789370514850152e-06, "loss": 0.3454, "step": 45060 }, { "epoch": 0.9174554707379134, "grad_norm": 7.636036707038919, "learning_rate": 9.789166399503941e-06, "loss": 0.3593, "step": 45070 }, { "epoch": 0.9176590330788804, "grad_norm": 12.030137864989651, "learning_rate": 9.788962187434346e-06, "loss": 0.2832, "step": 45080 }, { "epoch": 0.9178625954198474, "grad_norm": 12.956790987319204, "learning_rate": 9.788757878645492e-06, "loss": 0.2447, "step": 45090 }, { "epoch": 0.9180661577608142, "grad_norm": 5.614682724149736, "learning_rate": 9.788553473141504e-06, "loss": 0.2653, "step": 45100 }, { "epoch": 0.9182697201017812, "grad_norm": 4.511367675023768, "learning_rate": 9.788348970926512e-06, "loss": 0.2158, "step": 45110 }, { "epoch": 0.9184732824427481, "grad_norm": 8.230552181132765, "learning_rate": 9.788144372004645e-06, "loss": 0.3352, "step": 45120 }, { "epoch": 0.918676844783715, "grad_norm": 9.368969468070059, "learning_rate": 9.787939676380035e-06, "loss": 0.2947, "step": 45130 }, { "epoch": 0.9188804071246819, "grad_norm": 3.2045726570430384, "learning_rate": 9.787734884056816e-06, "loss": 0.2352, "step": 45140 }, { "epoch": 0.9190839694656489, "grad_norm": 2.9268368186524527, "learning_rate": 9.787529995039126e-06, "loss": 0.2503, "step": 45150 }, { "epoch": 0.9192875318066158, "grad_norm": 6.0557128270063405, "learning_rate": 9.7873250093311e-06, "loss": 0.4758, "step": 45160 }, { "epoch": 0.9194910941475827, "grad_norm": 7.551121937601392, "learning_rate": 9.78711992693688e-06, "loss": 0.367, "step": 45170 }, { "epoch": 0.9196946564885496, "grad_norm": 7.784873718196746, "learning_rate": 9.786914747860608e-06, "loss": 0.3657, "step": 45180 }, { "epoch": 0.9198982188295165, "grad_norm": 12.811989598621006, "learning_rate": 9.786709472106426e-06, "loss": 0.3259, "step": 45190 }, { "epoch": 0.9201017811704835, "grad_norm": 9.958492818726004, "learning_rate": 9.78650409967848e-06, "loss": 0.3345, "step": 45200 }, { "epoch": 0.9203053435114504, "grad_norm": 6.5994741612912975, "learning_rate": 9.78629863058092e-06, "loss": 0.3237, "step": 45210 }, { "epoch": 0.9205089058524173, "grad_norm": 8.188852280124, "learning_rate": 9.786093064817894e-06, "loss": 0.3743, "step": 45220 }, { "epoch": 0.9207124681933843, "grad_norm": 8.791952574768077, "learning_rate": 9.785887402393554e-06, "loss": 0.2552, "step": 45230 }, { "epoch": 0.9209160305343511, "grad_norm": 5.571725076347751, "learning_rate": 9.785681643312055e-06, "loss": 0.2718, "step": 45240 }, { "epoch": 0.9211195928753181, "grad_norm": 8.790367512772127, "learning_rate": 9.785475787577551e-06, "loss": 0.3251, "step": 45250 }, { "epoch": 0.9213231552162849, "grad_norm": 5.128972984398062, "learning_rate": 9.7852698351942e-06, "loss": 0.3533, "step": 45260 }, { "epoch": 0.9215267175572519, "grad_norm": 5.864450770500675, "learning_rate": 9.78506378616616e-06, "loss": 0.4187, "step": 45270 }, { "epoch": 0.9217302798982189, "grad_norm": 7.8361808648149145, "learning_rate": 9.784857640497594e-06, "loss": 0.2783, "step": 45280 }, { "epoch": 0.9219338422391857, "grad_norm": 3.638915892789924, "learning_rate": 9.784651398192666e-06, "loss": 0.2329, "step": 45290 }, { "epoch": 0.9221374045801527, "grad_norm": 12.57564692698023, "learning_rate": 9.78444505925554e-06, "loss": 0.267, "step": 45300 }, { "epoch": 0.9223409669211196, "grad_norm": 13.814408832186416, "learning_rate": 9.784238623690384e-06, "loss": 0.3306, "step": 45310 }, { "epoch": 0.9225445292620865, "grad_norm": 5.503902080035182, "learning_rate": 9.784032091501367e-06, "loss": 0.2419, "step": 45320 }, { "epoch": 0.9227480916030535, "grad_norm": 9.385289048298484, "learning_rate": 9.78382546269266e-06, "loss": 0.2915, "step": 45330 }, { "epoch": 0.9229516539440203, "grad_norm": 17.263630806624803, "learning_rate": 9.783618737268436e-06, "loss": 0.2472, "step": 45340 }, { "epoch": 0.9231552162849873, "grad_norm": 11.336323883464551, "learning_rate": 9.78341191523287e-06, "loss": 0.2617, "step": 45350 }, { "epoch": 0.9233587786259542, "grad_norm": 3.6973353732955827, "learning_rate": 9.783204996590139e-06, "loss": 0.2771, "step": 45360 }, { "epoch": 0.9235623409669211, "grad_norm": 6.389981608804394, "learning_rate": 9.782997981344423e-06, "loss": 0.4138, "step": 45370 }, { "epoch": 0.923765903307888, "grad_norm": 15.178339817071281, "learning_rate": 9.782790869499903e-06, "loss": 0.2486, "step": 45380 }, { "epoch": 0.923969465648855, "grad_norm": 15.262492292061765, "learning_rate": 9.78258366106076e-06, "loss": 0.3296, "step": 45390 }, { "epoch": 0.9241730279898219, "grad_norm": 5.990248331957924, "learning_rate": 9.782376356031181e-06, "loss": 0.3469, "step": 45400 }, { "epoch": 0.9243765903307888, "grad_norm": 8.0588323365312, "learning_rate": 9.782168954415353e-06, "loss": 0.2343, "step": 45410 }, { "epoch": 0.9245801526717558, "grad_norm": 13.033584623815203, "learning_rate": 9.78196145621746e-06, "loss": 0.2579, "step": 45420 }, { "epoch": 0.9247837150127226, "grad_norm": 6.1123776310218485, "learning_rate": 9.7817538614417e-06, "loss": 0.2335, "step": 45430 }, { "epoch": 0.9249872773536896, "grad_norm": 10.013485283417209, "learning_rate": 9.78154617009226e-06, "loss": 0.269, "step": 45440 }, { "epoch": 0.9251908396946565, "grad_norm": 10.562581173962089, "learning_rate": 9.781338382173336e-06, "loss": 0.2867, "step": 45450 }, { "epoch": 0.9253944020356234, "grad_norm": 21.23738724219577, "learning_rate": 9.781130497689127e-06, "loss": 0.4263, "step": 45460 }, { "epoch": 0.9255979643765904, "grad_norm": 12.021907118919664, "learning_rate": 9.780922516643828e-06, "loss": 0.3734, "step": 45470 }, { "epoch": 0.9258015267175572, "grad_norm": 9.739909747118087, "learning_rate": 9.780714439041642e-06, "loss": 0.2558, "step": 45480 }, { "epoch": 0.9260050890585242, "grad_norm": 5.751349754313455, "learning_rate": 9.78050626488677e-06, "loss": 0.3424, "step": 45490 }, { "epoch": 0.926208651399491, "grad_norm": 8.29979316490049, "learning_rate": 9.780297994183414e-06, "loss": 0.3043, "step": 45500 }, { "epoch": 0.926412213740458, "grad_norm": 9.343872395557696, "learning_rate": 9.780089626935786e-06, "loss": 0.315, "step": 45510 }, { "epoch": 0.926615776081425, "grad_norm": 20.92505441323507, "learning_rate": 9.77988116314809e-06, "loss": 0.2674, "step": 45520 }, { "epoch": 0.9268193384223918, "grad_norm": 11.818030867831528, "learning_rate": 9.77967260282454e-06, "loss": 0.3034, "step": 45530 }, { "epoch": 0.9270229007633588, "grad_norm": 5.00682214137209, "learning_rate": 9.779463945969344e-06, "loss": 0.3368, "step": 45540 }, { "epoch": 0.9272264631043257, "grad_norm": 7.746534695862956, "learning_rate": 9.779255192586717e-06, "loss": 0.2855, "step": 45550 }, { "epoch": 0.9274300254452926, "grad_norm": 10.288950939347629, "learning_rate": 9.779046342680875e-06, "loss": 0.2596, "step": 45560 }, { "epoch": 0.9276335877862596, "grad_norm": 10.467286056461047, "learning_rate": 9.77883739625604e-06, "loss": 0.2096, "step": 45570 }, { "epoch": 0.9278371501272265, "grad_norm": 14.839716964211116, "learning_rate": 9.778628353316426e-06, "loss": 0.3264, "step": 45580 }, { "epoch": 0.9280407124681934, "grad_norm": 8.546767969860909, "learning_rate": 9.778419213866258e-06, "loss": 0.2654, "step": 45590 }, { "epoch": 0.9282442748091603, "grad_norm": 18.797298529448636, "learning_rate": 9.77820997790976e-06, "loss": 0.3093, "step": 45600 }, { "epoch": 0.9284478371501272, "grad_norm": 18.834355874173188, "learning_rate": 9.778000645451156e-06, "loss": 0.292, "step": 45610 }, { "epoch": 0.9286513994910941, "grad_norm": 5.469087339758054, "learning_rate": 9.777791216494676e-06, "loss": 0.3926, "step": 45620 }, { "epoch": 0.9288549618320611, "grad_norm": 10.27294532657599, "learning_rate": 9.777581691044548e-06, "loss": 0.3693, "step": 45630 }, { "epoch": 0.929058524173028, "grad_norm": 4.226944413510225, "learning_rate": 9.777372069105008e-06, "loss": 0.2304, "step": 45640 }, { "epoch": 0.9292620865139949, "grad_norm": 2.527439232862912, "learning_rate": 9.777162350680281e-06, "loss": 0.2252, "step": 45650 }, { "epoch": 0.9294656488549619, "grad_norm": 5.523554083272156, "learning_rate": 9.776952535774609e-06, "loss": 0.365, "step": 45660 }, { "epoch": 0.9296692111959287, "grad_norm": 3.4049272018724843, "learning_rate": 9.776742624392228e-06, "loss": 0.3602, "step": 45670 }, { "epoch": 0.9298727735368957, "grad_norm": 3.6361885310939526, "learning_rate": 9.776532616537377e-06, "loss": 0.2803, "step": 45680 }, { "epoch": 0.9300763358778626, "grad_norm": 7.949735508658478, "learning_rate": 9.776322512214298e-06, "loss": 0.3486, "step": 45690 }, { "epoch": 0.9302798982188295, "grad_norm": 7.646690516354387, "learning_rate": 9.776112311427234e-06, "loss": 0.2984, "step": 45700 }, { "epoch": 0.9304834605597965, "grad_norm": 7.387008498535959, "learning_rate": 9.775902014180429e-06, "loss": 0.2739, "step": 45710 }, { "epoch": 0.9306870229007633, "grad_norm": 9.567147461248204, "learning_rate": 9.775691620478133e-06, "loss": 0.2812, "step": 45720 }, { "epoch": 0.9308905852417303, "grad_norm": 10.269644165300651, "learning_rate": 9.775481130324594e-06, "loss": 0.2954, "step": 45730 }, { "epoch": 0.9310941475826972, "grad_norm": 5.415470216546203, "learning_rate": 9.775270543724063e-06, "loss": 0.3333, "step": 45740 }, { "epoch": 0.9312977099236641, "grad_norm": 17.257548328032836, "learning_rate": 9.775059860680791e-06, "loss": 0.3535, "step": 45750 }, { "epoch": 0.9315012722646311, "grad_norm": 32.93568300419585, "learning_rate": 9.774849081199035e-06, "loss": 0.2752, "step": 45760 }, { "epoch": 0.9317048346055979, "grad_norm": 10.179666354312463, "learning_rate": 9.774638205283053e-06, "loss": 0.2351, "step": 45770 }, { "epoch": 0.9319083969465649, "grad_norm": 8.458480954357006, "learning_rate": 9.774427232937104e-06, "loss": 0.4173, "step": 45780 }, { "epoch": 0.9321119592875318, "grad_norm": 7.999988747360303, "learning_rate": 9.774216164165445e-06, "loss": 0.2993, "step": 45790 }, { "epoch": 0.9323155216284987, "grad_norm": 5.144340202169317, "learning_rate": 9.774004998972344e-06, "loss": 0.427, "step": 45800 }, { "epoch": 0.9325190839694657, "grad_norm": 8.50592960714205, "learning_rate": 9.77379373736206e-06, "loss": 0.2521, "step": 45810 }, { "epoch": 0.9327226463104326, "grad_norm": 10.343993285096293, "learning_rate": 9.773582379338867e-06, "loss": 0.304, "step": 45820 }, { "epoch": 0.9329262086513995, "grad_norm": 18.768789165134592, "learning_rate": 9.773370924907027e-06, "loss": 0.2241, "step": 45830 }, { "epoch": 0.9331297709923664, "grad_norm": 5.66421152162743, "learning_rate": 9.773159374070811e-06, "loss": 0.3531, "step": 45840 }, { "epoch": 0.9333333333333333, "grad_norm": 11.397303830691898, "learning_rate": 9.772947726834496e-06, "loss": 0.3439, "step": 45850 }, { "epoch": 0.9335368956743002, "grad_norm": 22.795918594433253, "learning_rate": 9.772735983202355e-06, "loss": 0.2585, "step": 45860 }, { "epoch": 0.9337404580152672, "grad_norm": 13.304782554892949, "learning_rate": 9.772524143178664e-06, "loss": 0.3017, "step": 45870 }, { "epoch": 0.9339440203562341, "grad_norm": 42.00505200375138, "learning_rate": 9.772312206767697e-06, "loss": 0.3058, "step": 45880 }, { "epoch": 0.934147582697201, "grad_norm": 16.936753333876524, "learning_rate": 9.772100173973743e-06, "loss": 0.3759, "step": 45890 }, { "epoch": 0.934351145038168, "grad_norm": 9.83777380931133, "learning_rate": 9.771888044801076e-06, "loss": 0.4051, "step": 45900 }, { "epoch": 0.9345547073791348, "grad_norm": 5.65715698745361, "learning_rate": 9.771675819253985e-06, "loss": 0.3286, "step": 45910 }, { "epoch": 0.9347582697201018, "grad_norm": 4.533135692542844, "learning_rate": 9.771463497336755e-06, "loss": 0.294, "step": 45920 }, { "epoch": 0.9349618320610688, "grad_norm": 19.328197884211722, "learning_rate": 9.771251079053675e-06, "loss": 0.4071, "step": 45930 }, { "epoch": 0.9351653944020356, "grad_norm": 7.119125629504021, "learning_rate": 9.771038564409035e-06, "loss": 0.2112, "step": 45940 }, { "epoch": 0.9353689567430026, "grad_norm": 5.656424626128214, "learning_rate": 9.770825953407124e-06, "loss": 0.283, "step": 45950 }, { "epoch": 0.9355725190839694, "grad_norm": 3.201846119561519, "learning_rate": 9.770613246052239e-06, "loss": 0.2853, "step": 45960 }, { "epoch": 0.9357760814249364, "grad_norm": 12.569196240775618, "learning_rate": 9.770400442348673e-06, "loss": 0.2385, "step": 45970 }, { "epoch": 0.9359796437659033, "grad_norm": 6.500908843249151, "learning_rate": 9.770187542300728e-06, "loss": 0.2538, "step": 45980 }, { "epoch": 0.9361832061068702, "grad_norm": 33.11957219970939, "learning_rate": 9.7699745459127e-06, "loss": 0.3145, "step": 45990 }, { "epoch": 0.9363867684478372, "grad_norm": 11.767679694905315, "learning_rate": 9.769761453188896e-06, "loss": 0.3086, "step": 46000 }, { "epoch": 0.936590330788804, "grad_norm": 7.575887328007464, "learning_rate": 9.769548264133613e-06, "loss": 0.2843, "step": 46010 }, { "epoch": 0.936793893129771, "grad_norm": 6.531904763064422, "learning_rate": 9.76933497875116e-06, "loss": 0.3536, "step": 46020 }, { "epoch": 0.9369974554707379, "grad_norm": 9.172974376177148, "learning_rate": 9.769121597045844e-06, "loss": 0.3401, "step": 46030 }, { "epoch": 0.9372010178117048, "grad_norm": 5.7036727368283495, "learning_rate": 9.768908119021976e-06, "loss": 0.2987, "step": 46040 }, { "epoch": 0.9374045801526718, "grad_norm": 7.56143201973203, "learning_rate": 9.768694544683865e-06, "loss": 0.239, "step": 46050 }, { "epoch": 0.9376081424936387, "grad_norm": 2.7909778238285554, "learning_rate": 9.768480874035826e-06, "loss": 0.2768, "step": 46060 }, { "epoch": 0.9378117048346056, "grad_norm": 15.905641614844017, "learning_rate": 9.768267107082174e-06, "loss": 0.3553, "step": 46070 }, { "epoch": 0.9380152671755725, "grad_norm": 5.54907934427738, "learning_rate": 9.768053243827226e-06, "loss": 0.353, "step": 46080 }, { "epoch": 0.9382188295165395, "grad_norm": 17.305588990154074, "learning_rate": 9.767839284275302e-06, "loss": 0.2919, "step": 46090 }, { "epoch": 0.9384223918575063, "grad_norm": 5.798526645276552, "learning_rate": 9.767625228430721e-06, "loss": 0.3987, "step": 46100 }, { "epoch": 0.9386259541984733, "grad_norm": 7.12365979791363, "learning_rate": 9.76741107629781e-06, "loss": 0.319, "step": 46110 }, { "epoch": 0.9388295165394402, "grad_norm": 10.781761528708692, "learning_rate": 9.767196827880891e-06, "loss": 0.3181, "step": 46120 }, { "epoch": 0.9390330788804071, "grad_norm": 4.552704620201594, "learning_rate": 9.766982483184292e-06, "loss": 0.3087, "step": 46130 }, { "epoch": 0.9392366412213741, "grad_norm": 4.656266442929606, "learning_rate": 9.766768042212345e-06, "loss": 0.262, "step": 46140 }, { "epoch": 0.9394402035623409, "grad_norm": 15.827380666834653, "learning_rate": 9.766553504969373e-06, "loss": 0.3839, "step": 46150 }, { "epoch": 0.9396437659033079, "grad_norm": 20.962800561177673, "learning_rate": 9.766338871459718e-06, "loss": 0.3178, "step": 46160 }, { "epoch": 0.9398473282442749, "grad_norm": 6.945959622978582, "learning_rate": 9.766124141687708e-06, "loss": 0.3856, "step": 46170 }, { "epoch": 0.9400508905852417, "grad_norm": 7.103978519475091, "learning_rate": 9.765909315657682e-06, "loss": 0.2064, "step": 46180 }, { "epoch": 0.9402544529262087, "grad_norm": 16.65320868523629, "learning_rate": 9.765694393373978e-06, "loss": 0.2361, "step": 46190 }, { "epoch": 0.9404580152671755, "grad_norm": 10.586006313968529, "learning_rate": 9.76547937484094e-06, "loss": 0.4034, "step": 46200 }, { "epoch": 0.9406615776081425, "grad_norm": 11.755691978199852, "learning_rate": 9.765264260062907e-06, "loss": 0.3551, "step": 46210 }, { "epoch": 0.9408651399491094, "grad_norm": 3.9805773053874645, "learning_rate": 9.765049049044225e-06, "loss": 0.255, "step": 46220 }, { "epoch": 0.9410687022900763, "grad_norm": 4.608555838440301, "learning_rate": 9.76483374178924e-06, "loss": 0.2961, "step": 46230 }, { "epoch": 0.9412722646310433, "grad_norm": 2.9437210037637915, "learning_rate": 9.7646183383023e-06, "loss": 0.215, "step": 46240 }, { "epoch": 0.9414758269720102, "grad_norm": 5.6930924136600485, "learning_rate": 9.764402838587756e-06, "loss": 0.3636, "step": 46250 }, { "epoch": 0.9416793893129771, "grad_norm": 9.646805293539357, "learning_rate": 9.76418724264996e-06, "loss": 0.338, "step": 46260 }, { "epoch": 0.941882951653944, "grad_norm": 10.586188601567878, "learning_rate": 9.763971550493267e-06, "loss": 0.3234, "step": 46270 }, { "epoch": 0.942086513994911, "grad_norm": 7.145553325596367, "learning_rate": 9.763755762122034e-06, "loss": 0.3842, "step": 46280 }, { "epoch": 0.9422900763358779, "grad_norm": 4.044190683886426, "learning_rate": 9.763539877540615e-06, "loss": 0.2569, "step": 46290 }, { "epoch": 0.9424936386768448, "grad_norm": 31.34476129745657, "learning_rate": 9.763323896753374e-06, "loss": 0.326, "step": 46300 }, { "epoch": 0.9426972010178117, "grad_norm": 10.593307863355891, "learning_rate": 9.763107819764672e-06, "loss": 0.3408, "step": 46310 }, { "epoch": 0.9429007633587786, "grad_norm": 15.772186583272253, "learning_rate": 9.762891646578873e-06, "loss": 0.2773, "step": 46320 }, { "epoch": 0.9431043256997456, "grad_norm": 8.445640140636531, "learning_rate": 9.762675377200342e-06, "loss": 0.3109, "step": 46330 }, { "epoch": 0.9433078880407124, "grad_norm": 3.4963420262274147, "learning_rate": 9.762459011633447e-06, "loss": 0.3362, "step": 46340 }, { "epoch": 0.9435114503816794, "grad_norm": 6.834493056355693, "learning_rate": 9.762242549882559e-06, "loss": 0.2154, "step": 46350 }, { "epoch": 0.9437150127226464, "grad_norm": 11.472329172524809, "learning_rate": 9.762025991952049e-06, "loss": 0.3651, "step": 46360 }, { "epoch": 0.9439185750636132, "grad_norm": 9.298216207398495, "learning_rate": 9.76180933784629e-06, "loss": 0.2895, "step": 46370 }, { "epoch": 0.9441221374045802, "grad_norm": 10.434947637103184, "learning_rate": 9.761592587569659e-06, "loss": 0.2642, "step": 46380 }, { "epoch": 0.944325699745547, "grad_norm": 16.16378738031666, "learning_rate": 9.761375741126533e-06, "loss": 0.3998, "step": 46390 }, { "epoch": 0.944529262086514, "grad_norm": 9.051074562544924, "learning_rate": 9.76115879852129e-06, "loss": 0.323, "step": 46400 }, { "epoch": 0.944732824427481, "grad_norm": 12.089866351423744, "learning_rate": 9.760941759758315e-06, "loss": 0.3455, "step": 46410 }, { "epoch": 0.9449363867684478, "grad_norm": 31.384058436069136, "learning_rate": 9.760724624841989e-06, "loss": 0.2564, "step": 46420 }, { "epoch": 0.9451399491094148, "grad_norm": 7.415975395701584, "learning_rate": 9.760507393776694e-06, "loss": 0.3126, "step": 46430 }, { "epoch": 0.9453435114503816, "grad_norm": 5.882476653700551, "learning_rate": 9.760290066566825e-06, "loss": 0.3088, "step": 46440 }, { "epoch": 0.9455470737913486, "grad_norm": 8.106584691028196, "learning_rate": 9.760072643216764e-06, "loss": 0.316, "step": 46450 }, { "epoch": 0.9457506361323155, "grad_norm": 11.113241378109405, "learning_rate": 9.759855123730905e-06, "loss": 0.3641, "step": 46460 }, { "epoch": 0.9459541984732824, "grad_norm": 4.1952520703682445, "learning_rate": 9.759637508113642e-06, "loss": 0.335, "step": 46470 }, { "epoch": 0.9461577608142494, "grad_norm": 6.323867918274377, "learning_rate": 9.75941979636937e-06, "loss": 0.2996, "step": 46480 }, { "epoch": 0.9463613231552163, "grad_norm": 4.701609950265954, "learning_rate": 9.759201988502483e-06, "loss": 0.321, "step": 46490 }, { "epoch": 0.9465648854961832, "grad_norm": 8.283683773494438, "learning_rate": 9.758984084517385e-06, "loss": 0.3687, "step": 46500 }, { "epoch": 0.9467684478371501, "grad_norm": 7.781464477064251, "learning_rate": 9.758766084418472e-06, "loss": 0.3045, "step": 46510 }, { "epoch": 0.946972010178117, "grad_norm": 7.986547867194278, "learning_rate": 9.758547988210148e-06, "loss": 0.2156, "step": 46520 }, { "epoch": 0.947175572519084, "grad_norm": 9.748914701944377, "learning_rate": 9.75832979589682e-06, "loss": 0.2633, "step": 46530 }, { "epoch": 0.9473791348600509, "grad_norm": 4.044899506257607, "learning_rate": 9.758111507482891e-06, "loss": 0.3064, "step": 46540 }, { "epoch": 0.9475826972010178, "grad_norm": 8.893983120868327, "learning_rate": 9.757893122972773e-06, "loss": 0.3099, "step": 46550 }, { "epoch": 0.9477862595419847, "grad_norm": 7.048361258022219, "learning_rate": 9.757674642370875e-06, "loss": 0.2602, "step": 46560 }, { "epoch": 0.9479898218829517, "grad_norm": 13.535562676427395, "learning_rate": 9.75745606568161e-06, "loss": 0.3882, "step": 46570 }, { "epoch": 0.9481933842239185, "grad_norm": 11.110970349135284, "learning_rate": 9.757237392909394e-06, "loss": 0.2726, "step": 46580 }, { "epoch": 0.9483969465648855, "grad_norm": 8.538196132597477, "learning_rate": 9.757018624058638e-06, "loss": 0.2854, "step": 46590 }, { "epoch": 0.9486005089058525, "grad_norm": 6.690859144547603, "learning_rate": 9.756799759133767e-06, "loss": 0.2321, "step": 46600 }, { "epoch": 0.9488040712468193, "grad_norm": 5.51442743340614, "learning_rate": 9.756580798139196e-06, "loss": 0.317, "step": 46610 }, { "epoch": 0.9490076335877863, "grad_norm": 5.652364789396657, "learning_rate": 9.756361741079351e-06, "loss": 0.2663, "step": 46620 }, { "epoch": 0.9492111959287531, "grad_norm": 6.361835705468421, "learning_rate": 9.756142587958653e-06, "loss": 0.3178, "step": 46630 }, { "epoch": 0.9494147582697201, "grad_norm": 7.388266723847296, "learning_rate": 9.755923338781529e-06, "loss": 0.2542, "step": 46640 }, { "epoch": 0.9496183206106871, "grad_norm": 8.594364861502685, "learning_rate": 9.75570399355241e-06, "loss": 0.3132, "step": 46650 }, { "epoch": 0.9498218829516539, "grad_norm": 6.543770751492861, "learning_rate": 9.755484552275722e-06, "loss": 0.3265, "step": 46660 }, { "epoch": 0.9500254452926209, "grad_norm": 6.9324417979790365, "learning_rate": 9.755265014955899e-06, "loss": 0.3188, "step": 46670 }, { "epoch": 0.9502290076335878, "grad_norm": 17.129302042277157, "learning_rate": 9.755045381597374e-06, "loss": 0.3677, "step": 46680 }, { "epoch": 0.9504325699745547, "grad_norm": 9.568930472264126, "learning_rate": 9.754825652204585e-06, "loss": 0.2414, "step": 46690 }, { "epoch": 0.9506361323155216, "grad_norm": 7.04449901458392, "learning_rate": 9.754605826781965e-06, "loss": 0.2003, "step": 46700 }, { "epoch": 0.9508396946564885, "grad_norm": 19.466717269260226, "learning_rate": 9.754385905333959e-06, "loss": 0.3374, "step": 46710 }, { "epoch": 0.9510432569974555, "grad_norm": 8.006119031597951, "learning_rate": 9.754165887865004e-06, "loss": 0.3114, "step": 46720 }, { "epoch": 0.9512468193384224, "grad_norm": 6.638857833961425, "learning_rate": 9.753945774379545e-06, "loss": 0.3504, "step": 46730 }, { "epoch": 0.9514503816793893, "grad_norm": 4.870604144839336, "learning_rate": 9.75372556488203e-06, "loss": 0.3597, "step": 46740 }, { "epoch": 0.9516539440203562, "grad_norm": 6.809259699202609, "learning_rate": 9.753505259376904e-06, "loss": 0.3644, "step": 46750 }, { "epoch": 0.9518575063613232, "grad_norm": 9.599159848166895, "learning_rate": 9.753284857868614e-06, "loss": 0.2948, "step": 46760 }, { "epoch": 0.9520610687022901, "grad_norm": 7.217201807347491, "learning_rate": 9.753064360361617e-06, "loss": 0.3469, "step": 46770 }, { "epoch": 0.952264631043257, "grad_norm": 16.794762010668517, "learning_rate": 9.752843766860364e-06, "loss": 0.2773, "step": 46780 }, { "epoch": 0.952468193384224, "grad_norm": 5.139671507562116, "learning_rate": 9.752623077369307e-06, "loss": 0.3509, "step": 46790 }, { "epoch": 0.9526717557251908, "grad_norm": 5.584497895202074, "learning_rate": 9.752402291892909e-06, "loss": 0.3997, "step": 46800 }, { "epoch": 0.9528753180661578, "grad_norm": 11.55584826043831, "learning_rate": 9.752181410435623e-06, "loss": 0.2961, "step": 46810 }, { "epoch": 0.9530788804071246, "grad_norm": 12.271953975246861, "learning_rate": 9.751960433001913e-06, "loss": 0.3237, "step": 46820 }, { "epoch": 0.9532824427480916, "grad_norm": 8.025467523689123, "learning_rate": 9.751739359596242e-06, "loss": 0.391, "step": 46830 }, { "epoch": 0.9534860050890586, "grad_norm": 9.891445706700797, "learning_rate": 9.751518190223075e-06, "loss": 0.3781, "step": 46840 }, { "epoch": 0.9536895674300254, "grad_norm": 7.218804427126389, "learning_rate": 9.751296924886878e-06, "loss": 0.304, "step": 46850 }, { "epoch": 0.9538931297709924, "grad_norm": 7.708379501091815, "learning_rate": 9.751075563592118e-06, "loss": 0.2476, "step": 46860 }, { "epoch": 0.9540966921119592, "grad_norm": 11.734246926271481, "learning_rate": 9.75085410634327e-06, "loss": 0.3266, "step": 46870 }, { "epoch": 0.9543002544529262, "grad_norm": 14.742087226447556, "learning_rate": 9.750632553144804e-06, "loss": 0.2601, "step": 46880 }, { "epoch": 0.9545038167938932, "grad_norm": 5.365229666879689, "learning_rate": 9.750410904001193e-06, "loss": 0.3955, "step": 46890 }, { "epoch": 0.95470737913486, "grad_norm": 7.907484187225018, "learning_rate": 9.750189158916917e-06, "loss": 0.2758, "step": 46900 }, { "epoch": 0.954910941475827, "grad_norm": 4.347441009572241, "learning_rate": 9.749967317896452e-06, "loss": 0.2397, "step": 46910 }, { "epoch": 0.9551145038167939, "grad_norm": 6.850678307481148, "learning_rate": 9.74974538094428e-06, "loss": 0.3363, "step": 46920 }, { "epoch": 0.9553180661577608, "grad_norm": 7.2798641136900075, "learning_rate": 9.749523348064881e-06, "loss": 0.2785, "step": 46930 }, { "epoch": 0.9555216284987277, "grad_norm": 6.249836499833664, "learning_rate": 9.749301219262741e-06, "loss": 0.3552, "step": 46940 }, { "epoch": 0.9557251908396946, "grad_norm": 7.548410197566982, "learning_rate": 9.749078994542347e-06, "loss": 0.2695, "step": 46950 }, { "epoch": 0.9559287531806616, "grad_norm": 6.913411305276673, "learning_rate": 9.748856673908185e-06, "loss": 0.2554, "step": 46960 }, { "epoch": 0.9561323155216285, "grad_norm": 12.677384241975174, "learning_rate": 9.748634257364747e-06, "loss": 0.4604, "step": 46970 }, { "epoch": 0.9563358778625954, "grad_norm": 59.54661638858759, "learning_rate": 9.748411744916523e-06, "loss": 0.2728, "step": 46980 }, { "epoch": 0.9565394402035623, "grad_norm": 11.335477443432968, "learning_rate": 9.748189136568007e-06, "loss": 0.3658, "step": 46990 }, { "epoch": 0.9567430025445293, "grad_norm": 32.01999373091756, "learning_rate": 9.747966432323698e-06, "loss": 0.3484, "step": 47000 }, { "epoch": 0.9569465648854962, "grad_norm": 17.992614428558547, "learning_rate": 9.74774363218809e-06, "loss": 0.284, "step": 47010 }, { "epoch": 0.9571501272264631, "grad_norm": 10.076562268649056, "learning_rate": 9.747520736165685e-06, "loss": 0.3142, "step": 47020 }, { "epoch": 0.95735368956743, "grad_norm": 4.478877678687194, "learning_rate": 9.747297744260984e-06, "loss": 0.1665, "step": 47030 }, { "epoch": 0.9575572519083969, "grad_norm": 6.8205211494609745, "learning_rate": 9.74707465647849e-06, "loss": 0.2987, "step": 47040 }, { "epoch": 0.9577608142493639, "grad_norm": 6.745380417841452, "learning_rate": 9.74685147282271e-06, "loss": 0.3044, "step": 47050 }, { "epoch": 0.9579643765903307, "grad_norm": 7.687141772651523, "learning_rate": 9.746628193298149e-06, "loss": 0.2839, "step": 47060 }, { "epoch": 0.9581679389312977, "grad_norm": 10.470969487979835, "learning_rate": 9.746404817909318e-06, "loss": 0.3881, "step": 47070 }, { "epoch": 0.9583715012722647, "grad_norm": 9.05306437205627, "learning_rate": 9.74618134666073e-06, "loss": 0.2731, "step": 47080 }, { "epoch": 0.9585750636132315, "grad_norm": 6.621874847211212, "learning_rate": 9.745957779556897e-06, "loss": 0.2861, "step": 47090 }, { "epoch": 0.9587786259541985, "grad_norm": 7.219365861466626, "learning_rate": 9.745734116602332e-06, "loss": 0.2491, "step": 47100 }, { "epoch": 0.9589821882951653, "grad_norm": 18.849340516138014, "learning_rate": 9.745510357801556e-06, "loss": 0.3631, "step": 47110 }, { "epoch": 0.9591857506361323, "grad_norm": 9.141349616450668, "learning_rate": 9.745286503159085e-06, "loss": 0.2674, "step": 47120 }, { "epoch": 0.9593893129770993, "grad_norm": 4.330230961620932, "learning_rate": 9.74506255267944e-06, "loss": 0.3331, "step": 47130 }, { "epoch": 0.9595928753180661, "grad_norm": 10.993302734520586, "learning_rate": 9.744838506367147e-06, "loss": 0.297, "step": 47140 }, { "epoch": 0.9597964376590331, "grad_norm": 8.244275356328851, "learning_rate": 9.74461436422673e-06, "loss": 0.3802, "step": 47150 }, { "epoch": 0.96, "grad_norm": 13.153865699200264, "learning_rate": 9.744390126262713e-06, "loss": 0.3269, "step": 47160 }, { "epoch": 0.9602035623409669, "grad_norm": 1.2593707878516396, "learning_rate": 9.744165792479628e-06, "loss": 0.2525, "step": 47170 }, { "epoch": 0.9604071246819338, "grad_norm": 4.400403753147557, "learning_rate": 9.743941362882002e-06, "loss": 0.3381, "step": 47180 }, { "epoch": 0.9606106870229008, "grad_norm": 16.468280231640573, "learning_rate": 9.743716837474372e-06, "loss": 0.4135, "step": 47190 }, { "epoch": 0.9608142493638677, "grad_norm": 14.093066172670612, "learning_rate": 9.74349221626127e-06, "loss": 0.2917, "step": 47200 }, { "epoch": 0.9610178117048346, "grad_norm": 8.379613178307768, "learning_rate": 9.743267499247233e-06, "loss": 0.3538, "step": 47210 }, { "epoch": 0.9612213740458015, "grad_norm": 7.912716090392999, "learning_rate": 9.743042686436801e-06, "loss": 0.2726, "step": 47220 }, { "epoch": 0.9614249363867684, "grad_norm": 3.6284117830549483, "learning_rate": 9.742817777834512e-06, "loss": 0.3261, "step": 47230 }, { "epoch": 0.9616284987277354, "grad_norm": 8.342503808641778, "learning_rate": 9.742592773444907e-06, "loss": 0.2934, "step": 47240 }, { "epoch": 0.9618320610687023, "grad_norm": 12.986384674247358, "learning_rate": 9.742367673272535e-06, "loss": 0.3433, "step": 47250 }, { "epoch": 0.9620356234096692, "grad_norm": 5.826039018615631, "learning_rate": 9.74214247732194e-06, "loss": 0.3396, "step": 47260 }, { "epoch": 0.9622391857506362, "grad_norm": 18.399383290123374, "learning_rate": 9.741917185597669e-06, "loss": 0.3243, "step": 47270 }, { "epoch": 0.962442748091603, "grad_norm": 7.74297332026352, "learning_rate": 9.741691798104274e-06, "loss": 0.3431, "step": 47280 }, { "epoch": 0.96264631043257, "grad_norm": 9.897145958356983, "learning_rate": 9.741466314846304e-06, "loss": 0.3128, "step": 47290 }, { "epoch": 0.9628498727735368, "grad_norm": 5.975798895234129, "learning_rate": 9.741240735828316e-06, "loss": 0.2749, "step": 47300 }, { "epoch": 0.9630534351145038, "grad_norm": 9.849160630407356, "learning_rate": 9.741015061054865e-06, "loss": 0.339, "step": 47310 }, { "epoch": 0.9632569974554708, "grad_norm": 6.724543676776768, "learning_rate": 9.740789290530508e-06, "loss": 0.2698, "step": 47320 }, { "epoch": 0.9634605597964376, "grad_norm": 5.983406344505986, "learning_rate": 9.740563424259804e-06, "loss": 0.2837, "step": 47330 }, { "epoch": 0.9636641221374046, "grad_norm": 18.71110580941272, "learning_rate": 9.740337462247318e-06, "loss": 0.3111, "step": 47340 }, { "epoch": 0.9638676844783715, "grad_norm": 16.43549809544859, "learning_rate": 9.74011140449761e-06, "loss": 0.2674, "step": 47350 }, { "epoch": 0.9640712468193384, "grad_norm": 41.64184479692648, "learning_rate": 9.739885251015246e-06, "loss": 0.297, "step": 47360 }, { "epoch": 0.9642748091603054, "grad_norm": 5.399056021117263, "learning_rate": 9.739659001804797e-06, "loss": 0.2608, "step": 47370 }, { "epoch": 0.9644783715012722, "grad_norm": 5.195747029654527, "learning_rate": 9.739432656870828e-06, "loss": 0.3534, "step": 47380 }, { "epoch": 0.9646819338422392, "grad_norm": 13.23403634430802, "learning_rate": 9.739206216217914e-06, "loss": 0.2482, "step": 47390 }, { "epoch": 0.9648854961832061, "grad_norm": 9.318181079548888, "learning_rate": 9.738979679850624e-06, "loss": 0.338, "step": 47400 }, { "epoch": 0.965089058524173, "grad_norm": 8.094934306997036, "learning_rate": 9.738753047773536e-06, "loss": 0.2319, "step": 47410 }, { "epoch": 0.9652926208651399, "grad_norm": 3.284462277719749, "learning_rate": 9.738526319991229e-06, "loss": 0.2472, "step": 47420 }, { "epoch": 0.9654961832061069, "grad_norm": 13.713549750178466, "learning_rate": 9.738299496508278e-06, "loss": 0.3078, "step": 47430 }, { "epoch": 0.9656997455470738, "grad_norm": 4.043317913533992, "learning_rate": 9.738072577329265e-06, "loss": 0.1881, "step": 47440 }, { "epoch": 0.9659033078880407, "grad_norm": 10.17737708408842, "learning_rate": 9.737845562458776e-06, "loss": 0.2561, "step": 47450 }, { "epoch": 0.9661068702290077, "grad_norm": 14.68421399998713, "learning_rate": 9.737618451901392e-06, "loss": 0.3363, "step": 47460 }, { "epoch": 0.9663104325699745, "grad_norm": 6.967539634568439, "learning_rate": 9.737391245661701e-06, "loss": 0.3209, "step": 47470 }, { "epoch": 0.9665139949109415, "grad_norm": 12.334269365128197, "learning_rate": 9.737163943744294e-06, "loss": 0.3275, "step": 47480 }, { "epoch": 0.9667175572519084, "grad_norm": 8.198765334329597, "learning_rate": 9.736936546153758e-06, "loss": 0.3191, "step": 47490 }, { "epoch": 0.9669211195928753, "grad_norm": 7.14361414472151, "learning_rate": 9.736709052894688e-06, "loss": 0.2781, "step": 47500 }, { "epoch": 0.9671246819338423, "grad_norm": 9.48038431037019, "learning_rate": 9.736481463971677e-06, "loss": 0.2867, "step": 47510 }, { "epoch": 0.9673282442748091, "grad_norm": 5.659134188342267, "learning_rate": 9.736253779389323e-06, "loss": 0.2864, "step": 47520 }, { "epoch": 0.9675318066157761, "grad_norm": 6.002856600449962, "learning_rate": 9.736025999152222e-06, "loss": 0.2771, "step": 47530 }, { "epoch": 0.967735368956743, "grad_norm": 2.0393793422049535, "learning_rate": 9.735798123264978e-06, "loss": 0.289, "step": 47540 }, { "epoch": 0.9679389312977099, "grad_norm": 12.759797312533124, "learning_rate": 9.735570151732192e-06, "loss": 0.2933, "step": 47550 }, { "epoch": 0.9681424936386769, "grad_norm": 11.970240363366278, "learning_rate": 9.735342084558467e-06, "loss": 0.3184, "step": 47560 }, { "epoch": 0.9683460559796437, "grad_norm": 5.826359266778894, "learning_rate": 9.735113921748409e-06, "loss": 0.3983, "step": 47570 }, { "epoch": 0.9685496183206107, "grad_norm": 5.584754319466636, "learning_rate": 9.734885663306625e-06, "loss": 0.3047, "step": 47580 }, { "epoch": 0.9687531806615776, "grad_norm": 7.546606802841367, "learning_rate": 9.73465730923773e-06, "loss": 0.3597, "step": 47590 }, { "epoch": 0.9689567430025445, "grad_norm": 4.173225342474941, "learning_rate": 9.734428859546329e-06, "loss": 0.3576, "step": 47600 }, { "epoch": 0.9691603053435115, "grad_norm": 4.434740061313827, "learning_rate": 9.73420031423704e-06, "loss": 0.2727, "step": 47610 }, { "epoch": 0.9693638676844784, "grad_norm": 4.306359669395366, "learning_rate": 9.73397167331448e-06, "loss": 0.3323, "step": 47620 }, { "epoch": 0.9695674300254453, "grad_norm": 12.459501032320992, "learning_rate": 9.733742936783262e-06, "loss": 0.3011, "step": 47630 }, { "epoch": 0.9697709923664122, "grad_norm": 25.67862100990498, "learning_rate": 9.733514104648011e-06, "loss": 0.3553, "step": 47640 }, { "epoch": 0.9699745547073791, "grad_norm": 9.537343059551464, "learning_rate": 9.733285176913346e-06, "loss": 0.3227, "step": 47650 }, { "epoch": 0.9701781170483461, "grad_norm": 22.828702166786, "learning_rate": 9.733056153583887e-06, "loss": 0.3322, "step": 47660 }, { "epoch": 0.970381679389313, "grad_norm": 6.412927395208218, "learning_rate": 9.732827034664264e-06, "loss": 0.3298, "step": 47670 }, { "epoch": 0.9705852417302799, "grad_norm": 10.00828453678326, "learning_rate": 9.732597820159107e-06, "loss": 0.2199, "step": 47680 }, { "epoch": 0.9707888040712468, "grad_norm": 16.533715679792092, "learning_rate": 9.732368510073038e-06, "loss": 0.3782, "step": 47690 }, { "epoch": 0.9709923664122138, "grad_norm": 9.601137672385946, "learning_rate": 9.732139104410691e-06, "loss": 0.2921, "step": 47700 }, { "epoch": 0.9711959287531806, "grad_norm": 9.34639805145361, "learning_rate": 9.7319096031767e-06, "loss": 0.4587, "step": 47710 }, { "epoch": 0.9713994910941476, "grad_norm": 5.3927451682025564, "learning_rate": 9.731680006375704e-06, "loss": 0.3628, "step": 47720 }, { "epoch": 0.9716030534351145, "grad_norm": 16.477887875197577, "learning_rate": 9.731450314012332e-06, "loss": 0.2871, "step": 47730 }, { "epoch": 0.9718066157760814, "grad_norm": 11.699808542932482, "learning_rate": 9.731220526091227e-06, "loss": 0.318, "step": 47740 }, { "epoch": 0.9720101781170484, "grad_norm": 9.747521101993861, "learning_rate": 9.730990642617032e-06, "loss": 0.341, "step": 47750 }, { "epoch": 0.9722137404580152, "grad_norm": 8.374084106251681, "learning_rate": 9.730760663594385e-06, "loss": 0.2915, "step": 47760 }, { "epoch": 0.9724173027989822, "grad_norm": 6.1874643427989575, "learning_rate": 9.730530589027936e-06, "loss": 0.3135, "step": 47770 }, { "epoch": 0.9726208651399492, "grad_norm": 18.700124678834506, "learning_rate": 9.730300418922327e-06, "loss": 0.246, "step": 47780 }, { "epoch": 0.972824427480916, "grad_norm": 8.21492544783182, "learning_rate": 9.73007015328221e-06, "loss": 0.2946, "step": 47790 }, { "epoch": 0.973027989821883, "grad_norm": 6.38211307472078, "learning_rate": 9.72983979211223e-06, "loss": 0.2659, "step": 47800 }, { "epoch": 0.9732315521628498, "grad_norm": 42.6694004484686, "learning_rate": 9.729609335417046e-06, "loss": 0.3775, "step": 47810 }, { "epoch": 0.9734351145038168, "grad_norm": 13.025355741218782, "learning_rate": 9.72937878320131e-06, "loss": 0.2308, "step": 47820 }, { "epoch": 0.9736386768447837, "grad_norm": 35.89540272159153, "learning_rate": 9.729148135469678e-06, "loss": 0.2463, "step": 47830 }, { "epoch": 0.9738422391857506, "grad_norm": 10.964266433983836, "learning_rate": 9.728917392226808e-06, "loss": 0.2749, "step": 47840 }, { "epoch": 0.9740458015267176, "grad_norm": 7.260459157227236, "learning_rate": 9.728686553477363e-06, "loss": 0.3821, "step": 47850 }, { "epoch": 0.9742493638676845, "grad_norm": 9.346555997087291, "learning_rate": 9.728455619225998e-06, "loss": 0.2953, "step": 47860 }, { "epoch": 0.9744529262086514, "grad_norm": 12.76575096709661, "learning_rate": 9.728224589477384e-06, "loss": 0.2269, "step": 47870 }, { "epoch": 0.9746564885496183, "grad_norm": 14.867830677026305, "learning_rate": 9.727993464236185e-06, "loss": 0.4418, "step": 47880 }, { "epoch": 0.9748600508905853, "grad_norm": 6.047460180200062, "learning_rate": 9.727762243507068e-06, "loss": 0.2262, "step": 47890 }, { "epoch": 0.9750636132315522, "grad_norm": 6.450874690384682, "learning_rate": 9.727530927294701e-06, "loss": 0.3174, "step": 47900 }, { "epoch": 0.9752671755725191, "grad_norm": 4.627187821017051, "learning_rate": 9.727299515603761e-06, "loss": 0.1974, "step": 47910 }, { "epoch": 0.975470737913486, "grad_norm": 10.10150710167497, "learning_rate": 9.727068008438918e-06, "loss": 0.3088, "step": 47920 }, { "epoch": 0.9756743002544529, "grad_norm": 12.165934495370422, "learning_rate": 9.726836405804847e-06, "loss": 0.322, "step": 47930 }, { "epoch": 0.9758778625954199, "grad_norm": 7.514918956580254, "learning_rate": 9.726604707706226e-06, "loss": 0.3462, "step": 47940 }, { "epoch": 0.9760814249363867, "grad_norm": 10.277127953967398, "learning_rate": 9.726372914147737e-06, "loss": 0.2625, "step": 47950 }, { "epoch": 0.9762849872773537, "grad_norm": 12.611623061922943, "learning_rate": 9.72614102513406e-06, "loss": 0.2847, "step": 47960 }, { "epoch": 0.9764885496183207, "grad_norm": 8.178724529443725, "learning_rate": 9.725909040669876e-06, "loss": 0.2825, "step": 47970 }, { "epoch": 0.9766921119592875, "grad_norm": 15.17729612146974, "learning_rate": 9.725676960759872e-06, "loss": 0.3041, "step": 47980 }, { "epoch": 0.9768956743002545, "grad_norm": 13.118611411371806, "learning_rate": 9.725444785408736e-06, "loss": 0.321, "step": 47990 }, { "epoch": 0.9770992366412213, "grad_norm": 15.52902133444492, "learning_rate": 9.725212514621156e-06, "loss": 0.2569, "step": 48000 }, { "epoch": 0.9773027989821883, "grad_norm": 11.868858992116568, "learning_rate": 9.724980148401824e-06, "loss": 0.2453, "step": 48010 }, { "epoch": 0.9775063613231553, "grad_norm": 24.962398496549003, "learning_rate": 9.72474768675543e-06, "loss": 0.2841, "step": 48020 }, { "epoch": 0.9777099236641221, "grad_norm": 11.34014847364133, "learning_rate": 9.724515129686673e-06, "loss": 0.3708, "step": 48030 }, { "epoch": 0.9779134860050891, "grad_norm": 6.400345049377266, "learning_rate": 9.724282477200246e-06, "loss": 0.3237, "step": 48040 }, { "epoch": 0.978117048346056, "grad_norm": 10.697203896384531, "learning_rate": 9.72404972930085e-06, "loss": 0.3007, "step": 48050 }, { "epoch": 0.9783206106870229, "grad_norm": 8.56161346346573, "learning_rate": 9.723816885993187e-06, "loss": 0.3495, "step": 48060 }, { "epoch": 0.9785241730279898, "grad_norm": 5.438838956579199, "learning_rate": 9.723583947281958e-06, "loss": 0.2902, "step": 48070 }, { "epoch": 0.9787277353689567, "grad_norm": 7.900654666309709, "learning_rate": 9.723350913171866e-06, "loss": 0.3373, "step": 48080 }, { "epoch": 0.9789312977099237, "grad_norm": 8.685346038658388, "learning_rate": 9.723117783667617e-06, "loss": 0.2546, "step": 48090 }, { "epoch": 0.9791348600508906, "grad_norm": 10.541491127677808, "learning_rate": 9.722884558773923e-06, "loss": 0.3264, "step": 48100 }, { "epoch": 0.9793384223918575, "grad_norm": 9.378515131587752, "learning_rate": 9.722651238495491e-06, "loss": 0.3493, "step": 48110 }, { "epoch": 0.9795419847328244, "grad_norm": 11.56135286458837, "learning_rate": 9.722417822837036e-06, "loss": 0.3483, "step": 48120 }, { "epoch": 0.9797455470737914, "grad_norm": 12.911507068907232, "learning_rate": 9.722184311803271e-06, "loss": 0.2635, "step": 48130 }, { "epoch": 0.9799491094147583, "grad_norm": 12.47211108935023, "learning_rate": 9.72195070539891e-06, "loss": 0.2465, "step": 48140 }, { "epoch": 0.9801526717557252, "grad_norm": 13.671010923105642, "learning_rate": 9.721717003628674e-06, "loss": 0.227, "step": 48150 }, { "epoch": 0.9803562340966921, "grad_norm": 14.414435180385315, "learning_rate": 9.721483206497281e-06, "loss": 0.3192, "step": 48160 }, { "epoch": 0.980559796437659, "grad_norm": 4.6537902709072885, "learning_rate": 9.721249314009454e-06, "loss": 0.2541, "step": 48170 }, { "epoch": 0.980763358778626, "grad_norm": 12.27454574834799, "learning_rate": 9.721015326169914e-06, "loss": 0.3144, "step": 48180 }, { "epoch": 0.9809669211195928, "grad_norm": 9.03927599010573, "learning_rate": 9.720781242983391e-06, "loss": 0.313, "step": 48190 }, { "epoch": 0.9811704834605598, "grad_norm": 4.067489670862722, "learning_rate": 9.720547064454611e-06, "loss": 0.4045, "step": 48200 }, { "epoch": 0.9813740458015268, "grad_norm": 10.196036388796054, "learning_rate": 9.720312790588302e-06, "loss": 0.2607, "step": 48210 }, { "epoch": 0.9815776081424936, "grad_norm": 7.60299615810191, "learning_rate": 9.720078421389196e-06, "loss": 0.324, "step": 48220 }, { "epoch": 0.9817811704834606, "grad_norm": 7.585012217739228, "learning_rate": 9.719843956862027e-06, "loss": 0.2933, "step": 48230 }, { "epoch": 0.9819847328244274, "grad_norm": 11.794783226759822, "learning_rate": 9.71960939701153e-06, "loss": 0.2968, "step": 48240 }, { "epoch": 0.9821882951653944, "grad_norm": 4.618563922603514, "learning_rate": 9.719374741842444e-06, "loss": 0.3078, "step": 48250 }, { "epoch": 0.9823918575063614, "grad_norm": 8.116184758508824, "learning_rate": 9.719139991359506e-06, "loss": 0.3153, "step": 48260 }, { "epoch": 0.9825954198473282, "grad_norm": 9.244930667478316, "learning_rate": 9.718905145567457e-06, "loss": 0.3406, "step": 48270 }, { "epoch": 0.9827989821882952, "grad_norm": 4.311937006962542, "learning_rate": 9.71867020447104e-06, "loss": 0.2849, "step": 48280 }, { "epoch": 0.9830025445292621, "grad_norm": 12.39209651304269, "learning_rate": 9.718435168075002e-06, "loss": 0.3134, "step": 48290 }, { "epoch": 0.983206106870229, "grad_norm": 12.59572125777806, "learning_rate": 9.718200036384086e-06, "loss": 0.2811, "step": 48300 }, { "epoch": 0.9834096692111959, "grad_norm": 6.143476613463613, "learning_rate": 9.717964809403047e-06, "loss": 0.289, "step": 48310 }, { "epoch": 0.9836132315521628, "grad_norm": 5.780384565147215, "learning_rate": 9.71772948713663e-06, "loss": 0.3106, "step": 48320 }, { "epoch": 0.9838167938931298, "grad_norm": 13.881421110743247, "learning_rate": 9.717494069589591e-06, "loss": 0.3128, "step": 48330 }, { "epoch": 0.9840203562340967, "grad_norm": 10.594114870219512, "learning_rate": 9.717258556766682e-06, "loss": 0.3126, "step": 48340 }, { "epoch": 0.9842239185750636, "grad_norm": 8.377262892749055, "learning_rate": 9.71702294867266e-06, "loss": 0.325, "step": 48350 }, { "epoch": 0.9844274809160305, "grad_norm": 28.16185771575302, "learning_rate": 9.716787245312284e-06, "loss": 0.3234, "step": 48360 }, { "epoch": 0.9846310432569975, "grad_norm": 4.194110515476824, "learning_rate": 9.716551446690316e-06, "loss": 0.3226, "step": 48370 }, { "epoch": 0.9848346055979644, "grad_norm": 3.285993367995731, "learning_rate": 9.716315552811514e-06, "loss": 0.3323, "step": 48380 }, { "epoch": 0.9850381679389313, "grad_norm": 12.471084950923927, "learning_rate": 9.716079563680647e-06, "loss": 0.2451, "step": 48390 }, { "epoch": 0.9852417302798983, "grad_norm": 6.965941692910294, "learning_rate": 9.715843479302479e-06, "loss": 0.2187, "step": 48400 }, { "epoch": 0.9854452926208651, "grad_norm": 34.66291492339592, "learning_rate": 9.715607299681778e-06, "loss": 0.2555, "step": 48410 }, { "epoch": 0.9856488549618321, "grad_norm": 4.909954187356723, "learning_rate": 9.715371024823313e-06, "loss": 0.2573, "step": 48420 }, { "epoch": 0.9858524173027989, "grad_norm": 6.22128916930925, "learning_rate": 9.715134654731858e-06, "loss": 0.335, "step": 48430 }, { "epoch": 0.9860559796437659, "grad_norm": 9.683269242492395, "learning_rate": 9.714898189412185e-06, "loss": 0.3156, "step": 48440 }, { "epoch": 0.9862595419847329, "grad_norm": 7.35294782297903, "learning_rate": 9.71466162886907e-06, "loss": 0.3786, "step": 48450 }, { "epoch": 0.9864631043256997, "grad_norm": 6.559745752040727, "learning_rate": 9.71442497310729e-06, "loss": 0.3193, "step": 48460 }, { "epoch": 0.9866666666666667, "grad_norm": 9.715568357950017, "learning_rate": 9.714188222131627e-06, "loss": 0.2216, "step": 48470 }, { "epoch": 0.9868702290076335, "grad_norm": 6.30292659045522, "learning_rate": 9.713951375946863e-06, "loss": 0.2664, "step": 48480 }, { "epoch": 0.9870737913486005, "grad_norm": 5.839083248062345, "learning_rate": 9.713714434557775e-06, "loss": 0.247, "step": 48490 }, { "epoch": 0.9872773536895675, "grad_norm": 14.51696436707577, "learning_rate": 9.713477397969155e-06, "loss": 0.3168, "step": 48500 }, { "epoch": 0.9874809160305343, "grad_norm": 12.97388095083541, "learning_rate": 9.713240266185789e-06, "loss": 0.2982, "step": 48510 }, { "epoch": 0.9876844783715013, "grad_norm": 5.848936970721313, "learning_rate": 9.713003039212464e-06, "loss": 0.2565, "step": 48520 }, { "epoch": 0.9878880407124682, "grad_norm": 11.09886352580432, "learning_rate": 9.712765717053972e-06, "loss": 0.3017, "step": 48530 }, { "epoch": 0.9880916030534351, "grad_norm": 8.65512399698048, "learning_rate": 9.712528299715108e-06, "loss": 0.3519, "step": 48540 }, { "epoch": 0.988295165394402, "grad_norm": 14.088449032356756, "learning_rate": 9.712290787200664e-06, "loss": 0.2587, "step": 48550 }, { "epoch": 0.988498727735369, "grad_norm": 6.4548095229011935, "learning_rate": 9.712053179515438e-06, "loss": 0.3143, "step": 48560 }, { "epoch": 0.9887022900763359, "grad_norm": 22.469709678653892, "learning_rate": 9.711815476664229e-06, "loss": 0.2778, "step": 48570 }, { "epoch": 0.9889058524173028, "grad_norm": 5.666178628579337, "learning_rate": 9.711577678651838e-06, "loss": 0.2409, "step": 48580 }, { "epoch": 0.9891094147582697, "grad_norm": 7.301407877691796, "learning_rate": 9.711339785483067e-06, "loss": 0.3658, "step": 48590 }, { "epoch": 0.9893129770992366, "grad_norm": 2.493294596571134, "learning_rate": 9.71110179716272e-06, "loss": 0.3748, "step": 48600 }, { "epoch": 0.9895165394402036, "grad_norm": 5.928430144014402, "learning_rate": 9.710863713695607e-06, "loss": 0.3516, "step": 48610 }, { "epoch": 0.9897201017811705, "grad_norm": 9.079979158374234, "learning_rate": 9.710625535086532e-06, "loss": 0.3774, "step": 48620 }, { "epoch": 0.9899236641221374, "grad_norm": 4.337843970978986, "learning_rate": 9.710387261340307e-06, "loss": 0.2947, "step": 48630 }, { "epoch": 0.9901272264631044, "grad_norm": 6.538976419273497, "learning_rate": 9.710148892461745e-06, "loss": 0.3461, "step": 48640 }, { "epoch": 0.9903307888040712, "grad_norm": 2.2854534612602384, "learning_rate": 9.709910428455659e-06, "loss": 0.2472, "step": 48650 }, { "epoch": 0.9905343511450382, "grad_norm": 8.006143525603257, "learning_rate": 9.709671869326867e-06, "loss": 0.2825, "step": 48660 }, { "epoch": 0.990737913486005, "grad_norm": 8.492768333400702, "learning_rate": 9.709433215080184e-06, "loss": 0.3056, "step": 48670 }, { "epoch": 0.990941475826972, "grad_norm": 7.674433951078266, "learning_rate": 9.709194465720431e-06, "loss": 0.3762, "step": 48680 }, { "epoch": 0.991145038167939, "grad_norm": 6.180433312724238, "learning_rate": 9.708955621252432e-06, "loss": 0.2203, "step": 48690 }, { "epoch": 0.9913486005089058, "grad_norm": 9.716453388830478, "learning_rate": 9.708716681681007e-06, "loss": 0.2695, "step": 48700 }, { "epoch": 0.9915521628498728, "grad_norm": 11.521256136205201, "learning_rate": 9.708477647010985e-06, "loss": 0.3728, "step": 48710 }, { "epoch": 0.9917557251908397, "grad_norm": 8.773674607907909, "learning_rate": 9.708238517247193e-06, "loss": 0.214, "step": 48720 }, { "epoch": 0.9919592875318066, "grad_norm": 11.850864015644778, "learning_rate": 9.70799929239446e-06, "loss": 0.3354, "step": 48730 }, { "epoch": 0.9921628498727736, "grad_norm": 7.7341148704732685, "learning_rate": 9.707759972457617e-06, "loss": 0.2629, "step": 48740 }, { "epoch": 0.9923664122137404, "grad_norm": 11.785060997509756, "learning_rate": 9.707520557441498e-06, "loss": 0.3753, "step": 48750 }, { "epoch": 0.9925699745547074, "grad_norm": 11.427215394260775, "learning_rate": 9.707281047350938e-06, "loss": 0.313, "step": 48760 }, { "epoch": 0.9927735368956743, "grad_norm": 28.400225818109938, "learning_rate": 9.707041442190774e-06, "loss": 0.4116, "step": 48770 }, { "epoch": 0.9929770992366412, "grad_norm": 7.727963721471031, "learning_rate": 9.706801741965845e-06, "loss": 0.2948, "step": 48780 }, { "epoch": 0.9931806615776081, "grad_norm": 9.204476318482845, "learning_rate": 9.706561946680994e-06, "loss": 0.2495, "step": 48790 }, { "epoch": 0.9933842239185751, "grad_norm": 10.075436924557556, "learning_rate": 9.706322056341061e-06, "loss": 0.429, "step": 48800 }, { "epoch": 0.993587786259542, "grad_norm": 8.943799301468333, "learning_rate": 9.706082070950892e-06, "loss": 0.2977, "step": 48810 }, { "epoch": 0.9937913486005089, "grad_norm": 4.825481895390607, "learning_rate": 9.705841990515336e-06, "loss": 0.3314, "step": 48820 }, { "epoch": 0.9939949109414759, "grad_norm": 6.549922948293416, "learning_rate": 9.70560181503924e-06, "loss": 0.2943, "step": 48830 }, { "epoch": 0.9941984732824427, "grad_norm": 5.723427897961432, "learning_rate": 9.705361544527455e-06, "loss": 0.2709, "step": 48840 }, { "epoch": 0.9944020356234097, "grad_norm": 9.602635396657739, "learning_rate": 9.705121178984832e-06, "loss": 0.3257, "step": 48850 }, { "epoch": 0.9946055979643766, "grad_norm": 13.113637345561544, "learning_rate": 9.704880718416225e-06, "loss": 0.2902, "step": 48860 }, { "epoch": 0.9948091603053435, "grad_norm": 11.007493966788921, "learning_rate": 9.704640162826493e-06, "loss": 0.2458, "step": 48870 }, { "epoch": 0.9950127226463105, "grad_norm": 3.4308679725928406, "learning_rate": 9.704399512220495e-06, "loss": 0.4068, "step": 48880 }, { "epoch": 0.9952162849872773, "grad_norm": 7.172211968623775, "learning_rate": 9.704158766603089e-06, "loss": 0.4092, "step": 48890 }, { "epoch": 0.9954198473282443, "grad_norm": 3.0829735189202747, "learning_rate": 9.703917925979135e-06, "loss": 0.2884, "step": 48900 }, { "epoch": 0.9956234096692111, "grad_norm": 12.572590869517644, "learning_rate": 9.703676990353503e-06, "loss": 0.3006, "step": 48910 }, { "epoch": 0.9958269720101781, "grad_norm": 15.797024250165014, "learning_rate": 9.703435959731055e-06, "loss": 0.3214, "step": 48920 }, { "epoch": 0.9960305343511451, "grad_norm": 6.9941610215255565, "learning_rate": 9.703194834116661e-06, "loss": 0.2609, "step": 48930 }, { "epoch": 0.9962340966921119, "grad_norm": 9.657299378753073, "learning_rate": 9.70295361351519e-06, "loss": 0.3572, "step": 48940 }, { "epoch": 0.9964376590330789, "grad_norm": 8.84835810546944, "learning_rate": 9.702712297931512e-06, "loss": 0.255, "step": 48950 }, { "epoch": 0.9966412213740458, "grad_norm": 3.9941721776365227, "learning_rate": 9.702470887370501e-06, "loss": 0.2201, "step": 48960 }, { "epoch": 0.9968447837150127, "grad_norm": 19.973798422386743, "learning_rate": 9.702229381837034e-06, "loss": 0.2359, "step": 48970 }, { "epoch": 0.9970483460559797, "grad_norm": 9.547599200078034, "learning_rate": 9.70198778133599e-06, "loss": 0.2534, "step": 48980 }, { "epoch": 0.9972519083969466, "grad_norm": 13.767576980506073, "learning_rate": 9.701746085872246e-06, "loss": 0.3152, "step": 48990 }, { "epoch": 0.9974554707379135, "grad_norm": 14.392409024404241, "learning_rate": 9.701504295450684e-06, "loss": 0.357, "step": 49000 }, { "epoch": 0.9976590330788804, "grad_norm": 3.935191790602311, "learning_rate": 9.701262410076186e-06, "loss": 0.3026, "step": 49010 }, { "epoch": 0.9978625954198473, "grad_norm": 5.437923693030105, "learning_rate": 9.70102042975364e-06, "loss": 0.2396, "step": 49020 }, { "epoch": 0.9980661577608142, "grad_norm": 6.194085396995921, "learning_rate": 9.70077835448793e-06, "loss": 0.2321, "step": 49030 }, { "epoch": 0.9982697201017812, "grad_norm": 8.764484419509934, "learning_rate": 9.700536184283949e-06, "loss": 0.2981, "step": 49040 }, { "epoch": 0.9984732824427481, "grad_norm": 9.001940302705101, "learning_rate": 9.700293919146585e-06, "loss": 0.3498, "step": 49050 }, { "epoch": 0.998676844783715, "grad_norm": 8.717236890875789, "learning_rate": 9.700051559080729e-06, "loss": 0.3225, "step": 49060 }, { "epoch": 0.998880407124682, "grad_norm": 7.325496423277098, "learning_rate": 9.699809104091279e-06, "loss": 0.3083, "step": 49070 }, { "epoch": 0.9990839694656488, "grad_norm": 5.011763002431026, "learning_rate": 9.69956655418313e-06, "loss": 0.2981, "step": 49080 }, { "epoch": 0.9992875318066158, "grad_norm": 5.815480234573446, "learning_rate": 9.699323909361184e-06, "loss": 0.3237, "step": 49090 }, { "epoch": 0.9994910941475827, "grad_norm": 6.1744828532937746, "learning_rate": 9.699081169630338e-06, "loss": 0.2897, "step": 49100 }, { "epoch": 0.9996946564885496, "grad_norm": 10.43053628776772, "learning_rate": 9.698838334995495e-06, "loss": 0.247, "step": 49110 }, { "epoch": 0.9998982188295166, "grad_norm": 6.876042203464593, "learning_rate": 9.69859540546156e-06, "loss": 0.2574, "step": 49120 }, { "epoch": 1.0001017811704835, "grad_norm": 4.626809637111421, "learning_rate": 9.698352381033438e-06, "loss": 0.1921, "step": 49130 }, { "epoch": 1.0003053435114504, "grad_norm": 10.637620602936972, "learning_rate": 9.69810926171604e-06, "loss": 0.2457, "step": 49140 }, { "epoch": 1.0005089058524173, "grad_norm": 11.469868653016482, "learning_rate": 9.697866047514274e-06, "loss": 0.215, "step": 49150 }, { "epoch": 1.0007124681933843, "grad_norm": 12.30809559121345, "learning_rate": 9.697622738433053e-06, "loss": 0.2908, "step": 49160 }, { "epoch": 1.0009160305343512, "grad_norm": 2.8447590366567375, "learning_rate": 9.697379334477289e-06, "loss": 0.2366, "step": 49170 }, { "epoch": 1.001119592875318, "grad_norm": 6.91808700742211, "learning_rate": 9.697135835651899e-06, "loss": 0.3594, "step": 49180 }, { "epoch": 1.001323155216285, "grad_norm": 10.242920398568238, "learning_rate": 9.696892241961802e-06, "loss": 0.2353, "step": 49190 }, { "epoch": 1.001526717557252, "grad_norm": 10.211334160257785, "learning_rate": 9.696648553411915e-06, "loss": 0.2793, "step": 49200 }, { "epoch": 1.0017302798982188, "grad_norm": 3.0006676932259047, "learning_rate": 9.696404770007161e-06, "loss": 0.2213, "step": 49210 }, { "epoch": 1.0019338422391857, "grad_norm": 7.903835695013817, "learning_rate": 9.696160891752466e-06, "loss": 0.3295, "step": 49220 }, { "epoch": 1.0021374045801528, "grad_norm": 4.766402584918845, "learning_rate": 9.695916918652753e-06, "loss": 0.1523, "step": 49230 }, { "epoch": 1.0023409669211196, "grad_norm": 15.816296646439554, "learning_rate": 9.695672850712947e-06, "loss": 0.283, "step": 49240 }, { "epoch": 1.0025445292620865, "grad_norm": 11.541865475129532, "learning_rate": 9.695428687937981e-06, "loss": 0.2883, "step": 49250 }, { "epoch": 1.0027480916030533, "grad_norm": 6.566004643706077, "learning_rate": 9.695184430332786e-06, "loss": 0.2591, "step": 49260 }, { "epoch": 1.0029516539440204, "grad_norm": 8.8571575118591, "learning_rate": 9.694940077902295e-06, "loss": 0.1828, "step": 49270 }, { "epoch": 1.0031552162849873, "grad_norm": 24.203192900953177, "learning_rate": 9.694695630651439e-06, "loss": 0.243, "step": 49280 }, { "epoch": 1.0033587786259541, "grad_norm": 13.117539440252262, "learning_rate": 9.69445108858516e-06, "loss": 0.2577, "step": 49290 }, { "epoch": 1.0035623409669212, "grad_norm": 4.501093716912964, "learning_rate": 9.694206451708394e-06, "loss": 0.2274, "step": 49300 }, { "epoch": 1.003765903307888, "grad_norm": 9.571433091802726, "learning_rate": 9.693961720026083e-06, "loss": 0.2862, "step": 49310 }, { "epoch": 1.003969465648855, "grad_norm": 4.774225345841254, "learning_rate": 9.69371689354317e-06, "loss": 0.4051, "step": 49320 }, { "epoch": 1.0041730279898218, "grad_norm": 6.428865756050812, "learning_rate": 9.693471972264599e-06, "loss": 0.2869, "step": 49330 }, { "epoch": 1.0043765903307889, "grad_norm": 16.872630585317204, "learning_rate": 9.693226956195315e-06, "loss": 0.2338, "step": 49340 }, { "epoch": 1.0045801526717557, "grad_norm": 3.768145419846644, "learning_rate": 9.692981845340269e-06, "loss": 0.2264, "step": 49350 }, { "epoch": 1.0047837150127226, "grad_norm": 4.77771127818541, "learning_rate": 9.69273663970441e-06, "loss": 0.1683, "step": 49360 }, { "epoch": 1.0049872773536896, "grad_norm": 14.29990332951817, "learning_rate": 9.692491339292689e-06, "loss": 0.2795, "step": 49370 }, { "epoch": 1.0051908396946565, "grad_norm": 9.904925508593237, "learning_rate": 9.692245944110062e-06, "loss": 0.2512, "step": 49380 }, { "epoch": 1.0053944020356234, "grad_norm": 3.669964720765925, "learning_rate": 9.692000454161485e-06, "loss": 0.2567, "step": 49390 }, { "epoch": 1.0055979643765904, "grad_norm": 13.806673184584199, "learning_rate": 9.691754869451916e-06, "loss": 0.1604, "step": 49400 }, { "epoch": 1.0058015267175573, "grad_norm": 10.193608024974857, "learning_rate": 9.691509189986315e-06, "loss": 0.2794, "step": 49410 }, { "epoch": 1.0060050890585241, "grad_norm": 5.7837343381893795, "learning_rate": 9.691263415769641e-06, "loss": 0.3267, "step": 49420 }, { "epoch": 1.006208651399491, "grad_norm": 12.647187207959862, "learning_rate": 9.691017546806862e-06, "loss": 0.32, "step": 49430 }, { "epoch": 1.006412213740458, "grad_norm": 3.997953071780869, "learning_rate": 9.69077158310294e-06, "loss": 0.22, "step": 49440 }, { "epoch": 1.006615776081425, "grad_norm": 4.861271846944449, "learning_rate": 9.690525524662846e-06, "loss": 0.184, "step": 49450 }, { "epoch": 1.0068193384223918, "grad_norm": 13.310712922718714, "learning_rate": 9.690279371491546e-06, "loss": 0.2217, "step": 49460 }, { "epoch": 1.0070229007633589, "grad_norm": 5.2340806109391576, "learning_rate": 9.690033123594015e-06, "loss": 0.2847, "step": 49470 }, { "epoch": 1.0072264631043257, "grad_norm": 20.72440944199425, "learning_rate": 9.689786780975223e-06, "loss": 0.2187, "step": 49480 }, { "epoch": 1.0074300254452926, "grad_norm": 10.31720136541713, "learning_rate": 9.689540343640147e-06, "loss": 0.2083, "step": 49490 }, { "epoch": 1.0076335877862594, "grad_norm": 7.4799362155250595, "learning_rate": 9.689293811593763e-06, "loss": 0.2559, "step": 49500 }, { "epoch": 1.0078371501272265, "grad_norm": 17.514972224314942, "learning_rate": 9.689047184841051e-06, "loss": 0.2481, "step": 49510 }, { "epoch": 1.0080407124681934, "grad_norm": 2.973032279941057, "learning_rate": 9.688800463386993e-06, "loss": 0.3087, "step": 49520 }, { "epoch": 1.0082442748091602, "grad_norm": 41.631956756590455, "learning_rate": 9.68855364723657e-06, "loss": 0.2685, "step": 49530 }, { "epoch": 1.0084478371501273, "grad_norm": 16.308511242686045, "learning_rate": 9.688306736394766e-06, "loss": 0.1649, "step": 49540 }, { "epoch": 1.0086513994910942, "grad_norm": 7.152427660315329, "learning_rate": 9.688059730866569e-06, "loss": 0.2099, "step": 49550 }, { "epoch": 1.008854961832061, "grad_norm": 12.490509644810519, "learning_rate": 9.687812630656968e-06, "loss": 0.2131, "step": 49560 }, { "epoch": 1.0090585241730279, "grad_norm": 5.0782343717309395, "learning_rate": 9.687565435770954e-06, "loss": 0.2829, "step": 49570 }, { "epoch": 1.009262086513995, "grad_norm": 2.497670509915368, "learning_rate": 9.687318146213518e-06, "loss": 0.1613, "step": 49580 }, { "epoch": 1.0094656488549618, "grad_norm": 15.632487757873097, "learning_rate": 9.687070761989654e-06, "loss": 0.1899, "step": 49590 }, { "epoch": 1.0096692111959287, "grad_norm": 4.286220240594147, "learning_rate": 9.68682328310436e-06, "loss": 0.278, "step": 49600 }, { "epoch": 1.0098727735368958, "grad_norm": 6.721856739717449, "learning_rate": 9.686575709562634e-06, "loss": 0.2477, "step": 49610 }, { "epoch": 1.0100763358778626, "grad_norm": 15.539052179549037, "learning_rate": 9.686328041369473e-06, "loss": 0.201, "step": 49620 }, { "epoch": 1.0102798982188295, "grad_norm": 8.907102511894072, "learning_rate": 9.686080278529882e-06, "loss": 0.28, "step": 49630 }, { "epoch": 1.0104834605597965, "grad_norm": 18.482466978427432, "learning_rate": 9.685832421048864e-06, "loss": 0.2306, "step": 49640 }, { "epoch": 1.0106870229007634, "grad_norm": 6.523233122442103, "learning_rate": 9.685584468931426e-06, "loss": 0.333, "step": 49650 }, { "epoch": 1.0108905852417303, "grad_norm": 16.760526535737686, "learning_rate": 9.685336422182573e-06, "loss": 0.1991, "step": 49660 }, { "epoch": 1.0110941475826971, "grad_norm": 8.99825460352773, "learning_rate": 9.68508828080732e-06, "loss": 0.2487, "step": 49670 }, { "epoch": 1.0112977099236642, "grad_norm": 15.505197807710607, "learning_rate": 9.68484004481067e-06, "loss": 0.2129, "step": 49680 }, { "epoch": 1.011501272264631, "grad_norm": 1.7803948632926045, "learning_rate": 9.684591714197645e-06, "loss": 0.281, "step": 49690 }, { "epoch": 1.011704834605598, "grad_norm": 9.54469853200861, "learning_rate": 9.684343288973254e-06, "loss": 0.2586, "step": 49700 }, { "epoch": 1.011908396946565, "grad_norm": 11.851740614278544, "learning_rate": 9.68409476914252e-06, "loss": 0.2341, "step": 49710 }, { "epoch": 1.0121119592875318, "grad_norm": 8.02796764000708, "learning_rate": 9.683846154710457e-06, "loss": 0.1522, "step": 49720 }, { "epoch": 1.0123155216284987, "grad_norm": 11.198206744182249, "learning_rate": 9.683597445682088e-06, "loss": 0.2146, "step": 49730 }, { "epoch": 1.0125190839694655, "grad_norm": 18.726928803357424, "learning_rate": 9.683348642062435e-06, "loss": 0.2795, "step": 49740 }, { "epoch": 1.0127226463104326, "grad_norm": 11.658796998478104, "learning_rate": 9.683099743856526e-06, "loss": 0.3909, "step": 49750 }, { "epoch": 1.0129262086513995, "grad_norm": 9.922971223381843, "learning_rate": 9.682850751069386e-06, "loss": 0.2453, "step": 49760 }, { "epoch": 1.0131297709923663, "grad_norm": 5.492529387894751, "learning_rate": 9.682601663706043e-06, "loss": 0.2131, "step": 49770 }, { "epoch": 1.0133333333333334, "grad_norm": 8.445550940200631, "learning_rate": 9.682352481771528e-06, "loss": 0.2491, "step": 49780 }, { "epoch": 1.0135368956743003, "grad_norm": 7.04585174189078, "learning_rate": 9.682103205270874e-06, "loss": 0.2262, "step": 49790 }, { "epoch": 1.0137404580152671, "grad_norm": 10.475342034536116, "learning_rate": 9.681853834209115e-06, "loss": 0.1783, "step": 49800 }, { "epoch": 1.013944020356234, "grad_norm": 48.061990398958436, "learning_rate": 9.681604368591288e-06, "loss": 0.2328, "step": 49810 }, { "epoch": 1.014147582697201, "grad_norm": 13.624375576049133, "learning_rate": 9.681354808422432e-06, "loss": 0.2065, "step": 49820 }, { "epoch": 1.014351145038168, "grad_norm": 4.279797852707709, "learning_rate": 9.681105153707584e-06, "loss": 0.1842, "step": 49830 }, { "epoch": 1.0145547073791348, "grad_norm": 2.9288661973891346, "learning_rate": 9.68085540445179e-06, "loss": 0.2345, "step": 49840 }, { "epoch": 1.0147582697201019, "grad_norm": 5.663324421381875, "learning_rate": 9.68060556066009e-06, "loss": 0.2159, "step": 49850 }, { "epoch": 1.0149618320610687, "grad_norm": 13.472277403071532, "learning_rate": 9.680355622337534e-06, "loss": 0.2914, "step": 49860 }, { "epoch": 1.0151653944020356, "grad_norm": 19.100261589156766, "learning_rate": 9.680105589489169e-06, "loss": 0.2746, "step": 49870 }, { "epoch": 1.0153689567430026, "grad_norm": 12.677158330847002, "learning_rate": 9.67985546212004e-06, "loss": 0.3052, "step": 49880 }, { "epoch": 1.0155725190839695, "grad_norm": 4.444000181375248, "learning_rate": 9.679605240235207e-06, "loss": 0.2228, "step": 49890 }, { "epoch": 1.0157760814249364, "grad_norm": 3.52237391893563, "learning_rate": 9.679354923839715e-06, "loss": 0.2693, "step": 49900 }, { "epoch": 1.0159796437659032, "grad_norm": 9.024943510535982, "learning_rate": 9.679104512938624e-06, "loss": 0.1743, "step": 49910 }, { "epoch": 1.0161832061068703, "grad_norm": 9.349502442469232, "learning_rate": 9.678854007536991e-06, "loss": 0.2111, "step": 49920 }, { "epoch": 1.0163867684478372, "grad_norm": 23.52771034590769, "learning_rate": 9.678603407639877e-06, "loss": 0.2166, "step": 49930 }, { "epoch": 1.016590330788804, "grad_norm": 9.658580568374068, "learning_rate": 9.678352713252339e-06, "loss": 0.244, "step": 49940 }, { "epoch": 1.016793893129771, "grad_norm": 14.194465888946937, "learning_rate": 9.678101924379443e-06, "loss": 0.2853, "step": 49950 }, { "epoch": 1.016997455470738, "grad_norm": 23.262480988653348, "learning_rate": 9.677851041026253e-06, "loss": 0.2287, "step": 49960 }, { "epoch": 1.0172010178117048, "grad_norm": 9.695082384843667, "learning_rate": 9.677600063197837e-06, "loss": 0.2128, "step": 49970 }, { "epoch": 1.0174045801526717, "grad_norm": 5.201155226259619, "learning_rate": 9.677348990899262e-06, "loss": 0.3001, "step": 49980 }, { "epoch": 1.0176081424936387, "grad_norm": 13.630054248702267, "learning_rate": 9.6770978241356e-06, "loss": 0.2535, "step": 49990 }, { "epoch": 1.0178117048346056, "grad_norm": 8.699539912768554, "learning_rate": 9.676846562911923e-06, "loss": 0.2557, "step": 50000 }, { "epoch": 1.0180152671755724, "grad_norm": 8.066913700074899, "learning_rate": 9.676595207233307e-06, "loss": 0.2073, "step": 50010 }, { "epoch": 1.0182188295165395, "grad_norm": 0.3344996834278773, "learning_rate": 9.676343757104826e-06, "loss": 0.2885, "step": 50020 }, { "epoch": 1.0184223918575064, "grad_norm": 16.53448898481459, "learning_rate": 9.676092212531561e-06, "loss": 0.2695, "step": 50030 }, { "epoch": 1.0186259541984732, "grad_norm": 1.2880411350441239, "learning_rate": 9.675840573518591e-06, "loss": 0.1704, "step": 50040 }, { "epoch": 1.01882951653944, "grad_norm": 9.215947626612131, "learning_rate": 9.675588840070997e-06, "loss": 0.2338, "step": 50050 }, { "epoch": 1.0190330788804072, "grad_norm": 14.868804788897796, "learning_rate": 9.675337012193866e-06, "loss": 0.202, "step": 50060 }, { "epoch": 1.019236641221374, "grad_norm": 5.100539497546868, "learning_rate": 9.675085089892282e-06, "loss": 0.2381, "step": 50070 }, { "epoch": 1.0194402035623409, "grad_norm": 12.617942359492154, "learning_rate": 9.674833073171332e-06, "loss": 0.3395, "step": 50080 }, { "epoch": 1.019643765903308, "grad_norm": 12.582663232922908, "learning_rate": 9.674580962036107e-06, "loss": 0.2525, "step": 50090 }, { "epoch": 1.0198473282442748, "grad_norm": 24.951726995975907, "learning_rate": 9.6743287564917e-06, "loss": 0.2178, "step": 50100 }, { "epoch": 1.0200508905852417, "grad_norm": 14.557002144168013, "learning_rate": 9.674076456543202e-06, "loss": 0.3837, "step": 50110 }, { "epoch": 1.0202544529262088, "grad_norm": 9.227443513284713, "learning_rate": 9.67382406219571e-06, "loss": 0.1842, "step": 50120 }, { "epoch": 1.0204580152671756, "grad_norm": 11.185652798650864, "learning_rate": 9.673571573454321e-06, "loss": 0.2522, "step": 50130 }, { "epoch": 1.0206615776081425, "grad_norm": 15.944142217963776, "learning_rate": 9.673318990324134e-06, "loss": 0.216, "step": 50140 }, { "epoch": 1.0208651399491093, "grad_norm": 10.192251995511569, "learning_rate": 9.673066312810253e-06, "loss": 0.3154, "step": 50150 }, { "epoch": 1.0210687022900764, "grad_norm": 11.478773173551767, "learning_rate": 9.672813540917778e-06, "loss": 0.203, "step": 50160 }, { "epoch": 1.0212722646310433, "grad_norm": 9.872859393116979, "learning_rate": 9.672560674651814e-06, "loss": 0.2666, "step": 50170 }, { "epoch": 1.0214758269720101, "grad_norm": 6.475932896290087, "learning_rate": 9.67230771401747e-06, "loss": 0.2181, "step": 50180 }, { "epoch": 1.0216793893129772, "grad_norm": 4.330712494919726, "learning_rate": 9.672054659019854e-06, "loss": 0.1743, "step": 50190 }, { "epoch": 1.021882951653944, "grad_norm": 12.84042114769264, "learning_rate": 9.671801509664075e-06, "loss": 0.3167, "step": 50200 }, { "epoch": 1.022086513994911, "grad_norm": 4.673139136983006, "learning_rate": 9.671548265955249e-06, "loss": 0.2303, "step": 50210 }, { "epoch": 1.0222900763358778, "grad_norm": 9.46853617816794, "learning_rate": 9.671294927898487e-06, "loss": 0.2911, "step": 50220 }, { "epoch": 1.0224936386768448, "grad_norm": 5.9729997357306805, "learning_rate": 9.671041495498909e-06, "loss": 0.2704, "step": 50230 }, { "epoch": 1.0226972010178117, "grad_norm": 16.354871038240866, "learning_rate": 9.670787968761631e-06, "loss": 0.2961, "step": 50240 }, { "epoch": 1.0229007633587786, "grad_norm": 16.18357274722313, "learning_rate": 9.670534347691775e-06, "loss": 0.2669, "step": 50250 }, { "epoch": 1.0231043256997456, "grad_norm": 10.984941279600962, "learning_rate": 9.670280632294462e-06, "loss": 0.2677, "step": 50260 }, { "epoch": 1.0233078880407125, "grad_norm": 11.853094947868449, "learning_rate": 9.670026822574815e-06, "loss": 0.1445, "step": 50270 }, { "epoch": 1.0235114503816793, "grad_norm": 12.628366848167488, "learning_rate": 9.669772918537963e-06, "loss": 0.2897, "step": 50280 }, { "epoch": 1.0237150127226462, "grad_norm": 11.175803031866288, "learning_rate": 9.669518920189032e-06, "loss": 0.1972, "step": 50290 }, { "epoch": 1.0239185750636133, "grad_norm": 7.163064190576253, "learning_rate": 9.669264827533152e-06, "loss": 0.3474, "step": 50300 }, { "epoch": 1.0241221374045801, "grad_norm": 17.33364247842038, "learning_rate": 9.669010640575454e-06, "loss": 0.176, "step": 50310 }, { "epoch": 1.024325699745547, "grad_norm": 12.15220243922881, "learning_rate": 9.668756359321072e-06, "loss": 0.2744, "step": 50320 }, { "epoch": 1.024529262086514, "grad_norm": 17.831383193908707, "learning_rate": 9.668501983775143e-06, "loss": 0.3225, "step": 50330 }, { "epoch": 1.024732824427481, "grad_norm": 16.611604083579035, "learning_rate": 9.668247513942803e-06, "loss": 0.228, "step": 50340 }, { "epoch": 1.0249363867684478, "grad_norm": 4.566204402503823, "learning_rate": 9.667992949829192e-06, "loss": 0.2353, "step": 50350 }, { "epoch": 1.0251399491094149, "grad_norm": 29.97912863403628, "learning_rate": 9.667738291439452e-06, "loss": 0.2796, "step": 50360 }, { "epoch": 1.0253435114503817, "grad_norm": 10.187657112089068, "learning_rate": 9.667483538778725e-06, "loss": 0.2412, "step": 50370 }, { "epoch": 1.0255470737913486, "grad_norm": 10.167300402433517, "learning_rate": 9.667228691852155e-06, "loss": 0.2291, "step": 50380 }, { "epoch": 1.0257506361323154, "grad_norm": 10.425884505090234, "learning_rate": 9.666973750664893e-06, "loss": 0.2088, "step": 50390 }, { "epoch": 1.0259541984732825, "grad_norm": 11.11903266413066, "learning_rate": 9.66671871522208e-06, "loss": 0.207, "step": 50400 }, { "epoch": 1.0261577608142494, "grad_norm": 9.787552394465997, "learning_rate": 9.666463585528876e-06, "loss": 0.3755, "step": 50410 }, { "epoch": 1.0263613231552162, "grad_norm": 18.684285603018882, "learning_rate": 9.66620836159043e-06, "loss": 0.3472, "step": 50420 }, { "epoch": 1.0265648854961833, "grad_norm": 8.661562912567872, "learning_rate": 9.665953043411894e-06, "loss": 0.2387, "step": 50430 }, { "epoch": 1.0267684478371502, "grad_norm": 5.6472881296815185, "learning_rate": 9.665697630998427e-06, "loss": 0.1545, "step": 50440 }, { "epoch": 1.026972010178117, "grad_norm": 19.28291332994954, "learning_rate": 9.665442124355186e-06, "loss": 0.2873, "step": 50450 }, { "epoch": 1.0271755725190839, "grad_norm": 10.064072105900058, "learning_rate": 9.665186523487334e-06, "loss": 0.1883, "step": 50460 }, { "epoch": 1.027379134860051, "grad_norm": 10.112758230172105, "learning_rate": 9.664930828400032e-06, "loss": 0.1724, "step": 50470 }, { "epoch": 1.0275826972010178, "grad_norm": 0.29673728644294534, "learning_rate": 9.664675039098442e-06, "loss": 0.1128, "step": 50480 }, { "epoch": 1.0277862595419847, "grad_norm": 4.300686532694317, "learning_rate": 9.664419155587732e-06, "loss": 0.1448, "step": 50490 }, { "epoch": 1.0279898218829517, "grad_norm": 0.6396939427956492, "learning_rate": 9.664163177873069e-06, "loss": 0.1955, "step": 50500 }, { "epoch": 1.0281933842239186, "grad_norm": 38.272277233828405, "learning_rate": 9.663907105959624e-06, "loss": 0.2846, "step": 50510 }, { "epoch": 1.0283969465648855, "grad_norm": 12.721660882416238, "learning_rate": 9.663650939852566e-06, "loss": 0.2898, "step": 50520 }, { "epoch": 1.0286005089058525, "grad_norm": 12.103825910950885, "learning_rate": 9.663394679557073e-06, "loss": 0.2018, "step": 50530 }, { "epoch": 1.0288040712468194, "grad_norm": 18.51394506104695, "learning_rate": 9.663138325078316e-06, "loss": 0.2473, "step": 50540 }, { "epoch": 1.0290076335877862, "grad_norm": 23.536728630641086, "learning_rate": 9.662881876421475e-06, "loss": 0.3251, "step": 50550 }, { "epoch": 1.029211195928753, "grad_norm": 10.834752719837923, "learning_rate": 9.66262533359173e-06, "loss": 0.1824, "step": 50560 }, { "epoch": 1.0294147582697202, "grad_norm": 11.113271053007043, "learning_rate": 9.662368696594261e-06, "loss": 0.2399, "step": 50570 }, { "epoch": 1.029618320610687, "grad_norm": 8.11661225245997, "learning_rate": 9.66211196543425e-06, "loss": 0.2082, "step": 50580 }, { "epoch": 1.0298218829516539, "grad_norm": 5.211383976161766, "learning_rate": 9.661855140116883e-06, "loss": 0.2666, "step": 50590 }, { "epoch": 1.030025445292621, "grad_norm": 13.029086178101029, "learning_rate": 9.661598220647348e-06, "loss": 0.2855, "step": 50600 }, { "epoch": 1.0302290076335878, "grad_norm": 8.650062452784073, "learning_rate": 9.661341207030831e-06, "loss": 0.2308, "step": 50610 }, { "epoch": 1.0304325699745547, "grad_norm": 6.418460648185283, "learning_rate": 9.661084099272527e-06, "loss": 0.2381, "step": 50620 }, { "epoch": 1.0306361323155215, "grad_norm": 16.065435777306266, "learning_rate": 9.660826897377625e-06, "loss": 0.2569, "step": 50630 }, { "epoch": 1.0308396946564886, "grad_norm": 11.538429210939093, "learning_rate": 9.660569601351321e-06, "loss": 0.2758, "step": 50640 }, { "epoch": 1.0310432569974555, "grad_norm": 8.886475034233674, "learning_rate": 9.66031221119881e-06, "loss": 0.2529, "step": 50650 }, { "epoch": 1.0312468193384223, "grad_norm": 10.66938098838346, "learning_rate": 9.660054726925292e-06, "loss": 0.286, "step": 50660 }, { "epoch": 1.0314503816793894, "grad_norm": 4.430256373395943, "learning_rate": 9.659797148535967e-06, "loss": 0.1763, "step": 50670 }, { "epoch": 1.0316539440203563, "grad_norm": 12.03630655766314, "learning_rate": 9.659539476036038e-06, "loss": 0.3623, "step": 50680 }, { "epoch": 1.0318575063613231, "grad_norm": 15.008920143446955, "learning_rate": 9.659281709430706e-06, "loss": 0.2853, "step": 50690 }, { "epoch": 1.03206106870229, "grad_norm": 33.5419682296557, "learning_rate": 9.65902384872518e-06, "loss": 0.2538, "step": 50700 }, { "epoch": 1.032264631043257, "grad_norm": 8.359493158484739, "learning_rate": 9.658765893924665e-06, "loss": 0.2699, "step": 50710 }, { "epoch": 1.032468193384224, "grad_norm": 8.194316955737541, "learning_rate": 9.658507845034375e-06, "loss": 0.2543, "step": 50720 }, { "epoch": 1.0326717557251908, "grad_norm": 11.509516653422184, "learning_rate": 9.658249702059517e-06, "loss": 0.3117, "step": 50730 }, { "epoch": 1.0328753180661578, "grad_norm": 14.710046236611104, "learning_rate": 9.657991465005305e-06, "loss": 0.2395, "step": 50740 }, { "epoch": 1.0330788804071247, "grad_norm": 8.531791070868303, "learning_rate": 9.657733133876958e-06, "loss": 0.229, "step": 50750 }, { "epoch": 1.0332824427480916, "grad_norm": 0.8111139253575743, "learning_rate": 9.657474708679691e-06, "loss": 0.3035, "step": 50760 }, { "epoch": 1.0334860050890584, "grad_norm": 2.8696152968964297, "learning_rate": 9.657216189418724e-06, "loss": 0.2822, "step": 50770 }, { "epoch": 1.0336895674300255, "grad_norm": 8.658642055388814, "learning_rate": 9.656957576099278e-06, "loss": 0.1804, "step": 50780 }, { "epoch": 1.0338931297709923, "grad_norm": 5.080733243685905, "learning_rate": 9.656698868726574e-06, "loss": 0.2136, "step": 50790 }, { "epoch": 1.0340966921119592, "grad_norm": 13.503136621735996, "learning_rate": 9.656440067305838e-06, "loss": 0.2864, "step": 50800 }, { "epoch": 1.0343002544529263, "grad_norm": 7.220004141424886, "learning_rate": 9.6561811718423e-06, "loss": 0.1167, "step": 50810 }, { "epoch": 1.0345038167938931, "grad_norm": 12.665611893153129, "learning_rate": 9.655922182341184e-06, "loss": 0.2291, "step": 50820 }, { "epoch": 1.03470737913486, "grad_norm": 8.418654414250625, "learning_rate": 9.655663098807723e-06, "loss": 0.3788, "step": 50830 }, { "epoch": 1.034910941475827, "grad_norm": 20.69894982194962, "learning_rate": 9.65540392124715e-06, "loss": 0.2527, "step": 50840 }, { "epoch": 1.035114503816794, "grad_norm": 7.483271045084948, "learning_rate": 9.655144649664698e-06, "loss": 0.2307, "step": 50850 }, { "epoch": 1.0353180661577608, "grad_norm": 9.589894418567308, "learning_rate": 9.654885284065604e-06, "loss": 0.2554, "step": 50860 }, { "epoch": 1.0355216284987276, "grad_norm": 14.693513334410888, "learning_rate": 9.654625824455109e-06, "loss": 0.235, "step": 50870 }, { "epoch": 1.0357251908396947, "grad_norm": 7.771780149013645, "learning_rate": 9.654366270838447e-06, "loss": 0.2864, "step": 50880 }, { "epoch": 1.0359287531806616, "grad_norm": 1.2376888153136447, "learning_rate": 9.654106623220865e-06, "loss": 0.1251, "step": 50890 }, { "epoch": 1.0361323155216284, "grad_norm": 16.81761422077415, "learning_rate": 9.653846881607605e-06, "loss": 0.2179, "step": 50900 }, { "epoch": 1.0363358778625955, "grad_norm": 14.49445706186863, "learning_rate": 9.653587046003912e-06, "loss": 0.2935, "step": 50910 }, { "epoch": 1.0365394402035624, "grad_norm": 29.318348575122926, "learning_rate": 9.653327116415036e-06, "loss": 0.2745, "step": 50920 }, { "epoch": 1.0367430025445292, "grad_norm": 13.596489796230415, "learning_rate": 9.653067092846224e-06, "loss": 0.2593, "step": 50930 }, { "epoch": 1.036946564885496, "grad_norm": 12.250066711667685, "learning_rate": 9.652806975302731e-06, "loss": 0.268, "step": 50940 }, { "epoch": 1.0371501272264632, "grad_norm": 1.5787709648043726, "learning_rate": 9.652546763789807e-06, "loss": 0.1855, "step": 50950 }, { "epoch": 1.03735368956743, "grad_norm": 6.916684374709932, "learning_rate": 9.65228645831271e-06, "loss": 0.2134, "step": 50960 }, { "epoch": 1.0375572519083969, "grad_norm": 6.4255727474408655, "learning_rate": 9.652026058876694e-06, "loss": 0.2218, "step": 50970 }, { "epoch": 1.037760814249364, "grad_norm": 5.100699308999364, "learning_rate": 9.651765565487021e-06, "loss": 0.1727, "step": 50980 }, { "epoch": 1.0379643765903308, "grad_norm": 1.4812932758147983, "learning_rate": 9.651504978148951e-06, "loss": 0.2541, "step": 50990 }, { "epoch": 1.0381679389312977, "grad_norm": 9.708690116217916, "learning_rate": 9.651244296867749e-06, "loss": 0.1496, "step": 51000 }, { "epoch": 1.0383715012722647, "grad_norm": 14.556228924105813, "learning_rate": 9.650983521648674e-06, "loss": 0.2243, "step": 51010 }, { "epoch": 1.0385750636132316, "grad_norm": 5.755588063927773, "learning_rate": 9.650722652496998e-06, "loss": 0.292, "step": 51020 }, { "epoch": 1.0387786259541985, "grad_norm": 33.911335596106156, "learning_rate": 9.650461689417987e-06, "loss": 0.2565, "step": 51030 }, { "epoch": 1.0389821882951653, "grad_norm": 5.976878054949065, "learning_rate": 9.650200632416912e-06, "loss": 0.1327, "step": 51040 }, { "epoch": 1.0391857506361324, "grad_norm": 18.522933295222746, "learning_rate": 9.649939481499049e-06, "loss": 0.2621, "step": 51050 }, { "epoch": 1.0393893129770992, "grad_norm": 14.234716094539499, "learning_rate": 9.649678236669665e-06, "loss": 0.2143, "step": 51060 }, { "epoch": 1.039592875318066, "grad_norm": 3.7033242317055795, "learning_rate": 9.649416897934044e-06, "loss": 0.2785, "step": 51070 }, { "epoch": 1.0397964376590332, "grad_norm": 12.168369148633952, "learning_rate": 9.649155465297459e-06, "loss": 0.2663, "step": 51080 }, { "epoch": 1.04, "grad_norm": 12.393667826116149, "learning_rate": 9.648893938765192e-06, "loss": 0.2695, "step": 51090 }, { "epoch": 1.040203562340967, "grad_norm": 7.597583621857748, "learning_rate": 9.648632318342522e-06, "loss": 0.2311, "step": 51100 }, { "epoch": 1.0404071246819337, "grad_norm": 1.4651477981308796, "learning_rate": 9.648370604034736e-06, "loss": 0.2094, "step": 51110 }, { "epoch": 1.0406106870229008, "grad_norm": 13.798654859987803, "learning_rate": 9.64810879584712e-06, "loss": 0.4282, "step": 51120 }, { "epoch": 1.0408142493638677, "grad_norm": 5.180252643526285, "learning_rate": 9.647846893784958e-06, "loss": 0.2275, "step": 51130 }, { "epoch": 1.0410178117048345, "grad_norm": 5.254097812292121, "learning_rate": 9.647584897853542e-06, "loss": 0.1911, "step": 51140 }, { "epoch": 1.0412213740458016, "grad_norm": 6.21330482158313, "learning_rate": 9.647322808058165e-06, "loss": 0.2425, "step": 51150 }, { "epoch": 1.0414249363867685, "grad_norm": 6.505516520852773, "learning_rate": 9.647060624404117e-06, "loss": 0.2662, "step": 51160 }, { "epoch": 1.0416284987277353, "grad_norm": 12.252318201019763, "learning_rate": 9.646798346896692e-06, "loss": 0.2483, "step": 51170 }, { "epoch": 1.0418320610687022, "grad_norm": 6.011838632443626, "learning_rate": 9.646535975541192e-06, "loss": 0.2743, "step": 51180 }, { "epoch": 1.0420356234096693, "grad_norm": 17.872038038060175, "learning_rate": 9.646273510342913e-06, "loss": 0.2548, "step": 51190 }, { "epoch": 1.0422391857506361, "grad_norm": 4.7914955708365365, "learning_rate": 9.646010951307155e-06, "loss": 0.2003, "step": 51200 }, { "epoch": 1.042442748091603, "grad_norm": 11.469957075616598, "learning_rate": 9.645748298439222e-06, "loss": 0.264, "step": 51210 }, { "epoch": 1.04264631043257, "grad_norm": 12.823357498605446, "learning_rate": 9.645485551744421e-06, "loss": 0.2466, "step": 51220 }, { "epoch": 1.042849872773537, "grad_norm": 1.4577449553425879, "learning_rate": 9.645222711228052e-06, "loss": 0.2038, "step": 51230 }, { "epoch": 1.0430534351145038, "grad_norm": 17.064572800336528, "learning_rate": 9.64495977689543e-06, "loss": 0.2281, "step": 51240 }, { "epoch": 1.0432569974554706, "grad_norm": 13.54534751004517, "learning_rate": 9.644696748751862e-06, "loss": 0.2591, "step": 51250 }, { "epoch": 1.0434605597964377, "grad_norm": 13.14425765050221, "learning_rate": 9.64443362680266e-06, "loss": 0.3189, "step": 51260 }, { "epoch": 1.0436641221374046, "grad_norm": 5.992597566547478, "learning_rate": 9.64417041105314e-06, "loss": 0.2456, "step": 51270 }, { "epoch": 1.0438676844783714, "grad_norm": 5.649673778075487, "learning_rate": 9.643907101508617e-06, "loss": 0.2155, "step": 51280 }, { "epoch": 1.0440712468193385, "grad_norm": 6.88247801869306, "learning_rate": 9.643643698174408e-06, "loss": 0.171, "step": 51290 }, { "epoch": 1.0442748091603054, "grad_norm": 5.205996719419801, "learning_rate": 9.643380201055833e-06, "loss": 0.2423, "step": 51300 }, { "epoch": 1.0444783715012722, "grad_norm": 9.463733064283423, "learning_rate": 9.643116610158216e-06, "loss": 0.2093, "step": 51310 }, { "epoch": 1.0446819338422393, "grad_norm": 10.03847481991519, "learning_rate": 9.642852925486878e-06, "loss": 0.1862, "step": 51320 }, { "epoch": 1.0448854961832061, "grad_norm": 12.560898699128533, "learning_rate": 9.642589147047144e-06, "loss": 0.3172, "step": 51330 }, { "epoch": 1.045089058524173, "grad_norm": 6.067718657773901, "learning_rate": 9.642325274844344e-06, "loss": 0.2791, "step": 51340 }, { "epoch": 1.0452926208651399, "grad_norm": 13.572685140200527, "learning_rate": 9.642061308883805e-06, "loss": 0.3376, "step": 51350 }, { "epoch": 1.045496183206107, "grad_norm": 12.971513033441504, "learning_rate": 9.64179724917086e-06, "loss": 0.2578, "step": 51360 }, { "epoch": 1.0456997455470738, "grad_norm": 9.774209150657594, "learning_rate": 9.64153309571084e-06, "loss": 0.1883, "step": 51370 }, { "epoch": 1.0459033078880406, "grad_norm": 10.813322238331617, "learning_rate": 9.641268848509083e-06, "loss": 0.2157, "step": 51380 }, { "epoch": 1.0461068702290077, "grad_norm": 11.426043947249564, "learning_rate": 9.641004507570921e-06, "loss": 0.2224, "step": 51390 }, { "epoch": 1.0463104325699746, "grad_norm": 9.596483754754761, "learning_rate": 9.640740072901695e-06, "loss": 0.3195, "step": 51400 }, { "epoch": 1.0465139949109414, "grad_norm": 5.457196944140488, "learning_rate": 9.640475544506748e-06, "loss": 0.2542, "step": 51410 }, { "epoch": 1.0467175572519083, "grad_norm": 6.541254177266056, "learning_rate": 9.640210922391419e-06, "loss": 0.2082, "step": 51420 }, { "epoch": 1.0469211195928754, "grad_norm": 8.584348309018582, "learning_rate": 9.639946206561054e-06, "loss": 0.27, "step": 51430 }, { "epoch": 1.0471246819338422, "grad_norm": 13.675850465819568, "learning_rate": 9.639681397021002e-06, "loss": 0.2104, "step": 51440 }, { "epoch": 1.047328244274809, "grad_norm": 12.597933668135525, "learning_rate": 9.639416493776605e-06, "loss": 0.2579, "step": 51450 }, { "epoch": 1.0475318066157762, "grad_norm": 14.645824231085745, "learning_rate": 9.639151496833218e-06, "loss": 0.2828, "step": 51460 }, { "epoch": 1.047735368956743, "grad_norm": 7.820714229437414, "learning_rate": 9.63888640619619e-06, "loss": 0.2449, "step": 51470 }, { "epoch": 1.0479389312977099, "grad_norm": 9.033020708933401, "learning_rate": 9.638621221870879e-06, "loss": 0.2123, "step": 51480 }, { "epoch": 1.048142493638677, "grad_norm": 9.992219775632385, "learning_rate": 9.638355943862634e-06, "loss": 0.2988, "step": 51490 }, { "epoch": 1.0483460559796438, "grad_norm": 6.66568050340316, "learning_rate": 9.63809057217682e-06, "loss": 0.1868, "step": 51500 }, { "epoch": 1.0485496183206107, "grad_norm": 10.023087063666537, "learning_rate": 9.63782510681879e-06, "loss": 0.2092, "step": 51510 }, { "epoch": 1.0487531806615775, "grad_norm": 13.065755427022527, "learning_rate": 9.63755954779391e-06, "loss": 0.2733, "step": 51520 }, { "epoch": 1.0489567430025446, "grad_norm": 3.1442299497520083, "learning_rate": 9.637293895107543e-06, "loss": 0.2174, "step": 51530 }, { "epoch": 1.0491603053435115, "grad_norm": 11.215782274293275, "learning_rate": 9.637028148765051e-06, "loss": 0.2618, "step": 51540 }, { "epoch": 1.0493638676844783, "grad_norm": 3.7375521660715036, "learning_rate": 9.636762308771805e-06, "loss": 0.1925, "step": 51550 }, { "epoch": 1.0495674300254454, "grad_norm": 13.643750408207259, "learning_rate": 9.636496375133171e-06, "loss": 0.2644, "step": 51560 }, { "epoch": 1.0497709923664122, "grad_norm": 8.22515244302638, "learning_rate": 9.63623034785452e-06, "loss": 0.2908, "step": 51570 }, { "epoch": 1.049974554707379, "grad_norm": 7.125538650609837, "learning_rate": 9.635964226941228e-06, "loss": 0.2477, "step": 51580 }, { "epoch": 1.050178117048346, "grad_norm": 21.083101145503353, "learning_rate": 9.635698012398666e-06, "loss": 0.2741, "step": 51590 }, { "epoch": 1.050381679389313, "grad_norm": 3.2669200513610837, "learning_rate": 9.635431704232212e-06, "loss": 0.1977, "step": 51600 }, { "epoch": 1.05058524173028, "grad_norm": 10.324851316332857, "learning_rate": 9.635165302447245e-06, "loss": 0.2996, "step": 51610 }, { "epoch": 1.0507888040712468, "grad_norm": 6.736347812186826, "learning_rate": 9.634898807049143e-06, "loss": 0.2517, "step": 51620 }, { "epoch": 1.0509923664122138, "grad_norm": 54.628203436665174, "learning_rate": 9.63463221804329e-06, "loss": 0.218, "step": 51630 }, { "epoch": 1.0511959287531807, "grad_norm": 7.706579672443945, "learning_rate": 9.634365535435072e-06, "loss": 0.2842, "step": 51640 }, { "epoch": 1.0513994910941475, "grad_norm": 10.500957619716152, "learning_rate": 9.634098759229872e-06, "loss": 0.2059, "step": 51650 }, { "epoch": 1.0516030534351144, "grad_norm": 9.50885902920715, "learning_rate": 9.633831889433078e-06, "loss": 0.2784, "step": 51660 }, { "epoch": 1.0518066157760815, "grad_norm": 25.8754543893184, "learning_rate": 9.633564926050081e-06, "loss": 0.2746, "step": 51670 }, { "epoch": 1.0520101781170483, "grad_norm": 8.99189937188331, "learning_rate": 9.633297869086273e-06, "loss": 0.2733, "step": 51680 }, { "epoch": 1.0522137404580152, "grad_norm": 8.798487927977776, "learning_rate": 9.633030718547048e-06, "loss": 0.2958, "step": 51690 }, { "epoch": 1.0524173027989823, "grad_norm": 5.8653904845951645, "learning_rate": 9.632763474437798e-06, "loss": 0.3393, "step": 51700 }, { "epoch": 1.0526208651399491, "grad_norm": 7.423633703218583, "learning_rate": 9.632496136763924e-06, "loss": 0.2673, "step": 51710 }, { "epoch": 1.052824427480916, "grad_norm": 7.202459838738675, "learning_rate": 9.632228705530824e-06, "loss": 0.2797, "step": 51720 }, { "epoch": 1.0530279898218828, "grad_norm": 9.983957572421877, "learning_rate": 9.631961180743899e-06, "loss": 0.1766, "step": 51730 }, { "epoch": 1.05323155216285, "grad_norm": 26.688781485397556, "learning_rate": 9.631693562408551e-06, "loss": 0.2714, "step": 51740 }, { "epoch": 1.0534351145038168, "grad_norm": 8.55969584345735, "learning_rate": 9.631425850530186e-06, "loss": 0.2789, "step": 51750 }, { "epoch": 1.0536386768447836, "grad_norm": 4.849995501568783, "learning_rate": 9.63115804511421e-06, "loss": 0.2347, "step": 51760 }, { "epoch": 1.0538422391857507, "grad_norm": 12.3702367632544, "learning_rate": 9.630890146166034e-06, "loss": 0.223, "step": 51770 }, { "epoch": 1.0540458015267176, "grad_norm": 10.024652614433922, "learning_rate": 9.630622153691068e-06, "loss": 0.3242, "step": 51780 }, { "epoch": 1.0542493638676844, "grad_norm": 0.21572196467200794, "learning_rate": 9.630354067694722e-06, "loss": 0.1823, "step": 51790 }, { "epoch": 1.0544529262086515, "grad_norm": 3.4288591524477723, "learning_rate": 9.630085888182413e-06, "loss": 0.2869, "step": 51800 }, { "epoch": 1.0546564885496184, "grad_norm": 9.167711468993659, "learning_rate": 9.629817615159554e-06, "loss": 0.2571, "step": 51810 }, { "epoch": 1.0548600508905852, "grad_norm": 1.2557360971350813, "learning_rate": 9.629549248631564e-06, "loss": 0.2259, "step": 51820 }, { "epoch": 1.055063613231552, "grad_norm": 0.6267030284430867, "learning_rate": 9.629280788603867e-06, "loss": 0.1573, "step": 51830 }, { "epoch": 1.0552671755725191, "grad_norm": 5.7258694896543885, "learning_rate": 9.62901223508188e-06, "loss": 0.2193, "step": 51840 }, { "epoch": 1.055470737913486, "grad_norm": 18.02322875930095, "learning_rate": 9.62874358807103e-06, "loss": 0.2424, "step": 51850 }, { "epoch": 1.0556743002544529, "grad_norm": 9.872949124511567, "learning_rate": 9.628474847576742e-06, "loss": 0.204, "step": 51860 }, { "epoch": 1.05587786259542, "grad_norm": 7.122610554181052, "learning_rate": 9.628206013604442e-06, "loss": 0.2864, "step": 51870 }, { "epoch": 1.0560814249363868, "grad_norm": 11.071386413140418, "learning_rate": 9.62793708615956e-06, "loss": 0.2602, "step": 51880 }, { "epoch": 1.0562849872773536, "grad_norm": 9.014105267211928, "learning_rate": 9.627668065247529e-06, "loss": 0.2205, "step": 51890 }, { "epoch": 1.0564885496183205, "grad_norm": 3.3874640165951635, "learning_rate": 9.62739895087378e-06, "loss": 0.1924, "step": 51900 }, { "epoch": 1.0566921119592876, "grad_norm": 8.930920944203637, "learning_rate": 9.62712974304375e-06, "loss": 0.2588, "step": 51910 }, { "epoch": 1.0568956743002544, "grad_norm": 11.496201138501858, "learning_rate": 9.626860441762876e-06, "loss": 0.1938, "step": 51920 }, { "epoch": 1.0570992366412213, "grad_norm": 8.667869954482798, "learning_rate": 9.626591047036594e-06, "loss": 0.2204, "step": 51930 }, { "epoch": 1.0573027989821884, "grad_norm": 14.89349754315758, "learning_rate": 9.626321558870348e-06, "loss": 0.2823, "step": 51940 }, { "epoch": 1.0575063613231552, "grad_norm": 7.994394556907675, "learning_rate": 9.626051977269579e-06, "loss": 0.2515, "step": 51950 }, { "epoch": 1.057709923664122, "grad_norm": 5.864325775130715, "learning_rate": 9.625782302239732e-06, "loss": 0.218, "step": 51960 }, { "epoch": 1.0579134860050892, "grad_norm": 11.332996363105718, "learning_rate": 9.625512533786254e-06, "loss": 0.2824, "step": 51970 }, { "epoch": 1.058117048346056, "grad_norm": 20.465299353937855, "learning_rate": 9.625242671914592e-06, "loss": 0.2144, "step": 51980 }, { "epoch": 1.0583206106870229, "grad_norm": 6.077307108192694, "learning_rate": 9.624972716630197e-06, "loss": 0.1719, "step": 51990 }, { "epoch": 1.0585241730279897, "grad_norm": 1.262657802220483, "learning_rate": 9.624702667938522e-06, "loss": 0.2644, "step": 52000 }, { "epoch": 1.0587277353689568, "grad_norm": 8.744088050983096, "learning_rate": 9.62443252584502e-06, "loss": 0.2344, "step": 52010 }, { "epoch": 1.0589312977099237, "grad_norm": 5.842113074245832, "learning_rate": 9.624162290355145e-06, "loss": 0.2736, "step": 52020 }, { "epoch": 1.0591348600508905, "grad_norm": 11.21544587907852, "learning_rate": 9.623891961474359e-06, "loss": 0.2711, "step": 52030 }, { "epoch": 1.0593384223918576, "grad_norm": 4.159428885364927, "learning_rate": 9.623621539208118e-06, "loss": 0.3022, "step": 52040 }, { "epoch": 1.0595419847328245, "grad_norm": 6.667664486575356, "learning_rate": 9.623351023561884e-06, "loss": 0.3028, "step": 52050 }, { "epoch": 1.0597455470737913, "grad_norm": 8.433316459358934, "learning_rate": 9.623080414541124e-06, "loss": 0.2514, "step": 52060 }, { "epoch": 1.0599491094147582, "grad_norm": 0.45587374393461344, "learning_rate": 9.622809712151299e-06, "loss": 0.2937, "step": 52070 }, { "epoch": 1.0601526717557253, "grad_norm": 5.8936652564421195, "learning_rate": 9.622538916397877e-06, "loss": 0.1203, "step": 52080 }, { "epoch": 1.060356234096692, "grad_norm": 7.180882714120761, "learning_rate": 9.622268027286328e-06, "loss": 0.1736, "step": 52090 }, { "epoch": 1.060559796437659, "grad_norm": 8.851057729861449, "learning_rate": 9.621997044822124e-06, "loss": 0.2543, "step": 52100 }, { "epoch": 1.060763358778626, "grad_norm": 24.16571181394018, "learning_rate": 9.621725969010736e-06, "loss": 0.2664, "step": 52110 }, { "epoch": 1.060966921119593, "grad_norm": 6.591396743778473, "learning_rate": 9.621454799857639e-06, "loss": 0.1773, "step": 52120 }, { "epoch": 1.0611704834605598, "grad_norm": 5.481889977157003, "learning_rate": 9.62118353736831e-06, "loss": 0.2088, "step": 52130 }, { "epoch": 1.0613740458015266, "grad_norm": 19.26818354997565, "learning_rate": 9.620912181548228e-06, "loss": 0.3191, "step": 52140 }, { "epoch": 1.0615776081424937, "grad_norm": 10.51291555651549, "learning_rate": 9.620640732402872e-06, "loss": 0.3071, "step": 52150 }, { "epoch": 1.0617811704834605, "grad_norm": 6.2582962555313655, "learning_rate": 9.620369189937725e-06, "loss": 0.3201, "step": 52160 }, { "epoch": 1.0619847328244274, "grad_norm": 3.099198201334223, "learning_rate": 9.620097554158271e-06, "loss": 0.2686, "step": 52170 }, { "epoch": 1.0621882951653945, "grad_norm": 18.828775405112463, "learning_rate": 9.619825825069997e-06, "loss": 0.239, "step": 52180 }, { "epoch": 1.0623918575063613, "grad_norm": 5.584917827430868, "learning_rate": 9.61955400267839e-06, "loss": 0.1863, "step": 52190 }, { "epoch": 1.0625954198473282, "grad_norm": 9.981802834041984, "learning_rate": 9.61928208698894e-06, "loss": 0.2542, "step": 52200 }, { "epoch": 1.062798982188295, "grad_norm": 19.644170624311936, "learning_rate": 9.619010078007138e-06, "loss": 0.2917, "step": 52210 }, { "epoch": 1.0630025445292621, "grad_norm": 19.266190138987763, "learning_rate": 9.61873797573848e-06, "loss": 0.2647, "step": 52220 }, { "epoch": 1.063206106870229, "grad_norm": 5.4396123710774305, "learning_rate": 9.618465780188458e-06, "loss": 0.2541, "step": 52230 }, { "epoch": 1.0634096692111958, "grad_norm": 18.997201115290398, "learning_rate": 9.618193491362572e-06, "loss": 0.2691, "step": 52240 }, { "epoch": 1.063613231552163, "grad_norm": 6.2274414732073025, "learning_rate": 9.617921109266319e-06, "loss": 0.2403, "step": 52250 }, { "epoch": 1.0638167938931298, "grad_norm": 11.434627806178693, "learning_rate": 9.617648633905203e-06, "loss": 0.2406, "step": 52260 }, { "epoch": 1.0640203562340966, "grad_norm": 12.811659540788753, "learning_rate": 9.617376065284725e-06, "loss": 0.2242, "step": 52270 }, { "epoch": 1.0642239185750637, "grad_norm": 11.428258277698262, "learning_rate": 9.61710340341039e-06, "loss": 0.1947, "step": 52280 }, { "epoch": 1.0644274809160306, "grad_norm": 8.296649046462367, "learning_rate": 9.616830648287704e-06, "loss": 0.2556, "step": 52290 }, { "epoch": 1.0646310432569974, "grad_norm": 11.756715974248065, "learning_rate": 9.616557799922177e-06, "loss": 0.2136, "step": 52300 }, { "epoch": 1.0648346055979643, "grad_norm": 1.336357734760757, "learning_rate": 9.61628485831932e-06, "loss": 0.1904, "step": 52310 }, { "epoch": 1.0650381679389314, "grad_norm": 5.734287107339553, "learning_rate": 9.616011823484644e-06, "loss": 0.2068, "step": 52320 }, { "epoch": 1.0652417302798982, "grad_norm": 14.46409764981722, "learning_rate": 9.615738695423664e-06, "loss": 0.233, "step": 52330 }, { "epoch": 1.065445292620865, "grad_norm": 8.991674930389818, "learning_rate": 9.615465474141897e-06, "loss": 0.2421, "step": 52340 }, { "epoch": 1.0656488549618321, "grad_norm": 7.213156829671859, "learning_rate": 9.615192159644859e-06, "loss": 0.3019, "step": 52350 }, { "epoch": 1.065852417302799, "grad_norm": 5.750741410770563, "learning_rate": 9.614918751938069e-06, "loss": 0.2097, "step": 52360 }, { "epoch": 1.0660559796437659, "grad_norm": 12.967503963383942, "learning_rate": 9.614645251027054e-06, "loss": 0.2788, "step": 52370 }, { "epoch": 1.0662595419847327, "grad_norm": 8.106882707968552, "learning_rate": 9.614371656917334e-06, "loss": 0.2447, "step": 52380 }, { "epoch": 1.0664631043256998, "grad_norm": 5.841506706349004, "learning_rate": 9.614097969614434e-06, "loss": 0.2602, "step": 52390 }, { "epoch": 1.0666666666666667, "grad_norm": 13.188897543606018, "learning_rate": 9.613824189123883e-06, "loss": 0.2471, "step": 52400 }, { "epoch": 1.0668702290076335, "grad_norm": 18.999488523922995, "learning_rate": 9.61355031545121e-06, "loss": 0.2572, "step": 52410 }, { "epoch": 1.0670737913486006, "grad_norm": 11.179386336330458, "learning_rate": 9.613276348601947e-06, "loss": 0.3118, "step": 52420 }, { "epoch": 1.0672773536895674, "grad_norm": 8.638345220596445, "learning_rate": 9.613002288581626e-06, "loss": 0.1841, "step": 52430 }, { "epoch": 1.0674809160305343, "grad_norm": 9.111138403030399, "learning_rate": 9.61272813539578e-06, "loss": 0.2203, "step": 52440 }, { "epoch": 1.0676844783715014, "grad_norm": 14.150778014366184, "learning_rate": 9.61245388904995e-06, "loss": 0.2629, "step": 52450 }, { "epoch": 1.0678880407124682, "grad_norm": 7.499217969707573, "learning_rate": 9.612179549549674e-06, "loss": 0.2858, "step": 52460 }, { "epoch": 1.068091603053435, "grad_norm": 7.873676486557469, "learning_rate": 9.611905116900491e-06, "loss": 0.2491, "step": 52470 }, { "epoch": 1.068295165394402, "grad_norm": 15.803451770002516, "learning_rate": 9.611630591107943e-06, "loss": 0.2549, "step": 52480 }, { "epoch": 1.068498727735369, "grad_norm": 13.047167826581951, "learning_rate": 9.611355972177577e-06, "loss": 0.2401, "step": 52490 }, { "epoch": 1.0687022900763359, "grad_norm": 6.014370847246681, "learning_rate": 9.611081260114936e-06, "loss": 0.2399, "step": 52500 }, { "epoch": 1.0689058524173027, "grad_norm": 14.228043738829905, "learning_rate": 9.61080645492557e-06, "loss": 0.3134, "step": 52510 }, { "epoch": 1.0691094147582698, "grad_norm": 6.463048509305892, "learning_rate": 9.61053155661503e-06, "loss": 0.2685, "step": 52520 }, { "epoch": 1.0693129770992367, "grad_norm": 8.983038680752207, "learning_rate": 9.610256565188866e-06, "loss": 0.1981, "step": 52530 }, { "epoch": 1.0695165394402035, "grad_norm": 8.370983799449274, "learning_rate": 9.609981480652633e-06, "loss": 0.2063, "step": 52540 }, { "epoch": 1.0697201017811704, "grad_norm": 26.507355282475455, "learning_rate": 9.609706303011888e-06, "loss": 0.3504, "step": 52550 }, { "epoch": 1.0699236641221375, "grad_norm": 4.056166409496389, "learning_rate": 9.609431032272185e-06, "loss": 0.2388, "step": 52560 }, { "epoch": 1.0701272264631043, "grad_norm": 16.328045025249203, "learning_rate": 9.609155668439086e-06, "loss": 0.2427, "step": 52570 }, { "epoch": 1.0703307888040712, "grad_norm": 16.20220583839624, "learning_rate": 9.608880211518151e-06, "loss": 0.267, "step": 52580 }, { "epoch": 1.0705343511450383, "grad_norm": 21.042895135240574, "learning_rate": 9.608604661514945e-06, "loss": 0.1765, "step": 52590 }, { "epoch": 1.070737913486005, "grad_norm": 26.0431529135027, "learning_rate": 9.608329018435032e-06, "loss": 0.1483, "step": 52600 }, { "epoch": 1.070941475826972, "grad_norm": 8.266562667440171, "learning_rate": 9.608053282283977e-06, "loss": 0.2616, "step": 52610 }, { "epoch": 1.071145038167939, "grad_norm": 4.076771682551637, "learning_rate": 9.607777453067353e-06, "loss": 0.2715, "step": 52620 }, { "epoch": 1.071348600508906, "grad_norm": 7.771964633223559, "learning_rate": 9.607501530790728e-06, "loss": 0.2013, "step": 52630 }, { "epoch": 1.0715521628498728, "grad_norm": 51.39801517120818, "learning_rate": 9.607225515459676e-06, "loss": 0.2169, "step": 52640 }, { "epoch": 1.0717557251908396, "grad_norm": 10.318010341932288, "learning_rate": 9.606949407079769e-06, "loss": 0.2383, "step": 52650 }, { "epoch": 1.0719592875318067, "grad_norm": 11.290796546891837, "learning_rate": 9.606673205656588e-06, "loss": 0.207, "step": 52660 }, { "epoch": 1.0721628498727735, "grad_norm": 6.176302392312387, "learning_rate": 9.606396911195704e-06, "loss": 0.2161, "step": 52670 }, { "epoch": 1.0723664122137404, "grad_norm": 10.703735487757163, "learning_rate": 9.606120523702705e-06, "loss": 0.2524, "step": 52680 }, { "epoch": 1.0725699745547073, "grad_norm": 0.7522757191813113, "learning_rate": 9.605844043183169e-06, "loss": 0.2077, "step": 52690 }, { "epoch": 1.0727735368956743, "grad_norm": 17.726157907328027, "learning_rate": 9.605567469642677e-06, "loss": 0.3099, "step": 52700 }, { "epoch": 1.0729770992366412, "grad_norm": 23.29093213218615, "learning_rate": 9.60529080308682e-06, "loss": 0.1636, "step": 52710 }, { "epoch": 1.073180661577608, "grad_norm": 7.288464737154543, "learning_rate": 9.605014043521185e-06, "loss": 0.2654, "step": 52720 }, { "epoch": 1.0733842239185751, "grad_norm": 1.9741614254207978, "learning_rate": 9.604737190951358e-06, "loss": 0.2738, "step": 52730 }, { "epoch": 1.073587786259542, "grad_norm": 7.417129893302566, "learning_rate": 9.604460245382934e-06, "loss": 0.2005, "step": 52740 }, { "epoch": 1.0737913486005088, "grad_norm": 13.131958148385165, "learning_rate": 9.604183206821504e-06, "loss": 0.2509, "step": 52750 }, { "epoch": 1.073994910941476, "grad_norm": 15.97973902346167, "learning_rate": 9.603906075272665e-06, "loss": 0.2634, "step": 52760 }, { "epoch": 1.0741984732824428, "grad_norm": 4.962983835073305, "learning_rate": 9.603628850742012e-06, "loss": 0.2524, "step": 52770 }, { "epoch": 1.0744020356234096, "grad_norm": 8.369661712968924, "learning_rate": 9.603351533235146e-06, "loss": 0.2375, "step": 52780 }, { "epoch": 1.0746055979643765, "grad_norm": 3.653975944745413, "learning_rate": 9.603074122757666e-06, "loss": 0.2241, "step": 52790 }, { "epoch": 1.0748091603053436, "grad_norm": 14.853895892444383, "learning_rate": 9.602796619315176e-06, "loss": 0.3116, "step": 52800 }, { "epoch": 1.0750127226463104, "grad_norm": 4.319215441936534, "learning_rate": 9.60251902291328e-06, "loss": 0.2188, "step": 52810 }, { "epoch": 1.0752162849872773, "grad_norm": 10.002162499202301, "learning_rate": 9.602241333557583e-06, "loss": 0.2541, "step": 52820 }, { "epoch": 1.0754198473282444, "grad_norm": 19.1753234377824, "learning_rate": 9.601963551253695e-06, "loss": 0.2601, "step": 52830 }, { "epoch": 1.0756234096692112, "grad_norm": 2.1488219769565644, "learning_rate": 9.601685676007226e-06, "loss": 0.168, "step": 52840 }, { "epoch": 1.075826972010178, "grad_norm": 2.4151573473434826, "learning_rate": 9.601407707823789e-06, "loss": 0.2098, "step": 52850 }, { "epoch": 1.076030534351145, "grad_norm": 11.204614437803071, "learning_rate": 9.601129646708996e-06, "loss": 0.2099, "step": 52860 }, { "epoch": 1.076234096692112, "grad_norm": 12.36550800746352, "learning_rate": 9.600851492668463e-06, "loss": 0.1918, "step": 52870 }, { "epoch": 1.0764376590330789, "grad_norm": 11.302414793094655, "learning_rate": 9.600573245707807e-06, "loss": 0.1947, "step": 52880 }, { "epoch": 1.0766412213740457, "grad_norm": 5.566589290078371, "learning_rate": 9.600294905832652e-06, "loss": 0.1892, "step": 52890 }, { "epoch": 1.0768447837150128, "grad_norm": 65.80320667346233, "learning_rate": 9.600016473048614e-06, "loss": 0.238, "step": 52900 }, { "epoch": 1.0770483460559797, "grad_norm": 28.277753222287522, "learning_rate": 9.59973794736132e-06, "loss": 0.2153, "step": 52910 }, { "epoch": 1.0772519083969465, "grad_norm": 12.34003793576325, "learning_rate": 9.599459328776392e-06, "loss": 0.2194, "step": 52920 }, { "epoch": 1.0774554707379136, "grad_norm": 10.681902886532118, "learning_rate": 9.59918061729946e-06, "loss": 0.1647, "step": 52930 }, { "epoch": 1.0776590330788804, "grad_norm": 6.905187689681407, "learning_rate": 9.598901812936153e-06, "loss": 0.189, "step": 52940 }, { "epoch": 1.0778625954198473, "grad_norm": 15.243752006035868, "learning_rate": 9.598622915692098e-06, "loss": 0.1347, "step": 52950 }, { "epoch": 1.0780661577608142, "grad_norm": 5.626120805335375, "learning_rate": 9.59834392557293e-06, "loss": 0.2266, "step": 52960 }, { "epoch": 1.0782697201017812, "grad_norm": 19.275811810068745, "learning_rate": 9.598064842584287e-06, "loss": 0.1889, "step": 52970 }, { "epoch": 1.078473282442748, "grad_norm": 10.77616036850238, "learning_rate": 9.597785666731798e-06, "loss": 0.2463, "step": 52980 }, { "epoch": 1.078676844783715, "grad_norm": 8.411553607512007, "learning_rate": 9.597506398021108e-06, "loss": 0.2037, "step": 52990 }, { "epoch": 1.078880407124682, "grad_norm": 9.733179622535987, "learning_rate": 9.597227036457854e-06, "loss": 0.2434, "step": 53000 }, { "epoch": 1.0790839694656489, "grad_norm": 3.494130846489166, "learning_rate": 9.596947582047682e-06, "loss": 0.2117, "step": 53010 }, { "epoch": 1.0792875318066157, "grad_norm": 17.174587763122602, "learning_rate": 9.596668034796229e-06, "loss": 0.1997, "step": 53020 }, { "epoch": 1.0794910941475826, "grad_norm": 11.874737096304152, "learning_rate": 9.596388394709146e-06, "loss": 0.2801, "step": 53030 }, { "epoch": 1.0796946564885497, "grad_norm": 3.3667315211827313, "learning_rate": 9.596108661792078e-06, "loss": 0.2292, "step": 53040 }, { "epoch": 1.0798982188295165, "grad_norm": 5.119577411274291, "learning_rate": 9.595828836050678e-06, "loss": 0.2746, "step": 53050 }, { "epoch": 1.0801017811704834, "grad_norm": 9.921818794141497, "learning_rate": 9.595548917490594e-06, "loss": 0.2369, "step": 53060 }, { "epoch": 1.0803053435114505, "grad_norm": 17.536589624874317, "learning_rate": 9.595268906117482e-06, "loss": 0.2666, "step": 53070 }, { "epoch": 1.0805089058524173, "grad_norm": 11.417780394525607, "learning_rate": 9.594988801936994e-06, "loss": 0.2155, "step": 53080 }, { "epoch": 1.0807124681933842, "grad_norm": 26.20777912346837, "learning_rate": 9.594708604954788e-06, "loss": 0.1737, "step": 53090 }, { "epoch": 1.0809160305343513, "grad_norm": 16.792891550694645, "learning_rate": 9.594428315176525e-06, "loss": 0.287, "step": 53100 }, { "epoch": 1.0811195928753181, "grad_norm": 11.397205880381351, "learning_rate": 9.594147932607865e-06, "loss": 0.147, "step": 53110 }, { "epoch": 1.081323155216285, "grad_norm": 6.106497705614562, "learning_rate": 9.59386745725447e-06, "loss": 0.2202, "step": 53120 }, { "epoch": 1.0815267175572518, "grad_norm": 14.916722897118618, "learning_rate": 9.593586889122003e-06, "loss": 0.2794, "step": 53130 }, { "epoch": 1.081730279898219, "grad_norm": 10.723516753658423, "learning_rate": 9.593306228216136e-06, "loss": 0.3115, "step": 53140 }, { "epoch": 1.0819338422391858, "grad_norm": 20.82570919900104, "learning_rate": 9.593025474542532e-06, "loss": 0.2287, "step": 53150 }, { "epoch": 1.0821374045801526, "grad_norm": 1.8175465912802444, "learning_rate": 9.592744628106863e-06, "loss": 0.2895, "step": 53160 }, { "epoch": 1.0823409669211197, "grad_norm": 10.606999577227612, "learning_rate": 9.592463688914799e-06, "loss": 0.1986, "step": 53170 }, { "epoch": 1.0825445292620866, "grad_norm": 11.503705607549893, "learning_rate": 9.592182656972019e-06, "loss": 0.308, "step": 53180 }, { "epoch": 1.0827480916030534, "grad_norm": 12.34851886656444, "learning_rate": 9.591901532284191e-06, "loss": 0.3083, "step": 53190 }, { "epoch": 1.0829516539440203, "grad_norm": 8.113028383194644, "learning_rate": 9.591620314857001e-06, "loss": 0.2236, "step": 53200 }, { "epoch": 1.0831552162849873, "grad_norm": 9.29856985749182, "learning_rate": 9.591339004696123e-06, "loss": 0.2052, "step": 53210 }, { "epoch": 1.0833587786259542, "grad_norm": 5.1986158249430146, "learning_rate": 9.59105760180724e-06, "loss": 0.2576, "step": 53220 }, { "epoch": 1.083562340966921, "grad_norm": 12.648576211436612, "learning_rate": 9.590776106196038e-06, "loss": 0.3135, "step": 53230 }, { "epoch": 1.0837659033078881, "grad_norm": 3.9978842470086704, "learning_rate": 9.590494517868198e-06, "loss": 0.2245, "step": 53240 }, { "epoch": 1.083969465648855, "grad_norm": 9.252492132471403, "learning_rate": 9.590212836829408e-06, "loss": 0.2418, "step": 53250 }, { "epoch": 1.0841730279898218, "grad_norm": 11.851469506970313, "learning_rate": 9.589931063085358e-06, "loss": 0.3392, "step": 53260 }, { "epoch": 1.0843765903307887, "grad_norm": 12.007710496221886, "learning_rate": 9.589649196641739e-06, "loss": 0.2101, "step": 53270 }, { "epoch": 1.0845801526717558, "grad_norm": 7.76533834100181, "learning_rate": 9.589367237504243e-06, "loss": 0.2555, "step": 53280 }, { "epoch": 1.0847837150127226, "grad_norm": 7.715803462012786, "learning_rate": 9.589085185678563e-06, "loss": 0.2856, "step": 53290 }, { "epoch": 1.0849872773536895, "grad_norm": 21.61815035457805, "learning_rate": 9.588803041170401e-06, "loss": 0.3011, "step": 53300 }, { "epoch": 1.0851908396946566, "grad_norm": 8.550074358654085, "learning_rate": 9.588520803985447e-06, "loss": 0.1952, "step": 53310 }, { "epoch": 1.0853944020356234, "grad_norm": 16.894558669169374, "learning_rate": 9.588238474129408e-06, "loss": 0.2068, "step": 53320 }, { "epoch": 1.0855979643765903, "grad_norm": 10.64598849330174, "learning_rate": 9.587956051607981e-06, "loss": 0.2127, "step": 53330 }, { "epoch": 1.0858015267175571, "grad_norm": 15.581342406323234, "learning_rate": 9.587673536426874e-06, "loss": 0.16, "step": 53340 }, { "epoch": 1.0860050890585242, "grad_norm": 22.85525099721118, "learning_rate": 9.587390928591791e-06, "loss": 0.3406, "step": 53350 }, { "epoch": 1.086208651399491, "grad_norm": 13.362767362844469, "learning_rate": 9.58710822810844e-06, "loss": 0.3726, "step": 53360 }, { "epoch": 1.086412213740458, "grad_norm": 9.192855884030868, "learning_rate": 9.586825434982528e-06, "loss": 0.2429, "step": 53370 }, { "epoch": 1.086615776081425, "grad_norm": 3.611415768483163, "learning_rate": 9.58654254921977e-06, "loss": 0.2558, "step": 53380 }, { "epoch": 1.0868193384223919, "grad_norm": 4.683079097298269, "learning_rate": 9.586259570825877e-06, "loss": 0.2263, "step": 53390 }, { "epoch": 1.0870229007633587, "grad_norm": 6.348927799089259, "learning_rate": 9.585976499806567e-06, "loss": 0.2114, "step": 53400 }, { "epoch": 1.0872264631043258, "grad_norm": 9.012268622712332, "learning_rate": 9.585693336167554e-06, "loss": 0.2262, "step": 53410 }, { "epoch": 1.0874300254452927, "grad_norm": 18.79600600619629, "learning_rate": 9.585410079914557e-06, "loss": 0.2155, "step": 53420 }, { "epoch": 1.0876335877862595, "grad_norm": 14.532499366228413, "learning_rate": 9.585126731053298e-06, "loss": 0.2477, "step": 53430 }, { "epoch": 1.0878371501272264, "grad_norm": 15.045342360138875, "learning_rate": 9.5848432895895e-06, "loss": 0.2399, "step": 53440 }, { "epoch": 1.0880407124681934, "grad_norm": 9.309521750543833, "learning_rate": 9.584559755528885e-06, "loss": 0.2779, "step": 53450 }, { "epoch": 1.0882442748091603, "grad_norm": 7.99666846792526, "learning_rate": 9.584276128877181e-06, "loss": 0.2314, "step": 53460 }, { "epoch": 1.0884478371501272, "grad_norm": 11.284989312896707, "learning_rate": 9.583992409640117e-06, "loss": 0.1964, "step": 53470 }, { "epoch": 1.0886513994910942, "grad_norm": 14.039812936291597, "learning_rate": 9.583708597823421e-06, "loss": 0.2234, "step": 53480 }, { "epoch": 1.088854961832061, "grad_norm": 9.533874906369604, "learning_rate": 9.583424693432827e-06, "loss": 0.3025, "step": 53490 }, { "epoch": 1.089058524173028, "grad_norm": 4.51465353665923, "learning_rate": 9.583140696474067e-06, "loss": 0.2757, "step": 53500 }, { "epoch": 1.0892620865139948, "grad_norm": 13.824013901010845, "learning_rate": 9.582856606952879e-06, "loss": 0.3717, "step": 53510 }, { "epoch": 1.0894656488549619, "grad_norm": 12.36299517011998, "learning_rate": 9.582572424874998e-06, "loss": 0.1992, "step": 53520 }, { "epoch": 1.0896692111959287, "grad_norm": 1.6244514520895055, "learning_rate": 9.582288150246164e-06, "loss": 0.2661, "step": 53530 }, { "epoch": 1.0898727735368956, "grad_norm": 26.798167267495607, "learning_rate": 9.58200378307212e-06, "loss": 0.2241, "step": 53540 }, { "epoch": 1.0900763358778627, "grad_norm": 6.55830632395875, "learning_rate": 9.58171932335861e-06, "loss": 0.1923, "step": 53550 }, { "epoch": 1.0902798982188295, "grad_norm": 19.875326170800626, "learning_rate": 9.581434771111372e-06, "loss": 0.2545, "step": 53560 }, { "epoch": 1.0904834605597964, "grad_norm": 11.12923289556944, "learning_rate": 9.581150126336162e-06, "loss": 0.3041, "step": 53570 }, { "epoch": 1.0906870229007635, "grad_norm": 4.400896745388831, "learning_rate": 9.580865389038722e-06, "loss": 0.2241, "step": 53580 }, { "epoch": 1.0908905852417303, "grad_norm": 8.780438017408343, "learning_rate": 9.580580559224807e-06, "loss": 0.2554, "step": 53590 }, { "epoch": 1.0910941475826972, "grad_norm": 14.972601108990112, "learning_rate": 9.58029563690017e-06, "loss": 0.2479, "step": 53600 }, { "epoch": 1.091297709923664, "grad_norm": 7.783462273083129, "learning_rate": 9.58001062207056e-06, "loss": 0.2282, "step": 53610 }, { "epoch": 1.0915012722646311, "grad_norm": 9.40197131186081, "learning_rate": 9.579725514741738e-06, "loss": 0.3089, "step": 53620 }, { "epoch": 1.091704834605598, "grad_norm": 7.858313101767758, "learning_rate": 9.57944031491946e-06, "loss": 0.1836, "step": 53630 }, { "epoch": 1.0919083969465648, "grad_norm": 6.945254783916468, "learning_rate": 9.579155022609486e-06, "loss": 0.24, "step": 53640 }, { "epoch": 1.092111959287532, "grad_norm": 17.00231849206737, "learning_rate": 9.578869637817579e-06, "loss": 0.2998, "step": 53650 }, { "epoch": 1.0923155216284988, "grad_norm": 16.112082249374726, "learning_rate": 9.578584160549503e-06, "loss": 0.324, "step": 53660 }, { "epoch": 1.0925190839694656, "grad_norm": 6.696272974091894, "learning_rate": 9.578298590811021e-06, "loss": 0.1983, "step": 53670 }, { "epoch": 1.0927226463104325, "grad_norm": 5.548561644777355, "learning_rate": 9.578012928607902e-06, "loss": 0.2139, "step": 53680 }, { "epoch": 1.0929262086513996, "grad_norm": 12.74426851173102, "learning_rate": 9.577727173945917e-06, "loss": 0.2303, "step": 53690 }, { "epoch": 1.0931297709923664, "grad_norm": 9.387444595011793, "learning_rate": 9.577441326830837e-06, "loss": 0.2394, "step": 53700 }, { "epoch": 1.0933333333333333, "grad_norm": 18.05763600821052, "learning_rate": 9.577155387268431e-06, "loss": 0.2911, "step": 53710 }, { "epoch": 1.0935368956743003, "grad_norm": 1.1212203415473239, "learning_rate": 9.576869355264478e-06, "loss": 0.174, "step": 53720 }, { "epoch": 1.0937404580152672, "grad_norm": 11.707593810766916, "learning_rate": 9.576583230824751e-06, "loss": 0.2179, "step": 53730 }, { "epoch": 1.093944020356234, "grad_norm": 15.503211874933244, "learning_rate": 9.576297013955034e-06, "loss": 0.1915, "step": 53740 }, { "epoch": 1.094147582697201, "grad_norm": 15.393615058208534, "learning_rate": 9.576010704661103e-06, "loss": 0.2116, "step": 53750 }, { "epoch": 1.094351145038168, "grad_norm": 16.2077922964205, "learning_rate": 9.575724302948743e-06, "loss": 0.2521, "step": 53760 }, { "epoch": 1.0945547073791349, "grad_norm": 9.474505853986146, "learning_rate": 9.575437808823736e-06, "loss": 0.189, "step": 53770 }, { "epoch": 1.0947582697201017, "grad_norm": 8.75278287123875, "learning_rate": 9.575151222291871e-06, "loss": 0.2441, "step": 53780 }, { "epoch": 1.0949618320610688, "grad_norm": 9.448084844532168, "learning_rate": 9.574864543358932e-06, "loss": 0.2194, "step": 53790 }, { "epoch": 1.0951653944020356, "grad_norm": 3.4933288133520226, "learning_rate": 9.574577772030713e-06, "loss": 0.2135, "step": 53800 }, { "epoch": 1.0953689567430025, "grad_norm": 0.5087569348585965, "learning_rate": 9.574290908313005e-06, "loss": 0.2269, "step": 53810 }, { "epoch": 1.0955725190839694, "grad_norm": 3.6367617597446684, "learning_rate": 9.574003952211597e-06, "loss": 0.3129, "step": 53820 }, { "epoch": 1.0957760814249364, "grad_norm": 16.959534720638946, "learning_rate": 9.57371690373229e-06, "loss": 0.3127, "step": 53830 }, { "epoch": 1.0959796437659033, "grad_norm": 8.726268556965179, "learning_rate": 9.57342976288088e-06, "loss": 0.3596, "step": 53840 }, { "epoch": 1.0961832061068701, "grad_norm": 6.915470779373992, "learning_rate": 9.573142529663164e-06, "loss": 0.1621, "step": 53850 }, { "epoch": 1.0963867684478372, "grad_norm": 11.89407626149554, "learning_rate": 9.572855204084945e-06, "loss": 0.2812, "step": 53860 }, { "epoch": 1.096590330788804, "grad_norm": 8.721921809522112, "learning_rate": 9.572567786152026e-06, "loss": 0.225, "step": 53870 }, { "epoch": 1.096793893129771, "grad_norm": 5.670258882965274, "learning_rate": 9.57228027587021e-06, "loss": 0.1749, "step": 53880 }, { "epoch": 1.096997455470738, "grad_norm": 9.110549613435111, "learning_rate": 9.571992673245304e-06, "loss": 0.23, "step": 53890 }, { "epoch": 1.0972010178117049, "grad_norm": 7.734682464018817, "learning_rate": 9.571704978283118e-06, "loss": 0.2473, "step": 53900 }, { "epoch": 1.0974045801526717, "grad_norm": 7.197846885671872, "learning_rate": 9.571417190989463e-06, "loss": 0.2567, "step": 53910 }, { "epoch": 1.0976081424936386, "grad_norm": 11.688062527749816, "learning_rate": 9.571129311370148e-06, "loss": 0.2349, "step": 53920 }, { "epoch": 1.0978117048346057, "grad_norm": 13.651794683699231, "learning_rate": 9.57084133943099e-06, "loss": 0.2297, "step": 53930 }, { "epoch": 1.0980152671755725, "grad_norm": 19.518989283327794, "learning_rate": 9.570553275177804e-06, "loss": 0.2423, "step": 53940 }, { "epoch": 1.0982188295165394, "grad_norm": 10.80660010062086, "learning_rate": 9.570265118616408e-06, "loss": 0.1869, "step": 53950 }, { "epoch": 1.0984223918575065, "grad_norm": 13.53199920515992, "learning_rate": 9.569976869752622e-06, "loss": 0.2777, "step": 53960 }, { "epoch": 1.0986259541984733, "grad_norm": 17.500083652819637, "learning_rate": 9.569688528592264e-06, "loss": 0.2155, "step": 53970 }, { "epoch": 1.0988295165394402, "grad_norm": 33.61512106742834, "learning_rate": 9.569400095141163e-06, "loss": 0.1801, "step": 53980 }, { "epoch": 1.099033078880407, "grad_norm": 0.8417726799019842, "learning_rate": 9.569111569405142e-06, "loss": 0.1946, "step": 53990 }, { "epoch": 1.099236641221374, "grad_norm": 37.04937532411459, "learning_rate": 9.568822951390026e-06, "loss": 0.246, "step": 54000 }, { "epoch": 1.099440203562341, "grad_norm": 6.728891438001662, "learning_rate": 9.568534241101648e-06, "loss": 0.223, "step": 54010 }, { "epoch": 1.0996437659033078, "grad_norm": 9.702612562976737, "learning_rate": 9.568245438545836e-06, "loss": 0.2372, "step": 54020 }, { "epoch": 1.099847328244275, "grad_norm": 14.282510661079515, "learning_rate": 9.567956543728423e-06, "loss": 0.1721, "step": 54030 }, { "epoch": 1.1000508905852417, "grad_norm": 15.702707120598472, "learning_rate": 9.567667556655245e-06, "loss": 0.2393, "step": 54040 }, { "epoch": 1.1002544529262086, "grad_norm": 12.006813612412127, "learning_rate": 9.567378477332137e-06, "loss": 0.3047, "step": 54050 }, { "epoch": 1.1004580152671757, "grad_norm": 11.493568059208075, "learning_rate": 9.567089305764938e-06, "loss": 0.2403, "step": 54060 }, { "epoch": 1.1006615776081425, "grad_norm": 7.76116395454552, "learning_rate": 9.566800041959488e-06, "loss": 0.2345, "step": 54070 }, { "epoch": 1.1008651399491094, "grad_norm": 9.372974358904038, "learning_rate": 9.566510685921629e-06, "loss": 0.2825, "step": 54080 }, { "epoch": 1.1010687022900763, "grad_norm": 12.428954979024061, "learning_rate": 9.566221237657205e-06, "loss": 0.253, "step": 54090 }, { "epoch": 1.1012722646310433, "grad_norm": 7.404203538940527, "learning_rate": 9.565931697172062e-06, "loss": 0.246, "step": 54100 }, { "epoch": 1.1014758269720102, "grad_norm": 3.9104411077153274, "learning_rate": 9.565642064472049e-06, "loss": 0.2671, "step": 54110 }, { "epoch": 1.101679389312977, "grad_norm": 20.519854229690914, "learning_rate": 9.565352339563012e-06, "loss": 0.2754, "step": 54120 }, { "epoch": 1.1018829516539441, "grad_norm": 4.785955029795716, "learning_rate": 9.565062522450805e-06, "loss": 0.2221, "step": 54130 }, { "epoch": 1.102086513994911, "grad_norm": 19.77200511318062, "learning_rate": 9.56477261314128e-06, "loss": 0.2274, "step": 54140 }, { "epoch": 1.1022900763358778, "grad_norm": 18.334260692093338, "learning_rate": 9.564482611640294e-06, "loss": 0.242, "step": 54150 }, { "epoch": 1.1024936386768447, "grad_norm": 14.145067620940026, "learning_rate": 9.564192517953704e-06, "loss": 0.2202, "step": 54160 }, { "epoch": 1.1026972010178118, "grad_norm": 12.220531813283497, "learning_rate": 9.563902332087365e-06, "loss": 0.1437, "step": 54170 }, { "epoch": 1.1029007633587786, "grad_norm": 30.277625464488878, "learning_rate": 9.56361205404714e-06, "loss": 0.2888, "step": 54180 }, { "epoch": 1.1031043256997455, "grad_norm": 25.23954835266888, "learning_rate": 9.563321683838892e-06, "loss": 0.2676, "step": 54190 }, { "epoch": 1.1033078880407126, "grad_norm": 14.109827186787836, "learning_rate": 9.563031221468485e-06, "loss": 0.3363, "step": 54200 }, { "epoch": 1.1035114503816794, "grad_norm": 1.4828248499077386, "learning_rate": 9.562740666941786e-06, "loss": 0.2371, "step": 54210 }, { "epoch": 1.1037150127226463, "grad_norm": 25.585012963164328, "learning_rate": 9.562450020264662e-06, "loss": 0.2327, "step": 54220 }, { "epoch": 1.1039185750636131, "grad_norm": 6.244427566011763, "learning_rate": 9.562159281442984e-06, "loss": 0.2287, "step": 54230 }, { "epoch": 1.1041221374045802, "grad_norm": 5.460398343840714, "learning_rate": 9.561868450482623e-06, "loss": 0.1572, "step": 54240 }, { "epoch": 1.104325699745547, "grad_norm": 8.734936362363399, "learning_rate": 9.561577527389453e-06, "loss": 0.2503, "step": 54250 }, { "epoch": 1.104529262086514, "grad_norm": 5.688268585603495, "learning_rate": 9.56128651216935e-06, "loss": 0.2109, "step": 54260 }, { "epoch": 1.104732824427481, "grad_norm": 8.381571457924723, "learning_rate": 9.56099540482819e-06, "loss": 0.2655, "step": 54270 }, { "epoch": 1.1049363867684479, "grad_norm": 3.619639286996558, "learning_rate": 9.560704205371853e-06, "loss": 0.2409, "step": 54280 }, { "epoch": 1.1051399491094147, "grad_norm": 15.057018943322351, "learning_rate": 9.560412913806222e-06, "loss": 0.2415, "step": 54290 }, { "epoch": 1.1053435114503816, "grad_norm": 16.947597522035867, "learning_rate": 9.560121530137177e-06, "loss": 0.2269, "step": 54300 }, { "epoch": 1.1055470737913486, "grad_norm": 7.2579720051509895, "learning_rate": 9.559830054370604e-06, "loss": 0.2637, "step": 54310 }, { "epoch": 1.1057506361323155, "grad_norm": 7.3739163336440035, "learning_rate": 9.559538486512392e-06, "loss": 0.21, "step": 54320 }, { "epoch": 1.1059541984732824, "grad_norm": 15.499484183612521, "learning_rate": 9.559246826568427e-06, "loss": 0.2421, "step": 54330 }, { "epoch": 1.1061577608142494, "grad_norm": 13.0206606383276, "learning_rate": 9.5589550745446e-06, "loss": 0.2667, "step": 54340 }, { "epoch": 1.1063613231552163, "grad_norm": 7.851396882243399, "learning_rate": 9.558663230446803e-06, "loss": 0.2798, "step": 54350 }, { "epoch": 1.1065648854961831, "grad_norm": 22.467833991898765, "learning_rate": 9.55837129428093e-06, "loss": 0.2869, "step": 54360 }, { "epoch": 1.1067684478371502, "grad_norm": 12.581485846016554, "learning_rate": 9.558079266052878e-06, "loss": 0.3225, "step": 54370 }, { "epoch": 1.106972010178117, "grad_norm": 1.865129245825506, "learning_rate": 9.557787145768544e-06, "loss": 0.2042, "step": 54380 }, { "epoch": 1.107175572519084, "grad_norm": 9.180380624509427, "learning_rate": 9.55749493343383e-06, "loss": 0.2902, "step": 54390 }, { "epoch": 1.1073791348600508, "grad_norm": 4.478558239092904, "learning_rate": 9.557202629054633e-06, "loss": 0.2809, "step": 54400 }, { "epoch": 1.1075826972010179, "grad_norm": 3.6844688665380763, "learning_rate": 9.556910232636863e-06, "loss": 0.2408, "step": 54410 }, { "epoch": 1.1077862595419847, "grad_norm": 8.502930594099734, "learning_rate": 9.556617744186418e-06, "loss": 0.3039, "step": 54420 }, { "epoch": 1.1079898218829516, "grad_norm": 11.068717488176532, "learning_rate": 9.55632516370921e-06, "loss": 0.2231, "step": 54430 }, { "epoch": 1.1081933842239187, "grad_norm": 9.522221281360153, "learning_rate": 9.556032491211147e-06, "loss": 0.206, "step": 54440 }, { "epoch": 1.1083969465648855, "grad_norm": 1.8902424227321237, "learning_rate": 9.555739726698142e-06, "loss": 0.2064, "step": 54450 }, { "epoch": 1.1086005089058524, "grad_norm": 0.2644474771598765, "learning_rate": 9.555446870176102e-06, "loss": 0.1948, "step": 54460 }, { "epoch": 1.1088040712468192, "grad_norm": 7.794738984886269, "learning_rate": 9.555153921650945e-06, "loss": 0.2883, "step": 54470 }, { "epoch": 1.1090076335877863, "grad_norm": 20.32872378663616, "learning_rate": 9.554860881128591e-06, "loss": 0.2436, "step": 54480 }, { "epoch": 1.1092111959287532, "grad_norm": 2.9383774031088925, "learning_rate": 9.554567748614952e-06, "loss": 0.2653, "step": 54490 }, { "epoch": 1.10941475826972, "grad_norm": 8.459653757162725, "learning_rate": 9.554274524115953e-06, "loss": 0.2017, "step": 54500 }, { "epoch": 1.109618320610687, "grad_norm": 12.01626240244707, "learning_rate": 9.553981207637513e-06, "loss": 0.3878, "step": 54510 }, { "epoch": 1.109821882951654, "grad_norm": 35.7439333570665, "learning_rate": 9.553687799185556e-06, "loss": 0.3227, "step": 54520 }, { "epoch": 1.1100254452926208, "grad_norm": 14.30043204918881, "learning_rate": 9.55339429876601e-06, "loss": 0.2874, "step": 54530 }, { "epoch": 1.110229007633588, "grad_norm": 7.660667686367615, "learning_rate": 9.553100706384802e-06, "loss": 0.2245, "step": 54540 }, { "epoch": 1.1104325699745548, "grad_norm": 7.9012174615428, "learning_rate": 9.55280702204786e-06, "loss": 0.2365, "step": 54550 }, { "epoch": 1.1106361323155216, "grad_norm": 15.726160788938438, "learning_rate": 9.552513245761115e-06, "loss": 0.2094, "step": 54560 }, { "epoch": 1.1108396946564885, "grad_norm": 6.601502108809474, "learning_rate": 9.552219377530504e-06, "loss": 0.221, "step": 54570 }, { "epoch": 1.1110432569974555, "grad_norm": 13.270669406533216, "learning_rate": 9.551925417361959e-06, "loss": 0.2594, "step": 54580 }, { "epoch": 1.1112468193384224, "grad_norm": 12.98749288006792, "learning_rate": 9.551631365261415e-06, "loss": 0.3435, "step": 54590 }, { "epoch": 1.1114503816793893, "grad_norm": 25.125463436945047, "learning_rate": 9.551337221234815e-06, "loss": 0.2505, "step": 54600 }, { "epoch": 1.1116539440203563, "grad_norm": 7.354637278487519, "learning_rate": 9.551042985288096e-06, "loss": 0.188, "step": 54610 }, { "epoch": 1.1118575063613232, "grad_norm": 11.178552965647633, "learning_rate": 9.550748657427205e-06, "loss": 0.2466, "step": 54620 }, { "epoch": 1.11206106870229, "grad_norm": 10.197147128118708, "learning_rate": 9.550454237658082e-06, "loss": 0.1848, "step": 54630 }, { "epoch": 1.112264631043257, "grad_norm": 19.832804202875693, "learning_rate": 9.550159725986676e-06, "loss": 0.2483, "step": 54640 }, { "epoch": 1.112468193384224, "grad_norm": 7.245269981244433, "learning_rate": 9.549865122418932e-06, "loss": 0.1903, "step": 54650 }, { "epoch": 1.1126717557251908, "grad_norm": 10.358262266255773, "learning_rate": 9.549570426960803e-06, "loss": 0.1944, "step": 54660 }, { "epoch": 1.1128753180661577, "grad_norm": 14.73462293942341, "learning_rate": 9.549275639618238e-06, "loss": 0.2161, "step": 54670 }, { "epoch": 1.1130788804071248, "grad_norm": 7.732507527880319, "learning_rate": 9.548980760397192e-06, "loss": 0.2439, "step": 54680 }, { "epoch": 1.1132824427480916, "grad_norm": 12.66977037775172, "learning_rate": 9.548685789303623e-06, "loss": 0.2799, "step": 54690 }, { "epoch": 1.1134860050890585, "grad_norm": 13.152980669378266, "learning_rate": 9.548390726343484e-06, "loss": 0.2763, "step": 54700 }, { "epoch": 1.1136895674300256, "grad_norm": 9.705287366187179, "learning_rate": 9.548095571522736e-06, "loss": 0.1944, "step": 54710 }, { "epoch": 1.1138931297709924, "grad_norm": 5.392601125743354, "learning_rate": 9.54780032484734e-06, "loss": 0.2452, "step": 54720 }, { "epoch": 1.1140966921119593, "grad_norm": 23.872145342733553, "learning_rate": 9.547504986323259e-06, "loss": 0.24, "step": 54730 }, { "epoch": 1.1143002544529261, "grad_norm": 22.83608961220777, "learning_rate": 9.547209555956458e-06, "loss": 0.2584, "step": 54740 }, { "epoch": 1.1145038167938932, "grad_norm": 21.197141000966607, "learning_rate": 9.546914033752903e-06, "loss": 0.2279, "step": 54750 }, { "epoch": 1.11470737913486, "grad_norm": 0.2280776970925518, "learning_rate": 9.546618419718563e-06, "loss": 0.1991, "step": 54760 }, { "epoch": 1.114910941475827, "grad_norm": 6.561601959120619, "learning_rate": 9.546322713859407e-06, "loss": 0.275, "step": 54770 }, { "epoch": 1.1151145038167938, "grad_norm": 19.862180992207005, "learning_rate": 9.54602691618141e-06, "loss": 0.2741, "step": 54780 }, { "epoch": 1.1153180661577609, "grad_norm": 9.161281132925362, "learning_rate": 9.545731026690542e-06, "loss": 0.3196, "step": 54790 }, { "epoch": 1.1155216284987277, "grad_norm": 13.137597323763838, "learning_rate": 9.545435045392783e-06, "loss": 0.1618, "step": 54800 }, { "epoch": 1.1157251908396946, "grad_norm": 15.805150409318793, "learning_rate": 9.545138972294107e-06, "loss": 0.2754, "step": 54810 }, { "epoch": 1.1159287531806616, "grad_norm": 9.072605675797968, "learning_rate": 9.544842807400496e-06, "loss": 0.3378, "step": 54820 }, { "epoch": 1.1161323155216285, "grad_norm": 12.687299237612846, "learning_rate": 9.544546550717931e-06, "loss": 0.1627, "step": 54830 }, { "epoch": 1.1163358778625954, "grad_norm": 6.21251215553615, "learning_rate": 9.544250202252397e-06, "loss": 0.2286, "step": 54840 }, { "epoch": 1.1165394402035624, "grad_norm": 10.572068490416031, "learning_rate": 9.543953762009874e-06, "loss": 0.2783, "step": 54850 }, { "epoch": 1.1167430025445293, "grad_norm": 11.13377991091508, "learning_rate": 9.543657229996354e-06, "loss": 0.2464, "step": 54860 }, { "epoch": 1.1169465648854962, "grad_norm": 10.690831358540215, "learning_rate": 9.543360606217824e-06, "loss": 0.2617, "step": 54870 }, { "epoch": 1.117150127226463, "grad_norm": 12.43612036598035, "learning_rate": 9.543063890680273e-06, "loss": 0.2944, "step": 54880 }, { "epoch": 1.11735368956743, "grad_norm": 6.930965704606453, "learning_rate": 9.542767083389697e-06, "loss": 0.2341, "step": 54890 }, { "epoch": 1.117557251908397, "grad_norm": 12.593695387922423, "learning_rate": 9.542470184352088e-06, "loss": 0.242, "step": 54900 }, { "epoch": 1.1177608142493638, "grad_norm": 3.5895701323501217, "learning_rate": 9.542173193573443e-06, "loss": 0.1967, "step": 54910 }, { "epoch": 1.1179643765903309, "grad_norm": 3.590976410672192, "learning_rate": 9.541876111059762e-06, "loss": 0.2152, "step": 54920 }, { "epoch": 1.1181679389312977, "grad_norm": 8.866146171866891, "learning_rate": 9.541578936817042e-06, "loss": 0.1981, "step": 54930 }, { "epoch": 1.1183715012722646, "grad_norm": 7.065077461965738, "learning_rate": 9.541281670851284e-06, "loss": 0.3047, "step": 54940 }, { "epoch": 1.1185750636132314, "grad_norm": 5.990449992571995, "learning_rate": 9.540984313168496e-06, "loss": 0.2002, "step": 54950 }, { "epoch": 1.1187786259541985, "grad_norm": 9.86447632427466, "learning_rate": 9.540686863774679e-06, "loss": 0.1897, "step": 54960 }, { "epoch": 1.1189821882951654, "grad_norm": 7.36495819348474, "learning_rate": 9.540389322675844e-06, "loss": 0.2383, "step": 54970 }, { "epoch": 1.1191857506361322, "grad_norm": 6.984290152200498, "learning_rate": 9.540091689877997e-06, "loss": 0.3427, "step": 54980 }, { "epoch": 1.1193893129770993, "grad_norm": 8.771111030946889, "learning_rate": 9.539793965387152e-06, "loss": 0.2107, "step": 54990 }, { "epoch": 1.1195928753180662, "grad_norm": 14.833105323277543, "learning_rate": 9.53949614920932e-06, "loss": 0.2313, "step": 55000 }, { "epoch": 1.119796437659033, "grad_norm": 16.868319064827347, "learning_rate": 9.539198241350518e-06, "loss": 0.1819, "step": 55010 }, { "epoch": 1.12, "grad_norm": 12.874542082903218, "learning_rate": 9.53890024181676e-06, "loss": 0.2333, "step": 55020 }, { "epoch": 1.120203562340967, "grad_norm": 1.6155215838502062, "learning_rate": 9.538602150614065e-06, "loss": 0.1271, "step": 55030 }, { "epoch": 1.1204071246819338, "grad_norm": 29.859251473948508, "learning_rate": 9.538303967748454e-06, "loss": 0.2155, "step": 55040 }, { "epoch": 1.1206106870229007, "grad_norm": 8.627825766870803, "learning_rate": 9.53800569322595e-06, "loss": 0.2262, "step": 55050 }, { "epoch": 1.1208142493638678, "grad_norm": 12.88710319851362, "learning_rate": 9.537707327052575e-06, "loss": 0.2138, "step": 55060 }, { "epoch": 1.1210178117048346, "grad_norm": 6.558606298970283, "learning_rate": 9.537408869234356e-06, "loss": 0.2614, "step": 55070 }, { "epoch": 1.1212213740458015, "grad_norm": 22.584876957565182, "learning_rate": 9.537110319777323e-06, "loss": 0.3176, "step": 55080 }, { "epoch": 1.1214249363867685, "grad_norm": 6.920706714686106, "learning_rate": 9.5368116786875e-06, "loss": 0.2568, "step": 55090 }, { "epoch": 1.1216284987277354, "grad_norm": 3.292262656858662, "learning_rate": 9.536512945970924e-06, "loss": 0.2379, "step": 55100 }, { "epoch": 1.1218320610687023, "grad_norm": 4.70449691486369, "learning_rate": 9.536214121633627e-06, "loss": 0.2246, "step": 55110 }, { "epoch": 1.1220356234096691, "grad_norm": 6.64741772061776, "learning_rate": 9.53591520568164e-06, "loss": 0.3458, "step": 55120 }, { "epoch": 1.1222391857506362, "grad_norm": 1.4546985925028542, "learning_rate": 9.535616198121004e-06, "loss": 0.2095, "step": 55130 }, { "epoch": 1.122442748091603, "grad_norm": 5.7674334614574985, "learning_rate": 9.53531709895776e-06, "loss": 0.2839, "step": 55140 }, { "epoch": 1.12264631043257, "grad_norm": 10.970420985503015, "learning_rate": 9.535017908197943e-06, "loss": 0.2772, "step": 55150 }, { "epoch": 1.122849872773537, "grad_norm": 2.9352737022711484, "learning_rate": 9.5347186258476e-06, "loss": 0.2114, "step": 55160 }, { "epoch": 1.1230534351145038, "grad_norm": 14.052912513006254, "learning_rate": 9.534419251912772e-06, "loss": 0.1846, "step": 55170 }, { "epoch": 1.1232569974554707, "grad_norm": 11.930547884736159, "learning_rate": 9.534119786399508e-06, "loss": 0.2869, "step": 55180 }, { "epoch": 1.1234605597964378, "grad_norm": 14.054734172325489, "learning_rate": 9.533820229313853e-06, "loss": 0.2673, "step": 55190 }, { "epoch": 1.1236641221374046, "grad_norm": 24.400117243690698, "learning_rate": 9.533520580661862e-06, "loss": 0.2571, "step": 55200 }, { "epoch": 1.1238676844783715, "grad_norm": 6.7034270954524064, "learning_rate": 9.533220840449582e-06, "loss": 0.2601, "step": 55210 }, { "epoch": 1.1240712468193383, "grad_norm": 26.539962206211793, "learning_rate": 9.53292100868307e-06, "loss": 0.2482, "step": 55220 }, { "epoch": 1.1242748091603054, "grad_norm": 15.353619445535667, "learning_rate": 9.532621085368376e-06, "loss": 0.2576, "step": 55230 }, { "epoch": 1.1244783715012723, "grad_norm": 9.350141271012527, "learning_rate": 9.532321070511565e-06, "loss": 0.2595, "step": 55240 }, { "epoch": 1.1246819338422391, "grad_norm": 24.52752200367005, "learning_rate": 9.532020964118692e-06, "loss": 0.2616, "step": 55250 }, { "epoch": 1.124885496183206, "grad_norm": 5.278780115887106, "learning_rate": 9.531720766195816e-06, "loss": 0.2227, "step": 55260 }, { "epoch": 1.125089058524173, "grad_norm": 5.0410930709438135, "learning_rate": 9.531420476749004e-06, "loss": 0.2468, "step": 55270 }, { "epoch": 1.12529262086514, "grad_norm": 4.530432299700876, "learning_rate": 9.53112009578432e-06, "loss": 0.27, "step": 55280 }, { "epoch": 1.1254961832061068, "grad_norm": 5.794965505740069, "learning_rate": 9.530819623307828e-06, "loss": 0.336, "step": 55290 }, { "epoch": 1.1256997455470739, "grad_norm": 9.116779579539624, "learning_rate": 9.530519059325598e-06, "loss": 0.2124, "step": 55300 }, { "epoch": 1.1259033078880407, "grad_norm": 4.658908046740052, "learning_rate": 9.530218403843701e-06, "loss": 0.2541, "step": 55310 }, { "epoch": 1.1261068702290076, "grad_norm": 10.56381565709754, "learning_rate": 9.529917656868208e-06, "loss": 0.3123, "step": 55320 }, { "epoch": 1.1263104325699747, "grad_norm": 10.274589955220534, "learning_rate": 9.529616818405193e-06, "loss": 0.1728, "step": 55330 }, { "epoch": 1.1265139949109415, "grad_norm": 10.237509153999929, "learning_rate": 9.529315888460736e-06, "loss": 0.3265, "step": 55340 }, { "epoch": 1.1267175572519084, "grad_norm": 4.461943504876569, "learning_rate": 9.529014867040907e-06, "loss": 0.1827, "step": 55350 }, { "epoch": 1.1269211195928754, "grad_norm": 6.715174143096505, "learning_rate": 9.52871375415179e-06, "loss": 0.2036, "step": 55360 }, { "epoch": 1.1271246819338423, "grad_norm": 12.062454420083023, "learning_rate": 9.528412549799467e-06, "loss": 0.2628, "step": 55370 }, { "epoch": 1.1273282442748092, "grad_norm": 5.811240249399602, "learning_rate": 9.528111253990018e-06, "loss": 0.1987, "step": 55380 }, { "epoch": 1.127531806615776, "grad_norm": 7.033204029513491, "learning_rate": 9.527809866729531e-06, "loss": 0.3048, "step": 55390 }, { "epoch": 1.127735368956743, "grad_norm": 13.560600456227577, "learning_rate": 9.527508388024094e-06, "loss": 0.2749, "step": 55400 }, { "epoch": 1.12793893129771, "grad_norm": 8.123177172043688, "learning_rate": 9.52720681787979e-06, "loss": 0.2257, "step": 55410 }, { "epoch": 1.1281424936386768, "grad_norm": 9.693801071869549, "learning_rate": 9.526905156302718e-06, "loss": 0.2261, "step": 55420 }, { "epoch": 1.1283460559796437, "grad_norm": 9.916293283365468, "learning_rate": 9.526603403298962e-06, "loss": 0.223, "step": 55430 }, { "epoch": 1.1285496183206107, "grad_norm": 12.022801997164835, "learning_rate": 9.52630155887462e-06, "loss": 0.1443, "step": 55440 }, { "epoch": 1.1287531806615776, "grad_norm": 5.787280869071517, "learning_rate": 9.52599962303579e-06, "loss": 0.227, "step": 55450 }, { "epoch": 1.1289567430025444, "grad_norm": 5.627050992408545, "learning_rate": 9.525697595788568e-06, "loss": 0.1655, "step": 55460 }, { "epoch": 1.1291603053435115, "grad_norm": 5.284990636526007, "learning_rate": 9.525395477139053e-06, "loss": 0.1659, "step": 55470 }, { "epoch": 1.1293638676844784, "grad_norm": 11.258645413499956, "learning_rate": 9.525093267093348e-06, "loss": 0.2134, "step": 55480 }, { "epoch": 1.1295674300254452, "grad_norm": 5.449345579623506, "learning_rate": 9.524790965657557e-06, "loss": 0.2634, "step": 55490 }, { "epoch": 1.1297709923664123, "grad_norm": 4.406095914694358, "learning_rate": 9.524488572837783e-06, "loss": 0.2752, "step": 55500 }, { "epoch": 1.1299745547073792, "grad_norm": 39.78696018561592, "learning_rate": 9.524186088640136e-06, "loss": 0.328, "step": 55510 }, { "epoch": 1.130178117048346, "grad_norm": 4.8779927169793735, "learning_rate": 9.523883513070725e-06, "loss": 0.3024, "step": 55520 }, { "epoch": 1.1303816793893129, "grad_norm": 14.515878409098866, "learning_rate": 9.523580846135657e-06, "loss": 0.1924, "step": 55530 }, { "epoch": 1.13058524173028, "grad_norm": 13.516030925584769, "learning_rate": 9.523278087841048e-06, "loss": 0.2621, "step": 55540 }, { "epoch": 1.1307888040712468, "grad_norm": 22.510264236561273, "learning_rate": 9.522975238193014e-06, "loss": 0.2741, "step": 55550 }, { "epoch": 1.1309923664122137, "grad_norm": 4.990459898487582, "learning_rate": 9.522672297197669e-06, "loss": 0.2146, "step": 55560 }, { "epoch": 1.1311959287531808, "grad_norm": 4.659817384723108, "learning_rate": 9.52236926486113e-06, "loss": 0.2115, "step": 55570 }, { "epoch": 1.1313994910941476, "grad_norm": 13.126439908198368, "learning_rate": 9.52206614118952e-06, "loss": 0.3063, "step": 55580 }, { "epoch": 1.1316030534351145, "grad_norm": 10.874780825124956, "learning_rate": 9.521762926188959e-06, "loss": 0.23, "step": 55590 }, { "epoch": 1.1318066157760813, "grad_norm": 18.409233854096534, "learning_rate": 9.521459619865573e-06, "loss": 0.2163, "step": 55600 }, { "epoch": 1.1320101781170484, "grad_norm": 5.945885371320411, "learning_rate": 9.521156222225488e-06, "loss": 0.1894, "step": 55610 }, { "epoch": 1.1322137404580153, "grad_norm": 6.16979418250559, "learning_rate": 9.520852733274826e-06, "loss": 0.2066, "step": 55620 }, { "epoch": 1.1324173027989821, "grad_norm": 6.088771954247253, "learning_rate": 9.520549153019723e-06, "loss": 0.1937, "step": 55630 }, { "epoch": 1.1326208651399492, "grad_norm": 6.554024915664978, "learning_rate": 9.520245481466306e-06, "loss": 0.2291, "step": 55640 }, { "epoch": 1.132824427480916, "grad_norm": 4.518540501694849, "learning_rate": 9.519941718620712e-06, "loss": 0.2089, "step": 55650 }, { "epoch": 1.133027989821883, "grad_norm": 9.38725460298269, "learning_rate": 9.51963786448907e-06, "loss": 0.2478, "step": 55660 }, { "epoch": 1.13323155216285, "grad_norm": 12.75291600112378, "learning_rate": 9.519333919077522e-06, "loss": 0.2429, "step": 55670 }, { "epoch": 1.1334351145038168, "grad_norm": 21.045671953356663, "learning_rate": 9.519029882392203e-06, "loss": 0.2835, "step": 55680 }, { "epoch": 1.1336386768447837, "grad_norm": 21.186595347787872, "learning_rate": 9.518725754439257e-06, "loss": 0.2208, "step": 55690 }, { "epoch": 1.1338422391857506, "grad_norm": 13.313105963857566, "learning_rate": 9.518421535224823e-06, "loss": 0.2728, "step": 55700 }, { "epoch": 1.1340458015267176, "grad_norm": 4.433499723079105, "learning_rate": 9.518117224755047e-06, "loss": 0.208, "step": 55710 }, { "epoch": 1.1342493638676845, "grad_norm": 4.125054848043678, "learning_rate": 9.517812823036074e-06, "loss": 0.2285, "step": 55720 }, { "epoch": 1.1344529262086513, "grad_norm": 16.38871638891276, "learning_rate": 9.517508330074052e-06, "loss": 0.2935, "step": 55730 }, { "epoch": 1.1346564885496182, "grad_norm": 18.266138494681655, "learning_rate": 9.517203745875132e-06, "loss": 0.2194, "step": 55740 }, { "epoch": 1.1348600508905853, "grad_norm": 11.968834612536638, "learning_rate": 9.516899070445463e-06, "loss": 0.1723, "step": 55750 }, { "epoch": 1.1350636132315521, "grad_norm": 8.274011211511326, "learning_rate": 9.5165943037912e-06, "loss": 0.2966, "step": 55760 }, { "epoch": 1.135267175572519, "grad_norm": 20.208487353585593, "learning_rate": 9.516289445918498e-06, "loss": 0.2197, "step": 55770 }, { "epoch": 1.135470737913486, "grad_norm": 9.30053813024552, "learning_rate": 9.515984496833512e-06, "loss": 0.1992, "step": 55780 }, { "epoch": 1.135674300254453, "grad_norm": 9.377885563201568, "learning_rate": 9.515679456542405e-06, "loss": 0.2543, "step": 55790 }, { "epoch": 1.1358778625954198, "grad_norm": 8.371304794989605, "learning_rate": 9.515374325051334e-06, "loss": 0.2346, "step": 55800 }, { "epoch": 1.1360814249363869, "grad_norm": 4.236416848468356, "learning_rate": 9.515069102366463e-06, "loss": 0.2198, "step": 55810 }, { "epoch": 1.1362849872773537, "grad_norm": 12.058726698788265, "learning_rate": 9.514763788493956e-06, "loss": 0.3365, "step": 55820 }, { "epoch": 1.1364885496183206, "grad_norm": 2.523651329980588, "learning_rate": 9.514458383439979e-06, "loss": 0.1762, "step": 55830 }, { "epoch": 1.1366921119592877, "grad_norm": 5.427213755650828, "learning_rate": 9.514152887210701e-06, "loss": 0.1902, "step": 55840 }, { "epoch": 1.1368956743002545, "grad_norm": 3.7477693855405247, "learning_rate": 9.513847299812291e-06, "loss": 0.2876, "step": 55850 }, { "epoch": 1.1370992366412214, "grad_norm": 20.31511850304435, "learning_rate": 9.513541621250921e-06, "loss": 0.2668, "step": 55860 }, { "epoch": 1.1373027989821882, "grad_norm": 12.333823168346216, "learning_rate": 9.513235851532765e-06, "loss": 0.2475, "step": 55870 }, { "epoch": 1.1375063613231553, "grad_norm": 2.918512527261535, "learning_rate": 9.512929990663996e-06, "loss": 0.282, "step": 55880 }, { "epoch": 1.1377099236641222, "grad_norm": 12.18036699406871, "learning_rate": 9.512624038650794e-06, "loss": 0.2194, "step": 55890 }, { "epoch": 1.137913486005089, "grad_norm": 15.019786464836137, "learning_rate": 9.512317995499341e-06, "loss": 0.2335, "step": 55900 }, { "epoch": 1.1381170483460559, "grad_norm": 4.777255745196414, "learning_rate": 9.512011861215809e-06, "loss": 0.1568, "step": 55910 }, { "epoch": 1.138320610687023, "grad_norm": 5.27223521374841, "learning_rate": 9.511705635806389e-06, "loss": 0.2791, "step": 55920 }, { "epoch": 1.1385241730279898, "grad_norm": 16.688140316393493, "learning_rate": 9.511399319277261e-06, "loss": 0.2791, "step": 55930 }, { "epoch": 1.1387277353689567, "grad_norm": 12.503137833311847, "learning_rate": 9.511092911634615e-06, "loss": 0.2249, "step": 55940 }, { "epoch": 1.1389312977099237, "grad_norm": 8.229448241261485, "learning_rate": 9.510786412884637e-06, "loss": 0.2476, "step": 55950 }, { "epoch": 1.1391348600508906, "grad_norm": 4.310078926332958, "learning_rate": 9.510479823033516e-06, "loss": 0.2221, "step": 55960 }, { "epoch": 1.1393384223918575, "grad_norm": 15.632475791191881, "learning_rate": 9.510173142087446e-06, "loss": 0.2384, "step": 55970 }, { "epoch": 1.1395419847328245, "grad_norm": 11.829750302257725, "learning_rate": 9.509866370052624e-06, "loss": 0.2597, "step": 55980 }, { "epoch": 1.1397455470737914, "grad_norm": 9.025778239309101, "learning_rate": 9.509559506935237e-06, "loss": 0.1956, "step": 55990 }, { "epoch": 1.1399491094147582, "grad_norm": 8.403545900624955, "learning_rate": 9.50925255274149e-06, "loss": 0.3237, "step": 56000 }, { "epoch": 1.140152671755725, "grad_norm": 3.9760626596168485, "learning_rate": 9.508945507477579e-06, "loss": 0.2765, "step": 56010 }, { "epoch": 1.1403562340966922, "grad_norm": 13.062744673264545, "learning_rate": 9.508638371149705e-06, "loss": 0.2179, "step": 56020 }, { "epoch": 1.140559796437659, "grad_norm": 7.9796104221341455, "learning_rate": 9.508331143764073e-06, "loss": 0.3174, "step": 56030 }, { "epoch": 1.140763358778626, "grad_norm": 12.209586120073048, "learning_rate": 9.508023825326888e-06, "loss": 0.2153, "step": 56040 }, { "epoch": 1.140966921119593, "grad_norm": 12.24478542163533, "learning_rate": 9.507716415844355e-06, "loss": 0.2281, "step": 56050 }, { "epoch": 1.1411704834605598, "grad_norm": 23.9081792377919, "learning_rate": 9.507408915322682e-06, "loss": 0.1889, "step": 56060 }, { "epoch": 1.1413740458015267, "grad_norm": 20.420834091525847, "learning_rate": 9.507101323768082e-06, "loss": 0.247, "step": 56070 }, { "epoch": 1.1415776081424935, "grad_norm": 22.674984593398275, "learning_rate": 9.506793641186764e-06, "loss": 0.2716, "step": 56080 }, { "epoch": 1.1417811704834606, "grad_norm": 8.216527332971673, "learning_rate": 9.506485867584945e-06, "loss": 0.2671, "step": 56090 }, { "epoch": 1.1419847328244275, "grad_norm": 18.08809290354991, "learning_rate": 9.506178002968839e-06, "loss": 0.2617, "step": 56100 }, { "epoch": 1.1421882951653943, "grad_norm": 19.52051357166804, "learning_rate": 9.505870047344664e-06, "loss": 0.2, "step": 56110 }, { "epoch": 1.1423918575063614, "grad_norm": 10.695967450318575, "learning_rate": 9.50556200071864e-06, "loss": 0.2692, "step": 56120 }, { "epoch": 1.1425954198473283, "grad_norm": 10.55452021442166, "learning_rate": 9.50525386309699e-06, "loss": 0.2427, "step": 56130 }, { "epoch": 1.1427989821882951, "grad_norm": 6.961312981732791, "learning_rate": 9.504945634485934e-06, "loss": 0.3706, "step": 56140 }, { "epoch": 1.1430025445292622, "grad_norm": 5.179805977127208, "learning_rate": 9.504637314891698e-06, "loss": 0.2925, "step": 56150 }, { "epoch": 1.143206106870229, "grad_norm": 16.293953366594366, "learning_rate": 9.50432890432051e-06, "loss": 0.1525, "step": 56160 }, { "epoch": 1.143409669211196, "grad_norm": 3.3268066714939413, "learning_rate": 9.504020402778599e-06, "loss": 0.2627, "step": 56170 }, { "epoch": 1.1436132315521628, "grad_norm": 14.03031578174708, "learning_rate": 9.503711810272194e-06, "loss": 0.21, "step": 56180 }, { "epoch": 1.1438167938931298, "grad_norm": 16.313812636738206, "learning_rate": 9.503403126807528e-06, "loss": 0.2703, "step": 56190 }, { "epoch": 1.1440203562340967, "grad_norm": 2.7937420755014544, "learning_rate": 9.503094352390837e-06, "loss": 0.2685, "step": 56200 }, { "epoch": 1.1442239185750636, "grad_norm": 8.255106380910323, "learning_rate": 9.502785487028355e-06, "loss": 0.2743, "step": 56210 }, { "epoch": 1.1444274809160304, "grad_norm": 7.372001487231208, "learning_rate": 9.502476530726318e-06, "loss": 0.1846, "step": 56220 }, { "epoch": 1.1446310432569975, "grad_norm": 19.684093340460915, "learning_rate": 9.502167483490972e-06, "loss": 0.212, "step": 56230 }, { "epoch": 1.1448346055979643, "grad_norm": 55.360858941465345, "learning_rate": 9.501858345328553e-06, "loss": 0.3575, "step": 56240 }, { "epoch": 1.1450381679389312, "grad_norm": 2.501955492953478, "learning_rate": 9.501549116245307e-06, "loss": 0.2758, "step": 56250 }, { "epoch": 1.1452417302798983, "grad_norm": 3.427774564724657, "learning_rate": 9.501239796247479e-06, "loss": 0.1403, "step": 56260 }, { "epoch": 1.1454452926208651, "grad_norm": 2.013728967843908, "learning_rate": 9.500930385341315e-06, "loss": 0.1788, "step": 56270 }, { "epoch": 1.145648854961832, "grad_norm": 18.06080635939958, "learning_rate": 9.500620883533066e-06, "loss": 0.2965, "step": 56280 }, { "epoch": 1.145852417302799, "grad_norm": 5.705607789057748, "learning_rate": 9.500311290828979e-06, "loss": 0.2722, "step": 56290 }, { "epoch": 1.146055979643766, "grad_norm": 16.194236305660777, "learning_rate": 9.50000160723531e-06, "loss": 0.3057, "step": 56300 }, { "epoch": 1.1462595419847328, "grad_norm": 7.154026749449539, "learning_rate": 9.499691832758313e-06, "loss": 0.2753, "step": 56310 }, { "epoch": 1.1464631043256999, "grad_norm": 12.468419106744049, "learning_rate": 9.499381967404244e-06, "loss": 0.2709, "step": 56320 }, { "epoch": 1.1466666666666667, "grad_norm": 24.59058209115142, "learning_rate": 9.499072011179361e-06, "loss": 0.4032, "step": 56330 }, { "epoch": 1.1468702290076336, "grad_norm": 3.2311417559022093, "learning_rate": 9.498761964089924e-06, "loss": 0.2439, "step": 56340 }, { "epoch": 1.1470737913486004, "grad_norm": 14.575732569265885, "learning_rate": 9.498451826142196e-06, "loss": 0.2252, "step": 56350 }, { "epoch": 1.1472773536895675, "grad_norm": 17.76042363298508, "learning_rate": 9.498141597342437e-06, "loss": 0.3091, "step": 56360 }, { "epoch": 1.1474809160305344, "grad_norm": 9.265500581247315, "learning_rate": 9.497831277696916e-06, "loss": 0.1719, "step": 56370 }, { "epoch": 1.1476844783715012, "grad_norm": 11.273259822962048, "learning_rate": 9.4975208672119e-06, "loss": 0.2717, "step": 56380 }, { "epoch": 1.147888040712468, "grad_norm": 11.814193852779638, "learning_rate": 9.497210365893656e-06, "loss": 0.248, "step": 56390 }, { "epoch": 1.1480916030534352, "grad_norm": 2.5458110776099367, "learning_rate": 9.496899773748457e-06, "loss": 0.3124, "step": 56400 }, { "epoch": 1.148295165394402, "grad_norm": 16.8624443888023, "learning_rate": 9.496589090782575e-06, "loss": 0.2668, "step": 56410 }, { "epoch": 1.1484987277353689, "grad_norm": 11.658335709645161, "learning_rate": 9.496278317002284e-06, "loss": 0.3452, "step": 56420 }, { "epoch": 1.148702290076336, "grad_norm": 10.364310078683612, "learning_rate": 9.495967452413863e-06, "loss": 0.2119, "step": 56430 }, { "epoch": 1.1489058524173028, "grad_norm": 12.002679650673034, "learning_rate": 9.495656497023586e-06, "loss": 0.2229, "step": 56440 }, { "epoch": 1.1491094147582697, "grad_norm": 2.6313825244153284, "learning_rate": 9.495345450837738e-06, "loss": 0.2451, "step": 56450 }, { "epoch": 1.1493129770992367, "grad_norm": 13.636702454867578, "learning_rate": 9.495034313862598e-06, "loss": 0.2097, "step": 56460 }, { "epoch": 1.1495165394402036, "grad_norm": 3.58215149381893, "learning_rate": 9.49472308610445e-06, "loss": 0.1652, "step": 56470 }, { "epoch": 1.1497201017811705, "grad_norm": 2.8277319325235046, "learning_rate": 9.49441176756958e-06, "loss": 0.1686, "step": 56480 }, { "epoch": 1.1499236641221373, "grad_norm": 14.865030421552975, "learning_rate": 9.494100358264277e-06, "loss": 0.2804, "step": 56490 }, { "epoch": 1.1501272264631044, "grad_norm": 5.3096817338740285, "learning_rate": 9.493788858194828e-06, "loss": 0.1331, "step": 56500 }, { "epoch": 1.1503307888040712, "grad_norm": 19.725736231164294, "learning_rate": 9.493477267367524e-06, "loss": 0.2344, "step": 56510 }, { "epoch": 1.150534351145038, "grad_norm": 16.146685529234517, "learning_rate": 9.493165585788659e-06, "loss": 0.2927, "step": 56520 }, { "epoch": 1.1507379134860052, "grad_norm": 11.733203383654788, "learning_rate": 9.49285381346453e-06, "loss": 0.3074, "step": 56530 }, { "epoch": 1.150941475826972, "grad_norm": 6.271366226228611, "learning_rate": 9.492541950401431e-06, "loss": 0.1946, "step": 56540 }, { "epoch": 1.151145038167939, "grad_norm": 12.866707350386909, "learning_rate": 9.49222999660566e-06, "loss": 0.1996, "step": 56550 }, { "epoch": 1.1513486005089058, "grad_norm": 11.788665026501086, "learning_rate": 9.491917952083518e-06, "loss": 0.2517, "step": 56560 }, { "epoch": 1.1515521628498728, "grad_norm": 9.600590607849975, "learning_rate": 9.491605816841308e-06, "loss": 0.2064, "step": 56570 }, { "epoch": 1.1517557251908397, "grad_norm": 6.695127039323345, "learning_rate": 9.491293590885333e-06, "loss": 0.3283, "step": 56580 }, { "epoch": 1.1519592875318065, "grad_norm": 6.742941380949785, "learning_rate": 9.4909812742219e-06, "loss": 0.2718, "step": 56590 }, { "epoch": 1.1521628498727736, "grad_norm": 9.41820997736917, "learning_rate": 9.490668866857316e-06, "loss": 0.3166, "step": 56600 }, { "epoch": 1.1523664122137405, "grad_norm": 5.985921282786836, "learning_rate": 9.490356368797889e-06, "loss": 0.1917, "step": 56610 }, { "epoch": 1.1525699745547073, "grad_norm": 11.394321493294065, "learning_rate": 9.490043780049932e-06, "loss": 0.2565, "step": 56620 }, { "epoch": 1.1527735368956744, "grad_norm": 7.628002159167968, "learning_rate": 9.489731100619758e-06, "loss": 0.2328, "step": 56630 }, { "epoch": 1.1529770992366413, "grad_norm": 9.363039897486798, "learning_rate": 9.489418330513683e-06, "loss": 0.2383, "step": 56640 }, { "epoch": 1.1531806615776081, "grad_norm": 10.45417893674277, "learning_rate": 9.48910546973802e-06, "loss": 0.2068, "step": 56650 }, { "epoch": 1.153384223918575, "grad_norm": 9.514435632080856, "learning_rate": 9.48879251829909e-06, "loss": 0.2195, "step": 56660 }, { "epoch": 1.153587786259542, "grad_norm": 11.429105103967833, "learning_rate": 9.488479476203215e-06, "loss": 0.2335, "step": 56670 }, { "epoch": 1.153791348600509, "grad_norm": 13.250175577259599, "learning_rate": 9.488166343456717e-06, "loss": 0.1536, "step": 56680 }, { "epoch": 1.1539949109414758, "grad_norm": 9.478400019037828, "learning_rate": 9.487853120065918e-06, "loss": 0.2649, "step": 56690 }, { "epoch": 1.1541984732824426, "grad_norm": 11.225347909124281, "learning_rate": 9.487539806037144e-06, "loss": 0.2553, "step": 56700 }, { "epoch": 1.1544020356234097, "grad_norm": 3.106485896678214, "learning_rate": 9.487226401376724e-06, "loss": 0.3156, "step": 56710 }, { "epoch": 1.1546055979643766, "grad_norm": 16.657782502574733, "learning_rate": 9.486912906090988e-06, "loss": 0.2469, "step": 56720 }, { "epoch": 1.1548091603053434, "grad_norm": 12.709021610405276, "learning_rate": 9.486599320186266e-06, "loss": 0.2707, "step": 56730 }, { "epoch": 1.1550127226463105, "grad_norm": 6.148471097880326, "learning_rate": 9.486285643668892e-06, "loss": 0.2776, "step": 56740 }, { "epoch": 1.1552162849872774, "grad_norm": 2.830803465916509, "learning_rate": 9.4859718765452e-06, "loss": 0.2033, "step": 56750 }, { "epoch": 1.1554198473282442, "grad_norm": 8.865357962903895, "learning_rate": 9.485658018821531e-06, "loss": 0.2313, "step": 56760 }, { "epoch": 1.1556234096692113, "grad_norm": 9.526484193982673, "learning_rate": 9.48534407050422e-06, "loss": 0.2694, "step": 56770 }, { "epoch": 1.1558269720101781, "grad_norm": 12.234913813171683, "learning_rate": 9.485030031599608e-06, "loss": 0.1936, "step": 56780 }, { "epoch": 1.156030534351145, "grad_norm": 11.347471902584724, "learning_rate": 9.484715902114036e-06, "loss": 0.2454, "step": 56790 }, { "epoch": 1.156234096692112, "grad_norm": 3.281972526069412, "learning_rate": 9.484401682053853e-06, "loss": 0.27, "step": 56800 }, { "epoch": 1.156437659033079, "grad_norm": 6.740167193079418, "learning_rate": 9.484087371425402e-06, "loss": 0.2717, "step": 56810 }, { "epoch": 1.1566412213740458, "grad_norm": 11.325525568536312, "learning_rate": 9.483772970235028e-06, "loss": 0.2709, "step": 56820 }, { "epoch": 1.1568447837150126, "grad_norm": 7.397069205077709, "learning_rate": 9.483458478489086e-06, "loss": 0.2484, "step": 56830 }, { "epoch": 1.1570483460559797, "grad_norm": 11.98704116986552, "learning_rate": 9.483143896193926e-06, "loss": 0.2067, "step": 56840 }, { "epoch": 1.1572519083969466, "grad_norm": 9.56473204555879, "learning_rate": 9.482829223355901e-06, "loss": 0.1778, "step": 56850 }, { "epoch": 1.1574554707379134, "grad_norm": 8.799710271835036, "learning_rate": 9.482514459981365e-06, "loss": 0.2155, "step": 56860 }, { "epoch": 1.1576590330788803, "grad_norm": 13.309859704381546, "learning_rate": 9.482199606076676e-06, "loss": 0.1792, "step": 56870 }, { "epoch": 1.1578625954198474, "grad_norm": 14.986797784303604, "learning_rate": 9.481884661648192e-06, "loss": 0.2929, "step": 56880 }, { "epoch": 1.1580661577608142, "grad_norm": 25.15450954479152, "learning_rate": 9.481569626702276e-06, "loss": 0.2704, "step": 56890 }, { "epoch": 1.158269720101781, "grad_norm": 4.700813897972151, "learning_rate": 9.481254501245288e-06, "loss": 0.3487, "step": 56900 }, { "epoch": 1.1584732824427482, "grad_norm": 6.954957755529558, "learning_rate": 9.480939285283594e-06, "loss": 0.1707, "step": 56910 }, { "epoch": 1.158676844783715, "grad_norm": 9.557743650034029, "learning_rate": 9.48062397882356e-06, "loss": 0.2683, "step": 56920 }, { "epoch": 1.1588804071246819, "grad_norm": 3.4695328016358182, "learning_rate": 9.480308581871553e-06, "loss": 0.2981, "step": 56930 }, { "epoch": 1.159083969465649, "grad_norm": 6.997809434629552, "learning_rate": 9.479993094433946e-06, "loss": 0.2673, "step": 56940 }, { "epoch": 1.1592875318066158, "grad_norm": 4.720553739737732, "learning_rate": 9.479677516517105e-06, "loss": 0.2351, "step": 56950 }, { "epoch": 1.1594910941475827, "grad_norm": 5.549946615917568, "learning_rate": 9.47936184812741e-06, "loss": 0.2323, "step": 56960 }, { "epoch": 1.1596946564885495, "grad_norm": 7.606842623944108, "learning_rate": 9.479046089271231e-06, "loss": 0.3299, "step": 56970 }, { "epoch": 1.1598982188295166, "grad_norm": 12.58149869337024, "learning_rate": 9.478730239954947e-06, "loss": 0.2324, "step": 56980 }, { "epoch": 1.1601017811704835, "grad_norm": 10.320215745401647, "learning_rate": 9.47841430018494e-06, "loss": 0.3443, "step": 56990 }, { "epoch": 1.1603053435114503, "grad_norm": 11.553338758704616, "learning_rate": 9.478098269967584e-06, "loss": 0.2297, "step": 57000 }, { "epoch": 1.1605089058524174, "grad_norm": 7.7706779992211645, "learning_rate": 9.47778214930927e-06, "loss": 0.1863, "step": 57010 }, { "epoch": 1.1607124681933843, "grad_norm": 5.221433437482855, "learning_rate": 9.477465938216374e-06, "loss": 0.2417, "step": 57020 }, { "epoch": 1.160916030534351, "grad_norm": 21.508103354681936, "learning_rate": 9.477149636695288e-06, "loss": 0.3506, "step": 57030 }, { "epoch": 1.161119592875318, "grad_norm": 7.517345479266777, "learning_rate": 9.476833244752399e-06, "loss": 0.3695, "step": 57040 }, { "epoch": 1.161323155216285, "grad_norm": 7.730350401478463, "learning_rate": 9.476516762394097e-06, "loss": 0.2584, "step": 57050 }, { "epoch": 1.161526717557252, "grad_norm": 13.655613557581956, "learning_rate": 9.476200189626771e-06, "loss": 0.1917, "step": 57060 }, { "epoch": 1.1617302798982188, "grad_norm": 6.947350709675171, "learning_rate": 9.47588352645682e-06, "loss": 0.2282, "step": 57070 }, { "epoch": 1.1619338422391858, "grad_norm": 14.976536180183947, "learning_rate": 9.475566772890634e-06, "loss": 0.2282, "step": 57080 }, { "epoch": 1.1621374045801527, "grad_norm": 11.118978651349186, "learning_rate": 9.475249928934614e-06, "loss": 0.2786, "step": 57090 }, { "epoch": 1.1623409669211195, "grad_norm": 12.832776673624371, "learning_rate": 9.474932994595157e-06, "loss": 0.2399, "step": 57100 }, { "epoch": 1.1625445292620866, "grad_norm": 6.3428962632191785, "learning_rate": 9.474615969878664e-06, "loss": 0.1894, "step": 57110 }, { "epoch": 1.1627480916030535, "grad_norm": 6.4448285429617505, "learning_rate": 9.47429885479154e-06, "loss": 0.255, "step": 57120 }, { "epoch": 1.1629516539440203, "grad_norm": 11.755274330789305, "learning_rate": 9.473981649340186e-06, "loss": 0.1614, "step": 57130 }, { "epoch": 1.1631552162849872, "grad_norm": 7.194934265903918, "learning_rate": 9.47366435353101e-06, "loss": 0.2644, "step": 57140 }, { "epoch": 1.1633587786259543, "grad_norm": 11.449101065621639, "learning_rate": 9.473346967370422e-06, "loss": 0.2383, "step": 57150 }, { "epoch": 1.1635623409669211, "grad_norm": 3.1407692715829048, "learning_rate": 9.473029490864829e-06, "loss": 0.2377, "step": 57160 }, { "epoch": 1.163765903307888, "grad_norm": 3.8182913288754277, "learning_rate": 9.472711924020644e-06, "loss": 0.128, "step": 57170 }, { "epoch": 1.1639694656488548, "grad_norm": 14.257933233847249, "learning_rate": 9.472394266844282e-06, "loss": 0.1747, "step": 57180 }, { "epoch": 1.164173027989822, "grad_norm": 7.13979187262362, "learning_rate": 9.472076519342158e-06, "loss": 0.2173, "step": 57190 }, { "epoch": 1.1643765903307888, "grad_norm": 19.82369336160403, "learning_rate": 9.471758681520686e-06, "loss": 0.1996, "step": 57200 }, { "epoch": 1.1645801526717556, "grad_norm": 13.325831770881893, "learning_rate": 9.471440753386289e-06, "loss": 0.2311, "step": 57210 }, { "epoch": 1.1647837150127227, "grad_norm": 17.09583936176337, "learning_rate": 9.471122734945387e-06, "loss": 0.2914, "step": 57220 }, { "epoch": 1.1649872773536896, "grad_norm": 21.51664963651275, "learning_rate": 9.470804626204402e-06, "loss": 0.2828, "step": 57230 }, { "epoch": 1.1651908396946564, "grad_norm": 7.2855117859169205, "learning_rate": 9.470486427169761e-06, "loss": 0.1743, "step": 57240 }, { "epoch": 1.1653944020356235, "grad_norm": 13.730642478623366, "learning_rate": 9.470168137847889e-06, "loss": 0.2113, "step": 57250 }, { "epoch": 1.1655979643765904, "grad_norm": 7.285613654441126, "learning_rate": 9.469849758245212e-06, "loss": 0.2412, "step": 57260 }, { "epoch": 1.1658015267175572, "grad_norm": 6.182650457790887, "learning_rate": 9.469531288368162e-06, "loss": 0.2999, "step": 57270 }, { "epoch": 1.1660050890585243, "grad_norm": 18.71962630737434, "learning_rate": 9.469212728223171e-06, "loss": 0.3928, "step": 57280 }, { "epoch": 1.1662086513994911, "grad_norm": 15.159540474860048, "learning_rate": 9.468894077816673e-06, "loss": 0.224, "step": 57290 }, { "epoch": 1.166412213740458, "grad_norm": 6.461851666283523, "learning_rate": 9.468575337155105e-06, "loss": 0.2587, "step": 57300 }, { "epoch": 1.1666157760814249, "grad_norm": 18.770043396705397, "learning_rate": 9.4682565062449e-06, "loss": 0.2091, "step": 57310 }, { "epoch": 1.166819338422392, "grad_norm": 4.7697303631497165, "learning_rate": 9.4679375850925e-06, "loss": 0.203, "step": 57320 }, { "epoch": 1.1670229007633588, "grad_norm": 8.711306988542432, "learning_rate": 9.467618573704346e-06, "loss": 0.2156, "step": 57330 }, { "epoch": 1.1672264631043257, "grad_norm": 19.54188636887956, "learning_rate": 9.467299472086882e-06, "loss": 0.2619, "step": 57340 }, { "epoch": 1.1674300254452925, "grad_norm": 6.604713547944362, "learning_rate": 9.46698028024655e-06, "loss": 0.232, "step": 57350 }, { "epoch": 1.1676335877862596, "grad_norm": 11.53931960399844, "learning_rate": 9.466660998189798e-06, "loss": 0.2335, "step": 57360 }, { "epoch": 1.1678371501272264, "grad_norm": 4.402278169885436, "learning_rate": 9.466341625923075e-06, "loss": 0.2839, "step": 57370 }, { "epoch": 1.1680407124681933, "grad_norm": 6.122594754656979, "learning_rate": 9.466022163452831e-06, "loss": 0.2199, "step": 57380 }, { "epoch": 1.1682442748091604, "grad_norm": 13.840687222807722, "learning_rate": 9.465702610785514e-06, "loss": 0.2226, "step": 57390 }, { "epoch": 1.1684478371501272, "grad_norm": 10.65012374142873, "learning_rate": 9.465382967927583e-06, "loss": 0.2558, "step": 57400 }, { "epoch": 1.168651399491094, "grad_norm": 21.978025074497754, "learning_rate": 9.465063234885491e-06, "loss": 0.1956, "step": 57410 }, { "epoch": 1.1688549618320612, "grad_norm": 14.114233800804088, "learning_rate": 9.464743411665697e-06, "loss": 0.1674, "step": 57420 }, { "epoch": 1.169058524173028, "grad_norm": 1.368122847043657, "learning_rate": 9.46442349827466e-06, "loss": 0.1991, "step": 57430 }, { "epoch": 1.1692620865139949, "grad_norm": 12.955495683361884, "learning_rate": 9.464103494718839e-06, "loss": 0.1685, "step": 57440 }, { "epoch": 1.1694656488549617, "grad_norm": 2.2082556380618947, "learning_rate": 9.463783401004699e-06, "loss": 0.2496, "step": 57450 }, { "epoch": 1.1696692111959288, "grad_norm": 84.75818865541459, "learning_rate": 9.463463217138702e-06, "loss": 0.3046, "step": 57460 }, { "epoch": 1.1698727735368957, "grad_norm": 4.559988577114799, "learning_rate": 9.463142943127318e-06, "loss": 0.1895, "step": 57470 }, { "epoch": 1.1700763358778625, "grad_norm": 8.646621750172134, "learning_rate": 9.462822578977013e-06, "loss": 0.2264, "step": 57480 }, { "epoch": 1.1702798982188296, "grad_norm": 10.708582119628888, "learning_rate": 9.462502124694258e-06, "loss": 0.2849, "step": 57490 }, { "epoch": 1.1704834605597965, "grad_norm": 5.091712409167318, "learning_rate": 9.462181580285525e-06, "loss": 0.1451, "step": 57500 }, { "epoch": 1.1706870229007633, "grad_norm": 3.4984883482927334, "learning_rate": 9.461860945757289e-06, "loss": 0.272, "step": 57510 }, { "epoch": 1.1708905852417302, "grad_norm": 22.046641471405188, "learning_rate": 9.461540221116023e-06, "loss": 0.2812, "step": 57520 }, { "epoch": 1.1710941475826973, "grad_norm": 23.875923673230567, "learning_rate": 9.461219406368207e-06, "loss": 0.3144, "step": 57530 }, { "epoch": 1.171297709923664, "grad_norm": 7.804517339589228, "learning_rate": 9.460898501520316e-06, "loss": 0.2701, "step": 57540 }, { "epoch": 1.171501272264631, "grad_norm": 7.04959080305402, "learning_rate": 9.460577506578838e-06, "loss": 0.1989, "step": 57550 }, { "epoch": 1.171704834605598, "grad_norm": 14.805347162568697, "learning_rate": 9.46025642155025e-06, "loss": 0.2523, "step": 57560 }, { "epoch": 1.171908396946565, "grad_norm": 33.91960432032657, "learning_rate": 9.459935246441039e-06, "loss": 0.2029, "step": 57570 }, { "epoch": 1.1721119592875318, "grad_norm": 4.241889946091734, "learning_rate": 9.459613981257692e-06, "loss": 0.1689, "step": 57580 }, { "epoch": 1.1723155216284988, "grad_norm": 10.039380808579248, "learning_rate": 9.459292626006697e-06, "loss": 0.172, "step": 57590 }, { "epoch": 1.1725190839694657, "grad_norm": 3.0092823103878126, "learning_rate": 9.458971180694543e-06, "loss": 0.2438, "step": 57600 }, { "epoch": 1.1727226463104325, "grad_norm": 1.8654352572652833, "learning_rate": 9.458649645327723e-06, "loss": 0.2344, "step": 57610 }, { "epoch": 1.1729262086513994, "grad_norm": 12.102368113480244, "learning_rate": 9.458328019912732e-06, "loss": 0.3159, "step": 57620 }, { "epoch": 1.1731297709923665, "grad_norm": 10.720991659748764, "learning_rate": 9.458006304456062e-06, "loss": 0.1728, "step": 57630 }, { "epoch": 1.1733333333333333, "grad_norm": 9.77968413234486, "learning_rate": 9.457684498964214e-06, "loss": 0.2363, "step": 57640 }, { "epoch": 1.1735368956743002, "grad_norm": 8.997688371880885, "learning_rate": 9.457362603443688e-06, "loss": 0.2893, "step": 57650 }, { "epoch": 1.173740458015267, "grad_norm": 28.52559191169176, "learning_rate": 9.45704061790098e-06, "loss": 0.2176, "step": 57660 }, { "epoch": 1.1739440203562341, "grad_norm": 1.4843220811355327, "learning_rate": 9.456718542342597e-06, "loss": 0.1593, "step": 57670 }, { "epoch": 1.174147582697201, "grad_norm": 6.8055897393760505, "learning_rate": 9.456396376775043e-06, "loss": 0.2236, "step": 57680 }, { "epoch": 1.1743511450381678, "grad_norm": 21.641289316654714, "learning_rate": 9.456074121204826e-06, "loss": 0.2073, "step": 57690 }, { "epoch": 1.174554707379135, "grad_norm": 23.340634676341377, "learning_rate": 9.45575177563845e-06, "loss": 0.2856, "step": 57700 }, { "epoch": 1.1747582697201018, "grad_norm": 16.494548049565413, "learning_rate": 9.45542934008243e-06, "loss": 0.2622, "step": 57710 }, { "epoch": 1.1749618320610686, "grad_norm": 8.922063518663249, "learning_rate": 9.455106814543276e-06, "loss": 0.3462, "step": 57720 }, { "epoch": 1.1751653944020357, "grad_norm": 12.57029197901962, "learning_rate": 9.4547841990275e-06, "loss": 0.2741, "step": 57730 }, { "epoch": 1.1753689567430026, "grad_norm": 5.843525181620575, "learning_rate": 9.45446149354162e-06, "loss": 0.2291, "step": 57740 }, { "epoch": 1.1755725190839694, "grad_norm": 5.894318085601612, "learning_rate": 9.454138698092153e-06, "loss": 0.2939, "step": 57750 }, { "epoch": 1.1757760814249365, "grad_norm": 21.409554903214534, "learning_rate": 9.45381581268562e-06, "loss": 0.283, "step": 57760 }, { "epoch": 1.1759796437659034, "grad_norm": 9.431313450155988, "learning_rate": 9.453492837328537e-06, "loss": 0.1674, "step": 57770 }, { "epoch": 1.1761832061068702, "grad_norm": 8.701442127339584, "learning_rate": 9.453169772027432e-06, "loss": 0.2597, "step": 57780 }, { "epoch": 1.176386768447837, "grad_norm": 7.953328317601455, "learning_rate": 9.452846616788827e-06, "loss": 0.2609, "step": 57790 }, { "epoch": 1.1765903307888042, "grad_norm": 6.444512083323708, "learning_rate": 9.45252337161925e-06, "loss": 0.2685, "step": 57800 }, { "epoch": 1.176793893129771, "grad_norm": 9.036710057639848, "learning_rate": 9.452200036525229e-06, "loss": 0.3422, "step": 57810 }, { "epoch": 1.1769974554707379, "grad_norm": 6.8389160130498485, "learning_rate": 9.451876611513293e-06, "loss": 0.2391, "step": 57820 }, { "epoch": 1.1772010178117047, "grad_norm": 5.275471328027307, "learning_rate": 9.451553096589975e-06, "loss": 0.2436, "step": 57830 }, { "epoch": 1.1774045801526718, "grad_norm": 4.036424560578364, "learning_rate": 9.45122949176181e-06, "loss": 0.2348, "step": 57840 }, { "epoch": 1.1776081424936387, "grad_norm": 15.059368442971481, "learning_rate": 9.45090579703533e-06, "loss": 0.2098, "step": 57850 }, { "epoch": 1.1778117048346055, "grad_norm": 13.733602206751227, "learning_rate": 9.450582012417076e-06, "loss": 0.2334, "step": 57860 }, { "epoch": 1.1780152671755726, "grad_norm": 11.024120303152841, "learning_rate": 9.450258137913586e-06, "loss": 0.2195, "step": 57870 }, { "epoch": 1.1782188295165394, "grad_norm": 15.078206215683945, "learning_rate": 9.4499341735314e-06, "loss": 0.2981, "step": 57880 }, { "epoch": 1.1784223918575063, "grad_norm": 10.39187108607139, "learning_rate": 9.449610119277063e-06, "loss": 0.3007, "step": 57890 }, { "epoch": 1.1786259541984734, "grad_norm": 7.048083146612702, "learning_rate": 9.44928597515712e-06, "loss": 0.1879, "step": 57900 }, { "epoch": 1.1788295165394402, "grad_norm": 7.054383344096861, "learning_rate": 9.448961741178112e-06, "loss": 0.2376, "step": 57910 }, { "epoch": 1.179033078880407, "grad_norm": 17.327430474526544, "learning_rate": 9.448637417346593e-06, "loss": 0.2613, "step": 57920 }, { "epoch": 1.179236641221374, "grad_norm": 15.699503908402583, "learning_rate": 9.448313003669114e-06, "loss": 0.2813, "step": 57930 }, { "epoch": 1.179440203562341, "grad_norm": 16.031642281072934, "learning_rate": 9.447988500152222e-06, "loss": 0.3515, "step": 57940 }, { "epoch": 1.1796437659033079, "grad_norm": 11.481000514624354, "learning_rate": 9.447663906802475e-06, "loss": 0.2367, "step": 57950 }, { "epoch": 1.1798473282442747, "grad_norm": 10.287337412922708, "learning_rate": 9.447339223626426e-06, "loss": 0.2864, "step": 57960 }, { "epoch": 1.1800508905852418, "grad_norm": 11.807112241317403, "learning_rate": 9.447014450630631e-06, "loss": 0.2308, "step": 57970 }, { "epoch": 1.1802544529262087, "grad_norm": 4.777842440248886, "learning_rate": 9.446689587821655e-06, "loss": 0.3083, "step": 57980 }, { "epoch": 1.1804580152671755, "grad_norm": 3.9581261660367213, "learning_rate": 9.446364635206053e-06, "loss": 0.4117, "step": 57990 }, { "epoch": 1.1806615776081424, "grad_norm": 7.954735637255657, "learning_rate": 9.446039592790393e-06, "loss": 0.2414, "step": 58000 }, { "epoch": 1.1808651399491095, "grad_norm": 9.421801882622912, "learning_rate": 9.445714460581234e-06, "loss": 0.2427, "step": 58010 }, { "epoch": 1.1810687022900763, "grad_norm": 3.734080699221245, "learning_rate": 9.445389238585147e-06, "loss": 0.2638, "step": 58020 }, { "epoch": 1.1812722646310432, "grad_norm": 8.739234080376951, "learning_rate": 9.4450639268087e-06, "loss": 0.1918, "step": 58030 }, { "epoch": 1.1814758269720103, "grad_norm": 1.8568983980667064, "learning_rate": 9.44473852525846e-06, "loss": 0.2121, "step": 58040 }, { "epoch": 1.1816793893129771, "grad_norm": 9.580302924398774, "learning_rate": 9.444413033941e-06, "loss": 0.2048, "step": 58050 }, { "epoch": 1.181882951653944, "grad_norm": 14.63770360373165, "learning_rate": 9.444087452862895e-06, "loss": 0.2618, "step": 58060 }, { "epoch": 1.182086513994911, "grad_norm": 8.424893922416386, "learning_rate": 9.443761782030722e-06, "loss": 0.2856, "step": 58070 }, { "epoch": 1.182290076335878, "grad_norm": 1.8460786381510979, "learning_rate": 9.443436021451054e-06, "loss": 0.2315, "step": 58080 }, { "epoch": 1.1824936386768448, "grad_norm": 8.338013481401871, "learning_rate": 9.443110171130474e-06, "loss": 0.2128, "step": 58090 }, { "epoch": 1.1826972010178116, "grad_norm": 31.064712359053516, "learning_rate": 9.44278423107556e-06, "loss": 0.301, "step": 58100 }, { "epoch": 1.1829007633587787, "grad_norm": 5.494253243379312, "learning_rate": 9.442458201292897e-06, "loss": 0.3331, "step": 58110 }, { "epoch": 1.1831043256997456, "grad_norm": 6.1417081728769904, "learning_rate": 9.442132081789068e-06, "loss": 0.2367, "step": 58120 }, { "epoch": 1.1833078880407124, "grad_norm": 2.1667405723954305, "learning_rate": 9.44180587257066e-06, "loss": 0.2121, "step": 58130 }, { "epoch": 1.1835114503816795, "grad_norm": 4.595137672813904, "learning_rate": 9.441479573644263e-06, "loss": 0.2625, "step": 58140 }, { "epoch": 1.1837150127226463, "grad_norm": 6.731354776248258, "learning_rate": 9.441153185016465e-06, "loss": 0.375, "step": 58150 }, { "epoch": 1.1839185750636132, "grad_norm": 19.781493400846248, "learning_rate": 9.440826706693858e-06, "loss": 0.2201, "step": 58160 }, { "epoch": 1.18412213740458, "grad_norm": 9.466008667673075, "learning_rate": 9.440500138683035e-06, "loss": 0.2259, "step": 58170 }, { "epoch": 1.1843256997455471, "grad_norm": 15.127719517582035, "learning_rate": 9.440173480990593e-06, "loss": 0.2712, "step": 58180 }, { "epoch": 1.184529262086514, "grad_norm": 9.249155686154449, "learning_rate": 9.439846733623128e-06, "loss": 0.2859, "step": 58190 }, { "epoch": 1.1847328244274808, "grad_norm": 10.485292941318, "learning_rate": 9.439519896587241e-06, "loss": 0.1783, "step": 58200 }, { "epoch": 1.184936386768448, "grad_norm": 49.738585754771556, "learning_rate": 9.43919296988953e-06, "loss": 0.2466, "step": 58210 }, { "epoch": 1.1851399491094148, "grad_norm": 17.529913173854464, "learning_rate": 9.438865953536603e-06, "loss": 0.2062, "step": 58220 }, { "epoch": 1.1853435114503816, "grad_norm": 9.738041371832026, "learning_rate": 9.438538847535057e-06, "loss": 0.2631, "step": 58230 }, { "epoch": 1.1855470737913487, "grad_norm": 17.44200156130057, "learning_rate": 9.438211651891505e-06, "loss": 0.2403, "step": 58240 }, { "epoch": 1.1857506361323156, "grad_norm": 16.178510185914426, "learning_rate": 9.437884366612549e-06, "loss": 0.1973, "step": 58250 }, { "epoch": 1.1859541984732824, "grad_norm": 21.207592375794995, "learning_rate": 9.437556991704805e-06, "loss": 0.34, "step": 58260 }, { "epoch": 1.1861577608142493, "grad_norm": 9.998674819445537, "learning_rate": 9.437229527174881e-06, "loss": 0.1467, "step": 58270 }, { "epoch": 1.1863613231552164, "grad_norm": 18.116102569819603, "learning_rate": 9.436901973029392e-06, "loss": 0.3089, "step": 58280 }, { "epoch": 1.1865648854961832, "grad_norm": 15.563964758986765, "learning_rate": 9.436574329274952e-06, "loss": 0.236, "step": 58290 }, { "epoch": 1.18676844783715, "grad_norm": 6.506262555252575, "learning_rate": 9.436246595918179e-06, "loss": 0.2377, "step": 58300 }, { "epoch": 1.186972010178117, "grad_norm": 9.658191390720576, "learning_rate": 9.435918772965693e-06, "loss": 0.2135, "step": 58310 }, { "epoch": 1.187175572519084, "grad_norm": 12.434385534228936, "learning_rate": 9.435590860424116e-06, "loss": 0.2605, "step": 58320 }, { "epoch": 1.1873791348600509, "grad_norm": 8.87386217580911, "learning_rate": 9.435262858300065e-06, "loss": 0.2601, "step": 58330 }, { "epoch": 1.1875826972010177, "grad_norm": 6.657190219076648, "learning_rate": 9.43493476660017e-06, "loss": 0.1929, "step": 58340 }, { "epoch": 1.1877862595419848, "grad_norm": 23.80224783979063, "learning_rate": 9.434606585331053e-06, "loss": 0.1311, "step": 58350 }, { "epoch": 1.1879898218829517, "grad_norm": 11.14421133424908, "learning_rate": 9.434278314499347e-06, "loss": 0.2888, "step": 58360 }, { "epoch": 1.1881933842239185, "grad_norm": 11.153413998619273, "learning_rate": 9.433949954111678e-06, "loss": 0.235, "step": 58370 }, { "epoch": 1.1883969465648856, "grad_norm": 3.526921248273147, "learning_rate": 9.43362150417468e-06, "loss": 0.2453, "step": 58380 }, { "epoch": 1.1886005089058524, "grad_norm": 11.010185773594278, "learning_rate": 9.433292964694985e-06, "loss": 0.2318, "step": 58390 }, { "epoch": 1.1888040712468193, "grad_norm": 2.8060061952290205, "learning_rate": 9.432964335679227e-06, "loss": 0.1835, "step": 58400 }, { "epoch": 1.1890076335877864, "grad_norm": 13.995050731624064, "learning_rate": 9.432635617134046e-06, "loss": 0.2425, "step": 58410 }, { "epoch": 1.1892111959287532, "grad_norm": 8.94639193386623, "learning_rate": 9.432306809066077e-06, "loss": 0.2046, "step": 58420 }, { "epoch": 1.18941475826972, "grad_norm": 4.144189377322178, "learning_rate": 9.431977911481967e-06, "loss": 0.3172, "step": 58430 }, { "epoch": 1.189618320610687, "grad_norm": 2.8607847689735917, "learning_rate": 9.43164892438835e-06, "loss": 0.2253, "step": 58440 }, { "epoch": 1.189821882951654, "grad_norm": 11.817789508526845, "learning_rate": 9.43131984779188e-06, "loss": 0.2302, "step": 58450 }, { "epoch": 1.1900254452926209, "grad_norm": 8.18280436179556, "learning_rate": 9.430990681699195e-06, "loss": 0.1623, "step": 58460 }, { "epoch": 1.1902290076335877, "grad_norm": 6.331880506829081, "learning_rate": 9.430661426116947e-06, "loss": 0.2204, "step": 58470 }, { "epoch": 1.1904325699745546, "grad_norm": 2.345366882538317, "learning_rate": 9.430332081051786e-06, "loss": 0.2352, "step": 58480 }, { "epoch": 1.1906361323155217, "grad_norm": 4.439799459247679, "learning_rate": 9.430002646510362e-06, "loss": 0.271, "step": 58490 }, { "epoch": 1.1908396946564885, "grad_norm": 15.38079472792238, "learning_rate": 9.429673122499328e-06, "loss": 0.3024, "step": 58500 }, { "epoch": 1.1910432569974554, "grad_norm": 11.364519196941236, "learning_rate": 9.42934350902534e-06, "loss": 0.267, "step": 58510 }, { "epoch": 1.1912468193384225, "grad_norm": 3.267352833371084, "learning_rate": 9.429013806095055e-06, "loss": 0.2695, "step": 58520 }, { "epoch": 1.1914503816793893, "grad_norm": 4.902226875499729, "learning_rate": 9.42868401371513e-06, "loss": 0.2083, "step": 58530 }, { "epoch": 1.1916539440203562, "grad_norm": 16.707336107881638, "learning_rate": 9.42835413189223e-06, "loss": 0.3986, "step": 58540 }, { "epoch": 1.1918575063613233, "grad_norm": 4.36207167755596, "learning_rate": 9.428024160633013e-06, "loss": 0.2883, "step": 58550 }, { "epoch": 1.1920610687022901, "grad_norm": 23.562018264676983, "learning_rate": 9.427694099944145e-06, "loss": 0.2366, "step": 58560 }, { "epoch": 1.192264631043257, "grad_norm": 16.101230032448534, "learning_rate": 9.427363949832294e-06, "loss": 0.3123, "step": 58570 }, { "epoch": 1.1924681933842238, "grad_norm": 7.45606227576175, "learning_rate": 9.427033710304123e-06, "loss": 0.2318, "step": 58580 }, { "epoch": 1.192671755725191, "grad_norm": 4.163037589290903, "learning_rate": 9.426703381366304e-06, "loss": 0.2445, "step": 58590 }, { "epoch": 1.1928753180661578, "grad_norm": 8.20664538267959, "learning_rate": 9.42637296302551e-06, "loss": 0.2769, "step": 58600 }, { "epoch": 1.1930788804071246, "grad_norm": 14.027002619013688, "learning_rate": 9.426042455288413e-06, "loss": 0.2137, "step": 58610 }, { "epoch": 1.1932824427480917, "grad_norm": 13.99346268044922, "learning_rate": 9.425711858161687e-06, "loss": 0.2611, "step": 58620 }, { "epoch": 1.1934860050890586, "grad_norm": 16.170347177228194, "learning_rate": 9.42538117165201e-06, "loss": 0.2733, "step": 58630 }, { "epoch": 1.1936895674300254, "grad_norm": 5.990064657079665, "learning_rate": 9.42505039576606e-06, "loss": 0.2151, "step": 58640 }, { "epoch": 1.1938931297709923, "grad_norm": 15.333301396390459, "learning_rate": 9.42471953051052e-06, "loss": 0.2426, "step": 58650 }, { "epoch": 1.1940966921119593, "grad_norm": 10.888463971780393, "learning_rate": 9.424388575892068e-06, "loss": 0.2087, "step": 58660 }, { "epoch": 1.1943002544529262, "grad_norm": 0.01385784144058378, "learning_rate": 9.42405753191739e-06, "loss": 0.238, "step": 58670 }, { "epoch": 1.194503816793893, "grad_norm": 22.78606765508695, "learning_rate": 9.423726398593173e-06, "loss": 0.2912, "step": 58680 }, { "epoch": 1.1947073791348601, "grad_norm": 10.019163931429311, "learning_rate": 9.423395175926104e-06, "loss": 0.2634, "step": 58690 }, { "epoch": 1.194910941475827, "grad_norm": 11.391169343905293, "learning_rate": 9.42306386392287e-06, "loss": 0.2344, "step": 58700 }, { "epoch": 1.1951145038167938, "grad_norm": 6.342988033946462, "learning_rate": 9.422732462590167e-06, "loss": 0.2781, "step": 58710 }, { "epoch": 1.195318066157761, "grad_norm": 11.519838449496518, "learning_rate": 9.422400971934684e-06, "loss": 0.2397, "step": 58720 }, { "epoch": 1.1955216284987278, "grad_norm": 4.7545484708622245, "learning_rate": 9.422069391963118e-06, "loss": 0.255, "step": 58730 }, { "epoch": 1.1957251908396946, "grad_norm": 15.850510343222435, "learning_rate": 9.421737722682164e-06, "loss": 0.2565, "step": 58740 }, { "epoch": 1.1959287531806615, "grad_norm": 11.182550656062233, "learning_rate": 9.421405964098523e-06, "loss": 0.2454, "step": 58750 }, { "epoch": 1.1961323155216286, "grad_norm": 8.23428499866582, "learning_rate": 9.421074116218893e-06, "loss": 0.2513, "step": 58760 }, { "epoch": 1.1963358778625954, "grad_norm": 6.7678071177620165, "learning_rate": 9.420742179049978e-06, "loss": 0.25, "step": 58770 }, { "epoch": 1.1965394402035623, "grad_norm": 9.55889343785665, "learning_rate": 9.420410152598479e-06, "loss": 0.1264, "step": 58780 }, { "epoch": 1.1967430025445291, "grad_norm": 7.930097400102812, "learning_rate": 9.420078036871105e-06, "loss": 0.2479, "step": 58790 }, { "epoch": 1.1969465648854962, "grad_norm": 18.426520096710675, "learning_rate": 9.419745831874561e-06, "loss": 0.2314, "step": 58800 }, { "epoch": 1.197150127226463, "grad_norm": 9.802976344585078, "learning_rate": 9.419413537615558e-06, "loss": 0.2367, "step": 58810 }, { "epoch": 1.19735368956743, "grad_norm": 19.681400283277636, "learning_rate": 9.419081154100806e-06, "loss": 0.3625, "step": 58820 }, { "epoch": 1.197557251908397, "grad_norm": 6.754714926682456, "learning_rate": 9.418748681337017e-06, "loss": 0.2185, "step": 58830 }, { "epoch": 1.1977608142493639, "grad_norm": 11.479265988284638, "learning_rate": 9.418416119330911e-06, "loss": 0.21, "step": 58840 }, { "epoch": 1.1979643765903307, "grad_norm": 7.24097818081975, "learning_rate": 9.418083468089198e-06, "loss": 0.2148, "step": 58850 }, { "epoch": 1.1981679389312978, "grad_norm": 0.22098406212329838, "learning_rate": 9.417750727618598e-06, "loss": 0.2352, "step": 58860 }, { "epoch": 1.1983715012722647, "grad_norm": 15.532350187505083, "learning_rate": 9.417417897925833e-06, "loss": 0.235, "step": 58870 }, { "epoch": 1.1985750636132315, "grad_norm": 10.215220595252312, "learning_rate": 9.417084979017626e-06, "loss": 0.2108, "step": 58880 }, { "epoch": 1.1987786259541986, "grad_norm": 12.585468200553626, "learning_rate": 9.416751970900694e-06, "loss": 0.217, "step": 58890 }, { "epoch": 1.1989821882951655, "grad_norm": 8.287154327777063, "learning_rate": 9.416418873581771e-06, "loss": 0.1971, "step": 58900 }, { "epoch": 1.1991857506361323, "grad_norm": 7.118204214684253, "learning_rate": 9.41608568706758e-06, "loss": 0.2289, "step": 58910 }, { "epoch": 1.1993893129770992, "grad_norm": 7.268198403046029, "learning_rate": 9.41575241136485e-06, "loss": 0.2557, "step": 58920 }, { "epoch": 1.1995928753180662, "grad_norm": 9.447689647488506, "learning_rate": 9.415419046480313e-06, "loss": 0.2474, "step": 58930 }, { "epoch": 1.199796437659033, "grad_norm": 12.337230686236145, "learning_rate": 9.415085592420701e-06, "loss": 0.2549, "step": 58940 }, { "epoch": 1.2, "grad_norm": 9.550778684260505, "learning_rate": 9.41475204919275e-06, "loss": 0.2508, "step": 58950 }, { "epoch": 1.2002035623409668, "grad_norm": 13.284164007584124, "learning_rate": 9.414418416803194e-06, "loss": 0.1268, "step": 58960 }, { "epoch": 1.200407124681934, "grad_norm": 5.079097806986843, "learning_rate": 9.414084695258771e-06, "loss": 0.1852, "step": 58970 }, { "epoch": 1.2006106870229007, "grad_norm": 19.512814558126557, "learning_rate": 9.413750884566227e-06, "loss": 0.3202, "step": 58980 }, { "epoch": 1.2008142493638676, "grad_norm": 6.423729820956929, "learning_rate": 9.413416984732294e-06, "loss": 0.245, "step": 58990 }, { "epoch": 1.2010178117048347, "grad_norm": 4.517056192557023, "learning_rate": 9.413082995763722e-06, "loss": 0.2206, "step": 59000 }, { "epoch": 1.2012213740458015, "grad_norm": 28.713672004194546, "learning_rate": 9.412748917667255e-06, "loss": 0.3714, "step": 59010 }, { "epoch": 1.2014249363867684, "grad_norm": 1.85136165176825, "learning_rate": 9.412414750449641e-06, "loss": 0.1476, "step": 59020 }, { "epoch": 1.2016284987277355, "grad_norm": 5.329770127824993, "learning_rate": 9.412080494117628e-06, "loss": 0.2049, "step": 59030 }, { "epoch": 1.2018320610687023, "grad_norm": 4.363404076268881, "learning_rate": 9.411746148677966e-06, "loss": 0.1223, "step": 59040 }, { "epoch": 1.2020356234096692, "grad_norm": 22.24808792057377, "learning_rate": 9.41141171413741e-06, "loss": 0.2239, "step": 59050 }, { "epoch": 1.202239185750636, "grad_norm": 23.556400523791638, "learning_rate": 9.411077190502708e-06, "loss": 0.2068, "step": 59060 }, { "epoch": 1.2024427480916031, "grad_norm": 22.686387318273248, "learning_rate": 9.410742577780626e-06, "loss": 0.209, "step": 59070 }, { "epoch": 1.20264631043257, "grad_norm": 11.914740046429358, "learning_rate": 9.410407875977914e-06, "loss": 0.2992, "step": 59080 }, { "epoch": 1.2028498727735368, "grad_norm": 2.559964858654871, "learning_rate": 9.410073085101335e-06, "loss": 0.1613, "step": 59090 }, { "epoch": 1.203053435114504, "grad_norm": 3.9191077727085695, "learning_rate": 9.409738205157648e-06, "loss": 0.241, "step": 59100 }, { "epoch": 1.2032569974554708, "grad_norm": 12.497056658833603, "learning_rate": 9.40940323615362e-06, "loss": 0.2288, "step": 59110 }, { "epoch": 1.2034605597964376, "grad_norm": 4.6120290984381995, "learning_rate": 9.409068178096014e-06, "loss": 0.3106, "step": 59120 }, { "epoch": 1.2036641221374045, "grad_norm": 11.701721303834105, "learning_rate": 9.408733030991598e-06, "loss": 0.2769, "step": 59130 }, { "epoch": 1.2038676844783716, "grad_norm": 5.862032257158718, "learning_rate": 9.408397794847139e-06, "loss": 0.1981, "step": 59140 }, { "epoch": 1.2040712468193384, "grad_norm": 12.240280543234991, "learning_rate": 9.40806246966941e-06, "loss": 0.2197, "step": 59150 }, { "epoch": 1.2042748091603053, "grad_norm": 5.353863811339918, "learning_rate": 9.40772705546518e-06, "loss": 0.1516, "step": 59160 }, { "epoch": 1.2044783715012723, "grad_norm": 9.39664911666235, "learning_rate": 9.407391552241225e-06, "loss": 0.1896, "step": 59170 }, { "epoch": 1.2046819338422392, "grad_norm": 8.162775619949405, "learning_rate": 9.407055960004324e-06, "loss": 0.2897, "step": 59180 }, { "epoch": 1.204885496183206, "grad_norm": 9.98125698184719, "learning_rate": 9.406720278761249e-06, "loss": 0.1523, "step": 59190 }, { "epoch": 1.2050890585241731, "grad_norm": 4.579294051984003, "learning_rate": 9.406384508518783e-06, "loss": 0.1788, "step": 59200 }, { "epoch": 1.20529262086514, "grad_norm": 9.276565360343781, "learning_rate": 9.406048649283706e-06, "loss": 0.2032, "step": 59210 }, { "epoch": 1.2054961832061069, "grad_norm": 12.942904581554432, "learning_rate": 9.4057127010628e-06, "loss": 0.2297, "step": 59220 }, { "epoch": 1.2056997455470737, "grad_norm": 14.329867120241905, "learning_rate": 9.405376663862854e-06, "loss": 0.3613, "step": 59230 }, { "epoch": 1.2059033078880408, "grad_norm": 6.851310415709038, "learning_rate": 9.405040537690652e-06, "loss": 0.2066, "step": 59240 }, { "epoch": 1.2061068702290076, "grad_norm": 3.3728426271819614, "learning_rate": 9.404704322552981e-06, "loss": 0.1982, "step": 59250 }, { "epoch": 1.2063104325699745, "grad_norm": 7.826096298044962, "learning_rate": 9.404368018456636e-06, "loss": 0.3338, "step": 59260 }, { "epoch": 1.2065139949109414, "grad_norm": 9.632436569276793, "learning_rate": 9.404031625408403e-06, "loss": 0.3212, "step": 59270 }, { "epoch": 1.2067175572519084, "grad_norm": 5.015953926255785, "learning_rate": 9.403695143415081e-06, "loss": 0.3394, "step": 59280 }, { "epoch": 1.2069211195928753, "grad_norm": 4.011646005514038, "learning_rate": 9.403358572483462e-06, "loss": 0.1713, "step": 59290 }, { "epoch": 1.2071246819338421, "grad_norm": 11.495917309347641, "learning_rate": 9.403021912620345e-06, "loss": 0.2407, "step": 59300 }, { "epoch": 1.2073282442748092, "grad_norm": 11.78028547716831, "learning_rate": 9.402685163832531e-06, "loss": 0.264, "step": 59310 }, { "epoch": 1.207531806615776, "grad_norm": 5.87266446266497, "learning_rate": 9.402348326126819e-06, "loss": 0.2166, "step": 59320 }, { "epoch": 1.207735368956743, "grad_norm": 8.099369881646435, "learning_rate": 9.402011399510012e-06, "loss": 0.2505, "step": 59330 }, { "epoch": 1.20793893129771, "grad_norm": 3.222947556261295, "learning_rate": 9.401674383988916e-06, "loss": 0.216, "step": 59340 }, { "epoch": 1.2081424936386769, "grad_norm": 11.939018355693824, "learning_rate": 9.401337279570335e-06, "loss": 0.3422, "step": 59350 }, { "epoch": 1.2083460559796437, "grad_norm": 12.342591798942996, "learning_rate": 9.40100008626108e-06, "loss": 0.2613, "step": 59360 }, { "epoch": 1.2085496183206108, "grad_norm": 4.858371986695139, "learning_rate": 9.400662804067959e-06, "loss": 0.1685, "step": 59370 }, { "epoch": 1.2087531806615777, "grad_norm": 14.036493293074649, "learning_rate": 9.400325432997785e-06, "loss": 0.3054, "step": 59380 }, { "epoch": 1.2089567430025445, "grad_norm": 6.47569636710634, "learning_rate": 9.399987973057371e-06, "loss": 0.2338, "step": 59390 }, { "epoch": 1.2091603053435114, "grad_norm": 1.9670976609055382, "learning_rate": 9.399650424253534e-06, "loss": 0.2655, "step": 59400 }, { "epoch": 1.2093638676844785, "grad_norm": 15.371139693060247, "learning_rate": 9.399312786593089e-06, "loss": 0.2347, "step": 59410 }, { "epoch": 1.2095674300254453, "grad_norm": 10.486271261513435, "learning_rate": 9.398975060082855e-06, "loss": 0.3336, "step": 59420 }, { "epoch": 1.2097709923664122, "grad_norm": 2.361706322499908, "learning_rate": 9.398637244729656e-06, "loss": 0.2231, "step": 59430 }, { "epoch": 1.209974554707379, "grad_norm": 10.132607586075295, "learning_rate": 9.398299340540311e-06, "loss": 0.2706, "step": 59440 }, { "epoch": 1.210178117048346, "grad_norm": 3.393960697788672, "learning_rate": 9.397961347521647e-06, "loss": 0.196, "step": 59450 }, { "epoch": 1.210381679389313, "grad_norm": 6.127907061707099, "learning_rate": 9.397623265680487e-06, "loss": 0.2925, "step": 59460 }, { "epoch": 1.2105852417302798, "grad_norm": 14.403586375757271, "learning_rate": 9.397285095023662e-06, "loss": 0.3024, "step": 59470 }, { "epoch": 1.210788804071247, "grad_norm": 3.6081732939396076, "learning_rate": 9.396946835558001e-06, "loss": 0.2515, "step": 59480 }, { "epoch": 1.2109923664122137, "grad_norm": 7.3035641532505755, "learning_rate": 9.396608487290336e-06, "loss": 0.1674, "step": 59490 }, { "epoch": 1.2111959287531806, "grad_norm": 10.964739588083797, "learning_rate": 9.3962700502275e-06, "loss": 0.2275, "step": 59500 }, { "epoch": 1.2113994910941477, "grad_norm": 5.806669066171423, "learning_rate": 9.395931524376328e-06, "loss": 0.2745, "step": 59510 }, { "epoch": 1.2116030534351145, "grad_norm": 9.723318106590408, "learning_rate": 9.395592909743657e-06, "loss": 0.2341, "step": 59520 }, { "epoch": 1.2118066157760814, "grad_norm": 2.23774366492488, "learning_rate": 9.395254206336324e-06, "loss": 0.2137, "step": 59530 }, { "epoch": 1.2120101781170483, "grad_norm": 8.160435124723117, "learning_rate": 9.394915414161172e-06, "loss": 0.2891, "step": 59540 }, { "epoch": 1.2122137404580153, "grad_norm": 3.8938472914945783, "learning_rate": 9.394576533225042e-06, "loss": 0.1649, "step": 59550 }, { "epoch": 1.2124173027989822, "grad_norm": 12.553389996072834, "learning_rate": 9.39423756353478e-06, "loss": 0.2686, "step": 59560 }, { "epoch": 1.212620865139949, "grad_norm": 13.265468218265626, "learning_rate": 9.39389850509723e-06, "loss": 0.3099, "step": 59570 }, { "epoch": 1.2128244274809161, "grad_norm": 7.097165642625932, "learning_rate": 9.39355935791924e-06, "loss": 0.1904, "step": 59580 }, { "epoch": 1.213027989821883, "grad_norm": 12.146835601654042, "learning_rate": 9.39322012200766e-06, "loss": 0.2896, "step": 59590 }, { "epoch": 1.2132315521628498, "grad_norm": 6.413125575850769, "learning_rate": 9.392880797369344e-06, "loss": 0.3033, "step": 59600 }, { "epoch": 1.2134351145038167, "grad_norm": 3.4559031387270194, "learning_rate": 9.392541384011139e-06, "loss": 0.1543, "step": 59610 }, { "epoch": 1.2136386768447838, "grad_norm": 15.711233182554798, "learning_rate": 9.392201881939904e-06, "loss": 0.2403, "step": 59620 }, { "epoch": 1.2138422391857506, "grad_norm": 5.707649930818067, "learning_rate": 9.391862291162495e-06, "loss": 0.3487, "step": 59630 }, { "epoch": 1.2140458015267175, "grad_norm": 12.184059842306972, "learning_rate": 9.391522611685771e-06, "loss": 0.2805, "step": 59640 }, { "epoch": 1.2142493638676846, "grad_norm": 4.183559346672249, "learning_rate": 9.391182843516592e-06, "loss": 0.2832, "step": 59650 }, { "epoch": 1.2144529262086514, "grad_norm": 17.672102899453552, "learning_rate": 9.390842986661819e-06, "loss": 0.2766, "step": 59660 }, { "epoch": 1.2146564885496183, "grad_norm": 5.505794306553428, "learning_rate": 9.390503041128316e-06, "loss": 0.2362, "step": 59670 }, { "epoch": 1.2148600508905854, "grad_norm": 8.15516181010842, "learning_rate": 9.390163006922948e-06, "loss": 0.2064, "step": 59680 }, { "epoch": 1.2150636132315522, "grad_norm": 5.838711812743147, "learning_rate": 9.389822884052586e-06, "loss": 0.243, "step": 59690 }, { "epoch": 1.215267175572519, "grad_norm": 9.049243456726062, "learning_rate": 9.389482672524097e-06, "loss": 0.2262, "step": 59700 }, { "epoch": 1.215470737913486, "grad_norm": 16.347721853710077, "learning_rate": 9.389142372344348e-06, "loss": 0.2136, "step": 59710 }, { "epoch": 1.215674300254453, "grad_norm": 13.212465602105665, "learning_rate": 9.38880198352022e-06, "loss": 0.2678, "step": 59720 }, { "epoch": 1.2158778625954199, "grad_norm": 3.692652888940894, "learning_rate": 9.38846150605858e-06, "loss": 0.1994, "step": 59730 }, { "epoch": 1.2160814249363867, "grad_norm": 14.142741466721, "learning_rate": 9.38812093996631e-06, "loss": 0.2296, "step": 59740 }, { "epoch": 1.2162849872773536, "grad_norm": 4.651711510826382, "learning_rate": 9.387780285250285e-06, "loss": 0.1401, "step": 59750 }, { "epoch": 1.2164885496183206, "grad_norm": 2.520206211130326, "learning_rate": 9.387439541917386e-06, "loss": 0.1713, "step": 59760 }, { "epoch": 1.2166921119592875, "grad_norm": 16.81479500057799, "learning_rate": 9.387098709974493e-06, "loss": 0.2714, "step": 59770 }, { "epoch": 1.2168956743002544, "grad_norm": 20.280235095360034, "learning_rate": 9.386757789428493e-06, "loss": 0.183, "step": 59780 }, { "epoch": 1.2170992366412214, "grad_norm": 4.253498991855765, "learning_rate": 9.386416780286267e-06, "loss": 0.3024, "step": 59790 }, { "epoch": 1.2173027989821883, "grad_norm": 8.573980547049509, "learning_rate": 9.386075682554707e-06, "loss": 0.2539, "step": 59800 }, { "epoch": 1.2175063613231552, "grad_norm": 11.332528565551693, "learning_rate": 9.3857344962407e-06, "loss": 0.2928, "step": 59810 }, { "epoch": 1.2177099236641222, "grad_norm": 10.384736118253723, "learning_rate": 9.385393221351133e-06, "loss": 0.2027, "step": 59820 }, { "epoch": 1.217913486005089, "grad_norm": 18.51576091787566, "learning_rate": 9.385051857892903e-06, "loss": 0.3157, "step": 59830 }, { "epoch": 1.218117048346056, "grad_norm": 10.99040758230818, "learning_rate": 9.384710405872902e-06, "loss": 0.2162, "step": 59840 }, { "epoch": 1.218320610687023, "grad_norm": 2.4762194056986764, "learning_rate": 9.384368865298029e-06, "loss": 0.2766, "step": 59850 }, { "epoch": 1.2185241730279899, "grad_norm": 8.216728571614313, "learning_rate": 9.384027236175177e-06, "loss": 0.2559, "step": 59860 }, { "epoch": 1.2187277353689567, "grad_norm": 4.227773695164134, "learning_rate": 9.38368551851125e-06, "loss": 0.2257, "step": 59870 }, { "epoch": 1.2189312977099236, "grad_norm": 4.761843102346847, "learning_rate": 9.383343712313148e-06, "loss": 0.318, "step": 59880 }, { "epoch": 1.2191348600508907, "grad_norm": 0.6371654174147284, "learning_rate": 9.383001817587774e-06, "loss": 0.3128, "step": 59890 }, { "epoch": 1.2193384223918575, "grad_norm": 15.55668144810034, "learning_rate": 9.38265983434203e-06, "loss": 0.2777, "step": 59900 }, { "epoch": 1.2195419847328244, "grad_norm": 9.353682428590956, "learning_rate": 9.38231776258283e-06, "loss": 0.1996, "step": 59910 }, { "epoch": 1.2197455470737912, "grad_norm": 54.169687437148184, "learning_rate": 9.381975602317078e-06, "loss": 0.1919, "step": 59920 }, { "epoch": 1.2199491094147583, "grad_norm": 18.012921335050212, "learning_rate": 9.381633353551683e-06, "loss": 0.2922, "step": 59930 }, { "epoch": 1.2201526717557252, "grad_norm": 5.629981159853912, "learning_rate": 9.38129101629356e-06, "loss": 0.3065, "step": 59940 }, { "epoch": 1.220356234096692, "grad_norm": 7.544935266462612, "learning_rate": 9.38094859054962e-06, "loss": 0.2843, "step": 59950 }, { "epoch": 1.220559796437659, "grad_norm": 9.407889830080594, "learning_rate": 9.380606076326782e-06, "loss": 0.2503, "step": 59960 }, { "epoch": 1.220763358778626, "grad_norm": 9.444994473982183, "learning_rate": 9.380263473631962e-06, "loss": 0.2241, "step": 59970 }, { "epoch": 1.2209669211195928, "grad_norm": 8.483614584982277, "learning_rate": 9.37992078247208e-06, "loss": 0.2032, "step": 59980 }, { "epoch": 1.22117048346056, "grad_norm": 4.97716339492225, "learning_rate": 9.379578002854054e-06, "loss": 0.1687, "step": 59990 }, { "epoch": 1.2213740458015268, "grad_norm": 4.2202433518949825, "learning_rate": 9.379235134784811e-06, "loss": 0.2677, "step": 60000 }, { "epoch": 1.2215776081424936, "grad_norm": 10.426141266336854, "learning_rate": 9.378892178271276e-06, "loss": 0.2525, "step": 60010 }, { "epoch": 1.2217811704834605, "grad_norm": 6.310939144436112, "learning_rate": 9.378549133320371e-06, "loss": 0.2428, "step": 60020 }, { "epoch": 1.2219847328244275, "grad_norm": 7.736515242111841, "learning_rate": 9.378205999939028e-06, "loss": 0.2505, "step": 60030 }, { "epoch": 1.2221882951653944, "grad_norm": 3.9355066032203774, "learning_rate": 9.377862778134176e-06, "loss": 0.2486, "step": 60040 }, { "epoch": 1.2223918575063613, "grad_norm": 5.596728193423072, "learning_rate": 9.377519467912745e-06, "loss": 0.2406, "step": 60050 }, { "epoch": 1.2225954198473283, "grad_norm": 4.880611000048012, "learning_rate": 9.37717606928167e-06, "loss": 0.2672, "step": 60060 }, { "epoch": 1.2227989821882952, "grad_norm": 13.527772853650081, "learning_rate": 9.376832582247889e-06, "loss": 0.1741, "step": 60070 }, { "epoch": 1.223002544529262, "grad_norm": 6.385973504007002, "learning_rate": 9.376489006818336e-06, "loss": 0.2105, "step": 60080 }, { "epoch": 1.223206106870229, "grad_norm": 4.924794931164449, "learning_rate": 9.376145342999949e-06, "loss": 0.2984, "step": 60090 }, { "epoch": 1.223409669211196, "grad_norm": 9.019839673922345, "learning_rate": 9.375801590799672e-06, "loss": 0.2924, "step": 60100 }, { "epoch": 1.2236132315521628, "grad_norm": 8.687566883782758, "learning_rate": 9.375457750224445e-06, "loss": 0.2545, "step": 60110 }, { "epoch": 1.2238167938931297, "grad_norm": 13.112449241053971, "learning_rate": 9.375113821281213e-06, "loss": 0.2136, "step": 60120 }, { "epoch": 1.2240203562340968, "grad_norm": 10.22725187350968, "learning_rate": 9.374769803976921e-06, "loss": 0.3153, "step": 60130 }, { "epoch": 1.2242239185750636, "grad_norm": 15.48517460975753, "learning_rate": 9.374425698318522e-06, "loss": 0.2372, "step": 60140 }, { "epoch": 1.2244274809160305, "grad_norm": 5.3656563174816005, "learning_rate": 9.374081504312958e-06, "loss": 0.2199, "step": 60150 }, { "epoch": 1.2246310432569976, "grad_norm": 11.052682955426466, "learning_rate": 9.373737221967187e-06, "loss": 0.3275, "step": 60160 }, { "epoch": 1.2248346055979644, "grad_norm": 9.747458292603953, "learning_rate": 9.373392851288158e-06, "loss": 0.235, "step": 60170 }, { "epoch": 1.2250381679389313, "grad_norm": 3.8513525491107687, "learning_rate": 9.373048392282828e-06, "loss": 0.1834, "step": 60180 }, { "epoch": 1.2252417302798981, "grad_norm": 9.630682276761384, "learning_rate": 9.372703844958153e-06, "loss": 0.3117, "step": 60190 }, { "epoch": 1.2254452926208652, "grad_norm": 10.571627433046599, "learning_rate": 9.37235920932109e-06, "loss": 0.2214, "step": 60200 }, { "epoch": 1.225648854961832, "grad_norm": 13.73664812038217, "learning_rate": 9.372014485378604e-06, "loss": 0.4519, "step": 60210 }, { "epoch": 1.225852417302799, "grad_norm": 7.609521682585301, "learning_rate": 9.371669673137653e-06, "loss": 0.3878, "step": 60220 }, { "epoch": 1.2260559796437658, "grad_norm": 5.6307876155441825, "learning_rate": 9.371324772605204e-06, "loss": 0.2698, "step": 60230 }, { "epoch": 1.2262595419847329, "grad_norm": 4.671381230486624, "learning_rate": 9.370979783788219e-06, "loss": 0.2121, "step": 60240 }, { "epoch": 1.2264631043256997, "grad_norm": 3.1107385712207645, "learning_rate": 9.370634706693668e-06, "loss": 0.1819, "step": 60250 }, { "epoch": 1.2266666666666666, "grad_norm": 22.33494127867922, "learning_rate": 9.37028954132852e-06, "loss": 0.2018, "step": 60260 }, { "epoch": 1.2268702290076337, "grad_norm": 6.891605855789697, "learning_rate": 9.369944287699747e-06, "loss": 0.254, "step": 60270 }, { "epoch": 1.2270737913486005, "grad_norm": 21.63867647146025, "learning_rate": 9.369598945814318e-06, "loss": 0.2683, "step": 60280 }, { "epoch": 1.2272773536895674, "grad_norm": 12.133446026323679, "learning_rate": 9.369253515679211e-06, "loss": 0.3281, "step": 60290 }, { "epoch": 1.2274809160305344, "grad_norm": 13.12785123646942, "learning_rate": 9.368907997301403e-06, "loss": 0.1435, "step": 60300 }, { "epoch": 1.2276844783715013, "grad_norm": 7.217241601271341, "learning_rate": 9.36856239068787e-06, "loss": 0.3025, "step": 60310 }, { "epoch": 1.2278880407124682, "grad_norm": 6.909247866732839, "learning_rate": 9.368216695845593e-06, "loss": 0.2679, "step": 60320 }, { "epoch": 1.2280916030534352, "grad_norm": 15.600116012472704, "learning_rate": 9.367870912781554e-06, "loss": 0.1884, "step": 60330 }, { "epoch": 1.228295165394402, "grad_norm": 13.038349553238888, "learning_rate": 9.367525041502737e-06, "loss": 0.3644, "step": 60340 }, { "epoch": 1.228498727735369, "grad_norm": 24.95577172697425, "learning_rate": 9.367179082016125e-06, "loss": 0.2343, "step": 60350 }, { "epoch": 1.2287022900763358, "grad_norm": 11.749517298885056, "learning_rate": 9.366833034328706e-06, "loss": 0.1933, "step": 60360 }, { "epoch": 1.2289058524173029, "grad_norm": 9.176201002616102, "learning_rate": 9.36648689844747e-06, "loss": 0.1787, "step": 60370 }, { "epoch": 1.2291094147582697, "grad_norm": 5.152006145557368, "learning_rate": 9.366140674379409e-06, "loss": 0.2595, "step": 60380 }, { "epoch": 1.2293129770992366, "grad_norm": 10.425362274684428, "learning_rate": 9.365794362131511e-06, "loss": 0.1905, "step": 60390 }, { "epoch": 1.2295165394402034, "grad_norm": 4.492287880583968, "learning_rate": 9.365447961710773e-06, "loss": 0.2285, "step": 60400 }, { "epoch": 1.2297201017811705, "grad_norm": 5.696342491402465, "learning_rate": 9.365101473124192e-06, "loss": 0.2126, "step": 60410 }, { "epoch": 1.2299236641221374, "grad_norm": 0.8779613586930065, "learning_rate": 9.364754896378766e-06, "loss": 0.2084, "step": 60420 }, { "epoch": 1.2301272264631042, "grad_norm": 21.333405087826808, "learning_rate": 9.36440823148149e-06, "loss": 0.2176, "step": 60430 }, { "epoch": 1.2303307888040713, "grad_norm": 10.373591017126554, "learning_rate": 9.36406147843937e-06, "loss": 0.2549, "step": 60440 }, { "epoch": 1.2305343511450382, "grad_norm": 14.738950288099156, "learning_rate": 9.363714637259407e-06, "loss": 0.3813, "step": 60450 }, { "epoch": 1.230737913486005, "grad_norm": 5.6918438185398115, "learning_rate": 9.363367707948605e-06, "loss": 0.2189, "step": 60460 }, { "epoch": 1.230941475826972, "grad_norm": 7.180234984476544, "learning_rate": 9.363020690513976e-06, "loss": 0.317, "step": 60470 }, { "epoch": 1.231145038167939, "grad_norm": 10.540732361867011, "learning_rate": 9.362673584962521e-06, "loss": 0.2646, "step": 60480 }, { "epoch": 1.2313486005089058, "grad_norm": 18.948679211077295, "learning_rate": 9.362326391301256e-06, "loss": 0.2623, "step": 60490 }, { "epoch": 1.2315521628498727, "grad_norm": 5.437128364258992, "learning_rate": 9.36197910953719e-06, "loss": 0.2751, "step": 60500 }, { "epoch": 1.2317557251908398, "grad_norm": 6.983456969550471, "learning_rate": 9.361631739677337e-06, "loss": 0.2225, "step": 60510 }, { "epoch": 1.2319592875318066, "grad_norm": 8.397958568746095, "learning_rate": 9.361284281728714e-06, "loss": 0.2839, "step": 60520 }, { "epoch": 1.2321628498727735, "grad_norm": 0.5150280187967539, "learning_rate": 9.360936735698338e-06, "loss": 0.1458, "step": 60530 }, { "epoch": 1.2323664122137405, "grad_norm": 15.728174336787855, "learning_rate": 9.360589101593226e-06, "loss": 0.298, "step": 60540 }, { "epoch": 1.2325699745547074, "grad_norm": 11.712014260556037, "learning_rate": 9.360241379420404e-06, "loss": 0.2675, "step": 60550 }, { "epoch": 1.2327735368956743, "grad_norm": 9.551772538144753, "learning_rate": 9.35989356918689e-06, "loss": 0.1683, "step": 60560 }, { "epoch": 1.2329770992366411, "grad_norm": 8.77577444466578, "learning_rate": 9.35954567089971e-06, "loss": 0.2252, "step": 60570 }, { "epoch": 1.2331806615776082, "grad_norm": 19.99861412606956, "learning_rate": 9.359197684565889e-06, "loss": 0.2501, "step": 60580 }, { "epoch": 1.233384223918575, "grad_norm": 23.172464812403405, "learning_rate": 9.358849610192457e-06, "loss": 0.2942, "step": 60590 }, { "epoch": 1.233587786259542, "grad_norm": 8.653236563090738, "learning_rate": 9.358501447786442e-06, "loss": 0.3564, "step": 60600 }, { "epoch": 1.233791348600509, "grad_norm": 6.348783799058248, "learning_rate": 9.358153197354878e-06, "loss": 0.2126, "step": 60610 }, { "epoch": 1.2339949109414758, "grad_norm": 7.975493565464032, "learning_rate": 9.357804858904795e-06, "loss": 0.222, "step": 60620 }, { "epoch": 1.2341984732824427, "grad_norm": 8.539308167760526, "learning_rate": 9.357456432443233e-06, "loss": 0.2251, "step": 60630 }, { "epoch": 1.2344020356234098, "grad_norm": 6.445087893973515, "learning_rate": 9.357107917977223e-06, "loss": 0.1608, "step": 60640 }, { "epoch": 1.2346055979643766, "grad_norm": 7.626037550345333, "learning_rate": 9.356759315513807e-06, "loss": 0.307, "step": 60650 }, { "epoch": 1.2348091603053435, "grad_norm": 22.397679575928017, "learning_rate": 9.356410625060028e-06, "loss": 0.1855, "step": 60660 }, { "epoch": 1.2350127226463103, "grad_norm": 4.246393758917859, "learning_rate": 9.356061846622924e-06, "loss": 0.2319, "step": 60670 }, { "epoch": 1.2352162849872774, "grad_norm": 9.560330078400616, "learning_rate": 9.355712980209539e-06, "loss": 0.2817, "step": 60680 }, { "epoch": 1.2354198473282443, "grad_norm": 11.805568656160501, "learning_rate": 9.35536402582692e-06, "loss": 0.2313, "step": 60690 }, { "epoch": 1.2356234096692111, "grad_norm": 11.34522352319707, "learning_rate": 9.355014983482118e-06, "loss": 0.2485, "step": 60700 }, { "epoch": 1.235826972010178, "grad_norm": 23.50465776572027, "learning_rate": 9.354665853182177e-06, "loss": 0.193, "step": 60710 }, { "epoch": 1.236030534351145, "grad_norm": 8.749117763844199, "learning_rate": 9.35431663493415e-06, "loss": 0.2153, "step": 60720 }, { "epoch": 1.236234096692112, "grad_norm": 2.3463897236201805, "learning_rate": 9.35396732874509e-06, "loss": 0.2294, "step": 60730 }, { "epoch": 1.2364376590330788, "grad_norm": 0.757093164174914, "learning_rate": 9.353617934622054e-06, "loss": 0.2581, "step": 60740 }, { "epoch": 1.2366412213740459, "grad_norm": 13.653013966869441, "learning_rate": 9.353268452572096e-06, "loss": 0.3479, "step": 60750 }, { "epoch": 1.2368447837150127, "grad_norm": 4.697076028954348, "learning_rate": 9.352918882602274e-06, "loss": 0.2145, "step": 60760 }, { "epoch": 1.2370483460559796, "grad_norm": 9.153809917698029, "learning_rate": 9.352569224719648e-06, "loss": 0.2225, "step": 60770 }, { "epoch": 1.2372519083969467, "grad_norm": 6.644983444184621, "learning_rate": 9.352219478931279e-06, "loss": 0.1792, "step": 60780 }, { "epoch": 1.2374554707379135, "grad_norm": 5.269705853845757, "learning_rate": 9.351869645244235e-06, "loss": 0.1633, "step": 60790 }, { "epoch": 1.2376590330788804, "grad_norm": 10.75346140103978, "learning_rate": 9.351519723665577e-06, "loss": 0.3385, "step": 60800 }, { "epoch": 1.2378625954198474, "grad_norm": 5.866222091511469, "learning_rate": 9.351169714202374e-06, "loss": 0.1417, "step": 60810 }, { "epoch": 1.2380661577608143, "grad_norm": 7.427468369552911, "learning_rate": 9.350819616861694e-06, "loss": 0.2762, "step": 60820 }, { "epoch": 1.2382697201017812, "grad_norm": 13.0691908212147, "learning_rate": 9.350469431650609e-06, "loss": 0.3508, "step": 60830 }, { "epoch": 1.238473282442748, "grad_norm": 18.861464817588722, "learning_rate": 9.350119158576188e-06, "loss": 0.25, "step": 60840 }, { "epoch": 1.238676844783715, "grad_norm": 10.479308909735419, "learning_rate": 9.34976879764551e-06, "loss": 0.279, "step": 60850 }, { "epoch": 1.238880407124682, "grad_norm": 8.500839598977207, "learning_rate": 9.349418348865647e-06, "loss": 0.2465, "step": 60860 }, { "epoch": 1.2390839694656488, "grad_norm": 4.82482015340048, "learning_rate": 9.34906781224368e-06, "loss": 0.2187, "step": 60870 }, { "epoch": 1.2392875318066157, "grad_norm": 6.679208020115356, "learning_rate": 9.348717187786687e-06, "loss": 0.2201, "step": 60880 }, { "epoch": 1.2394910941475827, "grad_norm": 11.133109256933848, "learning_rate": 9.348366475501747e-06, "loss": 0.2948, "step": 60890 }, { "epoch": 1.2396946564885496, "grad_norm": 7.131030949505138, "learning_rate": 9.348015675395948e-06, "loss": 0.2431, "step": 60900 }, { "epoch": 1.2398982188295165, "grad_norm": 8.539193340907966, "learning_rate": 9.34766478747637e-06, "loss": 0.2537, "step": 60910 }, { "epoch": 1.2401017811704835, "grad_norm": 4.764346174943278, "learning_rate": 9.347313811750103e-06, "loss": 0.2079, "step": 60920 }, { "epoch": 1.2403053435114504, "grad_norm": 16.86109987341589, "learning_rate": 9.346962748224235e-06, "loss": 0.2731, "step": 60930 }, { "epoch": 1.2405089058524172, "grad_norm": 16.092924460582076, "learning_rate": 9.346611596905854e-06, "loss": 0.2787, "step": 60940 }, { "epoch": 1.2407124681933843, "grad_norm": 11.019341980891204, "learning_rate": 9.346260357802055e-06, "loss": 0.2263, "step": 60950 }, { "epoch": 1.2409160305343512, "grad_norm": 20.229963716658215, "learning_rate": 9.345909030919929e-06, "loss": 0.2093, "step": 60960 }, { "epoch": 1.241119592875318, "grad_norm": 13.869570744462429, "learning_rate": 9.345557616266573e-06, "loss": 0.1376, "step": 60970 }, { "epoch": 1.241323155216285, "grad_norm": 4.885314091675208, "learning_rate": 9.345206113849083e-06, "loss": 0.2333, "step": 60980 }, { "epoch": 1.241526717557252, "grad_norm": 30.242747597086552, "learning_rate": 9.34485452367456e-06, "loss": 0.2747, "step": 60990 }, { "epoch": 1.2417302798982188, "grad_norm": 8.622366936549641, "learning_rate": 9.344502845750105e-06, "loss": 0.2667, "step": 61000 }, { "epoch": 1.2419338422391857, "grad_norm": 28.631743064623617, "learning_rate": 9.344151080082817e-06, "loss": 0.2722, "step": 61010 }, { "epoch": 1.2421374045801528, "grad_norm": 7.460709090050926, "learning_rate": 9.343799226679804e-06, "loss": 0.233, "step": 61020 }, { "epoch": 1.2423409669211196, "grad_norm": 4.775669244527112, "learning_rate": 9.34344728554817e-06, "loss": 0.2076, "step": 61030 }, { "epoch": 1.2425445292620865, "grad_norm": 8.18648242280529, "learning_rate": 9.343095256695025e-06, "loss": 0.1945, "step": 61040 }, { "epoch": 1.2427480916030533, "grad_norm": 9.089468823675105, "learning_rate": 9.342743140127476e-06, "loss": 0.2841, "step": 61050 }, { "epoch": 1.2429516539440204, "grad_norm": 3.063615742134738, "learning_rate": 9.342390935852637e-06, "loss": 0.2329, "step": 61060 }, { "epoch": 1.2431552162849873, "grad_norm": 8.98911036403176, "learning_rate": 9.34203864387762e-06, "loss": 0.2255, "step": 61070 }, { "epoch": 1.2433587786259541, "grad_norm": 7.022835442185312, "learning_rate": 9.34168626420954e-06, "loss": 0.2218, "step": 61080 }, { "epoch": 1.2435623409669212, "grad_norm": 5.669805602649339, "learning_rate": 9.341333796855513e-06, "loss": 0.2729, "step": 61090 }, { "epoch": 1.243765903307888, "grad_norm": 13.814127705079867, "learning_rate": 9.340981241822659e-06, "loss": 0.23, "step": 61100 }, { "epoch": 1.243969465648855, "grad_norm": 2.782898201815961, "learning_rate": 9.340628599118098e-06, "loss": 0.1781, "step": 61110 }, { "epoch": 1.244173027989822, "grad_norm": 21.498715278546733, "learning_rate": 9.340275868748952e-06, "loss": 0.2171, "step": 61120 }, { "epoch": 1.2443765903307888, "grad_norm": 7.088465650831547, "learning_rate": 9.339923050722343e-06, "loss": 0.2057, "step": 61130 }, { "epoch": 1.2445801526717557, "grad_norm": 28.817516735225894, "learning_rate": 9.339570145045398e-06, "loss": 0.1972, "step": 61140 }, { "epoch": 1.2447837150127226, "grad_norm": 1.609092939359776, "learning_rate": 9.339217151725246e-06, "loss": 0.1991, "step": 61150 }, { "epoch": 1.2449872773536896, "grad_norm": 6.1358965516369866, "learning_rate": 9.338864070769014e-06, "loss": 0.2513, "step": 61160 }, { "epoch": 1.2451908396946565, "grad_norm": 8.636020687106855, "learning_rate": 9.338510902183835e-06, "loss": 0.2898, "step": 61170 }, { "epoch": 1.2453944020356233, "grad_norm": 3.9812911874615264, "learning_rate": 9.33815764597684e-06, "loss": 0.1932, "step": 61180 }, { "epoch": 1.2455979643765904, "grad_norm": 7.309950762530816, "learning_rate": 9.337804302155163e-06, "loss": 0.2093, "step": 61190 }, { "epoch": 1.2458015267175573, "grad_norm": 6.581424074735477, "learning_rate": 9.337450870725941e-06, "loss": 0.2302, "step": 61200 }, { "epoch": 1.2460050890585241, "grad_norm": 5.0129841461339115, "learning_rate": 9.337097351696314e-06, "loss": 0.2618, "step": 61210 }, { "epoch": 1.246208651399491, "grad_norm": 3.0196472049743472, "learning_rate": 9.336743745073417e-06, "loss": 0.3058, "step": 61220 }, { "epoch": 1.246412213740458, "grad_norm": 8.866028280906557, "learning_rate": 9.336390050864396e-06, "loss": 0.2227, "step": 61230 }, { "epoch": 1.246615776081425, "grad_norm": 9.044372779552234, "learning_rate": 9.336036269076392e-06, "loss": 0.2467, "step": 61240 }, { "epoch": 1.2468193384223918, "grad_norm": 13.73707815119326, "learning_rate": 9.335682399716552e-06, "loss": 0.2499, "step": 61250 }, { "epoch": 1.2470229007633589, "grad_norm": 7.862044688414259, "learning_rate": 9.33532844279202e-06, "loss": 0.1548, "step": 61260 }, { "epoch": 1.2472264631043257, "grad_norm": 8.157154228536008, "learning_rate": 9.334974398309949e-06, "loss": 0.2038, "step": 61270 }, { "epoch": 1.2474300254452926, "grad_norm": 18.653323431499963, "learning_rate": 9.334620266277484e-06, "loss": 0.2758, "step": 61280 }, { "epoch": 1.2476335877862597, "grad_norm": 15.264012916660468, "learning_rate": 9.33426604670178e-06, "loss": 0.362, "step": 61290 }, { "epoch": 1.2478371501272265, "grad_norm": 11.265460635540704, "learning_rate": 9.333911739589992e-06, "loss": 0.1823, "step": 61300 }, { "epoch": 1.2480407124681934, "grad_norm": 5.760235213556512, "learning_rate": 9.333557344949273e-06, "loss": 0.2066, "step": 61310 }, { "epoch": 1.2482442748091602, "grad_norm": 8.786318053546246, "learning_rate": 9.333202862786782e-06, "loss": 0.2394, "step": 61320 }, { "epoch": 1.2484478371501273, "grad_norm": 10.365758366389295, "learning_rate": 9.332848293109678e-06, "loss": 0.3569, "step": 61330 }, { "epoch": 1.2486513994910942, "grad_norm": 7.861666971798945, "learning_rate": 9.332493635925123e-06, "loss": 0.2674, "step": 61340 }, { "epoch": 1.248854961832061, "grad_norm": 10.773589398748536, "learning_rate": 9.332138891240278e-06, "loss": 0.2129, "step": 61350 }, { "epoch": 1.2490585241730279, "grad_norm": 7.959666190105155, "learning_rate": 9.331784059062309e-06, "loss": 0.2047, "step": 61360 }, { "epoch": 1.249262086513995, "grad_norm": 11.284893340530921, "learning_rate": 9.331429139398382e-06, "loss": 0.2759, "step": 61370 }, { "epoch": 1.2494656488549618, "grad_norm": 7.58879777090531, "learning_rate": 9.331074132255663e-06, "loss": 0.2389, "step": 61380 }, { "epoch": 1.2496692111959287, "grad_norm": 14.834781855417667, "learning_rate": 9.330719037641325e-06, "loss": 0.3101, "step": 61390 }, { "epoch": 1.2498727735368957, "grad_norm": 6.391208289223307, "learning_rate": 9.330363855562537e-06, "loss": 0.2527, "step": 61400 }, { "epoch": 1.2500763358778626, "grad_norm": 10.836668118889209, "learning_rate": 9.330008586026474e-06, "loss": 0.3768, "step": 61410 }, { "epoch": 1.2502798982188295, "grad_norm": 20.00961313888803, "learning_rate": 9.32965322904031e-06, "loss": 0.2334, "step": 61420 }, { "epoch": 1.2504834605597965, "grad_norm": 9.056784705265875, "learning_rate": 9.329297784611224e-06, "loss": 0.2229, "step": 61430 }, { "epoch": 1.2506870229007634, "grad_norm": 9.002963082253427, "learning_rate": 9.328942252746393e-06, "loss": 0.1932, "step": 61440 }, { "epoch": 1.2508905852417302, "grad_norm": 12.149151865242699, "learning_rate": 9.328586633452996e-06, "loss": 0.2586, "step": 61450 }, { "epoch": 1.2510941475826973, "grad_norm": 12.55376016798671, "learning_rate": 9.328230926738218e-06, "loss": 0.2745, "step": 61460 }, { "epoch": 1.2512977099236642, "grad_norm": 17.803157378162727, "learning_rate": 9.327875132609241e-06, "loss": 0.2397, "step": 61470 }, { "epoch": 1.251501272264631, "grad_norm": 5.42731954893242, "learning_rate": 9.327519251073251e-06, "loss": 0.2123, "step": 61480 }, { "epoch": 1.251704834605598, "grad_norm": 13.370784649389716, "learning_rate": 9.327163282137437e-06, "loss": 0.2445, "step": 61490 }, { "epoch": 1.2519083969465647, "grad_norm": 11.85641879056667, "learning_rate": 9.326807225808986e-06, "loss": 0.2396, "step": 61500 }, { "epoch": 1.2521119592875318, "grad_norm": 11.783564024415744, "learning_rate": 9.326451082095093e-06, "loss": 0.1824, "step": 61510 }, { "epoch": 1.2523155216284987, "grad_norm": 5.379777338636216, "learning_rate": 9.326094851002946e-06, "loss": 0.1714, "step": 61520 }, { "epoch": 1.2525190839694655, "grad_norm": 8.944019518586307, "learning_rate": 9.325738532539741e-06, "loss": 0.181, "step": 61530 }, { "epoch": 1.2527226463104326, "grad_norm": 14.550738623494425, "learning_rate": 9.325382126712676e-06, "loss": 0.2712, "step": 61540 }, { "epoch": 1.2529262086513995, "grad_norm": 0.6334826238752537, "learning_rate": 9.325025633528946e-06, "loss": 0.1918, "step": 61550 }, { "epoch": 1.2531297709923663, "grad_norm": 5.957428090272671, "learning_rate": 9.324669052995756e-06, "loss": 0.328, "step": 61560 }, { "epoch": 1.2533333333333334, "grad_norm": 6.4837046170351185, "learning_rate": 9.324312385120302e-06, "loss": 0.3797, "step": 61570 }, { "epoch": 1.2535368956743003, "grad_norm": 7.6954755953398415, "learning_rate": 9.32395562990979e-06, "loss": 0.1966, "step": 61580 }, { "epoch": 1.2537404580152671, "grad_norm": 9.363955067179544, "learning_rate": 9.323598787371424e-06, "loss": 0.2044, "step": 61590 }, { "epoch": 1.2539440203562342, "grad_norm": 16.293072268982247, "learning_rate": 9.323241857512413e-06, "loss": 0.1856, "step": 61600 }, { "epoch": 1.254147582697201, "grad_norm": 14.430138824252962, "learning_rate": 9.322884840339964e-06, "loss": 0.2368, "step": 61610 }, { "epoch": 1.254351145038168, "grad_norm": 6.368592268043632, "learning_rate": 9.322527735861287e-06, "loss": 0.3131, "step": 61620 }, { "epoch": 1.254554707379135, "grad_norm": 11.17479807930221, "learning_rate": 9.322170544083596e-06, "loss": 0.2886, "step": 61630 }, { "epoch": 1.2547582697201018, "grad_norm": 12.933699455339864, "learning_rate": 9.321813265014105e-06, "loss": 0.2849, "step": 61640 }, { "epoch": 1.2549618320610687, "grad_norm": 11.638161511895234, "learning_rate": 9.321455898660026e-06, "loss": 0.2609, "step": 61650 }, { "epoch": 1.2551653944020356, "grad_norm": 8.528416431482558, "learning_rate": 9.321098445028582e-06, "loss": 0.1693, "step": 61660 }, { "epoch": 1.2553689567430024, "grad_norm": 3.2827183117924332, "learning_rate": 9.320740904126988e-06, "loss": 0.2136, "step": 61670 }, { "epoch": 1.2555725190839695, "grad_norm": 3.16760406955219, "learning_rate": 9.320383275962465e-06, "loss": 0.2434, "step": 61680 }, { "epoch": 1.2557760814249364, "grad_norm": 9.997249595035465, "learning_rate": 9.32002556054224e-06, "loss": 0.1886, "step": 61690 }, { "epoch": 1.2559796437659032, "grad_norm": 7.39142872567501, "learning_rate": 9.319667757873532e-06, "loss": 0.2618, "step": 61700 }, { "epoch": 1.2561832061068703, "grad_norm": 11.391466346152264, "learning_rate": 9.31930986796357e-06, "loss": 0.2541, "step": 61710 }, { "epoch": 1.2563867684478371, "grad_norm": 6.240392275254172, "learning_rate": 9.318951890819585e-06, "loss": 0.2818, "step": 61720 }, { "epoch": 1.256590330788804, "grad_norm": 2.0378813187628895, "learning_rate": 9.318593826448802e-06, "loss": 0.2149, "step": 61730 }, { "epoch": 1.256793893129771, "grad_norm": 12.078758896757988, "learning_rate": 9.318235674858453e-06, "loss": 0.2036, "step": 61740 }, { "epoch": 1.256997455470738, "grad_norm": 13.200978095851678, "learning_rate": 9.317877436055775e-06, "loss": 0.2614, "step": 61750 }, { "epoch": 1.2572010178117048, "grad_norm": 14.47071017411204, "learning_rate": 9.317519110047998e-06, "loss": 0.2901, "step": 61760 }, { "epoch": 1.2574045801526719, "grad_norm": 12.012067319002345, "learning_rate": 9.317160696842364e-06, "loss": 0.2223, "step": 61770 }, { "epoch": 1.2576081424936387, "grad_norm": 1.0482601065007169, "learning_rate": 9.316802196446108e-06, "loss": 0.1907, "step": 61780 }, { "epoch": 1.2578117048346056, "grad_norm": 13.062854442398162, "learning_rate": 9.316443608866472e-06, "loss": 0.2997, "step": 61790 }, { "epoch": 1.2580152671755724, "grad_norm": 7.627925228320983, "learning_rate": 9.316084934110699e-06, "loss": 0.3065, "step": 61800 }, { "epoch": 1.2582188295165395, "grad_norm": 15.308724241321771, "learning_rate": 9.31572617218603e-06, "loss": 0.2419, "step": 61810 }, { "epoch": 1.2584223918575064, "grad_norm": 16.927561226684514, "learning_rate": 9.315367323099714e-06, "loss": 0.3159, "step": 61820 }, { "epoch": 1.2586259541984732, "grad_norm": 10.33215987148619, "learning_rate": 9.315008386858995e-06, "loss": 0.2166, "step": 61830 }, { "epoch": 1.25882951653944, "grad_norm": 9.13316400359982, "learning_rate": 9.314649363471126e-06, "loss": 0.2331, "step": 61840 }, { "epoch": 1.2590330788804072, "grad_norm": 39.60590482033762, "learning_rate": 9.314290252943355e-06, "loss": 0.1579, "step": 61850 }, { "epoch": 1.259236641221374, "grad_norm": 7.247211886434954, "learning_rate": 9.313931055282934e-06, "loss": 0.2211, "step": 61860 }, { "epoch": 1.2594402035623409, "grad_norm": 5.543555322181824, "learning_rate": 9.313571770497121e-06, "loss": 0.2506, "step": 61870 }, { "epoch": 1.259643765903308, "grad_norm": 13.335414573847292, "learning_rate": 9.313212398593168e-06, "loss": 0.3495, "step": 61880 }, { "epoch": 1.2598473282442748, "grad_norm": 0.8399178372445103, "learning_rate": 9.312852939578338e-06, "loss": 0.1518, "step": 61890 }, { "epoch": 1.2600508905852417, "grad_norm": 7.495332004472739, "learning_rate": 9.312493393459887e-06, "loss": 0.2275, "step": 61900 }, { "epoch": 1.2602544529262087, "grad_norm": 1.6282921318313666, "learning_rate": 9.312133760245076e-06, "loss": 0.1522, "step": 61910 }, { "epoch": 1.2604580152671756, "grad_norm": 7.900986493407886, "learning_rate": 9.311774039941171e-06, "loss": 0.3242, "step": 61920 }, { "epoch": 1.2606615776081425, "grad_norm": 7.536844076268218, "learning_rate": 9.311414232555435e-06, "loss": 0.3103, "step": 61930 }, { "epoch": 1.2608651399491095, "grad_norm": 3.4859998408407553, "learning_rate": 9.311054338095138e-06, "loss": 0.2297, "step": 61940 }, { "epoch": 1.2610687022900764, "grad_norm": 12.728252386147958, "learning_rate": 9.310694356567544e-06, "loss": 0.3662, "step": 61950 }, { "epoch": 1.2612722646310432, "grad_norm": 10.009883360883363, "learning_rate": 9.310334287979926e-06, "loss": 0.2597, "step": 61960 }, { "epoch": 1.26147582697201, "grad_norm": 8.326430808069214, "learning_rate": 9.309974132339555e-06, "loss": 0.3015, "step": 61970 }, { "epoch": 1.261679389312977, "grad_norm": 8.093642064951762, "learning_rate": 9.309613889653704e-06, "loss": 0.2796, "step": 61980 }, { "epoch": 1.261882951653944, "grad_norm": 9.717140687502157, "learning_rate": 9.309253559929651e-06, "loss": 0.2603, "step": 61990 }, { "epoch": 1.262086513994911, "grad_norm": 4.544016218989089, "learning_rate": 9.308893143174673e-06, "loss": 0.2145, "step": 62000 }, { "epoch": 1.2622900763358778, "grad_norm": 4.416304499741243, "learning_rate": 9.308532639396046e-06, "loss": 0.2839, "step": 62010 }, { "epoch": 1.2624936386768448, "grad_norm": 10.603569663309791, "learning_rate": 9.308172048601053e-06, "loss": 0.1872, "step": 62020 }, { "epoch": 1.2626972010178117, "grad_norm": 6.4716691372112205, "learning_rate": 9.30781137079698e-06, "loss": 0.1865, "step": 62030 }, { "epoch": 1.2629007633587785, "grad_norm": 4.247257713053318, "learning_rate": 9.307450605991104e-06, "loss": 0.2034, "step": 62040 }, { "epoch": 1.2631043256997456, "grad_norm": 7.6936476158710985, "learning_rate": 9.307089754190717e-06, "loss": 0.2931, "step": 62050 }, { "epoch": 1.2633078880407125, "grad_norm": 17.53283797628073, "learning_rate": 9.306728815403106e-06, "loss": 0.2122, "step": 62060 }, { "epoch": 1.2635114503816793, "grad_norm": 17.021551103850925, "learning_rate": 9.306367789635558e-06, "loss": 0.2037, "step": 62070 }, { "epoch": 1.2637150127226464, "grad_norm": 9.785047269926903, "learning_rate": 9.306006676895365e-06, "loss": 0.2563, "step": 62080 }, { "epoch": 1.2639185750636133, "grad_norm": 12.711498154495292, "learning_rate": 9.305645477189822e-06, "loss": 0.2414, "step": 62090 }, { "epoch": 1.2641221374045801, "grad_norm": 2.0511880078044187, "learning_rate": 9.305284190526224e-06, "loss": 0.1597, "step": 62100 }, { "epoch": 1.2643256997455472, "grad_norm": 17.424850339158734, "learning_rate": 9.304922816911866e-06, "loss": 0.2336, "step": 62110 }, { "epoch": 1.264529262086514, "grad_norm": 8.652496427839376, "learning_rate": 9.304561356354046e-06, "loss": 0.2193, "step": 62120 }, { "epoch": 1.264732824427481, "grad_norm": 6.9348725745985185, "learning_rate": 9.304199808860066e-06, "loss": 0.2403, "step": 62130 }, { "epoch": 1.2649363867684478, "grad_norm": 5.232840518625016, "learning_rate": 9.303838174437228e-06, "loss": 0.1907, "step": 62140 }, { "epoch": 1.2651399491094146, "grad_norm": 11.837888408657161, "learning_rate": 9.303476453092832e-06, "loss": 0.2258, "step": 62150 }, { "epoch": 1.2653435114503817, "grad_norm": 12.516167614171827, "learning_rate": 9.303114644834188e-06, "loss": 0.1756, "step": 62160 }, { "epoch": 1.2655470737913486, "grad_norm": 8.084586104487247, "learning_rate": 9.302752749668603e-06, "loss": 0.1627, "step": 62170 }, { "epoch": 1.2657506361323154, "grad_norm": 16.60060014824525, "learning_rate": 9.302390767603382e-06, "loss": 0.2878, "step": 62180 }, { "epoch": 1.2659541984732825, "grad_norm": 10.851226379561568, "learning_rate": 9.302028698645838e-06, "loss": 0.2386, "step": 62190 }, { "epoch": 1.2661577608142494, "grad_norm": 2.5457202940460766, "learning_rate": 9.301666542803285e-06, "loss": 0.2426, "step": 62200 }, { "epoch": 1.2663613231552162, "grad_norm": 11.915373377939748, "learning_rate": 9.301304300083034e-06, "loss": 0.216, "step": 62210 }, { "epoch": 1.2665648854961833, "grad_norm": 4.502971104552322, "learning_rate": 9.300941970492404e-06, "loss": 0.2321, "step": 62220 }, { "epoch": 1.2667684478371501, "grad_norm": 19.680464316719593, "learning_rate": 9.30057955403871e-06, "loss": 0.253, "step": 62230 }, { "epoch": 1.266972010178117, "grad_norm": 10.735900325381715, "learning_rate": 9.300217050729275e-06, "loss": 0.2827, "step": 62240 }, { "epoch": 1.267175572519084, "grad_norm": 5.7022580255838475, "learning_rate": 9.299854460571415e-06, "loss": 0.273, "step": 62250 }, { "epoch": 1.267379134860051, "grad_norm": 8.012880747555602, "learning_rate": 9.299491783572458e-06, "loss": 0.2685, "step": 62260 }, { "epoch": 1.2675826972010178, "grad_norm": 10.925271490071522, "learning_rate": 9.299129019739727e-06, "loss": 0.2458, "step": 62270 }, { "epoch": 1.2677862595419847, "grad_norm": 6.212947380443805, "learning_rate": 9.298766169080548e-06, "loss": 0.1655, "step": 62280 }, { "epoch": 1.2679898218829517, "grad_norm": 11.348578104298518, "learning_rate": 9.298403231602249e-06, "loss": 0.2162, "step": 62290 }, { "epoch": 1.2681933842239186, "grad_norm": 8.347435108095729, "learning_rate": 9.29804020731216e-06, "loss": 0.3119, "step": 62300 }, { "epoch": 1.2683969465648854, "grad_norm": 9.894486500510082, "learning_rate": 9.297677096217613e-06, "loss": 0.346, "step": 62310 }, { "epoch": 1.2686005089058523, "grad_norm": 8.972820733781559, "learning_rate": 9.297313898325943e-06, "loss": 0.342, "step": 62320 }, { "epoch": 1.2688040712468194, "grad_norm": 3.651578416195096, "learning_rate": 9.296950613644482e-06, "loss": 0.1792, "step": 62330 }, { "epoch": 1.2690076335877862, "grad_norm": 12.48320230656893, "learning_rate": 9.29658724218057e-06, "loss": 0.2868, "step": 62340 }, { "epoch": 1.269211195928753, "grad_norm": 7.9733923358656735, "learning_rate": 9.296223783941544e-06, "loss": 0.19, "step": 62350 }, { "epoch": 1.2694147582697202, "grad_norm": 20.828391623152843, "learning_rate": 9.295860238934748e-06, "loss": 0.3254, "step": 62360 }, { "epoch": 1.269618320610687, "grad_norm": 6.199859733733765, "learning_rate": 9.29549660716752e-06, "loss": 0.243, "step": 62370 }, { "epoch": 1.2698218829516539, "grad_norm": 31.23265301197443, "learning_rate": 9.295132888647204e-06, "loss": 0.197, "step": 62380 }, { "epoch": 1.270025445292621, "grad_norm": 8.065884770204319, "learning_rate": 9.294769083381147e-06, "loss": 0.2, "step": 62390 }, { "epoch": 1.2702290076335878, "grad_norm": 12.60607692020462, "learning_rate": 9.294405191376699e-06, "loss": 0.259, "step": 62400 }, { "epoch": 1.2704325699745547, "grad_norm": 2.907862405655167, "learning_rate": 9.294041212641204e-06, "loss": 0.2275, "step": 62410 }, { "epoch": 1.2706361323155217, "grad_norm": 7.757637046029035, "learning_rate": 9.29367714718202e-06, "loss": 0.2874, "step": 62420 }, { "epoch": 1.2708396946564886, "grad_norm": 9.44925496731024, "learning_rate": 9.293312995006494e-06, "loss": 0.2682, "step": 62430 }, { "epoch": 1.2710432569974555, "grad_norm": 13.191244261241259, "learning_rate": 9.292948756121982e-06, "loss": 0.1877, "step": 62440 }, { "epoch": 1.2712468193384223, "grad_norm": 20.312769639257642, "learning_rate": 9.29258443053584e-06, "loss": 0.2667, "step": 62450 }, { "epoch": 1.2714503816793892, "grad_norm": 5.861433000268758, "learning_rate": 9.292220018255427e-06, "loss": 0.242, "step": 62460 }, { "epoch": 1.2716539440203563, "grad_norm": 8.714658965024782, "learning_rate": 9.291855519288102e-06, "loss": 0.207, "step": 62470 }, { "epoch": 1.271857506361323, "grad_norm": 13.737978133684486, "learning_rate": 9.29149093364123e-06, "loss": 0.1878, "step": 62480 }, { "epoch": 1.27206106870229, "grad_norm": 12.0978557333866, "learning_rate": 9.291126261322167e-06, "loss": 0.3022, "step": 62490 }, { "epoch": 1.272264631043257, "grad_norm": 13.151028738483602, "learning_rate": 9.290761502338284e-06, "loss": 0.1747, "step": 62500 }, { "epoch": 1.272468193384224, "grad_norm": 3.1647440776691957, "learning_rate": 9.290396656696945e-06, "loss": 0.168, "step": 62510 }, { "epoch": 1.2726717557251908, "grad_norm": 14.784177649671303, "learning_rate": 9.290031724405522e-06, "loss": 0.2755, "step": 62520 }, { "epoch": 1.2728753180661578, "grad_norm": 11.64518008079941, "learning_rate": 9.28966670547138e-06, "loss": 0.2209, "step": 62530 }, { "epoch": 1.2730788804071247, "grad_norm": 15.023296006926673, "learning_rate": 9.289301599901895e-06, "loss": 0.2126, "step": 62540 }, { "epoch": 1.2732824427480915, "grad_norm": 12.23795055731681, "learning_rate": 9.28893640770444e-06, "loss": 0.3022, "step": 62550 }, { "epoch": 1.2734860050890586, "grad_norm": 1.2330192840862377, "learning_rate": 9.28857112888639e-06, "loss": 0.1891, "step": 62560 }, { "epoch": 1.2736895674300255, "grad_norm": 0.16186607577036838, "learning_rate": 9.288205763455123e-06, "loss": 0.206, "step": 62570 }, { "epoch": 1.2738931297709923, "grad_norm": 12.041543942899146, "learning_rate": 9.287840311418016e-06, "loss": 0.2544, "step": 62580 }, { "epoch": 1.2740966921119594, "grad_norm": 11.972771333822601, "learning_rate": 9.287474772782452e-06, "loss": 0.2363, "step": 62590 }, { "epoch": 1.2743002544529263, "grad_norm": 15.380359820995091, "learning_rate": 9.287109147555814e-06, "loss": 0.204, "step": 62600 }, { "epoch": 1.2745038167938931, "grad_norm": 16.096767378587604, "learning_rate": 9.286743435745483e-06, "loss": 0.2947, "step": 62610 }, { "epoch": 1.27470737913486, "grad_norm": 8.918952463642388, "learning_rate": 9.286377637358847e-06, "loss": 0.3212, "step": 62620 }, { "epoch": 1.2749109414758268, "grad_norm": 10.801324311930118, "learning_rate": 9.286011752403294e-06, "loss": 0.2236, "step": 62630 }, { "epoch": 1.275114503816794, "grad_norm": 10.120481570076759, "learning_rate": 9.285645780886214e-06, "loss": 0.187, "step": 62640 }, { "epoch": 1.2753180661577608, "grad_norm": 4.08990546014614, "learning_rate": 9.285279722814998e-06, "loss": 0.1403, "step": 62650 }, { "epoch": 1.2755216284987276, "grad_norm": 10.593966228869537, "learning_rate": 9.284913578197038e-06, "loss": 0.279, "step": 62660 }, { "epoch": 1.2757251908396947, "grad_norm": 3.729508789601434, "learning_rate": 9.28454734703973e-06, "loss": 0.1983, "step": 62670 }, { "epoch": 1.2759287531806616, "grad_norm": 4.481315188300955, "learning_rate": 9.284181029350469e-06, "loss": 0.2775, "step": 62680 }, { "epoch": 1.2761323155216284, "grad_norm": 8.566282782919394, "learning_rate": 9.283814625136653e-06, "loss": 0.1594, "step": 62690 }, { "epoch": 1.2763358778625955, "grad_norm": 8.115772573787734, "learning_rate": 9.283448134405687e-06, "loss": 0.2668, "step": 62700 }, { "epoch": 1.2765394402035624, "grad_norm": 20.085700429593714, "learning_rate": 9.283081557164966e-06, "loss": 0.2353, "step": 62710 }, { "epoch": 1.2767430025445292, "grad_norm": 11.754020177999775, "learning_rate": 9.282714893421896e-06, "loss": 0.2632, "step": 62720 }, { "epoch": 1.2769465648854963, "grad_norm": 4.9790248650204125, "learning_rate": 9.282348143183883e-06, "loss": 0.2689, "step": 62730 }, { "epoch": 1.2771501272264631, "grad_norm": 6.4538555516238825, "learning_rate": 9.281981306458335e-06, "loss": 0.2373, "step": 62740 }, { "epoch": 1.27735368956743, "grad_norm": 1.8526381523094246, "learning_rate": 9.281614383252657e-06, "loss": 0.1864, "step": 62750 }, { "epoch": 1.277557251908397, "grad_norm": 14.400074113145527, "learning_rate": 9.281247373574263e-06, "loss": 0.2549, "step": 62760 }, { "epoch": 1.277760814249364, "grad_norm": 11.83432449069799, "learning_rate": 9.280880277430565e-06, "loss": 0.2259, "step": 62770 }, { "epoch": 1.2779643765903308, "grad_norm": 15.061109996838237, "learning_rate": 9.280513094828977e-06, "loss": 0.2718, "step": 62780 }, { "epoch": 1.2781679389312977, "grad_norm": 15.56161332944032, "learning_rate": 9.28014582577691e-06, "loss": 0.2386, "step": 62790 }, { "epoch": 1.2783715012722645, "grad_norm": 13.055802545165953, "learning_rate": 9.279778470281788e-06, "loss": 0.3418, "step": 62800 }, { "epoch": 1.2785750636132316, "grad_norm": 10.956483049777756, "learning_rate": 9.279411028351026e-06, "loss": 0.173, "step": 62810 }, { "epoch": 1.2787786259541984, "grad_norm": 29.56803626071591, "learning_rate": 9.27904349999205e-06, "loss": 0.1961, "step": 62820 }, { "epoch": 1.2789821882951653, "grad_norm": 10.461324351679787, "learning_rate": 9.278675885212274e-06, "loss": 0.1963, "step": 62830 }, { "epoch": 1.2791857506361324, "grad_norm": 12.963292565410322, "learning_rate": 9.278308184019132e-06, "loss": 0.2803, "step": 62840 }, { "epoch": 1.2793893129770992, "grad_norm": 11.285870814923758, "learning_rate": 9.277940396420043e-06, "loss": 0.177, "step": 62850 }, { "epoch": 1.279592875318066, "grad_norm": 13.80143046700477, "learning_rate": 9.277572522422438e-06, "loss": 0.2052, "step": 62860 }, { "epoch": 1.2797964376590332, "grad_norm": 5.800210975464363, "learning_rate": 9.277204562033748e-06, "loss": 0.2351, "step": 62870 }, { "epoch": 1.28, "grad_norm": 8.714868913585454, "learning_rate": 9.276836515261401e-06, "loss": 0.2245, "step": 62880 }, { "epoch": 1.2802035623409669, "grad_norm": 7.659473617742692, "learning_rate": 9.276468382112833e-06, "loss": 0.2692, "step": 62890 }, { "epoch": 1.280407124681934, "grad_norm": 1.7800280169984857, "learning_rate": 9.276100162595478e-06, "loss": 0.2776, "step": 62900 }, { "epoch": 1.2806106870229008, "grad_norm": 8.928224185283936, "learning_rate": 9.275731856716775e-06, "loss": 0.2529, "step": 62910 }, { "epoch": 1.2808142493638677, "grad_norm": 17.437978950311667, "learning_rate": 9.275363464484155e-06, "loss": 0.3035, "step": 62920 }, { "epoch": 1.2810178117048345, "grad_norm": 9.959134304713432, "learning_rate": 9.274994985905067e-06, "loss": 0.3236, "step": 62930 }, { "epoch": 1.2812213740458016, "grad_norm": 12.446242547410012, "learning_rate": 9.274626420986949e-06, "loss": 0.2759, "step": 62940 }, { "epoch": 1.2814249363867685, "grad_norm": 10.096818463705741, "learning_rate": 9.274257769737243e-06, "loss": 0.2528, "step": 62950 }, { "epoch": 1.2816284987277353, "grad_norm": 8.442240943307118, "learning_rate": 9.273889032163397e-06, "loss": 0.2358, "step": 62960 }, { "epoch": 1.2818320610687022, "grad_norm": 14.078969696527446, "learning_rate": 9.273520208272857e-06, "loss": 0.1454, "step": 62970 }, { "epoch": 1.2820356234096693, "grad_norm": 16.94183278967005, "learning_rate": 9.273151298073073e-06, "loss": 0.263, "step": 62980 }, { "epoch": 1.2822391857506361, "grad_norm": 18.14023704071741, "learning_rate": 9.272782301571493e-06, "loss": 0.2686, "step": 62990 }, { "epoch": 1.282442748091603, "grad_norm": 20.55599926375617, "learning_rate": 9.272413218775573e-06, "loss": 0.2587, "step": 63000 }, { "epoch": 1.28264631043257, "grad_norm": 9.330902445610235, "learning_rate": 9.272044049692764e-06, "loss": 0.2456, "step": 63010 }, { "epoch": 1.282849872773537, "grad_norm": 4.392254198322061, "learning_rate": 9.271674794330524e-06, "loss": 0.2379, "step": 63020 }, { "epoch": 1.2830534351145038, "grad_norm": 7.426684621833082, "learning_rate": 9.271305452696309e-06, "loss": 0.2944, "step": 63030 }, { "epoch": 1.2832569974554708, "grad_norm": 15.825313277594931, "learning_rate": 9.270936024797579e-06, "loss": 0.1727, "step": 63040 }, { "epoch": 1.2834605597964377, "grad_norm": 11.36554846041083, "learning_rate": 9.270566510641796e-06, "loss": 0.2954, "step": 63050 }, { "epoch": 1.2836641221374046, "grad_norm": 13.374701375927934, "learning_rate": 9.27019691023642e-06, "loss": 0.3001, "step": 63060 }, { "epoch": 1.2838676844783716, "grad_norm": 7.9289643559116465, "learning_rate": 9.269827223588918e-06, "loss": 0.235, "step": 63070 }, { "epoch": 1.2840712468193385, "grad_norm": 5.102967606260312, "learning_rate": 9.269457450706757e-06, "loss": 0.2, "step": 63080 }, { "epoch": 1.2842748091603053, "grad_norm": 7.029645730713625, "learning_rate": 9.269087591597402e-06, "loss": 0.3784, "step": 63090 }, { "epoch": 1.2844783715012722, "grad_norm": 15.419787354589097, "learning_rate": 9.268717646268325e-06, "loss": 0.2995, "step": 63100 }, { "epoch": 1.284681933842239, "grad_norm": 4.53461128065422, "learning_rate": 9.268347614726998e-06, "loss": 0.2507, "step": 63110 }, { "epoch": 1.2848854961832061, "grad_norm": 11.972128663302994, "learning_rate": 9.267977496980894e-06, "loss": 0.2113, "step": 63120 }, { "epoch": 1.285089058524173, "grad_norm": 8.588268957874059, "learning_rate": 9.267607293037486e-06, "loss": 0.2457, "step": 63130 }, { "epoch": 1.2852926208651398, "grad_norm": 2.0247165358882677, "learning_rate": 9.267237002904252e-06, "loss": 0.2734, "step": 63140 }, { "epoch": 1.285496183206107, "grad_norm": 3.510388637209143, "learning_rate": 9.26686662658867e-06, "loss": 0.1583, "step": 63150 }, { "epoch": 1.2856997455470738, "grad_norm": 19.715502480529327, "learning_rate": 9.266496164098221e-06, "loss": 0.3306, "step": 63160 }, { "epoch": 1.2859033078880406, "grad_norm": 5.424321784121892, "learning_rate": 9.26612561544039e-06, "loss": 0.2741, "step": 63170 }, { "epoch": 1.2861068702290077, "grad_norm": 9.003788607751165, "learning_rate": 9.265754980622655e-06, "loss": 0.2261, "step": 63180 }, { "epoch": 1.2863104325699746, "grad_norm": 9.111059078654653, "learning_rate": 9.265384259652503e-06, "loss": 0.1663, "step": 63190 }, { "epoch": 1.2865139949109414, "grad_norm": 5.017570335670404, "learning_rate": 9.265013452537422e-06, "loss": 0.2344, "step": 63200 }, { "epoch": 1.2867175572519085, "grad_norm": 8.675179770889194, "learning_rate": 9.264642559284902e-06, "loss": 0.2565, "step": 63210 }, { "epoch": 1.2869211195928754, "grad_norm": 26.551546916793853, "learning_rate": 9.264271579902433e-06, "loss": 0.2313, "step": 63220 }, { "epoch": 1.2871246819338422, "grad_norm": 19.17833492873261, "learning_rate": 9.263900514397506e-06, "loss": 0.2566, "step": 63230 }, { "epoch": 1.2873282442748093, "grad_norm": 8.464291006604583, "learning_rate": 9.263529362777618e-06, "loss": 0.2369, "step": 63240 }, { "epoch": 1.2875318066157762, "grad_norm": 12.017192439621953, "learning_rate": 9.263158125050262e-06, "loss": 0.2951, "step": 63250 }, { "epoch": 1.287735368956743, "grad_norm": 9.186091286605892, "learning_rate": 9.262786801222936e-06, "loss": 0.2663, "step": 63260 }, { "epoch": 1.2879389312977099, "grad_norm": 3.14948528498508, "learning_rate": 9.26241539130314e-06, "loss": 0.2268, "step": 63270 }, { "epoch": 1.2881424936386767, "grad_norm": 11.1226329044047, "learning_rate": 9.262043895298378e-06, "loss": 0.2013, "step": 63280 }, { "epoch": 1.2883460559796438, "grad_norm": 4.0161753354024015, "learning_rate": 9.261672313216148e-06, "loss": 0.2331, "step": 63290 }, { "epoch": 1.2885496183206107, "grad_norm": 8.48599030084119, "learning_rate": 9.261300645063957e-06, "loss": 0.2975, "step": 63300 }, { "epoch": 1.2887531806615775, "grad_norm": 7.611990899709241, "learning_rate": 9.260928890849312e-06, "loss": 0.1912, "step": 63310 }, { "epoch": 1.2889567430025446, "grad_norm": 7.418571945890036, "learning_rate": 9.260557050579718e-06, "loss": 0.1759, "step": 63320 }, { "epoch": 1.2891603053435114, "grad_norm": 38.06273609733072, "learning_rate": 9.260185124262688e-06, "loss": 0.2613, "step": 63330 }, { "epoch": 1.2893638676844783, "grad_norm": 5.020703550282339, "learning_rate": 9.259813111905732e-06, "loss": 0.2827, "step": 63340 }, { "epoch": 1.2895674300254454, "grad_norm": 25.101978880161358, "learning_rate": 9.259441013516364e-06, "loss": 0.2563, "step": 63350 }, { "epoch": 1.2897709923664122, "grad_norm": 7.154861467417781, "learning_rate": 9.259068829102098e-06, "loss": 0.2493, "step": 63360 }, { "epoch": 1.289974554707379, "grad_norm": 2.431183713767726, "learning_rate": 9.25869655867045e-06, "loss": 0.2609, "step": 63370 }, { "epoch": 1.2901781170483462, "grad_norm": 3.113105573796979, "learning_rate": 9.258324202228943e-06, "loss": 0.2363, "step": 63380 }, { "epoch": 1.290381679389313, "grad_norm": 9.606074116256433, "learning_rate": 9.257951759785094e-06, "loss": 0.2364, "step": 63390 }, { "epoch": 1.2905852417302799, "grad_norm": 2.9415767997493263, "learning_rate": 9.257579231346421e-06, "loss": 0.2057, "step": 63400 }, { "epoch": 1.2907888040712467, "grad_norm": 4.87819222277654, "learning_rate": 9.257206616920455e-06, "loss": 0.2487, "step": 63410 }, { "epoch": 1.2909923664122138, "grad_norm": 5.790407932722567, "learning_rate": 9.256833916514719e-06, "loss": 0.2263, "step": 63420 }, { "epoch": 1.2911959287531807, "grad_norm": 6.965972303479704, "learning_rate": 9.256461130136736e-06, "loss": 0.3871, "step": 63430 }, { "epoch": 1.2913994910941475, "grad_norm": 20.90995734612994, "learning_rate": 9.25608825779404e-06, "loss": 0.1703, "step": 63440 }, { "epoch": 1.2916030534351144, "grad_norm": 6.189262564874956, "learning_rate": 9.25571529949416e-06, "loss": 0.2846, "step": 63450 }, { "epoch": 1.2918066157760815, "grad_norm": 3.1112097225684274, "learning_rate": 9.255342255244628e-06, "loss": 0.2189, "step": 63460 }, { "epoch": 1.2920101781170483, "grad_norm": 5.931644074474312, "learning_rate": 9.254969125052977e-06, "loss": 0.2252, "step": 63470 }, { "epoch": 1.2922137404580152, "grad_norm": 3.6011900807270987, "learning_rate": 9.254595908926745e-06, "loss": 0.2314, "step": 63480 }, { "epoch": 1.2924173027989823, "grad_norm": 3.183264964998863, "learning_rate": 9.254222606873471e-06, "loss": 0.2618, "step": 63490 }, { "epoch": 1.2926208651399491, "grad_norm": 6.245195156651885, "learning_rate": 9.25384921890069e-06, "loss": 0.166, "step": 63500 }, { "epoch": 1.292824427480916, "grad_norm": 3.909075308777305, "learning_rate": 9.253475745015945e-06, "loss": 0.1684, "step": 63510 }, { "epoch": 1.293027989821883, "grad_norm": 22.803319166540593, "learning_rate": 9.253102185226778e-06, "loss": 0.2479, "step": 63520 }, { "epoch": 1.29323155216285, "grad_norm": 12.786831907704425, "learning_rate": 9.252728539540736e-06, "loss": 0.3385, "step": 63530 }, { "epoch": 1.2934351145038168, "grad_norm": 1.3746425668876923, "learning_rate": 9.252354807965364e-06, "loss": 0.2077, "step": 63540 }, { "epoch": 1.2936386768447838, "grad_norm": 7.979096533558619, "learning_rate": 9.25198099050821e-06, "loss": 0.2036, "step": 63550 }, { "epoch": 1.2938422391857507, "grad_norm": 8.349353415549059, "learning_rate": 9.251607087176821e-06, "loss": 0.2155, "step": 63560 }, { "epoch": 1.2940458015267176, "grad_norm": 8.026546957321958, "learning_rate": 9.251233097978753e-06, "loss": 0.233, "step": 63570 }, { "epoch": 1.2942493638676844, "grad_norm": 11.211208354294103, "learning_rate": 9.250859022921555e-06, "loss": 0.2278, "step": 63580 }, { "epoch": 1.2944529262086513, "grad_norm": 8.724038742143643, "learning_rate": 9.250484862012784e-06, "loss": 0.2192, "step": 63590 }, { "epoch": 1.2946564885496183, "grad_norm": 11.673426416860844, "learning_rate": 9.25011061526e-06, "loss": 0.2499, "step": 63600 }, { "epoch": 1.2948600508905852, "grad_norm": 5.0173838561427235, "learning_rate": 9.249736282670755e-06, "loss": 0.2569, "step": 63610 }, { "epoch": 1.295063613231552, "grad_norm": 12.179020210450123, "learning_rate": 9.249361864252614e-06, "loss": 0.3109, "step": 63620 }, { "epoch": 1.2952671755725191, "grad_norm": 5.86292656403356, "learning_rate": 9.248987360013135e-06, "loss": 0.2596, "step": 63630 }, { "epoch": 1.295470737913486, "grad_norm": 8.174730753001146, "learning_rate": 9.248612769959884e-06, "loss": 0.1224, "step": 63640 }, { "epoch": 1.2956743002544528, "grad_norm": 0.3063594180530029, "learning_rate": 9.248238094100427e-06, "loss": 0.2296, "step": 63650 }, { "epoch": 1.29587786259542, "grad_norm": 12.925216734604733, "learning_rate": 9.24786333244233e-06, "loss": 0.3071, "step": 63660 }, { "epoch": 1.2960814249363868, "grad_norm": 4.440501294873726, "learning_rate": 9.24748848499316e-06, "loss": 0.2029, "step": 63670 }, { "epoch": 1.2962849872773536, "grad_norm": 18.52606233401469, "learning_rate": 9.247113551760491e-06, "loss": 0.2149, "step": 63680 }, { "epoch": 1.2964885496183207, "grad_norm": 2.7327629303971306, "learning_rate": 9.246738532751894e-06, "loss": 0.1834, "step": 63690 }, { "epoch": 1.2966921119592876, "grad_norm": 10.673402751209434, "learning_rate": 9.246363427974942e-06, "loss": 0.2431, "step": 63700 }, { "epoch": 1.2968956743002544, "grad_norm": 9.261665302753881, "learning_rate": 9.24598823743721e-06, "loss": 0.2259, "step": 63710 }, { "epoch": 1.2970992366412215, "grad_norm": 6.565032076411583, "learning_rate": 9.245612961146279e-06, "loss": 0.3326, "step": 63720 }, { "epoch": 1.2973027989821884, "grad_norm": 5.4816217077509375, "learning_rate": 9.245237599109724e-06, "loss": 0.1936, "step": 63730 }, { "epoch": 1.2975063613231552, "grad_norm": 17.737791531841406, "learning_rate": 9.24486215133513e-06, "loss": 0.2038, "step": 63740 }, { "epoch": 1.297709923664122, "grad_norm": 2.2708596615339753, "learning_rate": 9.244486617830075e-06, "loss": 0.2277, "step": 63750 }, { "epoch": 1.297913486005089, "grad_norm": 8.92083005590968, "learning_rate": 9.244110998602147e-06, "loss": 0.2495, "step": 63760 }, { "epoch": 1.298117048346056, "grad_norm": 6.279793389478657, "learning_rate": 9.243735293658932e-06, "loss": 0.2676, "step": 63770 }, { "epoch": 1.2983206106870229, "grad_norm": 5.819445415507942, "learning_rate": 9.243359503008015e-06, "loss": 0.163, "step": 63780 }, { "epoch": 1.2985241730279897, "grad_norm": 8.39293236296076, "learning_rate": 9.242983626656988e-06, "loss": 0.1846, "step": 63790 }, { "epoch": 1.2987277353689568, "grad_norm": 8.13654229513274, "learning_rate": 9.242607664613443e-06, "loss": 0.2168, "step": 63800 }, { "epoch": 1.2989312977099237, "grad_norm": 5.709237033134925, "learning_rate": 9.242231616884969e-06, "loss": 0.248, "step": 63810 }, { "epoch": 1.2991348600508905, "grad_norm": 28.813065431025073, "learning_rate": 9.241855483479166e-06, "loss": 0.2969, "step": 63820 }, { "epoch": 1.2993384223918576, "grad_norm": 7.843238619623, "learning_rate": 9.241479264403627e-06, "loss": 0.2345, "step": 63830 }, { "epoch": 1.2995419847328245, "grad_norm": 13.667971464033657, "learning_rate": 9.241102959665953e-06, "loss": 0.2411, "step": 63840 }, { "epoch": 1.2997455470737913, "grad_norm": 7.936751385885887, "learning_rate": 9.24072656927374e-06, "loss": 0.2063, "step": 63850 }, { "epoch": 1.2999491094147584, "grad_norm": 0.8164873318113453, "learning_rate": 9.240350093234594e-06, "loss": 0.1803, "step": 63860 }, { "epoch": 1.3001526717557252, "grad_norm": 0.9528506520861767, "learning_rate": 9.239973531556115e-06, "loss": 0.1793, "step": 63870 }, { "epoch": 1.300356234096692, "grad_norm": 0.23176147881669928, "learning_rate": 9.23959688424591e-06, "loss": 0.2809, "step": 63880 }, { "epoch": 1.300559796437659, "grad_norm": 5.07001289886942, "learning_rate": 9.239220151311585e-06, "loss": 0.2233, "step": 63890 }, { "epoch": 1.300763358778626, "grad_norm": 15.484256362283872, "learning_rate": 9.238843332760747e-06, "loss": 0.3055, "step": 63900 }, { "epoch": 1.300966921119593, "grad_norm": 8.145687358729129, "learning_rate": 9.238466428601013e-06, "loss": 0.2511, "step": 63910 }, { "epoch": 1.3011704834605597, "grad_norm": 7.127339411440428, "learning_rate": 9.238089438839986e-06, "loss": 0.2208, "step": 63920 }, { "epoch": 1.3013740458015266, "grad_norm": 3.365652612140983, "learning_rate": 9.237712363485286e-06, "loss": 0.2471, "step": 63930 }, { "epoch": 1.3015776081424937, "grad_norm": 9.506596480578565, "learning_rate": 9.237335202544527e-06, "loss": 0.1674, "step": 63940 }, { "epoch": 1.3017811704834605, "grad_norm": 13.852918766966022, "learning_rate": 9.236957956025326e-06, "loss": 0.2239, "step": 63950 }, { "epoch": 1.3019847328244274, "grad_norm": 16.66337346677674, "learning_rate": 9.2365806239353e-06, "loss": 0.3263, "step": 63960 }, { "epoch": 1.3021882951653945, "grad_norm": 4.435562247381259, "learning_rate": 9.236203206282074e-06, "loss": 0.193, "step": 63970 }, { "epoch": 1.3023918575063613, "grad_norm": 12.501042623928523, "learning_rate": 9.235825703073266e-06, "loss": 0.2269, "step": 63980 }, { "epoch": 1.3025954198473282, "grad_norm": 13.461749381088927, "learning_rate": 9.235448114316504e-06, "loss": 0.2623, "step": 63990 }, { "epoch": 1.3027989821882953, "grad_norm": 19.917727756721423, "learning_rate": 9.235070440019412e-06, "loss": 0.2479, "step": 64000 }, { "epoch": 1.3030025445292621, "grad_norm": 11.969562571905366, "learning_rate": 9.234692680189616e-06, "loss": 0.1774, "step": 64010 }, { "epoch": 1.303206106870229, "grad_norm": 11.65075270221852, "learning_rate": 9.234314834834748e-06, "loss": 0.221, "step": 64020 }, { "epoch": 1.303409669211196, "grad_norm": 11.003288683529158, "learning_rate": 9.23393690396244e-06, "loss": 0.2233, "step": 64030 }, { "epoch": 1.303613231552163, "grad_norm": 6.1899230314206415, "learning_rate": 9.23355888758032e-06, "loss": 0.2459, "step": 64040 }, { "epoch": 1.3038167938931298, "grad_norm": 8.452462601428007, "learning_rate": 9.233180785696027e-06, "loss": 0.3345, "step": 64050 }, { "epoch": 1.3040203562340966, "grad_norm": 9.711052565018692, "learning_rate": 9.232802598317197e-06, "loss": 0.263, "step": 64060 }, { "epoch": 1.3042239185750635, "grad_norm": 6.954299137193541, "learning_rate": 9.232424325451463e-06, "loss": 0.2854, "step": 64070 }, { "epoch": 1.3044274809160306, "grad_norm": 3.804730445713467, "learning_rate": 9.232045967106471e-06, "loss": 0.1716, "step": 64080 }, { "epoch": 1.3046310432569974, "grad_norm": 0.29510781305779027, "learning_rate": 9.23166752328986e-06, "loss": 0.227, "step": 64090 }, { "epoch": 1.3048346055979643, "grad_norm": 1.8298251625198565, "learning_rate": 9.231288994009274e-06, "loss": 0.2332, "step": 64100 }, { "epoch": 1.3050381679389313, "grad_norm": 7.781390461131923, "learning_rate": 9.230910379272354e-06, "loss": 0.2083, "step": 64110 }, { "epoch": 1.3052417302798982, "grad_norm": 4.772071358774873, "learning_rate": 9.230531679086752e-06, "loss": 0.2535, "step": 64120 }, { "epoch": 1.305445292620865, "grad_norm": 9.577550294337852, "learning_rate": 9.230152893460111e-06, "loss": 0.2742, "step": 64130 }, { "epoch": 1.3056488549618321, "grad_norm": 9.373988773802512, "learning_rate": 9.229774022400086e-06, "loss": 0.2518, "step": 64140 }, { "epoch": 1.305852417302799, "grad_norm": 8.594191261387722, "learning_rate": 9.229395065914328e-06, "loss": 0.3022, "step": 64150 }, { "epoch": 1.3060559796437659, "grad_norm": 10.102456081957994, "learning_rate": 9.229016024010487e-06, "loss": 0.1634, "step": 64160 }, { "epoch": 1.306259541984733, "grad_norm": 8.703771839897746, "learning_rate": 9.228636896696222e-06, "loss": 0.2486, "step": 64170 }, { "epoch": 1.3064631043256998, "grad_norm": 11.512300517941087, "learning_rate": 9.228257683979186e-06, "loss": 0.3002, "step": 64180 }, { "epoch": 1.3066666666666666, "grad_norm": 17.48525243596456, "learning_rate": 9.227878385867043e-06, "loss": 0.2339, "step": 64190 }, { "epoch": 1.3068702290076337, "grad_norm": 15.777959098764297, "learning_rate": 9.227499002367449e-06, "loss": 0.2211, "step": 64200 }, { "epoch": 1.3070737913486006, "grad_norm": 7.956828015829866, "learning_rate": 9.227119533488068e-06, "loss": 0.3145, "step": 64210 }, { "epoch": 1.3072773536895674, "grad_norm": 65.51257578604404, "learning_rate": 9.226739979236563e-06, "loss": 0.2909, "step": 64220 }, { "epoch": 1.3074809160305343, "grad_norm": 5.220011539092494, "learning_rate": 9.226360339620602e-06, "loss": 0.216, "step": 64230 }, { "epoch": 1.3076844783715011, "grad_norm": 15.12455460218524, "learning_rate": 9.225980614647847e-06, "loss": 0.2663, "step": 64240 }, { "epoch": 1.3078880407124682, "grad_norm": 7.126237899664543, "learning_rate": 9.225600804325973e-06, "loss": 0.2558, "step": 64250 }, { "epoch": 1.308091603053435, "grad_norm": 3.0220912593635942, "learning_rate": 9.225220908662649e-06, "loss": 0.2325, "step": 64260 }, { "epoch": 1.308295165394402, "grad_norm": 8.473676318257908, "learning_rate": 9.224840927665544e-06, "loss": 0.2859, "step": 64270 }, { "epoch": 1.308498727735369, "grad_norm": 9.991985494968937, "learning_rate": 9.224460861342337e-06, "loss": 0.2021, "step": 64280 }, { "epoch": 1.3087022900763359, "grad_norm": 16.228378804609616, "learning_rate": 9.224080709700701e-06, "loss": 0.2359, "step": 64290 }, { "epoch": 1.3089058524173027, "grad_norm": 0.9520061814984934, "learning_rate": 9.223700472748313e-06, "loss": 0.1568, "step": 64300 }, { "epoch": 1.3091094147582698, "grad_norm": 6.360516732666854, "learning_rate": 9.223320150492857e-06, "loss": 0.2631, "step": 64310 }, { "epoch": 1.3093129770992367, "grad_norm": 4.894959019550732, "learning_rate": 9.22293974294201e-06, "loss": 0.2001, "step": 64320 }, { "epoch": 1.3095165394402035, "grad_norm": 16.67738285035632, "learning_rate": 9.222559250103456e-06, "loss": 0.2952, "step": 64330 }, { "epoch": 1.3097201017811706, "grad_norm": 0.09401256036291011, "learning_rate": 9.222178671984879e-06, "loss": 0.2057, "step": 64340 }, { "epoch": 1.3099236641221375, "grad_norm": 14.869429751876508, "learning_rate": 9.221798008593962e-06, "loss": 0.2592, "step": 64350 }, { "epoch": 1.3101272264631043, "grad_norm": 4.998953249353408, "learning_rate": 9.221417259938402e-06, "loss": 0.1603, "step": 64360 }, { "epoch": 1.3103307888040712, "grad_norm": 18.52985074718985, "learning_rate": 9.22103642602588e-06, "loss": 0.3008, "step": 64370 }, { "epoch": 1.3105343511450382, "grad_norm": 4.161139071796782, "learning_rate": 9.220655506864092e-06, "loss": 0.2651, "step": 64380 }, { "epoch": 1.310737913486005, "grad_norm": 3.94029486407037, "learning_rate": 9.22027450246073e-06, "loss": 0.2577, "step": 64390 }, { "epoch": 1.310941475826972, "grad_norm": 0.1572904023230103, "learning_rate": 9.219893412823488e-06, "loss": 0.2079, "step": 64400 }, { "epoch": 1.3111450381679388, "grad_norm": 11.451000255277743, "learning_rate": 9.219512237960064e-06, "loss": 0.1575, "step": 64410 }, { "epoch": 1.311348600508906, "grad_norm": 8.875016897607056, "learning_rate": 9.219130977878155e-06, "loss": 0.2463, "step": 64420 }, { "epoch": 1.3115521628498727, "grad_norm": 5.7692157469932726, "learning_rate": 9.218749632585462e-06, "loss": 0.2859, "step": 64430 }, { "epoch": 1.3117557251908396, "grad_norm": 8.237257760052431, "learning_rate": 9.218368202089687e-06, "loss": 0.3111, "step": 64440 }, { "epoch": 1.3119592875318067, "grad_norm": 5.741367986745894, "learning_rate": 9.217986686398533e-06, "loss": 0.2081, "step": 64450 }, { "epoch": 1.3121628498727735, "grad_norm": 6.111111229280269, "learning_rate": 9.217605085519704e-06, "loss": 0.2012, "step": 64460 }, { "epoch": 1.3123664122137404, "grad_norm": 7.89455848680368, "learning_rate": 9.217223399460909e-06, "loss": 0.2312, "step": 64470 }, { "epoch": 1.3125699745547075, "grad_norm": 18.2459281367943, "learning_rate": 9.216841628229856e-06, "loss": 0.2707, "step": 64480 }, { "epoch": 1.3127735368956743, "grad_norm": 6.591842240202029, "learning_rate": 9.216459771834253e-06, "loss": 0.1674, "step": 64490 }, { "epoch": 1.3129770992366412, "grad_norm": 5.921410381384855, "learning_rate": 9.216077830281817e-06, "loss": 0.2854, "step": 64500 }, { "epoch": 1.3131806615776083, "grad_norm": 11.620922029925094, "learning_rate": 9.215695803580257e-06, "loss": 0.2338, "step": 64510 }, { "epoch": 1.3133842239185751, "grad_norm": 6.7424123432641085, "learning_rate": 9.215313691737292e-06, "loss": 0.3833, "step": 64520 }, { "epoch": 1.313587786259542, "grad_norm": 10.461940531366325, "learning_rate": 9.214931494760635e-06, "loss": 0.265, "step": 64530 }, { "epoch": 1.3137913486005088, "grad_norm": 5.604816660783339, "learning_rate": 9.21454921265801e-06, "loss": 0.2365, "step": 64540 }, { "epoch": 1.3139949109414757, "grad_norm": 8.924418974426674, "learning_rate": 9.214166845437136e-06, "loss": 0.3086, "step": 64550 }, { "epoch": 1.3141984732824428, "grad_norm": 2.249555636722092, "learning_rate": 9.213784393105732e-06, "loss": 0.1569, "step": 64560 }, { "epoch": 1.3144020356234096, "grad_norm": 9.722487691302442, "learning_rate": 9.213401855671526e-06, "loss": 0.2579, "step": 64570 }, { "epoch": 1.3146055979643765, "grad_norm": 9.870140067439978, "learning_rate": 9.213019233142244e-06, "loss": 0.2085, "step": 64580 }, { "epoch": 1.3148091603053436, "grad_norm": 17.70058862226816, "learning_rate": 9.212636525525611e-06, "loss": 0.1718, "step": 64590 }, { "epoch": 1.3150127226463104, "grad_norm": 10.711622143748096, "learning_rate": 9.21225373282936e-06, "loss": 0.3043, "step": 64600 }, { "epoch": 1.3152162849872773, "grad_norm": 8.645990407640335, "learning_rate": 9.211870855061216e-06, "loss": 0.2351, "step": 64610 }, { "epoch": 1.3154198473282444, "grad_norm": 6.864360398568302, "learning_rate": 9.211487892228917e-06, "loss": 0.2767, "step": 64620 }, { "epoch": 1.3156234096692112, "grad_norm": 20.37702826363474, "learning_rate": 9.211104844340196e-06, "loss": 0.2169, "step": 64630 }, { "epoch": 1.315826972010178, "grad_norm": 14.172943956500506, "learning_rate": 9.210721711402786e-06, "loss": 0.1967, "step": 64640 }, { "epoch": 1.3160305343511451, "grad_norm": 23.790557718567566, "learning_rate": 9.210338493424432e-06, "loss": 0.2461, "step": 64650 }, { "epoch": 1.316234096692112, "grad_norm": 10.633392419505697, "learning_rate": 9.209955190412866e-06, "loss": 0.3388, "step": 64660 }, { "epoch": 1.3164376590330789, "grad_norm": 39.48284948170846, "learning_rate": 9.209571802375833e-06, "loss": 0.2203, "step": 64670 }, { "epoch": 1.316641221374046, "grad_norm": 8.919421174873142, "learning_rate": 9.209188329321077e-06, "loss": 0.2735, "step": 64680 }, { "epoch": 1.3168447837150128, "grad_norm": 9.293025054541879, "learning_rate": 9.20880477125634e-06, "loss": 0.3108, "step": 64690 }, { "epoch": 1.3170483460559796, "grad_norm": 8.330897238727514, "learning_rate": 9.20842112818937e-06, "loss": 0.2337, "step": 64700 }, { "epoch": 1.3172519083969465, "grad_norm": 4.878533365385784, "learning_rate": 9.208037400127915e-06, "loss": 0.2117, "step": 64710 }, { "epoch": 1.3174554707379134, "grad_norm": 8.230112287847751, "learning_rate": 9.207653587079726e-06, "loss": 0.279, "step": 64720 }, { "epoch": 1.3176590330788804, "grad_norm": 4.07020889170279, "learning_rate": 9.207269689052552e-06, "loss": 0.2762, "step": 64730 }, { "epoch": 1.3178625954198473, "grad_norm": 22.797149657712932, "learning_rate": 9.206885706054148e-06, "loss": 0.2878, "step": 64740 }, { "epoch": 1.3180661577608141, "grad_norm": 4.0274685395999095, "learning_rate": 9.206501638092269e-06, "loss": 0.1718, "step": 64750 }, { "epoch": 1.3182697201017812, "grad_norm": 6.97305484818387, "learning_rate": 9.20611748517467e-06, "loss": 0.2096, "step": 64760 }, { "epoch": 1.318473282442748, "grad_norm": 14.61305942399997, "learning_rate": 9.205733247309113e-06, "loss": 0.1797, "step": 64770 }, { "epoch": 1.318676844783715, "grad_norm": 11.008350645386516, "learning_rate": 9.205348924503354e-06, "loss": 0.1477, "step": 64780 }, { "epoch": 1.318880407124682, "grad_norm": 9.25140888950453, "learning_rate": 9.204964516765161e-06, "loss": 0.2342, "step": 64790 }, { "epoch": 1.3190839694656489, "grad_norm": 7.278304025933735, "learning_rate": 9.20458002410229e-06, "loss": 0.1793, "step": 64800 }, { "epoch": 1.3192875318066157, "grad_norm": 22.847155261922886, "learning_rate": 9.20419544652251e-06, "loss": 0.247, "step": 64810 }, { "epoch": 1.3194910941475828, "grad_norm": 13.229098191183198, "learning_rate": 9.203810784033588e-06, "loss": 0.2387, "step": 64820 }, { "epoch": 1.3196946564885497, "grad_norm": 2.9428702786030905, "learning_rate": 9.203426036643294e-06, "loss": 0.2166, "step": 64830 }, { "epoch": 1.3198982188295165, "grad_norm": 14.96220265072622, "learning_rate": 9.203041204359398e-06, "loss": 0.1813, "step": 64840 }, { "epoch": 1.3201017811704834, "grad_norm": 6.836689239202026, "learning_rate": 9.20265628718967e-06, "loss": 0.261, "step": 64850 }, { "epoch": 1.3203053435114505, "grad_norm": 2.621510357225208, "learning_rate": 9.202271285141886e-06, "loss": 0.2096, "step": 64860 }, { "epoch": 1.3205089058524173, "grad_norm": 17.00026820833864, "learning_rate": 9.20188619822382e-06, "loss": 0.4551, "step": 64870 }, { "epoch": 1.3207124681933842, "grad_norm": 12.572812657364574, "learning_rate": 9.201501026443252e-06, "loss": 0.1687, "step": 64880 }, { "epoch": 1.320916030534351, "grad_norm": 21.053474090012394, "learning_rate": 9.201115769807957e-06, "loss": 0.1762, "step": 64890 }, { "epoch": 1.321119592875318, "grad_norm": 11.11237395765951, "learning_rate": 9.200730428325719e-06, "loss": 0.3481, "step": 64900 }, { "epoch": 1.321323155216285, "grad_norm": 6.810464185180538, "learning_rate": 9.200345002004318e-06, "loss": 0.2495, "step": 64910 }, { "epoch": 1.3215267175572518, "grad_norm": 13.767826678159286, "learning_rate": 9.199959490851542e-06, "loss": 0.2337, "step": 64920 }, { "epoch": 1.321730279898219, "grad_norm": 17.367808420544527, "learning_rate": 9.199573894875174e-06, "loss": 0.2385, "step": 64930 }, { "epoch": 1.3219338422391858, "grad_norm": 12.711801045995163, "learning_rate": 9.199188214083002e-06, "loss": 0.2338, "step": 64940 }, { "epoch": 1.3221374045801526, "grad_norm": 10.315302719260028, "learning_rate": 9.198802448482815e-06, "loss": 0.2581, "step": 64950 }, { "epoch": 1.3223409669211197, "grad_norm": 5.7909796536033635, "learning_rate": 9.198416598082405e-06, "loss": 0.2058, "step": 64960 }, { "epoch": 1.3225445292620865, "grad_norm": 4.220687476758586, "learning_rate": 9.198030662889566e-06, "loss": 0.2758, "step": 64970 }, { "epoch": 1.3227480916030534, "grad_norm": 2.0222028406656336, "learning_rate": 9.197644642912088e-06, "loss": 0.1484, "step": 64980 }, { "epoch": 1.3229516539440205, "grad_norm": 3.7614108904693873, "learning_rate": 9.197258538157771e-06, "loss": 0.1545, "step": 64990 }, { "epoch": 1.3231552162849873, "grad_norm": 6.338354625537587, "learning_rate": 9.19687234863441e-06, "loss": 0.2303, "step": 65000 }, { "epoch": 1.3233587786259542, "grad_norm": 4.800804396269162, "learning_rate": 9.19648607434981e-06, "loss": 0.2673, "step": 65010 }, { "epoch": 1.323562340966921, "grad_norm": 11.697142525416078, "learning_rate": 9.196099715311766e-06, "loss": 0.3254, "step": 65020 }, { "epoch": 1.323765903307888, "grad_norm": 8.174756500536033, "learning_rate": 9.195713271528085e-06, "loss": 0.1992, "step": 65030 }, { "epoch": 1.323969465648855, "grad_norm": 12.570343198472258, "learning_rate": 9.195326743006569e-06, "loss": 0.2363, "step": 65040 }, { "epoch": 1.3241730279898218, "grad_norm": 9.72241939202013, "learning_rate": 9.194940129755027e-06, "loss": 0.3269, "step": 65050 }, { "epoch": 1.3243765903307887, "grad_norm": 12.98880167043196, "learning_rate": 9.194553431781267e-06, "loss": 0.2518, "step": 65060 }, { "epoch": 1.3245801526717558, "grad_norm": 18.31807228071578, "learning_rate": 9.194166649093097e-06, "loss": 0.2138, "step": 65070 }, { "epoch": 1.3247837150127226, "grad_norm": 12.732803676330606, "learning_rate": 9.193779781698329e-06, "loss": 0.3536, "step": 65080 }, { "epoch": 1.3249872773536895, "grad_norm": 6.508178370315606, "learning_rate": 9.193392829604777e-06, "loss": 0.1833, "step": 65090 }, { "epoch": 1.3251908396946566, "grad_norm": 10.505168440670541, "learning_rate": 9.193005792820255e-06, "loss": 0.2111, "step": 65100 }, { "epoch": 1.3253944020356234, "grad_norm": 6.92543551732482, "learning_rate": 9.192618671352583e-06, "loss": 0.2911, "step": 65110 }, { "epoch": 1.3255979643765903, "grad_norm": 4.134284415686371, "learning_rate": 9.192231465209573e-06, "loss": 0.26, "step": 65120 }, { "epoch": 1.3258015267175574, "grad_norm": 8.708165580036308, "learning_rate": 9.191844174399052e-06, "loss": 0.2227, "step": 65130 }, { "epoch": 1.3260050890585242, "grad_norm": 8.764616216375753, "learning_rate": 9.191456798928838e-06, "loss": 0.2755, "step": 65140 }, { "epoch": 1.326208651399491, "grad_norm": 17.205077618968073, "learning_rate": 9.191069338806754e-06, "loss": 0.1395, "step": 65150 }, { "epoch": 1.3264122137404581, "grad_norm": 12.816892654666834, "learning_rate": 9.19068179404063e-06, "loss": 0.2001, "step": 65160 }, { "epoch": 1.326615776081425, "grad_norm": 14.080993782916652, "learning_rate": 9.190294164638286e-06, "loss": 0.2783, "step": 65170 }, { "epoch": 1.3268193384223919, "grad_norm": 8.770486705543139, "learning_rate": 9.189906450607558e-06, "loss": 0.2132, "step": 65180 }, { "epoch": 1.3270229007633587, "grad_norm": 20.957660771772613, "learning_rate": 9.18951865195627e-06, "loss": 0.242, "step": 65190 }, { "epoch": 1.3272264631043256, "grad_norm": 6.720109608324441, "learning_rate": 9.189130768692258e-06, "loss": 0.2006, "step": 65200 }, { "epoch": 1.3274300254452926, "grad_norm": 11.519572179608497, "learning_rate": 9.188742800823353e-06, "loss": 0.1543, "step": 65210 }, { "epoch": 1.3276335877862595, "grad_norm": 12.466111317587083, "learning_rate": 9.188354748357393e-06, "loss": 0.309, "step": 65220 }, { "epoch": 1.3278371501272264, "grad_norm": 3.029191264179181, "learning_rate": 9.187966611302215e-06, "loss": 0.2718, "step": 65230 }, { "epoch": 1.3280407124681934, "grad_norm": 15.356195611948081, "learning_rate": 9.187578389665655e-06, "loss": 0.2786, "step": 65240 }, { "epoch": 1.3282442748091603, "grad_norm": 25.431812516673595, "learning_rate": 9.187190083455558e-06, "loss": 0.2858, "step": 65250 }, { "epoch": 1.3284478371501272, "grad_norm": 15.910080874345665, "learning_rate": 9.186801692679764e-06, "loss": 0.1856, "step": 65260 }, { "epoch": 1.3286513994910942, "grad_norm": 9.633305517979117, "learning_rate": 9.186413217346118e-06, "loss": 0.2113, "step": 65270 }, { "epoch": 1.328854961832061, "grad_norm": 9.949396008164108, "learning_rate": 9.186024657462463e-06, "loss": 0.2536, "step": 65280 }, { "epoch": 1.329058524173028, "grad_norm": 4.9640988275417595, "learning_rate": 9.185636013036648e-06, "loss": 0.2457, "step": 65290 }, { "epoch": 1.329262086513995, "grad_norm": 14.082156637534217, "learning_rate": 9.185247284076526e-06, "loss": 0.2982, "step": 65300 }, { "epoch": 1.3294656488549619, "grad_norm": 7.09554377289445, "learning_rate": 9.184858470589941e-06, "loss": 0.2614, "step": 65310 }, { "epoch": 1.3296692111959287, "grad_norm": 3.9191860125713482, "learning_rate": 9.184469572584752e-06, "loss": 0.2389, "step": 65320 }, { "epoch": 1.3298727735368956, "grad_norm": 7.032386979813678, "learning_rate": 9.184080590068809e-06, "loss": 0.2327, "step": 65330 }, { "epoch": 1.3300763358778627, "grad_norm": 10.860334908167134, "learning_rate": 9.183691523049969e-06, "loss": 0.2812, "step": 65340 }, { "epoch": 1.3302798982188295, "grad_norm": 13.250124424922827, "learning_rate": 9.183302371536089e-06, "loss": 0.2639, "step": 65350 }, { "epoch": 1.3304834605597964, "grad_norm": 6.282132334142099, "learning_rate": 9.182913135535031e-06, "loss": 0.1793, "step": 65360 }, { "epoch": 1.3306870229007632, "grad_norm": 15.781272000443144, "learning_rate": 9.182523815054655e-06, "loss": 0.2417, "step": 65370 }, { "epoch": 1.3308905852417303, "grad_norm": 5.921836668099211, "learning_rate": 9.182134410102824e-06, "loss": 0.3337, "step": 65380 }, { "epoch": 1.3310941475826972, "grad_norm": 20.049026297150576, "learning_rate": 9.181744920687403e-06, "loss": 0.3269, "step": 65390 }, { "epoch": 1.331297709923664, "grad_norm": 26.062783527654325, "learning_rate": 9.181355346816255e-06, "loss": 0.2943, "step": 65400 }, { "epoch": 1.331501272264631, "grad_norm": 8.6147345175778, "learning_rate": 9.180965688497251e-06, "loss": 0.2403, "step": 65410 }, { "epoch": 1.331704834605598, "grad_norm": 10.320014383158808, "learning_rate": 9.180575945738259e-06, "loss": 0.4021, "step": 65420 }, { "epoch": 1.3319083969465648, "grad_norm": 4.8295398799440825, "learning_rate": 9.180186118547152e-06, "loss": 0.2201, "step": 65430 }, { "epoch": 1.332111959287532, "grad_norm": 3.7768145593537366, "learning_rate": 9.179796206931803e-06, "loss": 0.173, "step": 65440 }, { "epoch": 1.3323155216284988, "grad_norm": 11.34735574718887, "learning_rate": 9.179406210900086e-06, "loss": 0.1898, "step": 65450 }, { "epoch": 1.3325190839694656, "grad_norm": 15.144847435364793, "learning_rate": 9.179016130459877e-06, "loss": 0.1864, "step": 65460 }, { "epoch": 1.3327226463104327, "grad_norm": 1.5764272098019165, "learning_rate": 9.178625965619053e-06, "loss": 0.3009, "step": 65470 }, { "epoch": 1.3329262086513995, "grad_norm": 8.561220446533865, "learning_rate": 9.178235716385497e-06, "loss": 0.2145, "step": 65480 }, { "epoch": 1.3331297709923664, "grad_norm": 8.15987553327552, "learning_rate": 9.17784538276709e-06, "loss": 0.1908, "step": 65490 }, { "epoch": 1.3333333333333333, "grad_norm": 6.953701926522676, "learning_rate": 9.177454964771715e-06, "loss": 0.2339, "step": 65500 }, { "epoch": 1.3335368956743003, "grad_norm": 14.318956200688447, "learning_rate": 9.177064462407255e-06, "loss": 0.2149, "step": 65510 }, { "epoch": 1.3337404580152672, "grad_norm": 12.095983333816218, "learning_rate": 9.176673875681598e-06, "loss": 0.2169, "step": 65520 }, { "epoch": 1.333944020356234, "grad_norm": 10.701469750805495, "learning_rate": 9.176283204602634e-06, "loss": 0.2148, "step": 65530 }, { "epoch": 1.334147582697201, "grad_norm": 2.2448447334401616, "learning_rate": 9.17589244917825e-06, "loss": 0.2236, "step": 65540 }, { "epoch": 1.334351145038168, "grad_norm": 12.491305445467878, "learning_rate": 9.17550160941634e-06, "loss": 0.1968, "step": 65550 }, { "epoch": 1.3345547073791348, "grad_norm": 17.33670710445588, "learning_rate": 9.175110685324797e-06, "loss": 0.2192, "step": 65560 }, { "epoch": 1.3347582697201017, "grad_norm": 5.423868092627224, "learning_rate": 9.174719676911517e-06, "loss": 0.2087, "step": 65570 }, { "epoch": 1.3349618320610688, "grad_norm": 5.681044882537976, "learning_rate": 9.174328584184395e-06, "loss": 0.2457, "step": 65580 }, { "epoch": 1.3351653944020356, "grad_norm": 9.692856385485307, "learning_rate": 9.173937407151333e-06, "loss": 0.2462, "step": 65590 }, { "epoch": 1.3353689567430025, "grad_norm": 4.358879178000305, "learning_rate": 9.173546145820226e-06, "loss": 0.2315, "step": 65600 }, { "epoch": 1.3355725190839696, "grad_norm": 2.4353731044749303, "learning_rate": 9.17315480019898e-06, "loss": 0.1946, "step": 65610 }, { "epoch": 1.3357760814249364, "grad_norm": 12.625309762585204, "learning_rate": 9.172763370295498e-06, "loss": 0.2122, "step": 65620 }, { "epoch": 1.3359796437659033, "grad_norm": 5.566436905368442, "learning_rate": 9.172371856117685e-06, "loss": 0.183, "step": 65630 }, { "epoch": 1.3361832061068704, "grad_norm": 7.647258640936363, "learning_rate": 9.17198025767345e-06, "loss": 0.2034, "step": 65640 }, { "epoch": 1.3363867684478372, "grad_norm": 8.591925385255712, "learning_rate": 9.1715885749707e-06, "loss": 0.2241, "step": 65650 }, { "epoch": 1.336590330788804, "grad_norm": 3.492497260362895, "learning_rate": 9.171196808017345e-06, "loss": 0.2308, "step": 65660 }, { "epoch": 1.336793893129771, "grad_norm": 5.375219113531847, "learning_rate": 9.170804956821297e-06, "loss": 0.224, "step": 65670 }, { "epoch": 1.3369974554707378, "grad_norm": 13.498593151364954, "learning_rate": 9.170413021390472e-06, "loss": 0.2839, "step": 65680 }, { "epoch": 1.3372010178117049, "grad_norm": 12.1819159991176, "learning_rate": 9.170021001732785e-06, "loss": 0.2175, "step": 65690 }, { "epoch": 1.3374045801526717, "grad_norm": 0.9740286291028815, "learning_rate": 9.169628897856152e-06, "loss": 0.2634, "step": 65700 }, { "epoch": 1.3376081424936386, "grad_norm": 6.277836313304754, "learning_rate": 9.169236709768494e-06, "loss": 0.3207, "step": 65710 }, { "epoch": 1.3378117048346057, "grad_norm": 18.27859681106508, "learning_rate": 9.16884443747773e-06, "loss": 0.3062, "step": 65720 }, { "epoch": 1.3380152671755725, "grad_norm": 10.897824750934547, "learning_rate": 9.168452080991783e-06, "loss": 0.2269, "step": 65730 }, { "epoch": 1.3382188295165394, "grad_norm": 11.128289235924612, "learning_rate": 9.168059640318577e-06, "loss": 0.204, "step": 65740 }, { "epoch": 1.3384223918575064, "grad_norm": 9.196698737719492, "learning_rate": 9.167667115466039e-06, "loss": 0.2633, "step": 65750 }, { "epoch": 1.3386259541984733, "grad_norm": 6.489448673079278, "learning_rate": 9.167274506442095e-06, "loss": 0.3207, "step": 65760 }, { "epoch": 1.3388295165394402, "grad_norm": 6.810390989762832, "learning_rate": 9.166881813254676e-06, "loss": 0.2693, "step": 65770 }, { "epoch": 1.3390330788804072, "grad_norm": 11.129166718792188, "learning_rate": 9.166489035911711e-06, "loss": 0.3426, "step": 65780 }, { "epoch": 1.339236641221374, "grad_norm": 2.0228552633572177, "learning_rate": 9.166096174421132e-06, "loss": 0.2183, "step": 65790 }, { "epoch": 1.339440203562341, "grad_norm": 8.54059856131471, "learning_rate": 9.16570322879088e-06, "loss": 0.2355, "step": 65800 }, { "epoch": 1.339643765903308, "grad_norm": 4.826935117254774, "learning_rate": 9.16531019902888e-06, "loss": 0.1298, "step": 65810 }, { "epoch": 1.3398473282442749, "grad_norm": 5.484248255057336, "learning_rate": 9.164917085143078e-06, "loss": 0.1902, "step": 65820 }, { "epoch": 1.3400508905852417, "grad_norm": 9.422557663622703, "learning_rate": 9.164523887141412e-06, "loss": 0.2081, "step": 65830 }, { "epoch": 1.3402544529262086, "grad_norm": 12.522244991258468, "learning_rate": 9.16413060503182e-06, "loss": 0.2666, "step": 65840 }, { "epoch": 1.3404580152671755, "grad_norm": 8.193570377593934, "learning_rate": 9.16373723882225e-06, "loss": 0.2543, "step": 65850 }, { "epoch": 1.3406615776081425, "grad_norm": 11.87576538106231, "learning_rate": 9.163343788520642e-06, "loss": 0.2321, "step": 65860 }, { "epoch": 1.3408651399491094, "grad_norm": 3.2414989495902127, "learning_rate": 9.162950254134943e-06, "loss": 0.1871, "step": 65870 }, { "epoch": 1.3410687022900762, "grad_norm": 1.6842784580179597, "learning_rate": 9.162556635673103e-06, "loss": 0.2311, "step": 65880 }, { "epoch": 1.3412722646310433, "grad_norm": 12.9790992031712, "learning_rate": 9.16216293314307e-06, "loss": 0.1684, "step": 65890 }, { "epoch": 1.3414758269720102, "grad_norm": 4.009644557460139, "learning_rate": 9.161769146552796e-06, "loss": 0.1639, "step": 65900 }, { "epoch": 1.341679389312977, "grad_norm": 5.706913413307932, "learning_rate": 9.161375275910234e-06, "loss": 0.161, "step": 65910 }, { "epoch": 1.3418829516539441, "grad_norm": 22.12819067931537, "learning_rate": 9.160981321223338e-06, "loss": 0.245, "step": 65920 }, { "epoch": 1.342086513994911, "grad_norm": 23.528957695007097, "learning_rate": 9.160587282500065e-06, "loss": 0.3522, "step": 65930 }, { "epoch": 1.3422900763358778, "grad_norm": 5.341619920216853, "learning_rate": 9.160193159748374e-06, "loss": 0.2454, "step": 65940 }, { "epoch": 1.342493638676845, "grad_norm": 0.9071615805001962, "learning_rate": 9.159798952976223e-06, "loss": 0.1775, "step": 65950 }, { "epoch": 1.3426972010178118, "grad_norm": 22.85751920299393, "learning_rate": 9.159404662191573e-06, "loss": 0.257, "step": 65960 }, { "epoch": 1.3429007633587786, "grad_norm": 10.003422254983864, "learning_rate": 9.15901028740239e-06, "loss": 0.2655, "step": 65970 }, { "epoch": 1.3431043256997455, "grad_norm": 16.750669448644665, "learning_rate": 9.158615828616637e-06, "loss": 0.2583, "step": 65980 }, { "epoch": 1.3433078880407125, "grad_norm": 11.061293983558084, "learning_rate": 9.158221285842282e-06, "loss": 0.2972, "step": 65990 }, { "epoch": 1.3435114503816794, "grad_norm": 3.6409054236609206, "learning_rate": 9.157826659087292e-06, "loss": 0.2285, "step": 66000 }, { "epoch": 1.3437150127226463, "grad_norm": 5.697438954176085, "learning_rate": 9.157431948359636e-06, "loss": 0.1961, "step": 66010 }, { "epoch": 1.3439185750636131, "grad_norm": 11.283995464146647, "learning_rate": 9.15703715366729e-06, "loss": 0.2501, "step": 66020 }, { "epoch": 1.3441221374045802, "grad_norm": 5.509865501864426, "learning_rate": 9.156642275018221e-06, "loss": 0.3271, "step": 66030 }, { "epoch": 1.344325699745547, "grad_norm": 13.80946051843758, "learning_rate": 9.15624731242041e-06, "loss": 0.3466, "step": 66040 }, { "epoch": 1.344529262086514, "grad_norm": 3.7914503352181925, "learning_rate": 9.15585226588183e-06, "loss": 0.1707, "step": 66050 }, { "epoch": 1.344732824427481, "grad_norm": 1.1375747426744183, "learning_rate": 9.155457135410463e-06, "loss": 0.193, "step": 66060 }, { "epoch": 1.3449363867684478, "grad_norm": 7.421822259936566, "learning_rate": 9.155061921014285e-06, "loss": 0.1376, "step": 66070 }, { "epoch": 1.3451399491094147, "grad_norm": 8.589269973193373, "learning_rate": 9.15466662270128e-06, "loss": 0.2251, "step": 66080 }, { "epoch": 1.3453435114503818, "grad_norm": 0.9417254698360017, "learning_rate": 9.15427124047943e-06, "loss": 0.2855, "step": 66090 }, { "epoch": 1.3455470737913486, "grad_norm": 10.941033402549284, "learning_rate": 9.153875774356725e-06, "loss": 0.2088, "step": 66100 }, { "epoch": 1.3457506361323155, "grad_norm": 12.064017962991715, "learning_rate": 9.153480224341147e-06, "loss": 0.2993, "step": 66110 }, { "epoch": 1.3459541984732826, "grad_norm": 8.23478384918357, "learning_rate": 9.153084590440686e-06, "loss": 0.1904, "step": 66120 }, { "epoch": 1.3461577608142494, "grad_norm": 14.046387982251977, "learning_rate": 9.152688872663333e-06, "loss": 0.1529, "step": 66130 }, { "epoch": 1.3463613231552163, "grad_norm": 5.034539153270161, "learning_rate": 9.152293071017078e-06, "loss": 0.2954, "step": 66140 }, { "epoch": 1.3465648854961831, "grad_norm": 5.277950330776636, "learning_rate": 9.151897185509917e-06, "loss": 0.3552, "step": 66150 }, { "epoch": 1.34676844783715, "grad_norm": 15.210857742965615, "learning_rate": 9.151501216149845e-06, "loss": 0.1938, "step": 66160 }, { "epoch": 1.346972010178117, "grad_norm": 15.129239316433221, "learning_rate": 9.15110516294486e-06, "loss": 0.3084, "step": 66170 }, { "epoch": 1.347175572519084, "grad_norm": 5.613665195566633, "learning_rate": 9.150709025902959e-06, "loss": 0.2205, "step": 66180 }, { "epoch": 1.3473791348600508, "grad_norm": 8.070561399548811, "learning_rate": 9.150312805032142e-06, "loss": 0.2723, "step": 66190 }, { "epoch": 1.3475826972010179, "grad_norm": 1.4822248138773009, "learning_rate": 9.149916500340414e-06, "loss": 0.2012, "step": 66200 }, { "epoch": 1.3477862595419847, "grad_norm": 1.2999735988949603, "learning_rate": 9.149520111835777e-06, "loss": 0.2502, "step": 66210 }, { "epoch": 1.3479898218829516, "grad_norm": 0.8032806407612286, "learning_rate": 9.149123639526235e-06, "loss": 0.1472, "step": 66220 }, { "epoch": 1.3481933842239187, "grad_norm": 12.585534361085358, "learning_rate": 9.148727083419799e-06, "loss": 0.2498, "step": 66230 }, { "epoch": 1.3483969465648855, "grad_norm": 4.736690016811126, "learning_rate": 9.148330443524475e-06, "loss": 0.282, "step": 66240 }, { "epoch": 1.3486005089058524, "grad_norm": 11.54154727727448, "learning_rate": 9.147933719848275e-06, "loss": 0.316, "step": 66250 }, { "epoch": 1.3488040712468194, "grad_norm": 0.4945697746656254, "learning_rate": 9.14753691239921e-06, "loss": 0.1863, "step": 66260 }, { "epoch": 1.3490076335877863, "grad_norm": 9.612295554907075, "learning_rate": 9.147140021185297e-06, "loss": 0.2004, "step": 66270 }, { "epoch": 1.3492111959287532, "grad_norm": 7.499263995966094, "learning_rate": 9.146743046214549e-06, "loss": 0.2697, "step": 66280 }, { "epoch": 1.3494147582697202, "grad_norm": 4.6114351265101154, "learning_rate": 9.146345987494985e-06, "loss": 0.2476, "step": 66290 }, { "epoch": 1.349618320610687, "grad_norm": 4.632539846272268, "learning_rate": 9.145948845034622e-06, "loss": 0.2117, "step": 66300 }, { "epoch": 1.349821882951654, "grad_norm": 12.011385123950303, "learning_rate": 9.145551618841482e-06, "loss": 0.1958, "step": 66310 }, { "epoch": 1.3500254452926208, "grad_norm": 13.873273099446202, "learning_rate": 9.145154308923588e-06, "loss": 0.276, "step": 66320 }, { "epoch": 1.3502290076335877, "grad_norm": 5.6938423455047875, "learning_rate": 9.144756915288963e-06, "loss": 0.2567, "step": 66330 }, { "epoch": 1.3504325699745547, "grad_norm": 4.891927048751542, "learning_rate": 9.144359437945635e-06, "loss": 0.1862, "step": 66340 }, { "epoch": 1.3506361323155216, "grad_norm": 6.387819476456787, "learning_rate": 9.14396187690163e-06, "loss": 0.154, "step": 66350 }, { "epoch": 1.3508396946564885, "grad_norm": 4.843300067024291, "learning_rate": 9.143564232164978e-06, "loss": 0.2106, "step": 66360 }, { "epoch": 1.3510432569974555, "grad_norm": 10.664800167215489, "learning_rate": 9.14316650374371e-06, "loss": 0.3256, "step": 66370 }, { "epoch": 1.3512468193384224, "grad_norm": 7.962064833888198, "learning_rate": 9.142768691645855e-06, "loss": 0.2144, "step": 66380 }, { "epoch": 1.3514503816793892, "grad_norm": 7.96805852530379, "learning_rate": 9.142370795879454e-06, "loss": 0.1476, "step": 66390 }, { "epoch": 1.3516539440203563, "grad_norm": 22.04652064688918, "learning_rate": 9.141972816452537e-06, "loss": 0.278, "step": 66400 }, { "epoch": 1.3518575063613232, "grad_norm": 9.955477914247616, "learning_rate": 9.141574753373146e-06, "loss": 0.2036, "step": 66410 }, { "epoch": 1.35206106870229, "grad_norm": 2.28760306189043, "learning_rate": 9.141176606649318e-06, "loss": 0.1084, "step": 66420 }, { "epoch": 1.3522646310432571, "grad_norm": 0.4235481497336454, "learning_rate": 9.140778376289093e-06, "loss": 0.1777, "step": 66430 }, { "epoch": 1.352468193384224, "grad_norm": 22.00719923093673, "learning_rate": 9.140380062300518e-06, "loss": 0.2142, "step": 66440 }, { "epoch": 1.3526717557251908, "grad_norm": 17.053889871043307, "learning_rate": 9.139981664691633e-06, "loss": 0.2694, "step": 66450 }, { "epoch": 1.3528753180661577, "grad_norm": 6.968209434248124, "learning_rate": 9.139583183470486e-06, "loss": 0.1619, "step": 66460 }, { "epoch": 1.3530788804071248, "grad_norm": 6.377600672651844, "learning_rate": 9.139184618645124e-06, "loss": 0.2351, "step": 66470 }, { "epoch": 1.3532824427480916, "grad_norm": 24.378123611028908, "learning_rate": 9.1387859702236e-06, "loss": 0.2443, "step": 66480 }, { "epoch": 1.3534860050890585, "grad_norm": 1.600571502944966, "learning_rate": 9.138387238213962e-06, "loss": 0.2374, "step": 66490 }, { "epoch": 1.3536895674300253, "grad_norm": 4.261515280495484, "learning_rate": 9.137988422624263e-06, "loss": 0.2335, "step": 66500 }, { "epoch": 1.3538931297709924, "grad_norm": 12.79905228015491, "learning_rate": 9.137589523462559e-06, "loss": 0.2894, "step": 66510 }, { "epoch": 1.3540966921119593, "grad_norm": 9.498742172725086, "learning_rate": 9.137190540736903e-06, "loss": 0.329, "step": 66520 }, { "epoch": 1.3543002544529261, "grad_norm": 8.435635316938539, "learning_rate": 9.136791474455357e-06, "loss": 0.1513, "step": 66530 }, { "epoch": 1.3545038167938932, "grad_norm": 6.990747091877037, "learning_rate": 9.13639232462598e-06, "loss": 0.2733, "step": 66540 }, { "epoch": 1.35470737913486, "grad_norm": 10.008861475350827, "learning_rate": 9.135993091256829e-06, "loss": 0.2594, "step": 66550 }, { "epoch": 1.354910941475827, "grad_norm": 6.619545740866206, "learning_rate": 9.135593774355972e-06, "loss": 0.1828, "step": 66560 }, { "epoch": 1.355114503816794, "grad_norm": 5.830666294425751, "learning_rate": 9.135194373931475e-06, "loss": 0.1872, "step": 66570 }, { "epoch": 1.3553180661577608, "grad_norm": 14.395518954986427, "learning_rate": 9.134794889991398e-06, "loss": 0.3102, "step": 66580 }, { "epoch": 1.3555216284987277, "grad_norm": 13.615838290638381, "learning_rate": 9.134395322543814e-06, "loss": 0.2961, "step": 66590 }, { "epoch": 1.3557251908396948, "grad_norm": 4.662532704776769, "learning_rate": 9.133995671596792e-06, "loss": 0.2027, "step": 66600 }, { "epoch": 1.3559287531806616, "grad_norm": 7.122945671159371, "learning_rate": 9.133595937158401e-06, "loss": 0.1747, "step": 66610 }, { "epoch": 1.3561323155216285, "grad_norm": 12.054909594438536, "learning_rate": 9.133196119236718e-06, "loss": 0.2698, "step": 66620 }, { "epoch": 1.3563358778625954, "grad_norm": 9.045617395677308, "learning_rate": 9.132796217839814e-06, "loss": 0.2688, "step": 66630 }, { "epoch": 1.3565394402035622, "grad_norm": 15.655868735817982, "learning_rate": 9.132396232975768e-06, "loss": 0.259, "step": 66640 }, { "epoch": 1.3567430025445293, "grad_norm": 16.029390571119187, "learning_rate": 9.131996164652658e-06, "loss": 0.1613, "step": 66650 }, { "epoch": 1.3569465648854961, "grad_norm": 5.722341326855232, "learning_rate": 9.131596012878565e-06, "loss": 0.2921, "step": 66660 }, { "epoch": 1.357150127226463, "grad_norm": 4.3838701074959365, "learning_rate": 9.131195777661568e-06, "loss": 0.2333, "step": 66670 }, { "epoch": 1.35735368956743, "grad_norm": 7.3443502426553895, "learning_rate": 9.13079545900975e-06, "loss": 0.2566, "step": 66680 }, { "epoch": 1.357557251908397, "grad_norm": 5.2814012598261275, "learning_rate": 9.130395056931198e-06, "loss": 0.2377, "step": 66690 }, { "epoch": 1.3577608142493638, "grad_norm": 4.891277453039411, "learning_rate": 9.129994571433999e-06, "loss": 0.2305, "step": 66700 }, { "epoch": 1.3579643765903309, "grad_norm": 2.9849830998872253, "learning_rate": 9.129594002526239e-06, "loss": 0.1527, "step": 66710 }, { "epoch": 1.3581679389312977, "grad_norm": 13.137043220490224, "learning_rate": 9.12919335021601e-06, "loss": 0.2883, "step": 66720 }, { "epoch": 1.3583715012722646, "grad_norm": 10.206659795608985, "learning_rate": 9.128792614511401e-06, "loss": 0.1356, "step": 66730 }, { "epoch": 1.3585750636132317, "grad_norm": 2.3566048271099973, "learning_rate": 9.12839179542051e-06, "loss": 0.1951, "step": 66740 }, { "epoch": 1.3587786259541985, "grad_norm": 14.07927121263859, "learning_rate": 9.127990892951429e-06, "loss": 0.1983, "step": 66750 }, { "epoch": 1.3589821882951654, "grad_norm": 10.63468505809267, "learning_rate": 9.127589907112253e-06, "loss": 0.2772, "step": 66760 }, { "epoch": 1.3591857506361325, "grad_norm": 10.894942565377638, "learning_rate": 9.127188837911084e-06, "loss": 0.1702, "step": 66770 }, { "epoch": 1.3593893129770993, "grad_norm": 11.048734116362658, "learning_rate": 9.12678768535602e-06, "loss": 0.2105, "step": 66780 }, { "epoch": 1.3595928753180662, "grad_norm": 4.309601999957437, "learning_rate": 9.126386449455165e-06, "loss": 0.1838, "step": 66790 }, { "epoch": 1.359796437659033, "grad_norm": 17.203262996863216, "learning_rate": 9.125985130216619e-06, "loss": 0.2607, "step": 66800 }, { "epoch": 1.3599999999999999, "grad_norm": 8.731224853747914, "learning_rate": 9.125583727648491e-06, "loss": 0.3323, "step": 66810 }, { "epoch": 1.360203562340967, "grad_norm": 6.606043965792874, "learning_rate": 9.125182241758883e-06, "loss": 0.2799, "step": 66820 }, { "epoch": 1.3604071246819338, "grad_norm": 8.813956131436667, "learning_rate": 9.124780672555908e-06, "loss": 0.2397, "step": 66830 }, { "epoch": 1.3606106870229007, "grad_norm": 6.86157566336264, "learning_rate": 9.124379020047675e-06, "loss": 0.2628, "step": 66840 }, { "epoch": 1.3608142493638677, "grad_norm": 13.704578049787333, "learning_rate": 9.123977284242295e-06, "loss": 0.2074, "step": 66850 }, { "epoch": 1.3610178117048346, "grad_norm": 3.4362129088533595, "learning_rate": 9.123575465147882e-06, "loss": 0.167, "step": 66860 }, { "epoch": 1.3612213740458015, "grad_norm": 9.878369350794175, "learning_rate": 9.123173562772553e-06, "loss": 0.2681, "step": 66870 }, { "epoch": 1.3614249363867685, "grad_norm": 14.295830751576274, "learning_rate": 9.122771577124422e-06, "loss": 0.1695, "step": 66880 }, { "epoch": 1.3616284987277354, "grad_norm": 6.519651284504179, "learning_rate": 9.12236950821161e-06, "loss": 0.2324, "step": 66890 }, { "epoch": 1.3618320610687022, "grad_norm": 11.181040202740599, "learning_rate": 9.121967356042234e-06, "loss": 0.1336, "step": 66900 }, { "epoch": 1.3620356234096693, "grad_norm": 17.764332375324905, "learning_rate": 9.12156512062442e-06, "loss": 0.2396, "step": 66910 }, { "epoch": 1.3622391857506362, "grad_norm": 8.972700531704929, "learning_rate": 9.12116280196629e-06, "loss": 0.3023, "step": 66920 }, { "epoch": 1.362442748091603, "grad_norm": 13.231232753475814, "learning_rate": 9.120760400075968e-06, "loss": 0.2224, "step": 66930 }, { "epoch": 1.36264631043257, "grad_norm": 4.632267587068589, "learning_rate": 9.120357914961582e-06, "loss": 0.1533, "step": 66940 }, { "epoch": 1.362849872773537, "grad_norm": 15.756678375518678, "learning_rate": 9.119955346631262e-06, "loss": 0.272, "step": 66950 }, { "epoch": 1.3630534351145038, "grad_norm": 15.06808260937224, "learning_rate": 9.119552695093136e-06, "loss": 0.1689, "step": 66960 }, { "epoch": 1.3632569974554707, "grad_norm": 9.776423018231004, "learning_rate": 9.119149960355338e-06, "loss": 0.1728, "step": 66970 }, { "epoch": 1.3634605597964375, "grad_norm": 15.841466842461733, "learning_rate": 9.118747142426001e-06, "loss": 0.2957, "step": 66980 }, { "epoch": 1.3636641221374046, "grad_norm": 9.665512455672836, "learning_rate": 9.118344241313262e-06, "loss": 0.303, "step": 66990 }, { "epoch": 1.3638676844783715, "grad_norm": 9.484291039177078, "learning_rate": 9.117941257025256e-06, "loss": 0.1768, "step": 67000 }, { "epoch": 1.3640712468193383, "grad_norm": 3.7907115942153524, "learning_rate": 9.117538189570122e-06, "loss": 0.2775, "step": 67010 }, { "epoch": 1.3642748091603054, "grad_norm": 11.228922197515933, "learning_rate": 9.117135038956001e-06, "loss": 0.25, "step": 67020 }, { "epoch": 1.3644783715012723, "grad_norm": 11.374355930860379, "learning_rate": 9.116731805191034e-06, "loss": 0.2293, "step": 67030 }, { "epoch": 1.3646819338422391, "grad_norm": 10.156438040605014, "learning_rate": 9.116328488283368e-06, "loss": 0.2148, "step": 67040 }, { "epoch": 1.3648854961832062, "grad_norm": 7.931531102081468, "learning_rate": 9.115925088241145e-06, "loss": 0.3333, "step": 67050 }, { "epoch": 1.365089058524173, "grad_norm": 4.412900053036475, "learning_rate": 9.115521605072514e-06, "loss": 0.2224, "step": 67060 }, { "epoch": 1.36529262086514, "grad_norm": 8.389831463869339, "learning_rate": 9.115118038785623e-06, "loss": 0.2221, "step": 67070 }, { "epoch": 1.365496183206107, "grad_norm": 18.477521813170735, "learning_rate": 9.114714389388624e-06, "loss": 0.2236, "step": 67080 }, { "epoch": 1.3656997455470739, "grad_norm": 0.686455200499253, "learning_rate": 9.114310656889667e-06, "loss": 0.2673, "step": 67090 }, { "epoch": 1.3659033078880407, "grad_norm": 9.357630377269667, "learning_rate": 9.113906841296907e-06, "loss": 0.1373, "step": 67100 }, { "epoch": 1.3661068702290076, "grad_norm": 9.832246339620898, "learning_rate": 9.1135029426185e-06, "loss": 0.2109, "step": 67110 }, { "epoch": 1.3663104325699744, "grad_norm": 5.856766051216551, "learning_rate": 9.113098960862604e-06, "loss": 0.3129, "step": 67120 }, { "epoch": 1.3665139949109415, "grad_norm": 14.233220180934929, "learning_rate": 9.112694896037375e-06, "loss": 0.3497, "step": 67130 }, { "epoch": 1.3667175572519084, "grad_norm": 11.111860062387422, "learning_rate": 9.112290748150977e-06, "loss": 0.2842, "step": 67140 }, { "epoch": 1.3669211195928752, "grad_norm": 2.8239915466783483, "learning_rate": 9.11188651721157e-06, "loss": 0.2368, "step": 67150 }, { "epoch": 1.3671246819338423, "grad_norm": 10.361301664516771, "learning_rate": 9.11148220322732e-06, "loss": 0.2328, "step": 67160 }, { "epoch": 1.3673282442748091, "grad_norm": 16.763785054994482, "learning_rate": 9.11107780620639e-06, "loss": 0.2979, "step": 67170 }, { "epoch": 1.367531806615776, "grad_norm": 4.143322502779735, "learning_rate": 9.110673326156948e-06, "loss": 0.1658, "step": 67180 }, { "epoch": 1.367735368956743, "grad_norm": 21.990903108838456, "learning_rate": 9.110268763087165e-06, "loss": 0.2934, "step": 67190 }, { "epoch": 1.36793893129771, "grad_norm": 10.793115745889827, "learning_rate": 9.10986411700521e-06, "loss": 0.2608, "step": 67200 }, { "epoch": 1.3681424936386768, "grad_norm": 7.283441453541769, "learning_rate": 9.109459387919256e-06, "loss": 0.1979, "step": 67210 }, { "epoch": 1.3683460559796439, "grad_norm": 1.5788125429947921, "learning_rate": 9.109054575837477e-06, "loss": 0.2001, "step": 67220 }, { "epoch": 1.3685496183206107, "grad_norm": 8.746572066257437, "learning_rate": 9.108649680768047e-06, "loss": 0.2114, "step": 67230 }, { "epoch": 1.3687531806615776, "grad_norm": 13.2822082528819, "learning_rate": 9.108244702719145e-06, "loss": 0.3181, "step": 67240 }, { "epoch": 1.3689567430025447, "grad_norm": 5.446552691297838, "learning_rate": 9.107839641698952e-06, "loss": 0.3199, "step": 67250 }, { "epoch": 1.3691603053435115, "grad_norm": 14.074390831127564, "learning_rate": 9.107434497715643e-06, "loss": 0.279, "step": 67260 }, { "epoch": 1.3693638676844784, "grad_norm": 8.877120269477576, "learning_rate": 9.107029270777408e-06, "loss": 0.2483, "step": 67270 }, { "epoch": 1.3695674300254452, "grad_norm": 6.268408979640504, "learning_rate": 9.106623960892425e-06, "loss": 0.1259, "step": 67280 }, { "epoch": 1.369770992366412, "grad_norm": 14.013179604188384, "learning_rate": 9.10621856806888e-06, "loss": 0.2737, "step": 67290 }, { "epoch": 1.3699745547073792, "grad_norm": 36.66600523466054, "learning_rate": 9.105813092314966e-06, "loss": 0.2874, "step": 67300 }, { "epoch": 1.370178117048346, "grad_norm": 29.87627165183626, "learning_rate": 9.105407533638866e-06, "loss": 0.1698, "step": 67310 }, { "epoch": 1.3703816793893129, "grad_norm": 0.4644167668281592, "learning_rate": 9.105001892048774e-06, "loss": 0.1634, "step": 67320 }, { "epoch": 1.37058524173028, "grad_norm": 11.261077932975688, "learning_rate": 9.10459616755288e-06, "loss": 0.3576, "step": 67330 }, { "epoch": 1.3707888040712468, "grad_norm": 5.347931535567278, "learning_rate": 9.104190360159383e-06, "loss": 0.1779, "step": 67340 }, { "epoch": 1.3709923664122137, "grad_norm": 16.99504839018168, "learning_rate": 9.103784469876473e-06, "loss": 0.1767, "step": 67350 }, { "epoch": 1.3711959287531807, "grad_norm": 2.059805831228463, "learning_rate": 9.10337849671235e-06, "loss": 0.2116, "step": 67360 }, { "epoch": 1.3713994910941476, "grad_norm": 9.565217721171841, "learning_rate": 9.102972440675212e-06, "loss": 0.3058, "step": 67370 }, { "epoch": 1.3716030534351145, "grad_norm": 13.389622362371922, "learning_rate": 9.102566301773264e-06, "loss": 0.2822, "step": 67380 }, { "epoch": 1.3718066157760815, "grad_norm": 15.690526742901758, "learning_rate": 9.102160080014704e-06, "loss": 0.2742, "step": 67390 }, { "epoch": 1.3720101781170484, "grad_norm": 16.41720369935429, "learning_rate": 9.101753775407737e-06, "loss": 0.2716, "step": 67400 }, { "epoch": 1.3722137404580153, "grad_norm": 8.801045729503642, "learning_rate": 9.10134738796057e-06, "loss": 0.2402, "step": 67410 }, { "epoch": 1.372417302798982, "grad_norm": 11.502246010027608, "learning_rate": 9.100940917681411e-06, "loss": 0.1915, "step": 67420 }, { "epoch": 1.3726208651399492, "grad_norm": 17.340999651563266, "learning_rate": 9.100534364578466e-06, "loss": 0.2384, "step": 67430 }, { "epoch": 1.372824427480916, "grad_norm": 3.95501726725054, "learning_rate": 9.10012772865995e-06, "loss": 0.187, "step": 67440 }, { "epoch": 1.373027989821883, "grad_norm": 11.544322003385505, "learning_rate": 9.099721009934073e-06, "loss": 0.2975, "step": 67450 }, { "epoch": 1.3732315521628498, "grad_norm": 8.948960659239374, "learning_rate": 9.099314208409051e-06, "loss": 0.2442, "step": 67460 }, { "epoch": 1.3734351145038168, "grad_norm": 10.748706280565504, "learning_rate": 9.098907324093096e-06, "loss": 0.2976, "step": 67470 }, { "epoch": 1.3736386768447837, "grad_norm": 8.233570666896963, "learning_rate": 9.09850035699443e-06, "loss": 0.2839, "step": 67480 }, { "epoch": 1.3738422391857505, "grad_norm": 9.859909406288798, "learning_rate": 9.09809330712127e-06, "loss": 0.2723, "step": 67490 }, { "epoch": 1.3740458015267176, "grad_norm": 7.021192656356324, "learning_rate": 9.097686174481839e-06, "loss": 0.2194, "step": 67500 }, { "epoch": 1.3742493638676845, "grad_norm": 11.099187837696551, "learning_rate": 9.097278959084356e-06, "loss": 0.2073, "step": 67510 }, { "epoch": 1.3744529262086513, "grad_norm": 11.233437869150187, "learning_rate": 9.096871660937047e-06, "loss": 0.2155, "step": 67520 }, { "epoch": 1.3746564885496184, "grad_norm": 10.312817455961683, "learning_rate": 9.096464280048139e-06, "loss": 0.267, "step": 67530 }, { "epoch": 1.3748600508905853, "grad_norm": 9.511977133199434, "learning_rate": 9.096056816425858e-06, "loss": 0.238, "step": 67540 }, { "epoch": 1.3750636132315521, "grad_norm": 12.732707947643922, "learning_rate": 9.095649270078434e-06, "loss": 0.3142, "step": 67550 }, { "epoch": 1.3752671755725192, "grad_norm": 4.476768881129382, "learning_rate": 9.095241641014098e-06, "loss": 0.228, "step": 67560 }, { "epoch": 1.375470737913486, "grad_norm": 18.668568494936284, "learning_rate": 9.094833929241083e-06, "loss": 0.2351, "step": 67570 }, { "epoch": 1.375674300254453, "grad_norm": 5.683271586531376, "learning_rate": 9.094426134767623e-06, "loss": 0.1922, "step": 67580 }, { "epoch": 1.3758778625954198, "grad_norm": 6.302925232662444, "learning_rate": 9.094018257601953e-06, "loss": 0.2688, "step": 67590 }, { "epoch": 1.3760814249363866, "grad_norm": 12.377512119395888, "learning_rate": 9.093610297752311e-06, "loss": 0.3572, "step": 67600 }, { "epoch": 1.3762849872773537, "grad_norm": 9.741758788236947, "learning_rate": 9.093202255226936e-06, "loss": 0.1711, "step": 67610 }, { "epoch": 1.3764885496183206, "grad_norm": 9.706887859970545, "learning_rate": 9.092794130034071e-06, "loss": 0.1697, "step": 67620 }, { "epoch": 1.3766921119592874, "grad_norm": 5.028807984740871, "learning_rate": 9.092385922181956e-06, "loss": 0.2664, "step": 67630 }, { "epoch": 1.3768956743002545, "grad_norm": 5.826972467339242, "learning_rate": 9.091977631678836e-06, "loss": 0.2463, "step": 67640 }, { "epoch": 1.3770992366412214, "grad_norm": 9.592284448870705, "learning_rate": 9.091569258532958e-06, "loss": 0.2333, "step": 67650 }, { "epoch": 1.3773027989821882, "grad_norm": 10.012383506863634, "learning_rate": 9.091160802752568e-06, "loss": 0.2594, "step": 67660 }, { "epoch": 1.3775063613231553, "grad_norm": 10.93462114720945, "learning_rate": 9.090752264345916e-06, "loss": 0.1761, "step": 67670 }, { "epoch": 1.3777099236641221, "grad_norm": 6.381759362409219, "learning_rate": 9.090343643321255e-06, "loss": 0.2969, "step": 67680 }, { "epoch": 1.377913486005089, "grad_norm": 8.107508643851542, "learning_rate": 9.089934939686836e-06, "loss": 0.1727, "step": 67690 }, { "epoch": 1.378117048346056, "grad_norm": 19.90075460859063, "learning_rate": 9.089526153450912e-06, "loss": 0.2923, "step": 67700 }, { "epoch": 1.378320610687023, "grad_norm": 7.223837110137435, "learning_rate": 9.08911728462174e-06, "loss": 0.1871, "step": 67710 }, { "epoch": 1.3785241730279898, "grad_norm": 1.5528381786902699, "learning_rate": 9.088708333207576e-06, "loss": 0.2594, "step": 67720 }, { "epoch": 1.3787277353689569, "grad_norm": 10.416624293730777, "learning_rate": 9.088299299216684e-06, "loss": 0.2328, "step": 67730 }, { "epoch": 1.3789312977099237, "grad_norm": 22.989324082312397, "learning_rate": 9.08789018265732e-06, "loss": 0.266, "step": 67740 }, { "epoch": 1.3791348600508906, "grad_norm": 20.31108452820964, "learning_rate": 9.087480983537749e-06, "loss": 0.3254, "step": 67750 }, { "epoch": 1.3793384223918574, "grad_norm": 4.816080302439958, "learning_rate": 9.087071701866236e-06, "loss": 0.207, "step": 67760 }, { "epoch": 1.3795419847328243, "grad_norm": 4.89478539485482, "learning_rate": 9.086662337651045e-06, "loss": 0.2559, "step": 67770 }, { "epoch": 1.3797455470737914, "grad_norm": 9.766379702561023, "learning_rate": 9.086252890900445e-06, "loss": 0.2596, "step": 67780 }, { "epoch": 1.3799491094147582, "grad_norm": 5.40493560894019, "learning_rate": 9.085843361622703e-06, "loss": 0.2634, "step": 67790 }, { "epoch": 1.380152671755725, "grad_norm": 0.38895219664139735, "learning_rate": 9.085433749826094e-06, "loss": 0.2252, "step": 67800 }, { "epoch": 1.3803562340966922, "grad_norm": 5.347747042927615, "learning_rate": 9.085024055518887e-06, "loss": 0.2024, "step": 67810 }, { "epoch": 1.380559796437659, "grad_norm": 13.831990388208638, "learning_rate": 9.084614278709358e-06, "loss": 0.2219, "step": 67820 }, { "epoch": 1.3807633587786259, "grad_norm": 5.782734258342773, "learning_rate": 9.084204419405785e-06, "loss": 0.2874, "step": 67830 }, { "epoch": 1.380966921119593, "grad_norm": 6.625204844991468, "learning_rate": 9.08379447761644e-06, "loss": 0.2695, "step": 67840 }, { "epoch": 1.3811704834605598, "grad_norm": 6.024353302215344, "learning_rate": 9.083384453349607e-06, "loss": 0.1866, "step": 67850 }, { "epoch": 1.3813740458015267, "grad_norm": 5.195602766088198, "learning_rate": 9.082974346613566e-06, "loss": 0.2512, "step": 67860 }, { "epoch": 1.3815776081424938, "grad_norm": 14.371874806846039, "learning_rate": 9.0825641574166e-06, "loss": 0.1994, "step": 67870 }, { "epoch": 1.3817811704834606, "grad_norm": 13.321001507312383, "learning_rate": 9.08215388576699e-06, "loss": 0.2452, "step": 67880 }, { "epoch": 1.3819847328244275, "grad_norm": 10.487370490262578, "learning_rate": 9.081743531673026e-06, "loss": 0.2477, "step": 67890 }, { "epoch": 1.3821882951653943, "grad_norm": 5.143062026067292, "learning_rate": 9.081333095142995e-06, "loss": 0.3523, "step": 67900 }, { "epoch": 1.3823918575063614, "grad_norm": 0.4454615645742732, "learning_rate": 9.080922576185182e-06, "loss": 0.228, "step": 67910 }, { "epoch": 1.3825954198473283, "grad_norm": 7.942631245474415, "learning_rate": 9.080511974807885e-06, "loss": 0.2247, "step": 67920 }, { "epoch": 1.3827989821882951, "grad_norm": 4.941573855140092, "learning_rate": 9.080101291019391e-06, "loss": 0.2117, "step": 67930 }, { "epoch": 1.383002544529262, "grad_norm": 7.686894616920702, "learning_rate": 9.079690524827997e-06, "loss": 0.161, "step": 67940 }, { "epoch": 1.383206106870229, "grad_norm": 11.191695853531707, "learning_rate": 9.079279676241998e-06, "loss": 0.2412, "step": 67950 }, { "epoch": 1.383409669211196, "grad_norm": 8.888496385332221, "learning_rate": 9.078868745269694e-06, "loss": 0.2292, "step": 67960 }, { "epoch": 1.3836132315521628, "grad_norm": 5.0475416235008215, "learning_rate": 9.07845773191938e-06, "loss": 0.2162, "step": 67970 }, { "epoch": 1.3838167938931298, "grad_norm": 14.006269745435402, "learning_rate": 9.07804663619936e-06, "loss": 0.2889, "step": 67980 }, { "epoch": 1.3840203562340967, "grad_norm": 18.262569633289413, "learning_rate": 9.077635458117936e-06, "loss": 0.2499, "step": 67990 }, { "epoch": 1.3842239185750635, "grad_norm": 7.33864240044048, "learning_rate": 9.07722419768341e-06, "loss": 0.2758, "step": 68000 }, { "epoch": 1.3844274809160306, "grad_norm": 20.977084685029002, "learning_rate": 9.076812854904091e-06, "loss": 0.2282, "step": 68010 }, { "epoch": 1.3846310432569975, "grad_norm": 6.463086213303182, "learning_rate": 9.076401429788286e-06, "loss": 0.2259, "step": 68020 }, { "epoch": 1.3848346055979643, "grad_norm": 9.350805315905736, "learning_rate": 9.075989922344302e-06, "loss": 0.2634, "step": 68030 }, { "epoch": 1.3850381679389314, "grad_norm": 8.698213182855316, "learning_rate": 9.075578332580454e-06, "loss": 0.268, "step": 68040 }, { "epoch": 1.3852417302798983, "grad_norm": 11.892634220871406, "learning_rate": 9.07516666050505e-06, "loss": 0.2456, "step": 68050 }, { "epoch": 1.3854452926208651, "grad_norm": 7.6147097138415525, "learning_rate": 9.074754906126408e-06, "loss": 0.1794, "step": 68060 }, { "epoch": 1.385648854961832, "grad_norm": 5.635361608643161, "learning_rate": 9.074343069452842e-06, "loss": 0.189, "step": 68070 }, { "epoch": 1.3858524173027988, "grad_norm": 2.1174498720069916, "learning_rate": 9.07393115049267e-06, "loss": 0.2233, "step": 68080 }, { "epoch": 1.386055979643766, "grad_norm": 17.042479654336653, "learning_rate": 9.073519149254213e-06, "loss": 0.2838, "step": 68090 }, { "epoch": 1.3862595419847328, "grad_norm": 14.941852047003813, "learning_rate": 9.073107065745788e-06, "loss": 0.2629, "step": 68100 }, { "epoch": 1.3864631043256996, "grad_norm": 11.675398024460117, "learning_rate": 9.07269489997572e-06, "loss": 0.1962, "step": 68110 }, { "epoch": 1.3866666666666667, "grad_norm": 1.3893021990189767, "learning_rate": 9.072282651952334e-06, "loss": 0.2132, "step": 68120 }, { "epoch": 1.3868702290076336, "grad_norm": 6.549209765232652, "learning_rate": 9.071870321683953e-06, "loss": 0.2305, "step": 68130 }, { "epoch": 1.3870737913486004, "grad_norm": 12.490304053175528, "learning_rate": 9.071457909178906e-06, "loss": 0.3543, "step": 68140 }, { "epoch": 1.3872773536895675, "grad_norm": 5.3763462859827555, "learning_rate": 9.071045414445525e-06, "loss": 0.1942, "step": 68150 }, { "epoch": 1.3874809160305344, "grad_norm": 8.79920783510481, "learning_rate": 9.070632837492138e-06, "loss": 0.2935, "step": 68160 }, { "epoch": 1.3876844783715012, "grad_norm": 10.229162537381358, "learning_rate": 9.070220178327076e-06, "loss": 0.2841, "step": 68170 }, { "epoch": 1.3878880407124683, "grad_norm": 12.827436765761492, "learning_rate": 9.069807436958676e-06, "loss": 0.296, "step": 68180 }, { "epoch": 1.3880916030534352, "grad_norm": 5.760658273003755, "learning_rate": 9.069394613395274e-06, "loss": 0.2423, "step": 68190 }, { "epoch": 1.388295165394402, "grad_norm": 7.418366870422591, "learning_rate": 9.068981707645206e-06, "loss": 0.2166, "step": 68200 }, { "epoch": 1.388498727735369, "grad_norm": 4.807816131566804, "learning_rate": 9.06856871971681e-06, "loss": 0.1488, "step": 68210 }, { "epoch": 1.388702290076336, "grad_norm": 5.202858796401607, "learning_rate": 9.06815564961843e-06, "loss": 0.1358, "step": 68220 }, { "epoch": 1.3889058524173028, "grad_norm": 8.248372724090544, "learning_rate": 9.067742497358407e-06, "loss": 0.2674, "step": 68230 }, { "epoch": 1.3891094147582697, "grad_norm": 4.459960846729413, "learning_rate": 9.067329262945083e-06, "loss": 0.3351, "step": 68240 }, { "epoch": 1.3893129770992365, "grad_norm": 10.000624999701484, "learning_rate": 9.066915946386809e-06, "loss": 0.1849, "step": 68250 }, { "epoch": 1.3895165394402036, "grad_norm": 13.377962213767171, "learning_rate": 9.066502547691926e-06, "loss": 0.2427, "step": 68260 }, { "epoch": 1.3897201017811704, "grad_norm": 8.68865339365669, "learning_rate": 9.066089066868788e-06, "loss": 0.2334, "step": 68270 }, { "epoch": 1.3899236641221373, "grad_norm": 12.412811868351096, "learning_rate": 9.065675503925745e-06, "loss": 0.2638, "step": 68280 }, { "epoch": 1.3901272264631044, "grad_norm": 2.1706648614719937, "learning_rate": 9.065261858871149e-06, "loss": 0.2029, "step": 68290 }, { "epoch": 1.3903307888040712, "grad_norm": 7.208936611994776, "learning_rate": 9.064848131713354e-06, "loss": 0.2388, "step": 68300 }, { "epoch": 1.390534351145038, "grad_norm": 3.6171383394561234, "learning_rate": 9.064434322460712e-06, "loss": 0.1942, "step": 68310 }, { "epoch": 1.3907379134860052, "grad_norm": 5.083804082991412, "learning_rate": 9.064020431121587e-06, "loss": 0.3419, "step": 68320 }, { "epoch": 1.390941475826972, "grad_norm": 6.7012331776525675, "learning_rate": 9.063606457704335e-06, "loss": 0.294, "step": 68330 }, { "epoch": 1.3911450381679389, "grad_norm": 5.076288120853551, "learning_rate": 9.063192402217315e-06, "loss": 0.2654, "step": 68340 }, { "epoch": 1.391348600508906, "grad_norm": 4.838943119975271, "learning_rate": 9.062778264668892e-06, "loss": 0.1499, "step": 68350 }, { "epoch": 1.3915521628498728, "grad_norm": 23.06381192899896, "learning_rate": 9.06236404506743e-06, "loss": 0.2542, "step": 68360 }, { "epoch": 1.3917557251908397, "grad_norm": 8.3478252156417, "learning_rate": 9.061949743421291e-06, "loss": 0.2694, "step": 68370 }, { "epoch": 1.3919592875318065, "grad_norm": 3.75222335779024, "learning_rate": 9.061535359738847e-06, "loss": 0.3105, "step": 68380 }, { "epoch": 1.3921628498727736, "grad_norm": 8.83956042022886, "learning_rate": 9.061120894028466e-06, "loss": 0.2229, "step": 68390 }, { "epoch": 1.3923664122137405, "grad_norm": 6.247323478846938, "learning_rate": 9.060706346298517e-06, "loss": 0.2265, "step": 68400 }, { "epoch": 1.3925699745547073, "grad_norm": 11.233040621896901, "learning_rate": 9.060291716557372e-06, "loss": 0.2819, "step": 68410 }, { "epoch": 1.3927735368956742, "grad_norm": 4.679852314045988, "learning_rate": 9.059877004813409e-06, "loss": 0.1644, "step": 68420 }, { "epoch": 1.3929770992366413, "grad_norm": 22.736887111037603, "learning_rate": 9.059462211075e-06, "loss": 0.1998, "step": 68430 }, { "epoch": 1.3931806615776081, "grad_norm": 5.132990210623082, "learning_rate": 9.059047335350521e-06, "loss": 0.2668, "step": 68440 }, { "epoch": 1.393384223918575, "grad_norm": 6.975562096519633, "learning_rate": 9.058632377648355e-06, "loss": 0.2547, "step": 68450 }, { "epoch": 1.393587786259542, "grad_norm": 8.042518611873835, "learning_rate": 9.05821733797688e-06, "loss": 0.2895, "step": 68460 }, { "epoch": 1.393791348600509, "grad_norm": 4.935207268421847, "learning_rate": 9.057802216344478e-06, "loss": 0.2044, "step": 68470 }, { "epoch": 1.3939949109414758, "grad_norm": 14.55510705166925, "learning_rate": 9.057387012759535e-06, "loss": 0.2387, "step": 68480 }, { "epoch": 1.3941984732824428, "grad_norm": 13.9483432516578, "learning_rate": 9.056971727230434e-06, "loss": 0.1953, "step": 68490 }, { "epoch": 1.3944020356234097, "grad_norm": 9.624030783087074, "learning_rate": 9.056556359765566e-06, "loss": 0.2681, "step": 68500 }, { "epoch": 1.3946055979643766, "grad_norm": 6.686278875319916, "learning_rate": 9.056140910373316e-06, "loss": 0.2094, "step": 68510 }, { "epoch": 1.3948091603053436, "grad_norm": 14.332458028883032, "learning_rate": 9.055725379062078e-06, "loss": 0.2015, "step": 68520 }, { "epoch": 1.3950127226463105, "grad_norm": 5.7728232556878085, "learning_rate": 9.055309765840239e-06, "loss": 0.2611, "step": 68530 }, { "epoch": 1.3952162849872773, "grad_norm": 8.763442217272436, "learning_rate": 9.054894070716198e-06, "loss": 0.266, "step": 68540 }, { "epoch": 1.3954198473282442, "grad_norm": 0.356935784498383, "learning_rate": 9.054478293698348e-06, "loss": 0.1755, "step": 68550 }, { "epoch": 1.3956234096692113, "grad_norm": 2.966620211755843, "learning_rate": 9.054062434795088e-06, "loss": 0.2031, "step": 68560 }, { "epoch": 1.3958269720101781, "grad_norm": 15.99655934148682, "learning_rate": 9.053646494014814e-06, "loss": 0.216, "step": 68570 }, { "epoch": 1.396030534351145, "grad_norm": 13.967704945696077, "learning_rate": 9.053230471365928e-06, "loss": 0.3067, "step": 68580 }, { "epoch": 1.3962340966921118, "grad_norm": 10.175325274900024, "learning_rate": 9.052814366856833e-06, "loss": 0.1837, "step": 68590 }, { "epoch": 1.396437659033079, "grad_norm": 1.0800724224134428, "learning_rate": 9.052398180495932e-06, "loss": 0.2019, "step": 68600 }, { "epoch": 1.3966412213740458, "grad_norm": 8.339863990940463, "learning_rate": 9.05198191229163e-06, "loss": 0.2136, "step": 68610 }, { "epoch": 1.3968447837150126, "grad_norm": 15.342846227144115, "learning_rate": 9.051565562252333e-06, "loss": 0.1958, "step": 68620 }, { "epoch": 1.3970483460559797, "grad_norm": 21.40229278814795, "learning_rate": 9.051149130386453e-06, "loss": 0.2895, "step": 68630 }, { "epoch": 1.3972519083969466, "grad_norm": 15.823033438077905, "learning_rate": 9.050732616702397e-06, "loss": 0.2034, "step": 68640 }, { "epoch": 1.3974554707379134, "grad_norm": 9.673023045682632, "learning_rate": 9.05031602120858e-06, "loss": 0.3047, "step": 68650 }, { "epoch": 1.3976590330788805, "grad_norm": 13.976681144073485, "learning_rate": 9.049899343913413e-06, "loss": 0.2486, "step": 68660 }, { "epoch": 1.3978625954198474, "grad_norm": 8.101781761637456, "learning_rate": 9.049482584825312e-06, "loss": 0.3013, "step": 68670 }, { "epoch": 1.3980661577608142, "grad_norm": 18.444184860855817, "learning_rate": 9.049065743952697e-06, "loss": 0.1742, "step": 68680 }, { "epoch": 1.3982697201017813, "grad_norm": 15.477894287016124, "learning_rate": 9.048648821303982e-06, "loss": 0.1821, "step": 68690 }, { "epoch": 1.3984732824427482, "grad_norm": 10.998891497278464, "learning_rate": 9.048231816887589e-06, "loss": 0.256, "step": 68700 }, { "epoch": 1.398676844783715, "grad_norm": 25.809404363724155, "learning_rate": 9.047814730711942e-06, "loss": 0.1915, "step": 68710 }, { "epoch": 1.3988804071246819, "grad_norm": 7.764240500572118, "learning_rate": 9.047397562785462e-06, "loss": 0.1791, "step": 68720 }, { "epoch": 1.3990839694656487, "grad_norm": 12.05423293185565, "learning_rate": 9.046980313116576e-06, "loss": 0.2146, "step": 68730 }, { "epoch": 1.3992875318066158, "grad_norm": 5.119252186593266, "learning_rate": 9.04656298171371e-06, "loss": 0.1921, "step": 68740 }, { "epoch": 1.3994910941475827, "grad_norm": 5.264576920532827, "learning_rate": 9.046145568585292e-06, "loss": 0.1873, "step": 68750 }, { "epoch": 1.3996946564885495, "grad_norm": 5.47888563286665, "learning_rate": 9.045728073739753e-06, "loss": 0.2563, "step": 68760 }, { "epoch": 1.3998982188295166, "grad_norm": 11.543550076673863, "learning_rate": 9.045310497185525e-06, "loss": 0.2773, "step": 68770 }, { "epoch": 1.4001017811704835, "grad_norm": 22.558065644364213, "learning_rate": 9.04489283893104e-06, "loss": 0.1945, "step": 68780 }, { "epoch": 1.4003053435114503, "grad_norm": 6.457945803124176, "learning_rate": 9.044475098984737e-06, "loss": 0.339, "step": 68790 }, { "epoch": 1.4005089058524174, "grad_norm": 11.238525903251718, "learning_rate": 9.044057277355048e-06, "loss": 0.2304, "step": 68800 }, { "epoch": 1.4007124681933842, "grad_norm": 5.595030326853538, "learning_rate": 9.043639374050415e-06, "loss": 0.2486, "step": 68810 }, { "epoch": 1.400916030534351, "grad_norm": 12.711924059373503, "learning_rate": 9.043221389079276e-06, "loss": 0.2872, "step": 68820 }, { "epoch": 1.4011195928753182, "grad_norm": 8.888159931094869, "learning_rate": 9.042803322450072e-06, "loss": 0.2231, "step": 68830 }, { "epoch": 1.401323155216285, "grad_norm": 8.813300064668047, "learning_rate": 9.042385174171249e-06, "loss": 0.1705, "step": 68840 }, { "epoch": 1.4015267175572519, "grad_norm": 12.10704119991579, "learning_rate": 9.04196694425125e-06, "loss": 0.2488, "step": 68850 }, { "epoch": 1.401730279898219, "grad_norm": 4.538092301717465, "learning_rate": 9.041548632698525e-06, "loss": 0.2291, "step": 68860 }, { "epoch": 1.4019338422391858, "grad_norm": 2.2716866490910084, "learning_rate": 9.041130239521518e-06, "loss": 0.187, "step": 68870 }, { "epoch": 1.4021374045801527, "grad_norm": 17.486309884214815, "learning_rate": 9.04071176472868e-06, "loss": 0.2438, "step": 68880 }, { "epoch": 1.4023409669211195, "grad_norm": 8.514077251093317, "learning_rate": 9.040293208328466e-06, "loss": 0.1666, "step": 68890 }, { "epoch": 1.4025445292620864, "grad_norm": 30.09451635383781, "learning_rate": 9.039874570329325e-06, "loss": 0.237, "step": 68900 }, { "epoch": 1.4027480916030535, "grad_norm": 17.469267844835763, "learning_rate": 9.039455850739715e-06, "loss": 0.1917, "step": 68910 }, { "epoch": 1.4029516539440203, "grad_norm": 22.716996966117165, "learning_rate": 9.03903704956809e-06, "loss": 0.2924, "step": 68920 }, { "epoch": 1.4031552162849872, "grad_norm": 6.635868439543776, "learning_rate": 9.03861816682291e-06, "loss": 0.1891, "step": 68930 }, { "epoch": 1.4033587786259543, "grad_norm": 11.326156417840332, "learning_rate": 9.038199202512635e-06, "loss": 0.3104, "step": 68940 }, { "epoch": 1.4035623409669211, "grad_norm": 11.1112671510912, "learning_rate": 9.037780156645725e-06, "loss": 0.2764, "step": 68950 }, { "epoch": 1.403765903307888, "grad_norm": 6.714026529721781, "learning_rate": 9.037361029230645e-06, "loss": 0.2746, "step": 68960 }, { "epoch": 1.403969465648855, "grad_norm": 7.673901500193225, "learning_rate": 9.036941820275858e-06, "loss": 0.2454, "step": 68970 }, { "epoch": 1.404173027989822, "grad_norm": 4.946888447297368, "learning_rate": 9.036522529789832e-06, "loss": 0.2228, "step": 68980 }, { "epoch": 1.4043765903307888, "grad_norm": 5.773785070112138, "learning_rate": 9.036103157781033e-06, "loss": 0.2262, "step": 68990 }, { "epoch": 1.4045801526717558, "grad_norm": 7.292276041136931, "learning_rate": 9.035683704257933e-06, "loss": 0.1706, "step": 69000 }, { "epoch": 1.4047837150127227, "grad_norm": 0.6031510446882933, "learning_rate": 9.035264169229005e-06, "loss": 0.3024, "step": 69010 }, { "epoch": 1.4049872773536896, "grad_norm": 8.274443693691572, "learning_rate": 9.034844552702717e-06, "loss": 0.2637, "step": 69020 }, { "epoch": 1.4051908396946564, "grad_norm": 5.86719043074795, "learning_rate": 9.034424854687549e-06, "loss": 0.2488, "step": 69030 }, { "epoch": 1.4053944020356235, "grad_norm": 10.892850980242656, "learning_rate": 9.034005075191973e-06, "loss": 0.2421, "step": 69040 }, { "epoch": 1.4055979643765903, "grad_norm": 3.748806145166522, "learning_rate": 9.033585214224468e-06, "loss": 0.217, "step": 69050 }, { "epoch": 1.4058015267175572, "grad_norm": 7.789700869718372, "learning_rate": 9.033165271793516e-06, "loss": 0.1897, "step": 69060 }, { "epoch": 1.406005089058524, "grad_norm": 4.365469608896833, "learning_rate": 9.032745247907596e-06, "loss": 0.2687, "step": 69070 }, { "epoch": 1.4062086513994911, "grad_norm": 5.69645490327957, "learning_rate": 9.032325142575191e-06, "loss": 0.2144, "step": 69080 }, { "epoch": 1.406412213740458, "grad_norm": 16.015929090406505, "learning_rate": 9.031904955804786e-06, "loss": 0.2311, "step": 69090 }, { "epoch": 1.4066157760814249, "grad_norm": 9.357713051725687, "learning_rate": 9.03148468760487e-06, "loss": 0.2171, "step": 69100 }, { "epoch": 1.406819338422392, "grad_norm": 17.254484460435666, "learning_rate": 9.031064337983926e-06, "loss": 0.2637, "step": 69110 }, { "epoch": 1.4070229007633588, "grad_norm": 15.495550248013553, "learning_rate": 9.030643906950444e-06, "loss": 0.1303, "step": 69120 }, { "epoch": 1.4072264631043256, "grad_norm": 7.164147066654596, "learning_rate": 9.03022339451292e-06, "loss": 0.2519, "step": 69130 }, { "epoch": 1.4074300254452927, "grad_norm": 14.983305042731192, "learning_rate": 9.029802800679843e-06, "loss": 0.2261, "step": 69140 }, { "epoch": 1.4076335877862596, "grad_norm": 11.876187192047851, "learning_rate": 9.029382125459707e-06, "loss": 0.2785, "step": 69150 }, { "epoch": 1.4078371501272264, "grad_norm": 11.659916329570287, "learning_rate": 9.02896136886101e-06, "loss": 0.2044, "step": 69160 }, { "epoch": 1.4080407124681935, "grad_norm": 4.0332989847625464, "learning_rate": 9.02854053089225e-06, "loss": 0.1718, "step": 69170 }, { "epoch": 1.4082442748091604, "grad_norm": 5.680761355782369, "learning_rate": 9.028119611561923e-06, "loss": 0.323, "step": 69180 }, { "epoch": 1.4084478371501272, "grad_norm": 3.332388110277259, "learning_rate": 9.027698610878533e-06, "loss": 0.2541, "step": 69190 }, { "epoch": 1.408651399491094, "grad_norm": 9.584379712910552, "learning_rate": 9.027277528850582e-06, "loss": 0.2994, "step": 69200 }, { "epoch": 1.408854961832061, "grad_norm": 6.183366082012266, "learning_rate": 9.026856365486575e-06, "loss": 0.2119, "step": 69210 }, { "epoch": 1.409058524173028, "grad_norm": 0.9066533763994842, "learning_rate": 9.026435120795016e-06, "loss": 0.1741, "step": 69220 }, { "epoch": 1.4092620865139949, "grad_norm": 4.51456769390452, "learning_rate": 9.026013794784415e-06, "loss": 0.2159, "step": 69230 }, { "epoch": 1.4094656488549617, "grad_norm": 5.39686048965341, "learning_rate": 9.025592387463279e-06, "loss": 0.141, "step": 69240 }, { "epoch": 1.4096692111959288, "grad_norm": 7.681284358376012, "learning_rate": 9.025170898840119e-06, "loss": 0.2977, "step": 69250 }, { "epoch": 1.4098727735368957, "grad_norm": 3.4449267674226514, "learning_rate": 9.02474932892345e-06, "loss": 0.1918, "step": 69260 }, { "epoch": 1.4100763358778625, "grad_norm": 12.680226668451525, "learning_rate": 9.024327677721784e-06, "loss": 0.2094, "step": 69270 }, { "epoch": 1.4102798982188296, "grad_norm": 10.287210581913062, "learning_rate": 9.023905945243636e-06, "loss": 0.3659, "step": 69280 }, { "epoch": 1.4104834605597965, "grad_norm": 13.868732534725515, "learning_rate": 9.023484131497527e-06, "loss": 0.2428, "step": 69290 }, { "epoch": 1.4106870229007633, "grad_norm": 0.6708685545857375, "learning_rate": 9.023062236491973e-06, "loss": 0.1815, "step": 69300 }, { "epoch": 1.4108905852417304, "grad_norm": 7.035820940943937, "learning_rate": 9.022640260235493e-06, "loss": 0.2649, "step": 69310 }, { "epoch": 1.4110941475826972, "grad_norm": 0.7900149056171998, "learning_rate": 9.022218202736615e-06, "loss": 0.2794, "step": 69320 }, { "epoch": 1.411297709923664, "grad_norm": 4.159346227552238, "learning_rate": 9.021796064003859e-06, "loss": 0.1732, "step": 69330 }, { "epoch": 1.4115012722646312, "grad_norm": 5.315629879722577, "learning_rate": 9.021373844045751e-06, "loss": 0.3113, "step": 69340 }, { "epoch": 1.411704834605598, "grad_norm": 2.155143781591554, "learning_rate": 9.020951542870819e-06, "loss": 0.2253, "step": 69350 }, { "epoch": 1.411908396946565, "grad_norm": 8.839882701069362, "learning_rate": 9.02052916048759e-06, "loss": 0.1858, "step": 69360 }, { "epoch": 1.4121119592875317, "grad_norm": 5.6704935103103065, "learning_rate": 9.020106696904597e-06, "loss": 0.1829, "step": 69370 }, { "epoch": 1.4123155216284986, "grad_norm": 6.818520836836071, "learning_rate": 9.019684152130372e-06, "loss": 0.2314, "step": 69380 }, { "epoch": 1.4125190839694657, "grad_norm": 4.449340084944888, "learning_rate": 9.01926152617345e-06, "loss": 0.2598, "step": 69390 }, { "epoch": 1.4127226463104325, "grad_norm": 6.46361015361454, "learning_rate": 9.018838819042362e-06, "loss": 0.2073, "step": 69400 }, { "epoch": 1.4129262086513994, "grad_norm": 7.479274582252842, "learning_rate": 9.01841603074565e-06, "loss": 0.2562, "step": 69410 }, { "epoch": 1.4131297709923665, "grad_norm": 4.4917403990825004, "learning_rate": 9.01799316129185e-06, "loss": 0.189, "step": 69420 }, { "epoch": 1.4133333333333333, "grad_norm": 10.709017717420018, "learning_rate": 9.017570210689501e-06, "loss": 0.2066, "step": 69430 }, { "epoch": 1.4135368956743002, "grad_norm": 5.557523487582773, "learning_rate": 9.017147178947147e-06, "loss": 0.2178, "step": 69440 }, { "epoch": 1.4137404580152673, "grad_norm": 9.649972530032487, "learning_rate": 9.016724066073334e-06, "loss": 0.2635, "step": 69450 }, { "epoch": 1.4139440203562341, "grad_norm": 11.54778196367953, "learning_rate": 9.016300872076605e-06, "loss": 0.1598, "step": 69460 }, { "epoch": 1.414147582697201, "grad_norm": 17.26489631424861, "learning_rate": 9.015877596965506e-06, "loss": 0.2277, "step": 69470 }, { "epoch": 1.414351145038168, "grad_norm": 11.98797253350845, "learning_rate": 9.015454240748584e-06, "loss": 0.2933, "step": 69480 }, { "epoch": 1.414554707379135, "grad_norm": 6.536074071050846, "learning_rate": 9.015030803434394e-06, "loss": 0.2232, "step": 69490 }, { "epoch": 1.4147582697201018, "grad_norm": 4.712138532835574, "learning_rate": 9.014607285031487e-06, "loss": 0.2074, "step": 69500 }, { "epoch": 1.4149618320610686, "grad_norm": 3.0131324377762816, "learning_rate": 9.014183685548414e-06, "loss": 0.2326, "step": 69510 }, { "epoch": 1.4151653944020357, "grad_norm": 6.420584100069842, "learning_rate": 9.01376000499373e-06, "loss": 0.2262, "step": 69520 }, { "epoch": 1.4153689567430026, "grad_norm": 12.384956662215826, "learning_rate": 9.013336243375995e-06, "loss": 0.1661, "step": 69530 }, { "epoch": 1.4155725190839694, "grad_norm": 5.133997578742388, "learning_rate": 9.012912400703765e-06, "loss": 0.2209, "step": 69540 }, { "epoch": 1.4157760814249363, "grad_norm": 7.344729479996043, "learning_rate": 9.012488476985599e-06, "loss": 0.2784, "step": 69550 }, { "epoch": 1.4159796437659034, "grad_norm": 8.475685285775468, "learning_rate": 9.012064472230061e-06, "loss": 0.1806, "step": 69560 }, { "epoch": 1.4161832061068702, "grad_norm": 8.910322612054262, "learning_rate": 9.011640386445716e-06, "loss": 0.1449, "step": 69570 }, { "epoch": 1.416386768447837, "grad_norm": 14.718208918604397, "learning_rate": 9.011216219641123e-06, "loss": 0.2219, "step": 69580 }, { "epoch": 1.4165903307888041, "grad_norm": 9.345259861716329, "learning_rate": 9.010791971824853e-06, "loss": 0.2758, "step": 69590 }, { "epoch": 1.416793893129771, "grad_norm": 9.54785667267994, "learning_rate": 9.010367643005472e-06, "loss": 0.2254, "step": 69600 }, { "epoch": 1.4169974554707379, "grad_norm": 11.511901653142067, "learning_rate": 9.009943233191554e-06, "loss": 0.2646, "step": 69610 }, { "epoch": 1.417201017811705, "grad_norm": 11.912225065747466, "learning_rate": 9.009518742391666e-06, "loss": 0.2337, "step": 69620 }, { "epoch": 1.4174045801526718, "grad_norm": 8.533594041442099, "learning_rate": 9.009094170614382e-06, "loss": 0.2458, "step": 69630 }, { "epoch": 1.4176081424936386, "grad_norm": 10.29382935458622, "learning_rate": 9.008669517868281e-06, "loss": 0.2473, "step": 69640 }, { "epoch": 1.4178117048346057, "grad_norm": 5.708319595535588, "learning_rate": 9.008244784161932e-06, "loss": 0.2731, "step": 69650 }, { "epoch": 1.4180152671755726, "grad_norm": 9.658598623881414, "learning_rate": 9.007819969503919e-06, "loss": 0.2306, "step": 69660 }, { "epoch": 1.4182188295165394, "grad_norm": 8.36970758034391, "learning_rate": 9.007395073902817e-06, "loss": 0.1711, "step": 69670 }, { "epoch": 1.4184223918575063, "grad_norm": 6.757052301929915, "learning_rate": 9.006970097367213e-06, "loss": 0.1917, "step": 69680 }, { "epoch": 1.4186259541984731, "grad_norm": 0.9710011107078274, "learning_rate": 9.006545039905688e-06, "loss": 0.1543, "step": 69690 }, { "epoch": 1.4188295165394402, "grad_norm": 15.178461573053854, "learning_rate": 9.006119901526822e-06, "loss": 0.2602, "step": 69700 }, { "epoch": 1.419033078880407, "grad_norm": 11.399057357270511, "learning_rate": 9.005694682239207e-06, "loss": 0.2828, "step": 69710 }, { "epoch": 1.419236641221374, "grad_norm": 6.577490002728545, "learning_rate": 9.005269382051428e-06, "loss": 0.2472, "step": 69720 }, { "epoch": 1.419440203562341, "grad_norm": 11.335725872956843, "learning_rate": 9.004844000972075e-06, "loss": 0.1976, "step": 69730 }, { "epoch": 1.4196437659033079, "grad_norm": 8.948506287874158, "learning_rate": 9.00441853900974e-06, "loss": 0.22, "step": 69740 }, { "epoch": 1.4198473282442747, "grad_norm": 15.202069536994692, "learning_rate": 9.003992996173015e-06, "loss": 0.2532, "step": 69750 }, { "epoch": 1.4200508905852418, "grad_norm": 11.811916300484116, "learning_rate": 9.003567372470492e-06, "loss": 0.1149, "step": 69760 }, { "epoch": 1.4202544529262087, "grad_norm": 13.235566604982306, "learning_rate": 9.00314166791077e-06, "loss": 0.2185, "step": 69770 }, { "epoch": 1.4204580152671755, "grad_norm": 6.588017306168581, "learning_rate": 9.002715882502449e-06, "loss": 0.169, "step": 69780 }, { "epoch": 1.4206615776081426, "grad_norm": 0.38664720945650727, "learning_rate": 9.002290016254123e-06, "loss": 0.2529, "step": 69790 }, { "epoch": 1.4208651399491095, "grad_norm": 6.164762203099941, "learning_rate": 9.001864069174395e-06, "loss": 0.2285, "step": 69800 }, { "epoch": 1.4210687022900763, "grad_norm": 16.80533784823494, "learning_rate": 9.001438041271869e-06, "loss": 0.2494, "step": 69810 }, { "epoch": 1.4212722646310434, "grad_norm": 18.452593490000094, "learning_rate": 9.001011932555146e-06, "loss": 0.2081, "step": 69820 }, { "epoch": 1.4214758269720102, "grad_norm": 8.174514440381309, "learning_rate": 9.000585743032835e-06, "loss": 0.2503, "step": 69830 }, { "epoch": 1.421679389312977, "grad_norm": 7.6107975020453775, "learning_rate": 9.000159472713542e-06, "loss": 0.2755, "step": 69840 }, { "epoch": 1.421882951653944, "grad_norm": 8.741168919265128, "learning_rate": 8.999733121605876e-06, "loss": 0.2173, "step": 69850 }, { "epoch": 1.4220865139949108, "grad_norm": 7.458575494155426, "learning_rate": 8.99930668971845e-06, "loss": 0.3231, "step": 69860 }, { "epoch": 1.422290076335878, "grad_norm": 8.364558933373791, "learning_rate": 8.998880177059872e-06, "loss": 0.2165, "step": 69870 }, { "epoch": 1.4224936386768448, "grad_norm": 13.932210456719899, "learning_rate": 8.998453583638757e-06, "loss": 0.3033, "step": 69880 }, { "epoch": 1.4226972010178116, "grad_norm": 11.231616091399463, "learning_rate": 8.998026909463724e-06, "loss": 0.2267, "step": 69890 }, { "epoch": 1.4229007633587787, "grad_norm": 4.279908607326244, "learning_rate": 8.997600154543389e-06, "loss": 0.2976, "step": 69900 }, { "epoch": 1.4231043256997455, "grad_norm": 4.407099842198576, "learning_rate": 8.99717331888637e-06, "loss": 0.1778, "step": 69910 }, { "epoch": 1.4233078880407124, "grad_norm": 3.3791621063662727, "learning_rate": 8.996746402501287e-06, "loss": 0.1773, "step": 69920 }, { "epoch": 1.4235114503816795, "grad_norm": 18.201595654530283, "learning_rate": 8.996319405396761e-06, "loss": 0.249, "step": 69930 }, { "epoch": 1.4237150127226463, "grad_norm": 12.0731278232774, "learning_rate": 8.99589232758142e-06, "loss": 0.2047, "step": 69940 }, { "epoch": 1.4239185750636132, "grad_norm": 6.9364549077835225, "learning_rate": 8.995465169063883e-06, "loss": 0.2338, "step": 69950 }, { "epoch": 1.4241221374045803, "grad_norm": 14.83456554536506, "learning_rate": 8.995037929852784e-06, "loss": 0.2203, "step": 69960 }, { "epoch": 1.4243256997455471, "grad_norm": 2.2433752465498737, "learning_rate": 8.994610609956748e-06, "loss": 0.1993, "step": 69970 }, { "epoch": 1.424529262086514, "grad_norm": 11.58288229660099, "learning_rate": 8.994183209384404e-06, "loss": 0.2687, "step": 69980 }, { "epoch": 1.4247328244274808, "grad_norm": 7.4809001867471805, "learning_rate": 8.993755728144389e-06, "loss": 0.1498, "step": 69990 }, { "epoch": 1.424936386768448, "grad_norm": 10.898536662178651, "learning_rate": 8.99332816624533e-06, "loss": 0.2216, "step": 70000 }, { "epoch": 1.4251399491094148, "grad_norm": 4.880304079337192, "learning_rate": 8.992900523695867e-06, "loss": 0.2455, "step": 70010 }, { "epoch": 1.4253435114503816, "grad_norm": 12.13941191089229, "learning_rate": 8.992472800504635e-06, "loss": 0.2365, "step": 70020 }, { "epoch": 1.4255470737913485, "grad_norm": 4.344582689488299, "learning_rate": 8.99204499668027e-06, "loss": 0.354, "step": 70030 }, { "epoch": 1.4257506361323156, "grad_norm": 13.501218827635851, "learning_rate": 8.991617112231419e-06, "loss": 0.2387, "step": 70040 }, { "epoch": 1.4259541984732824, "grad_norm": 13.545417215201846, "learning_rate": 8.991189147166714e-06, "loss": 0.2561, "step": 70050 }, { "epoch": 1.4261577608142493, "grad_norm": 2.0465541034266375, "learning_rate": 8.990761101494807e-06, "loss": 0.1602, "step": 70060 }, { "epoch": 1.4263613231552164, "grad_norm": 11.680525586963068, "learning_rate": 8.99033297522434e-06, "loss": 0.2442, "step": 70070 }, { "epoch": 1.4265648854961832, "grad_norm": 4.243936833893244, "learning_rate": 8.989904768363957e-06, "loss": 0.2371, "step": 70080 }, { "epoch": 1.42676844783715, "grad_norm": 13.518091173978869, "learning_rate": 8.98947648092231e-06, "loss": 0.2549, "step": 70090 }, { "epoch": 1.4269720101781171, "grad_norm": 13.029906200936919, "learning_rate": 8.989048112908045e-06, "loss": 0.213, "step": 70100 }, { "epoch": 1.427175572519084, "grad_norm": 10.431698297023141, "learning_rate": 8.988619664329817e-06, "loss": 0.1712, "step": 70110 }, { "epoch": 1.4273791348600509, "grad_norm": 23.21887638110717, "learning_rate": 8.988191135196277e-06, "loss": 0.2325, "step": 70120 }, { "epoch": 1.427582697201018, "grad_norm": 10.190577712539147, "learning_rate": 8.987762525516081e-06, "loss": 0.2976, "step": 70130 }, { "epoch": 1.4277862595419848, "grad_norm": 13.635324561653668, "learning_rate": 8.987333835297885e-06, "loss": 0.1632, "step": 70140 }, { "epoch": 1.4279898218829516, "grad_norm": 12.553941152796599, "learning_rate": 8.986905064550345e-06, "loss": 0.2764, "step": 70150 }, { "epoch": 1.4281933842239185, "grad_norm": 25.06424155574134, "learning_rate": 8.986476213282122e-06, "loss": 0.1615, "step": 70160 }, { "epoch": 1.4283969465648854, "grad_norm": 10.39041622682288, "learning_rate": 8.98604728150188e-06, "loss": 0.3046, "step": 70170 }, { "epoch": 1.4286005089058524, "grad_norm": 17.590168830227785, "learning_rate": 8.985618269218277e-06, "loss": 0.1976, "step": 70180 }, { "epoch": 1.4288040712468193, "grad_norm": 12.952396798674428, "learning_rate": 8.98518917643998e-06, "loss": 0.254, "step": 70190 }, { "epoch": 1.4290076335877862, "grad_norm": 24.384050572645272, "learning_rate": 8.984760003175655e-06, "loss": 0.3249, "step": 70200 }, { "epoch": 1.4292111959287532, "grad_norm": 8.505339053932609, "learning_rate": 8.984330749433969e-06, "loss": 0.3247, "step": 70210 }, { "epoch": 1.42941475826972, "grad_norm": 7.445269064169218, "learning_rate": 8.98390141522359e-06, "loss": 0.1839, "step": 70220 }, { "epoch": 1.429618320610687, "grad_norm": 4.949711335347355, "learning_rate": 8.983472000553194e-06, "loss": 0.2087, "step": 70230 }, { "epoch": 1.429821882951654, "grad_norm": 15.275436088931386, "learning_rate": 8.983042505431448e-06, "loss": 0.3128, "step": 70240 }, { "epoch": 1.4300254452926209, "grad_norm": 4.4617525289495354, "learning_rate": 8.982612929867029e-06, "loss": 0.2249, "step": 70250 }, { "epoch": 1.4302290076335877, "grad_norm": 10.266296257121075, "learning_rate": 8.982183273868614e-06, "loss": 0.2803, "step": 70260 }, { "epoch": 1.4304325699745548, "grad_norm": 5.998159006067826, "learning_rate": 8.981753537444877e-06, "loss": 0.1825, "step": 70270 }, { "epoch": 1.4306361323155217, "grad_norm": 4.578810295964338, "learning_rate": 8.981323720604499e-06, "loss": 0.187, "step": 70280 }, { "epoch": 1.4308396946564885, "grad_norm": 12.998249280015434, "learning_rate": 8.98089382335616e-06, "loss": 0.1532, "step": 70290 }, { "epoch": 1.4310432569974556, "grad_norm": 16.69509335079505, "learning_rate": 8.980463845708545e-06, "loss": 0.2237, "step": 70300 }, { "epoch": 1.4312468193384225, "grad_norm": 6.544679620841064, "learning_rate": 8.980033787670333e-06, "loss": 0.1903, "step": 70310 }, { "epoch": 1.4314503816793893, "grad_norm": 13.237625631935416, "learning_rate": 8.979603649250215e-06, "loss": 0.1977, "step": 70320 }, { "epoch": 1.4316539440203562, "grad_norm": 18.15398738288008, "learning_rate": 8.979173430456875e-06, "loss": 0.2421, "step": 70330 }, { "epoch": 1.431857506361323, "grad_norm": 4.2033127874258325, "learning_rate": 8.978743131299001e-06, "loss": 0.2654, "step": 70340 }, { "epoch": 1.43206106870229, "grad_norm": 0.8507570375200041, "learning_rate": 8.978312751785286e-06, "loss": 0.2706, "step": 70350 }, { "epoch": 1.432264631043257, "grad_norm": 3.6926086939393508, "learning_rate": 8.97788229192442e-06, "loss": 0.228, "step": 70360 }, { "epoch": 1.4324681933842238, "grad_norm": 8.497920882005577, "learning_rate": 8.9774517517251e-06, "loss": 0.3146, "step": 70370 }, { "epoch": 1.432671755725191, "grad_norm": 13.508418345407463, "learning_rate": 8.977021131196016e-06, "loss": 0.1946, "step": 70380 }, { "epoch": 1.4328753180661578, "grad_norm": 23.970372182745148, "learning_rate": 8.97659043034587e-06, "loss": 0.171, "step": 70390 }, { "epoch": 1.4330788804071246, "grad_norm": 18.121436560625053, "learning_rate": 8.976159649183357e-06, "loss": 0.2163, "step": 70400 }, { "epoch": 1.4332824427480917, "grad_norm": 13.17736545261619, "learning_rate": 8.97572878771718e-06, "loss": 0.2235, "step": 70410 }, { "epoch": 1.4334860050890585, "grad_norm": 9.633324241807307, "learning_rate": 8.975297845956038e-06, "loss": 0.235, "step": 70420 }, { "epoch": 1.4336895674300254, "grad_norm": 11.321763066294197, "learning_rate": 8.974866823908638e-06, "loss": 0.2823, "step": 70430 }, { "epoch": 1.4338931297709925, "grad_norm": 8.330383774254354, "learning_rate": 8.97443572158368e-06, "loss": 0.2041, "step": 70440 }, { "epoch": 1.4340966921119593, "grad_norm": 10.947276138206078, "learning_rate": 8.974004538989875e-06, "loss": 0.2882, "step": 70450 }, { "epoch": 1.4343002544529262, "grad_norm": 7.947545908642457, "learning_rate": 8.97357327613593e-06, "loss": 0.2498, "step": 70460 }, { "epoch": 1.434503816793893, "grad_norm": 6.261658428609284, "learning_rate": 8.973141933030555e-06, "loss": 0.2212, "step": 70470 }, { "epoch": 1.4347073791348601, "grad_norm": 24.613067579838663, "learning_rate": 8.972710509682462e-06, "loss": 0.2009, "step": 70480 }, { "epoch": 1.434910941475827, "grad_norm": 7.934530236947033, "learning_rate": 8.972279006100361e-06, "loss": 0.2198, "step": 70490 }, { "epoch": 1.4351145038167938, "grad_norm": 5.252929615009177, "learning_rate": 8.971847422292971e-06, "loss": 0.2952, "step": 70500 }, { "epoch": 1.4353180661577607, "grad_norm": 9.615055188683572, "learning_rate": 8.971415758269007e-06, "loss": 0.2102, "step": 70510 }, { "epoch": 1.4355216284987278, "grad_norm": 12.370465220575788, "learning_rate": 8.970984014037185e-06, "loss": 0.2255, "step": 70520 }, { "epoch": 1.4357251908396946, "grad_norm": 7.726834892896001, "learning_rate": 8.970552189606227e-06, "loss": 0.1551, "step": 70530 }, { "epoch": 1.4359287531806615, "grad_norm": 14.37917816099441, "learning_rate": 8.970120284984854e-06, "loss": 0.2738, "step": 70540 }, { "epoch": 1.4361323155216286, "grad_norm": 21.2370514919694, "learning_rate": 8.969688300181789e-06, "loss": 0.1639, "step": 70550 }, { "epoch": 1.4363358778625954, "grad_norm": 7.180589796807765, "learning_rate": 8.969256235205754e-06, "loss": 0.198, "step": 70560 }, { "epoch": 1.4365394402035623, "grad_norm": 8.950275372702732, "learning_rate": 8.968824090065478e-06, "loss": 0.2786, "step": 70570 }, { "epoch": 1.4367430025445294, "grad_norm": 11.806676441419233, "learning_rate": 8.968391864769688e-06, "loss": 0.2721, "step": 70580 }, { "epoch": 1.4369465648854962, "grad_norm": 13.245425894256938, "learning_rate": 8.967959559327113e-06, "loss": 0.3019, "step": 70590 }, { "epoch": 1.437150127226463, "grad_norm": 9.178848440139992, "learning_rate": 8.967527173746482e-06, "loss": 0.2996, "step": 70600 }, { "epoch": 1.4373536895674301, "grad_norm": 7.926720884399698, "learning_rate": 8.967094708036532e-06, "loss": 0.2418, "step": 70610 }, { "epoch": 1.437557251908397, "grad_norm": 6.076089344760328, "learning_rate": 8.966662162205994e-06, "loss": 0.2575, "step": 70620 }, { "epoch": 1.4377608142493639, "grad_norm": 5.504719649371056, "learning_rate": 8.966229536263604e-06, "loss": 0.2138, "step": 70630 }, { "epoch": 1.4379643765903307, "grad_norm": 19.144026905163553, "learning_rate": 8.9657968302181e-06, "loss": 0.2732, "step": 70640 }, { "epoch": 1.4381679389312976, "grad_norm": 8.510338249085443, "learning_rate": 8.965364044078221e-06, "loss": 0.2604, "step": 70650 }, { "epoch": 1.4383715012722647, "grad_norm": 10.789283487169014, "learning_rate": 8.96493117785271e-06, "loss": 0.1654, "step": 70660 }, { "epoch": 1.4385750636132315, "grad_norm": 14.899677247828286, "learning_rate": 8.964498231550307e-06, "loss": 0.1897, "step": 70670 }, { "epoch": 1.4387786259541984, "grad_norm": 5.749238264696594, "learning_rate": 8.964065205179752e-06, "loss": 0.1751, "step": 70680 }, { "epoch": 1.4389821882951654, "grad_norm": 3.1331400282952147, "learning_rate": 8.963632098749799e-06, "loss": 0.214, "step": 70690 }, { "epoch": 1.4391857506361323, "grad_norm": 12.057769829827883, "learning_rate": 8.963198912269189e-06, "loss": 0.2728, "step": 70700 }, { "epoch": 1.4393893129770992, "grad_norm": 5.141611333346152, "learning_rate": 8.962765645746671e-06, "loss": 0.2908, "step": 70710 }, { "epoch": 1.4395928753180662, "grad_norm": 6.883036473468787, "learning_rate": 8.962332299190998e-06, "loss": 0.1711, "step": 70720 }, { "epoch": 1.439796437659033, "grad_norm": 10.020664965002878, "learning_rate": 8.96189887261092e-06, "loss": 0.1897, "step": 70730 }, { "epoch": 1.44, "grad_norm": 8.304394454463361, "learning_rate": 8.961465366015193e-06, "loss": 0.2273, "step": 70740 }, { "epoch": 1.440203562340967, "grad_norm": 7.586447773491633, "learning_rate": 8.961031779412568e-06, "loss": 0.3168, "step": 70750 }, { "epoch": 1.4404071246819339, "grad_norm": 10.713244811156788, "learning_rate": 8.960598112811807e-06, "loss": 0.2987, "step": 70760 }, { "epoch": 1.4406106870229007, "grad_norm": 5.739095677102582, "learning_rate": 8.960164366221665e-06, "loss": 0.1501, "step": 70770 }, { "epoch": 1.4408142493638678, "grad_norm": 6.508585035526439, "learning_rate": 8.959730539650903e-06, "loss": 0.2941, "step": 70780 }, { "epoch": 1.4410178117048347, "grad_norm": 16.31641263662527, "learning_rate": 8.959296633108283e-06, "loss": 0.3149, "step": 70790 }, { "epoch": 1.4412213740458015, "grad_norm": 8.884739174926922, "learning_rate": 8.958862646602568e-06, "loss": 0.2833, "step": 70800 }, { "epoch": 1.4414249363867684, "grad_norm": 11.034902531688456, "learning_rate": 8.958428580142522e-06, "loss": 0.2782, "step": 70810 }, { "epoch": 1.4416284987277352, "grad_norm": 3.796716740483631, "learning_rate": 8.957994433736914e-06, "loss": 0.2293, "step": 70820 }, { "epoch": 1.4418320610687023, "grad_norm": 4.590778876667298, "learning_rate": 8.957560207394507e-06, "loss": 0.1414, "step": 70830 }, { "epoch": 1.4420356234096692, "grad_norm": 7.636851321516027, "learning_rate": 8.957125901124078e-06, "loss": 0.2672, "step": 70840 }, { "epoch": 1.442239185750636, "grad_norm": 7.668241964058666, "learning_rate": 8.956691514934392e-06, "loss": 0.2587, "step": 70850 }, { "epoch": 1.442442748091603, "grad_norm": 7.951169854880754, "learning_rate": 8.956257048834228e-06, "loss": 0.256, "step": 70860 }, { "epoch": 1.44264631043257, "grad_norm": 4.941731264686983, "learning_rate": 8.955822502832354e-06, "loss": 0.1614, "step": 70870 }, { "epoch": 1.4428498727735368, "grad_norm": 1.4106665053271743, "learning_rate": 8.95538787693755e-06, "loss": 0.1749, "step": 70880 }, { "epoch": 1.443053435114504, "grad_norm": 19.476056772593815, "learning_rate": 8.954953171158595e-06, "loss": 0.2478, "step": 70890 }, { "epoch": 1.4432569974554708, "grad_norm": 9.67291267525783, "learning_rate": 8.954518385504267e-06, "loss": 0.2475, "step": 70900 }, { "epoch": 1.4434605597964376, "grad_norm": 13.189491255861759, "learning_rate": 8.954083519983345e-06, "loss": 0.3536, "step": 70910 }, { "epoch": 1.4436641221374047, "grad_norm": 21.35633079608891, "learning_rate": 8.953648574604615e-06, "loss": 0.1657, "step": 70920 }, { "epoch": 1.4438676844783715, "grad_norm": 5.243793762102563, "learning_rate": 8.953213549376859e-06, "loss": 0.1385, "step": 70930 }, { "epoch": 1.4440712468193384, "grad_norm": 3.2605984062643185, "learning_rate": 8.952778444308862e-06, "loss": 0.265, "step": 70940 }, { "epoch": 1.4442748091603053, "grad_norm": 11.490819299555016, "learning_rate": 8.952343259409415e-06, "loss": 0.2664, "step": 70950 }, { "epoch": 1.4444783715012723, "grad_norm": 9.500163063738606, "learning_rate": 8.951907994687306e-06, "loss": 0.2144, "step": 70960 }, { "epoch": 1.4446819338422392, "grad_norm": 9.420827529565882, "learning_rate": 8.951472650151325e-06, "loss": 0.2844, "step": 70970 }, { "epoch": 1.444885496183206, "grad_norm": 9.583699305709391, "learning_rate": 8.951037225810263e-06, "loss": 0.2474, "step": 70980 }, { "epoch": 1.445089058524173, "grad_norm": 7.961086418096827, "learning_rate": 8.950601721672916e-06, "loss": 0.2061, "step": 70990 }, { "epoch": 1.44529262086514, "grad_norm": 6.666616098013886, "learning_rate": 8.950166137748079e-06, "loss": 0.2588, "step": 71000 }, { "epoch": 1.4454961832061068, "grad_norm": 7.787574786354417, "learning_rate": 8.949730474044547e-06, "loss": 0.2563, "step": 71010 }, { "epoch": 1.4456997455470737, "grad_norm": 18.67591225332433, "learning_rate": 8.949294730571124e-06, "loss": 0.1636, "step": 71020 }, { "epoch": 1.4459033078880408, "grad_norm": 13.824920087516752, "learning_rate": 8.948858907336606e-06, "loss": 0.2265, "step": 71030 }, { "epoch": 1.4461068702290076, "grad_norm": 8.178019693646405, "learning_rate": 8.948423004349796e-06, "loss": 0.2143, "step": 71040 }, { "epoch": 1.4463104325699745, "grad_norm": 5.201738146839964, "learning_rate": 8.947987021619499e-06, "loss": 0.275, "step": 71050 }, { "epoch": 1.4465139949109416, "grad_norm": 4.791652188159473, "learning_rate": 8.947550959154518e-06, "loss": 0.3087, "step": 71060 }, { "epoch": 1.4467175572519084, "grad_norm": 7.570809564034572, "learning_rate": 8.94711481696366e-06, "loss": 0.2505, "step": 71070 }, { "epoch": 1.4469211195928753, "grad_norm": 11.584765426650703, "learning_rate": 8.946678595055736e-06, "loss": 0.3767, "step": 71080 }, { "epoch": 1.4471246819338424, "grad_norm": 7.321731287375625, "learning_rate": 8.946242293439556e-06, "loss": 0.2862, "step": 71090 }, { "epoch": 1.4473282442748092, "grad_norm": 4.097038649445915, "learning_rate": 8.945805912123928e-06, "loss": 0.2207, "step": 71100 }, { "epoch": 1.447531806615776, "grad_norm": 0.12687091266814557, "learning_rate": 8.945369451117667e-06, "loss": 0.2026, "step": 71110 }, { "epoch": 1.447735368956743, "grad_norm": 5.811162606716863, "learning_rate": 8.94493291042959e-06, "loss": 0.1963, "step": 71120 }, { "epoch": 1.4479389312977098, "grad_norm": 11.358053497916272, "learning_rate": 8.94449629006851e-06, "loss": 0.2297, "step": 71130 }, { "epoch": 1.4481424936386769, "grad_norm": 13.741446290894373, "learning_rate": 8.944059590043248e-06, "loss": 0.3023, "step": 71140 }, { "epoch": 1.4483460559796437, "grad_norm": 4.876361746542377, "learning_rate": 8.943622810362621e-06, "loss": 0.1707, "step": 71150 }, { "epoch": 1.4485496183206106, "grad_norm": 23.4583514232108, "learning_rate": 8.943185951035454e-06, "loss": 0.2735, "step": 71160 }, { "epoch": 1.4487531806615777, "grad_norm": 11.19529926775098, "learning_rate": 8.942749012070569e-06, "loss": 0.2082, "step": 71170 }, { "epoch": 1.4489567430025445, "grad_norm": 4.619924912005094, "learning_rate": 8.942311993476786e-06, "loss": 0.2986, "step": 71180 }, { "epoch": 1.4491603053435114, "grad_norm": 12.780065470690271, "learning_rate": 8.94187489526294e-06, "loss": 0.2653, "step": 71190 }, { "epoch": 1.4493638676844784, "grad_norm": 4.928896633140914, "learning_rate": 8.941437717437848e-06, "loss": 0.1461, "step": 71200 }, { "epoch": 1.4495674300254453, "grad_norm": 13.624598069717862, "learning_rate": 8.941000460010347e-06, "loss": 0.2302, "step": 71210 }, { "epoch": 1.4497709923664122, "grad_norm": 7.327186665695188, "learning_rate": 8.940563122989266e-06, "loss": 0.3649, "step": 71220 }, { "epoch": 1.4499745547073792, "grad_norm": 4.0309813820957485, "learning_rate": 8.940125706383436e-06, "loss": 0.2689, "step": 71230 }, { "epoch": 1.450178117048346, "grad_norm": 9.365112021934948, "learning_rate": 8.939688210201694e-06, "loss": 0.2737, "step": 71240 }, { "epoch": 1.450381679389313, "grad_norm": 7.035619457394324, "learning_rate": 8.939250634452872e-06, "loss": 0.214, "step": 71250 }, { "epoch": 1.45058524173028, "grad_norm": 3.9780319011512772, "learning_rate": 8.938812979145812e-06, "loss": 0.2503, "step": 71260 }, { "epoch": 1.4507888040712469, "grad_norm": 11.276206806265058, "learning_rate": 8.938375244289351e-06, "loss": 0.3183, "step": 71270 }, { "epoch": 1.4509923664122137, "grad_norm": 7.378632109733434, "learning_rate": 8.937937429892327e-06, "loss": 0.2632, "step": 71280 }, { "epoch": 1.4511959287531806, "grad_norm": 7.3731970732826335, "learning_rate": 8.937499535963588e-06, "loss": 0.3195, "step": 71290 }, { "epoch": 1.4513994910941475, "grad_norm": 6.425123380727585, "learning_rate": 8.937061562511971e-06, "loss": 0.2336, "step": 71300 }, { "epoch": 1.4516030534351145, "grad_norm": 9.902550745667428, "learning_rate": 8.936623509546327e-06, "loss": 0.3027, "step": 71310 }, { "epoch": 1.4518066157760814, "grad_norm": 15.38907333283546, "learning_rate": 8.936185377075498e-06, "loss": 0.2113, "step": 71320 }, { "epoch": 1.4520101781170482, "grad_norm": 6.353644923547121, "learning_rate": 8.93574716510834e-06, "loss": 0.2687, "step": 71330 }, { "epoch": 1.4522137404580153, "grad_norm": 4.085566240362023, "learning_rate": 8.935308873653696e-06, "loss": 0.202, "step": 71340 }, { "epoch": 1.4524173027989822, "grad_norm": 10.086337760920836, "learning_rate": 8.934870502720422e-06, "loss": 0.2493, "step": 71350 }, { "epoch": 1.452620865139949, "grad_norm": 11.283387017484113, "learning_rate": 8.934432052317367e-06, "loss": 0.272, "step": 71360 }, { "epoch": 1.4528244274809161, "grad_norm": 16.90339967592055, "learning_rate": 8.933993522453392e-06, "loss": 0.1827, "step": 71370 }, { "epoch": 1.453027989821883, "grad_norm": 10.697833100959686, "learning_rate": 8.933554913137353e-06, "loss": 0.2202, "step": 71380 }, { "epoch": 1.4532315521628498, "grad_norm": 1.3522040029981888, "learning_rate": 8.933116224378102e-06, "loss": 0.1639, "step": 71390 }, { "epoch": 1.453435114503817, "grad_norm": 3.5702871871510276, "learning_rate": 8.932677456184504e-06, "loss": 0.1411, "step": 71400 }, { "epoch": 1.4536386768447838, "grad_norm": 6.135211438061492, "learning_rate": 8.93223860856542e-06, "loss": 0.146, "step": 71410 }, { "epoch": 1.4538422391857506, "grad_norm": 16.529384913225904, "learning_rate": 8.931799681529713e-06, "loss": 0.2531, "step": 71420 }, { "epoch": 1.4540458015267175, "grad_norm": 5.684673117245128, "learning_rate": 8.931360675086248e-06, "loss": 0.2048, "step": 71430 }, { "epoch": 1.4542493638676846, "grad_norm": 5.427051305626997, "learning_rate": 8.930921589243888e-06, "loss": 0.2517, "step": 71440 }, { "epoch": 1.4544529262086514, "grad_norm": 19.066534055754758, "learning_rate": 8.930482424011506e-06, "loss": 0.1749, "step": 71450 }, { "epoch": 1.4546564885496183, "grad_norm": 5.941868184347318, "learning_rate": 8.930043179397968e-06, "loss": 0.3046, "step": 71460 }, { "epoch": 1.4548600508905851, "grad_norm": 33.75519169358953, "learning_rate": 8.929603855412146e-06, "loss": 0.1723, "step": 71470 }, { "epoch": 1.4550636132315522, "grad_norm": 2.634006849778897, "learning_rate": 8.929164452062913e-06, "loss": 0.2888, "step": 71480 }, { "epoch": 1.455267175572519, "grad_norm": 3.250510387773531, "learning_rate": 8.928724969359144e-06, "loss": 0.1295, "step": 71490 }, { "epoch": 1.455470737913486, "grad_norm": 0.148091823966542, "learning_rate": 8.928285407309714e-06, "loss": 0.1607, "step": 71500 }, { "epoch": 1.455674300254453, "grad_norm": 4.381976865910753, "learning_rate": 8.927845765923499e-06, "loss": 0.2828, "step": 71510 }, { "epoch": 1.4558778625954198, "grad_norm": 11.041919083079595, "learning_rate": 8.92740604520938e-06, "loss": 0.1951, "step": 71520 }, { "epoch": 1.4560814249363867, "grad_norm": 17.428767656064128, "learning_rate": 8.92696624517624e-06, "loss": 0.1782, "step": 71530 }, { "epoch": 1.4562849872773538, "grad_norm": 7.0864607257591805, "learning_rate": 8.926526365832957e-06, "loss": 0.2876, "step": 71540 }, { "epoch": 1.4564885496183206, "grad_norm": 7.749205639016163, "learning_rate": 8.926086407188415e-06, "loss": 0.2496, "step": 71550 }, { "epoch": 1.4566921119592875, "grad_norm": 7.024478880271394, "learning_rate": 8.925646369251503e-06, "loss": 0.2369, "step": 71560 }, { "epoch": 1.4568956743002546, "grad_norm": 7.016568313905356, "learning_rate": 8.925206252031106e-06, "loss": 0.2491, "step": 71570 }, { "epoch": 1.4570992366412214, "grad_norm": 8.430927511608983, "learning_rate": 8.924766055536113e-06, "loss": 0.2848, "step": 71580 }, { "epoch": 1.4573027989821883, "grad_norm": 5.728019878864405, "learning_rate": 8.924325779775416e-06, "loss": 0.295, "step": 71590 }, { "epoch": 1.4575063613231551, "grad_norm": 9.186437522561201, "learning_rate": 8.923885424757904e-06, "loss": 0.1475, "step": 71600 }, { "epoch": 1.4577099236641222, "grad_norm": 7.435886672592545, "learning_rate": 8.923444990492472e-06, "loss": 0.2013, "step": 71610 }, { "epoch": 1.457913486005089, "grad_norm": 8.95450210533776, "learning_rate": 8.923004476988015e-06, "loss": 0.2458, "step": 71620 }, { "epoch": 1.458117048346056, "grad_norm": 18.770316471531284, "learning_rate": 8.92256388425343e-06, "loss": 0.1881, "step": 71630 }, { "epoch": 1.4583206106870228, "grad_norm": 6.083250730855465, "learning_rate": 8.922123212297616e-06, "loss": 0.1647, "step": 71640 }, { "epoch": 1.4585241730279899, "grad_norm": 4.416195764830281, "learning_rate": 8.92168246112947e-06, "loss": 0.2438, "step": 71650 }, { "epoch": 1.4587277353689567, "grad_norm": 7.756511452737443, "learning_rate": 8.921241630757898e-06, "loss": 0.332, "step": 71660 }, { "epoch": 1.4589312977099236, "grad_norm": 10.362380667699535, "learning_rate": 8.9208007211918e-06, "loss": 0.2448, "step": 71670 }, { "epoch": 1.4591348600508907, "grad_norm": 5.266715723994889, "learning_rate": 8.92035973244008e-06, "loss": 0.263, "step": 71680 }, { "epoch": 1.4593384223918575, "grad_norm": 9.075908459293704, "learning_rate": 8.919918664511647e-06, "loss": 0.2003, "step": 71690 }, { "epoch": 1.4595419847328244, "grad_norm": 17.785354281081574, "learning_rate": 8.919477517415407e-06, "loss": 0.2858, "step": 71700 }, { "epoch": 1.4597455470737914, "grad_norm": 4.493824646065589, "learning_rate": 8.919036291160271e-06, "loss": 0.1618, "step": 71710 }, { "epoch": 1.4599491094147583, "grad_norm": 2.5199008616274057, "learning_rate": 8.91859498575515e-06, "loss": 0.1865, "step": 71720 }, { "epoch": 1.4601526717557252, "grad_norm": 4.6826790441133275, "learning_rate": 8.918153601208955e-06, "loss": 0.1831, "step": 71730 }, { "epoch": 1.4603562340966922, "grad_norm": 5.2264794651092625, "learning_rate": 8.917712137530602e-06, "loss": 0.2323, "step": 71740 }, { "epoch": 1.460559796437659, "grad_norm": 4.804411718123574, "learning_rate": 8.917270594729007e-06, "loss": 0.1865, "step": 71750 }, { "epoch": 1.460763358778626, "grad_norm": 20.865900013925817, "learning_rate": 8.916828972813085e-06, "loss": 0.3486, "step": 71760 }, { "epoch": 1.4609669211195928, "grad_norm": 7.97696855132978, "learning_rate": 8.91638727179176e-06, "loss": 0.2791, "step": 71770 }, { "epoch": 1.4611704834605597, "grad_norm": 8.065509288485302, "learning_rate": 8.915945491673947e-06, "loss": 0.2085, "step": 71780 }, { "epoch": 1.4613740458015267, "grad_norm": 9.026350738111022, "learning_rate": 8.915503632468571e-06, "loss": 0.2681, "step": 71790 }, { "epoch": 1.4615776081424936, "grad_norm": 9.437996012444126, "learning_rate": 8.91506169418456e-06, "loss": 0.3096, "step": 71800 }, { "epoch": 1.4617811704834605, "grad_norm": 8.293044360802728, "learning_rate": 8.914619676830831e-06, "loss": 0.2054, "step": 71810 }, { "epoch": 1.4619847328244275, "grad_norm": 7.174725564273299, "learning_rate": 8.914177580416317e-06, "loss": 0.2023, "step": 71820 }, { "epoch": 1.4621882951653944, "grad_norm": 2.6764709001344085, "learning_rate": 8.913735404949947e-06, "loss": 0.2495, "step": 71830 }, { "epoch": 1.4623918575063612, "grad_norm": 4.797508619228837, "learning_rate": 8.913293150440647e-06, "loss": 0.1688, "step": 71840 }, { "epoch": 1.4625954198473283, "grad_norm": 21.81625164027604, "learning_rate": 8.912850816897354e-06, "loss": 0.3319, "step": 71850 }, { "epoch": 1.4627989821882952, "grad_norm": 13.653220984400667, "learning_rate": 8.912408404328999e-06, "loss": 0.1638, "step": 71860 }, { "epoch": 1.463002544529262, "grad_norm": 1.6928880250359637, "learning_rate": 8.911965912744516e-06, "loss": 0.2821, "step": 71870 }, { "epoch": 1.4632061068702291, "grad_norm": 2.7352477493572795, "learning_rate": 8.91152334215284e-06, "loss": 0.1669, "step": 71880 }, { "epoch": 1.463409669211196, "grad_norm": 15.405833846762164, "learning_rate": 8.911080692562917e-06, "loss": 0.2412, "step": 71890 }, { "epoch": 1.4636132315521628, "grad_norm": 18.18045058935253, "learning_rate": 8.91063796398368e-06, "loss": 0.2497, "step": 71900 }, { "epoch": 1.46381679389313, "grad_norm": 11.119028641256705, "learning_rate": 8.910195156424073e-06, "loss": 0.2883, "step": 71910 }, { "epoch": 1.4640203562340968, "grad_norm": 2.735020150379321, "learning_rate": 8.909752269893038e-06, "loss": 0.287, "step": 71920 }, { "epoch": 1.4642239185750636, "grad_norm": 8.958911700809058, "learning_rate": 8.909309304399519e-06, "loss": 0.2313, "step": 71930 }, { "epoch": 1.4644274809160305, "grad_norm": 10.661700417180464, "learning_rate": 8.908866259952464e-06, "loss": 0.1994, "step": 71940 }, { "epoch": 1.4646310432569973, "grad_norm": 1.0996314134780172, "learning_rate": 8.908423136560822e-06, "loss": 0.3072, "step": 71950 }, { "epoch": 1.4648346055979644, "grad_norm": 5.748155880009289, "learning_rate": 8.90797993423354e-06, "loss": 0.152, "step": 71960 }, { "epoch": 1.4650381679389313, "grad_norm": 10.726564648939144, "learning_rate": 8.907536652979568e-06, "loss": 0.1957, "step": 71970 }, { "epoch": 1.4652417302798981, "grad_norm": 3.256374546092251, "learning_rate": 8.907093292807862e-06, "loss": 0.0975, "step": 71980 }, { "epoch": 1.4654452926208652, "grad_norm": 4.09677439229535, "learning_rate": 8.906649853727375e-06, "loss": 0.2134, "step": 71990 }, { "epoch": 1.465648854961832, "grad_norm": 12.605565644520542, "learning_rate": 8.906206335747061e-06, "loss": 0.2856, "step": 72000 }, { "epoch": 1.465852417302799, "grad_norm": 4.132684763129551, "learning_rate": 8.90576273887588e-06, "loss": 0.2995, "step": 72010 }, { "epoch": 1.466055979643766, "grad_norm": 9.810451692109872, "learning_rate": 8.90531906312279e-06, "loss": 0.2414, "step": 72020 }, { "epoch": 1.4662595419847329, "grad_norm": 4.312417821377285, "learning_rate": 8.904875308496752e-06, "loss": 0.2128, "step": 72030 }, { "epoch": 1.4664631043256997, "grad_norm": 14.840708823830656, "learning_rate": 8.904431475006728e-06, "loss": 0.1869, "step": 72040 }, { "epoch": 1.4666666666666668, "grad_norm": 10.274924264020335, "learning_rate": 8.903987562661679e-06, "loss": 0.2391, "step": 72050 }, { "epoch": 1.4668702290076336, "grad_norm": 2.6133795943659512, "learning_rate": 8.903543571470574e-06, "loss": 0.2313, "step": 72060 }, { "epoch": 1.4670737913486005, "grad_norm": 13.200587114395026, "learning_rate": 8.90309950144238e-06, "loss": 0.2428, "step": 72070 }, { "epoch": 1.4672773536895674, "grad_norm": 8.043392610358142, "learning_rate": 8.902655352586064e-06, "loss": 0.2303, "step": 72080 }, { "epoch": 1.4674809160305344, "grad_norm": 4.655056219367476, "learning_rate": 8.902211124910596e-06, "loss": 0.2193, "step": 72090 }, { "epoch": 1.4676844783715013, "grad_norm": 4.856236634893444, "learning_rate": 8.901766818424949e-06, "loss": 0.1802, "step": 72100 }, { "epoch": 1.4678880407124681, "grad_norm": 11.580704873040341, "learning_rate": 8.901322433138095e-06, "loss": 0.3376, "step": 72110 }, { "epoch": 1.468091603053435, "grad_norm": 3.9982523858311985, "learning_rate": 8.90087796905901e-06, "loss": 0.162, "step": 72120 }, { "epoch": 1.468295165394402, "grad_norm": 2.700753169768324, "learning_rate": 8.900433426196668e-06, "loss": 0.183, "step": 72130 }, { "epoch": 1.468498727735369, "grad_norm": 14.279550787749637, "learning_rate": 8.899988804560052e-06, "loss": 0.2131, "step": 72140 }, { "epoch": 1.4687022900763358, "grad_norm": 10.181672444317504, "learning_rate": 8.899544104158138e-06, "loss": 0.2584, "step": 72150 }, { "epoch": 1.4689058524173029, "grad_norm": 12.378682349250205, "learning_rate": 8.89909932499991e-06, "loss": 0.1941, "step": 72160 }, { "epoch": 1.4691094147582697, "grad_norm": 9.207044821806717, "learning_rate": 8.898654467094346e-06, "loss": 0.2563, "step": 72170 }, { "epoch": 1.4693129770992366, "grad_norm": 8.52167706890445, "learning_rate": 8.898209530450435e-06, "loss": 0.1556, "step": 72180 }, { "epoch": 1.4695165394402037, "grad_norm": 7.6436321476709885, "learning_rate": 8.89776451507716e-06, "loss": 0.1692, "step": 72190 }, { "epoch": 1.4697201017811705, "grad_norm": 16.98987130089716, "learning_rate": 8.897319420983514e-06, "loss": 0.2895, "step": 72200 }, { "epoch": 1.4699236641221374, "grad_norm": 9.449058923357722, "learning_rate": 8.89687424817848e-06, "loss": 0.2061, "step": 72210 }, { "epoch": 1.4701272264631045, "grad_norm": 11.95138615560946, "learning_rate": 8.896428996671051e-06, "loss": 0.341, "step": 72220 }, { "epoch": 1.4703307888040713, "grad_norm": 10.102830352251807, "learning_rate": 8.895983666470223e-06, "loss": 0.2727, "step": 72230 }, { "epoch": 1.4705343511450382, "grad_norm": 11.087843459433818, "learning_rate": 8.895538257584983e-06, "loss": 0.1668, "step": 72240 }, { "epoch": 1.470737913486005, "grad_norm": 4.053043667338446, "learning_rate": 8.895092770024332e-06, "loss": 0.1804, "step": 72250 }, { "epoch": 1.4709414758269719, "grad_norm": 12.220669337098288, "learning_rate": 8.894647203797267e-06, "loss": 0.2236, "step": 72260 }, { "epoch": 1.471145038167939, "grad_norm": 11.343474091575883, "learning_rate": 8.894201558912786e-06, "loss": 0.3156, "step": 72270 }, { "epoch": 1.4713486005089058, "grad_norm": 11.494380315068772, "learning_rate": 8.893755835379886e-06, "loss": 0.2613, "step": 72280 }, { "epoch": 1.4715521628498727, "grad_norm": 12.868389674831986, "learning_rate": 8.893310033207573e-06, "loss": 0.2599, "step": 72290 }, { "epoch": 1.4717557251908397, "grad_norm": 11.715038609173146, "learning_rate": 8.89286415240485e-06, "loss": 0.3739, "step": 72300 }, { "epoch": 1.4719592875318066, "grad_norm": 5.351335543745791, "learning_rate": 8.89241819298072e-06, "loss": 0.2013, "step": 72310 }, { "epoch": 1.4721628498727735, "grad_norm": 8.430302544381343, "learning_rate": 8.891972154944192e-06, "loss": 0.2377, "step": 72320 }, { "epoch": 1.4723664122137405, "grad_norm": 4.816324377396521, "learning_rate": 8.891526038304272e-06, "loss": 0.2325, "step": 72330 }, { "epoch": 1.4725699745547074, "grad_norm": 10.354564306677366, "learning_rate": 8.891079843069974e-06, "loss": 0.2388, "step": 72340 }, { "epoch": 1.4727735368956743, "grad_norm": 9.174867962035236, "learning_rate": 8.890633569250306e-06, "loss": 0.2014, "step": 72350 }, { "epoch": 1.4729770992366413, "grad_norm": 7.797038304137006, "learning_rate": 8.890187216854283e-06, "loss": 0.2921, "step": 72360 }, { "epoch": 1.4731806615776082, "grad_norm": 0.19318143788287376, "learning_rate": 8.889740785890916e-06, "loss": 0.2918, "step": 72370 }, { "epoch": 1.473384223918575, "grad_norm": 7.317389981101294, "learning_rate": 8.889294276369224e-06, "loss": 0.164, "step": 72380 }, { "epoch": 1.4735877862595421, "grad_norm": 5.152403784554482, "learning_rate": 8.888847688298228e-06, "loss": 0.2206, "step": 72390 }, { "epoch": 1.473791348600509, "grad_norm": 10.972636358039031, "learning_rate": 8.888401021686941e-06, "loss": 0.2405, "step": 72400 }, { "epoch": 1.4739949109414758, "grad_norm": 0.14177137206098403, "learning_rate": 8.887954276544388e-06, "loss": 0.2133, "step": 72410 }, { "epoch": 1.4741984732824427, "grad_norm": 5.379726370413557, "learning_rate": 8.88750745287959e-06, "loss": 0.1599, "step": 72420 }, { "epoch": 1.4744020356234095, "grad_norm": 13.870167663575229, "learning_rate": 8.887060550701575e-06, "loss": 0.3341, "step": 72430 }, { "epoch": 1.4746055979643766, "grad_norm": 24.14799166540644, "learning_rate": 8.886613570019363e-06, "loss": 0.2659, "step": 72440 }, { "epoch": 1.4748091603053435, "grad_norm": 3.829470866720633, "learning_rate": 8.886166510841984e-06, "loss": 0.2659, "step": 72450 }, { "epoch": 1.4750127226463103, "grad_norm": 4.566514043472674, "learning_rate": 8.885719373178468e-06, "loss": 0.2063, "step": 72460 }, { "epoch": 1.4752162849872774, "grad_norm": 10.887553092800932, "learning_rate": 8.885272157037845e-06, "loss": 0.3417, "step": 72470 }, { "epoch": 1.4754198473282443, "grad_norm": 13.755781141903055, "learning_rate": 8.884824862429144e-06, "loss": 0.2033, "step": 72480 }, { "epoch": 1.4756234096692111, "grad_norm": 9.351412646653703, "learning_rate": 8.884377489361404e-06, "loss": 0.1883, "step": 72490 }, { "epoch": 1.4758269720101782, "grad_norm": 11.298227555516075, "learning_rate": 8.883930037843657e-06, "loss": 0.2729, "step": 72500 }, { "epoch": 1.476030534351145, "grad_norm": 20.70195171812761, "learning_rate": 8.88348250788494e-06, "loss": 0.1834, "step": 72510 }, { "epoch": 1.476234096692112, "grad_norm": 10.400410385631186, "learning_rate": 8.883034899494291e-06, "loss": 0.2961, "step": 72520 }, { "epoch": 1.476437659033079, "grad_norm": 14.574569931966273, "learning_rate": 8.882587212680752e-06, "loss": 0.2541, "step": 72530 }, { "epoch": 1.4766412213740459, "grad_norm": 9.491417477840349, "learning_rate": 8.882139447453363e-06, "loss": 0.2583, "step": 72540 }, { "epoch": 1.4768447837150127, "grad_norm": 11.82154284004357, "learning_rate": 8.881691603821167e-06, "loss": 0.2321, "step": 72550 }, { "epoch": 1.4770483460559796, "grad_norm": 14.359645946934839, "learning_rate": 8.881243681793213e-06, "loss": 0.2396, "step": 72560 }, { "epoch": 1.4772519083969466, "grad_norm": 4.791394470360389, "learning_rate": 8.88079568137854e-06, "loss": 0.2164, "step": 72570 }, { "epoch": 1.4774554707379135, "grad_norm": 5.648997986439759, "learning_rate": 8.880347602586202e-06, "loss": 0.2871, "step": 72580 }, { "epoch": 1.4776590330788804, "grad_norm": 6.0041026793609245, "learning_rate": 8.879899445425243e-06, "loss": 0.169, "step": 72590 }, { "epoch": 1.4778625954198472, "grad_norm": 9.92871752922117, "learning_rate": 8.879451209904722e-06, "loss": 0.2278, "step": 72600 }, { "epoch": 1.4780661577608143, "grad_norm": 3.8228317615163983, "learning_rate": 8.879002896033684e-06, "loss": 0.2439, "step": 72610 }, { "epoch": 1.4782697201017811, "grad_norm": 30.961591840917794, "learning_rate": 8.878554503821188e-06, "loss": 0.2686, "step": 72620 }, { "epoch": 1.478473282442748, "grad_norm": 5.971267435998355, "learning_rate": 8.878106033276288e-06, "loss": 0.2654, "step": 72630 }, { "epoch": 1.478676844783715, "grad_norm": 9.231843017275096, "learning_rate": 8.87765748440804e-06, "loss": 0.2272, "step": 72640 }, { "epoch": 1.478880407124682, "grad_norm": 15.2399527736167, "learning_rate": 8.877208857225505e-06, "loss": 0.1481, "step": 72650 }, { "epoch": 1.4790839694656488, "grad_norm": 14.026047845358876, "learning_rate": 8.876760151737745e-06, "loss": 0.2164, "step": 72660 }, { "epoch": 1.4792875318066159, "grad_norm": 8.798755020212651, "learning_rate": 8.876311367953821e-06, "loss": 0.2233, "step": 72670 }, { "epoch": 1.4794910941475827, "grad_norm": 7.060458009831084, "learning_rate": 8.875862505882793e-06, "loss": 0.2414, "step": 72680 }, { "epoch": 1.4796946564885496, "grad_norm": 0.528705183311898, "learning_rate": 8.875413565533733e-06, "loss": 0.2035, "step": 72690 }, { "epoch": 1.4798982188295167, "grad_norm": 1.984121433293727, "learning_rate": 8.874964546915703e-06, "loss": 0.2475, "step": 72700 }, { "epoch": 1.4801017811704835, "grad_norm": 5.18936420890469, "learning_rate": 8.874515450037772e-06, "loss": 0.3722, "step": 72710 }, { "epoch": 1.4803053435114504, "grad_norm": 0.010307954118169378, "learning_rate": 8.874066274909013e-06, "loss": 0.1463, "step": 72720 }, { "epoch": 1.4805089058524172, "grad_norm": 12.969825243631107, "learning_rate": 8.873617021538497e-06, "loss": 0.2136, "step": 72730 }, { "epoch": 1.480712468193384, "grad_norm": 5.2720394052550255, "learning_rate": 8.873167689935293e-06, "loss": 0.327, "step": 72740 }, { "epoch": 1.4809160305343512, "grad_norm": 12.247760400309575, "learning_rate": 8.872718280108481e-06, "loss": 0.1844, "step": 72750 }, { "epoch": 1.481119592875318, "grad_norm": 4.559063019906411, "learning_rate": 8.872268792067133e-06, "loss": 0.3076, "step": 72760 }, { "epoch": 1.4813231552162849, "grad_norm": 5.409130219997804, "learning_rate": 8.871819225820331e-06, "loss": 0.2358, "step": 72770 }, { "epoch": 1.481526717557252, "grad_norm": 5.247719286149426, "learning_rate": 8.871369581377153e-06, "loss": 0.15, "step": 72780 }, { "epoch": 1.4817302798982188, "grad_norm": 5.009431276061772, "learning_rate": 8.87091985874668e-06, "loss": 0.2782, "step": 72790 }, { "epoch": 1.4819338422391857, "grad_norm": 6.689278700485031, "learning_rate": 8.870470057937994e-06, "loss": 0.2172, "step": 72800 }, { "epoch": 1.4821374045801528, "grad_norm": 4.057969479130316, "learning_rate": 8.870020178960182e-06, "loss": 0.182, "step": 72810 }, { "epoch": 1.4823409669211196, "grad_norm": 5.8374432146416835, "learning_rate": 8.869570221822325e-06, "loss": 0.2355, "step": 72820 }, { "epoch": 1.4825445292620865, "grad_norm": 9.283803894170761, "learning_rate": 8.869120186533515e-06, "loss": 0.1468, "step": 72830 }, { "epoch": 1.4827480916030535, "grad_norm": 6.749693097167302, "learning_rate": 8.86867007310284e-06, "loss": 0.2631, "step": 72840 }, { "epoch": 1.4829516539440204, "grad_norm": 27.377708461651505, "learning_rate": 8.868219881539389e-06, "loss": 0.1786, "step": 72850 }, { "epoch": 1.4831552162849873, "grad_norm": 4.888998934061351, "learning_rate": 8.867769611852256e-06, "loss": 0.2703, "step": 72860 }, { "epoch": 1.4833587786259543, "grad_norm": 2.0848959599121173, "learning_rate": 8.867319264050533e-06, "loss": 0.1435, "step": 72870 }, { "epoch": 1.4835623409669212, "grad_norm": 9.15244341435631, "learning_rate": 8.866868838143317e-06, "loss": 0.2692, "step": 72880 }, { "epoch": 1.483765903307888, "grad_norm": 12.808208415489155, "learning_rate": 8.866418334139705e-06, "loss": 0.1532, "step": 72890 }, { "epoch": 1.483969465648855, "grad_norm": 8.883300050203937, "learning_rate": 8.865967752048794e-06, "loss": 0.2739, "step": 72900 }, { "epoch": 1.4841730279898218, "grad_norm": 4.726681005983504, "learning_rate": 8.865517091879685e-06, "loss": 0.2141, "step": 72910 }, { "epoch": 1.4843765903307888, "grad_norm": 5.4455103973353225, "learning_rate": 8.865066353641481e-06, "loss": 0.2083, "step": 72920 }, { "epoch": 1.4845801526717557, "grad_norm": 22.018866069567693, "learning_rate": 8.864615537343281e-06, "loss": 0.2521, "step": 72930 }, { "epoch": 1.4847837150127225, "grad_norm": 10.13965543269488, "learning_rate": 8.864164642994195e-06, "loss": 0.2177, "step": 72940 }, { "epoch": 1.4849872773536896, "grad_norm": 11.93149601822052, "learning_rate": 8.863713670603325e-06, "loss": 0.2633, "step": 72950 }, { "epoch": 1.4851908396946565, "grad_norm": 7.817837323419307, "learning_rate": 8.863262620179782e-06, "loss": 0.2439, "step": 72960 }, { "epoch": 1.4853944020356233, "grad_norm": 5.26418215219691, "learning_rate": 8.862811491732675e-06, "loss": 0.2736, "step": 72970 }, { "epoch": 1.4855979643765904, "grad_norm": 12.475241508399739, "learning_rate": 8.862360285271114e-06, "loss": 0.2529, "step": 72980 }, { "epoch": 1.4858015267175573, "grad_norm": 1.945503634995969, "learning_rate": 8.861909000804214e-06, "loss": 0.1128, "step": 72990 }, { "epoch": 1.4860050890585241, "grad_norm": 9.238411022760692, "learning_rate": 8.861457638341085e-06, "loss": 0.2572, "step": 73000 }, { "epoch": 1.4862086513994912, "grad_norm": 10.77986092373959, "learning_rate": 8.861006197890847e-06, "loss": 0.2941, "step": 73010 }, { "epoch": 1.486412213740458, "grad_norm": 9.806915514985569, "learning_rate": 8.860554679462615e-06, "loss": 0.2031, "step": 73020 }, { "epoch": 1.486615776081425, "grad_norm": 3.946597276797692, "learning_rate": 8.86010308306551e-06, "loss": 0.306, "step": 73030 }, { "epoch": 1.4868193384223918, "grad_norm": 6.370375968030832, "learning_rate": 8.859651408708652e-06, "loss": 0.2565, "step": 73040 }, { "epoch": 1.4870229007633589, "grad_norm": 7.831146494403471, "learning_rate": 8.859199656401161e-06, "loss": 0.2378, "step": 73050 }, { "epoch": 1.4872264631043257, "grad_norm": 5.493076797520977, "learning_rate": 8.858747826152163e-06, "loss": 0.1755, "step": 73060 }, { "epoch": 1.4874300254452926, "grad_norm": 6.129643060202972, "learning_rate": 8.858295917970783e-06, "loss": 0.2415, "step": 73070 }, { "epoch": 1.4876335877862594, "grad_norm": 2.9129401974963263, "learning_rate": 8.857843931866147e-06, "loss": 0.1427, "step": 73080 }, { "epoch": 1.4878371501272265, "grad_norm": 6.005556049202612, "learning_rate": 8.857391867847384e-06, "loss": 0.2885, "step": 73090 }, { "epoch": 1.4880407124681934, "grad_norm": 3.6757233508261065, "learning_rate": 8.856939725923624e-06, "loss": 0.1923, "step": 73100 }, { "epoch": 1.4882442748091602, "grad_norm": 7.168277880743169, "learning_rate": 8.856487506103999e-06, "loss": 0.1959, "step": 73110 }, { "epoch": 1.4884478371501273, "grad_norm": 3.7315793141087434, "learning_rate": 8.856035208397642e-06, "loss": 0.2515, "step": 73120 }, { "epoch": 1.4886513994910942, "grad_norm": 11.647780181085247, "learning_rate": 8.855582832813686e-06, "loss": 0.2486, "step": 73130 }, { "epoch": 1.488854961832061, "grad_norm": 11.70232900434052, "learning_rate": 8.85513037936127e-06, "loss": 0.3244, "step": 73140 }, { "epoch": 1.489058524173028, "grad_norm": 8.228060902481337, "learning_rate": 8.85467784804953e-06, "loss": 0.1661, "step": 73150 }, { "epoch": 1.489262086513995, "grad_norm": 10.573830227356508, "learning_rate": 8.854225238887607e-06, "loss": 0.2509, "step": 73160 }, { "epoch": 1.4894656488549618, "grad_norm": 11.624294742975623, "learning_rate": 8.853772551884639e-06, "loss": 0.2252, "step": 73170 }, { "epoch": 1.4896692111959289, "grad_norm": 14.905186066058484, "learning_rate": 8.853319787049773e-06, "loss": 0.2365, "step": 73180 }, { "epoch": 1.4898727735368957, "grad_norm": 5.850996538222756, "learning_rate": 8.852866944392149e-06, "loss": 0.2106, "step": 73190 }, { "epoch": 1.4900763358778626, "grad_norm": 13.148348176017713, "learning_rate": 8.852414023920914e-06, "loss": 0.3042, "step": 73200 }, { "epoch": 1.4902798982188294, "grad_norm": 6.083315696603769, "learning_rate": 8.851961025645218e-06, "loss": 0.2017, "step": 73210 }, { "epoch": 1.4904834605597963, "grad_norm": 9.797927009208776, "learning_rate": 8.851507949574208e-06, "loss": 0.2402, "step": 73220 }, { "epoch": 1.4906870229007634, "grad_norm": 9.798867683107966, "learning_rate": 8.851054795717031e-06, "loss": 0.2213, "step": 73230 }, { "epoch": 1.4908905852417302, "grad_norm": 21.049669843797357, "learning_rate": 8.850601564082846e-06, "loss": 0.2258, "step": 73240 }, { "epoch": 1.491094147582697, "grad_norm": 3.5865530837081674, "learning_rate": 8.850148254680801e-06, "loss": 0.168, "step": 73250 }, { "epoch": 1.4912977099236642, "grad_norm": 11.045073650667875, "learning_rate": 8.849694867520053e-06, "loss": 0.2519, "step": 73260 }, { "epoch": 1.491501272264631, "grad_norm": 2.2463228276631058, "learning_rate": 8.849241402609761e-06, "loss": 0.2018, "step": 73270 }, { "epoch": 1.4917048346055979, "grad_norm": 2.7005552321707422, "learning_rate": 8.848787859959078e-06, "loss": 0.4694, "step": 73280 }, { "epoch": 1.491908396946565, "grad_norm": 13.703374531376456, "learning_rate": 8.848334239577168e-06, "loss": 0.2568, "step": 73290 }, { "epoch": 1.4921119592875318, "grad_norm": 6.6949684429595795, "learning_rate": 8.847880541473193e-06, "loss": 0.1782, "step": 73300 }, { "epoch": 1.4923155216284987, "grad_norm": 3.354531698273435, "learning_rate": 8.847426765656312e-06, "loss": 0.2818, "step": 73310 }, { "epoch": 1.4925190839694658, "grad_norm": 2.7718955244404824, "learning_rate": 8.846972912135693e-06, "loss": 0.1561, "step": 73320 }, { "epoch": 1.4927226463104326, "grad_norm": 3.3869223073907437, "learning_rate": 8.846518980920503e-06, "loss": 0.1738, "step": 73330 }, { "epoch": 1.4929262086513995, "grad_norm": 8.901275421316186, "learning_rate": 8.846064972019906e-06, "loss": 0.3633, "step": 73340 }, { "epoch": 1.4931297709923665, "grad_norm": 6.114679698076582, "learning_rate": 8.845610885443074e-06, "loss": 0.1665, "step": 73350 }, { "epoch": 1.4933333333333334, "grad_norm": 9.583416077030039, "learning_rate": 8.845156721199175e-06, "loss": 0.313, "step": 73360 }, { "epoch": 1.4935368956743003, "grad_norm": 5.195477622001602, "learning_rate": 8.844702479297385e-06, "loss": 0.2108, "step": 73370 }, { "epoch": 1.4937404580152671, "grad_norm": 31.933810400410113, "learning_rate": 8.844248159746877e-06, "loss": 0.1573, "step": 73380 }, { "epoch": 1.493944020356234, "grad_norm": 16.17072742708471, "learning_rate": 8.843793762556824e-06, "loss": 0.2306, "step": 73390 }, { "epoch": 1.494147582697201, "grad_norm": 13.134793408907443, "learning_rate": 8.843339287736408e-06, "loss": 0.1627, "step": 73400 }, { "epoch": 1.494351145038168, "grad_norm": 4.295609494708349, "learning_rate": 8.842884735294805e-06, "loss": 0.1901, "step": 73410 }, { "epoch": 1.4945547073791348, "grad_norm": 11.19835723548145, "learning_rate": 8.842430105241193e-06, "loss": 0.1655, "step": 73420 }, { "epoch": 1.4947582697201018, "grad_norm": 3.8015105371049693, "learning_rate": 8.841975397584757e-06, "loss": 0.2924, "step": 73430 }, { "epoch": 1.4949618320610687, "grad_norm": 10.401144460331091, "learning_rate": 8.84152061233468e-06, "loss": 0.2477, "step": 73440 }, { "epoch": 1.4951653944020356, "grad_norm": 9.40427366059375, "learning_rate": 8.841065749500144e-06, "loss": 0.2089, "step": 73450 }, { "epoch": 1.4953689567430026, "grad_norm": 5.137318608909914, "learning_rate": 8.84061080909034e-06, "loss": 0.2108, "step": 73460 }, { "epoch": 1.4955725190839695, "grad_norm": 5.417918794913411, "learning_rate": 8.840155791114454e-06, "loss": 0.2658, "step": 73470 }, { "epoch": 1.4957760814249363, "grad_norm": 10.565121662852226, "learning_rate": 8.839700695581675e-06, "loss": 0.2536, "step": 73480 }, { "epoch": 1.4959796437659034, "grad_norm": 21.01918340407485, "learning_rate": 8.839245522501196e-06, "loss": 0.2414, "step": 73490 }, { "epoch": 1.4961832061068703, "grad_norm": 2.6054784682560728, "learning_rate": 8.838790271882208e-06, "loss": 0.3178, "step": 73500 }, { "epoch": 1.4963867684478371, "grad_norm": 0.3617346479581048, "learning_rate": 8.838334943733906e-06, "loss": 0.2368, "step": 73510 }, { "epoch": 1.496590330788804, "grad_norm": 9.547846612101548, "learning_rate": 8.837879538065486e-06, "loss": 0.2222, "step": 73520 }, { "epoch": 1.496793893129771, "grad_norm": 8.834415404549265, "learning_rate": 8.837424054886146e-06, "loss": 0.2582, "step": 73530 }, { "epoch": 1.496997455470738, "grad_norm": 9.535090907982115, "learning_rate": 8.836968494205084e-06, "loss": 0.2748, "step": 73540 }, { "epoch": 1.4972010178117048, "grad_norm": 6.818983661686076, "learning_rate": 8.836512856031502e-06, "loss": 0.195, "step": 73550 }, { "epoch": 1.4974045801526716, "grad_norm": 1.400403709393656, "learning_rate": 8.8360571403746e-06, "loss": 0.1317, "step": 73560 }, { "epoch": 1.4976081424936387, "grad_norm": 10.222502319696435, "learning_rate": 8.835601347243586e-06, "loss": 0.195, "step": 73570 }, { "epoch": 1.4978117048346056, "grad_norm": 10.42337345020325, "learning_rate": 8.835145476647661e-06, "loss": 0.1983, "step": 73580 }, { "epoch": 1.4980152671755724, "grad_norm": 4.610887837866471, "learning_rate": 8.834689528596034e-06, "loss": 0.195, "step": 73590 }, { "epoch": 1.4982188295165395, "grad_norm": 12.899693847833328, "learning_rate": 8.834233503097912e-06, "loss": 0.2339, "step": 73600 }, { "epoch": 1.4984223918575064, "grad_norm": 1.9052653942973405, "learning_rate": 8.833777400162507e-06, "loss": 0.2193, "step": 73610 }, { "epoch": 1.4986259541984732, "grad_norm": 5.949622712518647, "learning_rate": 8.833321219799029e-06, "loss": 0.3088, "step": 73620 }, { "epoch": 1.4988295165394403, "grad_norm": 12.810383442426648, "learning_rate": 8.832864962016692e-06, "loss": 0.201, "step": 73630 }, { "epoch": 1.4990330788804072, "grad_norm": 6.356479161099238, "learning_rate": 8.832408626824709e-06, "loss": 0.151, "step": 73640 }, { "epoch": 1.499236641221374, "grad_norm": 14.700644776109888, "learning_rate": 8.8319522142323e-06, "loss": 0.3139, "step": 73650 }, { "epoch": 1.499440203562341, "grad_norm": 2.1011410360427485, "learning_rate": 8.831495724248677e-06, "loss": 0.208, "step": 73660 }, { "epoch": 1.499643765903308, "grad_norm": 7.377401931420632, "learning_rate": 8.831039156883066e-06, "loss": 0.2108, "step": 73670 }, { "epoch": 1.4998473282442748, "grad_norm": 0.08838566062229722, "learning_rate": 8.830582512144684e-06, "loss": 0.1301, "step": 73680 }, { "epoch": 1.5000508905852419, "grad_norm": 6.029056238624507, "learning_rate": 8.830125790042753e-06, "loss": 0.2307, "step": 73690 }, { "epoch": 1.5002544529262085, "grad_norm": 14.048640353306299, "learning_rate": 8.829668990586502e-06, "loss": 0.2512, "step": 73700 }, { "epoch": 1.5004580152671756, "grad_norm": 3.9082230550228076, "learning_rate": 8.82921211378515e-06, "loss": 0.1399, "step": 73710 }, { "epoch": 1.5006615776081424, "grad_norm": 7.370375377619572, "learning_rate": 8.828755159647929e-06, "loss": 0.2571, "step": 73720 }, { "epoch": 1.5008651399491093, "grad_norm": 6.058026654995396, "learning_rate": 8.828298128184065e-06, "loss": 0.1954, "step": 73730 }, { "epoch": 1.5010687022900764, "grad_norm": 9.992349947570196, "learning_rate": 8.827841019402792e-06, "loss": 0.2319, "step": 73740 }, { "epoch": 1.5012722646310432, "grad_norm": 12.51039486326792, "learning_rate": 8.827383833313336e-06, "loss": 0.2492, "step": 73750 }, { "epoch": 1.50147582697201, "grad_norm": 0.8310662552144841, "learning_rate": 8.826926569924936e-06, "loss": 0.1769, "step": 73760 }, { "epoch": 1.5016793893129772, "grad_norm": 20.738969229852344, "learning_rate": 8.826469229246824e-06, "loss": 0.2641, "step": 73770 }, { "epoch": 1.501882951653944, "grad_norm": 3.5824010608010677, "learning_rate": 8.826011811288238e-06, "loss": 0.1778, "step": 73780 }, { "epoch": 1.5020865139949109, "grad_norm": 17.074485533939285, "learning_rate": 8.825554316058416e-06, "loss": 0.176, "step": 73790 }, { "epoch": 1.502290076335878, "grad_norm": 7.450812965607563, "learning_rate": 8.825096743566597e-06, "loss": 0.257, "step": 73800 }, { "epoch": 1.5024936386768448, "grad_norm": 14.124034018525398, "learning_rate": 8.824639093822021e-06, "loss": 0.3404, "step": 73810 }, { "epoch": 1.5026972010178117, "grad_norm": 3.654485601004504, "learning_rate": 8.824181366833934e-06, "loss": 0.1838, "step": 73820 }, { "epoch": 1.5029007633587788, "grad_norm": 12.424289674298045, "learning_rate": 8.823723562611579e-06, "loss": 0.2637, "step": 73830 }, { "epoch": 1.5031043256997454, "grad_norm": 3.735704641440666, "learning_rate": 8.823265681164201e-06, "loss": 0.2816, "step": 73840 }, { "epoch": 1.5033078880407125, "grad_norm": 6.91443731231454, "learning_rate": 8.822807722501047e-06, "loss": 0.2089, "step": 73850 }, { "epoch": 1.5035114503816795, "grad_norm": 15.661690403438344, "learning_rate": 8.82234968663137e-06, "loss": 0.1744, "step": 73860 }, { "epoch": 1.5037150127226462, "grad_norm": 12.29323136987937, "learning_rate": 8.821891573564416e-06, "loss": 0.2395, "step": 73870 }, { "epoch": 1.5039185750636133, "grad_norm": 11.994223792461288, "learning_rate": 8.82143338330944e-06, "loss": 0.2039, "step": 73880 }, { "epoch": 1.5041221374045801, "grad_norm": 7.880171400968913, "learning_rate": 8.820975115875694e-06, "loss": 0.2188, "step": 73890 }, { "epoch": 1.504325699745547, "grad_norm": 6.340398321786182, "learning_rate": 8.820516771272435e-06, "loss": 0.1729, "step": 73900 }, { "epoch": 1.504529262086514, "grad_norm": 10.585979509920207, "learning_rate": 8.820058349508919e-06, "loss": 0.3277, "step": 73910 }, { "epoch": 1.504732824427481, "grad_norm": 8.06327704155369, "learning_rate": 8.819599850594404e-06, "loss": 0.1298, "step": 73920 }, { "epoch": 1.5049363867684478, "grad_norm": 0.7032778912659621, "learning_rate": 8.81914127453815e-06, "loss": 0.2668, "step": 73930 }, { "epoch": 1.5051399491094148, "grad_norm": 10.417126164642177, "learning_rate": 8.818682621349421e-06, "loss": 0.3028, "step": 73940 }, { "epoch": 1.5053435114503817, "grad_norm": 11.991415563663285, "learning_rate": 8.818223891037477e-06, "loss": 0.3183, "step": 73950 }, { "epoch": 1.5055470737913486, "grad_norm": 11.275400347401762, "learning_rate": 8.817765083611583e-06, "loss": 0.311, "step": 73960 }, { "epoch": 1.5057506361323156, "grad_norm": 10.357677193005077, "learning_rate": 8.817306199081006e-06, "loss": 0.3068, "step": 73970 }, { "epoch": 1.5059541984732825, "grad_norm": 18.661361864358945, "learning_rate": 8.816847237455013e-06, "loss": 0.1801, "step": 73980 }, { "epoch": 1.5061577608142493, "grad_norm": 0.4665820457763429, "learning_rate": 8.816388198742874e-06, "loss": 0.09, "step": 73990 }, { "epoch": 1.5063613231552164, "grad_norm": 11.964798349706863, "learning_rate": 8.81592908295386e-06, "loss": 0.2864, "step": 74000 }, { "epoch": 1.506564885496183, "grad_norm": 9.96875782073572, "learning_rate": 8.815469890097247e-06, "loss": 0.2392, "step": 74010 }, { "epoch": 1.5067684478371501, "grad_norm": 9.268989208766348, "learning_rate": 8.815010620182301e-06, "loss": 0.2559, "step": 74020 }, { "epoch": 1.506972010178117, "grad_norm": 10.529983710515987, "learning_rate": 8.814551273218303e-06, "loss": 0.1903, "step": 74030 }, { "epoch": 1.5071755725190839, "grad_norm": 9.755444824768189, "learning_rate": 8.814091849214528e-06, "loss": 0.2174, "step": 74040 }, { "epoch": 1.507379134860051, "grad_norm": 5.721329257767332, "learning_rate": 8.813632348180256e-06, "loss": 0.1691, "step": 74050 }, { "epoch": 1.5075826972010178, "grad_norm": 5.372637685539456, "learning_rate": 8.813172770124769e-06, "loss": 0.1834, "step": 74060 }, { "epoch": 1.5077862595419846, "grad_norm": 8.069455469955386, "learning_rate": 8.812713115057344e-06, "loss": 0.3322, "step": 74070 }, { "epoch": 1.5079898218829517, "grad_norm": 6.430835448149142, "learning_rate": 8.812253382987269e-06, "loss": 0.1897, "step": 74080 }, { "epoch": 1.5081933842239186, "grad_norm": 12.524892202103747, "learning_rate": 8.811793573923824e-06, "loss": 0.2511, "step": 74090 }, { "epoch": 1.5083969465648854, "grad_norm": 7.934973382211411, "learning_rate": 8.8113336878763e-06, "loss": 0.1689, "step": 74100 }, { "epoch": 1.5086005089058525, "grad_norm": 7.37593083839758, "learning_rate": 8.810873724853983e-06, "loss": 0.1695, "step": 74110 }, { "epoch": 1.5088040712468194, "grad_norm": 5.351596797416668, "learning_rate": 8.810413684866164e-06, "loss": 0.2633, "step": 74120 }, { "epoch": 1.5090076335877862, "grad_norm": 3.7221917194337912, "learning_rate": 8.809953567922132e-06, "loss": 0.2555, "step": 74130 }, { "epoch": 1.5092111959287533, "grad_norm": 8.360046397735177, "learning_rate": 8.80949337403118e-06, "loss": 0.1394, "step": 74140 }, { "epoch": 1.50941475826972, "grad_norm": 9.111850971827165, "learning_rate": 8.809033103202603e-06, "loss": 0.2639, "step": 74150 }, { "epoch": 1.509618320610687, "grad_norm": 14.66346717312279, "learning_rate": 8.808572755445696e-06, "loss": 0.2222, "step": 74160 }, { "epoch": 1.509821882951654, "grad_norm": 8.95843595090064, "learning_rate": 8.808112330769756e-06, "loss": 0.2283, "step": 74170 }, { "epoch": 1.5100254452926207, "grad_norm": 4.785243667188295, "learning_rate": 8.807651829184086e-06, "loss": 0.1574, "step": 74180 }, { "epoch": 1.5102290076335878, "grad_norm": 13.560933995282827, "learning_rate": 8.80719125069798e-06, "loss": 0.2833, "step": 74190 }, { "epoch": 1.5104325699745547, "grad_norm": 8.03177154986393, "learning_rate": 8.806730595320744e-06, "loss": 0.2222, "step": 74200 }, { "epoch": 1.5106361323155215, "grad_norm": 11.692458685959455, "learning_rate": 8.806269863061681e-06, "loss": 0.2748, "step": 74210 }, { "epoch": 1.5108396946564886, "grad_norm": 4.72843850854348, "learning_rate": 8.805809053930094e-06, "loss": 0.1603, "step": 74220 }, { "epoch": 1.5110432569974555, "grad_norm": 10.050002292859807, "learning_rate": 8.805348167935293e-06, "loss": 0.3188, "step": 74230 }, { "epoch": 1.5112468193384223, "grad_norm": 4.988730358861663, "learning_rate": 8.804887205086584e-06, "loss": 0.1711, "step": 74240 }, { "epoch": 1.5114503816793894, "grad_norm": 8.440926895878484, "learning_rate": 8.804426165393277e-06, "loss": 0.2489, "step": 74250 }, { "epoch": 1.5116539440203562, "grad_norm": 2.343842616999339, "learning_rate": 8.803965048864684e-06, "loss": 0.2682, "step": 74260 }, { "epoch": 1.511857506361323, "grad_norm": 7.636728515363991, "learning_rate": 8.803503855510117e-06, "loss": 0.2395, "step": 74270 }, { "epoch": 1.5120610687022902, "grad_norm": 14.267178170009041, "learning_rate": 8.803042585338893e-06, "loss": 0.1819, "step": 74280 }, { "epoch": 1.512264631043257, "grad_norm": 14.203268755010974, "learning_rate": 8.802581238360322e-06, "loss": 0.2327, "step": 74290 }, { "epoch": 1.512468193384224, "grad_norm": 6.3653958208040935, "learning_rate": 8.802119814583727e-06, "loss": 0.2193, "step": 74300 }, { "epoch": 1.512671755725191, "grad_norm": 6.932711863069168, "learning_rate": 8.801658314018425e-06, "loss": 0.2061, "step": 74310 }, { "epoch": 1.5128753180661576, "grad_norm": 9.838835002303448, "learning_rate": 8.801196736673736e-06, "loss": 0.1426, "step": 74320 }, { "epoch": 1.5130788804071247, "grad_norm": 12.973344447673428, "learning_rate": 8.800735082558986e-06, "loss": 0.3105, "step": 74330 }, { "epoch": 1.5132824427480918, "grad_norm": 0.415107474487384, "learning_rate": 8.800273351683493e-06, "loss": 0.1629, "step": 74340 }, { "epoch": 1.5134860050890584, "grad_norm": 11.011340497840632, "learning_rate": 8.799811544056586e-06, "loss": 0.1989, "step": 74350 }, { "epoch": 1.5136895674300255, "grad_norm": 31.28770708170488, "learning_rate": 8.79934965968759e-06, "loss": 0.2617, "step": 74360 }, { "epoch": 1.5138931297709923, "grad_norm": 3.1308941678197044, "learning_rate": 8.798887698585833e-06, "loss": 0.1776, "step": 74370 }, { "epoch": 1.5140966921119592, "grad_norm": 12.867132227170114, "learning_rate": 8.798425660760648e-06, "loss": 0.2548, "step": 74380 }, { "epoch": 1.5143002544529263, "grad_norm": 9.671479214769445, "learning_rate": 8.797963546221364e-06, "loss": 0.247, "step": 74390 }, { "epoch": 1.5145038167938931, "grad_norm": 18.159977950838115, "learning_rate": 8.797501354977316e-06, "loss": 0.257, "step": 74400 }, { "epoch": 1.51470737913486, "grad_norm": 3.53916095770249, "learning_rate": 8.797039087037833e-06, "loss": 0.2192, "step": 74410 }, { "epoch": 1.514910941475827, "grad_norm": 7.936300050487919, "learning_rate": 8.79657674241226e-06, "loss": 0.2053, "step": 74420 }, { "epoch": 1.515114503816794, "grad_norm": 0.6658727696814462, "learning_rate": 8.796114321109926e-06, "loss": 0.2275, "step": 74430 }, { "epoch": 1.5153180661577608, "grad_norm": 0.5421420833194597, "learning_rate": 8.795651823140176e-06, "loss": 0.0976, "step": 74440 }, { "epoch": 1.5155216284987278, "grad_norm": 5.641917031212767, "learning_rate": 8.795189248512347e-06, "loss": 0.1902, "step": 74450 }, { "epoch": 1.5157251908396947, "grad_norm": 12.911497722294497, "learning_rate": 8.794726597235784e-06, "loss": 0.2239, "step": 74460 }, { "epoch": 1.5159287531806616, "grad_norm": 14.53436089297144, "learning_rate": 8.79426386931983e-06, "loss": 0.3258, "step": 74470 }, { "epoch": 1.5161323155216286, "grad_norm": 8.111108096201967, "learning_rate": 8.793801064773829e-06, "loss": 0.1883, "step": 74480 }, { "epoch": 1.5163358778625953, "grad_norm": 13.276201086296794, "learning_rate": 8.793338183607129e-06, "loss": 0.3272, "step": 74490 }, { "epoch": 1.5165394402035624, "grad_norm": 31.70361975585843, "learning_rate": 8.79287522582908e-06, "loss": 0.2723, "step": 74500 }, { "epoch": 1.5167430025445294, "grad_norm": 6.838973050762021, "learning_rate": 8.79241219144903e-06, "loss": 0.2785, "step": 74510 }, { "epoch": 1.516946564885496, "grad_norm": 4.979088335514472, "learning_rate": 8.791949080476331e-06, "loss": 0.2049, "step": 74520 }, { "epoch": 1.5171501272264631, "grad_norm": 12.799600993815854, "learning_rate": 8.791485892920337e-06, "loss": 0.2547, "step": 74530 }, { "epoch": 1.51735368956743, "grad_norm": 11.459474509159163, "learning_rate": 8.791022628790402e-06, "loss": 0.1938, "step": 74540 }, { "epoch": 1.5175572519083969, "grad_norm": 12.813884247788701, "learning_rate": 8.790559288095881e-06, "loss": 0.2439, "step": 74550 }, { "epoch": 1.517760814249364, "grad_norm": 2.5554632287172088, "learning_rate": 8.790095870846134e-06, "loss": 0.2536, "step": 74560 }, { "epoch": 1.5179643765903308, "grad_norm": 10.075151422821524, "learning_rate": 8.789632377050518e-06, "loss": 0.2901, "step": 74570 }, { "epoch": 1.5181679389312976, "grad_norm": 3.2563191289479305, "learning_rate": 8.789168806718398e-06, "loss": 0.2281, "step": 74580 }, { "epoch": 1.5183715012722647, "grad_norm": 14.133240808794934, "learning_rate": 8.78870515985913e-06, "loss": 0.1887, "step": 74590 }, { "epoch": 1.5185750636132316, "grad_norm": 10.775006281335811, "learning_rate": 8.788241436482084e-06, "loss": 0.1821, "step": 74600 }, { "epoch": 1.5187786259541984, "grad_norm": 7.407224829882602, "learning_rate": 8.787777636596623e-06, "loss": 0.178, "step": 74610 }, { "epoch": 1.5189821882951655, "grad_norm": 14.205288472792056, "learning_rate": 8.787313760212113e-06, "loss": 0.1672, "step": 74620 }, { "epoch": 1.5191857506361324, "grad_norm": 2.1607349332540466, "learning_rate": 8.786849807337923e-06, "loss": 0.2732, "step": 74630 }, { "epoch": 1.5193893129770992, "grad_norm": 24.04347209356199, "learning_rate": 8.786385777983425e-06, "loss": 0.2147, "step": 74640 }, { "epoch": 1.5195928753180663, "grad_norm": 1.3432666944904106, "learning_rate": 8.785921672157988e-06, "loss": 0.2781, "step": 74650 }, { "epoch": 1.519796437659033, "grad_norm": 14.03331713182748, "learning_rate": 8.785457489870988e-06, "loss": 0.3009, "step": 74660 }, { "epoch": 1.52, "grad_norm": 4.121689451510154, "learning_rate": 8.784993231131797e-06, "loss": 0.1979, "step": 74670 }, { "epoch": 1.5202035623409669, "grad_norm": 31.809159714235655, "learning_rate": 8.784528895949795e-06, "loss": 0.2, "step": 74680 }, { "epoch": 1.5204071246819337, "grad_norm": 5.833141695433253, "learning_rate": 8.784064484334355e-06, "loss": 0.142, "step": 74690 }, { "epoch": 1.5206106870229008, "grad_norm": 0.4677865675929051, "learning_rate": 8.783599996294861e-06, "loss": 0.2251, "step": 74700 }, { "epoch": 1.5208142493638677, "grad_norm": 9.699882945007385, "learning_rate": 8.78313543184069e-06, "loss": 0.2209, "step": 74710 }, { "epoch": 1.5210178117048345, "grad_norm": 8.533745072529179, "learning_rate": 8.782670790981227e-06, "loss": 0.2124, "step": 74720 }, { "epoch": 1.5212213740458016, "grad_norm": 6.967970673035397, "learning_rate": 8.782206073725856e-06, "loss": 0.2911, "step": 74730 }, { "epoch": 1.5214249363867685, "grad_norm": 5.055547015850146, "learning_rate": 8.781741280083961e-06, "loss": 0.2713, "step": 74740 }, { "epoch": 1.5216284987277353, "grad_norm": 5.630947082404019, "learning_rate": 8.781276410064931e-06, "loss": 0.2925, "step": 74750 }, { "epoch": 1.5218320610687024, "grad_norm": 7.288407749607272, "learning_rate": 8.780811463678152e-06, "loss": 0.1544, "step": 74760 }, { "epoch": 1.5220356234096692, "grad_norm": 5.3213522710448, "learning_rate": 8.780346440933016e-06, "loss": 0.2159, "step": 74770 }, { "epoch": 1.522239185750636, "grad_norm": 8.779322832816677, "learning_rate": 8.779881341838916e-06, "loss": 0.2445, "step": 74780 }, { "epoch": 1.5224427480916032, "grad_norm": 9.1697303040792, "learning_rate": 8.779416166405243e-06, "loss": 0.1624, "step": 74790 }, { "epoch": 1.5226463104325698, "grad_norm": 3.876562184011909, "learning_rate": 8.778950914641393e-06, "loss": 0.2008, "step": 74800 }, { "epoch": 1.522849872773537, "grad_norm": 15.452212280676873, "learning_rate": 8.778485586556761e-06, "loss": 0.2943, "step": 74810 }, { "epoch": 1.523053435114504, "grad_norm": 2.5565165499632645, "learning_rate": 8.778020182160748e-06, "loss": 0.1586, "step": 74820 }, { "epoch": 1.5232569974554706, "grad_norm": 7.626528101228248, "learning_rate": 8.77755470146275e-06, "loss": 0.1982, "step": 74830 }, { "epoch": 1.5234605597964377, "grad_norm": 7.368915562080422, "learning_rate": 8.77708914447217e-06, "loss": 0.2159, "step": 74840 }, { "epoch": 1.5236641221374045, "grad_norm": 8.363770687063491, "learning_rate": 8.77662351119841e-06, "loss": 0.3041, "step": 74850 }, { "epoch": 1.5238676844783714, "grad_norm": 2.244932679966014, "learning_rate": 8.776157801650873e-06, "loss": 0.2437, "step": 74860 }, { "epoch": 1.5240712468193385, "grad_norm": 16.90319232875292, "learning_rate": 8.775692015838965e-06, "loss": 0.3157, "step": 74870 }, { "epoch": 1.5242748091603053, "grad_norm": 5.681067080064488, "learning_rate": 8.775226153772096e-06, "loss": 0.2093, "step": 74880 }, { "epoch": 1.5244783715012722, "grad_norm": 8.853414365215025, "learning_rate": 8.77476021545967e-06, "loss": 0.16, "step": 74890 }, { "epoch": 1.5246819338422393, "grad_norm": 2.761468354600301, "learning_rate": 8.774294200911101e-06, "loss": 0.1933, "step": 74900 }, { "epoch": 1.5248854961832061, "grad_norm": 7.93585329973871, "learning_rate": 8.773828110135798e-06, "loss": 0.2891, "step": 74910 }, { "epoch": 1.525089058524173, "grad_norm": 14.433667962687917, "learning_rate": 8.773361943143177e-06, "loss": 0.2955, "step": 74920 }, { "epoch": 1.52529262086514, "grad_norm": 17.9517597319807, "learning_rate": 8.772895699942652e-06, "loss": 0.3413, "step": 74930 }, { "epoch": 1.525496183206107, "grad_norm": 1.36645927153765, "learning_rate": 8.772429380543636e-06, "loss": 0.2554, "step": 74940 }, { "epoch": 1.5256997455470738, "grad_norm": 6.1331266060096254, "learning_rate": 8.771962984955554e-06, "loss": 0.1709, "step": 74950 }, { "epoch": 1.5259033078880408, "grad_norm": 7.211166698340803, "learning_rate": 8.771496513187818e-06, "loss": 0.2308, "step": 74960 }, { "epoch": 1.5261068702290075, "grad_norm": 3.5711316349709663, "learning_rate": 8.771029965249853e-06, "loss": 0.1796, "step": 74970 }, { "epoch": 1.5263104325699746, "grad_norm": 6.698379551740229, "learning_rate": 8.770563341151082e-06, "loss": 0.2874, "step": 74980 }, { "epoch": 1.5265139949109416, "grad_norm": 16.338337325760406, "learning_rate": 8.770096640900925e-06, "loss": 0.2735, "step": 74990 }, { "epoch": 1.5267175572519083, "grad_norm": 11.13668746655016, "learning_rate": 8.769629864508813e-06, "loss": 0.245, "step": 75000 }, { "epoch": 1.5269211195928754, "grad_norm": 6.689756320976985, "learning_rate": 8.76916301198417e-06, "loss": 0.2549, "step": 75010 }, { "epoch": 1.5271246819338422, "grad_norm": 9.064554439942748, "learning_rate": 8.768696083336426e-06, "loss": 0.1902, "step": 75020 }, { "epoch": 1.527328244274809, "grad_norm": 5.507521239895516, "learning_rate": 8.768229078575011e-06, "loss": 0.2181, "step": 75030 }, { "epoch": 1.5275318066157761, "grad_norm": 14.76124240043196, "learning_rate": 8.767761997709354e-06, "loss": 0.2418, "step": 75040 }, { "epoch": 1.527735368956743, "grad_norm": 4.638346217402652, "learning_rate": 8.767294840748893e-06, "loss": 0.1741, "step": 75050 }, { "epoch": 1.5279389312977099, "grad_norm": 5.085171110868882, "learning_rate": 8.766827607703059e-06, "loss": 0.2505, "step": 75060 }, { "epoch": 1.528142493638677, "grad_norm": 11.232146686493103, "learning_rate": 8.76636029858129e-06, "loss": 0.2091, "step": 75070 }, { "epoch": 1.5283460559796438, "grad_norm": 9.43640905491529, "learning_rate": 8.765892913393024e-06, "loss": 0.2727, "step": 75080 }, { "epoch": 1.5285496183206106, "grad_norm": 9.460656459693247, "learning_rate": 8.7654254521477e-06, "loss": 0.1807, "step": 75090 }, { "epoch": 1.5287531806615777, "grad_norm": 13.155334917200044, "learning_rate": 8.764957914854759e-06, "loss": 0.2129, "step": 75100 }, { "epoch": 1.5289567430025446, "grad_norm": 8.041458455096901, "learning_rate": 8.764490301523644e-06, "loss": 0.1904, "step": 75110 }, { "epoch": 1.5291603053435114, "grad_norm": 11.622321613906776, "learning_rate": 8.764022612163799e-06, "loss": 0.2621, "step": 75120 }, { "epoch": 1.5293638676844785, "grad_norm": 8.72122481989113, "learning_rate": 8.76355484678467e-06, "loss": 0.2149, "step": 75130 }, { "epoch": 1.5295674300254452, "grad_norm": 35.23641645476448, "learning_rate": 8.7630870053957e-06, "loss": 0.2181, "step": 75140 }, { "epoch": 1.5297709923664122, "grad_norm": 4.744164038317457, "learning_rate": 8.762619088006344e-06, "loss": 0.1929, "step": 75150 }, { "epoch": 1.529974554707379, "grad_norm": 5.635311632931575, "learning_rate": 8.76215109462605e-06, "loss": 0.3001, "step": 75160 }, { "epoch": 1.530178117048346, "grad_norm": 3.5039603659376506, "learning_rate": 8.761683025264267e-06, "loss": 0.1543, "step": 75170 }, { "epoch": 1.530381679389313, "grad_norm": 15.43682787139189, "learning_rate": 8.761214879930452e-06, "loss": 0.2011, "step": 75180 }, { "epoch": 1.5305852417302799, "grad_norm": 16.83754529230102, "learning_rate": 8.760746658634056e-06, "loss": 0.2538, "step": 75190 }, { "epoch": 1.5307888040712467, "grad_norm": 7.317853302402955, "learning_rate": 8.76027836138454e-06, "loss": 0.2095, "step": 75200 }, { "epoch": 1.5309923664122138, "grad_norm": 19.321655375478507, "learning_rate": 8.759809988191357e-06, "loss": 0.2239, "step": 75210 }, { "epoch": 1.5311959287531807, "grad_norm": 5.445736710360377, "learning_rate": 8.75934153906397e-06, "loss": 0.2127, "step": 75220 }, { "epoch": 1.5313994910941475, "grad_norm": 11.38562731519704, "learning_rate": 8.758873014011839e-06, "loss": 0.2904, "step": 75230 }, { "epoch": 1.5316030534351146, "grad_norm": 7.494239995453411, "learning_rate": 8.758404413044426e-06, "loss": 0.1903, "step": 75240 }, { "epoch": 1.5318066157760815, "grad_norm": 8.010438154241921, "learning_rate": 8.757935736171192e-06, "loss": 0.1948, "step": 75250 }, { "epoch": 1.5320101781170483, "grad_norm": 6.045131176449988, "learning_rate": 8.757466983401609e-06, "loss": 0.364, "step": 75260 }, { "epoch": 1.5322137404580154, "grad_norm": 5.982418479291541, "learning_rate": 8.75699815474514e-06, "loss": 0.1873, "step": 75270 }, { "epoch": 1.532417302798982, "grad_norm": 54.474843827596736, "learning_rate": 8.756529250211255e-06, "loss": 0.2212, "step": 75280 }, { "epoch": 1.532620865139949, "grad_norm": 8.708141992664592, "learning_rate": 8.756060269809421e-06, "loss": 0.2851, "step": 75290 }, { "epoch": 1.5328244274809162, "grad_norm": 5.90734590247666, "learning_rate": 8.755591213549115e-06, "loss": 0.1871, "step": 75300 }, { "epoch": 1.5330279898218828, "grad_norm": 9.784271691344713, "learning_rate": 8.755122081439806e-06, "loss": 0.3179, "step": 75310 }, { "epoch": 1.53323155216285, "grad_norm": 12.998995289610688, "learning_rate": 8.754652873490968e-06, "loss": 0.2642, "step": 75320 }, { "epoch": 1.5334351145038168, "grad_norm": 6.704059249885986, "learning_rate": 8.754183589712082e-06, "loss": 0.2709, "step": 75330 }, { "epoch": 1.5336386768447836, "grad_norm": 8.866156139162998, "learning_rate": 8.753714230112621e-06, "loss": 0.1927, "step": 75340 }, { "epoch": 1.5338422391857507, "grad_norm": 1.0616142115714577, "learning_rate": 8.75324479470207e-06, "loss": 0.2868, "step": 75350 }, { "epoch": 1.5340458015267175, "grad_norm": 7.551993234840146, "learning_rate": 8.752775283489903e-06, "loss": 0.2137, "step": 75360 }, { "epoch": 1.5342493638676844, "grad_norm": 6.159779500189102, "learning_rate": 8.752305696485605e-06, "loss": 0.2294, "step": 75370 }, { "epoch": 1.5344529262086515, "grad_norm": 13.094518330397175, "learning_rate": 8.751836033698662e-06, "loss": 0.1849, "step": 75380 }, { "epoch": 1.5346564885496183, "grad_norm": 8.665179876304055, "learning_rate": 8.751366295138558e-06, "loss": 0.1974, "step": 75390 }, { "epoch": 1.5348600508905852, "grad_norm": 9.139421189980753, "learning_rate": 8.750896480814782e-06, "loss": 0.2585, "step": 75400 }, { "epoch": 1.5350636132315523, "grad_norm": 11.74583063042965, "learning_rate": 8.750426590736816e-06, "loss": 0.1903, "step": 75410 }, { "epoch": 1.5352671755725191, "grad_norm": 8.864813298938891, "learning_rate": 8.749956624914158e-06, "loss": 0.1613, "step": 75420 }, { "epoch": 1.535470737913486, "grad_norm": 7.03687677719668, "learning_rate": 8.749486583356295e-06, "loss": 0.3653, "step": 75430 }, { "epoch": 1.535674300254453, "grad_norm": 5.162748490938148, "learning_rate": 8.74901646607272e-06, "loss": 0.1599, "step": 75440 }, { "epoch": 1.5358778625954197, "grad_norm": 16.535959667361297, "learning_rate": 8.74854627307293e-06, "loss": 0.2101, "step": 75450 }, { "epoch": 1.5360814249363868, "grad_norm": 6.087975851936349, "learning_rate": 8.748076004366419e-06, "loss": 0.1787, "step": 75460 }, { "epoch": 1.5362849872773539, "grad_norm": 17.758040552198302, "learning_rate": 8.747605659962686e-06, "loss": 0.2815, "step": 75470 }, { "epoch": 1.5364885496183205, "grad_norm": 4.110022341882738, "learning_rate": 8.74713523987123e-06, "loss": 0.2159, "step": 75480 }, { "epoch": 1.5366921119592876, "grad_norm": 5.76761769299474, "learning_rate": 8.746664744101553e-06, "loss": 0.2374, "step": 75490 }, { "epoch": 1.5368956743002544, "grad_norm": 4.44842273125237, "learning_rate": 8.746194172663155e-06, "loss": 0.1939, "step": 75500 }, { "epoch": 1.5370992366412213, "grad_norm": 18.32106206269006, "learning_rate": 8.745723525565539e-06, "loss": 0.2256, "step": 75510 }, { "epoch": 1.5373027989821884, "grad_norm": 5.280914590475964, "learning_rate": 8.745252802818213e-06, "loss": 0.1958, "step": 75520 }, { "epoch": 1.5375063613231552, "grad_norm": 12.987536386694398, "learning_rate": 8.744782004430682e-06, "loss": 0.1673, "step": 75530 }, { "epoch": 1.537709923664122, "grad_norm": 3.2650011588345533, "learning_rate": 8.744311130412457e-06, "loss": 0.2491, "step": 75540 }, { "epoch": 1.5379134860050891, "grad_norm": 7.813251862892709, "learning_rate": 8.743840180773045e-06, "loss": 0.2515, "step": 75550 }, { "epoch": 1.538117048346056, "grad_norm": 9.279485599601097, "learning_rate": 8.74336915552196e-06, "loss": 0.2456, "step": 75560 }, { "epoch": 1.5383206106870229, "grad_norm": 13.75733839097505, "learning_rate": 8.742898054668711e-06, "loss": 0.2524, "step": 75570 }, { "epoch": 1.53852417302799, "grad_norm": 4.994226781719476, "learning_rate": 8.742426878222817e-06, "loss": 0.2266, "step": 75580 }, { "epoch": 1.5387277353689568, "grad_norm": 3.037628502274088, "learning_rate": 8.74195562619379e-06, "loss": 0.1512, "step": 75590 }, { "epoch": 1.5389312977099237, "grad_norm": 15.232472569745251, "learning_rate": 8.741484298591149e-06, "loss": 0.2706, "step": 75600 }, { "epoch": 1.5391348600508907, "grad_norm": 2.085447035104827, "learning_rate": 8.741012895424417e-06, "loss": 0.2258, "step": 75610 }, { "epoch": 1.5393384223918574, "grad_norm": 0.5691030547808145, "learning_rate": 8.74054141670311e-06, "loss": 0.2636, "step": 75620 }, { "epoch": 1.5395419847328244, "grad_norm": 3.2116542328545106, "learning_rate": 8.74006986243675e-06, "loss": 0.1637, "step": 75630 }, { "epoch": 1.5397455470737913, "grad_norm": 11.560411942396014, "learning_rate": 8.739598232634863e-06, "loss": 0.212, "step": 75640 }, { "epoch": 1.5399491094147582, "grad_norm": 10.73579909945917, "learning_rate": 8.739126527306972e-06, "loss": 0.1995, "step": 75650 }, { "epoch": 1.5401526717557252, "grad_norm": 4.034795533536358, "learning_rate": 8.738654746462605e-06, "loss": 0.1825, "step": 75660 }, { "epoch": 1.540356234096692, "grad_norm": 9.196472886194673, "learning_rate": 8.73818289011129e-06, "loss": 0.3894, "step": 75670 }, { "epoch": 1.540559796437659, "grad_norm": 17.500498157821557, "learning_rate": 8.737710958262558e-06, "loss": 0.2066, "step": 75680 }, { "epoch": 1.540763358778626, "grad_norm": 9.30519502188945, "learning_rate": 8.73723895092594e-06, "loss": 0.2074, "step": 75690 }, { "epoch": 1.5409669211195929, "grad_norm": 9.566274334438237, "learning_rate": 8.736766868110965e-06, "loss": 0.1669, "step": 75700 }, { "epoch": 1.5411704834605597, "grad_norm": 8.598429994118447, "learning_rate": 8.73629470982717e-06, "loss": 0.1514, "step": 75710 }, { "epoch": 1.5413740458015268, "grad_norm": 6.057012988429003, "learning_rate": 8.735822476084094e-06, "loss": 0.1505, "step": 75720 }, { "epoch": 1.5415776081424937, "grad_norm": 6.7440413386997635, "learning_rate": 8.73535016689127e-06, "loss": 0.2551, "step": 75730 }, { "epoch": 1.5417811704834605, "grad_norm": 2.724879938039998, "learning_rate": 8.734877782258238e-06, "loss": 0.2062, "step": 75740 }, { "epoch": 1.5419847328244276, "grad_norm": 14.200091229184864, "learning_rate": 8.734405322194539e-06, "loss": 0.1863, "step": 75750 }, { "epoch": 1.5421882951653942, "grad_norm": 4.881347247035739, "learning_rate": 8.733932786709714e-06, "loss": 0.3163, "step": 75760 }, { "epoch": 1.5423918575063613, "grad_norm": 6.059095542988076, "learning_rate": 8.733460175813309e-06, "loss": 0.2027, "step": 75770 }, { "epoch": 1.5425954198473284, "grad_norm": 4.9174314406697, "learning_rate": 8.732987489514865e-06, "loss": 0.2495, "step": 75780 }, { "epoch": 1.542798982188295, "grad_norm": 3.4469434453560526, "learning_rate": 8.73251472782393e-06, "loss": 0.2606, "step": 75790 }, { "epoch": 1.543002544529262, "grad_norm": 8.617321915988207, "learning_rate": 8.732041890750057e-06, "loss": 0.2655, "step": 75800 }, { "epoch": 1.543206106870229, "grad_norm": 7.427616460668228, "learning_rate": 8.731568978302787e-06, "loss": 0.2731, "step": 75810 }, { "epoch": 1.5434096692111958, "grad_norm": 2.8324805074908954, "learning_rate": 8.731095990491677e-06, "loss": 0.1767, "step": 75820 }, { "epoch": 1.543613231552163, "grad_norm": 8.850911639988439, "learning_rate": 8.730622927326276e-06, "loss": 0.2132, "step": 75830 }, { "epoch": 1.5438167938931298, "grad_norm": 6.120564040051579, "learning_rate": 8.730149788816141e-06, "loss": 0.1865, "step": 75840 }, { "epoch": 1.5440203562340966, "grad_norm": 12.320961430728708, "learning_rate": 8.729676574970826e-06, "loss": 0.2406, "step": 75850 }, { "epoch": 1.5442239185750637, "grad_norm": 14.416715125611143, "learning_rate": 8.729203285799891e-06, "loss": 0.2212, "step": 75860 }, { "epoch": 1.5444274809160305, "grad_norm": 1.4150425517590526, "learning_rate": 8.728729921312891e-06, "loss": 0.1519, "step": 75870 }, { "epoch": 1.5446310432569974, "grad_norm": 9.49753680759883, "learning_rate": 8.728256481519388e-06, "loss": 0.1983, "step": 75880 }, { "epoch": 1.5448346055979645, "grad_norm": 10.5132310170571, "learning_rate": 8.727782966428943e-06, "loss": 0.1755, "step": 75890 }, { "epoch": 1.5450381679389313, "grad_norm": 11.801249153934007, "learning_rate": 8.727309376051119e-06, "loss": 0.2456, "step": 75900 }, { "epoch": 1.5452417302798982, "grad_norm": 12.205935900277602, "learning_rate": 8.726835710395482e-06, "loss": 0.1872, "step": 75910 }, { "epoch": 1.5454452926208653, "grad_norm": 11.509299892378476, "learning_rate": 8.726361969471599e-06, "loss": 0.1689, "step": 75920 }, { "epoch": 1.545648854961832, "grad_norm": 15.319034960940895, "learning_rate": 8.725888153289034e-06, "loss": 0.1698, "step": 75930 }, { "epoch": 1.545852417302799, "grad_norm": 12.391133137365294, "learning_rate": 8.725414261857362e-06, "loss": 0.2732, "step": 75940 }, { "epoch": 1.546055979643766, "grad_norm": 8.525893909976823, "learning_rate": 8.724940295186148e-06, "loss": 0.1825, "step": 75950 }, { "epoch": 1.5462595419847327, "grad_norm": 1.1170537002673169, "learning_rate": 8.724466253284969e-06, "loss": 0.2258, "step": 75960 }, { "epoch": 1.5464631043256998, "grad_norm": 24.867643093298646, "learning_rate": 8.723992136163395e-06, "loss": 0.2873, "step": 75970 }, { "epoch": 1.5466666666666666, "grad_norm": 4.320002515017814, "learning_rate": 8.723517943831004e-06, "loss": 0.2295, "step": 75980 }, { "epoch": 1.5468702290076335, "grad_norm": 8.205463905478206, "learning_rate": 8.723043676297374e-06, "loss": 0.2262, "step": 75990 }, { "epoch": 1.5470737913486006, "grad_norm": 3.8004305255470365, "learning_rate": 8.72256933357208e-06, "loss": 0.1672, "step": 76000 }, { "epoch": 1.5472773536895674, "grad_norm": 8.716625434166339, "learning_rate": 8.722094915664703e-06, "loss": 0.3316, "step": 76010 }, { "epoch": 1.5474809160305343, "grad_norm": 7.629243554027593, "learning_rate": 8.721620422584827e-06, "loss": 0.2531, "step": 76020 }, { "epoch": 1.5476844783715014, "grad_norm": 13.967111506154444, "learning_rate": 8.721145854342032e-06, "loss": 0.2832, "step": 76030 }, { "epoch": 1.5478880407124682, "grad_norm": 15.538812228132732, "learning_rate": 8.720671210945903e-06, "loss": 0.2603, "step": 76040 }, { "epoch": 1.548091603053435, "grad_norm": 19.85030534793793, "learning_rate": 8.720196492406028e-06, "loss": 0.2376, "step": 76050 }, { "epoch": 1.5482951653944022, "grad_norm": 17.050984296171723, "learning_rate": 8.719721698731993e-06, "loss": 0.3313, "step": 76060 }, { "epoch": 1.548498727735369, "grad_norm": 8.682935335479694, "learning_rate": 8.719246829933387e-06, "loss": 0.1972, "step": 76070 }, { "epoch": 1.5487022900763359, "grad_norm": 13.455869484058208, "learning_rate": 8.718771886019801e-06, "loss": 0.3224, "step": 76080 }, { "epoch": 1.548905852417303, "grad_norm": 1.5280931832350952, "learning_rate": 8.718296867000827e-06, "loss": 0.2172, "step": 76090 }, { "epoch": 1.5491094147582696, "grad_norm": 17.674570274902575, "learning_rate": 8.717821772886059e-06, "loss": 0.2607, "step": 76100 }, { "epoch": 1.5493129770992367, "grad_norm": 5.178046812902351, "learning_rate": 8.717346603685092e-06, "loss": 0.2513, "step": 76110 }, { "epoch": 1.5495165394402035, "grad_norm": 5.33788691870032, "learning_rate": 8.716871359407521e-06, "loss": 0.2498, "step": 76120 }, { "epoch": 1.5497201017811704, "grad_norm": 4.437750864768236, "learning_rate": 8.716396040062947e-06, "loss": 0.2422, "step": 76130 }, { "epoch": 1.5499236641221374, "grad_norm": 8.982980507870737, "learning_rate": 8.715920645660968e-06, "loss": 0.16, "step": 76140 }, { "epoch": 1.5501272264631043, "grad_norm": 6.62501169749662, "learning_rate": 8.715445176211186e-06, "loss": 0.179, "step": 76150 }, { "epoch": 1.5503307888040712, "grad_norm": 5.782436430537848, "learning_rate": 8.714969631723202e-06, "loss": 0.1807, "step": 76160 }, { "epoch": 1.5505343511450382, "grad_norm": 10.897848248124678, "learning_rate": 8.714494012206622e-06, "loss": 0.1668, "step": 76170 }, { "epoch": 1.550737913486005, "grad_norm": 16.039384289865815, "learning_rate": 8.714018317671051e-06, "loss": 0.2528, "step": 76180 }, { "epoch": 1.550941475826972, "grad_norm": 0.20787106106858183, "learning_rate": 8.713542548126096e-06, "loss": 0.2712, "step": 76190 }, { "epoch": 1.551145038167939, "grad_norm": 3.4283634540947108, "learning_rate": 8.713066703581368e-06, "loss": 0.2083, "step": 76200 }, { "epoch": 1.5513486005089059, "grad_norm": 12.07899386625976, "learning_rate": 8.712590784046474e-06, "loss": 0.288, "step": 76210 }, { "epoch": 1.5515521628498727, "grad_norm": 7.553987640085913, "learning_rate": 8.712114789531027e-06, "loss": 0.2081, "step": 76220 }, { "epoch": 1.5517557251908398, "grad_norm": 5.040828591020367, "learning_rate": 8.711638720044642e-06, "loss": 0.2363, "step": 76230 }, { "epoch": 1.5519592875318065, "grad_norm": 11.96464235031314, "learning_rate": 8.711162575596931e-06, "loss": 0.1694, "step": 76240 }, { "epoch": 1.5521628498727735, "grad_norm": 8.806116915904136, "learning_rate": 8.710686356197515e-06, "loss": 0.2763, "step": 76250 }, { "epoch": 1.5523664122137406, "grad_norm": 5.121212218205601, "learning_rate": 8.710210061856006e-06, "loss": 0.0908, "step": 76260 }, { "epoch": 1.5525699745547072, "grad_norm": 9.811159828793626, "learning_rate": 8.709733692582025e-06, "loss": 0.2252, "step": 76270 }, { "epoch": 1.5527735368956743, "grad_norm": 1.980694017222386, "learning_rate": 8.709257248385197e-06, "loss": 0.1459, "step": 76280 }, { "epoch": 1.5529770992366412, "grad_norm": 27.38032534917564, "learning_rate": 8.70878072927514e-06, "loss": 0.2114, "step": 76290 }, { "epoch": 1.553180661577608, "grad_norm": 5.587521154069999, "learning_rate": 8.708304135261479e-06, "loss": 0.247, "step": 76300 }, { "epoch": 1.5533842239185751, "grad_norm": 15.789893952527535, "learning_rate": 8.70782746635384e-06, "loss": 0.3117, "step": 76310 }, { "epoch": 1.553587786259542, "grad_norm": 3.7925778809316713, "learning_rate": 8.70735072256185e-06, "loss": 0.2278, "step": 76320 }, { "epoch": 1.5537913486005088, "grad_norm": 26.550735199010152, "learning_rate": 8.706873903895137e-06, "loss": 0.2852, "step": 76330 }, { "epoch": 1.553994910941476, "grad_norm": 18.68536593186605, "learning_rate": 8.70639701036333e-06, "loss": 0.1771, "step": 76340 }, { "epoch": 1.5541984732824428, "grad_norm": 16.381353474427073, "learning_rate": 8.705920041976063e-06, "loss": 0.2753, "step": 76350 }, { "epoch": 1.5544020356234096, "grad_norm": 11.670367375903814, "learning_rate": 8.705442998742965e-06, "loss": 0.2902, "step": 76360 }, { "epoch": 1.5546055979643767, "grad_norm": 8.60822999159339, "learning_rate": 8.704965880673677e-06, "loss": 0.1621, "step": 76370 }, { "epoch": 1.5548091603053436, "grad_norm": 10.208227762830399, "learning_rate": 8.704488687777827e-06, "loss": 0.2159, "step": 76380 }, { "epoch": 1.5550127226463104, "grad_norm": 12.960309167901968, "learning_rate": 8.70401142006506e-06, "loss": 0.2153, "step": 76390 }, { "epoch": 1.5552162849872775, "grad_norm": 1.3430915748489136, "learning_rate": 8.703534077545011e-06, "loss": 0.2636, "step": 76400 }, { "epoch": 1.5554198473282441, "grad_norm": 10.392349570263923, "learning_rate": 8.70305666022732e-06, "loss": 0.2434, "step": 76410 }, { "epoch": 1.5556234096692112, "grad_norm": 23.430228029704786, "learning_rate": 8.702579168121632e-06, "loss": 0.2465, "step": 76420 }, { "epoch": 1.5558269720101783, "grad_norm": 10.603677771299417, "learning_rate": 8.702101601237586e-06, "loss": 0.2446, "step": 76430 }, { "epoch": 1.556030534351145, "grad_norm": 4.6197600245511365, "learning_rate": 8.701623959584831e-06, "loss": 0.2078, "step": 76440 }, { "epoch": 1.556234096692112, "grad_norm": 8.4857549443857, "learning_rate": 8.701146243173013e-06, "loss": 0.2023, "step": 76450 }, { "epoch": 1.5564376590330788, "grad_norm": 14.182161951821175, "learning_rate": 8.70066845201178e-06, "loss": 0.2793, "step": 76460 }, { "epoch": 1.5566412213740457, "grad_norm": 8.500924137758368, "learning_rate": 8.700190586110781e-06, "loss": 0.3333, "step": 76470 }, { "epoch": 1.5568447837150128, "grad_norm": 13.037525091732501, "learning_rate": 8.699712645479667e-06, "loss": 0.161, "step": 76480 }, { "epoch": 1.5570483460559796, "grad_norm": 18.750408205113715, "learning_rate": 8.69923463012809e-06, "loss": 0.2433, "step": 76490 }, { "epoch": 1.5572519083969465, "grad_norm": 7.7689748355190105, "learning_rate": 8.698756540065707e-06, "loss": 0.3185, "step": 76500 }, { "epoch": 1.5574554707379136, "grad_norm": 6.385100923519966, "learning_rate": 8.698278375302172e-06, "loss": 0.2757, "step": 76510 }, { "epoch": 1.5576590330788804, "grad_norm": 3.434843686020174, "learning_rate": 8.697800135847139e-06, "loss": 0.2213, "step": 76520 }, { "epoch": 1.5578625954198473, "grad_norm": 4.229162249740319, "learning_rate": 8.697321821710272e-06, "loss": 0.2825, "step": 76530 }, { "epoch": 1.5580661577608144, "grad_norm": 8.661795501603308, "learning_rate": 8.696843432901228e-06, "loss": 0.2135, "step": 76540 }, { "epoch": 1.5582697201017812, "grad_norm": 6.5871373889310165, "learning_rate": 8.696364969429669e-06, "loss": 0.2025, "step": 76550 }, { "epoch": 1.558473282442748, "grad_norm": 5.413509527353011, "learning_rate": 8.695886431305258e-06, "loss": 0.238, "step": 76560 }, { "epoch": 1.5586768447837152, "grad_norm": 7.823816006825103, "learning_rate": 8.695407818537662e-06, "loss": 0.2044, "step": 76570 }, { "epoch": 1.5588804071246818, "grad_norm": 9.34931263062968, "learning_rate": 8.694929131136543e-06, "loss": 0.263, "step": 76580 }, { "epoch": 1.5590839694656489, "grad_norm": 10.124267575488178, "learning_rate": 8.694450369111574e-06, "loss": 0.1377, "step": 76590 }, { "epoch": 1.5592875318066157, "grad_norm": 7.264420228416376, "learning_rate": 8.69397153247242e-06, "loss": 0.2188, "step": 76600 }, { "epoch": 1.5594910941475826, "grad_norm": 12.073662679052859, "learning_rate": 8.693492621228753e-06, "loss": 0.2026, "step": 76610 }, { "epoch": 1.5596946564885497, "grad_norm": 7.93717504137607, "learning_rate": 8.693013635390244e-06, "loss": 0.2304, "step": 76620 }, { "epoch": 1.5598982188295165, "grad_norm": 10.741907887304956, "learning_rate": 8.692534574966571e-06, "loss": 0.2792, "step": 76630 }, { "epoch": 1.5601017811704834, "grad_norm": 3.8009713015329085, "learning_rate": 8.692055439967405e-06, "loss": 0.1351, "step": 76640 }, { "epoch": 1.5603053435114504, "grad_norm": 0.903308911058141, "learning_rate": 8.691576230402424e-06, "loss": 0.3095, "step": 76650 }, { "epoch": 1.5605089058524173, "grad_norm": 10.519484840126154, "learning_rate": 8.691096946281305e-06, "loss": 0.2668, "step": 76660 }, { "epoch": 1.5607124681933842, "grad_norm": 1.1870437943409269, "learning_rate": 8.690617587613731e-06, "loss": 0.1786, "step": 76670 }, { "epoch": 1.5609160305343512, "grad_norm": 12.111331392835694, "learning_rate": 8.69013815440938e-06, "loss": 0.2367, "step": 76680 }, { "epoch": 1.561119592875318, "grad_norm": 13.892707203961871, "learning_rate": 8.689658646677938e-06, "loss": 0.2025, "step": 76690 }, { "epoch": 1.561323155216285, "grad_norm": 17.898005992389677, "learning_rate": 8.689179064429086e-06, "loss": 0.291, "step": 76700 }, { "epoch": 1.561526717557252, "grad_norm": 3.7025539781458123, "learning_rate": 8.68869940767251e-06, "loss": 0.2255, "step": 76710 }, { "epoch": 1.5617302798982187, "grad_norm": 13.725974400923597, "learning_rate": 8.688219676417899e-06, "loss": 0.2242, "step": 76720 }, { "epoch": 1.5619338422391857, "grad_norm": 7.840704554335457, "learning_rate": 8.687739870674943e-06, "loss": 0.3041, "step": 76730 }, { "epoch": 1.5621374045801528, "grad_norm": 9.358300922842872, "learning_rate": 8.68725999045333e-06, "loss": 0.3153, "step": 76740 }, { "epoch": 1.5623409669211195, "grad_norm": 6.983336759080465, "learning_rate": 8.686780035762752e-06, "loss": 0.186, "step": 76750 }, { "epoch": 1.5625445292620865, "grad_norm": 4.611193221208782, "learning_rate": 8.686300006612902e-06, "loss": 0.3296, "step": 76760 }, { "epoch": 1.5627480916030534, "grad_norm": 15.836579413335532, "learning_rate": 8.685819903013476e-06, "loss": 0.2417, "step": 76770 }, { "epoch": 1.5629516539440202, "grad_norm": 5.726523218728057, "learning_rate": 8.68533972497417e-06, "loss": 0.2313, "step": 76780 }, { "epoch": 1.5631552162849873, "grad_norm": 8.105013637774778, "learning_rate": 8.68485947250468e-06, "loss": 0.2088, "step": 76790 }, { "epoch": 1.5633587786259542, "grad_norm": 7.973203381379252, "learning_rate": 8.68437914561471e-06, "loss": 0.2009, "step": 76800 }, { "epoch": 1.563562340966921, "grad_norm": 5.694658449242635, "learning_rate": 8.683898744313954e-06, "loss": 0.2142, "step": 76810 }, { "epoch": 1.5637659033078881, "grad_norm": 9.833614291733314, "learning_rate": 8.68341826861212e-06, "loss": 0.2392, "step": 76820 }, { "epoch": 1.563969465648855, "grad_norm": 17.623836046278008, "learning_rate": 8.682937718518912e-06, "loss": 0.2009, "step": 76830 }, { "epoch": 1.5641730279898218, "grad_norm": 27.23752764987543, "learning_rate": 8.68245709404403e-06, "loss": 0.2307, "step": 76840 }, { "epoch": 1.564376590330789, "grad_norm": 6.624863968680519, "learning_rate": 8.681976395197184e-06, "loss": 0.1905, "step": 76850 }, { "epoch": 1.5645801526717558, "grad_norm": 15.360971921854288, "learning_rate": 8.681495621988083e-06, "loss": 0.1988, "step": 76860 }, { "epoch": 1.5647837150127226, "grad_norm": 6.997033028961957, "learning_rate": 8.681014774426437e-06, "loss": 0.2863, "step": 76870 }, { "epoch": 1.5649872773536897, "grad_norm": 9.30152545887703, "learning_rate": 8.680533852521956e-06, "loss": 0.2864, "step": 76880 }, { "epoch": 1.5651908396946563, "grad_norm": 12.489409550381609, "learning_rate": 8.680052856284353e-06, "loss": 0.3144, "step": 76890 }, { "epoch": 1.5653944020356234, "grad_norm": 14.267427551772629, "learning_rate": 8.679571785723344e-06, "loss": 0.2877, "step": 76900 }, { "epoch": 1.5655979643765905, "grad_norm": 9.963706694127852, "learning_rate": 8.679090640848642e-06, "loss": 0.2223, "step": 76910 }, { "epoch": 1.5658015267175571, "grad_norm": 5.656750268318724, "learning_rate": 8.678609421669967e-06, "loss": 0.1775, "step": 76920 }, { "epoch": 1.5660050890585242, "grad_norm": 12.972536473362881, "learning_rate": 8.678128128197036e-06, "loss": 0.1989, "step": 76930 }, { "epoch": 1.566208651399491, "grad_norm": 16.109455729386603, "learning_rate": 8.67764676043957e-06, "loss": 0.2503, "step": 76940 }, { "epoch": 1.566412213740458, "grad_norm": 15.357442044361115, "learning_rate": 8.677165318407291e-06, "loss": 0.2269, "step": 76950 }, { "epoch": 1.566615776081425, "grad_norm": 5.425413024960685, "learning_rate": 8.676683802109923e-06, "loss": 0.238, "step": 76960 }, { "epoch": 1.5668193384223918, "grad_norm": 4.5808099355463305, "learning_rate": 8.676202211557189e-06, "loss": 0.1735, "step": 76970 }, { "epoch": 1.5670229007633587, "grad_norm": 13.289298159722183, "learning_rate": 8.675720546758817e-06, "loss": 0.2558, "step": 76980 }, { "epoch": 1.5672264631043258, "grad_norm": 12.000087020401232, "learning_rate": 8.675238807724534e-06, "loss": 0.2634, "step": 76990 }, { "epoch": 1.5674300254452926, "grad_norm": 0.4659903556489007, "learning_rate": 8.674756994464072e-06, "loss": 0.2634, "step": 77000 }, { "epoch": 1.5676335877862595, "grad_norm": 11.84014706548791, "learning_rate": 8.674275106987155e-06, "loss": 0.1766, "step": 77010 }, { "epoch": 1.5678371501272266, "grad_norm": 3.9535721909400903, "learning_rate": 8.673793145303522e-06, "loss": 0.1933, "step": 77020 }, { "epoch": 1.5680407124681934, "grad_norm": 10.63080540214999, "learning_rate": 8.673311109422905e-06, "loss": 0.1732, "step": 77030 }, { "epoch": 1.5682442748091603, "grad_norm": 7.880128820798519, "learning_rate": 8.672828999355039e-06, "loss": 0.2733, "step": 77040 }, { "epoch": 1.5684478371501274, "grad_norm": 4.99348213404658, "learning_rate": 8.67234681510966e-06, "loss": 0.2602, "step": 77050 }, { "epoch": 1.568651399491094, "grad_norm": 16.942141348784777, "learning_rate": 8.671864556696508e-06, "loss": 0.171, "step": 77060 }, { "epoch": 1.568854961832061, "grad_norm": 11.88593146745672, "learning_rate": 8.67138222412532e-06, "loss": 0.2439, "step": 77070 }, { "epoch": 1.569058524173028, "grad_norm": 15.348830608222375, "learning_rate": 8.670899817405841e-06, "loss": 0.1389, "step": 77080 }, { "epoch": 1.5692620865139948, "grad_norm": 12.978873506036523, "learning_rate": 8.670417336547812e-06, "loss": 0.2176, "step": 77090 }, { "epoch": 1.5694656488549619, "grad_norm": 17.892807968640213, "learning_rate": 8.669934781560978e-06, "loss": 0.2495, "step": 77100 }, { "epoch": 1.5696692111959287, "grad_norm": 4.550840088784628, "learning_rate": 8.669452152455083e-06, "loss": 0.163, "step": 77110 }, { "epoch": 1.5698727735368956, "grad_norm": 11.248934898901542, "learning_rate": 8.668969449239876e-06, "loss": 0.3066, "step": 77120 }, { "epoch": 1.5700763358778627, "grad_norm": 12.430747832731488, "learning_rate": 8.668486671925105e-06, "loss": 0.2839, "step": 77130 }, { "epoch": 1.5702798982188295, "grad_norm": 7.7267273539728425, "learning_rate": 8.66800382052052e-06, "loss": 0.3083, "step": 77140 }, { "epoch": 1.5704834605597964, "grad_norm": 8.06298079552153, "learning_rate": 8.667520895035876e-06, "loss": 0.139, "step": 77150 }, { "epoch": 1.5706870229007635, "grad_norm": 12.376411107035416, "learning_rate": 8.667037895480923e-06, "loss": 0.236, "step": 77160 }, { "epoch": 1.5708905852417303, "grad_norm": 11.974306969345514, "learning_rate": 8.666554821865415e-06, "loss": 0.2679, "step": 77170 }, { "epoch": 1.5710941475826972, "grad_norm": 6.605058194927608, "learning_rate": 8.66607167419911e-06, "loss": 0.1847, "step": 77180 }, { "epoch": 1.5712977099236642, "grad_norm": 7.6483335049691386, "learning_rate": 8.665588452491768e-06, "loss": 0.3148, "step": 77190 }, { "epoch": 1.5715012722646309, "grad_norm": 4.7313071429529385, "learning_rate": 8.665105156753143e-06, "loss": 0.2089, "step": 77200 }, { "epoch": 1.571704834605598, "grad_norm": 3.699242164002351, "learning_rate": 8.664621786993001e-06, "loss": 0.2772, "step": 77210 }, { "epoch": 1.571908396946565, "grad_norm": 5.997141073390293, "learning_rate": 8.664138343221102e-06, "loss": 0.2512, "step": 77220 }, { "epoch": 1.5721119592875317, "grad_norm": 8.274837276090157, "learning_rate": 8.66365482544721e-06, "loss": 0.4274, "step": 77230 }, { "epoch": 1.5723155216284987, "grad_norm": 0.3189892525730312, "learning_rate": 8.663171233681089e-06, "loss": 0.2171, "step": 77240 }, { "epoch": 1.5725190839694656, "grad_norm": 3.5254748939335494, "learning_rate": 8.662687567932507e-06, "loss": 0.2957, "step": 77250 }, { "epoch": 1.5727226463104325, "grad_norm": 1.1990849723327328, "learning_rate": 8.662203828211231e-06, "loss": 0.1653, "step": 77260 }, { "epoch": 1.5729262086513995, "grad_norm": 5.179414952181382, "learning_rate": 8.661720014527032e-06, "loss": 0.1936, "step": 77270 }, { "epoch": 1.5731297709923664, "grad_norm": 9.457586309378565, "learning_rate": 8.661236126889682e-06, "loss": 0.288, "step": 77280 }, { "epoch": 1.5733333333333333, "grad_norm": 9.51740538130437, "learning_rate": 8.660752165308953e-06, "loss": 0.3636, "step": 77290 }, { "epoch": 1.5735368956743003, "grad_norm": 9.432143503158029, "learning_rate": 8.660268129794618e-06, "loss": 0.2552, "step": 77300 }, { "epoch": 1.5737404580152672, "grad_norm": 6.663053009670227, "learning_rate": 8.659784020356454e-06, "loss": 0.2969, "step": 77310 }, { "epoch": 1.573944020356234, "grad_norm": 8.415086072794928, "learning_rate": 8.659299837004239e-06, "loss": 0.2129, "step": 77320 }, { "epoch": 1.5741475826972011, "grad_norm": 5.858227935013128, "learning_rate": 8.65881557974775e-06, "loss": 0.227, "step": 77330 }, { "epoch": 1.574351145038168, "grad_norm": 3.3965785417211363, "learning_rate": 8.658331248596768e-06, "loss": 0.1759, "step": 77340 }, { "epoch": 1.5745547073791348, "grad_norm": 9.973029544052006, "learning_rate": 8.657846843561074e-06, "loss": 0.1996, "step": 77350 }, { "epoch": 1.574758269720102, "grad_norm": 27.25097212180379, "learning_rate": 8.657362364650452e-06, "loss": 0.1564, "step": 77360 }, { "epoch": 1.5749618320610685, "grad_norm": 9.224929865788443, "learning_rate": 8.656877811874687e-06, "loss": 0.1773, "step": 77370 }, { "epoch": 1.5751653944020356, "grad_norm": 3.5393800050535345, "learning_rate": 8.656393185243563e-06, "loss": 0.1815, "step": 77380 }, { "epoch": 1.5753689567430027, "grad_norm": 0.7740337026640809, "learning_rate": 8.655908484766871e-06, "loss": 0.2147, "step": 77390 }, { "epoch": 1.5755725190839693, "grad_norm": 7.737920748761617, "learning_rate": 8.655423710454397e-06, "loss": 0.179, "step": 77400 }, { "epoch": 1.5757760814249364, "grad_norm": 11.44812903311342, "learning_rate": 8.654938862315934e-06, "loss": 0.2394, "step": 77410 }, { "epoch": 1.5759796437659033, "grad_norm": 6.264917081270245, "learning_rate": 8.654453940361273e-06, "loss": 0.2394, "step": 77420 }, { "epoch": 1.5761832061068701, "grad_norm": 9.366467272500467, "learning_rate": 8.653968944600205e-06, "loss": 0.1854, "step": 77430 }, { "epoch": 1.5763867684478372, "grad_norm": 3.628888286224607, "learning_rate": 8.653483875042532e-06, "loss": 0.2317, "step": 77440 }, { "epoch": 1.576590330788804, "grad_norm": 1.0472842315259918, "learning_rate": 8.652998731698046e-06, "loss": 0.3001, "step": 77450 }, { "epoch": 1.576793893129771, "grad_norm": 10.140957083556875, "learning_rate": 8.652513514576543e-06, "loss": 0.1669, "step": 77460 }, { "epoch": 1.576997455470738, "grad_norm": 12.510442345670883, "learning_rate": 8.652028223687828e-06, "loss": 0.2448, "step": 77470 }, { "epoch": 1.5772010178117049, "grad_norm": 0.1574715808091117, "learning_rate": 8.651542859041698e-06, "loss": 0.2248, "step": 77480 }, { "epoch": 1.5774045801526717, "grad_norm": 7.475079175235986, "learning_rate": 8.651057420647958e-06, "loss": 0.2319, "step": 77490 }, { "epoch": 1.5776081424936388, "grad_norm": 10.977096294220651, "learning_rate": 8.650571908516408e-06, "loss": 0.3191, "step": 77500 }, { "epoch": 1.5778117048346056, "grad_norm": 10.031851222102347, "learning_rate": 8.650086322656858e-06, "loss": 0.2364, "step": 77510 }, { "epoch": 1.5780152671755725, "grad_norm": 3.2263647476678803, "learning_rate": 8.649600663079113e-06, "loss": 0.2102, "step": 77520 }, { "epoch": 1.5782188295165396, "grad_norm": 3.7940656021541854, "learning_rate": 8.649114929792982e-06, "loss": 0.2426, "step": 77530 }, { "epoch": 1.5784223918575062, "grad_norm": 1.7804740347682226, "learning_rate": 8.648629122808276e-06, "loss": 0.1732, "step": 77540 }, { "epoch": 1.5786259541984733, "grad_norm": 7.768402469529083, "learning_rate": 8.648143242134806e-06, "loss": 0.23, "step": 77550 }, { "epoch": 1.5788295165394404, "grad_norm": 0.5112469616976131, "learning_rate": 8.647657287782382e-06, "loss": 0.2439, "step": 77560 }, { "epoch": 1.579033078880407, "grad_norm": 4.036666853049146, "learning_rate": 8.647171259760822e-06, "loss": 0.186, "step": 77570 }, { "epoch": 1.579236641221374, "grad_norm": 35.572131781207155, "learning_rate": 8.646685158079942e-06, "loss": 0.2495, "step": 77580 }, { "epoch": 1.579440203562341, "grad_norm": 22.169492401347373, "learning_rate": 8.646198982749557e-06, "loss": 0.1922, "step": 77590 }, { "epoch": 1.5796437659033078, "grad_norm": 7.875624209799574, "learning_rate": 8.645712733779488e-06, "loss": 0.2315, "step": 77600 }, { "epoch": 1.5798473282442749, "grad_norm": 12.583380388028734, "learning_rate": 8.645226411179553e-06, "loss": 0.278, "step": 77610 }, { "epoch": 1.5800508905852417, "grad_norm": 13.22450873412036, "learning_rate": 8.64474001495958e-06, "loss": 0.228, "step": 77620 }, { "epoch": 1.5802544529262086, "grad_norm": 19.997281680137633, "learning_rate": 8.644253545129383e-06, "loss": 0.1439, "step": 77630 }, { "epoch": 1.5804580152671757, "grad_norm": 3.599969068043411, "learning_rate": 8.643767001698792e-06, "loss": 0.2399, "step": 77640 }, { "epoch": 1.5806615776081425, "grad_norm": 19.237320634262, "learning_rate": 8.643280384677637e-06, "loss": 0.203, "step": 77650 }, { "epoch": 1.5808651399491094, "grad_norm": 13.83515788453945, "learning_rate": 8.64279369407574e-06, "loss": 0.3098, "step": 77660 }, { "epoch": 1.5810687022900765, "grad_norm": 11.396747502651326, "learning_rate": 8.642306929902932e-06, "loss": 0.1801, "step": 77670 }, { "epoch": 1.5812722646310433, "grad_norm": 7.7762177665760115, "learning_rate": 8.641820092169047e-06, "loss": 0.2649, "step": 77680 }, { "epoch": 1.5814758269720102, "grad_norm": 3.7664763518124698, "learning_rate": 8.641333180883911e-06, "loss": 0.192, "step": 77690 }, { "epoch": 1.5816793893129772, "grad_norm": 6.970119321971111, "learning_rate": 8.640846196057363e-06, "loss": 0.2145, "step": 77700 }, { "epoch": 1.5818829516539439, "grad_norm": 2.4794762695905965, "learning_rate": 8.640359137699236e-06, "loss": 0.155, "step": 77710 }, { "epoch": 1.582086513994911, "grad_norm": 10.7884650175557, "learning_rate": 8.639872005819369e-06, "loss": 0.1994, "step": 77720 }, { "epoch": 1.5822900763358778, "grad_norm": 18.724847742660266, "learning_rate": 8.639384800427599e-06, "loss": 0.2502, "step": 77730 }, { "epoch": 1.5824936386768447, "grad_norm": 18.82321185241034, "learning_rate": 8.638897521533764e-06, "loss": 0.2605, "step": 77740 }, { "epoch": 1.5826972010178118, "grad_norm": 12.657866033790848, "learning_rate": 8.638410169147707e-06, "loss": 0.382, "step": 77750 }, { "epoch": 1.5829007633587786, "grad_norm": 7.305569307316222, "learning_rate": 8.63792274327927e-06, "loss": 0.2593, "step": 77760 }, { "epoch": 1.5831043256997455, "grad_norm": 2.102008884364147, "learning_rate": 8.6374352439383e-06, "loss": 0.24, "step": 77770 }, { "epoch": 1.5833078880407125, "grad_norm": 5.072619883512683, "learning_rate": 8.636947671134637e-06, "loss": 0.162, "step": 77780 }, { "epoch": 1.5835114503816794, "grad_norm": 4.121684029631337, "learning_rate": 8.636460024878134e-06, "loss": 0.219, "step": 77790 }, { "epoch": 1.5837150127226463, "grad_norm": 13.028782334017938, "learning_rate": 8.635972305178636e-06, "loss": 0.2451, "step": 77800 }, { "epoch": 1.5839185750636133, "grad_norm": 2.7517092798854597, "learning_rate": 8.635484512045993e-06, "loss": 0.1925, "step": 77810 }, { "epoch": 1.5841221374045802, "grad_norm": 3.120504311015689, "learning_rate": 8.634996645490058e-06, "loss": 0.2358, "step": 77820 }, { "epoch": 1.584325699745547, "grad_norm": 8.748183347649183, "learning_rate": 8.634508705520686e-06, "loss": 0.2268, "step": 77830 }, { "epoch": 1.5845292620865141, "grad_norm": 8.671509808457655, "learning_rate": 8.634020692147728e-06, "loss": 0.1871, "step": 77840 }, { "epoch": 1.5847328244274808, "grad_norm": 13.990410309249082, "learning_rate": 8.633532605381041e-06, "loss": 0.2194, "step": 77850 }, { "epoch": 1.5849363867684478, "grad_norm": 15.286400934913335, "learning_rate": 8.633044445230484e-06, "loss": 0.2604, "step": 77860 }, { "epoch": 1.585139949109415, "grad_norm": 12.652593321115255, "learning_rate": 8.632556211705915e-06, "loss": 0.2119, "step": 77870 }, { "epoch": 1.5853435114503815, "grad_norm": 7.863643079024096, "learning_rate": 8.632067904817194e-06, "loss": 0.1819, "step": 77880 }, { "epoch": 1.5855470737913486, "grad_norm": 6.715603874313485, "learning_rate": 8.631579524574185e-06, "loss": 0.2463, "step": 77890 }, { "epoch": 1.5857506361323155, "grad_norm": 7.48658721897242, "learning_rate": 8.63109107098675e-06, "loss": 0.2621, "step": 77900 }, { "epoch": 1.5859541984732823, "grad_norm": 38.604750531243184, "learning_rate": 8.630602544064753e-06, "loss": 0.2897, "step": 77910 }, { "epoch": 1.5861577608142494, "grad_norm": 5.629480990427683, "learning_rate": 8.630113943818062e-06, "loss": 0.2178, "step": 77920 }, { "epoch": 1.5863613231552163, "grad_norm": 13.342118889687951, "learning_rate": 8.629625270256542e-06, "loss": 0.2256, "step": 77930 }, { "epoch": 1.5865648854961831, "grad_norm": 22.544891922587613, "learning_rate": 8.629136523390068e-06, "loss": 0.233, "step": 77940 }, { "epoch": 1.5867684478371502, "grad_norm": 0.4377277119156551, "learning_rate": 8.628647703228506e-06, "loss": 0.1879, "step": 77950 }, { "epoch": 1.586972010178117, "grad_norm": 10.753242202822948, "learning_rate": 8.62815880978173e-06, "loss": 0.3486, "step": 77960 }, { "epoch": 1.587175572519084, "grad_norm": 9.888174578032825, "learning_rate": 8.627669843059615e-06, "loss": 0.179, "step": 77970 }, { "epoch": 1.587379134860051, "grad_norm": 8.13690143690017, "learning_rate": 8.627180803072033e-06, "loss": 0.2077, "step": 77980 }, { "epoch": 1.5875826972010179, "grad_norm": 5.6085680580415405, "learning_rate": 8.626691689828866e-06, "loss": 0.1351, "step": 77990 }, { "epoch": 1.5877862595419847, "grad_norm": 9.822077592170587, "learning_rate": 8.626202503339987e-06, "loss": 0.1807, "step": 78000 }, { "epoch": 1.5879898218829518, "grad_norm": 0.24148699009683156, "learning_rate": 8.625713243615278e-06, "loss": 0.1949, "step": 78010 }, { "epoch": 1.5881933842239184, "grad_norm": 42.64261966352926, "learning_rate": 8.62522391066462e-06, "loss": 0.2949, "step": 78020 }, { "epoch": 1.5883969465648855, "grad_norm": 9.17570644144699, "learning_rate": 8.624734504497896e-06, "loss": 0.2272, "step": 78030 }, { "epoch": 1.5886005089058526, "grad_norm": 11.840920543325003, "learning_rate": 8.62424502512499e-06, "loss": 0.2833, "step": 78040 }, { "epoch": 1.5888040712468192, "grad_norm": 4.83382479647315, "learning_rate": 8.623755472555789e-06, "loss": 0.2656, "step": 78050 }, { "epoch": 1.5890076335877863, "grad_norm": 3.3345556061801926, "learning_rate": 8.623265846800178e-06, "loss": 0.1801, "step": 78060 }, { "epoch": 1.5892111959287532, "grad_norm": 4.423729222587663, "learning_rate": 8.622776147868047e-06, "loss": 0.131, "step": 78070 }, { "epoch": 1.58941475826972, "grad_norm": 24.850110199763204, "learning_rate": 8.622286375769284e-06, "loss": 0.385, "step": 78080 }, { "epoch": 1.589618320610687, "grad_norm": 13.970714996055522, "learning_rate": 8.621796530513784e-06, "loss": 0.1816, "step": 78090 }, { "epoch": 1.589821882951654, "grad_norm": 1.5883514426482803, "learning_rate": 8.621306612111437e-06, "loss": 0.1786, "step": 78100 }, { "epoch": 1.5900254452926208, "grad_norm": 0.14984279392129007, "learning_rate": 8.620816620572139e-06, "loss": 0.2203, "step": 78110 }, { "epoch": 1.5902290076335879, "grad_norm": 6.018806179895159, "learning_rate": 8.620326555905788e-06, "loss": 0.2044, "step": 78120 }, { "epoch": 1.5904325699745547, "grad_norm": 17.0761871189034, "learning_rate": 8.619836418122277e-06, "loss": 0.2412, "step": 78130 }, { "epoch": 1.5906361323155216, "grad_norm": 12.393899511050867, "learning_rate": 8.619346207231508e-06, "loss": 0.3129, "step": 78140 }, { "epoch": 1.5908396946564887, "grad_norm": 9.930008822002481, "learning_rate": 8.618855923243378e-06, "loss": 0.1911, "step": 78150 }, { "epoch": 1.5910432569974555, "grad_norm": 0.7694996542261133, "learning_rate": 8.618365566167794e-06, "loss": 0.2626, "step": 78160 }, { "epoch": 1.5912468193384224, "grad_norm": 0.26747711076813047, "learning_rate": 8.617875136014658e-06, "loss": 0.2485, "step": 78170 }, { "epoch": 1.5914503816793895, "grad_norm": 16.55001406926174, "learning_rate": 8.617384632793872e-06, "loss": 0.3348, "step": 78180 }, { "epoch": 1.591653944020356, "grad_norm": 5.137242239526401, "learning_rate": 8.616894056515343e-06, "loss": 0.2649, "step": 78190 }, { "epoch": 1.5918575063613232, "grad_norm": 5.296183864273038, "learning_rate": 8.616403407188983e-06, "loss": 0.2014, "step": 78200 }, { "epoch": 1.59206106870229, "grad_norm": 8.174803819646712, "learning_rate": 8.615912684824697e-06, "loss": 0.274, "step": 78210 }, { "epoch": 1.5922646310432569, "grad_norm": 11.76056498026173, "learning_rate": 8.615421889432397e-06, "loss": 0.2226, "step": 78220 }, { "epoch": 1.592468193384224, "grad_norm": 9.043248155678898, "learning_rate": 8.614931021021995e-06, "loss": 0.1751, "step": 78230 }, { "epoch": 1.5926717557251908, "grad_norm": 7.97658828478469, "learning_rate": 8.614440079603405e-06, "loss": 0.2632, "step": 78240 }, { "epoch": 1.5928753180661577, "grad_norm": 19.998034877096753, "learning_rate": 8.613949065186544e-06, "loss": 0.2738, "step": 78250 }, { "epoch": 1.5930788804071248, "grad_norm": 7.087187690825072, "learning_rate": 8.613457977781326e-06, "loss": 0.2151, "step": 78260 }, { "epoch": 1.5932824427480916, "grad_norm": 12.303752239496044, "learning_rate": 8.612966817397669e-06, "loss": 0.1612, "step": 78270 }, { "epoch": 1.5934860050890585, "grad_norm": 8.876159536396791, "learning_rate": 8.612475584045495e-06, "loss": 0.1571, "step": 78280 }, { "epoch": 1.5936895674300255, "grad_norm": 10.684343664355724, "learning_rate": 8.611984277734722e-06, "loss": 0.2444, "step": 78290 }, { "epoch": 1.5938931297709924, "grad_norm": 7.706042100728218, "learning_rate": 8.611492898475276e-06, "loss": 0.3079, "step": 78300 }, { "epoch": 1.5940966921119593, "grad_norm": 14.97216635600586, "learning_rate": 8.611001446277078e-06, "loss": 0.2091, "step": 78310 }, { "epoch": 1.5943002544529263, "grad_norm": 0.3410405997085967, "learning_rate": 8.610509921150056e-06, "loss": 0.2165, "step": 78320 }, { "epoch": 1.594503816793893, "grad_norm": 4.4756163851134545, "learning_rate": 8.610018323104134e-06, "loss": 0.2417, "step": 78330 }, { "epoch": 1.59470737913486, "grad_norm": 3.9404441269459847, "learning_rate": 8.609526652149246e-06, "loss": 0.1659, "step": 78340 }, { "epoch": 1.5949109414758271, "grad_norm": 12.558596751086716, "learning_rate": 8.609034908295315e-06, "loss": 0.2869, "step": 78350 }, { "epoch": 1.5951145038167938, "grad_norm": 15.579646312314614, "learning_rate": 8.608543091552274e-06, "loss": 0.334, "step": 78360 }, { "epoch": 1.5953180661577608, "grad_norm": 10.474881601688704, "learning_rate": 8.608051201930062e-06, "loss": 0.1204, "step": 78370 }, { "epoch": 1.5955216284987277, "grad_norm": 7.447256110451554, "learning_rate": 8.607559239438606e-06, "loss": 0.2206, "step": 78380 }, { "epoch": 1.5957251908396946, "grad_norm": 1.3487608223471155, "learning_rate": 8.607067204087846e-06, "loss": 0.2574, "step": 78390 }, { "epoch": 1.5959287531806616, "grad_norm": 15.906725640497434, "learning_rate": 8.606575095887718e-06, "loss": 0.2017, "step": 78400 }, { "epoch": 1.5961323155216285, "grad_norm": 20.62581168472554, "learning_rate": 8.60608291484816e-06, "loss": 0.3078, "step": 78410 }, { "epoch": 1.5963358778625953, "grad_norm": 11.626473862753503, "learning_rate": 8.605590660979113e-06, "loss": 0.2932, "step": 78420 }, { "epoch": 1.5965394402035624, "grad_norm": 0.7285842546524005, "learning_rate": 8.605098334290519e-06, "loss": 0.1594, "step": 78430 }, { "epoch": 1.5967430025445293, "grad_norm": 17.270922611262755, "learning_rate": 8.604605934792321e-06, "loss": 0.3394, "step": 78440 }, { "epoch": 1.5969465648854961, "grad_norm": 18.492488372432142, "learning_rate": 8.604113462494462e-06, "loss": 0.2611, "step": 78450 }, { "epoch": 1.5971501272264632, "grad_norm": 0.42826768235380397, "learning_rate": 8.60362091740689e-06, "loss": 0.2068, "step": 78460 }, { "epoch": 1.59735368956743, "grad_norm": 3.6264546536490454, "learning_rate": 8.603128299539553e-06, "loss": 0.2022, "step": 78470 }, { "epoch": 1.597557251908397, "grad_norm": 1.5426094061469136, "learning_rate": 8.6026356089024e-06, "loss": 0.2041, "step": 78480 }, { "epoch": 1.597760814249364, "grad_norm": 5.107199715103628, "learning_rate": 8.60214284550538e-06, "loss": 0.2144, "step": 78490 }, { "epoch": 1.5979643765903306, "grad_norm": 9.340073280774973, "learning_rate": 8.601650009358444e-06, "loss": 0.2114, "step": 78500 }, { "epoch": 1.5981679389312977, "grad_norm": 5.797844004863432, "learning_rate": 8.601157100471549e-06, "loss": 0.162, "step": 78510 }, { "epoch": 1.5983715012722648, "grad_norm": 19.484784153415536, "learning_rate": 8.600664118854647e-06, "loss": 0.1536, "step": 78520 }, { "epoch": 1.5985750636132314, "grad_norm": 13.397202538972612, "learning_rate": 8.600171064517697e-06, "loss": 0.2655, "step": 78530 }, { "epoch": 1.5987786259541985, "grad_norm": 7.979019104189807, "learning_rate": 8.599677937470654e-06, "loss": 0.2236, "step": 78540 }, { "epoch": 1.5989821882951654, "grad_norm": 7.498627901049873, "learning_rate": 8.599184737723478e-06, "loss": 0.1861, "step": 78550 }, { "epoch": 1.5991857506361322, "grad_norm": 10.899551208828084, "learning_rate": 8.598691465286133e-06, "loss": 0.175, "step": 78560 }, { "epoch": 1.5993893129770993, "grad_norm": 10.964257589333704, "learning_rate": 8.598198120168577e-06, "loss": 0.1969, "step": 78570 }, { "epoch": 1.5995928753180662, "grad_norm": 2.545028812161458, "learning_rate": 8.597704702380775e-06, "loss": 0.2141, "step": 78580 }, { "epoch": 1.599796437659033, "grad_norm": 6.312554044771011, "learning_rate": 8.597211211932695e-06, "loss": 0.2544, "step": 78590 }, { "epoch": 1.6, "grad_norm": 6.443539578997487, "learning_rate": 8.5967176488343e-06, "loss": 0.261, "step": 78600 }, { "epoch": 1.600203562340967, "grad_norm": 11.09092030486324, "learning_rate": 8.59622401309556e-06, "loss": 0.3529, "step": 78610 }, { "epoch": 1.6004071246819338, "grad_norm": 8.327323120790508, "learning_rate": 8.595730304726444e-06, "loss": 0.213, "step": 78620 }, { "epoch": 1.6006106870229009, "grad_norm": 12.50995226378882, "learning_rate": 8.595236523736923e-06, "loss": 0.303, "step": 78630 }, { "epoch": 1.6008142493638677, "grad_norm": 9.469940279735487, "learning_rate": 8.594742670136968e-06, "loss": 0.203, "step": 78640 }, { "epoch": 1.6010178117048346, "grad_norm": 9.927536912460049, "learning_rate": 8.594248743936557e-06, "loss": 0.2966, "step": 78650 }, { "epoch": 1.6012213740458017, "grad_norm": 5.1854198790185855, "learning_rate": 8.593754745145665e-06, "loss": 0.1489, "step": 78660 }, { "epoch": 1.6014249363867683, "grad_norm": 4.887794778879721, "learning_rate": 8.593260673774262e-06, "loss": 0.2605, "step": 78670 }, { "epoch": 1.6016284987277354, "grad_norm": 7.916387260619861, "learning_rate": 8.592766529832336e-06, "loss": 0.267, "step": 78680 }, { "epoch": 1.6018320610687022, "grad_norm": 3.4440339768032504, "learning_rate": 8.592272313329861e-06, "loss": 0.2283, "step": 78690 }, { "epoch": 1.602035623409669, "grad_norm": 10.337389460173709, "learning_rate": 8.59177802427682e-06, "loss": 0.2583, "step": 78700 }, { "epoch": 1.6022391857506362, "grad_norm": 6.445878225691015, "learning_rate": 8.591283662683196e-06, "loss": 0.3118, "step": 78710 }, { "epoch": 1.602442748091603, "grad_norm": 25.12043513872744, "learning_rate": 8.590789228558972e-06, "loss": 0.3422, "step": 78720 }, { "epoch": 1.6026463104325699, "grad_norm": 0.3623987847501319, "learning_rate": 8.590294721914134e-06, "loss": 0.1908, "step": 78730 }, { "epoch": 1.602849872773537, "grad_norm": 3.837552678372004, "learning_rate": 8.589800142758672e-06, "loss": 0.141, "step": 78740 }, { "epoch": 1.6030534351145038, "grad_norm": 9.025142893586803, "learning_rate": 8.58930549110257e-06, "loss": 0.3078, "step": 78750 }, { "epoch": 1.6032569974554707, "grad_norm": 8.402576921285638, "learning_rate": 8.588810766955823e-06, "loss": 0.2129, "step": 78760 }, { "epoch": 1.6034605597964378, "grad_norm": 9.275374407567893, "learning_rate": 8.58831597032842e-06, "loss": 0.2652, "step": 78770 }, { "epoch": 1.6036641221374046, "grad_norm": 38.04836922758852, "learning_rate": 8.587821101230354e-06, "loss": 0.2031, "step": 78780 }, { "epoch": 1.6038676844783715, "grad_norm": 12.371330553234271, "learning_rate": 8.587326159671619e-06, "loss": 0.2137, "step": 78790 }, { "epoch": 1.6040712468193385, "grad_norm": 4.97591729440365, "learning_rate": 8.586831145662212e-06, "loss": 0.1727, "step": 78800 }, { "epoch": 1.6042748091603052, "grad_norm": 8.17418376000622, "learning_rate": 8.58633605921213e-06, "loss": 0.1858, "step": 78810 }, { "epoch": 1.6044783715012723, "grad_norm": 18.358833951349155, "learning_rate": 8.585840900331372e-06, "loss": 0.1832, "step": 78820 }, { "epoch": 1.6046819338422393, "grad_norm": 5.208426983847026, "learning_rate": 8.58534566902994e-06, "loss": 0.2696, "step": 78830 }, { "epoch": 1.604885496183206, "grad_norm": 8.162978556160024, "learning_rate": 8.584850365317832e-06, "loss": 0.2604, "step": 78840 }, { "epoch": 1.605089058524173, "grad_norm": 11.764930303231226, "learning_rate": 8.584354989205055e-06, "loss": 0.2514, "step": 78850 }, { "epoch": 1.60529262086514, "grad_norm": 10.64169583443859, "learning_rate": 8.583859540701612e-06, "loss": 0.1989, "step": 78860 }, { "epoch": 1.6054961832061068, "grad_norm": 8.846590808815858, "learning_rate": 8.583364019817511e-06, "loss": 0.2129, "step": 78870 }, { "epoch": 1.6056997455470738, "grad_norm": 12.83647855048999, "learning_rate": 8.582868426562756e-06, "loss": 0.2483, "step": 78880 }, { "epoch": 1.6059033078880407, "grad_norm": 10.443326100287816, "learning_rate": 8.58237276094736e-06, "loss": 0.3416, "step": 78890 }, { "epoch": 1.6061068702290076, "grad_norm": 4.943988206420569, "learning_rate": 8.58187702298133e-06, "loss": 0.3486, "step": 78900 }, { "epoch": 1.6063104325699746, "grad_norm": 4.597819590182094, "learning_rate": 8.581381212674683e-06, "loss": 0.2345, "step": 78910 }, { "epoch": 1.6065139949109415, "grad_norm": 4.188004725085161, "learning_rate": 8.580885330037428e-06, "loss": 0.2465, "step": 78920 }, { "epoch": 1.6067175572519083, "grad_norm": 1.8899403510685933, "learning_rate": 8.58038937507958e-06, "loss": 0.1923, "step": 78930 }, { "epoch": 1.6069211195928754, "grad_norm": 3.477565419149921, "learning_rate": 8.57989334781116e-06, "loss": 0.2284, "step": 78940 }, { "epoch": 1.6071246819338423, "grad_norm": 10.700343203365428, "learning_rate": 8.579397248242183e-06, "loss": 0.2673, "step": 78950 }, { "epoch": 1.6073282442748091, "grad_norm": 11.788434462702307, "learning_rate": 8.578901076382666e-06, "loss": 0.25, "step": 78960 }, { "epoch": 1.6075318066157762, "grad_norm": 11.985188227853035, "learning_rate": 8.578404832242634e-06, "loss": 0.2535, "step": 78970 }, { "epoch": 1.6077353689567428, "grad_norm": 8.88286345733264, "learning_rate": 8.577908515832108e-06, "loss": 0.1857, "step": 78980 }, { "epoch": 1.60793893129771, "grad_norm": 1.3538640144891612, "learning_rate": 8.577412127161109e-06, "loss": 0.1501, "step": 78990 }, { "epoch": 1.608142493638677, "grad_norm": 13.471967643141147, "learning_rate": 8.576915666239665e-06, "loss": 0.1814, "step": 79000 }, { "epoch": 1.6083460559796436, "grad_norm": 0.14123310748017234, "learning_rate": 8.576419133077802e-06, "loss": 0.2288, "step": 79010 }, { "epoch": 1.6085496183206107, "grad_norm": 5.257235790768104, "learning_rate": 8.57592252768555e-06, "loss": 0.3396, "step": 79020 }, { "epoch": 1.6087531806615776, "grad_norm": 11.294822158759116, "learning_rate": 8.575425850072936e-06, "loss": 0.3197, "step": 79030 }, { "epoch": 1.6089567430025444, "grad_norm": 13.10665200902347, "learning_rate": 8.57492910024999e-06, "loss": 0.1879, "step": 79040 }, { "epoch": 1.6091603053435115, "grad_norm": 0.6409668936365776, "learning_rate": 8.574432278226748e-06, "loss": 0.2639, "step": 79050 }, { "epoch": 1.6093638676844784, "grad_norm": 7.07457143511119, "learning_rate": 8.573935384013244e-06, "loss": 0.2403, "step": 79060 }, { "epoch": 1.6095674300254452, "grad_norm": 0.9232267775742463, "learning_rate": 8.573438417619509e-06, "loss": 0.221, "step": 79070 }, { "epoch": 1.6097709923664123, "grad_norm": 10.469222249739177, "learning_rate": 8.572941379055583e-06, "loss": 0.1398, "step": 79080 }, { "epoch": 1.6099745547073792, "grad_norm": 15.424866032944216, "learning_rate": 8.572444268331503e-06, "loss": 0.2073, "step": 79090 }, { "epoch": 1.610178117048346, "grad_norm": 11.175824881215172, "learning_rate": 8.571947085457312e-06, "loss": 0.106, "step": 79100 }, { "epoch": 1.610381679389313, "grad_norm": 15.450378370746428, "learning_rate": 8.571449830443048e-06, "loss": 0.2273, "step": 79110 }, { "epoch": 1.61058524173028, "grad_norm": 14.760902299783227, "learning_rate": 8.570952503298754e-06, "loss": 0.1635, "step": 79120 }, { "epoch": 1.6107888040712468, "grad_norm": 16.059275996153247, "learning_rate": 8.570455104034476e-06, "loss": 0.1814, "step": 79130 }, { "epoch": 1.6109923664122139, "grad_norm": 8.740527724679943, "learning_rate": 8.569957632660259e-06, "loss": 0.1682, "step": 79140 }, { "epoch": 1.6111959287531805, "grad_norm": 22.31154176575043, "learning_rate": 8.569460089186148e-06, "loss": 0.1822, "step": 79150 }, { "epoch": 1.6113994910941476, "grad_norm": 5.234307319783281, "learning_rate": 8.568962473622192e-06, "loss": 0.2206, "step": 79160 }, { "epoch": 1.6116030534351145, "grad_norm": 17.87870682558312, "learning_rate": 8.568464785978446e-06, "loss": 0.2177, "step": 79170 }, { "epoch": 1.6118066157760813, "grad_norm": 9.11080336549808, "learning_rate": 8.567967026264955e-06, "loss": 0.206, "step": 79180 }, { "epoch": 1.6120101781170484, "grad_norm": 11.440515037683255, "learning_rate": 8.567469194491772e-06, "loss": 0.1887, "step": 79190 }, { "epoch": 1.6122137404580152, "grad_norm": 11.624388779228946, "learning_rate": 8.566971290668958e-06, "loss": 0.2541, "step": 79200 }, { "epoch": 1.612417302798982, "grad_norm": 12.36030597549583, "learning_rate": 8.566473314806562e-06, "loss": 0.2158, "step": 79210 }, { "epoch": 1.6126208651399492, "grad_norm": 15.405241817251401, "learning_rate": 8.565975266914646e-06, "loss": 0.3126, "step": 79220 }, { "epoch": 1.612824427480916, "grad_norm": 18.99022278528813, "learning_rate": 8.565477147003264e-06, "loss": 0.227, "step": 79230 }, { "epoch": 1.613027989821883, "grad_norm": 11.46793274202601, "learning_rate": 8.56497895508248e-06, "loss": 0.138, "step": 79240 }, { "epoch": 1.61323155216285, "grad_norm": 12.522406232002439, "learning_rate": 8.564480691162355e-06, "loss": 0.2837, "step": 79250 }, { "epoch": 1.6134351145038168, "grad_norm": 0.8311069713217918, "learning_rate": 8.56398235525295e-06, "loss": 0.1562, "step": 79260 }, { "epoch": 1.6136386768447837, "grad_norm": 12.87037435055493, "learning_rate": 8.563483947364332e-06, "loss": 0.1898, "step": 79270 }, { "epoch": 1.6138422391857508, "grad_norm": 1.2120466474909946, "learning_rate": 8.562985467506567e-06, "loss": 0.2113, "step": 79280 }, { "epoch": 1.6140458015267174, "grad_norm": 16.72238789165602, "learning_rate": 8.56248691568972e-06, "loss": 0.2693, "step": 79290 }, { "epoch": 1.6142493638676845, "grad_norm": 15.425866155067865, "learning_rate": 8.561988291923862e-06, "loss": 0.1345, "step": 79300 }, { "epoch": 1.6144529262086516, "grad_norm": 9.516899619296764, "learning_rate": 8.56148959621906e-06, "loss": 0.3334, "step": 79310 }, { "epoch": 1.6146564885496182, "grad_norm": 2.2072121264140785, "learning_rate": 8.560990828585392e-06, "loss": 0.2416, "step": 79320 }, { "epoch": 1.6148600508905853, "grad_norm": 8.669180370195157, "learning_rate": 8.560491989032926e-06, "loss": 0.3427, "step": 79330 }, { "epoch": 1.6150636132315521, "grad_norm": 10.837068386837377, "learning_rate": 8.559993077571738e-06, "loss": 0.2318, "step": 79340 }, { "epoch": 1.615267175572519, "grad_norm": 7.499748910955212, "learning_rate": 8.559494094211906e-06, "loss": 0.2059, "step": 79350 }, { "epoch": 1.615470737913486, "grad_norm": 3.9039981100488865, "learning_rate": 8.558995038963504e-06, "loss": 0.189, "step": 79360 }, { "epoch": 1.615674300254453, "grad_norm": 3.5166666192295657, "learning_rate": 8.558495911836615e-06, "loss": 0.2032, "step": 79370 }, { "epoch": 1.6158778625954198, "grad_norm": 8.352279092035046, "learning_rate": 8.557996712841318e-06, "loss": 0.1868, "step": 79380 }, { "epoch": 1.6160814249363868, "grad_norm": 12.909292352121785, "learning_rate": 8.557497441987693e-06, "loss": 0.2237, "step": 79390 }, { "epoch": 1.6162849872773537, "grad_norm": 5.2842718270141225, "learning_rate": 8.556998099285827e-06, "loss": 0.185, "step": 79400 }, { "epoch": 1.6164885496183206, "grad_norm": 0.714462947017256, "learning_rate": 8.556498684745802e-06, "loss": 0.2087, "step": 79410 }, { "epoch": 1.6166921119592876, "grad_norm": 11.137426665538165, "learning_rate": 8.555999198377704e-06, "loss": 0.2075, "step": 79420 }, { "epoch": 1.6168956743002545, "grad_norm": 13.86774495779575, "learning_rate": 8.555499640191625e-06, "loss": 0.2004, "step": 79430 }, { "epoch": 1.6170992366412213, "grad_norm": 8.510995545195765, "learning_rate": 8.55500001019765e-06, "loss": 0.1993, "step": 79440 }, { "epoch": 1.6173027989821884, "grad_norm": 15.442791469767416, "learning_rate": 8.554500308405871e-06, "loss": 0.24, "step": 79450 }, { "epoch": 1.617506361323155, "grad_norm": 5.118051917204265, "learning_rate": 8.554000534826379e-06, "loss": 0.2535, "step": 79460 }, { "epoch": 1.6177099236641221, "grad_norm": 12.392884292288581, "learning_rate": 8.55350068946927e-06, "loss": 0.2429, "step": 79470 }, { "epoch": 1.6179134860050892, "grad_norm": 2.1055263109911326, "learning_rate": 8.553000772344637e-06, "loss": 0.1153, "step": 79480 }, { "epoch": 1.6181170483460559, "grad_norm": 6.7625299908823076, "learning_rate": 8.552500783462579e-06, "loss": 0.3127, "step": 79490 }, { "epoch": 1.618320610687023, "grad_norm": 10.34443741702215, "learning_rate": 8.55200072283319e-06, "loss": 0.3305, "step": 79500 }, { "epoch": 1.6185241730279898, "grad_norm": 12.242432600297676, "learning_rate": 8.551500590466572e-06, "loss": 0.2822, "step": 79510 }, { "epoch": 1.6187277353689566, "grad_norm": 10.618244479167316, "learning_rate": 8.551000386372825e-06, "loss": 0.2772, "step": 79520 }, { "epoch": 1.6189312977099237, "grad_norm": 20.857007168226207, "learning_rate": 8.550500110562052e-06, "loss": 0.1801, "step": 79530 }, { "epoch": 1.6191348600508906, "grad_norm": 12.795121100652846, "learning_rate": 8.549999763044357e-06, "loss": 0.176, "step": 79540 }, { "epoch": 1.6193384223918574, "grad_norm": 12.654123675650649, "learning_rate": 8.549499343829842e-06, "loss": 0.2287, "step": 79550 }, { "epoch": 1.6195419847328245, "grad_norm": 6.973601902702427, "learning_rate": 8.548998852928619e-06, "loss": 0.2387, "step": 79560 }, { "epoch": 1.6197455470737914, "grad_norm": 8.053979714656073, "learning_rate": 8.548498290350791e-06, "loss": 0.2274, "step": 79570 }, { "epoch": 1.6199491094147582, "grad_norm": 5.546823232080715, "learning_rate": 8.54799765610647e-06, "loss": 0.1703, "step": 79580 }, { "epoch": 1.6201526717557253, "grad_norm": 10.947521112081228, "learning_rate": 8.547496950205766e-06, "loss": 0.2851, "step": 79590 }, { "epoch": 1.6203562340966922, "grad_norm": 10.181542675382872, "learning_rate": 8.546996172658793e-06, "loss": 0.2507, "step": 79600 }, { "epoch": 1.620559796437659, "grad_norm": 7.593460882417039, "learning_rate": 8.546495323475664e-06, "loss": 0.1951, "step": 79610 }, { "epoch": 1.620763358778626, "grad_norm": 80.233363116135, "learning_rate": 8.545994402666493e-06, "loss": 0.2826, "step": 79620 }, { "epoch": 1.6209669211195927, "grad_norm": 4.779809135647029, "learning_rate": 8.5454934102414e-06, "loss": 0.2451, "step": 79630 }, { "epoch": 1.6211704834605598, "grad_norm": 8.10844100214874, "learning_rate": 8.544992346210499e-06, "loss": 0.1849, "step": 79640 }, { "epoch": 1.6213740458015267, "grad_norm": 2.5061078041471596, "learning_rate": 8.544491210583912e-06, "loss": 0.1925, "step": 79650 }, { "epoch": 1.6215776081424935, "grad_norm": 20.73110897980137, "learning_rate": 8.54399000337176e-06, "loss": 0.2955, "step": 79660 }, { "epoch": 1.6217811704834606, "grad_norm": 12.830193167899935, "learning_rate": 8.543488724584165e-06, "loss": 0.1957, "step": 79670 }, { "epoch": 1.6219847328244275, "grad_norm": 10.649584507826404, "learning_rate": 8.54298737423125e-06, "loss": 0.1802, "step": 79680 }, { "epoch": 1.6221882951653943, "grad_norm": 10.354355994398972, "learning_rate": 8.542485952323145e-06, "loss": 0.2533, "step": 79690 }, { "epoch": 1.6223918575063614, "grad_norm": 10.169216684099341, "learning_rate": 8.54198445886997e-06, "loss": 0.2034, "step": 79700 }, { "epoch": 1.6225954198473282, "grad_norm": 5.256107081419638, "learning_rate": 8.541482893881859e-06, "loss": 0.182, "step": 79710 }, { "epoch": 1.622798982188295, "grad_norm": 9.445918890852537, "learning_rate": 8.540981257368938e-06, "loss": 0.2038, "step": 79720 }, { "epoch": 1.6230025445292622, "grad_norm": 2.11302036483562, "learning_rate": 8.54047954934134e-06, "loss": 0.2632, "step": 79730 }, { "epoch": 1.623206106870229, "grad_norm": 20.90293714635394, "learning_rate": 8.5399777698092e-06, "loss": 0.2313, "step": 79740 }, { "epoch": 1.623409669211196, "grad_norm": 12.696886298540571, "learning_rate": 8.539475918782647e-06, "loss": 0.1865, "step": 79750 }, { "epoch": 1.623613231552163, "grad_norm": 3.635989910866244, "learning_rate": 8.53897399627182e-06, "loss": 0.2202, "step": 79760 }, { "epoch": 1.6238167938931296, "grad_norm": 8.327914011344665, "learning_rate": 8.538472002286853e-06, "loss": 0.2501, "step": 79770 }, { "epoch": 1.6240203562340967, "grad_norm": 4.811417462878067, "learning_rate": 8.537969936837888e-06, "loss": 0.1601, "step": 79780 }, { "epoch": 1.6242239185750638, "grad_norm": 7.653927751379996, "learning_rate": 8.537467799935065e-06, "loss": 0.304, "step": 79790 }, { "epoch": 1.6244274809160304, "grad_norm": 7.63241386445948, "learning_rate": 8.536965591588521e-06, "loss": 0.2101, "step": 79800 }, { "epoch": 1.6246310432569975, "grad_norm": 9.623479789155503, "learning_rate": 8.536463311808402e-06, "loss": 0.2197, "step": 79810 }, { "epoch": 1.6248346055979643, "grad_norm": 11.68552871557791, "learning_rate": 8.535960960604853e-06, "loss": 0.2507, "step": 79820 }, { "epoch": 1.6250381679389312, "grad_norm": 3.4248850603513805, "learning_rate": 8.535458537988014e-06, "loss": 0.2439, "step": 79830 }, { "epoch": 1.6252417302798983, "grad_norm": 8.912158965307146, "learning_rate": 8.534956043968039e-06, "loss": 0.1481, "step": 79840 }, { "epoch": 1.6254452926208651, "grad_norm": 8.158329631434961, "learning_rate": 8.534453478555074e-06, "loss": 0.1221, "step": 79850 }, { "epoch": 1.625648854961832, "grad_norm": 15.80098995076071, "learning_rate": 8.533950841759269e-06, "loss": 0.2715, "step": 79860 }, { "epoch": 1.625852417302799, "grad_norm": 6.112013145506114, "learning_rate": 8.533448133590774e-06, "loss": 0.2355, "step": 79870 }, { "epoch": 1.626055979643766, "grad_norm": 10.431203465973551, "learning_rate": 8.532945354059742e-06, "loss": 0.2516, "step": 79880 }, { "epoch": 1.6262595419847328, "grad_norm": 9.40781272419196, "learning_rate": 8.53244250317633e-06, "loss": 0.2288, "step": 79890 }, { "epoch": 1.6264631043256998, "grad_norm": 4.298373016946657, "learning_rate": 8.531939580950692e-06, "loss": 0.2215, "step": 79900 }, { "epoch": 1.6266666666666667, "grad_norm": 7.799794595340477, "learning_rate": 8.531436587392984e-06, "loss": 0.1629, "step": 79910 }, { "epoch": 1.6268702290076336, "grad_norm": 16.231335951292742, "learning_rate": 8.530933522513365e-06, "loss": 0.2429, "step": 79920 }, { "epoch": 1.6270737913486006, "grad_norm": 2.0604816271207818, "learning_rate": 8.530430386321997e-06, "loss": 0.211, "step": 79930 }, { "epoch": 1.6272773536895673, "grad_norm": 8.131568919137392, "learning_rate": 8.52992717882904e-06, "loss": 0.2662, "step": 79940 }, { "epoch": 1.6274809160305344, "grad_norm": 9.705510821064244, "learning_rate": 8.529423900044658e-06, "loss": 0.2331, "step": 79950 }, { "epoch": 1.6276844783715014, "grad_norm": 27.845940150000228, "learning_rate": 8.528920549979011e-06, "loss": 0.2129, "step": 79960 }, { "epoch": 1.627888040712468, "grad_norm": 7.9371785753188275, "learning_rate": 8.528417128642273e-06, "loss": 0.2235, "step": 79970 }, { "epoch": 1.6280916030534351, "grad_norm": 14.659921450398878, "learning_rate": 8.527913636044603e-06, "loss": 0.2257, "step": 79980 }, { "epoch": 1.628295165394402, "grad_norm": 4.675424528252691, "learning_rate": 8.527410072196175e-06, "loss": 0.1873, "step": 79990 }, { "epoch": 1.6284987277353689, "grad_norm": 14.556697350927598, "learning_rate": 8.526906437107156e-06, "loss": 0.2186, "step": 80000 }, { "epoch": 1.628702290076336, "grad_norm": 3.202557731563333, "learning_rate": 8.52640273078772e-06, "loss": 0.195, "step": 80010 }, { "epoch": 1.6289058524173028, "grad_norm": 6.16987673761835, "learning_rate": 8.525898953248038e-06, "loss": 0.195, "step": 80020 }, { "epoch": 1.6291094147582696, "grad_norm": 0.6952954940082258, "learning_rate": 8.525395104498286e-06, "loss": 0.2022, "step": 80030 }, { "epoch": 1.6293129770992367, "grad_norm": 4.941306132093254, "learning_rate": 8.524891184548639e-06, "loss": 0.1924, "step": 80040 }, { "epoch": 1.6295165394402036, "grad_norm": 7.000507667695199, "learning_rate": 8.524387193409274e-06, "loss": 0.296, "step": 80050 }, { "epoch": 1.6297201017811704, "grad_norm": 6.614316568624944, "learning_rate": 8.523883131090371e-06, "loss": 0.2096, "step": 80060 }, { "epoch": 1.6299236641221375, "grad_norm": 7.64200925175578, "learning_rate": 8.52337899760211e-06, "loss": 0.2361, "step": 80070 }, { "epoch": 1.6301272264631044, "grad_norm": 8.530251423323211, "learning_rate": 8.522874792954669e-06, "loss": 0.2046, "step": 80080 }, { "epoch": 1.6303307888040712, "grad_norm": 0.25034486457396754, "learning_rate": 8.522370517158239e-06, "loss": 0.2171, "step": 80090 }, { "epoch": 1.6305343511450383, "grad_norm": 5.462525498482428, "learning_rate": 8.521866170222996e-06, "loss": 0.1764, "step": 80100 }, { "epoch": 1.630737913486005, "grad_norm": 8.099873756412075, "learning_rate": 8.52136175215913e-06, "loss": 0.2859, "step": 80110 }, { "epoch": 1.630941475826972, "grad_norm": 4.514663622027192, "learning_rate": 8.520857262976829e-06, "loss": 0.1595, "step": 80120 }, { "epoch": 1.6311450381679389, "grad_norm": 4.475331342255261, "learning_rate": 8.520352702686281e-06, "loss": 0.2768, "step": 80130 }, { "epoch": 1.6313486005089057, "grad_norm": 6.351161281339332, "learning_rate": 8.519848071297674e-06, "loss": 0.2477, "step": 80140 }, { "epoch": 1.6315521628498728, "grad_norm": 13.101396837782634, "learning_rate": 8.519343368821204e-06, "loss": 0.2314, "step": 80150 }, { "epoch": 1.6317557251908397, "grad_norm": 5.455952388917726, "learning_rate": 8.51883859526706e-06, "loss": 0.1672, "step": 80160 }, { "epoch": 1.6319592875318065, "grad_norm": 15.632829127440763, "learning_rate": 8.51833375064544e-06, "loss": 0.239, "step": 80170 }, { "epoch": 1.6321628498727736, "grad_norm": 1.005365283330098, "learning_rate": 8.517828834966538e-06, "loss": 0.1405, "step": 80180 }, { "epoch": 1.6323664122137405, "grad_norm": 10.330657626604973, "learning_rate": 8.517323848240549e-06, "loss": 0.2699, "step": 80190 }, { "epoch": 1.6325699745547073, "grad_norm": 8.922441803628976, "learning_rate": 8.516818790477676e-06, "loss": 0.1856, "step": 80200 }, { "epoch": 1.6327735368956744, "grad_norm": 16.47919803959774, "learning_rate": 8.51631366168812e-06, "loss": 0.1643, "step": 80210 }, { "epoch": 1.6329770992366412, "grad_norm": 16.576416894555827, "learning_rate": 8.51580846188208e-06, "loss": 0.1548, "step": 80220 }, { "epoch": 1.633180661577608, "grad_norm": 2.8774212877173486, "learning_rate": 8.515303191069758e-06, "loss": 0.1922, "step": 80230 }, { "epoch": 1.6333842239185752, "grad_norm": 14.534469836305972, "learning_rate": 8.51479784926136e-06, "loss": 0.2785, "step": 80240 }, { "epoch": 1.6335877862595418, "grad_norm": 6.256737463916137, "learning_rate": 8.514292436467095e-06, "loss": 0.1451, "step": 80250 }, { "epoch": 1.633791348600509, "grad_norm": 4.6453119086288694, "learning_rate": 8.513786952697168e-06, "loss": 0.2999, "step": 80260 }, { "epoch": 1.633994910941476, "grad_norm": 13.671505240666011, "learning_rate": 8.513281397961786e-06, "loss": 0.2982, "step": 80270 }, { "epoch": 1.6341984732824426, "grad_norm": 0.5987891281734214, "learning_rate": 8.51277577227116e-06, "loss": 0.218, "step": 80280 }, { "epoch": 1.6344020356234097, "grad_norm": 8.260467846377598, "learning_rate": 8.512270075635505e-06, "loss": 0.3062, "step": 80290 }, { "epoch": 1.6346055979643765, "grad_norm": 6.327893684585352, "learning_rate": 8.511764308065031e-06, "loss": 0.1937, "step": 80300 }, { "epoch": 1.6348091603053434, "grad_norm": 6.192265413872456, "learning_rate": 8.511258469569954e-06, "loss": 0.2283, "step": 80310 }, { "epoch": 1.6350127226463105, "grad_norm": 9.332938877683688, "learning_rate": 8.510752560160491e-06, "loss": 0.2072, "step": 80320 }, { "epoch": 1.6352162849872773, "grad_norm": 7.258802306893391, "learning_rate": 8.510246579846858e-06, "loss": 0.1931, "step": 80330 }, { "epoch": 1.6354198473282442, "grad_norm": 12.547223572800101, "learning_rate": 8.509740528639272e-06, "loss": 0.2326, "step": 80340 }, { "epoch": 1.6356234096692113, "grad_norm": 8.355837729612599, "learning_rate": 8.509234406547957e-06, "loss": 0.1456, "step": 80350 }, { "epoch": 1.6358269720101781, "grad_norm": 4.57106820598923, "learning_rate": 8.508728213583132e-06, "loss": 0.1128, "step": 80360 }, { "epoch": 1.636030534351145, "grad_norm": 0.9252393441193126, "learning_rate": 8.508221949755021e-06, "loss": 0.2295, "step": 80370 }, { "epoch": 1.636234096692112, "grad_norm": 4.8325015075049365, "learning_rate": 8.507715615073853e-06, "loss": 0.2258, "step": 80380 }, { "epoch": 1.636437659033079, "grad_norm": 9.83125688039953, "learning_rate": 8.507209209549848e-06, "loss": 0.2444, "step": 80390 }, { "epoch": 1.6366412213740458, "grad_norm": 11.349399790294049, "learning_rate": 8.506702733193237e-06, "loss": 0.2583, "step": 80400 }, { "epoch": 1.6368447837150129, "grad_norm": 3.624388125392308, "learning_rate": 8.506196186014245e-06, "loss": 0.2279, "step": 80410 }, { "epoch": 1.6370483460559795, "grad_norm": 6.168944612802385, "learning_rate": 8.505689568023107e-06, "loss": 0.2788, "step": 80420 }, { "epoch": 1.6372519083969466, "grad_norm": 7.620609775055823, "learning_rate": 8.505182879230052e-06, "loss": 0.2228, "step": 80430 }, { "epoch": 1.6374554707379136, "grad_norm": 9.028939827598546, "learning_rate": 8.504676119645314e-06, "loss": 0.2205, "step": 80440 }, { "epoch": 1.6376590330788803, "grad_norm": 9.891678243701291, "learning_rate": 8.50416928927913e-06, "loss": 0.2135, "step": 80450 }, { "epoch": 1.6378625954198474, "grad_norm": 3.5581671979961538, "learning_rate": 8.50366238814173e-06, "loss": 0.1853, "step": 80460 }, { "epoch": 1.6380661577608142, "grad_norm": 8.276359134579476, "learning_rate": 8.503155416243358e-06, "loss": 0.3428, "step": 80470 }, { "epoch": 1.638269720101781, "grad_norm": 3.7563144583335646, "learning_rate": 8.502648373594251e-06, "loss": 0.196, "step": 80480 }, { "epoch": 1.6384732824427481, "grad_norm": 3.268592567410529, "learning_rate": 8.502141260204647e-06, "loss": 0.2212, "step": 80490 }, { "epoch": 1.638676844783715, "grad_norm": 15.411253236844276, "learning_rate": 8.50163407608479e-06, "loss": 0.2592, "step": 80500 }, { "epoch": 1.6388804071246819, "grad_norm": 8.46293502968331, "learning_rate": 8.501126821244923e-06, "loss": 0.2354, "step": 80510 }, { "epoch": 1.639083969465649, "grad_norm": 6.99787525707776, "learning_rate": 8.50061949569529e-06, "loss": 0.1848, "step": 80520 }, { "epoch": 1.6392875318066158, "grad_norm": 13.158817519433375, "learning_rate": 8.500112099446138e-06, "loss": 0.0731, "step": 80530 }, { "epoch": 1.6394910941475827, "grad_norm": 4.712226289530765, "learning_rate": 8.499604632507715e-06, "loss": 0.1527, "step": 80540 }, { "epoch": 1.6396946564885497, "grad_norm": 4.742619686711604, "learning_rate": 8.499097094890267e-06, "loss": 0.2764, "step": 80550 }, { "epoch": 1.6398982188295166, "grad_norm": 12.390477427418206, "learning_rate": 8.498589486604047e-06, "loss": 0.2311, "step": 80560 }, { "epoch": 1.6401017811704834, "grad_norm": 14.730692043480241, "learning_rate": 8.498081807659307e-06, "loss": 0.3093, "step": 80570 }, { "epoch": 1.6403053435114505, "grad_norm": 10.709591957693313, "learning_rate": 8.497574058066298e-06, "loss": 0.1697, "step": 80580 }, { "epoch": 1.6405089058524172, "grad_norm": 10.154251246149977, "learning_rate": 8.497066237835278e-06, "loss": 0.2093, "step": 80590 }, { "epoch": 1.6407124681933842, "grad_norm": 4.467395136880562, "learning_rate": 8.4965583469765e-06, "loss": 0.1843, "step": 80600 }, { "epoch": 1.6409160305343513, "grad_norm": 1.0742296635253221, "learning_rate": 8.496050385500224e-06, "loss": 0.1382, "step": 80610 }, { "epoch": 1.641119592875318, "grad_norm": 14.031319932938247, "learning_rate": 8.495542353416705e-06, "loss": 0.1833, "step": 80620 }, { "epoch": 1.641323155216285, "grad_norm": 3.2227761773010304, "learning_rate": 8.495034250736207e-06, "loss": 0.2507, "step": 80630 }, { "epoch": 1.6415267175572519, "grad_norm": 4.452971084248534, "learning_rate": 8.49452607746899e-06, "loss": 0.1901, "step": 80640 }, { "epoch": 1.6417302798982187, "grad_norm": 8.019356694758477, "learning_rate": 8.49401783362532e-06, "loss": 0.2393, "step": 80650 }, { "epoch": 1.6419338422391858, "grad_norm": 13.260466351850917, "learning_rate": 8.493509519215458e-06, "loss": 0.2266, "step": 80660 }, { "epoch": 1.6421374045801527, "grad_norm": 7.340727707700534, "learning_rate": 8.493001134249671e-06, "loss": 0.2985, "step": 80670 }, { "epoch": 1.6423409669211195, "grad_norm": 6.809165639077822, "learning_rate": 8.49249267873823e-06, "loss": 0.2432, "step": 80680 }, { "epoch": 1.6425445292620866, "grad_norm": 3.200242164962684, "learning_rate": 8.4919841526914e-06, "loss": 0.2788, "step": 80690 }, { "epoch": 1.6427480916030535, "grad_norm": 14.531561744051809, "learning_rate": 8.491475556119451e-06, "loss": 0.2469, "step": 80700 }, { "epoch": 1.6429516539440203, "grad_norm": 14.48775229276779, "learning_rate": 8.490966889032657e-06, "loss": 0.2032, "step": 80710 }, { "epoch": 1.6431552162849874, "grad_norm": 4.485785799158179, "learning_rate": 8.49045815144129e-06, "loss": 0.2292, "step": 80720 }, { "epoch": 1.6433587786259543, "grad_norm": 3.2288830107992523, "learning_rate": 8.489949343355626e-06, "loss": 0.2445, "step": 80730 }, { "epoch": 1.643562340966921, "grad_norm": 9.201985175107676, "learning_rate": 8.489440464785938e-06, "loss": 0.2484, "step": 80740 }, { "epoch": 1.6437659033078882, "grad_norm": 9.311177992482033, "learning_rate": 8.488931515742506e-06, "loss": 0.1952, "step": 80750 }, { "epoch": 1.6439694656488548, "grad_norm": 5.519414034998048, "learning_rate": 8.48842249623561e-06, "loss": 0.2715, "step": 80760 }, { "epoch": 1.644173027989822, "grad_norm": 7.595180528571372, "learning_rate": 8.487913406275527e-06, "loss": 0.2396, "step": 80770 }, { "epoch": 1.6443765903307888, "grad_norm": 12.876273129071356, "learning_rate": 8.487404245872542e-06, "loss": 0.2428, "step": 80780 }, { "epoch": 1.6445801526717556, "grad_norm": 3.5240456367021338, "learning_rate": 8.486895015036933e-06, "loss": 0.1714, "step": 80790 }, { "epoch": 1.6447837150127227, "grad_norm": 6.431972221426287, "learning_rate": 8.486385713778992e-06, "loss": 0.2152, "step": 80800 }, { "epoch": 1.6449872773536895, "grad_norm": 11.101985397812687, "learning_rate": 8.485876342109e-06, "loss": 0.2285, "step": 80810 }, { "epoch": 1.6451908396946564, "grad_norm": 10.553606018998071, "learning_rate": 8.485366900037244e-06, "loss": 0.2738, "step": 80820 }, { "epoch": 1.6453944020356235, "grad_norm": 0.2663590185925298, "learning_rate": 8.484857387574016e-06, "loss": 0.1915, "step": 80830 }, { "epoch": 1.6455979643765903, "grad_norm": 11.895615685490927, "learning_rate": 8.484347804729604e-06, "loss": 0.3499, "step": 80840 }, { "epoch": 1.6458015267175572, "grad_norm": 14.616264618643601, "learning_rate": 8.483838151514301e-06, "loss": 0.1755, "step": 80850 }, { "epoch": 1.6460050890585243, "grad_norm": 5.40755612876815, "learning_rate": 8.483328427938398e-06, "loss": 0.2257, "step": 80860 }, { "epoch": 1.6462086513994911, "grad_norm": 10.81983548204773, "learning_rate": 8.482818634012191e-06, "loss": 0.1799, "step": 80870 }, { "epoch": 1.646412213740458, "grad_norm": 3.161557680840244, "learning_rate": 8.482308769745977e-06, "loss": 0.1774, "step": 80880 }, { "epoch": 1.646615776081425, "grad_norm": 4.833511890349762, "learning_rate": 8.481798835150053e-06, "loss": 0.2487, "step": 80890 }, { "epoch": 1.6468193384223917, "grad_norm": 25.736841603643946, "learning_rate": 8.481288830234715e-06, "loss": 0.2292, "step": 80900 }, { "epoch": 1.6470229007633588, "grad_norm": 7.595944483080636, "learning_rate": 8.480778755010265e-06, "loss": 0.2633, "step": 80910 }, { "epoch": 1.6472264631043259, "grad_norm": 5.046956594649595, "learning_rate": 8.480268609487006e-06, "loss": 0.2181, "step": 80920 }, { "epoch": 1.6474300254452925, "grad_norm": 3.6962791000697885, "learning_rate": 8.479758393675238e-06, "loss": 0.2659, "step": 80930 }, { "epoch": 1.6476335877862596, "grad_norm": 4.602897941284909, "learning_rate": 8.479248107585271e-06, "loss": 0.2057, "step": 80940 }, { "epoch": 1.6478371501272264, "grad_norm": 6.326654665354438, "learning_rate": 8.478737751227404e-06, "loss": 0.1681, "step": 80950 }, { "epoch": 1.6480407124681933, "grad_norm": 13.69367914535873, "learning_rate": 8.47822732461195e-06, "loss": 0.158, "step": 80960 }, { "epoch": 1.6482442748091604, "grad_norm": 5.699559039993561, "learning_rate": 8.477716827749215e-06, "loss": 0.251, "step": 80970 }, { "epoch": 1.6484478371501272, "grad_norm": 0.6556491476867666, "learning_rate": 8.477206260649506e-06, "loss": 0.2982, "step": 80980 }, { "epoch": 1.648651399491094, "grad_norm": 10.546566651857308, "learning_rate": 8.476695623323141e-06, "loss": 0.2792, "step": 80990 }, { "epoch": 1.6488549618320612, "grad_norm": 2.7954176590584483, "learning_rate": 8.476184915780428e-06, "loss": 0.2818, "step": 81000 }, { "epoch": 1.649058524173028, "grad_norm": 1.4466785878730668, "learning_rate": 8.475674138031686e-06, "loss": 0.1953, "step": 81010 }, { "epoch": 1.6492620865139949, "grad_norm": 12.191209874407068, "learning_rate": 8.475163290087228e-06, "loss": 0.1883, "step": 81020 }, { "epoch": 1.649465648854962, "grad_norm": 14.232688866796307, "learning_rate": 8.474652371957369e-06, "loss": 0.146, "step": 81030 }, { "epoch": 1.6496692111959288, "grad_norm": 6.971317895768414, "learning_rate": 8.47414138365243e-06, "loss": 0.2231, "step": 81040 }, { "epoch": 1.6498727735368957, "grad_norm": 22.636627253584184, "learning_rate": 8.473630325182733e-06, "loss": 0.2334, "step": 81050 }, { "epoch": 1.6500763358778627, "grad_norm": 10.029476476320035, "learning_rate": 8.473119196558597e-06, "loss": 0.2428, "step": 81060 }, { "epoch": 1.6502798982188294, "grad_norm": 5.041767604361208, "learning_rate": 8.472607997790345e-06, "loss": 0.2769, "step": 81070 }, { "epoch": 1.6504834605597964, "grad_norm": 7.3692319842950145, "learning_rate": 8.472096728888301e-06, "loss": 0.2337, "step": 81080 }, { "epoch": 1.6506870229007635, "grad_norm": 4.601503227509084, "learning_rate": 8.471585389862793e-06, "loss": 0.2124, "step": 81090 }, { "epoch": 1.6508905852417302, "grad_norm": 3.317485425377049, "learning_rate": 8.471073980724147e-06, "loss": 0.1914, "step": 81100 }, { "epoch": 1.6510941475826972, "grad_norm": 8.534455837226961, "learning_rate": 8.470562501482691e-06, "loss": 0.2524, "step": 81110 }, { "epoch": 1.651297709923664, "grad_norm": 12.468572538179174, "learning_rate": 8.470050952148754e-06, "loss": 0.2328, "step": 81120 }, { "epoch": 1.651501272264631, "grad_norm": 0.1856258353441448, "learning_rate": 8.46953933273267e-06, "loss": 0.239, "step": 81130 }, { "epoch": 1.651704834605598, "grad_norm": 7.792961976077194, "learning_rate": 8.469027643244769e-06, "loss": 0.235, "step": 81140 }, { "epoch": 1.6519083969465649, "grad_norm": 7.257117466886199, "learning_rate": 8.468515883695387e-06, "loss": 0.1613, "step": 81150 }, { "epoch": 1.6521119592875317, "grad_norm": 6.162019027313955, "learning_rate": 8.46800405409486e-06, "loss": 0.1726, "step": 81160 }, { "epoch": 1.6523155216284988, "grad_norm": 24.13994255446194, "learning_rate": 8.467492154453525e-06, "loss": 0.3487, "step": 81170 }, { "epoch": 1.6525190839694657, "grad_norm": 4.134403724126616, "learning_rate": 8.466980184781719e-06, "loss": 0.2117, "step": 81180 }, { "epoch": 1.6527226463104325, "grad_norm": 8.407613920701323, "learning_rate": 8.466468145089783e-06, "loss": 0.2392, "step": 81190 }, { "epoch": 1.6529262086513996, "grad_norm": 7.633929920168507, "learning_rate": 8.465956035388057e-06, "loss": 0.2057, "step": 81200 }, { "epoch": 1.6531297709923665, "grad_norm": 2.5834225295755093, "learning_rate": 8.465443855686886e-06, "loss": 0.1641, "step": 81210 }, { "epoch": 1.6533333333333333, "grad_norm": 10.61193641808924, "learning_rate": 8.464931605996614e-06, "loss": 0.2317, "step": 81220 }, { "epoch": 1.6535368956743004, "grad_norm": 15.330112494388903, "learning_rate": 8.464419286327584e-06, "loss": 0.2186, "step": 81230 }, { "epoch": 1.653740458015267, "grad_norm": 3.4500928715986916, "learning_rate": 8.463906896690142e-06, "loss": 0.1233, "step": 81240 }, { "epoch": 1.6539440203562341, "grad_norm": 9.600436469301078, "learning_rate": 8.46339443709464e-06, "loss": 0.1442, "step": 81250 }, { "epoch": 1.654147582697201, "grad_norm": 12.35754356427165, "learning_rate": 8.46288190755143e-06, "loss": 0.2205, "step": 81260 }, { "epoch": 1.6543511450381678, "grad_norm": 6.395776495156954, "learning_rate": 8.462369308070857e-06, "loss": 0.2343, "step": 81270 }, { "epoch": 1.654554707379135, "grad_norm": 17.849532491849654, "learning_rate": 8.461856638663275e-06, "loss": 0.3131, "step": 81280 }, { "epoch": 1.6547582697201018, "grad_norm": 4.554818034684907, "learning_rate": 8.461343899339042e-06, "loss": 0.2361, "step": 81290 }, { "epoch": 1.6549618320610686, "grad_norm": 22.070183147945034, "learning_rate": 8.46083109010851e-06, "loss": 0.1683, "step": 81300 }, { "epoch": 1.6551653944020357, "grad_norm": 2.2400410011199363, "learning_rate": 8.460318210982036e-06, "loss": 0.3222, "step": 81310 }, { "epoch": 1.6553689567430026, "grad_norm": 0.21116617038524726, "learning_rate": 8.45980526196998e-06, "loss": 0.2016, "step": 81320 }, { "epoch": 1.6555725190839694, "grad_norm": 19.709018880284848, "learning_rate": 8.459292243082698e-06, "loss": 0.1359, "step": 81330 }, { "epoch": 1.6557760814249365, "grad_norm": 15.410451864946651, "learning_rate": 8.458779154330557e-06, "loss": 0.2492, "step": 81340 }, { "epoch": 1.6559796437659033, "grad_norm": 5.978569211060592, "learning_rate": 8.458265995723914e-06, "loss": 0.1464, "step": 81350 }, { "epoch": 1.6561832061068702, "grad_norm": 10.609721357088764, "learning_rate": 8.457752767273136e-06, "loss": 0.311, "step": 81360 }, { "epoch": 1.6563867684478373, "grad_norm": 12.393612928765736, "learning_rate": 8.457239468988585e-06, "loss": 0.2035, "step": 81370 }, { "epoch": 1.656590330788804, "grad_norm": 14.245429812138838, "learning_rate": 8.456726100880632e-06, "loss": 0.2137, "step": 81380 }, { "epoch": 1.656793893129771, "grad_norm": 18.227565852029265, "learning_rate": 8.456212662959643e-06, "loss": 0.2846, "step": 81390 }, { "epoch": 1.656997455470738, "grad_norm": 1.4399204635114817, "learning_rate": 8.455699155235987e-06, "loss": 0.1292, "step": 81400 }, { "epoch": 1.6572010178117047, "grad_norm": 9.041416426704268, "learning_rate": 8.455185577720037e-06, "loss": 0.2035, "step": 81410 }, { "epoch": 1.6574045801526718, "grad_norm": 4.47124415384888, "learning_rate": 8.454671930422163e-06, "loss": 0.2075, "step": 81420 }, { "epoch": 1.6576081424936386, "grad_norm": 7.357037544830513, "learning_rate": 8.45415821335274e-06, "loss": 0.2228, "step": 81430 }, { "epoch": 1.6578117048346055, "grad_norm": 6.835739215589111, "learning_rate": 8.453644426522143e-06, "loss": 0.2347, "step": 81440 }, { "epoch": 1.6580152671755726, "grad_norm": 3.238780797631313, "learning_rate": 8.453130569940748e-06, "loss": 0.1496, "step": 81450 }, { "epoch": 1.6582188295165394, "grad_norm": 5.733934524959576, "learning_rate": 8.452616643618935e-06, "loss": 0.2414, "step": 81460 }, { "epoch": 1.6584223918575063, "grad_norm": 22.611119700184933, "learning_rate": 8.452102647567082e-06, "loss": 0.2198, "step": 81470 }, { "epoch": 1.6586259541984734, "grad_norm": 16.949225828997577, "learning_rate": 8.451588581795567e-06, "loss": 0.285, "step": 81480 }, { "epoch": 1.6588295165394402, "grad_norm": 1.8259508520166292, "learning_rate": 8.451074446314777e-06, "loss": 0.144, "step": 81490 }, { "epoch": 1.659033078880407, "grad_norm": 12.692697008954442, "learning_rate": 8.450560241135095e-06, "loss": 0.3027, "step": 81500 }, { "epoch": 1.6592366412213742, "grad_norm": 9.73722671096951, "learning_rate": 8.450045966266902e-06, "loss": 0.2592, "step": 81510 }, { "epoch": 1.659440203562341, "grad_norm": 9.453588391379116, "learning_rate": 8.449531621720588e-06, "loss": 0.2062, "step": 81520 }, { "epoch": 1.6596437659033079, "grad_norm": 5.310643469628514, "learning_rate": 8.44901720750654e-06, "loss": 0.1314, "step": 81530 }, { "epoch": 1.659847328244275, "grad_norm": 17.998158826364676, "learning_rate": 8.448502723635149e-06, "loss": 0.2974, "step": 81540 }, { "epoch": 1.6600508905852416, "grad_norm": 5.687548502398543, "learning_rate": 8.447988170116801e-06, "loss": 0.3325, "step": 81550 }, { "epoch": 1.6602544529262087, "grad_norm": 10.7576544692723, "learning_rate": 8.447473546961891e-06, "loss": 0.154, "step": 81560 }, { "epoch": 1.6604580152671757, "grad_norm": 12.740862171153772, "learning_rate": 8.446958854180815e-06, "loss": 0.2769, "step": 81570 }, { "epoch": 1.6606615776081424, "grad_norm": 1.3834388492274832, "learning_rate": 8.446444091783963e-06, "loss": 0.2289, "step": 81580 }, { "epoch": 1.6608651399491094, "grad_norm": 9.866411207918814, "learning_rate": 8.445929259781735e-06, "loss": 0.2156, "step": 81590 }, { "epoch": 1.6610687022900763, "grad_norm": 9.295693959102106, "learning_rate": 8.445414358184526e-06, "loss": 0.2232, "step": 81600 }, { "epoch": 1.6612722646310432, "grad_norm": 7.522077821326332, "learning_rate": 8.444899387002737e-06, "loss": 0.2223, "step": 81610 }, { "epoch": 1.6614758269720102, "grad_norm": 8.400678546628937, "learning_rate": 8.444384346246768e-06, "loss": 0.2593, "step": 81620 }, { "epoch": 1.661679389312977, "grad_norm": 13.072145018581011, "learning_rate": 8.44386923592702e-06, "loss": 0.3034, "step": 81630 }, { "epoch": 1.661882951653944, "grad_norm": 4.445531617768826, "learning_rate": 8.443354056053898e-06, "loss": 0.2726, "step": 81640 }, { "epoch": 1.662086513994911, "grad_norm": 7.700298130843525, "learning_rate": 8.442838806637806e-06, "loss": 0.2347, "step": 81650 }, { "epoch": 1.6622900763358779, "grad_norm": 6.785727329244319, "learning_rate": 8.442323487689148e-06, "loss": 0.1482, "step": 81660 }, { "epoch": 1.6624936386768447, "grad_norm": 11.648128412118536, "learning_rate": 8.441808099218336e-06, "loss": 0.1772, "step": 81670 }, { "epoch": 1.6626972010178118, "grad_norm": 5.8016748977609955, "learning_rate": 8.441292641235776e-06, "loss": 0.2096, "step": 81680 }, { "epoch": 1.6629007633587787, "grad_norm": 11.973491531282834, "learning_rate": 8.440777113751877e-06, "loss": 0.2612, "step": 81690 }, { "epoch": 1.6631043256997455, "grad_norm": 7.5130061137463455, "learning_rate": 8.440261516777055e-06, "loss": 0.249, "step": 81700 }, { "epoch": 1.6633078880407126, "grad_norm": 13.208951366730663, "learning_rate": 8.439745850321718e-06, "loss": 0.2968, "step": 81710 }, { "epoch": 1.6635114503816792, "grad_norm": 6.527893391228037, "learning_rate": 8.439230114396286e-06, "loss": 0.2232, "step": 81720 }, { "epoch": 1.6637150127226463, "grad_norm": 9.294649176525596, "learning_rate": 8.43871430901117e-06, "loss": 0.2321, "step": 81730 }, { "epoch": 1.6639185750636132, "grad_norm": 5.7180490988458015, "learning_rate": 8.43819843417679e-06, "loss": 0.1941, "step": 81740 }, { "epoch": 1.66412213740458, "grad_norm": 8.041178476468207, "learning_rate": 8.437682489903565e-06, "loss": 0.2807, "step": 81750 }, { "epoch": 1.6643256997455471, "grad_norm": 6.6959011180107195, "learning_rate": 8.437166476201914e-06, "loss": 0.252, "step": 81760 }, { "epoch": 1.664529262086514, "grad_norm": 8.755676939848472, "learning_rate": 8.436650393082258e-06, "loss": 0.1764, "step": 81770 }, { "epoch": 1.6647328244274808, "grad_norm": 23.832869633810912, "learning_rate": 8.436134240555023e-06, "loss": 0.2653, "step": 81780 }, { "epoch": 1.664936386768448, "grad_norm": 5.298724721333376, "learning_rate": 8.435618018630629e-06, "loss": 0.3062, "step": 81790 }, { "epoch": 1.6651399491094148, "grad_norm": 8.867662108891519, "learning_rate": 8.435101727319505e-06, "loss": 0.2314, "step": 81800 }, { "epoch": 1.6653435114503816, "grad_norm": 20.06523992654037, "learning_rate": 8.434585366632078e-06, "loss": 0.2814, "step": 81810 }, { "epoch": 1.6655470737913487, "grad_norm": 5.767776194289408, "learning_rate": 8.434068936578775e-06, "loss": 0.1723, "step": 81820 }, { "epoch": 1.6657506361323156, "grad_norm": 10.967119781284143, "learning_rate": 8.433552437170027e-06, "loss": 0.3567, "step": 81830 }, { "epoch": 1.6659541984732824, "grad_norm": 5.9037421539766, "learning_rate": 8.433035868416266e-06, "loss": 0.232, "step": 81840 }, { "epoch": 1.6661577608142495, "grad_norm": 5.428781515601145, "learning_rate": 8.432519230327923e-06, "loss": 0.1428, "step": 81850 }, { "epoch": 1.6663613231552161, "grad_norm": 8.313171999692477, "learning_rate": 8.432002522915433e-06, "loss": 0.1769, "step": 81860 }, { "epoch": 1.6665648854961832, "grad_norm": 8.92155234504408, "learning_rate": 8.431485746189232e-06, "loss": 0.2491, "step": 81870 }, { "epoch": 1.6667684478371503, "grad_norm": 7.366159237883369, "learning_rate": 8.430968900159757e-06, "loss": 0.1765, "step": 81880 }, { "epoch": 1.666972010178117, "grad_norm": 6.203126547758309, "learning_rate": 8.430451984837445e-06, "loss": 0.1989, "step": 81890 }, { "epoch": 1.667175572519084, "grad_norm": 7.89577540567389, "learning_rate": 8.429935000232737e-06, "loss": 0.2952, "step": 81900 }, { "epoch": 1.6673791348600508, "grad_norm": 10.859144654699323, "learning_rate": 8.429417946356074e-06, "loss": 0.2614, "step": 81910 }, { "epoch": 1.6675826972010177, "grad_norm": 1.2997797595297638, "learning_rate": 8.428900823217898e-06, "loss": 0.1665, "step": 81920 }, { "epoch": 1.6677862595419848, "grad_norm": 11.69816065000028, "learning_rate": 8.428383630828655e-06, "loss": 0.1216, "step": 81930 }, { "epoch": 1.6679898218829516, "grad_norm": 10.61979402393557, "learning_rate": 8.427866369198786e-06, "loss": 0.1975, "step": 81940 }, { "epoch": 1.6681933842239185, "grad_norm": 12.878226286797, "learning_rate": 8.427349038338743e-06, "loss": 0.2219, "step": 81950 }, { "epoch": 1.6683969465648856, "grad_norm": 6.953738102398546, "learning_rate": 8.426831638258972e-06, "loss": 0.2505, "step": 81960 }, { "epoch": 1.6686005089058524, "grad_norm": 7.5518366682530615, "learning_rate": 8.426314168969923e-06, "loss": 0.2082, "step": 81970 }, { "epoch": 1.6688040712468193, "grad_norm": 10.532792260077727, "learning_rate": 8.425796630482044e-06, "loss": 0.2035, "step": 81980 }, { "epoch": 1.6690076335877864, "grad_norm": 6.758588411791961, "learning_rate": 8.425279022805791e-06, "loss": 0.2546, "step": 81990 }, { "epoch": 1.6692111959287532, "grad_norm": 15.106009276377229, "learning_rate": 8.424761345951614e-06, "loss": 0.2641, "step": 82000 }, { "epoch": 1.66941475826972, "grad_norm": 3.9950680291225873, "learning_rate": 8.424243599929976e-06, "loss": 0.1679, "step": 82010 }, { "epoch": 1.6696183206106872, "grad_norm": 5.958786578876527, "learning_rate": 8.423725784751323e-06, "loss": 0.1809, "step": 82020 }, { "epoch": 1.6698218829516538, "grad_norm": 14.054305700643758, "learning_rate": 8.42320790042612e-06, "loss": 0.1351, "step": 82030 }, { "epoch": 1.6700254452926209, "grad_norm": 12.66991476981206, "learning_rate": 8.422689946964825e-06, "loss": 0.1683, "step": 82040 }, { "epoch": 1.670229007633588, "grad_norm": 20.41719034131211, "learning_rate": 8.422171924377898e-06, "loss": 0.1897, "step": 82050 }, { "epoch": 1.6704325699745546, "grad_norm": 32.592910159119924, "learning_rate": 8.4216538326758e-06, "loss": 0.2175, "step": 82060 }, { "epoch": 1.6706361323155217, "grad_norm": 12.437896991655027, "learning_rate": 8.421135671868996e-06, "loss": 0.2238, "step": 82070 }, { "epoch": 1.6708396946564885, "grad_norm": 13.89894807492156, "learning_rate": 8.420617441967952e-06, "loss": 0.1996, "step": 82080 }, { "epoch": 1.6710432569974554, "grad_norm": 7.2701780683887085, "learning_rate": 8.42009914298313e-06, "loss": 0.166, "step": 82090 }, { "epoch": 1.6712468193384225, "grad_norm": 9.705909054886412, "learning_rate": 8.419580774925004e-06, "loss": 0.2654, "step": 82100 }, { "epoch": 1.6714503816793893, "grad_norm": 10.842400710162396, "learning_rate": 8.419062337804039e-06, "loss": 0.1781, "step": 82110 }, { "epoch": 1.6716539440203562, "grad_norm": 18.924903532780895, "learning_rate": 8.418543831630705e-06, "loss": 0.2931, "step": 82120 }, { "epoch": 1.6718575063613232, "grad_norm": 8.952735885490736, "learning_rate": 8.418025256415477e-06, "loss": 0.2217, "step": 82130 }, { "epoch": 1.67206106870229, "grad_norm": 7.293275814944796, "learning_rate": 8.417506612168826e-06, "loss": 0.1828, "step": 82140 }, { "epoch": 1.672264631043257, "grad_norm": 9.215547886051771, "learning_rate": 8.416987898901227e-06, "loss": 0.1402, "step": 82150 }, { "epoch": 1.672468193384224, "grad_norm": 9.747151129581333, "learning_rate": 8.416469116623157e-06, "loss": 0.1597, "step": 82160 }, { "epoch": 1.672671755725191, "grad_norm": 9.315675916362435, "learning_rate": 8.415950265345094e-06, "loss": 0.178, "step": 82170 }, { "epoch": 1.6728753180661577, "grad_norm": 12.667734664169252, "learning_rate": 8.415431345077512e-06, "loss": 0.2162, "step": 82180 }, { "epoch": 1.6730788804071248, "grad_norm": 9.158774280543968, "learning_rate": 8.414912355830898e-06, "loss": 0.2862, "step": 82190 }, { "epoch": 1.6732824427480915, "grad_norm": 24.276082122944906, "learning_rate": 8.41439329761573e-06, "loss": 0.3506, "step": 82200 }, { "epoch": 1.6734860050890585, "grad_norm": 14.897591277771632, "learning_rate": 8.41387417044249e-06, "loss": 0.1824, "step": 82210 }, { "epoch": 1.6736895674300254, "grad_norm": 12.382684167257022, "learning_rate": 8.413354974321666e-06, "loss": 0.1613, "step": 82220 }, { "epoch": 1.6738931297709922, "grad_norm": 11.689772113508091, "learning_rate": 8.41283570926374e-06, "loss": 0.2431, "step": 82230 }, { "epoch": 1.6740966921119593, "grad_norm": 14.882134597688932, "learning_rate": 8.412316375279204e-06, "loss": 0.2948, "step": 82240 }, { "epoch": 1.6743002544529262, "grad_norm": 9.615715845658787, "learning_rate": 8.411796972378542e-06, "loss": 0.2274, "step": 82250 }, { "epoch": 1.674503816793893, "grad_norm": 6.566352829582723, "learning_rate": 8.411277500572246e-06, "loss": 0.1942, "step": 82260 }, { "epoch": 1.6747073791348601, "grad_norm": 19.748633201569586, "learning_rate": 8.410757959870807e-06, "loss": 0.2097, "step": 82270 }, { "epoch": 1.674910941475827, "grad_norm": 5.878963052386331, "learning_rate": 8.410238350284719e-06, "loss": 0.1817, "step": 82280 }, { "epoch": 1.6751145038167938, "grad_norm": 5.0476707662596105, "learning_rate": 8.409718671824475e-06, "loss": 0.1891, "step": 82290 }, { "epoch": 1.675318066157761, "grad_norm": 15.685901872256524, "learning_rate": 8.409198924500568e-06, "loss": 0.2222, "step": 82300 }, { "epoch": 1.6755216284987278, "grad_norm": 0.4441604412879536, "learning_rate": 8.4086791083235e-06, "loss": 0.1966, "step": 82310 }, { "epoch": 1.6757251908396946, "grad_norm": 1.7747007952302443, "learning_rate": 8.408159223303767e-06, "loss": 0.1937, "step": 82320 }, { "epoch": 1.6759287531806617, "grad_norm": 0.5767505901845266, "learning_rate": 8.407639269451869e-06, "loss": 0.1023, "step": 82330 }, { "epoch": 1.6761323155216283, "grad_norm": 19.710854703154183, "learning_rate": 8.407119246778306e-06, "loss": 0.2552, "step": 82340 }, { "epoch": 1.6763358778625954, "grad_norm": 6.537129575275538, "learning_rate": 8.406599155293583e-06, "loss": 0.3836, "step": 82350 }, { "epoch": 1.6765394402035625, "grad_norm": 10.15422381146494, "learning_rate": 8.4060789950082e-06, "loss": 0.162, "step": 82360 }, { "epoch": 1.6767430025445291, "grad_norm": 9.681781747953936, "learning_rate": 8.405558765932666e-06, "loss": 0.2644, "step": 82370 }, { "epoch": 1.6769465648854962, "grad_norm": 5.22745287097644, "learning_rate": 8.405038468077487e-06, "loss": 0.2122, "step": 82380 }, { "epoch": 1.677150127226463, "grad_norm": 15.406059862434345, "learning_rate": 8.40451810145317e-06, "loss": 0.2143, "step": 82390 }, { "epoch": 1.67735368956743, "grad_norm": 4.447229311630213, "learning_rate": 8.403997666070224e-06, "loss": 0.1255, "step": 82400 }, { "epoch": 1.677557251908397, "grad_norm": 8.347875351156782, "learning_rate": 8.403477161939163e-06, "loss": 0.2235, "step": 82410 }, { "epoch": 1.6777608142493639, "grad_norm": 11.032289945086015, "learning_rate": 8.402956589070495e-06, "loss": 0.1927, "step": 82420 }, { "epoch": 1.6779643765903307, "grad_norm": 7.299240433883643, "learning_rate": 8.402435947474736e-06, "loss": 0.2766, "step": 82430 }, { "epoch": 1.6781679389312978, "grad_norm": 9.950298112641917, "learning_rate": 8.401915237162403e-06, "loss": 0.2113, "step": 82440 }, { "epoch": 1.6783715012722646, "grad_norm": 6.016242204800994, "learning_rate": 8.401394458144008e-06, "loss": 0.2652, "step": 82450 }, { "epoch": 1.6785750636132315, "grad_norm": 13.254737144301075, "learning_rate": 8.400873610430071e-06, "loss": 0.2951, "step": 82460 }, { "epoch": 1.6787786259541986, "grad_norm": 5.253506676186439, "learning_rate": 8.400352694031111e-06, "loss": 0.1511, "step": 82470 }, { "epoch": 1.6789821882951654, "grad_norm": 15.58081354420951, "learning_rate": 8.39983170895765e-06, "loss": 0.2808, "step": 82480 }, { "epoch": 1.6791857506361323, "grad_norm": 12.452621492638343, "learning_rate": 8.399310655220207e-06, "loss": 0.2422, "step": 82490 }, { "epoch": 1.6793893129770994, "grad_norm": 9.18152398211912, "learning_rate": 8.39878953282931e-06, "loss": 0.2081, "step": 82500 }, { "epoch": 1.679592875318066, "grad_norm": 14.295581745666597, "learning_rate": 8.398268341795477e-06, "loss": 0.1813, "step": 82510 }, { "epoch": 1.679796437659033, "grad_norm": 17.225666068786577, "learning_rate": 8.397747082129239e-06, "loss": 0.2218, "step": 82520 }, { "epoch": 1.6800000000000002, "grad_norm": 13.592963463497764, "learning_rate": 8.397225753841124e-06, "loss": 0.3469, "step": 82530 }, { "epoch": 1.6802035623409668, "grad_norm": 7.285706002038147, "learning_rate": 8.396704356941659e-06, "loss": 0.2305, "step": 82540 }, { "epoch": 1.6804071246819339, "grad_norm": 7.55995640571053, "learning_rate": 8.396182891441374e-06, "loss": 0.2378, "step": 82550 }, { "epoch": 1.6806106870229007, "grad_norm": 23.551768820527805, "learning_rate": 8.3956613573508e-06, "loss": 0.2509, "step": 82560 }, { "epoch": 1.6808142493638676, "grad_norm": 0.3992216991549867, "learning_rate": 8.395139754680473e-06, "loss": 0.1591, "step": 82570 }, { "epoch": 1.6810178117048347, "grad_norm": 4.43526982577582, "learning_rate": 8.394618083440924e-06, "loss": 0.2467, "step": 82580 }, { "epoch": 1.6812213740458015, "grad_norm": 4.635337120360387, "learning_rate": 8.394096343642692e-06, "loss": 0.1848, "step": 82590 }, { "epoch": 1.6814249363867684, "grad_norm": 1.3268876206662246, "learning_rate": 8.393574535296314e-06, "loss": 0.1783, "step": 82600 }, { "epoch": 1.6816284987277355, "grad_norm": 5.736634054935752, "learning_rate": 8.393052658412326e-06, "loss": 0.2573, "step": 82610 }, { "epoch": 1.6818320610687023, "grad_norm": 7.117996414590676, "learning_rate": 8.39253071300127e-06, "loss": 0.2882, "step": 82620 }, { "epoch": 1.6820356234096692, "grad_norm": 11.351983405848708, "learning_rate": 8.392008699073687e-06, "loss": 0.2451, "step": 82630 }, { "epoch": 1.6822391857506362, "grad_norm": 14.76001163675592, "learning_rate": 8.391486616640117e-06, "loss": 0.2277, "step": 82640 }, { "epoch": 1.682442748091603, "grad_norm": 5.766704951371766, "learning_rate": 8.390964465711108e-06, "loss": 0.1707, "step": 82650 }, { "epoch": 1.68264631043257, "grad_norm": 8.698988160351425, "learning_rate": 8.390442246297203e-06, "loss": 0.2049, "step": 82660 }, { "epoch": 1.682849872773537, "grad_norm": 10.938183398898309, "learning_rate": 8.389919958408951e-06, "loss": 0.2471, "step": 82670 }, { "epoch": 1.6830534351145037, "grad_norm": 11.351200330940568, "learning_rate": 8.3893976020569e-06, "loss": 0.2612, "step": 82680 }, { "epoch": 1.6832569974554707, "grad_norm": 11.43168036260731, "learning_rate": 8.388875177251599e-06, "loss": 0.1897, "step": 82690 }, { "epoch": 1.6834605597964376, "grad_norm": 14.953206060800165, "learning_rate": 8.388352684003597e-06, "loss": 0.2384, "step": 82700 }, { "epoch": 1.6836641221374045, "grad_norm": 6.134640751769075, "learning_rate": 8.38783012232345e-06, "loss": 0.3074, "step": 82710 }, { "epoch": 1.6838676844783715, "grad_norm": 14.117575472449897, "learning_rate": 8.38730749222171e-06, "loss": 0.2071, "step": 82720 }, { "epoch": 1.6840712468193384, "grad_norm": 8.70666898370862, "learning_rate": 8.386784793708934e-06, "loss": 0.1769, "step": 82730 }, { "epoch": 1.6842748091603053, "grad_norm": 8.350300281731641, "learning_rate": 8.386262026795673e-06, "loss": 0.2341, "step": 82740 }, { "epoch": 1.6844783715012723, "grad_norm": 10.983456964306459, "learning_rate": 8.385739191492492e-06, "loss": 0.1614, "step": 82750 }, { "epoch": 1.6846819338422392, "grad_norm": 7.03411471677182, "learning_rate": 8.385216287809945e-06, "loss": 0.2214, "step": 82760 }, { "epoch": 1.684885496183206, "grad_norm": 18.81092465948441, "learning_rate": 8.384693315758598e-06, "loss": 0.2115, "step": 82770 }, { "epoch": 1.6850890585241731, "grad_norm": 9.509212628758752, "learning_rate": 8.384170275349008e-06, "loss": 0.1894, "step": 82780 }, { "epoch": 1.68529262086514, "grad_norm": 3.0709864495605, "learning_rate": 8.383647166591739e-06, "loss": 0.2219, "step": 82790 }, { "epoch": 1.6854961832061068, "grad_norm": 8.497270382948017, "learning_rate": 8.383123989497359e-06, "loss": 0.0943, "step": 82800 }, { "epoch": 1.685699745547074, "grad_norm": 10.606111563216643, "learning_rate": 8.382600744076433e-06, "loss": 0.2435, "step": 82810 }, { "epoch": 1.6859033078880405, "grad_norm": 11.223768506747122, "learning_rate": 8.382077430339528e-06, "loss": 0.2038, "step": 82820 }, { "epoch": 1.6861068702290076, "grad_norm": 7.108227730339177, "learning_rate": 8.381554048297214e-06, "loss": 0.2304, "step": 82830 }, { "epoch": 1.6863104325699747, "grad_norm": 14.400599863321219, "learning_rate": 8.38103059796006e-06, "loss": 0.246, "step": 82840 }, { "epoch": 1.6865139949109413, "grad_norm": 5.408013984412302, "learning_rate": 8.380507079338638e-06, "loss": 0.1151, "step": 82850 }, { "epoch": 1.6867175572519084, "grad_norm": 7.8383659653553845, "learning_rate": 8.379983492443522e-06, "loss": 0.3158, "step": 82860 }, { "epoch": 1.6869211195928753, "grad_norm": 5.772572881274401, "learning_rate": 8.379459837285285e-06, "loss": 0.2493, "step": 82870 }, { "epoch": 1.6871246819338421, "grad_norm": 13.469940992713088, "learning_rate": 8.378936113874503e-06, "loss": 0.2003, "step": 82880 }, { "epoch": 1.6873282442748092, "grad_norm": 13.279266104733809, "learning_rate": 8.378412322221755e-06, "loss": 0.2065, "step": 82890 }, { "epoch": 1.687531806615776, "grad_norm": 7.651988011554893, "learning_rate": 8.37788846233762e-06, "loss": 0.1954, "step": 82900 }, { "epoch": 1.687735368956743, "grad_norm": 2.9553462376566935, "learning_rate": 8.377364534232677e-06, "loss": 0.2321, "step": 82910 }, { "epoch": 1.68793893129771, "grad_norm": 6.978855595618924, "learning_rate": 8.376840537917506e-06, "loss": 0.2719, "step": 82920 }, { "epoch": 1.6881424936386769, "grad_norm": 12.80805410227728, "learning_rate": 8.376316473402693e-06, "loss": 0.1703, "step": 82930 }, { "epoch": 1.6883460559796437, "grad_norm": 10.318226897474679, "learning_rate": 8.375792340698817e-06, "loss": 0.2605, "step": 82940 }, { "epoch": 1.6885496183206108, "grad_norm": 23.270866746008636, "learning_rate": 8.375268139816468e-06, "loss": 0.2064, "step": 82950 }, { "epoch": 1.6887531806615776, "grad_norm": 7.714190816073332, "learning_rate": 8.374743870766232e-06, "loss": 0.2497, "step": 82960 }, { "epoch": 1.6889567430025445, "grad_norm": 2.9044982159379478, "learning_rate": 8.374219533558697e-06, "loss": 0.1898, "step": 82970 }, { "epoch": 1.6891603053435116, "grad_norm": 5.432483681481642, "learning_rate": 8.373695128204453e-06, "loss": 0.1505, "step": 82980 }, { "epoch": 1.6893638676844782, "grad_norm": 18.08989217363304, "learning_rate": 8.373170654714089e-06, "loss": 0.231, "step": 82990 }, { "epoch": 1.6895674300254453, "grad_norm": 0.5519312762845472, "learning_rate": 8.3726461130982e-06, "loss": 0.2234, "step": 83000 }, { "epoch": 1.6897709923664124, "grad_norm": 4.0856281830962375, "learning_rate": 8.372121503367379e-06, "loss": 0.2757, "step": 83010 }, { "epoch": 1.689974554707379, "grad_norm": 22.93754093296804, "learning_rate": 8.37159682553222e-06, "loss": 0.1824, "step": 83020 }, { "epoch": 1.690178117048346, "grad_norm": 4.72415596181364, "learning_rate": 8.371072079603321e-06, "loss": 0.1486, "step": 83030 }, { "epoch": 1.690381679389313, "grad_norm": 4.263993821301454, "learning_rate": 8.37054726559128e-06, "loss": 0.239, "step": 83040 }, { "epoch": 1.6905852417302798, "grad_norm": 17.60673919901011, "learning_rate": 8.370022383506696e-06, "loss": 0.2188, "step": 83050 }, { "epoch": 1.6907888040712469, "grad_norm": 15.588506549531386, "learning_rate": 8.369497433360169e-06, "loss": 0.1927, "step": 83060 }, { "epoch": 1.6909923664122137, "grad_norm": 22.210642727901853, "learning_rate": 8.3689724151623e-06, "loss": 0.1984, "step": 83070 }, { "epoch": 1.6911959287531806, "grad_norm": 8.962767000688082, "learning_rate": 8.368447328923696e-06, "loss": 0.264, "step": 83080 }, { "epoch": 1.6913994910941477, "grad_norm": 4.39891270812773, "learning_rate": 8.367922174654958e-06, "loss": 0.2572, "step": 83090 }, { "epoch": 1.6916030534351145, "grad_norm": 7.599884790443996, "learning_rate": 8.367396952366695e-06, "loss": 0.2965, "step": 83100 }, { "epoch": 1.6918066157760814, "grad_norm": 8.198739017371881, "learning_rate": 8.366871662069514e-06, "loss": 0.2759, "step": 83110 }, { "epoch": 1.6920101781170485, "grad_norm": 12.740857872450816, "learning_rate": 8.366346303774022e-06, "loss": 0.3091, "step": 83120 }, { "epoch": 1.6922137404580153, "grad_norm": 10.383452205530885, "learning_rate": 8.365820877490831e-06, "loss": 0.1551, "step": 83130 }, { "epoch": 1.6924173027989822, "grad_norm": 6.743153061156407, "learning_rate": 8.365295383230552e-06, "loss": 0.2799, "step": 83140 }, { "epoch": 1.6926208651399492, "grad_norm": 13.942374013656902, "learning_rate": 8.364769821003798e-06, "loss": 0.1935, "step": 83150 }, { "epoch": 1.6928244274809159, "grad_norm": 14.46245488110427, "learning_rate": 8.364244190821184e-06, "loss": 0.1841, "step": 83160 }, { "epoch": 1.693027989821883, "grad_norm": 15.56368916768202, "learning_rate": 8.363718492693324e-06, "loss": 0.1762, "step": 83170 }, { "epoch": 1.6932315521628498, "grad_norm": 13.531359226233624, "learning_rate": 8.36319272663084e-06, "loss": 0.2757, "step": 83180 }, { "epoch": 1.6934351145038167, "grad_norm": 2.953819655394233, "learning_rate": 8.362666892644344e-06, "loss": 0.2092, "step": 83190 }, { "epoch": 1.6936386768447838, "grad_norm": 11.161567556510757, "learning_rate": 8.36214099074446e-06, "loss": 0.2206, "step": 83200 }, { "epoch": 1.6938422391857506, "grad_norm": 13.218774699213004, "learning_rate": 8.36161502094181e-06, "loss": 0.2503, "step": 83210 }, { "epoch": 1.6940458015267175, "grad_norm": 8.833061658827143, "learning_rate": 8.361088983247012e-06, "loss": 0.2628, "step": 83220 }, { "epoch": 1.6942493638676845, "grad_norm": 5.071353216931524, "learning_rate": 8.360562877670695e-06, "loss": 0.2742, "step": 83230 }, { "epoch": 1.6944529262086514, "grad_norm": 8.236318527136557, "learning_rate": 8.360036704223482e-06, "loss": 0.1731, "step": 83240 }, { "epoch": 1.6946564885496183, "grad_norm": 4.079676692159144, "learning_rate": 8.359510462915999e-06, "loss": 0.1659, "step": 83250 }, { "epoch": 1.6948600508905853, "grad_norm": 5.194663306221983, "learning_rate": 8.358984153758876e-06, "loss": 0.1865, "step": 83260 }, { "epoch": 1.6950636132315522, "grad_norm": 7.407838072371941, "learning_rate": 8.358457776762742e-06, "loss": 0.1821, "step": 83270 }, { "epoch": 1.695267175572519, "grad_norm": 12.099190759152453, "learning_rate": 8.357931331938227e-06, "loss": 0.2265, "step": 83280 }, { "epoch": 1.6954707379134861, "grad_norm": 11.687143359789024, "learning_rate": 8.357404819295964e-06, "loss": 0.1689, "step": 83290 }, { "epoch": 1.6956743002544528, "grad_norm": 7.6420213808139446, "learning_rate": 8.356878238846587e-06, "loss": 0.1936, "step": 83300 }, { "epoch": 1.6958778625954198, "grad_norm": 16.614261224999915, "learning_rate": 8.356351590600729e-06, "loss": 0.1837, "step": 83310 }, { "epoch": 1.696081424936387, "grad_norm": 14.679857542970232, "learning_rate": 8.355824874569029e-06, "loss": 0.2149, "step": 83320 }, { "epoch": 1.6962849872773536, "grad_norm": 7.614906565599289, "learning_rate": 8.355298090762122e-06, "loss": 0.2496, "step": 83330 }, { "epoch": 1.6964885496183206, "grad_norm": 14.324531730187516, "learning_rate": 8.354771239190648e-06, "loss": 0.279, "step": 83340 }, { "epoch": 1.6966921119592875, "grad_norm": 9.523471304252574, "learning_rate": 8.354244319865251e-06, "loss": 0.2155, "step": 83350 }, { "epoch": 1.6968956743002543, "grad_norm": 4.700716689706781, "learning_rate": 8.353717332796568e-06, "loss": 0.1412, "step": 83360 }, { "epoch": 1.6970992366412214, "grad_norm": 16.321753439559377, "learning_rate": 8.353190277995242e-06, "loss": 0.2542, "step": 83370 }, { "epoch": 1.6973027989821883, "grad_norm": 4.286497316774078, "learning_rate": 8.352663155471921e-06, "loss": 0.2244, "step": 83380 }, { "epoch": 1.6975063613231551, "grad_norm": 15.902002227234545, "learning_rate": 8.35213596523725e-06, "loss": 0.2407, "step": 83390 }, { "epoch": 1.6977099236641222, "grad_norm": 13.971966375936288, "learning_rate": 8.351608707301874e-06, "loss": 0.1648, "step": 83400 }, { "epoch": 1.697913486005089, "grad_norm": 3.9566549131200963, "learning_rate": 8.351081381676446e-06, "loss": 0.2472, "step": 83410 }, { "epoch": 1.698117048346056, "grad_norm": 10.949167550485909, "learning_rate": 8.350553988371612e-06, "loss": 0.2608, "step": 83420 }, { "epoch": 1.698320610687023, "grad_norm": 0.43186572076974883, "learning_rate": 8.350026527398025e-06, "loss": 0.2834, "step": 83430 }, { "epoch": 1.6985241730279899, "grad_norm": 10.947526788871857, "learning_rate": 8.349498998766336e-06, "loss": 0.2485, "step": 83440 }, { "epoch": 1.6987277353689567, "grad_norm": 13.92251202413389, "learning_rate": 8.348971402487203e-06, "loss": 0.2335, "step": 83450 }, { "epoch": 1.6989312977099238, "grad_norm": 7.668103777903114, "learning_rate": 8.348443738571277e-06, "loss": 0.2365, "step": 83460 }, { "epoch": 1.6991348600508904, "grad_norm": 1.7176163102278486, "learning_rate": 8.347916007029217e-06, "loss": 0.1884, "step": 83470 }, { "epoch": 1.6993384223918575, "grad_norm": 5.37777418445977, "learning_rate": 8.347388207871683e-06, "loss": 0.203, "step": 83480 }, { "epoch": 1.6995419847328246, "grad_norm": 16.394976766583756, "learning_rate": 8.346860341109331e-06, "loss": 0.1733, "step": 83490 }, { "epoch": 1.6997455470737912, "grad_norm": 0.675372979151271, "learning_rate": 8.346332406752823e-06, "loss": 0.2291, "step": 83500 }, { "epoch": 1.6999491094147583, "grad_norm": 5.239113440679392, "learning_rate": 8.345804404812824e-06, "loss": 0.2223, "step": 83510 }, { "epoch": 1.7001526717557252, "grad_norm": 15.66781165935856, "learning_rate": 8.345276335299995e-06, "loss": 0.2039, "step": 83520 }, { "epoch": 1.700356234096692, "grad_norm": 4.771425832315675, "learning_rate": 8.344748198225001e-06, "loss": 0.1896, "step": 83530 }, { "epoch": 1.700559796437659, "grad_norm": 12.804827033027694, "learning_rate": 8.34421999359851e-06, "loss": 0.2651, "step": 83540 }, { "epoch": 1.700763358778626, "grad_norm": 6.156067294806952, "learning_rate": 8.343691721431188e-06, "loss": 0.1796, "step": 83550 }, { "epoch": 1.7009669211195928, "grad_norm": 10.132920599539153, "learning_rate": 8.343163381733706e-06, "loss": 0.1903, "step": 83560 }, { "epoch": 1.7011704834605599, "grad_norm": 12.403464700158139, "learning_rate": 8.342634974516734e-06, "loss": 0.1671, "step": 83570 }, { "epoch": 1.7013740458015267, "grad_norm": 15.344150707921441, "learning_rate": 8.342106499790943e-06, "loss": 0.3058, "step": 83580 }, { "epoch": 1.7015776081424936, "grad_norm": 0.6158072347826404, "learning_rate": 8.341577957567007e-06, "loss": 0.1688, "step": 83590 }, { "epoch": 1.7017811704834607, "grad_norm": 20.786269350012013, "learning_rate": 8.341049347855599e-06, "loss": 0.1992, "step": 83600 }, { "epoch": 1.7019847328244275, "grad_norm": 6.647337718365164, "learning_rate": 8.340520670667397e-06, "loss": 0.2217, "step": 83610 }, { "epoch": 1.7021882951653944, "grad_norm": 2.2981351587330225, "learning_rate": 8.339991926013076e-06, "loss": 0.1443, "step": 83620 }, { "epoch": 1.7023918575063615, "grad_norm": 3.1662929097406267, "learning_rate": 8.339463113903318e-06, "loss": 0.3037, "step": 83630 }, { "epoch": 1.702595419847328, "grad_norm": 20.50654408498852, "learning_rate": 8.3389342343488e-06, "loss": 0.1664, "step": 83640 }, { "epoch": 1.7027989821882952, "grad_norm": 16.18973798086422, "learning_rate": 8.338405287360207e-06, "loss": 0.2782, "step": 83650 }, { "epoch": 1.7030025445292623, "grad_norm": 6.268555441299248, "learning_rate": 8.337876272948217e-06, "loss": 0.3014, "step": 83660 }, { "epoch": 1.7032061068702289, "grad_norm": 14.676205969064142, "learning_rate": 8.337347191123517e-06, "loss": 0.2121, "step": 83670 }, { "epoch": 1.703409669211196, "grad_norm": 11.334087875776191, "learning_rate": 8.336818041896793e-06, "loss": 0.2424, "step": 83680 }, { "epoch": 1.7036132315521628, "grad_norm": 6.987766761379743, "learning_rate": 8.33628882527873e-06, "loss": 0.1698, "step": 83690 }, { "epoch": 1.7038167938931297, "grad_norm": 7.570270705746356, "learning_rate": 8.335759541280017e-06, "loss": 0.2547, "step": 83700 }, { "epoch": 1.7040203562340968, "grad_norm": 7.453219054805963, "learning_rate": 8.335230189911343e-06, "loss": 0.1255, "step": 83710 }, { "epoch": 1.7042239185750636, "grad_norm": 1.6141719443995097, "learning_rate": 8.3347007711834e-06, "loss": 0.1546, "step": 83720 }, { "epoch": 1.7044274809160305, "grad_norm": 9.991213279572804, "learning_rate": 8.33417128510688e-06, "loss": 0.199, "step": 83730 }, { "epoch": 1.7046310432569975, "grad_norm": 5.283931908608606, "learning_rate": 8.333641731692477e-06, "loss": 0.2849, "step": 83740 }, { "epoch": 1.7048346055979644, "grad_norm": 5.417956429823338, "learning_rate": 8.333112110950885e-06, "loss": 0.2398, "step": 83750 }, { "epoch": 1.7050381679389313, "grad_norm": 7.7433024975486395, "learning_rate": 8.332582422892801e-06, "loss": 0.384, "step": 83760 }, { "epoch": 1.7052417302798983, "grad_norm": 9.114256182008491, "learning_rate": 8.332052667528922e-06, "loss": 0.3081, "step": 83770 }, { "epoch": 1.7054452926208652, "grad_norm": 5.515674759580967, "learning_rate": 8.331522844869949e-06, "loss": 0.1399, "step": 83780 }, { "epoch": 1.705648854961832, "grad_norm": 5.971638808399369, "learning_rate": 8.33099295492658e-06, "loss": 0.2083, "step": 83790 }, { "epoch": 1.7058524173027991, "grad_norm": 11.87888263905116, "learning_rate": 8.33046299770952e-06, "loss": 0.261, "step": 83800 }, { "epoch": 1.7060559796437658, "grad_norm": 22.756169728788016, "learning_rate": 8.32993297322947e-06, "loss": 0.219, "step": 83810 }, { "epoch": 1.7062595419847328, "grad_norm": 11.462390905946666, "learning_rate": 8.329402881497131e-06, "loss": 0.2287, "step": 83820 }, { "epoch": 1.7064631043256997, "grad_norm": 13.539283397227942, "learning_rate": 8.328872722523216e-06, "loss": 0.2213, "step": 83830 }, { "epoch": 1.7066666666666666, "grad_norm": 1.361702137986344, "learning_rate": 8.328342496318426e-06, "loss": 0.2089, "step": 83840 }, { "epoch": 1.7068702290076336, "grad_norm": 9.408372720207058, "learning_rate": 8.327812202893474e-06, "loss": 0.2728, "step": 83850 }, { "epoch": 1.7070737913486005, "grad_norm": 2.645129570112779, "learning_rate": 8.32728184225907e-06, "loss": 0.2051, "step": 83860 }, { "epoch": 1.7072773536895673, "grad_norm": 2.9500596502083147, "learning_rate": 8.326751414425922e-06, "loss": 0.2274, "step": 83870 }, { "epoch": 1.7074809160305344, "grad_norm": 9.146339091101282, "learning_rate": 8.326220919404746e-06, "loss": 0.2572, "step": 83880 }, { "epoch": 1.7076844783715013, "grad_norm": 11.088901406433003, "learning_rate": 8.325690357206254e-06, "loss": 0.3005, "step": 83890 }, { "epoch": 1.7078880407124681, "grad_norm": 3.990939524769702, "learning_rate": 8.325159727841158e-06, "loss": 0.2069, "step": 83900 }, { "epoch": 1.7080916030534352, "grad_norm": 8.241482730670361, "learning_rate": 8.32462903132018e-06, "loss": 0.1775, "step": 83910 }, { "epoch": 1.708295165394402, "grad_norm": 10.066428237756245, "learning_rate": 8.324098267654041e-06, "loss": 0.2021, "step": 83920 }, { "epoch": 1.708498727735369, "grad_norm": 6.761310297539057, "learning_rate": 8.323567436853452e-06, "loss": 0.1598, "step": 83930 }, { "epoch": 1.708702290076336, "grad_norm": 12.487946914533996, "learning_rate": 8.323036538929141e-06, "loss": 0.1579, "step": 83940 }, { "epoch": 1.7089058524173026, "grad_norm": 18.494086048615586, "learning_rate": 8.322505573891824e-06, "loss": 0.1754, "step": 83950 }, { "epoch": 1.7091094147582697, "grad_norm": 11.72302511095078, "learning_rate": 8.321974541752229e-06, "loss": 0.2373, "step": 83960 }, { "epoch": 1.7093129770992368, "grad_norm": 10.343996604313523, "learning_rate": 8.321443442521078e-06, "loss": 0.2501, "step": 83970 }, { "epoch": 1.7095165394402034, "grad_norm": 12.105977895387767, "learning_rate": 8.3209122762091e-06, "loss": 0.2327, "step": 83980 }, { "epoch": 1.7097201017811705, "grad_norm": 6.572318250025573, "learning_rate": 8.32038104282702e-06, "loss": 0.2963, "step": 83990 }, { "epoch": 1.7099236641221374, "grad_norm": 9.125666796919388, "learning_rate": 8.319849742385571e-06, "loss": 0.3087, "step": 84000 }, { "epoch": 1.7101272264631042, "grad_norm": 2.1375609964577675, "learning_rate": 8.319318374895477e-06, "loss": 0.2848, "step": 84010 }, { "epoch": 1.7103307888040713, "grad_norm": 11.13916080344156, "learning_rate": 8.318786940367476e-06, "loss": 0.2243, "step": 84020 }, { "epoch": 1.7105343511450382, "grad_norm": 7.321662217143313, "learning_rate": 8.318255438812298e-06, "loss": 0.2049, "step": 84030 }, { "epoch": 1.710737913486005, "grad_norm": 7.858479227629387, "learning_rate": 8.317723870240675e-06, "loss": 0.1998, "step": 84040 }, { "epoch": 1.710941475826972, "grad_norm": 8.246981581552644, "learning_rate": 8.317192234663346e-06, "loss": 0.2083, "step": 84050 }, { "epoch": 1.711145038167939, "grad_norm": 12.02002412300333, "learning_rate": 8.31666053209105e-06, "loss": 0.1568, "step": 84060 }, { "epoch": 1.7113486005089058, "grad_norm": 14.193484121666474, "learning_rate": 8.31612876253452e-06, "loss": 0.3052, "step": 84070 }, { "epoch": 1.7115521628498729, "grad_norm": 5.8658061550520095, "learning_rate": 8.315596926004502e-06, "loss": 0.2135, "step": 84080 }, { "epoch": 1.7117557251908397, "grad_norm": 20.69705568672977, "learning_rate": 8.315065022511729e-06, "loss": 0.1963, "step": 84090 }, { "epoch": 1.7119592875318066, "grad_norm": 9.983646461261678, "learning_rate": 8.314533052066951e-06, "loss": 0.2792, "step": 84100 }, { "epoch": 1.7121628498727737, "grad_norm": 7.045550801810377, "learning_rate": 8.314001014680909e-06, "loss": 0.1668, "step": 84110 }, { "epoch": 1.7123664122137403, "grad_norm": 11.065076868039135, "learning_rate": 8.313468910364345e-06, "loss": 0.1971, "step": 84120 }, { "epoch": 1.7125699745547074, "grad_norm": 9.356921064936863, "learning_rate": 8.312936739128012e-06, "loss": 0.3428, "step": 84130 }, { "epoch": 1.7127735368956745, "grad_norm": 10.778854488854655, "learning_rate": 8.312404500982654e-06, "loss": 0.199, "step": 84140 }, { "epoch": 1.712977099236641, "grad_norm": 1.8046446953647657, "learning_rate": 8.31187219593902e-06, "loss": 0.2143, "step": 84150 }, { "epoch": 1.7131806615776082, "grad_norm": 6.210922684647401, "learning_rate": 8.31133982400786e-06, "loss": 0.2636, "step": 84160 }, { "epoch": 1.713384223918575, "grad_norm": 4.210929180946063, "learning_rate": 8.31080738519993e-06, "loss": 0.0829, "step": 84170 }, { "epoch": 1.713587786259542, "grad_norm": 12.392362206743316, "learning_rate": 8.31027487952598e-06, "loss": 0.2542, "step": 84180 }, { "epoch": 1.713791348600509, "grad_norm": 3.44570445780167, "learning_rate": 8.309742306996762e-06, "loss": 0.2287, "step": 84190 }, { "epoch": 1.7139949109414758, "grad_norm": 10.725327681789524, "learning_rate": 8.309209667623039e-06, "loss": 0.2115, "step": 84200 }, { "epoch": 1.7141984732824427, "grad_norm": 10.663904133657239, "learning_rate": 8.308676961415563e-06, "loss": 0.2131, "step": 84210 }, { "epoch": 1.7144020356234098, "grad_norm": 4.920820400715851, "learning_rate": 8.308144188385094e-06, "loss": 0.1611, "step": 84220 }, { "epoch": 1.7146055979643766, "grad_norm": 2.9837434152212, "learning_rate": 8.307611348542392e-06, "loss": 0.1763, "step": 84230 }, { "epoch": 1.7148091603053435, "grad_norm": 4.508202830902023, "learning_rate": 8.30707844189822e-06, "loss": 0.2917, "step": 84240 }, { "epoch": 1.7150127226463106, "grad_norm": 10.696996491750468, "learning_rate": 8.306545468463339e-06, "loss": 0.1886, "step": 84250 }, { "epoch": 1.7152162849872774, "grad_norm": 9.78324011117261, "learning_rate": 8.306012428248514e-06, "loss": 0.2517, "step": 84260 }, { "epoch": 1.7154198473282443, "grad_norm": 7.66808857934713, "learning_rate": 8.305479321264509e-06, "loss": 0.2815, "step": 84270 }, { "epoch": 1.7156234096692113, "grad_norm": 9.335542669043944, "learning_rate": 8.30494614752209e-06, "loss": 0.1914, "step": 84280 }, { "epoch": 1.715826972010178, "grad_norm": 11.352708566697313, "learning_rate": 8.304412907032029e-06, "loss": 0.1984, "step": 84290 }, { "epoch": 1.716030534351145, "grad_norm": 9.300142536737974, "learning_rate": 8.303879599805093e-06, "loss": 0.1779, "step": 84300 }, { "epoch": 1.716234096692112, "grad_norm": 10.967563332100106, "learning_rate": 8.303346225852053e-06, "loss": 0.2545, "step": 84310 }, { "epoch": 1.7164376590330788, "grad_norm": 3.873385542778951, "learning_rate": 8.302812785183681e-06, "loss": 0.2101, "step": 84320 }, { "epoch": 1.7166412213740458, "grad_norm": 4.623196593688046, "learning_rate": 8.302279277810751e-06, "loss": 0.1936, "step": 84330 }, { "epoch": 1.7168447837150127, "grad_norm": 4.752630767007315, "learning_rate": 8.301745703744038e-06, "loss": 0.2302, "step": 84340 }, { "epoch": 1.7170483460559796, "grad_norm": 0.693630622476278, "learning_rate": 8.301212062994319e-06, "loss": 0.1495, "step": 84350 }, { "epoch": 1.7172519083969466, "grad_norm": 6.31464847749017, "learning_rate": 8.300678355572368e-06, "loss": 0.2098, "step": 84360 }, { "epoch": 1.7174554707379135, "grad_norm": 13.895513874066438, "learning_rate": 8.300144581488968e-06, "loss": 0.1893, "step": 84370 }, { "epoch": 1.7176590330788803, "grad_norm": 7.047286560244598, "learning_rate": 8.299610740754898e-06, "loss": 0.2903, "step": 84380 }, { "epoch": 1.7178625954198474, "grad_norm": 4.229613815380746, "learning_rate": 8.299076833380937e-06, "loss": 0.1695, "step": 84390 }, { "epoch": 1.7180661577608143, "grad_norm": 11.242322657420496, "learning_rate": 8.298542859377872e-06, "loss": 0.2808, "step": 84400 }, { "epoch": 1.7182697201017811, "grad_norm": 10.497460278310914, "learning_rate": 8.298008818756485e-06, "loss": 0.3356, "step": 84410 }, { "epoch": 1.7184732824427482, "grad_norm": 10.52381591643098, "learning_rate": 8.297474711527563e-06, "loss": 0.2637, "step": 84420 }, { "epoch": 1.7186768447837149, "grad_norm": 12.748054575717399, "learning_rate": 8.29694053770189e-06, "loss": 0.235, "step": 84430 }, { "epoch": 1.718880407124682, "grad_norm": 3.7332590018254637, "learning_rate": 8.296406297290259e-06, "loss": 0.1273, "step": 84440 }, { "epoch": 1.719083969465649, "grad_norm": 7.699781412089047, "learning_rate": 8.295871990303455e-06, "loss": 0.282, "step": 84450 }, { "epoch": 1.7192875318066156, "grad_norm": 7.279363265156676, "learning_rate": 8.295337616752271e-06, "loss": 0.1693, "step": 84460 }, { "epoch": 1.7194910941475827, "grad_norm": 6.286063143422939, "learning_rate": 8.2948031766475e-06, "loss": 0.1038, "step": 84470 }, { "epoch": 1.7196946564885496, "grad_norm": 3.5642458745510757, "learning_rate": 8.294268669999936e-06, "loss": 0.2066, "step": 84480 }, { "epoch": 1.7198982188295164, "grad_norm": 6.549703096525695, "learning_rate": 8.293734096820372e-06, "loss": 0.2173, "step": 84490 }, { "epoch": 1.7201017811704835, "grad_norm": 5.8060699749817655, "learning_rate": 8.293199457119607e-06, "loss": 0.1996, "step": 84500 }, { "epoch": 1.7203053435114504, "grad_norm": 4.515894310032722, "learning_rate": 8.292664750908437e-06, "loss": 0.2612, "step": 84510 }, { "epoch": 1.7205089058524172, "grad_norm": 3.978311645461609, "learning_rate": 8.29212997819766e-06, "loss": 0.2926, "step": 84520 }, { "epoch": 1.7207124681933843, "grad_norm": 7.347298186787386, "learning_rate": 8.291595138998078e-06, "loss": 0.2154, "step": 84530 }, { "epoch": 1.7209160305343512, "grad_norm": 8.400908839035612, "learning_rate": 8.291060233320494e-06, "loss": 0.1497, "step": 84540 }, { "epoch": 1.721119592875318, "grad_norm": 9.398428912053687, "learning_rate": 8.29052526117571e-06, "loss": 0.1195, "step": 84550 }, { "epoch": 1.721323155216285, "grad_norm": 11.44885375026088, "learning_rate": 8.28999022257453e-06, "loss": 0.2496, "step": 84560 }, { "epoch": 1.721526717557252, "grad_norm": 5.080809505598278, "learning_rate": 8.289455117527759e-06, "loss": 0.232, "step": 84570 }, { "epoch": 1.7217302798982188, "grad_norm": 10.618361311021783, "learning_rate": 8.288919946046206e-06, "loss": 0.2858, "step": 84580 }, { "epoch": 1.7219338422391859, "grad_norm": 12.170925812992513, "learning_rate": 8.288384708140677e-06, "loss": 0.3001, "step": 84590 }, { "epoch": 1.7221374045801525, "grad_norm": 11.54361283537849, "learning_rate": 8.287849403821986e-06, "loss": 0.1726, "step": 84600 }, { "epoch": 1.7223409669211196, "grad_norm": 10.596832108735848, "learning_rate": 8.287314033100939e-06, "loss": 0.2356, "step": 84610 }, { "epoch": 1.7225445292620867, "grad_norm": 7.334334244666652, "learning_rate": 8.286778595988351e-06, "loss": 0.2875, "step": 84620 }, { "epoch": 1.7227480916030533, "grad_norm": 4.687014617213176, "learning_rate": 8.286243092495039e-06, "loss": 0.226, "step": 84630 }, { "epoch": 1.7229516539440204, "grad_norm": 10.93379961810222, "learning_rate": 8.285707522631812e-06, "loss": 0.2489, "step": 84640 }, { "epoch": 1.7231552162849872, "grad_norm": 0.1793017086160635, "learning_rate": 8.28517188640949e-06, "loss": 0.1888, "step": 84650 }, { "epoch": 1.723358778625954, "grad_norm": 6.054233516486088, "learning_rate": 8.284636183838893e-06, "loss": 0.181, "step": 84660 }, { "epoch": 1.7235623409669212, "grad_norm": 0.7412067988701743, "learning_rate": 8.284100414930835e-06, "loss": 0.2035, "step": 84670 }, { "epoch": 1.723765903307888, "grad_norm": 9.382502832263107, "learning_rate": 8.283564579696142e-06, "loss": 0.2017, "step": 84680 }, { "epoch": 1.723969465648855, "grad_norm": 5.221076561807006, "learning_rate": 8.28302867814563e-06, "loss": 0.2179, "step": 84690 }, { "epoch": 1.724173027989822, "grad_norm": 10.68323450920915, "learning_rate": 8.282492710290126e-06, "loss": 0.1974, "step": 84700 }, { "epoch": 1.7243765903307888, "grad_norm": 8.950785955942829, "learning_rate": 8.281956676140455e-06, "loss": 0.2923, "step": 84710 }, { "epoch": 1.7245801526717557, "grad_norm": 6.065478832428383, "learning_rate": 8.281420575707442e-06, "loss": 0.2031, "step": 84720 }, { "epoch": 1.7247837150127228, "grad_norm": 5.573796005362552, "learning_rate": 8.280884409001912e-06, "loss": 0.1887, "step": 84730 }, { "epoch": 1.7249872773536896, "grad_norm": 1.720465411415075, "learning_rate": 8.280348176034697e-06, "loss": 0.2401, "step": 84740 }, { "epoch": 1.7251908396946565, "grad_norm": 7.913493839607254, "learning_rate": 8.279811876816626e-06, "loss": 0.2539, "step": 84750 }, { "epoch": 1.7253944020356236, "grad_norm": 16.242576230940852, "learning_rate": 8.27927551135853e-06, "loss": 0.2727, "step": 84760 }, { "epoch": 1.7255979643765902, "grad_norm": 6.805058001089607, "learning_rate": 8.27873907967124e-06, "loss": 0.2121, "step": 84770 }, { "epoch": 1.7258015267175573, "grad_norm": 3.77948747402477, "learning_rate": 8.278202581765593e-06, "loss": 0.242, "step": 84780 }, { "epoch": 1.7260050890585241, "grad_norm": 4.450185912332743, "learning_rate": 8.277666017652421e-06, "loss": 0.1762, "step": 84790 }, { "epoch": 1.726208651399491, "grad_norm": 6.924759461032543, "learning_rate": 8.277129387342563e-06, "loss": 0.2921, "step": 84800 }, { "epoch": 1.726412213740458, "grad_norm": 8.370152431384433, "learning_rate": 8.276592690846856e-06, "loss": 0.1817, "step": 84810 }, { "epoch": 1.726615776081425, "grad_norm": 7.193801439950563, "learning_rate": 8.27605592817614e-06, "loss": 0.1323, "step": 84820 }, { "epoch": 1.7268193384223918, "grad_norm": 6.370159115713409, "learning_rate": 8.275519099341253e-06, "loss": 0.1933, "step": 84830 }, { "epoch": 1.7270229007633588, "grad_norm": 7.662466261638273, "learning_rate": 8.274982204353041e-06, "loss": 0.2159, "step": 84840 }, { "epoch": 1.7272264631043257, "grad_norm": 5.0953652715044635, "learning_rate": 8.274445243222344e-06, "loss": 0.2333, "step": 84850 }, { "epoch": 1.7274300254452926, "grad_norm": 9.758938800396662, "learning_rate": 8.273908215960009e-06, "loss": 0.2812, "step": 84860 }, { "epoch": 1.7276335877862596, "grad_norm": 12.055347263765405, "learning_rate": 8.273371122576879e-06, "loss": 0.1603, "step": 84870 }, { "epoch": 1.7278371501272265, "grad_norm": 21.51094011111731, "learning_rate": 8.272833963083803e-06, "loss": 0.2517, "step": 84880 }, { "epoch": 1.7280407124681934, "grad_norm": 13.162264353582053, "learning_rate": 8.272296737491632e-06, "loss": 0.3389, "step": 84890 }, { "epoch": 1.7282442748091604, "grad_norm": 16.13807016463463, "learning_rate": 8.271759445811213e-06, "loss": 0.2364, "step": 84900 }, { "epoch": 1.728447837150127, "grad_norm": 5.840336189247589, "learning_rate": 8.271222088053398e-06, "loss": 0.102, "step": 84910 }, { "epoch": 1.7286513994910941, "grad_norm": 20.497889004363763, "learning_rate": 8.270684664229039e-06, "loss": 0.1805, "step": 84920 }, { "epoch": 1.7288549618320612, "grad_norm": 0.28344009200380016, "learning_rate": 8.27014717434899e-06, "loss": 0.2113, "step": 84930 }, { "epoch": 1.7290585241730279, "grad_norm": 10.68013433370134, "learning_rate": 8.269609618424109e-06, "loss": 0.1665, "step": 84940 }, { "epoch": 1.729262086513995, "grad_norm": 15.098798621920064, "learning_rate": 8.269071996465249e-06, "loss": 0.2726, "step": 84950 }, { "epoch": 1.7294656488549618, "grad_norm": 12.917547814413451, "learning_rate": 8.268534308483269e-06, "loss": 0.1365, "step": 84960 }, { "epoch": 1.7296692111959286, "grad_norm": 7.92857006154866, "learning_rate": 8.267996554489031e-06, "loss": 0.1665, "step": 84970 }, { "epoch": 1.7298727735368957, "grad_norm": 14.803561458523324, "learning_rate": 8.26745873449339e-06, "loss": 0.2624, "step": 84980 }, { "epoch": 1.7300763358778626, "grad_norm": 18.073240624341473, "learning_rate": 8.266920848507215e-06, "loss": 0.1846, "step": 84990 }, { "epoch": 1.7302798982188294, "grad_norm": 15.680168133466804, "learning_rate": 8.266382896541364e-06, "loss": 0.2155, "step": 85000 }, { "epoch": 1.7304834605597965, "grad_norm": 1.0525948326266876, "learning_rate": 8.265844878606704e-06, "loss": 0.1723, "step": 85010 }, { "epoch": 1.7306870229007634, "grad_norm": 21.516273398757413, "learning_rate": 8.265306794714099e-06, "loss": 0.2016, "step": 85020 }, { "epoch": 1.7308905852417302, "grad_norm": 12.120966942583062, "learning_rate": 8.264768644874418e-06, "loss": 0.2184, "step": 85030 }, { "epoch": 1.7310941475826973, "grad_norm": 20.210139379124985, "learning_rate": 8.26423042909853e-06, "loss": 0.1505, "step": 85040 }, { "epoch": 1.7312977099236642, "grad_norm": 10.397748131311511, "learning_rate": 8.263692147397304e-06, "loss": 0.2005, "step": 85050 }, { "epoch": 1.731501272264631, "grad_norm": 5.483302537703748, "learning_rate": 8.263153799781611e-06, "loss": 0.2056, "step": 85060 }, { "epoch": 1.731704834605598, "grad_norm": 7.223966510272216, "learning_rate": 8.262615386262325e-06, "loss": 0.2372, "step": 85070 }, { "epoch": 1.7319083969465647, "grad_norm": 11.569563499605495, "learning_rate": 8.262076906850319e-06, "loss": 0.1766, "step": 85080 }, { "epoch": 1.7321119592875318, "grad_norm": 1.9016951489144598, "learning_rate": 8.261538361556467e-06, "loss": 0.1885, "step": 85090 }, { "epoch": 1.7323155216284989, "grad_norm": 17.079923237815848, "learning_rate": 8.260999750391648e-06, "loss": 0.2827, "step": 85100 }, { "epoch": 1.7325190839694655, "grad_norm": 14.20209617905528, "learning_rate": 8.260461073366738e-06, "loss": 0.2646, "step": 85110 }, { "epoch": 1.7327226463104326, "grad_norm": 3.1670689581138465, "learning_rate": 8.259922330492617e-06, "loss": 0.3003, "step": 85120 }, { "epoch": 1.7329262086513995, "grad_norm": 6.4245038006067245, "learning_rate": 8.259383521780167e-06, "loss": 0.2034, "step": 85130 }, { "epoch": 1.7331297709923663, "grad_norm": 7.545758818558514, "learning_rate": 8.258844647240269e-06, "loss": 0.218, "step": 85140 }, { "epoch": 1.7333333333333334, "grad_norm": 8.30601569571156, "learning_rate": 8.258305706883805e-06, "loss": 0.1857, "step": 85150 }, { "epoch": 1.7335368956743002, "grad_norm": 5.6348593851085305, "learning_rate": 8.257766700721661e-06, "loss": 0.1826, "step": 85160 }, { "epoch": 1.733740458015267, "grad_norm": 16.493816921966566, "learning_rate": 8.257227628764722e-06, "loss": 0.194, "step": 85170 }, { "epoch": 1.7339440203562342, "grad_norm": 11.808050573321127, "learning_rate": 8.256688491023877e-06, "loss": 0.2757, "step": 85180 }, { "epoch": 1.734147582697201, "grad_norm": 24.045244694042346, "learning_rate": 8.25614928751001e-06, "loss": 0.3427, "step": 85190 }, { "epoch": 1.734351145038168, "grad_norm": 6.416241357502372, "learning_rate": 8.255610018234019e-06, "loss": 0.2508, "step": 85200 }, { "epoch": 1.734554707379135, "grad_norm": 20.362850883299267, "learning_rate": 8.255070683206787e-06, "loss": 0.2258, "step": 85210 }, { "epoch": 1.7347582697201018, "grad_norm": 10.085389991437227, "learning_rate": 8.254531282439211e-06, "loss": 0.173, "step": 85220 }, { "epoch": 1.7349618320610687, "grad_norm": 9.44395492023031, "learning_rate": 8.253991815942186e-06, "loss": 0.1721, "step": 85230 }, { "epoch": 1.7351653944020358, "grad_norm": 3.9676038037438017, "learning_rate": 8.253452283726603e-06, "loss": 0.2801, "step": 85240 }, { "epoch": 1.7353689567430024, "grad_norm": 6.91250010822672, "learning_rate": 8.252912685803361e-06, "loss": 0.199, "step": 85250 }, { "epoch": 1.7355725190839695, "grad_norm": 9.581242938031322, "learning_rate": 8.252373022183358e-06, "loss": 0.1994, "step": 85260 }, { "epoch": 1.7357760814249363, "grad_norm": 9.598314667485582, "learning_rate": 8.251833292877491e-06, "loss": 0.2241, "step": 85270 }, { "epoch": 1.7359796437659032, "grad_norm": 9.013732017390431, "learning_rate": 8.251293497896664e-06, "loss": 0.2625, "step": 85280 }, { "epoch": 1.7361832061068703, "grad_norm": 6.162983170887338, "learning_rate": 8.250753637251779e-06, "loss": 0.2474, "step": 85290 }, { "epoch": 1.7363867684478371, "grad_norm": 2.369675085060778, "learning_rate": 8.250213710953735e-06, "loss": 0.1811, "step": 85300 }, { "epoch": 1.736590330788804, "grad_norm": 6.104843408712628, "learning_rate": 8.249673719013437e-06, "loss": 0.2144, "step": 85310 }, { "epoch": 1.736793893129771, "grad_norm": 8.877058126791196, "learning_rate": 8.249133661441797e-06, "loss": 0.1751, "step": 85320 }, { "epoch": 1.736997455470738, "grad_norm": 14.046587816604303, "learning_rate": 8.248593538249716e-06, "loss": 0.2563, "step": 85330 }, { "epoch": 1.7372010178117048, "grad_norm": 12.262244482556374, "learning_rate": 8.248053349448104e-06, "loss": 0.2216, "step": 85340 }, { "epoch": 1.7374045801526719, "grad_norm": 8.895938298621836, "learning_rate": 8.24751309504787e-06, "loss": 0.1699, "step": 85350 }, { "epoch": 1.7376081424936387, "grad_norm": 5.366864150374219, "learning_rate": 8.246972775059927e-06, "loss": 0.2252, "step": 85360 }, { "epoch": 1.7378117048346056, "grad_norm": 11.65980912918941, "learning_rate": 8.246432389495187e-06, "loss": 0.3338, "step": 85370 }, { "epoch": 1.7380152671755726, "grad_norm": 8.300215486079173, "learning_rate": 8.245891938364565e-06, "loss": 0.174, "step": 85380 }, { "epoch": 1.7382188295165393, "grad_norm": 4.357106429388749, "learning_rate": 8.245351421678973e-06, "loss": 0.2293, "step": 85390 }, { "epoch": 1.7384223918575064, "grad_norm": 8.23659674772344, "learning_rate": 8.24481083944933e-06, "loss": 0.1625, "step": 85400 }, { "epoch": 1.7386259541984734, "grad_norm": 13.521390526764058, "learning_rate": 8.244270191686552e-06, "loss": 0.2129, "step": 85410 }, { "epoch": 1.73882951653944, "grad_norm": 0.43031614469946383, "learning_rate": 8.243729478401559e-06, "loss": 0.1865, "step": 85420 }, { "epoch": 1.7390330788804071, "grad_norm": 4.341770747780806, "learning_rate": 8.243188699605272e-06, "loss": 0.259, "step": 85430 }, { "epoch": 1.739236641221374, "grad_norm": 7.506983053650834, "learning_rate": 8.242647855308612e-06, "loss": 0.2026, "step": 85440 }, { "epoch": 1.7394402035623409, "grad_norm": 10.569168532959912, "learning_rate": 8.242106945522501e-06, "loss": 0.2617, "step": 85450 }, { "epoch": 1.739643765903308, "grad_norm": 17.783907756132677, "learning_rate": 8.241565970257866e-06, "loss": 0.334, "step": 85460 }, { "epoch": 1.7398473282442748, "grad_norm": 14.573304797986781, "learning_rate": 8.24102492952563e-06, "loss": 0.262, "step": 85470 }, { "epoch": 1.7400508905852416, "grad_norm": 7.059177984833816, "learning_rate": 8.24048382333672e-06, "loss": 0.1718, "step": 85480 }, { "epoch": 1.7402544529262087, "grad_norm": 13.280607374956462, "learning_rate": 8.239942651702066e-06, "loss": 0.2866, "step": 85490 }, { "epoch": 1.7404580152671756, "grad_norm": 6.368490890558778, "learning_rate": 8.2394014146326e-06, "loss": 0.194, "step": 85500 }, { "epoch": 1.7406615776081424, "grad_norm": 3.5153297146913416, "learning_rate": 8.238860112139246e-06, "loss": 0.1839, "step": 85510 }, { "epoch": 1.7408651399491095, "grad_norm": 15.108434919403221, "learning_rate": 8.238318744232942e-06, "loss": 0.2393, "step": 85520 }, { "epoch": 1.7410687022900764, "grad_norm": 8.373774713454834, "learning_rate": 8.237777310924622e-06, "loss": 0.1503, "step": 85530 }, { "epoch": 1.7412722646310432, "grad_norm": 8.580801583682756, "learning_rate": 8.237235812225218e-06, "loss": 0.239, "step": 85540 }, { "epoch": 1.7414758269720103, "grad_norm": 3.256131986875206, "learning_rate": 8.236694248145665e-06, "loss": 0.2865, "step": 85550 }, { "epoch": 1.741679389312977, "grad_norm": 12.843307149434034, "learning_rate": 8.236152618696905e-06, "loss": 0.1829, "step": 85560 }, { "epoch": 1.741882951653944, "grad_norm": 6.060799898838772, "learning_rate": 8.235610923889874e-06, "loss": 0.3057, "step": 85570 }, { "epoch": 1.742086513994911, "grad_norm": 16.755702312552753, "learning_rate": 8.235069163735513e-06, "loss": 0.3127, "step": 85580 }, { "epoch": 1.7422900763358777, "grad_norm": 6.447572059333335, "learning_rate": 8.234527338244765e-06, "loss": 0.1745, "step": 85590 }, { "epoch": 1.7424936386768448, "grad_norm": 10.316865686525272, "learning_rate": 8.233985447428568e-06, "loss": 0.1596, "step": 85600 }, { "epoch": 1.7426972010178117, "grad_norm": 13.927280518686642, "learning_rate": 8.233443491297872e-06, "loss": 0.1414, "step": 85610 }, { "epoch": 1.7429007633587785, "grad_norm": 0.3099269032901682, "learning_rate": 8.23290146986362e-06, "loss": 0.173, "step": 85620 }, { "epoch": 1.7431043256997456, "grad_norm": 15.593389027642527, "learning_rate": 8.232359383136758e-06, "loss": 0.2614, "step": 85630 }, { "epoch": 1.7433078880407125, "grad_norm": 7.778420097471149, "learning_rate": 8.231817231128237e-06, "loss": 0.1699, "step": 85640 }, { "epoch": 1.7435114503816793, "grad_norm": 7.532598786609412, "learning_rate": 8.231275013849002e-06, "loss": 0.2472, "step": 85650 }, { "epoch": 1.7437150127226464, "grad_norm": 0.6429171202912709, "learning_rate": 8.230732731310007e-06, "loss": 0.1395, "step": 85660 }, { "epoch": 1.7439185750636133, "grad_norm": 13.632368006605603, "learning_rate": 8.230190383522203e-06, "loss": 0.2116, "step": 85670 }, { "epoch": 1.74412213740458, "grad_norm": 10.675519406468746, "learning_rate": 8.229647970496544e-06, "loss": 0.2443, "step": 85680 }, { "epoch": 1.7443256997455472, "grad_norm": 15.062495733707305, "learning_rate": 8.229105492243986e-06, "loss": 0.1397, "step": 85690 }, { "epoch": 1.744529262086514, "grad_norm": 3.553752585483907, "learning_rate": 8.228562948775483e-06, "loss": 0.1029, "step": 85700 }, { "epoch": 1.744732824427481, "grad_norm": 11.547788035591651, "learning_rate": 8.228020340101992e-06, "loss": 0.3443, "step": 85710 }, { "epoch": 1.744936386768448, "grad_norm": 4.29320330016427, "learning_rate": 8.227477666234472e-06, "loss": 0.3336, "step": 85720 }, { "epoch": 1.7451399491094146, "grad_norm": 3.768214297241855, "learning_rate": 8.226934927183884e-06, "loss": 0.2694, "step": 85730 }, { "epoch": 1.7453435114503817, "grad_norm": 6.852773610481237, "learning_rate": 8.226392122961189e-06, "loss": 0.1822, "step": 85740 }, { "epoch": 1.7455470737913485, "grad_norm": 12.263527053291934, "learning_rate": 8.22584925357735e-06, "loss": 0.1344, "step": 85750 }, { "epoch": 1.7457506361323154, "grad_norm": 5.304633499098162, "learning_rate": 8.225306319043329e-06, "loss": 0.2123, "step": 85760 }, { "epoch": 1.7459541984732825, "grad_norm": 10.761402095933171, "learning_rate": 8.224763319370095e-06, "loss": 0.3444, "step": 85770 }, { "epoch": 1.7461577608142493, "grad_norm": 3.1543630366743094, "learning_rate": 8.224220254568611e-06, "loss": 0.1316, "step": 85780 }, { "epoch": 1.7463613231552162, "grad_norm": 10.010376371627213, "learning_rate": 8.223677124649847e-06, "loss": 0.2232, "step": 85790 }, { "epoch": 1.7465648854961833, "grad_norm": 17.881102568559402, "learning_rate": 8.223133929624771e-06, "loss": 0.2652, "step": 85800 }, { "epoch": 1.7467684478371501, "grad_norm": 12.322900220437445, "learning_rate": 8.222590669504353e-06, "loss": 0.2868, "step": 85810 }, { "epoch": 1.746972010178117, "grad_norm": 11.980532195377295, "learning_rate": 8.222047344299566e-06, "loss": 0.2708, "step": 85820 }, { "epoch": 1.747175572519084, "grad_norm": 8.316133617417309, "learning_rate": 8.221503954021385e-06, "loss": 0.2424, "step": 85830 }, { "epoch": 1.747379134860051, "grad_norm": 10.106102689839345, "learning_rate": 8.22096049868078e-06, "loss": 0.2612, "step": 85840 }, { "epoch": 1.7475826972010178, "grad_norm": 9.921937509887021, "learning_rate": 8.22041697828873e-06, "loss": 0.3094, "step": 85850 }, { "epoch": 1.7477862595419849, "grad_norm": 7.703619440821707, "learning_rate": 8.219873392856212e-06, "loss": 0.2736, "step": 85860 }, { "epoch": 1.7479898218829515, "grad_norm": 7.237767977126335, "learning_rate": 8.219329742394202e-06, "loss": 0.1588, "step": 85870 }, { "epoch": 1.7481933842239186, "grad_norm": 6.163752572778717, "learning_rate": 8.218786026913682e-06, "loss": 0.1755, "step": 85880 }, { "epoch": 1.7483969465648856, "grad_norm": 1.1629574332179073, "learning_rate": 8.218242246425632e-06, "loss": 0.2898, "step": 85890 }, { "epoch": 1.7486005089058523, "grad_norm": 0.4279076378591346, "learning_rate": 8.217698400941037e-06, "loss": 0.095, "step": 85900 }, { "epoch": 1.7488040712468194, "grad_norm": 1.2536691745065685, "learning_rate": 8.217154490470876e-06, "loss": 0.2389, "step": 85910 }, { "epoch": 1.7490076335877862, "grad_norm": 12.892754546876048, "learning_rate": 8.216610515026138e-06, "loss": 0.2218, "step": 85920 }, { "epoch": 1.749211195928753, "grad_norm": 17.465246077543867, "learning_rate": 8.216066474617806e-06, "loss": 0.2248, "step": 85930 }, { "epoch": 1.7494147582697201, "grad_norm": 15.197067131622946, "learning_rate": 8.215522369256871e-06, "loss": 0.2069, "step": 85940 }, { "epoch": 1.749618320610687, "grad_norm": 3.215939838855019, "learning_rate": 8.214978198954319e-06, "loss": 0.1327, "step": 85950 }, { "epoch": 1.7498218829516539, "grad_norm": 0.11090057822523756, "learning_rate": 8.214433963721142e-06, "loss": 0.2321, "step": 85960 }, { "epoch": 1.750025445292621, "grad_norm": 9.623593798084203, "learning_rate": 8.213889663568331e-06, "loss": 0.3262, "step": 85970 }, { "epoch": 1.7502290076335878, "grad_norm": 14.660772336433492, "learning_rate": 8.213345298506878e-06, "loss": 0.2495, "step": 85980 }, { "epoch": 1.7504325699745547, "grad_norm": 6.060838381045666, "learning_rate": 8.21280086854778e-06, "loss": 0.2132, "step": 85990 }, { "epoch": 1.7506361323155217, "grad_norm": 6.173348564791751, "learning_rate": 8.212256373702028e-06, "loss": 0.2335, "step": 86000 }, { "epoch": 1.7508396946564886, "grad_norm": 3.3622738193006896, "learning_rate": 8.211711813980622e-06, "loss": 0.2664, "step": 86010 }, { "epoch": 1.7510432569974554, "grad_norm": 9.694693538045621, "learning_rate": 8.21116718939456e-06, "loss": 0.1858, "step": 86020 }, { "epoch": 1.7512468193384225, "grad_norm": 16.697770035454365, "learning_rate": 8.21062249995484e-06, "loss": 0.143, "step": 86030 }, { "epoch": 1.7514503816793892, "grad_norm": 6.635756142443483, "learning_rate": 8.210077745672463e-06, "loss": 0.1948, "step": 86040 }, { "epoch": 1.7516539440203562, "grad_norm": 7.146204283976286, "learning_rate": 8.209532926558432e-06, "loss": 0.2357, "step": 86050 }, { "epoch": 1.7518575063613233, "grad_norm": 2.525549152770474, "learning_rate": 8.20898804262375e-06, "loss": 0.2132, "step": 86060 }, { "epoch": 1.75206106870229, "grad_norm": 9.88971298353803, "learning_rate": 8.20844309387942e-06, "loss": 0.1802, "step": 86070 }, { "epoch": 1.752264631043257, "grad_norm": 6.142233282886113, "learning_rate": 8.207898080336451e-06, "loss": 0.2276, "step": 86080 }, { "epoch": 1.7524681933842239, "grad_norm": 3.2753435912316133, "learning_rate": 8.207353002005847e-06, "loss": 0.1523, "step": 86090 }, { "epoch": 1.7526717557251907, "grad_norm": 11.254327663813399, "learning_rate": 8.206807858898618e-06, "loss": 0.1995, "step": 86100 }, { "epoch": 1.7528753180661578, "grad_norm": 13.17924500875483, "learning_rate": 8.206262651025777e-06, "loss": 0.1644, "step": 86110 }, { "epoch": 1.7530788804071247, "grad_norm": 10.341814905349262, "learning_rate": 8.205717378398328e-06, "loss": 0.2062, "step": 86120 }, { "epoch": 1.7532824427480915, "grad_norm": 11.188193385775003, "learning_rate": 8.20517204102729e-06, "loss": 0.2479, "step": 86130 }, { "epoch": 1.7534860050890586, "grad_norm": 3.135506050489938, "learning_rate": 8.204626638923673e-06, "loss": 0.136, "step": 86140 }, { "epoch": 1.7536895674300255, "grad_norm": 7.808367922175034, "learning_rate": 8.204081172098495e-06, "loss": 0.168, "step": 86150 }, { "epoch": 1.7538931297709923, "grad_norm": 6.160513247036245, "learning_rate": 8.20353564056277e-06, "loss": 0.1234, "step": 86160 }, { "epoch": 1.7540966921119594, "grad_norm": 1.0436704915619448, "learning_rate": 8.202990044327516e-06, "loss": 0.2334, "step": 86170 }, { "epoch": 1.7543002544529263, "grad_norm": 8.96039732733283, "learning_rate": 8.202444383403755e-06, "loss": 0.3066, "step": 86180 }, { "epoch": 1.7545038167938931, "grad_norm": 1.1293957465282858, "learning_rate": 8.201898657802503e-06, "loss": 0.1795, "step": 86190 }, { "epoch": 1.7547073791348602, "grad_norm": 18.675067927732744, "learning_rate": 8.201352867534783e-06, "loss": 0.1643, "step": 86200 }, { "epoch": 1.7549109414758268, "grad_norm": 4.338689489898618, "learning_rate": 8.20080701261162e-06, "loss": 0.1949, "step": 86210 }, { "epoch": 1.755114503816794, "grad_norm": 4.1265230336669365, "learning_rate": 8.200261093044035e-06, "loss": 0.1343, "step": 86220 }, { "epoch": 1.7553180661577608, "grad_norm": 12.7948220090276, "learning_rate": 8.199715108843058e-06, "loss": 0.2952, "step": 86230 }, { "epoch": 1.7555216284987276, "grad_norm": 9.815460670187038, "learning_rate": 8.199169060019712e-06, "loss": 0.2127, "step": 86240 }, { "epoch": 1.7557251908396947, "grad_norm": 5.2014825650853425, "learning_rate": 8.198622946585026e-06, "loss": 0.2671, "step": 86250 }, { "epoch": 1.7559287531806616, "grad_norm": 0.1629691242814796, "learning_rate": 8.19807676855003e-06, "loss": 0.1888, "step": 86260 }, { "epoch": 1.7561323155216284, "grad_norm": 13.406403531197927, "learning_rate": 8.197530525925755e-06, "loss": 0.241, "step": 86270 }, { "epoch": 1.7563358778625955, "grad_norm": 16.010817542232093, "learning_rate": 8.196984218723232e-06, "loss": 0.2406, "step": 86280 }, { "epoch": 1.7565394402035623, "grad_norm": 5.286755706834831, "learning_rate": 8.196437846953493e-06, "loss": 0.2517, "step": 86290 }, { "epoch": 1.7567430025445292, "grad_norm": 14.067061601083058, "learning_rate": 8.195891410627577e-06, "loss": 0.3169, "step": 86300 }, { "epoch": 1.7569465648854963, "grad_norm": 1.5026407303960014, "learning_rate": 8.19534490975652e-06, "loss": 0.2452, "step": 86310 }, { "epoch": 1.7571501272264631, "grad_norm": 4.458006074873578, "learning_rate": 8.194798344351354e-06, "loss": 0.2155, "step": 86320 }, { "epoch": 1.75735368956743, "grad_norm": 7.895551285537333, "learning_rate": 8.19425171442312e-06, "loss": 0.2046, "step": 86330 }, { "epoch": 1.757557251908397, "grad_norm": 4.304709323562623, "learning_rate": 8.193705019982859e-06, "loss": 0.1891, "step": 86340 }, { "epoch": 1.757760814249364, "grad_norm": 8.377645332002292, "learning_rate": 8.193158261041612e-06, "loss": 0.1073, "step": 86350 }, { "epoch": 1.7579643765903308, "grad_norm": 2.743040131022343, "learning_rate": 8.192611437610423e-06, "loss": 0.1944, "step": 86360 }, { "epoch": 1.7581679389312979, "grad_norm": 4.32805996330871, "learning_rate": 8.192064549700331e-06, "loss": 0.1499, "step": 86370 }, { "epoch": 1.7583715012722645, "grad_norm": 3.9081168976543412, "learning_rate": 8.191517597322385e-06, "loss": 0.1678, "step": 86380 }, { "epoch": 1.7585750636132316, "grad_norm": 10.291010642477799, "learning_rate": 8.190970580487631e-06, "loss": 0.2049, "step": 86390 }, { "epoch": 1.7587786259541984, "grad_norm": 0.32905942036798624, "learning_rate": 8.190423499207116e-06, "loss": 0.1605, "step": 86400 }, { "epoch": 1.7589821882951653, "grad_norm": 12.325008258605463, "learning_rate": 8.189876353491891e-06, "loss": 0.1744, "step": 86410 }, { "epoch": 1.7591857506361324, "grad_norm": 4.169140232486208, "learning_rate": 8.189329143353002e-06, "loss": 0.1159, "step": 86420 }, { "epoch": 1.7593893129770992, "grad_norm": 23.280349204280167, "learning_rate": 8.188781868801503e-06, "loss": 0.2697, "step": 86430 }, { "epoch": 1.759592875318066, "grad_norm": 0.6158571168244434, "learning_rate": 8.188234529848448e-06, "loss": 0.1987, "step": 86440 }, { "epoch": 1.7597964376590332, "grad_norm": 14.313839426626501, "learning_rate": 8.187687126504891e-06, "loss": 0.1327, "step": 86450 }, { "epoch": 1.76, "grad_norm": 12.44418610059489, "learning_rate": 8.187139658781886e-06, "loss": 0.2254, "step": 86460 }, { "epoch": 1.7602035623409669, "grad_norm": 11.171863101970121, "learning_rate": 8.18659212669049e-06, "loss": 0.2197, "step": 86470 }, { "epoch": 1.760407124681934, "grad_norm": 1.5110453644070756, "learning_rate": 8.186044530241762e-06, "loss": 0.0988, "step": 86480 }, { "epoch": 1.7606106870229008, "grad_norm": 21.21064535091974, "learning_rate": 8.185496869446763e-06, "loss": 0.2998, "step": 86490 }, { "epoch": 1.7608142493638677, "grad_norm": 11.920643998729131, "learning_rate": 8.18494914431655e-06, "loss": 0.1988, "step": 86500 }, { "epoch": 1.7610178117048347, "grad_norm": 1.8943452852381393, "learning_rate": 8.184401354862185e-06, "loss": 0.2184, "step": 86510 }, { "epoch": 1.7612213740458014, "grad_norm": 13.056832913966911, "learning_rate": 8.183853501094737e-06, "loss": 0.2761, "step": 86520 }, { "epoch": 1.7614249363867684, "grad_norm": 4.317631613343409, "learning_rate": 8.183305583025263e-06, "loss": 0.237, "step": 86530 }, { "epoch": 1.7616284987277355, "grad_norm": 6.679245716915216, "learning_rate": 8.182757600664837e-06, "loss": 0.1801, "step": 86540 }, { "epoch": 1.7618320610687022, "grad_norm": 10.397110944385116, "learning_rate": 8.182209554024519e-06, "loss": 0.1568, "step": 86550 }, { "epoch": 1.7620356234096692, "grad_norm": 8.13599372730095, "learning_rate": 8.181661443115381e-06, "loss": 0.178, "step": 86560 }, { "epoch": 1.762239185750636, "grad_norm": 12.261575197412368, "learning_rate": 8.181113267948493e-06, "loss": 0.2191, "step": 86570 }, { "epoch": 1.762442748091603, "grad_norm": 11.145933653416142, "learning_rate": 8.180565028534924e-06, "loss": 0.3142, "step": 86580 }, { "epoch": 1.76264631043257, "grad_norm": 5.4087476100996135, "learning_rate": 8.180016724885746e-06, "loss": 0.2913, "step": 86590 }, { "epoch": 1.7628498727735369, "grad_norm": 10.868395646050152, "learning_rate": 8.179468357012037e-06, "loss": 0.2226, "step": 86600 }, { "epoch": 1.7630534351145037, "grad_norm": 2.608769909810057, "learning_rate": 8.17891992492487e-06, "loss": 0.2084, "step": 86610 }, { "epoch": 1.7632569974554708, "grad_norm": 5.428503891846182, "learning_rate": 8.17837142863532e-06, "loss": 0.1726, "step": 86620 }, { "epoch": 1.7634605597964377, "grad_norm": 6.450665498569375, "learning_rate": 8.177822868154464e-06, "loss": 0.1487, "step": 86630 }, { "epoch": 1.7636641221374045, "grad_norm": 0.10406210444872264, "learning_rate": 8.177274243493383e-06, "loss": 0.2713, "step": 86640 }, { "epoch": 1.7638676844783716, "grad_norm": 7.209786332035234, "learning_rate": 8.176725554663157e-06, "loss": 0.2107, "step": 86650 }, { "epoch": 1.7640712468193385, "grad_norm": 8.357831070176058, "learning_rate": 8.176176801674867e-06, "loss": 0.1465, "step": 86660 }, { "epoch": 1.7642748091603053, "grad_norm": 6.574570497874323, "learning_rate": 8.175627984539594e-06, "loss": 0.1896, "step": 86670 }, { "epoch": 1.7644783715012724, "grad_norm": 12.130579681476132, "learning_rate": 8.175079103268424e-06, "loss": 0.1812, "step": 86680 }, { "epoch": 1.764681933842239, "grad_norm": 11.527620218039273, "learning_rate": 8.174530157872443e-06, "loss": 0.2008, "step": 86690 }, { "epoch": 1.7648854961832061, "grad_norm": 10.686249617652646, "learning_rate": 8.173981148362737e-06, "loss": 0.2644, "step": 86700 }, { "epoch": 1.7650890585241732, "grad_norm": 12.071923787243167, "learning_rate": 8.173432074750393e-06, "loss": 0.2058, "step": 86710 }, { "epoch": 1.7652926208651398, "grad_norm": 2.476817663455795, "learning_rate": 8.1728829370465e-06, "loss": 0.2851, "step": 86720 }, { "epoch": 1.765496183206107, "grad_norm": 4.806169359339849, "learning_rate": 8.17233373526215e-06, "loss": 0.2918, "step": 86730 }, { "epoch": 1.7656997455470738, "grad_norm": 12.635037882323182, "learning_rate": 8.171784469408436e-06, "loss": 0.2562, "step": 86740 }, { "epoch": 1.7659033078880406, "grad_norm": 8.289825566665174, "learning_rate": 8.171235139496448e-06, "loss": 0.2704, "step": 86750 }, { "epoch": 1.7661068702290077, "grad_norm": 15.949281815581427, "learning_rate": 8.17068574553728e-06, "loss": 0.145, "step": 86760 }, { "epoch": 1.7663104325699746, "grad_norm": 4.088404856316544, "learning_rate": 8.170136287542032e-06, "loss": 0.3842, "step": 86770 }, { "epoch": 1.7665139949109414, "grad_norm": 9.103416117198075, "learning_rate": 8.169586765521798e-06, "loss": 0.2389, "step": 86780 }, { "epoch": 1.7667175572519085, "grad_norm": 10.388495893522116, "learning_rate": 8.169037179487678e-06, "loss": 0.3085, "step": 86790 }, { "epoch": 1.7669211195928753, "grad_norm": 7.533682875223007, "learning_rate": 8.168487529450769e-06, "loss": 0.2594, "step": 86800 }, { "epoch": 1.7671246819338422, "grad_norm": 7.794328186747167, "learning_rate": 8.167937815422174e-06, "loss": 0.1753, "step": 86810 }, { "epoch": 1.7673282442748093, "grad_norm": 11.688215402442419, "learning_rate": 8.167388037412993e-06, "loss": 0.2818, "step": 86820 }, { "epoch": 1.7675318066157761, "grad_norm": 6.318670393857256, "learning_rate": 8.166838195434333e-06, "loss": 0.1781, "step": 86830 }, { "epoch": 1.767735368956743, "grad_norm": 11.499396366210531, "learning_rate": 8.166288289497297e-06, "loss": 0.1931, "step": 86840 }, { "epoch": 1.76793893129771, "grad_norm": 5.73024122482371, "learning_rate": 8.16573831961299e-06, "loss": 0.1984, "step": 86850 }, { "epoch": 1.7681424936386767, "grad_norm": 6.428282654794684, "learning_rate": 8.16518828579252e-06, "loss": 0.2686, "step": 86860 }, { "epoch": 1.7683460559796438, "grad_norm": 15.353961553364655, "learning_rate": 8.164638188046997e-06, "loss": 0.2467, "step": 86870 }, { "epoch": 1.7685496183206106, "grad_norm": 26.629103184944945, "learning_rate": 8.164088026387527e-06, "loss": 0.2254, "step": 86880 }, { "epoch": 1.7687531806615775, "grad_norm": 8.404491025521349, "learning_rate": 8.163537800825225e-06, "loss": 0.1674, "step": 86890 }, { "epoch": 1.7689567430025446, "grad_norm": 13.920630244558222, "learning_rate": 8.162987511371203e-06, "loss": 0.2772, "step": 86900 }, { "epoch": 1.7691603053435114, "grad_norm": 5.0077791358598684, "learning_rate": 8.162437158036576e-06, "loss": 0.0799, "step": 86910 }, { "epoch": 1.7693638676844783, "grad_norm": 3.4555002228411476, "learning_rate": 8.161886740832455e-06, "loss": 0.2535, "step": 86920 }, { "epoch": 1.7695674300254454, "grad_norm": 6.4538559315529564, "learning_rate": 8.16133625976996e-06, "loss": 0.2187, "step": 86930 }, { "epoch": 1.7697709923664122, "grad_norm": 13.874689464182715, "learning_rate": 8.160785714860207e-06, "loss": 0.2452, "step": 86940 }, { "epoch": 1.769974554707379, "grad_norm": 5.652268077987875, "learning_rate": 8.160235106114317e-06, "loss": 0.3008, "step": 86950 }, { "epoch": 1.7701781170483462, "grad_norm": 10.404225063236687, "learning_rate": 8.159684433543406e-06, "loss": 0.2736, "step": 86960 }, { "epoch": 1.770381679389313, "grad_norm": 13.04181753682553, "learning_rate": 8.159133697158599e-06, "loss": 0.1936, "step": 86970 }, { "epoch": 1.7705852417302799, "grad_norm": 10.863780398985902, "learning_rate": 8.158582896971019e-06, "loss": 0.2014, "step": 86980 }, { "epoch": 1.770788804071247, "grad_norm": 14.714523747953638, "learning_rate": 8.158032032991789e-06, "loss": 0.1793, "step": 86990 }, { "epoch": 1.7709923664122136, "grad_norm": 7.336677032438, "learning_rate": 8.157481105232033e-06, "loss": 0.2176, "step": 87000 }, { "epoch": 1.7711959287531807, "grad_norm": 11.075600497101238, "learning_rate": 8.15693011370288e-06, "loss": 0.1144, "step": 87010 }, { "epoch": 1.7713994910941477, "grad_norm": 7.277664841711446, "learning_rate": 8.156379058415459e-06, "loss": 0.2403, "step": 87020 }, { "epoch": 1.7716030534351144, "grad_norm": 10.120911585775325, "learning_rate": 8.155827939380897e-06, "loss": 0.2117, "step": 87030 }, { "epoch": 1.7718066157760815, "grad_norm": 10.48998430330537, "learning_rate": 8.155276756610324e-06, "loss": 0.2411, "step": 87040 }, { "epoch": 1.7720101781170483, "grad_norm": 8.803956563464084, "learning_rate": 8.154725510114874e-06, "loss": 0.2422, "step": 87050 }, { "epoch": 1.7722137404580152, "grad_norm": 4.320942643519257, "learning_rate": 8.154174199905677e-06, "loss": 0.2252, "step": 87060 }, { "epoch": 1.7724173027989822, "grad_norm": 6.697652358184715, "learning_rate": 8.153622825993871e-06, "loss": 0.2968, "step": 87070 }, { "epoch": 1.772620865139949, "grad_norm": 6.956698119153544, "learning_rate": 8.15307138839059e-06, "loss": 0.1438, "step": 87080 }, { "epoch": 1.772824427480916, "grad_norm": 10.118446236001407, "learning_rate": 8.152519887106973e-06, "loss": 0.2352, "step": 87090 }, { "epoch": 1.773027989821883, "grad_norm": 11.922791624130937, "learning_rate": 8.151968322154153e-06, "loss": 0.2279, "step": 87100 }, { "epoch": 1.7732315521628499, "grad_norm": 8.240357076057977, "learning_rate": 8.151416693543276e-06, "loss": 0.2692, "step": 87110 }, { "epoch": 1.7734351145038167, "grad_norm": 5.8027827641086445, "learning_rate": 8.150865001285477e-06, "loss": 0.2693, "step": 87120 }, { "epoch": 1.7736386768447838, "grad_norm": 1.538336561514277, "learning_rate": 8.150313245391904e-06, "loss": 0.2775, "step": 87130 }, { "epoch": 1.7738422391857507, "grad_norm": 3.6261377114524964, "learning_rate": 8.149761425873696e-06, "loss": 0.1791, "step": 87140 }, { "epoch": 1.7740458015267175, "grad_norm": 18.731413575237283, "learning_rate": 8.149209542742e-06, "loss": 0.1692, "step": 87150 }, { "epoch": 1.7742493638676846, "grad_norm": 5.8487020952323086, "learning_rate": 8.14865759600796e-06, "loss": 0.2027, "step": 87160 }, { "epoch": 1.7744529262086512, "grad_norm": 9.046631698231058, "learning_rate": 8.148105585682725e-06, "loss": 0.23, "step": 87170 }, { "epoch": 1.7746564885496183, "grad_norm": 9.823800914746135, "learning_rate": 8.147553511777442e-06, "loss": 0.2256, "step": 87180 }, { "epoch": 1.7748600508905854, "grad_norm": 14.690037623496494, "learning_rate": 8.147001374303263e-06, "loss": 0.1867, "step": 87190 }, { "epoch": 1.775063613231552, "grad_norm": 20.513103973054008, "learning_rate": 8.146449173271338e-06, "loss": 0.2637, "step": 87200 }, { "epoch": 1.7752671755725191, "grad_norm": 9.638159354932043, "learning_rate": 8.14589690869282e-06, "loss": 0.2165, "step": 87210 }, { "epoch": 1.775470737913486, "grad_norm": 13.819978459990677, "learning_rate": 8.14534458057886e-06, "loss": 0.2244, "step": 87220 }, { "epoch": 1.7756743002544528, "grad_norm": 10.479160744506636, "learning_rate": 8.144792188940616e-06, "loss": 0.1573, "step": 87230 }, { "epoch": 1.77587786259542, "grad_norm": 9.88045509943557, "learning_rate": 8.144239733789242e-06, "loss": 0.2426, "step": 87240 }, { "epoch": 1.7760814249363868, "grad_norm": 15.09128521319639, "learning_rate": 8.143687215135898e-06, "loss": 0.2752, "step": 87250 }, { "epoch": 1.7762849872773536, "grad_norm": 16.902407462796315, "learning_rate": 8.143134632991743e-06, "loss": 0.2389, "step": 87260 }, { "epoch": 1.7764885496183207, "grad_norm": 10.373533372018626, "learning_rate": 8.142581987367935e-06, "loss": 0.2081, "step": 87270 }, { "epoch": 1.7766921119592876, "grad_norm": 11.131117968157833, "learning_rate": 8.142029278275635e-06, "loss": 0.1706, "step": 87280 }, { "epoch": 1.7768956743002544, "grad_norm": 7.313382748076017, "learning_rate": 8.141476505726008e-06, "loss": 0.1551, "step": 87290 }, { "epoch": 1.7770992366412215, "grad_norm": 1.6671478287721693, "learning_rate": 8.140923669730215e-06, "loss": 0.1762, "step": 87300 }, { "epoch": 1.7773027989821883, "grad_norm": 4.009454493082197, "learning_rate": 8.140370770299423e-06, "loss": 0.1723, "step": 87310 }, { "epoch": 1.7775063613231552, "grad_norm": 7.128395939687445, "learning_rate": 8.1398178074448e-06, "loss": 0.2623, "step": 87320 }, { "epoch": 1.7777099236641223, "grad_norm": 5.204124914664005, "learning_rate": 8.139264781177515e-06, "loss": 0.1787, "step": 87330 }, { "epoch": 1.777913486005089, "grad_norm": 9.822321859780551, "learning_rate": 8.138711691508729e-06, "loss": 0.1576, "step": 87340 }, { "epoch": 1.778117048346056, "grad_norm": 12.739209576511893, "learning_rate": 8.138158538449622e-06, "loss": 0.1629, "step": 87350 }, { "epoch": 1.7783206106870229, "grad_norm": 14.131100187712313, "learning_rate": 8.137605322011359e-06, "loss": 0.2518, "step": 87360 }, { "epoch": 1.7785241730279897, "grad_norm": 7.94155407252373, "learning_rate": 8.137052042205117e-06, "loss": 0.2605, "step": 87370 }, { "epoch": 1.7787277353689568, "grad_norm": 10.167442302225956, "learning_rate": 8.136498699042067e-06, "loss": 0.2239, "step": 87380 }, { "epoch": 1.7789312977099236, "grad_norm": 7.189927230199704, "learning_rate": 8.135945292533385e-06, "loss": 0.2212, "step": 87390 }, { "epoch": 1.7791348600508905, "grad_norm": 6.813699578618689, "learning_rate": 8.13539182269025e-06, "loss": 0.1487, "step": 87400 }, { "epoch": 1.7793384223918576, "grad_norm": 4.168927292852196, "learning_rate": 8.134838289523838e-06, "loss": 0.2359, "step": 87410 }, { "epoch": 1.7795419847328244, "grad_norm": 6.110647042872015, "learning_rate": 8.134284693045331e-06, "loss": 0.1288, "step": 87420 }, { "epoch": 1.7797455470737913, "grad_norm": 7.084794665924452, "learning_rate": 8.133731033265907e-06, "loss": 0.2014, "step": 87430 }, { "epoch": 1.7799491094147584, "grad_norm": 2.2319013057653674, "learning_rate": 8.13317731019675e-06, "loss": 0.2329, "step": 87440 }, { "epoch": 1.7801526717557252, "grad_norm": 18.797840785001043, "learning_rate": 8.13262352384904e-06, "loss": 0.3232, "step": 87450 }, { "epoch": 1.780356234096692, "grad_norm": 10.272136002423851, "learning_rate": 8.132069674233962e-06, "loss": 0.1491, "step": 87460 }, { "epoch": 1.7805597964376592, "grad_norm": 7.885303858961541, "learning_rate": 8.131515761362704e-06, "loss": 0.2223, "step": 87470 }, { "epoch": 1.7807633587786258, "grad_norm": 3.503972743666383, "learning_rate": 8.130961785246451e-06, "loss": 0.1795, "step": 87480 }, { "epoch": 1.7809669211195929, "grad_norm": 1.7549470622253125, "learning_rate": 8.130407745896394e-06, "loss": 0.1943, "step": 87490 }, { "epoch": 1.78117048346056, "grad_norm": 2.9029164073867983, "learning_rate": 8.12985364332372e-06, "loss": 0.1481, "step": 87500 }, { "epoch": 1.7813740458015266, "grad_norm": 13.173359414309049, "learning_rate": 8.129299477539621e-06, "loss": 0.1737, "step": 87510 }, { "epoch": 1.7815776081424937, "grad_norm": 13.48727384729568, "learning_rate": 8.128745248555288e-06, "loss": 0.2871, "step": 87520 }, { "epoch": 1.7817811704834605, "grad_norm": 12.221120603993711, "learning_rate": 8.128190956381913e-06, "loss": 0.3386, "step": 87530 }, { "epoch": 1.7819847328244274, "grad_norm": 9.916605944121867, "learning_rate": 8.127636601030695e-06, "loss": 0.2731, "step": 87540 }, { "epoch": 1.7821882951653945, "grad_norm": 3.3361191191252453, "learning_rate": 8.127082182512826e-06, "loss": 0.1428, "step": 87550 }, { "epoch": 1.7823918575063613, "grad_norm": 6.0602374724382, "learning_rate": 8.126527700839506e-06, "loss": 0.1845, "step": 87560 }, { "epoch": 1.7825954198473282, "grad_norm": 4.258714304778375, "learning_rate": 8.125973156021932e-06, "loss": 0.3298, "step": 87570 }, { "epoch": 1.7827989821882952, "grad_norm": 17.995199413465134, "learning_rate": 8.125418548071302e-06, "loss": 0.2536, "step": 87580 }, { "epoch": 1.783002544529262, "grad_norm": 0.5453677125396555, "learning_rate": 8.124863876998821e-06, "loss": 0.1254, "step": 87590 }, { "epoch": 1.783206106870229, "grad_norm": 6.884113969770999, "learning_rate": 8.124309142815687e-06, "loss": 0.1362, "step": 87600 }, { "epoch": 1.783409669211196, "grad_norm": 10.586389479819374, "learning_rate": 8.123754345533107e-06, "loss": 0.2616, "step": 87610 }, { "epoch": 1.783613231552163, "grad_norm": 7.528806745675384, "learning_rate": 8.123199485162286e-06, "loss": 0.2041, "step": 87620 }, { "epoch": 1.7838167938931297, "grad_norm": 10.76256933371955, "learning_rate": 8.122644561714428e-06, "loss": 0.1294, "step": 87630 }, { "epoch": 1.7840203562340968, "grad_norm": 4.190827813052342, "learning_rate": 8.12208957520074e-06, "loss": 0.212, "step": 87640 }, { "epoch": 1.7842239185750635, "grad_norm": 5.062139300911908, "learning_rate": 8.121534525632433e-06, "loss": 0.2005, "step": 87650 }, { "epoch": 1.7844274809160305, "grad_norm": 6.058580423133847, "learning_rate": 8.120979413020714e-06, "loss": 0.2266, "step": 87660 }, { "epoch": 1.7846310432569976, "grad_norm": 6.927682980903703, "learning_rate": 8.120424237376797e-06, "loss": 0.1618, "step": 87670 }, { "epoch": 1.7848346055979643, "grad_norm": 7.576406296640156, "learning_rate": 8.119868998711895e-06, "loss": 0.2055, "step": 87680 }, { "epoch": 1.7850381679389313, "grad_norm": 8.176613515046252, "learning_rate": 8.119313697037218e-06, "loss": 0.2411, "step": 87690 }, { "epoch": 1.7852417302798982, "grad_norm": 1.9767271699738562, "learning_rate": 8.118758332363985e-06, "loss": 0.2818, "step": 87700 }, { "epoch": 1.785445292620865, "grad_norm": 6.1017740388864, "learning_rate": 8.11820290470341e-06, "loss": 0.2337, "step": 87710 }, { "epoch": 1.7856488549618321, "grad_norm": 0.2805924610164643, "learning_rate": 8.11764741406671e-06, "loss": 0.2277, "step": 87720 }, { "epoch": 1.785852417302799, "grad_norm": 3.330739072028094, "learning_rate": 8.117091860465107e-06, "loss": 0.2361, "step": 87730 }, { "epoch": 1.7860559796437658, "grad_norm": 0.2603880596358399, "learning_rate": 8.116536243909816e-06, "loss": 0.1807, "step": 87740 }, { "epoch": 1.786259541984733, "grad_norm": 10.206585583693961, "learning_rate": 8.115980564412063e-06, "loss": 0.243, "step": 87750 }, { "epoch": 1.7864631043256998, "grad_norm": 1.9588869296435731, "learning_rate": 8.115424821983072e-06, "loss": 0.1984, "step": 87760 }, { "epoch": 1.7866666666666666, "grad_norm": 12.497385001129697, "learning_rate": 8.114869016634061e-06, "loss": 0.2302, "step": 87770 }, { "epoch": 1.7868702290076337, "grad_norm": 21.37013460050032, "learning_rate": 8.114313148376259e-06, "loss": 0.1937, "step": 87780 }, { "epoch": 1.7870737913486006, "grad_norm": 7.566037809126498, "learning_rate": 8.11375721722089e-06, "loss": 0.1396, "step": 87790 }, { "epoch": 1.7872773536895674, "grad_norm": 9.29953375301455, "learning_rate": 8.113201223179185e-06, "loss": 0.2029, "step": 87800 }, { "epoch": 1.7874809160305345, "grad_norm": 12.234473535931077, "learning_rate": 8.112645166262373e-06, "loss": 0.172, "step": 87810 }, { "epoch": 1.7876844783715011, "grad_norm": 10.183621637569692, "learning_rate": 8.11208904648168e-06, "loss": 0.2274, "step": 87820 }, { "epoch": 1.7878880407124682, "grad_norm": 26.1949104723459, "learning_rate": 8.111532863848343e-06, "loss": 0.2039, "step": 87830 }, { "epoch": 1.788091603053435, "grad_norm": 11.774142379415643, "learning_rate": 8.110976618373591e-06, "loss": 0.4275, "step": 87840 }, { "epoch": 1.788295165394402, "grad_norm": 9.572166898633709, "learning_rate": 8.11042031006866e-06, "loss": 0.1725, "step": 87850 }, { "epoch": 1.788498727735369, "grad_norm": 11.572649867830082, "learning_rate": 8.109863938944783e-06, "loss": 0.2097, "step": 87860 }, { "epoch": 1.7887022900763359, "grad_norm": 3.8519449432136414, "learning_rate": 8.1093075050132e-06, "loss": 0.2568, "step": 87870 }, { "epoch": 1.7889058524173027, "grad_norm": 5.311865763046059, "learning_rate": 8.108751008285145e-06, "loss": 0.1029, "step": 87880 }, { "epoch": 1.7891094147582698, "grad_norm": 8.989852444936282, "learning_rate": 8.108194448771862e-06, "loss": 0.2222, "step": 87890 }, { "epoch": 1.7893129770992366, "grad_norm": 8.869095247202818, "learning_rate": 8.107637826484587e-06, "loss": 0.2548, "step": 87900 }, { "epoch": 1.7895165394402035, "grad_norm": 9.694591104926033, "learning_rate": 8.107081141434563e-06, "loss": 0.1552, "step": 87910 }, { "epoch": 1.7897201017811706, "grad_norm": 10.41463164996771, "learning_rate": 8.106524393633034e-06, "loss": 0.2333, "step": 87920 }, { "epoch": 1.7899236641221374, "grad_norm": 6.18054743233674, "learning_rate": 8.105967583091245e-06, "loss": 0.2064, "step": 87930 }, { "epoch": 1.7901272264631043, "grad_norm": 5.168190335381944, "learning_rate": 8.10541070982044e-06, "loss": 0.2242, "step": 87940 }, { "epoch": 1.7903307888040714, "grad_norm": 6.903980690231518, "learning_rate": 8.104853773831866e-06, "loss": 0.2757, "step": 87950 }, { "epoch": 1.790534351145038, "grad_norm": 8.637329688763558, "learning_rate": 8.104296775136769e-06, "loss": 0.2747, "step": 87960 }, { "epoch": 1.790737913486005, "grad_norm": 3.2907821332404406, "learning_rate": 8.103739713746404e-06, "loss": 0.1864, "step": 87970 }, { "epoch": 1.7909414758269722, "grad_norm": 3.2583295148958884, "learning_rate": 8.103182589672015e-06, "loss": 0.201, "step": 87980 }, { "epoch": 1.7911450381679388, "grad_norm": 9.002800918488866, "learning_rate": 8.102625402924856e-06, "loss": 0.1353, "step": 87990 }, { "epoch": 1.7913486005089059, "grad_norm": 6.759748270521313, "learning_rate": 8.102068153516183e-06, "loss": 0.3324, "step": 88000 }, { "epoch": 1.7915521628498727, "grad_norm": 9.80308894449343, "learning_rate": 8.101510841457247e-06, "loss": 0.3127, "step": 88010 }, { "epoch": 1.7917557251908396, "grad_norm": 4.593630740434637, "learning_rate": 8.100953466759305e-06, "loss": 0.1844, "step": 88020 }, { "epoch": 1.7919592875318067, "grad_norm": 4.318912279768092, "learning_rate": 8.100396029433614e-06, "loss": 0.1956, "step": 88030 }, { "epoch": 1.7921628498727735, "grad_norm": 8.982841522937333, "learning_rate": 8.099838529491433e-06, "loss": 0.1957, "step": 88040 }, { "epoch": 1.7923664122137404, "grad_norm": 6.2762657963119866, "learning_rate": 8.09928096694402e-06, "loss": 0.1376, "step": 88050 }, { "epoch": 1.7925699745547075, "grad_norm": 10.799423279101601, "learning_rate": 8.098723341802635e-06, "loss": 0.2433, "step": 88060 }, { "epoch": 1.7927735368956743, "grad_norm": 3.6664475499887788, "learning_rate": 8.098165654078541e-06, "loss": 0.1951, "step": 88070 }, { "epoch": 1.7929770992366412, "grad_norm": 5.936375121336929, "learning_rate": 8.097607903783e-06, "loss": 0.3182, "step": 88080 }, { "epoch": 1.7931806615776082, "grad_norm": 8.337161753284747, "learning_rate": 8.097050090927278e-06, "loss": 0.1911, "step": 88090 }, { "epoch": 1.793384223918575, "grad_norm": 5.451439615474905, "learning_rate": 8.096492215522644e-06, "loss": 0.2692, "step": 88100 }, { "epoch": 1.793587786259542, "grad_norm": 4.107582965800475, "learning_rate": 8.095934277580357e-06, "loss": 0.2083, "step": 88110 }, { "epoch": 1.793791348600509, "grad_norm": 14.780849240213, "learning_rate": 8.095376277111692e-06, "loss": 0.2424, "step": 88120 }, { "epoch": 1.7939949109414757, "grad_norm": 8.495523345415817, "learning_rate": 8.094818214127918e-06, "loss": 0.2718, "step": 88130 }, { "epoch": 1.7941984732824428, "grad_norm": 16.393444567567734, "learning_rate": 8.0942600886403e-06, "loss": 0.3317, "step": 88140 }, { "epoch": 1.7944020356234098, "grad_norm": 3.44258093163863, "learning_rate": 8.093701900660117e-06, "loss": 0.1963, "step": 88150 }, { "epoch": 1.7946055979643765, "grad_norm": 17.70464699015762, "learning_rate": 8.093143650198638e-06, "loss": 0.2336, "step": 88160 }, { "epoch": 1.7948091603053435, "grad_norm": 13.15428543842534, "learning_rate": 8.09258533726714e-06, "loss": 0.351, "step": 88170 }, { "epoch": 1.7950127226463104, "grad_norm": 10.392280004698202, "learning_rate": 8.092026961876899e-06, "loss": 0.2866, "step": 88180 }, { "epoch": 1.7952162849872773, "grad_norm": 7.336104837537187, "learning_rate": 8.091468524039189e-06, "loss": 0.308, "step": 88190 }, { "epoch": 1.7954198473282443, "grad_norm": 6.899204680946918, "learning_rate": 8.090910023765291e-06, "loss": 0.2539, "step": 88200 }, { "epoch": 1.7956234096692112, "grad_norm": 10.139631338374453, "learning_rate": 8.090351461066483e-06, "loss": 0.2063, "step": 88210 }, { "epoch": 1.795826972010178, "grad_norm": 6.638822947577387, "learning_rate": 8.08979283595405e-06, "loss": 0.1756, "step": 88220 }, { "epoch": 1.7960305343511451, "grad_norm": 12.97676144446544, "learning_rate": 8.089234148439266e-06, "loss": 0.1342, "step": 88230 }, { "epoch": 1.796234096692112, "grad_norm": 7.480029599010827, "learning_rate": 8.088675398533422e-06, "loss": 0.1606, "step": 88240 }, { "epoch": 1.7964376590330788, "grad_norm": 6.8131147794254, "learning_rate": 8.088116586247803e-06, "loss": 0.1928, "step": 88250 }, { "epoch": 1.796641221374046, "grad_norm": 8.36297170409601, "learning_rate": 8.08755771159369e-06, "loss": 0.1947, "step": 88260 }, { "epoch": 1.7968447837150128, "grad_norm": 10.63331808430747, "learning_rate": 8.086998774582372e-06, "loss": 0.228, "step": 88270 }, { "epoch": 1.7970483460559796, "grad_norm": 8.168044681947942, "learning_rate": 8.086439775225137e-06, "loss": 0.2661, "step": 88280 }, { "epoch": 1.7972519083969467, "grad_norm": 16.95797353941469, "learning_rate": 8.085880713533276e-06, "loss": 0.2444, "step": 88290 }, { "epoch": 1.7974554707379133, "grad_norm": 5.7329803037958, "learning_rate": 8.08532158951808e-06, "loss": 0.2235, "step": 88300 }, { "epoch": 1.7976590330788804, "grad_norm": 10.20216846255315, "learning_rate": 8.08476240319084e-06, "loss": 0.1643, "step": 88310 }, { "epoch": 1.7978625954198473, "grad_norm": 8.471641802383564, "learning_rate": 8.084203154562848e-06, "loss": 0.1556, "step": 88320 }, { "epoch": 1.7980661577608141, "grad_norm": 8.334425838451626, "learning_rate": 8.083643843645404e-06, "loss": 0.367, "step": 88330 }, { "epoch": 1.7982697201017812, "grad_norm": 8.697094621850882, "learning_rate": 8.083084470449802e-06, "loss": 0.2344, "step": 88340 }, { "epoch": 1.798473282442748, "grad_norm": 5.3598910797761175, "learning_rate": 8.082525034987336e-06, "loss": 0.2703, "step": 88350 }, { "epoch": 1.798676844783715, "grad_norm": 8.251870455111128, "learning_rate": 8.081965537269304e-06, "loss": 0.2159, "step": 88360 }, { "epoch": 1.798880407124682, "grad_norm": 12.395911599428892, "learning_rate": 8.081405977307013e-06, "loss": 0.2144, "step": 88370 }, { "epoch": 1.7990839694656489, "grad_norm": 10.024352020928308, "learning_rate": 8.080846355111758e-06, "loss": 0.1972, "step": 88380 }, { "epoch": 1.7992875318066157, "grad_norm": 4.550136183890463, "learning_rate": 8.080286670694843e-06, "loss": 0.1471, "step": 88390 }, { "epoch": 1.7994910941475828, "grad_norm": 10.851005550815811, "learning_rate": 8.079726924067571e-06, "loss": 0.1711, "step": 88400 }, { "epoch": 1.7996946564885496, "grad_norm": 4.458954130783601, "learning_rate": 8.079167115241247e-06, "loss": 0.1568, "step": 88410 }, { "epoch": 1.7998982188295165, "grad_norm": 10.084301554049404, "learning_rate": 8.078607244227176e-06, "loss": 0.2539, "step": 88420 }, { "epoch": 1.8001017811704836, "grad_norm": 30.58764395896339, "learning_rate": 8.078047311036668e-06, "loss": 0.2745, "step": 88430 }, { "epoch": 1.8003053435114502, "grad_norm": 4.661216170170006, "learning_rate": 8.077487315681029e-06, "loss": 0.1755, "step": 88440 }, { "epoch": 1.8005089058524173, "grad_norm": 5.601735835557226, "learning_rate": 8.07692725817157e-06, "loss": 0.2232, "step": 88450 }, { "epoch": 1.8007124681933844, "grad_norm": 7.571296309067121, "learning_rate": 8.076367138519603e-06, "loss": 0.1809, "step": 88460 }, { "epoch": 1.800916030534351, "grad_norm": 21.9834610755429, "learning_rate": 8.075806956736438e-06, "loss": 0.1677, "step": 88470 }, { "epoch": 1.801119592875318, "grad_norm": 11.368620107762657, "learning_rate": 8.075246712833392e-06, "loss": 0.3098, "step": 88480 }, { "epoch": 1.801323155216285, "grad_norm": 6.196108495754621, "learning_rate": 8.074686406821775e-06, "loss": 0.2519, "step": 88490 }, { "epoch": 1.8015267175572518, "grad_norm": 6.826160715942246, "learning_rate": 8.074126038712908e-06, "loss": 0.1712, "step": 88500 }, { "epoch": 1.8017302798982189, "grad_norm": 1.122307752653586, "learning_rate": 8.073565608518106e-06, "loss": 0.1395, "step": 88510 }, { "epoch": 1.8019338422391857, "grad_norm": 9.102106481787553, "learning_rate": 8.073005116248686e-06, "loss": 0.2347, "step": 88520 }, { "epoch": 1.8021374045801526, "grad_norm": 10.297857428576387, "learning_rate": 8.072444561915971e-06, "loss": 0.3068, "step": 88530 }, { "epoch": 1.8023409669211197, "grad_norm": 18.21438151987113, "learning_rate": 8.07188394553128e-06, "loss": 0.2243, "step": 88540 }, { "epoch": 1.8025445292620865, "grad_norm": 15.198366567833522, "learning_rate": 8.071323267105937e-06, "loss": 0.2519, "step": 88550 }, { "epoch": 1.8027480916030534, "grad_norm": 2.5871939923495924, "learning_rate": 8.070762526651266e-06, "loss": 0.178, "step": 88560 }, { "epoch": 1.8029516539440205, "grad_norm": 15.453926604138044, "learning_rate": 8.070201724178588e-06, "loss": 0.1876, "step": 88570 }, { "epoch": 1.8031552162849873, "grad_norm": 10.854814540789995, "learning_rate": 8.069640859699233e-06, "loss": 0.2045, "step": 88580 }, { "epoch": 1.8033587786259542, "grad_norm": 10.813945219138251, "learning_rate": 8.069079933224528e-06, "loss": 0.1967, "step": 88590 }, { "epoch": 1.8035623409669213, "grad_norm": 5.454792384517995, "learning_rate": 8.068518944765799e-06, "loss": 0.0796, "step": 88600 }, { "epoch": 1.8037659033078879, "grad_norm": 3.2962241268314023, "learning_rate": 8.06795789433438e-06, "loss": 0.1653, "step": 88610 }, { "epoch": 1.803969465648855, "grad_norm": 10.136861479546868, "learning_rate": 8.067396781941598e-06, "loss": 0.1649, "step": 88620 }, { "epoch": 1.804173027989822, "grad_norm": 8.200096826229432, "learning_rate": 8.06683560759879e-06, "loss": 0.2565, "step": 88630 }, { "epoch": 1.8043765903307887, "grad_norm": 1.913839190729071, "learning_rate": 8.066274371317284e-06, "loss": 0.2555, "step": 88640 }, { "epoch": 1.8045801526717558, "grad_norm": 8.674890788340749, "learning_rate": 8.065713073108417e-06, "loss": 0.1667, "step": 88650 }, { "epoch": 1.8047837150127226, "grad_norm": 8.179672267004674, "learning_rate": 8.065151712983529e-06, "loss": 0.2113, "step": 88660 }, { "epoch": 1.8049872773536895, "grad_norm": 6.462807984918819, "learning_rate": 8.06459029095395e-06, "loss": 0.162, "step": 88670 }, { "epoch": 1.8051908396946565, "grad_norm": 5.9781317660134965, "learning_rate": 8.064028807031026e-06, "loss": 0.1675, "step": 88680 }, { "epoch": 1.8053944020356234, "grad_norm": 14.82017438337446, "learning_rate": 8.063467261226094e-06, "loss": 0.28, "step": 88690 }, { "epoch": 1.8055979643765903, "grad_norm": 14.115692501178907, "learning_rate": 8.062905653550494e-06, "loss": 0.2623, "step": 88700 }, { "epoch": 1.8058015267175573, "grad_norm": 9.712409169895603, "learning_rate": 8.06234398401557e-06, "loss": 0.2573, "step": 88710 }, { "epoch": 1.8060050890585242, "grad_norm": 9.447732531473084, "learning_rate": 8.061782252632664e-06, "loss": 0.1917, "step": 88720 }, { "epoch": 1.806208651399491, "grad_norm": 12.244563122694734, "learning_rate": 8.061220459413121e-06, "loss": 0.2185, "step": 88730 }, { "epoch": 1.8064122137404581, "grad_norm": 3.7342894338269996, "learning_rate": 8.060658604368292e-06, "loss": 0.2966, "step": 88740 }, { "epoch": 1.806615776081425, "grad_norm": 5.378350332997276, "learning_rate": 8.060096687509516e-06, "loss": 0.1668, "step": 88750 }, { "epoch": 1.8068193384223918, "grad_norm": 10.49603163333246, "learning_rate": 8.059534708848149e-06, "loss": 0.1975, "step": 88760 }, { "epoch": 1.807022900763359, "grad_norm": 13.525123085677995, "learning_rate": 8.058972668395535e-06, "loss": 0.1867, "step": 88770 }, { "epoch": 1.8072264631043256, "grad_norm": 11.132649460131825, "learning_rate": 8.05841056616303e-06, "loss": 0.2417, "step": 88780 }, { "epoch": 1.8074300254452926, "grad_norm": 1.5323020548804986, "learning_rate": 8.057848402161986e-06, "loss": 0.1708, "step": 88790 }, { "epoch": 1.8076335877862595, "grad_norm": 13.723255152941897, "learning_rate": 8.05728617640375e-06, "loss": 0.1978, "step": 88800 }, { "epoch": 1.8078371501272263, "grad_norm": 18.78912128532011, "learning_rate": 8.056723888899687e-06, "loss": 0.2444, "step": 88810 }, { "epoch": 1.8080407124681934, "grad_norm": 12.590406034718441, "learning_rate": 8.056161539661146e-06, "loss": 0.3116, "step": 88820 }, { "epoch": 1.8082442748091603, "grad_norm": 6.643357442163541, "learning_rate": 8.055599128699487e-06, "loss": 0.1973, "step": 88830 }, { "epoch": 1.8084478371501271, "grad_norm": 8.151693700785192, "learning_rate": 8.055036656026067e-06, "loss": 0.2302, "step": 88840 }, { "epoch": 1.8086513994910942, "grad_norm": 5.464258619995802, "learning_rate": 8.054474121652247e-06, "loss": 0.1471, "step": 88850 }, { "epoch": 1.808854961832061, "grad_norm": 11.716141028009, "learning_rate": 8.053911525589388e-06, "loss": 0.2325, "step": 88860 }, { "epoch": 1.809058524173028, "grad_norm": 4.959636517885903, "learning_rate": 8.053348867848854e-06, "loss": 0.2355, "step": 88870 }, { "epoch": 1.809262086513995, "grad_norm": 10.019405911169128, "learning_rate": 8.052786148442006e-06, "loss": 0.231, "step": 88880 }, { "epoch": 1.8094656488549619, "grad_norm": 9.719195545149736, "learning_rate": 8.052223367380209e-06, "loss": 0.1512, "step": 88890 }, { "epoch": 1.8096692111959287, "grad_norm": 13.909860382481146, "learning_rate": 8.051660524674828e-06, "loss": 0.2836, "step": 88900 }, { "epoch": 1.8098727735368958, "grad_norm": 11.531775383150453, "learning_rate": 8.051097620337235e-06, "loss": 0.279, "step": 88910 }, { "epoch": 1.8100763358778624, "grad_norm": 4.310846744411394, "learning_rate": 8.050534654378796e-06, "loss": 0.1308, "step": 88920 }, { "epoch": 1.8102798982188295, "grad_norm": 12.239910258310031, "learning_rate": 8.04997162681088e-06, "loss": 0.2139, "step": 88930 }, { "epoch": 1.8104834605597966, "grad_norm": 17.017570082892476, "learning_rate": 8.049408537644858e-06, "loss": 0.3269, "step": 88940 }, { "epoch": 1.8106870229007632, "grad_norm": 1.069460605557961, "learning_rate": 8.048845386892105e-06, "loss": 0.1517, "step": 88950 }, { "epoch": 1.8108905852417303, "grad_norm": 9.444004889686402, "learning_rate": 8.04828217456399e-06, "loss": 0.2643, "step": 88960 }, { "epoch": 1.8110941475826972, "grad_norm": 12.535712174867804, "learning_rate": 8.047718900671893e-06, "loss": 0.1946, "step": 88970 }, { "epoch": 1.811297709923664, "grad_norm": 16.088339373480654, "learning_rate": 8.047155565227185e-06, "loss": 0.2489, "step": 88980 }, { "epoch": 1.811501272264631, "grad_norm": 15.697087000248562, "learning_rate": 8.046592168241245e-06, "loss": 0.2598, "step": 88990 }, { "epoch": 1.811704834605598, "grad_norm": 5.764284395089613, "learning_rate": 8.046028709725454e-06, "loss": 0.2695, "step": 89000 }, { "epoch": 1.8119083969465648, "grad_norm": 13.26469845811457, "learning_rate": 8.045465189691191e-06, "loss": 0.2668, "step": 89010 }, { "epoch": 1.8121119592875319, "grad_norm": 10.419809384495522, "learning_rate": 8.044901608149833e-06, "loss": 0.2511, "step": 89020 }, { "epoch": 1.8123155216284987, "grad_norm": 7.636182841054921, "learning_rate": 8.044337965112767e-06, "loss": 0.1917, "step": 89030 }, { "epoch": 1.8125190839694656, "grad_norm": 9.028569278001351, "learning_rate": 8.043774260591375e-06, "loss": 0.216, "step": 89040 }, { "epoch": 1.8127226463104327, "grad_norm": 3.677509687678829, "learning_rate": 8.043210494597041e-06, "loss": 0.1927, "step": 89050 }, { "epoch": 1.8129262086513995, "grad_norm": 10.324105472004803, "learning_rate": 8.04264666714115e-06, "loss": 0.2844, "step": 89060 }, { "epoch": 1.8131297709923664, "grad_norm": 0.3880185412872654, "learning_rate": 8.042082778235091e-06, "loss": 0.1347, "step": 89070 }, { "epoch": 1.8133333333333335, "grad_norm": 16.16204262649038, "learning_rate": 8.041518827890255e-06, "loss": 0.2634, "step": 89080 }, { "epoch": 1.8135368956743, "grad_norm": 13.093417732039297, "learning_rate": 8.040954816118027e-06, "loss": 0.1957, "step": 89090 }, { "epoch": 1.8137404580152672, "grad_norm": 6.084429868549191, "learning_rate": 8.040390742929799e-06, "loss": 0.2198, "step": 89100 }, { "epoch": 1.8139440203562343, "grad_norm": 3.6549955767598177, "learning_rate": 8.039826608336965e-06, "loss": 0.1988, "step": 89110 }, { "epoch": 1.8141475826972009, "grad_norm": 5.194799740928172, "learning_rate": 8.039262412350916e-06, "loss": 0.1879, "step": 89120 }, { "epoch": 1.814351145038168, "grad_norm": 9.19263672880008, "learning_rate": 8.03869815498305e-06, "loss": 0.1816, "step": 89130 }, { "epoch": 1.8145547073791348, "grad_norm": 7.420240393560342, "learning_rate": 8.03813383624476e-06, "loss": 0.254, "step": 89140 }, { "epoch": 1.8147582697201017, "grad_norm": 9.906042362656285, "learning_rate": 8.037569456147443e-06, "loss": 0.209, "step": 89150 }, { "epoch": 1.8149618320610688, "grad_norm": 9.304623858449311, "learning_rate": 8.0370050147025e-06, "loss": 0.17, "step": 89160 }, { "epoch": 1.8151653944020356, "grad_norm": 0.4435923021317412, "learning_rate": 8.036440511921329e-06, "loss": 0.2265, "step": 89170 }, { "epoch": 1.8153689567430025, "grad_norm": 8.951387862244001, "learning_rate": 8.03587594781533e-06, "loss": 0.2243, "step": 89180 }, { "epoch": 1.8155725190839695, "grad_norm": 12.93854384848582, "learning_rate": 8.035311322395906e-06, "loss": 0.196, "step": 89190 }, { "epoch": 1.8157760814249364, "grad_norm": 5.293071495625054, "learning_rate": 8.034746635674461e-06, "loss": 0.2114, "step": 89200 }, { "epoch": 1.8159796437659033, "grad_norm": 15.759862368272831, "learning_rate": 8.0341818876624e-06, "loss": 0.1708, "step": 89210 }, { "epoch": 1.8161832061068703, "grad_norm": 9.720240543823456, "learning_rate": 8.033617078371126e-06, "loss": 0.2407, "step": 89220 }, { "epoch": 1.8163867684478372, "grad_norm": 3.296407277342413, "learning_rate": 8.033052207812049e-06, "loss": 0.18, "step": 89230 }, { "epoch": 1.816590330788804, "grad_norm": 6.120871626256587, "learning_rate": 8.032487275996576e-06, "loss": 0.1613, "step": 89240 }, { "epoch": 1.8167938931297711, "grad_norm": 5.063454660275074, "learning_rate": 8.031922282936117e-06, "loss": 0.176, "step": 89250 }, { "epoch": 1.8169974554707378, "grad_norm": 13.476099324622016, "learning_rate": 8.03135722864208e-06, "loss": 0.2245, "step": 89260 }, { "epoch": 1.8172010178117048, "grad_norm": 2.6977358691686226, "learning_rate": 8.030792113125883e-06, "loss": 0.229, "step": 89270 }, { "epoch": 1.8174045801526717, "grad_norm": 0.8941648182766969, "learning_rate": 8.030226936398932e-06, "loss": 0.1183, "step": 89280 }, { "epoch": 1.8176081424936386, "grad_norm": 17.028323181159884, "learning_rate": 8.029661698472649e-06, "loss": 0.1399, "step": 89290 }, { "epoch": 1.8178117048346056, "grad_norm": 13.489801828350288, "learning_rate": 8.029096399358443e-06, "loss": 0.3183, "step": 89300 }, { "epoch": 1.8180152671755725, "grad_norm": 7.892982131256119, "learning_rate": 8.028531039067734e-06, "loss": 0.2663, "step": 89310 }, { "epoch": 1.8182188295165393, "grad_norm": 17.233499721068426, "learning_rate": 8.027965617611942e-06, "loss": 0.1778, "step": 89320 }, { "epoch": 1.8184223918575064, "grad_norm": 4.018548188825588, "learning_rate": 8.027400135002482e-06, "loss": 0.2432, "step": 89330 }, { "epoch": 1.8186259541984733, "grad_norm": 4.031100882045738, "learning_rate": 8.026834591250778e-06, "loss": 0.2541, "step": 89340 }, { "epoch": 1.8188295165394401, "grad_norm": 7.2244321711311175, "learning_rate": 8.026268986368251e-06, "loss": 0.2413, "step": 89350 }, { "epoch": 1.8190330788804072, "grad_norm": 9.056274470649893, "learning_rate": 8.025703320366323e-06, "loss": 0.2026, "step": 89360 }, { "epoch": 1.819236641221374, "grad_norm": 3.1044771033293768, "learning_rate": 8.02513759325642e-06, "loss": 0.19, "step": 89370 }, { "epoch": 1.819440203562341, "grad_norm": 0.5183364864960633, "learning_rate": 8.024571805049968e-06, "loss": 0.1735, "step": 89380 }, { "epoch": 1.819643765903308, "grad_norm": 21.591275636357214, "learning_rate": 8.024005955758391e-06, "loss": 0.1308, "step": 89390 }, { "epoch": 1.8198473282442749, "grad_norm": 10.312334035666517, "learning_rate": 8.02344004539312e-06, "loss": 0.1643, "step": 89400 }, { "epoch": 1.8200508905852417, "grad_norm": 15.371667738997948, "learning_rate": 8.022874073965583e-06, "loss": 0.2381, "step": 89410 }, { "epoch": 1.8202544529262088, "grad_norm": 13.188130625965249, "learning_rate": 8.022308041487211e-06, "loss": 0.3003, "step": 89420 }, { "epoch": 1.8204580152671754, "grad_norm": 4.960151324554191, "learning_rate": 8.021741947969434e-06, "loss": 0.2466, "step": 89430 }, { "epoch": 1.8206615776081425, "grad_norm": 9.256335879584725, "learning_rate": 8.021175793423689e-06, "loss": 0.2287, "step": 89440 }, { "epoch": 1.8208651399491094, "grad_norm": 9.98367828604154, "learning_rate": 8.020609577861405e-06, "loss": 0.236, "step": 89450 }, { "epoch": 1.8210687022900762, "grad_norm": 2.6348146873331912, "learning_rate": 8.020043301294021e-06, "loss": 0.2082, "step": 89460 }, { "epoch": 1.8212722646310433, "grad_norm": 5.522096365223071, "learning_rate": 8.019476963732972e-06, "loss": 0.2921, "step": 89470 }, { "epoch": 1.8214758269720102, "grad_norm": 16.05260914035635, "learning_rate": 8.018910565189698e-06, "loss": 0.2622, "step": 89480 }, { "epoch": 1.821679389312977, "grad_norm": 4.230785123660706, "learning_rate": 8.018344105675635e-06, "loss": 0.1786, "step": 89490 }, { "epoch": 1.821882951653944, "grad_norm": 17.544895931351352, "learning_rate": 8.017777585202226e-06, "loss": 0.1956, "step": 89500 }, { "epoch": 1.822086513994911, "grad_norm": 11.017680258194696, "learning_rate": 8.017211003780913e-06, "loss": 0.1664, "step": 89510 }, { "epoch": 1.8222900763358778, "grad_norm": 8.326484309605394, "learning_rate": 8.016644361423135e-06, "loss": 0.2243, "step": 89520 }, { "epoch": 1.8224936386768449, "grad_norm": 14.412088920624427, "learning_rate": 8.016077658140343e-06, "loss": 0.2572, "step": 89530 }, { "epoch": 1.8226972010178117, "grad_norm": 24.49243693807568, "learning_rate": 8.015510893943974e-06, "loss": 0.2412, "step": 89540 }, { "epoch": 1.8229007633587786, "grad_norm": 14.323162967317062, "learning_rate": 8.01494406884548e-06, "loss": 0.1741, "step": 89550 }, { "epoch": 1.8231043256997457, "grad_norm": 14.030057411907602, "learning_rate": 8.014377182856308e-06, "loss": 0.2804, "step": 89560 }, { "epoch": 1.8233078880407123, "grad_norm": 0.2764151065540924, "learning_rate": 8.013810235987906e-06, "loss": 0.2695, "step": 89570 }, { "epoch": 1.8235114503816794, "grad_norm": 10.42704224060919, "learning_rate": 8.013243228251724e-06, "loss": 0.1513, "step": 89580 }, { "epoch": 1.8237150127226465, "grad_norm": 9.597295163461908, "learning_rate": 8.012676159659214e-06, "loss": 0.2022, "step": 89590 }, { "epoch": 1.823918575063613, "grad_norm": 21.05682754155128, "learning_rate": 8.01210903022183e-06, "loss": 0.2909, "step": 89600 }, { "epoch": 1.8241221374045802, "grad_norm": 3.8222552927101967, "learning_rate": 8.011541839951023e-06, "loss": 0.1155, "step": 89610 }, { "epoch": 1.824325699745547, "grad_norm": 18.383951026285086, "learning_rate": 8.010974588858249e-06, "loss": 0.2246, "step": 89620 }, { "epoch": 1.824529262086514, "grad_norm": 41.56276262654885, "learning_rate": 8.010407276954965e-06, "loss": 0.2242, "step": 89630 }, { "epoch": 1.824732824427481, "grad_norm": 20.75219557720523, "learning_rate": 8.00983990425263e-06, "loss": 0.2036, "step": 89640 }, { "epoch": 1.8249363867684478, "grad_norm": 10.749355954454607, "learning_rate": 8.009272470762704e-06, "loss": 0.186, "step": 89650 }, { "epoch": 1.8251399491094147, "grad_norm": 13.32134324520364, "learning_rate": 8.00870497649664e-06, "loss": 0.2158, "step": 89660 }, { "epoch": 1.8253435114503818, "grad_norm": 20.394397226849033, "learning_rate": 8.008137421465907e-06, "loss": 0.1738, "step": 89670 }, { "epoch": 1.8255470737913486, "grad_norm": 8.93302237021329, "learning_rate": 8.007569805681962e-06, "loss": 0.1957, "step": 89680 }, { "epoch": 1.8257506361323155, "grad_norm": 4.884302104436794, "learning_rate": 8.007002129156272e-06, "loss": 0.2514, "step": 89690 }, { "epoch": 1.8259541984732826, "grad_norm": 39.77709536394515, "learning_rate": 8.006434391900301e-06, "loss": 0.2196, "step": 89700 }, { "epoch": 1.8261577608142494, "grad_norm": 17.353250385348606, "learning_rate": 8.005866593925516e-06, "loss": 0.2589, "step": 89710 }, { "epoch": 1.8263613231552163, "grad_norm": 10.5230583121305, "learning_rate": 8.005298735243383e-06, "loss": 0.2497, "step": 89720 }, { "epoch": 1.8265648854961833, "grad_norm": 18.31176062028436, "learning_rate": 8.00473081586537e-06, "loss": 0.1912, "step": 89730 }, { "epoch": 1.82676844783715, "grad_norm": 2.5619003603208212, "learning_rate": 8.00416283580295e-06, "loss": 0.2413, "step": 89740 }, { "epoch": 1.826972010178117, "grad_norm": 9.454124044363954, "learning_rate": 8.00359479506759e-06, "loss": 0.2013, "step": 89750 }, { "epoch": 1.8271755725190841, "grad_norm": 21.04001074044028, "learning_rate": 8.003026693670765e-06, "loss": 0.2827, "step": 89760 }, { "epoch": 1.8273791348600508, "grad_norm": 9.621036280750788, "learning_rate": 8.002458531623951e-06, "loss": 0.1724, "step": 89770 }, { "epoch": 1.8275826972010178, "grad_norm": 0.2878647157901442, "learning_rate": 8.001890308938618e-06, "loss": 0.1831, "step": 89780 }, { "epoch": 1.8277862595419847, "grad_norm": 1.8131799552979317, "learning_rate": 8.001322025626244e-06, "loss": 0.2627, "step": 89790 }, { "epoch": 1.8279898218829516, "grad_norm": 8.147720332775169, "learning_rate": 8.000753681698304e-06, "loss": 0.1805, "step": 89800 }, { "epoch": 1.8281933842239186, "grad_norm": 10.8640441196945, "learning_rate": 8.000185277166281e-06, "loss": 0.2105, "step": 89810 }, { "epoch": 1.8283969465648855, "grad_norm": 0.6965105275504536, "learning_rate": 7.999616812041651e-06, "loss": 0.1829, "step": 89820 }, { "epoch": 1.8286005089058524, "grad_norm": 17.3887725147394, "learning_rate": 7.999048286335897e-06, "loss": 0.1935, "step": 89830 }, { "epoch": 1.8288040712468194, "grad_norm": 1.8466302319760632, "learning_rate": 7.998479700060499e-06, "loss": 0.2024, "step": 89840 }, { "epoch": 1.8290076335877863, "grad_norm": 21.04917642676172, "learning_rate": 7.99791105322694e-06, "loss": 0.2186, "step": 89850 }, { "epoch": 1.8292111959287531, "grad_norm": 2.3902209428479018, "learning_rate": 7.997342345846709e-06, "loss": 0.2109, "step": 89860 }, { "epoch": 1.8294147582697202, "grad_norm": 5.69169707985216, "learning_rate": 7.996773577931289e-06, "loss": 0.2381, "step": 89870 }, { "epoch": 1.829618320610687, "grad_norm": 0.14669979355960802, "learning_rate": 7.996204749492164e-06, "loss": 0.1738, "step": 89880 }, { "epoch": 1.829821882951654, "grad_norm": 3.021072901704917, "learning_rate": 7.995635860540827e-06, "loss": 0.2865, "step": 89890 }, { "epoch": 1.830025445292621, "grad_norm": 31.746227518402584, "learning_rate": 7.995066911088765e-06, "loss": 0.2508, "step": 89900 }, { "epoch": 1.8302290076335876, "grad_norm": 11.433032575002137, "learning_rate": 7.994497901147468e-06, "loss": 0.2057, "step": 89910 }, { "epoch": 1.8304325699745547, "grad_norm": 8.944743614628146, "learning_rate": 7.993928830728428e-06, "loss": 0.1978, "step": 89920 }, { "epoch": 1.8306361323155216, "grad_norm": 6.165129971947981, "learning_rate": 7.993359699843143e-06, "loss": 0.3086, "step": 89930 }, { "epoch": 1.8308396946564884, "grad_norm": 10.616654313020183, "learning_rate": 7.992790508503101e-06, "loss": 0.287, "step": 89940 }, { "epoch": 1.8310432569974555, "grad_norm": 22.97448546731865, "learning_rate": 7.992221256719801e-06, "loss": 0.2445, "step": 89950 }, { "epoch": 1.8312468193384224, "grad_norm": 12.926183893156008, "learning_rate": 7.991651944504736e-06, "loss": 0.2449, "step": 89960 }, { "epoch": 1.8314503816793892, "grad_norm": 6.867575154618464, "learning_rate": 7.99108257186941e-06, "loss": 0.235, "step": 89970 }, { "epoch": 1.8316539440203563, "grad_norm": 5.763671745368245, "learning_rate": 7.990513138825318e-06, "loss": 0.2094, "step": 89980 }, { "epoch": 1.8318575063613232, "grad_norm": 11.370766633260297, "learning_rate": 7.989943645383961e-06, "loss": 0.2597, "step": 89990 }, { "epoch": 1.83206106870229, "grad_norm": 4.050031242525744, "learning_rate": 7.989374091556842e-06, "loss": 0.2114, "step": 90000 }, { "epoch": 1.832264631043257, "grad_norm": 6.7327323760063065, "learning_rate": 7.988804477355463e-06, "loss": 0.2097, "step": 90010 }, { "epoch": 1.832468193384224, "grad_norm": 11.621611735589585, "learning_rate": 7.988234802791328e-06, "loss": 0.2093, "step": 90020 }, { "epoch": 1.8326717557251908, "grad_norm": 9.370231726677515, "learning_rate": 7.98766506787594e-06, "loss": 0.2353, "step": 90030 }, { "epoch": 1.8328753180661579, "grad_norm": 8.470030547836027, "learning_rate": 7.98709527262081e-06, "loss": 0.2104, "step": 90040 }, { "epoch": 1.8330788804071245, "grad_norm": 8.266125171864585, "learning_rate": 7.986525417037443e-06, "loss": 0.2015, "step": 90050 }, { "epoch": 1.8332824427480916, "grad_norm": 0.04063924345728876, "learning_rate": 7.98595550113735e-06, "loss": 0.272, "step": 90060 }, { "epoch": 1.8334860050890587, "grad_norm": 3.8732079757982536, "learning_rate": 7.985385524932039e-06, "loss": 0.1985, "step": 90070 }, { "epoch": 1.8336895674300253, "grad_norm": 0.49545742433563555, "learning_rate": 7.98481548843302e-06, "loss": 0.1841, "step": 90080 }, { "epoch": 1.8338931297709924, "grad_norm": 16.070121233038225, "learning_rate": 7.984245391651811e-06, "loss": 0.2773, "step": 90090 }, { "epoch": 1.8340966921119592, "grad_norm": 12.406767858970138, "learning_rate": 7.98367523459992e-06, "loss": 0.2374, "step": 90100 }, { "epoch": 1.834300254452926, "grad_norm": 15.693744112573604, "learning_rate": 7.983105017288866e-06, "loss": 0.2358, "step": 90110 }, { "epoch": 1.8345038167938932, "grad_norm": 13.35659581918639, "learning_rate": 7.982534739730165e-06, "loss": 0.2818, "step": 90120 }, { "epoch": 1.83470737913486, "grad_norm": 8.44128825332605, "learning_rate": 7.981964401935332e-06, "loss": 0.1702, "step": 90130 }, { "epoch": 1.834910941475827, "grad_norm": 9.588130382217138, "learning_rate": 7.981394003915887e-06, "loss": 0.2512, "step": 90140 }, { "epoch": 1.835114503816794, "grad_norm": 9.630395339055974, "learning_rate": 7.98082354568335e-06, "loss": 0.1845, "step": 90150 }, { "epoch": 1.8353180661577608, "grad_norm": 10.599862140167167, "learning_rate": 7.980253027249243e-06, "loss": 0.2991, "step": 90160 }, { "epoch": 1.8355216284987277, "grad_norm": 10.701046606512111, "learning_rate": 7.979682448625088e-06, "loss": 0.1699, "step": 90170 }, { "epoch": 1.8357251908396948, "grad_norm": 7.560343052308402, "learning_rate": 7.979111809822407e-06, "loss": 0.2001, "step": 90180 }, { "epoch": 1.8359287531806616, "grad_norm": 12.287048980147631, "learning_rate": 7.978541110852727e-06, "loss": 0.2505, "step": 90190 }, { "epoch": 1.8361323155216285, "grad_norm": 2.000960051827636, "learning_rate": 7.977970351727572e-06, "loss": 0.1582, "step": 90200 }, { "epoch": 1.8363358778625956, "grad_norm": 9.735090031798805, "learning_rate": 7.97739953245847e-06, "loss": 0.2499, "step": 90210 }, { "epoch": 1.8365394402035622, "grad_norm": 8.822024183469912, "learning_rate": 7.976828653056949e-06, "loss": 0.1946, "step": 90220 }, { "epoch": 1.8367430025445293, "grad_norm": 7.734417792532642, "learning_rate": 7.976257713534542e-06, "loss": 0.1744, "step": 90230 }, { "epoch": 1.8369465648854963, "grad_norm": 7.807635958363519, "learning_rate": 7.975686713902775e-06, "loss": 0.1774, "step": 90240 }, { "epoch": 1.837150127226463, "grad_norm": 4.541803704981609, "learning_rate": 7.975115654173183e-06, "loss": 0.218, "step": 90250 }, { "epoch": 1.83735368956743, "grad_norm": 8.234870803951337, "learning_rate": 7.974544534357298e-06, "loss": 0.2169, "step": 90260 }, { "epoch": 1.837557251908397, "grad_norm": 15.470121612005185, "learning_rate": 7.973973354466652e-06, "loss": 0.2357, "step": 90270 }, { "epoch": 1.8377608142493638, "grad_norm": 4.292107073271944, "learning_rate": 7.973402114512789e-06, "loss": 0.2296, "step": 90280 }, { "epoch": 1.8379643765903309, "grad_norm": 12.546808874880975, "learning_rate": 7.972830814507238e-06, "loss": 0.2828, "step": 90290 }, { "epoch": 1.8381679389312977, "grad_norm": 5.109280529436588, "learning_rate": 7.97225945446154e-06, "loss": 0.1203, "step": 90300 }, { "epoch": 1.8383715012722646, "grad_norm": 6.776227253649361, "learning_rate": 7.971688034387235e-06, "loss": 0.2277, "step": 90310 }, { "epoch": 1.8385750636132316, "grad_norm": 13.772473898755845, "learning_rate": 7.971116554295861e-06, "loss": 0.1677, "step": 90320 }, { "epoch": 1.8387786259541985, "grad_norm": 0.2877150677219119, "learning_rate": 7.97054501419896e-06, "loss": 0.2255, "step": 90330 }, { "epoch": 1.8389821882951654, "grad_norm": 17.157882090109762, "learning_rate": 7.969973414108078e-06, "loss": 0.1671, "step": 90340 }, { "epoch": 1.8391857506361324, "grad_norm": 5.210940909967344, "learning_rate": 7.96940175403476e-06, "loss": 0.2421, "step": 90350 }, { "epoch": 1.8393893129770993, "grad_norm": 10.276058827327057, "learning_rate": 7.968830033990547e-06, "loss": 0.106, "step": 90360 }, { "epoch": 1.8395928753180661, "grad_norm": 6.1784258284953815, "learning_rate": 7.968258253986988e-06, "loss": 0.2628, "step": 90370 }, { "epoch": 1.8397964376590332, "grad_norm": 8.781604193170342, "learning_rate": 7.967686414035629e-06, "loss": 0.2391, "step": 90380 }, { "epoch": 1.8399999999999999, "grad_norm": 7.130920067485972, "learning_rate": 7.967114514148022e-06, "loss": 0.2397, "step": 90390 }, { "epoch": 1.840203562340967, "grad_norm": 13.207809968008116, "learning_rate": 7.966542554335715e-06, "loss": 0.199, "step": 90400 }, { "epoch": 1.8404071246819338, "grad_norm": 2.3990100445057663, "learning_rate": 7.965970534610262e-06, "loss": 0.2289, "step": 90410 }, { "epoch": 1.8406106870229006, "grad_norm": 1.2514314589472917, "learning_rate": 7.965398454983212e-06, "loss": 0.1912, "step": 90420 }, { "epoch": 1.8408142493638677, "grad_norm": 8.910226613241024, "learning_rate": 7.964826315466122e-06, "loss": 0.1853, "step": 90430 }, { "epoch": 1.8410178117048346, "grad_norm": 11.410761170524813, "learning_rate": 7.964254116070544e-06, "loss": 0.3269, "step": 90440 }, { "epoch": 1.8412213740458014, "grad_norm": 7.577475243265976, "learning_rate": 7.963681856808039e-06, "loss": 0.3228, "step": 90450 }, { "epoch": 1.8414249363867685, "grad_norm": 11.451013694798442, "learning_rate": 7.963109537690158e-06, "loss": 0.2316, "step": 90460 }, { "epoch": 1.8416284987277354, "grad_norm": 12.177273209779026, "learning_rate": 7.962537158728467e-06, "loss": 0.2193, "step": 90470 }, { "epoch": 1.8418320610687022, "grad_norm": 2.951297872193809, "learning_rate": 7.961964719934521e-06, "loss": 0.144, "step": 90480 }, { "epoch": 1.8420356234096693, "grad_norm": 9.403712970041962, "learning_rate": 7.961392221319883e-06, "loss": 0.1904, "step": 90490 }, { "epoch": 1.8422391857506362, "grad_norm": 40.92002780460197, "learning_rate": 7.960819662896113e-06, "loss": 0.3272, "step": 90500 }, { "epoch": 1.842442748091603, "grad_norm": 4.4667257301247885, "learning_rate": 7.960247044674778e-06, "loss": 0.1745, "step": 90510 }, { "epoch": 1.84264631043257, "grad_norm": 8.263811756102086, "learning_rate": 7.959674366667442e-06, "loss": 0.2036, "step": 90520 }, { "epoch": 1.8428498727735367, "grad_norm": 5.825752709115151, "learning_rate": 7.959101628885669e-06, "loss": 0.2458, "step": 90530 }, { "epoch": 1.8430534351145038, "grad_norm": 12.660044392137456, "learning_rate": 7.958528831341025e-06, "loss": 0.1464, "step": 90540 }, { "epoch": 1.843256997455471, "grad_norm": 17.74268993583856, "learning_rate": 7.957955974045085e-06, "loss": 0.2083, "step": 90550 }, { "epoch": 1.8434605597964375, "grad_norm": 13.01387823640504, "learning_rate": 7.957383057009413e-06, "loss": 0.2018, "step": 90560 }, { "epoch": 1.8436641221374046, "grad_norm": 5.0263482781489355, "learning_rate": 7.95681008024558e-06, "loss": 0.1287, "step": 90570 }, { "epoch": 1.8438676844783715, "grad_norm": 5.749887058560057, "learning_rate": 7.956237043765159e-06, "loss": 0.238, "step": 90580 }, { "epoch": 1.8440712468193383, "grad_norm": 2.8132639744386827, "learning_rate": 7.955663947579723e-06, "loss": 0.1379, "step": 90590 }, { "epoch": 1.8442748091603054, "grad_norm": 13.888913001877706, "learning_rate": 7.955090791700847e-06, "loss": 0.1692, "step": 90600 }, { "epoch": 1.8444783715012723, "grad_norm": 14.372117655928598, "learning_rate": 7.954517576140108e-06, "loss": 0.1932, "step": 90610 }, { "epoch": 1.844681933842239, "grad_norm": 8.155752505407893, "learning_rate": 7.95394430090908e-06, "loss": 0.2899, "step": 90620 }, { "epoch": 1.8448854961832062, "grad_norm": 8.58678665407738, "learning_rate": 7.953370966019342e-06, "loss": 0.335, "step": 90630 }, { "epoch": 1.845089058524173, "grad_norm": 15.371806377497428, "learning_rate": 7.952797571482473e-06, "loss": 0.1513, "step": 90640 }, { "epoch": 1.84529262086514, "grad_norm": 5.209052110578196, "learning_rate": 7.952224117310056e-06, "loss": 0.2028, "step": 90650 }, { "epoch": 1.845496183206107, "grad_norm": 14.170662001119421, "learning_rate": 7.951650603513667e-06, "loss": 0.1852, "step": 90660 }, { "epoch": 1.8456997455470738, "grad_norm": 15.189460103003336, "learning_rate": 7.951077030104894e-06, "loss": 0.305, "step": 90670 }, { "epoch": 1.8459033078880407, "grad_norm": 8.40037039934353, "learning_rate": 7.950503397095322e-06, "loss": 0.205, "step": 90680 }, { "epoch": 1.8461068702290078, "grad_norm": 10.78716732593756, "learning_rate": 7.94992970449653e-06, "loss": 0.1823, "step": 90690 }, { "epoch": 1.8463104325699744, "grad_norm": 0.037613361626525725, "learning_rate": 7.94935595232011e-06, "loss": 0.1712, "step": 90700 }, { "epoch": 1.8465139949109415, "grad_norm": 0.10370723681058865, "learning_rate": 7.948782140577647e-06, "loss": 0.1953, "step": 90710 }, { "epoch": 1.8467175572519086, "grad_norm": 6.309791689332181, "learning_rate": 7.948208269280732e-06, "loss": 0.2527, "step": 90720 }, { "epoch": 1.8469211195928752, "grad_norm": 5.961526923466967, "learning_rate": 7.947634338440953e-06, "loss": 0.1644, "step": 90730 }, { "epoch": 1.8471246819338423, "grad_norm": 10.265775333073506, "learning_rate": 7.947060348069902e-06, "loss": 0.1904, "step": 90740 }, { "epoch": 1.8473282442748091, "grad_norm": 4.2168137833437145, "learning_rate": 7.94648629817917e-06, "loss": 0.2085, "step": 90750 }, { "epoch": 1.847531806615776, "grad_norm": 5.5180231304941705, "learning_rate": 7.945912188780353e-06, "loss": 0.2017, "step": 90760 }, { "epoch": 1.847735368956743, "grad_norm": 10.542046493951016, "learning_rate": 7.945338019885048e-06, "loss": 0.2386, "step": 90770 }, { "epoch": 1.84793893129771, "grad_norm": 7.601030787416616, "learning_rate": 7.944763791504845e-06, "loss": 0.1927, "step": 90780 }, { "epoch": 1.8481424936386768, "grad_norm": 15.514380685329991, "learning_rate": 7.944189503651344e-06, "loss": 0.3727, "step": 90790 }, { "epoch": 1.8483460559796439, "grad_norm": 9.213091850426256, "learning_rate": 7.943615156336143e-06, "loss": 0.1764, "step": 90800 }, { "epoch": 1.8485496183206107, "grad_norm": 9.734864595203891, "learning_rate": 7.943040749570847e-06, "loss": 0.148, "step": 90810 }, { "epoch": 1.8487531806615776, "grad_norm": 24.404743224285074, "learning_rate": 7.94246628336705e-06, "loss": 0.237, "step": 90820 }, { "epoch": 1.8489567430025446, "grad_norm": 10.503535222352792, "learning_rate": 7.941891757736355e-06, "loss": 0.3496, "step": 90830 }, { "epoch": 1.8491603053435115, "grad_norm": 23.46408349241945, "learning_rate": 7.941317172690368e-06, "loss": 0.2292, "step": 90840 }, { "epoch": 1.8493638676844784, "grad_norm": 4.758432727446094, "learning_rate": 7.940742528240695e-06, "loss": 0.2926, "step": 90850 }, { "epoch": 1.8495674300254454, "grad_norm": 2.8355924949262934, "learning_rate": 7.940167824398935e-06, "loss": 0.2217, "step": 90860 }, { "epoch": 1.849770992366412, "grad_norm": 4.917472971019979, "learning_rate": 7.9395930611767e-06, "loss": 0.1637, "step": 90870 }, { "epoch": 1.8499745547073791, "grad_norm": 4.16371645121205, "learning_rate": 7.939018238585599e-06, "loss": 0.22, "step": 90880 }, { "epoch": 1.850178117048346, "grad_norm": 9.76122622426684, "learning_rate": 7.938443356637237e-06, "loss": 0.186, "step": 90890 }, { "epoch": 1.8503816793893129, "grad_norm": 4.282048348754833, "learning_rate": 7.937868415343227e-06, "loss": 0.1554, "step": 90900 }, { "epoch": 1.85058524173028, "grad_norm": 0.19318088944099862, "learning_rate": 7.937293414715182e-06, "loss": 0.2607, "step": 90910 }, { "epoch": 1.8507888040712468, "grad_norm": 9.265616704326254, "learning_rate": 7.936718354764712e-06, "loss": 0.3175, "step": 90920 }, { "epoch": 1.8509923664122137, "grad_norm": 13.269405944381292, "learning_rate": 7.936143235503431e-06, "loss": 0.2813, "step": 90930 }, { "epoch": 1.8511959287531807, "grad_norm": 8.309962970028284, "learning_rate": 7.935568056942957e-06, "loss": 0.1806, "step": 90940 }, { "epoch": 1.8513994910941476, "grad_norm": 4.119141814760163, "learning_rate": 7.934992819094905e-06, "loss": 0.1951, "step": 90950 }, { "epoch": 1.8516030534351144, "grad_norm": 19.89377252893222, "learning_rate": 7.934417521970894e-06, "loss": 0.1885, "step": 90960 }, { "epoch": 1.8518066157760815, "grad_norm": 9.472796117904641, "learning_rate": 7.93384216558254e-06, "loss": 0.2451, "step": 90970 }, { "epoch": 1.8520101781170484, "grad_norm": 14.402366569170015, "learning_rate": 7.933266749941466e-06, "loss": 0.1979, "step": 90980 }, { "epoch": 1.8522137404580152, "grad_norm": 7.575024634558738, "learning_rate": 7.93269127505929e-06, "loss": 0.1114, "step": 90990 }, { "epoch": 1.8524173027989823, "grad_norm": 7.1740065525817185, "learning_rate": 7.932115740947638e-06, "loss": 0.2332, "step": 91000 }, { "epoch": 1.852620865139949, "grad_norm": 7.6643550485396, "learning_rate": 7.931540147618134e-06, "loss": 0.2729, "step": 91010 }, { "epoch": 1.852824427480916, "grad_norm": 4.901132711854226, "learning_rate": 7.930964495082396e-06, "loss": 0.176, "step": 91020 }, { "epoch": 1.853027989821883, "grad_norm": 2.8860197855520413, "learning_rate": 7.930388783352058e-06, "loss": 0.2092, "step": 91030 }, { "epoch": 1.8532315521628497, "grad_norm": 4.881472382869219, "learning_rate": 7.929813012438745e-06, "loss": 0.2691, "step": 91040 }, { "epoch": 1.8534351145038168, "grad_norm": 6.70363224193998, "learning_rate": 7.929237182354084e-06, "loss": 0.1867, "step": 91050 }, { "epoch": 1.8536386768447837, "grad_norm": 18.71193454501126, "learning_rate": 7.928661293109704e-06, "loss": 0.1484, "step": 91060 }, { "epoch": 1.8538422391857505, "grad_norm": 3.758272008583187, "learning_rate": 7.928085344717238e-06, "loss": 0.1742, "step": 91070 }, { "epoch": 1.8540458015267176, "grad_norm": 9.864994563502236, "learning_rate": 7.927509337188317e-06, "loss": 0.1896, "step": 91080 }, { "epoch": 1.8542493638676845, "grad_norm": 3.2146299601789474, "learning_rate": 7.926933270534574e-06, "loss": 0.2314, "step": 91090 }, { "epoch": 1.8544529262086513, "grad_norm": 5.34423497064525, "learning_rate": 7.926357144767644e-06, "loss": 0.2753, "step": 91100 }, { "epoch": 1.8546564885496184, "grad_norm": 9.830192092844843, "learning_rate": 7.925780959899162e-06, "loss": 0.2797, "step": 91110 }, { "epoch": 1.8548600508905853, "grad_norm": 8.04786662270342, "learning_rate": 7.925204715940766e-06, "loss": 0.2937, "step": 91120 }, { "epoch": 1.855063613231552, "grad_norm": 14.922847495349458, "learning_rate": 7.924628412904093e-06, "loss": 0.2215, "step": 91130 }, { "epoch": 1.8552671755725192, "grad_norm": 7.645830748412717, "learning_rate": 7.924052050800782e-06, "loss": 0.1663, "step": 91140 }, { "epoch": 1.855470737913486, "grad_norm": 7.066606162145574, "learning_rate": 7.923475629642475e-06, "loss": 0.2066, "step": 91150 }, { "epoch": 1.855674300254453, "grad_norm": 1.9245223856516969, "learning_rate": 7.922899149440809e-06, "loss": 0.17, "step": 91160 }, { "epoch": 1.85587786259542, "grad_norm": 28.33538783014415, "learning_rate": 7.922322610207432e-06, "loss": 0.237, "step": 91170 }, { "epoch": 1.8560814249363866, "grad_norm": 1.8583599245772788, "learning_rate": 7.921746011953986e-06, "loss": 0.2025, "step": 91180 }, { "epoch": 1.8562849872773537, "grad_norm": 10.1408522583183, "learning_rate": 7.921169354692116e-06, "loss": 0.3174, "step": 91190 }, { "epoch": 1.8564885496183208, "grad_norm": 8.942927284823327, "learning_rate": 7.920592638433469e-06, "loss": 0.2057, "step": 91200 }, { "epoch": 1.8566921119592874, "grad_norm": 11.320204971638802, "learning_rate": 7.920015863189692e-06, "loss": 0.2702, "step": 91210 }, { "epoch": 1.8568956743002545, "grad_norm": 16.439810896146174, "learning_rate": 7.919439028972432e-06, "loss": 0.1807, "step": 91220 }, { "epoch": 1.8570992366412213, "grad_norm": 12.279269964841117, "learning_rate": 7.918862135793341e-06, "loss": 0.2785, "step": 91230 }, { "epoch": 1.8573027989821882, "grad_norm": 17.279189783973006, "learning_rate": 7.918285183664072e-06, "loss": 0.2806, "step": 91240 }, { "epoch": 1.8575063613231553, "grad_norm": 3.583524312985723, "learning_rate": 7.917708172596272e-06, "loss": 0.1957, "step": 91250 }, { "epoch": 1.8577099236641221, "grad_norm": 9.606017767923971, "learning_rate": 7.9171311026016e-06, "loss": 0.4271, "step": 91260 }, { "epoch": 1.857913486005089, "grad_norm": 1.1626160612765015, "learning_rate": 7.916553973691708e-06, "loss": 0.1958, "step": 91270 }, { "epoch": 1.858117048346056, "grad_norm": 7.2053515108638635, "learning_rate": 7.91597678587825e-06, "loss": 0.1528, "step": 91280 }, { "epoch": 1.858320610687023, "grad_norm": 3.9482413012795514, "learning_rate": 7.915399539172886e-06, "loss": 0.1818, "step": 91290 }, { "epoch": 1.8585241730279898, "grad_norm": 5.080264908849987, "learning_rate": 7.914822233587273e-06, "loss": 0.0953, "step": 91300 }, { "epoch": 1.8587277353689569, "grad_norm": 9.755860328488804, "learning_rate": 7.914244869133072e-06, "loss": 0.198, "step": 91310 }, { "epoch": 1.8589312977099237, "grad_norm": 16.858694142011604, "learning_rate": 7.91366744582194e-06, "loss": 0.2789, "step": 91320 }, { "epoch": 1.8591348600508906, "grad_norm": 10.162455586066633, "learning_rate": 7.913089963665544e-06, "loss": 0.2423, "step": 91330 }, { "epoch": 1.8593384223918576, "grad_norm": 9.193818345360866, "learning_rate": 7.912512422675543e-06, "loss": 0.1878, "step": 91340 }, { "epoch": 1.8595419847328243, "grad_norm": 11.742547298770539, "learning_rate": 7.911934822863602e-06, "loss": 0.1997, "step": 91350 }, { "epoch": 1.8597455470737914, "grad_norm": 7.4770172633084275, "learning_rate": 7.911357164241385e-06, "loss": 0.1749, "step": 91360 }, { "epoch": 1.8599491094147582, "grad_norm": 7.193932997733764, "learning_rate": 7.910779446820564e-06, "loss": 0.1736, "step": 91370 }, { "epoch": 1.860152671755725, "grad_norm": 3.772930179060206, "learning_rate": 7.9102016706128e-06, "loss": 0.2354, "step": 91380 }, { "epoch": 1.8603562340966922, "grad_norm": 15.048650521397263, "learning_rate": 7.909623835629767e-06, "loss": 0.2103, "step": 91390 }, { "epoch": 1.860559796437659, "grad_norm": 8.276846662369671, "learning_rate": 7.90904594188313e-06, "loss": 0.2409, "step": 91400 }, { "epoch": 1.8607633587786259, "grad_norm": 3.3133731521602443, "learning_rate": 7.908467989384568e-06, "loss": 0.1745, "step": 91410 }, { "epoch": 1.860966921119593, "grad_norm": 6.983654817828181, "learning_rate": 7.907889978145746e-06, "loss": 0.1789, "step": 91420 }, { "epoch": 1.8611704834605598, "grad_norm": 9.505346222154122, "learning_rate": 7.907311908178341e-06, "loss": 0.1824, "step": 91430 }, { "epoch": 1.8613740458015267, "grad_norm": 15.568405233546944, "learning_rate": 7.906733779494027e-06, "loss": 0.2064, "step": 91440 }, { "epoch": 1.8615776081424937, "grad_norm": 19.417615702375677, "learning_rate": 7.906155592104481e-06, "loss": 0.1915, "step": 91450 }, { "epoch": 1.8617811704834606, "grad_norm": 10.789161653810815, "learning_rate": 7.90557734602138e-06, "loss": 0.3674, "step": 91460 }, { "epoch": 1.8619847328244274, "grad_norm": 9.618169083768608, "learning_rate": 7.904999041256402e-06, "loss": 0.238, "step": 91470 }, { "epoch": 1.8621882951653945, "grad_norm": 4.332707453788571, "learning_rate": 7.904420677821224e-06, "loss": 0.2397, "step": 91480 }, { "epoch": 1.8623918575063612, "grad_norm": 12.365295824607168, "learning_rate": 7.903842255727533e-06, "loss": 0.2474, "step": 91490 }, { "epoch": 1.8625954198473282, "grad_norm": 8.29669918994157, "learning_rate": 7.903263774987006e-06, "loss": 0.2472, "step": 91500 }, { "epoch": 1.8627989821882953, "grad_norm": 7.614802210872935, "learning_rate": 7.902685235611329e-06, "loss": 0.1588, "step": 91510 }, { "epoch": 1.863002544529262, "grad_norm": 15.031222673885841, "learning_rate": 7.902106637612184e-06, "loss": 0.21, "step": 91520 }, { "epoch": 1.863206106870229, "grad_norm": 8.626227035534821, "learning_rate": 7.901527981001258e-06, "loss": 0.2671, "step": 91530 }, { "epoch": 1.8634096692111959, "grad_norm": 3.5125882942748623, "learning_rate": 7.900949265790238e-06, "loss": 0.2016, "step": 91540 }, { "epoch": 1.8636132315521627, "grad_norm": 8.238724455908596, "learning_rate": 7.90037049199081e-06, "loss": 0.1907, "step": 91550 }, { "epoch": 1.8638167938931298, "grad_norm": 2.9173030543059904, "learning_rate": 7.899791659614664e-06, "loss": 0.3205, "step": 91560 }, { "epoch": 1.8640203562340967, "grad_norm": 2.2735319440281176, "learning_rate": 7.89921276867349e-06, "loss": 0.1979, "step": 91570 }, { "epoch": 1.8642239185750635, "grad_norm": 9.90690642833786, "learning_rate": 7.898633819178983e-06, "loss": 0.1796, "step": 91580 }, { "epoch": 1.8644274809160306, "grad_norm": 14.849851023147751, "learning_rate": 7.89805481114283e-06, "loss": 0.2252, "step": 91590 }, { "epoch": 1.8646310432569975, "grad_norm": 5.172189345662729, "learning_rate": 7.897475744576728e-06, "loss": 0.1432, "step": 91600 }, { "epoch": 1.8648346055979643, "grad_norm": 7.194288467928943, "learning_rate": 7.896896619492372e-06, "loss": 0.2376, "step": 91610 }, { "epoch": 1.8650381679389314, "grad_norm": 7.593981078938114, "learning_rate": 7.896317435901456e-06, "loss": 0.2299, "step": 91620 }, { "epoch": 1.8652417302798983, "grad_norm": 6.763969793107676, "learning_rate": 7.89573819381568e-06, "loss": 0.2217, "step": 91630 }, { "epoch": 1.8654452926208651, "grad_norm": 14.52024568679369, "learning_rate": 7.895158893246742e-06, "loss": 0.2346, "step": 91640 }, { "epoch": 1.8656488549618322, "grad_norm": 9.057274291790467, "learning_rate": 7.894579534206341e-06, "loss": 0.2432, "step": 91650 }, { "epoch": 1.8658524173027988, "grad_norm": 13.91105553105624, "learning_rate": 7.894000116706177e-06, "loss": 0.3237, "step": 91660 }, { "epoch": 1.866055979643766, "grad_norm": 13.447825202846522, "learning_rate": 7.893420640757956e-06, "loss": 0.2556, "step": 91670 }, { "epoch": 1.866259541984733, "grad_norm": 6.650788890674651, "learning_rate": 7.892841106373376e-06, "loss": 0.1423, "step": 91680 }, { "epoch": 1.8664631043256996, "grad_norm": 3.1449386686869087, "learning_rate": 7.892261513564144e-06, "loss": 0.111, "step": 91690 }, { "epoch": 1.8666666666666667, "grad_norm": 7.038480859988116, "learning_rate": 7.891681862341967e-06, "loss": 0.1622, "step": 91700 }, { "epoch": 1.8668702290076336, "grad_norm": 7.120222395606333, "learning_rate": 7.891102152718548e-06, "loss": 0.138, "step": 91710 }, { "epoch": 1.8670737913486004, "grad_norm": 11.03339414481448, "learning_rate": 7.890522384705599e-06, "loss": 0.2614, "step": 91720 }, { "epoch": 1.8672773536895675, "grad_norm": 6.1463997151570045, "learning_rate": 7.889942558314827e-06, "loss": 0.2217, "step": 91730 }, { "epoch": 1.8674809160305343, "grad_norm": 10.853386421637714, "learning_rate": 7.889362673557944e-06, "loss": 0.2056, "step": 91740 }, { "epoch": 1.8676844783715012, "grad_norm": 7.654698443117416, "learning_rate": 7.888782730446659e-06, "loss": 0.1644, "step": 91750 }, { "epoch": 1.8678880407124683, "grad_norm": 9.118206288470486, "learning_rate": 7.888202728992686e-06, "loss": 0.0974, "step": 91760 }, { "epoch": 1.8680916030534351, "grad_norm": 9.450115117850235, "learning_rate": 7.88762266920774e-06, "loss": 0.3208, "step": 91770 }, { "epoch": 1.868295165394402, "grad_norm": 6.769452297278513, "learning_rate": 7.887042551103533e-06, "loss": 0.2099, "step": 91780 }, { "epoch": 1.868498727735369, "grad_norm": 2.498802501849347, "learning_rate": 7.886462374691783e-06, "loss": 0.205, "step": 91790 }, { "epoch": 1.868702290076336, "grad_norm": 4.7035511383317905, "learning_rate": 7.88588213998421e-06, "loss": 0.318, "step": 91800 }, { "epoch": 1.8689058524173028, "grad_norm": 5.272966430836903, "learning_rate": 7.88530184699253e-06, "loss": 0.1311, "step": 91810 }, { "epoch": 1.8691094147582699, "grad_norm": 8.654688381970997, "learning_rate": 7.884721495728462e-06, "loss": 0.1653, "step": 91820 }, { "epoch": 1.8693129770992365, "grad_norm": 5.656714783631617, "learning_rate": 7.884141086203727e-06, "loss": 0.1718, "step": 91830 }, { "epoch": 1.8695165394402036, "grad_norm": 12.744144108742214, "learning_rate": 7.88356061843005e-06, "loss": 0.3999, "step": 91840 }, { "epoch": 1.8697201017811704, "grad_norm": 10.689444098231993, "learning_rate": 7.88298009241915e-06, "loss": 0.15, "step": 91850 }, { "epoch": 1.8699236641221373, "grad_norm": 7.6643078367736885, "learning_rate": 7.882399508182756e-06, "loss": 0.2704, "step": 91860 }, { "epoch": 1.8701272264631044, "grad_norm": 11.865068495579212, "learning_rate": 7.88181886573259e-06, "loss": 0.205, "step": 91870 }, { "epoch": 1.8703307888040712, "grad_norm": 8.826895729926113, "learning_rate": 7.881238165080381e-06, "loss": 0.2012, "step": 91880 }, { "epoch": 1.870534351145038, "grad_norm": 4.159083049682032, "learning_rate": 7.880657406237857e-06, "loss": 0.145, "step": 91890 }, { "epoch": 1.8707379134860052, "grad_norm": 6.067444568622219, "learning_rate": 7.880076589216745e-06, "loss": 0.1741, "step": 91900 }, { "epoch": 1.870941475826972, "grad_norm": 13.982360065498236, "learning_rate": 7.879495714028777e-06, "loss": 0.2852, "step": 91910 }, { "epoch": 1.8711450381679389, "grad_norm": 5.618084048465651, "learning_rate": 7.878914780685685e-06, "loss": 0.2745, "step": 91920 }, { "epoch": 1.871348600508906, "grad_norm": 8.332142774511569, "learning_rate": 7.8783337891992e-06, "loss": 0.2285, "step": 91930 }, { "epoch": 1.8715521628498728, "grad_norm": 10.293643609770024, "learning_rate": 7.877752739581058e-06, "loss": 0.1573, "step": 91940 }, { "epoch": 1.8717557251908397, "grad_norm": 11.7561761535587, "learning_rate": 7.877171631842992e-06, "loss": 0.2211, "step": 91950 }, { "epoch": 1.8719592875318067, "grad_norm": 10.894489085187665, "learning_rate": 7.876590465996739e-06, "loss": 0.2258, "step": 91960 }, { "epoch": 1.8721628498727734, "grad_norm": 6.701669390830301, "learning_rate": 7.876009242054037e-06, "loss": 0.1627, "step": 91970 }, { "epoch": 1.8723664122137404, "grad_norm": 5.594646928906022, "learning_rate": 7.875427960026624e-06, "loss": 0.1841, "step": 91980 }, { "epoch": 1.8725699745547075, "grad_norm": 11.715901507421536, "learning_rate": 7.87484661992624e-06, "loss": 0.1449, "step": 91990 }, { "epoch": 1.8727735368956742, "grad_norm": 5.1171792054825715, "learning_rate": 7.874265221764626e-06, "loss": 0.1953, "step": 92000 }, { "epoch": 1.8729770992366412, "grad_norm": 8.703699035629823, "learning_rate": 7.873683765553523e-06, "loss": 0.2295, "step": 92010 }, { "epoch": 1.873180661577608, "grad_norm": 15.019017497431557, "learning_rate": 7.873102251304675e-06, "loss": 0.2212, "step": 92020 }, { "epoch": 1.873384223918575, "grad_norm": 16.22776227112164, "learning_rate": 7.872520679029826e-06, "loss": 0.2211, "step": 92030 }, { "epoch": 1.873587786259542, "grad_norm": 7.5134408241055555, "learning_rate": 7.871939048740724e-06, "loss": 0.1831, "step": 92040 }, { "epoch": 1.8737913486005089, "grad_norm": 6.820160826062777, "learning_rate": 7.871357360449114e-06, "loss": 0.265, "step": 92050 }, { "epoch": 1.8739949109414757, "grad_norm": 13.281420213125784, "learning_rate": 7.870775614166742e-06, "loss": 0.2366, "step": 92060 }, { "epoch": 1.8741984732824428, "grad_norm": 11.956926064433867, "learning_rate": 7.870193809905359e-06, "loss": 0.3145, "step": 92070 }, { "epoch": 1.8744020356234097, "grad_norm": 11.712951795665138, "learning_rate": 7.869611947676718e-06, "loss": 0.1811, "step": 92080 }, { "epoch": 1.8746055979643765, "grad_norm": 8.76943562439962, "learning_rate": 7.869030027492566e-06, "loss": 0.2529, "step": 92090 }, { "epoch": 1.8748091603053436, "grad_norm": 12.138874680450751, "learning_rate": 7.868448049364657e-06, "loss": 0.2652, "step": 92100 }, { "epoch": 1.8750127226463105, "grad_norm": 2.7518696986695925, "learning_rate": 7.867866013304743e-06, "loss": 0.2044, "step": 92110 }, { "epoch": 1.8752162849872773, "grad_norm": 5.446340354433922, "learning_rate": 7.867283919324584e-06, "loss": 0.1952, "step": 92120 }, { "epoch": 1.8754198473282444, "grad_norm": 11.612525385047052, "learning_rate": 7.866701767435933e-06, "loss": 0.23, "step": 92130 }, { "epoch": 1.875623409669211, "grad_norm": 11.956608217736212, "learning_rate": 7.866119557650547e-06, "loss": 0.3443, "step": 92140 }, { "epoch": 1.8758269720101781, "grad_norm": 22.81660433441116, "learning_rate": 7.865537289980184e-06, "loss": 0.299, "step": 92150 }, { "epoch": 1.8760305343511452, "grad_norm": 4.412558926840491, "learning_rate": 7.864954964436607e-06, "loss": 0.1714, "step": 92160 }, { "epoch": 1.8762340966921118, "grad_norm": 10.13864647141528, "learning_rate": 7.864372581031571e-06, "loss": 0.263, "step": 92170 }, { "epoch": 1.876437659033079, "grad_norm": 0.4362503743336128, "learning_rate": 7.863790139776845e-06, "loss": 0.21, "step": 92180 }, { "epoch": 1.8766412213740458, "grad_norm": 20.41372792099036, "learning_rate": 7.863207640684187e-06, "loss": 0.1737, "step": 92190 }, { "epoch": 1.8768447837150126, "grad_norm": 5.725297296752952, "learning_rate": 7.862625083765363e-06, "loss": 0.2542, "step": 92200 }, { "epoch": 1.8770483460559797, "grad_norm": 14.173879031947475, "learning_rate": 7.862042469032138e-06, "loss": 0.2626, "step": 92210 }, { "epoch": 1.8772519083969466, "grad_norm": 7.216008049251166, "learning_rate": 7.86145979649628e-06, "loss": 0.1213, "step": 92220 }, { "epoch": 1.8774554707379134, "grad_norm": 4.906838800962499, "learning_rate": 7.860877066169555e-06, "loss": 0.16, "step": 92230 }, { "epoch": 1.8776590330788805, "grad_norm": 7.010463204350065, "learning_rate": 7.860294278063732e-06, "loss": 0.2686, "step": 92240 }, { "epoch": 1.8778625954198473, "grad_norm": 7.122524136197252, "learning_rate": 7.859711432190584e-06, "loss": 0.1837, "step": 92250 }, { "epoch": 1.8780661577608142, "grad_norm": 0.46325830803640494, "learning_rate": 7.859128528561878e-06, "loss": 0.1581, "step": 92260 }, { "epoch": 1.8782697201017813, "grad_norm": 9.431645542034204, "learning_rate": 7.85854556718939e-06, "loss": 0.1859, "step": 92270 }, { "epoch": 1.8784732824427481, "grad_norm": 9.241443380685842, "learning_rate": 7.857962548084894e-06, "loss": 0.1441, "step": 92280 }, { "epoch": 1.878676844783715, "grad_norm": 23.26927855088626, "learning_rate": 7.857379471260163e-06, "loss": 0.1912, "step": 92290 }, { "epoch": 1.878880407124682, "grad_norm": 2.0340174094245316, "learning_rate": 7.856796336726972e-06, "loss": 0.2932, "step": 92300 }, { "epoch": 1.8790839694656487, "grad_norm": 23.04576169986077, "learning_rate": 7.8562131444971e-06, "loss": 0.2111, "step": 92310 }, { "epoch": 1.8792875318066158, "grad_norm": 2.7479923280099894, "learning_rate": 7.855629894582323e-06, "loss": 0.2862, "step": 92320 }, { "epoch": 1.8794910941475829, "grad_norm": 19.648864921385467, "learning_rate": 7.855046586994426e-06, "loss": 0.2627, "step": 92330 }, { "epoch": 1.8796946564885495, "grad_norm": 9.572832894051405, "learning_rate": 7.854463221745183e-06, "loss": 0.2594, "step": 92340 }, { "epoch": 1.8798982188295166, "grad_norm": 21.053157293685654, "learning_rate": 7.85387979884638e-06, "loss": 0.1908, "step": 92350 }, { "epoch": 1.8801017811704834, "grad_norm": 13.112931321566418, "learning_rate": 7.853296318309798e-06, "loss": 0.223, "step": 92360 }, { "epoch": 1.8803053435114503, "grad_norm": 3.708280899385768, "learning_rate": 7.852712780147222e-06, "loss": 0.2389, "step": 92370 }, { "epoch": 1.8805089058524174, "grad_norm": 10.382672739218076, "learning_rate": 7.852129184370438e-06, "loss": 0.178, "step": 92380 }, { "epoch": 1.8807124681933842, "grad_norm": 10.938590613866204, "learning_rate": 7.851545530991228e-06, "loss": 0.3056, "step": 92390 }, { "epoch": 1.880916030534351, "grad_norm": 4.630913931918623, "learning_rate": 7.850961820021388e-06, "loss": 0.1926, "step": 92400 }, { "epoch": 1.8811195928753182, "grad_norm": 6.626420864617848, "learning_rate": 7.8503780514727e-06, "loss": 0.2304, "step": 92410 }, { "epoch": 1.881323155216285, "grad_norm": 8.954044961417955, "learning_rate": 7.849794225356957e-06, "loss": 0.1807, "step": 92420 }, { "epoch": 1.8815267175572519, "grad_norm": 11.400053286590637, "learning_rate": 7.849210341685947e-06, "loss": 0.2464, "step": 92430 }, { "epoch": 1.881730279898219, "grad_norm": 18.246466378158974, "learning_rate": 7.848626400471466e-06, "loss": 0.2619, "step": 92440 }, { "epoch": 1.8819338422391858, "grad_norm": 7.2976209758247546, "learning_rate": 7.848042401725306e-06, "loss": 0.262, "step": 92450 }, { "epoch": 1.8821374045801527, "grad_norm": 6.3682260737046255, "learning_rate": 7.84745834545926e-06, "loss": 0.186, "step": 92460 }, { "epoch": 1.8823409669211197, "grad_norm": 1.7432980702026928, "learning_rate": 7.846874231685127e-06, "loss": 0.1994, "step": 92470 }, { "epoch": 1.8825445292620864, "grad_norm": 3.7646111352786327, "learning_rate": 7.8462900604147e-06, "loss": 0.257, "step": 92480 }, { "epoch": 1.8827480916030535, "grad_norm": 7.561373790116907, "learning_rate": 7.84570583165978e-06, "loss": 0.1884, "step": 92490 }, { "epoch": 1.8829516539440203, "grad_norm": 10.969603762197657, "learning_rate": 7.845121545432167e-06, "loss": 0.2274, "step": 92500 }, { "epoch": 1.8831552162849872, "grad_norm": 7.9221423213105115, "learning_rate": 7.84453720174366e-06, "loss": 0.2651, "step": 92510 }, { "epoch": 1.8833587786259542, "grad_norm": 18.382071812243854, "learning_rate": 7.843952800606059e-06, "loss": 0.2335, "step": 92520 }, { "epoch": 1.883562340966921, "grad_norm": 11.52074904714787, "learning_rate": 7.843368342031168e-06, "loss": 0.2342, "step": 92530 }, { "epoch": 1.883765903307888, "grad_norm": 9.110278440907187, "learning_rate": 7.842783826030791e-06, "loss": 0.2466, "step": 92540 }, { "epoch": 1.883969465648855, "grad_norm": 6.190548524292844, "learning_rate": 7.842199252616734e-06, "loss": 0.1585, "step": 92550 }, { "epoch": 1.884173027989822, "grad_norm": 7.599856819341221, "learning_rate": 7.841614621800801e-06, "loss": 0.1497, "step": 92560 }, { "epoch": 1.8843765903307887, "grad_norm": 8.032650923880903, "learning_rate": 7.841029933594804e-06, "loss": 0.1193, "step": 92570 }, { "epoch": 1.8845801526717558, "grad_norm": 10.12635459816031, "learning_rate": 7.840445188010545e-06, "loss": 0.2093, "step": 92580 }, { "epoch": 1.8847837150127227, "grad_norm": 20.563590516510935, "learning_rate": 7.83986038505984e-06, "loss": 0.2903, "step": 92590 }, { "epoch": 1.8849872773536895, "grad_norm": 12.976471045454241, "learning_rate": 7.839275524754493e-06, "loss": 0.2158, "step": 92600 }, { "epoch": 1.8851908396946566, "grad_norm": 7.140320856266814, "learning_rate": 7.838690607106322e-06, "loss": 0.1647, "step": 92610 }, { "epoch": 1.8853944020356233, "grad_norm": 6.022188476527419, "learning_rate": 7.838105632127138e-06, "loss": 0.2271, "step": 92620 }, { "epoch": 1.8855979643765903, "grad_norm": 15.805181905620115, "learning_rate": 7.837520599828755e-06, "loss": 0.2863, "step": 92630 }, { "epoch": 1.8858015267175574, "grad_norm": 10.112674349558878, "learning_rate": 7.83693551022299e-06, "loss": 0.1172, "step": 92640 }, { "epoch": 1.886005089058524, "grad_norm": 4.13190650434604, "learning_rate": 7.836350363321657e-06, "loss": 0.2394, "step": 92650 }, { "epoch": 1.8862086513994911, "grad_norm": 2.2702697758664163, "learning_rate": 7.835765159136574e-06, "loss": 0.1339, "step": 92660 }, { "epoch": 1.886412213740458, "grad_norm": 12.503669013126709, "learning_rate": 7.835179897679563e-06, "loss": 0.1541, "step": 92670 }, { "epoch": 1.8866157760814248, "grad_norm": 8.402927870872906, "learning_rate": 7.834594578962442e-06, "loss": 0.193, "step": 92680 }, { "epoch": 1.886819338422392, "grad_norm": 0.7076236893659246, "learning_rate": 7.834009202997032e-06, "loss": 0.1853, "step": 92690 }, { "epoch": 1.8870229007633588, "grad_norm": 9.467318114725751, "learning_rate": 7.833423769795157e-06, "loss": 0.2325, "step": 92700 }, { "epoch": 1.8872264631043256, "grad_norm": 6.433108865537907, "learning_rate": 7.83283827936864e-06, "loss": 0.2926, "step": 92710 }, { "epoch": 1.8874300254452927, "grad_norm": 6.333102606318824, "learning_rate": 7.832252731729305e-06, "loss": 0.1918, "step": 92720 }, { "epoch": 1.8876335877862596, "grad_norm": 9.710049874572684, "learning_rate": 7.831667126888976e-06, "loss": 0.1929, "step": 92730 }, { "epoch": 1.8878371501272264, "grad_norm": 9.757116918323664, "learning_rate": 7.831081464859484e-06, "loss": 0.1468, "step": 92740 }, { "epoch": 1.8880407124681935, "grad_norm": 8.375601517269583, "learning_rate": 7.830495745652655e-06, "loss": 0.2594, "step": 92750 }, { "epoch": 1.8882442748091604, "grad_norm": 16.026236016300395, "learning_rate": 7.829909969280319e-06, "loss": 0.1828, "step": 92760 }, { "epoch": 1.8884478371501272, "grad_norm": 8.835965966036404, "learning_rate": 7.829324135754306e-06, "loss": 0.3003, "step": 92770 }, { "epoch": 1.8886513994910943, "grad_norm": 16.88406903054409, "learning_rate": 7.82873824508645e-06, "loss": 0.279, "step": 92780 }, { "epoch": 1.888854961832061, "grad_norm": 14.709157977559524, "learning_rate": 7.828152297288579e-06, "loss": 0.213, "step": 92790 }, { "epoch": 1.889058524173028, "grad_norm": 2.966316898588473, "learning_rate": 7.827566292372529e-06, "loss": 0.1601, "step": 92800 }, { "epoch": 1.889262086513995, "grad_norm": 11.889239083165878, "learning_rate": 7.826980230350139e-06, "loss": 0.2496, "step": 92810 }, { "epoch": 1.8894656488549617, "grad_norm": 7.436948054546135, "learning_rate": 7.82639411123324e-06, "loss": 0.1198, "step": 92820 }, { "epoch": 1.8896692111959288, "grad_norm": 25.32050926679262, "learning_rate": 7.825807935033671e-06, "loss": 0.1738, "step": 92830 }, { "epoch": 1.8898727735368956, "grad_norm": 7.016068727970922, "learning_rate": 7.825221701763273e-06, "loss": 0.2115, "step": 92840 }, { "epoch": 1.8900763358778625, "grad_norm": 10.746390386179453, "learning_rate": 7.824635411433883e-06, "loss": 0.327, "step": 92850 }, { "epoch": 1.8902798982188296, "grad_norm": 10.003479345647124, "learning_rate": 7.824049064057342e-06, "loss": 0.2187, "step": 92860 }, { "epoch": 1.8904834605597964, "grad_norm": 21.723497782504904, "learning_rate": 7.823462659645495e-06, "loss": 0.1972, "step": 92870 }, { "epoch": 1.8906870229007633, "grad_norm": 0.4899365006848575, "learning_rate": 7.82287619821018e-06, "loss": 0.159, "step": 92880 }, { "epoch": 1.8908905852417304, "grad_norm": 2.211808899457411, "learning_rate": 7.822289679763247e-06, "loss": 0.1666, "step": 92890 }, { "epoch": 1.8910941475826972, "grad_norm": 1.7144970940918314, "learning_rate": 7.821703104316537e-06, "loss": 0.2509, "step": 92900 }, { "epoch": 1.891297709923664, "grad_norm": 12.247642933551068, "learning_rate": 7.8211164718819e-06, "loss": 0.1762, "step": 92910 }, { "epoch": 1.8915012722646312, "grad_norm": 3.717141712467041, "learning_rate": 7.820529782471183e-06, "loss": 0.2134, "step": 92920 }, { "epoch": 1.891704834605598, "grad_norm": 13.675400613751766, "learning_rate": 7.819943036096232e-06, "loss": 0.2986, "step": 92930 }, { "epoch": 1.8919083969465649, "grad_norm": 3.9791023790939084, "learning_rate": 7.8193562327689e-06, "loss": 0.168, "step": 92940 }, { "epoch": 1.892111959287532, "grad_norm": 16.869632383254007, "learning_rate": 7.818769372501037e-06, "loss": 0.149, "step": 92950 }, { "epoch": 1.8923155216284986, "grad_norm": 15.813679234899375, "learning_rate": 7.818182455304497e-06, "loss": 0.2343, "step": 92960 }, { "epoch": 1.8925190839694657, "grad_norm": 12.399239630017217, "learning_rate": 7.81759548119113e-06, "loss": 0.2607, "step": 92970 }, { "epoch": 1.8927226463104325, "grad_norm": 6.4266618492882985, "learning_rate": 7.817008450172797e-06, "loss": 0.1616, "step": 92980 }, { "epoch": 1.8929262086513994, "grad_norm": 16.495738510703042, "learning_rate": 7.816421362261349e-06, "loss": 0.2007, "step": 92990 }, { "epoch": 1.8931297709923665, "grad_norm": 14.112789820656124, "learning_rate": 7.815834217468644e-06, "loss": 0.3151, "step": 93000 }, { "epoch": 1.8933333333333333, "grad_norm": 11.652759320581653, "learning_rate": 7.815247015806539e-06, "loss": 0.2176, "step": 93010 }, { "epoch": 1.8935368956743002, "grad_norm": 16.76722854941041, "learning_rate": 7.814659757286895e-06, "loss": 0.1735, "step": 93020 }, { "epoch": 1.8937404580152672, "grad_norm": 1.8773546205497535, "learning_rate": 7.814072441921573e-06, "loss": 0.155, "step": 93030 }, { "epoch": 1.893944020356234, "grad_norm": 7.924732848002227, "learning_rate": 7.813485069722432e-06, "loss": 0.2605, "step": 93040 }, { "epoch": 1.894147582697201, "grad_norm": 8.211832552979578, "learning_rate": 7.812897640701338e-06, "loss": 0.1707, "step": 93050 }, { "epoch": 1.894351145038168, "grad_norm": 15.409262850179443, "learning_rate": 7.812310154870153e-06, "loss": 0.2725, "step": 93060 }, { "epoch": 1.894554707379135, "grad_norm": 0.2729127361742504, "learning_rate": 7.81172261224074e-06, "loss": 0.2041, "step": 93070 }, { "epoch": 1.8947582697201018, "grad_norm": 17.420068718975646, "learning_rate": 7.811135012824968e-06, "loss": 0.1937, "step": 93080 }, { "epoch": 1.8949618320610688, "grad_norm": 10.443265972249737, "learning_rate": 7.810547356634706e-06, "loss": 0.1826, "step": 93090 }, { "epoch": 1.8951653944020355, "grad_norm": 8.42072731664568, "learning_rate": 7.809959643681818e-06, "loss": 0.1802, "step": 93100 }, { "epoch": 1.8953689567430025, "grad_norm": 16.732540413032247, "learning_rate": 7.809371873978176e-06, "loss": 0.2054, "step": 93110 }, { "epoch": 1.8955725190839696, "grad_norm": 8.847262526404354, "learning_rate": 7.808784047535652e-06, "loss": 0.1607, "step": 93120 }, { "epoch": 1.8957760814249363, "grad_norm": 0.9482095253618014, "learning_rate": 7.808196164366115e-06, "loss": 0.1679, "step": 93130 }, { "epoch": 1.8959796437659033, "grad_norm": 7.422127955793906, "learning_rate": 7.80760822448144e-06, "loss": 0.2219, "step": 93140 }, { "epoch": 1.8961832061068702, "grad_norm": 17.59054999607096, "learning_rate": 7.807020227893501e-06, "loss": 0.1935, "step": 93150 }, { "epoch": 1.896386768447837, "grad_norm": 29.220110037906466, "learning_rate": 7.806432174614173e-06, "loss": 0.245, "step": 93160 }, { "epoch": 1.8965903307888041, "grad_norm": 8.292721925193629, "learning_rate": 7.805844064655332e-06, "loss": 0.2205, "step": 93170 }, { "epoch": 1.896793893129771, "grad_norm": 7.771854348274284, "learning_rate": 7.805255898028855e-06, "loss": 0.2896, "step": 93180 }, { "epoch": 1.8969974554707378, "grad_norm": 11.97496497738248, "learning_rate": 7.804667674746625e-06, "loss": 0.3079, "step": 93190 }, { "epoch": 1.897201017811705, "grad_norm": 5.705112784394695, "learning_rate": 7.804079394820518e-06, "loss": 0.1393, "step": 93200 }, { "epoch": 1.8974045801526718, "grad_norm": 13.770866136635567, "learning_rate": 7.803491058262415e-06, "loss": 0.1635, "step": 93210 }, { "epoch": 1.8976081424936386, "grad_norm": 4.2636244311055425, "learning_rate": 7.8029026650842e-06, "loss": 0.2273, "step": 93220 }, { "epoch": 1.8978117048346057, "grad_norm": 12.51859885412217, "learning_rate": 7.802314215297755e-06, "loss": 0.2609, "step": 93230 }, { "epoch": 1.8980152671755726, "grad_norm": 11.549376604264026, "learning_rate": 7.801725708914966e-06, "loss": 0.2916, "step": 93240 }, { "epoch": 1.8982188295165394, "grad_norm": 3.554955826460828, "learning_rate": 7.801137145947717e-06, "loss": 0.2748, "step": 93250 }, { "epoch": 1.8984223918575065, "grad_norm": 10.354648435277104, "learning_rate": 7.800548526407896e-06, "loss": 0.1972, "step": 93260 }, { "epoch": 1.8986259541984731, "grad_norm": 16.44149099106403, "learning_rate": 7.79995985030739e-06, "loss": 0.1614, "step": 93270 }, { "epoch": 1.8988295165394402, "grad_norm": 12.68442872867443, "learning_rate": 7.799371117658088e-06, "loss": 0.2244, "step": 93280 }, { "epoch": 1.8990330788804073, "grad_norm": 9.800054761523134, "learning_rate": 7.79878232847188e-06, "loss": 0.2027, "step": 93290 }, { "epoch": 1.899236641221374, "grad_norm": 8.205601770047835, "learning_rate": 7.79819348276066e-06, "loss": 0.189, "step": 93300 }, { "epoch": 1.899440203562341, "grad_norm": 11.16790564442621, "learning_rate": 7.797604580536318e-06, "loss": 0.2715, "step": 93310 }, { "epoch": 1.8996437659033079, "grad_norm": 7.969881658987776, "learning_rate": 7.797015621810748e-06, "loss": 0.2556, "step": 93320 }, { "epoch": 1.8998473282442747, "grad_norm": 3.9144549121066596, "learning_rate": 7.796426606595847e-06, "loss": 0.2119, "step": 93330 }, { "epoch": 1.9000508905852418, "grad_norm": 0.6580683282813409, "learning_rate": 7.795837534903505e-06, "loss": 0.2425, "step": 93340 }, { "epoch": 1.9002544529262086, "grad_norm": 7.856440878492422, "learning_rate": 7.795248406745625e-06, "loss": 0.2114, "step": 93350 }, { "epoch": 1.9004580152671755, "grad_norm": 6.706578307585306, "learning_rate": 7.794659222134104e-06, "loss": 0.1692, "step": 93360 }, { "epoch": 1.9006615776081426, "grad_norm": 4.334948512276572, "learning_rate": 7.79406998108084e-06, "loss": 0.2391, "step": 93370 }, { "epoch": 1.9008651399491094, "grad_norm": 10.740726557027855, "learning_rate": 7.793480683597731e-06, "loss": 0.2977, "step": 93380 }, { "epoch": 1.9010687022900763, "grad_norm": 7.1050595785059105, "learning_rate": 7.792891329696685e-06, "loss": 0.1772, "step": 93390 }, { "epoch": 1.9012722646310434, "grad_norm": 5.570486083186729, "learning_rate": 7.792301919389603e-06, "loss": 0.2223, "step": 93400 }, { "epoch": 1.9014758269720102, "grad_norm": 14.368572630824191, "learning_rate": 7.791712452688385e-06, "loss": 0.2721, "step": 93410 }, { "epoch": 1.901679389312977, "grad_norm": 12.627334029034346, "learning_rate": 7.791122929604937e-06, "loss": 0.3024, "step": 93420 }, { "epoch": 1.9018829516539442, "grad_norm": 4.011054214070852, "learning_rate": 7.790533350151169e-06, "loss": 0.1964, "step": 93430 }, { "epoch": 1.9020865139949108, "grad_norm": 8.248345404497144, "learning_rate": 7.789943714338984e-06, "loss": 0.2733, "step": 93440 }, { "epoch": 1.9022900763358779, "grad_norm": 10.822352789782189, "learning_rate": 7.789354022180292e-06, "loss": 0.1889, "step": 93450 }, { "epoch": 1.9024936386768447, "grad_norm": 9.857024936405391, "learning_rate": 7.788764273687003e-06, "loss": 0.257, "step": 93460 }, { "epoch": 1.9026972010178116, "grad_norm": 10.988469706610505, "learning_rate": 7.788174468871029e-06, "loss": 0.2156, "step": 93470 }, { "epoch": 1.9029007633587787, "grad_norm": 2.7861914187686945, "learning_rate": 7.787584607744279e-06, "loss": 0.204, "step": 93480 }, { "epoch": 1.9031043256997455, "grad_norm": 13.638680792546484, "learning_rate": 7.78699469031867e-06, "loss": 0.1827, "step": 93490 }, { "epoch": 1.9033078880407124, "grad_norm": 6.634273609585943, "learning_rate": 7.78640471660611e-06, "loss": 0.2191, "step": 93500 }, { "epoch": 1.9035114503816795, "grad_norm": 12.384544802795322, "learning_rate": 7.78581468661852e-06, "loss": 0.1465, "step": 93510 }, { "epoch": 1.9037150127226463, "grad_norm": 1.2491723891072246, "learning_rate": 7.785224600367814e-06, "loss": 0.1737, "step": 93520 }, { "epoch": 1.9039185750636132, "grad_norm": 14.30257391424585, "learning_rate": 7.784634457865909e-06, "loss": 0.3806, "step": 93530 }, { "epoch": 1.9041221374045803, "grad_norm": 8.819131588382636, "learning_rate": 7.784044259124727e-06, "loss": 0.2072, "step": 93540 }, { "epoch": 1.904325699745547, "grad_norm": 7.976610262922781, "learning_rate": 7.783454004156184e-06, "loss": 0.1794, "step": 93550 }, { "epoch": 1.904529262086514, "grad_norm": 6.692435769446669, "learning_rate": 7.782863692972201e-06, "loss": 0.2688, "step": 93560 }, { "epoch": 1.904732824427481, "grad_norm": 6.729975658947901, "learning_rate": 7.782273325584702e-06, "loss": 0.295, "step": 93570 }, { "epoch": 1.9049363867684477, "grad_norm": 13.643993251059877, "learning_rate": 7.781682902005612e-06, "loss": 0.2747, "step": 93580 }, { "epoch": 1.9051399491094148, "grad_norm": 9.998129734286122, "learning_rate": 7.78109242224685e-06, "loss": 0.2565, "step": 93590 }, { "epoch": 1.9053435114503818, "grad_norm": 7.5774066310351165, "learning_rate": 7.780501886320345e-06, "loss": 0.2494, "step": 93600 }, { "epoch": 1.9055470737913485, "grad_norm": 4.800046186009256, "learning_rate": 7.779911294238024e-06, "loss": 0.2242, "step": 93610 }, { "epoch": 1.9057506361323155, "grad_norm": 6.394766123595956, "learning_rate": 7.779320646011813e-06, "loss": 0.2515, "step": 93620 }, { "epoch": 1.9059541984732824, "grad_norm": 3.5174736040876136, "learning_rate": 7.778729941653644e-06, "loss": 0.1823, "step": 93630 }, { "epoch": 1.9061577608142493, "grad_norm": 10.635987758141876, "learning_rate": 7.778139181175444e-06, "loss": 0.1794, "step": 93640 }, { "epoch": 1.9063613231552163, "grad_norm": 12.915758291813882, "learning_rate": 7.777548364589143e-06, "loss": 0.2756, "step": 93650 }, { "epoch": 1.9065648854961832, "grad_norm": 7.1037293366765795, "learning_rate": 7.776957491906679e-06, "loss": 0.1432, "step": 93660 }, { "epoch": 1.90676844783715, "grad_norm": 13.162591263162764, "learning_rate": 7.776366563139976e-06, "loss": 0.2002, "step": 93670 }, { "epoch": 1.9069720101781171, "grad_norm": 7.12391380937917, "learning_rate": 7.77577557830098e-06, "loss": 0.2871, "step": 93680 }, { "epoch": 1.907175572519084, "grad_norm": 5.935724148688521, "learning_rate": 7.77518453740162e-06, "loss": 0.2036, "step": 93690 }, { "epoch": 1.9073791348600508, "grad_norm": 8.12422852655689, "learning_rate": 7.774593440453832e-06, "loss": 0.2447, "step": 93700 }, { "epoch": 1.907582697201018, "grad_norm": 14.67773322252337, "learning_rate": 7.774002287469556e-06, "loss": 0.2432, "step": 93710 }, { "epoch": 1.9077862595419848, "grad_norm": 9.003361789186238, "learning_rate": 7.77341107846073e-06, "loss": 0.1573, "step": 93720 }, { "epoch": 1.9079898218829516, "grad_norm": 7.21770175618759, "learning_rate": 7.772819813439297e-06, "loss": 0.1957, "step": 93730 }, { "epoch": 1.9081933842239187, "grad_norm": 7.946986874187833, "learning_rate": 7.772228492417198e-06, "loss": 0.1724, "step": 93740 }, { "epoch": 1.9083969465648853, "grad_norm": 0.16017048836716416, "learning_rate": 7.77163711540637e-06, "loss": 0.127, "step": 93750 }, { "epoch": 1.9086005089058524, "grad_norm": 7.586419854371772, "learning_rate": 7.771045682418762e-06, "loss": 0.1589, "step": 93760 }, { "epoch": 1.9088040712468195, "grad_norm": 15.675900874018712, "learning_rate": 7.770454193466318e-06, "loss": 0.1555, "step": 93770 }, { "epoch": 1.9090076335877861, "grad_norm": 11.156316882142256, "learning_rate": 7.769862648560986e-06, "loss": 0.1334, "step": 93780 }, { "epoch": 1.9092111959287532, "grad_norm": 0.5852475753064869, "learning_rate": 7.769271047714705e-06, "loss": 0.2548, "step": 93790 }, { "epoch": 1.90941475826972, "grad_norm": 17.078322362615037, "learning_rate": 7.768679390939432e-06, "loss": 0.1317, "step": 93800 }, { "epoch": 1.909618320610687, "grad_norm": 7.924635463609873, "learning_rate": 7.768087678247111e-06, "loss": 0.2369, "step": 93810 }, { "epoch": 1.909821882951654, "grad_norm": 4.836796389856002, "learning_rate": 7.767495909649696e-06, "loss": 0.2488, "step": 93820 }, { "epoch": 1.9100254452926209, "grad_norm": 14.3146294456407, "learning_rate": 7.766904085159135e-06, "loss": 0.1433, "step": 93830 }, { "epoch": 1.9102290076335877, "grad_norm": 3.3977110787841096, "learning_rate": 7.766312204787381e-06, "loss": 0.1764, "step": 93840 }, { "epoch": 1.9104325699745548, "grad_norm": 8.01601048994763, "learning_rate": 7.765720268546392e-06, "loss": 0.232, "step": 93850 }, { "epoch": 1.9106361323155217, "grad_norm": 13.589808892685408, "learning_rate": 7.765128276448118e-06, "loss": 0.3067, "step": 93860 }, { "epoch": 1.9108396946564885, "grad_norm": 0.4233066029224578, "learning_rate": 7.764536228504517e-06, "loss": 0.1492, "step": 93870 }, { "epoch": 1.9110432569974556, "grad_norm": 1.7733104308641763, "learning_rate": 7.763944124727547e-06, "loss": 0.1198, "step": 93880 }, { "epoch": 1.9112468193384224, "grad_norm": 13.720655857689481, "learning_rate": 7.763351965129166e-06, "loss": 0.1651, "step": 93890 }, { "epoch": 1.9114503816793893, "grad_norm": 1.223910038937588, "learning_rate": 7.76275974972133e-06, "loss": 0.2077, "step": 93900 }, { "epoch": 1.9116539440203564, "grad_norm": 7.556173841355195, "learning_rate": 7.762167478516007e-06, "loss": 0.2555, "step": 93910 }, { "epoch": 1.911857506361323, "grad_norm": 9.64333563120399, "learning_rate": 7.761575151525152e-06, "loss": 0.2512, "step": 93920 }, { "epoch": 1.91206106870229, "grad_norm": 10.637655774336572, "learning_rate": 7.760982768760728e-06, "loss": 0.2587, "step": 93930 }, { "epoch": 1.912264631043257, "grad_norm": 0.4457170734446518, "learning_rate": 7.760390330234702e-06, "loss": 0.2528, "step": 93940 }, { "epoch": 1.9124681933842238, "grad_norm": 0.8997302040414769, "learning_rate": 7.75979783595904e-06, "loss": 0.1702, "step": 93950 }, { "epoch": 1.9126717557251909, "grad_norm": 6.7570689199682406, "learning_rate": 7.759205285945703e-06, "loss": 0.1975, "step": 93960 }, { "epoch": 1.9128753180661577, "grad_norm": 6.893353082687893, "learning_rate": 7.758612680206662e-06, "loss": 0.2271, "step": 93970 }, { "epoch": 1.9130788804071246, "grad_norm": 6.580339895493121, "learning_rate": 7.758020018753886e-06, "loss": 0.2142, "step": 93980 }, { "epoch": 1.9132824427480917, "grad_norm": 5.588543958362121, "learning_rate": 7.757427301599346e-06, "loss": 0.1618, "step": 93990 }, { "epoch": 1.9134860050890585, "grad_norm": 8.612776865859814, "learning_rate": 7.756834528755004e-06, "loss": 0.1899, "step": 94000 }, { "epoch": 1.9136895674300254, "grad_norm": 7.748259585028814, "learning_rate": 7.756241700232843e-06, "loss": 0.1802, "step": 94010 }, { "epoch": 1.9138931297709925, "grad_norm": 0.5348629693181466, "learning_rate": 7.755648816044828e-06, "loss": 0.1238, "step": 94020 }, { "epoch": 1.9140966921119593, "grad_norm": 8.923258038552651, "learning_rate": 7.755055876202937e-06, "loss": 0.2205, "step": 94030 }, { "epoch": 1.9143002544529262, "grad_norm": 26.38957897220306, "learning_rate": 7.754462880719145e-06, "loss": 0.1977, "step": 94040 }, { "epoch": 1.9145038167938933, "grad_norm": 8.46708878837535, "learning_rate": 7.753869829605426e-06, "loss": 0.254, "step": 94050 }, { "epoch": 1.9147073791348599, "grad_norm": 10.490284061114824, "learning_rate": 7.753276722873759e-06, "loss": 0.2184, "step": 94060 }, { "epoch": 1.914910941475827, "grad_norm": 8.25076549600513, "learning_rate": 7.752683560536123e-06, "loss": 0.1805, "step": 94070 }, { "epoch": 1.915114503816794, "grad_norm": 18.30909298954629, "learning_rate": 7.752090342604498e-06, "loss": 0.2445, "step": 94080 }, { "epoch": 1.9153180661577607, "grad_norm": 11.83327019037817, "learning_rate": 7.75149706909086e-06, "loss": 0.2785, "step": 94090 }, { "epoch": 1.9155216284987278, "grad_norm": 9.92979809698527, "learning_rate": 7.750903740007199e-06, "loss": 0.2125, "step": 94100 }, { "epoch": 1.9157251908396946, "grad_norm": 6.125210839520171, "learning_rate": 7.750310355365492e-06, "loss": 0.1912, "step": 94110 }, { "epoch": 1.9159287531806615, "grad_norm": 13.64721799278343, "learning_rate": 7.749716915177725e-06, "loss": 0.184, "step": 94120 }, { "epoch": 1.9161323155216285, "grad_norm": 14.885462158987828, "learning_rate": 7.749123419455884e-06, "loss": 0.1282, "step": 94130 }, { "epoch": 1.9163358778625954, "grad_norm": 0.28869680932700376, "learning_rate": 7.748529868211954e-06, "loss": 0.1134, "step": 94140 }, { "epoch": 1.9165394402035623, "grad_norm": 3.475949681666727, "learning_rate": 7.747936261457923e-06, "loss": 0.2018, "step": 94150 }, { "epoch": 1.9167430025445293, "grad_norm": 9.677442355333293, "learning_rate": 7.74734259920578e-06, "loss": 0.1922, "step": 94160 }, { "epoch": 1.9169465648854962, "grad_norm": 10.259738705977217, "learning_rate": 7.746748881467514e-06, "loss": 0.2649, "step": 94170 }, { "epoch": 1.917150127226463, "grad_norm": 12.512605991688826, "learning_rate": 7.74615510825512e-06, "loss": 0.2068, "step": 94180 }, { "epoch": 1.9173536895674301, "grad_norm": 22.86116035149644, "learning_rate": 7.745561279580582e-06, "loss": 0.1713, "step": 94190 }, { "epoch": 1.917557251908397, "grad_norm": 23.343690188016385, "learning_rate": 7.7449673954559e-06, "loss": 0.2367, "step": 94200 }, { "epoch": 1.9177608142493638, "grad_norm": 15.538481269838877, "learning_rate": 7.744373455893066e-06, "loss": 0.2502, "step": 94210 }, { "epoch": 1.917964376590331, "grad_norm": 5.0257564856813595, "learning_rate": 7.743779460904075e-06, "loss": 0.1437, "step": 94220 }, { "epoch": 1.9181679389312976, "grad_norm": 5.6819277518628954, "learning_rate": 7.743185410500924e-06, "loss": 0.1702, "step": 94230 }, { "epoch": 1.9183715012722646, "grad_norm": 13.531366373715828, "learning_rate": 7.742591304695611e-06, "loss": 0.1897, "step": 94240 }, { "epoch": 1.9185750636132317, "grad_norm": 6.190806290817693, "learning_rate": 7.741997143500132e-06, "loss": 0.257, "step": 94250 }, { "epoch": 1.9187786259541983, "grad_norm": 9.44601880191139, "learning_rate": 7.741402926926491e-06, "loss": 0.1657, "step": 94260 }, { "epoch": 1.9189821882951654, "grad_norm": 10.140652271004377, "learning_rate": 7.740808654986687e-06, "loss": 0.3838, "step": 94270 }, { "epoch": 1.9191857506361323, "grad_norm": 5.003799172783925, "learning_rate": 7.740214327692721e-06, "loss": 0.1882, "step": 94280 }, { "epoch": 1.9193893129770991, "grad_norm": 5.405687676492996, "learning_rate": 7.739619945056597e-06, "loss": 0.2022, "step": 94290 }, { "epoch": 1.9195928753180662, "grad_norm": 12.66024111083996, "learning_rate": 7.739025507090323e-06, "loss": 0.2574, "step": 94300 }, { "epoch": 1.919796437659033, "grad_norm": 0.09334938134506637, "learning_rate": 7.738431013805899e-06, "loss": 0.2017, "step": 94310 }, { "epoch": 1.92, "grad_norm": 3.0636488382889207, "learning_rate": 7.737836465215332e-06, "loss": 0.1596, "step": 94320 }, { "epoch": 1.920203562340967, "grad_norm": 6.418942470701165, "learning_rate": 7.737241861330633e-06, "loss": 0.1924, "step": 94330 }, { "epoch": 1.9204071246819339, "grad_norm": 12.715548128553873, "learning_rate": 7.736647202163807e-06, "loss": 0.2968, "step": 94340 }, { "epoch": 1.9206106870229007, "grad_norm": 6.5298414325293255, "learning_rate": 7.736052487726869e-06, "loss": 0.3056, "step": 94350 }, { "epoch": 1.9208142493638678, "grad_norm": 3.999970646094116, "learning_rate": 7.735457718031825e-06, "loss": 0.1576, "step": 94360 }, { "epoch": 1.9210178117048347, "grad_norm": 9.625114282395229, "learning_rate": 7.73486289309069e-06, "loss": 0.2182, "step": 94370 }, { "epoch": 1.9212213740458015, "grad_norm": 10.744810478492315, "learning_rate": 7.734268012915476e-06, "loss": 0.2913, "step": 94380 }, { "epoch": 1.9214249363867686, "grad_norm": 16.843102265729573, "learning_rate": 7.733673077518198e-06, "loss": 0.2374, "step": 94390 }, { "epoch": 1.9216284987277352, "grad_norm": 12.700731360248346, "learning_rate": 7.73307808691087e-06, "loss": 0.2488, "step": 94400 }, { "epoch": 1.9218320610687023, "grad_norm": 1.1194189853553882, "learning_rate": 7.73248304110551e-06, "loss": 0.2035, "step": 94410 }, { "epoch": 1.9220356234096692, "grad_norm": 6.200413601835158, "learning_rate": 7.731887940114137e-06, "loss": 0.2478, "step": 94420 }, { "epoch": 1.922239185750636, "grad_norm": 13.5874312215631, "learning_rate": 7.731292783948769e-06, "loss": 0.2183, "step": 94430 }, { "epoch": 1.922442748091603, "grad_norm": 11.634433682251156, "learning_rate": 7.730697572621424e-06, "loss": 0.2627, "step": 94440 }, { "epoch": 1.92264631043257, "grad_norm": 3.287877954170053, "learning_rate": 7.730102306144122e-06, "loss": 0.2243, "step": 94450 }, { "epoch": 1.9228498727735368, "grad_norm": 11.380449882779791, "learning_rate": 7.729506984528891e-06, "loss": 0.2048, "step": 94460 }, { "epoch": 1.9230534351145039, "grad_norm": 10.335465613991737, "learning_rate": 7.728911607787751e-06, "loss": 0.2696, "step": 94470 }, { "epoch": 1.9232569974554707, "grad_norm": 1.5664089209424412, "learning_rate": 7.728316175932727e-06, "loss": 0.1431, "step": 94480 }, { "epoch": 1.9234605597964376, "grad_norm": 13.879178912928722, "learning_rate": 7.72772068897584e-06, "loss": 0.2461, "step": 94490 }, { "epoch": 1.9236641221374047, "grad_norm": 9.796059255282808, "learning_rate": 7.727125146929123e-06, "loss": 0.2137, "step": 94500 }, { "epoch": 1.9238676844783715, "grad_norm": 3.0109230840575534, "learning_rate": 7.7265295498046e-06, "loss": 0.1846, "step": 94510 }, { "epoch": 1.9240712468193384, "grad_norm": 5.501841204113065, "learning_rate": 7.725933897614303e-06, "loss": 0.1294, "step": 94520 }, { "epoch": 1.9242748091603055, "grad_norm": 17.040621887722935, "learning_rate": 7.725338190370258e-06, "loss": 0.1786, "step": 94530 }, { "epoch": 1.924478371501272, "grad_norm": 11.915076072470162, "learning_rate": 7.7247424280845e-06, "loss": 0.2636, "step": 94540 }, { "epoch": 1.9246819338422392, "grad_norm": 12.41570368680537, "learning_rate": 7.724146610769057e-06, "loss": 0.1381, "step": 94550 }, { "epoch": 1.9248854961832063, "grad_norm": 5.964062477356806, "learning_rate": 7.723550738435966e-06, "loss": 0.1311, "step": 94560 }, { "epoch": 1.925089058524173, "grad_norm": 15.674491800630719, "learning_rate": 7.722954811097259e-06, "loss": 0.1926, "step": 94570 }, { "epoch": 1.92529262086514, "grad_norm": 13.526270797428248, "learning_rate": 7.722358828764972e-06, "loss": 0.1141, "step": 94580 }, { "epoch": 1.9254961832061068, "grad_norm": 15.843969600987153, "learning_rate": 7.721762791451144e-06, "loss": 0.2276, "step": 94590 }, { "epoch": 1.9256997455470737, "grad_norm": 18.296941657689686, "learning_rate": 7.721166699167809e-06, "loss": 0.1598, "step": 94600 }, { "epoch": 1.9259033078880408, "grad_norm": 5.898426852215036, "learning_rate": 7.720570551927009e-06, "loss": 0.2003, "step": 94610 }, { "epoch": 1.9261068702290076, "grad_norm": 5.256680321411502, "learning_rate": 7.719974349740781e-06, "loss": 0.2477, "step": 94620 }, { "epoch": 1.9263104325699745, "grad_norm": 3.989549927894699, "learning_rate": 7.71937809262117e-06, "loss": 0.1575, "step": 94630 }, { "epoch": 1.9265139949109416, "grad_norm": 20.248510324973672, "learning_rate": 7.718781780580213e-06, "loss": 0.2855, "step": 94640 }, { "epoch": 1.9267175572519084, "grad_norm": 4.1685627889520775, "learning_rate": 7.71818541362996e-06, "loss": 0.1651, "step": 94650 }, { "epoch": 1.9269211195928753, "grad_norm": 8.175851842333943, "learning_rate": 7.717588991782448e-06, "loss": 0.2458, "step": 94660 }, { "epoch": 1.9271246819338423, "grad_norm": 23.737829478805136, "learning_rate": 7.716992515049728e-06, "loss": 0.3001, "step": 94670 }, { "epoch": 1.9273282442748092, "grad_norm": 1.4193104343927139, "learning_rate": 7.716395983443843e-06, "loss": 0.199, "step": 94680 }, { "epoch": 1.927531806615776, "grad_norm": 6.068456551494226, "learning_rate": 7.715799396976845e-06, "loss": 0.1475, "step": 94690 }, { "epoch": 1.9277353689567431, "grad_norm": 0.4143355311794175, "learning_rate": 7.715202755660778e-06, "loss": 0.1967, "step": 94700 }, { "epoch": 1.9279389312977098, "grad_norm": 2.2124753658745626, "learning_rate": 7.714606059507694e-06, "loss": 0.2642, "step": 94710 }, { "epoch": 1.9281424936386768, "grad_norm": 6.874448620537929, "learning_rate": 7.714009308529646e-06, "loss": 0.2305, "step": 94720 }, { "epoch": 1.928346055979644, "grad_norm": 6.567462148102152, "learning_rate": 7.713412502738683e-06, "loss": 0.1531, "step": 94730 }, { "epoch": 1.9285496183206106, "grad_norm": 7.004548612907242, "learning_rate": 7.712815642146861e-06, "loss": 0.21, "step": 94740 }, { "epoch": 1.9287531806615776, "grad_norm": 2.6389165551301224, "learning_rate": 7.712218726766232e-06, "loss": 0.1741, "step": 94750 }, { "epoch": 1.9289567430025445, "grad_norm": 8.057161753611396, "learning_rate": 7.711621756608853e-06, "loss": 0.1891, "step": 94760 }, { "epoch": 1.9291603053435114, "grad_norm": 10.310540768785875, "learning_rate": 7.71102473168678e-06, "loss": 0.1906, "step": 94770 }, { "epoch": 1.9293638676844784, "grad_norm": 14.3060080445159, "learning_rate": 7.71042765201207e-06, "loss": 0.2128, "step": 94780 }, { "epoch": 1.9295674300254453, "grad_norm": 14.790650028696595, "learning_rate": 7.709830517596783e-06, "loss": 0.2029, "step": 94790 }, { "epoch": 1.9297709923664121, "grad_norm": 1.1986869616103726, "learning_rate": 7.70923332845298e-06, "loss": 0.1621, "step": 94800 }, { "epoch": 1.9299745547073792, "grad_norm": 6.8017280773839275, "learning_rate": 7.708636084592719e-06, "loss": 0.0965, "step": 94810 }, { "epoch": 1.930178117048346, "grad_norm": 4.236566310854634, "learning_rate": 7.708038786028063e-06, "loss": 0.1317, "step": 94820 }, { "epoch": 1.930381679389313, "grad_norm": 7.853645451624749, "learning_rate": 7.707441432771077e-06, "loss": 0.1952, "step": 94830 }, { "epoch": 1.93058524173028, "grad_norm": 12.594506650408151, "learning_rate": 7.706844024833824e-06, "loss": 0.1933, "step": 94840 }, { "epoch": 1.9307888040712469, "grad_norm": 19.478003083203227, "learning_rate": 7.70624656222837e-06, "loss": 0.2172, "step": 94850 }, { "epoch": 1.9309923664122137, "grad_norm": 8.246504705937445, "learning_rate": 7.705649044966779e-06, "loss": 0.3287, "step": 94860 }, { "epoch": 1.9311959287531808, "grad_norm": 2.0351748250334203, "learning_rate": 7.70505147306112e-06, "loss": 0.3235, "step": 94870 }, { "epoch": 1.9313994910941474, "grad_norm": 8.984877463524674, "learning_rate": 7.704453846523466e-06, "loss": 0.2411, "step": 94880 }, { "epoch": 1.9316030534351145, "grad_norm": 19.367184580924405, "learning_rate": 7.703856165365882e-06, "loss": 0.1895, "step": 94890 }, { "epoch": 1.9318066157760814, "grad_norm": 20.082424208578097, "learning_rate": 7.70325842960044e-06, "loss": 0.2074, "step": 94900 }, { "epoch": 1.9320101781170482, "grad_norm": 4.561013280169526, "learning_rate": 7.702660639239211e-06, "loss": 0.3421, "step": 94910 }, { "epoch": 1.9322137404580153, "grad_norm": 9.927798852491922, "learning_rate": 7.702062794294272e-06, "loss": 0.21, "step": 94920 }, { "epoch": 1.9324173027989822, "grad_norm": 11.675035938537325, "learning_rate": 7.701464894777693e-06, "loss": 0.2047, "step": 94930 }, { "epoch": 1.932620865139949, "grad_norm": 5.112454865422645, "learning_rate": 7.70086694070155e-06, "loss": 0.2215, "step": 94940 }, { "epoch": 1.932824427480916, "grad_norm": 10.742150459301465, "learning_rate": 7.70026893207792e-06, "loss": 0.2093, "step": 94950 }, { "epoch": 1.933027989821883, "grad_norm": 17.8455044040286, "learning_rate": 7.699670868918884e-06, "loss": 0.2398, "step": 94960 }, { "epoch": 1.9332315521628498, "grad_norm": 10.853176020336472, "learning_rate": 7.699072751236517e-06, "loss": 0.158, "step": 94970 }, { "epoch": 1.9334351145038169, "grad_norm": 10.416645345946296, "learning_rate": 7.698474579042898e-06, "loss": 0.2476, "step": 94980 }, { "epoch": 1.9336386768447837, "grad_norm": 12.163292364160984, "learning_rate": 7.69787635235011e-06, "loss": 0.1895, "step": 94990 }, { "epoch": 1.9338422391857506, "grad_norm": 18.13026868865171, "learning_rate": 7.697278071170234e-06, "loss": 0.3165, "step": 95000 }, { "epoch": 1.9340458015267177, "grad_norm": 3.3097862533794595, "learning_rate": 7.696679735515354e-06, "loss": 0.132, "step": 95010 }, { "epoch": 1.9342493638676843, "grad_norm": 8.246629568625027, "learning_rate": 7.696081345397551e-06, "loss": 0.352, "step": 95020 }, { "epoch": 1.9344529262086514, "grad_norm": 10.815858028786074, "learning_rate": 7.695482900828915e-06, "loss": 0.1874, "step": 95030 }, { "epoch": 1.9346564885496185, "grad_norm": 5.718914829486252, "learning_rate": 7.69488440182153e-06, "loss": 0.1923, "step": 95040 }, { "epoch": 1.934860050890585, "grad_norm": 10.811217022146623, "learning_rate": 7.69428584838748e-06, "loss": 0.2491, "step": 95050 }, { "epoch": 1.9350636132315522, "grad_norm": 6.518527615906298, "learning_rate": 7.693687240538863e-06, "loss": 0.0971, "step": 95060 }, { "epoch": 1.935267175572519, "grad_norm": 11.148199840031145, "learning_rate": 7.69308857828776e-06, "loss": 0.1334, "step": 95070 }, { "epoch": 1.935470737913486, "grad_norm": 16.66601907512759, "learning_rate": 7.692489861646264e-06, "loss": 0.2084, "step": 95080 }, { "epoch": 1.935674300254453, "grad_norm": 9.896497085123515, "learning_rate": 7.691891090626468e-06, "loss": 0.153, "step": 95090 }, { "epoch": 1.9358778625954198, "grad_norm": 10.616191133603328, "learning_rate": 7.691292265240465e-06, "loss": 0.1756, "step": 95100 }, { "epoch": 1.9360814249363867, "grad_norm": 15.765395328134382, "learning_rate": 7.690693385500346e-06, "loss": 0.2289, "step": 95110 }, { "epoch": 1.9362849872773538, "grad_norm": 7.61459437236873, "learning_rate": 7.69009445141821e-06, "loss": 0.1782, "step": 95120 }, { "epoch": 1.9364885496183206, "grad_norm": 3.2835071375107625, "learning_rate": 7.689495463006152e-06, "loss": 0.1248, "step": 95130 }, { "epoch": 1.9366921119592875, "grad_norm": 15.840482183101157, "learning_rate": 7.688896420276269e-06, "loss": 0.2132, "step": 95140 }, { "epoch": 1.9368956743002546, "grad_norm": 4.806669948714463, "learning_rate": 7.688297323240658e-06, "loss": 0.2249, "step": 95150 }, { "epoch": 1.9370992366412214, "grad_norm": 6.0531505858513714, "learning_rate": 7.68769817191142e-06, "loss": 0.187, "step": 95160 }, { "epoch": 1.9373027989821883, "grad_norm": 4.859499550014795, "learning_rate": 7.687098966300659e-06, "loss": 0.253, "step": 95170 }, { "epoch": 1.9375063613231553, "grad_norm": 7.3951921155627725, "learning_rate": 7.686499706420471e-06, "loss": 0.1498, "step": 95180 }, { "epoch": 1.937709923664122, "grad_norm": 8.364649078854672, "learning_rate": 7.68590039228296e-06, "loss": 0.2407, "step": 95190 }, { "epoch": 1.937913486005089, "grad_norm": 6.649160798787132, "learning_rate": 7.685301023900234e-06, "loss": 0.2645, "step": 95200 }, { "epoch": 1.9381170483460561, "grad_norm": 6.555446086952039, "learning_rate": 7.684701601284395e-06, "loss": 0.2535, "step": 95210 }, { "epoch": 1.9383206106870228, "grad_norm": 12.459700071592396, "learning_rate": 7.684102124447548e-06, "loss": 0.3065, "step": 95220 }, { "epoch": 1.9385241730279898, "grad_norm": 12.386784920948575, "learning_rate": 7.6835025934018e-06, "loss": 0.2367, "step": 95230 }, { "epoch": 1.9387277353689567, "grad_norm": 4.2500915567562885, "learning_rate": 7.682903008159264e-06, "loss": 0.1982, "step": 95240 }, { "epoch": 1.9389312977099236, "grad_norm": 6.451563280415311, "learning_rate": 7.682303368732044e-06, "loss": 0.2461, "step": 95250 }, { "epoch": 1.9391348600508906, "grad_norm": 24.76794524431235, "learning_rate": 7.681703675132254e-06, "loss": 0.2445, "step": 95260 }, { "epoch": 1.9393384223918575, "grad_norm": 0.691582568226162, "learning_rate": 7.681103927372003e-06, "loss": 0.252, "step": 95270 }, { "epoch": 1.9395419847328244, "grad_norm": 18.101225034136064, "learning_rate": 7.680504125463404e-06, "loss": 0.3161, "step": 95280 }, { "epoch": 1.9397455470737914, "grad_norm": 10.700476129989754, "learning_rate": 7.679904269418575e-06, "loss": 0.2075, "step": 95290 }, { "epoch": 1.9399491094147583, "grad_norm": 4.73574377706617, "learning_rate": 7.679304359249626e-06, "loss": 0.1764, "step": 95300 }, { "epoch": 1.9401526717557251, "grad_norm": 2.9013470354458772, "learning_rate": 7.678704394968674e-06, "loss": 0.2418, "step": 95310 }, { "epoch": 1.9403562340966922, "grad_norm": 2.9722069679861574, "learning_rate": 7.678104376587837e-06, "loss": 0.169, "step": 95320 }, { "epoch": 1.940559796437659, "grad_norm": 7.481033929451646, "learning_rate": 7.677504304119233e-06, "loss": 0.2063, "step": 95330 }, { "epoch": 1.940763358778626, "grad_norm": 21.40831239780627, "learning_rate": 7.67690417757498e-06, "loss": 0.2536, "step": 95340 }, { "epoch": 1.940966921119593, "grad_norm": 7.515168695143095, "learning_rate": 7.676303996967201e-06, "loss": 0.1928, "step": 95350 }, { "epoch": 1.9411704834605596, "grad_norm": 4.028081112428922, "learning_rate": 7.675703762308014e-06, "loss": 0.2096, "step": 95360 }, { "epoch": 1.9413740458015267, "grad_norm": 8.77459678469417, "learning_rate": 7.675103473609545e-06, "loss": 0.2085, "step": 95370 }, { "epoch": 1.9415776081424938, "grad_norm": 19.26104954016513, "learning_rate": 7.674503130883915e-06, "loss": 0.1586, "step": 95380 }, { "epoch": 1.9417811704834604, "grad_norm": 1.317957879999412, "learning_rate": 7.67390273414325e-06, "loss": 0.1419, "step": 95390 }, { "epoch": 1.9419847328244275, "grad_norm": 11.316039526757303, "learning_rate": 7.673302283399674e-06, "loss": 0.3175, "step": 95400 }, { "epoch": 1.9421882951653944, "grad_norm": 10.48907399017064, "learning_rate": 7.672701778665316e-06, "loss": 0.1629, "step": 95410 }, { "epoch": 1.9423918575063612, "grad_norm": 2.8105267245211167, "learning_rate": 7.672101219952303e-06, "loss": 0.1142, "step": 95420 }, { "epoch": 1.9425954198473283, "grad_norm": 1.5006907879330234, "learning_rate": 7.671500607272766e-06, "loss": 0.2844, "step": 95430 }, { "epoch": 1.9427989821882952, "grad_norm": 9.880380657024565, "learning_rate": 7.67089994063883e-06, "loss": 0.2522, "step": 95440 }, { "epoch": 1.943002544529262, "grad_norm": 11.818908691106245, "learning_rate": 7.670299220062632e-06, "loss": 0.2542, "step": 95450 }, { "epoch": 1.943206106870229, "grad_norm": 8.045238593551812, "learning_rate": 7.669698445556301e-06, "loss": 0.2013, "step": 95460 }, { "epoch": 1.943409669211196, "grad_norm": 8.419023217878172, "learning_rate": 7.669097617131972e-06, "loss": 0.2612, "step": 95470 }, { "epoch": 1.9436132315521628, "grad_norm": 0.408508461227362, "learning_rate": 7.668496734801778e-06, "loss": 0.1015, "step": 95480 }, { "epoch": 1.94381679389313, "grad_norm": 7.7239966541352105, "learning_rate": 7.667895798577856e-06, "loss": 0.2532, "step": 95490 }, { "epoch": 1.9440203562340967, "grad_norm": 5.67458394045541, "learning_rate": 7.667294808472342e-06, "loss": 0.2963, "step": 95500 }, { "epoch": 1.9442239185750636, "grad_norm": 9.38604077964658, "learning_rate": 7.666693764497374e-06, "loss": 0.208, "step": 95510 }, { "epoch": 1.9444274809160307, "grad_norm": 15.478794714340587, "learning_rate": 7.66609266666509e-06, "loss": 0.2193, "step": 95520 }, { "epoch": 1.9446310432569973, "grad_norm": 14.068916466908831, "learning_rate": 7.665491514987631e-06, "loss": 0.2074, "step": 95530 }, { "epoch": 1.9448346055979644, "grad_norm": 7.537501871785529, "learning_rate": 7.664890309477139e-06, "loss": 0.2155, "step": 95540 }, { "epoch": 1.9450381679389313, "grad_norm": 1.0583015978642447, "learning_rate": 7.664289050145755e-06, "loss": 0.1654, "step": 95550 }, { "epoch": 1.945241730279898, "grad_norm": 6.884838243112654, "learning_rate": 7.663687737005619e-06, "loss": 0.2501, "step": 95560 }, { "epoch": 1.9454452926208652, "grad_norm": 7.32064800266354, "learning_rate": 7.663086370068882e-06, "loss": 0.2414, "step": 95570 }, { "epoch": 1.945648854961832, "grad_norm": 13.696917891424537, "learning_rate": 7.662484949347684e-06, "loss": 0.2866, "step": 95580 }, { "epoch": 1.945852417302799, "grad_norm": 22.662533377288273, "learning_rate": 7.661883474854172e-06, "loss": 0.12, "step": 95590 }, { "epoch": 1.946055979643766, "grad_norm": 6.904685718908559, "learning_rate": 7.661281946600497e-06, "loss": 0.2215, "step": 95600 }, { "epoch": 1.9462595419847328, "grad_norm": 7.470506506076064, "learning_rate": 7.660680364598805e-06, "loss": 0.2083, "step": 95610 }, { "epoch": 1.9464631043256997, "grad_norm": 3.1026902552994216, "learning_rate": 7.660078728861244e-06, "loss": 0.1809, "step": 95620 }, { "epoch": 1.9466666666666668, "grad_norm": 7.021934490268919, "learning_rate": 7.659477039399968e-06, "loss": 0.1683, "step": 95630 }, { "epoch": 1.9468702290076336, "grad_norm": 0.46396274210869953, "learning_rate": 7.658875296227129e-06, "loss": 0.1686, "step": 95640 }, { "epoch": 1.9470737913486005, "grad_norm": 17.37001174464654, "learning_rate": 7.658273499354878e-06, "loss": 0.2329, "step": 95650 }, { "epoch": 1.9472773536895676, "grad_norm": 29.86877561080297, "learning_rate": 7.65767164879537e-06, "loss": 0.2548, "step": 95660 }, { "epoch": 1.9474809160305342, "grad_norm": 8.786435501908862, "learning_rate": 7.65706974456076e-06, "loss": 0.2271, "step": 95670 }, { "epoch": 1.9476844783715013, "grad_norm": 12.09794380409205, "learning_rate": 7.656467786663205e-06, "loss": 0.2384, "step": 95680 }, { "epoch": 1.9478880407124683, "grad_norm": 15.906430723953342, "learning_rate": 7.65586577511486e-06, "loss": 0.197, "step": 95690 }, { "epoch": 1.948091603053435, "grad_norm": 1.9216595506362553, "learning_rate": 7.655263709927885e-06, "loss": 0.17, "step": 95700 }, { "epoch": 1.948295165394402, "grad_norm": 9.095871875492852, "learning_rate": 7.65466159111444e-06, "loss": 0.2465, "step": 95710 }, { "epoch": 1.948498727735369, "grad_norm": 5.436720924715908, "learning_rate": 7.654059418686686e-06, "loss": 0.2628, "step": 95720 }, { "epoch": 1.9487022900763358, "grad_norm": 8.68381733412888, "learning_rate": 7.653457192656781e-06, "loss": 0.1779, "step": 95730 }, { "epoch": 1.9489058524173029, "grad_norm": 5.57204896669328, "learning_rate": 7.652854913036892e-06, "loss": 0.1865, "step": 95740 }, { "epoch": 1.9491094147582697, "grad_norm": 9.060904125106095, "learning_rate": 7.652252579839182e-06, "loss": 0.3288, "step": 95750 }, { "epoch": 1.9493129770992366, "grad_norm": 2.9906149343139234, "learning_rate": 7.651650193075815e-06, "loss": 0.2064, "step": 95760 }, { "epoch": 1.9495165394402036, "grad_norm": 6.292159715196698, "learning_rate": 7.651047752758955e-06, "loss": 0.2043, "step": 95770 }, { "epoch": 1.9497201017811705, "grad_norm": 0.38872564318361236, "learning_rate": 7.650445258900772e-06, "loss": 0.2182, "step": 95780 }, { "epoch": 1.9499236641221374, "grad_norm": 3.399845647771995, "learning_rate": 7.649842711513435e-06, "loss": 0.171, "step": 95790 }, { "epoch": 1.9501272264631044, "grad_norm": 4.89694558608758, "learning_rate": 7.64924011060911e-06, "loss": 0.1959, "step": 95800 }, { "epoch": 1.9503307888040713, "grad_norm": 9.2585198315264, "learning_rate": 7.648637456199968e-06, "loss": 0.1683, "step": 95810 }, { "epoch": 1.9505343511450381, "grad_norm": 4.779565084867205, "learning_rate": 7.648034748298182e-06, "loss": 0.2862, "step": 95820 }, { "epoch": 1.9507379134860052, "grad_norm": 10.986669379475288, "learning_rate": 7.647431986915923e-06, "loss": 0.1357, "step": 95830 }, { "epoch": 1.9509414758269719, "grad_norm": 8.518787509324984, "learning_rate": 7.646829172065367e-06, "loss": 0.1756, "step": 95840 }, { "epoch": 1.951145038167939, "grad_norm": 13.66539956106929, "learning_rate": 7.646226303758684e-06, "loss": 0.2921, "step": 95850 }, { "epoch": 1.951348600508906, "grad_norm": 6.4912427627056495, "learning_rate": 7.645623382008056e-06, "loss": 0.3367, "step": 95860 }, { "epoch": 1.9515521628498727, "grad_norm": 7.669532993815448, "learning_rate": 7.645020406825654e-06, "loss": 0.1323, "step": 95870 }, { "epoch": 1.9517557251908397, "grad_norm": 3.611129313818164, "learning_rate": 7.64441737822366e-06, "loss": 0.2049, "step": 95880 }, { "epoch": 1.9519592875318066, "grad_norm": 7.8029474649496535, "learning_rate": 7.643814296214247e-06, "loss": 0.2771, "step": 95890 }, { "epoch": 1.9521628498727734, "grad_norm": 6.886053473078539, "learning_rate": 7.643211160809603e-06, "loss": 0.1981, "step": 95900 }, { "epoch": 1.9523664122137405, "grad_norm": 11.806896545386358, "learning_rate": 7.642607972021903e-06, "loss": 0.2931, "step": 95910 }, { "epoch": 1.9525699745547074, "grad_norm": 10.301415374095074, "learning_rate": 7.642004729863332e-06, "loss": 0.2673, "step": 95920 }, { "epoch": 1.9527735368956742, "grad_norm": 11.893249861381594, "learning_rate": 7.641401434346072e-06, "loss": 0.2251, "step": 95930 }, { "epoch": 1.9529770992366413, "grad_norm": 8.951117512923739, "learning_rate": 7.64079808548231e-06, "loss": 0.1834, "step": 95940 }, { "epoch": 1.9531806615776082, "grad_norm": 16.44711973287702, "learning_rate": 7.640194683284228e-06, "loss": 0.1904, "step": 95950 }, { "epoch": 1.953384223918575, "grad_norm": 10.188896290192512, "learning_rate": 7.639591227764015e-06, "loss": 0.2528, "step": 95960 }, { "epoch": 1.953587786259542, "grad_norm": 9.684385013979298, "learning_rate": 7.638987718933854e-06, "loss": 0.1878, "step": 95970 }, { "epoch": 1.953791348600509, "grad_norm": 6.335116594687869, "learning_rate": 7.63838415680594e-06, "loss": 0.2047, "step": 95980 }, { "epoch": 1.9539949109414758, "grad_norm": 4.759558447690779, "learning_rate": 7.637780541392459e-06, "loss": 0.2318, "step": 95990 }, { "epoch": 1.954198473282443, "grad_norm": 11.761600847585312, "learning_rate": 7.637176872705602e-06, "loss": 0.2641, "step": 96000 }, { "epoch": 1.9544020356234095, "grad_norm": 9.337784148670538, "learning_rate": 7.636573150757562e-06, "loss": 0.1921, "step": 96010 }, { "epoch": 1.9546055979643766, "grad_norm": 14.252986492410752, "learning_rate": 7.63596937556053e-06, "loss": 0.2151, "step": 96020 }, { "epoch": 1.9548091603053435, "grad_norm": 11.102501517310113, "learning_rate": 7.635365547126705e-06, "loss": 0.2198, "step": 96030 }, { "epoch": 1.9550127226463103, "grad_norm": 13.73881879123947, "learning_rate": 7.634761665468276e-06, "loss": 0.2992, "step": 96040 }, { "epoch": 1.9552162849872774, "grad_norm": 8.089224794161327, "learning_rate": 7.634157730597441e-06, "loss": 0.1399, "step": 96050 }, { "epoch": 1.9554198473282443, "grad_norm": 19.192421494427432, "learning_rate": 7.633553742526399e-06, "loss": 0.2423, "step": 96060 }, { "epoch": 1.955623409669211, "grad_norm": 11.139549519193737, "learning_rate": 7.632949701267347e-06, "loss": 0.2771, "step": 96070 }, { "epoch": 1.9558269720101782, "grad_norm": 9.540475290161552, "learning_rate": 7.632345606832484e-06, "loss": 0.2564, "step": 96080 }, { "epoch": 1.956030534351145, "grad_norm": 4.775759173300744, "learning_rate": 7.63174145923401e-06, "loss": 0.2162, "step": 96090 }, { "epoch": 1.956234096692112, "grad_norm": 6.418866182051531, "learning_rate": 7.631137258484128e-06, "loss": 0.2254, "step": 96100 }, { "epoch": 1.956437659033079, "grad_norm": 12.689447635036647, "learning_rate": 7.630533004595041e-06, "loss": 0.2269, "step": 96110 }, { "epoch": 1.9566412213740458, "grad_norm": 3.1408063182637878, "learning_rate": 7.629928697578954e-06, "loss": 0.1952, "step": 96120 }, { "epoch": 1.9568447837150127, "grad_norm": 11.412916941486884, "learning_rate": 7.629324337448067e-06, "loss": 0.1815, "step": 96130 }, { "epoch": 1.9570483460559798, "grad_norm": 0.598113610280862, "learning_rate": 7.628719924214589e-06, "loss": 0.1568, "step": 96140 }, { "epoch": 1.9572519083969464, "grad_norm": 2.7312789018637265, "learning_rate": 7.628115457890727e-06, "loss": 0.1951, "step": 96150 }, { "epoch": 1.9574554707379135, "grad_norm": 3.5572550658474698, "learning_rate": 7.627510938488687e-06, "loss": 0.1859, "step": 96160 }, { "epoch": 1.9576590330788806, "grad_norm": 16.620458350093134, "learning_rate": 7.626906366020681e-06, "loss": 0.2293, "step": 96170 }, { "epoch": 1.9578625954198472, "grad_norm": 6.662668039502462, "learning_rate": 7.626301740498916e-06, "loss": 0.2637, "step": 96180 }, { "epoch": 1.9580661577608143, "grad_norm": 4.755711884273, "learning_rate": 7.625697061935606e-06, "loss": 0.1901, "step": 96190 }, { "epoch": 1.9582697201017811, "grad_norm": 13.380823363043962, "learning_rate": 7.62509233034296e-06, "loss": 0.241, "step": 96200 }, { "epoch": 1.958473282442748, "grad_norm": 9.18261184270999, "learning_rate": 7.624487545733196e-06, "loss": 0.2318, "step": 96210 }, { "epoch": 1.958676844783715, "grad_norm": 2.1124296182603493, "learning_rate": 7.623882708118524e-06, "loss": 0.2235, "step": 96220 }, { "epoch": 1.958880407124682, "grad_norm": 6.030016536327999, "learning_rate": 7.623277817511161e-06, "loss": 0.2065, "step": 96230 }, { "epoch": 1.9590839694656488, "grad_norm": 21.28595005977711, "learning_rate": 7.622672873923324e-06, "loss": 0.1805, "step": 96240 }, { "epoch": 1.9592875318066159, "grad_norm": 12.655121547833568, "learning_rate": 7.622067877367232e-06, "loss": 0.2172, "step": 96250 }, { "epoch": 1.9594910941475827, "grad_norm": 18.86627460808944, "learning_rate": 7.6214628278551e-06, "loss": 0.1843, "step": 96260 }, { "epoch": 1.9596946564885496, "grad_norm": 19.85832871568432, "learning_rate": 7.62085772539915e-06, "loss": 0.1706, "step": 96270 }, { "epoch": 1.9598982188295166, "grad_norm": 10.21864496032159, "learning_rate": 7.620252570011603e-06, "loss": 0.1944, "step": 96280 }, { "epoch": 1.9601017811704835, "grad_norm": 9.380985418689292, "learning_rate": 7.619647361704681e-06, "loss": 0.1321, "step": 96290 }, { "epoch": 1.9603053435114504, "grad_norm": 3.65320489648175, "learning_rate": 7.619042100490605e-06, "loss": 0.2002, "step": 96300 }, { "epoch": 1.9605089058524174, "grad_norm": 5.411949015238946, "learning_rate": 7.618436786381601e-06, "loss": 0.0978, "step": 96310 }, { "epoch": 1.960712468193384, "grad_norm": 10.55642940216932, "learning_rate": 7.617831419389894e-06, "loss": 0.2119, "step": 96320 }, { "epoch": 1.9609160305343512, "grad_norm": 34.49890752037242, "learning_rate": 7.6172259995277105e-06, "loss": 0.2205, "step": 96330 }, { "epoch": 1.9611195928753182, "grad_norm": 18.83937107221922, "learning_rate": 7.616620526807276e-06, "loss": 0.1316, "step": 96340 }, { "epoch": 1.9613231552162849, "grad_norm": 3.2565131068277644, "learning_rate": 7.616015001240819e-06, "loss": 0.3933, "step": 96350 }, { "epoch": 1.961526717557252, "grad_norm": 7.275937922958298, "learning_rate": 7.615409422840571e-06, "loss": 0.113, "step": 96360 }, { "epoch": 1.9617302798982188, "grad_norm": 7.826519853193702, "learning_rate": 7.614803791618761e-06, "loss": 0.2384, "step": 96370 }, { "epoch": 1.9619338422391857, "grad_norm": 9.349304282700592, "learning_rate": 7.6141981075876205e-06, "loss": 0.2222, "step": 96380 }, { "epoch": 1.9621374045801527, "grad_norm": 4.641362690038927, "learning_rate": 7.6135923707593814e-06, "loss": 0.1971, "step": 96390 }, { "epoch": 1.9623409669211196, "grad_norm": 10.779743516198309, "learning_rate": 7.612986581146278e-06, "loss": 0.2724, "step": 96400 }, { "epoch": 1.9625445292620864, "grad_norm": 0.06953235566656638, "learning_rate": 7.612380738760546e-06, "loss": 0.2056, "step": 96410 }, { "epoch": 1.9627480916030535, "grad_norm": 0.19833647808737784, "learning_rate": 7.611774843614421e-06, "loss": 0.226, "step": 96420 }, { "epoch": 1.9629516539440204, "grad_norm": 4.467532923748917, "learning_rate": 7.611168895720136e-06, "loss": 0.3364, "step": 96430 }, { "epoch": 1.9631552162849872, "grad_norm": 1.7704945828033356, "learning_rate": 7.610562895089935e-06, "loss": 0.1493, "step": 96440 }, { "epoch": 1.9633587786259543, "grad_norm": 5.812347184189052, "learning_rate": 7.609956841736053e-06, "loss": 0.1594, "step": 96450 }, { "epoch": 1.9635623409669212, "grad_norm": 7.520359740730039, "learning_rate": 7.609350735670731e-06, "loss": 0.2916, "step": 96460 }, { "epoch": 1.963765903307888, "grad_norm": 7.7088169937048, "learning_rate": 7.6087445769062105e-06, "loss": 0.1777, "step": 96470 }, { "epoch": 1.963969465648855, "grad_norm": 12.929783151599125, "learning_rate": 7.6081383654547315e-06, "loss": 0.1796, "step": 96480 }, { "epoch": 1.9641730279898217, "grad_norm": 6.099079481035749, "learning_rate": 7.607532101328541e-06, "loss": 0.1491, "step": 96490 }, { "epoch": 1.9643765903307888, "grad_norm": 7.353952033928951, "learning_rate": 7.6069257845398805e-06, "loss": 0.1638, "step": 96500 }, { "epoch": 1.9645801526717557, "grad_norm": 10.914264948701389, "learning_rate": 7.606319415100995e-06, "loss": 0.2951, "step": 96510 }, { "epoch": 1.9647837150127225, "grad_norm": 7.248240658676192, "learning_rate": 7.6057129930241336e-06, "loss": 0.1955, "step": 96520 }, { "epoch": 1.9649872773536896, "grad_norm": 17.704862677345986, "learning_rate": 7.6051065183215426e-06, "loss": 0.1854, "step": 96530 }, { "epoch": 1.9651908396946565, "grad_norm": 12.201827484113792, "learning_rate": 7.604499991005469e-06, "loss": 0.2626, "step": 96540 }, { "epoch": 1.9653944020356233, "grad_norm": 15.59132543095464, "learning_rate": 7.6038934110881635e-06, "loss": 0.2525, "step": 96550 }, { "epoch": 1.9655979643765904, "grad_norm": 9.437527121104553, "learning_rate": 7.603286778581877e-06, "loss": 0.2307, "step": 96560 }, { "epoch": 1.9658015267175573, "grad_norm": 1.172263625341837, "learning_rate": 7.60268009349886e-06, "loss": 0.1913, "step": 96570 }, { "epoch": 1.9660050890585241, "grad_norm": 3.9164123401638733, "learning_rate": 7.602073355851367e-06, "loss": 0.2493, "step": 96580 }, { "epoch": 1.9662086513994912, "grad_norm": 0.5903697141856342, "learning_rate": 7.601466565651652e-06, "loss": 0.2244, "step": 96590 }, { "epoch": 1.966412213740458, "grad_norm": 6.387445805429602, "learning_rate": 7.600859722911969e-06, "loss": 0.2628, "step": 96600 }, { "epoch": 1.966615776081425, "grad_norm": 14.976930999817595, "learning_rate": 7.600252827644572e-06, "loss": 0.2375, "step": 96610 }, { "epoch": 1.966819338422392, "grad_norm": 8.129508311753876, "learning_rate": 7.599645879861722e-06, "loss": 0.22, "step": 96620 }, { "epoch": 1.9670229007633586, "grad_norm": 8.008873751764963, "learning_rate": 7.599038879575673e-06, "loss": 0.1069, "step": 96630 }, { "epoch": 1.9672264631043257, "grad_norm": 5.059613708456527, "learning_rate": 7.5984318267986885e-06, "loss": 0.1673, "step": 96640 }, { "epoch": 1.9674300254452928, "grad_norm": 14.94804894868729, "learning_rate": 7.597824721543027e-06, "loss": 0.3129, "step": 96650 }, { "epoch": 1.9676335877862594, "grad_norm": 8.793323979625468, "learning_rate": 7.597217563820948e-06, "loss": 0.1775, "step": 96660 }, { "epoch": 1.9678371501272265, "grad_norm": 5.931030264689723, "learning_rate": 7.596610353644716e-06, "loss": 0.1609, "step": 96670 }, { "epoch": 1.9680407124681933, "grad_norm": 3.339505062408085, "learning_rate": 7.596003091026591e-06, "loss": 0.0827, "step": 96680 }, { "epoch": 1.9682442748091602, "grad_norm": 15.73267167604243, "learning_rate": 7.595395775978842e-06, "loss": 0.1911, "step": 96690 }, { "epoch": 1.9684478371501273, "grad_norm": 14.90754881848071, "learning_rate": 7.594788408513734e-06, "loss": 0.2324, "step": 96700 }, { "epoch": 1.9686513994910941, "grad_norm": 10.867861046914975, "learning_rate": 7.594180988643529e-06, "loss": 0.2131, "step": 96710 }, { "epoch": 1.968854961832061, "grad_norm": 3.4769737898146036, "learning_rate": 7.593573516380498e-06, "loss": 0.1942, "step": 96720 }, { "epoch": 1.969058524173028, "grad_norm": 15.40883861681868, "learning_rate": 7.592965991736911e-06, "loss": 0.2047, "step": 96730 }, { "epoch": 1.969262086513995, "grad_norm": 6.591893228768254, "learning_rate": 7.592358414725035e-06, "loss": 0.2247, "step": 96740 }, { "epoch": 1.9694656488549618, "grad_norm": 8.125725248645038, "learning_rate": 7.591750785357142e-06, "loss": 0.2467, "step": 96750 }, { "epoch": 1.9696692111959289, "grad_norm": 9.340295045064309, "learning_rate": 7.591143103645504e-06, "loss": 0.2054, "step": 96760 }, { "epoch": 1.9698727735368957, "grad_norm": 6.661905838192136, "learning_rate": 7.590535369602393e-06, "loss": 0.1287, "step": 96770 }, { "epoch": 1.9700763358778626, "grad_norm": 15.207942950007462, "learning_rate": 7.5899275832400845e-06, "loss": 0.2411, "step": 96780 }, { "epoch": 1.9702798982188297, "grad_norm": 15.916317999565399, "learning_rate": 7.589319744570854e-06, "loss": 0.1771, "step": 96790 }, { "epoch": 1.9704834605597963, "grad_norm": 15.974620402600737, "learning_rate": 7.5887118536069715e-06, "loss": 0.2196, "step": 96800 }, { "epoch": 1.9706870229007634, "grad_norm": 10.165462367318398, "learning_rate": 7.588103910360723e-06, "loss": 0.1949, "step": 96810 }, { "epoch": 1.9708905852417304, "grad_norm": 11.057582214267233, "learning_rate": 7.587495914844382e-06, "loss": 0.177, "step": 96820 }, { "epoch": 1.971094147582697, "grad_norm": 13.911281190595849, "learning_rate": 7.586887867070227e-06, "loss": 0.2162, "step": 96830 }, { "epoch": 1.9712977099236642, "grad_norm": 7.779664187499926, "learning_rate": 7.586279767050542e-06, "loss": 0.1573, "step": 96840 }, { "epoch": 1.971501272264631, "grad_norm": 11.04894219870469, "learning_rate": 7.5856716147976024e-06, "loss": 0.2124, "step": 96850 }, { "epoch": 1.9717048346055979, "grad_norm": 3.6220710674360377, "learning_rate": 7.585063410323697e-06, "loss": 0.1352, "step": 96860 }, { "epoch": 1.971908396946565, "grad_norm": 2.3622542462236384, "learning_rate": 7.584455153641106e-06, "loss": 0.1477, "step": 96870 }, { "epoch": 1.9721119592875318, "grad_norm": 16.591943236784402, "learning_rate": 7.583846844762114e-06, "loss": 0.314, "step": 96880 }, { "epoch": 1.9723155216284987, "grad_norm": 4.896811019167751, "learning_rate": 7.583238483699007e-06, "loss": 0.1592, "step": 96890 }, { "epoch": 1.9725190839694657, "grad_norm": 0.8518700481291197, "learning_rate": 7.5826300704640725e-06, "loss": 0.2069, "step": 96900 }, { "epoch": 1.9727226463104326, "grad_norm": 11.927539827505589, "learning_rate": 7.582021605069596e-06, "loss": 0.2346, "step": 96910 }, { "epoch": 1.9729262086513994, "grad_norm": 7.470845533365887, "learning_rate": 7.581413087527868e-06, "loss": 0.28, "step": 96920 }, { "epoch": 1.9731297709923665, "grad_norm": 12.205440606662732, "learning_rate": 7.580804517851179e-06, "loss": 0.2076, "step": 96930 }, { "epoch": 1.9733333333333334, "grad_norm": 2.124133038563819, "learning_rate": 7.5801958960518175e-06, "loss": 0.1363, "step": 96940 }, { "epoch": 1.9735368956743002, "grad_norm": 12.69914211690753, "learning_rate": 7.579587222142078e-06, "loss": 0.2022, "step": 96950 }, { "epoch": 1.9737404580152673, "grad_norm": 7.097856617271852, "learning_rate": 7.578978496134249e-06, "loss": 0.21, "step": 96960 }, { "epoch": 1.973944020356234, "grad_norm": 6.877501312857816, "learning_rate": 7.57836971804063e-06, "loss": 0.22, "step": 96970 }, { "epoch": 1.974147582697201, "grad_norm": 3.3832555052960527, "learning_rate": 7.5777608878735135e-06, "loss": 0.2076, "step": 96980 }, { "epoch": 1.9743511450381679, "grad_norm": 6.827176660081543, "learning_rate": 7.577152005645196e-06, "loss": 0.253, "step": 96990 }, { "epoch": 1.9745547073791347, "grad_norm": 4.440274643732583, "learning_rate": 7.5765430713679726e-06, "loss": 0.2042, "step": 97000 }, { "epoch": 1.9747582697201018, "grad_norm": 3.1043985718325033, "learning_rate": 7.575934085054144e-06, "loss": 0.2023, "step": 97010 }, { "epoch": 1.9749618320610687, "grad_norm": 12.873574131274975, "learning_rate": 7.575325046716011e-06, "loss": 0.2101, "step": 97020 }, { "epoch": 1.9751653944020355, "grad_norm": 3.8394049266918633, "learning_rate": 7.57471595636587e-06, "loss": 0.2142, "step": 97030 }, { "epoch": 1.9753689567430026, "grad_norm": 11.983545211414022, "learning_rate": 7.574106814016024e-06, "loss": 0.1947, "step": 97040 }, { "epoch": 1.9755725190839695, "grad_norm": 7.1863019639601955, "learning_rate": 7.5734976196787744e-06, "loss": 0.1813, "step": 97050 }, { "epoch": 1.9757760814249363, "grad_norm": 9.022099924518121, "learning_rate": 7.572888373366427e-06, "loss": 0.2576, "step": 97060 }, { "epoch": 1.9759796437659034, "grad_norm": 8.640997959350164, "learning_rate": 7.572279075091285e-06, "loss": 0.2166, "step": 97070 }, { "epoch": 1.9761832061068703, "grad_norm": 12.17307929571441, "learning_rate": 7.571669724865655e-06, "loss": 0.1806, "step": 97080 }, { "epoch": 1.9763867684478371, "grad_norm": 13.942467905410991, "learning_rate": 7.57106032270184e-06, "loss": 0.2948, "step": 97090 }, { "epoch": 1.9765903307888042, "grad_norm": 12.957043754227112, "learning_rate": 7.570450868612152e-06, "loss": 0.1734, "step": 97100 }, { "epoch": 1.9767938931297708, "grad_norm": 1.876269770115878, "learning_rate": 7.569841362608899e-06, "loss": 0.2902, "step": 97110 }, { "epoch": 1.976997455470738, "grad_norm": 13.987847646411206, "learning_rate": 7.5692318047043876e-06, "loss": 0.3324, "step": 97120 }, { "epoch": 1.977201017811705, "grad_norm": 11.531527391167772, "learning_rate": 7.568622194910932e-06, "loss": 0.2903, "step": 97130 }, { "epoch": 1.9774045801526716, "grad_norm": 4.972688298666503, "learning_rate": 7.568012533240843e-06, "loss": 0.1934, "step": 97140 }, { "epoch": 1.9776081424936387, "grad_norm": 2.913277764299496, "learning_rate": 7.5674028197064305e-06, "loss": 0.197, "step": 97150 }, { "epoch": 1.9778117048346056, "grad_norm": 11.95929784675584, "learning_rate": 7.566793054320014e-06, "loss": 0.2502, "step": 97160 }, { "epoch": 1.9780152671755724, "grad_norm": 3.446343840282192, "learning_rate": 7.566183237093906e-06, "loss": 0.1489, "step": 97170 }, { "epoch": 1.9782188295165395, "grad_norm": 1.8902794800490252, "learning_rate": 7.565573368040422e-06, "loss": 0.187, "step": 97180 }, { "epoch": 1.9784223918575063, "grad_norm": 9.67407298761567, "learning_rate": 7.564963447171879e-06, "loss": 0.1579, "step": 97190 }, { "epoch": 1.9786259541984732, "grad_norm": 7.798160656207762, "learning_rate": 7.564353474500597e-06, "loss": 0.2373, "step": 97200 }, { "epoch": 1.9788295165394403, "grad_norm": 10.569244702348062, "learning_rate": 7.56374345003889e-06, "loss": 0.2416, "step": 97210 }, { "epoch": 1.9790330788804071, "grad_norm": 7.656963063764037, "learning_rate": 7.563133373799085e-06, "loss": 0.1939, "step": 97220 }, { "epoch": 1.979236641221374, "grad_norm": 2.7036636524672826, "learning_rate": 7.562523245793499e-06, "loss": 0.3404, "step": 97230 }, { "epoch": 1.979440203562341, "grad_norm": 4.411137639364929, "learning_rate": 7.561913066034457e-06, "loss": 0.227, "step": 97240 }, { "epoch": 1.979643765903308, "grad_norm": 4.6921473037551324, "learning_rate": 7.5613028345342786e-06, "loss": 0.1621, "step": 97250 }, { "epoch": 1.9798473282442748, "grad_norm": 6.398710549585956, "learning_rate": 7.560692551305293e-06, "loss": 0.1802, "step": 97260 }, { "epoch": 1.9800508905852419, "grad_norm": 7.076464769488197, "learning_rate": 7.560082216359821e-06, "loss": 0.185, "step": 97270 }, { "epoch": 1.9802544529262085, "grad_norm": 5.085698464651816, "learning_rate": 7.5594718297101935e-06, "loss": 0.2304, "step": 97280 }, { "epoch": 1.9804580152671756, "grad_norm": 4.000840903867361, "learning_rate": 7.558861391368734e-06, "loss": 0.2001, "step": 97290 }, { "epoch": 1.9806615776081427, "grad_norm": 4.490366431949184, "learning_rate": 7.558250901347772e-06, "loss": 0.1186, "step": 97300 }, { "epoch": 1.9808651399491093, "grad_norm": 5.83673388445834, "learning_rate": 7.557640359659641e-06, "loss": 0.167, "step": 97310 }, { "epoch": 1.9810687022900764, "grad_norm": 0.9599703659584314, "learning_rate": 7.557029766316668e-06, "loss": 0.2077, "step": 97320 }, { "epoch": 1.9812722646310432, "grad_norm": 14.55103074769303, "learning_rate": 7.556419121331184e-06, "loss": 0.2442, "step": 97330 }, { "epoch": 1.98147582697201, "grad_norm": 3.0286371628872324, "learning_rate": 7.555808424715524e-06, "loss": 0.2544, "step": 97340 }, { "epoch": 1.9816793893129772, "grad_norm": 17.70800491735652, "learning_rate": 7.555197676482022e-06, "loss": 0.1837, "step": 97350 }, { "epoch": 1.981882951653944, "grad_norm": 3.1230881901125587, "learning_rate": 7.55458687664301e-06, "loss": 0.2372, "step": 97360 }, { "epoch": 1.9820865139949109, "grad_norm": 6.529390535299333, "learning_rate": 7.553976025210826e-06, "loss": 0.237, "step": 97370 }, { "epoch": 1.982290076335878, "grad_norm": 3.2275995216964075, "learning_rate": 7.553365122197807e-06, "loss": 0.157, "step": 97380 }, { "epoch": 1.9824936386768448, "grad_norm": 4.048488778507033, "learning_rate": 7.552754167616292e-06, "loss": 0.1585, "step": 97390 }, { "epoch": 1.9826972010178117, "grad_norm": 20.736183106808223, "learning_rate": 7.552143161478618e-06, "loss": 0.1522, "step": 97400 }, { "epoch": 1.9829007633587787, "grad_norm": 6.636593634094418, "learning_rate": 7.551532103797125e-06, "loss": 0.2614, "step": 97410 }, { "epoch": 1.9831043256997456, "grad_norm": 3.5102721102317336, "learning_rate": 7.550920994584154e-06, "loss": 0.1702, "step": 97420 }, { "epoch": 1.9833078880407125, "grad_norm": 11.399727880850746, "learning_rate": 7.55030983385205e-06, "loss": 0.266, "step": 97430 }, { "epoch": 1.9835114503816795, "grad_norm": 3.5875824456142964, "learning_rate": 7.549698621613153e-06, "loss": 0.1478, "step": 97440 }, { "epoch": 1.9837150127226462, "grad_norm": 6.331036895645905, "learning_rate": 7.54908735787981e-06, "loss": 0.1868, "step": 97450 }, { "epoch": 1.9839185750636132, "grad_norm": 8.228435037617103, "learning_rate": 7.548476042664361e-06, "loss": 0.2374, "step": 97460 }, { "epoch": 1.98412213740458, "grad_norm": 0.5544067444730835, "learning_rate": 7.5478646759791574e-06, "loss": 0.1607, "step": 97470 }, { "epoch": 1.984325699745547, "grad_norm": 14.76388379506033, "learning_rate": 7.5472532578365466e-06, "loss": 0.2948, "step": 97480 }, { "epoch": 1.984529262086514, "grad_norm": 6.001888258269444, "learning_rate": 7.546641788248874e-06, "loss": 0.2288, "step": 97490 }, { "epoch": 1.984732824427481, "grad_norm": 4.52856496040314, "learning_rate": 7.5460302672284906e-06, "loss": 0.1392, "step": 97500 }, { "epoch": 1.9849363867684477, "grad_norm": 5.696004405769422, "learning_rate": 7.5454186947877475e-06, "loss": 0.2027, "step": 97510 }, { "epoch": 1.9851399491094148, "grad_norm": 6.443547093648575, "learning_rate": 7.544807070938995e-06, "loss": 0.2079, "step": 97520 }, { "epoch": 1.9853435114503817, "grad_norm": 31.241148814259518, "learning_rate": 7.5441953956945865e-06, "loss": 0.201, "step": 97530 }, { "epoch": 1.9855470737913485, "grad_norm": 4.2928643556455155, "learning_rate": 7.543583669066874e-06, "loss": 0.2258, "step": 97540 }, { "epoch": 1.9857506361323156, "grad_norm": 11.287282156349116, "learning_rate": 7.5429718910682135e-06, "loss": 0.1746, "step": 97550 }, { "epoch": 1.9859541984732825, "grad_norm": 9.338867368308645, "learning_rate": 7.54236006171096e-06, "loss": 0.1831, "step": 97560 }, { "epoch": 1.9861577608142493, "grad_norm": 4.177298042933945, "learning_rate": 7.541748181007472e-06, "loss": 0.187, "step": 97570 }, { "epoch": 1.9863613231552164, "grad_norm": 10.568776713741467, "learning_rate": 7.541136248970104e-06, "loss": 0.1907, "step": 97580 }, { "epoch": 1.986564885496183, "grad_norm": 18.851348933897654, "learning_rate": 7.540524265611218e-06, "loss": 0.3085, "step": 97590 }, { "epoch": 1.9867684478371501, "grad_norm": 3.071386261462468, "learning_rate": 7.539912230943171e-06, "loss": 0.283, "step": 97600 }, { "epoch": 1.9869720101781172, "grad_norm": 17.787122871894546, "learning_rate": 7.539300144978327e-06, "loss": 0.1878, "step": 97610 }, { "epoch": 1.9871755725190838, "grad_norm": 19.323243115815146, "learning_rate": 7.5386880077290435e-06, "loss": 0.3141, "step": 97620 }, { "epoch": 1.987379134860051, "grad_norm": 6.217621891959215, "learning_rate": 7.538075819207688e-06, "loss": 0.1197, "step": 97630 }, { "epoch": 1.9875826972010178, "grad_norm": 28.52800518827689, "learning_rate": 7.53746357942662e-06, "loss": 0.3358, "step": 97640 }, { "epoch": 1.9877862595419846, "grad_norm": 9.106215420117758, "learning_rate": 7.536851288398208e-06, "loss": 0.2765, "step": 97650 }, { "epoch": 1.9879898218829517, "grad_norm": 4.963058256985714, "learning_rate": 7.5362389461348176e-06, "loss": 0.1767, "step": 97660 }, { "epoch": 1.9881933842239186, "grad_norm": 10.55502990904727, "learning_rate": 7.535626552648814e-06, "loss": 0.1785, "step": 97670 }, { "epoch": 1.9883969465648854, "grad_norm": 12.380997637797092, "learning_rate": 7.535014107952568e-06, "loss": 0.2716, "step": 97680 }, { "epoch": 1.9886005089058525, "grad_norm": 4.33029827962793, "learning_rate": 7.534401612058445e-06, "loss": 0.1305, "step": 97690 }, { "epoch": 1.9888040712468193, "grad_norm": 9.812883092317621, "learning_rate": 7.533789064978817e-06, "loss": 0.2761, "step": 97700 }, { "epoch": 1.9890076335877862, "grad_norm": 3.4945694051169265, "learning_rate": 7.533176466726057e-06, "loss": 0.1756, "step": 97710 }, { "epoch": 1.9892111959287533, "grad_norm": 8.224995401110434, "learning_rate": 7.532563817312535e-06, "loss": 0.2215, "step": 97720 }, { "epoch": 1.9894147582697201, "grad_norm": 17.408828160966703, "learning_rate": 7.531951116750625e-06, "loss": 0.3074, "step": 97730 }, { "epoch": 1.989618320610687, "grad_norm": 7.721786679064293, "learning_rate": 7.531338365052699e-06, "loss": 0.2609, "step": 97740 }, { "epoch": 1.989821882951654, "grad_norm": 3.4838883262193523, "learning_rate": 7.530725562231136e-06, "loss": 0.1985, "step": 97750 }, { "epoch": 1.9900254452926207, "grad_norm": 8.015133112357415, "learning_rate": 7.530112708298311e-06, "loss": 0.2279, "step": 97760 }, { "epoch": 1.9902290076335878, "grad_norm": 18.377883264426668, "learning_rate": 7.529499803266601e-06, "loss": 0.1705, "step": 97770 }, { "epoch": 1.9904325699745549, "grad_norm": 5.612991546192913, "learning_rate": 7.528886847148384e-06, "loss": 0.2173, "step": 97780 }, { "epoch": 1.9906361323155215, "grad_norm": 10.339996142096771, "learning_rate": 7.528273839956039e-06, "loss": 0.2616, "step": 97790 }, { "epoch": 1.9908396946564886, "grad_norm": 8.575789721874845, "learning_rate": 7.527660781701951e-06, "loss": 0.2049, "step": 97800 }, { "epoch": 1.9910432569974554, "grad_norm": 7.1808728038989615, "learning_rate": 7.527047672398495e-06, "loss": 0.1815, "step": 97810 }, { "epoch": 1.9912468193384223, "grad_norm": 10.12395116297483, "learning_rate": 7.526434512058056e-06, "loss": 0.1987, "step": 97820 }, { "epoch": 1.9914503816793894, "grad_norm": 11.368894009884775, "learning_rate": 7.525821300693019e-06, "loss": 0.2044, "step": 97830 }, { "epoch": 1.9916539440203562, "grad_norm": 7.772963017911432, "learning_rate": 7.525208038315768e-06, "loss": 0.2673, "step": 97840 }, { "epoch": 1.991857506361323, "grad_norm": 4.628865292730608, "learning_rate": 7.524594724938688e-06, "loss": 0.2573, "step": 97850 }, { "epoch": 1.9920610687022902, "grad_norm": 4.022315401019828, "learning_rate": 7.523981360574166e-06, "loss": 0.1418, "step": 97860 }, { "epoch": 1.992264631043257, "grad_norm": 5.6384495437684095, "learning_rate": 7.523367945234587e-06, "loss": 0.165, "step": 97870 }, { "epoch": 1.9924681933842239, "grad_norm": 8.590690836344631, "learning_rate": 7.522754478932346e-06, "loss": 0.2107, "step": 97880 }, { "epoch": 1.992671755725191, "grad_norm": 9.203728405965967, "learning_rate": 7.522140961679828e-06, "loss": 0.1881, "step": 97890 }, { "epoch": 1.9928753180661578, "grad_norm": 13.86920881660764, "learning_rate": 7.5215273934894254e-06, "loss": 0.3411, "step": 97900 }, { "epoch": 1.9930788804071247, "grad_norm": 15.124527835956515, "learning_rate": 7.520913774373528e-06, "loss": 0.2622, "step": 97910 }, { "epoch": 1.9932824427480917, "grad_norm": 3.714303845194737, "learning_rate": 7.520300104344529e-06, "loss": 0.1427, "step": 97920 }, { "epoch": 1.9934860050890584, "grad_norm": 16.408606611154035, "learning_rate": 7.519686383414825e-06, "loss": 0.1448, "step": 97930 }, { "epoch": 1.9936895674300255, "grad_norm": 10.207690963328549, "learning_rate": 7.51907261159681e-06, "loss": 0.146, "step": 97940 }, { "epoch": 1.9938931297709923, "grad_norm": 4.769954211839628, "learning_rate": 7.518458788902876e-06, "loss": 0.1711, "step": 97950 }, { "epoch": 1.9940966921119592, "grad_norm": 6.42488759664793, "learning_rate": 7.517844915345425e-06, "loss": 0.2669, "step": 97960 }, { "epoch": 1.9943002544529262, "grad_norm": 9.643489530874552, "learning_rate": 7.517230990936854e-06, "loss": 0.2062, "step": 97970 }, { "epoch": 1.994503816793893, "grad_norm": 12.539914044837014, "learning_rate": 7.516617015689559e-06, "loss": 0.1511, "step": 97980 }, { "epoch": 1.99470737913486, "grad_norm": 23.04589953408498, "learning_rate": 7.516002989615942e-06, "loss": 0.1684, "step": 97990 }, { "epoch": 1.994910941475827, "grad_norm": 4.201055406664642, "learning_rate": 7.515388912728405e-06, "loss": 0.2347, "step": 98000 }, { "epoch": 1.995114503816794, "grad_norm": 0.41885936822465347, "learning_rate": 7.51477478503935e-06, "loss": 0.2252, "step": 98010 }, { "epoch": 1.9953180661577608, "grad_norm": 7.507504774195708, "learning_rate": 7.5141606065611776e-06, "loss": 0.2233, "step": 98020 }, { "epoch": 1.9955216284987278, "grad_norm": 3.934819081956319, "learning_rate": 7.513546377306293e-06, "loss": 0.232, "step": 98030 }, { "epoch": 1.9957251908396947, "grad_norm": 2.895884862307452, "learning_rate": 7.512932097287103e-06, "loss": 0.1647, "step": 98040 }, { "epoch": 1.9959287531806615, "grad_norm": 5.392420669339754, "learning_rate": 7.512317766516013e-06, "loss": 0.1741, "step": 98050 }, { "epoch": 1.9961323155216286, "grad_norm": 12.14742174316136, "learning_rate": 7.51170338500543e-06, "loss": 0.2994, "step": 98060 }, { "epoch": 1.9963358778625953, "grad_norm": 6.077469993698251, "learning_rate": 7.51108895276776e-06, "loss": 0.1366, "step": 98070 }, { "epoch": 1.9965394402035623, "grad_norm": 33.753361226407655, "learning_rate": 7.510474469815417e-06, "loss": 0.1975, "step": 98080 }, { "epoch": 1.9967430025445294, "grad_norm": 13.143617106461837, "learning_rate": 7.509859936160809e-06, "loss": 0.1865, "step": 98090 }, { "epoch": 1.996946564885496, "grad_norm": 3.393722408473216, "learning_rate": 7.509245351816343e-06, "loss": 0.2487, "step": 98100 }, { "epoch": 1.9971501272264631, "grad_norm": 7.613701718322125, "learning_rate": 7.508630716794439e-06, "loss": 0.1679, "step": 98110 }, { "epoch": 1.99735368956743, "grad_norm": 16.237726361311704, "learning_rate": 7.508016031107505e-06, "loss": 0.1749, "step": 98120 }, { "epoch": 1.9975572519083968, "grad_norm": 0.2747967408280118, "learning_rate": 7.5074012947679575e-06, "loss": 0.1014, "step": 98130 }, { "epoch": 1.997760814249364, "grad_norm": 9.40432329391104, "learning_rate": 7.506786507788211e-06, "loss": 0.1942, "step": 98140 }, { "epoch": 1.9979643765903308, "grad_norm": 26.18187397696248, "learning_rate": 7.506171670180682e-06, "loss": 0.2965, "step": 98150 }, { "epoch": 1.9981679389312976, "grad_norm": 13.558740925094405, "learning_rate": 7.505556781957788e-06, "loss": 0.3327, "step": 98160 }, { "epoch": 1.9983715012722647, "grad_norm": 0.38332423770461393, "learning_rate": 7.5049418431319484e-06, "loss": 0.1806, "step": 98170 }, { "epoch": 1.9985750636132316, "grad_norm": 16.772644344367272, "learning_rate": 7.504326853715582e-06, "loss": 0.1847, "step": 98180 }, { "epoch": 1.9987786259541984, "grad_norm": 4.134836221302911, "learning_rate": 7.503711813721108e-06, "loss": 0.2262, "step": 98190 }, { "epoch": 1.9989821882951655, "grad_norm": 8.514318680102825, "learning_rate": 7.503096723160951e-06, "loss": 0.2254, "step": 98200 }, { "epoch": 1.9991857506361324, "grad_norm": 5.666462983498361, "learning_rate": 7.5024815820475305e-06, "loss": 0.1969, "step": 98210 }, { "epoch": 1.9993893129770992, "grad_norm": 3.8606042235615834, "learning_rate": 7.501866390393271e-06, "loss": 0.2604, "step": 98220 }, { "epoch": 1.9995928753180663, "grad_norm": 2.8611442780050833, "learning_rate": 7.5012511482106e-06, "loss": 0.2858, "step": 98230 }, { "epoch": 1.999796437659033, "grad_norm": 3.6837192799802927, "learning_rate": 7.5006358555119365e-06, "loss": 0.1746, "step": 98240 }, { "epoch": 2.0, "grad_norm": 3.929497461263009, "learning_rate": 7.500020512309713e-06, "loss": 0.0963, "step": 98250 }, { "epoch": 2.000203562340967, "grad_norm": 0.08740357431457499, "learning_rate": 7.499405118616356e-06, "loss": 0.0817, "step": 98260 }, { "epoch": 2.0004071246819337, "grad_norm": 6.709061109589467, "learning_rate": 7.498789674444292e-06, "loss": 0.0749, "step": 98270 }, { "epoch": 2.000610687022901, "grad_norm": 11.669707414499394, "learning_rate": 7.498174179805953e-06, "loss": 0.1211, "step": 98280 }, { "epoch": 2.000814249363868, "grad_norm": 0.32322521169173835, "learning_rate": 7.497558634713769e-06, "loss": 0.1248, "step": 98290 }, { "epoch": 2.0010178117048345, "grad_norm": 25.85113454455637, "learning_rate": 7.496943039180171e-06, "loss": 0.1737, "step": 98300 }, { "epoch": 2.0012213740458016, "grad_norm": 0.11207304370660519, "learning_rate": 7.496327393217593e-06, "loss": 0.0933, "step": 98310 }, { "epoch": 2.0014249363867687, "grad_norm": 0.5810243588643825, "learning_rate": 7.495711696838467e-06, "loss": 0.1056, "step": 98320 }, { "epoch": 2.0016284987277353, "grad_norm": 5.566139441945575, "learning_rate": 7.495095950055229e-06, "loss": 0.1084, "step": 98330 }, { "epoch": 2.0018320610687024, "grad_norm": 8.49667260562107, "learning_rate": 7.4944801528803146e-06, "loss": 0.0492, "step": 98340 }, { "epoch": 2.002035623409669, "grad_norm": 19.720615987509742, "learning_rate": 7.493864305326162e-06, "loss": 0.1831, "step": 98350 }, { "epoch": 2.002239185750636, "grad_norm": 7.572056321277155, "learning_rate": 7.493248407405206e-06, "loss": 0.2093, "step": 98360 }, { "epoch": 2.002442748091603, "grad_norm": 25.96459252135221, "learning_rate": 7.4926324591298884e-06, "loss": 0.1305, "step": 98370 }, { "epoch": 2.00264631043257, "grad_norm": 1.60106826462572, "learning_rate": 7.492016460512647e-06, "loss": 0.1213, "step": 98380 }, { "epoch": 2.002849872773537, "grad_norm": 22.552246741370528, "learning_rate": 7.491400411565924e-06, "loss": 0.2096, "step": 98390 }, { "epoch": 2.003053435114504, "grad_norm": 11.661572240800183, "learning_rate": 7.49078431230216e-06, "loss": 0.1655, "step": 98400 }, { "epoch": 2.0032569974554706, "grad_norm": 0.46976773493011176, "learning_rate": 7.490168162733798e-06, "loss": 0.1043, "step": 98410 }, { "epoch": 2.0034605597964377, "grad_norm": 19.82911871477898, "learning_rate": 7.489551962873285e-06, "loss": 0.1036, "step": 98420 }, { "epoch": 2.0036641221374047, "grad_norm": 1.1669997474870226, "learning_rate": 7.488935712733063e-06, "loss": 0.102, "step": 98430 }, { "epoch": 2.0038676844783714, "grad_norm": 39.45918059453991, "learning_rate": 7.488319412325576e-06, "loss": 0.1805, "step": 98440 }, { "epoch": 2.0040712468193385, "grad_norm": 11.42919685174184, "learning_rate": 7.487703061663276e-06, "loss": 0.1278, "step": 98450 }, { "epoch": 2.0042748091603055, "grad_norm": 20.730000120933965, "learning_rate": 7.487086660758609e-06, "loss": 0.1545, "step": 98460 }, { "epoch": 2.004478371501272, "grad_norm": 0.46665654199664713, "learning_rate": 7.486470209624023e-06, "loss": 0.1631, "step": 98470 }, { "epoch": 2.0046819338422392, "grad_norm": 6.572759885408076, "learning_rate": 7.485853708271967e-06, "loss": 0.2207, "step": 98480 }, { "epoch": 2.0048854961832063, "grad_norm": 26.91213298667994, "learning_rate": 7.485237156714895e-06, "loss": 0.1673, "step": 98490 }, { "epoch": 2.005089058524173, "grad_norm": 0.2540809959773193, "learning_rate": 7.484620554965258e-06, "loss": 0.0556, "step": 98500 }, { "epoch": 2.00529262086514, "grad_norm": 0.29256820122703703, "learning_rate": 7.484003903035507e-06, "loss": 0.0471, "step": 98510 }, { "epoch": 2.0054961832061067, "grad_norm": 0.033513934156285105, "learning_rate": 7.4833872009381e-06, "loss": 0.1081, "step": 98520 }, { "epoch": 2.0056997455470738, "grad_norm": 0.7025200727078679, "learning_rate": 7.482770448685488e-06, "loss": 0.0972, "step": 98530 }, { "epoch": 2.005903307888041, "grad_norm": 28.373405345525875, "learning_rate": 7.48215364629013e-06, "loss": 0.1005, "step": 98540 }, { "epoch": 2.0061068702290075, "grad_norm": 2.7681859006835565, "learning_rate": 7.481536793764483e-06, "loss": 0.0521, "step": 98550 }, { "epoch": 2.0063104325699745, "grad_norm": 15.604464330464607, "learning_rate": 7.480919891121004e-06, "loss": 0.1252, "step": 98560 }, { "epoch": 2.0065139949109416, "grad_norm": 29.13283663958479, "learning_rate": 7.48030293837215e-06, "loss": 0.1102, "step": 98570 }, { "epoch": 2.0067175572519083, "grad_norm": 1.2574139063385619, "learning_rate": 7.479685935530385e-06, "loss": 0.0599, "step": 98580 }, { "epoch": 2.0069211195928753, "grad_norm": 19.854605503110097, "learning_rate": 7.479068882608169e-06, "loss": 0.0372, "step": 98590 }, { "epoch": 2.0071246819338424, "grad_norm": 0.010379399384082987, "learning_rate": 7.478451779617963e-06, "loss": 0.2429, "step": 98600 }, { "epoch": 2.007328244274809, "grad_norm": 4.3091684234457786, "learning_rate": 7.477834626572232e-06, "loss": 0.0684, "step": 98610 }, { "epoch": 2.007531806615776, "grad_norm": 1.6189319607514663, "learning_rate": 7.477217423483439e-06, "loss": 0.059, "step": 98620 }, { "epoch": 2.007735368956743, "grad_norm": 37.455360785145864, "learning_rate": 7.4766001703640515e-06, "loss": 0.0922, "step": 98630 }, { "epoch": 2.00793893129771, "grad_norm": 17.849558688518083, "learning_rate": 7.475982867226532e-06, "loss": 0.1653, "step": 98640 }, { "epoch": 2.008142493638677, "grad_norm": 0.02221158242741421, "learning_rate": 7.475365514083348e-06, "loss": 0.0962, "step": 98650 }, { "epoch": 2.0083460559796436, "grad_norm": 1.1116486695587584, "learning_rate": 7.474748110946971e-06, "loss": 0.2037, "step": 98660 }, { "epoch": 2.0085496183206106, "grad_norm": 27.025035700721567, "learning_rate": 7.474130657829868e-06, "loss": 0.1982, "step": 98670 }, { "epoch": 2.0087531806615777, "grad_norm": 0.9263994072108408, "learning_rate": 7.4735131547445095e-06, "loss": 0.2277, "step": 98680 }, { "epoch": 2.0089567430025443, "grad_norm": 3.7947660423477103, "learning_rate": 7.472895601703367e-06, "loss": 0.1053, "step": 98690 }, { "epoch": 2.0091603053435114, "grad_norm": 0.40138423441248006, "learning_rate": 7.472277998718913e-06, "loss": 0.1333, "step": 98700 }, { "epoch": 2.0093638676844785, "grad_norm": 17.477343266004908, "learning_rate": 7.47166034580362e-06, "loss": 0.2023, "step": 98710 }, { "epoch": 2.009567430025445, "grad_norm": 20.476110487309143, "learning_rate": 7.471042642969964e-06, "loss": 0.1778, "step": 98720 }, { "epoch": 2.009770992366412, "grad_norm": 10.882644824296682, "learning_rate": 7.4704248902304175e-06, "loss": 0.2386, "step": 98730 }, { "epoch": 2.0099745547073793, "grad_norm": 23.311121063519014, "learning_rate": 7.469807087597461e-06, "loss": 0.1245, "step": 98740 }, { "epoch": 2.010178117048346, "grad_norm": 4.070341978366519, "learning_rate": 7.469189235083567e-06, "loss": 0.0749, "step": 98750 }, { "epoch": 2.010381679389313, "grad_norm": 6.026484253651552, "learning_rate": 7.468571332701218e-06, "loss": 0.0996, "step": 98760 }, { "epoch": 2.01058524173028, "grad_norm": 8.102072576302513, "learning_rate": 7.467953380462889e-06, "loss": 0.0478, "step": 98770 }, { "epoch": 2.0107888040712467, "grad_norm": 8.38727286028814, "learning_rate": 7.467335378381064e-06, "loss": 0.2328, "step": 98780 }, { "epoch": 2.010992366412214, "grad_norm": 0.2142825979091848, "learning_rate": 7.466717326468223e-06, "loss": 0.1289, "step": 98790 }, { "epoch": 2.011195928753181, "grad_norm": 8.059196998009545, "learning_rate": 7.466099224736849e-06, "loss": 0.1365, "step": 98800 }, { "epoch": 2.0113994910941475, "grad_norm": 0.4224282799536163, "learning_rate": 7.465481073199426e-06, "loss": 0.0334, "step": 98810 }, { "epoch": 2.0116030534351146, "grad_norm": 9.118872974804425, "learning_rate": 7.464862871868433e-06, "loss": 0.2061, "step": 98820 }, { "epoch": 2.011806615776081, "grad_norm": 9.130298947189127, "learning_rate": 7.464244620756363e-06, "loss": 0.1796, "step": 98830 }, { "epoch": 2.0120101781170483, "grad_norm": 23.933123239988426, "learning_rate": 7.4636263198756985e-06, "loss": 0.1115, "step": 98840 }, { "epoch": 2.0122137404580154, "grad_norm": 19.217964605707426, "learning_rate": 7.463007969238928e-06, "loss": 0.0456, "step": 98850 }, { "epoch": 2.012417302798982, "grad_norm": 14.93433127075987, "learning_rate": 7.4623895688585375e-06, "loss": 0.1493, "step": 98860 }, { "epoch": 2.012620865139949, "grad_norm": 27.547069773365155, "learning_rate": 7.461771118747019e-06, "loss": 0.1118, "step": 98870 }, { "epoch": 2.012824427480916, "grad_norm": 0.1279164643077499, "learning_rate": 7.461152618916863e-06, "loss": 0.0938, "step": 98880 }, { "epoch": 2.013027989821883, "grad_norm": 24.031437943924875, "learning_rate": 7.460534069380558e-06, "loss": 0.2651, "step": 98890 }, { "epoch": 2.01323155216285, "grad_norm": 0.24369281737809922, "learning_rate": 7.459915470150599e-06, "loss": 0.0922, "step": 98900 }, { "epoch": 2.013435114503817, "grad_norm": 31.999753581503285, "learning_rate": 7.459296821239479e-06, "loss": 0.1404, "step": 98910 }, { "epoch": 2.0136386768447836, "grad_norm": 16.037118421537663, "learning_rate": 7.4586781226596924e-06, "loss": 0.1055, "step": 98920 }, { "epoch": 2.0138422391857507, "grad_norm": 9.348143380451923, "learning_rate": 7.458059374423734e-06, "loss": 0.1062, "step": 98930 }, { "epoch": 2.0140458015267177, "grad_norm": 0.16070997624332573, "learning_rate": 7.457440576544099e-06, "loss": 0.1382, "step": 98940 }, { "epoch": 2.0142493638676844, "grad_norm": 18.849295407086544, "learning_rate": 7.45682172903329e-06, "loss": 0.137, "step": 98950 }, { "epoch": 2.0144529262086515, "grad_norm": 9.04191022546497, "learning_rate": 7.456202831903799e-06, "loss": 0.1357, "step": 98960 }, { "epoch": 2.0146564885496185, "grad_norm": 5.686157059279048, "learning_rate": 7.455583885168129e-06, "loss": 0.131, "step": 98970 }, { "epoch": 2.014860050890585, "grad_norm": 33.64435343220401, "learning_rate": 7.454964888838781e-06, "loss": 0.0907, "step": 98980 }, { "epoch": 2.0150636132315523, "grad_norm": 2.4528984677503987, "learning_rate": 7.454345842928252e-06, "loss": 0.0904, "step": 98990 }, { "epoch": 2.015267175572519, "grad_norm": 0.3506087633188861, "learning_rate": 7.45372674744905e-06, "loss": 0.0853, "step": 99000 }, { "epoch": 2.015470737913486, "grad_norm": 19.933043480730007, "learning_rate": 7.453107602413676e-06, "loss": 0.1645, "step": 99010 }, { "epoch": 2.015674300254453, "grad_norm": 6.156161028182388, "learning_rate": 7.452488407834633e-06, "loss": 0.1576, "step": 99020 }, { "epoch": 2.0158778625954197, "grad_norm": 0.02479850375189705, "learning_rate": 7.451869163724428e-06, "loss": 0.0337, "step": 99030 }, { "epoch": 2.0160814249363868, "grad_norm": 24.488213234719517, "learning_rate": 7.45124987009557e-06, "loss": 0.1124, "step": 99040 }, { "epoch": 2.016284987277354, "grad_norm": 0.26447120195131096, "learning_rate": 7.45063052696056e-06, "loss": 0.0993, "step": 99050 }, { "epoch": 2.0164885496183205, "grad_norm": 0.2507029790509186, "learning_rate": 7.450011134331911e-06, "loss": 0.1046, "step": 99060 }, { "epoch": 2.0166921119592875, "grad_norm": 121.78014387235471, "learning_rate": 7.449391692222133e-06, "loss": 0.1415, "step": 99070 }, { "epoch": 2.0168956743002546, "grad_norm": 24.819899006444867, "learning_rate": 7.4487722006437346e-06, "loss": 0.1337, "step": 99080 }, { "epoch": 2.0170992366412213, "grad_norm": 0.5175244142694186, "learning_rate": 7.448152659609227e-06, "loss": 0.0525, "step": 99090 }, { "epoch": 2.0173027989821883, "grad_norm": 7.0178229169031106, "learning_rate": 7.447533069131123e-06, "loss": 0.1143, "step": 99100 }, { "epoch": 2.0175063613231554, "grad_norm": 0.3663313263932661, "learning_rate": 7.4469134292219355e-06, "loss": 0.1901, "step": 99110 }, { "epoch": 2.017709923664122, "grad_norm": 20.403316721563368, "learning_rate": 7.446293739894183e-06, "loss": 0.1263, "step": 99120 }, { "epoch": 2.017913486005089, "grad_norm": 2.7656012040117495, "learning_rate": 7.445674001160374e-06, "loss": 0.0172, "step": 99130 }, { "epoch": 2.0181170483460558, "grad_norm": 0.5345275061392885, "learning_rate": 7.445054213033029e-06, "loss": 0.1004, "step": 99140 }, { "epoch": 2.018320610687023, "grad_norm": 3.429193442364168, "learning_rate": 7.4444343755246676e-06, "loss": 0.1024, "step": 99150 }, { "epoch": 2.01852417302799, "grad_norm": 27.511208915863083, "learning_rate": 7.443814488647805e-06, "loss": 0.0873, "step": 99160 }, { "epoch": 2.0187277353689566, "grad_norm": 25.52108447229, "learning_rate": 7.443194552414959e-06, "loss": 0.2318, "step": 99170 }, { "epoch": 2.0189312977099236, "grad_norm": 23.378100258387363, "learning_rate": 7.442574566838654e-06, "loss": 0.1342, "step": 99180 }, { "epoch": 2.0191348600508907, "grad_norm": 0.006129505010078589, "learning_rate": 7.441954531931408e-06, "loss": 0.173, "step": 99190 }, { "epoch": 2.0193384223918573, "grad_norm": 1.7137202183476294, "learning_rate": 7.441334447705746e-06, "loss": 0.1036, "step": 99200 }, { "epoch": 2.0195419847328244, "grad_norm": 0.07448624147176347, "learning_rate": 7.440714314174191e-06, "loss": 0.0765, "step": 99210 }, { "epoch": 2.0197455470737915, "grad_norm": 18.67116669485797, "learning_rate": 7.4400941313492665e-06, "loss": 0.1491, "step": 99220 }, { "epoch": 2.019949109414758, "grad_norm": 0.465009571484996, "learning_rate": 7.4394738992434965e-06, "loss": 0.0882, "step": 99230 }, { "epoch": 2.020152671755725, "grad_norm": 15.891863314324219, "learning_rate": 7.438853617869411e-06, "loss": 0.1392, "step": 99240 }, { "epoch": 2.0203562340966923, "grad_norm": 0.5414820897123827, "learning_rate": 7.438233287239535e-06, "loss": 0.0912, "step": 99250 }, { "epoch": 2.020559796437659, "grad_norm": 4.614115128840084, "learning_rate": 7.437612907366399e-06, "loss": 0.0796, "step": 99260 }, { "epoch": 2.020763358778626, "grad_norm": 14.119919565174301, "learning_rate": 7.436992478262528e-06, "loss": 0.1094, "step": 99270 }, { "epoch": 2.020966921119593, "grad_norm": 0.13473831497401817, "learning_rate": 7.436371999940456e-06, "loss": 0.1104, "step": 99280 }, { "epoch": 2.0211704834605597, "grad_norm": 17.993971626054297, "learning_rate": 7.435751472412714e-06, "loss": 0.1488, "step": 99290 }, { "epoch": 2.021374045801527, "grad_norm": 3.9487570229418902, "learning_rate": 7.435130895691833e-06, "loss": 0.0665, "step": 99300 }, { "epoch": 2.0215776081424934, "grad_norm": 8.112323449658552, "learning_rate": 7.434510269790347e-06, "loss": 0.1626, "step": 99310 }, { "epoch": 2.0217811704834605, "grad_norm": 40.10744853717022, "learning_rate": 7.433889594720791e-06, "loss": 0.1057, "step": 99320 }, { "epoch": 2.0219847328244276, "grad_norm": 22.591695352131023, "learning_rate": 7.4332688704957e-06, "loss": 0.1472, "step": 99330 }, { "epoch": 2.0221882951653942, "grad_norm": 13.862896588329363, "learning_rate": 7.432648097127611e-06, "loss": 0.1309, "step": 99340 }, { "epoch": 2.0223918575063613, "grad_norm": 8.746151651800163, "learning_rate": 7.432027274629058e-06, "loss": 0.1436, "step": 99350 }, { "epoch": 2.0225954198473284, "grad_norm": 11.698391943589614, "learning_rate": 7.431406403012583e-06, "loss": 0.0934, "step": 99360 }, { "epoch": 2.022798982188295, "grad_norm": 1.2809793726970307, "learning_rate": 7.430785482290724e-06, "loss": 0.1046, "step": 99370 }, { "epoch": 2.023002544529262, "grad_norm": 11.917901350283623, "learning_rate": 7.430164512476021e-06, "loss": 0.0518, "step": 99380 }, { "epoch": 2.023206106870229, "grad_norm": 3.1665930557257114, "learning_rate": 7.429543493581017e-06, "loss": 0.0918, "step": 99390 }, { "epoch": 2.023409669211196, "grad_norm": 1.139613089336601, "learning_rate": 7.4289224256182515e-06, "loss": 0.0798, "step": 99400 }, { "epoch": 2.023613231552163, "grad_norm": 10.54788836847925, "learning_rate": 7.428301308600271e-06, "loss": 0.0626, "step": 99410 }, { "epoch": 2.02381679389313, "grad_norm": 0.8670158250666032, "learning_rate": 7.427680142539616e-06, "loss": 0.1967, "step": 99420 }, { "epoch": 2.0240203562340966, "grad_norm": 22.48390095953827, "learning_rate": 7.427058927448834e-06, "loss": 0.0673, "step": 99430 }, { "epoch": 2.0242239185750637, "grad_norm": 0.020306334032318615, "learning_rate": 7.426437663340472e-06, "loss": 0.1826, "step": 99440 }, { "epoch": 2.0244274809160308, "grad_norm": 0.012889170340751425, "learning_rate": 7.4258163502270755e-06, "loss": 0.1124, "step": 99450 }, { "epoch": 2.0246310432569974, "grad_norm": 18.74486415158427, "learning_rate": 7.425194988121192e-06, "loss": 0.1395, "step": 99460 }, { "epoch": 2.0248346055979645, "grad_norm": 0.2173256700447383, "learning_rate": 7.424573577035374e-06, "loss": 0.1136, "step": 99470 }, { "epoch": 2.025038167938931, "grad_norm": 5.642736740982419, "learning_rate": 7.423952116982169e-06, "loss": 0.1354, "step": 99480 }, { "epoch": 2.025241730279898, "grad_norm": 0.2994924571049588, "learning_rate": 7.42333060797413e-06, "loss": 0.062, "step": 99490 }, { "epoch": 2.0254452926208653, "grad_norm": 0.6534646729162383, "learning_rate": 7.422709050023807e-06, "loss": 0.1073, "step": 99500 }, { "epoch": 2.025648854961832, "grad_norm": 16.159237686733384, "learning_rate": 7.422087443143755e-06, "loss": 0.1271, "step": 99510 }, { "epoch": 2.025852417302799, "grad_norm": 45.316866985846914, "learning_rate": 7.421465787346527e-06, "loss": 0.1682, "step": 99520 }, { "epoch": 2.026055979643766, "grad_norm": 11.406095142825688, "learning_rate": 7.420844082644679e-06, "loss": 0.0553, "step": 99530 }, { "epoch": 2.0262595419847327, "grad_norm": 1.1145983270301534, "learning_rate": 7.420222329050766e-06, "loss": 0.0628, "step": 99540 }, { "epoch": 2.0264631043256998, "grad_norm": 2.1043887727555144, "learning_rate": 7.419600526577347e-06, "loss": 0.206, "step": 99550 }, { "epoch": 2.026666666666667, "grad_norm": 2.1990337215479343, "learning_rate": 7.418978675236977e-06, "loss": 0.09, "step": 99560 }, { "epoch": 2.0268702290076335, "grad_norm": 0.23513878777047326, "learning_rate": 7.418356775042219e-06, "loss": 0.1561, "step": 99570 }, { "epoch": 2.0270737913486006, "grad_norm": 19.975594898710078, "learning_rate": 7.41773482600563e-06, "loss": 0.2265, "step": 99580 }, { "epoch": 2.0272773536895676, "grad_norm": 2.2086782222106187, "learning_rate": 7.417112828139774e-06, "loss": 0.0518, "step": 99590 }, { "epoch": 2.0274809160305343, "grad_norm": 3.371215141472419, "learning_rate": 7.4164907814572084e-06, "loss": 0.1337, "step": 99600 }, { "epoch": 2.0276844783715013, "grad_norm": 19.165217056509878, "learning_rate": 7.415868685970501e-06, "loss": 0.1108, "step": 99610 }, { "epoch": 2.027888040712468, "grad_norm": 0.06313721861881877, "learning_rate": 7.415246541692214e-06, "loss": 0.1171, "step": 99620 }, { "epoch": 2.028091603053435, "grad_norm": 3.767888582373804, "learning_rate": 7.414624348634911e-06, "loss": 0.0582, "step": 99630 }, { "epoch": 2.028295165394402, "grad_norm": 0.13685410736947645, "learning_rate": 7.41400210681116e-06, "loss": 0.0938, "step": 99640 }, { "epoch": 2.0284987277353688, "grad_norm": 1.9592124477152995, "learning_rate": 7.413379816233527e-06, "loss": 0.121, "step": 99650 }, { "epoch": 2.028702290076336, "grad_norm": 0.041083633392674825, "learning_rate": 7.4127574769145795e-06, "loss": 0.1088, "step": 99660 }, { "epoch": 2.028905852417303, "grad_norm": 0.3457642623896494, "learning_rate": 7.412135088866887e-06, "loss": 0.0678, "step": 99670 }, { "epoch": 2.0291094147582696, "grad_norm": 30.172432528609566, "learning_rate": 7.411512652103021e-06, "loss": 0.0846, "step": 99680 }, { "epoch": 2.0293129770992366, "grad_norm": 0.09669809683111429, "learning_rate": 7.410890166635551e-06, "loss": 0.0828, "step": 99690 }, { "epoch": 2.0295165394402037, "grad_norm": 0.13649267081769353, "learning_rate": 7.4102676324770484e-06, "loss": 0.0568, "step": 99700 }, { "epoch": 2.0297201017811703, "grad_norm": 5.256229912262696, "learning_rate": 7.409645049640087e-06, "loss": 0.1766, "step": 99710 }, { "epoch": 2.0299236641221374, "grad_norm": 22.778246087340186, "learning_rate": 7.409022418137238e-06, "loss": 0.0742, "step": 99720 }, { "epoch": 2.0301272264631045, "grad_norm": 10.222569504217205, "learning_rate": 7.408399737981082e-06, "loss": 0.1337, "step": 99730 }, { "epoch": 2.030330788804071, "grad_norm": 6.6914475109418206, "learning_rate": 7.407777009184189e-06, "loss": 0.0883, "step": 99740 }, { "epoch": 2.030534351145038, "grad_norm": 8.471200215347942, "learning_rate": 7.4071542317591385e-06, "loss": 0.0907, "step": 99750 }, { "epoch": 2.0307379134860053, "grad_norm": 0.025499563478132918, "learning_rate": 7.406531405718508e-06, "loss": 0.2404, "step": 99760 }, { "epoch": 2.030941475826972, "grad_norm": 0.8267598288389224, "learning_rate": 7.405908531074875e-06, "loss": 0.1911, "step": 99770 }, { "epoch": 2.031145038167939, "grad_norm": 1.2122665542056388, "learning_rate": 7.405285607840822e-06, "loss": 0.0965, "step": 99780 }, { "epoch": 2.0313486005089056, "grad_norm": 9.477174065154852, "learning_rate": 7.4046626360289296e-06, "loss": 0.1457, "step": 99790 }, { "epoch": 2.0315521628498727, "grad_norm": 1.0637720903599084, "learning_rate": 7.404039615651775e-06, "loss": 0.1279, "step": 99800 }, { "epoch": 2.03175572519084, "grad_norm": 0.070899966136669, "learning_rate": 7.403416546721945e-06, "loss": 0.1384, "step": 99810 }, { "epoch": 2.0319592875318064, "grad_norm": 15.552313074249598, "learning_rate": 7.402793429252023e-06, "loss": 0.1575, "step": 99820 }, { "epoch": 2.0321628498727735, "grad_norm": 10.762485019516886, "learning_rate": 7.402170263254593e-06, "loss": 0.2439, "step": 99830 }, { "epoch": 2.0323664122137406, "grad_norm": 5.551874171135453, "learning_rate": 7.4015470487422395e-06, "loss": 0.1237, "step": 99840 }, { "epoch": 2.0325699745547072, "grad_norm": 6.854723070414289, "learning_rate": 7.40092378572755e-06, "loss": 0.146, "step": 99850 }, { "epoch": 2.0327735368956743, "grad_norm": 5.117012338158577, "learning_rate": 7.4003004742231135e-06, "loss": 0.0979, "step": 99860 }, { "epoch": 2.0329770992366414, "grad_norm": 18.68986751437002, "learning_rate": 7.399677114241517e-06, "loss": 0.0549, "step": 99870 }, { "epoch": 2.033180661577608, "grad_norm": 0.14249399571846957, "learning_rate": 7.3990537057953505e-06, "loss": 0.1029, "step": 99880 }, { "epoch": 2.033384223918575, "grad_norm": 5.856565823846983, "learning_rate": 7.398430248897203e-06, "loss": 0.1557, "step": 99890 }, { "epoch": 2.033587786259542, "grad_norm": 6.319164492432005, "learning_rate": 7.39780674355967e-06, "loss": 0.05, "step": 99900 }, { "epoch": 2.033791348600509, "grad_norm": 0.17245049071301097, "learning_rate": 7.397183189795339e-06, "loss": 0.1349, "step": 99910 }, { "epoch": 2.033994910941476, "grad_norm": 16.25475286133399, "learning_rate": 7.396559587616807e-06, "loss": 0.1194, "step": 99920 }, { "epoch": 2.034198473282443, "grad_norm": 21.308078342611843, "learning_rate": 7.395935937036668e-06, "loss": 0.1796, "step": 99930 }, { "epoch": 2.0344020356234096, "grad_norm": 15.236761229500942, "learning_rate": 7.395312238067516e-06, "loss": 0.1869, "step": 99940 }, { "epoch": 2.0346055979643767, "grad_norm": 3.0231897066505327, "learning_rate": 7.394688490721948e-06, "loss": 0.1049, "step": 99950 }, { "epoch": 2.0348091603053433, "grad_norm": 9.993794267207056, "learning_rate": 7.394064695012564e-06, "loss": 0.1204, "step": 99960 }, { "epoch": 2.0350127226463104, "grad_norm": 0.012664088481393923, "learning_rate": 7.393440850951957e-06, "loss": 0.1557, "step": 99970 }, { "epoch": 2.0352162849872775, "grad_norm": 9.00185246867432, "learning_rate": 7.39281695855273e-06, "loss": 0.0594, "step": 99980 }, { "epoch": 2.035419847328244, "grad_norm": 0.5774597876691249, "learning_rate": 7.392193017827484e-06, "loss": 0.1356, "step": 99990 }, { "epoch": 2.035623409669211, "grad_norm": 19.924247930974364, "learning_rate": 7.391569028788817e-06, "loss": 0.1511, "step": 100000 }, { "epoch": 2.0358269720101783, "grad_norm": 7.968090856279961, "learning_rate": 7.3909449914493324e-06, "loss": 0.1365, "step": 100010 }, { "epoch": 2.036030534351145, "grad_norm": 12.759084960946524, "learning_rate": 7.390320905821637e-06, "loss": 0.2247, "step": 100020 }, { "epoch": 2.036234096692112, "grad_norm": 2.368072528214281, "learning_rate": 7.389696771918331e-06, "loss": 0.1023, "step": 100030 }, { "epoch": 2.036437659033079, "grad_norm": 0.6603149619956024, "learning_rate": 7.38907258975202e-06, "loss": 0.1313, "step": 100040 }, { "epoch": 2.0366412213740457, "grad_norm": 31.853695304184054, "learning_rate": 7.388448359335312e-06, "loss": 0.1166, "step": 100050 }, { "epoch": 2.0368447837150128, "grad_norm": 22.067163733134066, "learning_rate": 7.387824080680812e-06, "loss": 0.1144, "step": 100060 }, { "epoch": 2.03704834605598, "grad_norm": 18.976044806272625, "learning_rate": 7.387199753801131e-06, "loss": 0.0809, "step": 100070 }, { "epoch": 2.0372519083969465, "grad_norm": 11.396010903786085, "learning_rate": 7.386575378708876e-06, "loss": 0.0924, "step": 100080 }, { "epoch": 2.0374554707379136, "grad_norm": 1.7774637069857389, "learning_rate": 7.385950955416654e-06, "loss": 0.1263, "step": 100090 }, { "epoch": 2.03765903307888, "grad_norm": 2.3009109421986484, "learning_rate": 7.3853264839370834e-06, "loss": 0.0546, "step": 100100 }, { "epoch": 2.0378625954198473, "grad_norm": 6.8626968742333, "learning_rate": 7.38470196428277e-06, "loss": 0.0868, "step": 100110 }, { "epoch": 2.0380661577608143, "grad_norm": 0.22037472476159065, "learning_rate": 7.3840773964663305e-06, "loss": 0.0928, "step": 100120 }, { "epoch": 2.038269720101781, "grad_norm": 26.856875947943276, "learning_rate": 7.383452780500375e-06, "loss": 0.148, "step": 100130 }, { "epoch": 2.038473282442748, "grad_norm": 10.676227658924487, "learning_rate": 7.3828281163975225e-06, "loss": 0.2407, "step": 100140 }, { "epoch": 2.038676844783715, "grad_norm": 7.973091112220458, "learning_rate": 7.382203404170387e-06, "loss": 0.1224, "step": 100150 }, { "epoch": 2.0388804071246818, "grad_norm": 0.9765787126280373, "learning_rate": 7.381578643831585e-06, "loss": 0.0854, "step": 100160 }, { "epoch": 2.039083969465649, "grad_norm": 0.0602672781175915, "learning_rate": 7.380953835393736e-06, "loss": 0.1377, "step": 100170 }, { "epoch": 2.039287531806616, "grad_norm": 9.397721144019386, "learning_rate": 7.380328978869457e-06, "loss": 0.2395, "step": 100180 }, { "epoch": 2.0394910941475826, "grad_norm": 7.162988248854504, "learning_rate": 7.379704074271367e-06, "loss": 0.1746, "step": 100190 }, { "epoch": 2.0396946564885496, "grad_norm": 0.7749999243485958, "learning_rate": 7.379079121612091e-06, "loss": 0.1024, "step": 100200 }, { "epoch": 2.0398982188295167, "grad_norm": 0.8568985828156068, "learning_rate": 7.378454120904246e-06, "loss": 0.1987, "step": 100210 }, { "epoch": 2.0401017811704834, "grad_norm": 10.381890733342567, "learning_rate": 7.377829072160456e-06, "loss": 0.1167, "step": 100220 }, { "epoch": 2.0403053435114504, "grad_norm": 1.6233261767377274, "learning_rate": 7.377203975393347e-06, "loss": 0.1397, "step": 100230 }, { "epoch": 2.0405089058524175, "grad_norm": 5.496155415822314, "learning_rate": 7.376578830615541e-06, "loss": 0.154, "step": 100240 }, { "epoch": 2.040712468193384, "grad_norm": 3.643882495847015, "learning_rate": 7.375953637839666e-06, "loss": 0.0876, "step": 100250 }, { "epoch": 2.0409160305343512, "grad_norm": 8.314707876905022, "learning_rate": 7.375328397078344e-06, "loss": 0.1799, "step": 100260 }, { "epoch": 2.041119592875318, "grad_norm": 1.4937694962430925, "learning_rate": 7.374703108344207e-06, "loss": 0.1525, "step": 100270 }, { "epoch": 2.041323155216285, "grad_norm": 0.49885696319058814, "learning_rate": 7.374077771649884e-06, "loss": 0.0674, "step": 100280 }, { "epoch": 2.041526717557252, "grad_norm": 0.20397205901090257, "learning_rate": 7.373452387008e-06, "loss": 0.0473, "step": 100290 }, { "epoch": 2.0417302798982186, "grad_norm": 21.22680274340672, "learning_rate": 7.372826954431188e-06, "loss": 0.0809, "step": 100300 }, { "epoch": 2.0419338422391857, "grad_norm": 0.4723588410659525, "learning_rate": 7.372201473932081e-06, "loss": 0.1744, "step": 100310 }, { "epoch": 2.042137404580153, "grad_norm": 16.80721099760603, "learning_rate": 7.371575945523311e-06, "loss": 0.1475, "step": 100320 }, { "epoch": 2.0423409669211194, "grad_norm": 9.269067427381335, "learning_rate": 7.370950369217508e-06, "loss": 0.1151, "step": 100330 }, { "epoch": 2.0425445292620865, "grad_norm": 0.4445225270324531, "learning_rate": 7.370324745027308e-06, "loss": 0.1759, "step": 100340 }, { "epoch": 2.0427480916030536, "grad_norm": 7.334981631900915, "learning_rate": 7.369699072965348e-06, "loss": 0.0794, "step": 100350 }, { "epoch": 2.0429516539440202, "grad_norm": 15.835265654420255, "learning_rate": 7.369073353044262e-06, "loss": 0.1939, "step": 100360 }, { "epoch": 2.0431552162849873, "grad_norm": 0.06560719881729384, "learning_rate": 7.3684475852766915e-06, "loss": 0.0521, "step": 100370 }, { "epoch": 2.0433587786259544, "grad_norm": 0.35348183515887527, "learning_rate": 7.367821769675269e-06, "loss": 0.1177, "step": 100380 }, { "epoch": 2.043562340966921, "grad_norm": 11.440076653324946, "learning_rate": 7.367195906252637e-06, "loss": 0.11, "step": 100390 }, { "epoch": 2.043765903307888, "grad_norm": 1.8172845711777197, "learning_rate": 7.366569995021433e-06, "loss": 0.0846, "step": 100400 }, { "epoch": 2.043969465648855, "grad_norm": 10.443030635768952, "learning_rate": 7.365944035994301e-06, "loss": 0.095, "step": 100410 }, { "epoch": 2.044173027989822, "grad_norm": 4.8632213435545895, "learning_rate": 7.365318029183882e-06, "loss": 0.0686, "step": 100420 }, { "epoch": 2.044376590330789, "grad_norm": 2.5098437787657435, "learning_rate": 7.364691974602819e-06, "loss": 0.0527, "step": 100430 }, { "epoch": 2.0445801526717555, "grad_norm": 0.19149114392057856, "learning_rate": 7.364065872263756e-06, "loss": 0.1559, "step": 100440 }, { "epoch": 2.0447837150127226, "grad_norm": 1.0460154661976842, "learning_rate": 7.363439722179338e-06, "loss": 0.098, "step": 100450 }, { "epoch": 2.0449872773536897, "grad_norm": 0.015427548962580997, "learning_rate": 7.362813524362209e-06, "loss": 0.0736, "step": 100460 }, { "epoch": 2.0451908396946563, "grad_norm": 0.22393668824517957, "learning_rate": 7.36218727882502e-06, "loss": 0.1754, "step": 100470 }, { "epoch": 2.0453944020356234, "grad_norm": 30.631836737898574, "learning_rate": 7.361560985580415e-06, "loss": 0.1867, "step": 100480 }, { "epoch": 2.0455979643765905, "grad_norm": 4.612159468723567, "learning_rate": 7.360934644641045e-06, "loss": 0.0914, "step": 100490 }, { "epoch": 2.045801526717557, "grad_norm": 14.07938309345836, "learning_rate": 7.360308256019557e-06, "loss": 0.0865, "step": 100500 }, { "epoch": 2.046005089058524, "grad_norm": 0.08281840703905646, "learning_rate": 7.359681819728606e-06, "loss": 0.0929, "step": 100510 }, { "epoch": 2.0462086513994913, "grad_norm": 0.13337533041020908, "learning_rate": 7.3590553357808406e-06, "loss": 0.1705, "step": 100520 }, { "epoch": 2.046412213740458, "grad_norm": 4.327083679725851, "learning_rate": 7.358428804188914e-06, "loss": 0.0822, "step": 100530 }, { "epoch": 2.046615776081425, "grad_norm": 29.927650730655166, "learning_rate": 7.35780222496548e-06, "loss": 0.1822, "step": 100540 }, { "epoch": 2.046819338422392, "grad_norm": 0.05643418542572917, "learning_rate": 7.3571755981231944e-06, "loss": 0.0603, "step": 100550 }, { "epoch": 2.0470229007633587, "grad_norm": 30.460580891598912, "learning_rate": 7.35654892367471e-06, "loss": 0.207, "step": 100560 }, { "epoch": 2.0472264631043258, "grad_norm": 8.841710543820437, "learning_rate": 7.355922201632686e-06, "loss": 0.0675, "step": 100570 }, { "epoch": 2.0474300254452924, "grad_norm": 7.702654885551215, "learning_rate": 7.355295432009779e-06, "loss": 0.0969, "step": 100580 }, { "epoch": 2.0476335877862595, "grad_norm": 0.10777358276538881, "learning_rate": 7.354668614818645e-06, "loss": 0.0563, "step": 100590 }, { "epoch": 2.0478371501272266, "grad_norm": 0.0982412461097647, "learning_rate": 7.354041750071947e-06, "loss": 0.1699, "step": 100600 }, { "epoch": 2.048040712468193, "grad_norm": 0.017682012944752443, "learning_rate": 7.353414837782344e-06, "loss": 0.0738, "step": 100610 }, { "epoch": 2.0482442748091603, "grad_norm": 15.540003412790476, "learning_rate": 7.352787877962497e-06, "loss": 0.1068, "step": 100620 }, { "epoch": 2.0484478371501273, "grad_norm": 0.3207976482101056, "learning_rate": 7.352160870625069e-06, "loss": 0.0574, "step": 100630 }, { "epoch": 2.048651399491094, "grad_norm": 0.033146859793765915, "learning_rate": 7.351533815782722e-06, "loss": 0.2096, "step": 100640 }, { "epoch": 2.048854961832061, "grad_norm": 24.474907429922514, "learning_rate": 7.350906713448121e-06, "loss": 0.0764, "step": 100650 }, { "epoch": 2.049058524173028, "grad_norm": 0.46172324545641746, "learning_rate": 7.3502795636339316e-06, "loss": 0.0874, "step": 100660 }, { "epoch": 2.0492620865139948, "grad_norm": 3.045261740485333, "learning_rate": 7.3496523663528175e-06, "loss": 0.1219, "step": 100670 }, { "epoch": 2.049465648854962, "grad_norm": 0.1478232793599087, "learning_rate": 7.34902512161745e-06, "loss": 0.0401, "step": 100680 }, { "epoch": 2.049669211195929, "grad_norm": 0.0023750837167428617, "learning_rate": 7.348397829440493e-06, "loss": 0.261, "step": 100690 }, { "epoch": 2.0498727735368956, "grad_norm": 0.7772960590824253, "learning_rate": 7.347770489834618e-06, "loss": 0.1293, "step": 100700 }, { "epoch": 2.0500763358778626, "grad_norm": 0.04427274120843471, "learning_rate": 7.347143102812493e-06, "loss": 0.1616, "step": 100710 }, { "epoch": 2.0502798982188297, "grad_norm": 0.03547354027618162, "learning_rate": 7.34651566838679e-06, "loss": 0.1539, "step": 100720 }, { "epoch": 2.0504834605597964, "grad_norm": 41.73808574387623, "learning_rate": 7.3458881865701815e-06, "loss": 0.1251, "step": 100730 }, { "epoch": 2.0506870229007634, "grad_norm": 15.881537624848463, "learning_rate": 7.3452606573753395e-06, "loss": 0.1524, "step": 100740 }, { "epoch": 2.05089058524173, "grad_norm": 1.552515630702795, "learning_rate": 7.344633080814937e-06, "loss": 0.0983, "step": 100750 }, { "epoch": 2.051094147582697, "grad_norm": 15.075088205399489, "learning_rate": 7.34400545690165e-06, "loss": 0.0877, "step": 100760 }, { "epoch": 2.0512977099236642, "grad_norm": 28.631140094833928, "learning_rate": 7.343377785648155e-06, "loss": 0.1374, "step": 100770 }, { "epoch": 2.051501272264631, "grad_norm": 14.407161269724476, "learning_rate": 7.342750067067126e-06, "loss": 0.1635, "step": 100780 }, { "epoch": 2.051704834605598, "grad_norm": 8.11417109924416, "learning_rate": 7.342122301171242e-06, "loss": 0.1065, "step": 100790 }, { "epoch": 2.051908396946565, "grad_norm": 1.4151860588282752, "learning_rate": 7.341494487973182e-06, "loss": 0.0485, "step": 100800 }, { "epoch": 2.0521119592875317, "grad_norm": 1.352259135270255, "learning_rate": 7.340866627485626e-06, "loss": 0.1709, "step": 100810 }, { "epoch": 2.0523155216284987, "grad_norm": 8.298709889334056, "learning_rate": 7.340238719721251e-06, "loss": 0.0868, "step": 100820 }, { "epoch": 2.052519083969466, "grad_norm": 6.18716643248039, "learning_rate": 7.339610764692741e-06, "loss": 0.1259, "step": 100830 }, { "epoch": 2.0527226463104324, "grad_norm": 18.289341596489898, "learning_rate": 7.338982762412781e-06, "loss": 0.1305, "step": 100840 }, { "epoch": 2.0529262086513995, "grad_norm": 8.720818774827087, "learning_rate": 7.338354712894049e-06, "loss": 0.1513, "step": 100850 }, { "epoch": 2.0531297709923666, "grad_norm": 0.742958736808741, "learning_rate": 7.337726616149234e-06, "loss": 0.1834, "step": 100860 }, { "epoch": 2.0533333333333332, "grad_norm": 30.458849037418968, "learning_rate": 7.337098472191015e-06, "loss": 0.1718, "step": 100870 }, { "epoch": 2.0535368956743003, "grad_norm": 11.550030366771496, "learning_rate": 7.3364702810320865e-06, "loss": 0.1952, "step": 100880 }, { "epoch": 2.0537404580152674, "grad_norm": 0.16290997840686802, "learning_rate": 7.335842042685129e-06, "loss": 0.1232, "step": 100890 }, { "epoch": 2.053944020356234, "grad_norm": 11.994826152397625, "learning_rate": 7.335213757162835e-06, "loss": 0.1671, "step": 100900 }, { "epoch": 2.054147582697201, "grad_norm": 1.7934165793270909, "learning_rate": 7.334585424477889e-06, "loss": 0.1192, "step": 100910 }, { "epoch": 2.0543511450381677, "grad_norm": 5.082725301039357, "learning_rate": 7.3339570446429844e-06, "loss": 0.1378, "step": 100920 }, { "epoch": 2.054554707379135, "grad_norm": 1.6272456216790423, "learning_rate": 7.333328617670811e-06, "loss": 0.0586, "step": 100930 }, { "epoch": 2.054758269720102, "grad_norm": 0.47578454392378455, "learning_rate": 7.332700143574062e-06, "loss": 0.0809, "step": 100940 }, { "epoch": 2.0549618320610685, "grad_norm": 36.81792441204898, "learning_rate": 7.332071622365429e-06, "loss": 0.0857, "step": 100950 }, { "epoch": 2.0551653944020356, "grad_norm": 16.118964340459744, "learning_rate": 7.331443054057603e-06, "loss": 0.1576, "step": 100960 }, { "epoch": 2.0553689567430027, "grad_norm": 6.883443124861212, "learning_rate": 7.3308144386632855e-06, "loss": 0.0397, "step": 100970 }, { "epoch": 2.0555725190839693, "grad_norm": 9.84146873665921, "learning_rate": 7.3301857761951655e-06, "loss": 0.1834, "step": 100980 }, { "epoch": 2.0557760814249364, "grad_norm": 0.047941746453308516, "learning_rate": 7.329557066665943e-06, "loss": 0.0787, "step": 100990 }, { "epoch": 2.0559796437659035, "grad_norm": 1.28036262492194, "learning_rate": 7.3289283100883166e-06, "loss": 0.1433, "step": 101000 }, { "epoch": 2.05618320610687, "grad_norm": 10.127010238235384, "learning_rate": 7.328299506474982e-06, "loss": 0.0949, "step": 101010 }, { "epoch": 2.056386768447837, "grad_norm": 20.358410511120308, "learning_rate": 7.32767065583864e-06, "loss": 0.0889, "step": 101020 }, { "epoch": 2.0565903307888043, "grad_norm": 0.22623934180237443, "learning_rate": 7.327041758191993e-06, "loss": 0.073, "step": 101030 }, { "epoch": 2.056793893129771, "grad_norm": 27.522150275952214, "learning_rate": 7.326412813547738e-06, "loss": 0.0574, "step": 101040 }, { "epoch": 2.056997455470738, "grad_norm": 16.129587534978533, "learning_rate": 7.325783821918581e-06, "loss": 0.1577, "step": 101050 }, { "epoch": 2.057201017811705, "grad_norm": 0.10238703431464408, "learning_rate": 7.325154783317225e-06, "loss": 0.0909, "step": 101060 }, { "epoch": 2.0574045801526717, "grad_norm": 16.176410730028042, "learning_rate": 7.324525697756372e-06, "loss": 0.1977, "step": 101070 }, { "epoch": 2.0576081424936388, "grad_norm": 7.65628469106306, "learning_rate": 7.323896565248728e-06, "loss": 0.1773, "step": 101080 }, { "epoch": 2.0578117048346054, "grad_norm": 9.768793024791673, "learning_rate": 7.323267385807e-06, "loss": 0.1123, "step": 101090 }, { "epoch": 2.0580152671755725, "grad_norm": 4.984484324143961, "learning_rate": 7.322638159443895e-06, "loss": 0.1625, "step": 101100 }, { "epoch": 2.0582188295165396, "grad_norm": 0.1530464025735601, "learning_rate": 7.322008886172122e-06, "loss": 0.114, "step": 101110 }, { "epoch": 2.058422391857506, "grad_norm": 14.805933447102003, "learning_rate": 7.321379566004387e-06, "loss": 0.1334, "step": 101120 }, { "epoch": 2.0586259541984733, "grad_norm": 18.05441803373933, "learning_rate": 7.320750198953404e-06, "loss": 0.0499, "step": 101130 }, { "epoch": 2.0588295165394404, "grad_norm": 0.04687244432285394, "learning_rate": 7.320120785031881e-06, "loss": 0.0444, "step": 101140 }, { "epoch": 2.059033078880407, "grad_norm": 25.222465790819456, "learning_rate": 7.3194913242525304e-06, "loss": 0.0944, "step": 101150 }, { "epoch": 2.059236641221374, "grad_norm": 14.317120951002213, "learning_rate": 7.318861816628062e-06, "loss": 0.1419, "step": 101160 }, { "epoch": 2.059440203562341, "grad_norm": 9.173453135213036, "learning_rate": 7.318232262171196e-06, "loss": 0.1677, "step": 101170 }, { "epoch": 2.0596437659033078, "grad_norm": 13.728476229640766, "learning_rate": 7.317602660894643e-06, "loss": 0.085, "step": 101180 }, { "epoch": 2.059847328244275, "grad_norm": 0.16435242644526535, "learning_rate": 7.316973012811119e-06, "loss": 0.1036, "step": 101190 }, { "epoch": 2.060050890585242, "grad_norm": 8.454994673630155, "learning_rate": 7.316343317933341e-06, "loss": 0.1708, "step": 101200 }, { "epoch": 2.0602544529262086, "grad_norm": 0.2876420714790593, "learning_rate": 7.315713576274026e-06, "loss": 0.1121, "step": 101210 }, { "epoch": 2.0604580152671756, "grad_norm": 10.949079739280693, "learning_rate": 7.315083787845891e-06, "loss": 0.042, "step": 101220 }, { "epoch": 2.0606615776081423, "grad_norm": 42.48899907584908, "learning_rate": 7.314453952661661e-06, "loss": 0.1184, "step": 101230 }, { "epoch": 2.0608651399491094, "grad_norm": 10.668019356888818, "learning_rate": 7.313824070734048e-06, "loss": 0.1624, "step": 101240 }, { "epoch": 2.0610687022900764, "grad_norm": 29.77114548344453, "learning_rate": 7.3131941420757805e-06, "loss": 0.1391, "step": 101250 }, { "epoch": 2.061272264631043, "grad_norm": 12.599640243982117, "learning_rate": 7.312564166699578e-06, "loss": 0.236, "step": 101260 }, { "epoch": 2.06147582697201, "grad_norm": 0.1850298761651318, "learning_rate": 7.3119341446181625e-06, "loss": 0.1197, "step": 101270 }, { "epoch": 2.0616793893129772, "grad_norm": 0.017445327791438855, "learning_rate": 7.31130407584426e-06, "loss": 0.0901, "step": 101280 }, { "epoch": 2.061882951653944, "grad_norm": 0.21411058156239215, "learning_rate": 7.310673960390594e-06, "loss": 0.0606, "step": 101290 }, { "epoch": 2.062086513994911, "grad_norm": 0.8413310320323566, "learning_rate": 7.310043798269891e-06, "loss": 0.1541, "step": 101300 }, { "epoch": 2.062290076335878, "grad_norm": 18.381795703375005, "learning_rate": 7.309413589494879e-06, "loss": 0.1121, "step": 101310 }, { "epoch": 2.0624936386768447, "grad_norm": 12.897084808905666, "learning_rate": 7.308783334078286e-06, "loss": 0.2069, "step": 101320 }, { "epoch": 2.0626972010178117, "grad_norm": 1.1845511669582771, "learning_rate": 7.308153032032837e-06, "loss": 0.0943, "step": 101330 }, { "epoch": 2.062900763358779, "grad_norm": 9.0299219107688, "learning_rate": 7.3075226833712665e-06, "loss": 0.1459, "step": 101340 }, { "epoch": 2.0631043256997454, "grad_norm": 17.815073557592214, "learning_rate": 7.306892288106305e-06, "loss": 0.0742, "step": 101350 }, { "epoch": 2.0633078880407125, "grad_norm": 37.336993843985894, "learning_rate": 7.30626184625068e-06, "loss": 0.1634, "step": 101360 }, { "epoch": 2.0635114503816796, "grad_norm": 1.7185473327198093, "learning_rate": 7.305631357817127e-06, "loss": 0.1636, "step": 101370 }, { "epoch": 2.0637150127226462, "grad_norm": 13.094376869773047, "learning_rate": 7.305000822818379e-06, "loss": 0.172, "step": 101380 }, { "epoch": 2.0639185750636133, "grad_norm": 0.11916826832302127, "learning_rate": 7.304370241267171e-06, "loss": 0.1187, "step": 101390 }, { "epoch": 2.06412213740458, "grad_norm": 19.13284524552862, "learning_rate": 7.3037396131762376e-06, "loss": 0.0932, "step": 101400 }, { "epoch": 2.064325699745547, "grad_norm": 0.11176908595513972, "learning_rate": 7.303108938558316e-06, "loss": 0.1597, "step": 101410 }, { "epoch": 2.064529262086514, "grad_norm": 19.23934232741225, "learning_rate": 7.302478217426142e-06, "loss": 0.143, "step": 101420 }, { "epoch": 2.0647328244274807, "grad_norm": 0.42738328084486726, "learning_rate": 7.3018474497924575e-06, "loss": 0.0653, "step": 101430 }, { "epoch": 2.064936386768448, "grad_norm": 1.5480193832196543, "learning_rate": 7.301216635669996e-06, "loss": 0.0601, "step": 101440 }, { "epoch": 2.065139949109415, "grad_norm": 1.2836107893472883, "learning_rate": 7.300585775071501e-06, "loss": 0.074, "step": 101450 }, { "epoch": 2.0653435114503815, "grad_norm": 0.166716837864312, "learning_rate": 7.299954868009715e-06, "loss": 0.0762, "step": 101460 }, { "epoch": 2.0655470737913486, "grad_norm": 0.7886081600794839, "learning_rate": 7.299323914497377e-06, "loss": 0.0733, "step": 101470 }, { "epoch": 2.0657506361323157, "grad_norm": 0.22748959439002042, "learning_rate": 7.29869291454723e-06, "loss": 0.2377, "step": 101480 }, { "epoch": 2.0659541984732823, "grad_norm": 16.111145548736005, "learning_rate": 7.29806186817202e-06, "loss": 0.1381, "step": 101490 }, { "epoch": 2.0661577608142494, "grad_norm": 0.03509423561194518, "learning_rate": 7.297430775384491e-06, "loss": 0.1175, "step": 101500 }, { "epoch": 2.0663613231552165, "grad_norm": 15.742034156707962, "learning_rate": 7.2967996361973884e-06, "loss": 0.1341, "step": 101510 }, { "epoch": 2.066564885496183, "grad_norm": 15.415297426961244, "learning_rate": 7.296168450623459e-06, "loss": 0.1052, "step": 101520 }, { "epoch": 2.06676844783715, "grad_norm": 5.822131356906826, "learning_rate": 7.295537218675448e-06, "loss": 0.1149, "step": 101530 }, { "epoch": 2.066972010178117, "grad_norm": 5.313657390321859, "learning_rate": 7.294905940366108e-06, "loss": 0.1731, "step": 101540 }, { "epoch": 2.067175572519084, "grad_norm": 17.958936070060567, "learning_rate": 7.294274615708188e-06, "loss": 0.2169, "step": 101550 }, { "epoch": 2.067379134860051, "grad_norm": 12.44011168692609, "learning_rate": 7.2936432447144355e-06, "loss": 0.061, "step": 101560 }, { "epoch": 2.0675826972010176, "grad_norm": 0.7185995380613317, "learning_rate": 7.293011827397605e-06, "loss": 0.041, "step": 101570 }, { "epoch": 2.0677862595419847, "grad_norm": 0.3071377743788597, "learning_rate": 7.292380363770446e-06, "loss": 0.0923, "step": 101580 }, { "epoch": 2.0679898218829518, "grad_norm": 24.932096513919717, "learning_rate": 7.2917488538457125e-06, "loss": 0.1556, "step": 101590 }, { "epoch": 2.0681933842239184, "grad_norm": 13.310458563558367, "learning_rate": 7.29111729763616e-06, "loss": 0.0966, "step": 101600 }, { "epoch": 2.0683969465648855, "grad_norm": 55.63779456580064, "learning_rate": 7.290485695154542e-06, "loss": 0.1204, "step": 101610 }, { "epoch": 2.0686005089058526, "grad_norm": 1.1855194719016624, "learning_rate": 7.289854046413616e-06, "loss": 0.0511, "step": 101620 }, { "epoch": 2.068804071246819, "grad_norm": 0.9834199062348811, "learning_rate": 7.289222351426139e-06, "loss": 0.092, "step": 101630 }, { "epoch": 2.0690076335877863, "grad_norm": 49.87365062327018, "learning_rate": 7.288590610204866e-06, "loss": 0.1553, "step": 101640 }, { "epoch": 2.0692111959287534, "grad_norm": 0.4159496040854104, "learning_rate": 7.2879588227625595e-06, "loss": 0.0565, "step": 101650 }, { "epoch": 2.06941475826972, "grad_norm": 6.011265631995295, "learning_rate": 7.287326989111977e-06, "loss": 0.0993, "step": 101660 }, { "epoch": 2.069618320610687, "grad_norm": 0.4597229390690556, "learning_rate": 7.28669510926588e-06, "loss": 0.0415, "step": 101670 }, { "epoch": 2.069821882951654, "grad_norm": 0.3944832291831714, "learning_rate": 7.286063183237031e-06, "loss": 0.1759, "step": 101680 }, { "epoch": 2.070025445292621, "grad_norm": 18.66572488221167, "learning_rate": 7.28543121103819e-06, "loss": 0.2169, "step": 101690 }, { "epoch": 2.070229007633588, "grad_norm": 3.425654708641521, "learning_rate": 7.284799192682123e-06, "loss": 0.0835, "step": 101700 }, { "epoch": 2.0704325699745545, "grad_norm": 8.941298480804676, "learning_rate": 7.284167128181593e-06, "loss": 0.0495, "step": 101710 }, { "epoch": 2.0706361323155216, "grad_norm": 0.5322388786993625, "learning_rate": 7.283535017549367e-06, "loss": 0.1713, "step": 101720 }, { "epoch": 2.0708396946564886, "grad_norm": 0.05468416856552681, "learning_rate": 7.282902860798209e-06, "loss": 0.0999, "step": 101730 }, { "epoch": 2.0710432569974553, "grad_norm": 0.17777757393957766, "learning_rate": 7.282270657940888e-06, "loss": 0.0946, "step": 101740 }, { "epoch": 2.0712468193384224, "grad_norm": 44.389031629295, "learning_rate": 7.281638408990172e-06, "loss": 0.0669, "step": 101750 }, { "epoch": 2.0714503816793894, "grad_norm": 9.298015756320222, "learning_rate": 7.2810061139588285e-06, "loss": 0.121, "step": 101760 }, { "epoch": 2.071653944020356, "grad_norm": 9.6225423525098, "learning_rate": 7.280373772859628e-06, "loss": 0.225, "step": 101770 }, { "epoch": 2.071857506361323, "grad_norm": 9.996199470434162, "learning_rate": 7.2797413857053434e-06, "loss": 0.1467, "step": 101780 }, { "epoch": 2.0720610687022902, "grad_norm": 0.49111209711525244, "learning_rate": 7.279108952508745e-06, "loss": 0.1483, "step": 101790 }, { "epoch": 2.072264631043257, "grad_norm": 35.89213231678368, "learning_rate": 7.278476473282605e-06, "loss": 0.1323, "step": 101800 }, { "epoch": 2.072468193384224, "grad_norm": 16.58981206495354, "learning_rate": 7.2778439480397e-06, "loss": 0.0998, "step": 101810 }, { "epoch": 2.072671755725191, "grad_norm": 1.1856918132030854, "learning_rate": 7.2772113767928e-06, "loss": 0.1582, "step": 101820 }, { "epoch": 2.0728753180661577, "grad_norm": 0.08724289944610125, "learning_rate": 7.276578759554684e-06, "loss": 0.1448, "step": 101830 }, { "epoch": 2.0730788804071247, "grad_norm": 10.898478227530221, "learning_rate": 7.27594609633813e-06, "loss": 0.1485, "step": 101840 }, { "epoch": 2.073282442748092, "grad_norm": 16.444587882823114, "learning_rate": 7.275313387155911e-06, "loss": 0.2032, "step": 101850 }, { "epoch": 2.0734860050890584, "grad_norm": 17.948777843731285, "learning_rate": 7.274680632020807e-06, "loss": 0.1754, "step": 101860 }, { "epoch": 2.0736895674300255, "grad_norm": 1.6901790658801414, "learning_rate": 7.2740478309456e-06, "loss": 0.204, "step": 101870 }, { "epoch": 2.073893129770992, "grad_norm": 0.4416731569315488, "learning_rate": 7.2734149839430665e-06, "loss": 0.0459, "step": 101880 }, { "epoch": 2.0740966921119592, "grad_norm": 3.212015713017217, "learning_rate": 7.272782091025989e-06, "loss": 0.0797, "step": 101890 }, { "epoch": 2.0743002544529263, "grad_norm": 0.06761184460567456, "learning_rate": 7.272149152207151e-06, "loss": 0.1203, "step": 101900 }, { "epoch": 2.074503816793893, "grad_norm": 10.493581969075011, "learning_rate": 7.271516167499334e-06, "loss": 0.0574, "step": 101910 }, { "epoch": 2.07470737913486, "grad_norm": 3.4439129025995516, "learning_rate": 7.270883136915323e-06, "loss": 0.1139, "step": 101920 }, { "epoch": 2.074910941475827, "grad_norm": 31.144839756154365, "learning_rate": 7.270250060467901e-06, "loss": 0.1797, "step": 101930 }, { "epoch": 2.0751145038167937, "grad_norm": 32.824123394917635, "learning_rate": 7.269616938169855e-06, "loss": 0.0487, "step": 101940 }, { "epoch": 2.075318066157761, "grad_norm": 15.67241942488267, "learning_rate": 7.268983770033972e-06, "loss": 0.2077, "step": 101950 }, { "epoch": 2.075521628498728, "grad_norm": 12.74192946227513, "learning_rate": 7.268350556073041e-06, "loss": 0.1586, "step": 101960 }, { "epoch": 2.0757251908396945, "grad_norm": 0.08519222463741338, "learning_rate": 7.267717296299846e-06, "loss": 0.1541, "step": 101970 }, { "epoch": 2.0759287531806616, "grad_norm": 16.827435594595002, "learning_rate": 7.267083990727182e-06, "loss": 0.1492, "step": 101980 }, { "epoch": 2.0761323155216287, "grad_norm": 4.33829705677238, "learning_rate": 7.266450639367835e-06, "loss": 0.0206, "step": 101990 }, { "epoch": 2.0763358778625953, "grad_norm": 6.2431299463165155, "learning_rate": 7.265817242234598e-06, "loss": 0.1513, "step": 102000 }, { "epoch": 2.0765394402035624, "grad_norm": 36.36620669291906, "learning_rate": 7.2651837993402655e-06, "loss": 0.1855, "step": 102010 }, { "epoch": 2.0767430025445295, "grad_norm": 7.269596504471168, "learning_rate": 7.264550310697627e-06, "loss": 0.0852, "step": 102020 }, { "epoch": 2.076946564885496, "grad_norm": 0.4558307304933657, "learning_rate": 7.263916776319477e-06, "loss": 0.1564, "step": 102030 }, { "epoch": 2.077150127226463, "grad_norm": 3.836160374774229, "learning_rate": 7.263283196218614e-06, "loss": 0.1121, "step": 102040 }, { "epoch": 2.07735368956743, "grad_norm": 8.168834249427977, "learning_rate": 7.262649570407832e-06, "loss": 0.1145, "step": 102050 }, { "epoch": 2.077557251908397, "grad_norm": 0.3682341026208696, "learning_rate": 7.262015898899927e-06, "loss": 0.0824, "step": 102060 }, { "epoch": 2.077760814249364, "grad_norm": 11.420132633775378, "learning_rate": 7.261382181707697e-06, "loss": 0.1599, "step": 102070 }, { "epoch": 2.0779643765903306, "grad_norm": 0.34917410983483127, "learning_rate": 7.260748418843941e-06, "loss": 0.1601, "step": 102080 }, { "epoch": 2.0781679389312977, "grad_norm": 0.08105621076504688, "learning_rate": 7.26011461032146e-06, "loss": 0.1919, "step": 102090 }, { "epoch": 2.0783715012722648, "grad_norm": 33.60791247994022, "learning_rate": 7.2594807561530535e-06, "loss": 0.1074, "step": 102100 }, { "epoch": 2.0785750636132314, "grad_norm": 0.08537324786606273, "learning_rate": 7.25884685635152e-06, "loss": 0.185, "step": 102110 }, { "epoch": 2.0787786259541985, "grad_norm": 12.103684767735942, "learning_rate": 7.2582129109296685e-06, "loss": 0.1556, "step": 102120 }, { "epoch": 2.0789821882951656, "grad_norm": 0.21943901284443448, "learning_rate": 7.257578919900297e-06, "loss": 0.0619, "step": 102130 }, { "epoch": 2.079185750636132, "grad_norm": 19.72926458759575, "learning_rate": 7.256944883276211e-06, "loss": 0.1448, "step": 102140 }, { "epoch": 2.0793893129770993, "grad_norm": 9.914449832106884, "learning_rate": 7.256310801070217e-06, "loss": 0.111, "step": 102150 }, { "epoch": 2.0795928753180664, "grad_norm": 56.27892145258134, "learning_rate": 7.255676673295121e-06, "loss": 0.199, "step": 102160 }, { "epoch": 2.079796437659033, "grad_norm": 11.791012280673531, "learning_rate": 7.255042499963728e-06, "loss": 0.1414, "step": 102170 }, { "epoch": 2.08, "grad_norm": 24.226633108603117, "learning_rate": 7.254408281088847e-06, "loss": 0.12, "step": 102180 }, { "epoch": 2.0802035623409667, "grad_norm": 11.04659924364616, "learning_rate": 7.2537740166832884e-06, "loss": 0.1583, "step": 102190 }, { "epoch": 2.080407124681934, "grad_norm": 0.08523970970020109, "learning_rate": 7.25313970675986e-06, "loss": 0.1067, "step": 102200 }, { "epoch": 2.080610687022901, "grad_norm": 4.790126521199198, "learning_rate": 7.252505351331374e-06, "loss": 0.1099, "step": 102210 }, { "epoch": 2.0808142493638675, "grad_norm": 8.537837801227605, "learning_rate": 7.251870950410641e-06, "loss": 0.104, "step": 102220 }, { "epoch": 2.0810178117048346, "grad_norm": 0.1785311071321633, "learning_rate": 7.2512365040104714e-06, "loss": 0.1994, "step": 102230 }, { "epoch": 2.0812213740458017, "grad_norm": 0.9334906244091947, "learning_rate": 7.250602012143684e-06, "loss": 0.0839, "step": 102240 }, { "epoch": 2.0814249363867683, "grad_norm": 49.04256695165142, "learning_rate": 7.2499674748230894e-06, "loss": 0.2249, "step": 102250 }, { "epoch": 2.0816284987277354, "grad_norm": 0.8121965297617574, "learning_rate": 7.2493328920615036e-06, "loss": 0.0932, "step": 102260 }, { "epoch": 2.0818320610687024, "grad_norm": 1.1714868032671617, "learning_rate": 7.248698263871743e-06, "loss": 0.1526, "step": 102270 }, { "epoch": 2.082035623409669, "grad_norm": 1.9950930722782712, "learning_rate": 7.248063590266624e-06, "loss": 0.1037, "step": 102280 }, { "epoch": 2.082239185750636, "grad_norm": 0.6823362360834371, "learning_rate": 7.2474288712589656e-06, "loss": 0.1181, "step": 102290 }, { "epoch": 2.0824427480916032, "grad_norm": 4.559399980155833, "learning_rate": 7.246794106861588e-06, "loss": 0.105, "step": 102300 }, { "epoch": 2.08264631043257, "grad_norm": 6.582987510608297, "learning_rate": 7.2461592970873065e-06, "loss": 0.1361, "step": 102310 }, { "epoch": 2.082849872773537, "grad_norm": 12.360626073370545, "learning_rate": 7.245524441948947e-06, "loss": 0.0667, "step": 102320 }, { "epoch": 2.083053435114504, "grad_norm": 22.780165197182583, "learning_rate": 7.244889541459329e-06, "loss": 0.201, "step": 102330 }, { "epoch": 2.0832569974554707, "grad_norm": 5.667356311879746, "learning_rate": 7.244254595631275e-06, "loss": 0.054, "step": 102340 }, { "epoch": 2.0834605597964377, "grad_norm": 41.36888533075846, "learning_rate": 7.24361960447761e-06, "loss": 0.1209, "step": 102350 }, { "epoch": 2.0836641221374044, "grad_norm": 0.9693124843192347, "learning_rate": 7.242984568011155e-06, "loss": 0.1323, "step": 102360 }, { "epoch": 2.0838676844783715, "grad_norm": 12.398864385357214, "learning_rate": 7.242349486244739e-06, "loss": 0.167, "step": 102370 }, { "epoch": 2.0840712468193385, "grad_norm": 1.1014349831053, "learning_rate": 7.241714359191188e-06, "loss": 0.0945, "step": 102380 }, { "epoch": 2.084274809160305, "grad_norm": 11.995481159681747, "learning_rate": 7.241079186863327e-06, "loss": 0.15, "step": 102390 }, { "epoch": 2.0844783715012722, "grad_norm": 10.856393150814817, "learning_rate": 7.2404439692739846e-06, "loss": 0.1259, "step": 102400 }, { "epoch": 2.0846819338422393, "grad_norm": 0.13340588063648298, "learning_rate": 7.239808706435994e-06, "loss": 0.1973, "step": 102410 }, { "epoch": 2.084885496183206, "grad_norm": 11.85283250541461, "learning_rate": 7.239173398362178e-06, "loss": 0.1518, "step": 102420 }, { "epoch": 2.085089058524173, "grad_norm": 6.260167344857727, "learning_rate": 7.2385380450653715e-06, "loss": 0.1311, "step": 102430 }, { "epoch": 2.08529262086514, "grad_norm": 32.87845964231941, "learning_rate": 7.237902646558408e-06, "loss": 0.1083, "step": 102440 }, { "epoch": 2.0854961832061067, "grad_norm": 0.2492749690063363, "learning_rate": 7.237267202854117e-06, "loss": 0.0671, "step": 102450 }, { "epoch": 2.085699745547074, "grad_norm": 0.25900741515871567, "learning_rate": 7.236631713965333e-06, "loss": 0.132, "step": 102460 }, { "epoch": 2.085903307888041, "grad_norm": 5.634583326141459, "learning_rate": 7.235996179904892e-06, "loss": 0.1327, "step": 102470 }, { "epoch": 2.0861068702290075, "grad_norm": 8.179804034102078, "learning_rate": 7.235360600685626e-06, "loss": 0.0785, "step": 102480 }, { "epoch": 2.0863104325699746, "grad_norm": 8.878257081799493, "learning_rate": 7.234724976320375e-06, "loss": 0.2143, "step": 102490 }, { "epoch": 2.0865139949109412, "grad_norm": 0.008023109187848581, "learning_rate": 7.234089306821975e-06, "loss": 0.1449, "step": 102500 }, { "epoch": 2.0867175572519083, "grad_norm": 0.11868707382415068, "learning_rate": 7.233453592203263e-06, "loss": 0.0291, "step": 102510 }, { "epoch": 2.0869211195928754, "grad_norm": 0.748321395386486, "learning_rate": 7.232817832477079e-06, "loss": 0.1025, "step": 102520 }, { "epoch": 2.087124681933842, "grad_norm": 15.558332354654352, "learning_rate": 7.232182027656265e-06, "loss": 0.2041, "step": 102530 }, { "epoch": 2.087328244274809, "grad_norm": 10.34971099721353, "learning_rate": 7.231546177753659e-06, "loss": 0.1727, "step": 102540 }, { "epoch": 2.087531806615776, "grad_norm": 10.546978634044763, "learning_rate": 7.230910282782103e-06, "loss": 0.1462, "step": 102550 }, { "epoch": 2.087735368956743, "grad_norm": 43.86061653593448, "learning_rate": 7.230274342754441e-06, "loss": 0.1747, "step": 102560 }, { "epoch": 2.08793893129771, "grad_norm": 2.3605313707588045, "learning_rate": 7.229638357683515e-06, "loss": 0.11, "step": 102570 }, { "epoch": 2.088142493638677, "grad_norm": 0.14405262866386795, "learning_rate": 7.229002327582172e-06, "loss": 0.1967, "step": 102580 }, { "epoch": 2.0883460559796436, "grad_norm": 0.4867325433084488, "learning_rate": 7.228366252463256e-06, "loss": 0.1789, "step": 102590 }, { "epoch": 2.0885496183206107, "grad_norm": 0.44487821717379883, "learning_rate": 7.227730132339612e-06, "loss": 0.1387, "step": 102600 }, { "epoch": 2.088753180661578, "grad_norm": 18.673907345933014, "learning_rate": 7.227093967224091e-06, "loss": 0.1109, "step": 102610 }, { "epoch": 2.0889567430025444, "grad_norm": 33.4378834261972, "learning_rate": 7.226457757129537e-06, "loss": 0.1048, "step": 102620 }, { "epoch": 2.0891603053435115, "grad_norm": 0.412920736759818, "learning_rate": 7.225821502068802e-06, "loss": 0.1178, "step": 102630 }, { "epoch": 2.0893638676844786, "grad_norm": 24.063933460306412, "learning_rate": 7.225185202054734e-06, "loss": 0.0936, "step": 102640 }, { "epoch": 2.089567430025445, "grad_norm": 32.64448518182093, "learning_rate": 7.224548857100184e-06, "loss": 0.0657, "step": 102650 }, { "epoch": 2.0897709923664123, "grad_norm": 0.10401171561136435, "learning_rate": 7.223912467218006e-06, "loss": 0.1632, "step": 102660 }, { "epoch": 2.0899745547073794, "grad_norm": 4.558621285914475, "learning_rate": 7.223276032421053e-06, "loss": 0.1867, "step": 102670 }, { "epoch": 2.090178117048346, "grad_norm": 3.74788725426467, "learning_rate": 7.222639552722175e-06, "loss": 0.0872, "step": 102680 }, { "epoch": 2.090381679389313, "grad_norm": 2.006718207534018, "learning_rate": 7.222003028134227e-06, "loss": 0.2005, "step": 102690 }, { "epoch": 2.0905852417302797, "grad_norm": 6.557254391014976, "learning_rate": 7.221366458670068e-06, "loss": 0.1202, "step": 102700 }, { "epoch": 2.090788804071247, "grad_norm": 0.19986447642172717, "learning_rate": 7.220729844342552e-06, "loss": 0.1607, "step": 102710 }, { "epoch": 2.090992366412214, "grad_norm": 8.874183597734609, "learning_rate": 7.220093185164536e-06, "loss": 0.1003, "step": 102720 }, { "epoch": 2.0911959287531805, "grad_norm": 3.4510375934935116, "learning_rate": 7.21945648114888e-06, "loss": 0.1765, "step": 102730 }, { "epoch": 2.0913994910941476, "grad_norm": 7.590479156922282, "learning_rate": 7.21881973230844e-06, "loss": 0.136, "step": 102740 }, { "epoch": 2.0916030534351147, "grad_norm": 0.1629870633497316, "learning_rate": 7.218182938656079e-06, "loss": 0.0786, "step": 102750 }, { "epoch": 2.0918066157760813, "grad_norm": 21.667657335836914, "learning_rate": 7.217546100204657e-06, "loss": 0.0983, "step": 102760 }, { "epoch": 2.0920101781170484, "grad_norm": 25.13896374714182, "learning_rate": 7.216909216967033e-06, "loss": 0.1673, "step": 102770 }, { "epoch": 2.0922137404580154, "grad_norm": 0.13039872387206325, "learning_rate": 7.216272288956073e-06, "loss": 0.0496, "step": 102780 }, { "epoch": 2.092417302798982, "grad_norm": 14.82799367663809, "learning_rate": 7.21563531618464e-06, "loss": 0.1988, "step": 102790 }, { "epoch": 2.092620865139949, "grad_norm": 2.9255253410409674, "learning_rate": 7.214998298665598e-06, "loss": 0.0481, "step": 102800 }, { "epoch": 2.0928244274809162, "grad_norm": 0.10655271688276904, "learning_rate": 7.2143612364118125e-06, "loss": 0.0861, "step": 102810 }, { "epoch": 2.093027989821883, "grad_norm": 9.289913202231904, "learning_rate": 7.213724129436151e-06, "loss": 0.1201, "step": 102820 }, { "epoch": 2.09323155216285, "grad_norm": 13.141517862053242, "learning_rate": 7.213086977751477e-06, "loss": 0.1961, "step": 102830 }, { "epoch": 2.0934351145038166, "grad_norm": 2.9882068836411, "learning_rate": 7.2124497813706614e-06, "loss": 0.1119, "step": 102840 }, { "epoch": 2.0936386768447837, "grad_norm": 6.4371572703018645, "learning_rate": 7.2118125403065746e-06, "loss": 0.0977, "step": 102850 }, { "epoch": 2.0938422391857507, "grad_norm": 0.1108820022383598, "learning_rate": 7.211175254572083e-06, "loss": 0.1562, "step": 102860 }, { "epoch": 2.0940458015267174, "grad_norm": 6.2136621960311995, "learning_rate": 7.21053792418006e-06, "loss": 0.19, "step": 102870 }, { "epoch": 2.0942493638676845, "grad_norm": 13.67008140606774, "learning_rate": 7.2099005491433785e-06, "loss": 0.1647, "step": 102880 }, { "epoch": 2.0944529262086515, "grad_norm": 21.672427295349674, "learning_rate": 7.209263129474905e-06, "loss": 0.0679, "step": 102890 }, { "epoch": 2.094656488549618, "grad_norm": 8.27785323310761, "learning_rate": 7.20862566518752e-06, "loss": 0.1391, "step": 102900 }, { "epoch": 2.0948600508905852, "grad_norm": 0.29953812464257545, "learning_rate": 7.207988156294094e-06, "loss": 0.0826, "step": 102910 }, { "epoch": 2.0950636132315523, "grad_norm": 0.22723185109327937, "learning_rate": 7.207350602807504e-06, "loss": 0.0622, "step": 102920 }, { "epoch": 2.095267175572519, "grad_norm": 8.551868502718975, "learning_rate": 7.2067130047406246e-06, "loss": 0.1147, "step": 102930 }, { "epoch": 2.095470737913486, "grad_norm": 9.158649492933463, "learning_rate": 7.206075362106334e-06, "loss": 0.1308, "step": 102940 }, { "epoch": 2.095674300254453, "grad_norm": 0.007525531011844466, "learning_rate": 7.20543767491751e-06, "loss": 0.2229, "step": 102950 }, { "epoch": 2.0958778625954197, "grad_norm": 17.58341566514361, "learning_rate": 7.204799943187033e-06, "loss": 0.0189, "step": 102960 }, { "epoch": 2.096081424936387, "grad_norm": 42.68078277784901, "learning_rate": 7.204162166927779e-06, "loss": 0.1308, "step": 102970 }, { "epoch": 2.096284987277354, "grad_norm": 15.905359380691676, "learning_rate": 7.203524346152632e-06, "loss": 0.125, "step": 102980 }, { "epoch": 2.0964885496183205, "grad_norm": 8.291118805328045, "learning_rate": 7.202886480874474e-06, "loss": 0.106, "step": 102990 }, { "epoch": 2.0966921119592876, "grad_norm": 11.113887352611204, "learning_rate": 7.202248571106185e-06, "loss": 0.1051, "step": 103000 }, { "epoch": 2.0968956743002543, "grad_norm": 3.091435989983302, "learning_rate": 7.201610616860649e-06, "loss": 0.1522, "step": 103010 }, { "epoch": 2.0970992366412213, "grad_norm": 0.08807854243942831, "learning_rate": 7.200972618150752e-06, "loss": 0.0893, "step": 103020 }, { "epoch": 2.0973027989821884, "grad_norm": 16.4724330695972, "learning_rate": 7.200334574989378e-06, "loss": 0.2042, "step": 103030 }, { "epoch": 2.097506361323155, "grad_norm": 0.15529308944692483, "learning_rate": 7.199696487389414e-06, "loss": 0.0303, "step": 103040 }, { "epoch": 2.097709923664122, "grad_norm": 24.157903099957508, "learning_rate": 7.1990583553637465e-06, "loss": 0.1748, "step": 103050 }, { "epoch": 2.097913486005089, "grad_norm": 13.163617408895684, "learning_rate": 7.19842017892526e-06, "loss": 0.1759, "step": 103060 }, { "epoch": 2.098117048346056, "grad_norm": 0.7967938797792984, "learning_rate": 7.197781958086849e-06, "loss": 0.0832, "step": 103070 }, { "epoch": 2.098320610687023, "grad_norm": 6.9360948038572205, "learning_rate": 7.197143692861401e-06, "loss": 0.1495, "step": 103080 }, { "epoch": 2.09852417302799, "grad_norm": 6.185529471110257, "learning_rate": 7.1965053832618045e-06, "loss": 0.0589, "step": 103090 }, { "epoch": 2.0987277353689566, "grad_norm": 4.770107641720643, "learning_rate": 7.195867029300954e-06, "loss": 0.0214, "step": 103100 }, { "epoch": 2.0989312977099237, "grad_norm": 39.615731306908486, "learning_rate": 7.195228630991741e-06, "loss": 0.1389, "step": 103110 }, { "epoch": 2.099134860050891, "grad_norm": 39.33706426489721, "learning_rate": 7.194590188347057e-06, "loss": 0.0583, "step": 103120 }, { "epoch": 2.0993384223918574, "grad_norm": 0.32705469875442317, "learning_rate": 7.193951701379798e-06, "loss": 0.0658, "step": 103130 }, { "epoch": 2.0995419847328245, "grad_norm": 15.407729460835045, "learning_rate": 7.193313170102858e-06, "loss": 0.2793, "step": 103140 }, { "epoch": 2.099745547073791, "grad_norm": 43.86828999901943, "learning_rate": 7.192674594529134e-06, "loss": 0.2455, "step": 103150 }, { "epoch": 2.099949109414758, "grad_norm": 0.06307958635254189, "learning_rate": 7.192035974671524e-06, "loss": 0.174, "step": 103160 }, { "epoch": 2.1001526717557253, "grad_norm": 11.668114271376407, "learning_rate": 7.191397310542923e-06, "loss": 0.0935, "step": 103170 }, { "epoch": 2.100356234096692, "grad_norm": 19.04137345206686, "learning_rate": 7.1907586021562295e-06, "loss": 0.036, "step": 103180 }, { "epoch": 2.100559796437659, "grad_norm": 0.01212979505116976, "learning_rate": 7.190119849524346e-06, "loss": 0.0927, "step": 103190 }, { "epoch": 2.100763358778626, "grad_norm": 22.262836218339498, "learning_rate": 7.189481052660171e-06, "loss": 0.1894, "step": 103200 }, { "epoch": 2.1009669211195927, "grad_norm": 0.09315882958260817, "learning_rate": 7.188842211576605e-06, "loss": 0.1359, "step": 103210 }, { "epoch": 2.10117048346056, "grad_norm": 0.01578541621707113, "learning_rate": 7.188203326286553e-06, "loss": 0.085, "step": 103220 }, { "epoch": 2.101374045801527, "grad_norm": 1.7058171627429066, "learning_rate": 7.187564396802915e-06, "loss": 0.1902, "step": 103230 }, { "epoch": 2.1015776081424935, "grad_norm": 16.324247819700734, "learning_rate": 7.186925423138598e-06, "loss": 0.1611, "step": 103240 }, { "epoch": 2.1017811704834606, "grad_norm": 32.71559479400846, "learning_rate": 7.1862864053065055e-06, "loss": 0.166, "step": 103250 }, { "epoch": 2.1019847328244277, "grad_norm": 0.9249618188728539, "learning_rate": 7.18564734331954e-06, "loss": 0.0895, "step": 103260 }, { "epoch": 2.1021882951653943, "grad_norm": 3.5681841973027386, "learning_rate": 7.185008237190614e-06, "loss": 0.1702, "step": 103270 }, { "epoch": 2.1023918575063614, "grad_norm": 0.21523177800936663, "learning_rate": 7.1843690869326325e-06, "loss": 0.2268, "step": 103280 }, { "epoch": 2.1025954198473285, "grad_norm": 17.16481176339228, "learning_rate": 7.183729892558503e-06, "loss": 0.0658, "step": 103290 }, { "epoch": 2.102798982188295, "grad_norm": 11.794532246218735, "learning_rate": 7.183090654081135e-06, "loss": 0.1478, "step": 103300 }, { "epoch": 2.103002544529262, "grad_norm": 2.5274371814082204, "learning_rate": 7.182451371513441e-06, "loss": 0.1268, "step": 103310 }, { "epoch": 2.103206106870229, "grad_norm": 0.06939857672277659, "learning_rate": 7.18181204486833e-06, "loss": 0.1094, "step": 103320 }, { "epoch": 2.103409669211196, "grad_norm": 5.031939436903935, "learning_rate": 7.181172674158714e-06, "loss": 0.1039, "step": 103330 }, { "epoch": 2.103613231552163, "grad_norm": 27.029857049483507, "learning_rate": 7.180533259397507e-06, "loss": 0.053, "step": 103340 }, { "epoch": 2.1038167938931296, "grad_norm": 9.998549949758976, "learning_rate": 7.1798938005976224e-06, "loss": 0.1045, "step": 103350 }, { "epoch": 2.1040203562340967, "grad_norm": 20.02571301535334, "learning_rate": 7.1792542977719745e-06, "loss": 0.1186, "step": 103360 }, { "epoch": 2.1042239185750637, "grad_norm": 0.3123385895313775, "learning_rate": 7.17861475093348e-06, "loss": 0.1517, "step": 103370 }, { "epoch": 2.1044274809160304, "grad_norm": 3.8860152183703978, "learning_rate": 7.177975160095054e-06, "loss": 0.1065, "step": 103380 }, { "epoch": 2.1046310432569975, "grad_norm": 0.08689628562568362, "learning_rate": 7.1773355252696156e-06, "loss": 0.1137, "step": 103390 }, { "epoch": 2.1048346055979645, "grad_norm": 13.058829355106635, "learning_rate": 7.176695846470079e-06, "loss": 0.1701, "step": 103400 }, { "epoch": 2.105038167938931, "grad_norm": 10.62889654655855, "learning_rate": 7.176056123709368e-06, "loss": 0.092, "step": 103410 }, { "epoch": 2.1052417302798982, "grad_norm": 18.31260414175162, "learning_rate": 7.175416357000401e-06, "loss": 0.1295, "step": 103420 }, { "epoch": 2.1054452926208653, "grad_norm": 24.8376923807187, "learning_rate": 7.174776546356098e-06, "loss": 0.1775, "step": 103430 }, { "epoch": 2.105648854961832, "grad_norm": 15.589688028606586, "learning_rate": 7.174136691789382e-06, "loss": 0.1106, "step": 103440 }, { "epoch": 2.105852417302799, "grad_norm": 15.176880492866578, "learning_rate": 7.1734967933131774e-06, "loss": 0.0902, "step": 103450 }, { "epoch": 2.1060559796437657, "grad_norm": 14.496611304078554, "learning_rate": 7.172856850940403e-06, "loss": 0.1622, "step": 103460 }, { "epoch": 2.1062595419847328, "grad_norm": 272.3153114882776, "learning_rate": 7.172216864683985e-06, "loss": 0.1939, "step": 103470 }, { "epoch": 2.1064631043257, "grad_norm": 20.157232215547225, "learning_rate": 7.171576834556852e-06, "loss": 0.174, "step": 103480 }, { "epoch": 2.1066666666666665, "grad_norm": 7.420241467330109, "learning_rate": 7.170936760571927e-06, "loss": 0.0875, "step": 103490 }, { "epoch": 2.1068702290076335, "grad_norm": 1.207167523321986, "learning_rate": 7.170296642742137e-06, "loss": 0.1847, "step": 103500 }, { "epoch": 2.1070737913486006, "grad_norm": 0.7312462507990322, "learning_rate": 7.1696564810804115e-06, "loss": 0.1435, "step": 103510 }, { "epoch": 2.1072773536895673, "grad_norm": 27.800019091017788, "learning_rate": 7.169016275599679e-06, "loss": 0.1332, "step": 103520 }, { "epoch": 2.1074809160305343, "grad_norm": 0.16312615222593913, "learning_rate": 7.16837602631287e-06, "loss": 0.0489, "step": 103530 }, { "epoch": 2.1076844783715014, "grad_norm": 2.2774366811960096, "learning_rate": 7.1677357332329145e-06, "loss": 0.1381, "step": 103540 }, { "epoch": 2.107888040712468, "grad_norm": 19.919500322590423, "learning_rate": 7.167095396372742e-06, "loss": 0.1318, "step": 103550 }, { "epoch": 2.108091603053435, "grad_norm": 7.973645902996324, "learning_rate": 7.166455015745287e-06, "loss": 0.1612, "step": 103560 }, { "epoch": 2.108295165394402, "grad_norm": 18.518076147909813, "learning_rate": 7.165814591363485e-06, "loss": 0.1753, "step": 103570 }, { "epoch": 2.108498727735369, "grad_norm": 0.1485322966886099, "learning_rate": 7.165174123240266e-06, "loss": 0.1106, "step": 103580 }, { "epoch": 2.108702290076336, "grad_norm": 49.53535946796548, "learning_rate": 7.164533611388567e-06, "loss": 0.0521, "step": 103590 }, { "epoch": 2.108905852417303, "grad_norm": 38.089177216533855, "learning_rate": 7.163893055821323e-06, "loss": 0.0856, "step": 103600 }, { "epoch": 2.1091094147582696, "grad_norm": 15.692280575192756, "learning_rate": 7.163252456551473e-06, "loss": 0.1345, "step": 103610 }, { "epoch": 2.1093129770992367, "grad_norm": 19.47226546206359, "learning_rate": 7.1626118135919534e-06, "loss": 0.193, "step": 103620 }, { "epoch": 2.109516539440204, "grad_norm": 1.4687036662548374, "learning_rate": 7.161971126955701e-06, "loss": 0.0853, "step": 103630 }, { "epoch": 2.1097201017811704, "grad_norm": 22.10384360362745, "learning_rate": 7.1613303966556595e-06, "loss": 0.0508, "step": 103640 }, { "epoch": 2.1099236641221375, "grad_norm": 12.828254041693409, "learning_rate": 7.160689622704766e-06, "loss": 0.2051, "step": 103650 }, { "epoch": 2.110127226463104, "grad_norm": 23.101970448414576, "learning_rate": 7.160048805115962e-06, "loss": 0.0802, "step": 103660 }, { "epoch": 2.110330788804071, "grad_norm": 0.02857242844427787, "learning_rate": 7.159407943902189e-06, "loss": 0.1554, "step": 103670 }, { "epoch": 2.1105343511450383, "grad_norm": 0.2587770967126883, "learning_rate": 7.158767039076394e-06, "loss": 0.093, "step": 103680 }, { "epoch": 2.110737913486005, "grad_norm": 0.33868566538125494, "learning_rate": 7.158126090651517e-06, "loss": 0.0617, "step": 103690 }, { "epoch": 2.110941475826972, "grad_norm": 29.891618233504246, "learning_rate": 7.157485098640504e-06, "loss": 0.1111, "step": 103700 }, { "epoch": 2.111145038167939, "grad_norm": 0.4109112092381789, "learning_rate": 7.156844063056301e-06, "loss": 0.1428, "step": 103710 }, { "epoch": 2.1113486005089057, "grad_norm": 12.708030539325321, "learning_rate": 7.156202983911853e-06, "loss": 0.0887, "step": 103720 }, { "epoch": 2.111552162849873, "grad_norm": 0.25835878699547143, "learning_rate": 7.155561861220109e-06, "loss": 0.2216, "step": 103730 }, { "epoch": 2.11175572519084, "grad_norm": 16.322176861534686, "learning_rate": 7.154920694994018e-06, "loss": 0.1678, "step": 103740 }, { "epoch": 2.1119592875318065, "grad_norm": 0.44332329086774436, "learning_rate": 7.154279485246527e-06, "loss": 0.0556, "step": 103750 }, { "epoch": 2.1121628498727736, "grad_norm": 4.366908128811813, "learning_rate": 7.153638231990587e-06, "loss": 0.1239, "step": 103760 }, { "epoch": 2.1123664122137407, "grad_norm": 3.501482991256611, "learning_rate": 7.152996935239151e-06, "loss": 0.1008, "step": 103770 }, { "epoch": 2.1125699745547073, "grad_norm": 0.5866420680871501, "learning_rate": 7.152355595005166e-06, "loss": 0.2086, "step": 103780 }, { "epoch": 2.1127735368956744, "grad_norm": 0.6037014903566784, "learning_rate": 7.151714211301589e-06, "loss": 0.1755, "step": 103790 }, { "epoch": 2.112977099236641, "grad_norm": 12.853248289365736, "learning_rate": 7.151072784141372e-06, "loss": 0.1594, "step": 103800 }, { "epoch": 2.113180661577608, "grad_norm": 0.3856113836857863, "learning_rate": 7.150431313537469e-06, "loss": 0.045, "step": 103810 }, { "epoch": 2.113384223918575, "grad_norm": 26.457087084473613, "learning_rate": 7.1497897995028365e-06, "loss": 0.1464, "step": 103820 }, { "epoch": 2.113587786259542, "grad_norm": 0.5391760360053885, "learning_rate": 7.149148242050431e-06, "loss": 0.0317, "step": 103830 }, { "epoch": 2.113791348600509, "grad_norm": 16.270828569288312, "learning_rate": 7.148506641193207e-06, "loss": 0.0804, "step": 103840 }, { "epoch": 2.113994910941476, "grad_norm": 0.010232487074627825, "learning_rate": 7.1478649969441235e-06, "loss": 0.1788, "step": 103850 }, { "epoch": 2.1141984732824426, "grad_norm": 8.088543433107368, "learning_rate": 7.147223309316143e-06, "loss": 0.1174, "step": 103860 }, { "epoch": 2.1144020356234097, "grad_norm": 3.072999787899371, "learning_rate": 7.14658157832222e-06, "loss": 0.0328, "step": 103870 }, { "epoch": 2.1146055979643767, "grad_norm": 0.07016691334461116, "learning_rate": 7.145939803975317e-06, "loss": 0.2194, "step": 103880 }, { "epoch": 2.1148091603053434, "grad_norm": 23.947176257188804, "learning_rate": 7.145297986288396e-06, "loss": 0.1122, "step": 103890 }, { "epoch": 2.1150127226463105, "grad_norm": 14.717509170479163, "learning_rate": 7.1446561252744196e-06, "loss": 0.1701, "step": 103900 }, { "epoch": 2.1152162849872775, "grad_norm": 8.80266997995237, "learning_rate": 7.14401422094635e-06, "loss": 0.1154, "step": 103910 }, { "epoch": 2.115419847328244, "grad_norm": 0.437771117098468, "learning_rate": 7.143372273317153e-06, "loss": 0.0416, "step": 103920 }, { "epoch": 2.1156234096692113, "grad_norm": 5.4846119453941, "learning_rate": 7.14273028239979e-06, "loss": 0.2023, "step": 103930 }, { "epoch": 2.1158269720101783, "grad_norm": 12.279356782160528, "learning_rate": 7.142088248207232e-06, "loss": 0.2244, "step": 103940 }, { "epoch": 2.116030534351145, "grad_norm": 25.955508356294718, "learning_rate": 7.141446170752441e-06, "loss": 0.1008, "step": 103950 }, { "epoch": 2.116234096692112, "grad_norm": 0.8381048155106627, "learning_rate": 7.1408040500483855e-06, "loss": 0.1056, "step": 103960 }, { "epoch": 2.1164376590330787, "grad_norm": 11.060413958628734, "learning_rate": 7.140161886108037e-06, "loss": 0.1372, "step": 103970 }, { "epoch": 2.1166412213740458, "grad_norm": 6.631506417307914, "learning_rate": 7.139519678944361e-06, "loss": 0.1405, "step": 103980 }, { "epoch": 2.116844783715013, "grad_norm": 6.732813925011036, "learning_rate": 7.1388774285703295e-06, "loss": 0.1538, "step": 103990 }, { "epoch": 2.1170483460559795, "grad_norm": 13.92497234889004, "learning_rate": 7.138235134998914e-06, "loss": 0.086, "step": 104000 }, { "epoch": 2.1172519083969465, "grad_norm": 6.775167418074024, "learning_rate": 7.137592798243087e-06, "loss": 0.118, "step": 104010 }, { "epoch": 2.1174554707379136, "grad_norm": 14.239553527483169, "learning_rate": 7.136950418315819e-06, "loss": 0.1453, "step": 104020 }, { "epoch": 2.1176590330788803, "grad_norm": 10.15151930864295, "learning_rate": 7.136307995230086e-06, "loss": 0.056, "step": 104030 }, { "epoch": 2.1178625954198473, "grad_norm": 3.441591591262173, "learning_rate": 7.13566552899886e-06, "loss": 0.1657, "step": 104040 }, { "epoch": 2.1180661577608144, "grad_norm": 4.143064259433546, "learning_rate": 7.135023019635119e-06, "loss": 0.1465, "step": 104050 }, { "epoch": 2.118269720101781, "grad_norm": 3.434117131741599, "learning_rate": 7.134380467151838e-06, "loss": 0.0555, "step": 104060 }, { "epoch": 2.118473282442748, "grad_norm": 0.24864067776294982, "learning_rate": 7.133737871561994e-06, "loss": 0.1616, "step": 104070 }, { "epoch": 2.118676844783715, "grad_norm": 0.3146906682379172, "learning_rate": 7.133095232878566e-06, "loss": 0.1285, "step": 104080 }, { "epoch": 2.118880407124682, "grad_norm": 14.941411649491437, "learning_rate": 7.132452551114532e-06, "loss": 0.1039, "step": 104090 }, { "epoch": 2.119083969465649, "grad_norm": 20.80405966543127, "learning_rate": 7.131809826282872e-06, "loss": 0.1463, "step": 104100 }, { "epoch": 2.1192875318066156, "grad_norm": 23.240524767797073, "learning_rate": 7.131167058396567e-06, "loss": 0.1042, "step": 104110 }, { "epoch": 2.1194910941475826, "grad_norm": 0.18366979577829226, "learning_rate": 7.130524247468599e-06, "loss": 0.1465, "step": 104120 }, { "epoch": 2.1196946564885497, "grad_norm": 2.7642157325495975, "learning_rate": 7.129881393511948e-06, "loss": 0.1831, "step": 104130 }, { "epoch": 2.1198982188295163, "grad_norm": 0.08337411663788483, "learning_rate": 7.129238496539601e-06, "loss": 0.1575, "step": 104140 }, { "epoch": 2.1201017811704834, "grad_norm": 23.946179069715146, "learning_rate": 7.128595556564539e-06, "loss": 0.1767, "step": 104150 }, { "epoch": 2.1203053435114505, "grad_norm": 20.235668757065568, "learning_rate": 7.127952573599748e-06, "loss": 0.1312, "step": 104160 }, { "epoch": 2.120508905852417, "grad_norm": 24.9160040486994, "learning_rate": 7.127309547658214e-06, "loss": 0.1599, "step": 104170 }, { "epoch": 2.120712468193384, "grad_norm": 14.873680632300186, "learning_rate": 7.126666478752924e-06, "loss": 0.1171, "step": 104180 }, { "epoch": 2.1209160305343513, "grad_norm": 0.6034389372036522, "learning_rate": 7.126023366896866e-06, "loss": 0.0822, "step": 104190 }, { "epoch": 2.121119592875318, "grad_norm": 3.9628107837564923, "learning_rate": 7.1253802121030265e-06, "loss": 0.1696, "step": 104200 }, { "epoch": 2.121323155216285, "grad_norm": 10.179321332156858, "learning_rate": 7.1247370143843975e-06, "loss": 0.1955, "step": 104210 }, { "epoch": 2.121526717557252, "grad_norm": 25.327142030878505, "learning_rate": 7.124093773753966e-06, "loss": 0.1854, "step": 104220 }, { "epoch": 2.1217302798982187, "grad_norm": 5.7018598880100155, "learning_rate": 7.123450490224728e-06, "loss": 0.095, "step": 104230 }, { "epoch": 2.121933842239186, "grad_norm": 3.487136030996273, "learning_rate": 7.122807163809671e-06, "loss": 0.0952, "step": 104240 }, { "epoch": 2.122137404580153, "grad_norm": 15.83721781869581, "learning_rate": 7.122163794521787e-06, "loss": 0.1488, "step": 104250 }, { "epoch": 2.1223409669211195, "grad_norm": 0.016157278871799085, "learning_rate": 7.121520382374074e-06, "loss": 0.094, "step": 104260 }, { "epoch": 2.1225445292620866, "grad_norm": 0.24848656551918893, "learning_rate": 7.120876927379524e-06, "loss": 0.2089, "step": 104270 }, { "epoch": 2.1227480916030532, "grad_norm": 5.070065484103967, "learning_rate": 7.120233429551132e-06, "loss": 0.0933, "step": 104280 }, { "epoch": 2.1229516539440203, "grad_norm": 5.409445479350248, "learning_rate": 7.119589888901896e-06, "loss": 0.1133, "step": 104290 }, { "epoch": 2.1231552162849874, "grad_norm": 20.255644668041924, "learning_rate": 7.118946305444811e-06, "loss": 0.0551, "step": 104300 }, { "epoch": 2.123358778625954, "grad_norm": 11.081159118465184, "learning_rate": 7.118302679192878e-06, "loss": 0.1515, "step": 104310 }, { "epoch": 2.123562340966921, "grad_norm": 3.350561694909296, "learning_rate": 7.117659010159094e-06, "loss": 0.0965, "step": 104320 }, { "epoch": 2.123765903307888, "grad_norm": 0.06604475288475735, "learning_rate": 7.117015298356457e-06, "loss": 0.0867, "step": 104330 }, { "epoch": 2.123969465648855, "grad_norm": 17.29172899993664, "learning_rate": 7.116371543797969e-06, "loss": 0.2107, "step": 104340 }, { "epoch": 2.124173027989822, "grad_norm": 16.98161261262752, "learning_rate": 7.115727746496635e-06, "loss": 0.0683, "step": 104350 }, { "epoch": 2.124376590330789, "grad_norm": 59.702787279661294, "learning_rate": 7.115083906465452e-06, "loss": 0.1478, "step": 104360 }, { "epoch": 2.1245801526717556, "grad_norm": 25.288376779572456, "learning_rate": 7.114440023717425e-06, "loss": 0.0535, "step": 104370 }, { "epoch": 2.1247837150127227, "grad_norm": 3.1805361106499044, "learning_rate": 7.113796098265559e-06, "loss": 0.2373, "step": 104380 }, { "epoch": 2.1249872773536898, "grad_norm": 27.985961098414478, "learning_rate": 7.113152130122858e-06, "loss": 0.1467, "step": 104390 }, { "epoch": 2.1251908396946564, "grad_norm": 6.637073231688612, "learning_rate": 7.1125081193023285e-06, "loss": 0.2058, "step": 104400 }, { "epoch": 2.1253944020356235, "grad_norm": 0.3020926309565095, "learning_rate": 7.111864065816976e-06, "loss": 0.1293, "step": 104410 }, { "epoch": 2.12559796437659, "grad_norm": 7.173444359535207, "learning_rate": 7.111219969679808e-06, "loss": 0.0824, "step": 104420 }, { "epoch": 2.125801526717557, "grad_norm": 0.12038605809912556, "learning_rate": 7.110575830903835e-06, "loss": 0.1715, "step": 104430 }, { "epoch": 2.1260050890585243, "grad_norm": 0.022468584401878586, "learning_rate": 7.109931649502065e-06, "loss": 0.0528, "step": 104440 }, { "epoch": 2.126208651399491, "grad_norm": 36.381350668769976, "learning_rate": 7.109287425487506e-06, "loss": 0.1809, "step": 104450 }, { "epoch": 2.126412213740458, "grad_norm": 30.898488214689536, "learning_rate": 7.108643158873172e-06, "loss": 0.2077, "step": 104460 }, { "epoch": 2.126615776081425, "grad_norm": 9.179864705578717, "learning_rate": 7.107998849672072e-06, "loss": 0.1924, "step": 104470 }, { "epoch": 2.1268193384223917, "grad_norm": 10.94445917154748, "learning_rate": 7.107354497897221e-06, "loss": 0.0758, "step": 104480 }, { "epoch": 2.1270229007633588, "grad_norm": 2.2237703158406292, "learning_rate": 7.106710103561633e-06, "loss": 0.1808, "step": 104490 }, { "epoch": 2.127226463104326, "grad_norm": 15.763275096429462, "learning_rate": 7.106065666678319e-06, "loss": 0.2613, "step": 104500 }, { "epoch": 2.1274300254452925, "grad_norm": 20.729411318171305, "learning_rate": 7.105421187260296e-06, "loss": 0.0556, "step": 104510 }, { "epoch": 2.1276335877862596, "grad_norm": 0.6008180074428312, "learning_rate": 7.104776665320583e-06, "loss": 0.1172, "step": 104520 }, { "epoch": 2.1278371501272266, "grad_norm": 16.92740069781747, "learning_rate": 7.1041321008721935e-06, "loss": 0.1262, "step": 104530 }, { "epoch": 2.1280407124681933, "grad_norm": 0.12214394652844672, "learning_rate": 7.103487493928143e-06, "loss": 0.0412, "step": 104540 }, { "epoch": 2.1282442748091603, "grad_norm": 0.21508892059983875, "learning_rate": 7.102842844501457e-06, "loss": 0.1182, "step": 104550 }, { "epoch": 2.1284478371501274, "grad_norm": 17.294556686918032, "learning_rate": 7.10219815260515e-06, "loss": 0.1832, "step": 104560 }, { "epoch": 2.128651399491094, "grad_norm": 19.999224094716844, "learning_rate": 7.101553418252244e-06, "loss": 0.1903, "step": 104570 }, { "epoch": 2.128854961832061, "grad_norm": 9.2919839716395, "learning_rate": 7.100908641455759e-06, "loss": 0.1347, "step": 104580 }, { "epoch": 2.129058524173028, "grad_norm": 0.8614769409609272, "learning_rate": 7.100263822228719e-06, "loss": 0.1877, "step": 104590 }, { "epoch": 2.129262086513995, "grad_norm": 9.830632512868199, "learning_rate": 7.099618960584145e-06, "loss": 0.1425, "step": 104600 }, { "epoch": 2.129465648854962, "grad_norm": 6.571982404427512, "learning_rate": 7.098974056535064e-06, "loss": 0.1921, "step": 104610 }, { "epoch": 2.1296692111959286, "grad_norm": 20.917821260606544, "learning_rate": 7.098329110094495e-06, "loss": 0.0767, "step": 104620 }, { "epoch": 2.1298727735368956, "grad_norm": 5.973453257962137, "learning_rate": 7.09768412127547e-06, "loss": 0.1387, "step": 104630 }, { "epoch": 2.1300763358778627, "grad_norm": 1.9466267737710108, "learning_rate": 7.097039090091011e-06, "loss": 0.0533, "step": 104640 }, { "epoch": 2.1302798982188293, "grad_norm": 16.972149667354653, "learning_rate": 7.0963940165541466e-06, "loss": 0.0934, "step": 104650 }, { "epoch": 2.1304834605597964, "grad_norm": 1.560566819453789, "learning_rate": 7.0957489006779055e-06, "loss": 0.1118, "step": 104660 }, { "epoch": 2.1306870229007635, "grad_norm": 0.027116954828708496, "learning_rate": 7.095103742475315e-06, "loss": 0.0783, "step": 104670 }, { "epoch": 2.13089058524173, "grad_norm": 10.168134955728762, "learning_rate": 7.094458541959408e-06, "loss": 0.1271, "step": 104680 }, { "epoch": 2.131094147582697, "grad_norm": 4.131517186335729, "learning_rate": 7.093813299143212e-06, "loss": 0.2025, "step": 104690 }, { "epoch": 2.1312977099236643, "grad_norm": 0.5478306307069627, "learning_rate": 7.093168014039758e-06, "loss": 0.0963, "step": 104700 }, { "epoch": 2.131501272264631, "grad_norm": 2.1100562811964987, "learning_rate": 7.092522686662082e-06, "loss": 0.1287, "step": 104710 }, { "epoch": 2.131704834605598, "grad_norm": 6.755413727437312, "learning_rate": 7.091877317023215e-06, "loss": 0.1013, "step": 104720 }, { "epoch": 2.131908396946565, "grad_norm": 0.06709685249511532, "learning_rate": 7.091231905136191e-06, "loss": 0.091, "step": 104730 }, { "epoch": 2.1321119592875317, "grad_norm": 0.3092302564724596, "learning_rate": 7.090586451014043e-06, "loss": 0.0966, "step": 104740 }, { "epoch": 2.132315521628499, "grad_norm": 21.462137652830037, "learning_rate": 7.089940954669813e-06, "loss": 0.0707, "step": 104750 }, { "epoch": 2.1325190839694654, "grad_norm": 2.151968696554397, "learning_rate": 7.089295416116531e-06, "loss": 0.0793, "step": 104760 }, { "epoch": 2.1327226463104325, "grad_norm": 38.25156862297013, "learning_rate": 7.0886498353672386e-06, "loss": 0.0833, "step": 104770 }, { "epoch": 2.1329262086513996, "grad_norm": 0.06282055738247766, "learning_rate": 7.088004212434971e-06, "loss": 0.0918, "step": 104780 }, { "epoch": 2.1331297709923662, "grad_norm": 0.011945081990703824, "learning_rate": 7.08735854733277e-06, "loss": 0.113, "step": 104790 }, { "epoch": 2.1333333333333333, "grad_norm": 25.98987004354044, "learning_rate": 7.086712840073674e-06, "loss": 0.2803, "step": 104800 }, { "epoch": 2.1335368956743004, "grad_norm": 22.915899191743538, "learning_rate": 7.086067090670725e-06, "loss": 0.1114, "step": 104810 }, { "epoch": 2.133740458015267, "grad_norm": 5.453751978306525, "learning_rate": 7.085421299136964e-06, "loss": 0.0472, "step": 104820 }, { "epoch": 2.133944020356234, "grad_norm": 1.9639645113399349, "learning_rate": 7.084775465485434e-06, "loss": 0.2145, "step": 104830 }, { "epoch": 2.134147582697201, "grad_norm": 7.330447808602985, "learning_rate": 7.084129589729179e-06, "loss": 0.1411, "step": 104840 }, { "epoch": 2.134351145038168, "grad_norm": 0.04226009320754406, "learning_rate": 7.083483671881242e-06, "loss": 0.1005, "step": 104850 }, { "epoch": 2.134554707379135, "grad_norm": 0.5289585954387529, "learning_rate": 7.082837711954667e-06, "loss": 0.1758, "step": 104860 }, { "epoch": 2.134758269720102, "grad_norm": 12.531140639535257, "learning_rate": 7.082191709962504e-06, "loss": 0.1123, "step": 104870 }, { "epoch": 2.1349618320610686, "grad_norm": 57.2324501133102, "learning_rate": 7.081545665917797e-06, "loss": 0.0869, "step": 104880 }, { "epoch": 2.1351653944020357, "grad_norm": 26.40507084860993, "learning_rate": 7.0808995798335945e-06, "loss": 0.0969, "step": 104890 }, { "epoch": 2.1353689567430028, "grad_norm": 7.240161651496148, "learning_rate": 7.080253451722945e-06, "loss": 0.1795, "step": 104900 }, { "epoch": 2.1355725190839694, "grad_norm": 0.13362003449028018, "learning_rate": 7.079607281598896e-06, "loss": 0.1562, "step": 104910 }, { "epoch": 2.1357760814249365, "grad_norm": 0.04626291641117228, "learning_rate": 7.0789610694745015e-06, "loss": 0.164, "step": 104920 }, { "epoch": 2.135979643765903, "grad_norm": 0.2479094735021834, "learning_rate": 7.07831481536281e-06, "loss": 0.1317, "step": 104930 }, { "epoch": 2.13618320610687, "grad_norm": 22.552515232602182, "learning_rate": 7.077668519276874e-06, "loss": 0.1629, "step": 104940 }, { "epoch": 2.1363867684478373, "grad_norm": 0.18079522573482498, "learning_rate": 7.077022181229744e-06, "loss": 0.1171, "step": 104950 }, { "epoch": 2.136590330788804, "grad_norm": 15.773936405973203, "learning_rate": 7.0763758012344785e-06, "loss": 0.0462, "step": 104960 }, { "epoch": 2.136793893129771, "grad_norm": 14.104196154069117, "learning_rate": 7.075729379304128e-06, "loss": 0.1619, "step": 104970 }, { "epoch": 2.136997455470738, "grad_norm": 0.6168687604229756, "learning_rate": 7.075082915451751e-06, "loss": 0.1022, "step": 104980 }, { "epoch": 2.1372010178117047, "grad_norm": 14.175953737822491, "learning_rate": 7.0744364096903994e-06, "loss": 0.086, "step": 104990 }, { "epoch": 2.1374045801526718, "grad_norm": 14.992243311709935, "learning_rate": 7.073789862033133e-06, "loss": 0.0585, "step": 105000 }, { "epoch": 2.137608142493639, "grad_norm": 33.43118658725642, "learning_rate": 7.073143272493012e-06, "loss": 0.0781, "step": 105010 }, { "epoch": 2.1378117048346055, "grad_norm": 36.79979592577666, "learning_rate": 7.072496641083091e-06, "loss": 0.1064, "step": 105020 }, { "epoch": 2.1380152671755726, "grad_norm": 0.881495517563015, "learning_rate": 7.071849967816429e-06, "loss": 0.2164, "step": 105030 }, { "epoch": 2.1382188295165396, "grad_norm": 8.226203043095127, "learning_rate": 7.071203252706091e-06, "loss": 0.2043, "step": 105040 }, { "epoch": 2.1384223918575063, "grad_norm": 1.989656221742104, "learning_rate": 7.070556495765134e-06, "loss": 0.1068, "step": 105050 }, { "epoch": 2.1386259541984733, "grad_norm": 0.016364704210312383, "learning_rate": 7.069909697006623e-06, "loss": 0.0718, "step": 105060 }, { "epoch": 2.13882951653944, "grad_norm": 14.194659877501238, "learning_rate": 7.0692628564436184e-06, "loss": 0.1698, "step": 105070 }, { "epoch": 2.139033078880407, "grad_norm": 30.990566503893334, "learning_rate": 7.068615974089186e-06, "loss": 0.0833, "step": 105080 }, { "epoch": 2.139236641221374, "grad_norm": 23.797347670622983, "learning_rate": 7.067969049956388e-06, "loss": 0.1291, "step": 105090 }, { "epoch": 2.1394402035623408, "grad_norm": 27.219651889529118, "learning_rate": 7.067322084058295e-06, "loss": 0.222, "step": 105100 }, { "epoch": 2.139643765903308, "grad_norm": 0.11290567585690053, "learning_rate": 7.066675076407966e-06, "loss": 0.1088, "step": 105110 }, { "epoch": 2.139847328244275, "grad_norm": 0.03452488738632404, "learning_rate": 7.0660280270184745e-06, "loss": 0.1635, "step": 105120 }, { "epoch": 2.1400508905852416, "grad_norm": 1.414199429645139, "learning_rate": 7.065380935902885e-06, "loss": 0.1462, "step": 105130 }, { "epoch": 2.1402544529262086, "grad_norm": 26.519547085752727, "learning_rate": 7.064733803074267e-06, "loss": 0.1656, "step": 105140 }, { "epoch": 2.1404580152671757, "grad_norm": 9.870412563037313, "learning_rate": 7.064086628545691e-06, "loss": 0.1236, "step": 105150 }, { "epoch": 2.1406615776081424, "grad_norm": 0.17678375672952146, "learning_rate": 7.063439412330226e-06, "loss": 0.1487, "step": 105160 }, { "epoch": 2.1408651399491094, "grad_norm": 17.586560629039994, "learning_rate": 7.062792154440946e-06, "loss": 0.096, "step": 105170 }, { "epoch": 2.1410687022900765, "grad_norm": 0.18640266835284208, "learning_rate": 7.062144854890922e-06, "loss": 0.0873, "step": 105180 }, { "epoch": 2.141272264631043, "grad_norm": 0.019302258958371656, "learning_rate": 7.061497513693225e-06, "loss": 0.1588, "step": 105190 }, { "epoch": 2.14147582697201, "grad_norm": 0.13315072271697265, "learning_rate": 7.06085013086093e-06, "loss": 0.1984, "step": 105200 }, { "epoch": 2.1416793893129773, "grad_norm": 19.149503955228564, "learning_rate": 7.0602027064071146e-06, "loss": 0.1838, "step": 105210 }, { "epoch": 2.141882951653944, "grad_norm": 0.07055436204943537, "learning_rate": 7.059555240344851e-06, "loss": 0.1302, "step": 105220 }, { "epoch": 2.142086513994911, "grad_norm": 0.057961496322764726, "learning_rate": 7.058907732687216e-06, "loss": 0.1197, "step": 105230 }, { "epoch": 2.142290076335878, "grad_norm": 13.538003920894367, "learning_rate": 7.0582601834472875e-06, "loss": 0.1339, "step": 105240 }, { "epoch": 2.1424936386768447, "grad_norm": 11.579743994455017, "learning_rate": 7.0576125926381435e-06, "loss": 0.0903, "step": 105250 }, { "epoch": 2.142697201017812, "grad_norm": 3.864012090743619, "learning_rate": 7.056964960272864e-06, "loss": 0.1188, "step": 105260 }, { "epoch": 2.1429007633587784, "grad_norm": 0.08039500136392806, "learning_rate": 7.056317286364528e-06, "loss": 0.1314, "step": 105270 }, { "epoch": 2.1431043256997455, "grad_norm": 0.37531382933991203, "learning_rate": 7.055669570926213e-06, "loss": 0.2472, "step": 105280 }, { "epoch": 2.1433078880407126, "grad_norm": 0.05212336511452658, "learning_rate": 7.055021813971006e-06, "loss": 0.0457, "step": 105290 }, { "epoch": 2.1435114503816792, "grad_norm": 9.189331573052447, "learning_rate": 7.0543740155119866e-06, "loss": 0.1168, "step": 105300 }, { "epoch": 2.1437150127226463, "grad_norm": 7.952088857915989, "learning_rate": 7.053726175562238e-06, "loss": 0.0847, "step": 105310 }, { "epoch": 2.1439185750636134, "grad_norm": 8.918467908497506, "learning_rate": 7.053078294134842e-06, "loss": 0.169, "step": 105320 }, { "epoch": 2.14412213740458, "grad_norm": 8.321065691228757, "learning_rate": 7.052430371242889e-06, "loss": 0.059, "step": 105330 }, { "epoch": 2.144325699745547, "grad_norm": 13.155096805114022, "learning_rate": 7.051782406899457e-06, "loss": 0.1656, "step": 105340 }, { "epoch": 2.144529262086514, "grad_norm": 31.91591302130397, "learning_rate": 7.05113440111764e-06, "loss": 0.1168, "step": 105350 }, { "epoch": 2.144732824427481, "grad_norm": 0.3037925871588138, "learning_rate": 7.05048635391052e-06, "loss": 0.1165, "step": 105360 }, { "epoch": 2.144936386768448, "grad_norm": 1.0032929524170378, "learning_rate": 7.049838265291188e-06, "loss": 0.1085, "step": 105370 }, { "epoch": 2.1451399491094145, "grad_norm": 2.6777846383916, "learning_rate": 7.0491901352727324e-06, "loss": 0.1951, "step": 105380 }, { "epoch": 2.1453435114503816, "grad_norm": 7.225671413339472, "learning_rate": 7.048541963868243e-06, "loss": 0.2402, "step": 105390 }, { "epoch": 2.1455470737913487, "grad_norm": 0.06086129609602262, "learning_rate": 7.047893751090808e-06, "loss": 0.0936, "step": 105400 }, { "epoch": 2.1457506361323153, "grad_norm": 0.4826684795884253, "learning_rate": 7.047245496953523e-06, "loss": 0.1503, "step": 105410 }, { "epoch": 2.1459541984732824, "grad_norm": 5.3912728062127835, "learning_rate": 7.046597201469477e-06, "loss": 0.1702, "step": 105420 }, { "epoch": 2.1461577608142495, "grad_norm": 11.046127129852131, "learning_rate": 7.045948864651765e-06, "loss": 0.2066, "step": 105430 }, { "epoch": 2.146361323155216, "grad_norm": 0.4872274598516372, "learning_rate": 7.045300486513481e-06, "loss": 0.1183, "step": 105440 }, { "epoch": 2.146564885496183, "grad_norm": 27.814514995596298, "learning_rate": 7.044652067067719e-06, "loss": 0.159, "step": 105450 }, { "epoch": 2.1467684478371503, "grad_norm": 1.441375304698884, "learning_rate": 7.044003606327574e-06, "loss": 0.0965, "step": 105460 }, { "epoch": 2.146972010178117, "grad_norm": 4.723192653489124, "learning_rate": 7.0433551043061465e-06, "loss": 0.1105, "step": 105470 }, { "epoch": 2.147175572519084, "grad_norm": 40.0900331314067, "learning_rate": 7.042706561016528e-06, "loss": 0.1293, "step": 105480 }, { "epoch": 2.147379134860051, "grad_norm": 0.7177101608739112, "learning_rate": 7.04205797647182e-06, "loss": 0.1494, "step": 105490 }, { "epoch": 2.1475826972010177, "grad_norm": 0.6011933422258058, "learning_rate": 7.041409350685122e-06, "loss": 0.041, "step": 105500 }, { "epoch": 2.1477862595419848, "grad_norm": 4.506860519863403, "learning_rate": 7.040760683669532e-06, "loss": 0.0813, "step": 105510 }, { "epoch": 2.147989821882952, "grad_norm": 9.232538101995434, "learning_rate": 7.040111975438152e-06, "loss": 0.1537, "step": 105520 }, { "epoch": 2.1481933842239185, "grad_norm": 0.0775228019956457, "learning_rate": 7.039463226004082e-06, "loss": 0.1257, "step": 105530 }, { "epoch": 2.1483969465648856, "grad_norm": 0.47179984960499605, "learning_rate": 7.0388144353804264e-06, "loss": 0.0904, "step": 105540 }, { "epoch": 2.1486005089058526, "grad_norm": 21.052317170875867, "learning_rate": 7.038165603580287e-06, "loss": 0.2353, "step": 105550 }, { "epoch": 2.1488040712468193, "grad_norm": 18.294292177927357, "learning_rate": 7.037516730616769e-06, "loss": 0.2451, "step": 105560 }, { "epoch": 2.1490076335877863, "grad_norm": 0.2008993298013567, "learning_rate": 7.0368678165029746e-06, "loss": 0.1219, "step": 105570 }, { "epoch": 2.149211195928753, "grad_norm": 36.25681876584703, "learning_rate": 7.036218861252012e-06, "loss": 0.1225, "step": 105580 }, { "epoch": 2.14941475826972, "grad_norm": 0.22410077841633785, "learning_rate": 7.035569864876987e-06, "loss": 0.1907, "step": 105590 }, { "epoch": 2.149618320610687, "grad_norm": 0.030012820900157472, "learning_rate": 7.034920827391008e-06, "loss": 0.1511, "step": 105600 }, { "epoch": 2.1498218829516538, "grad_norm": 2.0002533013413615, "learning_rate": 7.03427174880718e-06, "loss": 0.1074, "step": 105610 }, { "epoch": 2.150025445292621, "grad_norm": 8.128452966177749, "learning_rate": 7.033622629138614e-06, "loss": 0.0426, "step": 105620 }, { "epoch": 2.150229007633588, "grad_norm": 7.525026508522153, "learning_rate": 7.032973468398421e-06, "loss": 0.1306, "step": 105630 }, { "epoch": 2.1504325699745546, "grad_norm": 1.4935158344408865, "learning_rate": 7.032324266599708e-06, "loss": 0.1236, "step": 105640 }, { "epoch": 2.1506361323155216, "grad_norm": 4.94109910480866, "learning_rate": 7.031675023755591e-06, "loss": 0.1122, "step": 105650 }, { "epoch": 2.1508396946564887, "grad_norm": 7.278085165135259, "learning_rate": 7.031025739879179e-06, "loss": 0.128, "step": 105660 }, { "epoch": 2.1510432569974554, "grad_norm": 1.148042548113908, "learning_rate": 7.030376414983588e-06, "loss": 0.0915, "step": 105670 }, { "epoch": 2.1512468193384224, "grad_norm": 18.982233311323252, "learning_rate": 7.029727049081929e-06, "loss": 0.1263, "step": 105680 }, { "epoch": 2.1514503816793895, "grad_norm": 10.994054652433967, "learning_rate": 7.0290776421873164e-06, "loss": 0.1064, "step": 105690 }, { "epoch": 2.151653944020356, "grad_norm": 1.2912417155326477, "learning_rate": 7.028428194312871e-06, "loss": 0.1923, "step": 105700 }, { "epoch": 2.1518575063613232, "grad_norm": 0.04038968864192353, "learning_rate": 7.0277787054717025e-06, "loss": 0.203, "step": 105710 }, { "epoch": 2.15206106870229, "grad_norm": 0.2024513305681318, "learning_rate": 7.027129175676932e-06, "loss": 0.1693, "step": 105720 }, { "epoch": 2.152264631043257, "grad_norm": 9.909630744832416, "learning_rate": 7.0264796049416776e-06, "loss": 0.0805, "step": 105730 }, { "epoch": 2.152468193384224, "grad_norm": 43.089680527298285, "learning_rate": 7.025829993279056e-06, "loss": 0.0968, "step": 105740 }, { "epoch": 2.1526717557251906, "grad_norm": 15.97308429148824, "learning_rate": 7.02518034070219e-06, "loss": 0.188, "step": 105750 }, { "epoch": 2.1528753180661577, "grad_norm": 27.08046533113727, "learning_rate": 7.0245306472242e-06, "loss": 0.1447, "step": 105760 }, { "epoch": 2.153078880407125, "grad_norm": 6.386247107152265, "learning_rate": 7.023880912858203e-06, "loss": 0.1353, "step": 105770 }, { "epoch": 2.1532824427480914, "grad_norm": 0.6184016684980743, "learning_rate": 7.023231137617325e-06, "loss": 0.1601, "step": 105780 }, { "epoch": 2.1534860050890585, "grad_norm": 13.44444415083327, "learning_rate": 7.02258132151469e-06, "loss": 0.107, "step": 105790 }, { "epoch": 2.1536895674300256, "grad_norm": 0.1257273710615154, "learning_rate": 7.021931464563419e-06, "loss": 0.0591, "step": 105800 }, { "epoch": 2.1538931297709922, "grad_norm": 14.141946960527019, "learning_rate": 7.0212815667766355e-06, "loss": 0.171, "step": 105810 }, { "epoch": 2.1540966921119593, "grad_norm": 0.26544289941563043, "learning_rate": 7.02063162816747e-06, "loss": 0.0848, "step": 105820 }, { "epoch": 2.1543002544529264, "grad_norm": 0.3143015002204348, "learning_rate": 7.019981648749046e-06, "loss": 0.0527, "step": 105830 }, { "epoch": 2.154503816793893, "grad_norm": 6.936728480988149, "learning_rate": 7.01933162853449e-06, "loss": 0.0973, "step": 105840 }, { "epoch": 2.15470737913486, "grad_norm": 11.07568811414951, "learning_rate": 7.018681567536933e-06, "loss": 0.1563, "step": 105850 }, { "epoch": 2.154910941475827, "grad_norm": 22.627801229487797, "learning_rate": 7.0180314657694994e-06, "loss": 0.1933, "step": 105860 }, { "epoch": 2.155114503816794, "grad_norm": 3.915020165537501, "learning_rate": 7.017381323245321e-06, "loss": 0.0782, "step": 105870 }, { "epoch": 2.155318066157761, "grad_norm": 12.711122730811208, "learning_rate": 7.016731139977531e-06, "loss": 0.1516, "step": 105880 }, { "epoch": 2.155521628498728, "grad_norm": 0.3749548701908355, "learning_rate": 7.0160809159792564e-06, "loss": 0.1117, "step": 105890 }, { "epoch": 2.1557251908396946, "grad_norm": 20.942973426240265, "learning_rate": 7.015430651263631e-06, "loss": 0.102, "step": 105900 }, { "epoch": 2.1559287531806617, "grad_norm": 1.5292541139015314, "learning_rate": 7.014780345843788e-06, "loss": 0.1251, "step": 105910 }, { "epoch": 2.1561323155216283, "grad_norm": 59.31529235184954, "learning_rate": 7.014129999732859e-06, "loss": 0.2404, "step": 105920 }, { "epoch": 2.1563358778625954, "grad_norm": 7.945191496603248, "learning_rate": 7.013479612943983e-06, "loss": 0.0984, "step": 105930 }, { "epoch": 2.1565394402035625, "grad_norm": 7.033850273949583, "learning_rate": 7.012829185490292e-06, "loss": 0.117, "step": 105940 }, { "epoch": 2.156743002544529, "grad_norm": 17.93075239942142, "learning_rate": 7.012178717384923e-06, "loss": 0.1174, "step": 105950 }, { "epoch": 2.156946564885496, "grad_norm": 18.692779487648338, "learning_rate": 7.011528208641014e-06, "loss": 0.0356, "step": 105960 }, { "epoch": 2.1571501272264633, "grad_norm": 20.520386001701596, "learning_rate": 7.010877659271702e-06, "loss": 0.1421, "step": 105970 }, { "epoch": 2.15735368956743, "grad_norm": 21.056503192782692, "learning_rate": 7.010227069290123e-06, "loss": 0.2077, "step": 105980 }, { "epoch": 2.157557251908397, "grad_norm": 4.118481962496929, "learning_rate": 7.009576438709422e-06, "loss": 0.1361, "step": 105990 }, { "epoch": 2.157760814249364, "grad_norm": 8.56410877231502, "learning_rate": 7.008925767542735e-06, "loss": 0.1175, "step": 106000 }, { "epoch": 2.1579643765903307, "grad_norm": 7.193195192758233, "learning_rate": 7.008275055803207e-06, "loss": 0.1386, "step": 106010 }, { "epoch": 2.1581679389312978, "grad_norm": 10.707549684084961, "learning_rate": 7.007624303503975e-06, "loss": 0.1079, "step": 106020 }, { "epoch": 2.1583715012722644, "grad_norm": 0.23517283744860193, "learning_rate": 7.006973510658185e-06, "loss": 0.1103, "step": 106030 }, { "epoch": 2.1585750636132315, "grad_norm": 0.09730847252422384, "learning_rate": 7.006322677278981e-06, "loss": 0.075, "step": 106040 }, { "epoch": 2.1587786259541986, "grad_norm": 1.860997030193755, "learning_rate": 7.0056718033795055e-06, "loss": 0.1258, "step": 106050 }, { "epoch": 2.158982188295165, "grad_norm": 26.824594151909317, "learning_rate": 7.005020888972904e-06, "loss": 0.1471, "step": 106060 }, { "epoch": 2.1591857506361323, "grad_norm": 0.14171455156430499, "learning_rate": 7.004369934072324e-06, "loss": 0.1826, "step": 106070 }, { "epoch": 2.1593893129770994, "grad_norm": 4.710216586218551, "learning_rate": 7.003718938690912e-06, "loss": 0.112, "step": 106080 }, { "epoch": 2.159592875318066, "grad_norm": 0.32287422906967506, "learning_rate": 7.003067902841816e-06, "loss": 0.1271, "step": 106090 }, { "epoch": 2.159796437659033, "grad_norm": 0.554879837762399, "learning_rate": 7.002416826538182e-06, "loss": 0.1063, "step": 106100 }, { "epoch": 2.16, "grad_norm": 13.212999197805013, "learning_rate": 7.001765709793161e-06, "loss": 0.1565, "step": 106110 }, { "epoch": 2.1602035623409668, "grad_norm": 0.5559279760393264, "learning_rate": 7.0011145526199045e-06, "loss": 0.1919, "step": 106120 }, { "epoch": 2.160407124681934, "grad_norm": 13.213889747319847, "learning_rate": 7.000463355031562e-06, "loss": 0.128, "step": 106130 }, { "epoch": 2.160610687022901, "grad_norm": 0.08173423405620843, "learning_rate": 6.9998121170412845e-06, "loss": 0.0855, "step": 106140 }, { "epoch": 2.1608142493638676, "grad_norm": 6.55783565684966, "learning_rate": 6.999160838662226e-06, "loss": 0.1236, "step": 106150 }, { "epoch": 2.1610178117048346, "grad_norm": 0.2561813784294134, "learning_rate": 6.99850951990754e-06, "loss": 0.1236, "step": 106160 }, { "epoch": 2.1612213740458017, "grad_norm": 8.786030655558635, "learning_rate": 6.99785816079038e-06, "loss": 0.1889, "step": 106170 }, { "epoch": 2.1614249363867684, "grad_norm": 7.517935443459293, "learning_rate": 6.9972067613239e-06, "loss": 0.092, "step": 106180 }, { "epoch": 2.1616284987277354, "grad_norm": 17.751397365299653, "learning_rate": 6.996555321521259e-06, "loss": 0.118, "step": 106190 }, { "epoch": 2.1618320610687025, "grad_norm": 47.16059648539131, "learning_rate": 6.99590384139561e-06, "loss": 0.0985, "step": 106200 }, { "epoch": 2.162035623409669, "grad_norm": 5.695898739684507, "learning_rate": 6.995252320960114e-06, "loss": 0.0665, "step": 106210 }, { "epoch": 2.1622391857506362, "grad_norm": 0.18668557917070236, "learning_rate": 6.994600760227927e-06, "loss": 0.0656, "step": 106220 }, { "epoch": 2.162442748091603, "grad_norm": 0.15913653808467082, "learning_rate": 6.993949159212207e-06, "loss": 0.174, "step": 106230 }, { "epoch": 2.16264631043257, "grad_norm": 24.81290729317369, "learning_rate": 6.993297517926117e-06, "loss": 0.1349, "step": 106240 }, { "epoch": 2.162849872773537, "grad_norm": 59.83696357630604, "learning_rate": 6.992645836382816e-06, "loss": 0.0913, "step": 106250 }, { "epoch": 2.1630534351145037, "grad_norm": 0.028278078292786963, "learning_rate": 6.9919941145954665e-06, "loss": 0.1883, "step": 106260 }, { "epoch": 2.1632569974554707, "grad_norm": 6.644078254321508, "learning_rate": 6.991342352577228e-06, "loss": 0.2665, "step": 106270 }, { "epoch": 2.163460559796438, "grad_norm": 0.7944634797769113, "learning_rate": 6.990690550341269e-06, "loss": 0.1091, "step": 106280 }, { "epoch": 2.1636641221374044, "grad_norm": 20.155741574844594, "learning_rate": 6.990038707900748e-06, "loss": 0.1592, "step": 106290 }, { "epoch": 2.1638676844783715, "grad_norm": 7.073945672643175, "learning_rate": 6.989386825268833e-06, "loss": 0.2027, "step": 106300 }, { "epoch": 2.1640712468193386, "grad_norm": 0.1755591598349805, "learning_rate": 6.988734902458689e-06, "loss": 0.0655, "step": 106310 }, { "epoch": 2.1642748091603052, "grad_norm": 12.54872829796295, "learning_rate": 6.9880829394834814e-06, "loss": 0.1685, "step": 106320 }, { "epoch": 2.1644783715012723, "grad_norm": 0.2565498947677021, "learning_rate": 6.987430936356379e-06, "loss": 0.1451, "step": 106330 }, { "epoch": 2.1646819338422394, "grad_norm": 0.5398653251460747, "learning_rate": 6.986778893090549e-06, "loss": 0.0574, "step": 106340 }, { "epoch": 2.164885496183206, "grad_norm": 3.464834644840016, "learning_rate": 6.986126809699159e-06, "loss": 0.1044, "step": 106350 }, { "epoch": 2.165089058524173, "grad_norm": 17.783289543576306, "learning_rate": 6.985474686195382e-06, "loss": 0.0928, "step": 106360 }, { "epoch": 2.1652926208651397, "grad_norm": 8.873086748370794, "learning_rate": 6.984822522592385e-06, "loss": 0.2061, "step": 106370 }, { "epoch": 2.165496183206107, "grad_norm": 3.518193088294831, "learning_rate": 6.984170318903341e-06, "loss": 0.0589, "step": 106380 }, { "epoch": 2.165699745547074, "grad_norm": 7.499931188727395, "learning_rate": 6.983518075141422e-06, "loss": 0.1542, "step": 106390 }, { "epoch": 2.1659033078880405, "grad_norm": 4.796643480867599, "learning_rate": 6.9828657913198004e-06, "loss": 0.1914, "step": 106400 }, { "epoch": 2.1661068702290076, "grad_norm": 6.980929418412035, "learning_rate": 6.98221346745165e-06, "loss": 0.1916, "step": 106410 }, { "epoch": 2.1663104325699747, "grad_norm": 2.857835361322475, "learning_rate": 6.9815611035501465e-06, "loss": 0.124, "step": 106420 }, { "epoch": 2.1665139949109413, "grad_norm": 24.5372004357944, "learning_rate": 6.9809086996284636e-06, "loss": 0.077, "step": 106430 }, { "epoch": 2.1667175572519084, "grad_norm": 0.32261699069828026, "learning_rate": 6.980256255699779e-06, "loss": 0.0592, "step": 106440 }, { "epoch": 2.1669211195928755, "grad_norm": 0.18763180290990028, "learning_rate": 6.979603771777268e-06, "loss": 0.1453, "step": 106450 }, { "epoch": 2.167124681933842, "grad_norm": 0.16913975920791569, "learning_rate": 6.9789512478741095e-06, "loss": 0.1651, "step": 106460 }, { "epoch": 2.167328244274809, "grad_norm": 38.967730102160985, "learning_rate": 6.978298684003481e-06, "loss": 0.1373, "step": 106470 }, { "epoch": 2.1675318066157763, "grad_norm": 1.4176287513469161, "learning_rate": 6.977646080178563e-06, "loss": 0.0804, "step": 106480 }, { "epoch": 2.167735368956743, "grad_norm": 11.476492673845987, "learning_rate": 6.976993436412536e-06, "loss": 0.073, "step": 106490 }, { "epoch": 2.16793893129771, "grad_norm": 6.9733509816438595, "learning_rate": 6.976340752718579e-06, "loss": 0.1675, "step": 106500 }, { "epoch": 2.168142493638677, "grad_norm": 15.144775925725678, "learning_rate": 6.975688029109875e-06, "loss": 0.1364, "step": 106510 }, { "epoch": 2.1683460559796437, "grad_norm": 7.530950657633596, "learning_rate": 6.975035265599607e-06, "loss": 0.1078, "step": 106520 }, { "epoch": 2.1685496183206108, "grad_norm": 7.75422140206986, "learning_rate": 6.974382462200957e-06, "loss": 0.1782, "step": 106530 }, { "epoch": 2.1687531806615774, "grad_norm": 12.155169539427009, "learning_rate": 6.973729618927112e-06, "loss": 0.2065, "step": 106540 }, { "epoch": 2.1689567430025445, "grad_norm": 0.03894910167327191, "learning_rate": 6.973076735791254e-06, "loss": 0.196, "step": 106550 }, { "epoch": 2.1691603053435116, "grad_norm": 0.03607140626073714, "learning_rate": 6.97242381280657e-06, "loss": 0.1672, "step": 106560 }, { "epoch": 2.169363867684478, "grad_norm": 13.62340237557532, "learning_rate": 6.971770849986248e-06, "loss": 0.1317, "step": 106570 }, { "epoch": 2.1695674300254453, "grad_norm": 0.16609544279875577, "learning_rate": 6.971117847343473e-06, "loss": 0.1257, "step": 106580 }, { "epoch": 2.1697709923664124, "grad_norm": 15.27855853316031, "learning_rate": 6.970464804891434e-06, "loss": 0.0631, "step": 106590 }, { "epoch": 2.169974554707379, "grad_norm": 0.26758206001640095, "learning_rate": 6.96981172264332e-06, "loss": 0.1426, "step": 106600 }, { "epoch": 2.170178117048346, "grad_norm": 0.02345812821309395, "learning_rate": 6.9691586006123225e-06, "loss": 0.1009, "step": 106610 }, { "epoch": 2.170381679389313, "grad_norm": 18.231813341017045, "learning_rate": 6.96850543881163e-06, "loss": 0.1096, "step": 106620 }, { "epoch": 2.17058524173028, "grad_norm": 12.275378370980745, "learning_rate": 6.967852237254435e-06, "loss": 0.2356, "step": 106630 }, { "epoch": 2.170788804071247, "grad_norm": 2.239522541048679, "learning_rate": 6.967198995953929e-06, "loss": 0.0767, "step": 106640 }, { "epoch": 2.170992366412214, "grad_norm": 0.14670836950428992, "learning_rate": 6.966545714923306e-06, "loss": 0.0925, "step": 106650 }, { "epoch": 2.1711959287531806, "grad_norm": 0.6324812632985541, "learning_rate": 6.965892394175759e-06, "loss": 0.1305, "step": 106660 }, { "epoch": 2.1713994910941476, "grad_norm": 1.4121474486223125, "learning_rate": 6.9652390337244845e-06, "loss": 0.1266, "step": 106670 }, { "epoch": 2.1716030534351143, "grad_norm": 13.309572833137722, "learning_rate": 6.964585633582675e-06, "loss": 0.1368, "step": 106680 }, { "epoch": 2.1718066157760814, "grad_norm": 1.2707778349537349, "learning_rate": 6.963932193763529e-06, "loss": 0.1638, "step": 106690 }, { "epoch": 2.1720101781170484, "grad_norm": 0.2599880192223732, "learning_rate": 6.963278714280241e-06, "loss": 0.1233, "step": 106700 }, { "epoch": 2.172213740458015, "grad_norm": 45.39655588694877, "learning_rate": 6.962625195146013e-06, "loss": 0.1534, "step": 106710 }, { "epoch": 2.172417302798982, "grad_norm": 83.56874881069336, "learning_rate": 6.96197163637404e-06, "loss": 0.1778, "step": 106720 }, { "epoch": 2.1726208651399492, "grad_norm": 7.124379311093645, "learning_rate": 6.9613180379775224e-06, "loss": 0.0616, "step": 106730 }, { "epoch": 2.172824427480916, "grad_norm": 6.818908614067591, "learning_rate": 6.960664399969663e-06, "loss": 0.1348, "step": 106740 }, { "epoch": 2.173027989821883, "grad_norm": 11.845895399434138, "learning_rate": 6.960010722363659e-06, "loss": 0.0834, "step": 106750 }, { "epoch": 2.17323155216285, "grad_norm": 15.797356370468309, "learning_rate": 6.959357005172713e-06, "loss": 0.0837, "step": 106760 }, { "epoch": 2.1734351145038167, "grad_norm": 0.5093388158510336, "learning_rate": 6.9587032484100324e-06, "loss": 0.1202, "step": 106770 }, { "epoch": 2.1736386768447837, "grad_norm": 0.41918694367743237, "learning_rate": 6.958049452088814e-06, "loss": 0.1504, "step": 106780 }, { "epoch": 2.173842239185751, "grad_norm": 27.15022450595198, "learning_rate": 6.9573956162222655e-06, "loss": 0.1319, "step": 106790 }, { "epoch": 2.1740458015267174, "grad_norm": 2.525025327558665, "learning_rate": 6.956741740823591e-06, "loss": 0.1567, "step": 106800 }, { "epoch": 2.1742493638676845, "grad_norm": 0.1358150210253992, "learning_rate": 6.956087825905997e-06, "loss": 0.1838, "step": 106810 }, { "epoch": 2.1744529262086516, "grad_norm": 2.017488032957081, "learning_rate": 6.955433871482691e-06, "loss": 0.0614, "step": 106820 }, { "epoch": 2.1746564885496182, "grad_norm": 25.459433102593426, "learning_rate": 6.95477987756688e-06, "loss": 0.1597, "step": 106830 }, { "epoch": 2.1748600508905853, "grad_norm": 1.1162683517849112, "learning_rate": 6.954125844171768e-06, "loss": 0.2594, "step": 106840 }, { "epoch": 2.1750636132315524, "grad_norm": 13.745767647502038, "learning_rate": 6.95347177131057e-06, "loss": 0.1936, "step": 106850 }, { "epoch": 2.175267175572519, "grad_norm": 0.1723909323191177, "learning_rate": 6.9528176589964945e-06, "loss": 0.0738, "step": 106860 }, { "epoch": 2.175470737913486, "grad_norm": 0.02868233480720207, "learning_rate": 6.9521635072427495e-06, "loss": 0.0503, "step": 106870 }, { "epoch": 2.1756743002544527, "grad_norm": 4.023985126938715, "learning_rate": 6.951509316062549e-06, "loss": 0.1272, "step": 106880 }, { "epoch": 2.17587786259542, "grad_norm": 66.31332562467101, "learning_rate": 6.950855085469104e-06, "loss": 0.0986, "step": 106890 }, { "epoch": 2.176081424936387, "grad_norm": 0.15076345008205444, "learning_rate": 6.950200815475628e-06, "loss": 0.0537, "step": 106900 }, { "epoch": 2.1762849872773535, "grad_norm": 0.07962579099412734, "learning_rate": 6.949546506095334e-06, "loss": 0.0998, "step": 106910 }, { "epoch": 2.1764885496183206, "grad_norm": 2.6948374676295894, "learning_rate": 6.948892157341439e-06, "loss": 0.0763, "step": 106920 }, { "epoch": 2.1766921119592877, "grad_norm": 0.022434911303348678, "learning_rate": 6.948237769227154e-06, "loss": 0.1233, "step": 106930 }, { "epoch": 2.1768956743002543, "grad_norm": 9.556697884297146, "learning_rate": 6.9475833417657015e-06, "loss": 0.1965, "step": 106940 }, { "epoch": 2.1770992366412214, "grad_norm": 8.196997121395619, "learning_rate": 6.946928874970292e-06, "loss": 0.0896, "step": 106950 }, { "epoch": 2.1773027989821885, "grad_norm": 16.181560860194065, "learning_rate": 6.946274368854148e-06, "loss": 0.1715, "step": 106960 }, { "epoch": 2.177506361323155, "grad_norm": 25.797223523294516, "learning_rate": 6.945619823430485e-06, "loss": 0.058, "step": 106970 }, { "epoch": 2.177709923664122, "grad_norm": 11.403872557395244, "learning_rate": 6.944965238712524e-06, "loss": 0.1161, "step": 106980 }, { "epoch": 2.177913486005089, "grad_norm": 0.3342979018001001, "learning_rate": 6.944310614713485e-06, "loss": 0.0739, "step": 106990 }, { "epoch": 2.178117048346056, "grad_norm": 0.8375071828651933, "learning_rate": 6.943655951446591e-06, "loss": 0.1111, "step": 107000 }, { "epoch": 2.178320610687023, "grad_norm": 15.008823460578604, "learning_rate": 6.943001248925059e-06, "loss": 0.1731, "step": 107010 }, { "epoch": 2.1785241730279896, "grad_norm": 0.13121337677602424, "learning_rate": 6.942346507162114e-06, "loss": 0.1597, "step": 107020 }, { "epoch": 2.1787277353689567, "grad_norm": 0.2100623049997828, "learning_rate": 6.941691726170982e-06, "loss": 0.1381, "step": 107030 }, { "epoch": 2.1789312977099238, "grad_norm": 8.665489857842692, "learning_rate": 6.941036905964882e-06, "loss": 0.1434, "step": 107040 }, { "epoch": 2.1791348600508904, "grad_norm": 0.010252746152765028, "learning_rate": 6.940382046557043e-06, "loss": 0.1031, "step": 107050 }, { "epoch": 2.1793384223918575, "grad_norm": 10.944868787504921, "learning_rate": 6.93972714796069e-06, "loss": 0.1351, "step": 107060 }, { "epoch": 2.1795419847328246, "grad_norm": 8.731323596582884, "learning_rate": 6.9390722101890476e-06, "loss": 0.107, "step": 107070 }, { "epoch": 2.179745547073791, "grad_norm": 2.965986509911014, "learning_rate": 6.9384172332553445e-06, "loss": 0.1673, "step": 107080 }, { "epoch": 2.1799491094147583, "grad_norm": 0.08590524653095566, "learning_rate": 6.9377622171728095e-06, "loss": 0.1026, "step": 107090 }, { "epoch": 2.1801526717557254, "grad_norm": 1.0145532158489121, "learning_rate": 6.937107161954671e-06, "loss": 0.0824, "step": 107100 }, { "epoch": 2.180356234096692, "grad_norm": 61.94284975380883, "learning_rate": 6.936452067614157e-06, "loss": 0.214, "step": 107110 }, { "epoch": 2.180559796437659, "grad_norm": 19.99805899918086, "learning_rate": 6.9357969341645e-06, "loss": 0.1098, "step": 107120 }, { "epoch": 2.180763358778626, "grad_norm": 8.752994550758016, "learning_rate": 6.935141761618931e-06, "loss": 0.2079, "step": 107130 }, { "epoch": 2.180966921119593, "grad_norm": 5.103444984767552, "learning_rate": 6.934486549990682e-06, "loss": 0.0694, "step": 107140 }, { "epoch": 2.18117048346056, "grad_norm": 9.860443310218688, "learning_rate": 6.933831299292984e-06, "loss": 0.1405, "step": 107150 }, { "epoch": 2.181374045801527, "grad_norm": 1.7132466823034849, "learning_rate": 6.9331760095390734e-06, "loss": 0.0517, "step": 107160 }, { "epoch": 2.1815776081424936, "grad_norm": 15.336988893279681, "learning_rate": 6.932520680742183e-06, "loss": 0.1107, "step": 107170 }, { "epoch": 2.1817811704834607, "grad_norm": 0.3689608362542156, "learning_rate": 6.931865312915549e-06, "loss": 0.1438, "step": 107180 }, { "epoch": 2.1819847328244273, "grad_norm": 0.0231817626597961, "learning_rate": 6.931209906072406e-06, "loss": 0.008, "step": 107190 }, { "epoch": 2.1821882951653944, "grad_norm": 15.236959454890684, "learning_rate": 6.930554460225993e-06, "loss": 0.1968, "step": 107200 }, { "epoch": 2.1823918575063614, "grad_norm": 12.151163323120086, "learning_rate": 6.929898975389544e-06, "loss": 0.1825, "step": 107210 }, { "epoch": 2.182595419847328, "grad_norm": 7.349905395661819, "learning_rate": 6.9292434515763e-06, "loss": 0.118, "step": 107220 }, { "epoch": 2.182798982188295, "grad_norm": 7.358834229900096, "learning_rate": 6.928587888799501e-06, "loss": 0.0892, "step": 107230 }, { "epoch": 2.1830025445292622, "grad_norm": 13.876106569408371, "learning_rate": 6.927932287072384e-06, "loss": 0.2281, "step": 107240 }, { "epoch": 2.183206106870229, "grad_norm": 20.888951735313135, "learning_rate": 6.927276646408192e-06, "loss": 0.1141, "step": 107250 }, { "epoch": 2.183409669211196, "grad_norm": 5.5489555888082, "learning_rate": 6.926620966820166e-06, "loss": 0.0943, "step": 107260 }, { "epoch": 2.183613231552163, "grad_norm": 14.27569583695767, "learning_rate": 6.925965248321548e-06, "loss": 0.1641, "step": 107270 }, { "epoch": 2.1838167938931297, "grad_norm": 7.633634311827147, "learning_rate": 6.925309490925582e-06, "loss": 0.1174, "step": 107280 }, { "epoch": 2.1840203562340967, "grad_norm": 17.655249823966432, "learning_rate": 6.92465369464551e-06, "loss": 0.1197, "step": 107290 }, { "epoch": 2.184223918575064, "grad_norm": 21.3616726442355, "learning_rate": 6.923997859494577e-06, "loss": 0.106, "step": 107300 }, { "epoch": 2.1844274809160305, "grad_norm": 17.548028422663513, "learning_rate": 6.92334198548603e-06, "loss": 0.1344, "step": 107310 }, { "epoch": 2.1846310432569975, "grad_norm": 13.143370262687132, "learning_rate": 6.922686072633115e-06, "loss": 0.1976, "step": 107320 }, { "epoch": 2.184834605597964, "grad_norm": 11.290000552099514, "learning_rate": 6.922030120949077e-06, "loss": 0.1385, "step": 107330 }, { "epoch": 2.1850381679389312, "grad_norm": 16.378572233284896, "learning_rate": 6.921374130447165e-06, "loss": 0.1441, "step": 107340 }, { "epoch": 2.1852417302798983, "grad_norm": 0.7110187438139832, "learning_rate": 6.92071810114063e-06, "loss": 0.0592, "step": 107350 }, { "epoch": 2.185445292620865, "grad_norm": 0.8359929704336097, "learning_rate": 6.920062033042718e-06, "loss": 0.1295, "step": 107360 }, { "epoch": 2.185648854961832, "grad_norm": 0.21373589790516104, "learning_rate": 6.919405926166679e-06, "loss": 0.0285, "step": 107370 }, { "epoch": 2.185852417302799, "grad_norm": 8.695388466063234, "learning_rate": 6.918749780525766e-06, "loss": 0.207, "step": 107380 }, { "epoch": 2.1860559796437657, "grad_norm": 0.70375104339085, "learning_rate": 6.918093596133231e-06, "loss": 0.0538, "step": 107390 }, { "epoch": 2.186259541984733, "grad_norm": 10.128487400170656, "learning_rate": 6.9174373730023225e-06, "loss": 0.204, "step": 107400 }, { "epoch": 2.1864631043257, "grad_norm": 14.903302211721407, "learning_rate": 6.9167811111463e-06, "loss": 0.1671, "step": 107410 }, { "epoch": 2.1866666666666665, "grad_norm": 0.6514406782356598, "learning_rate": 6.916124810578411e-06, "loss": 0.1548, "step": 107420 }, { "epoch": 2.1868702290076336, "grad_norm": 0.22818881253133985, "learning_rate": 6.915468471311917e-06, "loss": 0.0646, "step": 107430 }, { "epoch": 2.1870737913486007, "grad_norm": 0.8129372409150594, "learning_rate": 6.914812093360067e-06, "loss": 0.1669, "step": 107440 }, { "epoch": 2.1872773536895673, "grad_norm": 0.039556682509769, "learning_rate": 6.914155676736122e-06, "loss": 0.114, "step": 107450 }, { "epoch": 2.1874809160305344, "grad_norm": 0.349365102514365, "learning_rate": 6.913499221453337e-06, "loss": 0.1109, "step": 107460 }, { "epoch": 2.1876844783715015, "grad_norm": 1.7541126098733026, "learning_rate": 6.912842727524971e-06, "loss": 0.0211, "step": 107470 }, { "epoch": 2.187888040712468, "grad_norm": 0.018012026442917963, "learning_rate": 6.912186194964282e-06, "loss": 0.1132, "step": 107480 }, { "epoch": 2.188091603053435, "grad_norm": 30.551267963538315, "learning_rate": 6.911529623784532e-06, "loss": 0.1741, "step": 107490 }, { "epoch": 2.188295165394402, "grad_norm": 0.31974663600475567, "learning_rate": 6.910873013998976e-06, "loss": 0.137, "step": 107500 }, { "epoch": 2.188498727735369, "grad_norm": 8.079906181069582, "learning_rate": 6.91021636562088e-06, "loss": 0.0917, "step": 107510 }, { "epoch": 2.188702290076336, "grad_norm": 0.08247177889236802, "learning_rate": 6.909559678663504e-06, "loss": 0.071, "step": 107520 }, { "epoch": 2.1889058524173026, "grad_norm": 29.513621511504347, "learning_rate": 6.908902953140111e-06, "loss": 0.122, "step": 107530 }, { "epoch": 2.1891094147582697, "grad_norm": 41.43115434551068, "learning_rate": 6.908246189063963e-06, "loss": 0.1903, "step": 107540 }, { "epoch": 2.189312977099237, "grad_norm": 0.6615751424536648, "learning_rate": 6.907589386448328e-06, "loss": 0.1613, "step": 107550 }, { "epoch": 2.1895165394402034, "grad_norm": 0.701515137118789, "learning_rate": 6.906932545306466e-06, "loss": 0.1212, "step": 107560 }, { "epoch": 2.1897201017811705, "grad_norm": 1.3074266516062338, "learning_rate": 6.906275665651646e-06, "loss": 0.1011, "step": 107570 }, { "epoch": 2.1899236641221376, "grad_norm": 19.465889987140915, "learning_rate": 6.905618747497134e-06, "loss": 0.1433, "step": 107580 }, { "epoch": 2.190127226463104, "grad_norm": 12.819170207178754, "learning_rate": 6.904961790856196e-06, "loss": 0.1865, "step": 107590 }, { "epoch": 2.1903307888040713, "grad_norm": 15.655808498709632, "learning_rate": 6.904304795742101e-06, "loss": 0.1645, "step": 107600 }, { "epoch": 2.1905343511450384, "grad_norm": 4.090755446507137, "learning_rate": 6.90364776216812e-06, "loss": 0.1585, "step": 107610 }, { "epoch": 2.190737913486005, "grad_norm": 0.1534653982156059, "learning_rate": 6.902990690147519e-06, "loss": 0.1034, "step": 107620 }, { "epoch": 2.190941475826972, "grad_norm": 0.29997397279886184, "learning_rate": 6.90233357969357e-06, "loss": 0.104, "step": 107630 }, { "epoch": 2.1911450381679387, "grad_norm": 0.076346386837662, "learning_rate": 6.901676430819544e-06, "loss": 0.0491, "step": 107640 }, { "epoch": 2.191348600508906, "grad_norm": 12.707086024315265, "learning_rate": 6.901019243538714e-06, "loss": 0.1098, "step": 107650 }, { "epoch": 2.191552162849873, "grad_norm": 0.8951818230480746, "learning_rate": 6.90036201786435e-06, "loss": 0.183, "step": 107660 }, { "epoch": 2.1917557251908395, "grad_norm": 23.332568710911758, "learning_rate": 6.899704753809729e-06, "loss": 0.1273, "step": 107670 }, { "epoch": 2.1919592875318066, "grad_norm": 22.319638663488597, "learning_rate": 6.899047451388123e-06, "loss": 0.2008, "step": 107680 }, { "epoch": 2.1921628498727737, "grad_norm": 0.09314972199549296, "learning_rate": 6.8983901106128095e-06, "loss": 0.0879, "step": 107690 }, { "epoch": 2.1923664122137403, "grad_norm": 0.5236866787820122, "learning_rate": 6.897732731497061e-06, "loss": 0.1419, "step": 107700 }, { "epoch": 2.1925699745547074, "grad_norm": 0.4269496102334907, "learning_rate": 6.897075314054155e-06, "loss": 0.1194, "step": 107710 }, { "epoch": 2.1927735368956744, "grad_norm": 0.7861872635691541, "learning_rate": 6.89641785829737e-06, "loss": 0.063, "step": 107720 }, { "epoch": 2.192977099236641, "grad_norm": 3.728187424624978, "learning_rate": 6.895760364239985e-06, "loss": 0.1248, "step": 107730 }, { "epoch": 2.193180661577608, "grad_norm": 19.057585534010688, "learning_rate": 6.8951028318952775e-06, "loss": 0.0664, "step": 107740 }, { "epoch": 2.1933842239185752, "grad_norm": 45.56806668481571, "learning_rate": 6.894445261276527e-06, "loss": 0.1677, "step": 107750 }, { "epoch": 2.193587786259542, "grad_norm": 3.2798867471767146, "learning_rate": 6.893787652397014e-06, "loss": 0.0647, "step": 107760 }, { "epoch": 2.193791348600509, "grad_norm": 11.954731192341988, "learning_rate": 6.893130005270021e-06, "loss": 0.0905, "step": 107770 }, { "epoch": 2.193994910941476, "grad_norm": 0.42002879253308645, "learning_rate": 6.89247231990883e-06, "loss": 0.1473, "step": 107780 }, { "epoch": 2.1941984732824427, "grad_norm": 14.177441447723664, "learning_rate": 6.89181459632672e-06, "loss": 0.068, "step": 107790 }, { "epoch": 2.1944020356234097, "grad_norm": 8.130765919416069, "learning_rate": 6.891156834536979e-06, "loss": 0.1739, "step": 107800 }, { "epoch": 2.194605597964377, "grad_norm": 0.0522349948259633, "learning_rate": 6.890499034552892e-06, "loss": 0.1162, "step": 107810 }, { "epoch": 2.1948091603053435, "grad_norm": 16.043847208484422, "learning_rate": 6.88984119638774e-06, "loss": 0.1682, "step": 107820 }, { "epoch": 2.1950127226463105, "grad_norm": 2.5755770341887696, "learning_rate": 6.889183320054811e-06, "loss": 0.2195, "step": 107830 }, { "epoch": 2.195216284987277, "grad_norm": 21.178311810024823, "learning_rate": 6.8885254055673926e-06, "loss": 0.2083, "step": 107840 }, { "epoch": 2.1954198473282442, "grad_norm": 0.8919384703071616, "learning_rate": 6.88786745293877e-06, "loss": 0.0567, "step": 107850 }, { "epoch": 2.1956234096692113, "grad_norm": 23.714297741504723, "learning_rate": 6.887209462182234e-06, "loss": 0.143, "step": 107860 }, { "epoch": 2.195826972010178, "grad_norm": 31.64071334645476, "learning_rate": 6.886551433311071e-06, "loss": 0.0838, "step": 107870 }, { "epoch": 2.196030534351145, "grad_norm": 0.34583439939272226, "learning_rate": 6.885893366338572e-06, "loss": 0.0529, "step": 107880 }, { "epoch": 2.196234096692112, "grad_norm": 11.875723150840841, "learning_rate": 6.885235261278027e-06, "loss": 0.1294, "step": 107890 }, { "epoch": 2.1964376590330787, "grad_norm": 3.906923504743997, "learning_rate": 6.8845771181427305e-06, "loss": 0.0899, "step": 107900 }, { "epoch": 2.196641221374046, "grad_norm": 32.28685466389158, "learning_rate": 6.883918936945968e-06, "loss": 0.0865, "step": 107910 }, { "epoch": 2.196844783715013, "grad_norm": 1.4659045868756648, "learning_rate": 6.883260717701039e-06, "loss": 0.1323, "step": 107920 }, { "epoch": 2.1970483460559795, "grad_norm": 21.899942638664896, "learning_rate": 6.882602460421233e-06, "loss": 0.2028, "step": 107930 }, { "epoch": 2.1972519083969466, "grad_norm": 0.35801736059680705, "learning_rate": 6.881944165119846e-06, "loss": 0.14, "step": 107940 }, { "epoch": 2.1974554707379133, "grad_norm": 0.03375670312005525, "learning_rate": 6.881285831810171e-06, "loss": 0.1122, "step": 107950 }, { "epoch": 2.1976590330788803, "grad_norm": 69.229569977609, "learning_rate": 6.8806274605055075e-06, "loss": 0.104, "step": 107960 }, { "epoch": 2.1978625954198474, "grad_norm": 30.301321360946545, "learning_rate": 6.879969051219149e-06, "loss": 0.023, "step": 107970 }, { "epoch": 2.198066157760814, "grad_norm": 7.900065633765849, "learning_rate": 6.879310603964396e-06, "loss": 0.1531, "step": 107980 }, { "epoch": 2.198269720101781, "grad_norm": 1.6328920742230393, "learning_rate": 6.878652118754544e-06, "loss": 0.1608, "step": 107990 }, { "epoch": 2.198473282442748, "grad_norm": 0.8169358026497484, "learning_rate": 6.877993595602891e-06, "loss": 0.1256, "step": 108000 }, { "epoch": 2.198676844783715, "grad_norm": 1.7056735156351157, "learning_rate": 6.8773350345227405e-06, "loss": 0.1012, "step": 108010 }, { "epoch": 2.198880407124682, "grad_norm": 11.93957249151992, "learning_rate": 6.876676435527391e-06, "loss": 0.138, "step": 108020 }, { "epoch": 2.199083969465649, "grad_norm": 33.085068374585596, "learning_rate": 6.876017798630142e-06, "loss": 0.0811, "step": 108030 }, { "epoch": 2.1992875318066156, "grad_norm": 0.3566568258839887, "learning_rate": 6.875359123844298e-06, "loss": 0.0904, "step": 108040 }, { "epoch": 2.1994910941475827, "grad_norm": 20.06835565459806, "learning_rate": 6.874700411183162e-06, "loss": 0.1669, "step": 108050 }, { "epoch": 2.19969465648855, "grad_norm": 0.14479499979192964, "learning_rate": 6.874041660660036e-06, "loss": 0.0692, "step": 108060 }, { "epoch": 2.1998982188295164, "grad_norm": 0.0857603301810488, "learning_rate": 6.873382872288226e-06, "loss": 0.1685, "step": 108070 }, { "epoch": 2.2001017811704835, "grad_norm": 23.99301906259013, "learning_rate": 6.872724046081033e-06, "loss": 0.1607, "step": 108080 }, { "epoch": 2.2003053435114506, "grad_norm": 12.959080959341067, "learning_rate": 6.872065182051768e-06, "loss": 0.0635, "step": 108090 }, { "epoch": 2.200508905852417, "grad_norm": 0.7326916318610902, "learning_rate": 6.871406280213736e-06, "loss": 0.1377, "step": 108100 }, { "epoch": 2.2007124681933843, "grad_norm": 0.8863924351089444, "learning_rate": 6.8707473405802425e-06, "loss": 0.0821, "step": 108110 }, { "epoch": 2.2009160305343514, "grad_norm": 1.7851245527565702, "learning_rate": 6.870088363164598e-06, "loss": 0.0614, "step": 108120 }, { "epoch": 2.201119592875318, "grad_norm": 8.2674246441354, "learning_rate": 6.86942934798011e-06, "loss": 0.0699, "step": 108130 }, { "epoch": 2.201323155216285, "grad_norm": 13.688992492569316, "learning_rate": 6.868770295040088e-06, "loss": 0.1686, "step": 108140 }, { "epoch": 2.2015267175572517, "grad_norm": 8.966846211299748, "learning_rate": 6.8681112043578425e-06, "loss": 0.1533, "step": 108150 }, { "epoch": 2.201730279898219, "grad_norm": 0.09294660245164685, "learning_rate": 6.867452075946685e-06, "loss": 0.093, "step": 108160 }, { "epoch": 2.201933842239186, "grad_norm": 0.1020438306570142, "learning_rate": 6.866792909819929e-06, "loss": 0.1393, "step": 108170 }, { "epoch": 2.2021374045801525, "grad_norm": 0.10190341860714269, "learning_rate": 6.866133705990886e-06, "loss": 0.2342, "step": 108180 }, { "epoch": 2.2023409669211196, "grad_norm": 0.049776669737325, "learning_rate": 6.865474464472869e-06, "loss": 0.0521, "step": 108190 }, { "epoch": 2.2025445292620867, "grad_norm": 0.04907756765384877, "learning_rate": 6.86481518527919e-06, "loss": 0.0942, "step": 108200 }, { "epoch": 2.2027480916030533, "grad_norm": 20.762989973901362, "learning_rate": 6.86415586842317e-06, "loss": 0.1523, "step": 108210 }, { "epoch": 2.2029516539440204, "grad_norm": 0.49586465801910135, "learning_rate": 6.86349651391812e-06, "loss": 0.1751, "step": 108220 }, { "epoch": 2.2031552162849874, "grad_norm": 0.26568766668756943, "learning_rate": 6.862837121777356e-06, "loss": 0.1622, "step": 108230 }, { "epoch": 2.203358778625954, "grad_norm": 11.608656804438606, "learning_rate": 6.862177692014198e-06, "loss": 0.0669, "step": 108240 }, { "epoch": 2.203562340966921, "grad_norm": 17.30831863743022, "learning_rate": 6.861518224641964e-06, "loss": 0.209, "step": 108250 }, { "epoch": 2.2037659033078882, "grad_norm": 0.3013038130579816, "learning_rate": 6.86085871967397e-06, "loss": 0.1031, "step": 108260 }, { "epoch": 2.203969465648855, "grad_norm": 10.087373063451196, "learning_rate": 6.860199177123541e-06, "loss": 0.1069, "step": 108270 }, { "epoch": 2.204173027989822, "grad_norm": 0.042022926470143766, "learning_rate": 6.85953959700399e-06, "loss": 0.1292, "step": 108280 }, { "epoch": 2.2043765903307886, "grad_norm": 16.680045840358094, "learning_rate": 6.858879979328643e-06, "loss": 0.1933, "step": 108290 }, { "epoch": 2.2045801526717557, "grad_norm": 8.546863267397095, "learning_rate": 6.858220324110821e-06, "loss": 0.195, "step": 108300 }, { "epoch": 2.2047837150127227, "grad_norm": 20.291803600591262, "learning_rate": 6.857560631363847e-06, "loss": 0.2319, "step": 108310 }, { "epoch": 2.2049872773536894, "grad_norm": 0.059395723360051264, "learning_rate": 6.856900901101041e-06, "loss": 0.0893, "step": 108320 }, { "epoch": 2.2051908396946565, "grad_norm": 1.8596895811165592, "learning_rate": 6.856241133335731e-06, "loss": 0.1736, "step": 108330 }, { "epoch": 2.2053944020356235, "grad_norm": 17.52957397032139, "learning_rate": 6.85558132808124e-06, "loss": 0.191, "step": 108340 }, { "epoch": 2.20559796437659, "grad_norm": 0.22807691302672262, "learning_rate": 6.854921485350895e-06, "loss": 0.061, "step": 108350 }, { "epoch": 2.2058015267175572, "grad_norm": 0.23962653722616212, "learning_rate": 6.854261605158023e-06, "loss": 0.0884, "step": 108360 }, { "epoch": 2.2060050890585243, "grad_norm": 4.333545603476203, "learning_rate": 6.853601687515945e-06, "loss": 0.1551, "step": 108370 }, { "epoch": 2.206208651399491, "grad_norm": 37.14240003140422, "learning_rate": 6.852941732437997e-06, "loss": 0.0861, "step": 108380 }, { "epoch": 2.206412213740458, "grad_norm": 10.156129807088648, "learning_rate": 6.852281739937504e-06, "loss": 0.119, "step": 108390 }, { "epoch": 2.206615776081425, "grad_norm": 3.514628556009685, "learning_rate": 6.8516217100277945e-06, "loss": 0.0962, "step": 108400 }, { "epoch": 2.2068193384223918, "grad_norm": 0.9230429734630293, "learning_rate": 6.8509616427222e-06, "loss": 0.1364, "step": 108410 }, { "epoch": 2.207022900763359, "grad_norm": 16.509519682973192, "learning_rate": 6.850301538034052e-06, "loss": 0.1174, "step": 108420 }, { "epoch": 2.207226463104326, "grad_norm": 0.21671682844531187, "learning_rate": 6.84964139597668e-06, "loss": 0.1285, "step": 108430 }, { "epoch": 2.2074300254452925, "grad_norm": 0.05589724196772821, "learning_rate": 6.848981216563417e-06, "loss": 0.1304, "step": 108440 }, { "epoch": 2.2076335877862596, "grad_norm": 11.742248825862177, "learning_rate": 6.848320999807598e-06, "loss": 0.0868, "step": 108450 }, { "epoch": 2.2078371501272263, "grad_norm": 28.492004887147843, "learning_rate": 6.847660745722556e-06, "loss": 0.1096, "step": 108460 }, { "epoch": 2.2080407124681933, "grad_norm": 0.6159274580286306, "learning_rate": 6.847000454321626e-06, "loss": 0.1693, "step": 108470 }, { "epoch": 2.2082442748091604, "grad_norm": 14.93279334183233, "learning_rate": 6.846340125618142e-06, "loss": 0.1919, "step": 108480 }, { "epoch": 2.208447837150127, "grad_norm": 2.1513726928995873, "learning_rate": 6.845679759625441e-06, "loss": 0.0641, "step": 108490 }, { "epoch": 2.208651399491094, "grad_norm": 88.09700392329634, "learning_rate": 6.845019356356861e-06, "loss": 0.1388, "step": 108500 }, { "epoch": 2.208854961832061, "grad_norm": 6.3447546796270196, "learning_rate": 6.844358915825739e-06, "loss": 0.232, "step": 108510 }, { "epoch": 2.209058524173028, "grad_norm": 20.118236921251796, "learning_rate": 6.843698438045411e-06, "loss": 0.0754, "step": 108520 }, { "epoch": 2.209262086513995, "grad_norm": 18.806854315578427, "learning_rate": 6.84303792302922e-06, "loss": 0.0948, "step": 108530 }, { "epoch": 2.209465648854962, "grad_norm": 9.157149471528632, "learning_rate": 6.842377370790503e-06, "loss": 0.1502, "step": 108540 }, { "epoch": 2.2096692111959286, "grad_norm": 3.5199650053338667, "learning_rate": 6.8417167813426035e-06, "loss": 0.2613, "step": 108550 }, { "epoch": 2.2098727735368957, "grad_norm": 6.902045374795431, "learning_rate": 6.841056154698862e-06, "loss": 0.0802, "step": 108560 }, { "epoch": 2.210076335877863, "grad_norm": 30.2389932719803, "learning_rate": 6.840395490872618e-06, "loss": 0.1378, "step": 108570 }, { "epoch": 2.2102798982188294, "grad_norm": 11.994855710332667, "learning_rate": 6.839734789877217e-06, "loss": 0.138, "step": 108580 }, { "epoch": 2.2104834605597965, "grad_norm": 1.8605385104968195, "learning_rate": 6.839074051726005e-06, "loss": 0.0929, "step": 108590 }, { "epoch": 2.210687022900763, "grad_norm": 15.193467914759546, "learning_rate": 6.8384132764323204e-06, "loss": 0.0945, "step": 108600 }, { "epoch": 2.21089058524173, "grad_norm": 7.001717024169164, "learning_rate": 6.837752464009514e-06, "loss": 0.1103, "step": 108610 }, { "epoch": 2.2110941475826973, "grad_norm": 30.95099456586675, "learning_rate": 6.837091614470929e-06, "loss": 0.159, "step": 108620 }, { "epoch": 2.211297709923664, "grad_norm": 1.1817390258109473, "learning_rate": 6.836430727829912e-06, "loss": 0.133, "step": 108630 }, { "epoch": 2.211501272264631, "grad_norm": 0.16232601051955536, "learning_rate": 6.835769804099812e-06, "loss": 0.0997, "step": 108640 }, { "epoch": 2.211704834605598, "grad_norm": 1.7009171669973338, "learning_rate": 6.835108843293977e-06, "loss": 0.0697, "step": 108650 }, { "epoch": 2.2119083969465647, "grad_norm": 7.782334196887586, "learning_rate": 6.8344478454257535e-06, "loss": 0.173, "step": 108660 }, { "epoch": 2.212111959287532, "grad_norm": 7.607121383765297, "learning_rate": 6.833786810508495e-06, "loss": 0.2087, "step": 108670 }, { "epoch": 2.212315521628499, "grad_norm": 0.12003884777868665, "learning_rate": 6.833125738555548e-06, "loss": 0.1255, "step": 108680 }, { "epoch": 2.2125190839694655, "grad_norm": 12.216763710786504, "learning_rate": 6.832464629580267e-06, "loss": 0.1867, "step": 108690 }, { "epoch": 2.2127226463104326, "grad_norm": 7.899416675380144, "learning_rate": 6.8318034835960025e-06, "loss": 0.1838, "step": 108700 }, { "epoch": 2.2129262086513997, "grad_norm": 4.9986998490968775, "learning_rate": 6.8311423006161075e-06, "loss": 0.157, "step": 108710 }, { "epoch": 2.2131297709923663, "grad_norm": 19.419296809147582, "learning_rate": 6.830481080653935e-06, "loss": 0.1074, "step": 108720 }, { "epoch": 2.2133333333333334, "grad_norm": 9.268257300425446, "learning_rate": 6.829819823722838e-06, "loss": 0.1301, "step": 108730 }, { "epoch": 2.2135368956743005, "grad_norm": 0.18392020825707833, "learning_rate": 6.829158529836174e-06, "loss": 0.0557, "step": 108740 }, { "epoch": 2.213740458015267, "grad_norm": 7.557297276339834, "learning_rate": 6.828497199007298e-06, "loss": 0.1297, "step": 108750 }, { "epoch": 2.213944020356234, "grad_norm": 11.206185347592731, "learning_rate": 6.8278358312495665e-06, "loss": 0.0982, "step": 108760 }, { "epoch": 2.2141475826972012, "grad_norm": 2.0478173913226563, "learning_rate": 6.8271744265763354e-06, "loss": 0.1159, "step": 108770 }, { "epoch": 2.214351145038168, "grad_norm": 11.346764602997043, "learning_rate": 6.826512985000961e-06, "loss": 0.1186, "step": 108780 }, { "epoch": 2.214554707379135, "grad_norm": 1.0691539041702613, "learning_rate": 6.825851506536808e-06, "loss": 0.0461, "step": 108790 }, { "epoch": 2.2147582697201016, "grad_norm": 3.670713971509417, "learning_rate": 6.825189991197231e-06, "loss": 0.1766, "step": 108800 }, { "epoch": 2.2149618320610687, "grad_norm": 15.601663969036174, "learning_rate": 6.82452843899559e-06, "loss": 0.1547, "step": 108810 }, { "epoch": 2.2151653944020357, "grad_norm": 1.8381944613962697, "learning_rate": 6.8238668499452485e-06, "loss": 0.1054, "step": 108820 }, { "epoch": 2.2153689567430024, "grad_norm": 4.920605804315135, "learning_rate": 6.8232052240595655e-06, "loss": 0.132, "step": 108830 }, { "epoch": 2.2155725190839695, "grad_norm": 42.75811465311905, "learning_rate": 6.822543561351906e-06, "loss": 0.0885, "step": 108840 }, { "epoch": 2.2157760814249365, "grad_norm": 0.38254942275334625, "learning_rate": 6.821881861835633e-06, "loss": 0.0385, "step": 108850 }, { "epoch": 2.215979643765903, "grad_norm": 31.106819920139053, "learning_rate": 6.821220125524108e-06, "loss": 0.185, "step": 108860 }, { "epoch": 2.2161832061068703, "grad_norm": 0.18406174299409447, "learning_rate": 6.820558352430695e-06, "loss": 0.0186, "step": 108870 }, { "epoch": 2.2163867684478373, "grad_norm": 0.3062019831937936, "learning_rate": 6.819896542568765e-06, "loss": 0.099, "step": 108880 }, { "epoch": 2.216590330788804, "grad_norm": 33.503760709801234, "learning_rate": 6.819234695951677e-06, "loss": 0.0621, "step": 108890 }, { "epoch": 2.216793893129771, "grad_norm": 0.2631420168648066, "learning_rate": 6.818572812592803e-06, "loss": 0.1642, "step": 108900 }, { "epoch": 2.216997455470738, "grad_norm": 16.36933620519315, "learning_rate": 6.817910892505508e-06, "loss": 0.1477, "step": 108910 }, { "epoch": 2.2172010178117048, "grad_norm": 38.18068154307705, "learning_rate": 6.817248935703161e-06, "loss": 0.0948, "step": 108920 }, { "epoch": 2.217404580152672, "grad_norm": 23.14420011563677, "learning_rate": 6.816586942199132e-06, "loss": 0.2211, "step": 108930 }, { "epoch": 2.2176081424936385, "grad_norm": 1.318573490479469, "learning_rate": 6.815924912006788e-06, "loss": 0.1314, "step": 108940 }, { "epoch": 2.2178117048346055, "grad_norm": 8.418662084539518, "learning_rate": 6.815262845139503e-06, "loss": 0.0845, "step": 108950 }, { "epoch": 2.2180152671755726, "grad_norm": 0.5704807841685257, "learning_rate": 6.814600741610647e-06, "loss": 0.117, "step": 108960 }, { "epoch": 2.2182188295165393, "grad_norm": 13.749204457932018, "learning_rate": 6.813938601433592e-06, "loss": 0.1376, "step": 108970 }, { "epoch": 2.2184223918575063, "grad_norm": 9.77445734336936, "learning_rate": 6.813276424621708e-06, "loss": 0.1263, "step": 108980 }, { "epoch": 2.2186259541984734, "grad_norm": 23.588478626854148, "learning_rate": 6.812614211188376e-06, "loss": 0.1547, "step": 108990 }, { "epoch": 2.21882951653944, "grad_norm": 0.32461504809589087, "learning_rate": 6.811951961146962e-06, "loss": 0.2036, "step": 109000 }, { "epoch": 2.219033078880407, "grad_norm": 13.130358665027758, "learning_rate": 6.811289674510845e-06, "loss": 0.1747, "step": 109010 }, { "epoch": 2.219236641221374, "grad_norm": 0.046166471336829, "learning_rate": 6.810627351293401e-06, "loss": 0.1204, "step": 109020 }, { "epoch": 2.219440203562341, "grad_norm": 19.18467956831123, "learning_rate": 6.809964991508003e-06, "loss": 0.128, "step": 109030 }, { "epoch": 2.219643765903308, "grad_norm": 23.13479035218266, "learning_rate": 6.809302595168032e-06, "loss": 0.1171, "step": 109040 }, { "epoch": 2.219847328244275, "grad_norm": 0.3598305235517944, "learning_rate": 6.808640162286866e-06, "loss": 0.0543, "step": 109050 }, { "epoch": 2.2200508905852416, "grad_norm": 9.847191283168026, "learning_rate": 6.807977692877882e-06, "loss": 0.1631, "step": 109060 }, { "epoch": 2.2202544529262087, "grad_norm": 2.6790865444040404, "learning_rate": 6.807315186954458e-06, "loss": 0.0931, "step": 109070 }, { "epoch": 2.220458015267176, "grad_norm": 19.619042236417744, "learning_rate": 6.8066526445299785e-06, "loss": 0.1131, "step": 109080 }, { "epoch": 2.2206615776081424, "grad_norm": 16.08055826328669, "learning_rate": 6.80599006561782e-06, "loss": 0.1257, "step": 109090 }, { "epoch": 2.2208651399491095, "grad_norm": 13.52669222433155, "learning_rate": 6.805327450231367e-06, "loss": 0.2147, "step": 109100 }, { "epoch": 2.221068702290076, "grad_norm": 21.36461028765247, "learning_rate": 6.8046647983840005e-06, "loss": 0.1322, "step": 109110 }, { "epoch": 2.221272264631043, "grad_norm": 2.8868846570386566, "learning_rate": 6.804002110089104e-06, "loss": 0.0303, "step": 109120 }, { "epoch": 2.2214758269720103, "grad_norm": 15.176573807289529, "learning_rate": 6.803339385360062e-06, "loss": 0.0921, "step": 109130 }, { "epoch": 2.221679389312977, "grad_norm": 1.7955963918970455, "learning_rate": 6.802676624210258e-06, "loss": 0.1028, "step": 109140 }, { "epoch": 2.221882951653944, "grad_norm": 23.58103380607088, "learning_rate": 6.802013826653077e-06, "loss": 0.1167, "step": 109150 }, { "epoch": 2.222086513994911, "grad_norm": 0.05655464472245563, "learning_rate": 6.801350992701907e-06, "loss": 0.0909, "step": 109160 }, { "epoch": 2.2222900763358777, "grad_norm": 16.320032369641766, "learning_rate": 6.800688122370134e-06, "loss": 0.1677, "step": 109170 }, { "epoch": 2.222493638676845, "grad_norm": 4.458596715910208, "learning_rate": 6.800025215671143e-06, "loss": 0.0837, "step": 109180 }, { "epoch": 2.222697201017812, "grad_norm": 0.020027662987406995, "learning_rate": 6.7993622726183264e-06, "loss": 0.088, "step": 109190 }, { "epoch": 2.2229007633587785, "grad_norm": 30.681554406217604, "learning_rate": 6.79869929322507e-06, "loss": 0.1264, "step": 109200 }, { "epoch": 2.2231043256997456, "grad_norm": 0.038536831135052525, "learning_rate": 6.798036277504765e-06, "loss": 0.0866, "step": 109210 }, { "epoch": 2.2233078880407127, "grad_norm": 4.681539466249195, "learning_rate": 6.797373225470802e-06, "loss": 0.1608, "step": 109220 }, { "epoch": 2.2235114503816793, "grad_norm": 23.15760589724838, "learning_rate": 6.79671013713657e-06, "loss": 0.2277, "step": 109230 }, { "epoch": 2.2237150127226464, "grad_norm": 13.922824897316469, "learning_rate": 6.796047012515463e-06, "loss": 0.214, "step": 109240 }, { "epoch": 2.223918575063613, "grad_norm": 2.3011077733250853, "learning_rate": 6.795383851620876e-06, "loss": 0.0991, "step": 109250 }, { "epoch": 2.22412213740458, "grad_norm": 7.783840513325176, "learning_rate": 6.794720654466197e-06, "loss": 0.1881, "step": 109260 }, { "epoch": 2.224325699745547, "grad_norm": 4.843462478220431, "learning_rate": 6.794057421064822e-06, "loss": 0.0963, "step": 109270 }, { "epoch": 2.224529262086514, "grad_norm": 4.632935061416482, "learning_rate": 6.79339415143015e-06, "loss": 0.1219, "step": 109280 }, { "epoch": 2.224732824427481, "grad_norm": 0.08491418446257215, "learning_rate": 6.792730845575572e-06, "loss": 0.1499, "step": 109290 }, { "epoch": 2.224936386768448, "grad_norm": 13.68953927035632, "learning_rate": 6.792067503514484e-06, "loss": 0.1265, "step": 109300 }, { "epoch": 2.2251399491094146, "grad_norm": 0.1657736753174018, "learning_rate": 6.791404125260286e-06, "loss": 0.1742, "step": 109310 }, { "epoch": 2.2253435114503817, "grad_norm": 13.305986308571729, "learning_rate": 6.7907407108263735e-06, "loss": 0.1612, "step": 109320 }, { "epoch": 2.2255470737913488, "grad_norm": 5.946763932737402, "learning_rate": 6.7900772602261465e-06, "loss": 0.2142, "step": 109330 }, { "epoch": 2.2257506361323154, "grad_norm": 9.883025440872407, "learning_rate": 6.789413773473004e-06, "loss": 0.1564, "step": 109340 }, { "epoch": 2.2259541984732825, "grad_norm": 0.5039386003258964, "learning_rate": 6.7887502505803445e-06, "loss": 0.1029, "step": 109350 }, { "epoch": 2.2261577608142495, "grad_norm": 5.583788661978968, "learning_rate": 6.7880866915615705e-06, "loss": 0.0664, "step": 109360 }, { "epoch": 2.226361323155216, "grad_norm": 0.18124982577828755, "learning_rate": 6.787423096430084e-06, "loss": 0.0977, "step": 109370 }, { "epoch": 2.2265648854961833, "grad_norm": 0.3107426077927888, "learning_rate": 6.786759465199284e-06, "loss": 0.2011, "step": 109380 }, { "epoch": 2.2267684478371503, "grad_norm": 0.06465315747457305, "learning_rate": 6.7860957978825745e-06, "loss": 0.0835, "step": 109390 }, { "epoch": 2.226972010178117, "grad_norm": 1.534582590214798, "learning_rate": 6.785432094493362e-06, "loss": 0.2657, "step": 109400 }, { "epoch": 2.227175572519084, "grad_norm": 15.382011066918084, "learning_rate": 6.784768355045047e-06, "loss": 0.1171, "step": 109410 }, { "epoch": 2.227379134860051, "grad_norm": 6.450345267854205, "learning_rate": 6.784104579551037e-06, "loss": 0.1165, "step": 109420 }, { "epoch": 2.2275826972010178, "grad_norm": 22.397834836036886, "learning_rate": 6.783440768024738e-06, "loss": 0.0761, "step": 109430 }, { "epoch": 2.227786259541985, "grad_norm": 25.204511770824524, "learning_rate": 6.782776920479554e-06, "loss": 0.0764, "step": 109440 }, { "epoch": 2.2279898218829515, "grad_norm": 8.163960643194404, "learning_rate": 6.782113036928897e-06, "loss": 0.0791, "step": 109450 }, { "epoch": 2.2281933842239185, "grad_norm": 62.49069247225434, "learning_rate": 6.781449117386169e-06, "loss": 0.3075, "step": 109460 }, { "epoch": 2.2283969465648856, "grad_norm": 0.0865401884830182, "learning_rate": 6.780785161864783e-06, "loss": 0.1028, "step": 109470 }, { "epoch": 2.2286005089058523, "grad_norm": 33.4725171172065, "learning_rate": 6.780121170378145e-06, "loss": 0.1586, "step": 109480 }, { "epoch": 2.2288040712468193, "grad_norm": 22.272219123803275, "learning_rate": 6.779457142939669e-06, "loss": 0.2103, "step": 109490 }, { "epoch": 2.2290076335877864, "grad_norm": 0.07699944575119404, "learning_rate": 6.778793079562763e-06, "loss": 0.1428, "step": 109500 }, { "epoch": 2.229211195928753, "grad_norm": 0.828673639437156, "learning_rate": 6.778128980260842e-06, "loss": 0.1087, "step": 109510 }, { "epoch": 2.22941475826972, "grad_norm": 28.713332004615246, "learning_rate": 6.777464845047312e-06, "loss": 0.1275, "step": 109520 }, { "epoch": 2.229618320610687, "grad_norm": 35.00081321756221, "learning_rate": 6.776800673935593e-06, "loss": 0.1427, "step": 109530 }, { "epoch": 2.229821882951654, "grad_norm": 12.930269365663184, "learning_rate": 6.776136466939096e-06, "loss": 0.1419, "step": 109540 }, { "epoch": 2.230025445292621, "grad_norm": 25.372490369828846, "learning_rate": 6.775472224071235e-06, "loss": 0.1546, "step": 109550 }, { "epoch": 2.2302290076335876, "grad_norm": 1.1990204572609593, "learning_rate": 6.774807945345424e-06, "loss": 0.0705, "step": 109560 }, { "epoch": 2.2304325699745546, "grad_norm": 21.227570286475878, "learning_rate": 6.774143630775083e-06, "loss": 0.1157, "step": 109570 }, { "epoch": 2.2306361323155217, "grad_norm": 33.21976730037758, "learning_rate": 6.773479280373625e-06, "loss": 0.1178, "step": 109580 }, { "epoch": 2.2308396946564883, "grad_norm": 15.68315762542359, "learning_rate": 6.772814894154469e-06, "loss": 0.1426, "step": 109590 }, { "epoch": 2.2310432569974554, "grad_norm": 5.889882798098744, "learning_rate": 6.772150472131033e-06, "loss": 0.111, "step": 109600 }, { "epoch": 2.2312468193384225, "grad_norm": 29.418789608319447, "learning_rate": 6.771486014316735e-06, "loss": 0.0787, "step": 109610 }, { "epoch": 2.231450381679389, "grad_norm": 5.859862247356058, "learning_rate": 6.7708215207249964e-06, "loss": 0.0578, "step": 109620 }, { "epoch": 2.231653944020356, "grad_norm": 8.504096628455637, "learning_rate": 6.7701569913692364e-06, "loss": 0.1616, "step": 109630 }, { "epoch": 2.2318575063613233, "grad_norm": 30.561196026126346, "learning_rate": 6.769492426262874e-06, "loss": 0.1452, "step": 109640 }, { "epoch": 2.23206106870229, "grad_norm": 29.256754949939193, "learning_rate": 6.7688278254193354e-06, "loss": 0.1539, "step": 109650 }, { "epoch": 2.232264631043257, "grad_norm": 10.340133059732183, "learning_rate": 6.7681631888520395e-06, "loss": 0.1132, "step": 109660 }, { "epoch": 2.232468193384224, "grad_norm": 0.22062135359644655, "learning_rate": 6.7674985165744104e-06, "loss": 0.0792, "step": 109670 }, { "epoch": 2.2326717557251907, "grad_norm": 14.413403888772402, "learning_rate": 6.766833808599873e-06, "loss": 0.1632, "step": 109680 }, { "epoch": 2.232875318066158, "grad_norm": 13.682851801361956, "learning_rate": 6.76616906494185e-06, "loss": 0.1289, "step": 109690 }, { "epoch": 2.233078880407125, "grad_norm": 6.904063893591585, "learning_rate": 6.765504285613769e-06, "loss": 0.2475, "step": 109700 }, { "epoch": 2.2332824427480915, "grad_norm": 2.06121606718494, "learning_rate": 6.764839470629055e-06, "loss": 0.157, "step": 109710 }, { "epoch": 2.2334860050890586, "grad_norm": 20.030559661510075, "learning_rate": 6.764174620001135e-06, "loss": 0.096, "step": 109720 }, { "epoch": 2.2336895674300257, "grad_norm": 5.368581212196121, "learning_rate": 6.763509733743434e-06, "loss": 0.0578, "step": 109730 }, { "epoch": 2.2338931297709923, "grad_norm": 3.660978048677436, "learning_rate": 6.762844811869386e-06, "loss": 0.1294, "step": 109740 }, { "epoch": 2.2340966921119594, "grad_norm": 16.788693570951637, "learning_rate": 6.7621798543924145e-06, "loss": 0.2726, "step": 109750 }, { "epoch": 2.234300254452926, "grad_norm": 10.425927083585496, "learning_rate": 6.761514861325951e-06, "loss": 0.1132, "step": 109760 }, { "epoch": 2.234503816793893, "grad_norm": 8.611380617453085, "learning_rate": 6.760849832683426e-06, "loss": 0.1903, "step": 109770 }, { "epoch": 2.23470737913486, "grad_norm": 17.352950682619372, "learning_rate": 6.760184768478272e-06, "loss": 0.0841, "step": 109780 }, { "epoch": 2.234910941475827, "grad_norm": 3.0057316031438868, "learning_rate": 6.7595196687239176e-06, "loss": 0.1397, "step": 109790 }, { "epoch": 2.235114503816794, "grad_norm": 20.934283286906844, "learning_rate": 6.758854533433799e-06, "loss": 0.1811, "step": 109800 }, { "epoch": 2.235318066157761, "grad_norm": 19.1238443404149, "learning_rate": 6.758189362621347e-06, "loss": 0.1004, "step": 109810 }, { "epoch": 2.2355216284987276, "grad_norm": 1.1338058870929308, "learning_rate": 6.757524156299996e-06, "loss": 0.1359, "step": 109820 }, { "epoch": 2.2357251908396947, "grad_norm": 1.9818088522453488, "learning_rate": 6.756858914483182e-06, "loss": 0.1283, "step": 109830 }, { "epoch": 2.2359287531806618, "grad_norm": 13.60338627695047, "learning_rate": 6.756193637184338e-06, "loss": 0.1143, "step": 109840 }, { "epoch": 2.2361323155216284, "grad_norm": 6.26455932665518, "learning_rate": 6.755528324416903e-06, "loss": 0.1666, "step": 109850 }, { "epoch": 2.2363358778625955, "grad_norm": 0.1560608960113707, "learning_rate": 6.754862976194312e-06, "loss": 0.1181, "step": 109860 }, { "epoch": 2.2365394402035625, "grad_norm": 3.4873361351585848, "learning_rate": 6.7541975925300014e-06, "loss": 0.1008, "step": 109870 }, { "epoch": 2.236743002544529, "grad_norm": 0.29740733752060594, "learning_rate": 6.753532173437412e-06, "loss": 0.0763, "step": 109880 }, { "epoch": 2.2369465648854963, "grad_norm": 38.9858392321214, "learning_rate": 6.752866718929982e-06, "loss": 0.1821, "step": 109890 }, { "epoch": 2.237150127226463, "grad_norm": 2.7531339873640124, "learning_rate": 6.752201229021152e-06, "loss": 0.1559, "step": 109900 }, { "epoch": 2.23735368956743, "grad_norm": 0.5380304939275123, "learning_rate": 6.751535703724359e-06, "loss": 0.145, "step": 109910 }, { "epoch": 2.237557251908397, "grad_norm": 18.403974022059497, "learning_rate": 6.7508701430530495e-06, "loss": 0.0596, "step": 109920 }, { "epoch": 2.2377608142493637, "grad_norm": 4.219572367692549, "learning_rate": 6.75020454702066e-06, "loss": 0.097, "step": 109930 }, { "epoch": 2.2379643765903308, "grad_norm": 20.538758510375683, "learning_rate": 6.749538915640637e-06, "loss": 0.0718, "step": 109940 }, { "epoch": 2.238167938931298, "grad_norm": 0.0590791395218778, "learning_rate": 6.7488732489264205e-06, "loss": 0.1104, "step": 109950 }, { "epoch": 2.2383715012722645, "grad_norm": 33.55785312894541, "learning_rate": 6.7482075468914565e-06, "loss": 0.1539, "step": 109960 }, { "epoch": 2.2385750636132316, "grad_norm": 9.096596120762571, "learning_rate": 6.7475418095491905e-06, "loss": 0.1317, "step": 109970 }, { "epoch": 2.2387786259541986, "grad_norm": 1.1017178596459432, "learning_rate": 6.7468760369130656e-06, "loss": 0.1429, "step": 109980 }, { "epoch": 2.2389821882951653, "grad_norm": 1.9339442130877522, "learning_rate": 6.74621022899653e-06, "loss": 0.1679, "step": 109990 }, { "epoch": 2.2391857506361323, "grad_norm": 26.154749447014577, "learning_rate": 6.745544385813031e-06, "loss": 0.0805, "step": 110000 }, { "epoch": 2.2393893129770994, "grad_norm": 10.228530881352805, "learning_rate": 6.744878507376013e-06, "loss": 0.0932, "step": 110010 }, { "epoch": 2.239592875318066, "grad_norm": 25.285801362186564, "learning_rate": 6.744212593698926e-06, "loss": 0.1509, "step": 110020 }, { "epoch": 2.239796437659033, "grad_norm": 31.825306806689422, "learning_rate": 6.74354664479522e-06, "loss": 0.0995, "step": 110030 }, { "epoch": 2.24, "grad_norm": 2.8472070058716956, "learning_rate": 6.742880660678344e-06, "loss": 0.0597, "step": 110040 }, { "epoch": 2.240203562340967, "grad_norm": 8.003594529393443, "learning_rate": 6.742214641361746e-06, "loss": 0.0629, "step": 110050 }, { "epoch": 2.240407124681934, "grad_norm": 12.591042061444869, "learning_rate": 6.7415485868588825e-06, "loss": 0.1122, "step": 110060 }, { "epoch": 2.2406106870229006, "grad_norm": 2.532170718923281, "learning_rate": 6.740882497183201e-06, "loss": 0.063, "step": 110070 }, { "epoch": 2.2408142493638676, "grad_norm": 14.099110951974044, "learning_rate": 6.740216372348155e-06, "loss": 0.0707, "step": 110080 }, { "epoch": 2.2410178117048347, "grad_norm": 13.464972502780236, "learning_rate": 6.7395502123672e-06, "loss": 0.0965, "step": 110090 }, { "epoch": 2.2412213740458014, "grad_norm": 3.289666817822231, "learning_rate": 6.738884017253784e-06, "loss": 0.1678, "step": 110100 }, { "epoch": 2.2414249363867684, "grad_norm": 8.680209538248521, "learning_rate": 6.7382177870213685e-06, "loss": 0.1544, "step": 110110 }, { "epoch": 2.2416284987277355, "grad_norm": 9.048854740792702, "learning_rate": 6.737551521683405e-06, "loss": 0.1791, "step": 110120 }, { "epoch": 2.241832061068702, "grad_norm": 12.201002971599115, "learning_rate": 6.736885221253351e-06, "loss": 0.2056, "step": 110130 }, { "epoch": 2.242035623409669, "grad_norm": 20.697357194881704, "learning_rate": 6.736218885744663e-06, "loss": 0.1105, "step": 110140 }, { "epoch": 2.2422391857506363, "grad_norm": 6.396363239543461, "learning_rate": 6.735552515170798e-06, "loss": 0.1706, "step": 110150 }, { "epoch": 2.242442748091603, "grad_norm": 0.10589350548794763, "learning_rate": 6.734886109545213e-06, "loss": 0.0659, "step": 110160 }, { "epoch": 2.24264631043257, "grad_norm": 33.56314246696531, "learning_rate": 6.734219668881371e-06, "loss": 0.1535, "step": 110170 }, { "epoch": 2.242849872773537, "grad_norm": 6.078165834508918, "learning_rate": 6.733553193192728e-06, "loss": 0.1235, "step": 110180 }, { "epoch": 2.2430534351145037, "grad_norm": 8.691459390756298, "learning_rate": 6.7328866824927455e-06, "loss": 0.0699, "step": 110190 }, { "epoch": 2.243256997455471, "grad_norm": 5.559791845974152, "learning_rate": 6.732220136794886e-06, "loss": 0.2706, "step": 110200 }, { "epoch": 2.2434605597964374, "grad_norm": 28.83229523115874, "learning_rate": 6.731553556112608e-06, "loss": 0.1601, "step": 110210 }, { "epoch": 2.2436641221374045, "grad_norm": 7.04069046064076, "learning_rate": 6.730886940459375e-06, "loss": 0.1433, "step": 110220 }, { "epoch": 2.2438676844783716, "grad_norm": 19.13411491921576, "learning_rate": 6.730220289848652e-06, "loss": 0.1686, "step": 110230 }, { "epoch": 2.2440712468193382, "grad_norm": 25.487996433881992, "learning_rate": 6.729553604293902e-06, "loss": 0.0626, "step": 110240 }, { "epoch": 2.2442748091603053, "grad_norm": 16.98728223879381, "learning_rate": 6.7288868838085895e-06, "loss": 0.1964, "step": 110250 }, { "epoch": 2.2444783715012724, "grad_norm": 13.091235509333032, "learning_rate": 6.728220128406179e-06, "loss": 0.1939, "step": 110260 }, { "epoch": 2.244681933842239, "grad_norm": 10.360188184787308, "learning_rate": 6.727553338100137e-06, "loss": 0.0852, "step": 110270 }, { "epoch": 2.244885496183206, "grad_norm": 14.776409181199146, "learning_rate": 6.72688651290393e-06, "loss": 0.1996, "step": 110280 }, { "epoch": 2.245089058524173, "grad_norm": 0.2946347377209916, "learning_rate": 6.726219652831027e-06, "loss": 0.085, "step": 110290 }, { "epoch": 2.24529262086514, "grad_norm": 11.53567765811646, "learning_rate": 6.725552757894893e-06, "loss": 0.186, "step": 110300 }, { "epoch": 2.245496183206107, "grad_norm": 0.8835674589675434, "learning_rate": 6.724885828108999e-06, "loss": 0.14, "step": 110310 }, { "epoch": 2.245699745547074, "grad_norm": 0.4887448331374478, "learning_rate": 6.7242188634868155e-06, "loss": 0.0809, "step": 110320 }, { "epoch": 2.2459033078880406, "grad_norm": 15.711874167608881, "learning_rate": 6.7235518640418106e-06, "loss": 0.1953, "step": 110330 }, { "epoch": 2.2461068702290077, "grad_norm": 15.600831139813483, "learning_rate": 6.722884829787455e-06, "loss": 0.2529, "step": 110340 }, { "epoch": 2.2463104325699748, "grad_norm": 17.928895087122477, "learning_rate": 6.722217760737222e-06, "loss": 0.1738, "step": 110350 }, { "epoch": 2.2465139949109414, "grad_norm": 1.1154391449414678, "learning_rate": 6.721550656904583e-06, "loss": 0.1603, "step": 110360 }, { "epoch": 2.2467175572519085, "grad_norm": 4.101343394748106, "learning_rate": 6.720883518303011e-06, "loss": 0.1162, "step": 110370 }, { "epoch": 2.2469211195928755, "grad_norm": 9.804921405213753, "learning_rate": 6.720216344945979e-06, "loss": 0.1493, "step": 110380 }, { "epoch": 2.247124681933842, "grad_norm": 17.788553525219456, "learning_rate": 6.719549136846964e-06, "loss": 0.1778, "step": 110390 }, { "epoch": 2.2473282442748093, "grad_norm": 2.292645767939365, "learning_rate": 6.718881894019438e-06, "loss": 0.1046, "step": 110400 }, { "epoch": 2.247531806615776, "grad_norm": 7.180632673491246, "learning_rate": 6.7182146164768795e-06, "loss": 0.1497, "step": 110410 }, { "epoch": 2.247735368956743, "grad_norm": 0.26658587364388964, "learning_rate": 6.717547304232763e-06, "loss": 0.1328, "step": 110420 }, { "epoch": 2.24793893129771, "grad_norm": 19.399746170538073, "learning_rate": 6.716879957300566e-06, "loss": 0.1275, "step": 110430 }, { "epoch": 2.2481424936386767, "grad_norm": 6.456639192358575, "learning_rate": 6.716212575693768e-06, "loss": 0.0786, "step": 110440 }, { "epoch": 2.2483460559796438, "grad_norm": 15.16664145836057, "learning_rate": 6.715545159425847e-06, "loss": 0.2711, "step": 110450 }, { "epoch": 2.248549618320611, "grad_norm": 0.10634278416884546, "learning_rate": 6.7148777085102805e-06, "loss": 0.1327, "step": 110460 }, { "epoch": 2.2487531806615775, "grad_norm": 1.5217012881084928, "learning_rate": 6.71421022296055e-06, "loss": 0.1032, "step": 110470 }, { "epoch": 2.2489567430025446, "grad_norm": 0.6381475835493851, "learning_rate": 6.713542702790136e-06, "loss": 0.1124, "step": 110480 }, { "epoch": 2.2491603053435116, "grad_norm": 0.07875066046728159, "learning_rate": 6.712875148012522e-06, "loss": 0.128, "step": 110490 }, { "epoch": 2.2493638676844783, "grad_norm": 3.6808312420181055, "learning_rate": 6.712207558641187e-06, "loss": 0.1052, "step": 110500 }, { "epoch": 2.2495674300254453, "grad_norm": 0.7533574774497185, "learning_rate": 6.711539934689614e-06, "loss": 0.0467, "step": 110510 }, { "epoch": 2.249770992366412, "grad_norm": 27.677344261535602, "learning_rate": 6.71087227617129e-06, "loss": 0.124, "step": 110520 }, { "epoch": 2.249974554707379, "grad_norm": 0.057291651079078465, "learning_rate": 6.710204583099696e-06, "loss": 0.178, "step": 110530 }, { "epoch": 2.250178117048346, "grad_norm": 0.06857586438134779, "learning_rate": 6.709536855488316e-06, "loss": 0.1731, "step": 110540 }, { "epoch": 2.2503816793893128, "grad_norm": 6.7421019711783154, "learning_rate": 6.7088690933506385e-06, "loss": 0.1195, "step": 110550 }, { "epoch": 2.25058524173028, "grad_norm": 0.15914906678714918, "learning_rate": 6.708201296700147e-06, "loss": 0.1425, "step": 110560 }, { "epoch": 2.250788804071247, "grad_norm": 0.12481087362656368, "learning_rate": 6.707533465550333e-06, "loss": 0.0596, "step": 110570 }, { "epoch": 2.2509923664122136, "grad_norm": 11.727281806937706, "learning_rate": 6.706865599914681e-06, "loss": 0.1544, "step": 110580 }, { "epoch": 2.2511959287531806, "grad_norm": 1.413046914865889, "learning_rate": 6.706197699806678e-06, "loss": 0.1623, "step": 110590 }, { "epoch": 2.2513994910941477, "grad_norm": 30.60220567597541, "learning_rate": 6.705529765239815e-06, "loss": 0.1254, "step": 110600 }, { "epoch": 2.2516030534351144, "grad_norm": 22.816768797033667, "learning_rate": 6.704861796227583e-06, "loss": 0.1701, "step": 110610 }, { "epoch": 2.2518066157760814, "grad_norm": 16.450225183756917, "learning_rate": 6.704193792783471e-06, "loss": 0.1236, "step": 110620 }, { "epoch": 2.2520101781170485, "grad_norm": 9.069095556669556, "learning_rate": 6.70352575492097e-06, "loss": 0.1318, "step": 110630 }, { "epoch": 2.252213740458015, "grad_norm": 0.34843197692992156, "learning_rate": 6.702857682653572e-06, "loss": 0.1077, "step": 110640 }, { "epoch": 2.2524173027989822, "grad_norm": 1.0755387537597039, "learning_rate": 6.70218957599477e-06, "loss": 0.0741, "step": 110650 }, { "epoch": 2.2526208651399493, "grad_norm": 0.22261929575925357, "learning_rate": 6.701521434958057e-06, "loss": 0.0408, "step": 110660 }, { "epoch": 2.252824427480916, "grad_norm": 16.69043063688544, "learning_rate": 6.700853259556927e-06, "loss": 0.2165, "step": 110670 }, { "epoch": 2.253027989821883, "grad_norm": 6.7368203589369005, "learning_rate": 6.7001850498048745e-06, "loss": 0.2181, "step": 110680 }, { "epoch": 2.25323155216285, "grad_norm": 36.42055162006644, "learning_rate": 6.699516805715397e-06, "loss": 0.1513, "step": 110690 }, { "epoch": 2.2534351145038167, "grad_norm": 24.69290176972003, "learning_rate": 6.698848527301987e-06, "loss": 0.0938, "step": 110700 }, { "epoch": 2.253638676844784, "grad_norm": 20.354110320143405, "learning_rate": 6.6981802145781424e-06, "loss": 0.0962, "step": 110710 }, { "epoch": 2.253842239185751, "grad_norm": 0.18834665620847643, "learning_rate": 6.697511867557363e-06, "loss": 0.0777, "step": 110720 }, { "epoch": 2.2540458015267175, "grad_norm": 13.876172372105165, "learning_rate": 6.696843486253144e-06, "loss": 0.1617, "step": 110730 }, { "epoch": 2.2542493638676846, "grad_norm": 8.901108029903451, "learning_rate": 6.696175070678985e-06, "loss": 0.1088, "step": 110740 }, { "epoch": 2.2544529262086512, "grad_norm": 7.696162314812889, "learning_rate": 6.695506620848385e-06, "loss": 0.1694, "step": 110750 }, { "epoch": 2.2546564885496183, "grad_norm": 4.956814728933909, "learning_rate": 6.694838136774846e-06, "loss": 0.1051, "step": 110760 }, { "epoch": 2.2548600508905854, "grad_norm": 84.49785947845386, "learning_rate": 6.694169618471868e-06, "loss": 0.0939, "step": 110770 }, { "epoch": 2.255063613231552, "grad_norm": 0.938580207064805, "learning_rate": 6.693501065952954e-06, "loss": 0.0664, "step": 110780 }, { "epoch": 2.255267175572519, "grad_norm": 39.67032838985143, "learning_rate": 6.692832479231604e-06, "loss": 0.0552, "step": 110790 }, { "epoch": 2.255470737913486, "grad_norm": 0.06702135663442371, "learning_rate": 6.692163858321319e-06, "loss": 0.1675, "step": 110800 }, { "epoch": 2.255674300254453, "grad_norm": 0.03817674199574124, "learning_rate": 6.691495203235607e-06, "loss": 0.1202, "step": 110810 }, { "epoch": 2.25587786259542, "grad_norm": 3.8473849872278088, "learning_rate": 6.690826513987971e-06, "loss": 0.1008, "step": 110820 }, { "epoch": 2.2560814249363865, "grad_norm": 12.103051099250449, "learning_rate": 6.690157790591915e-06, "loss": 0.1481, "step": 110830 }, { "epoch": 2.2562849872773536, "grad_norm": 0.1258245219108375, "learning_rate": 6.689489033060948e-06, "loss": 0.1477, "step": 110840 }, { "epoch": 2.2564885496183207, "grad_norm": 0.042806422207160195, "learning_rate": 6.688820241408571e-06, "loss": 0.1254, "step": 110850 }, { "epoch": 2.2566921119592873, "grad_norm": 54.65803800611314, "learning_rate": 6.688151415648296e-06, "loss": 0.1257, "step": 110860 }, { "epoch": 2.2568956743002544, "grad_norm": 38.84749244391475, "learning_rate": 6.687482555793629e-06, "loss": 0.1308, "step": 110870 }, { "epoch": 2.2570992366412215, "grad_norm": 5.814880250930969, "learning_rate": 6.686813661858076e-06, "loss": 0.1398, "step": 110880 }, { "epoch": 2.257302798982188, "grad_norm": 0.08493715235006034, "learning_rate": 6.68614473385515e-06, "loss": 0.1351, "step": 110890 }, { "epoch": 2.257506361323155, "grad_norm": 0.36217342186335244, "learning_rate": 6.68547577179836e-06, "loss": 0.154, "step": 110900 }, { "epoch": 2.2577099236641223, "grad_norm": 4.886506245970319, "learning_rate": 6.684806775701216e-06, "loss": 0.1263, "step": 110910 }, { "epoch": 2.257913486005089, "grad_norm": 14.714015754847232, "learning_rate": 6.684137745577227e-06, "loss": 0.0856, "step": 110920 }, { "epoch": 2.258117048346056, "grad_norm": 7.192005670580594, "learning_rate": 6.683468681439909e-06, "loss": 0.1937, "step": 110930 }, { "epoch": 2.258320610687023, "grad_norm": 0.21226763585989875, "learning_rate": 6.682799583302772e-06, "loss": 0.0309, "step": 110940 }, { "epoch": 2.2585241730279897, "grad_norm": 0.24062086773942104, "learning_rate": 6.682130451179329e-06, "loss": 0.1463, "step": 110950 }, { "epoch": 2.2587277353689568, "grad_norm": 4.498869863959043, "learning_rate": 6.681461285083098e-06, "loss": 0.0758, "step": 110960 }, { "epoch": 2.258931297709924, "grad_norm": 12.234490442000816, "learning_rate": 6.6807920850275875e-06, "loss": 0.2034, "step": 110970 }, { "epoch": 2.2591348600508905, "grad_norm": 0.8997530178927429, "learning_rate": 6.680122851026318e-06, "loss": 0.0489, "step": 110980 }, { "epoch": 2.2593384223918576, "grad_norm": 0.05538616559759795, "learning_rate": 6.6794535830928035e-06, "loss": 0.0479, "step": 110990 }, { "epoch": 2.2595419847328246, "grad_norm": 13.07229661940072, "learning_rate": 6.678784281240558e-06, "loss": 0.1349, "step": 111000 }, { "epoch": 2.2597455470737913, "grad_norm": 25.894514285413145, "learning_rate": 6.678114945483105e-06, "loss": 0.1777, "step": 111010 }, { "epoch": 2.2599491094147584, "grad_norm": 19.531261338618283, "learning_rate": 6.677445575833958e-06, "loss": 0.0975, "step": 111020 }, { "epoch": 2.2601526717557254, "grad_norm": 7.62672153709375, "learning_rate": 6.676776172306636e-06, "loss": 0.0673, "step": 111030 }, { "epoch": 2.260356234096692, "grad_norm": 0.2027215181844294, "learning_rate": 6.676106734914661e-06, "loss": 0.1159, "step": 111040 }, { "epoch": 2.260559796437659, "grad_norm": 9.64830843083439, "learning_rate": 6.675437263671551e-06, "loss": 0.1477, "step": 111050 }, { "epoch": 2.2607633587786258, "grad_norm": 0.11100557060218974, "learning_rate": 6.674767758590828e-06, "loss": 0.1529, "step": 111060 }, { "epoch": 2.260966921119593, "grad_norm": 20.459171950888187, "learning_rate": 6.6740982196860134e-06, "loss": 0.2157, "step": 111070 }, { "epoch": 2.26117048346056, "grad_norm": 41.43530687064633, "learning_rate": 6.673428646970628e-06, "loss": 0.0688, "step": 111080 }, { "epoch": 2.2613740458015266, "grad_norm": 26.28915855379019, "learning_rate": 6.672759040458195e-06, "loss": 0.1605, "step": 111090 }, { "epoch": 2.2615776081424936, "grad_norm": 0.26114473148913275, "learning_rate": 6.672089400162241e-06, "loss": 0.1167, "step": 111100 }, { "epoch": 2.2617811704834607, "grad_norm": 7.216060315835668, "learning_rate": 6.6714197260962864e-06, "loss": 0.1234, "step": 111110 }, { "epoch": 2.2619847328244274, "grad_norm": 1.343460314710506, "learning_rate": 6.670750018273858e-06, "loss": 0.0782, "step": 111120 }, { "epoch": 2.2621882951653944, "grad_norm": 26.431970699693117, "learning_rate": 6.670080276708482e-06, "loss": 0.0927, "step": 111130 }, { "epoch": 2.2623918575063615, "grad_norm": 4.318034981062167, "learning_rate": 6.669410501413682e-06, "loss": 0.1042, "step": 111140 }, { "epoch": 2.262595419847328, "grad_norm": 39.550452781764314, "learning_rate": 6.668740692402988e-06, "loss": 0.2693, "step": 111150 }, { "epoch": 2.2627989821882952, "grad_norm": 28.029256206739177, "learning_rate": 6.668070849689927e-06, "loss": 0.1332, "step": 111160 }, { "epoch": 2.263002544529262, "grad_norm": 8.179687335460542, "learning_rate": 6.667400973288024e-06, "loss": 0.1564, "step": 111170 }, { "epoch": 2.263206106870229, "grad_norm": 18.054818131491512, "learning_rate": 6.666731063210814e-06, "loss": 0.1211, "step": 111180 }, { "epoch": 2.263409669211196, "grad_norm": 8.646692009137164, "learning_rate": 6.666061119471822e-06, "loss": 0.0933, "step": 111190 }, { "epoch": 2.2636132315521627, "grad_norm": 6.037390118351428, "learning_rate": 6.665391142084579e-06, "loss": 0.1292, "step": 111200 }, { "epoch": 2.2638167938931297, "grad_norm": 14.239339571930035, "learning_rate": 6.664721131062617e-06, "loss": 0.1829, "step": 111210 }, { "epoch": 2.264020356234097, "grad_norm": 4.805616032026167, "learning_rate": 6.664051086419469e-06, "loss": 0.2107, "step": 111220 }, { "epoch": 2.2642239185750634, "grad_norm": 20.7795789089838, "learning_rate": 6.663381008168665e-06, "loss": 0.1184, "step": 111230 }, { "epoch": 2.2644274809160305, "grad_norm": 10.138643718562442, "learning_rate": 6.6627108963237405e-06, "loss": 0.1395, "step": 111240 }, { "epoch": 2.2646310432569976, "grad_norm": 0.06876896738999641, "learning_rate": 6.662040750898226e-06, "loss": 0.1418, "step": 111250 }, { "epoch": 2.2648346055979642, "grad_norm": 14.888095426663781, "learning_rate": 6.661370571905657e-06, "loss": 0.0918, "step": 111260 }, { "epoch": 2.2650381679389313, "grad_norm": 12.991467526930737, "learning_rate": 6.660700359359573e-06, "loss": 0.1316, "step": 111270 }, { "epoch": 2.2652417302798984, "grad_norm": 20.279521852173332, "learning_rate": 6.660030113273504e-06, "loss": 0.1259, "step": 111280 }, { "epoch": 2.265445292620865, "grad_norm": 9.431694144946292, "learning_rate": 6.659359833660988e-06, "loss": 0.1049, "step": 111290 }, { "epoch": 2.265648854961832, "grad_norm": 14.351883360153336, "learning_rate": 6.658689520535565e-06, "loss": 0.107, "step": 111300 }, { "epoch": 2.265852417302799, "grad_norm": 21.143570599390156, "learning_rate": 6.65801917391077e-06, "loss": 0.1557, "step": 111310 }, { "epoch": 2.266055979643766, "grad_norm": 14.062399864980003, "learning_rate": 6.657348793800141e-06, "loss": 0.2465, "step": 111320 }, { "epoch": 2.266259541984733, "grad_norm": 1.2140000914186617, "learning_rate": 6.65667838021722e-06, "loss": 0.2087, "step": 111330 }, { "epoch": 2.2664631043257, "grad_norm": 11.897541604113057, "learning_rate": 6.656007933175544e-06, "loss": 0.1851, "step": 111340 }, { "epoch": 2.2666666666666666, "grad_norm": 14.777718205928968, "learning_rate": 6.655337452688656e-06, "loss": 0.1712, "step": 111350 }, { "epoch": 2.2668702290076337, "grad_norm": 0.2418452338921277, "learning_rate": 6.654666938770097e-06, "loss": 0.1208, "step": 111360 }, { "epoch": 2.2670737913486003, "grad_norm": 2.9684963971482947, "learning_rate": 6.653996391433406e-06, "loss": 0.1179, "step": 111370 }, { "epoch": 2.2672773536895674, "grad_norm": 0.459649250558194, "learning_rate": 6.653325810692128e-06, "loss": 0.2209, "step": 111380 }, { "epoch": 2.2674809160305345, "grad_norm": 13.706004008319246, "learning_rate": 6.652655196559807e-06, "loss": 0.2061, "step": 111390 }, { "epoch": 2.267684478371501, "grad_norm": 48.592531077008445, "learning_rate": 6.651984549049985e-06, "loss": 0.1257, "step": 111400 }, { "epoch": 2.267888040712468, "grad_norm": 1.5395866163619556, "learning_rate": 6.651313868176207e-06, "loss": 0.0919, "step": 111410 }, { "epoch": 2.2680916030534353, "grad_norm": 10.732571970032321, "learning_rate": 6.650643153952019e-06, "loss": 0.1178, "step": 111420 }, { "epoch": 2.268295165394402, "grad_norm": 6.234899028173913, "learning_rate": 6.649972406390966e-06, "loss": 0.1854, "step": 111430 }, { "epoch": 2.268498727735369, "grad_norm": 0.15318888499726332, "learning_rate": 6.649301625506595e-06, "loss": 0.1002, "step": 111440 }, { "epoch": 2.268702290076336, "grad_norm": 33.50319851914722, "learning_rate": 6.648630811312453e-06, "loss": 0.0616, "step": 111450 }, { "epoch": 2.2689058524173027, "grad_norm": 20.149675640066153, "learning_rate": 6.647959963822088e-06, "loss": 0.1046, "step": 111460 }, { "epoch": 2.2691094147582698, "grad_norm": 13.579161245532093, "learning_rate": 6.647289083049051e-06, "loss": 0.147, "step": 111470 }, { "epoch": 2.2693129770992364, "grad_norm": 19.196948799579836, "learning_rate": 6.646618169006889e-06, "loss": 0.1575, "step": 111480 }, { "epoch": 2.2695165394402035, "grad_norm": 3.523487502709966, "learning_rate": 6.6459472217091515e-06, "loss": 0.2178, "step": 111490 }, { "epoch": 2.2697201017811706, "grad_norm": 4.220044567401722, "learning_rate": 6.645276241169389e-06, "loss": 0.1167, "step": 111500 }, { "epoch": 2.269923664122137, "grad_norm": 6.655213549490447, "learning_rate": 6.644605227401155e-06, "loss": 0.2294, "step": 111510 }, { "epoch": 2.2701272264631043, "grad_norm": 4.6164741327486984, "learning_rate": 6.643934180417999e-06, "loss": 0.0621, "step": 111520 }, { "epoch": 2.2703307888040714, "grad_norm": 0.0733734351110312, "learning_rate": 6.643263100233477e-06, "loss": 0.0303, "step": 111530 }, { "epoch": 2.270534351145038, "grad_norm": 11.148743895017065, "learning_rate": 6.642591986861138e-06, "loss": 0.1028, "step": 111540 }, { "epoch": 2.270737913486005, "grad_norm": 20.47119346963122, "learning_rate": 6.641920840314538e-06, "loss": 0.0733, "step": 111550 }, { "epoch": 2.270941475826972, "grad_norm": 6.122120004517781, "learning_rate": 6.641249660607234e-06, "loss": 0.1527, "step": 111560 }, { "epoch": 2.271145038167939, "grad_norm": 3.291526041283027, "learning_rate": 6.640578447752779e-06, "loss": 0.1624, "step": 111570 }, { "epoch": 2.271348600508906, "grad_norm": 13.159323607105808, "learning_rate": 6.639907201764727e-06, "loss": 0.1504, "step": 111580 }, { "epoch": 2.271552162849873, "grad_norm": 2.5297813765945016, "learning_rate": 6.639235922656638e-06, "loss": 0.0905, "step": 111590 }, { "epoch": 2.2717557251908396, "grad_norm": 8.494230660943058, "learning_rate": 6.638564610442069e-06, "loss": 0.0903, "step": 111600 }, { "epoch": 2.2719592875318066, "grad_norm": 1.055255597236765, "learning_rate": 6.637893265134577e-06, "loss": 0.1501, "step": 111610 }, { "epoch": 2.2721628498727737, "grad_norm": 8.100310073393677, "learning_rate": 6.63722188674772e-06, "loss": 0.2211, "step": 111620 }, { "epoch": 2.2723664122137404, "grad_norm": 0.5739928090877252, "learning_rate": 6.636550475295058e-06, "loss": 0.0531, "step": 111630 }, { "epoch": 2.2725699745547074, "grad_norm": 13.043351728785089, "learning_rate": 6.635879030790152e-06, "loss": 0.1349, "step": 111640 }, { "epoch": 2.2727735368956745, "grad_norm": 8.80752534754858, "learning_rate": 6.635207553246563e-06, "loss": 0.164, "step": 111650 }, { "epoch": 2.272977099236641, "grad_norm": 9.745709707064979, "learning_rate": 6.634536042677849e-06, "loss": 0.352, "step": 111660 }, { "epoch": 2.2731806615776082, "grad_norm": 6.98523856861661, "learning_rate": 6.6338644990975776e-06, "loss": 0.158, "step": 111670 }, { "epoch": 2.2733842239185753, "grad_norm": 19.648659029285987, "learning_rate": 6.6331929225193055e-06, "loss": 0.1176, "step": 111680 }, { "epoch": 2.273587786259542, "grad_norm": 1.945490795968187, "learning_rate": 6.6325213129566e-06, "loss": 0.1588, "step": 111690 }, { "epoch": 2.273791348600509, "grad_norm": 21.593821308894178, "learning_rate": 6.631849670423023e-06, "loss": 0.0763, "step": 111700 }, { "epoch": 2.2739949109414757, "grad_norm": 0.09174651042064494, "learning_rate": 6.631177994932141e-06, "loss": 0.1981, "step": 111710 }, { "epoch": 2.2741984732824427, "grad_norm": 0.1110171328846941, "learning_rate": 6.630506286497518e-06, "loss": 0.0928, "step": 111720 }, { "epoch": 2.27440203562341, "grad_norm": 0.11739223376178352, "learning_rate": 6.6298345451327215e-06, "loss": 0.0634, "step": 111730 }, { "epoch": 2.2746055979643764, "grad_norm": 6.24791982059022, "learning_rate": 6.629162770851315e-06, "loss": 0.1856, "step": 111740 }, { "epoch": 2.2748091603053435, "grad_norm": 0.024587730909349327, "learning_rate": 6.6284909636668695e-06, "loss": 0.198, "step": 111750 }, { "epoch": 2.2750127226463106, "grad_norm": 11.325951506222399, "learning_rate": 6.627819123592951e-06, "loss": 0.1798, "step": 111760 }, { "epoch": 2.2752162849872772, "grad_norm": 29.79570266182817, "learning_rate": 6.62714725064313e-06, "loss": 0.1422, "step": 111770 }, { "epoch": 2.2754198473282443, "grad_norm": 21.995992711259916, "learning_rate": 6.626475344830972e-06, "loss": 0.1053, "step": 111780 }, { "epoch": 2.275623409669211, "grad_norm": 6.061857653607575, "learning_rate": 6.625803406170051e-06, "loss": 0.1303, "step": 111790 }, { "epoch": 2.275826972010178, "grad_norm": 10.204072930173869, "learning_rate": 6.625131434673937e-06, "loss": 0.1085, "step": 111800 }, { "epoch": 2.276030534351145, "grad_norm": 26.29274585401753, "learning_rate": 6.6244594303562e-06, "loss": 0.162, "step": 111810 }, { "epoch": 2.2762340966921117, "grad_norm": 9.851144012873698, "learning_rate": 6.623787393230413e-06, "loss": 0.2381, "step": 111820 }, { "epoch": 2.276437659033079, "grad_norm": 0.43013348337136037, "learning_rate": 6.623115323310145e-06, "loss": 0.0862, "step": 111830 }, { "epoch": 2.276641221374046, "grad_norm": 0.2230396276722383, "learning_rate": 6.622443220608976e-06, "loss": 0.122, "step": 111840 }, { "epoch": 2.2768447837150125, "grad_norm": 0.15619130016423274, "learning_rate": 6.621771085140476e-06, "loss": 0.0898, "step": 111850 }, { "epoch": 2.2770483460559796, "grad_norm": 0.9158697881924894, "learning_rate": 6.621098916918219e-06, "loss": 0.1391, "step": 111860 }, { "epoch": 2.2772519083969467, "grad_norm": 6.206176150836861, "learning_rate": 6.620426715955781e-06, "loss": 0.1792, "step": 111870 }, { "epoch": 2.2774554707379133, "grad_norm": 0.18984402658683613, "learning_rate": 6.61975448226674e-06, "loss": 0.0863, "step": 111880 }, { "epoch": 2.2776590330788804, "grad_norm": 14.81754901216734, "learning_rate": 6.619082215864671e-06, "loss": 0.0889, "step": 111890 }, { "epoch": 2.2778625954198475, "grad_norm": 15.103338778772274, "learning_rate": 6.61840991676315e-06, "loss": 0.2707, "step": 111900 }, { "epoch": 2.278066157760814, "grad_norm": 16.1837757596033, "learning_rate": 6.617737584975757e-06, "loss": 0.125, "step": 111910 }, { "epoch": 2.278269720101781, "grad_norm": 12.09983347233449, "learning_rate": 6.617065220516069e-06, "loss": 0.0871, "step": 111920 }, { "epoch": 2.2784732824427483, "grad_norm": 0.09380919257938514, "learning_rate": 6.616392823397666e-06, "loss": 0.126, "step": 111930 }, { "epoch": 2.278676844783715, "grad_norm": 3.0099625041621927, "learning_rate": 6.615720393634131e-06, "loss": 0.0832, "step": 111940 }, { "epoch": 2.278880407124682, "grad_norm": 42.17310015075126, "learning_rate": 6.615047931239037e-06, "loss": 0.1904, "step": 111950 }, { "epoch": 2.279083969465649, "grad_norm": 13.13027519330225, "learning_rate": 6.614375436225974e-06, "loss": 0.1411, "step": 111960 }, { "epoch": 2.2792875318066157, "grad_norm": 0.2099141879940703, "learning_rate": 6.613702908608517e-06, "loss": 0.0995, "step": 111970 }, { "epoch": 2.2794910941475828, "grad_norm": 0.055003935842938674, "learning_rate": 6.613030348400252e-06, "loss": 0.0628, "step": 111980 }, { "epoch": 2.27969465648855, "grad_norm": 29.101577489142162, "learning_rate": 6.612357755614762e-06, "loss": 0.1455, "step": 111990 }, { "epoch": 2.2798982188295165, "grad_norm": 0.3249746286762095, "learning_rate": 6.61168513026563e-06, "loss": 0.1109, "step": 112000 }, { "epoch": 2.2801017811704836, "grad_norm": 10.113382125868096, "learning_rate": 6.611012472366441e-06, "loss": 0.1406, "step": 112010 }, { "epoch": 2.28030534351145, "grad_norm": 11.130419552233079, "learning_rate": 6.610339781930781e-06, "loss": 0.125, "step": 112020 }, { "epoch": 2.2805089058524173, "grad_norm": 37.0015972892937, "learning_rate": 6.609667058972235e-06, "loss": 0.1343, "step": 112030 }, { "epoch": 2.2807124681933844, "grad_norm": 1.6320738949967948, "learning_rate": 6.608994303504388e-06, "loss": 0.0642, "step": 112040 }, { "epoch": 2.280916030534351, "grad_norm": 0.08535888351882505, "learning_rate": 6.608321515540831e-06, "loss": 0.0842, "step": 112050 }, { "epoch": 2.281119592875318, "grad_norm": 22.395123773370962, "learning_rate": 6.607648695095148e-06, "loss": 0.0905, "step": 112060 }, { "epoch": 2.281323155216285, "grad_norm": 9.89076523854722, "learning_rate": 6.606975842180927e-06, "loss": 0.1097, "step": 112070 }, { "epoch": 2.281526717557252, "grad_norm": 0.4885884619578629, "learning_rate": 6.606302956811762e-06, "loss": 0.1954, "step": 112080 }, { "epoch": 2.281730279898219, "grad_norm": 11.975422727833015, "learning_rate": 6.605630039001239e-06, "loss": 0.1523, "step": 112090 }, { "epoch": 2.281933842239186, "grad_norm": 18.38580898410828, "learning_rate": 6.60495708876295e-06, "loss": 0.1611, "step": 112100 }, { "epoch": 2.2821374045801526, "grad_norm": 2.9230486654977126, "learning_rate": 6.604284106110485e-06, "loss": 0.0857, "step": 112110 }, { "epoch": 2.2823409669211197, "grad_norm": 0.11384456013637632, "learning_rate": 6.603611091057435e-06, "loss": 0.037, "step": 112120 }, { "epoch": 2.2825445292620863, "grad_norm": 0.16660078083015992, "learning_rate": 6.6029380436173955e-06, "loss": 0.1063, "step": 112130 }, { "epoch": 2.2827480916030534, "grad_norm": 15.41927870700835, "learning_rate": 6.6022649638039574e-06, "loss": 0.1359, "step": 112140 }, { "epoch": 2.2829516539440204, "grad_norm": 7.58874024221917, "learning_rate": 6.601591851630712e-06, "loss": 0.2722, "step": 112150 }, { "epoch": 2.283155216284987, "grad_norm": 2.2682801569462754, "learning_rate": 6.6009187071112594e-06, "loss": 0.2021, "step": 112160 }, { "epoch": 2.283358778625954, "grad_norm": 3.3927645348600413, "learning_rate": 6.600245530259191e-06, "loss": 0.095, "step": 112170 }, { "epoch": 2.2835623409669212, "grad_norm": 9.723668912601994, "learning_rate": 6.599572321088101e-06, "loss": 0.1231, "step": 112180 }, { "epoch": 2.283765903307888, "grad_norm": 2.954945302506264, "learning_rate": 6.598899079611589e-06, "loss": 0.1313, "step": 112190 }, { "epoch": 2.283969465648855, "grad_norm": 15.192985086833954, "learning_rate": 6.598225805843251e-06, "loss": 0.1811, "step": 112200 }, { "epoch": 2.284173027989822, "grad_norm": 16.402653777248887, "learning_rate": 6.597552499796684e-06, "loss": 0.1204, "step": 112210 }, { "epoch": 2.2843765903307887, "grad_norm": 18.612571290744274, "learning_rate": 6.5968791614854875e-06, "loss": 0.2269, "step": 112220 }, { "epoch": 2.2845801526717557, "grad_norm": 20.116943857476, "learning_rate": 6.596205790923259e-06, "loss": 0.1009, "step": 112230 }, { "epoch": 2.284783715012723, "grad_norm": 2.8819894821340557, "learning_rate": 6.595532388123598e-06, "loss": 0.0476, "step": 112240 }, { "epoch": 2.2849872773536894, "grad_norm": 0.6520336850919084, "learning_rate": 6.594858953100106e-06, "loss": 0.1055, "step": 112250 }, { "epoch": 2.2851908396946565, "grad_norm": 3.3165834948398554, "learning_rate": 6.5941854858663835e-06, "loss": 0.1327, "step": 112260 }, { "epoch": 2.2853944020356236, "grad_norm": 28.042872793351048, "learning_rate": 6.593511986436032e-06, "loss": 0.2189, "step": 112270 }, { "epoch": 2.2855979643765902, "grad_norm": 0.22983350785736717, "learning_rate": 6.592838454822652e-06, "loss": 0.1269, "step": 112280 }, { "epoch": 2.2858015267175573, "grad_norm": 9.990213734657756, "learning_rate": 6.5921648910398495e-06, "loss": 0.1596, "step": 112290 }, { "epoch": 2.2860050890585244, "grad_norm": 19.85455862442426, "learning_rate": 6.591491295101225e-06, "loss": 0.1763, "step": 112300 }, { "epoch": 2.286208651399491, "grad_norm": 15.719439984134429, "learning_rate": 6.590817667020386e-06, "loss": 0.2152, "step": 112310 }, { "epoch": 2.286412213740458, "grad_norm": 6.559603434597134, "learning_rate": 6.590144006810932e-06, "loss": 0.1554, "step": 112320 }, { "epoch": 2.2866157760814247, "grad_norm": 2.205914938403014, "learning_rate": 6.589470314486475e-06, "loss": 0.135, "step": 112330 }, { "epoch": 2.286819338422392, "grad_norm": 0.06981611407665643, "learning_rate": 6.588796590060616e-06, "loss": 0.083, "step": 112340 }, { "epoch": 2.287022900763359, "grad_norm": 16.745164115043828, "learning_rate": 6.5881228335469645e-06, "loss": 0.0812, "step": 112350 }, { "epoch": 2.2872264631043255, "grad_norm": 8.470711335656873, "learning_rate": 6.587449044959127e-06, "loss": 0.1197, "step": 112360 }, { "epoch": 2.2874300254452926, "grad_norm": 17.39257130821238, "learning_rate": 6.5867752243107096e-06, "loss": 0.1865, "step": 112370 }, { "epoch": 2.2876335877862597, "grad_norm": 19.147894990583037, "learning_rate": 6.586101371615324e-06, "loss": 0.1131, "step": 112380 }, { "epoch": 2.2878371501272263, "grad_norm": 11.946963159540372, "learning_rate": 6.585427486886579e-06, "loss": 0.1289, "step": 112390 }, { "epoch": 2.2880407124681934, "grad_norm": 0.198201219749579, "learning_rate": 6.5847535701380824e-06, "loss": 0.0926, "step": 112400 }, { "epoch": 2.2882442748091605, "grad_norm": 0.6694297388082934, "learning_rate": 6.584079621383447e-06, "loss": 0.0918, "step": 112410 }, { "epoch": 2.288447837150127, "grad_norm": 27.165971032837973, "learning_rate": 6.583405640636283e-06, "loss": 0.1446, "step": 112420 }, { "epoch": 2.288651399491094, "grad_norm": 15.996596759937205, "learning_rate": 6.582731627910204e-06, "loss": 0.0939, "step": 112430 }, { "epoch": 2.288854961832061, "grad_norm": 0.026713355225260943, "learning_rate": 6.582057583218819e-06, "loss": 0.1411, "step": 112440 }, { "epoch": 2.289058524173028, "grad_norm": 8.73136826131551, "learning_rate": 6.5813835065757455e-06, "loss": 0.0931, "step": 112450 }, { "epoch": 2.289262086513995, "grad_norm": 15.103465786674507, "learning_rate": 6.580709397994596e-06, "loss": 0.2123, "step": 112460 }, { "epoch": 2.2894656488549616, "grad_norm": 0.0735274284374871, "learning_rate": 6.580035257488981e-06, "loss": 0.1059, "step": 112470 }, { "epoch": 2.2896692111959287, "grad_norm": 39.324508627887674, "learning_rate": 6.579361085072522e-06, "loss": 0.1997, "step": 112480 }, { "epoch": 2.289872773536896, "grad_norm": 0.1346584948988825, "learning_rate": 6.57868688075883e-06, "loss": 0.0538, "step": 112490 }, { "epoch": 2.2900763358778624, "grad_norm": 38.92706604282035, "learning_rate": 6.578012644561524e-06, "loss": 0.1293, "step": 112500 }, { "epoch": 2.2902798982188295, "grad_norm": 0.005754438641460443, "learning_rate": 6.57733837649422e-06, "loss": 0.084, "step": 112510 }, { "epoch": 2.2904834605597966, "grad_norm": 13.389542859764191, "learning_rate": 6.576664076570536e-06, "loss": 0.0881, "step": 112520 }, { "epoch": 2.290687022900763, "grad_norm": 0.10672460215262053, "learning_rate": 6.575989744804088e-06, "loss": 0.1117, "step": 112530 }, { "epoch": 2.2908905852417303, "grad_norm": 26.887586429634954, "learning_rate": 6.5753153812085e-06, "loss": 0.1064, "step": 112540 }, { "epoch": 2.2910941475826974, "grad_norm": 0.20701442587821273, "learning_rate": 6.574640985797388e-06, "loss": 0.0821, "step": 112550 }, { "epoch": 2.291297709923664, "grad_norm": 21.140767476002676, "learning_rate": 6.573966558584372e-06, "loss": 0.1522, "step": 112560 }, { "epoch": 2.291501272264631, "grad_norm": 0.08892598703777335, "learning_rate": 6.573292099583076e-06, "loss": 0.1149, "step": 112570 }, { "epoch": 2.291704834605598, "grad_norm": 0.20265162095008846, "learning_rate": 6.572617608807119e-06, "loss": 0.0473, "step": 112580 }, { "epoch": 2.291908396946565, "grad_norm": 19.75912410069039, "learning_rate": 6.5719430862701226e-06, "loss": 0.0871, "step": 112590 }, { "epoch": 2.292111959287532, "grad_norm": 0.07070984478465611, "learning_rate": 6.571268531985714e-06, "loss": 0.1208, "step": 112600 }, { "epoch": 2.292315521628499, "grad_norm": 0.16884250119434216, "learning_rate": 6.5705939459675105e-06, "loss": 0.0379, "step": 112610 }, { "epoch": 2.2925190839694656, "grad_norm": 6.0784698005832976, "learning_rate": 6.5699193282291396e-06, "loss": 0.1084, "step": 112620 }, { "epoch": 2.2927226463104327, "grad_norm": 9.091269542465348, "learning_rate": 6.569244678784228e-06, "loss": 0.1653, "step": 112630 }, { "epoch": 2.2929262086513997, "grad_norm": 21.238591841230754, "learning_rate": 6.5685699976463975e-06, "loss": 0.1024, "step": 112640 }, { "epoch": 2.2931297709923664, "grad_norm": 17.315916121646435, "learning_rate": 6.567895284829275e-06, "loss": 0.1499, "step": 112650 }, { "epoch": 2.2933333333333334, "grad_norm": 25.92047670015376, "learning_rate": 6.56722054034649e-06, "loss": 0.1526, "step": 112660 }, { "epoch": 2.2935368956743, "grad_norm": 8.244280159413723, "learning_rate": 6.566545764211666e-06, "loss": 0.079, "step": 112670 }, { "epoch": 2.293740458015267, "grad_norm": 9.507596117537117, "learning_rate": 6.565870956438433e-06, "loss": 0.1966, "step": 112680 }, { "epoch": 2.2939440203562342, "grad_norm": 32.8561469790255, "learning_rate": 6.5651961170404175e-06, "loss": 0.1356, "step": 112690 }, { "epoch": 2.294147582697201, "grad_norm": 2.4565737365412987, "learning_rate": 6.564521246031253e-06, "loss": 0.1284, "step": 112700 }, { "epoch": 2.294351145038168, "grad_norm": 42.459232109227344, "learning_rate": 6.563846343424567e-06, "loss": 0.1398, "step": 112710 }, { "epoch": 2.294554707379135, "grad_norm": 5.968561133078098, "learning_rate": 6.563171409233988e-06, "loss": 0.0752, "step": 112720 }, { "epoch": 2.2947582697201017, "grad_norm": 30.048086331448175, "learning_rate": 6.5624964434731485e-06, "loss": 0.1123, "step": 112730 }, { "epoch": 2.2949618320610687, "grad_norm": 24.150877167929828, "learning_rate": 6.561821446155683e-06, "loss": 0.1884, "step": 112740 }, { "epoch": 2.2951653944020354, "grad_norm": 0.4225109450321485, "learning_rate": 6.561146417295221e-06, "loss": 0.0913, "step": 112750 }, { "epoch": 2.2953689567430025, "grad_norm": 5.138412466980034, "learning_rate": 6.560471356905396e-06, "loss": 0.1155, "step": 112760 }, { "epoch": 2.2955725190839695, "grad_norm": 0.3870463925855288, "learning_rate": 6.559796264999842e-06, "loss": 0.0999, "step": 112770 }, { "epoch": 2.295776081424936, "grad_norm": 17.359235300120258, "learning_rate": 6.559121141592194e-06, "loss": 0.2086, "step": 112780 }, { "epoch": 2.2959796437659032, "grad_norm": 19.505553251652845, "learning_rate": 6.558445986696086e-06, "loss": 0.1352, "step": 112790 }, { "epoch": 2.2961832061068703, "grad_norm": 14.01086221797187, "learning_rate": 6.557770800325155e-06, "loss": 0.1613, "step": 112800 }, { "epoch": 2.296386768447837, "grad_norm": 0.15190878547712844, "learning_rate": 6.557095582493034e-06, "loss": 0.0946, "step": 112810 }, { "epoch": 2.296590330788804, "grad_norm": 6.660537947749404, "learning_rate": 6.556420333213364e-06, "loss": 0.0607, "step": 112820 }, { "epoch": 2.296793893129771, "grad_norm": 0.8958902629399943, "learning_rate": 6.555745052499781e-06, "loss": 0.0589, "step": 112830 }, { "epoch": 2.2969974554707377, "grad_norm": 1.242879909487082, "learning_rate": 6.5550697403659225e-06, "loss": 0.1943, "step": 112840 }, { "epoch": 2.297201017811705, "grad_norm": 0.8957932455414581, "learning_rate": 6.5543943968254276e-06, "loss": 0.29, "step": 112850 }, { "epoch": 2.297404580152672, "grad_norm": 4.227403829394773, "learning_rate": 6.553719021891936e-06, "loss": 0.0475, "step": 112860 }, { "epoch": 2.2976081424936385, "grad_norm": 30.712398925134092, "learning_rate": 6.553043615579087e-06, "loss": 0.1811, "step": 112870 }, { "epoch": 2.2978117048346056, "grad_norm": 36.89496975327741, "learning_rate": 6.552368177900521e-06, "loss": 0.1182, "step": 112880 }, { "epoch": 2.2980152671755727, "grad_norm": 1.0037855151770263, "learning_rate": 6.551692708869882e-06, "loss": 0.0586, "step": 112890 }, { "epoch": 2.2982188295165393, "grad_norm": 5.145429915593033, "learning_rate": 6.551017208500809e-06, "loss": 0.0489, "step": 112900 }, { "epoch": 2.2984223918575064, "grad_norm": 0.22372982746397832, "learning_rate": 6.550341676806946e-06, "loss": 0.0815, "step": 112910 }, { "epoch": 2.2986259541984735, "grad_norm": 21.09481949886724, "learning_rate": 6.5496661138019364e-06, "loss": 0.1264, "step": 112920 }, { "epoch": 2.29882951653944, "grad_norm": 37.26461907420676, "learning_rate": 6.5489905194994245e-06, "loss": 0.0894, "step": 112930 }, { "epoch": 2.299033078880407, "grad_norm": 22.994671608217843, "learning_rate": 6.5483148939130526e-06, "loss": 0.2125, "step": 112940 }, { "epoch": 2.2992366412213743, "grad_norm": 41.534497030712934, "learning_rate": 6.547639237056468e-06, "loss": 0.1522, "step": 112950 }, { "epoch": 2.299440203562341, "grad_norm": 37.4484850185997, "learning_rate": 6.546963548943315e-06, "loss": 0.2407, "step": 112960 }, { "epoch": 2.299643765903308, "grad_norm": 1.5220540898318382, "learning_rate": 6.546287829587241e-06, "loss": 0.0768, "step": 112970 }, { "epoch": 2.2998473282442746, "grad_norm": 19.061075410948256, "learning_rate": 6.545612079001893e-06, "loss": 0.1994, "step": 112980 }, { "epoch": 2.3000508905852417, "grad_norm": 9.036304281174958, "learning_rate": 6.544936297200917e-06, "loss": 0.0905, "step": 112990 }, { "epoch": 2.300254452926209, "grad_norm": 0.24068082800817575, "learning_rate": 6.544260484197964e-06, "loss": 0.1428, "step": 113000 }, { "epoch": 2.3004580152671754, "grad_norm": 9.826225154503145, "learning_rate": 6.543584640006682e-06, "loss": 0.1214, "step": 113010 }, { "epoch": 2.3006615776081425, "grad_norm": 15.207468276241745, "learning_rate": 6.542908764640717e-06, "loss": 0.1242, "step": 113020 }, { "epoch": 2.3008651399491096, "grad_norm": 0.0977424094542915, "learning_rate": 6.542232858113726e-06, "loss": 0.1049, "step": 113030 }, { "epoch": 2.301068702290076, "grad_norm": 0.008969477867847452, "learning_rate": 6.541556920439355e-06, "loss": 0.1273, "step": 113040 }, { "epoch": 2.3012722646310433, "grad_norm": 9.116402465734868, "learning_rate": 6.540880951631255e-06, "loss": 0.1166, "step": 113050 }, { "epoch": 2.3014758269720104, "grad_norm": 17.200108766154923, "learning_rate": 6.540204951703081e-06, "loss": 0.2059, "step": 113060 }, { "epoch": 2.301679389312977, "grad_norm": 8.49360122264543, "learning_rate": 6.539528920668483e-06, "loss": 0.1236, "step": 113070 }, { "epoch": 2.301882951653944, "grad_norm": 0.07406215472935268, "learning_rate": 6.538852858541116e-06, "loss": 0.0914, "step": 113080 }, { "epoch": 2.3020865139949107, "grad_norm": 6.589061599249781, "learning_rate": 6.5381767653346354e-06, "loss": 0.131, "step": 113090 }, { "epoch": 2.302290076335878, "grad_norm": 0.5107729898609646, "learning_rate": 6.53750064106269e-06, "loss": 0.1293, "step": 113100 }, { "epoch": 2.302493638676845, "grad_norm": 25.153653667289248, "learning_rate": 6.536824485738942e-06, "loss": 0.3027, "step": 113110 }, { "epoch": 2.3026972010178115, "grad_norm": 11.647549003447887, "learning_rate": 6.536148299377044e-06, "loss": 0.1252, "step": 113120 }, { "epoch": 2.3029007633587786, "grad_norm": 9.210678349056165, "learning_rate": 6.535472081990652e-06, "loss": 0.0716, "step": 113130 }, { "epoch": 2.3031043256997457, "grad_norm": 0.6021053907216002, "learning_rate": 6.534795833593423e-06, "loss": 0.0931, "step": 113140 }, { "epoch": 2.3033078880407123, "grad_norm": 10.635802902292069, "learning_rate": 6.534119554199016e-06, "loss": 0.1552, "step": 113150 }, { "epoch": 2.3035114503816794, "grad_norm": 24.564629728035683, "learning_rate": 6.533443243821089e-06, "loss": 0.1458, "step": 113160 }, { "epoch": 2.3037150127226464, "grad_norm": 1.3383374789532911, "learning_rate": 6.532766902473299e-06, "loss": 0.0694, "step": 113170 }, { "epoch": 2.303918575063613, "grad_norm": 0.10063688950299035, "learning_rate": 6.532090530169308e-06, "loss": 0.191, "step": 113180 }, { "epoch": 2.30412213740458, "grad_norm": 0.8100457790362056, "learning_rate": 6.531414126922775e-06, "loss": 0.0478, "step": 113190 }, { "epoch": 2.3043256997455472, "grad_norm": 1.550574657113857, "learning_rate": 6.530737692747363e-06, "loss": 0.1311, "step": 113200 }, { "epoch": 2.304529262086514, "grad_norm": 26.623939144574507, "learning_rate": 6.530061227656731e-06, "loss": 0.2222, "step": 113210 }, { "epoch": 2.304732824427481, "grad_norm": 0.5481498491465683, "learning_rate": 6.529384731664541e-06, "loss": 0.1586, "step": 113220 }, { "epoch": 2.304936386768448, "grad_norm": 15.309902127975533, "learning_rate": 6.528708204784457e-06, "loss": 0.1071, "step": 113230 }, { "epoch": 2.3051399491094147, "grad_norm": 2.945657733494773, "learning_rate": 6.528031647030142e-06, "loss": 0.1215, "step": 113240 }, { "epoch": 2.3053435114503817, "grad_norm": 9.275185437783215, "learning_rate": 6.527355058415259e-06, "loss": 0.146, "step": 113250 }, { "epoch": 2.305547073791349, "grad_norm": 0.10274190486601237, "learning_rate": 6.526678438953476e-06, "loss": 0.0716, "step": 113260 }, { "epoch": 2.3057506361323155, "grad_norm": 1.9378753665003108, "learning_rate": 6.526001788658451e-06, "loss": 0.1942, "step": 113270 }, { "epoch": 2.3059541984732825, "grad_norm": 19.086772474222403, "learning_rate": 6.5253251075438576e-06, "loss": 0.0732, "step": 113280 }, { "epoch": 2.3061577608142496, "grad_norm": 0.10024751160229943, "learning_rate": 6.524648395623358e-06, "loss": 0.1849, "step": 113290 }, { "epoch": 2.3063613231552162, "grad_norm": 19.92658010702733, "learning_rate": 6.523971652910621e-06, "loss": 0.1937, "step": 113300 }, { "epoch": 2.3065648854961833, "grad_norm": 9.838378612444979, "learning_rate": 6.523294879419313e-06, "loss": 0.2235, "step": 113310 }, { "epoch": 2.30676844783715, "grad_norm": 5.671633539878432, "learning_rate": 6.522618075163104e-06, "loss": 0.1139, "step": 113320 }, { "epoch": 2.306972010178117, "grad_norm": 0.1884696311061837, "learning_rate": 6.521941240155662e-06, "loss": 0.0369, "step": 113330 }, { "epoch": 2.307175572519084, "grad_norm": 0.29265668709924103, "learning_rate": 6.521264374410654e-06, "loss": 0.0839, "step": 113340 }, { "epoch": 2.3073791348600508, "grad_norm": 0.042200519346461726, "learning_rate": 6.520587477941754e-06, "loss": 0.135, "step": 113350 }, { "epoch": 2.307582697201018, "grad_norm": 0.37352785195660004, "learning_rate": 6.519910550762632e-06, "loss": 0.0627, "step": 113360 }, { "epoch": 2.307786259541985, "grad_norm": 0.17943635905055905, "learning_rate": 6.519233592886958e-06, "loss": 0.0619, "step": 113370 }, { "epoch": 2.3079898218829515, "grad_norm": 24.99370079349887, "learning_rate": 6.518556604328407e-06, "loss": 0.2425, "step": 113380 }, { "epoch": 2.3081933842239186, "grad_norm": 0.15454831834560573, "learning_rate": 6.517879585100646e-06, "loss": 0.093, "step": 113390 }, { "epoch": 2.3083969465648853, "grad_norm": 0.21355479279796094, "learning_rate": 6.5172025352173525e-06, "loss": 0.1187, "step": 113400 }, { "epoch": 2.3086005089058523, "grad_norm": 12.363458652427227, "learning_rate": 6.516525454692202e-06, "loss": 0.154, "step": 113410 }, { "epoch": 2.3088040712468194, "grad_norm": 0.04737219493126236, "learning_rate": 6.5158483435388655e-06, "loss": 0.0914, "step": 113420 }, { "epoch": 2.309007633587786, "grad_norm": 0.33297861058971145, "learning_rate": 6.515171201771018e-06, "loss": 0.0979, "step": 113430 }, { "epoch": 2.309211195928753, "grad_norm": 7.690148268864818, "learning_rate": 6.514494029402338e-06, "loss": 0.1673, "step": 113440 }, { "epoch": 2.30941475826972, "grad_norm": 1.4823315023693286, "learning_rate": 6.5138168264465e-06, "loss": 0.0894, "step": 113450 }, { "epoch": 2.309618320610687, "grad_norm": 21.124657028296244, "learning_rate": 6.513139592917182e-06, "loss": 0.2256, "step": 113460 }, { "epoch": 2.309821882951654, "grad_norm": 0.0816569276849155, "learning_rate": 6.51246232882806e-06, "loss": 0.1518, "step": 113470 }, { "epoch": 2.310025445292621, "grad_norm": 1.8993958087141738, "learning_rate": 6.511785034192812e-06, "loss": 0.0835, "step": 113480 }, { "epoch": 2.3102290076335876, "grad_norm": 0.17390219035939838, "learning_rate": 6.5111077090251214e-06, "loss": 0.0791, "step": 113490 }, { "epoch": 2.3104325699745547, "grad_norm": 33.216920935650464, "learning_rate": 6.510430353338662e-06, "loss": 0.1127, "step": 113500 }, { "epoch": 2.310636132315522, "grad_norm": 13.658440364509744, "learning_rate": 6.509752967147116e-06, "loss": 0.1182, "step": 113510 }, { "epoch": 2.3108396946564884, "grad_norm": 1.12845211492919, "learning_rate": 6.509075550464165e-06, "loss": 0.0715, "step": 113520 }, { "epoch": 2.3110432569974555, "grad_norm": 9.045298231048847, "learning_rate": 6.50839810330349e-06, "loss": 0.1865, "step": 113530 }, { "epoch": 2.3112468193384226, "grad_norm": 32.871136242230584, "learning_rate": 6.5077206256787725e-06, "loss": 0.1756, "step": 113540 }, { "epoch": 2.311450381679389, "grad_norm": 12.035054226004334, "learning_rate": 6.507043117603695e-06, "loss": 0.183, "step": 113550 }, { "epoch": 2.3116539440203563, "grad_norm": 16.602985442452276, "learning_rate": 6.50636557909194e-06, "loss": 0.1884, "step": 113560 }, { "epoch": 2.3118575063613234, "grad_norm": 0.2980583545089848, "learning_rate": 6.505688010157193e-06, "loss": 0.0704, "step": 113570 }, { "epoch": 2.31206106870229, "grad_norm": 11.7008821977986, "learning_rate": 6.505010410813137e-06, "loss": 0.2015, "step": 113580 }, { "epoch": 2.312264631043257, "grad_norm": 14.739441171827924, "learning_rate": 6.504332781073457e-06, "loss": 0.2472, "step": 113590 }, { "epoch": 2.312468193384224, "grad_norm": 0.1013527233899463, "learning_rate": 6.503655120951837e-06, "loss": 0.0802, "step": 113600 }, { "epoch": 2.312671755725191, "grad_norm": 0.2947137220883112, "learning_rate": 6.502977430461968e-06, "loss": 0.1303, "step": 113610 }, { "epoch": 2.312875318066158, "grad_norm": 23.899062573091232, "learning_rate": 6.5022997096175345e-06, "loss": 0.0949, "step": 113620 }, { "epoch": 2.3130788804071245, "grad_norm": 1.2965900088491735, "learning_rate": 6.501621958432221e-06, "loss": 0.0656, "step": 113630 }, { "epoch": 2.3132824427480916, "grad_norm": 15.393109477990285, "learning_rate": 6.5009441769197194e-06, "loss": 0.1175, "step": 113640 }, { "epoch": 2.3134860050890587, "grad_norm": 17.528546903965914, "learning_rate": 6.500266365093716e-06, "loss": 0.1373, "step": 113650 }, { "epoch": 2.3136895674300253, "grad_norm": 31.939289504674964, "learning_rate": 6.499588522967901e-06, "loss": 0.1193, "step": 113660 }, { "epoch": 2.3138931297709924, "grad_norm": 0.28360837831009894, "learning_rate": 6.498910650555966e-06, "loss": 0.1317, "step": 113670 }, { "epoch": 2.3140966921119595, "grad_norm": 7.03659651616753, "learning_rate": 6.498232747871598e-06, "loss": 0.0805, "step": 113680 }, { "epoch": 2.314300254452926, "grad_norm": 5.261616471904366, "learning_rate": 6.4975548149284905e-06, "loss": 0.1097, "step": 113690 }, { "epoch": 2.314503816793893, "grad_norm": 14.498778273160516, "learning_rate": 6.496876851740334e-06, "loss": 0.2233, "step": 113700 }, { "epoch": 2.3147073791348602, "grad_norm": 0.006436120865291196, "learning_rate": 6.4961988583208235e-06, "loss": 0.0695, "step": 113710 }, { "epoch": 2.314910941475827, "grad_norm": 1.8564006286091008, "learning_rate": 6.495520834683649e-06, "loss": 0.1657, "step": 113720 }, { "epoch": 2.315114503816794, "grad_norm": 0.28472256672410007, "learning_rate": 6.494842780842504e-06, "loss": 0.1479, "step": 113730 }, { "epoch": 2.3153180661577606, "grad_norm": 14.827533033446556, "learning_rate": 6.494164696811085e-06, "loss": 0.1277, "step": 113740 }, { "epoch": 2.3155216284987277, "grad_norm": 0.037329743895933284, "learning_rate": 6.493486582603085e-06, "loss": 0.0474, "step": 113750 }, { "epoch": 2.3157251908396947, "grad_norm": 0.3172179057285006, "learning_rate": 6.492808438232199e-06, "loss": 0.1332, "step": 113760 }, { "epoch": 2.3159287531806614, "grad_norm": 7.7552072114491395, "learning_rate": 6.4921302637121246e-06, "loss": 0.1768, "step": 113770 }, { "epoch": 2.3161323155216285, "grad_norm": 0.09377936533570339, "learning_rate": 6.491452059056559e-06, "loss": 0.0972, "step": 113780 }, { "epoch": 2.3163358778625955, "grad_norm": 0.5253294619487253, "learning_rate": 6.490773824279196e-06, "loss": 0.1627, "step": 113790 }, { "epoch": 2.316539440203562, "grad_norm": 6.559890544870432, "learning_rate": 6.490095559393735e-06, "loss": 0.0782, "step": 113800 }, { "epoch": 2.3167430025445293, "grad_norm": 11.716266026453974, "learning_rate": 6.489417264413877e-06, "loss": 0.1297, "step": 113810 }, { "epoch": 2.3169465648854963, "grad_norm": 1.3326959869813972, "learning_rate": 6.48873893935332e-06, "loss": 0.0318, "step": 113820 }, { "epoch": 2.317150127226463, "grad_norm": 0.2284968510812143, "learning_rate": 6.4880605842257605e-06, "loss": 0.1533, "step": 113830 }, { "epoch": 2.31735368956743, "grad_norm": 1.2742540268655194, "learning_rate": 6.487382199044901e-06, "loss": 0.0959, "step": 113840 }, { "epoch": 2.317557251908397, "grad_norm": 0.8004936032710707, "learning_rate": 6.486703783824442e-06, "loss": 0.1693, "step": 113850 }, { "epoch": 2.3177608142493638, "grad_norm": 9.779752417249552, "learning_rate": 6.486025338578086e-06, "loss": 0.2259, "step": 113860 }, { "epoch": 2.317964376590331, "grad_norm": 15.005439168535275, "learning_rate": 6.4853468633195345e-06, "loss": 0.115, "step": 113870 }, { "epoch": 2.318167938931298, "grad_norm": 9.535562465154472, "learning_rate": 6.4846683580624896e-06, "loss": 0.1125, "step": 113880 }, { "epoch": 2.3183715012722645, "grad_norm": 10.117259014900215, "learning_rate": 6.4839898228206545e-06, "loss": 0.1394, "step": 113890 }, { "epoch": 2.3185750636132316, "grad_norm": 0.19142859012941274, "learning_rate": 6.483311257607735e-06, "loss": 0.1259, "step": 113900 }, { "epoch": 2.3187786259541987, "grad_norm": 6.688054696924993, "learning_rate": 6.482632662437433e-06, "loss": 0.0831, "step": 113910 }, { "epoch": 2.3189821882951653, "grad_norm": 2.4991629280006746, "learning_rate": 6.481954037323455e-06, "loss": 0.0623, "step": 113920 }, { "epoch": 2.3191857506361324, "grad_norm": 1.7876483797569818, "learning_rate": 6.481275382279506e-06, "loss": 0.1554, "step": 113930 }, { "epoch": 2.319389312977099, "grad_norm": 33.48946078064289, "learning_rate": 6.480596697319294e-06, "loss": 0.1322, "step": 113940 }, { "epoch": 2.319592875318066, "grad_norm": 0.21904118147201873, "learning_rate": 6.479917982456523e-06, "loss": 0.1539, "step": 113950 }, { "epoch": 2.319796437659033, "grad_norm": 7.471891718778102, "learning_rate": 6.479239237704904e-06, "loss": 0.1229, "step": 113960 }, { "epoch": 2.32, "grad_norm": 29.123449620365395, "learning_rate": 6.478560463078142e-06, "loss": 0.0477, "step": 113970 }, { "epoch": 2.320203562340967, "grad_norm": 3.940524134808815, "learning_rate": 6.477881658589949e-06, "loss": 0.0375, "step": 113980 }, { "epoch": 2.320407124681934, "grad_norm": 2.7075317484889925, "learning_rate": 6.47720282425403e-06, "loss": 0.0608, "step": 113990 }, { "epoch": 2.3206106870229006, "grad_norm": 0.19680672870544108, "learning_rate": 6.476523960084098e-06, "loss": 0.2501, "step": 114000 }, { "epoch": 2.3208142493638677, "grad_norm": 16.6417036396817, "learning_rate": 6.475845066093863e-06, "loss": 0.2637, "step": 114010 }, { "epoch": 2.321017811704835, "grad_norm": 0.03421215818296624, "learning_rate": 6.4751661422970355e-06, "loss": 0.1327, "step": 114020 }, { "epoch": 2.3212213740458014, "grad_norm": 0.1785074995957194, "learning_rate": 6.474487188707329e-06, "loss": 0.1082, "step": 114030 }, { "epoch": 2.3214249363867685, "grad_norm": 1.373735709240679, "learning_rate": 6.4738082053384555e-06, "loss": 0.0854, "step": 114040 }, { "epoch": 2.321628498727735, "grad_norm": 10.579239548540391, "learning_rate": 6.473129192204124e-06, "loss": 0.1707, "step": 114050 }, { "epoch": 2.321832061068702, "grad_norm": 3.4844046942701246, "learning_rate": 6.472450149318052e-06, "loss": 0.1984, "step": 114060 }, { "epoch": 2.3220356234096693, "grad_norm": 22.247854656942494, "learning_rate": 6.471771076693956e-06, "loss": 0.1386, "step": 114070 }, { "epoch": 2.322239185750636, "grad_norm": 0.0490720624510183, "learning_rate": 6.471091974345543e-06, "loss": 0.0808, "step": 114080 }, { "epoch": 2.322442748091603, "grad_norm": 0.40213789401414, "learning_rate": 6.470412842286534e-06, "loss": 0.1391, "step": 114090 }, { "epoch": 2.32264631043257, "grad_norm": 10.744820663858727, "learning_rate": 6.469733680530645e-06, "loss": 0.0908, "step": 114100 }, { "epoch": 2.3228498727735367, "grad_norm": 9.92991294446493, "learning_rate": 6.469054489091592e-06, "loss": 0.1127, "step": 114110 }, { "epoch": 2.323053435114504, "grad_norm": 17.441681402996768, "learning_rate": 6.46837526798309e-06, "loss": 0.2053, "step": 114120 }, { "epoch": 2.323256997455471, "grad_norm": 19.958692886227816, "learning_rate": 6.467696017218858e-06, "loss": 0.1511, "step": 114130 }, { "epoch": 2.3234605597964375, "grad_norm": 6.848766922544712, "learning_rate": 6.467016736812614e-06, "loss": 0.146, "step": 114140 }, { "epoch": 2.3236641221374046, "grad_norm": 0.04584532157347648, "learning_rate": 6.46633742677808e-06, "loss": 0.0953, "step": 114150 }, { "epoch": 2.3238676844783717, "grad_norm": 61.84924876333046, "learning_rate": 6.465658087128972e-06, "loss": 0.0603, "step": 114160 }, { "epoch": 2.3240712468193383, "grad_norm": 0.0598565393352335, "learning_rate": 6.46497871787901e-06, "loss": 0.1683, "step": 114170 }, { "epoch": 2.3242748091603054, "grad_norm": 11.602279014902601, "learning_rate": 6.464299319041918e-06, "loss": 0.1123, "step": 114180 }, { "epoch": 2.3244783715012725, "grad_norm": 2.8956018230827696, "learning_rate": 6.463619890631414e-06, "loss": 0.1008, "step": 114190 }, { "epoch": 2.324681933842239, "grad_norm": 0.08559515891255114, "learning_rate": 6.46294043266122e-06, "loss": 0.1534, "step": 114200 }, { "epoch": 2.324885496183206, "grad_norm": 17.15004019038262, "learning_rate": 6.462260945145061e-06, "loss": 0.1546, "step": 114210 }, { "epoch": 2.3250890585241732, "grad_norm": 32.03841894879054, "learning_rate": 6.461581428096659e-06, "loss": 0.1799, "step": 114220 }, { "epoch": 2.32529262086514, "grad_norm": 12.976132451129198, "learning_rate": 6.4609018815297376e-06, "loss": 0.1233, "step": 114230 }, { "epoch": 2.325496183206107, "grad_norm": 15.133675269761696, "learning_rate": 6.4602223054580215e-06, "loss": 0.1772, "step": 114240 }, { "epoch": 2.325699745547074, "grad_norm": 8.39468533759974, "learning_rate": 6.459542699895232e-06, "loss": 0.1379, "step": 114250 }, { "epoch": 2.3259033078880407, "grad_norm": 9.59904087626593, "learning_rate": 6.458863064855101e-06, "loss": 0.1611, "step": 114260 }, { "epoch": 2.3261068702290078, "grad_norm": 0.2786119619123327, "learning_rate": 6.458183400351351e-06, "loss": 0.1182, "step": 114270 }, { "epoch": 2.3263104325699744, "grad_norm": 4.023065634365035, "learning_rate": 6.457503706397708e-06, "loss": 0.1828, "step": 114280 }, { "epoch": 2.3265139949109415, "grad_norm": 6.633768843008015, "learning_rate": 6.456823983007902e-06, "loss": 0.1308, "step": 114290 }, { "epoch": 2.3267175572519085, "grad_norm": 18.61227230605683, "learning_rate": 6.4561442301956575e-06, "loss": 0.1648, "step": 114300 }, { "epoch": 2.326921119592875, "grad_norm": 0.1803608080199708, "learning_rate": 6.455464447974707e-06, "loss": 0.101, "step": 114310 }, { "epoch": 2.3271246819338423, "grad_norm": 4.298599764084195, "learning_rate": 6.454784636358775e-06, "loss": 0.1254, "step": 114320 }, { "epoch": 2.3273282442748093, "grad_norm": 4.070396657038285, "learning_rate": 6.454104795361595e-06, "loss": 0.1156, "step": 114330 }, { "epoch": 2.327531806615776, "grad_norm": 11.62611976038055, "learning_rate": 6.453424924996895e-06, "loss": 0.1461, "step": 114340 }, { "epoch": 2.327735368956743, "grad_norm": 3.7276003794967014, "learning_rate": 6.452745025278406e-06, "loss": 0.0844, "step": 114350 }, { "epoch": 2.3279389312977097, "grad_norm": 16.81210889657068, "learning_rate": 6.452065096219863e-06, "loss": 0.0963, "step": 114360 }, { "epoch": 2.3281424936386768, "grad_norm": 8.548257269906218, "learning_rate": 6.451385137834992e-06, "loss": 0.1365, "step": 114370 }, { "epoch": 2.328346055979644, "grad_norm": 4.6936606717734595, "learning_rate": 6.45070515013753e-06, "loss": 0.117, "step": 114380 }, { "epoch": 2.3285496183206105, "grad_norm": 20.891604455951043, "learning_rate": 6.4500251331412085e-06, "loss": 0.1282, "step": 114390 }, { "epoch": 2.3287531806615775, "grad_norm": 0.13738085367347938, "learning_rate": 6.449345086859763e-06, "loss": 0.1504, "step": 114400 }, { "epoch": 2.3289567430025446, "grad_norm": 2.430101383873018, "learning_rate": 6.448665011306926e-06, "loss": 0.1245, "step": 114410 }, { "epoch": 2.3291603053435113, "grad_norm": 8.873945516926097, "learning_rate": 6.4479849064964325e-06, "loss": 0.1795, "step": 114420 }, { "epoch": 2.3293638676844783, "grad_norm": 15.804337396594075, "learning_rate": 6.44730477244202e-06, "loss": 0.1401, "step": 114430 }, { "epoch": 2.3295674300254454, "grad_norm": 0.05282602285900497, "learning_rate": 6.446624609157423e-06, "loss": 0.0803, "step": 114440 }, { "epoch": 2.329770992366412, "grad_norm": 4.045187161801734, "learning_rate": 6.44594441665638e-06, "loss": 0.1786, "step": 114450 }, { "epoch": 2.329974554707379, "grad_norm": 1.4193966930843236, "learning_rate": 6.445264194952624e-06, "loss": 0.1764, "step": 114460 }, { "epoch": 2.330178117048346, "grad_norm": 16.435467336356876, "learning_rate": 6.444583944059899e-06, "loss": 0.1565, "step": 114470 }, { "epoch": 2.330381679389313, "grad_norm": 1.743049358558463, "learning_rate": 6.44390366399194e-06, "loss": 0.0588, "step": 114480 }, { "epoch": 2.33058524173028, "grad_norm": 0.04022860103524637, "learning_rate": 6.4432233547624866e-06, "loss": 0.0961, "step": 114490 }, { "epoch": 2.330788804071247, "grad_norm": 25.88520375529228, "learning_rate": 6.442543016385279e-06, "loss": 0.1191, "step": 114500 }, { "epoch": 2.3309923664122136, "grad_norm": 0.07940585131575041, "learning_rate": 6.441862648874055e-06, "loss": 0.0855, "step": 114510 }, { "epoch": 2.3311959287531807, "grad_norm": 17.766606719590808, "learning_rate": 6.4411822522425604e-06, "loss": 0.1582, "step": 114520 }, { "epoch": 2.331399491094148, "grad_norm": 5.9156276513340105, "learning_rate": 6.4405018265045335e-06, "loss": 0.1131, "step": 114530 }, { "epoch": 2.3316030534351144, "grad_norm": 6.4568592735374954, "learning_rate": 6.439821371673716e-06, "loss": 0.0583, "step": 114540 }, { "epoch": 2.3318066157760815, "grad_norm": 0.4244457518875084, "learning_rate": 6.439140887763851e-06, "loss": 0.1306, "step": 114550 }, { "epoch": 2.3320101781170486, "grad_norm": 6.549117703812473, "learning_rate": 6.438460374788684e-06, "loss": 0.0633, "step": 114560 }, { "epoch": 2.332213740458015, "grad_norm": 7.673552295872383, "learning_rate": 6.437779832761957e-06, "loss": 0.2069, "step": 114570 }, { "epoch": 2.3324173027989823, "grad_norm": 19.060554414242223, "learning_rate": 6.437099261697412e-06, "loss": 0.196, "step": 114580 }, { "epoch": 2.332620865139949, "grad_norm": 5.343079725984093, "learning_rate": 6.436418661608799e-06, "loss": 0.2276, "step": 114590 }, { "epoch": 2.332824427480916, "grad_norm": 0.7761376355084123, "learning_rate": 6.43573803250986e-06, "loss": 0.0969, "step": 114600 }, { "epoch": 2.333027989821883, "grad_norm": 27.49940130630267, "learning_rate": 6.435057374414342e-06, "loss": 0.2415, "step": 114610 }, { "epoch": 2.3332315521628497, "grad_norm": 12.962161545157471, "learning_rate": 6.434376687335991e-06, "loss": 0.2027, "step": 114620 }, { "epoch": 2.333435114503817, "grad_norm": 0.12595104171358515, "learning_rate": 6.433695971288556e-06, "loss": 0.095, "step": 114630 }, { "epoch": 2.333638676844784, "grad_norm": 4.146992970618692, "learning_rate": 6.433015226285785e-06, "loss": 0.0868, "step": 114640 }, { "epoch": 2.3338422391857505, "grad_norm": 13.301159050193318, "learning_rate": 6.4323344523414265e-06, "loss": 0.2222, "step": 114650 }, { "epoch": 2.3340458015267176, "grad_norm": 13.749596349575446, "learning_rate": 6.431653649469228e-06, "loss": 0.0727, "step": 114660 }, { "epoch": 2.3342493638676847, "grad_norm": 11.505132485848653, "learning_rate": 6.43097281768294e-06, "loss": 0.1547, "step": 114670 }, { "epoch": 2.3344529262086513, "grad_norm": 12.36415221363549, "learning_rate": 6.430291956996313e-06, "loss": 0.1375, "step": 114680 }, { "epoch": 2.3346564885496184, "grad_norm": 3.581755870348392, "learning_rate": 6.429611067423097e-06, "loss": 0.1788, "step": 114690 }, { "epoch": 2.334860050890585, "grad_norm": 11.649077971678231, "learning_rate": 6.428930148977045e-06, "loss": 0.1107, "step": 114700 }, { "epoch": 2.335063613231552, "grad_norm": 8.56618368594976, "learning_rate": 6.428249201671908e-06, "loss": 0.1686, "step": 114710 }, { "epoch": 2.335267175572519, "grad_norm": 12.21759654076224, "learning_rate": 6.427568225521439e-06, "loss": 0.1692, "step": 114720 }, { "epoch": 2.335470737913486, "grad_norm": 0.02917580769553514, "learning_rate": 6.426887220539391e-06, "loss": 0.0513, "step": 114730 }, { "epoch": 2.335674300254453, "grad_norm": 10.056544222786627, "learning_rate": 6.426206186739518e-06, "loss": 0.1557, "step": 114740 }, { "epoch": 2.33587786259542, "grad_norm": 5.88468862406504, "learning_rate": 6.425525124135572e-06, "loss": 0.0754, "step": 114750 }, { "epoch": 2.3360814249363866, "grad_norm": 0.677680324668234, "learning_rate": 6.4248440327413134e-06, "loss": 0.1495, "step": 114760 }, { "epoch": 2.3362849872773537, "grad_norm": 10.998487961295158, "learning_rate": 6.4241629125704915e-06, "loss": 0.1179, "step": 114770 }, { "epoch": 2.3364885496183208, "grad_norm": 3.4478138647099343, "learning_rate": 6.4234817636368676e-06, "loss": 0.1001, "step": 114780 }, { "epoch": 2.3366921119592874, "grad_norm": 0.6642706071567832, "learning_rate": 6.422800585954194e-06, "loss": 0.1496, "step": 114790 }, { "epoch": 2.3368956743002545, "grad_norm": 1.3807827939708741, "learning_rate": 6.422119379536231e-06, "loss": 0.0599, "step": 114800 }, { "epoch": 2.3370992366412215, "grad_norm": 2.4343683181500855, "learning_rate": 6.421438144396735e-06, "loss": 0.0776, "step": 114810 }, { "epoch": 2.337302798982188, "grad_norm": 0.10974696646085152, "learning_rate": 6.420756880549466e-06, "loss": 0.1059, "step": 114820 }, { "epoch": 2.3375063613231553, "grad_norm": 8.257172663167564, "learning_rate": 6.42007558800818e-06, "loss": 0.1905, "step": 114830 }, { "epoch": 2.3377099236641223, "grad_norm": 22.045068112410902, "learning_rate": 6.41939426678664e-06, "loss": 0.074, "step": 114840 }, { "epoch": 2.337913486005089, "grad_norm": 10.703005772765193, "learning_rate": 6.418712916898605e-06, "loss": 0.0648, "step": 114850 }, { "epoch": 2.338117048346056, "grad_norm": 11.372704666926042, "learning_rate": 6.418031538357834e-06, "loss": 0.2667, "step": 114860 }, { "epoch": 2.338320610687023, "grad_norm": 6.55540754040613, "learning_rate": 6.417350131178091e-06, "loss": 0.0909, "step": 114870 }, { "epoch": 2.3385241730279898, "grad_norm": 6.142565760114891, "learning_rate": 6.416668695373135e-06, "loss": 0.2131, "step": 114880 }, { "epoch": 2.338727735368957, "grad_norm": 0.45844169072461444, "learning_rate": 6.41598723095673e-06, "loss": 0.1097, "step": 114890 }, { "epoch": 2.3389312977099235, "grad_norm": 2.3384267607557745, "learning_rate": 6.415305737942641e-06, "loss": 0.0847, "step": 114900 }, { "epoch": 2.3391348600508906, "grad_norm": 2.4510102198682118, "learning_rate": 6.414624216344628e-06, "loss": 0.0658, "step": 114910 }, { "epoch": 2.3393384223918576, "grad_norm": 0.3200310505536669, "learning_rate": 6.413942666176458e-06, "loss": 0.1209, "step": 114920 }, { "epoch": 2.3395419847328243, "grad_norm": 17.296664588976853, "learning_rate": 6.413261087451894e-06, "loss": 0.1362, "step": 114930 }, { "epoch": 2.3397455470737913, "grad_norm": 0.2513743997190367, "learning_rate": 6.412579480184704e-06, "loss": 0.0984, "step": 114940 }, { "epoch": 2.3399491094147584, "grad_norm": 15.062438848802927, "learning_rate": 6.411897844388648e-06, "loss": 0.1867, "step": 114950 }, { "epoch": 2.340152671755725, "grad_norm": 4.3382012671411445, "learning_rate": 6.4112161800775005e-06, "loss": 0.2098, "step": 114960 }, { "epoch": 2.340356234096692, "grad_norm": 11.449873123137165, "learning_rate": 6.410534487265023e-06, "loss": 0.0792, "step": 114970 }, { "epoch": 2.340559796437659, "grad_norm": 0.21347249743703056, "learning_rate": 6.409852765964985e-06, "loss": 0.0842, "step": 114980 }, { "epoch": 2.340763358778626, "grad_norm": 14.302951080286098, "learning_rate": 6.409171016191154e-06, "loss": 0.1835, "step": 114990 }, { "epoch": 2.340966921119593, "grad_norm": 2.9378880967162986, "learning_rate": 6.408489237957299e-06, "loss": 0.0921, "step": 115000 }, { "epoch": 2.3411704834605596, "grad_norm": 3.5880070591535556, "learning_rate": 6.407807431277191e-06, "loss": 0.0997, "step": 115010 }, { "epoch": 2.3413740458015266, "grad_norm": 4.499885521090116, "learning_rate": 6.407125596164598e-06, "loss": 0.1921, "step": 115020 }, { "epoch": 2.3415776081424937, "grad_norm": 12.17770832902591, "learning_rate": 6.406443732633292e-06, "loss": 0.1481, "step": 115030 }, { "epoch": 2.3417811704834604, "grad_norm": 5.339621207986726, "learning_rate": 6.405761840697041e-06, "loss": 0.1097, "step": 115040 }, { "epoch": 2.3419847328244274, "grad_norm": 0.37081651434832263, "learning_rate": 6.405079920369621e-06, "loss": 0.1209, "step": 115050 }, { "epoch": 2.3421882951653945, "grad_norm": 7.931958217218968, "learning_rate": 6.404397971664802e-06, "loss": 0.0921, "step": 115060 }, { "epoch": 2.342391857506361, "grad_norm": 15.003635006586766, "learning_rate": 6.403715994596358e-06, "loss": 0.0683, "step": 115070 }, { "epoch": 2.342595419847328, "grad_norm": 3.8465618097952086, "learning_rate": 6.403033989178061e-06, "loss": 0.1165, "step": 115080 }, { "epoch": 2.3427989821882953, "grad_norm": 0.02465246912236942, "learning_rate": 6.4023519554236835e-06, "loss": 0.0577, "step": 115090 }, { "epoch": 2.343002544529262, "grad_norm": 0.8367203883718023, "learning_rate": 6.401669893347003e-06, "loss": 0.1349, "step": 115100 }, { "epoch": 2.343206106870229, "grad_norm": 8.289485511362525, "learning_rate": 6.4009878029617955e-06, "loss": 0.0776, "step": 115110 }, { "epoch": 2.343409669211196, "grad_norm": 0.13941456110505251, "learning_rate": 6.400305684281833e-06, "loss": 0.1803, "step": 115120 }, { "epoch": 2.3436132315521627, "grad_norm": 1.5098960297106925, "learning_rate": 6.399623537320894e-06, "loss": 0.089, "step": 115130 }, { "epoch": 2.34381679389313, "grad_norm": 30.206412216964026, "learning_rate": 6.398941362092757e-06, "loss": 0.1622, "step": 115140 }, { "epoch": 2.344020356234097, "grad_norm": 13.096237070100988, "learning_rate": 6.398259158611194e-06, "loss": 0.1734, "step": 115150 }, { "epoch": 2.3442239185750635, "grad_norm": 20.967850686298846, "learning_rate": 6.397576926889988e-06, "loss": 0.1345, "step": 115160 }, { "epoch": 2.3444274809160306, "grad_norm": 1.2553492675081885, "learning_rate": 6.396894666942916e-06, "loss": 0.0846, "step": 115170 }, { "epoch": 2.3446310432569977, "grad_norm": 4.366322872616322, "learning_rate": 6.3962123787837575e-06, "loss": 0.1101, "step": 115180 }, { "epoch": 2.3448346055979643, "grad_norm": 19.275910086253745, "learning_rate": 6.39553006242629e-06, "loss": 0.0827, "step": 115190 }, { "epoch": 2.3450381679389314, "grad_norm": 31.579408264094134, "learning_rate": 6.394847717884296e-06, "loss": 0.2012, "step": 115200 }, { "epoch": 2.3452417302798985, "grad_norm": 14.085158269169732, "learning_rate": 6.394165345171557e-06, "loss": 0.0487, "step": 115210 }, { "epoch": 2.345445292620865, "grad_norm": 1.1529701707463653, "learning_rate": 6.393482944301853e-06, "loss": 0.0635, "step": 115220 }, { "epoch": 2.345648854961832, "grad_norm": 20.725500023460693, "learning_rate": 6.392800515288965e-06, "loss": 0.1457, "step": 115230 }, { "epoch": 2.345852417302799, "grad_norm": 0.7831650463781407, "learning_rate": 6.392118058146677e-06, "loss": 0.1592, "step": 115240 }, { "epoch": 2.346055979643766, "grad_norm": 0.12233354952881274, "learning_rate": 6.391435572888772e-06, "loss": 0.1429, "step": 115250 }, { "epoch": 2.346259541984733, "grad_norm": 22.224334924740432, "learning_rate": 6.390753059529034e-06, "loss": 0.1445, "step": 115260 }, { "epoch": 2.3464631043256996, "grad_norm": 0.7005728934843168, "learning_rate": 6.390070518081246e-06, "loss": 0.0876, "step": 115270 }, { "epoch": 2.3466666666666667, "grad_norm": 6.406723968817172, "learning_rate": 6.3893879485591935e-06, "loss": 0.0969, "step": 115280 }, { "epoch": 2.3468702290076338, "grad_norm": 12.406630653675975, "learning_rate": 6.388705350976662e-06, "loss": 0.0794, "step": 115290 }, { "epoch": 2.3470737913486004, "grad_norm": 12.875145957925694, "learning_rate": 6.388022725347437e-06, "loss": 0.2053, "step": 115300 }, { "epoch": 2.3472773536895675, "grad_norm": 11.805776850106266, "learning_rate": 6.387340071685306e-06, "loss": 0.0511, "step": 115310 }, { "epoch": 2.347480916030534, "grad_norm": 13.2350573025114, "learning_rate": 6.386657390004054e-06, "loss": 0.0658, "step": 115320 }, { "epoch": 2.347684478371501, "grad_norm": 0.2746695528297098, "learning_rate": 6.385974680317471e-06, "loss": 0.2183, "step": 115330 }, { "epoch": 2.3478880407124683, "grad_norm": 23.977420756495007, "learning_rate": 6.385291942639346e-06, "loss": 0.1258, "step": 115340 }, { "epoch": 2.348091603053435, "grad_norm": 1.1180760978706692, "learning_rate": 6.384609176983463e-06, "loss": 0.1161, "step": 115350 }, { "epoch": 2.348295165394402, "grad_norm": 1.6353698137383967, "learning_rate": 6.383926383363616e-06, "loss": 0.1985, "step": 115360 }, { "epoch": 2.348498727735369, "grad_norm": 20.881850011766865, "learning_rate": 6.383243561793593e-06, "loss": 0.1107, "step": 115370 }, { "epoch": 2.3487022900763357, "grad_norm": 1.9125684514105246, "learning_rate": 6.3825607122871834e-06, "loss": 0.1502, "step": 115380 }, { "epoch": 2.3489058524173028, "grad_norm": 0.06762281647220747, "learning_rate": 6.381877834858181e-06, "loss": 0.1525, "step": 115390 }, { "epoch": 2.34910941475827, "grad_norm": 0.18939306640703088, "learning_rate": 6.381194929520376e-06, "loss": 0.1463, "step": 115400 }, { "epoch": 2.3493129770992365, "grad_norm": 8.989951300352892, "learning_rate": 6.380511996287558e-06, "loss": 0.0673, "step": 115410 }, { "epoch": 2.3495165394402036, "grad_norm": 0.10341843916519994, "learning_rate": 6.379829035173524e-06, "loss": 0.1189, "step": 115420 }, { "epoch": 2.3497201017811706, "grad_norm": 15.930459301981436, "learning_rate": 6.379146046192066e-06, "loss": 0.064, "step": 115430 }, { "epoch": 2.3499236641221373, "grad_norm": 15.948597112335195, "learning_rate": 6.378463029356977e-06, "loss": 0.1132, "step": 115440 }, { "epoch": 2.3501272264631043, "grad_norm": 0.06749821416450705, "learning_rate": 6.377779984682051e-06, "loss": 0.1044, "step": 115450 }, { "epoch": 2.3503307888040714, "grad_norm": 25.125651492611166, "learning_rate": 6.377096912181085e-06, "loss": 0.13, "step": 115460 }, { "epoch": 2.350534351145038, "grad_norm": 1.511623445877769, "learning_rate": 6.376413811867871e-06, "loss": 0.0969, "step": 115470 }, { "epoch": 2.350737913486005, "grad_norm": 2.142432410578811, "learning_rate": 6.375730683756208e-06, "loss": 0.1207, "step": 115480 }, { "epoch": 2.350941475826972, "grad_norm": 15.086517582766318, "learning_rate": 6.375047527859892e-06, "loss": 0.0585, "step": 115490 }, { "epoch": 2.351145038167939, "grad_norm": 9.611368804378715, "learning_rate": 6.374364344192721e-06, "loss": 0.2105, "step": 115500 }, { "epoch": 2.351348600508906, "grad_norm": 11.769674214034602, "learning_rate": 6.373681132768492e-06, "loss": 0.1075, "step": 115510 }, { "epoch": 2.351552162849873, "grad_norm": 10.40627698234901, "learning_rate": 6.372997893601003e-06, "loss": 0.1488, "step": 115520 }, { "epoch": 2.3517557251908396, "grad_norm": 5.475719475355933, "learning_rate": 6.3723146267040515e-06, "loss": 0.1287, "step": 115530 }, { "epoch": 2.3519592875318067, "grad_norm": 13.814977366729952, "learning_rate": 6.3716313320914404e-06, "loss": 0.05, "step": 115540 }, { "epoch": 2.3521628498727734, "grad_norm": 0.044879683045585254, "learning_rate": 6.3709480097769675e-06, "loss": 0.0741, "step": 115550 }, { "epoch": 2.3523664122137404, "grad_norm": 0.05940289473314456, "learning_rate": 6.370264659774433e-06, "loss": 0.1579, "step": 115560 }, { "epoch": 2.3525699745547075, "grad_norm": 0.3908085796348547, "learning_rate": 6.369581282097639e-06, "loss": 0.0693, "step": 115570 }, { "epoch": 2.352773536895674, "grad_norm": 48.15198598836711, "learning_rate": 6.368897876760387e-06, "loss": 0.1539, "step": 115580 }, { "epoch": 2.3529770992366412, "grad_norm": 0.21881889742969937, "learning_rate": 6.36821444377648e-06, "loss": 0.1532, "step": 115590 }, { "epoch": 2.3531806615776083, "grad_norm": 8.969829163481826, "learning_rate": 6.36753098315972e-06, "loss": 0.1795, "step": 115600 }, { "epoch": 2.353384223918575, "grad_norm": 26.251169005925462, "learning_rate": 6.366847494923909e-06, "loss": 0.0816, "step": 115610 }, { "epoch": 2.353587786259542, "grad_norm": 0.7388359155418001, "learning_rate": 6.366163979082853e-06, "loss": 0.1882, "step": 115620 }, { "epoch": 2.353791348600509, "grad_norm": 15.56275661854788, "learning_rate": 6.3654804356503576e-06, "loss": 0.1903, "step": 115630 }, { "epoch": 2.3539949109414757, "grad_norm": 9.833389735196754, "learning_rate": 6.364796864640225e-06, "loss": 0.0911, "step": 115640 }, { "epoch": 2.354198473282443, "grad_norm": 7.764786821314805, "learning_rate": 6.364113266066262e-06, "loss": 0.0953, "step": 115650 }, { "epoch": 2.3544020356234094, "grad_norm": 0.16672553845602106, "learning_rate": 6.363429639942274e-06, "loss": 0.1931, "step": 115660 }, { "epoch": 2.3546055979643765, "grad_norm": 5.817870765221183, "learning_rate": 6.3627459862820686e-06, "loss": 0.1905, "step": 115670 }, { "epoch": 2.3548091603053436, "grad_norm": 0.09737153375884477, "learning_rate": 6.362062305099452e-06, "loss": 0.0901, "step": 115680 }, { "epoch": 2.3550127226463102, "grad_norm": 0.21265684460591194, "learning_rate": 6.361378596408234e-06, "loss": 0.1935, "step": 115690 }, { "epoch": 2.3552162849872773, "grad_norm": 9.761484218815967, "learning_rate": 6.360694860222221e-06, "loss": 0.2158, "step": 115700 }, { "epoch": 2.3554198473282444, "grad_norm": 10.637377821920515, "learning_rate": 6.360011096555224e-06, "loss": 0.1248, "step": 115710 }, { "epoch": 2.355623409669211, "grad_norm": 1.4704302833481613, "learning_rate": 6.3593273054210504e-06, "loss": 0.1612, "step": 115720 }, { "epoch": 2.355826972010178, "grad_norm": 16.55021179380974, "learning_rate": 6.358643486833511e-06, "loss": 0.1121, "step": 115730 }, { "epoch": 2.356030534351145, "grad_norm": 10.574797494290516, "learning_rate": 6.357959640806416e-06, "loss": 0.1339, "step": 115740 }, { "epoch": 2.356234096692112, "grad_norm": 1.8878050304847225, "learning_rate": 6.357275767353578e-06, "loss": 0.1551, "step": 115750 }, { "epoch": 2.356437659033079, "grad_norm": 16.540082833834244, "learning_rate": 6.356591866488807e-06, "loss": 0.1424, "step": 115760 }, { "epoch": 2.356641221374046, "grad_norm": 0.09669635329051536, "learning_rate": 6.355907938225917e-06, "loss": 0.1756, "step": 115770 }, { "epoch": 2.3568447837150126, "grad_norm": 15.435181860303627, "learning_rate": 6.355223982578718e-06, "loss": 0.1216, "step": 115780 }, { "epoch": 2.3570483460559797, "grad_norm": 0.2753837510087225, "learning_rate": 6.354539999561025e-06, "loss": 0.0499, "step": 115790 }, { "epoch": 2.3572519083969468, "grad_norm": 19.53556975460916, "learning_rate": 6.3538559891866545e-06, "loss": 0.0909, "step": 115800 }, { "epoch": 2.3574554707379134, "grad_norm": 27.426805365684196, "learning_rate": 6.353171951469418e-06, "loss": 0.1435, "step": 115810 }, { "epoch": 2.3576590330788805, "grad_norm": 25.34305301337115, "learning_rate": 6.352487886423129e-06, "loss": 0.114, "step": 115820 }, { "epoch": 2.3578625954198476, "grad_norm": 3.9452295673777606, "learning_rate": 6.351803794061607e-06, "loss": 0.0825, "step": 115830 }, { "epoch": 2.358066157760814, "grad_norm": 47.003823128829474, "learning_rate": 6.3511196743986654e-06, "loss": 0.0965, "step": 115840 }, { "epoch": 2.3582697201017813, "grad_norm": 10.497384643136478, "learning_rate": 6.350435527448122e-06, "loss": 0.1612, "step": 115850 }, { "epoch": 2.358473282442748, "grad_norm": 24.202443616711754, "learning_rate": 6.349751353223794e-06, "loss": 0.2013, "step": 115860 }, { "epoch": 2.358676844783715, "grad_norm": 0.15842241655704697, "learning_rate": 6.349067151739499e-06, "loss": 0.0612, "step": 115870 }, { "epoch": 2.358880407124682, "grad_norm": 22.537572558364996, "learning_rate": 6.348382923009056e-06, "loss": 0.112, "step": 115880 }, { "epoch": 2.3590839694656487, "grad_norm": 16.256705189977822, "learning_rate": 6.3476986670462825e-06, "loss": 0.1591, "step": 115890 }, { "epoch": 2.3592875318066158, "grad_norm": 11.260456270792224, "learning_rate": 6.347014383864997e-06, "loss": 0.1327, "step": 115900 }, { "epoch": 2.359491094147583, "grad_norm": 1.1799494305731946, "learning_rate": 6.346330073479023e-06, "loss": 0.1642, "step": 115910 }, { "epoch": 2.3596946564885495, "grad_norm": 13.290589065495691, "learning_rate": 6.345645735902179e-06, "loss": 0.1204, "step": 115920 }, { "epoch": 2.3598982188295166, "grad_norm": 13.838981836406337, "learning_rate": 6.344961371148286e-06, "loss": 0.0706, "step": 115930 }, { "epoch": 2.3601017811704836, "grad_norm": 0.08336108598791016, "learning_rate": 6.3442769792311655e-06, "loss": 0.1157, "step": 115940 }, { "epoch": 2.3603053435114503, "grad_norm": 0.21547370896290968, "learning_rate": 6.34359256016464e-06, "loss": 0.145, "step": 115950 }, { "epoch": 2.3605089058524173, "grad_norm": 1.0379477161324957, "learning_rate": 6.342908113962532e-06, "loss": 0.1512, "step": 115960 }, { "epoch": 2.360712468193384, "grad_norm": 0.48549375361481234, "learning_rate": 6.342223640638665e-06, "loss": 0.1498, "step": 115970 }, { "epoch": 2.360916030534351, "grad_norm": 0.07819603548263751, "learning_rate": 6.341539140206863e-06, "loss": 0.2041, "step": 115980 }, { "epoch": 2.361119592875318, "grad_norm": 8.19752672345549, "learning_rate": 6.34085461268095e-06, "loss": 0.2193, "step": 115990 }, { "epoch": 2.3613231552162848, "grad_norm": 24.150593376221536, "learning_rate": 6.340170058074753e-06, "loss": 0.2382, "step": 116000 }, { "epoch": 2.361526717557252, "grad_norm": 10.3160222677219, "learning_rate": 6.339485476402093e-06, "loss": 0.1067, "step": 116010 }, { "epoch": 2.361730279898219, "grad_norm": 13.831192433829552, "learning_rate": 6.338800867676799e-06, "loss": 0.1652, "step": 116020 }, { "epoch": 2.3619338422391856, "grad_norm": 8.25425588705775, "learning_rate": 6.338116231912698e-06, "loss": 0.102, "step": 116030 }, { "epoch": 2.3621374045801526, "grad_norm": 9.183501466595402, "learning_rate": 6.337431569123615e-06, "loss": 0.056, "step": 116040 }, { "epoch": 2.3623409669211197, "grad_norm": 12.600425110668613, "learning_rate": 6.3367468793233795e-06, "loss": 0.0892, "step": 116050 }, { "epoch": 2.3625445292620864, "grad_norm": 0.8036026905175319, "learning_rate": 6.33606216252582e-06, "loss": 0.1405, "step": 116060 }, { "epoch": 2.3627480916030534, "grad_norm": 17.463019382610923, "learning_rate": 6.3353774187447634e-06, "loss": 0.0518, "step": 116070 }, { "epoch": 2.3629516539440205, "grad_norm": 5.139660533693725, "learning_rate": 6.3346926479940395e-06, "loss": 0.099, "step": 116080 }, { "epoch": 2.363155216284987, "grad_norm": 0.09931710557327657, "learning_rate": 6.334007850287481e-06, "loss": 0.0671, "step": 116090 }, { "epoch": 2.3633587786259542, "grad_norm": 0.17428761216894384, "learning_rate": 6.333323025638912e-06, "loss": 0.1762, "step": 116100 }, { "epoch": 2.3635623409669213, "grad_norm": 18.80529209447588, "learning_rate": 6.332638174062168e-06, "loss": 0.1224, "step": 116110 }, { "epoch": 2.363765903307888, "grad_norm": 0.12286067181999777, "learning_rate": 6.331953295571082e-06, "loss": 0.0422, "step": 116120 }, { "epoch": 2.363969465648855, "grad_norm": 35.70029042091418, "learning_rate": 6.331268390179481e-06, "loss": 0.1364, "step": 116130 }, { "epoch": 2.364173027989822, "grad_norm": 5.2511157481404265, "learning_rate": 6.330583457901203e-06, "loss": 0.1463, "step": 116140 }, { "epoch": 2.3643765903307887, "grad_norm": 5.96669416964223, "learning_rate": 6.329898498750075e-06, "loss": 0.1572, "step": 116150 }, { "epoch": 2.364580152671756, "grad_norm": 41.562743003447146, "learning_rate": 6.329213512739936e-06, "loss": 0.118, "step": 116160 }, { "epoch": 2.364783715012723, "grad_norm": 47.08660132507849, "learning_rate": 6.328528499884619e-06, "loss": 0.1765, "step": 116170 }, { "epoch": 2.3649872773536895, "grad_norm": 0.1707245628863892, "learning_rate": 6.327843460197957e-06, "loss": 0.0294, "step": 116180 }, { "epoch": 2.3651908396946566, "grad_norm": 8.723130492215525, "learning_rate": 6.327158393693784e-06, "loss": 0.1804, "step": 116190 }, { "epoch": 2.3653944020356232, "grad_norm": 0.49004484159422235, "learning_rate": 6.32647330038594e-06, "loss": 0.2048, "step": 116200 }, { "epoch": 2.3655979643765903, "grad_norm": 1.4221551695334196, "learning_rate": 6.325788180288259e-06, "loss": 0.1061, "step": 116210 }, { "epoch": 2.3658015267175574, "grad_norm": 0.49498965061544054, "learning_rate": 6.325103033414578e-06, "loss": 0.1255, "step": 116220 }, { "epoch": 2.366005089058524, "grad_norm": 4.531759793909793, "learning_rate": 6.324417859778735e-06, "loss": 0.0897, "step": 116230 }, { "epoch": 2.366208651399491, "grad_norm": 17.690512474173072, "learning_rate": 6.323732659394566e-06, "loss": 0.2561, "step": 116240 }, { "epoch": 2.366412213740458, "grad_norm": 11.367119175730895, "learning_rate": 6.32304743227591e-06, "loss": 0.234, "step": 116250 }, { "epoch": 2.366615776081425, "grad_norm": 20.29837446442911, "learning_rate": 6.32236217843661e-06, "loss": 0.1038, "step": 116260 }, { "epoch": 2.366819338422392, "grad_norm": 16.303988832819304, "learning_rate": 6.3216768978905005e-06, "loss": 0.1819, "step": 116270 }, { "epoch": 2.367022900763359, "grad_norm": 15.986134089343642, "learning_rate": 6.320991590651425e-06, "loss": 0.1828, "step": 116280 }, { "epoch": 2.3672264631043256, "grad_norm": 13.912073854554917, "learning_rate": 6.320306256733224e-06, "loss": 0.1668, "step": 116290 }, { "epoch": 2.3674300254452927, "grad_norm": 0.16722419010591388, "learning_rate": 6.319620896149736e-06, "loss": 0.0964, "step": 116300 }, { "epoch": 2.3676335877862593, "grad_norm": 6.13128528172966, "learning_rate": 6.318935508914803e-06, "loss": 0.0519, "step": 116310 }, { "epoch": 2.3678371501272264, "grad_norm": 47.796774206616305, "learning_rate": 6.318250095042271e-06, "loss": 0.0951, "step": 116320 }, { "epoch": 2.3680407124681935, "grad_norm": 8.705369312043487, "learning_rate": 6.31756465454598e-06, "loss": 0.0695, "step": 116330 }, { "epoch": 2.36824427480916, "grad_norm": 0.5448408037198218, "learning_rate": 6.316879187439773e-06, "loss": 0.0834, "step": 116340 }, { "epoch": 2.368447837150127, "grad_norm": 0.07094547622342777, "learning_rate": 6.316193693737496e-06, "loss": 0.1316, "step": 116350 }, { "epoch": 2.3686513994910943, "grad_norm": 14.039970334906192, "learning_rate": 6.31550817345299e-06, "loss": 0.105, "step": 116360 }, { "epoch": 2.368854961832061, "grad_norm": 1.8341782984887876, "learning_rate": 6.314822626600103e-06, "loss": 0.0676, "step": 116370 }, { "epoch": 2.369058524173028, "grad_norm": 10.412670380877337, "learning_rate": 6.314137053192681e-06, "loss": 0.1515, "step": 116380 }, { "epoch": 2.369262086513995, "grad_norm": 0.062416666257383666, "learning_rate": 6.313451453244568e-06, "loss": 0.1006, "step": 116390 }, { "epoch": 2.3694656488549617, "grad_norm": 14.587212346400314, "learning_rate": 6.31276582676961e-06, "loss": 0.0908, "step": 116400 }, { "epoch": 2.3696692111959288, "grad_norm": 24.2789350983436, "learning_rate": 6.3120801737816565e-06, "loss": 0.1632, "step": 116410 }, { "epoch": 2.369872773536896, "grad_norm": 5.019709709136883, "learning_rate": 6.311394494294552e-06, "loss": 0.0877, "step": 116420 }, { "epoch": 2.3700763358778625, "grad_norm": 17.892770991734153, "learning_rate": 6.310708788322148e-06, "loss": 0.1244, "step": 116430 }, { "epoch": 2.3702798982188296, "grad_norm": 0.4059487054650139, "learning_rate": 6.310023055878292e-06, "loss": 0.0628, "step": 116440 }, { "epoch": 2.3704834605597966, "grad_norm": 2.476206411717059, "learning_rate": 6.309337296976833e-06, "loss": 0.1204, "step": 116450 }, { "epoch": 2.3706870229007633, "grad_norm": 6.903708305377568, "learning_rate": 6.308651511631621e-06, "loss": 0.1303, "step": 116460 }, { "epoch": 2.3708905852417304, "grad_norm": 0.5918145065592365, "learning_rate": 6.307965699856507e-06, "loss": 0.1253, "step": 116470 }, { "epoch": 2.3710941475826974, "grad_norm": 0.26432475227649116, "learning_rate": 6.307279861665339e-06, "loss": 0.1543, "step": 116480 }, { "epoch": 2.371297709923664, "grad_norm": 7.1828217350913555, "learning_rate": 6.3065939970719724e-06, "loss": 0.1803, "step": 116490 }, { "epoch": 2.371501272264631, "grad_norm": 0.17211118540587997, "learning_rate": 6.305908106090255e-06, "loss": 0.1825, "step": 116500 }, { "epoch": 2.371704834605598, "grad_norm": 1.3663103333058269, "learning_rate": 6.305222188734044e-06, "loss": 0.1589, "step": 116510 }, { "epoch": 2.371908396946565, "grad_norm": 0.16508070337325256, "learning_rate": 6.304536245017188e-06, "loss": 0.0461, "step": 116520 }, { "epoch": 2.372111959287532, "grad_norm": 4.276177111050773, "learning_rate": 6.303850274953542e-06, "loss": 0.0327, "step": 116530 }, { "epoch": 2.3723155216284986, "grad_norm": 29.360670793644342, "learning_rate": 6.303164278556962e-06, "loss": 0.2446, "step": 116540 }, { "epoch": 2.3725190839694656, "grad_norm": 39.09305644923871, "learning_rate": 6.3024782558413e-06, "loss": 0.0954, "step": 116550 }, { "epoch": 2.3727226463104327, "grad_norm": 3.4238445842209866, "learning_rate": 6.301792206820411e-06, "loss": 0.1662, "step": 116560 }, { "epoch": 2.3729262086513994, "grad_norm": 12.914413412779716, "learning_rate": 6.301106131508152e-06, "loss": 0.1983, "step": 116570 }, { "epoch": 2.3731297709923664, "grad_norm": 0.04897549741217172, "learning_rate": 6.300420029918381e-06, "loss": 0.1044, "step": 116580 }, { "epoch": 2.3733333333333335, "grad_norm": 37.251646069898506, "learning_rate": 6.299733902064951e-06, "loss": 0.1717, "step": 116590 }, { "epoch": 2.3735368956743, "grad_norm": 0.921077491205554, "learning_rate": 6.299047747961719e-06, "loss": 0.1596, "step": 116600 }, { "epoch": 2.3737404580152672, "grad_norm": 0.11730109080483399, "learning_rate": 6.298361567622549e-06, "loss": 0.1218, "step": 116610 }, { "epoch": 2.373944020356234, "grad_norm": 0.20607722484811264, "learning_rate": 6.297675361061291e-06, "loss": 0.1387, "step": 116620 }, { "epoch": 2.374147582697201, "grad_norm": 1.7107044209260704, "learning_rate": 6.296989128291809e-06, "loss": 0.1168, "step": 116630 }, { "epoch": 2.374351145038168, "grad_norm": 0.34346919003576887, "learning_rate": 6.296302869327961e-06, "loss": 0.1134, "step": 116640 }, { "epoch": 2.3745547073791347, "grad_norm": 23.33800521637423, "learning_rate": 6.295616584183607e-06, "loss": 0.0795, "step": 116650 }, { "epoch": 2.3747582697201017, "grad_norm": 29.294563773185104, "learning_rate": 6.2949302728726085e-06, "loss": 0.1523, "step": 116660 }, { "epoch": 2.374961832061069, "grad_norm": 12.167339613724494, "learning_rate": 6.2942439354088235e-06, "loss": 0.1391, "step": 116670 }, { "epoch": 2.3751653944020354, "grad_norm": 22.66063940202158, "learning_rate": 6.293557571806115e-06, "loss": 0.1428, "step": 116680 }, { "epoch": 2.3753689567430025, "grad_norm": 8.471421433232909, "learning_rate": 6.292871182078347e-06, "loss": 0.2202, "step": 116690 }, { "epoch": 2.3755725190839696, "grad_norm": 35.05037648523768, "learning_rate": 6.29218476623938e-06, "loss": 0.188, "step": 116700 }, { "epoch": 2.3757760814249362, "grad_norm": 21.889261290262993, "learning_rate": 6.291498324303076e-06, "loss": 0.1023, "step": 116710 }, { "epoch": 2.3759796437659033, "grad_norm": 0.2957906705256011, "learning_rate": 6.290811856283301e-06, "loss": 0.0838, "step": 116720 }, { "epoch": 2.3761832061068704, "grad_norm": 27.36031613785544, "learning_rate": 6.290125362193918e-06, "loss": 0.1172, "step": 116730 }, { "epoch": 2.376386768447837, "grad_norm": 9.120881679299675, "learning_rate": 6.28943884204879e-06, "loss": 0.0933, "step": 116740 }, { "epoch": 2.376590330788804, "grad_norm": 0.4839449547653001, "learning_rate": 6.288752295861787e-06, "loss": 0.0433, "step": 116750 }, { "epoch": 2.376793893129771, "grad_norm": 0.9928228545748, "learning_rate": 6.288065723646769e-06, "loss": 0.1893, "step": 116760 }, { "epoch": 2.376997455470738, "grad_norm": 3.785549820901757, "learning_rate": 6.287379125417604e-06, "loss": 0.1378, "step": 116770 }, { "epoch": 2.377201017811705, "grad_norm": 14.586807329143069, "learning_rate": 6.286692501188162e-06, "loss": 0.0986, "step": 116780 }, { "epoch": 2.377404580152672, "grad_norm": 26.5985090057007, "learning_rate": 6.286005850972307e-06, "loss": 0.1207, "step": 116790 }, { "epoch": 2.3776081424936386, "grad_norm": 0.035337676103504356, "learning_rate": 6.285319174783906e-06, "loss": 0.1638, "step": 116800 }, { "epoch": 2.3778117048346057, "grad_norm": 14.494893754549235, "learning_rate": 6.284632472636829e-06, "loss": 0.1449, "step": 116810 }, { "epoch": 2.3780152671755728, "grad_norm": 8.727888440390386, "learning_rate": 6.283945744544946e-06, "loss": 0.1574, "step": 116820 }, { "epoch": 2.3782188295165394, "grad_norm": 0.25205883232967535, "learning_rate": 6.283258990522124e-06, "loss": 0.1431, "step": 116830 }, { "epoch": 2.3784223918575065, "grad_norm": 44.297543747302775, "learning_rate": 6.282572210582235e-06, "loss": 0.2139, "step": 116840 }, { "epoch": 2.378625954198473, "grad_norm": 10.058328698491767, "learning_rate": 6.281885404739146e-06, "loss": 0.1225, "step": 116850 }, { "epoch": 2.37882951653944, "grad_norm": 0.1056957021044655, "learning_rate": 6.281198573006731e-06, "loss": 0.1542, "step": 116860 }, { "epoch": 2.3790330788804073, "grad_norm": 4.125072322676606, "learning_rate": 6.280511715398862e-06, "loss": 0.0444, "step": 116870 }, { "epoch": 2.379236641221374, "grad_norm": 2.966729585873416, "learning_rate": 6.27982483192941e-06, "loss": 0.0748, "step": 116880 }, { "epoch": 2.379440203562341, "grad_norm": 9.549843311110486, "learning_rate": 6.279137922612246e-06, "loss": 0.1486, "step": 116890 }, { "epoch": 2.379643765903308, "grad_norm": 9.746806023443959, "learning_rate": 6.2784509874612445e-06, "loss": 0.1006, "step": 116900 }, { "epoch": 2.3798473282442747, "grad_norm": 1.476267764813366, "learning_rate": 6.277764026490277e-06, "loss": 0.0639, "step": 116910 }, { "epoch": 2.3800508905852418, "grad_norm": 26.782221919270395, "learning_rate": 6.277077039713221e-06, "loss": 0.1999, "step": 116920 }, { "epoch": 2.3802544529262084, "grad_norm": 0.7908616833294914, "learning_rate": 6.276390027143948e-06, "loss": 0.1536, "step": 116930 }, { "epoch": 2.3804580152671755, "grad_norm": 8.61681379982935, "learning_rate": 6.2757029887963365e-06, "loss": 0.1024, "step": 116940 }, { "epoch": 2.3806615776081426, "grad_norm": 18.250643351543406, "learning_rate": 6.275015924684258e-06, "loss": 0.2071, "step": 116950 }, { "epoch": 2.380865139949109, "grad_norm": 0.09381290494140411, "learning_rate": 6.274328834821593e-06, "loss": 0.1646, "step": 116960 }, { "epoch": 2.3810687022900763, "grad_norm": 54.72644275407387, "learning_rate": 6.273641719222212e-06, "loss": 0.1359, "step": 116970 }, { "epoch": 2.3812722646310434, "grad_norm": 0.035713874167942986, "learning_rate": 6.2729545779e-06, "loss": 0.1058, "step": 116980 }, { "epoch": 2.38147582697201, "grad_norm": 0.062311169876048615, "learning_rate": 6.272267410868829e-06, "loss": 0.0868, "step": 116990 }, { "epoch": 2.381679389312977, "grad_norm": 0.21408991547046136, "learning_rate": 6.271580218142579e-06, "loss": 0.0893, "step": 117000 }, { "epoch": 2.381882951653944, "grad_norm": 0.07075608133839421, "learning_rate": 6.270892999735129e-06, "loss": 0.202, "step": 117010 }, { "epoch": 2.382086513994911, "grad_norm": 4.117621291265732, "learning_rate": 6.2702057556603566e-06, "loss": 0.1014, "step": 117020 }, { "epoch": 2.382290076335878, "grad_norm": 25.963288620088655, "learning_rate": 6.269518485932144e-06, "loss": 0.1187, "step": 117030 }, { "epoch": 2.382493638676845, "grad_norm": 0.08763172270348636, "learning_rate": 6.268831190564373e-06, "loss": 0.0841, "step": 117040 }, { "epoch": 2.3826972010178116, "grad_norm": 13.32608393397835, "learning_rate": 6.268143869570917e-06, "loss": 0.1536, "step": 117050 }, { "epoch": 2.3829007633587787, "grad_norm": 15.974707551017797, "learning_rate": 6.267456522965665e-06, "loss": 0.0571, "step": 117060 }, { "epoch": 2.3831043256997457, "grad_norm": 21.750982545636877, "learning_rate": 6.266769150762497e-06, "loss": 0.0808, "step": 117070 }, { "epoch": 2.3833078880407124, "grad_norm": 6.955197419663664, "learning_rate": 6.266081752975292e-06, "loss": 0.108, "step": 117080 }, { "epoch": 2.3835114503816794, "grad_norm": 6.779100002478008, "learning_rate": 6.265394329617938e-06, "loss": 0.0582, "step": 117090 }, { "epoch": 2.3837150127226465, "grad_norm": 21.071916250581463, "learning_rate": 6.264706880704313e-06, "loss": 0.1122, "step": 117100 }, { "epoch": 2.383918575063613, "grad_norm": 6.966516324231609, "learning_rate": 6.264019406248305e-06, "loss": 0.1695, "step": 117110 }, { "epoch": 2.3841221374045802, "grad_norm": 6.584740582336568, "learning_rate": 6.263331906263797e-06, "loss": 0.1295, "step": 117120 }, { "epoch": 2.3843256997455473, "grad_norm": 8.157710454867777, "learning_rate": 6.262644380764674e-06, "loss": 0.0897, "step": 117130 }, { "epoch": 2.384529262086514, "grad_norm": 0.06891119359126391, "learning_rate": 6.26195682976482e-06, "loss": 0.1192, "step": 117140 }, { "epoch": 2.384732824427481, "grad_norm": 15.010893113638959, "learning_rate": 6.261269253278124e-06, "loss": 0.1611, "step": 117150 }, { "epoch": 2.3849363867684477, "grad_norm": 9.401435418924251, "learning_rate": 6.260581651318471e-06, "loss": 0.1519, "step": 117160 }, { "epoch": 2.3851399491094147, "grad_norm": 0.1285247568148116, "learning_rate": 6.259894023899749e-06, "loss": 0.105, "step": 117170 }, { "epoch": 2.385343511450382, "grad_norm": 7.975439899764684, "learning_rate": 6.259206371035842e-06, "loss": 0.1414, "step": 117180 }, { "epoch": 2.3855470737913484, "grad_norm": 37.00461643797134, "learning_rate": 6.258518692740641e-06, "loss": 0.1011, "step": 117190 }, { "epoch": 2.3857506361323155, "grad_norm": 22.37718924025285, "learning_rate": 6.257830989028035e-06, "loss": 0.1921, "step": 117200 }, { "epoch": 2.3859541984732826, "grad_norm": 11.76468932501733, "learning_rate": 6.257143259911911e-06, "loss": 0.0841, "step": 117210 }, { "epoch": 2.3861577608142492, "grad_norm": 5.319522660321581, "learning_rate": 6.256455505406161e-06, "loss": 0.1136, "step": 117220 }, { "epoch": 2.3863613231552163, "grad_norm": 0.09641558333862718, "learning_rate": 6.2557677255246726e-06, "loss": 0.1925, "step": 117230 }, { "epoch": 2.3865648854961834, "grad_norm": 0.0720159273675712, "learning_rate": 6.255079920281339e-06, "loss": 0.1398, "step": 117240 }, { "epoch": 2.38676844783715, "grad_norm": 1.1511314408985192, "learning_rate": 6.254392089690049e-06, "loss": 0.0841, "step": 117250 }, { "epoch": 2.386972010178117, "grad_norm": 3.398323011342645, "learning_rate": 6.253704233764693e-06, "loss": 0.1369, "step": 117260 }, { "epoch": 2.3871755725190837, "grad_norm": 29.89560445844351, "learning_rate": 6.253016352519169e-06, "loss": 0.0796, "step": 117270 }, { "epoch": 2.387379134860051, "grad_norm": 0.8902693387028761, "learning_rate": 6.252328445967364e-06, "loss": 0.1233, "step": 117280 }, { "epoch": 2.387582697201018, "grad_norm": 7.346335732479551, "learning_rate": 6.251640514123173e-06, "loss": 0.1767, "step": 117290 }, { "epoch": 2.3877862595419845, "grad_norm": 11.494891718115309, "learning_rate": 6.250952557000489e-06, "loss": 0.1468, "step": 117300 }, { "epoch": 2.3879898218829516, "grad_norm": 13.45623185851092, "learning_rate": 6.250264574613208e-06, "loss": 0.1308, "step": 117310 }, { "epoch": 2.3881933842239187, "grad_norm": 13.05130511993382, "learning_rate": 6.249576566975224e-06, "loss": 0.1215, "step": 117320 }, { "epoch": 2.3883969465648853, "grad_norm": 0.14638606611679386, "learning_rate": 6.248888534100432e-06, "loss": 0.0559, "step": 117330 }, { "epoch": 2.3886005089058524, "grad_norm": 2.6019631484588523, "learning_rate": 6.248200476002726e-06, "loss": 0.1492, "step": 117340 }, { "epoch": 2.3888040712468195, "grad_norm": 20.648585582913398, "learning_rate": 6.2475123926960035e-06, "loss": 0.1334, "step": 117350 }, { "epoch": 2.389007633587786, "grad_norm": 0.46083103155537664, "learning_rate": 6.246824284194164e-06, "loss": 0.1489, "step": 117360 }, { "epoch": 2.389211195928753, "grad_norm": 7.003279866840412, "learning_rate": 6.2461361505111e-06, "loss": 0.0925, "step": 117370 }, { "epoch": 2.3894147582697203, "grad_norm": 6.623789170380698, "learning_rate": 6.245447991660712e-06, "loss": 0.1223, "step": 117380 }, { "epoch": 2.389618320610687, "grad_norm": 4.937469608410511, "learning_rate": 6.244759807656897e-06, "loss": 0.0908, "step": 117390 }, { "epoch": 2.389821882951654, "grad_norm": 9.478439090133955, "learning_rate": 6.244071598513555e-06, "loss": 0.1472, "step": 117400 }, { "epoch": 2.390025445292621, "grad_norm": 11.972520547465395, "learning_rate": 6.2433833642445854e-06, "loss": 0.1198, "step": 117410 }, { "epoch": 2.3902290076335877, "grad_norm": 0.1078559985561388, "learning_rate": 6.242695104863886e-06, "loss": 0.1015, "step": 117420 }, { "epoch": 2.3904325699745548, "grad_norm": 3.947571661375435, "learning_rate": 6.242006820385359e-06, "loss": 0.0953, "step": 117430 }, { "epoch": 2.390636132315522, "grad_norm": 24.68062973644417, "learning_rate": 6.241318510822905e-06, "loss": 0.1625, "step": 117440 }, { "epoch": 2.3908396946564885, "grad_norm": 3.6389651798230207, "learning_rate": 6.240630176190425e-06, "loss": 0.08, "step": 117450 }, { "epoch": 2.3910432569974556, "grad_norm": 15.793328191545017, "learning_rate": 6.23994181650182e-06, "loss": 0.085, "step": 117460 }, { "epoch": 2.391246819338422, "grad_norm": 0.06847323010414547, "learning_rate": 6.239253431770993e-06, "loss": 0.13, "step": 117470 }, { "epoch": 2.3914503816793893, "grad_norm": 14.152029855391925, "learning_rate": 6.238565022011847e-06, "loss": 0.127, "step": 117480 }, { "epoch": 2.3916539440203564, "grad_norm": 3.2351268767088555, "learning_rate": 6.2378765872382855e-06, "loss": 0.1645, "step": 117490 }, { "epoch": 2.391857506361323, "grad_norm": 7.476641000377561, "learning_rate": 6.237188127464211e-06, "loss": 0.1106, "step": 117500 }, { "epoch": 2.39206106870229, "grad_norm": 0.04701068878142868, "learning_rate": 6.23649964270353e-06, "loss": 0.0732, "step": 117510 }, { "epoch": 2.392264631043257, "grad_norm": 17.2133769512338, "learning_rate": 6.235811132970144e-06, "loss": 0.1638, "step": 117520 }, { "epoch": 2.392468193384224, "grad_norm": 7.37533745741105, "learning_rate": 6.235122598277963e-06, "loss": 0.2172, "step": 117530 }, { "epoch": 2.392671755725191, "grad_norm": 0.047050000439441075, "learning_rate": 6.23443403864089e-06, "loss": 0.1059, "step": 117540 }, { "epoch": 2.392875318066158, "grad_norm": 0.2828403858978917, "learning_rate": 6.23374545407283e-06, "loss": 0.0894, "step": 117550 }, { "epoch": 2.3930788804071246, "grad_norm": 28.110124756732276, "learning_rate": 6.233056844587693e-06, "loss": 0.163, "step": 117560 }, { "epoch": 2.3932824427480917, "grad_norm": 3.499874584802958, "learning_rate": 6.232368210199383e-06, "loss": 0.1341, "step": 117570 }, { "epoch": 2.3934860050890583, "grad_norm": 2.0164338616027706, "learning_rate": 6.231679550921811e-06, "loss": 0.0189, "step": 117580 }, { "epoch": 2.3936895674300254, "grad_norm": 18.74165505912223, "learning_rate": 6.230990866768885e-06, "loss": 0.1478, "step": 117590 }, { "epoch": 2.3938931297709924, "grad_norm": 0.33348263949642454, "learning_rate": 6.230302157754511e-06, "loss": 0.1908, "step": 117600 }, { "epoch": 2.394096692111959, "grad_norm": 7.994151231642806, "learning_rate": 6.229613423892601e-06, "loss": 0.0796, "step": 117610 }, { "epoch": 2.394300254452926, "grad_norm": 9.278735257505694, "learning_rate": 6.228924665197066e-06, "loss": 0.2149, "step": 117620 }, { "epoch": 2.3945038167938932, "grad_norm": 14.25648294747429, "learning_rate": 6.2282358816818115e-06, "loss": 0.1359, "step": 117630 }, { "epoch": 2.39470737913486, "grad_norm": 0.010112028964588108, "learning_rate": 6.227547073360752e-06, "loss": 0.1158, "step": 117640 }, { "epoch": 2.394910941475827, "grad_norm": 0.0678686062396207, "learning_rate": 6.226858240247801e-06, "loss": 0.0032, "step": 117650 }, { "epoch": 2.395114503816794, "grad_norm": 64.54144035998924, "learning_rate": 6.226169382356866e-06, "loss": 0.1692, "step": 117660 }, { "epoch": 2.3953180661577607, "grad_norm": 0.3188375792578455, "learning_rate": 6.2254804997018604e-06, "loss": 0.089, "step": 117670 }, { "epoch": 2.3955216284987277, "grad_norm": 0.033335593195875694, "learning_rate": 6.224791592296697e-06, "loss": 0.0802, "step": 117680 }, { "epoch": 2.395725190839695, "grad_norm": 6.770600990793256, "learning_rate": 6.224102660155291e-06, "loss": 0.1283, "step": 117690 }, { "epoch": 2.3959287531806615, "grad_norm": 15.608986454545068, "learning_rate": 6.223413703291555e-06, "loss": 0.1105, "step": 117700 }, { "epoch": 2.3961323155216285, "grad_norm": 0.010560487167912377, "learning_rate": 6.2227247217194045e-06, "loss": 0.0907, "step": 117710 }, { "epoch": 2.3963358778625956, "grad_norm": 0.05931141236308838, "learning_rate": 6.222035715452753e-06, "loss": 0.1168, "step": 117720 }, { "epoch": 2.3965394402035622, "grad_norm": 17.799766444624854, "learning_rate": 6.221346684505516e-06, "loss": 0.0633, "step": 117730 }, { "epoch": 2.3967430025445293, "grad_norm": 14.541394911780097, "learning_rate": 6.2206576288916085e-06, "loss": 0.0817, "step": 117740 }, { "epoch": 2.3969465648854964, "grad_norm": 11.343210494457614, "learning_rate": 6.219968548624948e-06, "loss": 0.2022, "step": 117750 }, { "epoch": 2.397150127226463, "grad_norm": 5.4976719407273995, "learning_rate": 6.219279443719454e-06, "loss": 0.2071, "step": 117760 }, { "epoch": 2.39735368956743, "grad_norm": 20.882462359765263, "learning_rate": 6.218590314189039e-06, "loss": 0.1449, "step": 117770 }, { "epoch": 2.397557251908397, "grad_norm": 8.111154995403092, "learning_rate": 6.217901160047624e-06, "loss": 0.0753, "step": 117780 }, { "epoch": 2.397760814249364, "grad_norm": 0.08671527162878631, "learning_rate": 6.217211981309126e-06, "loss": 0.0874, "step": 117790 }, { "epoch": 2.397964376590331, "grad_norm": 4.548699177282851, "learning_rate": 6.216522777987464e-06, "loss": 0.074, "step": 117800 }, { "epoch": 2.3981679389312975, "grad_norm": 36.010964506929355, "learning_rate": 6.215833550096557e-06, "loss": 0.2052, "step": 117810 }, { "epoch": 2.3983715012722646, "grad_norm": 0.10163336741939193, "learning_rate": 6.215144297650327e-06, "loss": 0.1088, "step": 117820 }, { "epoch": 2.3985750636132317, "grad_norm": 14.132387424523145, "learning_rate": 6.214455020662693e-06, "loss": 0.0754, "step": 117830 }, { "epoch": 2.3987786259541983, "grad_norm": 7.477616615382683, "learning_rate": 6.213765719147573e-06, "loss": 0.1772, "step": 117840 }, { "epoch": 2.3989821882951654, "grad_norm": 15.54082620073489, "learning_rate": 6.213076393118894e-06, "loss": 0.2081, "step": 117850 }, { "epoch": 2.3991857506361325, "grad_norm": 14.105123332015918, "learning_rate": 6.2123870425905745e-06, "loss": 0.15, "step": 117860 }, { "epoch": 2.399389312977099, "grad_norm": 29.976440802992034, "learning_rate": 6.211697667576535e-06, "loss": 0.2041, "step": 117870 }, { "epoch": 2.399592875318066, "grad_norm": 0.21491278587991833, "learning_rate": 6.211008268090702e-06, "loss": 0.0879, "step": 117880 }, { "epoch": 2.399796437659033, "grad_norm": 0.4798494469691551, "learning_rate": 6.210318844146997e-06, "loss": 0.0886, "step": 117890 }, { "epoch": 2.4, "grad_norm": 0.08736379471214299, "learning_rate": 6.2096293957593445e-06, "loss": 0.1744, "step": 117900 }, { "epoch": 2.400203562340967, "grad_norm": 25.770794660992312, "learning_rate": 6.208939922941668e-06, "loss": 0.081, "step": 117910 }, { "epoch": 2.4004071246819336, "grad_norm": 1.6771029901653074, "learning_rate": 6.208250425707891e-06, "loss": 0.1767, "step": 117920 }, { "epoch": 2.4006106870229007, "grad_norm": 0.5557487692923392, "learning_rate": 6.207560904071943e-06, "loss": 0.0451, "step": 117930 }, { "epoch": 2.400814249363868, "grad_norm": 13.881338992076852, "learning_rate": 6.206871358047747e-06, "loss": 0.0672, "step": 117940 }, { "epoch": 2.4010178117048344, "grad_norm": 13.481279332311429, "learning_rate": 6.206181787649228e-06, "loss": 0.1111, "step": 117950 }, { "epoch": 2.4012213740458015, "grad_norm": 1.0558966925822624, "learning_rate": 6.205492192890315e-06, "loss": 0.1394, "step": 117960 }, { "epoch": 2.4014249363867686, "grad_norm": 15.258184622081393, "learning_rate": 6.204802573784935e-06, "loss": 0.1803, "step": 117970 }, { "epoch": 2.401628498727735, "grad_norm": 1.8621726103425869, "learning_rate": 6.204112930347014e-06, "loss": 0.1002, "step": 117980 }, { "epoch": 2.4018320610687023, "grad_norm": 22.829656800864615, "learning_rate": 6.203423262590482e-06, "loss": 0.0812, "step": 117990 }, { "epoch": 2.4020356234096694, "grad_norm": 0.6450552931219928, "learning_rate": 6.202733570529268e-06, "loss": 0.0716, "step": 118000 }, { "epoch": 2.402239185750636, "grad_norm": 19.307667678932134, "learning_rate": 6.2020438541773e-06, "loss": 0.1573, "step": 118010 }, { "epoch": 2.402442748091603, "grad_norm": 4.881337876028206, "learning_rate": 6.2013541135485075e-06, "loss": 0.0756, "step": 118020 }, { "epoch": 2.40264631043257, "grad_norm": 20.32605487886667, "learning_rate": 6.200664348656822e-06, "loss": 0.1137, "step": 118030 }, { "epoch": 2.402849872773537, "grad_norm": 8.37063613846427, "learning_rate": 6.199974559516173e-06, "loss": 0.1899, "step": 118040 }, { "epoch": 2.403053435114504, "grad_norm": 10.121684480408238, "learning_rate": 6.199284746140493e-06, "loss": 0.2753, "step": 118050 }, { "epoch": 2.403256997455471, "grad_norm": 34.80789452998231, "learning_rate": 6.198594908543712e-06, "loss": 0.2452, "step": 118060 }, { "epoch": 2.4034605597964376, "grad_norm": 41.284437146219084, "learning_rate": 6.197905046739764e-06, "loss": 0.1725, "step": 118070 }, { "epoch": 2.4036641221374047, "grad_norm": 6.480109781546545, "learning_rate": 6.19721516074258e-06, "loss": 0.1174, "step": 118080 }, { "epoch": 2.4038676844783717, "grad_norm": 4.705923656665812, "learning_rate": 6.196525250566095e-06, "loss": 0.1064, "step": 118090 }, { "epoch": 2.4040712468193384, "grad_norm": 5.238603420205559, "learning_rate": 6.195835316224239e-06, "loss": 0.1211, "step": 118100 }, { "epoch": 2.4042748091603054, "grad_norm": 47.98989210553089, "learning_rate": 6.195145357730952e-06, "loss": 0.1042, "step": 118110 }, { "epoch": 2.404478371501272, "grad_norm": 0.01476339612712056, "learning_rate": 6.194455375100161e-06, "loss": 0.0353, "step": 118120 }, { "epoch": 2.404681933842239, "grad_norm": 5.847606070927242, "learning_rate": 6.193765368345808e-06, "loss": 0.1011, "step": 118130 }, { "epoch": 2.4048854961832062, "grad_norm": 0.02210042145860566, "learning_rate": 6.193075337481825e-06, "loss": 0.1822, "step": 118140 }, { "epoch": 2.405089058524173, "grad_norm": 2.746570220844818, "learning_rate": 6.19238528252215e-06, "loss": 0.1692, "step": 118150 }, { "epoch": 2.40529262086514, "grad_norm": 9.768350596399124, "learning_rate": 6.1916952034807175e-06, "loss": 0.134, "step": 118160 }, { "epoch": 2.405496183206107, "grad_norm": 1.0869931618776427, "learning_rate": 6.191005100371465e-06, "loss": 0.1184, "step": 118170 }, { "epoch": 2.4056997455470737, "grad_norm": 12.758132622465242, "learning_rate": 6.190314973208331e-06, "loss": 0.0759, "step": 118180 }, { "epoch": 2.4059033078880407, "grad_norm": 9.298399347411614, "learning_rate": 6.189624822005254e-06, "loss": 0.2447, "step": 118190 }, { "epoch": 2.406106870229008, "grad_norm": 1.210499784583622, "learning_rate": 6.18893464677617e-06, "loss": 0.1058, "step": 118200 }, { "epoch": 2.4063104325699745, "grad_norm": 30.19872042031312, "learning_rate": 6.18824444753502e-06, "loss": 0.0937, "step": 118210 }, { "epoch": 2.4065139949109415, "grad_norm": 7.318780979825557, "learning_rate": 6.187554224295743e-06, "loss": 0.1623, "step": 118220 }, { "epoch": 2.406717557251908, "grad_norm": 4.75834321673728, "learning_rate": 6.186863977072279e-06, "loss": 0.1367, "step": 118230 }, { "epoch": 2.4069211195928752, "grad_norm": 0.4990714468550271, "learning_rate": 6.186173705878569e-06, "loss": 0.0757, "step": 118240 }, { "epoch": 2.4071246819338423, "grad_norm": 0.1697670609932283, "learning_rate": 6.185483410728553e-06, "loss": 0.1322, "step": 118250 }, { "epoch": 2.407328244274809, "grad_norm": 21.168889317457584, "learning_rate": 6.184793091636171e-06, "loss": 0.1505, "step": 118260 }, { "epoch": 2.407531806615776, "grad_norm": 4.530398302479666, "learning_rate": 6.184102748615367e-06, "loss": 0.077, "step": 118270 }, { "epoch": 2.407735368956743, "grad_norm": 15.976581568458858, "learning_rate": 6.183412381680085e-06, "loss": 0.1123, "step": 118280 }, { "epoch": 2.4079389312977098, "grad_norm": 0.7572209443700156, "learning_rate": 6.182721990844263e-06, "loss": 0.105, "step": 118290 }, { "epoch": 2.408142493638677, "grad_norm": 0.39369418468166334, "learning_rate": 6.182031576121848e-06, "loss": 0.1043, "step": 118300 }, { "epoch": 2.408346055979644, "grad_norm": 7.695162712964158, "learning_rate": 6.181341137526784e-06, "loss": 0.1991, "step": 118310 }, { "epoch": 2.4085496183206105, "grad_norm": 0.21909823086955563, "learning_rate": 6.180650675073012e-06, "loss": 0.058, "step": 118320 }, { "epoch": 2.4087531806615776, "grad_norm": 19.26642950534806, "learning_rate": 6.17996018877448e-06, "loss": 0.1524, "step": 118330 }, { "epoch": 2.4089567430025447, "grad_norm": 15.943448232356227, "learning_rate": 6.179269678645133e-06, "loss": 0.1245, "step": 118340 }, { "epoch": 2.4091603053435113, "grad_norm": 8.830585909400202, "learning_rate": 6.178579144698915e-06, "loss": 0.186, "step": 118350 }, { "epoch": 2.4093638676844784, "grad_norm": 0.4138361264703838, "learning_rate": 6.177888586949774e-06, "loss": 0.1471, "step": 118360 }, { "epoch": 2.4095674300254455, "grad_norm": 24.837770898716364, "learning_rate": 6.177198005411656e-06, "loss": 0.1427, "step": 118370 }, { "epoch": 2.409770992366412, "grad_norm": 5.683199259755108, "learning_rate": 6.1765074000985055e-06, "loss": 0.1123, "step": 118380 }, { "epoch": 2.409974554707379, "grad_norm": 2.339601565501972, "learning_rate": 6.175816771024275e-06, "loss": 0.1097, "step": 118390 }, { "epoch": 2.4101781170483463, "grad_norm": 5.556947087060909, "learning_rate": 6.175126118202912e-06, "loss": 0.175, "step": 118400 }, { "epoch": 2.410381679389313, "grad_norm": 29.708756218402694, "learning_rate": 6.1744354416483595e-06, "loss": 0.1453, "step": 118410 }, { "epoch": 2.41058524173028, "grad_norm": 14.6743362731203, "learning_rate": 6.173744741374574e-06, "loss": 0.1763, "step": 118420 }, { "epoch": 2.4107888040712466, "grad_norm": 41.02589641638212, "learning_rate": 6.1730540173955e-06, "loss": 0.0919, "step": 118430 }, { "epoch": 2.4109923664122137, "grad_norm": 0.08668588202057027, "learning_rate": 6.17236326972509e-06, "loss": 0.1723, "step": 118440 }, { "epoch": 2.411195928753181, "grad_norm": 9.155690469521783, "learning_rate": 6.1716724983772936e-06, "loss": 0.0575, "step": 118450 }, { "epoch": 2.4113994910941474, "grad_norm": 17.495460229362944, "learning_rate": 6.1709817033660615e-06, "loss": 0.2163, "step": 118460 }, { "epoch": 2.4116030534351145, "grad_norm": 0.06396369205937077, "learning_rate": 6.170290884705346e-06, "loss": 0.1227, "step": 118470 }, { "epoch": 2.4118066157760816, "grad_norm": 16.26894956537939, "learning_rate": 6.169600042409098e-06, "loss": 0.1627, "step": 118480 }, { "epoch": 2.412010178117048, "grad_norm": 8.210134406791926, "learning_rate": 6.168909176491271e-06, "loss": 0.1521, "step": 118490 }, { "epoch": 2.4122137404580153, "grad_norm": 8.072843511502874, "learning_rate": 6.1682182869658184e-06, "loss": 0.2172, "step": 118500 }, { "epoch": 2.4124173027989824, "grad_norm": 0.7961177926450825, "learning_rate": 6.167527373846693e-06, "loss": 0.0495, "step": 118510 }, { "epoch": 2.412620865139949, "grad_norm": 17.711965062265556, "learning_rate": 6.166836437147849e-06, "loss": 0.1227, "step": 118520 }, { "epoch": 2.412824427480916, "grad_norm": 0.2710979900614042, "learning_rate": 6.166145476883239e-06, "loss": 0.066, "step": 118530 }, { "epoch": 2.4130279898218827, "grad_norm": 16.496627530402332, "learning_rate": 6.165454493066818e-06, "loss": 0.1771, "step": 118540 }, { "epoch": 2.41323155216285, "grad_norm": 28.30709557110962, "learning_rate": 6.164763485712544e-06, "loss": 0.0744, "step": 118550 }, { "epoch": 2.413435114503817, "grad_norm": 0.04029527511561662, "learning_rate": 6.164072454834371e-06, "loss": 0.1591, "step": 118560 }, { "epoch": 2.4136386768447835, "grad_norm": 1.1928265881779052, "learning_rate": 6.163381400446256e-06, "loss": 0.1899, "step": 118570 }, { "epoch": 2.4138422391857506, "grad_norm": 0.03279044577035408, "learning_rate": 6.1626903225621535e-06, "loss": 0.1741, "step": 118580 }, { "epoch": 2.4140458015267177, "grad_norm": 3.9172300869524923, "learning_rate": 6.161999221196025e-06, "loss": 0.0734, "step": 118590 }, { "epoch": 2.4142493638676843, "grad_norm": 0.7733227170354404, "learning_rate": 6.161308096361824e-06, "loss": 0.1346, "step": 118600 }, { "epoch": 2.4144529262086514, "grad_norm": 0.860392043336961, "learning_rate": 6.16061694807351e-06, "loss": 0.1129, "step": 118610 }, { "epoch": 2.4146564885496185, "grad_norm": 0.31640205236100083, "learning_rate": 6.159925776345042e-06, "loss": 0.0537, "step": 118620 }, { "epoch": 2.414860050890585, "grad_norm": 8.185159323106031, "learning_rate": 6.159234581190379e-06, "loss": 0.1189, "step": 118630 }, { "epoch": 2.415063613231552, "grad_norm": 0.14671190537624887, "learning_rate": 6.158543362623481e-06, "loss": 0.1196, "step": 118640 }, { "epoch": 2.4152671755725192, "grad_norm": 9.655186634987654, "learning_rate": 6.157852120658307e-06, "loss": 0.1334, "step": 118650 }, { "epoch": 2.415470737913486, "grad_norm": 0.012448020328693502, "learning_rate": 6.157160855308819e-06, "loss": 0.1029, "step": 118660 }, { "epoch": 2.415674300254453, "grad_norm": 0.03581520122524354, "learning_rate": 6.156469566588976e-06, "loss": 0.1529, "step": 118670 }, { "epoch": 2.41587786259542, "grad_norm": 9.661897840952427, "learning_rate": 6.155778254512742e-06, "loss": 0.0147, "step": 118680 }, { "epoch": 2.4160814249363867, "grad_norm": 9.03780309940047, "learning_rate": 6.155086919094078e-06, "loss": 0.108, "step": 118690 }, { "epoch": 2.4162849872773537, "grad_norm": 0.44584183699239577, "learning_rate": 6.154395560346943e-06, "loss": 0.1139, "step": 118700 }, { "epoch": 2.416488549618321, "grad_norm": 12.880227383201046, "learning_rate": 6.153704178285306e-06, "loss": 0.2139, "step": 118710 }, { "epoch": 2.4166921119592875, "grad_norm": 0.343262627813096, "learning_rate": 6.153012772923125e-06, "loss": 0.1222, "step": 118720 }, { "epoch": 2.4168956743002545, "grad_norm": 35.66562041462681, "learning_rate": 6.1523213442743665e-06, "loss": 0.1198, "step": 118730 }, { "epoch": 2.4170992366412216, "grad_norm": 16.26985402262577, "learning_rate": 6.1516298923529935e-06, "loss": 0.1105, "step": 118740 }, { "epoch": 2.4173027989821882, "grad_norm": 25.989382432921058, "learning_rate": 6.150938417172972e-06, "loss": 0.095, "step": 118750 }, { "epoch": 2.4175063613231553, "grad_norm": 4.510547515587578, "learning_rate": 6.150246918748268e-06, "loss": 0.2427, "step": 118760 }, { "epoch": 2.417709923664122, "grad_norm": 0.16275715476346508, "learning_rate": 6.149555397092845e-06, "loss": 0.138, "step": 118770 }, { "epoch": 2.417913486005089, "grad_norm": 37.928573212514344, "learning_rate": 6.14886385222067e-06, "loss": 0.1117, "step": 118780 }, { "epoch": 2.418117048346056, "grad_norm": 7.867471029204572, "learning_rate": 6.1481722841457105e-06, "loss": 0.1421, "step": 118790 }, { "epoch": 2.4183206106870228, "grad_norm": 7.972659158617781, "learning_rate": 6.147480692881933e-06, "loss": 0.184, "step": 118800 }, { "epoch": 2.41852417302799, "grad_norm": 27.877509344765134, "learning_rate": 6.146789078443305e-06, "loss": 0.2408, "step": 118810 }, { "epoch": 2.418727735368957, "grad_norm": 0.10341525616562591, "learning_rate": 6.1460974408437926e-06, "loss": 0.1065, "step": 118820 }, { "epoch": 2.4189312977099235, "grad_norm": 0.21841788843006038, "learning_rate": 6.145405780097368e-06, "loss": 0.0999, "step": 118830 }, { "epoch": 2.4191348600508906, "grad_norm": 6.692132678546798, "learning_rate": 6.144714096217998e-06, "loss": 0.0909, "step": 118840 }, { "epoch": 2.4193384223918573, "grad_norm": 7.933694464005019, "learning_rate": 6.144022389219652e-06, "loss": 0.0566, "step": 118850 }, { "epoch": 2.4195419847328243, "grad_norm": 0.12702665330026056, "learning_rate": 6.143330659116302e-06, "loss": 0.1405, "step": 118860 }, { "epoch": 2.4197455470737914, "grad_norm": 11.331833362297706, "learning_rate": 6.1426389059219135e-06, "loss": 0.2376, "step": 118870 }, { "epoch": 2.419949109414758, "grad_norm": 0.11389832192665174, "learning_rate": 6.141947129650463e-06, "loss": 0.1217, "step": 118880 }, { "epoch": 2.420152671755725, "grad_norm": 7.656626990164072, "learning_rate": 6.141255330315919e-06, "loss": 0.1216, "step": 118890 }, { "epoch": 2.420356234096692, "grad_norm": 21.364949656291696, "learning_rate": 6.140563507932255e-06, "loss": 0.1159, "step": 118900 }, { "epoch": 2.420559796437659, "grad_norm": 16.095170327308196, "learning_rate": 6.13987166251344e-06, "loss": 0.1852, "step": 118910 }, { "epoch": 2.420763358778626, "grad_norm": 50.29279648224078, "learning_rate": 6.1391797940734485e-06, "loss": 0.1397, "step": 118920 }, { "epoch": 2.420966921119593, "grad_norm": 0.452782284868873, "learning_rate": 6.138487902626255e-06, "loss": 0.0616, "step": 118930 }, { "epoch": 2.4211704834605596, "grad_norm": 5.881801598868178, "learning_rate": 6.137795988185831e-06, "loss": 0.1589, "step": 118940 }, { "epoch": 2.4213740458015267, "grad_norm": 14.985361498557815, "learning_rate": 6.137104050766152e-06, "loss": 0.099, "step": 118950 }, { "epoch": 2.421577608142494, "grad_norm": 9.728335869783885, "learning_rate": 6.136412090381192e-06, "loss": 0.16, "step": 118960 }, { "epoch": 2.4217811704834604, "grad_norm": 4.2088927427496206, "learning_rate": 6.135720107044927e-06, "loss": 0.0471, "step": 118970 }, { "epoch": 2.4219847328244275, "grad_norm": 6.54426242765128, "learning_rate": 6.135028100771331e-06, "loss": 0.228, "step": 118980 }, { "epoch": 2.4221882951653946, "grad_norm": 34.500058454781836, "learning_rate": 6.13433607157438e-06, "loss": 0.1922, "step": 118990 }, { "epoch": 2.422391857506361, "grad_norm": 3.402950971623663, "learning_rate": 6.133644019468052e-06, "loss": 0.159, "step": 119000 }, { "epoch": 2.4225954198473283, "grad_norm": 0.15484935667512364, "learning_rate": 6.132951944466322e-06, "loss": 0.1231, "step": 119010 }, { "epoch": 2.4227989821882954, "grad_norm": 0.07504870880099157, "learning_rate": 6.132259846583169e-06, "loss": 0.0869, "step": 119020 }, { "epoch": 2.423002544529262, "grad_norm": 6.978771310810149, "learning_rate": 6.13156772583257e-06, "loss": 0.1518, "step": 119030 }, { "epoch": 2.423206106870229, "grad_norm": 0.06032659613488736, "learning_rate": 6.130875582228503e-06, "loss": 0.0915, "step": 119040 }, { "epoch": 2.423409669211196, "grad_norm": 15.55592636184415, "learning_rate": 6.130183415784947e-06, "loss": 0.2178, "step": 119050 }, { "epoch": 2.423613231552163, "grad_norm": 9.829713243211238, "learning_rate": 6.1294912265158825e-06, "loss": 0.0837, "step": 119060 }, { "epoch": 2.42381679389313, "grad_norm": 9.56153935040701, "learning_rate": 6.128799014435285e-06, "loss": 0.0635, "step": 119070 }, { "epoch": 2.4240203562340965, "grad_norm": 26.953462279709676, "learning_rate": 6.128106779557139e-06, "loss": 0.0899, "step": 119080 }, { "epoch": 2.4242239185750636, "grad_norm": 13.647669254849033, "learning_rate": 6.127414521895425e-06, "loss": 0.2999, "step": 119090 }, { "epoch": 2.4244274809160307, "grad_norm": 6.353539338256001, "learning_rate": 6.126722241464121e-06, "loss": 0.099, "step": 119100 }, { "epoch": 2.4246310432569973, "grad_norm": 15.49415380165592, "learning_rate": 6.1260299382772104e-06, "loss": 0.1342, "step": 119110 }, { "epoch": 2.4248346055979644, "grad_norm": 6.611071795099505, "learning_rate": 6.125337612348675e-06, "loss": 0.1778, "step": 119120 }, { "epoch": 2.4250381679389315, "grad_norm": 15.466223822715861, "learning_rate": 6.124645263692498e-06, "loss": 0.1604, "step": 119130 }, { "epoch": 2.425241730279898, "grad_norm": 8.634595467735743, "learning_rate": 6.12395289232266e-06, "loss": 0.1072, "step": 119140 }, { "epoch": 2.425445292620865, "grad_norm": 18.946632186322088, "learning_rate": 6.123260498253146e-06, "loss": 0.1053, "step": 119150 }, { "epoch": 2.4256488549618322, "grad_norm": 7.013172413030905, "learning_rate": 6.12256808149794e-06, "loss": 0.0915, "step": 119160 }, { "epoch": 2.425852417302799, "grad_norm": 0.3714597670405954, "learning_rate": 6.1218756420710256e-06, "loss": 0.0913, "step": 119170 }, { "epoch": 2.426055979643766, "grad_norm": 17.565443431210078, "learning_rate": 6.1211831799863885e-06, "loss": 0.1401, "step": 119180 }, { "epoch": 2.4262595419847326, "grad_norm": 0.11280034805144523, "learning_rate": 6.120490695258011e-06, "loss": 0.2561, "step": 119190 }, { "epoch": 2.4264631043256997, "grad_norm": 0.4432492440829199, "learning_rate": 6.1197981878998824e-06, "loss": 0.11, "step": 119200 }, { "epoch": 2.4266666666666667, "grad_norm": 0.48345524446025645, "learning_rate": 6.119105657925987e-06, "loss": 0.0744, "step": 119210 }, { "epoch": 2.4268702290076334, "grad_norm": 26.174473116313237, "learning_rate": 6.11841310535031e-06, "loss": 0.1341, "step": 119220 }, { "epoch": 2.4270737913486005, "grad_norm": 10.483547860599039, "learning_rate": 6.1177205301868415e-06, "loss": 0.0752, "step": 119230 }, { "epoch": 2.4272773536895675, "grad_norm": 0.7955702307460119, "learning_rate": 6.117027932449566e-06, "loss": 0.1872, "step": 119240 }, { "epoch": 2.427480916030534, "grad_norm": 17.63837302447248, "learning_rate": 6.116335312152474e-06, "loss": 0.0963, "step": 119250 }, { "epoch": 2.4276844783715013, "grad_norm": 12.261712959988285, "learning_rate": 6.115642669309552e-06, "loss": 0.2302, "step": 119260 }, { "epoch": 2.4278880407124683, "grad_norm": 9.042451320951411, "learning_rate": 6.114950003934789e-06, "loss": 0.2764, "step": 119270 }, { "epoch": 2.428091603053435, "grad_norm": 19.378455693135408, "learning_rate": 6.1142573160421736e-06, "loss": 0.1726, "step": 119280 }, { "epoch": 2.428295165394402, "grad_norm": 6.754729479586266, "learning_rate": 6.113564605645699e-06, "loss": 0.2702, "step": 119290 }, { "epoch": 2.428498727735369, "grad_norm": 27.186807436486614, "learning_rate": 6.1128718727593515e-06, "loss": 0.1435, "step": 119300 }, { "epoch": 2.4287022900763358, "grad_norm": 0.3811808599734167, "learning_rate": 6.112179117397124e-06, "loss": 0.0959, "step": 119310 }, { "epoch": 2.428905852417303, "grad_norm": 0.6322184812996247, "learning_rate": 6.1114863395730055e-06, "loss": 0.0975, "step": 119320 }, { "epoch": 2.42910941475827, "grad_norm": 7.361879414166951, "learning_rate": 6.1107935393009885e-06, "loss": 0.1085, "step": 119330 }, { "epoch": 2.4293129770992365, "grad_norm": 2.8785487507545127, "learning_rate": 6.110100716595067e-06, "loss": 0.114, "step": 119340 }, { "epoch": 2.4295165394402036, "grad_norm": 17.739105008722074, "learning_rate": 6.109407871469231e-06, "loss": 0.0935, "step": 119350 }, { "epoch": 2.4297201017811707, "grad_norm": 24.81513222174223, "learning_rate": 6.108715003937473e-06, "loss": 0.1666, "step": 119360 }, { "epoch": 2.4299236641221373, "grad_norm": 0.28443704468414654, "learning_rate": 6.108022114013788e-06, "loss": 0.2101, "step": 119370 }, { "epoch": 2.4301272264631044, "grad_norm": 7.020844342479764, "learning_rate": 6.107329201712171e-06, "loss": 0.1185, "step": 119380 }, { "epoch": 2.4303307888040715, "grad_norm": 8.070788754720606, "learning_rate": 6.106636267046613e-06, "loss": 0.1362, "step": 119390 }, { "epoch": 2.430534351145038, "grad_norm": 0.2920664160192635, "learning_rate": 6.105943310031111e-06, "loss": 0.1153, "step": 119400 }, { "epoch": 2.430737913486005, "grad_norm": 0.2609252447746326, "learning_rate": 6.105250330679658e-06, "loss": 0.1335, "step": 119410 }, { "epoch": 2.430941475826972, "grad_norm": 9.82172832546963, "learning_rate": 6.1045573290062524e-06, "loss": 0.0799, "step": 119420 }, { "epoch": 2.431145038167939, "grad_norm": 0.10615441052653156, "learning_rate": 6.103864305024888e-06, "loss": 0.1241, "step": 119430 }, { "epoch": 2.431348600508906, "grad_norm": 4.608135229886815, "learning_rate": 6.103171258749563e-06, "loss": 0.1806, "step": 119440 }, { "epoch": 2.4315521628498726, "grad_norm": 6.255064161442503, "learning_rate": 6.102478190194273e-06, "loss": 0.1573, "step": 119450 }, { "epoch": 2.4317557251908397, "grad_norm": 15.655817960848498, "learning_rate": 6.101785099373016e-06, "loss": 0.1228, "step": 119460 }, { "epoch": 2.431959287531807, "grad_norm": 0.20352616716400873, "learning_rate": 6.101091986299791e-06, "loss": 0.0349, "step": 119470 }, { "epoch": 2.4321628498727734, "grad_norm": 1.7935230414434196, "learning_rate": 6.100398850988592e-06, "loss": 0.0995, "step": 119480 }, { "epoch": 2.4323664122137405, "grad_norm": 20.340611314138414, "learning_rate": 6.099705693453425e-06, "loss": 0.1424, "step": 119490 }, { "epoch": 2.432569974554707, "grad_norm": 19.603579305270024, "learning_rate": 6.099012513708283e-06, "loss": 0.0855, "step": 119500 }, { "epoch": 2.432773536895674, "grad_norm": 0.011298153196027288, "learning_rate": 6.098319311767168e-06, "loss": 0.0397, "step": 119510 }, { "epoch": 2.4329770992366413, "grad_norm": 9.179603749019355, "learning_rate": 6.09762608764408e-06, "loss": 0.1287, "step": 119520 }, { "epoch": 2.433180661577608, "grad_norm": 22.489739895927677, "learning_rate": 6.096932841353018e-06, "loss": 0.0941, "step": 119530 }, { "epoch": 2.433384223918575, "grad_norm": 1.1516743853148748, "learning_rate": 6.096239572907986e-06, "loss": 0.112, "step": 119540 }, { "epoch": 2.433587786259542, "grad_norm": 20.587955884451038, "learning_rate": 6.095546282322985e-06, "loss": 0.1074, "step": 119550 }, { "epoch": 2.4337913486005087, "grad_norm": 0.48968156197035734, "learning_rate": 6.094852969612014e-06, "loss": 0.0984, "step": 119560 }, { "epoch": 2.433994910941476, "grad_norm": 0.7876317132464336, "learning_rate": 6.094159634789076e-06, "loss": 0.2183, "step": 119570 }, { "epoch": 2.434198473282443, "grad_norm": 0.58605309284763, "learning_rate": 6.093466277868177e-06, "loss": 0.1179, "step": 119580 }, { "epoch": 2.4344020356234095, "grad_norm": 2.364851719602081, "learning_rate": 6.092772898863318e-06, "loss": 0.1682, "step": 119590 }, { "epoch": 2.4346055979643766, "grad_norm": 5.946879017753858, "learning_rate": 6.092079497788502e-06, "loss": 0.1611, "step": 119600 }, { "epoch": 2.4348091603053437, "grad_norm": 25.316168067055642, "learning_rate": 6.0913860746577345e-06, "loss": 0.1837, "step": 119610 }, { "epoch": 2.4350127226463103, "grad_norm": 0.07562319226141724, "learning_rate": 6.090692629485019e-06, "loss": 0.1355, "step": 119620 }, { "epoch": 2.4352162849872774, "grad_norm": 0.2834000961475956, "learning_rate": 6.089999162284361e-06, "loss": 0.09, "step": 119630 }, { "epoch": 2.4354198473282445, "grad_norm": 0.10149170822234223, "learning_rate": 6.089305673069768e-06, "loss": 0.0968, "step": 119640 }, { "epoch": 2.435623409669211, "grad_norm": 22.286825182037855, "learning_rate": 6.088612161855241e-06, "loss": 0.1344, "step": 119650 }, { "epoch": 2.435826972010178, "grad_norm": 0.23587311748176823, "learning_rate": 6.0879186286547895e-06, "loss": 0.2046, "step": 119660 }, { "epoch": 2.4360305343511452, "grad_norm": 0.0837065305537297, "learning_rate": 6.087225073482422e-06, "loss": 0.1354, "step": 119670 }, { "epoch": 2.436234096692112, "grad_norm": 0.056697921114028695, "learning_rate": 6.086531496352144e-06, "loss": 0.0994, "step": 119680 }, { "epoch": 2.436437659033079, "grad_norm": 0.2651784774622425, "learning_rate": 6.085837897277961e-06, "loss": 0.2118, "step": 119690 }, { "epoch": 2.436641221374046, "grad_norm": 9.167898877337128, "learning_rate": 6.085144276273884e-06, "loss": 0.2406, "step": 119700 }, { "epoch": 2.4368447837150127, "grad_norm": 4.893775884386129, "learning_rate": 6.084450633353921e-06, "loss": 0.1243, "step": 119710 }, { "epoch": 2.4370483460559798, "grad_norm": 0.0439979703230997, "learning_rate": 6.08375696853208e-06, "loss": 0.1163, "step": 119720 }, { "epoch": 2.4372519083969464, "grad_norm": 0.9302261843245262, "learning_rate": 6.083063281822371e-06, "loss": 0.1799, "step": 119730 }, { "epoch": 2.4374554707379135, "grad_norm": 7.924888401468157, "learning_rate": 6.082369573238804e-06, "loss": 0.0886, "step": 119740 }, { "epoch": 2.4376590330788805, "grad_norm": 5.7628835761034045, "learning_rate": 6.081675842795392e-06, "loss": 0.1606, "step": 119750 }, { "epoch": 2.437862595419847, "grad_norm": 0.24569140421771812, "learning_rate": 6.080982090506141e-06, "loss": 0.082, "step": 119760 }, { "epoch": 2.4380661577608143, "grad_norm": 23.67426482048645, "learning_rate": 6.080288316385063e-06, "loss": 0.1179, "step": 119770 }, { "epoch": 2.4382697201017813, "grad_norm": 44.39155447963616, "learning_rate": 6.0795945204461736e-06, "loss": 0.2321, "step": 119780 }, { "epoch": 2.438473282442748, "grad_norm": 5.922458438279812, "learning_rate": 6.078900702703482e-06, "loss": 0.1187, "step": 119790 }, { "epoch": 2.438676844783715, "grad_norm": 36.32088946367074, "learning_rate": 6.078206863171e-06, "loss": 0.0986, "step": 119800 }, { "epoch": 2.438880407124682, "grad_norm": 9.052402028790723, "learning_rate": 6.077513001862742e-06, "loss": 0.1706, "step": 119810 }, { "epoch": 2.4390839694656488, "grad_norm": 5.695430124790473, "learning_rate": 6.0768191187927204e-06, "loss": 0.0647, "step": 119820 }, { "epoch": 2.439287531806616, "grad_norm": 0.569960754289016, "learning_rate": 6.07612521397495e-06, "loss": 0.1749, "step": 119830 }, { "epoch": 2.4394910941475825, "grad_norm": 2.24272707039909, "learning_rate": 6.075431287423448e-06, "loss": 0.1293, "step": 119840 }, { "epoch": 2.4396946564885496, "grad_norm": 0.2503027237653583, "learning_rate": 6.074737339152222e-06, "loss": 0.0319, "step": 119850 }, { "epoch": 2.4398982188295166, "grad_norm": 0.89872905484188, "learning_rate": 6.074043369175292e-06, "loss": 0.2076, "step": 119860 }, { "epoch": 2.4401017811704833, "grad_norm": 8.423781142486751, "learning_rate": 6.073349377506675e-06, "loss": 0.0514, "step": 119870 }, { "epoch": 2.4403053435114503, "grad_norm": 17.176121042799544, "learning_rate": 6.072655364160382e-06, "loss": 0.1173, "step": 119880 }, { "epoch": 2.4405089058524174, "grad_norm": 0.020705910069288288, "learning_rate": 6.071961329150434e-06, "loss": 0.0595, "step": 119890 }, { "epoch": 2.440712468193384, "grad_norm": 13.973085492571682, "learning_rate": 6.071267272490846e-06, "loss": 0.2045, "step": 119900 }, { "epoch": 2.440916030534351, "grad_norm": 16.935283927280942, "learning_rate": 6.070573194195636e-06, "loss": 0.1257, "step": 119910 }, { "epoch": 2.441119592875318, "grad_norm": 2.859459133090429, "learning_rate": 6.069879094278821e-06, "loss": 0.0822, "step": 119920 }, { "epoch": 2.441323155216285, "grad_norm": 18.39608616303752, "learning_rate": 6.0691849727544204e-06, "loss": 0.1369, "step": 119930 }, { "epoch": 2.441526717557252, "grad_norm": 14.661809259232468, "learning_rate": 6.06849082963645e-06, "loss": 0.0804, "step": 119940 }, { "epoch": 2.441730279898219, "grad_norm": 0.19331223437998168, "learning_rate": 6.067796664938934e-06, "loss": 0.1346, "step": 119950 }, { "epoch": 2.4419338422391856, "grad_norm": 5.190901263092584, "learning_rate": 6.067102478675887e-06, "loss": 0.1951, "step": 119960 }, { "epoch": 2.4421374045801527, "grad_norm": 10.752058017389219, "learning_rate": 6.0664082708613315e-06, "loss": 0.1304, "step": 119970 }, { "epoch": 2.44234096692112, "grad_norm": 13.227283274457058, "learning_rate": 6.065714041509288e-06, "loss": 0.0974, "step": 119980 }, { "epoch": 2.4425445292620864, "grad_norm": 21.48414315813875, "learning_rate": 6.065019790633776e-06, "loss": 0.1294, "step": 119990 }, { "epoch": 2.4427480916030535, "grad_norm": 0.40163772396704905, "learning_rate": 6.064325518248818e-06, "loss": 0.1409, "step": 120000 }, { "epoch": 2.4429516539440206, "grad_norm": 10.44491952788002, "learning_rate": 6.063631224368434e-06, "loss": 0.1472, "step": 120010 }, { "epoch": 2.443155216284987, "grad_norm": 0.1939557837038223, "learning_rate": 6.062936909006649e-06, "loss": 0.1793, "step": 120020 }, { "epoch": 2.4433587786259543, "grad_norm": 10.283783927652095, "learning_rate": 6.062242572177485e-06, "loss": 0.107, "step": 120030 }, { "epoch": 2.443562340966921, "grad_norm": 0.37968701817746336, "learning_rate": 6.061548213894963e-06, "loss": 0.0938, "step": 120040 }, { "epoch": 2.443765903307888, "grad_norm": 20.680338637822143, "learning_rate": 6.060853834173108e-06, "loss": 0.1318, "step": 120050 }, { "epoch": 2.443969465648855, "grad_norm": 8.53864025482006, "learning_rate": 6.060159433025942e-06, "loss": 0.1994, "step": 120060 }, { "epoch": 2.4441730279898217, "grad_norm": 0.1650416675676116, "learning_rate": 6.059465010467492e-06, "loss": 0.0787, "step": 120070 }, { "epoch": 2.444376590330789, "grad_norm": 0.07960583423634048, "learning_rate": 6.058770566511781e-06, "loss": 0.0703, "step": 120080 }, { "epoch": 2.444580152671756, "grad_norm": 1.9482742273125349, "learning_rate": 6.0580761011728355e-06, "loss": 0.1151, "step": 120090 }, { "epoch": 2.4447837150127225, "grad_norm": 19.902897111621574, "learning_rate": 6.057381614464679e-06, "loss": 0.1119, "step": 120100 }, { "epoch": 2.4449872773536896, "grad_norm": 0.30081150578291993, "learning_rate": 6.05668710640134e-06, "loss": 0.1016, "step": 120110 }, { "epoch": 2.4451908396946567, "grad_norm": 1.2606870249037607, "learning_rate": 6.0559925769968446e-06, "loss": 0.1439, "step": 120120 }, { "epoch": 2.4453944020356233, "grad_norm": 66.12682338298656, "learning_rate": 6.055298026265218e-06, "loss": 0.1984, "step": 120130 }, { "epoch": 2.4455979643765904, "grad_norm": 14.619293493975238, "learning_rate": 6.054603454220488e-06, "loss": 0.126, "step": 120140 }, { "epoch": 2.445801526717557, "grad_norm": 1.6287213708132013, "learning_rate": 6.053908860876683e-06, "loss": 0.1632, "step": 120150 }, { "epoch": 2.446005089058524, "grad_norm": 2.819447687691705, "learning_rate": 6.053214246247833e-06, "loss": 0.1536, "step": 120160 }, { "epoch": 2.446208651399491, "grad_norm": 6.861245642478675, "learning_rate": 6.052519610347962e-06, "loss": 0.1285, "step": 120170 }, { "epoch": 2.446412213740458, "grad_norm": 0.1668014944322982, "learning_rate": 6.051824953191103e-06, "loss": 0.1623, "step": 120180 }, { "epoch": 2.446615776081425, "grad_norm": 0.10378389308364054, "learning_rate": 6.051130274791286e-06, "loss": 0.0976, "step": 120190 }, { "epoch": 2.446819338422392, "grad_norm": 9.13551738037184, "learning_rate": 6.050435575162537e-06, "loss": 0.1656, "step": 120200 }, { "epoch": 2.4470229007633586, "grad_norm": 20.695848332536602, "learning_rate": 6.049740854318889e-06, "loss": 0.1404, "step": 120210 }, { "epoch": 2.4472264631043257, "grad_norm": 17.777448623844936, "learning_rate": 6.049046112274373e-06, "loss": 0.178, "step": 120220 }, { "epoch": 2.4474300254452928, "grad_norm": 1.495471204980179, "learning_rate": 6.048351349043019e-06, "loss": 0.0571, "step": 120230 }, { "epoch": 2.4476335877862594, "grad_norm": 6.991106390195144, "learning_rate": 6.047656564638861e-06, "loss": 0.1588, "step": 120240 }, { "epoch": 2.4478371501272265, "grad_norm": 4.295156767783757, "learning_rate": 6.046961759075928e-06, "loss": 0.0687, "step": 120250 }, { "epoch": 2.4480407124681935, "grad_norm": 0.10939233489730947, "learning_rate": 6.0462669323682534e-06, "loss": 0.0978, "step": 120260 }, { "epoch": 2.44824427480916, "grad_norm": 0.9553793930238681, "learning_rate": 6.04557208452987e-06, "loss": 0.2365, "step": 120270 }, { "epoch": 2.4484478371501273, "grad_norm": 19.468961191981755, "learning_rate": 6.044877215574814e-06, "loss": 0.17, "step": 120280 }, { "epoch": 2.4486513994910943, "grad_norm": 0.3924170138569105, "learning_rate": 6.044182325517114e-06, "loss": 0.1449, "step": 120290 }, { "epoch": 2.448854961832061, "grad_norm": 26.490964312065426, "learning_rate": 6.04348741437081e-06, "loss": 0.165, "step": 120300 }, { "epoch": 2.449058524173028, "grad_norm": 14.684270388966189, "learning_rate": 6.04279248214993e-06, "loss": 0.1873, "step": 120310 }, { "epoch": 2.449262086513995, "grad_norm": 0.32352284163487544, "learning_rate": 6.042097528868516e-06, "loss": 0.1578, "step": 120320 }, { "epoch": 2.4494656488549618, "grad_norm": 0.041870309563463264, "learning_rate": 6.041402554540599e-06, "loss": 0.1498, "step": 120330 }, { "epoch": 2.449669211195929, "grad_norm": 8.904732697004365, "learning_rate": 6.040707559180217e-06, "loss": 0.1413, "step": 120340 }, { "epoch": 2.449872773536896, "grad_norm": 42.5917970401981, "learning_rate": 6.040012542801403e-06, "loss": 0.1613, "step": 120350 }, { "epoch": 2.4500763358778626, "grad_norm": 8.446631131555934, "learning_rate": 6.039317505418198e-06, "loss": 0.2131, "step": 120360 }, { "epoch": 2.4502798982188296, "grad_norm": 0.8212929489107681, "learning_rate": 6.038622447044638e-06, "loss": 0.1132, "step": 120370 }, { "epoch": 2.4504834605597963, "grad_norm": 4.57090962341054, "learning_rate": 6.037927367694758e-06, "loss": 0.0819, "step": 120380 }, { "epoch": 2.4506870229007633, "grad_norm": 34.2600033452257, "learning_rate": 6.037232267382599e-06, "loss": 0.1064, "step": 120390 }, { "epoch": 2.4508905852417304, "grad_norm": 0.46680783578985846, "learning_rate": 6.0365371461221985e-06, "loss": 0.0756, "step": 120400 }, { "epoch": 2.451094147582697, "grad_norm": 10.879823403653196, "learning_rate": 6.035842003927594e-06, "loss": 0.1381, "step": 120410 }, { "epoch": 2.451297709923664, "grad_norm": 18.71939055270884, "learning_rate": 6.035146840812829e-06, "loss": 0.1058, "step": 120420 }, { "epoch": 2.451501272264631, "grad_norm": 6.255626858607773, "learning_rate": 6.034451656791937e-06, "loss": 0.1701, "step": 120430 }, { "epoch": 2.451704834605598, "grad_norm": 19.040117526609492, "learning_rate": 6.033756451878965e-06, "loss": 0.1746, "step": 120440 }, { "epoch": 2.451908396946565, "grad_norm": 14.717336679258679, "learning_rate": 6.033061226087947e-06, "loss": 0.202, "step": 120450 }, { "epoch": 2.4521119592875316, "grad_norm": 5.502482932962892, "learning_rate": 6.032365979432928e-06, "loss": 0.1267, "step": 120460 }, { "epoch": 2.4523155216284986, "grad_norm": 5.092562797926053, "learning_rate": 6.031670711927949e-06, "loss": 0.0763, "step": 120470 }, { "epoch": 2.4525190839694657, "grad_norm": 0.032853827813894246, "learning_rate": 6.030975423587049e-06, "loss": 0.0708, "step": 120480 }, { "epoch": 2.4527226463104324, "grad_norm": 13.224690687173396, "learning_rate": 6.030280114424274e-06, "loss": 0.1447, "step": 120490 }, { "epoch": 2.4529262086513994, "grad_norm": 13.44378069214901, "learning_rate": 6.029584784453665e-06, "loss": 0.165, "step": 120500 }, { "epoch": 2.4531297709923665, "grad_norm": 13.187917732234014, "learning_rate": 6.028889433689265e-06, "loss": 0.0958, "step": 120510 }, { "epoch": 2.453333333333333, "grad_norm": 3.2342570878882184, "learning_rate": 6.028194062145117e-06, "loss": 0.0794, "step": 120520 }, { "epoch": 2.4535368956743002, "grad_norm": 0.24749632265784852, "learning_rate": 6.027498669835268e-06, "loss": 0.1291, "step": 120530 }, { "epoch": 2.4537404580152673, "grad_norm": 8.86422283365805, "learning_rate": 6.026803256773759e-06, "loss": 0.1435, "step": 120540 }, { "epoch": 2.453944020356234, "grad_norm": 0.047483517687930056, "learning_rate": 6.026107822974634e-06, "loss": 0.0807, "step": 120550 }, { "epoch": 2.454147582697201, "grad_norm": 6.13088119884883, "learning_rate": 6.025412368451942e-06, "loss": 0.1245, "step": 120560 }, { "epoch": 2.454351145038168, "grad_norm": 0.13607189486415774, "learning_rate": 6.024716893219726e-06, "loss": 0.1393, "step": 120570 }, { "epoch": 2.4545547073791347, "grad_norm": 5.173820056900162, "learning_rate": 6.0240213972920305e-06, "loss": 0.1798, "step": 120580 }, { "epoch": 2.454758269720102, "grad_norm": 17.996197475045456, "learning_rate": 6.023325880682905e-06, "loss": 0.056, "step": 120590 }, { "epoch": 2.454961832061069, "grad_norm": 11.716054392959386, "learning_rate": 6.022630343406396e-06, "loss": 0.0562, "step": 120600 }, { "epoch": 2.4551653944020355, "grad_norm": 9.776958214715721, "learning_rate": 6.021934785476549e-06, "loss": 0.1543, "step": 120610 }, { "epoch": 2.4553689567430026, "grad_norm": 2.853812948144841, "learning_rate": 6.0212392069074156e-06, "loss": 0.083, "step": 120620 }, { "epoch": 2.4555725190839697, "grad_norm": 19.88030403111516, "learning_rate": 6.020543607713039e-06, "loss": 0.2304, "step": 120630 }, { "epoch": 2.4557760814249363, "grad_norm": 10.459685102649756, "learning_rate": 6.019847987907469e-06, "loss": 0.0831, "step": 120640 }, { "epoch": 2.4559796437659034, "grad_norm": 13.525489898347654, "learning_rate": 6.019152347504758e-06, "loss": 0.1415, "step": 120650 }, { "epoch": 2.4561832061068705, "grad_norm": 20.825765538109795, "learning_rate": 6.01845668651895e-06, "loss": 0.1956, "step": 120660 }, { "epoch": 2.456386768447837, "grad_norm": 20.655486711445274, "learning_rate": 6.017761004964098e-06, "loss": 0.1463, "step": 120670 }, { "epoch": 2.456590330788804, "grad_norm": 11.082018483518414, "learning_rate": 6.017065302854252e-06, "loss": 0.117, "step": 120680 }, { "epoch": 2.456793893129771, "grad_norm": 4.490403697713954, "learning_rate": 6.016369580203463e-06, "loss": 0.1304, "step": 120690 }, { "epoch": 2.456997455470738, "grad_norm": 0.9032584830729695, "learning_rate": 6.015673837025779e-06, "loss": 0.1368, "step": 120700 }, { "epoch": 2.457201017811705, "grad_norm": 1.988697142196512, "learning_rate": 6.014978073335257e-06, "loss": 0.1363, "step": 120710 }, { "epoch": 2.4574045801526716, "grad_norm": 13.847323667251764, "learning_rate": 6.014282289145942e-06, "loss": 0.1176, "step": 120720 }, { "epoch": 2.4576081424936387, "grad_norm": 16.693890438663132, "learning_rate": 6.013586484471893e-06, "loss": 0.1009, "step": 120730 }, { "epoch": 2.4578117048346058, "grad_norm": 12.20962182745182, "learning_rate": 6.012890659327158e-06, "loss": 0.1374, "step": 120740 }, { "epoch": 2.4580152671755724, "grad_norm": 22.24582884842138, "learning_rate": 6.0121948137257916e-06, "loss": 0.0754, "step": 120750 }, { "epoch": 2.4582188295165395, "grad_norm": 20.27330865067385, "learning_rate": 6.011498947681847e-06, "loss": 0.0641, "step": 120760 }, { "epoch": 2.4584223918575066, "grad_norm": 10.37589262532702, "learning_rate": 6.010803061209378e-06, "loss": 0.1363, "step": 120770 }, { "epoch": 2.458625954198473, "grad_norm": 0.9695007135454909, "learning_rate": 6.01010715432244e-06, "loss": 0.0888, "step": 120780 }, { "epoch": 2.4588295165394403, "grad_norm": 6.105223187826616, "learning_rate": 6.009411227035087e-06, "loss": 0.1314, "step": 120790 }, { "epoch": 2.459033078880407, "grad_norm": 3.3024725788789633, "learning_rate": 6.008715279361372e-06, "loss": 0.0845, "step": 120800 }, { "epoch": 2.459236641221374, "grad_norm": 0.3913355747897749, "learning_rate": 6.008019311315355e-06, "loss": 0.1726, "step": 120810 }, { "epoch": 2.459440203562341, "grad_norm": 0.12693108482565915, "learning_rate": 6.00732332291109e-06, "loss": 0.0521, "step": 120820 }, { "epoch": 2.4596437659033077, "grad_norm": 0.28885258336678654, "learning_rate": 6.006627314162633e-06, "loss": 0.1378, "step": 120830 }, { "epoch": 2.4598473282442748, "grad_norm": 6.607610409663787, "learning_rate": 6.005931285084039e-06, "loss": 0.1345, "step": 120840 }, { "epoch": 2.460050890585242, "grad_norm": 7.12400291055678, "learning_rate": 6.005235235689369e-06, "loss": 0.0915, "step": 120850 }, { "epoch": 2.4602544529262085, "grad_norm": 37.517518778154255, "learning_rate": 6.004539165992678e-06, "loss": 0.0845, "step": 120860 }, { "epoch": 2.4604580152671756, "grad_norm": 16.768514777699504, "learning_rate": 6.003843076008025e-06, "loss": 0.0607, "step": 120870 }, { "epoch": 2.4606615776081426, "grad_norm": 11.402023167951603, "learning_rate": 6.003146965749468e-06, "loss": 0.1977, "step": 120880 }, { "epoch": 2.4608651399491093, "grad_norm": 0.07727979108557591, "learning_rate": 6.002450835231065e-06, "loss": 0.1411, "step": 120890 }, { "epoch": 2.4610687022900763, "grad_norm": 4.978515734735407, "learning_rate": 6.001754684466877e-06, "loss": 0.1238, "step": 120900 }, { "epoch": 2.4612722646310434, "grad_norm": 1.485081370766447, "learning_rate": 6.0010585134709645e-06, "loss": 0.0852, "step": 120910 }, { "epoch": 2.46147582697201, "grad_norm": 6.735110794586415, "learning_rate": 6.0003623222573825e-06, "loss": 0.2009, "step": 120920 }, { "epoch": 2.461679389312977, "grad_norm": 0.31238976646411615, "learning_rate": 5.999666110840199e-06, "loss": 0.0828, "step": 120930 }, { "epoch": 2.461882951653944, "grad_norm": 0.0581530940466365, "learning_rate": 5.998969879233468e-06, "loss": 0.1361, "step": 120940 }, { "epoch": 2.462086513994911, "grad_norm": 26.162777814945322, "learning_rate": 5.998273627451254e-06, "loss": 0.1741, "step": 120950 }, { "epoch": 2.462290076335878, "grad_norm": 19.415316741228096, "learning_rate": 5.9975773555076185e-06, "loss": 0.1762, "step": 120960 }, { "epoch": 2.462493638676845, "grad_norm": 1.9166145824553336, "learning_rate": 5.996881063416624e-06, "loss": 0.0781, "step": 120970 }, { "epoch": 2.4626972010178116, "grad_norm": 7.582181201198551, "learning_rate": 5.996184751192332e-06, "loss": 0.2166, "step": 120980 }, { "epoch": 2.4629007633587787, "grad_norm": 0.48017611061372545, "learning_rate": 5.995488418848806e-06, "loss": 0.0615, "step": 120990 }, { "epoch": 2.4631043256997454, "grad_norm": 23.946961272550368, "learning_rate": 5.99479206640011e-06, "loss": 0.0937, "step": 121000 }, { "epoch": 2.4633078880407124, "grad_norm": 0.1525315557676084, "learning_rate": 5.9940956938603045e-06, "loss": 0.0795, "step": 121010 }, { "epoch": 2.4635114503816795, "grad_norm": 24.3663183900485, "learning_rate": 5.9933993012434584e-06, "loss": 0.2297, "step": 121020 }, { "epoch": 2.463715012722646, "grad_norm": 17.214871646774245, "learning_rate": 5.992702888563633e-06, "loss": 0.1519, "step": 121030 }, { "epoch": 2.4639185750636132, "grad_norm": 0.8629562643615802, "learning_rate": 5.992006455834895e-06, "loss": 0.1166, "step": 121040 }, { "epoch": 2.4641221374045803, "grad_norm": 0.8072973667737208, "learning_rate": 5.991310003071308e-06, "loss": 0.1791, "step": 121050 }, { "epoch": 2.464325699745547, "grad_norm": 23.25129919713329, "learning_rate": 5.990613530286938e-06, "loss": 0.1029, "step": 121060 }, { "epoch": 2.464529262086514, "grad_norm": 22.300374387710203, "learning_rate": 5.989917037495854e-06, "loss": 0.0812, "step": 121070 }, { "epoch": 2.464732824427481, "grad_norm": 0.2790287594347492, "learning_rate": 5.98922052471212e-06, "loss": 0.1043, "step": 121080 }, { "epoch": 2.4649363867684477, "grad_norm": 18.535813663932498, "learning_rate": 5.988523991949801e-06, "loss": 0.2025, "step": 121090 }, { "epoch": 2.465139949109415, "grad_norm": 14.444905021726575, "learning_rate": 5.9878274392229685e-06, "loss": 0.1927, "step": 121100 }, { "epoch": 2.4653435114503814, "grad_norm": 39.57905727458388, "learning_rate": 5.98713086654569e-06, "loss": 0.1623, "step": 121110 }, { "epoch": 2.4655470737913485, "grad_norm": 30.875298574973375, "learning_rate": 5.98643427393203e-06, "loss": 0.1624, "step": 121120 }, { "epoch": 2.4657506361323156, "grad_norm": 17.175538454871155, "learning_rate": 5.9857376613960585e-06, "loss": 0.1049, "step": 121130 }, { "epoch": 2.4659541984732822, "grad_norm": 16.077665923147002, "learning_rate": 5.985041028951848e-06, "loss": 0.089, "step": 121140 }, { "epoch": 2.4661577608142493, "grad_norm": 1.8916448785712672, "learning_rate": 5.984344376613465e-06, "loss": 0.0601, "step": 121150 }, { "epoch": 2.4663613231552164, "grad_norm": 5.978696617629485, "learning_rate": 5.983647704394977e-06, "loss": 0.1623, "step": 121160 }, { "epoch": 2.466564885496183, "grad_norm": 0.07069501137253682, "learning_rate": 5.982951012310459e-06, "loss": 0.0639, "step": 121170 }, { "epoch": 2.46676844783715, "grad_norm": 3.076595678661229, "learning_rate": 5.982254300373978e-06, "loss": 0.0895, "step": 121180 }, { "epoch": 2.466972010178117, "grad_norm": 0.21413848269365454, "learning_rate": 5.981557568599606e-06, "loss": 0.1004, "step": 121190 }, { "epoch": 2.467175572519084, "grad_norm": 0.14322798886515764, "learning_rate": 5.980860817001417e-06, "loss": 0.2605, "step": 121200 }, { "epoch": 2.467379134860051, "grad_norm": 0.0581265694773879, "learning_rate": 5.980164045593478e-06, "loss": 0.0639, "step": 121210 }, { "epoch": 2.467582697201018, "grad_norm": 21.60818204587985, "learning_rate": 5.979467254389865e-06, "loss": 0.103, "step": 121220 }, { "epoch": 2.4677862595419846, "grad_norm": 18.78447697736982, "learning_rate": 5.978770443404649e-06, "loss": 0.172, "step": 121230 }, { "epoch": 2.4679898218829517, "grad_norm": 3.349875143298461, "learning_rate": 5.978073612651902e-06, "loss": 0.0886, "step": 121240 }, { "epoch": 2.4681933842239188, "grad_norm": 11.720066028523052, "learning_rate": 5.9773767621457e-06, "loss": 0.1478, "step": 121250 }, { "epoch": 2.4683969465648854, "grad_norm": 25.40635797398472, "learning_rate": 5.9766798919001155e-06, "loss": 0.0473, "step": 121260 }, { "epoch": 2.4686005089058525, "grad_norm": 21.407649568803734, "learning_rate": 5.975983001929222e-06, "loss": 0.0341, "step": 121270 }, { "epoch": 2.4688040712468196, "grad_norm": 0.5293969230520194, "learning_rate": 5.975286092247096e-06, "loss": 0.1323, "step": 121280 }, { "epoch": 2.469007633587786, "grad_norm": 25.972584352608866, "learning_rate": 5.974589162867809e-06, "loss": 0.1554, "step": 121290 }, { "epoch": 2.4692111959287533, "grad_norm": 15.271270715034754, "learning_rate": 5.973892213805439e-06, "loss": 0.0858, "step": 121300 }, { "epoch": 2.4694147582697203, "grad_norm": 0.15190368644228072, "learning_rate": 5.973195245074064e-06, "loss": 0.073, "step": 121310 }, { "epoch": 2.469618320610687, "grad_norm": 10.826343496719842, "learning_rate": 5.972498256687756e-06, "loss": 0.1591, "step": 121320 }, { "epoch": 2.469821882951654, "grad_norm": 33.66059366692416, "learning_rate": 5.971801248660591e-06, "loss": 0.186, "step": 121330 }, { "epoch": 2.4700254452926207, "grad_norm": 0.08736672677115093, "learning_rate": 5.97110422100665e-06, "loss": 0.0927, "step": 121340 }, { "epoch": 2.4702290076335878, "grad_norm": 19.449391319953698, "learning_rate": 5.97040717374001e-06, "loss": 0.225, "step": 121350 }, { "epoch": 2.470432569974555, "grad_norm": 23.07478061111405, "learning_rate": 5.969710106874744e-06, "loss": 0.2081, "step": 121360 }, { "epoch": 2.4706361323155215, "grad_norm": 42.90113139261303, "learning_rate": 5.969013020424936e-06, "loss": 0.2069, "step": 121370 }, { "epoch": 2.4708396946564886, "grad_norm": 16.27883603929749, "learning_rate": 5.968315914404661e-06, "loss": 0.06, "step": 121380 }, { "epoch": 2.4710432569974556, "grad_norm": 5.401348811290897, "learning_rate": 5.967618788827998e-06, "loss": 0.115, "step": 121390 }, { "epoch": 2.4712468193384223, "grad_norm": 0.08007108273278063, "learning_rate": 5.9669216437090295e-06, "loss": 0.0411, "step": 121400 }, { "epoch": 2.4714503816793894, "grad_norm": 5.72549173341297, "learning_rate": 5.966224479061832e-06, "loss": 0.0424, "step": 121410 }, { "epoch": 2.471653944020356, "grad_norm": 0.06437401230524378, "learning_rate": 5.965527294900486e-06, "loss": 0.2138, "step": 121420 }, { "epoch": 2.471857506361323, "grad_norm": 5.069123932538476, "learning_rate": 5.964830091239073e-06, "loss": 0.1552, "step": 121430 }, { "epoch": 2.47206106870229, "grad_norm": 0.048016152203180944, "learning_rate": 5.964132868091674e-06, "loss": 0.184, "step": 121440 }, { "epoch": 2.4722646310432568, "grad_norm": 0.5132253308129279, "learning_rate": 5.963435625472369e-06, "loss": 0.1103, "step": 121450 }, { "epoch": 2.472468193384224, "grad_norm": 9.943248501982053, "learning_rate": 5.962738363395242e-06, "loss": 0.1182, "step": 121460 }, { "epoch": 2.472671755725191, "grad_norm": 1.3897117784765796, "learning_rate": 5.962041081874373e-06, "loss": 0.0939, "step": 121470 }, { "epoch": 2.4728753180661576, "grad_norm": 3.088500343671739, "learning_rate": 5.961343780923845e-06, "loss": 0.0964, "step": 121480 }, { "epoch": 2.4730788804071246, "grad_norm": 13.69137963885937, "learning_rate": 5.960646460557742e-06, "loss": 0.1583, "step": 121490 }, { "epoch": 2.4732824427480917, "grad_norm": 19.224280683009503, "learning_rate": 5.959949120790145e-06, "loss": 0.0848, "step": 121500 }, { "epoch": 2.4734860050890584, "grad_norm": 0.07947627471192431, "learning_rate": 5.9592517616351405e-06, "loss": 0.1658, "step": 121510 }, { "epoch": 2.4736895674300254, "grad_norm": 0.6332434889829597, "learning_rate": 5.958554383106812e-06, "loss": 0.0818, "step": 121520 }, { "epoch": 2.4738931297709925, "grad_norm": 51.962788047157616, "learning_rate": 5.957856985219241e-06, "loss": 0.1864, "step": 121530 }, { "epoch": 2.474096692111959, "grad_norm": 36.73320463466752, "learning_rate": 5.957159567986515e-06, "loss": 0.1499, "step": 121540 }, { "epoch": 2.4743002544529262, "grad_norm": 25.49826447476505, "learning_rate": 5.956462131422719e-06, "loss": 0.1732, "step": 121550 }, { "epoch": 2.4745038167938933, "grad_norm": 0.3442105806894624, "learning_rate": 5.955764675541938e-06, "loss": 0.0795, "step": 121560 }, { "epoch": 2.47470737913486, "grad_norm": 2.3375218143391914, "learning_rate": 5.95506720035826e-06, "loss": 0.1538, "step": 121570 }, { "epoch": 2.474910941475827, "grad_norm": 4.050195957526447, "learning_rate": 5.9543697058857676e-06, "loss": 0.1029, "step": 121580 }, { "epoch": 2.475114503816794, "grad_norm": 0.5896194818164007, "learning_rate": 5.95367219213855e-06, "loss": 0.1494, "step": 121590 }, { "epoch": 2.4753180661577607, "grad_norm": 7.22098911244772, "learning_rate": 5.952974659130696e-06, "loss": 0.1302, "step": 121600 }, { "epoch": 2.475521628498728, "grad_norm": 4.69248343080601, "learning_rate": 5.95227710687629e-06, "loss": 0.1407, "step": 121610 }, { "epoch": 2.475725190839695, "grad_norm": 9.528110279212884, "learning_rate": 5.95157953538942e-06, "loss": 0.2051, "step": 121620 }, { "epoch": 2.4759287531806615, "grad_norm": 3.991583132754206, "learning_rate": 5.950881944684178e-06, "loss": 0.0398, "step": 121630 }, { "epoch": 2.4761323155216286, "grad_norm": 23.47068645176138, "learning_rate": 5.950184334774649e-06, "loss": 0.1627, "step": 121640 }, { "epoch": 2.4763358778625952, "grad_norm": 11.771500594243578, "learning_rate": 5.949486705674924e-06, "loss": 0.0809, "step": 121650 }, { "epoch": 2.4765394402035623, "grad_norm": 13.811589797227919, "learning_rate": 5.948789057399092e-06, "loss": 0.0797, "step": 121660 }, { "epoch": 2.4767430025445294, "grad_norm": 37.89464512651074, "learning_rate": 5.948091389961243e-06, "loss": 0.1566, "step": 121670 }, { "epoch": 2.476946564885496, "grad_norm": 0.00239030505361983, "learning_rate": 5.9473937033754666e-06, "loss": 0.1066, "step": 121680 }, { "epoch": 2.477150127226463, "grad_norm": 0.01138665960436881, "learning_rate": 5.946695997655857e-06, "loss": 0.1409, "step": 121690 }, { "epoch": 2.47735368956743, "grad_norm": 2.3884134407489492, "learning_rate": 5.9459982728165e-06, "loss": 0.0746, "step": 121700 }, { "epoch": 2.477557251908397, "grad_norm": 46.44162472784545, "learning_rate": 5.945300528871489e-06, "loss": 0.1439, "step": 121710 }, { "epoch": 2.477760814249364, "grad_norm": 7.268137950143212, "learning_rate": 5.944602765834917e-06, "loss": 0.1819, "step": 121720 }, { "epoch": 2.477964376590331, "grad_norm": 0.019632432672391755, "learning_rate": 5.943904983720875e-06, "loss": 0.0946, "step": 121730 }, { "epoch": 2.4781679389312976, "grad_norm": 3.149220682272542, "learning_rate": 5.9432071825434565e-06, "loss": 0.0957, "step": 121740 }, { "epoch": 2.4783715012722647, "grad_norm": 11.005753056274031, "learning_rate": 5.942509362316754e-06, "loss": 0.0451, "step": 121750 }, { "epoch": 2.4785750636132313, "grad_norm": 0.10988638602609377, "learning_rate": 5.9418115230548615e-06, "loss": 0.1605, "step": 121760 }, { "epoch": 2.4787786259541984, "grad_norm": 0.13442030435029645, "learning_rate": 5.9411136647718725e-06, "loss": 0.1566, "step": 121770 }, { "epoch": 2.4789821882951655, "grad_norm": 8.286328683633524, "learning_rate": 5.94041578748188e-06, "loss": 0.2296, "step": 121780 }, { "epoch": 2.479185750636132, "grad_norm": 35.82845951733217, "learning_rate": 5.939717891198978e-06, "loss": 0.2263, "step": 121790 }, { "epoch": 2.479389312977099, "grad_norm": 17.424938536984897, "learning_rate": 5.939019975937266e-06, "loss": 0.2036, "step": 121800 }, { "epoch": 2.4795928753180663, "grad_norm": 28.399128301134713, "learning_rate": 5.9383220417108345e-06, "loss": 0.1329, "step": 121810 }, { "epoch": 2.479796437659033, "grad_norm": 6.838468140543808, "learning_rate": 5.937624088533781e-06, "loss": 0.1611, "step": 121820 }, { "epoch": 2.48, "grad_norm": 10.08421508543289, "learning_rate": 5.936926116420202e-06, "loss": 0.0712, "step": 121830 }, { "epoch": 2.480203562340967, "grad_norm": 9.580330168810208, "learning_rate": 5.936228125384192e-06, "loss": 0.1731, "step": 121840 }, { "epoch": 2.4804071246819337, "grad_norm": 5.2973951952092335, "learning_rate": 5.935530115439849e-06, "loss": 0.1041, "step": 121850 }, { "epoch": 2.4806106870229008, "grad_norm": 7.666338023001944, "learning_rate": 5.934832086601273e-06, "loss": 0.1851, "step": 121860 }, { "epoch": 2.480814249363868, "grad_norm": 0.27218205799134165, "learning_rate": 5.934134038882556e-06, "loss": 0.1361, "step": 121870 }, { "epoch": 2.4810178117048345, "grad_norm": 0.5752539160033112, "learning_rate": 5.933435972297799e-06, "loss": 0.1262, "step": 121880 }, { "epoch": 2.4812213740458016, "grad_norm": 0.27085935993674365, "learning_rate": 5.9327378868611026e-06, "loss": 0.0809, "step": 121890 }, { "epoch": 2.4814249363867686, "grad_norm": 0.3559057427392001, "learning_rate": 5.9320397825865625e-06, "loss": 0.0819, "step": 121900 }, { "epoch": 2.4816284987277353, "grad_norm": 8.807856295830046, "learning_rate": 5.931341659488278e-06, "loss": 0.1344, "step": 121910 }, { "epoch": 2.4818320610687024, "grad_norm": 21.873834175259386, "learning_rate": 5.930643517580349e-06, "loss": 0.1627, "step": 121920 }, { "epoch": 2.4820356234096694, "grad_norm": 5.717580911928467, "learning_rate": 5.929945356876876e-06, "loss": 0.0366, "step": 121930 }, { "epoch": 2.482239185750636, "grad_norm": 16.081838310852426, "learning_rate": 5.929247177391958e-06, "loss": 0.1642, "step": 121940 }, { "epoch": 2.482442748091603, "grad_norm": 2.0159777721633017, "learning_rate": 5.928548979139697e-06, "loss": 0.0855, "step": 121950 }, { "epoch": 2.48264631043257, "grad_norm": 6.3806952329219815, "learning_rate": 5.927850762134193e-06, "loss": 0.1746, "step": 121960 }, { "epoch": 2.482849872773537, "grad_norm": 2.6616115571696612, "learning_rate": 5.927152526389549e-06, "loss": 0.1275, "step": 121970 }, { "epoch": 2.483053435114504, "grad_norm": 0.5852752386783272, "learning_rate": 5.926454271919864e-06, "loss": 0.1276, "step": 121980 }, { "epoch": 2.4832569974554706, "grad_norm": 7.194024478178176, "learning_rate": 5.925755998739241e-06, "loss": 0.1128, "step": 121990 }, { "epoch": 2.4834605597964376, "grad_norm": 0.053543595850280935, "learning_rate": 5.925057706861786e-06, "loss": 0.1959, "step": 122000 }, { "epoch": 2.4836641221374047, "grad_norm": 0.6353865802001549, "learning_rate": 5.924359396301598e-06, "loss": 0.0938, "step": 122010 }, { "epoch": 2.4838676844783714, "grad_norm": 12.123229316202744, "learning_rate": 5.9236610670727805e-06, "loss": 0.2016, "step": 122020 }, { "epoch": 2.4840712468193384, "grad_norm": 6.618156762852672, "learning_rate": 5.922962719189438e-06, "loss": 0.0962, "step": 122030 }, { "epoch": 2.4842748091603055, "grad_norm": 9.488441299461266, "learning_rate": 5.922264352665675e-06, "loss": 0.1789, "step": 122040 }, { "epoch": 2.484478371501272, "grad_norm": 1.0542171124177608, "learning_rate": 5.921565967515597e-06, "loss": 0.0314, "step": 122050 }, { "epoch": 2.4846819338422392, "grad_norm": 12.841208184450593, "learning_rate": 5.920867563753307e-06, "loss": 0.0829, "step": 122060 }, { "epoch": 2.484885496183206, "grad_norm": 0.5134247353470109, "learning_rate": 5.920169141392908e-06, "loss": 0.1622, "step": 122070 }, { "epoch": 2.485089058524173, "grad_norm": 0.1495586322447079, "learning_rate": 5.91947070044851e-06, "loss": 0.2176, "step": 122080 }, { "epoch": 2.48529262086514, "grad_norm": 9.215470270205737, "learning_rate": 5.918772240934217e-06, "loss": 0.1347, "step": 122090 }, { "epoch": 2.4854961832061067, "grad_norm": 4.707989864243915, "learning_rate": 5.918073762864135e-06, "loss": 0.1092, "step": 122100 }, { "epoch": 2.4856997455470737, "grad_norm": 2.250104697080387, "learning_rate": 5.917375266252369e-06, "loss": 0.0728, "step": 122110 }, { "epoch": 2.485903307888041, "grad_norm": 0.17561032946082597, "learning_rate": 5.916676751113031e-06, "loss": 0.1761, "step": 122120 }, { "epoch": 2.4861068702290074, "grad_norm": 4.900026117222245, "learning_rate": 5.915978217460224e-06, "loss": 0.146, "step": 122130 }, { "epoch": 2.4863104325699745, "grad_norm": 0.3150863385764181, "learning_rate": 5.915279665308057e-06, "loss": 0.0732, "step": 122140 }, { "epoch": 2.4865139949109416, "grad_norm": 6.56138520373861, "learning_rate": 5.9145810946706395e-06, "loss": 0.1178, "step": 122150 }, { "epoch": 2.4867175572519082, "grad_norm": 17.387381578240998, "learning_rate": 5.9138825055620764e-06, "loss": 0.1107, "step": 122160 }, { "epoch": 2.4869211195928753, "grad_norm": 4.016310860457376, "learning_rate": 5.913183897996479e-06, "loss": 0.1208, "step": 122170 }, { "epoch": 2.4871246819338424, "grad_norm": 8.529196607598136, "learning_rate": 5.91248527198796e-06, "loss": 0.1691, "step": 122180 }, { "epoch": 2.487328244274809, "grad_norm": 11.767811573762977, "learning_rate": 5.911786627550623e-06, "loss": 0.1143, "step": 122190 }, { "epoch": 2.487531806615776, "grad_norm": 0.08842850634265703, "learning_rate": 5.911087964698582e-06, "loss": 0.0559, "step": 122200 }, { "epoch": 2.487735368956743, "grad_norm": 14.909613084494623, "learning_rate": 5.9103892834459454e-06, "loss": 0.0995, "step": 122210 }, { "epoch": 2.48793893129771, "grad_norm": 0.139361224686541, "learning_rate": 5.909690583806824e-06, "loss": 0.0992, "step": 122220 }, { "epoch": 2.488142493638677, "grad_norm": 2.3282887269573345, "learning_rate": 5.90899186579533e-06, "loss": 0.0182, "step": 122230 }, { "epoch": 2.488346055979644, "grad_norm": 14.076910981049352, "learning_rate": 5.908293129425575e-06, "loss": 0.1796, "step": 122240 }, { "epoch": 2.4885496183206106, "grad_norm": 0.6691276933704842, "learning_rate": 5.90759437471167e-06, "loss": 0.0962, "step": 122250 }, { "epoch": 2.4887531806615777, "grad_norm": 0.06143486354482076, "learning_rate": 5.9068956016677285e-06, "loss": 0.0545, "step": 122260 }, { "epoch": 2.4889567430025448, "grad_norm": 5.734714558798223, "learning_rate": 5.906196810307861e-06, "loss": 0.1626, "step": 122270 }, { "epoch": 2.4891603053435114, "grad_norm": 0.029865579638417575, "learning_rate": 5.905498000646181e-06, "loss": 0.1285, "step": 122280 }, { "epoch": 2.4893638676844785, "grad_norm": 4.157087098460863, "learning_rate": 5.904799172696805e-06, "loss": 0.1253, "step": 122290 }, { "epoch": 2.489567430025445, "grad_norm": 10.32901548217058, "learning_rate": 5.9041003264738426e-06, "loss": 0.145, "step": 122300 }, { "epoch": 2.489770992366412, "grad_norm": 15.00563584665295, "learning_rate": 5.903401461991411e-06, "loss": 0.0788, "step": 122310 }, { "epoch": 2.4899745547073793, "grad_norm": 0.1366058496691104, "learning_rate": 5.902702579263622e-06, "loss": 0.1021, "step": 122320 }, { "epoch": 2.490178117048346, "grad_norm": 0.20628793354021743, "learning_rate": 5.902003678304592e-06, "loss": 0.208, "step": 122330 }, { "epoch": 2.490381679389313, "grad_norm": 1.432098657100004, "learning_rate": 5.901304759128435e-06, "loss": 0.2079, "step": 122340 }, { "epoch": 2.49058524173028, "grad_norm": 0.5390806830923351, "learning_rate": 5.90060582174927e-06, "loss": 0.1868, "step": 122350 }, { "epoch": 2.4907888040712467, "grad_norm": 22.344450713533867, "learning_rate": 5.8999068661812085e-06, "loss": 0.1684, "step": 122360 }, { "epoch": 2.4909923664122138, "grad_norm": 0.11929141309242852, "learning_rate": 5.89920789243837e-06, "loss": 0.1002, "step": 122370 }, { "epoch": 2.491195928753181, "grad_norm": 0.15738575541489394, "learning_rate": 5.898508900534869e-06, "loss": 0.0891, "step": 122380 }, { "epoch": 2.4913994910941475, "grad_norm": 5.94014634137802, "learning_rate": 5.8978098904848245e-06, "loss": 0.2342, "step": 122390 }, { "epoch": 2.4916030534351146, "grad_norm": 7.704308115451563, "learning_rate": 5.897110862302352e-06, "loss": 0.1089, "step": 122400 }, { "epoch": 2.491806615776081, "grad_norm": 9.178567243628224, "learning_rate": 5.89641181600157e-06, "loss": 0.0892, "step": 122410 }, { "epoch": 2.4920101781170483, "grad_norm": 8.261677846057943, "learning_rate": 5.895712751596597e-06, "loss": 0.1853, "step": 122420 }, { "epoch": 2.4922137404580154, "grad_norm": 14.63348667092497, "learning_rate": 5.895013669101552e-06, "loss": 0.0901, "step": 122430 }, { "epoch": 2.492417302798982, "grad_norm": 1.5479276468189291, "learning_rate": 5.8943145685305534e-06, "loss": 0.1294, "step": 122440 }, { "epoch": 2.492620865139949, "grad_norm": 12.616942633504113, "learning_rate": 5.893615449897718e-06, "loss": 0.1632, "step": 122450 }, { "epoch": 2.492824427480916, "grad_norm": 0.14399976148618684, "learning_rate": 5.892916313217171e-06, "loss": 0.142, "step": 122460 }, { "epoch": 2.493027989821883, "grad_norm": 23.210303006706916, "learning_rate": 5.8922171585030265e-06, "loss": 0.1215, "step": 122470 }, { "epoch": 2.49323155216285, "grad_norm": 28.162449502732194, "learning_rate": 5.891517985769408e-06, "loss": 0.1904, "step": 122480 }, { "epoch": 2.493435114503817, "grad_norm": 18.154507604975972, "learning_rate": 5.890818795030435e-06, "loss": 0.1146, "step": 122490 }, { "epoch": 2.4936386768447836, "grad_norm": 8.498668024204456, "learning_rate": 5.890119586300229e-06, "loss": 0.0989, "step": 122500 }, { "epoch": 2.4938422391857507, "grad_norm": 0.04956444398526492, "learning_rate": 5.889420359592913e-06, "loss": 0.1404, "step": 122510 }, { "epoch": 2.4940458015267177, "grad_norm": 23.446614556045464, "learning_rate": 5.888721114922605e-06, "loss": 0.1364, "step": 122520 }, { "epoch": 2.4942493638676844, "grad_norm": 33.053521854723975, "learning_rate": 5.888021852303431e-06, "loss": 0.0771, "step": 122530 }, { "epoch": 2.4944529262086514, "grad_norm": 4.718352403176833, "learning_rate": 5.887322571749512e-06, "loss": 0.1159, "step": 122540 }, { "epoch": 2.4946564885496185, "grad_norm": 9.608705499373048, "learning_rate": 5.886623273274972e-06, "loss": 0.1131, "step": 122550 }, { "epoch": 2.494860050890585, "grad_norm": 10.12037552692511, "learning_rate": 5.8859239568939305e-06, "loss": 0.1465, "step": 122560 }, { "epoch": 2.4950636132315522, "grad_norm": 0.02387695978617762, "learning_rate": 5.885224622620513e-06, "loss": 0.1098, "step": 122570 }, { "epoch": 2.4952671755725193, "grad_norm": 0.04826912491638827, "learning_rate": 5.884525270468847e-06, "loss": 0.1394, "step": 122580 }, { "epoch": 2.495470737913486, "grad_norm": 0.2871208053717511, "learning_rate": 5.883825900453053e-06, "loss": 0.0399, "step": 122590 }, { "epoch": 2.495674300254453, "grad_norm": 17.179636750243482, "learning_rate": 5.8831265125872565e-06, "loss": 0.0804, "step": 122600 }, { "epoch": 2.4958778625954197, "grad_norm": 9.111586802568768, "learning_rate": 5.882427106885581e-06, "loss": 0.0801, "step": 122610 }, { "epoch": 2.4960814249363867, "grad_norm": 15.554174367732015, "learning_rate": 5.881727683362155e-06, "loss": 0.2209, "step": 122620 }, { "epoch": 2.496284987277354, "grad_norm": 6.394209314529918, "learning_rate": 5.881028242031103e-06, "loss": 0.1115, "step": 122630 }, { "epoch": 2.4964885496183205, "grad_norm": 0.5491881958579374, "learning_rate": 5.880328782906551e-06, "loss": 0.0994, "step": 122640 }, { "epoch": 2.4966921119592875, "grad_norm": 0.18423578794139636, "learning_rate": 5.879629306002623e-06, "loss": 0.1096, "step": 122650 }, { "epoch": 2.4968956743002546, "grad_norm": 17.237867322770597, "learning_rate": 5.87892981133345e-06, "loss": 0.0875, "step": 122660 }, { "epoch": 2.4970992366412212, "grad_norm": 0.057611645416134304, "learning_rate": 5.878230298913159e-06, "loss": 0.1869, "step": 122670 }, { "epoch": 2.4973027989821883, "grad_norm": 10.408107314144745, "learning_rate": 5.877530768755874e-06, "loss": 0.2159, "step": 122680 }, { "epoch": 2.4975063613231554, "grad_norm": 0.014273830697303827, "learning_rate": 5.876831220875725e-06, "loss": 0.1063, "step": 122690 }, { "epoch": 2.497709923664122, "grad_norm": 0.21705191826718417, "learning_rate": 5.876131655286841e-06, "loss": 0.1049, "step": 122700 }, { "epoch": 2.497913486005089, "grad_norm": 0.06335477174122173, "learning_rate": 5.875432072003348e-06, "loss": 0.0111, "step": 122710 }, { "epoch": 2.4981170483460557, "grad_norm": 0.7718523669062163, "learning_rate": 5.874732471039379e-06, "loss": 0.0994, "step": 122720 }, { "epoch": 2.498320610687023, "grad_norm": 0.47020856157660085, "learning_rate": 5.87403285240906e-06, "loss": 0.1847, "step": 122730 }, { "epoch": 2.49852417302799, "grad_norm": 0.4944557239440466, "learning_rate": 5.8733332161265225e-06, "loss": 0.079, "step": 122740 }, { "epoch": 2.4987277353689565, "grad_norm": 6.757982966941289, "learning_rate": 5.872633562205895e-06, "loss": 0.1878, "step": 122750 }, { "epoch": 2.4989312977099236, "grad_norm": 26.880132964196772, "learning_rate": 5.871933890661309e-06, "loss": 0.2431, "step": 122760 }, { "epoch": 2.4991348600508907, "grad_norm": 0.2249666412661541, "learning_rate": 5.871234201506895e-06, "loss": 0.233, "step": 122770 }, { "epoch": 2.4993384223918573, "grad_norm": 13.075725119274619, "learning_rate": 5.870534494756784e-06, "loss": 0.13, "step": 122780 }, { "epoch": 2.4995419847328244, "grad_norm": 0.6537202357916334, "learning_rate": 5.869834770425108e-06, "loss": 0.0477, "step": 122790 }, { "epoch": 2.4997455470737915, "grad_norm": 0.08305955030269414, "learning_rate": 5.869135028525999e-06, "loss": 0.1078, "step": 122800 }, { "epoch": 2.499949109414758, "grad_norm": 19.051060762486028, "learning_rate": 5.868435269073588e-06, "loss": 0.319, "step": 122810 }, { "epoch": 2.500152671755725, "grad_norm": 2.2641829576064016, "learning_rate": 5.867735492082007e-06, "loss": 0.0239, "step": 122820 }, { "epoch": 2.5003562340966923, "grad_norm": 12.954356308787688, "learning_rate": 5.867035697565391e-06, "loss": 0.1689, "step": 122830 }, { "epoch": 2.500559796437659, "grad_norm": 28.683012140398237, "learning_rate": 5.866335885537874e-06, "loss": 0.1742, "step": 122840 }, { "epoch": 2.500763358778626, "grad_norm": 6.049263170732773, "learning_rate": 5.865636056013586e-06, "loss": 0.1081, "step": 122850 }, { "epoch": 2.500966921119593, "grad_norm": 0.11574058278129458, "learning_rate": 5.864936209006662e-06, "loss": 0.1155, "step": 122860 }, { "epoch": 2.5011704834605597, "grad_norm": 0.058964103028441926, "learning_rate": 5.86423634453124e-06, "loss": 0.1687, "step": 122870 }, { "epoch": 2.501374045801527, "grad_norm": 0.16188583679222637, "learning_rate": 5.863536462601451e-06, "loss": 0.1121, "step": 122880 }, { "epoch": 2.501577608142494, "grad_norm": 17.32773225205753, "learning_rate": 5.8628365632314295e-06, "loss": 0.1515, "step": 122890 }, { "epoch": 2.5017811704834605, "grad_norm": 14.130021485476789, "learning_rate": 5.8621366464353134e-06, "loss": 0.1023, "step": 122900 }, { "epoch": 2.5019847328244276, "grad_norm": 28.52273692111542, "learning_rate": 5.8614367122272374e-06, "loss": 0.1012, "step": 122910 }, { "epoch": 2.5021882951653946, "grad_norm": 15.628395622717761, "learning_rate": 5.860736760621338e-06, "loss": 0.1089, "step": 122920 }, { "epoch": 2.5023918575063613, "grad_norm": 8.186728968788524, "learning_rate": 5.860036791631751e-06, "loss": 0.1647, "step": 122930 }, { "epoch": 2.5025954198473284, "grad_norm": 0.6107464490662817, "learning_rate": 5.85933680527261e-06, "loss": 0.1706, "step": 122940 }, { "epoch": 2.5027989821882954, "grad_norm": 0.014312160376179318, "learning_rate": 5.858636801558061e-06, "loss": 0.1553, "step": 122950 }, { "epoch": 2.503002544529262, "grad_norm": 4.910234062208361, "learning_rate": 5.857936780502232e-06, "loss": 0.2454, "step": 122960 }, { "epoch": 2.503206106870229, "grad_norm": 16.07588996775519, "learning_rate": 5.857236742119266e-06, "loss": 0.1197, "step": 122970 }, { "epoch": 2.503409669211196, "grad_norm": 17.351044725849928, "learning_rate": 5.856536686423299e-06, "loss": 0.1951, "step": 122980 }, { "epoch": 2.503613231552163, "grad_norm": 7.442685370999384, "learning_rate": 5.85583661342847e-06, "loss": 0.2097, "step": 122990 }, { "epoch": 2.5038167938931295, "grad_norm": 4.1341379390083155, "learning_rate": 5.85513652314892e-06, "loss": 0.0987, "step": 123000 }, { "epoch": 2.5040203562340966, "grad_norm": 25.676658831839607, "learning_rate": 5.854436415598785e-06, "loss": 0.1373, "step": 123010 }, { "epoch": 2.5042239185750637, "grad_norm": 7.597110175203984, "learning_rate": 5.853736290792207e-06, "loss": 0.2063, "step": 123020 }, { "epoch": 2.5044274809160303, "grad_norm": 8.099127475254074, "learning_rate": 5.853036148743325e-06, "loss": 0.1377, "step": 123030 }, { "epoch": 2.5046310432569974, "grad_norm": 0.28703358136265966, "learning_rate": 5.8523359894662805e-06, "loss": 0.1558, "step": 123040 }, { "epoch": 2.5048346055979644, "grad_norm": 8.713025182134293, "learning_rate": 5.851635812975212e-06, "loss": 0.1064, "step": 123050 }, { "epoch": 2.505038167938931, "grad_norm": 0.18406895068632326, "learning_rate": 5.850935619284259e-06, "loss": 0.0376, "step": 123060 }, { "epoch": 2.505241730279898, "grad_norm": 24.208720227109023, "learning_rate": 5.850235408407568e-06, "loss": 0.1255, "step": 123070 }, { "epoch": 2.5054452926208652, "grad_norm": 11.674783979406826, "learning_rate": 5.849535180359276e-06, "loss": 0.152, "step": 123080 }, { "epoch": 2.505648854961832, "grad_norm": 12.98554071458972, "learning_rate": 5.848834935153528e-06, "loss": 0.1753, "step": 123090 }, { "epoch": 2.505852417302799, "grad_norm": 9.120617355818093, "learning_rate": 5.8481346728044665e-06, "loss": 0.1496, "step": 123100 }, { "epoch": 2.506055979643766, "grad_norm": 0.4269622728800086, "learning_rate": 5.847434393326229e-06, "loss": 0.1778, "step": 123110 }, { "epoch": 2.5062595419847327, "grad_norm": 3.0685346384826446, "learning_rate": 5.846734096732964e-06, "loss": 0.1054, "step": 123120 }, { "epoch": 2.5064631043256997, "grad_norm": 0.5549273767905994, "learning_rate": 5.846033783038816e-06, "loss": 0.1098, "step": 123130 }, { "epoch": 2.506666666666667, "grad_norm": 0.9458731001905395, "learning_rate": 5.8453334522579245e-06, "loss": 0.0287, "step": 123140 }, { "epoch": 2.5068702290076335, "grad_norm": 0.5155286438913097, "learning_rate": 5.844633104404433e-06, "loss": 0.1842, "step": 123150 }, { "epoch": 2.5070737913486005, "grad_norm": 12.69126912746572, "learning_rate": 5.843932739492491e-06, "loss": 0.0566, "step": 123160 }, { "epoch": 2.5072773536895676, "grad_norm": 9.338190237767687, "learning_rate": 5.843232357536239e-06, "loss": 0.1878, "step": 123170 }, { "epoch": 2.5074809160305342, "grad_norm": 4.370996474149396, "learning_rate": 5.842531958549823e-06, "loss": 0.0892, "step": 123180 }, { "epoch": 2.5076844783715013, "grad_norm": 8.046185960338365, "learning_rate": 5.841831542547389e-06, "loss": 0.1847, "step": 123190 }, { "epoch": 2.5078880407124684, "grad_norm": 5.4993283013432075, "learning_rate": 5.8411311095430844e-06, "loss": 0.1745, "step": 123200 }, { "epoch": 2.508091603053435, "grad_norm": 0.07408309103221886, "learning_rate": 5.840430659551052e-06, "loss": 0.1326, "step": 123210 }, { "epoch": 2.508295165394402, "grad_norm": 0.2684786546794098, "learning_rate": 5.8397301925854425e-06, "loss": 0.1075, "step": 123220 }, { "epoch": 2.508498727735369, "grad_norm": 12.856042838195265, "learning_rate": 5.839029708660396e-06, "loss": 0.1451, "step": 123230 }, { "epoch": 2.508702290076336, "grad_norm": 0.42527032178224605, "learning_rate": 5.838329207790068e-06, "loss": 0.0979, "step": 123240 }, { "epoch": 2.508905852417303, "grad_norm": 11.232317963031518, "learning_rate": 5.837628689988599e-06, "loss": 0.1232, "step": 123250 }, { "epoch": 2.50910941475827, "grad_norm": 0.1972666436885362, "learning_rate": 5.8369281552701405e-06, "loss": 0.1193, "step": 123260 }, { "epoch": 2.5093129770992366, "grad_norm": 0.09906730753507337, "learning_rate": 5.83622760364884e-06, "loss": 0.1356, "step": 123270 }, { "epoch": 2.5095165394402037, "grad_norm": 19.78908321174216, "learning_rate": 5.835527035138846e-06, "loss": 0.2485, "step": 123280 }, { "epoch": 2.5097201017811703, "grad_norm": 12.350521140117202, "learning_rate": 5.8348264497543075e-06, "loss": 0.2119, "step": 123290 }, { "epoch": 2.5099236641221374, "grad_norm": 21.92578500940297, "learning_rate": 5.834125847509375e-06, "loss": 0.307, "step": 123300 }, { "epoch": 2.5101272264631045, "grad_norm": 13.316134972049637, "learning_rate": 5.833425228418193e-06, "loss": 0.0944, "step": 123310 }, { "epoch": 2.510330788804071, "grad_norm": 1.1972235587337658, "learning_rate": 5.832724592494918e-06, "loss": 0.0926, "step": 123320 }, { "epoch": 2.510534351145038, "grad_norm": 13.163543914529965, "learning_rate": 5.832023939753698e-06, "loss": 0.2718, "step": 123330 }, { "epoch": 2.510737913486005, "grad_norm": 0.6955163177130661, "learning_rate": 5.831323270208682e-06, "loss": 0.0893, "step": 123340 }, { "epoch": 2.510941475826972, "grad_norm": 13.469857697630319, "learning_rate": 5.830622583874019e-06, "loss": 0.0804, "step": 123350 }, { "epoch": 2.511145038167939, "grad_norm": 12.847621077488942, "learning_rate": 5.829921880763867e-06, "loss": 0.1582, "step": 123360 }, { "epoch": 2.5113486005089056, "grad_norm": 0.16215111072836058, "learning_rate": 5.829221160892372e-06, "loss": 0.1059, "step": 123370 }, { "epoch": 2.5115521628498727, "grad_norm": 25.504213082659984, "learning_rate": 5.828520424273688e-06, "loss": 0.1501, "step": 123380 }, { "epoch": 2.51175572519084, "grad_norm": 10.192829971241204, "learning_rate": 5.827819670921967e-06, "loss": 0.0302, "step": 123390 }, { "epoch": 2.5119592875318064, "grad_norm": 6.089526336225925, "learning_rate": 5.8271189008513605e-06, "loss": 0.0501, "step": 123400 }, { "epoch": 2.5121628498727735, "grad_norm": 2.5888769198064376, "learning_rate": 5.826418114076024e-06, "loss": 0.2286, "step": 123410 }, { "epoch": 2.5123664122137406, "grad_norm": 23.817905531288456, "learning_rate": 5.825717310610108e-06, "loss": 0.2017, "step": 123420 }, { "epoch": 2.512569974554707, "grad_norm": 9.829702642433846, "learning_rate": 5.8250164904677685e-06, "loss": 0.1512, "step": 123430 }, { "epoch": 2.5127735368956743, "grad_norm": 4.96602925206193, "learning_rate": 5.8243156536631576e-06, "loss": 0.0694, "step": 123440 }, { "epoch": 2.5129770992366414, "grad_norm": 0.6930663071987868, "learning_rate": 5.823614800210431e-06, "loss": 0.1044, "step": 123450 }, { "epoch": 2.513180661577608, "grad_norm": 0.16740370911910768, "learning_rate": 5.8229139301237415e-06, "loss": 0.0718, "step": 123460 }, { "epoch": 2.513384223918575, "grad_norm": 0.4711831722707077, "learning_rate": 5.822213043417247e-06, "loss": 0.0742, "step": 123470 }, { "epoch": 2.513587786259542, "grad_norm": 0.21965103670946787, "learning_rate": 5.821512140105099e-06, "loss": 0.1886, "step": 123480 }, { "epoch": 2.513791348600509, "grad_norm": 0.036188414345088474, "learning_rate": 5.8208112202014565e-06, "loss": 0.1146, "step": 123490 }, { "epoch": 2.513994910941476, "grad_norm": 1.5789345952158034, "learning_rate": 5.820110283720475e-06, "loss": 0.0715, "step": 123500 }, { "epoch": 2.514198473282443, "grad_norm": 11.20120812304908, "learning_rate": 5.819409330676309e-06, "loss": 0.1966, "step": 123510 }, { "epoch": 2.5144020356234096, "grad_norm": 12.41283593709842, "learning_rate": 5.818708361083114e-06, "loss": 0.1612, "step": 123520 }, { "epoch": 2.5146055979643767, "grad_norm": 1.8402589191381478, "learning_rate": 5.818007374955052e-06, "loss": 0.1372, "step": 123530 }, { "epoch": 2.5148091603053437, "grad_norm": 7.679155087493919, "learning_rate": 5.817306372306277e-06, "loss": 0.1652, "step": 123540 }, { "epoch": 2.5150127226463104, "grad_norm": 35.99793032922847, "learning_rate": 5.816605353150948e-06, "loss": 0.1017, "step": 123550 }, { "epoch": 2.5152162849872775, "grad_norm": 0.17358772490395855, "learning_rate": 5.8159043175032205e-06, "loss": 0.1542, "step": 123560 }, { "epoch": 2.5154198473282445, "grad_norm": 7.733037971256098, "learning_rate": 5.815203265377255e-06, "loss": 0.0695, "step": 123570 }, { "epoch": 2.515623409669211, "grad_norm": 0.13335454921389286, "learning_rate": 5.8145021967872095e-06, "loss": 0.1402, "step": 123580 }, { "epoch": 2.5158269720101782, "grad_norm": 6.312892371060462, "learning_rate": 5.813801111747244e-06, "loss": 0.1432, "step": 123590 }, { "epoch": 2.516030534351145, "grad_norm": 9.924627162782906, "learning_rate": 5.813100010271515e-06, "loss": 0.1524, "step": 123600 }, { "epoch": 2.516234096692112, "grad_norm": 33.65445751659304, "learning_rate": 5.812398892374185e-06, "loss": 0.1107, "step": 123610 }, { "epoch": 2.516437659033079, "grad_norm": 8.088052498986457, "learning_rate": 5.811697758069415e-06, "loss": 0.1404, "step": 123620 }, { "epoch": 2.5166412213740457, "grad_norm": 36.002314137100534, "learning_rate": 5.81099660737136e-06, "loss": 0.1477, "step": 123630 }, { "epoch": 2.5168447837150127, "grad_norm": 15.193891638763104, "learning_rate": 5.810295440294184e-06, "loss": 0.0889, "step": 123640 }, { "epoch": 2.5170483460559794, "grad_norm": 14.807111341638397, "learning_rate": 5.8095942568520505e-06, "loss": 0.1441, "step": 123650 }, { "epoch": 2.5172519083969465, "grad_norm": 0.07519606013094624, "learning_rate": 5.808893057059117e-06, "loss": 0.1066, "step": 123660 }, { "epoch": 2.5174554707379135, "grad_norm": 33.30348176255015, "learning_rate": 5.8081918409295464e-06, "loss": 0.1002, "step": 123670 }, { "epoch": 2.51765903307888, "grad_norm": 45.307266514483196, "learning_rate": 5.807490608477501e-06, "loss": 0.1264, "step": 123680 }, { "epoch": 2.5178625954198472, "grad_norm": 12.734193088654484, "learning_rate": 5.806789359717142e-06, "loss": 0.092, "step": 123690 }, { "epoch": 2.5180661577608143, "grad_norm": 0.06348387621111957, "learning_rate": 5.806088094662633e-06, "loss": 0.1436, "step": 123700 }, { "epoch": 2.518269720101781, "grad_norm": 0.0505366220535898, "learning_rate": 5.805386813328138e-06, "loss": 0.059, "step": 123710 }, { "epoch": 2.518473282442748, "grad_norm": 0.44513555392846643, "learning_rate": 5.804685515727816e-06, "loss": 0.0891, "step": 123720 }, { "epoch": 2.518676844783715, "grad_norm": 0.37774711438232855, "learning_rate": 5.803984201875837e-06, "loss": 0.084, "step": 123730 }, { "epoch": 2.5188804071246818, "grad_norm": 22.110662299663076, "learning_rate": 5.8032828717863585e-06, "loss": 0.121, "step": 123740 }, { "epoch": 2.519083969465649, "grad_norm": 3.4725649943145016, "learning_rate": 5.80258152547355e-06, "loss": 0.1294, "step": 123750 }, { "epoch": 2.519287531806616, "grad_norm": 15.258368469535847, "learning_rate": 5.801880162951572e-06, "loss": 0.2111, "step": 123760 }, { "epoch": 2.5194910941475825, "grad_norm": 0.03451604044152879, "learning_rate": 5.801178784234592e-06, "loss": 0.0489, "step": 123770 }, { "epoch": 2.5196946564885496, "grad_norm": 5.8826301528271, "learning_rate": 5.800477389336775e-06, "loss": 0.1058, "step": 123780 }, { "epoch": 2.5198982188295167, "grad_norm": 44.79184962570701, "learning_rate": 5.799775978272287e-06, "loss": 0.1895, "step": 123790 }, { "epoch": 2.5201017811704833, "grad_norm": 17.104357594477438, "learning_rate": 5.799074551055292e-06, "loss": 0.1381, "step": 123800 }, { "epoch": 2.5203053435114504, "grad_norm": 27.323462640501866, "learning_rate": 5.798373107699956e-06, "loss": 0.1016, "step": 123810 }, { "epoch": 2.5205089058524175, "grad_norm": 18.62531950592784, "learning_rate": 5.797671648220449e-06, "loss": 0.1033, "step": 123820 }, { "epoch": 2.520712468193384, "grad_norm": 4.390033565598877, "learning_rate": 5.796970172630935e-06, "loss": 0.1519, "step": 123830 }, { "epoch": 2.520916030534351, "grad_norm": 0.374612323370534, "learning_rate": 5.796268680945582e-06, "loss": 0.0754, "step": 123840 }, { "epoch": 2.5211195928753183, "grad_norm": 0.03875033410847919, "learning_rate": 5.795567173178556e-06, "loss": 0.1123, "step": 123850 }, { "epoch": 2.521323155216285, "grad_norm": 0.135767402297277, "learning_rate": 5.794865649344027e-06, "loss": 0.1436, "step": 123860 }, { "epoch": 2.521526717557252, "grad_norm": 0.047658564404766485, "learning_rate": 5.794164109456163e-06, "loss": 0.1746, "step": 123870 }, { "epoch": 2.521730279898219, "grad_norm": 4.139073384740001, "learning_rate": 5.793462553529132e-06, "loss": 0.0983, "step": 123880 }, { "epoch": 2.5219338422391857, "grad_norm": 0.3606526648923343, "learning_rate": 5.792760981577101e-06, "loss": 0.1799, "step": 123890 }, { "epoch": 2.522137404580153, "grad_norm": 5.680805077062904, "learning_rate": 5.7920593936142424e-06, "loss": 0.1881, "step": 123900 }, { "epoch": 2.52234096692112, "grad_norm": 16.51826390119333, "learning_rate": 5.791357789654725e-06, "loss": 0.1426, "step": 123910 }, { "epoch": 2.5225445292620865, "grad_norm": 1.4927264175999448, "learning_rate": 5.790656169712717e-06, "loss": 0.0698, "step": 123920 }, { "epoch": 2.5227480916030536, "grad_norm": 9.002457865593938, "learning_rate": 5.789954533802389e-06, "loss": 0.1253, "step": 123930 }, { "epoch": 2.52295165394402, "grad_norm": 0.3857311708392344, "learning_rate": 5.789252881937911e-06, "loss": 0.1289, "step": 123940 }, { "epoch": 2.5231552162849873, "grad_norm": 9.034446996352422, "learning_rate": 5.788551214133455e-06, "loss": 0.0884, "step": 123950 }, { "epoch": 2.523358778625954, "grad_norm": 1.4265031574196743, "learning_rate": 5.7878495304031925e-06, "loss": 0.0236, "step": 123960 }, { "epoch": 2.523562340966921, "grad_norm": 15.746215276825767, "learning_rate": 5.787147830761294e-06, "loss": 0.1045, "step": 123970 }, { "epoch": 2.523765903307888, "grad_norm": 37.44409025973127, "learning_rate": 5.78644611522193e-06, "loss": 0.068, "step": 123980 }, { "epoch": 2.5239694656488547, "grad_norm": 15.043501138560467, "learning_rate": 5.785744383799276e-06, "loss": 0.0884, "step": 123990 }, { "epoch": 2.524173027989822, "grad_norm": 11.174018345250035, "learning_rate": 5.7850426365074995e-06, "loss": 0.0606, "step": 124000 }, { "epoch": 2.524376590330789, "grad_norm": 7.628734981532155, "learning_rate": 5.784340873360776e-06, "loss": 0.2091, "step": 124010 }, { "epoch": 2.5245801526717555, "grad_norm": 41.8157376763383, "learning_rate": 5.78363909437328e-06, "loss": 0.2044, "step": 124020 }, { "epoch": 2.5247837150127226, "grad_norm": 11.156574341373016, "learning_rate": 5.782937299559182e-06, "loss": 0.2344, "step": 124030 }, { "epoch": 2.5249872773536897, "grad_norm": 0.08391285569587642, "learning_rate": 5.782235488932657e-06, "loss": 0.1107, "step": 124040 }, { "epoch": 2.5251908396946563, "grad_norm": 0.9401749259258094, "learning_rate": 5.781533662507879e-06, "loss": 0.1135, "step": 124050 }, { "epoch": 2.5253944020356234, "grad_norm": 19.077861732551998, "learning_rate": 5.7808318202990225e-06, "loss": 0.2095, "step": 124060 }, { "epoch": 2.5255979643765905, "grad_norm": 0.009083031168500632, "learning_rate": 5.7801299623202615e-06, "loss": 0.1148, "step": 124070 }, { "epoch": 2.525801526717557, "grad_norm": 64.70038947493961, "learning_rate": 5.779428088585771e-06, "loss": 0.1374, "step": 124080 }, { "epoch": 2.526005089058524, "grad_norm": 1.489879069861173, "learning_rate": 5.778726199109726e-06, "loss": 0.0673, "step": 124090 }, { "epoch": 2.5262086513994912, "grad_norm": 0.202950917982363, "learning_rate": 5.778024293906301e-06, "loss": 0.0941, "step": 124100 }, { "epoch": 2.526412213740458, "grad_norm": 38.780554975830235, "learning_rate": 5.777322372989677e-06, "loss": 0.2188, "step": 124110 }, { "epoch": 2.526615776081425, "grad_norm": 3.205785011495835, "learning_rate": 5.776620436374024e-06, "loss": 0.2632, "step": 124120 }, { "epoch": 2.526819338422392, "grad_norm": 0.22854199272083178, "learning_rate": 5.775918484073521e-06, "loss": 0.1449, "step": 124130 }, { "epoch": 2.5270229007633587, "grad_norm": 0.09523453615022545, "learning_rate": 5.775216516102346e-06, "loss": 0.0625, "step": 124140 }, { "epoch": 2.5272264631043257, "grad_norm": 11.509417110468743, "learning_rate": 5.774514532474673e-06, "loss": 0.0536, "step": 124150 }, { "epoch": 2.527430025445293, "grad_norm": 0.5259529073168273, "learning_rate": 5.773812533204683e-06, "loss": 0.1155, "step": 124160 }, { "epoch": 2.5276335877862595, "grad_norm": 18.97121060676229, "learning_rate": 5.773110518306553e-06, "loss": 0.1587, "step": 124170 }, { "epoch": 2.5278371501272265, "grad_norm": 21.602444107565354, "learning_rate": 5.7724084877944584e-06, "loss": 0.0849, "step": 124180 }, { "epoch": 2.5280407124681936, "grad_norm": 0.05757880776222043, "learning_rate": 5.771706441682581e-06, "loss": 0.1156, "step": 124190 }, { "epoch": 2.5282442748091603, "grad_norm": 0.0784411291049872, "learning_rate": 5.771004379985098e-06, "loss": 0.1049, "step": 124200 }, { "epoch": 2.5284478371501273, "grad_norm": 0.08484584751902984, "learning_rate": 5.770302302716187e-06, "loss": 0.1622, "step": 124210 }, { "epoch": 2.5286513994910944, "grad_norm": 0.035373590953608064, "learning_rate": 5.769600209890031e-06, "loss": 0.1547, "step": 124220 }, { "epoch": 2.528854961832061, "grad_norm": 0.33349574371904134, "learning_rate": 5.7688981015208056e-06, "loss": 0.1018, "step": 124230 }, { "epoch": 2.529058524173028, "grad_norm": 0.14832843173346952, "learning_rate": 5.768195977622692e-06, "loss": 0.0668, "step": 124240 }, { "epoch": 2.5292620865139948, "grad_norm": 1.4206529458699177, "learning_rate": 5.767493838209873e-06, "loss": 0.1252, "step": 124250 }, { "epoch": 2.529465648854962, "grad_norm": 21.334690518006138, "learning_rate": 5.766791683296528e-06, "loss": 0.1619, "step": 124260 }, { "epoch": 2.529669211195929, "grad_norm": 6.255006863559785, "learning_rate": 5.766089512896835e-06, "loss": 0.1501, "step": 124270 }, { "epoch": 2.5298727735368955, "grad_norm": 12.894100587220924, "learning_rate": 5.765387327024981e-06, "loss": 0.0672, "step": 124280 }, { "epoch": 2.5300763358778626, "grad_norm": 34.694859876646376, "learning_rate": 5.764685125695142e-06, "loss": 0.1573, "step": 124290 }, { "epoch": 2.5302798982188293, "grad_norm": 1.6500505466848727, "learning_rate": 5.7639829089215e-06, "loss": 0.0823, "step": 124300 }, { "epoch": 2.5304834605597963, "grad_norm": 28.50411705317148, "learning_rate": 5.763280676718243e-06, "loss": 0.0978, "step": 124310 }, { "epoch": 2.5306870229007634, "grad_norm": 4.100527273426025, "learning_rate": 5.762578429099548e-06, "loss": 0.183, "step": 124320 }, { "epoch": 2.53089058524173, "grad_norm": 4.998752124189541, "learning_rate": 5.761876166079598e-06, "loss": 0.1483, "step": 124330 }, { "epoch": 2.531094147582697, "grad_norm": 13.33455463498086, "learning_rate": 5.7611738876725796e-06, "loss": 0.2745, "step": 124340 }, { "epoch": 2.531297709923664, "grad_norm": 1.4662947244529747, "learning_rate": 5.7604715938926735e-06, "loss": 0.1633, "step": 124350 }, { "epoch": 2.531501272264631, "grad_norm": 0.031128869197323866, "learning_rate": 5.759769284754063e-06, "loss": 0.0641, "step": 124360 }, { "epoch": 2.531704834605598, "grad_norm": 8.195725613003948, "learning_rate": 5.759066960270936e-06, "loss": 0.1234, "step": 124370 }, { "epoch": 2.531908396946565, "grad_norm": 18.064621155298155, "learning_rate": 5.75836462045747e-06, "loss": 0.1516, "step": 124380 }, { "epoch": 2.5321119592875316, "grad_norm": 11.96835538769432, "learning_rate": 5.7576622653278566e-06, "loss": 0.2004, "step": 124390 }, { "epoch": 2.5323155216284987, "grad_norm": 0.23658134949501, "learning_rate": 5.756959894896278e-06, "loss": 0.0819, "step": 124400 }, { "epoch": 2.532519083969466, "grad_norm": 37.097598632548944, "learning_rate": 5.756257509176919e-06, "loss": 0.1173, "step": 124410 }, { "epoch": 2.5327226463104324, "grad_norm": 22.447043400431454, "learning_rate": 5.755555108183964e-06, "loss": 0.1142, "step": 124420 }, { "epoch": 2.5329262086513995, "grad_norm": 7.120364499253622, "learning_rate": 5.754852691931602e-06, "loss": 0.1643, "step": 124430 }, { "epoch": 2.5331297709923666, "grad_norm": 16.345772081374303, "learning_rate": 5.7541502604340174e-06, "loss": 0.1052, "step": 124440 }, { "epoch": 2.533333333333333, "grad_norm": 0.44205009268777773, "learning_rate": 5.753447813705396e-06, "loss": 0.0366, "step": 124450 }, { "epoch": 2.5335368956743003, "grad_norm": 7.209951687215814, "learning_rate": 5.752745351759927e-06, "loss": 0.1601, "step": 124460 }, { "epoch": 2.5337404580152674, "grad_norm": 0.08594569789163746, "learning_rate": 5.752042874611795e-06, "loss": 0.0906, "step": 124470 }, { "epoch": 2.533944020356234, "grad_norm": 16.422507907771397, "learning_rate": 5.7513403822751905e-06, "loss": 0.1059, "step": 124480 }, { "epoch": 2.534147582697201, "grad_norm": 7.9268538073787855, "learning_rate": 5.750637874764298e-06, "loss": 0.0931, "step": 124490 }, { "epoch": 2.534351145038168, "grad_norm": 5.316920173979419, "learning_rate": 5.749935352093305e-06, "loss": 0.1165, "step": 124500 }, { "epoch": 2.534554707379135, "grad_norm": 3.946210780206027, "learning_rate": 5.749232814276404e-06, "loss": 0.0895, "step": 124510 }, { "epoch": 2.534758269720102, "grad_norm": 0.5479478547213629, "learning_rate": 5.74853026132778e-06, "loss": 0.1027, "step": 124520 }, { "epoch": 2.534961832061069, "grad_norm": 7.263866962312268, "learning_rate": 5.747827693261624e-06, "loss": 0.3065, "step": 124530 }, { "epoch": 2.5351653944020356, "grad_norm": 12.647080355009544, "learning_rate": 5.747125110092124e-06, "loss": 0.107, "step": 124540 }, { "epoch": 2.5353689567430027, "grad_norm": 6.625333185077543, "learning_rate": 5.746422511833471e-06, "loss": 0.0526, "step": 124550 }, { "epoch": 2.5355725190839693, "grad_norm": 23.86725381326229, "learning_rate": 5.745719898499853e-06, "loss": 0.1346, "step": 124560 }, { "epoch": 2.5357760814249364, "grad_norm": 42.5558257593412, "learning_rate": 5.745017270105462e-06, "loss": 0.1489, "step": 124570 }, { "epoch": 2.5359796437659035, "grad_norm": 0.10702137953933837, "learning_rate": 5.744314626664488e-06, "loss": 0.1058, "step": 124580 }, { "epoch": 2.53618320610687, "grad_norm": 29.34267598383793, "learning_rate": 5.743611968191118e-06, "loss": 0.1696, "step": 124590 }, { "epoch": 2.536386768447837, "grad_norm": 11.468273446562836, "learning_rate": 5.7429092946995515e-06, "loss": 0.0797, "step": 124600 }, { "epoch": 2.536590330788804, "grad_norm": 7.555564026424744, "learning_rate": 5.742206606203973e-06, "loss": 0.2483, "step": 124610 }, { "epoch": 2.536793893129771, "grad_norm": 18.33166810740065, "learning_rate": 5.7415039027185744e-06, "loss": 0.1828, "step": 124620 }, { "epoch": 2.536997455470738, "grad_norm": 0.06048751960193141, "learning_rate": 5.740801184257552e-06, "loss": 0.0621, "step": 124630 }, { "epoch": 2.5372010178117046, "grad_norm": 10.619064636235313, "learning_rate": 5.740098450835094e-06, "loss": 0.1857, "step": 124640 }, { "epoch": 2.5374045801526717, "grad_norm": 0.4809356022738933, "learning_rate": 5.739395702465395e-06, "loss": 0.0759, "step": 124650 }, { "epoch": 2.5376081424936388, "grad_norm": 13.53387621205015, "learning_rate": 5.738692939162648e-06, "loss": 0.2506, "step": 124660 }, { "epoch": 2.5378117048346054, "grad_norm": 1.4873745355675576, "learning_rate": 5.737990160941044e-06, "loss": 0.0995, "step": 124670 }, { "epoch": 2.5380152671755725, "grad_norm": 4.019669290773534, "learning_rate": 5.7372873678147785e-06, "loss": 0.1711, "step": 124680 }, { "epoch": 2.5382188295165395, "grad_norm": 6.845000640281405, "learning_rate": 5.736584559798046e-06, "loss": 0.2375, "step": 124690 }, { "epoch": 2.538422391857506, "grad_norm": 1.2219937201101538, "learning_rate": 5.7358817369050405e-06, "loss": 0.0862, "step": 124700 }, { "epoch": 2.5386259541984733, "grad_norm": 0.21209236100489035, "learning_rate": 5.735178899149953e-06, "loss": 0.2241, "step": 124710 }, { "epoch": 2.5388295165394403, "grad_norm": 3.8571273287440717, "learning_rate": 5.734476046546983e-06, "loss": 0.1725, "step": 124720 }, { "epoch": 2.539033078880407, "grad_norm": 0.7645744409232031, "learning_rate": 5.733773179110322e-06, "loss": 0.0403, "step": 124730 }, { "epoch": 2.539236641221374, "grad_norm": 11.740771788020739, "learning_rate": 5.733070296854166e-06, "loss": 0.1057, "step": 124740 }, { "epoch": 2.539440203562341, "grad_norm": 0.7894567845042996, "learning_rate": 5.732367399792712e-06, "loss": 0.1256, "step": 124750 }, { "epoch": 2.5396437659033078, "grad_norm": 0.2417836210524033, "learning_rate": 5.731664487940155e-06, "loss": 0.2162, "step": 124760 }, { "epoch": 2.539847328244275, "grad_norm": 0.18212714723968543, "learning_rate": 5.730961561310693e-06, "loss": 0.0837, "step": 124770 }, { "epoch": 2.540050890585242, "grad_norm": 8.9922176139465, "learning_rate": 5.730258619918518e-06, "loss": 0.1496, "step": 124780 }, { "epoch": 2.5402544529262086, "grad_norm": 2.4121203962976745, "learning_rate": 5.72955566377783e-06, "loss": 0.0778, "step": 124790 }, { "epoch": 2.5404580152671756, "grad_norm": 7.271633944834004, "learning_rate": 5.728852692902827e-06, "loss": 0.0937, "step": 124800 }, { "epoch": 2.5406615776081427, "grad_norm": 12.94495500542754, "learning_rate": 5.728149707307705e-06, "loss": 0.1721, "step": 124810 }, { "epoch": 2.5408651399491093, "grad_norm": 24.81422922528758, "learning_rate": 5.727446707006662e-06, "loss": 0.168, "step": 124820 }, { "epoch": 2.5410687022900764, "grad_norm": 18.214871128513927, "learning_rate": 5.726743692013897e-06, "loss": 0.1081, "step": 124830 }, { "epoch": 2.5412722646310435, "grad_norm": 0.1532417527599392, "learning_rate": 5.7260406623436025e-06, "loss": 0.1196, "step": 124840 }, { "epoch": 2.54147582697201, "grad_norm": 8.158888348871342, "learning_rate": 5.725337618009985e-06, "loss": 0.0496, "step": 124850 }, { "epoch": 2.541679389312977, "grad_norm": 0.19310385684810277, "learning_rate": 5.7246345590272405e-06, "loss": 0.0716, "step": 124860 }, { "epoch": 2.5418829516539443, "grad_norm": 13.846788307378109, "learning_rate": 5.723931485409566e-06, "loss": 0.1243, "step": 124870 }, { "epoch": 2.542086513994911, "grad_norm": 14.706996341856122, "learning_rate": 5.7232283971711634e-06, "loss": 0.21, "step": 124880 }, { "epoch": 2.542290076335878, "grad_norm": 0.4955641363793352, "learning_rate": 5.722525294326233e-06, "loss": 0.1592, "step": 124890 }, { "epoch": 2.5424936386768446, "grad_norm": 0.06177215501068474, "learning_rate": 5.721822176888973e-06, "loss": 0.0843, "step": 124900 }, { "epoch": 2.5426972010178117, "grad_norm": 3.1526466120120573, "learning_rate": 5.721119044873583e-06, "loss": 0.0316, "step": 124910 }, { "epoch": 2.5429007633587783, "grad_norm": 1.0041499003059646, "learning_rate": 5.720415898294266e-06, "loss": 0.0421, "step": 124920 }, { "epoch": 2.5431043256997454, "grad_norm": 0.4827176854556001, "learning_rate": 5.719712737165223e-06, "loss": 0.1278, "step": 124930 }, { "epoch": 2.5433078880407125, "grad_norm": 0.14250018313621965, "learning_rate": 5.7190095615006524e-06, "loss": 0.0859, "step": 124940 }, { "epoch": 2.543511450381679, "grad_norm": 49.45822794124995, "learning_rate": 5.71830637131476e-06, "loss": 0.1123, "step": 124950 }, { "epoch": 2.543715012722646, "grad_norm": 28.325588097812396, "learning_rate": 5.717603166621743e-06, "loss": 0.1497, "step": 124960 }, { "epoch": 2.5439185750636133, "grad_norm": 14.074946110880576, "learning_rate": 5.716899947435808e-06, "loss": 0.2756, "step": 124970 }, { "epoch": 2.54412213740458, "grad_norm": 32.15004753844544, "learning_rate": 5.716196713771154e-06, "loss": 0.2164, "step": 124980 }, { "epoch": 2.544325699745547, "grad_norm": 14.3711389918979, "learning_rate": 5.715493465641985e-06, "loss": 0.12, "step": 124990 }, { "epoch": 2.544529262086514, "grad_norm": 0.013660622225670657, "learning_rate": 5.7147902030625035e-06, "loss": 0.1314, "step": 125000 }, { "epoch": 2.5447328244274807, "grad_norm": 5.641149191959273, "learning_rate": 5.714086926046913e-06, "loss": 0.1953, "step": 125010 }, { "epoch": 2.544936386768448, "grad_norm": 16.403704514959706, "learning_rate": 5.713383634609417e-06, "loss": 0.0943, "step": 125020 }, { "epoch": 2.545139949109415, "grad_norm": 0.653177358443466, "learning_rate": 5.712680328764222e-06, "loss": 0.1828, "step": 125030 }, { "epoch": 2.5453435114503815, "grad_norm": 0.9964727946780798, "learning_rate": 5.711977008525527e-06, "loss": 0.0559, "step": 125040 }, { "epoch": 2.5455470737913486, "grad_norm": 0.05738432048394947, "learning_rate": 5.711273673907541e-06, "loss": 0.0341, "step": 125050 }, { "epoch": 2.5457506361323157, "grad_norm": 1.0821664106786595, "learning_rate": 5.710570324924468e-06, "loss": 0.074, "step": 125060 }, { "epoch": 2.5459541984732823, "grad_norm": 0.3435355081656284, "learning_rate": 5.70986696159051e-06, "loss": 0.112, "step": 125070 }, { "epoch": 2.5461577608142494, "grad_norm": 10.921359203693239, "learning_rate": 5.7091635839198744e-06, "loss": 0.2474, "step": 125080 }, { "epoch": 2.5463613231552165, "grad_norm": 1.0957266371848282, "learning_rate": 5.708460191926768e-06, "loss": 0.1637, "step": 125090 }, { "epoch": 2.546564885496183, "grad_norm": 5.380409038914361, "learning_rate": 5.707756785625396e-06, "loss": 0.2262, "step": 125100 }, { "epoch": 2.54676844783715, "grad_norm": 53.697165346721704, "learning_rate": 5.707053365029962e-06, "loss": 0.1144, "step": 125110 }, { "epoch": 2.5469720101781173, "grad_norm": 20.879760129016738, "learning_rate": 5.706349930154676e-06, "loss": 0.0819, "step": 125120 }, { "epoch": 2.547175572519084, "grad_norm": 23.946047803608813, "learning_rate": 5.7056464810137435e-06, "loss": 0.1466, "step": 125130 }, { "epoch": 2.547379134860051, "grad_norm": 0.8864214307695001, "learning_rate": 5.704943017621371e-06, "loss": 0.1333, "step": 125140 }, { "epoch": 2.547582697201018, "grad_norm": 11.654147299309935, "learning_rate": 5.704239539991767e-06, "loss": 0.1064, "step": 125150 }, { "epoch": 2.5477862595419847, "grad_norm": 8.047802059300414, "learning_rate": 5.703536048139138e-06, "loss": 0.1992, "step": 125160 }, { "epoch": 2.5479898218829518, "grad_norm": 0.9333201242290189, "learning_rate": 5.702832542077692e-06, "loss": 0.0635, "step": 125170 }, { "epoch": 2.548193384223919, "grad_norm": 14.20491681237555, "learning_rate": 5.702129021821638e-06, "loss": 0.2604, "step": 125180 }, { "epoch": 2.5483969465648855, "grad_norm": 7.594592625539902, "learning_rate": 5.701425487385184e-06, "loss": 0.1192, "step": 125190 }, { "epoch": 2.5486005089058525, "grad_norm": 0.14288588026717453, "learning_rate": 5.700721938782538e-06, "loss": 0.0992, "step": 125200 }, { "epoch": 2.548804071246819, "grad_norm": 14.06034135023364, "learning_rate": 5.700018376027912e-06, "loss": 0.135, "step": 125210 }, { "epoch": 2.5490076335877863, "grad_norm": 0.5340043329382835, "learning_rate": 5.6993147991355115e-06, "loss": 0.1102, "step": 125220 }, { "epoch": 2.5492111959287533, "grad_norm": 9.032248050132237, "learning_rate": 5.698611208119548e-06, "loss": 0.0831, "step": 125230 }, { "epoch": 2.54941475826972, "grad_norm": 0.12155576109248695, "learning_rate": 5.697907602994232e-06, "loss": 0.1504, "step": 125240 }, { "epoch": 2.549618320610687, "grad_norm": 0.06755956272325854, "learning_rate": 5.697203983773772e-06, "loss": 0.1317, "step": 125250 }, { "epoch": 2.5498218829516537, "grad_norm": 0.03207484405397524, "learning_rate": 5.696500350472381e-06, "loss": 0.0884, "step": 125260 }, { "epoch": 2.5500254452926208, "grad_norm": 43.09328658703646, "learning_rate": 5.695796703104268e-06, "loss": 0.1879, "step": 125270 }, { "epoch": 2.550229007633588, "grad_norm": 6.960930773657451, "learning_rate": 5.695093041683645e-06, "loss": 0.1232, "step": 125280 }, { "epoch": 2.5504325699745545, "grad_norm": 0.021474983902434214, "learning_rate": 5.694389366224722e-06, "loss": 0.1421, "step": 125290 }, { "epoch": 2.5506361323155216, "grad_norm": 29.781761958851067, "learning_rate": 5.693685676741711e-06, "loss": 0.1192, "step": 125300 }, { "epoch": 2.5508396946564886, "grad_norm": 7.380061725676445, "learning_rate": 5.692981973248824e-06, "loss": 0.1062, "step": 125310 }, { "epoch": 2.5510432569974553, "grad_norm": 8.360120599129647, "learning_rate": 5.692278255760275e-06, "loss": 0.113, "step": 125320 }, { "epoch": 2.5512468193384223, "grad_norm": 0.18041192692015215, "learning_rate": 5.691574524290273e-06, "loss": 0.1298, "step": 125330 }, { "epoch": 2.5514503816793894, "grad_norm": 0.00026007663861731136, "learning_rate": 5.690870778853033e-06, "loss": 0.1572, "step": 125340 }, { "epoch": 2.551653944020356, "grad_norm": 21.254538341717648, "learning_rate": 5.69016701946277e-06, "loss": 0.0909, "step": 125350 }, { "epoch": 2.551857506361323, "grad_norm": 0.23493815320581235, "learning_rate": 5.689463246133693e-06, "loss": 0.1554, "step": 125360 }, { "epoch": 2.55206106870229, "grad_norm": 5.713469737014, "learning_rate": 5.688759458880018e-06, "loss": 0.1123, "step": 125370 }, { "epoch": 2.552264631043257, "grad_norm": 0.033584445106327854, "learning_rate": 5.68805565771596e-06, "loss": 0.0497, "step": 125380 }, { "epoch": 2.552468193384224, "grad_norm": 10.691631984044676, "learning_rate": 5.687351842655731e-06, "loss": 0.1631, "step": 125390 }, { "epoch": 2.552671755725191, "grad_norm": 9.92135820693879, "learning_rate": 5.686648013713545e-06, "loss": 0.1292, "step": 125400 }, { "epoch": 2.5528753180661576, "grad_norm": 1.791474148232632, "learning_rate": 5.685944170903619e-06, "loss": 0.0285, "step": 125410 }, { "epoch": 2.5530788804071247, "grad_norm": 26.62078457121029, "learning_rate": 5.685240314240167e-06, "loss": 0.1426, "step": 125420 }, { "epoch": 2.553282442748092, "grad_norm": 0.125601503554574, "learning_rate": 5.684536443737404e-06, "loss": 0.0767, "step": 125430 }, { "epoch": 2.5534860050890584, "grad_norm": 0.05773585397538129, "learning_rate": 5.683832559409547e-06, "loss": 0.088, "step": 125440 }, { "epoch": 2.5536895674300255, "grad_norm": 0.13859865742916774, "learning_rate": 5.683128661270808e-06, "loss": 0.1597, "step": 125450 }, { "epoch": 2.5538931297709926, "grad_norm": 7.805987111131394, "learning_rate": 5.682424749335409e-06, "loss": 0.13, "step": 125460 }, { "epoch": 2.554096692111959, "grad_norm": 13.196912875905571, "learning_rate": 5.6817208236175616e-06, "loss": 0.1448, "step": 125470 }, { "epoch": 2.5543002544529263, "grad_norm": 0.09693785586274051, "learning_rate": 5.681016884131485e-06, "loss": 0.063, "step": 125480 }, { "epoch": 2.5545038167938934, "grad_norm": 0.19354852551168664, "learning_rate": 5.680312930891394e-06, "loss": 0.1771, "step": 125490 }, { "epoch": 2.55470737913486, "grad_norm": 8.621701835984714, "learning_rate": 5.679608963911507e-06, "loss": 0.0732, "step": 125500 }, { "epoch": 2.554910941475827, "grad_norm": 3.550522025622797, "learning_rate": 5.678904983206042e-06, "loss": 0.1234, "step": 125510 }, { "epoch": 2.555114503816794, "grad_norm": 27.914418108952244, "learning_rate": 5.678200988789219e-06, "loss": 0.1488, "step": 125520 }, { "epoch": 2.555318066157761, "grad_norm": 7.141201632756929, "learning_rate": 5.677496980675251e-06, "loss": 0.1537, "step": 125530 }, { "epoch": 2.555521628498728, "grad_norm": 0.047891572954959334, "learning_rate": 5.676792958878358e-06, "loss": 0.0811, "step": 125540 }, { "epoch": 2.5557251908396945, "grad_norm": 6.819161228627506, "learning_rate": 5.676088923412761e-06, "loss": 0.2082, "step": 125550 }, { "epoch": 2.5559287531806616, "grad_norm": 0.12141216429735244, "learning_rate": 5.675384874292678e-06, "loss": 0.0583, "step": 125560 }, { "epoch": 2.5561323155216282, "grad_norm": 18.73370113683663, "learning_rate": 5.674680811532326e-06, "loss": 0.125, "step": 125570 }, { "epoch": 2.5563358778625953, "grad_norm": 5.60478555763387, "learning_rate": 5.673976735145926e-06, "loss": 0.0388, "step": 125580 }, { "epoch": 2.5565394402035624, "grad_norm": 7.800934449400485, "learning_rate": 5.673272645147699e-06, "loss": 0.1065, "step": 125590 }, { "epoch": 2.556743002544529, "grad_norm": 11.699970384628607, "learning_rate": 5.672568541551863e-06, "loss": 0.1367, "step": 125600 }, { "epoch": 2.556946564885496, "grad_norm": 0.04337037103253366, "learning_rate": 5.671864424372641e-06, "loss": 0.2717, "step": 125610 }, { "epoch": 2.557150127226463, "grad_norm": 0.4237886787778977, "learning_rate": 5.671160293624248e-06, "loss": 0.133, "step": 125620 }, { "epoch": 2.55735368956743, "grad_norm": 14.367341510470274, "learning_rate": 5.67045614932091e-06, "loss": 0.2038, "step": 125630 }, { "epoch": 2.557557251908397, "grad_norm": 0.0973978629825841, "learning_rate": 5.6697519914768486e-06, "loss": 0.1526, "step": 125640 }, { "epoch": 2.557760814249364, "grad_norm": 22.583796595448053, "learning_rate": 5.66904782010628e-06, "loss": 0.1476, "step": 125650 }, { "epoch": 2.5579643765903306, "grad_norm": 0.783225837521961, "learning_rate": 5.66834363522343e-06, "loss": 0.0333, "step": 125660 }, { "epoch": 2.5581679389312977, "grad_norm": 18.78560016794482, "learning_rate": 5.66763943684252e-06, "loss": 0.0769, "step": 125670 }, { "epoch": 2.5583715012722648, "grad_norm": 0.12568927961500617, "learning_rate": 5.666935224977772e-06, "loss": 0.0913, "step": 125680 }, { "epoch": 2.5585750636132314, "grad_norm": 34.50324558367445, "learning_rate": 5.6662309996434075e-06, "loss": 0.1224, "step": 125690 }, { "epoch": 2.5587786259541985, "grad_norm": 0.12327954148124157, "learning_rate": 5.665526760853649e-06, "loss": 0.2045, "step": 125700 }, { "epoch": 2.5589821882951655, "grad_norm": 17.024671338763888, "learning_rate": 5.664822508622721e-06, "loss": 0.2728, "step": 125710 }, { "epoch": 2.559185750636132, "grad_norm": 8.371053014245375, "learning_rate": 5.664118242964846e-06, "loss": 0.1841, "step": 125720 }, { "epoch": 2.5593893129770993, "grad_norm": 0.7908885390239304, "learning_rate": 5.66341396389425e-06, "loss": 0.0882, "step": 125730 }, { "epoch": 2.5595928753180663, "grad_norm": 10.28864735451065, "learning_rate": 5.6627096714251504e-06, "loss": 0.1295, "step": 125740 }, { "epoch": 2.559796437659033, "grad_norm": 0.09444352018246811, "learning_rate": 5.662005365571779e-06, "loss": 0.0977, "step": 125750 }, { "epoch": 2.56, "grad_norm": 19.039829661784683, "learning_rate": 5.661301046348355e-06, "loss": 0.197, "step": 125760 }, { "epoch": 2.560203562340967, "grad_norm": 0.08654576078534089, "learning_rate": 5.660596713769105e-06, "loss": 0.0297, "step": 125770 }, { "epoch": 2.5604071246819338, "grad_norm": 0.2231256311793048, "learning_rate": 5.659892367848253e-06, "loss": 0.2037, "step": 125780 }, { "epoch": 2.560610687022901, "grad_norm": 0.07299542456738854, "learning_rate": 5.659188008600025e-06, "loss": 0.1036, "step": 125790 }, { "epoch": 2.560814249363868, "grad_norm": 19.029598928113096, "learning_rate": 5.658483636038646e-06, "loss": 0.1154, "step": 125800 }, { "epoch": 2.5610178117048346, "grad_norm": 10.062765919133636, "learning_rate": 5.657779250178344e-06, "loss": 0.2473, "step": 125810 }, { "epoch": 2.5612213740458016, "grad_norm": 12.165183349139097, "learning_rate": 5.657074851033339e-06, "loss": 0.1427, "step": 125820 }, { "epoch": 2.5614249363867687, "grad_norm": 0.5614669406436148, "learning_rate": 5.656370438617863e-06, "loss": 0.0722, "step": 125830 }, { "epoch": 2.5616284987277353, "grad_norm": 8.996817314118886, "learning_rate": 5.655666012946142e-06, "loss": 0.2003, "step": 125840 }, { "epoch": 2.5618320610687024, "grad_norm": 0.09213985544143215, "learning_rate": 5.6549615740324e-06, "loss": 0.1861, "step": 125850 }, { "epoch": 2.562035623409669, "grad_norm": 8.108934100311684, "learning_rate": 5.6542571218908635e-06, "loss": 0.0646, "step": 125860 }, { "epoch": 2.562239185750636, "grad_norm": 15.915905478986355, "learning_rate": 5.653552656535764e-06, "loss": 0.2164, "step": 125870 }, { "epoch": 2.562442748091603, "grad_norm": 0.06945925317330269, "learning_rate": 5.652848177981327e-06, "loss": 0.1476, "step": 125880 }, { "epoch": 2.56264631043257, "grad_norm": 22.882926677670188, "learning_rate": 5.652143686241779e-06, "loss": 0.1163, "step": 125890 }, { "epoch": 2.562849872773537, "grad_norm": 4.78084403079453, "learning_rate": 5.65143918133135e-06, "loss": 0.1335, "step": 125900 }, { "epoch": 2.5630534351145036, "grad_norm": 0.26251304485269605, "learning_rate": 5.6507346632642675e-06, "loss": 0.0992, "step": 125910 }, { "epoch": 2.5632569974554706, "grad_norm": 6.336218486521993, "learning_rate": 5.65003013205476e-06, "loss": 0.1967, "step": 125920 }, { "epoch": 2.5634605597964377, "grad_norm": 8.889664245074544, "learning_rate": 5.649325587717058e-06, "loss": 0.1265, "step": 125930 }, { "epoch": 2.5636641221374044, "grad_norm": 8.902969182407613, "learning_rate": 5.648621030265389e-06, "loss": 0.1303, "step": 125940 }, { "epoch": 2.5638676844783714, "grad_norm": 35.521523413152714, "learning_rate": 5.647916459713982e-06, "loss": 0.2018, "step": 125950 }, { "epoch": 2.5640712468193385, "grad_norm": 0.349561000943206, "learning_rate": 5.647211876077069e-06, "loss": 0.0979, "step": 125960 }, { "epoch": 2.564274809160305, "grad_norm": 0.3215032958363609, "learning_rate": 5.646507279368877e-06, "loss": 0.1158, "step": 125970 }, { "epoch": 2.5644783715012722, "grad_norm": 0.5977972262596616, "learning_rate": 5.6458026696036385e-06, "loss": 0.0768, "step": 125980 }, { "epoch": 2.5646819338422393, "grad_norm": 5.854237211965753, "learning_rate": 5.645098046795583e-06, "loss": 0.124, "step": 125990 }, { "epoch": 2.564885496183206, "grad_norm": 0.11832702998542866, "learning_rate": 5.644393410958941e-06, "loss": 0.0745, "step": 126000 }, { "epoch": 2.565089058524173, "grad_norm": 2.3321601553883298, "learning_rate": 5.643688762107946e-06, "loss": 0.0943, "step": 126010 }, { "epoch": 2.56529262086514, "grad_norm": 1.248710122212144, "learning_rate": 5.642984100256827e-06, "loss": 0.0635, "step": 126020 }, { "epoch": 2.5654961832061067, "grad_norm": 1.1427881616451738, "learning_rate": 5.6422794254198145e-06, "loss": 0.0687, "step": 126030 }, { "epoch": 2.565699745547074, "grad_norm": 0.764523037229963, "learning_rate": 5.641574737611143e-06, "loss": 0.1591, "step": 126040 }, { "epoch": 2.565903307888041, "grad_norm": 2.3402725836078093, "learning_rate": 5.640870036845043e-06, "loss": 0.1922, "step": 126050 }, { "epoch": 2.5661068702290075, "grad_norm": 16.188264957980728, "learning_rate": 5.640165323135747e-06, "loss": 0.2165, "step": 126060 }, { "epoch": 2.5663104325699746, "grad_norm": 20.81304015903178, "learning_rate": 5.639460596497487e-06, "loss": 0.1323, "step": 126070 }, { "epoch": 2.5665139949109417, "grad_norm": 21.506254325523894, "learning_rate": 5.638755856944498e-06, "loss": 0.2279, "step": 126080 }, { "epoch": 2.5667175572519083, "grad_norm": 1.1959502228820593, "learning_rate": 5.6380511044910105e-06, "loss": 0.1102, "step": 126090 }, { "epoch": 2.5669211195928754, "grad_norm": 7.526015808935792, "learning_rate": 5.6373463391512605e-06, "loss": 0.2008, "step": 126100 }, { "epoch": 2.5671246819338425, "grad_norm": 15.60262767054863, "learning_rate": 5.6366415609394785e-06, "loss": 0.0931, "step": 126110 }, { "epoch": 2.567328244274809, "grad_norm": 8.102792470588675, "learning_rate": 5.635936769869901e-06, "loss": 0.1227, "step": 126120 }, { "epoch": 2.567531806615776, "grad_norm": 13.239345427937991, "learning_rate": 5.635231965956762e-06, "loss": 0.0671, "step": 126130 }, { "epoch": 2.5677353689567433, "grad_norm": 13.581388964603734, "learning_rate": 5.634527149214295e-06, "loss": 0.0562, "step": 126140 }, { "epoch": 2.56793893129771, "grad_norm": 9.420372090690357, "learning_rate": 5.633822319656735e-06, "loss": 0.1323, "step": 126150 }, { "epoch": 2.568142493638677, "grad_norm": 117.96914182232005, "learning_rate": 5.633117477298317e-06, "loss": 0.134, "step": 126160 }, { "epoch": 2.5683460559796436, "grad_norm": 0.8634679390534805, "learning_rate": 5.632412622153276e-06, "loss": 0.122, "step": 126170 }, { "epoch": 2.5685496183206107, "grad_norm": 2.8329692894577523, "learning_rate": 5.631707754235847e-06, "loss": 0.2024, "step": 126180 }, { "epoch": 2.5687531806615778, "grad_norm": 19.204889424176553, "learning_rate": 5.631002873560266e-06, "loss": 0.1141, "step": 126190 }, { "epoch": 2.5689567430025444, "grad_norm": 25.068580037576748, "learning_rate": 5.63029798014077e-06, "loss": 0.2233, "step": 126200 }, { "epoch": 2.5691603053435115, "grad_norm": 0.04440402708461356, "learning_rate": 5.629593073991595e-06, "loss": 0.0435, "step": 126210 }, { "epoch": 2.569363867684478, "grad_norm": 32.41406630040646, "learning_rate": 5.628888155126977e-06, "loss": 0.1014, "step": 126220 }, { "epoch": 2.569567430025445, "grad_norm": 4.625813075255611, "learning_rate": 5.628183223561151e-06, "loss": 0.1248, "step": 126230 }, { "epoch": 2.5697709923664123, "grad_norm": 26.84754867183142, "learning_rate": 5.627478279308357e-06, "loss": 0.0673, "step": 126240 }, { "epoch": 2.569974554707379, "grad_norm": 23.310162963616143, "learning_rate": 5.6267733223828314e-06, "loss": 0.1541, "step": 126250 }, { "epoch": 2.570178117048346, "grad_norm": 8.050795585488238, "learning_rate": 5.62606835279881e-06, "loss": 0.0883, "step": 126260 }, { "epoch": 2.570381679389313, "grad_norm": 9.007681474563494, "learning_rate": 5.625363370570532e-06, "loss": 0.0472, "step": 126270 }, { "epoch": 2.5705852417302797, "grad_norm": 0.7142201070692952, "learning_rate": 5.624658375712236e-06, "loss": 0.1818, "step": 126280 }, { "epoch": 2.5707888040712468, "grad_norm": 55.73692180108901, "learning_rate": 5.623953368238159e-06, "loss": 0.1025, "step": 126290 }, { "epoch": 2.570992366412214, "grad_norm": 13.421435937465024, "learning_rate": 5.623248348162541e-06, "loss": 0.0817, "step": 126300 }, { "epoch": 2.5711959287531805, "grad_norm": 0.012697394313403153, "learning_rate": 5.62254331549962e-06, "loss": 0.2296, "step": 126310 }, { "epoch": 2.5713994910941476, "grad_norm": 0.3534224665700815, "learning_rate": 5.6218382702636325e-06, "loss": 0.2071, "step": 126320 }, { "epoch": 2.5716030534351146, "grad_norm": 0.1463704658135482, "learning_rate": 5.621133212468823e-06, "loss": 0.0198, "step": 126330 }, { "epoch": 2.5718066157760813, "grad_norm": 32.488188665949494, "learning_rate": 5.620428142129427e-06, "loss": 0.1583, "step": 126340 }, { "epoch": 2.5720101781170484, "grad_norm": 1.5401196405007391, "learning_rate": 5.6197230592596855e-06, "loss": 0.0614, "step": 126350 }, { "epoch": 2.5722137404580154, "grad_norm": 13.303765245565014, "learning_rate": 5.619017963873838e-06, "loss": 0.0711, "step": 126360 }, { "epoch": 2.572417302798982, "grad_norm": 15.060478576042684, "learning_rate": 5.6183128559861266e-06, "loss": 0.13, "step": 126370 }, { "epoch": 2.572620865139949, "grad_norm": 8.236551112733352, "learning_rate": 5.61760773561079e-06, "loss": 0.0774, "step": 126380 }, { "epoch": 2.572824427480916, "grad_norm": 61.0043903057642, "learning_rate": 5.616902602762071e-06, "loss": 0.1985, "step": 126390 }, { "epoch": 2.573027989821883, "grad_norm": 12.167156034220218, "learning_rate": 5.616197457454208e-06, "loss": 0.1404, "step": 126400 }, { "epoch": 2.57323155216285, "grad_norm": 14.712642052772313, "learning_rate": 5.615492299701444e-06, "loss": 0.1368, "step": 126410 }, { "epoch": 2.573435114503817, "grad_norm": 0.505268813691321, "learning_rate": 5.61478712951802e-06, "loss": 0.0908, "step": 126420 }, { "epoch": 2.5736386768447836, "grad_norm": 5.38541841928006, "learning_rate": 5.614081946918178e-06, "loss": 0.0354, "step": 126430 }, { "epoch": 2.5738422391857507, "grad_norm": 0.0800588575460361, "learning_rate": 5.6133767519161605e-06, "loss": 0.1882, "step": 126440 }, { "epoch": 2.574045801526718, "grad_norm": 6.850244062904896, "learning_rate": 5.6126715445262105e-06, "loss": 0.1317, "step": 126450 }, { "epoch": 2.5742493638676844, "grad_norm": 4.716240116399194, "learning_rate": 5.611966324762568e-06, "loss": 0.108, "step": 126460 }, { "epoch": 2.5744529262086515, "grad_norm": 2.501323909186494, "learning_rate": 5.611261092639478e-06, "loss": 0.1917, "step": 126470 }, { "epoch": 2.5746564885496186, "grad_norm": 21.60734299047356, "learning_rate": 5.610555848171183e-06, "loss": 0.1247, "step": 126480 }, { "epoch": 2.5748600508905852, "grad_norm": 0.08460503838234043, "learning_rate": 5.609850591371926e-06, "loss": 0.0806, "step": 126490 }, { "epoch": 2.5750636132315523, "grad_norm": 7.219003130966115, "learning_rate": 5.609145322255952e-06, "loss": 0.0494, "step": 126500 }, { "epoch": 2.575267175572519, "grad_norm": 52.21461861534286, "learning_rate": 5.608440040837502e-06, "loss": 0.1885, "step": 126510 }, { "epoch": 2.575470737913486, "grad_norm": 7.36855618720829, "learning_rate": 5.607734747130822e-06, "loss": 0.1817, "step": 126520 }, { "epoch": 2.5756743002544527, "grad_norm": 0.26224925059439946, "learning_rate": 5.607029441150157e-06, "loss": 0.0741, "step": 126530 }, { "epoch": 2.5758778625954197, "grad_norm": 0.03198871651551177, "learning_rate": 5.60632412290975e-06, "loss": 0.1416, "step": 126540 }, { "epoch": 2.576081424936387, "grad_norm": 0.5731449203182086, "learning_rate": 5.605618792423846e-06, "loss": 0.0784, "step": 126550 }, { "epoch": 2.5762849872773534, "grad_norm": 16.27242582026854, "learning_rate": 5.60491344970669e-06, "loss": 0.1235, "step": 126560 }, { "epoch": 2.5764885496183205, "grad_norm": 0.33458759252924164, "learning_rate": 5.604208094772528e-06, "loss": 0.0843, "step": 126570 }, { "epoch": 2.5766921119592876, "grad_norm": 3.561030281988702, "learning_rate": 5.603502727635607e-06, "loss": 0.1448, "step": 126580 }, { "epoch": 2.5768956743002542, "grad_norm": 32.52645672351228, "learning_rate": 5.602797348310171e-06, "loss": 0.1428, "step": 126590 }, { "epoch": 2.5770992366412213, "grad_norm": 45.80523197783841, "learning_rate": 5.602091956810465e-06, "loss": 0.3341, "step": 126600 }, { "epoch": 2.5773027989821884, "grad_norm": 24.79683200135454, "learning_rate": 5.601386553150735e-06, "loss": 0.1385, "step": 126610 }, { "epoch": 2.577506361323155, "grad_norm": 0.04782476023717423, "learning_rate": 5.600681137345232e-06, "loss": 0.09, "step": 126620 }, { "epoch": 2.577709923664122, "grad_norm": 0.08146105009444059, "learning_rate": 5.5999757094081975e-06, "loss": 0.1338, "step": 126630 }, { "epoch": 2.577913486005089, "grad_norm": 0.40993030054883256, "learning_rate": 5.599270269353881e-06, "loss": 0.1655, "step": 126640 }, { "epoch": 2.578117048346056, "grad_norm": 2.316227516920161, "learning_rate": 5.59856481719653e-06, "loss": 0.0879, "step": 126650 }, { "epoch": 2.578320610687023, "grad_norm": 32.23082148907723, "learning_rate": 5.597859352950392e-06, "loss": 0.1735, "step": 126660 }, { "epoch": 2.57852417302799, "grad_norm": 28.789905203268315, "learning_rate": 5.597153876629714e-06, "loss": 0.1126, "step": 126670 }, { "epoch": 2.5787277353689566, "grad_norm": 60.65974236119237, "learning_rate": 5.596448388248745e-06, "loss": 0.0741, "step": 126680 }, { "epoch": 2.5789312977099237, "grad_norm": 0.3596079990703863, "learning_rate": 5.595742887821731e-06, "loss": 0.1489, "step": 126690 }, { "epoch": 2.5791348600508908, "grad_norm": 0.34920189586891354, "learning_rate": 5.595037375362923e-06, "loss": 0.1063, "step": 126700 }, { "epoch": 2.5793384223918574, "grad_norm": 0.03294033780080115, "learning_rate": 5.59433185088657e-06, "loss": 0.023, "step": 126710 }, { "epoch": 2.5795419847328245, "grad_norm": 0.5024524975353952, "learning_rate": 5.593626314406918e-06, "loss": 0.1241, "step": 126720 }, { "epoch": 2.5797455470737916, "grad_norm": 17.930637532493687, "learning_rate": 5.592920765938219e-06, "loss": 0.1253, "step": 126730 }, { "epoch": 2.579949109414758, "grad_norm": 4.480039708306171, "learning_rate": 5.592215205494721e-06, "loss": 0.1306, "step": 126740 }, { "epoch": 2.5801526717557253, "grad_norm": 0.5134180095537166, "learning_rate": 5.5915096330906735e-06, "loss": 0.0956, "step": 126750 }, { "epoch": 2.5803562340966923, "grad_norm": 0.08978320894770986, "learning_rate": 5.590804048740328e-06, "loss": 0.1435, "step": 126760 }, { "epoch": 2.580559796437659, "grad_norm": 29.764452456554906, "learning_rate": 5.590098452457934e-06, "loss": 0.1036, "step": 126770 }, { "epoch": 2.580763358778626, "grad_norm": 0.07492135015986576, "learning_rate": 5.58939284425774e-06, "loss": 0.1079, "step": 126780 }, { "epoch": 2.580966921119593, "grad_norm": 0.8641635593774507, "learning_rate": 5.588687224154001e-06, "loss": 0.0686, "step": 126790 }, { "epoch": 2.5811704834605598, "grad_norm": 42.46694836981657, "learning_rate": 5.587981592160965e-06, "loss": 0.1448, "step": 126800 }, { "epoch": 2.581374045801527, "grad_norm": 0.2766838371851172, "learning_rate": 5.5872759482928805e-06, "loss": 0.0973, "step": 126810 }, { "epoch": 2.5815776081424935, "grad_norm": 7.758915190242564, "learning_rate": 5.586570292564004e-06, "loss": 0.1844, "step": 126820 }, { "epoch": 2.5817811704834606, "grad_norm": 26.971942989114776, "learning_rate": 5.5858646249885855e-06, "loss": 0.0539, "step": 126830 }, { "epoch": 2.5819847328244276, "grad_norm": 38.386535342859624, "learning_rate": 5.585158945580875e-06, "loss": 0.1622, "step": 126840 }, { "epoch": 2.5821882951653943, "grad_norm": 12.501710188029087, "learning_rate": 5.584453254355127e-06, "loss": 0.1009, "step": 126850 }, { "epoch": 2.5823918575063614, "grad_norm": 19.136637075955242, "learning_rate": 5.583747551325591e-06, "loss": 0.1345, "step": 126860 }, { "epoch": 2.582595419847328, "grad_norm": 6.475298762812427, "learning_rate": 5.5830418365065235e-06, "loss": 0.1566, "step": 126870 }, { "epoch": 2.582798982188295, "grad_norm": 0.01425697624868975, "learning_rate": 5.582336109912174e-06, "loss": 0.1319, "step": 126880 }, { "epoch": 2.583002544529262, "grad_norm": 28.3686378486872, "learning_rate": 5.5816303715567965e-06, "loss": 0.0994, "step": 126890 }, { "epoch": 2.583206106870229, "grad_norm": 0.10197657281444895, "learning_rate": 5.5809246214546446e-06, "loss": 0.2329, "step": 126900 }, { "epoch": 2.583409669211196, "grad_norm": 0.138225956433601, "learning_rate": 5.5802188596199725e-06, "loss": 0.1047, "step": 126910 }, { "epoch": 2.583613231552163, "grad_norm": 0.021565983459725636, "learning_rate": 5.579513086067033e-06, "loss": 0.1233, "step": 126920 }, { "epoch": 2.5838167938931296, "grad_norm": 0.24635519350896504, "learning_rate": 5.57880730081008e-06, "loss": 0.0798, "step": 126930 }, { "epoch": 2.5840203562340966, "grad_norm": 12.113883705722612, "learning_rate": 5.578101503863368e-06, "loss": 0.0149, "step": 126940 }, { "epoch": 2.5842239185750637, "grad_norm": 0.16632297040740404, "learning_rate": 5.5773956952411515e-06, "loss": 0.0241, "step": 126950 }, { "epoch": 2.5844274809160304, "grad_norm": 8.306900195417336, "learning_rate": 5.576689874957685e-06, "loss": 0.1411, "step": 126960 }, { "epoch": 2.5846310432569974, "grad_norm": 5.445134805299487, "learning_rate": 5.575984043027225e-06, "loss": 0.1947, "step": 126970 }, { "epoch": 2.5848346055979645, "grad_norm": 0.5894158983183341, "learning_rate": 5.575278199464023e-06, "loss": 0.0603, "step": 126980 }, { "epoch": 2.585038167938931, "grad_norm": 30.68251140636305, "learning_rate": 5.574572344282339e-06, "loss": 0.0653, "step": 126990 }, { "epoch": 2.5852417302798982, "grad_norm": 17.02351924069275, "learning_rate": 5.573866477496427e-06, "loss": 0.145, "step": 127000 }, { "epoch": 2.5854452926208653, "grad_norm": 33.53474088073623, "learning_rate": 5.573160599120541e-06, "loss": 0.1961, "step": 127010 }, { "epoch": 2.585648854961832, "grad_norm": 5.2200050923541665, "learning_rate": 5.5724547091689385e-06, "loss": 0.0623, "step": 127020 }, { "epoch": 2.585852417302799, "grad_norm": 4.479200822424789, "learning_rate": 5.5717488076558755e-06, "loss": 0.073, "step": 127030 }, { "epoch": 2.586055979643766, "grad_norm": 15.789363257740986, "learning_rate": 5.571042894595609e-06, "loss": 0.2431, "step": 127040 }, { "epoch": 2.5862595419847327, "grad_norm": 16.641471213435988, "learning_rate": 5.570336970002395e-06, "loss": 0.1077, "step": 127050 }, { "epoch": 2.5864631043257, "grad_norm": 0.11266969460293613, "learning_rate": 5.569631033890494e-06, "loss": 0.0645, "step": 127060 }, { "epoch": 2.586666666666667, "grad_norm": 0.018292676275948452, "learning_rate": 5.568925086274158e-06, "loss": 0.0986, "step": 127070 }, { "epoch": 2.5868702290076335, "grad_norm": 8.432238625887301, "learning_rate": 5.568219127167649e-06, "loss": 0.1728, "step": 127080 }, { "epoch": 2.5870737913486006, "grad_norm": 4.727992143295941, "learning_rate": 5.567513156585222e-06, "loss": 0.1523, "step": 127090 }, { "epoch": 2.5872773536895677, "grad_norm": 14.43357130425426, "learning_rate": 5.566807174541134e-06, "loss": 0.2015, "step": 127100 }, { "epoch": 2.5874809160305343, "grad_norm": 0.1248102945079254, "learning_rate": 5.566101181049649e-06, "loss": 0.0769, "step": 127110 }, { "epoch": 2.5876844783715014, "grad_norm": 3.0848840972153306, "learning_rate": 5.565395176125019e-06, "loss": 0.0383, "step": 127120 }, { "epoch": 2.587888040712468, "grad_norm": 13.627848609779518, "learning_rate": 5.564689159781505e-06, "loss": 0.0776, "step": 127130 }, { "epoch": 2.588091603053435, "grad_norm": 0.8409207036608981, "learning_rate": 5.563983132033367e-06, "loss": 0.1398, "step": 127140 }, { "epoch": 2.588295165394402, "grad_norm": 22.554187324287277, "learning_rate": 5.563277092894863e-06, "loss": 0.1868, "step": 127150 }, { "epoch": 2.588498727735369, "grad_norm": 3.770557636719372, "learning_rate": 5.562571042380252e-06, "loss": 0.0691, "step": 127160 }, { "epoch": 2.588702290076336, "grad_norm": 5.700068394879694, "learning_rate": 5.561864980503796e-06, "loss": 0.1604, "step": 127170 }, { "epoch": 2.5889058524173025, "grad_norm": 13.262494996595468, "learning_rate": 5.561158907279751e-06, "loss": 0.2138, "step": 127180 }, { "epoch": 2.5891094147582696, "grad_norm": 0.6402964906651628, "learning_rate": 5.56045282272238e-06, "loss": 0.1245, "step": 127190 }, { "epoch": 2.5893129770992367, "grad_norm": 7.0243226893535216, "learning_rate": 5.559746726845943e-06, "loss": 0.2002, "step": 127200 }, { "epoch": 2.5895165394402033, "grad_norm": 7.475884823337662, "learning_rate": 5.5590406196646995e-06, "loss": 0.1164, "step": 127210 }, { "epoch": 2.5897201017811704, "grad_norm": 21.530242503307093, "learning_rate": 5.55833450119291e-06, "loss": 0.1226, "step": 127220 }, { "epoch": 2.5899236641221375, "grad_norm": 11.33333191293837, "learning_rate": 5.557628371444837e-06, "loss": 0.1753, "step": 127230 }, { "epoch": 2.590127226463104, "grad_norm": 0.10507267480469927, "learning_rate": 5.556922230434739e-06, "loss": 0.091, "step": 127240 }, { "epoch": 2.590330788804071, "grad_norm": 7.60938097135777, "learning_rate": 5.55621607817688e-06, "loss": 0.1393, "step": 127250 }, { "epoch": 2.5905343511450383, "grad_norm": 0.268883277054784, "learning_rate": 5.555509914685522e-06, "loss": 0.1497, "step": 127260 }, { "epoch": 2.590737913486005, "grad_norm": 37.93361766955126, "learning_rate": 5.5548037399749255e-06, "loss": 0.1233, "step": 127270 }, { "epoch": 2.590941475826972, "grad_norm": 0.04646737983402533, "learning_rate": 5.554097554059353e-06, "loss": 0.1029, "step": 127280 }, { "epoch": 2.591145038167939, "grad_norm": 7.2446936519058625, "learning_rate": 5.553391356953066e-06, "loss": 0.188, "step": 127290 }, { "epoch": 2.5913486005089057, "grad_norm": 27.78689446088913, "learning_rate": 5.5526851486703284e-06, "loss": 0.1248, "step": 127300 }, { "epoch": 2.5915521628498728, "grad_norm": 22.615549114883972, "learning_rate": 5.551978929225402e-06, "loss": 0.149, "step": 127310 }, { "epoch": 2.59175572519084, "grad_norm": 1.3676825986652408, "learning_rate": 5.55127269863255e-06, "loss": 0.0934, "step": 127320 }, { "epoch": 2.5919592875318065, "grad_norm": 0.1641295275289445, "learning_rate": 5.550566456906036e-06, "loss": 0.16, "step": 127330 }, { "epoch": 2.5921628498727736, "grad_norm": 3.4871077699802013, "learning_rate": 5.549860204060125e-06, "loss": 0.1883, "step": 127340 }, { "epoch": 2.5923664122137406, "grad_norm": 7.2683608671168445, "learning_rate": 5.549153940109075e-06, "loss": 0.1527, "step": 127350 }, { "epoch": 2.5925699745547073, "grad_norm": 4.527647393086941, "learning_rate": 5.548447665067157e-06, "loss": 0.1181, "step": 127360 }, { "epoch": 2.5927735368956744, "grad_norm": 6.476395057493155, "learning_rate": 5.547741378948632e-06, "loss": 0.0867, "step": 127370 }, { "epoch": 2.5929770992366414, "grad_norm": 5.713357927984307, "learning_rate": 5.5470350817677635e-06, "loss": 0.1277, "step": 127380 }, { "epoch": 2.593180661577608, "grad_norm": 12.978567289106545, "learning_rate": 5.546328773538816e-06, "loss": 0.0801, "step": 127390 }, { "epoch": 2.593384223918575, "grad_norm": 7.109319293466708, "learning_rate": 5.545622454276058e-06, "loss": 0.1311, "step": 127400 }, { "epoch": 2.5935877862595422, "grad_norm": 27.024662112679998, "learning_rate": 5.5449161239937486e-06, "loss": 0.1039, "step": 127410 }, { "epoch": 2.593791348600509, "grad_norm": 0.051497979459384016, "learning_rate": 5.544209782706158e-06, "loss": 0.0952, "step": 127420 }, { "epoch": 2.593994910941476, "grad_norm": 0.006431332567472163, "learning_rate": 5.5435034304275495e-06, "loss": 0.1482, "step": 127430 }, { "epoch": 2.594198473282443, "grad_norm": 1.0826248730147199, "learning_rate": 5.54279706717219e-06, "loss": 0.1389, "step": 127440 }, { "epoch": 2.5944020356234097, "grad_norm": 0.05447438840828756, "learning_rate": 5.542090692954343e-06, "loss": 0.0437, "step": 127450 }, { "epoch": 2.5946055979643767, "grad_norm": 6.620357947307359, "learning_rate": 5.541384307788278e-06, "loss": 0.1464, "step": 127460 }, { "epoch": 2.5948091603053434, "grad_norm": 7.654088332307996, "learning_rate": 5.5406779116882585e-06, "loss": 0.0733, "step": 127470 }, { "epoch": 2.5950127226463104, "grad_norm": 14.401106449724214, "learning_rate": 5.539971504668553e-06, "loss": 0.104, "step": 127480 }, { "epoch": 2.595216284987277, "grad_norm": 0.06546723802253242, "learning_rate": 5.539265086743426e-06, "loss": 0.0645, "step": 127490 }, { "epoch": 2.595419847328244, "grad_norm": 6.879923897243051, "learning_rate": 5.538558657927148e-06, "loss": 0.146, "step": 127500 }, { "epoch": 2.5956234096692112, "grad_norm": 0.03991242678836164, "learning_rate": 5.537852218233982e-06, "loss": 0.1141, "step": 127510 }, { "epoch": 2.595826972010178, "grad_norm": 20.593791237042737, "learning_rate": 5.537145767678199e-06, "loss": 0.0699, "step": 127520 }, { "epoch": 2.596030534351145, "grad_norm": 0.20487395725529275, "learning_rate": 5.536439306274065e-06, "loss": 0.1192, "step": 127530 }, { "epoch": 2.596234096692112, "grad_norm": 0.0984737652763554, "learning_rate": 5.5357328340358495e-06, "loss": 0.2462, "step": 127540 }, { "epoch": 2.5964376590330787, "grad_norm": 14.803250500802939, "learning_rate": 5.535026350977818e-06, "loss": 0.0305, "step": 127550 }, { "epoch": 2.5966412213740457, "grad_norm": 13.985937338636885, "learning_rate": 5.53431985711424e-06, "loss": 0.1663, "step": 127560 }, { "epoch": 2.596844783715013, "grad_norm": 6.823378185552767, "learning_rate": 5.533613352459386e-06, "loss": 0.0723, "step": 127570 }, { "epoch": 2.5970483460559795, "grad_norm": 0.0639553806463876, "learning_rate": 5.532906837027522e-06, "loss": 0.0709, "step": 127580 }, { "epoch": 2.5972519083969465, "grad_norm": 9.351810399819598, "learning_rate": 5.532200310832917e-06, "loss": 0.0771, "step": 127590 }, { "epoch": 2.5974554707379136, "grad_norm": 7.24507196286413, "learning_rate": 5.531493773889843e-06, "loss": 0.1289, "step": 127600 }, { "epoch": 2.5976590330788802, "grad_norm": 11.20544992098295, "learning_rate": 5.530787226212568e-06, "loss": 0.257, "step": 127610 }, { "epoch": 2.5978625954198473, "grad_norm": 0.020486379351698773, "learning_rate": 5.53008066781536e-06, "loss": 0.07, "step": 127620 }, { "epoch": 2.5980661577608144, "grad_norm": 9.047366586289998, "learning_rate": 5.52937409871249e-06, "loss": 0.1357, "step": 127630 }, { "epoch": 2.598269720101781, "grad_norm": 6.512475551311078, "learning_rate": 5.528667518918228e-06, "loss": 0.3078, "step": 127640 }, { "epoch": 2.598473282442748, "grad_norm": 14.21472010234827, "learning_rate": 5.527960928446845e-06, "loss": 0.1726, "step": 127650 }, { "epoch": 2.598676844783715, "grad_norm": 17.668197833082747, "learning_rate": 5.527254327312612e-06, "loss": 0.1962, "step": 127660 }, { "epoch": 2.598880407124682, "grad_norm": 16.507888734617516, "learning_rate": 5.526547715529799e-06, "loss": 0.0857, "step": 127670 }, { "epoch": 2.599083969465649, "grad_norm": 0.41855103803828086, "learning_rate": 5.5258410931126736e-06, "loss": 0.0656, "step": 127680 }, { "epoch": 2.599287531806616, "grad_norm": 23.578780147521073, "learning_rate": 5.525134460075514e-06, "loss": 0.1896, "step": 127690 }, { "epoch": 2.5994910941475826, "grad_norm": 0.08183615173887841, "learning_rate": 5.524427816432586e-06, "loss": 0.1409, "step": 127700 }, { "epoch": 2.5996946564885497, "grad_norm": 0.1707337962259775, "learning_rate": 5.523721162198162e-06, "loss": 0.1837, "step": 127710 }, { "epoch": 2.5998982188295168, "grad_norm": 16.14868145421293, "learning_rate": 5.523014497386515e-06, "loss": 0.1655, "step": 127720 }, { "epoch": 2.6001017811704834, "grad_norm": 13.873038103564687, "learning_rate": 5.522307822011916e-06, "loss": 0.1443, "step": 127730 }, { "epoch": 2.6003053435114505, "grad_norm": 0.40722852843151747, "learning_rate": 5.521601136088639e-06, "loss": 0.1304, "step": 127740 }, { "epoch": 2.6005089058524176, "grad_norm": 1.7735737939343874, "learning_rate": 5.520894439630956e-06, "loss": 0.1793, "step": 127750 }, { "epoch": 2.600712468193384, "grad_norm": 51.90487565287855, "learning_rate": 5.520187732653136e-06, "loss": 0.1353, "step": 127760 }, { "epoch": 2.6009160305343513, "grad_norm": 8.16820140920011, "learning_rate": 5.519481015169458e-06, "loss": 0.0714, "step": 127770 }, { "epoch": 2.601119592875318, "grad_norm": 4.005804405038276, "learning_rate": 5.518774287194191e-06, "loss": 0.0326, "step": 127780 }, { "epoch": 2.601323155216285, "grad_norm": 0.3152651470809557, "learning_rate": 5.518067548741608e-06, "loss": 0.0781, "step": 127790 }, { "epoch": 2.601526717557252, "grad_norm": 14.341760204215436, "learning_rate": 5.517360799825984e-06, "loss": 0.0368, "step": 127800 }, { "epoch": 2.6017302798982187, "grad_norm": 29.696396042535618, "learning_rate": 5.516654040461591e-06, "loss": 0.1307, "step": 127810 }, { "epoch": 2.601933842239186, "grad_norm": 0.39430614362552546, "learning_rate": 5.515947270662706e-06, "loss": 0.1103, "step": 127820 }, { "epoch": 2.6021374045801524, "grad_norm": 0.22779245928158987, "learning_rate": 5.515240490443602e-06, "loss": 0.1348, "step": 127830 }, { "epoch": 2.6023409669211195, "grad_norm": 15.836853856235289, "learning_rate": 5.51453369981855e-06, "loss": 0.0877, "step": 127840 }, { "epoch": 2.6025445292620866, "grad_norm": 10.348096907979789, "learning_rate": 5.513826898801828e-06, "loss": 0.0772, "step": 127850 }, { "epoch": 2.602748091603053, "grad_norm": 37.2444973172898, "learning_rate": 5.513120087407712e-06, "loss": 0.1187, "step": 127860 }, { "epoch": 2.6029516539440203, "grad_norm": 1.7160659450966718, "learning_rate": 5.512413265650474e-06, "loss": 0.1145, "step": 127870 }, { "epoch": 2.6031552162849874, "grad_norm": 61.4807779590221, "learning_rate": 5.511706433544388e-06, "loss": 0.1121, "step": 127880 }, { "epoch": 2.603358778625954, "grad_norm": 0.02190745632268655, "learning_rate": 5.510999591103734e-06, "loss": 0.1978, "step": 127890 }, { "epoch": 2.603562340966921, "grad_norm": 9.202894845532997, "learning_rate": 5.510292738342784e-06, "loss": 0.2652, "step": 127900 }, { "epoch": 2.603765903307888, "grad_norm": 6.856005946913501, "learning_rate": 5.509585875275815e-06, "loss": 0.1179, "step": 127910 }, { "epoch": 2.603969465648855, "grad_norm": 4.467235520803403, "learning_rate": 5.508879001917103e-06, "loss": 0.0531, "step": 127920 }, { "epoch": 2.604173027989822, "grad_norm": 28.79874625593404, "learning_rate": 5.508172118280924e-06, "loss": 0.1899, "step": 127930 }, { "epoch": 2.604376590330789, "grad_norm": 67.34396959107589, "learning_rate": 5.5074652243815556e-06, "loss": 0.1361, "step": 127940 }, { "epoch": 2.6045801526717556, "grad_norm": 4.664867126869412, "learning_rate": 5.506758320233273e-06, "loss": 0.178, "step": 127950 }, { "epoch": 2.6047837150127227, "grad_norm": 34.387695439727075, "learning_rate": 5.506051405850352e-06, "loss": 0.0951, "step": 127960 }, { "epoch": 2.6049872773536897, "grad_norm": 15.408364946380381, "learning_rate": 5.505344481247074e-06, "loss": 0.0866, "step": 127970 }, { "epoch": 2.6051908396946564, "grad_norm": 0.11276268486625447, "learning_rate": 5.50463754643771e-06, "loss": 0.0786, "step": 127980 }, { "epoch": 2.6053944020356234, "grad_norm": 0.04477083413078887, "learning_rate": 5.5039306014365424e-06, "loss": 0.0693, "step": 127990 }, { "epoch": 2.6055979643765905, "grad_norm": 0.046266354724712855, "learning_rate": 5.503223646257847e-06, "loss": 0.1356, "step": 128000 }, { "epoch": 2.605801526717557, "grad_norm": 12.669699183273057, "learning_rate": 5.502516680915902e-06, "loss": 0.0948, "step": 128010 }, { "epoch": 2.6060050890585242, "grad_norm": 11.10240950406342, "learning_rate": 5.501809705424986e-06, "loss": 0.0704, "step": 128020 }, { "epoch": 2.6062086513994913, "grad_norm": 0.6267122865216482, "learning_rate": 5.501102719799376e-06, "loss": 0.2555, "step": 128030 }, { "epoch": 2.606412213740458, "grad_norm": 1.2184414818981213, "learning_rate": 5.500395724053351e-06, "loss": 0.1803, "step": 128040 }, { "epoch": 2.606615776081425, "grad_norm": 34.21107967416025, "learning_rate": 5.499688718201188e-06, "loss": 0.1162, "step": 128050 }, { "epoch": 2.606819338422392, "grad_norm": 12.67077081383948, "learning_rate": 5.498981702257171e-06, "loss": 0.0604, "step": 128060 }, { "epoch": 2.6070229007633587, "grad_norm": 0.07022839777112914, "learning_rate": 5.4982746762355755e-06, "loss": 0.2193, "step": 128070 }, { "epoch": 2.607226463104326, "grad_norm": 19.032306471280776, "learning_rate": 5.4975676401506796e-06, "loss": 0.3295, "step": 128080 }, { "epoch": 2.607430025445293, "grad_norm": 0.19753448186751418, "learning_rate": 5.496860594016765e-06, "loss": 0.04, "step": 128090 }, { "epoch": 2.6076335877862595, "grad_norm": 12.556229931673908, "learning_rate": 5.49615353784811e-06, "loss": 0.1186, "step": 128100 }, { "epoch": 2.6078371501272266, "grad_norm": 0.08951435631283572, "learning_rate": 5.495446471658996e-06, "loss": 0.1184, "step": 128110 }, { "epoch": 2.6080407124681932, "grad_norm": 0.3071652228130511, "learning_rate": 5.494739395463704e-06, "loss": 0.1489, "step": 128120 }, { "epoch": 2.6082442748091603, "grad_norm": 0.20559929983130681, "learning_rate": 5.494032309276509e-06, "loss": 0.0951, "step": 128130 }, { "epoch": 2.608447837150127, "grad_norm": 28.620711389792135, "learning_rate": 5.493325213111698e-06, "loss": 0.1468, "step": 128140 }, { "epoch": 2.608651399491094, "grad_norm": 0.44039031133030904, "learning_rate": 5.492618106983548e-06, "loss": 0.0702, "step": 128150 }, { "epoch": 2.608854961832061, "grad_norm": 16.0435942605683, "learning_rate": 5.4919109909063415e-06, "loss": 0.1229, "step": 128160 }, { "epoch": 2.6090585241730277, "grad_norm": 5.1425788228536105, "learning_rate": 5.491203864894358e-06, "loss": 0.1445, "step": 128170 }, { "epoch": 2.609262086513995, "grad_norm": 23.71453317224437, "learning_rate": 5.490496728961881e-06, "loss": 0.1084, "step": 128180 }, { "epoch": 2.609465648854962, "grad_norm": 23.527713711036807, "learning_rate": 5.4897895831231895e-06, "loss": 0.1585, "step": 128190 }, { "epoch": 2.6096692111959285, "grad_norm": 13.665209882011435, "learning_rate": 5.489082427392567e-06, "loss": 0.1585, "step": 128200 }, { "epoch": 2.6098727735368956, "grad_norm": 2.2692851193186603, "learning_rate": 5.4883752617842955e-06, "loss": 0.0717, "step": 128210 }, { "epoch": 2.6100763358778627, "grad_norm": 18.7858188805643, "learning_rate": 5.487668086312656e-06, "loss": 0.1826, "step": 128220 }, { "epoch": 2.6102798982188293, "grad_norm": 18.804293995267223, "learning_rate": 5.486960900991931e-06, "loss": 0.1847, "step": 128230 }, { "epoch": 2.6104834605597964, "grad_norm": 0.9483358908421411, "learning_rate": 5.486253705836405e-06, "loss": 0.1334, "step": 128240 }, { "epoch": 2.6106870229007635, "grad_norm": 0.07565682663127096, "learning_rate": 5.485546500860357e-06, "loss": 0.1114, "step": 128250 }, { "epoch": 2.61089058524173, "grad_norm": 6.1873090023255095, "learning_rate": 5.484839286078074e-06, "loss": 0.1195, "step": 128260 }, { "epoch": 2.611094147582697, "grad_norm": 0.7088421015923042, "learning_rate": 5.484132061503835e-06, "loss": 0.0691, "step": 128270 }, { "epoch": 2.6112977099236643, "grad_norm": 0.043628524117358916, "learning_rate": 5.4834248271519275e-06, "loss": 0.0989, "step": 128280 }, { "epoch": 2.611501272264631, "grad_norm": 9.517483088857563, "learning_rate": 5.482717583036632e-06, "loss": 0.1577, "step": 128290 }, { "epoch": 2.611704834605598, "grad_norm": 13.508763929792805, "learning_rate": 5.482010329172233e-06, "loss": 0.1873, "step": 128300 }, { "epoch": 2.611908396946565, "grad_norm": 0.10745642559018334, "learning_rate": 5.481303065573015e-06, "loss": 0.1093, "step": 128310 }, { "epoch": 2.6121119592875317, "grad_norm": 10.958765662277914, "learning_rate": 5.4805957922532625e-06, "loss": 0.123, "step": 128320 }, { "epoch": 2.612315521628499, "grad_norm": 7.87022824284613, "learning_rate": 5.479888509227257e-06, "loss": 0.1353, "step": 128330 }, { "epoch": 2.612519083969466, "grad_norm": 14.988855615097608, "learning_rate": 5.479181216509285e-06, "loss": 0.2106, "step": 128340 }, { "epoch": 2.6127226463104325, "grad_norm": 0.020037656401351966, "learning_rate": 5.478473914113633e-06, "loss": 0.0212, "step": 128350 }, { "epoch": 2.6129262086513996, "grad_norm": 13.958563586503024, "learning_rate": 5.477766602054584e-06, "loss": 0.1928, "step": 128360 }, { "epoch": 2.6131297709923667, "grad_norm": 0.20003551909187056, "learning_rate": 5.477059280346421e-06, "loss": 0.1178, "step": 128370 }, { "epoch": 2.6133333333333333, "grad_norm": 18.797370936142983, "learning_rate": 5.476351949003434e-06, "loss": 0.0698, "step": 128380 }, { "epoch": 2.6135368956743004, "grad_norm": 0.10287346084646243, "learning_rate": 5.475644608039904e-06, "loss": 0.0759, "step": 128390 }, { "epoch": 2.6137404580152674, "grad_norm": 17.871566751370544, "learning_rate": 5.474937257470118e-06, "loss": 0.1133, "step": 128400 }, { "epoch": 2.613944020356234, "grad_norm": 16.401803102985177, "learning_rate": 5.474229897308365e-06, "loss": 0.1062, "step": 128410 }, { "epoch": 2.614147582697201, "grad_norm": 20.809048019291637, "learning_rate": 5.473522527568925e-06, "loss": 0.1038, "step": 128420 }, { "epoch": 2.614351145038168, "grad_norm": 34.02602086402749, "learning_rate": 5.472815148266089e-06, "loss": 0.1019, "step": 128430 }, { "epoch": 2.614554707379135, "grad_norm": 27.02360021251009, "learning_rate": 5.4721077594141444e-06, "loss": 0.0566, "step": 128440 }, { "epoch": 2.614758269720102, "grad_norm": 0.14121609335741642, "learning_rate": 5.471400361027373e-06, "loss": 0.0443, "step": 128450 }, { "epoch": 2.6149618320610686, "grad_norm": 0.05434903016769451, "learning_rate": 5.470692953120066e-06, "loss": 0.1398, "step": 128460 }, { "epoch": 2.6151653944020357, "grad_norm": 13.574374772098142, "learning_rate": 5.469985535706507e-06, "loss": 0.0626, "step": 128470 }, { "epoch": 2.6153689567430023, "grad_norm": 0.3974846026014711, "learning_rate": 5.469278108800985e-06, "loss": 0.0642, "step": 128480 }, { "epoch": 2.6155725190839694, "grad_norm": 13.599033904185648, "learning_rate": 5.468570672417788e-06, "loss": 0.1043, "step": 128490 }, { "epoch": 2.6157760814249365, "grad_norm": 0.024525422668580925, "learning_rate": 5.467863226571202e-06, "loss": 0.1151, "step": 128500 }, { "epoch": 2.615979643765903, "grad_norm": 0.9312273467915058, "learning_rate": 5.467155771275515e-06, "loss": 0.1292, "step": 128510 }, { "epoch": 2.61618320610687, "grad_norm": 0.1752334767464291, "learning_rate": 5.466448306545017e-06, "loss": 0.1062, "step": 128520 }, { "epoch": 2.6163867684478372, "grad_norm": 0.2578307056596359, "learning_rate": 5.465740832393994e-06, "loss": 0.1243, "step": 128530 }, { "epoch": 2.616590330788804, "grad_norm": 0.0869891024524776, "learning_rate": 5.4650333488367335e-06, "loss": 0.1277, "step": 128540 }, { "epoch": 2.616793893129771, "grad_norm": 0.9148000248544313, "learning_rate": 5.464325855887528e-06, "loss": 0.1341, "step": 128550 }, { "epoch": 2.616997455470738, "grad_norm": 0.6116928142644327, "learning_rate": 5.463618353560663e-06, "loss": 0.0579, "step": 128560 }, { "epoch": 2.6172010178117047, "grad_norm": 0.3313242975666986, "learning_rate": 5.462910841870427e-06, "loss": 0.1704, "step": 128570 }, { "epoch": 2.6174045801526717, "grad_norm": 8.537760104726818, "learning_rate": 5.462203320831111e-06, "loss": 0.1366, "step": 128580 }, { "epoch": 2.617608142493639, "grad_norm": 0.021454740969159877, "learning_rate": 5.461495790457004e-06, "loss": 0.0675, "step": 128590 }, { "epoch": 2.6178117048346055, "grad_norm": 0.016907609218449512, "learning_rate": 5.4607882507623955e-06, "loss": 0.069, "step": 128600 }, { "epoch": 2.6180152671755725, "grad_norm": 7.624365008680887, "learning_rate": 5.460080701761576e-06, "loss": 0.1714, "step": 128610 }, { "epoch": 2.6182188295165396, "grad_norm": 0.07390973557407589, "learning_rate": 5.4593731434688305e-06, "loss": 0.1252, "step": 128620 }, { "epoch": 2.6184223918575062, "grad_norm": 14.014934208211983, "learning_rate": 5.458665575898454e-06, "loss": 0.1862, "step": 128630 }, { "epoch": 2.6186259541984733, "grad_norm": 3.009483026472449, "learning_rate": 5.457957999064737e-06, "loss": 0.0671, "step": 128640 }, { "epoch": 2.6188295165394404, "grad_norm": 0.07178539793524903, "learning_rate": 5.457250412981967e-06, "loss": 0.1946, "step": 128650 }, { "epoch": 2.619033078880407, "grad_norm": 21.327669022804184, "learning_rate": 5.4565428176644355e-06, "loss": 0.0972, "step": 128660 }, { "epoch": 2.619236641221374, "grad_norm": 0.2255803739662872, "learning_rate": 5.455835213126436e-06, "loss": 0.1002, "step": 128670 }, { "epoch": 2.619440203562341, "grad_norm": 5.467732898900782, "learning_rate": 5.455127599382255e-06, "loss": 0.1472, "step": 128680 }, { "epoch": 2.619643765903308, "grad_norm": 0.8154249934250202, "learning_rate": 5.454419976446187e-06, "loss": 0.183, "step": 128690 }, { "epoch": 2.619847328244275, "grad_norm": 6.2821350537361695, "learning_rate": 5.453712344332524e-06, "loss": 0.1748, "step": 128700 }, { "epoch": 2.620050890585242, "grad_norm": 0.1534909650567585, "learning_rate": 5.453004703055553e-06, "loss": 0.1051, "step": 128710 }, { "epoch": 2.6202544529262086, "grad_norm": 16.281332069794086, "learning_rate": 5.4522970526295684e-06, "loss": 0.1175, "step": 128720 }, { "epoch": 2.6204580152671757, "grad_norm": 0.05119783828034208, "learning_rate": 5.451589393068864e-06, "loss": 0.0867, "step": 128730 }, { "epoch": 2.6206615776081423, "grad_norm": 10.716170432149745, "learning_rate": 5.45088172438773e-06, "loss": 0.0741, "step": 128740 }, { "epoch": 2.6208651399491094, "grad_norm": 24.033718345694194, "learning_rate": 5.450174046600458e-06, "loss": 0.1002, "step": 128750 }, { "epoch": 2.6210687022900765, "grad_norm": 0.37547554454274096, "learning_rate": 5.4494663597213414e-06, "loss": 0.084, "step": 128760 }, { "epoch": 2.621272264631043, "grad_norm": 1.2005409169461083, "learning_rate": 5.448758663764674e-06, "loss": 0.1084, "step": 128770 }, { "epoch": 2.62147582697201, "grad_norm": 16.941633018654798, "learning_rate": 5.448050958744746e-06, "loss": 0.141, "step": 128780 }, { "epoch": 2.621679389312977, "grad_norm": 0.4099953655630426, "learning_rate": 5.447343244675852e-06, "loss": 0.0281, "step": 128790 }, { "epoch": 2.621882951653944, "grad_norm": 12.517178573704042, "learning_rate": 5.446635521572285e-06, "loss": 0.2149, "step": 128800 }, { "epoch": 2.622086513994911, "grad_norm": 11.71639539699596, "learning_rate": 5.44592778944834e-06, "loss": 0.0739, "step": 128810 }, { "epoch": 2.6222900763358776, "grad_norm": 0.06964234895796918, "learning_rate": 5.445220048318307e-06, "loss": 0.144, "step": 128820 }, { "epoch": 2.6224936386768447, "grad_norm": 19.42651547048047, "learning_rate": 5.444512298196482e-06, "loss": 0.2065, "step": 128830 }, { "epoch": 2.622697201017812, "grad_norm": 0.014086022220283176, "learning_rate": 5.44380453909716e-06, "loss": 0.0672, "step": 128840 }, { "epoch": 2.6229007633587784, "grad_norm": 19.323487895726235, "learning_rate": 5.443096771034632e-06, "loss": 0.1575, "step": 128850 }, { "epoch": 2.6231043256997455, "grad_norm": 9.63112814027238, "learning_rate": 5.442388994023195e-06, "loss": 0.099, "step": 128860 }, { "epoch": 2.6233078880407126, "grad_norm": 20.020589769213146, "learning_rate": 5.4416812080771405e-06, "loss": 0.1316, "step": 128870 }, { "epoch": 2.623511450381679, "grad_norm": 0.041143224987153004, "learning_rate": 5.440973413210767e-06, "loss": 0.0482, "step": 128880 }, { "epoch": 2.6237150127226463, "grad_norm": 24.35126170614163, "learning_rate": 5.440265609438368e-06, "loss": 0.1485, "step": 128890 }, { "epoch": 2.6239185750636134, "grad_norm": 9.61191733748844, "learning_rate": 5.4395577967742374e-06, "loss": 0.0529, "step": 128900 }, { "epoch": 2.62412213740458, "grad_norm": 32.65103896006601, "learning_rate": 5.43884997523267e-06, "loss": 0.182, "step": 128910 }, { "epoch": 2.624325699745547, "grad_norm": 0.18155385228696083, "learning_rate": 5.438142144827963e-06, "loss": 0.0679, "step": 128920 }, { "epoch": 2.624529262086514, "grad_norm": 10.192161239533057, "learning_rate": 5.43743430557441e-06, "loss": 0.1441, "step": 128930 }, { "epoch": 2.624732824427481, "grad_norm": 0.12057514929813111, "learning_rate": 5.43672645748631e-06, "loss": 0.0752, "step": 128940 }, { "epoch": 2.624936386768448, "grad_norm": 7.121620493463605, "learning_rate": 5.436018600577955e-06, "loss": 0.0593, "step": 128950 }, { "epoch": 2.625139949109415, "grad_norm": 13.601589480422842, "learning_rate": 5.435310734863642e-06, "loss": 0.1985, "step": 128960 }, { "epoch": 2.6253435114503816, "grad_norm": 7.846377109870831, "learning_rate": 5.434602860357669e-06, "loss": 0.2161, "step": 128970 }, { "epoch": 2.6255470737913487, "grad_norm": 0.2524603878894648, "learning_rate": 5.433894977074332e-06, "loss": 0.1755, "step": 128980 }, { "epoch": 2.6257506361323157, "grad_norm": 3.691716610274329, "learning_rate": 5.433187085027927e-06, "loss": 0.1901, "step": 128990 }, { "epoch": 2.6259541984732824, "grad_norm": 1.1276856576533676, "learning_rate": 5.432479184232751e-06, "loss": 0.1122, "step": 129000 }, { "epoch": 2.6261577608142495, "grad_norm": 1.8040167796377549, "learning_rate": 5.4317712747031e-06, "loss": 0.0587, "step": 129010 }, { "epoch": 2.6263613231552165, "grad_norm": 0.10041685811574051, "learning_rate": 5.431063356453274e-06, "loss": 0.1378, "step": 129020 }, { "epoch": 2.626564885496183, "grad_norm": 9.430286447755151, "learning_rate": 5.430355429497565e-06, "loss": 0.1371, "step": 129030 }, { "epoch": 2.6267684478371502, "grad_norm": 0.6301607066596916, "learning_rate": 5.429647493850276e-06, "loss": 0.1612, "step": 129040 }, { "epoch": 2.6269720101781173, "grad_norm": 13.793749451941743, "learning_rate": 5.428939549525702e-06, "loss": 0.116, "step": 129050 }, { "epoch": 2.627175572519084, "grad_norm": 28.029463207196482, "learning_rate": 5.42823159653814e-06, "loss": 0.0799, "step": 129060 }, { "epoch": 2.627379134860051, "grad_norm": 10.159077744937573, "learning_rate": 5.427523634901891e-06, "loss": 0.2709, "step": 129070 }, { "epoch": 2.6275826972010177, "grad_norm": 1.0740628313974248, "learning_rate": 5.426815664631252e-06, "loss": 0.2345, "step": 129080 }, { "epoch": 2.6277862595419847, "grad_norm": 0.2117419756246122, "learning_rate": 5.42610768574052e-06, "loss": 0.1342, "step": 129090 }, { "epoch": 2.6279898218829514, "grad_norm": 0.09317438970622673, "learning_rate": 5.425399698243995e-06, "loss": 0.1489, "step": 129100 }, { "epoch": 2.6281933842239185, "grad_norm": 0.2763076471509684, "learning_rate": 5.424691702155976e-06, "loss": 0.1283, "step": 129110 }, { "epoch": 2.6283969465648855, "grad_norm": 17.288520138792567, "learning_rate": 5.423983697490758e-06, "loss": 0.1431, "step": 129120 }, { "epoch": 2.628600508905852, "grad_norm": 4.2338349733826695, "learning_rate": 5.423275684262645e-06, "loss": 0.0826, "step": 129130 }, { "epoch": 2.6288040712468193, "grad_norm": 12.642322984351793, "learning_rate": 5.4225676624859345e-06, "loss": 0.1243, "step": 129140 }, { "epoch": 2.6290076335877863, "grad_norm": 34.36686859526406, "learning_rate": 5.4218596321749265e-06, "loss": 0.166, "step": 129150 }, { "epoch": 2.629211195928753, "grad_norm": 2.8529960169684165, "learning_rate": 5.421151593343918e-06, "loss": 0.1181, "step": 129160 }, { "epoch": 2.62941475826972, "grad_norm": 25.426108424845236, "learning_rate": 5.420443546007212e-06, "loss": 0.1785, "step": 129170 }, { "epoch": 2.629618320610687, "grad_norm": 0.06062104998114337, "learning_rate": 5.4197354901791066e-06, "loss": 0.1073, "step": 129180 }, { "epoch": 2.6298218829516538, "grad_norm": 4.742987438125724, "learning_rate": 5.419027425873904e-06, "loss": 0.0982, "step": 129190 }, { "epoch": 2.630025445292621, "grad_norm": 61.914637518064495, "learning_rate": 5.4183193531059e-06, "loss": 0.1879, "step": 129200 }, { "epoch": 2.630229007633588, "grad_norm": 0.15363607281326272, "learning_rate": 5.417611271889398e-06, "loss": 0.0677, "step": 129210 }, { "epoch": 2.6304325699745545, "grad_norm": 0.1472677754166547, "learning_rate": 5.416903182238701e-06, "loss": 0.1006, "step": 129220 }, { "epoch": 2.6306361323155216, "grad_norm": 0.34318021247535513, "learning_rate": 5.416195084168107e-06, "loss": 0.1846, "step": 129230 }, { "epoch": 2.6308396946564887, "grad_norm": 0.8207533437577373, "learning_rate": 5.415486977691915e-06, "loss": 0.1354, "step": 129240 }, { "epoch": 2.6310432569974553, "grad_norm": 1.6285922027892432, "learning_rate": 5.414778862824429e-06, "loss": 0.1015, "step": 129250 }, { "epoch": 2.6312468193384224, "grad_norm": 0.08128161664922656, "learning_rate": 5.414070739579951e-06, "loss": 0.1542, "step": 129260 }, { "epoch": 2.6314503816793895, "grad_norm": 19.32421420036104, "learning_rate": 5.413362607972779e-06, "loss": 0.121, "step": 129270 }, { "epoch": 2.631653944020356, "grad_norm": 0.13453475455443548, "learning_rate": 5.412654468017219e-06, "loss": 0.1174, "step": 129280 }, { "epoch": 2.631857506361323, "grad_norm": 16.80724912631618, "learning_rate": 5.411946319727569e-06, "loss": 0.2042, "step": 129290 }, { "epoch": 2.6320610687022903, "grad_norm": 4.456963782237152, "learning_rate": 5.4112381631181344e-06, "loss": 0.0871, "step": 129300 }, { "epoch": 2.632264631043257, "grad_norm": 8.993418121407261, "learning_rate": 5.410529998203213e-06, "loss": 0.0752, "step": 129310 }, { "epoch": 2.632468193384224, "grad_norm": 32.595126905204495, "learning_rate": 5.40982182499711e-06, "loss": 0.1946, "step": 129320 }, { "epoch": 2.632671755725191, "grad_norm": 0.021605535704542368, "learning_rate": 5.409113643514129e-06, "loss": 0.1087, "step": 129330 }, { "epoch": 2.6328753180661577, "grad_norm": 10.918801130656625, "learning_rate": 5.40840545376857e-06, "loss": 0.1017, "step": 129340 }, { "epoch": 2.633078880407125, "grad_norm": 0.2974999999679521, "learning_rate": 5.407697255774737e-06, "loss": 0.117, "step": 129350 }, { "epoch": 2.633282442748092, "grad_norm": 19.274113958661, "learning_rate": 5.406989049546932e-06, "loss": 0.1715, "step": 129360 }, { "epoch": 2.6334860050890585, "grad_norm": 15.77089015800882, "learning_rate": 5.406280835099459e-06, "loss": 0.1, "step": 129370 }, { "epoch": 2.6336895674300256, "grad_norm": 10.756145732903917, "learning_rate": 5.405572612446622e-06, "loss": 0.0583, "step": 129380 }, { "epoch": 2.633893129770992, "grad_norm": 6.85264673592385, "learning_rate": 5.404864381602725e-06, "loss": 0.1037, "step": 129390 }, { "epoch": 2.6340966921119593, "grad_norm": 27.1802103899525, "learning_rate": 5.4041561425820686e-06, "loss": 0.1171, "step": 129400 }, { "epoch": 2.6343002544529264, "grad_norm": 9.2395504734655, "learning_rate": 5.403447895398956e-06, "loss": 0.1022, "step": 129410 }, { "epoch": 2.634503816793893, "grad_norm": 0.2077341470562053, "learning_rate": 5.402739640067698e-06, "loss": 0.0376, "step": 129420 }, { "epoch": 2.63470737913486, "grad_norm": 41.63984464317845, "learning_rate": 5.402031376602592e-06, "loss": 0.1794, "step": 129430 }, { "epoch": 2.6349109414758267, "grad_norm": 1.0902583310046605, "learning_rate": 5.401323105017946e-06, "loss": 0.0931, "step": 129440 }, { "epoch": 2.635114503816794, "grad_norm": 23.195881746391176, "learning_rate": 5.400614825328061e-06, "loss": 0.1557, "step": 129450 }, { "epoch": 2.635318066157761, "grad_norm": 5.899326689560849, "learning_rate": 5.399906537547245e-06, "loss": 0.0744, "step": 129460 }, { "epoch": 2.6355216284987275, "grad_norm": 15.184670669022914, "learning_rate": 5.3991982416898005e-06, "loss": 0.1091, "step": 129470 }, { "epoch": 2.6357251908396946, "grad_norm": 15.020337919567263, "learning_rate": 5.398489937770035e-06, "loss": 0.1369, "step": 129480 }, { "epoch": 2.6359287531806617, "grad_norm": 14.596430692742604, "learning_rate": 5.3977816258022494e-06, "loss": 0.0477, "step": 129490 }, { "epoch": 2.6361323155216283, "grad_norm": 0.019963335531480653, "learning_rate": 5.397073305800754e-06, "loss": 0.0406, "step": 129500 }, { "epoch": 2.6363358778625954, "grad_norm": 22.113051091923257, "learning_rate": 5.39636497777985e-06, "loss": 0.106, "step": 129510 }, { "epoch": 2.6365394402035625, "grad_norm": 0.23257118216038336, "learning_rate": 5.395656641753846e-06, "loss": 0.0605, "step": 129520 }, { "epoch": 2.636743002544529, "grad_norm": 37.60810964173642, "learning_rate": 5.394948297737046e-06, "loss": 0.208, "step": 129530 }, { "epoch": 2.636946564885496, "grad_norm": 15.079571501087896, "learning_rate": 5.394239945743755e-06, "loss": 0.223, "step": 129540 }, { "epoch": 2.6371501272264632, "grad_norm": 2.942625556812894, "learning_rate": 5.393531585788282e-06, "loss": 0.1276, "step": 129550 }, { "epoch": 2.63735368956743, "grad_norm": 0.0800136482702593, "learning_rate": 5.392823217884933e-06, "loss": 0.047, "step": 129560 }, { "epoch": 2.637557251908397, "grad_norm": 0.18165216782771573, "learning_rate": 5.3921148420480105e-06, "loss": 0.0936, "step": 129570 }, { "epoch": 2.637760814249364, "grad_norm": 4.028182831620342, "learning_rate": 5.391406458291825e-06, "loss": 0.0448, "step": 129580 }, { "epoch": 2.6379643765903307, "grad_norm": 7.2400081298705, "learning_rate": 5.390698066630683e-06, "loss": 0.1256, "step": 129590 }, { "epoch": 2.6381679389312978, "grad_norm": 0.040864662445280656, "learning_rate": 5.389989667078888e-06, "loss": 0.1912, "step": 129600 }, { "epoch": 2.638371501272265, "grad_norm": 21.84547162413746, "learning_rate": 5.38928125965075e-06, "loss": 0.1298, "step": 129610 }, { "epoch": 2.6385750636132315, "grad_norm": 0.732359129449534, "learning_rate": 5.388572844360577e-06, "loss": 0.0715, "step": 129620 }, { "epoch": 2.6387786259541985, "grad_norm": 19.71936264484785, "learning_rate": 5.387864421222675e-06, "loss": 0.0791, "step": 129630 }, { "epoch": 2.6389821882951656, "grad_norm": 1.4788635484864912, "learning_rate": 5.387155990251351e-06, "loss": 0.0828, "step": 129640 }, { "epoch": 2.6391857506361323, "grad_norm": 0.16657920309713464, "learning_rate": 5.386447551460911e-06, "loss": 0.1874, "step": 129650 }, { "epoch": 2.6393893129770993, "grad_norm": 14.300937989186778, "learning_rate": 5.385739104865667e-06, "loss": 0.1133, "step": 129660 }, { "epoch": 2.6395928753180664, "grad_norm": 22.66421500925038, "learning_rate": 5.385030650479926e-06, "loss": 0.1188, "step": 129670 }, { "epoch": 2.639796437659033, "grad_norm": 1.0172553636536137, "learning_rate": 5.384322188317994e-06, "loss": 0.1104, "step": 129680 }, { "epoch": 2.64, "grad_norm": 15.27534452233684, "learning_rate": 5.383613718394178e-06, "loss": 0.088, "step": 129690 }, { "epoch": 2.6402035623409668, "grad_norm": 6.9289481311802374, "learning_rate": 5.382905240722791e-06, "loss": 0.14, "step": 129700 }, { "epoch": 2.640407124681934, "grad_norm": 16.31763279050315, "learning_rate": 5.382196755318141e-06, "loss": 0.0673, "step": 129710 }, { "epoch": 2.640610687022901, "grad_norm": 11.427382344786706, "learning_rate": 5.381488262194534e-06, "loss": 0.0489, "step": 129720 }, { "epoch": 2.6408142493638675, "grad_norm": 24.532943516775713, "learning_rate": 5.380779761366279e-06, "loss": 0.0735, "step": 129730 }, { "epoch": 2.6410178117048346, "grad_norm": 0.48817222569458574, "learning_rate": 5.3800712528476876e-06, "loss": 0.089, "step": 129740 }, { "epoch": 2.6412213740458013, "grad_norm": 26.915531471500184, "learning_rate": 5.3793627366530675e-06, "loss": 0.0622, "step": 129750 }, { "epoch": 2.6414249363867683, "grad_norm": 0.07442266308386171, "learning_rate": 5.378654212796728e-06, "loss": 0.0884, "step": 129760 }, { "epoch": 2.6416284987277354, "grad_norm": 24.587182195385086, "learning_rate": 5.37794568129298e-06, "loss": 0.1049, "step": 129770 }, { "epoch": 2.641832061068702, "grad_norm": 16.71649209666419, "learning_rate": 5.3772371421561296e-06, "loss": 0.183, "step": 129780 }, { "epoch": 2.642035623409669, "grad_norm": 14.457157183284355, "learning_rate": 5.376528595400492e-06, "loss": 0.1554, "step": 129790 }, { "epoch": 2.642239185750636, "grad_norm": 27.09905206507858, "learning_rate": 5.375820041040373e-06, "loss": 0.1608, "step": 129800 }, { "epoch": 2.642442748091603, "grad_norm": 0.30723777608342295, "learning_rate": 5.375111479090085e-06, "loss": 0.2076, "step": 129810 }, { "epoch": 2.64264631043257, "grad_norm": 0.6913677789017881, "learning_rate": 5.374402909563937e-06, "loss": 0.0946, "step": 129820 }, { "epoch": 2.642849872773537, "grad_norm": 2.529459029997832, "learning_rate": 5.37369433247624e-06, "loss": 0.1532, "step": 129830 }, { "epoch": 2.6430534351145036, "grad_norm": 16.582661448418452, "learning_rate": 5.372985747841305e-06, "loss": 0.0778, "step": 129840 }, { "epoch": 2.6432569974554707, "grad_norm": 17.237694703829202, "learning_rate": 5.372277155673443e-06, "loss": 0.2069, "step": 129850 }, { "epoch": 2.643460559796438, "grad_norm": 0.12334926642080316, "learning_rate": 5.371568555986963e-06, "loss": 0.1385, "step": 129860 }, { "epoch": 2.6436641221374044, "grad_norm": 14.71064287183695, "learning_rate": 5.370859948796178e-06, "loss": 0.1086, "step": 129870 }, { "epoch": 2.6438676844783715, "grad_norm": 0.011558364055069817, "learning_rate": 5.370151334115399e-06, "loss": 0.1582, "step": 129880 }, { "epoch": 2.6440712468193386, "grad_norm": 5.966681348130254, "learning_rate": 5.369442711958936e-06, "loss": 0.1389, "step": 129890 }, { "epoch": 2.644274809160305, "grad_norm": 0.16175976301220596, "learning_rate": 5.3687340823411016e-06, "loss": 0.1229, "step": 129900 }, { "epoch": 2.6444783715012723, "grad_norm": 19.750143531609265, "learning_rate": 5.368025445276209e-06, "loss": 0.0817, "step": 129910 }, { "epoch": 2.6446819338422394, "grad_norm": 0.9328825759776092, "learning_rate": 5.367316800778568e-06, "loss": 0.1797, "step": 129920 }, { "epoch": 2.644885496183206, "grad_norm": 8.90958178571792, "learning_rate": 5.366608148862491e-06, "loss": 0.0721, "step": 129930 }, { "epoch": 2.645089058524173, "grad_norm": 9.325021373287102, "learning_rate": 5.36589948954229e-06, "loss": 0.135, "step": 129940 }, { "epoch": 2.64529262086514, "grad_norm": 8.005377929387242, "learning_rate": 5.365190822832278e-06, "loss": 0.1368, "step": 129950 }, { "epoch": 2.645496183206107, "grad_norm": 16.947295204893393, "learning_rate": 5.3644821487467655e-06, "loss": 0.0933, "step": 129960 }, { "epoch": 2.645699745547074, "grad_norm": 1.0063308211136108, "learning_rate": 5.363773467300069e-06, "loss": 0.1071, "step": 129970 }, { "epoch": 2.645903307888041, "grad_norm": 0.08917491180813306, "learning_rate": 5.363064778506496e-06, "loss": 0.1265, "step": 129980 }, { "epoch": 2.6461068702290076, "grad_norm": 20.01397854859523, "learning_rate": 5.362356082380365e-06, "loss": 0.1937, "step": 129990 }, { "epoch": 2.6463104325699747, "grad_norm": 0.5385707163057168, "learning_rate": 5.361647378935984e-06, "loss": 0.0694, "step": 130000 }, { "epoch": 2.6465139949109417, "grad_norm": 10.916734383723018, "learning_rate": 5.360938668187669e-06, "loss": 0.1916, "step": 130010 }, { "epoch": 2.6467175572519084, "grad_norm": 9.207334421620388, "learning_rate": 5.360229950149734e-06, "loss": 0.0962, "step": 130020 }, { "epoch": 2.6469211195928755, "grad_norm": 7.631716581467011, "learning_rate": 5.359521224836489e-06, "loss": 0.1493, "step": 130030 }, { "epoch": 2.647124681933842, "grad_norm": 17.48852431178538, "learning_rate": 5.358812492262251e-06, "loss": 0.0589, "step": 130040 }, { "epoch": 2.647328244274809, "grad_norm": 5.475246658508169, "learning_rate": 5.3581037524413334e-06, "loss": 0.2266, "step": 130050 }, { "epoch": 2.647531806615776, "grad_norm": 0.033207577180788025, "learning_rate": 5.357395005388047e-06, "loss": 0.1015, "step": 130060 }, { "epoch": 2.647735368956743, "grad_norm": 0.3710203694571235, "learning_rate": 5.356686251116709e-06, "loss": 0.1494, "step": 130070 }, { "epoch": 2.64793893129771, "grad_norm": 0.0971215168962217, "learning_rate": 5.355977489641635e-06, "loss": 0.0232, "step": 130080 }, { "epoch": 2.6481424936386766, "grad_norm": 4.396892089523773, "learning_rate": 5.355268720977136e-06, "loss": 0.2286, "step": 130090 }, { "epoch": 2.6483460559796437, "grad_norm": 9.667525503272918, "learning_rate": 5.354559945137525e-06, "loss": 0.2333, "step": 130100 }, { "epoch": 2.6485496183206108, "grad_norm": 0.1352315237248965, "learning_rate": 5.353851162137122e-06, "loss": 0.0659, "step": 130110 }, { "epoch": 2.6487531806615774, "grad_norm": 56.409498357611206, "learning_rate": 5.353142371990238e-06, "loss": 0.1675, "step": 130120 }, { "epoch": 2.6489567430025445, "grad_norm": 0.1592074211350338, "learning_rate": 5.35243357471119e-06, "loss": 0.0598, "step": 130130 }, { "epoch": 2.6491603053435115, "grad_norm": 13.400470965697822, "learning_rate": 5.351724770314291e-06, "loss": 0.1782, "step": 130140 }, { "epoch": 2.649363867684478, "grad_norm": 13.410172072539257, "learning_rate": 5.351015958813857e-06, "loss": 0.1822, "step": 130150 }, { "epoch": 2.6495674300254453, "grad_norm": 0.15698206338957135, "learning_rate": 5.350307140224203e-06, "loss": 0.1062, "step": 130160 }, { "epoch": 2.6497709923664123, "grad_norm": 14.551707714595942, "learning_rate": 5.349598314559647e-06, "loss": 0.1654, "step": 130170 }, { "epoch": 2.649974554707379, "grad_norm": 2.124120578840718, "learning_rate": 5.348889481834503e-06, "loss": 0.0494, "step": 130180 }, { "epoch": 2.650178117048346, "grad_norm": 11.774138768267076, "learning_rate": 5.348180642063084e-06, "loss": 0.1023, "step": 130190 }, { "epoch": 2.650381679389313, "grad_norm": 0.37024638268801346, "learning_rate": 5.347471795259711e-06, "loss": 0.1815, "step": 130200 }, { "epoch": 2.6505852417302798, "grad_norm": 18.14584539073451, "learning_rate": 5.3467629414386965e-06, "loss": 0.1768, "step": 130210 }, { "epoch": 2.650788804071247, "grad_norm": 7.5534970414234905, "learning_rate": 5.346054080614358e-06, "loss": 0.1411, "step": 130220 }, { "epoch": 2.650992366412214, "grad_norm": 1.7440481919529154, "learning_rate": 5.345345212801011e-06, "loss": 0.0545, "step": 130230 }, { "epoch": 2.6511959287531806, "grad_norm": 4.740587045342372, "learning_rate": 5.344636338012973e-06, "loss": 0.2105, "step": 130240 }, { "epoch": 2.6513994910941476, "grad_norm": 1.0093688387561015, "learning_rate": 5.343927456264561e-06, "loss": 0.0971, "step": 130250 }, { "epoch": 2.6516030534351147, "grad_norm": 0.06032822610161173, "learning_rate": 5.34321856757009e-06, "loss": 0.0621, "step": 130260 }, { "epoch": 2.6518066157760813, "grad_norm": 3.927480683416659, "learning_rate": 5.342509671943878e-06, "loss": 0.137, "step": 130270 }, { "epoch": 2.6520101781170484, "grad_norm": 0.4206208416667519, "learning_rate": 5.3418007694002425e-06, "loss": 0.126, "step": 130280 }, { "epoch": 2.6522137404580155, "grad_norm": 7.873313945770029, "learning_rate": 5.3410918599535e-06, "loss": 0.0874, "step": 130290 }, { "epoch": 2.652417302798982, "grad_norm": 5.760899697040234, "learning_rate": 5.3403829436179675e-06, "loss": 0.0492, "step": 130300 }, { "epoch": 2.652620865139949, "grad_norm": 29.671270740753965, "learning_rate": 5.339674020407964e-06, "loss": 0.0863, "step": 130310 }, { "epoch": 2.6528244274809163, "grad_norm": 2.130786592439818, "learning_rate": 5.338965090337806e-06, "loss": 0.1277, "step": 130320 }, { "epoch": 2.653027989821883, "grad_norm": 1.989922095283665, "learning_rate": 5.338256153421811e-06, "loss": 0.136, "step": 130330 }, { "epoch": 2.65323155216285, "grad_norm": 0.03363923108110282, "learning_rate": 5.337547209674299e-06, "loss": 0.1031, "step": 130340 }, { "epoch": 2.6534351145038166, "grad_norm": 2.6660493852059526, "learning_rate": 5.3368382591095835e-06, "loss": 0.091, "step": 130350 }, { "epoch": 2.6536386768447837, "grad_norm": 0.06217373484125964, "learning_rate": 5.336129301741987e-06, "loss": 0.1353, "step": 130360 }, { "epoch": 2.653842239185751, "grad_norm": 4.392695740881945, "learning_rate": 5.335420337585827e-06, "loss": 0.0713, "step": 130370 }, { "epoch": 2.6540458015267174, "grad_norm": 7.17470728226221, "learning_rate": 5.33471136665542e-06, "loss": 0.1026, "step": 130380 }, { "epoch": 2.6542493638676845, "grad_norm": 5.2474056903888435, "learning_rate": 5.334002388965085e-06, "loss": 0.2599, "step": 130390 }, { "epoch": 2.654452926208651, "grad_norm": 0.08504694450381844, "learning_rate": 5.333293404529144e-06, "loss": 0.3024, "step": 130400 }, { "epoch": 2.654656488549618, "grad_norm": 0.07087141245024225, "learning_rate": 5.332584413361912e-06, "loss": 0.0979, "step": 130410 }, { "epoch": 2.6548600508905853, "grad_norm": 0.26667960695104465, "learning_rate": 5.33187541547771e-06, "loss": 0.1106, "step": 130420 }, { "epoch": 2.655063613231552, "grad_norm": 13.509750207652527, "learning_rate": 5.331166410890856e-06, "loss": 0.1373, "step": 130430 }, { "epoch": 2.655267175572519, "grad_norm": 27.76809480797352, "learning_rate": 5.33045739961567e-06, "loss": 0.0984, "step": 130440 }, { "epoch": 2.655470737913486, "grad_norm": 11.54658548125681, "learning_rate": 5.329748381666471e-06, "loss": 0.1333, "step": 130450 }, { "epoch": 2.6556743002544527, "grad_norm": 0.3309657480326875, "learning_rate": 5.32903935705758e-06, "loss": 0.0948, "step": 130460 }, { "epoch": 2.65587786259542, "grad_norm": 0.07108897921400723, "learning_rate": 5.328330325803315e-06, "loss": 0.1226, "step": 130470 }, { "epoch": 2.656081424936387, "grad_norm": 0.13845619046198135, "learning_rate": 5.327621287917994e-06, "loss": 0.1115, "step": 130480 }, { "epoch": 2.6562849872773535, "grad_norm": 11.70488283507438, "learning_rate": 5.326912243415942e-06, "loss": 0.2202, "step": 130490 }, { "epoch": 2.6564885496183206, "grad_norm": 19.40290803843147, "learning_rate": 5.326203192311474e-06, "loss": 0.1383, "step": 130500 }, { "epoch": 2.6566921119592877, "grad_norm": 24.12242905532776, "learning_rate": 5.325494134618914e-06, "loss": 0.0979, "step": 130510 }, { "epoch": 2.6568956743002543, "grad_norm": 7.100568205348574, "learning_rate": 5.32478507035258e-06, "loss": 0.179, "step": 130520 }, { "epoch": 2.6570992366412214, "grad_norm": 10.135016996545422, "learning_rate": 5.324075999526792e-06, "loss": 0.0907, "step": 130530 }, { "epoch": 2.6573027989821885, "grad_norm": 13.742530208596914, "learning_rate": 5.323366922155873e-06, "loss": 0.1701, "step": 130540 }, { "epoch": 2.657506361323155, "grad_norm": 20.474888388686036, "learning_rate": 5.322657838254144e-06, "loss": 0.1634, "step": 130550 }, { "epoch": 2.657709923664122, "grad_norm": 4.95872071423959, "learning_rate": 5.321948747835921e-06, "loss": 0.1102, "step": 130560 }, { "epoch": 2.6579134860050893, "grad_norm": 2.3700598480114556, "learning_rate": 5.32123965091553e-06, "loss": 0.1988, "step": 130570 }, { "epoch": 2.658117048346056, "grad_norm": 0.5813666909909465, "learning_rate": 5.320530547507291e-06, "loss": 0.0706, "step": 130580 }, { "epoch": 2.658320610687023, "grad_norm": 42.548299355339424, "learning_rate": 5.319821437625525e-06, "loss": 0.1555, "step": 130590 }, { "epoch": 2.65852417302799, "grad_norm": 0.20454393778101215, "learning_rate": 5.319112321284552e-06, "loss": 0.2193, "step": 130600 }, { "epoch": 2.6587277353689567, "grad_norm": 9.421146179673256, "learning_rate": 5.318403198498695e-06, "loss": 0.2696, "step": 130610 }, { "epoch": 2.6589312977099238, "grad_norm": 30.39826405291684, "learning_rate": 5.317694069282276e-06, "loss": 0.1563, "step": 130620 }, { "epoch": 2.659134860050891, "grad_norm": 0.40984711673994156, "learning_rate": 5.316984933649616e-06, "loss": 0.0776, "step": 130630 }, { "epoch": 2.6593384223918575, "grad_norm": 3.367300493606133, "learning_rate": 5.316275791615035e-06, "loss": 0.0803, "step": 130640 }, { "epoch": 2.6595419847328245, "grad_norm": 7.35760201389394, "learning_rate": 5.315566643192859e-06, "loss": 0.1321, "step": 130650 }, { "epoch": 2.659745547073791, "grad_norm": 0.16783541388094664, "learning_rate": 5.314857488397409e-06, "loss": 0.0861, "step": 130660 }, { "epoch": 2.6599491094147583, "grad_norm": 5.435206053229967, "learning_rate": 5.314148327243005e-06, "loss": 0.1282, "step": 130670 }, { "epoch": 2.6601526717557253, "grad_norm": 0.24975778035313342, "learning_rate": 5.31343915974397e-06, "loss": 0.1256, "step": 130680 }, { "epoch": 2.660356234096692, "grad_norm": 6.54861131313356, "learning_rate": 5.312729985914629e-06, "loss": 0.1132, "step": 130690 }, { "epoch": 2.660559796437659, "grad_norm": 8.001556208898812, "learning_rate": 5.312020805769303e-06, "loss": 0.1793, "step": 130700 }, { "epoch": 2.6607633587786257, "grad_norm": 22.187776432789835, "learning_rate": 5.311311619322316e-06, "loss": 0.1939, "step": 130710 }, { "epoch": 2.6609669211195928, "grad_norm": 13.132717231352782, "learning_rate": 5.310602426587987e-06, "loss": 0.1168, "step": 130720 }, { "epoch": 2.66117048346056, "grad_norm": 0.07878305880821472, "learning_rate": 5.309893227580644e-06, "loss": 0.0778, "step": 130730 }, { "epoch": 2.6613740458015265, "grad_norm": 13.042076387728086, "learning_rate": 5.309184022314607e-06, "loss": 0.0546, "step": 130740 }, { "epoch": 2.6615776081424936, "grad_norm": 0.0784635569786678, "learning_rate": 5.308474810804203e-06, "loss": 0.0461, "step": 130750 }, { "epoch": 2.6617811704834606, "grad_norm": 0.08982717039007518, "learning_rate": 5.30776559306375e-06, "loss": 0.1112, "step": 130760 }, { "epoch": 2.6619847328244273, "grad_norm": 0.061553882922633356, "learning_rate": 5.307056369107576e-06, "loss": 0.1452, "step": 130770 }, { "epoch": 2.6621882951653943, "grad_norm": 1.2592101339963702, "learning_rate": 5.306347138950003e-06, "loss": 0.1118, "step": 130780 }, { "epoch": 2.6623918575063614, "grad_norm": 18.596034432607496, "learning_rate": 5.305637902605354e-06, "loss": 0.1689, "step": 130790 }, { "epoch": 2.662595419847328, "grad_norm": 13.60543903474984, "learning_rate": 5.304928660087954e-06, "loss": 0.1146, "step": 130800 }, { "epoch": 2.662798982188295, "grad_norm": 24.5670153867315, "learning_rate": 5.304219411412128e-06, "loss": 0.235, "step": 130810 }, { "epoch": 2.663002544529262, "grad_norm": 16.715061627656727, "learning_rate": 5.303510156592199e-06, "loss": 0.1498, "step": 130820 }, { "epoch": 2.663206106870229, "grad_norm": 21.139922452949975, "learning_rate": 5.302800895642491e-06, "loss": 0.0664, "step": 130830 }, { "epoch": 2.663409669211196, "grad_norm": 11.27785938467453, "learning_rate": 5.302091628577329e-06, "loss": 0.1241, "step": 130840 }, { "epoch": 2.663613231552163, "grad_norm": 3.67176024958731, "learning_rate": 5.3013823554110365e-06, "loss": 0.0409, "step": 130850 }, { "epoch": 2.6638167938931296, "grad_norm": 16.97968965332797, "learning_rate": 5.300673076157942e-06, "loss": 0.1213, "step": 130860 }, { "epoch": 2.6640203562340967, "grad_norm": 3.3549875326872063, "learning_rate": 5.299963790832364e-06, "loss": 0.0566, "step": 130870 }, { "epoch": 2.664223918575064, "grad_norm": 10.735723882135927, "learning_rate": 5.299254499448633e-06, "loss": 0.0991, "step": 130880 }, { "epoch": 2.6644274809160304, "grad_norm": 0.29326757423805966, "learning_rate": 5.298545202021071e-06, "loss": 0.0492, "step": 130890 }, { "epoch": 2.6646310432569975, "grad_norm": 4.536533321988615, "learning_rate": 5.297835898564004e-06, "loss": 0.1369, "step": 130900 }, { "epoch": 2.6648346055979646, "grad_norm": 0.3109362076975289, "learning_rate": 5.2971265890917585e-06, "loss": 0.221, "step": 130910 }, { "epoch": 2.6650381679389312, "grad_norm": 0.11432091208089334, "learning_rate": 5.296417273618659e-06, "loss": 0.0663, "step": 130920 }, { "epoch": 2.6652417302798983, "grad_norm": 0.30401286353706164, "learning_rate": 5.29570795215903e-06, "loss": 0.0886, "step": 130930 }, { "epoch": 2.6654452926208654, "grad_norm": 0.3325426416488386, "learning_rate": 5.2949986247271975e-06, "loss": 0.1163, "step": 130940 }, { "epoch": 2.665648854961832, "grad_norm": 0.03708132412499165, "learning_rate": 5.29428929133749e-06, "loss": 0.1295, "step": 130950 }, { "epoch": 2.665852417302799, "grad_norm": 39.74309468168902, "learning_rate": 5.293579952004229e-06, "loss": 0.092, "step": 130960 }, { "epoch": 2.666055979643766, "grad_norm": 49.02823085490847, "learning_rate": 5.2928706067417434e-06, "loss": 0.0519, "step": 130970 }, { "epoch": 2.666259541984733, "grad_norm": 0.08578621055297841, "learning_rate": 5.292161255564359e-06, "loss": 0.1384, "step": 130980 }, { "epoch": 2.6664631043257, "grad_norm": 21.33440148573114, "learning_rate": 5.291451898486402e-06, "loss": 0.1258, "step": 130990 }, { "epoch": 2.6666666666666665, "grad_norm": 1.5540802732625076, "learning_rate": 5.290742535522197e-06, "loss": 0.1465, "step": 131000 }, { "epoch": 2.6668702290076336, "grad_norm": 0.317520448855018, "learning_rate": 5.2900331666860725e-06, "loss": 0.1308, "step": 131010 }, { "epoch": 2.6670737913486007, "grad_norm": 2.7639343934213754, "learning_rate": 5.289323791992356e-06, "loss": 0.0701, "step": 131020 }, { "epoch": 2.6672773536895673, "grad_norm": 13.564587051677869, "learning_rate": 5.288614411455373e-06, "loss": 0.1373, "step": 131030 }, { "epoch": 2.6674809160305344, "grad_norm": 9.227190408531927, "learning_rate": 5.287905025089449e-06, "loss": 0.2597, "step": 131040 }, { "epoch": 2.667684478371501, "grad_norm": 30.398264285159534, "learning_rate": 5.2871956329089115e-06, "loss": 0.0579, "step": 131050 }, { "epoch": 2.667888040712468, "grad_norm": 0.3443046565426, "learning_rate": 5.28648623492809e-06, "loss": 0.1289, "step": 131060 }, { "epoch": 2.668091603053435, "grad_norm": 10.227412146674013, "learning_rate": 5.285776831161309e-06, "loss": 0.0496, "step": 131070 }, { "epoch": 2.668295165394402, "grad_norm": 0.05465013318287594, "learning_rate": 5.285067421622897e-06, "loss": 0.0953, "step": 131080 }, { "epoch": 2.668498727735369, "grad_norm": 25.143738911226663, "learning_rate": 5.284358006327182e-06, "loss": 0.1981, "step": 131090 }, { "epoch": 2.668702290076336, "grad_norm": 26.201118852264706, "learning_rate": 5.2836485852884885e-06, "loss": 0.0586, "step": 131100 }, { "epoch": 2.6689058524173026, "grad_norm": 8.404807091815789, "learning_rate": 5.282939158521149e-06, "loss": 0.1101, "step": 131110 }, { "epoch": 2.6691094147582697, "grad_norm": 6.685114152658562, "learning_rate": 5.282229726039488e-06, "loss": 0.0904, "step": 131120 }, { "epoch": 2.6693129770992368, "grad_norm": 21.15498590783927, "learning_rate": 5.281520287857833e-06, "loss": 0.0907, "step": 131130 }, { "epoch": 2.6695165394402034, "grad_norm": 1.1682494157070522, "learning_rate": 5.280810843990515e-06, "loss": 0.2345, "step": 131140 }, { "epoch": 2.6697201017811705, "grad_norm": 7.906201219575418, "learning_rate": 5.28010139445186e-06, "loss": 0.1236, "step": 131150 }, { "epoch": 2.6699236641221376, "grad_norm": 32.11085902644336, "learning_rate": 5.2793919392561964e-06, "loss": 0.0814, "step": 131160 }, { "epoch": 2.670127226463104, "grad_norm": 8.536057563710505, "learning_rate": 5.2786824784178525e-06, "loss": 0.1784, "step": 131170 }, { "epoch": 2.6703307888040713, "grad_norm": 2.650000941161732, "learning_rate": 5.277973011951156e-06, "loss": 0.1344, "step": 131180 }, { "epoch": 2.6705343511450383, "grad_norm": 9.946724914392323, "learning_rate": 5.277263539870438e-06, "loss": 0.1747, "step": 131190 }, { "epoch": 2.670737913486005, "grad_norm": 11.7743570573553, "learning_rate": 5.276554062190026e-06, "loss": 0.1745, "step": 131200 }, { "epoch": 2.670941475826972, "grad_norm": 17.845055287893107, "learning_rate": 5.2758445789242486e-06, "loss": 0.187, "step": 131210 }, { "epoch": 2.671145038167939, "grad_norm": 0.03699349046576394, "learning_rate": 5.275135090087432e-06, "loss": 0.0987, "step": 131220 }, { "epoch": 2.6713486005089058, "grad_norm": 22.3380294516229, "learning_rate": 5.274425595693909e-06, "loss": 0.1109, "step": 131230 }, { "epoch": 2.671552162849873, "grad_norm": 9.223498754497584, "learning_rate": 5.27371609575801e-06, "loss": 0.1198, "step": 131240 }, { "epoch": 2.67175572519084, "grad_norm": 0.3543106316708465, "learning_rate": 5.27300659029406e-06, "loss": 0.0509, "step": 131250 }, { "epoch": 2.6719592875318066, "grad_norm": 0.20462609041206475, "learning_rate": 5.27229707931639e-06, "loss": 0.1703, "step": 131260 }, { "epoch": 2.6721628498727736, "grad_norm": 5.042919787402825, "learning_rate": 5.27158756283933e-06, "loss": 0.1607, "step": 131270 }, { "epoch": 2.6723664122137407, "grad_norm": 5.816044648094816, "learning_rate": 5.27087804087721e-06, "loss": 0.0895, "step": 131280 }, { "epoch": 2.6725699745547074, "grad_norm": 0.035005516637143116, "learning_rate": 5.270168513444358e-06, "loss": 0.065, "step": 131290 }, { "epoch": 2.6727735368956744, "grad_norm": 19.7544968867618, "learning_rate": 5.2694589805551056e-06, "loss": 0.2349, "step": 131300 }, { "epoch": 2.672977099236641, "grad_norm": 16.822029623042724, "learning_rate": 5.2687494422237815e-06, "loss": 0.2014, "step": 131310 }, { "epoch": 2.673180661577608, "grad_norm": 2.7039781656356014, "learning_rate": 5.268039898464719e-06, "loss": 0.1, "step": 131320 }, { "epoch": 2.673384223918575, "grad_norm": 30.040043526404002, "learning_rate": 5.267330349292243e-06, "loss": 0.0789, "step": 131330 }, { "epoch": 2.673587786259542, "grad_norm": 1.146981875617535, "learning_rate": 5.266620794720685e-06, "loss": 0.1846, "step": 131340 }, { "epoch": 2.673791348600509, "grad_norm": 0.07438650762958458, "learning_rate": 5.265911234764379e-06, "loss": 0.0494, "step": 131350 }, { "epoch": 2.6739949109414756, "grad_norm": 0.012197538175620064, "learning_rate": 5.265201669437653e-06, "loss": 0.1447, "step": 131360 }, { "epoch": 2.6741984732824426, "grad_norm": 24.049510163986852, "learning_rate": 5.264492098754837e-06, "loss": 0.1571, "step": 131370 }, { "epoch": 2.6744020356234097, "grad_norm": 0.3073176403185228, "learning_rate": 5.263782522730262e-06, "loss": 0.1183, "step": 131380 }, { "epoch": 2.6746055979643764, "grad_norm": 1.3071893591381656, "learning_rate": 5.263072941378259e-06, "loss": 0.1563, "step": 131390 }, { "epoch": 2.6748091603053434, "grad_norm": 105.0153719128882, "learning_rate": 5.26236335471316e-06, "loss": 0.1342, "step": 131400 }, { "epoch": 2.6750127226463105, "grad_norm": 18.70903414263565, "learning_rate": 5.261653762749296e-06, "loss": 0.1094, "step": 131410 }, { "epoch": 2.675216284987277, "grad_norm": 1.7224161118485783, "learning_rate": 5.260944165500995e-06, "loss": 0.1461, "step": 131420 }, { "epoch": 2.6754198473282442, "grad_norm": 0.21851655704056314, "learning_rate": 5.260234562982591e-06, "loss": 0.1288, "step": 131430 }, { "epoch": 2.6756234096692113, "grad_norm": 0.7367356137213211, "learning_rate": 5.259524955208417e-06, "loss": 0.0799, "step": 131440 }, { "epoch": 2.675826972010178, "grad_norm": 7.4212831338590455, "learning_rate": 5.2588153421928e-06, "loss": 0.1538, "step": 131450 }, { "epoch": 2.676030534351145, "grad_norm": 9.152960063198792, "learning_rate": 5.258105723950075e-06, "loss": 0.1054, "step": 131460 }, { "epoch": 2.676234096692112, "grad_norm": 3.494261475536019, "learning_rate": 5.257396100494572e-06, "loss": 0.1532, "step": 131470 }, { "epoch": 2.6764376590330787, "grad_norm": 21.884448329327817, "learning_rate": 5.256686471840623e-06, "loss": 0.0754, "step": 131480 }, { "epoch": 2.676641221374046, "grad_norm": 0.7714578899905316, "learning_rate": 5.25597683800256e-06, "loss": 0.078, "step": 131490 }, { "epoch": 2.676844783715013, "grad_norm": 3.0297320231268174, "learning_rate": 5.255267198994715e-06, "loss": 0.1214, "step": 131500 }, { "epoch": 2.6770483460559795, "grad_norm": 4.75650802777311, "learning_rate": 5.254557554831421e-06, "loss": 0.0642, "step": 131510 }, { "epoch": 2.6772519083969466, "grad_norm": 15.549166521180833, "learning_rate": 5.2538479055270095e-06, "loss": 0.0671, "step": 131520 }, { "epoch": 2.6774554707379137, "grad_norm": 25.030262221063726, "learning_rate": 5.253138251095812e-06, "loss": 0.23, "step": 131530 }, { "epoch": 2.6776590330788803, "grad_norm": 0.09433129724636245, "learning_rate": 5.252428591552161e-06, "loss": 0.1718, "step": 131540 }, { "epoch": 2.6778625954198474, "grad_norm": 0.8373305254187674, "learning_rate": 5.25171892691039e-06, "loss": 0.1388, "step": 131550 }, { "epoch": 2.6780661577608145, "grad_norm": 11.735129412258374, "learning_rate": 5.2510092571848304e-06, "loss": 0.1205, "step": 131560 }, { "epoch": 2.678269720101781, "grad_norm": 0.11747415471319504, "learning_rate": 5.250299582389816e-06, "loss": 0.1716, "step": 131570 }, { "epoch": 2.678473282442748, "grad_norm": 27.677133993940085, "learning_rate": 5.24958990253968e-06, "loss": 0.2217, "step": 131580 }, { "epoch": 2.6786768447837153, "grad_norm": 17.738567880528745, "learning_rate": 5.248880217648753e-06, "loss": 0.1547, "step": 131590 }, { "epoch": 2.678880407124682, "grad_norm": 13.415832019005423, "learning_rate": 5.24817052773137e-06, "loss": 0.1355, "step": 131600 }, { "epoch": 2.679083969465649, "grad_norm": 8.08408692504222, "learning_rate": 5.247460832801864e-06, "loss": 0.2076, "step": 131610 }, { "epoch": 2.679287531806616, "grad_norm": 0.1584157166990198, "learning_rate": 5.246751132874568e-06, "loss": 0.1062, "step": 131620 }, { "epoch": 2.6794910941475827, "grad_norm": 20.424630600516643, "learning_rate": 5.246041427963812e-06, "loss": 0.2018, "step": 131630 }, { "epoch": 2.6796946564885498, "grad_norm": 0.03610155124834122, "learning_rate": 5.245331718083937e-06, "loss": 0.095, "step": 131640 }, { "epoch": 2.6798982188295164, "grad_norm": 1.0887599518042244, "learning_rate": 5.24462200324927e-06, "loss": 0.1031, "step": 131650 }, { "epoch": 2.6801017811704835, "grad_norm": 1.0672710230761087, "learning_rate": 5.243912283474147e-06, "loss": 0.1163, "step": 131660 }, { "epoch": 2.68030534351145, "grad_norm": 1.3535861235715265, "learning_rate": 5.2432025587729e-06, "loss": 0.0615, "step": 131670 }, { "epoch": 2.680508905852417, "grad_norm": 8.697303388029216, "learning_rate": 5.242492829159864e-06, "loss": 0.0922, "step": 131680 }, { "epoch": 2.6807124681933843, "grad_norm": 1.0138905169838608, "learning_rate": 5.241783094649374e-06, "loss": 0.084, "step": 131690 }, { "epoch": 2.680916030534351, "grad_norm": 0.7935142683228508, "learning_rate": 5.241073355255765e-06, "loss": 0.1071, "step": 131700 }, { "epoch": 2.681119592875318, "grad_norm": 0.025701546689638605, "learning_rate": 5.240363610993365e-06, "loss": 0.058, "step": 131710 }, { "epoch": 2.681323155216285, "grad_norm": 1.4026687906012398, "learning_rate": 5.239653861876513e-06, "loss": 0.0706, "step": 131720 }, { "epoch": 2.6815267175572517, "grad_norm": 5.156838679050736, "learning_rate": 5.238944107919546e-06, "loss": 0.1709, "step": 131730 }, { "epoch": 2.6817302798982188, "grad_norm": 20.178070300986764, "learning_rate": 5.2382343491367925e-06, "loss": 0.2105, "step": 131740 }, { "epoch": 2.681933842239186, "grad_norm": 0.22515391160709528, "learning_rate": 5.237524585542589e-06, "loss": 0.1491, "step": 131750 }, { "epoch": 2.6821374045801525, "grad_norm": 19.767774821284373, "learning_rate": 5.236814817151272e-06, "loss": 0.1612, "step": 131760 }, { "epoch": 2.6823409669211196, "grad_norm": 7.47751197179941, "learning_rate": 5.236105043977173e-06, "loss": 0.1895, "step": 131770 }, { "epoch": 2.6825445292620866, "grad_norm": 18.767298864459935, "learning_rate": 5.23539526603463e-06, "loss": 0.1917, "step": 131780 }, { "epoch": 2.6827480916030533, "grad_norm": 17.346595894900773, "learning_rate": 5.2346854833379755e-06, "loss": 0.0875, "step": 131790 }, { "epoch": 2.6829516539440204, "grad_norm": 0.0696089928743098, "learning_rate": 5.233975695901546e-06, "loss": 0.0716, "step": 131800 }, { "epoch": 2.6831552162849874, "grad_norm": 15.643621140942365, "learning_rate": 5.2332659037396775e-06, "loss": 0.0902, "step": 131810 }, { "epoch": 2.683358778625954, "grad_norm": 17.222221324147274, "learning_rate": 5.232556106866702e-06, "loss": 0.1138, "step": 131820 }, { "epoch": 2.683562340966921, "grad_norm": 24.46635247160356, "learning_rate": 5.231846305296955e-06, "loss": 0.141, "step": 131830 }, { "epoch": 2.6837659033078882, "grad_norm": 6.234769046488435, "learning_rate": 5.231136499044775e-06, "loss": 0.0879, "step": 131840 }, { "epoch": 2.683969465648855, "grad_norm": 18.57237247387349, "learning_rate": 5.230426688124496e-06, "loss": 0.1564, "step": 131850 }, { "epoch": 2.684173027989822, "grad_norm": 10.312937881184677, "learning_rate": 5.229716872550452e-06, "loss": 0.1017, "step": 131860 }, { "epoch": 2.684376590330789, "grad_norm": 21.889111502910875, "learning_rate": 5.229007052336982e-06, "loss": 0.1678, "step": 131870 }, { "epoch": 2.6845801526717556, "grad_norm": 25.21353390759571, "learning_rate": 5.228297227498417e-06, "loss": 0.1459, "step": 131880 }, { "epoch": 2.6847837150127227, "grad_norm": 37.03122205467627, "learning_rate": 5.2275873980490965e-06, "loss": 0.1705, "step": 131890 }, { "epoch": 2.68498727735369, "grad_norm": 16.132379621316588, "learning_rate": 5.226877564003356e-06, "loss": 0.1554, "step": 131900 }, { "epoch": 2.6851908396946564, "grad_norm": 6.216201703260543, "learning_rate": 5.22616772537553e-06, "loss": 0.2428, "step": 131910 }, { "epoch": 2.6853944020356235, "grad_norm": 2.25632775158143, "learning_rate": 5.225457882179955e-06, "loss": 0.1667, "step": 131920 }, { "epoch": 2.6855979643765906, "grad_norm": 0.5304824330083818, "learning_rate": 5.224748034430968e-06, "loss": 0.1417, "step": 131930 }, { "epoch": 2.6858015267175572, "grad_norm": 0.0912881551168167, "learning_rate": 5.224038182142906e-06, "loss": 0.0179, "step": 131940 }, { "epoch": 2.6860050890585243, "grad_norm": 71.94401396599442, "learning_rate": 5.223328325330104e-06, "loss": 0.1372, "step": 131950 }, { "epoch": 2.686208651399491, "grad_norm": 31.86631366705763, "learning_rate": 5.222618464006899e-06, "loss": 0.1152, "step": 131960 }, { "epoch": 2.686412213740458, "grad_norm": 12.126331119774163, "learning_rate": 5.221908598187626e-06, "loss": 0.1351, "step": 131970 }, { "epoch": 2.686615776081425, "grad_norm": 5.679170513225726, "learning_rate": 5.221198727886625e-06, "loss": 0.1153, "step": 131980 }, { "epoch": 2.6868193384223917, "grad_norm": 0.7603662787337946, "learning_rate": 5.220488853118231e-06, "loss": 0.0776, "step": 131990 }, { "epoch": 2.687022900763359, "grad_norm": 0.24032565724492716, "learning_rate": 5.219778973896778e-06, "loss": 0.0612, "step": 132000 }, { "epoch": 2.6872264631043254, "grad_norm": 21.49684249041976, "learning_rate": 5.2190690902366085e-06, "loss": 0.0817, "step": 132010 }, { "epoch": 2.6874300254452925, "grad_norm": 12.752558683955256, "learning_rate": 5.218359202152055e-06, "loss": 0.1046, "step": 132020 }, { "epoch": 2.6876335877862596, "grad_norm": 14.347956869896189, "learning_rate": 5.217649309657456e-06, "loss": 0.107, "step": 132030 }, { "epoch": 2.6878371501272262, "grad_norm": 21.915505722976604, "learning_rate": 5.216939412767148e-06, "loss": 0.0296, "step": 132040 }, { "epoch": 2.6880407124681933, "grad_norm": 1.6062180277081568, "learning_rate": 5.2162295114954695e-06, "loss": 0.0727, "step": 132050 }, { "epoch": 2.6882442748091604, "grad_norm": 0.8508964131575524, "learning_rate": 5.21551960585676e-06, "loss": 0.1132, "step": 132060 }, { "epoch": 2.688447837150127, "grad_norm": 30.056702428642453, "learning_rate": 5.214809695865352e-06, "loss": 0.189, "step": 132070 }, { "epoch": 2.688651399491094, "grad_norm": 1.4385033168663617, "learning_rate": 5.214099781535585e-06, "loss": 0.1332, "step": 132080 }, { "epoch": 2.688854961832061, "grad_norm": 22.581614637968745, "learning_rate": 5.213389862881799e-06, "loss": 0.2587, "step": 132090 }, { "epoch": 2.689058524173028, "grad_norm": 6.698055745616104, "learning_rate": 5.2126799399183305e-06, "loss": 0.1266, "step": 132100 }, { "epoch": 2.689262086513995, "grad_norm": 12.80578499633983, "learning_rate": 5.211970012659515e-06, "loss": 0.1098, "step": 132110 }, { "epoch": 2.689465648854962, "grad_norm": 0.14374442045035343, "learning_rate": 5.211260081119692e-06, "loss": 0.065, "step": 132120 }, { "epoch": 2.6896692111959286, "grad_norm": 11.508108950318958, "learning_rate": 5.2105501453132e-06, "loss": 0.1175, "step": 132130 }, { "epoch": 2.6898727735368957, "grad_norm": 0.1437362953680784, "learning_rate": 5.209840205254377e-06, "loss": 0.1011, "step": 132140 }, { "epoch": 2.6900763358778628, "grad_norm": 4.952913722727602, "learning_rate": 5.20913026095756e-06, "loss": 0.1361, "step": 132150 }, { "epoch": 2.6902798982188294, "grad_norm": 6.082255615091426, "learning_rate": 5.208420312437089e-06, "loss": 0.0532, "step": 132160 }, { "epoch": 2.6904834605597965, "grad_norm": 14.626830935633686, "learning_rate": 5.207710359707301e-06, "loss": 0.1664, "step": 132170 }, { "epoch": 2.6906870229007636, "grad_norm": 0.07138445853669159, "learning_rate": 5.207000402782534e-06, "loss": 0.081, "step": 132180 }, { "epoch": 2.69089058524173, "grad_norm": 15.59283389351119, "learning_rate": 5.206290441677129e-06, "loss": 0.1417, "step": 132190 }, { "epoch": 2.6910941475826973, "grad_norm": 15.195064294530912, "learning_rate": 5.205580476405421e-06, "loss": 0.0718, "step": 132200 }, { "epoch": 2.6912977099236643, "grad_norm": 1.0267679082680985, "learning_rate": 5.2048705069817504e-06, "loss": 0.1622, "step": 132210 }, { "epoch": 2.691501272264631, "grad_norm": 43.9829145923684, "learning_rate": 5.204160533420458e-06, "loss": 0.1508, "step": 132220 }, { "epoch": 2.691704834605598, "grad_norm": 3.0805824124713483, "learning_rate": 5.203450555735879e-06, "loss": 0.0533, "step": 132230 }, { "epoch": 2.691908396946565, "grad_norm": 2.773013103511528, "learning_rate": 5.202740573942355e-06, "loss": 0.1321, "step": 132240 }, { "epoch": 2.6921119592875318, "grad_norm": 35.12264114813562, "learning_rate": 5.2020305880542236e-06, "loss": 0.0823, "step": 132250 }, { "epoch": 2.692315521628499, "grad_norm": 0.06985037159034116, "learning_rate": 5.201320598085824e-06, "loss": 0.1062, "step": 132260 }, { "epoch": 2.6925190839694655, "grad_norm": 3.9054383804715385, "learning_rate": 5.200610604051496e-06, "loss": 0.1077, "step": 132270 }, { "epoch": 2.6927226463104326, "grad_norm": 11.305915389733029, "learning_rate": 5.199900605965579e-06, "loss": 0.1122, "step": 132280 }, { "epoch": 2.6929262086513996, "grad_norm": 0.11900428744327836, "learning_rate": 5.19919060384241e-06, "loss": 0.1333, "step": 132290 }, { "epoch": 2.6931297709923663, "grad_norm": 11.188936263222741, "learning_rate": 5.1984805976963315e-06, "loss": 0.0961, "step": 132300 }, { "epoch": 2.6933333333333334, "grad_norm": 3.5358443971117937, "learning_rate": 5.1977705875416815e-06, "loss": 0.2616, "step": 132310 }, { "epoch": 2.6935368956743, "grad_norm": 28.83511383414917, "learning_rate": 5.1970605733927995e-06, "loss": 0.1025, "step": 132320 }, { "epoch": 2.693740458015267, "grad_norm": 1.0413669852543568, "learning_rate": 5.1963505552640255e-06, "loss": 0.0607, "step": 132330 }, { "epoch": 2.693944020356234, "grad_norm": 0.657291631346589, "learning_rate": 5.195640533169699e-06, "loss": 0.1575, "step": 132340 }, { "epoch": 2.694147582697201, "grad_norm": 1.111810749640814, "learning_rate": 5.1949305071241605e-06, "loss": 0.1183, "step": 132350 }, { "epoch": 2.694351145038168, "grad_norm": 7.948428206109043, "learning_rate": 5.194220477141749e-06, "loss": 0.1922, "step": 132360 }, { "epoch": 2.694554707379135, "grad_norm": 39.136238292222544, "learning_rate": 5.193510443236804e-06, "loss": 0.1822, "step": 132370 }, { "epoch": 2.6947582697201016, "grad_norm": 74.37962379083368, "learning_rate": 5.1928004054236666e-06, "loss": 0.1862, "step": 132380 }, { "epoch": 2.6949618320610687, "grad_norm": 3.317855477643522, "learning_rate": 5.1920903637166785e-06, "loss": 0.0915, "step": 132390 }, { "epoch": 2.6951653944020357, "grad_norm": 0.07424711126683796, "learning_rate": 5.191380318130177e-06, "loss": 0.102, "step": 132400 }, { "epoch": 2.6953689567430024, "grad_norm": 14.748959107591041, "learning_rate": 5.1906702686785025e-06, "loss": 0.1088, "step": 132410 }, { "epoch": 2.6955725190839694, "grad_norm": 0.08519354004548751, "learning_rate": 5.1899602153759976e-06, "loss": 0.0796, "step": 132420 }, { "epoch": 2.6957760814249365, "grad_norm": 35.27262629828438, "learning_rate": 5.189250158237002e-06, "loss": 0.1846, "step": 132430 }, { "epoch": 2.695979643765903, "grad_norm": 8.819364075533828, "learning_rate": 5.188540097275854e-06, "loss": 0.0818, "step": 132440 }, { "epoch": 2.6961832061068702, "grad_norm": 15.156428492104776, "learning_rate": 5.187830032506896e-06, "loss": 0.097, "step": 132450 }, { "epoch": 2.6963867684478373, "grad_norm": 15.70201406822836, "learning_rate": 5.18711996394447e-06, "loss": 0.2151, "step": 132460 }, { "epoch": 2.696590330788804, "grad_norm": 25.282582861593482, "learning_rate": 5.186409891602913e-06, "loss": 0.2689, "step": 132470 }, { "epoch": 2.696793893129771, "grad_norm": 0.06524343481400172, "learning_rate": 5.185699815496572e-06, "loss": 0.0915, "step": 132480 }, { "epoch": 2.696997455470738, "grad_norm": 0.7700599839252807, "learning_rate": 5.18498973563978e-06, "loss": 0.149, "step": 132490 }, { "epoch": 2.6972010178117047, "grad_norm": 9.358645082600578, "learning_rate": 5.184279652046885e-06, "loss": 0.0949, "step": 132500 }, { "epoch": 2.697404580152672, "grad_norm": 8.66586769751688, "learning_rate": 5.1835695647322235e-06, "loss": 0.1375, "step": 132510 }, { "epoch": 2.697608142493639, "grad_norm": 11.368015880864869, "learning_rate": 5.182859473710139e-06, "loss": 0.1174, "step": 132520 }, { "epoch": 2.6978117048346055, "grad_norm": 0.21318039193203742, "learning_rate": 5.1821493789949716e-06, "loss": 0.0765, "step": 132530 }, { "epoch": 2.6980152671755726, "grad_norm": 3.764250949247145, "learning_rate": 5.1814392806010625e-06, "loss": 0.1856, "step": 132540 }, { "epoch": 2.6982188295165397, "grad_norm": 26.716178455697285, "learning_rate": 5.180729178542754e-06, "loss": 0.1271, "step": 132550 }, { "epoch": 2.6984223918575063, "grad_norm": 0.2806153536205513, "learning_rate": 5.180019072834387e-06, "loss": 0.1147, "step": 132560 }, { "epoch": 2.6986259541984734, "grad_norm": 0.168596396156355, "learning_rate": 5.179308963490303e-06, "loss": 0.1096, "step": 132570 }, { "epoch": 2.6988295165394405, "grad_norm": 0.6103092721669033, "learning_rate": 5.178598850524842e-06, "loss": 0.082, "step": 132580 }, { "epoch": 2.699033078880407, "grad_norm": 0.5029894897235998, "learning_rate": 5.1778887339523485e-06, "loss": 0.1256, "step": 132590 }, { "epoch": 2.699236641221374, "grad_norm": 4.042337314555036, "learning_rate": 5.1771786137871625e-06, "loss": 0.1721, "step": 132600 }, { "epoch": 2.699440203562341, "grad_norm": 0.049533677609936384, "learning_rate": 5.176468490043627e-06, "loss": 0.0843, "step": 132610 }, { "epoch": 2.699643765903308, "grad_norm": 3.0281411079943625, "learning_rate": 5.175758362736082e-06, "loss": 0.1474, "step": 132620 }, { "epoch": 2.6998473282442745, "grad_norm": 1.0975442193126623, "learning_rate": 5.175048231878871e-06, "loss": 0.0924, "step": 132630 }, { "epoch": 2.7000508905852416, "grad_norm": 0.22181203535851765, "learning_rate": 5.174338097486335e-06, "loss": 0.1085, "step": 132640 }, { "epoch": 2.7002544529262087, "grad_norm": 18.735187282107212, "learning_rate": 5.173627959572818e-06, "loss": 0.0965, "step": 132650 }, { "epoch": 2.7004580152671753, "grad_norm": 0.28958854573528614, "learning_rate": 5.172917818152658e-06, "loss": 0.0708, "step": 132660 }, { "epoch": 2.7006615776081424, "grad_norm": 25.663013737353634, "learning_rate": 5.172207673240203e-06, "loss": 0.1288, "step": 132670 }, { "epoch": 2.7008651399491095, "grad_norm": 3.1238772573646725, "learning_rate": 5.171497524849791e-06, "loss": 0.0882, "step": 132680 }, { "epoch": 2.701068702290076, "grad_norm": 0.026152409240416365, "learning_rate": 5.170787372995766e-06, "loss": 0.0301, "step": 132690 }, { "epoch": 2.701272264631043, "grad_norm": 39.08064732556637, "learning_rate": 5.170077217692469e-06, "loss": 0.0932, "step": 132700 }, { "epoch": 2.7014758269720103, "grad_norm": 19.912622708582607, "learning_rate": 5.169367058954244e-06, "loss": 0.1663, "step": 132710 }, { "epoch": 2.701679389312977, "grad_norm": 1.4532141117641697, "learning_rate": 5.168656896795433e-06, "loss": 0.0696, "step": 132720 }, { "epoch": 2.701882951653944, "grad_norm": 11.655958937145826, "learning_rate": 5.167946731230379e-06, "loss": 0.1982, "step": 132730 }, { "epoch": 2.702086513994911, "grad_norm": 14.359286684662148, "learning_rate": 5.1672365622734245e-06, "loss": 0.0673, "step": 132740 }, { "epoch": 2.7022900763358777, "grad_norm": 0.12529478483999937, "learning_rate": 5.166526389938912e-06, "loss": 0.1477, "step": 132750 }, { "epoch": 2.702493638676845, "grad_norm": 0.4517347671080526, "learning_rate": 5.165816214241184e-06, "loss": 0.1783, "step": 132760 }, { "epoch": 2.702697201017812, "grad_norm": 17.096699321089382, "learning_rate": 5.165106035194586e-06, "loss": 0.1688, "step": 132770 }, { "epoch": 2.7029007633587785, "grad_norm": 5.575156759351711, "learning_rate": 5.164395852813456e-06, "loss": 0.0566, "step": 132780 }, { "epoch": 2.7031043256997456, "grad_norm": 0.2227361698182711, "learning_rate": 5.163685667112143e-06, "loss": 0.0707, "step": 132790 }, { "epoch": 2.7033078880407126, "grad_norm": 16.42243240585574, "learning_rate": 5.162975478104986e-06, "loss": 0.0504, "step": 132800 }, { "epoch": 2.7035114503816793, "grad_norm": 10.572195507182823, "learning_rate": 5.162265285806329e-06, "loss": 0.1162, "step": 132810 }, { "epoch": 2.7037150127226464, "grad_norm": 19.548487912903514, "learning_rate": 5.161555090230516e-06, "loss": 0.1898, "step": 132820 }, { "epoch": 2.7039185750636134, "grad_norm": 1.700678335965586, "learning_rate": 5.160844891391888e-06, "loss": 0.0739, "step": 132830 }, { "epoch": 2.70412213740458, "grad_norm": 8.34363648559775, "learning_rate": 5.160134689304792e-06, "loss": 0.1993, "step": 132840 }, { "epoch": 2.704325699745547, "grad_norm": 10.173181133128024, "learning_rate": 5.1594244839835705e-06, "loss": 0.1801, "step": 132850 }, { "epoch": 2.7045292620865142, "grad_norm": 12.590239106656643, "learning_rate": 5.158714275442564e-06, "loss": 0.1381, "step": 132860 }, { "epoch": 2.704732824427481, "grad_norm": 13.281172550265861, "learning_rate": 5.158004063696119e-06, "loss": 0.1056, "step": 132870 }, { "epoch": 2.704936386768448, "grad_norm": 0.06014681240153263, "learning_rate": 5.15729384875858e-06, "loss": 0.0673, "step": 132880 }, { "epoch": 2.705139949109415, "grad_norm": 5.950137694405913, "learning_rate": 5.156583630644289e-06, "loss": 0.1663, "step": 132890 }, { "epoch": 2.7053435114503817, "grad_norm": 23.393720093991533, "learning_rate": 5.1558734093675865e-06, "loss": 0.0655, "step": 132900 }, { "epoch": 2.7055470737913487, "grad_norm": 8.605654640350847, "learning_rate": 5.155163184942824e-06, "loss": 0.2126, "step": 132910 }, { "epoch": 2.7057506361323154, "grad_norm": 2.5866237234147036, "learning_rate": 5.1544529573843385e-06, "loss": 0.1034, "step": 132920 }, { "epoch": 2.7059541984732824, "grad_norm": 13.328485452778846, "learning_rate": 5.153742726706478e-06, "loss": 0.1145, "step": 132930 }, { "epoch": 2.7061577608142495, "grad_norm": 0.09769920182115387, "learning_rate": 5.153032492923585e-06, "loss": 0.0594, "step": 132940 }, { "epoch": 2.706361323155216, "grad_norm": 9.178389715264577, "learning_rate": 5.152322256050002e-06, "loss": 0.159, "step": 132950 }, { "epoch": 2.7065648854961832, "grad_norm": 15.027418993807412, "learning_rate": 5.151612016100076e-06, "loss": 0.1479, "step": 132960 }, { "epoch": 2.70676844783715, "grad_norm": 8.412650037901305, "learning_rate": 5.15090177308815e-06, "loss": 0.2489, "step": 132970 }, { "epoch": 2.706972010178117, "grad_norm": 13.77260689254419, "learning_rate": 5.150191527028569e-06, "loss": 0.0622, "step": 132980 }, { "epoch": 2.707175572519084, "grad_norm": 14.007628251141188, "learning_rate": 5.149481277935676e-06, "loss": 0.148, "step": 132990 }, { "epoch": 2.7073791348600507, "grad_norm": 0.08250630588259342, "learning_rate": 5.148771025823817e-06, "loss": 0.0839, "step": 133000 }, { "epoch": 2.7075826972010177, "grad_norm": 1.726834483717513, "learning_rate": 5.148060770707334e-06, "loss": 0.1116, "step": 133010 }, { "epoch": 2.707786259541985, "grad_norm": 11.284706885143699, "learning_rate": 5.147350512600573e-06, "loss": 0.0981, "step": 133020 }, { "epoch": 2.7079898218829515, "grad_norm": 0.6242282323262445, "learning_rate": 5.146640251517879e-06, "loss": 0.0298, "step": 133030 }, { "epoch": 2.7081933842239185, "grad_norm": 0.32103029272740546, "learning_rate": 5.145929987473597e-06, "loss": 0.1251, "step": 133040 }, { "epoch": 2.7083969465648856, "grad_norm": 3.097092395096829, "learning_rate": 5.145219720482072e-06, "loss": 0.1024, "step": 133050 }, { "epoch": 2.7086005089058522, "grad_norm": 0.056054229418088186, "learning_rate": 5.144509450557646e-06, "loss": 0.1079, "step": 133060 }, { "epoch": 2.7088040712468193, "grad_norm": 9.85378661728578, "learning_rate": 5.143799177714664e-06, "loss": 0.2606, "step": 133070 }, { "epoch": 2.7090076335877864, "grad_norm": 0.09345729953138637, "learning_rate": 5.143088901967475e-06, "loss": 0.1422, "step": 133080 }, { "epoch": 2.709211195928753, "grad_norm": 8.601840768912421, "learning_rate": 5.142378623330421e-06, "loss": 0.0552, "step": 133090 }, { "epoch": 2.70941475826972, "grad_norm": 4.23622040142012, "learning_rate": 5.1416683418178455e-06, "loss": 0.0729, "step": 133100 }, { "epoch": 2.709618320610687, "grad_norm": 17.948286465587017, "learning_rate": 5.140958057444098e-06, "loss": 0.1648, "step": 133110 }, { "epoch": 2.709821882951654, "grad_norm": 3.6179086160196383, "learning_rate": 5.140247770223519e-06, "loss": 0.0781, "step": 133120 }, { "epoch": 2.710025445292621, "grad_norm": 1.6544339387783134, "learning_rate": 5.139537480170455e-06, "loss": 0.0652, "step": 133130 }, { "epoch": 2.710229007633588, "grad_norm": 11.723447162697957, "learning_rate": 5.1388271872992545e-06, "loss": 0.1259, "step": 133140 }, { "epoch": 2.7104325699745546, "grad_norm": 36.580136152948626, "learning_rate": 5.1381168916242586e-06, "loss": 0.0488, "step": 133150 }, { "epoch": 2.7106361323155217, "grad_norm": 26.627199343929462, "learning_rate": 5.137406593159814e-06, "loss": 0.1045, "step": 133160 }, { "epoch": 2.7108396946564888, "grad_norm": 38.44429321399644, "learning_rate": 5.136696291920268e-06, "loss": 0.2219, "step": 133170 }, { "epoch": 2.7110432569974554, "grad_norm": 15.50371803600896, "learning_rate": 5.135985987919962e-06, "loss": 0.0834, "step": 133180 }, { "epoch": 2.7112468193384225, "grad_norm": 44.72480810131177, "learning_rate": 5.135275681173245e-06, "loss": 0.1557, "step": 133190 }, { "epoch": 2.7114503816793896, "grad_norm": 14.205935856733056, "learning_rate": 5.134565371694461e-06, "loss": 0.132, "step": 133200 }, { "epoch": 2.711653944020356, "grad_norm": 0.05176493090740346, "learning_rate": 5.133855059497956e-06, "loss": 0.0983, "step": 133210 }, { "epoch": 2.7118575063613233, "grad_norm": 41.51600727754995, "learning_rate": 5.133144744598075e-06, "loss": 0.0776, "step": 133220 }, { "epoch": 2.71206106870229, "grad_norm": 69.30030269041076, "learning_rate": 5.1324344270091656e-06, "loss": 0.1798, "step": 133230 }, { "epoch": 2.712264631043257, "grad_norm": 8.133366249952568, "learning_rate": 5.131724106745572e-06, "loss": 0.1324, "step": 133240 }, { "epoch": 2.712468193384224, "grad_norm": 1.9322023098891004, "learning_rate": 5.13101378382164e-06, "loss": 0.1328, "step": 133250 }, { "epoch": 2.7126717557251907, "grad_norm": 0.3313563625699198, "learning_rate": 5.130303458251717e-06, "loss": 0.0749, "step": 133260 }, { "epoch": 2.712875318066158, "grad_norm": 0.21055943229507243, "learning_rate": 5.129593130050148e-06, "loss": 0.0773, "step": 133270 }, { "epoch": 2.7130788804071244, "grad_norm": 0.17118539990622206, "learning_rate": 5.128882799231277e-06, "loss": 0.0969, "step": 133280 }, { "epoch": 2.7132824427480915, "grad_norm": 0.6928497769299443, "learning_rate": 5.128172465809453e-06, "loss": 0.0561, "step": 133290 }, { "epoch": 2.7134860050890586, "grad_norm": 14.22924617292427, "learning_rate": 5.127462129799021e-06, "loss": 0.124, "step": 133300 }, { "epoch": 2.713689567430025, "grad_norm": 5.276233242108565, "learning_rate": 5.126751791214327e-06, "loss": 0.0887, "step": 133310 }, { "epoch": 2.7138931297709923, "grad_norm": 0.033656047458778766, "learning_rate": 5.126041450069717e-06, "loss": 0.0764, "step": 133320 }, { "epoch": 2.7140966921119594, "grad_norm": 14.60535995622647, "learning_rate": 5.1253311063795375e-06, "loss": 0.1539, "step": 133330 }, { "epoch": 2.714300254452926, "grad_norm": 8.5073204330436, "learning_rate": 5.124620760158137e-06, "loss": 0.1175, "step": 133340 }, { "epoch": 2.714503816793893, "grad_norm": 23.319960243669893, "learning_rate": 5.123910411419856e-06, "loss": 0.1589, "step": 133350 }, { "epoch": 2.71470737913486, "grad_norm": 0.2750231274783379, "learning_rate": 5.123200060179047e-06, "loss": 0.1718, "step": 133360 }, { "epoch": 2.714910941475827, "grad_norm": 29.04994451475099, "learning_rate": 5.122489706450053e-06, "loss": 0.1983, "step": 133370 }, { "epoch": 2.715114503816794, "grad_norm": 5.691661509309978, "learning_rate": 5.121779350247223e-06, "loss": 0.1049, "step": 133380 }, { "epoch": 2.715318066157761, "grad_norm": 11.759279595074172, "learning_rate": 5.121068991584901e-06, "loss": 0.2434, "step": 133390 }, { "epoch": 2.7155216284987276, "grad_norm": 16.949385074081796, "learning_rate": 5.120358630477435e-06, "loss": 0.1497, "step": 133400 }, { "epoch": 2.7157251908396947, "grad_norm": 43.30107464453783, "learning_rate": 5.119648266939172e-06, "loss": 0.1247, "step": 133410 }, { "epoch": 2.7159287531806617, "grad_norm": 1.6104848885535912, "learning_rate": 5.118937900984457e-06, "loss": 0.1663, "step": 133420 }, { "epoch": 2.7161323155216284, "grad_norm": 0.2261470623809794, "learning_rate": 5.118227532627641e-06, "loss": 0.1658, "step": 133430 }, { "epoch": 2.7163358778625954, "grad_norm": 0.9543274306374617, "learning_rate": 5.117517161883064e-06, "loss": 0.143, "step": 133440 }, { "epoch": 2.7165394402035625, "grad_norm": 6.18066628775466, "learning_rate": 5.116806788765078e-06, "loss": 0.145, "step": 133450 }, { "epoch": 2.716743002544529, "grad_norm": 5.526159047966585, "learning_rate": 5.116096413288029e-06, "loss": 0.1572, "step": 133460 }, { "epoch": 2.7169465648854962, "grad_norm": 0.9844385753655697, "learning_rate": 5.115386035466263e-06, "loss": 0.102, "step": 133470 }, { "epoch": 2.7171501272264633, "grad_norm": 19.42190134249125, "learning_rate": 5.114675655314126e-06, "loss": 0.1663, "step": 133480 }, { "epoch": 2.71735368956743, "grad_norm": 1.4654344793209613, "learning_rate": 5.113965272845969e-06, "loss": 0.1204, "step": 133490 }, { "epoch": 2.717557251908397, "grad_norm": 0.13602713535323127, "learning_rate": 5.113254888076134e-06, "loss": 0.0973, "step": 133500 }, { "epoch": 2.717760814249364, "grad_norm": 7.353888267716474, "learning_rate": 5.112544501018972e-06, "loss": 0.1795, "step": 133510 }, { "epoch": 2.7179643765903307, "grad_norm": 0.280025794748969, "learning_rate": 5.111834111688829e-06, "loss": 0.07, "step": 133520 }, { "epoch": 2.718167938931298, "grad_norm": 34.63157364403336, "learning_rate": 5.111123720100052e-06, "loss": 0.2092, "step": 133530 }, { "epoch": 2.718371501272265, "grad_norm": 0.3131701611753207, "learning_rate": 5.110413326266989e-06, "loss": 0.1313, "step": 133540 }, { "epoch": 2.7185750636132315, "grad_norm": 30.00750847382166, "learning_rate": 5.109702930203985e-06, "loss": 0.1193, "step": 133550 }, { "epoch": 2.7187786259541986, "grad_norm": 0.5792252869082961, "learning_rate": 5.108992531925388e-06, "loss": 0.2301, "step": 133560 }, { "epoch": 2.7189821882951652, "grad_norm": 82.15350841341524, "learning_rate": 5.10828213144555e-06, "loss": 0.0626, "step": 133570 }, { "epoch": 2.7191857506361323, "grad_norm": 15.154234751029598, "learning_rate": 5.107571728778813e-06, "loss": 0.1868, "step": 133580 }, { "epoch": 2.719389312977099, "grad_norm": 16.051911990663317, "learning_rate": 5.106861323939526e-06, "loss": 0.1304, "step": 133590 }, { "epoch": 2.719592875318066, "grad_norm": 0.12610653331192936, "learning_rate": 5.106150916942038e-06, "loss": 0.0802, "step": 133600 }, { "epoch": 2.719796437659033, "grad_norm": 23.93252960212948, "learning_rate": 5.105440507800694e-06, "loss": 0.1071, "step": 133610 }, { "epoch": 2.7199999999999998, "grad_norm": 0.014235713248905943, "learning_rate": 5.104730096529845e-06, "loss": 0.0862, "step": 133620 }, { "epoch": 2.720203562340967, "grad_norm": 0.5633447147829377, "learning_rate": 5.104019683143837e-06, "loss": 0.0698, "step": 133630 }, { "epoch": 2.720407124681934, "grad_norm": 11.52978337598769, "learning_rate": 5.103309267657016e-06, "loss": 0.0646, "step": 133640 }, { "epoch": 2.7206106870229005, "grad_norm": 41.26541010827529, "learning_rate": 5.102598850083731e-06, "loss": 0.2161, "step": 133650 }, { "epoch": 2.7208142493638676, "grad_norm": 0.6602409537527135, "learning_rate": 5.101888430438331e-06, "loss": 0.1496, "step": 133660 }, { "epoch": 2.7210178117048347, "grad_norm": 1.9203375344806322, "learning_rate": 5.101178008735162e-06, "loss": 0.2002, "step": 133670 }, { "epoch": 2.7212213740458013, "grad_norm": 5.954512404872259, "learning_rate": 5.1004675849885745e-06, "loss": 0.0833, "step": 133680 }, { "epoch": 2.7214249363867684, "grad_norm": 13.556520958687024, "learning_rate": 5.099757159212914e-06, "loss": 0.1803, "step": 133690 }, { "epoch": 2.7216284987277355, "grad_norm": 0.3537187409505419, "learning_rate": 5.099046731422528e-06, "loss": 0.0932, "step": 133700 }, { "epoch": 2.721832061068702, "grad_norm": 19.24164076182585, "learning_rate": 5.0983363016317665e-06, "loss": 0.0636, "step": 133710 }, { "epoch": 2.722035623409669, "grad_norm": 23.543099102742726, "learning_rate": 5.097625869854977e-06, "loss": 0.0294, "step": 133720 }, { "epoch": 2.7222391857506363, "grad_norm": 0.19012926744347755, "learning_rate": 5.096915436106507e-06, "loss": 0.0998, "step": 133730 }, { "epoch": 2.722442748091603, "grad_norm": 0.08840923238729015, "learning_rate": 5.096205000400705e-06, "loss": 0.1701, "step": 133740 }, { "epoch": 2.72264631043257, "grad_norm": 0.641904239969749, "learning_rate": 5.095494562751921e-06, "loss": 0.0977, "step": 133750 }, { "epoch": 2.722849872773537, "grad_norm": 40.91166333471258, "learning_rate": 5.094784123174499e-06, "loss": 0.0913, "step": 133760 }, { "epoch": 2.7230534351145037, "grad_norm": 9.132671260416652, "learning_rate": 5.09407368168279e-06, "loss": 0.1743, "step": 133770 }, { "epoch": 2.723256997455471, "grad_norm": 0.24194565776058188, "learning_rate": 5.093363238291141e-06, "loss": 0.2203, "step": 133780 }, { "epoch": 2.723460559796438, "grad_norm": 8.258632104265608, "learning_rate": 5.092652793013901e-06, "loss": 0.0528, "step": 133790 }, { "epoch": 2.7236641221374045, "grad_norm": 0.052926069686386884, "learning_rate": 5.0919423458654204e-06, "loss": 0.2729, "step": 133800 }, { "epoch": 2.7238676844783716, "grad_norm": 8.279942438614972, "learning_rate": 5.091231896860045e-06, "loss": 0.1404, "step": 133810 }, { "epoch": 2.7240712468193387, "grad_norm": 3.34454900993385, "learning_rate": 5.0905214460121235e-06, "loss": 0.11, "step": 133820 }, { "epoch": 2.7242748091603053, "grad_norm": 12.669751616396523, "learning_rate": 5.0898109933360065e-06, "loss": 0.0833, "step": 133830 }, { "epoch": 2.7244783715012724, "grad_norm": 0.5558807674853237, "learning_rate": 5.089100538846039e-06, "loss": 0.1011, "step": 133840 }, { "epoch": 2.7246819338422394, "grad_norm": 31.412442837309182, "learning_rate": 5.088390082556571e-06, "loss": 0.0677, "step": 133850 }, { "epoch": 2.724885496183206, "grad_norm": 0.04548132141339961, "learning_rate": 5.087679624481953e-06, "loss": 0.0887, "step": 133860 }, { "epoch": 2.725089058524173, "grad_norm": 9.760558732507553, "learning_rate": 5.08696916463653e-06, "loss": 0.108, "step": 133870 }, { "epoch": 2.72529262086514, "grad_norm": 14.968479867994418, "learning_rate": 5.086258703034654e-06, "loss": 0.235, "step": 133880 }, { "epoch": 2.725496183206107, "grad_norm": 0.20608428800986256, "learning_rate": 5.085548239690673e-06, "loss": 0.0817, "step": 133890 }, { "epoch": 2.725699745547074, "grad_norm": 6.4459065641460676, "learning_rate": 5.084837774618934e-06, "loss": 0.1338, "step": 133900 }, { "epoch": 2.7259033078880406, "grad_norm": 0.10359752877022077, "learning_rate": 5.084127307833787e-06, "loss": 0.1149, "step": 133910 }, { "epoch": 2.7261068702290077, "grad_norm": 15.639425988147662, "learning_rate": 5.083416839349582e-06, "loss": 0.2018, "step": 133920 }, { "epoch": 2.7263104325699743, "grad_norm": 1.5373681394375696, "learning_rate": 5.082706369180665e-06, "loss": 0.0228, "step": 133930 }, { "epoch": 2.7265139949109414, "grad_norm": 0.030332680041376393, "learning_rate": 5.081995897341386e-06, "loss": 0.1152, "step": 133940 }, { "epoch": 2.7267175572519085, "grad_norm": 10.905125014527757, "learning_rate": 5.081285423846096e-06, "loss": 0.1199, "step": 133950 }, { "epoch": 2.726921119592875, "grad_norm": 0.4782098577292204, "learning_rate": 5.080574948709141e-06, "loss": 0.072, "step": 133960 }, { "epoch": 2.727124681933842, "grad_norm": 0.20279696694021015, "learning_rate": 5.079864471944872e-06, "loss": 0.0778, "step": 133970 }, { "epoch": 2.7273282442748092, "grad_norm": 0.0700139951962583, "learning_rate": 5.079153993567636e-06, "loss": 0.13, "step": 133980 }, { "epoch": 2.727531806615776, "grad_norm": 5.144190714723411, "learning_rate": 5.078443513591783e-06, "loss": 0.126, "step": 133990 }, { "epoch": 2.727735368956743, "grad_norm": 0.018664145353788547, "learning_rate": 5.077733032031663e-06, "loss": 0.1248, "step": 134000 }, { "epoch": 2.72793893129771, "grad_norm": 10.478496187103016, "learning_rate": 5.077022548901626e-06, "loss": 0.1408, "step": 134010 }, { "epoch": 2.7281424936386767, "grad_norm": 1.841880885239721, "learning_rate": 5.076312064216016e-06, "loss": 0.1613, "step": 134020 }, { "epoch": 2.7283460559796437, "grad_norm": 6.182479676904797, "learning_rate": 5.075601577989187e-06, "loss": 0.1064, "step": 134030 }, { "epoch": 2.728549618320611, "grad_norm": 0.04240420496710388, "learning_rate": 5.074891090235488e-06, "loss": 0.1368, "step": 134040 }, { "epoch": 2.7287531806615775, "grad_norm": 5.318235570495766, "learning_rate": 5.074180600969265e-06, "loss": 0.1011, "step": 134050 }, { "epoch": 2.7289567430025445, "grad_norm": 0.1604607286661751, "learning_rate": 5.073470110204871e-06, "loss": 0.1115, "step": 134060 }, { "epoch": 2.7291603053435116, "grad_norm": 25.02147899773134, "learning_rate": 5.072759617956652e-06, "loss": 0.0988, "step": 134070 }, { "epoch": 2.7293638676844783, "grad_norm": 0.06365217966853537, "learning_rate": 5.072049124238959e-06, "loss": 0.0815, "step": 134080 }, { "epoch": 2.7295674300254453, "grad_norm": 0.036654280382487144, "learning_rate": 5.0713386290661426e-06, "loss": 0.117, "step": 134090 }, { "epoch": 2.7297709923664124, "grad_norm": 1.8176945053449205, "learning_rate": 5.070628132452549e-06, "loss": 0.0203, "step": 134100 }, { "epoch": 2.729974554707379, "grad_norm": 0.03785341668198228, "learning_rate": 5.06991763441253e-06, "loss": 0.1749, "step": 134110 }, { "epoch": 2.730178117048346, "grad_norm": 12.09859597913328, "learning_rate": 5.069207134960434e-06, "loss": 0.145, "step": 134120 }, { "epoch": 2.730381679389313, "grad_norm": 15.718941635707658, "learning_rate": 5.0684966341106125e-06, "loss": 0.1265, "step": 134130 }, { "epoch": 2.73058524173028, "grad_norm": 1.719678490964187, "learning_rate": 5.06778613187741e-06, "loss": 0.1893, "step": 134140 }, { "epoch": 2.730788804071247, "grad_norm": 13.698419556919335, "learning_rate": 5.067075628275182e-06, "loss": 0.1451, "step": 134150 }, { "epoch": 2.730992366412214, "grad_norm": 52.71639147706731, "learning_rate": 5.0663651233182744e-06, "loss": 0.0578, "step": 134160 }, { "epoch": 2.7311959287531806, "grad_norm": 2.7051238357039353, "learning_rate": 5.065654617021037e-06, "loss": 0.1136, "step": 134170 }, { "epoch": 2.7313994910941477, "grad_norm": 0.07123089584573658, "learning_rate": 5.06494410939782e-06, "loss": 0.0728, "step": 134180 }, { "epoch": 2.731603053435115, "grad_norm": 0.694924218850794, "learning_rate": 5.064233600462974e-06, "loss": 0.1109, "step": 134190 }, { "epoch": 2.7318066157760814, "grad_norm": 7.281969111568222, "learning_rate": 5.063523090230847e-06, "loss": 0.1241, "step": 134200 }, { "epoch": 2.7320101781170485, "grad_norm": 19.57516581808322, "learning_rate": 5.062812578715791e-06, "loss": 0.1611, "step": 134210 }, { "epoch": 2.732213740458015, "grad_norm": 8.583812878893962, "learning_rate": 5.062102065932152e-06, "loss": 0.2391, "step": 134220 }, { "epoch": 2.732417302798982, "grad_norm": 23.45741450967679, "learning_rate": 5.061391551894282e-06, "loss": 0.0894, "step": 134230 }, { "epoch": 2.732620865139949, "grad_norm": 13.441129545998555, "learning_rate": 5.060681036616532e-06, "loss": 0.1895, "step": 134240 }, { "epoch": 2.732824427480916, "grad_norm": 1.029748365840035, "learning_rate": 5.05997052011325e-06, "loss": 0.1433, "step": 134250 }, { "epoch": 2.733027989821883, "grad_norm": 3.739146302905078, "learning_rate": 5.059260002398786e-06, "loss": 0.1127, "step": 134260 }, { "epoch": 2.7332315521628496, "grad_norm": 4.150611466109993, "learning_rate": 5.0585494834874895e-06, "loss": 0.1705, "step": 134270 }, { "epoch": 2.7334351145038167, "grad_norm": 0.4681380049858898, "learning_rate": 5.0578389633937095e-06, "loss": 0.1147, "step": 134280 }, { "epoch": 2.733638676844784, "grad_norm": 0.5632950701868066, "learning_rate": 5.057128442131799e-06, "loss": 0.0414, "step": 134290 }, { "epoch": 2.7338422391857504, "grad_norm": 7.551909174304814, "learning_rate": 5.056417919716105e-06, "loss": 0.1281, "step": 134300 }, { "epoch": 2.7340458015267175, "grad_norm": 3.1843513322231605, "learning_rate": 5.055707396160979e-06, "loss": 0.0784, "step": 134310 }, { "epoch": 2.7342493638676846, "grad_norm": 0.08455477270669713, "learning_rate": 5.054996871480772e-06, "loss": 0.0916, "step": 134320 }, { "epoch": 2.734452926208651, "grad_norm": 16.533354295360972, "learning_rate": 5.05428634568983e-06, "loss": 0.0979, "step": 134330 }, { "epoch": 2.7346564885496183, "grad_norm": 9.407227804526148, "learning_rate": 5.0535758188025065e-06, "loss": 0.0341, "step": 134340 }, { "epoch": 2.7348600508905854, "grad_norm": 0.2699587909490463, "learning_rate": 5.052865290833149e-06, "loss": 0.1078, "step": 134350 }, { "epoch": 2.735063613231552, "grad_norm": 0.07709558941732125, "learning_rate": 5.0521547617961095e-06, "loss": 0.12, "step": 134360 }, { "epoch": 2.735267175572519, "grad_norm": 0.056300594166858876, "learning_rate": 5.051444231705737e-06, "loss": 0.0909, "step": 134370 }, { "epoch": 2.735470737913486, "grad_norm": 24.786198778446877, "learning_rate": 5.050733700576384e-06, "loss": 0.1974, "step": 134380 }, { "epoch": 2.735674300254453, "grad_norm": 9.742923837089593, "learning_rate": 5.050023168422395e-06, "loss": 0.1199, "step": 134390 }, { "epoch": 2.73587786259542, "grad_norm": 0.1267588674944733, "learning_rate": 5.049312635258127e-06, "loss": 0.0867, "step": 134400 }, { "epoch": 2.736081424936387, "grad_norm": 11.100183035020956, "learning_rate": 5.048602101097926e-06, "loss": 0.1493, "step": 134410 }, { "epoch": 2.7362849872773536, "grad_norm": 37.71911380029843, "learning_rate": 5.047891565956144e-06, "loss": 0.1081, "step": 134420 }, { "epoch": 2.7364885496183207, "grad_norm": 15.116891957947859, "learning_rate": 5.047181029847127e-06, "loss": 0.2265, "step": 134430 }, { "epoch": 2.7366921119592877, "grad_norm": 7.29789147120383, "learning_rate": 5.04647049278523e-06, "loss": 0.1178, "step": 134440 }, { "epoch": 2.7368956743002544, "grad_norm": 10.932397682458287, "learning_rate": 5.0457599547848025e-06, "loss": 0.1567, "step": 134450 }, { "epoch": 2.7370992366412215, "grad_norm": 0.20269701577910607, "learning_rate": 5.045049415860193e-06, "loss": 0.1206, "step": 134460 }, { "epoch": 2.7373027989821885, "grad_norm": 4.914900491496412, "learning_rate": 5.044338876025752e-06, "loss": 0.2001, "step": 134470 }, { "epoch": 2.737506361323155, "grad_norm": 15.899163778938862, "learning_rate": 5.043628335295831e-06, "loss": 0.0437, "step": 134480 }, { "epoch": 2.7377099236641222, "grad_norm": 5.848449254643655, "learning_rate": 5.042917793684779e-06, "loss": 0.1353, "step": 134490 }, { "epoch": 2.7379134860050893, "grad_norm": 0.09387005161690733, "learning_rate": 5.042207251206948e-06, "loss": 0.1298, "step": 134500 }, { "epoch": 2.738117048346056, "grad_norm": 0.5745871648153336, "learning_rate": 5.041496707876684e-06, "loss": 0.1718, "step": 134510 }, { "epoch": 2.738320610687023, "grad_norm": 0.27266172641789965, "learning_rate": 5.040786163708344e-06, "loss": 0.0843, "step": 134520 }, { "epoch": 2.7385241730279897, "grad_norm": 0.020378769063568617, "learning_rate": 5.040075618716274e-06, "loss": 0.0705, "step": 134530 }, { "epoch": 2.7387277353689568, "grad_norm": 3.3238964319077646, "learning_rate": 5.039365072914824e-06, "loss": 0.0921, "step": 134540 }, { "epoch": 2.738931297709924, "grad_norm": 4.336518502275493, "learning_rate": 5.038654526318347e-06, "loss": 0.1494, "step": 134550 }, { "epoch": 2.7391348600508905, "grad_norm": 0.3561859925339373, "learning_rate": 5.037943978941192e-06, "loss": 0.0593, "step": 134560 }, { "epoch": 2.7393384223918575, "grad_norm": 0.24568166942010736, "learning_rate": 5.03723343079771e-06, "loss": 0.1081, "step": 134570 }, { "epoch": 2.739541984732824, "grad_norm": 2.6747185754625336, "learning_rate": 5.0365228819022505e-06, "loss": 0.1042, "step": 134580 }, { "epoch": 2.7397455470737913, "grad_norm": 3.7879771372696163, "learning_rate": 5.035812332269163e-06, "loss": 0.1712, "step": 134590 }, { "epoch": 2.7399491094147583, "grad_norm": 0.2793554188084964, "learning_rate": 5.0351017819128e-06, "loss": 0.083, "step": 134600 }, { "epoch": 2.740152671755725, "grad_norm": 12.493535696436107, "learning_rate": 5.0343912308475126e-06, "loss": 0.0621, "step": 134610 }, { "epoch": 2.740356234096692, "grad_norm": 0.08626014477742878, "learning_rate": 5.033680679087649e-06, "loss": 0.0651, "step": 134620 }, { "epoch": 2.740559796437659, "grad_norm": 0.0020607513825090695, "learning_rate": 5.032970126647559e-06, "loss": 0.1278, "step": 134630 }, { "epoch": 2.7407633587786258, "grad_norm": 12.222971483358766, "learning_rate": 5.032259573541598e-06, "loss": 0.2471, "step": 134640 }, { "epoch": 2.740966921119593, "grad_norm": 0.1299873864023938, "learning_rate": 5.031549019784112e-06, "loss": 0.0769, "step": 134650 }, { "epoch": 2.74117048346056, "grad_norm": 14.277330092128217, "learning_rate": 5.0308384653894525e-06, "loss": 0.0877, "step": 134660 }, { "epoch": 2.7413740458015265, "grad_norm": 20.122682258417033, "learning_rate": 5.0301279103719704e-06, "loss": 0.1654, "step": 134670 }, { "epoch": 2.7415776081424936, "grad_norm": 27.60099093100309, "learning_rate": 5.029417354746016e-06, "loss": 0.1021, "step": 134680 }, { "epoch": 2.7417811704834607, "grad_norm": 15.74183077775572, "learning_rate": 5.0287067985259415e-06, "loss": 0.1356, "step": 134690 }, { "epoch": 2.7419847328244273, "grad_norm": 22.748089537365455, "learning_rate": 5.027996241726097e-06, "loss": 0.0896, "step": 134700 }, { "epoch": 2.7421882951653944, "grad_norm": 0.07405255069737159, "learning_rate": 5.027285684360831e-06, "loss": 0.1434, "step": 134710 }, { "epoch": 2.7423918575063615, "grad_norm": 0.16851170191457168, "learning_rate": 5.026575126444494e-06, "loss": 0.0947, "step": 134720 }, { "epoch": 2.742595419847328, "grad_norm": 23.09249395788763, "learning_rate": 5.0258645679914406e-06, "loss": 0.1169, "step": 134730 }, { "epoch": 2.742798982188295, "grad_norm": 27.533044725968463, "learning_rate": 5.0251540090160175e-06, "loss": 0.1348, "step": 134740 }, { "epoch": 2.7430025445292623, "grad_norm": 18.92674550062883, "learning_rate": 5.024443449532578e-06, "loss": 0.1453, "step": 134750 }, { "epoch": 2.743206106870229, "grad_norm": 5.89570006493951, "learning_rate": 5.023732889555471e-06, "loss": 0.1469, "step": 134760 }, { "epoch": 2.743409669211196, "grad_norm": 7.939877109787121, "learning_rate": 5.023022329099046e-06, "loss": 0.1484, "step": 134770 }, { "epoch": 2.743613231552163, "grad_norm": 17.60059462284638, "learning_rate": 5.022311768177658e-06, "loss": 0.1495, "step": 134780 }, { "epoch": 2.7438167938931297, "grad_norm": 5.708320039048278, "learning_rate": 5.021601206805654e-06, "loss": 0.0945, "step": 134790 }, { "epoch": 2.744020356234097, "grad_norm": 0.015297334338444598, "learning_rate": 5.020890644997385e-06, "loss": 0.0294, "step": 134800 }, { "epoch": 2.744223918575064, "grad_norm": 1.0453603219858807, "learning_rate": 5.0201800827672045e-06, "loss": 0.0745, "step": 134810 }, { "epoch": 2.7444274809160305, "grad_norm": 14.931496771382822, "learning_rate": 5.01946952012946e-06, "loss": 0.223, "step": 134820 }, { "epoch": 2.7446310432569976, "grad_norm": 61.51728910513996, "learning_rate": 5.018758957098504e-06, "loss": 0.1105, "step": 134830 }, { "epoch": 2.744834605597964, "grad_norm": 23.130069194796977, "learning_rate": 5.018048393688686e-06, "loss": 0.0879, "step": 134840 }, { "epoch": 2.7450381679389313, "grad_norm": 9.839584973013071, "learning_rate": 5.017337829914357e-06, "loss": 0.1912, "step": 134850 }, { "epoch": 2.7452417302798984, "grad_norm": 6.9376980869705225, "learning_rate": 5.016627265789869e-06, "loss": 0.1763, "step": 134860 }, { "epoch": 2.745445292620865, "grad_norm": 0.11260149034599314, "learning_rate": 5.015916701329573e-06, "loss": 0.0615, "step": 134870 }, { "epoch": 2.745648854961832, "grad_norm": 5.0379986147491165, "learning_rate": 5.015206136547817e-06, "loss": 0.1835, "step": 134880 }, { "epoch": 2.7458524173027987, "grad_norm": 0.35120976792454106, "learning_rate": 5.014495571458953e-06, "loss": 0.1179, "step": 134890 }, { "epoch": 2.746055979643766, "grad_norm": 0.34439709395482915, "learning_rate": 5.013785006077334e-06, "loss": 0.0922, "step": 134900 }, { "epoch": 2.746259541984733, "grad_norm": 8.683307416924714, "learning_rate": 5.0130744404173085e-06, "loss": 0.126, "step": 134910 }, { "epoch": 2.7464631043256995, "grad_norm": 0.21235846402291247, "learning_rate": 5.012363874493227e-06, "loss": 0.097, "step": 134920 }, { "epoch": 2.7466666666666666, "grad_norm": 1.169739331605501, "learning_rate": 5.011653308319442e-06, "loss": 0.1593, "step": 134930 }, { "epoch": 2.7468702290076337, "grad_norm": 10.781171230372859, "learning_rate": 5.010942741910304e-06, "loss": 0.1597, "step": 134940 }, { "epoch": 2.7470737913486003, "grad_norm": 0.4549606193487979, "learning_rate": 5.010232175280162e-06, "loss": 0.092, "step": 134950 }, { "epoch": 2.7472773536895674, "grad_norm": 0.7151751730050908, "learning_rate": 5.009521608443368e-06, "loss": 0.1489, "step": 134960 }, { "epoch": 2.7474809160305345, "grad_norm": 4.8768714414590795, "learning_rate": 5.008811041414274e-06, "loss": 0.076, "step": 134970 }, { "epoch": 2.747684478371501, "grad_norm": 7.151758178859557, "learning_rate": 5.0081004742072285e-06, "loss": 0.0773, "step": 134980 }, { "epoch": 2.747888040712468, "grad_norm": 0.13134338389348163, "learning_rate": 5.007389906836585e-06, "loss": 0.118, "step": 134990 }, { "epoch": 2.7480916030534353, "grad_norm": 0.010418624246166299, "learning_rate": 5.00667933931669e-06, "loss": 0.1197, "step": 135000 }, { "epoch": 2.748295165394402, "grad_norm": 0.1236488404926786, "learning_rate": 5.005968771661901e-06, "loss": 0.1218, "step": 135010 }, { "epoch": 2.748498727735369, "grad_norm": 0.5375741030109833, "learning_rate": 5.0052582038865626e-06, "loss": 0.1044, "step": 135020 }, { "epoch": 2.748702290076336, "grad_norm": 24.396153835321627, "learning_rate": 5.0045476360050285e-06, "loss": 0.2452, "step": 135030 }, { "epoch": 2.7489058524173027, "grad_norm": 8.130379350643262, "learning_rate": 5.003837068031648e-06, "loss": 0.2564, "step": 135040 }, { "epoch": 2.7491094147582698, "grad_norm": 1.1448276824721113, "learning_rate": 5.003126499980774e-06, "loss": 0.0397, "step": 135050 }, { "epoch": 2.749312977099237, "grad_norm": 3.9209561319716904, "learning_rate": 5.002415931866756e-06, "loss": 0.177, "step": 135060 }, { "epoch": 2.7495165394402035, "grad_norm": 8.856108561947474, "learning_rate": 5.001705363703947e-06, "loss": 0.1233, "step": 135070 }, { "epoch": 2.7497201017811705, "grad_norm": 6.610171072307118, "learning_rate": 5.000994795506694e-06, "loss": 0.1414, "step": 135080 }, { "epoch": 2.7499236641221376, "grad_norm": 38.45475978740261, "learning_rate": 5.000284227289348e-06, "loss": 0.1622, "step": 135090 }, { "epoch": 2.7501272264631043, "grad_norm": 17.350424946479347, "learning_rate": 4.999573659066265e-06, "loss": 0.0813, "step": 135100 }, { "epoch": 2.7503307888040713, "grad_norm": 11.327915949719973, "learning_rate": 4.99886309085179e-06, "loss": 0.1227, "step": 135110 }, { "epoch": 2.7505343511450384, "grad_norm": 0.026611984325507237, "learning_rate": 4.998152522660279e-06, "loss": 0.0748, "step": 135120 }, { "epoch": 2.750737913486005, "grad_norm": 8.553085337059814, "learning_rate": 4.997441954506079e-06, "loss": 0.0917, "step": 135130 }, { "epoch": 2.750941475826972, "grad_norm": 0.49392941721038436, "learning_rate": 4.996731386403543e-06, "loss": 0.105, "step": 135140 }, { "epoch": 2.751145038167939, "grad_norm": 22.388940721138354, "learning_rate": 4.996020818367018e-06, "loss": 0.0631, "step": 135150 }, { "epoch": 2.751348600508906, "grad_norm": 2.515079689759535, "learning_rate": 4.995310250410859e-06, "loss": 0.1362, "step": 135160 }, { "epoch": 2.751552162849873, "grad_norm": 8.71738395828958, "learning_rate": 4.994599682549418e-06, "loss": 0.09, "step": 135170 }, { "epoch": 2.7517557251908396, "grad_norm": 3.704854021973442, "learning_rate": 4.993889114797039e-06, "loss": 0.0993, "step": 135180 }, { "epoch": 2.7519592875318066, "grad_norm": 39.046588684914084, "learning_rate": 4.99317854716808e-06, "loss": 0.126, "step": 135190 }, { "epoch": 2.7521628498727733, "grad_norm": 0.3642055843835759, "learning_rate": 4.9924679796768895e-06, "loss": 0.1812, "step": 135200 }, { "epoch": 2.7523664122137403, "grad_norm": 0.003244314029796448, "learning_rate": 4.9917574123378165e-06, "loss": 0.121, "step": 135210 }, { "epoch": 2.7525699745547074, "grad_norm": 0.9320415509666008, "learning_rate": 4.991046845165214e-06, "loss": 0.143, "step": 135220 }, { "epoch": 2.752773536895674, "grad_norm": 17.204750239538015, "learning_rate": 4.990336278173431e-06, "loss": 0.0523, "step": 135230 }, { "epoch": 2.752977099236641, "grad_norm": 0.22042977307921782, "learning_rate": 4.98962571137682e-06, "loss": 0.0787, "step": 135240 }, { "epoch": 2.753180661577608, "grad_norm": 11.412853626265125, "learning_rate": 4.988915144789732e-06, "loss": 0.1106, "step": 135250 }, { "epoch": 2.753384223918575, "grad_norm": 0.13715384304132017, "learning_rate": 4.988204578426514e-06, "loss": 0.0666, "step": 135260 }, { "epoch": 2.753587786259542, "grad_norm": 0.040414287333800356, "learning_rate": 4.987494012301522e-06, "loss": 0.1107, "step": 135270 }, { "epoch": 2.753791348600509, "grad_norm": 3.027619754242505, "learning_rate": 4.986783446429105e-06, "loss": 0.0833, "step": 135280 }, { "epoch": 2.7539949109414756, "grad_norm": 8.127896166376006, "learning_rate": 4.986072880823611e-06, "loss": 0.1118, "step": 135290 }, { "epoch": 2.7541984732824427, "grad_norm": 7.811383426570087, "learning_rate": 4.985362315499396e-06, "loss": 0.1599, "step": 135300 }, { "epoch": 2.75440203562341, "grad_norm": 1.2810690953712383, "learning_rate": 4.984651750470807e-06, "loss": 0.1321, "step": 135310 }, { "epoch": 2.7546055979643764, "grad_norm": 11.425320965135414, "learning_rate": 4.983941185752193e-06, "loss": 0.114, "step": 135320 }, { "epoch": 2.7548091603053435, "grad_norm": 9.439780976139232, "learning_rate": 4.98323062135791e-06, "loss": 0.1019, "step": 135330 }, { "epoch": 2.7550127226463106, "grad_norm": 0.10031253175396203, "learning_rate": 4.9825200573023064e-06, "loss": 0.1026, "step": 135340 }, { "epoch": 2.755216284987277, "grad_norm": 5.55104033922637, "learning_rate": 4.981809493599731e-06, "loss": 0.1856, "step": 135350 }, { "epoch": 2.7554198473282443, "grad_norm": 14.367521785411308, "learning_rate": 4.98109893026454e-06, "loss": 0.1626, "step": 135360 }, { "epoch": 2.7556234096692114, "grad_norm": 16.64364141471811, "learning_rate": 4.980388367311076e-06, "loss": 0.135, "step": 135370 }, { "epoch": 2.755826972010178, "grad_norm": 1.436608984548271, "learning_rate": 4.979677804753697e-06, "loss": 0.0724, "step": 135380 }, { "epoch": 2.756030534351145, "grad_norm": 0.16820281712780258, "learning_rate": 4.97896724260675e-06, "loss": 0.1858, "step": 135390 }, { "epoch": 2.756234096692112, "grad_norm": 20.53108397578691, "learning_rate": 4.978256680884586e-06, "loss": 0.1752, "step": 135400 }, { "epoch": 2.756437659033079, "grad_norm": 0.10690545841289391, "learning_rate": 4.977546119601557e-06, "loss": 0.1825, "step": 135410 }, { "epoch": 2.756641221374046, "grad_norm": 34.26492703830293, "learning_rate": 4.976835558772013e-06, "loss": 0.0949, "step": 135420 }, { "epoch": 2.756844783715013, "grad_norm": 6.969271928783115, "learning_rate": 4.976124998410306e-06, "loss": 0.0997, "step": 135430 }, { "epoch": 2.7570483460559796, "grad_norm": 3.7866588897084434, "learning_rate": 4.975414438530783e-06, "loss": 0.3128, "step": 135440 }, { "epoch": 2.7572519083969467, "grad_norm": 5.145781528690786, "learning_rate": 4.974703879147798e-06, "loss": 0.1972, "step": 135450 }, { "epoch": 2.7574554707379137, "grad_norm": 0.623865799773195, "learning_rate": 4.973993320275702e-06, "loss": 0.097, "step": 135460 }, { "epoch": 2.7576590330788804, "grad_norm": 8.562348553563638, "learning_rate": 4.973282761928842e-06, "loss": 0.0434, "step": 135470 }, { "epoch": 2.7578625954198475, "grad_norm": 0.17434071342461643, "learning_rate": 4.972572204121572e-06, "loss": 0.1209, "step": 135480 }, { "epoch": 2.758066157760814, "grad_norm": 0.31623475180374855, "learning_rate": 4.971861646868243e-06, "loss": 0.1809, "step": 135490 }, { "epoch": 2.758269720101781, "grad_norm": 4.391469337737972, "learning_rate": 4.971151090183203e-06, "loss": 0.0779, "step": 135500 }, { "epoch": 2.7584732824427483, "grad_norm": 17.688984661444657, "learning_rate": 4.970440534080805e-06, "loss": 0.2127, "step": 135510 }, { "epoch": 2.758676844783715, "grad_norm": 10.242741454850178, "learning_rate": 4.969729978575397e-06, "loss": 0.0433, "step": 135520 }, { "epoch": 2.758880407124682, "grad_norm": 0.44081861900508623, "learning_rate": 4.969019423681331e-06, "loss": 0.0973, "step": 135530 }, { "epoch": 2.7590839694656486, "grad_norm": 0.07214763297543356, "learning_rate": 4.96830886941296e-06, "loss": 0.0934, "step": 135540 }, { "epoch": 2.7592875318066157, "grad_norm": 0.05178988738028138, "learning_rate": 4.96759831578463e-06, "loss": 0.0749, "step": 135550 }, { "epoch": 2.7594910941475828, "grad_norm": 7.9289710785886305, "learning_rate": 4.966887762810694e-06, "loss": 0.2298, "step": 135560 }, { "epoch": 2.7596946564885494, "grad_norm": 18.224403582052503, "learning_rate": 4.9661772105055034e-06, "loss": 0.1591, "step": 135570 }, { "epoch": 2.7598982188295165, "grad_norm": 14.59465927285418, "learning_rate": 4.9654666588834055e-06, "loss": 0.0907, "step": 135580 }, { "epoch": 2.7601017811704835, "grad_norm": 3.2132052518502934, "learning_rate": 4.964756107958755e-06, "loss": 0.1073, "step": 135590 }, { "epoch": 2.76030534351145, "grad_norm": 11.730565696087803, "learning_rate": 4.9640455577459e-06, "loss": 0.1215, "step": 135600 }, { "epoch": 2.7605089058524173, "grad_norm": 0.37796538898484333, "learning_rate": 4.963335008259189e-06, "loss": 0.0542, "step": 135610 }, { "epoch": 2.7607124681933843, "grad_norm": 0.03857619658024476, "learning_rate": 4.962624459512979e-06, "loss": 0.1064, "step": 135620 }, { "epoch": 2.760916030534351, "grad_norm": 14.114448434225809, "learning_rate": 4.961913911521612e-06, "loss": 0.0571, "step": 135630 }, { "epoch": 2.761119592875318, "grad_norm": 0.005204187927737418, "learning_rate": 4.961203364299445e-06, "loss": 0.1197, "step": 135640 }, { "epoch": 2.761323155216285, "grad_norm": 10.433967966232622, "learning_rate": 4.960492817860826e-06, "loss": 0.0991, "step": 135650 }, { "epoch": 2.7615267175572518, "grad_norm": 15.490086707386457, "learning_rate": 4.959782272220104e-06, "loss": 0.0963, "step": 135660 }, { "epoch": 2.761730279898219, "grad_norm": 0.22633183446489613, "learning_rate": 4.959071727391633e-06, "loss": 0.1202, "step": 135670 }, { "epoch": 2.761933842239186, "grad_norm": 9.870136083373515, "learning_rate": 4.958361183389759e-06, "loss": 0.0887, "step": 135680 }, { "epoch": 2.7621374045801526, "grad_norm": 10.786922419540243, "learning_rate": 4.957650640228834e-06, "loss": 0.1686, "step": 135690 }, { "epoch": 2.7623409669211196, "grad_norm": 3.1476909016875636, "learning_rate": 4.956940097923211e-06, "loss": 0.1822, "step": 135700 }, { "epoch": 2.7625445292620867, "grad_norm": 0.6118245740888666, "learning_rate": 4.956229556487237e-06, "loss": 0.0322, "step": 135710 }, { "epoch": 2.7627480916030533, "grad_norm": 32.33407313877357, "learning_rate": 4.955519015935263e-06, "loss": 0.1605, "step": 135720 }, { "epoch": 2.7629516539440204, "grad_norm": 14.36653752846173, "learning_rate": 4.954808476281641e-06, "loss": 0.2275, "step": 135730 }, { "epoch": 2.7631552162849875, "grad_norm": 5.169018636457201, "learning_rate": 4.95409793754072e-06, "loss": 0.1385, "step": 135740 }, { "epoch": 2.763358778625954, "grad_norm": 30.302340011005246, "learning_rate": 4.95338739972685e-06, "loss": 0.2286, "step": 135750 }, { "epoch": 2.763562340966921, "grad_norm": 4.871014192156765, "learning_rate": 4.952676862854381e-06, "loss": 0.116, "step": 135760 }, { "epoch": 2.7637659033078883, "grad_norm": 5.075316310358129, "learning_rate": 4.951966326937664e-06, "loss": 0.1334, "step": 135770 }, { "epoch": 2.763969465648855, "grad_norm": 0.12629918283786193, "learning_rate": 4.9512557919910495e-06, "loss": 0.0815, "step": 135780 }, { "epoch": 2.764173027989822, "grad_norm": 0.09622864299582164, "learning_rate": 4.950545258028887e-06, "loss": 0.21, "step": 135790 }, { "epoch": 2.7643765903307886, "grad_norm": 0.5639462117938084, "learning_rate": 4.949834725065527e-06, "loss": 0.0974, "step": 135800 }, { "epoch": 2.7645801526717557, "grad_norm": 7.034258089684197, "learning_rate": 4.949124193115317e-06, "loss": 0.2025, "step": 135810 }, { "epoch": 2.764783715012723, "grad_norm": 15.11229180715376, "learning_rate": 4.948413662192612e-06, "loss": 0.1088, "step": 135820 }, { "epoch": 2.7649872773536894, "grad_norm": 6.8030144254829645, "learning_rate": 4.947703132311759e-06, "loss": 0.1061, "step": 135830 }, { "epoch": 2.7651908396946565, "grad_norm": 14.68558935816055, "learning_rate": 4.946992603487107e-06, "loss": 0.1881, "step": 135840 }, { "epoch": 2.765394402035623, "grad_norm": 10.602072004500634, "learning_rate": 4.946282075733009e-06, "loss": 0.1983, "step": 135850 }, { "epoch": 2.7655979643765902, "grad_norm": 8.234177021026355, "learning_rate": 4.945571549063814e-06, "loss": 0.1119, "step": 135860 }, { "epoch": 2.7658015267175573, "grad_norm": 5.059657807645868, "learning_rate": 4.94486102349387e-06, "loss": 0.0751, "step": 135870 }, { "epoch": 2.766005089058524, "grad_norm": 0.19405922803031006, "learning_rate": 4.944150499037531e-06, "loss": 0.0502, "step": 135880 }, { "epoch": 2.766208651399491, "grad_norm": 8.716537840050481, "learning_rate": 4.9434399757091434e-06, "loss": 0.0534, "step": 135890 }, { "epoch": 2.766412213740458, "grad_norm": 0.2100725913415295, "learning_rate": 4.9427294535230565e-06, "loss": 0.0682, "step": 135900 }, { "epoch": 2.7666157760814247, "grad_norm": 1.8577488013060635, "learning_rate": 4.942018932493626e-06, "loss": 0.1645, "step": 135910 }, { "epoch": 2.766819338422392, "grad_norm": 14.459010516526149, "learning_rate": 4.941308412635195e-06, "loss": 0.1299, "step": 135920 }, { "epoch": 2.767022900763359, "grad_norm": 16.827311700896985, "learning_rate": 4.940597893962117e-06, "loss": 0.0582, "step": 135930 }, { "epoch": 2.7672264631043255, "grad_norm": 4.647144830620561, "learning_rate": 4.939887376488742e-06, "loss": 0.101, "step": 135940 }, { "epoch": 2.7674300254452926, "grad_norm": 0.4856268049567835, "learning_rate": 4.939176860229417e-06, "loss": 0.0632, "step": 135950 }, { "epoch": 2.7676335877862597, "grad_norm": 2.026406620432943, "learning_rate": 4.938466345198495e-06, "loss": 0.0782, "step": 135960 }, { "epoch": 2.7678371501272263, "grad_norm": 13.214323696124998, "learning_rate": 4.937755831410324e-06, "loss": 0.1037, "step": 135970 }, { "epoch": 2.7680407124681934, "grad_norm": 0.2665919967689912, "learning_rate": 4.937045318879253e-06, "loss": 0.0887, "step": 135980 }, { "epoch": 2.7682442748091605, "grad_norm": 2.9727516905716107, "learning_rate": 4.9363348076196345e-06, "loss": 0.1061, "step": 135990 }, { "epoch": 2.768447837150127, "grad_norm": 12.903060875875914, "learning_rate": 4.935624297645815e-06, "loss": 0.2124, "step": 136000 }, { "epoch": 2.768651399491094, "grad_norm": 7.877648510659558, "learning_rate": 4.934913788972147e-06, "loss": 0.0912, "step": 136010 }, { "epoch": 2.7688549618320613, "grad_norm": 40.80505761220325, "learning_rate": 4.93420328161298e-06, "loss": 0.1228, "step": 136020 }, { "epoch": 2.769058524173028, "grad_norm": 7.0295800474093015, "learning_rate": 4.9334927755826604e-06, "loss": 0.0699, "step": 136030 }, { "epoch": 2.769262086513995, "grad_norm": 39.789209955044356, "learning_rate": 4.932782270895541e-06, "loss": 0.0472, "step": 136040 }, { "epoch": 2.769465648854962, "grad_norm": 21.74573448416729, "learning_rate": 4.93207176756597e-06, "loss": 0.2078, "step": 136050 }, { "epoch": 2.7696692111959287, "grad_norm": 0.044693098509202646, "learning_rate": 4.931361265608295e-06, "loss": 0.0847, "step": 136060 }, { "epoch": 2.7698727735368958, "grad_norm": 14.069217900052454, "learning_rate": 4.930650765036871e-06, "loss": 0.1643, "step": 136070 }, { "epoch": 2.770076335877863, "grad_norm": 22.251366226795334, "learning_rate": 4.929940265866042e-06, "loss": 0.1098, "step": 136080 }, { "epoch": 2.7702798982188295, "grad_norm": 8.478945112377355, "learning_rate": 4.9292297681101615e-06, "loss": 0.1693, "step": 136090 }, { "epoch": 2.7704834605597966, "grad_norm": 4.47945437418246, "learning_rate": 4.9285192717835745e-06, "loss": 0.0927, "step": 136100 }, { "epoch": 2.7706870229007636, "grad_norm": 6.140463483651685, "learning_rate": 4.927808776900634e-06, "loss": 0.0562, "step": 136110 }, { "epoch": 2.7708905852417303, "grad_norm": 6.175889928203398, "learning_rate": 4.927098283475689e-06, "loss": 0.203, "step": 136120 }, { "epoch": 2.7710941475826973, "grad_norm": 18.343624193150657, "learning_rate": 4.926387791523087e-06, "loss": 0.1331, "step": 136130 }, { "epoch": 2.771297709923664, "grad_norm": 11.714383239916451, "learning_rate": 4.925677301057178e-06, "loss": 0.219, "step": 136140 }, { "epoch": 2.771501272264631, "grad_norm": 6.869169388850584, "learning_rate": 4.924966812092314e-06, "loss": 0.2122, "step": 136150 }, { "epoch": 2.7717048346055977, "grad_norm": 3.374030582470099, "learning_rate": 4.924256324642839e-06, "loss": 0.0691, "step": 136160 }, { "epoch": 2.7719083969465648, "grad_norm": 8.295108220588501, "learning_rate": 4.923545838723107e-06, "loss": 0.1647, "step": 136170 }, { "epoch": 2.772111959287532, "grad_norm": 0.5253692772071942, "learning_rate": 4.922835354347465e-06, "loss": 0.2175, "step": 136180 }, { "epoch": 2.7723155216284985, "grad_norm": 0.7258213292448189, "learning_rate": 4.922124871530261e-06, "loss": 0.0332, "step": 136190 }, { "epoch": 2.7725190839694656, "grad_norm": 38.64944856507197, "learning_rate": 4.921414390285847e-06, "loss": 0.1055, "step": 136200 }, { "epoch": 2.7727226463104326, "grad_norm": 23.49754701986644, "learning_rate": 4.920703910628569e-06, "loss": 0.1894, "step": 136210 }, { "epoch": 2.7729262086513993, "grad_norm": 15.202856447898515, "learning_rate": 4.919993432572779e-06, "loss": 0.1193, "step": 136220 }, { "epoch": 2.7731297709923663, "grad_norm": 0.5985771618560777, "learning_rate": 4.919282956132825e-06, "loss": 0.0287, "step": 136230 }, { "epoch": 2.7733333333333334, "grad_norm": 12.535268231544023, "learning_rate": 4.918572481323054e-06, "loss": 0.096, "step": 136240 }, { "epoch": 2.7735368956743, "grad_norm": 4.705462076811488, "learning_rate": 4.917862008157819e-06, "loss": 0.1117, "step": 136250 }, { "epoch": 2.773740458015267, "grad_norm": 3.3114455898967683, "learning_rate": 4.917151536651465e-06, "loss": 0.1091, "step": 136260 }, { "epoch": 2.773944020356234, "grad_norm": 0.0812744396495436, "learning_rate": 4.916441066818341e-06, "loss": 0.1267, "step": 136270 }, { "epoch": 2.774147582697201, "grad_norm": 1.6043620793546929, "learning_rate": 4.9157305986728004e-06, "loss": 0.0922, "step": 136280 }, { "epoch": 2.774351145038168, "grad_norm": 16.670803817312663, "learning_rate": 4.915020132229188e-06, "loss": 0.1404, "step": 136290 }, { "epoch": 2.774554707379135, "grad_norm": 8.45755444058874, "learning_rate": 4.914309667501851e-06, "loss": 0.1132, "step": 136300 }, { "epoch": 2.7747582697201016, "grad_norm": 11.703990652544682, "learning_rate": 4.913599204505144e-06, "loss": 0.0838, "step": 136310 }, { "epoch": 2.7749618320610687, "grad_norm": 37.927944984102716, "learning_rate": 4.912888743253411e-06, "loss": 0.1225, "step": 136320 }, { "epoch": 2.775165394402036, "grad_norm": 1.564426476003181, "learning_rate": 4.912178283761003e-06, "loss": 0.1041, "step": 136330 }, { "epoch": 2.7753689567430024, "grad_norm": 8.12623151944921, "learning_rate": 4.911467826042266e-06, "loss": 0.0776, "step": 136340 }, { "epoch": 2.7755725190839695, "grad_norm": 16.017588316103186, "learning_rate": 4.91075737011155e-06, "loss": 0.1823, "step": 136350 }, { "epoch": 2.7757760814249366, "grad_norm": 0.797023462130195, "learning_rate": 4.910046915983205e-06, "loss": 0.1047, "step": 136360 }, { "epoch": 2.7759796437659032, "grad_norm": 15.685178441460906, "learning_rate": 4.909336463671578e-06, "loss": 0.0974, "step": 136370 }, { "epoch": 2.7761832061068703, "grad_norm": 2.3412400113942646, "learning_rate": 4.908626013191017e-06, "loss": 0.1336, "step": 136380 }, { "epoch": 2.7763867684478374, "grad_norm": 17.545165072577166, "learning_rate": 4.907915564555874e-06, "loss": 0.1707, "step": 136390 }, { "epoch": 2.776590330788804, "grad_norm": 47.16641390359558, "learning_rate": 4.907205117780493e-06, "loss": 0.0937, "step": 136400 }, { "epoch": 2.776793893129771, "grad_norm": 0.10975546262168591, "learning_rate": 4.906494672879225e-06, "loss": 0.0222, "step": 136410 }, { "epoch": 2.776997455470738, "grad_norm": 0.14752946406846557, "learning_rate": 4.905784229866416e-06, "loss": 0.0249, "step": 136420 }, { "epoch": 2.777201017811705, "grad_norm": 0.142617583261249, "learning_rate": 4.9050737887564166e-06, "loss": 0.0483, "step": 136430 }, { "epoch": 2.777404580152672, "grad_norm": 1.5002198806343292, "learning_rate": 4.904363349563575e-06, "loss": 0.1653, "step": 136440 }, { "epoch": 2.7776081424936385, "grad_norm": 0.2445325031279849, "learning_rate": 4.903652912302237e-06, "loss": 0.1025, "step": 136450 }, { "epoch": 2.7778117048346056, "grad_norm": 39.41037228201419, "learning_rate": 4.902942476986756e-06, "loss": 0.1853, "step": 136460 }, { "epoch": 2.7780152671755727, "grad_norm": 11.4391836032499, "learning_rate": 4.902232043631473e-06, "loss": 0.1451, "step": 136470 }, { "epoch": 2.7782188295165393, "grad_norm": 0.07932591503886961, "learning_rate": 4.901521612250741e-06, "loss": 0.0942, "step": 136480 }, { "epoch": 2.7784223918575064, "grad_norm": 0.052959516809630625, "learning_rate": 4.900811182858908e-06, "loss": 0.207, "step": 136490 }, { "epoch": 2.778625954198473, "grad_norm": 0.9174896350081025, "learning_rate": 4.900100755470319e-06, "loss": 0.1505, "step": 136500 }, { "epoch": 2.77882951653944, "grad_norm": 0.06059411313071154, "learning_rate": 4.899390330099324e-06, "loss": 0.0305, "step": 136510 }, { "epoch": 2.779033078880407, "grad_norm": 0.1225944476444689, "learning_rate": 4.898679906760273e-06, "loss": 0.0718, "step": 136520 }, { "epoch": 2.779236641221374, "grad_norm": 15.126227239118053, "learning_rate": 4.897969485467509e-06, "loss": 0.1913, "step": 136530 }, { "epoch": 2.779440203562341, "grad_norm": 0.6156008624026666, "learning_rate": 4.8972590662353854e-06, "loss": 0.1211, "step": 136540 }, { "epoch": 2.779643765903308, "grad_norm": 4.505867950553813, "learning_rate": 4.896548649078246e-06, "loss": 0.201, "step": 136550 }, { "epoch": 2.7798473282442746, "grad_norm": 11.131679377766408, "learning_rate": 4.895838234010439e-06, "loss": 0.1273, "step": 136560 }, { "epoch": 2.7800508905852417, "grad_norm": 8.876334209921756, "learning_rate": 4.895127821046315e-06, "loss": 0.0603, "step": 136570 }, { "epoch": 2.7802544529262088, "grad_norm": 21.41230129861295, "learning_rate": 4.894417410200219e-06, "loss": 0.2474, "step": 136580 }, { "epoch": 2.7804580152671754, "grad_norm": 0.8922662433232017, "learning_rate": 4.8937070014864965e-06, "loss": 0.1854, "step": 136590 }, { "epoch": 2.7806615776081425, "grad_norm": 0.1432847026823368, "learning_rate": 4.892996594919503e-06, "loss": 0.0627, "step": 136600 }, { "epoch": 2.7808651399491096, "grad_norm": 0.0861676541943466, "learning_rate": 4.892286190513578e-06, "loss": 0.0495, "step": 136610 }, { "epoch": 2.781068702290076, "grad_norm": 3.7452314844654864, "learning_rate": 4.891575788283073e-06, "loss": 0.0863, "step": 136620 }, { "epoch": 2.7812722646310433, "grad_norm": 8.68124516201214, "learning_rate": 4.890865388242335e-06, "loss": 0.1912, "step": 136630 }, { "epoch": 2.7814758269720103, "grad_norm": 10.790194715600384, "learning_rate": 4.890154990405709e-06, "loss": 0.1532, "step": 136640 }, { "epoch": 2.781679389312977, "grad_norm": 0.1864199289907295, "learning_rate": 4.889444594787547e-06, "loss": 0.2081, "step": 136650 }, { "epoch": 2.781882951653944, "grad_norm": 14.20842188327166, "learning_rate": 4.888734201402192e-06, "loss": 0.1509, "step": 136660 }, { "epoch": 2.782086513994911, "grad_norm": 19.99363673838848, "learning_rate": 4.8880238102639924e-06, "loss": 0.1346, "step": 136670 }, { "epoch": 2.7822900763358778, "grad_norm": 10.701384107390368, "learning_rate": 4.887313421387299e-06, "loss": 0.0864, "step": 136680 }, { "epoch": 2.782493638676845, "grad_norm": 3.747099559612486, "learning_rate": 4.8866030347864546e-06, "loss": 0.1225, "step": 136690 }, { "epoch": 2.782697201017812, "grad_norm": 0.5909097814174525, "learning_rate": 4.885892650475809e-06, "loss": 0.0905, "step": 136700 }, { "epoch": 2.7829007633587786, "grad_norm": 58.05687744353679, "learning_rate": 4.885182268469707e-06, "loss": 0.146, "step": 136710 }, { "epoch": 2.7831043256997456, "grad_norm": 4.2402005461413745, "learning_rate": 4.884471888782497e-06, "loss": 0.0888, "step": 136720 }, { "epoch": 2.7833078880407127, "grad_norm": 6.634818581079851, "learning_rate": 4.883761511428527e-06, "loss": 0.1376, "step": 136730 }, { "epoch": 2.7835114503816794, "grad_norm": 8.704432154203351, "learning_rate": 4.883051136422141e-06, "loss": 0.1342, "step": 136740 }, { "epoch": 2.7837150127226464, "grad_norm": 1.5307162367120348, "learning_rate": 4.882340763777689e-06, "loss": 0.1412, "step": 136750 }, { "epoch": 2.783918575063613, "grad_norm": 3.7600875193421177, "learning_rate": 4.881630393509518e-06, "loss": 0.1691, "step": 136760 }, { "epoch": 2.78412213740458, "grad_norm": 0.156049356019548, "learning_rate": 4.880920025631973e-06, "loss": 0.1044, "step": 136770 }, { "epoch": 2.7843256997455472, "grad_norm": 2.7636906671196133, "learning_rate": 4.8802096601594025e-06, "loss": 0.1076, "step": 136780 }, { "epoch": 2.784529262086514, "grad_norm": 47.56781856858634, "learning_rate": 4.879499297106149e-06, "loss": 0.0809, "step": 136790 }, { "epoch": 2.784732824427481, "grad_norm": 7.083249839485615, "learning_rate": 4.878788936486565e-06, "loss": 0.2197, "step": 136800 }, { "epoch": 2.7849363867684476, "grad_norm": 6.153811225463554, "learning_rate": 4.8780785783149946e-06, "loss": 0.1713, "step": 136810 }, { "epoch": 2.7851399491094146, "grad_norm": 18.528674969430053, "learning_rate": 4.877368222605782e-06, "loss": 0.1729, "step": 136820 }, { "epoch": 2.7853435114503817, "grad_norm": 0.0984848056509784, "learning_rate": 4.876657869373279e-06, "loss": 0.0915, "step": 136830 }, { "epoch": 2.7855470737913484, "grad_norm": 53.14991256296323, "learning_rate": 4.875947518631828e-06, "loss": 0.1734, "step": 136840 }, { "epoch": 2.7857506361323154, "grad_norm": 19.630777108787836, "learning_rate": 4.875237170395775e-06, "loss": 0.1191, "step": 136850 }, { "epoch": 2.7859541984732825, "grad_norm": 4.927625487652619, "learning_rate": 4.874526824679471e-06, "loss": 0.0905, "step": 136860 }, { "epoch": 2.786157760814249, "grad_norm": 0.19993223410297767, "learning_rate": 4.873816481497258e-06, "loss": 0.1024, "step": 136870 }, { "epoch": 2.7863613231552162, "grad_norm": 29.936543455226982, "learning_rate": 4.873106140863482e-06, "loss": 0.1118, "step": 136880 }, { "epoch": 2.7865648854961833, "grad_norm": 0.04137168490615125, "learning_rate": 4.872395802792495e-06, "loss": 0.1667, "step": 136890 }, { "epoch": 2.78676844783715, "grad_norm": 39.0334402497207, "learning_rate": 4.871685467298635e-06, "loss": 0.1858, "step": 136900 }, { "epoch": 2.786972010178117, "grad_norm": 0.07504807870440022, "learning_rate": 4.870975134396255e-06, "loss": 0.0732, "step": 136910 }, { "epoch": 2.787175572519084, "grad_norm": 0.15697097948859567, "learning_rate": 4.870264804099698e-06, "loss": 0.0604, "step": 136920 }, { "epoch": 2.7873791348600507, "grad_norm": 0.2013996594636231, "learning_rate": 4.8695544764233085e-06, "loss": 0.0336, "step": 136930 }, { "epoch": 2.787582697201018, "grad_norm": 0.1977810973987225, "learning_rate": 4.868844151381436e-06, "loss": 0.1598, "step": 136940 }, { "epoch": 2.787786259541985, "grad_norm": 0.11014872423631109, "learning_rate": 4.868133828988424e-06, "loss": 0.1229, "step": 136950 }, { "epoch": 2.7879898218829515, "grad_norm": 5.118828584069297, "learning_rate": 4.8674235092586185e-06, "loss": 0.0869, "step": 136960 }, { "epoch": 2.7881933842239186, "grad_norm": 0.6052678945016872, "learning_rate": 4.8667131922063685e-06, "loss": 0.0686, "step": 136970 }, { "epoch": 2.7883969465648857, "grad_norm": 35.3693376497421, "learning_rate": 4.866002877846015e-06, "loss": 0.2082, "step": 136980 }, { "epoch": 2.7886005089058523, "grad_norm": 0.1379000618319988, "learning_rate": 4.865292566191907e-06, "loss": 0.0812, "step": 136990 }, { "epoch": 2.7888040712468194, "grad_norm": 0.09572182985260613, "learning_rate": 4.8645822572583886e-06, "loss": 0.1687, "step": 137000 }, { "epoch": 2.7890076335877865, "grad_norm": 8.099273542156734, "learning_rate": 4.863871951059806e-06, "loss": 0.0919, "step": 137010 }, { "epoch": 2.789211195928753, "grad_norm": 0.04083776353236704, "learning_rate": 4.8631616476105055e-06, "loss": 0.0705, "step": 137020 }, { "epoch": 2.78941475826972, "grad_norm": 4.271616106859581, "learning_rate": 4.862451346924831e-06, "loss": 0.1047, "step": 137030 }, { "epoch": 2.7896183206106873, "grad_norm": 8.888531574180396, "learning_rate": 4.8617410490171275e-06, "loss": 0.1481, "step": 137040 }, { "epoch": 2.789821882951654, "grad_norm": 27.39772599361911, "learning_rate": 4.861030753901744e-06, "loss": 0.0591, "step": 137050 }, { "epoch": 2.790025445292621, "grad_norm": 23.402982791667696, "learning_rate": 4.8603204615930225e-06, "loss": 0.2913, "step": 137060 }, { "epoch": 2.790229007633588, "grad_norm": 24.424580382237966, "learning_rate": 4.85961017210531e-06, "loss": 0.0649, "step": 137070 }, { "epoch": 2.7904325699745547, "grad_norm": 0.26899530271600686, "learning_rate": 4.858899885452949e-06, "loss": 0.1376, "step": 137080 }, { "epoch": 2.7906361323155218, "grad_norm": 0.6470153327873112, "learning_rate": 4.858189601650287e-06, "loss": 0.1321, "step": 137090 }, { "epoch": 2.7908396946564884, "grad_norm": 0.19891850276227105, "learning_rate": 4.85747932071167e-06, "loss": 0.1122, "step": 137100 }, { "epoch": 2.7910432569974555, "grad_norm": 0.4173418095107521, "learning_rate": 4.8567690426514394e-06, "loss": 0.1059, "step": 137110 }, { "epoch": 2.7912468193384226, "grad_norm": 0.03439406051818531, "learning_rate": 4.856058767483945e-06, "loss": 0.0928, "step": 137120 }, { "epoch": 2.791450381679389, "grad_norm": 0.0058338350267184615, "learning_rate": 4.855348495223528e-06, "loss": 0.1244, "step": 137130 }, { "epoch": 2.7916539440203563, "grad_norm": 4.981040356394116, "learning_rate": 4.854638225884533e-06, "loss": 0.0885, "step": 137140 }, { "epoch": 2.791857506361323, "grad_norm": 0.22066640161785803, "learning_rate": 4.85392795948131e-06, "loss": 0.1399, "step": 137150 }, { "epoch": 2.79206106870229, "grad_norm": 14.446672473110128, "learning_rate": 4.853217696028197e-06, "loss": 0.1214, "step": 137160 }, { "epoch": 2.792264631043257, "grad_norm": 0.05255124565500357, "learning_rate": 4.852507435539542e-06, "loss": 0.1057, "step": 137170 }, { "epoch": 2.7924681933842237, "grad_norm": 15.094351633402795, "learning_rate": 4.851797178029691e-06, "loss": 0.1145, "step": 137180 }, { "epoch": 2.7926717557251908, "grad_norm": 19.540965892162266, "learning_rate": 4.8510869235129845e-06, "loss": 0.1818, "step": 137190 }, { "epoch": 2.792875318066158, "grad_norm": 39.42236072483225, "learning_rate": 4.850376672003771e-06, "loss": 0.0672, "step": 137200 }, { "epoch": 2.7930788804071245, "grad_norm": 4.650620216981758, "learning_rate": 4.849666423516393e-06, "loss": 0.0948, "step": 137210 }, { "epoch": 2.7932824427480916, "grad_norm": 0.04951147829467395, "learning_rate": 4.848956178065193e-06, "loss": 0.0908, "step": 137220 }, { "epoch": 2.7934860050890586, "grad_norm": 2.974457979430444, "learning_rate": 4.848245935664521e-06, "loss": 0.0891, "step": 137230 }, { "epoch": 2.7936895674300253, "grad_norm": 0.014550764305256902, "learning_rate": 4.847535696328716e-06, "loss": 0.1308, "step": 137240 }, { "epoch": 2.7938931297709924, "grad_norm": 9.907069170207878, "learning_rate": 4.8468254600721235e-06, "loss": 0.1183, "step": 137250 }, { "epoch": 2.7940966921119594, "grad_norm": 10.96628771853719, "learning_rate": 4.84611522690909e-06, "loss": 0.0929, "step": 137260 }, { "epoch": 2.794300254452926, "grad_norm": 2.3754572872454607, "learning_rate": 4.845404996853956e-06, "loss": 0.079, "step": 137270 }, { "epoch": 2.794503816793893, "grad_norm": 9.177751818199466, "learning_rate": 4.844694769921069e-06, "loss": 0.1598, "step": 137280 }, { "epoch": 2.7947073791348602, "grad_norm": 0.3111012693414795, "learning_rate": 4.843984546124769e-06, "loss": 0.0276, "step": 137290 }, { "epoch": 2.794910941475827, "grad_norm": 10.121407424509448, "learning_rate": 4.843274325479403e-06, "loss": 0.0516, "step": 137300 }, { "epoch": 2.795114503816794, "grad_norm": 6.598722860919287, "learning_rate": 4.842564107999315e-06, "loss": 0.0882, "step": 137310 }, { "epoch": 2.795318066157761, "grad_norm": 0.12160111025803667, "learning_rate": 4.841853893698846e-06, "loss": 0.0906, "step": 137320 }, { "epoch": 2.7955216284987277, "grad_norm": 0.3398770984696691, "learning_rate": 4.841143682592341e-06, "loss": 0.0562, "step": 137330 }, { "epoch": 2.7957251908396947, "grad_norm": 0.420010670067898, "learning_rate": 4.8404334746941456e-06, "loss": 0.1691, "step": 137340 }, { "epoch": 2.795928753180662, "grad_norm": 7.02902456172796, "learning_rate": 4.8397232700186e-06, "loss": 0.0984, "step": 137350 }, { "epoch": 2.7961323155216284, "grad_norm": 10.079159560560004, "learning_rate": 4.839013068580052e-06, "loss": 0.1641, "step": 137360 }, { "epoch": 2.7963358778625955, "grad_norm": 3.32490730156718, "learning_rate": 4.838302870392839e-06, "loss": 0.0761, "step": 137370 }, { "epoch": 2.7965394402035626, "grad_norm": 0.12110762169122555, "learning_rate": 4.83759267547131e-06, "loss": 0.1448, "step": 137380 }, { "epoch": 2.7967430025445292, "grad_norm": 3.613964097395725, "learning_rate": 4.836882483829806e-06, "loss": 0.1229, "step": 137390 }, { "epoch": 2.7969465648854963, "grad_norm": 7.408706851623126, "learning_rate": 4.836172295482669e-06, "loss": 0.0514, "step": 137400 }, { "epoch": 2.797150127226463, "grad_norm": 0.3536660886347796, "learning_rate": 4.835462110444244e-06, "loss": 0.1438, "step": 137410 }, { "epoch": 2.79735368956743, "grad_norm": 5.477168645843108, "learning_rate": 4.834751928728875e-06, "loss": 0.1315, "step": 137420 }, { "epoch": 2.797557251908397, "grad_norm": 5.984972480149259, "learning_rate": 4.834041750350901e-06, "loss": 0.1537, "step": 137430 }, { "epoch": 2.7977608142493637, "grad_norm": 0.2672703707219554, "learning_rate": 4.833331575324672e-06, "loss": 0.0586, "step": 137440 }, { "epoch": 2.797964376590331, "grad_norm": 50.979845823643245, "learning_rate": 4.832621403664523e-06, "loss": 0.2977, "step": 137450 }, { "epoch": 2.7981679389312974, "grad_norm": 8.182861656819854, "learning_rate": 4.8319112353848e-06, "loss": 0.1649, "step": 137460 }, { "epoch": 2.7983715012722645, "grad_norm": 4.684729315473752, "learning_rate": 4.831201070499849e-06, "loss": 0.0327, "step": 137470 }, { "epoch": 2.7985750636132316, "grad_norm": 0.08084627257916557, "learning_rate": 4.830490909024006e-06, "loss": 0.1443, "step": 137480 }, { "epoch": 2.7987786259541982, "grad_norm": 0.013058155189754036, "learning_rate": 4.82978075097162e-06, "loss": 0.1501, "step": 137490 }, { "epoch": 2.7989821882951653, "grad_norm": 0.294960200564523, "learning_rate": 4.829070596357031e-06, "loss": 0.0912, "step": 137500 }, { "epoch": 2.7991857506361324, "grad_norm": 0.030524905704005647, "learning_rate": 4.828360445194579e-06, "loss": 0.0358, "step": 137510 }, { "epoch": 2.799389312977099, "grad_norm": 0.3419800659763526, "learning_rate": 4.827650297498612e-06, "loss": 0.1739, "step": 137520 }, { "epoch": 2.799592875318066, "grad_norm": 0.9448129276862391, "learning_rate": 4.826940153283468e-06, "loss": 0.078, "step": 137530 }, { "epoch": 2.799796437659033, "grad_norm": 0.008465159019064396, "learning_rate": 4.826230012563489e-06, "loss": 0.0743, "step": 137540 }, { "epoch": 2.8, "grad_norm": 5.739246989846811, "learning_rate": 4.825519875353021e-06, "loss": 0.175, "step": 137550 }, { "epoch": 2.800203562340967, "grad_norm": 1.5331594660975805, "learning_rate": 4.824809741666403e-06, "loss": 0.049, "step": 137560 }, { "epoch": 2.800407124681934, "grad_norm": 0.5490260436567154, "learning_rate": 4.824099611517979e-06, "loss": 0.1174, "step": 137570 }, { "epoch": 2.8006106870229006, "grad_norm": 0.07402460821820747, "learning_rate": 4.823389484922088e-06, "loss": 0.1756, "step": 137580 }, { "epoch": 2.8008142493638677, "grad_norm": 0.2490611396429894, "learning_rate": 4.822679361893074e-06, "loss": 0.1514, "step": 137590 }, { "epoch": 2.8010178117048348, "grad_norm": 7.941333233521782, "learning_rate": 4.82196924244528e-06, "loss": 0.1006, "step": 137600 }, { "epoch": 2.8012213740458014, "grad_norm": 6.157885139472143, "learning_rate": 4.821259126593046e-06, "loss": 0.139, "step": 137610 }, { "epoch": 2.8014249363867685, "grad_norm": 0.0672127795769402, "learning_rate": 4.820549014350713e-06, "loss": 0.1081, "step": 137620 }, { "epoch": 2.8016284987277356, "grad_norm": 0.12081009034799037, "learning_rate": 4.819838905732626e-06, "loss": 0.046, "step": 137630 }, { "epoch": 2.801832061068702, "grad_norm": 19.226582959484833, "learning_rate": 4.819128800753122e-06, "loss": 0.0891, "step": 137640 }, { "epoch": 2.8020356234096693, "grad_norm": 0.09683245915521285, "learning_rate": 4.818418699426548e-06, "loss": 0.0133, "step": 137650 }, { "epoch": 2.8022391857506364, "grad_norm": 8.742411271544439, "learning_rate": 4.817708601767238e-06, "loss": 0.1009, "step": 137660 }, { "epoch": 2.802442748091603, "grad_norm": 2.5760272885838726, "learning_rate": 4.81699850778954e-06, "loss": 0.1161, "step": 137670 }, { "epoch": 2.80264631043257, "grad_norm": 0.05342187121237975, "learning_rate": 4.8162884175077935e-06, "loss": 0.0862, "step": 137680 }, { "epoch": 2.802849872773537, "grad_norm": 0.0682728861913036, "learning_rate": 4.8155783309363365e-06, "loss": 0.1682, "step": 137690 }, { "epoch": 2.8030534351145038, "grad_norm": 9.308256971349005, "learning_rate": 4.814868248089514e-06, "loss": 0.1962, "step": 137700 }, { "epoch": 2.803256997455471, "grad_norm": 19.184401315689637, "learning_rate": 4.814158168981667e-06, "loss": 0.1879, "step": 137710 }, { "epoch": 2.803460559796438, "grad_norm": 15.942117110706127, "learning_rate": 4.813448093627134e-06, "loss": 0.1049, "step": 137720 }, { "epoch": 2.8036641221374046, "grad_norm": 42.98774134428246, "learning_rate": 4.812738022040258e-06, "loss": 0.0823, "step": 137730 }, { "epoch": 2.8038676844783716, "grad_norm": 11.647011516774727, "learning_rate": 4.812027954235376e-06, "loss": 0.1223, "step": 137740 }, { "epoch": 2.8040712468193383, "grad_norm": 0.3408501415518622, "learning_rate": 4.811317890226834e-06, "loss": 0.1299, "step": 137750 }, { "epoch": 2.8042748091603054, "grad_norm": 27.398904897529235, "learning_rate": 4.81060783002897e-06, "loss": 0.1287, "step": 137760 }, { "epoch": 2.804478371501272, "grad_norm": 8.86396374516557, "learning_rate": 4.809897773656123e-06, "loss": 0.0647, "step": 137770 }, { "epoch": 2.804681933842239, "grad_norm": 12.330435065187432, "learning_rate": 4.809187721122637e-06, "loss": 0.1746, "step": 137780 }, { "epoch": 2.804885496183206, "grad_norm": 4.534348202537017, "learning_rate": 4.808477672442853e-06, "loss": 0.1452, "step": 137790 }, { "epoch": 2.805089058524173, "grad_norm": 0.4425111491917134, "learning_rate": 4.807767627631104e-06, "loss": 0.1208, "step": 137800 }, { "epoch": 2.80529262086514, "grad_norm": 8.061112640204213, "learning_rate": 4.807057586701739e-06, "loss": 0.1646, "step": 137810 }, { "epoch": 2.805496183206107, "grad_norm": 0.16931414095951244, "learning_rate": 4.806347549669094e-06, "loss": 0.2336, "step": 137820 }, { "epoch": 2.8056997455470736, "grad_norm": 0.09818581310691947, "learning_rate": 4.805637516547508e-06, "loss": 0.0279, "step": 137830 }, { "epoch": 2.8059033078880407, "grad_norm": 4.005932001507827, "learning_rate": 4.8049274873513245e-06, "loss": 0.3029, "step": 137840 }, { "epoch": 2.8061068702290077, "grad_norm": 17.092110880541167, "learning_rate": 4.804217462094881e-06, "loss": 0.0973, "step": 137850 }, { "epoch": 2.8063104325699744, "grad_norm": 25.669557709731855, "learning_rate": 4.8035074407925185e-06, "loss": 0.0788, "step": 137860 }, { "epoch": 2.8065139949109414, "grad_norm": 10.027261694622858, "learning_rate": 4.8027974234585765e-06, "loss": 0.1476, "step": 137870 }, { "epoch": 2.8067175572519085, "grad_norm": 0.7368068439454137, "learning_rate": 4.802087410107392e-06, "loss": 0.0145, "step": 137880 }, { "epoch": 2.806921119592875, "grad_norm": 4.746974181991679, "learning_rate": 4.80137740075331e-06, "loss": 0.1443, "step": 137890 }, { "epoch": 2.8071246819338422, "grad_norm": 1.1932965361753851, "learning_rate": 4.800667395410666e-06, "loss": 0.0604, "step": 137900 }, { "epoch": 2.8073282442748093, "grad_norm": 2.5627390605020097, "learning_rate": 4.7999573940938e-06, "loss": 0.1245, "step": 137910 }, { "epoch": 2.807531806615776, "grad_norm": 23.629477101836546, "learning_rate": 4.799247396817054e-06, "loss": 0.1505, "step": 137920 }, { "epoch": 2.807735368956743, "grad_norm": 37.46484073541956, "learning_rate": 4.798537403594765e-06, "loss": 0.1949, "step": 137930 }, { "epoch": 2.80793893129771, "grad_norm": 0.14195455563837478, "learning_rate": 4.7978274144412726e-06, "loss": 0.1743, "step": 137940 }, { "epoch": 2.8081424936386767, "grad_norm": 0.2297761917711523, "learning_rate": 4.797117429370914e-06, "loss": 0.1631, "step": 137950 }, { "epoch": 2.808346055979644, "grad_norm": 0.08806331609842354, "learning_rate": 4.796407448398031e-06, "loss": 0.117, "step": 137960 }, { "epoch": 2.808549618320611, "grad_norm": 27.845369689401032, "learning_rate": 4.795697471536963e-06, "loss": 0.1442, "step": 137970 }, { "epoch": 2.8087531806615775, "grad_norm": 3.824975958637675, "learning_rate": 4.794987498802047e-06, "loss": 0.1249, "step": 137980 }, { "epoch": 2.8089567430025446, "grad_norm": 6.325741083473022, "learning_rate": 4.794277530207623e-06, "loss": 0.0989, "step": 137990 }, { "epoch": 2.8091603053435117, "grad_norm": 2.212853129389676, "learning_rate": 4.7935675657680295e-06, "loss": 0.1103, "step": 138000 }, { "epoch": 2.8093638676844783, "grad_norm": 0.9740086494083778, "learning_rate": 4.792857605497604e-06, "loss": 0.1074, "step": 138010 }, { "epoch": 2.8095674300254454, "grad_norm": 5.741458736601802, "learning_rate": 4.792147649410687e-06, "loss": 0.1867, "step": 138020 }, { "epoch": 2.8097709923664125, "grad_norm": 8.54506169739078, "learning_rate": 4.791437697521613e-06, "loss": 0.0619, "step": 138030 }, { "epoch": 2.809974554707379, "grad_norm": 22.057158730440097, "learning_rate": 4.790727749844726e-06, "loss": 0.1952, "step": 138040 }, { "epoch": 2.810178117048346, "grad_norm": 8.921090277722758, "learning_rate": 4.790017806394362e-06, "loss": 0.1399, "step": 138050 }, { "epoch": 2.810381679389313, "grad_norm": 12.516515975021441, "learning_rate": 4.789307867184857e-06, "loss": 0.1759, "step": 138060 }, { "epoch": 2.81058524173028, "grad_norm": 5.548144144499634, "learning_rate": 4.788597932230552e-06, "loss": 0.0983, "step": 138070 }, { "epoch": 2.810788804071247, "grad_norm": 0.078119633844161, "learning_rate": 4.787888001545784e-06, "loss": 0.1058, "step": 138080 }, { "epoch": 2.8109923664122136, "grad_norm": 0.23615681004211822, "learning_rate": 4.787178075144891e-06, "loss": 0.0323, "step": 138090 }, { "epoch": 2.8111959287531807, "grad_norm": 22.10004640365243, "learning_rate": 4.786468153042212e-06, "loss": 0.1384, "step": 138100 }, { "epoch": 2.8113994910941473, "grad_norm": 0.6242273430632036, "learning_rate": 4.785758235252082e-06, "loss": 0.1847, "step": 138110 }, { "epoch": 2.8116030534351144, "grad_norm": 0.1181067496331343, "learning_rate": 4.7850483217888394e-06, "loss": 0.0477, "step": 138120 }, { "epoch": 2.8118066157760815, "grad_norm": 11.768854822396447, "learning_rate": 4.784338412666827e-06, "loss": 0.1302, "step": 138130 }, { "epoch": 2.812010178117048, "grad_norm": 1.2590196881779292, "learning_rate": 4.783628507900374e-06, "loss": 0.1234, "step": 138140 }, { "epoch": 2.812213740458015, "grad_norm": 18.958729744194354, "learning_rate": 4.7829186075038235e-06, "loss": 0.0885, "step": 138150 }, { "epoch": 2.8124173027989823, "grad_norm": 5.248630487750042, "learning_rate": 4.782208711491512e-06, "loss": 0.1732, "step": 138160 }, { "epoch": 2.812620865139949, "grad_norm": 0.1541601198179025, "learning_rate": 4.781498819877774e-06, "loss": 0.1612, "step": 138170 }, { "epoch": 2.812824427480916, "grad_norm": 6.486872173865201, "learning_rate": 4.780788932676951e-06, "loss": 0.1142, "step": 138180 }, { "epoch": 2.813027989821883, "grad_norm": 0.2315875022352248, "learning_rate": 4.780079049903377e-06, "loss": 0.119, "step": 138190 }, { "epoch": 2.8132315521628497, "grad_norm": 1.5274079377316516, "learning_rate": 4.7793691715713885e-06, "loss": 0.0951, "step": 138200 }, { "epoch": 2.813435114503817, "grad_norm": 4.090690563105242, "learning_rate": 4.7786592976953265e-06, "loss": 0.1381, "step": 138210 }, { "epoch": 2.813638676844784, "grad_norm": 8.112463966305585, "learning_rate": 4.777949428289522e-06, "loss": 0.0991, "step": 138220 }, { "epoch": 2.8138422391857505, "grad_norm": 0.28398929703045844, "learning_rate": 4.777239563368318e-06, "loss": 0.0509, "step": 138230 }, { "epoch": 2.8140458015267176, "grad_norm": 33.66448282145478, "learning_rate": 4.7765297029460455e-06, "loss": 0.1598, "step": 138240 }, { "epoch": 2.8142493638676847, "grad_norm": 5.527120086656669, "learning_rate": 4.775819847037044e-06, "loss": 0.1222, "step": 138250 }, { "epoch": 2.8144529262086513, "grad_norm": 7.075175417371969, "learning_rate": 4.775109995655651e-06, "loss": 0.1423, "step": 138260 }, { "epoch": 2.8146564885496184, "grad_norm": 0.04152457434608265, "learning_rate": 4.774400148816199e-06, "loss": 0.1087, "step": 138270 }, { "epoch": 2.8148600508905854, "grad_norm": 0.11967594478735817, "learning_rate": 4.773690306533027e-06, "loss": 0.1455, "step": 138280 }, { "epoch": 2.815063613231552, "grad_norm": 0.9349844531391454, "learning_rate": 4.772980468820472e-06, "loss": 0.1328, "step": 138290 }, { "epoch": 2.815267175572519, "grad_norm": 0.09795451674194709, "learning_rate": 4.772270635692868e-06, "loss": 0.0752, "step": 138300 }, { "epoch": 2.8154707379134862, "grad_norm": 3.2985015963328257, "learning_rate": 4.771560807164554e-06, "loss": 0.0798, "step": 138310 }, { "epoch": 2.815674300254453, "grad_norm": 27.451038646387072, "learning_rate": 4.77085098324986e-06, "loss": 0.1545, "step": 138320 }, { "epoch": 2.81587786259542, "grad_norm": 11.516517077508674, "learning_rate": 4.770141163963127e-06, "loss": 0.0826, "step": 138330 }, { "epoch": 2.816081424936387, "grad_norm": 18.393065296591057, "learning_rate": 4.769431349318691e-06, "loss": 0.1523, "step": 138340 }, { "epoch": 2.8162849872773537, "grad_norm": 9.652245472082182, "learning_rate": 4.7687215393308825e-06, "loss": 0.0995, "step": 138350 }, { "epoch": 2.8164885496183207, "grad_norm": 8.049711106345907, "learning_rate": 4.7680117340140425e-06, "loss": 0.1443, "step": 138360 }, { "epoch": 2.8166921119592874, "grad_norm": 0.9794258976085926, "learning_rate": 4.767301933382505e-06, "loss": 0.1174, "step": 138370 }, { "epoch": 2.8168956743002544, "grad_norm": 22.724905464675228, "learning_rate": 4.766592137450602e-06, "loss": 0.0317, "step": 138380 }, { "epoch": 2.8170992366412215, "grad_norm": 15.468795585998334, "learning_rate": 4.7658823462326745e-06, "loss": 0.1875, "step": 138390 }, { "epoch": 2.817302798982188, "grad_norm": 19.270924459877502, "learning_rate": 4.7651725597430535e-06, "loss": 0.0991, "step": 138400 }, { "epoch": 2.8175063613231552, "grad_norm": 3.2905606366521827, "learning_rate": 4.764462777996073e-06, "loss": 0.1574, "step": 138410 }, { "epoch": 2.817709923664122, "grad_norm": 11.245952074630905, "learning_rate": 4.763753001006074e-06, "loss": 0.1223, "step": 138420 }, { "epoch": 2.817913486005089, "grad_norm": 6.094922027155046, "learning_rate": 4.7630432287873845e-06, "loss": 0.0942, "step": 138430 }, { "epoch": 2.818117048346056, "grad_norm": 5.140890252071992, "learning_rate": 4.762333461354343e-06, "loss": 0.1562, "step": 138440 }, { "epoch": 2.8183206106870227, "grad_norm": 7.915496730981134, "learning_rate": 4.761623698721285e-06, "loss": 0.1911, "step": 138450 }, { "epoch": 2.8185241730279897, "grad_norm": 15.208999735707422, "learning_rate": 4.7609139409025405e-06, "loss": 0.1111, "step": 138460 }, { "epoch": 2.818727735368957, "grad_norm": 24.52536569027991, "learning_rate": 4.760204187912449e-06, "loss": 0.1405, "step": 138470 }, { "epoch": 2.8189312977099235, "grad_norm": 9.182876922176266, "learning_rate": 4.759494439765342e-06, "loss": 0.0882, "step": 138480 }, { "epoch": 2.8191348600508905, "grad_norm": 0.17645209512289867, "learning_rate": 4.758784696475553e-06, "loss": 0.09, "step": 138490 }, { "epoch": 2.8193384223918576, "grad_norm": 0.07473629421713686, "learning_rate": 4.758074958057421e-06, "loss": 0.0903, "step": 138500 }, { "epoch": 2.8195419847328242, "grad_norm": 22.805295718806853, "learning_rate": 4.757365224525274e-06, "loss": 0.063, "step": 138510 }, { "epoch": 2.8197455470737913, "grad_norm": 0.11694518067320527, "learning_rate": 4.756655495893449e-06, "loss": 0.0748, "step": 138520 }, { "epoch": 2.8199491094147584, "grad_norm": 0.17907439012044454, "learning_rate": 4.755945772176282e-06, "loss": 0.1665, "step": 138530 }, { "epoch": 2.820152671755725, "grad_norm": 16.96700262438749, "learning_rate": 4.755236053388103e-06, "loss": 0.1965, "step": 138540 }, { "epoch": 2.820356234096692, "grad_norm": 9.04432697041491, "learning_rate": 4.754526339543248e-06, "loss": 0.1764, "step": 138550 }, { "epoch": 2.820559796437659, "grad_norm": 8.130593122742578, "learning_rate": 4.753816630656049e-06, "loss": 0.1779, "step": 138560 }, { "epoch": 2.820763358778626, "grad_norm": 1.0335195669247061, "learning_rate": 4.753106926740839e-06, "loss": 0.1184, "step": 138570 }, { "epoch": 2.820966921119593, "grad_norm": 8.154059297494241, "learning_rate": 4.752397227811955e-06, "loss": 0.1315, "step": 138580 }, { "epoch": 2.82117048346056, "grad_norm": 3.8844306253255576, "learning_rate": 4.751687533883727e-06, "loss": 0.1072, "step": 138590 }, { "epoch": 2.8213740458015266, "grad_norm": 25.157514618830728, "learning_rate": 4.750977844970489e-06, "loss": 0.1429, "step": 138600 }, { "epoch": 2.8215776081424937, "grad_norm": 0.7155490083628673, "learning_rate": 4.750268161086573e-06, "loss": 0.0348, "step": 138610 }, { "epoch": 2.8217811704834608, "grad_norm": 5.297769844369659, "learning_rate": 4.749558482246314e-06, "loss": 0.1615, "step": 138620 }, { "epoch": 2.8219847328244274, "grad_norm": 1.0304391477500998, "learning_rate": 4.748848808464045e-06, "loss": 0.1751, "step": 138630 }, { "epoch": 2.8221882951653945, "grad_norm": 10.003636669303628, "learning_rate": 4.748139139754096e-06, "loss": 0.1378, "step": 138640 }, { "epoch": 2.8223918575063616, "grad_norm": 2.413466523938293, "learning_rate": 4.7474294761308025e-06, "loss": 0.0681, "step": 138650 }, { "epoch": 2.822595419847328, "grad_norm": 0.6065680297818675, "learning_rate": 4.746719817608496e-06, "loss": 0.0968, "step": 138660 }, { "epoch": 2.8227989821882953, "grad_norm": 2.6139244754824937, "learning_rate": 4.7460101642015074e-06, "loss": 0.137, "step": 138670 }, { "epoch": 2.8230025445292624, "grad_norm": 11.103250057856638, "learning_rate": 4.745300515924175e-06, "loss": 0.1216, "step": 138680 }, { "epoch": 2.823206106870229, "grad_norm": 8.715525445667648, "learning_rate": 4.744590872790822e-06, "loss": 0.0667, "step": 138690 }, { "epoch": 2.823409669211196, "grad_norm": 0.28400812310729406, "learning_rate": 4.7438812348157875e-06, "loss": 0.0736, "step": 138700 }, { "epoch": 2.8236132315521627, "grad_norm": 21.766245755341124, "learning_rate": 4.7431716020134025e-06, "loss": 0.1639, "step": 138710 }, { "epoch": 2.82381679389313, "grad_norm": 0.08869614011440283, "learning_rate": 4.742461974397995e-06, "loss": 0.0832, "step": 138720 }, { "epoch": 2.8240203562340964, "grad_norm": 9.37497411893984, "learning_rate": 4.7417523519839005e-06, "loss": 0.1043, "step": 138730 }, { "epoch": 2.8242239185750635, "grad_norm": 0.21684520317460446, "learning_rate": 4.741042734785451e-06, "loss": 0.1258, "step": 138740 }, { "epoch": 2.8244274809160306, "grad_norm": 0.3403757566530532, "learning_rate": 4.740333122816976e-06, "loss": 0.0644, "step": 138750 }, { "epoch": 2.824631043256997, "grad_norm": 0.3057950914913106, "learning_rate": 4.739623516092809e-06, "loss": 0.2511, "step": 138760 }, { "epoch": 2.8248346055979643, "grad_norm": 0.07821637487129261, "learning_rate": 4.7389139146272795e-06, "loss": 0.0794, "step": 138770 }, { "epoch": 2.8250381679389314, "grad_norm": 1.6851268279616802, "learning_rate": 4.738204318434719e-06, "loss": 0.0818, "step": 138780 }, { "epoch": 2.825241730279898, "grad_norm": 10.388095907155785, "learning_rate": 4.737494727529462e-06, "loss": 0.082, "step": 138790 }, { "epoch": 2.825445292620865, "grad_norm": 7.960813060834004, "learning_rate": 4.736785141925835e-06, "loss": 0.1705, "step": 138800 }, { "epoch": 2.825648854961832, "grad_norm": 102.50498010939825, "learning_rate": 4.736075561638171e-06, "loss": 0.1228, "step": 138810 }, { "epoch": 2.825852417302799, "grad_norm": 4.860973234823717, "learning_rate": 4.735365986680802e-06, "loss": 0.1542, "step": 138820 }, { "epoch": 2.826055979643766, "grad_norm": 13.411467848692741, "learning_rate": 4.734656417068058e-06, "loss": 0.1322, "step": 138830 }, { "epoch": 2.826259541984733, "grad_norm": 10.238396243487863, "learning_rate": 4.7339468528142695e-06, "loss": 0.1064, "step": 138840 }, { "epoch": 2.8264631043256996, "grad_norm": 7.806791379801473, "learning_rate": 4.733237293933766e-06, "loss": 0.0858, "step": 138850 }, { "epoch": 2.8266666666666667, "grad_norm": 1.8700877042600057, "learning_rate": 4.732527740440878e-06, "loss": 0.1143, "step": 138860 }, { "epoch": 2.8268702290076337, "grad_norm": 23.34763960808746, "learning_rate": 4.73181819234994e-06, "loss": 0.1677, "step": 138870 }, { "epoch": 2.8270737913486004, "grad_norm": 2.5203144571131824, "learning_rate": 4.7311086496752765e-06, "loss": 0.1624, "step": 138880 }, { "epoch": 2.8272773536895675, "grad_norm": 0.11588572036399412, "learning_rate": 4.730399112431219e-06, "loss": 0.1836, "step": 138890 }, { "epoch": 2.8274809160305345, "grad_norm": 0.10788274103799732, "learning_rate": 4.729689580632102e-06, "loss": 0.1431, "step": 138900 }, { "epoch": 2.827684478371501, "grad_norm": 3.9946880923650254, "learning_rate": 4.728980054292251e-06, "loss": 0.0982, "step": 138910 }, { "epoch": 2.8278880407124682, "grad_norm": 35.32781844457955, "learning_rate": 4.7282705334259964e-06, "loss": 0.1229, "step": 138920 }, { "epoch": 2.8280916030534353, "grad_norm": 7.347563627556942, "learning_rate": 4.727561018047667e-06, "loss": 0.105, "step": 138930 }, { "epoch": 2.828295165394402, "grad_norm": 0.754342386628546, "learning_rate": 4.726851508171596e-06, "loss": 0.1042, "step": 138940 }, { "epoch": 2.828498727735369, "grad_norm": 14.147809141772514, "learning_rate": 4.72614200381211e-06, "loss": 0.1111, "step": 138950 }, { "epoch": 2.828702290076336, "grad_norm": 6.310165309642497, "learning_rate": 4.725432504983537e-06, "loss": 0.0754, "step": 138960 }, { "epoch": 2.8289058524173027, "grad_norm": 8.649283439348729, "learning_rate": 4.724723011700212e-06, "loss": 0.1447, "step": 138970 }, { "epoch": 2.82910941475827, "grad_norm": 7.580240939911914, "learning_rate": 4.724013523976457e-06, "loss": 0.0848, "step": 138980 }, { "epoch": 2.829312977099237, "grad_norm": 22.21520800230035, "learning_rate": 4.723304041826606e-06, "loss": 0.2141, "step": 138990 }, { "epoch": 2.8295165394402035, "grad_norm": 15.180411507325479, "learning_rate": 4.722594565264987e-06, "loss": 0.0463, "step": 139000 }, { "epoch": 2.8297201017811706, "grad_norm": 0.28050054559491566, "learning_rate": 4.7218850943059246e-06, "loss": 0.1069, "step": 139010 }, { "epoch": 2.8299236641221373, "grad_norm": 0.6135681433312314, "learning_rate": 4.721175628963753e-06, "loss": 0.0474, "step": 139020 }, { "epoch": 2.8301272264631043, "grad_norm": 1.3895054117019274, "learning_rate": 4.720466169252801e-06, "loss": 0.1167, "step": 139030 }, { "epoch": 2.8303307888040714, "grad_norm": 27.694029852495508, "learning_rate": 4.719756715187391e-06, "loss": 0.2112, "step": 139040 }, { "epoch": 2.830534351145038, "grad_norm": 1.0287342023761923, "learning_rate": 4.719047266781857e-06, "loss": 0.0968, "step": 139050 }, { "epoch": 2.830737913486005, "grad_norm": 0.09924002119894183, "learning_rate": 4.718337824050525e-06, "loss": 0.1507, "step": 139060 }, { "epoch": 2.8309414758269718, "grad_norm": 7.298619208369077, "learning_rate": 4.7176283870077225e-06, "loss": 0.109, "step": 139070 }, { "epoch": 2.831145038167939, "grad_norm": 3.6167910809183503, "learning_rate": 4.71691895566778e-06, "loss": 0.1448, "step": 139080 }, { "epoch": 2.831348600508906, "grad_norm": 10.466512074166157, "learning_rate": 4.7162095300450235e-06, "loss": 0.1207, "step": 139090 }, { "epoch": 2.8315521628498725, "grad_norm": 0.7772061760747432, "learning_rate": 4.715500110153779e-06, "loss": 0.1687, "step": 139100 }, { "epoch": 2.8317557251908396, "grad_norm": 0.13362047420089118, "learning_rate": 4.71479069600838e-06, "loss": 0.0827, "step": 139110 }, { "epoch": 2.8319592875318067, "grad_norm": 0.28581492446520906, "learning_rate": 4.714081287623148e-06, "loss": 0.0824, "step": 139120 }, { "epoch": 2.8321628498727733, "grad_norm": 0.9909927106081104, "learning_rate": 4.7133718850124124e-06, "loss": 0.1205, "step": 139130 }, { "epoch": 2.8323664122137404, "grad_norm": 4.043014977482368, "learning_rate": 4.712662488190501e-06, "loss": 0.1017, "step": 139140 }, { "epoch": 2.8325699745547075, "grad_norm": 0.05680099546591155, "learning_rate": 4.7119530971717405e-06, "loss": 0.0291, "step": 139150 }, { "epoch": 2.832773536895674, "grad_norm": 0.8462207056629679, "learning_rate": 4.711243711970459e-06, "loss": 0.1611, "step": 139160 }, { "epoch": 2.832977099236641, "grad_norm": 45.948871420328025, "learning_rate": 4.710534332600982e-06, "loss": 0.14, "step": 139170 }, { "epoch": 2.8331806615776083, "grad_norm": 0.21462227251202826, "learning_rate": 4.709824959077636e-06, "loss": 0.1067, "step": 139180 }, { "epoch": 2.833384223918575, "grad_norm": 13.97092267803573, "learning_rate": 4.709115591414751e-06, "loss": 0.1014, "step": 139190 }, { "epoch": 2.833587786259542, "grad_norm": 20.755781437799378, "learning_rate": 4.70840622962665e-06, "loss": 0.2374, "step": 139200 }, { "epoch": 2.833791348600509, "grad_norm": 10.528229836803986, "learning_rate": 4.707696873727662e-06, "loss": 0.136, "step": 139210 }, { "epoch": 2.8339949109414757, "grad_norm": 5.433234016453473, "learning_rate": 4.706987523732111e-06, "loss": 0.0992, "step": 139220 }, { "epoch": 2.834198473282443, "grad_norm": 8.302604449137899, "learning_rate": 4.7062781796543235e-06, "loss": 0.1147, "step": 139230 }, { "epoch": 2.83440203562341, "grad_norm": 0.49385266199799494, "learning_rate": 4.705568841508628e-06, "loss": 0.1783, "step": 139240 }, { "epoch": 2.8346055979643765, "grad_norm": 15.386366986827403, "learning_rate": 4.704859509309348e-06, "loss": 0.2885, "step": 139250 }, { "epoch": 2.8348091603053436, "grad_norm": 0.13657545378568536, "learning_rate": 4.704150183070813e-06, "loss": 0.0753, "step": 139260 }, { "epoch": 2.8350127226463107, "grad_norm": 9.32132524538806, "learning_rate": 4.703440862807342e-06, "loss": 0.1845, "step": 139270 }, { "epoch": 2.8352162849872773, "grad_norm": 0.30859918882749604, "learning_rate": 4.702731548533266e-06, "loss": 0.1058, "step": 139280 }, { "epoch": 2.8354198473282444, "grad_norm": 2.2360653244466095, "learning_rate": 4.702022240262911e-06, "loss": 0.0488, "step": 139290 }, { "epoch": 2.8356234096692114, "grad_norm": 0.21287719823358875, "learning_rate": 4.701312938010598e-06, "loss": 0.0851, "step": 139300 }, { "epoch": 2.835826972010178, "grad_norm": 0.014675716613967838, "learning_rate": 4.700603641790657e-06, "loss": 0.0319, "step": 139310 }, { "epoch": 2.836030534351145, "grad_norm": 12.100306144834262, "learning_rate": 4.699894351617412e-06, "loss": 0.1745, "step": 139320 }, { "epoch": 2.836234096692112, "grad_norm": 0.798352684856559, "learning_rate": 4.699185067505184e-06, "loss": 0.0919, "step": 139330 }, { "epoch": 2.836437659033079, "grad_norm": 24.124904825804677, "learning_rate": 4.6984757894683035e-06, "loss": 0.1267, "step": 139340 }, { "epoch": 2.836641221374046, "grad_norm": 13.092044576821662, "learning_rate": 4.697766517521092e-06, "loss": 0.0616, "step": 139350 }, { "epoch": 2.8368447837150126, "grad_norm": 21.02140650909505, "learning_rate": 4.697057251677873e-06, "loss": 0.2065, "step": 139360 }, { "epoch": 2.8370483460559797, "grad_norm": 19.486738507667155, "learning_rate": 4.696347991952977e-06, "loss": 0.1039, "step": 139370 }, { "epoch": 2.8372519083969463, "grad_norm": 0.3611813428351422, "learning_rate": 4.695638738360721e-06, "loss": 0.1363, "step": 139380 }, { "epoch": 2.8374554707379134, "grad_norm": 0.15331568122476444, "learning_rate": 4.694929490915433e-06, "loss": 0.1084, "step": 139390 }, { "epoch": 2.8376590330788805, "grad_norm": 0.9611728517435252, "learning_rate": 4.69422024963144e-06, "loss": 0.0193, "step": 139400 }, { "epoch": 2.837862595419847, "grad_norm": 0.09640025415330371, "learning_rate": 4.693511014523059e-06, "loss": 0.0505, "step": 139410 }, { "epoch": 2.838066157760814, "grad_norm": 0.2670156007012684, "learning_rate": 4.692801785604621e-06, "loss": 0.0433, "step": 139420 }, { "epoch": 2.8382697201017812, "grad_norm": 0.004165215593096283, "learning_rate": 4.6920925628904445e-06, "loss": 0.1428, "step": 139430 }, { "epoch": 2.838473282442748, "grad_norm": 42.4439302381864, "learning_rate": 4.691383346394855e-06, "loss": 0.1852, "step": 139440 }, { "epoch": 2.838676844783715, "grad_norm": 7.659643894012051, "learning_rate": 4.690674136132179e-06, "loss": 0.2079, "step": 139450 }, { "epoch": 2.838880407124682, "grad_norm": 18.765889059423042, "learning_rate": 4.689964932116735e-06, "loss": 0.1753, "step": 139460 }, { "epoch": 2.8390839694656487, "grad_norm": 28.416099909339838, "learning_rate": 4.689255734362847e-06, "loss": 0.2712, "step": 139470 }, { "epoch": 2.8392875318066157, "grad_norm": 0.14117201253510386, "learning_rate": 4.688546542884843e-06, "loss": 0.1877, "step": 139480 }, { "epoch": 2.839491094147583, "grad_norm": 0.018106482396587215, "learning_rate": 4.687837357697041e-06, "loss": 0.1861, "step": 139490 }, { "epoch": 2.8396946564885495, "grad_norm": 14.870230570863958, "learning_rate": 4.6871281788137665e-06, "loss": 0.1018, "step": 139500 }, { "epoch": 2.8398982188295165, "grad_norm": 9.030072522221102, "learning_rate": 4.6864190062493405e-06, "loss": 0.1268, "step": 139510 }, { "epoch": 2.8401017811704836, "grad_norm": 7.904535037519143, "learning_rate": 4.685709840018087e-06, "loss": 0.1368, "step": 139520 }, { "epoch": 2.8403053435114503, "grad_norm": 4.762583555335492, "learning_rate": 4.685000680134328e-06, "loss": 0.0831, "step": 139530 }, { "epoch": 2.8405089058524173, "grad_norm": 4.532963979718563, "learning_rate": 4.684291526612386e-06, "loss": 0.2458, "step": 139540 }, { "epoch": 2.8407124681933844, "grad_norm": 5.179249006591844, "learning_rate": 4.683582379466582e-06, "loss": 0.0813, "step": 139550 }, { "epoch": 2.840916030534351, "grad_norm": 0.10298506721704312, "learning_rate": 4.682873238711242e-06, "loss": 0.1043, "step": 139560 }, { "epoch": 2.841119592875318, "grad_norm": 0.07454060887812823, "learning_rate": 4.6821641043606845e-06, "loss": 0.0768, "step": 139570 }, { "epoch": 2.841323155216285, "grad_norm": 1.067520764616977, "learning_rate": 4.681454976429232e-06, "loss": 0.1254, "step": 139580 }, { "epoch": 2.841526717557252, "grad_norm": 0.11943644870300493, "learning_rate": 4.680745854931206e-06, "loss": 0.1227, "step": 139590 }, { "epoch": 2.841730279898219, "grad_norm": 1.9563110832020896, "learning_rate": 4.680036739880928e-06, "loss": 0.1107, "step": 139600 }, { "epoch": 2.841933842239186, "grad_norm": 0.44240073480520153, "learning_rate": 4.679327631292723e-06, "loss": 0.1577, "step": 139610 }, { "epoch": 2.8421374045801526, "grad_norm": 14.585530887330226, "learning_rate": 4.678618529180906e-06, "loss": 0.105, "step": 139620 }, { "epoch": 2.8423409669211197, "grad_norm": 0.848098184624184, "learning_rate": 4.6779094335598044e-06, "loss": 0.12, "step": 139630 }, { "epoch": 2.842544529262087, "grad_norm": 1.2704661387588605, "learning_rate": 4.6772003444437354e-06, "loss": 0.1375, "step": 139640 }, { "epoch": 2.8427480916030534, "grad_norm": 16.65913061443165, "learning_rate": 4.676491261847021e-06, "loss": 0.0762, "step": 139650 }, { "epoch": 2.8429516539440205, "grad_norm": 0.595081577678121, "learning_rate": 4.675782185783984e-06, "loss": 0.1648, "step": 139660 }, { "epoch": 2.843155216284987, "grad_norm": 0.7389222044070634, "learning_rate": 4.675073116268941e-06, "loss": 0.0889, "step": 139670 }, { "epoch": 2.843358778625954, "grad_norm": 6.721201736610148, "learning_rate": 4.674364053316216e-06, "loss": 0.1527, "step": 139680 }, { "epoch": 2.843562340966921, "grad_norm": 0.08785905608680226, "learning_rate": 4.673654996940129e-06, "loss": 0.0718, "step": 139690 }, { "epoch": 2.843765903307888, "grad_norm": 42.70837863042598, "learning_rate": 4.672945947154997e-06, "loss": 0.1434, "step": 139700 }, { "epoch": 2.843969465648855, "grad_norm": 0.41028578701671414, "learning_rate": 4.672236903975146e-06, "loss": 0.1098, "step": 139710 }, { "epoch": 2.8441730279898216, "grad_norm": 5.986092323895702, "learning_rate": 4.6715278674148925e-06, "loss": 0.0736, "step": 139720 }, { "epoch": 2.8443765903307887, "grad_norm": 0.04311090786632642, "learning_rate": 4.670818837488554e-06, "loss": 0.0958, "step": 139730 }, { "epoch": 2.844580152671756, "grad_norm": 0.6123633803317531, "learning_rate": 4.670109814210456e-06, "loss": 0.1314, "step": 139740 }, { "epoch": 2.8447837150127224, "grad_norm": 21.360645644625947, "learning_rate": 4.669400797594914e-06, "loss": 0.1248, "step": 139750 }, { "epoch": 2.8449872773536895, "grad_norm": 0.18242785311880205, "learning_rate": 4.668691787656248e-06, "loss": 0.0406, "step": 139760 }, { "epoch": 2.8451908396946566, "grad_norm": 0.018684002046789414, "learning_rate": 4.667982784408781e-06, "loss": 0.0529, "step": 139770 }, { "epoch": 2.845394402035623, "grad_norm": 21.677270669668644, "learning_rate": 4.6672737878668265e-06, "loss": 0.1091, "step": 139780 }, { "epoch": 2.8455979643765903, "grad_norm": 11.749186859700744, "learning_rate": 4.666564798044708e-06, "loss": 0.139, "step": 139790 }, { "epoch": 2.8458015267175574, "grad_norm": 2.2075947906225717, "learning_rate": 4.665855814956741e-06, "loss": 0.1786, "step": 139800 }, { "epoch": 2.846005089058524, "grad_norm": 4.663482608159893, "learning_rate": 4.665146838617247e-06, "loss": 0.096, "step": 139810 }, { "epoch": 2.846208651399491, "grad_norm": 0.2620960825447292, "learning_rate": 4.664437869040546e-06, "loss": 0.1046, "step": 139820 }, { "epoch": 2.846412213740458, "grad_norm": 22.477183941194973, "learning_rate": 4.663728906240952e-06, "loss": 0.1251, "step": 139830 }, { "epoch": 2.846615776081425, "grad_norm": 3.81392228305405, "learning_rate": 4.663019950232785e-06, "loss": 0.0676, "step": 139840 }, { "epoch": 2.846819338422392, "grad_norm": 0.01914160180504307, "learning_rate": 4.662311001030367e-06, "loss": 0.0991, "step": 139850 }, { "epoch": 2.847022900763359, "grad_norm": 0.14658771648413343, "learning_rate": 4.6616020586480116e-06, "loss": 0.0867, "step": 139860 }, { "epoch": 2.8472264631043256, "grad_norm": 7.61506215931992, "learning_rate": 4.66089312310004e-06, "loss": 0.0803, "step": 139870 }, { "epoch": 2.8474300254452927, "grad_norm": 9.96261516862774, "learning_rate": 4.660184194400766e-06, "loss": 0.1841, "step": 139880 }, { "epoch": 2.8476335877862597, "grad_norm": 20.129194046087203, "learning_rate": 4.659475272564511e-06, "loss": 0.0926, "step": 139890 }, { "epoch": 2.8478371501272264, "grad_norm": 0.15751476185331958, "learning_rate": 4.658766357605592e-06, "loss": 0.1662, "step": 139900 }, { "epoch": 2.8480407124681935, "grad_norm": 8.483278696293185, "learning_rate": 4.658057449538325e-06, "loss": 0.1625, "step": 139910 }, { "epoch": 2.8482442748091605, "grad_norm": 8.577140504633023, "learning_rate": 4.657348548377028e-06, "loss": 0.0446, "step": 139920 }, { "epoch": 2.848447837150127, "grad_norm": 15.045003011751968, "learning_rate": 4.6566396541360204e-06, "loss": 0.0352, "step": 139930 }, { "epoch": 2.8486513994910942, "grad_norm": 23.541908647037378, "learning_rate": 4.655930766829615e-06, "loss": 0.1821, "step": 139940 }, { "epoch": 2.8488549618320613, "grad_norm": 0.024135118065887706, "learning_rate": 4.6552218864721335e-06, "loss": 0.1438, "step": 139950 }, { "epoch": 2.849058524173028, "grad_norm": 12.152190420544787, "learning_rate": 4.654513013077888e-06, "loss": 0.2221, "step": 139960 }, { "epoch": 2.849262086513995, "grad_norm": 2.27767112616067, "learning_rate": 4.653804146661197e-06, "loss": 0.0628, "step": 139970 }, { "epoch": 2.8494656488549617, "grad_norm": 0.6200128626059881, "learning_rate": 4.653095287236379e-06, "loss": 0.1577, "step": 139980 }, { "epoch": 2.8496692111959288, "grad_norm": 13.48827468594515, "learning_rate": 4.652386434817746e-06, "loss": 0.1572, "step": 139990 }, { "epoch": 2.849872773536896, "grad_norm": 9.53513770962073, "learning_rate": 4.651677589419619e-06, "loss": 0.1142, "step": 140000 }, { "epoch": 2.8500763358778625, "grad_norm": 0.05920684901980482, "learning_rate": 4.650968751056311e-06, "loss": 0.0471, "step": 140010 }, { "epoch": 2.8502798982188295, "grad_norm": 8.69231621316184, "learning_rate": 4.6502599197421365e-06, "loss": 0.0866, "step": 140020 }, { "epoch": 2.850483460559796, "grad_norm": 29.371658280039327, "learning_rate": 4.649551095491416e-06, "loss": 0.1449, "step": 140030 }, { "epoch": 2.8506870229007633, "grad_norm": 0.19443043840990967, "learning_rate": 4.648842278318462e-06, "loss": 0.1723, "step": 140040 }, { "epoch": 2.8508905852417303, "grad_norm": 1.169341040400612, "learning_rate": 4.648133468237589e-06, "loss": 0.1144, "step": 140050 }, { "epoch": 2.851094147582697, "grad_norm": 0.7230656130988409, "learning_rate": 4.647424665263116e-06, "loss": 0.2137, "step": 140060 }, { "epoch": 2.851297709923664, "grad_norm": 0.12506941542000907, "learning_rate": 4.646715869409355e-06, "loss": 0.1652, "step": 140070 }, { "epoch": 2.851501272264631, "grad_norm": 19.59687656670527, "learning_rate": 4.646007080690623e-06, "loss": 0.1687, "step": 140080 }, { "epoch": 2.8517048346055978, "grad_norm": 14.643726106691483, "learning_rate": 4.6452982991212325e-06, "loss": 0.0614, "step": 140090 }, { "epoch": 2.851908396946565, "grad_norm": 0.013599649334943339, "learning_rate": 4.644589524715499e-06, "loss": 0.113, "step": 140100 }, { "epoch": 2.852111959287532, "grad_norm": 13.467692054223127, "learning_rate": 4.6438807574877395e-06, "loss": 0.2861, "step": 140110 }, { "epoch": 2.8523155216284986, "grad_norm": 7.461478859064569, "learning_rate": 4.643171997452266e-06, "loss": 0.0927, "step": 140120 }, { "epoch": 2.8525190839694656, "grad_norm": 17.560607275058512, "learning_rate": 4.642463244623393e-06, "loss": 0.1097, "step": 140130 }, { "epoch": 2.8527226463104327, "grad_norm": 8.520396538592763, "learning_rate": 4.641754499015438e-06, "loss": 0.0569, "step": 140140 }, { "epoch": 2.8529262086513993, "grad_norm": 0.45043146723114286, "learning_rate": 4.641045760642709e-06, "loss": 0.1175, "step": 140150 }, { "epoch": 2.8531297709923664, "grad_norm": 0.24808495031947986, "learning_rate": 4.640337029519527e-06, "loss": 0.1778, "step": 140160 }, { "epoch": 2.8533333333333335, "grad_norm": 10.950587671257708, "learning_rate": 4.639628305660198e-06, "loss": 0.1152, "step": 140170 }, { "epoch": 2.8535368956743, "grad_norm": 11.765998877643803, "learning_rate": 4.6389195890790404e-06, "loss": 0.0759, "step": 140180 }, { "epoch": 2.853740458015267, "grad_norm": 13.24646015861174, "learning_rate": 4.638210879790369e-06, "loss": 0.0995, "step": 140190 }, { "epoch": 2.8539440203562343, "grad_norm": 0.14837068837550973, "learning_rate": 4.637502177808492e-06, "loss": 0.1057, "step": 140200 }, { "epoch": 2.854147582697201, "grad_norm": 1.4532111382347652, "learning_rate": 4.636793483147727e-06, "loss": 0.0622, "step": 140210 }, { "epoch": 2.854351145038168, "grad_norm": 19.952177359014545, "learning_rate": 4.6360847958223855e-06, "loss": 0.091, "step": 140220 }, { "epoch": 2.854554707379135, "grad_norm": 8.570108761586505, "learning_rate": 4.63537611584678e-06, "loss": 0.1288, "step": 140230 }, { "epoch": 2.8547582697201017, "grad_norm": 0.15642695549585775, "learning_rate": 4.634667443235225e-06, "loss": 0.0595, "step": 140240 }, { "epoch": 2.854961832061069, "grad_norm": 15.168854418554641, "learning_rate": 4.633958778002028e-06, "loss": 0.1539, "step": 140250 }, { "epoch": 2.855165394402036, "grad_norm": 21.035295553966066, "learning_rate": 4.633250120161507e-06, "loss": 0.1064, "step": 140260 }, { "epoch": 2.8553689567430025, "grad_norm": 3.1838663503505242, "learning_rate": 4.632541469727973e-06, "loss": 0.1612, "step": 140270 }, { "epoch": 2.8555725190839696, "grad_norm": 9.500916619210983, "learning_rate": 4.631832826715735e-06, "loss": 0.0903, "step": 140280 }, { "epoch": 2.8557760814249367, "grad_norm": 14.553095789400581, "learning_rate": 4.631124191139109e-06, "loss": 0.2111, "step": 140290 }, { "epoch": 2.8559796437659033, "grad_norm": 9.712142469213486, "learning_rate": 4.630415563012405e-06, "loss": 0.1101, "step": 140300 }, { "epoch": 2.8561832061068704, "grad_norm": 4.248462534245527, "learning_rate": 4.629706942349934e-06, "loss": 0.0284, "step": 140310 }, { "epoch": 2.856386768447837, "grad_norm": 9.600694590589928, "learning_rate": 4.628998329166009e-06, "loss": 0.0886, "step": 140320 }, { "epoch": 2.856590330788804, "grad_norm": 0.07006973530980029, "learning_rate": 4.62828972347494e-06, "loss": 0.0705, "step": 140330 }, { "epoch": 2.8567938931297707, "grad_norm": 16.938201760248663, "learning_rate": 4.627581125291038e-06, "loss": 0.137, "step": 140340 }, { "epoch": 2.856997455470738, "grad_norm": 4.89530978984957, "learning_rate": 4.626872534628617e-06, "loss": 0.0693, "step": 140350 }, { "epoch": 2.857201017811705, "grad_norm": 3.679622561372333, "learning_rate": 4.626163951501985e-06, "loss": 0.1874, "step": 140360 }, { "epoch": 2.8574045801526715, "grad_norm": 10.17370832690032, "learning_rate": 4.625455375925453e-06, "loss": 0.188, "step": 140370 }, { "epoch": 2.8576081424936386, "grad_norm": 16.16315630770481, "learning_rate": 4.624746807913333e-06, "loss": 0.1821, "step": 140380 }, { "epoch": 2.8578117048346057, "grad_norm": 10.495959315356338, "learning_rate": 4.624038247479932e-06, "loss": 0.0806, "step": 140390 }, { "epoch": 2.8580152671755723, "grad_norm": 11.185022419042689, "learning_rate": 4.6233296946395656e-06, "loss": 0.1733, "step": 140400 }, { "epoch": 2.8582188295165394, "grad_norm": 5.772928359673621, "learning_rate": 4.62262114940654e-06, "loss": 0.0373, "step": 140410 }, { "epoch": 2.8584223918575065, "grad_norm": 8.53193763085589, "learning_rate": 4.621912611795165e-06, "loss": 0.1047, "step": 140420 }, { "epoch": 2.858625954198473, "grad_norm": 0.21086390126584892, "learning_rate": 4.621204081819755e-06, "loss": 0.1903, "step": 140430 }, { "epoch": 2.85882951653944, "grad_norm": 28.701149280406682, "learning_rate": 4.6204955594946145e-06, "loss": 0.1115, "step": 140440 }, { "epoch": 2.8590330788804073, "grad_norm": 13.637499077514283, "learning_rate": 4.619787044834056e-06, "loss": 0.1366, "step": 140450 }, { "epoch": 2.859236641221374, "grad_norm": 6.791434648358703, "learning_rate": 4.619078537852386e-06, "loss": 0.1044, "step": 140460 }, { "epoch": 2.859440203562341, "grad_norm": 6.9182826718583135, "learning_rate": 4.618370038563916e-06, "loss": 0.155, "step": 140470 }, { "epoch": 2.859643765903308, "grad_norm": 0.08045912719895336, "learning_rate": 4.617661546982956e-06, "loss": 0.1061, "step": 140480 }, { "epoch": 2.8598473282442747, "grad_norm": 0.22949347882516247, "learning_rate": 4.616953063123812e-06, "loss": 0.0708, "step": 140490 }, { "epoch": 2.8600508905852418, "grad_norm": 34.99548983433234, "learning_rate": 4.6162445870007945e-06, "loss": 0.1563, "step": 140500 }, { "epoch": 2.860254452926209, "grad_norm": 5.093920486092764, "learning_rate": 4.615536118628214e-06, "loss": 0.1734, "step": 140510 }, { "epoch": 2.8604580152671755, "grad_norm": 0.25488940587555153, "learning_rate": 4.614827658020374e-06, "loss": 0.1136, "step": 140520 }, { "epoch": 2.8606615776081425, "grad_norm": 24.916642241977232, "learning_rate": 4.614119205191588e-06, "loss": 0.1787, "step": 140530 }, { "epoch": 2.8608651399491096, "grad_norm": 10.748486461606515, "learning_rate": 4.613410760156159e-06, "loss": 0.1002, "step": 140540 }, { "epoch": 2.8610687022900763, "grad_norm": 17.899331933378352, "learning_rate": 4.612702322928399e-06, "loss": 0.2079, "step": 140550 }, { "epoch": 2.8612722646310433, "grad_norm": 3.6560072120599822, "learning_rate": 4.611993893522616e-06, "loss": 0.0995, "step": 140560 }, { "epoch": 2.8614758269720104, "grad_norm": 10.383741505556044, "learning_rate": 4.6112854719531125e-06, "loss": 0.1586, "step": 140570 }, { "epoch": 2.861679389312977, "grad_norm": 1.262321630466803, "learning_rate": 4.610577058234201e-06, "loss": 0.1234, "step": 140580 }, { "epoch": 2.861882951653944, "grad_norm": 15.256216944081013, "learning_rate": 4.609868652380189e-06, "loss": 0.0747, "step": 140590 }, { "epoch": 2.862086513994911, "grad_norm": 22.051271988835026, "learning_rate": 4.60916025440538e-06, "loss": 0.146, "step": 140600 }, { "epoch": 2.862290076335878, "grad_norm": 8.270300147789976, "learning_rate": 4.608451864324084e-06, "loss": 0.1378, "step": 140610 }, { "epoch": 2.862493638676845, "grad_norm": 10.35580899122833, "learning_rate": 4.6077434821506075e-06, "loss": 0.078, "step": 140620 }, { "epoch": 2.8626972010178116, "grad_norm": 0.510156647329704, "learning_rate": 4.607035107899255e-06, "loss": 0.1691, "step": 140630 }, { "epoch": 2.8629007633587786, "grad_norm": 0.13842584110911185, "learning_rate": 4.606326741584337e-06, "loss": 0.0598, "step": 140640 }, { "epoch": 2.8631043256997457, "grad_norm": 1.8177057584049279, "learning_rate": 4.605618383220155e-06, "loss": 0.1041, "step": 140650 }, { "epoch": 2.8633078880407123, "grad_norm": 12.304778773852814, "learning_rate": 4.60491003282102e-06, "loss": 0.1737, "step": 140660 }, { "epoch": 2.8635114503816794, "grad_norm": 12.66453268005759, "learning_rate": 4.604201690401234e-06, "loss": 0.1884, "step": 140670 }, { "epoch": 2.863715012722646, "grad_norm": 28.23801491522868, "learning_rate": 4.603493355975104e-06, "loss": 0.1109, "step": 140680 }, { "epoch": 2.863918575063613, "grad_norm": 14.030300886429503, "learning_rate": 4.6027850295569386e-06, "loss": 0.1619, "step": 140690 }, { "epoch": 2.86412213740458, "grad_norm": 0.02496172061561142, "learning_rate": 4.602076711161041e-06, "loss": 0.2348, "step": 140700 }, { "epoch": 2.864325699745547, "grad_norm": 0.36790539690635854, "learning_rate": 4.601368400801714e-06, "loss": 0.1049, "step": 140710 }, { "epoch": 2.864529262086514, "grad_norm": 7.906875662566332, "learning_rate": 4.6006600984932685e-06, "loss": 0.2013, "step": 140720 }, { "epoch": 2.864732824427481, "grad_norm": 16.952114194644796, "learning_rate": 4.599951804250005e-06, "loss": 0.1173, "step": 140730 }, { "epoch": 2.8649363867684476, "grad_norm": 6.799372841465751, "learning_rate": 4.599243518086232e-06, "loss": 0.1808, "step": 140740 }, { "epoch": 2.8651399491094147, "grad_norm": 0.06647826417677971, "learning_rate": 4.5985352400162495e-06, "loss": 0.1339, "step": 140750 }, { "epoch": 2.865343511450382, "grad_norm": 9.208370003174341, "learning_rate": 4.5978269700543664e-06, "loss": 0.1234, "step": 140760 }, { "epoch": 2.8655470737913484, "grad_norm": 0.08983277640338805, "learning_rate": 4.5971187082148864e-06, "loss": 0.0629, "step": 140770 }, { "epoch": 2.8657506361323155, "grad_norm": 16.676840728933588, "learning_rate": 4.5964104545121125e-06, "loss": 0.1389, "step": 140780 }, { "epoch": 2.8659541984732826, "grad_norm": 21.19544053988712, "learning_rate": 4.595702208960348e-06, "loss": 0.1921, "step": 140790 }, { "epoch": 2.8661577608142492, "grad_norm": 8.385152088947985, "learning_rate": 4.5949939715739e-06, "loss": 0.1369, "step": 140800 }, { "epoch": 2.8663613231552163, "grad_norm": 0.03409246004933137, "learning_rate": 4.594285742367069e-06, "loss": 0.1025, "step": 140810 }, { "epoch": 2.8665648854961834, "grad_norm": 1.3709260046109109, "learning_rate": 4.593577521354162e-06, "loss": 0.1738, "step": 140820 }, { "epoch": 2.86676844783715, "grad_norm": 0.22494057281289684, "learning_rate": 4.592869308549478e-06, "loss": 0.0813, "step": 140830 }, { "epoch": 2.866972010178117, "grad_norm": 0.48048504132918624, "learning_rate": 4.592161103967323e-06, "loss": 0.0932, "step": 140840 }, { "epoch": 2.867175572519084, "grad_norm": 3.9438069343851017, "learning_rate": 4.591452907622001e-06, "loss": 0.2006, "step": 140850 }, { "epoch": 2.867379134860051, "grad_norm": 29.42985052352127, "learning_rate": 4.590744719527812e-06, "loss": 0.1797, "step": 140860 }, { "epoch": 2.867582697201018, "grad_norm": 0.0666754329821751, "learning_rate": 4.590036539699061e-06, "loss": 0.1077, "step": 140870 }, { "epoch": 2.867786259541985, "grad_norm": 21.235068008783138, "learning_rate": 4.5893283681500526e-06, "loss": 0.1388, "step": 140880 }, { "epoch": 2.8679898218829516, "grad_norm": 0.1484584060076161, "learning_rate": 4.588620204895083e-06, "loss": 0.077, "step": 140890 }, { "epoch": 2.8681933842239187, "grad_norm": 16.04965284589446, "learning_rate": 4.587912049948462e-06, "loss": 0.0736, "step": 140900 }, { "epoch": 2.8683969465648858, "grad_norm": 7.759450570372593, "learning_rate": 4.587203903324485e-06, "loss": 0.1052, "step": 140910 }, { "epoch": 2.8686005089058524, "grad_norm": 0.1329225482981113, "learning_rate": 4.586495765037457e-06, "loss": 0.141, "step": 140920 }, { "epoch": 2.8688040712468195, "grad_norm": 0.8078351114893126, "learning_rate": 4.585787635101682e-06, "loss": 0.0235, "step": 140930 }, { "epoch": 2.869007633587786, "grad_norm": 6.963852187338426, "learning_rate": 4.585079513531457e-06, "loss": 0.0361, "step": 140940 }, { "epoch": 2.869211195928753, "grad_norm": 5.514398536577579, "learning_rate": 4.584371400341087e-06, "loss": 0.1352, "step": 140950 }, { "epoch": 2.8694147582697203, "grad_norm": 0.8408300694706295, "learning_rate": 4.583663295544872e-06, "loss": 0.1481, "step": 140960 }, { "epoch": 2.869618320610687, "grad_norm": 1.8090172771383466, "learning_rate": 4.582955199157111e-06, "loss": 0.0923, "step": 140970 }, { "epoch": 2.869821882951654, "grad_norm": 8.214547160572145, "learning_rate": 4.582247111192109e-06, "loss": 0.0858, "step": 140980 }, { "epoch": 2.8700254452926206, "grad_norm": 0.8542778053489748, "learning_rate": 4.5815390316641635e-06, "loss": 0.1233, "step": 140990 }, { "epoch": 2.8702290076335877, "grad_norm": 14.32887177533685, "learning_rate": 4.580830960587576e-06, "loss": 0.1462, "step": 141000 }, { "epoch": 2.8704325699745548, "grad_norm": 7.634856595736603, "learning_rate": 4.580122897976648e-06, "loss": 0.1251, "step": 141010 }, { "epoch": 2.8706361323155214, "grad_norm": 31.680653505837586, "learning_rate": 4.579414843845679e-06, "loss": 0.1338, "step": 141020 }, { "epoch": 2.8708396946564885, "grad_norm": 0.35180956973681604, "learning_rate": 4.578706798208969e-06, "loss": 0.1687, "step": 141030 }, { "epoch": 2.8710432569974556, "grad_norm": 7.177620313795065, "learning_rate": 4.577998761080816e-06, "loss": 0.1218, "step": 141040 }, { "epoch": 2.871246819338422, "grad_norm": 27.093347887135078, "learning_rate": 4.577290732475523e-06, "loss": 0.1652, "step": 141050 }, { "epoch": 2.8714503816793893, "grad_norm": 0.10869331777103265, "learning_rate": 4.5765827124073885e-06, "loss": 0.1311, "step": 141060 }, { "epoch": 2.8716539440203563, "grad_norm": 0.01586627239768008, "learning_rate": 4.575874700890711e-06, "loss": 0.0698, "step": 141070 }, { "epoch": 2.871857506361323, "grad_norm": 0.36704024745055835, "learning_rate": 4.5751666979397895e-06, "loss": 0.0756, "step": 141080 }, { "epoch": 2.87206106870229, "grad_norm": 1.8534163946047233, "learning_rate": 4.574458703568925e-06, "loss": 0.1126, "step": 141090 }, { "epoch": 2.872264631043257, "grad_norm": 1.3989133051521783, "learning_rate": 4.573750717792414e-06, "loss": 0.0743, "step": 141100 }, { "epoch": 2.8724681933842238, "grad_norm": 1.54889559301556, "learning_rate": 4.573042740624558e-06, "loss": 0.3135, "step": 141110 }, { "epoch": 2.872671755725191, "grad_norm": 22.974284021075462, "learning_rate": 4.572334772079651e-06, "loss": 0.1093, "step": 141120 }, { "epoch": 2.872875318066158, "grad_norm": 21.375799239850593, "learning_rate": 4.571626812171995e-06, "loss": 0.104, "step": 141130 }, { "epoch": 2.8730788804071246, "grad_norm": 12.820214670152076, "learning_rate": 4.570918860915889e-06, "loss": 0.0745, "step": 141140 }, { "epoch": 2.8732824427480916, "grad_norm": 2.8057883714491263, "learning_rate": 4.570210918325627e-06, "loss": 0.1449, "step": 141150 }, { "epoch": 2.8734860050890587, "grad_norm": 0.9964341411505226, "learning_rate": 4.56950298441551e-06, "loss": 0.1412, "step": 141160 }, { "epoch": 2.8736895674300253, "grad_norm": 0.15071379176921482, "learning_rate": 4.5687950591998356e-06, "loss": 0.0312, "step": 141170 }, { "epoch": 2.8738931297709924, "grad_norm": 17.987106224387016, "learning_rate": 4.568087142692899e-06, "loss": 0.2188, "step": 141180 }, { "epoch": 2.8740966921119595, "grad_norm": 11.225715836269613, "learning_rate": 4.567379234909001e-06, "loss": 0.1114, "step": 141190 }, { "epoch": 2.874300254452926, "grad_norm": 0.8924073314986538, "learning_rate": 4.566671335862434e-06, "loss": 0.1436, "step": 141200 }, { "epoch": 2.874503816793893, "grad_norm": 89.25058135675644, "learning_rate": 4.565963445567499e-06, "loss": 0.087, "step": 141210 }, { "epoch": 2.8747073791348603, "grad_norm": 21.745906757083926, "learning_rate": 4.565255564038491e-06, "loss": 0.1743, "step": 141220 }, { "epoch": 2.874910941475827, "grad_norm": 0.2867218496727967, "learning_rate": 4.564547691289706e-06, "loss": 0.066, "step": 141230 }, { "epoch": 2.875114503816794, "grad_norm": 7.717760785697584, "learning_rate": 4.563839827335441e-06, "loss": 0.1846, "step": 141240 }, { "epoch": 2.875318066157761, "grad_norm": 0.3125519832046433, "learning_rate": 4.5631319721899945e-06, "loss": 0.1405, "step": 141250 }, { "epoch": 2.8755216284987277, "grad_norm": 0.20058431563220935, "learning_rate": 4.562424125867658e-06, "loss": 0.0301, "step": 141260 }, { "epoch": 2.875725190839695, "grad_norm": 0.28718414050313923, "learning_rate": 4.561716288382732e-06, "loss": 0.054, "step": 141270 }, { "epoch": 2.8759287531806614, "grad_norm": 8.952238292597494, "learning_rate": 4.561008459749509e-06, "loss": 0.1619, "step": 141280 }, { "epoch": 2.8761323155216285, "grad_norm": 11.584173254443774, "learning_rate": 4.560300639982285e-06, "loss": 0.1395, "step": 141290 }, { "epoch": 2.876335877862595, "grad_norm": 19.726755063427305, "learning_rate": 4.559592829095358e-06, "loss": 0.0721, "step": 141300 }, { "epoch": 2.8765394402035622, "grad_norm": 45.65001876830528, "learning_rate": 4.558885027103019e-06, "loss": 0.145, "step": 141310 }, { "epoch": 2.8767430025445293, "grad_norm": 0.6988271643800423, "learning_rate": 4.558177234019565e-06, "loss": 0.0475, "step": 141320 }, { "epoch": 2.876946564885496, "grad_norm": 1.7797002209415702, "learning_rate": 4.557469449859293e-06, "loss": 0.0801, "step": 141330 }, { "epoch": 2.877150127226463, "grad_norm": 0.24986654110413833, "learning_rate": 4.556761674636494e-06, "loss": 0.1754, "step": 141340 }, { "epoch": 2.87735368956743, "grad_norm": 26.47678083596739, "learning_rate": 4.556053908365465e-06, "loss": 0.0797, "step": 141350 }, { "epoch": 2.8775572519083967, "grad_norm": 40.5175095362526, "learning_rate": 4.555346151060497e-06, "loss": 0.1573, "step": 141360 }, { "epoch": 2.877760814249364, "grad_norm": 37.04208443530066, "learning_rate": 4.554638402735887e-06, "loss": 0.0612, "step": 141370 }, { "epoch": 2.877964376590331, "grad_norm": 6.827238062474196, "learning_rate": 4.553930663405929e-06, "loss": 0.0886, "step": 141380 }, { "epoch": 2.8781679389312975, "grad_norm": 0.18335065056210564, "learning_rate": 4.553222933084915e-06, "loss": 0.0916, "step": 141390 }, { "epoch": 2.8783715012722646, "grad_norm": 0.42167868357850685, "learning_rate": 4.55251521178714e-06, "loss": 0.1744, "step": 141400 }, { "epoch": 2.8785750636132317, "grad_norm": 0.012599714352253204, "learning_rate": 4.5518074995268955e-06, "loss": 0.0677, "step": 141410 }, { "epoch": 2.8787786259541983, "grad_norm": 18.46171421087533, "learning_rate": 4.551099796318476e-06, "loss": 0.1307, "step": 141420 }, { "epoch": 2.8789821882951654, "grad_norm": 0.9564953857593913, "learning_rate": 4.550392102176175e-06, "loss": 0.1091, "step": 141430 }, { "epoch": 2.8791857506361325, "grad_norm": 6.027840861727622, "learning_rate": 4.549684417114284e-06, "loss": 0.0679, "step": 141440 }, { "epoch": 2.879389312977099, "grad_norm": 0.33792363308819473, "learning_rate": 4.548976741147096e-06, "loss": 0.1511, "step": 141450 }, { "epoch": 2.879592875318066, "grad_norm": 21.88065217893867, "learning_rate": 4.5482690742889056e-06, "loss": 0.1079, "step": 141460 }, { "epoch": 2.8797964376590333, "grad_norm": 6.030125344571097, "learning_rate": 4.5475614165539995e-06, "loss": 0.0561, "step": 141470 }, { "epoch": 2.88, "grad_norm": 29.049897376255483, "learning_rate": 4.546853767956677e-06, "loss": 0.152, "step": 141480 }, { "epoch": 2.880203562340967, "grad_norm": 0.04026491806032877, "learning_rate": 4.546146128511224e-06, "loss": 0.0447, "step": 141490 }, { "epoch": 2.880407124681934, "grad_norm": 0.3245705294479544, "learning_rate": 4.545438498231935e-06, "loss": 0.1511, "step": 141500 }, { "epoch": 2.8806106870229007, "grad_norm": 0.21435786123065634, "learning_rate": 4.544730877133102e-06, "loss": 0.1601, "step": 141510 }, { "epoch": 2.8808142493638678, "grad_norm": 4.9182505121925875, "learning_rate": 4.544023265229013e-06, "loss": 0.072, "step": 141520 }, { "epoch": 2.881017811704835, "grad_norm": 12.402870717956127, "learning_rate": 4.543315662533963e-06, "loss": 0.1356, "step": 141530 }, { "epoch": 2.8812213740458015, "grad_norm": 0.06835375597366197, "learning_rate": 4.542608069062242e-06, "loss": 0.145, "step": 141540 }, { "epoch": 2.8814249363867686, "grad_norm": 9.00216708797878, "learning_rate": 4.541900484828139e-06, "loss": 0.1218, "step": 141550 }, { "epoch": 2.8816284987277356, "grad_norm": 0.02264383632439029, "learning_rate": 4.541192909845947e-06, "loss": 0.0954, "step": 141560 }, { "epoch": 2.8818320610687023, "grad_norm": 32.31589306595943, "learning_rate": 4.5404853441299545e-06, "loss": 0.0628, "step": 141570 }, { "epoch": 2.8820356234096693, "grad_norm": 9.886795293652677, "learning_rate": 4.539777787694451e-06, "loss": 0.0737, "step": 141580 }, { "epoch": 2.882239185750636, "grad_norm": 0.17381722318008264, "learning_rate": 4.53907024055373e-06, "loss": 0.0826, "step": 141590 }, { "epoch": 2.882442748091603, "grad_norm": 9.813741084151356, "learning_rate": 4.538362702722079e-06, "loss": 0.1633, "step": 141600 }, { "epoch": 2.88264631043257, "grad_norm": 0.06155021582932966, "learning_rate": 4.537655174213784e-06, "loss": 0.0651, "step": 141610 }, { "epoch": 2.8828498727735368, "grad_norm": 0.09576389418048532, "learning_rate": 4.536947655043144e-06, "loss": 0.0469, "step": 141620 }, { "epoch": 2.883053435114504, "grad_norm": 0.6559677307548661, "learning_rate": 4.5362401452244384e-06, "loss": 0.0599, "step": 141630 }, { "epoch": 2.8832569974554705, "grad_norm": 0.5536753357901777, "learning_rate": 4.535532644771963e-06, "loss": 0.2184, "step": 141640 }, { "epoch": 2.8834605597964376, "grad_norm": 19.034014516560255, "learning_rate": 4.534825153700003e-06, "loss": 0.1335, "step": 141650 }, { "epoch": 2.8836641221374046, "grad_norm": 0.06064423028965289, "learning_rate": 4.534117672022846e-06, "loss": 0.1133, "step": 141660 }, { "epoch": 2.8838676844783713, "grad_norm": 0.14630003596259564, "learning_rate": 4.533410199754787e-06, "loss": 0.2027, "step": 141670 }, { "epoch": 2.8840712468193384, "grad_norm": 20.220428235968008, "learning_rate": 4.532702736910107e-06, "loss": 0.0759, "step": 141680 }, { "epoch": 2.8842748091603054, "grad_norm": 5.6999015956333166, "learning_rate": 4.531995283503096e-06, "loss": 0.0536, "step": 141690 }, { "epoch": 2.884478371501272, "grad_norm": 0.7407010384502337, "learning_rate": 4.531287839548046e-06, "loss": 0.0296, "step": 141700 }, { "epoch": 2.884681933842239, "grad_norm": 9.89163220854857, "learning_rate": 4.530580405059241e-06, "loss": 0.0825, "step": 141710 }, { "epoch": 2.884885496183206, "grad_norm": 24.555624475737, "learning_rate": 4.52987298005097e-06, "loss": 0.1276, "step": 141720 }, { "epoch": 2.885089058524173, "grad_norm": 1.6873336843599425, "learning_rate": 4.529165564537517e-06, "loss": 0.0915, "step": 141730 }, { "epoch": 2.88529262086514, "grad_norm": 17.0415747787258, "learning_rate": 4.528458158533173e-06, "loss": 0.1419, "step": 141740 }, { "epoch": 2.885496183206107, "grad_norm": 8.068471278519414, "learning_rate": 4.5277507620522255e-06, "loss": 0.1743, "step": 141750 }, { "epoch": 2.8856997455470736, "grad_norm": 0.022118299134657335, "learning_rate": 4.5270433751089584e-06, "loss": 0.1056, "step": 141760 }, { "epoch": 2.8859033078880407, "grad_norm": 0.03424896501950722, "learning_rate": 4.52633599771766e-06, "loss": 0.0924, "step": 141770 }, { "epoch": 2.886106870229008, "grad_norm": 1.3301683836460707, "learning_rate": 4.525628629892614e-06, "loss": 0.1158, "step": 141780 }, { "epoch": 2.8863104325699744, "grad_norm": 0.3724826631650524, "learning_rate": 4.524921271648111e-06, "loss": 0.153, "step": 141790 }, { "epoch": 2.8865139949109415, "grad_norm": 23.154588590488416, "learning_rate": 4.524213922998435e-06, "loss": 0.0911, "step": 141800 }, { "epoch": 2.8867175572519086, "grad_norm": 9.221526906552608, "learning_rate": 4.5235065839578696e-06, "loss": 0.0845, "step": 141810 }, { "epoch": 2.8869211195928752, "grad_norm": 23.55783143111246, "learning_rate": 4.522799254540704e-06, "loss": 0.1202, "step": 141820 }, { "epoch": 2.8871246819338423, "grad_norm": 0.4406275179526819, "learning_rate": 4.522091934761223e-06, "loss": 0.056, "step": 141830 }, { "epoch": 2.8873282442748094, "grad_norm": 17.71957860825001, "learning_rate": 4.521384624633708e-06, "loss": 0.1345, "step": 141840 }, { "epoch": 2.887531806615776, "grad_norm": 0.26992349971110097, "learning_rate": 4.52067732417245e-06, "loss": 0.0492, "step": 141850 }, { "epoch": 2.887735368956743, "grad_norm": 11.96914601348974, "learning_rate": 4.51997003339173e-06, "loss": 0.1644, "step": 141860 }, { "epoch": 2.88793893129771, "grad_norm": 24.934398685357554, "learning_rate": 4.519262752305831e-06, "loss": 0.2223, "step": 141870 }, { "epoch": 2.888142493638677, "grad_norm": 29.70073187691644, "learning_rate": 4.518555480929043e-06, "loss": 0.0978, "step": 141880 }, { "epoch": 2.888346055979644, "grad_norm": 7.736640111292047, "learning_rate": 4.5178482192756456e-06, "loss": 0.1157, "step": 141890 }, { "epoch": 2.8885496183206105, "grad_norm": 12.614174673286623, "learning_rate": 4.517140967359924e-06, "loss": 0.1326, "step": 141900 }, { "epoch": 2.8887531806615776, "grad_norm": 2.5473508569290293, "learning_rate": 4.516433725196165e-06, "loss": 0.0999, "step": 141910 }, { "epoch": 2.8889567430025447, "grad_norm": 21.57447015679719, "learning_rate": 4.515726492798647e-06, "loss": 0.1311, "step": 141920 }, { "epoch": 2.8891603053435113, "grad_norm": 0.06156264790683824, "learning_rate": 4.515019270181659e-06, "loss": 0.0703, "step": 141930 }, { "epoch": 2.8893638676844784, "grad_norm": 0.019758514630460054, "learning_rate": 4.514312057359479e-06, "loss": 0.1677, "step": 141940 }, { "epoch": 2.889567430025445, "grad_norm": 36.77835778362303, "learning_rate": 4.513604854346392e-06, "loss": 0.1816, "step": 141950 }, { "epoch": 2.889770992366412, "grad_norm": 6.238293638273615, "learning_rate": 4.5128976611566845e-06, "loss": 0.1158, "step": 141960 }, { "epoch": 2.889974554707379, "grad_norm": 0.019285855258123572, "learning_rate": 4.512190477804634e-06, "loss": 0.1254, "step": 141970 }, { "epoch": 2.890178117048346, "grad_norm": 2.7766820115982678, "learning_rate": 4.511483304304524e-06, "loss": 0.1398, "step": 141980 }, { "epoch": 2.890381679389313, "grad_norm": 17.398955556790465, "learning_rate": 4.51077614067064e-06, "loss": 0.1754, "step": 141990 }, { "epoch": 2.89058524173028, "grad_norm": 14.09814351199156, "learning_rate": 4.510068986917262e-06, "loss": 0.1664, "step": 142000 }, { "epoch": 2.8907888040712466, "grad_norm": 67.03666714301717, "learning_rate": 4.5093618430586724e-06, "loss": 0.0806, "step": 142010 }, { "epoch": 2.8909923664122137, "grad_norm": 8.99102228743074, "learning_rate": 4.508654709109149e-06, "loss": 0.0166, "step": 142020 }, { "epoch": 2.8911959287531808, "grad_norm": 14.119062122041043, "learning_rate": 4.507947585082979e-06, "loss": 0.1282, "step": 142030 }, { "epoch": 2.8913994910941474, "grad_norm": 1.3772171303029912, "learning_rate": 4.5072404709944415e-06, "loss": 0.1713, "step": 142040 }, { "epoch": 2.8916030534351145, "grad_norm": 0.18461662242755206, "learning_rate": 4.5065333668578175e-06, "loss": 0.0766, "step": 142050 }, { "epoch": 2.8918066157760816, "grad_norm": 51.66913151598113, "learning_rate": 4.505826272687388e-06, "loss": 0.1149, "step": 142060 }, { "epoch": 2.892010178117048, "grad_norm": 45.71471534069096, "learning_rate": 4.505119188497431e-06, "loss": 0.1393, "step": 142070 }, { "epoch": 2.8922137404580153, "grad_norm": 9.872566577370824, "learning_rate": 4.504412114302231e-06, "loss": 0.1523, "step": 142080 }, { "epoch": 2.8924173027989823, "grad_norm": 0.17240850848652367, "learning_rate": 4.503705050116067e-06, "loss": 0.0824, "step": 142090 }, { "epoch": 2.892620865139949, "grad_norm": 8.559800860884774, "learning_rate": 4.502997995953217e-06, "loss": 0.204, "step": 142100 }, { "epoch": 2.892824427480916, "grad_norm": 24.086154519424298, "learning_rate": 4.502290951827964e-06, "loss": 0.1529, "step": 142110 }, { "epoch": 2.893027989821883, "grad_norm": 0.4305113996697769, "learning_rate": 4.501583917754587e-06, "loss": 0.1124, "step": 142120 }, { "epoch": 2.8932315521628498, "grad_norm": 0.053538519567615274, "learning_rate": 4.500876893747363e-06, "loss": 0.1337, "step": 142130 }, { "epoch": 2.893435114503817, "grad_norm": 5.656574160688703, "learning_rate": 4.500169879820574e-06, "loss": 0.094, "step": 142140 }, { "epoch": 2.893638676844784, "grad_norm": 0.38862440356264033, "learning_rate": 4.4994628759884984e-06, "loss": 0.0873, "step": 142150 }, { "epoch": 2.8938422391857506, "grad_norm": 10.214731871623965, "learning_rate": 4.498755882265413e-06, "loss": 0.1735, "step": 142160 }, { "epoch": 2.8940458015267176, "grad_norm": 36.00338350036076, "learning_rate": 4.498048898665602e-06, "loss": 0.2614, "step": 142170 }, { "epoch": 2.8942493638676847, "grad_norm": 17.115770355357693, "learning_rate": 4.497341925203336e-06, "loss": 0.1367, "step": 142180 }, { "epoch": 2.8944529262086514, "grad_norm": 15.271923252235686, "learning_rate": 4.4966349618928985e-06, "loss": 0.1511, "step": 142190 }, { "epoch": 2.8946564885496184, "grad_norm": 0.21996360179924426, "learning_rate": 4.4959280087485675e-06, "loss": 0.087, "step": 142200 }, { "epoch": 2.8948600508905855, "grad_norm": 4.6520756821456, "learning_rate": 4.495221065784617e-06, "loss": 0.1025, "step": 142210 }, { "epoch": 2.895063613231552, "grad_norm": 4.04016958467655, "learning_rate": 4.49451413301533e-06, "loss": 0.1559, "step": 142220 }, { "epoch": 2.8952671755725192, "grad_norm": 0.10083998616986231, "learning_rate": 4.493807210454979e-06, "loss": 0.0539, "step": 142230 }, { "epoch": 2.895470737913486, "grad_norm": 15.168671457182203, "learning_rate": 4.493100298117844e-06, "loss": 0.0515, "step": 142240 }, { "epoch": 2.895674300254453, "grad_norm": 0.12260231930671812, "learning_rate": 4.492393396018202e-06, "loss": 0.0593, "step": 142250 }, { "epoch": 2.8958778625954196, "grad_norm": 15.869983698282361, "learning_rate": 4.491686504170329e-06, "loss": 0.1295, "step": 142260 }, { "epoch": 2.8960814249363867, "grad_norm": 24.7572042508421, "learning_rate": 4.4909796225885e-06, "loss": 0.178, "step": 142270 }, { "epoch": 2.8962849872773537, "grad_norm": 7.9047384024377285, "learning_rate": 4.490272751286996e-06, "loss": 0.122, "step": 142280 }, { "epoch": 2.8964885496183204, "grad_norm": 17.878501924308978, "learning_rate": 4.489565890280089e-06, "loss": 0.2431, "step": 142290 }, { "epoch": 2.8966921119592874, "grad_norm": 9.927897058909007, "learning_rate": 4.488859039582057e-06, "loss": 0.0915, "step": 142300 }, { "epoch": 2.8968956743002545, "grad_norm": 14.63589701750454, "learning_rate": 4.488152199207173e-06, "loss": 0.2103, "step": 142310 }, { "epoch": 2.897099236641221, "grad_norm": 0.08252325675213698, "learning_rate": 4.487445369169716e-06, "loss": 0.0293, "step": 142320 }, { "epoch": 2.8973027989821882, "grad_norm": 51.81722027577836, "learning_rate": 4.4867385494839605e-06, "loss": 0.0971, "step": 142330 }, { "epoch": 2.8975063613231553, "grad_norm": 20.036101384805544, "learning_rate": 4.48603174016418e-06, "loss": 0.1377, "step": 142340 }, { "epoch": 2.897709923664122, "grad_norm": 0.11091664163464222, "learning_rate": 4.48532494122465e-06, "loss": 0.1163, "step": 142350 }, { "epoch": 2.897913486005089, "grad_norm": 2.942356447868103, "learning_rate": 4.484618152679647e-06, "loss": 0.0647, "step": 142360 }, { "epoch": 2.898117048346056, "grad_norm": 0.2254872364664791, "learning_rate": 4.483911374543444e-06, "loss": 0.1209, "step": 142370 }, { "epoch": 2.8983206106870227, "grad_norm": 0.07237782049357296, "learning_rate": 4.4832046068303164e-06, "loss": 0.0922, "step": 142380 }, { "epoch": 2.89852417302799, "grad_norm": 10.295154339061193, "learning_rate": 4.482497849554535e-06, "loss": 0.1609, "step": 142390 }, { "epoch": 2.898727735368957, "grad_norm": 0.030329785632296297, "learning_rate": 4.481791102730377e-06, "loss": 0.0763, "step": 142400 }, { "epoch": 2.8989312977099235, "grad_norm": 0.4086154553335929, "learning_rate": 4.481084366372116e-06, "loss": 0.118, "step": 142410 }, { "epoch": 2.8991348600508906, "grad_norm": 4.3913777246143075, "learning_rate": 4.480377640494023e-06, "loss": 0.0384, "step": 142420 }, { "epoch": 2.8993384223918577, "grad_norm": 0.8107975062285225, "learning_rate": 4.479670925110375e-06, "loss": 0.0726, "step": 142430 }, { "epoch": 2.8995419847328243, "grad_norm": 0.017890483970091648, "learning_rate": 4.478964220235442e-06, "loss": 0.1408, "step": 142440 }, { "epoch": 2.8997455470737914, "grad_norm": 16.437150071005053, "learning_rate": 4.478257525883496e-06, "loss": 0.1893, "step": 142450 }, { "epoch": 2.8999491094147585, "grad_norm": 26.23307754768736, "learning_rate": 4.477550842068815e-06, "loss": 0.0875, "step": 142460 }, { "epoch": 2.900152671755725, "grad_norm": 0.653719602834152, "learning_rate": 4.476844168805664e-06, "loss": 0.0879, "step": 142470 }, { "epoch": 2.900356234096692, "grad_norm": 6.28497909646319, "learning_rate": 4.47613750610832e-06, "loss": 0.0341, "step": 142480 }, { "epoch": 2.9005597964376593, "grad_norm": 0.31043894500691943, "learning_rate": 4.475430853991055e-06, "loss": 0.0674, "step": 142490 }, { "epoch": 2.900763358778626, "grad_norm": 3.6881129962386643, "learning_rate": 4.474724212468137e-06, "loss": 0.1233, "step": 142500 }, { "epoch": 2.900966921119593, "grad_norm": 5.043420535228327, "learning_rate": 4.474017581553843e-06, "loss": 0.1496, "step": 142510 }, { "epoch": 2.90117048346056, "grad_norm": 3.764005789615168, "learning_rate": 4.473310961262439e-06, "loss": 0.202, "step": 142520 }, { "epoch": 2.9013740458015267, "grad_norm": 0.1790055494451635, "learning_rate": 4.472604351608199e-06, "loss": 0.1021, "step": 142530 }, { "epoch": 2.9015776081424938, "grad_norm": 8.841034028926723, "learning_rate": 4.471897752605395e-06, "loss": 0.1056, "step": 142540 }, { "epoch": 2.9017811704834604, "grad_norm": 0.04063073582999861, "learning_rate": 4.4711911642682945e-06, "loss": 0.1596, "step": 142550 }, { "epoch": 2.9019847328244275, "grad_norm": 32.32852384854634, "learning_rate": 4.470484586611169e-06, "loss": 0.1232, "step": 142560 }, { "epoch": 2.9021882951653946, "grad_norm": 0.05625077557631386, "learning_rate": 4.469778019648291e-06, "loss": 0.1265, "step": 142570 }, { "epoch": 2.902391857506361, "grad_norm": 22.541589423300564, "learning_rate": 4.469071463393928e-06, "loss": 0.1182, "step": 142580 }, { "epoch": 2.9025954198473283, "grad_norm": 0.24304649316778185, "learning_rate": 4.4683649178623515e-06, "loss": 0.0837, "step": 142590 }, { "epoch": 2.902798982188295, "grad_norm": 4.740595775951905, "learning_rate": 4.4676583830678295e-06, "loss": 0.1046, "step": 142600 }, { "epoch": 2.903002544529262, "grad_norm": 0.3838146386917114, "learning_rate": 4.466951859024631e-06, "loss": 0.0714, "step": 142610 }, { "epoch": 2.903206106870229, "grad_norm": 33.962315286163324, "learning_rate": 4.466245345747028e-06, "loss": 0.0631, "step": 142620 }, { "epoch": 2.9034096692111957, "grad_norm": 17.20273312237409, "learning_rate": 4.465538843249287e-06, "loss": 0.0911, "step": 142630 }, { "epoch": 2.9036132315521628, "grad_norm": 0.09146684003945967, "learning_rate": 4.464832351545676e-06, "loss": 0.1761, "step": 142640 }, { "epoch": 2.90381679389313, "grad_norm": 0.5557653801553107, "learning_rate": 4.464125870650467e-06, "loss": 0.2148, "step": 142650 }, { "epoch": 2.9040203562340965, "grad_norm": 0.3317404104677885, "learning_rate": 4.463419400577926e-06, "loss": 0.0602, "step": 142660 }, { "epoch": 2.9042239185750636, "grad_norm": 2.9899544890824568, "learning_rate": 4.462712941342322e-06, "loss": 0.0384, "step": 142670 }, { "epoch": 2.9044274809160306, "grad_norm": 0.07056963294651539, "learning_rate": 4.46200649295792e-06, "loss": 0.0866, "step": 142680 }, { "epoch": 2.9046310432569973, "grad_norm": 0.042291260840485775, "learning_rate": 4.461300055438991e-06, "loss": 0.0558, "step": 142690 }, { "epoch": 2.9048346055979644, "grad_norm": 25.615556076883372, "learning_rate": 4.460593628799802e-06, "loss": 0.0725, "step": 142700 }, { "epoch": 2.9050381679389314, "grad_norm": 0.04773561983821903, "learning_rate": 4.459887213054617e-06, "loss": 0.0842, "step": 142710 }, { "epoch": 2.905241730279898, "grad_norm": 5.454693096799988, "learning_rate": 4.459180808217707e-06, "loss": 0.0814, "step": 142720 }, { "epoch": 2.905445292620865, "grad_norm": 7.852212345227892, "learning_rate": 4.458474414303338e-06, "loss": 0.0808, "step": 142730 }, { "epoch": 2.9056488549618322, "grad_norm": 14.850523543934694, "learning_rate": 4.4577680313257745e-06, "loss": 0.1436, "step": 142740 }, { "epoch": 2.905852417302799, "grad_norm": 0.03488574333444763, "learning_rate": 4.4570616592992855e-06, "loss": 0.1046, "step": 142750 }, { "epoch": 2.906055979643766, "grad_norm": 9.463585406847683, "learning_rate": 4.456355298238133e-06, "loss": 0.114, "step": 142760 }, { "epoch": 2.906259541984733, "grad_norm": 1.2050355760917184, "learning_rate": 4.455648948156587e-06, "loss": 0.1689, "step": 142770 }, { "epoch": 2.9064631043256997, "grad_norm": 0.021014151784332108, "learning_rate": 4.454942609068912e-06, "loss": 0.0795, "step": 142780 }, { "epoch": 2.9066666666666667, "grad_norm": 0.1360935157561328, "learning_rate": 4.454236280989372e-06, "loss": 0.234, "step": 142790 }, { "epoch": 2.906870229007634, "grad_norm": 0.043752973431605585, "learning_rate": 4.4535299639322345e-06, "loss": 0.0972, "step": 142800 }, { "epoch": 2.9070737913486004, "grad_norm": 15.652284297792693, "learning_rate": 4.452823657911762e-06, "loss": 0.1255, "step": 142810 }, { "epoch": 2.9072773536895675, "grad_norm": 0.2370386031126376, "learning_rate": 4.45211736294222e-06, "loss": 0.1344, "step": 142820 }, { "epoch": 2.9074809160305346, "grad_norm": 30.88730974558582, "learning_rate": 4.451411079037875e-06, "loss": 0.1179, "step": 142830 }, { "epoch": 2.9076844783715012, "grad_norm": 14.581199433239842, "learning_rate": 4.45070480621299e-06, "loss": 0.0671, "step": 142840 }, { "epoch": 2.9078880407124683, "grad_norm": 10.663642814156846, "learning_rate": 4.449998544481827e-06, "loss": 0.0991, "step": 142850 }, { "epoch": 2.908091603053435, "grad_norm": 0.2472800666272691, "learning_rate": 4.449292293858653e-06, "loss": 0.0953, "step": 142860 }, { "epoch": 2.908295165394402, "grad_norm": 36.61002258941975, "learning_rate": 4.4485860543577305e-06, "loss": 0.1809, "step": 142870 }, { "epoch": 2.908498727735369, "grad_norm": 5.348401437745446, "learning_rate": 4.447879825993324e-06, "loss": 0.1644, "step": 142880 }, { "epoch": 2.9087022900763357, "grad_norm": 1.2220789740109048, "learning_rate": 4.447173608779695e-06, "loss": 0.1398, "step": 142890 }, { "epoch": 2.908905852417303, "grad_norm": 12.717205894179692, "learning_rate": 4.446467402731105e-06, "loss": 0.2356, "step": 142900 }, { "epoch": 2.9091094147582695, "grad_norm": 8.624039794800447, "learning_rate": 4.445761207861821e-06, "loss": 0.1576, "step": 142910 }, { "epoch": 2.9093129770992365, "grad_norm": 6.647920185631221, "learning_rate": 4.445055024186102e-06, "loss": 0.2244, "step": 142920 }, { "epoch": 2.9095165394402036, "grad_norm": 18.422806773345204, "learning_rate": 4.444348851718211e-06, "loss": 0.0931, "step": 142930 }, { "epoch": 2.9097201017811702, "grad_norm": 0.25713980737739256, "learning_rate": 4.443642690472412e-06, "loss": 0.0926, "step": 142940 }, { "epoch": 2.9099236641221373, "grad_norm": 0.12240866692513658, "learning_rate": 4.442936540462966e-06, "loss": 0.1424, "step": 142950 }, { "epoch": 2.9101272264631044, "grad_norm": 7.119985356979711, "learning_rate": 4.4422304017041335e-06, "loss": 0.0776, "step": 142960 }, { "epoch": 2.910330788804071, "grad_norm": 3.5247075314762473, "learning_rate": 4.441524274210175e-06, "loss": 0.1526, "step": 142970 }, { "epoch": 2.910534351145038, "grad_norm": 0.17023899722677535, "learning_rate": 4.440818157995354e-06, "loss": 0.0343, "step": 142980 }, { "epoch": 2.910737913486005, "grad_norm": 6.185133216256722, "learning_rate": 4.440112053073931e-06, "loss": 0.1242, "step": 142990 }, { "epoch": 2.910941475826972, "grad_norm": 1.7893375236622782, "learning_rate": 4.4394059594601645e-06, "loss": 0.0989, "step": 143000 }, { "epoch": 2.911145038167939, "grad_norm": 25.74601804567278, "learning_rate": 4.438699877168318e-06, "loss": 0.2996, "step": 143010 }, { "epoch": 2.911348600508906, "grad_norm": 9.269234563026579, "learning_rate": 4.437993806212651e-06, "loss": 0.1021, "step": 143020 }, { "epoch": 2.9115521628498726, "grad_norm": 19.203593938712963, "learning_rate": 4.437287746607423e-06, "loss": 0.0765, "step": 143030 }, { "epoch": 2.9117557251908397, "grad_norm": 11.296353201264727, "learning_rate": 4.4365816983668935e-06, "loss": 0.1081, "step": 143040 }, { "epoch": 2.9119592875318068, "grad_norm": 14.559831443709504, "learning_rate": 4.435875661505321e-06, "loss": 0.1016, "step": 143050 }, { "epoch": 2.9121628498727734, "grad_norm": 14.74382152215489, "learning_rate": 4.435169636036967e-06, "loss": 0.1245, "step": 143060 }, { "epoch": 2.9123664122137405, "grad_norm": 36.89996181358015, "learning_rate": 4.434463621976091e-06, "loss": 0.123, "step": 143070 }, { "epoch": 2.9125699745547076, "grad_norm": 0.30381880987598836, "learning_rate": 4.4337576193369475e-06, "loss": 0.1397, "step": 143080 }, { "epoch": 2.912773536895674, "grad_norm": 31.50947260778872, "learning_rate": 4.4330516281338e-06, "loss": 0.1147, "step": 143090 }, { "epoch": 2.9129770992366413, "grad_norm": 11.144574821630167, "learning_rate": 4.432345648380906e-06, "loss": 0.1465, "step": 143100 }, { "epoch": 2.9131806615776084, "grad_norm": 13.946511586294571, "learning_rate": 4.4316396800925206e-06, "loss": 0.1646, "step": 143110 }, { "epoch": 2.913384223918575, "grad_norm": 0.16933771144790857, "learning_rate": 4.4309337232829054e-06, "loss": 0.1406, "step": 143120 }, { "epoch": 2.913587786259542, "grad_norm": 13.557758606565908, "learning_rate": 4.430227777966317e-06, "loss": 0.1935, "step": 143130 }, { "epoch": 2.913791348600509, "grad_norm": 33.234337508450956, "learning_rate": 4.42952184415701e-06, "loss": 0.2025, "step": 143140 }, { "epoch": 2.913994910941476, "grad_norm": 0.03784218414365366, "learning_rate": 4.4288159218692476e-06, "loss": 0.1102, "step": 143150 }, { "epoch": 2.914198473282443, "grad_norm": 5.442988058615996, "learning_rate": 4.428110011117281e-06, "loss": 0.0502, "step": 143160 }, { "epoch": 2.91440203562341, "grad_norm": 1.6536112458812622, "learning_rate": 4.427404111915371e-06, "loss": 0.146, "step": 143170 }, { "epoch": 2.9146055979643766, "grad_norm": 13.56953327604796, "learning_rate": 4.4266982242777716e-06, "loss": 0.0853, "step": 143180 }, { "epoch": 2.9148091603053436, "grad_norm": 12.340805204906555, "learning_rate": 4.425992348218738e-06, "loss": 0.1659, "step": 143190 }, { "epoch": 2.9150127226463103, "grad_norm": 4.318805387373473, "learning_rate": 4.425286483752531e-06, "loss": 0.0515, "step": 143200 }, { "epoch": 2.9152162849872774, "grad_norm": 1.189684348405561, "learning_rate": 4.424580630893402e-06, "loss": 0.0904, "step": 143210 }, { "epoch": 2.9154198473282444, "grad_norm": 26.586206394270164, "learning_rate": 4.423874789655609e-06, "loss": 0.1546, "step": 143220 }, { "epoch": 2.915623409669211, "grad_norm": 48.323801485931625, "learning_rate": 4.4231689600534075e-06, "loss": 0.1947, "step": 143230 }, { "epoch": 2.915826972010178, "grad_norm": 40.91182016196872, "learning_rate": 4.422463142101051e-06, "loss": 0.0795, "step": 143240 }, { "epoch": 2.916030534351145, "grad_norm": 0.7477870531790798, "learning_rate": 4.4217573358127955e-06, "loss": 0.1761, "step": 143250 }, { "epoch": 2.916234096692112, "grad_norm": 6.129953192495885, "learning_rate": 4.421051541202893e-06, "loss": 0.1427, "step": 143260 }, { "epoch": 2.916437659033079, "grad_norm": 33.58163948093573, "learning_rate": 4.420345758285603e-06, "loss": 0.0715, "step": 143270 }, { "epoch": 2.9166412213740456, "grad_norm": 0.10818312590711711, "learning_rate": 4.419639987075178e-06, "loss": 0.1385, "step": 143280 }, { "epoch": 2.9168447837150127, "grad_norm": 8.864417451570151, "learning_rate": 4.418934227585869e-06, "loss": 0.0434, "step": 143290 }, { "epoch": 2.9170483460559797, "grad_norm": 3.80630030077553, "learning_rate": 4.418228479831931e-06, "loss": 0.1752, "step": 143300 }, { "epoch": 2.9172519083969464, "grad_norm": 0.05988372720368226, "learning_rate": 4.4175227438276195e-06, "loss": 0.1166, "step": 143310 }, { "epoch": 2.9174554707379134, "grad_norm": 2.0555001777613073, "learning_rate": 4.416817019587186e-06, "loss": 0.1711, "step": 143320 }, { "epoch": 2.9176590330788805, "grad_norm": 60.66632805580509, "learning_rate": 4.416111307124885e-06, "loss": 0.08, "step": 143330 }, { "epoch": 2.917862595419847, "grad_norm": 1.620140152355163, "learning_rate": 4.415405606454966e-06, "loss": 0.0574, "step": 143340 }, { "epoch": 2.9180661577608142, "grad_norm": 0.08041585059799031, "learning_rate": 4.4146999175916846e-06, "loss": 0.0754, "step": 143350 }, { "epoch": 2.9182697201017813, "grad_norm": 5.8353737760396855, "learning_rate": 4.413994240549294e-06, "loss": 0.0923, "step": 143360 }, { "epoch": 2.918473282442748, "grad_norm": 14.281489033343803, "learning_rate": 4.4132885753420416e-06, "loss": 0.0928, "step": 143370 }, { "epoch": 2.918676844783715, "grad_norm": 17.182701135756577, "learning_rate": 4.412582921984184e-06, "loss": 0.2233, "step": 143380 }, { "epoch": 2.918880407124682, "grad_norm": 5.517895471831733, "learning_rate": 4.411877280489971e-06, "loss": 0.177, "step": 143390 }, { "epoch": 2.9190839694656487, "grad_norm": 0.8571044080017199, "learning_rate": 4.4111716508736515e-06, "loss": 0.0542, "step": 143400 }, { "epoch": 2.919287531806616, "grad_norm": 15.309172118064714, "learning_rate": 4.410466033149482e-06, "loss": 0.1057, "step": 143410 }, { "epoch": 2.919491094147583, "grad_norm": 19.505432903064133, "learning_rate": 4.409760427331709e-06, "loss": 0.1135, "step": 143420 }, { "epoch": 2.9196946564885495, "grad_norm": 16.4934854484972, "learning_rate": 4.409054833434583e-06, "loss": 0.0874, "step": 143430 }, { "epoch": 2.9198982188295166, "grad_norm": 1.006256032390033, "learning_rate": 4.408349251472359e-06, "loss": 0.0671, "step": 143440 }, { "epoch": 2.9201017811704837, "grad_norm": 10.560520026384062, "learning_rate": 4.407643681459281e-06, "loss": 0.1603, "step": 143450 }, { "epoch": 2.9203053435114503, "grad_norm": 0.026889548257530337, "learning_rate": 4.406938123409603e-06, "loss": 0.0602, "step": 143460 }, { "epoch": 2.9205089058524174, "grad_norm": 26.54105355327703, "learning_rate": 4.406232577337573e-06, "loss": 0.0741, "step": 143470 }, { "epoch": 2.9207124681933845, "grad_norm": 0.1096240199647791, "learning_rate": 4.405527043257441e-06, "loss": 0.2525, "step": 143480 }, { "epoch": 2.920916030534351, "grad_norm": 0.508741519874512, "learning_rate": 4.4048215211834565e-06, "loss": 0.1599, "step": 143490 }, { "epoch": 2.921119592875318, "grad_norm": 19.501116082294132, "learning_rate": 4.404116011129868e-06, "loss": 0.1195, "step": 143500 }, { "epoch": 2.921323155216285, "grad_norm": 0.035786903672194906, "learning_rate": 4.403410513110922e-06, "loss": 0.0212, "step": 143510 }, { "epoch": 2.921526717557252, "grad_norm": 1.5209924607680523, "learning_rate": 4.4027050271408725e-06, "loss": 0.1974, "step": 143520 }, { "epoch": 2.921730279898219, "grad_norm": 0.03049829796521834, "learning_rate": 4.401999553233962e-06, "loss": 0.1565, "step": 143530 }, { "epoch": 2.9219338422391856, "grad_norm": 15.79925449715862, "learning_rate": 4.4012940914044425e-06, "loss": 0.0932, "step": 143540 }, { "epoch": 2.9221374045801527, "grad_norm": 11.386056738596256, "learning_rate": 4.400588641666558e-06, "loss": 0.0635, "step": 143550 }, { "epoch": 2.9223409669211193, "grad_norm": 0.2652700680012499, "learning_rate": 4.399883204034559e-06, "loss": 0.102, "step": 143560 }, { "epoch": 2.9225445292620864, "grad_norm": 0.1538235197304927, "learning_rate": 4.399177778522692e-06, "loss": 0.0676, "step": 143570 }, { "epoch": 2.9227480916030535, "grad_norm": 10.887186083167538, "learning_rate": 4.3984723651452025e-06, "loss": 0.1301, "step": 143580 }, { "epoch": 2.92295165394402, "grad_norm": 0.04716822929953493, "learning_rate": 4.397766963916338e-06, "loss": 0.0212, "step": 143590 }, { "epoch": 2.923155216284987, "grad_norm": 0.6284364353927466, "learning_rate": 4.397061574850347e-06, "loss": 0.1335, "step": 143600 }, { "epoch": 2.9233587786259543, "grad_norm": 7.465066727012518, "learning_rate": 4.396356197961473e-06, "loss": 0.057, "step": 143610 }, { "epoch": 2.923562340966921, "grad_norm": 8.133906867764626, "learning_rate": 4.395650833263965e-06, "loss": 0.1017, "step": 143620 }, { "epoch": 2.923765903307888, "grad_norm": 24.745310785242538, "learning_rate": 4.394945480772064e-06, "loss": 0.2618, "step": 143630 }, { "epoch": 2.923969465648855, "grad_norm": 6.990516719901774, "learning_rate": 4.394240140500019e-06, "loss": 0.056, "step": 143640 }, { "epoch": 2.9241730279898217, "grad_norm": 12.325511311416044, "learning_rate": 4.393534812462076e-06, "loss": 0.1479, "step": 143650 }, { "epoch": 2.924376590330789, "grad_norm": 5.917200984429946, "learning_rate": 4.392829496672476e-06, "loss": 0.0964, "step": 143660 }, { "epoch": 2.924580152671756, "grad_norm": 14.948160440330748, "learning_rate": 4.392124193145468e-06, "loss": 0.0836, "step": 143670 }, { "epoch": 2.9247837150127225, "grad_norm": 21.525417934459426, "learning_rate": 4.391418901895296e-06, "loss": 0.1642, "step": 143680 }, { "epoch": 2.9249872773536896, "grad_norm": 0.15337336535055676, "learning_rate": 4.390713622936201e-06, "loss": 0.1125, "step": 143690 }, { "epoch": 2.9251908396946567, "grad_norm": 2.57854838636426, "learning_rate": 4.390008356282432e-06, "loss": 0.0555, "step": 143700 }, { "epoch": 2.9253944020356233, "grad_norm": 0.779993203301217, "learning_rate": 4.389303101948227e-06, "loss": 0.0477, "step": 143710 }, { "epoch": 2.9255979643765904, "grad_norm": 1.0206071078770278, "learning_rate": 4.388597859947833e-06, "loss": 0.1031, "step": 143720 }, { "epoch": 2.9258015267175574, "grad_norm": 12.03063300504663, "learning_rate": 4.387892630295494e-06, "loss": 0.1269, "step": 143730 }, { "epoch": 2.926005089058524, "grad_norm": 0.2763209834440225, "learning_rate": 4.38718741300545e-06, "loss": 0.067, "step": 143740 }, { "epoch": 2.926208651399491, "grad_norm": 1.291515096508058, "learning_rate": 4.386482208091946e-06, "loss": 0.125, "step": 143750 }, { "epoch": 2.9264122137404582, "grad_norm": 1.865656699618537, "learning_rate": 4.385777015569225e-06, "loss": 0.0695, "step": 143760 }, { "epoch": 2.926615776081425, "grad_norm": 0.1990132500240913, "learning_rate": 4.385071835451526e-06, "loss": 0.0776, "step": 143770 }, { "epoch": 2.926819338422392, "grad_norm": 3.7560761422168945, "learning_rate": 4.384366667753096e-06, "loss": 0.1306, "step": 143780 }, { "epoch": 2.927022900763359, "grad_norm": 0.04199451277610071, "learning_rate": 4.383661512488174e-06, "loss": 0.158, "step": 143790 }, { "epoch": 2.9272264631043257, "grad_norm": 0.28592749710376764, "learning_rate": 4.382956369670999e-06, "loss": 0.0613, "step": 143800 }, { "epoch": 2.9274300254452927, "grad_norm": 12.346312175264273, "learning_rate": 4.382251239315817e-06, "loss": 0.0589, "step": 143810 }, { "epoch": 2.92763358778626, "grad_norm": 21.770774172315168, "learning_rate": 4.381546121436867e-06, "loss": 0.0784, "step": 143820 }, { "epoch": 2.9278371501272265, "grad_norm": 5.607863413014641, "learning_rate": 4.3808410160483895e-06, "loss": 0.2461, "step": 143830 }, { "epoch": 2.9280407124681935, "grad_norm": 15.272144943341887, "learning_rate": 4.380135923164623e-06, "loss": 0.117, "step": 143840 }, { "epoch": 2.92824427480916, "grad_norm": 1.5213246433288663, "learning_rate": 4.379430842799812e-06, "loss": 0.1567, "step": 143850 }, { "epoch": 2.9284478371501272, "grad_norm": 0.09378965012944974, "learning_rate": 4.378725774968195e-06, "loss": 0.1057, "step": 143860 }, { "epoch": 2.928651399491094, "grad_norm": 0.04321426639876732, "learning_rate": 4.378020719684009e-06, "loss": 0.0383, "step": 143870 }, { "epoch": 2.928854961832061, "grad_norm": 25.960494516853352, "learning_rate": 4.377315676961497e-06, "loss": 0.0875, "step": 143880 }, { "epoch": 2.929058524173028, "grad_norm": 9.78107504309172, "learning_rate": 4.376610646814897e-06, "loss": 0.0778, "step": 143890 }, { "epoch": 2.9292620865139947, "grad_norm": 0.7905185055874644, "learning_rate": 4.375905629258448e-06, "loss": 0.0802, "step": 143900 }, { "epoch": 2.9294656488549617, "grad_norm": 0.5779423690871726, "learning_rate": 4.3752006243063885e-06, "loss": 0.1393, "step": 143910 }, { "epoch": 2.929669211195929, "grad_norm": 15.312346119921434, "learning_rate": 4.3744956319729555e-06, "loss": 0.156, "step": 143920 }, { "epoch": 2.9298727735368955, "grad_norm": 49.39295642593981, "learning_rate": 4.3737906522723905e-06, "loss": 0.1045, "step": 143930 }, { "epoch": 2.9300763358778625, "grad_norm": 0.02756593925122995, "learning_rate": 4.373085685218931e-06, "loss": 0.1207, "step": 143940 }, { "epoch": 2.9302798982188296, "grad_norm": 26.423060854047964, "learning_rate": 4.372380730826811e-06, "loss": 0.0634, "step": 143950 }, { "epoch": 2.9304834605597962, "grad_norm": 3.6586430525088307, "learning_rate": 4.371675789110271e-06, "loss": 0.1039, "step": 143960 }, { "epoch": 2.9306870229007633, "grad_norm": 0.07857709650860584, "learning_rate": 4.3709708600835495e-06, "loss": 0.074, "step": 143970 }, { "epoch": 2.9308905852417304, "grad_norm": 0.2488769790190278, "learning_rate": 4.37026594376088e-06, "loss": 0.1059, "step": 143980 }, { "epoch": 2.931094147582697, "grad_norm": 8.097300830866361, "learning_rate": 4.369561040156503e-06, "loss": 0.1932, "step": 143990 }, { "epoch": 2.931297709923664, "grad_norm": 13.420117729731846, "learning_rate": 4.3688561492846495e-06, "loss": 0.0633, "step": 144000 }, { "epoch": 2.931501272264631, "grad_norm": 0.6215042153954831, "learning_rate": 4.368151271159561e-06, "loss": 0.0495, "step": 144010 }, { "epoch": 2.931704834605598, "grad_norm": 30.215052389454964, "learning_rate": 4.367446405795472e-06, "loss": 0.1538, "step": 144020 }, { "epoch": 2.931908396946565, "grad_norm": 1.7352617137750506, "learning_rate": 4.366741553206615e-06, "loss": 0.1135, "step": 144030 }, { "epoch": 2.932111959287532, "grad_norm": 0.0671232483297762, "learning_rate": 4.36603671340723e-06, "loss": 0.2162, "step": 144040 }, { "epoch": 2.9323155216284986, "grad_norm": 9.274455136177174, "learning_rate": 4.36533188641155e-06, "loss": 0.0715, "step": 144050 }, { "epoch": 2.9325190839694657, "grad_norm": 0.031258509029815876, "learning_rate": 4.3646270722338095e-06, "loss": 0.0636, "step": 144060 }, { "epoch": 2.932722646310433, "grad_norm": 15.556124377986443, "learning_rate": 4.363922270888245e-06, "loss": 0.0915, "step": 144070 }, { "epoch": 2.9329262086513994, "grad_norm": 1.0468390260852105, "learning_rate": 4.363217482389087e-06, "loss": 0.0538, "step": 144080 }, { "epoch": 2.9331297709923665, "grad_norm": 7.458482882461128, "learning_rate": 4.362512706750572e-06, "loss": 0.097, "step": 144090 }, { "epoch": 2.9333333333333336, "grad_norm": 11.405926347898415, "learning_rate": 4.361807943986937e-06, "loss": 0.1282, "step": 144100 }, { "epoch": 2.9335368956743, "grad_norm": 0.0203074136058368, "learning_rate": 4.361103194112411e-06, "loss": 0.1492, "step": 144110 }, { "epoch": 2.9337404580152673, "grad_norm": 5.213630745320812, "learning_rate": 4.360398457141227e-06, "loss": 0.1037, "step": 144120 }, { "epoch": 2.9339440203562344, "grad_norm": 2.7617133729969177, "learning_rate": 4.359693733087625e-06, "loss": 0.1137, "step": 144130 }, { "epoch": 2.934147582697201, "grad_norm": 10.652683122923815, "learning_rate": 4.358989021965828e-06, "loss": 0.1188, "step": 144140 }, { "epoch": 2.934351145038168, "grad_norm": 7.544903078276911, "learning_rate": 4.358284323790075e-06, "loss": 0.1634, "step": 144150 }, { "epoch": 2.9345547073791347, "grad_norm": 31.665858020862775, "learning_rate": 4.357579638574596e-06, "loss": 0.1243, "step": 144160 }, { "epoch": 2.934758269720102, "grad_norm": 0.03929409315633558, "learning_rate": 4.356874966333623e-06, "loss": 0.1683, "step": 144170 }, { "epoch": 2.934961832061069, "grad_norm": 0.10149115814003354, "learning_rate": 4.356170307081391e-06, "loss": 0.0386, "step": 144180 }, { "epoch": 2.9351653944020355, "grad_norm": 9.716626152236444, "learning_rate": 4.355465660832128e-06, "loss": 0.1928, "step": 144190 }, { "epoch": 2.9353689567430026, "grad_norm": 11.489945086684159, "learning_rate": 4.3547610276000665e-06, "loss": 0.187, "step": 144200 }, { "epoch": 2.935572519083969, "grad_norm": 26.67946897644962, "learning_rate": 4.354056407399434e-06, "loss": 0.2566, "step": 144210 }, { "epoch": 2.9357760814249363, "grad_norm": 9.627339958542837, "learning_rate": 4.353351800244467e-06, "loss": 0.1196, "step": 144220 }, { "epoch": 2.9359796437659034, "grad_norm": 12.996448096675028, "learning_rate": 4.352647206149394e-06, "loss": 0.1686, "step": 144230 }, { "epoch": 2.93618320610687, "grad_norm": 17.333634616763998, "learning_rate": 4.351942625128441e-06, "loss": 0.1613, "step": 144240 }, { "epoch": 2.936386768447837, "grad_norm": 4.901373535816956, "learning_rate": 4.351238057195844e-06, "loss": 0.0795, "step": 144250 }, { "epoch": 2.936590330788804, "grad_norm": 0.04368758963132663, "learning_rate": 4.35053350236583e-06, "loss": 0.1189, "step": 144260 }, { "epoch": 2.936793893129771, "grad_norm": 10.649846933060939, "learning_rate": 4.349828960652628e-06, "loss": 0.0562, "step": 144270 }, { "epoch": 2.936997455470738, "grad_norm": 23.471436421225725, "learning_rate": 4.349124432070469e-06, "loss": 0.1176, "step": 144280 }, { "epoch": 2.937201017811705, "grad_norm": 0.9587555659198964, "learning_rate": 4.348419916633578e-06, "loss": 0.0566, "step": 144290 }, { "epoch": 2.9374045801526716, "grad_norm": 0.6398228623488194, "learning_rate": 4.347715414356186e-06, "loss": 0.0365, "step": 144300 }, { "epoch": 2.9376081424936387, "grad_norm": 1.1210455309805898, "learning_rate": 4.347010925252524e-06, "loss": 0.1183, "step": 144310 }, { "epoch": 2.9378117048346057, "grad_norm": 0.6167926538921038, "learning_rate": 4.346306449336814e-06, "loss": 0.1325, "step": 144320 }, { "epoch": 2.9380152671755724, "grad_norm": 11.60315742889647, "learning_rate": 4.345601986623289e-06, "loss": 0.0882, "step": 144330 }, { "epoch": 2.9382188295165395, "grad_norm": 3.1320136188070395, "learning_rate": 4.344897537126176e-06, "loss": 0.0998, "step": 144340 }, { "epoch": 2.9384223918575065, "grad_norm": 21.399039682568862, "learning_rate": 4.3441931008596984e-06, "loss": 0.0717, "step": 144350 }, { "epoch": 2.938625954198473, "grad_norm": 0.16557507129313453, "learning_rate": 4.343488677838088e-06, "loss": 0.1195, "step": 144360 }, { "epoch": 2.9388295165394402, "grad_norm": 15.669397273082573, "learning_rate": 4.342784268075568e-06, "loss": 0.1868, "step": 144370 }, { "epoch": 2.9390330788804073, "grad_norm": 0.09354456109351476, "learning_rate": 4.342079871586366e-06, "loss": 0.046, "step": 144380 }, { "epoch": 2.939236641221374, "grad_norm": 0.12284215215905406, "learning_rate": 4.34137548838471e-06, "loss": 0.1088, "step": 144390 }, { "epoch": 2.939440203562341, "grad_norm": 7.231914158922671, "learning_rate": 4.3406711184848224e-06, "loss": 0.1168, "step": 144400 }, { "epoch": 2.939643765903308, "grad_norm": 0.2942811775561477, "learning_rate": 4.339966761900931e-06, "loss": 0.0667, "step": 144410 }, { "epoch": 2.9398473282442747, "grad_norm": 2.8711132338171153, "learning_rate": 4.3392624186472636e-06, "loss": 0.1876, "step": 144420 }, { "epoch": 2.940050890585242, "grad_norm": 0.3210589677151355, "learning_rate": 4.33855808873804e-06, "loss": 0.1349, "step": 144430 }, { "epoch": 2.940254452926209, "grad_norm": 28.099130929414322, "learning_rate": 4.337853772187489e-06, "loss": 0.1842, "step": 144440 }, { "epoch": 2.9404580152671755, "grad_norm": 8.203353710198233, "learning_rate": 4.337149469009833e-06, "loss": 0.1281, "step": 144450 }, { "epoch": 2.9406615776081426, "grad_norm": 0.575427227059882, "learning_rate": 4.336445179219297e-06, "loss": 0.0437, "step": 144460 }, { "epoch": 2.9408651399491093, "grad_norm": 7.210050068732918, "learning_rate": 4.335740902830107e-06, "loss": 0.063, "step": 144470 }, { "epoch": 2.9410687022900763, "grad_norm": 0.15796413579532328, "learning_rate": 4.335036639856484e-06, "loss": 0.0386, "step": 144480 }, { "epoch": 2.9412722646310434, "grad_norm": 13.399060550946341, "learning_rate": 4.33433239031265e-06, "loss": 0.2501, "step": 144490 }, { "epoch": 2.94147582697201, "grad_norm": 0.3835119949209953, "learning_rate": 4.333628154212835e-06, "loss": 0.1593, "step": 144500 }, { "epoch": 2.941679389312977, "grad_norm": 1.867372182610215, "learning_rate": 4.332923931571255e-06, "loss": 0.1973, "step": 144510 }, { "epoch": 2.9418829516539438, "grad_norm": 11.78063089932989, "learning_rate": 4.332219722402138e-06, "loss": 0.1425, "step": 144520 }, { "epoch": 2.942086513994911, "grad_norm": 0.09915973590640131, "learning_rate": 4.331515526719701e-06, "loss": 0.1723, "step": 144530 }, { "epoch": 2.942290076335878, "grad_norm": 0.07399897033963279, "learning_rate": 4.33081134453817e-06, "loss": 0.057, "step": 144540 }, { "epoch": 2.9424936386768445, "grad_norm": 0.2706031233368755, "learning_rate": 4.330107175871766e-06, "loss": 0.1199, "step": 144550 }, { "epoch": 2.9426972010178116, "grad_norm": 0.3227900293790537, "learning_rate": 4.32940302073471e-06, "loss": 0.0881, "step": 144560 }, { "epoch": 2.9429007633587787, "grad_norm": 0.8151355927735686, "learning_rate": 4.328698879141225e-06, "loss": 0.1902, "step": 144570 }, { "epoch": 2.9431043256997453, "grad_norm": 0.4596438675419745, "learning_rate": 4.3279947511055285e-06, "loss": 0.1109, "step": 144580 }, { "epoch": 2.9433078880407124, "grad_norm": 12.770741431880138, "learning_rate": 4.327290636641845e-06, "loss": 0.2006, "step": 144590 }, { "epoch": 2.9435114503816795, "grad_norm": 8.746684415306547, "learning_rate": 4.326586535764394e-06, "loss": 0.0827, "step": 144600 }, { "epoch": 2.943715012722646, "grad_norm": 8.257668619167275, "learning_rate": 4.325882448487393e-06, "loss": 0.111, "step": 144610 }, { "epoch": 2.943918575063613, "grad_norm": 1.746072074516017, "learning_rate": 4.325178374825066e-06, "loss": 0.066, "step": 144620 }, { "epoch": 2.9441221374045803, "grad_norm": 17.29020315452438, "learning_rate": 4.3244743147916315e-06, "loss": 0.1749, "step": 144630 }, { "epoch": 2.944325699745547, "grad_norm": 7.840160154341777, "learning_rate": 4.323770268401306e-06, "loss": 0.0983, "step": 144640 }, { "epoch": 2.944529262086514, "grad_norm": 0.06517088229900653, "learning_rate": 4.323066235668313e-06, "loss": 0.0781, "step": 144650 }, { "epoch": 2.944732824427481, "grad_norm": 10.760357679295083, "learning_rate": 4.322362216606869e-06, "loss": 0.0993, "step": 144660 }, { "epoch": 2.9449363867684477, "grad_norm": 13.44847612570446, "learning_rate": 4.3216582112311915e-06, "loss": 0.2386, "step": 144670 }, { "epoch": 2.945139949109415, "grad_norm": 2.7968051848618707, "learning_rate": 4.320954219555503e-06, "loss": 0.037, "step": 144680 }, { "epoch": 2.945343511450382, "grad_norm": 0.07598353246978888, "learning_rate": 4.320250241594015e-06, "loss": 0.0872, "step": 144690 }, { "epoch": 2.9455470737913485, "grad_norm": 14.306277068516394, "learning_rate": 4.319546277360951e-06, "loss": 0.1374, "step": 144700 }, { "epoch": 2.9457506361323156, "grad_norm": 0.5693818732907913, "learning_rate": 4.318842326870527e-06, "loss": 0.0851, "step": 144710 }, { "epoch": 2.9459541984732827, "grad_norm": 8.73384522413524, "learning_rate": 4.318138390136957e-06, "loss": 0.0928, "step": 144720 }, { "epoch": 2.9461577608142493, "grad_norm": 0.1316306120152872, "learning_rate": 4.317434467174463e-06, "loss": 0.0406, "step": 144730 }, { "epoch": 2.9463613231552164, "grad_norm": 22.331700504437723, "learning_rate": 4.316730557997257e-06, "loss": 0.0524, "step": 144740 }, { "epoch": 2.9465648854961835, "grad_norm": 10.016172679590477, "learning_rate": 4.316026662619557e-06, "loss": 0.1993, "step": 144750 }, { "epoch": 2.94676844783715, "grad_norm": 0.10874109498831064, "learning_rate": 4.315322781055582e-06, "loss": 0.0919, "step": 144760 }, { "epoch": 2.946972010178117, "grad_norm": 0.1396722587133679, "learning_rate": 4.3146189133195445e-06, "loss": 0.0663, "step": 144770 }, { "epoch": 2.9471755725190842, "grad_norm": 9.008940232315325, "learning_rate": 4.313915059425659e-06, "loss": 0.1578, "step": 144780 }, { "epoch": 2.947379134860051, "grad_norm": 0.2664514160059933, "learning_rate": 4.313211219388145e-06, "loss": 0.1033, "step": 144790 }, { "epoch": 2.947582697201018, "grad_norm": 0.08981018983997568, "learning_rate": 4.3125073932212135e-06, "loss": 0.1205, "step": 144800 }, { "epoch": 2.9477862595419846, "grad_norm": 0.7558862400584703, "learning_rate": 4.311803580939082e-06, "loss": 0.0922, "step": 144810 }, { "epoch": 2.9479898218829517, "grad_norm": 0.35794129726551466, "learning_rate": 4.311099782555963e-06, "loss": 0.0353, "step": 144820 }, { "epoch": 2.9481933842239183, "grad_norm": 3.12055976492064, "learning_rate": 4.31039599808607e-06, "loss": 0.1489, "step": 144830 }, { "epoch": 2.9483969465648854, "grad_norm": 17.061404095263427, "learning_rate": 4.30969222754362e-06, "loss": 0.1081, "step": 144840 }, { "epoch": 2.9486005089058525, "grad_norm": 0.23737574673423273, "learning_rate": 4.3089884709428235e-06, "loss": 0.1803, "step": 144850 }, { "epoch": 2.948804071246819, "grad_norm": 7.462342761762432, "learning_rate": 4.308284728297893e-06, "loss": 0.1266, "step": 144860 }, { "epoch": 2.949007633587786, "grad_norm": 30.810942019189223, "learning_rate": 4.307580999623046e-06, "loss": 0.123, "step": 144870 }, { "epoch": 2.9492111959287532, "grad_norm": 5.739449760017671, "learning_rate": 4.306877284932492e-06, "loss": 0.1659, "step": 144880 }, { "epoch": 2.94941475826972, "grad_norm": 0.2592385396598773, "learning_rate": 4.306173584240445e-06, "loss": 0.1173, "step": 144890 }, { "epoch": 2.949618320610687, "grad_norm": 0.01794700640325382, "learning_rate": 4.305469897561114e-06, "loss": 0.2811, "step": 144900 }, { "epoch": 2.949821882951654, "grad_norm": 0.21815088236114308, "learning_rate": 4.304766224908713e-06, "loss": 0.0699, "step": 144910 }, { "epoch": 2.9500254452926207, "grad_norm": 18.296947584769985, "learning_rate": 4.304062566297456e-06, "loss": 0.1724, "step": 144920 }, { "epoch": 2.9502290076335878, "grad_norm": 1.707837692658477, "learning_rate": 4.303358921741548e-06, "loss": 0.1405, "step": 144930 }, { "epoch": 2.950432569974555, "grad_norm": 0.9542538471968841, "learning_rate": 4.302655291255208e-06, "loss": 0.1434, "step": 144940 }, { "epoch": 2.9506361323155215, "grad_norm": 14.30675892643499, "learning_rate": 4.301951674852641e-06, "loss": 0.1656, "step": 144950 }, { "epoch": 2.9508396946564885, "grad_norm": 9.619739301897276, "learning_rate": 4.301248072548057e-06, "loss": 0.1458, "step": 144960 }, { "epoch": 2.9510432569974556, "grad_norm": 10.433095061394054, "learning_rate": 4.3005444843556706e-06, "loss": 0.2942, "step": 144970 }, { "epoch": 2.9512468193384223, "grad_norm": 0.25862389600523183, "learning_rate": 4.299840910289687e-06, "loss": 0.0366, "step": 144980 }, { "epoch": 2.9514503816793893, "grad_norm": 0.03296606661356731, "learning_rate": 4.29913735036432e-06, "loss": 0.1754, "step": 144990 }, { "epoch": 2.9516539440203564, "grad_norm": 6.049943156274133, "learning_rate": 4.298433804593777e-06, "loss": 0.1321, "step": 145000 }, { "epoch": 2.951857506361323, "grad_norm": 4.015045671019737, "learning_rate": 4.297730272992265e-06, "loss": 0.1046, "step": 145010 }, { "epoch": 2.95206106870229, "grad_norm": 11.5333790491451, "learning_rate": 4.297026755573997e-06, "loss": 0.1638, "step": 145020 }, { "epoch": 2.952264631043257, "grad_norm": 11.159604582586399, "learning_rate": 4.296323252353179e-06, "loss": 0.2339, "step": 145030 }, { "epoch": 2.952468193384224, "grad_norm": 1.387702795896356, "learning_rate": 4.295619763344016e-06, "loss": 0.119, "step": 145040 }, { "epoch": 2.952671755725191, "grad_norm": 0.3857339971037777, "learning_rate": 4.294916288560722e-06, "loss": 0.0929, "step": 145050 }, { "epoch": 2.952875318066158, "grad_norm": 0.1005595708614116, "learning_rate": 4.294212828017501e-06, "loss": 0.0664, "step": 145060 }, { "epoch": 2.9530788804071246, "grad_norm": 15.20056229879781, "learning_rate": 4.29350938172856e-06, "loss": 0.1235, "step": 145070 }, { "epoch": 2.9532824427480917, "grad_norm": 5.46631659932673, "learning_rate": 4.2928059497081095e-06, "loss": 0.1387, "step": 145080 }, { "epoch": 2.953486005089059, "grad_norm": 0.700496114282266, "learning_rate": 4.2921025319703515e-06, "loss": 0.1361, "step": 145090 }, { "epoch": 2.9536895674300254, "grad_norm": 3.8792241286285805, "learning_rate": 4.291399128529497e-06, "loss": 0.1003, "step": 145100 }, { "epoch": 2.9538931297709925, "grad_norm": 3.157238337258184, "learning_rate": 4.290695739399747e-06, "loss": 0.097, "step": 145110 }, { "epoch": 2.954096692111959, "grad_norm": 0.2373854446610898, "learning_rate": 4.28999236459531e-06, "loss": 0.08, "step": 145120 }, { "epoch": 2.954300254452926, "grad_norm": 29.477229622409396, "learning_rate": 4.289289004130395e-06, "loss": 0.1191, "step": 145130 }, { "epoch": 2.9545038167938933, "grad_norm": 2.923730600523685, "learning_rate": 4.2885856580192016e-06, "loss": 0.1901, "step": 145140 }, { "epoch": 2.95470737913486, "grad_norm": 6.051739047219581, "learning_rate": 4.287882326275936e-06, "loss": 0.1092, "step": 145150 }, { "epoch": 2.954910941475827, "grad_norm": 18.31945672928357, "learning_rate": 4.287179008914806e-06, "loss": 0.1693, "step": 145160 }, { "epoch": 2.9551145038167936, "grad_norm": 3.4660053602102416, "learning_rate": 4.286475705950013e-06, "loss": 0.1561, "step": 145170 }, { "epoch": 2.9553180661577607, "grad_norm": 14.012099538094668, "learning_rate": 4.285772417395764e-06, "loss": 0.0859, "step": 145180 }, { "epoch": 2.955521628498728, "grad_norm": 12.115583288500963, "learning_rate": 4.285069143266258e-06, "loss": 0.1038, "step": 145190 }, { "epoch": 2.9557251908396944, "grad_norm": 6.180617341598876, "learning_rate": 4.284365883575704e-06, "loss": 0.2233, "step": 145200 }, { "epoch": 2.9559287531806615, "grad_norm": 14.6104450803529, "learning_rate": 4.283662638338302e-06, "loss": 0.1487, "step": 145210 }, { "epoch": 2.9561323155216286, "grad_norm": 7.513851371413598, "learning_rate": 4.282959407568254e-06, "loss": 0.0711, "step": 145220 }, { "epoch": 2.956335877862595, "grad_norm": 8.55097129315875, "learning_rate": 4.282256191279768e-06, "loss": 0.0955, "step": 145230 }, { "epoch": 2.9565394402035623, "grad_norm": 14.364837287630538, "learning_rate": 4.281552989487039e-06, "loss": 0.1414, "step": 145240 }, { "epoch": 2.9567430025445294, "grad_norm": 0.044675341637502065, "learning_rate": 4.280849802204274e-06, "loss": 0.0958, "step": 145250 }, { "epoch": 2.956946564885496, "grad_norm": 4.397298702791079, "learning_rate": 4.280146629445674e-06, "loss": 0.1084, "step": 145260 }, { "epoch": 2.957150127226463, "grad_norm": 9.359430928307741, "learning_rate": 4.279443471225439e-06, "loss": 0.1308, "step": 145270 }, { "epoch": 2.95735368956743, "grad_norm": 4.020805056775524, "learning_rate": 4.278740327557772e-06, "loss": 0.1284, "step": 145280 }, { "epoch": 2.957557251908397, "grad_norm": 7.999194397624081, "learning_rate": 4.278037198456873e-06, "loss": 0.0847, "step": 145290 }, { "epoch": 2.957760814249364, "grad_norm": 2.803255380111929, "learning_rate": 4.277334083936942e-06, "loss": 0.0925, "step": 145300 }, { "epoch": 2.957964376590331, "grad_norm": 7.613368120512932, "learning_rate": 4.276630984012181e-06, "loss": 0.0831, "step": 145310 }, { "epoch": 2.9581679389312976, "grad_norm": 0.08393106177463586, "learning_rate": 4.275927898696788e-06, "loss": 0.1307, "step": 145320 }, { "epoch": 2.9583715012722647, "grad_norm": 0.9408256595906611, "learning_rate": 4.275224828004963e-06, "loss": 0.215, "step": 145330 }, { "epoch": 2.9585750636132317, "grad_norm": 1.5527457031089027, "learning_rate": 4.274521771950908e-06, "loss": 0.1188, "step": 145340 }, { "epoch": 2.9587786259541984, "grad_norm": 0.1663905026580197, "learning_rate": 4.273818730548819e-06, "loss": 0.1327, "step": 145350 }, { "epoch": 2.9589821882951655, "grad_norm": 0.25648023139036985, "learning_rate": 4.2731157038128955e-06, "loss": 0.1943, "step": 145360 }, { "epoch": 2.9591857506361325, "grad_norm": 10.24727078755976, "learning_rate": 4.2724126917573384e-06, "loss": 0.0886, "step": 145370 }, { "epoch": 2.959389312977099, "grad_norm": 2.3021744089835328, "learning_rate": 4.2717096943963436e-06, "loss": 0.1199, "step": 145380 }, { "epoch": 2.9595928753180663, "grad_norm": 44.748297610444695, "learning_rate": 4.27100671174411e-06, "loss": 0.1773, "step": 145390 }, { "epoch": 2.9597964376590333, "grad_norm": 12.289387533523154, "learning_rate": 4.270303743814834e-06, "loss": 0.0378, "step": 145400 }, { "epoch": 2.96, "grad_norm": 20.27320667176037, "learning_rate": 4.269600790622712e-06, "loss": 0.0886, "step": 145410 }, { "epoch": 2.960203562340967, "grad_norm": 17.444109175225037, "learning_rate": 4.268897852181945e-06, "loss": 0.0365, "step": 145420 }, { "epoch": 2.9604071246819337, "grad_norm": 0.28448934067046716, "learning_rate": 4.268194928506728e-06, "loss": 0.1462, "step": 145430 }, { "epoch": 2.9606106870229008, "grad_norm": 0.06702751188127294, "learning_rate": 4.267492019611255e-06, "loss": 0.0598, "step": 145440 }, { "epoch": 2.960814249363868, "grad_norm": 43.506055343824855, "learning_rate": 4.266789125509726e-06, "loss": 0.1618, "step": 145450 }, { "epoch": 2.9610178117048345, "grad_norm": 13.645624339415459, "learning_rate": 4.266086246216333e-06, "loss": 0.1522, "step": 145460 }, { "epoch": 2.9612213740458015, "grad_norm": 10.394386763885088, "learning_rate": 4.265383381745276e-06, "loss": 0.0831, "step": 145470 }, { "epoch": 2.961424936386768, "grad_norm": 22.20673405445974, "learning_rate": 4.2646805321107454e-06, "loss": 0.1005, "step": 145480 }, { "epoch": 2.9616284987277353, "grad_norm": 0.8246153216441562, "learning_rate": 4.2639776973269395e-06, "loss": 0.0705, "step": 145490 }, { "epoch": 2.9618320610687023, "grad_norm": 4.37308826435223, "learning_rate": 4.263274877408053e-06, "loss": 0.0749, "step": 145500 }, { "epoch": 2.962035623409669, "grad_norm": 0.1109929480370571, "learning_rate": 4.262572072368276e-06, "loss": 0.1109, "step": 145510 }, { "epoch": 2.962239185750636, "grad_norm": 0.4203965259892643, "learning_rate": 4.261869282221809e-06, "loss": 0.1064, "step": 145520 }, { "epoch": 2.962442748091603, "grad_norm": 20.92903107310128, "learning_rate": 4.261166506982842e-06, "loss": 0.0851, "step": 145530 }, { "epoch": 2.9626463104325698, "grad_norm": 6.665779233688099, "learning_rate": 4.260463746665569e-06, "loss": 0.0861, "step": 145540 }, { "epoch": 2.962849872773537, "grad_norm": 0.17974084149111874, "learning_rate": 4.2597610012841835e-06, "loss": 0.1428, "step": 145550 }, { "epoch": 2.963053435114504, "grad_norm": 36.91772383627856, "learning_rate": 4.259058270852876e-06, "loss": 0.2436, "step": 145560 }, { "epoch": 2.9632569974554706, "grad_norm": 18.86826283172608, "learning_rate": 4.2583555553858426e-06, "loss": 0.1171, "step": 145570 }, { "epoch": 2.9634605597964376, "grad_norm": 38.94235659025623, "learning_rate": 4.257652854897275e-06, "loss": 0.096, "step": 145580 }, { "epoch": 2.9636641221374047, "grad_norm": 8.048179853067584, "learning_rate": 4.2569501694013615e-06, "loss": 0.0998, "step": 145590 }, { "epoch": 2.9638676844783713, "grad_norm": 0.0854895303028129, "learning_rate": 4.256247498912299e-06, "loss": 0.0719, "step": 145600 }, { "epoch": 2.9640712468193384, "grad_norm": 8.932199408537304, "learning_rate": 4.255544843444274e-06, "loss": 0.1302, "step": 145610 }, { "epoch": 2.9642748091603055, "grad_norm": 27.287764347113562, "learning_rate": 4.25484220301148e-06, "loss": 0.1925, "step": 145620 }, { "epoch": 2.964478371501272, "grad_norm": 27.745571514732198, "learning_rate": 4.254139577628109e-06, "loss": 0.1819, "step": 145630 }, { "epoch": 2.964681933842239, "grad_norm": 3.729464279452401, "learning_rate": 4.2534369673083485e-06, "loss": 0.0791, "step": 145640 }, { "epoch": 2.9648854961832063, "grad_norm": 0.5842395430426311, "learning_rate": 4.252734372066389e-06, "loss": 0.1289, "step": 145650 }, { "epoch": 2.965089058524173, "grad_norm": 1.7611585553126472, "learning_rate": 4.252031791916425e-06, "loss": 0.1082, "step": 145660 }, { "epoch": 2.96529262086514, "grad_norm": 2.456495576290184, "learning_rate": 4.25132922687264e-06, "loss": 0.0512, "step": 145670 }, { "epoch": 2.965496183206107, "grad_norm": 2.628412494548949, "learning_rate": 4.250626676949227e-06, "loss": 0.1298, "step": 145680 }, { "epoch": 2.9656997455470737, "grad_norm": 12.00699658459389, "learning_rate": 4.249924142160372e-06, "loss": 0.103, "step": 145690 }, { "epoch": 2.965903307888041, "grad_norm": 76.08245998229691, "learning_rate": 4.2492216225202635e-06, "loss": 0.2268, "step": 145700 }, { "epoch": 2.966106870229008, "grad_norm": 22.62875484649378, "learning_rate": 4.248519118043094e-06, "loss": 0.1022, "step": 145710 }, { "epoch": 2.9663104325699745, "grad_norm": 0.18727801310792622, "learning_rate": 4.247816628743048e-06, "loss": 0.1599, "step": 145720 }, { "epoch": 2.9665139949109416, "grad_norm": 0.03466735815418032, "learning_rate": 4.247114154634312e-06, "loss": 0.0708, "step": 145730 }, { "epoch": 2.9667175572519087, "grad_norm": 0.9153615733011726, "learning_rate": 4.246411695731079e-06, "loss": 0.1748, "step": 145740 }, { "epoch": 2.9669211195928753, "grad_norm": 0.7199188325556354, "learning_rate": 4.24570925204753e-06, "loss": 0.0323, "step": 145750 }, { "epoch": 2.9671246819338424, "grad_norm": 37.93775198094688, "learning_rate": 4.245006823597856e-06, "loss": 0.1077, "step": 145760 }, { "epoch": 2.967328244274809, "grad_norm": 7.538644930197616, "learning_rate": 4.244304410396239e-06, "loss": 0.0902, "step": 145770 }, { "epoch": 2.967531806615776, "grad_norm": 0.04663293611883531, "learning_rate": 4.243602012456869e-06, "loss": 0.1478, "step": 145780 }, { "epoch": 2.9677353689567427, "grad_norm": 0.03582562102418155, "learning_rate": 4.2428996297939315e-06, "loss": 0.0549, "step": 145790 }, { "epoch": 2.96793893129771, "grad_norm": 21.16531954613104, "learning_rate": 4.242197262421611e-06, "loss": 0.1192, "step": 145800 }, { "epoch": 2.968142493638677, "grad_norm": 5.0084703150499825, "learning_rate": 4.241494910354091e-06, "loss": 0.0641, "step": 145810 }, { "epoch": 2.9683460559796435, "grad_norm": 3.970410063080407, "learning_rate": 4.24079257360556e-06, "loss": 0.1008, "step": 145820 }, { "epoch": 2.9685496183206106, "grad_norm": 0.27853811444312093, "learning_rate": 4.2400902521902005e-06, "loss": 0.0503, "step": 145830 }, { "epoch": 2.9687531806615777, "grad_norm": 3.5278469954676344, "learning_rate": 4.239387946122198e-06, "loss": 0.1489, "step": 145840 }, { "epoch": 2.9689567430025443, "grad_norm": 10.72262694852093, "learning_rate": 4.238685655415733e-06, "loss": 0.1475, "step": 145850 }, { "epoch": 2.9691603053435114, "grad_norm": 9.025605022180775, "learning_rate": 4.237983380084993e-06, "loss": 0.1437, "step": 145860 }, { "epoch": 2.9693638676844785, "grad_norm": 11.176854233383096, "learning_rate": 4.237281120144162e-06, "loss": 0.0953, "step": 145870 }, { "epoch": 2.969567430025445, "grad_norm": 30.237326193908892, "learning_rate": 4.2365788756074174e-06, "loss": 0.0863, "step": 145880 }, { "epoch": 2.969770992366412, "grad_norm": 8.725849272852898, "learning_rate": 4.235876646488948e-06, "loss": 0.1851, "step": 145890 }, { "epoch": 2.9699745547073793, "grad_norm": 1.4671398365433201, "learning_rate": 4.235174432802935e-06, "loss": 0.1949, "step": 145900 }, { "epoch": 2.970178117048346, "grad_norm": 12.25735224767248, "learning_rate": 4.234472234563557e-06, "loss": 0.1371, "step": 145910 }, { "epoch": 2.970381679389313, "grad_norm": 0.13948717863547747, "learning_rate": 4.233770051785e-06, "loss": 0.0486, "step": 145920 }, { "epoch": 2.97058524173028, "grad_norm": 2.554955312592446, "learning_rate": 4.2330678844814435e-06, "loss": 0.1843, "step": 145930 }, { "epoch": 2.9707888040712467, "grad_norm": 2.928652766124456, "learning_rate": 4.2323657326670665e-06, "loss": 0.0915, "step": 145940 }, { "epoch": 2.9709923664122138, "grad_norm": 2.68175709748154, "learning_rate": 4.231663596356056e-06, "loss": 0.1942, "step": 145950 }, { "epoch": 2.971195928753181, "grad_norm": 8.294061268198822, "learning_rate": 4.230961475562586e-06, "loss": 0.0716, "step": 145960 }, { "epoch": 2.9713994910941475, "grad_norm": 11.66695212490547, "learning_rate": 4.230259370300841e-06, "loss": 0.2483, "step": 145970 }, { "epoch": 2.9716030534351145, "grad_norm": 0.21438595387267753, "learning_rate": 4.229557280584999e-06, "loss": 0.053, "step": 145980 }, { "epoch": 2.9718066157760816, "grad_norm": 9.252604235972262, "learning_rate": 4.228855206429239e-06, "loss": 0.1542, "step": 145990 }, { "epoch": 2.9720101781170483, "grad_norm": 1.1541331450127341, "learning_rate": 4.228153147847744e-06, "loss": 0.2402, "step": 146000 }, { "epoch": 2.9722137404580153, "grad_norm": 7.418169777224736, "learning_rate": 4.227451104854687e-06, "loss": 0.0823, "step": 146010 }, { "epoch": 2.9724173027989824, "grad_norm": 0.14179278067375808, "learning_rate": 4.226749077464251e-06, "loss": 0.0873, "step": 146020 }, { "epoch": 2.972620865139949, "grad_norm": 20.009697274076103, "learning_rate": 4.2260470656906146e-06, "loss": 0.1498, "step": 146030 }, { "epoch": 2.972824427480916, "grad_norm": 20.9578482470971, "learning_rate": 4.225345069547954e-06, "loss": 0.2165, "step": 146040 }, { "epoch": 2.973027989821883, "grad_norm": 6.939919051024809, "learning_rate": 4.224643089050448e-06, "loss": 0.1339, "step": 146050 }, { "epoch": 2.97323155216285, "grad_norm": 0.09074274544306532, "learning_rate": 4.2239411242122715e-06, "loss": 0.0792, "step": 146060 }, { "epoch": 2.973435114503817, "grad_norm": 10.82464145623858, "learning_rate": 4.223239175047605e-06, "loss": 0.1037, "step": 146070 }, { "epoch": 2.9736386768447836, "grad_norm": 13.232193308193667, "learning_rate": 4.222537241570625e-06, "loss": 0.064, "step": 146080 }, { "epoch": 2.9738422391857506, "grad_norm": 6.511081594116466, "learning_rate": 4.221835323795505e-06, "loss": 0.1619, "step": 146090 }, { "epoch": 2.9740458015267177, "grad_norm": 10.543959948949302, "learning_rate": 4.221133421736422e-06, "loss": 0.0629, "step": 146100 }, { "epoch": 2.9742493638676843, "grad_norm": 4.37218959933576, "learning_rate": 4.220431535407555e-06, "loss": 0.118, "step": 146110 }, { "epoch": 2.9744529262086514, "grad_norm": 0.1869164667104804, "learning_rate": 4.219729664823076e-06, "loss": 0.1733, "step": 146120 }, { "epoch": 2.974656488549618, "grad_norm": 9.791578552010376, "learning_rate": 4.219027809997163e-06, "loss": 0.1818, "step": 146130 }, { "epoch": 2.974860050890585, "grad_norm": 4.619433925131485, "learning_rate": 4.218325970943986e-06, "loss": 0.0967, "step": 146140 }, { "epoch": 2.975063613231552, "grad_norm": 15.588667212939185, "learning_rate": 4.217624147677724e-06, "loss": 0.108, "step": 146150 }, { "epoch": 2.975267175572519, "grad_norm": 8.984239978656031, "learning_rate": 4.216922340212552e-06, "loss": 0.0824, "step": 146160 }, { "epoch": 2.975470737913486, "grad_norm": 14.90578984477546, "learning_rate": 4.21622054856264e-06, "loss": 0.0793, "step": 146170 }, { "epoch": 2.975674300254453, "grad_norm": 7.614918234706074, "learning_rate": 4.215518772742164e-06, "loss": 0.0684, "step": 146180 }, { "epoch": 2.9758778625954196, "grad_norm": 0.31866309932525183, "learning_rate": 4.214817012765298e-06, "loss": 0.1081, "step": 146190 }, { "epoch": 2.9760814249363867, "grad_norm": 0.3914430849159752, "learning_rate": 4.2141152686462115e-06, "loss": 0.0528, "step": 146200 }, { "epoch": 2.976284987277354, "grad_norm": 0.296694487467969, "learning_rate": 4.213413540399083e-06, "loss": 0.1022, "step": 146210 }, { "epoch": 2.9764885496183204, "grad_norm": 2.508492878492453, "learning_rate": 4.212711828038077e-06, "loss": 0.0434, "step": 146220 }, { "epoch": 2.9766921119592875, "grad_norm": 22.09388211708959, "learning_rate": 4.212010131577372e-06, "loss": 0.1335, "step": 146230 }, { "epoch": 2.9768956743002546, "grad_norm": 6.924623566534965, "learning_rate": 4.211308451031138e-06, "loss": 0.0738, "step": 146240 }, { "epoch": 2.9770992366412212, "grad_norm": 54.10956945331545, "learning_rate": 4.2106067864135425e-06, "loss": 0.1596, "step": 146250 }, { "epoch": 2.9773027989821883, "grad_norm": 10.51800248813669, "learning_rate": 4.209905137738761e-06, "loss": 0.1216, "step": 146260 }, { "epoch": 2.9775063613231554, "grad_norm": 1.0382931991944395, "learning_rate": 4.209203505020965e-06, "loss": 0.162, "step": 146270 }, { "epoch": 2.977709923664122, "grad_norm": 0.16957181998161072, "learning_rate": 4.20850188827432e-06, "loss": 0.1281, "step": 146280 }, { "epoch": 2.977913486005089, "grad_norm": 7.348050826303125, "learning_rate": 4.207800287513001e-06, "loss": 0.1108, "step": 146290 }, { "epoch": 2.978117048346056, "grad_norm": 0.21585996509107708, "learning_rate": 4.207098702751174e-06, "loss": 0.0882, "step": 146300 }, { "epoch": 2.978320610687023, "grad_norm": 19.612514283226425, "learning_rate": 4.206397134003009e-06, "loss": 0.1569, "step": 146310 }, { "epoch": 2.97852417302799, "grad_norm": 4.977597716164808, "learning_rate": 4.205695581282679e-06, "loss": 0.0725, "step": 146320 }, { "epoch": 2.978727735368957, "grad_norm": 29.472396247020537, "learning_rate": 4.204994044604348e-06, "loss": 0.0922, "step": 146330 }, { "epoch": 2.9789312977099236, "grad_norm": 9.836652533062296, "learning_rate": 4.204292523982188e-06, "loss": 0.2468, "step": 146340 }, { "epoch": 2.9791348600508907, "grad_norm": 0.9686041051981237, "learning_rate": 4.2035910194303635e-06, "loss": 0.0974, "step": 146350 }, { "epoch": 2.9793384223918578, "grad_norm": 1.8941434621302706, "learning_rate": 4.202889530963043e-06, "loss": 0.1998, "step": 146360 }, { "epoch": 2.9795419847328244, "grad_norm": 12.90835702771887, "learning_rate": 4.202188058594398e-06, "loss": 0.1236, "step": 146370 }, { "epoch": 2.9797455470737915, "grad_norm": 0.21200281825808734, "learning_rate": 4.201486602338591e-06, "loss": 0.0605, "step": 146380 }, { "epoch": 2.9799491094147585, "grad_norm": 16.030172129288225, "learning_rate": 4.2007851622097896e-06, "loss": 0.134, "step": 146390 }, { "epoch": 2.980152671755725, "grad_norm": 0.5443460187712291, "learning_rate": 4.200083738222163e-06, "loss": 0.1214, "step": 146400 }, { "epoch": 2.9803562340966923, "grad_norm": 1.8863995644458684, "learning_rate": 4.199382330389875e-06, "loss": 0.2168, "step": 146410 }, { "epoch": 2.980559796437659, "grad_norm": 3.680714746585955, "learning_rate": 4.198680938727093e-06, "loss": 0.0758, "step": 146420 }, { "epoch": 2.980763358778626, "grad_norm": 22.438885929840747, "learning_rate": 4.19797956324798e-06, "loss": 0.1705, "step": 146430 }, { "epoch": 2.9809669211195926, "grad_norm": 8.558854049300935, "learning_rate": 4.197278203966703e-06, "loss": 0.1958, "step": 146440 }, { "epoch": 2.9811704834605597, "grad_norm": 11.687762384670378, "learning_rate": 4.196576860897428e-06, "loss": 0.1371, "step": 146450 }, { "epoch": 2.9813740458015268, "grad_norm": 2.3447619952251952, "learning_rate": 4.1958755340543155e-06, "loss": 0.1716, "step": 146460 }, { "epoch": 2.9815776081424934, "grad_norm": 0.11315042274297966, "learning_rate": 4.195174223451534e-06, "loss": 0.0903, "step": 146470 }, { "epoch": 2.9817811704834605, "grad_norm": 17.797164853795884, "learning_rate": 4.194472929103246e-06, "loss": 0.1403, "step": 146480 }, { "epoch": 2.9819847328244276, "grad_norm": 0.9682408020194311, "learning_rate": 4.193771651023614e-06, "loss": 0.0585, "step": 146490 }, { "epoch": 2.982188295165394, "grad_norm": 0.15142686132182193, "learning_rate": 4.193070389226804e-06, "loss": 0.2213, "step": 146500 }, { "epoch": 2.9823918575063613, "grad_norm": 0.1609204345671737, "learning_rate": 4.192369143726973e-06, "loss": 0.136, "step": 146510 }, { "epoch": 2.9825954198473283, "grad_norm": 8.498295512063768, "learning_rate": 4.19166791453829e-06, "loss": 0.1743, "step": 146520 }, { "epoch": 2.982798982188295, "grad_norm": 28.94113600187603, "learning_rate": 4.190966701674915e-06, "loss": 0.1357, "step": 146530 }, { "epoch": 2.983002544529262, "grad_norm": 28.464033671090007, "learning_rate": 4.1902655051510065e-06, "loss": 0.1531, "step": 146540 }, { "epoch": 2.983206106870229, "grad_norm": 1.1359929132946387, "learning_rate": 4.189564324980731e-06, "loss": 0.0953, "step": 146550 }, { "epoch": 2.9834096692111958, "grad_norm": 16.770688678681065, "learning_rate": 4.188863161178248e-06, "loss": 0.154, "step": 146560 }, { "epoch": 2.983613231552163, "grad_norm": 0.37286465196262086, "learning_rate": 4.188162013757717e-06, "loss": 0.0812, "step": 146570 }, { "epoch": 2.98381679389313, "grad_norm": 22.38553242816493, "learning_rate": 4.187460882733302e-06, "loss": 0.0713, "step": 146580 }, { "epoch": 2.9840203562340966, "grad_norm": 3.8438539507427154, "learning_rate": 4.18675976811916e-06, "loss": 0.1153, "step": 146590 }, { "epoch": 2.9842239185750636, "grad_norm": 8.547640774942986, "learning_rate": 4.18605866992945e-06, "loss": 0.2483, "step": 146600 }, { "epoch": 2.9844274809160307, "grad_norm": 0.30292024668195017, "learning_rate": 4.185357588178336e-06, "loss": 0.1146, "step": 146610 }, { "epoch": 2.9846310432569974, "grad_norm": 0.05527183707407544, "learning_rate": 4.1846565228799745e-06, "loss": 0.1883, "step": 146620 }, { "epoch": 2.9848346055979644, "grad_norm": 0.8555389812370873, "learning_rate": 4.183955474048524e-06, "loss": 0.0253, "step": 146630 }, { "epoch": 2.9850381679389315, "grad_norm": 7.471281114868324, "learning_rate": 4.1832544416981445e-06, "loss": 0.0951, "step": 146640 }, { "epoch": 2.985241730279898, "grad_norm": 0.15712907908856127, "learning_rate": 4.182553425842991e-06, "loss": 0.0637, "step": 146650 }, { "epoch": 2.985445292620865, "grad_norm": 11.261879812134351, "learning_rate": 4.181852426497227e-06, "loss": 0.0842, "step": 146660 }, { "epoch": 2.9856488549618323, "grad_norm": 4.027743190818283, "learning_rate": 4.181151443675006e-06, "loss": 0.0614, "step": 146670 }, { "epoch": 2.985852417302799, "grad_norm": 7.035156815992844, "learning_rate": 4.180450477390484e-06, "loss": 0.1719, "step": 146680 }, { "epoch": 2.986055979643766, "grad_norm": 17.99331153407171, "learning_rate": 4.179749527657823e-06, "loss": 0.0922, "step": 146690 }, { "epoch": 2.986259541984733, "grad_norm": 9.443751324172386, "learning_rate": 4.179048594491177e-06, "loss": 0.1399, "step": 146700 }, { "epoch": 2.9864631043256997, "grad_norm": 2.2830612019916843, "learning_rate": 4.178347677904702e-06, "loss": 0.1957, "step": 146710 }, { "epoch": 2.986666666666667, "grad_norm": 0.5945011124954439, "learning_rate": 4.17764677791255e-06, "loss": 0.0356, "step": 146720 }, { "epoch": 2.9868702290076334, "grad_norm": 16.776818410315524, "learning_rate": 4.1769458945288844e-06, "loss": 0.1425, "step": 146730 }, { "epoch": 2.9870737913486005, "grad_norm": 16.051563110215007, "learning_rate": 4.1762450277678564e-06, "loss": 0.1404, "step": 146740 }, { "epoch": 2.9872773536895676, "grad_norm": 22.539023232586295, "learning_rate": 4.1755441776436186e-06, "loss": 0.0838, "step": 146750 }, { "epoch": 2.9874809160305342, "grad_norm": 0.30510970124727227, "learning_rate": 4.17484334417033e-06, "loss": 0.1747, "step": 146760 }, { "epoch": 2.9876844783715013, "grad_norm": 10.375055888581162, "learning_rate": 4.174142527362143e-06, "loss": 0.1955, "step": 146770 }, { "epoch": 2.987888040712468, "grad_norm": 12.564918272396072, "learning_rate": 4.173441727233211e-06, "loss": 0.153, "step": 146780 }, { "epoch": 2.988091603053435, "grad_norm": 8.216491386883435, "learning_rate": 4.172740943797689e-06, "loss": 0.1837, "step": 146790 }, { "epoch": 2.988295165394402, "grad_norm": 7.4293431542765305, "learning_rate": 4.1720401770697265e-06, "loss": 0.1417, "step": 146800 }, { "epoch": 2.9884987277353687, "grad_norm": 22.82376780601186, "learning_rate": 4.17133942706348e-06, "loss": 0.0332, "step": 146810 }, { "epoch": 2.988702290076336, "grad_norm": 0.04240228907699144, "learning_rate": 4.170638693793103e-06, "loss": 0.0807, "step": 146820 }, { "epoch": 2.988905852417303, "grad_norm": 29.758220211416305, "learning_rate": 4.1699379772727434e-06, "loss": 0.1784, "step": 146830 }, { "epoch": 2.9891094147582695, "grad_norm": 0.18786031519943155, "learning_rate": 4.169237277516556e-06, "loss": 0.0628, "step": 146840 }, { "epoch": 2.9893129770992366, "grad_norm": 49.75356382119952, "learning_rate": 4.168536594538693e-06, "loss": 0.2023, "step": 146850 }, { "epoch": 2.9895165394402037, "grad_norm": 0.257729054822005, "learning_rate": 4.1678359283533026e-06, "loss": 0.1078, "step": 146860 }, { "epoch": 2.9897201017811703, "grad_norm": 0.18676129749680548, "learning_rate": 4.167135278974539e-06, "loss": 0.11, "step": 146870 }, { "epoch": 2.9899236641221374, "grad_norm": 8.742129225472421, "learning_rate": 4.166434646416551e-06, "loss": 0.178, "step": 146880 }, { "epoch": 2.9901272264631045, "grad_norm": 7.595281989687245, "learning_rate": 4.165734030693487e-06, "loss": 0.1278, "step": 146890 }, { "epoch": 2.990330788804071, "grad_norm": 0.3571882502499972, "learning_rate": 4.165033431819502e-06, "loss": 0.1448, "step": 146900 }, { "epoch": 2.990534351145038, "grad_norm": 27.900213783776376, "learning_rate": 4.1643328498087396e-06, "loss": 0.1482, "step": 146910 }, { "epoch": 2.9907379134860053, "grad_norm": 11.037249751946309, "learning_rate": 4.163632284675352e-06, "loss": 0.1304, "step": 146920 }, { "epoch": 2.990941475826972, "grad_norm": 0.5120122306448878, "learning_rate": 4.16293173643349e-06, "loss": 0.153, "step": 146930 }, { "epoch": 2.991145038167939, "grad_norm": 5.305729499354193, "learning_rate": 4.162231205097297e-06, "loss": 0.0902, "step": 146940 }, { "epoch": 2.991348600508906, "grad_norm": 22.612862313994437, "learning_rate": 4.161530690680925e-06, "loss": 0.0692, "step": 146950 }, { "epoch": 2.9915521628498727, "grad_norm": 0.5006105277536723, "learning_rate": 4.1608301931985206e-06, "loss": 0.0631, "step": 146960 }, { "epoch": 2.9917557251908398, "grad_norm": 8.500490992013312, "learning_rate": 4.16012971266423e-06, "loss": 0.1275, "step": 146970 }, { "epoch": 2.991959287531807, "grad_norm": 43.11979292455734, "learning_rate": 4.159429249092204e-06, "loss": 0.0979, "step": 146980 }, { "epoch": 2.9921628498727735, "grad_norm": 20.556705911417488, "learning_rate": 4.1587288024965855e-06, "loss": 0.0985, "step": 146990 }, { "epoch": 2.9923664122137406, "grad_norm": 1.1386352586010915, "learning_rate": 4.158028372891523e-06, "loss": 0.0706, "step": 147000 }, { "epoch": 2.9925699745547076, "grad_norm": 8.112541977178175, "learning_rate": 4.1573279602911606e-06, "loss": 0.0946, "step": 147010 }, { "epoch": 2.9927735368956743, "grad_norm": 0.26241714294736873, "learning_rate": 4.1566275647096465e-06, "loss": 0.1309, "step": 147020 }, { "epoch": 2.9929770992366413, "grad_norm": 0.18755620381440288, "learning_rate": 4.155927186161126e-06, "loss": 0.2924, "step": 147030 }, { "epoch": 2.993180661577608, "grad_norm": 0.5040103375120499, "learning_rate": 4.155226824659741e-06, "loss": 0.0366, "step": 147040 }, { "epoch": 2.993384223918575, "grad_norm": 24.864532128933487, "learning_rate": 4.154526480219638e-06, "loss": 0.0491, "step": 147050 }, { "epoch": 2.993587786259542, "grad_norm": 0.13231396660419056, "learning_rate": 4.1538261528549645e-06, "loss": 0.1204, "step": 147060 }, { "epoch": 2.9937913486005088, "grad_norm": 1.7907004370889579, "learning_rate": 4.15312584257986e-06, "loss": 0.0812, "step": 147070 }, { "epoch": 2.993994910941476, "grad_norm": 4.704143539515095, "learning_rate": 4.152425549408471e-06, "loss": 0.0769, "step": 147080 }, { "epoch": 2.9941984732824425, "grad_norm": 0.36522464684716266, "learning_rate": 4.151725273354937e-06, "loss": 0.1038, "step": 147090 }, { "epoch": 2.9944020356234096, "grad_norm": 0.1399151927559296, "learning_rate": 4.151025014433406e-06, "loss": 0.1442, "step": 147100 }, { "epoch": 2.9946055979643766, "grad_norm": 0.7423201146879443, "learning_rate": 4.150324772658019e-06, "loss": 0.0921, "step": 147110 }, { "epoch": 2.9948091603053433, "grad_norm": 7.6556767376963, "learning_rate": 4.149624548042915e-06, "loss": 0.1923, "step": 147120 }, { "epoch": 2.9950127226463104, "grad_norm": 6.18142274946541, "learning_rate": 4.1489243406022396e-06, "loss": 0.1413, "step": 147130 }, { "epoch": 2.9952162849872774, "grad_norm": 23.037938852406256, "learning_rate": 4.148224150350134e-06, "loss": 0.1636, "step": 147140 }, { "epoch": 2.995419847328244, "grad_norm": 0.29062480746732317, "learning_rate": 4.147523977300738e-06, "loss": 0.1276, "step": 147150 }, { "epoch": 2.995623409669211, "grad_norm": 14.357712752738852, "learning_rate": 4.146823821468195e-06, "loss": 0.1965, "step": 147160 }, { "epoch": 2.9958269720101782, "grad_norm": 0.14639249740698201, "learning_rate": 4.146123682866641e-06, "loss": 0.0889, "step": 147170 }, { "epoch": 2.996030534351145, "grad_norm": 0.06507079310734255, "learning_rate": 4.145423561510219e-06, "loss": 0.1165, "step": 147180 }, { "epoch": 2.996234096692112, "grad_norm": 0.1101486190692804, "learning_rate": 4.144723457413073e-06, "loss": 0.0589, "step": 147190 }, { "epoch": 2.996437659033079, "grad_norm": 0.016903235450478456, "learning_rate": 4.144023370589334e-06, "loss": 0.1245, "step": 147200 }, { "epoch": 2.9966412213740456, "grad_norm": 0.10031716490130625, "learning_rate": 4.1433233010531474e-06, "loss": 0.0469, "step": 147210 }, { "epoch": 2.9968447837150127, "grad_norm": 17.062572583386775, "learning_rate": 4.142623248818651e-06, "loss": 0.135, "step": 147220 }, { "epoch": 2.99704834605598, "grad_norm": 7.912330545753074, "learning_rate": 4.14192321389998e-06, "loss": 0.1519, "step": 147230 }, { "epoch": 2.9972519083969464, "grad_norm": 0.10612091117970625, "learning_rate": 4.141223196311277e-06, "loss": 0.0594, "step": 147240 }, { "epoch": 2.9974554707379135, "grad_norm": 8.698750931357734, "learning_rate": 4.140523196066677e-06, "loss": 0.083, "step": 147250 }, { "epoch": 2.9976590330788806, "grad_norm": 21.57076465868074, "learning_rate": 4.139823213180316e-06, "loss": 0.1767, "step": 147260 }, { "epoch": 2.9978625954198472, "grad_norm": 28.588231204577152, "learning_rate": 4.139123247666336e-06, "loss": 0.06, "step": 147270 }, { "epoch": 2.9980661577608143, "grad_norm": 1.7287384477577492, "learning_rate": 4.1384232995388705e-06, "loss": 0.0805, "step": 147280 }, { "epoch": 2.9982697201017814, "grad_norm": 0.11355248581365253, "learning_rate": 4.137723368812054e-06, "loss": 0.1198, "step": 147290 }, { "epoch": 2.998473282442748, "grad_norm": 17.11659505687072, "learning_rate": 4.137023455500027e-06, "loss": 0.0965, "step": 147300 }, { "epoch": 2.998676844783715, "grad_norm": 7.984938659808191, "learning_rate": 4.136323559616921e-06, "loss": 0.0911, "step": 147310 }, { "epoch": 2.998880407124682, "grad_norm": 10.912145305630514, "learning_rate": 4.135623681176875e-06, "loss": 0.1799, "step": 147320 }, { "epoch": 2.999083969465649, "grad_norm": 0.386607212659674, "learning_rate": 4.134923820194022e-06, "loss": 0.0751, "step": 147330 }, { "epoch": 2.999287531806616, "grad_norm": 11.357258899039783, "learning_rate": 4.134223976682494e-06, "loss": 0.2096, "step": 147340 }, { "epoch": 2.999491094147583, "grad_norm": 14.231155167782473, "learning_rate": 4.133524150656431e-06, "loss": 0.1238, "step": 147350 }, { "epoch": 2.9996946564885496, "grad_norm": 0.3356089977697714, "learning_rate": 4.132824342129962e-06, "loss": 0.1095, "step": 147360 }, { "epoch": 2.9998982188295167, "grad_norm": 0.030237232497208937, "learning_rate": 4.132124551117224e-06, "loss": 0.1368, "step": 147370 }, { "epoch": 3.0001017811704833, "grad_norm": 0.039434605816427035, "learning_rate": 4.131424777632346e-06, "loss": 0.0524, "step": 147380 }, { "epoch": 3.0003053435114504, "grad_norm": 0.2161634267199889, "learning_rate": 4.130725021689465e-06, "loss": 0.0414, "step": 147390 }, { "epoch": 3.0005089058524175, "grad_norm": 11.667429554108564, "learning_rate": 4.130025283302712e-06, "loss": 0.0505, "step": 147400 }, { "epoch": 3.000712468193384, "grad_norm": 0.06585763902539263, "learning_rate": 4.129325562486217e-06, "loss": 0.0356, "step": 147410 }, { "epoch": 3.000916030534351, "grad_norm": 0.05861946954125762, "learning_rate": 4.1286258592541154e-06, "loss": 0.0768, "step": 147420 }, { "epoch": 3.0011195928753183, "grad_norm": 2.256014374001293, "learning_rate": 4.127926173620538e-06, "loss": 0.0387, "step": 147430 }, { "epoch": 3.001323155216285, "grad_norm": 0.07785531464649924, "learning_rate": 4.127226505599612e-06, "loss": 0.1079, "step": 147440 }, { "epoch": 3.001526717557252, "grad_norm": 48.60372921814917, "learning_rate": 4.126526855205472e-06, "loss": 0.1058, "step": 147450 }, { "epoch": 3.0017302798982186, "grad_norm": 0.14461799684106266, "learning_rate": 4.125827222452248e-06, "loss": 0.0867, "step": 147460 }, { "epoch": 3.0019338422391857, "grad_norm": 0.5195330725034973, "learning_rate": 4.125127607354067e-06, "loss": 0.0214, "step": 147470 }, { "epoch": 3.0021374045801528, "grad_norm": 0.07239833212744091, "learning_rate": 4.124428009925064e-06, "loss": 0.0502, "step": 147480 }, { "epoch": 3.0023409669211194, "grad_norm": 0.09083251881401937, "learning_rate": 4.123728430179363e-06, "loss": 0.085, "step": 147490 }, { "epoch": 3.0025445292620865, "grad_norm": 3.3285139981433427, "learning_rate": 4.123028868131095e-06, "loss": 0.0152, "step": 147500 }, { "epoch": 3.0027480916030536, "grad_norm": 3.532809933508627, "learning_rate": 4.12232932379439e-06, "loss": 0.0351, "step": 147510 }, { "epoch": 3.00295165394402, "grad_norm": 0.048024675609861976, "learning_rate": 4.1216297971833725e-06, "loss": 0.004, "step": 147520 }, { "epoch": 3.0031552162849873, "grad_norm": 0.015021170353296738, "learning_rate": 4.120930288312174e-06, "loss": 0.0727, "step": 147530 }, { "epoch": 3.0033587786259544, "grad_norm": 0.06800718743699406, "learning_rate": 4.120230797194921e-06, "loss": 0.0761, "step": 147540 }, { "epoch": 3.003562340966921, "grad_norm": 68.85553215150769, "learning_rate": 4.119531323845738e-06, "loss": 0.029, "step": 147550 }, { "epoch": 3.003765903307888, "grad_norm": 1.5179827411429614, "learning_rate": 4.118831868278756e-06, "loss": 0.101, "step": 147560 }, { "epoch": 3.003969465648855, "grad_norm": 0.022757906725600355, "learning_rate": 4.118132430508099e-06, "loss": 0.0023, "step": 147570 }, { "epoch": 3.0041730279898218, "grad_norm": 0.5627570565557675, "learning_rate": 4.117433010547891e-06, "loss": 0.2048, "step": 147580 }, { "epoch": 3.004376590330789, "grad_norm": 0.04932833834711836, "learning_rate": 4.1167336084122635e-06, "loss": 0.0701, "step": 147590 }, { "epoch": 3.004580152671756, "grad_norm": 1.0579379977186625, "learning_rate": 4.116034224115338e-06, "loss": 0.0019, "step": 147600 }, { "epoch": 3.0047837150127226, "grad_norm": 11.098034648871725, "learning_rate": 4.11533485767124e-06, "loss": 0.0321, "step": 147610 }, { "epoch": 3.0049872773536896, "grad_norm": 0.0548129739907698, "learning_rate": 4.114635509094093e-06, "loss": 0.1309, "step": 147620 }, { "epoch": 3.0051908396946563, "grad_norm": 0.01623678275935959, "learning_rate": 4.11393617839802e-06, "loss": 0.0743, "step": 147630 }, { "epoch": 3.0053944020356234, "grad_norm": 0.1979925186175931, "learning_rate": 4.113236865597151e-06, "loss": 0.0247, "step": 147640 }, { "epoch": 3.0055979643765904, "grad_norm": 41.11946694789866, "learning_rate": 4.1125375707056036e-06, "loss": 0.0925, "step": 147650 }, { "epoch": 3.005801526717557, "grad_norm": 0.02744196182077106, "learning_rate": 4.111838293737502e-06, "loss": 0.1206, "step": 147660 }, { "epoch": 3.006005089058524, "grad_norm": 0.015315086508652877, "learning_rate": 4.111139034706973e-06, "loss": 0.1125, "step": 147670 }, { "epoch": 3.0062086513994912, "grad_norm": 0.05110464692391459, "learning_rate": 4.1104397936281345e-06, "loss": 0.0889, "step": 147680 }, { "epoch": 3.006412213740458, "grad_norm": 0.4079131659233424, "learning_rate": 4.109740570515111e-06, "loss": 0.0205, "step": 147690 }, { "epoch": 3.006615776081425, "grad_norm": 0.44511545729510876, "learning_rate": 4.109041365382021e-06, "loss": 0.0032, "step": 147700 }, { "epoch": 3.006819338422392, "grad_norm": 8.224155268568852, "learning_rate": 4.10834217824299e-06, "loss": 0.0725, "step": 147710 }, { "epoch": 3.0070229007633587, "grad_norm": 0.24734791532983824, "learning_rate": 4.107643009112137e-06, "loss": 0.1029, "step": 147720 }, { "epoch": 3.0072264631043257, "grad_norm": 7.642559030875429, "learning_rate": 4.106943858003581e-06, "loss": 0.1199, "step": 147730 }, { "epoch": 3.007430025445293, "grad_norm": 0.10644794700233856, "learning_rate": 4.106244724931448e-06, "loss": 0.0339, "step": 147740 }, { "epoch": 3.0076335877862594, "grad_norm": 7.54528302979798, "learning_rate": 4.10554560990985e-06, "loss": 0.0548, "step": 147750 }, { "epoch": 3.0078371501272265, "grad_norm": 0.02644101819297602, "learning_rate": 4.104846512952912e-06, "loss": 0.0317, "step": 147760 }, { "epoch": 3.0080407124681936, "grad_norm": 0.46186233446747144, "learning_rate": 4.1041474340747525e-06, "loss": 0.0019, "step": 147770 }, { "epoch": 3.0082442748091602, "grad_norm": 28.6930136355509, "learning_rate": 4.1034483732894875e-06, "loss": 0.0185, "step": 147780 }, { "epoch": 3.0084478371501273, "grad_norm": 0.028651805623557555, "learning_rate": 4.102749330611238e-06, "loss": 0.0683, "step": 147790 }, { "epoch": 3.008651399491094, "grad_norm": 0.023865673194995274, "learning_rate": 4.102050306054123e-06, "loss": 0.0584, "step": 147800 }, { "epoch": 3.008854961832061, "grad_norm": 0.02161661629528888, "learning_rate": 4.101351299632256e-06, "loss": 0.0249, "step": 147810 }, { "epoch": 3.009058524173028, "grad_norm": 18.878423973149342, "learning_rate": 4.100652311359759e-06, "loss": 0.0609, "step": 147820 }, { "epoch": 3.0092620865139947, "grad_norm": 0.05073795943759538, "learning_rate": 4.099953341250746e-06, "loss": 0.0257, "step": 147830 }, { "epoch": 3.009465648854962, "grad_norm": 0.03615659441067272, "learning_rate": 4.0992543893193334e-06, "loss": 0.0006, "step": 147840 }, { "epoch": 3.009669211195929, "grad_norm": 0.011568112754916166, "learning_rate": 4.0985554555796415e-06, "loss": 0.0775, "step": 147850 }, { "epoch": 3.0098727735368955, "grad_norm": 7.989601418581344, "learning_rate": 4.097856540045782e-06, "loss": 0.072, "step": 147860 }, { "epoch": 3.0100763358778626, "grad_norm": 0.11497465710057092, "learning_rate": 4.097157642731869e-06, "loss": 0.0664, "step": 147870 }, { "epoch": 3.0102798982188297, "grad_norm": 0.01774519869511041, "learning_rate": 4.096458763652024e-06, "loss": 0.0799, "step": 147880 }, { "epoch": 3.0104834605597963, "grad_norm": 0.027868024991644758, "learning_rate": 4.095759902820357e-06, "loss": 0.0994, "step": 147890 }, { "epoch": 3.0106870229007634, "grad_norm": 1.0265647574143646, "learning_rate": 4.095061060250983e-06, "loss": 0.0114, "step": 147900 }, { "epoch": 3.0108905852417305, "grad_norm": 0.03240328014962827, "learning_rate": 4.094362235958017e-06, "loss": 0.0122, "step": 147910 }, { "epoch": 3.011094147582697, "grad_norm": 0.06828459315130604, "learning_rate": 4.09366342995557e-06, "loss": 0.0458, "step": 147920 }, { "epoch": 3.011297709923664, "grad_norm": 0.025491648038353236, "learning_rate": 4.09296464225776e-06, "loss": 0.0244, "step": 147930 }, { "epoch": 3.011501272264631, "grad_norm": 0.03144037102869106, "learning_rate": 4.0922658728786955e-06, "loss": 0.0139, "step": 147940 }, { "epoch": 3.011704834605598, "grad_norm": 0.23338608838160746, "learning_rate": 4.091567121832491e-06, "loss": 0.1316, "step": 147950 }, { "epoch": 3.011908396946565, "grad_norm": 0.07882452827604415, "learning_rate": 4.09086838913326e-06, "loss": 0.0164, "step": 147960 }, { "epoch": 3.0121119592875316, "grad_norm": 0.6300006283121624, "learning_rate": 4.090169674795112e-06, "loss": 0.0728, "step": 147970 }, { "epoch": 3.0123155216284987, "grad_norm": 6.986212331143236, "learning_rate": 4.08947097883216e-06, "loss": 0.0614, "step": 147980 }, { "epoch": 3.0125190839694658, "grad_norm": 0.012740439588124347, "learning_rate": 4.088772301258513e-06, "loss": 0.0286, "step": 147990 }, { "epoch": 3.0127226463104324, "grad_norm": 0.06422063306740418, "learning_rate": 4.088073642088283e-06, "loss": 0.0486, "step": 148000 }, { "epoch": 3.0129262086513995, "grad_norm": 0.018122059228737396, "learning_rate": 4.087375001335582e-06, "loss": 0.1578, "step": 148010 }, { "epoch": 3.0131297709923666, "grad_norm": 0.6488640215663606, "learning_rate": 4.086676379014515e-06, "loss": 0.1268, "step": 148020 }, { "epoch": 3.013333333333333, "grad_norm": 2.9405349322526027, "learning_rate": 4.085977775139197e-06, "loss": 0.0061, "step": 148030 }, { "epoch": 3.0135368956743003, "grad_norm": 0.05276714957500869, "learning_rate": 4.085279189723737e-06, "loss": 0.1108, "step": 148040 }, { "epoch": 3.0137404580152674, "grad_norm": 0.13997736194691598, "learning_rate": 4.084580622782239e-06, "loss": 0.0785, "step": 148050 }, { "epoch": 3.013944020356234, "grad_norm": 2.2955623870024318, "learning_rate": 4.0838820743288165e-06, "loss": 0.0278, "step": 148060 }, { "epoch": 3.014147582697201, "grad_norm": 0.5426268998658554, "learning_rate": 4.083183544377574e-06, "loss": 0.0362, "step": 148070 }, { "epoch": 3.014351145038168, "grad_norm": 12.292751120209692, "learning_rate": 4.08248503294262e-06, "loss": 0.051, "step": 148080 }, { "epoch": 3.014554707379135, "grad_norm": 7.645960325659613, "learning_rate": 4.081786540038065e-06, "loss": 0.0378, "step": 148090 }, { "epoch": 3.014758269720102, "grad_norm": 0.006050048791658744, "learning_rate": 4.081088065678011e-06, "loss": 0.0681, "step": 148100 }, { "epoch": 3.0149618320610685, "grad_norm": 0.39434161513579236, "learning_rate": 4.080389609876569e-06, "loss": 0.0384, "step": 148110 }, { "epoch": 3.0151653944020356, "grad_norm": 6.123798649587645, "learning_rate": 4.079691172647842e-06, "loss": 0.0134, "step": 148120 }, { "epoch": 3.0153689567430026, "grad_norm": 6.1168726643382305, "learning_rate": 4.078992754005937e-06, "loss": 0.0166, "step": 148130 }, { "epoch": 3.0155725190839693, "grad_norm": 2.967223854944373, "learning_rate": 4.078294353964961e-06, "loss": 0.0477, "step": 148140 }, { "epoch": 3.0157760814249364, "grad_norm": 6.565573209465294, "learning_rate": 4.077595972539016e-06, "loss": 0.0759, "step": 148150 }, { "epoch": 3.0159796437659034, "grad_norm": 3.6282787684736526, "learning_rate": 4.076897609742208e-06, "loss": 0.0029, "step": 148160 }, { "epoch": 3.01618320610687, "grad_norm": 2.798910070880006, "learning_rate": 4.076199265588645e-06, "loss": 0.0438, "step": 148170 }, { "epoch": 3.016386768447837, "grad_norm": 0.03478904150752122, "learning_rate": 4.075500940092425e-06, "loss": 0.0049, "step": 148180 }, { "epoch": 3.0165903307888042, "grad_norm": 0.9508571979778696, "learning_rate": 4.074802633267656e-06, "loss": 0.0843, "step": 148190 }, { "epoch": 3.016793893129771, "grad_norm": 0.06507040784787083, "learning_rate": 4.074104345128437e-06, "loss": 0.0259, "step": 148200 }, { "epoch": 3.016997455470738, "grad_norm": 0.11490664606071177, "learning_rate": 4.073406075688874e-06, "loss": 0.0758, "step": 148210 }, { "epoch": 3.017201017811705, "grad_norm": 0.030025604002142884, "learning_rate": 4.07270782496307e-06, "loss": 0.0498, "step": 148220 }, { "epoch": 3.0174045801526717, "grad_norm": 6.175671602635212, "learning_rate": 4.072009592965125e-06, "loss": 0.0934, "step": 148230 }, { "epoch": 3.0176081424936387, "grad_norm": 1.8330586446009938, "learning_rate": 4.07131137970914e-06, "loss": 0.0328, "step": 148240 }, { "epoch": 3.017811704834606, "grad_norm": 0.027790963867897513, "learning_rate": 4.070613185209221e-06, "loss": 0.0102, "step": 148250 }, { "epoch": 3.0180152671755724, "grad_norm": 0.049335951041076076, "learning_rate": 4.069915009479464e-06, "loss": 0.0525, "step": 148260 }, { "epoch": 3.0182188295165395, "grad_norm": 0.13040662903386366, "learning_rate": 4.069216852533973e-06, "loss": 0.0397, "step": 148270 }, { "epoch": 3.018422391857506, "grad_norm": 0.12745375377935592, "learning_rate": 4.068518714386843e-06, "loss": 0.0727, "step": 148280 }, { "epoch": 3.0186259541984732, "grad_norm": 0.010074321564838907, "learning_rate": 4.0678205950521795e-06, "loss": 0.0359, "step": 148290 }, { "epoch": 3.0188295165394403, "grad_norm": 1.9311095887836207, "learning_rate": 4.067122494544081e-06, "loss": 0.0778, "step": 148300 }, { "epoch": 3.019033078880407, "grad_norm": 0.03604971274841838, "learning_rate": 4.066424412876644e-06, "loss": 0.0528, "step": 148310 }, { "epoch": 3.019236641221374, "grad_norm": 0.003165427073970268, "learning_rate": 4.065726350063966e-06, "loss": 0.0218, "step": 148320 }, { "epoch": 3.019440203562341, "grad_norm": 0.10965403706637245, "learning_rate": 4.065028306120152e-06, "loss": 0.0033, "step": 148330 }, { "epoch": 3.0196437659033077, "grad_norm": 0.07930741989765464, "learning_rate": 4.064330281059293e-06, "loss": 0.0739, "step": 148340 }, { "epoch": 3.019847328244275, "grad_norm": 0.018851304117329947, "learning_rate": 4.06363227489549e-06, "loss": 0.0312, "step": 148350 }, { "epoch": 3.020050890585242, "grad_norm": 0.06065361843621186, "learning_rate": 4.062934287642839e-06, "loss": 0.0473, "step": 148360 }, { "epoch": 3.0202544529262085, "grad_norm": 0.019861387681261947, "learning_rate": 4.062236319315436e-06, "loss": 0.0023, "step": 148370 }, { "epoch": 3.0204580152671756, "grad_norm": 0.017733986898966752, "learning_rate": 4.0615383699273805e-06, "loss": 0.0338, "step": 148380 }, { "epoch": 3.0206615776081427, "grad_norm": 0.15399632691219542, "learning_rate": 4.060840439492764e-06, "loss": 0.0607, "step": 148390 }, { "epoch": 3.0208651399491093, "grad_norm": 0.09355539984317593, "learning_rate": 4.060142528025686e-06, "loss": 0.0353, "step": 148400 }, { "epoch": 3.0210687022900764, "grad_norm": 0.972884426904485, "learning_rate": 4.05944463554024e-06, "loss": 0.0102, "step": 148410 }, { "epoch": 3.021272264631043, "grad_norm": 0.0558648207588135, "learning_rate": 4.058746762050519e-06, "loss": 0.057, "step": 148420 }, { "epoch": 3.02147582697201, "grad_norm": 1.5697216389738524, "learning_rate": 4.058048907570623e-06, "loss": 0.0428, "step": 148430 }, { "epoch": 3.021679389312977, "grad_norm": 0.00608785667366948, "learning_rate": 4.057351072114641e-06, "loss": 0.0376, "step": 148440 }, { "epoch": 3.021882951653944, "grad_norm": 0.003544953060382444, "learning_rate": 4.056653255696667e-06, "loss": 0.01, "step": 148450 }, { "epoch": 3.022086513994911, "grad_norm": 6.34551328581388, "learning_rate": 4.0559554583307975e-06, "loss": 0.0335, "step": 148460 }, { "epoch": 3.022290076335878, "grad_norm": 0.2675659155327696, "learning_rate": 4.0552576800311214e-06, "loss": 0.0026, "step": 148470 }, { "epoch": 3.0224936386768446, "grad_norm": 1.7921419210022076, "learning_rate": 4.054559920811735e-06, "loss": 0.0232, "step": 148480 }, { "epoch": 3.0226972010178117, "grad_norm": 0.02098936131302832, "learning_rate": 4.0538621806867284e-06, "loss": 0.2169, "step": 148490 }, { "epoch": 3.0229007633587788, "grad_norm": 1.8887563209922735, "learning_rate": 4.053164459670192e-06, "loss": 0.0014, "step": 148500 }, { "epoch": 3.0231043256997454, "grad_norm": 0.8043454805580866, "learning_rate": 4.052466757776221e-06, "loss": 0.0405, "step": 148510 }, { "epoch": 3.0233078880407125, "grad_norm": 0.02301617216031855, "learning_rate": 4.051769075018903e-06, "loss": 0.0649, "step": 148520 }, { "epoch": 3.0235114503816796, "grad_norm": 12.522624245606522, "learning_rate": 4.0510714114123295e-06, "loss": 0.0489, "step": 148530 }, { "epoch": 3.023715012722646, "grad_norm": 0.03583398697455813, "learning_rate": 4.050373766970592e-06, "loss": 0.0276, "step": 148540 }, { "epoch": 3.0239185750636133, "grad_norm": 0.2496786271219662, "learning_rate": 4.049676141707779e-06, "loss": 0.1273, "step": 148550 }, { "epoch": 3.0241221374045804, "grad_norm": 1.7487039892330585, "learning_rate": 4.048978535637982e-06, "loss": 0.0571, "step": 148560 }, { "epoch": 3.024325699745547, "grad_norm": 12.52023210012691, "learning_rate": 4.048280948775286e-06, "loss": 0.0296, "step": 148570 }, { "epoch": 3.024529262086514, "grad_norm": 0.0660522285662775, "learning_rate": 4.047583381133783e-06, "loss": 0.0616, "step": 148580 }, { "epoch": 3.0247328244274807, "grad_norm": 0.009729093938007275, "learning_rate": 4.04688583272756e-06, "loss": 0.1332, "step": 148590 }, { "epoch": 3.024936386768448, "grad_norm": 0.027747315563690686, "learning_rate": 4.046188303570706e-06, "loss": 0.0488, "step": 148600 }, { "epoch": 3.025139949109415, "grad_norm": 0.04209695674073918, "learning_rate": 4.045490793677306e-06, "loss": 0.0882, "step": 148610 }, { "epoch": 3.0253435114503815, "grad_norm": 11.76937048634099, "learning_rate": 4.044793303061451e-06, "loss": 0.0368, "step": 148620 }, { "epoch": 3.0255470737913486, "grad_norm": 2.05389190680637, "learning_rate": 4.044095831737224e-06, "loss": 0.0729, "step": 148630 }, { "epoch": 3.0257506361323157, "grad_norm": 11.095365334924734, "learning_rate": 4.0433983797187145e-06, "loss": 0.0866, "step": 148640 }, { "epoch": 3.0259541984732823, "grad_norm": 6.025762207626347, "learning_rate": 4.042700947020005e-06, "loss": 0.0307, "step": 148650 }, { "epoch": 3.0261577608142494, "grad_norm": 0.034282549365273245, "learning_rate": 4.042003533655183e-06, "loss": 0.0063, "step": 148660 }, { "epoch": 3.0263613231552164, "grad_norm": 17.174639614932445, "learning_rate": 4.041306139638335e-06, "loss": 0.0729, "step": 148670 }, { "epoch": 3.026564885496183, "grad_norm": 0.02929873564224753, "learning_rate": 4.040608764983542e-06, "loss": 0.0032, "step": 148680 }, { "epoch": 3.02676844783715, "grad_norm": 22.2054982199029, "learning_rate": 4.039911409704893e-06, "loss": 0.0875, "step": 148690 }, { "epoch": 3.0269720101781172, "grad_norm": 8.389496404905838, "learning_rate": 4.039214073816469e-06, "loss": 0.123, "step": 148700 }, { "epoch": 3.027175572519084, "grad_norm": 0.03962597658307281, "learning_rate": 4.038516757332353e-06, "loss": 0.0008, "step": 148710 }, { "epoch": 3.027379134860051, "grad_norm": 2.376325364670255, "learning_rate": 4.037819460266633e-06, "loss": 0.0257, "step": 148720 }, { "epoch": 3.027582697201018, "grad_norm": 8.321073048749836, "learning_rate": 4.037122182633384e-06, "loss": 0.1141, "step": 148730 }, { "epoch": 3.0277862595419847, "grad_norm": 0.019536938964257723, "learning_rate": 4.036424924446695e-06, "loss": 0.057, "step": 148740 }, { "epoch": 3.0279898218829517, "grad_norm": 0.03715960870726165, "learning_rate": 4.035727685720646e-06, "loss": 0.0874, "step": 148750 }, { "epoch": 3.0281933842239184, "grad_norm": 0.3119663455840072, "learning_rate": 4.035030466469316e-06, "loss": 0.0291, "step": 148760 }, { "epoch": 3.0283969465648855, "grad_norm": 22.274778498295433, "learning_rate": 4.034333266706791e-06, "loss": 0.0069, "step": 148770 }, { "epoch": 3.0286005089058525, "grad_norm": 14.980387572883576, "learning_rate": 4.033636086447148e-06, "loss": 0.0976, "step": 148780 }, { "epoch": 3.028804071246819, "grad_norm": 11.420521898391408, "learning_rate": 4.0329389257044684e-06, "loss": 0.0049, "step": 148790 }, { "epoch": 3.0290076335877862, "grad_norm": 0.026188779207292845, "learning_rate": 4.032241784492835e-06, "loss": 0.0141, "step": 148800 }, { "epoch": 3.0292111959287533, "grad_norm": 3.005178189347605, "learning_rate": 4.0315446628263246e-06, "loss": 0.0804, "step": 148810 }, { "epoch": 3.02941475826972, "grad_norm": 0.003411242287905979, "learning_rate": 4.030847560719015e-06, "loss": 0.04, "step": 148820 }, { "epoch": 3.029618320610687, "grad_norm": 0.03045850566412432, "learning_rate": 4.0301504781849896e-06, "loss": 0.063, "step": 148830 }, { "epoch": 3.029821882951654, "grad_norm": 3.093139475022538, "learning_rate": 4.029453415238324e-06, "loss": 0.0023, "step": 148840 }, { "epoch": 3.0300254452926207, "grad_norm": 0.01839178200618886, "learning_rate": 4.028756371893097e-06, "loss": 0.0034, "step": 148850 }, { "epoch": 3.030229007633588, "grad_norm": 0.038291262272419836, "learning_rate": 4.028059348163385e-06, "loss": 0.0015, "step": 148860 }, { "epoch": 3.030432569974555, "grad_norm": 0.04453028357198562, "learning_rate": 4.027362344063266e-06, "loss": 0.0064, "step": 148870 }, { "epoch": 3.0306361323155215, "grad_norm": 36.83688717116581, "learning_rate": 4.026665359606819e-06, "loss": 0.0556, "step": 148880 }, { "epoch": 3.0308396946564886, "grad_norm": 5.573977857235685, "learning_rate": 4.0259683948081164e-06, "loss": 0.0025, "step": 148890 }, { "epoch": 3.0310432569974557, "grad_norm": 0.006001454390709137, "learning_rate": 4.025271449681238e-06, "loss": 0.1006, "step": 148900 }, { "epoch": 3.0312468193384223, "grad_norm": 0.006578814705197451, "learning_rate": 4.024574524240258e-06, "loss": 0.0055, "step": 148910 }, { "epoch": 3.0314503816793894, "grad_norm": 0.0011107770104407293, "learning_rate": 4.023877618499252e-06, "loss": 0.0863, "step": 148920 }, { "epoch": 3.031653944020356, "grad_norm": 0.026622351656512735, "learning_rate": 4.023180732472295e-06, "loss": 0.0107, "step": 148930 }, { "epoch": 3.031857506361323, "grad_norm": 0.1347517773196486, "learning_rate": 4.022483866173459e-06, "loss": 0.0722, "step": 148940 }, { "epoch": 3.03206106870229, "grad_norm": 0.1602762470366252, "learning_rate": 4.021787019616823e-06, "loss": 0.0401, "step": 148950 }, { "epoch": 3.032264631043257, "grad_norm": 48.37499491725674, "learning_rate": 4.021090192816457e-06, "loss": 0.115, "step": 148960 }, { "epoch": 3.032468193384224, "grad_norm": 0.03352807223941479, "learning_rate": 4.020393385786435e-06, "loss": 0.1273, "step": 148970 }, { "epoch": 3.032671755725191, "grad_norm": 8.081096622865584, "learning_rate": 4.01969659854083e-06, "loss": 0.048, "step": 148980 }, { "epoch": 3.0328753180661576, "grad_norm": 9.346176356109638, "learning_rate": 4.018999831093717e-06, "loss": 0.0373, "step": 148990 }, { "epoch": 3.0330788804071247, "grad_norm": 0.08362588030637842, "learning_rate": 4.018303083459163e-06, "loss": 0.0512, "step": 149000 }, { "epoch": 3.033282442748092, "grad_norm": 0.6898994786705057, "learning_rate": 4.017606355651247e-06, "loss": 0.0014, "step": 149010 }, { "epoch": 3.0334860050890584, "grad_norm": 2.927555974702274, "learning_rate": 4.016909647684032e-06, "loss": 0.0894, "step": 149020 }, { "epoch": 3.0336895674300255, "grad_norm": 19.16312274052597, "learning_rate": 4.016212959571594e-06, "loss": 0.0338, "step": 149030 }, { "epoch": 3.0338931297709926, "grad_norm": 33.90603780521943, "learning_rate": 4.015516291328003e-06, "loss": 0.058, "step": 149040 }, { "epoch": 3.034096692111959, "grad_norm": 0.011217059971693965, "learning_rate": 4.014819642967327e-06, "loss": 0.0009, "step": 149050 }, { "epoch": 3.0343002544529263, "grad_norm": 0.006210344176212796, "learning_rate": 4.014123014503638e-06, "loss": 0.0324, "step": 149060 }, { "epoch": 3.034503816793893, "grad_norm": 30.007867743104033, "learning_rate": 4.013426405951005e-06, "loss": 0.0546, "step": 149070 }, { "epoch": 3.03470737913486, "grad_norm": 15.930157662487817, "learning_rate": 4.012729817323495e-06, "loss": 0.0023, "step": 149080 }, { "epoch": 3.034910941475827, "grad_norm": 0.011886487402440967, "learning_rate": 4.01203324863518e-06, "loss": 0.065, "step": 149090 }, { "epoch": 3.0351145038167937, "grad_norm": 0.023405223606272473, "learning_rate": 4.011336699900125e-06, "loss": 0.0547, "step": 149100 }, { "epoch": 3.035318066157761, "grad_norm": 0.4206485547426159, "learning_rate": 4.0106401711323975e-06, "loss": 0.0101, "step": 149110 }, { "epoch": 3.035521628498728, "grad_norm": 0.017409105003596614, "learning_rate": 4.0099436623460686e-06, "loss": 0.0428, "step": 149120 }, { "epoch": 3.0357251908396945, "grad_norm": 30.771763890422033, "learning_rate": 4.009247173555202e-06, "loss": 0.0756, "step": 149130 }, { "epoch": 3.0359287531806616, "grad_norm": 0.13673754304594235, "learning_rate": 4.0085507047738645e-06, "loss": 0.0452, "step": 149140 }, { "epoch": 3.0361323155216287, "grad_norm": 19.471461017605332, "learning_rate": 4.007854256016122e-06, "loss": 0.1939, "step": 149150 }, { "epoch": 3.0363358778625953, "grad_norm": 31.377030408047894, "learning_rate": 4.00715782729604e-06, "loss": 0.0304, "step": 149160 }, { "epoch": 3.0365394402035624, "grad_norm": 0.4304190839757573, "learning_rate": 4.006461418627685e-06, "loss": 0.0315, "step": 149170 }, { "epoch": 3.0367430025445294, "grad_norm": 0.023922386129577585, "learning_rate": 4.005765030025122e-06, "loss": 0.0281, "step": 149180 }, { "epoch": 3.036946564885496, "grad_norm": 0.6151676675640341, "learning_rate": 4.005068661502414e-06, "loss": 0.0361, "step": 149190 }, { "epoch": 3.037150127226463, "grad_norm": 0.04651620847723895, "learning_rate": 4.004372313073627e-06, "loss": 0.1012, "step": 149200 }, { "epoch": 3.0373536895674302, "grad_norm": 0.003438213024812808, "learning_rate": 4.003675984752822e-06, "loss": 0.0064, "step": 149210 }, { "epoch": 3.037557251908397, "grad_norm": 0.08171534701698512, "learning_rate": 4.0029796765540654e-06, "loss": 0.0295, "step": 149220 }, { "epoch": 3.037760814249364, "grad_norm": 0.08374792643768925, "learning_rate": 4.002283388491416e-06, "loss": 0.1102, "step": 149230 }, { "epoch": 3.0379643765903306, "grad_norm": 0.12414761678914817, "learning_rate": 4.00158712057894e-06, "loss": 0.0416, "step": 149240 }, { "epoch": 3.0381679389312977, "grad_norm": 0.046807787799641426, "learning_rate": 4.0008908728307e-06, "loss": 0.0402, "step": 149250 }, { "epoch": 3.0383715012722647, "grad_norm": 0.023658172169233306, "learning_rate": 4.000194645260752e-06, "loss": 0.0768, "step": 149260 }, { "epoch": 3.0385750636132314, "grad_norm": 15.898042818172714, "learning_rate": 3.999498437883163e-06, "loss": 0.0091, "step": 149270 }, { "epoch": 3.0387786259541985, "grad_norm": 7.838788923375061, "learning_rate": 3.998802250711991e-06, "loss": 0.0757, "step": 149280 }, { "epoch": 3.0389821882951655, "grad_norm": 0.021936560603600837, "learning_rate": 3.998106083761298e-06, "loss": 0.0667, "step": 149290 }, { "epoch": 3.039185750636132, "grad_norm": 6.2459183139366194, "learning_rate": 3.9974099370451435e-06, "loss": 0.2168, "step": 149300 }, { "epoch": 3.0393893129770992, "grad_norm": 0.017461870675160883, "learning_rate": 3.996713810577585e-06, "loss": 0.0423, "step": 149310 }, { "epoch": 3.0395928753180663, "grad_norm": 12.658044721540588, "learning_rate": 3.996017704372684e-06, "loss": 0.0059, "step": 149320 }, { "epoch": 3.039796437659033, "grad_norm": 0.16884933520321244, "learning_rate": 3.995321618444499e-06, "loss": 0.0418, "step": 149330 }, { "epoch": 3.04, "grad_norm": 0.07128570943778355, "learning_rate": 3.994625552807086e-06, "loss": 0.0217, "step": 149340 }, { "epoch": 3.040203562340967, "grad_norm": 8.200959334352211, "learning_rate": 3.9939295074745066e-06, "loss": 0.1613, "step": 149350 }, { "epoch": 3.0404071246819337, "grad_norm": 0.034662634221834965, "learning_rate": 3.993233482460817e-06, "loss": 0.0618, "step": 149360 }, { "epoch": 3.040610687022901, "grad_norm": 0.1377044867102674, "learning_rate": 3.992537477780072e-06, "loss": 0.0051, "step": 149370 }, { "epoch": 3.0408142493638675, "grad_norm": 9.737558052034467, "learning_rate": 3.991841493446332e-06, "loss": 0.0657, "step": 149380 }, { "epoch": 3.0410178117048345, "grad_norm": 28.103337403545403, "learning_rate": 3.991145529473652e-06, "loss": 0.1312, "step": 149390 }, { "epoch": 3.0412213740458016, "grad_norm": 1.4920308409113672, "learning_rate": 3.990449585876085e-06, "loss": 0.0168, "step": 149400 }, { "epoch": 3.0414249363867683, "grad_norm": 22.000083996897203, "learning_rate": 3.989753662667692e-06, "loss": 0.0665, "step": 149410 }, { "epoch": 3.0416284987277353, "grad_norm": 6.862790685204781, "learning_rate": 3.989057759862522e-06, "loss": 0.0575, "step": 149420 }, { "epoch": 3.0418320610687024, "grad_norm": 0.04310378271295857, "learning_rate": 3.988361877474634e-06, "loss": 0.0004, "step": 149430 }, { "epoch": 3.042035623409669, "grad_norm": 0.02560340349554514, "learning_rate": 3.987666015518083e-06, "loss": 0.0565, "step": 149440 }, { "epoch": 3.042239185750636, "grad_norm": 0.6485030715584915, "learning_rate": 3.9869701740069166e-06, "loss": 0.0142, "step": 149450 }, { "epoch": 3.042442748091603, "grad_norm": 0.006353667482876151, "learning_rate": 3.986274352955195e-06, "loss": 0.0317, "step": 149460 }, { "epoch": 3.04264631043257, "grad_norm": 0.43338379747891226, "learning_rate": 3.9855785523769675e-06, "loss": 0.007, "step": 149470 }, { "epoch": 3.042849872773537, "grad_norm": 0.042469879051476415, "learning_rate": 3.984882772286287e-06, "loss": 0.1529, "step": 149480 }, { "epoch": 3.043053435114504, "grad_norm": 3.8513122954595627, "learning_rate": 3.984187012697208e-06, "loss": 0.0839, "step": 149490 }, { "epoch": 3.0432569974554706, "grad_norm": 0.03614854883450836, "learning_rate": 3.9834912736237805e-06, "loss": 0.0423, "step": 149500 }, { "epoch": 3.0434605597964377, "grad_norm": 17.347478104756345, "learning_rate": 3.982795555080056e-06, "loss": 0.0481, "step": 149510 }, { "epoch": 3.043664122137405, "grad_norm": 2.592873622752458, "learning_rate": 3.982099857080084e-06, "loss": 0.0696, "step": 149520 }, { "epoch": 3.0438676844783714, "grad_norm": 4.226286873215779, "learning_rate": 3.981404179637916e-06, "loss": 0.0829, "step": 149530 }, { "epoch": 3.0440712468193385, "grad_norm": 0.05397582071974195, "learning_rate": 3.980708522767606e-06, "loss": 0.0386, "step": 149540 }, { "epoch": 3.044274809160305, "grad_norm": 0.020793471051391843, "learning_rate": 3.980012886483196e-06, "loss": 0.0416, "step": 149550 }, { "epoch": 3.044478371501272, "grad_norm": 0.21135466180273693, "learning_rate": 3.979317270798741e-06, "loss": 0.0432, "step": 149560 }, { "epoch": 3.0446819338422393, "grad_norm": 0.0113483788710952, "learning_rate": 3.978621675728291e-06, "loss": 0.0218, "step": 149570 }, { "epoch": 3.044885496183206, "grad_norm": 0.02028303673478794, "learning_rate": 3.977926101285889e-06, "loss": 0.0354, "step": 149580 }, { "epoch": 3.045089058524173, "grad_norm": 0.485133334953291, "learning_rate": 3.977230547485588e-06, "loss": 0.0538, "step": 149590 }, { "epoch": 3.04529262086514, "grad_norm": 0.006880201284198074, "learning_rate": 3.976535014341431e-06, "loss": 0.0697, "step": 149600 }, { "epoch": 3.0454961832061067, "grad_norm": 0.23671834221573915, "learning_rate": 3.975839501867468e-06, "loss": 0.0355, "step": 149610 }, { "epoch": 3.045699745547074, "grad_norm": 0.25067791239846404, "learning_rate": 3.975144010077746e-06, "loss": 0.0699, "step": 149620 }, { "epoch": 3.045903307888041, "grad_norm": 0.017977892039656992, "learning_rate": 3.97444853898631e-06, "loss": 0.0009, "step": 149630 }, { "epoch": 3.0461068702290075, "grad_norm": 0.015842513665356252, "learning_rate": 3.973753088607207e-06, "loss": 0.0682, "step": 149640 }, { "epoch": 3.0463104325699746, "grad_norm": 0.020114804567960737, "learning_rate": 3.973057658954483e-06, "loss": 0.0006, "step": 149650 }, { "epoch": 3.0465139949109417, "grad_norm": 0.065002371520348, "learning_rate": 3.972362250042181e-06, "loss": 0.0647, "step": 149660 }, { "epoch": 3.0467175572519083, "grad_norm": 0.2782762520776078, "learning_rate": 3.9716668618843485e-06, "loss": 0.1576, "step": 149670 }, { "epoch": 3.0469211195928754, "grad_norm": 0.03912171567205132, "learning_rate": 3.970971494495027e-06, "loss": 0.062, "step": 149680 }, { "epoch": 3.0471246819338424, "grad_norm": 0.03316246419310535, "learning_rate": 3.9702761478882605e-06, "loss": 0.0669, "step": 149690 }, { "epoch": 3.047328244274809, "grad_norm": 0.12607913137256654, "learning_rate": 3.9695808220780975e-06, "loss": 0.098, "step": 149700 }, { "epoch": 3.047531806615776, "grad_norm": 4.512412736938616, "learning_rate": 3.968885517078573e-06, "loss": 0.0424, "step": 149710 }, { "epoch": 3.047735368956743, "grad_norm": 5.682685114916439, "learning_rate": 3.968190232903735e-06, "loss": 0.0324, "step": 149720 }, { "epoch": 3.04793893129771, "grad_norm": 0.2281136245432169, "learning_rate": 3.967494969567624e-06, "loss": 0.1084, "step": 149730 }, { "epoch": 3.048142493638677, "grad_norm": 6.953315395714189, "learning_rate": 3.966799727084281e-06, "loss": 0.0614, "step": 149740 }, { "epoch": 3.0483460559796436, "grad_norm": 34.03586584172558, "learning_rate": 3.966104505467751e-06, "loss": 0.0698, "step": 149750 }, { "epoch": 3.0485496183206107, "grad_norm": 0.012951514687548432, "learning_rate": 3.9654093047320695e-06, "loss": 0.0698, "step": 149760 }, { "epoch": 3.0487531806615777, "grad_norm": 0.21961180467142366, "learning_rate": 3.96471412489128e-06, "loss": 0.0789, "step": 149770 }, { "epoch": 3.0489567430025444, "grad_norm": 12.863131692702451, "learning_rate": 3.964018965959423e-06, "loss": 0.0232, "step": 149780 }, { "epoch": 3.0491603053435115, "grad_norm": 0.1782270252355239, "learning_rate": 3.963323827950537e-06, "loss": 0.0156, "step": 149790 }, { "epoch": 3.0493638676844785, "grad_norm": 0.14449128621647336, "learning_rate": 3.962628710878663e-06, "loss": 0.0119, "step": 149800 }, { "epoch": 3.049567430025445, "grad_norm": 4.961133825046551, "learning_rate": 3.961933614757836e-06, "loss": 0.0549, "step": 149810 }, { "epoch": 3.0497709923664122, "grad_norm": 0.0734244641302249, "learning_rate": 3.961238539602097e-06, "loss": 0.0413, "step": 149820 }, { "epoch": 3.0499745547073793, "grad_norm": 0.027849480879379683, "learning_rate": 3.9605434854254855e-06, "loss": 0.0155, "step": 149830 }, { "epoch": 3.050178117048346, "grad_norm": 15.277522788023854, "learning_rate": 3.959848452242035e-06, "loss": 0.022, "step": 149840 }, { "epoch": 3.050381679389313, "grad_norm": 0.07560891458507428, "learning_rate": 3.959153440065785e-06, "loss": 0.0302, "step": 149850 }, { "epoch": 3.05058524173028, "grad_norm": 5.635131212048132, "learning_rate": 3.958458448910773e-06, "loss": 0.1359, "step": 149860 }, { "epoch": 3.0507888040712468, "grad_norm": 8.524995904707652, "learning_rate": 3.957763478791033e-06, "loss": 0.0349, "step": 149870 }, { "epoch": 3.050992366412214, "grad_norm": 0.04591298655084296, "learning_rate": 3.957068529720603e-06, "loss": 0.0942, "step": 149880 }, { "epoch": 3.0511959287531805, "grad_norm": 0.007523800865902171, "learning_rate": 3.956373601713516e-06, "loss": 0.0588, "step": 149890 }, { "epoch": 3.0513994910941475, "grad_norm": 0.07218623377259388, "learning_rate": 3.95567869478381e-06, "loss": 0.0548, "step": 149900 }, { "epoch": 3.0516030534351146, "grad_norm": 23.15778774203417, "learning_rate": 3.9549838089455174e-06, "loss": 0.2437, "step": 149910 }, { "epoch": 3.0518066157760813, "grad_norm": 0.06459149236217457, "learning_rate": 3.954288944212671e-06, "loss": 0.0464, "step": 149920 }, { "epoch": 3.0520101781170483, "grad_norm": 0.189926025863063, "learning_rate": 3.953594100599307e-06, "loss": 0.1079, "step": 149930 }, { "epoch": 3.0522137404580154, "grad_norm": 0.00021453280752269552, "learning_rate": 3.95289927811946e-06, "loss": 0.0883, "step": 149940 }, { "epoch": 3.052417302798982, "grad_norm": 1.1279417041779998, "learning_rate": 3.952204476787158e-06, "loss": 0.1103, "step": 149950 }, { "epoch": 3.052620865139949, "grad_norm": 0.06201016655749211, "learning_rate": 3.951509696616438e-06, "loss": 0.0454, "step": 149960 }, { "epoch": 3.052824427480916, "grad_norm": 0.11060657326139499, "learning_rate": 3.950814937621329e-06, "loss": 0.0854, "step": 149970 }, { "epoch": 3.053027989821883, "grad_norm": 0.01710352900406612, "learning_rate": 3.950120199815863e-06, "loss": 0.028, "step": 149980 }, { "epoch": 3.05323155216285, "grad_norm": 10.774766024298188, "learning_rate": 3.949425483214074e-06, "loss": 0.0308, "step": 149990 }, { "epoch": 3.053435114503817, "grad_norm": 0.011079951907506533, "learning_rate": 3.948730787829989e-06, "loss": 0.0544, "step": 150000 }, { "epoch": 3.0536386768447836, "grad_norm": 11.656004095470898, "learning_rate": 3.948036113677639e-06, "loss": 0.0742, "step": 150010 }, { "epoch": 3.0538422391857507, "grad_norm": 16.08761444662856, "learning_rate": 3.947341460771058e-06, "loss": 0.035, "step": 150020 }, { "epoch": 3.0540458015267173, "grad_norm": 10.955713554703655, "learning_rate": 3.946646829124267e-06, "loss": 0.069, "step": 150030 }, { "epoch": 3.0542493638676844, "grad_norm": 0.8574416007629191, "learning_rate": 3.945952218751305e-06, "loss": 0.0063, "step": 150040 }, { "epoch": 3.0544529262086515, "grad_norm": 0.013058990787627409, "learning_rate": 3.945257629666193e-06, "loss": 0.0288, "step": 150050 }, { "epoch": 3.054656488549618, "grad_norm": 25.134068227938783, "learning_rate": 3.944563061882961e-06, "loss": 0.0794, "step": 150060 }, { "epoch": 3.054860050890585, "grad_norm": 0.01890923666671423, "learning_rate": 3.943868515415639e-06, "loss": 0.0738, "step": 150070 }, { "epoch": 3.0550636132315523, "grad_norm": 0.1183170601566847, "learning_rate": 3.943173990278252e-06, "loss": 0.0535, "step": 150080 }, { "epoch": 3.055267175572519, "grad_norm": 0.04329928088990202, "learning_rate": 3.942479486484826e-06, "loss": 0.0566, "step": 150090 }, { "epoch": 3.055470737913486, "grad_norm": 0.09390395167819607, "learning_rate": 3.941785004049391e-06, "loss": 0.0894, "step": 150100 }, { "epoch": 3.055674300254453, "grad_norm": 2.0942002694076196, "learning_rate": 3.94109054298597e-06, "loss": 0.0899, "step": 150110 }, { "epoch": 3.0558778625954197, "grad_norm": 0.05454424516561487, "learning_rate": 3.9403961033085906e-06, "loss": 0.1021, "step": 150120 }, { "epoch": 3.056081424936387, "grad_norm": 0.18064406596241556, "learning_rate": 3.939701685031276e-06, "loss": 0.015, "step": 150130 }, { "epoch": 3.056284987277354, "grad_norm": 0.09921921343078655, "learning_rate": 3.939007288168049e-06, "loss": 0.0229, "step": 150140 }, { "epoch": 3.0564885496183205, "grad_norm": 0.08452376732062838, "learning_rate": 3.938312912732939e-06, "loss": 0.004, "step": 150150 }, { "epoch": 3.0566921119592876, "grad_norm": 0.03415532498362386, "learning_rate": 3.937618558739966e-06, "loss": 0.0804, "step": 150160 }, { "epoch": 3.0568956743002547, "grad_norm": 0.019198593505768108, "learning_rate": 3.9369242262031566e-06, "loss": 0.0221, "step": 150170 }, { "epoch": 3.0570992366412213, "grad_norm": 0.030351698173391617, "learning_rate": 3.936229915136529e-06, "loss": 0.0849, "step": 150180 }, { "epoch": 3.0573027989821884, "grad_norm": 0.09875768745410114, "learning_rate": 3.935535625554108e-06, "loss": 0.0441, "step": 150190 }, { "epoch": 3.057506361323155, "grad_norm": 0.07841489032435993, "learning_rate": 3.9348413574699195e-06, "loss": 0.0914, "step": 150200 }, { "epoch": 3.057709923664122, "grad_norm": 94.81908610242802, "learning_rate": 3.934147110897979e-06, "loss": 0.0211, "step": 150210 }, { "epoch": 3.057913486005089, "grad_norm": 0.003685061935687132, "learning_rate": 3.933452885852311e-06, "loss": 0.0505, "step": 150220 }, { "epoch": 3.058117048346056, "grad_norm": 0.09290119045102417, "learning_rate": 3.932758682346937e-06, "loss": 0.1148, "step": 150230 }, { "epoch": 3.058320610687023, "grad_norm": 29.619156289012345, "learning_rate": 3.932064500395873e-06, "loss": 0.0125, "step": 150240 }, { "epoch": 3.05852417302799, "grad_norm": 0.3634212499474862, "learning_rate": 3.931370340013146e-06, "loss": 0.062, "step": 150250 }, { "epoch": 3.0587277353689566, "grad_norm": 1.0037840469938135, "learning_rate": 3.930676201212768e-06, "loss": 0.0031, "step": 150260 }, { "epoch": 3.0589312977099237, "grad_norm": 0.03545014144169717, "learning_rate": 3.929982084008762e-06, "loss": 0.0303, "step": 150270 }, { "epoch": 3.0591348600508907, "grad_norm": 13.5627802545819, "learning_rate": 3.929287988415148e-06, "loss": 0.1067, "step": 150280 }, { "epoch": 3.0593384223918574, "grad_norm": 0.23650000591404366, "learning_rate": 3.92859391444594e-06, "loss": 0.0644, "step": 150290 }, { "epoch": 3.0595419847328245, "grad_norm": 24.644650721298664, "learning_rate": 3.927899862115157e-06, "loss": 0.0458, "step": 150300 }, { "epoch": 3.0597455470737915, "grad_norm": 0.025144378743862824, "learning_rate": 3.92720583143682e-06, "loss": 0.0017, "step": 150310 }, { "epoch": 3.059949109414758, "grad_norm": 0.02472838380553194, "learning_rate": 3.926511822424941e-06, "loss": 0.0036, "step": 150320 }, { "epoch": 3.0601526717557253, "grad_norm": 0.013917166335762322, "learning_rate": 3.92581783509354e-06, "loss": 0.0198, "step": 150330 }, { "epoch": 3.0603562340966923, "grad_norm": 0.0803576078261861, "learning_rate": 3.9251238694566295e-06, "loss": 0.0007, "step": 150340 }, { "epoch": 3.060559796437659, "grad_norm": 0.058097058210871806, "learning_rate": 3.924429925528227e-06, "loss": 0.076, "step": 150350 }, { "epoch": 3.060763358778626, "grad_norm": 7.830245348603885, "learning_rate": 3.923736003322349e-06, "loss": 0.046, "step": 150360 }, { "epoch": 3.0609669211195927, "grad_norm": 0.04432703350679848, "learning_rate": 3.923042102853008e-06, "loss": 0.0762, "step": 150370 }, { "epoch": 3.0611704834605598, "grad_norm": 0.5038600083362449, "learning_rate": 3.922348224134217e-06, "loss": 0.0245, "step": 150380 }, { "epoch": 3.061374045801527, "grad_norm": 28.42096296643686, "learning_rate": 3.921654367179996e-06, "loss": 0.0078, "step": 150390 }, { "epoch": 3.0615776081424935, "grad_norm": 9.587999632692735, "learning_rate": 3.92096053200435e-06, "loss": 0.078, "step": 150400 }, { "epoch": 3.0617811704834605, "grad_norm": 0.04817139748210563, "learning_rate": 3.920266718621298e-06, "loss": 0.0865, "step": 150410 }, { "epoch": 3.0619847328244276, "grad_norm": 0.0388379335304943, "learning_rate": 3.919572927044849e-06, "loss": 0.0128, "step": 150420 }, { "epoch": 3.0621882951653943, "grad_norm": 0.120900829873721, "learning_rate": 3.918879157289016e-06, "loss": 0.0878, "step": 150430 }, { "epoch": 3.0623918575063613, "grad_norm": 8.643294539104263, "learning_rate": 3.918185409367812e-06, "loss": 0.0452, "step": 150440 }, { "epoch": 3.0625954198473284, "grad_norm": 35.43294947240994, "learning_rate": 3.917491683295247e-06, "loss": 0.0843, "step": 150450 }, { "epoch": 3.062798982188295, "grad_norm": 0.04166554834954904, "learning_rate": 3.91679797908533e-06, "loss": 0.0322, "step": 150460 }, { "epoch": 3.063002544529262, "grad_norm": 0.05420691428987749, "learning_rate": 3.9161042967520755e-06, "loss": 0.1279, "step": 150470 }, { "epoch": 3.063206106870229, "grad_norm": 0.2162399428198132, "learning_rate": 3.91541063630949e-06, "loss": 0.0461, "step": 150480 }, { "epoch": 3.063409669211196, "grad_norm": 3.5182483653245398, "learning_rate": 3.914716997771584e-06, "loss": 0.004, "step": 150490 }, { "epoch": 3.063613231552163, "grad_norm": 0.11647748218385137, "learning_rate": 3.914023381152364e-06, "loss": 0.0294, "step": 150500 }, { "epoch": 3.06381679389313, "grad_norm": 4.399121041767113, "learning_rate": 3.9133297864658416e-06, "loss": 0.0411, "step": 150510 }, { "epoch": 3.0640203562340966, "grad_norm": 1.178404176722073, "learning_rate": 3.9126362137260245e-06, "loss": 0.0199, "step": 150520 }, { "epoch": 3.0642239185750637, "grad_norm": 10.513007570356683, "learning_rate": 3.911942662946918e-06, "loss": 0.12, "step": 150530 }, { "epoch": 3.0644274809160303, "grad_norm": 0.03593917233840211, "learning_rate": 3.911249134142534e-06, "loss": 0.0127, "step": 150540 }, { "epoch": 3.0646310432569974, "grad_norm": 0.14178776357085426, "learning_rate": 3.910555627326873e-06, "loss": 0.0633, "step": 150550 }, { "epoch": 3.0648346055979645, "grad_norm": 0.01934739538638024, "learning_rate": 3.9098621425139454e-06, "loss": 0.0445, "step": 150560 }, { "epoch": 3.065038167938931, "grad_norm": 0.658338095999338, "learning_rate": 3.9091686797177564e-06, "loss": 0.0215, "step": 150570 }, { "epoch": 3.065241730279898, "grad_norm": 0.03749135208310345, "learning_rate": 3.90847523895231e-06, "loss": 0.0054, "step": 150580 }, { "epoch": 3.0654452926208653, "grad_norm": 0.024258964891534313, "learning_rate": 3.907781820231612e-06, "loss": 0.0006, "step": 150590 }, { "epoch": 3.065648854961832, "grad_norm": 2.2415233156905745, "learning_rate": 3.907088423569668e-06, "loss": 0.003, "step": 150600 }, { "epoch": 3.065852417302799, "grad_norm": 0.15030296183793704, "learning_rate": 3.906395048980479e-06, "loss": 0.0276, "step": 150610 }, { "epoch": 3.066055979643766, "grad_norm": 0.009502663731516052, "learning_rate": 3.905701696478053e-06, "loss": 0.1032, "step": 150620 }, { "epoch": 3.0662595419847327, "grad_norm": 0.14293730665837898, "learning_rate": 3.905008366076389e-06, "loss": 0.0008, "step": 150630 }, { "epoch": 3.0664631043257, "grad_norm": 0.015409086448231338, "learning_rate": 3.9043150577894905e-06, "loss": 0.0511, "step": 150640 }, { "epoch": 3.066666666666667, "grad_norm": 0.03801769445411413, "learning_rate": 3.903621771631363e-06, "loss": 0.0904, "step": 150650 }, { "epoch": 3.0668702290076335, "grad_norm": 0.12522485705835038, "learning_rate": 3.902928507616004e-06, "loss": 0.0323, "step": 150660 }, { "epoch": 3.0670737913486006, "grad_norm": 0.28615367433299504, "learning_rate": 3.902235265757417e-06, "loss": 0.0858, "step": 150670 }, { "epoch": 3.067277353689567, "grad_norm": 0.22173917631725348, "learning_rate": 3.901542046069605e-06, "loss": 0.0016, "step": 150680 }, { "epoch": 3.0674809160305343, "grad_norm": 0.01130150504502671, "learning_rate": 3.900848848566564e-06, "loss": 0.0595, "step": 150690 }, { "epoch": 3.0676844783715014, "grad_norm": 0.02460585284646369, "learning_rate": 3.900155673262297e-06, "loss": 0.0738, "step": 150700 }, { "epoch": 3.067888040712468, "grad_norm": 9.10945261663906, "learning_rate": 3.8994625201708034e-06, "loss": 0.2003, "step": 150710 }, { "epoch": 3.068091603053435, "grad_norm": 0.008028485693200974, "learning_rate": 3.898769389306079e-06, "loss": 0.0352, "step": 150720 }, { "epoch": 3.068295165394402, "grad_norm": 78.23631087582223, "learning_rate": 3.898076280682128e-06, "loss": 0.1228, "step": 150730 }, { "epoch": 3.068498727735369, "grad_norm": 0.003444147145162387, "learning_rate": 3.897383194312945e-06, "loss": 0.0458, "step": 150740 }, { "epoch": 3.068702290076336, "grad_norm": 0.031180143865943137, "learning_rate": 3.896690130212527e-06, "loss": 0.0259, "step": 150750 }, { "epoch": 3.068905852417303, "grad_norm": 7.999097368393465, "learning_rate": 3.895997088394875e-06, "loss": 0.1493, "step": 150760 }, { "epoch": 3.0691094147582696, "grad_norm": 7.398829222033151, "learning_rate": 3.895304068873983e-06, "loss": 0.0436, "step": 150770 }, { "epoch": 3.0693129770992367, "grad_norm": 0.01223489374484386, "learning_rate": 3.894611071663849e-06, "loss": 0.0452, "step": 150780 }, { "epoch": 3.0695165394402038, "grad_norm": 0.18437556994668952, "learning_rate": 3.893918096778466e-06, "loss": 0.0498, "step": 150790 }, { "epoch": 3.0697201017811704, "grad_norm": 0.9891742338442195, "learning_rate": 3.893225144231833e-06, "loss": 0.0281, "step": 150800 }, { "epoch": 3.0699236641221375, "grad_norm": 0.06282037137940007, "learning_rate": 3.892532214037944e-06, "loss": 0.044, "step": 150810 }, { "epoch": 3.0701272264631045, "grad_norm": 0.01206539751627685, "learning_rate": 3.891839306210793e-06, "loss": 0.0519, "step": 150820 }, { "epoch": 3.070330788804071, "grad_norm": 0.4439181701338689, "learning_rate": 3.891146420764374e-06, "loss": 0.0558, "step": 150830 }, { "epoch": 3.0705343511450383, "grad_norm": 0.026524659240895677, "learning_rate": 3.890453557712682e-06, "loss": 0.0331, "step": 150840 }, { "epoch": 3.070737913486005, "grad_norm": 0.04126732746225494, "learning_rate": 3.889760717069709e-06, "loss": 0.066, "step": 150850 }, { "epoch": 3.070941475826972, "grad_norm": 0.03067619070603361, "learning_rate": 3.88906789884945e-06, "loss": 0.0324, "step": 150860 }, { "epoch": 3.071145038167939, "grad_norm": 0.05260957996259406, "learning_rate": 3.888375103065893e-06, "loss": 0.1215, "step": 150870 }, { "epoch": 3.0713486005089057, "grad_norm": 0.02661226957675271, "learning_rate": 3.887682329733033e-06, "loss": 0.0818, "step": 150880 }, { "epoch": 3.0715521628498728, "grad_norm": 0.0374572062997448, "learning_rate": 3.886989578864863e-06, "loss": 0.0081, "step": 150890 }, { "epoch": 3.07175572519084, "grad_norm": 0.019294059083115486, "learning_rate": 3.88629685047537e-06, "loss": 0.025, "step": 150900 }, { "epoch": 3.0719592875318065, "grad_norm": 0.2014211457857469, "learning_rate": 3.885604144578548e-06, "loss": 0.0274, "step": 150910 }, { "epoch": 3.0721628498727735, "grad_norm": 11.231522907973217, "learning_rate": 3.884911461188385e-06, "loss": 0.0281, "step": 150920 }, { "epoch": 3.0723664122137406, "grad_norm": 0.00856914646412101, "learning_rate": 3.884218800318869e-06, "loss": 0.0059, "step": 150930 }, { "epoch": 3.0725699745547073, "grad_norm": 0.016461840538119037, "learning_rate": 3.883526161983995e-06, "loss": 0.0648, "step": 150940 }, { "epoch": 3.0727735368956743, "grad_norm": 0.04609626281262751, "learning_rate": 3.8828335461977465e-06, "loss": 0.0125, "step": 150950 }, { "epoch": 3.0729770992366414, "grad_norm": 0.29586948617171865, "learning_rate": 3.882140952974114e-06, "loss": 0.0335, "step": 150960 }, { "epoch": 3.073180661577608, "grad_norm": 0.007358057369234546, "learning_rate": 3.881448382327085e-06, "loss": 0.0652, "step": 150970 }, { "epoch": 3.073384223918575, "grad_norm": 0.03777795719907403, "learning_rate": 3.8807558342706455e-06, "loss": 0.1454, "step": 150980 }, { "epoch": 3.0735877862595418, "grad_norm": 1.6014845036064513, "learning_rate": 3.880063308818785e-06, "loss": 0.0453, "step": 150990 }, { "epoch": 3.073791348600509, "grad_norm": 0.11396665249492714, "learning_rate": 3.879370805985488e-06, "loss": 0.0077, "step": 151000 }, { "epoch": 3.073994910941476, "grad_norm": 0.027103949975789948, "learning_rate": 3.878678325784739e-06, "loss": 0.0183, "step": 151010 }, { "epoch": 3.0741984732824426, "grad_norm": 0.4455211051417585, "learning_rate": 3.877985868230527e-06, "loss": 0.0486, "step": 151020 }, { "epoch": 3.0744020356234096, "grad_norm": 0.4582554099709222, "learning_rate": 3.877293433336836e-06, "loss": 0.0502, "step": 151030 }, { "epoch": 3.0746055979643767, "grad_norm": 0.0024440336033895293, "learning_rate": 3.876601021117648e-06, "loss": 0.019, "step": 151040 }, { "epoch": 3.0748091603053433, "grad_norm": 0.0217630416198627, "learning_rate": 3.8759086315869524e-06, "loss": 0.0429, "step": 151050 }, { "epoch": 3.0750127226463104, "grad_norm": 0.016048381833429107, "learning_rate": 3.8752162647587275e-06, "loss": 0.0042, "step": 151060 }, { "epoch": 3.0752162849872775, "grad_norm": 0.1384387782507477, "learning_rate": 3.874523920646961e-06, "loss": 0.0399, "step": 151070 }, { "epoch": 3.075419847328244, "grad_norm": 11.192291314272524, "learning_rate": 3.87383159926563e-06, "loss": 0.0707, "step": 151080 }, { "epoch": 3.075623409669211, "grad_norm": 7.614919414119735, "learning_rate": 3.873139300628723e-06, "loss": 0.0543, "step": 151090 }, { "epoch": 3.0758269720101783, "grad_norm": 3.838136152127718, "learning_rate": 3.87244702475022e-06, "loss": 0.0176, "step": 151100 }, { "epoch": 3.076030534351145, "grad_norm": 0.7192904420695284, "learning_rate": 3.8717547716441e-06, "loss": 0.0714, "step": 151110 }, { "epoch": 3.076234096692112, "grad_norm": 0.048698336636989054, "learning_rate": 3.871062541324345e-06, "loss": 0.0366, "step": 151120 }, { "epoch": 3.076437659033079, "grad_norm": 8.256469305392715, "learning_rate": 3.870370333804937e-06, "loss": 0.0575, "step": 151130 }, { "epoch": 3.0766412213740457, "grad_norm": 5.248526705652602, "learning_rate": 3.869678149099856e-06, "loss": 0.0378, "step": 151140 }, { "epoch": 3.076844783715013, "grad_norm": 0.02268059235462731, "learning_rate": 3.86898598722308e-06, "loss": 0.0192, "step": 151150 }, { "epoch": 3.0770483460559794, "grad_norm": 0.004347030706240185, "learning_rate": 3.868293848188586e-06, "loss": 0.0408, "step": 151160 }, { "epoch": 3.0772519083969465, "grad_norm": 0.0012330282997983682, "learning_rate": 3.867601732010358e-06, "loss": 0.0683, "step": 151170 }, { "epoch": 3.0774554707379136, "grad_norm": 0.024209387043143352, "learning_rate": 3.866909638702373e-06, "loss": 0.1473, "step": 151180 }, { "epoch": 3.0776590330788802, "grad_norm": 0.06081603639957357, "learning_rate": 3.866217568278604e-06, "loss": 0.0812, "step": 151190 }, { "epoch": 3.0778625954198473, "grad_norm": 0.010884325122745545, "learning_rate": 3.8655255207530325e-06, "loss": 0.091, "step": 151200 }, { "epoch": 3.0780661577608144, "grad_norm": 0.056988922678012956, "learning_rate": 3.864833496139636e-06, "loss": 0.0222, "step": 151210 }, { "epoch": 3.078269720101781, "grad_norm": 0.029072589070196782, "learning_rate": 3.864141494452387e-06, "loss": 0.0487, "step": 151220 }, { "epoch": 3.078473282442748, "grad_norm": 0.07423991482529081, "learning_rate": 3.863449515705266e-06, "loss": 0.0236, "step": 151230 }, { "epoch": 3.078676844783715, "grad_norm": 0.03372037968903635, "learning_rate": 3.862757559912244e-06, "loss": 0.0754, "step": 151240 }, { "epoch": 3.078880407124682, "grad_norm": 0.3021159057831093, "learning_rate": 3.8620656270872975e-06, "loss": 0.0727, "step": 151250 }, { "epoch": 3.079083969465649, "grad_norm": 0.010182667514688945, "learning_rate": 3.861373717244403e-06, "loss": 0.0005, "step": 151260 }, { "epoch": 3.079287531806616, "grad_norm": 9.267478480493734, "learning_rate": 3.860681830397531e-06, "loss": 0.1316, "step": 151270 }, { "epoch": 3.0794910941475826, "grad_norm": 0.03793965290022593, "learning_rate": 3.8599899665606584e-06, "loss": 0.1011, "step": 151280 }, { "epoch": 3.0796946564885497, "grad_norm": 7.654433008294531, "learning_rate": 3.859298125747756e-06, "loss": 0.0941, "step": 151290 }, { "epoch": 3.0798982188295168, "grad_norm": 0.04009423404164994, "learning_rate": 3.858606307972795e-06, "loss": 0.0539, "step": 151300 }, { "epoch": 3.0801017811704834, "grad_norm": 0.016668255129248053, "learning_rate": 3.857914513249754e-06, "loss": 0.045, "step": 151310 }, { "epoch": 3.0803053435114505, "grad_norm": 10.321394863395554, "learning_rate": 3.857222741592598e-06, "loss": 0.0571, "step": 151320 }, { "epoch": 3.080508905852417, "grad_norm": 2.2991582464134876, "learning_rate": 3.8565309930152995e-06, "loss": 0.0839, "step": 151330 }, { "epoch": 3.080712468193384, "grad_norm": 9.874773824040503, "learning_rate": 3.855839267531832e-06, "loss": 0.1808, "step": 151340 }, { "epoch": 3.0809160305343513, "grad_norm": 0.016834184833689224, "learning_rate": 3.855147565156164e-06, "loss": 0.0193, "step": 151350 }, { "epoch": 3.081119592875318, "grad_norm": 0.04265268753096627, "learning_rate": 3.854455885902267e-06, "loss": 0.019, "step": 151360 }, { "epoch": 3.081323155216285, "grad_norm": 0.05246027676982953, "learning_rate": 3.853764229784107e-06, "loss": 0.0353, "step": 151370 }, { "epoch": 3.081526717557252, "grad_norm": 0.07316636023266442, "learning_rate": 3.8530725968156535e-06, "loss": 0.0621, "step": 151380 }, { "epoch": 3.0817302798982187, "grad_norm": 0.023555781112052476, "learning_rate": 3.852380987010879e-06, "loss": 0.0286, "step": 151390 }, { "epoch": 3.0819338422391858, "grad_norm": 0.06977175753417116, "learning_rate": 3.851689400383747e-06, "loss": 0.0269, "step": 151400 }, { "epoch": 3.082137404580153, "grad_norm": 0.04112350048642664, "learning_rate": 3.850997836948226e-06, "loss": 0.0017, "step": 151410 }, { "epoch": 3.0823409669211195, "grad_norm": 0.07440578232219369, "learning_rate": 3.850306296718285e-06, "loss": 0.0689, "step": 151420 }, { "epoch": 3.0825445292620866, "grad_norm": 0.15306646275366473, "learning_rate": 3.849614779707888e-06, "loss": 0.1133, "step": 151430 }, { "epoch": 3.0827480916030536, "grad_norm": 0.12204956260224309, "learning_rate": 3.848923285931003e-06, "loss": 0.0778, "step": 151440 }, { "epoch": 3.0829516539440203, "grad_norm": 0.13997543635485357, "learning_rate": 3.848231815401594e-06, "loss": 0.0365, "step": 151450 }, { "epoch": 3.0831552162849873, "grad_norm": 21.405537864152027, "learning_rate": 3.847540368133627e-06, "loss": 0.0898, "step": 151460 }, { "epoch": 3.0833587786259544, "grad_norm": 7.262290699034455, "learning_rate": 3.846848944141068e-06, "loss": 0.1043, "step": 151470 }, { "epoch": 3.083562340966921, "grad_norm": 0.2253281071447102, "learning_rate": 3.8461575434378774e-06, "loss": 0.0303, "step": 151480 }, { "epoch": 3.083765903307888, "grad_norm": 43.069497119953255, "learning_rate": 3.8454661660380235e-06, "loss": 0.0361, "step": 151490 }, { "epoch": 3.0839694656488548, "grad_norm": 2.59133485633367, "learning_rate": 3.844774811955467e-06, "loss": 0.0714, "step": 151500 }, { "epoch": 3.084173027989822, "grad_norm": 0.013025262987314783, "learning_rate": 3.84408348120417e-06, "loss": 0.0204, "step": 151510 }, { "epoch": 3.084376590330789, "grad_norm": 12.043596872988314, "learning_rate": 3.843392173798099e-06, "loss": 0.0483, "step": 151520 }, { "epoch": 3.0845801526717556, "grad_norm": 22.21308746788047, "learning_rate": 3.842700889751209e-06, "loss": 0.0328, "step": 151530 }, { "epoch": 3.0847837150127226, "grad_norm": 0.04657177802025361, "learning_rate": 3.842009629077467e-06, "loss": 0.0524, "step": 151540 }, { "epoch": 3.0849872773536897, "grad_norm": 0.04964695954382355, "learning_rate": 3.841318391790833e-06, "loss": 0.0037, "step": 151550 }, { "epoch": 3.0851908396946564, "grad_norm": 0.019948343379875232, "learning_rate": 3.840627177905263e-06, "loss": 0.0188, "step": 151560 }, { "epoch": 3.0853944020356234, "grad_norm": 0.010038905467321058, "learning_rate": 3.839935987434723e-06, "loss": 0.0914, "step": 151570 }, { "epoch": 3.0855979643765905, "grad_norm": 0.48124857291724293, "learning_rate": 3.83924482039317e-06, "loss": 0.0337, "step": 151580 }, { "epoch": 3.085801526717557, "grad_norm": 0.0021366555040725874, "learning_rate": 3.838553676794561e-06, "loss": 0.0006, "step": 151590 }, { "epoch": 3.086005089058524, "grad_norm": 0.030675971762183705, "learning_rate": 3.8378625566528585e-06, "loss": 0.051, "step": 151600 }, { "epoch": 3.0862086513994913, "grad_norm": 0.05011888765067352, "learning_rate": 3.837171459982017e-06, "loss": 0.0449, "step": 151610 }, { "epoch": 3.086412213740458, "grad_norm": 0.024705151349732234, "learning_rate": 3.836480386795996e-06, "loss": 0.2065, "step": 151620 }, { "epoch": 3.086615776081425, "grad_norm": 0.16015595366942698, "learning_rate": 3.8357893371087535e-06, "loss": 0.0082, "step": 151630 }, { "epoch": 3.0868193384223916, "grad_norm": 11.69124343969258, "learning_rate": 3.835098310934243e-06, "loss": 0.0077, "step": 151640 }, { "epoch": 3.0870229007633587, "grad_norm": 8.45217520443497, "learning_rate": 3.834407308286425e-06, "loss": 0.0715, "step": 151650 }, { "epoch": 3.087226463104326, "grad_norm": 0.2268318238244824, "learning_rate": 3.8337163291792505e-06, "loss": 0.0698, "step": 151660 }, { "epoch": 3.0874300254452924, "grad_norm": 0.6390273722913768, "learning_rate": 3.8330253736266765e-06, "loss": 0.0016, "step": 151670 }, { "epoch": 3.0876335877862595, "grad_norm": 0.019608939760949514, "learning_rate": 3.83233444164266e-06, "loss": 0.0332, "step": 151680 }, { "epoch": 3.0878371501272266, "grad_norm": 0.04801759764798539, "learning_rate": 3.831643533241151e-06, "loss": 0.1181, "step": 151690 }, { "epoch": 3.0880407124681932, "grad_norm": 0.11824941037527559, "learning_rate": 3.830952648436107e-06, "loss": 0.1105, "step": 151700 }, { "epoch": 3.0882442748091603, "grad_norm": 0.10412671232489923, "learning_rate": 3.830261787241481e-06, "loss": 0.038, "step": 151710 }, { "epoch": 3.0884478371501274, "grad_norm": 0.01975607564576244, "learning_rate": 3.829570949671224e-06, "loss": 0.0711, "step": 151720 }, { "epoch": 3.088651399491094, "grad_norm": 3.2745096960837e-07, "learning_rate": 3.82888013573929e-06, "loss": 0.0445, "step": 151730 }, { "epoch": 3.088854961832061, "grad_norm": 0.0826378599392577, "learning_rate": 3.828189345459628e-06, "loss": 0.0194, "step": 151740 }, { "epoch": 3.089058524173028, "grad_norm": 0.14515740529033333, "learning_rate": 3.827498578846192e-06, "loss": 0.0518, "step": 151750 }, { "epoch": 3.089262086513995, "grad_norm": 0.3514829963758306, "learning_rate": 3.826807835912934e-06, "loss": 0.0079, "step": 151760 }, { "epoch": 3.089465648854962, "grad_norm": 0.053897808523497986, "learning_rate": 3.826117116673801e-06, "loss": 0.0715, "step": 151770 }, { "epoch": 3.089669211195929, "grad_norm": 0.04955457783990477, "learning_rate": 3.825426421142745e-06, "loss": 0.0238, "step": 151780 }, { "epoch": 3.0898727735368956, "grad_norm": 0.0074036069712962215, "learning_rate": 3.8247357493337165e-06, "loss": 0.0292, "step": 151790 }, { "epoch": 3.0900763358778627, "grad_norm": 51.358084456206534, "learning_rate": 3.824045101260662e-06, "loss": 0.0162, "step": 151800 }, { "epoch": 3.0902798982188293, "grad_norm": 0.06176669657265176, "learning_rate": 3.823354476937533e-06, "loss": 0.0797, "step": 151810 }, { "epoch": 3.0904834605597964, "grad_norm": 0.004971137053634952, "learning_rate": 3.8226638763782736e-06, "loss": 0.0754, "step": 151820 }, { "epoch": 3.0906870229007635, "grad_norm": 1.045038967560572, "learning_rate": 3.8219732995968345e-06, "loss": 0.015, "step": 151830 }, { "epoch": 3.09089058524173, "grad_norm": 0.5940434751185181, "learning_rate": 3.821282746607163e-06, "loss": 0.0082, "step": 151840 }, { "epoch": 3.091094147582697, "grad_norm": 0.05127830702810272, "learning_rate": 3.820592217423203e-06, "loss": 0.0693, "step": 151850 }, { "epoch": 3.0912977099236643, "grad_norm": 11.957717436390617, "learning_rate": 3.819901712058902e-06, "loss": 0.0767, "step": 151860 }, { "epoch": 3.091501272264631, "grad_norm": 9.34260208144351, "learning_rate": 3.819211230528208e-06, "loss": 0.1262, "step": 151870 }, { "epoch": 3.091704834605598, "grad_norm": 0.9073481436252981, "learning_rate": 3.818520772845062e-06, "loss": 0.0068, "step": 151880 }, { "epoch": 3.091908396946565, "grad_norm": 14.48986081621332, "learning_rate": 3.817830339023412e-06, "loss": 0.0574, "step": 151890 }, { "epoch": 3.0921119592875317, "grad_norm": 6.9344138498040175, "learning_rate": 3.817139929077201e-06, "loss": 0.0694, "step": 151900 }, { "epoch": 3.0923155216284988, "grad_norm": 0.41087115134136065, "learning_rate": 3.8164495430203705e-06, "loss": 0.096, "step": 151910 }, { "epoch": 3.092519083969466, "grad_norm": 0.015299336312345214, "learning_rate": 3.8157591808668685e-06, "loss": 0.0096, "step": 151920 }, { "epoch": 3.0927226463104325, "grad_norm": 0.012192597084025966, "learning_rate": 3.815068842630634e-06, "loss": 0.0248, "step": 151930 }, { "epoch": 3.0929262086513996, "grad_norm": 0.0034383654354491204, "learning_rate": 3.8143785283256116e-06, "loss": 0.0605, "step": 151940 }, { "epoch": 3.093129770992366, "grad_norm": 21.360278343082125, "learning_rate": 3.8136882379657412e-06, "loss": 0.0452, "step": 151950 }, { "epoch": 3.0933333333333333, "grad_norm": 0.0662208879489782, "learning_rate": 3.8129979715649635e-06, "loss": 0.0784, "step": 151960 }, { "epoch": 3.0935368956743003, "grad_norm": 0.019839121795345364, "learning_rate": 3.812307729137222e-06, "loss": 0.0206, "step": 151970 }, { "epoch": 3.093740458015267, "grad_norm": 0.058002183193567876, "learning_rate": 3.8116175106964553e-06, "loss": 0.0236, "step": 151980 }, { "epoch": 3.093944020356234, "grad_norm": 7.695707360277266, "learning_rate": 3.8109273162566025e-06, "loss": 0.0318, "step": 151990 }, { "epoch": 3.094147582697201, "grad_norm": 0.0008076995968817417, "learning_rate": 3.810237145831606e-06, "loss": 0.0585, "step": 152000 }, { "epoch": 3.0943511450381678, "grad_norm": 0.013588545234237352, "learning_rate": 3.8095469994354017e-06, "loss": 0.0606, "step": 152010 }, { "epoch": 3.094554707379135, "grad_norm": 0.019078472305463008, "learning_rate": 3.80885687708193e-06, "loss": 0.0041, "step": 152020 }, { "epoch": 3.094758269720102, "grad_norm": 11.942335177488546, "learning_rate": 3.8081667787851256e-06, "loss": 0.0511, "step": 152030 }, { "epoch": 3.0949618320610686, "grad_norm": 0.218028269801559, "learning_rate": 3.8074767045589297e-06, "loss": 0.1071, "step": 152040 }, { "epoch": 3.0951653944020356, "grad_norm": 0.05229078177728364, "learning_rate": 3.8067866544172783e-06, "loss": 0.065, "step": 152050 }, { "epoch": 3.0953689567430027, "grad_norm": 0.22687643717769773, "learning_rate": 3.8060966283741056e-06, "loss": 0.1335, "step": 152060 }, { "epoch": 3.0955725190839694, "grad_norm": 11.247742899418242, "learning_rate": 3.80540662644335e-06, "loss": 0.0615, "step": 152070 }, { "epoch": 3.0957760814249364, "grad_norm": 0.10591477868007784, "learning_rate": 3.8047166486389475e-06, "loss": 0.0456, "step": 152080 }, { "epoch": 3.0959796437659035, "grad_norm": 0.03931169528880473, "learning_rate": 3.8040266949748307e-06, "loss": 0.0178, "step": 152090 }, { "epoch": 3.09618320610687, "grad_norm": 7.1505133858322445, "learning_rate": 3.8033367654649355e-06, "loss": 0.0842, "step": 152100 }, { "epoch": 3.0963867684478372, "grad_norm": 0.1383938487660834, "learning_rate": 3.8026468601231946e-06, "loss": 0.057, "step": 152110 }, { "epoch": 3.096590330788804, "grad_norm": 0.04199721243615107, "learning_rate": 3.801956978963543e-06, "loss": 0.0646, "step": 152120 }, { "epoch": 3.096793893129771, "grad_norm": 0.033051389167851844, "learning_rate": 3.8012671219999143e-06, "loss": 0.0164, "step": 152130 }, { "epoch": 3.096997455470738, "grad_norm": 1.99273329851869, "learning_rate": 3.8005772892462377e-06, "loss": 0.0064, "step": 152140 }, { "epoch": 3.0972010178117046, "grad_norm": 7.901397521262209, "learning_rate": 3.7998874807164494e-06, "loss": 0.143, "step": 152150 }, { "epoch": 3.0974045801526717, "grad_norm": 0.04303774213321754, "learning_rate": 3.7991976964244796e-06, "loss": 0.0495, "step": 152160 }, { "epoch": 3.097608142493639, "grad_norm": 0.0688277197748524, "learning_rate": 3.7985079363842575e-06, "loss": 0.0834, "step": 152170 }, { "epoch": 3.0978117048346054, "grad_norm": 12.127483667311441, "learning_rate": 3.7978182006097166e-06, "loss": 0.0363, "step": 152180 }, { "epoch": 3.0980152671755725, "grad_norm": 0.012287120132203403, "learning_rate": 3.7971284891147848e-06, "loss": 0.0231, "step": 152190 }, { "epoch": 3.0982188295165396, "grad_norm": 0.4109323742879244, "learning_rate": 3.7964388019133914e-06, "loss": 0.0423, "step": 152200 }, { "epoch": 3.0984223918575062, "grad_norm": 0.18126755731754618, "learning_rate": 3.7957491390194695e-06, "loss": 0.0768, "step": 152210 }, { "epoch": 3.0986259541984733, "grad_norm": 0.023246563519937882, "learning_rate": 3.7950595004469415e-06, "loss": 0.0141, "step": 152220 }, { "epoch": 3.0988295165394404, "grad_norm": 0.015298834361592331, "learning_rate": 3.79436988620974e-06, "loss": 0.0394, "step": 152230 }, { "epoch": 3.099033078880407, "grad_norm": 0.0509157694185438, "learning_rate": 3.7936802963217928e-06, "loss": 0.0352, "step": 152240 }, { "epoch": 3.099236641221374, "grad_norm": 0.026336684188221396, "learning_rate": 3.7929907307970237e-06, "loss": 0.0692, "step": 152250 }, { "epoch": 3.099440203562341, "grad_norm": 0.012312991633603712, "learning_rate": 3.7923011896493635e-06, "loss": 0.0223, "step": 152260 }, { "epoch": 3.099643765903308, "grad_norm": 0.6814437266212798, "learning_rate": 3.791611672892735e-06, "loss": 0.0808, "step": 152270 }, { "epoch": 3.099847328244275, "grad_norm": 0.6867782183762925, "learning_rate": 3.790922180541064e-06, "loss": 0.0663, "step": 152280 }, { "epoch": 3.1000508905852415, "grad_norm": 0.09726701474031818, "learning_rate": 3.7902327126082796e-06, "loss": 0.0014, "step": 152290 }, { "epoch": 3.1002544529262086, "grad_norm": 10.48318863584829, "learning_rate": 3.7895432691083017e-06, "loss": 0.0053, "step": 152300 }, { "epoch": 3.1004580152671757, "grad_norm": 0.06252508911441902, "learning_rate": 3.788853850055058e-06, "loss": 0.1058, "step": 152310 }, { "epoch": 3.1006615776081423, "grad_norm": 0.10296032910838164, "learning_rate": 3.7881644554624685e-06, "loss": 0.0183, "step": 152320 }, { "epoch": 3.1008651399491094, "grad_norm": 0.12687625556935686, "learning_rate": 3.7874750853444602e-06, "loss": 0.1051, "step": 152330 }, { "epoch": 3.1010687022900765, "grad_norm": 0.17347206079656544, "learning_rate": 3.7867857397149555e-06, "loss": 0.075, "step": 152340 }, { "epoch": 3.101272264631043, "grad_norm": 1.6177910070742596, "learning_rate": 3.7860964185878735e-06, "loss": 0.0255, "step": 152350 }, { "epoch": 3.10147582697201, "grad_norm": 2.6756713438144324, "learning_rate": 3.7854071219771373e-06, "loss": 0.0906, "step": 152360 }, { "epoch": 3.1016793893129773, "grad_norm": 0.10963945423466018, "learning_rate": 3.784717849896671e-06, "loss": 0.1163, "step": 152370 }, { "epoch": 3.101882951653944, "grad_norm": 0.01820105016001956, "learning_rate": 3.7840286023603912e-06, "loss": 0.0198, "step": 152380 }, { "epoch": 3.102086513994911, "grad_norm": 0.008435915126261967, "learning_rate": 3.7833393793822216e-06, "loss": 0.0652, "step": 152390 }, { "epoch": 3.102290076335878, "grad_norm": 0.17783206605740043, "learning_rate": 3.7826501809760775e-06, "loss": 0.0275, "step": 152400 }, { "epoch": 3.1024936386768447, "grad_norm": 0.026114147475452175, "learning_rate": 3.7819610071558825e-06, "loss": 0.0598, "step": 152410 }, { "epoch": 3.1026972010178118, "grad_norm": 0.027328698936234713, "learning_rate": 3.7812718579355547e-06, "loss": 0.0017, "step": 152420 }, { "epoch": 3.102900763358779, "grad_norm": 0.2209787411793398, "learning_rate": 3.7805827333290094e-06, "loss": 0.018, "step": 152430 }, { "epoch": 3.1031043256997455, "grad_norm": 0.7865583322837184, "learning_rate": 3.7798936333501673e-06, "loss": 0.056, "step": 152440 }, { "epoch": 3.1033078880407126, "grad_norm": 7.798980128107431, "learning_rate": 3.779204558012946e-06, "loss": 0.0758, "step": 152450 }, { "epoch": 3.103511450381679, "grad_norm": 16.690072269295474, "learning_rate": 3.778515507331259e-06, "loss": 0.0676, "step": 152460 }, { "epoch": 3.1037150127226463, "grad_norm": 0.14614924500584844, "learning_rate": 3.777826481319027e-06, "loss": 0.0266, "step": 152470 }, { "epoch": 3.1039185750636133, "grad_norm": 25.488113107392465, "learning_rate": 3.7771374799901615e-06, "loss": 0.0636, "step": 152480 }, { "epoch": 3.10412213740458, "grad_norm": 0.03432026711694984, "learning_rate": 3.77644850335858e-06, "loss": 0.0817, "step": 152490 }, { "epoch": 3.104325699745547, "grad_norm": 0.06029878515552407, "learning_rate": 3.775759551438198e-06, "loss": 0.0956, "step": 152500 }, { "epoch": 3.104529262086514, "grad_norm": 0.13477048835248212, "learning_rate": 3.7750706242429263e-06, "loss": 0.072, "step": 152510 }, { "epoch": 3.1047328244274808, "grad_norm": 29.628286594437345, "learning_rate": 3.7743817217866826e-06, "loss": 0.0358, "step": 152520 }, { "epoch": 3.104936386768448, "grad_norm": 0.017676313918231212, "learning_rate": 3.7736928440833792e-06, "loss": 0.078, "step": 152530 }, { "epoch": 3.105139949109415, "grad_norm": 0.19676832557268267, "learning_rate": 3.7730039911469262e-06, "loss": 0.1269, "step": 152540 }, { "epoch": 3.1053435114503816, "grad_norm": 71.34417246539388, "learning_rate": 3.7723151629912403e-06, "loss": 0.0652, "step": 152550 }, { "epoch": 3.1055470737913486, "grad_norm": 0.028816905421089525, "learning_rate": 3.77162635963023e-06, "loss": 0.0416, "step": 152560 }, { "epoch": 3.1057506361323157, "grad_norm": 0.366342287027217, "learning_rate": 3.770937581077806e-06, "loss": 0.0072, "step": 152570 }, { "epoch": 3.1059541984732824, "grad_norm": 0.06479761504092595, "learning_rate": 3.7702488273478833e-06, "loss": 0.0905, "step": 152580 }, { "epoch": 3.1061577608142494, "grad_norm": 4.737560069770535, "learning_rate": 3.7695600984543677e-06, "loss": 0.0472, "step": 152590 }, { "epoch": 3.106361323155216, "grad_norm": 2.1253344909461656, "learning_rate": 3.76887139441117e-06, "loss": 0.0413, "step": 152600 }, { "epoch": 3.106564885496183, "grad_norm": 0.03402384385068291, "learning_rate": 3.768182715232202e-06, "loss": 0.0366, "step": 152610 }, { "epoch": 3.1067684478371502, "grad_norm": 0.09080610660373961, "learning_rate": 3.76749406093137e-06, "loss": 0.0777, "step": 152620 }, { "epoch": 3.106972010178117, "grad_norm": 0.08277333669520251, "learning_rate": 3.7668054315225837e-06, "loss": 0.0445, "step": 152630 }, { "epoch": 3.107175572519084, "grad_norm": 0.06757702171015659, "learning_rate": 3.766116827019749e-06, "loss": 0.0547, "step": 152640 }, { "epoch": 3.107379134860051, "grad_norm": 0.10016993218011361, "learning_rate": 3.765428247436773e-06, "loss": 0.0187, "step": 152650 }, { "epoch": 3.1075826972010177, "grad_norm": 20.909522355820314, "learning_rate": 3.7647396927875663e-06, "loss": 0.2076, "step": 152660 }, { "epoch": 3.1077862595419847, "grad_norm": 0.05366851945323559, "learning_rate": 3.764051163086031e-06, "loss": 0.0594, "step": 152670 }, { "epoch": 3.107989821882952, "grad_norm": 0.06906004729771673, "learning_rate": 3.7633626583460757e-06, "loss": 0.0346, "step": 152680 }, { "epoch": 3.1081933842239184, "grad_norm": 2.514140456731768, "learning_rate": 3.7626741785816018e-06, "loss": 0.1083, "step": 152690 }, { "epoch": 3.1083969465648855, "grad_norm": 0.02313269659597061, "learning_rate": 3.761985723806518e-06, "loss": 0.0838, "step": 152700 }, { "epoch": 3.1086005089058526, "grad_norm": 0.22999141435126855, "learning_rate": 3.761297294034728e-06, "loss": 0.0235, "step": 152710 }, { "epoch": 3.1088040712468192, "grad_norm": 0.047901736075892175, "learning_rate": 3.7606088892801324e-06, "loss": 0.1175, "step": 152720 }, { "epoch": 3.1090076335877863, "grad_norm": 0.04912691942586812, "learning_rate": 3.759920509556637e-06, "loss": 0.0519, "step": 152730 }, { "epoch": 3.1092111959287534, "grad_norm": 0.053206880859957305, "learning_rate": 3.7592321548781465e-06, "loss": 0.0471, "step": 152740 }, { "epoch": 3.10941475826972, "grad_norm": 0.18413897419996644, "learning_rate": 3.758543825258558e-06, "loss": 0.0989, "step": 152750 }, { "epoch": 3.109618320610687, "grad_norm": 0.05941953733356883, "learning_rate": 3.757855520711779e-06, "loss": 0.0526, "step": 152760 }, { "epoch": 3.1098218829516537, "grad_norm": 12.680319975570992, "learning_rate": 3.7571672412517046e-06, "loss": 0.0795, "step": 152770 }, { "epoch": 3.110025445292621, "grad_norm": 0.4800729442102734, "learning_rate": 3.7564789868922393e-06, "loss": 0.071, "step": 152780 }, { "epoch": 3.110229007633588, "grad_norm": 6.380937146883375, "learning_rate": 3.7557907576472835e-06, "loss": 0.0971, "step": 152790 }, { "epoch": 3.1104325699745545, "grad_norm": 0.049845165738828884, "learning_rate": 3.755102553530734e-06, "loss": 0.0018, "step": 152800 }, { "epoch": 3.1106361323155216, "grad_norm": 0.09070753512901093, "learning_rate": 3.7544143745564933e-06, "loss": 0.0614, "step": 152810 }, { "epoch": 3.1108396946564887, "grad_norm": 0.10781519540978458, "learning_rate": 3.753726220738459e-06, "loss": 0.0033, "step": 152820 }, { "epoch": 3.1110432569974553, "grad_norm": 0.023155627922290922, "learning_rate": 3.7530380920905267e-06, "loss": 0.0565, "step": 152830 }, { "epoch": 3.1112468193384224, "grad_norm": 0.014182379449219825, "learning_rate": 3.752349988626599e-06, "loss": 0.0497, "step": 152840 }, { "epoch": 3.1114503816793895, "grad_norm": 0.1428304368756743, "learning_rate": 3.751661910360569e-06, "loss": 0.0438, "step": 152850 }, { "epoch": 3.111653944020356, "grad_norm": 0.25336829235963576, "learning_rate": 3.750973857306333e-06, "loss": 0.0087, "step": 152860 }, { "epoch": 3.111857506361323, "grad_norm": 2.4993757817082534, "learning_rate": 3.750285829477791e-06, "loss": 0.0595, "step": 152870 }, { "epoch": 3.1120610687022903, "grad_norm": 0.030354365347330804, "learning_rate": 3.7495978268888357e-06, "loss": 0.0825, "step": 152880 }, { "epoch": 3.112264631043257, "grad_norm": 0.007597033801857253, "learning_rate": 3.7489098495533614e-06, "loss": 0.1298, "step": 152890 }, { "epoch": 3.112468193384224, "grad_norm": 8.653280576844294, "learning_rate": 3.748221897485267e-06, "loss": 0.0309, "step": 152900 }, { "epoch": 3.1126717557251906, "grad_norm": 47.89809446567296, "learning_rate": 3.747533970698441e-06, "loss": 0.0343, "step": 152910 }, { "epoch": 3.1128753180661577, "grad_norm": 0.10168150910985602, "learning_rate": 3.7468460692067815e-06, "loss": 0.0015, "step": 152920 }, { "epoch": 3.1130788804071248, "grad_norm": 0.046964049678665985, "learning_rate": 3.7461581930241786e-06, "loss": 0.0084, "step": 152930 }, { "epoch": 3.1132824427480914, "grad_norm": 28.470330406616004, "learning_rate": 3.7454703421645245e-06, "loss": 0.052, "step": 152940 }, { "epoch": 3.1134860050890585, "grad_norm": 127.8906465048055, "learning_rate": 3.744782516641715e-06, "loss": 0.096, "step": 152950 }, { "epoch": 3.1136895674300256, "grad_norm": 0.01522720145649342, "learning_rate": 3.744094716469639e-06, "loss": 0.0011, "step": 152960 }, { "epoch": 3.113893129770992, "grad_norm": 0.014298904024179571, "learning_rate": 3.743406941662188e-06, "loss": 0.0046, "step": 152970 }, { "epoch": 3.1140966921119593, "grad_norm": 0.7176027178220235, "learning_rate": 3.7427191922332508e-06, "loss": 0.0588, "step": 152980 }, { "epoch": 3.1143002544529264, "grad_norm": 7.849854409605375, "learning_rate": 3.7420314681967197e-06, "loss": 0.0578, "step": 152990 }, { "epoch": 3.114503816793893, "grad_norm": 0.03796140319087482, "learning_rate": 3.741343769566484e-06, "loss": 0.0013, "step": 153000 }, { "epoch": 3.11470737913486, "grad_norm": 0.7106568021652533, "learning_rate": 3.7406560963564298e-06, "loss": 0.0179, "step": 153010 }, { "epoch": 3.114910941475827, "grad_norm": 1.6124864159320915, "learning_rate": 3.7399684485804493e-06, "loss": 0.0174, "step": 153020 }, { "epoch": 3.115114503816794, "grad_norm": 0.018228946379551088, "learning_rate": 3.73928082625243e-06, "loss": 0.161, "step": 153030 }, { "epoch": 3.115318066157761, "grad_norm": 0.004151286248751357, "learning_rate": 3.738593229386256e-06, "loss": 0.0362, "step": 153040 }, { "epoch": 3.115521628498728, "grad_norm": 0.7425916091465952, "learning_rate": 3.7379056579958197e-06, "loss": 0.1108, "step": 153050 }, { "epoch": 3.1157251908396946, "grad_norm": 62.95094599812427, "learning_rate": 3.7372181120950003e-06, "loss": 0.018, "step": 153060 }, { "epoch": 3.1159287531806616, "grad_norm": 5.11688705589915, "learning_rate": 3.736530591697689e-06, "loss": 0.057, "step": 153070 }, { "epoch": 3.1161323155216287, "grad_norm": 0.019749417339478922, "learning_rate": 3.7358430968177713e-06, "loss": 0.0663, "step": 153080 }, { "epoch": 3.1163358778625954, "grad_norm": 0.04756233861603174, "learning_rate": 3.7351556274691283e-06, "loss": 0.043, "step": 153090 }, { "epoch": 3.1165394402035624, "grad_norm": 0.6531594691528935, "learning_rate": 3.734468183665647e-06, "loss": 0.0708, "step": 153100 }, { "epoch": 3.116743002544529, "grad_norm": 0.08801422649765056, "learning_rate": 3.7337807654212123e-06, "loss": 0.0635, "step": 153110 }, { "epoch": 3.116946564885496, "grad_norm": 0.006483935568473183, "learning_rate": 3.7330933727497033e-06, "loss": 0.0243, "step": 153120 }, { "epoch": 3.1171501272264632, "grad_norm": 0.20998339002294236, "learning_rate": 3.732406005665008e-06, "loss": 0.0929, "step": 153130 }, { "epoch": 3.11735368956743, "grad_norm": 14.011313290237782, "learning_rate": 3.7317186641810043e-06, "loss": 0.0276, "step": 153140 }, { "epoch": 3.117557251908397, "grad_norm": 0.007768781450113158, "learning_rate": 3.7310313483115757e-06, "loss": 0.0003, "step": 153150 }, { "epoch": 3.117760814249364, "grad_norm": 0.034813252272085, "learning_rate": 3.7303440580706045e-06, "loss": 0.03, "step": 153160 }, { "epoch": 3.1179643765903307, "grad_norm": 21.06214694589379, "learning_rate": 3.7296567934719696e-06, "loss": 0.1387, "step": 153170 }, { "epoch": 3.1181679389312977, "grad_norm": 9.859625454487952, "learning_rate": 3.728969554529551e-06, "loss": 0.0651, "step": 153180 }, { "epoch": 3.118371501272265, "grad_norm": 0.026329462345864706, "learning_rate": 3.7282823412572335e-06, "loss": 0.0028, "step": 153190 }, { "epoch": 3.1185750636132314, "grad_norm": 20.082597201609648, "learning_rate": 3.7275951536688877e-06, "loss": 0.0919, "step": 153200 }, { "epoch": 3.1187786259541985, "grad_norm": 0.048368134424343685, "learning_rate": 3.726907991778399e-06, "loss": 0.0203, "step": 153210 }, { "epoch": 3.1189821882951656, "grad_norm": 74.95749026064833, "learning_rate": 3.726220855599642e-06, "loss": 0.0454, "step": 153220 }, { "epoch": 3.1191857506361322, "grad_norm": 0.00855153872690641, "learning_rate": 3.7255337451464944e-06, "loss": 0.0437, "step": 153230 }, { "epoch": 3.1193893129770993, "grad_norm": 0.0007474838921494516, "learning_rate": 3.7248466604328365e-06, "loss": 0.0245, "step": 153240 }, { "epoch": 3.119592875318066, "grad_norm": 0.004366383142997725, "learning_rate": 3.7241596014725416e-06, "loss": 0.0142, "step": 153250 }, { "epoch": 3.119796437659033, "grad_norm": 0.18420721022682388, "learning_rate": 3.7234725682794857e-06, "loss": 0.0664, "step": 153260 }, { "epoch": 3.12, "grad_norm": 0.018992228541369116, "learning_rate": 3.722785560867548e-06, "loss": 0.0172, "step": 153270 }, { "epoch": 3.1202035623409667, "grad_norm": 0.013598725548223804, "learning_rate": 3.7220985792505993e-06, "loss": 0.0929, "step": 153280 }, { "epoch": 3.120407124681934, "grad_norm": 0.07126560426816664, "learning_rate": 3.7214116234425167e-06, "loss": 0.001, "step": 153290 }, { "epoch": 3.120610687022901, "grad_norm": 0.027578840573539765, "learning_rate": 3.7207246934571715e-06, "loss": 0.0219, "step": 153300 }, { "epoch": 3.1208142493638675, "grad_norm": 7.280891185667778, "learning_rate": 3.72003778930844e-06, "loss": 0.0346, "step": 153310 }, { "epoch": 3.1210178117048346, "grad_norm": 0.01411760648631418, "learning_rate": 3.7193509110101943e-06, "loss": 0.0568, "step": 153320 }, { "epoch": 3.1212213740458017, "grad_norm": 50.710617683783084, "learning_rate": 3.7186640585763063e-06, "loss": 0.0219, "step": 153330 }, { "epoch": 3.1214249363867683, "grad_norm": 19.491012024247983, "learning_rate": 3.7179772320206488e-06, "loss": 0.0638, "step": 153340 }, { "epoch": 3.1216284987277354, "grad_norm": 1.2972219400283678, "learning_rate": 3.7172904313570897e-06, "loss": 0.0634, "step": 153350 }, { "epoch": 3.1218320610687025, "grad_norm": 0.13579335652680338, "learning_rate": 3.7166036565995035e-06, "loss": 0.0324, "step": 153360 }, { "epoch": 3.122035623409669, "grad_norm": 22.14601610377814, "learning_rate": 3.7159169077617608e-06, "loss": 0.0642, "step": 153370 }, { "epoch": 3.122239185750636, "grad_norm": 0.09344355519423937, "learning_rate": 3.715230184857728e-06, "loss": 0.0667, "step": 153380 }, { "epoch": 3.1224427480916033, "grad_norm": 0.016147036137590384, "learning_rate": 3.714543487901277e-06, "loss": 0.0005, "step": 153390 }, { "epoch": 3.12264631043257, "grad_norm": 9.099205892140116, "learning_rate": 3.713856816906277e-06, "loss": 0.1474, "step": 153400 }, { "epoch": 3.122849872773537, "grad_norm": 0.01540647689062088, "learning_rate": 3.713170171886594e-06, "loss": 0.0365, "step": 153410 }, { "epoch": 3.1230534351145036, "grad_norm": 14.34782607187924, "learning_rate": 3.7124835528560976e-06, "loss": 0.0027, "step": 153420 }, { "epoch": 3.1232569974554707, "grad_norm": 0.04645980108360789, "learning_rate": 3.711796959828654e-06, "loss": 0.1378, "step": 153430 }, { "epoch": 3.1234605597964378, "grad_norm": 0.005334157721856655, "learning_rate": 3.7111103928181287e-06, "loss": 0.001, "step": 153440 }, { "epoch": 3.1236641221374044, "grad_norm": 0.1451977157786877, "learning_rate": 3.710423851838391e-06, "loss": 0.0793, "step": 153450 }, { "epoch": 3.1238676844783715, "grad_norm": 5.426895651356395, "learning_rate": 3.709737336903304e-06, "loss": 0.0827, "step": 153460 }, { "epoch": 3.1240712468193386, "grad_norm": 0.10274563472487751, "learning_rate": 3.709050848026733e-06, "loss": 0.0074, "step": 153470 }, { "epoch": 3.124274809160305, "grad_norm": 9.969287382475018, "learning_rate": 3.7083643852225437e-06, "loss": 0.0965, "step": 153480 }, { "epoch": 3.1244783715012723, "grad_norm": 0.08581200380506218, "learning_rate": 3.7076779485045976e-06, "loss": 0.0455, "step": 153490 }, { "epoch": 3.1246819338422394, "grad_norm": 8.232538354266367, "learning_rate": 3.7069915378867617e-06, "loss": 0.1144, "step": 153500 }, { "epoch": 3.124885496183206, "grad_norm": 0.00596509136012186, "learning_rate": 3.7063051533828957e-06, "loss": 0.0509, "step": 153510 }, { "epoch": 3.125089058524173, "grad_norm": 0.061603263449105605, "learning_rate": 3.705618795006863e-06, "loss": 0.0297, "step": 153520 }, { "epoch": 3.12529262086514, "grad_norm": 0.0191204678828053, "learning_rate": 3.7049324627725276e-06, "loss": 0.0421, "step": 153530 }, { "epoch": 3.125496183206107, "grad_norm": 0.021329672983580673, "learning_rate": 3.704246156693748e-06, "loss": 0.0243, "step": 153540 }, { "epoch": 3.125699745547074, "grad_norm": 0.07125720233090836, "learning_rate": 3.703559876784385e-06, "loss": 0.0847, "step": 153550 }, { "epoch": 3.1259033078880405, "grad_norm": 0.08967166955876146, "learning_rate": 3.702873623058303e-06, "loss": 0.0423, "step": 153560 }, { "epoch": 3.1261068702290076, "grad_norm": 50.822649689937656, "learning_rate": 3.7021873955293575e-06, "loss": 0.079, "step": 153570 }, { "epoch": 3.1263104325699747, "grad_norm": 1.2606919531864629, "learning_rate": 3.70150119421141e-06, "loss": 0.0121, "step": 153580 }, { "epoch": 3.1265139949109413, "grad_norm": 0.47241317207777195, "learning_rate": 3.700815019118316e-06, "loss": 0.0825, "step": 153590 }, { "epoch": 3.1267175572519084, "grad_norm": 0.1504235536344107, "learning_rate": 3.700128870263937e-06, "loss": 0.0474, "step": 153600 }, { "epoch": 3.1269211195928754, "grad_norm": 54.95523784432109, "learning_rate": 3.699442747662131e-06, "loss": 0.0791, "step": 153610 }, { "epoch": 3.127124681933842, "grad_norm": 0.02527700669848901, "learning_rate": 3.6987566513267527e-06, "loss": 0.0572, "step": 153620 }, { "epoch": 3.127328244274809, "grad_norm": 0.040960664195732334, "learning_rate": 3.698070581271659e-06, "loss": 0.0407, "step": 153630 }, { "epoch": 3.1275318066157762, "grad_norm": 4.191722030018605, "learning_rate": 3.697384537510709e-06, "loss": 0.0381, "step": 153640 }, { "epoch": 3.127735368956743, "grad_norm": 7.931111366400338, "learning_rate": 3.6966985200577543e-06, "loss": 0.0394, "step": 153650 }, { "epoch": 3.12793893129771, "grad_norm": 0.021267125844774096, "learning_rate": 3.696012528926653e-06, "loss": 0.0848, "step": 153660 }, { "epoch": 3.128142493638677, "grad_norm": 11.103970522514178, "learning_rate": 3.6953265641312565e-06, "loss": 0.1169, "step": 153670 }, { "epoch": 3.1283460559796437, "grad_norm": 0.045611921949820326, "learning_rate": 3.694640625685421e-06, "loss": 0.0037, "step": 153680 }, { "epoch": 3.1285496183206107, "grad_norm": 0.2650840539455819, "learning_rate": 3.6939547136030007e-06, "loss": 0.0949, "step": 153690 }, { "epoch": 3.128753180661578, "grad_norm": 0.08963201966498531, "learning_rate": 3.6932688278978447e-06, "loss": 0.0807, "step": 153700 }, { "epoch": 3.1289567430025444, "grad_norm": 0.2946397705112365, "learning_rate": 3.69258296858381e-06, "loss": 0.1523, "step": 153710 }, { "epoch": 3.1291603053435115, "grad_norm": 0.019079426427140233, "learning_rate": 3.6918971356747452e-06, "loss": 0.0353, "step": 153720 }, { "epoch": 3.1293638676844786, "grad_norm": 0.08189690856535341, "learning_rate": 3.691211329184502e-06, "loss": 0.0038, "step": 153730 }, { "epoch": 3.1295674300254452, "grad_norm": 0.30628547528411226, "learning_rate": 3.690525549126934e-06, "loss": 0.0469, "step": 153740 }, { "epoch": 3.1297709923664123, "grad_norm": 0.03398629875431534, "learning_rate": 3.6898397955158865e-06, "loss": 0.0303, "step": 153750 }, { "epoch": 3.129974554707379, "grad_norm": 0.23342456745007875, "learning_rate": 3.6891540683652127e-06, "loss": 0.059, "step": 153760 }, { "epoch": 3.130178117048346, "grad_norm": 0.29681086200527, "learning_rate": 3.6884683676887624e-06, "loss": 0.0541, "step": 153770 }, { "epoch": 3.130381679389313, "grad_norm": 0.07251106125795827, "learning_rate": 3.6877826935003795e-06, "loss": 0.0722, "step": 153780 }, { "epoch": 3.1305852417302797, "grad_norm": 0.026049240937724406, "learning_rate": 3.6870970458139178e-06, "loss": 0.0787, "step": 153790 }, { "epoch": 3.130788804071247, "grad_norm": 42.55284398152282, "learning_rate": 3.6864114246432215e-06, "loss": 0.0678, "step": 153800 }, { "epoch": 3.130992366412214, "grad_norm": 10.385190117489527, "learning_rate": 3.685725830002137e-06, "loss": 0.0592, "step": 153810 }, { "epoch": 3.1311959287531805, "grad_norm": 0.1337573646860092, "learning_rate": 3.6850402619045144e-06, "loss": 0.0578, "step": 153820 }, { "epoch": 3.1313994910941476, "grad_norm": 0.05668461195584349, "learning_rate": 3.684354720364196e-06, "loss": 0.079, "step": 153830 }, { "epoch": 3.1316030534351147, "grad_norm": 0.06171744299142639, "learning_rate": 3.6836692053950286e-06, "loss": 0.0801, "step": 153840 }, { "epoch": 3.1318066157760813, "grad_norm": 0.03502612037396128, "learning_rate": 3.682983717010858e-06, "loss": 0.0021, "step": 153850 }, { "epoch": 3.1320101781170484, "grad_norm": 0.032579793547786465, "learning_rate": 3.682298255225527e-06, "loss": 0.004, "step": 153860 }, { "epoch": 3.132213740458015, "grad_norm": 0.12801695982491004, "learning_rate": 3.6816128200528806e-06, "loss": 0.0035, "step": 153870 }, { "epoch": 3.132417302798982, "grad_norm": 0.3253148593732723, "learning_rate": 3.6809274115067605e-06, "loss": 0.0034, "step": 153880 }, { "epoch": 3.132620865139949, "grad_norm": 19.12823864925293, "learning_rate": 3.6802420296010087e-06, "loss": 0.0349, "step": 153890 }, { "epoch": 3.132824427480916, "grad_norm": 0.00440660438373168, "learning_rate": 3.679556674349472e-06, "loss": 0.0494, "step": 153900 }, { "epoch": 3.133027989821883, "grad_norm": 0.03843725608106231, "learning_rate": 3.6788713457659873e-06, "loss": 0.0195, "step": 153910 }, { "epoch": 3.13323155216285, "grad_norm": 0.0355222756969134, "learning_rate": 3.6781860438643968e-06, "loss": 0.1144, "step": 153920 }, { "epoch": 3.1334351145038166, "grad_norm": 0.022963471417770553, "learning_rate": 3.6775007686585433e-06, "loss": 0.0579, "step": 153930 }, { "epoch": 3.1336386768447837, "grad_norm": 0.06702842162702848, "learning_rate": 3.6768155201622645e-06, "loss": 0.0508, "step": 153940 }, { "epoch": 3.1338422391857508, "grad_norm": 17.30856890984364, "learning_rate": 3.676130298389401e-06, "loss": 0.0966, "step": 153950 }, { "epoch": 3.1340458015267174, "grad_norm": 45.90773929881543, "learning_rate": 3.67544510335379e-06, "loss": 0.0509, "step": 153960 }, { "epoch": 3.1342493638676845, "grad_norm": 0.037945795197473126, "learning_rate": 3.6747599350692716e-06, "loss": 0.0028, "step": 153970 }, { "epoch": 3.1344529262086516, "grad_norm": 0.08261751035108224, "learning_rate": 3.6740747935496846e-06, "loss": 0.0142, "step": 153980 }, { "epoch": 3.134656488549618, "grad_norm": 0.1960970260016973, "learning_rate": 3.6733896788088617e-06, "loss": 0.0565, "step": 153990 }, { "epoch": 3.1348600508905853, "grad_norm": 0.2020903377627639, "learning_rate": 3.672704590860645e-06, "loss": 0.0085, "step": 154000 }, { "epoch": 3.1350636132315524, "grad_norm": 0.01568348008098999, "learning_rate": 3.67201952971887e-06, "loss": 0.1788, "step": 154010 }, { "epoch": 3.135267175572519, "grad_norm": 0.009380418614744099, "learning_rate": 3.6713344953973683e-06, "loss": 0.107, "step": 154020 }, { "epoch": 3.135470737913486, "grad_norm": 2.642995323440179, "learning_rate": 3.6706494879099807e-06, "loss": 0.1285, "step": 154030 }, { "epoch": 3.135674300254453, "grad_norm": 20.071943873119473, "learning_rate": 3.6699645072705365e-06, "loss": 0.0811, "step": 154040 }, { "epoch": 3.13587786259542, "grad_norm": 0.039655071105717636, "learning_rate": 3.669279553492873e-06, "loss": 0.0052, "step": 154050 }, { "epoch": 3.136081424936387, "grad_norm": 0.147313327991093, "learning_rate": 3.6685946265908234e-06, "loss": 0.0442, "step": 154060 }, { "epoch": 3.1362849872773535, "grad_norm": 0.012545364016822862, "learning_rate": 3.667909726578218e-06, "loss": 0.1288, "step": 154070 }, { "epoch": 3.1364885496183206, "grad_norm": 0.9659126517063725, "learning_rate": 3.6672248534688942e-06, "loss": 0.0897, "step": 154080 }, { "epoch": 3.1366921119592877, "grad_norm": 11.258530140730647, "learning_rate": 3.66654000727668e-06, "loss": 0.0447, "step": 154090 }, { "epoch": 3.1368956743002543, "grad_norm": 25.87147579476462, "learning_rate": 3.665855188015406e-06, "loss": 0.1018, "step": 154100 }, { "epoch": 3.1370992366412214, "grad_norm": 1.0072692790273974, "learning_rate": 3.665170395698907e-06, "loss": 0.0011, "step": 154110 }, { "epoch": 3.1373027989821884, "grad_norm": 0.14746490475429416, "learning_rate": 3.664485630341011e-06, "loss": 0.1101, "step": 154120 }, { "epoch": 3.137506361323155, "grad_norm": 0.023766547318600114, "learning_rate": 3.6638008919555463e-06, "loss": 0.0429, "step": 154130 }, { "epoch": 3.137709923664122, "grad_norm": 0.005200999465933094, "learning_rate": 3.663116180556345e-06, "loss": 0.0737, "step": 154140 }, { "epoch": 3.1379134860050892, "grad_norm": 2.2848104620559377, "learning_rate": 3.6624314961572345e-06, "loss": 0.022, "step": 154150 }, { "epoch": 3.138117048346056, "grad_norm": 0.07694233165302612, "learning_rate": 3.6617468387720433e-06, "loss": 0.0015, "step": 154160 }, { "epoch": 3.138320610687023, "grad_norm": 21.201713859244595, "learning_rate": 3.661062208414597e-06, "loss": 0.0834, "step": 154170 }, { "epoch": 3.13852417302799, "grad_norm": 0.07860061072264488, "learning_rate": 3.6603776050987226e-06, "loss": 0.0897, "step": 154180 }, { "epoch": 3.1387277353689567, "grad_norm": 0.014429208237460509, "learning_rate": 3.659693028838251e-06, "loss": 0.0017, "step": 154190 }, { "epoch": 3.1389312977099237, "grad_norm": 0.0367399879717209, "learning_rate": 3.659008479647003e-06, "loss": 0.0435, "step": 154200 }, { "epoch": 3.1391348600508904, "grad_norm": 12.35460404463285, "learning_rate": 3.658323957538805e-06, "loss": 0.1245, "step": 154210 }, { "epoch": 3.1393384223918575, "grad_norm": 8.135707215394632, "learning_rate": 3.6576394625274857e-06, "loss": 0.0547, "step": 154220 }, { "epoch": 3.1395419847328245, "grad_norm": 11.507354750210176, "learning_rate": 3.6569549946268644e-06, "loss": 0.0297, "step": 154230 }, { "epoch": 3.139745547073791, "grad_norm": 0.11921615134933676, "learning_rate": 3.6562705538507686e-06, "loss": 0.156, "step": 154240 }, { "epoch": 3.1399491094147582, "grad_norm": 0.11644229451226105, "learning_rate": 3.6555861402130173e-06, "loss": 0.0541, "step": 154250 }, { "epoch": 3.1401526717557253, "grad_norm": 0.10590706728061959, "learning_rate": 3.654901753727436e-06, "loss": 0.1176, "step": 154260 }, { "epoch": 3.140356234096692, "grad_norm": 0.11749764530195199, "learning_rate": 3.654217394407848e-06, "loss": 0.0363, "step": 154270 }, { "epoch": 3.140559796437659, "grad_norm": 13.049094605399866, "learning_rate": 3.653533062268071e-06, "loss": 0.0337, "step": 154280 }, { "epoch": 3.140763358778626, "grad_norm": 0.5280190683059712, "learning_rate": 3.6528487573219286e-06, "loss": 0.0353, "step": 154290 }, { "epoch": 3.1409669211195927, "grad_norm": 2.730193073489165, "learning_rate": 3.6521644795832423e-06, "loss": 0.0858, "step": 154300 }, { "epoch": 3.14117048346056, "grad_norm": 0.04514287491454542, "learning_rate": 3.6514802290658287e-06, "loss": 0.0296, "step": 154310 }, { "epoch": 3.141374045801527, "grad_norm": 0.2396231177708996, "learning_rate": 3.65079600578351e-06, "loss": 0.0389, "step": 154320 }, { "epoch": 3.1415776081424935, "grad_norm": 0.13177801672100772, "learning_rate": 3.6501118097501014e-06, "loss": 0.0011, "step": 154330 }, { "epoch": 3.1417811704834606, "grad_norm": 17.90679885708125, "learning_rate": 3.649427640979425e-06, "loss": 0.1328, "step": 154340 }, { "epoch": 3.1419847328244277, "grad_norm": 0.2429014550484301, "learning_rate": 3.6487434994852976e-06, "loss": 0.1008, "step": 154350 }, { "epoch": 3.1421882951653943, "grad_norm": 44.02863427828957, "learning_rate": 3.6480593852815336e-06, "loss": 0.1067, "step": 154360 }, { "epoch": 3.1423918575063614, "grad_norm": 0.08915403218625255, "learning_rate": 3.647375298381954e-06, "loss": 0.0503, "step": 154370 }, { "epoch": 3.142595419847328, "grad_norm": 0.27559839516608386, "learning_rate": 3.6466912388003707e-06, "loss": 0.1671, "step": 154380 }, { "epoch": 3.142798982188295, "grad_norm": 15.63644912232402, "learning_rate": 3.6460072065506002e-06, "loss": 0.054, "step": 154390 }, { "epoch": 3.143002544529262, "grad_norm": 7.937159775141481, "learning_rate": 3.64532320164646e-06, "loss": 0.0563, "step": 154400 }, { "epoch": 3.143206106870229, "grad_norm": 0.13258276108105096, "learning_rate": 3.6446392241017623e-06, "loss": 0.0027, "step": 154410 }, { "epoch": 3.143409669211196, "grad_norm": 0.05098329864165361, "learning_rate": 3.6439552739303196e-06, "loss": 0.0026, "step": 154420 }, { "epoch": 3.143613231552163, "grad_norm": 1.0747070946175823, "learning_rate": 3.6432713511459496e-06, "loss": 0.1636, "step": 154430 }, { "epoch": 3.1438167938931296, "grad_norm": 0.006633277183637992, "learning_rate": 3.6425874557624592e-06, "loss": 0.0716, "step": 154440 }, { "epoch": 3.1440203562340967, "grad_norm": 0.08835805711454159, "learning_rate": 3.641903587793666e-06, "loss": 0.0071, "step": 154450 }, { "epoch": 3.144223918575064, "grad_norm": 0.034046833722909856, "learning_rate": 3.6412197472533773e-06, "loss": 0.0206, "step": 154460 }, { "epoch": 3.1444274809160304, "grad_norm": 0.009392864780070107, "learning_rate": 3.6405359341554045e-06, "loss": 0.0182, "step": 154470 }, { "epoch": 3.1446310432569975, "grad_norm": 0.005682736140217373, "learning_rate": 3.639852148513563e-06, "loss": 0.0546, "step": 154480 }, { "epoch": 3.1448346055979646, "grad_norm": 23.639931583930263, "learning_rate": 3.639168390341657e-06, "loss": 0.0034, "step": 154490 }, { "epoch": 3.145038167938931, "grad_norm": 3.177707839915429, "learning_rate": 3.6384846596534973e-06, "loss": 0.1527, "step": 154500 }, { "epoch": 3.1452417302798983, "grad_norm": 0.059609464842035956, "learning_rate": 3.6378009564628957e-06, "loss": 0.0756, "step": 154510 }, { "epoch": 3.145445292620865, "grad_norm": 1.2290234108071159, "learning_rate": 3.6371172807836574e-06, "loss": 0.0039, "step": 154520 }, { "epoch": 3.145648854961832, "grad_norm": 86.03431810475081, "learning_rate": 3.636433632629592e-06, "loss": 0.1346, "step": 154530 }, { "epoch": 3.145852417302799, "grad_norm": 30.189378970460393, "learning_rate": 3.635750012014503e-06, "loss": 0.1223, "step": 154540 }, { "epoch": 3.1460559796437657, "grad_norm": 0.035290805507500966, "learning_rate": 3.635066418952201e-06, "loss": 0.002, "step": 154550 }, { "epoch": 3.146259541984733, "grad_norm": 0.0972915884234305, "learning_rate": 3.634382853456492e-06, "loss": 0.0008, "step": 154560 }, { "epoch": 3.1464631043257, "grad_norm": 0.03160435375254208, "learning_rate": 3.633699315541178e-06, "loss": 0.0825, "step": 154570 }, { "epoch": 3.1466666666666665, "grad_norm": 0.17853838837003108, "learning_rate": 3.633015805220067e-06, "loss": 0.0236, "step": 154580 }, { "epoch": 3.1468702290076336, "grad_norm": 0.34725995010548405, "learning_rate": 3.632332322506964e-06, "loss": 0.0171, "step": 154590 }, { "epoch": 3.1470737913486007, "grad_norm": 0.013308667528946255, "learning_rate": 3.6316488674156704e-06, "loss": 0.0776, "step": 154600 }, { "epoch": 3.1472773536895673, "grad_norm": 27.700415265421853, "learning_rate": 3.6309654399599904e-06, "loss": 0.0179, "step": 154610 }, { "epoch": 3.1474809160305344, "grad_norm": 0.014813019750178895, "learning_rate": 3.6302820401537253e-06, "loss": 0.0463, "step": 154620 }, { "epoch": 3.1476844783715014, "grad_norm": 0.8723387056935078, "learning_rate": 3.6295986680106787e-06, "loss": 0.0083, "step": 154630 }, { "epoch": 3.147888040712468, "grad_norm": 19.06067053194091, "learning_rate": 3.628915323544654e-06, "loss": 0.0128, "step": 154640 }, { "epoch": 3.148091603053435, "grad_norm": 0.08240601821654217, "learning_rate": 3.628232006769448e-06, "loss": 0.0029, "step": 154650 }, { "epoch": 3.1482951653944022, "grad_norm": 0.01623544026237248, "learning_rate": 3.6275487176988644e-06, "loss": 0.0388, "step": 154660 }, { "epoch": 3.148498727735369, "grad_norm": 58.136566798790334, "learning_rate": 3.6268654563467033e-06, "loss": 0.1306, "step": 154670 }, { "epoch": 3.148702290076336, "grad_norm": 0.39763708847533924, "learning_rate": 3.626182222726761e-06, "loss": 0.0297, "step": 154680 }, { "epoch": 3.148905852417303, "grad_norm": 0.03312827216772633, "learning_rate": 3.62549901685284e-06, "loss": 0.0576, "step": 154690 }, { "epoch": 3.1491094147582697, "grad_norm": 11.607417460926694, "learning_rate": 3.6248158387387356e-06, "loss": 0.0911, "step": 154700 }, { "epoch": 3.1493129770992367, "grad_norm": 0.004287384932258832, "learning_rate": 3.6241326883982454e-06, "loss": 0.0118, "step": 154710 }, { "epoch": 3.1495165394402034, "grad_norm": 0.031156434201969608, "learning_rate": 3.6234495658451708e-06, "loss": 0.1147, "step": 154720 }, { "epoch": 3.1497201017811705, "grad_norm": 0.28123935299376296, "learning_rate": 3.6227664710933027e-06, "loss": 0.001, "step": 154730 }, { "epoch": 3.1499236641221375, "grad_norm": 14.128238816188029, "learning_rate": 3.622083404156441e-06, "loss": 0.0158, "step": 154740 }, { "epoch": 3.150127226463104, "grad_norm": 8.705018260796848, "learning_rate": 3.621400365048379e-06, "loss": 0.1013, "step": 154750 }, { "epoch": 3.1503307888040712, "grad_norm": 0.02609797184100356, "learning_rate": 3.620717353782911e-06, "loss": 0.0875, "step": 154760 }, { "epoch": 3.1505343511450383, "grad_norm": 4.211828105384776, "learning_rate": 3.620034370373835e-06, "loss": 0.0474, "step": 154770 }, { "epoch": 3.150737913486005, "grad_norm": 0.02815520868493183, "learning_rate": 3.619351414834941e-06, "loss": 0.0382, "step": 154780 }, { "epoch": 3.150941475826972, "grad_norm": 0.009648599087146483, "learning_rate": 3.6186684871800215e-06, "loss": 0.0714, "step": 154790 }, { "epoch": 3.151145038167939, "grad_norm": 1.3298335937158616, "learning_rate": 3.6179855874228743e-06, "loss": 0.0469, "step": 154800 }, { "epoch": 3.1513486005089058, "grad_norm": 0.1447890191673374, "learning_rate": 3.6173027155772867e-06, "loss": 0.0527, "step": 154810 }, { "epoch": 3.151552162849873, "grad_norm": 2.7057435657174533, "learning_rate": 3.616619871657053e-06, "loss": 0.1056, "step": 154820 }, { "epoch": 3.1517557251908395, "grad_norm": 0.14346718050633342, "learning_rate": 3.6159370556759605e-06, "loss": 0.0078, "step": 154830 }, { "epoch": 3.1519592875318065, "grad_norm": 0.07064803017617793, "learning_rate": 3.615254267647803e-06, "loss": 0.011, "step": 154840 }, { "epoch": 3.1521628498727736, "grad_norm": 0.05274350511424529, "learning_rate": 3.6145715075863697e-06, "loss": 0.0728, "step": 154850 }, { "epoch": 3.1523664122137403, "grad_norm": 0.05791386293305626, "learning_rate": 3.6138887755054486e-06, "loss": 0.0689, "step": 154860 }, { "epoch": 3.1525699745547073, "grad_norm": 0.05602604204473184, "learning_rate": 3.613206071418828e-06, "loss": 0.0311, "step": 154870 }, { "epoch": 3.1527735368956744, "grad_norm": 9.49228398805194, "learning_rate": 3.612523395340299e-06, "loss": 0.099, "step": 154880 }, { "epoch": 3.152977099236641, "grad_norm": 34.79251373899223, "learning_rate": 3.611840747283646e-06, "loss": 0.0502, "step": 154890 }, { "epoch": 3.153180661577608, "grad_norm": 0.019022471313303937, "learning_rate": 3.6111581272626574e-06, "loss": 0.0554, "step": 154900 }, { "epoch": 3.153384223918575, "grad_norm": 8.169349396118783, "learning_rate": 3.610475535291118e-06, "loss": 0.1709, "step": 154910 }, { "epoch": 3.153587786259542, "grad_norm": 0.09431754228259988, "learning_rate": 3.6097929713828154e-06, "loss": 0.0011, "step": 154920 }, { "epoch": 3.153791348600509, "grad_norm": 0.0085043290359551, "learning_rate": 3.609110435551536e-06, "loss": 0.0235, "step": 154930 }, { "epoch": 3.153994910941476, "grad_norm": 0.11817744737382697, "learning_rate": 3.60842792781106e-06, "loss": 0.0387, "step": 154940 }, { "epoch": 3.1541984732824426, "grad_norm": 0.02370301398444395, "learning_rate": 3.607745448175176e-06, "loss": 0.0278, "step": 154950 }, { "epoch": 3.1544020356234097, "grad_norm": 0.10249682394890026, "learning_rate": 3.6070629966576666e-06, "loss": 0.06, "step": 154960 }, { "epoch": 3.154605597964377, "grad_norm": 0.0074343080366797605, "learning_rate": 3.606380573272312e-06, "loss": 0.0613, "step": 154970 }, { "epoch": 3.1548091603053434, "grad_norm": 0.009090203506621902, "learning_rate": 3.6056981780328988e-06, "loss": 0.0013, "step": 154980 }, { "epoch": 3.1550127226463105, "grad_norm": 0.019210955303510163, "learning_rate": 3.6050158109532053e-06, "loss": 0.0012, "step": 154990 }, { "epoch": 3.1552162849872776, "grad_norm": 0.0397388038911484, "learning_rate": 3.604333472047015e-06, "loss": 0.0622, "step": 155000 }, { "epoch": 3.155419847328244, "grad_norm": 2.0849919723424457, "learning_rate": 3.6036511613281085e-06, "loss": 0.0699, "step": 155010 }, { "epoch": 3.1556234096692113, "grad_norm": 0.021937910050775233, "learning_rate": 3.602968878810263e-06, "loss": 0.0009, "step": 155020 }, { "epoch": 3.155826972010178, "grad_norm": 0.017440753348535856, "learning_rate": 3.6022866245072618e-06, "loss": 0.0845, "step": 155030 }, { "epoch": 3.156030534351145, "grad_norm": 0.06123844183702508, "learning_rate": 3.601604398432883e-06, "loss": 0.0006, "step": 155040 }, { "epoch": 3.156234096692112, "grad_norm": 14.63467160005897, "learning_rate": 3.6009222006009027e-06, "loss": 0.0883, "step": 155050 }, { "epoch": 3.1564376590330787, "grad_norm": 0.35227433079649384, "learning_rate": 3.600240031025103e-06, "loss": 0.044, "step": 155060 }, { "epoch": 3.156641221374046, "grad_norm": 3.0557120987359028, "learning_rate": 3.5995578897192574e-06, "loss": 0.0033, "step": 155070 }, { "epoch": 3.156844783715013, "grad_norm": 0.04195615521501392, "learning_rate": 3.5988757766971426e-06, "loss": 0.0721, "step": 155080 }, { "epoch": 3.1570483460559795, "grad_norm": 0.05706283919405144, "learning_rate": 3.5981936919725385e-06, "loss": 0.0335, "step": 155090 }, { "epoch": 3.1572519083969466, "grad_norm": 0.02100330912796589, "learning_rate": 3.5975116355592173e-06, "loss": 0.0325, "step": 155100 }, { "epoch": 3.1574554707379137, "grad_norm": 7.411186748023578, "learning_rate": 3.596829607470956e-06, "loss": 0.0452, "step": 155110 }, { "epoch": 3.1576590330788803, "grad_norm": 0.4036458549684198, "learning_rate": 3.596147607721526e-06, "loss": 0.1454, "step": 155120 }, { "epoch": 3.1578625954198474, "grad_norm": 14.293945335841986, "learning_rate": 3.595465636324704e-06, "loss": 0.0571, "step": 155130 }, { "epoch": 3.1580661577608145, "grad_norm": 0.5443911323646068, "learning_rate": 3.594783693294264e-06, "loss": 0.0211, "step": 155140 }, { "epoch": 3.158269720101781, "grad_norm": 0.02489640413360428, "learning_rate": 3.5941017786439758e-06, "loss": 0.1194, "step": 155150 }, { "epoch": 3.158473282442748, "grad_norm": 0.2746272331712158, "learning_rate": 3.593419892387612e-06, "loss": 0.0884, "step": 155160 }, { "epoch": 3.158676844783715, "grad_norm": 0.07435963380004092, "learning_rate": 3.592738034538947e-06, "loss": 0.0989, "step": 155170 }, { "epoch": 3.158880407124682, "grad_norm": 0.051896801515882225, "learning_rate": 3.592056205111749e-06, "loss": 0.0647, "step": 155180 }, { "epoch": 3.159083969465649, "grad_norm": 0.02197860946324821, "learning_rate": 3.591374404119791e-06, "loss": 0.0314, "step": 155190 }, { "epoch": 3.1592875318066156, "grad_norm": 1.9273911480255372, "learning_rate": 3.5906926315768386e-06, "loss": 0.0804, "step": 155200 }, { "epoch": 3.1594910941475827, "grad_norm": 0.899523261139738, "learning_rate": 3.590010887496665e-06, "loss": 0.075, "step": 155210 }, { "epoch": 3.1596946564885497, "grad_norm": 6.735078346630551, "learning_rate": 3.5893291718930384e-06, "loss": 0.0806, "step": 155220 }, { "epoch": 3.1598982188295164, "grad_norm": 2.8678883457653868, "learning_rate": 3.5886474847797236e-06, "loss": 0.0835, "step": 155230 }, { "epoch": 3.1601017811704835, "grad_norm": 0.08277603796265368, "learning_rate": 3.5879658261704926e-06, "loss": 0.002, "step": 155240 }, { "epoch": 3.1603053435114505, "grad_norm": 6.493816554741998, "learning_rate": 3.587284196079111e-06, "loss": 0.0283, "step": 155250 }, { "epoch": 3.160508905852417, "grad_norm": 15.529633391924333, "learning_rate": 3.586602594519343e-06, "loss": 0.1126, "step": 155260 }, { "epoch": 3.1607124681933843, "grad_norm": 0.027369927731215455, "learning_rate": 3.5859210215049584e-06, "loss": 0.0729, "step": 155270 }, { "epoch": 3.1609160305343513, "grad_norm": 22.70041308752813, "learning_rate": 3.5852394770497165e-06, "loss": 0.0196, "step": 155280 }, { "epoch": 3.161119592875318, "grad_norm": 20.903212643948862, "learning_rate": 3.584557961167388e-06, "loss": 0.0783, "step": 155290 }, { "epoch": 3.161323155216285, "grad_norm": 0.022973846730524577, "learning_rate": 3.5838764738717348e-06, "loss": 0.0786, "step": 155300 }, { "epoch": 3.161526717557252, "grad_norm": 0.031902965317878416, "learning_rate": 3.5831950151765184e-06, "loss": 0.0268, "step": 155310 }, { "epoch": 3.1617302798982188, "grad_norm": 10.10040595313243, "learning_rate": 3.5825135850955047e-06, "loss": 0.1036, "step": 155320 }, { "epoch": 3.161933842239186, "grad_norm": 8.136776169500592, "learning_rate": 3.581832183642455e-06, "loss": 0.1234, "step": 155330 }, { "epoch": 3.1621374045801525, "grad_norm": 7.891455551988081, "learning_rate": 3.58115081083113e-06, "loss": 0.2195, "step": 155340 }, { "epoch": 3.1623409669211195, "grad_norm": 0.46801687461034563, "learning_rate": 3.580469466675293e-06, "loss": 0.0108, "step": 155350 }, { "epoch": 3.1625445292620866, "grad_norm": 31.28132943376755, "learning_rate": 3.579788151188703e-06, "loss": 0.0287, "step": 155360 }, { "epoch": 3.1627480916030533, "grad_norm": 0.043276653662102794, "learning_rate": 3.579106864385119e-06, "loss": 0.0432, "step": 155370 }, { "epoch": 3.1629516539440203, "grad_norm": 0.4830420676035512, "learning_rate": 3.578425606278305e-06, "loss": 0.0406, "step": 155380 }, { "epoch": 3.1631552162849874, "grad_norm": 0.9447241321471356, "learning_rate": 3.5777443768820147e-06, "loss": 0.0418, "step": 155390 }, { "epoch": 3.163358778625954, "grad_norm": 0.06670528471482796, "learning_rate": 3.577063176210008e-06, "loss": 0.0613, "step": 155400 }, { "epoch": 3.163562340966921, "grad_norm": 0.01986698325092339, "learning_rate": 3.5763820042760465e-06, "loss": 0.0304, "step": 155410 }, { "epoch": 3.163765903307888, "grad_norm": 0.21525720960650538, "learning_rate": 3.5757008610938804e-06, "loss": 0.0593, "step": 155420 }, { "epoch": 3.163969465648855, "grad_norm": 0.08951740549786243, "learning_rate": 3.5750197466772722e-06, "loss": 0.0565, "step": 155430 }, { "epoch": 3.164173027989822, "grad_norm": 0.04284733149204875, "learning_rate": 3.5743386610399742e-06, "loss": 0.0735, "step": 155440 }, { "epoch": 3.164376590330789, "grad_norm": 0.03133881677181339, "learning_rate": 3.5736576041957427e-06, "loss": 0.0884, "step": 155450 }, { "epoch": 3.1645801526717556, "grad_norm": 0.1125897814048869, "learning_rate": 3.572976576158335e-06, "loss": 0.052, "step": 155460 }, { "epoch": 3.1647837150127227, "grad_norm": 0.1312401715117577, "learning_rate": 3.5722955769415022e-06, "loss": 0.0934, "step": 155470 }, { "epoch": 3.1649872773536893, "grad_norm": 0.06322627265822926, "learning_rate": 3.5716146065590007e-06, "loss": 0.0396, "step": 155480 }, { "epoch": 3.1651908396946564, "grad_norm": 1.3207282837319592, "learning_rate": 3.5709336650245792e-06, "loss": 0.0739, "step": 155490 }, { "epoch": 3.1653944020356235, "grad_norm": 0.03531043512412598, "learning_rate": 3.5702527523519935e-06, "loss": 0.0234, "step": 155500 }, { "epoch": 3.16559796437659, "grad_norm": 8.301146972693504, "learning_rate": 3.5695718685549973e-06, "loss": 0.0906, "step": 155510 }, { "epoch": 3.165801526717557, "grad_norm": 1.1199848561434222, "learning_rate": 3.5688910136473366e-06, "loss": 0.0167, "step": 155520 }, { "epoch": 3.1660050890585243, "grad_norm": 0.28843874782890194, "learning_rate": 3.5682101876427665e-06, "loss": 0.0022, "step": 155530 }, { "epoch": 3.166208651399491, "grad_norm": 0.05324330437727214, "learning_rate": 3.567529390555037e-06, "loss": 0.0013, "step": 155540 }, { "epoch": 3.166412213740458, "grad_norm": 70.21791039831449, "learning_rate": 3.5668486223978936e-06, "loss": 0.0617, "step": 155550 }, { "epoch": 3.166615776081425, "grad_norm": 0.005956488816693281, "learning_rate": 3.5661678831850923e-06, "loss": 0.0423, "step": 155560 }, { "epoch": 3.1668193384223917, "grad_norm": 0.19701102020400035, "learning_rate": 3.565487172930373e-06, "loss": 0.0366, "step": 155570 }, { "epoch": 3.167022900763359, "grad_norm": 0.17885182463327878, "learning_rate": 3.56480649164749e-06, "loss": 0.0012, "step": 155580 }, { "epoch": 3.167226463104326, "grad_norm": 16.673366493530185, "learning_rate": 3.5641258393501885e-06, "loss": 0.0319, "step": 155590 }, { "epoch": 3.1674300254452925, "grad_norm": 0.08782816797887501, "learning_rate": 3.5634452160522142e-06, "loss": 0.0065, "step": 155600 }, { "epoch": 3.1676335877862596, "grad_norm": 2.728153461556037, "learning_rate": 3.5627646217673143e-06, "loss": 0.0807, "step": 155610 }, { "epoch": 3.1678371501272267, "grad_norm": 0.05136675124911319, "learning_rate": 3.5620840565092353e-06, "loss": 0.0446, "step": 155620 }, { "epoch": 3.1680407124681933, "grad_norm": 0.057864729808819904, "learning_rate": 3.5614035202917185e-06, "loss": 0.0331, "step": 155630 }, { "epoch": 3.1682442748091604, "grad_norm": 0.3793204089233274, "learning_rate": 3.560723013128513e-06, "loss": 0.0326, "step": 155640 }, { "epoch": 3.1684478371501275, "grad_norm": 0.02677263374954455, "learning_rate": 3.560042535033359e-06, "loss": 0.0113, "step": 155650 }, { "epoch": 3.168651399491094, "grad_norm": 0.03254463238834437, "learning_rate": 3.559362086019999e-06, "loss": 0.0965, "step": 155660 }, { "epoch": 3.168854961832061, "grad_norm": 21.246561216773838, "learning_rate": 3.5586816661021794e-06, "loss": 0.0362, "step": 155670 }, { "epoch": 3.169058524173028, "grad_norm": 17.79700138665323, "learning_rate": 3.5580012752936394e-06, "loss": 0.0748, "step": 155680 }, { "epoch": 3.169262086513995, "grad_norm": 0.663636483289968, "learning_rate": 3.55732091360812e-06, "loss": 0.0615, "step": 155690 }, { "epoch": 3.169465648854962, "grad_norm": 75.32316728991597, "learning_rate": 3.5566405810593653e-06, "loss": 0.0901, "step": 155700 }, { "epoch": 3.1696692111959286, "grad_norm": 2.3875826497636, "learning_rate": 3.55596027766111e-06, "loss": 0.049, "step": 155710 }, { "epoch": 3.1698727735368957, "grad_norm": 1.0884035225948203, "learning_rate": 3.5552800034270995e-06, "loss": 0.0311, "step": 155720 }, { "epoch": 3.1700763358778627, "grad_norm": 0.02371403683180663, "learning_rate": 3.554599758371069e-06, "loss": 0.0276, "step": 155730 }, { "epoch": 3.1702798982188294, "grad_norm": 0.14325376437308185, "learning_rate": 3.5539195425067564e-06, "loss": 0.0878, "step": 155740 }, { "epoch": 3.1704834605597965, "grad_norm": 0.0556273697398111, "learning_rate": 3.553239355847904e-06, "loss": 0.0524, "step": 155750 }, { "epoch": 3.1706870229007635, "grad_norm": 6.060928364232784, "learning_rate": 3.552559198408244e-06, "loss": 0.003, "step": 155760 }, { "epoch": 3.17089058524173, "grad_norm": 0.01211451094322471, "learning_rate": 3.5518790702015154e-06, "loss": 0.0978, "step": 155770 }, { "epoch": 3.1710941475826973, "grad_norm": 0.08465977920384926, "learning_rate": 3.5511989712414562e-06, "loss": 0.0205, "step": 155780 }, { "epoch": 3.171297709923664, "grad_norm": 17.29323689690505, "learning_rate": 3.5505189015417983e-06, "loss": 0.1052, "step": 155790 }, { "epoch": 3.171501272264631, "grad_norm": 47.521154408127686, "learning_rate": 3.549838861116279e-06, "loss": 0.038, "step": 155800 }, { "epoch": 3.171704834605598, "grad_norm": 0.007370800057330578, "learning_rate": 3.54915884997863e-06, "loss": 0.117, "step": 155810 }, { "epoch": 3.1719083969465647, "grad_norm": 0.6512821464372037, "learning_rate": 3.548478868142588e-06, "loss": 0.0267, "step": 155820 }, { "epoch": 3.1721119592875318, "grad_norm": 0.033978128395994366, "learning_rate": 3.547798915621885e-06, "loss": 0.0005, "step": 155830 }, { "epoch": 3.172315521628499, "grad_norm": 0.01603557093223289, "learning_rate": 3.547118992430253e-06, "loss": 0.0254, "step": 155840 }, { "epoch": 3.1725190839694655, "grad_norm": 0.03844509176632421, "learning_rate": 3.5464390985814256e-06, "loss": 0.0587, "step": 155850 }, { "epoch": 3.1727226463104325, "grad_norm": 1.4422771421088678, "learning_rate": 3.54575923408913e-06, "loss": 0.0166, "step": 155860 }, { "epoch": 3.1729262086513996, "grad_norm": 12.15773507217062, "learning_rate": 3.5450793989671005e-06, "loss": 0.081, "step": 155870 }, { "epoch": 3.1731297709923663, "grad_norm": 0.05207463890241158, "learning_rate": 3.544399593229068e-06, "loss": 0.0907, "step": 155880 }, { "epoch": 3.1733333333333333, "grad_norm": 0.10902914499940118, "learning_rate": 3.5437198168887586e-06, "loss": 0.0533, "step": 155890 }, { "epoch": 3.1735368956743004, "grad_norm": 0.05693683936387515, "learning_rate": 3.5430400699599037e-06, "loss": 0.0612, "step": 155900 }, { "epoch": 3.173740458015267, "grad_norm": 0.015240332612151925, "learning_rate": 3.542360352456232e-06, "loss": 0.0006, "step": 155910 }, { "epoch": 3.173944020356234, "grad_norm": 39.264388314230004, "learning_rate": 3.541680664391469e-06, "loss": 0.078, "step": 155920 }, { "epoch": 3.174147582697201, "grad_norm": 13.935080442950522, "learning_rate": 3.5410010057793455e-06, "loss": 0.0914, "step": 155930 }, { "epoch": 3.174351145038168, "grad_norm": 0.09015510861288947, "learning_rate": 3.5403213766335843e-06, "loss": 0.0382, "step": 155940 }, { "epoch": 3.174554707379135, "grad_norm": 4.099713256833015, "learning_rate": 3.539641776967912e-06, "loss": 0.0132, "step": 155950 }, { "epoch": 3.174758269720102, "grad_norm": 0.019210137557162586, "learning_rate": 3.5389622067960583e-06, "loss": 0.1252, "step": 155960 }, { "epoch": 3.1749618320610686, "grad_norm": 0.04219096311324579, "learning_rate": 3.5382826661317417e-06, "loss": 0.0318, "step": 155970 }, { "epoch": 3.1751653944020357, "grad_norm": 0.49827581400190235, "learning_rate": 3.5376031549886895e-06, "loss": 0.0357, "step": 155980 }, { "epoch": 3.1753689567430023, "grad_norm": 0.06286031103571527, "learning_rate": 3.5369236733806267e-06, "loss": 0.0871, "step": 155990 }, { "epoch": 3.1755725190839694, "grad_norm": 22.099052878151912, "learning_rate": 3.5362442213212727e-06, "loss": 0.0448, "step": 156000 }, { "epoch": 3.1757760814249365, "grad_norm": 0.05127063001025051, "learning_rate": 3.535564798824354e-06, "loss": 0.0653, "step": 156010 }, { "epoch": 3.175979643765903, "grad_norm": 0.11390813765722477, "learning_rate": 3.53488540590359e-06, "loss": 0.0861, "step": 156020 }, { "epoch": 3.17618320610687, "grad_norm": 0.002800450177001298, "learning_rate": 3.5342060425727e-06, "loss": 0.0455, "step": 156030 }, { "epoch": 3.1763867684478373, "grad_norm": 1.0426005970646246, "learning_rate": 3.5335267088454096e-06, "loss": 0.0478, "step": 156040 }, { "epoch": 3.176590330788804, "grad_norm": 1.6619021974965864, "learning_rate": 3.532847404735435e-06, "loss": 0.0344, "step": 156050 }, { "epoch": 3.176793893129771, "grad_norm": 0.04959304901581243, "learning_rate": 3.532168130256496e-06, "loss": 0.0517, "step": 156060 }, { "epoch": 3.176997455470738, "grad_norm": 0.05911606774744037, "learning_rate": 3.5314888854223135e-06, "loss": 0.0024, "step": 156070 }, { "epoch": 3.1772010178117047, "grad_norm": 4.2191917804974635, "learning_rate": 3.5308096702466038e-06, "loss": 0.0651, "step": 156080 }, { "epoch": 3.177404580152672, "grad_norm": 0.013307622629842458, "learning_rate": 3.5301304847430863e-06, "loss": 0.0853, "step": 156090 }, { "epoch": 3.177608142493639, "grad_norm": 13.384614819940087, "learning_rate": 3.5294513289254744e-06, "loss": 0.0757, "step": 156100 }, { "epoch": 3.1778117048346055, "grad_norm": 0.04116233121666588, "learning_rate": 3.5287722028074885e-06, "loss": 0.0367, "step": 156110 }, { "epoch": 3.1780152671755726, "grad_norm": 0.024787078122991222, "learning_rate": 3.5280931064028433e-06, "loss": 0.0115, "step": 156120 }, { "epoch": 3.1782188295165392, "grad_norm": 0.00551046729148388, "learning_rate": 3.527414039725253e-06, "loss": 0.0596, "step": 156130 }, { "epoch": 3.1784223918575063, "grad_norm": 0.9923928254034793, "learning_rate": 3.526735002788434e-06, "loss": 0.0013, "step": 156140 }, { "epoch": 3.1786259541984734, "grad_norm": 0.10752221882012707, "learning_rate": 3.526055995606097e-06, "loss": 0.0132, "step": 156150 }, { "epoch": 3.17882951653944, "grad_norm": 32.00310752187105, "learning_rate": 3.5253770181919584e-06, "loss": 0.0795, "step": 156160 }, { "epoch": 3.179033078880407, "grad_norm": 0.013843937942277498, "learning_rate": 3.524698070559731e-06, "loss": 0.0068, "step": 156170 }, { "epoch": 3.179236641221374, "grad_norm": 0.01375297016846114, "learning_rate": 3.524019152723125e-06, "loss": 0.0941, "step": 156180 }, { "epoch": 3.179440203562341, "grad_norm": 0.04150623708914569, "learning_rate": 3.523340264695854e-06, "loss": 0.0774, "step": 156190 }, { "epoch": 3.179643765903308, "grad_norm": 0.021378543252208046, "learning_rate": 3.5226614064916287e-06, "loss": 0.0148, "step": 156200 }, { "epoch": 3.179847328244275, "grad_norm": 0.015751855623412307, "learning_rate": 3.5219825781241577e-06, "loss": 0.0514, "step": 156210 }, { "epoch": 3.1800508905852416, "grad_norm": 0.030663098709998133, "learning_rate": 3.5213037796071544e-06, "loss": 0.0045, "step": 156220 }, { "epoch": 3.1802544529262087, "grad_norm": 0.43264047930287103, "learning_rate": 3.5206250109543238e-06, "loss": 0.0396, "step": 156230 }, { "epoch": 3.1804580152671758, "grad_norm": 0.03378607089672691, "learning_rate": 3.5199462721793763e-06, "loss": 0.063, "step": 156240 }, { "epoch": 3.1806615776081424, "grad_norm": 0.09957184910073037, "learning_rate": 3.5192675632960233e-06, "loss": 0.0354, "step": 156250 }, { "epoch": 3.1808651399491095, "grad_norm": 0.004384481787568318, "learning_rate": 3.5185888843179656e-06, "loss": 0.0133, "step": 156260 }, { "epoch": 3.1810687022900765, "grad_norm": 0.011695559263460343, "learning_rate": 3.5179102352589145e-06, "loss": 0.0968, "step": 156270 }, { "epoch": 3.181272264631043, "grad_norm": 10.2332934244352, "learning_rate": 3.5172316161325758e-06, "loss": 0.1017, "step": 156280 }, { "epoch": 3.1814758269720103, "grad_norm": 2.2732493149112085, "learning_rate": 3.5165530269526533e-06, "loss": 0.0029, "step": 156290 }, { "epoch": 3.181679389312977, "grad_norm": 1.0464534032364092, "learning_rate": 3.515874467732855e-06, "loss": 0.0041, "step": 156300 }, { "epoch": 3.181882951653944, "grad_norm": 14.861780679230533, "learning_rate": 3.515195938486882e-06, "loss": 0.1399, "step": 156310 }, { "epoch": 3.182086513994911, "grad_norm": 17.00159036847458, "learning_rate": 3.5145174392284374e-06, "loss": 0.005, "step": 156320 }, { "epoch": 3.1822900763358777, "grad_norm": 0.18454526354447134, "learning_rate": 3.5138389699712295e-06, "loss": 0.1201, "step": 156330 }, { "epoch": 3.1824936386768448, "grad_norm": 0.12340701601816419, "learning_rate": 3.513160530728957e-06, "loss": 0.0559, "step": 156340 }, { "epoch": 3.182697201017812, "grad_norm": 104.13455585297064, "learning_rate": 3.512482121515321e-06, "loss": 0.1203, "step": 156350 }, { "epoch": 3.1829007633587785, "grad_norm": 69.32127206057449, "learning_rate": 3.5118037423440265e-06, "loss": 0.0434, "step": 156360 }, { "epoch": 3.1831043256997456, "grad_norm": 10.13376709790327, "learning_rate": 3.5111253932287716e-06, "loss": 0.0603, "step": 156370 }, { "epoch": 3.1833078880407126, "grad_norm": 142.23955326648849, "learning_rate": 3.510447074183258e-06, "loss": 0.0453, "step": 156380 }, { "epoch": 3.1835114503816793, "grad_norm": 0.00045703179475132134, "learning_rate": 3.509768785221183e-06, "loss": 0.0608, "step": 156390 }, { "epoch": 3.1837150127226463, "grad_norm": 19.86205587606327, "learning_rate": 3.5090905263562457e-06, "loss": 0.083, "step": 156400 }, { "epoch": 3.1839185750636134, "grad_norm": 0.22342101383784976, "learning_rate": 3.5084122976021483e-06, "loss": 0.0471, "step": 156410 }, { "epoch": 3.18412213740458, "grad_norm": 0.02366215833058201, "learning_rate": 3.5077340989725837e-06, "loss": 0.0974, "step": 156420 }, { "epoch": 3.184325699745547, "grad_norm": 59.24780442016564, "learning_rate": 3.50705593048125e-06, "loss": 0.0502, "step": 156430 }, { "epoch": 3.1845292620865138, "grad_norm": 17.721016214329822, "learning_rate": 3.5063777921418474e-06, "loss": 0.0566, "step": 156440 }, { "epoch": 3.184732824427481, "grad_norm": 0.03466028955642332, "learning_rate": 3.505699683968068e-06, "loss": 0.0355, "step": 156450 }, { "epoch": 3.184936386768448, "grad_norm": 0.39457319170340455, "learning_rate": 3.5050216059736087e-06, "loss": 0.085, "step": 156460 }, { "epoch": 3.1851399491094146, "grad_norm": 16.37045847532636, "learning_rate": 3.504343558172162e-06, "loss": 0.1287, "step": 156470 }, { "epoch": 3.1853435114503816, "grad_norm": 0.09692284948747786, "learning_rate": 3.5036655405774244e-06, "loss": 0.0391, "step": 156480 }, { "epoch": 3.1855470737913487, "grad_norm": 79.69122161364969, "learning_rate": 3.5029875532030887e-06, "loss": 0.0116, "step": 156490 }, { "epoch": 3.1857506361323153, "grad_norm": 0.01789063028765396, "learning_rate": 3.502309596062847e-06, "loss": 0.0692, "step": 156500 }, { "epoch": 3.1859541984732824, "grad_norm": 9.080325192827317, "learning_rate": 3.5016316691703926e-06, "loss": 0.0336, "step": 156510 }, { "epoch": 3.1861577608142495, "grad_norm": 0.062623575649775, "learning_rate": 3.500953772539416e-06, "loss": 0.0015, "step": 156520 }, { "epoch": 3.186361323155216, "grad_norm": 0.046316103717991476, "learning_rate": 3.500275906183609e-06, "loss": 0.0434, "step": 156530 }, { "epoch": 3.186564885496183, "grad_norm": 11.455132130926101, "learning_rate": 3.4995980701166626e-06, "loss": 0.0714, "step": 156540 }, { "epoch": 3.1867684478371503, "grad_norm": 0.0342555033606234, "learning_rate": 3.4989202643522635e-06, "loss": 0.0567, "step": 156550 }, { "epoch": 3.186972010178117, "grad_norm": 0.01735238621165384, "learning_rate": 3.4982424889041045e-06, "loss": 0.0247, "step": 156560 }, { "epoch": 3.187175572519084, "grad_norm": 14.408887670943411, "learning_rate": 3.497564743785873e-06, "loss": 0.0588, "step": 156570 }, { "epoch": 3.187379134860051, "grad_norm": 0.11440297352803756, "learning_rate": 3.496887029011255e-06, "loss": 0.0426, "step": 156580 }, { "epoch": 3.1875826972010177, "grad_norm": 0.0070951376771680175, "learning_rate": 3.4962093445939417e-06, "loss": 0.0479, "step": 156590 }, { "epoch": 3.187786259541985, "grad_norm": 0.005934015805923324, "learning_rate": 3.495531690547616e-06, "loss": 0.0367, "step": 156600 }, { "epoch": 3.187989821882952, "grad_norm": 0.02986645073793049, "learning_rate": 3.494854066885965e-06, "loss": 0.0032, "step": 156610 }, { "epoch": 3.1881933842239185, "grad_norm": 0.0992115658376138, "learning_rate": 3.4941764736226764e-06, "loss": 0.0005, "step": 156620 }, { "epoch": 3.1883969465648856, "grad_norm": 0.9349159309120955, "learning_rate": 3.4934989107714325e-06, "loss": 0.0856, "step": 156630 }, { "epoch": 3.1886005089058522, "grad_norm": 2.6857715306113072, "learning_rate": 3.4928213783459173e-06, "loss": 0.1002, "step": 156640 }, { "epoch": 3.1888040712468193, "grad_norm": 0.20090479648114928, "learning_rate": 3.4921438763598182e-06, "loss": 0.0697, "step": 156650 }, { "epoch": 3.1890076335877864, "grad_norm": 55.1416014598839, "learning_rate": 3.4914664048268143e-06, "loss": 0.1144, "step": 156660 }, { "epoch": 3.189211195928753, "grad_norm": 0.06082399597252597, "learning_rate": 3.49078896376059e-06, "loss": 0.0763, "step": 156670 }, { "epoch": 3.18941475826972, "grad_norm": 0.0231890811806091, "learning_rate": 3.490111553174825e-06, "loss": 0.0296, "step": 156680 }, { "epoch": 3.189618320610687, "grad_norm": 0.055947458992170566, "learning_rate": 3.489434173083202e-06, "loss": 0.0359, "step": 156690 }, { "epoch": 3.189821882951654, "grad_norm": 0.030744428586833742, "learning_rate": 3.4887568234994027e-06, "loss": 0.0437, "step": 156700 }, { "epoch": 3.190025445292621, "grad_norm": 11.218208722165665, "learning_rate": 3.4880795044371057e-06, "loss": 0.1667, "step": 156710 }, { "epoch": 3.190229007633588, "grad_norm": 0.06729731204169656, "learning_rate": 3.4874022159099886e-06, "loss": 0.027, "step": 156720 }, { "epoch": 3.1904325699745546, "grad_norm": 0.07008250479631668, "learning_rate": 3.4867249579317343e-06, "loss": 0.0296, "step": 156730 }, { "epoch": 3.1906361323155217, "grad_norm": 0.03656518082832191, "learning_rate": 3.486047730516018e-06, "loss": 0.0723, "step": 156740 }, { "epoch": 3.1908396946564888, "grad_norm": 0.017414232484220422, "learning_rate": 3.4853705336765183e-06, "loss": 0.0289, "step": 156750 }, { "epoch": 3.1910432569974554, "grad_norm": 0.037848639636955526, "learning_rate": 3.484693367426909e-06, "loss": 0.0964, "step": 156760 }, { "epoch": 3.1912468193384225, "grad_norm": 16.961339810250127, "learning_rate": 3.48401623178087e-06, "loss": 0.0444, "step": 156770 }, { "epoch": 3.191450381679389, "grad_norm": 0.5920384486283997, "learning_rate": 3.4833391267520776e-06, "loss": 0.0132, "step": 156780 }, { "epoch": 3.191653944020356, "grad_norm": 0.0351304132870075, "learning_rate": 3.482662052354202e-06, "loss": 0.0584, "step": 156790 }, { "epoch": 3.1918575063613233, "grad_norm": 0.07307030746285623, "learning_rate": 3.481985008600922e-06, "loss": 0.005, "step": 156800 }, { "epoch": 3.19206106870229, "grad_norm": 0.05070406434691204, "learning_rate": 3.4813079955059115e-06, "loss": 0.0633, "step": 156810 }, { "epoch": 3.192264631043257, "grad_norm": 27.226628858363735, "learning_rate": 3.48063101308284e-06, "loss": 0.0291, "step": 156820 }, { "epoch": 3.192468193384224, "grad_norm": 28.954988542741514, "learning_rate": 3.4799540613453843e-06, "loss": 0.0694, "step": 156830 }, { "epoch": 3.1926717557251907, "grad_norm": 0.0024314217740510464, "learning_rate": 3.479277140307211e-06, "loss": 0.0549, "step": 156840 }, { "epoch": 3.1928753180661578, "grad_norm": 0.00895284046427698, "learning_rate": 3.4786002499819966e-06, "loss": 0.0317, "step": 156850 }, { "epoch": 3.193078880407125, "grad_norm": 0.04146651462493318, "learning_rate": 3.4779233903834096e-06, "loss": 0.0607, "step": 156860 }, { "epoch": 3.1932824427480915, "grad_norm": 0.020199105252701067, "learning_rate": 3.4772465615251195e-06, "loss": 0.0215, "step": 156870 }, { "epoch": 3.1934860050890586, "grad_norm": 9.302035376735285, "learning_rate": 3.4765697634207975e-06, "loss": 0.0461, "step": 156880 }, { "epoch": 3.1936895674300256, "grad_norm": 0.05227069777835558, "learning_rate": 3.4758929960841104e-06, "loss": 0.0226, "step": 156890 }, { "epoch": 3.1938931297709923, "grad_norm": 0.006421941707763477, "learning_rate": 3.475216259528726e-06, "loss": 0.0382, "step": 156900 }, { "epoch": 3.1940966921119593, "grad_norm": 0.0071388738208802635, "learning_rate": 3.4745395537683154e-06, "loss": 0.0629, "step": 156910 }, { "epoch": 3.1943002544529264, "grad_norm": 22.457566630824363, "learning_rate": 3.473862878816542e-06, "loss": 0.0743, "step": 156920 }, { "epoch": 3.194503816793893, "grad_norm": 0.009066342958214445, "learning_rate": 3.4731862346870728e-06, "loss": 0.0004, "step": 156930 }, { "epoch": 3.19470737913486, "grad_norm": 0.10258141480982001, "learning_rate": 3.472509621393577e-06, "loss": 0.0728, "step": 156940 }, { "epoch": 3.1949109414758268, "grad_norm": 22.732656543412364, "learning_rate": 3.4718330389497134e-06, "loss": 0.0983, "step": 156950 }, { "epoch": 3.195114503816794, "grad_norm": 0.5395320814701249, "learning_rate": 3.4711564873691528e-06, "loss": 0.0014, "step": 156960 }, { "epoch": 3.195318066157761, "grad_norm": 0.7300789853108739, "learning_rate": 3.470479966665554e-06, "loss": 0.043, "step": 156970 }, { "epoch": 3.1955216284987276, "grad_norm": 19.611038144994843, "learning_rate": 3.4698034768525824e-06, "loss": 0.0411, "step": 156980 }, { "epoch": 3.1957251908396946, "grad_norm": 0.007807497328688553, "learning_rate": 3.4691270179439018e-06, "loss": 0.0522, "step": 156990 }, { "epoch": 3.1959287531806617, "grad_norm": 7.386410242737348, "learning_rate": 3.468450589953172e-06, "loss": 0.0259, "step": 157000 }, { "epoch": 3.1961323155216284, "grad_norm": 0.03910638953366461, "learning_rate": 3.4677741928940535e-06, "loss": 0.0706, "step": 157010 }, { "epoch": 3.1963358778625954, "grad_norm": 0.0036195698648147645, "learning_rate": 3.4670978267802115e-06, "loss": 0.0418, "step": 157020 }, { "epoch": 3.1965394402035625, "grad_norm": 0.5362944199542401, "learning_rate": 3.466421491625302e-06, "loss": 0.0106, "step": 157030 }, { "epoch": 3.196743002544529, "grad_norm": 0.0020922542841928338, "learning_rate": 3.4657451874429866e-06, "loss": 0.0152, "step": 157040 }, { "epoch": 3.1969465648854962, "grad_norm": 0.05064951629108587, "learning_rate": 3.4650689142469213e-06, "loss": 0.0589, "step": 157050 }, { "epoch": 3.1971501272264633, "grad_norm": 0.012000617428265985, "learning_rate": 3.464392672050767e-06, "loss": 0.0023, "step": 157060 }, { "epoch": 3.19735368956743, "grad_norm": 0.09081193099970097, "learning_rate": 3.4637164608681816e-06, "loss": 0.0015, "step": 157070 }, { "epoch": 3.197557251908397, "grad_norm": 0.10587567377618336, "learning_rate": 3.4630402807128194e-06, "loss": 0.0249, "step": 157080 }, { "epoch": 3.1977608142493636, "grad_norm": 0.34847225680264793, "learning_rate": 3.462364131598339e-06, "loss": 0.0374, "step": 157090 }, { "epoch": 3.1979643765903307, "grad_norm": 30.804230146304633, "learning_rate": 3.461688013538397e-06, "loss": 0.0642, "step": 157100 }, { "epoch": 3.198167938931298, "grad_norm": 0.008410270186023382, "learning_rate": 3.4610119265466453e-06, "loss": 0.0884, "step": 157110 }, { "epoch": 3.1983715012722644, "grad_norm": 0.09244006425060257, "learning_rate": 3.4603358706367405e-06, "loss": 0.1424, "step": 157120 }, { "epoch": 3.1985750636132315, "grad_norm": 0.1412579136432414, "learning_rate": 3.459659845822335e-06, "loss": 0.0409, "step": 157130 }, { "epoch": 3.1987786259541986, "grad_norm": 0.06463214540006652, "learning_rate": 3.4589838521170834e-06, "loss": 0.0287, "step": 157140 }, { "epoch": 3.1989821882951652, "grad_norm": 0.00314086786605945, "learning_rate": 3.458307889534639e-06, "loss": 0.096, "step": 157150 }, { "epoch": 3.1991857506361323, "grad_norm": 14.848276739936663, "learning_rate": 3.45763195808865e-06, "loss": 0.0913, "step": 157160 }, { "epoch": 3.1993893129770994, "grad_norm": 9.58738533326718, "learning_rate": 3.4569560577927706e-06, "loss": 0.0812, "step": 157170 }, { "epoch": 3.199592875318066, "grad_norm": 0.0649367906650229, "learning_rate": 3.4562801886606526e-06, "loss": 0.0988, "step": 157180 }, { "epoch": 3.199796437659033, "grad_norm": 0.006136178580093094, "learning_rate": 3.455604350705943e-06, "loss": 0.0395, "step": 157190 }, { "epoch": 3.2, "grad_norm": 0.06275531911469905, "learning_rate": 3.454928543942294e-06, "loss": 0.0517, "step": 157200 }, { "epoch": 3.200203562340967, "grad_norm": 0.018952121434855192, "learning_rate": 3.4542527683833526e-06, "loss": 0.0266, "step": 157210 }, { "epoch": 3.200407124681934, "grad_norm": 0.11158089585805857, "learning_rate": 3.4535770240427658e-06, "loss": 0.0084, "step": 157220 }, { "epoch": 3.200610687022901, "grad_norm": 0.1326776292298431, "learning_rate": 3.452901310934186e-06, "loss": 0.1086, "step": 157230 }, { "epoch": 3.2008142493638676, "grad_norm": 0.03880827377305607, "learning_rate": 3.452225629071254e-06, "loss": 0.0405, "step": 157240 }, { "epoch": 3.2010178117048347, "grad_norm": 0.023695570391666313, "learning_rate": 3.4515499784676216e-06, "loss": 0.0212, "step": 157250 }, { "epoch": 3.2012213740458018, "grad_norm": 0.12522866866240018, "learning_rate": 3.4508743591369298e-06, "loss": 0.0338, "step": 157260 }, { "epoch": 3.2014249363867684, "grad_norm": 0.011014109157193858, "learning_rate": 3.4501987710928254e-06, "loss": 0.0333, "step": 157270 }, { "epoch": 3.2016284987277355, "grad_norm": 0.16530172057026882, "learning_rate": 3.449523214348954e-06, "loss": 0.0739, "step": 157280 }, { "epoch": 3.201832061068702, "grad_norm": 0.13049715786359092, "learning_rate": 3.448847688918958e-06, "loss": 0.0354, "step": 157290 }, { "epoch": 3.202035623409669, "grad_norm": 0.032966986520747074, "learning_rate": 3.448172194816479e-06, "loss": 0.0635, "step": 157300 }, { "epoch": 3.2022391857506363, "grad_norm": 0.050704635385319136, "learning_rate": 3.4474967320551643e-06, "loss": 0.0029, "step": 157310 }, { "epoch": 3.202442748091603, "grad_norm": 0.049678964955929564, "learning_rate": 3.446821300648652e-06, "loss": 0.0622, "step": 157320 }, { "epoch": 3.20264631043257, "grad_norm": 0.17084305147250814, "learning_rate": 3.4461459006105837e-06, "loss": 0.0525, "step": 157330 }, { "epoch": 3.202849872773537, "grad_norm": 51.21988321526703, "learning_rate": 3.4454705319545987e-06, "loss": 0.1874, "step": 157340 }, { "epoch": 3.2030534351145037, "grad_norm": 7.711603385768109, "learning_rate": 3.4447951946943398e-06, "loss": 0.0565, "step": 157350 }, { "epoch": 3.2032569974554708, "grad_norm": 4.85172211712736, "learning_rate": 3.4441198888434457e-06, "loss": 0.0137, "step": 157360 }, { "epoch": 3.203460559796438, "grad_norm": 2.406448751714888, "learning_rate": 3.4434446144155537e-06, "loss": 0.0792, "step": 157370 }, { "epoch": 3.2036641221374045, "grad_norm": 0.05647234285421502, "learning_rate": 3.4427693714243015e-06, "loss": 0.0273, "step": 157380 }, { "epoch": 3.2038676844783716, "grad_norm": 0.9190380172657128, "learning_rate": 3.442094159883329e-06, "loss": 0.021, "step": 157390 }, { "epoch": 3.204071246819338, "grad_norm": 0.014788675858207576, "learning_rate": 3.4414189798062713e-06, "loss": 0.0599, "step": 157400 }, { "epoch": 3.2042748091603053, "grad_norm": 0.24870955278585083, "learning_rate": 3.440743831206765e-06, "loss": 0.0227, "step": 157410 }, { "epoch": 3.2044783715012723, "grad_norm": 0.015254675146227626, "learning_rate": 3.4400687140984444e-06, "loss": 0.0446, "step": 157420 }, { "epoch": 3.204681933842239, "grad_norm": 0.07274848219873342, "learning_rate": 3.4393936284949457e-06, "loss": 0.0458, "step": 157430 }, { "epoch": 3.204885496183206, "grad_norm": 0.016275101591793392, "learning_rate": 3.438718574409904e-06, "loss": 0.0453, "step": 157440 }, { "epoch": 3.205089058524173, "grad_norm": 0.005873299607027892, "learning_rate": 3.438043551856949e-06, "loss": 0.065, "step": 157450 }, { "epoch": 3.2052926208651398, "grad_norm": 26.244108903487636, "learning_rate": 3.4373685608497174e-06, "loss": 0.0748, "step": 157460 }, { "epoch": 3.205496183206107, "grad_norm": 0.2530639067071477, "learning_rate": 3.4366936014018414e-06, "loss": 0.1126, "step": 157470 }, { "epoch": 3.205699745547074, "grad_norm": 0.007506247383938363, "learning_rate": 3.4360186735269495e-06, "loss": 0.0075, "step": 157480 }, { "epoch": 3.2059033078880406, "grad_norm": 0.12345932320825412, "learning_rate": 3.4353437772386776e-06, "loss": 0.0384, "step": 157490 }, { "epoch": 3.2061068702290076, "grad_norm": 0.6218981150551522, "learning_rate": 3.4346689125506517e-06, "loss": 0.0009, "step": 157500 }, { "epoch": 3.2063104325699747, "grad_norm": 5.133111333270966, "learning_rate": 3.4339940794765038e-06, "loss": 0.1694, "step": 157510 }, { "epoch": 3.2065139949109414, "grad_norm": 0.016330093582309687, "learning_rate": 3.433319278029863e-06, "loss": 0.0264, "step": 157520 }, { "epoch": 3.2067175572519084, "grad_norm": 0.08309729421590148, "learning_rate": 3.4326445082243554e-06, "loss": 0.0267, "step": 157530 }, { "epoch": 3.2069211195928755, "grad_norm": 3.9538284659524097, "learning_rate": 3.431969770073613e-06, "loss": 0.018, "step": 157540 }, { "epoch": 3.207124681933842, "grad_norm": 0.039926562623677775, "learning_rate": 3.431295063591259e-06, "loss": 0.0098, "step": 157550 }, { "epoch": 3.2073282442748092, "grad_norm": 0.025376093292161492, "learning_rate": 3.4306203887909205e-06, "loss": 0.0437, "step": 157560 }, { "epoch": 3.2075318066157763, "grad_norm": 0.021908733021400322, "learning_rate": 3.4299457456862273e-06, "loss": 0.0423, "step": 157570 }, { "epoch": 3.207735368956743, "grad_norm": 0.016346527224854714, "learning_rate": 3.4292711342908013e-06, "loss": 0.0687, "step": 157580 }, { "epoch": 3.20793893129771, "grad_norm": 0.1800446290075902, "learning_rate": 3.4285965546182664e-06, "loss": 0.0726, "step": 157590 }, { "epoch": 3.2081424936386767, "grad_norm": 0.10596985211676752, "learning_rate": 3.4279220066822493e-06, "loss": 0.0038, "step": 157600 }, { "epoch": 3.2083460559796437, "grad_norm": 0.021103454508636276, "learning_rate": 3.427247490496372e-06, "loss": 0.0416, "step": 157610 }, { "epoch": 3.208549618320611, "grad_norm": 0.030404388264737525, "learning_rate": 3.4265730060742586e-06, "loss": 0.0808, "step": 157620 }, { "epoch": 3.2087531806615774, "grad_norm": 0.01632021201106186, "learning_rate": 3.425898553429527e-06, "loss": 0.0206, "step": 157630 }, { "epoch": 3.2089567430025445, "grad_norm": 3.753791610546142, "learning_rate": 3.425224132575803e-06, "loss": 0.0222, "step": 157640 }, { "epoch": 3.2091603053435116, "grad_norm": 0.008544023128615552, "learning_rate": 3.4245497435267072e-06, "loss": 0.0201, "step": 157650 }, { "epoch": 3.2093638676844782, "grad_norm": 0.04917572911132033, "learning_rate": 3.423875386295857e-06, "loss": 0.0465, "step": 157660 }, { "epoch": 3.2095674300254453, "grad_norm": 0.009029271964785033, "learning_rate": 3.4232010608968725e-06, "loss": 0.0351, "step": 157670 }, { "epoch": 3.2097709923664124, "grad_norm": 0.025251234950114693, "learning_rate": 3.4225267673433754e-06, "loss": 0.0324, "step": 157680 }, { "epoch": 3.209974554707379, "grad_norm": 10.474496204285137, "learning_rate": 3.4218525056489816e-06, "loss": 0.0565, "step": 157690 }, { "epoch": 3.210178117048346, "grad_norm": 12.552604745236902, "learning_rate": 3.4211782758273094e-06, "loss": 0.0603, "step": 157700 }, { "epoch": 3.210381679389313, "grad_norm": 0.23011299317298978, "learning_rate": 3.4205040778919735e-06, "loss": 0.1467, "step": 157710 }, { "epoch": 3.21058524173028, "grad_norm": 26.02555517876785, "learning_rate": 3.419829911856593e-06, "loss": 0.0493, "step": 157720 }, { "epoch": 3.210788804071247, "grad_norm": 0.10140847233694929, "learning_rate": 3.419155777734784e-06, "loss": 0.0127, "step": 157730 }, { "epoch": 3.2109923664122135, "grad_norm": 0.27176161697305423, "learning_rate": 3.4184816755401585e-06, "loss": 0.0831, "step": 157740 }, { "epoch": 3.2111959287531806, "grad_norm": 0.4410798869318757, "learning_rate": 3.417807605286333e-06, "loss": 0.0312, "step": 157750 }, { "epoch": 3.2113994910941477, "grad_norm": 0.011469191280697996, "learning_rate": 3.417133566986922e-06, "loss": 0.0186, "step": 157760 }, { "epoch": 3.2116030534351143, "grad_norm": 0.11074531246361755, "learning_rate": 3.416459560655535e-06, "loss": 0.0118, "step": 157770 }, { "epoch": 3.2118066157760814, "grad_norm": 0.18004356166595872, "learning_rate": 3.4157855863057903e-06, "loss": 0.0486, "step": 157780 }, { "epoch": 3.2120101781170485, "grad_norm": 25.398046352399493, "learning_rate": 3.4151116439512927e-06, "loss": 0.1266, "step": 157790 }, { "epoch": 3.212213740458015, "grad_norm": 0.2894885502108739, "learning_rate": 3.4144377336056576e-06, "loss": 0.0389, "step": 157800 }, { "epoch": 3.212417302798982, "grad_norm": 0.022140778657248426, "learning_rate": 3.4137638552824966e-06, "loss": 0.0124, "step": 157810 }, { "epoch": 3.2126208651399493, "grad_norm": 0.02624975899680237, "learning_rate": 3.4130900089954142e-06, "loss": 0.0082, "step": 157820 }, { "epoch": 3.212824427480916, "grad_norm": 0.007636777358150534, "learning_rate": 3.4124161947580253e-06, "loss": 0.0336, "step": 157830 }, { "epoch": 3.213027989821883, "grad_norm": 0.0033188484718560778, "learning_rate": 3.4117424125839373e-06, "loss": 0.0205, "step": 157840 }, { "epoch": 3.21323155216285, "grad_norm": 6.755011988799672, "learning_rate": 3.411068662486754e-06, "loss": 0.0279, "step": 157850 }, { "epoch": 3.2134351145038167, "grad_norm": 1.0646846389667615, "learning_rate": 3.4103949444800875e-06, "loss": 0.0732, "step": 157860 }, { "epoch": 3.2136386768447838, "grad_norm": 0.010338600937136634, "learning_rate": 3.4097212585775418e-06, "loss": 0.0417, "step": 157870 }, { "epoch": 3.213842239185751, "grad_norm": 13.915789226098683, "learning_rate": 3.4090476047927222e-06, "loss": 0.0454, "step": 157880 }, { "epoch": 3.2140458015267175, "grad_norm": 0.35633397802929906, "learning_rate": 3.4083739831392383e-06, "loss": 0.0009, "step": 157890 }, { "epoch": 3.2142493638676846, "grad_norm": 0.020083242489091702, "learning_rate": 3.40770039363069e-06, "loss": 0.1044, "step": 157900 }, { "epoch": 3.214452926208651, "grad_norm": 0.04793142530073831, "learning_rate": 3.407026836280685e-06, "loss": 0.0533, "step": 157910 }, { "epoch": 3.2146564885496183, "grad_norm": 0.011202059125846018, "learning_rate": 3.4063533111028224e-06, "loss": 0.0146, "step": 157920 }, { "epoch": 3.2148600508905854, "grad_norm": 113.06305727401013, "learning_rate": 3.4056798181107078e-06, "loss": 0.1047, "step": 157930 }, { "epoch": 3.215063613231552, "grad_norm": 0.003166654891184301, "learning_rate": 3.4050063573179437e-06, "loss": 0.0312, "step": 157940 }, { "epoch": 3.215267175572519, "grad_norm": 8.174760241450318, "learning_rate": 3.40433292873813e-06, "loss": 0.0184, "step": 157950 }, { "epoch": 3.215470737913486, "grad_norm": 0.0027094268317542853, "learning_rate": 3.4036595323848666e-06, "loss": 0.0546, "step": 157960 }, { "epoch": 3.2156743002544528, "grad_norm": 0.00828973307942431, "learning_rate": 3.402986168271757e-06, "loss": 0.0029, "step": 157970 }, { "epoch": 3.21587786259542, "grad_norm": 0.06701464150114979, "learning_rate": 3.402312836412398e-06, "loss": 0.0218, "step": 157980 }, { "epoch": 3.216081424936387, "grad_norm": 0.10842523462365289, "learning_rate": 3.4016395368203904e-06, "loss": 0.061, "step": 157990 }, { "epoch": 3.2162849872773536, "grad_norm": 0.0022579395406908823, "learning_rate": 3.400966269509328e-06, "loss": 0.0007, "step": 158000 }, { "epoch": 3.2164885496183206, "grad_norm": 0.010079978372996028, "learning_rate": 3.4002930344928133e-06, "loss": 0.0062, "step": 158010 }, { "epoch": 3.2166921119592877, "grad_norm": 0.028312151208262026, "learning_rate": 3.399619831784442e-06, "loss": 0.0038, "step": 158020 }, { "epoch": 3.2168956743002544, "grad_norm": 0.0771526922863806, "learning_rate": 3.3989466613978073e-06, "loss": 0.0461, "step": 158030 }, { "epoch": 3.2170992366412214, "grad_norm": 0.050110910285355276, "learning_rate": 3.3982735233465075e-06, "loss": 0.0772, "step": 158040 }, { "epoch": 3.217302798982188, "grad_norm": 0.3503342383140742, "learning_rate": 3.3976004176441385e-06, "loss": 0.12, "step": 158050 }, { "epoch": 3.217506361323155, "grad_norm": 0.027599480120306226, "learning_rate": 3.3969273443042916e-06, "loss": 0.0941, "step": 158060 }, { "epoch": 3.2177099236641222, "grad_norm": 0.08340351731047879, "learning_rate": 3.396254303340563e-06, "loss": 0.0878, "step": 158070 }, { "epoch": 3.217913486005089, "grad_norm": 0.060123889557948694, "learning_rate": 3.3955812947665422e-06, "loss": 0.1255, "step": 158080 }, { "epoch": 3.218117048346056, "grad_norm": 0.30993096419790384, "learning_rate": 3.3949083185958248e-06, "loss": 0.0074, "step": 158090 }, { "epoch": 3.218320610687023, "grad_norm": 0.25198296901791345, "learning_rate": 3.3942353748420022e-06, "loss": 0.1073, "step": 158100 }, { "epoch": 3.2185241730279897, "grad_norm": 11.218413736090636, "learning_rate": 3.393562463518662e-06, "loss": 0.0031, "step": 158110 }, { "epoch": 3.2187277353689567, "grad_norm": 0.04717932051578597, "learning_rate": 3.392889584639398e-06, "loss": 0.019, "step": 158120 }, { "epoch": 3.218931297709924, "grad_norm": 0.053006285814266266, "learning_rate": 3.392216738217801e-06, "loss": 0.0511, "step": 158130 }, { "epoch": 3.2191348600508904, "grad_norm": 0.010967188987425625, "learning_rate": 3.391543924267454e-06, "loss": 0.0047, "step": 158140 }, { "epoch": 3.2193384223918575, "grad_norm": 0.3433966886131375, "learning_rate": 3.3908711428019523e-06, "loss": 0.0918, "step": 158150 }, { "epoch": 3.2195419847328246, "grad_norm": 0.9518767084665498, "learning_rate": 3.3901983938348795e-06, "loss": 0.113, "step": 158160 }, { "epoch": 3.2197455470737912, "grad_norm": 0.24594914468608464, "learning_rate": 3.389525677379821e-06, "loss": 0.0755, "step": 158170 }, { "epoch": 3.2199491094147583, "grad_norm": 0.07127773918271196, "learning_rate": 3.3888529934503694e-06, "loss": 0.0585, "step": 158180 }, { "epoch": 3.2201526717557254, "grad_norm": 0.09500987067022107, "learning_rate": 3.388180342060105e-06, "loss": 0.0984, "step": 158190 }, { "epoch": 3.220356234096692, "grad_norm": 0.027145689127627732, "learning_rate": 3.3875077232226137e-06, "loss": 0.0611, "step": 158200 }, { "epoch": 3.220559796437659, "grad_norm": 0.025529247983327112, "learning_rate": 3.386835136951484e-06, "loss": 0.0488, "step": 158210 }, { "epoch": 3.220763358778626, "grad_norm": 0.11201561261860231, "learning_rate": 3.386162583260293e-06, "loss": 0.0406, "step": 158220 }, { "epoch": 3.220966921119593, "grad_norm": 12.530194700708357, "learning_rate": 3.3854900621626297e-06, "loss": 0.0869, "step": 158230 }, { "epoch": 3.22117048346056, "grad_norm": 0.05531090114447358, "learning_rate": 3.3848175736720735e-06, "loss": 0.07, "step": 158240 }, { "epoch": 3.2213740458015265, "grad_norm": 2.1541315054931207, "learning_rate": 3.3841451178022046e-06, "loss": 0.0025, "step": 158250 }, { "epoch": 3.2215776081424936, "grad_norm": 0.2771157842703803, "learning_rate": 3.3834726945666084e-06, "loss": 0.0801, "step": 158260 }, { "epoch": 3.2217811704834607, "grad_norm": 0.0092395356171554, "learning_rate": 3.382800303978863e-06, "loss": 0.0007, "step": 158270 }, { "epoch": 3.2219847328244273, "grad_norm": 0.014606194967863683, "learning_rate": 3.3821279460525492e-06, "loss": 0.0023, "step": 158280 }, { "epoch": 3.2221882951653944, "grad_norm": 6.039983913438646, "learning_rate": 3.381455620801243e-06, "loss": 0.0751, "step": 158290 }, { "epoch": 3.2223918575063615, "grad_norm": 0.1715313213496135, "learning_rate": 3.380783328238526e-06, "loss": 0.0168, "step": 158300 }, { "epoch": 3.222595419847328, "grad_norm": 6.353702839529861, "learning_rate": 3.380111068377977e-06, "loss": 0.043, "step": 158310 }, { "epoch": 3.222798982188295, "grad_norm": 0.3555232220747765, "learning_rate": 3.379438841233169e-06, "loss": 0.0013, "step": 158320 }, { "epoch": 3.2230025445292623, "grad_norm": 0.012099730410006811, "learning_rate": 3.3787666468176816e-06, "loss": 0.0369, "step": 158330 }, { "epoch": 3.223206106870229, "grad_norm": 0.0185424822230158, "learning_rate": 3.3780944851450914e-06, "loss": 0.0571, "step": 158340 }, { "epoch": 3.223409669211196, "grad_norm": 0.005301669218524595, "learning_rate": 3.377422356228971e-06, "loss": 0.0297, "step": 158350 }, { "epoch": 3.2236132315521626, "grad_norm": 17.538191918606124, "learning_rate": 3.376750260082896e-06, "loss": 0.069, "step": 158360 }, { "epoch": 3.2238167938931297, "grad_norm": 0.038963029359246254, "learning_rate": 3.376078196720439e-06, "loss": 0.0459, "step": 158370 }, { "epoch": 3.2240203562340968, "grad_norm": 0.4801825559365532, "learning_rate": 3.3754061661551756e-06, "loss": 0.0279, "step": 158380 }, { "epoch": 3.2242239185750634, "grad_norm": 0.08166432342670366, "learning_rate": 3.3747341684006783e-06, "loss": 0.1018, "step": 158390 }, { "epoch": 3.2244274809160305, "grad_norm": 0.06431435838547103, "learning_rate": 3.374062203470515e-06, "loss": 0.0732, "step": 158400 }, { "epoch": 3.2246310432569976, "grad_norm": 0.12496998805142852, "learning_rate": 3.3733902713782618e-06, "loss": 0.0478, "step": 158410 }, { "epoch": 3.224834605597964, "grad_norm": 0.13059050394700922, "learning_rate": 3.3727183721374866e-06, "loss": 0.0413, "step": 158420 }, { "epoch": 3.2250381679389313, "grad_norm": 0.1202375594645799, "learning_rate": 3.3720465057617593e-06, "loss": 0.001, "step": 158430 }, { "epoch": 3.2252417302798984, "grad_norm": 0.10805124142064708, "learning_rate": 3.3713746722646513e-06, "loss": 0.0895, "step": 158440 }, { "epoch": 3.225445292620865, "grad_norm": 0.1370307814191827, "learning_rate": 3.370702871659728e-06, "loss": 0.0657, "step": 158450 }, { "epoch": 3.225648854961832, "grad_norm": 0.05032861740597395, "learning_rate": 3.370031103960557e-06, "loss": 0.0309, "step": 158460 }, { "epoch": 3.225852417302799, "grad_norm": 0.014943805038048496, "learning_rate": 3.3693593691807113e-06, "loss": 0.0063, "step": 158470 }, { "epoch": 3.226055979643766, "grad_norm": 13.12677306807416, "learning_rate": 3.3686876673337503e-06, "loss": 0.0746, "step": 158480 }, { "epoch": 3.226259541984733, "grad_norm": 0.6224369406267735, "learning_rate": 3.368015998433244e-06, "loss": 0.0663, "step": 158490 }, { "epoch": 3.2264631043257, "grad_norm": 0.2220623820826526, "learning_rate": 3.367344362492757e-06, "loss": 0.0236, "step": 158500 }, { "epoch": 3.2266666666666666, "grad_norm": 0.2575594678634217, "learning_rate": 3.366672759525852e-06, "loss": 0.0574, "step": 158510 }, { "epoch": 3.2268702290076337, "grad_norm": 7.093080099560007, "learning_rate": 3.3660011895460966e-06, "loss": 0.0518, "step": 158520 }, { "epoch": 3.2270737913486007, "grad_norm": 18.9328775978381, "learning_rate": 3.3653296525670498e-06, "loss": 0.1273, "step": 158530 }, { "epoch": 3.2272773536895674, "grad_norm": 0.03865674227280635, "learning_rate": 3.364658148602276e-06, "loss": 0.0781, "step": 158540 }, { "epoch": 3.2274809160305344, "grad_norm": 0.07594329681400068, "learning_rate": 3.3639866776653386e-06, "loss": 0.0892, "step": 158550 }, { "epoch": 3.227684478371501, "grad_norm": 33.88418957257054, "learning_rate": 3.3633152397697966e-06, "loss": 0.0995, "step": 158560 }, { "epoch": 3.227888040712468, "grad_norm": 5.806383661902879, "learning_rate": 3.3626438349292106e-06, "loss": 0.0392, "step": 158570 }, { "epoch": 3.2280916030534352, "grad_norm": 0.013944965717554983, "learning_rate": 3.361972463157143e-06, "loss": 0.0288, "step": 158580 }, { "epoch": 3.228295165394402, "grad_norm": 7.209980216325034, "learning_rate": 3.3613011244671507e-06, "loss": 0.0836, "step": 158590 }, { "epoch": 3.228498727735369, "grad_norm": 9.254060638060466, "learning_rate": 3.3606298188727938e-06, "loss": 0.0539, "step": 158600 }, { "epoch": 3.228702290076336, "grad_norm": 0.5432410268885899, "learning_rate": 3.359958546387626e-06, "loss": 0.0775, "step": 158610 }, { "epoch": 3.2289058524173027, "grad_norm": 0.005681971317608096, "learning_rate": 3.35928730702521e-06, "loss": 0.0643, "step": 158620 }, { "epoch": 3.2291094147582697, "grad_norm": 0.02140508010210258, "learning_rate": 3.358616100799101e-06, "loss": 0.0425, "step": 158630 }, { "epoch": 3.229312977099237, "grad_norm": 0.07946857465187732, "learning_rate": 3.3579449277228525e-06, "loss": 0.0572, "step": 158640 }, { "epoch": 3.2295165394402034, "grad_norm": 0.02420826092350813, "learning_rate": 3.357273787810022e-06, "loss": 0.074, "step": 158650 }, { "epoch": 3.2297201017811705, "grad_norm": 0.3525676898949474, "learning_rate": 3.3566026810741616e-06, "loss": 0.005, "step": 158660 }, { "epoch": 3.2299236641221376, "grad_norm": 13.121459554986613, "learning_rate": 3.355931607528827e-06, "loss": 0.0668, "step": 158670 }, { "epoch": 3.2301272264631042, "grad_norm": 8.723200355986993, "learning_rate": 3.3552605671875726e-06, "loss": 0.1088, "step": 158680 }, { "epoch": 3.2303307888040713, "grad_norm": 0.90338383096545, "learning_rate": 3.354589560063948e-06, "loss": 0.0349, "step": 158690 }, { "epoch": 3.230534351145038, "grad_norm": 0.09678991898214305, "learning_rate": 3.353918586171507e-06, "loss": 0.0114, "step": 158700 }, { "epoch": 3.230737913486005, "grad_norm": 0.11536426630663829, "learning_rate": 3.3532476455238012e-06, "loss": 0.019, "step": 158710 }, { "epoch": 3.230941475826972, "grad_norm": 0.0168940784369966, "learning_rate": 3.3525767381343784e-06, "loss": 0.0295, "step": 158720 }, { "epoch": 3.2311450381679387, "grad_norm": 0.03322558691985453, "learning_rate": 3.351905864016792e-06, "loss": 0.0022, "step": 158730 }, { "epoch": 3.231348600508906, "grad_norm": 0.0929342263699705, "learning_rate": 3.3512350231845887e-06, "loss": 0.0735, "step": 158740 }, { "epoch": 3.231552162849873, "grad_norm": 0.013396843526671552, "learning_rate": 3.3505642156513163e-06, "loss": 0.0278, "step": 158750 }, { "epoch": 3.2317557251908395, "grad_norm": 0.15216170081281205, "learning_rate": 3.3498934414305275e-06, "loss": 0.0716, "step": 158760 }, { "epoch": 3.2319592875318066, "grad_norm": 11.172089258888269, "learning_rate": 3.3492227005357626e-06, "loss": 0.0061, "step": 158770 }, { "epoch": 3.2321628498727737, "grad_norm": 0.5131196121407263, "learning_rate": 3.3485519929805727e-06, "loss": 0.0486, "step": 158780 }, { "epoch": 3.2323664122137403, "grad_norm": 0.9988700801173471, "learning_rate": 3.3478813187785033e-06, "loss": 0.1138, "step": 158790 }, { "epoch": 3.2325699745547074, "grad_norm": 0.03945692440427056, "learning_rate": 3.3472106779430962e-06, "loss": 0.0017, "step": 158800 }, { "epoch": 3.2327735368956745, "grad_norm": 0.015581871850793485, "learning_rate": 3.3465400704879007e-06, "loss": 0.0753, "step": 158810 }, { "epoch": 3.232977099236641, "grad_norm": 0.005317572027947459, "learning_rate": 3.3458694964264573e-06, "loss": 0.0259, "step": 158820 }, { "epoch": 3.233180661577608, "grad_norm": 0.0953113399184747, "learning_rate": 3.3451989557723084e-06, "loss": 0.1218, "step": 158830 }, { "epoch": 3.2333842239185753, "grad_norm": 0.4934636888671201, "learning_rate": 3.344528448539e-06, "loss": 0.0719, "step": 158840 }, { "epoch": 3.233587786259542, "grad_norm": 4.207670147200541, "learning_rate": 3.3438579747400714e-06, "loss": 0.128, "step": 158850 }, { "epoch": 3.233791348600509, "grad_norm": 2.4815959045197786, "learning_rate": 3.343187534389062e-06, "loss": 0.0356, "step": 158860 }, { "epoch": 3.2339949109414756, "grad_norm": 16.975177845124815, "learning_rate": 3.342517127499517e-06, "loss": 0.0599, "step": 158870 }, { "epoch": 3.2341984732824427, "grad_norm": 0.08443146995292178, "learning_rate": 3.3418467540849726e-06, "loss": 0.0311, "step": 158880 }, { "epoch": 3.2344020356234098, "grad_norm": 8.959594275837798, "learning_rate": 3.34117641415897e-06, "loss": 0.0024, "step": 158890 }, { "epoch": 3.2346055979643764, "grad_norm": 8.438886743972741, "learning_rate": 3.3405061077350445e-06, "loss": 0.0422, "step": 158900 }, { "epoch": 3.2348091603053435, "grad_norm": 0.028530168414822445, "learning_rate": 3.3398358348267346e-06, "loss": 0.0529, "step": 158910 }, { "epoch": 3.2350127226463106, "grad_norm": 0.39734870625520263, "learning_rate": 3.3391655954475806e-06, "loss": 0.0268, "step": 158920 }, { "epoch": 3.235216284987277, "grad_norm": 0.006858828826514644, "learning_rate": 3.3384953896111148e-06, "loss": 0.1225, "step": 158930 }, { "epoch": 3.2354198473282443, "grad_norm": 0.012935957923023092, "learning_rate": 3.337825217330877e-06, "loss": 0.0812, "step": 158940 }, { "epoch": 3.2356234096692114, "grad_norm": 0.0964958911674746, "learning_rate": 3.3371550786203965e-06, "loss": 0.015, "step": 158950 }, { "epoch": 3.235826972010178, "grad_norm": 1.2899594154901934, "learning_rate": 3.3364849734932126e-06, "loss": 0.0691, "step": 158960 }, { "epoch": 3.236030534351145, "grad_norm": 0.12573355588944765, "learning_rate": 3.3358149019628584e-06, "loss": 0.0636, "step": 158970 }, { "epoch": 3.236234096692112, "grad_norm": 34.751965651442646, "learning_rate": 3.335144864042863e-06, "loss": 0.0265, "step": 158980 }, { "epoch": 3.236437659033079, "grad_norm": 1.3394103807716509, "learning_rate": 3.3344748597467637e-06, "loss": 0.0334, "step": 158990 }, { "epoch": 3.236641221374046, "grad_norm": 0.1062414338378945, "learning_rate": 3.3338048890880893e-06, "loss": 0.0051, "step": 159000 }, { "epoch": 3.2368447837150125, "grad_norm": 10.259360470733423, "learning_rate": 3.3331349520803704e-06, "loss": 0.0339, "step": 159010 }, { "epoch": 3.2370483460559796, "grad_norm": 0.011944354313093332, "learning_rate": 3.332465048737139e-06, "loss": 0.0927, "step": 159020 }, { "epoch": 3.2372519083969467, "grad_norm": 0.20708066534222674, "learning_rate": 3.331795179071923e-06, "loss": 0.0498, "step": 159030 }, { "epoch": 3.2374554707379133, "grad_norm": 0.146229619185727, "learning_rate": 3.3311253430982527e-06, "loss": 0.0939, "step": 159040 }, { "epoch": 3.2376590330788804, "grad_norm": 0.03928833239974861, "learning_rate": 3.330455540829656e-06, "loss": 0.1018, "step": 159050 }, { "epoch": 3.2378625954198474, "grad_norm": 0.04860264675150344, "learning_rate": 3.329785772279658e-06, "loss": 0.0266, "step": 159060 }, { "epoch": 3.238066157760814, "grad_norm": 1.3798988336760714, "learning_rate": 3.329116037461788e-06, "loss": 0.0623, "step": 159070 }, { "epoch": 3.238269720101781, "grad_norm": 0.05402495632645691, "learning_rate": 3.3284463363895735e-06, "loss": 0.0605, "step": 159080 }, { "epoch": 3.2384732824427482, "grad_norm": 6.1091437170651055, "learning_rate": 3.3277766690765363e-06, "loss": 0.0377, "step": 159090 }, { "epoch": 3.238676844783715, "grad_norm": 0.060443161078302964, "learning_rate": 3.3271070355362046e-06, "loss": 0.0612, "step": 159100 }, { "epoch": 3.238880407124682, "grad_norm": 0.03562467956253488, "learning_rate": 3.3264374357821007e-06, "loss": 0.0307, "step": 159110 }, { "epoch": 3.239083969465649, "grad_norm": 0.20334922244094453, "learning_rate": 3.3257678698277462e-06, "loss": 0.0176, "step": 159120 }, { "epoch": 3.2392875318066157, "grad_norm": 1.4829962146802562, "learning_rate": 3.3250983376866684e-06, "loss": 0.0307, "step": 159130 }, { "epoch": 3.2394910941475827, "grad_norm": 0.6686499977332059, "learning_rate": 3.324428839372386e-06, "loss": 0.0147, "step": 159140 }, { "epoch": 3.23969465648855, "grad_norm": 1.057068755571135, "learning_rate": 3.323759374898421e-06, "loss": 0.0597, "step": 159150 }, { "epoch": 3.2398982188295165, "grad_norm": 0.08537067728743482, "learning_rate": 3.3230899442782956e-06, "loss": 0.0006, "step": 159160 }, { "epoch": 3.2401017811704835, "grad_norm": 1.123045736077858, "learning_rate": 3.322420547525528e-06, "loss": 0.0968, "step": 159170 }, { "epoch": 3.2403053435114506, "grad_norm": 0.0563277847664946, "learning_rate": 3.321751184653639e-06, "loss": 0.002, "step": 159180 }, { "epoch": 3.2405089058524172, "grad_norm": 25.661314332269033, "learning_rate": 3.3210818556761447e-06, "loss": 0.0439, "step": 159190 }, { "epoch": 3.2407124681933843, "grad_norm": 0.01715365551528126, "learning_rate": 3.3204125606065648e-06, "loss": 0.0817, "step": 159200 }, { "epoch": 3.240916030534351, "grad_norm": 6.210682638686144, "learning_rate": 3.319743299458418e-06, "loss": 0.0026, "step": 159210 }, { "epoch": 3.241119592875318, "grad_norm": 0.1684379688308072, "learning_rate": 3.3190740722452187e-06, "loss": 0.0871, "step": 159220 }, { "epoch": 3.241323155216285, "grad_norm": 0.10880163603119743, "learning_rate": 3.3184048789804825e-06, "loss": 0.0007, "step": 159230 }, { "epoch": 3.2415267175572517, "grad_norm": 35.294466733849305, "learning_rate": 3.3177357196777273e-06, "loss": 0.0707, "step": 159240 }, { "epoch": 3.241730279898219, "grad_norm": 0.0981821370814691, "learning_rate": 3.3170665943504655e-06, "loss": 0.0578, "step": 159250 }, { "epoch": 3.241933842239186, "grad_norm": 74.7244833532606, "learning_rate": 3.316397503012212e-06, "loss": 0.0995, "step": 159260 }, { "epoch": 3.2421374045801525, "grad_norm": 0.006829744811037531, "learning_rate": 3.315728445676477e-06, "loss": 0.0156, "step": 159270 }, { "epoch": 3.2423409669211196, "grad_norm": 0.002507158979597202, "learning_rate": 3.315059422356777e-06, "loss": 0.0133, "step": 159280 }, { "epoch": 3.2425445292620867, "grad_norm": 0.039313112884631164, "learning_rate": 3.3143904330666233e-06, "loss": 0.0046, "step": 159290 }, { "epoch": 3.2427480916030533, "grad_norm": 0.02564408243429516, "learning_rate": 3.313721477819524e-06, "loss": 0.0767, "step": 159300 }, { "epoch": 3.2429516539440204, "grad_norm": 0.09459080235382605, "learning_rate": 3.3130525566289935e-06, "loss": 0.0038, "step": 159310 }, { "epoch": 3.2431552162849875, "grad_norm": 0.013680934892524501, "learning_rate": 3.3123836695085363e-06, "loss": 0.105, "step": 159320 }, { "epoch": 3.243358778625954, "grad_norm": 25.03320333634259, "learning_rate": 3.311714816471667e-06, "loss": 0.1391, "step": 159330 }, { "epoch": 3.243562340966921, "grad_norm": 47.2439378854624, "learning_rate": 3.311045997531891e-06, "loss": 0.0404, "step": 159340 }, { "epoch": 3.243765903307888, "grad_norm": 2.2495965906070015, "learning_rate": 3.310377212702715e-06, "loss": 0.1967, "step": 159350 }, { "epoch": 3.243969465648855, "grad_norm": 0.016498154085756188, "learning_rate": 3.3097084619976485e-06, "loss": 0.0327, "step": 159360 }, { "epoch": 3.244173027989822, "grad_norm": 14.077072522908784, "learning_rate": 3.3090397454301975e-06, "loss": 0.0297, "step": 159370 }, { "epoch": 3.2443765903307886, "grad_norm": 0.4106439018633693, "learning_rate": 3.3083710630138648e-06, "loss": 0.0329, "step": 159380 }, { "epoch": 3.2445801526717557, "grad_norm": 0.059826226492952356, "learning_rate": 3.3077024147621596e-06, "loss": 0.0018, "step": 159390 }, { "epoch": 3.244783715012723, "grad_norm": 0.0076458780146587, "learning_rate": 3.307033800688583e-06, "loss": 0.0265, "step": 159400 }, { "epoch": 3.2449872773536894, "grad_norm": 17.104684602341166, "learning_rate": 3.306365220806638e-06, "loss": 0.1057, "step": 159410 }, { "epoch": 3.2451908396946565, "grad_norm": 0.025004811050112725, "learning_rate": 3.30569667512983e-06, "loss": 0.0399, "step": 159420 }, { "epoch": 3.2453944020356236, "grad_norm": 0.015412265118008082, "learning_rate": 3.3050281636716598e-06, "loss": 0.0007, "step": 159430 }, { "epoch": 3.24559796437659, "grad_norm": 9.364346824056241, "learning_rate": 3.304359686445627e-06, "loss": 0.0338, "step": 159440 }, { "epoch": 3.2458015267175573, "grad_norm": 0.014582631118684868, "learning_rate": 3.3036912434652387e-06, "loss": 0.0023, "step": 159450 }, { "epoch": 3.2460050890585244, "grad_norm": 2.4959731073834655, "learning_rate": 3.3030228347439863e-06, "loss": 0.0855, "step": 159460 }, { "epoch": 3.246208651399491, "grad_norm": 0.10393874184403748, "learning_rate": 3.302354460295376e-06, "loss": 0.0851, "step": 159470 }, { "epoch": 3.246412213740458, "grad_norm": 0.4671691164685758, "learning_rate": 3.301686120132903e-06, "loss": 0.0863, "step": 159480 }, { "epoch": 3.246615776081425, "grad_norm": 0.008709504442182204, "learning_rate": 3.3010178142700657e-06, "loss": 0.0585, "step": 159490 }, { "epoch": 3.246819338422392, "grad_norm": 60.581594015668145, "learning_rate": 3.3003495427203625e-06, "loss": 0.1059, "step": 159500 }, { "epoch": 3.247022900763359, "grad_norm": 8.735177208821547, "learning_rate": 3.29968130549729e-06, "loss": 0.0294, "step": 159510 }, { "epoch": 3.2472264631043255, "grad_norm": 16.52585353449598, "learning_rate": 3.299013102614341e-06, "loss": 0.0114, "step": 159520 }, { "epoch": 3.2474300254452926, "grad_norm": 0.007795336336282446, "learning_rate": 3.298344934085016e-06, "loss": 0.0549, "step": 159530 }, { "epoch": 3.2476335877862597, "grad_norm": 0.06596207353662112, "learning_rate": 3.297676799922806e-06, "loss": 0.009, "step": 159540 }, { "epoch": 3.2478371501272263, "grad_norm": 0.013367995404537808, "learning_rate": 3.2970087001412065e-06, "loss": 0.0673, "step": 159550 }, { "epoch": 3.2480407124681934, "grad_norm": 0.0232153819657989, "learning_rate": 3.2963406347537076e-06, "loss": 0.0494, "step": 159560 }, { "epoch": 3.2482442748091604, "grad_norm": 0.7019318948803283, "learning_rate": 3.295672603773805e-06, "loss": 0.0292, "step": 159570 }, { "epoch": 3.248447837150127, "grad_norm": 0.14486829559670775, "learning_rate": 3.2950046072149898e-06, "loss": 0.0028, "step": 159580 }, { "epoch": 3.248651399491094, "grad_norm": 0.014655666679752436, "learning_rate": 3.294336645090751e-06, "loss": 0.0832, "step": 159590 }, { "epoch": 3.2488549618320612, "grad_norm": 0.011920648545896995, "learning_rate": 3.2936687174145822e-06, "loss": 0.0646, "step": 159600 }, { "epoch": 3.249058524173028, "grad_norm": 10.218529136610112, "learning_rate": 3.293000824199971e-06, "loss": 0.0233, "step": 159610 }, { "epoch": 3.249262086513995, "grad_norm": 0.00929152675627909, "learning_rate": 3.2923329654604067e-06, "loss": 0.0751, "step": 159620 }, { "epoch": 3.249465648854962, "grad_norm": 0.03567352242629831, "learning_rate": 3.2916651412093788e-06, "loss": 0.001, "step": 159630 }, { "epoch": 3.2496692111959287, "grad_norm": 0.054418802970003184, "learning_rate": 3.290997351460371e-06, "loss": 0.1142, "step": 159640 }, { "epoch": 3.2498727735368957, "grad_norm": 0.5504137367204838, "learning_rate": 3.2903295962268742e-06, "loss": 0.0008, "step": 159650 }, { "epoch": 3.2500763358778624, "grad_norm": 0.6961915851362714, "learning_rate": 3.289661875522374e-06, "loss": 0.0494, "step": 159660 }, { "epoch": 3.2502798982188295, "grad_norm": 0.024325891515218342, "learning_rate": 3.288994189360354e-06, "loss": 0.0518, "step": 159670 }, { "epoch": 3.2504834605597965, "grad_norm": 0.07681916314750561, "learning_rate": 3.288326537754301e-06, "loss": 0.0025, "step": 159680 }, { "epoch": 3.250687022900763, "grad_norm": 0.044940730704027106, "learning_rate": 3.2876589207176977e-06, "loss": 0.0296, "step": 159690 }, { "epoch": 3.2508905852417302, "grad_norm": 0.004625301800923823, "learning_rate": 3.2869913382640267e-06, "loss": 0.0016, "step": 159700 }, { "epoch": 3.2510941475826973, "grad_norm": 0.101808258459259, "learning_rate": 3.2863237904067748e-06, "loss": 0.0004, "step": 159710 }, { "epoch": 3.251297709923664, "grad_norm": 10.890621049063894, "learning_rate": 3.285656277159419e-06, "loss": 0.0494, "step": 159720 }, { "epoch": 3.251501272264631, "grad_norm": 15.651375122038989, "learning_rate": 3.2849887985354423e-06, "loss": 0.0524, "step": 159730 }, { "epoch": 3.251704834605598, "grad_norm": 0.01342502952365957, "learning_rate": 3.2843213545483287e-06, "loss": 0.1718, "step": 159740 }, { "epoch": 3.2519083969465647, "grad_norm": 0.1692178778697949, "learning_rate": 3.2836539452115525e-06, "loss": 0.0339, "step": 159750 }, { "epoch": 3.252111959287532, "grad_norm": 14.181480403808656, "learning_rate": 3.2829865705385964e-06, "loss": 0.0347, "step": 159760 }, { "epoch": 3.252315521628499, "grad_norm": 52.42508065022586, "learning_rate": 3.282319230542938e-06, "loss": 0.0494, "step": 159770 }, { "epoch": 3.2525190839694655, "grad_norm": 0.04507654396185481, "learning_rate": 3.2816519252380542e-06, "loss": 0.0372, "step": 159780 }, { "epoch": 3.2527226463104326, "grad_norm": 0.35027802866937413, "learning_rate": 3.2809846546374245e-06, "loss": 0.03, "step": 159790 }, { "epoch": 3.2529262086513997, "grad_norm": 12.108995446545185, "learning_rate": 3.280317418754523e-06, "loss": 0.0497, "step": 159800 }, { "epoch": 3.2531297709923663, "grad_norm": 0.18999448074514153, "learning_rate": 3.2796502176028243e-06, "loss": 0.0006, "step": 159810 }, { "epoch": 3.2533333333333334, "grad_norm": 0.005948867486376282, "learning_rate": 3.278983051195809e-06, "loss": 0.0558, "step": 159820 }, { "epoch": 3.2535368956743005, "grad_norm": 0.04054050688186158, "learning_rate": 3.278315919546945e-06, "loss": 0.0918, "step": 159830 }, { "epoch": 3.253740458015267, "grad_norm": 0.09840586048669499, "learning_rate": 3.27764882266971e-06, "loss": 0.0586, "step": 159840 }, { "epoch": 3.253944020356234, "grad_norm": 0.012891233721179741, "learning_rate": 3.276981760577573e-06, "loss": 0.0383, "step": 159850 }, { "epoch": 3.254147582697201, "grad_norm": 0.11110646520966024, "learning_rate": 3.2763147332840104e-06, "loss": 0.0968, "step": 159860 }, { "epoch": 3.254351145038168, "grad_norm": 0.010832168659549385, "learning_rate": 3.2756477408024934e-06, "loss": 0.1033, "step": 159870 }, { "epoch": 3.254554707379135, "grad_norm": 0.03773626038793187, "learning_rate": 3.2749807831464887e-06, "loss": 0.0465, "step": 159880 }, { "epoch": 3.2547582697201016, "grad_norm": 0.005395886315750151, "learning_rate": 3.2743138603294687e-06, "loss": 0.0612, "step": 159890 }, { "epoch": 3.2549618320610687, "grad_norm": 6.663297519864494, "learning_rate": 3.273646972364905e-06, "loss": 0.0964, "step": 159900 }, { "epoch": 3.255165394402036, "grad_norm": 26.020591919120303, "learning_rate": 3.272980119266263e-06, "loss": 0.077, "step": 159910 }, { "epoch": 3.2553689567430024, "grad_norm": 1.311400754909127, "learning_rate": 3.272313301047013e-06, "loss": 0.0648, "step": 159920 }, { "epoch": 3.2555725190839695, "grad_norm": 0.188689961412504, "learning_rate": 3.271646517720619e-06, "loss": 0.001, "step": 159930 }, { "epoch": 3.2557760814249366, "grad_norm": 0.01097354850656373, "learning_rate": 3.270979769300551e-06, "loss": 0.0353, "step": 159940 }, { "epoch": 3.255979643765903, "grad_norm": 0.02281847480604777, "learning_rate": 3.2703130558002736e-06, "loss": 0.0956, "step": 159950 }, { "epoch": 3.2561832061068703, "grad_norm": 0.009291547902878892, "learning_rate": 3.269646377233251e-06, "loss": 0.123, "step": 159960 }, { "epoch": 3.256386768447837, "grad_norm": 0.10549505982445742, "learning_rate": 3.268979733612949e-06, "loss": 0.0121, "step": 159970 }, { "epoch": 3.256590330788804, "grad_norm": 0.00040575070235056017, "learning_rate": 3.2683131249528323e-06, "loss": 0.006, "step": 159980 }, { "epoch": 3.256793893129771, "grad_norm": 0.02342028620332688, "learning_rate": 3.267646551266359e-06, "loss": 0.0217, "step": 159990 }, { "epoch": 3.2569974554707377, "grad_norm": 0.4325111473479527, "learning_rate": 3.2669800125669993e-06, "loss": 0.056, "step": 160000 }, { "epoch": 3.257201017811705, "grad_norm": 0.0038895374010488623, "learning_rate": 3.266313508868207e-06, "loss": 0.0755, "step": 160010 }, { "epoch": 3.257404580152672, "grad_norm": 59.027444420973616, "learning_rate": 3.265647040183447e-06, "loss": 0.028, "step": 160020 }, { "epoch": 3.2576081424936385, "grad_norm": 0.10053840173109743, "learning_rate": 3.26498060652618e-06, "loss": 0.0598, "step": 160030 }, { "epoch": 3.2578117048346056, "grad_norm": 0.06994230703128186, "learning_rate": 3.2643142079098623e-06, "loss": 0.0327, "step": 160040 }, { "epoch": 3.2580152671755727, "grad_norm": 0.18423619761986124, "learning_rate": 3.2636478443479568e-06, "loss": 0.0088, "step": 160050 }, { "epoch": 3.2582188295165393, "grad_norm": 2.820287689151102, "learning_rate": 3.2629815158539192e-06, "loss": 0.0575, "step": 160060 }, { "epoch": 3.2584223918575064, "grad_norm": 10.054852813600332, "learning_rate": 3.262315222441205e-06, "loss": 0.0247, "step": 160070 }, { "epoch": 3.2586259541984735, "grad_norm": 19.997802525531327, "learning_rate": 3.261648964123276e-06, "loss": 0.0168, "step": 160080 }, { "epoch": 3.25882951653944, "grad_norm": 0.25443015348146625, "learning_rate": 3.260982740913583e-06, "loss": 0.0923, "step": 160090 }, { "epoch": 3.259033078880407, "grad_norm": 0.022980239050196884, "learning_rate": 3.260316552825583e-06, "loss": 0.0047, "step": 160100 }, { "epoch": 3.2592366412213742, "grad_norm": 0.019754706612388453, "learning_rate": 3.2596503998727337e-06, "loss": 0.1137, "step": 160110 }, { "epoch": 3.259440203562341, "grad_norm": 0.233849803388256, "learning_rate": 3.2589842820684835e-06, "loss": 0.0719, "step": 160120 }, { "epoch": 3.259643765903308, "grad_norm": 0.07902102710036123, "learning_rate": 3.2583181994262903e-06, "loss": 0.1285, "step": 160130 }, { "epoch": 3.259847328244275, "grad_norm": 0.07468014827850111, "learning_rate": 3.2576521519596023e-06, "loss": 0.0213, "step": 160140 }, { "epoch": 3.2600508905852417, "grad_norm": 0.007112106895888641, "learning_rate": 3.2569861396818735e-06, "loss": 0.136, "step": 160150 }, { "epoch": 3.2602544529262087, "grad_norm": 1.3472503528908917, "learning_rate": 3.2563201626065566e-06, "loss": 0.0053, "step": 160160 }, { "epoch": 3.2604580152671754, "grad_norm": 2.558397849513966, "learning_rate": 3.2556542207470983e-06, "loss": 0.0782, "step": 160170 }, { "epoch": 3.2606615776081425, "grad_norm": 0.6212230865780158, "learning_rate": 3.254988314116948e-06, "loss": 0.0641, "step": 160180 }, { "epoch": 3.2608651399491095, "grad_norm": 33.86879329967132, "learning_rate": 3.2543224427295594e-06, "loss": 0.064, "step": 160190 }, { "epoch": 3.261068702290076, "grad_norm": 0.35062382595076885, "learning_rate": 3.253656606598376e-06, "loss": 0.0402, "step": 160200 }, { "epoch": 3.2612722646310432, "grad_norm": 0.044088246635278085, "learning_rate": 3.2529908057368475e-06, "loss": 0.0946, "step": 160210 }, { "epoch": 3.2614758269720103, "grad_norm": 5.164692743041922, "learning_rate": 3.2523250401584184e-06, "loss": 0.0245, "step": 160220 }, { "epoch": 3.261679389312977, "grad_norm": 0.0048281253155234545, "learning_rate": 3.251659309876537e-06, "loss": 0.0723, "step": 160230 }, { "epoch": 3.261882951653944, "grad_norm": 0.1403048335485033, "learning_rate": 3.2509936149046483e-06, "loss": 0.0611, "step": 160240 }, { "epoch": 3.262086513994911, "grad_norm": 0.4444969099994837, "learning_rate": 3.250327955256194e-06, "loss": 0.0008, "step": 160250 }, { "epoch": 3.2622900763358778, "grad_norm": 14.150775876159782, "learning_rate": 3.2496623309446217e-06, "loss": 0.0585, "step": 160260 }, { "epoch": 3.262493638676845, "grad_norm": 0.3301450019986752, "learning_rate": 3.2489967419833744e-06, "loss": 0.0026, "step": 160270 }, { "epoch": 3.2626972010178115, "grad_norm": 0.0015785636361361837, "learning_rate": 3.2483311883858904e-06, "loss": 0.155, "step": 160280 }, { "epoch": 3.2629007633587785, "grad_norm": 0.022189672575122475, "learning_rate": 3.2476656701656174e-06, "loss": 0.03, "step": 160290 }, { "epoch": 3.2631043256997456, "grad_norm": 0.00949507849896804, "learning_rate": 3.2470001873359904e-06, "loss": 0.0663, "step": 160300 }, { "epoch": 3.2633078880407123, "grad_norm": 0.016598417589850808, "learning_rate": 3.2463347399104538e-06, "loss": 0.0868, "step": 160310 }, { "epoch": 3.2635114503816793, "grad_norm": 7.586578939333508, "learning_rate": 3.2456693279024466e-06, "loss": 0.0898, "step": 160320 }, { "epoch": 3.2637150127226464, "grad_norm": 0.031183736888656628, "learning_rate": 3.245003951325404e-06, "loss": 0.0326, "step": 160330 }, { "epoch": 3.263918575063613, "grad_norm": 0.3904167863667872, "learning_rate": 3.2443386101927687e-06, "loss": 0.0435, "step": 160340 }, { "epoch": 3.26412213740458, "grad_norm": 0.19159964893640197, "learning_rate": 3.2436733045179773e-06, "loss": 0.0466, "step": 160350 }, { "epoch": 3.264325699745547, "grad_norm": 11.611077240491163, "learning_rate": 3.243008034314464e-06, "loss": 0.0751, "step": 160360 }, { "epoch": 3.264529262086514, "grad_norm": 0.035681192052531056, "learning_rate": 3.242342799595668e-06, "loss": 0.0344, "step": 160370 }, { "epoch": 3.264732824427481, "grad_norm": 0.11584607823996708, "learning_rate": 3.241677600375023e-06, "loss": 0.001, "step": 160380 }, { "epoch": 3.264936386768448, "grad_norm": 0.02820488941501847, "learning_rate": 3.241012436665961e-06, "loss": 0.0436, "step": 160390 }, { "epoch": 3.2651399491094146, "grad_norm": 0.036408992711261266, "learning_rate": 3.2403473084819213e-06, "loss": 0.0334, "step": 160400 }, { "epoch": 3.2653435114503817, "grad_norm": 0.7492895247019403, "learning_rate": 3.239682215836333e-06, "loss": 0.0275, "step": 160410 }, { "epoch": 3.265547073791349, "grad_norm": 26.24820855129756, "learning_rate": 3.239017158742631e-06, "loss": 0.0606, "step": 160420 }, { "epoch": 3.2657506361323154, "grad_norm": 0.06036832842036916, "learning_rate": 3.238352137214244e-06, "loss": 0.0013, "step": 160430 }, { "epoch": 3.2659541984732825, "grad_norm": 0.020638187293049372, "learning_rate": 3.2376871512646034e-06, "loss": 0.0003, "step": 160440 }, { "epoch": 3.2661577608142496, "grad_norm": 0.10634519713839305, "learning_rate": 3.2370222009071432e-06, "loss": 0.126, "step": 160450 }, { "epoch": 3.266361323155216, "grad_norm": 0.004689528720572001, "learning_rate": 3.2363572861552894e-06, "loss": 0.079, "step": 160460 }, { "epoch": 3.2665648854961833, "grad_norm": 23.12110572069201, "learning_rate": 3.2356924070224704e-06, "loss": 0.0409, "step": 160470 }, { "epoch": 3.2667684478371504, "grad_norm": 0.003829157826002939, "learning_rate": 3.235027563522118e-06, "loss": 0.0716, "step": 160480 }, { "epoch": 3.266972010178117, "grad_norm": 0.0885993218878268, "learning_rate": 3.234362755667656e-06, "loss": 0.0667, "step": 160490 }, { "epoch": 3.267175572519084, "grad_norm": 0.09213674727713285, "learning_rate": 3.2336979834725136e-06, "loss": 0.0008, "step": 160500 }, { "epoch": 3.2673791348600507, "grad_norm": 0.1274703406110042, "learning_rate": 3.2330332469501125e-06, "loss": 0.0808, "step": 160510 }, { "epoch": 3.267582697201018, "grad_norm": 0.26676084194984273, "learning_rate": 3.2323685461138825e-06, "loss": 0.1015, "step": 160520 }, { "epoch": 3.267786259541985, "grad_norm": 0.07145803594165179, "learning_rate": 3.2317038809772473e-06, "loss": 0.0258, "step": 160530 }, { "epoch": 3.2679898218829515, "grad_norm": 5.0862826406834705, "learning_rate": 3.231039251553628e-06, "loss": 0.0039, "step": 160540 }, { "epoch": 3.2681933842239186, "grad_norm": 0.06911831981706942, "learning_rate": 3.2303746578564497e-06, "loss": 0.0414, "step": 160550 }, { "epoch": 3.2683969465648857, "grad_norm": 0.011078111449008167, "learning_rate": 3.2297100998991366e-06, "loss": 0.0148, "step": 160560 }, { "epoch": 3.2686005089058523, "grad_norm": 0.06466738707476788, "learning_rate": 3.229045577695106e-06, "loss": 0.0816, "step": 160570 }, { "epoch": 3.2688040712468194, "grad_norm": 2.2130397015040684, "learning_rate": 3.228381091257783e-06, "loss": 0.0459, "step": 160580 }, { "epoch": 3.269007633587786, "grad_norm": 7.500446873265226, "learning_rate": 3.2277166406005826e-06, "loss": 0.152, "step": 160590 }, { "epoch": 3.269211195928753, "grad_norm": 19.968506994480926, "learning_rate": 3.227052225736929e-06, "loss": 0.0258, "step": 160600 }, { "epoch": 3.26941475826972, "grad_norm": 2.5800566751563023, "learning_rate": 3.22638784668024e-06, "loss": 0.0321, "step": 160610 }, { "epoch": 3.269618320610687, "grad_norm": 0.01168933493844521, "learning_rate": 3.225723503443931e-06, "loss": 0.0236, "step": 160620 }, { "epoch": 3.269821882951654, "grad_norm": 0.03616913593133769, "learning_rate": 3.2250591960414225e-06, "loss": 0.0505, "step": 160630 }, { "epoch": 3.270025445292621, "grad_norm": 13.232789226611308, "learning_rate": 3.2243949244861294e-06, "loss": 0.082, "step": 160640 }, { "epoch": 3.2702290076335876, "grad_norm": 14.307966391591005, "learning_rate": 3.2237306887914664e-06, "loss": 0.0091, "step": 160650 }, { "epoch": 3.2704325699745547, "grad_norm": 0.006664202357735909, "learning_rate": 3.2230664889708527e-06, "loss": 0.0988, "step": 160660 }, { "epoch": 3.2706361323155217, "grad_norm": 0.023531462558805706, "learning_rate": 3.2224023250376978e-06, "loss": 0.0251, "step": 160670 }, { "epoch": 3.2708396946564884, "grad_norm": 0.8036001654852462, "learning_rate": 3.221738197005417e-06, "loss": 0.01, "step": 160680 }, { "epoch": 3.2710432569974555, "grad_norm": 0.14648543344262552, "learning_rate": 3.2210741048874263e-06, "loss": 0.1265, "step": 160690 }, { "epoch": 3.2712468193384225, "grad_norm": 0.17722609383095778, "learning_rate": 3.2204100486971337e-06, "loss": 0.0831, "step": 160700 }, { "epoch": 3.271450381679389, "grad_norm": 16.534838450635686, "learning_rate": 3.2197460284479533e-06, "loss": 0.0585, "step": 160710 }, { "epoch": 3.2716539440203563, "grad_norm": 0.004672693205029784, "learning_rate": 3.2190820441532934e-06, "loss": 0.0006, "step": 160720 }, { "epoch": 3.2718575063613233, "grad_norm": 0.04597167983819571, "learning_rate": 3.218418095826565e-06, "loss": 0.075, "step": 160730 }, { "epoch": 3.27206106870229, "grad_norm": 1.1860573974646653, "learning_rate": 3.2177541834811794e-06, "loss": 0.0743, "step": 160740 }, { "epoch": 3.272264631043257, "grad_norm": 0.17052040824216938, "learning_rate": 3.2170903071305428e-06, "loss": 0.0024, "step": 160750 }, { "epoch": 3.272468193384224, "grad_norm": 9.658613347841767, "learning_rate": 3.216426466788063e-06, "loss": 0.0387, "step": 160760 }, { "epoch": 3.2726717557251908, "grad_norm": 0.07451522666529485, "learning_rate": 3.215762662467149e-06, "loss": 0.0445, "step": 160770 }, { "epoch": 3.272875318066158, "grad_norm": 0.04689039009240682, "learning_rate": 3.215098894181206e-06, "loss": 0.1152, "step": 160780 }, { "epoch": 3.273078880407125, "grad_norm": 1.4673930728615214, "learning_rate": 3.214435161943641e-06, "loss": 0.0811, "step": 160790 }, { "epoch": 3.2732824427480915, "grad_norm": 7.66478076946602, "learning_rate": 3.2137714657678555e-06, "loss": 0.0915, "step": 160800 }, { "epoch": 3.2734860050890586, "grad_norm": 0.032077833647364225, "learning_rate": 3.213107805667256e-06, "loss": 0.0006, "step": 160810 }, { "epoch": 3.2736895674300253, "grad_norm": 18.750520710806, "learning_rate": 3.2124441816552478e-06, "loss": 0.0291, "step": 160820 }, { "epoch": 3.2738931297709923, "grad_norm": 0.028369738781367667, "learning_rate": 3.2117805937452294e-06, "loss": 0.0637, "step": 160830 }, { "epoch": 3.2740966921119594, "grad_norm": 0.06457099616599785, "learning_rate": 3.211117041950606e-06, "loss": 0.1012, "step": 160840 }, { "epoch": 3.274300254452926, "grad_norm": 0.1755574148253736, "learning_rate": 3.210453526284779e-06, "loss": 0.0704, "step": 160850 }, { "epoch": 3.274503816793893, "grad_norm": 0.08246611274056692, "learning_rate": 3.2097900467611475e-06, "loss": 0.0375, "step": 160860 }, { "epoch": 3.27470737913486, "grad_norm": 4.761252872715834, "learning_rate": 3.209126603393112e-06, "loss": 0.0115, "step": 160870 }, { "epoch": 3.274910941475827, "grad_norm": 0.02294238709356663, "learning_rate": 3.2084631961940703e-06, "loss": 0.046, "step": 160880 }, { "epoch": 3.275114503816794, "grad_norm": 0.1876554044981234, "learning_rate": 3.207799825177422e-06, "loss": 0.0003, "step": 160890 }, { "epoch": 3.275318066157761, "grad_norm": 0.06273373863879211, "learning_rate": 3.207136490356566e-06, "loss": 0.0173, "step": 160900 }, { "epoch": 3.2755216284987276, "grad_norm": 0.014825485093396977, "learning_rate": 3.2064731917448956e-06, "loss": 0.0412, "step": 160910 }, { "epoch": 3.2757251908396947, "grad_norm": 1.2908288395637062, "learning_rate": 3.20580992935581e-06, "loss": 0.1834, "step": 160920 }, { "epoch": 3.2759287531806613, "grad_norm": 13.795809260776037, "learning_rate": 3.205146703202705e-06, "loss": 0.0303, "step": 160930 }, { "epoch": 3.2761323155216284, "grad_norm": 0.319255368276416, "learning_rate": 3.204483513298972e-06, "loss": 0.0351, "step": 160940 }, { "epoch": 3.2763358778625955, "grad_norm": 0.1424552707192132, "learning_rate": 3.203820359658009e-06, "loss": 0.0729, "step": 160950 }, { "epoch": 3.276539440203562, "grad_norm": 0.015457927212038142, "learning_rate": 3.2031572422932066e-06, "loss": 0.0519, "step": 160960 }, { "epoch": 3.276743002544529, "grad_norm": 0.007182978576963117, "learning_rate": 3.2024941612179565e-06, "loss": 0.1051, "step": 160970 }, { "epoch": 3.2769465648854963, "grad_norm": 0.007197789530803395, "learning_rate": 3.2018311164456556e-06, "loss": 0.0294, "step": 160980 }, { "epoch": 3.277150127226463, "grad_norm": 0.07743658165935824, "learning_rate": 3.2011681079896885e-06, "loss": 0.0008, "step": 160990 }, { "epoch": 3.27735368956743, "grad_norm": 0.02605894769853916, "learning_rate": 3.2005051358634486e-06, "loss": 0.0016, "step": 161000 }, { "epoch": 3.277557251908397, "grad_norm": 0.04747429677106392, "learning_rate": 3.1998422000803263e-06, "loss": 0.0644, "step": 161010 }, { "epoch": 3.2777608142493637, "grad_norm": 5.83284210479787, "learning_rate": 3.1991793006537075e-06, "loss": 0.0918, "step": 161020 }, { "epoch": 3.277964376590331, "grad_norm": 13.77939971459705, "learning_rate": 3.198516437596984e-06, "loss": 0.0504, "step": 161030 }, { "epoch": 3.278167938931298, "grad_norm": 0.011621726142400986, "learning_rate": 3.1978536109235403e-06, "loss": 0.0343, "step": 161040 }, { "epoch": 3.2783715012722645, "grad_norm": 0.35779669916383394, "learning_rate": 3.197190820646764e-06, "loss": 0.0437, "step": 161050 }, { "epoch": 3.2785750636132316, "grad_norm": 0.28858222487663665, "learning_rate": 3.1965280667800417e-06, "loss": 0.0313, "step": 161060 }, { "epoch": 3.2787786259541987, "grad_norm": 0.059956224279195176, "learning_rate": 3.1958653493367576e-06, "loss": 0.0431, "step": 161070 }, { "epoch": 3.2789821882951653, "grad_norm": 0.5894340849199852, "learning_rate": 3.195202668330298e-06, "loss": 0.0646, "step": 161080 }, { "epoch": 3.2791857506361324, "grad_norm": 0.09337396235573324, "learning_rate": 3.194540023774042e-06, "loss": 0.0696, "step": 161090 }, { "epoch": 3.2793893129770995, "grad_norm": 0.01814112540403612, "learning_rate": 3.193877415681378e-06, "loss": 0.0393, "step": 161100 }, { "epoch": 3.279592875318066, "grad_norm": 1.2816910855707129, "learning_rate": 3.1932148440656855e-06, "loss": 0.0584, "step": 161110 }, { "epoch": 3.279796437659033, "grad_norm": 0.0987396070255578, "learning_rate": 3.192552308940345e-06, "loss": 0.0016, "step": 161120 }, { "epoch": 3.2800000000000002, "grad_norm": 0.13514210845391847, "learning_rate": 3.1918898103187397e-06, "loss": 0.0046, "step": 161130 }, { "epoch": 3.280203562340967, "grad_norm": 0.0646215675654434, "learning_rate": 3.191227348214249e-06, "loss": 0.0332, "step": 161140 }, { "epoch": 3.280407124681934, "grad_norm": 10.086057284605188, "learning_rate": 3.1905649226402515e-06, "loss": 0.0777, "step": 161150 }, { "epoch": 3.2806106870229006, "grad_norm": 7.487395765883465, "learning_rate": 3.1899025336101262e-06, "loss": 0.1193, "step": 161160 }, { "epoch": 3.2808142493638677, "grad_norm": 0.10535043896149149, "learning_rate": 3.189240181137249e-06, "loss": 0.1039, "step": 161170 }, { "epoch": 3.2810178117048348, "grad_norm": 0.026021566794905162, "learning_rate": 3.188577865234999e-06, "loss": 0.1009, "step": 161180 }, { "epoch": 3.2812213740458014, "grad_norm": 0.03439344079822577, "learning_rate": 3.187915585916753e-06, "loss": 0.0006, "step": 161190 }, { "epoch": 3.2814249363867685, "grad_norm": 0.08689617973540682, "learning_rate": 3.187253343195885e-06, "loss": 0.0573, "step": 161200 }, { "epoch": 3.2816284987277355, "grad_norm": 0.16873442826151153, "learning_rate": 3.1865911370857706e-06, "loss": 0.0045, "step": 161210 }, { "epoch": 3.281832061068702, "grad_norm": 0.11424607917324452, "learning_rate": 3.185928967599785e-06, "loss": 0.0757, "step": 161220 }, { "epoch": 3.2820356234096693, "grad_norm": 0.39753383119957697, "learning_rate": 3.1852668347512983e-06, "loss": 0.0062, "step": 161230 }, { "epoch": 3.282239185750636, "grad_norm": 0.05317213770897673, "learning_rate": 3.1846047385536872e-06, "loss": 0.0785, "step": 161240 }, { "epoch": 3.282442748091603, "grad_norm": 0.06698923329418133, "learning_rate": 3.183942679020321e-06, "loss": 0.0241, "step": 161250 }, { "epoch": 3.28264631043257, "grad_norm": 0.05498742752799634, "learning_rate": 3.1832806561645702e-06, "loss": 0.1165, "step": 161260 }, { "epoch": 3.2828498727735367, "grad_norm": 0.007836897777646776, "learning_rate": 3.1826186699998094e-06, "loss": 0.0351, "step": 161270 }, { "epoch": 3.2830534351145038, "grad_norm": 0.1150223360956882, "learning_rate": 3.1819567205394023e-06, "loss": 0.0018, "step": 161280 }, { "epoch": 3.283256997455471, "grad_norm": 0.6737810945620654, "learning_rate": 3.181294807796722e-06, "loss": 0.058, "step": 161290 }, { "epoch": 3.2834605597964375, "grad_norm": 12.145830861682828, "learning_rate": 3.1806329317851362e-06, "loss": 0.1381, "step": 161300 }, { "epoch": 3.2836641221374046, "grad_norm": 0.035830141170943616, "learning_rate": 3.1799710925180104e-06, "loss": 0.083, "step": 161310 }, { "epoch": 3.2838676844783716, "grad_norm": 0.031717579528204885, "learning_rate": 3.179309290008714e-06, "loss": 0.0859, "step": 161320 }, { "epoch": 3.2840712468193383, "grad_norm": 0.016639220214252293, "learning_rate": 3.1786475242706105e-06, "loss": 0.0017, "step": 161330 }, { "epoch": 3.2842748091603053, "grad_norm": 1.2340581482403574, "learning_rate": 3.1779857953170658e-06, "loss": 0.0542, "step": 161340 }, { "epoch": 3.2844783715012724, "grad_norm": 17.341271641190897, "learning_rate": 3.1773241031614453e-06, "loss": 0.062, "step": 161350 }, { "epoch": 3.284681933842239, "grad_norm": 0.2163861704202559, "learning_rate": 3.1766624478171126e-06, "loss": 0.0016, "step": 161360 }, { "epoch": 3.284885496183206, "grad_norm": 0.003259213098769761, "learning_rate": 3.1760008292974285e-06, "loss": 0.0385, "step": 161370 }, { "epoch": 3.285089058524173, "grad_norm": 0.029659890499897746, "learning_rate": 3.175339247615759e-06, "loss": 0.0032, "step": 161380 }, { "epoch": 3.28529262086514, "grad_norm": 0.018139846543766595, "learning_rate": 3.1746777027854626e-06, "loss": 0.0098, "step": 161390 }, { "epoch": 3.285496183206107, "grad_norm": 0.23972506917401298, "learning_rate": 3.174016194819902e-06, "loss": 0.0994, "step": 161400 }, { "epoch": 3.285699745547074, "grad_norm": 0.052244290396856435, "learning_rate": 3.173354723732435e-06, "loss": 0.0006, "step": 161410 }, { "epoch": 3.2859033078880406, "grad_norm": 0.026288675108643477, "learning_rate": 3.1726932895364216e-06, "loss": 0.0492, "step": 161420 }, { "epoch": 3.2861068702290077, "grad_norm": 0.03261724320957368, "learning_rate": 3.172031892245222e-06, "loss": 0.0526, "step": 161430 }, { "epoch": 3.286310432569975, "grad_norm": 3.7180867847212062, "learning_rate": 3.171370531872192e-06, "loss": 0.0341, "step": 161440 }, { "epoch": 3.2865139949109414, "grad_norm": 5.925169959354084, "learning_rate": 3.170709208430691e-06, "loss": 0.0595, "step": 161450 }, { "epoch": 3.2867175572519085, "grad_norm": 0.0771093938850925, "learning_rate": 3.170047921934071e-06, "loss": 0.1606, "step": 161460 }, { "epoch": 3.286921119592875, "grad_norm": 10.196714482992805, "learning_rate": 3.169386672395691e-06, "loss": 0.0649, "step": 161470 }, { "epoch": 3.287124681933842, "grad_norm": 0.6330575551178389, "learning_rate": 3.1687254598289064e-06, "loss": 0.049, "step": 161480 }, { "epoch": 3.2873282442748093, "grad_norm": 26.719223014468703, "learning_rate": 3.168064284247068e-06, "loss": 0.1028, "step": 161490 }, { "epoch": 3.287531806615776, "grad_norm": 0.2625660566239837, "learning_rate": 3.167403145663531e-06, "loss": 0.1428, "step": 161500 }, { "epoch": 3.287735368956743, "grad_norm": 9.5149719083268, "learning_rate": 3.1667420440916495e-06, "loss": 0.055, "step": 161510 }, { "epoch": 3.28793893129771, "grad_norm": 0.004865843878577394, "learning_rate": 3.1660809795447724e-06, "loss": 0.0317, "step": 161520 }, { "epoch": 3.2881424936386767, "grad_norm": 0.1762519817041262, "learning_rate": 3.1654199520362528e-06, "loss": 0.0012, "step": 161530 }, { "epoch": 3.288346055979644, "grad_norm": 0.014332126016021058, "learning_rate": 3.1647589615794394e-06, "loss": 0.1081, "step": 161540 }, { "epoch": 3.288549618320611, "grad_norm": 0.02436535130511365, "learning_rate": 3.1640980081876825e-06, "loss": 0.0037, "step": 161550 }, { "epoch": 3.2887531806615775, "grad_norm": 0.06176355391447999, "learning_rate": 3.163437091874333e-06, "loss": 0.0395, "step": 161560 }, { "epoch": 3.2889567430025446, "grad_norm": 0.2035349793028397, "learning_rate": 3.1627762126527335e-06, "loss": 0.0078, "step": 161570 }, { "epoch": 3.2891603053435112, "grad_norm": 0.0486036650089424, "learning_rate": 3.162115370536236e-06, "loss": 0.0255, "step": 161580 }, { "epoch": 3.2893638676844783, "grad_norm": 0.046198391112870794, "learning_rate": 3.1614545655381873e-06, "loss": 0.0258, "step": 161590 }, { "epoch": 3.2895674300254454, "grad_norm": 0.010748267899821761, "learning_rate": 3.1607937976719294e-06, "loss": 0.1015, "step": 161600 }, { "epoch": 3.289770992366412, "grad_norm": 1.3913010478133947, "learning_rate": 3.1601330669508118e-06, "loss": 0.039, "step": 161610 }, { "epoch": 3.289974554707379, "grad_norm": 0.047723786272769936, "learning_rate": 3.1594723733881753e-06, "loss": 0.0475, "step": 161620 }, { "epoch": 3.290178117048346, "grad_norm": 6.716124854685629, "learning_rate": 3.1588117169973642e-06, "loss": 0.0591, "step": 161630 }, { "epoch": 3.290381679389313, "grad_norm": 0.01275864029092103, "learning_rate": 3.1581510977917234e-06, "loss": 0.1367, "step": 161640 }, { "epoch": 3.29058524173028, "grad_norm": 0.053787193479599905, "learning_rate": 3.1574905157845925e-06, "loss": 0.0068, "step": 161650 }, { "epoch": 3.290788804071247, "grad_norm": 0.10590886618090348, "learning_rate": 3.1568299709893134e-06, "loss": 0.0447, "step": 161660 }, { "epoch": 3.2909923664122136, "grad_norm": 0.07926115437129173, "learning_rate": 3.1561694634192286e-06, "loss": 0.0844, "step": 161670 }, { "epoch": 3.2911959287531807, "grad_norm": 0.03713643889272306, "learning_rate": 3.155508993087675e-06, "loss": 0.0413, "step": 161680 }, { "epoch": 3.2913994910941478, "grad_norm": 13.578540685472436, "learning_rate": 3.154848560007995e-06, "loss": 0.0829, "step": 161690 }, { "epoch": 3.2916030534351144, "grad_norm": 0.04169527521482526, "learning_rate": 3.154188164193523e-06, "loss": 0.0565, "step": 161700 }, { "epoch": 3.2918066157760815, "grad_norm": 0.04711670621213415, "learning_rate": 3.153527805657598e-06, "loss": 0.0611, "step": 161710 }, { "epoch": 3.2920101781170485, "grad_norm": 0.19659949996074824, "learning_rate": 3.152867484413559e-06, "loss": 0.0452, "step": 161720 }, { "epoch": 3.292213740458015, "grad_norm": 6.706606768390649, "learning_rate": 3.152207200474738e-06, "loss": 0.0732, "step": 161730 }, { "epoch": 3.2924173027989823, "grad_norm": 0.17866463871121402, "learning_rate": 3.151546953854473e-06, "loss": 0.0474, "step": 161740 }, { "epoch": 3.2926208651399493, "grad_norm": 0.04682453000794142, "learning_rate": 3.1508867445660996e-06, "loss": 0.0625, "step": 161750 }, { "epoch": 3.292824427480916, "grad_norm": 0.06779395354723836, "learning_rate": 3.1502265726229493e-06, "loss": 0.1372, "step": 161760 }, { "epoch": 3.293027989821883, "grad_norm": 0.2276256482544741, "learning_rate": 3.149566438038356e-06, "loss": 0.1111, "step": 161770 }, { "epoch": 3.2932315521628497, "grad_norm": 0.7671095222366865, "learning_rate": 3.148906340825651e-06, "loss": 0.0747, "step": 161780 }, { "epoch": 3.2934351145038168, "grad_norm": 0.019471871382282797, "learning_rate": 3.1482462809981673e-06, "loss": 0.0789, "step": 161790 }, { "epoch": 3.293638676844784, "grad_norm": 0.027514077897125195, "learning_rate": 3.147586258569236e-06, "loss": 0.0599, "step": 161800 }, { "epoch": 3.2938422391857505, "grad_norm": 0.13911832384251574, "learning_rate": 3.146926273552183e-06, "loss": 0.1218, "step": 161810 }, { "epoch": 3.2940458015267176, "grad_norm": 0.01370889109268573, "learning_rate": 3.1462663259603456e-06, "loss": 0.0035, "step": 161820 }, { "epoch": 3.2942493638676846, "grad_norm": 0.16288315074095244, "learning_rate": 3.1456064158070425e-06, "loss": 0.01, "step": 161830 }, { "epoch": 3.2944529262086513, "grad_norm": 0.09635940786817337, "learning_rate": 3.144946543105608e-06, "loss": 0.0031, "step": 161840 }, { "epoch": 3.2946564885496183, "grad_norm": 11.544044447009592, "learning_rate": 3.1442867078693682e-06, "loss": 0.1105, "step": 161850 }, { "epoch": 3.2948600508905854, "grad_norm": 0.009809766562434496, "learning_rate": 3.143626910111647e-06, "loss": 0.0913, "step": 161860 }, { "epoch": 3.295063613231552, "grad_norm": 0.20823804360109932, "learning_rate": 3.1429671498457725e-06, "loss": 0.0887, "step": 161870 }, { "epoch": 3.295267175572519, "grad_norm": 0.03113274255793945, "learning_rate": 3.1423074270850683e-06, "loss": 0.0008, "step": 161880 }, { "epoch": 3.2954707379134858, "grad_norm": 0.03245571711264147, "learning_rate": 3.141647741842857e-06, "loss": 0.0497, "step": 161890 }, { "epoch": 3.295674300254453, "grad_norm": 0.49888046294276933, "learning_rate": 3.1409880941324644e-06, "loss": 0.0757, "step": 161900 }, { "epoch": 3.29587786259542, "grad_norm": 1.1700239195223614, "learning_rate": 3.1403284839672116e-06, "loss": 0.0921, "step": 161910 }, { "epoch": 3.2960814249363866, "grad_norm": 0.1209334345131191, "learning_rate": 3.139668911360418e-06, "loss": 0.0677, "step": 161920 }, { "epoch": 3.2962849872773536, "grad_norm": 0.025584332142059765, "learning_rate": 3.1390093763254102e-06, "loss": 0.0307, "step": 161930 }, { "epoch": 3.2964885496183207, "grad_norm": 0.08180365461998859, "learning_rate": 3.1383498788755034e-06, "loss": 0.0542, "step": 161940 }, { "epoch": 3.2966921119592874, "grad_norm": 8.683641276186563, "learning_rate": 3.1376904190240177e-06, "loss": 0.0506, "step": 161950 }, { "epoch": 3.2968956743002544, "grad_norm": 1.3267895290545664, "learning_rate": 3.1370309967842754e-06, "loss": 0.0311, "step": 161960 }, { "epoch": 3.2970992366412215, "grad_norm": 0.01463242840142942, "learning_rate": 3.136371612169589e-06, "loss": 0.045, "step": 161970 }, { "epoch": 3.297302798982188, "grad_norm": 3.194883808963842, "learning_rate": 3.1357122651932796e-06, "loss": 0.0043, "step": 161980 }, { "epoch": 3.297506361323155, "grad_norm": 14.005983675496976, "learning_rate": 3.1350529558686615e-06, "loss": 0.0731, "step": 161990 }, { "epoch": 3.2977099236641223, "grad_norm": 0.04336212491026854, "learning_rate": 3.1343936842090507e-06, "loss": 0.0071, "step": 162000 }, { "epoch": 3.297913486005089, "grad_norm": 1.2856681680230126, "learning_rate": 3.1337344502277633e-06, "loss": 0.1192, "step": 162010 }, { "epoch": 3.298117048346056, "grad_norm": 5.0027920450437255, "learning_rate": 3.1330752539381115e-06, "loss": 0.0301, "step": 162020 }, { "epoch": 3.298320610687023, "grad_norm": 1.2904218559059013, "learning_rate": 3.1324160953534087e-06, "loss": 0.107, "step": 162030 }, { "epoch": 3.2985241730279897, "grad_norm": 0.11128697401686433, "learning_rate": 3.13175697448697e-06, "loss": 0.0158, "step": 162040 }, { "epoch": 3.298727735368957, "grad_norm": 0.09064192255876735, "learning_rate": 3.1310978913521057e-06, "loss": 0.049, "step": 162050 }, { "epoch": 3.298931297709924, "grad_norm": 0.15415975245405011, "learning_rate": 3.1304388459621264e-06, "loss": 0.0251, "step": 162060 }, { "epoch": 3.2991348600508905, "grad_norm": 2.5804885796606944, "learning_rate": 3.129779838330341e-06, "loss": 0.093, "step": 162070 }, { "epoch": 3.2993384223918576, "grad_norm": 12.815201844637555, "learning_rate": 3.1291208684700615e-06, "loss": 0.0853, "step": 162080 }, { "epoch": 3.2995419847328247, "grad_norm": 0.016660762992410972, "learning_rate": 3.128461936394597e-06, "loss": 0.0285, "step": 162090 }, { "epoch": 3.2997455470737913, "grad_norm": 0.07160468654829558, "learning_rate": 3.127803042117253e-06, "loss": 0.0828, "step": 162100 }, { "epoch": 3.2999491094147584, "grad_norm": 0.03957855382184336, "learning_rate": 3.127144185651339e-06, "loss": 0.0087, "step": 162110 }, { "epoch": 3.300152671755725, "grad_norm": 0.06667810992620034, "learning_rate": 3.126485367010158e-06, "loss": 0.0315, "step": 162120 }, { "epoch": 3.300356234096692, "grad_norm": 13.298255121301294, "learning_rate": 3.12582658620702e-06, "loss": 0.113, "step": 162130 }, { "epoch": 3.300559796437659, "grad_norm": 0.034271191582477385, "learning_rate": 3.125167843255228e-06, "loss": 0.059, "step": 162140 }, { "epoch": 3.300763358778626, "grad_norm": 0.03649000110180219, "learning_rate": 3.1245091381680846e-06, "loss": 0.0079, "step": 162150 }, { "epoch": 3.300966921119593, "grad_norm": 0.03731702783797888, "learning_rate": 3.123850470958896e-06, "loss": 0.0191, "step": 162160 }, { "epoch": 3.30117048346056, "grad_norm": 10.335452341420414, "learning_rate": 3.1231918416409646e-06, "loss": 0.0392, "step": 162170 }, { "epoch": 3.3013740458015266, "grad_norm": 0.30748323913779546, "learning_rate": 3.1225332502275883e-06, "loss": 0.0782, "step": 162180 }, { "epoch": 3.3015776081424937, "grad_norm": 2.084363449140999, "learning_rate": 3.1218746967320747e-06, "loss": 0.0037, "step": 162190 }, { "epoch": 3.3017811704834603, "grad_norm": 0.39443809638991767, "learning_rate": 3.1212161811677185e-06, "loss": 0.0406, "step": 162200 }, { "epoch": 3.3019847328244274, "grad_norm": 0.044349880364178654, "learning_rate": 3.120557703547821e-06, "loss": 0.0688, "step": 162210 }, { "epoch": 3.3021882951653945, "grad_norm": 0.034403574570777216, "learning_rate": 3.119899263885683e-06, "loss": 0.0203, "step": 162220 }, { "epoch": 3.302391857506361, "grad_norm": 0.693118901272816, "learning_rate": 3.119240862194599e-06, "loss": 0.0314, "step": 162230 }, { "epoch": 3.302595419847328, "grad_norm": 0.04083816208827464, "learning_rate": 3.118582498487869e-06, "loss": 0.0344, "step": 162240 }, { "epoch": 3.3027989821882953, "grad_norm": 3.159716791078279, "learning_rate": 3.1179241727787903e-06, "loss": 0.0662, "step": 162250 }, { "epoch": 3.303002544529262, "grad_norm": 0.017815923698202688, "learning_rate": 3.117265885080655e-06, "loss": 0.0888, "step": 162260 }, { "epoch": 3.303206106870229, "grad_norm": 0.2863530558164325, "learning_rate": 3.1166076354067615e-06, "loss": 0.0029, "step": 162270 }, { "epoch": 3.303409669211196, "grad_norm": 0.04591213024975421, "learning_rate": 3.1159494237704014e-06, "loss": 0.1067, "step": 162280 }, { "epoch": 3.3036132315521627, "grad_norm": 0.1519024761549044, "learning_rate": 3.1152912501848686e-06, "loss": 0.0619, "step": 162290 }, { "epoch": 3.3038167938931298, "grad_norm": 0.07350221778458843, "learning_rate": 3.114633114663459e-06, "loss": 0.0473, "step": 162300 }, { "epoch": 3.304020356234097, "grad_norm": 12.275695512116345, "learning_rate": 3.1139750172194604e-06, "loss": 0.1141, "step": 162310 }, { "epoch": 3.3042239185750635, "grad_norm": 0.02965840176010375, "learning_rate": 3.1133169578661648e-06, "loss": 0.0628, "step": 162320 }, { "epoch": 3.3044274809160306, "grad_norm": 0.00730438894295736, "learning_rate": 3.1126589366168653e-06, "loss": 0.0706, "step": 162330 }, { "epoch": 3.3046310432569976, "grad_norm": 33.73539313946633, "learning_rate": 3.1120009534848484e-06, "loss": 0.0273, "step": 162340 }, { "epoch": 3.3048346055979643, "grad_norm": 5.491277920027862, "learning_rate": 3.1113430084834046e-06, "loss": 0.103, "step": 162350 }, { "epoch": 3.3050381679389313, "grad_norm": 0.014244435035747377, "learning_rate": 3.11068510162582e-06, "loss": 0.0009, "step": 162360 }, { "epoch": 3.3052417302798984, "grad_norm": 0.2446784435301124, "learning_rate": 3.1100272329253835e-06, "loss": 0.0424, "step": 162370 }, { "epoch": 3.305445292620865, "grad_norm": 4.785426606351575, "learning_rate": 3.1093694023953823e-06, "loss": 0.0207, "step": 162380 }, { "epoch": 3.305648854961832, "grad_norm": 0.09268956280360002, "learning_rate": 3.1087116100491003e-06, "loss": 0.007, "step": 162390 }, { "epoch": 3.305852417302799, "grad_norm": 11.871970034544432, "learning_rate": 3.108053855899823e-06, "loss": 0.1113, "step": 162400 }, { "epoch": 3.306055979643766, "grad_norm": 0.04830719434238026, "learning_rate": 3.107396139960836e-06, "loss": 0.0629, "step": 162410 }, { "epoch": 3.306259541984733, "grad_norm": 5.434713535220498, "learning_rate": 3.1067384622454212e-06, "loss": 0.0842, "step": 162420 }, { "epoch": 3.3064631043256996, "grad_norm": 0.04241394906423453, "learning_rate": 3.106080822766862e-06, "loss": 0.1175, "step": 162430 }, { "epoch": 3.3066666666666666, "grad_norm": 0.1475834043551697, "learning_rate": 3.1054232215384383e-06, "loss": 0.0011, "step": 162440 }, { "epoch": 3.3068702290076337, "grad_norm": 0.09442671379346429, "learning_rate": 3.1047656585734337e-06, "loss": 0.1146, "step": 162450 }, { "epoch": 3.3070737913486004, "grad_norm": 0.04078171237852046, "learning_rate": 3.1041081338851285e-06, "loss": 0.1524, "step": 162460 }, { "epoch": 3.3072773536895674, "grad_norm": 25.96447139896479, "learning_rate": 3.1034506474868e-06, "loss": 0.0519, "step": 162470 }, { "epoch": 3.3074809160305345, "grad_norm": 9.328300043696437, "learning_rate": 3.1027931993917305e-06, "loss": 0.0605, "step": 162480 }, { "epoch": 3.307684478371501, "grad_norm": 0.13257204394291172, "learning_rate": 3.1021357896131943e-06, "loss": 0.0196, "step": 162490 }, { "epoch": 3.3078880407124682, "grad_norm": 0.07385743339848141, "learning_rate": 3.10147841816447e-06, "loss": 0.0357, "step": 162500 }, { "epoch": 3.3080916030534353, "grad_norm": 0.015426103386034309, "learning_rate": 3.100821085058836e-06, "loss": 0.0554, "step": 162510 }, { "epoch": 3.308295165394402, "grad_norm": 0.08701472032630557, "learning_rate": 3.100163790309565e-06, "loss": 0.0657, "step": 162520 }, { "epoch": 3.308498727735369, "grad_norm": 0.04799645836616707, "learning_rate": 3.099506533929933e-06, "loss": 0.0465, "step": 162530 }, { "epoch": 3.3087022900763357, "grad_norm": 0.12018873221736051, "learning_rate": 3.0988493159332157e-06, "loss": 0.1027, "step": 162540 }, { "epoch": 3.3089058524173027, "grad_norm": 8.140838989860711, "learning_rate": 3.098192136332683e-06, "loss": 0.1559, "step": 162550 }, { "epoch": 3.30910941475827, "grad_norm": 0.08841726723997424, "learning_rate": 3.0975349951416125e-06, "loss": 0.0022, "step": 162560 }, { "epoch": 3.3093129770992364, "grad_norm": 0.2962772893165185, "learning_rate": 3.096877892373271e-06, "loss": 0.0619, "step": 162570 }, { "epoch": 3.3095165394402035, "grad_norm": 0.010470370496421669, "learning_rate": 3.096220828040931e-06, "loss": 0.053, "step": 162580 }, { "epoch": 3.3097201017811706, "grad_norm": 1.5209141391049756, "learning_rate": 3.0955638021578656e-06, "loss": 0.0356, "step": 162590 }, { "epoch": 3.3099236641221372, "grad_norm": 0.049543735086459914, "learning_rate": 3.0949068147373406e-06, "loss": 0.0582, "step": 162600 }, { "epoch": 3.3101272264631043, "grad_norm": 11.590956891739157, "learning_rate": 3.094249865792626e-06, "loss": 0.1077, "step": 162610 }, { "epoch": 3.3103307888040714, "grad_norm": 0.16964138335698892, "learning_rate": 3.0935929553369915e-06, "loss": 0.0581, "step": 162620 }, { "epoch": 3.310534351145038, "grad_norm": 11.023222117808896, "learning_rate": 3.092936083383702e-06, "loss": 0.0649, "step": 162630 }, { "epoch": 3.310737913486005, "grad_norm": 11.774264209284823, "learning_rate": 3.0922792499460253e-06, "loss": 0.0613, "step": 162640 }, { "epoch": 3.310941475826972, "grad_norm": 0.3298765331464211, "learning_rate": 3.091622455037224e-06, "loss": 0.0674, "step": 162650 }, { "epoch": 3.311145038167939, "grad_norm": 0.030253518873094278, "learning_rate": 3.0909656986705668e-06, "loss": 0.0012, "step": 162660 }, { "epoch": 3.311348600508906, "grad_norm": 0.0581062037573632, "learning_rate": 3.090308980859317e-06, "loss": 0.0079, "step": 162670 }, { "epoch": 3.311552162849873, "grad_norm": 0.08162316315069758, "learning_rate": 3.0896523016167356e-06, "loss": 0.0349, "step": 162680 }, { "epoch": 3.3117557251908396, "grad_norm": 0.015904684294662772, "learning_rate": 3.088995660956086e-06, "loss": 0.0179, "step": 162690 }, { "epoch": 3.3119592875318067, "grad_norm": 15.545665967435243, "learning_rate": 3.0883390588906316e-06, "loss": 0.0892, "step": 162700 }, { "epoch": 3.3121628498727738, "grad_norm": 0.16076555083559252, "learning_rate": 3.0876824954336316e-06, "loss": 0.0247, "step": 162710 }, { "epoch": 3.3123664122137404, "grad_norm": 0.8380490558979474, "learning_rate": 3.087025970598348e-06, "loss": 0.0041, "step": 162720 }, { "epoch": 3.3125699745547075, "grad_norm": 11.67852925700401, "learning_rate": 3.0863694843980365e-06, "loss": 0.1239, "step": 162730 }, { "epoch": 3.312773536895674, "grad_norm": 7.953217771437778, "learning_rate": 3.0857130368459587e-06, "loss": 0.0636, "step": 162740 }, { "epoch": 3.312977099236641, "grad_norm": 0.00819644177297126, "learning_rate": 3.085056627955373e-06, "loss": 0.0169, "step": 162750 }, { "epoch": 3.3131806615776083, "grad_norm": 0.7100087858718698, "learning_rate": 3.084400257739533e-06, "loss": 0.022, "step": 162760 }, { "epoch": 3.313384223918575, "grad_norm": 4.478313215178526, "learning_rate": 3.0837439262116986e-06, "loss": 0.0552, "step": 162770 }, { "epoch": 3.313587786259542, "grad_norm": 0.020433212173181406, "learning_rate": 3.0830876333851244e-06, "loss": 0.0342, "step": 162780 }, { "epoch": 3.313791348600509, "grad_norm": 0.004178455851705504, "learning_rate": 3.082431379273062e-06, "loss": 0.0491, "step": 162790 }, { "epoch": 3.3139949109414757, "grad_norm": 24.734403118946737, "learning_rate": 3.081775163888771e-06, "loss": 0.0707, "step": 162800 }, { "epoch": 3.3141984732824428, "grad_norm": 0.14822266922373625, "learning_rate": 3.0811189872454985e-06, "loss": 0.0087, "step": 162810 }, { "epoch": 3.31440203562341, "grad_norm": 12.988929682481656, "learning_rate": 3.0804628493565003e-06, "loss": 0.0936, "step": 162820 }, { "epoch": 3.3146055979643765, "grad_norm": 7.138532055169716, "learning_rate": 3.079806750235028e-06, "loss": 0.037, "step": 162830 }, { "epoch": 3.3148091603053436, "grad_norm": 0.04044163300178331, "learning_rate": 3.0791506898943303e-06, "loss": 0.0224, "step": 162840 }, { "epoch": 3.31501272264631, "grad_norm": 10.79244559887481, "learning_rate": 3.0784946683476596e-06, "loss": 0.0731, "step": 162850 }, { "epoch": 3.3152162849872773, "grad_norm": 0.030412284403162022, "learning_rate": 3.0778386856082633e-06, "loss": 0.0505, "step": 162860 }, { "epoch": 3.3154198473282444, "grad_norm": 11.117919691023872, "learning_rate": 3.0771827416893894e-06, "loss": 0.0902, "step": 162870 }, { "epoch": 3.315623409669211, "grad_norm": 0.06636187239257144, "learning_rate": 3.076526836604289e-06, "loss": 0.0435, "step": 162880 }, { "epoch": 3.315826972010178, "grad_norm": 0.009899372722333164, "learning_rate": 3.075870970366205e-06, "loss": 0.1977, "step": 162890 }, { "epoch": 3.316030534351145, "grad_norm": 0.0042200518582912825, "learning_rate": 3.0752151429883836e-06, "loss": 0.0365, "step": 162900 }, { "epoch": 3.3162340966921118, "grad_norm": 8.175255456035908, "learning_rate": 3.0745593544840746e-06, "loss": 0.0345, "step": 162910 }, { "epoch": 3.316437659033079, "grad_norm": 0.009196270568529657, "learning_rate": 3.0739036048665175e-06, "loss": 0.0458, "step": 162920 }, { "epoch": 3.316641221374046, "grad_norm": 11.935582485471201, "learning_rate": 3.0732478941489596e-06, "loss": 0.0662, "step": 162930 }, { "epoch": 3.3168447837150126, "grad_norm": 0.021318820251382486, "learning_rate": 3.0725922223446407e-06, "loss": 0.1181, "step": 162940 }, { "epoch": 3.3170483460559796, "grad_norm": 0.029380472339001585, "learning_rate": 3.0719365894668035e-06, "loss": 0.0832, "step": 162950 }, { "epoch": 3.3172519083969467, "grad_norm": 0.09757075552138425, "learning_rate": 3.0712809955286925e-06, "loss": 0.0259, "step": 162960 }, { "epoch": 3.3174554707379134, "grad_norm": 0.04041773320963146, "learning_rate": 3.0706254405435443e-06, "loss": 0.0054, "step": 162970 }, { "epoch": 3.3176590330788804, "grad_norm": 0.34069349302030605, "learning_rate": 3.0699699245245997e-06, "loss": 0.1556, "step": 162980 }, { "epoch": 3.3178625954198475, "grad_norm": 0.07662666874828238, "learning_rate": 3.0693144474851e-06, "loss": 0.0435, "step": 162990 }, { "epoch": 3.318066157760814, "grad_norm": 0.03170918987565346, "learning_rate": 3.0686590094382817e-06, "loss": 0.1103, "step": 163000 }, { "epoch": 3.3182697201017812, "grad_norm": 0.21180036387538698, "learning_rate": 3.068003610397382e-06, "loss": 0.013, "step": 163010 }, { "epoch": 3.3184732824427483, "grad_norm": 0.029465350050863124, "learning_rate": 3.067348250375637e-06, "loss": 0.0013, "step": 163020 }, { "epoch": 3.318676844783715, "grad_norm": 0.13641600857417496, "learning_rate": 3.0666929293862833e-06, "loss": 0.0036, "step": 163030 }, { "epoch": 3.318880407124682, "grad_norm": 0.03362881173794983, "learning_rate": 3.066037647442557e-06, "loss": 0.0019, "step": 163040 }, { "epoch": 3.319083969465649, "grad_norm": 0.08912564545366859, "learning_rate": 3.0653824045576903e-06, "loss": 0.0491, "step": 163050 }, { "epoch": 3.3192875318066157, "grad_norm": 0.05931472811052385, "learning_rate": 3.064727200744918e-06, "loss": 0.0012, "step": 163060 }, { "epoch": 3.319491094147583, "grad_norm": 0.3469941463330599, "learning_rate": 3.0640720360174737e-06, "loss": 0.0041, "step": 163070 }, { "epoch": 3.3196946564885494, "grad_norm": 42.115432522662935, "learning_rate": 3.063416910388587e-06, "loss": 0.0747, "step": 163080 }, { "epoch": 3.3198982188295165, "grad_norm": 0.013407227952660274, "learning_rate": 3.062761823871491e-06, "loss": 0.0631, "step": 163090 }, { "epoch": 3.3201017811704836, "grad_norm": 0.02454823886754841, "learning_rate": 3.0621067764794133e-06, "loss": 0.0749, "step": 163100 }, { "epoch": 3.3203053435114502, "grad_norm": 0.03532340396360389, "learning_rate": 3.0614517682255863e-06, "loss": 0.0139, "step": 163110 }, { "epoch": 3.3205089058524173, "grad_norm": 0.2427383485095701, "learning_rate": 3.0607967991232377e-06, "loss": 0.0827, "step": 163120 }, { "epoch": 3.3207124681933844, "grad_norm": 28.633858396214166, "learning_rate": 3.0601418691855945e-06, "loss": 0.057, "step": 163130 }, { "epoch": 3.320916030534351, "grad_norm": 7.324687669551994, "learning_rate": 3.0594869784258857e-06, "loss": 0.0688, "step": 163140 }, { "epoch": 3.321119592875318, "grad_norm": 0.006806155980507132, "learning_rate": 3.0588321268573372e-06, "loss": 0.0591, "step": 163150 }, { "epoch": 3.3213231552162847, "grad_norm": 9.371608268847483, "learning_rate": 3.0581773144931725e-06, "loss": 0.12, "step": 163160 }, { "epoch": 3.321526717557252, "grad_norm": 35.854971702620176, "learning_rate": 3.05752254134662e-06, "loss": 0.0669, "step": 163170 }, { "epoch": 3.321730279898219, "grad_norm": 13.109423299199262, "learning_rate": 3.056867807430901e-06, "loss": 0.0815, "step": 163180 }, { "epoch": 3.3219338422391855, "grad_norm": 0.6701617153260402, "learning_rate": 3.0562131127592378e-06, "loss": 0.1093, "step": 163190 }, { "epoch": 3.3221374045801526, "grad_norm": 0.1838443290560768, "learning_rate": 3.0555584573448567e-06, "loss": 0.0576, "step": 163200 }, { "epoch": 3.3223409669211197, "grad_norm": 0.3864530352745629, "learning_rate": 3.0549038412009755e-06, "loss": 0.0015, "step": 163210 }, { "epoch": 3.3225445292620863, "grad_norm": 0.9155645254453564, "learning_rate": 3.054249264340818e-06, "loss": 0.0007, "step": 163220 }, { "epoch": 3.3227480916030534, "grad_norm": 0.6658958744166029, "learning_rate": 3.0535947267776012e-06, "loss": 0.1178, "step": 163230 }, { "epoch": 3.3229516539440205, "grad_norm": 7.815707923884099, "learning_rate": 3.0529402285245453e-06, "loss": 0.0444, "step": 163240 }, { "epoch": 3.323155216284987, "grad_norm": 0.02471953429265157, "learning_rate": 3.052285769594871e-06, "loss": 0.0097, "step": 163250 }, { "epoch": 3.323358778625954, "grad_norm": 8.597062109787132, "learning_rate": 3.0516313500017937e-06, "loss": 0.0075, "step": 163260 }, { "epoch": 3.3235623409669213, "grad_norm": 0.028031346385735297, "learning_rate": 3.050976969758529e-06, "loss": 0.0415, "step": 163270 }, { "epoch": 3.323765903307888, "grad_norm": 0.021512188536047253, "learning_rate": 3.050322628878297e-06, "loss": 0.0796, "step": 163280 }, { "epoch": 3.323969465648855, "grad_norm": 0.20296836927268236, "learning_rate": 3.04966832737431e-06, "loss": 0.0873, "step": 163290 }, { "epoch": 3.324173027989822, "grad_norm": 0.08265924245022122, "learning_rate": 3.049014065259784e-06, "loss": 0.0477, "step": 163300 }, { "epoch": 3.3243765903307887, "grad_norm": 0.019320006918762622, "learning_rate": 3.04835984254793e-06, "loss": 0.1084, "step": 163310 }, { "epoch": 3.3245801526717558, "grad_norm": 0.15737375148043764, "learning_rate": 3.0477056592519634e-06, "loss": 0.0397, "step": 163320 }, { "epoch": 3.324783715012723, "grad_norm": 30.774122821022278, "learning_rate": 3.0470515153850968e-06, "loss": 0.0936, "step": 163330 }, { "epoch": 3.3249872773536895, "grad_norm": 0.013670590151477213, "learning_rate": 3.0463974109605378e-06, "loss": 0.0508, "step": 163340 }, { "epoch": 3.3251908396946566, "grad_norm": 12.168453854827586, "learning_rate": 3.0457433459915e-06, "loss": 0.1103, "step": 163350 }, { "epoch": 3.3253944020356236, "grad_norm": 17.599233913755036, "learning_rate": 3.045089320491194e-06, "loss": 0.0709, "step": 163360 }, { "epoch": 3.3255979643765903, "grad_norm": 39.16120399396935, "learning_rate": 3.044435334472826e-06, "loss": 0.0573, "step": 163370 }, { "epoch": 3.3258015267175574, "grad_norm": 0.07900066259301826, "learning_rate": 3.043781387949606e-06, "loss": 0.0268, "step": 163380 }, { "epoch": 3.326005089058524, "grad_norm": 26.399071566083098, "learning_rate": 3.043127480934739e-06, "loss": 0.0423, "step": 163390 }, { "epoch": 3.326208651399491, "grad_norm": 0.0826654020303722, "learning_rate": 3.0424736134414333e-06, "loss": 0.0458, "step": 163400 }, { "epoch": 3.326412213740458, "grad_norm": 0.03890770857496005, "learning_rate": 3.0418197854828955e-06, "loss": 0.0335, "step": 163410 }, { "epoch": 3.326615776081425, "grad_norm": 5.478811613660154, "learning_rate": 3.0411659970723273e-06, "loss": 0.0763, "step": 163420 }, { "epoch": 3.326819338422392, "grad_norm": 0.04806966087680108, "learning_rate": 3.0405122482229356e-06, "loss": 0.0074, "step": 163430 }, { "epoch": 3.327022900763359, "grad_norm": 0.034474422343058524, "learning_rate": 3.039858538947924e-06, "loss": 0.0083, "step": 163440 }, { "epoch": 3.3272264631043256, "grad_norm": 16.664263173991554, "learning_rate": 3.039204869260492e-06, "loss": 0.0975, "step": 163450 }, { "epoch": 3.3274300254452926, "grad_norm": 0.7453861147808949, "learning_rate": 3.038551239173845e-06, "loss": 0.0442, "step": 163460 }, { "epoch": 3.3276335877862597, "grad_norm": 0.016071413853222093, "learning_rate": 3.0378976487011808e-06, "loss": 0.069, "step": 163470 }, { "epoch": 3.3278371501272264, "grad_norm": 0.009257067186473036, "learning_rate": 3.0372440978557e-06, "loss": 0.0551, "step": 163480 }, { "epoch": 3.3280407124681934, "grad_norm": 61.01231780480086, "learning_rate": 3.036590586650606e-06, "loss": 0.0584, "step": 163490 }, { "epoch": 3.32824427480916, "grad_norm": 16.28089435353939, "learning_rate": 3.0359371150990902e-06, "loss": 0.0876, "step": 163500 }, { "epoch": 3.328447837150127, "grad_norm": 5.541570401706263, "learning_rate": 3.0352836832143563e-06, "loss": 0.0195, "step": 163510 }, { "epoch": 3.3286513994910942, "grad_norm": 0.06308388932196567, "learning_rate": 3.034630291009598e-06, "loss": 0.0009, "step": 163520 }, { "epoch": 3.328854961832061, "grad_norm": 0.05408352235975383, "learning_rate": 3.033976938498012e-06, "loss": 0.0745, "step": 163530 }, { "epoch": 3.329058524173028, "grad_norm": 19.58224970399931, "learning_rate": 3.033323625692795e-06, "loss": 0.1012, "step": 163540 }, { "epoch": 3.329262086513995, "grad_norm": 2.727337361065417, "learning_rate": 3.03267035260714e-06, "loss": 0.0019, "step": 163550 }, { "epoch": 3.3294656488549617, "grad_norm": 0.28285732169818445, "learning_rate": 3.03201711925424e-06, "loss": 0.0016, "step": 163560 }, { "epoch": 3.3296692111959287, "grad_norm": 0.0369949174702061, "learning_rate": 3.0313639256472905e-06, "loss": 0.0413, "step": 163570 }, { "epoch": 3.329872773536896, "grad_norm": 40.39042737613922, "learning_rate": 3.0307107717994816e-06, "loss": 0.0683, "step": 163580 }, { "epoch": 3.3300763358778624, "grad_norm": 0.13589508054629856, "learning_rate": 3.030057657724006e-06, "loss": 0.1251, "step": 163590 }, { "epoch": 3.3302798982188295, "grad_norm": 0.061590400190055294, "learning_rate": 3.0294045834340514e-06, "loss": 0.0544, "step": 163600 }, { "epoch": 3.3304834605597966, "grad_norm": 58.35796911531635, "learning_rate": 3.0287515489428105e-06, "loss": 0.0478, "step": 163610 }, { "epoch": 3.3306870229007632, "grad_norm": 13.337969753059213, "learning_rate": 3.0280985542634716e-06, "loss": 0.106, "step": 163620 }, { "epoch": 3.3308905852417303, "grad_norm": 0.07809000738068649, "learning_rate": 3.027445599409221e-06, "loss": 0.0387, "step": 163630 }, { "epoch": 3.3310941475826974, "grad_norm": 0.027776273775192976, "learning_rate": 3.026792684393247e-06, "loss": 0.0129, "step": 163640 }, { "epoch": 3.331297709923664, "grad_norm": 1.0163701652349328, "learning_rate": 3.0261398092287374e-06, "loss": 0.0011, "step": 163650 }, { "epoch": 3.331501272264631, "grad_norm": 7.988538291448914, "learning_rate": 3.025486973928876e-06, "loss": 0.0884, "step": 163660 }, { "epoch": 3.331704834605598, "grad_norm": 0.22120047950114136, "learning_rate": 3.0248341785068496e-06, "loss": 0.0619, "step": 163670 }, { "epoch": 3.331908396946565, "grad_norm": 0.10786274948398443, "learning_rate": 3.0241814229758386e-06, "loss": 0.005, "step": 163680 }, { "epoch": 3.332111959287532, "grad_norm": 0.06080615944699898, "learning_rate": 3.0235287073490292e-06, "loss": 0.0069, "step": 163690 }, { "epoch": 3.3323155216284985, "grad_norm": 13.055111855585878, "learning_rate": 3.0228760316396055e-06, "loss": 0.034, "step": 163700 }, { "epoch": 3.3325190839694656, "grad_norm": 0.051913939938608114, "learning_rate": 3.0222233958607438e-06, "loss": 0.0045, "step": 163710 }, { "epoch": 3.3327226463104327, "grad_norm": 0.022209898613497033, "learning_rate": 3.0215708000256295e-06, "loss": 0.0037, "step": 163720 }, { "epoch": 3.3329262086513993, "grad_norm": 0.08384981736426245, "learning_rate": 3.0209182441474416e-06, "loss": 0.0049, "step": 163730 }, { "epoch": 3.3331297709923664, "grad_norm": 48.05702906803242, "learning_rate": 3.0202657282393566e-06, "loss": 0.0746, "step": 163740 }, { "epoch": 3.3333333333333335, "grad_norm": 0.0336076756659028, "learning_rate": 3.0196132523145585e-06, "loss": 0.03, "step": 163750 }, { "epoch": 3.3335368956743, "grad_norm": 9.06574297768003, "learning_rate": 3.018960816386219e-06, "loss": 0.0504, "step": 163760 }, { "epoch": 3.333740458015267, "grad_norm": 0.01127235980263115, "learning_rate": 3.018308420467517e-06, "loss": 0.0513, "step": 163770 }, { "epoch": 3.3339440203562343, "grad_norm": 0.005861752979650223, "learning_rate": 3.0176560645716314e-06, "loss": 0.033, "step": 163780 }, { "epoch": 3.334147582697201, "grad_norm": 5.222264515493954, "learning_rate": 3.0170037487117323e-06, "loss": 0.0685, "step": 163790 }, { "epoch": 3.334351145038168, "grad_norm": 0.011726348537104934, "learning_rate": 3.0163514729009976e-06, "loss": 0.0419, "step": 163800 }, { "epoch": 3.3345547073791346, "grad_norm": 0.01615023836117077, "learning_rate": 3.0156992371526005e-06, "loss": 0.1741, "step": 163810 }, { "epoch": 3.3347582697201017, "grad_norm": 0.1093248263371104, "learning_rate": 3.015047041479711e-06, "loss": 0.0279, "step": 163820 }, { "epoch": 3.3349618320610688, "grad_norm": 0.6903558083312455, "learning_rate": 3.014394885895505e-06, "loss": 0.0443, "step": 163830 }, { "epoch": 3.3351653944020354, "grad_norm": 0.005961795432402952, "learning_rate": 3.0137427704131517e-06, "loss": 0.0275, "step": 163840 }, { "epoch": 3.3353689567430025, "grad_norm": 10.881678359979988, "learning_rate": 3.013090695045819e-06, "loss": 0.0691, "step": 163850 }, { "epoch": 3.3355725190839696, "grad_norm": 55.72573867760828, "learning_rate": 3.0124386598066823e-06, "loss": 0.0708, "step": 163860 }, { "epoch": 3.335776081424936, "grad_norm": 0.07528774943051268, "learning_rate": 3.011786664708905e-06, "loss": 0.0548, "step": 163870 }, { "epoch": 3.3359796437659033, "grad_norm": 7.385835978110634, "learning_rate": 3.011134709765658e-06, "loss": 0.0406, "step": 163880 }, { "epoch": 3.3361832061068704, "grad_norm": 0.08018718676268904, "learning_rate": 3.010482794990105e-06, "loss": 0.0218, "step": 163890 }, { "epoch": 3.336386768447837, "grad_norm": 21.23979912324352, "learning_rate": 3.0098309203954156e-06, "loss": 0.1243, "step": 163900 }, { "epoch": 3.336590330788804, "grad_norm": 0.013268822950189035, "learning_rate": 3.0091790859947552e-06, "loss": 0.1215, "step": 163910 }, { "epoch": 3.336793893129771, "grad_norm": 1.3371655168039114, "learning_rate": 3.008527291801286e-06, "loss": 0.0843, "step": 163920 }, { "epoch": 3.336997455470738, "grad_norm": 0.07859880969872521, "learning_rate": 3.007875537828172e-06, "loss": 0.1177, "step": 163930 }, { "epoch": 3.337201017811705, "grad_norm": 1.7519255498963895, "learning_rate": 3.00722382408858e-06, "loss": 0.0842, "step": 163940 }, { "epoch": 3.337404580152672, "grad_norm": 0.03207148385132702, "learning_rate": 3.006572150595668e-06, "loss": 0.0838, "step": 163950 }, { "epoch": 3.3376081424936386, "grad_norm": 0.04172752305790077, "learning_rate": 3.0059205173626005e-06, "loss": 0.0714, "step": 163960 }, { "epoch": 3.3378117048346057, "grad_norm": 0.0540158103498357, "learning_rate": 3.0052689244025335e-06, "loss": 0.0416, "step": 163970 }, { "epoch": 3.3380152671755727, "grad_norm": 0.19413388838760465, "learning_rate": 3.004617371728632e-06, "loss": 0.0267, "step": 163980 }, { "epoch": 3.3382188295165394, "grad_norm": 0.015092446985947675, "learning_rate": 3.003965859354053e-06, "loss": 0.0344, "step": 163990 }, { "epoch": 3.3384223918575064, "grad_norm": 0.6046533766589383, "learning_rate": 3.0033143872919522e-06, "loss": 0.043, "step": 164000 }, { "epoch": 3.3386259541984735, "grad_norm": 0.07268843063429926, "learning_rate": 3.00266295555549e-06, "loss": 0.0015, "step": 164010 }, { "epoch": 3.33882951653944, "grad_norm": 13.855135290838513, "learning_rate": 3.0020115641578237e-06, "loss": 0.113, "step": 164020 }, { "epoch": 3.3390330788804072, "grad_norm": 0.3195469503464198, "learning_rate": 3.0013602131121046e-06, "loss": 0.0484, "step": 164030 }, { "epoch": 3.339236641221374, "grad_norm": 79.10634143109583, "learning_rate": 3.000708902431494e-06, "loss": 0.1036, "step": 164040 }, { "epoch": 3.339440203562341, "grad_norm": 9.458195487811919, "learning_rate": 3.000057632129138e-06, "loss": 0.0421, "step": 164050 }, { "epoch": 3.339643765903308, "grad_norm": 0.3129887919951222, "learning_rate": 2.999406402218196e-06, "loss": 0.0032, "step": 164060 }, { "epoch": 3.3398473282442747, "grad_norm": 0.14826695318328223, "learning_rate": 2.998755212711819e-06, "loss": 0.0871, "step": 164070 }, { "epoch": 3.3400508905852417, "grad_norm": 0.6052803943088721, "learning_rate": 2.9981040636231564e-06, "loss": 0.0694, "step": 164080 }, { "epoch": 3.340254452926209, "grad_norm": 0.07510923267526912, "learning_rate": 2.997452954965361e-06, "loss": 0.0796, "step": 164090 }, { "epoch": 3.3404580152671755, "grad_norm": 20.57287823741581, "learning_rate": 2.9968018867515835e-06, "loss": 0.0581, "step": 164100 }, { "epoch": 3.3406615776081425, "grad_norm": 0.10282750000984031, "learning_rate": 2.9961508589949707e-06, "loss": 0.011, "step": 164110 }, { "epoch": 3.3408651399491096, "grad_norm": 0.018119078206610757, "learning_rate": 2.9954998717086735e-06, "loss": 0.0256, "step": 164120 }, { "epoch": 3.3410687022900762, "grad_norm": 4.199066584077228, "learning_rate": 2.9948489249058375e-06, "loss": 0.0305, "step": 164130 }, { "epoch": 3.3412722646310433, "grad_norm": 0.011591944584396476, "learning_rate": 2.99419801859961e-06, "loss": 0.0066, "step": 164140 }, { "epoch": 3.34147582697201, "grad_norm": 0.00271406163117261, "learning_rate": 2.9935471528031377e-06, "loss": 0.1165, "step": 164150 }, { "epoch": 3.341679389312977, "grad_norm": 0.04042541740995469, "learning_rate": 2.9928963275295642e-06, "loss": 0.0028, "step": 164160 }, { "epoch": 3.341882951653944, "grad_norm": 6.030411937934236, "learning_rate": 2.992245542792034e-06, "loss": 0.1, "step": 164170 }, { "epoch": 3.3420865139949107, "grad_norm": 0.09540023036980931, "learning_rate": 2.9915947986036925e-06, "loss": 0.023, "step": 164180 }, { "epoch": 3.342290076335878, "grad_norm": 0.16513597711479122, "learning_rate": 2.9909440949776803e-06, "loss": 0.0009, "step": 164190 }, { "epoch": 3.342493638676845, "grad_norm": 0.07111761722194725, "learning_rate": 2.9902934319271402e-06, "loss": 0.0818, "step": 164200 }, { "epoch": 3.3426972010178115, "grad_norm": 0.0413909417789822, "learning_rate": 2.989642809465212e-06, "loss": 0.0337, "step": 164210 }, { "epoch": 3.3429007633587786, "grad_norm": 0.9118218714460958, "learning_rate": 2.988992227605035e-06, "loss": 0.0713, "step": 164220 }, { "epoch": 3.3431043256997457, "grad_norm": 52.002906911987466, "learning_rate": 2.9883416863597536e-06, "loss": 0.0266, "step": 164230 }, { "epoch": 3.3433078880407123, "grad_norm": 0.00546461600458273, "learning_rate": 2.9876911857425005e-06, "loss": 0.0531, "step": 164240 }, { "epoch": 3.3435114503816794, "grad_norm": 10.747372846250533, "learning_rate": 2.9870407257664176e-06, "loss": 0.0593, "step": 164250 }, { "epoch": 3.3437150127226465, "grad_norm": 0.0018256958278831568, "learning_rate": 2.9863903064446378e-06, "loss": 0.1093, "step": 164260 }, { "epoch": 3.343918575063613, "grad_norm": 0.06971415080614606, "learning_rate": 2.9857399277902993e-06, "loss": 0.0391, "step": 164270 }, { "epoch": 3.34412213740458, "grad_norm": 0.08181789075803868, "learning_rate": 2.9850895898165388e-06, "loss": 0.0018, "step": 164280 }, { "epoch": 3.3443256997455473, "grad_norm": 0.06071409997972502, "learning_rate": 2.9844392925364874e-06, "loss": 0.0058, "step": 164290 }, { "epoch": 3.344529262086514, "grad_norm": 0.019486199464758474, "learning_rate": 2.9837890359632817e-06, "loss": 0.0377, "step": 164300 }, { "epoch": 3.344732824427481, "grad_norm": 30.471084662621813, "learning_rate": 2.9831388201100542e-06, "loss": 0.1307, "step": 164310 }, { "epoch": 3.344936386768448, "grad_norm": 0.01919636936124098, "learning_rate": 2.982488644989934e-06, "loss": 0.0008, "step": 164320 }, { "epoch": 3.3451399491094147, "grad_norm": 0.029978920726319577, "learning_rate": 2.9818385106160565e-06, "loss": 0.0986, "step": 164330 }, { "epoch": 3.345343511450382, "grad_norm": 16.3007197174282, "learning_rate": 2.981188417001547e-06, "loss": 0.0647, "step": 164340 }, { "epoch": 3.3455470737913484, "grad_norm": 10.453317146329345, "learning_rate": 2.9805383641595386e-06, "loss": 0.0146, "step": 164350 }, { "epoch": 3.3457506361323155, "grad_norm": 0.04280216476660771, "learning_rate": 2.9798883521031607e-06, "loss": 0.0011, "step": 164360 }, { "epoch": 3.3459541984732826, "grad_norm": 54.97433993121427, "learning_rate": 2.9792383808455368e-06, "loss": 0.012, "step": 164370 }, { "epoch": 3.346157760814249, "grad_norm": 87.10861678224325, "learning_rate": 2.9785884503997974e-06, "loss": 0.0342, "step": 164380 }, { "epoch": 3.3463613231552163, "grad_norm": 0.06380539399214141, "learning_rate": 2.977938560779069e-06, "loss": 0.0567, "step": 164390 }, { "epoch": 3.3465648854961834, "grad_norm": 0.027192676357453878, "learning_rate": 2.977288711996474e-06, "loss": 0.0546, "step": 164400 }, { "epoch": 3.34676844783715, "grad_norm": 4.237900378503982, "learning_rate": 2.97663890406514e-06, "loss": 0.0157, "step": 164410 }, { "epoch": 3.346972010178117, "grad_norm": 0.00580076581699098, "learning_rate": 2.9759891369981885e-06, "loss": 0.05, "step": 164420 }, { "epoch": 3.347175572519084, "grad_norm": 14.957577025650716, "learning_rate": 2.975339410808743e-06, "loss": 0.104, "step": 164430 }, { "epoch": 3.347379134860051, "grad_norm": 13.661248371300719, "learning_rate": 2.9746897255099274e-06, "loss": 0.083, "step": 164440 }, { "epoch": 3.347582697201018, "grad_norm": 0.03456581162090513, "learning_rate": 2.9740400811148605e-06, "loss": 0.1067, "step": 164450 }, { "epoch": 3.3477862595419845, "grad_norm": 120.52039880210882, "learning_rate": 2.973390477636663e-06, "loss": 0.0448, "step": 164460 }, { "epoch": 3.3479898218829516, "grad_norm": 0.02247602013600197, "learning_rate": 2.9727409150884567e-06, "loss": 0.0449, "step": 164470 }, { "epoch": 3.3481933842239187, "grad_norm": 11.511675107625516, "learning_rate": 2.972091393483357e-06, "loss": 0.0559, "step": 164480 }, { "epoch": 3.3483969465648853, "grad_norm": 0.026974899235978445, "learning_rate": 2.971441912834485e-06, "loss": 0.0353, "step": 164490 }, { "epoch": 3.3486005089058524, "grad_norm": 2.4818622003722166, "learning_rate": 2.9707924731549555e-06, "loss": 0.0509, "step": 164500 }, { "epoch": 3.3488040712468194, "grad_norm": 0.5799992165392474, "learning_rate": 2.970143074457884e-06, "loss": 0.018, "step": 164510 }, { "epoch": 3.349007633587786, "grad_norm": 0.046124343714906396, "learning_rate": 2.969493716756391e-06, "loss": 0.0151, "step": 164520 }, { "epoch": 3.349211195928753, "grad_norm": 0.10710132199546675, "learning_rate": 2.968844400063585e-06, "loss": 0.1136, "step": 164530 }, { "epoch": 3.3494147582697202, "grad_norm": 10.192487604544441, "learning_rate": 2.9681951243925823e-06, "loss": 0.0338, "step": 164540 }, { "epoch": 3.349618320610687, "grad_norm": 0.010504275099586489, "learning_rate": 2.967545889756498e-06, "loss": 0.0148, "step": 164550 }, { "epoch": 3.349821882951654, "grad_norm": 0.18343113412551348, "learning_rate": 2.9668966961684408e-06, "loss": 0.0301, "step": 164560 }, { "epoch": 3.350025445292621, "grad_norm": 0.12436194796737647, "learning_rate": 2.966247543641525e-06, "loss": 0.0078, "step": 164570 }, { "epoch": 3.3502290076335877, "grad_norm": 5.298275381203133, "learning_rate": 2.9655984321888577e-06, "loss": 0.0559, "step": 164580 }, { "epoch": 3.3504325699745547, "grad_norm": 9.279997082418378, "learning_rate": 2.9649493618235502e-06, "loss": 0.0415, "step": 164590 }, { "epoch": 3.350636132315522, "grad_norm": 7.648442026094309, "learning_rate": 2.964300332558714e-06, "loss": 0.0897, "step": 164600 }, { "epoch": 3.3508396946564885, "grad_norm": 20.279443309786554, "learning_rate": 2.9636513444074523e-06, "loss": 0.0668, "step": 164610 }, { "epoch": 3.3510432569974555, "grad_norm": 0.04405275134376458, "learning_rate": 2.963002397382876e-06, "loss": 0.0523, "step": 164620 }, { "epoch": 3.3512468193384226, "grad_norm": 0.040251533576427835, "learning_rate": 2.9623534914980877e-06, "loss": 0.0488, "step": 164630 }, { "epoch": 3.3514503816793892, "grad_norm": 0.027096587198362825, "learning_rate": 2.9617046267661964e-06, "loss": 0.0055, "step": 164640 }, { "epoch": 3.3516539440203563, "grad_norm": 0.01829611385687926, "learning_rate": 2.9610558032003066e-06, "loss": 0.1065, "step": 164650 }, { "epoch": 3.3518575063613234, "grad_norm": 7.975382669548693, "learning_rate": 2.960407020813518e-06, "loss": 0.0845, "step": 164660 }, { "epoch": 3.35206106870229, "grad_norm": 0.07655633658173472, "learning_rate": 2.959758279618939e-06, "loss": 0.0818, "step": 164670 }, { "epoch": 3.352264631043257, "grad_norm": 0.03380977246951568, "learning_rate": 2.9591095796296698e-06, "loss": 0.0031, "step": 164680 }, { "epoch": 3.3524681933842237, "grad_norm": 0.04433646706746817, "learning_rate": 2.958460920858809e-06, "loss": 0.0578, "step": 164690 }, { "epoch": 3.352671755725191, "grad_norm": 0.03793688453398604, "learning_rate": 2.9578123033194616e-06, "loss": 0.0891, "step": 164700 }, { "epoch": 3.352875318066158, "grad_norm": 1.099191438443022, "learning_rate": 2.9571637270247237e-06, "loss": 0.0775, "step": 164710 }, { "epoch": 3.3530788804071245, "grad_norm": 0.011550177634043847, "learning_rate": 2.956515191987695e-06, "loss": 0.0235, "step": 164720 }, { "epoch": 3.3532824427480916, "grad_norm": 0.017360775686871, "learning_rate": 2.9558666982214754e-06, "loss": 0.0539, "step": 164730 }, { "epoch": 3.3534860050890587, "grad_norm": 1.5599468088706783, "learning_rate": 2.9552182457391604e-06, "loss": 0.119, "step": 164740 }, { "epoch": 3.3536895674300253, "grad_norm": 0.008265487093713188, "learning_rate": 2.9545698345538453e-06, "loss": 0.0322, "step": 164750 }, { "epoch": 3.3538931297709924, "grad_norm": 0.6413905335304527, "learning_rate": 2.95392146467863e-06, "loss": 0.0014, "step": 164760 }, { "epoch": 3.354096692111959, "grad_norm": 0.006092387211431636, "learning_rate": 2.9532731361266028e-06, "loss": 0.0011, "step": 164770 }, { "epoch": 3.354300254452926, "grad_norm": 7.146515209671511, "learning_rate": 2.9526248489108622e-06, "loss": 0.1049, "step": 164780 }, { "epoch": 3.354503816793893, "grad_norm": 0.13054992122087905, "learning_rate": 2.9519766030444995e-06, "loss": 0.0813, "step": 164790 }, { "epoch": 3.35470737913486, "grad_norm": 18.14035110807335, "learning_rate": 2.951328398540606e-06, "loss": 0.0613, "step": 164800 }, { "epoch": 3.354910941475827, "grad_norm": 0.21498336654200706, "learning_rate": 2.9506802354122767e-06, "loss": 0.0011, "step": 164810 }, { "epoch": 3.355114503816794, "grad_norm": 0.018995366760176415, "learning_rate": 2.950032113672598e-06, "loss": 0.0907, "step": 164820 }, { "epoch": 3.3553180661577606, "grad_norm": 0.020394023548886774, "learning_rate": 2.9493840333346603e-06, "loss": 0.004, "step": 164830 }, { "epoch": 3.3555216284987277, "grad_norm": 0.018044678203348267, "learning_rate": 2.9487359944115544e-06, "loss": 0.0292, "step": 164840 }, { "epoch": 3.355725190839695, "grad_norm": 0.1956423472268431, "learning_rate": 2.948087996916367e-06, "loss": 0.0016, "step": 164850 }, { "epoch": 3.3559287531806614, "grad_norm": 4.157815084038654, "learning_rate": 2.9474400408621863e-06, "loss": 0.0612, "step": 164860 }, { "epoch": 3.3561323155216285, "grad_norm": 1.849444901904834, "learning_rate": 2.9467921262620956e-06, "loss": 0.0731, "step": 164870 }, { "epoch": 3.3563358778625956, "grad_norm": 0.06290912512446137, "learning_rate": 2.9461442531291826e-06, "loss": 0.0083, "step": 164880 }, { "epoch": 3.356539440203562, "grad_norm": 0.008862287943244947, "learning_rate": 2.945496421476534e-06, "loss": 0.03, "step": 164890 }, { "epoch": 3.3567430025445293, "grad_norm": 0.04612396523517534, "learning_rate": 2.9448486313172296e-06, "loss": 0.1126, "step": 164900 }, { "epoch": 3.3569465648854964, "grad_norm": 0.006636051294625035, "learning_rate": 2.9442008826643535e-06, "loss": 0.0425, "step": 164910 }, { "epoch": 3.357150127226463, "grad_norm": 0.6918344519031001, "learning_rate": 2.9435531755309903e-06, "loss": 0.0921, "step": 164920 }, { "epoch": 3.35735368956743, "grad_norm": 1.2250836061129888, "learning_rate": 2.9429055099302186e-06, "loss": 0.033, "step": 164930 }, { "epoch": 3.357557251908397, "grad_norm": 0.023129618212469734, "learning_rate": 2.9422578858751203e-06, "loss": 0.1029, "step": 164940 }, { "epoch": 3.357760814249364, "grad_norm": 0.059119309174751535, "learning_rate": 2.9416103033787725e-06, "loss": 0.0137, "step": 164950 }, { "epoch": 3.357964376590331, "grad_norm": 0.026156992148918826, "learning_rate": 2.9409627624542563e-06, "loss": 0.0266, "step": 164960 }, { "epoch": 3.358167938931298, "grad_norm": 0.01069273885326834, "learning_rate": 2.940315263114651e-06, "loss": 0.0183, "step": 164970 }, { "epoch": 3.3583715012722646, "grad_norm": 10.957955906783534, "learning_rate": 2.9396678053730286e-06, "loss": 0.0524, "step": 164980 }, { "epoch": 3.3585750636132317, "grad_norm": 0.009700661252282779, "learning_rate": 2.939020389242472e-06, "loss": 0.0702, "step": 164990 }, { "epoch": 3.3587786259541983, "grad_norm": 0.11074467213866632, "learning_rate": 2.938373014736051e-06, "loss": 0.0444, "step": 165000 }, { "epoch": 3.3589821882951654, "grad_norm": 0.047549503911233734, "learning_rate": 2.9377256818668422e-06, "loss": 0.0027, "step": 165010 }, { "epoch": 3.3591857506361325, "grad_norm": 0.4597295544577665, "learning_rate": 2.9370783906479217e-06, "loss": 0.1237, "step": 165020 }, { "epoch": 3.359389312977099, "grad_norm": 13.756030484114541, "learning_rate": 2.936431141092358e-06, "loss": 0.0498, "step": 165030 }, { "epoch": 3.359592875318066, "grad_norm": 0.5096687439264153, "learning_rate": 2.9357839332132253e-06, "loss": 0.0652, "step": 165040 }, { "epoch": 3.3597964376590332, "grad_norm": 0.020727897022402048, "learning_rate": 2.9351367670235964e-06, "loss": 0.0011, "step": 165050 }, { "epoch": 3.36, "grad_norm": 5.553663480104981, "learning_rate": 2.9344896425365377e-06, "loss": 0.0505, "step": 165060 }, { "epoch": 3.360203562340967, "grad_norm": 1.0022521648037461, "learning_rate": 2.9338425597651233e-06, "loss": 0.0241, "step": 165070 }, { "epoch": 3.360407124681934, "grad_norm": 0.03678340813245376, "learning_rate": 2.9331955187224185e-06, "loss": 0.0101, "step": 165080 }, { "epoch": 3.3606106870229007, "grad_norm": 0.01815397649044592, "learning_rate": 2.932548519421491e-06, "loss": 0.0007, "step": 165090 }, { "epoch": 3.3608142493638677, "grad_norm": 0.015075222169314388, "learning_rate": 2.9319015618754114e-06, "loss": 0.057, "step": 165100 }, { "epoch": 3.3610178117048344, "grad_norm": 0.057966763375923984, "learning_rate": 2.9312546460972426e-06, "loss": 0.0006, "step": 165110 }, { "epoch": 3.3612213740458015, "grad_norm": 0.008161834019870608, "learning_rate": 2.930607772100049e-06, "loss": 0.1209, "step": 165120 }, { "epoch": 3.3614249363867685, "grad_norm": 0.03201280032199683, "learning_rate": 2.9299609398968992e-06, "loss": 0.1646, "step": 165130 }, { "epoch": 3.361628498727735, "grad_norm": 0.03216730550322506, "learning_rate": 2.9293141495008536e-06, "loss": 0.0364, "step": 165140 }, { "epoch": 3.3618320610687022, "grad_norm": 0.10756517905537825, "learning_rate": 2.928667400924976e-06, "loss": 0.0322, "step": 165150 }, { "epoch": 3.3620356234096693, "grad_norm": 0.049559315984625985, "learning_rate": 2.928020694182327e-06, "loss": 0.0043, "step": 165160 }, { "epoch": 3.362239185750636, "grad_norm": 0.04872283621230804, "learning_rate": 2.92737402928597e-06, "loss": 0.0487, "step": 165170 }, { "epoch": 3.362442748091603, "grad_norm": 0.30976056224944387, "learning_rate": 2.9267274062489637e-06, "loss": 0.0647, "step": 165180 }, { "epoch": 3.36264631043257, "grad_norm": 0.01614091379859758, "learning_rate": 2.9260808250843675e-06, "loss": 0.0786, "step": 165190 }, { "epoch": 3.3628498727735368, "grad_norm": 11.632280653868627, "learning_rate": 2.9254342858052396e-06, "loss": 0.0646, "step": 165200 }, { "epoch": 3.363053435114504, "grad_norm": 0.05920357348883477, "learning_rate": 2.92478778842464e-06, "loss": 0.1412, "step": 165210 }, { "epoch": 3.363256997455471, "grad_norm": 7.6664584900523876, "learning_rate": 2.9241413329556222e-06, "loss": 0.0701, "step": 165220 }, { "epoch": 3.3634605597964375, "grad_norm": 0.2931260789154208, "learning_rate": 2.9234949194112463e-06, "loss": 0.0126, "step": 165230 }, { "epoch": 3.3636641221374046, "grad_norm": 0.05780674919053932, "learning_rate": 2.922848547804562e-06, "loss": 0.0029, "step": 165240 }, { "epoch": 3.3638676844783717, "grad_norm": 3.246453615722761, "learning_rate": 2.9222022181486275e-06, "loss": 0.0931, "step": 165250 }, { "epoch": 3.3640712468193383, "grad_norm": 0.08952065429828943, "learning_rate": 2.9215559304564967e-06, "loss": 0.1108, "step": 165260 }, { "epoch": 3.3642748091603054, "grad_norm": 38.316281024231735, "learning_rate": 2.920909684741219e-06, "loss": 0.094, "step": 165270 }, { "epoch": 3.3644783715012725, "grad_norm": 0.07937571577997952, "learning_rate": 2.9202634810158503e-06, "loss": 0.0989, "step": 165280 }, { "epoch": 3.364681933842239, "grad_norm": 0.11380534544662817, "learning_rate": 2.9196173192934374e-06, "loss": 0.0006, "step": 165290 }, { "epoch": 3.364885496183206, "grad_norm": 0.07615969498584164, "learning_rate": 2.9189711995870317e-06, "loss": 0.0995, "step": 165300 }, { "epoch": 3.365089058524173, "grad_norm": 0.3320873646940416, "learning_rate": 2.9183251219096863e-06, "loss": 0.0802, "step": 165310 }, { "epoch": 3.36529262086514, "grad_norm": 1.2080203555250664, "learning_rate": 2.9176790862744444e-06, "loss": 0.0361, "step": 165320 }, { "epoch": 3.365496183206107, "grad_norm": 7.6559239664073155, "learning_rate": 2.917033092694353e-06, "loss": 0.0506, "step": 165330 }, { "epoch": 3.3656997455470736, "grad_norm": 0.008995430854937578, "learning_rate": 2.9163871411824653e-06, "loss": 0.0472, "step": 165340 }, { "epoch": 3.3659033078880407, "grad_norm": 11.961181783977285, "learning_rate": 2.9157412317518206e-06, "loss": 0.0819, "step": 165350 }, { "epoch": 3.366106870229008, "grad_norm": 0.007544905089748587, "learning_rate": 2.9150953644154667e-06, "loss": 0.0192, "step": 165360 }, { "epoch": 3.3663104325699744, "grad_norm": 7.696571249628951, "learning_rate": 2.914449539186447e-06, "loss": 0.1237, "step": 165370 }, { "epoch": 3.3665139949109415, "grad_norm": 0.015633048118033125, "learning_rate": 2.9138037560778042e-06, "loss": 0.0084, "step": 165380 }, { "epoch": 3.3667175572519086, "grad_norm": 0.059909404344552365, "learning_rate": 2.913158015102583e-06, "loss": 0.0551, "step": 165390 }, { "epoch": 3.366921119592875, "grad_norm": 0.010228482542430854, "learning_rate": 2.9125123162738223e-06, "loss": 0.0505, "step": 165400 }, { "epoch": 3.3671246819338423, "grad_norm": 0.04217215966019504, "learning_rate": 2.9118666596045654e-06, "loss": 0.0011, "step": 165410 }, { "epoch": 3.367328244274809, "grad_norm": 0.13083123064042956, "learning_rate": 2.9112210451078495e-06, "loss": 0.0197, "step": 165420 }, { "epoch": 3.367531806615776, "grad_norm": 0.1450253202504793, "learning_rate": 2.910575472796715e-06, "loss": 0.0406, "step": 165430 }, { "epoch": 3.367735368956743, "grad_norm": 0.05000409290065601, "learning_rate": 2.909929942684203e-06, "loss": 0.0426, "step": 165440 }, { "epoch": 3.3679389312977097, "grad_norm": 16.878166067354798, "learning_rate": 2.9092844547833432e-06, "loss": 0.1401, "step": 165450 }, { "epoch": 3.368142493638677, "grad_norm": 132.02300467919716, "learning_rate": 2.908639009107178e-06, "loss": 0.0833, "step": 165460 }, { "epoch": 3.368346055979644, "grad_norm": 7.500831338718623, "learning_rate": 2.907993605668745e-06, "loss": 0.0687, "step": 165470 }, { "epoch": 3.3685496183206105, "grad_norm": 0.22054941216105486, "learning_rate": 2.907348244481071e-06, "loss": 0.0641, "step": 165480 }, { "epoch": 3.3687531806615776, "grad_norm": 0.4458914723926892, "learning_rate": 2.9067029255571965e-06, "loss": 0.1656, "step": 165490 }, { "epoch": 3.3689567430025447, "grad_norm": 0.1035937678793656, "learning_rate": 2.9060576489101544e-06, "loss": 0.0014, "step": 165500 }, { "epoch": 3.3691603053435113, "grad_norm": 0.07178441794324841, "learning_rate": 2.905412414552971e-06, "loss": 0.0007, "step": 165510 }, { "epoch": 3.3693638676844784, "grad_norm": 1.2736982036459474, "learning_rate": 2.9047672224986856e-06, "loss": 0.0041, "step": 165520 }, { "epoch": 3.3695674300254455, "grad_norm": 0.02349649051274942, "learning_rate": 2.9041220727603238e-06, "loss": 0.0346, "step": 165530 }, { "epoch": 3.369770992366412, "grad_norm": 125.78306300894955, "learning_rate": 2.903476965350913e-06, "loss": 0.0277, "step": 165540 }, { "epoch": 3.369974554707379, "grad_norm": 6.949913059483054, "learning_rate": 2.9028319002834897e-06, "loss": 0.0219, "step": 165550 }, { "epoch": 3.3701781170483462, "grad_norm": 13.824408726302673, "learning_rate": 2.9021868775710756e-06, "loss": 0.1081, "step": 165560 }, { "epoch": 3.370381679389313, "grad_norm": 0.05574658235271201, "learning_rate": 2.901541897226697e-06, "loss": 0.095, "step": 165570 }, { "epoch": 3.37058524173028, "grad_norm": 0.006500454637614871, "learning_rate": 2.9008969592633863e-06, "loss": 0.0691, "step": 165580 }, { "epoch": 3.370788804071247, "grad_norm": 0.022594510311192886, "learning_rate": 2.900252063694164e-06, "loss": 0.0299, "step": 165590 }, { "epoch": 3.3709923664122137, "grad_norm": 0.18088637217143447, "learning_rate": 2.899607210532055e-06, "loss": 0.0256, "step": 165600 }, { "epoch": 3.3711959287531807, "grad_norm": 7.948107563335709, "learning_rate": 2.898962399790084e-06, "loss": 0.045, "step": 165610 }, { "epoch": 3.371399491094148, "grad_norm": 0.2807426287578633, "learning_rate": 2.8983176314812735e-06, "loss": 0.1261, "step": 165620 }, { "epoch": 3.3716030534351145, "grad_norm": 0.03778470295289564, "learning_rate": 2.8976729056186454e-06, "loss": 0.053, "step": 165630 }, { "epoch": 3.3718066157760815, "grad_norm": 0.7373669158442769, "learning_rate": 2.89702822221522e-06, "loss": 0.0909, "step": 165640 }, { "epoch": 3.372010178117048, "grad_norm": 0.7536721373354431, "learning_rate": 2.896383581284021e-06, "loss": 0.0292, "step": 165650 }, { "epoch": 3.3722137404580153, "grad_norm": 18.306917538713396, "learning_rate": 2.895738982838061e-06, "loss": 0.0532, "step": 165660 }, { "epoch": 3.3724173027989823, "grad_norm": 0.14573094501067746, "learning_rate": 2.8950944268903645e-06, "loss": 0.018, "step": 165670 }, { "epoch": 3.372620865139949, "grad_norm": 0.16997546421375787, "learning_rate": 2.8944499134539494e-06, "loss": 0.0602, "step": 165680 }, { "epoch": 3.372824427480916, "grad_norm": 0.0562865893635561, "learning_rate": 2.893805442541826e-06, "loss": 0.0461, "step": 165690 }, { "epoch": 3.373027989821883, "grad_norm": 17.29521464149551, "learning_rate": 2.893161014167016e-06, "loss": 0.0912, "step": 165700 }, { "epoch": 3.3732315521628498, "grad_norm": 8.2520980749438, "learning_rate": 2.8925166283425355e-06, "loss": 0.0818, "step": 165710 }, { "epoch": 3.373435114503817, "grad_norm": 3.3073766385376974e-05, "learning_rate": 2.8918722850813918e-06, "loss": 0.0932, "step": 165720 }, { "epoch": 3.3736386768447835, "grad_norm": 8.415025481695611, "learning_rate": 2.891227984396606e-06, "loss": 0.1062, "step": 165730 }, { "epoch": 3.3738422391857505, "grad_norm": 31.951424274378397, "learning_rate": 2.890583726301186e-06, "loss": 0.0046, "step": 165740 }, { "epoch": 3.3740458015267176, "grad_norm": 15.768676898222578, "learning_rate": 2.889939510808144e-06, "loss": 0.0899, "step": 165750 }, { "epoch": 3.3742493638676843, "grad_norm": 0.08355536638768482, "learning_rate": 2.8892953379304923e-06, "loss": 0.1108, "step": 165760 }, { "epoch": 3.3744529262086513, "grad_norm": 0.03246336462461038, "learning_rate": 2.888651207681239e-06, "loss": 0.0441, "step": 165770 }, { "epoch": 3.3746564885496184, "grad_norm": 12.79922049995829, "learning_rate": 2.888007120073394e-06, "loss": 0.0384, "step": 165780 }, { "epoch": 3.374860050890585, "grad_norm": 0.16404191416696398, "learning_rate": 2.8873630751199654e-06, "loss": 0.0643, "step": 165790 }, { "epoch": 3.375063613231552, "grad_norm": 0.026945004307247118, "learning_rate": 2.8867190728339607e-06, "loss": 0.0679, "step": 165800 }, { "epoch": 3.375267175572519, "grad_norm": 0.8496601549191016, "learning_rate": 2.886075113228386e-06, "loss": 0.0352, "step": 165810 }, { "epoch": 3.375470737913486, "grad_norm": 0.3437000303234884, "learning_rate": 2.8854311963162474e-06, "loss": 0.0428, "step": 165820 }, { "epoch": 3.375674300254453, "grad_norm": 0.055008274006150576, "learning_rate": 2.8847873221105495e-06, "loss": 0.0798, "step": 165830 }, { "epoch": 3.37587786259542, "grad_norm": 0.5564297385948164, "learning_rate": 2.8841434906242955e-06, "loss": 0.015, "step": 165840 }, { "epoch": 3.3760814249363866, "grad_norm": 0.06281814671816655, "learning_rate": 2.883499701870489e-06, "loss": 0.021, "step": 165850 }, { "epoch": 3.3762849872773537, "grad_norm": 0.016412614601589926, "learning_rate": 2.882855955862132e-06, "loss": 0.0068, "step": 165860 }, { "epoch": 3.376488549618321, "grad_norm": 0.058313790915671536, "learning_rate": 2.8822122526122283e-06, "loss": 0.0182, "step": 165870 }, { "epoch": 3.3766921119592874, "grad_norm": 10.767787016210743, "learning_rate": 2.8815685921337716e-06, "loss": 0.0542, "step": 165880 }, { "epoch": 3.3768956743002545, "grad_norm": 60.33111162072756, "learning_rate": 2.8809249744397693e-06, "loss": 0.0413, "step": 165890 }, { "epoch": 3.3770992366412216, "grad_norm": 0.05855867110421314, "learning_rate": 2.8802813995432155e-06, "loss": 0.0148, "step": 165900 }, { "epoch": 3.377302798982188, "grad_norm": 0.02676725354772782, "learning_rate": 2.8796378674571057e-06, "loss": 0.0528, "step": 165910 }, { "epoch": 3.3775063613231553, "grad_norm": 0.015464095003198784, "learning_rate": 2.8789943781944453e-06, "loss": 0.0577, "step": 165920 }, { "epoch": 3.3777099236641224, "grad_norm": 0.03906516448506526, "learning_rate": 2.8783509317682223e-06, "loss": 0.0488, "step": 165930 }, { "epoch": 3.377913486005089, "grad_norm": 0.011453435116694638, "learning_rate": 2.8777075281914335e-06, "loss": 0.0033, "step": 165940 }, { "epoch": 3.378117048346056, "grad_norm": 10.31731849201541, "learning_rate": 2.8770641674770787e-06, "loss": 0.065, "step": 165950 }, { "epoch": 3.3783206106870227, "grad_norm": 0.1318090222939181, "learning_rate": 2.876420849638145e-06, "loss": 0.0416, "step": 165960 }, { "epoch": 3.37852417302799, "grad_norm": 0.04041495126309577, "learning_rate": 2.8757775746876284e-06, "loss": 0.0558, "step": 165970 }, { "epoch": 3.378727735368957, "grad_norm": 15.070524262072762, "learning_rate": 2.875134342638518e-06, "loss": 0.0878, "step": 165980 }, { "epoch": 3.3789312977099235, "grad_norm": 0.020262895447897875, "learning_rate": 2.8744911535038077e-06, "loss": 0.0012, "step": 165990 }, { "epoch": 3.3791348600508906, "grad_norm": 0.028210423424326763, "learning_rate": 2.8738480072964858e-06, "loss": 0.0676, "step": 166000 }, { "epoch": 3.3793384223918577, "grad_norm": 20.564329885393164, "learning_rate": 2.873204904029542e-06, "loss": 0.1233, "step": 166010 }, { "epoch": 3.3795419847328243, "grad_norm": 0.0159274324947205, "learning_rate": 2.8725618437159656e-06, "loss": 0.0488, "step": 166020 }, { "epoch": 3.3797455470737914, "grad_norm": 0.17922875923155399, "learning_rate": 2.8719188263687396e-06, "loss": 0.0009, "step": 166030 }, { "epoch": 3.3799491094147585, "grad_norm": 0.06623897983485308, "learning_rate": 2.871275852000856e-06, "loss": 0.0194, "step": 166040 }, { "epoch": 3.380152671755725, "grad_norm": 0.014309644880024437, "learning_rate": 2.8706329206253e-06, "loss": 0.0441, "step": 166050 }, { "epoch": 3.380356234096692, "grad_norm": 0.014647915457468288, "learning_rate": 2.8699900322550513e-06, "loss": 0.0973, "step": 166060 }, { "epoch": 3.380559796437659, "grad_norm": 13.034947226745697, "learning_rate": 2.8693471869030985e-06, "loss": 0.0917, "step": 166070 }, { "epoch": 3.380763358778626, "grad_norm": 0.1058774523233115, "learning_rate": 2.8687043845824258e-06, "loss": 0.0078, "step": 166080 }, { "epoch": 3.380966921119593, "grad_norm": 0.02189855452934813, "learning_rate": 2.8680616253060094e-06, "loss": 0.0436, "step": 166090 }, { "epoch": 3.3811704834605596, "grad_norm": 0.8845679218610353, "learning_rate": 2.867418909086838e-06, "loss": 0.0332, "step": 166100 }, { "epoch": 3.3813740458015267, "grad_norm": 5.997323764623394, "learning_rate": 2.866776235937887e-06, "loss": 0.0424, "step": 166110 }, { "epoch": 3.3815776081424938, "grad_norm": 0.018251233544311015, "learning_rate": 2.866133605872135e-06, "loss": 0.0925, "step": 166120 }, { "epoch": 3.3817811704834604, "grad_norm": 0.5138542611263014, "learning_rate": 2.8654910189025676e-06, "loss": 0.0369, "step": 166130 }, { "epoch": 3.3819847328244275, "grad_norm": 0.5331908422348314, "learning_rate": 2.8648484750421563e-06, "loss": 0.154, "step": 166140 }, { "epoch": 3.3821882951653945, "grad_norm": 7.8335923848038735, "learning_rate": 2.8642059743038797e-06, "loss": 0.1168, "step": 166150 }, { "epoch": 3.382391857506361, "grad_norm": 6.538436113560038, "learning_rate": 2.8635635167007146e-06, "loss": 0.0656, "step": 166160 }, { "epoch": 3.3825954198473283, "grad_norm": 0.11087006274966425, "learning_rate": 2.862921102245636e-06, "loss": 0.0248, "step": 166170 }, { "epoch": 3.3827989821882953, "grad_norm": 20.023761848585888, "learning_rate": 2.862278730951619e-06, "loss": 0.0117, "step": 166180 }, { "epoch": 3.383002544529262, "grad_norm": 0.8663671999985421, "learning_rate": 2.861636402831635e-06, "loss": 0.0916, "step": 166190 }, { "epoch": 3.383206106870229, "grad_norm": 0.02513521963744512, "learning_rate": 2.8609941178986587e-06, "loss": 0.0294, "step": 166200 }, { "epoch": 3.383409669211196, "grad_norm": 3.0103284287242675, "learning_rate": 2.860351876165661e-06, "loss": 0.095, "step": 166210 }, { "epoch": 3.3836132315521628, "grad_norm": 0.006638780555628739, "learning_rate": 2.8597096776456134e-06, "loss": 0.0347, "step": 166220 }, { "epoch": 3.38381679389313, "grad_norm": 0.00772958217393346, "learning_rate": 2.8590675223514852e-06, "loss": 0.0433, "step": 166230 }, { "epoch": 3.384020356234097, "grad_norm": 0.00784950569471656, "learning_rate": 2.858425410296246e-06, "loss": 0.0564, "step": 166240 }, { "epoch": 3.3842239185750635, "grad_norm": 0.01917362648337405, "learning_rate": 2.857783341492864e-06, "loss": 0.038, "step": 166250 }, { "epoch": 3.3844274809160306, "grad_norm": 2.6554572775881846, "learning_rate": 2.8571413159543086e-06, "loss": 0.0074, "step": 166260 }, { "epoch": 3.3846310432569973, "grad_norm": 36.18747493469799, "learning_rate": 2.8564993336935405e-06, "loss": 0.0299, "step": 166270 }, { "epoch": 3.3848346055979643, "grad_norm": 0.019460714709176385, "learning_rate": 2.8558573947235308e-06, "loss": 0.0406, "step": 166280 }, { "epoch": 3.3850381679389314, "grad_norm": 0.02079737909697527, "learning_rate": 2.8552154990572444e-06, "loss": 0.0148, "step": 166290 }, { "epoch": 3.385241730279898, "grad_norm": 0.3220205132352146, "learning_rate": 2.8545736467076423e-06, "loss": 0.1177, "step": 166300 }, { "epoch": 3.385445292620865, "grad_norm": 0.033106359750281984, "learning_rate": 2.8539318376876862e-06, "loss": 0.0484, "step": 166310 }, { "epoch": 3.385648854961832, "grad_norm": 0.01916239424289886, "learning_rate": 2.8532900720103447e-06, "loss": 0.0045, "step": 166320 }, { "epoch": 3.385852417302799, "grad_norm": 0.4975094738856569, "learning_rate": 2.8526483496885734e-06, "loss": 0.0495, "step": 166330 }, { "epoch": 3.386055979643766, "grad_norm": 0.05649856415311078, "learning_rate": 2.8520066707353345e-06, "loss": 0.0092, "step": 166340 }, { "epoch": 3.386259541984733, "grad_norm": 0.05094395336525979, "learning_rate": 2.851365035163587e-06, "loss": 0.0025, "step": 166350 }, { "epoch": 3.3864631043256996, "grad_norm": 0.046548084908251304, "learning_rate": 2.85072344298629e-06, "loss": 0.0014, "step": 166360 }, { "epoch": 3.3866666666666667, "grad_norm": 0.11574880374124663, "learning_rate": 2.8500818942164015e-06, "loss": 0.048, "step": 166370 }, { "epoch": 3.3868702290076333, "grad_norm": 0.0018630070559278293, "learning_rate": 2.8494403888668787e-06, "loss": 0.0306, "step": 166380 }, { "epoch": 3.3870737913486004, "grad_norm": 0.006548395041879392, "learning_rate": 2.8487989269506766e-06, "loss": 0.059, "step": 166390 }, { "epoch": 3.3872773536895675, "grad_norm": 0.027118403189721953, "learning_rate": 2.848157508480751e-06, "loss": 0.1307, "step": 166400 }, { "epoch": 3.387480916030534, "grad_norm": 0.00753968848369368, "learning_rate": 2.847516133470056e-06, "loss": 0.0426, "step": 166410 }, { "epoch": 3.387684478371501, "grad_norm": 0.04206898610892421, "learning_rate": 2.846874801931547e-06, "loss": 0.0698, "step": 166420 }, { "epoch": 3.3878880407124683, "grad_norm": 0.15879311138744565, "learning_rate": 2.846233513878171e-06, "loss": 0.0456, "step": 166430 }, { "epoch": 3.388091603053435, "grad_norm": 0.09426355393340854, "learning_rate": 2.8455922693228853e-06, "loss": 0.0425, "step": 166440 }, { "epoch": 3.388295165394402, "grad_norm": 0.01745673411875078, "learning_rate": 2.84495106827864e-06, "loss": 0.0344, "step": 166450 }, { "epoch": 3.388498727735369, "grad_norm": 0.10881914060270251, "learning_rate": 2.84430991075838e-06, "loss": 0.0963, "step": 166460 }, { "epoch": 3.3887022900763357, "grad_norm": 0.4234530879722232, "learning_rate": 2.8436687967750613e-06, "loss": 0.0604, "step": 166470 }, { "epoch": 3.388905852417303, "grad_norm": 0.00815804208229464, "learning_rate": 2.843027726341627e-06, "loss": 0.0342, "step": 166480 }, { "epoch": 3.38910941475827, "grad_norm": 0.03166282894861807, "learning_rate": 2.8423866994710242e-06, "loss": 0.0028, "step": 166490 }, { "epoch": 3.3893129770992365, "grad_norm": 0.024578176303701224, "learning_rate": 2.8417457161762045e-06, "loss": 0.0474, "step": 166500 }, { "epoch": 3.3895165394402036, "grad_norm": 0.013407303888852804, "learning_rate": 2.841104776470108e-06, "loss": 0.108, "step": 166510 }, { "epoch": 3.3897201017811707, "grad_norm": 0.02390866447631119, "learning_rate": 2.8404638803656794e-06, "loss": 0.0028, "step": 166520 }, { "epoch": 3.3899236641221373, "grad_norm": 0.0034469581112391975, "learning_rate": 2.839823027875868e-06, "loss": 0.0233, "step": 166530 }, { "epoch": 3.3901272264631044, "grad_norm": 22.414099833484006, "learning_rate": 2.839182219013611e-06, "loss": 0.0766, "step": 166540 }, { "epoch": 3.3903307888040715, "grad_norm": 0.0062251274995462165, "learning_rate": 2.838541453791852e-06, "loss": 0.0305, "step": 166550 }, { "epoch": 3.390534351145038, "grad_norm": 27.10450327378152, "learning_rate": 2.8379007322235325e-06, "loss": 0.0889, "step": 166560 }, { "epoch": 3.390737913486005, "grad_norm": 2.4321772822629812, "learning_rate": 2.8372600543215922e-06, "loss": 0.0928, "step": 166570 }, { "epoch": 3.3909414758269723, "grad_norm": 0.04726341507394605, "learning_rate": 2.8366194200989705e-06, "loss": 0.0192, "step": 166580 }, { "epoch": 3.391145038167939, "grad_norm": 14.150172528477784, "learning_rate": 2.8359788295686062e-06, "loss": 0.0831, "step": 166590 }, { "epoch": 3.391348600508906, "grad_norm": 0.179394508044821, "learning_rate": 2.8353382827434374e-06, "loss": 0.004, "step": 166600 }, { "epoch": 3.3915521628498726, "grad_norm": 1.3346222792103897, "learning_rate": 2.8346977796363994e-06, "loss": 0.0484, "step": 166610 }, { "epoch": 3.3917557251908397, "grad_norm": 1.0391127041148964, "learning_rate": 2.834057320260428e-06, "loss": 0.0172, "step": 166620 }, { "epoch": 3.3919592875318068, "grad_norm": 0.3237266674687507, "learning_rate": 2.833416904628462e-06, "loss": 0.0267, "step": 166630 }, { "epoch": 3.3921628498727734, "grad_norm": 0.05134812597870731, "learning_rate": 2.8327765327534276e-06, "loss": 0.0594, "step": 166640 }, { "epoch": 3.3923664122137405, "grad_norm": 10.507704299585088, "learning_rate": 2.832136204648265e-06, "loss": 0.1119, "step": 166650 }, { "epoch": 3.3925699745547075, "grad_norm": 0.053033046441517295, "learning_rate": 2.831495920325905e-06, "loss": 0.1126, "step": 166660 }, { "epoch": 3.392773536895674, "grad_norm": 0.027153178258464628, "learning_rate": 2.8308556797992736e-06, "loss": 0.0367, "step": 166670 }, { "epoch": 3.3929770992366413, "grad_norm": 0.03497305938781189, "learning_rate": 2.8302154830813113e-06, "loss": 0.0393, "step": 166680 }, { "epoch": 3.393180661577608, "grad_norm": 23.197034733562866, "learning_rate": 2.8295753301849383e-06, "loss": 0.0169, "step": 166690 }, { "epoch": 3.393384223918575, "grad_norm": 0.010003730118564392, "learning_rate": 2.828935221123088e-06, "loss": 0.0196, "step": 166700 }, { "epoch": 3.393587786259542, "grad_norm": 0.04313711634398256, "learning_rate": 2.8282951559086868e-06, "loss": 0.03, "step": 166710 }, { "epoch": 3.3937913486005087, "grad_norm": 0.020212356043119255, "learning_rate": 2.8276551345546623e-06, "loss": 0.0004, "step": 166720 }, { "epoch": 3.3939949109414758, "grad_norm": 0.028590961740015314, "learning_rate": 2.8270151570739397e-06, "loss": 0.0772, "step": 166730 }, { "epoch": 3.394198473282443, "grad_norm": 0.011620723568194678, "learning_rate": 2.826375223479445e-06, "loss": 0.0678, "step": 166740 }, { "epoch": 3.3944020356234095, "grad_norm": 1.5412288294807615, "learning_rate": 2.825735333784103e-06, "loss": 0.0641, "step": 166750 }, { "epoch": 3.3946055979643766, "grad_norm": 0.025444705615715456, "learning_rate": 2.825095488000835e-06, "loss": 0.0408, "step": 166760 }, { "epoch": 3.3948091603053436, "grad_norm": 39.87521014682229, "learning_rate": 2.8244556861425664e-06, "loss": 0.0054, "step": 166770 }, { "epoch": 3.3950127226463103, "grad_norm": 0.022973860007868045, "learning_rate": 2.8238159282222167e-06, "loss": 0.0014, "step": 166780 }, { "epoch": 3.3952162849872773, "grad_norm": 9.286699507967098, "learning_rate": 2.823176214252707e-06, "loss": 0.0947, "step": 166790 }, { "epoch": 3.3954198473282444, "grad_norm": 0.038548757465622495, "learning_rate": 2.8225365442469576e-06, "loss": 0.0273, "step": 166800 }, { "epoch": 3.395623409669211, "grad_norm": 0.037801886044919304, "learning_rate": 2.8218969182178866e-06, "loss": 0.0134, "step": 166810 }, { "epoch": 3.395826972010178, "grad_norm": 0.021491517770627522, "learning_rate": 2.821257336178414e-06, "loss": 0.0012, "step": 166820 }, { "epoch": 3.396030534351145, "grad_norm": 23.479807510219047, "learning_rate": 2.820617798141455e-06, "loss": 0.0225, "step": 166830 }, { "epoch": 3.396234096692112, "grad_norm": 13.193108829835754, "learning_rate": 2.8199783041199286e-06, "loss": 0.0695, "step": 166840 }, { "epoch": 3.396437659033079, "grad_norm": 0.02802683867050733, "learning_rate": 2.819338854126746e-06, "loss": 0.0783, "step": 166850 }, { "epoch": 3.396641221374046, "grad_norm": 4.636756727144235, "learning_rate": 2.818699448174822e-06, "loss": 0.0775, "step": 166860 }, { "epoch": 3.3968447837150126, "grad_norm": 0.14230494032902163, "learning_rate": 2.8180600862770764e-06, "loss": 0.0138, "step": 166870 }, { "epoch": 3.3970483460559797, "grad_norm": 6.234511577167198, "learning_rate": 2.8174207684464154e-06, "loss": 0.1052, "step": 166880 }, { "epoch": 3.397251908396947, "grad_norm": 0.009715115914760347, "learning_rate": 2.816781494695752e-06, "loss": 0.0469, "step": 166890 }, { "epoch": 3.3974554707379134, "grad_norm": 0.18494363539044564, "learning_rate": 2.816142265038001e-06, "loss": 0.0703, "step": 166900 }, { "epoch": 3.3976590330788805, "grad_norm": 4.407882460268589, "learning_rate": 2.8155030794860684e-06, "loss": 0.0192, "step": 166910 }, { "epoch": 3.397862595419847, "grad_norm": 0.0481732063110851, "learning_rate": 2.8148639380528644e-06, "loss": 0.0829, "step": 166920 }, { "epoch": 3.398066157760814, "grad_norm": 0.004354591666427314, "learning_rate": 2.8142248407512975e-06, "loss": 0.0016, "step": 166930 }, { "epoch": 3.3982697201017813, "grad_norm": 0.05785776261214051, "learning_rate": 2.8135857875942756e-06, "loss": 0.0027, "step": 166940 }, { "epoch": 3.398473282442748, "grad_norm": 20.465724511373868, "learning_rate": 2.812946778594705e-06, "loss": 0.0845, "step": 166950 }, { "epoch": 3.398676844783715, "grad_norm": 0.01572527176372423, "learning_rate": 2.8123078137654904e-06, "loss": 0.0013, "step": 166960 }, { "epoch": 3.398880407124682, "grad_norm": 0.018316919026573387, "learning_rate": 2.8116688931195378e-06, "loss": 0.0458, "step": 166970 }, { "epoch": 3.3990839694656487, "grad_norm": 0.08261276081697959, "learning_rate": 2.81103001666975e-06, "loss": 0.0668, "step": 166980 }, { "epoch": 3.399287531806616, "grad_norm": 0.1178664513339382, "learning_rate": 2.810391184429031e-06, "loss": 0.0044, "step": 166990 }, { "epoch": 3.399491094147583, "grad_norm": 0.03647512282851591, "learning_rate": 2.8097523964102836e-06, "loss": 0.1519, "step": 167000 }, { "epoch": 3.3996946564885495, "grad_norm": 0.017058637721904156, "learning_rate": 2.809113652626404e-06, "loss": 0.0485, "step": 167010 }, { "epoch": 3.3998982188295166, "grad_norm": 0.03960238981817062, "learning_rate": 2.808474953090298e-06, "loss": 0.0117, "step": 167020 }, { "epoch": 3.4001017811704832, "grad_norm": 0.0011277767957085646, "learning_rate": 2.8078362978148642e-06, "loss": 0.0055, "step": 167030 }, { "epoch": 3.4003053435114503, "grad_norm": 6.6667333369416, "learning_rate": 2.807197686812997e-06, "loss": 0.0877, "step": 167040 }, { "epoch": 3.4005089058524174, "grad_norm": 0.009078235492546397, "learning_rate": 2.8065591200976005e-06, "loss": 0.0804, "step": 167050 }, { "epoch": 3.400712468193384, "grad_norm": 0.03376278654991019, "learning_rate": 2.8059205976815652e-06, "loss": 0.1269, "step": 167060 }, { "epoch": 3.400916030534351, "grad_norm": 0.08613061141571984, "learning_rate": 2.805282119577788e-06, "loss": 0.055, "step": 167070 }, { "epoch": 3.401119592875318, "grad_norm": 25.06025957005265, "learning_rate": 2.804643685799169e-06, "loss": 0.0801, "step": 167080 }, { "epoch": 3.401323155216285, "grad_norm": 9.000726839515035, "learning_rate": 2.8040052963585964e-06, "loss": 0.0383, "step": 167090 }, { "epoch": 3.401526717557252, "grad_norm": 0.08144402242948036, "learning_rate": 2.8033669512689632e-06, "loss": 0.0163, "step": 167100 }, { "epoch": 3.401730279898219, "grad_norm": 0.01487245397247873, "learning_rate": 2.8027286505431673e-06, "loss": 0.028, "step": 167110 }, { "epoch": 3.4019338422391856, "grad_norm": 0.31621957065526474, "learning_rate": 2.8020903941940945e-06, "loss": 0.0019, "step": 167120 }, { "epoch": 3.4021374045801527, "grad_norm": 43.222032945761775, "learning_rate": 2.8014521822346376e-06, "loss": 0.1025, "step": 167130 }, { "epoch": 3.4023409669211198, "grad_norm": 12.574132055667667, "learning_rate": 2.8008140146776854e-06, "loss": 0.0882, "step": 167140 }, { "epoch": 3.4025445292620864, "grad_norm": 0.07481501930905952, "learning_rate": 2.800175891536126e-06, "loss": 0.0494, "step": 167150 }, { "epoch": 3.4027480916030535, "grad_norm": 0.0075056098347952935, "learning_rate": 2.7995378128228485e-06, "loss": 0.0402, "step": 167160 }, { "epoch": 3.4029516539440205, "grad_norm": 0.2984530285279545, "learning_rate": 2.798899778550739e-06, "loss": 0.0708, "step": 167170 }, { "epoch": 3.403155216284987, "grad_norm": 0.171260343574805, "learning_rate": 2.798261788732683e-06, "loss": 0.0525, "step": 167180 }, { "epoch": 3.4033587786259543, "grad_norm": 0.024919163821276458, "learning_rate": 2.797623843381566e-06, "loss": 0.0656, "step": 167190 }, { "epoch": 3.4035623409669213, "grad_norm": 0.0025427822722873107, "learning_rate": 2.796985942510273e-06, "loss": 0.0944, "step": 167200 }, { "epoch": 3.403765903307888, "grad_norm": 8.683034680737949, "learning_rate": 2.7963480861316878e-06, "loss": 0.0787, "step": 167210 }, { "epoch": 3.403969465648855, "grad_norm": 77.06698051376267, "learning_rate": 2.7957102742586872e-06, "loss": 0.0334, "step": 167220 }, { "epoch": 3.404173027989822, "grad_norm": 0.06861871659333157, "learning_rate": 2.7950725069041583e-06, "loss": 0.0206, "step": 167230 }, { "epoch": 3.4043765903307888, "grad_norm": 0.02729590703472828, "learning_rate": 2.7944347840809825e-06, "loss": 0.1101, "step": 167240 }, { "epoch": 3.404580152671756, "grad_norm": 0.05387738157259464, "learning_rate": 2.7937971058020337e-06, "loss": 0.0727, "step": 167250 }, { "epoch": 3.4047837150127225, "grad_norm": 0.007997519874294956, "learning_rate": 2.793159472080195e-06, "loss": 0.0053, "step": 167260 }, { "epoch": 3.4049872773536896, "grad_norm": 0.014110372178742286, "learning_rate": 2.792521882928345e-06, "loss": 0.0342, "step": 167270 }, { "epoch": 3.4051908396946566, "grad_norm": 0.00575208638814666, "learning_rate": 2.7918843383593576e-06, "loss": 0.0125, "step": 167280 }, { "epoch": 3.4053944020356233, "grad_norm": 0.046168376698194256, "learning_rate": 2.7912468383861097e-06, "loss": 0.0887, "step": 167290 }, { "epoch": 3.4055979643765903, "grad_norm": 0.009188967247357918, "learning_rate": 2.7906093830214764e-06, "loss": 0.0516, "step": 167300 }, { "epoch": 3.4058015267175574, "grad_norm": 0.09878522575350117, "learning_rate": 2.7899719722783324e-06, "loss": 0.0345, "step": 167310 }, { "epoch": 3.406005089058524, "grad_norm": 0.001695490354401959, "learning_rate": 2.7893346061695515e-06, "loss": 0.0416, "step": 167320 }, { "epoch": 3.406208651399491, "grad_norm": 0.06657585341495346, "learning_rate": 2.788697284708005e-06, "loss": 0.0376, "step": 167330 }, { "epoch": 3.4064122137404578, "grad_norm": 0.021228901430551124, "learning_rate": 2.7880600079065657e-06, "loss": 0.0807, "step": 167340 }, { "epoch": 3.406615776081425, "grad_norm": 0.07928160989602176, "learning_rate": 2.7874227757781026e-06, "loss": 0.0802, "step": 167350 }, { "epoch": 3.406819338422392, "grad_norm": 0.02418158889129977, "learning_rate": 2.7867855883354864e-06, "loss": 0.0524, "step": 167360 }, { "epoch": 3.4070229007633586, "grad_norm": 0.0119543813079664, "learning_rate": 2.786148445591586e-06, "loss": 0.0329, "step": 167370 }, { "epoch": 3.4072264631043256, "grad_norm": 0.016089919288029636, "learning_rate": 2.785511347559269e-06, "loss": 0.1044, "step": 167380 }, { "epoch": 3.4074300254452927, "grad_norm": 0.017732898857764093, "learning_rate": 2.7848742942514027e-06, "loss": 0.0086, "step": 167390 }, { "epoch": 3.4076335877862594, "grad_norm": 0.03278662230154264, "learning_rate": 2.7842372856808552e-06, "loss": 0.0015, "step": 167400 }, { "epoch": 3.4078371501272264, "grad_norm": 0.16768200484885187, "learning_rate": 2.7836003218604856e-06, "loss": 0.0082, "step": 167410 }, { "epoch": 3.4080407124681935, "grad_norm": 6.5265820002811274, "learning_rate": 2.782963402803166e-06, "loss": 0.0558, "step": 167420 }, { "epoch": 3.40824427480916, "grad_norm": 0.05899226934731447, "learning_rate": 2.782326528521755e-06, "loss": 0.051, "step": 167430 }, { "epoch": 3.4084478371501272, "grad_norm": 19.857563910220293, "learning_rate": 2.781689699029113e-06, "loss": 0.0545, "step": 167440 }, { "epoch": 3.4086513994910943, "grad_norm": 0.027270507270316345, "learning_rate": 2.781052914338109e-06, "loss": 0.0591, "step": 167450 }, { "epoch": 3.408854961832061, "grad_norm": 0.007525153047884142, "learning_rate": 2.780416174461597e-06, "loss": 0.0852, "step": 167460 }, { "epoch": 3.409058524173028, "grad_norm": 8.85978791703233, "learning_rate": 2.779779479412438e-06, "loss": 0.0824, "step": 167470 }, { "epoch": 3.409262086513995, "grad_norm": 0.01100818528340453, "learning_rate": 2.779142829203495e-06, "loss": 0.0316, "step": 167480 }, { "epoch": 3.4094656488549617, "grad_norm": 7.395698754423067, "learning_rate": 2.7785062238476216e-06, "loss": 0.0453, "step": 167490 }, { "epoch": 3.409669211195929, "grad_norm": 0.11519137050206901, "learning_rate": 2.7778696633576765e-06, "loss": 0.0394, "step": 167500 }, { "epoch": 3.409872773536896, "grad_norm": 0.04356170673762281, "learning_rate": 2.7772331477465155e-06, "loss": 0.0266, "step": 167510 }, { "epoch": 3.4100763358778625, "grad_norm": 1.5013543260346587, "learning_rate": 2.7765966770269936e-06, "loss": 0.0158, "step": 167520 }, { "epoch": 3.4102798982188296, "grad_norm": 0.01767676791561746, "learning_rate": 2.7759602512119654e-06, "loss": 0.022, "step": 167530 }, { "epoch": 3.4104834605597967, "grad_norm": 0.05898429714751946, "learning_rate": 2.7753238703142847e-06, "loss": 0.0208, "step": 167540 }, { "epoch": 3.4106870229007633, "grad_norm": 0.055586068352875204, "learning_rate": 2.774687534346804e-06, "loss": 0.0183, "step": 167550 }, { "epoch": 3.4108905852417304, "grad_norm": 0.04209368989343362, "learning_rate": 2.774051243322375e-06, "loss": 0.031, "step": 167560 }, { "epoch": 3.411094147582697, "grad_norm": 6.692580947181686, "learning_rate": 2.773414997253848e-06, "loss": 0.0976, "step": 167570 }, { "epoch": 3.411297709923664, "grad_norm": 9.959660902761389, "learning_rate": 2.772778796154074e-06, "loss": 0.0454, "step": 167580 }, { "epoch": 3.411501272264631, "grad_norm": 0.03411791207313539, "learning_rate": 2.772142640035898e-06, "loss": 0.0152, "step": 167590 }, { "epoch": 3.411704834605598, "grad_norm": 0.004095943087787315, "learning_rate": 2.7715065289121723e-06, "loss": 0.0208, "step": 167600 }, { "epoch": 3.411908396946565, "grad_norm": 0.05567834115912019, "learning_rate": 2.770870462795745e-06, "loss": 0.0729, "step": 167610 }, { "epoch": 3.412111959287532, "grad_norm": 0.025754519557469727, "learning_rate": 2.7702344416994553e-06, "loss": 0.0027, "step": 167620 }, { "epoch": 3.4123155216284986, "grad_norm": 4.280009513888602, "learning_rate": 2.769598465636155e-06, "loss": 0.0036, "step": 167630 }, { "epoch": 3.4125190839694657, "grad_norm": 0.02126150170504802, "learning_rate": 2.768962534618689e-06, "loss": 0.0666, "step": 167640 }, { "epoch": 3.4127226463104328, "grad_norm": 6.886929986049386, "learning_rate": 2.7683266486598936e-06, "loss": 0.1013, "step": 167650 }, { "epoch": 3.4129262086513994, "grad_norm": 0.0017315907419813282, "learning_rate": 2.7676908077726194e-06, "loss": 0.0015, "step": 167660 }, { "epoch": 3.4131297709923665, "grad_norm": 10.070209217467422, "learning_rate": 2.7670550119697038e-06, "loss": 0.1413, "step": 167670 }, { "epoch": 3.413333333333333, "grad_norm": 0.12110225710104239, "learning_rate": 2.7664192612639873e-06, "loss": 0.0029, "step": 167680 }, { "epoch": 3.4135368956743, "grad_norm": 0.05090873628282321, "learning_rate": 2.765783555668311e-06, "loss": 0.0409, "step": 167690 }, { "epoch": 3.4137404580152673, "grad_norm": 0.007991753682223457, "learning_rate": 2.7651478951955136e-06, "loss": 0.0351, "step": 167700 }, { "epoch": 3.413944020356234, "grad_norm": 0.02129151390393291, "learning_rate": 2.7645122798584325e-06, "loss": 0.123, "step": 167710 }, { "epoch": 3.414147582697201, "grad_norm": 0.7797779331081421, "learning_rate": 2.7638767096699053e-06, "loss": 0.0043, "step": 167720 }, { "epoch": 3.414351145038168, "grad_norm": 0.014552565692738028, "learning_rate": 2.763241184642768e-06, "loss": 0.0506, "step": 167730 }, { "epoch": 3.4145547073791347, "grad_norm": 0.03141334497123623, "learning_rate": 2.7626057047898565e-06, "loss": 0.0793, "step": 167740 }, { "epoch": 3.4147582697201018, "grad_norm": 0.03071579340046657, "learning_rate": 2.761970270124004e-06, "loss": 0.0184, "step": 167750 }, { "epoch": 3.414961832061069, "grad_norm": 0.07193021014993685, "learning_rate": 2.7613348806580443e-06, "loss": 0.111, "step": 167760 }, { "epoch": 3.4151653944020355, "grad_norm": 0.24411525808087178, "learning_rate": 2.7606995364048107e-06, "loss": 0.0288, "step": 167770 }, { "epoch": 3.4153689567430026, "grad_norm": 0.03480709632257985, "learning_rate": 2.7600642373771336e-06, "loss": 0.0012, "step": 167780 }, { "epoch": 3.4155725190839696, "grad_norm": 0.014549841879940472, "learning_rate": 2.7594289835878464e-06, "loss": 0.0412, "step": 167790 }, { "epoch": 3.4157760814249363, "grad_norm": 10.206037241562926, "learning_rate": 2.758793775049773e-06, "loss": 0.1275, "step": 167800 }, { "epoch": 3.4159796437659034, "grad_norm": 0.03286112240509908, "learning_rate": 2.7581586117757475e-06, "loss": 0.0452, "step": 167810 }, { "epoch": 3.4161832061068704, "grad_norm": 0.016412752353716593, "learning_rate": 2.7575234937785985e-06, "loss": 0.0689, "step": 167820 }, { "epoch": 3.416386768447837, "grad_norm": 0.013448655155137906, "learning_rate": 2.7568884210711487e-06, "loss": 0.03, "step": 167830 }, { "epoch": 3.416590330788804, "grad_norm": 0.010172999156545764, "learning_rate": 2.7562533936662244e-06, "loss": 0.0272, "step": 167840 }, { "epoch": 3.416793893129771, "grad_norm": 0.11949784098415889, "learning_rate": 2.755618411576657e-06, "loss": 0.0204, "step": 167850 }, { "epoch": 3.416997455470738, "grad_norm": 6.444244834600049, "learning_rate": 2.7549834748152636e-06, "loss": 0.106, "step": 167860 }, { "epoch": 3.417201017811705, "grad_norm": 0.01217635203782281, "learning_rate": 2.7543485833948717e-06, "loss": 0.046, "step": 167870 }, { "epoch": 3.4174045801526716, "grad_norm": 0.1809151997641723, "learning_rate": 2.7537137373283017e-06, "loss": 0.025, "step": 167880 }, { "epoch": 3.4176081424936386, "grad_norm": 15.049759733224361, "learning_rate": 2.7530789366283767e-06, "loss": 0.1018, "step": 167890 }, { "epoch": 3.4178117048346057, "grad_norm": 9.655260364699009, "learning_rate": 2.752444181307916e-06, "loss": 0.0202, "step": 167900 }, { "epoch": 3.4180152671755724, "grad_norm": 0.017787735392539965, "learning_rate": 2.7518094713797403e-06, "loss": 0.0293, "step": 167910 }, { "epoch": 3.4182188295165394, "grad_norm": 0.047179630237126645, "learning_rate": 2.751174806856668e-06, "loss": 0.1172, "step": 167920 }, { "epoch": 3.4184223918575065, "grad_norm": 8.322772484565332, "learning_rate": 2.7505401877515164e-06, "loss": 0.0712, "step": 167930 }, { "epoch": 3.418625954198473, "grad_norm": 0.35433029578931, "learning_rate": 2.749905614077103e-06, "loss": 0.0128, "step": 167940 }, { "epoch": 3.4188295165394402, "grad_norm": 0.23896736586915834, "learning_rate": 2.7492710858462466e-06, "loss": 0.0415, "step": 167950 }, { "epoch": 3.4190330788804073, "grad_norm": 0.047534178242296804, "learning_rate": 2.748636603071755e-06, "loss": 0.1382, "step": 167960 }, { "epoch": 3.419236641221374, "grad_norm": 0.05413186948972266, "learning_rate": 2.748002165766449e-06, "loss": 0.0052, "step": 167970 }, { "epoch": 3.419440203562341, "grad_norm": 0.03312189713894853, "learning_rate": 2.7473677739431416e-06, "loss": 0.0091, "step": 167980 }, { "epoch": 3.4196437659033077, "grad_norm": 0.06265949544425613, "learning_rate": 2.7467334276146394e-06, "loss": 0.0597, "step": 167990 }, { "epoch": 3.4198473282442747, "grad_norm": 10.517994345654134, "learning_rate": 2.7460991267937604e-06, "loss": 0.0495, "step": 168000 }, { "epoch": 3.420050890585242, "grad_norm": 9.770288133039472, "learning_rate": 2.745464871493314e-06, "loss": 0.0951, "step": 168010 }, { "epoch": 3.4202544529262084, "grad_norm": 0.02966825101122095, "learning_rate": 2.7448306617261043e-06, "loss": 0.0009, "step": 168020 }, { "epoch": 3.4204580152671755, "grad_norm": 0.009701198649055116, "learning_rate": 2.7441964975049483e-06, "loss": 0.0347, "step": 168030 }, { "epoch": 3.4206615776081426, "grad_norm": 0.028414628285333032, "learning_rate": 2.743562378842648e-06, "loss": 0.0019, "step": 168040 }, { "epoch": 3.4208651399491092, "grad_norm": 0.22108083543039092, "learning_rate": 2.7429283057520082e-06, "loss": 0.0418, "step": 168050 }, { "epoch": 3.4210687022900763, "grad_norm": 0.01810150841580913, "learning_rate": 2.742294278245844e-06, "loss": 0.0581, "step": 168060 }, { "epoch": 3.4212722646310434, "grad_norm": 13.863272245188265, "learning_rate": 2.741660296336952e-06, "loss": 0.0779, "step": 168070 }, { "epoch": 3.42147582697201, "grad_norm": 8.120067674742831, "learning_rate": 2.7410263600381372e-06, "loss": 0.1553, "step": 168080 }, { "epoch": 3.421679389312977, "grad_norm": 25.299098231388694, "learning_rate": 2.740392469362208e-06, "loss": 0.1172, "step": 168090 }, { "epoch": 3.421882951653944, "grad_norm": 2.3621741886122902, "learning_rate": 2.739758624321962e-06, "loss": 0.0594, "step": 168100 }, { "epoch": 3.422086513994911, "grad_norm": 7.155398625725178, "learning_rate": 2.7391248249302014e-06, "loss": 0.0457, "step": 168110 }, { "epoch": 3.422290076335878, "grad_norm": 27.343017043500748, "learning_rate": 2.738491071199727e-06, "loss": 0.0901, "step": 168120 }, { "epoch": 3.422493638676845, "grad_norm": 0.1766095391293902, "learning_rate": 2.737857363143338e-06, "loss": 0.0242, "step": 168130 }, { "epoch": 3.4226972010178116, "grad_norm": 0.005670637231792439, "learning_rate": 2.7372237007738334e-06, "loss": 0.0265, "step": 168140 }, { "epoch": 3.4229007633587787, "grad_norm": 0.07210275713460439, "learning_rate": 2.7365900841040104e-06, "loss": 0.014, "step": 168150 }, { "epoch": 3.4231043256997458, "grad_norm": 0.050945533007292564, "learning_rate": 2.7359565131466683e-06, "loss": 0.0171, "step": 168160 }, { "epoch": 3.4233078880407124, "grad_norm": 59.09196625222876, "learning_rate": 2.735322987914596e-06, "loss": 0.0063, "step": 168170 }, { "epoch": 3.4235114503816795, "grad_norm": 11.491329741661493, "learning_rate": 2.734689508420595e-06, "loss": 0.0633, "step": 168180 }, { "epoch": 3.4237150127226466, "grad_norm": 0.011417084241005085, "learning_rate": 2.734056074677459e-06, "loss": 0.0401, "step": 168190 }, { "epoch": 3.423918575063613, "grad_norm": 1.856741149031918, "learning_rate": 2.7334226866979758e-06, "loss": 0.0085, "step": 168200 }, { "epoch": 3.4241221374045803, "grad_norm": 0.06847905753032031, "learning_rate": 2.7327893444949423e-06, "loss": 0.1258, "step": 168210 }, { "epoch": 3.424325699745547, "grad_norm": 9.922888213792204, "learning_rate": 2.7321560480811497e-06, "loss": 0.0303, "step": 168220 }, { "epoch": 3.424529262086514, "grad_norm": 19.368329776442575, "learning_rate": 2.731522797469383e-06, "loss": 0.0288, "step": 168230 }, { "epoch": 3.424732824427481, "grad_norm": 0.011968456587443584, "learning_rate": 2.7308895926724395e-06, "loss": 0.0625, "step": 168240 }, { "epoch": 3.4249363867684477, "grad_norm": 0.035483467770770924, "learning_rate": 2.730256433703101e-06, "loss": 0.0152, "step": 168250 }, { "epoch": 3.4251399491094148, "grad_norm": 0.02036697004596967, "learning_rate": 2.7296233205741575e-06, "loss": 0.0357, "step": 168260 }, { "epoch": 3.425343511450382, "grad_norm": 0.3628318866867182, "learning_rate": 2.7289902532983946e-06, "loss": 0.1101, "step": 168270 }, { "epoch": 3.4255470737913485, "grad_norm": 0.06891542986445491, "learning_rate": 2.7283572318885985e-06, "loss": 0.0195, "step": 168280 }, { "epoch": 3.4257506361323156, "grad_norm": 0.2635831850574235, "learning_rate": 2.7277242563575547e-06, "loss": 0.0361, "step": 168290 }, { "epoch": 3.425954198473282, "grad_norm": 0.09801374850001518, "learning_rate": 2.727091326718046e-06, "loss": 0.0743, "step": 168300 }, { "epoch": 3.4261577608142493, "grad_norm": 0.7243285756650688, "learning_rate": 2.7264584429828554e-06, "loss": 0.1182, "step": 168310 }, { "epoch": 3.4263613231552164, "grad_norm": 0.03706864881490076, "learning_rate": 2.7258256051647647e-06, "loss": 0.0828, "step": 168320 }, { "epoch": 3.426564885496183, "grad_norm": 5.875089052052347, "learning_rate": 2.725192813276555e-06, "loss": 0.0216, "step": 168330 }, { "epoch": 3.42676844783715, "grad_norm": 16.204489164544277, "learning_rate": 2.7245600673310067e-06, "loss": 0.0177, "step": 168340 }, { "epoch": 3.426972010178117, "grad_norm": 0.1335605869819493, "learning_rate": 2.7239273673408983e-06, "loss": 0.1328, "step": 168350 }, { "epoch": 3.427175572519084, "grad_norm": 1.2306851899701305, "learning_rate": 2.7232947133190087e-06, "loss": 0.0318, "step": 168360 }, { "epoch": 3.427379134860051, "grad_norm": 0.5373131041047672, "learning_rate": 2.7226621052781142e-06, "loss": 0.0041, "step": 168370 }, { "epoch": 3.427582697201018, "grad_norm": 0.004113455557374175, "learning_rate": 2.722029543230994e-06, "loss": 0.0229, "step": 168380 }, { "epoch": 3.4277862595419846, "grad_norm": 53.80519956792342, "learning_rate": 2.7213970271904166e-06, "loss": 0.0754, "step": 168390 }, { "epoch": 3.4279898218829516, "grad_norm": 14.389558987955587, "learning_rate": 2.7207645571691655e-06, "loss": 0.0745, "step": 168400 }, { "epoch": 3.4281933842239187, "grad_norm": 0.05315838357957172, "learning_rate": 2.720132133180008e-06, "loss": 0.0013, "step": 168410 }, { "epoch": 3.4283969465648854, "grad_norm": 1.2053053350940586, "learning_rate": 2.719499755235716e-06, "loss": 0.0867, "step": 168420 }, { "epoch": 3.4286005089058524, "grad_norm": 0.1907054761163801, "learning_rate": 2.7188674233490686e-06, "loss": 0.0723, "step": 168430 }, { "epoch": 3.4288040712468195, "grad_norm": 26.73238982533859, "learning_rate": 2.7182351375328286e-06, "loss": 0.1658, "step": 168440 }, { "epoch": 3.429007633587786, "grad_norm": 0.10442702963409464, "learning_rate": 2.717602897799769e-06, "loss": 0.0658, "step": 168450 }, { "epoch": 3.4292111959287532, "grad_norm": 6.237309543592303, "learning_rate": 2.716970704162659e-06, "loss": 0.0511, "step": 168460 }, { "epoch": 3.4294147582697203, "grad_norm": 0.008684583367739416, "learning_rate": 2.7163385566342666e-06, "loss": 0.0924, "step": 168470 }, { "epoch": 3.429618320610687, "grad_norm": 0.05180950427279234, "learning_rate": 2.7157064552273584e-06, "loss": 0.0082, "step": 168480 }, { "epoch": 3.429821882951654, "grad_norm": 0.16212877044206334, "learning_rate": 2.715074399954699e-06, "loss": 0.001, "step": 168490 }, { "epoch": 3.430025445292621, "grad_norm": 0.10978715617963339, "learning_rate": 2.7144423908290564e-06, "loss": 0.0238, "step": 168500 }, { "epoch": 3.4302290076335877, "grad_norm": 0.05457306358522079, "learning_rate": 2.7138104278631935e-06, "loss": 0.0236, "step": 168510 }, { "epoch": 3.430432569974555, "grad_norm": 0.07074365442310694, "learning_rate": 2.713178511069873e-06, "loss": 0.0378, "step": 168520 }, { "epoch": 3.4306361323155214, "grad_norm": 0.06298686184150123, "learning_rate": 2.71254664046186e-06, "loss": 0.0007, "step": 168530 }, { "epoch": 3.4308396946564885, "grad_norm": 0.00482531444391733, "learning_rate": 2.7119148160519098e-06, "loss": 0.0459, "step": 168540 }, { "epoch": 3.4310432569974556, "grad_norm": 0.17172659044262334, "learning_rate": 2.711283037852789e-06, "loss": 0.1134, "step": 168550 }, { "epoch": 3.4312468193384222, "grad_norm": 0.4441117439911443, "learning_rate": 2.7106513058772572e-06, "loss": 0.1077, "step": 168560 }, { "epoch": 3.4314503816793893, "grad_norm": 0.06461222205782742, "learning_rate": 2.7100196201380667e-06, "loss": 0.0805, "step": 168570 }, { "epoch": 3.4316539440203564, "grad_norm": 17.369576657737802, "learning_rate": 2.7093879806479818e-06, "loss": 0.0483, "step": 168580 }, { "epoch": 3.431857506361323, "grad_norm": 0.05206513888536481, "learning_rate": 2.7087563874197587e-06, "loss": 0.1051, "step": 168590 }, { "epoch": 3.43206106870229, "grad_norm": 0.18137994356224738, "learning_rate": 2.7081248404661475e-06, "loss": 0.0223, "step": 168600 }, { "epoch": 3.432264631043257, "grad_norm": 5.605173535953801, "learning_rate": 2.707493339799911e-06, "loss": 0.0773, "step": 168610 }, { "epoch": 3.432468193384224, "grad_norm": 1.8959457474732366, "learning_rate": 2.706861885433797e-06, "loss": 0.1213, "step": 168620 }, { "epoch": 3.432671755725191, "grad_norm": 0.08684777163429508, "learning_rate": 2.706230477380559e-06, "loss": 0.0245, "step": 168630 }, { "epoch": 3.4328753180661575, "grad_norm": 0.08807799469519349, "learning_rate": 2.7055991156529547e-06, "loss": 0.0434, "step": 168640 }, { "epoch": 3.4330788804071246, "grad_norm": 0.06243661373474301, "learning_rate": 2.70496780026373e-06, "loss": 0.0432, "step": 168650 }, { "epoch": 3.4332824427480917, "grad_norm": 31.573028002663253, "learning_rate": 2.7043365312256366e-06, "loss": 0.0525, "step": 168660 }, { "epoch": 3.4334860050890583, "grad_norm": 8.640196016386291, "learning_rate": 2.703705308551423e-06, "loss": 0.0432, "step": 168670 }, { "epoch": 3.4336895674300254, "grad_norm": 0.01574549715123363, "learning_rate": 2.703074132253839e-06, "loss": 0.0756, "step": 168680 }, { "epoch": 3.4338931297709925, "grad_norm": 0.05054505055131518, "learning_rate": 2.7024430023456306e-06, "loss": 0.0377, "step": 168690 }, { "epoch": 3.434096692111959, "grad_norm": 0.03463927292535615, "learning_rate": 2.701811918839545e-06, "loss": 0.0306, "step": 168700 }, { "epoch": 3.434300254452926, "grad_norm": 0.12714248958474045, "learning_rate": 2.7011808817483287e-06, "loss": 0.0234, "step": 168710 }, { "epoch": 3.4345038167938933, "grad_norm": 0.3142119731295638, "learning_rate": 2.7005498910847245e-06, "loss": 0.0414, "step": 168720 }, { "epoch": 3.43470737913486, "grad_norm": 0.004860235028477415, "learning_rate": 2.699918946861477e-06, "loss": 0.0023, "step": 168730 }, { "epoch": 3.434910941475827, "grad_norm": 4.376332021962602, "learning_rate": 2.6992880490913287e-06, "loss": 0.0779, "step": 168740 }, { "epoch": 3.435114503816794, "grad_norm": 11.609017210636456, "learning_rate": 2.6986571977870225e-06, "loss": 0.0624, "step": 168750 }, { "epoch": 3.4353180661577607, "grad_norm": 0.11163853501582817, "learning_rate": 2.698026392961297e-06, "loss": 0.064, "step": 168760 }, { "epoch": 3.4355216284987278, "grad_norm": 9.432336883915346, "learning_rate": 2.697395634626896e-06, "loss": 0.1094, "step": 168770 }, { "epoch": 3.435725190839695, "grad_norm": 0.1944787327557254, "learning_rate": 2.696764922796552e-06, "loss": 0.0079, "step": 168780 }, { "epoch": 3.4359287531806615, "grad_norm": 42.42461239091033, "learning_rate": 2.696134257483009e-06, "loss": 0.1404, "step": 168790 }, { "epoch": 3.4361323155216286, "grad_norm": 0.12989455435822275, "learning_rate": 2.695503638699004e-06, "loss": 0.0312, "step": 168800 }, { "epoch": 3.4363358778625956, "grad_norm": 3.2403444061920648, "learning_rate": 2.6948730664572688e-06, "loss": 0.0093, "step": 168810 }, { "epoch": 3.4365394402035623, "grad_norm": 0.023508386904024417, "learning_rate": 2.6942425407705413e-06, "loss": 0.0361, "step": 168820 }, { "epoch": 3.4367430025445294, "grad_norm": 0.38542056547922043, "learning_rate": 2.693612061651556e-06, "loss": 0.0054, "step": 168830 }, { "epoch": 3.436946564885496, "grad_norm": 0.15048148581649545, "learning_rate": 2.692981629113045e-06, "loss": 0.0359, "step": 168840 }, { "epoch": 3.437150127226463, "grad_norm": 0.03360427053355171, "learning_rate": 2.692351243167741e-06, "loss": 0.035, "step": 168850 }, { "epoch": 3.43735368956743, "grad_norm": 0.003930214620666746, "learning_rate": 2.691720903828377e-06, "loss": 0.0848, "step": 168860 }, { "epoch": 3.437557251908397, "grad_norm": 0.01932077200475364, "learning_rate": 2.691090611107682e-06, "loss": 0.0271, "step": 168870 }, { "epoch": 3.437760814249364, "grad_norm": 0.05919584292009845, "learning_rate": 2.6904603650183857e-06, "loss": 0.017, "step": 168880 }, { "epoch": 3.437964376590331, "grad_norm": 6.739922929821868, "learning_rate": 2.6898301655732174e-06, "loss": 0.0259, "step": 168890 }, { "epoch": 3.4381679389312976, "grad_norm": 0.03767850043370279, "learning_rate": 2.689200012784904e-06, "loss": 0.0314, "step": 168900 }, { "epoch": 3.4383715012722647, "grad_norm": 0.08082623716939981, "learning_rate": 2.6885699066661726e-06, "loss": 0.0309, "step": 168910 }, { "epoch": 3.4385750636132317, "grad_norm": 14.008396546439128, "learning_rate": 2.687939847229749e-06, "loss": 0.0746, "step": 168920 }, { "epoch": 3.4387786259541984, "grad_norm": 0.006272611467101508, "learning_rate": 2.68730983448836e-06, "loss": 0.1158, "step": 168930 }, { "epoch": 3.4389821882951654, "grad_norm": 0.036138002306946096, "learning_rate": 2.686679868454724e-06, "loss": 0.04, "step": 168940 }, { "epoch": 3.439185750636132, "grad_norm": 0.06673774306029474, "learning_rate": 2.6860499491415693e-06, "loss": 0.1064, "step": 168950 }, { "epoch": 3.439389312977099, "grad_norm": 0.08359989587123748, "learning_rate": 2.685420076561619e-06, "loss": 0.0329, "step": 168960 }, { "epoch": 3.4395928753180662, "grad_norm": 0.016816480145950174, "learning_rate": 2.684790250727587e-06, "loss": 0.0279, "step": 168970 }, { "epoch": 3.439796437659033, "grad_norm": 0.011841461556220406, "learning_rate": 2.6841604716522013e-06, "loss": 0.0219, "step": 168980 }, { "epoch": 3.44, "grad_norm": 0.09648583417548853, "learning_rate": 2.683530739348176e-06, "loss": 0.0976, "step": 168990 }, { "epoch": 3.440203562340967, "grad_norm": 32.657932885889835, "learning_rate": 2.6829010538282285e-06, "loss": 0.0855, "step": 169000 }, { "epoch": 3.4404071246819337, "grad_norm": 0.04279299065274886, "learning_rate": 2.6822714151050832e-06, "loss": 0.0788, "step": 169010 }, { "epoch": 3.4406106870229007, "grad_norm": 0.03197190666005874, "learning_rate": 2.68164182319145e-06, "loss": 0.0553, "step": 169020 }, { "epoch": 3.440814249363868, "grad_norm": 0.025114776730831553, "learning_rate": 2.681012278100044e-06, "loss": 0.001, "step": 169030 }, { "epoch": 3.4410178117048345, "grad_norm": 0.19718414961764277, "learning_rate": 2.680382779843586e-06, "loss": 0.1147, "step": 169040 }, { "epoch": 3.4412213740458015, "grad_norm": 0.014947279975321979, "learning_rate": 2.679753328434783e-06, "loss": 0.0426, "step": 169050 }, { "epoch": 3.4414249363867686, "grad_norm": 0.023600798594756858, "learning_rate": 2.679123923886351e-06, "loss": 0.0235, "step": 169060 }, { "epoch": 3.4416284987277352, "grad_norm": 0.010344801188312554, "learning_rate": 2.6784945662109996e-06, "loss": 0.0125, "step": 169070 }, { "epoch": 3.4418320610687023, "grad_norm": 12.272429412674482, "learning_rate": 2.677865255421441e-06, "loss": 0.042, "step": 169080 }, { "epoch": 3.4420356234096694, "grad_norm": 0.05826159520589766, "learning_rate": 2.6772359915303843e-06, "loss": 0.0007, "step": 169090 }, { "epoch": 3.442239185750636, "grad_norm": 0.07077295348163463, "learning_rate": 2.6766067745505385e-06, "loss": 0.1163, "step": 169100 }, { "epoch": 3.442442748091603, "grad_norm": 0.006191065049900781, "learning_rate": 2.6759776044946113e-06, "loss": 0.0827, "step": 169110 }, { "epoch": 3.44264631043257, "grad_norm": 0.055449400153479655, "learning_rate": 2.6753484813753093e-06, "loss": 0.0113, "step": 169120 }, { "epoch": 3.442849872773537, "grad_norm": 48.11647826580076, "learning_rate": 2.6747194052053394e-06, "loss": 0.0536, "step": 169130 }, { "epoch": 3.443053435114504, "grad_norm": 0.026257111369960815, "learning_rate": 2.674090375997408e-06, "loss": 0.0034, "step": 169140 }, { "epoch": 3.443256997455471, "grad_norm": 0.013342030380028758, "learning_rate": 2.673461393764213e-06, "loss": 0.0451, "step": 169150 }, { "epoch": 3.4434605597964376, "grad_norm": 0.04829227555702809, "learning_rate": 2.672832458518464e-06, "loss": 0.131, "step": 169160 }, { "epoch": 3.4436641221374047, "grad_norm": 0.38271907391640286, "learning_rate": 2.672203570272863e-06, "loss": 0.1099, "step": 169170 }, { "epoch": 3.4438676844783713, "grad_norm": 4.117949307314113, "learning_rate": 2.6715747290401046e-06, "loss": 0.0752, "step": 169180 }, { "epoch": 3.4440712468193384, "grad_norm": 0.019643488678860222, "learning_rate": 2.6709459348328973e-06, "loss": 0.0143, "step": 169190 }, { "epoch": 3.4442748091603055, "grad_norm": 87.65321760890576, "learning_rate": 2.6703171876639356e-06, "loss": 0.1004, "step": 169200 }, { "epoch": 3.444478371501272, "grad_norm": 0.01058899544855408, "learning_rate": 2.6696884875459185e-06, "loss": 0.1219, "step": 169210 }, { "epoch": 3.444681933842239, "grad_norm": 25.381981447607565, "learning_rate": 2.669059834491543e-06, "loss": 0.0463, "step": 169220 }, { "epoch": 3.4448854961832063, "grad_norm": 0.03526737035805706, "learning_rate": 2.6684312285135073e-06, "loss": 0.0388, "step": 169230 }, { "epoch": 3.445089058524173, "grad_norm": 0.039736267670677075, "learning_rate": 2.667802669624506e-06, "loss": 0.0511, "step": 169240 }, { "epoch": 3.44529262086514, "grad_norm": 0.03482094741143461, "learning_rate": 2.6671741578372334e-06, "loss": 0.0285, "step": 169250 }, { "epoch": 3.4454961832061066, "grad_norm": 6.629429713027844, "learning_rate": 2.666545693164383e-06, "loss": 0.0509, "step": 169260 }, { "epoch": 3.4456997455470737, "grad_norm": 0.028196688193803172, "learning_rate": 2.665917275618648e-06, "loss": 0.0644, "step": 169270 }, { "epoch": 3.445903307888041, "grad_norm": 0.06559372195780802, "learning_rate": 2.66528890521272e-06, "loss": 0.0018, "step": 169280 }, { "epoch": 3.4461068702290074, "grad_norm": 13.260573361119352, "learning_rate": 2.6646605819592895e-06, "loss": 0.1061, "step": 169290 }, { "epoch": 3.4463104325699745, "grad_norm": 0.009760116266560639, "learning_rate": 2.664032305871047e-06, "loss": 0.1273, "step": 169300 }, { "epoch": 3.4465139949109416, "grad_norm": 0.0011977427767526322, "learning_rate": 2.663404076960681e-06, "loss": 0.0501, "step": 169310 }, { "epoch": 3.446717557251908, "grad_norm": 11.77080528790359, "learning_rate": 2.6627758952408776e-06, "loss": 0.0421, "step": 169320 }, { "epoch": 3.4469211195928753, "grad_norm": 0.11383509242127098, "learning_rate": 2.662147760724326e-06, "loss": 0.0114, "step": 169330 }, { "epoch": 3.4471246819338424, "grad_norm": 0.026567103706987782, "learning_rate": 2.6615196734237125e-06, "loss": 0.1411, "step": 169340 }, { "epoch": 3.447328244274809, "grad_norm": 11.592763486395777, "learning_rate": 2.6608916333517222e-06, "loss": 0.0439, "step": 169350 }, { "epoch": 3.447531806615776, "grad_norm": 11.492199371098144, "learning_rate": 2.6602636405210362e-06, "loss": 0.0377, "step": 169360 }, { "epoch": 3.447735368956743, "grad_norm": 0.01651214404621273, "learning_rate": 2.659635694944338e-06, "loss": 0.0148, "step": 169370 }, { "epoch": 3.44793893129771, "grad_norm": 9.578361913314708, "learning_rate": 2.659007796634315e-06, "loss": 0.0382, "step": 169380 }, { "epoch": 3.448142493638677, "grad_norm": 0.2085314432516412, "learning_rate": 2.6583799456036425e-06, "loss": 0.1442, "step": 169390 }, { "epoch": 3.448346055979644, "grad_norm": 0.1059565602859791, "learning_rate": 2.6577521418650008e-06, "loss": 0.0813, "step": 169400 }, { "epoch": 3.4485496183206106, "grad_norm": 0.028743781072528996, "learning_rate": 2.6571243854310756e-06, "loss": 0.0692, "step": 169410 }, { "epoch": 3.4487531806615777, "grad_norm": 0.03882256850553328, "learning_rate": 2.6564966763145394e-06, "loss": 0.0098, "step": 169420 }, { "epoch": 3.4489567430025447, "grad_norm": 4.855193114179691, "learning_rate": 2.655869014528071e-06, "loss": 0.0182, "step": 169430 }, { "epoch": 3.4491603053435114, "grad_norm": 0.036191458741088596, "learning_rate": 2.6552414000843474e-06, "loss": 0.0049, "step": 169440 }, { "epoch": 3.4493638676844784, "grad_norm": 2.5191969412877153, "learning_rate": 2.6546138329960436e-06, "loss": 0.0539, "step": 169450 }, { "epoch": 3.4495674300254455, "grad_norm": 0.006318739695817401, "learning_rate": 2.653986313275834e-06, "loss": 0.0481, "step": 169460 }, { "epoch": 3.449770992366412, "grad_norm": 0.13162235924352028, "learning_rate": 2.6533588409363924e-06, "loss": 0.0011, "step": 169470 }, { "epoch": 3.4499745547073792, "grad_norm": 8.527849022187512, "learning_rate": 2.6527314159903917e-06, "loss": 0.0815, "step": 169480 }, { "epoch": 3.450178117048346, "grad_norm": 0.14224649946067658, "learning_rate": 2.652104038450504e-06, "loss": 0.0475, "step": 169490 }, { "epoch": 3.450381679389313, "grad_norm": 0.08374519791062367, "learning_rate": 2.651476708329398e-06, "loss": 0.0007, "step": 169500 }, { "epoch": 3.45058524173028, "grad_norm": 0.14330959296104417, "learning_rate": 2.650849425639747e-06, "loss": 0.0402, "step": 169510 }, { "epoch": 3.4507888040712467, "grad_norm": 0.020503225862955898, "learning_rate": 2.6502221903942137e-06, "loss": 0.0212, "step": 169520 }, { "epoch": 3.4509923664122137, "grad_norm": 0.146403092283275, "learning_rate": 2.649595002605472e-06, "loss": 0.0058, "step": 169530 }, { "epoch": 3.451195928753181, "grad_norm": 6.932432824867717, "learning_rate": 2.6489678622861888e-06, "loss": 0.0409, "step": 169540 }, { "epoch": 3.4513994910941475, "grad_norm": 0.027269769604578237, "learning_rate": 2.6483407694490233e-06, "loss": 0.1312, "step": 169550 }, { "epoch": 3.4516030534351145, "grad_norm": 0.32627781418744967, "learning_rate": 2.6477137241066485e-06, "loss": 0.0043, "step": 169560 }, { "epoch": 3.4518066157760816, "grad_norm": 0.1455307330162158, "learning_rate": 2.647086726271724e-06, "loss": 0.0378, "step": 169570 }, { "epoch": 3.4520101781170482, "grad_norm": 0.0037137126931682126, "learning_rate": 2.64645977595691e-06, "loss": 0.0434, "step": 169580 }, { "epoch": 3.4522137404580153, "grad_norm": 0.06074945793822739, "learning_rate": 2.6458328731748774e-06, "loss": 0.0048, "step": 169590 }, { "epoch": 3.452417302798982, "grad_norm": 0.03973600344026422, "learning_rate": 2.6452060179382794e-06, "loss": 0.1041, "step": 169600 }, { "epoch": 3.452620865139949, "grad_norm": 0.07557555812270893, "learning_rate": 2.6445792102597768e-06, "loss": 0.1358, "step": 169610 }, { "epoch": 3.452824427480916, "grad_norm": 0.008777270136300627, "learning_rate": 2.6439524501520353e-06, "loss": 0.0378, "step": 169620 }, { "epoch": 3.4530279898218827, "grad_norm": 0.468423570427775, "learning_rate": 2.6433257376277067e-06, "loss": 0.072, "step": 169630 }, { "epoch": 3.45323155216285, "grad_norm": 0.012946223599523547, "learning_rate": 2.6426990726994495e-06, "loss": 0.1457, "step": 169640 }, { "epoch": 3.453435114503817, "grad_norm": 39.08284030159036, "learning_rate": 2.642072455379921e-06, "loss": 0.0207, "step": 169650 }, { "epoch": 3.4536386768447835, "grad_norm": 0.11131360869025038, "learning_rate": 2.6414458856817765e-06, "loss": 0.0689, "step": 169660 }, { "epoch": 3.4538422391857506, "grad_norm": 0.027712390242530454, "learning_rate": 2.64081936361767e-06, "loss": 0.0314, "step": 169670 }, { "epoch": 3.4540458015267177, "grad_norm": 0.04408046266762237, "learning_rate": 2.6401928892002548e-06, "loss": 0.0375, "step": 169680 }, { "epoch": 3.4542493638676843, "grad_norm": 20.700928310252095, "learning_rate": 2.639566462442184e-06, "loss": 0.015, "step": 169690 }, { "epoch": 3.4544529262086514, "grad_norm": 0.003749181485464777, "learning_rate": 2.6389400833561084e-06, "loss": 0.023, "step": 169700 }, { "epoch": 3.4546564885496185, "grad_norm": 0.11157609204560082, "learning_rate": 2.6383137519546788e-06, "loss": 0.0964, "step": 169710 }, { "epoch": 3.454860050890585, "grad_norm": 8.235715817197914, "learning_rate": 2.6376874682505473e-06, "loss": 0.1194, "step": 169720 }, { "epoch": 3.455063613231552, "grad_norm": 17.299583970926, "learning_rate": 2.637061232256356e-06, "loss": 0.0742, "step": 169730 }, { "epoch": 3.4552671755725193, "grad_norm": 1.1671778352877489, "learning_rate": 2.636435043984758e-06, "loss": 0.1232, "step": 169740 }, { "epoch": 3.455470737913486, "grad_norm": 0.08651199133031634, "learning_rate": 2.6358089034484002e-06, "loss": 0.0007, "step": 169750 }, { "epoch": 3.455674300254453, "grad_norm": 0.036266569969385404, "learning_rate": 2.6351828106599237e-06, "loss": 0.1114, "step": 169760 }, { "epoch": 3.45587786259542, "grad_norm": 0.035140332694697955, "learning_rate": 2.634556765631978e-06, "loss": 0.0606, "step": 169770 }, { "epoch": 3.4560814249363867, "grad_norm": 16.344749202461593, "learning_rate": 2.6339307683772063e-06, "loss": 0.1295, "step": 169780 }, { "epoch": 3.456284987277354, "grad_norm": 0.1152877352235135, "learning_rate": 2.633304818908249e-06, "loss": 0.038, "step": 169790 }, { "epoch": 3.456488549618321, "grad_norm": 0.16894307998391192, "learning_rate": 2.632678917237749e-06, "loss": 0.0017, "step": 169800 }, { "epoch": 3.4566921119592875, "grad_norm": 7.703829368128883, "learning_rate": 2.632053063378347e-06, "loss": 0.1836, "step": 169810 }, { "epoch": 3.4568956743002546, "grad_norm": 0.09469585866325192, "learning_rate": 2.6314272573426837e-06, "loss": 0.0137, "step": 169820 }, { "epoch": 3.457099236641221, "grad_norm": 0.06695446108345394, "learning_rate": 2.630801499143397e-06, "loss": 0.0115, "step": 169830 }, { "epoch": 3.4573027989821883, "grad_norm": 0.09858092451049608, "learning_rate": 2.6301757887931266e-06, "loss": 0.1794, "step": 169840 }, { "epoch": 3.4575063613231554, "grad_norm": 1.6236822540091642, "learning_rate": 2.6295501263045074e-06, "loss": 0.0018, "step": 169850 }, { "epoch": 3.457709923664122, "grad_norm": 0.1566523542031435, "learning_rate": 2.6289245116901773e-06, "loss": 0.1047, "step": 169860 }, { "epoch": 3.457913486005089, "grad_norm": 0.05414209280158449, "learning_rate": 2.6282989449627706e-06, "loss": 0.0409, "step": 169870 }, { "epoch": 3.458117048346056, "grad_norm": 0.04770707239444868, "learning_rate": 2.6276734261349207e-06, "loss": 0.0084, "step": 169880 }, { "epoch": 3.458320610687023, "grad_norm": 0.13494520072276395, "learning_rate": 2.6270479552192617e-06, "loss": 0.0285, "step": 169890 }, { "epoch": 3.45852417302799, "grad_norm": 0.5030815585599185, "learning_rate": 2.6264225322284265e-06, "loss": 0.0362, "step": 169900 }, { "epoch": 3.4587277353689565, "grad_norm": 0.09986405311947641, "learning_rate": 2.6257971571750462e-06, "loss": 0.0722, "step": 169910 }, { "epoch": 3.4589312977099236, "grad_norm": 0.048836113439791246, "learning_rate": 2.6251718300717466e-06, "loss": 0.0018, "step": 169920 }, { "epoch": 3.4591348600508907, "grad_norm": 0.19074821694114194, "learning_rate": 2.624546550931165e-06, "loss": 0.0632, "step": 169930 }, { "epoch": 3.4593384223918573, "grad_norm": 3.341424837200648, "learning_rate": 2.623921319765923e-06, "loss": 0.0428, "step": 169940 }, { "epoch": 3.4595419847328244, "grad_norm": 0.023606814079395903, "learning_rate": 2.623296136588649e-06, "loss": 0.0674, "step": 169950 }, { "epoch": 3.4597455470737914, "grad_norm": 0.04830462109061235, "learning_rate": 2.622671001411974e-06, "loss": 0.0869, "step": 169960 }, { "epoch": 3.459949109414758, "grad_norm": 0.5728067999974824, "learning_rate": 2.6220459142485177e-06, "loss": 0.0577, "step": 169970 }, { "epoch": 3.460152671755725, "grad_norm": 9.27089932591886, "learning_rate": 2.621420875110906e-06, "loss": 0.0732, "step": 169980 }, { "epoch": 3.4603562340966922, "grad_norm": 0.32282039803131524, "learning_rate": 2.6207958840117664e-06, "loss": 0.0143, "step": 169990 }, { "epoch": 3.460559796437659, "grad_norm": 7.1963997684002345, "learning_rate": 2.620170940963717e-06, "loss": 0.2289, "step": 170000 }, { "epoch": 3.460763358778626, "grad_norm": 0.6913021400416596, "learning_rate": 2.6195460459793807e-06, "loss": 0.0369, "step": 170010 }, { "epoch": 3.460966921119593, "grad_norm": 9.718285164207122, "learning_rate": 2.6189211990713777e-06, "loss": 0.0364, "step": 170020 }, { "epoch": 3.4611704834605597, "grad_norm": 8.076805001660206, "learning_rate": 2.618296400252328e-06, "loss": 0.0685, "step": 170030 }, { "epoch": 3.4613740458015267, "grad_norm": 3.606580898660319, "learning_rate": 2.61767164953485e-06, "loss": 0.0056, "step": 170040 }, { "epoch": 3.461577608142494, "grad_norm": 0.16237263992423784, "learning_rate": 2.6170469469315613e-06, "loss": 0.0661, "step": 170050 }, { "epoch": 3.4617811704834605, "grad_norm": 0.04366867510480071, "learning_rate": 2.6164222924550786e-06, "loss": 0.024, "step": 170060 }, { "epoch": 3.4619847328244275, "grad_norm": 198.02885015066818, "learning_rate": 2.615797686118018e-06, "loss": 0.0217, "step": 170070 }, { "epoch": 3.4621882951653946, "grad_norm": 0.009732809226645576, "learning_rate": 2.615173127932994e-06, "loss": 0.0649, "step": 170080 }, { "epoch": 3.4623918575063612, "grad_norm": 0.06417818761242816, "learning_rate": 2.6145486179126223e-06, "loss": 0.0838, "step": 170090 }, { "epoch": 3.4625954198473283, "grad_norm": 0.05849899240340678, "learning_rate": 2.6139241560695093e-06, "loss": 0.0899, "step": 170100 }, { "epoch": 3.4627989821882954, "grad_norm": 0.06203913195448224, "learning_rate": 2.613299742416274e-06, "loss": 0.0441, "step": 170110 }, { "epoch": 3.463002544529262, "grad_norm": 11.860248516307395, "learning_rate": 2.612675376965526e-06, "loss": 0.153, "step": 170120 }, { "epoch": 3.463206106870229, "grad_norm": 0.010120232840240377, "learning_rate": 2.6120510597298696e-06, "loss": 0.0297, "step": 170130 }, { "epoch": 3.4634096692111958, "grad_norm": 0.0793224046436147, "learning_rate": 2.61142679072192e-06, "loss": 0.0098, "step": 170140 }, { "epoch": 3.463613231552163, "grad_norm": 4.6338341488119706e-05, "learning_rate": 2.6108025699542842e-06, "loss": 0.0393, "step": 170150 }, { "epoch": 3.46381679389313, "grad_norm": 0.07863270175807578, "learning_rate": 2.6101783974395647e-06, "loss": 0.0519, "step": 170160 }, { "epoch": 3.4640203562340965, "grad_norm": 33.191716429486924, "learning_rate": 2.609554273190373e-06, "loss": 0.0361, "step": 170170 }, { "epoch": 3.4642239185750636, "grad_norm": 22.351425789689934, "learning_rate": 2.6089301972193115e-06, "loss": 0.0564, "step": 170180 }, { "epoch": 3.4644274809160307, "grad_norm": 0.7285957189758864, "learning_rate": 2.6083061695389835e-06, "loss": 0.0256, "step": 170190 }, { "epoch": 3.4646310432569973, "grad_norm": 0.03403665324778073, "learning_rate": 2.6076821901619925e-06, "loss": 0.0374, "step": 170200 }, { "epoch": 3.4648346055979644, "grad_norm": 0.046055162864489414, "learning_rate": 2.6070582591009412e-06, "loss": 0.0692, "step": 170210 }, { "epoch": 3.4650381679389315, "grad_norm": 12.570934815759221, "learning_rate": 2.606434376368431e-06, "loss": 0.021, "step": 170220 }, { "epoch": 3.465241730279898, "grad_norm": 0.7087246108848421, "learning_rate": 2.6058105419770607e-06, "loss": 0.0025, "step": 170230 }, { "epoch": 3.465445292620865, "grad_norm": 0.03925263502631006, "learning_rate": 2.6051867559394307e-06, "loss": 0.0315, "step": 170240 }, { "epoch": 3.465648854961832, "grad_norm": 9.026441417829892, "learning_rate": 2.604563018268138e-06, "loss": 0.1576, "step": 170250 }, { "epoch": 3.465852417302799, "grad_norm": 0.03742408785685005, "learning_rate": 2.603939328975781e-06, "loss": 0.149, "step": 170260 }, { "epoch": 3.466055979643766, "grad_norm": 21.56036870331421, "learning_rate": 2.6033156880749555e-06, "loss": 0.0433, "step": 170270 }, { "epoch": 3.4662595419847326, "grad_norm": 0.09737947143949072, "learning_rate": 2.6026920955782566e-06, "loss": 0.0627, "step": 170280 }, { "epoch": 3.4664631043256997, "grad_norm": 0.20767521948992343, "learning_rate": 2.602068551498278e-06, "loss": 0.0263, "step": 170290 }, { "epoch": 3.466666666666667, "grad_norm": 0.06909464635447832, "learning_rate": 2.6014450558476155e-06, "loss": 0.0021, "step": 170300 }, { "epoch": 3.4668702290076334, "grad_norm": 9.360765478778497, "learning_rate": 2.600821608638856e-06, "loss": 0.0446, "step": 170310 }, { "epoch": 3.4670737913486005, "grad_norm": 4.756058082496514, "learning_rate": 2.6001982098845958e-06, "loss": 0.0142, "step": 170320 }, { "epoch": 3.4672773536895676, "grad_norm": 0.19569508361747373, "learning_rate": 2.599574859597426e-06, "loss": 0.0069, "step": 170330 }, { "epoch": 3.467480916030534, "grad_norm": 0.010334871241259366, "learning_rate": 2.598951557789931e-06, "loss": 0.0331, "step": 170340 }, { "epoch": 3.4676844783715013, "grad_norm": 0.04880692275018271, "learning_rate": 2.5983283044747005e-06, "loss": 0.0649, "step": 170350 }, { "epoch": 3.4678880407124684, "grad_norm": 0.025833389773582943, "learning_rate": 2.597705099664327e-06, "loss": 0.0807, "step": 170360 }, { "epoch": 3.468091603053435, "grad_norm": 15.927520302037536, "learning_rate": 2.5970819433713913e-06, "loss": 0.0789, "step": 170370 }, { "epoch": 3.468295165394402, "grad_norm": 0.007757048092450104, "learning_rate": 2.5964588356084804e-06, "loss": 0.0482, "step": 170380 }, { "epoch": 3.468498727735369, "grad_norm": 0.05005353777599029, "learning_rate": 2.595835776388179e-06, "loss": 0.0301, "step": 170390 }, { "epoch": 3.468702290076336, "grad_norm": 11.628113867120298, "learning_rate": 2.5952127657230707e-06, "loss": 0.0799, "step": 170400 }, { "epoch": 3.468905852417303, "grad_norm": 63.10815591797031, "learning_rate": 2.5945898036257376e-06, "loss": 0.0802, "step": 170410 }, { "epoch": 3.46910941475827, "grad_norm": 0.05153853107048158, "learning_rate": 2.5939668901087624e-06, "loss": 0.1293, "step": 170420 }, { "epoch": 3.4693129770992366, "grad_norm": 0.028663709618380433, "learning_rate": 2.593344025184724e-06, "loss": 0.011, "step": 170430 }, { "epoch": 3.4695165394402037, "grad_norm": 0.11459375734163002, "learning_rate": 2.5927212088662035e-06, "loss": 0.0408, "step": 170440 }, { "epoch": 3.4697201017811703, "grad_norm": 0.016896313583676367, "learning_rate": 2.5920984411657786e-06, "loss": 0.0284, "step": 170450 }, { "epoch": 3.4699236641221374, "grad_norm": 0.02515032230395572, "learning_rate": 2.5914757220960285e-06, "loss": 0.069, "step": 170460 }, { "epoch": 3.4701272264631045, "grad_norm": 11.852510033744986, "learning_rate": 2.590853051669525e-06, "loss": 0.0486, "step": 170470 }, { "epoch": 3.470330788804071, "grad_norm": 0.042420315392962156, "learning_rate": 2.590230429898849e-06, "loss": 0.0349, "step": 170480 }, { "epoch": 3.470534351145038, "grad_norm": 0.7729611713293527, "learning_rate": 2.589607856796574e-06, "loss": 0.0014, "step": 170490 }, { "epoch": 3.4707379134860052, "grad_norm": 0.8550758292127674, "learning_rate": 2.58898533237527e-06, "loss": 0.1444, "step": 170500 }, { "epoch": 3.470941475826972, "grad_norm": 0.030684953910073243, "learning_rate": 2.5883628566475148e-06, "loss": 0.0008, "step": 170510 }, { "epoch": 3.471145038167939, "grad_norm": 0.008705995329755311, "learning_rate": 2.5877404296258797e-06, "loss": 0.1111, "step": 170520 }, { "epoch": 3.471348600508906, "grad_norm": 0.30084142732086083, "learning_rate": 2.5871180513229293e-06, "loss": 0.0006, "step": 170530 }, { "epoch": 3.4715521628498727, "grad_norm": 0.017240866391771945, "learning_rate": 2.586495721751242e-06, "loss": 0.1201, "step": 170540 }, { "epoch": 3.4717557251908397, "grad_norm": 0.023273998032070715, "learning_rate": 2.5858734409233797e-06, "loss": 0.0128, "step": 170550 }, { "epoch": 3.4719592875318064, "grad_norm": 0.09195410390761548, "learning_rate": 2.585251208851911e-06, "loss": 0.035, "step": 170560 }, { "epoch": 3.4721628498727735, "grad_norm": 0.017330407832932705, "learning_rate": 2.584629025549408e-06, "loss": 0.0294, "step": 170570 }, { "epoch": 3.4723664122137405, "grad_norm": 0.12442570273110543, "learning_rate": 2.584006891028431e-06, "loss": 0.0409, "step": 170580 }, { "epoch": 3.472569974554707, "grad_norm": 12.672241840526915, "learning_rate": 2.5833848053015466e-06, "loss": 0.0665, "step": 170590 }, { "epoch": 3.4727735368956743, "grad_norm": 0.26438139103129016, "learning_rate": 2.5827627683813183e-06, "loss": 0.0515, "step": 170600 }, { "epoch": 3.4729770992366413, "grad_norm": 0.061970664738297807, "learning_rate": 2.58214078028031e-06, "loss": 0.0791, "step": 170610 }, { "epoch": 3.473180661577608, "grad_norm": 0.024160476277706813, "learning_rate": 2.5815188410110824e-06, "loss": 0.0077, "step": 170620 }, { "epoch": 3.473384223918575, "grad_norm": 0.06084172854129052, "learning_rate": 2.580896950586197e-06, "loss": 0.0616, "step": 170630 }, { "epoch": 3.473587786259542, "grad_norm": 0.00860345629639455, "learning_rate": 2.5802751090182132e-06, "loss": 0.1068, "step": 170640 }, { "epoch": 3.4737913486005088, "grad_norm": 0.025065788475776955, "learning_rate": 2.5796533163196903e-06, "loss": 0.0272, "step": 170650 }, { "epoch": 3.473994910941476, "grad_norm": 8.65870617803724, "learning_rate": 2.5790315725031854e-06, "loss": 0.0788, "step": 170660 }, { "epoch": 3.474198473282443, "grad_norm": 12.179491427537757, "learning_rate": 2.5784098775812585e-06, "loss": 0.0625, "step": 170670 }, { "epoch": 3.4744020356234095, "grad_norm": 24.273175528081715, "learning_rate": 2.577788231566459e-06, "loss": 0.1073, "step": 170680 }, { "epoch": 3.4746055979643766, "grad_norm": 23.095783631219113, "learning_rate": 2.577166634471349e-06, "loss": 0.14, "step": 170690 }, { "epoch": 3.4748091603053437, "grad_norm": 0.21395963727360376, "learning_rate": 2.57654508630848e-06, "loss": 0.0732, "step": 170700 }, { "epoch": 3.4750127226463103, "grad_norm": 0.1734925467293572, "learning_rate": 2.5759235870904007e-06, "loss": 0.0205, "step": 170710 }, { "epoch": 3.4752162849872774, "grad_norm": 0.2667907430018616, "learning_rate": 2.5753021368296695e-06, "loss": 0.0266, "step": 170720 }, { "epoch": 3.4754198473282445, "grad_norm": 0.068132331858666, "learning_rate": 2.574680735538836e-06, "loss": 0.0694, "step": 170730 }, { "epoch": 3.475623409669211, "grad_norm": 0.038948420722745576, "learning_rate": 2.5740593832304474e-06, "loss": 0.0437, "step": 170740 }, { "epoch": 3.475826972010178, "grad_norm": 0.16504932532360533, "learning_rate": 2.5734380799170533e-06, "loss": 0.0396, "step": 170750 }, { "epoch": 3.4760305343511453, "grad_norm": 6.024673345230689, "learning_rate": 2.572816825611203e-06, "loss": 0.0736, "step": 170760 }, { "epoch": 3.476234096692112, "grad_norm": 0.026897004568698157, "learning_rate": 2.5721956203254435e-06, "loss": 0.0518, "step": 170770 }, { "epoch": 3.476437659033079, "grad_norm": 0.009751459517196774, "learning_rate": 2.5715744640723194e-06, "loss": 0.042, "step": 170780 }, { "epoch": 3.4766412213740456, "grad_norm": 6.920407525426282, "learning_rate": 2.5709533568643775e-06, "loss": 0.065, "step": 170790 }, { "epoch": 3.4768447837150127, "grad_norm": 15.344348088025153, "learning_rate": 2.570332298714161e-06, "loss": 0.1208, "step": 170800 }, { "epoch": 3.47704834605598, "grad_norm": 0.05466053308269576, "learning_rate": 2.5697112896342137e-06, "loss": 0.0433, "step": 170810 }, { "epoch": 3.4772519083969464, "grad_norm": 0.11833027503106484, "learning_rate": 2.569090329637076e-06, "loss": 0.0989, "step": 170820 }, { "epoch": 3.4774554707379135, "grad_norm": 0.10982979525573221, "learning_rate": 2.568469418735291e-06, "loss": 0.1082, "step": 170830 }, { "epoch": 3.4776590330788806, "grad_norm": 9.39491706580676, "learning_rate": 2.5678485569413972e-06, "loss": 0.0253, "step": 170840 }, { "epoch": 3.477862595419847, "grad_norm": 0.09768162482784272, "learning_rate": 2.567227744267935e-06, "loss": 0.0535, "step": 170850 }, { "epoch": 3.4780661577608143, "grad_norm": 25.936126493653546, "learning_rate": 2.5666069807274417e-06, "loss": 0.0047, "step": 170860 }, { "epoch": 3.478269720101781, "grad_norm": 0.320976857903231, "learning_rate": 2.565986266332454e-06, "loss": 0.0044, "step": 170870 }, { "epoch": 3.478473282442748, "grad_norm": 0.04410339615250318, "learning_rate": 2.56536560109551e-06, "loss": 0.1147, "step": 170880 }, { "epoch": 3.478676844783715, "grad_norm": 4.342397809779811, "learning_rate": 2.564744985029144e-06, "loss": 0.0603, "step": 170890 }, { "epoch": 3.4788804071246817, "grad_norm": 22.988005596570463, "learning_rate": 2.564124418145886e-06, "loss": 0.0098, "step": 170900 }, { "epoch": 3.479083969465649, "grad_norm": 0.024398787058208905, "learning_rate": 2.5635039004582773e-06, "loss": 0.0043, "step": 170910 }, { "epoch": 3.479287531806616, "grad_norm": 0.02568390893449333, "learning_rate": 2.5628834319788433e-06, "loss": 0.0218, "step": 170920 }, { "epoch": 3.4794910941475825, "grad_norm": 0.15224599008924194, "learning_rate": 2.5622630127201153e-06, "loss": 0.0515, "step": 170930 }, { "epoch": 3.4796946564885496, "grad_norm": 22.32515189768491, "learning_rate": 2.5616426426946293e-06, "loss": 0.1045, "step": 170940 }, { "epoch": 3.4798982188295167, "grad_norm": 0.07090523199074278, "learning_rate": 2.5610223219149088e-06, "loss": 0.0124, "step": 170950 }, { "epoch": 3.4801017811704833, "grad_norm": 11.506556299066194, "learning_rate": 2.5604020503934846e-06, "loss": 0.0628, "step": 170960 }, { "epoch": 3.4803053435114504, "grad_norm": 0.08110675099569682, "learning_rate": 2.559781828142882e-06, "loss": 0.053, "step": 170970 }, { "epoch": 3.4805089058524175, "grad_norm": 2.6051438389280626, "learning_rate": 2.5591616551756292e-06, "loss": 0.0364, "step": 170980 }, { "epoch": 3.480712468193384, "grad_norm": 0.20211769852890135, "learning_rate": 2.5585415315042507e-06, "loss": 0.1033, "step": 170990 }, { "epoch": 3.480916030534351, "grad_norm": 0.24941130311761767, "learning_rate": 2.5579214571412698e-06, "loss": 0.0171, "step": 171000 }, { "epoch": 3.4811195928753182, "grad_norm": 0.027783185510625485, "learning_rate": 2.5573014320992106e-06, "loss": 0.0348, "step": 171010 }, { "epoch": 3.481323155216285, "grad_norm": 0.0001287174553514314, "learning_rate": 2.5566814563905958e-06, "loss": 0.0003, "step": 171020 }, { "epoch": 3.481526717557252, "grad_norm": 0.025500431012697933, "learning_rate": 2.5560615300279453e-06, "loss": 0.002, "step": 171030 }, { "epoch": 3.481730279898219, "grad_norm": 0.004550606562034779, "learning_rate": 2.555441653023782e-06, "loss": 0.0195, "step": 171040 }, { "epoch": 3.4819338422391857, "grad_norm": 0.02613946879183717, "learning_rate": 2.5548218253906194e-06, "loss": 0.081, "step": 171050 }, { "epoch": 3.4821374045801528, "grad_norm": 0.021621906140627808, "learning_rate": 2.5542020471409814e-06, "loss": 0.0479, "step": 171060 }, { "epoch": 3.48234096692112, "grad_norm": 0.007122469170837001, "learning_rate": 2.5535823182873847e-06, "loss": 0.047, "step": 171070 }, { "epoch": 3.4825445292620865, "grad_norm": 0.07930979338738141, "learning_rate": 2.5529626388423405e-06, "loss": 0.0726, "step": 171080 }, { "epoch": 3.4827480916030535, "grad_norm": 7.630943296505998, "learning_rate": 2.5523430088183696e-06, "loss": 0.1499, "step": 171090 }, { "epoch": 3.48295165394402, "grad_norm": 0.014779273909555636, "learning_rate": 2.5517234282279856e-06, "loss": 0.0119, "step": 171100 }, { "epoch": 3.4831552162849873, "grad_norm": 17.22513288245309, "learning_rate": 2.5511038970836975e-06, "loss": 0.1191, "step": 171110 }, { "epoch": 3.4833587786259543, "grad_norm": 0.08625693466277927, "learning_rate": 2.5504844153980236e-06, "loss": 0.0476, "step": 171120 }, { "epoch": 3.483562340966921, "grad_norm": 37.525291618647536, "learning_rate": 2.54986498318347e-06, "loss": 0.0234, "step": 171130 }, { "epoch": 3.483765903307888, "grad_norm": 0.027800843729121893, "learning_rate": 2.5492456004525467e-06, "loss": 0.0074, "step": 171140 }, { "epoch": 3.483969465648855, "grad_norm": 0.1596600885988196, "learning_rate": 2.5486262672177685e-06, "loss": 0.0472, "step": 171150 }, { "epoch": 3.4841730279898218, "grad_norm": 0.004777469455305542, "learning_rate": 2.5480069834916387e-06, "loss": 0.0189, "step": 171160 }, { "epoch": 3.484376590330789, "grad_norm": 13.18493858664342, "learning_rate": 2.5473877492866667e-06, "loss": 0.0378, "step": 171170 }, { "epoch": 3.484580152671756, "grad_norm": 0.1371960779495105, "learning_rate": 2.5467685646153574e-06, "loss": 0.0604, "step": 171180 }, { "epoch": 3.4847837150127225, "grad_norm": 0.5248342044383887, "learning_rate": 2.5461494294902166e-06, "loss": 0.002, "step": 171190 }, { "epoch": 3.4849872773536896, "grad_norm": 0.09806575124334652, "learning_rate": 2.545530343923749e-06, "loss": 0.0668, "step": 171200 }, { "epoch": 3.4851908396946563, "grad_norm": 0.17106699240776432, "learning_rate": 2.544911307928457e-06, "loss": 0.057, "step": 171210 }, { "epoch": 3.4853944020356233, "grad_norm": 0.0340338756231802, "learning_rate": 2.5442923215168438e-06, "loss": 0.094, "step": 171220 }, { "epoch": 3.4855979643765904, "grad_norm": 12.554117946589951, "learning_rate": 2.5436733847014092e-06, "loss": 0.0869, "step": 171230 }, { "epoch": 3.485801526717557, "grad_norm": 0.0686777248478189, "learning_rate": 2.543054497494655e-06, "loss": 0.0562, "step": 171240 }, { "epoch": 3.486005089058524, "grad_norm": 0.05392435398192887, "learning_rate": 2.5424356599090818e-06, "loss": 0.0572, "step": 171250 }, { "epoch": 3.486208651399491, "grad_norm": 0.5443180401202411, "learning_rate": 2.5418168719571814e-06, "loss": 0.0402, "step": 171260 }, { "epoch": 3.486412213740458, "grad_norm": 0.07164520971916433, "learning_rate": 2.5411981336514575e-06, "loss": 0.0021, "step": 171270 }, { "epoch": 3.486615776081425, "grad_norm": 18.973099335408246, "learning_rate": 2.5405794450044064e-06, "loss": 0.0699, "step": 171280 }, { "epoch": 3.486819338422392, "grad_norm": 12.38951026385943, "learning_rate": 2.5399608060285164e-06, "loss": 0.0636, "step": 171290 }, { "epoch": 3.4870229007633586, "grad_norm": 0.033942365638719836, "learning_rate": 2.539342216736289e-06, "loss": 0.0708, "step": 171300 }, { "epoch": 3.4872264631043257, "grad_norm": 0.033019248057519886, "learning_rate": 2.5387236771402167e-06, "loss": 0.0723, "step": 171310 }, { "epoch": 3.487430025445293, "grad_norm": 0.9586100158675607, "learning_rate": 2.5381051872527883e-06, "loss": 0.0139, "step": 171320 }, { "epoch": 3.4876335877862594, "grad_norm": 0.18583176727652725, "learning_rate": 2.537486747086496e-06, "loss": 0.0772, "step": 171330 }, { "epoch": 3.4878371501272265, "grad_norm": 0.010039426320560851, "learning_rate": 2.536868356653831e-06, "loss": 0.0022, "step": 171340 }, { "epoch": 3.4880407124681936, "grad_norm": 0.029741936145178987, "learning_rate": 2.536250015967282e-06, "loss": 0.051, "step": 171350 }, { "epoch": 3.48824427480916, "grad_norm": 0.09631060747505187, "learning_rate": 2.535631725039337e-06, "loss": 0.0648, "step": 171360 }, { "epoch": 3.4884478371501273, "grad_norm": 0.10520755276216787, "learning_rate": 2.5350134838824836e-06, "loss": 0.0778, "step": 171370 }, { "epoch": 3.4886513994910944, "grad_norm": 0.09440282136708744, "learning_rate": 2.5343952925092075e-06, "loss": 0.032, "step": 171380 }, { "epoch": 3.488854961832061, "grad_norm": 64.82336251377329, "learning_rate": 2.5337771509319943e-06, "loss": 0.0875, "step": 171390 }, { "epoch": 3.489058524173028, "grad_norm": 0.15205417171068164, "learning_rate": 2.5331590591633274e-06, "loss": 0.0656, "step": 171400 }, { "epoch": 3.4892620865139947, "grad_norm": 18.262203030653954, "learning_rate": 2.532541017215691e-06, "loss": 0.0417, "step": 171410 }, { "epoch": 3.489465648854962, "grad_norm": 0.04683653381732578, "learning_rate": 2.531923025101567e-06, "loss": 0.0558, "step": 171420 }, { "epoch": 3.489669211195929, "grad_norm": 0.051326205361846906, "learning_rate": 2.531305082833436e-06, "loss": 0.001, "step": 171430 }, { "epoch": 3.4898727735368955, "grad_norm": 0.021399926766478557, "learning_rate": 2.530687190423781e-06, "loss": 0.0366, "step": 171440 }, { "epoch": 3.4900763358778626, "grad_norm": 3.1989989028622685, "learning_rate": 2.5300693478850745e-06, "loss": 0.0008, "step": 171450 }, { "epoch": 3.4902798982188297, "grad_norm": 0.0360086331064384, "learning_rate": 2.5294515552298003e-06, "loss": 0.0407, "step": 171460 }, { "epoch": 3.4904834605597963, "grad_norm": 0.0487892860332154, "learning_rate": 2.528833812470437e-06, "loss": 0.0048, "step": 171470 }, { "epoch": 3.4906870229007634, "grad_norm": 0.7929185099988529, "learning_rate": 2.5282161196194525e-06, "loss": 0.0455, "step": 171480 }, { "epoch": 3.4908905852417305, "grad_norm": 0.06957917403076672, "learning_rate": 2.527598476689332e-06, "loss": 0.0609, "step": 171490 }, { "epoch": 3.491094147582697, "grad_norm": 0.0023461723266178444, "learning_rate": 2.5269808836925425e-06, "loss": 0.0629, "step": 171500 }, { "epoch": 3.491297709923664, "grad_norm": 0.08190461612784924, "learning_rate": 2.526363340641557e-06, "loss": 0.1175, "step": 171510 }, { "epoch": 3.491501272264631, "grad_norm": 0.03591377079422244, "learning_rate": 2.525745847548854e-06, "loss": 0.0583, "step": 171520 }, { "epoch": 3.491704834605598, "grad_norm": 0.09534020712087897, "learning_rate": 2.5251284044268986e-06, "loss": 0.0245, "step": 171530 }, { "epoch": 3.491908396946565, "grad_norm": 0.08927166065641655, "learning_rate": 2.5245110112881597e-06, "loss": 0.0963, "step": 171540 }, { "epoch": 3.4921119592875316, "grad_norm": 4.53110907382717, "learning_rate": 2.523893668145114e-06, "loss": 0.031, "step": 171550 }, { "epoch": 3.4923155216284987, "grad_norm": 0.08115255454048581, "learning_rate": 2.5232763750102237e-06, "loss": 0.0876, "step": 171560 }, { "epoch": 3.4925190839694658, "grad_norm": 0.3388077397897793, "learning_rate": 2.522659131895956e-06, "loss": 0.0457, "step": 171570 }, { "epoch": 3.4927226463104324, "grad_norm": 0.18521935756770522, "learning_rate": 2.522041938814779e-06, "loss": 0.0589, "step": 171580 }, { "epoch": 3.4929262086513995, "grad_norm": 0.008925669121487017, "learning_rate": 2.521424795779155e-06, "loss": 0.1225, "step": 171590 }, { "epoch": 3.4931297709923665, "grad_norm": 0.13781075623159492, "learning_rate": 2.520807702801551e-06, "loss": 0.0045, "step": 171600 }, { "epoch": 3.493333333333333, "grad_norm": 8.945848124086428, "learning_rate": 2.520190659894428e-06, "loss": 0.0822, "step": 171610 }, { "epoch": 3.4935368956743003, "grad_norm": 0.060969394548479716, "learning_rate": 2.5195736670702507e-06, "loss": 0.0858, "step": 171620 }, { "epoch": 3.4937404580152673, "grad_norm": 8.18403503586476, "learning_rate": 2.518956724341474e-06, "loss": 0.0342, "step": 171630 }, { "epoch": 3.493944020356234, "grad_norm": 0.034277630014249834, "learning_rate": 2.518339831720563e-06, "loss": 0.0557, "step": 171640 }, { "epoch": 3.494147582697201, "grad_norm": 1.2792122341080472, "learning_rate": 2.517722989219978e-06, "loss": 0.0801, "step": 171650 }, { "epoch": 3.494351145038168, "grad_norm": 0.03437668779658626, "learning_rate": 2.5171061968521715e-06, "loss": 0.0815, "step": 171660 }, { "epoch": 3.4945547073791348, "grad_norm": 0.426687023033708, "learning_rate": 2.5164894546296043e-06, "loss": 0.0195, "step": 171670 }, { "epoch": 3.494758269720102, "grad_norm": 0.611373540026647, "learning_rate": 2.5158727625647334e-06, "loss": 0.092, "step": 171680 }, { "epoch": 3.494961832061069, "grad_norm": 50.733620814738224, "learning_rate": 2.5152561206700077e-06, "loss": 0.0584, "step": 171690 }, { "epoch": 3.4951653944020356, "grad_norm": 0.010219042682505553, "learning_rate": 2.5146395289578882e-06, "loss": 0.0631, "step": 171700 }, { "epoch": 3.4953689567430026, "grad_norm": 0.037145712746138045, "learning_rate": 2.514022987440824e-06, "loss": 0.1216, "step": 171710 }, { "epoch": 3.4955725190839697, "grad_norm": 7.563498726453808, "learning_rate": 2.5134064961312666e-06, "loss": 0.1102, "step": 171720 }, { "epoch": 3.4957760814249363, "grad_norm": 0.03653711449169976, "learning_rate": 2.512790055041667e-06, "loss": 0.0741, "step": 171730 }, { "epoch": 3.4959796437659034, "grad_norm": 0.10977255615919308, "learning_rate": 2.5121736641844775e-06, "loss": 0.0297, "step": 171740 }, { "epoch": 3.49618320610687, "grad_norm": 0.0186515712462671, "learning_rate": 2.5115573235721433e-06, "loss": 0.0347, "step": 171750 }, { "epoch": 3.496386768447837, "grad_norm": 0.20243232685148693, "learning_rate": 2.5109410332171158e-06, "loss": 0.0708, "step": 171760 }, { "epoch": 3.496590330788804, "grad_norm": 0.29711050836187036, "learning_rate": 2.5103247931318385e-06, "loss": 0.037, "step": 171770 }, { "epoch": 3.496793893129771, "grad_norm": 0.10061328130400449, "learning_rate": 2.50970860332876e-06, "loss": 0.0522, "step": 171780 }, { "epoch": 3.496997455470738, "grad_norm": 13.213355300691777, "learning_rate": 2.5090924638203227e-06, "loss": 0.0991, "step": 171790 }, { "epoch": 3.497201017811705, "grad_norm": 0.045021311187077494, "learning_rate": 2.5084763746189717e-06, "loss": 0.0251, "step": 171800 }, { "epoch": 3.4974045801526716, "grad_norm": 0.04580775253038362, "learning_rate": 2.5078603357371494e-06, "loss": 0.0478, "step": 171810 }, { "epoch": 3.4976081424936387, "grad_norm": 0.01106172689430608, "learning_rate": 2.5072443471872976e-06, "loss": 0.0788, "step": 171820 }, { "epoch": 3.4978117048346054, "grad_norm": 3.1596460135066455, "learning_rate": 2.5066284089818564e-06, "loss": 0.0551, "step": 171830 }, { "epoch": 3.4980152671755724, "grad_norm": 0.055124271648385174, "learning_rate": 2.5060125211332665e-06, "loss": 0.0009, "step": 171840 }, { "epoch": 3.4982188295165395, "grad_norm": 0.682784672894093, "learning_rate": 2.5053966836539655e-06, "loss": 0.0497, "step": 171850 }, { "epoch": 3.498422391857506, "grad_norm": 0.027810499261089066, "learning_rate": 2.5047808965563934e-06, "loss": 0.1219, "step": 171860 }, { "epoch": 3.498625954198473, "grad_norm": 0.12693347604533042, "learning_rate": 2.504165159852984e-06, "loss": 0.0046, "step": 171870 }, { "epoch": 3.4988295165394403, "grad_norm": 0.03158516951086428, "learning_rate": 2.5035494735561703e-06, "loss": 0.01, "step": 171880 }, { "epoch": 3.499033078880407, "grad_norm": 11.071983455911734, "learning_rate": 2.502933837678395e-06, "loss": 0.0703, "step": 171890 }, { "epoch": 3.499236641221374, "grad_norm": 16.48594649103713, "learning_rate": 2.5023182522320856e-06, "loss": 0.0464, "step": 171900 }, { "epoch": 3.499440203562341, "grad_norm": 0.12857387034455559, "learning_rate": 2.5017027172296735e-06, "loss": 0.0156, "step": 171910 }, { "epoch": 3.4996437659033077, "grad_norm": 0.9632494159283066, "learning_rate": 2.5010872326835966e-06, "loss": 0.0664, "step": 171920 }, { "epoch": 3.499847328244275, "grad_norm": 0.12404276983089003, "learning_rate": 2.5004717986062804e-06, "loss": 0.079, "step": 171930 }, { "epoch": 3.500050890585242, "grad_norm": 17.9323352557491, "learning_rate": 2.499856415010155e-06, "loss": 0.048, "step": 171940 }, { "epoch": 3.5002544529262085, "grad_norm": 0.22929472808640142, "learning_rate": 2.4992410819076496e-06, "loss": 0.0968, "step": 171950 }, { "epoch": 3.5004580152671756, "grad_norm": 0.037619804161530046, "learning_rate": 2.4986257993111916e-06, "loss": 0.0525, "step": 171960 }, { "epoch": 3.5006615776081427, "grad_norm": 0.24705776395615398, "learning_rate": 2.498010567233207e-06, "loss": 0.0486, "step": 171970 }, { "epoch": 3.5008651399491093, "grad_norm": 0.02924160695716024, "learning_rate": 2.497395385686122e-06, "loss": 0.1476, "step": 171980 }, { "epoch": 3.5010687022900764, "grad_norm": 5.734405457858369, "learning_rate": 2.4967802546823622e-06, "loss": 0.1064, "step": 171990 }, { "epoch": 3.5012722646310435, "grad_norm": 0.03669910663991382, "learning_rate": 2.4961651742343456e-06, "loss": 0.0505, "step": 172000 }, { "epoch": 3.50147582697201, "grad_norm": 0.05013272049003992, "learning_rate": 2.4955501443545e-06, "loss": 0.0918, "step": 172010 }, { "epoch": 3.501679389312977, "grad_norm": 0.04075778163636204, "learning_rate": 2.494935165055247e-06, "loss": 0.0017, "step": 172020 }, { "epoch": 3.5018829516539443, "grad_norm": 3.906793746568925, "learning_rate": 2.4943202363490006e-06, "loss": 0.0887, "step": 172030 }, { "epoch": 3.502086513994911, "grad_norm": 0.019709527986661434, "learning_rate": 2.4937053582481863e-06, "loss": 0.05, "step": 172040 }, { "epoch": 3.502290076335878, "grad_norm": 0.13484370519481148, "learning_rate": 2.4930905307652228e-06, "loss": 0.0294, "step": 172050 }, { "epoch": 3.502493638676845, "grad_norm": 0.015661574278459378, "learning_rate": 2.4924757539125203e-06, "loss": 0.0948, "step": 172060 }, { "epoch": 3.5026972010178117, "grad_norm": 2.5876418976705504, "learning_rate": 2.491861027702504e-06, "loss": 0.0745, "step": 172070 }, { "epoch": 3.5029007633587788, "grad_norm": 1.1939658798465114, "learning_rate": 2.4912463521475828e-06, "loss": 0.0036, "step": 172080 }, { "epoch": 3.5031043256997454, "grad_norm": 0.5065540838164423, "learning_rate": 2.490631727260171e-06, "loss": 0.0619, "step": 172090 }, { "epoch": 3.5033078880407125, "grad_norm": 0.09205288188505506, "learning_rate": 2.490017153052687e-06, "loss": 0.0239, "step": 172100 }, { "epoch": 3.5035114503816795, "grad_norm": 0.0376668519261517, "learning_rate": 2.4894026295375372e-06, "loss": 0.1067, "step": 172110 }, { "epoch": 3.503715012722646, "grad_norm": 0.1246745747037636, "learning_rate": 2.4887881567271338e-06, "loss": 0.0009, "step": 172120 }, { "epoch": 3.5039185750636133, "grad_norm": 53.300334251306715, "learning_rate": 2.4881737346338915e-06, "loss": 0.0513, "step": 172130 }, { "epoch": 3.50412213740458, "grad_norm": 0.07320256744643894, "learning_rate": 2.487559363270214e-06, "loss": 0.0872, "step": 172140 }, { "epoch": 3.504325699745547, "grad_norm": 0.07712440799990973, "learning_rate": 2.486945042648511e-06, "loss": 0.0297, "step": 172150 }, { "epoch": 3.504529262086514, "grad_norm": 0.30689793232427803, "learning_rate": 2.4863307727811893e-06, "loss": 0.084, "step": 172160 }, { "epoch": 3.5047328244274807, "grad_norm": 0.0069165189184475905, "learning_rate": 2.4857165536806553e-06, "loss": 0.0705, "step": 172170 }, { "epoch": 3.5049363867684478, "grad_norm": 21.7977465878854, "learning_rate": 2.485102385359315e-06, "loss": 0.1066, "step": 172180 }, { "epoch": 3.505139949109415, "grad_norm": 2.069012585413126, "learning_rate": 2.48448826782957e-06, "loss": 0.116, "step": 172190 }, { "epoch": 3.5053435114503815, "grad_norm": 29.590177145645363, "learning_rate": 2.4838742011038246e-06, "loss": 0.0583, "step": 172200 }, { "epoch": 3.5055470737913486, "grad_norm": 0.11183933399406554, "learning_rate": 2.4832601851944804e-06, "loss": 0.0174, "step": 172210 }, { "epoch": 3.5057506361323156, "grad_norm": 0.2246097392171037, "learning_rate": 2.4826462201139388e-06, "loss": 0.0775, "step": 172220 }, { "epoch": 3.5059541984732823, "grad_norm": 0.6671620223928022, "learning_rate": 2.4820323058746004e-06, "loss": 0.0727, "step": 172230 }, { "epoch": 3.5061577608142493, "grad_norm": 8.684708452298398, "learning_rate": 2.4814184424888594e-06, "loss": 0.0341, "step": 172240 }, { "epoch": 3.5063613231552164, "grad_norm": 35.670203435791755, "learning_rate": 2.4808046299691185e-06, "loss": 0.0402, "step": 172250 }, { "epoch": 3.506564885496183, "grad_norm": 0.10478018328376429, "learning_rate": 2.480190868327775e-06, "loss": 0.0805, "step": 172260 }, { "epoch": 3.50676844783715, "grad_norm": 0.5124711959400867, "learning_rate": 2.4795771575772203e-06, "loss": 0.0413, "step": 172270 }, { "epoch": 3.506972010178117, "grad_norm": 0.024321948870462032, "learning_rate": 2.478963497729849e-06, "loss": 0.0156, "step": 172280 }, { "epoch": 3.507175572519084, "grad_norm": 0.05661214039067882, "learning_rate": 2.478349888798061e-06, "loss": 0.021, "step": 172290 }, { "epoch": 3.507379134860051, "grad_norm": 0.03512488316626601, "learning_rate": 2.477736330794243e-06, "loss": 0.0073, "step": 172300 }, { "epoch": 3.507582697201018, "grad_norm": 0.1123505403090358, "learning_rate": 2.477122823730788e-06, "loss": 0.0337, "step": 172310 }, { "epoch": 3.5077862595419846, "grad_norm": 0.11281076091239668, "learning_rate": 2.476509367620088e-06, "loss": 0.0433, "step": 172320 }, { "epoch": 3.5079898218829517, "grad_norm": 0.1305363367288241, "learning_rate": 2.475895962474531e-06, "loss": 0.1196, "step": 172330 }, { "epoch": 3.508193384223919, "grad_norm": 0.0821046022487887, "learning_rate": 2.4752826083065055e-06, "loss": 0.0214, "step": 172340 }, { "epoch": 3.5083969465648854, "grad_norm": 0.2435414893010079, "learning_rate": 2.4746693051284006e-06, "loss": 0.0346, "step": 172350 }, { "epoch": 3.5086005089058525, "grad_norm": 0.23093309623803307, "learning_rate": 2.4740560529526008e-06, "loss": 0.0399, "step": 172360 }, { "epoch": 3.5088040712468196, "grad_norm": 7.420346873917432, "learning_rate": 2.4734428517914926e-06, "loss": 0.0021, "step": 172370 }, { "epoch": 3.5090076335877862, "grad_norm": 68.8036934270893, "learning_rate": 2.4728297016574605e-06, "loss": 0.0988, "step": 172380 }, { "epoch": 3.5092111959287533, "grad_norm": 1.0161500012454978, "learning_rate": 2.4722166025628874e-06, "loss": 0.0545, "step": 172390 }, { "epoch": 3.50941475826972, "grad_norm": 0.10576871805571746, "learning_rate": 2.4716035545201555e-06, "loss": 0.0478, "step": 172400 }, { "epoch": 3.509618320610687, "grad_norm": 2.9447855255139554, "learning_rate": 2.470990557541647e-06, "loss": 0.0187, "step": 172410 }, { "epoch": 3.509821882951654, "grad_norm": 27.180885947695867, "learning_rate": 2.470377611639743e-06, "loss": 0.1498, "step": 172420 }, { "epoch": 3.5100254452926207, "grad_norm": 0.10244786697554563, "learning_rate": 2.469764716826817e-06, "loss": 0.0005, "step": 172430 }, { "epoch": 3.510229007633588, "grad_norm": 0.00782116261156257, "learning_rate": 2.4691518731152564e-06, "loss": 0.0287, "step": 172440 }, { "epoch": 3.5104325699745544, "grad_norm": 0.24202925984850998, "learning_rate": 2.468539080517431e-06, "loss": 0.0712, "step": 172450 }, { "epoch": 3.5106361323155215, "grad_norm": 0.05548856030824752, "learning_rate": 2.467926339045717e-06, "loss": 0.0856, "step": 172460 }, { "epoch": 3.5108396946564886, "grad_norm": 0.007426766984734679, "learning_rate": 2.467313648712496e-06, "loss": 0.105, "step": 172470 }, { "epoch": 3.5110432569974552, "grad_norm": 0.03989357640752115, "learning_rate": 2.4667010095301365e-06, "loss": 0.0468, "step": 172480 }, { "epoch": 3.5112468193384223, "grad_norm": 0.1894647358455368, "learning_rate": 2.4660884215110105e-06, "loss": 0.1403, "step": 172490 }, { "epoch": 3.5114503816793894, "grad_norm": 0.03198420273523728, "learning_rate": 2.4654758846674964e-06, "loss": 0.0283, "step": 172500 }, { "epoch": 3.511653944020356, "grad_norm": 0.06467439095546249, "learning_rate": 2.464863399011959e-06, "loss": 0.0078, "step": 172510 }, { "epoch": 3.511857506361323, "grad_norm": 0.07315598898786403, "learning_rate": 2.4642509645567715e-06, "loss": 0.0441, "step": 172520 }, { "epoch": 3.51206106870229, "grad_norm": 1.1509711651931473, "learning_rate": 2.463638581314301e-06, "loss": 0.0291, "step": 172530 }, { "epoch": 3.512264631043257, "grad_norm": 0.018337699800954173, "learning_rate": 2.4630262492969166e-06, "loss": 0.0011, "step": 172540 }, { "epoch": 3.512468193384224, "grad_norm": 0.20903579804762956, "learning_rate": 2.462413968516984e-06, "loss": 0.0301, "step": 172550 }, { "epoch": 3.512671755725191, "grad_norm": 0.02587579349256512, "learning_rate": 2.4618017389868705e-06, "loss": 0.0766, "step": 172560 }, { "epoch": 3.5128753180661576, "grad_norm": 1.9833902695438852, "learning_rate": 2.4611895607189396e-06, "loss": 0.0869, "step": 172570 }, { "epoch": 3.5130788804071247, "grad_norm": 0.04217701836219025, "learning_rate": 2.460577433725555e-06, "loss": 0.0716, "step": 172580 }, { "epoch": 3.5132824427480918, "grad_norm": 0.016834971301496842, "learning_rate": 2.4599653580190808e-06, "loss": 0.0439, "step": 172590 }, { "epoch": 3.5134860050890584, "grad_norm": 0.21899875763232562, "learning_rate": 2.459353333611879e-06, "loss": 0.1061, "step": 172600 }, { "epoch": 3.5136895674300255, "grad_norm": 0.009865634271865304, "learning_rate": 2.458741360516306e-06, "loss": 0.0293, "step": 172610 }, { "epoch": 3.5138931297709926, "grad_norm": 9.206859363894, "learning_rate": 2.458129438744725e-06, "loss": 0.0083, "step": 172620 }, { "epoch": 3.514096692111959, "grad_norm": 0.03523353025288627, "learning_rate": 2.4575175683094965e-06, "loss": 0.0625, "step": 172630 }, { "epoch": 3.5143002544529263, "grad_norm": 8.829116696024611, "learning_rate": 2.456905749222972e-06, "loss": 0.0979, "step": 172640 }, { "epoch": 3.5145038167938933, "grad_norm": 0.08874475385018336, "learning_rate": 2.456293981497513e-06, "loss": 0.0009, "step": 172650 }, { "epoch": 3.51470737913486, "grad_norm": 0.09561694010965192, "learning_rate": 2.455682265145475e-06, "loss": 0.0129, "step": 172660 }, { "epoch": 3.514910941475827, "grad_norm": 45.35384610855571, "learning_rate": 2.455070600179207e-06, "loss": 0.0364, "step": 172670 }, { "epoch": 3.515114503816794, "grad_norm": 1.565060315938148, "learning_rate": 2.4544589866110695e-06, "loss": 0.066, "step": 172680 }, { "epoch": 3.5153180661577608, "grad_norm": 0.147020890005411, "learning_rate": 2.45384742445341e-06, "loss": 0.0646, "step": 172690 }, { "epoch": 3.515521628498728, "grad_norm": 45.12030921197866, "learning_rate": 2.4532359137185806e-06, "loss": 0.06, "step": 172700 }, { "epoch": 3.515725190839695, "grad_norm": 3.1895748695888124, "learning_rate": 2.4526244544189328e-06, "loss": 0.02, "step": 172710 }, { "epoch": 3.5159287531806616, "grad_norm": 0.29075633747697055, "learning_rate": 2.4520130465668146e-06, "loss": 0.0757, "step": 172720 }, { "epoch": 3.5161323155216286, "grad_norm": 0.02372184296157522, "learning_rate": 2.451401690174575e-06, "loss": 0.0155, "step": 172730 }, { "epoch": 3.5163358778625953, "grad_norm": 0.12964597627744376, "learning_rate": 2.450790385254561e-06, "loss": 0.0036, "step": 172740 }, { "epoch": 3.5165394402035624, "grad_norm": 0.20432167160339693, "learning_rate": 2.450179131819118e-06, "loss": 0.0236, "step": 172750 }, { "epoch": 3.5167430025445294, "grad_norm": 0.009877298301488278, "learning_rate": 2.4495679298805926e-06, "loss": 0.0514, "step": 172760 }, { "epoch": 3.516946564885496, "grad_norm": 0.110222863202145, "learning_rate": 2.448956779451327e-06, "loss": 0.0529, "step": 172770 }, { "epoch": 3.517150127226463, "grad_norm": 14.604671205137002, "learning_rate": 2.448345680543665e-06, "loss": 0.0749, "step": 172780 }, { "epoch": 3.5173536895674298, "grad_norm": 0.0020580981849617464, "learning_rate": 2.4477346331699487e-06, "loss": 0.0657, "step": 172790 }, { "epoch": 3.517557251908397, "grad_norm": 0.028882503077089998, "learning_rate": 2.4471236373425184e-06, "loss": 0.0321, "step": 172800 }, { "epoch": 3.517760814249364, "grad_norm": 0.4616998540269345, "learning_rate": 2.4465126930737164e-06, "loss": 0.0444, "step": 172810 }, { "epoch": 3.5179643765903306, "grad_norm": 0.06422712067067379, "learning_rate": 2.4459018003758754e-06, "loss": 0.0364, "step": 172820 }, { "epoch": 3.5181679389312976, "grad_norm": 0.07546789393675976, "learning_rate": 2.4452909592613393e-06, "loss": 0.0013, "step": 172830 }, { "epoch": 3.5183715012722647, "grad_norm": 0.06302104046190693, "learning_rate": 2.4446801697424447e-06, "loss": 0.0019, "step": 172840 }, { "epoch": 3.5185750636132314, "grad_norm": 41.395347463870046, "learning_rate": 2.444069431831524e-06, "loss": 0.0705, "step": 172850 }, { "epoch": 3.5187786259541984, "grad_norm": 0.019277473389374036, "learning_rate": 2.4434587455409103e-06, "loss": 0.0541, "step": 172860 }, { "epoch": 3.5189821882951655, "grad_norm": 0.004126538141214853, "learning_rate": 2.4428481108829446e-06, "loss": 0.0245, "step": 172870 }, { "epoch": 3.519185750636132, "grad_norm": 0.029527978296961032, "learning_rate": 2.4422375278699523e-06, "loss": 0.0428, "step": 172880 }, { "epoch": 3.5193893129770992, "grad_norm": 0.003493205303486802, "learning_rate": 2.4416269965142673e-06, "loss": 0.0628, "step": 172890 }, { "epoch": 3.5195928753180663, "grad_norm": 0.2262715033761164, "learning_rate": 2.4410165168282214e-06, "loss": 0.0627, "step": 172900 }, { "epoch": 3.519796437659033, "grad_norm": 0.0002955584416026641, "learning_rate": 2.440406088824142e-06, "loss": 0.057, "step": 172910 }, { "epoch": 3.52, "grad_norm": 0.015410118094839562, "learning_rate": 2.4397957125143582e-06, "loss": 0.0676, "step": 172920 }, { "epoch": 3.520203562340967, "grad_norm": 0.026849548303477862, "learning_rate": 2.439185387911198e-06, "loss": 0.2051, "step": 172930 }, { "epoch": 3.5204071246819337, "grad_norm": 0.12937822445476338, "learning_rate": 2.438575115026987e-06, "loss": 0.0652, "step": 172940 }, { "epoch": 3.520610687022901, "grad_norm": 7.0214538193613825, "learning_rate": 2.437964893874051e-06, "loss": 0.0298, "step": 172950 }, { "epoch": 3.520814249363868, "grad_norm": 0.057978679673201035, "learning_rate": 2.437354724464714e-06, "loss": 0.0897, "step": 172960 }, { "epoch": 3.5210178117048345, "grad_norm": 0.021929153634464064, "learning_rate": 2.4367446068112997e-06, "loss": 0.045, "step": 172970 }, { "epoch": 3.5212213740458016, "grad_norm": 0.014892493931934708, "learning_rate": 2.4361345409261265e-06, "loss": 0.0777, "step": 172980 }, { "epoch": 3.5214249363867687, "grad_norm": 0.1002132003225631, "learning_rate": 2.4355245268215204e-06, "loss": 0.0008, "step": 172990 }, { "epoch": 3.5216284987277353, "grad_norm": 0.33734529859024165, "learning_rate": 2.4349145645098017e-06, "loss": 0.0059, "step": 173000 }, { "epoch": 3.5218320610687024, "grad_norm": 0.003363979527660548, "learning_rate": 2.4343046540032834e-06, "loss": 0.0195, "step": 173010 }, { "epoch": 3.5220356234096695, "grad_norm": 0.07573427507663713, "learning_rate": 2.4336947953142903e-06, "loss": 0.0249, "step": 173020 }, { "epoch": 3.522239185750636, "grad_norm": 112.63815761274354, "learning_rate": 2.4330849884551343e-06, "loss": 0.0728, "step": 173030 }, { "epoch": 3.522442748091603, "grad_norm": 3.318926382917257, "learning_rate": 2.432475233438132e-06, "loss": 0.0802, "step": 173040 }, { "epoch": 3.52264631043257, "grad_norm": 0.024827053339977236, "learning_rate": 2.431865530275603e-06, "loss": 0.0424, "step": 173050 }, { "epoch": 3.522849872773537, "grad_norm": 17.577249622264368, "learning_rate": 2.4312558789798547e-06, "loss": 0.0502, "step": 173060 }, { "epoch": 3.523053435114504, "grad_norm": 52.07183251955929, "learning_rate": 2.4306462795632006e-06, "loss": 0.0411, "step": 173070 }, { "epoch": 3.5232569974554706, "grad_norm": 0.30725321972189873, "learning_rate": 2.430036732037959e-06, "loss": 0.0242, "step": 173080 }, { "epoch": 3.5234605597964377, "grad_norm": 0.009963377788481878, "learning_rate": 2.429427236416433e-06, "loss": 0.0301, "step": 173090 }, { "epoch": 3.5236641221374043, "grad_norm": 5.198084243647003, "learning_rate": 2.4288177927109348e-06, "loss": 0.0538, "step": 173100 }, { "epoch": 3.5238676844783714, "grad_norm": 7.708629088191712, "learning_rate": 2.4282084009337732e-06, "loss": 0.0904, "step": 173110 }, { "epoch": 3.5240712468193385, "grad_norm": 0.03487238170914951, "learning_rate": 2.4275990610972548e-06, "loss": 0.0598, "step": 173120 }, { "epoch": 3.524274809160305, "grad_norm": 0.23733780622145245, "learning_rate": 2.4269897732136872e-06, "loss": 0.0075, "step": 173130 }, { "epoch": 3.524478371501272, "grad_norm": 0.053667443504724396, "learning_rate": 2.4263805372953754e-06, "loss": 0.0731, "step": 173140 }, { "epoch": 3.5246819338422393, "grad_norm": 0.02182244887943231, "learning_rate": 2.425771353354623e-06, "loss": 0.015, "step": 173150 }, { "epoch": 3.524885496183206, "grad_norm": 0.006195044423600321, "learning_rate": 2.4251622214037333e-06, "loss": 0.1289, "step": 173160 }, { "epoch": 3.525089058524173, "grad_norm": 6.731823037055935, "learning_rate": 2.4245531414550094e-06, "loss": 0.0389, "step": 173170 }, { "epoch": 3.52529262086514, "grad_norm": 0.0442880461125688, "learning_rate": 2.423944113520754e-06, "loss": 0.0044, "step": 173180 }, { "epoch": 3.5254961832061067, "grad_norm": 6.143041198866318, "learning_rate": 2.423335137613261e-06, "loss": 0.0204, "step": 173190 }, { "epoch": 3.5256997455470738, "grad_norm": 22.289276539215898, "learning_rate": 2.422726213744836e-06, "loss": 0.0721, "step": 173200 }, { "epoch": 3.525903307888041, "grad_norm": 35.08606781398141, "learning_rate": 2.422117341927776e-06, "loss": 0.061, "step": 173210 }, { "epoch": 3.5261068702290075, "grad_norm": 0.044839488697197415, "learning_rate": 2.421508522174373e-06, "loss": 0.046, "step": 173220 }, { "epoch": 3.5263104325699746, "grad_norm": 0.012121523891864466, "learning_rate": 2.420899754496928e-06, "loss": 0.0656, "step": 173230 }, { "epoch": 3.5265139949109416, "grad_norm": 0.017603442242615064, "learning_rate": 2.420291038907737e-06, "loss": 0.0493, "step": 173240 }, { "epoch": 3.5267175572519083, "grad_norm": 0.005330009889312941, "learning_rate": 2.419682375419089e-06, "loss": 0.002, "step": 173250 }, { "epoch": 3.5269211195928754, "grad_norm": 0.024501288008359864, "learning_rate": 2.4190737640432783e-06, "loss": 0.0003, "step": 173260 }, { "epoch": 3.5271246819338424, "grad_norm": 0.013631564793731536, "learning_rate": 2.4184652047925977e-06, "loss": 0.0645, "step": 173270 }, { "epoch": 3.527328244274809, "grad_norm": 0.001933994842827259, "learning_rate": 2.4178566976793366e-06, "loss": 0.0256, "step": 173280 }, { "epoch": 3.527531806615776, "grad_norm": 54.212441430136316, "learning_rate": 2.4172482427157862e-06, "loss": 0.126, "step": 173290 }, { "epoch": 3.5277353689567432, "grad_norm": 4.5294531694522675, "learning_rate": 2.4166398399142332e-06, "loss": 0.0197, "step": 173300 }, { "epoch": 3.52793893129771, "grad_norm": 0.050369669397508865, "learning_rate": 2.4160314892869667e-06, "loss": 0.0381, "step": 173310 }, { "epoch": 3.528142493638677, "grad_norm": 46.712716455005825, "learning_rate": 2.415423190846272e-06, "loss": 0.0944, "step": 173320 }, { "epoch": 3.528346055979644, "grad_norm": 37.66860375256036, "learning_rate": 2.414814944604435e-06, "loss": 0.0584, "step": 173330 }, { "epoch": 3.5285496183206106, "grad_norm": 0.01160201089818601, "learning_rate": 2.4142067505737397e-06, "loss": 0.1644, "step": 173340 }, { "epoch": 3.5287531806615777, "grad_norm": 0.06111041221501211, "learning_rate": 2.4135986087664696e-06, "loss": 0.0493, "step": 173350 }, { "epoch": 3.528956743002545, "grad_norm": 0.0955928701958563, "learning_rate": 2.412990519194907e-06, "loss": 0.0851, "step": 173360 }, { "epoch": 3.5291603053435114, "grad_norm": 0.03597830729968586, "learning_rate": 2.4123824818713333e-06, "loss": 0.0177, "step": 173370 }, { "epoch": 3.5293638676844785, "grad_norm": 0.03951790320688698, "learning_rate": 2.4117744968080277e-06, "loss": 0.0897, "step": 173380 }, { "epoch": 3.529567430025445, "grad_norm": 0.0631556967231482, "learning_rate": 2.4111665640172716e-06, "loss": 0.0979, "step": 173390 }, { "epoch": 3.5297709923664122, "grad_norm": 0.5618168969889444, "learning_rate": 2.4105586835113394e-06, "loss": 0.0024, "step": 173400 }, { "epoch": 3.529974554707379, "grad_norm": 0.036647638466114824, "learning_rate": 2.4099508553025076e-06, "loss": 0.0064, "step": 173410 }, { "epoch": 3.530178117048346, "grad_norm": 0.05101747826610332, "learning_rate": 2.409343079403058e-06, "loss": 0.1114, "step": 173420 }, { "epoch": 3.530381679389313, "grad_norm": 8.03803926459244, "learning_rate": 2.4087353558252603e-06, "loss": 0.0555, "step": 173430 }, { "epoch": 3.5305852417302797, "grad_norm": 0.08886330414700142, "learning_rate": 2.4081276845813872e-06, "loss": 0.0775, "step": 173440 }, { "epoch": 3.5307888040712467, "grad_norm": 62.6506480260855, "learning_rate": 2.407520065683718e-06, "loss": 0.0943, "step": 173450 }, { "epoch": 3.530992366412214, "grad_norm": 0.015316964122654993, "learning_rate": 2.406912499144518e-06, "loss": 0.0296, "step": 173460 }, { "epoch": 3.5311959287531804, "grad_norm": 0.027760596923904948, "learning_rate": 2.4063049849760602e-06, "loss": 0.0031, "step": 173470 }, { "epoch": 3.5313994910941475, "grad_norm": 0.0032155619371118218, "learning_rate": 2.405697523190614e-06, "loss": 0.0111, "step": 173480 }, { "epoch": 3.5316030534351146, "grad_norm": 0.0412882355412648, "learning_rate": 2.4050901138004475e-06, "loss": 0.0558, "step": 173490 }, { "epoch": 3.5318066157760812, "grad_norm": 11.378446058517977, "learning_rate": 2.4044827568178282e-06, "loss": 0.0727, "step": 173500 }, { "epoch": 3.5320101781170483, "grad_norm": 6.2988872186553175, "learning_rate": 2.4038754522550224e-06, "loss": 0.0982, "step": 173510 }, { "epoch": 3.5322137404580154, "grad_norm": 0.030773956203296995, "learning_rate": 2.4032682001242963e-06, "loss": 0.0228, "step": 173520 }, { "epoch": 3.532417302798982, "grad_norm": 0.18918769572744143, "learning_rate": 2.402661000437913e-06, "loss": 0.0161, "step": 173530 }, { "epoch": 3.532620865139949, "grad_norm": 19.587626838118712, "learning_rate": 2.402053853208136e-06, "loss": 0.056, "step": 173540 }, { "epoch": 3.532824427480916, "grad_norm": 0.2504796156261431, "learning_rate": 2.40144675844723e-06, "loss": 0.0823, "step": 173550 }, { "epoch": 3.533027989821883, "grad_norm": 0.04300630015032558, "learning_rate": 2.4008397161674495e-06, "loss": 0.0282, "step": 173560 }, { "epoch": 3.53323155216285, "grad_norm": 0.009009466547577725, "learning_rate": 2.400232726381061e-06, "loss": 0.073, "step": 173570 }, { "epoch": 3.533435114503817, "grad_norm": 0.04891646217571141, "learning_rate": 2.3996257891003223e-06, "loss": 0.0306, "step": 173580 }, { "epoch": 3.5336386768447836, "grad_norm": 0.02085074946135486, "learning_rate": 2.399018904337487e-06, "loss": 0.0285, "step": 173590 }, { "epoch": 3.5338422391857507, "grad_norm": 0.015925764541996696, "learning_rate": 2.398412072104816e-06, "loss": 0.078, "step": 173600 }, { "epoch": 3.5340458015267178, "grad_norm": 0.42373453812462697, "learning_rate": 2.397805292414566e-06, "loss": 0.0591, "step": 173610 }, { "epoch": 3.5342493638676844, "grad_norm": 0.004672083083962502, "learning_rate": 2.3971985652789863e-06, "loss": 0.0358, "step": 173620 }, { "epoch": 3.5344529262086515, "grad_norm": 9.036857108723476, "learning_rate": 2.396591890710338e-06, "loss": 0.0829, "step": 173630 }, { "epoch": 3.5346564885496186, "grad_norm": 0.022383853442704285, "learning_rate": 2.3959852687208673e-06, "loss": 0.0259, "step": 173640 }, { "epoch": 3.534860050890585, "grad_norm": 0.03563687850993147, "learning_rate": 2.395378699322826e-06, "loss": 0.0275, "step": 173650 }, { "epoch": 3.5350636132315523, "grad_norm": 0.0649888980349052, "learning_rate": 2.394772182528471e-06, "loss": 0.024, "step": 173660 }, { "epoch": 3.5352671755725193, "grad_norm": 0.07042221162401387, "learning_rate": 2.394165718350045e-06, "loss": 0.009, "step": 173670 }, { "epoch": 3.535470737913486, "grad_norm": 0.01697998591935199, "learning_rate": 2.393559306799799e-06, "loss": 0.0071, "step": 173680 }, { "epoch": 3.535674300254453, "grad_norm": 17.091874937735774, "learning_rate": 2.3929529478899795e-06, "loss": 0.0192, "step": 173690 }, { "epoch": 3.5358778625954197, "grad_norm": 0.5009135710213957, "learning_rate": 2.3923466416328328e-06, "loss": 0.0375, "step": 173700 }, { "epoch": 3.5360814249363868, "grad_norm": 11.903624373480993, "learning_rate": 2.391740388040605e-06, "loss": 0.0937, "step": 173710 }, { "epoch": 3.536284987277354, "grad_norm": 0.07628240366299661, "learning_rate": 2.391134187125539e-06, "loss": 0.0085, "step": 173720 }, { "epoch": 3.5364885496183205, "grad_norm": 0.08530994854942846, "learning_rate": 2.390528038899878e-06, "loss": 0.0017, "step": 173730 }, { "epoch": 3.5366921119592876, "grad_norm": 0.044983548988459, "learning_rate": 2.389921943375865e-06, "loss": 0.0995, "step": 173740 }, { "epoch": 3.536895674300254, "grad_norm": 0.03992431772619929, "learning_rate": 2.3893159005657395e-06, "loss": 0.015, "step": 173750 }, { "epoch": 3.5370992366412213, "grad_norm": 0.04750993226555172, "learning_rate": 2.388709910481744e-06, "loss": 0.0235, "step": 173760 }, { "epoch": 3.5373027989821884, "grad_norm": 7.8436323735775435, "learning_rate": 2.388103973136111e-06, "loss": 0.0591, "step": 173770 }, { "epoch": 3.537506361323155, "grad_norm": 0.018495587230090465, "learning_rate": 2.387498088541085e-06, "loss": 0.0475, "step": 173780 }, { "epoch": 3.537709923664122, "grad_norm": 0.057315649797702276, "learning_rate": 2.3868922567089016e-06, "loss": 0.0391, "step": 173790 }, { "epoch": 3.537913486005089, "grad_norm": 0.7701328003433975, "learning_rate": 2.386286477651791e-06, "loss": 0.0584, "step": 173800 }, { "epoch": 3.538117048346056, "grad_norm": 0.19324406178598186, "learning_rate": 2.385680751381993e-06, "loss": 0.0415, "step": 173810 }, { "epoch": 3.538320610687023, "grad_norm": 12.258951007970603, "learning_rate": 2.3850750779117416e-06, "loss": 0.1361, "step": 173820 }, { "epoch": 3.53852417302799, "grad_norm": 0.024591882963986265, "learning_rate": 2.3844694572532652e-06, "loss": 0.0013, "step": 173830 }, { "epoch": 3.5387277353689566, "grad_norm": 0.0200127132560139, "learning_rate": 2.3838638894187967e-06, "loss": 0.0461, "step": 173840 }, { "epoch": 3.5389312977099237, "grad_norm": 20.0185245409157, "learning_rate": 2.3832583744205664e-06, "loss": 0.0403, "step": 173850 }, { "epoch": 3.5391348600508907, "grad_norm": 0.032905677355049005, "learning_rate": 2.3826529122708036e-06, "loss": 0.0087, "step": 173860 }, { "epoch": 3.5393384223918574, "grad_norm": 0.010638008829776785, "learning_rate": 2.3820475029817366e-06, "loss": 0.0238, "step": 173870 }, { "epoch": 3.5395419847328244, "grad_norm": 0.21496687953347882, "learning_rate": 2.381442146565592e-06, "loss": 0.0411, "step": 173880 }, { "epoch": 3.5397455470737915, "grad_norm": 0.005175513807371377, "learning_rate": 2.380836843034596e-06, "loss": 0.0845, "step": 173890 }, { "epoch": 3.539949109414758, "grad_norm": 0.08388920234196232, "learning_rate": 2.3802315924009735e-06, "loss": 0.0398, "step": 173900 }, { "epoch": 3.5401526717557252, "grad_norm": 0.011703590057086176, "learning_rate": 2.3796263946769478e-06, "loss": 0.0017, "step": 173910 }, { "epoch": 3.5403562340966923, "grad_norm": 13.360635667940667, "learning_rate": 2.3790212498747423e-06, "loss": 0.062, "step": 173920 }, { "epoch": 3.540559796437659, "grad_norm": 0.008206449356631576, "learning_rate": 2.3784161580065784e-06, "loss": 0.1005, "step": 173930 }, { "epoch": 3.540763358778626, "grad_norm": 0.02421955492431139, "learning_rate": 2.377811119084677e-06, "loss": 0.0337, "step": 173940 }, { "epoch": 3.540966921119593, "grad_norm": 0.17686335544486687, "learning_rate": 2.377206133121259e-06, "loss": 0.0881, "step": 173950 }, { "epoch": 3.5411704834605597, "grad_norm": 2.511700500895514, "learning_rate": 2.376601200128538e-06, "loss": 0.0577, "step": 173960 }, { "epoch": 3.541374045801527, "grad_norm": 0.6239068074306631, "learning_rate": 2.3759963201187362e-06, "loss": 0.0349, "step": 173970 }, { "epoch": 3.541577608142494, "grad_norm": 9.888258890167062, "learning_rate": 2.3753914931040704e-06, "loss": 0.076, "step": 173980 }, { "epoch": 3.5417811704834605, "grad_norm": 9.006993891344257, "learning_rate": 2.37478671909675e-06, "loss": 0.0801, "step": 173990 }, { "epoch": 3.5419847328244276, "grad_norm": 0.035442613241076094, "learning_rate": 2.374181998108997e-06, "loss": 0.0565, "step": 174000 }, { "epoch": 3.5421882951653942, "grad_norm": 0.023029471162654625, "learning_rate": 2.3735773301530184e-06, "loss": 0.0145, "step": 174010 }, { "epoch": 3.5423918575063613, "grad_norm": 0.07473239759839027, "learning_rate": 2.3729727152410264e-06, "loss": 0.0694, "step": 174020 }, { "epoch": 3.5425954198473284, "grad_norm": 0.9195363368813191, "learning_rate": 2.372368153385238e-06, "loss": 0.0006, "step": 174030 }, { "epoch": 3.542798982188295, "grad_norm": 0.052749451224383216, "learning_rate": 2.3717636445978576e-06, "loss": 0.1145, "step": 174040 }, { "epoch": 3.543002544529262, "grad_norm": 20.274643230819425, "learning_rate": 2.3711591888910935e-06, "loss": 0.0629, "step": 174050 }, { "epoch": 3.5432061068702287, "grad_norm": 0.036098865776150006, "learning_rate": 2.370554786277159e-06, "loss": 0.0459, "step": 174060 }, { "epoch": 3.543409669211196, "grad_norm": 0.024583319314769272, "learning_rate": 2.369950436768256e-06, "loss": 0.0004, "step": 174070 }, { "epoch": 3.543613231552163, "grad_norm": 0.052022072816122304, "learning_rate": 2.3693461403765904e-06, "loss": 0.0058, "step": 174080 }, { "epoch": 3.5438167938931295, "grad_norm": 0.014075758578507893, "learning_rate": 2.368741897114368e-06, "loss": 0.0168, "step": 174090 }, { "epoch": 3.5440203562340966, "grad_norm": 0.02313786299919612, "learning_rate": 2.368137706993793e-06, "loss": 0.0172, "step": 174100 }, { "epoch": 3.5442239185750637, "grad_norm": 38.81345356129392, "learning_rate": 2.367533570027066e-06, "loss": 0.0483, "step": 174110 }, { "epoch": 3.5444274809160303, "grad_norm": 0.012442830500069585, "learning_rate": 2.3669294862263892e-06, "loss": 0.0095, "step": 174120 }, { "epoch": 3.5446310432569974, "grad_norm": 0.02519949677509891, "learning_rate": 2.3663254556039644e-06, "loss": 0.0766, "step": 174130 }, { "epoch": 3.5448346055979645, "grad_norm": 0.016979012196022446, "learning_rate": 2.365721478171986e-06, "loss": 0.0436, "step": 174140 }, { "epoch": 3.545038167938931, "grad_norm": 0.004319136337690173, "learning_rate": 2.3651175539426567e-06, "loss": 0.0412, "step": 174150 }, { "epoch": 3.545241730279898, "grad_norm": 0.6574049564736903, "learning_rate": 2.3645136829281743e-06, "loss": 0.0165, "step": 174160 }, { "epoch": 3.5454452926208653, "grad_norm": 0.11451033737505932, "learning_rate": 2.363909865140729e-06, "loss": 0.0783, "step": 174170 }, { "epoch": 3.545648854961832, "grad_norm": 0.0890872430455687, "learning_rate": 2.3633061005925207e-06, "loss": 0.0118, "step": 174180 }, { "epoch": 3.545852417302799, "grad_norm": 0.06103307388704387, "learning_rate": 2.3627023892957436e-06, "loss": 0.064, "step": 174190 }, { "epoch": 3.546055979643766, "grad_norm": 0.3657874021615286, "learning_rate": 2.362098731262585e-06, "loss": 0.0157, "step": 174200 }, { "epoch": 3.5462595419847327, "grad_norm": 0.4106324666342037, "learning_rate": 2.361495126505244e-06, "loss": 0.0033, "step": 174210 }, { "epoch": 3.5464631043256998, "grad_norm": 0.11318173898306765, "learning_rate": 2.360891575035905e-06, "loss": 0.0289, "step": 174220 }, { "epoch": 3.546666666666667, "grad_norm": 0.011640981910858939, "learning_rate": 2.3602880768667595e-06, "loss": 0.0452, "step": 174230 }, { "epoch": 3.5468702290076335, "grad_norm": 0.07833685857830323, "learning_rate": 2.359684632009997e-06, "loss": 0.0181, "step": 174240 }, { "epoch": 3.5470737913486006, "grad_norm": 0.5246787380942564, "learning_rate": 2.3590812404778042e-06, "loss": 0.1328, "step": 174250 }, { "epoch": 3.5472773536895676, "grad_norm": 0.012937494058074891, "learning_rate": 2.358477902282366e-06, "loss": 0.0882, "step": 174260 }, { "epoch": 3.5474809160305343, "grad_norm": 0.021187800220059897, "learning_rate": 2.35787461743587e-06, "loss": 0.0637, "step": 174270 }, { "epoch": 3.5476844783715014, "grad_norm": 0.2072319598921061, "learning_rate": 2.3572713859504986e-06, "loss": 0.1355, "step": 174280 }, { "epoch": 3.5478880407124684, "grad_norm": 0.3739189461049608, "learning_rate": 2.3566682078384358e-06, "loss": 0.0366, "step": 174290 }, { "epoch": 3.548091603053435, "grad_norm": 0.017659992500701657, "learning_rate": 2.3560650831118624e-06, "loss": 0.0516, "step": 174300 }, { "epoch": 3.548295165394402, "grad_norm": 0.09210303851856141, "learning_rate": 2.355462011782961e-06, "loss": 0.0075, "step": 174310 }, { "epoch": 3.5484987277353692, "grad_norm": 0.0028203830567564863, "learning_rate": 2.3548589938639096e-06, "loss": 0.0068, "step": 174320 }, { "epoch": 3.548702290076336, "grad_norm": 0.473292577560501, "learning_rate": 2.354256029366888e-06, "loss": 0.0585, "step": 174330 }, { "epoch": 3.548905852417303, "grad_norm": 0.03136203111108003, "learning_rate": 2.3536531183040734e-06, "loss": 0.0367, "step": 174340 }, { "epoch": 3.5491094147582696, "grad_norm": 0.009979172023347813, "learning_rate": 2.3530502606876425e-06, "loss": 0.0291, "step": 174350 }, { "epoch": 3.5493129770992367, "grad_norm": 0.03432232615784424, "learning_rate": 2.352447456529771e-06, "loss": 0.0838, "step": 174360 }, { "epoch": 3.5495165394402033, "grad_norm": 0.132299528127227, "learning_rate": 2.351844705842635e-06, "loss": 0.0009, "step": 174370 }, { "epoch": 3.5497201017811704, "grad_norm": 11.717479181297856, "learning_rate": 2.351242008638405e-06, "loss": 0.0266, "step": 174380 }, { "epoch": 3.5499236641221374, "grad_norm": 0.05195692372796509, "learning_rate": 2.3506393649292507e-06, "loss": 0.0882, "step": 174390 }, { "epoch": 3.550127226463104, "grad_norm": 0.12832576743409715, "learning_rate": 2.3500367747273516e-06, "loss": 0.0633, "step": 174400 }, { "epoch": 3.550330788804071, "grad_norm": 0.019549023965239186, "learning_rate": 2.349434238044871e-06, "loss": 0.0162, "step": 174410 }, { "epoch": 3.5505343511450382, "grad_norm": 0.14192143498385817, "learning_rate": 2.3488317548939794e-06, "loss": 0.0685, "step": 174420 }, { "epoch": 3.550737913486005, "grad_norm": 0.03431590474674628, "learning_rate": 2.3482293252868454e-06, "loss": 0.0005, "step": 174430 }, { "epoch": 3.550941475826972, "grad_norm": 0.37600251313954103, "learning_rate": 2.3476269492356357e-06, "loss": 0.0038, "step": 174440 }, { "epoch": 3.551145038167939, "grad_norm": 0.03144444247312298, "learning_rate": 2.347024626752516e-06, "loss": 0.0881, "step": 174450 }, { "epoch": 3.5513486005089057, "grad_norm": 7.753816260520295, "learning_rate": 2.346422357849651e-06, "loss": 0.0188, "step": 174460 }, { "epoch": 3.5515521628498727, "grad_norm": 0.1694646807185587, "learning_rate": 2.3458201425392043e-06, "loss": 0.0467, "step": 174470 }, { "epoch": 3.55175572519084, "grad_norm": 14.337691310968248, "learning_rate": 2.345217980833338e-06, "loss": 0.1131, "step": 174480 }, { "epoch": 3.5519592875318065, "grad_norm": 0.06103952460941719, "learning_rate": 2.344615872744214e-06, "loss": 0.0299, "step": 174490 }, { "epoch": 3.5521628498727735, "grad_norm": 5.797719507334976, "learning_rate": 2.344013818283995e-06, "loss": 0.0647, "step": 174500 }, { "epoch": 3.5523664122137406, "grad_norm": 0.004778665442797329, "learning_rate": 2.343411817464834e-06, "loss": 0.0554, "step": 174510 }, { "epoch": 3.5525699745547072, "grad_norm": 0.015687198911108355, "learning_rate": 2.3428098702988945e-06, "loss": 0.0307, "step": 174520 }, { "epoch": 3.5527735368956743, "grad_norm": 0.04487027722551732, "learning_rate": 2.342207976798334e-06, "loss": 0.0529, "step": 174530 }, { "epoch": 3.5529770992366414, "grad_norm": 36.83424107728384, "learning_rate": 2.341606136975303e-06, "loss": 0.0596, "step": 174540 }, { "epoch": 3.553180661577608, "grad_norm": 8.585063308186383, "learning_rate": 2.3410043508419618e-06, "loss": 0.087, "step": 174550 }, { "epoch": 3.553384223918575, "grad_norm": 0.040735053830945896, "learning_rate": 2.3404026184104646e-06, "loss": 0.0409, "step": 174560 }, { "epoch": 3.553587786259542, "grad_norm": 0.18206243684157505, "learning_rate": 2.339800939692958e-06, "loss": 0.0015, "step": 174570 }, { "epoch": 3.553791348600509, "grad_norm": 0.05309783215146244, "learning_rate": 2.339199314701602e-06, "loss": 0.0151, "step": 174580 }, { "epoch": 3.553994910941476, "grad_norm": 0.10030535896479281, "learning_rate": 2.3385977434485403e-06, "loss": 0.0297, "step": 174590 }, { "epoch": 3.554198473282443, "grad_norm": 0.18498205084458172, "learning_rate": 2.3379962259459233e-06, "loss": 0.0231, "step": 174600 }, { "epoch": 3.5544020356234096, "grad_norm": 0.16402872549381603, "learning_rate": 2.337394762205905e-06, "loss": 0.0334, "step": 174610 }, { "epoch": 3.5546055979643767, "grad_norm": 0.05714540429261351, "learning_rate": 2.336793352240627e-06, "loss": 0.058, "step": 174620 }, { "epoch": 3.5548091603053438, "grad_norm": 0.562075195160226, "learning_rate": 2.3361919960622357e-06, "loss": 0.0049, "step": 174630 }, { "epoch": 3.5550127226463104, "grad_norm": 0.021275875503097237, "learning_rate": 2.335590693682882e-06, "loss": 0.0459, "step": 174640 }, { "epoch": 3.5552162849872775, "grad_norm": 0.027631936463782036, "learning_rate": 2.3349894451147037e-06, "loss": 0.0251, "step": 174650 }, { "epoch": 3.555419847328244, "grad_norm": 0.13634148679557248, "learning_rate": 2.3343882503698468e-06, "loss": 0.0179, "step": 174660 }, { "epoch": 3.555623409669211, "grad_norm": 11.53209148615377, "learning_rate": 2.3337871094604515e-06, "loss": 0.1154, "step": 174670 }, { "epoch": 3.5558269720101783, "grad_norm": 0.029932978821692005, "learning_rate": 2.33318602239866e-06, "loss": 0.0005, "step": 174680 }, { "epoch": 3.556030534351145, "grad_norm": 0.17032687315033548, "learning_rate": 2.332584989196612e-06, "loss": 0.0954, "step": 174690 }, { "epoch": 3.556234096692112, "grad_norm": 0.1418046764998388, "learning_rate": 2.3319840098664446e-06, "loss": 0.1235, "step": 174700 }, { "epoch": 3.5564376590330786, "grad_norm": 0.190048070846669, "learning_rate": 2.3313830844202976e-06, "loss": 0.0099, "step": 174710 }, { "epoch": 3.5566412213740457, "grad_norm": 0.54583875279016, "learning_rate": 2.330782212870306e-06, "loss": 0.0326, "step": 174720 }, { "epoch": 3.556844783715013, "grad_norm": 0.020395668312705728, "learning_rate": 2.330181395228606e-06, "loss": 0.0002, "step": 174730 }, { "epoch": 3.5570483460559794, "grad_norm": 16.112329674358612, "learning_rate": 2.3295806315073323e-06, "loss": 0.0374, "step": 174740 }, { "epoch": 3.5572519083969465, "grad_norm": 17.384066368188165, "learning_rate": 2.328979921718614e-06, "loss": 0.0561, "step": 174750 }, { "epoch": 3.5574554707379136, "grad_norm": 0.011540778669694376, "learning_rate": 2.328379265874588e-06, "loss": 0.0534, "step": 174760 }, { "epoch": 3.55765903307888, "grad_norm": 0.06987562650145084, "learning_rate": 2.327778663987386e-06, "loss": 0.1465, "step": 174770 }, { "epoch": 3.5578625954198473, "grad_norm": 8.365791658784563, "learning_rate": 2.3271781160691337e-06, "loss": 0.0501, "step": 174780 }, { "epoch": 3.5580661577608144, "grad_norm": 14.054732496830146, "learning_rate": 2.326577622131962e-06, "loss": 0.1253, "step": 174790 }, { "epoch": 3.558269720101781, "grad_norm": 12.587121677227914, "learning_rate": 2.3259771821879984e-06, "loss": 0.0346, "step": 174800 }, { "epoch": 3.558473282442748, "grad_norm": 4.324994503941402, "learning_rate": 2.32537679624937e-06, "loss": 0.0199, "step": 174810 }, { "epoch": 3.558676844783715, "grad_norm": 0.043250770416495596, "learning_rate": 2.324776464328202e-06, "loss": 0.0111, "step": 174820 }, { "epoch": 3.558880407124682, "grad_norm": 25.456986164707796, "learning_rate": 2.3241761864366187e-06, "loss": 0.0707, "step": 174830 }, { "epoch": 3.559083969465649, "grad_norm": 0.037588695221291296, "learning_rate": 2.323575962586744e-06, "loss": 0.0078, "step": 174840 }, { "epoch": 3.559287531806616, "grad_norm": 10.009850779714826, "learning_rate": 2.3229757927906996e-06, "loss": 0.1024, "step": 174850 }, { "epoch": 3.5594910941475826, "grad_norm": 0.013062306852661208, "learning_rate": 2.3223756770606075e-06, "loss": 0.0644, "step": 174860 }, { "epoch": 3.5596946564885497, "grad_norm": 0.06184536518649902, "learning_rate": 2.3217756154085876e-06, "loss": 0.0676, "step": 174870 }, { "epoch": 3.5598982188295167, "grad_norm": 0.005146986043667808, "learning_rate": 2.3211756078467577e-06, "loss": 0.0506, "step": 174880 }, { "epoch": 3.5601017811704834, "grad_norm": 42.40981220843571, "learning_rate": 2.3205756543872375e-06, "loss": 0.1034, "step": 174890 }, { "epoch": 3.5603053435114504, "grad_norm": 0.1675272914490023, "learning_rate": 2.3199757550421425e-06, "loss": 0.1304, "step": 174900 }, { "epoch": 3.5605089058524175, "grad_norm": 20.74353422865258, "learning_rate": 2.3193759098235896e-06, "loss": 0.0119, "step": 174910 }, { "epoch": 3.560712468193384, "grad_norm": 0.057882911116046945, "learning_rate": 2.3187761187436925e-06, "loss": 0.0563, "step": 174920 }, { "epoch": 3.5609160305343512, "grad_norm": 0.027392856491233006, "learning_rate": 2.318176381814567e-06, "loss": 0.1199, "step": 174930 }, { "epoch": 3.5611195928753183, "grad_norm": 0.054436872316617015, "learning_rate": 2.3175766990483193e-06, "loss": 0.0346, "step": 174940 }, { "epoch": 3.561323155216285, "grad_norm": 63.11487025257346, "learning_rate": 2.316977070457069e-06, "loss": 0.0845, "step": 174950 }, { "epoch": 3.561526717557252, "grad_norm": 0.07054759674465089, "learning_rate": 2.316377496052922e-06, "loss": 0.1113, "step": 174960 }, { "epoch": 3.5617302798982187, "grad_norm": 0.04867358288239091, "learning_rate": 2.3157779758479843e-06, "loss": 0.0009, "step": 174970 }, { "epoch": 3.5619338422391857, "grad_norm": 0.07186998329386712, "learning_rate": 2.3151785098543722e-06, "loss": 0.0495, "step": 174980 }, { "epoch": 3.562137404580153, "grad_norm": 43.225891699056824, "learning_rate": 2.3145790980841858e-06, "loss": 0.0924, "step": 174990 }, { "epoch": 3.5623409669211195, "grad_norm": 0.004952364516214941, "learning_rate": 2.3139797405495325e-06, "loss": 0.0007, "step": 175000 }, { "epoch": 3.5625445292620865, "grad_norm": 0.8053785809874816, "learning_rate": 2.3133804372625207e-06, "loss": 0.0567, "step": 175010 }, { "epoch": 3.562748091603053, "grad_norm": 0.3974698260980203, "learning_rate": 2.3127811882352495e-06, "loss": 0.0278, "step": 175020 }, { "epoch": 3.5629516539440202, "grad_norm": 0.026470278562654145, "learning_rate": 2.3121819934798233e-06, "loss": 0.0236, "step": 175030 }, { "epoch": 3.5631552162849873, "grad_norm": 12.49008179242289, "learning_rate": 2.311582853008344e-06, "loss": 0.0264, "step": 175040 }, { "epoch": 3.563358778625954, "grad_norm": 0.4000659532303679, "learning_rate": 2.3109837668329114e-06, "loss": 0.0008, "step": 175050 }, { "epoch": 3.563562340966921, "grad_norm": 0.055763855956108825, "learning_rate": 2.310384734965625e-06, "loss": 0.0222, "step": 175060 }, { "epoch": 3.563765903307888, "grad_norm": 0.1094043931253197, "learning_rate": 2.309785757418583e-06, "loss": 0.0742, "step": 175070 }, { "epoch": 3.5639694656488548, "grad_norm": 0.017670011610243112, "learning_rate": 2.309186834203883e-06, "loss": 0.0593, "step": 175080 }, { "epoch": 3.564173027989822, "grad_norm": 0.012015886456171935, "learning_rate": 2.30858796533362e-06, "loss": 0.0402, "step": 175090 }, { "epoch": 3.564376590330789, "grad_norm": 50.041450501319595, "learning_rate": 2.30798915081989e-06, "loss": 0.0556, "step": 175100 }, { "epoch": 3.5645801526717555, "grad_norm": 8.973271419142689, "learning_rate": 2.3073903906747886e-06, "loss": 0.0534, "step": 175110 }, { "epoch": 3.5647837150127226, "grad_norm": 0.39731000775068015, "learning_rate": 2.306791684910402e-06, "loss": 0.1532, "step": 175120 }, { "epoch": 3.5649872773536897, "grad_norm": 0.5827080166444486, "learning_rate": 2.3061930335388282e-06, "loss": 0.0657, "step": 175130 }, { "epoch": 3.5651908396946563, "grad_norm": 0.01597974722668683, "learning_rate": 2.3055944365721578e-06, "loss": 0.0578, "step": 175140 }, { "epoch": 3.5653944020356234, "grad_norm": 0.00939637339124453, "learning_rate": 2.3049958940224744e-06, "loss": 0.0345, "step": 175150 }, { "epoch": 3.5655979643765905, "grad_norm": 0.006606843456040394, "learning_rate": 2.304397405901873e-06, "loss": 0.0123, "step": 175160 }, { "epoch": 3.565801526717557, "grad_norm": 0.011768726660117777, "learning_rate": 2.3037989722224367e-06, "loss": 0.0237, "step": 175170 }, { "epoch": 3.566005089058524, "grad_norm": 14.340706170606143, "learning_rate": 2.3032005929962508e-06, "loss": 0.0871, "step": 175180 }, { "epoch": 3.5662086513994913, "grad_norm": 0.009425948025094251, "learning_rate": 2.302602268235406e-06, "loss": 0.0587, "step": 175190 }, { "epoch": 3.566412213740458, "grad_norm": 0.022822304442786056, "learning_rate": 2.302003997951981e-06, "loss": 0.0215, "step": 175200 }, { "epoch": 3.566615776081425, "grad_norm": 31.857845784356236, "learning_rate": 2.3014057821580603e-06, "loss": 0.2022, "step": 175210 }, { "epoch": 3.566819338422392, "grad_norm": 2.9122574580320193, "learning_rate": 2.3008076208657254e-06, "loss": 0.0672, "step": 175220 }, { "epoch": 3.5670229007633587, "grad_norm": 0.047286782942520596, "learning_rate": 2.300209514087057e-06, "loss": 0.0007, "step": 175230 }, { "epoch": 3.567226463104326, "grad_norm": 37.62778267673653, "learning_rate": 2.299611461834135e-06, "loss": 0.0476, "step": 175240 }, { "epoch": 3.567430025445293, "grad_norm": 7.755036315205024, "learning_rate": 2.2990134641190374e-06, "loss": 0.0783, "step": 175250 }, { "epoch": 3.5676335877862595, "grad_norm": 1.1034196705810808, "learning_rate": 2.2984155209538424e-06, "loss": 0.0095, "step": 175260 }, { "epoch": 3.5678371501272266, "grad_norm": 0.054629297488861764, "learning_rate": 2.2978176323506245e-06, "loss": 0.046, "step": 175270 }, { "epoch": 3.5680407124681937, "grad_norm": 0.007924689312070902, "learning_rate": 2.2972197983214607e-06, "loss": 0.0493, "step": 175280 }, { "epoch": 3.5682442748091603, "grad_norm": 14.054325646149023, "learning_rate": 2.2966220188784238e-06, "loss": 0.0296, "step": 175290 }, { "epoch": 3.5684478371501274, "grad_norm": 3.465579991811695, "learning_rate": 2.2960242940335873e-06, "loss": 0.0248, "step": 175300 }, { "epoch": 3.568651399491094, "grad_norm": 0.013540370205952116, "learning_rate": 2.295426623799023e-06, "loss": 0.0562, "step": 175310 }, { "epoch": 3.568854961832061, "grad_norm": 4.184014453883614, "learning_rate": 2.294829008186803e-06, "loss": 0.0833, "step": 175320 }, { "epoch": 3.5690585241730277, "grad_norm": 0.04089548649355941, "learning_rate": 2.294231447208992e-06, "loss": 0.0388, "step": 175330 }, { "epoch": 3.569262086513995, "grad_norm": 0.0024138243797776735, "learning_rate": 2.2936339408776636e-06, "loss": 0.0405, "step": 175340 }, { "epoch": 3.569465648854962, "grad_norm": 0.039851051111229656, "learning_rate": 2.2930364892048854e-06, "loss": 0.0152, "step": 175350 }, { "epoch": 3.5696692111959285, "grad_norm": 0.012353564687805517, "learning_rate": 2.2924390922027206e-06, "loss": 0.0095, "step": 175360 }, { "epoch": 3.5698727735368956, "grad_norm": 0.0009156589674743944, "learning_rate": 2.2918417498832334e-06, "loss": 0.128, "step": 175370 }, { "epoch": 3.5700763358778627, "grad_norm": 13.084204927042924, "learning_rate": 2.2912444622584936e-06, "loss": 0.0382, "step": 175380 }, { "epoch": 3.5702798982188293, "grad_norm": 0.011810011324737118, "learning_rate": 2.290647229340559e-06, "loss": 0.1333, "step": 175390 }, { "epoch": 3.5704834605597964, "grad_norm": 0.022193838418806427, "learning_rate": 2.2900500511414935e-06, "loss": 0.0396, "step": 175400 }, { "epoch": 3.5706870229007635, "grad_norm": 0.12630593863047687, "learning_rate": 2.289452927673357e-06, "loss": 0.0356, "step": 175410 }, { "epoch": 3.57089058524173, "grad_norm": 0.021897689970162337, "learning_rate": 2.28885585894821e-06, "loss": 0.0834, "step": 175420 }, { "epoch": 3.571094147582697, "grad_norm": 9.01788943601452, "learning_rate": 2.2882588449781113e-06, "loss": 0.1143, "step": 175430 }, { "epoch": 3.5712977099236642, "grad_norm": 0.06798377697780034, "learning_rate": 2.2876618857751176e-06, "loss": 0.0169, "step": 175440 }, { "epoch": 3.571501272264631, "grad_norm": 0.15931057496340267, "learning_rate": 2.2870649813512857e-06, "loss": 0.0513, "step": 175450 }, { "epoch": 3.571704834605598, "grad_norm": 16.07322846022848, "learning_rate": 2.2864681317186704e-06, "loss": 0.0887, "step": 175460 }, { "epoch": 3.571908396946565, "grad_norm": 0.02407947332885442, "learning_rate": 2.2858713368893264e-06, "loss": 0.023, "step": 175470 }, { "epoch": 3.5721119592875317, "grad_norm": 0.10504570388456368, "learning_rate": 2.2852745968753087e-06, "loss": 0.0054, "step": 175480 }, { "epoch": 3.5723155216284987, "grad_norm": 0.028088546179156987, "learning_rate": 2.284677911688663e-06, "loss": 0.0532, "step": 175490 }, { "epoch": 3.572519083969466, "grad_norm": 1.4635879912729597, "learning_rate": 2.284081281341446e-06, "loss": 0.0749, "step": 175500 }, { "epoch": 3.5727226463104325, "grad_norm": 0.036946349680582524, "learning_rate": 2.283484705845708e-06, "loss": 0.0308, "step": 175510 }, { "epoch": 3.5729262086513995, "grad_norm": 7.738456429728684, "learning_rate": 2.282888185213491e-06, "loss": 0.0783, "step": 175520 }, { "epoch": 3.5731297709923666, "grad_norm": 0.028727802507550467, "learning_rate": 2.28229171945685e-06, "loss": 0.0282, "step": 175530 }, { "epoch": 3.5733333333333333, "grad_norm": 9.456780835832426, "learning_rate": 2.281695308587827e-06, "loss": 0.0574, "step": 175540 }, { "epoch": 3.5735368956743003, "grad_norm": 0.18989243840471925, "learning_rate": 2.2810989526184663e-06, "loss": 0.0988, "step": 175550 }, { "epoch": 3.5737404580152674, "grad_norm": 0.02643991766595529, "learning_rate": 2.280502651560818e-06, "loss": 0.0935, "step": 175560 }, { "epoch": 3.573944020356234, "grad_norm": 0.12475313328741451, "learning_rate": 2.2799064054269192e-06, "loss": 0.091, "step": 175570 }, { "epoch": 3.574147582697201, "grad_norm": 0.047623385419885145, "learning_rate": 2.279310214228812e-06, "loss": 0.0017, "step": 175580 }, { "epoch": 3.574351145038168, "grad_norm": 62.522038398692146, "learning_rate": 2.2787140779785433e-06, "loss": 0.0299, "step": 175590 }, { "epoch": 3.574554707379135, "grad_norm": 0.04178512938152268, "learning_rate": 2.278117996688147e-06, "loss": 0.0518, "step": 175600 }, { "epoch": 3.574758269720102, "grad_norm": 9.877631970799762, "learning_rate": 2.2775219703696635e-06, "loss": 0.0163, "step": 175610 }, { "epoch": 3.5749618320610685, "grad_norm": 12.778777882639515, "learning_rate": 2.27692599903513e-06, "loss": 0.0846, "step": 175620 }, { "epoch": 3.5751653944020356, "grad_norm": 0.013524039902525223, "learning_rate": 2.2763300826965836e-06, "loss": 0.0515, "step": 175630 }, { "epoch": 3.5753689567430027, "grad_norm": 13.154941086773496, "learning_rate": 2.275734221366059e-06, "loss": 0.0967, "step": 175640 }, { "epoch": 3.5755725190839693, "grad_norm": 6.032741332552323, "learning_rate": 2.2751384150555905e-06, "loss": 0.0719, "step": 175650 }, { "epoch": 3.5757760814249364, "grad_norm": 0.2594813287867682, "learning_rate": 2.274542663777212e-06, "loss": 0.1021, "step": 175660 }, { "epoch": 3.575979643765903, "grad_norm": 13.320238125247315, "learning_rate": 2.273946967542954e-06, "loss": 0.0161, "step": 175670 }, { "epoch": 3.57618320610687, "grad_norm": 0.08739627163919679, "learning_rate": 2.2733513263648484e-06, "loss": 0.082, "step": 175680 }, { "epoch": 3.576386768447837, "grad_norm": 0.0567080141681765, "learning_rate": 2.2727557402549265e-06, "loss": 0.0725, "step": 175690 }, { "epoch": 3.576590330788804, "grad_norm": 0.02141243185534055, "learning_rate": 2.2721602092252117e-06, "loss": 0.0299, "step": 175700 }, { "epoch": 3.576793893129771, "grad_norm": 0.0033632098304815605, "learning_rate": 2.271564733287736e-06, "loss": 0.0011, "step": 175710 }, { "epoch": 3.576997455470738, "grad_norm": 0.0096137892031824, "learning_rate": 2.270969312454527e-06, "loss": 0.0058, "step": 175720 }, { "epoch": 3.5772010178117046, "grad_norm": 0.060001279023676926, "learning_rate": 2.270373946737604e-06, "loss": 0.016, "step": 175730 }, { "epoch": 3.5774045801526717, "grad_norm": 15.494000870913549, "learning_rate": 2.269778636148997e-06, "loss": 0.0876, "step": 175740 }, { "epoch": 3.577608142493639, "grad_norm": 1.2402158155308813, "learning_rate": 2.2691833807007284e-06, "loss": 0.0886, "step": 175750 }, { "epoch": 3.5778117048346054, "grad_norm": 0.05364921926439445, "learning_rate": 2.268588180404817e-06, "loss": 0.0135, "step": 175760 }, { "epoch": 3.5780152671755725, "grad_norm": 0.09029198548759984, "learning_rate": 2.2679930352732854e-06, "loss": 0.0215, "step": 175770 }, { "epoch": 3.5782188295165396, "grad_norm": 12.067653459979542, "learning_rate": 2.2673979453181534e-06, "loss": 0.0533, "step": 175780 }, { "epoch": 3.578422391857506, "grad_norm": 0.05603464779832389, "learning_rate": 2.266802910551439e-06, "loss": 0.0518, "step": 175790 }, { "epoch": 3.5786259541984733, "grad_norm": 0.017769723547223844, "learning_rate": 2.2662079309851605e-06, "loss": 0.0009, "step": 175800 }, { "epoch": 3.5788295165394404, "grad_norm": 0.0024639480849230366, "learning_rate": 2.265613006631334e-06, "loss": 0.001, "step": 175810 }, { "epoch": 3.579033078880407, "grad_norm": 0.05990677539362241, "learning_rate": 2.265018137501974e-06, "loss": 0.0179, "step": 175820 }, { "epoch": 3.579236641221374, "grad_norm": 0.01972427597824635, "learning_rate": 2.264423323609096e-06, "loss": 0.1091, "step": 175830 }, { "epoch": 3.579440203562341, "grad_norm": 0.09483732021731982, "learning_rate": 2.263828564964712e-06, "loss": 0.062, "step": 175840 }, { "epoch": 3.579643765903308, "grad_norm": 0.016998652379194, "learning_rate": 2.2632338615808347e-06, "loss": 0.0478, "step": 175850 }, { "epoch": 3.579847328244275, "grad_norm": 0.12579399992527396, "learning_rate": 2.2626392134694748e-06, "loss": 0.0475, "step": 175860 }, { "epoch": 3.580050890585242, "grad_norm": 0.08216121423485995, "learning_rate": 2.262044620642641e-06, "loss": 0.0607, "step": 175870 }, { "epoch": 3.5802544529262086, "grad_norm": 0.3106419148644856, "learning_rate": 2.2614500831123426e-06, "loss": 0.0302, "step": 175880 }, { "epoch": 3.5804580152671757, "grad_norm": 1.527420752235182, "learning_rate": 2.260855600890587e-06, "loss": 0.0209, "step": 175890 }, { "epoch": 3.5806615776081427, "grad_norm": 2.981795886819404, "learning_rate": 2.2602611739893826e-06, "loss": 0.0414, "step": 175900 }, { "epoch": 3.5808651399491094, "grad_norm": 0.11616772760901987, "learning_rate": 2.259666802420731e-06, "loss": 0.0716, "step": 175910 }, { "epoch": 3.5810687022900765, "grad_norm": 0.04227530327392528, "learning_rate": 2.259072486196635e-06, "loss": 0.0036, "step": 175920 }, { "epoch": 3.5812722646310435, "grad_norm": 0.022362489474792985, "learning_rate": 2.258478225329105e-06, "loss": 0.1096, "step": 175930 }, { "epoch": 3.58147582697201, "grad_norm": 0.04646917912946188, "learning_rate": 2.257884019830136e-06, "loss": 0.0029, "step": 175940 }, { "epoch": 3.5816793893129772, "grad_norm": 0.05657014184722722, "learning_rate": 2.2572898697117296e-06, "loss": 0.0003, "step": 175950 }, { "epoch": 3.581882951653944, "grad_norm": 0.0014808152295760235, "learning_rate": 2.2566957749858905e-06, "loss": 0.047, "step": 175960 }, { "epoch": 3.582086513994911, "grad_norm": 7.137328152758666, "learning_rate": 2.2561017356646116e-06, "loss": 0.0149, "step": 175970 }, { "epoch": 3.5822900763358776, "grad_norm": 20.345881741026048, "learning_rate": 2.2555077517598924e-06, "loss": 0.0303, "step": 175980 }, { "epoch": 3.5824936386768447, "grad_norm": 0.008700030601693585, "learning_rate": 2.2549138232837286e-06, "loss": 0.0023, "step": 175990 }, { "epoch": 3.5826972010178118, "grad_norm": 0.042818245236708366, "learning_rate": 2.254319950248116e-06, "loss": 0.0074, "step": 176000 }, { "epoch": 3.5829007633587784, "grad_norm": 8.519992302793227, "learning_rate": 2.2537261326650484e-06, "loss": 0.1005, "step": 176010 }, { "epoch": 3.5831043256997455, "grad_norm": 3.18938944805391, "learning_rate": 2.2531323705465188e-06, "loss": 0.021, "step": 176020 }, { "epoch": 3.5833078880407125, "grad_norm": 1.5265120502168206, "learning_rate": 2.2525386639045187e-06, "loss": 0.0007, "step": 176030 }, { "epoch": 3.583511450381679, "grad_norm": 0.006833456388562711, "learning_rate": 2.2519450127510393e-06, "loss": 0.0708, "step": 176040 }, { "epoch": 3.5837150127226463, "grad_norm": 0.05341556411182084, "learning_rate": 2.25135141709807e-06, "loss": 0.0727, "step": 176050 }, { "epoch": 3.5839185750636133, "grad_norm": 0.004369497108535782, "learning_rate": 2.2507578769576006e-06, "loss": 0.0695, "step": 176060 }, { "epoch": 3.58412213740458, "grad_norm": 0.023893116931801375, "learning_rate": 2.2501643923416127e-06, "loss": 0.0434, "step": 176070 }, { "epoch": 3.584325699745547, "grad_norm": 0.04655833470257349, "learning_rate": 2.2495709632620994e-06, "loss": 0.0115, "step": 176080 }, { "epoch": 3.584529262086514, "grad_norm": 0.08854826580800487, "learning_rate": 2.2489775897310444e-06, "loss": 0.001, "step": 176090 }, { "epoch": 3.5847328244274808, "grad_norm": 0.042327014056931685, "learning_rate": 2.2483842717604265e-06, "loss": 0.0414, "step": 176100 }, { "epoch": 3.584936386768448, "grad_norm": 0.003746671786920222, "learning_rate": 2.2477910093622347e-06, "loss": 0.0205, "step": 176110 }, { "epoch": 3.585139949109415, "grad_norm": 8.753890374639111, "learning_rate": 2.2471978025484493e-06, "loss": 0.0245, "step": 176120 }, { "epoch": 3.5853435114503815, "grad_norm": 0.002164134249075751, "learning_rate": 2.246604651331047e-06, "loss": 0.0912, "step": 176130 }, { "epoch": 3.5855470737913486, "grad_norm": 0.00518536506864994, "learning_rate": 2.246011555722013e-06, "loss": 0.045, "step": 176140 }, { "epoch": 3.5857506361323157, "grad_norm": 0.07110428228658076, "learning_rate": 2.2454185157333214e-06, "loss": 0.0038, "step": 176150 }, { "epoch": 3.5859541984732823, "grad_norm": 0.007474116840904356, "learning_rate": 2.244825531376949e-06, "loss": 0.0843, "step": 176160 }, { "epoch": 3.5861577608142494, "grad_norm": 0.018403866188261925, "learning_rate": 2.244232602664877e-06, "loss": 0.0575, "step": 176170 }, { "epoch": 3.5863613231552165, "grad_norm": 0.0532305038965098, "learning_rate": 2.243639729609075e-06, "loss": 0.0341, "step": 176180 }, { "epoch": 3.586564885496183, "grad_norm": 0.026036375398430983, "learning_rate": 2.2430469122215193e-06, "loss": 0.0412, "step": 176190 }, { "epoch": 3.58676844783715, "grad_norm": 0.025650171938366013, "learning_rate": 2.2424541505141816e-06, "loss": 0.0002, "step": 176200 }, { "epoch": 3.5869720101781173, "grad_norm": 0.009214944994193529, "learning_rate": 2.2418614444990345e-06, "loss": 0.0378, "step": 176210 }, { "epoch": 3.587175572519084, "grad_norm": 0.2953632719903765, "learning_rate": 2.241268794188048e-06, "loss": 0.0658, "step": 176220 }, { "epoch": 3.587379134860051, "grad_norm": 15.118740854363265, "learning_rate": 2.2406761995931913e-06, "loss": 0.0305, "step": 176230 }, { "epoch": 3.587582697201018, "grad_norm": 0.49121629529309296, "learning_rate": 2.2400836607264324e-06, "loss": 0.0629, "step": 176240 }, { "epoch": 3.5877862595419847, "grad_norm": 0.007597146322450672, "learning_rate": 2.239491177599739e-06, "loss": 0.0254, "step": 176250 }, { "epoch": 3.587989821882952, "grad_norm": 0.09128811042189239, "learning_rate": 2.238898750225077e-06, "loss": 0.0277, "step": 176260 }, { "epoch": 3.5881933842239184, "grad_norm": 2.6374641080103562, "learning_rate": 2.2383063786144126e-06, "loss": 0.0447, "step": 176270 }, { "epoch": 3.5883969465648855, "grad_norm": 0.015011189087021206, "learning_rate": 2.2377140627797046e-06, "loss": 0.0207, "step": 176280 }, { "epoch": 3.5886005089058526, "grad_norm": 0.02364482404072583, "learning_rate": 2.23712180273292e-06, "loss": 0.0817, "step": 176290 }, { "epoch": 3.588804071246819, "grad_norm": 5.647379246119401, "learning_rate": 2.2365295984860213e-06, "loss": 0.1297, "step": 176300 }, { "epoch": 3.5890076335877863, "grad_norm": 0.5442636305889296, "learning_rate": 2.235937450050965e-06, "loss": 0.0535, "step": 176310 }, { "epoch": 3.589211195928753, "grad_norm": 0.015650541395358285, "learning_rate": 2.2353453574397097e-06, "loss": 0.0641, "step": 176320 }, { "epoch": 3.58941475826972, "grad_norm": 0.02196507072261947, "learning_rate": 2.23475332066422e-06, "loss": 0.0981, "step": 176330 }, { "epoch": 3.589618320610687, "grad_norm": 0.08837673253070176, "learning_rate": 2.234161339736447e-06, "loss": 0.0191, "step": 176340 }, { "epoch": 3.5898218829516537, "grad_norm": 0.2959485405951349, "learning_rate": 2.2335694146683474e-06, "loss": 0.0713, "step": 176350 }, { "epoch": 3.590025445292621, "grad_norm": 0.5690230164477641, "learning_rate": 2.232977545471877e-06, "loss": 0.0009, "step": 176360 }, { "epoch": 3.590229007633588, "grad_norm": 0.02758887185460373, "learning_rate": 2.232385732158989e-06, "loss": 0.1329, "step": 176370 }, { "epoch": 3.5904325699745545, "grad_norm": 0.05685423967950192, "learning_rate": 2.2317939747416357e-06, "loss": 0.0349, "step": 176380 }, { "epoch": 3.5906361323155216, "grad_norm": 0.010343153317813283, "learning_rate": 2.2312022732317686e-06, "loss": 0.001, "step": 176390 }, { "epoch": 3.5908396946564887, "grad_norm": 0.4368567052750345, "learning_rate": 2.2306106276413373e-06, "loss": 0.0309, "step": 176400 }, { "epoch": 3.5910432569974553, "grad_norm": 1.155139497546479, "learning_rate": 2.2300190379822916e-06, "loss": 0.0502, "step": 176410 }, { "epoch": 3.5912468193384224, "grad_norm": 0.24526112465024805, "learning_rate": 2.229427504266579e-06, "loss": 0.037, "step": 176420 }, { "epoch": 3.5914503816793895, "grad_norm": 0.013553769885635121, "learning_rate": 2.2288360265061473e-06, "loss": 0.0006, "step": 176430 }, { "epoch": 3.591653944020356, "grad_norm": 0.10760497627978532, "learning_rate": 2.22824460471294e-06, "loss": 0.067, "step": 176440 }, { "epoch": 3.591857506361323, "grad_norm": 0.13330743933665531, "learning_rate": 2.2276532388989034e-06, "loss": 0.1007, "step": 176450 }, { "epoch": 3.5920610687022902, "grad_norm": 0.004554621950654081, "learning_rate": 2.2270619290759828e-06, "loss": 0.0256, "step": 176460 }, { "epoch": 3.592264631043257, "grad_norm": 0.019365937011448602, "learning_rate": 2.2264706752561138e-06, "loss": 0.0346, "step": 176470 }, { "epoch": 3.592468193384224, "grad_norm": 0.09342157897321036, "learning_rate": 2.225879477451244e-06, "loss": 0.1405, "step": 176480 }, { "epoch": 3.592671755725191, "grad_norm": 0.0745818137511938, "learning_rate": 2.225288335673313e-06, "loss": 0.0088, "step": 176490 }, { "epoch": 3.5928753180661577, "grad_norm": 0.05564927160789061, "learning_rate": 2.2246972499342538e-06, "loss": 0.1372, "step": 176500 }, { "epoch": 3.5930788804071248, "grad_norm": 0.531147761840291, "learning_rate": 2.224106220246012e-06, "loss": 0.0482, "step": 176510 }, { "epoch": 3.593282442748092, "grad_norm": 0.1855405538948153, "learning_rate": 2.2235152466205186e-06, "loss": 0.0469, "step": 176520 }, { "epoch": 3.5934860050890585, "grad_norm": 8.65893592794876, "learning_rate": 2.222924329069709e-06, "loss": 0.0291, "step": 176530 }, { "epoch": 3.5936895674300255, "grad_norm": 0.0574928113398144, "learning_rate": 2.222333467605523e-06, "loss": 0.0308, "step": 176540 }, { "epoch": 3.5938931297709926, "grad_norm": 0.31078165154271764, "learning_rate": 2.2217426622398884e-06, "loss": 0.0436, "step": 176550 }, { "epoch": 3.5940966921119593, "grad_norm": 0.10475832688570826, "learning_rate": 2.22115191298474e-06, "loss": 0.1256, "step": 176560 }, { "epoch": 3.5943002544529263, "grad_norm": 0.023920574093159824, "learning_rate": 2.2205612198520067e-06, "loss": 0.0524, "step": 176570 }, { "epoch": 3.594503816793893, "grad_norm": 0.031952544037911965, "learning_rate": 2.21997058285362e-06, "loss": 0.0357, "step": 176580 }, { "epoch": 3.59470737913486, "grad_norm": 0.0071575647605452404, "learning_rate": 2.2193800020015077e-06, "loss": 0.0522, "step": 176590 }, { "epoch": 3.594910941475827, "grad_norm": 0.17088897092525773, "learning_rate": 2.218789477307598e-06, "loss": 0.0343, "step": 176600 }, { "epoch": 3.5951145038167938, "grad_norm": 100.9807321640983, "learning_rate": 2.2181990087838168e-06, "loss": 0.0712, "step": 176610 }, { "epoch": 3.595318066157761, "grad_norm": 2.702341190403405e-07, "learning_rate": 2.21760859644209e-06, "loss": 0.0049, "step": 176620 }, { "epoch": 3.5955216284987275, "grad_norm": 0.029942868157062978, "learning_rate": 2.2170182402943404e-06, "loss": 0.0568, "step": 176630 }, { "epoch": 3.5957251908396946, "grad_norm": 0.04419828148461355, "learning_rate": 2.2164279403524942e-06, "loss": 0.0698, "step": 176640 }, { "epoch": 3.5959287531806616, "grad_norm": 0.07392542060443157, "learning_rate": 2.215837696628467e-06, "loss": 0.1024, "step": 176650 }, { "epoch": 3.5961323155216283, "grad_norm": 3.243496016291466, "learning_rate": 2.2152475091341855e-06, "loss": 0.0798, "step": 176660 }, { "epoch": 3.5963358778625953, "grad_norm": 12.831737296463352, "learning_rate": 2.21465737788157e-06, "loss": 0.0583, "step": 176670 }, { "epoch": 3.5965394402035624, "grad_norm": 0.01277722286342467, "learning_rate": 2.2140673028825315e-06, "loss": 0.0077, "step": 176680 }, { "epoch": 3.596743002544529, "grad_norm": 0.03132554274293643, "learning_rate": 2.213477284148994e-06, "loss": 0.109, "step": 176690 }, { "epoch": 3.596946564885496, "grad_norm": 0.013091218010255302, "learning_rate": 2.2128873216928743e-06, "loss": 0.0289, "step": 176700 }, { "epoch": 3.597150127226463, "grad_norm": 0.042818361898851416, "learning_rate": 2.2122974155260808e-06, "loss": 0.0358, "step": 176710 }, { "epoch": 3.59735368956743, "grad_norm": 1.0215447642021038, "learning_rate": 2.2117075656605347e-06, "loss": 0.0189, "step": 176720 }, { "epoch": 3.597557251908397, "grad_norm": 0.014278110232117055, "learning_rate": 2.2111177721081445e-06, "loss": 0.0368, "step": 176730 }, { "epoch": 3.597760814249364, "grad_norm": 5.288232604054336, "learning_rate": 2.210528034880823e-06, "loss": 0.0657, "step": 176740 }, { "epoch": 3.5979643765903306, "grad_norm": 0.10183035773814415, "learning_rate": 2.20993835399048e-06, "loss": 0.0007, "step": 176750 }, { "epoch": 3.5981679389312977, "grad_norm": 18.88260057981919, "learning_rate": 2.2093487294490255e-06, "loss": 0.063, "step": 176760 }, { "epoch": 3.598371501272265, "grad_norm": 11.705345028326299, "learning_rate": 2.2087591612683683e-06, "loss": 0.0439, "step": 176770 }, { "epoch": 3.5985750636132314, "grad_norm": 0.2511717421116383, "learning_rate": 2.2081696494604147e-06, "loss": 0.0459, "step": 176780 }, { "epoch": 3.5987786259541985, "grad_norm": 0.03076782281141374, "learning_rate": 2.207580194037071e-06, "loss": 0.0596, "step": 176790 }, { "epoch": 3.5989821882951656, "grad_norm": 1.179929612257905, "learning_rate": 2.206990795010242e-06, "loss": 0.0726, "step": 176800 }, { "epoch": 3.599185750636132, "grad_norm": 15.281686970924865, "learning_rate": 2.206401452391831e-06, "loss": 0.1028, "step": 176810 }, { "epoch": 3.5993893129770993, "grad_norm": 0.06365813126924887, "learning_rate": 2.2058121661937406e-06, "loss": 0.0076, "step": 176820 }, { "epoch": 3.5995928753180664, "grad_norm": 9.44749451217269, "learning_rate": 2.2052229364278726e-06, "loss": 0.0298, "step": 176830 }, { "epoch": 3.599796437659033, "grad_norm": 0.13954523726768317, "learning_rate": 2.204633763106127e-06, "loss": 0.0431, "step": 176840 }, { "epoch": 3.6, "grad_norm": 12.292441813749846, "learning_rate": 2.204044646240403e-06, "loss": 0.0295, "step": 176850 }, { "epoch": 3.600203562340967, "grad_norm": 0.058574419813870904, "learning_rate": 2.2034555858425986e-06, "loss": 0.0435, "step": 176860 }, { "epoch": 3.600407124681934, "grad_norm": 0.026007301542347115, "learning_rate": 2.202866581924611e-06, "loss": 0.0278, "step": 176870 }, { "epoch": 3.600610687022901, "grad_norm": 6.266122238223563, "learning_rate": 2.2022776344983366e-06, "loss": 0.0121, "step": 176880 }, { "epoch": 3.600814249363868, "grad_norm": 12.86696946472448, "learning_rate": 2.2016887435756673e-06, "loss": 0.1126, "step": 176890 }, { "epoch": 3.6010178117048346, "grad_norm": 0.06586712218233774, "learning_rate": 2.201099909168496e-06, "loss": 0.0795, "step": 176900 }, { "epoch": 3.6012213740458017, "grad_norm": 0.013790619517786972, "learning_rate": 2.2005111312887207e-06, "loss": 0.0819, "step": 176910 }, { "epoch": 3.6014249363867683, "grad_norm": 0.03516799856977321, "learning_rate": 2.199922409948227e-06, "loss": 0.0838, "step": 176920 }, { "epoch": 3.6016284987277354, "grad_norm": 0.01297774625112298, "learning_rate": 2.199333745158907e-06, "loss": 0.0082, "step": 176930 }, { "epoch": 3.601832061068702, "grad_norm": 19.771140184213067, "learning_rate": 2.198745136932649e-06, "loss": 0.0371, "step": 176940 }, { "epoch": 3.602035623409669, "grad_norm": 0.0934987502944887, "learning_rate": 2.1981565852813407e-06, "loss": 0.0159, "step": 176950 }, { "epoch": 3.602239185750636, "grad_norm": 0.09277340954009754, "learning_rate": 2.197568090216869e-06, "loss": 0.0937, "step": 176960 }, { "epoch": 3.602442748091603, "grad_norm": 0.02512813855398949, "learning_rate": 2.1969796517511195e-06, "loss": 0.0886, "step": 176970 }, { "epoch": 3.60264631043257, "grad_norm": 8.47933160294976, "learning_rate": 2.196391269895976e-06, "loss": 0.0114, "step": 176980 }, { "epoch": 3.602849872773537, "grad_norm": 7.447791647476939, "learning_rate": 2.1958029446633224e-06, "loss": 0.0677, "step": 176990 }, { "epoch": 3.6030534351145036, "grad_norm": 0.06486053264731052, "learning_rate": 2.1952146760650395e-06, "loss": 0.0775, "step": 177000 }, { "epoch": 3.6032569974554707, "grad_norm": 0.025734336877205347, "learning_rate": 2.1946264641130107e-06, "loss": 0.0008, "step": 177010 }, { "epoch": 3.6034605597964378, "grad_norm": 0.006585011969698846, "learning_rate": 2.19403830881911e-06, "loss": 0.045, "step": 177020 }, { "epoch": 3.6036641221374044, "grad_norm": 0.14926994517387404, "learning_rate": 2.1934502101952215e-06, "loss": 0.056, "step": 177030 }, { "epoch": 3.6038676844783715, "grad_norm": 0.9282215154929939, "learning_rate": 2.1928621682532224e-06, "loss": 0.0686, "step": 177040 }, { "epoch": 3.6040712468193385, "grad_norm": 0.14688999167095002, "learning_rate": 2.192274183004984e-06, "loss": 0.0004, "step": 177050 }, { "epoch": 3.604274809160305, "grad_norm": 0.08226743689409535, "learning_rate": 2.191686254462387e-06, "loss": 0.0018, "step": 177060 }, { "epoch": 3.6044783715012723, "grad_norm": 0.2907814567078373, "learning_rate": 2.1910983826373046e-06, "loss": 0.0777, "step": 177070 }, { "epoch": 3.6046819338422393, "grad_norm": 0.08172999161542625, "learning_rate": 2.190510567541605e-06, "loss": 0.0087, "step": 177080 }, { "epoch": 3.604885496183206, "grad_norm": 17.73082230094249, "learning_rate": 2.189922809187166e-06, "loss": 0.0636, "step": 177090 }, { "epoch": 3.605089058524173, "grad_norm": 0.00122794690945811, "learning_rate": 2.1893351075858536e-06, "loss": 0.1439, "step": 177100 }, { "epoch": 3.60529262086514, "grad_norm": 0.01824404985409863, "learning_rate": 2.1887474627495377e-06, "loss": 0.0269, "step": 177110 }, { "epoch": 3.6054961832061068, "grad_norm": 0.07408672502207098, "learning_rate": 2.1881598746900904e-06, "loss": 0.0619, "step": 177120 }, { "epoch": 3.605699745547074, "grad_norm": 1.1060840476380354, "learning_rate": 2.1875723434193747e-06, "loss": 0.065, "step": 177130 }, { "epoch": 3.605903307888041, "grad_norm": 0.9516719703771034, "learning_rate": 2.1869848689492564e-06, "loss": 0.0138, "step": 177140 }, { "epoch": 3.6061068702290076, "grad_norm": 0.16307717568412436, "learning_rate": 2.1863974512916048e-06, "loss": 0.0011, "step": 177150 }, { "epoch": 3.6063104325699746, "grad_norm": 8.995704864390328, "learning_rate": 2.185810090458279e-06, "loss": 0.0274, "step": 177160 }, { "epoch": 3.6065139949109417, "grad_norm": 0.014153082513214676, "learning_rate": 2.185222786461143e-06, "loss": 0.0016, "step": 177170 }, { "epoch": 3.6067175572519083, "grad_norm": 0.023606736857837023, "learning_rate": 2.1846355393120584e-06, "loss": 0.0107, "step": 177180 }, { "epoch": 3.6069211195928754, "grad_norm": 64.54341903714695, "learning_rate": 2.184048349022885e-06, "loss": 0.086, "step": 177190 }, { "epoch": 3.6071246819338425, "grad_norm": 11.444223694169473, "learning_rate": 2.1834612156054824e-06, "loss": 0.0215, "step": 177200 }, { "epoch": 3.607328244274809, "grad_norm": 0.6465813057722827, "learning_rate": 2.182874139071708e-06, "loss": 0.0256, "step": 177210 }, { "epoch": 3.607531806615776, "grad_norm": 0.04295700324978206, "learning_rate": 2.182287119433419e-06, "loss": 0.0431, "step": 177220 }, { "epoch": 3.607735368956743, "grad_norm": 0.044394633503200664, "learning_rate": 2.18170015670247e-06, "loss": 0.1178, "step": 177230 }, { "epoch": 3.60793893129771, "grad_norm": 0.05760839766094117, "learning_rate": 2.181113250890717e-06, "loss": 0.0323, "step": 177240 }, { "epoch": 3.608142493638677, "grad_norm": 0.22553334043254475, "learning_rate": 2.180526402010015e-06, "loss": 0.0595, "step": 177250 }, { "epoch": 3.6083460559796436, "grad_norm": 0.1051641141178847, "learning_rate": 2.179939610072209e-06, "loss": 0.0036, "step": 177260 }, { "epoch": 3.6085496183206107, "grad_norm": 0.0010073209282597377, "learning_rate": 2.1793528750891575e-06, "loss": 0.0434, "step": 177270 }, { "epoch": 3.6087531806615774, "grad_norm": 0.01228413537497768, "learning_rate": 2.1787661970727088e-06, "loss": 0.1492, "step": 177280 }, { "epoch": 3.6089567430025444, "grad_norm": 0.02796770336257958, "learning_rate": 2.178179576034709e-06, "loss": 0.0304, "step": 177290 }, { "epoch": 3.6091603053435115, "grad_norm": 0.24136122397803658, "learning_rate": 2.177593011987007e-06, "loss": 0.0612, "step": 177300 }, { "epoch": 3.609363867684478, "grad_norm": 0.025347010156272637, "learning_rate": 2.1770065049414495e-06, "loss": 0.0404, "step": 177310 }, { "epoch": 3.6095674300254452, "grad_norm": 0.019320099461918743, "learning_rate": 2.1764200549098812e-06, "loss": 0.1487, "step": 177320 }, { "epoch": 3.6097709923664123, "grad_norm": 0.46587174534852693, "learning_rate": 2.175833661904147e-06, "loss": 0.0622, "step": 177330 }, { "epoch": 3.609974554707379, "grad_norm": 0.31754257776651335, "learning_rate": 2.1752473259360896e-06, "loss": 0.0274, "step": 177340 }, { "epoch": 3.610178117048346, "grad_norm": 0.08225985837118664, "learning_rate": 2.1746610470175505e-06, "loss": 0.074, "step": 177350 }, { "epoch": 3.610381679389313, "grad_norm": 12.46378386374418, "learning_rate": 2.174074825160371e-06, "loss": 0.1171, "step": 177360 }, { "epoch": 3.6105852417302797, "grad_norm": 5.54760743524578, "learning_rate": 2.1734886603763892e-06, "loss": 0.0049, "step": 177370 }, { "epoch": 3.610788804071247, "grad_norm": 0.01944318256037408, "learning_rate": 2.1729025526774454e-06, "loss": 0.0417, "step": 177380 }, { "epoch": 3.610992366412214, "grad_norm": 0.13951669125663518, "learning_rate": 2.1723165020753756e-06, "loss": 0.1024, "step": 177390 }, { "epoch": 3.6111959287531805, "grad_norm": 0.12211781728230231, "learning_rate": 2.1717305085820154e-06, "loss": 0.0575, "step": 177400 }, { "epoch": 3.6113994910941476, "grad_norm": 0.3555228589770347, "learning_rate": 2.1711445722092012e-06, "loss": 0.0844, "step": 177410 }, { "epoch": 3.6116030534351147, "grad_norm": 23.576359092989286, "learning_rate": 2.1705586929687657e-06, "loss": 0.0405, "step": 177420 }, { "epoch": 3.6118066157760813, "grad_norm": 0.08574363183882447, "learning_rate": 2.1699728708725414e-06, "loss": 0.0027, "step": 177430 }, { "epoch": 3.6120101781170484, "grad_norm": 7.356748643747106, "learning_rate": 2.1693871059323623e-06, "loss": 0.1007, "step": 177440 }, { "epoch": 3.6122137404580155, "grad_norm": 0.024384692422178274, "learning_rate": 2.1688013981600525e-06, "loss": 0.1285, "step": 177450 }, { "epoch": 3.612417302798982, "grad_norm": 0.21742431783762683, "learning_rate": 2.168215747567449e-06, "loss": 0.0414, "step": 177460 }, { "epoch": 3.612620865139949, "grad_norm": 0.1769332341678968, "learning_rate": 2.167630154166373e-06, "loss": 0.0076, "step": 177470 }, { "epoch": 3.6128244274809163, "grad_norm": 0.036528893131736836, "learning_rate": 2.1670446179686535e-06, "loss": 0.0447, "step": 177480 }, { "epoch": 3.613027989821883, "grad_norm": 25.15632118286121, "learning_rate": 2.1664591389861193e-06, "loss": 0.1385, "step": 177490 }, { "epoch": 3.61323155216285, "grad_norm": 0.005924014035014184, "learning_rate": 2.165873717230591e-06, "loss": 0.0473, "step": 177500 }, { "epoch": 3.613435114503817, "grad_norm": 0.44718921115262855, "learning_rate": 2.165288352713892e-06, "loss": 0.0391, "step": 177510 }, { "epoch": 3.6136386768447837, "grad_norm": 0.023483761524950242, "learning_rate": 2.1647030454478485e-06, "loss": 0.0345, "step": 177520 }, { "epoch": 3.6138422391857508, "grad_norm": 0.10250774467102007, "learning_rate": 2.1641177954442777e-06, "loss": 0.1254, "step": 177530 }, { "epoch": 3.6140458015267174, "grad_norm": 0.7988924803712633, "learning_rate": 2.163532602715e-06, "loss": 0.0107, "step": 177540 }, { "epoch": 3.6142493638676845, "grad_norm": 31.26312214554386, "learning_rate": 2.1629474672718353e-06, "loss": 0.0457, "step": 177550 }, { "epoch": 3.6144529262086516, "grad_norm": 0.014126177740969142, "learning_rate": 2.1623623891266006e-06, "loss": 0.0022, "step": 177560 }, { "epoch": 3.614656488549618, "grad_norm": 0.03078416073351025, "learning_rate": 2.161777368291112e-06, "loss": 0.0009, "step": 177570 }, { "epoch": 3.6148600508905853, "grad_norm": 0.0031978971510870278, "learning_rate": 2.1611924047771855e-06, "loss": 0.0003, "step": 177580 }, { "epoch": 3.615063613231552, "grad_norm": 21.382639161530143, "learning_rate": 2.1606074985966356e-06, "loss": 0.0825, "step": 177590 }, { "epoch": 3.615267175572519, "grad_norm": 0.02461389396004234, "learning_rate": 2.1600226497612712e-06, "loss": 0.0785, "step": 177600 }, { "epoch": 3.615470737913486, "grad_norm": 0.061517381173677804, "learning_rate": 2.1594378582829094e-06, "loss": 0.0553, "step": 177610 }, { "epoch": 3.6156743002544527, "grad_norm": 0.29821903244389913, "learning_rate": 2.1588531241733605e-06, "loss": 0.0029, "step": 177620 }, { "epoch": 3.6158778625954198, "grad_norm": 34.90632092791526, "learning_rate": 2.158268447444428e-06, "loss": 0.1066, "step": 177630 }, { "epoch": 3.616081424936387, "grad_norm": 0.018442931521160413, "learning_rate": 2.1576838281079267e-06, "loss": 0.0041, "step": 177640 }, { "epoch": 3.6162849872773535, "grad_norm": 0.032512774522438725, "learning_rate": 2.157099266175663e-06, "loss": 0.0336, "step": 177650 }, { "epoch": 3.6164885496183206, "grad_norm": 0.018585221565579135, "learning_rate": 2.1565147616594377e-06, "loss": 0.0918, "step": 177660 }, { "epoch": 3.6166921119592876, "grad_norm": 0.08503939749105086, "learning_rate": 2.1559303145710624e-06, "loss": 0.0163, "step": 177670 }, { "epoch": 3.6168956743002543, "grad_norm": 0.03444438970312121, "learning_rate": 2.1553459249223364e-06, "loss": 0.0338, "step": 177680 }, { "epoch": 3.6170992366412213, "grad_norm": 0.016998702924855644, "learning_rate": 2.1547615927250607e-06, "loss": 0.0958, "step": 177690 }, { "epoch": 3.6173027989821884, "grad_norm": 0.1682190074859334, "learning_rate": 2.1541773179910436e-06, "loss": 0.0007, "step": 177700 }, { "epoch": 3.617506361323155, "grad_norm": 0.05140360178854014, "learning_rate": 2.1535931007320787e-06, "loss": 0.0413, "step": 177710 }, { "epoch": 3.617709923664122, "grad_norm": 17.261607800986194, "learning_rate": 2.1530089409599676e-06, "loss": 0.079, "step": 177720 }, { "epoch": 3.617913486005089, "grad_norm": 0.013095970072959482, "learning_rate": 2.152424838686507e-06, "loss": 0.0386, "step": 177730 }, { "epoch": 3.618117048346056, "grad_norm": 2.1479550591632433, "learning_rate": 2.1518407939234954e-06, "loss": 0.043, "step": 177740 }, { "epoch": 3.618320610687023, "grad_norm": 0.008345802277936756, "learning_rate": 2.151256806682727e-06, "loss": 0.0211, "step": 177750 }, { "epoch": 3.61852417302799, "grad_norm": 0.11352141234509407, "learning_rate": 2.1506728769759966e-06, "loss": 0.1009, "step": 177760 }, { "epoch": 3.6187277353689566, "grad_norm": 0.8867830611489946, "learning_rate": 2.1500890048150976e-06, "loss": 0.0285, "step": 177770 }, { "epoch": 3.6189312977099237, "grad_norm": 0.15588737501181946, "learning_rate": 2.1495051902118212e-06, "loss": 0.0578, "step": 177780 }, { "epoch": 3.619134860050891, "grad_norm": 0.06222092501069691, "learning_rate": 2.1489214331779595e-06, "loss": 0.0381, "step": 177790 }, { "epoch": 3.6193384223918574, "grad_norm": 0.05500975684340475, "learning_rate": 2.1483377337253014e-06, "loss": 0.002, "step": 177800 }, { "epoch": 3.6195419847328245, "grad_norm": 0.25062532332117476, "learning_rate": 2.1477540918656355e-06, "loss": 0.0165, "step": 177810 }, { "epoch": 3.6197455470737916, "grad_norm": 0.008393486333986271, "learning_rate": 2.1471705076107495e-06, "loss": 0.0396, "step": 177820 }, { "epoch": 3.6199491094147582, "grad_norm": 0.0053231894272348715, "learning_rate": 2.1465869809724316e-06, "loss": 0.0782, "step": 177830 }, { "epoch": 3.6201526717557253, "grad_norm": 0.9948702064395756, "learning_rate": 2.1460035119624607e-06, "loss": 0.0663, "step": 177840 }, { "epoch": 3.6203562340966924, "grad_norm": 13.786131242027048, "learning_rate": 2.145420100592627e-06, "loss": 0.1922, "step": 177850 }, { "epoch": 3.620559796437659, "grad_norm": 0.06515837646656811, "learning_rate": 2.144836746874712e-06, "loss": 0.0692, "step": 177860 }, { "epoch": 3.620763358778626, "grad_norm": 0.5853506668517078, "learning_rate": 2.1442534508204953e-06, "loss": 0.0317, "step": 177870 }, { "epoch": 3.6209669211195927, "grad_norm": 0.288832860915331, "learning_rate": 2.1436702124417568e-06, "loss": 0.0464, "step": 177880 }, { "epoch": 3.62117048346056, "grad_norm": 0.1266609023836442, "learning_rate": 2.1430870317502805e-06, "loss": 0.0682, "step": 177890 }, { "epoch": 3.6213740458015264, "grad_norm": 0.03332695180102752, "learning_rate": 2.142503908757839e-06, "loss": 0.0706, "step": 177900 }, { "epoch": 3.6215776081424935, "grad_norm": 0.13487822485954873, "learning_rate": 2.141920843476212e-06, "loss": 0.0242, "step": 177910 }, { "epoch": 3.6217811704834606, "grad_norm": 0.2497591597215292, "learning_rate": 2.1413378359171748e-06, "loss": 0.025, "step": 177920 }, { "epoch": 3.6219847328244272, "grad_norm": 0.0031463207674441345, "learning_rate": 2.140754886092502e-06, "loss": 0.0505, "step": 177930 }, { "epoch": 3.6221882951653943, "grad_norm": 11.114322429365398, "learning_rate": 2.140171994013967e-06, "loss": 0.0347, "step": 177940 }, { "epoch": 3.6223918575063614, "grad_norm": 0.12190677430896346, "learning_rate": 2.1395891596933418e-06, "loss": 0.0259, "step": 177950 }, { "epoch": 3.622595419847328, "grad_norm": 0.1011534620355753, "learning_rate": 2.1390063831423974e-06, "loss": 0.0456, "step": 177960 }, { "epoch": 3.622798982188295, "grad_norm": 0.013698941769140452, "learning_rate": 2.1384236643729046e-06, "loss": 0.0492, "step": 177970 }, { "epoch": 3.623002544529262, "grad_norm": 0.1679569040796483, "learning_rate": 2.137841003396631e-06, "loss": 0.0233, "step": 177980 }, { "epoch": 3.623206106870229, "grad_norm": 0.013059420799688238, "learning_rate": 2.1372584002253473e-06, "loss": 0.0393, "step": 177990 }, { "epoch": 3.623409669211196, "grad_norm": 0.1493996328772602, "learning_rate": 2.1366758548708135e-06, "loss": 0.0754, "step": 178000 }, { "epoch": 3.623613231552163, "grad_norm": 68.75341819020046, "learning_rate": 2.1360933673448004e-06, "loss": 0.0596, "step": 178010 }, { "epoch": 3.6238167938931296, "grad_norm": 0.0034852122573812688, "learning_rate": 2.135510937659072e-06, "loss": 0.0853, "step": 178020 }, { "epoch": 3.6240203562340967, "grad_norm": 0.1730587266669428, "learning_rate": 2.134928565825386e-06, "loss": 0.0066, "step": 178030 }, { "epoch": 3.6242239185750638, "grad_norm": 0.12543680732954285, "learning_rate": 2.134346251855512e-06, "loss": 0.019, "step": 178040 }, { "epoch": 3.6244274809160304, "grad_norm": 0.05106111330094522, "learning_rate": 2.1337639957612045e-06, "loss": 0.0462, "step": 178050 }, { "epoch": 3.6246310432569975, "grad_norm": 0.06236847365116117, "learning_rate": 2.1331817975542222e-06, "loss": 0.0148, "step": 178060 }, { "epoch": 3.6248346055979646, "grad_norm": 14.798858259901223, "learning_rate": 2.1325996572463304e-06, "loss": 0.136, "step": 178070 }, { "epoch": 3.625038167938931, "grad_norm": 0.03659372954963885, "learning_rate": 2.1320175748492794e-06, "loss": 0.0414, "step": 178080 }, { "epoch": 3.6252417302798983, "grad_norm": 0.008272856884954787, "learning_rate": 2.1314355503748257e-06, "loss": 0.0015, "step": 178090 }, { "epoch": 3.6254452926208653, "grad_norm": 0.3125568917998371, "learning_rate": 2.13085358383473e-06, "loss": 0.1259, "step": 178100 }, { "epoch": 3.625648854961832, "grad_norm": 37.85159617604411, "learning_rate": 2.1302716752407386e-06, "loss": 0.05, "step": 178110 }, { "epoch": 3.625852417302799, "grad_norm": 9.297115814100755, "learning_rate": 2.1296898246046075e-06, "loss": 0.0433, "step": 178120 }, { "epoch": 3.626055979643766, "grad_norm": 24.38678720688768, "learning_rate": 2.129108031938088e-06, "loss": 0.1302, "step": 178130 }, { "epoch": 3.6262595419847328, "grad_norm": 0.3177834741666077, "learning_rate": 2.1285262972529287e-06, "loss": 0.0013, "step": 178140 }, { "epoch": 3.6264631043257, "grad_norm": 17.33767110261056, "learning_rate": 2.1279446205608794e-06, "loss": 0.0963, "step": 178150 }, { "epoch": 3.626666666666667, "grad_norm": 0.11169133705717577, "learning_rate": 2.1273630018736875e-06, "loss": 0.0072, "step": 178160 }, { "epoch": 3.6268702290076336, "grad_norm": 35.001318099241914, "learning_rate": 2.1267814412031003e-06, "loss": 0.0769, "step": 178170 }, { "epoch": 3.6270737913486006, "grad_norm": 0.24288023193978284, "learning_rate": 2.1261999385608617e-06, "loss": 0.0385, "step": 178180 }, { "epoch": 3.6272773536895673, "grad_norm": 7.117595840560744, "learning_rate": 2.1256184939587177e-06, "loss": 0.0799, "step": 178190 }, { "epoch": 3.6274809160305344, "grad_norm": 0.022648503159019916, "learning_rate": 2.1250371074084114e-06, "loss": 0.0811, "step": 178200 }, { "epoch": 3.6276844783715014, "grad_norm": 0.08287407969730812, "learning_rate": 2.1244557789216803e-06, "loss": 0.0148, "step": 178210 }, { "epoch": 3.627888040712468, "grad_norm": 11.038488257525607, "learning_rate": 2.1238745085102704e-06, "loss": 0.0938, "step": 178220 }, { "epoch": 3.628091603053435, "grad_norm": 0.18828805531127324, "learning_rate": 2.123293296185921e-06, "loss": 0.0194, "step": 178230 }, { "epoch": 3.6282951653944018, "grad_norm": 0.007999419152612255, "learning_rate": 2.1227121419603646e-06, "loss": 0.0418, "step": 178240 }, { "epoch": 3.628498727735369, "grad_norm": 0.7876561436353197, "learning_rate": 2.1221310458453446e-06, "loss": 0.1163, "step": 178250 }, { "epoch": 3.628702290076336, "grad_norm": 13.909208223720587, "learning_rate": 2.121550007852597e-06, "loss": 0.0372, "step": 178260 }, { "epoch": 3.6289058524173026, "grad_norm": 0.042718394093320566, "learning_rate": 2.1209690279938522e-06, "loss": 0.0193, "step": 178270 }, { "epoch": 3.6291094147582696, "grad_norm": 0.005012704196932513, "learning_rate": 2.1203881062808455e-06, "loss": 0.0331, "step": 178280 }, { "epoch": 3.6293129770992367, "grad_norm": 0.7460136985626089, "learning_rate": 2.1198072427253107e-06, "loss": 0.0413, "step": 178290 }, { "epoch": 3.6295165394402034, "grad_norm": 0.15727944543243133, "learning_rate": 2.119226437338978e-06, "loss": 0.0651, "step": 178300 }, { "epoch": 3.6297201017811704, "grad_norm": 0.04729746428456151, "learning_rate": 2.118645690133578e-06, "loss": 0.0483, "step": 178310 }, { "epoch": 3.6299236641221375, "grad_norm": 0.021903544227423407, "learning_rate": 2.118065001120839e-06, "loss": 0.0568, "step": 178320 }, { "epoch": 3.630127226463104, "grad_norm": 2.6077714768363442, "learning_rate": 2.11748437031249e-06, "loss": 0.0272, "step": 178330 }, { "epoch": 3.6303307888040712, "grad_norm": 0.03462519724430949, "learning_rate": 2.116903797720256e-06, "loss": 0.028, "step": 178340 }, { "epoch": 3.6305343511450383, "grad_norm": 0.06266118249735783, "learning_rate": 2.1163232833558634e-06, "loss": 0.0915, "step": 178350 }, { "epoch": 3.630737913486005, "grad_norm": 22.849278176151547, "learning_rate": 2.115742827231036e-06, "loss": 0.0358, "step": 178360 }, { "epoch": 3.630941475826972, "grad_norm": 38.01341051521488, "learning_rate": 2.1151624293574975e-06, "loss": 0.0338, "step": 178370 }, { "epoch": 3.631145038167939, "grad_norm": 0.04320435011585501, "learning_rate": 2.1145820897469693e-06, "loss": 0.0269, "step": 178380 }, { "epoch": 3.6313486005089057, "grad_norm": 0.007044727496762862, "learning_rate": 2.1140018084111724e-06, "loss": 0.0349, "step": 178390 }, { "epoch": 3.631552162849873, "grad_norm": 0.009318722249832313, "learning_rate": 2.1134215853618256e-06, "loss": 0.0008, "step": 178400 }, { "epoch": 3.63175572519084, "grad_norm": 0.26968816242751653, "learning_rate": 2.11284142061065e-06, "loss": 0.0004, "step": 178410 }, { "epoch": 3.6319592875318065, "grad_norm": 0.000818635415382674, "learning_rate": 2.112261314169359e-06, "loss": 0.0407, "step": 178420 }, { "epoch": 3.6321628498727736, "grad_norm": 0.009593021388809629, "learning_rate": 2.1116812660496675e-06, "loss": 0.0602, "step": 178430 }, { "epoch": 3.6323664122137407, "grad_norm": 0.042622445171260195, "learning_rate": 2.111101276263297e-06, "loss": 0.0012, "step": 178440 }, { "epoch": 3.6325699745547073, "grad_norm": 0.005108492081964843, "learning_rate": 2.110521344821955e-06, "loss": 0.0007, "step": 178450 }, { "epoch": 3.6327735368956744, "grad_norm": 0.4343004095035604, "learning_rate": 2.109941471737355e-06, "loss": 0.0725, "step": 178460 }, { "epoch": 3.6329770992366415, "grad_norm": 0.8875188772206625, "learning_rate": 2.109361657021213e-06, "loss": 0.1129, "step": 178470 }, { "epoch": 3.633180661577608, "grad_norm": 0.015213313179391941, "learning_rate": 2.1087819006852327e-06, "loss": 0.1366, "step": 178480 }, { "epoch": 3.633384223918575, "grad_norm": 0.020832659101597047, "learning_rate": 2.1082022027411264e-06, "loss": 0.048, "step": 178490 }, { "epoch": 3.633587786259542, "grad_norm": 4.463999915527168, "learning_rate": 2.107622563200601e-06, "loss": 0.0588, "step": 178500 }, { "epoch": 3.633791348600509, "grad_norm": 0.8350718681827399, "learning_rate": 2.1070429820753637e-06, "loss": 0.0011, "step": 178510 }, { "epoch": 3.633994910941476, "grad_norm": 7.3684701991464365, "learning_rate": 2.1064634593771187e-06, "loss": 0.1173, "step": 178520 }, { "epoch": 3.6341984732824426, "grad_norm": 0.08773735399527509, "learning_rate": 2.1058839951175712e-06, "loss": 0.046, "step": 178530 }, { "epoch": 3.6344020356234097, "grad_norm": 26.685855705519327, "learning_rate": 2.1053045893084246e-06, "loss": 0.0302, "step": 178540 }, { "epoch": 3.6346055979643763, "grad_norm": 14.328612262599023, "learning_rate": 2.1047252419613793e-06, "loss": 0.1192, "step": 178550 }, { "epoch": 3.6348091603053434, "grad_norm": 38.95851139653719, "learning_rate": 2.1041459530881376e-06, "loss": 0.0431, "step": 178560 }, { "epoch": 3.6350127226463105, "grad_norm": 28.31083201919116, "learning_rate": 2.1035667227004e-06, "loss": 0.0753, "step": 178570 }, { "epoch": 3.635216284987277, "grad_norm": 0.024533469898811312, "learning_rate": 2.102987550809859e-06, "loss": 0.0686, "step": 178580 }, { "epoch": 3.635419847328244, "grad_norm": 0.03059840201320631, "learning_rate": 2.102408437428218e-06, "loss": 0.0585, "step": 178590 }, { "epoch": 3.6356234096692113, "grad_norm": 0.013833826392249221, "learning_rate": 2.1018293825671716e-06, "loss": 0.088, "step": 178600 }, { "epoch": 3.635826972010178, "grad_norm": 0.05420360249030636, "learning_rate": 2.101250386238411e-06, "loss": 0.065, "step": 178610 }, { "epoch": 3.636030534351145, "grad_norm": 0.23436377835518912, "learning_rate": 2.1006714484536334e-06, "loss": 0.0558, "step": 178620 }, { "epoch": 3.636234096692112, "grad_norm": 0.14564972182741917, "learning_rate": 2.100092569224533e-06, "loss": 0.058, "step": 178630 }, { "epoch": 3.6364376590330787, "grad_norm": 0.009099425382592966, "learning_rate": 2.0995137485627935e-06, "loss": 0.0364, "step": 178640 }, { "epoch": 3.6366412213740458, "grad_norm": 57.24745905187675, "learning_rate": 2.098934986480114e-06, "loss": 0.055, "step": 178650 }, { "epoch": 3.636844783715013, "grad_norm": 0.008296259716761968, "learning_rate": 2.098356282988177e-06, "loss": 0.0478, "step": 178660 }, { "epoch": 3.6370483460559795, "grad_norm": 1.540397374958406, "learning_rate": 2.0977776380986705e-06, "loss": 0.0313, "step": 178670 }, { "epoch": 3.6372519083969466, "grad_norm": 39.4686908509963, "learning_rate": 2.0971990518232856e-06, "loss": 0.041, "step": 178680 }, { "epoch": 3.6374554707379136, "grad_norm": 0.05986946813813275, "learning_rate": 2.096620524173703e-06, "loss": 0.0202, "step": 178690 }, { "epoch": 3.6376590330788803, "grad_norm": 0.00986600379354871, "learning_rate": 2.096042055161609e-06, "loss": 0.0223, "step": 178700 }, { "epoch": 3.6378625954198474, "grad_norm": 0.010619929427701012, "learning_rate": 2.095463644798686e-06, "loss": 0.1422, "step": 178710 }, { "epoch": 3.6380661577608144, "grad_norm": 0.011344269653570754, "learning_rate": 2.094885293096615e-06, "loss": 0.0006, "step": 178720 }, { "epoch": 3.638269720101781, "grad_norm": 0.2454319434181654, "learning_rate": 2.094307000067078e-06, "loss": 0.0026, "step": 178730 }, { "epoch": 3.638473282442748, "grad_norm": 0.02346402623707788, "learning_rate": 2.093728765721753e-06, "loss": 0.019, "step": 178740 }, { "epoch": 3.6386768447837152, "grad_norm": 0.21891870416884418, "learning_rate": 2.093150590072319e-06, "loss": 0.0795, "step": 178750 }, { "epoch": 3.638880407124682, "grad_norm": 0.027104456288294436, "learning_rate": 2.092572473130453e-06, "loss": 0.0234, "step": 178760 }, { "epoch": 3.639083969465649, "grad_norm": 1.3557978824095607, "learning_rate": 2.0919944149078308e-06, "loss": 0.0386, "step": 178770 }, { "epoch": 3.639287531806616, "grad_norm": 0.09422939889443606, "learning_rate": 2.091416415416128e-06, "loss": 0.01, "step": 178780 }, { "epoch": 3.6394910941475827, "grad_norm": 18.73741609954643, "learning_rate": 2.090838474667014e-06, "loss": 0.0221, "step": 178790 }, { "epoch": 3.6396946564885497, "grad_norm": 0.012901595939612494, "learning_rate": 2.090260592672166e-06, "loss": 0.0025, "step": 178800 }, { "epoch": 3.639898218829517, "grad_norm": 0.024673103948389674, "learning_rate": 2.089682769443254e-06, "loss": 0.0249, "step": 178810 }, { "epoch": 3.6401017811704834, "grad_norm": 0.1721031173850114, "learning_rate": 2.089105004991946e-06, "loss": 0.0965, "step": 178820 }, { "epoch": 3.6403053435114505, "grad_norm": 2.2447359024410902, "learning_rate": 2.0885272993299088e-06, "loss": 0.1063, "step": 178830 }, { "epoch": 3.640508905852417, "grad_norm": 0.016460893268513226, "learning_rate": 2.087949652468817e-06, "loss": 0.08, "step": 178840 }, { "epoch": 3.6407124681933842, "grad_norm": 4.611518990854699, "learning_rate": 2.0873720644203312e-06, "loss": 0.0704, "step": 178850 }, { "epoch": 3.6409160305343513, "grad_norm": 0.00869535679466066, "learning_rate": 2.086794535196117e-06, "loss": 0.0331, "step": 178860 }, { "epoch": 3.641119592875318, "grad_norm": 0.05694412938722524, "learning_rate": 2.08621706480784e-06, "loss": 0.0314, "step": 178870 }, { "epoch": 3.641323155216285, "grad_norm": 1.4823505586672419, "learning_rate": 2.0856396532671624e-06, "loss": 0.1048, "step": 178880 }, { "epoch": 3.6415267175572517, "grad_norm": 0.0906222227309597, "learning_rate": 2.0850623005857455e-06, "loss": 0.0386, "step": 178890 }, { "epoch": 3.6417302798982187, "grad_norm": 2.5660284189978464, "learning_rate": 2.0844850067752496e-06, "loss": 0.0773, "step": 178900 }, { "epoch": 3.641933842239186, "grad_norm": 3.542574650228789, "learning_rate": 2.0839077718473345e-06, "loss": 0.022, "step": 178910 }, { "epoch": 3.6421374045801524, "grad_norm": 9.100286362982903, "learning_rate": 2.083330595813658e-06, "loss": 0.0247, "step": 178920 }, { "epoch": 3.6423409669211195, "grad_norm": 12.676701257774056, "learning_rate": 2.0827534786858762e-06, "loss": 0.0556, "step": 178930 }, { "epoch": 3.6425445292620866, "grad_norm": 0.7768667548476689, "learning_rate": 2.0821764204756456e-06, "loss": 0.0563, "step": 178940 }, { "epoch": 3.6427480916030532, "grad_norm": 0.20512081869120682, "learning_rate": 2.08159942119462e-06, "loss": 0.0997, "step": 178950 }, { "epoch": 3.6429516539440203, "grad_norm": 0.5579650099200392, "learning_rate": 2.0810224808544533e-06, "loss": 0.0812, "step": 178960 }, { "epoch": 3.6431552162849874, "grad_norm": 7.777423966104038, "learning_rate": 2.0804455994667994e-06, "loss": 0.1496, "step": 178970 }, { "epoch": 3.643358778625954, "grad_norm": 0.08343031368259503, "learning_rate": 2.0798687770433023e-06, "loss": 0.0603, "step": 178980 }, { "epoch": 3.643562340966921, "grad_norm": 5.742195752060306, "learning_rate": 2.0792920135956207e-06, "loss": 0.0248, "step": 178990 }, { "epoch": 3.643765903307888, "grad_norm": 0.04969410774615008, "learning_rate": 2.0787153091353966e-06, "loss": 0.0535, "step": 179000 }, { "epoch": 3.643969465648855, "grad_norm": 0.3863459524869762, "learning_rate": 2.0781386636742783e-06, "loss": 0.0022, "step": 179010 }, { "epoch": 3.644173027989822, "grad_norm": 0.11096393355512274, "learning_rate": 2.077562077223916e-06, "loss": 0.0005, "step": 179020 }, { "epoch": 3.644376590330789, "grad_norm": 0.0021153993593803267, "learning_rate": 2.0769855497959503e-06, "loss": 0.0722, "step": 179030 }, { "epoch": 3.6445801526717556, "grad_norm": 5.594866569010303, "learning_rate": 2.076409081402024e-06, "loss": 0.0217, "step": 179040 }, { "epoch": 3.6447837150127227, "grad_norm": 0.07115527977300971, "learning_rate": 2.0758326720537854e-06, "loss": 0.0259, "step": 179050 }, { "epoch": 3.6449872773536898, "grad_norm": 0.009988711243819838, "learning_rate": 2.0752563217628706e-06, "loss": 0.0159, "step": 179060 }, { "epoch": 3.6451908396946564, "grad_norm": 59.799817126116544, "learning_rate": 2.0746800305409216e-06, "loss": 0.048, "step": 179070 }, { "epoch": 3.6453944020356235, "grad_norm": 0.2923287340103861, "learning_rate": 2.074103798399577e-06, "loss": 0.0006, "step": 179080 }, { "epoch": 3.6455979643765906, "grad_norm": 0.03906889801219991, "learning_rate": 2.073527625350475e-06, "loss": 0.0769, "step": 179090 }, { "epoch": 3.645801526717557, "grad_norm": 25.467317590881237, "learning_rate": 2.0729515114052513e-06, "loss": 0.0836, "step": 179100 }, { "epoch": 3.6460050890585243, "grad_norm": 0.0772607362910096, "learning_rate": 2.072375456575542e-06, "loss": 0.0045, "step": 179110 }, { "epoch": 3.6462086513994914, "grad_norm": 32.157013841413466, "learning_rate": 2.0717994608729813e-06, "loss": 0.0898, "step": 179120 }, { "epoch": 3.646412213740458, "grad_norm": 0.022429376626197737, "learning_rate": 2.071223524309202e-06, "loss": 0.0545, "step": 179130 }, { "epoch": 3.646615776081425, "grad_norm": 0.023095821578661753, "learning_rate": 2.070647646895836e-06, "loss": 0.0043, "step": 179140 }, { "epoch": 3.6468193384223917, "grad_norm": 0.06658488918434645, "learning_rate": 2.0700718286445156e-06, "loss": 0.0459, "step": 179150 }, { "epoch": 3.6470229007633588, "grad_norm": 28.716320993125812, "learning_rate": 2.0694960695668644e-06, "loss": 0.0599, "step": 179160 }, { "epoch": 3.647226463104326, "grad_norm": 0.09607787176780108, "learning_rate": 2.0689203696745164e-06, "loss": 0.0019, "step": 179170 }, { "epoch": 3.6474300254452925, "grad_norm": 0.01754476085053947, "learning_rate": 2.0683447289790983e-06, "loss": 0.0811, "step": 179180 }, { "epoch": 3.6476335877862596, "grad_norm": 0.056713180042554386, "learning_rate": 2.067769147492231e-06, "loss": 0.0006, "step": 179190 }, { "epoch": 3.647837150127226, "grad_norm": 0.0057184628985976265, "learning_rate": 2.067193625225544e-06, "loss": 0.0361, "step": 179200 }, { "epoch": 3.6480407124681933, "grad_norm": 0.02376799319960961, "learning_rate": 2.066618162190662e-06, "loss": 0.0846, "step": 179210 }, { "epoch": 3.6482442748091604, "grad_norm": 0.01680271589946863, "learning_rate": 2.0660427583991995e-06, "loss": 0.0019, "step": 179220 }, { "epoch": 3.648447837150127, "grad_norm": 0.04858210480041811, "learning_rate": 2.0654674138627867e-06, "loss": 0.056, "step": 179230 }, { "epoch": 3.648651399491094, "grad_norm": 0.0017600268591488322, "learning_rate": 2.0648921285930367e-06, "loss": 0.2046, "step": 179240 }, { "epoch": 3.648854961832061, "grad_norm": 0.01192858489900946, "learning_rate": 2.0643169026015713e-06, "loss": 0.1608, "step": 179250 }, { "epoch": 3.649058524173028, "grad_norm": 1.2104450063158987, "learning_rate": 2.0637417359000065e-06, "loss": 0.0236, "step": 179260 }, { "epoch": 3.649262086513995, "grad_norm": 1.7271477244495916, "learning_rate": 2.06316662849996e-06, "loss": 0.0834, "step": 179270 }, { "epoch": 3.649465648854962, "grad_norm": 25.194483586304976, "learning_rate": 2.0625915804130458e-06, "loss": 0.0685, "step": 179280 }, { "epoch": 3.6496692111959286, "grad_norm": 6.097232338723799, "learning_rate": 2.062016591650878e-06, "loss": 0.0066, "step": 179290 }, { "epoch": 3.6498727735368957, "grad_norm": 4.928364038695264, "learning_rate": 2.061441662225069e-06, "loss": 0.0875, "step": 179300 }, { "epoch": 3.6500763358778627, "grad_norm": 5.8557055352563525, "learning_rate": 2.0608667921472303e-06, "loss": 0.0948, "step": 179310 }, { "epoch": 3.6502798982188294, "grad_norm": 0.051517382209800205, "learning_rate": 2.0602919814289722e-06, "loss": 0.0605, "step": 179320 }, { "epoch": 3.6504834605597964, "grad_norm": 0.025433864947098046, "learning_rate": 2.0597172300819047e-06, "loss": 0.0759, "step": 179330 }, { "epoch": 3.6506870229007635, "grad_norm": 0.027051000806426716, "learning_rate": 2.059142538117634e-06, "loss": 0.0378, "step": 179340 }, { "epoch": 3.65089058524173, "grad_norm": 0.07344797410515892, "learning_rate": 2.058567905547768e-06, "loss": 0.0919, "step": 179350 }, { "epoch": 3.6510941475826972, "grad_norm": 0.02774033624270756, "learning_rate": 2.057993332383913e-06, "loss": 0.0368, "step": 179360 }, { "epoch": 3.6512977099236643, "grad_norm": 0.09358455476908056, "learning_rate": 2.0574188186376687e-06, "loss": 0.0949, "step": 179370 }, { "epoch": 3.651501272264631, "grad_norm": 0.2976252941312059, "learning_rate": 2.056844364320643e-06, "loss": 0.0316, "step": 179380 }, { "epoch": 3.651704834605598, "grad_norm": 0.1741742307640193, "learning_rate": 2.056269969444438e-06, "loss": 0.0479, "step": 179390 }, { "epoch": 3.651908396946565, "grad_norm": 0.05410145324313361, "learning_rate": 2.0556956340206508e-06, "loss": 0.0618, "step": 179400 }, { "epoch": 3.6521119592875317, "grad_norm": 0.007966027155058729, "learning_rate": 2.0551213580608807e-06, "loss": 0.0537, "step": 179410 }, { "epoch": 3.652315521628499, "grad_norm": 0.13471591956214107, "learning_rate": 2.0545471415767314e-06, "loss": 0.0189, "step": 179420 }, { "epoch": 3.652519083969466, "grad_norm": 0.21494786026079307, "learning_rate": 2.053972984579794e-06, "loss": 0.0007, "step": 179430 }, { "epoch": 3.6527226463104325, "grad_norm": 0.0975265434216018, "learning_rate": 2.0533988870816675e-06, "loss": 0.0397, "step": 179440 }, { "epoch": 3.6529262086513996, "grad_norm": 0.024728472378409617, "learning_rate": 2.0528248490939447e-06, "loss": 0.0739, "step": 179450 }, { "epoch": 3.6531297709923667, "grad_norm": 0.04677615851653297, "learning_rate": 2.0522508706282207e-06, "loss": 0.1313, "step": 179460 }, { "epoch": 3.6533333333333333, "grad_norm": 0.007482608220359456, "learning_rate": 2.0516769516960867e-06, "loss": 0.1607, "step": 179470 }, { "epoch": 3.6535368956743004, "grad_norm": 8.543673176959123, "learning_rate": 2.051103092309134e-06, "loss": 0.0692, "step": 179480 }, { "epoch": 3.653740458015267, "grad_norm": 0.043960118100123007, "learning_rate": 2.0505292924789527e-06, "loss": 0.0737, "step": 179490 }, { "epoch": 3.653944020356234, "grad_norm": 20.97812399704142, "learning_rate": 2.049955552217131e-06, "loss": 0.1546, "step": 179500 }, { "epoch": 3.6541475826972007, "grad_norm": 6.763652650381009, "learning_rate": 2.049381871535257e-06, "loss": 0.0668, "step": 179510 }, { "epoch": 3.654351145038168, "grad_norm": 18.35110564951857, "learning_rate": 2.048808250444918e-06, "loss": 0.0278, "step": 179520 }, { "epoch": 3.654554707379135, "grad_norm": 0.06242123852658497, "learning_rate": 2.0482346889576938e-06, "loss": 0.0396, "step": 179530 }, { "epoch": 3.6547582697201015, "grad_norm": 0.05384065869195255, "learning_rate": 2.0476611870851736e-06, "loss": 0.0991, "step": 179540 }, { "epoch": 3.6549618320610686, "grad_norm": 0.0646974400341388, "learning_rate": 2.0470877448389403e-06, "loss": 0.0029, "step": 179550 }, { "epoch": 3.6551653944020357, "grad_norm": 2.2073736034469653, "learning_rate": 2.04651436223057e-06, "loss": 0.0389, "step": 179560 }, { "epoch": 3.6553689567430023, "grad_norm": 15.539677486719569, "learning_rate": 2.0459410392716478e-06, "loss": 0.0319, "step": 179570 }, { "epoch": 3.6555725190839694, "grad_norm": 6.567854137180402, "learning_rate": 2.045367775973753e-06, "loss": 0.1214, "step": 179580 }, { "epoch": 3.6557760814249365, "grad_norm": 0.07309650199326916, "learning_rate": 2.0447945723484576e-06, "loss": 0.0038, "step": 179590 }, { "epoch": 3.655979643765903, "grad_norm": 7.8115772280895035, "learning_rate": 2.044221428407346e-06, "loss": 0.0605, "step": 179600 }, { "epoch": 3.65618320610687, "grad_norm": 3.4219276884091236, "learning_rate": 2.0436483441619877e-06, "loss": 0.0659, "step": 179610 }, { "epoch": 3.6563867684478373, "grad_norm": 0.034377886564709234, "learning_rate": 2.0430753196239566e-06, "loss": 0.0086, "step": 179620 }, { "epoch": 3.656590330788804, "grad_norm": 13.017874730281134, "learning_rate": 2.0425023548048323e-06, "loss": 0.0382, "step": 179630 }, { "epoch": 3.656793893129771, "grad_norm": 0.17811980695389334, "learning_rate": 2.0419294497161794e-06, "loss": 0.0386, "step": 179640 }, { "epoch": 3.656997455470738, "grad_norm": 0.2431498144217554, "learning_rate": 2.0413566043695704e-06, "loss": 0.0581, "step": 179650 }, { "epoch": 3.6572010178117047, "grad_norm": 0.015303892942314972, "learning_rate": 2.040783818776579e-06, "loss": 0.0481, "step": 179660 }, { "epoch": 3.657404580152672, "grad_norm": 0.09155548042100978, "learning_rate": 2.040211092948768e-06, "loss": 0.084, "step": 179670 }, { "epoch": 3.657608142493639, "grad_norm": 0.8100346224991295, "learning_rate": 2.0396384268977056e-06, "loss": 0.1036, "step": 179680 }, { "epoch": 3.6578117048346055, "grad_norm": 0.044891339712439, "learning_rate": 2.0390658206349585e-06, "loss": 0.094, "step": 179690 }, { "epoch": 3.6580152671755726, "grad_norm": 0.008426793816581886, "learning_rate": 2.0384932741720904e-06, "loss": 0.0005, "step": 179700 }, { "epoch": 3.6582188295165396, "grad_norm": 0.06028950115886293, "learning_rate": 2.0379207875206654e-06, "loss": 0.0498, "step": 179710 }, { "epoch": 3.6584223918575063, "grad_norm": 0.16080699952980101, "learning_rate": 2.037348360692245e-06, "loss": 0.0351, "step": 179720 }, { "epoch": 3.6586259541984734, "grad_norm": 4.473760042729051, "learning_rate": 2.036775993698393e-06, "loss": 0.0396, "step": 179730 }, { "epoch": 3.6588295165394404, "grad_norm": 0.16665509814267257, "learning_rate": 2.0362036865506617e-06, "loss": 0.0593, "step": 179740 }, { "epoch": 3.659033078880407, "grad_norm": 0.5332018649110222, "learning_rate": 2.0356314392606162e-06, "loss": 0.0023, "step": 179750 }, { "epoch": 3.659236641221374, "grad_norm": 12.206921219815996, "learning_rate": 2.0350592518398144e-06, "loss": 0.0649, "step": 179760 }, { "epoch": 3.6594402035623412, "grad_norm": 7.7767810419827885, "learning_rate": 2.0344871242998052e-06, "loss": 0.1275, "step": 179770 }, { "epoch": 3.659643765903308, "grad_norm": 0.047723285539382515, "learning_rate": 2.033915056652151e-06, "loss": 0.0048, "step": 179780 }, { "epoch": 3.659847328244275, "grad_norm": 0.12107560794767598, "learning_rate": 2.0333430489084034e-06, "loss": 0.042, "step": 179790 }, { "epoch": 3.6600508905852416, "grad_norm": 0.1297849238062621, "learning_rate": 2.0327711010801127e-06, "loss": 0.0256, "step": 179800 }, { "epoch": 3.6602544529262087, "grad_norm": 0.30217552965940975, "learning_rate": 2.032199213178831e-06, "loss": 0.0269, "step": 179810 }, { "epoch": 3.6604580152671757, "grad_norm": 0.04806470299175087, "learning_rate": 2.031627385216109e-06, "loss": 0.0547, "step": 179820 }, { "epoch": 3.6606615776081424, "grad_norm": 0.05310253292041745, "learning_rate": 2.031055617203495e-06, "loss": 0.0006, "step": 179830 }, { "epoch": 3.6608651399491094, "grad_norm": 0.1359850952286446, "learning_rate": 2.030483909152537e-06, "loss": 0.0052, "step": 179840 }, { "epoch": 3.661068702290076, "grad_norm": 0.030529227200851078, "learning_rate": 2.029912261074781e-06, "loss": 0.048, "step": 179850 }, { "epoch": 3.661272264631043, "grad_norm": 35.16635483297769, "learning_rate": 2.029340672981772e-06, "loss": 0.0542, "step": 179860 }, { "epoch": 3.6614758269720102, "grad_norm": 1.4780421568738469, "learning_rate": 2.028769144885055e-06, "loss": 0.0715, "step": 179870 }, { "epoch": 3.661679389312977, "grad_norm": 0.0061803306325700505, "learning_rate": 2.0281976767961715e-06, "loss": 0.0825, "step": 179880 }, { "epoch": 3.661882951653944, "grad_norm": 0.4762897493481889, "learning_rate": 2.027626268726664e-06, "loss": 0.0169, "step": 179890 }, { "epoch": 3.662086513994911, "grad_norm": 0.01080522732352641, "learning_rate": 2.027054920688072e-06, "loss": 0.0312, "step": 179900 }, { "epoch": 3.6622900763358777, "grad_norm": 1.9398055719330805, "learning_rate": 2.026483632691935e-06, "loss": 0.0455, "step": 179910 }, { "epoch": 3.6624936386768447, "grad_norm": 0.23101139503063753, "learning_rate": 2.0259124047497915e-06, "loss": 0.0006, "step": 179920 }, { "epoch": 3.662697201017812, "grad_norm": 0.056449662486071195, "learning_rate": 2.0253412368731766e-06, "loss": 0.0165, "step": 179930 }, { "epoch": 3.6629007633587785, "grad_norm": 1.4662845094927666, "learning_rate": 2.0247701290736275e-06, "loss": 0.0712, "step": 179940 }, { "epoch": 3.6631043256997455, "grad_norm": 0.07021459818704211, "learning_rate": 2.0241990813626796e-06, "loss": 0.058, "step": 179950 }, { "epoch": 3.6633078880407126, "grad_norm": 11.395386962476422, "learning_rate": 2.02362809375186e-06, "loss": 0.0294, "step": 179960 }, { "epoch": 3.6635114503816792, "grad_norm": 0.2773160088486424, "learning_rate": 2.023057166252708e-06, "loss": 0.0542, "step": 179970 }, { "epoch": 3.6637150127226463, "grad_norm": 0.6175033029457543, "learning_rate": 2.0224862988767496e-06, "loss": 0.0461, "step": 179980 }, { "epoch": 3.6639185750636134, "grad_norm": 0.007928168109245376, "learning_rate": 2.021915491635513e-06, "loss": 0.0746, "step": 179990 }, { "epoch": 3.66412213740458, "grad_norm": 0.04365951329671319, "learning_rate": 2.0213447445405325e-06, "loss": 0.0684, "step": 180000 }, { "epoch": 3.664325699745547, "grad_norm": 9.532876927645974, "learning_rate": 2.02077405760333e-06, "loss": 0.081, "step": 180010 }, { "epoch": 3.664529262086514, "grad_norm": 0.06556060434664762, "learning_rate": 2.020203430835431e-06, "loss": 0.0351, "step": 180020 }, { "epoch": 3.664732824427481, "grad_norm": 0.44906205263494764, "learning_rate": 2.0196328642483648e-06, "loss": 0.1075, "step": 180030 }, { "epoch": 3.664936386768448, "grad_norm": 0.254413396449428, "learning_rate": 2.0190623578536495e-06, "loss": 0.0033, "step": 180040 }, { "epoch": 3.665139949109415, "grad_norm": 0.02692420527539841, "learning_rate": 2.01849191166281e-06, "loss": 0.1167, "step": 180050 }, { "epoch": 3.6653435114503816, "grad_norm": 0.015487865955719542, "learning_rate": 2.0179215256873658e-06, "loss": 0.0864, "step": 180060 }, { "epoch": 3.6655470737913487, "grad_norm": 4.060936048435295, "learning_rate": 2.017351199938838e-06, "loss": 0.0287, "step": 180070 }, { "epoch": 3.6657506361323158, "grad_norm": 4.766148178888486, "learning_rate": 2.0167809344287435e-06, "loss": 0.0648, "step": 180080 }, { "epoch": 3.6659541984732824, "grad_norm": 0.051282688075201144, "learning_rate": 2.016210729168601e-06, "loss": 0.0533, "step": 180090 }, { "epoch": 3.6661577608142495, "grad_norm": 0.10111068854005666, "learning_rate": 2.015640584169928e-06, "loss": 0.0434, "step": 180100 }, { "epoch": 3.666361323155216, "grad_norm": 0.040159677366045775, "learning_rate": 2.0150704994442326e-06, "loss": 0.0108, "step": 180110 }, { "epoch": 3.666564885496183, "grad_norm": 6.436883436401902, "learning_rate": 2.0145004750030354e-06, "loss": 0.0542, "step": 180120 }, { "epoch": 3.6667684478371503, "grad_norm": 0.02989474075469149, "learning_rate": 2.013930510857849e-06, "loss": 0.0574, "step": 180130 }, { "epoch": 3.666972010178117, "grad_norm": 0.04680004406833177, "learning_rate": 2.013360607020177e-06, "loss": 0.0492, "step": 180140 }, { "epoch": 3.667175572519084, "grad_norm": 0.12349158678974143, "learning_rate": 2.012790763501537e-06, "loss": 0.0259, "step": 180150 }, { "epoch": 3.6673791348600506, "grad_norm": 0.0745347672778247, "learning_rate": 2.0122209803134363e-06, "loss": 0.0389, "step": 180160 }, { "epoch": 3.6675826972010177, "grad_norm": 18.28277712614827, "learning_rate": 2.011651257467378e-06, "loss": 0.0324, "step": 180170 }, { "epoch": 3.667786259541985, "grad_norm": 2.269839585314903, "learning_rate": 2.011081594974875e-06, "loss": 0.0561, "step": 180180 }, { "epoch": 3.6679898218829514, "grad_norm": 0.13911159504324397, "learning_rate": 2.0105119928474263e-06, "loss": 0.0158, "step": 180190 }, { "epoch": 3.6681933842239185, "grad_norm": 0.06637751723369914, "learning_rate": 2.0099424510965376e-06, "loss": 0.0693, "step": 180200 }, { "epoch": 3.6683969465648856, "grad_norm": 0.015782569598192332, "learning_rate": 2.0093729697337155e-06, "loss": 0.0646, "step": 180210 }, { "epoch": 3.668600508905852, "grad_norm": 1.4634359832430104, "learning_rate": 2.0088035487704564e-06, "loss": 0.0017, "step": 180220 }, { "epoch": 3.6688040712468193, "grad_norm": 8.957162848764922, "learning_rate": 2.008234188218262e-06, "loss": 0.0563, "step": 180230 }, { "epoch": 3.6690076335877864, "grad_norm": 15.113861675034476, "learning_rate": 2.007664888088632e-06, "loss": 0.104, "step": 180240 }, { "epoch": 3.669211195928753, "grad_norm": 16.100093125094, "learning_rate": 2.0070956483930632e-06, "loss": 0.04, "step": 180250 }, { "epoch": 3.66941475826972, "grad_norm": 13.841758493736156, "learning_rate": 2.0065264691430532e-06, "loss": 0.0042, "step": 180260 }, { "epoch": 3.669618320610687, "grad_norm": 0.10161553394588053, "learning_rate": 2.005957350350096e-06, "loss": 0.0437, "step": 180270 }, { "epoch": 3.669821882951654, "grad_norm": 0.022914612264772937, "learning_rate": 2.005388292025687e-06, "loss": 0.0012, "step": 180280 }, { "epoch": 3.670025445292621, "grad_norm": 2.00403254021462, "learning_rate": 2.004819294181318e-06, "loss": 0.1529, "step": 180290 }, { "epoch": 3.670229007633588, "grad_norm": 0.011364397933631037, "learning_rate": 2.0042503568284816e-06, "loss": 0.0635, "step": 180300 }, { "epoch": 3.6704325699745546, "grad_norm": 0.7138202454488435, "learning_rate": 2.0036814799786676e-06, "loss": 0.0144, "step": 180310 }, { "epoch": 3.6706361323155217, "grad_norm": 10.624802861973839, "learning_rate": 2.003112663643365e-06, "loss": 0.0845, "step": 180320 }, { "epoch": 3.6708396946564887, "grad_norm": 0.020364111459495522, "learning_rate": 2.0025439078340624e-06, "loss": 0.0288, "step": 180330 }, { "epoch": 3.6710432569974554, "grad_norm": 0.030050010774072936, "learning_rate": 2.001975212562248e-06, "loss": 0.0365, "step": 180340 }, { "epoch": 3.6712468193384225, "grad_norm": 0.02912038629102485, "learning_rate": 2.0014065778394038e-06, "loss": 0.0712, "step": 180350 }, { "epoch": 3.6714503816793895, "grad_norm": 0.011814677839349, "learning_rate": 2.0008380036770142e-06, "loss": 0.0623, "step": 180360 }, { "epoch": 3.671653944020356, "grad_norm": 0.003899275094962932, "learning_rate": 2.000269490086568e-06, "loss": 0.0236, "step": 180370 }, { "epoch": 3.6718575063613232, "grad_norm": 0.0353676978545139, "learning_rate": 1.9997010370795406e-06, "loss": 0.1133, "step": 180380 }, { "epoch": 3.6720610687022903, "grad_norm": 0.13751342102437322, "learning_rate": 1.9991326446674153e-06, "loss": 0.0123, "step": 180390 }, { "epoch": 3.672264631043257, "grad_norm": 4.845993454381651, "learning_rate": 1.998564312861671e-06, "loss": 0.0763, "step": 180400 }, { "epoch": 3.672468193384224, "grad_norm": 0.28872031518635266, "learning_rate": 1.997996041673787e-06, "loss": 0.0192, "step": 180410 }, { "epoch": 3.672671755725191, "grad_norm": 19.936837122240192, "learning_rate": 1.997427831115239e-06, "loss": 0.0713, "step": 180420 }, { "epoch": 3.6728753180661577, "grad_norm": 0.04212102584709541, "learning_rate": 1.996859681197503e-06, "loss": 0.0909, "step": 180430 }, { "epoch": 3.673078880407125, "grad_norm": 0.05775215645958194, "learning_rate": 1.9962915919320542e-06, "loss": 0.054, "step": 180440 }, { "epoch": 3.6732824427480915, "grad_norm": 0.023294271486903027, "learning_rate": 1.9957235633303655e-06, "loss": 0.001, "step": 180450 }, { "epoch": 3.6734860050890585, "grad_norm": 0.035964730609881604, "learning_rate": 1.995155595403909e-06, "loss": 0.0671, "step": 180460 }, { "epoch": 3.673689567430025, "grad_norm": 0.051595465165795745, "learning_rate": 1.994587688164155e-06, "loss": 0.0289, "step": 180470 }, { "epoch": 3.6738931297709922, "grad_norm": 0.08090045388791246, "learning_rate": 1.9940198416225736e-06, "loss": 0.0769, "step": 180480 }, { "epoch": 3.6740966921119593, "grad_norm": 0.38748083974128467, "learning_rate": 1.9934520557906336e-06, "loss": 0.0503, "step": 180490 }, { "epoch": 3.674300254452926, "grad_norm": 0.006140316403299472, "learning_rate": 1.9928843306798034e-06, "loss": 0.0156, "step": 180500 }, { "epoch": 3.674503816793893, "grad_norm": 3.3915897793203875, "learning_rate": 1.992316666301544e-06, "loss": 0.145, "step": 180510 }, { "epoch": 3.67470737913486, "grad_norm": 0.09405095988682922, "learning_rate": 1.991749062667324e-06, "loss": 0.0319, "step": 180520 }, { "epoch": 3.6749109414758268, "grad_norm": 0.06164484522990554, "learning_rate": 1.9911815197886094e-06, "loss": 0.0435, "step": 180530 }, { "epoch": 3.675114503816794, "grad_norm": 0.5846232822585734, "learning_rate": 1.9906140376768548e-06, "loss": 0.0011, "step": 180540 }, { "epoch": 3.675318066157761, "grad_norm": 0.126233835515373, "learning_rate": 1.99004661634353e-06, "loss": 0.0511, "step": 180550 }, { "epoch": 3.6755216284987275, "grad_norm": 0.7544761738810903, "learning_rate": 1.9894792558000883e-06, "loss": 0.025, "step": 180560 }, { "epoch": 3.6757251908396946, "grad_norm": 0.15063748076035285, "learning_rate": 1.988911956057989e-06, "loss": 0.0538, "step": 180570 }, { "epoch": 3.6759287531806617, "grad_norm": 0.08381491857440564, "learning_rate": 1.9883447171286948e-06, "loss": 0.0104, "step": 180580 }, { "epoch": 3.6761323155216283, "grad_norm": 0.07376612892546976, "learning_rate": 1.9877775390236557e-06, "loss": 0.0249, "step": 180590 }, { "epoch": 3.6763358778625954, "grad_norm": 0.058228113009023974, "learning_rate": 1.9872104217543275e-06, "loss": 0.0623, "step": 180600 }, { "epoch": 3.6765394402035625, "grad_norm": 0.20749658942477478, "learning_rate": 1.986643365332169e-06, "loss": 0.0484, "step": 180610 }, { "epoch": 3.676743002544529, "grad_norm": 0.10185186440205378, "learning_rate": 1.986076369768627e-06, "loss": 0.0718, "step": 180620 }, { "epoch": 3.676946564885496, "grad_norm": 0.07688645395993801, "learning_rate": 1.985509435075154e-06, "loss": 0.0304, "step": 180630 }, { "epoch": 3.6771501272264633, "grad_norm": 0.07683712566349983, "learning_rate": 1.984942561263201e-06, "loss": 0.1238, "step": 180640 }, { "epoch": 3.67735368956743, "grad_norm": 20.85746770479052, "learning_rate": 1.9843757483442165e-06, "loss": 0.0053, "step": 180650 }, { "epoch": 3.677557251908397, "grad_norm": 0.007735585143996404, "learning_rate": 1.9838089963296483e-06, "loss": 0.0798, "step": 180660 }, { "epoch": 3.677760814249364, "grad_norm": 0.182010555362258, "learning_rate": 1.9832423052309414e-06, "loss": 0.0482, "step": 180670 }, { "epoch": 3.6779643765903307, "grad_norm": 0.011482704315836305, "learning_rate": 1.9826756750595422e-06, "loss": 0.1583, "step": 180680 }, { "epoch": 3.678167938931298, "grad_norm": 0.1263517883661855, "learning_rate": 1.982109105826894e-06, "loss": 0.0256, "step": 180690 }, { "epoch": 3.678371501272265, "grad_norm": 0.02015518075676141, "learning_rate": 1.981542597544439e-06, "loss": 0.0308, "step": 180700 }, { "epoch": 3.6785750636132315, "grad_norm": 0.007955175065611133, "learning_rate": 1.980976150223621e-06, "loss": 0.0393, "step": 180710 }, { "epoch": 3.6787786259541986, "grad_norm": 0.01825372296525141, "learning_rate": 1.9804097638758753e-06, "loss": 0.0303, "step": 180720 }, { "epoch": 3.6789821882951657, "grad_norm": 38.91330732490154, "learning_rate": 1.979843438512644e-06, "loss": 0.0647, "step": 180730 }, { "epoch": 3.6791857506361323, "grad_norm": 0.03987781253807987, "learning_rate": 1.979277174145367e-06, "loss": 0.0434, "step": 180740 }, { "epoch": 3.6793893129770994, "grad_norm": 0.05849879460752463, "learning_rate": 1.9787109707854744e-06, "loss": 0.016, "step": 180750 }, { "epoch": 3.679592875318066, "grad_norm": 0.027161908747100384, "learning_rate": 1.9781448284444087e-06, "loss": 0.0866, "step": 180760 }, { "epoch": 3.679796437659033, "grad_norm": 0.0053517332445375205, "learning_rate": 1.9775787471335987e-06, "loss": 0.025, "step": 180770 }, { "epoch": 3.68, "grad_norm": 13.375028761274852, "learning_rate": 1.977012726864479e-06, "loss": 0.0911, "step": 180780 }, { "epoch": 3.680203562340967, "grad_norm": 14.143559888664045, "learning_rate": 1.9764467676484806e-06, "loss": 0.0681, "step": 180790 }, { "epoch": 3.680407124681934, "grad_norm": 0.003824733739920162, "learning_rate": 1.975880869497034e-06, "loss": 0.0827, "step": 180800 }, { "epoch": 3.6806106870229005, "grad_norm": 0.03198540058596511, "learning_rate": 1.975315032421568e-06, "loss": 0.0095, "step": 180810 }, { "epoch": 3.6808142493638676, "grad_norm": 0.17076975874186082, "learning_rate": 1.974749256433511e-06, "loss": 0.0301, "step": 180820 }, { "epoch": 3.6810178117048347, "grad_norm": 82.54710971230517, "learning_rate": 1.974183541544289e-06, "loss": 0.0576, "step": 180830 }, { "epoch": 3.6812213740458013, "grad_norm": 0.07729257551328268, "learning_rate": 1.973617887765328e-06, "loss": 0.0213, "step": 180840 }, { "epoch": 3.6814249363867684, "grad_norm": 25.139061594055384, "learning_rate": 1.973052295108051e-06, "loss": 0.0759, "step": 180850 }, { "epoch": 3.6816284987277355, "grad_norm": 0.010279054697480777, "learning_rate": 1.9724867635838824e-06, "loss": 0.0029, "step": 180860 }, { "epoch": 3.681832061068702, "grad_norm": 0.04683217020459574, "learning_rate": 1.9719212932042427e-06, "loss": 0.0392, "step": 180870 }, { "epoch": 3.682035623409669, "grad_norm": 0.06780302876703523, "learning_rate": 1.9713558839805525e-06, "loss": 0.0574, "step": 180880 }, { "epoch": 3.6822391857506362, "grad_norm": 0.024941663351690322, "learning_rate": 1.9707905359242314e-06, "loss": 0.0456, "step": 180890 }, { "epoch": 3.682442748091603, "grad_norm": 2.735781988181575, "learning_rate": 1.9702252490466967e-06, "loss": 0.0023, "step": 180900 }, { "epoch": 3.68264631043257, "grad_norm": 0.09049070156143242, "learning_rate": 1.9696600233593655e-06, "loss": 0.0571, "step": 180910 }, { "epoch": 3.682849872773537, "grad_norm": 8.260611557840047, "learning_rate": 1.969094858873655e-06, "loss": 0.1345, "step": 180920 }, { "epoch": 3.6830534351145037, "grad_norm": 0.05427760579162923, "learning_rate": 1.9685297556009757e-06, "loss": 0.0234, "step": 180930 }, { "epoch": 3.6832569974554707, "grad_norm": 0.4341065690773685, "learning_rate": 1.967964713552741e-06, "loss": 0.008, "step": 180940 }, { "epoch": 3.683460559796438, "grad_norm": 0.20512712584717704, "learning_rate": 1.9673997327403672e-06, "loss": 0.0596, "step": 180950 }, { "epoch": 3.6836641221374045, "grad_norm": 13.968418717999015, "learning_rate": 1.9668348131752604e-06, "loss": 0.0276, "step": 180960 }, { "epoch": 3.6838676844783715, "grad_norm": 0.02063044694666482, "learning_rate": 1.966269954868829e-06, "loss": 0.0506, "step": 180970 }, { "epoch": 3.6840712468193386, "grad_norm": 0.05052331012739861, "learning_rate": 1.9657051578324877e-06, "loss": 0.1692, "step": 180980 }, { "epoch": 3.6842748091603053, "grad_norm": 0.24010297582434686, "learning_rate": 1.9651404220776364e-06, "loss": 0.0022, "step": 180990 }, { "epoch": 3.6844783715012723, "grad_norm": 0.034363452554310385, "learning_rate": 1.9645757476156833e-06, "loss": 0.077, "step": 181000 }, { "epoch": 3.6846819338422394, "grad_norm": 0.012173857776649526, "learning_rate": 1.9640111344580328e-06, "loss": 0.0938, "step": 181010 }, { "epoch": 3.684885496183206, "grad_norm": 0.022841538576678557, "learning_rate": 1.9634465826160876e-06, "loss": 0.001, "step": 181020 }, { "epoch": 3.685089058524173, "grad_norm": 4.83546011597484, "learning_rate": 1.9628820921012494e-06, "loss": 0.0484, "step": 181030 }, { "epoch": 3.68529262086514, "grad_norm": 0.011788033780499592, "learning_rate": 1.9623176629249195e-06, "loss": 0.065, "step": 181040 }, { "epoch": 3.685496183206107, "grad_norm": 0.2003834627373308, "learning_rate": 1.961753295098497e-06, "loss": 0.0725, "step": 181050 }, { "epoch": 3.685699745547074, "grad_norm": 1.318126456617742, "learning_rate": 1.9611889886333797e-06, "loss": 0.0717, "step": 181060 }, { "epoch": 3.6859033078880405, "grad_norm": 0.019579276425385254, "learning_rate": 1.9606247435409648e-06, "loss": 0.0611, "step": 181070 }, { "epoch": 3.6861068702290076, "grad_norm": 1.2933911280663646, "learning_rate": 1.9600605598326493e-06, "loss": 0.2137, "step": 181080 }, { "epoch": 3.6863104325699747, "grad_norm": 0.01881225816680868, "learning_rate": 1.9594964375198234e-06, "loss": 0.0243, "step": 181090 }, { "epoch": 3.6865139949109413, "grad_norm": 0.02328497070703696, "learning_rate": 1.958932376613884e-06, "loss": 0.0824, "step": 181100 }, { "epoch": 3.6867175572519084, "grad_norm": 15.142301470290397, "learning_rate": 1.9583683771262246e-06, "loss": 0.0662, "step": 181110 }, { "epoch": 3.686921119592875, "grad_norm": 0.407261702963994, "learning_rate": 1.957804439068229e-06, "loss": 0.0399, "step": 181120 }, { "epoch": 3.687124681933842, "grad_norm": 0.015662248813260858, "learning_rate": 1.957240562451296e-06, "loss": 0.0702, "step": 181130 }, { "epoch": 3.687328244274809, "grad_norm": 0.013115461186087744, "learning_rate": 1.9566767472868065e-06, "loss": 0.051, "step": 181140 }, { "epoch": 3.687531806615776, "grad_norm": 17.149641965513556, "learning_rate": 1.956112993586148e-06, "loss": 0.0436, "step": 181150 }, { "epoch": 3.687735368956743, "grad_norm": 1.329138755343155, "learning_rate": 1.9555493013607113e-06, "loss": 0.1167, "step": 181160 }, { "epoch": 3.68793893129771, "grad_norm": 0.21379717424096245, "learning_rate": 1.954985670621876e-06, "loss": 0.0014, "step": 181170 }, { "epoch": 3.6881424936386766, "grad_norm": 0.049759037653744884, "learning_rate": 1.9544221013810254e-06, "loss": 0.0765, "step": 181180 }, { "epoch": 3.6883460559796437, "grad_norm": 0.051387324091185016, "learning_rate": 1.953858593649547e-06, "loss": 0.0328, "step": 181190 }, { "epoch": 3.688549618320611, "grad_norm": 0.010214995979872134, "learning_rate": 1.9532951474388154e-06, "loss": 0.0474, "step": 181200 }, { "epoch": 3.6887531806615774, "grad_norm": 0.038439958890278604, "learning_rate": 1.9527317627602127e-06, "loss": 0.0329, "step": 181210 }, { "epoch": 3.6889567430025445, "grad_norm": 0.2285112201988921, "learning_rate": 1.952168439625117e-06, "loss": 0.0695, "step": 181220 }, { "epoch": 3.6891603053435116, "grad_norm": 14.386183951556074, "learning_rate": 1.951605178044905e-06, "loss": 0.0285, "step": 181230 }, { "epoch": 3.689363867684478, "grad_norm": 0.16133716543287696, "learning_rate": 1.951041978030953e-06, "loss": 0.0267, "step": 181240 }, { "epoch": 3.6895674300254453, "grad_norm": 13.77077673133917, "learning_rate": 1.950478839594635e-06, "loss": 0.0708, "step": 181250 }, { "epoch": 3.6897709923664124, "grad_norm": 0.024560863875438262, "learning_rate": 1.949915762747325e-06, "loss": 0.081, "step": 181260 }, { "epoch": 3.689974554707379, "grad_norm": 0.01654761678276424, "learning_rate": 1.949352747500394e-06, "loss": 0.0776, "step": 181270 }, { "epoch": 3.690178117048346, "grad_norm": 0.003362995164568495, "learning_rate": 1.948789793865214e-06, "loss": 0.0284, "step": 181280 }, { "epoch": 3.690381679389313, "grad_norm": 0.019342424175057174, "learning_rate": 1.9482269018531556e-06, "loss": 0.0525, "step": 181290 }, { "epoch": 3.69058524173028, "grad_norm": 9.217748421401255, "learning_rate": 1.9476640714755825e-06, "loss": 0.0713, "step": 181300 }, { "epoch": 3.690788804071247, "grad_norm": 0.003567752216395145, "learning_rate": 1.9471013027438663e-06, "loss": 0.0483, "step": 181310 }, { "epoch": 3.690992366412214, "grad_norm": 0.06182392469053916, "learning_rate": 1.9465385956693727e-06, "loss": 0.0993, "step": 181320 }, { "epoch": 3.6911959287531806, "grad_norm": 0.13932071208001776, "learning_rate": 1.9459759502634635e-06, "loss": 0.0518, "step": 181330 }, { "epoch": 3.6913994910941477, "grad_norm": 3.9916309296479824, "learning_rate": 1.9454133665375017e-06, "loss": 0.0315, "step": 181340 }, { "epoch": 3.6916030534351147, "grad_norm": 0.05413380655225406, "learning_rate": 1.9448508445028553e-06, "loss": 0.0018, "step": 181350 }, { "epoch": 3.6918066157760814, "grad_norm": 0.0552158717177305, "learning_rate": 1.9442883841708787e-06, "loss": 0.0663, "step": 181360 }, { "epoch": 3.6920101781170485, "grad_norm": 0.4435942263741162, "learning_rate": 1.9437259855529345e-06, "loss": 0.0525, "step": 181370 }, { "epoch": 3.6922137404580155, "grad_norm": 0.29791004803322735, "learning_rate": 1.94316364866038e-06, "loss": 0.0021, "step": 181380 }, { "epoch": 3.692417302798982, "grad_norm": 0.10346889081313311, "learning_rate": 1.942601373504573e-06, "loss": 0.1752, "step": 181390 }, { "epoch": 3.6926208651399492, "grad_norm": 0.01972816601837339, "learning_rate": 1.942039160096869e-06, "loss": 0.1166, "step": 181400 }, { "epoch": 3.692824427480916, "grad_norm": 21.407466623239504, "learning_rate": 1.941477008448623e-06, "loss": 0.0545, "step": 181410 }, { "epoch": 3.693027989821883, "grad_norm": 0.04682500260881937, "learning_rate": 1.940914918571188e-06, "loss": 0.0014, "step": 181420 }, { "epoch": 3.6932315521628496, "grad_norm": 3.613930321393424, "learning_rate": 1.9403528904759166e-06, "loss": 0.0319, "step": 181430 }, { "epoch": 3.6934351145038167, "grad_norm": 18.760191361126957, "learning_rate": 1.939790924174159e-06, "loss": 0.0016, "step": 181440 }, { "epoch": 3.6936386768447838, "grad_norm": 0.2536718153052173, "learning_rate": 1.939229019677265e-06, "loss": 0.0218, "step": 181450 }, { "epoch": 3.6938422391857504, "grad_norm": 8.109679721380394, "learning_rate": 1.9386671769965835e-06, "loss": 0.0073, "step": 181460 }, { "epoch": 3.6940458015267175, "grad_norm": 0.0034038545776247167, "learning_rate": 1.9381053961434613e-06, "loss": 0.0472, "step": 181470 }, { "epoch": 3.6942493638676845, "grad_norm": 7.613884170096684, "learning_rate": 1.9375436771292467e-06, "loss": 0.0873, "step": 181480 }, { "epoch": 3.694452926208651, "grad_norm": 0.04314589553731849, "learning_rate": 1.936982019965278e-06, "loss": 0.0665, "step": 181490 }, { "epoch": 3.6946564885496183, "grad_norm": 0.008649496673179633, "learning_rate": 1.9364204246629055e-06, "loss": 0.0383, "step": 181500 }, { "epoch": 3.6948600508905853, "grad_norm": 0.09207950226206185, "learning_rate": 1.935858891233467e-06, "loss": 0.0296, "step": 181510 }, { "epoch": 3.695063613231552, "grad_norm": 0.023447082353486037, "learning_rate": 1.9352974196883036e-06, "loss": 0.1368, "step": 181520 }, { "epoch": 3.695267175572519, "grad_norm": 0.012786907437535239, "learning_rate": 1.9347360100387584e-06, "loss": 0.001, "step": 181530 }, { "epoch": 3.695470737913486, "grad_norm": 0.04511208804316657, "learning_rate": 1.9341746622961666e-06, "loss": 0.0319, "step": 181540 }, { "epoch": 3.6956743002544528, "grad_norm": 0.032220019039373805, "learning_rate": 1.9336133764718645e-06, "loss": 0.0006, "step": 181550 }, { "epoch": 3.69587786259542, "grad_norm": 0.041106574264818715, "learning_rate": 1.9330521525771935e-06, "loss": 0.0244, "step": 181560 }, { "epoch": 3.696081424936387, "grad_norm": 0.03636833350473315, "learning_rate": 1.9324909906234817e-06, "loss": 0.0106, "step": 181570 }, { "epoch": 3.6962849872773536, "grad_norm": 0.002699871712403423, "learning_rate": 1.9319298906220664e-06, "loss": 0.0889, "step": 181580 }, { "epoch": 3.6964885496183206, "grad_norm": 0.09989120464491416, "learning_rate": 1.9313688525842777e-06, "loss": 0.0334, "step": 181590 }, { "epoch": 3.6966921119592877, "grad_norm": 0.13015301666315562, "learning_rate": 1.930807876521448e-06, "loss": 0.0381, "step": 181600 }, { "epoch": 3.6968956743002543, "grad_norm": 0.010661490157809027, "learning_rate": 1.9302469624449062e-06, "loss": 0.0071, "step": 181610 }, { "epoch": 3.6970992366412214, "grad_norm": 35.98095643042293, "learning_rate": 1.9296861103659802e-06, "loss": 0.0436, "step": 181620 }, { "epoch": 3.6973027989821885, "grad_norm": 0.004516223557992461, "learning_rate": 1.9291253202959984e-06, "loss": 0.0518, "step": 181630 }, { "epoch": 3.697506361323155, "grad_norm": 0.029457830594971462, "learning_rate": 1.9285645922462858e-06, "loss": 0.0992, "step": 181640 }, { "epoch": 3.697709923664122, "grad_norm": 0.058247985077376434, "learning_rate": 1.9280039262281674e-06, "loss": 0.0567, "step": 181650 }, { "epoch": 3.6979134860050893, "grad_norm": 0.06841106381579347, "learning_rate": 1.9274433222529683e-06, "loss": 0.0006, "step": 181660 }, { "epoch": 3.698117048346056, "grad_norm": 15.829367359901148, "learning_rate": 1.926882780332005e-06, "loss": 0.036, "step": 181670 }, { "epoch": 3.698320610687023, "grad_norm": 6.769952411753308, "learning_rate": 1.9263223004766035e-06, "loss": 0.0547, "step": 181680 }, { "epoch": 3.69852417302799, "grad_norm": 18.69148256412312, "learning_rate": 1.9257618826980846e-06, "loss": 0.0619, "step": 181690 }, { "epoch": 3.6987277353689567, "grad_norm": 0.08822427520867143, "learning_rate": 1.9252015270077596e-06, "loss": 0.0223, "step": 181700 }, { "epoch": 3.698931297709924, "grad_norm": 8.850403413796158, "learning_rate": 1.924641233416952e-06, "loss": 0.1458, "step": 181710 }, { "epoch": 3.6991348600508904, "grad_norm": 0.022709816968409357, "learning_rate": 1.924081001936978e-06, "loss": 0.0349, "step": 181720 }, { "epoch": 3.6993384223918575, "grad_norm": 35.501793216294445, "learning_rate": 1.9235208325791466e-06, "loss": 0.0028, "step": 181730 }, { "epoch": 3.6995419847328246, "grad_norm": 37.39752502043451, "learning_rate": 1.922960725354777e-06, "loss": 0.0906, "step": 181740 }, { "epoch": 3.699745547073791, "grad_norm": 0.024885964025197105, "learning_rate": 1.9224006802751776e-06, "loss": 0.0107, "step": 181750 }, { "epoch": 3.6999491094147583, "grad_norm": 14.271113144071018, "learning_rate": 1.9218406973516607e-06, "loss": 0.0694, "step": 181760 }, { "epoch": 3.700152671755725, "grad_norm": 26.598635987124684, "learning_rate": 1.9212807765955347e-06, "loss": 0.0486, "step": 181770 }, { "epoch": 3.700356234096692, "grad_norm": 11.774383165652022, "learning_rate": 1.9207209180181095e-06, "loss": 0.0102, "step": 181780 }, { "epoch": 3.700559796437659, "grad_norm": 31.94161546885892, "learning_rate": 1.9201611216306916e-06, "loss": 0.0553, "step": 181790 }, { "epoch": 3.7007633587786257, "grad_norm": 0.5110302592481368, "learning_rate": 1.919601387444586e-06, "loss": 0.0526, "step": 181800 }, { "epoch": 3.700966921119593, "grad_norm": 0.0649423662504972, "learning_rate": 1.9190417154710977e-06, "loss": 0.0522, "step": 181810 }, { "epoch": 3.70117048346056, "grad_norm": 0.5240868888993444, "learning_rate": 1.9184821057215307e-06, "loss": 0.077, "step": 181820 }, { "epoch": 3.7013740458015265, "grad_norm": 5.113122140491854, "learning_rate": 1.917922558207187e-06, "loss": 0.0135, "step": 181830 }, { "epoch": 3.7015776081424936, "grad_norm": 0.005239432461301357, "learning_rate": 1.9173630729393662e-06, "loss": 0.0947, "step": 181840 }, { "epoch": 3.7017811704834607, "grad_norm": 0.02671576328949989, "learning_rate": 1.9168036499293684e-06, "loss": 0.0573, "step": 181850 }, { "epoch": 3.7019847328244273, "grad_norm": 0.03893905116273873, "learning_rate": 1.9162442891884924e-06, "loss": 0.0933, "step": 181860 }, { "epoch": 3.7021882951653944, "grad_norm": 0.23220328940621338, "learning_rate": 1.915684990728037e-06, "loss": 0.0603, "step": 181870 }, { "epoch": 3.7023918575063615, "grad_norm": 12.976691956889836, "learning_rate": 1.9151257545592934e-06, "loss": 0.0265, "step": 181880 }, { "epoch": 3.702595419847328, "grad_norm": 1.8355676809928172, "learning_rate": 1.914566580693557e-06, "loss": 0.0477, "step": 181890 }, { "epoch": 3.702798982188295, "grad_norm": 0.02616854104207014, "learning_rate": 1.9140074691421255e-06, "loss": 0.0002, "step": 181900 }, { "epoch": 3.7030025445292623, "grad_norm": 0.16032282370296222, "learning_rate": 1.9134484199162865e-06, "loss": 0.0423, "step": 181910 }, { "epoch": 3.703206106870229, "grad_norm": 0.008430446633692128, "learning_rate": 1.912889433027331e-06, "loss": 0.1143, "step": 181920 }, { "epoch": 3.703409669211196, "grad_norm": 0.07458450184845197, "learning_rate": 1.9123305084865523e-06, "loss": 0.0024, "step": 181930 }, { "epoch": 3.703613231552163, "grad_norm": 0.060013243237314474, "learning_rate": 1.9117716463052346e-06, "loss": 0.0036, "step": 181940 }, { "epoch": 3.7038167938931297, "grad_norm": 0.02453897357328343, "learning_rate": 1.9112128464946654e-06, "loss": 0.0142, "step": 181950 }, { "epoch": 3.7040203562340968, "grad_norm": 1.9369063801399466, "learning_rate": 1.9106541090661317e-06, "loss": 0.0387, "step": 181960 }, { "epoch": 3.704223918575064, "grad_norm": 0.014159683775196589, "learning_rate": 1.910095434030917e-06, "loss": 0.043, "step": 181970 }, { "epoch": 3.7044274809160305, "grad_norm": 0.045099016635597775, "learning_rate": 1.909536821400305e-06, "loss": 0.0337, "step": 181980 }, { "epoch": 3.7046310432569975, "grad_norm": 0.2482958766693322, "learning_rate": 1.9089782711855776e-06, "loss": 0.0054, "step": 181990 }, { "epoch": 3.7048346055979646, "grad_norm": 0.014093973316663495, "learning_rate": 1.9084197833980145e-06, "loss": 0.0507, "step": 182000 }, { "epoch": 3.7050381679389313, "grad_norm": 0.10570529663148391, "learning_rate": 1.9078613580488964e-06, "loss": 0.0078, "step": 182010 }, { "epoch": 3.7052417302798983, "grad_norm": 0.010384015679790727, "learning_rate": 1.9073029951495004e-06, "loss": 0.0014, "step": 182020 }, { "epoch": 3.7054452926208654, "grad_norm": 5.473350257097777, "learning_rate": 1.9067446947111058e-06, "loss": 0.0745, "step": 182030 }, { "epoch": 3.705648854961832, "grad_norm": 0.022104621494281065, "learning_rate": 1.906186456744983e-06, "loss": 0.0957, "step": 182040 }, { "epoch": 3.705852417302799, "grad_norm": 34.389232122824325, "learning_rate": 1.9056282812624106e-06, "loss": 0.0549, "step": 182050 }, { "epoch": 3.7060559796437658, "grad_norm": 13.619246447408614, "learning_rate": 1.9050701682746625e-06, "loss": 0.109, "step": 182060 }, { "epoch": 3.706259541984733, "grad_norm": 15.91139144736427, "learning_rate": 1.9045121177930053e-06, "loss": 0.0606, "step": 182070 }, { "epoch": 3.7064631043256995, "grad_norm": 0.5233836146795582, "learning_rate": 1.9039541298287144e-06, "loss": 0.0789, "step": 182080 }, { "epoch": 3.7066666666666666, "grad_norm": 0.03220374768139414, "learning_rate": 1.9033962043930593e-06, "loss": 0.0401, "step": 182090 }, { "epoch": 3.7068702290076336, "grad_norm": 22.608739367686606, "learning_rate": 1.9028383414973029e-06, "loss": 0.0362, "step": 182100 }, { "epoch": 3.7070737913486003, "grad_norm": 0.05998965871772405, "learning_rate": 1.902280541152719e-06, "loss": 0.0541, "step": 182110 }, { "epoch": 3.7072773536895673, "grad_norm": 32.90513241290509, "learning_rate": 1.901722803370567e-06, "loss": 0.0314, "step": 182120 }, { "epoch": 3.7074809160305344, "grad_norm": 0.46598135749661945, "learning_rate": 1.9011651281621124e-06, "loss": 0.0214, "step": 182130 }, { "epoch": 3.707684478371501, "grad_norm": 28.169666164646877, "learning_rate": 1.9006075155386222e-06, "loss": 0.1449, "step": 182140 }, { "epoch": 3.707888040712468, "grad_norm": 0.17547165466734735, "learning_rate": 1.9000499655113541e-06, "loss": 0.0371, "step": 182150 }, { "epoch": 3.708091603053435, "grad_norm": 12.734388171847268, "learning_rate": 1.8994924780915691e-06, "loss": 0.1491, "step": 182160 }, { "epoch": 3.708295165394402, "grad_norm": 0.15958774085975247, "learning_rate": 1.8989350532905276e-06, "loss": 0.0316, "step": 182170 }, { "epoch": 3.708498727735369, "grad_norm": 0.08491601327972859, "learning_rate": 1.8983776911194863e-06, "loss": 0.0587, "step": 182180 }, { "epoch": 3.708702290076336, "grad_norm": 0.1088663681304562, "learning_rate": 1.8978203915897025e-06, "loss": 0.0267, "step": 182190 }, { "epoch": 3.7089058524173026, "grad_norm": 9.876988449113695, "learning_rate": 1.8972631547124316e-06, "loss": 0.0488, "step": 182200 }, { "epoch": 3.7091094147582697, "grad_norm": 0.058689749696830426, "learning_rate": 1.8967059804989275e-06, "loss": 0.0742, "step": 182210 }, { "epoch": 3.709312977099237, "grad_norm": 39.513946629049755, "learning_rate": 1.896148868960443e-06, "loss": 0.0313, "step": 182220 }, { "epoch": 3.7095165394402034, "grad_norm": 0.04871499178174316, "learning_rate": 1.8955918201082301e-06, "loss": 0.0036, "step": 182230 }, { "epoch": 3.7097201017811705, "grad_norm": 0.025821394996885735, "learning_rate": 1.8950348339535403e-06, "loss": 0.0476, "step": 182240 }, { "epoch": 3.7099236641221376, "grad_norm": 0.032563844658540717, "learning_rate": 1.8944779105076177e-06, "loss": 0.0484, "step": 182250 }, { "epoch": 3.710127226463104, "grad_norm": 0.014550019041158702, "learning_rate": 1.8939210497817157e-06, "loss": 0.0172, "step": 182260 }, { "epoch": 3.7103307888040713, "grad_norm": 0.028928174536645927, "learning_rate": 1.8933642517870803e-06, "loss": 0.1059, "step": 182270 }, { "epoch": 3.7105343511450384, "grad_norm": 0.02314929585947265, "learning_rate": 1.8928075165349513e-06, "loss": 0.0709, "step": 182280 }, { "epoch": 3.710737913486005, "grad_norm": 0.020438395102749814, "learning_rate": 1.892250844036579e-06, "loss": 0.0481, "step": 182290 }, { "epoch": 3.710941475826972, "grad_norm": 0.0476961391434314, "learning_rate": 1.8916942343032046e-06, "loss": 0.0435, "step": 182300 }, { "epoch": 3.711145038167939, "grad_norm": 0.33497052803163285, "learning_rate": 1.8911376873460674e-06, "loss": 0.0104, "step": 182310 }, { "epoch": 3.711348600508906, "grad_norm": 0.3270179428410384, "learning_rate": 1.8905812031764086e-06, "loss": 0.0482, "step": 182320 }, { "epoch": 3.711552162849873, "grad_norm": 0.9681173626035133, "learning_rate": 1.8900247818054673e-06, "loss": 0.0012, "step": 182330 }, { "epoch": 3.71175572519084, "grad_norm": 0.03523915900135101, "learning_rate": 1.889468423244481e-06, "loss": 0.0411, "step": 182340 }, { "epoch": 3.7119592875318066, "grad_norm": 8.265857354187359, "learning_rate": 1.8889121275046869e-06, "loss": 0.0847, "step": 182350 }, { "epoch": 3.7121628498727737, "grad_norm": 0.147571034189907, "learning_rate": 1.8883558945973184e-06, "loss": 0.0421, "step": 182360 }, { "epoch": 3.7123664122137403, "grad_norm": 10.545663061293936, "learning_rate": 1.887799724533611e-06, "loss": 0.0901, "step": 182370 }, { "epoch": 3.7125699745547074, "grad_norm": 0.7357639876325603, "learning_rate": 1.8872436173247965e-06, "loss": 0.0257, "step": 182380 }, { "epoch": 3.7127735368956745, "grad_norm": 0.15857084331035562, "learning_rate": 1.8866875729821066e-06, "loss": 0.0459, "step": 182390 }, { "epoch": 3.712977099236641, "grad_norm": 0.07819591588620417, "learning_rate": 1.8861315915167705e-06, "loss": 0.0411, "step": 182400 }, { "epoch": 3.713180661577608, "grad_norm": 21.846434218935382, "learning_rate": 1.8855756729400182e-06, "loss": 0.0961, "step": 182410 }, { "epoch": 3.713384223918575, "grad_norm": 0.03364719767236057, "learning_rate": 1.8850198172630763e-06, "loss": 0.0505, "step": 182420 }, { "epoch": 3.713587786259542, "grad_norm": 0.07166855756837574, "learning_rate": 1.8844640244971713e-06, "loss": 0.0329, "step": 182430 }, { "epoch": 3.713791348600509, "grad_norm": 16.354046888802554, "learning_rate": 1.8839082946535282e-06, "loss": 0.0845, "step": 182440 }, { "epoch": 3.7139949109414756, "grad_norm": 0.11405758741647118, "learning_rate": 1.88335262774337e-06, "loss": 0.0809, "step": 182450 }, { "epoch": 3.7141984732824427, "grad_norm": 0.005793145970305882, "learning_rate": 1.882797023777922e-06, "loss": 0.1649, "step": 182460 }, { "epoch": 3.7144020356234098, "grad_norm": 0.10416557176209745, "learning_rate": 1.8822414827683994e-06, "loss": 0.003, "step": 182470 }, { "epoch": 3.7146055979643764, "grad_norm": 8.817767241663018, "learning_rate": 1.8816860047260288e-06, "loss": 0.0987, "step": 182480 }, { "epoch": 3.7148091603053435, "grad_norm": 0.034361038770631354, "learning_rate": 1.8811305896620246e-06, "loss": 0.048, "step": 182490 }, { "epoch": 3.7150127226463106, "grad_norm": 5.636427282198449, "learning_rate": 1.880575237587603e-06, "loss": 0.0467, "step": 182500 }, { "epoch": 3.715216284987277, "grad_norm": 0.28743356557874306, "learning_rate": 1.8800199485139859e-06, "loss": 0.0133, "step": 182510 }, { "epoch": 3.7154198473282443, "grad_norm": 0.010586417508163588, "learning_rate": 1.8794647224523826e-06, "loss": 0.0411, "step": 182520 }, { "epoch": 3.7156234096692113, "grad_norm": 0.19139347348203686, "learning_rate": 1.878909559414009e-06, "loss": 0.0366, "step": 182530 }, { "epoch": 3.715826972010178, "grad_norm": 0.028880349043701823, "learning_rate": 1.8783544594100761e-06, "loss": 0.0778, "step": 182540 }, { "epoch": 3.716030534351145, "grad_norm": 0.00516622237127451, "learning_rate": 1.877799422451796e-06, "loss": 0.0027, "step": 182550 }, { "epoch": 3.716234096692112, "grad_norm": 0.04489755707875885, "learning_rate": 1.8772444485503777e-06, "loss": 0.0745, "step": 182560 }, { "epoch": 3.7164376590330788, "grad_norm": 0.04661519630206813, "learning_rate": 1.8766895377170297e-06, "loss": 0.0119, "step": 182570 }, { "epoch": 3.716641221374046, "grad_norm": 0.04210204455111167, "learning_rate": 1.8761346899629596e-06, "loss": 0.111, "step": 182580 }, { "epoch": 3.716844783715013, "grad_norm": 0.12249277188239582, "learning_rate": 1.875579905299373e-06, "loss": 0.0221, "step": 182590 }, { "epoch": 3.7170483460559796, "grad_norm": 12.512148608582327, "learning_rate": 1.875025183737474e-06, "loss": 0.0869, "step": 182600 }, { "epoch": 3.7172519083969466, "grad_norm": 0.19370507380197136, "learning_rate": 1.8744705252884682e-06, "loss": 0.0679, "step": 182610 }, { "epoch": 3.7174554707379137, "grad_norm": 0.04599875214327146, "learning_rate": 1.873915929963553e-06, "loss": 0.0013, "step": 182620 }, { "epoch": 3.7176590330788803, "grad_norm": 8.656236227697168, "learning_rate": 1.873361397773933e-06, "loss": 0.1171, "step": 182630 }, { "epoch": 3.7178625954198474, "grad_norm": 1.1167421376996585, "learning_rate": 1.8728069287308082e-06, "loss": 0.0015, "step": 182640 }, { "epoch": 3.7180661577608145, "grad_norm": 0.02812549807162804, "learning_rate": 1.8722525228453725e-06, "loss": 0.0154, "step": 182650 }, { "epoch": 3.718269720101781, "grad_norm": 0.168185521772075, "learning_rate": 1.871698180128827e-06, "loss": 0.1003, "step": 182660 }, { "epoch": 3.718473282442748, "grad_norm": 47.8801769580026, "learning_rate": 1.8711439005923676e-06, "loss": 0.0465, "step": 182670 }, { "epoch": 3.718676844783715, "grad_norm": 0.005066899095679003, "learning_rate": 1.8705896842471837e-06, "loss": 0.1265, "step": 182680 }, { "epoch": 3.718880407124682, "grad_norm": 0.1561504408554695, "learning_rate": 1.8700355311044749e-06, "loss": 0.0325, "step": 182690 }, { "epoch": 3.719083969465649, "grad_norm": 0.021867645330155394, "learning_rate": 1.8694814411754285e-06, "loss": 0.1098, "step": 182700 }, { "epoch": 3.7192875318066156, "grad_norm": 0.08353416721067046, "learning_rate": 1.8689274144712349e-06, "loss": 0.0148, "step": 182710 }, { "epoch": 3.7194910941475827, "grad_norm": 0.272606360011296, "learning_rate": 1.8683734510030882e-06, "loss": 0.0033, "step": 182720 }, { "epoch": 3.7196946564885494, "grad_norm": 0.02025615791919297, "learning_rate": 1.8678195507821717e-06, "loss": 0.0098, "step": 182730 }, { "epoch": 3.7198982188295164, "grad_norm": 0.11654286826118401, "learning_rate": 1.867265713819673e-06, "loss": 0.1791, "step": 182740 }, { "epoch": 3.7201017811704835, "grad_norm": 0.009779038085780355, "learning_rate": 1.8667119401267786e-06, "loss": 0.0339, "step": 182750 }, { "epoch": 3.72030534351145, "grad_norm": 0.034889551950109524, "learning_rate": 1.8661582297146724e-06, "loss": 0.1013, "step": 182760 }, { "epoch": 3.7205089058524172, "grad_norm": 0.010365060776221402, "learning_rate": 1.8656045825945367e-06, "loss": 0.0009, "step": 182770 }, { "epoch": 3.7207124681933843, "grad_norm": 0.007338454887882053, "learning_rate": 1.8650509987775539e-06, "loss": 0.044, "step": 182780 }, { "epoch": 3.720916030534351, "grad_norm": 0.2941953079192854, "learning_rate": 1.8644974782749038e-06, "loss": 0.0317, "step": 182790 }, { "epoch": 3.721119592875318, "grad_norm": 3.027237371770229, "learning_rate": 1.8639440210977656e-06, "loss": 0.1818, "step": 182800 }, { "epoch": 3.721323155216285, "grad_norm": 0.014412412548982137, "learning_rate": 1.8633906272573172e-06, "loss": 0.0793, "step": 182810 }, { "epoch": 3.7215267175572517, "grad_norm": 0.079154022955422, "learning_rate": 1.8628372967647345e-06, "loss": 0.0328, "step": 182820 }, { "epoch": 3.721730279898219, "grad_norm": 0.00473817550912204, "learning_rate": 1.8622840296311938e-06, "loss": 0.0469, "step": 182830 }, { "epoch": 3.721933842239186, "grad_norm": 0.20341846684262804, "learning_rate": 1.8617308258678679e-06, "loss": 0.0027, "step": 182840 }, { "epoch": 3.7221374045801525, "grad_norm": 9.823178092396889, "learning_rate": 1.8611776854859325e-06, "loss": 0.0223, "step": 182850 }, { "epoch": 3.7223409669211196, "grad_norm": 9.546101950014638, "learning_rate": 1.860624608496554e-06, "loss": 0.0587, "step": 182860 }, { "epoch": 3.7225445292620867, "grad_norm": 0.09321192131294448, "learning_rate": 1.8600715949109037e-06, "loss": 0.0156, "step": 182870 }, { "epoch": 3.7227480916030533, "grad_norm": 0.02622212582014967, "learning_rate": 1.8595186447401554e-06, "loss": 0.0234, "step": 182880 }, { "epoch": 3.7229516539440204, "grad_norm": 0.7278254072539797, "learning_rate": 1.8589657579954711e-06, "loss": 0.0046, "step": 182890 }, { "epoch": 3.7231552162849875, "grad_norm": 0.12370674890421152, "learning_rate": 1.8584129346880192e-06, "loss": 0.0164, "step": 182900 }, { "epoch": 3.723358778625954, "grad_norm": 0.013493671498200018, "learning_rate": 1.8578601748289638e-06, "loss": 0.0667, "step": 182910 }, { "epoch": 3.723562340966921, "grad_norm": 0.29112050359287267, "learning_rate": 1.8573074784294699e-06, "loss": 0.0246, "step": 182920 }, { "epoch": 3.7237659033078883, "grad_norm": 0.016719572567954262, "learning_rate": 1.8567548455006985e-06, "loss": 0.1646, "step": 182930 }, { "epoch": 3.723969465648855, "grad_norm": 0.033293500380189356, "learning_rate": 1.8562022760538117e-06, "loss": 0.0378, "step": 182940 }, { "epoch": 3.724173027989822, "grad_norm": 0.05446536483002069, "learning_rate": 1.8556497700999694e-06, "loss": 0.0723, "step": 182950 }, { "epoch": 3.724376590330789, "grad_norm": 0.009890871854476568, "learning_rate": 1.8550973276503298e-06, "loss": 0.1493, "step": 182960 }, { "epoch": 3.7245801526717557, "grad_norm": 4.597407850478218, "learning_rate": 1.8545449487160499e-06, "loss": 0.0621, "step": 182970 }, { "epoch": 3.7247837150127228, "grad_norm": 0.9023335329750112, "learning_rate": 1.8539926333082863e-06, "loss": 0.0096, "step": 182980 }, { "epoch": 3.72498727735369, "grad_norm": 0.027761276107088124, "learning_rate": 1.853440381438194e-06, "loss": 0.0039, "step": 182990 }, { "epoch": 3.7251908396946565, "grad_norm": 0.006147859312334748, "learning_rate": 1.8528881931169252e-06, "loss": 0.0443, "step": 183000 }, { "epoch": 3.7253944020356236, "grad_norm": 1.6848073463813635, "learning_rate": 1.8523360683556353e-06, "loss": 0.0288, "step": 183010 }, { "epoch": 3.72559796437659, "grad_norm": 16.194568880631945, "learning_rate": 1.8517840071654686e-06, "loss": 0.0593, "step": 183020 }, { "epoch": 3.7258015267175573, "grad_norm": 0.024737037030104558, "learning_rate": 1.8512320095575803e-06, "loss": 0.0013, "step": 183030 }, { "epoch": 3.726005089058524, "grad_norm": 0.04093120650149712, "learning_rate": 1.8506800755431192e-06, "loss": 0.0851, "step": 183040 }, { "epoch": 3.726208651399491, "grad_norm": 0.09029874239038553, "learning_rate": 1.850128205133227e-06, "loss": 0.0119, "step": 183050 }, { "epoch": 3.726412213740458, "grad_norm": 0.0074834045717335415, "learning_rate": 1.8495763983390558e-06, "loss": 0.0337, "step": 183060 }, { "epoch": 3.7266157760814247, "grad_norm": 0.011511954053591296, "learning_rate": 1.8490246551717456e-06, "loss": 0.039, "step": 183070 }, { "epoch": 3.7268193384223918, "grad_norm": 0.2589745302254564, "learning_rate": 1.8484729756424386e-06, "loss": 0.0102, "step": 183080 }, { "epoch": 3.727022900763359, "grad_norm": 15.678223338626784, "learning_rate": 1.847921359762283e-06, "loss": 0.0136, "step": 183090 }, { "epoch": 3.7272264631043255, "grad_norm": 0.09400383393782694, "learning_rate": 1.8473698075424134e-06, "loss": 0.0235, "step": 183100 }, { "epoch": 3.7274300254452926, "grad_norm": 0.9838736110904265, "learning_rate": 1.8468183189939692e-06, "loss": 0.0355, "step": 183110 }, { "epoch": 3.7276335877862596, "grad_norm": 0.4057698829164369, "learning_rate": 1.8462668941280942e-06, "loss": 0.0556, "step": 183120 }, { "epoch": 3.7278371501272263, "grad_norm": 0.06693926091893045, "learning_rate": 1.8457155329559196e-06, "loss": 0.0009, "step": 183130 }, { "epoch": 3.7280407124681934, "grad_norm": 14.752819374827638, "learning_rate": 1.8451642354885818e-06, "loss": 0.1465, "step": 183140 }, { "epoch": 3.7282442748091604, "grad_norm": 1.209172940091198, "learning_rate": 1.8446130017372161e-06, "loss": 0.0679, "step": 183150 }, { "epoch": 3.728447837150127, "grad_norm": 0.016949195844054674, "learning_rate": 1.844061831712955e-06, "loss": 0.0402, "step": 183160 }, { "epoch": 3.728651399491094, "grad_norm": 3.383240131550392, "learning_rate": 1.8435107254269303e-06, "loss": 0.1536, "step": 183170 }, { "epoch": 3.728854961832061, "grad_norm": 0.04352498815472446, "learning_rate": 1.8429596828902713e-06, "loss": 0.059, "step": 183180 }, { "epoch": 3.729058524173028, "grad_norm": 0.11485127263347757, "learning_rate": 1.8424087041141082e-06, "loss": 0.0453, "step": 183190 }, { "epoch": 3.729262086513995, "grad_norm": 0.022782157611400768, "learning_rate": 1.8418577891095686e-06, "loss": 0.1376, "step": 183200 }, { "epoch": 3.729465648854962, "grad_norm": 0.029881456029051696, "learning_rate": 1.841306937887778e-06, "loss": 0.0509, "step": 183210 }, { "epoch": 3.7296692111959286, "grad_norm": 0.09106116225866619, "learning_rate": 1.8407561504598637e-06, "loss": 0.0003, "step": 183220 }, { "epoch": 3.7298727735368957, "grad_norm": 0.3245276443534941, "learning_rate": 1.8402054268369452e-06, "loss": 0.0152, "step": 183230 }, { "epoch": 3.730076335877863, "grad_norm": 0.027815234193511246, "learning_rate": 1.8396547670301496e-06, "loss": 0.0202, "step": 183240 }, { "epoch": 3.7302798982188294, "grad_norm": 0.07301587012907794, "learning_rate": 1.8391041710505974e-06, "loss": 0.0006, "step": 183250 }, { "epoch": 3.7304834605597965, "grad_norm": 0.023015972377796123, "learning_rate": 1.8385536389094049e-06, "loss": 0.001, "step": 183260 }, { "epoch": 3.7306870229007636, "grad_norm": 0.013535660750287361, "learning_rate": 1.8380031706176972e-06, "loss": 0.1562, "step": 183270 }, { "epoch": 3.7308905852417302, "grad_norm": 3.3386701800838825, "learning_rate": 1.837452766186586e-06, "loss": 0.0621, "step": 183280 }, { "epoch": 3.7310941475826973, "grad_norm": 0.1272421086771537, "learning_rate": 1.8369024256271895e-06, "loss": 0.0249, "step": 183290 }, { "epoch": 3.7312977099236644, "grad_norm": 0.19553399513911168, "learning_rate": 1.8363521489506225e-06, "loss": 0.0517, "step": 183300 }, { "epoch": 3.731501272264631, "grad_norm": 0.11675364737135051, "learning_rate": 1.8358019361679991e-06, "loss": 0.0554, "step": 183310 }, { "epoch": 3.731704834605598, "grad_norm": 0.33136936707629294, "learning_rate": 1.8352517872904308e-06, "loss": 0.0987, "step": 183320 }, { "epoch": 3.7319083969465647, "grad_norm": 0.03726880434415111, "learning_rate": 1.834701702329028e-06, "loss": 0.1096, "step": 183330 }, { "epoch": 3.732111959287532, "grad_norm": 0.11010897973979707, "learning_rate": 1.8341516812949023e-06, "loss": 0.0043, "step": 183340 }, { "epoch": 3.732315521628499, "grad_norm": 0.08619280370316094, "learning_rate": 1.8336017241991605e-06, "loss": 0.1014, "step": 183350 }, { "epoch": 3.7325190839694655, "grad_norm": 9.780219446489086, "learning_rate": 1.8330518310529105e-06, "loss": 0.098, "step": 183360 }, { "epoch": 3.7327226463104326, "grad_norm": 0.1188383229700696, "learning_rate": 1.8325020018672573e-06, "loss": 0.0296, "step": 183370 }, { "epoch": 3.7329262086513992, "grad_norm": 0.05918617253263435, "learning_rate": 1.831952236653306e-06, "loss": 0.1343, "step": 183380 }, { "epoch": 3.7331297709923663, "grad_norm": 0.17816183431920213, "learning_rate": 1.8314025354221599e-06, "loss": 0.0333, "step": 183390 }, { "epoch": 3.7333333333333334, "grad_norm": 0.19256094794008444, "learning_rate": 1.8308528981849206e-06, "loss": 0.0709, "step": 183400 }, { "epoch": 3.7335368956743, "grad_norm": 0.024436264053718272, "learning_rate": 1.8303033249526891e-06, "loss": 0.0185, "step": 183410 }, { "epoch": 3.733740458015267, "grad_norm": 15.250200665356227, "learning_rate": 1.829753815736564e-06, "loss": 0.0904, "step": 183420 }, { "epoch": 3.733944020356234, "grad_norm": 0.034552251088170026, "learning_rate": 1.829204370547646e-06, "loss": 0.0086, "step": 183430 }, { "epoch": 3.734147582697201, "grad_norm": 0.257170052869137, "learning_rate": 1.828654989397028e-06, "loss": 0.0188, "step": 183440 }, { "epoch": 3.734351145038168, "grad_norm": 0.05387995759677279, "learning_rate": 1.8281056722958052e-06, "loss": 0.0483, "step": 183450 }, { "epoch": 3.734554707379135, "grad_norm": 0.06712256730021961, "learning_rate": 1.827556419255077e-06, "loss": 0.0304, "step": 183460 }, { "epoch": 3.7347582697201016, "grad_norm": 14.251549937715668, "learning_rate": 1.8270072302859314e-06, "loss": 0.0381, "step": 183470 }, { "epoch": 3.7349618320610687, "grad_norm": 8.335468861201333e-05, "learning_rate": 1.82645810539946e-06, "loss": 0.1493, "step": 183480 }, { "epoch": 3.7351653944020358, "grad_norm": 0.017739842305663913, "learning_rate": 1.8259090446067578e-06, "loss": 0.0963, "step": 183490 }, { "epoch": 3.7353689567430024, "grad_norm": 0.055321194770388576, "learning_rate": 1.8253600479189094e-06, "loss": 0.0388, "step": 183500 }, { "epoch": 3.7355725190839695, "grad_norm": 0.16125728833572123, "learning_rate": 1.8248111153470032e-06, "loss": 0.0473, "step": 183510 }, { "epoch": 3.7357760814249366, "grad_norm": 4.087812551348086, "learning_rate": 1.8242622469021264e-06, "loss": 0.0082, "step": 183520 }, { "epoch": 3.735979643765903, "grad_norm": 5.5033036276660425, "learning_rate": 1.823713442595364e-06, "loss": 0.021, "step": 183530 }, { "epoch": 3.7361832061068703, "grad_norm": 0.0021628228899083514, "learning_rate": 1.8231647024377996e-06, "loss": 0.0647, "step": 183540 }, { "epoch": 3.7363867684478373, "grad_norm": 59.1736833341922, "learning_rate": 1.8226160264405156e-06, "loss": 0.0988, "step": 183550 }, { "epoch": 3.736590330788804, "grad_norm": 10.412265455838353, "learning_rate": 1.822067414614595e-06, "loss": 0.05, "step": 183560 }, { "epoch": 3.736793893129771, "grad_norm": 0.017384792323833484, "learning_rate": 1.8215188669711132e-06, "loss": 0.0028, "step": 183570 }, { "epoch": 3.736997455470738, "grad_norm": 0.012213797996190124, "learning_rate": 1.820970383521153e-06, "loss": 0.007, "step": 183580 }, { "epoch": 3.7372010178117048, "grad_norm": 0.25885183041161186, "learning_rate": 1.820421964275792e-06, "loss": 0.004, "step": 183590 }, { "epoch": 3.737404580152672, "grad_norm": 8.338201320846727, "learning_rate": 1.8198736092461016e-06, "loss": 0.0443, "step": 183600 }, { "epoch": 3.737608142493639, "grad_norm": 0.22720011597502887, "learning_rate": 1.8193253184431613e-06, "loss": 0.0751, "step": 183610 }, { "epoch": 3.7378117048346056, "grad_norm": 0.14894908789098374, "learning_rate": 1.8187770918780446e-06, "loss": 0.0012, "step": 183620 }, { "epoch": 3.7380152671755726, "grad_norm": 0.16588308773555208, "learning_rate": 1.8182289295618183e-06, "loss": 0.2226, "step": 183630 }, { "epoch": 3.7382188295165393, "grad_norm": 0.12068454249084326, "learning_rate": 1.8176808315055605e-06, "loss": 0.0369, "step": 183640 }, { "epoch": 3.7384223918575064, "grad_norm": 10.30247837019394, "learning_rate": 1.817132797720335e-06, "loss": 0.023, "step": 183650 }, { "epoch": 3.7386259541984734, "grad_norm": 0.004189208878692325, "learning_rate": 1.8165848282172111e-06, "loss": 0.0006, "step": 183660 }, { "epoch": 3.73882951653944, "grad_norm": 0.40797115504871484, "learning_rate": 1.81603692300726e-06, "loss": 0.0442, "step": 183670 }, { "epoch": 3.739033078880407, "grad_norm": 52.58674954816673, "learning_rate": 1.8154890821015425e-06, "loss": 0.0819, "step": 183680 }, { "epoch": 3.739236641221374, "grad_norm": 0.14553063816822487, "learning_rate": 1.8149413055111225e-06, "loss": 0.0115, "step": 183690 }, { "epoch": 3.739440203562341, "grad_norm": 0.0021465818757301705, "learning_rate": 1.8143935932470686e-06, "loss": 0.058, "step": 183700 }, { "epoch": 3.739643765903308, "grad_norm": 0.004760491602101421, "learning_rate": 1.8138459453204377e-06, "loss": 0.1006, "step": 183710 }, { "epoch": 3.7398473282442746, "grad_norm": 11.887534295571854, "learning_rate": 1.8132983617422918e-06, "loss": 0.0711, "step": 183720 }, { "epoch": 3.7400508905852416, "grad_norm": 0.012213716354737046, "learning_rate": 1.8127508425236896e-06, "loss": 0.0113, "step": 183730 }, { "epoch": 3.7402544529262087, "grad_norm": 17.901972009829628, "learning_rate": 1.8122033876756895e-06, "loss": 0.1321, "step": 183740 }, { "epoch": 3.7404580152671754, "grad_norm": 45.97827690566877, "learning_rate": 1.811655997209348e-06, "loss": 0.0635, "step": 183750 }, { "epoch": 3.7406615776081424, "grad_norm": 12.81816849303701, "learning_rate": 1.8111086711357196e-06, "loss": 0.0823, "step": 183760 }, { "epoch": 3.7408651399491095, "grad_norm": 0.21596768955376014, "learning_rate": 1.8105614094658592e-06, "loss": 0.0432, "step": 183770 }, { "epoch": 3.741068702290076, "grad_norm": 0.07895624780215825, "learning_rate": 1.8100142122108188e-06, "loss": 0.05, "step": 183780 }, { "epoch": 3.7412722646310432, "grad_norm": 0.04250429923819254, "learning_rate": 1.8094670793816505e-06, "loss": 0.1024, "step": 183790 }, { "epoch": 3.7414758269720103, "grad_norm": 2.3730125047296657, "learning_rate": 1.8089200109894055e-06, "loss": 0.054, "step": 183800 }, { "epoch": 3.741679389312977, "grad_norm": 0.023495345500594397, "learning_rate": 1.8083730070451277e-06, "loss": 0.0574, "step": 183810 }, { "epoch": 3.741882951653944, "grad_norm": 0.05030185619889934, "learning_rate": 1.8078260675598692e-06, "loss": 0.0336, "step": 183820 }, { "epoch": 3.742086513994911, "grad_norm": 0.026606346209188027, "learning_rate": 1.8072791925446764e-06, "loss": 0.0018, "step": 183830 }, { "epoch": 3.7422900763358777, "grad_norm": 0.0328609123016261, "learning_rate": 1.8067323820105908e-06, "loss": 0.024, "step": 183840 }, { "epoch": 3.742493638676845, "grad_norm": 7.689303938314686, "learning_rate": 1.8061856359686563e-06, "loss": 0.067, "step": 183850 }, { "epoch": 3.742697201017812, "grad_norm": 0.09331064223724467, "learning_rate": 1.8056389544299196e-06, "loss": 0.0824, "step": 183860 }, { "epoch": 3.7429007633587785, "grad_norm": 0.23602246555630507, "learning_rate": 1.8050923374054168e-06, "loss": 0.0066, "step": 183870 }, { "epoch": 3.7431043256997456, "grad_norm": 0.07976173635166775, "learning_rate": 1.8045457849061898e-06, "loss": 0.0531, "step": 183880 }, { "epoch": 3.7433078880407127, "grad_norm": 0.04254964080403517, "learning_rate": 1.8039992969432758e-06, "loss": 0.1086, "step": 183890 }, { "epoch": 3.7435114503816793, "grad_norm": 0.13793977041371308, "learning_rate": 1.8034528735277123e-06, "loss": 0.0033, "step": 183900 }, { "epoch": 3.7437150127226464, "grad_norm": 0.05534653877919353, "learning_rate": 1.8029065146705355e-06, "loss": 0.0361, "step": 183910 }, { "epoch": 3.7439185750636135, "grad_norm": 19.617637287955876, "learning_rate": 1.8023602203827795e-06, "loss": 0.1029, "step": 183920 }, { "epoch": 3.74412213740458, "grad_norm": 0.017159510990127243, "learning_rate": 1.8018139906754772e-06, "loss": 0.0611, "step": 183930 }, { "epoch": 3.744325699745547, "grad_norm": 0.03006740253433094, "learning_rate": 1.8012678255596606e-06, "loss": 0.0558, "step": 183940 }, { "epoch": 3.7445292620865143, "grad_norm": 7.102898797527129, "learning_rate": 1.8007217250463606e-06, "loss": 0.0778, "step": 183950 }, { "epoch": 3.744732824427481, "grad_norm": 7.1641208395545695, "learning_rate": 1.8001756891466055e-06, "loss": 0.1628, "step": 183960 }, { "epoch": 3.744936386768448, "grad_norm": 0.012230692326127533, "learning_rate": 1.799629717871424e-06, "loss": 0.0012, "step": 183970 }, { "epoch": 3.7451399491094146, "grad_norm": 0.04174178362238254, "learning_rate": 1.7990838112318421e-06, "loss": 0.0598, "step": 183980 }, { "epoch": 3.7453435114503817, "grad_norm": 9.204219525010272, "learning_rate": 1.7985379692388872e-06, "loss": 0.0452, "step": 183990 }, { "epoch": 3.7455470737913483, "grad_norm": 0.12086383162531808, "learning_rate": 1.797992191903578e-06, "loss": 0.0009, "step": 184000 }, { "epoch": 3.7457506361323154, "grad_norm": 0.017547470570595403, "learning_rate": 1.797446479236944e-06, "loss": 0.0637, "step": 184010 }, { "epoch": 3.7459541984732825, "grad_norm": 14.095218294336975, "learning_rate": 1.7969008312500018e-06, "loss": 0.0697, "step": 184020 }, { "epoch": 3.746157760814249, "grad_norm": 0.09588149279959586, "learning_rate": 1.796355247953771e-06, "loss": 0.039, "step": 184030 }, { "epoch": 3.746361323155216, "grad_norm": 24.331406596278793, "learning_rate": 1.7958097293592759e-06, "loss": 0.1079, "step": 184040 }, { "epoch": 3.7465648854961833, "grad_norm": 48.65053457710882, "learning_rate": 1.7952642754775278e-06, "loss": 0.055, "step": 184050 }, { "epoch": 3.74676844783715, "grad_norm": 0.0418247037378097, "learning_rate": 1.794718886319544e-06, "loss": 0.0258, "step": 184060 }, { "epoch": 3.746972010178117, "grad_norm": 0.21824366902223127, "learning_rate": 1.7941735618963447e-06, "loss": 0.0885, "step": 184070 }, { "epoch": 3.747175572519084, "grad_norm": 2.0375507997717714, "learning_rate": 1.793628302218937e-06, "loss": 0.068, "step": 184080 }, { "epoch": 3.7473791348600507, "grad_norm": 0.07523310900505414, "learning_rate": 1.7930831072983357e-06, "loss": 0.0577, "step": 184090 }, { "epoch": 3.7475826972010178, "grad_norm": 0.2069871844784791, "learning_rate": 1.7925379771455514e-06, "loss": 0.0841, "step": 184100 }, { "epoch": 3.747786259541985, "grad_norm": 0.22531692483516289, "learning_rate": 1.791992911771594e-06, "loss": 0.1271, "step": 184110 }, { "epoch": 3.7479898218829515, "grad_norm": 0.09494128979159967, "learning_rate": 1.7914479111874716e-06, "loss": 0.0658, "step": 184120 }, { "epoch": 3.7481933842239186, "grad_norm": 0.034562422687920466, "learning_rate": 1.7909029754041912e-06, "loss": 0.0143, "step": 184130 }, { "epoch": 3.7483969465648856, "grad_norm": 0.056035349583837206, "learning_rate": 1.790358104432759e-06, "loss": 0.0236, "step": 184140 }, { "epoch": 3.7486005089058523, "grad_norm": 0.16090669296735716, "learning_rate": 1.7898132982841782e-06, "loss": 0.0901, "step": 184150 }, { "epoch": 3.7488040712468194, "grad_norm": 0.06300464297837154, "learning_rate": 1.7892685569694523e-06, "loss": 0.0256, "step": 184160 }, { "epoch": 3.7490076335877864, "grad_norm": 72.16537857158157, "learning_rate": 1.7887238804995855e-06, "loss": 0.028, "step": 184170 }, { "epoch": 3.749211195928753, "grad_norm": 0.0022402167175595033, "learning_rate": 1.7881792688855726e-06, "loss": 0.0404, "step": 184180 }, { "epoch": 3.74941475826972, "grad_norm": 0.05070209432885887, "learning_rate": 1.7876347221384177e-06, "loss": 0.0065, "step": 184190 }, { "epoch": 3.7496183206106872, "grad_norm": 0.00655080582664836, "learning_rate": 1.7870902402691193e-06, "loss": 0.1187, "step": 184200 }, { "epoch": 3.749821882951654, "grad_norm": 0.017400737755725145, "learning_rate": 1.7865458232886678e-06, "loss": 0.0443, "step": 184210 }, { "epoch": 3.750025445292621, "grad_norm": 0.03365570314415391, "learning_rate": 1.7860014712080638e-06, "loss": 0.0103, "step": 184220 }, { "epoch": 3.750229007633588, "grad_norm": 11.030096717747094, "learning_rate": 1.7854571840383018e-06, "loss": 0.04, "step": 184230 }, { "epoch": 3.7504325699745547, "grad_norm": 9.206723252303854, "learning_rate": 1.7849129617903687e-06, "loss": 0.13, "step": 184240 }, { "epoch": 3.7506361323155217, "grad_norm": 5.425571625427796, "learning_rate": 1.7843688044752627e-06, "loss": 0.0538, "step": 184250 }, { "epoch": 3.750839694656489, "grad_norm": 0.0997451093892438, "learning_rate": 1.7838247121039687e-06, "loss": 0.0046, "step": 184260 }, { "epoch": 3.7510432569974554, "grad_norm": 1.3203341000383562, "learning_rate": 1.7832806846874773e-06, "loss": 0.0617, "step": 184270 }, { "epoch": 3.7512468193384225, "grad_norm": 0.018960665276363805, "learning_rate": 1.7827367222367753e-06, "loss": 0.0386, "step": 184280 }, { "epoch": 3.751450381679389, "grad_norm": 38.78701468602862, "learning_rate": 1.782192824762849e-06, "loss": 0.1161, "step": 184290 }, { "epoch": 3.7516539440203562, "grad_norm": 0.15056831252219763, "learning_rate": 1.7816489922766834e-06, "loss": 0.0794, "step": 184300 }, { "epoch": 3.7518575063613233, "grad_norm": 0.040907199692953734, "learning_rate": 1.7811052247892614e-06, "loss": 0.0326, "step": 184310 }, { "epoch": 3.75206106870229, "grad_norm": 0.10017960803032017, "learning_rate": 1.780561522311565e-06, "loss": 0.094, "step": 184320 }, { "epoch": 3.752264631043257, "grad_norm": 7.3823398461589855, "learning_rate": 1.7800178848545758e-06, "loss": 0.0454, "step": 184330 }, { "epoch": 3.7524681933842237, "grad_norm": 1.4504932662559265, "learning_rate": 1.7794743124292723e-06, "loss": 0.0022, "step": 184340 }, { "epoch": 3.7526717557251907, "grad_norm": 0.01880987263125265, "learning_rate": 1.778930805046633e-06, "loss": 0.0578, "step": 184350 }, { "epoch": 3.752875318066158, "grad_norm": 0.2067950151272507, "learning_rate": 1.7783873627176352e-06, "loss": 0.0544, "step": 184360 }, { "epoch": 3.7530788804071245, "grad_norm": 0.04452199645250766, "learning_rate": 1.7778439854532536e-06, "loss": 0.0236, "step": 184370 }, { "epoch": 3.7532824427480915, "grad_norm": 0.032976507567040264, "learning_rate": 1.7773006732644648e-06, "loss": 0.0621, "step": 184380 }, { "epoch": 3.7534860050890586, "grad_norm": 0.08257279338330319, "learning_rate": 1.7767574261622377e-06, "loss": 0.0355, "step": 184390 }, { "epoch": 3.7536895674300252, "grad_norm": 29.4692943346115, "learning_rate": 1.7762142441575448e-06, "loss": 0.0922, "step": 184400 }, { "epoch": 3.7538931297709923, "grad_norm": 0.023518657918127957, "learning_rate": 1.7756711272613603e-06, "loss": 0.0021, "step": 184410 }, { "epoch": 3.7540966921119594, "grad_norm": 10.807292931788329, "learning_rate": 1.775128075484649e-06, "loss": 0.1744, "step": 184420 }, { "epoch": 3.754300254452926, "grad_norm": 0.29554188174049, "learning_rate": 1.7745850888383776e-06, "loss": 0.0543, "step": 184430 }, { "epoch": 3.754503816793893, "grad_norm": 0.03504628853688504, "learning_rate": 1.7740421673335184e-06, "loss": 0.0386, "step": 184440 }, { "epoch": 3.75470737913486, "grad_norm": 4.242318109456683, "learning_rate": 1.773499310981031e-06, "loss": 0.0018, "step": 184450 }, { "epoch": 3.754910941475827, "grad_norm": 9.620952428368904, "learning_rate": 1.7729565197918809e-06, "loss": 0.084, "step": 184460 }, { "epoch": 3.755114503816794, "grad_norm": 1.607107062177206, "learning_rate": 1.7724137937770296e-06, "loss": 0.0101, "step": 184470 }, { "epoch": 3.755318066157761, "grad_norm": 2.0714037976920574, "learning_rate": 1.7718711329474392e-06, "loss": 0.0306, "step": 184480 }, { "epoch": 3.7555216284987276, "grad_norm": 0.039723865123671424, "learning_rate": 1.7713285373140692e-06, "loss": 0.0399, "step": 184490 }, { "epoch": 3.7557251908396947, "grad_norm": 0.066080563941667, "learning_rate": 1.7707860068878778e-06, "loss": 0.0024, "step": 184500 }, { "epoch": 3.7559287531806618, "grad_norm": 0.07109198405680141, "learning_rate": 1.770243541679822e-06, "loss": 0.0535, "step": 184510 }, { "epoch": 3.7561323155216284, "grad_norm": 12.645565448496132, "learning_rate": 1.7697011417008581e-06, "loss": 0.0603, "step": 184520 }, { "epoch": 3.7563358778625955, "grad_norm": 0.021627412442416494, "learning_rate": 1.7691588069619397e-06, "loss": 0.0788, "step": 184530 }, { "epoch": 3.7565394402035626, "grad_norm": 0.24481832846024415, "learning_rate": 1.7686165374740228e-06, "loss": 0.0523, "step": 184540 }, { "epoch": 3.756743002544529, "grad_norm": 0.022747318710682463, "learning_rate": 1.7680743332480532e-06, "loss": 0.0269, "step": 184550 }, { "epoch": 3.7569465648854963, "grad_norm": 0.03168645103573182, "learning_rate": 1.767532194294987e-06, "loss": 0.0018, "step": 184560 }, { "epoch": 3.7571501272264634, "grad_norm": 0.06468226019623634, "learning_rate": 1.7669901206257728e-06, "loss": 0.0774, "step": 184570 }, { "epoch": 3.75735368956743, "grad_norm": 0.04140983345831852, "learning_rate": 1.7664481122513545e-06, "loss": 0.0007, "step": 184580 }, { "epoch": 3.757557251908397, "grad_norm": 0.18633906952653861, "learning_rate": 1.7659061691826823e-06, "loss": 0.0746, "step": 184590 }, { "epoch": 3.757760814249364, "grad_norm": 0.028754297118569097, "learning_rate": 1.765364291430703e-06, "loss": 0.039, "step": 184600 }, { "epoch": 3.757964376590331, "grad_norm": 22.77310686062965, "learning_rate": 1.764822479006354e-06, "loss": 0.0461, "step": 184610 }, { "epoch": 3.758167938931298, "grad_norm": 0.015444700785504487, "learning_rate": 1.7642807319205851e-06, "loss": 0.0132, "step": 184620 }, { "epoch": 3.7583715012722645, "grad_norm": 0.08283371993356257, "learning_rate": 1.7637390501843331e-06, "loss": 0.078, "step": 184630 }, { "epoch": 3.7585750636132316, "grad_norm": 1.0160725550849827, "learning_rate": 1.7631974338085379e-06, "loss": 0.0832, "step": 184640 }, { "epoch": 3.758778625954198, "grad_norm": 0.009986413745986786, "learning_rate": 1.7626558828041423e-06, "loss": 0.0663, "step": 184650 }, { "epoch": 3.7589821882951653, "grad_norm": 19.42266864061073, "learning_rate": 1.7621143971820791e-06, "loss": 0.0649, "step": 184660 }, { "epoch": 3.7591857506361324, "grad_norm": 0.07979423659561541, "learning_rate": 1.7615729769532863e-06, "loss": 0.023, "step": 184670 }, { "epoch": 3.759389312977099, "grad_norm": 0.21736778210971716, "learning_rate": 1.7610316221286983e-06, "loss": 0.0258, "step": 184680 }, { "epoch": 3.759592875318066, "grad_norm": 0.056341059112551314, "learning_rate": 1.7604903327192485e-06, "loss": 0.0435, "step": 184690 }, { "epoch": 3.759796437659033, "grad_norm": 0.30921041881806577, "learning_rate": 1.759949108735869e-06, "loss": 0.0143, "step": 184700 }, { "epoch": 3.76, "grad_norm": 126.5821274625167, "learning_rate": 1.75940795018949e-06, "loss": 0.0922, "step": 184710 }, { "epoch": 3.760203562340967, "grad_norm": 0.004232517409665956, "learning_rate": 1.7588668570910416e-06, "loss": 0.0527, "step": 184720 }, { "epoch": 3.760407124681934, "grad_norm": 0.011849445404537766, "learning_rate": 1.7583258294514516e-06, "loss": 0.0654, "step": 184730 }, { "epoch": 3.7606106870229006, "grad_norm": 0.07028659668980645, "learning_rate": 1.7577848672816467e-06, "loss": 0.0269, "step": 184740 }, { "epoch": 3.7608142493638677, "grad_norm": 9.903164046313462, "learning_rate": 1.7572439705925537e-06, "loss": 0.0493, "step": 184750 }, { "epoch": 3.7610178117048347, "grad_norm": 7.30873689234006, "learning_rate": 1.7567031393950923e-06, "loss": 0.0732, "step": 184760 }, { "epoch": 3.7612213740458014, "grad_norm": 31.527164680409445, "learning_rate": 1.7561623737001898e-06, "loss": 0.167, "step": 184770 }, { "epoch": 3.7614249363867684, "grad_norm": 116.89926075602627, "learning_rate": 1.7556216735187682e-06, "loss": 0.021, "step": 184780 }, { "epoch": 3.7616284987277355, "grad_norm": 0.14033845290620137, "learning_rate": 1.7550810388617416e-06, "loss": 0.0368, "step": 184790 }, { "epoch": 3.761832061068702, "grad_norm": 16.519082525867272, "learning_rate": 1.7545404697400354e-06, "loss": 0.0399, "step": 184800 }, { "epoch": 3.7620356234096692, "grad_norm": 0.13492346165551772, "learning_rate": 1.7539999661645657e-06, "loss": 0.0253, "step": 184810 }, { "epoch": 3.7622391857506363, "grad_norm": 7.447979072228995, "learning_rate": 1.7534595281462457e-06, "loss": 0.0653, "step": 184820 }, { "epoch": 3.762442748091603, "grad_norm": 0.11908622316654206, "learning_rate": 1.7529191556959923e-06, "loss": 0.0212, "step": 184830 }, { "epoch": 3.76264631043257, "grad_norm": 0.013003794828306853, "learning_rate": 1.752378848824719e-06, "loss": 0.0764, "step": 184840 }, { "epoch": 3.762849872773537, "grad_norm": 0.08304316490789605, "learning_rate": 1.7518386075433374e-06, "loss": 0.0472, "step": 184850 }, { "epoch": 3.7630534351145037, "grad_norm": 0.22583605255967104, "learning_rate": 1.7512984318627585e-06, "loss": 0.0545, "step": 184860 }, { "epoch": 3.763256997455471, "grad_norm": 0.04447268568167695, "learning_rate": 1.7507583217938924e-06, "loss": 0.0334, "step": 184870 }, { "epoch": 3.763460559796438, "grad_norm": 0.0017999800989264515, "learning_rate": 1.7502182773476472e-06, "loss": 0.0017, "step": 184880 }, { "epoch": 3.7636641221374045, "grad_norm": 0.0020996854711020463, "learning_rate": 1.7496782985349293e-06, "loss": 0.0277, "step": 184890 }, { "epoch": 3.7638676844783716, "grad_norm": 7.602229865431485, "learning_rate": 1.7491383853666445e-06, "loss": 0.0965, "step": 184900 }, { "epoch": 3.7640712468193387, "grad_norm": 0.6399289295863239, "learning_rate": 1.7485985378536967e-06, "loss": 0.0529, "step": 184910 }, { "epoch": 3.7642748091603053, "grad_norm": 0.07152436525669119, "learning_rate": 1.74805875600699e-06, "loss": 0.1004, "step": 184920 }, { "epoch": 3.7644783715012724, "grad_norm": 1.4274301655528525, "learning_rate": 1.7475190398374248e-06, "loss": 0.0989, "step": 184930 }, { "epoch": 3.764681933842239, "grad_norm": 7.610908201226266, "learning_rate": 1.7469793893559017e-06, "loss": 0.0359, "step": 184940 }, { "epoch": 3.764885496183206, "grad_norm": 0.08343383890483583, "learning_rate": 1.74643980457332e-06, "loss": 0.0338, "step": 184950 }, { "epoch": 3.765089058524173, "grad_norm": 0.022728775027280154, "learning_rate": 1.745900285500577e-06, "loss": 0.015, "step": 184960 }, { "epoch": 3.76529262086514, "grad_norm": 21.13814250126588, "learning_rate": 1.7453608321485705e-06, "loss": 0.0466, "step": 184970 }, { "epoch": 3.765496183206107, "grad_norm": 0.09399506997416186, "learning_rate": 1.7448214445281903e-06, "loss": 0.0456, "step": 184980 }, { "epoch": 3.7656997455470735, "grad_norm": 8.273585439266471, "learning_rate": 1.744282122650337e-06, "loss": 0.0347, "step": 184990 }, { "epoch": 3.7659033078880406, "grad_norm": 0.12240140811448935, "learning_rate": 1.7437428665258982e-06, "loss": 0.0009, "step": 185000 }, { "epoch": 3.7661068702290077, "grad_norm": 0.016564025231499975, "learning_rate": 1.7432036761657644e-06, "loss": 0.0021, "step": 185010 }, { "epoch": 3.7663104325699743, "grad_norm": 0.0514710569166856, "learning_rate": 1.742664551580831e-06, "loss": 0.0219, "step": 185020 }, { "epoch": 3.7665139949109414, "grad_norm": 0.039662334901433435, "learning_rate": 1.7421254927819803e-06, "loss": 0.0462, "step": 185030 }, { "epoch": 3.7667175572519085, "grad_norm": 6.505407143256533, "learning_rate": 1.7415864997801013e-06, "loss": 0.0214, "step": 185040 }, { "epoch": 3.766921119592875, "grad_norm": 3.502029744066477, "learning_rate": 1.7410475725860798e-06, "loss": 0.1399, "step": 185050 }, { "epoch": 3.767124681933842, "grad_norm": 0.024403534231565063, "learning_rate": 1.7405087112107999e-06, "loss": 0.0656, "step": 185060 }, { "epoch": 3.7673282442748093, "grad_norm": 0.02001263461396013, "learning_rate": 1.739969915665145e-06, "loss": 0.0128, "step": 185070 }, { "epoch": 3.767531806615776, "grad_norm": 0.1325317472914352, "learning_rate": 1.7394311859599966e-06, "loss": 0.0746, "step": 185080 }, { "epoch": 3.767735368956743, "grad_norm": 0.003101454320504425, "learning_rate": 1.7388925221062346e-06, "loss": 0.0359, "step": 185090 }, { "epoch": 3.76793893129771, "grad_norm": 0.14742728166008842, "learning_rate": 1.7383539241147385e-06, "loss": 0.0429, "step": 185100 }, { "epoch": 3.7681424936386767, "grad_norm": 0.016792712000250603, "learning_rate": 1.7378153919963864e-06, "loss": 0.0384, "step": 185110 }, { "epoch": 3.768346055979644, "grad_norm": 2.3591368076760615, "learning_rate": 1.7372769257620553e-06, "loss": 0.0822, "step": 185120 }, { "epoch": 3.768549618320611, "grad_norm": 0.1282416414164003, "learning_rate": 1.7367385254226154e-06, "loss": 0.0046, "step": 185130 }, { "epoch": 3.7687531806615775, "grad_norm": 0.01261676738020275, "learning_rate": 1.7362001909889464e-06, "loss": 0.0012, "step": 185140 }, { "epoch": 3.7689567430025446, "grad_norm": 0.013660973904750944, "learning_rate": 1.7356619224719196e-06, "loss": 0.0491, "step": 185150 }, { "epoch": 3.7691603053435117, "grad_norm": 0.01957177510101034, "learning_rate": 1.7351237198824017e-06, "loss": 0.0578, "step": 185160 }, { "epoch": 3.7693638676844783, "grad_norm": 0.027491159629766334, "learning_rate": 1.7345855832312665e-06, "loss": 0.0167, "step": 185170 }, { "epoch": 3.7695674300254454, "grad_norm": 23.544166179931857, "learning_rate": 1.7340475125293831e-06, "loss": 0.0891, "step": 185180 }, { "epoch": 3.7697709923664124, "grad_norm": 0.16920253783979994, "learning_rate": 1.7335095077876135e-06, "loss": 0.0163, "step": 185190 }, { "epoch": 3.769974554707379, "grad_norm": 0.043070287112296254, "learning_rate": 1.7329715690168302e-06, "loss": 0.0752, "step": 185200 }, { "epoch": 3.770178117048346, "grad_norm": 0.061967654956339456, "learning_rate": 1.732433696227892e-06, "loss": 0.0186, "step": 185210 }, { "epoch": 3.7703816793893132, "grad_norm": 0.010906187437786942, "learning_rate": 1.7318958894316623e-06, "loss": 0.0418, "step": 185220 }, { "epoch": 3.77058524173028, "grad_norm": 0.051432907130942215, "learning_rate": 1.7313581486390074e-06, "loss": 0.0693, "step": 185230 }, { "epoch": 3.770788804071247, "grad_norm": 9.825081994718188, "learning_rate": 1.7308204738607837e-06, "loss": 0.0944, "step": 185240 }, { "epoch": 3.7709923664122136, "grad_norm": 0.09291115621720589, "learning_rate": 1.7302828651078508e-06, "loss": 0.0495, "step": 185250 }, { "epoch": 3.7711959287531807, "grad_norm": 21.30200941621336, "learning_rate": 1.7297453223910664e-06, "loss": 0.079, "step": 185260 }, { "epoch": 3.7713994910941477, "grad_norm": 0.008772699858315289, "learning_rate": 1.729207845721288e-06, "loss": 0.0194, "step": 185270 }, { "epoch": 3.7716030534351144, "grad_norm": 0.2771311240406675, "learning_rate": 1.7286704351093698e-06, "loss": 0.0599, "step": 185280 }, { "epoch": 3.7718066157760815, "grad_norm": 0.032346929981389605, "learning_rate": 1.7281330905661653e-06, "loss": 0.0123, "step": 185290 }, { "epoch": 3.772010178117048, "grad_norm": 0.0711800494932459, "learning_rate": 1.7275958121025272e-06, "loss": 0.08, "step": 185300 }, { "epoch": 3.772213740458015, "grad_norm": 0.5696210483158987, "learning_rate": 1.7270585997293066e-06, "loss": 0.0787, "step": 185310 }, { "epoch": 3.7724173027989822, "grad_norm": 0.11040497510068957, "learning_rate": 1.7265214534573526e-06, "loss": 0.0051, "step": 185320 }, { "epoch": 3.772620865139949, "grad_norm": 0.027451348032327797, "learning_rate": 1.7259843732975161e-06, "loss": 0.0709, "step": 185330 }, { "epoch": 3.772824427480916, "grad_norm": 0.00856669025124433, "learning_rate": 1.725447359260639e-06, "loss": 0.0288, "step": 185340 }, { "epoch": 3.773027989821883, "grad_norm": 0.9581781943113125, "learning_rate": 1.7249104113575715e-06, "loss": 0.0011, "step": 185350 }, { "epoch": 3.7732315521628497, "grad_norm": 0.025909297915636002, "learning_rate": 1.7243735295991587e-06, "loss": 0.0813, "step": 185360 }, { "epoch": 3.7734351145038167, "grad_norm": 9.603690476194647, "learning_rate": 1.7238367139962393e-06, "loss": 0.1553, "step": 185370 }, { "epoch": 3.773638676844784, "grad_norm": 0.013095358565507361, "learning_rate": 1.7232999645596554e-06, "loss": 0.0005, "step": 185380 }, { "epoch": 3.7738422391857505, "grad_norm": 77.24307327268602, "learning_rate": 1.7227632813002526e-06, "loss": 0.0401, "step": 185390 }, { "epoch": 3.7740458015267175, "grad_norm": 0.08271736687525649, "learning_rate": 1.7222266642288642e-06, "loss": 0.0275, "step": 185400 }, { "epoch": 3.7742493638676846, "grad_norm": 0.046912460111809294, "learning_rate": 1.7216901133563307e-06, "loss": 0.0779, "step": 185410 }, { "epoch": 3.7744529262086512, "grad_norm": 17.39333664915228, "learning_rate": 1.7211536286934877e-06, "loss": 0.0576, "step": 185420 }, { "epoch": 3.7746564885496183, "grad_norm": 5.351232023204052, "learning_rate": 1.7206172102511699e-06, "loss": 0.0776, "step": 185430 }, { "epoch": 3.7748600508905854, "grad_norm": 6.04823552024143, "learning_rate": 1.7200808580402117e-06, "loss": 0.0272, "step": 185440 }, { "epoch": 3.775063613231552, "grad_norm": 0.08658301198775308, "learning_rate": 1.7195445720714448e-06, "loss": 0.0087, "step": 185450 }, { "epoch": 3.775267175572519, "grad_norm": 0.017464493515284504, "learning_rate": 1.719008352355701e-06, "loss": 0.0973, "step": 185460 }, { "epoch": 3.775470737913486, "grad_norm": 0.05391500048816702, "learning_rate": 1.7184721989038088e-06, "loss": 0.0923, "step": 185470 }, { "epoch": 3.775674300254453, "grad_norm": 15.352546914061636, "learning_rate": 1.7179361117265975e-06, "loss": 0.0948, "step": 185480 }, { "epoch": 3.77587786259542, "grad_norm": 5.665962858657687, "learning_rate": 1.7174000908348937e-06, "loss": 0.0267, "step": 185490 }, { "epoch": 3.776081424936387, "grad_norm": 1.5706036628391984, "learning_rate": 1.7168641362395227e-06, "loss": 0.0385, "step": 185500 }, { "epoch": 3.7762849872773536, "grad_norm": 0.2084543219430016, "learning_rate": 1.7163282479513093e-06, "loss": 0.044, "step": 185510 }, { "epoch": 3.7764885496183207, "grad_norm": 8.588218848917894, "learning_rate": 1.7157924259810781e-06, "loss": 0.0394, "step": 185520 }, { "epoch": 3.776692111959288, "grad_norm": 0.033374461255208134, "learning_rate": 1.7152566703396457e-06, "loss": 0.026, "step": 185530 }, { "epoch": 3.7768956743002544, "grad_norm": 1.67757867941649e-05, "learning_rate": 1.7147209810378368e-06, "loss": 0.0078, "step": 185540 }, { "epoch": 3.7770992366412215, "grad_norm": 0.008612683029037988, "learning_rate": 1.714185358086471e-06, "loss": 0.0288, "step": 185550 }, { "epoch": 3.7773027989821886, "grad_norm": 30.72568371012588, "learning_rate": 1.7136498014963604e-06, "loss": 0.0695, "step": 185560 }, { "epoch": 3.777506361323155, "grad_norm": 0.09009739795177593, "learning_rate": 1.7131143112783283e-06, "loss": 0.0425, "step": 185570 }, { "epoch": 3.7777099236641223, "grad_norm": 0.045504161748124564, "learning_rate": 1.7125788874431842e-06, "loss": 0.0385, "step": 185580 }, { "epoch": 3.777913486005089, "grad_norm": 7.708344980613367, "learning_rate": 1.7120435300017424e-06, "loss": 0.1158, "step": 185590 }, { "epoch": 3.778117048346056, "grad_norm": 0.09604382834958253, "learning_rate": 1.7115082389648197e-06, "loss": 0.0413, "step": 185600 }, { "epoch": 3.7783206106870226, "grad_norm": 0.1515258184689225, "learning_rate": 1.7109730143432217e-06, "loss": 0.0029, "step": 185610 }, { "epoch": 3.7785241730279897, "grad_norm": 0.025438245616559746, "learning_rate": 1.7104378561477586e-06, "loss": 0.038, "step": 185620 }, { "epoch": 3.778727735368957, "grad_norm": 0.3578502513465269, "learning_rate": 1.709902764389243e-06, "loss": 0.0911, "step": 185630 }, { "epoch": 3.7789312977099234, "grad_norm": 0.09602795897900446, "learning_rate": 1.7093677390784768e-06, "loss": 0.0403, "step": 185640 }, { "epoch": 3.7791348600508905, "grad_norm": 2.7964659142304527, "learning_rate": 1.7088327802262677e-06, "loss": 0.0701, "step": 185650 }, { "epoch": 3.7793384223918576, "grad_norm": 0.18881624771619823, "learning_rate": 1.7082978878434198e-06, "loss": 0.0341, "step": 185660 }, { "epoch": 3.779541984732824, "grad_norm": 62.832551684076556, "learning_rate": 1.7077630619407355e-06, "loss": 0.0319, "step": 185670 }, { "epoch": 3.7797455470737913, "grad_norm": 0.2232328747830915, "learning_rate": 1.7072283025290164e-06, "loss": 0.0063, "step": 185680 }, { "epoch": 3.7799491094147584, "grad_norm": 0.01710758912865117, "learning_rate": 1.7066936096190635e-06, "loss": 0.0012, "step": 185690 }, { "epoch": 3.780152671755725, "grad_norm": 0.7149808654191608, "learning_rate": 1.7061589832216758e-06, "loss": 0.0869, "step": 185700 }, { "epoch": 3.780356234096692, "grad_norm": 0.024569894709126766, "learning_rate": 1.7056244233476465e-06, "loss": 0.0543, "step": 185710 }, { "epoch": 3.780559796437659, "grad_norm": 0.019963163166941703, "learning_rate": 1.7050899300077771e-06, "loss": 0.0424, "step": 185720 }, { "epoch": 3.780763358778626, "grad_norm": 19.31101578001544, "learning_rate": 1.704555503212862e-06, "loss": 0.0686, "step": 185730 }, { "epoch": 3.780966921119593, "grad_norm": 0.14489715124320904, "learning_rate": 1.7040211429736897e-06, "loss": 0.1297, "step": 185740 }, { "epoch": 3.78117048346056, "grad_norm": 0.3782816231276957, "learning_rate": 1.7034868493010577e-06, "loss": 0.032, "step": 185750 }, { "epoch": 3.7813740458015266, "grad_norm": 9.95274975805987, "learning_rate": 1.7029526222057552e-06, "loss": 0.039, "step": 185760 }, { "epoch": 3.7815776081424937, "grad_norm": 0.05050025802478172, "learning_rate": 1.7024184616985683e-06, "loss": 0.0709, "step": 185770 }, { "epoch": 3.7817811704834607, "grad_norm": 0.07909725015809436, "learning_rate": 1.7018843677902914e-06, "loss": 0.0899, "step": 185780 }, { "epoch": 3.7819847328244274, "grad_norm": 0.07821141172578237, "learning_rate": 1.7013503404917058e-06, "loss": 0.0004, "step": 185790 }, { "epoch": 3.7821882951653945, "grad_norm": 10.462513955736844, "learning_rate": 1.700816379813599e-06, "loss": 0.0468, "step": 185800 }, { "epoch": 3.7823918575063615, "grad_norm": 0.3020367753594723, "learning_rate": 1.7002824857667543e-06, "loss": 0.0756, "step": 185810 }, { "epoch": 3.782595419847328, "grad_norm": 0.21108695641863676, "learning_rate": 1.6997486583619549e-06, "loss": 0.0525, "step": 185820 }, { "epoch": 3.7827989821882952, "grad_norm": 0.10383295339360334, "learning_rate": 1.699214897609982e-06, "loss": 0.0325, "step": 185830 }, { "epoch": 3.7830025445292623, "grad_norm": 0.002056811527290588, "learning_rate": 1.6986812035216154e-06, "loss": 0.0004, "step": 185840 }, { "epoch": 3.783206106870229, "grad_norm": 0.018224157708181244, "learning_rate": 1.6981475761076343e-06, "loss": 0.0446, "step": 185850 }, { "epoch": 3.783409669211196, "grad_norm": 0.05981827341158493, "learning_rate": 1.6976140153788156e-06, "loss": 0.1001, "step": 185860 }, { "epoch": 3.783613231552163, "grad_norm": 0.08236043299924785, "learning_rate": 1.6970805213459357e-06, "loss": 0.0541, "step": 185870 }, { "epoch": 3.7838167938931297, "grad_norm": 0.13975202687113214, "learning_rate": 1.6965470940197682e-06, "loss": 0.082, "step": 185880 }, { "epoch": 3.784020356234097, "grad_norm": 0.19813977020034126, "learning_rate": 1.696013733411087e-06, "loss": 0.0707, "step": 185890 }, { "epoch": 3.7842239185750635, "grad_norm": 0.05720131490457754, "learning_rate": 1.6954804395306646e-06, "loss": 0.0029, "step": 185900 }, { "epoch": 3.7844274809160305, "grad_norm": 0.04093594416751921, "learning_rate": 1.69494721238927e-06, "loss": 0.032, "step": 185910 }, { "epoch": 3.7846310432569976, "grad_norm": 0.026611295344166043, "learning_rate": 1.6944140519976753e-06, "loss": 0.0084, "step": 185920 }, { "epoch": 3.7848346055979643, "grad_norm": 9.080431978059572, "learning_rate": 1.6938809583666432e-06, "loss": 0.1145, "step": 185930 }, { "epoch": 3.7850381679389313, "grad_norm": 0.015181076792945216, "learning_rate": 1.6933479315069467e-06, "loss": 0.0795, "step": 185940 }, { "epoch": 3.785241730279898, "grad_norm": 20.731742049835994, "learning_rate": 1.6928149714293463e-06, "loss": 0.0284, "step": 185950 }, { "epoch": 3.785445292620865, "grad_norm": 0.05808246965057327, "learning_rate": 1.6922820781446047e-06, "loss": 0.0337, "step": 185960 }, { "epoch": 3.785648854961832, "grad_norm": 9.745686583380891, "learning_rate": 1.691749251663491e-06, "loss": 0.0673, "step": 185970 }, { "epoch": 3.7858524173027988, "grad_norm": 0.1782374403369211, "learning_rate": 1.69121649199676e-06, "loss": 0.0862, "step": 185980 }, { "epoch": 3.786055979643766, "grad_norm": 0.006700454960189512, "learning_rate": 1.6906837991551722e-06, "loss": 0.0571, "step": 185990 }, { "epoch": 3.786259541984733, "grad_norm": 0.05118357204729857, "learning_rate": 1.6901511731494907e-06, "loss": 0.0719, "step": 186000 }, { "epoch": 3.7864631043256995, "grad_norm": 0.07151893285304461, "learning_rate": 1.6896186139904673e-06, "loss": 0.0039, "step": 186010 }, { "epoch": 3.7866666666666666, "grad_norm": 0.04036949171217353, "learning_rate": 1.6890861216888603e-06, "loss": 0.0555, "step": 186020 }, { "epoch": 3.7868702290076337, "grad_norm": 0.011654469653720708, "learning_rate": 1.6885536962554234e-06, "loss": 0.0597, "step": 186030 }, { "epoch": 3.7870737913486003, "grad_norm": 2.1050163849822883, "learning_rate": 1.6880213377009098e-06, "loss": 0.038, "step": 186040 }, { "epoch": 3.7872773536895674, "grad_norm": 0.20474289976365787, "learning_rate": 1.6874890460360705e-06, "loss": 0.0457, "step": 186050 }, { "epoch": 3.7874809160305345, "grad_norm": 8.238768002404575, "learning_rate": 1.6869568212716569e-06, "loss": 0.1371, "step": 186060 }, { "epoch": 3.787684478371501, "grad_norm": 0.04036978307746622, "learning_rate": 1.6864246634184189e-06, "loss": 0.0599, "step": 186070 }, { "epoch": 3.787888040712468, "grad_norm": 0.008068370470943999, "learning_rate": 1.6858925724870994e-06, "loss": 0.0007, "step": 186080 }, { "epoch": 3.7880916030534353, "grad_norm": 58.846124018205344, "learning_rate": 1.6853605484884494e-06, "loss": 0.0792, "step": 186090 }, { "epoch": 3.788295165394402, "grad_norm": 0.04137892731809218, "learning_rate": 1.6848285914332146e-06, "loss": 0.0402, "step": 186100 }, { "epoch": 3.788498727735369, "grad_norm": 0.06371560244798634, "learning_rate": 1.6842967013321325e-06, "loss": 0.0224, "step": 186110 }, { "epoch": 3.788702290076336, "grad_norm": 8.314018790255982, "learning_rate": 1.6837648781959509e-06, "loss": 0.0618, "step": 186120 }, { "epoch": 3.7889058524173027, "grad_norm": 0.004427682250353564, "learning_rate": 1.6832331220354103e-06, "loss": 0.0024, "step": 186130 }, { "epoch": 3.78910941475827, "grad_norm": 0.016271216282734452, "learning_rate": 1.6827014328612456e-06, "loss": 0.1182, "step": 186140 }, { "epoch": 3.789312977099237, "grad_norm": 0.0706069752216618, "learning_rate": 1.6821698106842016e-06, "loss": 0.0074, "step": 186150 }, { "epoch": 3.7895165394402035, "grad_norm": 0.19190511292716983, "learning_rate": 1.68163825551501e-06, "loss": 0.0022, "step": 186160 }, { "epoch": 3.7897201017811706, "grad_norm": 0.2988021626493042, "learning_rate": 1.681106767364406e-06, "loss": 0.0542, "step": 186170 }, { "epoch": 3.7899236641221377, "grad_norm": 0.03254390959553442, "learning_rate": 1.680575346243129e-06, "loss": 0.0221, "step": 186180 }, { "epoch": 3.7901272264631043, "grad_norm": 0.3328166105929386, "learning_rate": 1.6800439921619067e-06, "loss": 0.1469, "step": 186190 }, { "epoch": 3.7903307888040714, "grad_norm": 0.19074469353726595, "learning_rate": 1.6795127051314697e-06, "loss": 0.0173, "step": 186200 }, { "epoch": 3.790534351145038, "grad_norm": 0.012812940878281432, "learning_rate": 1.678981485162554e-06, "loss": 0.0884, "step": 186210 }, { "epoch": 3.790737913486005, "grad_norm": 0.01778499357347561, "learning_rate": 1.6784503322658836e-06, "loss": 0.0665, "step": 186220 }, { "epoch": 3.790941475826972, "grad_norm": 0.5982805852098274, "learning_rate": 1.6779192464521864e-06, "loss": 0.0234, "step": 186230 }, { "epoch": 3.791145038167939, "grad_norm": 0.027607333069470368, "learning_rate": 1.677388227732189e-06, "loss": 0.0486, "step": 186240 }, { "epoch": 3.791348600508906, "grad_norm": 13.468592541197694, "learning_rate": 1.6768572761166157e-06, "loss": 0.121, "step": 186250 }, { "epoch": 3.7915521628498725, "grad_norm": 0.023438547023089073, "learning_rate": 1.6763263916161899e-06, "loss": 0.0276, "step": 186260 }, { "epoch": 3.7917557251908396, "grad_norm": 0.019387326540688997, "learning_rate": 1.6757955742416332e-06, "loss": 0.0109, "step": 186270 }, { "epoch": 3.7919592875318067, "grad_norm": 0.43140157258421025, "learning_rate": 1.6752648240036668e-06, "loss": 0.0686, "step": 186280 }, { "epoch": 3.7921628498727733, "grad_norm": 0.030157013059463538, "learning_rate": 1.6747341409130092e-06, "loss": 0.0391, "step": 186290 }, { "epoch": 3.7923664122137404, "grad_norm": 0.027977576727168434, "learning_rate": 1.6742035249803789e-06, "loss": 0.0013, "step": 186300 }, { "epoch": 3.7925699745547075, "grad_norm": 0.24414780639925873, "learning_rate": 1.6736729762164933e-06, "loss": 0.0068, "step": 186310 }, { "epoch": 3.792773536895674, "grad_norm": 8.993222340122806, "learning_rate": 1.6731424946320629e-06, "loss": 0.0871, "step": 186320 }, { "epoch": 3.792977099236641, "grad_norm": 0.06646993184846763, "learning_rate": 1.6726120802378071e-06, "loss": 0.0088, "step": 186330 }, { "epoch": 3.7931806615776082, "grad_norm": 10.76614969182533, "learning_rate": 1.6720817330444366e-06, "loss": 0.0471, "step": 186340 }, { "epoch": 3.793384223918575, "grad_norm": 0.058078766376803086, "learning_rate": 1.671551453062661e-06, "loss": 0.0902, "step": 186350 }, { "epoch": 3.793587786259542, "grad_norm": 0.14897815280219104, "learning_rate": 1.6710212403031888e-06, "loss": 0.0215, "step": 186360 }, { "epoch": 3.793791348600509, "grad_norm": 7.801309918973821, "learning_rate": 1.6704910947767339e-06, "loss": 0.05, "step": 186370 }, { "epoch": 3.7939949109414757, "grad_norm": 0.20888985996024662, "learning_rate": 1.6699610164939978e-06, "loss": 0.0301, "step": 186380 }, { "epoch": 3.7941984732824428, "grad_norm": 0.015795797435262295, "learning_rate": 1.6694310054656881e-06, "loss": 0.0468, "step": 186390 }, { "epoch": 3.79440203562341, "grad_norm": 0.027175261721076933, "learning_rate": 1.6689010617025092e-06, "loss": 0.0436, "step": 186400 }, { "epoch": 3.7946055979643765, "grad_norm": 0.44512247328927773, "learning_rate": 1.6683711852151635e-06, "loss": 0.0296, "step": 186410 }, { "epoch": 3.7948091603053435, "grad_norm": 10.513513459611456, "learning_rate": 1.667841376014353e-06, "loss": 0.1149, "step": 186420 }, { "epoch": 3.7950127226463106, "grad_norm": 0.2694923318097748, "learning_rate": 1.6673116341107775e-06, "loss": 0.0022, "step": 186430 }, { "epoch": 3.7952162849872773, "grad_norm": 0.3170941603686342, "learning_rate": 1.6667819595151364e-06, "loss": 0.1397, "step": 186440 }, { "epoch": 3.7954198473282443, "grad_norm": 0.12113474679770483, "learning_rate": 1.6662523522381264e-06, "loss": 0.0321, "step": 186450 }, { "epoch": 3.7956234096692114, "grad_norm": 0.024653108937556317, "learning_rate": 1.665722812290444e-06, "loss": 0.0442, "step": 186460 }, { "epoch": 3.795826972010178, "grad_norm": 7.728852310754305, "learning_rate": 1.665193339682784e-06, "loss": 0.132, "step": 186470 }, { "epoch": 3.796030534351145, "grad_norm": 0.13722114500425966, "learning_rate": 1.6646639344258396e-06, "loss": 0.0659, "step": 186480 }, { "epoch": 3.796234096692112, "grad_norm": 0.01942363650750282, "learning_rate": 1.6641345965303036e-06, "loss": 0.1018, "step": 186490 }, { "epoch": 3.796437659033079, "grad_norm": 12.454870301352075, "learning_rate": 1.6636053260068668e-06, "loss": 0.1213, "step": 186500 }, { "epoch": 3.796641221374046, "grad_norm": 8.422395594076622, "learning_rate": 1.6630761228662146e-06, "loss": 0.0255, "step": 186510 }, { "epoch": 3.796844783715013, "grad_norm": 7.6435257398647005, "learning_rate": 1.6625469871190414e-06, "loss": 0.1282, "step": 186520 }, { "epoch": 3.7970483460559796, "grad_norm": 0.17509478464231815, "learning_rate": 1.6620179187760288e-06, "loss": 0.0413, "step": 186530 }, { "epoch": 3.7972519083969467, "grad_norm": 0.004828856975657477, "learning_rate": 1.6614889178478622e-06, "loss": 0.0348, "step": 186540 }, { "epoch": 3.7974554707379133, "grad_norm": 0.01647659303525342, "learning_rate": 1.6609599843452296e-06, "loss": 0.0318, "step": 186550 }, { "epoch": 3.7976590330788804, "grad_norm": 0.016938424964718736, "learning_rate": 1.6604311182788096e-06, "loss": 0.0454, "step": 186560 }, { "epoch": 3.797862595419847, "grad_norm": 0.03175033900542037, "learning_rate": 1.6599023196592828e-06, "loss": 0.0969, "step": 186570 }, { "epoch": 3.798066157760814, "grad_norm": 13.997993446816574, "learning_rate": 1.659373588497334e-06, "loss": 0.0379, "step": 186580 }, { "epoch": 3.798269720101781, "grad_norm": 0.10037124050516281, "learning_rate": 1.6588449248036359e-06, "loss": 0.0202, "step": 186590 }, { "epoch": 3.798473282442748, "grad_norm": 0.1703138519594161, "learning_rate": 1.6583163285888687e-06, "loss": 0.063, "step": 186600 }, { "epoch": 3.798676844783715, "grad_norm": 9.306282149761646, "learning_rate": 1.6577877998637071e-06, "loss": 0.0514, "step": 186610 }, { "epoch": 3.798880407124682, "grad_norm": 9.836992965087155, "learning_rate": 1.6572593386388254e-06, "loss": 0.0303, "step": 186620 }, { "epoch": 3.7990839694656486, "grad_norm": 0.05807188249398936, "learning_rate": 1.6567309449248968e-06, "loss": 0.0285, "step": 186630 }, { "epoch": 3.7992875318066157, "grad_norm": 1.0672238292606184, "learning_rate": 1.6562026187325936e-06, "loss": 0.0038, "step": 186640 }, { "epoch": 3.799491094147583, "grad_norm": 0.06410611667844747, "learning_rate": 1.6556743600725844e-06, "loss": 0.0011, "step": 186650 }, { "epoch": 3.7996946564885494, "grad_norm": 0.004636546669409004, "learning_rate": 1.6551461689555392e-06, "loss": 0.015, "step": 186660 }, { "epoch": 3.7998982188295165, "grad_norm": 26.603194374159603, "learning_rate": 1.6546180453921256e-06, "loss": 0.119, "step": 186670 }, { "epoch": 3.8001017811704836, "grad_norm": 0.2011270578356755, "learning_rate": 1.6540899893930108e-06, "loss": 0.0429, "step": 186680 }, { "epoch": 3.80030534351145, "grad_norm": 1.9848928832087274, "learning_rate": 1.6535620009688546e-06, "loss": 0.0361, "step": 186690 }, { "epoch": 3.8005089058524173, "grad_norm": 0.025469776494885465, "learning_rate": 1.6530340801303262e-06, "loss": 0.0006, "step": 186700 }, { "epoch": 3.8007124681933844, "grad_norm": 0.059263951772160074, "learning_rate": 1.6525062268880865e-06, "loss": 0.0355, "step": 186710 }, { "epoch": 3.800916030534351, "grad_norm": 0.03894007440498038, "learning_rate": 1.6519784412527922e-06, "loss": 0.0741, "step": 186720 }, { "epoch": 3.801119592875318, "grad_norm": 0.14680461929163277, "learning_rate": 1.6514507232351085e-06, "loss": 0.0844, "step": 186730 }, { "epoch": 3.801323155216285, "grad_norm": 0.1042468025855791, "learning_rate": 1.6509230728456887e-06, "loss": 0.0014, "step": 186740 }, { "epoch": 3.801526717557252, "grad_norm": 7.767961185738172, "learning_rate": 1.6503954900951896e-06, "loss": 0.0569, "step": 186750 }, { "epoch": 3.801730279898219, "grad_norm": 8.301143051691549, "learning_rate": 1.649867974994271e-06, "loss": 0.0575, "step": 186760 }, { "epoch": 3.801933842239186, "grad_norm": 17.50648407664929, "learning_rate": 1.6493405275535817e-06, "loss": 0.0662, "step": 186770 }, { "epoch": 3.8021374045801526, "grad_norm": 0.02574427741942387, "learning_rate": 1.6488131477837765e-06, "loss": 0.0477, "step": 186780 }, { "epoch": 3.8023409669211197, "grad_norm": 0.030031068682235578, "learning_rate": 1.648285835695506e-06, "loss": 0.0615, "step": 186790 }, { "epoch": 3.8025445292620867, "grad_norm": 26.984407952620494, "learning_rate": 1.6477585912994197e-06, "loss": 0.1292, "step": 186800 }, { "epoch": 3.8027480916030534, "grad_norm": 0.3351522385000783, "learning_rate": 1.647231414606167e-06, "loss": 0.0738, "step": 186810 }, { "epoch": 3.8029516539440205, "grad_norm": 7.325609891789582, "learning_rate": 1.6467043056263937e-06, "loss": 0.1721, "step": 186820 }, { "epoch": 3.8031552162849875, "grad_norm": 0.05584691691054302, "learning_rate": 1.6461772643707458e-06, "loss": 0.0688, "step": 186830 }, { "epoch": 3.803358778625954, "grad_norm": 0.12391646656233136, "learning_rate": 1.6456502908498678e-06, "loss": 0.0382, "step": 186840 }, { "epoch": 3.8035623409669213, "grad_norm": 13.29951446678721, "learning_rate": 1.6451233850744031e-06, "loss": 0.0821, "step": 186850 }, { "epoch": 3.803765903307888, "grad_norm": 8.571112797113397, "learning_rate": 1.6445965470549919e-06, "loss": 0.0755, "step": 186860 }, { "epoch": 3.803969465648855, "grad_norm": 0.011228732778436617, "learning_rate": 1.6440697768022756e-06, "loss": 0.1381, "step": 186870 }, { "epoch": 3.804173027989822, "grad_norm": 0.09246928598114572, "learning_rate": 1.6435430743268927e-06, "loss": 0.0202, "step": 186880 }, { "epoch": 3.8043765903307887, "grad_norm": 0.00030610632703936044, "learning_rate": 1.6430164396394815e-06, "loss": 0.0403, "step": 186890 }, { "epoch": 3.8045801526717558, "grad_norm": 0.12474918937475084, "learning_rate": 1.642489872750676e-06, "loss": 0.001, "step": 186900 }, { "epoch": 3.8047837150127224, "grad_norm": 0.09771543913136095, "learning_rate": 1.6419633736711105e-06, "loss": 0.0244, "step": 186910 }, { "epoch": 3.8049872773536895, "grad_norm": 0.019045855829366604, "learning_rate": 1.6414369424114224e-06, "loss": 0.0711, "step": 186920 }, { "epoch": 3.8051908396946565, "grad_norm": 0.07228355692887128, "learning_rate": 1.64091057898224e-06, "loss": 0.0446, "step": 186930 }, { "epoch": 3.805394402035623, "grad_norm": 0.10326949307858573, "learning_rate": 1.6403842833941929e-06, "loss": 0.0396, "step": 186940 }, { "epoch": 3.8055979643765903, "grad_norm": 0.0019544231075487193, "learning_rate": 1.6398580556579159e-06, "loss": 0.0929, "step": 186950 }, { "epoch": 3.8058015267175573, "grad_norm": 0.03969615692433963, "learning_rate": 1.6393318957840316e-06, "loss": 0.0569, "step": 186960 }, { "epoch": 3.806005089058524, "grad_norm": 0.0034982669515615026, "learning_rate": 1.6388058037831678e-06, "loss": 0.0053, "step": 186970 }, { "epoch": 3.806208651399491, "grad_norm": 0.43118559873678297, "learning_rate": 1.6382797796659505e-06, "loss": 0.0263, "step": 186980 }, { "epoch": 3.806412213740458, "grad_norm": 15.939396138478692, "learning_rate": 1.6377538234430024e-06, "loss": 0.0175, "step": 186990 }, { "epoch": 3.8066157760814248, "grad_norm": 0.31344290772957567, "learning_rate": 1.637227935124947e-06, "loss": 0.0415, "step": 187000 }, { "epoch": 3.806819338422392, "grad_norm": 0.01973188669503257, "learning_rate": 1.6367021147224043e-06, "loss": 0.0865, "step": 187010 }, { "epoch": 3.807022900763359, "grad_norm": 0.3966296386267726, "learning_rate": 1.6361763622459942e-06, "loss": 0.0409, "step": 187020 }, { "epoch": 3.8072264631043256, "grad_norm": 0.14904935288555837, "learning_rate": 1.6356506777063352e-06, "loss": 0.0084, "step": 187030 }, { "epoch": 3.8074300254452926, "grad_norm": 45.94969957354935, "learning_rate": 1.635125061114044e-06, "loss": 0.0921, "step": 187040 }, { "epoch": 3.8076335877862597, "grad_norm": 0.018735393365269765, "learning_rate": 1.6345995124797376e-06, "loss": 0.0646, "step": 187050 }, { "epoch": 3.8078371501272263, "grad_norm": 0.5928800270041283, "learning_rate": 1.6340740318140259e-06, "loss": 0.0473, "step": 187060 }, { "epoch": 3.8080407124681934, "grad_norm": 0.0007943163369249785, "learning_rate": 1.6335486191275252e-06, "loss": 0.0699, "step": 187070 }, { "epoch": 3.8082442748091605, "grad_norm": 19.71138370067042, "learning_rate": 1.6330232744308482e-06, "loss": 0.0612, "step": 187080 }, { "epoch": 3.808447837150127, "grad_norm": 0.5107195172699112, "learning_rate": 1.6324979977345996e-06, "loss": 0.0258, "step": 187090 }, { "epoch": 3.808651399491094, "grad_norm": 0.08074596548019594, "learning_rate": 1.6319727890493948e-06, "loss": 0.0831, "step": 187100 }, { "epoch": 3.8088549618320613, "grad_norm": 0.004980142638664681, "learning_rate": 1.631447648385835e-06, "loss": 0.098, "step": 187110 }, { "epoch": 3.809058524173028, "grad_norm": 0.0254756238945312, "learning_rate": 1.6309225757545278e-06, "loss": 0.0015, "step": 187120 }, { "epoch": 3.809262086513995, "grad_norm": 0.011424367321938612, "learning_rate": 1.6303975711660814e-06, "loss": 0.0075, "step": 187130 }, { "epoch": 3.809465648854962, "grad_norm": 0.042321619242077245, "learning_rate": 1.6298726346310945e-06, "loss": 0.076, "step": 187140 }, { "epoch": 3.8096692111959287, "grad_norm": 0.029477161564703577, "learning_rate": 1.6293477661601687e-06, "loss": 0.0809, "step": 187150 }, { "epoch": 3.809872773536896, "grad_norm": 0.037333360772645, "learning_rate": 1.6288229657639093e-06, "loss": 0.0678, "step": 187160 }, { "epoch": 3.8100763358778624, "grad_norm": 0.014119893353066537, "learning_rate": 1.6282982334529106e-06, "loss": 0.0741, "step": 187170 }, { "epoch": 3.8102798982188295, "grad_norm": 0.12069575650290919, "learning_rate": 1.6277735692377722e-06, "loss": 0.0013, "step": 187180 }, { "epoch": 3.8104834605597966, "grad_norm": 0.00827374356710684, "learning_rate": 1.6272489731290897e-06, "loss": 0.0527, "step": 187190 }, { "epoch": 3.810687022900763, "grad_norm": 11.476069018258249, "learning_rate": 1.626724445137458e-06, "loss": 0.0147, "step": 187200 }, { "epoch": 3.8108905852417303, "grad_norm": 0.6067680325696481, "learning_rate": 1.6261999852734712e-06, "loss": 0.0682, "step": 187210 }, { "epoch": 3.811094147582697, "grad_norm": 0.12025234553240352, "learning_rate": 1.6256755935477214e-06, "loss": 0.0073, "step": 187220 }, { "epoch": 3.811297709923664, "grad_norm": 0.061192862166626054, "learning_rate": 1.6251512699707983e-06, "loss": 0.0568, "step": 187230 }, { "epoch": 3.811501272264631, "grad_norm": 12.510950204343866, "learning_rate": 1.6246270145532928e-06, "loss": 0.1054, "step": 187240 }, { "epoch": 3.8117048346055977, "grad_norm": 0.061895642488954156, "learning_rate": 1.6241028273057918e-06, "loss": 0.0614, "step": 187250 }, { "epoch": 3.811908396946565, "grad_norm": 0.042054983889618314, "learning_rate": 1.6235787082388837e-06, "loss": 0.0199, "step": 187260 }, { "epoch": 3.812111959287532, "grad_norm": 0.016984856071122104, "learning_rate": 1.6230546573631494e-06, "loss": 0.0605, "step": 187270 }, { "epoch": 3.8123155216284985, "grad_norm": 0.4838280370658368, "learning_rate": 1.6225306746891778e-06, "loss": 0.0518, "step": 187280 }, { "epoch": 3.8125190839694656, "grad_norm": 0.1291741066912304, "learning_rate": 1.6220067602275508e-06, "loss": 0.0019, "step": 187290 }, { "epoch": 3.8127226463104327, "grad_norm": 6.123478832050308, "learning_rate": 1.6214829139888445e-06, "loss": 0.0159, "step": 187300 }, { "epoch": 3.8129262086513993, "grad_norm": 0.0608629386677174, "learning_rate": 1.620959135983644e-06, "loss": 0.0811, "step": 187310 }, { "epoch": 3.8131297709923664, "grad_norm": 2.677900864143249, "learning_rate": 1.6204354262225276e-06, "loss": 0.0077, "step": 187320 }, { "epoch": 3.8133333333333335, "grad_norm": 7.267010381021379, "learning_rate": 1.6199117847160689e-06, "loss": 0.0711, "step": 187330 }, { "epoch": 3.8135368956743, "grad_norm": 1.1540781762721568, "learning_rate": 1.6193882114748454e-06, "loss": 0.0082, "step": 187340 }, { "epoch": 3.813740458015267, "grad_norm": 0.07944402039524019, "learning_rate": 1.6188647065094314e-06, "loss": 0.0029, "step": 187350 }, { "epoch": 3.8139440203562343, "grad_norm": 0.0019510243785744803, "learning_rate": 1.618341269830399e-06, "loss": 0.0016, "step": 187360 }, { "epoch": 3.814147582697201, "grad_norm": 0.03491114636419729, "learning_rate": 1.6178179014483204e-06, "loss": 0.0117, "step": 187370 }, { "epoch": 3.814351145038168, "grad_norm": 0.04532121840042199, "learning_rate": 1.6172946013737655e-06, "loss": 0.0964, "step": 187380 }, { "epoch": 3.814554707379135, "grad_norm": 0.11609640940693844, "learning_rate": 1.6167713696173031e-06, "loss": 0.0928, "step": 187390 }, { "epoch": 3.8147582697201017, "grad_norm": 0.15915481949022114, "learning_rate": 1.6162482061895008e-06, "loss": 0.0691, "step": 187400 }, { "epoch": 3.8149618320610688, "grad_norm": 0.08776867294269532, "learning_rate": 1.6157251111009237e-06, "loss": 0.0085, "step": 187410 }, { "epoch": 3.815165394402036, "grad_norm": 0.011404079101786186, "learning_rate": 1.6152020843621368e-06, "loss": 0.0004, "step": 187420 }, { "epoch": 3.8153689567430025, "grad_norm": 0.2940770161251918, "learning_rate": 1.6146791259837036e-06, "loss": 0.029, "step": 187430 }, { "epoch": 3.8155725190839695, "grad_norm": 0.016702617765692967, "learning_rate": 1.6141562359761859e-06, "loss": 0.0343, "step": 187440 }, { "epoch": 3.8157760814249366, "grad_norm": 0.03400596484129547, "learning_rate": 1.6136334143501454e-06, "loss": 0.0296, "step": 187450 }, { "epoch": 3.8159796437659033, "grad_norm": 0.016072313868738725, "learning_rate": 1.6131106611161361e-06, "loss": 0.0154, "step": 187460 }, { "epoch": 3.8161832061068703, "grad_norm": 15.64109637961748, "learning_rate": 1.612587976284723e-06, "loss": 0.1636, "step": 187470 }, { "epoch": 3.8163867684478374, "grad_norm": 0.012022905736412562, "learning_rate": 1.6120653598664571e-06, "loss": 0.0011, "step": 187480 }, { "epoch": 3.816590330788804, "grad_norm": 0.015522160015121111, "learning_rate": 1.6115428118718929e-06, "loss": 0.0372, "step": 187490 }, { "epoch": 3.816793893129771, "grad_norm": 0.01587422203564245, "learning_rate": 1.611020332311589e-06, "loss": 0.0391, "step": 187500 }, { "epoch": 3.8169974554707378, "grad_norm": 0.30243520884968444, "learning_rate": 1.6104979211960937e-06, "loss": 0.0165, "step": 187510 }, { "epoch": 3.817201017811705, "grad_norm": 0.12868216692788265, "learning_rate": 1.609975578535956e-06, "loss": 0.0682, "step": 187520 }, { "epoch": 3.8174045801526715, "grad_norm": 0.10991454007374298, "learning_rate": 1.6094533043417314e-06, "loss": 0.0355, "step": 187530 }, { "epoch": 3.8176081424936386, "grad_norm": 0.05589965834348096, "learning_rate": 1.6089310986239631e-06, "loss": 0.0222, "step": 187540 }, { "epoch": 3.8178117048346056, "grad_norm": 0.014528642970631245, "learning_rate": 1.6084089613931985e-06, "loss": 0.0601, "step": 187550 }, { "epoch": 3.8180152671755723, "grad_norm": 0.09397644550020907, "learning_rate": 1.6078868926599833e-06, "loss": 0.0025, "step": 187560 }, { "epoch": 3.8182188295165393, "grad_norm": 0.012231051039808696, "learning_rate": 1.6073648924348612e-06, "loss": 0.0123, "step": 187570 }, { "epoch": 3.8184223918575064, "grad_norm": 0.27387920086635337, "learning_rate": 1.6068429607283753e-06, "loss": 0.0349, "step": 187580 }, { "epoch": 3.818625954198473, "grad_norm": 0.09621031509131604, "learning_rate": 1.6063210975510656e-06, "loss": 0.0257, "step": 187590 }, { "epoch": 3.81882951653944, "grad_norm": 1.5003283697995242, "learning_rate": 1.6057993029134723e-06, "loss": 0.0166, "step": 187600 }, { "epoch": 3.819033078880407, "grad_norm": 0.036367643189171926, "learning_rate": 1.6052775768261342e-06, "loss": 0.0042, "step": 187610 }, { "epoch": 3.819236641221374, "grad_norm": 0.06550045726359924, "learning_rate": 1.6047559192995877e-06, "loss": 0.1248, "step": 187620 }, { "epoch": 3.819440203562341, "grad_norm": 0.007716105185929172, "learning_rate": 1.6042343303443698e-06, "loss": 0.0699, "step": 187630 }, { "epoch": 3.819643765903308, "grad_norm": 0.0072303659911774985, "learning_rate": 1.6037128099710104e-06, "loss": 0.0003, "step": 187640 }, { "epoch": 3.8198473282442746, "grad_norm": 0.6892349714793875, "learning_rate": 1.6031913581900466e-06, "loss": 0.0514, "step": 187650 }, { "epoch": 3.8200508905852417, "grad_norm": 0.1199634408949693, "learning_rate": 1.60266997501201e-06, "loss": 0.0039, "step": 187660 }, { "epoch": 3.820254452926209, "grad_norm": 0.10104698069265118, "learning_rate": 1.6021486604474257e-06, "loss": 0.2184, "step": 187670 }, { "epoch": 3.8204580152671754, "grad_norm": 0.011228605171426235, "learning_rate": 1.6016274145068272e-06, "loss": 0.1609, "step": 187680 }, { "epoch": 3.8206615776081425, "grad_norm": 0.08237331102089233, "learning_rate": 1.6011062372007424e-06, "loss": 0.1018, "step": 187690 }, { "epoch": 3.8208651399491096, "grad_norm": 0.0054320780230271235, "learning_rate": 1.6005851285396907e-06, "loss": 0.0291, "step": 187700 }, { "epoch": 3.8210687022900762, "grad_norm": 0.00836455821031789, "learning_rate": 1.6000640885342045e-06, "loss": 0.0039, "step": 187710 }, { "epoch": 3.8212722646310433, "grad_norm": 0.022044053041360936, "learning_rate": 1.5995431171948016e-06, "loss": 0.0508, "step": 187720 }, { "epoch": 3.8214758269720104, "grad_norm": 0.15408437284737736, "learning_rate": 1.5990222145320034e-06, "loss": 0.0097, "step": 187730 }, { "epoch": 3.821679389312977, "grad_norm": 0.04232191182341888, "learning_rate": 1.5985013805563348e-06, "loss": 0.0125, "step": 187740 }, { "epoch": 3.821882951653944, "grad_norm": 14.339912207347174, "learning_rate": 1.5979806152783112e-06, "loss": 0.0574, "step": 187750 }, { "epoch": 3.822086513994911, "grad_norm": 24.3525039129498, "learning_rate": 1.59745991870845e-06, "loss": 0.0106, "step": 187760 }, { "epoch": 3.822290076335878, "grad_norm": 0.011509149520283066, "learning_rate": 1.596939290857269e-06, "loss": 0.0704, "step": 187770 }, { "epoch": 3.822493638676845, "grad_norm": 1.1148655007714328, "learning_rate": 1.5964187317352813e-06, "loss": 0.1337, "step": 187780 }, { "epoch": 3.822697201017812, "grad_norm": 0.011730101827123538, "learning_rate": 1.5958982413530017e-06, "loss": 0.0334, "step": 187790 }, { "epoch": 3.8229007633587786, "grad_norm": 47.69304980042902, "learning_rate": 1.5953778197209412e-06, "loss": 0.1391, "step": 187800 }, { "epoch": 3.8231043256997457, "grad_norm": 0.033511274655163785, "learning_rate": 1.594857466849611e-06, "loss": 0.0017, "step": 187810 }, { "epoch": 3.8233078880407123, "grad_norm": 0.07489004155059698, "learning_rate": 1.5943371827495197e-06, "loss": 0.041, "step": 187820 }, { "epoch": 3.8235114503816794, "grad_norm": 0.03796609436295538, "learning_rate": 1.593816967431176e-06, "loss": 0.0296, "step": 187830 }, { "epoch": 3.8237150127226465, "grad_norm": 0.007960097503802199, "learning_rate": 1.593296820905087e-06, "loss": 0.0005, "step": 187840 }, { "epoch": 3.823918575063613, "grad_norm": 0.036584424819716514, "learning_rate": 1.5927767431817532e-06, "loss": 0.0519, "step": 187850 }, { "epoch": 3.82412213740458, "grad_norm": 0.14950099256533295, "learning_rate": 1.5922567342716832e-06, "loss": 0.0387, "step": 187860 }, { "epoch": 3.824325699745547, "grad_norm": 0.06560056353939271, "learning_rate": 1.5917367941853795e-06, "loss": 0.0102, "step": 187870 }, { "epoch": 3.824529262086514, "grad_norm": 2.032571910356811, "learning_rate": 1.5912169229333391e-06, "loss": 0.0349, "step": 187880 }, { "epoch": 3.824732824427481, "grad_norm": 1.361684411976872, "learning_rate": 1.5906971205260623e-06, "loss": 0.0007, "step": 187890 }, { "epoch": 3.8249363867684476, "grad_norm": 0.4225095626981663, "learning_rate": 1.5901773869740512e-06, "loss": 0.0871, "step": 187900 }, { "epoch": 3.8251399491094147, "grad_norm": 11.132114235823314, "learning_rate": 1.5896577222877978e-06, "loss": 0.0228, "step": 187910 }, { "epoch": 3.8253435114503818, "grad_norm": 0.2044832796942261, "learning_rate": 1.5891381264777994e-06, "loss": 0.0179, "step": 187920 }, { "epoch": 3.8255470737913484, "grad_norm": 0.0012721842927214423, "learning_rate": 1.5886185995545494e-06, "loss": 0.0348, "step": 187930 }, { "epoch": 3.8257506361323155, "grad_norm": 10.365853031265784, "learning_rate": 1.5880991415285412e-06, "loss": 0.123, "step": 187940 }, { "epoch": 3.8259541984732826, "grad_norm": 0.09570695776930802, "learning_rate": 1.587579752410265e-06, "loss": 0.0705, "step": 187950 }, { "epoch": 3.826157760814249, "grad_norm": 0.06735184297580162, "learning_rate": 1.5870604322102108e-06, "loss": 0.0742, "step": 187960 }, { "epoch": 3.8263613231552163, "grad_norm": 0.014983294754169294, "learning_rate": 1.586541180938867e-06, "loss": 0.1857, "step": 187970 }, { "epoch": 3.8265648854961833, "grad_norm": 0.0526662410578001, "learning_rate": 1.5860219986067205e-06, "loss": 0.0954, "step": 187980 }, { "epoch": 3.82676844783715, "grad_norm": 0.02952107227024824, "learning_rate": 1.5855028852242571e-06, "loss": 0.105, "step": 187990 }, { "epoch": 3.826972010178117, "grad_norm": 38.385172983347424, "learning_rate": 1.5849838408019607e-06, "loss": 0.0317, "step": 188000 }, { "epoch": 3.827175572519084, "grad_norm": 20.428715973637328, "learning_rate": 1.5844648653503141e-06, "loss": 0.071, "step": 188010 }, { "epoch": 3.8273791348600508, "grad_norm": 0.09839667904672857, "learning_rate": 1.583945958879799e-06, "loss": 0.0025, "step": 188020 }, { "epoch": 3.827582697201018, "grad_norm": 0.1719125179978118, "learning_rate": 1.5834271214008962e-06, "loss": 0.0727, "step": 188030 }, { "epoch": 3.827786259541985, "grad_norm": 0.01985433578495846, "learning_rate": 1.5829083529240803e-06, "loss": 0.027, "step": 188040 }, { "epoch": 3.8279898218829516, "grad_norm": 0.010763233026974078, "learning_rate": 1.5823896534598332e-06, "loss": 0.1065, "step": 188050 }, { "epoch": 3.8281933842239186, "grad_norm": 0.029392877368413922, "learning_rate": 1.5818710230186301e-06, "loss": 0.0037, "step": 188060 }, { "epoch": 3.8283969465648857, "grad_norm": 0.03433515004340303, "learning_rate": 1.581352461610941e-06, "loss": 0.0185, "step": 188070 }, { "epoch": 3.8286005089058524, "grad_norm": 0.05365685447499735, "learning_rate": 1.5808339692472458e-06, "loss": 0.0686, "step": 188080 }, { "epoch": 3.8288040712468194, "grad_norm": 0.05662629862548086, "learning_rate": 1.5803155459380103e-06, "loss": 0.03, "step": 188090 }, { "epoch": 3.8290076335877865, "grad_norm": 0.03842671536577682, "learning_rate": 1.5797971916937056e-06, "loss": 0.0008, "step": 188100 }, { "epoch": 3.829211195928753, "grad_norm": 0.012551238021108751, "learning_rate": 1.5792789065248049e-06, "loss": 0.0044, "step": 188110 }, { "epoch": 3.82941475826972, "grad_norm": 0.044465807891767836, "learning_rate": 1.5787606904417706e-06, "loss": 0.0732, "step": 188120 }, { "epoch": 3.8296183206106873, "grad_norm": 0.12803310529862358, "learning_rate": 1.5782425434550712e-06, "loss": 0.056, "step": 188130 }, { "epoch": 3.829821882951654, "grad_norm": 0.0007050094669061247, "learning_rate": 1.5777244655751706e-06, "loss": 0.0799, "step": 188140 }, { "epoch": 3.830025445292621, "grad_norm": 4.207042296551647, "learning_rate": 1.5772064568125323e-06, "loss": 0.0687, "step": 188150 }, { "epoch": 3.8302290076335876, "grad_norm": 0.025921822013225303, "learning_rate": 1.576688517177618e-06, "loss": 0.0004, "step": 188160 }, { "epoch": 3.8304325699745547, "grad_norm": 6.352564949171455, "learning_rate": 1.5761706466808886e-06, "loss": 0.0341, "step": 188170 }, { "epoch": 3.8306361323155214, "grad_norm": 17.146713380103666, "learning_rate": 1.575652845332803e-06, "loss": 0.1026, "step": 188180 }, { "epoch": 3.8308396946564884, "grad_norm": 0.02448096647902847, "learning_rate": 1.5751351131438181e-06, "loss": 0.0462, "step": 188190 }, { "epoch": 3.8310432569974555, "grad_norm": 0.05849360374034492, "learning_rate": 1.5746174501243911e-06, "loss": 0.0079, "step": 188200 }, { "epoch": 3.831246819338422, "grad_norm": 0.07694242301366862, "learning_rate": 1.5740998562849779e-06, "loss": 0.0531, "step": 188210 }, { "epoch": 3.8314503816793892, "grad_norm": 0.6261400726524893, "learning_rate": 1.5735823316360277e-06, "loss": 0.0839, "step": 188220 }, { "epoch": 3.8316539440203563, "grad_norm": 0.03088170000888555, "learning_rate": 1.5730648761879974e-06, "loss": 0.0184, "step": 188230 }, { "epoch": 3.831857506361323, "grad_norm": 0.025549344668148483, "learning_rate": 1.5725474899513372e-06, "loss": 0.1122, "step": 188240 }, { "epoch": 3.83206106870229, "grad_norm": 0.07361161664523788, "learning_rate": 1.5720301729364912e-06, "loss": 0.1819, "step": 188250 }, { "epoch": 3.832264631043257, "grad_norm": 22.926539687465528, "learning_rate": 1.571512925153913e-06, "loss": 0.1287, "step": 188260 }, { "epoch": 3.8324681933842237, "grad_norm": 0.07712132476100393, "learning_rate": 1.570995746614049e-06, "loss": 0.0476, "step": 188270 }, { "epoch": 3.832671755725191, "grad_norm": 0.07896521852477917, "learning_rate": 1.570478637327339e-06, "loss": 0.0371, "step": 188280 }, { "epoch": 3.832875318066158, "grad_norm": 22.35014923369487, "learning_rate": 1.5699615973042332e-06, "loss": 0.1538, "step": 188290 }, { "epoch": 3.8330788804071245, "grad_norm": 1.3000481997874758, "learning_rate": 1.5694446265551694e-06, "loss": 0.0925, "step": 188300 }, { "epoch": 3.8332824427480916, "grad_norm": 0.07689418366604518, "learning_rate": 1.56892772509059e-06, "loss": 0.0518, "step": 188310 }, { "epoch": 3.8334860050890587, "grad_norm": 0.5684896560304382, "learning_rate": 1.568410892920934e-06, "loss": 0.0606, "step": 188320 }, { "epoch": 3.8336895674300253, "grad_norm": 78.88180665175388, "learning_rate": 1.5678941300566402e-06, "loss": 0.087, "step": 188330 }, { "epoch": 3.8338931297709924, "grad_norm": 0.19396711742214243, "learning_rate": 1.5673774365081445e-06, "loss": 0.0179, "step": 188340 }, { "epoch": 3.8340966921119595, "grad_norm": 0.3207790958142544, "learning_rate": 1.566860812285883e-06, "loss": 0.0012, "step": 188350 }, { "epoch": 3.834300254452926, "grad_norm": 0.07384564063124748, "learning_rate": 1.5663442574002896e-06, "loss": 0.0395, "step": 188360 }, { "epoch": 3.834503816793893, "grad_norm": 0.03263348632978881, "learning_rate": 1.5658277718617958e-06, "loss": 0.0449, "step": 188370 }, { "epoch": 3.8347073791348603, "grad_norm": 8.32397382475503, "learning_rate": 1.5653113556808335e-06, "loss": 0.0806, "step": 188380 }, { "epoch": 3.834910941475827, "grad_norm": 1.1496809323676405, "learning_rate": 1.5647950088678326e-06, "loss": 0.0545, "step": 188390 }, { "epoch": 3.835114503816794, "grad_norm": 0.020433548942345343, "learning_rate": 1.5642787314332207e-06, "loss": 0.0278, "step": 188400 }, { "epoch": 3.835318066157761, "grad_norm": 0.1086822076703559, "learning_rate": 1.5637625233874249e-06, "loss": 0.0757, "step": 188410 }, { "epoch": 3.8355216284987277, "grad_norm": 0.025540361749546762, "learning_rate": 1.5632463847408714e-06, "loss": 0.082, "step": 188420 }, { "epoch": 3.8357251908396948, "grad_norm": 0.062443408766271746, "learning_rate": 1.5627303155039847e-06, "loss": 0.0193, "step": 188430 }, { "epoch": 3.835928753180662, "grad_norm": 0.05543470316103887, "learning_rate": 1.5622143156871832e-06, "loss": 0.0547, "step": 188440 }, { "epoch": 3.8361323155216285, "grad_norm": 0.05754494176550897, "learning_rate": 1.5616983853008942e-06, "loss": 0.0656, "step": 188450 }, { "epoch": 3.8363358778625956, "grad_norm": 0.07510555768836268, "learning_rate": 1.561182524355534e-06, "loss": 0.0537, "step": 188460 }, { "epoch": 3.836539440203562, "grad_norm": 0.2373615904945421, "learning_rate": 1.56066673286152e-06, "loss": 0.0015, "step": 188470 }, { "epoch": 3.8367430025445293, "grad_norm": 1.6418706538330738, "learning_rate": 1.5601510108292745e-06, "loss": 0.0859, "step": 188480 }, { "epoch": 3.8369465648854963, "grad_norm": 0.0592016711346583, "learning_rate": 1.559635358269208e-06, "loss": 0.0637, "step": 188490 }, { "epoch": 3.837150127226463, "grad_norm": 7.586266110245852, "learning_rate": 1.559119775191737e-06, "loss": 0.1453, "step": 188500 }, { "epoch": 3.83735368956743, "grad_norm": 0.21615810801184088, "learning_rate": 1.5586042616072738e-06, "loss": 0.0682, "step": 188510 }, { "epoch": 3.8375572519083967, "grad_norm": 0.06421686834325553, "learning_rate": 1.55808881752623e-06, "loss": 0.0275, "step": 188520 }, { "epoch": 3.8377608142493638, "grad_norm": 0.03521375872347338, "learning_rate": 1.557573442959015e-06, "loss": 0.0507, "step": 188530 }, { "epoch": 3.837964376590331, "grad_norm": 3.180775734361886, "learning_rate": 1.5570581379160388e-06, "loss": 0.0181, "step": 188540 }, { "epoch": 3.8381679389312975, "grad_norm": 0.05299462678651697, "learning_rate": 1.5565429024077083e-06, "loss": 0.0039, "step": 188550 }, { "epoch": 3.8383715012722646, "grad_norm": 15.532057513667718, "learning_rate": 1.5560277364444283e-06, "loss": 0.0127, "step": 188560 }, { "epoch": 3.8385750636132316, "grad_norm": 0.02282048422087988, "learning_rate": 1.5555126400366044e-06, "loss": 0.0298, "step": 188570 }, { "epoch": 3.8387786259541983, "grad_norm": 0.08912490341382667, "learning_rate": 1.554997613194641e-06, "loss": 0.0101, "step": 188580 }, { "epoch": 3.8389821882951654, "grad_norm": 0.015649859403111, "learning_rate": 1.5544826559289338e-06, "loss": 0.0665, "step": 188590 }, { "epoch": 3.8391857506361324, "grad_norm": 0.07113817924917423, "learning_rate": 1.55396776824989e-06, "loss": 0.073, "step": 188600 }, { "epoch": 3.839389312977099, "grad_norm": 0.019413294267172223, "learning_rate": 1.5534529501679063e-06, "loss": 0.001, "step": 188610 }, { "epoch": 3.839592875318066, "grad_norm": 0.1875535326893976, "learning_rate": 1.5529382016933765e-06, "loss": 0.0012, "step": 188620 }, { "epoch": 3.8397964376590332, "grad_norm": 0.05678583340766136, "learning_rate": 1.5524235228367008e-06, "loss": 0.029, "step": 188630 }, { "epoch": 3.84, "grad_norm": 0.10646428241764858, "learning_rate": 1.5519089136082744e-06, "loss": 0.0012, "step": 188640 }, { "epoch": 3.840203562340967, "grad_norm": 0.08653200824374366, "learning_rate": 1.5513943740184857e-06, "loss": 0.0521, "step": 188650 }, { "epoch": 3.840407124681934, "grad_norm": 0.011804415914174844, "learning_rate": 1.5508799040777318e-06, "loss": 0.0027, "step": 188660 }, { "epoch": 3.8406106870229006, "grad_norm": 0.05901025769787914, "learning_rate": 1.5503655037963994e-06, "loss": 0.0149, "step": 188670 }, { "epoch": 3.8408142493638677, "grad_norm": 13.568331666630874, "learning_rate": 1.5498511731848776e-06, "loss": 0.0749, "step": 188680 }, { "epoch": 3.841017811704835, "grad_norm": 0.5534937600009486, "learning_rate": 1.5493369122535578e-06, "loss": 0.02, "step": 188690 }, { "epoch": 3.8412213740458014, "grad_norm": 0.08284294927384271, "learning_rate": 1.548822721012822e-06, "loss": 0.0588, "step": 188700 }, { "epoch": 3.8414249363867685, "grad_norm": 0.012371323419815334, "learning_rate": 1.5483085994730552e-06, "loss": 0.0453, "step": 188710 }, { "epoch": 3.8416284987277356, "grad_norm": 8.21235556620429, "learning_rate": 1.5477945476446442e-06, "loss": 0.0631, "step": 188720 }, { "epoch": 3.8418320610687022, "grad_norm": 0.09579602788668015, "learning_rate": 1.547280565537968e-06, "loss": 0.1248, "step": 188730 }, { "epoch": 3.8420356234096693, "grad_norm": 0.06030092276797219, "learning_rate": 1.546766653163408e-06, "loss": 0.058, "step": 188740 }, { "epoch": 3.8422391857506364, "grad_norm": 2.509435836244782, "learning_rate": 1.5462528105313423e-06, "loss": 0.0161, "step": 188750 }, { "epoch": 3.842442748091603, "grad_norm": 12.884520223652691, "learning_rate": 1.5457390376521503e-06, "loss": 0.0557, "step": 188760 }, { "epoch": 3.84264631043257, "grad_norm": 10.962900151314575, "learning_rate": 1.5452253345362073e-06, "loss": 0.0551, "step": 188770 }, { "epoch": 3.8428498727735367, "grad_norm": 14.212302614427069, "learning_rate": 1.5447117011938877e-06, "loss": 0.0256, "step": 188780 }, { "epoch": 3.843053435114504, "grad_norm": 0.0204807406712546, "learning_rate": 1.5441981376355664e-06, "loss": 0.0177, "step": 188790 }, { "epoch": 3.843256997455471, "grad_norm": 0.1786895951072169, "learning_rate": 1.5436846438716147e-06, "loss": 0.011, "step": 188800 }, { "epoch": 3.8434605597964375, "grad_norm": 11.667051098665192, "learning_rate": 1.5431712199124033e-06, "loss": 0.0452, "step": 188810 }, { "epoch": 3.8436641221374046, "grad_norm": 11.439523677377284, "learning_rate": 1.5426578657683027e-06, "loss": 0.0482, "step": 188820 }, { "epoch": 3.8438676844783712, "grad_norm": 0.19933972993326438, "learning_rate": 1.5421445814496765e-06, "loss": 0.1377, "step": 188830 }, { "epoch": 3.8440712468193383, "grad_norm": 14.465319708598686, "learning_rate": 1.5416313669668958e-06, "loss": 0.0569, "step": 188840 }, { "epoch": 3.8442748091603054, "grad_norm": 0.5334846397583904, "learning_rate": 1.541118222330326e-06, "loss": 0.0016, "step": 188850 }, { "epoch": 3.844478371501272, "grad_norm": 6.538268682638252e-06, "learning_rate": 1.5406051475503265e-06, "loss": 0.0009, "step": 188860 }, { "epoch": 3.844681933842239, "grad_norm": 0.06248737072545955, "learning_rate": 1.5400921426372622e-06, "loss": 0.0246, "step": 188870 }, { "epoch": 3.844885496183206, "grad_norm": 0.05208545744973998, "learning_rate": 1.5395792076014932e-06, "loss": 0.0311, "step": 188880 }, { "epoch": 3.845089058524173, "grad_norm": 32.54552798676914, "learning_rate": 1.5390663424533791e-06, "loss": 0.1029, "step": 188890 }, { "epoch": 3.84529262086514, "grad_norm": 1.6869039342667218, "learning_rate": 1.5385535472032775e-06, "loss": 0.0241, "step": 188900 }, { "epoch": 3.845496183206107, "grad_norm": 0.04896053755831923, "learning_rate": 1.5380408218615457e-06, "loss": 0.0713, "step": 188910 }, { "epoch": 3.8456997455470736, "grad_norm": 0.003402520004361875, "learning_rate": 1.5375281664385384e-06, "loss": 0.0293, "step": 188920 }, { "epoch": 3.8459033078880407, "grad_norm": 0.016955869200352424, "learning_rate": 1.5370155809446097e-06, "loss": 0.0896, "step": 188930 }, { "epoch": 3.8461068702290078, "grad_norm": 0.04137369061450166, "learning_rate": 1.5365030653901114e-06, "loss": 0.0026, "step": 188940 }, { "epoch": 3.8463104325699744, "grad_norm": 0.10680095061728119, "learning_rate": 1.535990619785394e-06, "loss": 0.0611, "step": 188950 }, { "epoch": 3.8465139949109415, "grad_norm": 0.00501028277892385, "learning_rate": 1.5354782441408084e-06, "loss": 0.0344, "step": 188960 }, { "epoch": 3.8467175572519086, "grad_norm": 0.01386948449189308, "learning_rate": 1.534965938466702e-06, "loss": 0.0606, "step": 188970 }, { "epoch": 3.846921119592875, "grad_norm": 11.15900996197697, "learning_rate": 1.5344537027734214e-06, "loss": 0.0548, "step": 188980 }, { "epoch": 3.8471246819338423, "grad_norm": 5.46599804352841, "learning_rate": 1.5339415370713111e-06, "loss": 0.1209, "step": 188990 }, { "epoch": 3.8473282442748094, "grad_norm": 0.0003385831006776304, "learning_rate": 1.5334294413707169e-06, "loss": 0.0443, "step": 189000 }, { "epoch": 3.847531806615776, "grad_norm": 0.04084462297878606, "learning_rate": 1.5329174156819809e-06, "loss": 0.09, "step": 189010 }, { "epoch": 3.847735368956743, "grad_norm": 1.7127324080121744, "learning_rate": 1.5324054600154398e-06, "loss": 0.0606, "step": 189020 }, { "epoch": 3.84793893129771, "grad_norm": 0.08019872452672372, "learning_rate": 1.5318935743814406e-06, "loss": 0.0421, "step": 189030 }, { "epoch": 3.8481424936386768, "grad_norm": 0.003837167587224297, "learning_rate": 1.5313817587903152e-06, "loss": 0.0065, "step": 189040 }, { "epoch": 3.848346055979644, "grad_norm": 31.674164426888268, "learning_rate": 1.5308700132524018e-06, "loss": 0.008, "step": 189050 }, { "epoch": 3.848549618320611, "grad_norm": 0.21127950466995868, "learning_rate": 1.5303583377780395e-06, "loss": 0.0006, "step": 189060 }, { "epoch": 3.8487531806615776, "grad_norm": 2.6288747668947776, "learning_rate": 1.5298467323775578e-06, "loss": 0.0442, "step": 189070 }, { "epoch": 3.8489567430025446, "grad_norm": 0.017185576040435083, "learning_rate": 1.5293351970612896e-06, "loss": 0.0561, "step": 189080 }, { "epoch": 3.8491603053435117, "grad_norm": 0.08709387790771496, "learning_rate": 1.5288237318395698e-06, "loss": 0.1066, "step": 189090 }, { "epoch": 3.8493638676844784, "grad_norm": 0.01736839937496143, "learning_rate": 1.5283123367227248e-06, "loss": 0.0004, "step": 189100 }, { "epoch": 3.8495674300254454, "grad_norm": 0.00753635412041094, "learning_rate": 1.5278010117210834e-06, "loss": 0.0196, "step": 189110 }, { "epoch": 3.849770992366412, "grad_norm": 0.003270357577917086, "learning_rate": 1.5272897568449724e-06, "loss": 0.0052, "step": 189120 }, { "epoch": 3.849974554707379, "grad_norm": 22.71471969844856, "learning_rate": 1.526778572104718e-06, "loss": 0.1045, "step": 189130 }, { "epoch": 3.850178117048346, "grad_norm": 14.805665811209785, "learning_rate": 1.5262674575106435e-06, "loss": 0.1028, "step": 189140 }, { "epoch": 3.850381679389313, "grad_norm": 0.006027299369072413, "learning_rate": 1.5257564130730717e-06, "loss": 0.0257, "step": 189150 }, { "epoch": 3.85058524173028, "grad_norm": 0.016170049401694097, "learning_rate": 1.5252454388023241e-06, "loss": 0.033, "step": 189160 }, { "epoch": 3.8507888040712466, "grad_norm": 1.1527993931207086, "learning_rate": 1.5247345347087206e-06, "loss": 0.0046, "step": 189170 }, { "epoch": 3.8509923664122137, "grad_norm": 0.04425420333353516, "learning_rate": 1.524223700802579e-06, "loss": 0.0023, "step": 189180 }, { "epoch": 3.8511959287531807, "grad_norm": 1.7030849793998795, "learning_rate": 1.5237129370942184e-06, "loss": 0.0258, "step": 189190 }, { "epoch": 3.8513994910941474, "grad_norm": 0.009561478131121572, "learning_rate": 1.523202243593949e-06, "loss": 0.0707, "step": 189200 }, { "epoch": 3.8516030534351144, "grad_norm": 27.37741152931051, "learning_rate": 1.52269162031209e-06, "loss": 0.1624, "step": 189210 }, { "epoch": 3.8518066157760815, "grad_norm": 0.06992568859362121, "learning_rate": 1.5221810672589543e-06, "loss": 0.021, "step": 189220 }, { "epoch": 3.852010178117048, "grad_norm": 14.032760717620691, "learning_rate": 1.5216705844448475e-06, "loss": 0.0959, "step": 189230 }, { "epoch": 3.8522137404580152, "grad_norm": 37.39253831917932, "learning_rate": 1.5211601718800873e-06, "loss": 0.0547, "step": 189240 }, { "epoch": 3.8524173027989823, "grad_norm": 0.02855992662296319, "learning_rate": 1.520649829574976e-06, "loss": 0.0024, "step": 189250 }, { "epoch": 3.852620865139949, "grad_norm": 0.2088392145283031, "learning_rate": 1.5201395575398214e-06, "loss": 0.0646, "step": 189260 }, { "epoch": 3.852824427480916, "grad_norm": 0.027583176800623423, "learning_rate": 1.5196293557849335e-06, "loss": 0.0456, "step": 189270 }, { "epoch": 3.853027989821883, "grad_norm": 1.1518925975995478, "learning_rate": 1.5191192243206122e-06, "loss": 0.0121, "step": 189280 }, { "epoch": 3.8532315521628497, "grad_norm": 0.04883898626807152, "learning_rate": 1.5186091631571609e-06, "loss": 0.031, "step": 189290 }, { "epoch": 3.853435114503817, "grad_norm": 0.02978902346149855, "learning_rate": 1.5180991723048822e-06, "loss": 0.0017, "step": 189300 }, { "epoch": 3.853638676844784, "grad_norm": 0.018445534319499822, "learning_rate": 1.5175892517740748e-06, "loss": 0.0015, "step": 189310 }, { "epoch": 3.8538422391857505, "grad_norm": 0.06748662170894419, "learning_rate": 1.5170794015750384e-06, "loss": 0.0105, "step": 189320 }, { "epoch": 3.8540458015267176, "grad_norm": 0.0258108077258518, "learning_rate": 1.5165696217180686e-06, "loss": 0.0014, "step": 189330 }, { "epoch": 3.8542493638676847, "grad_norm": 0.057167177008525805, "learning_rate": 1.516059912213463e-06, "loss": 0.0423, "step": 189340 }, { "epoch": 3.8544529262086513, "grad_norm": 0.09822971126615161, "learning_rate": 1.5155502730715138e-06, "loss": 0.0584, "step": 189350 }, { "epoch": 3.8546564885496184, "grad_norm": 0.11583804442834925, "learning_rate": 1.515040704302515e-06, "loss": 0.0732, "step": 189360 }, { "epoch": 3.8548600508905855, "grad_norm": 0.0193288042426809, "learning_rate": 1.5145312059167582e-06, "loss": 0.0444, "step": 189370 }, { "epoch": 3.855063613231552, "grad_norm": 0.04486006165418803, "learning_rate": 1.5140217779245326e-06, "loss": 0.038, "step": 189380 }, { "epoch": 3.855267175572519, "grad_norm": 40.35222638395432, "learning_rate": 1.5135124203361274e-06, "loss": 0.026, "step": 189390 }, { "epoch": 3.8554707379134863, "grad_norm": 0.3980959485573497, "learning_rate": 1.5130031331618305e-06, "loss": 0.0008, "step": 189400 }, { "epoch": 3.855674300254453, "grad_norm": 0.03675043792057117, "learning_rate": 1.5124939164119251e-06, "loss": 0.0202, "step": 189410 }, { "epoch": 3.85587786259542, "grad_norm": 0.004727006085972474, "learning_rate": 1.5119847700966955e-06, "loss": 0.0777, "step": 189420 }, { "epoch": 3.8560814249363866, "grad_norm": 0.019833974935360793, "learning_rate": 1.5114756942264292e-06, "loss": 0.0152, "step": 189430 }, { "epoch": 3.8562849872773537, "grad_norm": 0.010048902313128282, "learning_rate": 1.5109666888114028e-06, "loss": 0.048, "step": 189440 }, { "epoch": 3.8564885496183208, "grad_norm": 0.0122764871635325, "learning_rate": 1.5104577538618964e-06, "loss": 0.0673, "step": 189450 }, { "epoch": 3.8566921119592874, "grad_norm": 6.852526272440675, "learning_rate": 1.5099488893881936e-06, "loss": 0.0968, "step": 189460 }, { "epoch": 3.8568956743002545, "grad_norm": 0.12297546044624785, "learning_rate": 1.509440095400566e-06, "loss": 0.0567, "step": 189470 }, { "epoch": 3.857099236641221, "grad_norm": 0.051388627522029016, "learning_rate": 1.5089313719092918e-06, "loss": 0.0338, "step": 189480 }, { "epoch": 3.857302798982188, "grad_norm": 0.030112264899872235, "learning_rate": 1.5084227189246447e-06, "loss": 0.0633, "step": 189490 }, { "epoch": 3.8575063613231553, "grad_norm": 16.35471487103015, "learning_rate": 1.5079141364568983e-06, "loss": 0.1579, "step": 189500 }, { "epoch": 3.857709923664122, "grad_norm": 0.008389762890097581, "learning_rate": 1.5074056245163238e-06, "loss": 0.0529, "step": 189510 }, { "epoch": 3.857913486005089, "grad_norm": 0.015527043680532052, "learning_rate": 1.5068971831131907e-06, "loss": 0.0108, "step": 189520 }, { "epoch": 3.858117048346056, "grad_norm": 2.757778756316678, "learning_rate": 1.506388812257768e-06, "loss": 0.0029, "step": 189530 }, { "epoch": 3.8583206106870227, "grad_norm": 0.27938621298010374, "learning_rate": 1.5058805119603238e-06, "loss": 0.0017, "step": 189540 }, { "epoch": 3.85852417302799, "grad_norm": 0.13276053776356925, "learning_rate": 1.5053722822311223e-06, "loss": 0.007, "step": 189550 }, { "epoch": 3.858727735368957, "grad_norm": 10.459456734570603, "learning_rate": 1.50486412308043e-06, "loss": 0.0578, "step": 189560 }, { "epoch": 3.8589312977099235, "grad_norm": 0.00943239491502446, "learning_rate": 1.504356034518506e-06, "loss": 0.0032, "step": 189570 }, { "epoch": 3.8591348600508906, "grad_norm": 0.03060358844894794, "learning_rate": 1.503848016555615e-06, "loss": 0.0029, "step": 189580 }, { "epoch": 3.8593384223918576, "grad_norm": 0.009053784483926207, "learning_rate": 1.5033400692020189e-06, "loss": 0.0419, "step": 189590 }, { "epoch": 3.8595419847328243, "grad_norm": 40.351955941425665, "learning_rate": 1.5028321924679702e-06, "loss": 0.0712, "step": 189600 }, { "epoch": 3.8597455470737914, "grad_norm": 0.07409795153917666, "learning_rate": 1.5023243863637322e-06, "loss": 0.0415, "step": 189610 }, { "epoch": 3.8599491094147584, "grad_norm": 0.05612846319012437, "learning_rate": 1.5018166508995575e-06, "loss": 0.0013, "step": 189620 }, { "epoch": 3.860152671755725, "grad_norm": 0.03130834443029979, "learning_rate": 1.5013089860856989e-06, "loss": 0.0367, "step": 189630 }, { "epoch": 3.860356234096692, "grad_norm": 0.47639284807662985, "learning_rate": 1.5008013919324149e-06, "loss": 0.0566, "step": 189640 }, { "epoch": 3.8605597964376592, "grad_norm": 0.31056602683627277, "learning_rate": 1.5002938684499524e-06, "loss": 0.0795, "step": 189650 }, { "epoch": 3.860763358778626, "grad_norm": 0.023789677830605715, "learning_rate": 1.4997864156485615e-06, "loss": 0.0423, "step": 189660 }, { "epoch": 3.860966921119593, "grad_norm": 1.6823788068101557, "learning_rate": 1.499279033538495e-06, "loss": 0.0205, "step": 189670 }, { "epoch": 3.86117048346056, "grad_norm": 0.0077870029283194616, "learning_rate": 1.4987717221299957e-06, "loss": 0.0873, "step": 189680 }, { "epoch": 3.8613740458015267, "grad_norm": 7.153862877342908, "learning_rate": 1.4982644814333108e-06, "loss": 0.0153, "step": 189690 }, { "epoch": 3.8615776081424937, "grad_norm": 4.3820656433095815, "learning_rate": 1.4977573114586852e-06, "loss": 0.0424, "step": 189700 }, { "epoch": 3.861781170483461, "grad_norm": 23.87031962132507, "learning_rate": 1.4972502122163613e-06, "loss": 0.038, "step": 189710 }, { "epoch": 3.8619847328244274, "grad_norm": 0.20725466522758382, "learning_rate": 1.4967431837165812e-06, "loss": 0.07, "step": 189720 }, { "epoch": 3.8621882951653945, "grad_norm": 0.015690636037737033, "learning_rate": 1.4962362259695846e-06, "loss": 0.0223, "step": 189730 }, { "epoch": 3.862391857506361, "grad_norm": 0.028711638447300146, "learning_rate": 1.4957293389856103e-06, "loss": 0.0329, "step": 189740 }, { "epoch": 3.8625954198473282, "grad_norm": 0.05963434127241164, "learning_rate": 1.495222522774895e-06, "loss": 0.027, "step": 189750 }, { "epoch": 3.8627989821882953, "grad_norm": 13.521184611196057, "learning_rate": 1.4947157773476755e-06, "loss": 0.0745, "step": 189760 }, { "epoch": 3.863002544529262, "grad_norm": 0.050904533095839165, "learning_rate": 1.4942091027141868e-06, "loss": 0.0298, "step": 189770 }, { "epoch": 3.863206106870229, "grad_norm": 0.021007658209475234, "learning_rate": 1.4937024988846577e-06, "loss": 0.0609, "step": 189780 }, { "epoch": 3.8634096692111957, "grad_norm": 24.47172187206486, "learning_rate": 1.4931959658693246e-06, "loss": 0.0805, "step": 189790 }, { "epoch": 3.8636132315521627, "grad_norm": 0.030893749865309444, "learning_rate": 1.4926895036784173e-06, "loss": 0.0559, "step": 189800 }, { "epoch": 3.86381679389313, "grad_norm": 0.0733378562979661, "learning_rate": 1.4921831123221593e-06, "loss": 0.0621, "step": 189810 }, { "epoch": 3.8640203562340965, "grad_norm": 2.216006955924433, "learning_rate": 1.4916767918107827e-06, "loss": 0.0274, "step": 189820 }, { "epoch": 3.8642239185750635, "grad_norm": 0.02206909257433401, "learning_rate": 1.4911705421545141e-06, "loss": 0.0573, "step": 189830 }, { "epoch": 3.8644274809160306, "grad_norm": 0.03414265479947594, "learning_rate": 1.4906643633635743e-06, "loss": 0.0061, "step": 189840 }, { "epoch": 3.8646310432569972, "grad_norm": 0.0030282944286106938, "learning_rate": 1.490158255448187e-06, "loss": 0.0418, "step": 189850 }, { "epoch": 3.8648346055979643, "grad_norm": 7.617815574310059, "learning_rate": 1.4896522184185747e-06, "loss": 0.0607, "step": 189860 }, { "epoch": 3.8650381679389314, "grad_norm": 0.006560013352415877, "learning_rate": 1.4891462522849575e-06, "loss": 0.0554, "step": 189870 }, { "epoch": 3.865241730279898, "grad_norm": 0.02747508704288014, "learning_rate": 1.4886403570575531e-06, "loss": 0.0004, "step": 189880 }, { "epoch": 3.865445292620865, "grad_norm": 0.017524949396812158, "learning_rate": 1.4881345327465796e-06, "loss": 0.0238, "step": 189890 }, { "epoch": 3.865648854961832, "grad_norm": 0.03607758504787594, "learning_rate": 1.4876287793622523e-06, "loss": 0.0367, "step": 189900 }, { "epoch": 3.865852417302799, "grad_norm": 0.05332753619404864, "learning_rate": 1.4871230969147859e-06, "loss": 0.0853, "step": 189910 }, { "epoch": 3.866055979643766, "grad_norm": 0.02001116609960529, "learning_rate": 1.4866174854143934e-06, "loss": 0.0698, "step": 189920 }, { "epoch": 3.866259541984733, "grad_norm": 7.704047332342108, "learning_rate": 1.4861119448712852e-06, "loss": 0.1282, "step": 189930 }, { "epoch": 3.8664631043256996, "grad_norm": 0.02250443654226891, "learning_rate": 1.4856064752956728e-06, "loss": 0.0481, "step": 189940 }, { "epoch": 3.8666666666666667, "grad_norm": 0.024258452677673684, "learning_rate": 1.4851010766977636e-06, "loss": 0.0387, "step": 189950 }, { "epoch": 3.8668702290076338, "grad_norm": 0.020869555883501742, "learning_rate": 1.4845957490877677e-06, "loss": 0.0908, "step": 189960 }, { "epoch": 3.8670737913486004, "grad_norm": 0.031655742512985814, "learning_rate": 1.4840904924758848e-06, "loss": 0.0826, "step": 189970 }, { "epoch": 3.8672773536895675, "grad_norm": 0.11932268676470449, "learning_rate": 1.4835853068723255e-06, "loss": 0.0453, "step": 189980 }, { "epoch": 3.8674809160305346, "grad_norm": 0.0943457193040044, "learning_rate": 1.483080192287289e-06, "loss": 0.1424, "step": 189990 }, { "epoch": 3.867684478371501, "grad_norm": 0.02468352073616845, "learning_rate": 1.4825751487309764e-06, "loss": 0.029, "step": 190000 }, { "epoch": 3.8678880407124683, "grad_norm": 2.6602891398554482, "learning_rate": 1.482070176213592e-06, "loss": 0.0074, "step": 190010 }, { "epoch": 3.8680916030534354, "grad_norm": 11.378241733943288, "learning_rate": 1.4815652747453297e-06, "loss": 0.1014, "step": 190020 }, { "epoch": 3.868295165394402, "grad_norm": 0.06495846386741125, "learning_rate": 1.4810604443363868e-06, "loss": 0.0418, "step": 190030 }, { "epoch": 3.868498727735369, "grad_norm": 10.087813410386996, "learning_rate": 1.4805556849969644e-06, "loss": 0.0401, "step": 190040 }, { "epoch": 3.868702290076336, "grad_norm": 0.09705960153215146, "learning_rate": 1.4800509967372506e-06, "loss": 0.0634, "step": 190050 }, { "epoch": 3.868905852417303, "grad_norm": 9.646191032149014, "learning_rate": 1.479546379567441e-06, "loss": 0.1084, "step": 190060 }, { "epoch": 3.86910941475827, "grad_norm": 9.547332307754836, "learning_rate": 1.4790418334977263e-06, "loss": 0.0551, "step": 190070 }, { "epoch": 3.8693129770992365, "grad_norm": 0.014661152478011565, "learning_rate": 1.4785373585382967e-06, "loss": 0.0661, "step": 190080 }, { "epoch": 3.8695165394402036, "grad_norm": 0.011317826101629725, "learning_rate": 1.478032954699341e-06, "loss": 0.0338, "step": 190090 }, { "epoch": 3.86972010178117, "grad_norm": 0.0886134861573761, "learning_rate": 1.4775286219910461e-06, "loss": 0.0185, "step": 190100 }, { "epoch": 3.8699236641221373, "grad_norm": 0.009086533539234573, "learning_rate": 1.4770243604235972e-06, "loss": 0.0006, "step": 190110 }, { "epoch": 3.8701272264631044, "grad_norm": 7.622826752995284, "learning_rate": 1.476520170007179e-06, "loss": 0.024, "step": 190120 }, { "epoch": 3.870330788804071, "grad_norm": 0.043300579078991694, "learning_rate": 1.4760160507519744e-06, "loss": 0.0024, "step": 190130 }, { "epoch": 3.870534351145038, "grad_norm": 6.037842141052087, "learning_rate": 1.4755120026681663e-06, "loss": 0.03, "step": 190140 }, { "epoch": 3.870737913486005, "grad_norm": 21.786694015515295, "learning_rate": 1.4750080257659294e-06, "loss": 0.0224, "step": 190150 }, { "epoch": 3.870941475826972, "grad_norm": 0.01754344624651079, "learning_rate": 1.4745041200554471e-06, "loss": 0.0995, "step": 190160 }, { "epoch": 3.871145038167939, "grad_norm": 0.4188496758206809, "learning_rate": 1.4740002855468966e-06, "loss": 0.0276, "step": 190170 }, { "epoch": 3.871348600508906, "grad_norm": 0.3442090124651724, "learning_rate": 1.4734965222504482e-06, "loss": 0.0309, "step": 190180 }, { "epoch": 3.8715521628498726, "grad_norm": 0.15325765447821132, "learning_rate": 1.472992830176282e-06, "loss": 0.0239, "step": 190190 }, { "epoch": 3.8717557251908397, "grad_norm": 0.05025772891776429, "learning_rate": 1.4724892093345693e-06, "loss": 0.0225, "step": 190200 }, { "epoch": 3.8719592875318067, "grad_norm": 0.3703837943571276, "learning_rate": 1.471985659735477e-06, "loss": 0.0299, "step": 190210 }, { "epoch": 3.8721628498727734, "grad_norm": 0.9316963449174867, "learning_rate": 1.4714821813891823e-06, "loss": 0.0739, "step": 190220 }, { "epoch": 3.8723664122137404, "grad_norm": 0.13420649845689114, "learning_rate": 1.4709787743058474e-06, "loss": 0.0352, "step": 190230 }, { "epoch": 3.8725699745547075, "grad_norm": 0.018813054233614464, "learning_rate": 1.4704754384956405e-06, "loss": 0.0442, "step": 190240 }, { "epoch": 3.872773536895674, "grad_norm": 0.4117070108034198, "learning_rate": 1.4699721739687311e-06, "loss": 0.0948, "step": 190250 }, { "epoch": 3.8729770992366412, "grad_norm": 0.19769985489657435, "learning_rate": 1.4694689807352786e-06, "loss": 0.0067, "step": 190260 }, { "epoch": 3.8731806615776083, "grad_norm": 0.5172851436783903, "learning_rate": 1.4689658588054473e-06, "loss": 0.0018, "step": 190270 }, { "epoch": 3.873384223918575, "grad_norm": 0.023173527229874737, "learning_rate": 1.468462808189398e-06, "loss": 0.0319, "step": 190280 }, { "epoch": 3.873587786259542, "grad_norm": 0.04013979594251982, "learning_rate": 1.4679598288972918e-06, "loss": 0.0875, "step": 190290 }, { "epoch": 3.873791348600509, "grad_norm": 0.06826317587955942, "learning_rate": 1.4674569209392854e-06, "loss": 0.0024, "step": 190300 }, { "epoch": 3.8739949109414757, "grad_norm": 0.3553203927389903, "learning_rate": 1.4669540843255363e-06, "loss": 0.0649, "step": 190310 }, { "epoch": 3.874198473282443, "grad_norm": 0.03599745998090109, "learning_rate": 1.4664513190662005e-06, "loss": 0.0123, "step": 190320 }, { "epoch": 3.87440203562341, "grad_norm": 0.006587528399294189, "learning_rate": 1.4659486251714311e-06, "loss": 0.0735, "step": 190330 }, { "epoch": 3.8746055979643765, "grad_norm": 0.07458077013858741, "learning_rate": 1.4654460026513812e-06, "loss": 0.0031, "step": 190340 }, { "epoch": 3.8748091603053436, "grad_norm": 12.558644746016899, "learning_rate": 1.4649434515162036e-06, "loss": 0.1261, "step": 190350 }, { "epoch": 3.8750127226463107, "grad_norm": 6.620485219682533, "learning_rate": 1.4644409717760427e-06, "loss": 0.0615, "step": 190360 }, { "epoch": 3.8752162849872773, "grad_norm": 0.21109107966597146, "learning_rate": 1.4639385634410518e-06, "loss": 0.0656, "step": 190370 }, { "epoch": 3.8754198473282444, "grad_norm": 0.0032243936066514234, "learning_rate": 1.4634362265213775e-06, "loss": 0.0643, "step": 190380 }, { "epoch": 3.875623409669211, "grad_norm": 0.009894217559819892, "learning_rate": 1.462933961027162e-06, "loss": 0.0314, "step": 190390 }, { "epoch": 3.875826972010178, "grad_norm": 7.277119731684452, "learning_rate": 1.4624317669685496e-06, "loss": 0.0549, "step": 190400 }, { "epoch": 3.876030534351145, "grad_norm": 0.475237899717762, "learning_rate": 1.4619296443556868e-06, "loss": 0.0479, "step": 190410 }, { "epoch": 3.876234096692112, "grad_norm": 0.19752322292374713, "learning_rate": 1.4614275931987105e-06, "loss": 0.0006, "step": 190420 }, { "epoch": 3.876437659033079, "grad_norm": 0.007092728305475368, "learning_rate": 1.4609256135077614e-06, "loss": 0.0613, "step": 190430 }, { "epoch": 3.8766412213740455, "grad_norm": 0.057107133593231954, "learning_rate": 1.4604237052929776e-06, "loss": 0.0461, "step": 190440 }, { "epoch": 3.8768447837150126, "grad_norm": 8.748014849296231, "learning_rate": 1.4599218685644968e-06, "loss": 0.0372, "step": 190450 }, { "epoch": 3.8770483460559797, "grad_norm": 0.008335264249094683, "learning_rate": 1.459420103332453e-06, "loss": 0.0332, "step": 190460 }, { "epoch": 3.8772519083969463, "grad_norm": 6.941532396371758, "learning_rate": 1.4589184096069803e-06, "loss": 0.0785, "step": 190470 }, { "epoch": 3.8774554707379134, "grad_norm": 0.04000899555282915, "learning_rate": 1.4584167873982113e-06, "loss": 0.0794, "step": 190480 }, { "epoch": 3.8776590330788805, "grad_norm": 0.07839823221860562, "learning_rate": 1.457915236716277e-06, "loss": 0.0449, "step": 190490 }, { "epoch": 3.877862595419847, "grad_norm": 0.03568169972930551, "learning_rate": 1.4574137575713066e-06, "loss": 0.0879, "step": 190500 }, { "epoch": 3.878066157760814, "grad_norm": 0.07724750330499396, "learning_rate": 1.4569123499734278e-06, "loss": 0.062, "step": 190510 }, { "epoch": 3.8782697201017813, "grad_norm": 5.443733758561556, "learning_rate": 1.4564110139327681e-06, "loss": 0.1198, "step": 190520 }, { "epoch": 3.878473282442748, "grad_norm": 9.793982730382519, "learning_rate": 1.4559097494594514e-06, "loss": 0.0437, "step": 190530 }, { "epoch": 3.878676844783715, "grad_norm": 0.17099147271756068, "learning_rate": 1.4554085565636045e-06, "loss": 0.0691, "step": 190540 }, { "epoch": 3.878880407124682, "grad_norm": 15.556772404962363, "learning_rate": 1.4549074352553434e-06, "loss": 0.027, "step": 190550 }, { "epoch": 3.8790839694656487, "grad_norm": 0.029374163750721917, "learning_rate": 1.4544063855447942e-06, "loss": 0.0074, "step": 190560 }, { "epoch": 3.879287531806616, "grad_norm": 0.017679255484382792, "learning_rate": 1.4539054074420766e-06, "loss": 0.1085, "step": 190570 }, { "epoch": 3.879491094147583, "grad_norm": 1.4911829124119393, "learning_rate": 1.4534045009573039e-06, "loss": 0.0584, "step": 190580 }, { "epoch": 3.8796946564885495, "grad_norm": 13.681976617974062, "learning_rate": 1.452903666100598e-06, "loss": 0.0355, "step": 190590 }, { "epoch": 3.8798982188295166, "grad_norm": 0.021474283834289197, "learning_rate": 1.4524029028820696e-06, "loss": 0.0503, "step": 190600 }, { "epoch": 3.8801017811704837, "grad_norm": 16.090626468560046, "learning_rate": 1.451902211311832e-06, "loss": 0.1425, "step": 190610 }, { "epoch": 3.8803053435114503, "grad_norm": 0.00597768467154093, "learning_rate": 1.451401591400003e-06, "loss": 0.0246, "step": 190620 }, { "epoch": 3.8805089058524174, "grad_norm": 0.08278750963139214, "learning_rate": 1.4509010431566871e-06, "loss": 0.0607, "step": 190630 }, { "epoch": 3.8807124681933844, "grad_norm": 0.06482375427097102, "learning_rate": 1.4504005665919963e-06, "loss": 0.1103, "step": 190640 }, { "epoch": 3.880916030534351, "grad_norm": 0.968122985518062, "learning_rate": 1.449900161716038e-06, "loss": 0.0085, "step": 190650 }, { "epoch": 3.881119592875318, "grad_norm": 0.020397498657497442, "learning_rate": 1.4493998285389182e-06, "loss": 0.127, "step": 190660 }, { "epoch": 3.8813231552162852, "grad_norm": 0.5383360940847844, "learning_rate": 1.4488995670707417e-06, "loss": 0.0155, "step": 190670 }, { "epoch": 3.881526717557252, "grad_norm": 0.6519686044269857, "learning_rate": 1.4483993773216126e-06, "loss": 0.0178, "step": 190680 }, { "epoch": 3.881730279898219, "grad_norm": 13.998363449464868, "learning_rate": 1.4478992593016328e-06, "loss": 0.0349, "step": 190690 }, { "epoch": 3.881933842239186, "grad_norm": 7.24434839787942, "learning_rate": 1.447399213020902e-06, "loss": 0.0518, "step": 190700 }, { "epoch": 3.8821374045801527, "grad_norm": 0.1253153243568621, "learning_rate": 1.4468992384895197e-06, "loss": 0.0558, "step": 190710 }, { "epoch": 3.8823409669211197, "grad_norm": 0.017895763122943477, "learning_rate": 1.446399335717586e-06, "loss": 0.0784, "step": 190720 }, { "epoch": 3.8825445292620864, "grad_norm": 8.481655547073101, "learning_rate": 1.4458995047151909e-06, "loss": 0.1386, "step": 190730 }, { "epoch": 3.8827480916030535, "grad_norm": 0.1411280827990035, "learning_rate": 1.4453997454924347e-06, "loss": 0.1231, "step": 190740 }, { "epoch": 3.88295165394402, "grad_norm": 0.035116520811392805, "learning_rate": 1.4449000580594102e-06, "loss": 0.0621, "step": 190750 }, { "epoch": 3.883155216284987, "grad_norm": 0.03774967657777069, "learning_rate": 1.4444004424262054e-06, "loss": 0.0759, "step": 190760 }, { "epoch": 3.8833587786259542, "grad_norm": 0.07304301248263839, "learning_rate": 1.4439008986029146e-06, "loss": 0.08, "step": 190770 }, { "epoch": 3.883562340966921, "grad_norm": 0.013161830098085726, "learning_rate": 1.4434014265996266e-06, "loss": 0.0286, "step": 190780 }, { "epoch": 3.883765903307888, "grad_norm": 0.04776060412673135, "learning_rate": 1.4429020264264249e-06, "loss": 0.0462, "step": 190790 }, { "epoch": 3.883969465648855, "grad_norm": 0.07860760778430499, "learning_rate": 1.4424026980934015e-06, "loss": 0.0453, "step": 190800 }, { "epoch": 3.8841730279898217, "grad_norm": 0.03651175395959049, "learning_rate": 1.4419034416106358e-06, "loss": 0.0028, "step": 190810 }, { "epoch": 3.8843765903307887, "grad_norm": 0.03712383896617094, "learning_rate": 1.441404256988213e-06, "loss": 0.0358, "step": 190820 }, { "epoch": 3.884580152671756, "grad_norm": 0.027277771722811366, "learning_rate": 1.440905144236215e-06, "loss": 0.0922, "step": 190830 }, { "epoch": 3.8847837150127225, "grad_norm": 13.217265378601034, "learning_rate": 1.4404061033647216e-06, "loss": 0.0035, "step": 190840 }, { "epoch": 3.8849872773536895, "grad_norm": 0.06270089533766211, "learning_rate": 1.4399071343838112e-06, "loss": 0.0366, "step": 190850 }, { "epoch": 3.8851908396946566, "grad_norm": 0.1695765890434132, "learning_rate": 1.4394082373035623e-06, "loss": 0.0038, "step": 190860 }, { "epoch": 3.8853944020356233, "grad_norm": 0.012444212430371249, "learning_rate": 1.4389094121340497e-06, "loss": 0.1123, "step": 190870 }, { "epoch": 3.8855979643765903, "grad_norm": 0.0018255928713104604, "learning_rate": 1.438410658885348e-06, "loss": 0.098, "step": 190880 }, { "epoch": 3.8858015267175574, "grad_norm": 0.0738192768529959, "learning_rate": 1.4379119775675305e-06, "loss": 0.004, "step": 190890 }, { "epoch": 3.886005089058524, "grad_norm": 10.74658092032455, "learning_rate": 1.4374133681906682e-06, "loss": 0.0431, "step": 190900 }, { "epoch": 3.886208651399491, "grad_norm": 0.8797935246956061, "learning_rate": 1.4369148307648324e-06, "loss": 0.0032, "step": 190910 }, { "epoch": 3.886412213740458, "grad_norm": 0.06920535969066406, "learning_rate": 1.43641636530009e-06, "loss": 0.0252, "step": 190920 }, { "epoch": 3.886615776081425, "grad_norm": 0.3118103405664863, "learning_rate": 1.4359179718065092e-06, "loss": 0.0294, "step": 190930 }, { "epoch": 3.886819338422392, "grad_norm": 0.005299358273926399, "learning_rate": 1.435419650294157e-06, "loss": 0.0915, "step": 190940 }, { "epoch": 3.887022900763359, "grad_norm": 0.006817804272768765, "learning_rate": 1.4349214007730926e-06, "loss": 0.0459, "step": 190950 }, { "epoch": 3.8872264631043256, "grad_norm": 0.28194666935561996, "learning_rate": 1.4344232232533861e-06, "loss": 0.0522, "step": 190960 }, { "epoch": 3.8874300254452927, "grad_norm": 0.052550766624960096, "learning_rate": 1.4339251177450935e-06, "loss": 0.0415, "step": 190970 }, { "epoch": 3.88763358778626, "grad_norm": 0.044313271294364596, "learning_rate": 1.4334270842582742e-06, "loss": 0.1091, "step": 190980 }, { "epoch": 3.8878371501272264, "grad_norm": 0.034029338733696034, "learning_rate": 1.4329291228029917e-06, "loss": 0.0297, "step": 190990 }, { "epoch": 3.8880407124681935, "grad_norm": 0.3024495333494502, "learning_rate": 1.4324312333892986e-06, "loss": 0.0022, "step": 191000 }, { "epoch": 3.8882442748091606, "grad_norm": 1.9438969010402471, "learning_rate": 1.431933416027252e-06, "loss": 0.0256, "step": 191010 }, { "epoch": 3.888447837150127, "grad_norm": 0.2366905945121083, "learning_rate": 1.4314356707269057e-06, "loss": 0.0153, "step": 191020 }, { "epoch": 3.8886513994910943, "grad_norm": 0.017806709961010875, "learning_rate": 1.4309379974983118e-06, "loss": 0.0389, "step": 191030 }, { "epoch": 3.888854961832061, "grad_norm": 0.1914672714530773, "learning_rate": 1.4304403963515224e-06, "loss": 0.0605, "step": 191040 }, { "epoch": 3.889058524173028, "grad_norm": 0.006462608345480755, "learning_rate": 1.4299428672965865e-06, "loss": 0.0045, "step": 191050 }, { "epoch": 3.889262086513995, "grad_norm": 3.764097196776063, "learning_rate": 1.4294454103435523e-06, "loss": 0.0526, "step": 191060 }, { "epoch": 3.8894656488549617, "grad_norm": 0.004180254472880058, "learning_rate": 1.4289480255024674e-06, "loss": 0.0461, "step": 191070 }, { "epoch": 3.889669211195929, "grad_norm": 0.030673055531126564, "learning_rate": 1.4284507127833764e-06, "loss": 0.0563, "step": 191080 }, { "epoch": 3.8898727735368954, "grad_norm": 0.13034510040017452, "learning_rate": 1.427953472196325e-06, "loss": 0.0585, "step": 191090 }, { "epoch": 3.8900763358778625, "grad_norm": 0.10376863722445766, "learning_rate": 1.4274563037513505e-06, "loss": 0.0341, "step": 191100 }, { "epoch": 3.8902798982188296, "grad_norm": 0.061991858613049663, "learning_rate": 1.4269592074584994e-06, "loss": 0.0005, "step": 191110 }, { "epoch": 3.890483460559796, "grad_norm": 0.0020469372279085457, "learning_rate": 1.4264621833278108e-06, "loss": 0.0592, "step": 191120 }, { "epoch": 3.8906870229007633, "grad_norm": 0.2981620187737599, "learning_rate": 1.4259652313693173e-06, "loss": 0.0005, "step": 191130 }, { "epoch": 3.8908905852417304, "grad_norm": 10.440450227914624, "learning_rate": 1.4254683515930618e-06, "loss": 0.0899, "step": 191140 }, { "epoch": 3.891094147582697, "grad_norm": 0.02126365549052747, "learning_rate": 1.4249715440090777e-06, "loss": 0.0236, "step": 191150 }, { "epoch": 3.891297709923664, "grad_norm": 0.10191806390745023, "learning_rate": 1.4244748086273945e-06, "loss": 0.009, "step": 191160 }, { "epoch": 3.891501272264631, "grad_norm": 10.680404127975262, "learning_rate": 1.423978145458052e-06, "loss": 0.0679, "step": 191170 }, { "epoch": 3.891704834605598, "grad_norm": 0.2320271303593814, "learning_rate": 1.423481554511074e-06, "loss": 0.0219, "step": 191180 }, { "epoch": 3.891908396946565, "grad_norm": 0.015846752327168083, "learning_rate": 1.422985035796492e-06, "loss": 0.0271, "step": 191190 }, { "epoch": 3.892111959287532, "grad_norm": 0.0027526689990222, "learning_rate": 1.4224885893243367e-06, "loss": 0.028, "step": 191200 }, { "epoch": 3.8923155216284986, "grad_norm": 0.026395329637729267, "learning_rate": 1.4219922151046312e-06, "loss": 0.031, "step": 191210 }, { "epoch": 3.8925190839694657, "grad_norm": 9.672985975823417, "learning_rate": 1.4214959131473994e-06, "loss": 0.0566, "step": 191220 }, { "epoch": 3.8927226463104327, "grad_norm": 17.014673252282066, "learning_rate": 1.42099968346267e-06, "loss": 0.0322, "step": 191230 }, { "epoch": 3.8929262086513994, "grad_norm": 0.09157566035354343, "learning_rate": 1.42050352606046e-06, "loss": 0.1803, "step": 191240 }, { "epoch": 3.8931297709923665, "grad_norm": 0.05147641168420088, "learning_rate": 1.4200074409507914e-06, "loss": 0.001, "step": 191250 }, { "epoch": 3.8933333333333335, "grad_norm": 0.00996652648998667, "learning_rate": 1.419511428143684e-06, "loss": 0.077, "step": 191260 }, { "epoch": 3.8935368956743, "grad_norm": 0.022259231373623744, "learning_rate": 1.4190154876491547e-06, "loss": 0.1219, "step": 191270 }, { "epoch": 3.8937404580152672, "grad_norm": 0.5847661495730344, "learning_rate": 1.41851961947722e-06, "loss": 0.0292, "step": 191280 }, { "epoch": 3.8939440203562343, "grad_norm": 2.5148800921041214, "learning_rate": 1.4180238236378945e-06, "loss": 0.0773, "step": 191290 }, { "epoch": 3.894147582697201, "grad_norm": 5.531531348016056, "learning_rate": 1.4175281001411928e-06, "loss": 0.0766, "step": 191300 }, { "epoch": 3.894351145038168, "grad_norm": 0.02449061576368806, "learning_rate": 1.417032448997122e-06, "loss": 0.0603, "step": 191310 }, { "epoch": 3.894554707379135, "grad_norm": 0.008121385268359198, "learning_rate": 1.4165368702156978e-06, "loss": 0.0414, "step": 191320 }, { "epoch": 3.8947582697201018, "grad_norm": 0.031254166783093056, "learning_rate": 1.4160413638069288e-06, "loss": 0.0695, "step": 191330 }, { "epoch": 3.894961832061069, "grad_norm": 2.2038457404141627, "learning_rate": 1.4155459297808167e-06, "loss": 0.0679, "step": 191340 }, { "epoch": 3.8951653944020355, "grad_norm": 0.2786703116772054, "learning_rate": 1.415050568147373e-06, "loss": 0.0187, "step": 191350 }, { "epoch": 3.8953689567430025, "grad_norm": 1.57923314692006, "learning_rate": 1.4145552789166023e-06, "loss": 0.1273, "step": 191360 }, { "epoch": 3.8955725190839696, "grad_norm": 3.659838174898563, "learning_rate": 1.414060062098504e-06, "loss": 0.0868, "step": 191370 }, { "epoch": 3.8957760814249363, "grad_norm": 0.3597353351098797, "learning_rate": 1.4135649177030814e-06, "loss": 0.0986, "step": 191380 }, { "epoch": 3.8959796437659033, "grad_norm": 10.835875094096695, "learning_rate": 1.4130698457403346e-06, "loss": 0.0906, "step": 191390 }, { "epoch": 3.89618320610687, "grad_norm": 0.02257738646077263, "learning_rate": 1.4125748462202627e-06, "loss": 0.0546, "step": 191400 }, { "epoch": 3.896386768447837, "grad_norm": 0.12638872056505637, "learning_rate": 1.4120799191528616e-06, "loss": 0.0556, "step": 191410 }, { "epoch": 3.896590330788804, "grad_norm": 0.11750375012484758, "learning_rate": 1.4115850645481284e-06, "loss": 0.0906, "step": 191420 }, { "epoch": 3.8967938931297708, "grad_norm": 0.1330193966577016, "learning_rate": 1.411090282416056e-06, "loss": 0.1159, "step": 191430 }, { "epoch": 3.896997455470738, "grad_norm": 20.057534442312786, "learning_rate": 1.4105955727666382e-06, "loss": 0.0165, "step": 191440 }, { "epoch": 3.897201017811705, "grad_norm": 25.0340651658632, "learning_rate": 1.410100935609866e-06, "loss": 0.112, "step": 191450 }, { "epoch": 3.8974045801526715, "grad_norm": 0.04072650838168185, "learning_rate": 1.409606370955729e-06, "loss": 0.0077, "step": 191460 }, { "epoch": 3.8976081424936386, "grad_norm": 25.685573213793017, "learning_rate": 1.4091118788142156e-06, "loss": 0.0568, "step": 191470 }, { "epoch": 3.8978117048346057, "grad_norm": 11.763478023370999, "learning_rate": 1.4086174591953127e-06, "loss": 0.0394, "step": 191480 }, { "epoch": 3.8980152671755723, "grad_norm": 0.22391591407120146, "learning_rate": 1.4081231121090077e-06, "loss": 0.0558, "step": 191490 }, { "epoch": 3.8982188295165394, "grad_norm": 0.035853920453703295, "learning_rate": 1.4076288375652797e-06, "loss": 0.0395, "step": 191500 }, { "epoch": 3.8984223918575065, "grad_norm": 26.408315771021233, "learning_rate": 1.4071346355741162e-06, "loss": 0.0753, "step": 191510 }, { "epoch": 3.898625954198473, "grad_norm": 14.54949391408704, "learning_rate": 1.4066405061454974e-06, "loss": 0.1755, "step": 191520 }, { "epoch": 3.89882951653944, "grad_norm": 4.299096896826049, "learning_rate": 1.4061464492893983e-06, "loss": 0.0955, "step": 191530 }, { "epoch": 3.8990330788804073, "grad_norm": 0.07866263972891228, "learning_rate": 1.4056524650158043e-06, "loss": 0.0359, "step": 191540 }, { "epoch": 3.899236641221374, "grad_norm": 0.11083284735202921, "learning_rate": 1.4051585533346862e-06, "loss": 0.0743, "step": 191550 }, { "epoch": 3.899440203562341, "grad_norm": 0.04471014720558721, "learning_rate": 1.40466471425602e-06, "loss": 0.0889, "step": 191560 }, { "epoch": 3.899643765903308, "grad_norm": 2.22847653046726, "learning_rate": 1.4041709477897835e-06, "loss": 0.0116, "step": 191570 }, { "epoch": 3.8998473282442747, "grad_norm": 0.0016693290571821436, "learning_rate": 1.4036772539459448e-06, "loss": 0.0437, "step": 191580 }, { "epoch": 3.900050890585242, "grad_norm": 8.450582146866989, "learning_rate": 1.4031836327344744e-06, "loss": 0.0576, "step": 191590 }, { "epoch": 3.900254452926209, "grad_norm": 0.43582970185301967, "learning_rate": 1.4026900841653463e-06, "loss": 0.04, "step": 191600 }, { "epoch": 3.9004580152671755, "grad_norm": 37.22917359962541, "learning_rate": 1.4021966082485238e-06, "loss": 0.0778, "step": 191610 }, { "epoch": 3.9006615776081426, "grad_norm": 8.778571946020271, "learning_rate": 1.4017032049939744e-06, "loss": 0.1495, "step": 191620 }, { "epoch": 3.9008651399491097, "grad_norm": 0.245018949311463, "learning_rate": 1.4012098744116636e-06, "loss": 0.0021, "step": 191630 }, { "epoch": 3.9010687022900763, "grad_norm": 0.022261205665266685, "learning_rate": 1.4007166165115549e-06, "loss": 0.0918, "step": 191640 }, { "epoch": 3.9012722646310434, "grad_norm": 0.08145967571067263, "learning_rate": 1.4002234313036096e-06, "loss": 0.0512, "step": 191650 }, { "epoch": 3.9014758269720105, "grad_norm": 12.539137643287127, "learning_rate": 1.3997303187977883e-06, "loss": 0.0261, "step": 191660 }, { "epoch": 3.901679389312977, "grad_norm": 0.023794312384556135, "learning_rate": 1.399237279004052e-06, "loss": 0.0297, "step": 191670 }, { "epoch": 3.901882951653944, "grad_norm": 0.01949747874924817, "learning_rate": 1.3987443119323541e-06, "loss": 0.0635, "step": 191680 }, { "epoch": 3.902086513994911, "grad_norm": 10.376099364879549, "learning_rate": 1.3982514175926542e-06, "loss": 0.0511, "step": 191690 }, { "epoch": 3.902290076335878, "grad_norm": 0.014856559457193191, "learning_rate": 1.3977585959949076e-06, "loss": 0.0019, "step": 191700 }, { "epoch": 3.9024936386768445, "grad_norm": 0.03302399746529678, "learning_rate": 1.397265847149063e-06, "loss": 0.015, "step": 191710 }, { "epoch": 3.9026972010178116, "grad_norm": 0.23941676292906652, "learning_rate": 1.3967731710650762e-06, "loss": 0.0039, "step": 191720 }, { "epoch": 3.9029007633587787, "grad_norm": 5.373831928450022, "learning_rate": 1.3962805677528973e-06, "loss": 0.0448, "step": 191730 }, { "epoch": 3.9031043256997453, "grad_norm": 0.01899434440084902, "learning_rate": 1.3957880372224715e-06, "loss": 0.0957, "step": 191740 }, { "epoch": 3.9033078880407124, "grad_norm": 0.030902502602624317, "learning_rate": 1.3952955794837508e-06, "loss": 0.0694, "step": 191750 }, { "epoch": 3.9035114503816795, "grad_norm": 0.18900876001139233, "learning_rate": 1.3948031945466777e-06, "loss": 0.0414, "step": 191760 }, { "epoch": 3.903715012722646, "grad_norm": 0.02469005591064209, "learning_rate": 1.394310882421196e-06, "loss": 0.0292, "step": 191770 }, { "epoch": 3.903918575063613, "grad_norm": 14.019186694930221, "learning_rate": 1.393818643117253e-06, "loss": 0.1636, "step": 191780 }, { "epoch": 3.9041221374045803, "grad_norm": 23.95364919124531, "learning_rate": 1.3933264766447858e-06, "loss": 0.0806, "step": 191790 }, { "epoch": 3.904325699745547, "grad_norm": 0.05577818753088587, "learning_rate": 1.3928343830137358e-06, "loss": 0.0806, "step": 191800 }, { "epoch": 3.904529262086514, "grad_norm": 0.014168460291158154, "learning_rate": 1.392342362234041e-06, "loss": 0.0365, "step": 191810 }, { "epoch": 3.904732824427481, "grad_norm": 7.439708197145675, "learning_rate": 1.391850414315639e-06, "loss": 0.061, "step": 191820 }, { "epoch": 3.9049363867684477, "grad_norm": 0.3135061805298672, "learning_rate": 1.3913585392684658e-06, "loss": 0.0524, "step": 191830 }, { "epoch": 3.9051399491094148, "grad_norm": 0.06331486648420791, "learning_rate": 1.3908667371024542e-06, "loss": 0.0685, "step": 191840 }, { "epoch": 3.905343511450382, "grad_norm": 0.057792339638820786, "learning_rate": 1.3903750078275374e-06, "loss": 0.0791, "step": 191850 }, { "epoch": 3.9055470737913485, "grad_norm": 0.16527978950197472, "learning_rate": 1.389883351453647e-06, "loss": 0.0203, "step": 191860 }, { "epoch": 3.9057506361323155, "grad_norm": 0.04492472941359169, "learning_rate": 1.3893917679907114e-06, "loss": 0.0469, "step": 191870 }, { "epoch": 3.9059541984732826, "grad_norm": 1.0559934323140503, "learning_rate": 1.38890025744866e-06, "loss": 0.0478, "step": 191880 }, { "epoch": 3.9061577608142493, "grad_norm": 16.125599461138602, "learning_rate": 1.3884088198374185e-06, "loss": 0.0694, "step": 191890 }, { "epoch": 3.9063613231552163, "grad_norm": 9.057810871528915, "learning_rate": 1.3879174551669127e-06, "loss": 0.0622, "step": 191900 }, { "epoch": 3.9065648854961834, "grad_norm": 11.268435816670245, "learning_rate": 1.3874261634470682e-06, "loss": 0.0337, "step": 191910 }, { "epoch": 3.90676844783715, "grad_norm": 0.6928607142528134, "learning_rate": 1.3869349446878034e-06, "loss": 0.0239, "step": 191920 }, { "epoch": 3.906972010178117, "grad_norm": 0.032339147391073665, "learning_rate": 1.3864437988990393e-06, "loss": 0.0048, "step": 191930 }, { "epoch": 3.907175572519084, "grad_norm": 0.029252503650159845, "learning_rate": 1.3859527260907003e-06, "loss": 0.0744, "step": 191940 }, { "epoch": 3.907379134860051, "grad_norm": 0.05514731297360347, "learning_rate": 1.3854617262726989e-06, "loss": 0.0209, "step": 191950 }, { "epoch": 3.907582697201018, "grad_norm": 0.30279520128534754, "learning_rate": 1.3849707994549521e-06, "loss": 0.1494, "step": 191960 }, { "epoch": 3.907786259541985, "grad_norm": 0.1133821109354305, "learning_rate": 1.3844799456473795e-06, "loss": 0.0675, "step": 191970 }, { "epoch": 3.9079898218829516, "grad_norm": 0.01350866078247367, "learning_rate": 1.383989164859889e-06, "loss": 0.1182, "step": 191980 }, { "epoch": 3.9081933842239187, "grad_norm": 0.018003095650509626, "learning_rate": 1.3834984571023953e-06, "loss": 0.0496, "step": 191990 }, { "epoch": 3.9083969465648853, "grad_norm": 30.933037353762874, "learning_rate": 1.383007822384808e-06, "loss": 0.0605, "step": 192000 }, { "epoch": 3.9086005089058524, "grad_norm": 16.046214944372316, "learning_rate": 1.382517260717036e-06, "loss": 0.034, "step": 192010 }, { "epoch": 3.9088040712468195, "grad_norm": 1.4147733287505484, "learning_rate": 1.3820267721089881e-06, "loss": 0.1578, "step": 192020 }, { "epoch": 3.909007633587786, "grad_norm": 13.729655672688937, "learning_rate": 1.3815363565705686e-06, "loss": 0.0615, "step": 192030 }, { "epoch": 3.909211195928753, "grad_norm": 2.8280805333854224, "learning_rate": 1.3810460141116839e-06, "loss": 0.0762, "step": 192040 }, { "epoch": 3.90941475826972, "grad_norm": 10.315553328146978, "learning_rate": 1.3805557447422352e-06, "loss": 0.1152, "step": 192050 }, { "epoch": 3.909618320610687, "grad_norm": 30.091227431693547, "learning_rate": 1.3800655484721255e-06, "loss": 0.0412, "step": 192060 }, { "epoch": 3.909821882951654, "grad_norm": 0.09170112218500072, "learning_rate": 1.3795754253112558e-06, "loss": 0.0021, "step": 192070 }, { "epoch": 3.9100254452926206, "grad_norm": 0.7210662021529423, "learning_rate": 1.3790853752695204e-06, "loss": 0.0102, "step": 192080 }, { "epoch": 3.9102290076335877, "grad_norm": 0.039026184539137046, "learning_rate": 1.3785953983568212e-06, "loss": 0.0452, "step": 192090 }, { "epoch": 3.910432569974555, "grad_norm": 6.236073443665382, "learning_rate": 1.3781054945830534e-06, "loss": 0.0453, "step": 192100 }, { "epoch": 3.9106361323155214, "grad_norm": 0.025202242409729705, "learning_rate": 1.3776156639581079e-06, "loss": 0.0031, "step": 192110 }, { "epoch": 3.9108396946564885, "grad_norm": 0.24774892617706806, "learning_rate": 1.3771259064918825e-06, "loss": 0.0726, "step": 192120 }, { "epoch": 3.9110432569974556, "grad_norm": 0.014392793534611566, "learning_rate": 1.3766362221942636e-06, "loss": 0.1177, "step": 192130 }, { "epoch": 3.911246819338422, "grad_norm": 0.006537475981642714, "learning_rate": 1.3761466110751414e-06, "loss": 0.094, "step": 192140 }, { "epoch": 3.9114503816793893, "grad_norm": 0.6932896222455627, "learning_rate": 1.3756570731444096e-06, "loss": 0.0481, "step": 192150 }, { "epoch": 3.9116539440203564, "grad_norm": 3.203024854261498, "learning_rate": 1.3751676084119492e-06, "loss": 0.0281, "step": 192160 }, { "epoch": 3.911857506361323, "grad_norm": 0.017370500466154844, "learning_rate": 1.3746782168876466e-06, "loss": 0.0141, "step": 192170 }, { "epoch": 3.91206106870229, "grad_norm": 0.0380521141177281, "learning_rate": 1.37418889858139e-06, "loss": 0.0496, "step": 192180 }, { "epoch": 3.912264631043257, "grad_norm": 0.8995641991196179, "learning_rate": 1.3736996535030567e-06, "loss": 0.0351, "step": 192190 }, { "epoch": 3.912468193384224, "grad_norm": 7.346178041125515, "learning_rate": 1.3732104816625297e-06, "loss": 0.056, "step": 192200 }, { "epoch": 3.912671755725191, "grad_norm": 13.101157688152204, "learning_rate": 1.372721383069688e-06, "loss": 0.0259, "step": 192210 }, { "epoch": 3.912875318066158, "grad_norm": 0.023777423006349007, "learning_rate": 1.3722323577344099e-06, "loss": 0.0467, "step": 192220 }, { "epoch": 3.9130788804071246, "grad_norm": 0.05895543268710745, "learning_rate": 1.3717434056665723e-06, "loss": 0.1651, "step": 192230 }, { "epoch": 3.9132824427480917, "grad_norm": 0.04218432290035666, "learning_rate": 1.3712545268760497e-06, "loss": 0.0237, "step": 192240 }, { "epoch": 3.9134860050890588, "grad_norm": 0.08041980879454656, "learning_rate": 1.3707657213727154e-06, "loss": 0.0485, "step": 192250 }, { "epoch": 3.9136895674300254, "grad_norm": 0.0034391132759635035, "learning_rate": 1.3702769891664419e-06, "loss": 0.0172, "step": 192260 }, { "epoch": 3.9138931297709925, "grad_norm": 0.15196892768604428, "learning_rate": 1.3697883302670994e-06, "loss": 0.0015, "step": 192270 }, { "epoch": 3.9140966921119595, "grad_norm": 0.05341640997448459, "learning_rate": 1.369299744684559e-06, "loss": 0.0954, "step": 192280 }, { "epoch": 3.914300254452926, "grad_norm": 0.010541790353339953, "learning_rate": 1.368811232428684e-06, "loss": 0.0265, "step": 192290 }, { "epoch": 3.9145038167938933, "grad_norm": 14.79271837969943, "learning_rate": 1.368322793509344e-06, "loss": 0.0578, "step": 192300 }, { "epoch": 3.91470737913486, "grad_norm": 0.03713986058296518, "learning_rate": 1.3678344279364048e-06, "loss": 0.0254, "step": 192310 }, { "epoch": 3.914910941475827, "grad_norm": 0.0037781603594772505, "learning_rate": 1.3673461357197237e-06, "loss": 0.0009, "step": 192320 }, { "epoch": 3.915114503816794, "grad_norm": 0.08172613228615858, "learning_rate": 1.366857916869168e-06, "loss": 0.0697, "step": 192330 }, { "epoch": 3.9153180661577607, "grad_norm": 14.494975125432767, "learning_rate": 1.3663697713945984e-06, "loss": 0.0655, "step": 192340 }, { "epoch": 3.9155216284987278, "grad_norm": 4.118106722660841, "learning_rate": 1.3658816993058688e-06, "loss": 0.0938, "step": 192350 }, { "epoch": 3.9157251908396944, "grad_norm": 0.009663061503503264, "learning_rate": 1.3653937006128399e-06, "loss": 0.0786, "step": 192360 }, { "epoch": 3.9159287531806615, "grad_norm": 0.499899668203731, "learning_rate": 1.3649057753253658e-06, "loss": 0.0016, "step": 192370 }, { "epoch": 3.9161323155216285, "grad_norm": 21.51384855281869, "learning_rate": 1.3644179234533017e-06, "loss": 0.0547, "step": 192380 }, { "epoch": 3.916335877862595, "grad_norm": 0.006764336323419292, "learning_rate": 1.3639301450065007e-06, "loss": 0.0344, "step": 192390 }, { "epoch": 3.9165394402035623, "grad_norm": 0.03105065394184803, "learning_rate": 1.3634424399948131e-06, "loss": 0.0725, "step": 192400 }, { "epoch": 3.9167430025445293, "grad_norm": 0.08616117839233862, "learning_rate": 1.3629548084280897e-06, "loss": 0.0701, "step": 192410 }, { "epoch": 3.916946564885496, "grad_norm": 1.2660914856199899, "learning_rate": 1.3624672503161778e-06, "loss": 0.0059, "step": 192420 }, { "epoch": 3.917150127226463, "grad_norm": 0.0007995232554404157, "learning_rate": 1.3619797656689254e-06, "loss": 0.0245, "step": 192430 }, { "epoch": 3.91735368956743, "grad_norm": 0.019841208931844184, "learning_rate": 1.3614923544961773e-06, "loss": 0.021, "step": 192440 }, { "epoch": 3.9175572519083968, "grad_norm": 0.13412069039504845, "learning_rate": 1.3610050168077776e-06, "loss": 0.0424, "step": 192450 }, { "epoch": 3.917760814249364, "grad_norm": 0.3418805304504188, "learning_rate": 1.3605177526135682e-06, "loss": 0.0207, "step": 192460 }, { "epoch": 3.917964376590331, "grad_norm": 0.055487824462286145, "learning_rate": 1.3600305619233922e-06, "loss": 0.0828, "step": 192470 }, { "epoch": 3.9181679389312976, "grad_norm": 8.663736842712465, "learning_rate": 1.359543444747084e-06, "loss": 0.0444, "step": 192480 }, { "epoch": 3.9183715012722646, "grad_norm": 0.23943541404695515, "learning_rate": 1.359056401094488e-06, "loss": 0.0029, "step": 192490 }, { "epoch": 3.9185750636132317, "grad_norm": 0.12077977465480748, "learning_rate": 1.358569430975436e-06, "loss": 0.0036, "step": 192500 }, { "epoch": 3.9187786259541983, "grad_norm": 9.64855541421784, "learning_rate": 1.3580825343997628e-06, "loss": 0.0091, "step": 192510 }, { "epoch": 3.9189821882951654, "grad_norm": 9.059429212344401, "learning_rate": 1.357595711377307e-06, "loss": 0.0034, "step": 192520 }, { "epoch": 3.9191857506361325, "grad_norm": 50.156848483840804, "learning_rate": 1.3571089619178952e-06, "loss": 0.0408, "step": 192530 }, { "epoch": 3.919389312977099, "grad_norm": 0.019260281848680273, "learning_rate": 1.3566222860313582e-06, "loss": 0.0006, "step": 192540 }, { "epoch": 3.919592875318066, "grad_norm": 3.1658267604134105, "learning_rate": 1.3561356837275303e-06, "loss": 0.003, "step": 192550 }, { "epoch": 3.9197964376590333, "grad_norm": 11.526817926671233, "learning_rate": 1.3556491550162337e-06, "loss": 0.0749, "step": 192560 }, { "epoch": 3.92, "grad_norm": 0.0003329223761530998, "learning_rate": 1.355162699907297e-06, "loss": 0.005, "step": 192570 }, { "epoch": 3.920203562340967, "grad_norm": 0.05366888223955102, "learning_rate": 1.3546763184105433e-06, "loss": 0.0662, "step": 192580 }, { "epoch": 3.920407124681934, "grad_norm": 0.05395660857976744, "learning_rate": 1.354190010535797e-06, "loss": 0.0038, "step": 192590 }, { "epoch": 3.9206106870229007, "grad_norm": 0.005356128416559381, "learning_rate": 1.353703776292879e-06, "loss": 0.045, "step": 192600 }, { "epoch": 3.920814249363868, "grad_norm": 0.0017718586387862301, "learning_rate": 1.3532176156916095e-06, "loss": 0.0188, "step": 192610 }, { "epoch": 3.921017811704835, "grad_norm": 3.058417812539914, "learning_rate": 1.3527315287418075e-06, "loss": 0.0254, "step": 192620 }, { "epoch": 3.9212213740458015, "grad_norm": 0.03354167430396435, "learning_rate": 1.3522455154532892e-06, "loss": 0.0388, "step": 192630 }, { "epoch": 3.9214249363867686, "grad_norm": 0.03864943639196651, "learning_rate": 1.351759575835872e-06, "loss": 0.0015, "step": 192640 }, { "epoch": 3.9216284987277352, "grad_norm": 0.010591225537542775, "learning_rate": 1.35127370989937e-06, "loss": 0.01, "step": 192650 }, { "epoch": 3.9218320610687023, "grad_norm": 0.008208302637980914, "learning_rate": 1.3507879176535922e-06, "loss": 0.0107, "step": 192660 }, { "epoch": 3.922035623409669, "grad_norm": 2.0201801075389015e-06, "learning_rate": 1.3503021991083538e-06, "loss": 0.0487, "step": 192670 }, { "epoch": 3.922239185750636, "grad_norm": 0.012421652214126118, "learning_rate": 1.349816554273465e-06, "loss": 0.1079, "step": 192680 }, { "epoch": 3.922442748091603, "grad_norm": 3.6664881014557746, "learning_rate": 1.3493309831587293e-06, "loss": 0.0026, "step": 192690 }, { "epoch": 3.9226463104325697, "grad_norm": 0.04951464790280148, "learning_rate": 1.348845485773959e-06, "loss": 0.0517, "step": 192700 }, { "epoch": 3.922849872773537, "grad_norm": 8.284877269107707, "learning_rate": 1.3483600621289562e-06, "loss": 0.0141, "step": 192710 }, { "epoch": 3.923053435114504, "grad_norm": 0.013109041656732491, "learning_rate": 1.3478747122335228e-06, "loss": 0.0804, "step": 192720 }, { "epoch": 3.9232569974554705, "grad_norm": 7.792747600739672, "learning_rate": 1.3473894360974666e-06, "loss": 0.0587, "step": 192730 }, { "epoch": 3.9234605597964376, "grad_norm": 0.0009339035044822774, "learning_rate": 1.3469042337305843e-06, "loss": 0.0631, "step": 192740 }, { "epoch": 3.9236641221374047, "grad_norm": 0.0007918774171048286, "learning_rate": 1.346419105142674e-06, "loss": 0.0289, "step": 192750 }, { "epoch": 3.9238676844783713, "grad_norm": 0.10044715930668882, "learning_rate": 1.3459340503435392e-06, "loss": 0.0745, "step": 192760 }, { "epoch": 3.9240712468193384, "grad_norm": 13.212307990660518, "learning_rate": 1.3454490693429711e-06, "loss": 0.0235, "step": 192770 }, { "epoch": 3.9242748091603055, "grad_norm": 16.54376417373743, "learning_rate": 1.3449641621507658e-06, "loss": 0.0581, "step": 192780 }, { "epoch": 3.924478371501272, "grad_norm": 21.179974948730663, "learning_rate": 1.3444793287767171e-06, "loss": 0.0933, "step": 192790 }, { "epoch": 3.924681933842239, "grad_norm": 14.155583679935997, "learning_rate": 1.343994569230616e-06, "loss": 0.1046, "step": 192800 }, { "epoch": 3.9248854961832063, "grad_norm": 0.04674906860290133, "learning_rate": 1.3435098835222544e-06, "loss": 0.001, "step": 192810 }, { "epoch": 3.925089058524173, "grad_norm": 0.011117034070487765, "learning_rate": 1.3430252716614194e-06, "loss": 0.0334, "step": 192820 }, { "epoch": 3.92529262086514, "grad_norm": 0.08050811199993885, "learning_rate": 1.3425407336579e-06, "loss": 0.0474, "step": 192830 }, { "epoch": 3.925496183206107, "grad_norm": 26.560939625494594, "learning_rate": 1.3420562695214806e-06, "loss": 0.1028, "step": 192840 }, { "epoch": 3.9256997455470737, "grad_norm": 4.261519234518514, "learning_rate": 1.341571879261947e-06, "loss": 0.0852, "step": 192850 }, { "epoch": 3.9259033078880408, "grad_norm": 0.01261032080370306, "learning_rate": 1.3410875628890817e-06, "loss": 0.0407, "step": 192860 }, { "epoch": 3.926106870229008, "grad_norm": 11.357733135823707, "learning_rate": 1.340603320412664e-06, "loss": 0.1035, "step": 192870 }, { "epoch": 3.9263104325699745, "grad_norm": 18.004781900893654, "learning_rate": 1.3401191518424761e-06, "loss": 0.1151, "step": 192880 }, { "epoch": 3.9265139949109416, "grad_norm": 0.05453230016305481, "learning_rate": 1.3396350571882976e-06, "loss": 0.0278, "step": 192890 }, { "epoch": 3.9267175572519086, "grad_norm": 0.5329515161939883, "learning_rate": 1.339151036459902e-06, "loss": 0.0278, "step": 192900 }, { "epoch": 3.9269211195928753, "grad_norm": 0.03161630232829461, "learning_rate": 1.338667089667065e-06, "loss": 0.0699, "step": 192910 }, { "epoch": 3.9271246819338423, "grad_norm": 0.024480174499904594, "learning_rate": 1.3381832168195646e-06, "loss": 0.0815, "step": 192920 }, { "epoch": 3.9273282442748094, "grad_norm": 4.097286058578808, "learning_rate": 1.3376994179271697e-06, "loss": 0.0367, "step": 192930 }, { "epoch": 3.927531806615776, "grad_norm": 0.014440186342889507, "learning_rate": 1.3372156929996516e-06, "loss": 0.0813, "step": 192940 }, { "epoch": 3.927735368956743, "grad_norm": 0.02683070466386902, "learning_rate": 1.33673204204678e-06, "loss": 0.0322, "step": 192950 }, { "epoch": 3.9279389312977098, "grad_norm": 0.02158890595146091, "learning_rate": 1.336248465078323e-06, "loss": 0.1035, "step": 192960 }, { "epoch": 3.928142493638677, "grad_norm": 1.4637729208162853, "learning_rate": 1.3357649621040476e-06, "loss": 0.0574, "step": 192970 }, { "epoch": 3.928346055979644, "grad_norm": 0.027637161849927133, "learning_rate": 1.3352815331337183e-06, "loss": 0.0077, "step": 192980 }, { "epoch": 3.9285496183206106, "grad_norm": 0.028054620135532886, "learning_rate": 1.3347981781770986e-06, "loss": 0.0352, "step": 192990 }, { "epoch": 3.9287531806615776, "grad_norm": 0.013059098179006594, "learning_rate": 1.3343148972439502e-06, "loss": 0.0641, "step": 193000 }, { "epoch": 3.9289567430025443, "grad_norm": 0.1298449306428667, "learning_rate": 1.3338316903440335e-06, "loss": 0.0384, "step": 193010 }, { "epoch": 3.9291603053435114, "grad_norm": 24.85524769401871, "learning_rate": 1.333348557487108e-06, "loss": 0.0387, "step": 193020 }, { "epoch": 3.9293638676844784, "grad_norm": 0.8724474694824861, "learning_rate": 1.3328654986829315e-06, "loss": 0.0829, "step": 193030 }, { "epoch": 3.929567430025445, "grad_norm": 0.006073678690171236, "learning_rate": 1.332382513941259e-06, "loss": 0.0175, "step": 193040 }, { "epoch": 3.929770992366412, "grad_norm": 0.015424212884847115, "learning_rate": 1.3318996032718472e-06, "loss": 0.0131, "step": 193050 }, { "epoch": 3.929974554707379, "grad_norm": 0.020476950552035956, "learning_rate": 1.331416766684444e-06, "loss": 0.0292, "step": 193060 }, { "epoch": 3.930178117048346, "grad_norm": 0.0024301482750511026, "learning_rate": 1.3309340041888074e-06, "loss": 0.027, "step": 193070 }, { "epoch": 3.930381679389313, "grad_norm": 0.5239386912521586, "learning_rate": 1.3304513157946835e-06, "loss": 0.0469, "step": 193080 }, { "epoch": 3.93058524173028, "grad_norm": 0.01867211699868099, "learning_rate": 1.3299687015118195e-06, "loss": 0.0155, "step": 193090 }, { "epoch": 3.9307888040712466, "grad_norm": 8.540764477390246, "learning_rate": 1.3294861613499676e-06, "loss": 0.051, "step": 193100 }, { "epoch": 3.9309923664122137, "grad_norm": 0.02329950154049563, "learning_rate": 1.329003695318869e-06, "loss": 0.0285, "step": 193110 }, { "epoch": 3.931195928753181, "grad_norm": 0.14710586338608522, "learning_rate": 1.3285213034282674e-06, "loss": 0.0039, "step": 193120 }, { "epoch": 3.9313994910941474, "grad_norm": 0.014189695124683837, "learning_rate": 1.3280389856879094e-06, "loss": 0.0257, "step": 193130 }, { "epoch": 3.9316030534351145, "grad_norm": 0.12386807221163813, "learning_rate": 1.3275567421075325e-06, "loss": 0.0114, "step": 193140 }, { "epoch": 3.9318066157760816, "grad_norm": 0.3126015903556063, "learning_rate": 1.327074572696877e-06, "loss": 0.0502, "step": 193150 }, { "epoch": 3.9320101781170482, "grad_norm": 9.148466305955486, "learning_rate": 1.3265924774656818e-06, "loss": 0.2126, "step": 193160 }, { "epoch": 3.9322137404580153, "grad_norm": 0.14830085594924272, "learning_rate": 1.326110456423682e-06, "loss": 0.0288, "step": 193170 }, { "epoch": 3.9324173027989824, "grad_norm": 0.08390391844352303, "learning_rate": 1.3256285095806142e-06, "loss": 0.0579, "step": 193180 }, { "epoch": 3.932620865139949, "grad_norm": 0.0025142064488126808, "learning_rate": 1.3251466369462108e-06, "loss": 0.0918, "step": 193190 }, { "epoch": 3.932824427480916, "grad_norm": 0.04223065221648039, "learning_rate": 1.3246648385302046e-06, "loss": 0.0505, "step": 193200 }, { "epoch": 3.933027989821883, "grad_norm": 5.05211092328935, "learning_rate": 1.324183114342325e-06, "loss": 0.0173, "step": 193210 }, { "epoch": 3.93323155216285, "grad_norm": 0.02423086515537157, "learning_rate": 1.3237014643923024e-06, "loss": 0.0018, "step": 193220 }, { "epoch": 3.933435114503817, "grad_norm": 1.6245473993040032, "learning_rate": 1.3232198886898656e-06, "loss": 0.1148, "step": 193230 }, { "epoch": 3.933638676844784, "grad_norm": 0.14947218116794098, "learning_rate": 1.3227383872447352e-06, "loss": 0.0853, "step": 193240 }, { "epoch": 3.9338422391857506, "grad_norm": 0.03267515474240203, "learning_rate": 1.3222569600666413e-06, "loss": 0.0471, "step": 193250 }, { "epoch": 3.9340458015267177, "grad_norm": 0.028870833994191204, "learning_rate": 1.321775607165306e-06, "loss": 0.0166, "step": 193260 }, { "epoch": 3.9342493638676843, "grad_norm": 0.01635421575014244, "learning_rate": 1.321294328550447e-06, "loss": 0.0006, "step": 193270 }, { "epoch": 3.9344529262086514, "grad_norm": 0.36391873401551617, "learning_rate": 1.320813124231789e-06, "loss": 0.0228, "step": 193280 }, { "epoch": 3.9346564885496185, "grad_norm": 0.03784869491692109, "learning_rate": 1.32033199421905e-06, "loss": 0.063, "step": 193290 }, { "epoch": 3.934860050890585, "grad_norm": 14.238786148277718, "learning_rate": 1.3198509385219427e-06, "loss": 0.0424, "step": 193300 }, { "epoch": 3.935063613231552, "grad_norm": 0.048521940191178774, "learning_rate": 1.3193699571501882e-06, "loss": 0.0023, "step": 193310 }, { "epoch": 3.935267175572519, "grad_norm": 0.030416462656672288, "learning_rate": 1.3188890501134976e-06, "loss": 0.0713, "step": 193320 }, { "epoch": 3.935470737913486, "grad_norm": 28.284686366394727, "learning_rate": 1.3184082174215833e-06, "loss": 0.0857, "step": 193330 }, { "epoch": 3.935674300254453, "grad_norm": 0.06784350684147149, "learning_rate": 1.3179274590841568e-06, "loss": 0.0624, "step": 193340 }, { "epoch": 3.9358778625954196, "grad_norm": 0.33476809142946273, "learning_rate": 1.3174467751109277e-06, "loss": 0.0318, "step": 193350 }, { "epoch": 3.9360814249363867, "grad_norm": 0.015802467376620118, "learning_rate": 1.3169661655116045e-06, "loss": 0.0299, "step": 193360 }, { "epoch": 3.9362849872773538, "grad_norm": 8.902381589246716, "learning_rate": 1.3164856302958927e-06, "loss": 0.1173, "step": 193370 }, { "epoch": 3.9364885496183204, "grad_norm": 18.776463682210313, "learning_rate": 1.3160051694734982e-06, "loss": 0.0968, "step": 193380 }, { "epoch": 3.9366921119592875, "grad_norm": 13.154727197768805, "learning_rate": 1.3155247830541245e-06, "loss": 0.0307, "step": 193390 }, { "epoch": 3.9368956743002546, "grad_norm": 9.477606473629457, "learning_rate": 1.315044471047473e-06, "loss": 0.0387, "step": 193400 }, { "epoch": 3.937099236641221, "grad_norm": 0.09479009749261455, "learning_rate": 1.3145642334632453e-06, "loss": 0.0006, "step": 193410 }, { "epoch": 3.9373027989821883, "grad_norm": 13.025561032070906, "learning_rate": 1.314084070311139e-06, "loss": 0.0276, "step": 193420 }, { "epoch": 3.9375063613231553, "grad_norm": 0.04307710972944931, "learning_rate": 1.3136039816008523e-06, "loss": 0.0753, "step": 193430 }, { "epoch": 3.937709923664122, "grad_norm": 0.01749658285682107, "learning_rate": 1.3131239673420831e-06, "loss": 0.0788, "step": 193440 }, { "epoch": 3.937913486005089, "grad_norm": 85.81862727095333, "learning_rate": 1.3126440275445224e-06, "loss": 0.1019, "step": 193450 }, { "epoch": 3.938117048346056, "grad_norm": 0.060895231339038805, "learning_rate": 1.3121641622178631e-06, "loss": 0.0033, "step": 193460 }, { "epoch": 3.9383206106870228, "grad_norm": 0.0684052849445167, "learning_rate": 1.311684371371802e-06, "loss": 0.0462, "step": 193470 }, { "epoch": 3.93852417302799, "grad_norm": 0.013195211701761769, "learning_rate": 1.3112046550160234e-06, "loss": 0.0012, "step": 193480 }, { "epoch": 3.938727735368957, "grad_norm": 37.3812757193751, "learning_rate": 1.3107250131602167e-06, "loss": 0.0674, "step": 193490 }, { "epoch": 3.9389312977099236, "grad_norm": 0.1746833558610377, "learning_rate": 1.3102454458140734e-06, "loss": 0.0644, "step": 193500 }, { "epoch": 3.9391348600508906, "grad_norm": 0.009739486714295007, "learning_rate": 1.3097659529872735e-06, "loss": 0.0337, "step": 193510 }, { "epoch": 3.9393384223918577, "grad_norm": 0.040151551471255674, "learning_rate": 1.309286534689504e-06, "loss": 0.059, "step": 193520 }, { "epoch": 3.9395419847328244, "grad_norm": 0.05274347856359932, "learning_rate": 1.3088071909304461e-06, "loss": 0.0666, "step": 193530 }, { "epoch": 3.9397455470737914, "grad_norm": 28.87510068581374, "learning_rate": 1.3083279217197809e-06, "loss": 0.1031, "step": 193540 }, { "epoch": 3.9399491094147585, "grad_norm": 0.02980105961103655, "learning_rate": 1.3078487270671886e-06, "loss": 0.0131, "step": 193550 }, { "epoch": 3.940152671755725, "grad_norm": 0.14354795377729657, "learning_rate": 1.3073696069823467e-06, "loss": 0.0765, "step": 193560 }, { "epoch": 3.9403562340966922, "grad_norm": 0.026914821932224112, "learning_rate": 1.3068905614749317e-06, "loss": 0.0015, "step": 193570 }, { "epoch": 3.9405597964376593, "grad_norm": 0.1143501623489419, "learning_rate": 1.3064115905546181e-06, "loss": 0.0787, "step": 193580 }, { "epoch": 3.940763358778626, "grad_norm": 0.03081484839945538, "learning_rate": 1.3059326942310802e-06, "loss": 0.0013, "step": 193590 }, { "epoch": 3.940966921119593, "grad_norm": 43.46891438592112, "learning_rate": 1.3054538725139908e-06, "loss": 0.0899, "step": 193600 }, { "epoch": 3.9411704834605596, "grad_norm": 0.009123963753415544, "learning_rate": 1.304975125413016e-06, "loss": 0.0486, "step": 193610 }, { "epoch": 3.9413740458015267, "grad_norm": 0.0358321652201361, "learning_rate": 1.304496452937829e-06, "loss": 0.0431, "step": 193620 }, { "epoch": 3.941577608142494, "grad_norm": 0.00603187646117402, "learning_rate": 1.3040178550980974e-06, "loss": 0.0227, "step": 193630 }, { "epoch": 3.9417811704834604, "grad_norm": 0.06466442106570944, "learning_rate": 1.3035393319034828e-06, "loss": 0.0966, "step": 193640 }, { "epoch": 3.9419847328244275, "grad_norm": 0.03583714373619942, "learning_rate": 1.3030608833636538e-06, "loss": 0.0056, "step": 193650 }, { "epoch": 3.942188295165394, "grad_norm": 0.0058917634228090624, "learning_rate": 1.3025825094882732e-06, "loss": 0.0117, "step": 193660 }, { "epoch": 3.9423918575063612, "grad_norm": 0.004918877728639593, "learning_rate": 1.3021042102869985e-06, "loss": 0.0192, "step": 193670 }, { "epoch": 3.9425954198473283, "grad_norm": 0.0842327591273772, "learning_rate": 1.3016259857694952e-06, "loss": 0.0378, "step": 193680 }, { "epoch": 3.942798982188295, "grad_norm": 0.07433477297361273, "learning_rate": 1.3011478359454166e-06, "loss": 0.0381, "step": 193690 }, { "epoch": 3.943002544529262, "grad_norm": 0.002258239243950958, "learning_rate": 1.30066976082442e-06, "loss": 0.0735, "step": 193700 }, { "epoch": 3.943206106870229, "grad_norm": 0.025817577708616413, "learning_rate": 1.3001917604161657e-06, "loss": 0.0676, "step": 193710 }, { "epoch": 3.9434096692111957, "grad_norm": 7.042340571152371, "learning_rate": 1.2997138347303023e-06, "loss": 0.0528, "step": 193720 }, { "epoch": 3.943613231552163, "grad_norm": 0.027539774679340538, "learning_rate": 1.299235983776483e-06, "loss": 0.0221, "step": 193730 }, { "epoch": 3.94381679389313, "grad_norm": 0.048525986100751234, "learning_rate": 1.2987582075643618e-06, "loss": 0.1202, "step": 193740 }, { "epoch": 3.9440203562340965, "grad_norm": 0.018252489949903396, "learning_rate": 1.2982805061035847e-06, "loss": 0.0273, "step": 193750 }, { "epoch": 3.9442239185750636, "grad_norm": 0.017074107038898924, "learning_rate": 1.2978028794038006e-06, "loss": 0.0143, "step": 193760 }, { "epoch": 3.9444274809160307, "grad_norm": 43.694155728342004, "learning_rate": 1.2973253274746556e-06, "loss": 0.013, "step": 193770 }, { "epoch": 3.9446310432569973, "grad_norm": 18.117874559070348, "learning_rate": 1.2968478503257947e-06, "loss": 0.0343, "step": 193780 }, { "epoch": 3.9448346055979644, "grad_norm": 1.1811767320660806, "learning_rate": 1.296370447966861e-06, "loss": 0.0477, "step": 193790 }, { "epoch": 3.9450381679389315, "grad_norm": 0.014480387225018806, "learning_rate": 1.2958931204074959e-06, "loss": 0.055, "step": 193800 }, { "epoch": 3.945241730279898, "grad_norm": 2.424008514211773, "learning_rate": 1.2954158676573424e-06, "loss": 0.0421, "step": 193810 }, { "epoch": 3.945445292620865, "grad_norm": 0.04995462609782967, "learning_rate": 1.2949386897260336e-06, "loss": 0.0006, "step": 193820 }, { "epoch": 3.9456488549618323, "grad_norm": 0.6995491438395326, "learning_rate": 1.2944615866232113e-06, "loss": 0.0557, "step": 193830 }, { "epoch": 3.945852417302799, "grad_norm": 0.1794080682608824, "learning_rate": 1.293984558358512e-06, "loss": 0.0769, "step": 193840 }, { "epoch": 3.946055979643766, "grad_norm": 0.06073290764992247, "learning_rate": 1.293507604941564e-06, "loss": 0.0552, "step": 193850 }, { "epoch": 3.946259541984733, "grad_norm": 3.1854820378365316, "learning_rate": 1.2930307263820064e-06, "loss": 0.0038, "step": 193860 }, { "epoch": 3.9464631043256997, "grad_norm": 0.07457878823671925, "learning_rate": 1.2925539226894684e-06, "loss": 0.0026, "step": 193870 }, { "epoch": 3.9466666666666668, "grad_norm": 0.08536948444076223, "learning_rate": 1.2920771938735782e-06, "loss": 0.0638, "step": 193880 }, { "epoch": 3.946870229007634, "grad_norm": 9.2373754237918, "learning_rate": 1.2916005399439646e-06, "loss": 0.1001, "step": 193890 }, { "epoch": 3.9470737913486005, "grad_norm": 33.25513165675015, "learning_rate": 1.2911239609102543e-06, "loss": 0.0788, "step": 193900 }, { "epoch": 3.9472773536895676, "grad_norm": 0.12021458708018722, "learning_rate": 1.2906474567820727e-06, "loss": 0.023, "step": 193910 }, { "epoch": 3.947480916030534, "grad_norm": 0.0047346191355538275, "learning_rate": 1.2901710275690437e-06, "loss": 0.0692, "step": 193920 }, { "epoch": 3.9476844783715013, "grad_norm": 0.018946330411776558, "learning_rate": 1.2896946732807886e-06, "loss": 0.0537, "step": 193930 }, { "epoch": 3.9478880407124683, "grad_norm": 0.011066755861664935, "learning_rate": 1.2892183939269287e-06, "loss": 0.106, "step": 193940 }, { "epoch": 3.948091603053435, "grad_norm": 0.2339936485180076, "learning_rate": 1.2887421895170826e-06, "loss": 0.0405, "step": 193950 }, { "epoch": 3.948295165394402, "grad_norm": 0.02943405723430747, "learning_rate": 1.2882660600608681e-06, "loss": 0.0394, "step": 193960 }, { "epoch": 3.9484987277353687, "grad_norm": 0.1714870258761368, "learning_rate": 1.2877900055679011e-06, "loss": 0.0755, "step": 193970 }, { "epoch": 3.9487022900763358, "grad_norm": 0.015576012026862603, "learning_rate": 1.2873140260477962e-06, "loss": 0.0725, "step": 193980 }, { "epoch": 3.948905852417303, "grad_norm": 23.646356003010425, "learning_rate": 1.2868381215101667e-06, "loss": 0.1443, "step": 193990 }, { "epoch": 3.9491094147582695, "grad_norm": 0.30487457314744376, "learning_rate": 1.2863622919646252e-06, "loss": 0.0666, "step": 194000 }, { "epoch": 3.9493129770992366, "grad_norm": 9.415524804828813, "learning_rate": 1.2858865374207774e-06, "loss": 0.0236, "step": 194010 }, { "epoch": 3.9495165394402036, "grad_norm": 0.5331877241837851, "learning_rate": 1.2854108578882357e-06, "loss": 0.0503, "step": 194020 }, { "epoch": 3.9497201017811703, "grad_norm": 0.08616857703840629, "learning_rate": 1.2849352533766085e-06, "loss": 0.0952, "step": 194030 }, { "epoch": 3.9499236641221374, "grad_norm": 1.3279449052286096, "learning_rate": 1.2844597238954954e-06, "loss": 0.0018, "step": 194040 }, { "epoch": 3.9501272264631044, "grad_norm": 31.091936046905946, "learning_rate": 1.2839842694545064e-06, "loss": 0.0559, "step": 194050 }, { "epoch": 3.950330788804071, "grad_norm": 0.7485242181872288, "learning_rate": 1.2835088900632403e-06, "loss": 0.0312, "step": 194060 }, { "epoch": 3.950534351145038, "grad_norm": 0.025799548526128823, "learning_rate": 1.283033585731297e-06, "loss": 0.0053, "step": 194070 }, { "epoch": 3.9507379134860052, "grad_norm": 0.1260881858163707, "learning_rate": 1.2825583564682815e-06, "loss": 0.0219, "step": 194080 }, { "epoch": 3.950941475826972, "grad_norm": 0.00823387781851027, "learning_rate": 1.282083202283786e-06, "loss": 0.0005, "step": 194090 }, { "epoch": 3.951145038167939, "grad_norm": 0.6581877892774668, "learning_rate": 1.2816081231874072e-06, "loss": 0.0231, "step": 194100 }, { "epoch": 3.951348600508906, "grad_norm": 0.5472568361553051, "learning_rate": 1.2811331191887449e-06, "loss": 0.0756, "step": 194110 }, { "epoch": 3.9515521628498727, "grad_norm": 0.324885213003306, "learning_rate": 1.280658190297388e-06, "loss": 0.0315, "step": 194120 }, { "epoch": 3.9517557251908397, "grad_norm": 0.02906163802485887, "learning_rate": 1.2801833365229289e-06, "loss": 0.0351, "step": 194130 }, { "epoch": 3.951959287531807, "grad_norm": 0.019779107705912773, "learning_rate": 1.2797085578749586e-06, "loss": 0.0039, "step": 194140 }, { "epoch": 3.9521628498727734, "grad_norm": 10.73590887577435, "learning_rate": 1.2792338543630656e-06, "loss": 0.0648, "step": 194150 }, { "epoch": 3.9523664122137405, "grad_norm": 0.008413020787840533, "learning_rate": 1.2787592259968372e-06, "loss": 0.0008, "step": 194160 }, { "epoch": 3.9525699745547076, "grad_norm": 0.01426191630766416, "learning_rate": 1.2782846727858594e-06, "loss": 0.2302, "step": 194170 }, { "epoch": 3.9527735368956742, "grad_norm": 13.241322833987319, "learning_rate": 1.2778101947397175e-06, "loss": 0.016, "step": 194180 }, { "epoch": 3.9529770992366413, "grad_norm": 0.03116444136125663, "learning_rate": 1.2773357918679896e-06, "loss": 0.0247, "step": 194190 }, { "epoch": 3.9531806615776084, "grad_norm": 2.7878716787788984, "learning_rate": 1.2768614641802624e-06, "loss": 0.0692, "step": 194200 }, { "epoch": 3.953384223918575, "grad_norm": 12.198762366361326, "learning_rate": 1.2763872116861137e-06, "loss": 0.1056, "step": 194210 }, { "epoch": 3.953587786259542, "grad_norm": 0.004601520437465431, "learning_rate": 1.2759130343951193e-06, "loss": 0.0016, "step": 194220 }, { "epoch": 3.953791348600509, "grad_norm": 17.43535075654332, "learning_rate": 1.2754389323168586e-06, "loss": 0.1373, "step": 194230 }, { "epoch": 3.953994910941476, "grad_norm": 0.003860915260970866, "learning_rate": 1.2749649054609076e-06, "loss": 0.0251, "step": 194240 }, { "epoch": 3.954198473282443, "grad_norm": 0.375120665245329, "learning_rate": 1.274490953836835e-06, "loss": 0.1152, "step": 194250 }, { "epoch": 3.9544020356234095, "grad_norm": 0.06748165608787894, "learning_rate": 1.2740170774542193e-06, "loss": 0.0605, "step": 194260 }, { "epoch": 3.9546055979643766, "grad_norm": 0.004721663116957306, "learning_rate": 1.2735432763226258e-06, "loss": 0.0915, "step": 194270 }, { "epoch": 3.9548091603053432, "grad_norm": 0.0026734085822725045, "learning_rate": 1.2730695504516244e-06, "loss": 0.1695, "step": 194280 }, { "epoch": 3.9550127226463103, "grad_norm": 0.3767771834612489, "learning_rate": 1.2725958998507865e-06, "loss": 0.0458, "step": 194290 }, { "epoch": 3.9552162849872774, "grad_norm": 0.11231952698257358, "learning_rate": 1.2721223245296738e-06, "loss": 0.0667, "step": 194300 }, { "epoch": 3.955419847328244, "grad_norm": 0.03786090196212664, "learning_rate": 1.271648824497852e-06, "loss": 0.047, "step": 194310 }, { "epoch": 3.955623409669211, "grad_norm": 0.03319033502982546, "learning_rate": 1.2711753997648845e-06, "loss": 0.126, "step": 194320 }, { "epoch": 3.955826972010178, "grad_norm": 0.09219853729185432, "learning_rate": 1.2707020503403323e-06, "loss": 0.0596, "step": 194330 }, { "epoch": 3.956030534351145, "grad_norm": 0.08936899400165171, "learning_rate": 1.2702287762337556e-06, "loss": 0.0131, "step": 194340 }, { "epoch": 3.956234096692112, "grad_norm": 0.02262639200511979, "learning_rate": 1.2697555774547127e-06, "loss": 0.0192, "step": 194350 }, { "epoch": 3.956437659033079, "grad_norm": 9.143860486156775, "learning_rate": 1.2692824540127607e-06, "loss": 0.0707, "step": 194360 }, { "epoch": 3.9566412213740456, "grad_norm": 28.530949054936503, "learning_rate": 1.2688094059174544e-06, "loss": 0.1294, "step": 194370 }, { "epoch": 3.9568447837150127, "grad_norm": 0.37389142775523954, "learning_rate": 1.2683364331783476e-06, "loss": 0.0281, "step": 194380 }, { "epoch": 3.9570483460559798, "grad_norm": 0.012369336043627749, "learning_rate": 1.2678635358049934e-06, "loss": 0.0482, "step": 194390 }, { "epoch": 3.9572519083969464, "grad_norm": 0.3119362143929963, "learning_rate": 1.2673907138069418e-06, "loss": 0.0321, "step": 194400 }, { "epoch": 3.9574554707379135, "grad_norm": 0.017530960823119588, "learning_rate": 1.266917967193742e-06, "loss": 0.0763, "step": 194410 }, { "epoch": 3.9576590330788806, "grad_norm": 0.15418148593535547, "learning_rate": 1.266445295974944e-06, "loss": 0.0536, "step": 194420 }, { "epoch": 3.957862595419847, "grad_norm": 18.61161467666703, "learning_rate": 1.2659727001600908e-06, "loss": 0.0689, "step": 194430 }, { "epoch": 3.9580661577608143, "grad_norm": 0.07245649645423993, "learning_rate": 1.2655001797587263e-06, "loss": 0.0882, "step": 194440 }, { "epoch": 3.9582697201017814, "grad_norm": 15.551276113713607, "learning_rate": 1.2650277347803985e-06, "loss": 0.0291, "step": 194450 }, { "epoch": 3.958473282442748, "grad_norm": 0.41021381354300407, "learning_rate": 1.2645553652346448e-06, "loss": 0.0654, "step": 194460 }, { "epoch": 3.958676844783715, "grad_norm": 0.05730460890545486, "learning_rate": 1.264083071131007e-06, "loss": 0.0353, "step": 194470 }, { "epoch": 3.958880407124682, "grad_norm": 0.03710044517092856, "learning_rate": 1.2636108524790236e-06, "loss": 0.0397, "step": 194480 }, { "epoch": 3.9590839694656488, "grad_norm": 0.03346335966310098, "learning_rate": 1.2631387092882309e-06, "loss": 0.0315, "step": 194490 }, { "epoch": 3.959287531806616, "grad_norm": 0.1926436140624044, "learning_rate": 1.2626666415681654e-06, "loss": 0.0654, "step": 194500 }, { "epoch": 3.959491094147583, "grad_norm": 0.012035906313064018, "learning_rate": 1.2621946493283605e-06, "loss": 0.0343, "step": 194510 }, { "epoch": 3.9596946564885496, "grad_norm": 0.3560003444902582, "learning_rate": 1.2617227325783488e-06, "loss": 0.047, "step": 194520 }, { "epoch": 3.9598982188295166, "grad_norm": 10.929467923376802, "learning_rate": 1.2612508913276617e-06, "loss": 0.0752, "step": 194530 }, { "epoch": 3.9601017811704837, "grad_norm": 0.09659421951781863, "learning_rate": 1.2607791255858281e-06, "loss": 0.0649, "step": 194540 }, { "epoch": 3.9603053435114504, "grad_norm": 1.1533804726608068, "learning_rate": 1.2603074353623762e-06, "loss": 0.0756, "step": 194550 }, { "epoch": 3.9605089058524174, "grad_norm": 0.34016750613181246, "learning_rate": 1.2598358206668327e-06, "loss": 0.0685, "step": 194560 }, { "epoch": 3.960712468193384, "grad_norm": 0.030719707479420447, "learning_rate": 1.2593642815087215e-06, "loss": 0.0163, "step": 194570 }, { "epoch": 3.960916030534351, "grad_norm": 0.004776237406669623, "learning_rate": 1.2588928178975684e-06, "loss": 0.0112, "step": 194580 }, { "epoch": 3.9611195928753182, "grad_norm": 0.4513274920350739, "learning_rate": 1.2584214298428903e-06, "loss": 0.0182, "step": 194590 }, { "epoch": 3.961323155216285, "grad_norm": 0.01531213496623478, "learning_rate": 1.2579501173542119e-06, "loss": 0.105, "step": 194600 }, { "epoch": 3.961526717557252, "grad_norm": 0.010939736137209957, "learning_rate": 1.2574788804410527e-06, "loss": 0.0255, "step": 194610 }, { "epoch": 3.9617302798982186, "grad_norm": 0.09423817236132864, "learning_rate": 1.2570077191129238e-06, "loss": 0.0755, "step": 194620 }, { "epoch": 3.9619338422391857, "grad_norm": 0.02551023044658028, "learning_rate": 1.2565366333793488e-06, "loss": 0.0186, "step": 194630 }, { "epoch": 3.9621374045801527, "grad_norm": 12.764587697141472, "learning_rate": 1.2560656232498358e-06, "loss": 0.0647, "step": 194640 }, { "epoch": 3.9623409669211194, "grad_norm": 0.08536110438211589, "learning_rate": 1.2555946887338989e-06, "loss": 0.0381, "step": 194650 }, { "epoch": 3.9625445292620864, "grad_norm": 0.0929473167679848, "learning_rate": 1.2551238298410529e-06, "loss": 0.0315, "step": 194660 }, { "epoch": 3.9627480916030535, "grad_norm": 0.06540641520694994, "learning_rate": 1.2546530465808026e-06, "loss": 0.0089, "step": 194670 }, { "epoch": 3.96295165394402, "grad_norm": 0.03752068705200781, "learning_rate": 1.2541823389626566e-06, "loss": 0.0281, "step": 194680 }, { "epoch": 3.9631552162849872, "grad_norm": 0.010779880398044353, "learning_rate": 1.2537117069961263e-06, "loss": 0.0075, "step": 194690 }, { "epoch": 3.9633587786259543, "grad_norm": 0.03410457836162634, "learning_rate": 1.2532411506907116e-06, "loss": 0.0049, "step": 194700 }, { "epoch": 3.963562340966921, "grad_norm": 0.0385244465426017, "learning_rate": 1.2527706700559173e-06, "loss": 0.0056, "step": 194710 }, { "epoch": 3.963765903307888, "grad_norm": 0.0389052879686306, "learning_rate": 1.2523002651012461e-06, "loss": 0.0402, "step": 194720 }, { "epoch": 3.963969465648855, "grad_norm": 115.87746984259135, "learning_rate": 1.251829935836198e-06, "loss": 0.0563, "step": 194730 }, { "epoch": 3.9641730279898217, "grad_norm": 0.12976190972248944, "learning_rate": 1.2513596822702718e-06, "loss": 0.0788, "step": 194740 }, { "epoch": 3.964376590330789, "grad_norm": 0.08619568903774133, "learning_rate": 1.2508895044129648e-06, "loss": 0.0017, "step": 194750 }, { "epoch": 3.964580152671756, "grad_norm": 0.00586861372247685, "learning_rate": 1.2504194022737731e-06, "loss": 0.0049, "step": 194760 }, { "epoch": 3.9647837150127225, "grad_norm": 0.006818504068582528, "learning_rate": 1.2499493758621916e-06, "loss": 0.0801, "step": 194770 }, { "epoch": 3.9649872773536896, "grad_norm": 7.980022660804825, "learning_rate": 1.2494794251877119e-06, "loss": 0.1616, "step": 194780 }, { "epoch": 3.9651908396946567, "grad_norm": 0.013527434791660409, "learning_rate": 1.2490095502598277e-06, "loss": 0.0521, "step": 194790 }, { "epoch": 3.9653944020356233, "grad_norm": 0.07296383386346256, "learning_rate": 1.2485397510880237e-06, "loss": 0.0742, "step": 194800 }, { "epoch": 3.9655979643765904, "grad_norm": 0.04766593074824631, "learning_rate": 1.2480700276817925e-06, "loss": 0.0753, "step": 194810 }, { "epoch": 3.9658015267175575, "grad_norm": 1.7436307650012368, "learning_rate": 1.2476003800506215e-06, "loss": 0.0348, "step": 194820 }, { "epoch": 3.966005089058524, "grad_norm": 33.63813742066601, "learning_rate": 1.2471308082039908e-06, "loss": 0.072, "step": 194830 }, { "epoch": 3.966208651399491, "grad_norm": 0.01541327302384914, "learning_rate": 1.24666131215139e-06, "loss": 0.0012, "step": 194840 }, { "epoch": 3.9664122137404583, "grad_norm": 5.527397709794105, "learning_rate": 1.246191891902297e-06, "loss": 0.0038, "step": 194850 }, { "epoch": 3.966615776081425, "grad_norm": 0.06450327815668973, "learning_rate": 1.2457225474661933e-06, "loss": 0.0151, "step": 194860 }, { "epoch": 3.966819338422392, "grad_norm": 0.0674664114308135, "learning_rate": 1.2452532788525578e-06, "loss": 0.0563, "step": 194870 }, { "epoch": 3.9670229007633586, "grad_norm": 0.03243035461618024, "learning_rate": 1.2447840860708693e-06, "loss": 0.0473, "step": 194880 }, { "epoch": 3.9672264631043257, "grad_norm": 0.06618818202076984, "learning_rate": 1.2443149691306022e-06, "loss": 0.0303, "step": 194890 }, { "epoch": 3.9674300254452928, "grad_norm": 0.20458651638550798, "learning_rate": 1.2438459280412319e-06, "loss": 0.0009, "step": 194900 }, { "epoch": 3.9676335877862594, "grad_norm": 0.03552318811695648, "learning_rate": 1.243376962812231e-06, "loss": 0.0487, "step": 194910 }, { "epoch": 3.9678371501272265, "grad_norm": 0.05325043218998892, "learning_rate": 1.2429080734530707e-06, "loss": 0.0461, "step": 194920 }, { "epoch": 3.968040712468193, "grad_norm": 0.2855233765071248, "learning_rate": 1.242439259973221e-06, "loss": 0.0571, "step": 194930 }, { "epoch": 3.96824427480916, "grad_norm": 9.127808415885086, "learning_rate": 1.2419705223821505e-06, "loss": 0.0597, "step": 194940 }, { "epoch": 3.9684478371501273, "grad_norm": 0.1797999019886553, "learning_rate": 1.2415018606893252e-06, "loss": 0.045, "step": 194950 }, { "epoch": 3.968651399491094, "grad_norm": 4.6797307401547155, "learning_rate": 1.241033274904211e-06, "loss": 0.1031, "step": 194960 }, { "epoch": 3.968854961832061, "grad_norm": 11.001441423208323, "learning_rate": 1.2405647650362706e-06, "loss": 0.1002, "step": 194970 }, { "epoch": 3.969058524173028, "grad_norm": 0.01595204523059938, "learning_rate": 1.2400963310949693e-06, "loss": 0.0022, "step": 194980 }, { "epoch": 3.9692620865139947, "grad_norm": 0.046491636439490516, "learning_rate": 1.239627973089762e-06, "loss": 0.0081, "step": 194990 }, { "epoch": 3.969465648854962, "grad_norm": 3.151191545521302, "learning_rate": 1.239159691030114e-06, "loss": 0.0112, "step": 195000 }, { "epoch": 3.969669211195929, "grad_norm": 23.793836297612515, "learning_rate": 1.2386914849254783e-06, "loss": 0.0222, "step": 195010 }, { "epoch": 3.9698727735368955, "grad_norm": 0.10894232907537728, "learning_rate": 1.2382233547853112e-06, "loss": 0.0745, "step": 195020 }, { "epoch": 3.9700763358778626, "grad_norm": 5.659030359894416, "learning_rate": 1.2377553006190717e-06, "loss": 0.0273, "step": 195030 }, { "epoch": 3.9702798982188297, "grad_norm": 0.009057677768669559, "learning_rate": 1.2372873224362082e-06, "loss": 0.004, "step": 195040 }, { "epoch": 3.9704834605597963, "grad_norm": 0.10285055617751296, "learning_rate": 1.236819420246172e-06, "loss": 0.0171, "step": 195050 }, { "epoch": 3.9706870229007634, "grad_norm": 0.08773248243625927, "learning_rate": 1.2363515940584176e-06, "loss": 0.0319, "step": 195060 }, { "epoch": 3.9708905852417304, "grad_norm": 13.331254977382676, "learning_rate": 1.235883843882389e-06, "loss": 0.0058, "step": 195070 }, { "epoch": 3.971094147582697, "grad_norm": 0.00608442196389645, "learning_rate": 1.2354161697275335e-06, "loss": 0.0007, "step": 195080 }, { "epoch": 3.971297709923664, "grad_norm": 0.03414273323845184, "learning_rate": 1.2349485716032977e-06, "loss": 0.0468, "step": 195090 }, { "epoch": 3.9715012722646312, "grad_norm": 25.51439605346625, "learning_rate": 1.234481049519125e-06, "loss": 0.0651, "step": 195100 }, { "epoch": 3.971704834605598, "grad_norm": 49.30332533484074, "learning_rate": 1.2340136034844574e-06, "loss": 0.0596, "step": 195110 }, { "epoch": 3.971908396946565, "grad_norm": 0.007409556557902887, "learning_rate": 1.2335462335087356e-06, "loss": 0.0431, "step": 195120 }, { "epoch": 3.972111959287532, "grad_norm": 0.03777097183887294, "learning_rate": 1.2330789396013987e-06, "loss": 0.0791, "step": 195130 }, { "epoch": 3.9723155216284987, "grad_norm": 41.17135742436008, "learning_rate": 1.2326117217718847e-06, "loss": 0.024, "step": 195140 }, { "epoch": 3.9725190839694657, "grad_norm": 0.04074683371426076, "learning_rate": 1.232144580029629e-06, "loss": 0.0016, "step": 195150 }, { "epoch": 3.972722646310433, "grad_norm": 0.0388636930594518, "learning_rate": 1.2316775143840682e-06, "loss": 0.0582, "step": 195160 }, { "epoch": 3.9729262086513994, "grad_norm": 0.010397126481222803, "learning_rate": 1.2312105248446304e-06, "loss": 0.037, "step": 195170 }, { "epoch": 3.9731297709923665, "grad_norm": 0.04871253452456529, "learning_rate": 1.2307436114207516e-06, "loss": 0.1235, "step": 195180 }, { "epoch": 3.9733333333333336, "grad_norm": 0.0486853358386473, "learning_rate": 1.230276774121862e-06, "loss": 0.0195, "step": 195190 }, { "epoch": 3.9735368956743002, "grad_norm": 0.05264298215028856, "learning_rate": 1.2298100129573854e-06, "loss": 0.0563, "step": 195200 }, { "epoch": 3.9737404580152673, "grad_norm": 24.835266526904967, "learning_rate": 1.229343327936754e-06, "loss": 0.084, "step": 195210 }, { "epoch": 3.973944020356234, "grad_norm": 0.055576679598681414, "learning_rate": 1.2288767190693895e-06, "loss": 0.0231, "step": 195220 }, { "epoch": 3.974147582697201, "grad_norm": 2.376089587660756, "learning_rate": 1.2284101863647157e-06, "loss": 0.0306, "step": 195230 }, { "epoch": 3.9743511450381677, "grad_norm": 0.09954382798000531, "learning_rate": 1.2279437298321585e-06, "loss": 0.0505, "step": 195240 }, { "epoch": 3.9745547073791347, "grad_norm": 0.05034083251553338, "learning_rate": 1.2274773494811348e-06, "loss": 0.0406, "step": 195250 }, { "epoch": 3.974758269720102, "grad_norm": 1.1177997783639342, "learning_rate": 1.2270110453210632e-06, "loss": 0.0011, "step": 195260 }, { "epoch": 3.9749618320610685, "grad_norm": 0.06311038926493051, "learning_rate": 1.2265448173613664e-06, "loss": 0.0752, "step": 195270 }, { "epoch": 3.9751653944020355, "grad_norm": 0.01506102819527499, "learning_rate": 1.2260786656114553e-06, "loss": 0.0525, "step": 195280 }, { "epoch": 3.9753689567430026, "grad_norm": 0.18624897782974148, "learning_rate": 1.225612590080747e-06, "loss": 0.0331, "step": 195290 }, { "epoch": 3.9755725190839692, "grad_norm": 0.024025043985272554, "learning_rate": 1.2251465907786536e-06, "loss": 0.0596, "step": 195300 }, { "epoch": 3.9757760814249363, "grad_norm": 0.31744034158238366, "learning_rate": 1.2246806677145862e-06, "loss": 0.0236, "step": 195310 }, { "epoch": 3.9759796437659034, "grad_norm": 0.2635558862567888, "learning_rate": 1.224214820897956e-06, "loss": 0.014, "step": 195320 }, { "epoch": 3.97618320610687, "grad_norm": 0.059826926480866946, "learning_rate": 1.2237490503381706e-06, "loss": 0.0543, "step": 195330 }, { "epoch": 3.976386768447837, "grad_norm": 0.06720337577294204, "learning_rate": 1.2232833560446367e-06, "loss": 0.0176, "step": 195340 }, { "epoch": 3.976590330788804, "grad_norm": 0.013768460584987053, "learning_rate": 1.2228177380267598e-06, "loss": 0.0665, "step": 195350 }, { "epoch": 3.976793893129771, "grad_norm": 0.02860209112850413, "learning_rate": 1.2223521962939433e-06, "loss": 0.0003, "step": 195360 }, { "epoch": 3.976997455470738, "grad_norm": 6.9136010760183675, "learning_rate": 1.2218867308555914e-06, "loss": 0.1196, "step": 195370 }, { "epoch": 3.977201017811705, "grad_norm": 0.009713747782631486, "learning_rate": 1.2214213417211006e-06, "loss": 0.082, "step": 195380 }, { "epoch": 3.9774045801526716, "grad_norm": 0.01865214779777351, "learning_rate": 1.220956028899874e-06, "loss": 0.0727, "step": 195390 }, { "epoch": 3.9776081424936387, "grad_norm": 0.009697571150245658, "learning_rate": 1.2204907924013088e-06, "loss": 0.0804, "step": 195400 }, { "epoch": 3.9778117048346058, "grad_norm": 95.51566939220159, "learning_rate": 1.2200256322347986e-06, "loss": 0.0406, "step": 195410 }, { "epoch": 3.9780152671755724, "grad_norm": 0.018402097187471638, "learning_rate": 1.2195605484097384e-06, "loss": 0.0523, "step": 195420 }, { "epoch": 3.9782188295165395, "grad_norm": 0.1775010939185829, "learning_rate": 1.2190955409355248e-06, "loss": 0.0564, "step": 195430 }, { "epoch": 3.9784223918575066, "grad_norm": 13.47283931091111, "learning_rate": 1.218630609821545e-06, "loss": 0.0263, "step": 195440 }, { "epoch": 3.978625954198473, "grad_norm": 55.89227767964506, "learning_rate": 1.2181657550771903e-06, "loss": 0.0727, "step": 195450 }, { "epoch": 3.9788295165394403, "grad_norm": 0.00021195056384466847, "learning_rate": 1.2177009767118492e-06, "loss": 0.0018, "step": 195460 }, { "epoch": 3.9790330788804074, "grad_norm": 0.05497840096495136, "learning_rate": 1.217236274734908e-06, "loss": 0.0052, "step": 195470 }, { "epoch": 3.979236641221374, "grad_norm": 0.06444621013002348, "learning_rate": 1.216771649155753e-06, "loss": 0.1019, "step": 195480 }, { "epoch": 3.979440203562341, "grad_norm": 32.18742942821623, "learning_rate": 1.2163070999837673e-06, "loss": 0.1593, "step": 195490 }, { "epoch": 3.979643765903308, "grad_norm": 0.025659563073412068, "learning_rate": 1.215842627228333e-06, "loss": 0.029, "step": 195500 }, { "epoch": 3.979847328244275, "grad_norm": 7.089508287631572, "learning_rate": 1.2153782308988303e-06, "loss": 0.1028, "step": 195510 }, { "epoch": 3.980050890585242, "grad_norm": 0.8655373055468175, "learning_rate": 1.2149139110046394e-06, "loss": 0.0363, "step": 195520 }, { "epoch": 3.9802544529262085, "grad_norm": 0.03260145735747373, "learning_rate": 1.2144496675551387e-06, "loss": 0.0285, "step": 195530 }, { "epoch": 3.9804580152671756, "grad_norm": 0.1055635719579138, "learning_rate": 1.2139855005596995e-06, "loss": 0.0015, "step": 195540 }, { "epoch": 3.9806615776081427, "grad_norm": 0.02349329092081349, "learning_rate": 1.213521410027701e-06, "loss": 0.0177, "step": 195550 }, { "epoch": 3.9808651399491093, "grad_norm": 0.06507309649221067, "learning_rate": 1.2130573959685161e-06, "loss": 0.0011, "step": 195560 }, { "epoch": 3.9810687022900764, "grad_norm": 11.207962895513772, "learning_rate": 1.2125934583915116e-06, "loss": 0.0842, "step": 195570 }, { "epoch": 3.981272264631043, "grad_norm": 0.010483287947801668, "learning_rate": 1.2121295973060632e-06, "loss": 0.083, "step": 195580 }, { "epoch": 3.98147582697201, "grad_norm": 0.018045410406470255, "learning_rate": 1.2116658127215353e-06, "loss": 0.0328, "step": 195590 }, { "epoch": 3.981679389312977, "grad_norm": 39.69264274870374, "learning_rate": 1.2112021046472938e-06, "loss": 0.046, "step": 195600 }, { "epoch": 3.981882951653944, "grad_norm": 0.025682397519534283, "learning_rate": 1.2107384730927086e-06, "loss": 0.0297, "step": 195610 }, { "epoch": 3.982086513994911, "grad_norm": 0.07251758513344038, "learning_rate": 1.2102749180671392e-06, "loss": 0.0093, "step": 195620 }, { "epoch": 3.982290076335878, "grad_norm": 18.564662609547767, "learning_rate": 1.2098114395799477e-06, "loss": 0.0934, "step": 195630 }, { "epoch": 3.9824936386768446, "grad_norm": 5.894957825558963, "learning_rate": 1.209348037640498e-06, "loss": 0.0451, "step": 195640 }, { "epoch": 3.9826972010178117, "grad_norm": 1.8013238309562423, "learning_rate": 1.2088847122581464e-06, "loss": 0.026, "step": 195650 }, { "epoch": 3.9829007633587787, "grad_norm": 0.02808735751223068, "learning_rate": 1.208421463442251e-06, "loss": 0.038, "step": 195660 }, { "epoch": 3.9831043256997454, "grad_norm": 0.0013114974553896598, "learning_rate": 1.207958291202167e-06, "loss": 0.029, "step": 195670 }, { "epoch": 3.9833078880407125, "grad_norm": 0.7478957608408299, "learning_rate": 1.2074951955472497e-06, "loss": 0.0842, "step": 195680 }, { "epoch": 3.9835114503816795, "grad_norm": 7.274719671891957, "learning_rate": 1.2070321764868515e-06, "loss": 0.116, "step": 195690 }, { "epoch": 3.983715012722646, "grad_norm": 0.012449212215546793, "learning_rate": 1.2065692340303242e-06, "loss": 0.0042, "step": 195700 }, { "epoch": 3.9839185750636132, "grad_norm": 0.012722706132316077, "learning_rate": 1.2061063681870166e-06, "loss": 0.0596, "step": 195710 }, { "epoch": 3.9841221374045803, "grad_norm": 1.2148137667463129e-05, "learning_rate": 1.2056435789662779e-06, "loss": 0.0224, "step": 195720 }, { "epoch": 3.984325699745547, "grad_norm": 0.4102549619722954, "learning_rate": 1.205180866377454e-06, "loss": 0.0641, "step": 195730 }, { "epoch": 3.984529262086514, "grad_norm": 0.3217536686336667, "learning_rate": 1.2047182304298916e-06, "loss": 0.0281, "step": 195740 }, { "epoch": 3.984732824427481, "grad_norm": 0.20169928122887656, "learning_rate": 1.20425567113293e-06, "loss": 0.0769, "step": 195750 }, { "epoch": 3.9849363867684477, "grad_norm": 0.01414416317759825, "learning_rate": 1.2037931884959152e-06, "loss": 0.0004, "step": 195760 }, { "epoch": 3.985139949109415, "grad_norm": 0.004055536574270403, "learning_rate": 1.2033307825281886e-06, "loss": 0.0114, "step": 195770 }, { "epoch": 3.985343511450382, "grad_norm": 0.023798444813344886, "learning_rate": 1.2028684532390833e-06, "loss": 0.106, "step": 195780 }, { "epoch": 3.9855470737913485, "grad_norm": 12.90394756680066, "learning_rate": 1.2024062006379422e-06, "loss": 0.0238, "step": 195790 }, { "epoch": 3.9857506361323156, "grad_norm": 17.354284824104685, "learning_rate": 1.2019440247341003e-06, "loss": 0.0973, "step": 195800 }, { "epoch": 3.9859541984732827, "grad_norm": 0.03703760673859334, "learning_rate": 1.2014819255368876e-06, "loss": 0.0473, "step": 195810 }, { "epoch": 3.9861577608142493, "grad_norm": 0.06914549202593591, "learning_rate": 1.201019903055643e-06, "loss": 0.0165, "step": 195820 }, { "epoch": 3.9863613231552164, "grad_norm": 0.0923087614858586, "learning_rate": 1.2005579572996934e-06, "loss": 0.0013, "step": 195830 }, { "epoch": 3.986564885496183, "grad_norm": 0.02893146864102879, "learning_rate": 1.200096088278369e-06, "loss": 0.0851, "step": 195840 }, { "epoch": 3.98676844783715, "grad_norm": 32.31080127307104, "learning_rate": 1.1996342960009983e-06, "loss": 0.0597, "step": 195850 }, { "epoch": 3.986972010178117, "grad_norm": 45.732063114084916, "learning_rate": 1.1991725804769083e-06, "loss": 0.0486, "step": 195860 }, { "epoch": 3.987175572519084, "grad_norm": 0.014716836967490812, "learning_rate": 1.198710941715423e-06, "loss": 0.066, "step": 195870 }, { "epoch": 3.987379134860051, "grad_norm": 0.040076912839709725, "learning_rate": 1.1982493797258665e-06, "loss": 0.0507, "step": 195880 }, { "epoch": 3.9875826972010175, "grad_norm": 0.042589326796231855, "learning_rate": 1.1977878945175603e-06, "loss": 0.0513, "step": 195890 }, { "epoch": 3.9877862595419846, "grad_norm": 0.012031400221709686, "learning_rate": 1.1973264860998246e-06, "loss": 0.0183, "step": 195900 }, { "epoch": 3.9879898218829517, "grad_norm": 9.435047593032198, "learning_rate": 1.1968651544819786e-06, "loss": 0.0909, "step": 195910 }, { "epoch": 3.9881933842239183, "grad_norm": 0.01927303352317891, "learning_rate": 1.196403899673339e-06, "loss": 0.0066, "step": 195920 }, { "epoch": 3.9883969465648854, "grad_norm": 0.15132264865440584, "learning_rate": 1.195942721683222e-06, "loss": 0.0225, "step": 195930 }, { "epoch": 3.9886005089058525, "grad_norm": 0.0003744868814200754, "learning_rate": 1.1954816205209408e-06, "loss": 0.0519, "step": 195940 }, { "epoch": 3.988804071246819, "grad_norm": 0.012373237342148274, "learning_rate": 1.1950205961958105e-06, "loss": 0.012, "step": 195950 }, { "epoch": 3.989007633587786, "grad_norm": 0.015331172163685606, "learning_rate": 1.1945596487171385e-06, "loss": 0.0199, "step": 195960 }, { "epoch": 3.9892111959287533, "grad_norm": 0.4469070010383674, "learning_rate": 1.1940987780942336e-06, "loss": 0.1, "step": 195970 }, { "epoch": 3.98941475826972, "grad_norm": 0.00741675797820006, "learning_rate": 1.1936379843364093e-06, "loss": 0.0543, "step": 195980 }, { "epoch": 3.989618320610687, "grad_norm": 0.02870455483964026, "learning_rate": 1.1931772674529673e-06, "loss": 0.0051, "step": 195990 }, { "epoch": 3.989821882951654, "grad_norm": 0.3428541177515298, "learning_rate": 1.1927166274532114e-06, "loss": 0.1135, "step": 196000 }, { "epoch": 3.9900254452926207, "grad_norm": 0.10541718158344036, "learning_rate": 1.1922560643464503e-06, "loss": 0.1509, "step": 196010 }, { "epoch": 3.990229007633588, "grad_norm": 0.42559290187016413, "learning_rate": 1.191795578141981e-06, "loss": 0.063, "step": 196020 }, { "epoch": 3.990432569974555, "grad_norm": 0.013039121157560027, "learning_rate": 1.1913351688491042e-06, "loss": 0.022, "step": 196030 }, { "epoch": 3.9906361323155215, "grad_norm": 31.925318999609267, "learning_rate": 1.19087483647712e-06, "loss": 0.0573, "step": 196040 }, { "epoch": 3.9908396946564886, "grad_norm": 10.7368945960616, "learning_rate": 1.190414581035324e-06, "loss": 0.0374, "step": 196050 }, { "epoch": 3.9910432569974557, "grad_norm": 0.10467425050113738, "learning_rate": 1.1899544025330129e-06, "loss": 0.1232, "step": 196060 }, { "epoch": 3.9912468193384223, "grad_norm": 23.151037258178054, "learning_rate": 1.1894943009794795e-06, "loss": 0.0617, "step": 196070 }, { "epoch": 3.9914503816793894, "grad_norm": 0.07651020201680475, "learning_rate": 1.1890342763840168e-06, "loss": 0.0734, "step": 196080 }, { "epoch": 3.9916539440203564, "grad_norm": 0.1174808287298259, "learning_rate": 1.1885743287559148e-06, "loss": 0.0063, "step": 196090 }, { "epoch": 3.991857506361323, "grad_norm": 0.1100898169310989, "learning_rate": 1.188114458104464e-06, "loss": 0.1054, "step": 196100 }, { "epoch": 3.99206106870229, "grad_norm": 0.03562007829753734, "learning_rate": 1.1876546644389525e-06, "loss": 0.0052, "step": 196110 }, { "epoch": 3.9922646310432572, "grad_norm": 0.16937082621819446, "learning_rate": 1.1871949477686623e-06, "loss": 0.0223, "step": 196120 }, { "epoch": 3.992468193384224, "grad_norm": 44.424969059473945, "learning_rate": 1.1867353081028827e-06, "loss": 0.014, "step": 196130 }, { "epoch": 3.992671755725191, "grad_norm": 5.75967138424022, "learning_rate": 1.1862757454508966e-06, "loss": 0.0265, "step": 196140 }, { "epoch": 3.992875318066158, "grad_norm": 0.012824693961387577, "learning_rate": 1.1858162598219808e-06, "loss": 0.0729, "step": 196150 }, { "epoch": 3.9930788804071247, "grad_norm": 0.10162327556198343, "learning_rate": 1.1853568512254194e-06, "loss": 0.0474, "step": 196160 }, { "epoch": 3.9932824427480917, "grad_norm": 0.018050226401624042, "learning_rate": 1.1848975196704915e-06, "loss": 0.0159, "step": 196170 }, { "epoch": 3.9934860050890584, "grad_norm": 35.79235454803512, "learning_rate": 1.1844382651664687e-06, "loss": 0.0193, "step": 196180 }, { "epoch": 3.9936895674300255, "grad_norm": 0.07455077585415888, "learning_rate": 1.1839790877226327e-06, "loss": 0.0662, "step": 196190 }, { "epoch": 3.993893129770992, "grad_norm": 0.05447381930759432, "learning_rate": 1.1835199873482529e-06, "loss": 0.0292, "step": 196200 }, { "epoch": 3.994096692111959, "grad_norm": 0.14696308522447443, "learning_rate": 1.1830609640526003e-06, "loss": 0.0047, "step": 196210 }, { "epoch": 3.9943002544529262, "grad_norm": 10.161130919763949, "learning_rate": 1.1826020178449515e-06, "loss": 0.0686, "step": 196220 }, { "epoch": 3.994503816793893, "grad_norm": 5.771756655421994, "learning_rate": 1.1821431487345697e-06, "loss": 0.0024, "step": 196230 }, { "epoch": 3.99470737913486, "grad_norm": 0.029886265764298068, "learning_rate": 1.1816843567307246e-06, "loss": 0.0012, "step": 196240 }, { "epoch": 3.994910941475827, "grad_norm": 0.06942787767982637, "learning_rate": 1.1812256418426815e-06, "loss": 0.0349, "step": 196250 }, { "epoch": 3.9951145038167937, "grad_norm": 0.019376232248080306, "learning_rate": 1.1807670040797053e-06, "loss": 0.0597, "step": 196260 }, { "epoch": 3.9953180661577608, "grad_norm": 0.13253340355763427, "learning_rate": 1.1803084434510586e-06, "loss": 0.0453, "step": 196270 }, { "epoch": 3.995521628498728, "grad_norm": 9.78151199863275, "learning_rate": 1.179849959966003e-06, "loss": 0.0658, "step": 196280 }, { "epoch": 3.9957251908396945, "grad_norm": 0.09258051203753885, "learning_rate": 1.1793915536337973e-06, "loss": 0.0571, "step": 196290 }, { "epoch": 3.9959287531806615, "grad_norm": 0.024067063734108343, "learning_rate": 1.1789332244637002e-06, "loss": 0.1086, "step": 196300 }, { "epoch": 3.9961323155216286, "grad_norm": 0.03896725697244221, "learning_rate": 1.178474972464968e-06, "loss": 0.0326, "step": 196310 }, { "epoch": 3.9963358778625953, "grad_norm": 0.06634987787523682, "learning_rate": 1.1780167976468577e-06, "loss": 0.0106, "step": 196320 }, { "epoch": 3.9965394402035623, "grad_norm": 0.01784332051698703, "learning_rate": 1.1775587000186172e-06, "loss": 0.0492, "step": 196330 }, { "epoch": 3.9967430025445294, "grad_norm": 0.027324910286272746, "learning_rate": 1.177100679589504e-06, "loss": 0.0539, "step": 196340 }, { "epoch": 3.996946564885496, "grad_norm": 0.00746811599954305, "learning_rate": 1.1766427363687682e-06, "loss": 0.1404, "step": 196350 }, { "epoch": 3.997150127226463, "grad_norm": 0.07699967360631808, "learning_rate": 1.1761848703656536e-06, "loss": 0.1355, "step": 196360 }, { "epoch": 3.99735368956743, "grad_norm": 0.05123116765912719, "learning_rate": 1.1757270815894118e-06, "loss": 0.1037, "step": 196370 }, { "epoch": 3.997557251908397, "grad_norm": 9.202795482015212, "learning_rate": 1.175269370049289e-06, "loss": 0.0668, "step": 196380 }, { "epoch": 3.997760814249364, "grad_norm": 0.014266290681282986, "learning_rate": 1.1748117357545258e-06, "loss": 0.0142, "step": 196390 }, { "epoch": 3.997964376590331, "grad_norm": 0.04667391348099016, "learning_rate": 1.1743541787143664e-06, "loss": 0.0728, "step": 196400 }, { "epoch": 3.9981679389312976, "grad_norm": 0.025788396068871722, "learning_rate": 1.1738966989380517e-06, "loss": 0.0007, "step": 196410 }, { "epoch": 3.9983715012722647, "grad_norm": 0.03766230015945051, "learning_rate": 1.1734392964348213e-06, "loss": 0.039, "step": 196420 }, { "epoch": 3.998575063613232, "grad_norm": 13.150724984193168, "learning_rate": 1.1729819712139128e-06, "loss": 0.0926, "step": 196430 }, { "epoch": 3.9987786259541984, "grad_norm": 0.018248800976757722, "learning_rate": 1.1725247232845626e-06, "loss": 0.0792, "step": 196440 }, { "epoch": 3.9989821882951655, "grad_norm": 0.10561219919705365, "learning_rate": 1.1720675526560054e-06, "loss": 0.0025, "step": 196450 }, { "epoch": 3.9991857506361326, "grad_norm": 0.36292467550667834, "learning_rate": 1.1716104593374739e-06, "loss": 0.0871, "step": 196460 }, { "epoch": 3.999389312977099, "grad_norm": 0.024257018550576267, "learning_rate": 1.1711534433382005e-06, "loss": 0.003, "step": 196470 }, { "epoch": 3.9995928753180663, "grad_norm": 0.03164319407987081, "learning_rate": 1.170696504667415e-06, "loss": 0.0909, "step": 196480 }, { "epoch": 3.999796437659033, "grad_norm": 18.11177724199058, "learning_rate": 1.1702396433343455e-06, "loss": 0.0843, "step": 196490 }, { "epoch": 4.0, "grad_norm": 8.638176085534303, "learning_rate": 1.169782859348219e-06, "loss": 0.0199, "step": 196500 }, { "epoch": 4.000203562340967, "grad_norm": 0.1070736051827461, "learning_rate": 1.1693261527182625e-06, "loss": 0.0006, "step": 196510 }, { "epoch": 4.000407124681934, "grad_norm": 0.012366848763466243, "learning_rate": 1.168869523453695e-06, "loss": 0.002, "step": 196520 }, { "epoch": 4.000610687022901, "grad_norm": 0.04961317969630878, "learning_rate": 1.1684129715637439e-06, "loss": 0.0013, "step": 196530 }, { "epoch": 4.000814249363867, "grad_norm": 0.06951828176333272, "learning_rate": 1.167956497057629e-06, "loss": 0.0003, "step": 196540 }, { "epoch": 4.001017811704835, "grad_norm": 0.09866920671931592, "learning_rate": 1.1675000999445657e-06, "loss": 0.0004, "step": 196550 }, { "epoch": 4.001221374045802, "grad_norm": 0.22017291003895068, "learning_rate": 1.1670437802337769e-06, "loss": 0.0007, "step": 196560 }, { "epoch": 4.001424936386768, "grad_norm": 0.10096623594051232, "learning_rate": 1.1665875379344738e-06, "loss": 0.0011, "step": 196570 }, { "epoch": 4.001628498727736, "grad_norm": 0.05350099597080048, "learning_rate": 1.1661313730558721e-06, "loss": 0.0012, "step": 196580 }, { "epoch": 4.001832061068702, "grad_norm": 0.012798336720077377, "learning_rate": 1.1656752856071879e-06, "loss": 0.001, "step": 196590 }, { "epoch": 4.002035623409669, "grad_norm": 0.020656832311630242, "learning_rate": 1.1652192755976283e-06, "loss": 0.0006, "step": 196600 }, { "epoch": 4.0022391857506365, "grad_norm": 0.0798102127422092, "learning_rate": 1.1647633430364048e-06, "loss": 0.0003, "step": 196610 }, { "epoch": 4.002442748091603, "grad_norm": 0.07026126103715112, "learning_rate": 1.164307487932725e-06, "loss": 0.0554, "step": 196620 }, { "epoch": 4.00264631043257, "grad_norm": 0.013624314073895567, "learning_rate": 1.1638517102957957e-06, "loss": 0.0002, "step": 196630 }, { "epoch": 4.002849872773537, "grad_norm": 0.0025376581693976447, "learning_rate": 1.1633960101348223e-06, "loss": 0.0003, "step": 196640 }, { "epoch": 4.003053435114504, "grad_norm": 0.015088610282318579, "learning_rate": 1.1629403874590084e-06, "loss": 0.0002, "step": 196650 }, { "epoch": 4.003256997455471, "grad_norm": 0.006092686970951553, "learning_rate": 1.1624848422775549e-06, "loss": 0.0006, "step": 196660 }, { "epoch": 4.003460559796437, "grad_norm": 0.006874215077322425, "learning_rate": 1.162029374599663e-06, "loss": 0.0003, "step": 196670 }, { "epoch": 4.003664122137405, "grad_norm": 0.014918482597093169, "learning_rate": 1.1615739844345308e-06, "loss": 0.0003, "step": 196680 }, { "epoch": 4.003867684478371, "grad_norm": 0.04746228581956954, "learning_rate": 1.1611186717913575e-06, "loss": 0.0002, "step": 196690 }, { "epoch": 4.004071246819338, "grad_norm": 0.050984248411758346, "learning_rate": 1.1606634366793345e-06, "loss": 0.0297, "step": 196700 }, { "epoch": 4.0042748091603055, "grad_norm": 0.015399794430855394, "learning_rate": 1.1602082791076596e-06, "loss": 0.0002, "step": 196710 }, { "epoch": 4.004478371501272, "grad_norm": 0.00786442014422025, "learning_rate": 1.1597531990855254e-06, "loss": 0.0008, "step": 196720 }, { "epoch": 4.004681933842239, "grad_norm": 0.02101741065927466, "learning_rate": 1.1592981966221189e-06, "loss": 0.0018, "step": 196730 }, { "epoch": 4.004885496183206, "grad_norm": 0.03464658013717062, "learning_rate": 1.1588432717266335e-06, "loss": 0.0002, "step": 196740 }, { "epoch": 4.005089058524173, "grad_norm": 0.04595105944825502, "learning_rate": 1.1583884244082567e-06, "loss": 0.0473, "step": 196750 }, { "epoch": 4.00529262086514, "grad_norm": 0.005454618897551378, "learning_rate": 1.157933654676171e-06, "loss": 0.0418, "step": 196760 }, { "epoch": 4.005496183206107, "grad_norm": 0.004696134713559367, "learning_rate": 1.1574789625395666e-06, "loss": 0.0026, "step": 196770 }, { "epoch": 4.005699745547074, "grad_norm": 0.015113342694821072, "learning_rate": 1.1570243480076226e-06, "loss": 0.0124, "step": 196780 }, { "epoch": 4.00590330788804, "grad_norm": 0.01893242909171778, "learning_rate": 1.1565698110895197e-06, "loss": 0.0002, "step": 196790 }, { "epoch": 4.006106870229008, "grad_norm": 0.05709981718606349, "learning_rate": 1.156115351794443e-06, "loss": 0.0003, "step": 196800 }, { "epoch": 4.0063104325699745, "grad_norm": 0.005066193212168117, "learning_rate": 1.1556609701315657e-06, "loss": 0.0298, "step": 196810 }, { "epoch": 4.006513994910941, "grad_norm": 9.271737459971158, "learning_rate": 1.1552066661100669e-06, "loss": 0.0057, "step": 196820 }, { "epoch": 4.006717557251909, "grad_norm": 0.01772701385303851, "learning_rate": 1.1547524397391213e-06, "loss": 0.0004, "step": 196830 }, { "epoch": 4.006921119592875, "grad_norm": 0.010267321668350095, "learning_rate": 1.1542982910279033e-06, "loss": 0.0269, "step": 196840 }, { "epoch": 4.007124681933842, "grad_norm": 0.008351657355656728, "learning_rate": 1.1538442199855843e-06, "loss": 0.0004, "step": 196850 }, { "epoch": 4.0073282442748095, "grad_norm": 0.005077982194537651, "learning_rate": 1.1533902266213348e-06, "loss": 0.0002, "step": 196860 }, { "epoch": 4.007531806615776, "grad_norm": 0.03242678526133495, "learning_rate": 1.1529363109443242e-06, "loss": 0.0005, "step": 196870 }, { "epoch": 4.007735368956743, "grad_norm": 0.007636965202859353, "learning_rate": 1.1524824729637201e-06, "loss": 0.0437, "step": 196880 }, { "epoch": 4.00793893129771, "grad_norm": 0.01871073841842686, "learning_rate": 1.1520287126886875e-06, "loss": 0.0003, "step": 196890 }, { "epoch": 4.008142493638677, "grad_norm": 0.03891476138038922, "learning_rate": 1.1515750301283917e-06, "loss": 0.0539, "step": 196900 }, { "epoch": 4.0083460559796436, "grad_norm": 0.007105930027186063, "learning_rate": 1.1511214252919945e-06, "loss": 0.0003, "step": 196910 }, { "epoch": 4.008549618320611, "grad_norm": 11.734374118869836, "learning_rate": 1.1506678981886576e-06, "loss": 0.0147, "step": 196920 }, { "epoch": 4.008753180661578, "grad_norm": 0.0061218079507124904, "learning_rate": 1.1502144488275418e-06, "loss": 0.0047, "step": 196930 }, { "epoch": 4.008956743002544, "grad_norm": 0.034474601179498236, "learning_rate": 1.1497610772178026e-06, "loss": 0.0002, "step": 196940 }, { "epoch": 4.009160305343512, "grad_norm": 0.037773346137291476, "learning_rate": 1.1493077833685956e-06, "loss": 0.0002, "step": 196950 }, { "epoch": 4.0093638676844785, "grad_norm": 0.022304719219134704, "learning_rate": 1.148854567289081e-06, "loss": 0.0215, "step": 196960 }, { "epoch": 4.009567430025445, "grad_norm": 0.011431905057648266, "learning_rate": 1.1484014289884065e-06, "loss": 0.0028, "step": 196970 }, { "epoch": 4.009770992366413, "grad_norm": 0.08250366110513596, "learning_rate": 1.1479483684757265e-06, "loss": 0.0002, "step": 196980 }, { "epoch": 4.009974554707379, "grad_norm": 4.6488435390947815, "learning_rate": 1.1474953857601906e-06, "loss": 0.0256, "step": 196990 }, { "epoch": 4.010178117048346, "grad_norm": 0.01687078189735724, "learning_rate": 1.1470424808509472e-06, "loss": 0.0001, "step": 197000 }, { "epoch": 4.010381679389313, "grad_norm": 0.015450563968789022, "learning_rate": 1.1465896537571435e-06, "loss": 0.0276, "step": 197010 }, { "epoch": 4.01058524173028, "grad_norm": 0.03637182099037768, "learning_rate": 1.1461369044879244e-06, "loss": 0.048, "step": 197020 }, { "epoch": 4.010788804071247, "grad_norm": 19.625473317350774, "learning_rate": 1.145684233052435e-06, "loss": 0.02, "step": 197030 }, { "epoch": 4.010992366412213, "grad_norm": 0.01877075211512541, "learning_rate": 1.1452316394598167e-06, "loss": 0.0017, "step": 197040 }, { "epoch": 4.011195928753181, "grad_norm": 0.004336775665171595, "learning_rate": 1.1447791237192102e-06, "loss": 0.0005, "step": 197050 }, { "epoch": 4.0113994910941475, "grad_norm": 0.009132217453981907, "learning_rate": 1.1443266858397562e-06, "loss": 0.0003, "step": 197060 }, { "epoch": 4.011603053435114, "grad_norm": 0.003305581861882168, "learning_rate": 1.143874325830588e-06, "loss": 0.0308, "step": 197070 }, { "epoch": 4.011806615776082, "grad_norm": 0.042669485642265435, "learning_rate": 1.1434220437008458e-06, "loss": 0.002, "step": 197080 }, { "epoch": 4.012010178117048, "grad_norm": 0.15441885498909602, "learning_rate": 1.1429698394596645e-06, "loss": 0.0008, "step": 197090 }, { "epoch": 4.012213740458015, "grad_norm": 0.0009844276692175576, "learning_rate": 1.142517713116172e-06, "loss": 0.0416, "step": 197100 }, { "epoch": 4.0124173027989825, "grad_norm": 0.0036517843453589415, "learning_rate": 1.1420656646795036e-06, "loss": 0.0001, "step": 197110 }, { "epoch": 4.012620865139949, "grad_norm": 0.013642992957235244, "learning_rate": 1.1416136941587902e-06, "loss": 0.0015, "step": 197120 }, { "epoch": 4.012824427480916, "grad_norm": 0.013663208294016859, "learning_rate": 1.1411618015631547e-06, "loss": 0.0005, "step": 197130 }, { "epoch": 4.013027989821883, "grad_norm": 0.007407140517401148, "learning_rate": 1.1407099869017296e-06, "loss": 0.0019, "step": 197140 }, { "epoch": 4.01323155216285, "grad_norm": 0.009568869366660725, "learning_rate": 1.1402582501836358e-06, "loss": 0.0002, "step": 197150 }, { "epoch": 4.0134351145038165, "grad_norm": 0.023788254320948048, "learning_rate": 1.1398065914179963e-06, "loss": 0.0001, "step": 197160 }, { "epoch": 4.013638676844784, "grad_norm": 0.009048129362709054, "learning_rate": 1.1393550106139372e-06, "loss": 0.0002, "step": 197170 }, { "epoch": 4.013842239185751, "grad_norm": 0.03300925239483617, "learning_rate": 1.1389035077805749e-06, "loss": 0.001, "step": 197180 }, { "epoch": 4.014045801526717, "grad_norm": 0.0027140607770100803, "learning_rate": 1.1384520829270284e-06, "loss": 0.0099, "step": 197190 }, { "epoch": 4.014249363867685, "grad_norm": 3.6257727895616076, "learning_rate": 1.138000736062418e-06, "loss": 0.0015, "step": 197200 }, { "epoch": 4.0144529262086515, "grad_norm": 0.006670117303318369, "learning_rate": 1.137549467195856e-06, "loss": 0.0004, "step": 197210 }, { "epoch": 4.014656488549618, "grad_norm": 0.01781697483858283, "learning_rate": 1.137098276336457e-06, "loss": 0.0459, "step": 197220 }, { "epoch": 4.014860050890586, "grad_norm": 0.012221365994096746, "learning_rate": 1.1366471634933335e-06, "loss": 0.0004, "step": 197230 }, { "epoch": 4.015063613231552, "grad_norm": 0.0013315169868712718, "learning_rate": 1.1361961286755968e-06, "loss": 0.0006, "step": 197240 }, { "epoch": 4.015267175572519, "grad_norm": 0.003023449437644861, "learning_rate": 1.1357451718923556e-06, "loss": 0.0824, "step": 197250 }, { "epoch": 4.015470737913486, "grad_norm": 0.011396260052597914, "learning_rate": 1.1352942931527178e-06, "loss": 0.0002, "step": 197260 }, { "epoch": 4.015674300254453, "grad_norm": 0.04426567199020223, "learning_rate": 1.1348434924657908e-06, "loss": 0.0002, "step": 197270 }, { "epoch": 4.01587786259542, "grad_norm": 21.82339781160132, "learning_rate": 1.1343927698406754e-06, "loss": 0.0026, "step": 197280 }, { "epoch": 4.016081424936387, "grad_norm": 0.0308198252071244, "learning_rate": 1.1339421252864779e-06, "loss": 0.0406, "step": 197290 }, { "epoch": 4.016284987277354, "grad_norm": 0.004553734732906015, "learning_rate": 1.1334915588123003e-06, "loss": 0.0034, "step": 197300 }, { "epoch": 4.0164885496183205, "grad_norm": 0.009446195231240432, "learning_rate": 1.1330410704272372e-06, "loss": 0.0001, "step": 197310 }, { "epoch": 4.016692111959287, "grad_norm": 0.008441683056900082, "learning_rate": 1.1325906601403925e-06, "loss": 0.0001, "step": 197320 }, { "epoch": 4.016895674300255, "grad_norm": 0.010600683319666284, "learning_rate": 1.1321403279608617e-06, "loss": 0.0113, "step": 197330 }, { "epoch": 4.017099236641221, "grad_norm": 0.08239099114408781, "learning_rate": 1.131690073897736e-06, "loss": 0.0002, "step": 197340 }, { "epoch": 4.017302798982188, "grad_norm": 0.7268557872905094, "learning_rate": 1.1312398979601142e-06, "loss": 0.0005, "step": 197350 }, { "epoch": 4.017506361323155, "grad_norm": 0.0034890431593886575, "learning_rate": 1.1307898001570845e-06, "loss": 0.0003, "step": 197360 }, { "epoch": 4.017709923664122, "grad_norm": 0.03157669738772249, "learning_rate": 1.1303397804977378e-06, "loss": 0.0002, "step": 197370 }, { "epoch": 4.017913486005089, "grad_norm": 0.010531082515286939, "learning_rate": 1.1298898389911638e-06, "loss": 0.0001, "step": 197380 }, { "epoch": 4.018117048346056, "grad_norm": 0.008876969240191835, "learning_rate": 1.1294399756464492e-06, "loss": 0.0002, "step": 197390 }, { "epoch": 4.018320610687023, "grad_norm": 0.0027391808577944673, "learning_rate": 1.128990190472679e-06, "loss": 0.0345, "step": 197400 }, { "epoch": 4.0185241730279895, "grad_norm": 36.5056097801986, "learning_rate": 1.1285404834789381e-06, "loss": 0.0699, "step": 197410 }, { "epoch": 4.018727735368957, "grad_norm": 0.04865781334665521, "learning_rate": 1.1280908546743086e-06, "loss": 0.0861, "step": 197420 }, { "epoch": 4.018931297709924, "grad_norm": 0.009223161344144953, "learning_rate": 1.1276413040678708e-06, "loss": 0.0001, "step": 197430 }, { "epoch": 4.01913486005089, "grad_norm": 0.0064982497995968395, "learning_rate": 1.1271918316687053e-06, "loss": 0.03, "step": 197440 }, { "epoch": 4.019338422391858, "grad_norm": 0.004518990380274039, "learning_rate": 1.1267424374858882e-06, "loss": 0.0001, "step": 197450 }, { "epoch": 4.019541984732824, "grad_norm": 0.015253444298958944, "learning_rate": 1.1262931215284967e-06, "loss": 0.0302, "step": 197460 }, { "epoch": 4.019745547073791, "grad_norm": 0.010790473449963058, "learning_rate": 1.1258438838056047e-06, "loss": 0.0003, "step": 197470 }, { "epoch": 4.019949109414759, "grad_norm": 0.017179867281524954, "learning_rate": 1.1253947243262853e-06, "loss": 0.0001, "step": 197480 }, { "epoch": 4.020152671755725, "grad_norm": 0.03637646913779361, "learning_rate": 1.1249456430996108e-06, "loss": 0.0004, "step": 197490 }, { "epoch": 4.020356234096692, "grad_norm": 8.55637848439351, "learning_rate": 1.124496640134648e-06, "loss": 0.0299, "step": 197500 }, { "epoch": 4.020559796437659, "grad_norm": 0.02055235664818261, "learning_rate": 1.12404771544047e-06, "loss": 0.0002, "step": 197510 }, { "epoch": 4.020763358778626, "grad_norm": 0.14106723900971657, "learning_rate": 1.123598869026139e-06, "loss": 0.0002, "step": 197520 }, { "epoch": 4.020966921119593, "grad_norm": 0.002728337703306588, "learning_rate": 1.1231501009007202e-06, "loss": 0.0002, "step": 197530 }, { "epoch": 4.02117048346056, "grad_norm": 9.794252153856158, "learning_rate": 1.1227014110732808e-06, "loss": 0.0108, "step": 197540 }, { "epoch": 4.021374045801527, "grad_norm": 0.013304102336936807, "learning_rate": 1.1222527995528792e-06, "loss": 0.0007, "step": 197550 }, { "epoch": 4.021577608142493, "grad_norm": 0.011644728546900634, "learning_rate": 1.1218042663485757e-06, "loss": 0.0384, "step": 197560 }, { "epoch": 4.021781170483461, "grad_norm": 0.0010123129627439941, "learning_rate": 1.121355811469433e-06, "loss": 0.0141, "step": 197570 }, { "epoch": 4.021984732824428, "grad_norm": 0.00359244486408601, "learning_rate": 1.1209074349245036e-06, "loss": 0.0075, "step": 197580 }, { "epoch": 4.022188295165394, "grad_norm": 0.06122854552108055, "learning_rate": 1.1204591367228451e-06, "loss": 0.0002, "step": 197590 }, { "epoch": 4.022391857506362, "grad_norm": 8.976250929087147, "learning_rate": 1.1200109168735106e-06, "loss": 0.028, "step": 197600 }, { "epoch": 4.022595419847328, "grad_norm": 0.03973424688241561, "learning_rate": 1.1195627753855537e-06, "loss": 0.0001, "step": 197610 }, { "epoch": 4.022798982188295, "grad_norm": 0.014575602371885467, "learning_rate": 1.1191147122680241e-06, "loss": 0.0514, "step": 197620 }, { "epoch": 4.023002544529262, "grad_norm": 0.0027428474948508557, "learning_rate": 1.1186667275299712e-06, "loss": 0.0001, "step": 197630 }, { "epoch": 4.023206106870229, "grad_norm": 0.06424990111958882, "learning_rate": 1.1182188211804446e-06, "loss": 0.0005, "step": 197640 }, { "epoch": 4.023409669211196, "grad_norm": 0.004583730341018641, "learning_rate": 1.117770993228486e-06, "loss": 0.0007, "step": 197650 }, { "epoch": 4.023613231552162, "grad_norm": 0.01319421313585459, "learning_rate": 1.1173232436831437e-06, "loss": 0.0001, "step": 197660 }, { "epoch": 4.02381679389313, "grad_norm": 0.00764252452802543, "learning_rate": 1.1168755725534608e-06, "loss": 0.0001, "step": 197670 }, { "epoch": 4.024020356234097, "grad_norm": 0.04746148827998843, "learning_rate": 1.116427979848474e-06, "loss": 0.0173, "step": 197680 }, { "epoch": 4.024223918575063, "grad_norm": 0.019811813155677226, "learning_rate": 1.1159804655772277e-06, "loss": 0.0047, "step": 197690 }, { "epoch": 4.024427480916031, "grad_norm": 0.017911994081510514, "learning_rate": 1.1155330297487598e-06, "loss": 0.02, "step": 197700 }, { "epoch": 4.024631043256997, "grad_norm": 0.11394058429239108, "learning_rate": 1.115085672372103e-06, "loss": 0.0006, "step": 197710 }, { "epoch": 4.024834605597964, "grad_norm": 0.01750040032806138, "learning_rate": 1.1146383934562976e-06, "loss": 0.0005, "step": 197720 }, { "epoch": 4.0250381679389315, "grad_norm": 0.02837036431947572, "learning_rate": 1.1141911930103728e-06, "loss": 0.0562, "step": 197730 }, { "epoch": 4.025241730279898, "grad_norm": 0.0024851281278783946, "learning_rate": 1.11374407104336e-06, "loss": 0.0211, "step": 197740 }, { "epoch": 4.025445292620865, "grad_norm": 5.319421570751507, "learning_rate": 1.113297027564294e-06, "loss": 0.0137, "step": 197750 }, { "epoch": 4.025648854961832, "grad_norm": 0.02016179578353271, "learning_rate": 1.1128500625821993e-06, "loss": 0.0002, "step": 197760 }, { "epoch": 4.025852417302799, "grad_norm": 0.0016193610956362922, "learning_rate": 1.1124031761061027e-06, "loss": 0.0001, "step": 197770 }, { "epoch": 4.026055979643766, "grad_norm": 0.0015404725495975578, "learning_rate": 1.111956368145034e-06, "loss": 0.0001, "step": 197780 }, { "epoch": 4.026259541984733, "grad_norm": 0.02067107253149466, "learning_rate": 1.1115096387080127e-06, "loss": 0.0008, "step": 197790 }, { "epoch": 4.0264631043257, "grad_norm": 0.0164852813464038, "learning_rate": 1.1110629878040629e-06, "loss": 0.002, "step": 197800 }, { "epoch": 4.026666666666666, "grad_norm": 0.007839230161058863, "learning_rate": 1.1106164154422045e-06, "loss": 0.0213, "step": 197810 }, { "epoch": 4.026870229007634, "grad_norm": 0.014036193494080924, "learning_rate": 1.1101699216314576e-06, "loss": 0.0002, "step": 197820 }, { "epoch": 4.0270737913486006, "grad_norm": 0.01466516808086009, "learning_rate": 1.1097235063808388e-06, "loss": 0.0003, "step": 197830 }, { "epoch": 4.027277353689567, "grad_norm": 0.06126920304076925, "learning_rate": 1.1092771696993642e-06, "loss": 0.0008, "step": 197840 }, { "epoch": 4.027480916030535, "grad_norm": 0.01624987921427506, "learning_rate": 1.108830911596049e-06, "loss": 0.0001, "step": 197850 }, { "epoch": 4.027684478371501, "grad_norm": 0.00909340266658264, "learning_rate": 1.1083847320799052e-06, "loss": 0.0002, "step": 197860 }, { "epoch": 4.027888040712468, "grad_norm": 0.013802560018079316, "learning_rate": 1.1079386311599438e-06, "loss": 0.0001, "step": 197870 }, { "epoch": 4.0280916030534355, "grad_norm": 0.0072403393432308215, "learning_rate": 1.1074926088451765e-06, "loss": 0.0003, "step": 197880 }, { "epoch": 4.028295165394402, "grad_norm": 0.018432828072217754, "learning_rate": 1.107046665144606e-06, "loss": 0.0485, "step": 197890 }, { "epoch": 4.028498727735369, "grad_norm": 0.04434843249088797, "learning_rate": 1.106600800067244e-06, "loss": 0.0002, "step": 197900 }, { "epoch": 4.028702290076336, "grad_norm": 0.010259171384267705, "learning_rate": 1.106155013622095e-06, "loss": 0.0002, "step": 197910 }, { "epoch": 4.028905852417303, "grad_norm": 0.01654636412724817, "learning_rate": 1.1057093058181595e-06, "loss": 0.0396, "step": 197920 }, { "epoch": 4.02910941475827, "grad_norm": 0.001076944632034398, "learning_rate": 1.1052636766644387e-06, "loss": 0.0244, "step": 197930 }, { "epoch": 4.029312977099237, "grad_norm": 0.05910954064855588, "learning_rate": 1.104818126169937e-06, "loss": 0.0002, "step": 197940 }, { "epoch": 4.029516539440204, "grad_norm": 0.05087852162697973, "learning_rate": 1.104372654343649e-06, "loss": 0.0002, "step": 197950 }, { "epoch": 4.02972010178117, "grad_norm": 0.5080863852440571, "learning_rate": 1.1039272611945728e-06, "loss": 0.0061, "step": 197960 }, { "epoch": 4.029923664122137, "grad_norm": 71.63587142535108, "learning_rate": 1.1034819467317037e-06, "loss": 0.0017, "step": 197970 }, { "epoch": 4.0301272264631045, "grad_norm": 0.0035086342954195157, "learning_rate": 1.103036710964035e-06, "loss": 0.0012, "step": 197980 }, { "epoch": 4.030330788804071, "grad_norm": 0.02291583209718022, "learning_rate": 1.1025915539005593e-06, "loss": 0.0001, "step": 197990 }, { "epoch": 4.030534351145038, "grad_norm": 0.004107765214389246, "learning_rate": 1.1021464755502675e-06, "loss": 0.0001, "step": 198000 }, { "epoch": 4.030737913486005, "grad_norm": 30.93837699374792, "learning_rate": 1.1017014759221473e-06, "loss": 0.0648, "step": 198010 }, { "epoch": 4.030941475826972, "grad_norm": 0.010559878518045383, "learning_rate": 1.1012565550251874e-06, "loss": 0.0106, "step": 198020 }, { "epoch": 4.031145038167939, "grad_norm": 0.00021368289599073528, "learning_rate": 1.1008117128683727e-06, "loss": 0.0001, "step": 198030 }, { "epoch": 4.031348600508906, "grad_norm": 0.013258851072201204, "learning_rate": 1.1003669494606889e-06, "loss": 0.0192, "step": 198040 }, { "epoch": 4.031552162849873, "grad_norm": 0.007897020141130905, "learning_rate": 1.099922264811114e-06, "loss": 0.065, "step": 198050 }, { "epoch": 4.031755725190839, "grad_norm": 0.010226726427018228, "learning_rate": 1.0994776589286343e-06, "loss": 0.0001, "step": 198060 }, { "epoch": 4.031959287531807, "grad_norm": 0.03186608816979044, "learning_rate": 1.099033131822228e-06, "loss": 0.0001, "step": 198070 }, { "epoch": 4.0321628498727735, "grad_norm": 0.0016226923739681251, "learning_rate": 1.0985886835008697e-06, "loss": 0.0004, "step": 198080 }, { "epoch": 4.03236641221374, "grad_norm": 0.00497757082634401, "learning_rate": 1.0981443139735403e-06, "loss": 0.0001, "step": 198090 }, { "epoch": 4.032569974554708, "grad_norm": 0.01850087538051118, "learning_rate": 1.0977000232492114e-06, "loss": 0.0002, "step": 198100 }, { "epoch": 4.032773536895674, "grad_norm": 0.000342106193327141, "learning_rate": 1.0972558113368542e-06, "loss": 0.0001, "step": 198110 }, { "epoch": 4.032977099236641, "grad_norm": 5.982331222315015, "learning_rate": 1.0968116782454463e-06, "loss": 0.0096, "step": 198120 }, { "epoch": 4.0331806615776085, "grad_norm": 0.00612650648640655, "learning_rate": 1.0963676239839516e-06, "loss": 0.0002, "step": 198130 }, { "epoch": 4.033384223918575, "grad_norm": 0.02451280362500417, "learning_rate": 1.09592364856134e-06, "loss": 0.0413, "step": 198140 }, { "epoch": 4.033587786259542, "grad_norm": 0.0025894841602643382, "learning_rate": 1.0954797519865806e-06, "loss": 0.0002, "step": 198150 }, { "epoch": 4.033791348600509, "grad_norm": 0.04445380436218269, "learning_rate": 1.095035934268636e-06, "loss": 0.0001, "step": 198160 }, { "epoch": 4.033994910941476, "grad_norm": 0.005349148267468282, "learning_rate": 1.09459219541647e-06, "loss": 0.0001, "step": 198170 }, { "epoch": 4.0341984732824425, "grad_norm": 47.321611298528246, "learning_rate": 1.0941485354390447e-06, "loss": 0.0382, "step": 198180 }, { "epoch": 4.03440203562341, "grad_norm": 0.0055071823997530816, "learning_rate": 1.0937049543453205e-06, "loss": 0.0135, "step": 198190 }, { "epoch": 4.034605597964377, "grad_norm": 0.00405222891917984, "learning_rate": 1.0932614521442558e-06, "loss": 0.0001, "step": 198200 }, { "epoch": 4.034809160305343, "grad_norm": 0.022108828568499234, "learning_rate": 1.0928180288448081e-06, "loss": 0.0001, "step": 198210 }, { "epoch": 4.035012722646311, "grad_norm": 0.021507901427066916, "learning_rate": 1.092374684455933e-06, "loss": 0.0001, "step": 198220 }, { "epoch": 4.0352162849872775, "grad_norm": 0.0014269880814300633, "learning_rate": 1.0919314189865837e-06, "loss": 0.002, "step": 198230 }, { "epoch": 4.035419847328244, "grad_norm": 0.2070589529251915, "learning_rate": 1.0914882324457133e-06, "loss": 0.0191, "step": 198240 }, { "epoch": 4.035623409669212, "grad_norm": 0.004768215412584666, "learning_rate": 1.0910451248422733e-06, "loss": 0.0001, "step": 198250 }, { "epoch": 4.035826972010178, "grad_norm": 0.04032647942072797, "learning_rate": 1.0906020961852088e-06, "loss": 0.0028, "step": 198260 }, { "epoch": 4.036030534351145, "grad_norm": 0.007044601721833251, "learning_rate": 1.0901591464834716e-06, "loss": 0.0614, "step": 198270 }, { "epoch": 4.0362340966921115, "grad_norm": 0.009709656486979242, "learning_rate": 1.0897162757460077e-06, "loss": 0.0001, "step": 198280 }, { "epoch": 4.036437659033079, "grad_norm": 0.021178275629971884, "learning_rate": 1.0892734839817575e-06, "loss": 0.0272, "step": 198290 }, { "epoch": 4.036641221374046, "grad_norm": 0.3252941090859918, "learning_rate": 1.0888307711996677e-06, "loss": 0.0001, "step": 198300 }, { "epoch": 4.036844783715012, "grad_norm": 0.0036161339031717697, "learning_rate": 1.088388137408679e-06, "loss": 0.0003, "step": 198310 }, { "epoch": 4.03704834605598, "grad_norm": 0.08543720911691381, "learning_rate": 1.0879455826177277e-06, "loss": 0.0001, "step": 198320 }, { "epoch": 4.0372519083969465, "grad_norm": 0.012319200463295312, "learning_rate": 1.0875031068357566e-06, "loss": 0.0221, "step": 198330 }, { "epoch": 4.037455470737913, "grad_norm": 0.005914069269236949, "learning_rate": 1.0870607100716979e-06, "loss": 0.0001, "step": 198340 }, { "epoch": 4.037659033078881, "grad_norm": 0.001719722532389168, "learning_rate": 1.0866183923344881e-06, "loss": 0.0001, "step": 198350 }, { "epoch": 4.037862595419847, "grad_norm": 30.257904755307866, "learning_rate": 1.0861761536330606e-06, "loss": 0.0814, "step": 198360 }, { "epoch": 4.038066157760814, "grad_norm": 0.004354409955675566, "learning_rate": 1.085733993976346e-06, "loss": 0.0135, "step": 198370 }, { "epoch": 4.038269720101781, "grad_norm": 0.00458408464987203, "learning_rate": 1.085291913373276e-06, "loss": 0.0002, "step": 198380 }, { "epoch": 4.038473282442748, "grad_norm": 0.021861772327411443, "learning_rate": 1.084849911832777e-06, "loss": 0.0006, "step": 198390 }, { "epoch": 4.038676844783715, "grad_norm": 0.01067030007258399, "learning_rate": 1.0844079893637772e-06, "loss": 0.033, "step": 198400 }, { "epoch": 4.038880407124682, "grad_norm": 0.012349933161228357, "learning_rate": 1.083966145975201e-06, "loss": 0.0002, "step": 198410 }, { "epoch": 4.039083969465649, "grad_norm": 7.496560082304157, "learning_rate": 1.0835243816759722e-06, "loss": 0.0894, "step": 198420 }, { "epoch": 4.0392875318066155, "grad_norm": 0.0121288046580367, "learning_rate": 1.0830826964750129e-06, "loss": 0.0004, "step": 198430 }, { "epoch": 4.039491094147583, "grad_norm": 0.023429198042609713, "learning_rate": 1.082641090381244e-06, "loss": 0.0004, "step": 198440 }, { "epoch": 4.03969465648855, "grad_norm": 0.15177970436646349, "learning_rate": 1.082199563403583e-06, "loss": 0.0383, "step": 198450 }, { "epoch": 4.039898218829516, "grad_norm": 0.001969741222897574, "learning_rate": 1.08175811555095e-06, "loss": 0.0196, "step": 198460 }, { "epoch": 4.040101781170484, "grad_norm": 0.0018999841061784737, "learning_rate": 1.0813167468322566e-06, "loss": 0.033, "step": 198470 }, { "epoch": 4.04030534351145, "grad_norm": 0.0027462760145463023, "learning_rate": 1.0808754572564177e-06, "loss": 0.0662, "step": 198480 }, { "epoch": 4.040508905852417, "grad_norm": 0.0020109682286447183, "learning_rate": 1.0804342468323492e-06, "loss": 0.0001, "step": 198490 }, { "epoch": 4.040712468193385, "grad_norm": 0.014786927768797746, "learning_rate": 1.0799931155689585e-06, "loss": 0.0045, "step": 198500 }, { "epoch": 4.040916030534351, "grad_norm": 0.0010856658910406059, "learning_rate": 1.0795520634751544e-06, "loss": 0.0018, "step": 198510 }, { "epoch": 4.041119592875318, "grad_norm": 0.08311046822834803, "learning_rate": 1.0791110905598485e-06, "loss": 0.0003, "step": 198520 }, { "epoch": 4.041323155216285, "grad_norm": 0.004661289544877184, "learning_rate": 1.0786701968319425e-06, "loss": 0.0001, "step": 198530 }, { "epoch": 4.041526717557252, "grad_norm": 0.001090336547833657, "learning_rate": 1.0782293823003426e-06, "loss": 0.0001, "step": 198540 }, { "epoch": 4.041730279898219, "grad_norm": 0.008233371435495468, "learning_rate": 1.0777886469739517e-06, "loss": 0.0004, "step": 198550 }, { "epoch": 4.041933842239186, "grad_norm": 0.004506564355619074, "learning_rate": 1.0773479908616708e-06, "loss": 0.0552, "step": 198560 }, { "epoch": 4.042137404580153, "grad_norm": 0.00900036860685798, "learning_rate": 1.0769074139723994e-06, "loss": 0.0001, "step": 198570 }, { "epoch": 4.042340966921119, "grad_norm": 0.004122217030383471, "learning_rate": 1.076466916315036e-06, "loss": 0.0547, "step": 198580 }, { "epoch": 4.042544529262086, "grad_norm": 0.012608626526839216, "learning_rate": 1.0760264978984763e-06, "loss": 0.0002, "step": 198590 }, { "epoch": 4.042748091603054, "grad_norm": 0.0048117977080198325, "learning_rate": 1.0755861587316157e-06, "loss": 0.0214, "step": 198600 }, { "epoch": 4.04295165394402, "grad_norm": 0.01678900267719156, "learning_rate": 1.0751458988233477e-06, "loss": 0.0003, "step": 198610 }, { "epoch": 4.043155216284987, "grad_norm": 0.012739142870238251, "learning_rate": 1.0747057181825642e-06, "loss": 0.05, "step": 198620 }, { "epoch": 4.043358778625954, "grad_norm": 0.007652708835561981, "learning_rate": 1.074265616818152e-06, "loss": 0.0002, "step": 198630 }, { "epoch": 4.043562340966921, "grad_norm": 0.003776943611054103, "learning_rate": 1.073825594739003e-06, "loss": 0.0135, "step": 198640 }, { "epoch": 4.043765903307888, "grad_norm": 0.02007810953352915, "learning_rate": 1.0733856519540048e-06, "loss": 0.0001, "step": 198650 }, { "epoch": 4.043969465648855, "grad_norm": 0.001665736951324492, "learning_rate": 1.072945788472038e-06, "loss": 0.03, "step": 198660 }, { "epoch": 4.044173027989822, "grad_norm": 0.006612876040979769, "learning_rate": 1.0725060043019902e-06, "loss": 0.0001, "step": 198670 }, { "epoch": 4.0443765903307884, "grad_norm": 0.15032956164505593, "learning_rate": 1.0720662994527437e-06, "loss": 0.0155, "step": 198680 }, { "epoch": 4.044580152671756, "grad_norm": 0.013200290626487459, "learning_rate": 1.0716266739331744e-06, "loss": 0.0307, "step": 198690 }, { "epoch": 4.044783715012723, "grad_norm": 5.788968802147909, "learning_rate": 1.0711871277521674e-06, "loss": 0.0028, "step": 198700 }, { "epoch": 4.044987277353689, "grad_norm": 0.023274808982192707, "learning_rate": 1.0707476609185946e-06, "loss": 0.0011, "step": 198710 }, { "epoch": 4.045190839694657, "grad_norm": 0.5433453614710105, "learning_rate": 1.0703082734413327e-06, "loss": 0.0266, "step": 198720 }, { "epoch": 4.045394402035623, "grad_norm": 0.09115506446200665, "learning_rate": 1.0698689653292594e-06, "loss": 0.0008, "step": 198730 }, { "epoch": 4.04559796437659, "grad_norm": 0.009394907315621029, "learning_rate": 1.0694297365912425e-06, "loss": 0.0266, "step": 198740 }, { "epoch": 4.0458015267175576, "grad_norm": 0.03712273435219181, "learning_rate": 1.068990587236155e-06, "loss": 0.0545, "step": 198750 }, { "epoch": 4.046005089058524, "grad_norm": 0.00413843469793238, "learning_rate": 1.0685515172728662e-06, "loss": 0.0001, "step": 198760 }, { "epoch": 4.046208651399491, "grad_norm": 0.0016683131293359678, "learning_rate": 1.0681125267102421e-06, "loss": 0.0066, "step": 198770 }, { "epoch": 4.046412213740458, "grad_norm": 0.004557198114761381, "learning_rate": 1.0676736155571504e-06, "loss": 0.0001, "step": 198780 }, { "epoch": 4.046615776081425, "grad_norm": 0.03985628473578593, "learning_rate": 1.0672347838224546e-06, "loss": 0.0002, "step": 198790 }, { "epoch": 4.046819338422392, "grad_norm": 0.008080602625532804, "learning_rate": 1.0667960315150178e-06, "loss": 0.0003, "step": 198800 }, { "epoch": 4.047022900763359, "grad_norm": 0.0018544171817487245, "learning_rate": 1.066357358643701e-06, "loss": 0.038, "step": 198810 }, { "epoch": 4.047226463104326, "grad_norm": 0.004099434746651837, "learning_rate": 1.0659187652173641e-06, "loss": 0.0008, "step": 198820 }, { "epoch": 4.047430025445292, "grad_norm": 0.0009325322960594833, "learning_rate": 1.0654802512448658e-06, "loss": 0.0001, "step": 198830 }, { "epoch": 4.04763358778626, "grad_norm": 0.0077809939828168495, "learning_rate": 1.065041816735059e-06, "loss": 0.0, "step": 198840 }, { "epoch": 4.047837150127227, "grad_norm": 0.0004700368157049589, "learning_rate": 1.064603461696802e-06, "loss": 0.1056, "step": 198850 }, { "epoch": 4.048040712468193, "grad_norm": 0.017018038144813213, "learning_rate": 1.0641651861389484e-06, "loss": 0.0003, "step": 198860 }, { "epoch": 4.048244274809161, "grad_norm": 0.01877743502395059, "learning_rate": 1.0637269900703456e-06, "loss": 0.0002, "step": 198870 }, { "epoch": 4.048447837150127, "grad_norm": 0.000990946322216086, "learning_rate": 1.063288873499847e-06, "loss": 0.0286, "step": 198880 }, { "epoch": 4.048651399491094, "grad_norm": 0.015434756263392312, "learning_rate": 1.0628508364363016e-06, "loss": 0.0007, "step": 198890 }, { "epoch": 4.0488549618320615, "grad_norm": 0.0022635915895335675, "learning_rate": 1.0624128788885536e-06, "loss": 0.0001, "step": 198900 }, { "epoch": 4.049058524173028, "grad_norm": 0.015947330862644084, "learning_rate": 1.0619750008654488e-06, "loss": 0.0002, "step": 198910 }, { "epoch": 4.049262086513995, "grad_norm": 0.016569856518907154, "learning_rate": 1.0615372023758308e-06, "loss": 0.0326, "step": 198920 }, { "epoch": 4.049465648854961, "grad_norm": 11.333210953625622, "learning_rate": 1.0610994834285422e-06, "loss": 0.0436, "step": 198930 }, { "epoch": 4.049669211195929, "grad_norm": 0.004410170177987374, "learning_rate": 1.0606618440324222e-06, "loss": 0.0443, "step": 198940 }, { "epoch": 4.049872773536896, "grad_norm": 0.016665596846651348, "learning_rate": 1.0602242841963107e-06, "loss": 0.0253, "step": 198950 }, { "epoch": 4.050076335877862, "grad_norm": 0.004454919382584535, "learning_rate": 1.0597868039290438e-06, "loss": 0.0001, "step": 198960 }, { "epoch": 4.05027989821883, "grad_norm": 0.012931910431119043, "learning_rate": 1.0593494032394574e-06, "loss": 0.0012, "step": 198970 }, { "epoch": 4.050483460559796, "grad_norm": 0.031828650211771205, "learning_rate": 1.0589120821363851e-06, "loss": 0.0005, "step": 198980 }, { "epoch": 4.050687022900763, "grad_norm": 0.04260198601883769, "learning_rate": 1.05847484062866e-06, "loss": 0.0861, "step": 198990 }, { "epoch": 4.0508905852417305, "grad_norm": 0.13155253791214178, "learning_rate": 1.0580376787251112e-06, "loss": 0.0011, "step": 199000 }, { "epoch": 4.051094147582697, "grad_norm": 0.005069263867282227, "learning_rate": 1.0576005964345693e-06, "loss": 0.0001, "step": 199010 }, { "epoch": 4.051297709923664, "grad_norm": 0.00313809912248835, "learning_rate": 1.0571635937658626e-06, "loss": 0.0046, "step": 199020 }, { "epoch": 4.051501272264631, "grad_norm": 0.0030758638586329983, "learning_rate": 1.0567266707278124e-06, "loss": 0.0032, "step": 199030 }, { "epoch": 4.051704834605598, "grad_norm": 0.004802447760175863, "learning_rate": 1.0562898273292487e-06, "loss": 0.0128, "step": 199040 }, { "epoch": 4.051908396946565, "grad_norm": 0.0018888642435527208, "learning_rate": 1.0558530635789904e-06, "loss": 0.0002, "step": 199050 }, { "epoch": 4.052111959287532, "grad_norm": 1.0815804683341614, "learning_rate": 1.055416379485858e-06, "loss": 0.0476, "step": 199060 }, { "epoch": 4.052315521628499, "grad_norm": 6.8821734386195, "learning_rate": 1.0549797750586749e-06, "loss": 0.0339, "step": 199070 }, { "epoch": 4.052519083969465, "grad_norm": 0.038385968097499924, "learning_rate": 1.0545432503062554e-06, "loss": 0.014, "step": 199080 }, { "epoch": 4.052722646310433, "grad_norm": 0.0027819988164278185, "learning_rate": 1.0541068052374149e-06, "loss": 0.024, "step": 199090 }, { "epoch": 4.0529262086513995, "grad_norm": 0.034221713686136745, "learning_rate": 1.0536704398609726e-06, "loss": 0.0023, "step": 199100 }, { "epoch": 4.053129770992366, "grad_norm": 18.84916648012341, "learning_rate": 1.0532341541857378e-06, "loss": 0.0172, "step": 199110 }, { "epoch": 4.053333333333334, "grad_norm": 0.015094451817292987, "learning_rate": 1.052797948220522e-06, "loss": 0.0569, "step": 199120 }, { "epoch": 4.0535368956743, "grad_norm": 6.558623104779325, "learning_rate": 1.0523618219741355e-06, "loss": 0.007, "step": 199130 }, { "epoch": 4.053740458015267, "grad_norm": 0.06031871648450424, "learning_rate": 1.0519257754553868e-06, "loss": 0.0577, "step": 199140 }, { "epoch": 4.0539440203562345, "grad_norm": 0.004259614742284645, "learning_rate": 1.0514898086730823e-06, "loss": 0.0005, "step": 199150 }, { "epoch": 4.054147582697201, "grad_norm": 0.004050995432889272, "learning_rate": 1.0510539216360267e-06, "loss": 0.0001, "step": 199160 }, { "epoch": 4.054351145038168, "grad_norm": 0.08770136070183834, "learning_rate": 1.050618114353023e-06, "loss": 0.0273, "step": 199170 }, { "epoch": 4.054554707379135, "grad_norm": 0.005590610412295901, "learning_rate": 1.0501823868328737e-06, "loss": 0.0002, "step": 199180 }, { "epoch": 4.054758269720102, "grad_norm": 0.003975163891740741, "learning_rate": 1.049746739084378e-06, "loss": 0.0301, "step": 199190 }, { "epoch": 4.0549618320610685, "grad_norm": 0.09227461782452823, "learning_rate": 1.0493111711163362e-06, "loss": 0.0346, "step": 199200 }, { "epoch": 4.055165394402036, "grad_norm": 0.010570991072050326, "learning_rate": 1.0488756829375413e-06, "loss": 0.069, "step": 199210 }, { "epoch": 4.055368956743003, "grad_norm": 19.436458006120567, "learning_rate": 1.0484402745567922e-06, "loss": 0.0173, "step": 199220 }, { "epoch": 4.055572519083969, "grad_norm": 0.007656988477773112, "learning_rate": 1.0480049459828828e-06, "loss": 0.0233, "step": 199230 }, { "epoch": 4.055776081424936, "grad_norm": 0.011836747334843285, "learning_rate": 1.0475696972246008e-06, "loss": 0.0001, "step": 199240 }, { "epoch": 4.0559796437659035, "grad_norm": 0.025138967113858778, "learning_rate": 1.0471345282907409e-06, "loss": 0.0005, "step": 199250 }, { "epoch": 4.05618320610687, "grad_norm": 0.0037756805973976817, "learning_rate": 1.046699439190092e-06, "loss": 0.0071, "step": 199260 }, { "epoch": 4.056386768447837, "grad_norm": 0.01699082668518298, "learning_rate": 1.0462644299314367e-06, "loss": 0.0272, "step": 199270 }, { "epoch": 4.056590330788804, "grad_norm": 0.0016264168620027273, "learning_rate": 1.0458295005235664e-06, "loss": 0.0003, "step": 199280 }, { "epoch": 4.056793893129771, "grad_norm": 0.014339951272945033, "learning_rate": 1.045394650975261e-06, "loss": 0.0001, "step": 199290 }, { "epoch": 4.0569974554707375, "grad_norm": 0.0014920105480598252, "learning_rate": 1.0449598812953026e-06, "loss": 0.0001, "step": 199300 }, { "epoch": 4.057201017811705, "grad_norm": 0.07170395580590923, "learning_rate": 1.0445251914924765e-06, "loss": 0.0244, "step": 199310 }, { "epoch": 4.057404580152672, "grad_norm": 0.03591683646991076, "learning_rate": 1.0440905815755569e-06, "loss": 0.0194, "step": 199320 }, { "epoch": 4.057608142493638, "grad_norm": 0.0033961418742051333, "learning_rate": 1.0436560515533234e-06, "loss": 0.0001, "step": 199330 }, { "epoch": 4.057811704834606, "grad_norm": 0.0032882554992818944, "learning_rate": 1.043221601434552e-06, "loss": 0.0011, "step": 199340 }, { "epoch": 4.0580152671755725, "grad_norm": 0.01776920308414759, "learning_rate": 1.0427872312280163e-06, "loss": 0.0129, "step": 199350 }, { "epoch": 4.058218829516539, "grad_norm": 0.007884545724761478, "learning_rate": 1.0423529409424893e-06, "loss": 0.0557, "step": 199360 }, { "epoch": 4.058422391857507, "grad_norm": 0.1735590799829227, "learning_rate": 1.0419187305867424e-06, "loss": 0.0002, "step": 199370 }, { "epoch": 4.058625954198473, "grad_norm": 0.005223639254636028, "learning_rate": 1.0414846001695444e-06, "loss": 0.1273, "step": 199380 }, { "epoch": 4.05882951653944, "grad_norm": 0.0042790422506449564, "learning_rate": 1.0410505496996638e-06, "loss": 0.0243, "step": 199390 }, { "epoch": 4.059033078880407, "grad_norm": 0.41872459119115696, "learning_rate": 1.0406165791858659e-06, "loss": 0.0344, "step": 199400 }, { "epoch": 4.059236641221374, "grad_norm": 0.007821554664346565, "learning_rate": 1.0401826886369176e-06, "loss": 0.0005, "step": 199410 }, { "epoch": 4.059440203562341, "grad_norm": 0.028258586991445143, "learning_rate": 1.0397488780615772e-06, "loss": 0.0004, "step": 199420 }, { "epoch": 4.059643765903308, "grad_norm": 0.0033578502460747716, "learning_rate": 1.0393151474686103e-06, "loss": 0.0026, "step": 199430 }, { "epoch": 4.059847328244275, "grad_norm": 0.022410131297953707, "learning_rate": 1.0388814968667766e-06, "loss": 0.0002, "step": 199440 }, { "epoch": 4.0600508905852415, "grad_norm": 0.014250792816962137, "learning_rate": 1.0384479262648317e-06, "loss": 0.002, "step": 199450 }, { "epoch": 4.060254452926209, "grad_norm": 0.00035751534431459586, "learning_rate": 1.0380144356715322e-06, "loss": 0.0426, "step": 199460 }, { "epoch": 4.060458015267176, "grad_norm": 20.318485761354623, "learning_rate": 1.0375810250956365e-06, "loss": 0.0316, "step": 199470 }, { "epoch": 4.060661577608142, "grad_norm": 0.0007226778480724612, "learning_rate": 1.0371476945458947e-06, "loss": 0.0001, "step": 199480 }, { "epoch": 4.06086513994911, "grad_norm": 0.027603256675030115, "learning_rate": 1.0367144440310588e-06, "loss": 0.057, "step": 199490 }, { "epoch": 4.061068702290076, "grad_norm": 0.14308605944386682, "learning_rate": 1.0362812735598797e-06, "loss": 0.0256, "step": 199500 }, { "epoch": 4.061272264631043, "grad_norm": 0.009598542569908072, "learning_rate": 1.0358481831411054e-06, "loss": 0.0001, "step": 199510 }, { "epoch": 4.061475826972011, "grad_norm": 0.008148185293163582, "learning_rate": 1.0354151727834833e-06, "loss": 0.0002, "step": 199520 }, { "epoch": 4.061679389312977, "grad_norm": 0.0771607195331708, "learning_rate": 1.0349822424957574e-06, "loss": 0.0243, "step": 199530 }, { "epoch": 4.061882951653944, "grad_norm": 0.03229819387547021, "learning_rate": 1.0345493922866722e-06, "loss": 0.0002, "step": 199540 }, { "epoch": 4.062086513994911, "grad_norm": 0.006557587913600637, "learning_rate": 1.0341166221649695e-06, "loss": 0.0002, "step": 199550 }, { "epoch": 4.062290076335878, "grad_norm": 0.031330394198323376, "learning_rate": 1.03368393213939e-06, "loss": 0.0228, "step": 199560 }, { "epoch": 4.062493638676845, "grad_norm": 22.041143198458663, "learning_rate": 1.033251322218673e-06, "loss": 0.0518, "step": 199570 }, { "epoch": 4.062697201017811, "grad_norm": 0.004800381507995366, "learning_rate": 1.0328187924115524e-06, "loss": 0.0003, "step": 199580 }, { "epoch": 4.062900763358779, "grad_norm": 1.8514200139438157, "learning_rate": 1.0323863427267673e-06, "loss": 0.0546, "step": 199590 }, { "epoch": 4.0631043256997454, "grad_norm": 0.03226220097426968, "learning_rate": 1.031953973173051e-06, "loss": 0.0018, "step": 199600 }, { "epoch": 4.063307888040712, "grad_norm": 0.001095036222460918, "learning_rate": 1.031521683759133e-06, "loss": 0.0005, "step": 199610 }, { "epoch": 4.06351145038168, "grad_norm": 0.056637834197017525, "learning_rate": 1.0310894744937466e-06, "loss": 0.0002, "step": 199620 }, { "epoch": 4.063715012722646, "grad_norm": 0.01643057733811588, "learning_rate": 1.0306573453856222e-06, "loss": 0.0002, "step": 199630 }, { "epoch": 4.063918575063613, "grad_norm": 0.027385028991211747, "learning_rate": 1.0302252964434823e-06, "loss": 0.0002, "step": 199640 }, { "epoch": 4.06412213740458, "grad_norm": 0.04533938456220146, "learning_rate": 1.029793327676058e-06, "loss": 0.0001, "step": 199650 }, { "epoch": 4.064325699745547, "grad_norm": 0.0330355329297754, "learning_rate": 1.0293614390920698e-06, "loss": 0.0002, "step": 199660 }, { "epoch": 4.064529262086514, "grad_norm": 0.04088841516353972, "learning_rate": 1.0289296307002406e-06, "loss": 0.0247, "step": 199670 }, { "epoch": 4.064732824427481, "grad_norm": 0.0068893635967695105, "learning_rate": 1.028497902509294e-06, "loss": 0.0004, "step": 199680 }, { "epoch": 4.064936386768448, "grad_norm": 0.07340100298464693, "learning_rate": 1.0280662545279469e-06, "loss": 0.0059, "step": 199690 }, { "epoch": 4.0651399491094145, "grad_norm": 0.0030335113661472232, "learning_rate": 1.0276346867649162e-06, "loss": 0.0001, "step": 199700 }, { "epoch": 4.065343511450382, "grad_norm": 0.003318803507008719, "learning_rate": 1.0272031992289216e-06, "loss": 0.0046, "step": 199710 }, { "epoch": 4.065547073791349, "grad_norm": 0.016971428020980634, "learning_rate": 1.0267717919286745e-06, "loss": 0.0007, "step": 199720 }, { "epoch": 4.065750636132315, "grad_norm": 8.374006780227376, "learning_rate": 1.026340464872888e-06, "loss": 0.0213, "step": 199730 }, { "epoch": 4.065954198473283, "grad_norm": 0.007639088312698979, "learning_rate": 1.0259092180702745e-06, "loss": 0.0003, "step": 199740 }, { "epoch": 4.066157760814249, "grad_norm": 0.004851267704442895, "learning_rate": 1.0254780515295426e-06, "loss": 0.0974, "step": 199750 }, { "epoch": 4.066361323155216, "grad_norm": 0.07800638471985924, "learning_rate": 1.0250469652594004e-06, "loss": 0.0075, "step": 199760 }, { "epoch": 4.066564885496184, "grad_norm": 0.03939801267829681, "learning_rate": 1.0246159592685544e-06, "loss": 0.0001, "step": 199770 }, { "epoch": 4.06676844783715, "grad_norm": 0.003531418572531333, "learning_rate": 1.0241850335657112e-06, "loss": 0.0002, "step": 199780 }, { "epoch": 4.066972010178117, "grad_norm": 0.11491482138348479, "learning_rate": 1.0237541881595692e-06, "loss": 0.002, "step": 199790 }, { "epoch": 4.067175572519084, "grad_norm": 0.002622048592512637, "learning_rate": 1.023323423058834e-06, "loss": 0.0001, "step": 199800 }, { "epoch": 4.067379134860051, "grad_norm": 0.008375919755319974, "learning_rate": 1.0228927382722054e-06, "loss": 0.007, "step": 199810 }, { "epoch": 4.067582697201018, "grad_norm": 0.01314942167798486, "learning_rate": 1.0224621338083779e-06, "loss": 0.0001, "step": 199820 }, { "epoch": 4.067786259541985, "grad_norm": 38.166461771372255, "learning_rate": 1.0220316096760518e-06, "loss": 0.0153, "step": 199830 }, { "epoch": 4.067989821882952, "grad_norm": 0.0069080462716127845, "learning_rate": 1.0216011658839225e-06, "loss": 0.0001, "step": 199840 }, { "epoch": 4.068193384223918, "grad_norm": 0.006209040278514095, "learning_rate": 1.021170802440679e-06, "loss": 0.0002, "step": 199850 }, { "epoch": 4.068396946564886, "grad_norm": 0.012103795553286072, "learning_rate": 1.0207405193550186e-06, "loss": 0.0462, "step": 199860 }, { "epoch": 4.068600508905853, "grad_norm": 24.585556145702533, "learning_rate": 1.020310316635627e-06, "loss": 0.0392, "step": 199870 }, { "epoch": 4.068804071246819, "grad_norm": 0.007193529304240845, "learning_rate": 1.0198801942911946e-06, "loss": 0.0568, "step": 199880 }, { "epoch": 4.069007633587786, "grad_norm": 0.003356171567357988, "learning_rate": 1.0194501523304085e-06, "loss": 0.0449, "step": 199890 }, { "epoch": 4.069211195928753, "grad_norm": 0.027373977531850485, "learning_rate": 1.0190201907619528e-06, "loss": 0.0001, "step": 199900 }, { "epoch": 4.06941475826972, "grad_norm": 8.851127902024427, "learning_rate": 1.018590309594512e-06, "loss": 0.0397, "step": 199910 }, { "epoch": 4.069618320610687, "grad_norm": 0.02313217027171758, "learning_rate": 1.018160508836768e-06, "loss": 0.0017, "step": 199920 }, { "epoch": 4.069821882951654, "grad_norm": 0.18143084662364678, "learning_rate": 1.0177307884974007e-06, "loss": 0.0165, "step": 199930 }, { "epoch": 4.070025445292621, "grad_norm": 10.063201950046778, "learning_rate": 1.0173011485850898e-06, "loss": 0.0305, "step": 199940 }, { "epoch": 4.070229007633587, "grad_norm": 0.07050140152749024, "learning_rate": 1.0168715891085118e-06, "loss": 0.0004, "step": 199950 }, { "epoch": 4.070432569974555, "grad_norm": 0.0018120833685465751, "learning_rate": 1.0164421100763422e-06, "loss": 0.0002, "step": 199960 }, { "epoch": 4.070636132315522, "grad_norm": 0.01115835293714322, "learning_rate": 1.016012711497255e-06, "loss": 0.0001, "step": 199970 }, { "epoch": 4.070839694656488, "grad_norm": 0.01187847659799687, "learning_rate": 1.0155833933799219e-06, "loss": 0.0006, "step": 199980 }, { "epoch": 4.071043256997456, "grad_norm": 0.1668982198798063, "learning_rate": 1.0151541557330148e-06, "loss": 0.0322, "step": 199990 }, { "epoch": 4.071246819338422, "grad_norm": 0.017223958088473636, "learning_rate": 1.0147249985652031e-06, "loss": 0.0007, "step": 200000 }, { "epoch": 4.071450381679389, "grad_norm": 0.0036683224367409797, "learning_rate": 1.01429592188515e-06, "loss": 0.0192, "step": 200010 }, { "epoch": 4.0716539440203565, "grad_norm": 0.0005592684504963294, "learning_rate": 1.0138669257015276e-06, "loss": 0.0235, "step": 200020 }, { "epoch": 4.071857506361323, "grad_norm": 0.010157352542680236, "learning_rate": 1.0134380100229952e-06, "loss": 0.0002, "step": 200030 }, { "epoch": 4.07206106870229, "grad_norm": 0.021594948285321384, "learning_rate": 1.0130091748582154e-06, "loss": 0.0156, "step": 200040 }, { "epoch": 4.072264631043257, "grad_norm": 66.31967180751937, "learning_rate": 1.012580420215853e-06, "loss": 0.0743, "step": 200050 }, { "epoch": 4.072468193384224, "grad_norm": 0.018277431639944396, "learning_rate": 1.0121517461045638e-06, "loss": 0.0001, "step": 200060 }, { "epoch": 4.072671755725191, "grad_norm": 0.0034012570482772416, "learning_rate": 1.0117231525330045e-06, "loss": 0.0006, "step": 200070 }, { "epoch": 4.072875318066158, "grad_norm": 0.024120116211624774, "learning_rate": 1.011294639509836e-06, "loss": 0.0003, "step": 200080 }, { "epoch": 4.073078880407125, "grad_norm": 7.899911242475909, "learning_rate": 1.0108662070437087e-06, "loss": 0.0022, "step": 200090 }, { "epoch": 4.073282442748091, "grad_norm": 0.003033856329255884, "learning_rate": 1.010437855143276e-06, "loss": 0.0001, "step": 200100 }, { "epoch": 4.073486005089059, "grad_norm": 0.004701120220915817, "learning_rate": 1.0100095838171892e-06, "loss": 0.0001, "step": 200110 }, { "epoch": 4.0736895674300255, "grad_norm": 0.015994227664015596, "learning_rate": 1.0095813930740982e-06, "loss": 0.0276, "step": 200120 }, { "epoch": 4.073893129770992, "grad_norm": 0.0028607237955213536, "learning_rate": 1.0091532829226503e-06, "loss": 0.033, "step": 200130 }, { "epoch": 4.07409669211196, "grad_norm": 0.0013195416843316313, "learning_rate": 1.0087252533714925e-06, "loss": 0.0001, "step": 200140 }, { "epoch": 4.074300254452926, "grad_norm": 0.0021388383081341034, "learning_rate": 1.0082973044292698e-06, "loss": 0.0025, "step": 200150 }, { "epoch": 4.074503816793893, "grad_norm": 0.0061212084557601566, "learning_rate": 1.0078694361046215e-06, "loss": 0.0283, "step": 200160 }, { "epoch": 4.0747073791348605, "grad_norm": 0.008839835806783703, "learning_rate": 1.0074416484061938e-06, "loss": 0.0001, "step": 200170 }, { "epoch": 4.074910941475827, "grad_norm": 0.014583234573124824, "learning_rate": 1.007013941342625e-06, "loss": 0.011, "step": 200180 }, { "epoch": 4.075114503816794, "grad_norm": 0.0041285262457926904, "learning_rate": 1.0065863149225503e-06, "loss": 0.0003, "step": 200190 }, { "epoch": 4.07531806615776, "grad_norm": 0.28187846911230396, "learning_rate": 1.0061587691546098e-06, "loss": 0.0084, "step": 200200 }, { "epoch": 4.075521628498728, "grad_norm": 0.0025489306692114636, "learning_rate": 1.005731304047438e-06, "loss": 0.0048, "step": 200210 }, { "epoch": 4.0757251908396945, "grad_norm": 0.10157790040471713, "learning_rate": 1.0053039196096647e-06, "loss": 0.0002, "step": 200220 }, { "epoch": 4.075928753180661, "grad_norm": 0.001512878439171203, "learning_rate": 1.0048766158499263e-06, "loss": 0.0091, "step": 200230 }, { "epoch": 4.076132315521629, "grad_norm": 0.00223802732611274, "learning_rate": 1.0044493927768496e-06, "loss": 0.0001, "step": 200240 }, { "epoch": 4.076335877862595, "grad_norm": 0.007960202778884823, "learning_rate": 1.0040222503990614e-06, "loss": 0.0213, "step": 200250 }, { "epoch": 4.076539440203562, "grad_norm": 0.02378357538887993, "learning_rate": 1.0035951887251937e-06, "loss": 0.0484, "step": 200260 }, { "epoch": 4.0767430025445295, "grad_norm": 0.00550738850406229, "learning_rate": 1.0031682077638676e-06, "loss": 0.0002, "step": 200270 }, { "epoch": 4.076946564885496, "grad_norm": 0.22362283723397736, "learning_rate": 1.0027413075237058e-06, "loss": 0.0002, "step": 200280 }, { "epoch": 4.077150127226463, "grad_norm": 0.019117919959413174, "learning_rate": 1.002314488013334e-06, "loss": 0.0247, "step": 200290 }, { "epoch": 4.07735368956743, "grad_norm": 0.0023650544858284093, "learning_rate": 1.0018877492413693e-06, "loss": 0.0001, "step": 200300 }, { "epoch": 4.077557251908397, "grad_norm": 19.99859806429485, "learning_rate": 1.0014610912164307e-06, "loss": 0.0846, "step": 200310 }, { "epoch": 4.0777608142493635, "grad_norm": 0.015778872385311036, "learning_rate": 1.0010345139471355e-06, "loss": 0.0007, "step": 200320 }, { "epoch": 4.077964376590331, "grad_norm": 0.0007297178051145295, "learning_rate": 1.000608017442099e-06, "loss": 0.0031, "step": 200330 }, { "epoch": 4.078167938931298, "grad_norm": 0.007248666168145684, "learning_rate": 1.0001816017099348e-06, "loss": 0.0012, "step": 200340 }, { "epoch": 4.078371501272264, "grad_norm": 0.001910963492828733, "learning_rate": 9.997552667592548e-07, "loss": 0.0001, "step": 200350 }, { "epoch": 4.078575063613232, "grad_norm": 0.009558439565555842, "learning_rate": 9.993290125986699e-07, "loss": 0.0001, "step": 200360 }, { "epoch": 4.0787786259541985, "grad_norm": 0.05762443705555183, "learning_rate": 9.989028392367877e-07, "loss": 0.0001, "step": 200370 }, { "epoch": 4.078982188295165, "grad_norm": 0.045746436976323315, "learning_rate": 9.984767466822164e-07, "loss": 0.0273, "step": 200380 }, { "epoch": 4.079185750636133, "grad_norm": 5.32415354537753, "learning_rate": 9.980507349435626e-07, "loss": 0.0392, "step": 200390 }, { "epoch": 4.079389312977099, "grad_norm": 0.0007177032619273394, "learning_rate": 9.976248040294257e-07, "loss": 0.0002, "step": 200400 }, { "epoch": 4.079592875318066, "grad_norm": 0.0026111214037179745, "learning_rate": 9.971989539484123e-07, "loss": 0.0002, "step": 200410 }, { "epoch": 4.079796437659033, "grad_norm": 0.02439603621605674, "learning_rate": 9.967731847091227e-07, "loss": 0.0002, "step": 200420 }, { "epoch": 4.08, "grad_norm": 0.007049256620209108, "learning_rate": 9.963474963201541e-07, "loss": 0.0001, "step": 200430 }, { "epoch": 4.080203562340967, "grad_norm": 0.008044443537796033, "learning_rate": 9.95921888790104e-07, "loss": 0.0002, "step": 200440 }, { "epoch": 4.080407124681934, "grad_norm": 0.0030643374456319887, "learning_rate": 9.954963621275686e-07, "loss": 0.0424, "step": 200450 }, { "epoch": 4.080610687022901, "grad_norm": 8.753679606514694, "learning_rate": 9.95070916341142e-07, "loss": 0.0196, "step": 200460 }, { "epoch": 4.0808142493638675, "grad_norm": 0.1081192619746984, "learning_rate": 9.946455514394165e-07, "loss": 0.0492, "step": 200470 }, { "epoch": 4.081017811704835, "grad_norm": 0.0027742797978514955, "learning_rate": 9.942202674309832e-07, "loss": 0.0643, "step": 200480 }, { "epoch": 4.081221374045802, "grad_norm": 25.177828029485575, "learning_rate": 9.937950643244304e-07, "loss": 0.0361, "step": 200490 }, { "epoch": 4.081424936386768, "grad_norm": 0.020845233759850847, "learning_rate": 9.933699421283466e-07, "loss": 0.0003, "step": 200500 }, { "epoch": 4.081628498727735, "grad_norm": 0.003086263609407673, "learning_rate": 9.929449008513175e-07, "loss": 0.0177, "step": 200510 }, { "epoch": 4.0818320610687024, "grad_norm": 0.003062093083213463, "learning_rate": 9.92519940501927e-07, "loss": 0.0001, "step": 200520 }, { "epoch": 4.082035623409669, "grad_norm": 0.03059522555648571, "learning_rate": 9.92095061088758e-07, "loss": 0.0097, "step": 200530 }, { "epoch": 4.082239185750636, "grad_norm": 0.005719393226414369, "learning_rate": 9.916702626203912e-07, "loss": 0.0143, "step": 200540 }, { "epoch": 4.082442748091603, "grad_norm": 0.025679026958945226, "learning_rate": 9.912455451054076e-07, "loss": 0.0004, "step": 200550 }, { "epoch": 4.08264631043257, "grad_norm": 0.14699208172439185, "learning_rate": 9.908209085523806e-07, "loss": 0.0007, "step": 200560 }, { "epoch": 4.0828498727735365, "grad_norm": 0.010074585869444874, "learning_rate": 9.903963529698906e-07, "loss": 0.0392, "step": 200570 }, { "epoch": 4.083053435114504, "grad_norm": 0.06169394586072137, "learning_rate": 9.89971878366512e-07, "loss": 0.0004, "step": 200580 }, { "epoch": 4.083256997455471, "grad_norm": 0.05978808379239321, "learning_rate": 9.895474847508135e-07, "loss": 0.0002, "step": 200590 }, { "epoch": 4.083460559796437, "grad_norm": 0.0023978567480168263, "learning_rate": 9.891231721313716e-07, "loss": 0.0008, "step": 200600 }, { "epoch": 4.083664122137405, "grad_norm": 0.009975702366284925, "learning_rate": 9.886989405167519e-07, "loss": 0.0174, "step": 200610 }, { "epoch": 4.0838676844783715, "grad_norm": 0.021147675365528806, "learning_rate": 9.88274789915522e-07, "loss": 0.0006, "step": 200620 }, { "epoch": 4.084071246819338, "grad_norm": 0.0014622564496065335, "learning_rate": 9.87850720336253e-07, "loss": 0.0291, "step": 200630 }, { "epoch": 4.084274809160306, "grad_norm": 0.008003844800699768, "learning_rate": 9.874267317875047e-07, "loss": 0.0001, "step": 200640 }, { "epoch": 4.084478371501272, "grad_norm": 0.0010182830842016643, "learning_rate": 9.8700282427784e-07, "loss": 0.0, "step": 200650 }, { "epoch": 4.084681933842239, "grad_norm": 21.17845904533098, "learning_rate": 9.865789978158252e-07, "loss": 0.0378, "step": 200660 }, { "epoch": 4.084885496183206, "grad_norm": 0.0002458345788793355, "learning_rate": 9.861552524100155e-07, "loss": 0.0004, "step": 200670 }, { "epoch": 4.085089058524173, "grad_norm": 0.010937193150728847, "learning_rate": 9.8573158806897e-07, "loss": 0.0005, "step": 200680 }, { "epoch": 4.08529262086514, "grad_norm": 0.005695413781864931, "learning_rate": 9.853080048012452e-07, "loss": 0.0001, "step": 200690 }, { "epoch": 4.085496183206107, "grad_norm": 0.013933582226721112, "learning_rate": 9.848845026153963e-07, "loss": 0.0002, "step": 200700 }, { "epoch": 4.085699745547074, "grad_norm": 0.0020072877217833444, "learning_rate": 9.844610815199768e-07, "loss": 0.0002, "step": 200710 }, { "epoch": 4.0859033078880405, "grad_norm": 0.0006887089738208511, "learning_rate": 9.84037741523537e-07, "loss": 0.0003, "step": 200720 }, { "epoch": 4.086106870229008, "grad_norm": 0.06062066384498549, "learning_rate": 9.836144826346278e-07, "loss": 0.0001, "step": 200730 }, { "epoch": 4.086310432569975, "grad_norm": 0.02097494873579416, "learning_rate": 9.831913048617975e-07, "loss": 0.0305, "step": 200740 }, { "epoch": 4.086513994910941, "grad_norm": 0.0020184801419305823, "learning_rate": 9.82768208213592e-07, "loss": 0.0587, "step": 200750 }, { "epoch": 4.086717557251909, "grad_norm": 0.016077893738212494, "learning_rate": 9.823451926985577e-07, "loss": 0.0414, "step": 200760 }, { "epoch": 4.086921119592875, "grad_norm": 0.011873867247547559, "learning_rate": 9.819222583252352e-07, "loss": 0.0151, "step": 200770 }, { "epoch": 4.087124681933842, "grad_norm": 0.02208013726712391, "learning_rate": 9.814994051021686e-07, "loss": 0.0001, "step": 200780 }, { "epoch": 4.08732824427481, "grad_norm": 0.004250720590225801, "learning_rate": 9.81076633037899e-07, "loss": 0.0364, "step": 200790 }, { "epoch": 4.087531806615776, "grad_norm": 0.04086582710116566, "learning_rate": 9.8065394214096e-07, "loss": 0.0192, "step": 200800 }, { "epoch": 4.087735368956743, "grad_norm": 0.00027430347065379664, "learning_rate": 9.802313324198947e-07, "loss": 0.0004, "step": 200810 }, { "epoch": 4.08793893129771, "grad_norm": 0.004477079829557699, "learning_rate": 9.798088038832332e-07, "loss": 0.0001, "step": 200820 }, { "epoch": 4.088142493638677, "grad_norm": 0.0015660189900311714, "learning_rate": 9.793863565395102e-07, "loss": 0.0002, "step": 200830 }, { "epoch": 4.088346055979644, "grad_norm": 0.01370298778011418, "learning_rate": 9.789639903972602e-07, "loss": 0.0678, "step": 200840 }, { "epoch": 4.08854961832061, "grad_norm": 0.006411696173759297, "learning_rate": 9.785417054650103e-07, "loss": 0.0264, "step": 200850 }, { "epoch": 4.088753180661578, "grad_norm": 0.00747233559662491, "learning_rate": 9.781195017512906e-07, "loss": 0.0002, "step": 200860 }, { "epoch": 4.088956743002544, "grad_norm": 0.3412970075585607, "learning_rate": 9.776973792646278e-07, "loss": 0.0009, "step": 200870 }, { "epoch": 4.089160305343511, "grad_norm": 0.0061078977170446825, "learning_rate": 9.772753380135469e-07, "loss": 0.0001, "step": 200880 }, { "epoch": 4.089363867684479, "grad_norm": 2.593437876023638, "learning_rate": 9.768533780065715e-07, "loss": 0.0121, "step": 200890 }, { "epoch": 4.089567430025445, "grad_norm": 0.0013266791585518493, "learning_rate": 9.764314992522245e-07, "loss": 0.0399, "step": 200900 }, { "epoch": 4.089770992366412, "grad_norm": 0.0041751469842921185, "learning_rate": 9.760097017590254e-07, "loss": 0.0273, "step": 200910 }, { "epoch": 4.089974554707379, "grad_norm": 0.06911154068400467, "learning_rate": 9.755879855354927e-07, "loss": 0.0302, "step": 200920 }, { "epoch": 4.090178117048346, "grad_norm": 0.009307394426931936, "learning_rate": 9.75166350590145e-07, "loss": 0.0498, "step": 200930 }, { "epoch": 4.090381679389313, "grad_norm": 0.007514463791786802, "learning_rate": 9.74744796931496e-07, "loss": 0.0007, "step": 200940 }, { "epoch": 4.09058524173028, "grad_norm": 0.0013142139403998468, "learning_rate": 9.743233245680606e-07, "loss": 0.0003, "step": 200950 }, { "epoch": 4.090788804071247, "grad_norm": 0.0012855462003791415, "learning_rate": 9.739019335083505e-07, "loss": 0.0575, "step": 200960 }, { "epoch": 4.090992366412213, "grad_norm": 0.020028980187768962, "learning_rate": 9.734806237608778e-07, "loss": 0.0007, "step": 200970 }, { "epoch": 4.091195928753181, "grad_norm": 5.0424049698160855, "learning_rate": 9.730593953341495e-07, "loss": 0.0064, "step": 200980 }, { "epoch": 4.091399491094148, "grad_norm": 0.005004926647486691, "learning_rate": 9.726382482366708e-07, "loss": 0.0481, "step": 200990 }, { "epoch": 4.091603053435114, "grad_norm": 0.00578378035821913, "learning_rate": 9.722171824769533e-07, "loss": 0.0001, "step": 201000 }, { "epoch": 4.091806615776082, "grad_norm": 0.002329170156698096, "learning_rate": 9.71796198063496e-07, "loss": 0.0394, "step": 201010 }, { "epoch": 4.092010178117048, "grad_norm": 0.006123203406199136, "learning_rate": 9.713752950048016e-07, "loss": 0.0166, "step": 201020 }, { "epoch": 4.092213740458015, "grad_norm": 0.08682249267926141, "learning_rate": 9.709544733093746e-07, "loss": 0.0002, "step": 201030 }, { "epoch": 4.0924173027989825, "grad_norm": 0.02792461029774242, "learning_rate": 9.7053373298571e-07, "loss": 0.0001, "step": 201040 }, { "epoch": 4.092620865139949, "grad_norm": 0.0025847249782615833, "learning_rate": 9.701130740423066e-07, "loss": 0.0615, "step": 201050 }, { "epoch": 4.092824427480916, "grad_norm": 0.0008511586036450693, "learning_rate": 9.696924964876596e-07, "loss": 0.0736, "step": 201060 }, { "epoch": 4.093027989821883, "grad_norm": 9.513382840117472, "learning_rate": 9.692720003302641e-07, "loss": 0.028, "step": 201070 }, { "epoch": 4.09323155216285, "grad_norm": 0.0027384921398139065, "learning_rate": 9.688515855786124e-07, "loss": 0.0233, "step": 201080 }, { "epoch": 4.093435114503817, "grad_norm": 0.004364452134423953, "learning_rate": 9.684312522411947e-07, "loss": 0.0001, "step": 201090 }, { "epoch": 4.093638676844784, "grad_norm": 0.020541769591287267, "learning_rate": 9.680110003265003e-07, "loss": 0.0001, "step": 201100 }, { "epoch": 4.093842239185751, "grad_norm": 0.0023076649116432566, "learning_rate": 9.67590829843017e-07, "loss": 0.0001, "step": 201110 }, { "epoch": 4.094045801526717, "grad_norm": 17.054562896341828, "learning_rate": 9.671707407992308e-07, "loss": 0.0027, "step": 201120 }, { "epoch": 4.094249363867685, "grad_norm": 0.004917574084147926, "learning_rate": 9.667507332036268e-07, "loss": 0.0485, "step": 201130 }, { "epoch": 4.0944529262086515, "grad_norm": 0.01945884381284771, "learning_rate": 9.663308070646843e-07, "loss": 0.0004, "step": 201140 }, { "epoch": 4.094656488549618, "grad_norm": 0.012162710956294887, "learning_rate": 9.659109623908874e-07, "loss": 0.0506, "step": 201150 }, { "epoch": 4.094860050890585, "grad_norm": 0.00283551455043124, "learning_rate": 9.654911991907163e-07, "loss": 0.0397, "step": 201160 }, { "epoch": 4.095063613231552, "grad_norm": 0.05175897124359617, "learning_rate": 9.650715174726433e-07, "loss": 0.0001, "step": 201170 }, { "epoch": 4.095267175572519, "grad_norm": 0.0017565587641896671, "learning_rate": 9.646519172451513e-07, "loss": 0.0005, "step": 201180 }, { "epoch": 4.095470737913486, "grad_norm": 0.005257262955923228, "learning_rate": 9.642323985167096e-07, "loss": 0.0041, "step": 201190 }, { "epoch": 4.095674300254453, "grad_norm": 0.016824129080603864, "learning_rate": 9.638129612957918e-07, "loss": 0.0053, "step": 201200 }, { "epoch": 4.09587786259542, "grad_norm": 0.00408416271539255, "learning_rate": 9.633936055908721e-07, "loss": 0.0048, "step": 201210 }, { "epoch": 4.096081424936386, "grad_norm": 0.005009048629808221, "learning_rate": 9.629743314104161e-07, "loss": 0.0003, "step": 201220 }, { "epoch": 4.096284987277354, "grad_norm": 0.004779903851203363, "learning_rate": 9.625551387628923e-07, "loss": 0.0001, "step": 201230 }, { "epoch": 4.0964885496183205, "grad_norm": 0.0097031872865586, "learning_rate": 9.621360276567703e-07, "loss": 0.0004, "step": 201240 }, { "epoch": 4.096692111959287, "grad_norm": 0.0028741703397865393, "learning_rate": 9.617169981005103e-07, "loss": 0.0003, "step": 201250 }, { "epoch": 4.096895674300255, "grad_norm": 0.04464320346491825, "learning_rate": 9.61298050102577e-07, "loss": 0.0001, "step": 201260 }, { "epoch": 4.097099236641221, "grad_norm": 0.08292922797166825, "learning_rate": 9.608791836714315e-07, "loss": 0.0002, "step": 201270 }, { "epoch": 4.097302798982188, "grad_norm": 6.822660392885098, "learning_rate": 9.604603988155331e-07, "loss": 0.0283, "step": 201280 }, { "epoch": 4.0975063613231555, "grad_norm": 0.021296285309606873, "learning_rate": 9.600416955433395e-07, "loss": 0.0005, "step": 201290 }, { "epoch": 4.097709923664122, "grad_norm": 0.01176271104211663, "learning_rate": 9.596230738633077e-07, "loss": 0.0052, "step": 201300 }, { "epoch": 4.097913486005089, "grad_norm": 0.004812925617708059, "learning_rate": 9.592045337838918e-07, "loss": 0.0002, "step": 201310 }, { "epoch": 4.098117048346056, "grad_norm": 0.01116953980740448, "learning_rate": 9.587860753135447e-07, "loss": 0.0001, "step": 201320 }, { "epoch": 4.098320610687023, "grad_norm": 0.012620744297106568, "learning_rate": 9.58367698460718e-07, "loss": 0.0001, "step": 201330 }, { "epoch": 4.0985241730279895, "grad_norm": 0.007933377567844607, "learning_rate": 9.579494032338628e-07, "loss": 0.0005, "step": 201340 }, { "epoch": 4.098727735368957, "grad_norm": 0.0017938628144792742, "learning_rate": 9.575311896414224e-07, "loss": 0.0002, "step": 201350 }, { "epoch": 4.098931297709924, "grad_norm": 0.015377253821652472, "learning_rate": 9.57113057691848e-07, "loss": 0.0259, "step": 201360 }, { "epoch": 4.09913486005089, "grad_norm": 0.08484253800502045, "learning_rate": 9.566950073935837e-07, "loss": 0.0069, "step": 201370 }, { "epoch": 4.099338422391858, "grad_norm": 0.0033665142936307683, "learning_rate": 9.56277038755069e-07, "loss": 0.0001, "step": 201380 }, { "epoch": 4.0995419847328245, "grad_norm": 37.83628603399605, "learning_rate": 9.558591517847487e-07, "loss": 0.0272, "step": 201390 }, { "epoch": 4.099745547073791, "grad_norm": 0.003540453264233558, "learning_rate": 9.554413464910634e-07, "loss": 0.0076, "step": 201400 }, { "epoch": 4.099949109414759, "grad_norm": 0.007918473270210254, "learning_rate": 9.550236228824477e-07, "loss": 0.0243, "step": 201410 }, { "epoch": 4.100152671755725, "grad_norm": 0.0017361811834429863, "learning_rate": 9.546059809673402e-07, "loss": 0.0008, "step": 201420 }, { "epoch": 4.100356234096692, "grad_norm": 0.0019274764261541458, "learning_rate": 9.54188420754175e-07, "loss": 0.0548, "step": 201430 }, { "epoch": 4.100559796437659, "grad_norm": 0.003756782678627308, "learning_rate": 9.537709422513864e-07, "loss": 0.0012, "step": 201440 }, { "epoch": 4.100763358778626, "grad_norm": 0.002230852270023517, "learning_rate": 9.533535454674042e-07, "loss": 0.0005, "step": 201450 }, { "epoch": 4.100966921119593, "grad_norm": 0.0013184786823475735, "learning_rate": 9.529362304106598e-07, "loss": 0.0, "step": 201460 }, { "epoch": 4.10117048346056, "grad_norm": 0.013317074921747276, "learning_rate": 9.525189970895804e-07, "loss": 0.1461, "step": 201470 }, { "epoch": 4.101374045801527, "grad_norm": 0.0012509094101049278, "learning_rate": 9.52101845512593e-07, "loss": 0.0003, "step": 201480 }, { "epoch": 4.1015776081424935, "grad_norm": 0.0039491071664488255, "learning_rate": 9.516847756881225e-07, "loss": 0.0001, "step": 201490 }, { "epoch": 4.10178117048346, "grad_norm": 0.03046889258264689, "learning_rate": 9.512677876245924e-07, "loss": 0.0101, "step": 201500 }, { "epoch": 4.101984732824428, "grad_norm": 0.00327106249673498, "learning_rate": 9.50850881330424e-07, "loss": 0.0001, "step": 201510 }, { "epoch": 4.102188295165394, "grad_norm": 0.0025885260680982665, "learning_rate": 9.504340568140374e-07, "loss": 0.0002, "step": 201520 }, { "epoch": 4.102391857506361, "grad_norm": 0.08493202476426868, "learning_rate": 9.500173140838521e-07, "loss": 0.0001, "step": 201530 }, { "epoch": 4.1025954198473285, "grad_norm": 10.07331803092232, "learning_rate": 9.496006531482804e-07, "loss": 0.0352, "step": 201540 }, { "epoch": 4.102798982188295, "grad_norm": 0.09338165580856513, "learning_rate": 9.491840740157432e-07, "loss": 0.0003, "step": 201550 }, { "epoch": 4.103002544529262, "grad_norm": 0.002944544774943434, "learning_rate": 9.487675766946503e-07, "loss": 0.0476, "step": 201560 }, { "epoch": 4.103206106870229, "grad_norm": 0.00396756870331654, "learning_rate": 9.483511611934121e-07, "loss": 0.0, "step": 201570 }, { "epoch": 4.103409669211196, "grad_norm": 5.248241303092184, "learning_rate": 9.479348275204436e-07, "loss": 0.0581, "step": 201580 }, { "epoch": 4.1036132315521625, "grad_norm": 0.001015695342989877, "learning_rate": 9.475185756841487e-07, "loss": 0.1035, "step": 201590 }, { "epoch": 4.10381679389313, "grad_norm": 0.0007980361833196186, "learning_rate": 9.471024056929351e-07, "loss": 0.0066, "step": 201600 }, { "epoch": 4.104020356234097, "grad_norm": 0.02411840026525515, "learning_rate": 9.466863175552105e-07, "loss": 0.0657, "step": 201610 }, { "epoch": 4.104223918575063, "grad_norm": 0.004347371632252406, "learning_rate": 9.462703112793753e-07, "loss": 0.0002, "step": 201620 }, { "epoch": 4.104427480916031, "grad_norm": 0.010047102541333897, "learning_rate": 9.458543868738324e-07, "loss": 0.0008, "step": 201630 }, { "epoch": 4.1046310432569975, "grad_norm": 0.011868235253102466, "learning_rate": 9.454385443469816e-07, "loss": 0.0001, "step": 201640 }, { "epoch": 4.104834605597964, "grad_norm": 0.0022994862624592326, "learning_rate": 9.450227837072218e-07, "loss": 0.0003, "step": 201650 }, { "epoch": 4.105038167938932, "grad_norm": 0.008876043234305776, "learning_rate": 9.446071049629496e-07, "loss": 0.0268, "step": 201660 }, { "epoch": 4.105241730279898, "grad_norm": 0.013430631438662315, "learning_rate": 9.4419150812256e-07, "loss": 0.0003, "step": 201670 }, { "epoch": 4.105445292620865, "grad_norm": 0.018601387805460265, "learning_rate": 9.437759931944468e-07, "loss": 0.0002, "step": 201680 }, { "epoch": 4.105648854961832, "grad_norm": 6.320974310529189, "learning_rate": 9.433605601870021e-07, "loss": 0.026, "step": 201690 }, { "epoch": 4.105852417302799, "grad_norm": 0.0011715703928041033, "learning_rate": 9.429452091086156e-07, "loss": 0.0126, "step": 201700 }, { "epoch": 4.106055979643766, "grad_norm": 0.05189832832797077, "learning_rate": 9.425299399676774e-07, "loss": 0.081, "step": 201710 }, { "epoch": 4.106259541984733, "grad_norm": 0.008563063207732955, "learning_rate": 9.421147527725705e-07, "loss": 0.0001, "step": 201720 }, { "epoch": 4.1064631043257, "grad_norm": 0.006876204026427966, "learning_rate": 9.416996475316842e-07, "loss": 0.0193, "step": 201730 }, { "epoch": 4.1066666666666665, "grad_norm": 0.0017542764299024788, "learning_rate": 9.412846242534012e-07, "loss": 0.0012, "step": 201740 }, { "epoch": 4.106870229007634, "grad_norm": 0.007878447646952579, "learning_rate": 9.408696829461012e-07, "loss": 0.0002, "step": 201750 }, { "epoch": 4.107073791348601, "grad_norm": 0.0013401668246584913, "learning_rate": 9.404548236181665e-07, "loss": 0.0001, "step": 201760 }, { "epoch": 4.107277353689567, "grad_norm": 0.0031119158172112885, "learning_rate": 9.400400462779768e-07, "loss": 0.0097, "step": 201770 }, { "epoch": 4.107480916030535, "grad_norm": 0.0015720824663961743, "learning_rate": 9.396253509339049e-07, "loss": 0.009, "step": 201780 }, { "epoch": 4.107684478371501, "grad_norm": 0.007437799344792249, "learning_rate": 9.392107375943316e-07, "loss": 0.0001, "step": 201790 }, { "epoch": 4.107888040712468, "grad_norm": 0.0037176993285655746, "learning_rate": 9.387962062676264e-07, "loss": 0.0381, "step": 201800 }, { "epoch": 4.108091603053435, "grad_norm": 0.0033598725702794776, "learning_rate": 9.383817569621611e-07, "loss": 0.0001, "step": 201810 }, { "epoch": 4.108295165394402, "grad_norm": 0.007783360141672083, "learning_rate": 9.379673896863101e-07, "loss": 0.0226, "step": 201820 }, { "epoch": 4.108498727735369, "grad_norm": 0.012041117033288843, "learning_rate": 9.375531044484387e-07, "loss": 0.0286, "step": 201830 }, { "epoch": 4.1087022900763355, "grad_norm": 0.0426149880425423, "learning_rate": 9.371389012569143e-07, "loss": 0.0561, "step": 201840 }, { "epoch": 4.108905852417303, "grad_norm": 2.144261504432993, "learning_rate": 9.367247801201029e-07, "loss": 0.0006, "step": 201850 }, { "epoch": 4.10910941475827, "grad_norm": 0.006193809068028702, "learning_rate": 9.363107410463679e-07, "loss": 0.0017, "step": 201860 }, { "epoch": 4.109312977099236, "grad_norm": 1.4077325540526815, "learning_rate": 9.358967840440713e-07, "loss": 0.0005, "step": 201870 }, { "epoch": 4.109516539440204, "grad_norm": 0.01653623548159771, "learning_rate": 9.354829091215739e-07, "loss": 0.0249, "step": 201880 }, { "epoch": 4.10972010178117, "grad_norm": 0.0033524605736581325, "learning_rate": 9.350691162872344e-07, "loss": 0.0006, "step": 201890 }, { "epoch": 4.109923664122137, "grad_norm": 0.000993002321779842, "learning_rate": 9.346554055494094e-07, "loss": 0.0028, "step": 201900 }, { "epoch": 4.110127226463105, "grad_norm": 0.09511869135434967, "learning_rate": 9.342417769164547e-07, "loss": 0.0614, "step": 201910 }, { "epoch": 4.110330788804071, "grad_norm": 0.0581805879987292, "learning_rate": 9.338282303967255e-07, "loss": 0.0342, "step": 201920 }, { "epoch": 4.110534351145038, "grad_norm": 0.016932277258422893, "learning_rate": 9.334147659985698e-07, "loss": 0.0002, "step": 201930 }, { "epoch": 4.110737913486005, "grad_norm": 0.004376617749226459, "learning_rate": 9.330013837303414e-07, "loss": 0.0001, "step": 201940 }, { "epoch": 4.110941475826972, "grad_norm": 0.0017422623697118432, "learning_rate": 9.325880836003898e-07, "loss": 0.0607, "step": 201950 }, { "epoch": 4.111145038167939, "grad_norm": 0.013929600697671286, "learning_rate": 9.321748656170598e-07, "loss": 0.1057, "step": 201960 }, { "epoch": 4.111348600508906, "grad_norm": 0.002158137757259917, "learning_rate": 9.317617297886961e-07, "loss": 0.0024, "step": 201970 }, { "epoch": 4.111552162849873, "grad_norm": 0.0024292312079813474, "learning_rate": 9.313486761236462e-07, "loss": 0.0001, "step": 201980 }, { "epoch": 4.111755725190839, "grad_norm": 14.938191503297432, "learning_rate": 9.309357046302498e-07, "loss": 0.0117, "step": 201990 }, { "epoch": 4.111959287531807, "grad_norm": 0.00489565178308713, "learning_rate": 9.305228153168472e-07, "loss": 0.0047, "step": 202000 }, { "epoch": 4.112162849872774, "grad_norm": 0.000333944894380281, "learning_rate": 9.30110008191778e-07, "loss": 0.0007, "step": 202010 }, { "epoch": 4.11236641221374, "grad_norm": 0.03509022278654888, "learning_rate": 9.296972832633788e-07, "loss": 0.0001, "step": 202020 }, { "epoch": 4.112569974554708, "grad_norm": 0.0015417140191184304, "learning_rate": 9.292846405399857e-07, "loss": 0.0002, "step": 202030 }, { "epoch": 4.112773536895674, "grad_norm": 0.007562656459747632, "learning_rate": 9.288720800299322e-07, "loss": 0.0004, "step": 202040 }, { "epoch": 4.112977099236641, "grad_norm": 14.26183502124413, "learning_rate": 9.28459601741551e-07, "loss": 0.0273, "step": 202050 }, { "epoch": 4.1131806615776085, "grad_norm": 0.02688517911490415, "learning_rate": 9.280472056831719e-07, "loss": 0.0342, "step": 202060 }, { "epoch": 4.113384223918575, "grad_norm": 0.0014235475394899792, "learning_rate": 9.276348918631245e-07, "loss": 0.0128, "step": 202070 }, { "epoch": 4.113587786259542, "grad_norm": 0.005588748152494321, "learning_rate": 9.272226602897366e-07, "loss": 0.0047, "step": 202080 }, { "epoch": 4.113791348600509, "grad_norm": 0.027166411456394087, "learning_rate": 9.268105109713299e-07, "loss": 0.0154, "step": 202090 }, { "epoch": 4.113994910941476, "grad_norm": 0.029839973572776766, "learning_rate": 9.263984439162332e-07, "loss": 0.0012, "step": 202100 }, { "epoch": 4.114198473282443, "grad_norm": 0.005103029692805636, "learning_rate": 9.259864591327672e-07, "loss": 0.0064, "step": 202110 }, { "epoch": 4.11440203562341, "grad_norm": 0.0008346667261477144, "learning_rate": 9.255745566292501e-07, "loss": 0.0341, "step": 202120 }, { "epoch": 4.114605597964377, "grad_norm": 7.53284400519236, "learning_rate": 9.251627364140036e-07, "loss": 0.0951, "step": 202130 }, { "epoch": 4.114809160305343, "grad_norm": 0.02678216590442548, "learning_rate": 9.247509984953451e-07, "loss": 0.0002, "step": 202140 }, { "epoch": 4.11501272264631, "grad_norm": 39.67081492858992, "learning_rate": 9.243393428815867e-07, "loss": 0.017, "step": 202150 }, { "epoch": 4.1152162849872775, "grad_norm": 0.023336995082989653, "learning_rate": 9.239277695810472e-07, "loss": 0.0005, "step": 202160 }, { "epoch": 4.115419847328244, "grad_norm": 1.851243206678702, "learning_rate": 9.235162786020352e-07, "loss": 0.0019, "step": 202170 }, { "epoch": 4.115623409669211, "grad_norm": 0.006362607054668226, "learning_rate": 9.231048699528605e-07, "loss": 0.009, "step": 202180 }, { "epoch": 4.115826972010178, "grad_norm": 0.0002537601633891729, "learning_rate": 9.226935436418372e-07, "loss": 0.0259, "step": 202190 }, { "epoch": 4.116030534351145, "grad_norm": 0.00724019448998205, "learning_rate": 9.222822996772679e-07, "loss": 0.0497, "step": 202200 }, { "epoch": 4.116234096692112, "grad_norm": 0.0015492608869662036, "learning_rate": 9.218711380674594e-07, "loss": 0.0291, "step": 202210 }, { "epoch": 4.116437659033079, "grad_norm": 0.011279386422910948, "learning_rate": 9.214600588207162e-07, "loss": 0.0002, "step": 202220 }, { "epoch": 4.116641221374046, "grad_norm": 0.007115073676184418, "learning_rate": 9.2104906194534e-07, "loss": 0.0166, "step": 202230 }, { "epoch": 4.116844783715012, "grad_norm": 0.004816568669291606, "learning_rate": 9.206381474496318e-07, "loss": 0.002, "step": 202240 }, { "epoch": 4.11704834605598, "grad_norm": 0.01338669720708826, "learning_rate": 9.202273153418905e-07, "loss": 0.0002, "step": 202250 }, { "epoch": 4.1172519083969465, "grad_norm": 0.0039377209736871106, "learning_rate": 9.198165656304131e-07, "loss": 0.0366, "step": 202260 }, { "epoch": 4.117455470737913, "grad_norm": 0.006090236275134841, "learning_rate": 9.19405898323495e-07, "loss": 0.0012, "step": 202270 }, { "epoch": 4.117659033078881, "grad_norm": 0.12152348672067684, "learning_rate": 9.18995313429431e-07, "loss": 0.0014, "step": 202280 }, { "epoch": 4.117862595419847, "grad_norm": 0.05918405958682576, "learning_rate": 9.185848109565143e-07, "loss": 0.0573, "step": 202290 }, { "epoch": 4.118066157760814, "grad_norm": 0.0018812717197918335, "learning_rate": 9.181743909130314e-07, "loss": 0.0226, "step": 202300 }, { "epoch": 4.1182697201017815, "grad_norm": 0.006214063469021414, "learning_rate": 9.177640533072751e-07, "loss": 0.0002, "step": 202310 }, { "epoch": 4.118473282442748, "grad_norm": 0.011656296104798524, "learning_rate": 9.173537981475333e-07, "loss": 0.0408, "step": 202320 }, { "epoch": 4.118676844783715, "grad_norm": 0.11142475398530845, "learning_rate": 9.169436254420872e-07, "loss": 0.0003, "step": 202330 }, { "epoch": 4.118880407124682, "grad_norm": 0.001281046406490078, "learning_rate": 9.165335351992249e-07, "loss": 0.0006, "step": 202340 }, { "epoch": 4.119083969465649, "grad_norm": 0.02516325390775402, "learning_rate": 9.161235274272284e-07, "loss": 0.0002, "step": 202350 }, { "epoch": 4.1192875318066156, "grad_norm": 0.005838113086308863, "learning_rate": 9.157136021343749e-07, "loss": 0.0005, "step": 202360 }, { "epoch": 4.119491094147583, "grad_norm": 0.004931830176501494, "learning_rate": 9.153037593289482e-07, "loss": 0.0002, "step": 202370 }, { "epoch": 4.11969465648855, "grad_norm": 0.0518155550841282, "learning_rate": 9.148939990192218e-07, "loss": 0.0553, "step": 202380 }, { "epoch": 4.119898218829516, "grad_norm": 0.0020077794662688104, "learning_rate": 9.144843212134729e-07, "loss": 0.0002, "step": 202390 }, { "epoch": 4.120101781170484, "grad_norm": 0.6500237415848352, "learning_rate": 9.14074725919975e-07, "loss": 0.0352, "step": 202400 }, { "epoch": 4.1203053435114505, "grad_norm": 0.008364558339836735, "learning_rate": 9.136652131470008e-07, "loss": 0.0013, "step": 202410 }, { "epoch": 4.120508905852417, "grad_norm": 0.0036954500288889237, "learning_rate": 9.13255782902821e-07, "loss": 0.0019, "step": 202420 }, { "epoch": 4.120712468193385, "grad_norm": 0.0376454903698205, "learning_rate": 9.12846435195704e-07, "loss": 0.0003, "step": 202430 }, { "epoch": 4.120916030534351, "grad_norm": 9.155006200493174, "learning_rate": 9.124371700339174e-07, "loss": 0.0197, "step": 202440 }, { "epoch": 4.121119592875318, "grad_norm": 0.1298588027242755, "learning_rate": 9.12027987425727e-07, "loss": 0.0008, "step": 202450 }, { "epoch": 4.121323155216285, "grad_norm": 0.0036850007660376854, "learning_rate": 9.116188873793968e-07, "loss": 0.0005, "step": 202460 }, { "epoch": 4.121526717557252, "grad_norm": 0.0010093553493910569, "learning_rate": 9.112098699031884e-07, "loss": 0.0002, "step": 202470 }, { "epoch": 4.121730279898219, "grad_norm": 0.008117180759518077, "learning_rate": 9.108009350053637e-07, "loss": 0.0374, "step": 202480 }, { "epoch": 4.121933842239185, "grad_norm": 9.183347891151609, "learning_rate": 9.103920826941808e-07, "loss": 0.0586, "step": 202490 }, { "epoch": 4.122137404580153, "grad_norm": 0.010623189129597267, "learning_rate": 9.099833129778968e-07, "loss": 0.0002, "step": 202500 }, { "epoch": 4.1223409669211195, "grad_norm": 0.03668806848206917, "learning_rate": 9.095746258647692e-07, "loss": 0.0343, "step": 202510 }, { "epoch": 4.122544529262086, "grad_norm": 0.002490087996029833, "learning_rate": 9.091660213630482e-07, "loss": 0.0001, "step": 202520 }, { "epoch": 4.122748091603054, "grad_norm": 9.501344013585229, "learning_rate": 9.087574994809906e-07, "loss": 0.0785, "step": 202530 }, { "epoch": 4.12295165394402, "grad_norm": 0.004868967587536522, "learning_rate": 9.083490602268435e-07, "loss": 0.0001, "step": 202540 }, { "epoch": 4.123155216284987, "grad_norm": 0.03076025919095977, "learning_rate": 9.079407036088561e-07, "loss": 0.0203, "step": 202550 }, { "epoch": 4.1233587786259545, "grad_norm": 0.005395128373149693, "learning_rate": 9.075324296352788e-07, "loss": 0.0002, "step": 202560 }, { "epoch": 4.123562340966921, "grad_norm": 0.0009420932380038506, "learning_rate": 9.071242383143542e-07, "loss": 0.0005, "step": 202570 }, { "epoch": 4.123765903307888, "grad_norm": 0.02066422746147984, "learning_rate": 9.067161296543275e-07, "loss": 0.0191, "step": 202580 }, { "epoch": 4.123969465648855, "grad_norm": 4.004168778105724, "learning_rate": 9.063081036634402e-07, "loss": 0.0068, "step": 202590 }, { "epoch": 4.124173027989822, "grad_norm": 0.004174596456004463, "learning_rate": 9.059001603499335e-07, "loss": 0.0285, "step": 202600 }, { "epoch": 4.1243765903307885, "grad_norm": 0.049292617113617127, "learning_rate": 9.054922997220461e-07, "loss": 0.0134, "step": 202610 }, { "epoch": 4.124580152671756, "grad_norm": 0.009634774269458957, "learning_rate": 9.050845217880155e-07, "loss": 0.0305, "step": 202620 }, { "epoch": 4.124783715012723, "grad_norm": 9.492004812055432, "learning_rate": 9.046768265560773e-07, "loss": 0.054, "step": 202630 }, { "epoch": 4.124987277353689, "grad_norm": 8.378952016313741, "learning_rate": 9.04269214034465e-07, "loss": 0.0372, "step": 202640 }, { "epoch": 4.125190839694657, "grad_norm": 0.01002694265838596, "learning_rate": 9.038616842314118e-07, "loss": 0.0186, "step": 202650 }, { "epoch": 4.1253944020356235, "grad_norm": 0.029068666312901822, "learning_rate": 9.034542371551486e-07, "loss": 0.0002, "step": 202660 }, { "epoch": 4.12559796437659, "grad_norm": 0.05356826014261387, "learning_rate": 9.030468728139008e-07, "loss": 0.001, "step": 202670 }, { "epoch": 4.125801526717558, "grad_norm": 0.012405696683976102, "learning_rate": 9.026395912158992e-07, "loss": 0.0002, "step": 202680 }, { "epoch": 4.126005089058524, "grad_norm": 0.0011853506005694632, "learning_rate": 9.022323923693699e-07, "loss": 0.0017, "step": 202690 }, { "epoch": 4.126208651399491, "grad_norm": 0.010543239116505135, "learning_rate": 9.01825276282533e-07, "loss": 0.0326, "step": 202700 }, { "epoch": 4.126412213740458, "grad_norm": 0.01597209124337753, "learning_rate": 9.014182429636143e-07, "loss": 0.0413, "step": 202710 }, { "epoch": 4.126615776081425, "grad_norm": 0.014312173963848988, "learning_rate": 9.010112924208342e-07, "loss": 0.0166, "step": 202720 }, { "epoch": 4.126819338422392, "grad_norm": 12.857203088102493, "learning_rate": 9.006044246624085e-07, "loss": 0.0369, "step": 202730 }, { "epoch": 4.127022900763359, "grad_norm": 0.014814078370908146, "learning_rate": 9.001976396965584e-07, "loss": 0.0008, "step": 202740 }, { "epoch": 4.127226463104326, "grad_norm": 21.08352418139692, "learning_rate": 8.997909375314961e-07, "loss": 0.017, "step": 202750 }, { "epoch": 4.1274300254452925, "grad_norm": 0.014764672512277527, "learning_rate": 8.993843181754358e-07, "loss": 0.0063, "step": 202760 }, { "epoch": 4.12763358778626, "grad_norm": 0.02169663315013311, "learning_rate": 8.989777816365935e-07, "loss": 0.0135, "step": 202770 }, { "epoch": 4.127837150127227, "grad_norm": 0.01080329133485582, "learning_rate": 8.985713279231756e-07, "loss": 0.001, "step": 202780 }, { "epoch": 4.128040712468193, "grad_norm": 0.0046602717614612306, "learning_rate": 8.98164957043392e-07, "loss": 0.0001, "step": 202790 }, { "epoch": 4.12824427480916, "grad_norm": 0.002582637573392361, "learning_rate": 8.977586690054508e-07, "loss": 0.0019, "step": 202800 }, { "epoch": 4.128447837150127, "grad_norm": 0.009333976773233343, "learning_rate": 8.973524638175562e-07, "loss": 0.0347, "step": 202810 }, { "epoch": 4.128651399491094, "grad_norm": 0.00876661080156538, "learning_rate": 8.969463414879131e-07, "loss": 0.0234, "step": 202820 }, { "epoch": 4.128854961832061, "grad_norm": 0.0058946266851205645, "learning_rate": 8.965403020247237e-07, "loss": 0.0292, "step": 202830 }, { "epoch": 4.129058524173028, "grad_norm": 0.009274565674725262, "learning_rate": 8.961343454361876e-07, "loss": 0.0007, "step": 202840 }, { "epoch": 4.129262086513995, "grad_norm": 0.0029021507919406806, "learning_rate": 8.957284717305043e-07, "loss": 0.0014, "step": 202850 }, { "epoch": 4.1294656488549615, "grad_norm": 0.0029411330218439787, "learning_rate": 8.95322680915871e-07, "loss": 0.0003, "step": 202860 }, { "epoch": 4.129669211195929, "grad_norm": 0.018764463549114638, "learning_rate": 8.949169730004825e-07, "loss": 0.0003, "step": 202870 }, { "epoch": 4.129872773536896, "grad_norm": 0.02077195599954332, "learning_rate": 8.945113479925332e-07, "loss": 0.0001, "step": 202880 }, { "epoch": 4.130076335877862, "grad_norm": 0.009280565187074808, "learning_rate": 8.941058059002155e-07, "loss": 0.0016, "step": 202890 }, { "epoch": 4.13027989821883, "grad_norm": 13.90695155065562, "learning_rate": 8.937003467317201e-07, "loss": 0.0543, "step": 202900 }, { "epoch": 4.130483460559796, "grad_norm": 0.029190484241734854, "learning_rate": 8.932949704952326e-07, "loss": 0.0218, "step": 202910 }, { "epoch": 4.130687022900763, "grad_norm": 0.0009286632607372207, "learning_rate": 8.928896771989443e-07, "loss": 0.0001, "step": 202920 }, { "epoch": 4.130890585241731, "grad_norm": 0.08237700963192786, "learning_rate": 8.924844668510396e-07, "loss": 0.0002, "step": 202930 }, { "epoch": 4.131094147582697, "grad_norm": 0.027792988398579115, "learning_rate": 8.920793394597005e-07, "loss": 0.0003, "step": 202940 }, { "epoch": 4.131297709923664, "grad_norm": 0.03709477642256133, "learning_rate": 8.916742950331103e-07, "loss": 0.0294, "step": 202950 }, { "epoch": 4.131501272264631, "grad_norm": 0.023015823647121934, "learning_rate": 8.912693335794492e-07, "loss": 0.0371, "step": 202960 }, { "epoch": 4.131704834605598, "grad_norm": 0.0002464804277382745, "learning_rate": 8.908644551068957e-07, "loss": 0.049, "step": 202970 }, { "epoch": 4.131908396946565, "grad_norm": 0.0009091241086996593, "learning_rate": 8.90459659623627e-07, "loss": 0.0461, "step": 202980 }, { "epoch": 4.132111959287532, "grad_norm": 0.031827364062855516, "learning_rate": 8.900549471378184e-07, "loss": 0.0001, "step": 202990 }, { "epoch": 4.132315521628499, "grad_norm": 0.0016561570242874797, "learning_rate": 8.896503176576438e-07, "loss": 0.0004, "step": 203000 }, { "epoch": 4.132519083969465, "grad_norm": 0.04920156277965108, "learning_rate": 8.892457711912755e-07, "loss": 0.0001, "step": 203010 }, { "epoch": 4.132722646310433, "grad_norm": 1.358881730713222, "learning_rate": 8.888413077468833e-07, "loss": 0.0011, "step": 203020 }, { "epoch": 4.1329262086514, "grad_norm": 0.006025473761220743, "learning_rate": 8.884369273326359e-07, "loss": 0.0002, "step": 203030 }, { "epoch": 4.133129770992366, "grad_norm": 0.0025281450503894386, "learning_rate": 8.880326299567005e-07, "loss": 0.0021, "step": 203040 }, { "epoch": 4.133333333333334, "grad_norm": 0.0023983363393189788, "learning_rate": 8.876284156272419e-07, "loss": 0.0001, "step": 203050 }, { "epoch": 4.1335368956743, "grad_norm": 0.01419052154637001, "learning_rate": 8.872242843524259e-07, "loss": 0.0005, "step": 203060 }, { "epoch": 4.133740458015267, "grad_norm": 0.017178889149907794, "learning_rate": 8.8682023614041e-07, "loss": 0.0, "step": 203070 }, { "epoch": 4.133944020356234, "grad_norm": 0.29823178725446375, "learning_rate": 8.864162709993584e-07, "loss": 0.0222, "step": 203080 }, { "epoch": 4.134147582697201, "grad_norm": 0.009311702320235817, "learning_rate": 8.8601238893743e-07, "loss": 0.0007, "step": 203090 }, { "epoch": 4.134351145038168, "grad_norm": 0.007465796031922954, "learning_rate": 8.85608589962777e-07, "loss": 0.0004, "step": 203100 }, { "epoch": 4.134554707379134, "grad_norm": 0.001106673886080299, "learning_rate": 8.852048740835612e-07, "loss": 0.0005, "step": 203110 }, { "epoch": 4.134758269720102, "grad_norm": 0.0012360493919828985, "learning_rate": 8.84801241307931e-07, "loss": 0.0, "step": 203120 }, { "epoch": 4.134961832061069, "grad_norm": 0.024241088450866505, "learning_rate": 8.843976916440389e-07, "loss": 0.0245, "step": 203130 }, { "epoch": 4.135165394402035, "grad_norm": 0.04735103066805666, "learning_rate": 8.839942251000389e-07, "loss": 0.0006, "step": 203140 }, { "epoch": 4.135368956743003, "grad_norm": 0.012976493020349386, "learning_rate": 8.835908416840755e-07, "loss": 0.0001, "step": 203150 }, { "epoch": 4.135572519083969, "grad_norm": 0.012767133079453713, "learning_rate": 8.831875414042956e-07, "loss": 0.0021, "step": 203160 }, { "epoch": 4.135776081424936, "grad_norm": 0.012413803258675403, "learning_rate": 8.827843242688483e-07, "loss": 0.0356, "step": 203170 }, { "epoch": 4.1359796437659035, "grad_norm": 0.053774775214837714, "learning_rate": 8.823811902858731e-07, "loss": 0.0016, "step": 203180 }, { "epoch": 4.13618320610687, "grad_norm": 0.00777812570729661, "learning_rate": 8.819781394635136e-07, "loss": 0.0267, "step": 203190 }, { "epoch": 4.136386768447837, "grad_norm": 10.799352159153221, "learning_rate": 8.815751718099092e-07, "loss": 0.0974, "step": 203200 }, { "epoch": 4.136590330788804, "grad_norm": 0.003837801029292194, "learning_rate": 8.81172287333199e-07, "loss": 0.0171, "step": 203210 }, { "epoch": 4.136793893129771, "grad_norm": 0.00010979118422397155, "learning_rate": 8.807694860415195e-07, "loss": 0.0001, "step": 203220 }, { "epoch": 4.136997455470738, "grad_norm": 0.006946742487953472, "learning_rate": 8.803667679430056e-07, "loss": 0.0615, "step": 203230 }, { "epoch": 4.137201017811705, "grad_norm": 0.014654328456754235, "learning_rate": 8.799641330457909e-07, "loss": 0.0003, "step": 203240 }, { "epoch": 4.137404580152672, "grad_norm": 0.03945635559881411, "learning_rate": 8.795615813580066e-07, "loss": 0.0412, "step": 203250 }, { "epoch": 4.137608142493638, "grad_norm": 0.0021781251977275582, "learning_rate": 8.791591128877841e-07, "loss": 0.0002, "step": 203260 }, { "epoch": 4.137811704834606, "grad_norm": 0.01456192739984043, "learning_rate": 8.787567276432519e-07, "loss": 0.0001, "step": 203270 }, { "epoch": 4.1380152671755726, "grad_norm": 0.018807247495233912, "learning_rate": 8.783544256325327e-07, "loss": 0.0001, "step": 203280 }, { "epoch": 4.138218829516539, "grad_norm": 0.014167055807952184, "learning_rate": 8.779522068637564e-07, "loss": 0.0126, "step": 203290 }, { "epoch": 4.138422391857507, "grad_norm": 0.03594443674887043, "learning_rate": 8.775500713450452e-07, "loss": 0.0004, "step": 203300 }, { "epoch": 4.138625954198473, "grad_norm": 0.002147823374227776, "learning_rate": 8.771480190845177e-07, "loss": 0.0003, "step": 203310 }, { "epoch": 4.13882951653944, "grad_norm": 0.1713967011139124, "learning_rate": 8.767460500902986e-07, "loss": 0.0066, "step": 203320 }, { "epoch": 4.1390330788804075, "grad_norm": 0.0030359988094783037, "learning_rate": 8.763441643705029e-07, "loss": 0.0001, "step": 203330 }, { "epoch": 4.139236641221374, "grad_norm": 0.0009844592951191665, "learning_rate": 8.759423619332464e-07, "loss": 0.0687, "step": 203340 }, { "epoch": 4.139440203562341, "grad_norm": 0.005455199666942515, "learning_rate": 8.75540642786648e-07, "loss": 0.0024, "step": 203350 }, { "epoch": 4.139643765903308, "grad_norm": 0.004641066429643718, "learning_rate": 8.751390069388177e-07, "loss": 0.0182, "step": 203360 }, { "epoch": 4.139847328244275, "grad_norm": 0.011986491879013572, "learning_rate": 8.747374543978677e-07, "loss": 0.0001, "step": 203370 }, { "epoch": 4.140050890585242, "grad_norm": 0.012192855349075927, "learning_rate": 8.74335985171908e-07, "loss": 0.0002, "step": 203380 }, { "epoch": 4.140254452926209, "grad_norm": 0.03683061848651575, "learning_rate": 8.739345992690474e-07, "loss": 0.0047, "step": 203390 }, { "epoch": 4.140458015267176, "grad_norm": 0.010247331740755929, "learning_rate": 8.735332966973914e-07, "loss": 0.0003, "step": 203400 }, { "epoch": 4.140661577608142, "grad_norm": 0.017789229715894975, "learning_rate": 8.731320774650454e-07, "loss": 0.0001, "step": 203410 }, { "epoch": 4.140865139949109, "grad_norm": 0.008963438591536536, "learning_rate": 8.727309415801128e-07, "loss": 0.0012, "step": 203420 }, { "epoch": 4.1410687022900765, "grad_norm": 0.02421018842418268, "learning_rate": 8.723298890506943e-07, "loss": 0.0002, "step": 203430 }, { "epoch": 4.141272264631043, "grad_norm": 0.21928589179195773, "learning_rate": 8.719289198848902e-07, "loss": 0.0005, "step": 203440 }, { "epoch": 4.14147582697201, "grad_norm": 0.0018132242707782958, "learning_rate": 8.715280340907983e-07, "loss": 0.0452, "step": 203450 }, { "epoch": 4.141679389312977, "grad_norm": 4.288224962838528, "learning_rate": 8.711272316765157e-07, "loss": 0.0387, "step": 203460 }, { "epoch": 4.141882951653944, "grad_norm": 0.01635721109201253, "learning_rate": 8.707265126501363e-07, "loss": 0.0065, "step": 203470 }, { "epoch": 4.142086513994911, "grad_norm": 0.03492490353693823, "learning_rate": 8.703258770197542e-07, "loss": 0.0003, "step": 203480 }, { "epoch": 4.142290076335878, "grad_norm": 0.0015522086811219987, "learning_rate": 8.699253247934597e-07, "loss": 0.0033, "step": 203490 }, { "epoch": 4.142493638676845, "grad_norm": 0.011394779447139142, "learning_rate": 8.695248559793407e-07, "loss": 0.0047, "step": 203500 }, { "epoch": 4.142697201017811, "grad_norm": 0.004692911074250038, "learning_rate": 8.691244705854901e-07, "loss": 0.0002, "step": 203510 }, { "epoch": 4.142900763358779, "grad_norm": 0.01277640231447539, "learning_rate": 8.687241686199905e-07, "loss": 0.0002, "step": 203520 }, { "epoch": 4.1431043256997455, "grad_norm": 0.011598169616253214, "learning_rate": 8.683239500909251e-07, "loss": 0.0023, "step": 203530 }, { "epoch": 4.143307888040712, "grad_norm": 0.011486083488317915, "learning_rate": 8.679238150063824e-07, "loss": 0.0008, "step": 203540 }, { "epoch": 4.14351145038168, "grad_norm": 0.009558809832350653, "learning_rate": 8.675237633744388e-07, "loss": 0.0222, "step": 203550 }, { "epoch": 4.143715012722646, "grad_norm": 0.024431640441442835, "learning_rate": 8.671237952031758e-07, "loss": 0.0002, "step": 203560 }, { "epoch": 4.143918575063613, "grad_norm": 0.01354039366373191, "learning_rate": 8.667239105006703e-07, "loss": 0.0349, "step": 203570 }, { "epoch": 4.1441221374045805, "grad_norm": 0.02850914582013467, "learning_rate": 8.663241092749991e-07, "loss": 0.0001, "step": 203580 }, { "epoch": 4.144325699745547, "grad_norm": 0.110218044594779, "learning_rate": 8.659243915342369e-07, "loss": 0.036, "step": 203590 }, { "epoch": 4.144529262086514, "grad_norm": 0.008171594537743482, "learning_rate": 8.655247572864561e-07, "loss": 0.0659, "step": 203600 }, { "epoch": 4.144732824427481, "grad_norm": 0.012711624113830757, "learning_rate": 8.651252065397298e-07, "loss": 0.0001, "step": 203610 }, { "epoch": 4.144936386768448, "grad_norm": 0.001677414480507753, "learning_rate": 8.647257393021224e-07, "loss": 0.0001, "step": 203620 }, { "epoch": 4.1451399491094145, "grad_norm": 0.0069731855226130455, "learning_rate": 8.643263555817067e-07, "loss": 0.0001, "step": 203630 }, { "epoch": 4.145343511450382, "grad_norm": 0.004887079636500465, "learning_rate": 8.639270553865475e-07, "loss": 0.0121, "step": 203640 }, { "epoch": 4.145547073791349, "grad_norm": 0.04269182891142627, "learning_rate": 8.635278387247065e-07, "loss": 0.0001, "step": 203650 }, { "epoch": 4.145750636132315, "grad_norm": 0.223692569114794, "learning_rate": 8.631287056042498e-07, "loss": 0.0453, "step": 203660 }, { "epoch": 4.145954198473283, "grad_norm": 0.004792286794891201, "learning_rate": 8.627296560332388e-07, "loss": 0.0001, "step": 203670 }, { "epoch": 4.1461577608142495, "grad_norm": 0.005574747381740047, "learning_rate": 8.623306900197282e-07, "loss": 0.0203, "step": 203680 }, { "epoch": 4.146361323155216, "grad_norm": 33.186670201249626, "learning_rate": 8.619318075717808e-07, "loss": 0.04, "step": 203690 }, { "epoch": 4.146564885496184, "grad_norm": 0.6139604510156331, "learning_rate": 8.615330086974494e-07, "loss": 0.0004, "step": 203700 }, { "epoch": 4.14676844783715, "grad_norm": 0.0070469773054967315, "learning_rate": 8.611342934047884e-07, "loss": 0.0142, "step": 203710 }, { "epoch": 4.146972010178117, "grad_norm": 0.281853177378139, "learning_rate": 8.60735661701853e-07, "loss": 0.0017, "step": 203720 }, { "epoch": 4.1471755725190835, "grad_norm": 0.014359251160719375, "learning_rate": 8.603371135966915e-07, "loss": 0.0047, "step": 203730 }, { "epoch": 4.147379134860051, "grad_norm": 0.041220040727941124, "learning_rate": 8.599386490973522e-07, "loss": 0.0003, "step": 203740 }, { "epoch": 4.147582697201018, "grad_norm": 0.0017606306631885763, "learning_rate": 8.595402682118869e-07, "loss": 0.06, "step": 203750 }, { "epoch": 4.147786259541984, "grad_norm": 3.1698137810268756, "learning_rate": 8.591419709483373e-07, "loss": 0.0057, "step": 203760 }, { "epoch": 4.147989821882952, "grad_norm": 0.005287295712451104, "learning_rate": 8.587437573147494e-07, "loss": 0.0365, "step": 203770 }, { "epoch": 4.1481933842239185, "grad_norm": 0.015372134495456954, "learning_rate": 8.583456273191648e-07, "loss": 0.0001, "step": 203780 }, { "epoch": 4.148396946564885, "grad_norm": 0.02411653679331307, "learning_rate": 8.579475809696248e-07, "loss": 0.0004, "step": 203790 }, { "epoch": 4.148600508905853, "grad_norm": 0.0008782039429364286, "learning_rate": 8.575496182741683e-07, "loss": 0.0092, "step": 203800 }, { "epoch": 4.148804071246819, "grad_norm": 0.00022355747158936496, "learning_rate": 8.571517392408329e-07, "loss": 0.0631, "step": 203810 }, { "epoch": 4.149007633587786, "grad_norm": 0.01041967248501498, "learning_rate": 8.567539438776539e-07, "loss": 0.0111, "step": 203820 }, { "epoch": 4.149211195928753, "grad_norm": 0.07444140525451402, "learning_rate": 8.563562321926655e-07, "loss": 0.0687, "step": 203830 }, { "epoch": 4.14941475826972, "grad_norm": 0.0014032253925517287, "learning_rate": 8.559586041938999e-07, "loss": 0.0027, "step": 203840 }, { "epoch": 4.149618320610687, "grad_norm": 0.07055476832164577, "learning_rate": 8.555610598893894e-07, "loss": 0.0002, "step": 203850 }, { "epoch": 4.149821882951654, "grad_norm": 0.0018508727650700051, "learning_rate": 8.551635992871582e-07, "loss": 0.0236, "step": 203860 }, { "epoch": 4.150025445292621, "grad_norm": 0.02027548180726128, "learning_rate": 8.547662223952385e-07, "loss": 0.0748, "step": 203870 }, { "epoch": 4.1502290076335875, "grad_norm": 0.002647588455337267, "learning_rate": 8.543689292216545e-07, "loss": 0.0372, "step": 203880 }, { "epoch": 4.150432569974555, "grad_norm": 0.005904594843665999, "learning_rate": 8.539717197744274e-07, "loss": 0.0001, "step": 203890 }, { "epoch": 4.150636132315522, "grad_norm": 0.0085332319921238, "learning_rate": 8.53574594061583e-07, "loss": 0.0455, "step": 203900 }, { "epoch": 4.150839694656488, "grad_norm": 0.010570113463949838, "learning_rate": 8.531775520911412e-07, "loss": 0.0002, "step": 203910 }, { "epoch": 4.151043256997456, "grad_norm": 0.005304525712164866, "learning_rate": 8.527805938711187e-07, "loss": 0.0327, "step": 203920 }, { "epoch": 4.151246819338422, "grad_norm": 0.01344831073717988, "learning_rate": 8.52383719409533e-07, "loss": 0.0001, "step": 203930 }, { "epoch": 4.151450381679389, "grad_norm": 0.020147134858420273, "learning_rate": 8.519869287144011e-07, "loss": 0.0659, "step": 203940 }, { "epoch": 4.151653944020357, "grad_norm": 0.011188936828834432, "learning_rate": 8.51590221793735e-07, "loss": 0.0002, "step": 203950 }, { "epoch": 4.151857506361323, "grad_norm": 0.034940257644973896, "learning_rate": 8.511935986555481e-07, "loss": 0.0326, "step": 203960 }, { "epoch": 4.15206106870229, "grad_norm": 0.030237356108421345, "learning_rate": 8.507970593078496e-07, "loss": 0.0454, "step": 203970 }, { "epoch": 4.152264631043257, "grad_norm": 0.017048236810495786, "learning_rate": 8.504006037586493e-07, "loss": 0.0058, "step": 203980 }, { "epoch": 4.152468193384224, "grad_norm": 0.0012757464483835162, "learning_rate": 8.500042320159529e-07, "loss": 0.0415, "step": 203990 }, { "epoch": 4.152671755725191, "grad_norm": 0.019647986646607794, "learning_rate": 8.496079440877664e-07, "loss": 0.0546, "step": 204000 }, { "epoch": 4.152875318066158, "grad_norm": 0.04347962227018421, "learning_rate": 8.492117399820926e-07, "loss": 0.0548, "step": 204010 }, { "epoch": 4.153078880407125, "grad_norm": 0.007183241410789011, "learning_rate": 8.488156197069347e-07, "loss": 0.0403, "step": 204020 }, { "epoch": 4.153282442748091, "grad_norm": 0.0036323740554968682, "learning_rate": 8.484195832702913e-07, "loss": 0.0002, "step": 204030 }, { "epoch": 4.153486005089059, "grad_norm": 0.07281271541505908, "learning_rate": 8.480236306801637e-07, "loss": 0.0607, "step": 204040 }, { "epoch": 4.153689567430026, "grad_norm": 0.11320556708493026, "learning_rate": 8.476277619445433e-07, "loss": 0.0269, "step": 204050 }, { "epoch": 4.153893129770992, "grad_norm": 0.01227869423486544, "learning_rate": 8.472319770714315e-07, "loss": 0.0024, "step": 204060 }, { "epoch": 4.154096692111959, "grad_norm": 0.006825348208167987, "learning_rate": 8.468362760688171e-07, "loss": 0.0002, "step": 204070 }, { "epoch": 4.154300254452926, "grad_norm": 0.021620553983592652, "learning_rate": 8.464406589446922e-07, "loss": 0.0001, "step": 204080 }, { "epoch": 4.154503816793893, "grad_norm": 0.004994029847691647, "learning_rate": 8.460451257070501e-07, "loss": 0.0043, "step": 204090 }, { "epoch": 4.15470737913486, "grad_norm": 0.00559118979014616, "learning_rate": 8.45649676363876e-07, "loss": 0.0003, "step": 204100 }, { "epoch": 4.154910941475827, "grad_norm": 11.580333780414625, "learning_rate": 8.452543109231565e-07, "loss": 0.0218, "step": 204110 }, { "epoch": 4.155114503816794, "grad_norm": 0.04966186411077857, "learning_rate": 8.448590293928794e-07, "loss": 0.0029, "step": 204120 }, { "epoch": 4.1553180661577604, "grad_norm": 0.00950375735961247, "learning_rate": 8.444638317810249e-07, "loss": 0.0267, "step": 204130 }, { "epoch": 4.155521628498728, "grad_norm": 0.01209072623529562, "learning_rate": 8.440687180955759e-07, "loss": 0.0508, "step": 204140 }, { "epoch": 4.155725190839695, "grad_norm": 0.004226714862662157, "learning_rate": 8.436736883445118e-07, "loss": 0.0001, "step": 204150 }, { "epoch": 4.155928753180661, "grad_norm": 0.0015254589728365808, "learning_rate": 8.432787425358108e-07, "loss": 0.0009, "step": 204160 }, { "epoch": 4.156132315521629, "grad_norm": 0.02299939080338552, "learning_rate": 8.428838806774492e-07, "loss": 0.0003, "step": 204170 }, { "epoch": 4.156335877862595, "grad_norm": 0.6889246555738078, "learning_rate": 8.424891027774023e-07, "loss": 0.0003, "step": 204180 }, { "epoch": 4.156539440203562, "grad_norm": 0.013296644347803678, "learning_rate": 8.420944088436428e-07, "loss": 0.0008, "step": 204190 }, { "epoch": 4.1567430025445296, "grad_norm": 0.019723748270694293, "learning_rate": 8.416997988841419e-07, "loss": 0.023, "step": 204200 }, { "epoch": 4.156946564885496, "grad_norm": 0.6335503713603955, "learning_rate": 8.413052729068693e-07, "loss": 0.0005, "step": 204210 }, { "epoch": 4.157150127226463, "grad_norm": 0.00646676500170084, "learning_rate": 8.409108309197944e-07, "loss": 0.0002, "step": 204220 }, { "epoch": 4.15735368956743, "grad_norm": 0.008291960747145173, "learning_rate": 8.405164729308801e-07, "loss": 0.0001, "step": 204230 }, { "epoch": 4.157557251908397, "grad_norm": 0.49964875261952607, "learning_rate": 8.401221989480935e-07, "loss": 0.0025, "step": 204240 }, { "epoch": 4.157760814249364, "grad_norm": 0.017162079409852628, "learning_rate": 8.397280089793985e-07, "loss": 0.0009, "step": 204250 }, { "epoch": 4.157964376590331, "grad_norm": 0.002176327201916901, "learning_rate": 8.393339030327524e-07, "loss": 0.0443, "step": 204260 }, { "epoch": 4.158167938931298, "grad_norm": 0.0007596649104471238, "learning_rate": 8.389398811161181e-07, "loss": 0.0002, "step": 204270 }, { "epoch": 4.158371501272264, "grad_norm": 0.07365054859033159, "learning_rate": 8.385459432374537e-07, "loss": 0.0006, "step": 204280 }, { "epoch": 4.158575063613232, "grad_norm": 0.007003677613716381, "learning_rate": 8.381520894047118e-07, "loss": 0.0004, "step": 204290 }, { "epoch": 4.158778625954199, "grad_norm": 0.022889945179592454, "learning_rate": 8.377583196258505e-07, "loss": 0.0001, "step": 204300 }, { "epoch": 4.158982188295165, "grad_norm": 0.006476059892608245, "learning_rate": 8.373646339088203e-07, "loss": 0.0004, "step": 204310 }, { "epoch": 4.159185750636133, "grad_norm": 0.30364809902397644, "learning_rate": 8.369710322615709e-07, "loss": 0.0179, "step": 204320 }, { "epoch": 4.159389312977099, "grad_norm": 0.0195129627796147, "learning_rate": 8.365775146920569e-07, "loss": 0.0003, "step": 204330 }, { "epoch": 4.159592875318066, "grad_norm": 0.006565273302022515, "learning_rate": 8.361840812082201e-07, "loss": 0.0001, "step": 204340 }, { "epoch": 4.1597964376590335, "grad_norm": 0.0024168812209029063, "learning_rate": 8.357907318180092e-07, "loss": 0.0162, "step": 204350 }, { "epoch": 4.16, "grad_norm": 0.04101878214762661, "learning_rate": 8.35397466529368e-07, "loss": 0.078, "step": 204360 }, { "epoch": 4.160203562340967, "grad_norm": 0.3739952206792846, "learning_rate": 8.350042853502388e-07, "loss": 0.0078, "step": 204370 }, { "epoch": 4.160407124681933, "grad_norm": 0.0010128683303548644, "learning_rate": 8.346111882885627e-07, "loss": 0.0037, "step": 204380 }, { "epoch": 4.160610687022901, "grad_norm": 0.10760300999389738, "learning_rate": 8.342181753522787e-07, "loss": 0.0001, "step": 204390 }, { "epoch": 4.160814249363868, "grad_norm": 0.002032802526914092, "learning_rate": 8.338252465493241e-07, "loss": 0.0043, "step": 204400 }, { "epoch": 4.161017811704834, "grad_norm": 0.01688304125199891, "learning_rate": 8.334324018876345e-07, "loss": 0.0001, "step": 204410 }, { "epoch": 4.161221374045802, "grad_norm": 0.02392683361981065, "learning_rate": 8.33039641375144e-07, "loss": 0.0001, "step": 204420 }, { "epoch": 4.161424936386768, "grad_norm": 0.0026607754922798577, "learning_rate": 8.32646965019786e-07, "loss": 0.0001, "step": 204430 }, { "epoch": 4.161628498727735, "grad_norm": 0.0017192244340941626, "learning_rate": 8.32254372829488e-07, "loss": 0.0001, "step": 204440 }, { "epoch": 4.1618320610687025, "grad_norm": 0.0025268668244715826, "learning_rate": 8.318618648121817e-07, "loss": 0.01, "step": 204450 }, { "epoch": 4.162035623409669, "grad_norm": 0.008768271654379793, "learning_rate": 8.314694409757951e-07, "loss": 0.0004, "step": 204460 }, { "epoch": 4.162239185750636, "grad_norm": 0.0157681444063683, "learning_rate": 8.310771013282509e-07, "loss": 0.0001, "step": 204470 }, { "epoch": 4.162442748091603, "grad_norm": 0.0011815328742948634, "learning_rate": 8.30684845877473e-07, "loss": 0.0002, "step": 204480 }, { "epoch": 4.16264631043257, "grad_norm": 0.053526680338962046, "learning_rate": 8.302926746313872e-07, "loss": 0.1753, "step": 204490 }, { "epoch": 4.162849872773537, "grad_norm": 0.009174253982126844, "learning_rate": 8.299005875979105e-07, "loss": 0.0003, "step": 204500 }, { "epoch": 4.163053435114504, "grad_norm": 0.008089617181158644, "learning_rate": 8.295085847849616e-07, "loss": 0.0318, "step": 204510 }, { "epoch": 4.163256997455471, "grad_norm": 0.002857687931672911, "learning_rate": 8.291166662004591e-07, "loss": 0.0032, "step": 204520 }, { "epoch": 4.163460559796437, "grad_norm": 0.03122406134979078, "learning_rate": 8.287248318523172e-07, "loss": 0.0001, "step": 204530 }, { "epoch": 4.163664122137405, "grad_norm": 9.81848509931369, "learning_rate": 8.283330817484503e-07, "loss": 0.0029, "step": 204540 }, { "epoch": 4.1638676844783715, "grad_norm": 0.02433131080019592, "learning_rate": 8.279414158967692e-07, "loss": 0.0001, "step": 204550 }, { "epoch": 4.164071246819338, "grad_norm": 0.005595785207490732, "learning_rate": 8.275498343051852e-07, "loss": 0.0001, "step": 204560 }, { "epoch": 4.164274809160306, "grad_norm": 0.19280103142321647, "learning_rate": 8.271583369816066e-07, "loss": 0.0005, "step": 204570 }, { "epoch": 4.164478371501272, "grad_norm": 0.04388070522499295, "learning_rate": 8.267669239339393e-07, "loss": 0.0001, "step": 204580 }, { "epoch": 4.164681933842239, "grad_norm": 0.0039038759245083373, "learning_rate": 8.263755951700908e-07, "loss": 0.0435, "step": 204590 }, { "epoch": 4.1648854961832065, "grad_norm": 5.264487009278315, "learning_rate": 8.259843506979603e-07, "loss": 0.0044, "step": 204600 }, { "epoch": 4.165089058524173, "grad_norm": 0.0028406864826299493, "learning_rate": 8.255931905254527e-07, "loss": 0.0341, "step": 204610 }, { "epoch": 4.16529262086514, "grad_norm": 0.0009591389389817707, "learning_rate": 8.252021146604689e-07, "loss": 0.0001, "step": 204620 }, { "epoch": 4.165496183206107, "grad_norm": 0.006207791521495549, "learning_rate": 8.248111231109029e-07, "loss": 0.0425, "step": 204630 }, { "epoch": 4.165699745547074, "grad_norm": 0.022792296604045387, "learning_rate": 8.244202158846554e-07, "loss": 0.0006, "step": 204640 }, { "epoch": 4.1659033078880405, "grad_norm": 0.003990089711506772, "learning_rate": 8.240293929896203e-07, "loss": 0.056, "step": 204650 }, { "epoch": 4.166106870229008, "grad_norm": 0.02098961693591372, "learning_rate": 8.236386544336883e-07, "loss": 0.0003, "step": 204660 }, { "epoch": 4.166310432569975, "grad_norm": 0.002725620997669782, "learning_rate": 8.232480002247551e-07, "loss": 0.0007, "step": 204670 }, { "epoch": 4.166513994910941, "grad_norm": 0.002578432858606661, "learning_rate": 8.228574303707071e-07, "loss": 0.0001, "step": 204680 }, { "epoch": 4.166717557251909, "grad_norm": 0.14061952160823588, "learning_rate": 8.22466944879432e-07, "loss": 0.0017, "step": 204690 }, { "epoch": 4.1669211195928755, "grad_norm": 0.04078824641809342, "learning_rate": 8.220765437588196e-07, "loss": 0.0599, "step": 204700 }, { "epoch": 4.167124681933842, "grad_norm": 0.008758035252645359, "learning_rate": 8.216862270167519e-07, "loss": 0.0288, "step": 204710 }, { "epoch": 4.167328244274809, "grad_norm": 0.0018377545899015027, "learning_rate": 8.212959946611121e-07, "loss": 0.0394, "step": 204720 }, { "epoch": 4.167531806615776, "grad_norm": 0.5449524222354822, "learning_rate": 8.209058466997822e-07, "loss": 0.0003, "step": 204730 }, { "epoch": 4.167735368956743, "grad_norm": 0.04068456422547548, "learning_rate": 8.205157831406412e-07, "loss": 0.0131, "step": 204740 }, { "epoch": 4.1679389312977095, "grad_norm": 0.054491539097373816, "learning_rate": 8.201258039915672e-07, "loss": 0.0002, "step": 204750 }, { "epoch": 4.168142493638677, "grad_norm": 0.0024360619344355235, "learning_rate": 8.197359092604356e-07, "loss": 0.0003, "step": 204760 }, { "epoch": 4.168346055979644, "grad_norm": 0.07612434227099307, "learning_rate": 8.193460989551222e-07, "loss": 0.0221, "step": 204770 }, { "epoch": 4.16854961832061, "grad_norm": 0.004541374606011817, "learning_rate": 8.189563730834988e-07, "loss": 0.0179, "step": 204780 }, { "epoch": 4.168753180661578, "grad_norm": 0.2780495880190752, "learning_rate": 8.185667316534363e-07, "loss": 0.0172, "step": 204790 }, { "epoch": 4.1689567430025445, "grad_norm": 0.018598182843612407, "learning_rate": 8.181771746728056e-07, "loss": 0.0063, "step": 204800 }, { "epoch": 4.169160305343511, "grad_norm": 0.035564732716480675, "learning_rate": 8.177877021494712e-07, "loss": 0.0013, "step": 204810 }, { "epoch": 4.169363867684479, "grad_norm": 0.13781188540386918, "learning_rate": 8.173983140913011e-07, "loss": 0.0003, "step": 204820 }, { "epoch": 4.169567430025445, "grad_norm": 0.0017531982673422994, "learning_rate": 8.170090105061612e-07, "loss": 0.0009, "step": 204830 }, { "epoch": 4.169770992366412, "grad_norm": 0.02677986374837205, "learning_rate": 8.166197914019091e-07, "loss": 0.0005, "step": 204840 }, { "epoch": 4.169974554707379, "grad_norm": 0.015227956865026529, "learning_rate": 8.162306567864098e-07, "loss": 0.0001, "step": 204850 }, { "epoch": 4.170178117048346, "grad_norm": 0.07720643615289642, "learning_rate": 8.158416066675218e-07, "loss": 0.0001, "step": 204860 }, { "epoch": 4.170381679389313, "grad_norm": 0.001232675405774629, "learning_rate": 8.154526410531e-07, "loss": 0.0425, "step": 204870 }, { "epoch": 4.17058524173028, "grad_norm": 9.165556580085722, "learning_rate": 8.150637599510037e-07, "loss": 0.1012, "step": 204880 }, { "epoch": 4.170788804071247, "grad_norm": 0.00048185235411054287, "learning_rate": 8.146749633690843e-07, "loss": 0.0007, "step": 204890 }, { "epoch": 4.1709923664122135, "grad_norm": 0.4225639682592904, "learning_rate": 8.142862513151939e-07, "loss": 0.0002, "step": 204900 }, { "epoch": 4.171195928753181, "grad_norm": 0.0008872601215109579, "learning_rate": 8.138976237971841e-07, "loss": 0.0051, "step": 204910 }, { "epoch": 4.171399491094148, "grad_norm": 0.0016794671637986325, "learning_rate": 8.135090808229034e-07, "loss": 0.0575, "step": 204920 }, { "epoch": 4.171603053435114, "grad_norm": 0.011383099879631597, "learning_rate": 8.131206224001992e-07, "loss": 0.0001, "step": 204930 }, { "epoch": 4.171806615776082, "grad_norm": 0.0036895775422565017, "learning_rate": 8.127322485369166e-07, "loss": 0.0001, "step": 204940 }, { "epoch": 4.172010178117048, "grad_norm": 0.01203659939260399, "learning_rate": 8.123439592408994e-07, "loss": 0.0001, "step": 204950 }, { "epoch": 4.172213740458015, "grad_norm": 0.004532353778588315, "learning_rate": 8.119557545199896e-07, "loss": 0.0001, "step": 204960 }, { "epoch": 4.172417302798983, "grad_norm": 0.0035031914950015386, "learning_rate": 8.115676343820277e-07, "loss": 0.0172, "step": 204970 }, { "epoch": 4.172620865139949, "grad_norm": 0.010618098209073562, "learning_rate": 8.111795988348519e-07, "loss": 0.0016, "step": 204980 }, { "epoch": 4.172824427480916, "grad_norm": 0.015756498567467074, "learning_rate": 8.107916478862993e-07, "loss": 0.001, "step": 204990 }, { "epoch": 4.1730279898218825, "grad_norm": 0.023437074390214946, "learning_rate": 8.104037815442051e-07, "loss": 0.0057, "step": 205000 }, { "epoch": 4.17323155216285, "grad_norm": 0.002167876856824546, "learning_rate": 8.100159998164037e-07, "loss": 0.0482, "step": 205010 }, { "epoch": 4.173435114503817, "grad_norm": 0.004027405771505474, "learning_rate": 8.096283027107249e-07, "loss": 0.0477, "step": 205020 }, { "epoch": 4.173638676844783, "grad_norm": 0.013731353449517635, "learning_rate": 8.092406902349981e-07, "loss": 0.0234, "step": 205030 }, { "epoch": 4.173842239185751, "grad_norm": 0.002310390782591831, "learning_rate": 8.088531623970558e-07, "loss": 0.0006, "step": 205040 }, { "epoch": 4.1740458015267174, "grad_norm": 0.009607918080437691, "learning_rate": 8.084657192047207e-07, "loss": 0.0138, "step": 205050 }, { "epoch": 4.174249363867684, "grad_norm": 0.008245747992393157, "learning_rate": 8.080783606658182e-07, "loss": 0.0317, "step": 205060 }, { "epoch": 4.174452926208652, "grad_norm": 0.0036444056347977615, "learning_rate": 8.076910867881743e-07, "loss": 0.0679, "step": 205070 }, { "epoch": 4.174656488549618, "grad_norm": 0.0033108607285313933, "learning_rate": 8.073038975796077e-07, "loss": 0.0357, "step": 205080 }, { "epoch": 4.174860050890585, "grad_norm": 0.0037574852499534027, "learning_rate": 8.069167930479393e-07, "loss": 0.0004, "step": 205090 }, { "epoch": 4.175063613231552, "grad_norm": 0.007933241943232494, "learning_rate": 8.065297732009863e-07, "loss": 0.0003, "step": 205100 }, { "epoch": 4.175267175572519, "grad_norm": 0.015614967047378353, "learning_rate": 8.061428380465658e-07, "loss": 0.0071, "step": 205110 }, { "epoch": 4.175470737913486, "grad_norm": 0.020496011031543096, "learning_rate": 8.057559875924925e-07, "loss": 0.0002, "step": 205120 }, { "epoch": 4.175674300254453, "grad_norm": 0.014869349572699772, "learning_rate": 8.053692218465791e-07, "loss": 0.0308, "step": 205130 }, { "epoch": 4.17587786259542, "grad_norm": 0.0030296018980112906, "learning_rate": 8.049825408166373e-07, "loss": 0.0198, "step": 205140 }, { "epoch": 4.1760814249363865, "grad_norm": 0.006914172826714893, "learning_rate": 8.045959445104762e-07, "loss": 0.0536, "step": 205150 }, { "epoch": 4.176284987277354, "grad_norm": 0.003956707770914822, "learning_rate": 8.042094329359029e-07, "loss": 0.0266, "step": 205160 }, { "epoch": 4.176488549618321, "grad_norm": 0.0783683132324505, "learning_rate": 8.038230061007263e-07, "loss": 0.0297, "step": 205170 }, { "epoch": 4.176692111959287, "grad_norm": 0.0066250676274579945, "learning_rate": 8.034366640127455e-07, "loss": 0.0004, "step": 205180 }, { "epoch": 4.176895674300255, "grad_norm": 0.00796791859405889, "learning_rate": 8.030504066797684e-07, "loss": 0.0001, "step": 205190 }, { "epoch": 4.177099236641221, "grad_norm": 4.881665859418621, "learning_rate": 8.026642341095948e-07, "loss": 0.0007, "step": 205200 }, { "epoch": 4.177302798982188, "grad_norm": 0.03383962624151889, "learning_rate": 8.022781463100205e-07, "loss": 0.0338, "step": 205210 }, { "epoch": 4.177506361323156, "grad_norm": 0.003156058177833414, "learning_rate": 8.018921432888471e-07, "loss": 0.0002, "step": 205220 }, { "epoch": 4.177709923664122, "grad_norm": 0.002770521574386691, "learning_rate": 8.015062250538702e-07, "loss": 0.0015, "step": 205230 }, { "epoch": 4.177913486005089, "grad_norm": 0.008909375818625493, "learning_rate": 8.011203916128801e-07, "loss": 0.0002, "step": 205240 }, { "epoch": 4.178117048346056, "grad_norm": 0.0051224264954806585, "learning_rate": 8.007346429736745e-07, "loss": 0.0002, "step": 205250 }, { "epoch": 4.178320610687023, "grad_norm": 0.011309986751523695, "learning_rate": 8.0034897914404e-07, "loss": 0.028, "step": 205260 }, { "epoch": 4.17852417302799, "grad_norm": 0.29924942697344437, "learning_rate": 7.999634001317658e-07, "loss": 0.0004, "step": 205270 }, { "epoch": 4.178727735368957, "grad_norm": 0.0011363408729301511, "learning_rate": 7.99577905944643e-07, "loss": 0.0192, "step": 205280 }, { "epoch": 4.178931297709924, "grad_norm": 0.002822901683214354, "learning_rate": 7.991924965904529e-07, "loss": 0.0281, "step": 205290 }, { "epoch": 4.17913486005089, "grad_norm": 0.0063212523511695, "learning_rate": 7.988071720769814e-07, "loss": 0.0002, "step": 205300 }, { "epoch": 4.179338422391858, "grad_norm": 0.010734719010942953, "learning_rate": 7.984219324120096e-07, "loss": 0.0324, "step": 205310 }, { "epoch": 4.179541984732825, "grad_norm": 0.0086643591758463, "learning_rate": 7.980367776033187e-07, "loss": 0.0321, "step": 205320 }, { "epoch": 4.179745547073791, "grad_norm": 0.0027799379835364057, "learning_rate": 7.976517076586876e-07, "loss": 0.0001, "step": 205330 }, { "epoch": 4.179949109414759, "grad_norm": 0.008711185867680115, "learning_rate": 7.972667225858921e-07, "loss": 0.0009, "step": 205340 }, { "epoch": 4.180152671755725, "grad_norm": 0.001779920098274618, "learning_rate": 7.968818223927088e-07, "loss": 0.0004, "step": 205350 }, { "epoch": 4.180356234096692, "grad_norm": 0.0018379717747630015, "learning_rate": 7.964970070869105e-07, "loss": 0.0, "step": 205360 }, { "epoch": 4.180559796437659, "grad_norm": 0.0031841159482176996, "learning_rate": 7.961122766762691e-07, "loss": 0.0001, "step": 205370 }, { "epoch": 4.180763358778626, "grad_norm": 0.005331025445704891, "learning_rate": 7.957276311685558e-07, "loss": 0.0966, "step": 205380 }, { "epoch": 4.180966921119593, "grad_norm": 0.031839661194231615, "learning_rate": 7.953430705715359e-07, "loss": 0.0271, "step": 205390 }, { "epoch": 4.181170483460559, "grad_norm": 0.002782484042079352, "learning_rate": 7.949585948929795e-07, "loss": 0.0003, "step": 205400 }, { "epoch": 4.181374045801527, "grad_norm": 0.007285205248776131, "learning_rate": 7.945742041406518e-07, "loss": 0.0605, "step": 205410 }, { "epoch": 4.181577608142494, "grad_norm": 0.002710091576472652, "learning_rate": 7.941898983223118e-07, "loss": 0.0264, "step": 205420 }, { "epoch": 4.18178117048346, "grad_norm": 0.19543999315392074, "learning_rate": 7.938056774457253e-07, "loss": 0.0007, "step": 205430 }, { "epoch": 4.181984732824428, "grad_norm": 0.002465797612234308, "learning_rate": 7.934215415186519e-07, "loss": 0.0097, "step": 205440 }, { "epoch": 4.182188295165394, "grad_norm": 0.015225798545482509, "learning_rate": 7.93037490548847e-07, "loss": 0.0001, "step": 205450 }, { "epoch": 4.182391857506361, "grad_norm": 0.7627161547901159, "learning_rate": 7.926535245440687e-07, "loss": 0.0003, "step": 205460 }, { "epoch": 4.1825954198473285, "grad_norm": 0.005942488638079672, "learning_rate": 7.922696435120713e-07, "loss": 0.0001, "step": 205470 }, { "epoch": 4.182798982188295, "grad_norm": 0.001471606111058229, "learning_rate": 7.918858474606084e-07, "loss": 0.0003, "step": 205480 }, { "epoch": 4.183002544529262, "grad_norm": 0.022046752171244136, "learning_rate": 7.915021363974302e-07, "loss": 0.0005, "step": 205490 }, { "epoch": 4.183206106870229, "grad_norm": 0.0005347798328626893, "learning_rate": 7.911185103302871e-07, "loss": 0.0069, "step": 205500 }, { "epoch": 4.183409669211196, "grad_norm": 0.009925985569265175, "learning_rate": 7.90734969266927e-07, "loss": 0.0002, "step": 205510 }, { "epoch": 4.183613231552163, "grad_norm": 0.00445100689869292, "learning_rate": 7.903515132150952e-07, "loss": 0.0245, "step": 205520 }, { "epoch": 4.18381679389313, "grad_norm": 0.23139291769001924, "learning_rate": 7.899681421825373e-07, "loss": 0.0572, "step": 205530 }, { "epoch": 4.184020356234097, "grad_norm": 0.002083764767834346, "learning_rate": 7.895848561769948e-07, "loss": 0.0004, "step": 205540 }, { "epoch": 4.184223918575063, "grad_norm": 0.14169213612104853, "learning_rate": 7.892016552062088e-07, "loss": 0.0708, "step": 205550 }, { "epoch": 4.184427480916031, "grad_norm": 0.00390750057371372, "learning_rate": 7.888185392779191e-07, "loss": 0.002, "step": 205560 }, { "epoch": 4.1846310432569975, "grad_norm": 0.008317207685921167, "learning_rate": 7.88435508399864e-07, "loss": 0.0002, "step": 205570 }, { "epoch": 4.184834605597964, "grad_norm": 0.3029246631674694, "learning_rate": 7.880525625797764e-07, "loss": 0.0287, "step": 205580 }, { "epoch": 4.185038167938932, "grad_norm": 9.941156319854416, "learning_rate": 7.876697018253931e-07, "loss": 0.0306, "step": 205590 }, { "epoch": 4.185241730279898, "grad_norm": 0.013932507726066362, "learning_rate": 7.872869261444466e-07, "loss": 0.0001, "step": 205600 }, { "epoch": 4.185445292620865, "grad_norm": 0.02478690540500182, "learning_rate": 7.869042355446644e-07, "loss": 0.0001, "step": 205610 }, { "epoch": 4.1856488549618325, "grad_norm": 0.008237256678803997, "learning_rate": 7.865216300337802e-07, "loss": 0.0002, "step": 205620 }, { "epoch": 4.185852417302799, "grad_norm": 0.001983963184981328, "learning_rate": 7.86139109619517e-07, "loss": 0.0002, "step": 205630 }, { "epoch": 4.186055979643766, "grad_norm": 0.004265256893503936, "learning_rate": 7.857566743096007e-07, "loss": 0.0127, "step": 205640 }, { "epoch": 4.186259541984732, "grad_norm": 0.0010302128665994744, "learning_rate": 7.85374324111759e-07, "loss": 0.0001, "step": 205650 }, { "epoch": 4.1864631043257, "grad_norm": 0.1043739490259416, "learning_rate": 7.849920590337096e-07, "loss": 0.0137, "step": 205660 }, { "epoch": 4.1866666666666665, "grad_norm": 5.163336088057264, "learning_rate": 7.846098790831736e-07, "loss": 0.034, "step": 205670 }, { "epoch": 4.186870229007633, "grad_norm": 0.0071287244752830424, "learning_rate": 7.84227784267873e-07, "loss": 0.0003, "step": 205680 }, { "epoch": 4.187073791348601, "grad_norm": 0.015431682626537178, "learning_rate": 7.838457745955209e-07, "loss": 0.0003, "step": 205690 }, { "epoch": 4.187277353689567, "grad_norm": 8.211507873838373, "learning_rate": 7.834638500738334e-07, "loss": 0.0412, "step": 205700 }, { "epoch": 4.187480916030534, "grad_norm": 0.005401706648611972, "learning_rate": 7.830820107105247e-07, "loss": 0.0004, "step": 205710 }, { "epoch": 4.1876844783715015, "grad_norm": 0.0008938565833880823, "learning_rate": 7.827002565133057e-07, "loss": 0.0002, "step": 205720 }, { "epoch": 4.187888040712468, "grad_norm": 0.056678179863220844, "learning_rate": 7.82318587489887e-07, "loss": 0.0361, "step": 205730 }, { "epoch": 4.188091603053435, "grad_norm": 8.807261486013864, "learning_rate": 7.819370036479768e-07, "loss": 0.0691, "step": 205740 }, { "epoch": 4.188295165394402, "grad_norm": 0.001295024367166765, "learning_rate": 7.815555049952833e-07, "loss": 0.0249, "step": 205750 }, { "epoch": 4.188498727735369, "grad_norm": 0.008227661557031758, "learning_rate": 7.811740915395066e-07, "loss": 0.0206, "step": 205760 }, { "epoch": 4.1887022900763355, "grad_norm": 0.003261730903607689, "learning_rate": 7.807927632883544e-07, "loss": 0.0001, "step": 205770 }, { "epoch": 4.188905852417303, "grad_norm": 0.003567819750026154, "learning_rate": 7.804115202495277e-07, "loss": 0.0001, "step": 205780 }, { "epoch": 4.18910941475827, "grad_norm": 0.003579625161820955, "learning_rate": 7.800303624307232e-07, "loss": 0.0005, "step": 205790 }, { "epoch": 4.189312977099236, "grad_norm": 0.003029033465834647, "learning_rate": 7.796492898396418e-07, "loss": 0.0311, "step": 205800 }, { "epoch": 4.189516539440204, "grad_norm": 0.025392129972470073, "learning_rate": 7.792683024839798e-07, "loss": 0.0003, "step": 205810 }, { "epoch": 4.1897201017811705, "grad_norm": 0.010873917271567002, "learning_rate": 7.788874003714286e-07, "loss": 0.0374, "step": 205820 }, { "epoch": 4.189923664122137, "grad_norm": 0.0016320326133925586, "learning_rate": 7.78506583509685e-07, "loss": 0.0009, "step": 205830 }, { "epoch": 4.190127226463105, "grad_norm": 1.1389646380065077, "learning_rate": 7.781258519064371e-07, "loss": 0.0294, "step": 205840 }, { "epoch": 4.190330788804071, "grad_norm": 0.039759166413332124, "learning_rate": 7.777452055693747e-07, "loss": 0.0001, "step": 205850 }, { "epoch": 4.190534351145038, "grad_norm": 0.007237815864541032, "learning_rate": 7.773646445061878e-07, "loss": 0.0178, "step": 205860 }, { "epoch": 4.190737913486005, "grad_norm": 0.16807642037715761, "learning_rate": 7.769841687245599e-07, "loss": 0.0002, "step": 205870 }, { "epoch": 4.190941475826972, "grad_norm": 0.010081011050600507, "learning_rate": 7.766037782321756e-07, "loss": 0.1231, "step": 205880 }, { "epoch": 4.191145038167939, "grad_norm": 0.21012960684361656, "learning_rate": 7.762234730367179e-07, "loss": 0.1258, "step": 205890 }, { "epoch": 4.191348600508906, "grad_norm": 0.025063899009892577, "learning_rate": 7.758432531458671e-07, "loss": 0.0586, "step": 205900 }, { "epoch": 4.191552162849873, "grad_norm": 0.022812279827291275, "learning_rate": 7.754631185673028e-07, "loss": 0.0002, "step": 205910 }, { "epoch": 4.1917557251908395, "grad_norm": 0.011836739004684833, "learning_rate": 7.750830693087019e-07, "loss": 0.0002, "step": 205920 }, { "epoch": 4.191959287531807, "grad_norm": 0.0005050895236108966, "learning_rate": 7.747031053777399e-07, "loss": 0.043, "step": 205930 }, { "epoch": 4.192162849872774, "grad_norm": 0.021517650166918947, "learning_rate": 7.743232267820911e-07, "loss": 0.0003, "step": 205940 }, { "epoch": 4.19236641221374, "grad_norm": 0.10629446044053831, "learning_rate": 7.739434335294272e-07, "loss": 0.0002, "step": 205950 }, { "epoch": 4.192569974554708, "grad_norm": 0.030211630605195643, "learning_rate": 7.735637256274187e-07, "loss": 0.0003, "step": 205960 }, { "epoch": 4.1927735368956744, "grad_norm": 8.141627385448583, "learning_rate": 7.731841030837345e-07, "loss": 0.032, "step": 205970 }, { "epoch": 4.192977099236641, "grad_norm": 1.5109751775144014, "learning_rate": 7.728045659060413e-07, "loss": 0.0021, "step": 205980 }, { "epoch": 4.193180661577608, "grad_norm": 0.015346829025269654, "learning_rate": 7.724251141020061e-07, "loss": 0.0009, "step": 205990 }, { "epoch": 4.193384223918575, "grad_norm": 13.431584401400864, "learning_rate": 7.720457476792892e-07, "loss": 0.0536, "step": 206000 }, { "epoch": 4.193587786259542, "grad_norm": 0.030065361344957103, "learning_rate": 7.716664666455526e-07, "loss": 0.0002, "step": 206010 }, { "epoch": 4.1937913486005085, "grad_norm": 0.003693435589519572, "learning_rate": 7.712872710084607e-07, "loss": 0.0528, "step": 206020 }, { "epoch": 4.193994910941476, "grad_norm": 0.05014147033671946, "learning_rate": 7.709081607756674e-07, "loss": 0.0002, "step": 206030 }, { "epoch": 4.194198473282443, "grad_norm": 0.002267246056404043, "learning_rate": 7.705291359548295e-07, "loss": 0.0051, "step": 206040 }, { "epoch": 4.194402035623409, "grad_norm": 0.017902830559730312, "learning_rate": 7.701501965536057e-07, "loss": 0.0004, "step": 206050 }, { "epoch": 4.194605597964377, "grad_norm": 0.005891910284533563, "learning_rate": 7.697713425796454e-07, "loss": 0.0486, "step": 206060 }, { "epoch": 4.1948091603053435, "grad_norm": 0.04608633961037842, "learning_rate": 7.693925740406016e-07, "loss": 0.0005, "step": 206070 }, { "epoch": 4.19501272264631, "grad_norm": 0.020078164386974903, "learning_rate": 7.690138909441236e-07, "loss": 0.0358, "step": 206080 }, { "epoch": 4.195216284987278, "grad_norm": 0.007229493760212623, "learning_rate": 7.686352932978597e-07, "loss": 0.0054, "step": 206090 }, { "epoch": 4.195419847328244, "grad_norm": 0.005783425476268601, "learning_rate": 7.68256781109456e-07, "loss": 0.0347, "step": 206100 }, { "epoch": 4.195623409669211, "grad_norm": 0.016459787147027942, "learning_rate": 7.67878354386557e-07, "loss": 0.0001, "step": 206110 }, { "epoch": 4.195826972010178, "grad_norm": 0.0167047661003555, "learning_rate": 7.675000131368065e-07, "loss": 0.0002, "step": 206120 }, { "epoch": 4.196030534351145, "grad_norm": 0.009194855431515889, "learning_rate": 7.671217573678425e-07, "loss": 0.0186, "step": 206130 }, { "epoch": 4.196234096692112, "grad_norm": 0.007045669793432538, "learning_rate": 7.667435870873074e-07, "loss": 0.0004, "step": 206140 }, { "epoch": 4.196437659033079, "grad_norm": 14.681171579966751, "learning_rate": 7.663655023028394e-07, "loss": 0.0618, "step": 206150 }, { "epoch": 4.196641221374046, "grad_norm": 0.006346120084620579, "learning_rate": 7.65987503022071e-07, "loss": 0.0031, "step": 206160 }, { "epoch": 4.1968447837150125, "grad_norm": 0.03812744130275886, "learning_rate": 7.656095892526389e-07, "loss": 0.0004, "step": 206170 }, { "epoch": 4.19704834605598, "grad_norm": 0.004795694007018419, "learning_rate": 7.652317610021765e-07, "loss": 0.0184, "step": 206180 }, { "epoch": 4.197251908396947, "grad_norm": 0.01181619187925376, "learning_rate": 7.648540182783104e-07, "loss": 0.0001, "step": 206190 }, { "epoch": 4.197455470737913, "grad_norm": 17.44779476787217, "learning_rate": 7.644763610886747e-07, "loss": 0.0264, "step": 206200 }, { "epoch": 4.197659033078881, "grad_norm": 0.006490468423742354, "learning_rate": 7.64098789440893e-07, "loss": 0.0158, "step": 206210 }, { "epoch": 4.197862595419847, "grad_norm": 23.44024982576682, "learning_rate": 7.637213033425911e-07, "loss": 0.0336, "step": 206220 }, { "epoch": 4.198066157760814, "grad_norm": 0.013019478072094625, "learning_rate": 7.633439028013962e-07, "loss": 0.0104, "step": 206230 }, { "epoch": 4.198269720101782, "grad_norm": 0.03439398768049525, "learning_rate": 7.629665878249265e-07, "loss": 0.0001, "step": 206240 }, { "epoch": 4.198473282442748, "grad_norm": 0.021903853889934665, "learning_rate": 7.625893584208027e-07, "loss": 0.0003, "step": 206250 }, { "epoch": 4.198676844783715, "grad_norm": 0.08214740139868763, "learning_rate": 7.622122145966465e-07, "loss": 0.0002, "step": 206260 }, { "epoch": 4.198880407124682, "grad_norm": 0.008844330539683726, "learning_rate": 7.618351563600723e-07, "loss": 0.0004, "step": 206270 }, { "epoch": 4.199083969465649, "grad_norm": 0.00610541056053606, "learning_rate": 7.614581837186958e-07, "loss": 0.0203, "step": 206280 }, { "epoch": 4.199287531806616, "grad_norm": 0.010710766245114326, "learning_rate": 7.610812966801301e-07, "loss": 0.0016, "step": 206290 }, { "epoch": 4.199491094147582, "grad_norm": 0.014371520003630437, "learning_rate": 7.607044952519876e-07, "loss": 0.0001, "step": 206300 }, { "epoch": 4.19969465648855, "grad_norm": 0.020045975071334855, "learning_rate": 7.603277794418778e-07, "loss": 0.0001, "step": 206310 }, { "epoch": 4.199898218829516, "grad_norm": 0.030573949433411244, "learning_rate": 7.599511492574091e-07, "loss": 0.0001, "step": 206320 }, { "epoch": 4.200101781170483, "grad_norm": 0.03847995632925404, "learning_rate": 7.595746047061881e-07, "loss": 0.0004, "step": 206330 }, { "epoch": 4.200305343511451, "grad_norm": 0.001010435617165837, "learning_rate": 7.591981457958198e-07, "loss": 0.0312, "step": 206340 }, { "epoch": 4.200508905852417, "grad_norm": 0.09193968807881057, "learning_rate": 7.58821772533907e-07, "loss": 0.0322, "step": 206350 }, { "epoch": 4.200712468193384, "grad_norm": 0.0026704484139010343, "learning_rate": 7.58445484928052e-07, "loss": 0.0533, "step": 206360 }, { "epoch": 4.200916030534351, "grad_norm": 0.007225867828168785, "learning_rate": 7.580692829858515e-07, "loss": 0.0002, "step": 206370 }, { "epoch": 4.201119592875318, "grad_norm": 0.015022040317596597, "learning_rate": 7.576931667149068e-07, "loss": 0.0085, "step": 206380 }, { "epoch": 4.201323155216285, "grad_norm": 0.0027981606193722285, "learning_rate": 7.573171361228133e-07, "loss": 0.0001, "step": 206390 }, { "epoch": 4.201526717557252, "grad_norm": 0.007952980289910405, "learning_rate": 7.569411912171626e-07, "loss": 0.0003, "step": 206400 }, { "epoch": 4.201730279898219, "grad_norm": 0.010658112054396283, "learning_rate": 7.565653320055505e-07, "loss": 0.0377, "step": 206410 }, { "epoch": 4.201933842239185, "grad_norm": 0.004447294846467029, "learning_rate": 7.561895584955687e-07, "loss": 0.0223, "step": 206420 }, { "epoch": 4.202137404580153, "grad_norm": 0.014401331273682878, "learning_rate": 7.558138706948032e-07, "loss": 0.001, "step": 206430 }, { "epoch": 4.20234096692112, "grad_norm": 0.04012204663788722, "learning_rate": 7.55438268610843e-07, "loss": 0.0002, "step": 206440 }, { "epoch": 4.202544529262086, "grad_norm": 6.0056741399872315, "learning_rate": 7.550627522512743e-07, "loss": 0.0015, "step": 206450 }, { "epoch": 4.202748091603054, "grad_norm": 0.001695386885626121, "learning_rate": 7.546873216236805e-07, "loss": 0.0001, "step": 206460 }, { "epoch": 4.20295165394402, "grad_norm": 0.015962053084174112, "learning_rate": 7.543119767356444e-07, "loss": 0.0463, "step": 206470 }, { "epoch": 4.203155216284987, "grad_norm": 0.005196810391166216, "learning_rate": 7.53936717594746e-07, "loss": 0.0401, "step": 206480 }, { "epoch": 4.2033587786259545, "grad_norm": 0.005815029100150689, "learning_rate": 7.535615442085647e-07, "loss": 0.02, "step": 206490 }, { "epoch": 4.203562340966921, "grad_norm": 0.0009222234297837557, "learning_rate": 7.531864565846769e-07, "loss": 0.0189, "step": 206500 }, { "epoch": 4.203765903307888, "grad_norm": 0.0022998381263332045, "learning_rate": 7.528114547306592e-07, "loss": 0.0279, "step": 206510 }, { "epoch": 4.203969465648855, "grad_norm": 0.004287798416311409, "learning_rate": 7.524365386540844e-07, "loss": 0.0001, "step": 206520 }, { "epoch": 4.204173027989822, "grad_norm": 0.008436245554570536, "learning_rate": 7.520617083625243e-07, "loss": 0.0004, "step": 206530 }, { "epoch": 4.204376590330789, "grad_norm": 0.003657734208268817, "learning_rate": 7.516869638635488e-07, "loss": 0.0538, "step": 206540 }, { "epoch": 4.204580152671756, "grad_norm": 7.527698453525667, "learning_rate": 7.513123051647286e-07, "loss": 0.0404, "step": 206550 }, { "epoch": 4.204783715012723, "grad_norm": 0.09366989273690182, "learning_rate": 7.509377322736266e-07, "loss": 0.0452, "step": 206560 }, { "epoch": 4.204987277353689, "grad_norm": 0.004273575995183342, "learning_rate": 7.505632451978118e-07, "loss": 0.0007, "step": 206570 }, { "epoch": 4.205190839694657, "grad_norm": 0.009752662202429336, "learning_rate": 7.501888439448441e-07, "loss": 0.0006, "step": 206580 }, { "epoch": 4.2053944020356235, "grad_norm": 0.0006578333005222269, "learning_rate": 7.498145285222858e-07, "loss": 0.0003, "step": 206590 }, { "epoch": 4.20559796437659, "grad_norm": 0.0311271495877379, "learning_rate": 7.494402989376992e-07, "loss": 0.021, "step": 206600 }, { "epoch": 4.205801526717558, "grad_norm": 0.00888617937285999, "learning_rate": 7.490661551986395e-07, "loss": 0.0001, "step": 206610 }, { "epoch": 4.206005089058524, "grad_norm": 12.632382153396838, "learning_rate": 7.486920973126627e-07, "loss": 0.0075, "step": 206620 }, { "epoch": 4.206208651399491, "grad_norm": 0.03019400902750015, "learning_rate": 7.483181252873267e-07, "loss": 0.0001, "step": 206630 }, { "epoch": 4.206412213740458, "grad_norm": 0.0036148133357129584, "learning_rate": 7.479442391301817e-07, "loss": 0.0003, "step": 206640 }, { "epoch": 4.206615776081425, "grad_norm": 0.010178696905883917, "learning_rate": 7.475704388487792e-07, "loss": 0.0015, "step": 206650 }, { "epoch": 4.206819338422392, "grad_norm": 0.03649614621012816, "learning_rate": 7.471967244506684e-07, "loss": 0.0001, "step": 206660 }, { "epoch": 4.207022900763358, "grad_norm": 0.005958804990826825, "learning_rate": 7.468230959433976e-07, "loss": 0.0001, "step": 206670 }, { "epoch": 4.207226463104326, "grad_norm": 0.09048414502994796, "learning_rate": 7.464495533345123e-07, "loss": 0.0002, "step": 206680 }, { "epoch": 4.2074300254452925, "grad_norm": 0.00697542710522892, "learning_rate": 7.460760966315572e-07, "loss": 0.0009, "step": 206690 }, { "epoch": 4.207633587786259, "grad_norm": 0.00806269365844427, "learning_rate": 7.45702725842074e-07, "loss": 0.0182, "step": 206700 }, { "epoch": 4.207837150127227, "grad_norm": 0.00196999625836544, "learning_rate": 7.45329440973604e-07, "loss": 0.0038, "step": 206710 }, { "epoch": 4.208040712468193, "grad_norm": 0.0016910028149289835, "learning_rate": 7.449562420336853e-07, "loss": 0.0528, "step": 206720 }, { "epoch": 4.20824427480916, "grad_norm": 0.00861756459987851, "learning_rate": 7.445831290298572e-07, "loss": 0.0251, "step": 206730 }, { "epoch": 4.2084478371501275, "grad_norm": 0.00020978003730816726, "learning_rate": 7.442101019696518e-07, "loss": 0.0305, "step": 206740 }, { "epoch": 4.208651399491094, "grad_norm": 0.002198583092913193, "learning_rate": 7.43837160860606e-07, "loss": 0.0001, "step": 206750 }, { "epoch": 4.208854961832061, "grad_norm": 0.009521864748812219, "learning_rate": 7.434643057102514e-07, "loss": 0.0007, "step": 206760 }, { "epoch": 4.209058524173028, "grad_norm": 0.011071628872689039, "learning_rate": 7.430915365261154e-07, "loss": 0.0204, "step": 206770 }, { "epoch": 4.209262086513995, "grad_norm": 0.06328315415905766, "learning_rate": 7.427188533157309e-07, "loss": 0.0077, "step": 206780 }, { "epoch": 4.2094656488549616, "grad_norm": 22.289094044368913, "learning_rate": 7.423462560866212e-07, "loss": 0.0202, "step": 206790 }, { "epoch": 4.209669211195929, "grad_norm": 0.048603072807821904, "learning_rate": 7.419737448463116e-07, "loss": 0.0001, "step": 206800 }, { "epoch": 4.209872773536896, "grad_norm": 0.02148672955002255, "learning_rate": 7.416013196023287e-07, "loss": 0.0046, "step": 206810 }, { "epoch": 4.210076335877862, "grad_norm": 0.02426512810833167, "learning_rate": 7.412289803621914e-07, "loss": 0.0507, "step": 206820 }, { "epoch": 4.21027989821883, "grad_norm": 0.016027269408423944, "learning_rate": 7.408567271334194e-07, "loss": 0.0599, "step": 206830 }, { "epoch": 4.2104834605597965, "grad_norm": 0.004333412138030685, "learning_rate": 7.404845599235316e-07, "loss": 0.0004, "step": 206840 }, { "epoch": 4.210687022900763, "grad_norm": 0.004729018038734299, "learning_rate": 7.401124787400449e-07, "loss": 0.0021, "step": 206850 }, { "epoch": 4.210890585241731, "grad_norm": 0.3301524441941333, "learning_rate": 7.397404835904731e-07, "loss": 0.0144, "step": 206860 }, { "epoch": 4.211094147582697, "grad_norm": 0.13433647963565437, "learning_rate": 7.393685744823293e-07, "loss": 0.0303, "step": 206870 }, { "epoch": 4.211297709923664, "grad_norm": 0.0022577339655511256, "learning_rate": 7.389967514231244e-07, "loss": 0.0035, "step": 206880 }, { "epoch": 4.2115012722646314, "grad_norm": 0.027048808171574342, "learning_rate": 7.386250144203688e-07, "loss": 0.0001, "step": 206890 }, { "epoch": 4.211704834605598, "grad_norm": 0.015104447120860674, "learning_rate": 7.382533634815697e-07, "loss": 0.0003, "step": 206900 }, { "epoch": 4.211908396946565, "grad_norm": 0.0032548994648871512, "learning_rate": 7.378817986142328e-07, "loss": 0.0001, "step": 206910 }, { "epoch": 4.212111959287531, "grad_norm": 0.001252693896392052, "learning_rate": 7.375103198258626e-07, "loss": 0.0595, "step": 206920 }, { "epoch": 4.212315521628499, "grad_norm": 0.00928412080589381, "learning_rate": 7.371389271239615e-07, "loss": 0.0124, "step": 206930 }, { "epoch": 4.2125190839694655, "grad_norm": 0.012801905366829614, "learning_rate": 7.367676205160313e-07, "loss": 0.0361, "step": 206940 }, { "epoch": 4.212722646310432, "grad_norm": 0.010844040796098284, "learning_rate": 7.363964000095675e-07, "loss": 0.0002, "step": 206950 }, { "epoch": 4.2129262086514, "grad_norm": 0.10258869952958893, "learning_rate": 7.360252656120714e-07, "loss": 0.0012, "step": 206960 }, { "epoch": 4.213129770992366, "grad_norm": 0.009881694769715347, "learning_rate": 7.356542173310382e-07, "loss": 0.0002, "step": 206970 }, { "epoch": 4.213333333333333, "grad_norm": 0.004897949907841628, "learning_rate": 7.352832551739592e-07, "loss": 0.0185, "step": 206980 }, { "epoch": 4.2135368956743005, "grad_norm": 0.044929101039502824, "learning_rate": 7.349123791483271e-07, "loss": 0.0012, "step": 206990 }, { "epoch": 4.213740458015267, "grad_norm": 0.03409675669192795, "learning_rate": 7.345415892616347e-07, "loss": 0.0094, "step": 207000 }, { "epoch": 4.213944020356234, "grad_norm": 0.022270846816467373, "learning_rate": 7.341708855213675e-07, "loss": 0.0062, "step": 207010 }, { "epoch": 4.214147582697201, "grad_norm": 19.3795138065275, "learning_rate": 7.338002679350142e-07, "loss": 0.0037, "step": 207020 }, { "epoch": 4.214351145038168, "grad_norm": 0.017186814967274085, "learning_rate": 7.33429736510059e-07, "loss": 0.0001, "step": 207030 }, { "epoch": 4.2145547073791345, "grad_norm": 0.02387602248664481, "learning_rate": 7.330592912539852e-07, "loss": 0.0362, "step": 207040 }, { "epoch": 4.214758269720102, "grad_norm": 0.0012696466434384236, "learning_rate": 7.326889321742753e-07, "loss": 0.0002, "step": 207050 }, { "epoch": 4.214961832061069, "grad_norm": 0.010542179434317822, "learning_rate": 7.323186592784087e-07, "loss": 0.0079, "step": 207060 }, { "epoch": 4.215165394402035, "grad_norm": 0.003291905231988131, "learning_rate": 7.319484725738629e-07, "loss": 0.0058, "step": 207070 }, { "epoch": 4.215368956743003, "grad_norm": 0.004874043687138654, "learning_rate": 7.315783720681158e-07, "loss": 0.0002, "step": 207080 }, { "epoch": 4.2155725190839695, "grad_norm": 0.02227960076099954, "learning_rate": 7.312083577686402e-07, "loss": 0.0642, "step": 207090 }, { "epoch": 4.215776081424936, "grad_norm": 0.002018322905905174, "learning_rate": 7.308384296829119e-07, "loss": 0.0001, "step": 207100 }, { "epoch": 4.215979643765904, "grad_norm": 0.1390923888565888, "learning_rate": 7.304685878183975e-07, "loss": 0.0003, "step": 207110 }, { "epoch": 4.21618320610687, "grad_norm": 0.021334247045098394, "learning_rate": 7.300988321825702e-07, "loss": 0.0271, "step": 207120 }, { "epoch": 4.216386768447837, "grad_norm": 0.7600569939321751, "learning_rate": 7.297291627828978e-07, "loss": 0.0056, "step": 207130 }, { "epoch": 4.216590330788804, "grad_norm": 0.07191734000251868, "learning_rate": 7.293595796268427e-07, "loss": 0.0001, "step": 207140 }, { "epoch": 4.216793893129771, "grad_norm": 10.504793307876467, "learning_rate": 7.289900827218738e-07, "loss": 0.0526, "step": 207150 }, { "epoch": 4.216997455470738, "grad_norm": 2.6628638845008337e-05, "learning_rate": 7.286206720754502e-07, "loss": 0.0349, "step": 207160 }, { "epoch": 4.217201017811705, "grad_norm": 0.03641574941492566, "learning_rate": 7.28251347695032e-07, "loss": 0.0013, "step": 207170 }, { "epoch": 4.217404580152672, "grad_norm": 0.030025808459661123, "learning_rate": 7.278821095880829e-07, "loss": 0.0001, "step": 207180 }, { "epoch": 4.2176081424936385, "grad_norm": 0.002978425993195504, "learning_rate": 7.275129577620554e-07, "loss": 0.0001, "step": 207190 }, { "epoch": 4.217811704834606, "grad_norm": 0.0005976850494965013, "learning_rate": 7.27143892224405e-07, "loss": 0.0111, "step": 207200 }, { "epoch": 4.218015267175573, "grad_norm": 0.007250514895451128, "learning_rate": 7.267749129825902e-07, "loss": 0.0003, "step": 207210 }, { "epoch": 4.218218829516539, "grad_norm": 0.002966757081122343, "learning_rate": 7.264060200440582e-07, "loss": 0.0, "step": 207220 }, { "epoch": 4.218422391857507, "grad_norm": 0.019468765021117513, "learning_rate": 7.260372134162613e-07, "loss": 0.0024, "step": 207230 }, { "epoch": 4.218625954198473, "grad_norm": 0.004966279461042507, "learning_rate": 7.256684931066476e-07, "loss": 0.0001, "step": 207240 }, { "epoch": 4.21882951653944, "grad_norm": 0.0171092176273918, "learning_rate": 7.25299859122664e-07, "loss": 0.0001, "step": 207250 }, { "epoch": 4.219033078880408, "grad_norm": 0.0225835329263975, "learning_rate": 7.249313114717554e-07, "loss": 0.0003, "step": 207260 }, { "epoch": 4.219236641221374, "grad_norm": 0.08236537841044647, "learning_rate": 7.245628501613655e-07, "loss": 0.0044, "step": 207270 }, { "epoch": 4.219440203562341, "grad_norm": 0.10084284069861225, "learning_rate": 7.241944751989355e-07, "loss": 0.0001, "step": 207280 }, { "epoch": 4.2196437659033075, "grad_norm": 3.8135367918547955, "learning_rate": 7.23826186591905e-07, "loss": 0.0014, "step": 207290 }, { "epoch": 4.219847328244275, "grad_norm": 0.0015192212344266353, "learning_rate": 7.234579843477124e-07, "loss": 0.0001, "step": 207300 }, { "epoch": 4.220050890585242, "grad_norm": 0.0035961116341241104, "learning_rate": 7.230898684737953e-07, "loss": 0.1298, "step": 207310 }, { "epoch": 4.220254452926208, "grad_norm": 0.0031887163592471935, "learning_rate": 7.227218389775848e-07, "loss": 0.0001, "step": 207320 }, { "epoch": 4.220458015267176, "grad_norm": 0.005692079202281985, "learning_rate": 7.223538958665166e-07, "loss": 0.0716, "step": 207330 }, { "epoch": 4.220661577608142, "grad_norm": 0.0037459904512198365, "learning_rate": 7.219860391480221e-07, "loss": 0.0001, "step": 207340 }, { "epoch": 4.220865139949109, "grad_norm": 0.024144189484804027, "learning_rate": 7.216182688295275e-07, "loss": 0.0001, "step": 207350 }, { "epoch": 4.221068702290077, "grad_norm": 0.011588195177495578, "learning_rate": 7.212505849184636e-07, "loss": 0.0003, "step": 207360 }, { "epoch": 4.221272264631043, "grad_norm": 0.021346273945623148, "learning_rate": 7.208829874222562e-07, "loss": 0.0007, "step": 207370 }, { "epoch": 4.22147582697201, "grad_norm": 0.0010285151963367021, "learning_rate": 7.205154763483263e-07, "loss": 0.0006, "step": 207380 }, { "epoch": 4.221679389312977, "grad_norm": 0.01313627084139627, "learning_rate": 7.201480517041004e-07, "loss": 0.0003, "step": 207390 }, { "epoch": 4.221882951653944, "grad_norm": 0.008567969345396902, "learning_rate": 7.197807134969958e-07, "loss": 0.0006, "step": 207400 }, { "epoch": 4.222086513994911, "grad_norm": 0.13024529216612926, "learning_rate": 7.194134617344328e-07, "loss": 0.0002, "step": 207410 }, { "epoch": 4.222290076335878, "grad_norm": 0.0006010182092440285, "learning_rate": 7.190462964238282e-07, "loss": 0.0275, "step": 207420 }, { "epoch": 4.222493638676845, "grad_norm": 0.012527361253945454, "learning_rate": 7.18679217572597e-07, "loss": 0.0167, "step": 207430 }, { "epoch": 4.222697201017811, "grad_norm": 0.007947991097233811, "learning_rate": 7.183122251881536e-07, "loss": 0.0218, "step": 207440 }, { "epoch": 4.222900763358779, "grad_norm": 0.004381458867676578, "learning_rate": 7.179453192779101e-07, "loss": 0.0002, "step": 207450 }, { "epoch": 4.223104325699746, "grad_norm": 0.06141784486740163, "learning_rate": 7.175784998492757e-07, "loss": 0.0143, "step": 207460 }, { "epoch": 4.223307888040712, "grad_norm": 0.03614932806296922, "learning_rate": 7.17211766909659e-07, "loss": 0.0006, "step": 207470 }, { "epoch": 4.22351145038168, "grad_norm": 0.0006702800616520776, "learning_rate": 7.168451204664672e-07, "loss": 0.0001, "step": 207480 }, { "epoch": 4.223715012722646, "grad_norm": 0.06679060651073887, "learning_rate": 7.164785605271047e-07, "loss": 0.0001, "step": 207490 }, { "epoch": 4.223918575063613, "grad_norm": 0.036310903290488805, "learning_rate": 7.161120870989747e-07, "loss": 0.0325, "step": 207500 }, { "epoch": 4.2241221374045805, "grad_norm": 0.007726563165501482, "learning_rate": 7.157457001894791e-07, "loss": 0.0214, "step": 207510 }, { "epoch": 4.224325699745547, "grad_norm": 0.0008440281444943324, "learning_rate": 7.153793998060177e-07, "loss": 0.0001, "step": 207520 }, { "epoch": 4.224529262086514, "grad_norm": 0.0015124125607530734, "learning_rate": 7.150131859559872e-07, "loss": 0.0372, "step": 207530 }, { "epoch": 4.224732824427481, "grad_norm": 0.019701771703629526, "learning_rate": 7.14647058646783e-07, "loss": 0.0003, "step": 207540 }, { "epoch": 4.224936386768448, "grad_norm": 0.00504468924232695, "learning_rate": 7.142810178858034e-07, "loss": 0.0055, "step": 207550 }, { "epoch": 4.225139949109415, "grad_norm": 3.360840560592917, "learning_rate": 7.139150636804376e-07, "loss": 0.0006, "step": 207560 }, { "epoch": 4.225343511450381, "grad_norm": 0.029241724400385128, "learning_rate": 7.135491960380763e-07, "loss": 0.0425, "step": 207570 }, { "epoch": 4.225547073791349, "grad_norm": 0.005775008265112076, "learning_rate": 7.131834149661121e-07, "loss": 0.0001, "step": 207580 }, { "epoch": 4.225750636132315, "grad_norm": 0.0034892074068558904, "learning_rate": 7.128177204719288e-07, "loss": 0.0425, "step": 207590 }, { "epoch": 4.225954198473282, "grad_norm": 0.001761538220225023, "learning_rate": 7.124521125629142e-07, "loss": 0.0001, "step": 207600 }, { "epoch": 4.2261577608142495, "grad_norm": 4.605474989756271, "learning_rate": 7.120865912464509e-07, "loss": 0.0047, "step": 207610 }, { "epoch": 4.226361323155216, "grad_norm": 0.13319345422858464, "learning_rate": 7.11721156529922e-07, "loss": 0.0046, "step": 207620 }, { "epoch": 4.226564885496183, "grad_norm": 0.003467023720364655, "learning_rate": 7.113558084207073e-07, "loss": 0.0001, "step": 207630 }, { "epoch": 4.22676844783715, "grad_norm": 0.006309994968479157, "learning_rate": 7.109905469261863e-07, "loss": 0.0001, "step": 207640 }, { "epoch": 4.226972010178117, "grad_norm": 0.027767215302507905, "learning_rate": 7.106253720537349e-07, "loss": 0.0144, "step": 207650 }, { "epoch": 4.227175572519084, "grad_norm": 0.0077884399666900105, "learning_rate": 7.102602838107286e-07, "loss": 0.0279, "step": 207660 }, { "epoch": 4.227379134860051, "grad_norm": 1.2102508751600858, "learning_rate": 7.098952822045418e-07, "loss": 0.0369, "step": 207670 }, { "epoch": 4.227582697201018, "grad_norm": 0.00894956843420701, "learning_rate": 7.095303672425457e-07, "loss": 0.0035, "step": 207680 }, { "epoch": 4.227786259541984, "grad_norm": 0.030318701688210856, "learning_rate": 7.091655389321084e-07, "loss": 0.0001, "step": 207690 }, { "epoch": 4.227989821882952, "grad_norm": 0.03447536081237902, "learning_rate": 7.088007972805999e-07, "loss": 0.0004, "step": 207700 }, { "epoch": 4.2281933842239185, "grad_norm": 0.002004100634428024, "learning_rate": 7.084361422953883e-07, "loss": 0.0438, "step": 207710 }, { "epoch": 4.228396946564885, "grad_norm": 0.006042981183924469, "learning_rate": 7.080715739838334e-07, "loss": 0.0002, "step": 207720 }, { "epoch": 4.228600508905853, "grad_norm": 0.0036304794392565862, "learning_rate": 7.077070923533024e-07, "loss": 0.0, "step": 207730 }, { "epoch": 4.228804071246819, "grad_norm": 0.04655090951415836, "learning_rate": 7.073426974111563e-07, "loss": 0.0001, "step": 207740 }, { "epoch": 4.229007633587786, "grad_norm": 0.010285614673206019, "learning_rate": 7.069783891647508e-07, "loss": 0.0001, "step": 207750 }, { "epoch": 4.2292111959287535, "grad_norm": 0.002493293510064122, "learning_rate": 7.066141676214489e-07, "loss": 0.029, "step": 207760 }, { "epoch": 4.22941475826972, "grad_norm": 0.014251670393196744, "learning_rate": 7.062500327886024e-07, "loss": 0.0058, "step": 207770 }, { "epoch": 4.229618320610687, "grad_norm": 0.0007582296594909405, "learning_rate": 7.058859846735655e-07, "loss": 0.0575, "step": 207780 }, { "epoch": 4.229821882951654, "grad_norm": 7.608204368208903, "learning_rate": 7.05522023283694e-07, "loss": 0.0694, "step": 207790 }, { "epoch": 4.230025445292621, "grad_norm": 0.001667772727201445, "learning_rate": 7.051581486263354e-07, "loss": 0.0332, "step": 207800 }, { "epoch": 4.230229007633588, "grad_norm": 0.0005877060844013457, "learning_rate": 7.047943607088398e-07, "loss": 0.01, "step": 207810 }, { "epoch": 4.230432569974555, "grad_norm": 0.012667364462013652, "learning_rate": 7.044306595385547e-07, "loss": 0.0002, "step": 207820 }, { "epoch": 4.230636132315522, "grad_norm": 0.013861488346974718, "learning_rate": 7.040670451228249e-07, "loss": 0.0075, "step": 207830 }, { "epoch": 4.230839694656488, "grad_norm": 0.013512642381803946, "learning_rate": 7.037035174689938e-07, "loss": 0.0126, "step": 207840 }, { "epoch": 4.231043256997456, "grad_norm": 0.000654840989997416, "learning_rate": 7.033400765844039e-07, "loss": 0.0145, "step": 207850 }, { "epoch": 4.2312468193384225, "grad_norm": 0.02458571338918156, "learning_rate": 7.029767224763956e-07, "loss": 0.0001, "step": 207860 }, { "epoch": 4.231450381679389, "grad_norm": 0.00454625996041679, "learning_rate": 7.026134551523062e-07, "loss": 0.0001, "step": 207870 }, { "epoch": 4.231653944020357, "grad_norm": 0.005106372044383238, "learning_rate": 7.022502746194737e-07, "loss": 0.0002, "step": 207880 }, { "epoch": 4.231857506361323, "grad_norm": 0.010198853852604654, "learning_rate": 7.018871808852335e-07, "loss": 0.0002, "step": 207890 }, { "epoch": 4.23206106870229, "grad_norm": 0.10810679262626051, "learning_rate": 7.01524173956915e-07, "loss": 0.0002, "step": 207900 }, { "epoch": 4.2322646310432575, "grad_norm": 0.019878096256207862, "learning_rate": 7.011612538418533e-07, "loss": 0.0001, "step": 207910 }, { "epoch": 4.232468193384224, "grad_norm": 0.020591764395739197, "learning_rate": 7.007984205473783e-07, "loss": 0.0072, "step": 207920 }, { "epoch": 4.232671755725191, "grad_norm": 12.841851408992364, "learning_rate": 7.004356740808144e-07, "loss": 0.0682, "step": 207930 }, { "epoch": 4.232875318066157, "grad_norm": 4.663019151334577, "learning_rate": 7.000730144494904e-07, "loss": 0.001, "step": 207940 }, { "epoch": 4.233078880407125, "grad_norm": 0.1392598042663228, "learning_rate": 6.99710441660732e-07, "loss": 0.0226, "step": 207950 }, { "epoch": 4.2332824427480915, "grad_norm": 0.003941295784782602, "learning_rate": 6.993479557218585e-07, "loss": 0.0298, "step": 207960 }, { "epoch": 4.233486005089058, "grad_norm": 2.318132193182455, "learning_rate": 6.989855566401921e-07, "loss": 0.0011, "step": 207970 }, { "epoch": 4.233689567430026, "grad_norm": 0.0073560124317482945, "learning_rate": 6.986232444230523e-07, "loss": 0.0017, "step": 207980 }, { "epoch": 4.233893129770992, "grad_norm": 0.007077700071330578, "learning_rate": 6.982610190777561e-07, "loss": 0.0001, "step": 207990 }, { "epoch": 4.234096692111959, "grad_norm": 0.0005764896803533166, "learning_rate": 6.978988806116194e-07, "loss": 0.0002, "step": 208000 }, { "epoch": 4.2343002544529265, "grad_norm": 6.610872285486798, "learning_rate": 6.975368290319557e-07, "loss": 0.0416, "step": 208010 }, { "epoch": 4.234503816793893, "grad_norm": 0.0012158907502289819, "learning_rate": 6.971748643460769e-07, "loss": 0.0002, "step": 208020 }, { "epoch": 4.23470737913486, "grad_norm": 0.0023895610438660533, "learning_rate": 6.968129865612944e-07, "loss": 0.0003, "step": 208030 }, { "epoch": 4.234910941475827, "grad_norm": 0.004166670159937297, "learning_rate": 6.964511956849157e-07, "loss": 0.0111, "step": 208040 }, { "epoch": 4.235114503816794, "grad_norm": 0.00137151867676463, "learning_rate": 6.960894917242478e-07, "loss": 0.0001, "step": 208050 }, { "epoch": 4.2353180661577605, "grad_norm": 3.4460575788219083, "learning_rate": 6.957278746865964e-07, "loss": 0.0007, "step": 208060 }, { "epoch": 4.235521628498728, "grad_norm": 0.0008359305046159206, "learning_rate": 6.953663445792647e-07, "loss": 0.0264, "step": 208070 }, { "epoch": 4.235725190839695, "grad_norm": 0.017729810377589354, "learning_rate": 6.950049014095544e-07, "loss": 0.0194, "step": 208080 }, { "epoch": 4.235928753180661, "grad_norm": 0.00033523068851008965, "learning_rate": 6.946435451847633e-07, "loss": 0.0299, "step": 208090 }, { "epoch": 4.236132315521629, "grad_norm": 0.0014228515683145052, "learning_rate": 6.942822759121914e-07, "loss": 0.0003, "step": 208100 }, { "epoch": 4.2363358778625955, "grad_norm": 0.01745113511632305, "learning_rate": 6.939210935991364e-07, "loss": 0.0734, "step": 208110 }, { "epoch": 4.236539440203562, "grad_norm": 0.0006076694516012483, "learning_rate": 6.93559998252889e-07, "loss": 0.0002, "step": 208120 }, { "epoch": 4.23674300254453, "grad_norm": 0.00370021820495103, "learning_rate": 6.931989898807462e-07, "loss": 0.0007, "step": 208130 }, { "epoch": 4.236946564885496, "grad_norm": 0.012212137406752716, "learning_rate": 6.928380684899954e-07, "loss": 0.0001, "step": 208140 }, { "epoch": 4.237150127226463, "grad_norm": 0.0043495914214734255, "learning_rate": 6.924772340879265e-07, "loss": 0.0013, "step": 208150 }, { "epoch": 4.23735368956743, "grad_norm": 0.010088240779282661, "learning_rate": 6.921164866818303e-07, "loss": 0.0432, "step": 208160 }, { "epoch": 4.237557251908397, "grad_norm": 0.0029922343055168773, "learning_rate": 6.917558262789892e-07, "loss": 0.0621, "step": 208170 }, { "epoch": 4.237760814249364, "grad_norm": 0.0143642186831696, "learning_rate": 6.91395252886688e-07, "loss": 0.0089, "step": 208180 }, { "epoch": 4.237964376590331, "grad_norm": 0.0002761096839173484, "learning_rate": 6.910347665122086e-07, "loss": 0.0019, "step": 208190 }, { "epoch": 4.238167938931298, "grad_norm": 8.990714588575717, "learning_rate": 6.906743671628324e-07, "loss": 0.1048, "step": 208200 }, { "epoch": 4.2383715012722645, "grad_norm": 0.0023021204566449566, "learning_rate": 6.903140548458376e-07, "loss": 0.0526, "step": 208210 }, { "epoch": 4.238575063613231, "grad_norm": 0.0118700219204589, "learning_rate": 6.899538295685016e-07, "loss": 0.0002, "step": 208220 }, { "epoch": 4.238778625954199, "grad_norm": 0.009942059770187807, "learning_rate": 6.89593691338099e-07, "loss": 0.0007, "step": 208230 }, { "epoch": 4.238982188295165, "grad_norm": 0.011507789075186977, "learning_rate": 6.892336401619037e-07, "loss": 0.0008, "step": 208240 }, { "epoch": 4.239185750636132, "grad_norm": 0.014010073339343218, "learning_rate": 6.888736760471876e-07, "loss": 0.0299, "step": 208250 }, { "epoch": 4.239389312977099, "grad_norm": 0.01032909832854669, "learning_rate": 6.885137990012208e-07, "loss": 0.0351, "step": 208260 }, { "epoch": 4.239592875318066, "grad_norm": 0.050405255588496546, "learning_rate": 6.881540090312693e-07, "loss": 0.0091, "step": 208270 }, { "epoch": 4.239796437659033, "grad_norm": 0.0033639018260330756, "learning_rate": 6.877943061446019e-07, "loss": 0.0002, "step": 208280 }, { "epoch": 4.24, "grad_norm": 0.009578522013307856, "learning_rate": 6.874346903484841e-07, "loss": 0.0001, "step": 208290 }, { "epoch": 4.240203562340967, "grad_norm": 0.007322131623539244, "learning_rate": 6.870751616501747e-07, "loss": 0.0001, "step": 208300 }, { "epoch": 4.2404071246819335, "grad_norm": 0.009875508698177292, "learning_rate": 6.867157200569385e-07, "loss": 0.0196, "step": 208310 }, { "epoch": 4.240610687022901, "grad_norm": 0.010196874876106155, "learning_rate": 6.863563655760352e-07, "loss": 0.0001, "step": 208320 }, { "epoch": 4.240814249363868, "grad_norm": 0.006367558104614481, "learning_rate": 6.859970982147184e-07, "loss": 0.0264, "step": 208330 }, { "epoch": 4.241017811704834, "grad_norm": 0.006194639342356894, "learning_rate": 6.856379179802492e-07, "loss": 0.0001, "step": 208340 }, { "epoch": 4.241221374045802, "grad_norm": 0.0072050692485051675, "learning_rate": 6.852788248798775e-07, "loss": 0.0137, "step": 208350 }, { "epoch": 4.241424936386768, "grad_norm": 0.01074096473235178, "learning_rate": 6.849198189208567e-07, "loss": 0.0123, "step": 208360 }, { "epoch": 4.241628498727735, "grad_norm": 0.004790131915807749, "learning_rate": 6.845609001104398e-07, "loss": 0.0391, "step": 208370 }, { "epoch": 4.241832061068703, "grad_norm": 0.011742090743544252, "learning_rate": 6.842020684558731e-07, "loss": 0.0828, "step": 208380 }, { "epoch": 4.242035623409669, "grad_norm": 0.07852737541962518, "learning_rate": 6.838433239644038e-07, "loss": 0.0004, "step": 208390 }, { "epoch": 4.242239185750636, "grad_norm": 0.0021941363514927765, "learning_rate": 6.834846666432782e-07, "loss": 0.0002, "step": 208400 }, { "epoch": 4.242442748091603, "grad_norm": 0.0035358057182934624, "learning_rate": 6.831260964997388e-07, "loss": 0.0008, "step": 208410 }, { "epoch": 4.24264631043257, "grad_norm": 0.00835625870283578, "learning_rate": 6.827676135410283e-07, "loss": 0.044, "step": 208420 }, { "epoch": 4.242849872773537, "grad_norm": 0.06591166797735605, "learning_rate": 6.824092177743868e-07, "loss": 0.0349, "step": 208430 }, { "epoch": 4.243053435114504, "grad_norm": 0.06697992435118022, "learning_rate": 6.820509092070515e-07, "loss": 0.0003, "step": 208440 }, { "epoch": 4.243256997455471, "grad_norm": 0.01510414963931249, "learning_rate": 6.816926878462598e-07, "loss": 0.0002, "step": 208450 }, { "epoch": 4.243460559796437, "grad_norm": 0.021771843517600295, "learning_rate": 6.813345536992466e-07, "loss": 0.0001, "step": 208460 }, { "epoch": 4.243664122137405, "grad_norm": 0.03982316554551116, "learning_rate": 6.809765067732438e-07, "loss": 0.0001, "step": 208470 }, { "epoch": 4.243867684478372, "grad_norm": 0.03334349714635579, "learning_rate": 6.806185470754839e-07, "loss": 0.0019, "step": 208480 }, { "epoch": 4.244071246819338, "grad_norm": 0.011593915195223818, "learning_rate": 6.802606746131957e-07, "loss": 0.0001, "step": 208490 }, { "epoch": 4.244274809160306, "grad_norm": 0.009372163069858042, "learning_rate": 6.799028893936077e-07, "loss": 0.0268, "step": 208500 }, { "epoch": 4.244478371501272, "grad_norm": 2.811423255209939, "learning_rate": 6.795451914239443e-07, "loss": 0.0038, "step": 208510 }, { "epoch": 4.244681933842239, "grad_norm": 0.2036275050089426, "learning_rate": 6.791875807114289e-07, "loss": 0.0003, "step": 208520 }, { "epoch": 4.2448854961832065, "grad_norm": 0.03489860071285932, "learning_rate": 6.788300572632883e-07, "loss": 0.0917, "step": 208530 }, { "epoch": 4.245089058524173, "grad_norm": 0.0026661002755722163, "learning_rate": 6.784726210867388e-07, "loss": 0.0377, "step": 208540 }, { "epoch": 4.24529262086514, "grad_norm": 0.04075386499549534, "learning_rate": 6.78115272189001e-07, "loss": 0.0005, "step": 208550 }, { "epoch": 4.2454961832061064, "grad_norm": 43.33661714448191, "learning_rate": 6.777580105772918e-07, "loss": 0.0037, "step": 208560 }, { "epoch": 4.245699745547074, "grad_norm": 0.01830961088389229, "learning_rate": 6.774008362588264e-07, "loss": 0.0369, "step": 208570 }, { "epoch": 4.245903307888041, "grad_norm": 0.0041665477657076904, "learning_rate": 6.770437492408188e-07, "loss": 0.0983, "step": 208580 }, { "epoch": 4.246106870229007, "grad_norm": 0.013049346717136581, "learning_rate": 6.766867495304802e-07, "loss": 0.0001, "step": 208590 }, { "epoch": 4.246310432569975, "grad_norm": 0.2069230892183149, "learning_rate": 6.763298371350218e-07, "loss": 0.0253, "step": 208600 }, { "epoch": 4.246513994910941, "grad_norm": 0.011293457498226729, "learning_rate": 6.759730120616503e-07, "loss": 0.0001, "step": 208610 }, { "epoch": 4.246717557251908, "grad_norm": 0.008190758347779561, "learning_rate": 6.756162743175737e-07, "loss": 0.0253, "step": 208620 }, { "epoch": 4.2469211195928755, "grad_norm": 0.012937138158084366, "learning_rate": 6.752596239099973e-07, "loss": 0.0005, "step": 208630 }, { "epoch": 4.247124681933842, "grad_norm": 0.003162981495481087, "learning_rate": 6.74903060846121e-07, "loss": 0.0001, "step": 208640 }, { "epoch": 4.247328244274809, "grad_norm": 0.019559434872551063, "learning_rate": 6.74546585133149e-07, "loss": 0.0007, "step": 208650 }, { "epoch": 4.247531806615776, "grad_norm": 0.030939869228457505, "learning_rate": 6.741901967782805e-07, "loss": 0.0357, "step": 208660 }, { "epoch": 4.247735368956743, "grad_norm": 0.048431392309574633, "learning_rate": 6.738338957887109e-07, "loss": 0.0121, "step": 208670 }, { "epoch": 4.24793893129771, "grad_norm": 0.09207052071083298, "learning_rate": 6.734776821716388e-07, "loss": 0.0006, "step": 208680 }, { "epoch": 4.248142493638677, "grad_norm": 0.0030550157570917048, "learning_rate": 6.731215559342585e-07, "loss": 0.0004, "step": 208690 }, { "epoch": 4.248346055979644, "grad_norm": 0.020096000952766143, "learning_rate": 6.727655170837594e-07, "loss": 0.0001, "step": 208700 }, { "epoch": 4.24854961832061, "grad_norm": 0.011024072961992702, "learning_rate": 6.724095656273361e-07, "loss": 0.007, "step": 208710 }, { "epoch": 4.248753180661578, "grad_norm": 0.006830460931859588, "learning_rate": 6.72053701572174e-07, "loss": 0.0003, "step": 208720 }, { "epoch": 4.248956743002545, "grad_norm": 0.002128776931315282, "learning_rate": 6.716979249254613e-07, "loss": 0.0001, "step": 208730 }, { "epoch": 4.249160305343511, "grad_norm": 0.0013138166241175213, "learning_rate": 6.713422356943855e-07, "loss": 0.058, "step": 208740 }, { "epoch": 4.249363867684479, "grad_norm": 0.03835669026325119, "learning_rate": 6.709866338861277e-07, "loss": 0.039, "step": 208750 }, { "epoch": 4.249567430025445, "grad_norm": 0.008380668734655998, "learning_rate": 6.706311195078696e-07, "loss": 0.0, "step": 208760 }, { "epoch": 4.249770992366412, "grad_norm": 0.02099973617411679, "learning_rate": 6.702756925667942e-07, "loss": 0.0545, "step": 208770 }, { "epoch": 4.2499745547073795, "grad_norm": 0.001549428817630767, "learning_rate": 6.699203530700765e-07, "loss": 0.0008, "step": 208780 }, { "epoch": 4.250178117048346, "grad_norm": 0.12260019656111987, "learning_rate": 6.695651010248949e-07, "loss": 0.0336, "step": 208790 }, { "epoch": 4.250381679389313, "grad_norm": 0.09292836963597965, "learning_rate": 6.692099364384235e-07, "loss": 0.0001, "step": 208800 }, { "epoch": 4.25058524173028, "grad_norm": 0.0026496952251380537, "learning_rate": 6.688548593178351e-07, "loss": 0.0002, "step": 208810 }, { "epoch": 4.250788804071247, "grad_norm": 0.002408994048757842, "learning_rate": 6.684998696703015e-07, "loss": 0.0002, "step": 208820 }, { "epoch": 4.250992366412214, "grad_norm": 0.027029650064929196, "learning_rate": 6.681449675029922e-07, "loss": 0.0004, "step": 208830 }, { "epoch": 4.25119592875318, "grad_norm": 0.016427384747495805, "learning_rate": 6.677901528230746e-07, "loss": 0.0053, "step": 208840 }, { "epoch": 4.251399491094148, "grad_norm": 0.0030684700500085383, "learning_rate": 6.674354256377147e-07, "loss": 0.0388, "step": 208850 }, { "epoch": 4.251603053435114, "grad_norm": 5.537316277083962, "learning_rate": 6.670807859540762e-07, "loss": 0.0499, "step": 208860 }, { "epoch": 4.251806615776081, "grad_norm": 0.0005919119420666726, "learning_rate": 6.667262337793234e-07, "loss": 0.0005, "step": 208870 }, { "epoch": 4.2520101781170485, "grad_norm": 0.028771514230228827, "learning_rate": 6.663717691206134e-07, "loss": 0.0299, "step": 208880 }, { "epoch": 4.252213740458015, "grad_norm": 0.0016612433262064613, "learning_rate": 6.660173919851081e-07, "loss": 0.0006, "step": 208890 }, { "epoch": 4.252417302798982, "grad_norm": 0.019173345230936632, "learning_rate": 6.656631023799653e-07, "loss": 0.036, "step": 208900 }, { "epoch": 4.252620865139949, "grad_norm": 0.0009142966204617155, "learning_rate": 6.653089003123364e-07, "loss": 0.0163, "step": 208910 }, { "epoch": 4.252824427480916, "grad_norm": 0.009240553060518403, "learning_rate": 6.64954785789379e-07, "loss": 0.0416, "step": 208920 }, { "epoch": 4.253027989821883, "grad_norm": 0.0154782961000236, "learning_rate": 6.646007588182424e-07, "loss": 0.0296, "step": 208930 }, { "epoch": 4.25323155216285, "grad_norm": 0.002398200147980114, "learning_rate": 6.642468194060775e-07, "loss": 0.0001, "step": 208940 }, { "epoch": 4.253435114503817, "grad_norm": 0.053083242619385, "learning_rate": 6.638929675600325e-07, "loss": 0.0205, "step": 208950 }, { "epoch": 4.253638676844783, "grad_norm": 0.005806041273638901, "learning_rate": 6.63539203287254e-07, "loss": 0.0002, "step": 208960 }, { "epoch": 4.253842239185751, "grad_norm": 0.049364476559922744, "learning_rate": 6.631855265948861e-07, "loss": 0.0122, "step": 208970 }, { "epoch": 4.2540458015267175, "grad_norm": 0.04831797993857212, "learning_rate": 6.628319374900727e-07, "loss": 0.0174, "step": 208980 }, { "epoch": 4.254249363867684, "grad_norm": 0.013593227840273536, "learning_rate": 6.62478435979954e-07, "loss": 0.0003, "step": 208990 }, { "epoch": 4.254452926208652, "grad_norm": 0.23466180801064127, "learning_rate": 6.621250220716707e-07, "loss": 0.0027, "step": 209000 }, { "epoch": 4.254656488549618, "grad_norm": 0.12557967133277384, "learning_rate": 6.617716957723597e-07, "loss": 0.0002, "step": 209010 }, { "epoch": 4.254860050890585, "grad_norm": 0.0003350638069674173, "learning_rate": 6.614184570891563e-07, "loss": 0.0002, "step": 209020 }, { "epoch": 4.2550636132315525, "grad_norm": 0.013050339773691315, "learning_rate": 6.610653060291955e-07, "loss": 0.0448, "step": 209030 }, { "epoch": 4.255267175572519, "grad_norm": 0.002849095470401416, "learning_rate": 6.607122425996094e-07, "loss": 0.0003, "step": 209040 }, { "epoch": 4.255470737913486, "grad_norm": 0.0019592591098821868, "learning_rate": 6.603592668075287e-07, "loss": 0.0002, "step": 209050 }, { "epoch": 4.255674300254453, "grad_norm": 0.016628420982223606, "learning_rate": 6.60006378660083e-07, "loss": 0.0003, "step": 209060 }, { "epoch": 4.25587786259542, "grad_norm": 9.540222499724015, "learning_rate": 6.596535781643959e-07, "loss": 0.0536, "step": 209070 }, { "epoch": 4.2560814249363865, "grad_norm": 0.025798647625330004, "learning_rate": 6.593008653275973e-07, "loss": 0.0092, "step": 209080 }, { "epoch": 4.256284987277354, "grad_norm": 0.002347007393313491, "learning_rate": 6.589482401568075e-07, "loss": 0.0436, "step": 209090 }, { "epoch": 4.256488549618321, "grad_norm": 0.012114017289768768, "learning_rate": 6.585957026591483e-07, "loss": 0.0158, "step": 209100 }, { "epoch": 4.256692111959287, "grad_norm": 0.003996594302053202, "learning_rate": 6.582432528417426e-07, "loss": 0.0002, "step": 209110 }, { "epoch": 4.256895674300255, "grad_norm": 0.07654773634222542, "learning_rate": 6.57890890711706e-07, "loss": 0.0002, "step": 209120 }, { "epoch": 4.2570992366412215, "grad_norm": 0.11554747351684692, "learning_rate": 6.575386162761538e-07, "loss": 0.0389, "step": 209130 }, { "epoch": 4.257302798982188, "grad_norm": 0.024838145189918392, "learning_rate": 6.571864295422043e-07, "loss": 0.0633, "step": 209140 }, { "epoch": 4.257506361323156, "grad_norm": 0.09531959963579315, "learning_rate": 6.568343305169677e-07, "loss": 0.0483, "step": 209150 }, { "epoch": 4.257709923664122, "grad_norm": 0.011508093481904758, "learning_rate": 6.564823192075554e-07, "loss": 0.0002, "step": 209160 }, { "epoch": 4.257913486005089, "grad_norm": 0.006537463195212904, "learning_rate": 6.561303956210774e-07, "loss": 0.0018, "step": 209170 }, { "epoch": 4.258117048346056, "grad_norm": 0.0070042447794389515, "learning_rate": 6.55778559764641e-07, "loss": 0.0592, "step": 209180 }, { "epoch": 4.258320610687023, "grad_norm": 0.005342439389244538, "learning_rate": 6.554268116453516e-07, "loss": 0.0005, "step": 209190 }, { "epoch": 4.25852417302799, "grad_norm": 0.004272879188211482, "learning_rate": 6.550751512703135e-07, "loss": 0.0001, "step": 209200 }, { "epoch": 4.258727735368957, "grad_norm": 0.003850207141702473, "learning_rate": 6.547235786466288e-07, "loss": 0.0484, "step": 209210 }, { "epoch": 4.258931297709924, "grad_norm": 0.010415845612844934, "learning_rate": 6.543720937813985e-07, "loss": 0.0001, "step": 209220 }, { "epoch": 4.2591348600508905, "grad_norm": 0.03708496973167049, "learning_rate": 6.540206966817208e-07, "loss": 0.0006, "step": 209230 }, { "epoch": 4.259338422391857, "grad_norm": 0.009805071405092632, "learning_rate": 6.53669387354694e-07, "loss": 0.0004, "step": 209240 }, { "epoch": 4.259541984732825, "grad_norm": 0.0007491953377162504, "learning_rate": 6.533181658074095e-07, "loss": 0.0002, "step": 209250 }, { "epoch": 4.259745547073791, "grad_norm": 0.003549685452868603, "learning_rate": 6.529670320469638e-07, "loss": 0.0642, "step": 209260 }, { "epoch": 4.259949109414758, "grad_norm": 0.0030962381380891064, "learning_rate": 6.526159860804493e-07, "loss": 0.0248, "step": 209270 }, { "epoch": 4.260152671755725, "grad_norm": 0.0023955046889657557, "learning_rate": 6.522650279149522e-07, "loss": 0.0003, "step": 209280 }, { "epoch": 4.260356234096692, "grad_norm": 13.930394428596863, "learning_rate": 6.519141575575649e-07, "loss": 0.1083, "step": 209290 }, { "epoch": 4.260559796437659, "grad_norm": 0.0310358277929036, "learning_rate": 6.515633750153705e-07, "loss": 0.0282, "step": 209300 }, { "epoch": 4.260763358778626, "grad_norm": 0.016194194112138474, "learning_rate": 6.512126802954533e-07, "loss": 0.0005, "step": 209310 }, { "epoch": 4.260966921119593, "grad_norm": 0.23929238293746613, "learning_rate": 6.508620734048993e-07, "loss": 0.0005, "step": 209320 }, { "epoch": 4.2611704834605595, "grad_norm": 8.180392495251672, "learning_rate": 6.505115543507867e-07, "loss": 0.0557, "step": 209330 }, { "epoch": 4.261374045801527, "grad_norm": 0.035872981612826954, "learning_rate": 6.50161123140195e-07, "loss": 0.0002, "step": 209340 }, { "epoch": 4.261577608142494, "grad_norm": 0.23729115626565433, "learning_rate": 6.498107797802017e-07, "loss": 0.0004, "step": 209350 }, { "epoch": 4.26178117048346, "grad_norm": 0.0033252821028858497, "learning_rate": 6.494605242778834e-07, "loss": 0.0005, "step": 209360 }, { "epoch": 4.261984732824428, "grad_norm": 0.05172601578132508, "learning_rate": 6.491103566403129e-07, "loss": 0.0007, "step": 209370 }, { "epoch": 4.262188295165394, "grad_norm": 0.003135746899350692, "learning_rate": 6.487602768745621e-07, "loss": 0.002, "step": 209380 }, { "epoch": 4.262391857506361, "grad_norm": 0.01196785417977951, "learning_rate": 6.484102849877027e-07, "loss": 0.0003, "step": 209390 }, { "epoch": 4.262595419847329, "grad_norm": 0.005173811658745637, "learning_rate": 6.480603809868019e-07, "loss": 0.0001, "step": 209400 }, { "epoch": 4.262798982188295, "grad_norm": 0.013842500601731169, "learning_rate": 6.477105648789273e-07, "loss": 0.0149, "step": 209410 }, { "epoch": 4.263002544529262, "grad_norm": 0.04358031086192567, "learning_rate": 6.473608366711431e-07, "loss": 0.0001, "step": 209420 }, { "epoch": 4.263206106870229, "grad_norm": 0.007629201333446553, "learning_rate": 6.470111963705134e-07, "loss": 0.0002, "step": 209430 }, { "epoch": 4.263409669211196, "grad_norm": 0.02825108182463753, "learning_rate": 6.466616439840989e-07, "loss": 0.0033, "step": 209440 }, { "epoch": 4.263613231552163, "grad_norm": 0.019925320909196593, "learning_rate": 6.463121795189609e-07, "loss": 0.0002, "step": 209450 }, { "epoch": 4.26381679389313, "grad_norm": 0.003629609738644118, "learning_rate": 6.459628029821535e-07, "loss": 0.0003, "step": 209460 }, { "epoch": 4.264020356234097, "grad_norm": 0.0007592930821681428, "learning_rate": 6.456135143807369e-07, "loss": 0.0001, "step": 209470 }, { "epoch": 4.264223918575063, "grad_norm": 0.003819266542982843, "learning_rate": 6.45264313721764e-07, "loss": 0.0001, "step": 209480 }, { "epoch": 4.26442748091603, "grad_norm": 0.003217963177797728, "learning_rate": 6.449152010122867e-07, "loss": 0.0001, "step": 209490 }, { "epoch": 4.264631043256998, "grad_norm": 0.03242907271314847, "learning_rate": 6.445661762593552e-07, "loss": 0.0222, "step": 209500 }, { "epoch": 4.264834605597964, "grad_norm": 0.0474604736385503, "learning_rate": 6.442172394700213e-07, "loss": 0.0003, "step": 209510 }, { "epoch": 4.265038167938931, "grad_norm": 0.004140893656852267, "learning_rate": 6.438683906513294e-07, "loss": 0.0001, "step": 209520 }, { "epoch": 4.265241730279898, "grad_norm": 0.004380009721352042, "learning_rate": 6.435196298103264e-07, "loss": 0.0003, "step": 209530 }, { "epoch": 4.265445292620865, "grad_norm": 0.0036176496904892406, "learning_rate": 6.431709569540551e-07, "loss": 0.0331, "step": 209540 }, { "epoch": 4.265648854961832, "grad_norm": 0.0025526898451062457, "learning_rate": 6.428223720895582e-07, "loss": 0.0025, "step": 209550 }, { "epoch": 4.265852417302799, "grad_norm": 0.06775920163017105, "learning_rate": 6.424738752238751e-07, "loss": 0.0327, "step": 209560 }, { "epoch": 4.266055979643766, "grad_norm": 0.04365640274158953, "learning_rate": 6.421254663640452e-07, "loss": 0.0551, "step": 209570 }, { "epoch": 4.2662595419847325, "grad_norm": 0.20762906438383646, "learning_rate": 6.41777145517104e-07, "loss": 0.0005, "step": 209580 }, { "epoch": 4.2664631043257, "grad_norm": 0.013529517993305156, "learning_rate": 6.414289126900869e-07, "loss": 0.0005, "step": 209590 }, { "epoch": 4.266666666666667, "grad_norm": 0.006493591352248369, "learning_rate": 6.41080767890026e-07, "loss": 0.0002, "step": 209600 }, { "epoch": 4.266870229007633, "grad_norm": 0.041037186901298706, "learning_rate": 6.407327111239553e-07, "loss": 0.0001, "step": 209610 }, { "epoch": 4.267073791348601, "grad_norm": 0.15843430319631738, "learning_rate": 6.403847423988995e-07, "loss": 0.0004, "step": 209620 }, { "epoch": 4.267277353689567, "grad_norm": 0.03285182302143429, "learning_rate": 6.4003686172189e-07, "loss": 0.0003, "step": 209630 }, { "epoch": 4.267480916030534, "grad_norm": 0.004430388887229367, "learning_rate": 6.39689069099953e-07, "loss": 0.0001, "step": 209640 }, { "epoch": 4.2676844783715016, "grad_norm": 0.0003168240602918727, "learning_rate": 6.393413645401092e-07, "loss": 0.0247, "step": 209650 }, { "epoch": 4.267888040712468, "grad_norm": 0.0015930244774529701, "learning_rate": 6.389937480493846e-07, "loss": 0.0683, "step": 209660 }, { "epoch": 4.268091603053435, "grad_norm": 0.0544602006253507, "learning_rate": 6.386462196347976e-07, "loss": 0.0005, "step": 209670 }, { "epoch": 4.268295165394402, "grad_norm": 0.014438178248533155, "learning_rate": 6.382987793033662e-07, "loss": 0.015, "step": 209680 }, { "epoch": 4.268498727735369, "grad_norm": 0.5134234345401633, "learning_rate": 6.379514270621112e-07, "loss": 0.0311, "step": 209690 }, { "epoch": 4.268702290076336, "grad_norm": 0.0035553882937332665, "learning_rate": 6.376041629180446e-07, "loss": 0.0001, "step": 209700 }, { "epoch": 4.268905852417303, "grad_norm": 0.0019507727699000497, "learning_rate": 6.372569868781791e-07, "loss": 0.0354, "step": 209710 }, { "epoch": 4.26910941475827, "grad_norm": 0.0009006675286325547, "learning_rate": 6.369098989495304e-07, "loss": 0.0009, "step": 209720 }, { "epoch": 4.269312977099236, "grad_norm": 0.00785137690199159, "learning_rate": 6.365628991391049e-07, "loss": 0.0008, "step": 209730 }, { "epoch": 4.269516539440204, "grad_norm": 0.01013585030206579, "learning_rate": 6.362159874539114e-07, "loss": 0.0012, "step": 209740 }, { "epoch": 4.269720101781171, "grad_norm": 0.037869922969238703, "learning_rate": 6.358691639009562e-07, "loss": 0.0218, "step": 209750 }, { "epoch": 4.269923664122137, "grad_norm": 0.009619481783687604, "learning_rate": 6.355224284872446e-07, "loss": 0.0141, "step": 209760 }, { "epoch": 4.270127226463105, "grad_norm": 0.0072963299980106375, "learning_rate": 6.35175781219779e-07, "loss": 0.0509, "step": 209770 }, { "epoch": 4.270330788804071, "grad_norm": 0.05041978287361659, "learning_rate": 6.348292221055607e-07, "loss": 0.0381, "step": 209780 }, { "epoch": 4.270534351145038, "grad_norm": 0.01738127007729492, "learning_rate": 6.344827511515883e-07, "loss": 0.009, "step": 209790 }, { "epoch": 4.2707379134860055, "grad_norm": 0.0049737166494291, "learning_rate": 6.341363683648593e-07, "loss": 0.0001, "step": 209800 }, { "epoch": 4.270941475826972, "grad_norm": 0.0055930673209446365, "learning_rate": 6.337900737523695e-07, "loss": 0.0001, "step": 209810 }, { "epoch": 4.271145038167939, "grad_norm": 0.0049576024550029565, "learning_rate": 6.33443867321114e-07, "loss": 0.0003, "step": 209820 }, { "epoch": 4.271348600508906, "grad_norm": 0.0007872443288966615, "learning_rate": 6.330977490780815e-07, "loss": 0.0105, "step": 209830 }, { "epoch": 4.271552162849873, "grad_norm": 0.0016514445268924673, "learning_rate": 6.327517190302656e-07, "loss": 0.0002, "step": 209840 }, { "epoch": 4.27175572519084, "grad_norm": 0.004494656602201233, "learning_rate": 6.324057771846548e-07, "loss": 0.0001, "step": 209850 }, { "epoch": 4.271959287531806, "grad_norm": 0.04002134065093731, "learning_rate": 6.320599235482322e-07, "loss": 0.0008, "step": 209860 }, { "epoch": 4.272162849872774, "grad_norm": 0.015422109060520174, "learning_rate": 6.317141581279867e-07, "loss": 0.058, "step": 209870 }, { "epoch": 4.27236641221374, "grad_norm": 0.014631097008588103, "learning_rate": 6.313684809309012e-07, "loss": 0.001, "step": 209880 }, { "epoch": 4.272569974554707, "grad_norm": 0.00019952837313340542, "learning_rate": 6.310228919639539e-07, "loss": 0.028, "step": 209890 }, { "epoch": 4.2727735368956745, "grad_norm": 0.007791048934205758, "learning_rate": 6.306773912341285e-07, "loss": 0.0008, "step": 209900 }, { "epoch": 4.272977099236641, "grad_norm": 1.5493404194261698e-07, "learning_rate": 6.303319787484003e-07, "loss": 0.0277, "step": 209910 }, { "epoch": 4.273180661577608, "grad_norm": 0.18113780515679753, "learning_rate": 6.29986654513745e-07, "loss": 0.0035, "step": 209920 }, { "epoch": 4.273384223918575, "grad_norm": 0.0046016702561964195, "learning_rate": 6.296414185371386e-07, "loss": 0.0001, "step": 209930 }, { "epoch": 4.273587786259542, "grad_norm": 0.0038592719563027046, "learning_rate": 6.292962708255517e-07, "loss": 0.0548, "step": 209940 }, { "epoch": 4.273791348600509, "grad_norm": 0.014535935461130906, "learning_rate": 6.289512113859569e-07, "loss": 0.0105, "step": 209950 }, { "epoch": 4.273994910941476, "grad_norm": 0.06630262090107665, "learning_rate": 6.286062402253218e-07, "loss": 0.0007, "step": 209960 }, { "epoch": 4.274198473282443, "grad_norm": 0.012932273586930164, "learning_rate": 6.282613573506147e-07, "loss": 0.0092, "step": 209970 }, { "epoch": 4.274402035623409, "grad_norm": 0.005759742896984744, "learning_rate": 6.279165627687994e-07, "loss": 0.0419, "step": 209980 }, { "epoch": 4.274605597964377, "grad_norm": 0.0031016265853131913, "learning_rate": 6.275718564868405e-07, "loss": 0.0134, "step": 209990 }, { "epoch": 4.2748091603053435, "grad_norm": 0.003588565750238171, "learning_rate": 6.272272385117e-07, "loss": 0.0027, "step": 210000 }, { "epoch": 4.27501272264631, "grad_norm": 0.004595283932614301, "learning_rate": 6.26882708850337e-07, "loss": 0.0001, "step": 210010 }, { "epoch": 4.275216284987278, "grad_norm": 6.565216642317225, "learning_rate": 6.265382675097104e-07, "loss": 0.0217, "step": 210020 }, { "epoch": 4.275419847328244, "grad_norm": 0.005410435403538527, "learning_rate": 6.261939144967777e-07, "loss": 0.0001, "step": 210030 }, { "epoch": 4.275623409669211, "grad_norm": 0.000844382114294786, "learning_rate": 6.258496498184912e-07, "loss": 0.0001, "step": 210040 }, { "epoch": 4.2758269720101785, "grad_norm": 0.013943256239324143, "learning_rate": 6.255054734818039e-07, "loss": 0.0001, "step": 210050 }, { "epoch": 4.276030534351145, "grad_norm": 0.008685996688607324, "learning_rate": 6.2516138549367e-07, "loss": 0.0001, "step": 210060 }, { "epoch": 4.276234096692112, "grad_norm": 0.03887021823931082, "learning_rate": 6.248173858610351e-07, "loss": 0.0026, "step": 210070 }, { "epoch": 4.276437659033079, "grad_norm": 0.0020050712698298214, "learning_rate": 6.244734745908477e-07, "loss": 0.0002, "step": 210080 }, { "epoch": 4.276641221374046, "grad_norm": 0.0005671023923169708, "learning_rate": 6.241296516900563e-07, "loss": 0.0357, "step": 210090 }, { "epoch": 4.2768447837150125, "grad_norm": 0.026848462766128834, "learning_rate": 6.237859171656013e-07, "loss": 0.0187, "step": 210100 }, { "epoch": 4.27704834605598, "grad_norm": 0.009314864828442654, "learning_rate": 6.234422710244265e-07, "loss": 0.0383, "step": 210110 }, { "epoch": 4.277251908396947, "grad_norm": 0.012251627142966492, "learning_rate": 6.230987132734717e-07, "loss": 0.0001, "step": 210120 }, { "epoch": 4.277455470737913, "grad_norm": 0.01990258724059277, "learning_rate": 6.22755243919676e-07, "loss": 0.0176, "step": 210130 }, { "epoch": 4.27765903307888, "grad_norm": 0.00392068043176274, "learning_rate": 6.22411862969976e-07, "loss": 0.0001, "step": 210140 }, { "epoch": 4.2778625954198475, "grad_norm": 0.0018709396300819827, "learning_rate": 6.220685704313067e-07, "loss": 0.0002, "step": 210150 }, { "epoch": 4.278066157760814, "grad_norm": 0.0036384207447650307, "learning_rate": 6.217253663106015e-07, "loss": 0.0151, "step": 210160 }, { "epoch": 4.278269720101781, "grad_norm": 0.0010345700836900827, "learning_rate": 6.213822506147915e-07, "loss": 0.0013, "step": 210170 }, { "epoch": 4.278473282442748, "grad_norm": 0.03915919521595806, "learning_rate": 6.210392233508067e-07, "loss": 0.0001, "step": 210180 }, { "epoch": 4.278676844783715, "grad_norm": 0.00038121525468263654, "learning_rate": 6.206962845255754e-07, "loss": 0.004, "step": 210190 }, { "epoch": 4.2788804071246815, "grad_norm": 5.725149205831202, "learning_rate": 6.203534341460216e-07, "loss": 0.0049, "step": 210200 }, { "epoch": 4.279083969465649, "grad_norm": 0.005088412720939307, "learning_rate": 6.200106722190719e-07, "loss": 0.0225, "step": 210210 }, { "epoch": 4.279287531806616, "grad_norm": 0.11992816790885472, "learning_rate": 6.196679987516497e-07, "loss": 0.0005, "step": 210220 }, { "epoch": 4.279491094147582, "grad_norm": 0.009948956297056373, "learning_rate": 6.19325413750671e-07, "loss": 0.0007, "step": 210230 }, { "epoch": 4.27969465648855, "grad_norm": 0.006292036880919014, "learning_rate": 6.189829172230593e-07, "loss": 0.0001, "step": 210240 }, { "epoch": 4.2798982188295165, "grad_norm": 0.008695333039673207, "learning_rate": 6.186405091757319e-07, "loss": 0.0002, "step": 210250 }, { "epoch": 4.280101781170483, "grad_norm": 4.751304927045618e-05, "learning_rate": 6.182981896155998e-07, "loss": 0.0, "step": 210260 }, { "epoch": 4.280305343511451, "grad_norm": 0.026871964450740502, "learning_rate": 6.17955958549582e-07, "loss": 0.0444, "step": 210270 }, { "epoch": 4.280508905852417, "grad_norm": 0.0035282862562253966, "learning_rate": 6.176138159845863e-07, "loss": 0.0011, "step": 210280 }, { "epoch": 4.280712468193384, "grad_norm": 0.05843943952609155, "learning_rate": 6.172717619275226e-07, "loss": 0.0002, "step": 210290 }, { "epoch": 4.280916030534351, "grad_norm": 0.01746605840841805, "learning_rate": 6.169297963853033e-07, "loss": 0.0002, "step": 210300 }, { "epoch": 4.281119592875318, "grad_norm": 4.775078601590564, "learning_rate": 6.165879193648305e-07, "loss": 0.0261, "step": 210310 }, { "epoch": 4.281323155216285, "grad_norm": 0.008637270650820764, "learning_rate": 6.162461308730111e-07, "loss": 0.0006, "step": 210320 }, { "epoch": 4.281526717557252, "grad_norm": 7.1085768887323235, "learning_rate": 6.159044309167467e-07, "loss": 0.0159, "step": 210330 }, { "epoch": 4.281730279898219, "grad_norm": 0.005509227175093571, "learning_rate": 6.155628195029395e-07, "loss": 0.0002, "step": 210340 }, { "epoch": 4.2819338422391855, "grad_norm": 0.0034002006052689703, "learning_rate": 6.152212966384879e-07, "loss": 0.0062, "step": 210350 }, { "epoch": 4.282137404580153, "grad_norm": 0.008856813577850979, "learning_rate": 6.148798623302898e-07, "loss": 0.026, "step": 210360 }, { "epoch": 4.28234096692112, "grad_norm": 0.004686279306776706, "learning_rate": 6.145385165852413e-07, "loss": 0.0002, "step": 210370 }, { "epoch": 4.282544529262086, "grad_norm": 9.192284295286193, "learning_rate": 6.141972594102352e-07, "loss": 0.0346, "step": 210380 }, { "epoch": 4.282748091603054, "grad_norm": 0.0036170461774675342, "learning_rate": 6.138560908121649e-07, "loss": 0.1039, "step": 210390 }, { "epoch": 4.28295165394402, "grad_norm": 0.31307200514315153, "learning_rate": 6.13515010797921e-07, "loss": 0.0255, "step": 210400 }, { "epoch": 4.283155216284987, "grad_norm": 0.011515351833978334, "learning_rate": 6.13174019374389e-07, "loss": 0.01, "step": 210410 }, { "epoch": 4.283358778625955, "grad_norm": 0.0009872387023885884, "learning_rate": 6.128331165484592e-07, "loss": 0.0001, "step": 210420 }, { "epoch": 4.283562340966921, "grad_norm": 0.05661978422358458, "learning_rate": 6.124923023270163e-07, "loss": 0.0448, "step": 210430 }, { "epoch": 4.283765903307888, "grad_norm": 0.0011088686639132933, "learning_rate": 6.121515767169401e-07, "loss": 0.0001, "step": 210440 }, { "epoch": 4.283969465648855, "grad_norm": 0.008207669242840408, "learning_rate": 6.118109397251154e-07, "loss": 0.0002, "step": 210450 }, { "epoch": 4.284173027989822, "grad_norm": 12.688272332680718, "learning_rate": 6.114703913584219e-07, "loss": 0.0263, "step": 210460 }, { "epoch": 4.284376590330789, "grad_norm": 0.002807730793969281, "learning_rate": 6.111299316237352e-07, "loss": 0.0403, "step": 210470 }, { "epoch": 4.284580152671756, "grad_norm": 0.02609310691483337, "learning_rate": 6.107895605279318e-07, "loss": 0.0517, "step": 210480 }, { "epoch": 4.284783715012723, "grad_norm": 0.006107735801529755, "learning_rate": 6.104492780778871e-07, "loss": 0.0001, "step": 210490 }, { "epoch": 4.2849872773536894, "grad_norm": 0.021829058740862543, "learning_rate": 6.101090842804724e-07, "loss": 0.0001, "step": 210500 }, { "epoch": 4.285190839694656, "grad_norm": 0.012610473027553398, "learning_rate": 6.097689791425592e-07, "loss": 0.0035, "step": 210510 }, { "epoch": 4.285394402035624, "grad_norm": 0.0029526955554215538, "learning_rate": 6.094289626710165e-07, "loss": 0.0054, "step": 210520 }, { "epoch": 4.28559796437659, "grad_norm": 0.011157823726976516, "learning_rate": 6.090890348727102e-07, "loss": 0.0334, "step": 210530 }, { "epoch": 4.285801526717557, "grad_norm": 0.031023039585146718, "learning_rate": 6.087491957545066e-07, "loss": 0.0012, "step": 210540 }, { "epoch": 4.286005089058524, "grad_norm": 0.0033811047649737387, "learning_rate": 6.084094453232692e-07, "loss": 0.0003, "step": 210550 }, { "epoch": 4.286208651399491, "grad_norm": 0.01961469362325652, "learning_rate": 6.080697835858584e-07, "loss": 0.0843, "step": 210560 }, { "epoch": 4.286412213740458, "grad_norm": 0.023001964748279886, "learning_rate": 6.077302105491361e-07, "loss": 0.0002, "step": 210570 }, { "epoch": 4.286615776081425, "grad_norm": 0.0011166277194806361, "learning_rate": 6.073907262199591e-07, "loss": 0.0361, "step": 210580 }, { "epoch": 4.286819338422392, "grad_norm": 0.002716416872094187, "learning_rate": 6.070513306051851e-07, "loss": 0.0044, "step": 210590 }, { "epoch": 4.2870229007633585, "grad_norm": 0.0002176125511602999, "learning_rate": 6.067120237116653e-07, "loss": 0.0021, "step": 210600 }, { "epoch": 4.287226463104326, "grad_norm": 0.008738026020203581, "learning_rate": 6.063728055462553e-07, "loss": 0.0008, "step": 210610 }, { "epoch": 4.287430025445293, "grad_norm": 0.005725844828811551, "learning_rate": 6.06033676115807e-07, "loss": 0.0216, "step": 210620 }, { "epoch": 4.287633587786259, "grad_norm": 0.005163027497159417, "learning_rate": 6.056946354271659e-07, "loss": 0.0001, "step": 210630 }, { "epoch": 4.287837150127227, "grad_norm": 0.41036185404911374, "learning_rate": 6.053556834871837e-07, "loss": 0.0002, "step": 210640 }, { "epoch": 4.288040712468193, "grad_norm": 0.003985164329446471, "learning_rate": 6.050168203027023e-07, "loss": 0.0001, "step": 210650 }, { "epoch": 4.28824427480916, "grad_norm": 52.17650687759172, "learning_rate": 6.046780458805662e-07, "loss": 0.044, "step": 210660 }, { "epoch": 4.288447837150128, "grad_norm": 0.000641210955583961, "learning_rate": 6.043393602276198e-07, "loss": 0.0001, "step": 210670 }, { "epoch": 4.288651399491094, "grad_norm": 0.8132478929429887, "learning_rate": 6.040007633507012e-07, "loss": 0.0007, "step": 210680 }, { "epoch": 4.288854961832061, "grad_norm": 8.865512339264368, "learning_rate": 6.036622552566485e-07, "loss": 0.0063, "step": 210690 }, { "epoch": 4.289058524173028, "grad_norm": 0.004798122460134967, "learning_rate": 6.033238359522992e-07, "loss": 0.0001, "step": 210700 }, { "epoch": 4.289262086513995, "grad_norm": 0.021457058153884514, "learning_rate": 6.029855054444883e-07, "loss": 0.0002, "step": 210710 }, { "epoch": 4.289465648854962, "grad_norm": 0.06108177325304803, "learning_rate": 6.026472637400482e-07, "loss": 0.0001, "step": 210720 }, { "epoch": 4.289669211195929, "grad_norm": 0.006119818264914564, "learning_rate": 6.023091108458101e-07, "loss": 0.0001, "step": 210730 }, { "epoch": 4.289872773536896, "grad_norm": 0.005863083881517421, "learning_rate": 6.019710467686041e-07, "loss": 0.0271, "step": 210740 }, { "epoch": 4.290076335877862, "grad_norm": 0.0029453195792686977, "learning_rate": 6.016330715152568e-07, "loss": 0.0001, "step": 210750 }, { "epoch": 4.290279898218829, "grad_norm": 0.00726867531641675, "learning_rate": 6.012951850925952e-07, "loss": 0.0263, "step": 210760 }, { "epoch": 4.290483460559797, "grad_norm": 0.0014837689459866513, "learning_rate": 6.009573875074437e-07, "loss": 0.0516, "step": 210770 }, { "epoch": 4.290687022900763, "grad_norm": 0.0043745987159499265, "learning_rate": 6.006196787666218e-07, "loss": 0.0001, "step": 210780 }, { "epoch": 4.29089058524173, "grad_norm": 0.005677924744463943, "learning_rate": 6.002820588769526e-07, "loss": 0.0033, "step": 210790 }, { "epoch": 4.291094147582697, "grad_norm": 4.803224120680963e-05, "learning_rate": 5.999445278452554e-07, "loss": 0.0192, "step": 210800 }, { "epoch": 4.291297709923664, "grad_norm": 0.06066104068226329, "learning_rate": 5.996070856783432e-07, "loss": 0.0472, "step": 210810 }, { "epoch": 4.291501272264631, "grad_norm": 0.06310517150649951, "learning_rate": 5.992697323830349e-07, "loss": 0.0383, "step": 210820 }, { "epoch": 4.291704834605598, "grad_norm": 0.0029011849317494747, "learning_rate": 5.989324679661435e-07, "loss": 0.0001, "step": 210830 }, { "epoch": 4.291908396946565, "grad_norm": 0.001002012765667002, "learning_rate": 5.985952924344768e-07, "loss": 0.0006, "step": 210840 }, { "epoch": 4.292111959287531, "grad_norm": 0.14451401427210187, "learning_rate": 5.9825820579485e-07, "loss": 0.0179, "step": 210850 }, { "epoch": 4.292315521628499, "grad_norm": 0.002763236159572974, "learning_rate": 5.979212080540664e-07, "loss": 0.0148, "step": 210860 }, { "epoch": 4.292519083969466, "grad_norm": 0.011459488135715687, "learning_rate": 5.975842992189345e-07, "loss": 0.0334, "step": 210870 }, { "epoch": 4.292722646310432, "grad_norm": 7.297826119960671, "learning_rate": 5.972474792962573e-07, "loss": 0.0271, "step": 210880 }, { "epoch": 4.2929262086514, "grad_norm": 0.002289505897962172, "learning_rate": 5.969107482928387e-07, "loss": 0.0001, "step": 210890 }, { "epoch": 4.293129770992366, "grad_norm": 0.03468647810877191, "learning_rate": 5.965741062154784e-07, "loss": 0.0264, "step": 210900 }, { "epoch": 4.293333333333333, "grad_norm": 0.028742072563463818, "learning_rate": 5.962375530709752e-07, "loss": 0.0001, "step": 210910 }, { "epoch": 4.2935368956743005, "grad_norm": 0.07411546820927928, "learning_rate": 5.959010888661271e-07, "loss": 0.0057, "step": 210920 }, { "epoch": 4.293740458015267, "grad_norm": 0.003237783839184927, "learning_rate": 5.955647136077286e-07, "loss": 0.049, "step": 210930 }, { "epoch": 4.293944020356234, "grad_norm": 0.002499651436089886, "learning_rate": 5.952284273025737e-07, "loss": 0.0089, "step": 210940 }, { "epoch": 4.294147582697201, "grad_norm": 0.12294294802106638, "learning_rate": 5.948922299574538e-07, "loss": 0.0312, "step": 210950 }, { "epoch": 4.294351145038168, "grad_norm": 8.554997067446463, "learning_rate": 5.945561215791596e-07, "loss": 0.0397, "step": 210960 }, { "epoch": 4.294554707379135, "grad_norm": 0.0030569855998796194, "learning_rate": 5.942201021744781e-07, "loss": 0.0003, "step": 210970 }, { "epoch": 4.294758269720102, "grad_norm": 0.2979397141867509, "learning_rate": 5.93884171750197e-07, "loss": 0.0347, "step": 210980 }, { "epoch": 4.294961832061069, "grad_norm": 0.004504011869283784, "learning_rate": 5.935483303130996e-07, "loss": 0.0003, "step": 210990 }, { "epoch": 4.295165394402035, "grad_norm": 0.007965130381056292, "learning_rate": 5.932125778699688e-07, "loss": 0.0002, "step": 211000 }, { "epoch": 4.295368956743003, "grad_norm": 0.007293492654462975, "learning_rate": 5.928769144275875e-07, "loss": 0.0086, "step": 211010 }, { "epoch": 4.2955725190839695, "grad_norm": 0.028500374840408276, "learning_rate": 5.925413399927321e-07, "loss": 0.0155, "step": 211020 }, { "epoch": 4.295776081424936, "grad_norm": 0.0004699152560682006, "learning_rate": 5.922058545721798e-07, "loss": 0.0001, "step": 211030 }, { "epoch": 4.295979643765904, "grad_norm": 0.04573550266162991, "learning_rate": 5.918704581727097e-07, "loss": 0.0004, "step": 211040 }, { "epoch": 4.29618320610687, "grad_norm": 0.0016686606025764785, "learning_rate": 5.915351508010919e-07, "loss": 0.0551, "step": 211050 }, { "epoch": 4.296386768447837, "grad_norm": 0.0163892844387053, "learning_rate": 5.911999324641005e-07, "loss": 0.079, "step": 211060 }, { "epoch": 4.2965903307888045, "grad_norm": 0.010663588957712248, "learning_rate": 5.908648031685043e-07, "loss": 0.0001, "step": 211070 }, { "epoch": 4.296793893129771, "grad_norm": 0.0045287815296553725, "learning_rate": 5.905297629210727e-07, "loss": 0.0396, "step": 211080 }, { "epoch": 4.296997455470738, "grad_norm": 0.0832766807067292, "learning_rate": 5.901948117285722e-07, "loss": 0.0002, "step": 211090 }, { "epoch": 4.297201017811705, "grad_norm": 0.4461850758639586, "learning_rate": 5.89859949597767e-07, "loss": 0.0098, "step": 211100 }, { "epoch": 4.297404580152672, "grad_norm": 0.010672344302714375, "learning_rate": 5.895251765354199e-07, "loss": 0.0097, "step": 211110 }, { "epoch": 4.2976081424936385, "grad_norm": 0.03345137673641765, "learning_rate": 5.891904925482927e-07, "loss": 0.0001, "step": 211120 }, { "epoch": 4.297811704834606, "grad_norm": 0.22635329190193842, "learning_rate": 5.888558976431447e-07, "loss": 0.0019, "step": 211130 }, { "epoch": 4.298015267175573, "grad_norm": 0.005867346755125345, "learning_rate": 5.885213918267346e-07, "loss": 0.0291, "step": 211140 }, { "epoch": 4.298218829516539, "grad_norm": 0.020892198964923534, "learning_rate": 5.881869751058144e-07, "loss": 0.0003, "step": 211150 }, { "epoch": 4.298422391857506, "grad_norm": 0.008028593401497287, "learning_rate": 5.878526474871421e-07, "loss": 0.0408, "step": 211160 }, { "epoch": 4.2986259541984735, "grad_norm": 0.013140326924905433, "learning_rate": 5.875184089774694e-07, "loss": 0.0006, "step": 211170 }, { "epoch": 4.29882951653944, "grad_norm": 0.04359888746466261, "learning_rate": 5.871842595835436e-07, "loss": 0.0002, "step": 211180 }, { "epoch": 4.299033078880407, "grad_norm": 86.16723658403318, "learning_rate": 5.868501993121162e-07, "loss": 0.0115, "step": 211190 }, { "epoch": 4.299236641221374, "grad_norm": 0.006217876555664232, "learning_rate": 5.865162281699338e-07, "loss": 0.0003, "step": 211200 }, { "epoch": 4.299440203562341, "grad_norm": 0.33729775608234364, "learning_rate": 5.861823461637395e-07, "loss": 0.0449, "step": 211210 }, { "epoch": 4.2996437659033075, "grad_norm": 0.003591820413900907, "learning_rate": 5.858485533002789e-07, "loss": 0.0003, "step": 211220 }, { "epoch": 4.299847328244275, "grad_norm": 0.0023865423409352056, "learning_rate": 5.855148495862917e-07, "loss": 0.0246, "step": 211230 }, { "epoch": 4.300050890585242, "grad_norm": 0.00244961284677992, "learning_rate": 5.851812350285164e-07, "loss": 0.0342, "step": 211240 }, { "epoch": 4.300254452926208, "grad_norm": 0.01867446271242562, "learning_rate": 5.848477096336946e-07, "loss": 0.0001, "step": 211250 }, { "epoch": 4.300458015267176, "grad_norm": 0.009741358926196498, "learning_rate": 5.84514273408559e-07, "loss": 0.0109, "step": 211260 }, { "epoch": 4.3006615776081425, "grad_norm": 0.006048480885046237, "learning_rate": 5.841809263598435e-07, "loss": 0.0001, "step": 211270 }, { "epoch": 4.300865139949109, "grad_norm": 0.0033797920142787717, "learning_rate": 5.838476684942845e-07, "loss": 0.0002, "step": 211280 }, { "epoch": 4.301068702290077, "grad_norm": 0.1546456580247777, "learning_rate": 5.835144998186082e-07, "loss": 0.0008, "step": 211290 }, { "epoch": 4.301272264631043, "grad_norm": 0.02127909884773229, "learning_rate": 5.831814203395447e-07, "loss": 0.0014, "step": 211300 }, { "epoch": 4.30147582697201, "grad_norm": 0.005393291532558031, "learning_rate": 5.828484300638215e-07, "loss": 0.0152, "step": 211310 }, { "epoch": 4.301679389312977, "grad_norm": 0.006958097448845219, "learning_rate": 5.825155289981638e-07, "loss": 0.0002, "step": 211320 }, { "epoch": 4.301882951653944, "grad_norm": 0.044551814775576445, "learning_rate": 5.821827171492944e-07, "loss": 0.0004, "step": 211330 }, { "epoch": 4.302086513994911, "grad_norm": 0.18153864512291693, "learning_rate": 5.818499945239353e-07, "loss": 0.0027, "step": 211340 }, { "epoch": 4.302290076335878, "grad_norm": 0.0022086564589767093, "learning_rate": 5.815173611288067e-07, "loss": 0.0019, "step": 211350 }, { "epoch": 4.302493638676845, "grad_norm": 2.265656540474832, "learning_rate": 5.811848169706241e-07, "loss": 0.016, "step": 211360 }, { "epoch": 4.3026972010178115, "grad_norm": 0.004504215517699602, "learning_rate": 5.808523620561068e-07, "loss": 0.0227, "step": 211370 }, { "epoch": 4.302900763358779, "grad_norm": 0.0030779082405792816, "learning_rate": 5.805199963919688e-07, "loss": 0.0101, "step": 211380 }, { "epoch": 4.303104325699746, "grad_norm": 0.003882167920063119, "learning_rate": 5.801877199849193e-07, "loss": 0.0287, "step": 211390 }, { "epoch": 4.303307888040712, "grad_norm": 0.06489986325039164, "learning_rate": 5.798555328416727e-07, "loss": 0.0152, "step": 211400 }, { "epoch": 4.303511450381679, "grad_norm": 0.009037292727896457, "learning_rate": 5.795234349689383e-07, "loss": 0.0303, "step": 211410 }, { "epoch": 4.3037150127226464, "grad_norm": 0.017388452860435784, "learning_rate": 5.791914263734188e-07, "loss": 0.0001, "step": 211420 }, { "epoch": 4.303918575063613, "grad_norm": 0.06521045163792721, "learning_rate": 5.788595070618247e-07, "loss": 0.0685, "step": 211430 }, { "epoch": 4.30412213740458, "grad_norm": 0.03410221939504741, "learning_rate": 5.785276770408565e-07, "loss": 0.0001, "step": 211440 }, { "epoch": 4.304325699745547, "grad_norm": 0.0007507936108161772, "learning_rate": 5.781959363172163e-07, "loss": 0.0, "step": 211450 }, { "epoch": 4.304529262086514, "grad_norm": 0.0001942915494862238, "learning_rate": 5.778642848976046e-07, "loss": 0.0424, "step": 211460 }, { "epoch": 4.3047328244274805, "grad_norm": 0.08150747727364702, "learning_rate": 5.775327227887195e-07, "loss": 0.0237, "step": 211470 }, { "epoch": 4.304936386768448, "grad_norm": 0.0066331469163397475, "learning_rate": 5.772012499972568e-07, "loss": 0.0, "step": 211480 }, { "epoch": 4.305139949109415, "grad_norm": 0.11008659797139543, "learning_rate": 5.768698665299116e-07, "loss": 0.0002, "step": 211490 }, { "epoch": 4.305343511450381, "grad_norm": 0.0019784954675040054, "learning_rate": 5.765385723933765e-07, "loss": 0.045, "step": 211500 }, { "epoch": 4.305547073791349, "grad_norm": 0.0012070325951024975, "learning_rate": 5.76207367594342e-07, "loss": 0.0001, "step": 211510 }, { "epoch": 4.3057506361323155, "grad_norm": 0.017318768159267073, "learning_rate": 5.758762521394979e-07, "loss": 0.0001, "step": 211520 }, { "epoch": 4.305954198473282, "grad_norm": 0.003708284864865279, "learning_rate": 5.755452260355305e-07, "loss": 0.0001, "step": 211530 }, { "epoch": 4.30615776081425, "grad_norm": 0.005637029373662565, "learning_rate": 5.752142892891266e-07, "loss": 0.0177, "step": 211540 }, { "epoch": 4.306361323155216, "grad_norm": 0.0032320215205023746, "learning_rate": 5.748834419069687e-07, "loss": 0.0005, "step": 211550 }, { "epoch": 4.306564885496183, "grad_norm": 0.008356395859754591, "learning_rate": 5.745526838957394e-07, "loss": 0.0003, "step": 211560 }, { "epoch": 4.30676844783715, "grad_norm": 0.009606868624984291, "learning_rate": 5.742220152621197e-07, "loss": 0.0466, "step": 211570 }, { "epoch": 4.306972010178117, "grad_norm": 0.004296295471034534, "learning_rate": 5.738914360127846e-07, "loss": 0.0115, "step": 211580 }, { "epoch": 4.307175572519084, "grad_norm": 1.2893800738842631, "learning_rate": 5.735609461544151e-07, "loss": 0.0005, "step": 211590 }, { "epoch": 4.307379134860051, "grad_norm": 0.002244240508264661, "learning_rate": 5.732305456936821e-07, "loss": 0.0002, "step": 211600 }, { "epoch": 4.307582697201018, "grad_norm": 4.625991717909388, "learning_rate": 5.729002346372586e-07, "loss": 0.0014, "step": 211610 }, { "epoch": 4.3077862595419845, "grad_norm": 0.0021076871687800785, "learning_rate": 5.725700129918193e-07, "loss": 0.0203, "step": 211620 }, { "epoch": 4.307989821882952, "grad_norm": 0.00018455557925494513, "learning_rate": 5.7223988076403e-07, "loss": 0.0003, "step": 211630 }, { "epoch": 4.308193384223919, "grad_norm": 0.08091934703898955, "learning_rate": 5.719098379605586e-07, "loss": 0.0006, "step": 211640 }, { "epoch": 4.308396946564885, "grad_norm": 0.00135124309613071, "learning_rate": 5.715798845880727e-07, "loss": 0.0023, "step": 211650 }, { "epoch": 4.308600508905853, "grad_norm": 0.003570765255653587, "learning_rate": 5.712500206532346e-07, "loss": 0.0057, "step": 211660 }, { "epoch": 4.308804071246819, "grad_norm": 0.028022403670687385, "learning_rate": 5.709202461627056e-07, "loss": 0.0001, "step": 211670 }, { "epoch": 4.309007633587786, "grad_norm": 0.008471141737200094, "learning_rate": 5.705905611231477e-07, "loss": 0.0271, "step": 211680 }, { "epoch": 4.309211195928754, "grad_norm": 0.0025383258254843897, "learning_rate": 5.702609655412178e-07, "loss": 0.0335, "step": 211690 }, { "epoch": 4.30941475826972, "grad_norm": 0.00013282542606656768, "learning_rate": 5.699314594235734e-07, "loss": 0.0001, "step": 211700 }, { "epoch": 4.309618320610687, "grad_norm": 0.026625279638836544, "learning_rate": 5.696020427768689e-07, "loss": 0.0001, "step": 211710 }, { "epoch": 4.309821882951654, "grad_norm": 0.003201268796766343, "learning_rate": 5.692727156077588e-07, "loss": 0.0003, "step": 211720 }, { "epoch": 4.310025445292621, "grad_norm": 0.01894674313671001, "learning_rate": 5.689434779228908e-07, "loss": 0.0003, "step": 211730 }, { "epoch": 4.310229007633588, "grad_norm": 0.002134198374567204, "learning_rate": 5.686143297289176e-07, "loss": 0.0002, "step": 211740 }, { "epoch": 4.310432569974555, "grad_norm": 0.005835494233364691, "learning_rate": 5.68285271032486e-07, "loss": 0.086, "step": 211750 }, { "epoch": 4.310636132315522, "grad_norm": 0.014881808907718263, "learning_rate": 5.6795630184024e-07, "loss": 0.0002, "step": 211760 }, { "epoch": 4.310839694656488, "grad_norm": 0.0010655655106088027, "learning_rate": 5.676274221588257e-07, "loss": 0.0054, "step": 211770 }, { "epoch": 4.311043256997456, "grad_norm": 0.0871726203632292, "learning_rate": 5.672986319948854e-07, "loss": 0.0019, "step": 211780 }, { "epoch": 4.311246819338423, "grad_norm": 0.004161617486469197, "learning_rate": 5.669699313550564e-07, "loss": 0.0358, "step": 211790 }, { "epoch": 4.311450381679389, "grad_norm": 0.0009179555174581764, "learning_rate": 5.666413202459814e-07, "loss": 0.0388, "step": 211800 }, { "epoch": 4.311653944020356, "grad_norm": 0.0011088729523621968, "learning_rate": 5.663127986742938e-07, "loss": 0.0628, "step": 211810 }, { "epoch": 4.311857506361323, "grad_norm": 0.0859928874440019, "learning_rate": 5.659843666466292e-07, "loss": 0.0006, "step": 211820 }, { "epoch": 4.31206106870229, "grad_norm": 0.097843942973926, "learning_rate": 5.656560241696235e-07, "loss": 0.0006, "step": 211830 }, { "epoch": 4.312264631043257, "grad_norm": 0.02537837072082626, "learning_rate": 5.653277712499044e-07, "loss": 0.0028, "step": 211840 }, { "epoch": 4.312468193384224, "grad_norm": 0.0009314127793135537, "learning_rate": 5.649996078941028e-07, "loss": 0.0244, "step": 211850 }, { "epoch": 4.312671755725191, "grad_norm": 0.015022731439445861, "learning_rate": 5.646715341088465e-07, "loss": 0.0227, "step": 211860 }, { "epoch": 4.312875318066157, "grad_norm": 0.0192386674647903, "learning_rate": 5.64343549900761e-07, "loss": 0.0002, "step": 211870 }, { "epoch": 4.313078880407125, "grad_norm": 0.2394041595567437, "learning_rate": 5.640156552764709e-07, "loss": 0.0002, "step": 211880 }, { "epoch": 4.313282442748092, "grad_norm": 0.005587590122847772, "learning_rate": 5.636878502425979e-07, "loss": 0.035, "step": 211890 }, { "epoch": 4.313486005089058, "grad_norm": 0.012319087844261967, "learning_rate": 5.633601348057626e-07, "loss": 0.0005, "step": 211900 }, { "epoch": 4.313689567430026, "grad_norm": 0.0032239091848346646, "learning_rate": 5.630325089725841e-07, "loss": 0.0001, "step": 211910 }, { "epoch": 4.313893129770992, "grad_norm": 1.398062171655502, "learning_rate": 5.627049727496786e-07, "loss": 0.0005, "step": 211920 }, { "epoch": 4.314096692111959, "grad_norm": 0.061618059167205634, "learning_rate": 5.623775261436615e-07, "loss": 0.0001, "step": 211930 }, { "epoch": 4.3143002544529265, "grad_norm": 4.566076303424085, "learning_rate": 5.620501691611458e-07, "loss": 0.0797, "step": 211940 }, { "epoch": 4.314503816793893, "grad_norm": 0.04967349827606156, "learning_rate": 5.617229018087427e-07, "loss": 0.0349, "step": 211950 }, { "epoch": 4.31470737913486, "grad_norm": 0.0037265082574263844, "learning_rate": 5.613957240930634e-07, "loss": 0.0, "step": 211960 }, { "epoch": 4.314910941475827, "grad_norm": 0.001657137079920178, "learning_rate": 5.610686360207118e-07, "loss": 0.0436, "step": 211970 }, { "epoch": 4.315114503816794, "grad_norm": 0.0011667185261895153, "learning_rate": 5.607416375982983e-07, "loss": 0.0005, "step": 211980 }, { "epoch": 4.315318066157761, "grad_norm": 10.524118689627013, "learning_rate": 5.604147288324258e-07, "loss": 0.05, "step": 211990 }, { "epoch": 4.315521628498728, "grad_norm": 0.010050334304639454, "learning_rate": 5.600879097296947e-07, "loss": 0.0416, "step": 212000 }, { "epoch": 4.315725190839695, "grad_norm": 0.014038380644351919, "learning_rate": 5.597611802967057e-07, "loss": 0.0004, "step": 212010 }, { "epoch": 4.315928753180661, "grad_norm": 0.012578411035696806, "learning_rate": 5.594345405400615e-07, "loss": 0.0003, "step": 212020 }, { "epoch": 4.316132315521629, "grad_norm": 0.0042389860567679785, "learning_rate": 5.591079904663544e-07, "loss": 0.0002, "step": 212030 }, { "epoch": 4.3163358778625955, "grad_norm": 0.006305281699129493, "learning_rate": 5.587815300821814e-07, "loss": 0.0342, "step": 212040 }, { "epoch": 4.316539440203562, "grad_norm": 0.03939119288706447, "learning_rate": 5.584551593941357e-07, "loss": 0.0042, "step": 212050 }, { "epoch": 4.316743002544529, "grad_norm": 0.010467860344706545, "learning_rate": 5.581288784088084e-07, "loss": 0.0007, "step": 212060 }, { "epoch": 4.316946564885496, "grad_norm": 0.004779837519164028, "learning_rate": 5.578026871327902e-07, "loss": 0.0504, "step": 212070 }, { "epoch": 4.317150127226463, "grad_norm": 0.01514933861244911, "learning_rate": 5.574765855726677e-07, "loss": 0.0012, "step": 212080 }, { "epoch": 4.31735368956743, "grad_norm": 0.012058157728367491, "learning_rate": 5.571505737350274e-07, "loss": 0.0, "step": 212090 }, { "epoch": 4.317557251908397, "grad_norm": 0.013456665756891924, "learning_rate": 5.568246516264542e-07, "loss": 0.046, "step": 212100 }, { "epoch": 4.317760814249364, "grad_norm": 0.009140108287044492, "learning_rate": 5.564988192535298e-07, "loss": 0.013, "step": 212110 }, { "epoch": 4.31796437659033, "grad_norm": 12.771548166261619, "learning_rate": 5.561730766228357e-07, "loss": 0.0295, "step": 212120 }, { "epoch": 4.318167938931298, "grad_norm": 0.006602679027612855, "learning_rate": 5.558474237409478e-07, "loss": 0.0164, "step": 212130 }, { "epoch": 4.3183715012722645, "grad_norm": 0.008868101691154347, "learning_rate": 5.555218606144464e-07, "loss": 0.0152, "step": 212140 }, { "epoch": 4.318575063613231, "grad_norm": 10.601043943596823, "learning_rate": 5.551963872499061e-07, "loss": 0.0527, "step": 212150 }, { "epoch": 4.318778625954199, "grad_norm": 0.03379736680152841, "learning_rate": 5.548710036538979e-07, "loss": 0.0002, "step": 212160 }, { "epoch": 4.318982188295165, "grad_norm": 0.028816348861598676, "learning_rate": 5.54545709832997e-07, "loss": 0.0006, "step": 212170 }, { "epoch": 4.319185750636132, "grad_norm": 0.005089157851859876, "learning_rate": 5.542205057937705e-07, "loss": 0.042, "step": 212180 }, { "epoch": 4.3193893129770995, "grad_norm": 0.011687781060842362, "learning_rate": 5.538953915427858e-07, "loss": 0.001, "step": 212190 }, { "epoch": 4.319592875318066, "grad_norm": 0.001241775465229056, "learning_rate": 5.535703670866121e-07, "loss": 0.0636, "step": 212200 }, { "epoch": 4.319796437659033, "grad_norm": 0.004989600436140292, "learning_rate": 5.532454324318109e-07, "loss": 0.0022, "step": 212210 }, { "epoch": 4.32, "grad_norm": 0.08003116270161988, "learning_rate": 5.529205875849447e-07, "loss": 0.0002, "step": 212220 }, { "epoch": 4.320203562340967, "grad_norm": 0.0013177471109448047, "learning_rate": 5.525958325525771e-07, "loss": 0.0591, "step": 212230 }, { "epoch": 4.3204071246819336, "grad_norm": 0.00904759750974617, "learning_rate": 5.522711673412634e-07, "loss": 0.0003, "step": 212240 }, { "epoch": 4.320610687022901, "grad_norm": 0.009091555782960403, "learning_rate": 5.519465919575623e-07, "loss": 0.0341, "step": 212250 }, { "epoch": 4.320814249363868, "grad_norm": 0.001471483671808592, "learning_rate": 5.516221064080291e-07, "loss": 0.0202, "step": 212260 }, { "epoch": 4.321017811704834, "grad_norm": 0.005130306151312079, "learning_rate": 5.512977106992168e-07, "loss": 0.0001, "step": 212270 }, { "epoch": 4.321221374045802, "grad_norm": 0.0048259518379093, "learning_rate": 5.509734048376769e-07, "loss": 0.0216, "step": 212280 }, { "epoch": 4.3214249363867685, "grad_norm": 0.0022857509389199895, "learning_rate": 5.506491888299597e-07, "loss": 0.08, "step": 212290 }, { "epoch": 4.321628498727735, "grad_norm": 0.02868780376158193, "learning_rate": 5.503250626826129e-07, "loss": 0.0002, "step": 212300 }, { "epoch": 4.321832061068703, "grad_norm": 0.00989204460016907, "learning_rate": 5.500010264021821e-07, "loss": 0.0001, "step": 212310 }, { "epoch": 4.322035623409669, "grad_norm": 0.013173870606013688, "learning_rate": 5.496770799952122e-07, "loss": 0.0005, "step": 212320 }, { "epoch": 4.322239185750636, "grad_norm": 0.0011829551943939372, "learning_rate": 5.493532234682469e-07, "loss": 0.0001, "step": 212330 }, { "epoch": 4.3224427480916034, "grad_norm": 11.043798140001952, "learning_rate": 5.490294568278237e-07, "loss": 0.0376, "step": 212340 }, { "epoch": 4.32264631043257, "grad_norm": 0.014010660916994967, "learning_rate": 5.487057800804846e-07, "loss": 0.0396, "step": 212350 }, { "epoch": 4.322849872773537, "grad_norm": 0.17482711039698107, "learning_rate": 5.483821932327659e-07, "loss": 0.0004, "step": 212360 }, { "epoch": 4.323053435114504, "grad_norm": 0.02152966245263039, "learning_rate": 5.48058696291201e-07, "loss": 0.02, "step": 212370 }, { "epoch": 4.323256997455471, "grad_norm": 0.024205059753332522, "learning_rate": 5.477352892623255e-07, "loss": 0.0072, "step": 212380 }, { "epoch": 4.3234605597964375, "grad_norm": 0.013540953067259826, "learning_rate": 5.474119721526716e-07, "loss": 0.0531, "step": 212390 }, { "epoch": 4.323664122137405, "grad_norm": 0.001745462073677102, "learning_rate": 5.470887449687668e-07, "loss": 0.0001, "step": 212400 }, { "epoch": 4.323867684478372, "grad_norm": 11.132268362919316, "learning_rate": 5.467656077171396e-07, "loss": 0.0553, "step": 212410 }, { "epoch": 4.324071246819338, "grad_norm": 0.1280592827152019, "learning_rate": 5.464425604043172e-07, "loss": 0.0319, "step": 212420 }, { "epoch": 4.324274809160305, "grad_norm": 0.01576270639947119, "learning_rate": 5.461196030368232e-07, "loss": 0.0001, "step": 212430 }, { "epoch": 4.3244783715012725, "grad_norm": 0.006702981422768387, "learning_rate": 5.457967356211802e-07, "loss": 0.0001, "step": 212440 }, { "epoch": 4.324681933842239, "grad_norm": 0.01981886230707644, "learning_rate": 5.45473958163909e-07, "loss": 0.0001, "step": 212450 }, { "epoch": 4.324885496183206, "grad_norm": 0.0019124679153606394, "learning_rate": 5.451512706715289e-07, "loss": 0.0397, "step": 212460 }, { "epoch": 4.325089058524173, "grad_norm": 0.006966442822987328, "learning_rate": 5.44828673150557e-07, "loss": 0.0387, "step": 212470 }, { "epoch": 4.32529262086514, "grad_norm": 0.06909315223565195, "learning_rate": 5.445061656075079e-07, "loss": 0.0149, "step": 212480 }, { "epoch": 4.3254961832061065, "grad_norm": 0.0038560509966999246, "learning_rate": 5.441837480488954e-07, "loss": 0.0009, "step": 212490 }, { "epoch": 4.325699745547074, "grad_norm": 0.009350959340047085, "learning_rate": 5.438614204812309e-07, "loss": 0.0001, "step": 212500 }, { "epoch": 4.325903307888041, "grad_norm": 0.01880605556898929, "learning_rate": 5.435391829110248e-07, "loss": 0.0002, "step": 212510 }, { "epoch": 4.326106870229007, "grad_norm": 0.013787653006175793, "learning_rate": 5.432170353447847e-07, "loss": 0.0521, "step": 212520 }, { "epoch": 4.326310432569975, "grad_norm": 0.2851899937469601, "learning_rate": 5.428949777890169e-07, "loss": 0.0091, "step": 212530 }, { "epoch": 4.3265139949109415, "grad_norm": 0.010827781582068978, "learning_rate": 5.425730102502269e-07, "loss": 0.0002, "step": 212540 }, { "epoch": 4.326717557251908, "grad_norm": 0.017183975042802432, "learning_rate": 5.422511327349151e-07, "loss": 0.0095, "step": 212550 }, { "epoch": 4.326921119592876, "grad_norm": 0.0018930547587374664, "learning_rate": 5.419293452495822e-07, "loss": 0.0003, "step": 212560 }, { "epoch": 4.327124681933842, "grad_norm": 0.008098111991728768, "learning_rate": 5.416076478007298e-07, "loss": 0.0001, "step": 212570 }, { "epoch": 4.327328244274809, "grad_norm": 0.01128301069787225, "learning_rate": 5.412860403948522e-07, "loss": 0.0352, "step": 212580 }, { "epoch": 4.327531806615776, "grad_norm": 0.08676435632538351, "learning_rate": 5.409645230384453e-07, "loss": 0.0071, "step": 212590 }, { "epoch": 4.327735368956743, "grad_norm": 0.05380946436054417, "learning_rate": 5.406430957380049e-07, "loss": 0.022, "step": 212600 }, { "epoch": 4.32793893129771, "grad_norm": 0.04111599153708132, "learning_rate": 5.403217585000197e-07, "loss": 0.0001, "step": 212610 }, { "epoch": 4.328142493638677, "grad_norm": 0.03175989781605875, "learning_rate": 5.400005113309803e-07, "loss": 0.0121, "step": 212620 }, { "epoch": 4.328346055979644, "grad_norm": 0.0084209606916591, "learning_rate": 5.396793542373758e-07, "loss": 0.0351, "step": 212630 }, { "epoch": 4.3285496183206105, "grad_norm": 0.020243922577520123, "learning_rate": 5.393582872256908e-07, "loss": 0.0003, "step": 212640 }, { "epoch": 4.328753180661578, "grad_norm": 0.011398401419220647, "learning_rate": 5.390373103024111e-07, "loss": 0.0275, "step": 212650 }, { "epoch": 4.328956743002545, "grad_norm": 0.018939547313584142, "learning_rate": 5.387164234740183e-07, "loss": 0.0002, "step": 212660 }, { "epoch": 4.329160305343511, "grad_norm": 0.13731137042879404, "learning_rate": 5.383956267469931e-07, "loss": 0.0012, "step": 212670 }, { "epoch": 4.329363867684479, "grad_norm": 0.008675479756521363, "learning_rate": 5.380749201278151e-07, "loss": 0.0003, "step": 212680 }, { "epoch": 4.329567430025445, "grad_norm": 0.02305902227454172, "learning_rate": 5.37754303622961e-07, "loss": 0.0454, "step": 212690 }, { "epoch": 4.329770992366412, "grad_norm": 0.004164851568443806, "learning_rate": 5.374337772389071e-07, "loss": 0.0003, "step": 212700 }, { "epoch": 4.329974554707379, "grad_norm": 3.366801881563928, "learning_rate": 5.371133409821233e-07, "loss": 0.043, "step": 212710 }, { "epoch": 4.330178117048346, "grad_norm": 0.003641608246790356, "learning_rate": 5.367929948590856e-07, "loss": 0.0308, "step": 212720 }, { "epoch": 4.330381679389313, "grad_norm": 0.016640883457939102, "learning_rate": 5.364727388762625e-07, "loss": 0.0005, "step": 212730 }, { "epoch": 4.3305852417302795, "grad_norm": 0.009366182312888008, "learning_rate": 5.36152573040119e-07, "loss": 0.0007, "step": 212740 }, { "epoch": 4.330788804071247, "grad_norm": 0.03622131316545049, "learning_rate": 5.358324973571261e-07, "loss": 0.0001, "step": 212750 }, { "epoch": 4.330992366412214, "grad_norm": 0.0023810411279073713, "learning_rate": 5.355125118337445e-07, "loss": 0.0178, "step": 212760 }, { "epoch": 4.33119592875318, "grad_norm": 0.020727248705954777, "learning_rate": 5.351926164764376e-07, "loss": 0.0191, "step": 212770 }, { "epoch": 4.331399491094148, "grad_norm": 0.02709883007243459, "learning_rate": 5.348728112916679e-07, "loss": 0.0012, "step": 212780 }, { "epoch": 4.331603053435114, "grad_norm": 0.004235175346236777, "learning_rate": 5.345530962858919e-07, "loss": 0.0037, "step": 212790 }, { "epoch": 4.331806615776081, "grad_norm": 0.0028934120006038288, "learning_rate": 5.342334714655667e-07, "loss": 0.0206, "step": 212800 }, { "epoch": 4.332010178117049, "grad_norm": 0.007990542142971579, "learning_rate": 5.339139368371499e-07, "loss": 0.0102, "step": 212810 }, { "epoch": 4.332213740458015, "grad_norm": 0.0002285995074771117, "learning_rate": 5.335944924070929e-07, "loss": 0.0248, "step": 212820 }, { "epoch": 4.332417302798982, "grad_norm": 0.026529761975223814, "learning_rate": 5.33275138181848e-07, "loss": 0.0318, "step": 212830 }, { "epoch": 4.332620865139949, "grad_norm": 0.04853400370166855, "learning_rate": 5.329558741678642e-07, "loss": 0.0025, "step": 212840 }, { "epoch": 4.332824427480916, "grad_norm": 0.026680860778795928, "learning_rate": 5.326367003715904e-07, "loss": 0.0149, "step": 212850 }, { "epoch": 4.333027989821883, "grad_norm": 0.005185460418981199, "learning_rate": 5.323176167994721e-07, "loss": 0.0001, "step": 212860 }, { "epoch": 4.33323155216285, "grad_norm": 0.0038773092511949596, "learning_rate": 5.319986234579544e-07, "loss": 0.0185, "step": 212870 }, { "epoch": 4.333435114503817, "grad_norm": 0.004023315897351353, "learning_rate": 5.316797203534784e-07, "loss": 0.0143, "step": 212880 }, { "epoch": 4.333638676844783, "grad_norm": 0.07596127084556992, "learning_rate": 5.313609074924864e-07, "loss": 0.0001, "step": 212890 }, { "epoch": 4.333842239185751, "grad_norm": 0.005601745950612351, "learning_rate": 5.310421848814162e-07, "loss": 0.0004, "step": 212900 }, { "epoch": 4.334045801526718, "grad_norm": 10.184181756528437, "learning_rate": 5.307235525267057e-07, "loss": 0.0253, "step": 212910 }, { "epoch": 4.334249363867684, "grad_norm": 0.031171963046459125, "learning_rate": 5.304050104347879e-07, "loss": 0.0002, "step": 212920 }, { "epoch": 4.334452926208652, "grad_norm": 0.07134311941422986, "learning_rate": 5.300865586120985e-07, "loss": 0.0001, "step": 212930 }, { "epoch": 4.334656488549618, "grad_norm": 48.542331104772245, "learning_rate": 5.297681970650692e-07, "loss": 0.0328, "step": 212940 }, { "epoch": 4.334860050890585, "grad_norm": 0.003310672264903195, "learning_rate": 5.294499258001268e-07, "loss": 0.067, "step": 212950 }, { "epoch": 4.3350636132315525, "grad_norm": 0.010562225944547867, "learning_rate": 5.291317448237021e-07, "loss": 0.0002, "step": 212960 }, { "epoch": 4.335267175572519, "grad_norm": 0.029324054261205008, "learning_rate": 5.288136541422217e-07, "loss": 0.0363, "step": 212970 }, { "epoch": 4.335470737913486, "grad_norm": 0.007629815948173817, "learning_rate": 5.284956537621067e-07, "loss": 0.0, "step": 212980 }, { "epoch": 4.335674300254453, "grad_norm": 0.001270324265385626, "learning_rate": 5.281777436897817e-07, "loss": 0.0004, "step": 212990 }, { "epoch": 4.33587786259542, "grad_norm": 0.00201594582107725, "learning_rate": 5.278599239316667e-07, "loss": 0.0104, "step": 213000 }, { "epoch": 4.336081424936387, "grad_norm": 0.010059713072953527, "learning_rate": 5.275421944941805e-07, "loss": 0.0002, "step": 213010 }, { "epoch": 4.336284987277354, "grad_norm": 0.005074902813071545, "learning_rate": 5.272245553837407e-07, "loss": 0.04, "step": 213020 }, { "epoch": 4.336488549618321, "grad_norm": 0.0025503631965053957, "learning_rate": 5.269070066067611e-07, "loss": 0.0068, "step": 213030 }, { "epoch": 4.336692111959287, "grad_norm": 0.015955612909643518, "learning_rate": 5.265895481696564e-07, "loss": 0.0259, "step": 213040 }, { "epoch": 4.336895674300255, "grad_norm": 0.006317264785928325, "learning_rate": 5.262721800788373e-07, "loss": 0.0001, "step": 213050 }, { "epoch": 4.3370992366412215, "grad_norm": 8.800726612209493, "learning_rate": 5.259549023407134e-07, "loss": 0.0397, "step": 213060 }, { "epoch": 4.337302798982188, "grad_norm": 0.0006575893847480379, "learning_rate": 5.256377149616932e-07, "loss": 0.0009, "step": 213070 }, { "epoch": 4.337506361323155, "grad_norm": 0.020497749768520986, "learning_rate": 5.253206179481823e-07, "loss": 0.0001, "step": 213080 }, { "epoch": 4.337709923664122, "grad_norm": 0.007404574282480901, "learning_rate": 5.250036113065842e-07, "loss": 0.0465, "step": 213090 }, { "epoch": 4.337913486005089, "grad_norm": 0.048494630116361556, "learning_rate": 5.246866950433039e-07, "loss": 0.0035, "step": 213100 }, { "epoch": 4.338117048346056, "grad_norm": 3.872164377688774, "learning_rate": 5.24369869164737e-07, "loss": 0.0114, "step": 213110 }, { "epoch": 4.338320610687023, "grad_norm": 0.005583613264492501, "learning_rate": 5.240531336772875e-07, "loss": 0.0191, "step": 213120 }, { "epoch": 4.33852417302799, "grad_norm": 0.00029194409214924053, "learning_rate": 5.237364885873491e-07, "loss": 0.0001, "step": 213130 }, { "epoch": 4.338727735368956, "grad_norm": 0.009088866884931341, "learning_rate": 5.234199339013169e-07, "loss": 0.0812, "step": 213140 }, { "epoch": 4.338931297709924, "grad_norm": 0.004926469548921591, "learning_rate": 5.231034696255866e-07, "loss": 0.0001, "step": 213150 }, { "epoch": 4.3391348600508906, "grad_norm": 0.026549674887282205, "learning_rate": 5.227870957665471e-07, "loss": 0.0511, "step": 213160 }, { "epoch": 4.339338422391857, "grad_norm": 0.03024293834955125, "learning_rate": 5.224708123305877e-07, "loss": 0.0001, "step": 213170 }, { "epoch": 4.339541984732825, "grad_norm": 0.0008685268863193816, "learning_rate": 5.221546193240989e-07, "loss": 0.0069, "step": 213180 }, { "epoch": 4.339745547073791, "grad_norm": 2.237640881429472e-05, "learning_rate": 5.218385167534645e-07, "loss": 0.0194, "step": 213190 }, { "epoch": 4.339949109414758, "grad_norm": 0.0020695431573236793, "learning_rate": 5.215225046250688e-07, "loss": 0.0278, "step": 213200 }, { "epoch": 4.3401526717557255, "grad_norm": 0.0009441975301208898, "learning_rate": 5.212065829452945e-07, "loss": 0.0003, "step": 213210 }, { "epoch": 4.340356234096692, "grad_norm": 0.0019319540902947745, "learning_rate": 5.20890751720522e-07, "loss": 0.052, "step": 213220 }, { "epoch": 4.340559796437659, "grad_norm": 0.017331128910420178, "learning_rate": 5.205750109571295e-07, "loss": 0.0373, "step": 213230 }, { "epoch": 4.340763358778626, "grad_norm": 0.012109944023776542, "learning_rate": 5.202593606614942e-07, "loss": 0.001, "step": 213240 }, { "epoch": 4.340966921119593, "grad_norm": 0.021628079088606873, "learning_rate": 5.199438008399915e-07, "loss": 0.017, "step": 213250 }, { "epoch": 4.34117048346056, "grad_norm": 0.47065956404584053, "learning_rate": 5.196283314989936e-07, "loss": 0.0117, "step": 213260 }, { "epoch": 4.341374045801527, "grad_norm": 0.01568670843914097, "learning_rate": 5.19312952644872e-07, "loss": 0.0001, "step": 213270 }, { "epoch": 4.341577608142494, "grad_norm": 0.0028569126828159076, "learning_rate": 5.189976642839978e-07, "loss": 0.0003, "step": 213280 }, { "epoch": 4.34178117048346, "grad_norm": 0.13452194491337707, "learning_rate": 5.186824664227347e-07, "loss": 0.026, "step": 213290 }, { "epoch": 4.341984732824428, "grad_norm": 11.343235148781936, "learning_rate": 5.183673590674526e-07, "loss": 0.0314, "step": 213300 }, { "epoch": 4.3421882951653945, "grad_norm": 0.0035012474292802917, "learning_rate": 5.180523422245149e-07, "loss": 0.0001, "step": 213310 }, { "epoch": 4.342391857506361, "grad_norm": 12.260649934008443, "learning_rate": 5.177374159002807e-07, "loss": 0.0325, "step": 213320 }, { "epoch": 4.342595419847328, "grad_norm": 4.845132897633315, "learning_rate": 5.174225801011135e-07, "loss": 0.0164, "step": 213330 }, { "epoch": 4.342798982188295, "grad_norm": 0.1110930932259351, "learning_rate": 5.171078348333719e-07, "loss": 0.0004, "step": 213340 }, { "epoch": 4.343002544529262, "grad_norm": 8.023525887724363, "learning_rate": 5.167931801034098e-07, "loss": 0.0627, "step": 213350 }, { "epoch": 4.343206106870229, "grad_norm": 0.03728320938184189, "learning_rate": 5.164786159175855e-07, "loss": 0.0371, "step": 213360 }, { "epoch": 4.343409669211196, "grad_norm": 0.0044243856048798574, "learning_rate": 5.161641422822488e-07, "loss": 0.0001, "step": 213370 }, { "epoch": 4.343613231552163, "grad_norm": 0.8638291020292893, "learning_rate": 5.158497592037531e-07, "loss": 0.0004, "step": 213380 }, { "epoch": 4.343816793893129, "grad_norm": 0.034360493824921505, "learning_rate": 5.155354666884466e-07, "loss": 0.0166, "step": 213390 }, { "epoch": 4.344020356234097, "grad_norm": 1.4516681034804608, "learning_rate": 5.152212647426774e-07, "loss": 0.0005, "step": 213400 }, { "epoch": 4.3442239185750635, "grad_norm": 0.0023963685342893856, "learning_rate": 5.149071533727912e-07, "loss": 0.0001, "step": 213410 }, { "epoch": 4.34442748091603, "grad_norm": 0.007403147997491759, "learning_rate": 5.145931325851316e-07, "loss": 0.033, "step": 213420 }, { "epoch": 4.344631043256998, "grad_norm": 0.019701904806094853, "learning_rate": 5.142792023860416e-07, "loss": 0.0002, "step": 213430 }, { "epoch": 4.344834605597964, "grad_norm": 0.0034853989421470715, "learning_rate": 5.139653627818597e-07, "loss": 0.0002, "step": 213440 }, { "epoch": 4.345038167938931, "grad_norm": 0.034612579488831875, "learning_rate": 5.136516137789261e-07, "loss": 0.0001, "step": 213450 }, { "epoch": 4.3452417302798985, "grad_norm": 0.015577714140942502, "learning_rate": 5.13337955383576e-07, "loss": 0.0338, "step": 213460 }, { "epoch": 4.345445292620865, "grad_norm": 0.006186484225084924, "learning_rate": 5.130243876021446e-07, "loss": 0.021, "step": 213470 }, { "epoch": 4.345648854961832, "grad_norm": 12.028061817271963, "learning_rate": 5.12710910440965e-07, "loss": 0.0566, "step": 213480 }, { "epoch": 4.345852417302799, "grad_norm": 0.010886096041388828, "learning_rate": 5.123975239063688e-07, "loss": 0.0001, "step": 213490 }, { "epoch": 4.346055979643766, "grad_norm": 10.915660136256362, "learning_rate": 5.120842280046834e-07, "loss": 0.0688, "step": 213500 }, { "epoch": 4.3462595419847325, "grad_norm": 25.504739707734096, "learning_rate": 5.117710227422379e-07, "loss": 0.0099, "step": 213510 }, { "epoch": 4.3464631043257, "grad_norm": 0.08732605079552948, "learning_rate": 5.114579081253584e-07, "loss": 0.0352, "step": 213520 }, { "epoch": 4.346666666666667, "grad_norm": 0.004649264381786145, "learning_rate": 5.111448841603667e-07, "loss": 0.0008, "step": 213530 }, { "epoch": 4.346870229007633, "grad_norm": 0.0013918135810213907, "learning_rate": 5.108319508535848e-07, "loss": 0.0374, "step": 213540 }, { "epoch": 4.347073791348601, "grad_norm": 0.09533673281524732, "learning_rate": 5.105191082113353e-07, "loss": 0.0016, "step": 213550 }, { "epoch": 4.3472773536895675, "grad_norm": 0.004325543512050185, "learning_rate": 5.102063562399346e-07, "loss": 0.0056, "step": 213560 }, { "epoch": 4.347480916030534, "grad_norm": 0.18279188772282515, "learning_rate": 5.098936949456984e-07, "loss": 0.0013, "step": 213570 }, { "epoch": 4.347684478371502, "grad_norm": 0.027125298697800424, "learning_rate": 5.09581124334943e-07, "loss": 0.0002, "step": 213580 }, { "epoch": 4.347888040712468, "grad_norm": 0.25888173190668834, "learning_rate": 5.0926864441398e-07, "loss": 0.0407, "step": 213590 }, { "epoch": 4.348091603053435, "grad_norm": 0.06046148600002255, "learning_rate": 5.089562551891208e-07, "loss": 0.0458, "step": 213600 }, { "epoch": 4.348295165394402, "grad_norm": 0.012652190699938752, "learning_rate": 5.086439566666745e-07, "loss": 0.0103, "step": 213610 }, { "epoch": 4.348498727735369, "grad_norm": 11.149766552318662, "learning_rate": 5.083317488529482e-07, "loss": 0.0387, "step": 213620 }, { "epoch": 4.348702290076336, "grad_norm": 0.005558917860499446, "learning_rate": 5.080196317542474e-07, "loss": 0.0256, "step": 213630 }, { "epoch": 4.348905852417303, "grad_norm": 0.10568692641176873, "learning_rate": 5.07707605376876e-07, "loss": 0.0139, "step": 213640 }, { "epoch": 4.34910941475827, "grad_norm": 0.023815749925509465, "learning_rate": 5.073956697271365e-07, "loss": 0.0002, "step": 213650 }, { "epoch": 4.3493129770992365, "grad_norm": 0.0181195865752687, "learning_rate": 5.070838248113258e-07, "loss": 0.0022, "step": 213660 }, { "epoch": 4.349516539440204, "grad_norm": 11.30021618950015, "learning_rate": 5.067720706357449e-07, "loss": 0.0311, "step": 213670 }, { "epoch": 4.349720101781171, "grad_norm": 9.68284525162107, "learning_rate": 5.064604072066903e-07, "loss": 0.0421, "step": 213680 }, { "epoch": 4.349923664122137, "grad_norm": 0.0017534700072701981, "learning_rate": 5.061488345304539e-07, "loss": 0.0438, "step": 213690 }, { "epoch": 4.350127226463105, "grad_norm": 0.011402866426638715, "learning_rate": 5.058373526133304e-07, "loss": 0.0002, "step": 213700 }, { "epoch": 4.350330788804071, "grad_norm": 0.00472812904529719, "learning_rate": 5.05525961461611e-07, "loss": 0.0119, "step": 213710 }, { "epoch": 4.350534351145038, "grad_norm": 0.0020285087871306133, "learning_rate": 5.052146610815816e-07, "loss": 0.0002, "step": 213720 }, { "epoch": 4.350737913486005, "grad_norm": 0.020881113727041215, "learning_rate": 5.049034514795337e-07, "loss": 0.0436, "step": 213730 }, { "epoch": 4.350941475826972, "grad_norm": 0.0037815215985354893, "learning_rate": 5.045923326617491e-07, "loss": 0.0001, "step": 213740 }, { "epoch": 4.351145038167939, "grad_norm": 0.014081853651519477, "learning_rate": 5.042813046345114e-07, "loss": 0.0112, "step": 213750 }, { "epoch": 4.3513486005089055, "grad_norm": 0.03704332836674708, "learning_rate": 5.039703674041052e-07, "loss": 0.006, "step": 213760 }, { "epoch": 4.351552162849873, "grad_norm": 0.01759542214061077, "learning_rate": 5.036595209768069e-07, "loss": 0.0002, "step": 213770 }, { "epoch": 4.35175572519084, "grad_norm": 0.009555537573951625, "learning_rate": 5.033487653588954e-07, "loss": 0.0016, "step": 213780 }, { "epoch": 4.351959287531806, "grad_norm": 0.01370351920704584, "learning_rate": 5.030381005566492e-07, "loss": 0.001, "step": 213790 }, { "epoch": 4.352162849872774, "grad_norm": 0.004801600329549947, "learning_rate": 5.027275265763393e-07, "loss": 0.0001, "step": 213800 }, { "epoch": 4.35236641221374, "grad_norm": 0.005267747585731482, "learning_rate": 5.024170434242392e-07, "loss": 0.0001, "step": 213810 }, { "epoch": 4.352569974554707, "grad_norm": 0.000533766886184514, "learning_rate": 5.021066511066203e-07, "loss": 0.0001, "step": 213820 }, { "epoch": 4.352773536895675, "grad_norm": 0.0023977754175733513, "learning_rate": 5.017963496297507e-07, "loss": 0.0007, "step": 213830 }, { "epoch": 4.352977099236641, "grad_norm": 0.0018822761131006148, "learning_rate": 5.014861389998971e-07, "loss": 0.0023, "step": 213840 }, { "epoch": 4.353180661577608, "grad_norm": 0.03231391517467735, "learning_rate": 5.01176019223325e-07, "loss": 0.0806, "step": 213850 }, { "epoch": 4.353384223918575, "grad_norm": 0.031177612445705865, "learning_rate": 5.00865990306299e-07, "loss": 0.0435, "step": 213860 }, { "epoch": 4.353587786259542, "grad_norm": 0.044585494457390226, "learning_rate": 5.005560522550774e-07, "loss": 0.0005, "step": 213870 }, { "epoch": 4.353791348600509, "grad_norm": 0.0014285196372473537, "learning_rate": 5.002462050759216e-07, "loss": 0.0004, "step": 213880 }, { "epoch": 4.353994910941476, "grad_norm": 0.0170900781103975, "learning_rate": 4.999364487750913e-07, "loss": 0.0002, "step": 213890 }, { "epoch": 4.354198473282443, "grad_norm": 0.006282341465799394, "learning_rate": 4.996267833588381e-07, "loss": 0.0219, "step": 213900 }, { "epoch": 4.354402035623409, "grad_norm": 5.608596098066882e-05, "learning_rate": 4.993172088334192e-07, "loss": 0.0001, "step": 213910 }, { "epoch": 4.354605597964377, "grad_norm": 0.7462555869563208, "learning_rate": 4.990077252050873e-07, "loss": 0.0274, "step": 213920 }, { "epoch": 4.354809160305344, "grad_norm": 0.004610778556510134, "learning_rate": 4.986983324800898e-07, "loss": 0.0328, "step": 213930 }, { "epoch": 4.35501272264631, "grad_norm": 0.0006047527461665698, "learning_rate": 4.983890306646788e-07, "loss": 0.0377, "step": 213940 }, { "epoch": 4.355216284987278, "grad_norm": 0.0012266241755962584, "learning_rate": 4.980798197650982e-07, "loss": 0.0009, "step": 213950 }, { "epoch": 4.355419847328244, "grad_norm": 0.007814954760520968, "learning_rate": 4.977706997875948e-07, "loss": 0.0001, "step": 213960 }, { "epoch": 4.355623409669211, "grad_norm": 14.36991788451635, "learning_rate": 4.974616707384106e-07, "loss": 0.0487, "step": 213970 }, { "epoch": 4.355826972010178, "grad_norm": 15.003513917857564, "learning_rate": 4.971527326237869e-07, "loss": 0.0261, "step": 213980 }, { "epoch": 4.356030534351145, "grad_norm": 0.04592165691031153, "learning_rate": 4.968438854499635e-07, "loss": 0.0445, "step": 213990 }, { "epoch": 4.356234096692112, "grad_norm": 0.007005004222920551, "learning_rate": 4.965351292231779e-07, "loss": 0.0001, "step": 214000 }, { "epoch": 4.3564376590330784, "grad_norm": 0.0007213383439547701, "learning_rate": 4.962264639496661e-07, "loss": 0.0002, "step": 214010 }, { "epoch": 4.356641221374046, "grad_norm": 0.0035960929980686047, "learning_rate": 4.959178896356615e-07, "loss": 0.0152, "step": 214020 }, { "epoch": 4.356844783715013, "grad_norm": 0.01114525433623304, "learning_rate": 4.956094062873962e-07, "loss": 0.0362, "step": 214030 }, { "epoch": 4.357048346055979, "grad_norm": 0.09478912866834568, "learning_rate": 4.953010139111003e-07, "loss": 0.0295, "step": 214040 }, { "epoch": 4.357251908396947, "grad_norm": 0.011406164031492128, "learning_rate": 4.949927125130027e-07, "loss": 0.0378, "step": 214050 }, { "epoch": 4.357455470737913, "grad_norm": 0.0038195989722212254, "learning_rate": 4.946845020993297e-07, "loss": 0.0001, "step": 214060 }, { "epoch": 4.35765903307888, "grad_norm": 0.15431263851576588, "learning_rate": 4.943763826763059e-07, "loss": 0.0058, "step": 214070 }, { "epoch": 4.3578625954198476, "grad_norm": 0.19628231527655113, "learning_rate": 4.940683542501557e-07, "loss": 0.0038, "step": 214080 }, { "epoch": 4.358066157760814, "grad_norm": 0.001321399085602456, "learning_rate": 4.937604168270959e-07, "loss": 0.0258, "step": 214090 }, { "epoch": 4.358269720101781, "grad_norm": 1.310681295869361, "learning_rate": 4.934525704133508e-07, "loss": 0.0416, "step": 214100 }, { "epoch": 4.358473282442748, "grad_norm": 0.02768399499311766, "learning_rate": 4.931448150151347e-07, "loss": 0.0015, "step": 214110 }, { "epoch": 4.358676844783715, "grad_norm": 0.014953857771304209, "learning_rate": 4.928371506386626e-07, "loss": 0.0002, "step": 214120 }, { "epoch": 4.358880407124682, "grad_norm": 51.5690734692156, "learning_rate": 4.925295772901517e-07, "loss": 0.0229, "step": 214130 }, { "epoch": 4.359083969465649, "grad_norm": 10.944438752983364, "learning_rate": 4.922220949758106e-07, "loss": 0.0383, "step": 214140 }, { "epoch": 4.359287531806616, "grad_norm": 0.0057301595200497666, "learning_rate": 4.91914703701849e-07, "loss": 0.0295, "step": 214150 }, { "epoch": 4.359491094147582, "grad_norm": 0.013452119813259842, "learning_rate": 4.916074034744784e-07, "loss": 0.0003, "step": 214160 }, { "epoch": 4.35969465648855, "grad_norm": 0.0023142923650332384, "learning_rate": 4.913001942999018e-07, "loss": 0.0786, "step": 214170 }, { "epoch": 4.359898218829517, "grad_norm": 0.007670447283035482, "learning_rate": 4.909930761843251e-07, "loss": 0.0005, "step": 214180 }, { "epoch": 4.360101781170483, "grad_norm": 0.007805219953260894, "learning_rate": 4.90686049133951e-07, "loss": 0.018, "step": 214190 }, { "epoch": 4.360305343511451, "grad_norm": 0.00403776195637151, "learning_rate": 4.903791131549795e-07, "loss": 0.0003, "step": 214200 }, { "epoch": 4.360508905852417, "grad_norm": 0.004329083870096644, "learning_rate": 4.900722682536107e-07, "loss": 0.0004, "step": 214210 }, { "epoch": 4.360712468193384, "grad_norm": 0.00999078665751749, "learning_rate": 4.897655144360413e-07, "loss": 0.0007, "step": 214220 }, { "epoch": 4.3609160305343515, "grad_norm": 0.18009673428502673, "learning_rate": 4.89458851708467e-07, "loss": 0.0002, "step": 214230 }, { "epoch": 4.361119592875318, "grad_norm": 11.958333821236335, "learning_rate": 4.891522800770793e-07, "loss": 0.0562, "step": 214240 }, { "epoch": 4.361323155216285, "grad_norm": 0.07625622126662973, "learning_rate": 4.888457995480716e-07, "loss": 0.0002, "step": 214250 }, { "epoch": 4.361526717557252, "grad_norm": 8.939703474983153, "learning_rate": 4.885394101276347e-07, "loss": 0.0458, "step": 214260 }, { "epoch": 4.361730279898219, "grad_norm": 0.0035932533288118027, "learning_rate": 4.88233111821953e-07, "loss": 0.0012, "step": 214270 }, { "epoch": 4.361933842239186, "grad_norm": 0.022717545151849667, "learning_rate": 4.879269046372159e-07, "loss": 0.0001, "step": 214280 }, { "epoch": 4.362137404580153, "grad_norm": 0.09708646510599735, "learning_rate": 4.876207885796069e-07, "loss": 0.0003, "step": 214290 }, { "epoch": 4.36234096692112, "grad_norm": 0.0020181498755928486, "learning_rate": 4.873147636553066e-07, "loss": 0.0212, "step": 214300 }, { "epoch": 4.362544529262086, "grad_norm": 0.007446122108264976, "learning_rate": 4.870088298704989e-07, "loss": 0.0001, "step": 214310 }, { "epoch": 4.362748091603054, "grad_norm": 0.04206779429115126, "learning_rate": 4.867029872313589e-07, "loss": 0.015, "step": 214320 }, { "epoch": 4.3629516539440205, "grad_norm": 0.004163324205265059, "learning_rate": 4.863972357440649e-07, "loss": 0.0002, "step": 214330 }, { "epoch": 4.363155216284987, "grad_norm": 0.004522100716740159, "learning_rate": 4.860915754147938e-07, "loss": 0.0002, "step": 214340 }, { "epoch": 4.363358778625954, "grad_norm": 4.484673787719615, "learning_rate": 4.857860062497166e-07, "loss": 0.0618, "step": 214350 }, { "epoch": 4.363562340966921, "grad_norm": 0.00935408934190572, "learning_rate": 4.854805282550051e-07, "loss": 0.0067, "step": 214360 }, { "epoch": 4.363765903307888, "grad_norm": 0.0058052535125244446, "learning_rate": 4.851751414368288e-07, "loss": 0.0001, "step": 214370 }, { "epoch": 4.363969465648855, "grad_norm": 9.07678545496849, "learning_rate": 4.848698458013556e-07, "loss": 0.0118, "step": 214380 }, { "epoch": 4.364173027989822, "grad_norm": 0.0005988556008748581, "learning_rate": 4.845646413547516e-07, "loss": 0.0001, "step": 214390 }, { "epoch": 4.364376590330789, "grad_norm": 0.010463725520750736, "learning_rate": 4.842595281031804e-07, "loss": 0.0001, "step": 214400 }, { "epoch": 4.364580152671755, "grad_norm": 18.549244275957623, "learning_rate": 4.839545060528039e-07, "loss": 0.0192, "step": 214410 }, { "epoch": 4.364783715012723, "grad_norm": 0.02724419614781379, "learning_rate": 4.83649575209783e-07, "loss": 0.0256, "step": 214420 }, { "epoch": 4.3649872773536895, "grad_norm": 0.008370433553493968, "learning_rate": 4.833447355802762e-07, "loss": 0.0002, "step": 214430 }, { "epoch": 4.365190839694656, "grad_norm": 0.014210108393124064, "learning_rate": 4.830399871704394e-07, "loss": 0.0002, "step": 214440 }, { "epoch": 4.365394402035624, "grad_norm": 0.004756868393619294, "learning_rate": 4.827353299864285e-07, "loss": 0.0012, "step": 214450 }, { "epoch": 4.36559796437659, "grad_norm": 0.0036183931228691746, "learning_rate": 4.824307640343951e-07, "loss": 0.0412, "step": 214460 }, { "epoch": 4.365801526717557, "grad_norm": 0.00290205603019288, "learning_rate": 4.821262893204926e-07, "loss": 0.0001, "step": 214470 }, { "epoch": 4.3660050890585245, "grad_norm": 0.004060840142855213, "learning_rate": 4.818219058508661e-07, "loss": 0.0749, "step": 214480 }, { "epoch": 4.366208651399491, "grad_norm": 8.97615630245501, "learning_rate": 4.815176136316668e-07, "loss": 0.0332, "step": 214490 }, { "epoch": 4.366412213740458, "grad_norm": 0.10047792180322511, "learning_rate": 4.812134126690404e-07, "loss": 0.0022, "step": 214500 }, { "epoch": 4.366615776081425, "grad_norm": 0.21607269954887676, "learning_rate": 4.809093029691281e-07, "loss": 0.0015, "step": 214510 }, { "epoch": 4.366819338422392, "grad_norm": 0.0473088602694394, "learning_rate": 4.806052845380726e-07, "loss": 0.0184, "step": 214520 }, { "epoch": 4.3670229007633585, "grad_norm": 0.004539786935975717, "learning_rate": 4.803013573820147e-07, "loss": 0.0215, "step": 214530 }, { "epoch": 4.367226463104326, "grad_norm": 0.0030334803432325703, "learning_rate": 4.799975215070918e-07, "loss": 0.0105, "step": 214540 }, { "epoch": 4.367430025445293, "grad_norm": 0.0211072285169253, "learning_rate": 4.796937769194409e-07, "loss": 0.0022, "step": 214550 }, { "epoch": 4.367633587786259, "grad_norm": 0.004262101656774489, "learning_rate": 4.793901236251969e-07, "loss": 0.0114, "step": 214560 }, { "epoch": 4.367837150127227, "grad_norm": 0.4598575574078844, "learning_rate": 4.790865616304913e-07, "loss": 0.0248, "step": 214570 }, { "epoch": 4.3680407124681935, "grad_norm": 0.004393765027135883, "learning_rate": 4.787830909414554e-07, "loss": 0.0237, "step": 214580 }, { "epoch": 4.36824427480916, "grad_norm": 0.007500037463351533, "learning_rate": 4.784797115642181e-07, "loss": 0.0001, "step": 214590 }, { "epoch": 4.368447837150128, "grad_norm": 0.007436668269645736, "learning_rate": 4.781764235049069e-07, "loss": 0.0007, "step": 214600 }, { "epoch": 4.368651399491094, "grad_norm": 0.0046941434824104705, "learning_rate": 4.778732267696474e-07, "loss": 0.0015, "step": 214610 }, { "epoch": 4.368854961832061, "grad_norm": 0.046018172477428156, "learning_rate": 4.775701213645617e-07, "loss": 0.0002, "step": 214620 }, { "epoch": 4.3690585241730275, "grad_norm": 0.027809922855772145, "learning_rate": 4.77267107295774e-07, "loss": 0.0245, "step": 214630 }, { "epoch": 4.369262086513995, "grad_norm": 0.002075608101871648, "learning_rate": 4.769641845694e-07, "loss": 0.0001, "step": 214640 }, { "epoch": 4.369465648854962, "grad_norm": 0.11937407927798406, "learning_rate": 4.766613531915609e-07, "loss": 0.0699, "step": 214650 }, { "epoch": 4.369669211195928, "grad_norm": 13.678582945015657, "learning_rate": 4.7635861316837227e-07, "loss": 0.0611, "step": 214660 }, { "epoch": 4.369872773536896, "grad_norm": 0.01696898199248436, "learning_rate": 4.760559645059465e-07, "loss": 0.0003, "step": 214670 }, { "epoch": 4.3700763358778625, "grad_norm": 0.0037676153472197193, "learning_rate": 4.757534072103992e-07, "loss": 0.0264, "step": 214680 }, { "epoch": 4.370279898218829, "grad_norm": 0.6008689496334733, "learning_rate": 4.754509412878383e-07, "loss": 0.0006, "step": 214690 }, { "epoch": 4.370483460559797, "grad_norm": 0.019425378715820365, "learning_rate": 4.751485667443717e-07, "loss": 0.0699, "step": 214700 }, { "epoch": 4.370687022900763, "grad_norm": 0.00845574948980493, "learning_rate": 4.7484628358611007e-07, "loss": 0.0002, "step": 214710 }, { "epoch": 4.37089058524173, "grad_norm": 9.577874832308859e-09, "learning_rate": 4.7454409181915463e-07, "loss": 0.0001, "step": 214720 }, { "epoch": 4.371094147582697, "grad_norm": 0.4489729525356572, "learning_rate": 4.742419914496099e-07, "loss": 0.0005, "step": 214730 }, { "epoch": 4.371297709923664, "grad_norm": 0.025822562763131135, "learning_rate": 4.739399824835783e-07, "loss": 0.0023, "step": 214740 }, { "epoch": 4.371501272264631, "grad_norm": 0.036050129648514895, "learning_rate": 4.736380649271577e-07, "loss": 0.0006, "step": 214750 }, { "epoch": 4.371704834605598, "grad_norm": 0.054047188143797505, "learning_rate": 4.733362387864465e-07, "loss": 0.0001, "step": 214760 }, { "epoch": 4.371908396946565, "grad_norm": 0.0005439517850408035, "learning_rate": 4.7303450406753995e-07, "loss": 0.0002, "step": 214770 }, { "epoch": 4.3721119592875315, "grad_norm": 0.051486065660754754, "learning_rate": 4.7273286077653246e-07, "loss": 0.0002, "step": 214780 }, { "epoch": 4.372315521628499, "grad_norm": 0.018699769066911922, "learning_rate": 4.724313089195159e-07, "loss": 0.0424, "step": 214790 }, { "epoch": 4.372519083969466, "grad_norm": 0.0271241873863056, "learning_rate": 4.721298485025805e-07, "loss": 0.0014, "step": 214800 }, { "epoch": 4.372722646310432, "grad_norm": 0.0018502939174534386, "learning_rate": 4.7182847953181456e-07, "loss": 0.0001, "step": 214810 }, { "epoch": 4.3729262086514, "grad_norm": 0.0032540887864727316, "learning_rate": 4.7152720201330493e-07, "loss": 0.0001, "step": 214820 }, { "epoch": 4.373129770992366, "grad_norm": 0.011738442899292198, "learning_rate": 4.7122601595313623e-07, "loss": 0.0001, "step": 214830 }, { "epoch": 4.373333333333333, "grad_norm": 7.64435450990288, "learning_rate": 4.709249213573913e-07, "loss": 0.0325, "step": 214840 }, { "epoch": 4.373536895674301, "grad_norm": 0.0007039272183402345, "learning_rate": 4.706239182321498e-07, "loss": 0.0017, "step": 214850 }, { "epoch": 4.373740458015267, "grad_norm": 0.001689446389378739, "learning_rate": 4.7032300658349293e-07, "loss": 0.0004, "step": 214860 }, { "epoch": 4.373944020356234, "grad_norm": 0.0004992502515746399, "learning_rate": 4.700221864174981e-07, "loss": 0.0131, "step": 214870 }, { "epoch": 4.374147582697201, "grad_norm": 15.432306579687912, "learning_rate": 4.697214577402376e-07, "loss": 0.0547, "step": 214880 }, { "epoch": 4.374351145038168, "grad_norm": 0.0009987211314683004, "learning_rate": 4.694208205577894e-07, "loss": 0.0057, "step": 214890 }, { "epoch": 4.374554707379135, "grad_norm": 3.162865408890519e-05, "learning_rate": 4.691202748762219e-07, "loss": 0.0001, "step": 214900 }, { "epoch": 4.374758269720102, "grad_norm": 0.0010408407434953917, "learning_rate": 4.6881982070160646e-07, "loss": 0.0186, "step": 214910 }, { "epoch": 4.374961832061069, "grad_norm": 0.0067466242559081745, "learning_rate": 4.685194580400104e-07, "loss": 0.0032, "step": 214920 }, { "epoch": 4.3751653944020354, "grad_norm": 13.242009882035717, "learning_rate": 4.6821918689750055e-07, "loss": 0.0391, "step": 214930 }, { "epoch": 4.375368956743003, "grad_norm": 0.019263500075246465, "learning_rate": 4.679190072801415e-07, "loss": 0.0001, "step": 214940 }, { "epoch": 4.37557251908397, "grad_norm": 0.0002633844628462694, "learning_rate": 4.6761891919399505e-07, "loss": 0.0005, "step": 214950 }, { "epoch": 4.375776081424936, "grad_norm": 0.001724924024015448, "learning_rate": 4.6731892264512246e-07, "loss": 0.0336, "step": 214960 }, { "epoch": 4.375979643765904, "grad_norm": 0.05996366135187469, "learning_rate": 4.670190176395817e-07, "loss": 0.034, "step": 214970 }, { "epoch": 4.37618320610687, "grad_norm": 0.0009110326268908918, "learning_rate": 4.6671920418343066e-07, "loss": 0.0002, "step": 214980 }, { "epoch": 4.376386768447837, "grad_norm": 0.028716618243593613, "learning_rate": 4.664194822827245e-07, "loss": 0.0583, "step": 214990 }, { "epoch": 4.376590330788804, "grad_norm": 3.2290941697166198, "learning_rate": 4.661198519435156e-07, "loss": 0.0005, "step": 215000 }, { "epoch": 4.376793893129771, "grad_norm": 0.007516353252408601, "learning_rate": 4.6582031317185583e-07, "loss": 0.0699, "step": 215010 }, { "epoch": 4.376997455470738, "grad_norm": 0.17016783652797798, "learning_rate": 4.655208659737953e-07, "loss": 0.0002, "step": 215020 }, { "epoch": 4.3772010178117045, "grad_norm": 0.012341998573610237, "learning_rate": 4.652215103553809e-07, "loss": 0.0454, "step": 215030 }, { "epoch": 4.377404580152672, "grad_norm": 0.03779230115718371, "learning_rate": 4.6492224632265884e-07, "loss": 0.0215, "step": 215040 }, { "epoch": 4.377608142493639, "grad_norm": 0.013619407200226077, "learning_rate": 4.646230738816743e-07, "loss": 0.013, "step": 215050 }, { "epoch": 4.377811704834605, "grad_norm": 4.504375927142018, "learning_rate": 4.643239930384669e-07, "loss": 0.0511, "step": 215060 }, { "epoch": 4.378015267175573, "grad_norm": 0.0037196210976131973, "learning_rate": 4.640250037990779e-07, "loss": 0.0002, "step": 215070 }, { "epoch": 4.378218829516539, "grad_norm": 0.002533155672165169, "learning_rate": 4.6372610616954804e-07, "loss": 0.002, "step": 215080 }, { "epoch": 4.378422391857506, "grad_norm": 0.010017668972142483, "learning_rate": 4.634273001559109e-07, "loss": 0.043, "step": 215090 }, { "epoch": 4.378625954198474, "grad_norm": 23.304712370443912, "learning_rate": 4.6312858576420207e-07, "loss": 0.0889, "step": 215100 }, { "epoch": 4.37882951653944, "grad_norm": 1.1964887854842203e-08, "learning_rate": 4.6282996300045623e-07, "loss": 0.0002, "step": 215110 }, { "epoch": 4.379033078880407, "grad_norm": 0.001389107161815409, "learning_rate": 4.625314318707025e-07, "loss": 0.0003, "step": 215120 }, { "epoch": 4.379236641221374, "grad_norm": 0.008879616897281296, "learning_rate": 4.622329923809704e-07, "loss": 0.0011, "step": 215130 }, { "epoch": 4.379440203562341, "grad_norm": 0.02050970955536448, "learning_rate": 4.61934644537288e-07, "loss": 0.0001, "step": 215140 }, { "epoch": 4.379643765903308, "grad_norm": 0.032534708237681426, "learning_rate": 4.6163638834568036e-07, "loss": 0.0577, "step": 215150 }, { "epoch": 4.379847328244275, "grad_norm": 0.01693723093890577, "learning_rate": 4.6133822381217107e-07, "loss": 0.0321, "step": 215160 }, { "epoch": 4.380050890585242, "grad_norm": 0.03406188610244801, "learning_rate": 4.610401509427825e-07, "loss": 0.0174, "step": 215170 }, { "epoch": 4.380254452926208, "grad_norm": 0.0010066666469297839, "learning_rate": 4.607421697435338e-07, "loss": 0.0004, "step": 215180 }, { "epoch": 4.380458015267176, "grad_norm": 0.029404611641259685, "learning_rate": 4.604442802204434e-07, "loss": 0.0159, "step": 215190 }, { "epoch": 4.380661577608143, "grad_norm": 0.1862171798924699, "learning_rate": 4.601464823795282e-07, "loss": 0.0464, "step": 215200 }, { "epoch": 4.380865139949109, "grad_norm": 0.01583975125556413, "learning_rate": 4.598487762268028e-07, "loss": 0.0007, "step": 215210 }, { "epoch": 4.381068702290077, "grad_norm": 3.770585654406538e-07, "learning_rate": 4.5955116176827687e-07, "loss": 0.0483, "step": 215220 }, { "epoch": 4.381272264631043, "grad_norm": 0.03943111156928846, "learning_rate": 4.5925363900996445e-07, "loss": 0.0315, "step": 215230 }, { "epoch": 4.38147582697201, "grad_norm": 0.006843880586784007, "learning_rate": 4.589562079578741e-07, "loss": 0.0003, "step": 215240 }, { "epoch": 4.3816793893129775, "grad_norm": 0.08070762941410861, "learning_rate": 4.586588686180099e-07, "loss": 0.0004, "step": 215250 }, { "epoch": 4.381882951653944, "grad_norm": 0.029487326431860434, "learning_rate": 4.5836162099638146e-07, "loss": 0.0057, "step": 215260 }, { "epoch": 4.382086513994911, "grad_norm": 0.10722458095509164, "learning_rate": 4.580644650989885e-07, "loss": 0.0004, "step": 215270 }, { "epoch": 4.382290076335877, "grad_norm": 0.09556224022961683, "learning_rate": 4.5776740093183227e-07, "loss": 0.0001, "step": 215280 }, { "epoch": 4.382493638676845, "grad_norm": 0.0027550444034804265, "learning_rate": 4.574704285009163e-07, "loss": 0.0004, "step": 215290 }, { "epoch": 4.382697201017812, "grad_norm": 0.0013364816817748647, "learning_rate": 4.571735478122341e-07, "loss": 0.0001, "step": 215300 }, { "epoch": 4.382900763358778, "grad_norm": 0.003441154759009093, "learning_rate": 4.568767588717826e-07, "loss": 0.087, "step": 215310 }, { "epoch": 4.383104325699746, "grad_norm": 10.302774806862047, "learning_rate": 4.565800616855587e-07, "loss": 0.0276, "step": 215320 }, { "epoch": 4.383307888040712, "grad_norm": 0.015070844664211103, "learning_rate": 4.5628345625955085e-07, "loss": 0.0002, "step": 215330 }, { "epoch": 4.383511450381679, "grad_norm": 0.001990384677793189, "learning_rate": 4.5598694259975096e-07, "loss": 0.0001, "step": 215340 }, { "epoch": 4.3837150127226465, "grad_norm": 0.10294902185920168, "learning_rate": 4.5569052071214756e-07, "loss": 0.0361, "step": 215350 }, { "epoch": 4.383918575063613, "grad_norm": 0.021144066465518103, "learning_rate": 4.5539419060272705e-07, "loss": 0.0051, "step": 215360 }, { "epoch": 4.38412213740458, "grad_norm": 1.3406149435588825e-06, "learning_rate": 4.550979522774746e-07, "loss": 0.0016, "step": 215370 }, { "epoch": 4.384325699745547, "grad_norm": 10.623319846097768, "learning_rate": 4.5480180574237256e-07, "loss": 0.0414, "step": 215380 }, { "epoch": 4.384529262086514, "grad_norm": 0.0012514458113780625, "learning_rate": 4.545057510034023e-07, "loss": 0.0011, "step": 215390 }, { "epoch": 4.384732824427481, "grad_norm": 0.00528271963945734, "learning_rate": 4.542097880665425e-07, "loss": 0.0002, "step": 215400 }, { "epoch": 4.384936386768448, "grad_norm": 0.0013074566034717568, "learning_rate": 4.5391391693777155e-07, "loss": 0.0001, "step": 215410 }, { "epoch": 4.385139949109415, "grad_norm": 0.02122659908141648, "learning_rate": 4.536181376230647e-07, "loss": 0.0002, "step": 215420 }, { "epoch": 4.385343511450381, "grad_norm": 0.022533778776975655, "learning_rate": 4.533224501283939e-07, "loss": 0.0001, "step": 215430 }, { "epoch": 4.385547073791349, "grad_norm": 0.0451751080285339, "learning_rate": 4.5302685445973327e-07, "loss": 0.0001, "step": 215440 }, { "epoch": 4.3857506361323155, "grad_norm": 4.462640315006901, "learning_rate": 4.5273135062305243e-07, "loss": 0.0069, "step": 215450 }, { "epoch": 4.385954198473282, "grad_norm": 0.037373949773743, "learning_rate": 4.524359386243171e-07, "loss": 0.0002, "step": 215460 }, { "epoch": 4.38615776081425, "grad_norm": 0.00354685019670965, "learning_rate": 4.5214061846949595e-07, "loss": 0.0313, "step": 215470 }, { "epoch": 4.386361323155216, "grad_norm": 0.0445803892109576, "learning_rate": 4.5184539016455364e-07, "loss": 0.0042, "step": 215480 }, { "epoch": 4.386564885496183, "grad_norm": 0.005910154436625487, "learning_rate": 4.5155025371545036e-07, "loss": 0.0012, "step": 215490 }, { "epoch": 4.3867684478371505, "grad_norm": 0.06849830296654086, "learning_rate": 4.512552091281486e-07, "loss": 0.0067, "step": 215500 }, { "epoch": 4.386972010178117, "grad_norm": 0.14153470237678734, "learning_rate": 4.509602564086063e-07, "loss": 0.0158, "step": 215510 }, { "epoch": 4.387175572519084, "grad_norm": 0.00029061025822104075, "learning_rate": 4.50665395562781e-07, "loss": 0.0001, "step": 215520 }, { "epoch": 4.387379134860051, "grad_norm": 1.1686814759947821, "learning_rate": 4.503706265966273e-07, "loss": 0.0187, "step": 215530 }, { "epoch": 4.387582697201018, "grad_norm": 2.959819587428597, "learning_rate": 4.500759495160989e-07, "loss": 0.0712, "step": 215540 }, { "epoch": 4.3877862595419845, "grad_norm": 0.0035943260721293738, "learning_rate": 4.49781364327147e-07, "loss": 0.0378, "step": 215550 }, { "epoch": 4.387989821882952, "grad_norm": 0.6484339428828237, "learning_rate": 4.494868710357209e-07, "loss": 0.0016, "step": 215560 }, { "epoch": 4.388193384223919, "grad_norm": 0.003440855628891347, "learning_rate": 4.491924696477684e-07, "loss": 0.0001, "step": 215570 }, { "epoch": 4.388396946564885, "grad_norm": 0.003008321772988082, "learning_rate": 4.488981601692355e-07, "loss": 0.0169, "step": 215580 }, { "epoch": 4.388600508905853, "grad_norm": 0.0026778959401544023, "learning_rate": 4.486039426060662e-07, "loss": 0.0024, "step": 215590 }, { "epoch": 4.3888040712468195, "grad_norm": 0.003992149429796482, "learning_rate": 4.4830981696420195e-07, "loss": 0.0015, "step": 215600 }, { "epoch": 4.389007633587786, "grad_norm": 0.0003907905786867794, "learning_rate": 4.480157832495846e-07, "loss": 0.0369, "step": 215610 }, { "epoch": 4.389211195928754, "grad_norm": 0.0008304656123953372, "learning_rate": 4.4772184146815e-07, "loss": 0.0001, "step": 215620 }, { "epoch": 4.38941475826972, "grad_norm": 0.0060809333791471375, "learning_rate": 4.474279916258373e-07, "loss": 0.0144, "step": 215630 }, { "epoch": 4.389618320610687, "grad_norm": 12.723118823376494, "learning_rate": 4.471342337285794e-07, "loss": 0.0448, "step": 215640 }, { "epoch": 4.3898218829516535, "grad_norm": 4.701822769071809, "learning_rate": 4.46840567782309e-07, "loss": 0.0103, "step": 215650 }, { "epoch": 4.390025445292621, "grad_norm": 0.0005045729410798578, "learning_rate": 4.465469937929595e-07, "loss": 0.0001, "step": 215660 }, { "epoch": 4.390229007633588, "grad_norm": 0.0036648215816483427, "learning_rate": 4.462535117664568e-07, "loss": 0.0324, "step": 215670 }, { "epoch": 4.390432569974554, "grad_norm": 0.0019435172785390337, "learning_rate": 4.459601217087295e-07, "loss": 0.0256, "step": 215680 }, { "epoch": 4.390636132315522, "grad_norm": 0.06314363396487176, "learning_rate": 4.4566682362570447e-07, "loss": 0.0002, "step": 215690 }, { "epoch": 4.3908396946564885, "grad_norm": 0.0015931417810474923, "learning_rate": 4.4537361752330253e-07, "loss": 0.0003, "step": 215700 }, { "epoch": 4.391043256997455, "grad_norm": 0.009124971381784084, "learning_rate": 4.4508050340744734e-07, "loss": 0.0001, "step": 215710 }, { "epoch": 4.391246819338423, "grad_norm": 0.051016657649252534, "learning_rate": 4.4478748128405803e-07, "loss": 0.0001, "step": 215720 }, { "epoch": 4.391450381679389, "grad_norm": 0.01429998491973419, "learning_rate": 4.444945511590526e-07, "loss": 0.0185, "step": 215730 }, { "epoch": 4.391653944020356, "grad_norm": 13.864739856796705, "learning_rate": 4.442017130383469e-07, "loss": 0.0416, "step": 215740 }, { "epoch": 4.391857506361323, "grad_norm": 0.0013256742065027912, "learning_rate": 4.439089669278557e-07, "loss": 0.0347, "step": 215750 }, { "epoch": 4.39206106870229, "grad_norm": 0.0022254142848758102, "learning_rate": 4.436163128334908e-07, "loss": 0.0295, "step": 215760 }, { "epoch": 4.392264631043257, "grad_norm": 0.01858114668114023, "learning_rate": 4.433237507611632e-07, "loss": 0.0025, "step": 215770 }, { "epoch": 4.392468193384224, "grad_norm": 0.03144774800109958, "learning_rate": 4.4303128071678136e-07, "loss": 0.0004, "step": 215780 }, { "epoch": 4.392671755725191, "grad_norm": 0.007398249767464727, "learning_rate": 4.427389027062534e-07, "loss": 0.0001, "step": 215790 }, { "epoch": 4.3928753180661575, "grad_norm": 0.04414829292878371, "learning_rate": 4.424466167354807e-07, "loss": 0.0289, "step": 215800 }, { "epoch": 4.393078880407125, "grad_norm": 0.00047141151835621833, "learning_rate": 4.421544228103697e-07, "loss": 0.0001, "step": 215810 }, { "epoch": 4.393282442748092, "grad_norm": 0.2998794550959249, "learning_rate": 4.418623209368217e-07, "loss": 0.0004, "step": 215820 }, { "epoch": 4.393486005089058, "grad_norm": 10.10000018472581, "learning_rate": 4.4157031112073313e-07, "loss": 0.0566, "step": 215830 }, { "epoch": 4.393689567430026, "grad_norm": 0.012064167026700915, "learning_rate": 4.4127839336800425e-07, "loss": 0.0104, "step": 215840 }, { "epoch": 4.3938931297709924, "grad_norm": 0.021206376979822115, "learning_rate": 4.409865676845304e-07, "loss": 0.0005, "step": 215850 }, { "epoch": 4.394096692111959, "grad_norm": 0.011359050094704731, "learning_rate": 4.4069483407620297e-07, "loss": 0.0271, "step": 215860 }, { "epoch": 4.394300254452927, "grad_norm": 0.018479034662025676, "learning_rate": 4.4040319254891773e-07, "loss": 0.0011, "step": 215870 }, { "epoch": 4.394503816793893, "grad_norm": 0.003327242328223905, "learning_rate": 4.4011164310856227e-07, "loss": 0.0002, "step": 215880 }, { "epoch": 4.39470737913486, "grad_norm": 0.008520540639901323, "learning_rate": 4.398201857610246e-07, "loss": 0.0158, "step": 215890 }, { "epoch": 4.3949109414758265, "grad_norm": 0.015193292242640006, "learning_rate": 4.395288205121923e-07, "loss": 0.0002, "step": 215900 }, { "epoch": 4.395114503816794, "grad_norm": 0.008813745625891779, "learning_rate": 4.392375473679489e-07, "loss": 0.0222, "step": 215910 }, { "epoch": 4.395318066157761, "grad_norm": 0.005152923204791042, "learning_rate": 4.389463663341775e-07, "loss": 0.0001, "step": 215920 }, { "epoch": 4.395521628498727, "grad_norm": 0.0016024477487013521, "learning_rate": 4.3865527741675853e-07, "loss": 0.0156, "step": 215930 }, { "epoch": 4.395725190839695, "grad_norm": 0.000621748428077647, "learning_rate": 4.3836428062157155e-07, "loss": 0.0003, "step": 215940 }, { "epoch": 4.3959287531806615, "grad_norm": 0.004163770228051013, "learning_rate": 4.380733759544936e-07, "loss": 0.0036, "step": 215950 }, { "epoch": 4.396132315521628, "grad_norm": 0.004067617772897944, "learning_rate": 4.377825634213989e-07, "loss": 0.0003, "step": 215960 }, { "epoch": 4.396335877862596, "grad_norm": 0.0013025466620357983, "learning_rate": 4.374918430281616e-07, "loss": 0.0264, "step": 215970 }, { "epoch": 4.396539440203562, "grad_norm": 0.013391645426410841, "learning_rate": 4.372012147806531e-07, "loss": 0.0003, "step": 215980 }, { "epoch": 4.396743002544529, "grad_norm": 0.002356545557385308, "learning_rate": 4.369106786847432e-07, "loss": 0.0001, "step": 215990 }, { "epoch": 4.396946564885496, "grad_norm": 0.0005380927643688707, "learning_rate": 4.366202347463e-07, "loss": 0.0003, "step": 216000 }, { "epoch": 4.397150127226463, "grad_norm": 0.004517726952991558, "learning_rate": 4.363298829711865e-07, "loss": 0.0001, "step": 216010 }, { "epoch": 4.39735368956743, "grad_norm": 0.01423052509538278, "learning_rate": 4.360396233652703e-07, "loss": 0.0202, "step": 216020 }, { "epoch": 4.397557251908397, "grad_norm": 0.0013858550995849424, "learning_rate": 4.357494559344133e-07, "loss": 0.0027, "step": 216030 }, { "epoch": 4.397760814249364, "grad_norm": 9.649679731429963, "learning_rate": 4.354593806844737e-07, "loss": 0.0697, "step": 216040 }, { "epoch": 4.3979643765903305, "grad_norm": 0.31058778831767087, "learning_rate": 4.3516939762131003e-07, "loss": 0.0068, "step": 216050 }, { "epoch": 4.398167938931298, "grad_norm": 0.004331873850520333, "learning_rate": 4.348795067507816e-07, "loss": 0.0001, "step": 216060 }, { "epoch": 4.398371501272265, "grad_norm": 0.004947836041935568, "learning_rate": 4.345897080787409e-07, "loss": 0.0001, "step": 216070 }, { "epoch": 4.398575063613231, "grad_norm": 0.009888363725837972, "learning_rate": 4.34300001611041e-07, "loss": 0.0001, "step": 216080 }, { "epoch": 4.398778625954199, "grad_norm": 0.00870086580568029, "learning_rate": 4.3401038735353285e-07, "loss": 0.0002, "step": 216090 }, { "epoch": 4.398982188295165, "grad_norm": 0.020872426302077002, "learning_rate": 4.3372086531206616e-07, "loss": 0.0425, "step": 216100 }, { "epoch": 4.399185750636132, "grad_norm": 0.0013614243320285897, "learning_rate": 4.334314354924879e-07, "loss": 0.0208, "step": 216110 }, { "epoch": 4.3993893129771, "grad_norm": 10.173357152763572, "learning_rate": 4.331420979006434e-07, "loss": 0.0161, "step": 216120 }, { "epoch": 4.399592875318066, "grad_norm": 0.002392276394932463, "learning_rate": 4.328528525423759e-07, "loss": 0.0001, "step": 216130 }, { "epoch": 4.399796437659033, "grad_norm": 0.002464560907551737, "learning_rate": 4.3256369942352825e-07, "loss": 0.0171, "step": 216140 }, { "epoch": 4.4, "grad_norm": 0.0035731463045420094, "learning_rate": 4.3227463854993877e-07, "loss": 0.0001, "step": 216150 }, { "epoch": 4.400203562340967, "grad_norm": 0.11239786226363206, "learning_rate": 4.319856699274477e-07, "loss": 0.0002, "step": 216160 }, { "epoch": 4.400407124681934, "grad_norm": 0.002830715829783545, "learning_rate": 4.3169679356188766e-07, "loss": 0.0323, "step": 216170 }, { "epoch": 4.400610687022901, "grad_norm": 0.016025164908379935, "learning_rate": 4.3140800945909556e-07, "loss": 0.0001, "step": 216180 }, { "epoch": 4.400814249363868, "grad_norm": 0.018101450206552048, "learning_rate": 4.31119317624904e-07, "loss": 0.0432, "step": 216190 }, { "epoch": 4.401017811704834, "grad_norm": 0.013838427756652704, "learning_rate": 4.3083071806514055e-07, "loss": 0.0222, "step": 216200 }, { "epoch": 4.401221374045802, "grad_norm": 0.0018223623810609518, "learning_rate": 4.305422107856366e-07, "loss": 0.0013, "step": 216210 }, { "epoch": 4.401424936386769, "grad_norm": 0.017345354020392962, "learning_rate": 4.3025379579221925e-07, "loss": 0.0001, "step": 216220 }, { "epoch": 4.401628498727735, "grad_norm": 0.011909934980834114, "learning_rate": 4.2996547309071047e-07, "loss": 0.0715, "step": 216230 }, { "epoch": 4.401832061068703, "grad_norm": 0.003102874762075259, "learning_rate": 4.2967724268693665e-07, "loss": 0.0001, "step": 216240 }, { "epoch": 4.402035623409669, "grad_norm": 0.04184113833407206, "learning_rate": 4.293891045867171e-07, "loss": 0.0509, "step": 216250 }, { "epoch": 4.402239185750636, "grad_norm": 0.0011264393931225203, "learning_rate": 4.2910105879586996e-07, "loss": 0.014, "step": 216260 }, { "epoch": 4.4024427480916035, "grad_norm": 0.0601448380366404, "learning_rate": 4.288131053202166e-07, "loss": 0.0405, "step": 216270 }, { "epoch": 4.40264631043257, "grad_norm": 0.007913249349059185, "learning_rate": 4.2852524416556905e-07, "loss": 0.0178, "step": 216280 }, { "epoch": 4.402849872773537, "grad_norm": 0.018538041977115734, "learning_rate": 4.282374753377422e-07, "loss": 0.0213, "step": 216290 }, { "epoch": 4.403053435114503, "grad_norm": 0.14233298818811718, "learning_rate": 4.279497988425474e-07, "loss": 0.0002, "step": 216300 }, { "epoch": 4.403256997455471, "grad_norm": 0.08808914628286008, "learning_rate": 4.2766221468579614e-07, "loss": 0.0001, "step": 216310 }, { "epoch": 4.403460559796438, "grad_norm": 5.258416560726263, "learning_rate": 4.2737472287329496e-07, "loss": 0.0057, "step": 216320 }, { "epoch": 4.403664122137404, "grad_norm": 0.03623757297284585, "learning_rate": 4.270873234108508e-07, "loss": 0.0197, "step": 216330 }, { "epoch": 4.403867684478372, "grad_norm": 0.0083486841623373, "learning_rate": 4.2680001630426793e-07, "loss": 0.0065, "step": 216340 }, { "epoch": 4.404071246819338, "grad_norm": 1.2998615865576761, "learning_rate": 4.265128015593495e-07, "loss": 0.0004, "step": 216350 }, { "epoch": 4.404274809160305, "grad_norm": 31.586754151465207, "learning_rate": 4.2622567918189526e-07, "loss": 0.0089, "step": 216360 }, { "epoch": 4.4044783715012725, "grad_norm": 0.015376914043742517, "learning_rate": 4.259386491777051e-07, "loss": 0.045, "step": 216370 }, { "epoch": 4.404681933842239, "grad_norm": 0.02211092410581913, "learning_rate": 4.2565171155257376e-07, "loss": 0.0419, "step": 216380 }, { "epoch": 4.404885496183206, "grad_norm": 0.005036836252677407, "learning_rate": 4.253648663122989e-07, "loss": 0.0002, "step": 216390 }, { "epoch": 4.405089058524173, "grad_norm": 0.16696692230987836, "learning_rate": 4.2507811346267304e-07, "loss": 0.0527, "step": 216400 }, { "epoch": 4.40529262086514, "grad_norm": 0.0012314376924083819, "learning_rate": 4.24791453009486e-07, "loss": 0.0177, "step": 216410 }, { "epoch": 4.405496183206107, "grad_norm": 0.02125348200856181, "learning_rate": 4.245048849585287e-07, "loss": 0.0003, "step": 216420 }, { "epoch": 4.405699745547074, "grad_norm": 0.01911542913942531, "learning_rate": 4.242184093155893e-07, "loss": 0.0479, "step": 216430 }, { "epoch": 4.405903307888041, "grad_norm": 0.034971920006165946, "learning_rate": 4.239320260864521e-07, "loss": 0.0001, "step": 216440 }, { "epoch": 4.406106870229007, "grad_norm": 0.013722859858175679, "learning_rate": 4.236457352769019e-07, "loss": 0.0544, "step": 216450 }, { "epoch": 4.406310432569975, "grad_norm": 0.004828147275272055, "learning_rate": 4.233595368927196e-07, "loss": 0.0001, "step": 216460 }, { "epoch": 4.4065139949109415, "grad_norm": 0.001177027358977427, "learning_rate": 4.2307343093968664e-07, "loss": 0.0001, "step": 216470 }, { "epoch": 4.406717557251908, "grad_norm": 0.0024097464411043327, "learning_rate": 4.227874174235808e-07, "loss": 0.0, "step": 216480 }, { "epoch": 4.406921119592876, "grad_norm": 0.006924894979443295, "learning_rate": 4.2250149635017835e-07, "loss": 0.0005, "step": 216490 }, { "epoch": 4.407124681933842, "grad_norm": 0.0025410034872525213, "learning_rate": 4.222156677252537e-07, "loss": 0.0093, "step": 216500 }, { "epoch": 4.407328244274809, "grad_norm": 0.07158615106680531, "learning_rate": 4.2192993155457993e-07, "loss": 0.0001, "step": 216510 }, { "epoch": 4.4075318066157765, "grad_norm": 0.0030469829505215063, "learning_rate": 4.216442878439281e-07, "loss": 0.0005, "step": 216520 }, { "epoch": 4.407735368956743, "grad_norm": 0.0464974935295301, "learning_rate": 4.2135873659906625e-07, "loss": 0.0188, "step": 216530 }, { "epoch": 4.40793893129771, "grad_norm": 0.002712690751397425, "learning_rate": 4.21073277825762e-07, "loss": 0.0006, "step": 216540 }, { "epoch": 4.408142493638676, "grad_norm": 0.0056604529819520135, "learning_rate": 4.207879115297808e-07, "loss": 0.0729, "step": 216550 }, { "epoch": 4.408346055979644, "grad_norm": 0.0011799771862490722, "learning_rate": 4.2050263771688583e-07, "loss": 0.0002, "step": 216560 }, { "epoch": 4.4085496183206105, "grad_norm": 0.0016265246732290808, "learning_rate": 4.2021745639283797e-07, "loss": 0.1019, "step": 216570 }, { "epoch": 4.408753180661577, "grad_norm": 4.3939967111089935, "learning_rate": 4.199323675633976e-07, "loss": 0.0176, "step": 216580 }, { "epoch": 4.408956743002545, "grad_norm": 0.003337516901876998, "learning_rate": 4.1964737123432295e-07, "loss": 0.0001, "step": 216590 }, { "epoch": 4.409160305343511, "grad_norm": 0.046647349428471034, "learning_rate": 4.1936246741136724e-07, "loss": 0.0002, "step": 216600 }, { "epoch": 4.409363867684478, "grad_norm": 0.026342530880230107, "learning_rate": 4.1907765610028795e-07, "loss": 0.0001, "step": 216610 }, { "epoch": 4.4095674300254455, "grad_norm": 0.0011526090467905298, "learning_rate": 4.18792937306835e-07, "loss": 0.0225, "step": 216620 }, { "epoch": 4.409770992366412, "grad_norm": 0.0030382188112853432, "learning_rate": 4.185083110367583e-07, "loss": 0.0063, "step": 216630 }, { "epoch": 4.409974554707379, "grad_norm": 0.019907297625413187, "learning_rate": 4.1822377729580867e-07, "loss": 0.018, "step": 216640 }, { "epoch": 4.410178117048346, "grad_norm": 0.008343909254761514, "learning_rate": 4.1793933608973046e-07, "loss": 0.0015, "step": 216650 }, { "epoch": 4.410381679389313, "grad_norm": 0.0018835152903407708, "learning_rate": 4.1765498742426914e-07, "loss": 0.0392, "step": 216660 }, { "epoch": 4.4105852417302795, "grad_norm": 0.018330625595691458, "learning_rate": 4.1737073130516727e-07, "loss": 0.0001, "step": 216670 }, { "epoch": 4.410788804071247, "grad_norm": 0.206646806850032, "learning_rate": 4.170865677381658e-07, "loss": 0.0171, "step": 216680 }, { "epoch": 4.410992366412214, "grad_norm": 0.006479834938706228, "learning_rate": 4.1680249672900407e-07, "loss": 0.0001, "step": 216690 }, { "epoch": 4.41119592875318, "grad_norm": 8.655032743175193, "learning_rate": 4.165185182834186e-07, "loss": 0.0267, "step": 216700 }, { "epoch": 4.411399491094148, "grad_norm": 0.0027308567797813864, "learning_rate": 4.162346324071453e-07, "loss": 0.0807, "step": 216710 }, { "epoch": 4.4116030534351145, "grad_norm": 0.04854662697609834, "learning_rate": 4.159508391059175e-07, "loss": 0.0017, "step": 216720 }, { "epoch": 4.411806615776081, "grad_norm": 0.026292195582203165, "learning_rate": 4.1566713838546713e-07, "loss": 0.0001, "step": 216730 }, { "epoch": 4.412010178117049, "grad_norm": 0.014105298512466457, "learning_rate": 4.1538353025152357e-07, "loss": 0.0001, "step": 216740 }, { "epoch": 4.412213740458015, "grad_norm": 5.737502285245072, "learning_rate": 4.151000147098133e-07, "loss": 0.0062, "step": 216750 }, { "epoch": 4.412417302798982, "grad_norm": 0.0006829040575512298, "learning_rate": 4.1481659176606456e-07, "loss": 0.0083, "step": 216760 }, { "epoch": 4.4126208651399494, "grad_norm": 0.01948432638793672, "learning_rate": 4.1453326142600116e-07, "loss": 0.0001, "step": 216770 }, { "epoch": 4.412824427480916, "grad_norm": 0.004650155506589302, "learning_rate": 4.142500236953423e-07, "loss": 0.0, "step": 216780 }, { "epoch": 4.413027989821883, "grad_norm": 0.0034279776108299543, "learning_rate": 4.139668785798123e-07, "loss": 0.0217, "step": 216790 }, { "epoch": 4.41323155216285, "grad_norm": 0.017634944128028948, "learning_rate": 4.1368382608512834e-07, "loss": 0.0537, "step": 216800 }, { "epoch": 4.413435114503817, "grad_norm": 0.04383749804817973, "learning_rate": 4.134008662170047e-07, "loss": 0.0001, "step": 216810 }, { "epoch": 4.4136386768447835, "grad_norm": 0.017368851185280785, "learning_rate": 4.131179989811601e-07, "loss": 0.0239, "step": 216820 }, { "epoch": 4.413842239185751, "grad_norm": 0.04042673210527472, "learning_rate": 4.1283522438330445e-07, "loss": 0.0344, "step": 216830 }, { "epoch": 4.414045801526718, "grad_norm": 0.00876412909148142, "learning_rate": 4.1255254242914933e-07, "loss": 0.0001, "step": 216840 }, { "epoch": 4.414249363867684, "grad_norm": 0.005130012148236145, "learning_rate": 4.122699531244051e-07, "loss": 0.0132, "step": 216850 }, { "epoch": 4.414452926208652, "grad_norm": 0.02515022538709721, "learning_rate": 4.119874564747778e-07, "loss": 0.0533, "step": 216860 }, { "epoch": 4.4146564885496185, "grad_norm": 0.020723142855350533, "learning_rate": 4.117050524859728e-07, "loss": 0.0004, "step": 216870 }, { "epoch": 4.414860050890585, "grad_norm": 0.006479162311014298, "learning_rate": 4.1142274116369394e-07, "loss": 0.001, "step": 216880 }, { "epoch": 4.415063613231553, "grad_norm": 0.053690445021386056, "learning_rate": 4.111405225136433e-07, "loss": 0.0001, "step": 216890 }, { "epoch": 4.415267175572519, "grad_norm": 0.00038836520286169554, "learning_rate": 4.108583965415197e-07, "loss": 0.0037, "step": 216900 }, { "epoch": 4.415470737913486, "grad_norm": 0.0025348029100709093, "learning_rate": 4.105763632530219e-07, "loss": 0.0005, "step": 216910 }, { "epoch": 4.4156743002544525, "grad_norm": 0.0120843132005702, "learning_rate": 4.102944226538458e-07, "loss": 0.0003, "step": 216920 }, { "epoch": 4.41587786259542, "grad_norm": 0.12929796315790335, "learning_rate": 4.100125747496847e-07, "loss": 0.0094, "step": 216930 }, { "epoch": 4.416081424936387, "grad_norm": 0.0016548567711232986, "learning_rate": 4.0973081954623186e-07, "loss": 0.0007, "step": 216940 }, { "epoch": 4.416284987277353, "grad_norm": 0.0009773491014874942, "learning_rate": 4.094491570491771e-07, "loss": 0.0222, "step": 216950 }, { "epoch": 4.416488549618321, "grad_norm": 0.017496145586710247, "learning_rate": 4.0916758726420926e-07, "loss": 0.0458, "step": 216960 }, { "epoch": 4.4166921119592875, "grad_norm": 0.013821453673113895, "learning_rate": 4.088861101970154e-07, "loss": 0.0095, "step": 216970 }, { "epoch": 4.416895674300254, "grad_norm": 0.0030935858907921872, "learning_rate": 4.086047258532805e-07, "loss": 0.0382, "step": 216980 }, { "epoch": 4.417099236641222, "grad_norm": 0.002905743607756287, "learning_rate": 4.0832343423868495e-07, "loss": 0.0458, "step": 216990 }, { "epoch": 4.417302798982188, "grad_norm": 0.023997191916059014, "learning_rate": 4.0804223535891253e-07, "loss": 0.0, "step": 217000 }, { "epoch": 4.417506361323155, "grad_norm": 0.01105509488942585, "learning_rate": 4.077611292196426e-07, "loss": 0.0001, "step": 217010 }, { "epoch": 4.417709923664122, "grad_norm": 0.013793630941279418, "learning_rate": 4.074801158265501e-07, "loss": 0.0004, "step": 217020 }, { "epoch": 4.417913486005089, "grad_norm": 0.04205850660960954, "learning_rate": 4.0719919518531216e-07, "loss": 0.0341, "step": 217030 }, { "epoch": 4.418117048346056, "grad_norm": 0.010700275642875883, "learning_rate": 4.0691836730160196e-07, "loss": 0.0076, "step": 217040 }, { "epoch": 4.418320610687023, "grad_norm": 0.0005696647501914554, "learning_rate": 4.0663763218109107e-07, "loss": 0.0108, "step": 217050 }, { "epoch": 4.41852417302799, "grad_norm": 0.007457346348733414, "learning_rate": 4.0635698982944947e-07, "loss": 0.0272, "step": 217060 }, { "epoch": 4.4187277353689565, "grad_norm": 0.029714612805699418, "learning_rate": 4.060764402523454e-07, "loss": 0.0079, "step": 217070 }, { "epoch": 4.418931297709924, "grad_norm": 0.09825892538155762, "learning_rate": 4.0579598345544367e-07, "loss": 0.0001, "step": 217080 }, { "epoch": 4.419134860050891, "grad_norm": 0.0032115209740349545, "learning_rate": 4.0551561944440985e-07, "loss": 0.0002, "step": 217090 }, { "epoch": 4.419338422391857, "grad_norm": 0.013068631723657654, "learning_rate": 4.0523534822490605e-07, "loss": 0.0517, "step": 217100 }, { "epoch": 4.419541984732825, "grad_norm": 0.0021941726064940766, "learning_rate": 4.049551698025922e-07, "loss": 0.0001, "step": 217110 }, { "epoch": 4.419745547073791, "grad_norm": 0.003122948773954518, "learning_rate": 4.0467508418312704e-07, "loss": 0.0002, "step": 217120 }, { "epoch": 4.419949109414758, "grad_norm": 0.00528956277213437, "learning_rate": 4.0439509137216725e-07, "loss": 0.0016, "step": 217130 }, { "epoch": 4.420152671755726, "grad_norm": 0.029408492454907505, "learning_rate": 4.0411519137536823e-07, "loss": 0.0002, "step": 217140 }, { "epoch": 4.420356234096692, "grad_norm": 0.00035699514833945593, "learning_rate": 4.038353841983811e-07, "loss": 0.0372, "step": 217150 }, { "epoch": 4.420559796437659, "grad_norm": 0.01055549033650683, "learning_rate": 4.03555669846859e-07, "loss": 0.0, "step": 217160 }, { "epoch": 4.420763358778626, "grad_norm": 0.009796150618751885, "learning_rate": 4.0327604832645085e-07, "loss": 0.0273, "step": 217170 }, { "epoch": 4.420966921119593, "grad_norm": 0.01506698943861732, "learning_rate": 4.0299651964280205e-07, "loss": 0.0001, "step": 217180 }, { "epoch": 4.42117048346056, "grad_norm": 6.917374609550141, "learning_rate": 4.0271708380156094e-07, "loss": 0.0303, "step": 217190 }, { "epoch": 4.421374045801526, "grad_norm": 0.010943217119836453, "learning_rate": 4.0243774080836907e-07, "loss": 0.0168, "step": 217200 }, { "epoch": 4.421577608142494, "grad_norm": 0.0026852901488758586, "learning_rate": 4.021584906688675e-07, "loss": 0.0, "step": 217210 }, { "epoch": 4.42178117048346, "grad_norm": 0.0019271460415156163, "learning_rate": 4.018793333886989e-07, "loss": 0.0087, "step": 217220 }, { "epoch": 4.421984732824427, "grad_norm": 0.001577374505210294, "learning_rate": 4.016002689734988e-07, "loss": 0.0001, "step": 217230 }, { "epoch": 4.422188295165395, "grad_norm": 0.004678885103544173, "learning_rate": 4.013212974289027e-07, "loss": 0.0346, "step": 217240 }, { "epoch": 4.422391857506361, "grad_norm": 11.229363155944519, "learning_rate": 4.0104241876054827e-07, "loss": 0.0253, "step": 217250 }, { "epoch": 4.422595419847328, "grad_norm": 6.017885963540789, "learning_rate": 4.007636329740644e-07, "loss": 0.0232, "step": 217260 }, { "epoch": 4.422798982188295, "grad_norm": 0.01458170469003917, "learning_rate": 4.004849400750832e-07, "loss": 0.0975, "step": 217270 }, { "epoch": 4.423002544529262, "grad_norm": 0.005448249943410525, "learning_rate": 4.002063400692324e-07, "loss": 0.0729, "step": 217280 }, { "epoch": 4.423206106870229, "grad_norm": 0.005862251274264347, "learning_rate": 3.999278329621392e-07, "loss": 0.0001, "step": 217290 }, { "epoch": 4.423409669211196, "grad_norm": 0.005674107161352304, "learning_rate": 3.99649418759428e-07, "loss": 0.019, "step": 217300 }, { "epoch": 4.423613231552163, "grad_norm": 0.002241612899840508, "learning_rate": 3.99371097466722e-07, "loss": 0.0007, "step": 217310 }, { "epoch": 4.423816793893129, "grad_norm": 0.012013269167933997, "learning_rate": 3.990928690896434e-07, "loss": 0.0125, "step": 217320 }, { "epoch": 4.424020356234097, "grad_norm": 0.00021649669525639, "learning_rate": 3.988147336338083e-07, "loss": 0.0009, "step": 217330 }, { "epoch": 4.424223918575064, "grad_norm": 0.01721112522709158, "learning_rate": 3.985366911048366e-07, "loss": 0.0001, "step": 217340 }, { "epoch": 4.42442748091603, "grad_norm": 0.005741564971777664, "learning_rate": 3.9825874150834445e-07, "loss": 0.0007, "step": 217350 }, { "epoch": 4.424631043256998, "grad_norm": 0.007123701851863781, "learning_rate": 3.9798088484994224e-07, "loss": 0.0229, "step": 217360 }, { "epoch": 4.424834605597964, "grad_norm": 0.01005638386998384, "learning_rate": 3.977031211352439e-07, "loss": 0.0019, "step": 217370 }, { "epoch": 4.425038167938931, "grad_norm": 7.337025727442498, "learning_rate": 3.974254503698599e-07, "loss": 0.0308, "step": 217380 }, { "epoch": 4.4252417302798985, "grad_norm": 0.012025137119665406, "learning_rate": 3.9714787255939526e-07, "loss": 0.0001, "step": 217390 }, { "epoch": 4.425445292620865, "grad_norm": 0.005629808468238588, "learning_rate": 3.968703877094593e-07, "loss": 0.0001, "step": 217400 }, { "epoch": 4.425648854961832, "grad_norm": 0.02275291749568022, "learning_rate": 3.9659299582565367e-07, "loss": 0.0002, "step": 217410 }, { "epoch": 4.425852417302799, "grad_norm": 0.00742628173746077, "learning_rate": 3.9631569691358173e-07, "loss": 0.0, "step": 217420 }, { "epoch": 4.426055979643766, "grad_norm": 0.06313248860185414, "learning_rate": 3.9603849097884395e-07, "loss": 0.0242, "step": 217430 }, { "epoch": 4.426259541984733, "grad_norm": 0.006153627124462038, "learning_rate": 3.9576137802703863e-07, "loss": 0.0044, "step": 217440 }, { "epoch": 4.4264631043257, "grad_norm": 0.008836501175846292, "learning_rate": 3.9548435806376297e-07, "loss": 0.0001, "step": 217450 }, { "epoch": 4.426666666666667, "grad_norm": 0.006035487449377607, "learning_rate": 3.952074310946108e-07, "loss": 0.0003, "step": 217460 }, { "epoch": 4.426870229007633, "grad_norm": 0.00017793035357066442, "learning_rate": 3.9493059712517543e-07, "loss": 0.0004, "step": 217470 }, { "epoch": 4.427073791348601, "grad_norm": 0.013430249958869588, "learning_rate": 3.9465385616104803e-07, "loss": 0.0002, "step": 217480 }, { "epoch": 4.4272773536895675, "grad_norm": 0.0028834027512067437, "learning_rate": 3.943772082078173e-07, "loss": 0.0001, "step": 217490 }, { "epoch": 4.427480916030534, "grad_norm": 0.0017325585197145517, "learning_rate": 3.941006532710712e-07, "loss": 0.0003, "step": 217500 }, { "epoch": 4.427684478371502, "grad_norm": 0.030684129185330304, "learning_rate": 3.9382419135639507e-07, "loss": 0.0002, "step": 217510 }, { "epoch": 4.427888040712468, "grad_norm": 8.015402177733424, "learning_rate": 3.9354782246937173e-07, "loss": 0.0085, "step": 217520 }, { "epoch": 4.428091603053435, "grad_norm": 0.0945358741765699, "learning_rate": 3.9327154661558286e-07, "loss": 0.0008, "step": 217530 }, { "epoch": 4.4282951653944025, "grad_norm": 0.07251660945860813, "learning_rate": 3.92995363800609e-07, "loss": 0.0002, "step": 217540 }, { "epoch": 4.428498727735369, "grad_norm": 0.01920842885539797, "learning_rate": 3.927192740300273e-07, "loss": 0.0001, "step": 217550 }, { "epoch": 4.428702290076336, "grad_norm": 0.012297505748373183, "learning_rate": 3.92443277309415e-07, "loss": 0.0231, "step": 217560 }, { "epoch": 4.428905852417302, "grad_norm": 0.003741401820718674, "learning_rate": 3.921673736443443e-07, "loss": 0.0129, "step": 217570 }, { "epoch": 4.42910941475827, "grad_norm": 0.26731476249099656, "learning_rate": 3.91891563040388e-07, "loss": 0.0003, "step": 217580 }, { "epoch": 4.4293129770992365, "grad_norm": 0.014115512833721014, "learning_rate": 3.916158455031177e-07, "loss": 0.0829, "step": 217590 }, { "epoch": 4.429516539440203, "grad_norm": 0.0029451597839083702, "learning_rate": 3.9134022103810067e-07, "loss": 0.0021, "step": 217600 }, { "epoch": 4.429720101781171, "grad_norm": 9.128164109702464, "learning_rate": 3.910646896509024e-07, "loss": 0.0034, "step": 217610 }, { "epoch": 4.429923664122137, "grad_norm": 0.029600048427397503, "learning_rate": 3.9078925134709124e-07, "loss": 0.0003, "step": 217620 }, { "epoch": 4.430127226463104, "grad_norm": 0.04893073997866622, "learning_rate": 3.905139061322266e-07, "loss": 0.0002, "step": 217630 }, { "epoch": 4.4303307888040715, "grad_norm": 0.009416290907235843, "learning_rate": 3.9023865401187076e-07, "loss": 0.0028, "step": 217640 }, { "epoch": 4.430534351145038, "grad_norm": 0.0023502629632095986, "learning_rate": 3.8996349499158203e-07, "loss": 0.0148, "step": 217650 }, { "epoch": 4.430737913486005, "grad_norm": 5.835781249240021, "learning_rate": 3.8968842907691875e-07, "loss": 0.0384, "step": 217660 }, { "epoch": 4.430941475826972, "grad_norm": 0.0072794148833521906, "learning_rate": 3.894134562734359e-07, "loss": 0.0244, "step": 217670 }, { "epoch": 4.431145038167939, "grad_norm": 5.755881837019233, "learning_rate": 3.8913857658668617e-07, "loss": 0.0051, "step": 217680 }, { "epoch": 4.4313486005089056, "grad_norm": 0.0074354538235963386, "learning_rate": 3.8886379002222306e-07, "loss": 0.0001, "step": 217690 }, { "epoch": 4.431552162849873, "grad_norm": 0.019952195449862157, "learning_rate": 3.8858909658559265e-07, "loss": 0.0003, "step": 217700 }, { "epoch": 4.43175572519084, "grad_norm": 0.019185073261091777, "learning_rate": 3.8831449628234596e-07, "loss": 0.0489, "step": 217710 }, { "epoch": 4.431959287531806, "grad_norm": 0.0016705741539217674, "learning_rate": 3.880399891180281e-07, "loss": 0.0002, "step": 217720 }, { "epoch": 4.432162849872774, "grad_norm": 0.007180624202835208, "learning_rate": 3.877655750981818e-07, "loss": 0.0001, "step": 217730 }, { "epoch": 4.4323664122137405, "grad_norm": 0.0004283350920261094, "learning_rate": 3.87491254228351e-07, "loss": 0.0001, "step": 217740 }, { "epoch": 4.432569974554707, "grad_norm": 0.007394114419794391, "learning_rate": 3.872170265140757e-07, "loss": 0.0001, "step": 217750 }, { "epoch": 4.432773536895675, "grad_norm": 0.008473548994485164, "learning_rate": 3.869428919608925e-07, "loss": 0.0001, "step": 217760 }, { "epoch": 4.432977099236641, "grad_norm": 0.06946539192219105, "learning_rate": 3.8666885057434045e-07, "loss": 0.0001, "step": 217770 }, { "epoch": 4.433180661577608, "grad_norm": 0.005519313662104649, "learning_rate": 3.8639490235995226e-07, "loss": 0.0002, "step": 217780 }, { "epoch": 4.4333842239185755, "grad_norm": 0.0021106734679783835, "learning_rate": 3.861210473232602e-07, "loss": 0.0004, "step": 217790 }, { "epoch": 4.433587786259542, "grad_norm": 0.0023951972055715554, "learning_rate": 3.858472854697981e-07, "loss": 0.0001, "step": 217800 }, { "epoch": 4.433791348600509, "grad_norm": 0.008151163607013895, "learning_rate": 3.855736168050922e-07, "loss": 0.0001, "step": 217810 }, { "epoch": 4.433994910941476, "grad_norm": 0.0017783181151974038, "learning_rate": 3.853000413346697e-07, "loss": 0.0044, "step": 217820 }, { "epoch": 4.434198473282443, "grad_norm": 0.003595391938188315, "learning_rate": 3.850265590640584e-07, "loss": 0.0229, "step": 217830 }, { "epoch": 4.4344020356234095, "grad_norm": 0.002181448490094317, "learning_rate": 3.8475316999877886e-07, "loss": 0.0337, "step": 217840 }, { "epoch": 4.434605597964376, "grad_norm": 0.0002622259779061315, "learning_rate": 3.844798741443534e-07, "loss": 0.1382, "step": 217850 }, { "epoch": 4.434809160305344, "grad_norm": 0.008011538672977464, "learning_rate": 3.842066715063014e-07, "loss": 0.0392, "step": 217860 }, { "epoch": 4.43501272264631, "grad_norm": 0.015422773869615413, "learning_rate": 3.8393356209014074e-07, "loss": 0.0253, "step": 217870 }, { "epoch": 4.435216284987277, "grad_norm": 0.6798332278297162, "learning_rate": 3.836605459013876e-07, "loss": 0.0006, "step": 217880 }, { "epoch": 4.4354198473282445, "grad_norm": 0.00844146026033665, "learning_rate": 3.833876229455552e-07, "loss": 0.0001, "step": 217890 }, { "epoch": 4.435623409669211, "grad_norm": 0.26590429884837896, "learning_rate": 3.831147932281559e-07, "loss": 0.0002, "step": 217900 }, { "epoch": 4.435826972010178, "grad_norm": 0.002713982991881315, "learning_rate": 3.828420567547003e-07, "loss": 0.0237, "step": 217910 }, { "epoch": 4.436030534351145, "grad_norm": 0.012203805009029534, "learning_rate": 3.8256941353069563e-07, "loss": 0.0393, "step": 217920 }, { "epoch": 4.436234096692112, "grad_norm": 0.0009185809785059609, "learning_rate": 3.822968635616503e-07, "loss": 0.0004, "step": 217930 }, { "epoch": 4.4364376590330785, "grad_norm": 0.004814968906101959, "learning_rate": 3.8202440685306485e-07, "loss": 0.0085, "step": 217940 }, { "epoch": 4.436641221374046, "grad_norm": 29.861560746162294, "learning_rate": 3.81752043410446e-07, "loss": 0.0192, "step": 217950 }, { "epoch": 4.436844783715013, "grad_norm": 0.003362349958652191, "learning_rate": 3.814797732392933e-07, "loss": 0.0216, "step": 217960 }, { "epoch": 4.437048346055979, "grad_norm": 0.10921693607225301, "learning_rate": 3.8120759634510507e-07, "loss": 0.0001, "step": 217970 }, { "epoch": 4.437251908396947, "grad_norm": 0.006335555593072168, "learning_rate": 3.809355127333769e-07, "loss": 0.0415, "step": 217980 }, { "epoch": 4.4374554707379135, "grad_norm": 0.005617414580395498, "learning_rate": 3.806635224096072e-07, "loss": 0.0619, "step": 217990 }, { "epoch": 4.43765903307888, "grad_norm": 0.00783003876767094, "learning_rate": 3.803916253792861e-07, "loss": 0.0004, "step": 218000 }, { "epoch": 4.437862595419848, "grad_norm": 0.004291789989828748, "learning_rate": 3.8011982164790686e-07, "loss": 0.0002, "step": 218010 }, { "epoch": 4.438066157760814, "grad_norm": 0.0009285489949807837, "learning_rate": 3.798481112209579e-07, "loss": 0.0146, "step": 218020 }, { "epoch": 4.438269720101781, "grad_norm": 0.004619264151573895, "learning_rate": 3.795764941039265e-07, "loss": 0.0276, "step": 218030 }, { "epoch": 4.438473282442748, "grad_norm": 0.02205209879158762, "learning_rate": 3.793049703022994e-07, "loss": 0.0001, "step": 218040 }, { "epoch": 4.438676844783715, "grad_norm": 0.029237469248111696, "learning_rate": 3.7903353982155997e-07, "loss": 0.0194, "step": 218050 }, { "epoch": 4.438880407124682, "grad_norm": 7.265225142518184, "learning_rate": 3.7876220266719e-07, "loss": 0.015, "step": 218060 }, { "epoch": 4.439083969465649, "grad_norm": 0.05516765106424395, "learning_rate": 3.784909588446689e-07, "loss": 0.0041, "step": 218070 }, { "epoch": 4.439287531806616, "grad_norm": 0.0026222687986187307, "learning_rate": 3.782198083594757e-07, "loss": 0.0007, "step": 218080 }, { "epoch": 4.4394910941475825, "grad_norm": 0.037544879306491225, "learning_rate": 3.7794875121708654e-07, "loss": 0.0345, "step": 218090 }, { "epoch": 4.43969465648855, "grad_norm": 0.003947248630591074, "learning_rate": 3.776777874229748e-07, "loss": 0.0042, "step": 218100 }, { "epoch": 4.439898218829517, "grad_norm": 0.008627167128801419, "learning_rate": 3.7740691698261447e-07, "loss": 0.0001, "step": 218110 }, { "epoch": 4.440101781170483, "grad_norm": 0.005197326869698107, "learning_rate": 3.771361399014756e-07, "loss": 0.0005, "step": 218120 }, { "epoch": 4.440305343511451, "grad_norm": 0.0030448180552714115, "learning_rate": 3.768654561850249e-07, "loss": 0.0035, "step": 218130 }, { "epoch": 4.440508905852417, "grad_norm": 13.80469130350618, "learning_rate": 3.765948658387325e-07, "loss": 0.0173, "step": 218140 }, { "epoch": 4.440712468193384, "grad_norm": 0.007829225708749034, "learning_rate": 3.763243688680612e-07, "loss": 0.0001, "step": 218150 }, { "epoch": 4.440916030534352, "grad_norm": 0.005445988508880591, "learning_rate": 3.760539652784734e-07, "loss": 0.0044, "step": 218160 }, { "epoch": 4.441119592875318, "grad_norm": 0.004275394112457028, "learning_rate": 3.7578365507543344e-07, "loss": 0.036, "step": 218170 }, { "epoch": 4.441323155216285, "grad_norm": 0.009640030967764097, "learning_rate": 3.7551343826439714e-07, "loss": 0.0001, "step": 218180 }, { "epoch": 4.441526717557252, "grad_norm": 0.022061181932528335, "learning_rate": 3.7524331485082275e-07, "loss": 0.0001, "step": 218190 }, { "epoch": 4.441730279898219, "grad_norm": 0.0968578198618223, "learning_rate": 3.749732848401677e-07, "loss": 0.0001, "step": 218200 }, { "epoch": 4.441933842239186, "grad_norm": 0.01914504743619462, "learning_rate": 3.7470334823788365e-07, "loss": 0.0003, "step": 218210 }, { "epoch": 4.442137404580152, "grad_norm": 0.9884538846654572, "learning_rate": 3.744335050494224e-07, "loss": 0.0362, "step": 218220 }, { "epoch": 4.44234096692112, "grad_norm": 0.04083681513940647, "learning_rate": 3.7416375528023463e-07, "loss": 0.0245, "step": 218230 }, { "epoch": 4.442544529262086, "grad_norm": 0.001343168957107217, "learning_rate": 3.7389409893576755e-07, "loss": 0.0001, "step": 218240 }, { "epoch": 4.442748091603053, "grad_norm": 0.004415868533304852, "learning_rate": 3.7362453602146797e-07, "loss": 0.0001, "step": 218250 }, { "epoch": 4.442951653944021, "grad_norm": 0.026015973377478366, "learning_rate": 3.733550665427793e-07, "loss": 0.0156, "step": 218260 }, { "epoch": 4.443155216284987, "grad_norm": 0.001945146838448899, "learning_rate": 3.7308569050514387e-07, "loss": 0.0001, "step": 218270 }, { "epoch": 4.443358778625954, "grad_norm": 0.00504573385188527, "learning_rate": 3.728164079140023e-07, "loss": 0.0009, "step": 218280 }, { "epoch": 4.443562340966921, "grad_norm": 5.113388963384729, "learning_rate": 3.725472187747936e-07, "loss": 0.0148, "step": 218290 }, { "epoch": 4.443765903307888, "grad_norm": 0.018854795026527024, "learning_rate": 3.722781230929545e-07, "loss": 0.0171, "step": 218300 }, { "epoch": 4.443969465648855, "grad_norm": 0.00229657270123042, "learning_rate": 3.720091208739179e-07, "loss": 0.0145, "step": 218310 }, { "epoch": 4.444173027989822, "grad_norm": 0.004643691309679036, "learning_rate": 3.717402121231184e-07, "loss": 0.019, "step": 218320 }, { "epoch": 4.444376590330789, "grad_norm": 0.45702928140307064, "learning_rate": 3.714713968459871e-07, "loss": 0.0013, "step": 218330 }, { "epoch": 4.444580152671755, "grad_norm": 0.029895302040700213, "learning_rate": 3.712026750479514e-07, "loss": 0.0316, "step": 218340 }, { "epoch": 4.444783715012723, "grad_norm": 0.0007686170678100003, "learning_rate": 3.709340467344397e-07, "loss": 0.0565, "step": 218350 }, { "epoch": 4.44498727735369, "grad_norm": 0.003266975980405936, "learning_rate": 3.706655119108782e-07, "loss": 0.0004, "step": 218360 }, { "epoch": 4.445190839694656, "grad_norm": 0.0037228166538614093, "learning_rate": 3.7039707058268756e-07, "loss": 0.0, "step": 218370 }, { "epoch": 4.445394402035624, "grad_norm": 0.0017232165507879023, "learning_rate": 3.701287227552924e-07, "loss": 0.0004, "step": 218380 }, { "epoch": 4.44559796437659, "grad_norm": 0.0005778469839096606, "learning_rate": 3.698604684341106e-07, "loss": 0.0316, "step": 218390 }, { "epoch": 4.445801526717557, "grad_norm": 0.002602033595603312, "learning_rate": 3.6959230762456e-07, "loss": 0.0276, "step": 218400 }, { "epoch": 4.4460050890585245, "grad_norm": 0.1052363381894023, "learning_rate": 3.693242403320563e-07, "loss": 0.042, "step": 218410 }, { "epoch": 4.446208651399491, "grad_norm": 0.5054241999232236, "learning_rate": 3.690562665620145e-07, "loss": 0.0002, "step": 218420 }, { "epoch": 4.446412213740458, "grad_norm": 0.0003737931081538007, "learning_rate": 3.687883863198455e-07, "loss": 0.0002, "step": 218430 }, { "epoch": 4.446615776081425, "grad_norm": 0.016193956669240908, "learning_rate": 3.6852059961096033e-07, "loss": 0.028, "step": 218440 }, { "epoch": 4.446819338422392, "grad_norm": 0.00018949253280635514, "learning_rate": 3.682529064407664e-07, "loss": 0.0593, "step": 218450 }, { "epoch": 4.447022900763359, "grad_norm": 0.0037965154936709282, "learning_rate": 3.67985306814671e-07, "loss": 0.0, "step": 218460 }, { "epoch": 4.447226463104325, "grad_norm": 0.32479902714513154, "learning_rate": 3.677178007380783e-07, "loss": 0.0002, "step": 218470 }, { "epoch": 4.447430025445293, "grad_norm": 0.3230222656207836, "learning_rate": 3.6745038821639103e-07, "loss": 0.0852, "step": 218480 }, { "epoch": 4.447633587786259, "grad_norm": 0.0006851773478983831, "learning_rate": 3.671830692550099e-07, "loss": 0.0002, "step": 218490 }, { "epoch": 4.447837150127226, "grad_norm": 0.24766906759414498, "learning_rate": 3.66915843859334e-07, "loss": 0.0158, "step": 218500 }, { "epoch": 4.4480407124681935, "grad_norm": 0.020128289885886334, "learning_rate": 3.6664871203476007e-07, "loss": 0.0001, "step": 218510 }, { "epoch": 4.44824427480916, "grad_norm": 0.002592935361155246, "learning_rate": 3.663816737866821e-07, "loss": 0.0, "step": 218520 }, { "epoch": 4.448447837150127, "grad_norm": 0.02787911389684781, "learning_rate": 3.661147291204947e-07, "loss": 0.032, "step": 218530 }, { "epoch": 4.448651399491094, "grad_norm": 8.193654169675531, "learning_rate": 3.6584787804158973e-07, "loss": 0.0406, "step": 218540 }, { "epoch": 4.448854961832061, "grad_norm": 0.0015508939423168697, "learning_rate": 3.65581120555355e-07, "loss": 0.0021, "step": 218550 }, { "epoch": 4.449058524173028, "grad_norm": 0.008878673107175324, "learning_rate": 3.6531445666717736e-07, "loss": 0.0001, "step": 218560 }, { "epoch": 4.449262086513995, "grad_norm": 0.002422570373995183, "learning_rate": 3.650478863824458e-07, "loss": 0.0015, "step": 218570 }, { "epoch": 4.449465648854962, "grad_norm": 0.16347605541946458, "learning_rate": 3.647814097065405e-07, "loss": 0.0073, "step": 218580 }, { "epoch": 4.449669211195928, "grad_norm": 3.0632357993308723, "learning_rate": 3.645150266448455e-07, "loss": 0.0488, "step": 218590 }, { "epoch": 4.449872773536896, "grad_norm": 0.010380567563902415, "learning_rate": 3.642487372027392e-07, "loss": 0.0387, "step": 218600 }, { "epoch": 4.4500763358778626, "grad_norm": 0.00635577074583312, "learning_rate": 3.639825413856013e-07, "loss": 0.0331, "step": 218610 }, { "epoch": 4.450279898218829, "grad_norm": 0.01846204868079317, "learning_rate": 3.637164391988068e-07, "loss": 0.0004, "step": 218620 }, { "epoch": 4.450483460559797, "grad_norm": 0.035382929984326135, "learning_rate": 3.6345043064772987e-07, "loss": 0.0001, "step": 218630 }, { "epoch": 4.450687022900763, "grad_norm": 0.003178183202876632, "learning_rate": 3.6318451573774396e-07, "loss": 0.0001, "step": 218640 }, { "epoch": 4.45089058524173, "grad_norm": 0.01279026779371879, "learning_rate": 3.629186944742186e-07, "loss": 0.0002, "step": 218650 }, { "epoch": 4.4510941475826975, "grad_norm": 0.0031146191080719796, "learning_rate": 3.6265296686252284e-07, "loss": 0.0227, "step": 218660 }, { "epoch": 4.451297709923664, "grad_norm": 0.03240493344244532, "learning_rate": 3.623873329080235e-07, "loss": 0.0002, "step": 218670 }, { "epoch": 4.451501272264631, "grad_norm": 0.002070328604464797, "learning_rate": 3.621217926160842e-07, "loss": 0.0001, "step": 218680 }, { "epoch": 4.451704834605598, "grad_norm": 0.0014561377971242244, "learning_rate": 3.618563459920699e-07, "loss": 0.0002, "step": 218690 }, { "epoch": 4.451908396946565, "grad_norm": 0.00170213947601136, "learning_rate": 3.615909930413408e-07, "loss": 0.0, "step": 218700 }, { "epoch": 4.452111959287532, "grad_norm": 0.0031903469340332554, "learning_rate": 3.613257337692544e-07, "loss": 0.0179, "step": 218710 }, { "epoch": 4.452315521628499, "grad_norm": 0.003774582339291803, "learning_rate": 3.610605681811702e-07, "loss": 0.0375, "step": 218720 }, { "epoch": 4.452519083969466, "grad_norm": 0.0014147569533421672, "learning_rate": 3.6079549628244337e-07, "loss": 0.0002, "step": 218730 }, { "epoch": 4.452722646310432, "grad_norm": 0.008804476292093697, "learning_rate": 3.6053051807842523e-07, "loss": 0.0005, "step": 218740 }, { "epoch": 4.4529262086514, "grad_norm": 0.007665135589923124, "learning_rate": 3.6026563357447084e-07, "loss": 0.0003, "step": 218750 }, { "epoch": 4.4531297709923665, "grad_norm": 0.004609055957271924, "learning_rate": 3.6000084277592663e-07, "loss": 0.0002, "step": 218760 }, { "epoch": 4.453333333333333, "grad_norm": 0.051037270678789964, "learning_rate": 3.5973614568814096e-07, "loss": 0.0003, "step": 218770 }, { "epoch": 4.453536895674301, "grad_norm": 0.01944993167056685, "learning_rate": 3.594715423164624e-07, "loss": 0.0001, "step": 218780 }, { "epoch": 4.453740458015267, "grad_norm": 0.22989223775197562, "learning_rate": 3.592070326662317e-07, "loss": 0.0131, "step": 218790 }, { "epoch": 4.453944020356234, "grad_norm": 0.0016100971824361255, "learning_rate": 3.589426167427923e-07, "loss": 0.1063, "step": 218800 }, { "epoch": 4.4541475826972015, "grad_norm": 0.18189969802473607, "learning_rate": 3.586782945514849e-07, "loss": 0.0182, "step": 218810 }, { "epoch": 4.454351145038168, "grad_norm": 0.007952373546535415, "learning_rate": 3.5841406609764704e-07, "loss": 0.0007, "step": 218820 }, { "epoch": 4.454554707379135, "grad_norm": 0.10943117539410328, "learning_rate": 3.5814993138661546e-07, "loss": 0.0001, "step": 218830 }, { "epoch": 4.454758269720102, "grad_norm": 0.04308689804148507, "learning_rate": 3.578858904237242e-07, "loss": 0.0146, "step": 218840 }, { "epoch": 4.454961832061069, "grad_norm": 0.026684938939554193, "learning_rate": 3.5762194321430686e-07, "loss": 0.0001, "step": 218850 }, { "epoch": 4.4551653944020355, "grad_norm": 0.00311427547596222, "learning_rate": 3.5735808976369356e-07, "loss": 0.0002, "step": 218860 }, { "epoch": 4.455368956743002, "grad_norm": 0.0034841706364945506, "learning_rate": 3.5709433007721337e-07, "loss": 0.0, "step": 218870 }, { "epoch": 4.45557251908397, "grad_norm": 0.03888280130390576, "learning_rate": 3.568306641601943e-07, "loss": 0.0121, "step": 218880 }, { "epoch": 4.455776081424936, "grad_norm": 0.001523308405343677, "learning_rate": 3.565670920179581e-07, "loss": 0.0004, "step": 218890 }, { "epoch": 4.455979643765903, "grad_norm": 0.00725754764973931, "learning_rate": 3.563036136558318e-07, "loss": 0.0003, "step": 218900 }, { "epoch": 4.4561832061068705, "grad_norm": 0.0006416466130206801, "learning_rate": 3.5604022907913536e-07, "loss": 0.0001, "step": 218910 }, { "epoch": 4.456386768447837, "grad_norm": 0.03534874121219468, "learning_rate": 3.5577693829318636e-07, "loss": 0.0046, "step": 218920 }, { "epoch": 4.456590330788804, "grad_norm": 0.0009589347425618865, "learning_rate": 3.555137413033044e-07, "loss": 0.0054, "step": 218930 }, { "epoch": 4.456793893129771, "grad_norm": 0.427324046993951, "learning_rate": 3.552506381148052e-07, "loss": 0.0008, "step": 218940 }, { "epoch": 4.456997455470738, "grad_norm": 0.0024466043998585415, "learning_rate": 3.549876287330012e-07, "loss": 0.0067, "step": 218950 }, { "epoch": 4.4572010178117045, "grad_norm": 0.03968254162634207, "learning_rate": 3.5472471316320476e-07, "loss": 0.0024, "step": 218960 }, { "epoch": 4.457404580152672, "grad_norm": 0.018490515191621884, "learning_rate": 3.5446189141072616e-07, "loss": 0.0266, "step": 218970 }, { "epoch": 4.457608142493639, "grad_norm": 0.012702560398035585, "learning_rate": 3.541991634808728e-07, "loss": 0.0001, "step": 218980 }, { "epoch": 4.457811704834605, "grad_norm": 0.011711910122608016, "learning_rate": 3.5393652937895096e-07, "loss": 0.0001, "step": 218990 }, { "epoch": 4.458015267175573, "grad_norm": 0.0008928374972855454, "learning_rate": 3.536739891102647e-07, "loss": 0.003, "step": 219000 }, { "epoch": 4.4582188295165395, "grad_norm": 0.0071432930370863975, "learning_rate": 3.5341154268011713e-07, "loss": 0.0001, "step": 219010 }, { "epoch": 4.458422391857506, "grad_norm": 0.004995974298065291, "learning_rate": 3.5314919009380777e-07, "loss": 0.0391, "step": 219020 }, { "epoch": 4.458625954198474, "grad_norm": 0.0037983456198623375, "learning_rate": 3.528869313566363e-07, "loss": 0.0429, "step": 219030 }, { "epoch": 4.45882951653944, "grad_norm": 0.0019227741160883637, "learning_rate": 3.5262476647389796e-07, "loss": 0.0001, "step": 219040 }, { "epoch": 4.459033078880407, "grad_norm": 0.14905638979666144, "learning_rate": 3.523626954508891e-07, "loss": 0.0184, "step": 219050 }, { "epoch": 4.459236641221374, "grad_norm": 0.00015418956857900896, "learning_rate": 3.521007182929009e-07, "loss": 0.0001, "step": 219060 }, { "epoch": 4.459440203562341, "grad_norm": 0.006213841557135607, "learning_rate": 3.518388350052254e-07, "loss": 0.0397, "step": 219070 }, { "epoch": 4.459643765903308, "grad_norm": 8.809868255038337, "learning_rate": 3.5157704559315166e-07, "loss": 0.0256, "step": 219080 }, { "epoch": 4.459847328244275, "grad_norm": 0.012838252797185378, "learning_rate": 3.5131535006196704e-07, "loss": 0.0002, "step": 219090 }, { "epoch": 4.460050890585242, "grad_norm": 26.7473142559857, "learning_rate": 3.5105374841695573e-07, "loss": 0.0062, "step": 219100 }, { "epoch": 4.4602544529262085, "grad_norm": 0.0017050844981941935, "learning_rate": 3.5079224066340124e-07, "loss": 0.0002, "step": 219110 }, { "epoch": 4.460458015267175, "grad_norm": 0.002743735437647014, "learning_rate": 3.505308268065866e-07, "loss": 0.0271, "step": 219120 }, { "epoch": 4.460661577608143, "grad_norm": 0.00576091319381752, "learning_rate": 3.502695068517903e-07, "loss": 0.0004, "step": 219130 }, { "epoch": 4.460865139949109, "grad_norm": 0.0015104156598893656, "learning_rate": 3.500082808042887e-07, "loss": 0.0223, "step": 219140 }, { "epoch": 4.461068702290076, "grad_norm": 0.004988495446286682, "learning_rate": 3.49747148669361e-07, "loss": 0.0, "step": 219150 }, { "epoch": 4.461272264631043, "grad_norm": 0.035999711490874976, "learning_rate": 3.4948611045227786e-07, "loss": 0.03, "step": 219160 }, { "epoch": 4.46147582697201, "grad_norm": 0.14888056450032527, "learning_rate": 3.4922516615831294e-07, "loss": 0.0003, "step": 219170 }, { "epoch": 4.461679389312977, "grad_norm": 0.002885195193131087, "learning_rate": 3.489643157927358e-07, "loss": 0.0126, "step": 219180 }, { "epoch": 4.461882951653944, "grad_norm": 0.06639868062655332, "learning_rate": 3.487035593608151e-07, "loss": 0.038, "step": 219190 }, { "epoch": 4.462086513994911, "grad_norm": 0.09251349705443085, "learning_rate": 3.4844289686781663e-07, "loss": 0.0003, "step": 219200 }, { "epoch": 4.4622900763358775, "grad_norm": 0.2267014719471353, "learning_rate": 3.481823283190044e-07, "loss": 0.0003, "step": 219210 }, { "epoch": 4.462493638676845, "grad_norm": 0.01888053917070388, "learning_rate": 3.4792185371964216e-07, "loss": 0.0544, "step": 219220 }, { "epoch": 4.462697201017812, "grad_norm": 0.00511856379423335, "learning_rate": 3.4766147307498997e-07, "loss": 0.0079, "step": 219230 }, { "epoch": 4.462900763358778, "grad_norm": 0.021634997946681982, "learning_rate": 3.474011863903065e-07, "loss": 0.0164, "step": 219240 }, { "epoch": 4.463104325699746, "grad_norm": 0.007638329113893549, "learning_rate": 3.4714099367084917e-07, "loss": 0.0, "step": 219250 }, { "epoch": 4.463307888040712, "grad_norm": 0.00627310739550015, "learning_rate": 3.4688089492187107e-07, "loss": 0.0286, "step": 219260 }, { "epoch": 4.463511450381679, "grad_norm": 0.043004109737100026, "learning_rate": 3.4662089014862677e-07, "loss": 0.0005, "step": 219270 }, { "epoch": 4.463715012722647, "grad_norm": 0.0005698680603665162, "learning_rate": 3.4636097935636824e-07, "loss": 0.0392, "step": 219280 }, { "epoch": 4.463918575063613, "grad_norm": 0.003643171314985387, "learning_rate": 3.4610116255034186e-07, "loss": 0.0004, "step": 219290 }, { "epoch": 4.46412213740458, "grad_norm": 0.002455770853424696, "learning_rate": 3.458414397357973e-07, "loss": 0.0002, "step": 219300 }, { "epoch": 4.464325699745547, "grad_norm": 1.042631611738539, "learning_rate": 3.455818109179804e-07, "loss": 0.1413, "step": 219310 }, { "epoch": 4.464529262086514, "grad_norm": 0.005818303181496998, "learning_rate": 3.4532227610213245e-07, "loss": 0.0147, "step": 219320 }, { "epoch": 4.464732824427481, "grad_norm": 0.00041960701128004337, "learning_rate": 3.4506283529349703e-07, "loss": 0.0114, "step": 219330 }, { "epoch": 4.464936386768448, "grad_norm": 0.00889492389681626, "learning_rate": 3.448034884973134e-07, "loss": 0.0001, "step": 219340 }, { "epoch": 4.465139949109415, "grad_norm": 0.0007850336364396107, "learning_rate": 3.445442357188178e-07, "loss": 0.0004, "step": 219350 }, { "epoch": 4.465343511450381, "grad_norm": 1.4911943485052574, "learning_rate": 3.442850769632489e-07, "loss": 0.0009, "step": 219360 }, { "epoch": 4.465547073791349, "grad_norm": 0.08406168354755392, "learning_rate": 3.440260122358391e-07, "loss": 0.0001, "step": 219370 }, { "epoch": 4.465750636132316, "grad_norm": 0.028883330063055556, "learning_rate": 3.437670415418204e-07, "loss": 0.0087, "step": 219380 }, { "epoch": 4.465954198473282, "grad_norm": 0.0015678117394773952, "learning_rate": 3.4350816488642357e-07, "loss": 0.0013, "step": 219390 }, { "epoch": 4.46615776081425, "grad_norm": 0.003170012699761986, "learning_rate": 3.432493822748767e-07, "loss": 0.0001, "step": 219400 }, { "epoch": 4.466361323155216, "grad_norm": 0.00036379995774354446, "learning_rate": 3.429906937124067e-07, "loss": 0.0001, "step": 219410 }, { "epoch": 4.466564885496183, "grad_norm": 1.6880254955518388, "learning_rate": 3.427320992042377e-07, "loss": 0.0007, "step": 219420 }, { "epoch": 4.4667684478371505, "grad_norm": 4.7606998161790226e-05, "learning_rate": 3.424735987555927e-07, "loss": 0.0113, "step": 219430 }, { "epoch": 4.466972010178117, "grad_norm": 0.009532790125275845, "learning_rate": 3.422151923716921e-07, "loss": 0.0001, "step": 219440 }, { "epoch": 4.467175572519084, "grad_norm": 0.0008140823837299795, "learning_rate": 3.41956880057755e-07, "loss": 0.0715, "step": 219450 }, { "epoch": 4.467379134860051, "grad_norm": 0.009702956416012806, "learning_rate": 3.416986618189988e-07, "loss": 0.0001, "step": 219460 }, { "epoch": 4.467582697201018, "grad_norm": 0.004139155642784954, "learning_rate": 3.414405376606367e-07, "loss": 0.0002, "step": 219470 }, { "epoch": 4.467786259541985, "grad_norm": 0.01210849001876428, "learning_rate": 3.411825075878833e-07, "loss": 0.0179, "step": 219480 }, { "epoch": 4.467989821882951, "grad_norm": 18.178564824723292, "learning_rate": 3.4092457160595114e-07, "loss": 0.0302, "step": 219490 }, { "epoch": 4.468193384223919, "grad_norm": 0.010865798140352123, "learning_rate": 3.406667297200461e-07, "loss": 0.0001, "step": 219500 }, { "epoch": 4.468396946564885, "grad_norm": 0.0013606751163731485, "learning_rate": 3.404089819353784e-07, "loss": 0.048, "step": 219510 }, { "epoch": 4.468600508905852, "grad_norm": 0.007479202398436428, "learning_rate": 3.401513282571539e-07, "loss": 0.005, "step": 219520 }, { "epoch": 4.4688040712468196, "grad_norm": 0.0022178237585326597, "learning_rate": 3.39893768690574e-07, "loss": 0.0002, "step": 219530 }, { "epoch": 4.469007633587786, "grad_norm": 0.01868169101109018, "learning_rate": 3.396363032408417e-07, "loss": 0.0068, "step": 219540 }, { "epoch": 4.469211195928753, "grad_norm": 0.0011332637158609129, "learning_rate": 3.3937893191315684e-07, "loss": 0.0001, "step": 219550 }, { "epoch": 4.46941475826972, "grad_norm": 0.01240758654297411, "learning_rate": 3.391216547127174e-07, "loss": 0.0232, "step": 219560 }, { "epoch": 4.469618320610687, "grad_norm": 10.068383271843123, "learning_rate": 3.3886447164471926e-07, "loss": 0.0075, "step": 219570 }, { "epoch": 4.469821882951654, "grad_norm": 0.031693042880691795, "learning_rate": 3.386073827143566e-07, "loss": 0.0001, "step": 219580 }, { "epoch": 4.470025445292621, "grad_norm": 0.0008098802649133904, "learning_rate": 3.3835038792682185e-07, "loss": 0.0151, "step": 219590 }, { "epoch": 4.470229007633588, "grad_norm": 0.008806873196025211, "learning_rate": 3.3809348728730487e-07, "loss": 0.0183, "step": 219600 }, { "epoch": 4.470432569974554, "grad_norm": 0.005663051379802871, "learning_rate": 3.3783668080099473e-07, "loss": 0.0606, "step": 219610 }, { "epoch": 4.470636132315522, "grad_norm": 0.0010425066838359728, "learning_rate": 3.375799684730774e-07, "loss": 0.0001, "step": 219620 }, { "epoch": 4.470839694656489, "grad_norm": 0.005749470921378787, "learning_rate": 3.373233503087375e-07, "loss": 0.0114, "step": 219630 }, { "epoch": 4.471043256997455, "grad_norm": 0.006619092262913189, "learning_rate": 3.3706682631315826e-07, "loss": 0.0001, "step": 219640 }, { "epoch": 4.471246819338423, "grad_norm": 0.005148977139372103, "learning_rate": 3.368103964915215e-07, "loss": 0.0062, "step": 219650 }, { "epoch": 4.471450381679389, "grad_norm": 0.0023006186560129652, "learning_rate": 3.3655406084900255e-07, "loss": 0.0001, "step": 219660 }, { "epoch": 4.471653944020356, "grad_norm": 0.0009373985330168937, "learning_rate": 3.3629781939078235e-07, "loss": 0.0003, "step": 219670 }, { "epoch": 4.4718575063613235, "grad_norm": 0.00018292710615189545, "learning_rate": 3.3604167212203496e-07, "loss": 0.0114, "step": 219680 }, { "epoch": 4.47206106870229, "grad_norm": 1.0146205946348388, "learning_rate": 3.357856190479314e-07, "loss": 0.0006, "step": 219690 }, { "epoch": 4.472264631043257, "grad_norm": 0.031499498174208565, "learning_rate": 3.355296601736463e-07, "loss": 0.0001, "step": 219700 }, { "epoch": 4.472468193384224, "grad_norm": 5.2167974113316395, "learning_rate": 3.3527379550434725e-07, "loss": 0.0217, "step": 219710 }, { "epoch": 4.472671755725191, "grad_norm": 4.054950930626268, "learning_rate": 3.3501802504520067e-07, "loss": 0.0026, "step": 219720 }, { "epoch": 4.472875318066158, "grad_norm": 0.08174450345304052, "learning_rate": 3.3476234880137513e-07, "loss": 0.0002, "step": 219730 }, { "epoch": 4.473078880407125, "grad_norm": 0.0005712264719600181, "learning_rate": 3.345067667780322e-07, "loss": 0.0587, "step": 219740 }, { "epoch": 4.473282442748092, "grad_norm": 0.003975848438357503, "learning_rate": 3.3425127898033316e-07, "loss": 0.0002, "step": 219750 }, { "epoch": 4.473486005089058, "grad_norm": 0.0012669616302262191, "learning_rate": 3.3399588541344065e-07, "loss": 0.0534, "step": 219760 }, { "epoch": 4.473689567430025, "grad_norm": 0.03435598895772897, "learning_rate": 3.3374058608251046e-07, "loss": 0.0283, "step": 219770 }, { "epoch": 4.4738931297709925, "grad_norm": 0.046130147100117175, "learning_rate": 3.3348538099269914e-07, "loss": 0.0284, "step": 219780 }, { "epoch": 4.474096692111959, "grad_norm": 0.0022133472425603564, "learning_rate": 3.3323027014916076e-07, "loss": 0.0002, "step": 219790 }, { "epoch": 4.474300254452926, "grad_norm": 0.006358255871990897, "learning_rate": 3.32975253557048e-07, "loss": 0.0174, "step": 219800 }, { "epoch": 4.474503816793893, "grad_norm": 0.001277448442336213, "learning_rate": 3.3272033122151104e-07, "loss": 0.0694, "step": 219810 }, { "epoch": 4.47470737913486, "grad_norm": 0.030404635901336553, "learning_rate": 3.324655031476987e-07, "loss": 0.0009, "step": 219820 }, { "epoch": 4.474910941475827, "grad_norm": 0.0016625514322344194, "learning_rate": 3.3221076934075777e-07, "loss": 0.0261, "step": 219830 }, { "epoch": 4.475114503816794, "grad_norm": 11.117443253161534, "learning_rate": 3.3195612980583046e-07, "loss": 0.0059, "step": 219840 }, { "epoch": 4.475318066157761, "grad_norm": 0.015175410261372155, "learning_rate": 3.317015845480631e-07, "loss": 0.0516, "step": 219850 }, { "epoch": 4.475521628498727, "grad_norm": 0.07722285315697362, "learning_rate": 3.3144713357259496e-07, "loss": 0.0001, "step": 219860 }, { "epoch": 4.475725190839695, "grad_norm": 0.009832131268200667, "learning_rate": 3.311927768845641e-07, "loss": 0.0419, "step": 219870 }, { "epoch": 4.4759287531806615, "grad_norm": 0.04467571692531744, "learning_rate": 3.309385144891086e-07, "loss": 0.0002, "step": 219880 }, { "epoch": 4.476132315521628, "grad_norm": 0.0030137341018769397, "learning_rate": 3.3068434639136504e-07, "loss": 0.0374, "step": 219890 }, { "epoch": 4.476335877862596, "grad_norm": 3.8828461746229967, "learning_rate": 3.304302725964631e-07, "loss": 0.001, "step": 219900 }, { "epoch": 4.476539440203562, "grad_norm": 0.006258655914436131, "learning_rate": 3.3017629310953757e-07, "loss": 0.0085, "step": 219910 }, { "epoch": 4.476743002544529, "grad_norm": 0.011808770195941995, "learning_rate": 3.2992240793571605e-07, "loss": 0.0287, "step": 219920 }, { "epoch": 4.4769465648854965, "grad_norm": 0.0031175842420408622, "learning_rate": 3.2966861708012664e-07, "loss": 0.0446, "step": 219930 }, { "epoch": 4.477150127226463, "grad_norm": 0.013942944717802564, "learning_rate": 3.294149205478947e-07, "loss": 0.0558, "step": 219940 }, { "epoch": 4.47735368956743, "grad_norm": 0.012071262488143858, "learning_rate": 3.291613183441439e-07, "loss": 0.0168, "step": 219950 }, { "epoch": 4.477557251908397, "grad_norm": 0.012204368942560433, "learning_rate": 3.289078104739968e-07, "loss": 0.0002, "step": 219960 }, { "epoch": 4.477760814249364, "grad_norm": 0.022040680794952465, "learning_rate": 3.2865439694257205e-07, "loss": 0.0002, "step": 219970 }, { "epoch": 4.4779643765903305, "grad_norm": 0.0004333418394230567, "learning_rate": 3.28401077754989e-07, "loss": 0.0006, "step": 219980 }, { "epoch": 4.478167938931298, "grad_norm": 8.178617630413938e-05, "learning_rate": 3.281478529163629e-07, "loss": 0.0011, "step": 219990 }, { "epoch": 4.478371501272265, "grad_norm": 7.617928756252975, "learning_rate": 3.2789472243180796e-07, "loss": 0.0055, "step": 220000 }, { "epoch": 4.478575063613231, "grad_norm": 0.0036047070425594147, "learning_rate": 3.276416863064369e-07, "loss": 0.0001, "step": 220010 }, { "epoch": 4.478778625954199, "grad_norm": 12.065094925221283, "learning_rate": 3.2738874454536054e-07, "loss": 0.0782, "step": 220020 }, { "epoch": 4.4789821882951655, "grad_norm": 0.01221835785945765, "learning_rate": 3.2713589715368587e-07, "loss": 0.0106, "step": 220030 }, { "epoch": 4.479185750636132, "grad_norm": 0.0023626484156541697, "learning_rate": 3.268831441365211e-07, "loss": 0.0318, "step": 220040 }, { "epoch": 4.4793893129771, "grad_norm": 0.8877897204907372, "learning_rate": 3.2663048549896934e-07, "loss": 0.0181, "step": 220050 }, { "epoch": 4.479592875318066, "grad_norm": 0.0018596760735885413, "learning_rate": 3.263779212461349e-07, "loss": 0.0017, "step": 220060 }, { "epoch": 4.479796437659033, "grad_norm": 0.07353513610505384, "learning_rate": 3.261254513831186e-07, "loss": 0.0177, "step": 220070 }, { "epoch": 4.48, "grad_norm": 12.882035458914682, "learning_rate": 3.258730759150175e-07, "loss": 0.047, "step": 220080 }, { "epoch": 4.480203562340967, "grad_norm": 0.06337063396221504, "learning_rate": 3.2562079484692976e-07, "loss": 0.0078, "step": 220090 }, { "epoch": 4.480407124681934, "grad_norm": 0.002187703271392342, "learning_rate": 3.2536860818395135e-07, "loss": 0.0014, "step": 220100 }, { "epoch": 4.480610687022901, "grad_norm": 0.1251814708426549, "learning_rate": 3.2511651593117476e-07, "loss": 0.0006, "step": 220110 }, { "epoch": 4.480814249363868, "grad_norm": 0.0046721553315688035, "learning_rate": 3.248645180936899e-07, "loss": 0.0274, "step": 220120 }, { "epoch": 4.4810178117048345, "grad_norm": 0.007690327243965065, "learning_rate": 3.246126146765899e-07, "loss": 0.0001, "step": 220130 }, { "epoch": 4.481221374045801, "grad_norm": 0.004651206484242702, "learning_rate": 3.24360805684959e-07, "loss": 0.0913, "step": 220140 }, { "epoch": 4.481424936386769, "grad_norm": 0.0016358931359428338, "learning_rate": 3.241090911238831e-07, "loss": 0.0003, "step": 220150 }, { "epoch": 4.481628498727735, "grad_norm": 0.00245694335694656, "learning_rate": 3.238574709984477e-07, "loss": 0.0006, "step": 220160 }, { "epoch": 4.481832061068702, "grad_norm": 0.006056684422464792, "learning_rate": 3.2360594531373246e-07, "loss": 0.0249, "step": 220170 }, { "epoch": 4.482035623409669, "grad_norm": 0.0032507651765337894, "learning_rate": 3.23354514074819e-07, "loss": 0.0063, "step": 220180 }, { "epoch": 4.482239185750636, "grad_norm": 0.014609627599624182, "learning_rate": 3.231031772867843e-07, "loss": 0.0001, "step": 220190 }, { "epoch": 4.482442748091603, "grad_norm": 0.0161662499718616, "learning_rate": 3.22851934954706e-07, "loss": 0.0275, "step": 220200 }, { "epoch": 4.48264631043257, "grad_norm": 0.00425630638636926, "learning_rate": 3.226007870836551e-07, "loss": 0.0149, "step": 220210 }, { "epoch": 4.482849872773537, "grad_norm": 0.004813058200971737, "learning_rate": 3.223497336787063e-07, "loss": 0.0006, "step": 220220 }, { "epoch": 4.4830534351145035, "grad_norm": 0.025214015486207983, "learning_rate": 3.2209877474493e-07, "loss": 0.0215, "step": 220230 }, { "epoch": 4.483256997455471, "grad_norm": 0.003527958246812135, "learning_rate": 3.218479102873934e-07, "loss": 0.0002, "step": 220240 }, { "epoch": 4.483460559796438, "grad_norm": 0.07100012966264034, "learning_rate": 3.215971403111634e-07, "loss": 0.014, "step": 220250 }, { "epoch": 4.483664122137404, "grad_norm": 0.011976757025534997, "learning_rate": 3.213464648213066e-07, "loss": 0.038, "step": 220260 }, { "epoch": 4.483867684478372, "grad_norm": 0.03251268888142826, "learning_rate": 3.210958838228817e-07, "loss": 0.0001, "step": 220270 }, { "epoch": 4.484071246819338, "grad_norm": 0.02354195629188417, "learning_rate": 3.2084539732095354e-07, "loss": 0.0316, "step": 220280 }, { "epoch": 4.484274809160305, "grad_norm": 0.013717984946996236, "learning_rate": 3.205950053205786e-07, "loss": 0.0005, "step": 220290 }, { "epoch": 4.484478371501273, "grad_norm": 0.010339907991077025, "learning_rate": 3.2034470782681406e-07, "loss": 0.0018, "step": 220300 }, { "epoch": 4.484681933842239, "grad_norm": 7.971468582234475, "learning_rate": 3.200945048447168e-07, "loss": 0.0549, "step": 220310 }, { "epoch": 4.484885496183206, "grad_norm": 0.012399353055760641, "learning_rate": 3.1984439637933793e-07, "loss": 0.0001, "step": 220320 }, { "epoch": 4.485089058524173, "grad_norm": 0.011313956721757642, "learning_rate": 3.195943824357284e-07, "loss": 0.0002, "step": 220330 }, { "epoch": 4.48529262086514, "grad_norm": 0.10097057207845324, "learning_rate": 3.193444630189407e-07, "loss": 0.0003, "step": 220340 }, { "epoch": 4.485496183206107, "grad_norm": 0.001406194107716867, "learning_rate": 3.190946381340193e-07, "loss": 0.0399, "step": 220350 }, { "epoch": 4.485699745547074, "grad_norm": 0.8097546539087307, "learning_rate": 3.188449077860106e-07, "loss": 0.0482, "step": 220360 }, { "epoch": 4.485903307888041, "grad_norm": 0.002109170767240146, "learning_rate": 3.185952719799579e-07, "loss": 0.0477, "step": 220370 }, { "epoch": 4.4861068702290074, "grad_norm": 0.004981510574749456, "learning_rate": 3.1834573072090366e-07, "loss": 0.0489, "step": 220380 }, { "epoch": 4.486310432569975, "grad_norm": 0.030355660040914032, "learning_rate": 3.1809628401388736e-07, "loss": 0.0001, "step": 220390 }, { "epoch": 4.486513994910942, "grad_norm": 0.02444965060540035, "learning_rate": 3.1784693186394654e-07, "loss": 0.0001, "step": 220400 }, { "epoch": 4.486717557251908, "grad_norm": 0.0026296641925782904, "learning_rate": 3.175976742761178e-07, "loss": 0.0004, "step": 220410 }, { "epoch": 4.486921119592875, "grad_norm": 0.03620252322591555, "learning_rate": 3.1734851125543477e-07, "loss": 0.0178, "step": 220420 }, { "epoch": 4.487124681933842, "grad_norm": 0.008933135022458658, "learning_rate": 3.1709944280693016e-07, "loss": 0.064, "step": 220430 }, { "epoch": 4.487328244274809, "grad_norm": 0.0010632217943778123, "learning_rate": 3.1685046893563385e-07, "loss": 0.0001, "step": 220440 }, { "epoch": 4.487531806615776, "grad_norm": 0.001241414571813578, "learning_rate": 3.166015896465735e-07, "loss": 0.0011, "step": 220450 }, { "epoch": 4.487735368956743, "grad_norm": 0.0028861119003791326, "learning_rate": 3.1635280494477616e-07, "loss": 0.0003, "step": 220460 }, { "epoch": 4.48793893129771, "grad_norm": 19.577921272939783, "learning_rate": 3.161041148352678e-07, "loss": 0.0148, "step": 220470 }, { "epoch": 4.4881424936386765, "grad_norm": 0.0271729561890319, "learning_rate": 3.1585551932306834e-07, "loss": 0.0005, "step": 220480 }, { "epoch": 4.488346055979644, "grad_norm": 13.070285818779077, "learning_rate": 3.156070184132004e-07, "loss": 0.0294, "step": 220490 }, { "epoch": 4.488549618320611, "grad_norm": 0.00924622885242635, "learning_rate": 3.153586121106816e-07, "loss": 0.0002, "step": 220500 }, { "epoch": 4.488753180661577, "grad_norm": 0.0041436006576169, "learning_rate": 3.1511030042052973e-07, "loss": 0.0395, "step": 220510 }, { "epoch": 4.488956743002545, "grad_norm": 0.002793296474830066, "learning_rate": 3.148620833477595e-07, "loss": 0.0543, "step": 220520 }, { "epoch": 4.489160305343511, "grad_norm": 0.002381739467230902, "learning_rate": 3.146139608973836e-07, "loss": 0.0004, "step": 220530 }, { "epoch": 4.489363867684478, "grad_norm": 0.01196103922773073, "learning_rate": 3.1436593307441364e-07, "loss": 0.0001, "step": 220540 }, { "epoch": 4.489567430025446, "grad_norm": 0.0029442534440639453, "learning_rate": 3.141179998838589e-07, "loss": 0.0305, "step": 220550 }, { "epoch": 4.489770992366412, "grad_norm": 0.03627112250163134, "learning_rate": 3.1387016133072655e-07, "loss": 0.0001, "step": 220560 }, { "epoch": 4.489974554707379, "grad_norm": 0.08907888615723283, "learning_rate": 3.13622417420022e-07, "loss": 0.0001, "step": 220570 }, { "epoch": 4.490178117048346, "grad_norm": 0.019583425247842835, "learning_rate": 3.133747681567484e-07, "loss": 0.026, "step": 220580 }, { "epoch": 4.490381679389313, "grad_norm": 0.010990127685384717, "learning_rate": 3.131272135459079e-07, "loss": 0.0009, "step": 220590 }, { "epoch": 4.49058524173028, "grad_norm": 0.019862174928317947, "learning_rate": 3.128797535924999e-07, "loss": 0.0725, "step": 220600 }, { "epoch": 4.490788804071247, "grad_norm": 0.06268641714531044, "learning_rate": 3.1263238830152197e-07, "loss": 0.0128, "step": 220610 }, { "epoch": 4.490992366412214, "grad_norm": 0.015143754514351614, "learning_rate": 3.1238511767797076e-07, "loss": 0.0002, "step": 220620 }, { "epoch": 4.49119592875318, "grad_norm": 0.0059900514604204385, "learning_rate": 3.1213794172683996e-07, "loss": 0.0, "step": 220630 }, { "epoch": 4.491399491094148, "grad_norm": 0.0010740445044854984, "learning_rate": 3.1189086045312013e-07, "loss": 0.0467, "step": 220640 }, { "epoch": 4.491603053435115, "grad_norm": 0.010845733135734248, "learning_rate": 3.116438738618044e-07, "loss": 0.011, "step": 220650 }, { "epoch": 4.491806615776081, "grad_norm": 0.0022708523176716696, "learning_rate": 3.113969819578777e-07, "loss": 0.0, "step": 220660 }, { "epoch": 4.492010178117049, "grad_norm": 0.08025877944692647, "learning_rate": 3.1115018474632773e-07, "loss": 0.0024, "step": 220670 }, { "epoch": 4.492213740458015, "grad_norm": 0.021628051633244466, "learning_rate": 3.1090348223213996e-07, "loss": 0.0375, "step": 220680 }, { "epoch": 4.492417302798982, "grad_norm": 0.0079309465561581, "learning_rate": 3.106568744202959e-07, "loss": 0.002, "step": 220690 }, { "epoch": 4.4926208651399495, "grad_norm": 0.01763997064930964, "learning_rate": 3.104103613157744e-07, "loss": 0.0007, "step": 220700 }, { "epoch": 4.492824427480916, "grad_norm": 0.0021505514952920705, "learning_rate": 3.101639429235581e-07, "loss": 0.0, "step": 220710 }, { "epoch": 4.493027989821883, "grad_norm": 0.10970094974380527, "learning_rate": 3.0991761924862027e-07, "loss": 0.0003, "step": 220720 }, { "epoch": 4.49323155216285, "grad_norm": 0.0068414245102734065, "learning_rate": 3.0967139029593696e-07, "loss": 0.0033, "step": 220730 }, { "epoch": 4.493435114503817, "grad_norm": 0.000784229005840574, "learning_rate": 3.094252560704808e-07, "loss": 0.0054, "step": 220740 }, { "epoch": 4.493638676844784, "grad_norm": 0.002733405149720581, "learning_rate": 3.091792165772234e-07, "loss": 0.0053, "step": 220750 }, { "epoch": 4.493842239185751, "grad_norm": 0.00027735626141593385, "learning_rate": 3.0893327182113355e-07, "loss": 0.0907, "step": 220760 }, { "epoch": 4.494045801526718, "grad_norm": 0.06647156924208242, "learning_rate": 3.0868742180717783e-07, "loss": 0.0185, "step": 220770 }, { "epoch": 4.494249363867684, "grad_norm": 0.011105077716754423, "learning_rate": 3.0844166654032225e-07, "loss": 0.0296, "step": 220780 }, { "epoch": 4.494452926208651, "grad_norm": 0.02005918494568991, "learning_rate": 3.0819600602552956e-07, "loss": 0.0265, "step": 220790 }, { "epoch": 4.4946564885496185, "grad_norm": 0.006632383645463697, "learning_rate": 3.079504402677619e-07, "loss": 0.0111, "step": 220800 }, { "epoch": 4.494860050890585, "grad_norm": 0.0031091317689964444, "learning_rate": 3.077049692719791e-07, "loss": 0.0001, "step": 220810 }, { "epoch": 4.495063613231552, "grad_norm": 0.009856374142461311, "learning_rate": 3.074595930431368e-07, "loss": 0.0148, "step": 220820 }, { "epoch": 4.495267175572519, "grad_norm": 0.0014112715131492986, "learning_rate": 3.072143115861925e-07, "loss": 0.0131, "step": 220830 }, { "epoch": 4.495470737913486, "grad_norm": 0.012517903520953702, "learning_rate": 3.069691249061002e-07, "loss": 0.0001, "step": 220840 }, { "epoch": 4.495674300254453, "grad_norm": 0.009522121817504779, "learning_rate": 3.0672403300780917e-07, "loss": 0.0979, "step": 220850 }, { "epoch": 4.49587786259542, "grad_norm": 0.19490937438104528, "learning_rate": 3.0647903589627327e-07, "loss": 0.0004, "step": 220860 }, { "epoch": 4.496081424936387, "grad_norm": 0.03600510530126108, "learning_rate": 3.062341335764374e-07, "loss": 0.0026, "step": 220870 }, { "epoch": 4.496284987277353, "grad_norm": 0.006560794629545258, "learning_rate": 3.0598932605324814e-07, "loss": 0.0003, "step": 220880 }, { "epoch": 4.496488549618321, "grad_norm": 0.002617924682908212, "learning_rate": 3.057446133316516e-07, "loss": 0.033, "step": 220890 }, { "epoch": 4.4966921119592875, "grad_norm": 0.000701853861777782, "learning_rate": 3.0549999541658823e-07, "loss": 0.0239, "step": 220900 }, { "epoch": 4.496895674300254, "grad_norm": 0.0013881836805981344, "learning_rate": 3.052554723129986e-07, "loss": 0.0003, "step": 220910 }, { "epoch": 4.497099236641222, "grad_norm": 0.0045733313507712245, "learning_rate": 3.0501104402582135e-07, "loss": 0.0002, "step": 220920 }, { "epoch": 4.497302798982188, "grad_norm": 0.00010720827606500298, "learning_rate": 3.0476671055999384e-07, "loss": 0.0001, "step": 220930 }, { "epoch": 4.497506361323155, "grad_norm": 0.04439342318053977, "learning_rate": 3.0452247192044926e-07, "loss": 0.0002, "step": 220940 }, { "epoch": 4.4977099236641225, "grad_norm": 5.119106454222398, "learning_rate": 3.042783281121214e-07, "loss": 0.0224, "step": 220950 }, { "epoch": 4.497913486005089, "grad_norm": 0.6608243215911623, "learning_rate": 3.0403427913994087e-07, "loss": 0.0005, "step": 220960 }, { "epoch": 4.498117048346056, "grad_norm": 0.006842442447046925, "learning_rate": 3.037903250088364e-07, "loss": 0.0002, "step": 220970 }, { "epoch": 4.498320610687023, "grad_norm": 0.009051294043741793, "learning_rate": 3.0354646572373526e-07, "loss": 0.0001, "step": 220980 }, { "epoch": 4.49852417302799, "grad_norm": 0.00818699063841877, "learning_rate": 3.0330270128956173e-07, "loss": 0.0001, "step": 220990 }, { "epoch": 4.4987277353689565, "grad_norm": 0.000773834711979585, "learning_rate": 3.0305903171123974e-07, "loss": 0.0001, "step": 221000 }, { "epoch": 4.498931297709924, "grad_norm": 0.626593343217415, "learning_rate": 3.028154569936903e-07, "loss": 0.0335, "step": 221010 }, { "epoch": 4.499134860050891, "grad_norm": 0.004184724834831099, "learning_rate": 3.025719771418334e-07, "loss": 0.0002, "step": 221020 }, { "epoch": 4.499338422391857, "grad_norm": 0.006295093866055664, "learning_rate": 3.0232859216058396e-07, "loss": 0.0019, "step": 221030 }, { "epoch": 4.499541984732824, "grad_norm": 0.07174594262134762, "learning_rate": 3.0208530205485976e-07, "loss": 0.0002, "step": 221040 }, { "epoch": 4.4997455470737915, "grad_norm": 13.834090177324105, "learning_rate": 3.018421068295746e-07, "loss": 0.088, "step": 221050 }, { "epoch": 4.499949109414758, "grad_norm": 0.0004106932067901791, "learning_rate": 3.0159900648963845e-07, "loss": 0.0001, "step": 221060 }, { "epoch": 4.500152671755725, "grad_norm": 0.023555233056467898, "learning_rate": 3.013560010399613e-07, "loss": 0.0007, "step": 221070 }, { "epoch": 4.500356234096692, "grad_norm": 0.0012443370635013434, "learning_rate": 3.011130904854531e-07, "loss": 0.0016, "step": 221080 }, { "epoch": 4.500559796437659, "grad_norm": 0.0029252744589814334, "learning_rate": 3.0087027483101715e-07, "loss": 0.0338, "step": 221090 }, { "epoch": 4.5007633587786255, "grad_norm": 0.008506196339888904, "learning_rate": 3.006275540815584e-07, "loss": 0.0002, "step": 221100 }, { "epoch": 4.500966921119593, "grad_norm": 0.013108416732114636, "learning_rate": 3.003849282419796e-07, "loss": 0.0201, "step": 221110 }, { "epoch": 4.50117048346056, "grad_norm": 0.003378267539358848, "learning_rate": 3.0014239731717964e-07, "loss": 0.0002, "step": 221120 }, { "epoch": 4.501374045801526, "grad_norm": 0.003961875528071472, "learning_rate": 2.9989996131205736e-07, "loss": 0.0557, "step": 221130 }, { "epoch": 4.501577608142494, "grad_norm": 0.0026377052130135974, "learning_rate": 2.9965762023150937e-07, "loss": 0.0003, "step": 221140 }, { "epoch": 4.5017811704834605, "grad_norm": 0.008988281865215907, "learning_rate": 2.9941537408042897e-07, "loss": 0.0001, "step": 221150 }, { "epoch": 4.501984732824427, "grad_norm": 0.009380795832208895, "learning_rate": 2.991732228637101e-07, "loss": 0.0003, "step": 221160 }, { "epoch": 4.502188295165395, "grad_norm": 0.6717131111260382, "learning_rate": 2.9893116658624266e-07, "loss": 0.0019, "step": 221170 }, { "epoch": 4.502391857506361, "grad_norm": 0.2513852786935032, "learning_rate": 2.986892052529156e-07, "loss": 0.0004, "step": 221180 }, { "epoch": 4.502595419847328, "grad_norm": 0.0016892628628573923, "learning_rate": 2.9844733886861377e-07, "loss": 0.001, "step": 221190 }, { "epoch": 4.502798982188295, "grad_norm": 0.004591080242520664, "learning_rate": 2.982055674382245e-07, "loss": 0.0001, "step": 221200 }, { "epoch": 4.503002544529262, "grad_norm": 0.002771419409334506, "learning_rate": 2.979638909666305e-07, "loss": 0.0003, "step": 221210 }, { "epoch": 4.503206106870229, "grad_norm": 0.007259243051833188, "learning_rate": 2.9772230945871006e-07, "loss": 0.0529, "step": 221220 }, { "epoch": 4.503409669211196, "grad_norm": 0.035044855365069634, "learning_rate": 2.97480822919346e-07, "loss": 0.0444, "step": 221230 }, { "epoch": 4.503613231552163, "grad_norm": 0.0013607517628898048, "learning_rate": 2.9723943135341214e-07, "loss": 0.0006, "step": 221240 }, { "epoch": 4.5038167938931295, "grad_norm": 0.043719438651103865, "learning_rate": 2.9699813476578463e-07, "loss": 0.0427, "step": 221250 }, { "epoch": 4.504020356234097, "grad_norm": 0.0010239894461995054, "learning_rate": 2.9675693316133903e-07, "loss": 0.0143, "step": 221260 }, { "epoch": 4.504223918575064, "grad_norm": 0.0002378401755940687, "learning_rate": 2.965158265449436e-07, "loss": 0.0289, "step": 221270 }, { "epoch": 4.50442748091603, "grad_norm": 0.005598838440622577, "learning_rate": 2.962748149214689e-07, "loss": 0.0, "step": 221280 }, { "epoch": 4.504631043256998, "grad_norm": 0.12343795456399911, "learning_rate": 2.960338982957839e-07, "loss": 0.0289, "step": 221290 }, { "epoch": 4.5048346055979644, "grad_norm": 0.012483252908631863, "learning_rate": 2.957930766727524e-07, "loss": 0.0091, "step": 221300 }, { "epoch": 4.505038167938931, "grad_norm": 0.15636451278755947, "learning_rate": 2.955523500572383e-07, "loss": 0.0003, "step": 221310 }, { "epoch": 4.505241730279899, "grad_norm": 0.009340564295105505, "learning_rate": 2.953117184541038e-07, "loss": 0.001, "step": 221320 }, { "epoch": 4.505445292620865, "grad_norm": 0.002133386941105576, "learning_rate": 2.9507118186820893e-07, "loss": 0.0001, "step": 221330 }, { "epoch": 4.505648854961832, "grad_norm": 0.0024394391715141877, "learning_rate": 2.948307403044115e-07, "loss": 0.0002, "step": 221340 }, { "epoch": 4.505852417302799, "grad_norm": 0.0037204413559569723, "learning_rate": 2.945903937675676e-07, "loss": 0.0112, "step": 221350 }, { "epoch": 4.506055979643766, "grad_norm": 0.012634943974637332, "learning_rate": 2.943501422625311e-07, "loss": 0.048, "step": 221360 }, { "epoch": 4.506259541984733, "grad_norm": 0.0034304583770348313, "learning_rate": 2.941099857941543e-07, "loss": 0.0371, "step": 221370 }, { "epoch": 4.5064631043257, "grad_norm": 0.0027032604604494657, "learning_rate": 2.938699243672871e-07, "loss": 0.0001, "step": 221380 }, { "epoch": 4.506666666666667, "grad_norm": 0.006858264758200698, "learning_rate": 2.9362995798677964e-07, "loss": 0.0001, "step": 221390 }, { "epoch": 4.5068702290076335, "grad_norm": 0.0042451519390481396, "learning_rate": 2.9339008665747513e-07, "loss": 0.0468, "step": 221400 }, { "epoch": 4.507073791348601, "grad_norm": 0.026626908309848187, "learning_rate": 2.9315031038422035e-07, "loss": 0.001, "step": 221410 }, { "epoch": 4.507277353689568, "grad_norm": 0.005231355512297111, "learning_rate": 2.9291062917185864e-07, "loss": 0.0336, "step": 221420 }, { "epoch": 4.507480916030534, "grad_norm": 6.52514203999178, "learning_rate": 2.926710430252278e-07, "loss": 0.0206, "step": 221430 }, { "epoch": 4.507684478371502, "grad_norm": 2.779625293895551, "learning_rate": 2.924315519491688e-07, "loss": 0.044, "step": 221440 }, { "epoch": 4.507888040712468, "grad_norm": 7.906474646065074e-05, "learning_rate": 2.9219215594851915e-07, "loss": 0.0001, "step": 221450 }, { "epoch": 4.508091603053435, "grad_norm": 0.018841544400266273, "learning_rate": 2.9195285502811143e-07, "loss": 0.0002, "step": 221460 }, { "epoch": 4.508295165394402, "grad_norm": 0.0011727918412982635, "learning_rate": 2.917136491927797e-07, "loss": 0.0019, "step": 221470 }, { "epoch": 4.508498727735369, "grad_norm": 0.11913270711426106, "learning_rate": 2.9147453844735494e-07, "loss": 0.0002, "step": 221480 }, { "epoch": 4.508702290076336, "grad_norm": 0.18071063326483508, "learning_rate": 2.912355227966668e-07, "loss": 0.0103, "step": 221490 }, { "epoch": 4.5089058524173025, "grad_norm": 0.006795026885630544, "learning_rate": 2.9099660224554184e-07, "loss": 0.0007, "step": 221500 }, { "epoch": 4.50910941475827, "grad_norm": 0.002295010522498448, "learning_rate": 2.9075777679880567e-07, "loss": 0.0001, "step": 221510 }, { "epoch": 4.509312977099237, "grad_norm": 0.01224181855631815, "learning_rate": 2.905190464612817e-07, "loss": 0.0088, "step": 221520 }, { "epoch": 4.509516539440203, "grad_norm": 0.022377610760823504, "learning_rate": 2.9028041123779104e-07, "loss": 0.0555, "step": 221530 }, { "epoch": 4.509720101781171, "grad_norm": 0.0013492491913936623, "learning_rate": 2.900418711331537e-07, "loss": 0.0138, "step": 221540 }, { "epoch": 4.509923664122137, "grad_norm": 0.003805105026740648, "learning_rate": 2.898034261521876e-07, "loss": 0.0078, "step": 221550 }, { "epoch": 4.510127226463104, "grad_norm": 0.010111792451983233, "learning_rate": 2.895650762997076e-07, "loss": 0.0005, "step": 221560 }, { "epoch": 4.510330788804072, "grad_norm": 0.0063823757359668986, "learning_rate": 2.893268215805278e-07, "loss": 0.0004, "step": 221570 }, { "epoch": 4.510534351145038, "grad_norm": 0.0014322911780750983, "learning_rate": 2.890886619994604e-07, "loss": 0.0401, "step": 221580 }, { "epoch": 4.510737913486005, "grad_norm": 0.013088076196766574, "learning_rate": 2.8885059756131485e-07, "loss": 0.0001, "step": 221590 }, { "epoch": 4.510941475826972, "grad_norm": 10.28397953261089, "learning_rate": 2.8861262827090066e-07, "loss": 0.0163, "step": 221600 }, { "epoch": 4.511145038167939, "grad_norm": 0.0018994842837633265, "learning_rate": 2.883747541330212e-07, "loss": 0.0029, "step": 221610 }, { "epoch": 4.511348600508906, "grad_norm": 0.08131814424395212, "learning_rate": 2.8813697515248205e-07, "loss": 0.0002, "step": 221620 }, { "epoch": 4.511552162849873, "grad_norm": 0.023594091684512045, "learning_rate": 2.878992913340867e-07, "loss": 0.0001, "step": 221630 }, { "epoch": 4.51175572519084, "grad_norm": 0.003968016805873507, "learning_rate": 2.876617026826334e-07, "loss": 0.0006, "step": 221640 }, { "epoch": 4.511959287531806, "grad_norm": 0.02971881554408778, "learning_rate": 2.874242092029206e-07, "loss": 0.0152, "step": 221650 }, { "epoch": 4.512162849872773, "grad_norm": 9.780708105412102e-05, "learning_rate": 2.871868108997472e-07, "loss": 0.0387, "step": 221660 }, { "epoch": 4.512366412213741, "grad_norm": 0.004869516119712222, "learning_rate": 2.869495077779061e-07, "loss": 0.0677, "step": 221670 }, { "epoch": 4.512569974554707, "grad_norm": 0.006108569252492847, "learning_rate": 2.867122998421895e-07, "loss": 0.0, "step": 221680 }, { "epoch": 4.512773536895674, "grad_norm": 0.0017205679370386753, "learning_rate": 2.8647518709738864e-07, "loss": 0.0047, "step": 221690 }, { "epoch": 4.512977099236641, "grad_norm": 0.04110963455240558, "learning_rate": 2.862381695482924e-07, "loss": 0.0002, "step": 221700 }, { "epoch": 4.513180661577608, "grad_norm": 0.0020691733294873147, "learning_rate": 2.8600124719968756e-07, "loss": 0.0002, "step": 221710 }, { "epoch": 4.513384223918575, "grad_norm": 0.0019421469585105986, "learning_rate": 2.8576442005635917e-07, "loss": 0.0026, "step": 221720 }, { "epoch": 4.513587786259542, "grad_norm": 0.0013155408766333653, "learning_rate": 2.8552768812309064e-07, "loss": 0.0004, "step": 221730 }, { "epoch": 4.513791348600509, "grad_norm": 4.718859264403425, "learning_rate": 2.85291051404662e-07, "loss": 0.0191, "step": 221740 }, { "epoch": 4.513994910941475, "grad_norm": 0.010163897729411188, "learning_rate": 2.850545099058538e-07, "loss": 0.0227, "step": 221750 }, { "epoch": 4.514198473282443, "grad_norm": 3.6094578239450663, "learning_rate": 2.8481806363144236e-07, "loss": 0.0034, "step": 221760 }, { "epoch": 4.51440203562341, "grad_norm": 6.134550146822329, "learning_rate": 2.845817125862027e-07, "loss": 0.0256, "step": 221770 }, { "epoch": 4.514605597964376, "grad_norm": 0.26220255384866653, "learning_rate": 2.843454567749088e-07, "loss": 0.0001, "step": 221780 }, { "epoch": 4.514809160305344, "grad_norm": 0.0009905239538323236, "learning_rate": 2.841092962023334e-07, "loss": 0.0185, "step": 221790 }, { "epoch": 4.51501272264631, "grad_norm": 0.024467884261985953, "learning_rate": 2.838732308732428e-07, "loss": 0.0004, "step": 221800 }, { "epoch": 4.515216284987277, "grad_norm": 93.80262870742585, "learning_rate": 2.836372607924076e-07, "loss": 0.0545, "step": 221810 }, { "epoch": 4.5154198473282445, "grad_norm": 0.007197766320205606, "learning_rate": 2.8340138596459345e-07, "loss": 0.0001, "step": 221820 }, { "epoch": 4.515623409669211, "grad_norm": 0.10603362163296105, "learning_rate": 2.8316560639456146e-07, "loss": 0.0361, "step": 221830 }, { "epoch": 4.515826972010178, "grad_norm": 0.0018054214704231713, "learning_rate": 2.8292992208707683e-07, "loss": 0.0004, "step": 221840 }, { "epoch": 4.516030534351145, "grad_norm": 0.005727087495799486, "learning_rate": 2.826943330468973e-07, "loss": 0.0902, "step": 221850 }, { "epoch": 4.516234096692112, "grad_norm": 0.022491071649279656, "learning_rate": 2.824588392787808e-07, "loss": 0.0002, "step": 221860 }, { "epoch": 4.516437659033079, "grad_norm": 30.68382034653267, "learning_rate": 2.8222344078748577e-07, "loss": 0.0158, "step": 221870 }, { "epoch": 4.516641221374046, "grad_norm": 0.02904407104362782, "learning_rate": 2.819881375777639e-07, "loss": 0.0004, "step": 221880 }, { "epoch": 4.516844783715013, "grad_norm": 0.0002578158397824663, "learning_rate": 2.817529296543686e-07, "loss": 0.0005, "step": 221890 }, { "epoch": 4.517048346055979, "grad_norm": 0.016465508864165902, "learning_rate": 2.8151781702205003e-07, "loss": 0.025, "step": 221900 }, { "epoch": 4.517251908396947, "grad_norm": 0.0183853529049727, "learning_rate": 2.8128279968555605e-07, "loss": 0.0365, "step": 221910 }, { "epoch": 4.5174554707379135, "grad_norm": 0.002189969971391119, "learning_rate": 2.8104787764963395e-07, "loss": 0.0505, "step": 221920 }, { "epoch": 4.51765903307888, "grad_norm": 0.4818734994259772, "learning_rate": 2.8081305091902766e-07, "loss": 0.0005, "step": 221930 }, { "epoch": 4.517862595419848, "grad_norm": 0.000283151678782327, "learning_rate": 2.8057831949848014e-07, "loss": 0.0, "step": 221940 }, { "epoch": 4.518066157760814, "grad_norm": 0.007924083586263446, "learning_rate": 2.80343683392732e-07, "loss": 0.008, "step": 221950 }, { "epoch": 4.518269720101781, "grad_norm": 6.399978632437934e-05, "learning_rate": 2.801091426065222e-07, "loss": 0.0001, "step": 221960 }, { "epoch": 4.5184732824427485, "grad_norm": 0.014176961879733798, "learning_rate": 2.798746971445881e-07, "loss": 0.0002, "step": 221970 }, { "epoch": 4.518676844783715, "grad_norm": 0.011647090176201667, "learning_rate": 2.7964034701166266e-07, "loss": 0.0001, "step": 221980 }, { "epoch": 4.518880407124682, "grad_norm": 0.008439198308438131, "learning_rate": 2.794060922124803e-07, "loss": 0.0318, "step": 221990 }, { "epoch": 4.519083969465649, "grad_norm": 0.009617137807578916, "learning_rate": 2.7917193275177334e-07, "loss": 0.0072, "step": 222000 }, { "epoch": 4.519287531806616, "grad_norm": 0.0015734634754406179, "learning_rate": 2.789378686342686e-07, "loss": 0.0198, "step": 222010 }, { "epoch": 4.5194910941475825, "grad_norm": 0.010203423697193886, "learning_rate": 2.7870389986469404e-07, "loss": 0.0001, "step": 222020 }, { "epoch": 4.51969465648855, "grad_norm": 0.0019270620750810013, "learning_rate": 2.784700264477758e-07, "loss": 0.0002, "step": 222030 }, { "epoch": 4.519898218829517, "grad_norm": 6.4065760204724125, "learning_rate": 2.782362483882367e-07, "loss": 0.0246, "step": 222040 }, { "epoch": 4.520101781170483, "grad_norm": 0.0006489014623335996, "learning_rate": 2.780025656907975e-07, "loss": 0.0001, "step": 222050 }, { "epoch": 4.520305343511451, "grad_norm": 0.0012923080203760583, "learning_rate": 2.7776897836017893e-07, "loss": 0.0004, "step": 222060 }, { "epoch": 4.5205089058524175, "grad_norm": 0.007583834791351314, "learning_rate": 2.775354864010976e-07, "loss": 0.0536, "step": 222070 }, { "epoch": 4.520712468193384, "grad_norm": 0.006865286944104677, "learning_rate": 2.773020898182699e-07, "loss": 0.0124, "step": 222080 }, { "epoch": 4.520916030534351, "grad_norm": 0.00015726354147276044, "learning_rate": 2.770687886164092e-07, "loss": 0.0001, "step": 222090 }, { "epoch": 4.521119592875318, "grad_norm": 0.0005696946919514476, "learning_rate": 2.768355828002278e-07, "loss": 0.0001, "step": 222100 }, { "epoch": 4.521323155216285, "grad_norm": 0.013944035043445994, "learning_rate": 2.7660247237443483e-07, "loss": 0.0001, "step": 222110 }, { "epoch": 4.5215267175572516, "grad_norm": 0.011996528357111999, "learning_rate": 2.763694573437381e-07, "loss": 0.0002, "step": 222120 }, { "epoch": 4.521730279898219, "grad_norm": 0.00660085325368684, "learning_rate": 2.76136537712845e-07, "loss": 0.0, "step": 222130 }, { "epoch": 4.521933842239186, "grad_norm": 0.021862382945398995, "learning_rate": 2.759037134864584e-07, "loss": 0.0633, "step": 222140 }, { "epoch": 4.522137404580152, "grad_norm": 0.010175608200237647, "learning_rate": 2.7567098466928123e-07, "loss": 0.0418, "step": 222150 }, { "epoch": 4.52234096692112, "grad_norm": 0.004528569107338869, "learning_rate": 2.754383512660136e-07, "loss": 0.0064, "step": 222160 }, { "epoch": 4.5225445292620865, "grad_norm": 0.009362645903470392, "learning_rate": 2.752058132813523e-07, "loss": 0.0, "step": 222170 }, { "epoch": 4.522748091603053, "grad_norm": 0.031873190699163154, "learning_rate": 2.749733707199964e-07, "loss": 0.0001, "step": 222180 }, { "epoch": 4.522951653944021, "grad_norm": 0.006639505387779783, "learning_rate": 2.7474102358663934e-07, "loss": 0.0013, "step": 222190 }, { "epoch": 4.523155216284987, "grad_norm": 0.001395196407708575, "learning_rate": 2.745087718859718e-07, "loss": 0.029, "step": 222200 }, { "epoch": 4.523358778625954, "grad_norm": 0.006840342574976037, "learning_rate": 2.742766156226878e-07, "loss": 0.0001, "step": 222210 }, { "epoch": 4.5235623409669214, "grad_norm": 0.0021725216639002965, "learning_rate": 2.740445548014731e-07, "loss": 0.0162, "step": 222220 }, { "epoch": 4.523765903307888, "grad_norm": 8.669899819744769, "learning_rate": 2.7381258942701547e-07, "loss": 0.0659, "step": 222230 }, { "epoch": 4.523969465648855, "grad_norm": 0.010170099112459027, "learning_rate": 2.7358071950400076e-07, "loss": 0.0001, "step": 222240 }, { "epoch": 4.524173027989822, "grad_norm": 0.007367815690896738, "learning_rate": 2.733489450371107e-07, "loss": 0.0284, "step": 222250 }, { "epoch": 4.524376590330789, "grad_norm": 0.012094838501727505, "learning_rate": 2.7311726603102664e-07, "loss": 0.0425, "step": 222260 }, { "epoch": 4.5245801526717555, "grad_norm": 0.0007758168116547284, "learning_rate": 2.728856824904269e-07, "loss": 0.0001, "step": 222270 }, { "epoch": 4.524783715012723, "grad_norm": 0.10127010876949834, "learning_rate": 2.726541944199901e-07, "loss": 0.0004, "step": 222280 }, { "epoch": 4.52498727735369, "grad_norm": 0.0017634655036728737, "learning_rate": 2.7242280182439016e-07, "loss": 0.0001, "step": 222290 }, { "epoch": 4.525190839694656, "grad_norm": 0.06088562895934123, "learning_rate": 2.721915047083007e-07, "loss": 0.0188, "step": 222300 }, { "epoch": 4.525394402035623, "grad_norm": 0.015988816582047385, "learning_rate": 2.719603030763934e-07, "loss": 0.0001, "step": 222310 }, { "epoch": 4.5255979643765905, "grad_norm": 0.0010359346652219585, "learning_rate": 2.7172919693333746e-07, "loss": 0.0501, "step": 222320 }, { "epoch": 4.525801526717557, "grad_norm": 0.015504406262157565, "learning_rate": 2.714981862838001e-07, "loss": 0.0354, "step": 222330 }, { "epoch": 4.526005089058524, "grad_norm": 0.10426968713500032, "learning_rate": 2.7126727113244823e-07, "loss": 0.0003, "step": 222340 }, { "epoch": 4.526208651399491, "grad_norm": 0.0006235198508685131, "learning_rate": 2.7103645148394255e-07, "loss": 0.045, "step": 222350 }, { "epoch": 4.526412213740458, "grad_norm": 0.026732821343740804, "learning_rate": 2.7080572734294764e-07, "loss": 0.005, "step": 222360 }, { "epoch": 4.5266157760814245, "grad_norm": 0.02074775958868438, "learning_rate": 2.7057509871412256e-07, "loss": 0.0117, "step": 222370 }, { "epoch": 4.526819338422392, "grad_norm": 0.007200303656163798, "learning_rate": 2.7034456560212365e-07, "loss": 0.0, "step": 222380 }, { "epoch": 4.527022900763359, "grad_norm": 13.184264030559644, "learning_rate": 2.7011412801160877e-07, "loss": 0.0066, "step": 222390 }, { "epoch": 4.527226463104325, "grad_norm": 1.1606542841566896, "learning_rate": 2.6988378594723197e-07, "loss": 0.0068, "step": 222400 }, { "epoch": 4.527430025445293, "grad_norm": 0.020235176902749015, "learning_rate": 2.6965353941364294e-07, "loss": 0.113, "step": 222410 }, { "epoch": 4.5276335877862595, "grad_norm": 0.002215133941719333, "learning_rate": 2.6942338841549454e-07, "loss": 0.0, "step": 222420 }, { "epoch": 4.527837150127226, "grad_norm": 0.004948799857461621, "learning_rate": 2.691933329574331e-07, "loss": 0.0012, "step": 222430 }, { "epoch": 4.528040712468194, "grad_norm": 0.2553889844662708, "learning_rate": 2.689633730441055e-07, "loss": 0.0129, "step": 222440 }, { "epoch": 4.52824427480916, "grad_norm": 0.006664165750180852, "learning_rate": 2.6873350868015625e-07, "loss": 0.0579, "step": 222450 }, { "epoch": 4.528447837150127, "grad_norm": 0.002862676491657682, "learning_rate": 2.6850373987022726e-07, "loss": 0.0286, "step": 222460 }, { "epoch": 4.528651399491094, "grad_norm": 0.4738759776530472, "learning_rate": 2.6827406661895984e-07, "loss": 0.0442, "step": 222470 }, { "epoch": 4.528854961832061, "grad_norm": 6.445200207758178, "learning_rate": 2.680444889309919e-07, "loss": 0.0262, "step": 222480 }, { "epoch": 4.529058524173028, "grad_norm": 0.03277450070986345, "learning_rate": 2.6781500681096026e-07, "loss": 0.0001, "step": 222490 }, { "epoch": 4.529262086513995, "grad_norm": 1.9109904979865693, "learning_rate": 2.6758562026349964e-07, "loss": 0.0004, "step": 222500 }, { "epoch": 4.529465648854962, "grad_norm": 0.0010020681658718578, "learning_rate": 2.673563292932424e-07, "loss": 0.0083, "step": 222510 }, { "epoch": 4.5296692111959285, "grad_norm": 0.01728399701289324, "learning_rate": 2.671271339048198e-07, "loss": 0.0001, "step": 222520 }, { "epoch": 4.529872773536896, "grad_norm": 0.004506856060824934, "learning_rate": 2.66898034102861e-07, "loss": 0.0005, "step": 222530 }, { "epoch": 4.530076335877863, "grad_norm": 0.010987327379086666, "learning_rate": 2.6666902989199215e-07, "loss": 0.0248, "step": 222540 }, { "epoch": 4.530279898218829, "grad_norm": 0.004239584356880353, "learning_rate": 2.664401212768386e-07, "loss": 0.0001, "step": 222550 }, { "epoch": 4.530483460559797, "grad_norm": 0.02922499366915885, "learning_rate": 2.662113082620238e-07, "loss": 0.0119, "step": 222560 }, { "epoch": 4.530687022900763, "grad_norm": 0.020250736046437085, "learning_rate": 2.6598259085216907e-07, "loss": 0.0003, "step": 222570 }, { "epoch": 4.53089058524173, "grad_norm": 0.013381128301469437, "learning_rate": 2.657539690518934e-07, "loss": 0.0443, "step": 222580 }, { "epoch": 4.531094147582698, "grad_norm": 0.013089160879560168, "learning_rate": 2.6552544286581384e-07, "loss": 0.0001, "step": 222590 }, { "epoch": 4.531297709923664, "grad_norm": 0.008016847180514772, "learning_rate": 2.652970122985449e-07, "loss": 0.0003, "step": 222600 }, { "epoch": 4.531501272264631, "grad_norm": 27.314465306794457, "learning_rate": 2.650686773547023e-07, "loss": 0.0092, "step": 222610 }, { "epoch": 4.531704834605598, "grad_norm": 0.0024110869081868614, "learning_rate": 2.648404380388958e-07, "loss": 0.0474, "step": 222620 }, { "epoch": 4.531908396946565, "grad_norm": 0.00031487295654431365, "learning_rate": 2.6461229435573557e-07, "loss": 0.0037, "step": 222630 }, { "epoch": 4.532111959287532, "grad_norm": 0.21455990163019412, "learning_rate": 2.6438424630982897e-07, "loss": 0.0032, "step": 222640 }, { "epoch": 4.532315521628499, "grad_norm": 0.008828120528300214, "learning_rate": 2.6415629390578236e-07, "loss": 0.0343, "step": 222650 }, { "epoch": 4.532519083969466, "grad_norm": 0.011055263931543309, "learning_rate": 2.639284371481987e-07, "loss": 0.0488, "step": 222660 }, { "epoch": 4.532722646310432, "grad_norm": 0.0013035295165389487, "learning_rate": 2.6370067604168046e-07, "loss": 0.0293, "step": 222670 }, { "epoch": 4.5329262086514, "grad_norm": 0.001103871228764735, "learning_rate": 2.634730105908273e-07, "loss": 0.0454, "step": 222680 }, { "epoch": 4.533129770992367, "grad_norm": 0.0003232165800109189, "learning_rate": 2.632454408002372e-07, "loss": 0.0044, "step": 222690 }, { "epoch": 4.533333333333333, "grad_norm": 0.0025118675801881598, "learning_rate": 2.6301796667450643e-07, "loss": 0.0146, "step": 222700 }, { "epoch": 4.533536895674301, "grad_norm": 0.1285489428994016, "learning_rate": 2.627905882182297e-07, "loss": 0.0003, "step": 222710 }, { "epoch": 4.533740458015267, "grad_norm": 0.004872792407055592, "learning_rate": 2.6256330543599715e-07, "loss": 0.0004, "step": 222720 }, { "epoch": 4.533944020356234, "grad_norm": 0.015428267291833603, "learning_rate": 2.6233611833240083e-07, "loss": 0.0001, "step": 222730 }, { "epoch": 4.534147582697201, "grad_norm": 0.0019385929690797536, "learning_rate": 2.6210902691202967e-07, "loss": 0.0159, "step": 222740 }, { "epoch": 4.534351145038168, "grad_norm": 0.002160658247153094, "learning_rate": 2.618820311794679e-07, "loss": 0.0001, "step": 222750 }, { "epoch": 4.534554707379135, "grad_norm": 0.001479463646735768, "learning_rate": 2.616551311393012e-07, "loss": 0.0204, "step": 222760 }, { "epoch": 4.534758269720101, "grad_norm": 4.407741389804459, "learning_rate": 2.614283267961132e-07, "loss": 0.0014, "step": 222770 }, { "epoch": 4.534961832061069, "grad_norm": 0.5515337254597912, "learning_rate": 2.6120161815448185e-07, "loss": 0.0003, "step": 222780 }, { "epoch": 4.535165394402036, "grad_norm": 59.60315306763058, "learning_rate": 2.6097500521898913e-07, "loss": 0.034, "step": 222790 }, { "epoch": 4.535368956743002, "grad_norm": 0.1032713080890521, "learning_rate": 2.60748487994209e-07, "loss": 0.0927, "step": 222800 }, { "epoch": 4.53557251908397, "grad_norm": 0.0012131114689224278, "learning_rate": 2.6052206648471633e-07, "loss": 0.0336, "step": 222810 }, { "epoch": 4.535776081424936, "grad_norm": 0.001843131276122952, "learning_rate": 2.602957406950868e-07, "loss": 0.0007, "step": 222820 }, { "epoch": 4.535979643765903, "grad_norm": 0.3184173604116007, "learning_rate": 2.6006951062988837e-07, "loss": 0.0435, "step": 222830 }, { "epoch": 4.5361832061068705, "grad_norm": 10.976690716898926, "learning_rate": 2.5984337629369083e-07, "loss": 0.0522, "step": 222840 }, { "epoch": 4.536386768447837, "grad_norm": 0.0003372329868942071, "learning_rate": 2.596173376910627e-07, "loss": 0.0701, "step": 222850 }, { "epoch": 4.536590330788804, "grad_norm": 0.014668701142014818, "learning_rate": 2.593913948265675e-07, "loss": 0.0001, "step": 222860 }, { "epoch": 4.536793893129771, "grad_norm": 0.003933190655328829, "learning_rate": 2.591655477047683e-07, "loss": 0.0, "step": 222870 }, { "epoch": 4.536997455470738, "grad_norm": 0.00020072437816408235, "learning_rate": 2.5893979633022756e-07, "loss": 0.0521, "step": 222880 }, { "epoch": 4.537201017811705, "grad_norm": 0.015196400592488966, "learning_rate": 2.5871414070750444e-07, "loss": 0.0182, "step": 222890 }, { "epoch": 4.537404580152672, "grad_norm": 0.017477980381383956, "learning_rate": 2.5848858084115524e-07, "loss": 0.0052, "step": 222900 }, { "epoch": 4.537608142493639, "grad_norm": 2.0777845281134963, "learning_rate": 2.582631167357369e-07, "loss": 0.0004, "step": 222910 }, { "epoch": 4.537811704834605, "grad_norm": 0.02875359864740009, "learning_rate": 2.5803774839580186e-07, "loss": 0.0327, "step": 222920 }, { "epoch": 4.538015267175572, "grad_norm": 0.01651853356625206, "learning_rate": 2.5781247582590205e-07, "loss": 0.0333, "step": 222930 }, { "epoch": 4.5382188295165395, "grad_norm": 0.0018924918729229308, "learning_rate": 2.575872990305872e-07, "loss": 0.0002, "step": 222940 }, { "epoch": 4.538422391857506, "grad_norm": 0.0128352939256822, "learning_rate": 2.573622180144053e-07, "loss": 0.0055, "step": 222950 }, { "epoch": 4.538625954198473, "grad_norm": 0.40612718884279464, "learning_rate": 2.5713723278190104e-07, "loss": 0.0006, "step": 222960 }, { "epoch": 4.53882951653944, "grad_norm": 0.06551361547130313, "learning_rate": 2.5691234333761973e-07, "loss": 0.0016, "step": 222970 }, { "epoch": 4.539033078880407, "grad_norm": 0.1736359824904119, "learning_rate": 2.566875496861032e-07, "loss": 0.0002, "step": 222980 }, { "epoch": 4.539236641221374, "grad_norm": 0.021213452251605043, "learning_rate": 2.5646285183189067e-07, "loss": 0.0001, "step": 222990 }, { "epoch": 4.539440203562341, "grad_norm": 0.4981839002933735, "learning_rate": 2.5623824977952016e-07, "loss": 0.0001, "step": 223000 }, { "epoch": 4.539643765903308, "grad_norm": 0.010061320596768175, "learning_rate": 2.5601374353352805e-07, "loss": 0.0001, "step": 223010 }, { "epoch": 4.539847328244274, "grad_norm": 0.003487939346382122, "learning_rate": 2.55789333098449e-07, "loss": 0.0024, "step": 223020 }, { "epoch": 4.540050890585242, "grad_norm": 0.0022128225380221105, "learning_rate": 2.555650184788144e-07, "loss": 0.0007, "step": 223030 }, { "epoch": 4.5402544529262086, "grad_norm": 0.0028750787292335634, "learning_rate": 2.5534079967915514e-07, "loss": 0.0002, "step": 223040 }, { "epoch": 4.540458015267175, "grad_norm": 0.01898887437420485, "learning_rate": 2.5511667670399977e-07, "loss": 0.0009, "step": 223050 }, { "epoch": 4.540661577608143, "grad_norm": 0.004094630023840223, "learning_rate": 2.5489264955787406e-07, "loss": 0.0008, "step": 223060 }, { "epoch": 4.540865139949109, "grad_norm": 0.024399364214465622, "learning_rate": 2.546687182453034e-07, "loss": 0.0002, "step": 223070 }, { "epoch": 4.541068702290076, "grad_norm": 0.007065962021277649, "learning_rate": 2.544448827708096e-07, "loss": 0.0003, "step": 223080 }, { "epoch": 4.5412722646310435, "grad_norm": 0.011852379527263188, "learning_rate": 2.542211431389141e-07, "loss": 0.0147, "step": 223090 }, { "epoch": 4.54147582697201, "grad_norm": 0.5514927216322771, "learning_rate": 2.539974993541344e-07, "loss": 0.0212, "step": 223100 }, { "epoch": 4.541679389312977, "grad_norm": 0.003108962328515891, "learning_rate": 2.5377395142098857e-07, "loss": 0.0003, "step": 223110 }, { "epoch": 4.541882951653944, "grad_norm": 0.06064891058588532, "learning_rate": 2.535504993439908e-07, "loss": 0.0266, "step": 223120 }, { "epoch": 4.542086513994911, "grad_norm": 0.002505407866279924, "learning_rate": 2.533271431276535e-07, "loss": 0.0078, "step": 223130 }, { "epoch": 4.542290076335878, "grad_norm": 0.005325664426090292, "learning_rate": 2.531038827764898e-07, "loss": 0.0001, "step": 223140 }, { "epoch": 4.542493638676845, "grad_norm": 0.0062771312810167, "learning_rate": 2.5288071829500494e-07, "loss": 0.0013, "step": 223150 }, { "epoch": 4.542697201017812, "grad_norm": 0.0011529204751525694, "learning_rate": 2.526576496877103e-07, "loss": 0.0001, "step": 223160 }, { "epoch": 4.542900763358778, "grad_norm": 0.011276974316292716, "learning_rate": 2.5243467695910797e-07, "loss": 0.0116, "step": 223170 }, { "epoch": 4.543104325699746, "grad_norm": 0.006766786614692039, "learning_rate": 2.522118001137014e-07, "loss": 0.0009, "step": 223180 }, { "epoch": 4.5433078880407125, "grad_norm": 0.02014715032778922, "learning_rate": 2.5198901915599373e-07, "loss": 0.0639, "step": 223190 }, { "epoch": 4.543511450381679, "grad_norm": 0.012602250201182219, "learning_rate": 2.51766334090483e-07, "loss": 0.0055, "step": 223200 }, { "epoch": 4.543715012722647, "grad_norm": 0.8419188396531021, "learning_rate": 2.515437449216662e-07, "loss": 0.0054, "step": 223210 }, { "epoch": 4.543918575063613, "grad_norm": 0.003172520547651397, "learning_rate": 2.513212516540403e-07, "loss": 0.003, "step": 223220 }, { "epoch": 4.54412213740458, "grad_norm": 0.009719245674057709, "learning_rate": 2.5109885429209777e-07, "loss": 0.0001, "step": 223230 }, { "epoch": 4.5443256997455475, "grad_norm": 0.040494314580540855, "learning_rate": 2.508765528403301e-07, "loss": 0.0003, "step": 223240 }, { "epoch": 4.544529262086514, "grad_norm": 0.017056270236390544, "learning_rate": 2.5065434730322745e-07, "loss": 0.0365, "step": 223250 }, { "epoch": 4.544732824427481, "grad_norm": 0.09372918346271297, "learning_rate": 2.5043223768527746e-07, "loss": 0.0061, "step": 223260 }, { "epoch": 4.544936386768448, "grad_norm": 0.002972909570392195, "learning_rate": 2.502102239909654e-07, "loss": 0.0347, "step": 223270 }, { "epoch": 4.545139949109415, "grad_norm": 0.001817857602972493, "learning_rate": 2.49988306224776e-07, "loss": 0.0001, "step": 223280 }, { "epoch": 4.5453435114503815, "grad_norm": 0.027435066357894184, "learning_rate": 2.497664843911907e-07, "loss": 0.0001, "step": 223290 }, { "epoch": 4.545547073791349, "grad_norm": 10.737136554136267, "learning_rate": 2.4954475849468916e-07, "loss": 0.049, "step": 223300 }, { "epoch": 4.545750636132316, "grad_norm": 0.36537718823523624, "learning_rate": 2.493231285397502e-07, "loss": 0.0369, "step": 223310 }, { "epoch": 4.545954198473282, "grad_norm": 0.0036981995232155157, "learning_rate": 2.4910159453085004e-07, "loss": 0.0014, "step": 223320 }, { "epoch": 4.54615776081425, "grad_norm": 0.0051656716845574204, "learning_rate": 2.4888015647246076e-07, "loss": 0.0009, "step": 223330 }, { "epoch": 4.5463613231552165, "grad_norm": 0.006974238134512992, "learning_rate": 2.486588143690566e-07, "loss": 0.0213, "step": 223340 }, { "epoch": 4.546564885496183, "grad_norm": 0.08400225877322927, "learning_rate": 2.4843756822510836e-07, "loss": 0.0443, "step": 223350 }, { "epoch": 4.546768447837151, "grad_norm": 0.0037965410020522, "learning_rate": 2.482164180450819e-07, "loss": 0.0505, "step": 223360 }, { "epoch": 4.546972010178117, "grad_norm": 0.07742236260406576, "learning_rate": 2.4799536383344644e-07, "loss": 0.0005, "step": 223370 }, { "epoch": 4.547175572519084, "grad_norm": 0.0005091626940704368, "learning_rate": 2.477744055946646e-07, "loss": 0.0001, "step": 223380 }, { "epoch": 4.5473791348600505, "grad_norm": 8.570276432096644e-05, "learning_rate": 2.475535433331988e-07, "loss": 0.0302, "step": 223390 }, { "epoch": 4.547582697201018, "grad_norm": 0.0008622052270483259, "learning_rate": 2.4733277705351167e-07, "loss": 0.0221, "step": 223400 }, { "epoch": 4.547786259541985, "grad_norm": 0.0012713990101683667, "learning_rate": 2.4711210676006014e-07, "loss": 0.0001, "step": 223410 }, { "epoch": 4.547989821882951, "grad_norm": 0.0012427021528890162, "learning_rate": 2.468915324573007e-07, "loss": 0.0001, "step": 223420 }, { "epoch": 4.548193384223919, "grad_norm": 0.043394235991215314, "learning_rate": 2.466710541496892e-07, "loss": 0.0463, "step": 223430 }, { "epoch": 4.5483969465648855, "grad_norm": 0.022846067541774293, "learning_rate": 2.4645067184167757e-07, "loss": 0.0001, "step": 223440 }, { "epoch": 4.548600508905852, "grad_norm": 0.013249817342639247, "learning_rate": 2.462303855377174e-07, "loss": 0.007, "step": 223450 }, { "epoch": 4.54880407124682, "grad_norm": 0.008680397408511637, "learning_rate": 2.4601019524225723e-07, "loss": 0.0001, "step": 223460 }, { "epoch": 4.549007633587786, "grad_norm": 0.0002918245460640395, "learning_rate": 2.457901009597441e-07, "loss": 0.0001, "step": 223470 }, { "epoch": 4.549211195928753, "grad_norm": 7.804191899490938, "learning_rate": 2.4557010269462387e-07, "loss": 0.0261, "step": 223480 }, { "epoch": 4.54941475826972, "grad_norm": 0.025780960391290665, "learning_rate": 2.4535020045133864e-07, "loss": 0.0001, "step": 223490 }, { "epoch": 4.549618320610687, "grad_norm": 9.849932057099972, "learning_rate": 2.4513039423433026e-07, "loss": 0.0375, "step": 223500 }, { "epoch": 4.549821882951654, "grad_norm": 0.004434645097697106, "learning_rate": 2.4491068404803755e-07, "loss": 0.0142, "step": 223510 }, { "epoch": 4.550025445292621, "grad_norm": 0.023355618382221728, "learning_rate": 2.446910698968985e-07, "loss": 0.0001, "step": 223520 }, { "epoch": 4.550229007633588, "grad_norm": 0.011149725627949766, "learning_rate": 2.4447155178534796e-07, "loss": 0.0002, "step": 223530 }, { "epoch": 4.5504325699745545, "grad_norm": 0.012314394655488753, "learning_rate": 2.442521297178191e-07, "loss": 0.0262, "step": 223540 }, { "epoch": 4.550636132315522, "grad_norm": 0.017659798694572897, "learning_rate": 2.4403280369874384e-07, "loss": 0.0003, "step": 223550 }, { "epoch": 4.550839694656489, "grad_norm": 0.051889445047010235, "learning_rate": 2.4381357373255253e-07, "loss": 0.0001, "step": 223560 }, { "epoch": 4.551043256997455, "grad_norm": 0.007680298142442639, "learning_rate": 2.435944398236717e-07, "loss": 0.0001, "step": 223570 }, { "epoch": 4.551246819338422, "grad_norm": 0.006068301526904903, "learning_rate": 2.4337540197652665e-07, "loss": 0.0019, "step": 223580 }, { "epoch": 4.551450381679389, "grad_norm": 0.00014169661136798124, "learning_rate": 2.4315646019554273e-07, "loss": 0.0001, "step": 223590 }, { "epoch": 4.551653944020356, "grad_norm": 0.003029182137694702, "learning_rate": 2.429376144851403e-07, "loss": 0.0001, "step": 223600 }, { "epoch": 4.551857506361323, "grad_norm": 0.0003434453481883039, "learning_rate": 2.427188648497403e-07, "loss": 0.0007, "step": 223610 }, { "epoch": 4.55206106870229, "grad_norm": 0.0039722973097021195, "learning_rate": 2.425002112937597e-07, "loss": 0.0236, "step": 223620 }, { "epoch": 4.552264631043257, "grad_norm": 0.0033934851284560382, "learning_rate": 2.4228165382161495e-07, "loss": 0.0003, "step": 223630 }, { "epoch": 4.5524681933842235, "grad_norm": 0.016350916492268202, "learning_rate": 2.4206319243772034e-07, "loss": 0.0231, "step": 223640 }, { "epoch": 4.552671755725191, "grad_norm": 0.00562279722981973, "learning_rate": 2.4184482714648793e-07, "loss": 0.0004, "step": 223650 }, { "epoch": 4.552875318066158, "grad_norm": 0.008297836217243082, "learning_rate": 2.416265579523269e-07, "loss": 0.0003, "step": 223660 }, { "epoch": 4.553078880407124, "grad_norm": 0.1159808635093207, "learning_rate": 2.414083848596471e-07, "loss": 0.0004, "step": 223670 }, { "epoch": 4.553282442748092, "grad_norm": 8.704788907393326, "learning_rate": 2.411903078728534e-07, "loss": 0.0272, "step": 223680 }, { "epoch": 4.553486005089058, "grad_norm": 0.5284814478314408, "learning_rate": 2.4097232699635155e-07, "loss": 0.0004, "step": 223690 }, { "epoch": 4.553689567430025, "grad_norm": 0.0035812018535276604, "learning_rate": 2.4075444223454205e-07, "loss": 0.0001, "step": 223700 }, { "epoch": 4.553893129770993, "grad_norm": 0.026058925054795977, "learning_rate": 2.405366535918269e-07, "loss": 0.0001, "step": 223710 }, { "epoch": 4.554096692111959, "grad_norm": 0.004696640039578148, "learning_rate": 2.4031896107260477e-07, "loss": 0.0005, "step": 223720 }, { "epoch": 4.554300254452926, "grad_norm": 10.35169540243285, "learning_rate": 2.4010136468127057e-07, "loss": 0.0759, "step": 223730 }, { "epoch": 4.554503816793893, "grad_norm": 0.2081340049453625, "learning_rate": 2.3988386442222124e-07, "loss": 0.0004, "step": 223740 }, { "epoch": 4.55470737913486, "grad_norm": 0.008398330144339316, "learning_rate": 2.3966646029984776e-07, "loss": 0.0001, "step": 223750 }, { "epoch": 4.554910941475827, "grad_norm": 0.00028727480823052213, "learning_rate": 2.394491523185405e-07, "loss": 0.0232, "step": 223760 }, { "epoch": 4.555114503816794, "grad_norm": 0.011806973221814144, "learning_rate": 2.392319404826904e-07, "loss": 0.0004, "step": 223770 }, { "epoch": 4.555318066157761, "grad_norm": 0.22371432893514323, "learning_rate": 2.3901482479668224e-07, "loss": 0.0357, "step": 223780 }, { "epoch": 4.555521628498727, "grad_norm": 0.003985888880552241, "learning_rate": 2.387978052649015e-07, "loss": 0.0003, "step": 223790 }, { "epoch": 4.555725190839695, "grad_norm": 0.0013190851819528467, "learning_rate": 2.385808818917329e-07, "loss": 0.0001, "step": 223800 }, { "epoch": 4.555928753180662, "grad_norm": 0.0028158758504925915, "learning_rate": 2.383640546815552e-07, "loss": 0.0128, "step": 223810 }, { "epoch": 4.556132315521628, "grad_norm": 0.012680312928442461, "learning_rate": 2.3814732363874826e-07, "loss": 0.0214, "step": 223820 }, { "epoch": 4.556335877862596, "grad_norm": 0.01901193044052725, "learning_rate": 2.3793068876768965e-07, "loss": 0.0365, "step": 223830 }, { "epoch": 4.556539440203562, "grad_norm": 0.1809686217064262, "learning_rate": 2.3771415007275367e-07, "loss": 0.0008, "step": 223840 }, { "epoch": 4.556743002544529, "grad_norm": 0.004365897550132941, "learning_rate": 2.374977075583146e-07, "loss": 0.0003, "step": 223850 }, { "epoch": 4.5569465648854965, "grad_norm": 0.023436434570424065, "learning_rate": 2.3728136122874334e-07, "loss": 0.0001, "step": 223860 }, { "epoch": 4.557150127226463, "grad_norm": 0.007065261529431918, "learning_rate": 2.370651110884098e-07, "loss": 0.0001, "step": 223870 }, { "epoch": 4.55735368956743, "grad_norm": 0.010623390745782554, "learning_rate": 2.3684895714168043e-07, "loss": 0.0188, "step": 223880 }, { "epoch": 4.557557251908397, "grad_norm": 5.754907753125937, "learning_rate": 2.3663289939292122e-07, "loss": 0.039, "step": 223890 }, { "epoch": 4.557760814249364, "grad_norm": 0.0027277450017557307, "learning_rate": 2.3641693784649643e-07, "loss": 0.0004, "step": 223900 }, { "epoch": 4.557964376590331, "grad_norm": 0.19880382894745444, "learning_rate": 2.3620107250676595e-07, "loss": 0.0012, "step": 223910 }, { "epoch": 4.558167938931298, "grad_norm": 0.023702671635362434, "learning_rate": 2.3598530337809123e-07, "loss": 0.0001, "step": 223920 }, { "epoch": 4.558371501272265, "grad_norm": 0.001843997574334114, "learning_rate": 2.3576963046482993e-07, "loss": 0.0001, "step": 223930 }, { "epoch": 4.558575063613231, "grad_norm": 0.03436287305686569, "learning_rate": 2.3555405377133578e-07, "loss": 0.0337, "step": 223940 }, { "epoch": 4.558778625954199, "grad_norm": 0.007950402374611865, "learning_rate": 2.3533857330196476e-07, "loss": 0.0004, "step": 223950 }, { "epoch": 4.5589821882951655, "grad_norm": 0.07352080660072065, "learning_rate": 2.351231890610689e-07, "loss": 0.0305, "step": 223960 }, { "epoch": 4.559185750636132, "grad_norm": 0.01135567081749498, "learning_rate": 2.3490790105299643e-07, "loss": 0.052, "step": 223970 }, { "epoch": 4.5593893129771, "grad_norm": 0.0007423211866115434, "learning_rate": 2.3469270928209665e-07, "loss": 0.0, "step": 223980 }, { "epoch": 4.559592875318066, "grad_norm": 0.000802082677221345, "learning_rate": 2.3447761375271494e-07, "loss": 0.0001, "step": 223990 }, { "epoch": 4.559796437659033, "grad_norm": 0.001012998144270442, "learning_rate": 2.3426261446919562e-07, "loss": 0.0666, "step": 224000 }, { "epoch": 4.5600000000000005, "grad_norm": 11.805572195436433, "learning_rate": 2.3404771143588134e-07, "loss": 0.005, "step": 224010 }, { "epoch": 4.560203562340967, "grad_norm": 0.5027023325762658, "learning_rate": 2.3383290465711196e-07, "loss": 0.0004, "step": 224020 }, { "epoch": 4.560407124681934, "grad_norm": 0.01142481880256205, "learning_rate": 2.3361819413722565e-07, "loss": 0.0262, "step": 224030 }, { "epoch": 4.5606106870229, "grad_norm": 0.004206901161902751, "learning_rate": 2.3340357988055952e-07, "loss": 0.0, "step": 224040 }, { "epoch": 4.560814249363868, "grad_norm": 1.7400319848781824, "learning_rate": 2.3318906189144674e-07, "loss": 0.1101, "step": 224050 }, { "epoch": 4.561017811704835, "grad_norm": 0.0011877496056918282, "learning_rate": 2.3297464017422112e-07, "loss": 0.0491, "step": 224060 }, { "epoch": 4.561221374045801, "grad_norm": 0.011335229447477915, "learning_rate": 2.3276031473321192e-07, "loss": 0.0122, "step": 224070 }, { "epoch": 4.561424936386769, "grad_norm": 0.08473095578431163, "learning_rate": 2.325460855727485e-07, "loss": 0.0002, "step": 224080 }, { "epoch": 4.561628498727735, "grad_norm": 0.002936892835664827, "learning_rate": 2.3233195269715735e-07, "loss": 0.0, "step": 224090 }, { "epoch": 4.561832061068702, "grad_norm": 0.0018273391505402973, "learning_rate": 2.321179161107634e-07, "loss": 0.0402, "step": 224100 }, { "epoch": 4.5620356234096695, "grad_norm": 0.0045855357596229825, "learning_rate": 2.3190397581788982e-07, "loss": 0.0217, "step": 224110 }, { "epoch": 4.562239185750636, "grad_norm": 0.005655714506344413, "learning_rate": 2.3169013182285538e-07, "loss": 0.0495, "step": 224120 }, { "epoch": 4.562442748091603, "grad_norm": 0.0018721848052386718, "learning_rate": 2.3147638412998053e-07, "loss": 0.0001, "step": 224130 }, { "epoch": 4.56264631043257, "grad_norm": 0.01833492389075704, "learning_rate": 2.3126273274358236e-07, "loss": 0.0095, "step": 224140 }, { "epoch": 4.562849872773537, "grad_norm": 0.01222502565320079, "learning_rate": 2.3104917766797519e-07, "loss": 0.0405, "step": 224150 }, { "epoch": 4.563053435114504, "grad_norm": 0.0011087276511524936, "learning_rate": 2.3083571890747169e-07, "loss": 0.0004, "step": 224160 }, { "epoch": 4.563256997455471, "grad_norm": 4.127815342684481, "learning_rate": 2.3062235646638453e-07, "loss": 0.0418, "step": 224170 }, { "epoch": 4.563460559796438, "grad_norm": 0.01590146114416039, "learning_rate": 2.3040909034902137e-07, "loss": 0.0259, "step": 224180 }, { "epoch": 4.563664122137404, "grad_norm": 0.0010702746946035233, "learning_rate": 2.301959205596893e-07, "loss": 0.0002, "step": 224190 }, { "epoch": 4.563867684478372, "grad_norm": 10.911353315837163, "learning_rate": 2.2998284710269436e-07, "loss": 0.0798, "step": 224200 }, { "epoch": 4.5640712468193385, "grad_norm": 0.009833911803890429, "learning_rate": 2.2976986998233974e-07, "loss": 0.1142, "step": 224210 }, { "epoch": 4.564274809160305, "grad_norm": 0.00810581860982644, "learning_rate": 2.2955698920292647e-07, "loss": 0.0484, "step": 224220 }, { "epoch": 4.564478371501272, "grad_norm": 0.01405387855231192, "learning_rate": 2.2934420476875441e-07, "loss": 0.0912, "step": 224230 }, { "epoch": 4.564681933842239, "grad_norm": 0.002114105848713264, "learning_rate": 2.2913151668412014e-07, "loss": 0.0054, "step": 224240 }, { "epoch": 4.564885496183206, "grad_norm": 0.023206928628442067, "learning_rate": 2.2891892495332023e-07, "loss": 0.007, "step": 224250 }, { "epoch": 4.565089058524173, "grad_norm": 0.020024301509654115, "learning_rate": 2.2870642958064736e-07, "loss": 0.0002, "step": 224260 }, { "epoch": 4.56529262086514, "grad_norm": 0.0007856245319339531, "learning_rate": 2.2849403057039422e-07, "loss": 0.0213, "step": 224270 }, { "epoch": 4.565496183206107, "grad_norm": 0.004387786301252807, "learning_rate": 2.2828172792684843e-07, "loss": 0.0001, "step": 224280 }, { "epoch": 4.565699745547073, "grad_norm": 70.36590787718546, "learning_rate": 2.2806952165429996e-07, "loss": 0.009, "step": 224290 }, { "epoch": 4.565903307888041, "grad_norm": 11.243037734584645, "learning_rate": 2.2785741175703425e-07, "loss": 0.0398, "step": 224300 }, { "epoch": 4.5661068702290075, "grad_norm": 0.008207684081570181, "learning_rate": 2.276453982393334e-07, "loss": 0.0402, "step": 224310 }, { "epoch": 4.566310432569974, "grad_norm": 0.0009512866265830182, "learning_rate": 2.274334811054807e-07, "loss": 0.0, "step": 224320 }, { "epoch": 4.566513994910942, "grad_norm": 0.0004939396407334701, "learning_rate": 2.2722166035975713e-07, "loss": 0.0004, "step": 224330 }, { "epoch": 4.566717557251908, "grad_norm": 0.00572118952000525, "learning_rate": 2.270099360064376e-07, "loss": 0.0006, "step": 224340 }, { "epoch": 4.566921119592875, "grad_norm": 0.018852308671038792, "learning_rate": 2.267983080498015e-07, "loss": 0.0076, "step": 224350 }, { "epoch": 4.5671246819338425, "grad_norm": 0.0032157137381886155, "learning_rate": 2.2658677649412098e-07, "loss": 0.0001, "step": 224360 }, { "epoch": 4.567328244274809, "grad_norm": 0.0015517725442116379, "learning_rate": 2.2637534134366756e-07, "loss": 0.0136, "step": 224370 }, { "epoch": 4.567531806615776, "grad_norm": 0.061117539139918874, "learning_rate": 2.26164002602714e-07, "loss": 0.0709, "step": 224380 }, { "epoch": 4.567735368956743, "grad_norm": 0.0034965942814338237, "learning_rate": 2.2595276027552625e-07, "loss": 0.0297, "step": 224390 }, { "epoch": 4.56793893129771, "grad_norm": 0.0030549570377490943, "learning_rate": 2.257416143663721e-07, "loss": 0.0292, "step": 224400 }, { "epoch": 4.5681424936386765, "grad_norm": 0.0006994367349582117, "learning_rate": 2.2553056487951474e-07, "loss": 0.0001, "step": 224410 }, { "epoch": 4.568346055979644, "grad_norm": 0.2317716735600722, "learning_rate": 2.2531961181921692e-07, "loss": 0.0008, "step": 224420 }, { "epoch": 4.568549618320611, "grad_norm": 0.01241530469561587, "learning_rate": 2.2510875518973908e-07, "loss": 0.0422, "step": 224430 }, { "epoch": 4.568753180661577, "grad_norm": 0.006246146330185013, "learning_rate": 2.248979949953406e-07, "loss": 0.0014, "step": 224440 }, { "epoch": 4.568956743002545, "grad_norm": 2.811970840735221, "learning_rate": 2.2468733124027698e-07, "loss": 0.0055, "step": 224450 }, { "epoch": 4.5691603053435115, "grad_norm": 0.00495411917532805, "learning_rate": 2.2447676392880367e-07, "loss": 0.0001, "step": 224460 }, { "epoch": 4.569363867684478, "grad_norm": 0.0006804188814384835, "learning_rate": 2.2426629306517234e-07, "loss": 0.0063, "step": 224470 }, { "epoch": 4.569567430025446, "grad_norm": 0.01079048929253636, "learning_rate": 2.2405591865363563e-07, "loss": 0.037, "step": 224480 }, { "epoch": 4.569770992366412, "grad_norm": 0.02792392817362337, "learning_rate": 2.2384564069843906e-07, "loss": 0.0199, "step": 224490 }, { "epoch": 4.569974554707379, "grad_norm": 0.3627692286021423, "learning_rate": 2.2363545920383255e-07, "loss": 0.0118, "step": 224500 }, { "epoch": 4.570178117048346, "grad_norm": 0.004757544384649358, "learning_rate": 2.2342537417406052e-07, "loss": 0.0001, "step": 224510 }, { "epoch": 4.570381679389313, "grad_norm": 0.01592666454006348, "learning_rate": 2.2321538561336398e-07, "loss": 0.0001, "step": 224520 }, { "epoch": 4.57058524173028, "grad_norm": 0.007862626473897362, "learning_rate": 2.230054935259851e-07, "loss": 0.0, "step": 224530 }, { "epoch": 4.570788804071247, "grad_norm": 0.031570571026438746, "learning_rate": 2.227956979161644e-07, "loss": 0.0028, "step": 224540 }, { "epoch": 4.570992366412214, "grad_norm": 0.010261297625196987, "learning_rate": 2.2258599878813626e-07, "loss": 0.0005, "step": 224550 }, { "epoch": 4.5711959287531805, "grad_norm": 0.018481750457081926, "learning_rate": 2.223763961461378e-07, "loss": 0.0001, "step": 224560 }, { "epoch": 4.571399491094148, "grad_norm": 0.0056052942360641355, "learning_rate": 2.2216688999440128e-07, "loss": 0.0072, "step": 224570 }, { "epoch": 4.571603053435115, "grad_norm": 0.0008043235022251881, "learning_rate": 2.2195748033715824e-07, "loss": 0.0365, "step": 224580 }, { "epoch": 4.571806615776081, "grad_norm": 0.04031321085483874, "learning_rate": 2.2174816717863812e-07, "loss": 0.0022, "step": 224590 }, { "epoch": 4.572010178117049, "grad_norm": 0.028026716753388733, "learning_rate": 2.215389505230675e-07, "loss": 0.0075, "step": 224600 }, { "epoch": 4.572213740458015, "grad_norm": 0.29468552644587936, "learning_rate": 2.2132983037467304e-07, "loss": 0.0003, "step": 224610 }, { "epoch": 4.572417302798982, "grad_norm": 0.052713891475062113, "learning_rate": 2.2112080673767744e-07, "loss": 0.0702, "step": 224620 }, { "epoch": 4.57262086513995, "grad_norm": 0.014149011386851805, "learning_rate": 2.2091187961630233e-07, "loss": 0.0376, "step": 224630 }, { "epoch": 4.572824427480916, "grad_norm": 0.029883618506685333, "learning_rate": 2.2070304901476713e-07, "loss": 0.0206, "step": 224640 }, { "epoch": 4.573027989821883, "grad_norm": 0.001856973870476325, "learning_rate": 2.2049431493728957e-07, "loss": 0.0728, "step": 224650 }, { "epoch": 4.5732315521628495, "grad_norm": 0.01684018121971676, "learning_rate": 2.2028567738808515e-07, "loss": 0.0001, "step": 224660 }, { "epoch": 4.573435114503817, "grad_norm": 0.0011412953609683716, "learning_rate": 2.200771363713683e-07, "loss": 0.0234, "step": 224670 }, { "epoch": 4.573638676844784, "grad_norm": 0.012141309937212656, "learning_rate": 2.1986869189134896e-07, "loss": 0.0004, "step": 224680 }, { "epoch": 4.57384223918575, "grad_norm": 0.005889523406013672, "learning_rate": 2.1966034395223878e-07, "loss": 0.0001, "step": 224690 }, { "epoch": 4.574045801526718, "grad_norm": 0.016898864962295156, "learning_rate": 2.1945209255824552e-07, "loss": 0.0645, "step": 224700 }, { "epoch": 4.574249363867684, "grad_norm": 0.0491337674369488, "learning_rate": 2.1924393771357355e-07, "loss": 0.0003, "step": 224710 }, { "epoch": 4.574452926208651, "grad_norm": 0.008639361312956433, "learning_rate": 2.1903587942242898e-07, "loss": 0.0491, "step": 224720 }, { "epoch": 4.574656488549619, "grad_norm": 0.0008114367855794315, "learning_rate": 2.188279176890118e-07, "loss": 0.0637, "step": 224730 }, { "epoch": 4.574860050890585, "grad_norm": 0.0069331517032495535, "learning_rate": 2.186200525175225e-07, "loss": 0.0035, "step": 224740 }, { "epoch": 4.575063613231552, "grad_norm": 0.025075948851727856, "learning_rate": 2.1841228391216108e-07, "loss": 0.0123, "step": 224750 }, { "epoch": 4.575267175572519, "grad_norm": 0.006636663269327426, "learning_rate": 2.182046118771214e-07, "loss": 0.0039, "step": 224760 }, { "epoch": 4.575470737913486, "grad_norm": 7.84245826401571, "learning_rate": 2.1799703641659843e-07, "loss": 0.1033, "step": 224770 }, { "epoch": 4.575674300254453, "grad_norm": 0.004125882216701416, "learning_rate": 2.1778955753478492e-07, "loss": 0.0005, "step": 224780 }, { "epoch": 4.57587786259542, "grad_norm": 0.0005897362788404349, "learning_rate": 2.1758217523587032e-07, "loss": 0.0076, "step": 224790 }, { "epoch": 4.576081424936387, "grad_norm": 0.000832963835140341, "learning_rate": 2.173748895240435e-07, "loss": 0.0214, "step": 224800 }, { "epoch": 4.5762849872773534, "grad_norm": 0.0030372205384233595, "learning_rate": 2.1716770040349055e-07, "loss": 0.0265, "step": 224810 }, { "epoch": 4.576488549618321, "grad_norm": 0.0005610404685410909, "learning_rate": 2.1696060787839644e-07, "loss": 0.0299, "step": 224820 }, { "epoch": 4.576692111959288, "grad_norm": 0.024734458352431443, "learning_rate": 2.1675361195294288e-07, "loss": 0.0001, "step": 224830 }, { "epoch": 4.576895674300254, "grad_norm": 0.008134372863984838, "learning_rate": 2.1654671263131144e-07, "loss": 0.0649, "step": 224840 }, { "epoch": 4.577099236641222, "grad_norm": 0.030825237808145972, "learning_rate": 2.163399099176805e-07, "loss": 0.0431, "step": 224850 }, { "epoch": 4.577302798982188, "grad_norm": 0.0003442830547133594, "learning_rate": 2.1613320381622505e-07, "loss": 0.0026, "step": 224860 }, { "epoch": 4.577506361323155, "grad_norm": 0.11635921096700338, "learning_rate": 2.1592659433112229e-07, "loss": 0.0017, "step": 224870 }, { "epoch": 4.577709923664122, "grad_norm": 0.001509266206564043, "learning_rate": 2.1572008146654387e-07, "loss": 0.0344, "step": 224880 }, { "epoch": 4.577913486005089, "grad_norm": 0.014445110062490866, "learning_rate": 2.1551366522665928e-07, "loss": 0.0043, "step": 224890 }, { "epoch": 4.578117048346056, "grad_norm": 0.004435754093665864, "learning_rate": 2.153073456156396e-07, "loss": 0.0024, "step": 224900 }, { "epoch": 4.5783206106870225, "grad_norm": 0.03584814705111776, "learning_rate": 2.1510112263765094e-07, "loss": 0.0002, "step": 224910 }, { "epoch": 4.57852417302799, "grad_norm": 0.08222922931008135, "learning_rate": 2.1489499629685718e-07, "loss": 0.0009, "step": 224920 }, { "epoch": 4.578727735368957, "grad_norm": 0.013738488259682305, "learning_rate": 2.1468896659742333e-07, "loss": 0.0004, "step": 224930 }, { "epoch": 4.578931297709923, "grad_norm": 0.002712665953818117, "learning_rate": 2.144830335435083e-07, "loss": 0.0206, "step": 224940 }, { "epoch": 4.579134860050891, "grad_norm": 0.00015017724577082652, "learning_rate": 2.1427719713927208e-07, "loss": 0.0008, "step": 224950 }, { "epoch": 4.579338422391857, "grad_norm": 0.003569335499613397, "learning_rate": 2.1407145738887193e-07, "loss": 0.0008, "step": 224960 }, { "epoch": 4.579541984732824, "grad_norm": 0.009339764363742814, "learning_rate": 2.1386581429646336e-07, "loss": 0.0528, "step": 224970 }, { "epoch": 4.579745547073792, "grad_norm": 0.0009075960644226008, "learning_rate": 2.1366026786619864e-07, "loss": 0.0456, "step": 224980 }, { "epoch": 4.579949109414758, "grad_norm": 0.034415300521825606, "learning_rate": 2.1345481810222947e-07, "loss": 0.0307, "step": 224990 }, { "epoch": 4.580152671755725, "grad_norm": 0.004233751052701868, "learning_rate": 2.132494650087058e-07, "loss": 0.0284, "step": 225000 }, { "epoch": 4.580356234096692, "grad_norm": 0.0331160678896061, "learning_rate": 2.1304420858977438e-07, "loss": 0.0027, "step": 225010 }, { "epoch": 4.580559796437659, "grad_norm": 14.99160608191753, "learning_rate": 2.1283904884958017e-07, "loss": 0.0411, "step": 225020 }, { "epoch": 4.580763358778626, "grad_norm": 0.003515054434439423, "learning_rate": 2.1263398579226768e-07, "loss": 0.0244, "step": 225030 }, { "epoch": 4.580966921119593, "grad_norm": 0.009177407134133082, "learning_rate": 2.1242901942197802e-07, "loss": 0.0069, "step": 225040 }, { "epoch": 4.58117048346056, "grad_norm": 0.14773783919441488, "learning_rate": 2.122241497428501e-07, "loss": 0.0009, "step": 225050 }, { "epoch": 4.581374045801526, "grad_norm": 0.032102130576210704, "learning_rate": 2.120193767590234e-07, "loss": 0.0003, "step": 225060 }, { "epoch": 4.581577608142494, "grad_norm": 0.0054846479197598885, "learning_rate": 2.118147004746307e-07, "loss": 0.0003, "step": 225070 }, { "epoch": 4.581781170483461, "grad_norm": 0.002727693111217392, "learning_rate": 2.116101208938076e-07, "loss": 0.009, "step": 225080 }, { "epoch": 4.581984732824427, "grad_norm": 0.002638449649794989, "learning_rate": 2.1140563802068636e-07, "loss": 0.0025, "step": 225090 }, { "epoch": 4.582188295165395, "grad_norm": 0.011047155159571971, "learning_rate": 2.1120125185939534e-07, "loss": 0.0306, "step": 225100 }, { "epoch": 4.582391857506361, "grad_norm": 0.005606273912949352, "learning_rate": 2.109969624140623e-07, "loss": 0.0002, "step": 225110 }, { "epoch": 4.582595419847328, "grad_norm": 0.0217619966913668, "learning_rate": 2.1079276968881512e-07, "loss": 0.0001, "step": 225120 }, { "epoch": 4.5827989821882955, "grad_norm": 10.307351581590993, "learning_rate": 2.1058867368777602e-07, "loss": 0.0493, "step": 225130 }, { "epoch": 4.583002544529262, "grad_norm": 0.0008018940470489514, "learning_rate": 2.103846744150667e-07, "loss": 0.0001, "step": 225140 }, { "epoch": 4.583206106870229, "grad_norm": 0.0181064123718952, "learning_rate": 2.1018077187480833e-07, "loss": 0.0121, "step": 225150 }, { "epoch": 4.583409669211196, "grad_norm": 0.03770651013523377, "learning_rate": 2.0997696607111872e-07, "loss": 0.0744, "step": 225160 }, { "epoch": 4.583613231552163, "grad_norm": 10.329844978811877, "learning_rate": 2.0977325700811345e-07, "loss": 0.0215, "step": 225170 }, { "epoch": 4.58381679389313, "grad_norm": 0.0030151399851234984, "learning_rate": 2.0956964468990703e-07, "loss": 0.0, "step": 225180 }, { "epoch": 4.584020356234097, "grad_norm": 0.21352043139224333, "learning_rate": 2.093661291206117e-07, "loss": 0.0004, "step": 225190 }, { "epoch": 4.584223918575064, "grad_norm": 0.0059911436217066905, "learning_rate": 2.0916271030433755e-07, "loss": 0.045, "step": 225200 }, { "epoch": 4.58442748091603, "grad_norm": 0.004643832799984225, "learning_rate": 2.089593882451929e-07, "loss": 0.0007, "step": 225210 }, { "epoch": 4.584631043256998, "grad_norm": 0.007921189925995796, "learning_rate": 2.0875616294728507e-07, "loss": 0.036, "step": 225220 }, { "epoch": 4.5848346055979645, "grad_norm": 0.06799751245618477, "learning_rate": 2.0855303441471685e-07, "loss": 0.0001, "step": 225230 }, { "epoch": 4.585038167938931, "grad_norm": 0.03327614032005187, "learning_rate": 2.0835000265159112e-07, "loss": 0.0556, "step": 225240 }, { "epoch": 4.585241730279899, "grad_norm": 0.005183332917265364, "learning_rate": 2.0814706766201008e-07, "loss": 0.0367, "step": 225250 }, { "epoch": 4.585445292620865, "grad_norm": 0.0024782365504254045, "learning_rate": 2.0794422945006943e-07, "loss": 0.0003, "step": 225260 }, { "epoch": 4.585648854961832, "grad_norm": 0.003705021295361357, "learning_rate": 2.0774148801986748e-07, "loss": 0.0003, "step": 225270 }, { "epoch": 4.5858524173027995, "grad_norm": 0.006550961302629968, "learning_rate": 2.0753884337549934e-07, "loss": 0.0377, "step": 225280 }, { "epoch": 4.586055979643766, "grad_norm": 0.0065926494070934205, "learning_rate": 2.073362955210556e-07, "loss": 0.0001, "step": 225290 }, { "epoch": 4.586259541984733, "grad_norm": 0.016062685431682766, "learning_rate": 2.0713384446062967e-07, "loss": 0.0001, "step": 225300 }, { "epoch": 4.586463104325699, "grad_norm": 0.6116086063902983, "learning_rate": 2.0693149019830827e-07, "loss": 0.0019, "step": 225310 }, { "epoch": 4.586666666666667, "grad_norm": 0.01154562451207959, "learning_rate": 2.0672923273817814e-07, "loss": 0.0002, "step": 225320 }, { "epoch": 4.5868702290076335, "grad_norm": 0.009715739319291964, "learning_rate": 2.0652707208432655e-07, "loss": 0.0557, "step": 225330 }, { "epoch": 4.5870737913486, "grad_norm": 0.002152674575134839, "learning_rate": 2.0632500824083356e-07, "loss": 0.0015, "step": 225340 }, { "epoch": 4.587277353689568, "grad_norm": 0.01171703182600209, "learning_rate": 2.0612304121178094e-07, "loss": 0.0372, "step": 225350 }, { "epoch": 4.587480916030534, "grad_norm": 0.010922574848007023, "learning_rate": 2.0592117100124876e-07, "loss": 0.0167, "step": 225360 }, { "epoch": 4.587684478371501, "grad_norm": 7.720446627240106e-05, "learning_rate": 2.0571939761331316e-07, "loss": 0.008, "step": 225370 }, { "epoch": 4.5878880407124685, "grad_norm": 1.2957061966099155, "learning_rate": 2.0551772105204871e-07, "loss": 0.0004, "step": 225380 }, { "epoch": 4.588091603053435, "grad_norm": 0.0030634596021040265, "learning_rate": 2.0531614132152933e-07, "loss": 0.0545, "step": 225390 }, { "epoch": 4.588295165394402, "grad_norm": 0.13435602790356438, "learning_rate": 2.0511465842582622e-07, "loss": 0.0055, "step": 225400 }, { "epoch": 4.588498727735369, "grad_norm": 0.0023920013924696617, "learning_rate": 2.0491327236900838e-07, "loss": 0.0001, "step": 225410 }, { "epoch": 4.588702290076336, "grad_norm": 0.0935304292012701, "learning_rate": 2.0471198315514306e-07, "loss": 0.0001, "step": 225420 }, { "epoch": 4.5889058524173025, "grad_norm": 0.03196137126745266, "learning_rate": 2.0451079078829595e-07, "loss": 0.0113, "step": 225430 }, { "epoch": 4.58910941475827, "grad_norm": 0.016016599338974723, "learning_rate": 2.0430969527252876e-07, "loss": 0.0014, "step": 225440 }, { "epoch": 4.589312977099237, "grad_norm": 0.03603318745211115, "learning_rate": 2.0410869661190436e-07, "loss": 0.0037, "step": 225450 }, { "epoch": 4.589516539440203, "grad_norm": 0.034327278260655916, "learning_rate": 2.0390779481048284e-07, "loss": 0.001, "step": 225460 }, { "epoch": 4.589720101781171, "grad_norm": 0.0024609911922373736, "learning_rate": 2.0370698987231984e-07, "loss": 0.0003, "step": 225470 }, { "epoch": 4.5899236641221375, "grad_norm": 0.0016749662738914373, "learning_rate": 2.0350628180147158e-07, "loss": 0.0018, "step": 225480 }, { "epoch": 4.590127226463104, "grad_norm": 0.013011508816005912, "learning_rate": 2.0330567060199312e-07, "loss": 0.002, "step": 225490 }, { "epoch": 4.590330788804071, "grad_norm": 0.022151106304439564, "learning_rate": 2.0310515627793404e-07, "loss": 0.0001, "step": 225500 }, { "epoch": 4.590534351145038, "grad_norm": 0.006558414138726095, "learning_rate": 2.0290473883334437e-07, "loss": 0.0003, "step": 225510 }, { "epoch": 4.590737913486005, "grad_norm": 0.0018963740369106082, "learning_rate": 2.0270441827227206e-07, "loss": 0.0004, "step": 225520 }, { "epoch": 4.5909414758269715, "grad_norm": 0.00724615987928759, "learning_rate": 2.0250419459876324e-07, "loss": 0.018, "step": 225530 }, { "epoch": 4.591145038167939, "grad_norm": 0.0010368126416690484, "learning_rate": 2.023040678168614e-07, "loss": 0.0001, "step": 225540 }, { "epoch": 4.591348600508906, "grad_norm": 0.005274268854932314, "learning_rate": 2.0210403793060773e-07, "loss": 0.0001, "step": 225550 }, { "epoch": 4.591552162849872, "grad_norm": 0.0049302726749257675, "learning_rate": 2.019041049440429e-07, "loss": 0.04, "step": 225560 }, { "epoch": 4.59175572519084, "grad_norm": 0.008871834867136379, "learning_rate": 2.0170426886120475e-07, "loss": 0.0078, "step": 225570 }, { "epoch": 4.5919592875318065, "grad_norm": 0.0028053133382325165, "learning_rate": 2.01504529686129e-07, "loss": 0.047, "step": 225580 }, { "epoch": 4.592162849872773, "grad_norm": 0.011008973733476542, "learning_rate": 2.013048874228496e-07, "loss": 0.0004, "step": 225590 }, { "epoch": 4.592366412213741, "grad_norm": 0.019701532729471012, "learning_rate": 2.011053420753989e-07, "loss": 0.0002, "step": 225600 }, { "epoch": 4.592569974554707, "grad_norm": 0.009720383503410037, "learning_rate": 2.0090589364780643e-07, "loss": 0.0216, "step": 225610 }, { "epoch": 4.592773536895674, "grad_norm": 0.038504829223542036, "learning_rate": 2.007065421441007e-07, "loss": 0.0071, "step": 225620 }, { "epoch": 4.592977099236641, "grad_norm": 0.0805301372919357, "learning_rate": 2.0050728756830783e-07, "loss": 0.0001, "step": 225630 }, { "epoch": 4.593180661577608, "grad_norm": 0.000661480967554686, "learning_rate": 2.003081299244525e-07, "loss": 0.0256, "step": 225640 }, { "epoch": 4.593384223918575, "grad_norm": 0.0065788347466104674, "learning_rate": 2.0010906921655638e-07, "loss": 0.0001, "step": 225650 }, { "epoch": 4.593587786259542, "grad_norm": 0.0019073853316412858, "learning_rate": 1.9991010544863852e-07, "loss": 0.0439, "step": 225660 }, { "epoch": 4.593791348600509, "grad_norm": 0.000453700016904138, "learning_rate": 1.997112386247202e-07, "loss": 0.0245, "step": 225670 }, { "epoch": 4.5939949109414755, "grad_norm": 0.0029711960051463852, "learning_rate": 1.9951246874881534e-07, "loss": 0.0009, "step": 225680 }, { "epoch": 4.594198473282443, "grad_norm": 2.2599289270530196, "learning_rate": 1.993137958249386e-07, "loss": 0.0051, "step": 225690 }, { "epoch": 4.59440203562341, "grad_norm": 0.01399357727839632, "learning_rate": 1.9911521985710448e-07, "loss": 0.0002, "step": 225700 }, { "epoch": 4.594605597964376, "grad_norm": 0.013793718744211547, "learning_rate": 1.9891674084932088e-07, "loss": 0.0311, "step": 225710 }, { "epoch": 4.594809160305344, "grad_norm": 13.995428610442042, "learning_rate": 1.9871835880559741e-07, "loss": 0.0537, "step": 225720 }, { "epoch": 4.59501272264631, "grad_norm": 0.010432440784100954, "learning_rate": 1.9852007372994142e-07, "loss": 0.0002, "step": 225730 }, { "epoch": 4.595216284987277, "grad_norm": 0.0019233115001804936, "learning_rate": 1.9832188562635635e-07, "loss": 0.0003, "step": 225740 }, { "epoch": 4.595419847328245, "grad_norm": 0.27076660972525063, "learning_rate": 1.981237944988451e-07, "loss": 0.0002, "step": 225750 }, { "epoch": 4.595623409669211, "grad_norm": 5.807438978576142, "learning_rate": 1.9792580035140897e-07, "loss": 0.0259, "step": 225760 }, { "epoch": 4.595826972010178, "grad_norm": 0.00022866135111048916, "learning_rate": 1.977279031880458e-07, "loss": 0.0568, "step": 225770 }, { "epoch": 4.596030534351145, "grad_norm": 0.08740351904286633, "learning_rate": 1.9753010301275354e-07, "loss": 0.0282, "step": 225780 }, { "epoch": 4.596234096692112, "grad_norm": 0.06459979074948605, "learning_rate": 1.9733239982952623e-07, "loss": 0.0005, "step": 225790 }, { "epoch": 4.596437659033079, "grad_norm": 0.2456519193913148, "learning_rate": 1.9713479364235732e-07, "loss": 0.0005, "step": 225800 }, { "epoch": 4.596641221374046, "grad_norm": 0.008625145220787996, "learning_rate": 1.9693728445523586e-07, "loss": 0.0564, "step": 225810 }, { "epoch": 4.596844783715013, "grad_norm": 6.545418235203884, "learning_rate": 1.9673987227215307e-07, "loss": 0.0175, "step": 225820 }, { "epoch": 4.5970483460559795, "grad_norm": 0.03730408958475511, "learning_rate": 1.965425570970958e-07, "loss": 0.0003, "step": 225830 }, { "epoch": 4.597251908396947, "grad_norm": 21.899223656279723, "learning_rate": 1.9634533893404696e-07, "loss": 0.0132, "step": 225840 }, { "epoch": 4.597455470737914, "grad_norm": 0.017199526290047392, "learning_rate": 1.9614821778699222e-07, "loss": 0.0051, "step": 225850 }, { "epoch": 4.59765903307888, "grad_norm": 0.04236180924005938, "learning_rate": 1.9595119365991177e-07, "loss": 0.0156, "step": 225860 }, { "epoch": 4.597862595419848, "grad_norm": 3.174481627551365e-05, "learning_rate": 1.9575426655678298e-07, "loss": 0.0003, "step": 225870 }, { "epoch": 4.598066157760814, "grad_norm": 0.5982363682782655, "learning_rate": 1.9555743648158654e-07, "loss": 0.0003, "step": 225880 }, { "epoch": 4.598269720101781, "grad_norm": 0.04955855039691221, "learning_rate": 1.9536070343829482e-07, "loss": 0.0005, "step": 225890 }, { "epoch": 4.5984732824427486, "grad_norm": 0.0007139711891840814, "learning_rate": 1.9516406743088134e-07, "loss": 0.0112, "step": 225900 }, { "epoch": 4.598676844783715, "grad_norm": 0.0026837105692974687, "learning_rate": 1.94967528463319e-07, "loss": 0.0, "step": 225910 }, { "epoch": 4.598880407124682, "grad_norm": 0.005815853169954736, "learning_rate": 1.9477108653957633e-07, "loss": 0.0004, "step": 225920 }, { "epoch": 4.599083969465649, "grad_norm": 0.021904500094977623, "learning_rate": 1.945747416636201e-07, "loss": 0.0302, "step": 225930 }, { "epoch": 4.599287531806616, "grad_norm": 0.006075095790786175, "learning_rate": 1.9437849383941665e-07, "loss": 0.0001, "step": 225940 }, { "epoch": 4.599491094147583, "grad_norm": 0.001241503302480209, "learning_rate": 1.9418234307092886e-07, "loss": 0.0588, "step": 225950 }, { "epoch": 4.599694656488549, "grad_norm": 0.006200135741977991, "learning_rate": 1.9398628936211862e-07, "loss": 0.0117, "step": 225960 }, { "epoch": 4.599898218829517, "grad_norm": 0.01663470391317485, "learning_rate": 1.9379033271694547e-07, "loss": 0.0049, "step": 225970 }, { "epoch": 4.600101781170483, "grad_norm": 0.0428080116759352, "learning_rate": 1.935944731393663e-07, "loss": 0.0001, "step": 225980 }, { "epoch": 4.60030534351145, "grad_norm": 0.006888595179058723, "learning_rate": 1.9339871063333792e-07, "loss": 0.0407, "step": 225990 }, { "epoch": 4.600508905852418, "grad_norm": 0.23584211612762643, "learning_rate": 1.932030452028133e-07, "loss": 0.0006, "step": 226000 }, { "epoch": 4.600712468193384, "grad_norm": 0.0021396356204625524, "learning_rate": 1.9300747685174426e-07, "loss": 0.0587, "step": 226010 }, { "epoch": 4.600916030534351, "grad_norm": 0.024529545949132415, "learning_rate": 1.9281200558408098e-07, "loss": 0.0004, "step": 226020 }, { "epoch": 4.601119592875318, "grad_norm": 0.0031077087590568, "learning_rate": 1.926166314037703e-07, "loss": 0.0177, "step": 226030 }, { "epoch": 4.601323155216285, "grad_norm": 0.00895482338913349, "learning_rate": 1.924213543147596e-07, "loss": 0.0002, "step": 226040 }, { "epoch": 4.601526717557252, "grad_norm": 0.013432250886240022, "learning_rate": 1.9222617432099133e-07, "loss": 0.0001, "step": 226050 }, { "epoch": 4.601730279898219, "grad_norm": 0.0027445987516905206, "learning_rate": 1.9203109142640676e-07, "loss": 0.0024, "step": 226060 }, { "epoch": 4.601933842239186, "grad_norm": 0.008676144632289681, "learning_rate": 1.9183610563494827e-07, "loss": 0.0398, "step": 226070 }, { "epoch": 4.602137404580152, "grad_norm": 0.24833495209584378, "learning_rate": 1.916412169505516e-07, "loss": 0.0004, "step": 226080 }, { "epoch": 4.60234096692112, "grad_norm": 9.19300074640294, "learning_rate": 1.9144642537715363e-07, "loss": 0.0588, "step": 226090 }, { "epoch": 4.602544529262087, "grad_norm": 26.704129431970113, "learning_rate": 1.9125173091868955e-07, "loss": 0.0023, "step": 226100 }, { "epoch": 4.602748091603053, "grad_norm": 0.007273980171086956, "learning_rate": 1.910571335790895e-07, "loss": 0.0002, "step": 226110 }, { "epoch": 4.602951653944021, "grad_norm": 0.004186550996612841, "learning_rate": 1.9086263336228483e-07, "loss": 0.0002, "step": 226120 }, { "epoch": 4.603155216284987, "grad_norm": 0.048229968854845616, "learning_rate": 1.9066823027220294e-07, "loss": 0.0168, "step": 226130 }, { "epoch": 4.603358778625954, "grad_norm": 0.010382551409647505, "learning_rate": 1.9047392431277068e-07, "loss": 0.0, "step": 226140 }, { "epoch": 4.603562340966921, "grad_norm": 0.011373372490562307, "learning_rate": 1.902797154879127e-07, "loss": 0.0053, "step": 226150 }, { "epoch": 4.603765903307888, "grad_norm": 0.0028845031117114834, "learning_rate": 1.9008560380155028e-07, "loss": 0.0665, "step": 226160 }, { "epoch": 4.603969465648855, "grad_norm": 0.0002885440772174207, "learning_rate": 1.898915892576042e-07, "loss": 0.0026, "step": 226170 }, { "epoch": 4.604173027989821, "grad_norm": 0.0020715881759516084, "learning_rate": 1.8969767185999298e-07, "loss": 0.0005, "step": 226180 }, { "epoch": 4.604376590330789, "grad_norm": 0.10470690209017207, "learning_rate": 1.895038516126324e-07, "loss": 0.0299, "step": 226190 }, { "epoch": 4.604580152671756, "grad_norm": 0.012346829818017822, "learning_rate": 1.8931012851943875e-07, "loss": 0.042, "step": 226200 }, { "epoch": 4.604783715012722, "grad_norm": 18.57298220769331, "learning_rate": 1.891165025843217e-07, "loss": 0.0139, "step": 226210 }, { "epoch": 4.60498727735369, "grad_norm": 0.000563163336078353, "learning_rate": 1.8892297381119362e-07, "loss": 0.0197, "step": 226220 }, { "epoch": 4.605190839694656, "grad_norm": 0.004253297081108949, "learning_rate": 1.8872954220396366e-07, "loss": 0.0001, "step": 226230 }, { "epoch": 4.605394402035623, "grad_norm": 0.0003488960353078959, "learning_rate": 1.885362077665359e-07, "loss": 0.0001, "step": 226240 }, { "epoch": 4.6055979643765905, "grad_norm": 0.06284207025844525, "learning_rate": 1.883429705028178e-07, "loss": 0.0006, "step": 226250 }, { "epoch": 4.605801526717557, "grad_norm": 0.0038868064947191324, "learning_rate": 1.8814983041671064e-07, "loss": 0.0013, "step": 226260 }, { "epoch": 4.606005089058524, "grad_norm": 0.43924281101281737, "learning_rate": 1.879567875121141e-07, "loss": 0.0007, "step": 226270 }, { "epoch": 4.606208651399491, "grad_norm": 0.010306204557718437, "learning_rate": 1.877638417929295e-07, "loss": 0.0177, "step": 226280 }, { "epoch": 4.606412213740458, "grad_norm": 0.002452719515021552, "learning_rate": 1.8757099326305152e-07, "loss": 0.0181, "step": 226290 }, { "epoch": 4.606615776081425, "grad_norm": 0.0058507514348955575, "learning_rate": 1.8737824192637532e-07, "loss": 0.0, "step": 226300 }, { "epoch": 4.606819338422392, "grad_norm": 0.011582195985546773, "learning_rate": 1.8718558778679562e-07, "loss": 0.0354, "step": 226310 }, { "epoch": 4.607022900763359, "grad_norm": 0.5406761529146978, "learning_rate": 1.869930308482004e-07, "loss": 0.0478, "step": 226320 }, { "epoch": 4.607226463104325, "grad_norm": 0.0033311476750750037, "learning_rate": 1.8680057111448101e-07, "loss": 0.0007, "step": 226330 }, { "epoch": 4.607430025445293, "grad_norm": 0.1183137217116189, "learning_rate": 1.8660820858952266e-07, "loss": 0.0048, "step": 226340 }, { "epoch": 4.6076335877862595, "grad_norm": 0.07201160486536262, "learning_rate": 1.8641594327721167e-07, "loss": 0.0002, "step": 226350 }, { "epoch": 4.607837150127226, "grad_norm": 4.589130409801177, "learning_rate": 1.8622377518142998e-07, "loss": 0.0357, "step": 226360 }, { "epoch": 4.608040712468194, "grad_norm": 0.022973137606582213, "learning_rate": 1.8603170430605944e-07, "loss": 0.0001, "step": 226370 }, { "epoch": 4.60824427480916, "grad_norm": 0.19213865073180056, "learning_rate": 1.8583973065497919e-07, "loss": 0.0154, "step": 226380 }, { "epoch": 4.608447837150127, "grad_norm": 0.10984048776312219, "learning_rate": 1.856478542320661e-07, "loss": 0.0581, "step": 226390 }, { "epoch": 4.6086513994910945, "grad_norm": 0.007072808389438589, "learning_rate": 1.8545607504119544e-07, "loss": 0.03, "step": 226400 }, { "epoch": 4.608854961832061, "grad_norm": 0.0007598077947926107, "learning_rate": 1.8526439308624133e-07, "loss": 0.0001, "step": 226410 }, { "epoch": 4.609058524173028, "grad_norm": 0.008017003477864725, "learning_rate": 1.8507280837107232e-07, "loss": 0.0193, "step": 226420 }, { "epoch": 4.609262086513995, "grad_norm": 9.616971832827307, "learning_rate": 1.8488132089956034e-07, "loss": 0.0813, "step": 226430 }, { "epoch": 4.609465648854962, "grad_norm": 0.047551755025553755, "learning_rate": 1.8468993067557284e-07, "loss": 0.0338, "step": 226440 }, { "epoch": 4.6096692111959285, "grad_norm": 0.0019251771467856777, "learning_rate": 1.8449863770297284e-07, "loss": 0.0001, "step": 226450 }, { "epoch": 4.609872773536896, "grad_norm": 0.05058668443080213, "learning_rate": 1.8430744198562611e-07, "loss": 0.0705, "step": 226460 }, { "epoch": 4.610076335877863, "grad_norm": 0.0015547673469206618, "learning_rate": 1.8411634352739295e-07, "loss": 0.0005, "step": 226470 }, { "epoch": 4.610279898218829, "grad_norm": 6.389190131987473, "learning_rate": 1.83925342332133e-07, "loss": 0.0205, "step": 226480 }, { "epoch": 4.610483460559797, "grad_norm": 0.0036125099527679317, "learning_rate": 1.8373443840370376e-07, "loss": 0.0358, "step": 226490 }, { "epoch": 4.6106870229007635, "grad_norm": 0.0028371719055771847, "learning_rate": 1.8354363174596045e-07, "loss": 0.0452, "step": 226500 }, { "epoch": 4.61089058524173, "grad_norm": 0.0007533095374698546, "learning_rate": 1.8335292236275726e-07, "loss": 0.0128, "step": 226510 }, { "epoch": 4.611094147582698, "grad_norm": 0.02205007684464529, "learning_rate": 1.8316231025794607e-07, "loss": 0.0229, "step": 226520 }, { "epoch": 4.611297709923664, "grad_norm": 0.17621249229758668, "learning_rate": 1.8297179543537547e-07, "loss": 0.0073, "step": 226530 }, { "epoch": 4.611501272264631, "grad_norm": 0.0023263903001311498, "learning_rate": 1.8278137789889404e-07, "loss": 0.0002, "step": 226540 }, { "epoch": 4.611704834605598, "grad_norm": 0.02879538144788891, "learning_rate": 1.8259105765234708e-07, "loss": 0.0117, "step": 226550 }, { "epoch": 4.611908396946565, "grad_norm": 0.01679669950907683, "learning_rate": 1.824008346995787e-07, "loss": 0.0001, "step": 226560 }, { "epoch": 4.612111959287532, "grad_norm": 0.002424700390404282, "learning_rate": 1.8221070904443029e-07, "loss": 0.0018, "step": 226570 }, { "epoch": 4.612315521628499, "grad_norm": 0.004398015069752252, "learning_rate": 1.8202068069074207e-07, "loss": 0.0008, "step": 226580 }, { "epoch": 4.612519083969466, "grad_norm": 0.016336590244885558, "learning_rate": 1.818307496423516e-07, "loss": 0.0001, "step": 226590 }, { "epoch": 4.6127226463104325, "grad_norm": 0.0059049063264981615, "learning_rate": 1.8164091590309463e-07, "loss": 0.0002, "step": 226600 }, { "epoch": 4.612926208651399, "grad_norm": 0.018363443126448525, "learning_rate": 1.814511794768059e-07, "loss": 0.0001, "step": 226610 }, { "epoch": 4.613129770992367, "grad_norm": 0.004541430511841193, "learning_rate": 1.8126154036731681e-07, "loss": 0.0002, "step": 226620 }, { "epoch": 4.613333333333333, "grad_norm": 0.0017170823617779641, "learning_rate": 1.8107199857845703e-07, "loss": 0.0178, "step": 226630 }, { "epoch": 4.6135368956743, "grad_norm": 0.015298665955745173, "learning_rate": 1.8088255411405464e-07, "loss": 0.0002, "step": 226640 }, { "epoch": 4.613740458015267, "grad_norm": 0.4600893659216284, "learning_rate": 1.8069320697793712e-07, "loss": 0.028, "step": 226650 }, { "epoch": 4.613944020356234, "grad_norm": 0.004865602156398347, "learning_rate": 1.8050395717392698e-07, "loss": 0.0003, "step": 226660 }, { "epoch": 4.614147582697201, "grad_norm": 0.019464093564978815, "learning_rate": 1.8031480470584617e-07, "loss": 0.017, "step": 226670 }, { "epoch": 4.614351145038168, "grad_norm": 0.09345522359625406, "learning_rate": 1.8012574957751662e-07, "loss": 0.0002, "step": 226680 }, { "epoch": 4.614554707379135, "grad_norm": 0.0024717249535575548, "learning_rate": 1.7993679179275525e-07, "loss": 0.0013, "step": 226690 }, { "epoch": 4.6147582697201015, "grad_norm": 0.0012737563454261094, "learning_rate": 1.797479313553785e-07, "loss": 0.0154, "step": 226700 }, { "epoch": 4.614961832061069, "grad_norm": 0.06760509547405036, "learning_rate": 1.795591682692005e-07, "loss": 0.0002, "step": 226710 }, { "epoch": 4.615165394402036, "grad_norm": 0.016909011626971802, "learning_rate": 1.7937050253803378e-07, "loss": 0.0157, "step": 226720 }, { "epoch": 4.615368956743002, "grad_norm": 1.1797928723449937, "learning_rate": 1.791819341656892e-07, "loss": 0.0018, "step": 226730 }, { "epoch": 4.61557251908397, "grad_norm": 0.010081851188985342, "learning_rate": 1.7899346315597421e-07, "loss": 0.0001, "step": 226740 }, { "epoch": 4.6157760814249365, "grad_norm": 0.0019859610420689964, "learning_rate": 1.7880508951269525e-07, "loss": 0.0222, "step": 226750 }, { "epoch": 4.615979643765903, "grad_norm": 0.00708278886330058, "learning_rate": 1.7861681323965762e-07, "loss": 0.0003, "step": 226760 }, { "epoch": 4.616183206106871, "grad_norm": 0.020589053588704415, "learning_rate": 1.7842863434066327e-07, "loss": 0.048, "step": 226770 }, { "epoch": 4.616386768447837, "grad_norm": 0.05099105802597697, "learning_rate": 1.782405528195136e-07, "loss": 0.0004, "step": 226780 }, { "epoch": 4.616590330788804, "grad_norm": 0.001872158168443034, "learning_rate": 1.7805256868000497e-07, "loss": 0.0214, "step": 226790 }, { "epoch": 4.6167938931297705, "grad_norm": 0.009716179045136731, "learning_rate": 1.778646819259361e-07, "loss": 0.0003, "step": 226800 }, { "epoch": 4.616997455470738, "grad_norm": 0.0006744872591662547, "learning_rate": 1.776768925611011e-07, "loss": 0.0126, "step": 226810 }, { "epoch": 4.617201017811705, "grad_norm": 0.3734308410106842, "learning_rate": 1.7748920058929143e-07, "loss": 0.0003, "step": 226820 }, { "epoch": 4.617404580152671, "grad_norm": 0.01817083694451029, "learning_rate": 1.7730160601429957e-07, "loss": 0.0001, "step": 226830 }, { "epoch": 4.617608142493639, "grad_norm": 0.0009247045932562582, "learning_rate": 1.7711410883991308e-07, "loss": 0.0313, "step": 226840 }, { "epoch": 4.6178117048346055, "grad_norm": 0.0017679270091474006, "learning_rate": 1.7692670906991837e-07, "loss": 0.0075, "step": 226850 }, { "epoch": 4.618015267175572, "grad_norm": 0.04371608841861115, "learning_rate": 1.7673940670810186e-07, "loss": 0.0277, "step": 226860 }, { "epoch": 4.61821882951654, "grad_norm": 0.003919215322589069, "learning_rate": 1.7655220175824493e-07, "loss": 0.0436, "step": 226870 }, { "epoch": 4.618422391857506, "grad_norm": 0.0011080197613246932, "learning_rate": 1.7636509422412852e-07, "loss": 0.0044, "step": 226880 }, { "epoch": 4.618625954198473, "grad_norm": 0.006438792274224038, "learning_rate": 1.761780841095323e-07, "loss": 0.0008, "step": 226890 }, { "epoch": 4.61882951653944, "grad_norm": 0.030632720131666215, "learning_rate": 1.7599117141823275e-07, "loss": 0.0196, "step": 226900 }, { "epoch": 4.619033078880407, "grad_norm": 0.1557757111693086, "learning_rate": 1.758043561540046e-07, "loss": 0.0002, "step": 226910 }, { "epoch": 4.619236641221374, "grad_norm": 0.001323479579827853, "learning_rate": 1.7561763832062096e-07, "loss": 0.0, "step": 226920 }, { "epoch": 4.619440203562341, "grad_norm": 0.06630059780513146, "learning_rate": 1.7543101792185268e-07, "loss": 0.0006, "step": 226930 }, { "epoch": 4.619643765903308, "grad_norm": 0.051105955373733095, "learning_rate": 1.75244494961469e-07, "loss": 0.0009, "step": 226940 }, { "epoch": 4.6198473282442745, "grad_norm": 0.0008495547867256179, "learning_rate": 1.7505806944323746e-07, "loss": 0.0002, "step": 226950 }, { "epoch": 4.620050890585242, "grad_norm": 2.438548619307718, "learning_rate": 1.7487174137092223e-07, "loss": 0.0418, "step": 226960 }, { "epoch": 4.620254452926209, "grad_norm": 0.0008435300328919291, "learning_rate": 1.74685510748287e-07, "loss": 0.059, "step": 226970 }, { "epoch": 4.620458015267175, "grad_norm": 0.004058523314268421, "learning_rate": 1.7449937757909263e-07, "loss": 0.0003, "step": 226980 }, { "epoch": 4.620661577608143, "grad_norm": 0.037277454808688874, "learning_rate": 1.7431334186709948e-07, "loss": 0.0002, "step": 226990 }, { "epoch": 4.620865139949109, "grad_norm": 0.3472834211460856, "learning_rate": 1.741274036160623e-07, "loss": 0.0002, "step": 227000 }, { "epoch": 4.621068702290076, "grad_norm": 0.006065390669852872, "learning_rate": 1.739415628297386e-07, "loss": 0.0016, "step": 227010 }, { "epoch": 4.621272264631044, "grad_norm": 0.004278428009269705, "learning_rate": 1.737558195118816e-07, "loss": 0.0081, "step": 227020 }, { "epoch": 4.62147582697201, "grad_norm": 0.004108159811175197, "learning_rate": 1.7357017366624152e-07, "loss": 0.0002, "step": 227030 }, { "epoch": 4.621679389312977, "grad_norm": 0.09665009721219078, "learning_rate": 1.7338462529656763e-07, "loss": 0.0455, "step": 227040 }, { "epoch": 4.621882951653944, "grad_norm": 0.006423517620953656, "learning_rate": 1.731991744066086e-07, "loss": 0.0114, "step": 227050 }, { "epoch": 4.622086513994911, "grad_norm": 0.028505905560724673, "learning_rate": 1.7301382100010922e-07, "loss": 0.0129, "step": 227060 }, { "epoch": 4.622290076335878, "grad_norm": 0.00010534644888920738, "learning_rate": 1.7282856508081258e-07, "loss": 0.0074, "step": 227070 }, { "epoch": 4.622493638676845, "grad_norm": 0.006999563713351534, "learning_rate": 1.7264340665246015e-07, "loss": 0.0023, "step": 227080 }, { "epoch": 4.622697201017812, "grad_norm": 0.0014340021018267955, "learning_rate": 1.724583457187917e-07, "loss": 0.0327, "step": 227090 }, { "epoch": 4.622900763358778, "grad_norm": 0.20689186669037862, "learning_rate": 1.7227338228354484e-07, "loss": 0.0402, "step": 227100 }, { "epoch": 4.623104325699746, "grad_norm": 0.049185255048547, "learning_rate": 1.7208851635045543e-07, "loss": 0.0004, "step": 227110 }, { "epoch": 4.623307888040713, "grad_norm": 0.026650907885759717, "learning_rate": 1.7190374792325658e-07, "loss": 0.0001, "step": 227120 }, { "epoch": 4.623511450381679, "grad_norm": 0.0014911876075930526, "learning_rate": 1.717190770056798e-07, "loss": 0.0119, "step": 227130 }, { "epoch": 4.623715012722647, "grad_norm": 0.004272416915572907, "learning_rate": 1.7153450360145542e-07, "loss": 0.0494, "step": 227140 }, { "epoch": 4.623918575063613, "grad_norm": 0.08377225318461054, "learning_rate": 1.7135002771431043e-07, "loss": 0.0098, "step": 227150 }, { "epoch": 4.62412213740458, "grad_norm": 0.003449704264958428, "learning_rate": 1.7116564934797075e-07, "loss": 0.0001, "step": 227160 }, { "epoch": 4.6243256997455475, "grad_norm": 0.011292417343926173, "learning_rate": 1.7098136850616066e-07, "loss": 0.0001, "step": 227170 }, { "epoch": 4.624529262086514, "grad_norm": 0.021017854128669668, "learning_rate": 1.7079718519260214e-07, "loss": 0.0001, "step": 227180 }, { "epoch": 4.624732824427481, "grad_norm": 0.000778454878789482, "learning_rate": 1.7061309941101335e-07, "loss": 0.0326, "step": 227190 }, { "epoch": 4.624936386768448, "grad_norm": 0.0022522684040692815, "learning_rate": 1.7042911116511407e-07, "loss": 0.0017, "step": 227200 }, { "epoch": 4.625139949109415, "grad_norm": 0.0009849881331337185, "learning_rate": 1.7024522045861858e-07, "loss": 0.0144, "step": 227210 }, { "epoch": 4.625343511450382, "grad_norm": 0.0234524311282789, "learning_rate": 1.7006142729524166e-07, "loss": 0.0002, "step": 227220 }, { "epoch": 4.625547073791348, "grad_norm": 0.015244740480358943, "learning_rate": 1.6987773167869538e-07, "loss": 0.0016, "step": 227230 }, { "epoch": 4.625750636132316, "grad_norm": 0.008933963243004174, "learning_rate": 1.696941336126895e-07, "loss": 0.0002, "step": 227240 }, { "epoch": 4.625954198473282, "grad_norm": 0.003919174054544151, "learning_rate": 1.6951063310093163e-07, "loss": 0.0001, "step": 227250 }, { "epoch": 4.626157760814249, "grad_norm": 0.004154460805168346, "learning_rate": 1.693272301471288e-07, "loss": 0.0, "step": 227260 }, { "epoch": 4.6263613231552165, "grad_norm": 0.0021057432610153393, "learning_rate": 1.6914392475498364e-07, "loss": 0.0011, "step": 227270 }, { "epoch": 4.626564885496183, "grad_norm": 0.03694746915473538, "learning_rate": 1.6896071692819983e-07, "loss": 0.023, "step": 227280 }, { "epoch": 4.62676844783715, "grad_norm": 0.00747819533436822, "learning_rate": 1.6877760667047605e-07, "loss": 0.0, "step": 227290 }, { "epoch": 4.626972010178117, "grad_norm": 0.012872482052091731, "learning_rate": 1.6859459398551104e-07, "loss": 0.0366, "step": 227300 }, { "epoch": 4.627175572519084, "grad_norm": 0.010361751642503756, "learning_rate": 1.6841167887700127e-07, "loss": 0.0385, "step": 227310 }, { "epoch": 4.627379134860051, "grad_norm": 0.014577775228186323, "learning_rate": 1.6822886134864047e-07, "loss": 0.0002, "step": 227320 }, { "epoch": 4.627582697201018, "grad_norm": 0.13187036365148355, "learning_rate": 1.6804614140412122e-07, "loss": 0.0001, "step": 227330 }, { "epoch": 4.627786259541985, "grad_norm": 0.002369197156823082, "learning_rate": 1.6786351904713394e-07, "loss": 0.0001, "step": 227340 }, { "epoch": 4.627989821882951, "grad_norm": 0.24853235168738066, "learning_rate": 1.676809942813662e-07, "loss": 0.0002, "step": 227350 }, { "epoch": 4.628193384223919, "grad_norm": 0.07094877634886515, "learning_rate": 1.674985671105056e-07, "loss": 0.0001, "step": 227360 }, { "epoch": 4.6283969465648855, "grad_norm": 0.0027158251306205745, "learning_rate": 1.6731623753823422e-07, "loss": 0.0001, "step": 227370 }, { "epoch": 4.628600508905852, "grad_norm": 0.00888398901531665, "learning_rate": 1.6713400556823688e-07, "loss": 0.0451, "step": 227380 }, { "epoch": 4.62880407124682, "grad_norm": 0.010585294769575327, "learning_rate": 1.6695187120419342e-07, "loss": 0.0002, "step": 227390 }, { "epoch": 4.629007633587786, "grad_norm": 0.0003320908617852664, "learning_rate": 1.6676983444978036e-07, "loss": 0.0002, "step": 227400 }, { "epoch": 4.629211195928753, "grad_norm": 0.007389493645681845, "learning_rate": 1.6658789530867635e-07, "loss": 0.0004, "step": 227410 }, { "epoch": 4.6294147582697205, "grad_norm": 0.008575290972780992, "learning_rate": 1.6640605378455632e-07, "loss": 0.0004, "step": 227420 }, { "epoch": 4.629618320610687, "grad_norm": 0.0028117002367946107, "learning_rate": 1.662243098810895e-07, "loss": 0.0132, "step": 227430 }, { "epoch": 4.629821882951654, "grad_norm": 0.004073700786583903, "learning_rate": 1.6604266360195076e-07, "loss": 0.0002, "step": 227440 }, { "epoch": 4.63002544529262, "grad_norm": 0.003556643793374879, "learning_rate": 1.658611149508055e-07, "loss": 0.0246, "step": 227450 }, { "epoch": 4.630229007633588, "grad_norm": 0.04191672344756061, "learning_rate": 1.6567966393132128e-07, "loss": 0.0002, "step": 227460 }, { "epoch": 4.6304325699745545, "grad_norm": 0.49277049139263135, "learning_rate": 1.6549831054716248e-07, "loss": 0.0002, "step": 227470 }, { "epoch": 4.630636132315521, "grad_norm": 0.00014778629681996193, "learning_rate": 1.6531705480199277e-07, "loss": 0.0423, "step": 227480 }, { "epoch": 4.630839694656489, "grad_norm": 0.001484080223081012, "learning_rate": 1.651358966994715e-07, "loss": 0.0001, "step": 227490 }, { "epoch": 4.631043256997455, "grad_norm": 0.03806583118189425, "learning_rate": 1.6495483624325846e-07, "loss": 0.0494, "step": 227500 }, { "epoch": 4.631246819338422, "grad_norm": 0.015873134187315376, "learning_rate": 1.6477387343701024e-07, "loss": 0.0003, "step": 227510 }, { "epoch": 4.6314503816793895, "grad_norm": 2.494930701043131, "learning_rate": 1.6459300828438107e-07, "loss": 0.0005, "step": 227520 }, { "epoch": 4.631653944020356, "grad_norm": 0.924480267550584, "learning_rate": 1.6441224078902363e-07, "loss": 0.0035, "step": 227530 }, { "epoch": 4.631857506361323, "grad_norm": 0.014269387573187181, "learning_rate": 1.6423157095458997e-07, "loss": 0.0033, "step": 227540 }, { "epoch": 4.63206106870229, "grad_norm": 0.0022852757160188495, "learning_rate": 1.6405099878472776e-07, "loss": 0.0006, "step": 227550 }, { "epoch": 4.632264631043257, "grad_norm": 3.580644287635785, "learning_rate": 1.6387052428308404e-07, "loss": 0.0083, "step": 227560 }, { "epoch": 4.6324681933842236, "grad_norm": 0.01710045767076912, "learning_rate": 1.636901474533048e-07, "loss": 0.0001, "step": 227570 }, { "epoch": 4.632671755725191, "grad_norm": 0.005271586811730725, "learning_rate": 1.635098682990316e-07, "loss": 0.0, "step": 227580 }, { "epoch": 4.632875318066158, "grad_norm": 0.005375114910206279, "learning_rate": 1.6332968682390537e-07, "loss": 0.0012, "step": 227590 }, { "epoch": 4.633078880407124, "grad_norm": 0.20203394957652648, "learning_rate": 1.6314960303156714e-07, "loss": 0.0087, "step": 227600 }, { "epoch": 4.633282442748092, "grad_norm": 0.00021998001121057158, "learning_rate": 1.629696169256517e-07, "loss": 0.0169, "step": 227610 }, { "epoch": 4.6334860050890585, "grad_norm": 0.010319425182636949, "learning_rate": 1.6278972850979402e-07, "loss": 0.0026, "step": 227620 }, { "epoch": 4.633689567430025, "grad_norm": 0.0023073137055879384, "learning_rate": 1.6260993778762945e-07, "loss": 0.0002, "step": 227630 }, { "epoch": 4.633893129770993, "grad_norm": 6.790984380414495, "learning_rate": 1.6243024476278736e-07, "loss": 0.0454, "step": 227640 }, { "epoch": 4.634096692111959, "grad_norm": 0.016315327383410898, "learning_rate": 1.6225064943889702e-07, "loss": 0.0478, "step": 227650 }, { "epoch": 4.634300254452926, "grad_norm": 0.30157279524128056, "learning_rate": 1.6207115181958555e-07, "loss": 0.0004, "step": 227660 }, { "epoch": 4.6345038167938934, "grad_norm": 0.0030152434776369056, "learning_rate": 1.6189175190847839e-07, "loss": 0.0375, "step": 227670 }, { "epoch": 4.63470737913486, "grad_norm": 0.016583338998784176, "learning_rate": 1.617124497091993e-07, "loss": 0.0102, "step": 227680 }, { "epoch": 4.634910941475827, "grad_norm": 0.0012212899856072219, "learning_rate": 1.615332452253682e-07, "loss": 0.0, "step": 227690 }, { "epoch": 4.635114503816794, "grad_norm": 0.009478036420856987, "learning_rate": 1.6135413846060543e-07, "loss": 0.006, "step": 227700 }, { "epoch": 4.635318066157761, "grad_norm": 0.04734373417995999, "learning_rate": 1.611751294185282e-07, "loss": 0.0436, "step": 227710 }, { "epoch": 4.6355216284987275, "grad_norm": 0.08063220616845236, "learning_rate": 1.6099621810275133e-07, "loss": 0.0532, "step": 227720 }, { "epoch": 4.635725190839695, "grad_norm": 0.0015508300338174514, "learning_rate": 1.6081740451688922e-07, "loss": 0.0013, "step": 227730 }, { "epoch": 4.635928753180662, "grad_norm": 0.002645085647707286, "learning_rate": 1.6063868866455113e-07, "loss": 0.0261, "step": 227740 }, { "epoch": 4.636132315521628, "grad_norm": 0.007943435108916765, "learning_rate": 1.604600705493481e-07, "loss": 0.0001, "step": 227750 }, { "epoch": 4.636335877862596, "grad_norm": 0.007313835047083227, "learning_rate": 1.6028155017488834e-07, "loss": 0.0003, "step": 227760 }, { "epoch": 4.6365394402035625, "grad_norm": 0.00474918093285921, "learning_rate": 1.6010312754477453e-07, "loss": 0.0791, "step": 227770 }, { "epoch": 4.636743002544529, "grad_norm": 0.0007413548781927955, "learning_rate": 1.5992480266261267e-07, "loss": 0.0435, "step": 227780 }, { "epoch": 4.636946564885497, "grad_norm": 0.0017867894419908452, "learning_rate": 1.5974657553200435e-07, "loss": 0.0, "step": 227790 }, { "epoch": 4.637150127226463, "grad_norm": 0.0042832981365967925, "learning_rate": 1.595684461565461e-07, "loss": 0.0002, "step": 227800 }, { "epoch": 4.63735368956743, "grad_norm": 0.0040221205982498166, "learning_rate": 1.593904145398395e-07, "loss": 0.0001, "step": 227810 }, { "epoch": 4.637557251908397, "grad_norm": 0.0048507719113070665, "learning_rate": 1.592124806854767e-07, "loss": 0.0001, "step": 227820 }, { "epoch": 4.637760814249364, "grad_norm": 0.027008740095640776, "learning_rate": 1.590346445970531e-07, "loss": 0.0269, "step": 227830 }, { "epoch": 4.637964376590331, "grad_norm": 0.0008620922048828361, "learning_rate": 1.588569062781603e-07, "loss": 0.0525, "step": 227840 }, { "epoch": 4.638167938931298, "grad_norm": 11.739446195513864, "learning_rate": 1.5867926573238767e-07, "loss": 0.0259, "step": 227850 }, { "epoch": 4.638371501272265, "grad_norm": 28.917846029806118, "learning_rate": 1.5850172296332178e-07, "loss": 0.0158, "step": 227860 }, { "epoch": 4.6385750636132315, "grad_norm": 0.6840948361814896, "learning_rate": 1.5832427797455085e-07, "loss": 0.0026, "step": 227870 }, { "epoch": 4.638778625954198, "grad_norm": 0.048493504997083825, "learning_rate": 1.581469307696565e-07, "loss": 0.0001, "step": 227880 }, { "epoch": 4.638982188295166, "grad_norm": 0.014793998024028787, "learning_rate": 1.5796968135222135e-07, "loss": 0.01, "step": 227890 }, { "epoch": 4.639185750636132, "grad_norm": 0.037520758996196904, "learning_rate": 1.577925297258248e-07, "loss": 0.0196, "step": 227900 }, { "epoch": 4.639389312977099, "grad_norm": 0.005798332122503162, "learning_rate": 1.5761547589404457e-07, "loss": 0.0384, "step": 227910 }, { "epoch": 4.639592875318066, "grad_norm": 0.07022278086632062, "learning_rate": 1.5743851986045722e-07, "loss": 0.0224, "step": 227920 }, { "epoch": 4.639796437659033, "grad_norm": 0.0018330302692424626, "learning_rate": 1.5726166162863544e-07, "loss": 0.0016, "step": 227930 }, { "epoch": 4.64, "grad_norm": 0.008086891605238662, "learning_rate": 1.5708490120215303e-07, "loss": 0.0027, "step": 227940 }, { "epoch": 4.640203562340967, "grad_norm": 0.005182838668500439, "learning_rate": 1.5690823858457715e-07, "loss": 0.0341, "step": 227950 }, { "epoch": 4.640407124681934, "grad_norm": 0.004023955917603159, "learning_rate": 1.567316737794783e-07, "loss": 0.0007, "step": 227960 }, { "epoch": 4.6406106870229005, "grad_norm": 12.550483296539442, "learning_rate": 1.5655520679042136e-07, "loss": 0.0239, "step": 227970 }, { "epoch": 4.640814249363868, "grad_norm": 0.003035991208282816, "learning_rate": 1.563788376209696e-07, "loss": 0.0004, "step": 227980 }, { "epoch": 4.641017811704835, "grad_norm": 0.001950134512770767, "learning_rate": 1.5620256627468633e-07, "loss": 0.0, "step": 227990 }, { "epoch": 4.641221374045801, "grad_norm": 0.0022843549486610956, "learning_rate": 1.5602639275513142e-07, "loss": 0.0102, "step": 228000 }, { "epoch": 4.641424936386769, "grad_norm": 0.018825808210252263, "learning_rate": 1.5585031706586152e-07, "loss": 0.0001, "step": 228010 }, { "epoch": 4.641628498727735, "grad_norm": 0.004530272541229275, "learning_rate": 1.5567433921043428e-07, "loss": 0.0371, "step": 228020 }, { "epoch": 4.641832061068702, "grad_norm": 0.004798715016542218, "learning_rate": 1.5549845919240304e-07, "loss": 0.0242, "step": 228030 }, { "epoch": 4.64203562340967, "grad_norm": 0.01669775634338288, "learning_rate": 1.553226770153199e-07, "loss": 0.0247, "step": 228040 }, { "epoch": 4.642239185750636, "grad_norm": 0.00036640253592243793, "learning_rate": 1.5514699268273537e-07, "loss": 0.0237, "step": 228050 }, { "epoch": 4.642442748091603, "grad_norm": 0.6487260412490748, "learning_rate": 1.5497140619819772e-07, "loss": 0.0003, "step": 228060 }, { "epoch": 4.6426463104325695, "grad_norm": 0.0008554250918626795, "learning_rate": 1.5479591756525248e-07, "loss": 0.0184, "step": 228070 }, { "epoch": 4.642849872773537, "grad_norm": 0.00532680754527875, "learning_rate": 1.5462052678744454e-07, "loss": 0.0001, "step": 228080 }, { "epoch": 4.643053435114504, "grad_norm": 0.0009605694611227625, "learning_rate": 1.5444523386831555e-07, "loss": 0.0324, "step": 228090 }, { "epoch": 4.64325699745547, "grad_norm": 7.409544258383713e-05, "learning_rate": 1.54270038811406e-07, "loss": 0.0003, "step": 228100 }, { "epoch": 4.643460559796438, "grad_norm": 0.0033038534507199226, "learning_rate": 1.5409494162025473e-07, "loss": 0.0064, "step": 228110 }, { "epoch": 4.643664122137404, "grad_norm": 0.0015980199569310847, "learning_rate": 1.5391994229839723e-07, "loss": 0.0, "step": 228120 }, { "epoch": 4.643867684478371, "grad_norm": 0.0030588823489675087, "learning_rate": 1.5374504084936847e-07, "loss": 0.0003, "step": 228130 }, { "epoch": 4.644071246819339, "grad_norm": 0.010169811230562847, "learning_rate": 1.5357023727670063e-07, "loss": 0.0001, "step": 228140 }, { "epoch": 4.644274809160305, "grad_norm": 0.012684736841797634, "learning_rate": 1.5339553158392363e-07, "loss": 0.0015, "step": 228150 }, { "epoch": 4.644478371501272, "grad_norm": 0.002870669866119139, "learning_rate": 1.5322092377456687e-07, "loss": 0.0002, "step": 228160 }, { "epoch": 4.644681933842239, "grad_norm": 0.0025497631865556737, "learning_rate": 1.5304641385215535e-07, "loss": 0.0003, "step": 228170 }, { "epoch": 4.644885496183206, "grad_norm": 0.022546007415241556, "learning_rate": 1.5287200182021511e-07, "loss": 0.0003, "step": 228180 }, { "epoch": 4.645089058524173, "grad_norm": 0.007991413015758913, "learning_rate": 1.5269768768226724e-07, "loss": 0.0554, "step": 228190 }, { "epoch": 4.64529262086514, "grad_norm": 0.00827581350533797, "learning_rate": 1.5252347144183278e-07, "loss": 0.0231, "step": 228200 }, { "epoch": 4.645496183206107, "grad_norm": 0.004973417473193136, "learning_rate": 1.5234935310243116e-07, "loss": 0.0, "step": 228210 }, { "epoch": 4.645699745547073, "grad_norm": 0.004817885028143702, "learning_rate": 1.5217533266757733e-07, "loss": 0.0119, "step": 228220 }, { "epoch": 4.645903307888041, "grad_norm": 0.052658880637828746, "learning_rate": 1.5200141014078685e-07, "loss": 0.0274, "step": 228230 }, { "epoch": 4.646106870229008, "grad_norm": 0.006543535596650434, "learning_rate": 1.5182758552557185e-07, "loss": 0.0001, "step": 228240 }, { "epoch": 4.646310432569974, "grad_norm": 0.00787060512849861, "learning_rate": 1.5165385882544348e-07, "loss": 0.0002, "step": 228250 }, { "epoch": 4.646513994910942, "grad_norm": 0.291261646005626, "learning_rate": 1.5148023004390944e-07, "loss": 0.0003, "step": 228260 }, { "epoch": 4.646717557251908, "grad_norm": 0.01731784188912705, "learning_rate": 1.513066991844775e-07, "loss": 0.0001, "step": 228270 }, { "epoch": 4.646921119592875, "grad_norm": 0.1186476336577388, "learning_rate": 1.5113326625065205e-07, "loss": 0.0099, "step": 228280 }, { "epoch": 4.6471246819338425, "grad_norm": 0.0009939831804823438, "learning_rate": 1.5095993124593534e-07, "loss": 0.0001, "step": 228290 }, { "epoch": 4.647328244274809, "grad_norm": 0.002184205403923013, "learning_rate": 1.5078669417382785e-07, "loss": 0.0633, "step": 228300 }, { "epoch": 4.647531806615776, "grad_norm": 0.3318017829783542, "learning_rate": 1.5061355503783016e-07, "loss": 0.0007, "step": 228310 }, { "epoch": 4.647735368956743, "grad_norm": 0.01308470876481274, "learning_rate": 1.504405138414361e-07, "loss": 0.0034, "step": 228320 }, { "epoch": 4.64793893129771, "grad_norm": 0.3892185211132918, "learning_rate": 1.502675705881429e-07, "loss": 0.05, "step": 228330 }, { "epoch": 4.648142493638677, "grad_norm": 0.0008306851190117227, "learning_rate": 1.5009472528144277e-07, "loss": 0.0715, "step": 228340 }, { "epoch": 4.648346055979644, "grad_norm": 0.00334599782260948, "learning_rate": 1.499219779248251e-07, "loss": 0.0343, "step": 228350 }, { "epoch": 4.648549618320611, "grad_norm": 0.013917749378198391, "learning_rate": 1.4974932852178103e-07, "loss": 0.0001, "step": 228360 }, { "epoch": 4.648753180661577, "grad_norm": 0.006536968127469984, "learning_rate": 1.4957677707579667e-07, "loss": 0.0001, "step": 228370 }, { "epoch": 4.648956743002545, "grad_norm": 0.029164141139692092, "learning_rate": 1.494043235903553e-07, "loss": 0.0004, "step": 228380 }, { "epoch": 4.6491603053435115, "grad_norm": 0.008444080384393024, "learning_rate": 1.4923196806894246e-07, "loss": 0.0355, "step": 228390 }, { "epoch": 4.649363867684478, "grad_norm": 12.103917298597642, "learning_rate": 1.4905971051503708e-07, "loss": 0.0357, "step": 228400 }, { "epoch": 4.649567430025446, "grad_norm": 0.03698313689405416, "learning_rate": 1.4888755093211805e-07, "loss": 0.0003, "step": 228410 }, { "epoch": 4.649770992366412, "grad_norm": 0.018544030128830595, "learning_rate": 1.487154893236642e-07, "loss": 0.0002, "step": 228420 }, { "epoch": 4.649974554707379, "grad_norm": 0.001340522320858601, "learning_rate": 1.4854352569314946e-07, "loss": 0.0483, "step": 228430 }, { "epoch": 4.6501781170483465, "grad_norm": 0.03277450915945243, "learning_rate": 1.4837166004404657e-07, "loss": 0.0523, "step": 228440 }, { "epoch": 4.650381679389313, "grad_norm": 0.0024880598988996075, "learning_rate": 1.481998923798267e-07, "loss": 0.0096, "step": 228450 }, { "epoch": 4.65058524173028, "grad_norm": 0.0007218035793651139, "learning_rate": 1.4802822270395923e-07, "loss": 0.0001, "step": 228460 }, { "epoch": 4.650788804071247, "grad_norm": 0.021558019254444297, "learning_rate": 1.4785665101991086e-07, "loss": 0.0313, "step": 228470 }, { "epoch": 4.650992366412214, "grad_norm": 0.008822067863550491, "learning_rate": 1.4768517733114718e-07, "loss": 0.0008, "step": 228480 }, { "epoch": 4.6511959287531806, "grad_norm": 0.0022045075709615667, "learning_rate": 1.4751380164113094e-07, "loss": 0.0001, "step": 228490 }, { "epoch": 4.651399491094148, "grad_norm": 0.003038155156690301, "learning_rate": 1.4734252395332382e-07, "loss": 0.0005, "step": 228500 }, { "epoch": 4.651603053435115, "grad_norm": 0.005922387695819721, "learning_rate": 1.4717134427118418e-07, "loss": 0.001, "step": 228510 }, { "epoch": 4.651806615776081, "grad_norm": 0.0032548556658891013, "learning_rate": 1.470002625981698e-07, "loss": 0.0184, "step": 228520 }, { "epoch": 4.652010178117048, "grad_norm": 0.0022821051143858857, "learning_rate": 1.468292789377357e-07, "loss": 0.0021, "step": 228530 }, { "epoch": 4.6522137404580155, "grad_norm": 0.029256542320621876, "learning_rate": 1.466583932933352e-07, "loss": 0.0205, "step": 228540 }, { "epoch": 4.652417302798982, "grad_norm": 0.015952667731340988, "learning_rate": 1.4648760566842001e-07, "loss": 0.0007, "step": 228550 }, { "epoch": 4.652620865139949, "grad_norm": 0.0008656488779149711, "learning_rate": 1.463169160664385e-07, "loss": 0.0376, "step": 228560 }, { "epoch": 4.652824427480916, "grad_norm": 0.01237776569404752, "learning_rate": 1.4614632449083733e-07, "loss": 0.0003, "step": 228570 }, { "epoch": 4.653027989821883, "grad_norm": 0.001058240008159191, "learning_rate": 1.4597583094506428e-07, "loss": 0.0412, "step": 228580 }, { "epoch": 4.65323155216285, "grad_norm": 0.002231302368606202, "learning_rate": 1.4580543543256108e-07, "loss": 0.0001, "step": 228590 }, { "epoch": 4.653435114503817, "grad_norm": 0.004567379962833423, "learning_rate": 1.4563513795676886e-07, "loss": 0.0001, "step": 228600 }, { "epoch": 4.653638676844784, "grad_norm": 0.003343549445154272, "learning_rate": 1.454649385211271e-07, "loss": 0.0487, "step": 228610 }, { "epoch": 4.65384223918575, "grad_norm": 0.01602324408572862, "learning_rate": 1.452948371290741e-07, "loss": 0.0129, "step": 228620 }, { "epoch": 4.654045801526718, "grad_norm": 0.042190503662216286, "learning_rate": 1.451248337840444e-07, "loss": 0.0002, "step": 228630 }, { "epoch": 4.6542493638676845, "grad_norm": 0.01289318253008822, "learning_rate": 1.449549284894719e-07, "loss": 0.0001, "step": 228640 }, { "epoch": 4.654452926208651, "grad_norm": 0.013759201808357133, "learning_rate": 1.4478512124878774e-07, "loss": 0.0233, "step": 228650 }, { "epoch": 4.654656488549619, "grad_norm": 0.006730343152900451, "learning_rate": 1.446154120654214e-07, "loss": 0.0084, "step": 228660 }, { "epoch": 4.654860050890585, "grad_norm": 0.007191827375014744, "learning_rate": 1.444458009428007e-07, "loss": 0.0366, "step": 228670 }, { "epoch": 4.655063613231552, "grad_norm": 0.23087781557765724, "learning_rate": 1.4427628788435067e-07, "loss": 0.0001, "step": 228680 }, { "epoch": 4.6552671755725195, "grad_norm": 0.015556620746591213, "learning_rate": 1.4410687289349524e-07, "loss": 0.0127, "step": 228690 }, { "epoch": 4.655470737913486, "grad_norm": 0.050307107103274004, "learning_rate": 1.4393755597365612e-07, "loss": 0.0001, "step": 228700 }, { "epoch": 4.655674300254453, "grad_norm": 0.28569721555088146, "learning_rate": 1.4376833712825278e-07, "loss": 0.0005, "step": 228710 }, { "epoch": 4.655877862595419, "grad_norm": 0.00044831162028425545, "learning_rate": 1.4359921636070194e-07, "loss": 0.0001, "step": 228720 }, { "epoch": 4.656081424936387, "grad_norm": 0.000648226337348516, "learning_rate": 1.4343019367442036e-07, "loss": 0.0001, "step": 228730 }, { "epoch": 4.6562849872773535, "grad_norm": 0.11995873709385706, "learning_rate": 1.4326126907282135e-07, "loss": 0.085, "step": 228740 }, { "epoch": 4.65648854961832, "grad_norm": 0.000753750616887067, "learning_rate": 1.4309244255931553e-07, "loss": 0.0109, "step": 228750 }, { "epoch": 4.656692111959288, "grad_norm": 0.003503758809772685, "learning_rate": 1.429237141373152e-07, "loss": 0.103, "step": 228760 }, { "epoch": 4.656895674300254, "grad_norm": 0.0023985967437839743, "learning_rate": 1.4275508381022542e-07, "loss": 0.0, "step": 228770 }, { "epoch": 4.657099236641221, "grad_norm": 0.004076580532279525, "learning_rate": 1.4258655158145174e-07, "loss": 0.0, "step": 228780 }, { "epoch": 4.6573027989821885, "grad_norm": 16.592030073620744, "learning_rate": 1.4241811745440094e-07, "loss": 0.0979, "step": 228790 }, { "epoch": 4.657506361323155, "grad_norm": 0.0020439751472408935, "learning_rate": 1.4224978143247138e-07, "loss": 0.0, "step": 228800 }, { "epoch": 4.657709923664122, "grad_norm": 0.0014914907070109418, "learning_rate": 1.4208154351906422e-07, "loss": 0.0019, "step": 228810 }, { "epoch": 4.657913486005089, "grad_norm": 0.011840633327849173, "learning_rate": 1.419134037175779e-07, "loss": 0.0001, "step": 228820 }, { "epoch": 4.658117048346056, "grad_norm": 0.001352823637606399, "learning_rate": 1.4174536203140742e-07, "loss": 0.0103, "step": 228830 }, { "epoch": 4.6583206106870225, "grad_norm": 0.0013387268465607195, "learning_rate": 1.415774184639468e-07, "loss": 0.0098, "step": 228840 }, { "epoch": 4.65852417302799, "grad_norm": 0.0005921709445306572, "learning_rate": 1.4140957301858772e-07, "loss": 0.0, "step": 228850 }, { "epoch": 4.658727735368957, "grad_norm": 0.0001248425967582208, "learning_rate": 1.412418256987197e-07, "loss": 0.0001, "step": 228860 }, { "epoch": 4.658931297709923, "grad_norm": 0.04222972664306369, "learning_rate": 1.410741765077317e-07, "loss": 0.0002, "step": 228870 }, { "epoch": 4.659134860050891, "grad_norm": 0.08601491892873014, "learning_rate": 1.4090662544900824e-07, "loss": 0.0435, "step": 228880 }, { "epoch": 4.6593384223918575, "grad_norm": 0.021789196343339427, "learning_rate": 1.407391725259344e-07, "loss": 0.0442, "step": 228890 }, { "epoch": 4.659541984732824, "grad_norm": 0.010588625639677192, "learning_rate": 1.4057181774189188e-07, "loss": 0.0, "step": 228900 }, { "epoch": 4.659745547073792, "grad_norm": 0.01958958051479987, "learning_rate": 1.4040456110026023e-07, "loss": 0.0689, "step": 228910 }, { "epoch": 4.659949109414758, "grad_norm": 0.002736221766219644, "learning_rate": 1.4023740260441787e-07, "loss": 0.0065, "step": 228920 }, { "epoch": 4.660152671755725, "grad_norm": 0.015826292168628048, "learning_rate": 1.400703422577393e-07, "loss": 0.0219, "step": 228930 }, { "epoch": 4.660356234096692, "grad_norm": 0.009526346877368867, "learning_rate": 1.3990338006360072e-07, "loss": 0.0223, "step": 228940 }, { "epoch": 4.660559796437659, "grad_norm": 0.06402179184532003, "learning_rate": 1.3973651602537332e-07, "loss": 0.0007, "step": 228950 }, { "epoch": 4.660763358778626, "grad_norm": 0.002307545257402626, "learning_rate": 1.3956975014642605e-07, "loss": 0.0007, "step": 228960 }, { "epoch": 4.660966921119593, "grad_norm": 0.0020505544395026613, "learning_rate": 1.3940308243012902e-07, "loss": 0.029, "step": 228970 }, { "epoch": 4.66117048346056, "grad_norm": 0.007076083243712377, "learning_rate": 1.3923651287984618e-07, "loss": 0.033, "step": 228980 }, { "epoch": 4.6613740458015265, "grad_norm": 0.014614270196619282, "learning_rate": 1.390700414989432e-07, "loss": 0.0296, "step": 228990 }, { "epoch": 4.661577608142494, "grad_norm": 0.022468848392549933, "learning_rate": 1.3890366829078127e-07, "loss": 0.0007, "step": 229000 }, { "epoch": 4.661781170483461, "grad_norm": 0.0008520222848844325, "learning_rate": 1.387373932587205e-07, "loss": 0.0001, "step": 229010 }, { "epoch": 4.661984732824427, "grad_norm": 0.011189505074963314, "learning_rate": 1.385712164061198e-07, "loss": 0.0007, "step": 229020 }, { "epoch": 4.662188295165395, "grad_norm": 0.0016319799302094446, "learning_rate": 1.3840513773633434e-07, "loss": 0.006, "step": 229030 }, { "epoch": 4.662391857506361, "grad_norm": 0.002282308163436876, "learning_rate": 1.382391572527192e-07, "loss": 0.0018, "step": 229040 }, { "epoch": 4.662595419847328, "grad_norm": 0.0002275556671535539, "learning_rate": 1.380732749586261e-07, "loss": 0.0288, "step": 229050 }, { "epoch": 4.662798982188296, "grad_norm": 0.003066909664374027, "learning_rate": 1.3790749085740574e-07, "loss": 0.0001, "step": 229060 }, { "epoch": 4.663002544529262, "grad_norm": 0.001510330137715164, "learning_rate": 1.377418049524054e-07, "loss": 0.0344, "step": 229070 }, { "epoch": 4.663206106870229, "grad_norm": 0.06803858414562691, "learning_rate": 1.375762172469719e-07, "loss": 0.0484, "step": 229080 }, { "epoch": 4.663409669211196, "grad_norm": 0.0016634577061551883, "learning_rate": 1.3741072774444974e-07, "loss": 0.0115, "step": 229090 }, { "epoch": 4.663613231552163, "grad_norm": 0.011781483198951725, "learning_rate": 1.3724533644818073e-07, "loss": 0.0005, "step": 229100 }, { "epoch": 4.66381679389313, "grad_norm": 0.007564855340828269, "learning_rate": 1.3708004336150493e-07, "loss": 0.0005, "step": 229110 }, { "epoch": 4.664020356234097, "grad_norm": 0.004114327759174208, "learning_rate": 1.3691484848776137e-07, "loss": 0.0142, "step": 229120 }, { "epoch": 4.664223918575064, "grad_norm": 0.014634539651029874, "learning_rate": 1.3674975183028682e-07, "loss": 0.0148, "step": 229130 }, { "epoch": 4.66442748091603, "grad_norm": 0.001416880233449379, "learning_rate": 1.3658475339241417e-07, "loss": 0.0029, "step": 229140 }, { "epoch": 4.664631043256997, "grad_norm": 0.004076938397153875, "learning_rate": 1.3641985317747574e-07, "loss": 0.0019, "step": 229150 }, { "epoch": 4.664834605597965, "grad_norm": 0.0020135102008849385, "learning_rate": 1.3625505118880388e-07, "loss": 0.0069, "step": 229160 }, { "epoch": 4.665038167938931, "grad_norm": 0.017904340595241364, "learning_rate": 1.3609034742972537e-07, "loss": 0.071, "step": 229170 }, { "epoch": 4.665241730279898, "grad_norm": 0.008235607189739806, "learning_rate": 1.359257419035659e-07, "loss": 0.0349, "step": 229180 }, { "epoch": 4.665445292620865, "grad_norm": 0.005100475764818571, "learning_rate": 1.357612346136522e-07, "loss": 0.0007, "step": 229190 }, { "epoch": 4.665648854961832, "grad_norm": 0.0030932686210234047, "learning_rate": 1.35596825563305e-07, "loss": 0.012, "step": 229200 }, { "epoch": 4.665852417302799, "grad_norm": 0.001593999525954498, "learning_rate": 1.354325147558455e-07, "loss": 0.0685, "step": 229210 }, { "epoch": 4.666055979643766, "grad_norm": 0.0034574273508292696, "learning_rate": 1.3526830219459163e-07, "loss": 0.0, "step": 229220 }, { "epoch": 4.666259541984733, "grad_norm": 0.0014738443622571247, "learning_rate": 1.3510418788286017e-07, "loss": 0.0053, "step": 229230 }, { "epoch": 4.666463104325699, "grad_norm": 0.010459336994185624, "learning_rate": 1.3494017182396567e-07, "loss": 0.0235, "step": 229240 }, { "epoch": 4.666666666666667, "grad_norm": 0.04451914040439226, "learning_rate": 1.3477625402121996e-07, "loss": 0.0621, "step": 229250 }, { "epoch": 4.666870229007634, "grad_norm": 3.2599198418543853, "learning_rate": 1.3461243447793482e-07, "loss": 0.0031, "step": 229260 }, { "epoch": 4.6670737913486, "grad_norm": 0.011794193416059591, "learning_rate": 1.3444871319741813e-07, "loss": 0.0002, "step": 229270 }, { "epoch": 4.667277353689568, "grad_norm": 0.009530502572737153, "learning_rate": 1.3428509018297619e-07, "loss": 0.0114, "step": 229280 }, { "epoch": 4.667480916030534, "grad_norm": 0.0021905844507995407, "learning_rate": 1.341215654379141e-07, "loss": 0.0685, "step": 229290 }, { "epoch": 4.667684478371501, "grad_norm": 0.0008119001254434577, "learning_rate": 1.3395813896553366e-07, "loss": 0.0001, "step": 229300 }, { "epoch": 4.6678880407124685, "grad_norm": 0.006901020422591648, "learning_rate": 1.337948107691367e-07, "loss": 0.0003, "step": 229310 }, { "epoch": 4.668091603053435, "grad_norm": 0.005030890494374321, "learning_rate": 1.336315808520211e-07, "loss": 0.0051, "step": 229320 }, { "epoch": 4.668295165394402, "grad_norm": 0.019086328063373275, "learning_rate": 1.334684492174826e-07, "loss": 0.0002, "step": 229330 }, { "epoch": 4.668498727735369, "grad_norm": 0.0024490446617978956, "learning_rate": 1.3330541586881796e-07, "loss": 0.0011, "step": 229340 }, { "epoch": 4.668702290076336, "grad_norm": 0.03184408916105833, "learning_rate": 1.3314248080931847e-07, "loss": 0.0207, "step": 229350 }, { "epoch": 4.668905852417303, "grad_norm": 0.005051443247646085, "learning_rate": 1.3297964404227425e-07, "loss": 0.0013, "step": 229360 }, { "epoch": 4.669109414758269, "grad_norm": 0.0005358185298412172, "learning_rate": 1.3281690557097603e-07, "loss": 0.0001, "step": 229370 }, { "epoch": 4.669312977099237, "grad_norm": 0.0028933592777295016, "learning_rate": 1.3265426539870841e-07, "loss": 0.0002, "step": 229380 }, { "epoch": 4.669516539440203, "grad_norm": 0.02078645907313089, "learning_rate": 1.3249172352875706e-07, "loss": 0.0001, "step": 229390 }, { "epoch": 4.66972010178117, "grad_norm": 0.007002548905097385, "learning_rate": 1.3232927996440493e-07, "loss": 0.0239, "step": 229400 }, { "epoch": 4.6699236641221376, "grad_norm": 0.014654052177060527, "learning_rate": 1.3216693470893272e-07, "loss": 0.0001, "step": 229410 }, { "epoch": 4.670127226463104, "grad_norm": 0.014866914640797604, "learning_rate": 1.3200468776561836e-07, "loss": 0.0001, "step": 229420 }, { "epoch": 4.670330788804071, "grad_norm": 0.0036148740319701055, "learning_rate": 1.318425391377398e-07, "loss": 0.0, "step": 229430 }, { "epoch": 4.670534351145038, "grad_norm": 0.021000122139259485, "learning_rate": 1.316804888285711e-07, "loss": 0.0002, "step": 229440 }, { "epoch": 4.670737913486005, "grad_norm": 0.003625967837641249, "learning_rate": 1.3151853684138516e-07, "loss": 0.0001, "step": 229450 }, { "epoch": 4.670941475826972, "grad_norm": 0.0010309851792678778, "learning_rate": 1.313566831794527e-07, "loss": 0.0001, "step": 229460 }, { "epoch": 4.671145038167939, "grad_norm": 0.013242564031941765, "learning_rate": 1.3119492784604338e-07, "loss": 0.0002, "step": 229470 }, { "epoch": 4.671348600508906, "grad_norm": 0.0064034210629936444, "learning_rate": 1.3103327084442285e-07, "loss": 0.0434, "step": 229480 }, { "epoch": 4.671552162849872, "grad_norm": 0.01910162340416883, "learning_rate": 1.3087171217785743e-07, "loss": 0.0156, "step": 229490 }, { "epoch": 4.67175572519084, "grad_norm": 0.054355554065167996, "learning_rate": 1.3071025184960895e-07, "loss": 0.0003, "step": 229500 }, { "epoch": 4.671959287531807, "grad_norm": 0.006928671789817287, "learning_rate": 1.3054888986293756e-07, "loss": 0.0002, "step": 229510 }, { "epoch": 4.672162849872773, "grad_norm": 0.005584823277271397, "learning_rate": 1.30387626221104e-07, "loss": 0.0001, "step": 229520 }, { "epoch": 4.672366412213741, "grad_norm": 0.0044914597583751645, "learning_rate": 1.3022646092736456e-07, "loss": 0.0001, "step": 229530 }, { "epoch": 4.672569974554707, "grad_norm": 0.07727596264162914, "learning_rate": 1.3006539398497387e-07, "loss": 0.0001, "step": 229540 }, { "epoch": 4.672773536895674, "grad_norm": 0.0038276499337442744, "learning_rate": 1.2990442539718373e-07, "loss": 0.0397, "step": 229550 }, { "epoch": 4.6729770992366415, "grad_norm": 55.802298121080916, "learning_rate": 1.2974355516724824e-07, "loss": 0.0032, "step": 229560 }, { "epoch": 4.673180661577608, "grad_norm": 0.017395512807568302, "learning_rate": 1.295827832984131e-07, "loss": 0.0001, "step": 229570 }, { "epoch": 4.673384223918575, "grad_norm": 0.0017117140237215935, "learning_rate": 1.2942210979392745e-07, "loss": 0.0001, "step": 229580 }, { "epoch": 4.673587786259542, "grad_norm": 0.011886855668964039, "learning_rate": 1.292615346570353e-07, "loss": 0.0353, "step": 229590 }, { "epoch": 4.673791348600509, "grad_norm": 0.001516676137661425, "learning_rate": 1.291010578909796e-07, "loss": 0.0001, "step": 229600 }, { "epoch": 4.673994910941476, "grad_norm": 0.003261956234775159, "learning_rate": 1.2894067949900224e-07, "loss": 0.0834, "step": 229610 }, { "epoch": 4.674198473282443, "grad_norm": 0.0018463550572304077, "learning_rate": 1.2878039948434173e-07, "loss": 0.0001, "step": 229620 }, { "epoch": 4.67440203562341, "grad_norm": 0.04344753900237412, "learning_rate": 1.2862021785023493e-07, "loss": 0.0697, "step": 229630 }, { "epoch": 4.674605597964376, "grad_norm": 0.0026986651910952796, "learning_rate": 1.2846013459991702e-07, "loss": 0.0007, "step": 229640 }, { "epoch": 4.674809160305344, "grad_norm": 0.001661024192904061, "learning_rate": 1.2830014973662097e-07, "loss": 0.0002, "step": 229650 }, { "epoch": 4.6750127226463105, "grad_norm": 0.002207158150156954, "learning_rate": 1.2814026326357864e-07, "loss": 0.0001, "step": 229660 }, { "epoch": 4.675216284987277, "grad_norm": 0.06333114774131196, "learning_rate": 1.2798047518401803e-07, "loss": 0.0002, "step": 229670 }, { "epoch": 4.675419847328245, "grad_norm": 6.522706459160749, "learning_rate": 1.2782078550116706e-07, "loss": 0.0447, "step": 229680 }, { "epoch": 4.675623409669211, "grad_norm": 0.002264367104355127, "learning_rate": 1.2766119421825152e-07, "loss": 0.0341, "step": 229690 }, { "epoch": 4.675826972010178, "grad_norm": 0.02870649029918588, "learning_rate": 1.2750170133849215e-07, "loss": 0.0235, "step": 229700 }, { "epoch": 4.6760305343511455, "grad_norm": 0.008767931670120995, "learning_rate": 1.2734230686511305e-07, "loss": 0.039, "step": 229710 }, { "epoch": 4.676234096692112, "grad_norm": 0.027431091292237236, "learning_rate": 1.2718301080133167e-07, "loss": 0.0001, "step": 229720 }, { "epoch": 4.676437659033079, "grad_norm": 0.005112262543943267, "learning_rate": 1.2702381315036428e-07, "loss": 0.0001, "step": 229730 }, { "epoch": 4.676641221374046, "grad_norm": 0.005060623582495827, "learning_rate": 1.2686471391542888e-07, "loss": 0.0056, "step": 229740 }, { "epoch": 4.676844783715013, "grad_norm": 0.011914774758551365, "learning_rate": 1.267057130997368e-07, "loss": 0.0002, "step": 229750 }, { "epoch": 4.6770483460559795, "grad_norm": 0.00115066536461448, "learning_rate": 1.2654681070649877e-07, "loss": 0.0001, "step": 229760 }, { "epoch": 4.677251908396947, "grad_norm": 0.009435206467904174, "learning_rate": 1.263880067389256e-07, "loss": 0.0001, "step": 229770 }, { "epoch": 4.677455470737914, "grad_norm": 9.016586094937402, "learning_rate": 1.262293012002236e-07, "loss": 0.0349, "step": 229780 }, { "epoch": 4.67765903307888, "grad_norm": 0.09973470351010463, "learning_rate": 1.260706940935985e-07, "loss": 0.0005, "step": 229790 }, { "epoch": 4.677862595419847, "grad_norm": 0.004383342568481689, "learning_rate": 1.259121854222528e-07, "loss": 0.0188, "step": 229800 }, { "epoch": 4.6780661577608145, "grad_norm": 0.004040917581393298, "learning_rate": 1.2575377518938892e-07, "loss": 0.0505, "step": 229810 }, { "epoch": 4.678269720101781, "grad_norm": 0.04180262504555619, "learning_rate": 1.2559546339820482e-07, "loss": 0.0212, "step": 229820 }, { "epoch": 4.678473282442748, "grad_norm": 0.002827493846602557, "learning_rate": 1.254372500518991e-07, "loss": 0.0005, "step": 229830 }, { "epoch": 4.678676844783715, "grad_norm": 0.008590028046772925, "learning_rate": 1.2527913515366642e-07, "loss": 0.0341, "step": 229840 }, { "epoch": 4.678880407124682, "grad_norm": 0.06063727461016315, "learning_rate": 1.2512111870669974e-07, "loss": 0.0005, "step": 229850 }, { "epoch": 4.6790839694656485, "grad_norm": 0.48377964116925193, "learning_rate": 1.2496320071419099e-07, "loss": 0.0005, "step": 229860 }, { "epoch": 4.679287531806616, "grad_norm": 0.01062662301856972, "learning_rate": 1.248053811793304e-07, "loss": 0.0001, "step": 229870 }, { "epoch": 4.679491094147583, "grad_norm": 0.0015481907859761323, "learning_rate": 1.2464766010530317e-07, "loss": 0.0544, "step": 229880 }, { "epoch": 4.679694656488549, "grad_norm": 0.0005284005969820388, "learning_rate": 1.244900374952962e-07, "loss": 0.0002, "step": 229890 }, { "epoch": 4.679898218829517, "grad_norm": 0.03248025642856677, "learning_rate": 1.2433251335249254e-07, "loss": 0.0771, "step": 229900 }, { "epoch": 4.6801017811704835, "grad_norm": 0.001321434288374242, "learning_rate": 1.2417508768007347e-07, "loss": 0.0004, "step": 229910 }, { "epoch": 4.68030534351145, "grad_norm": 0.0012665352221626954, "learning_rate": 1.240177604812187e-07, "loss": 0.0084, "step": 229920 }, { "epoch": 4.680508905852418, "grad_norm": 0.0013282771077506807, "learning_rate": 1.2386053175910573e-07, "loss": 0.015, "step": 229930 }, { "epoch": 4.680712468193384, "grad_norm": 0.001972607627731756, "learning_rate": 1.2370340151690864e-07, "loss": 0.0419, "step": 229940 }, { "epoch": 4.680916030534351, "grad_norm": 0.03990776522267162, "learning_rate": 1.2354636975780322e-07, "loss": 0.058, "step": 229950 }, { "epoch": 4.681119592875318, "grad_norm": 0.012734659831251743, "learning_rate": 1.233894364849597e-07, "loss": 0.0001, "step": 229960 }, { "epoch": 4.681323155216285, "grad_norm": 0.007303795884063937, "learning_rate": 1.232326017015467e-07, "loss": 0.0066, "step": 229970 }, { "epoch": 4.681526717557252, "grad_norm": 0.021107351961578723, "learning_rate": 1.230758654107328e-07, "loss": 0.0086, "step": 229980 }, { "epoch": 4.681730279898219, "grad_norm": 0.02889990561573877, "learning_rate": 1.2291922761568376e-07, "loss": 0.0132, "step": 229990 }, { "epoch": 4.681933842239186, "grad_norm": 0.0024144095450850563, "learning_rate": 1.2276268831956206e-07, "loss": 0.0014, "step": 230000 }, { "epoch": 4.6821374045801525, "grad_norm": 0.0051368088026770165, "learning_rate": 1.2260624752552963e-07, "loss": 0.0001, "step": 230010 }, { "epoch": 4.682340966921119, "grad_norm": 0.00930635997588007, "learning_rate": 1.2244990523674615e-07, "loss": 0.0178, "step": 230020 }, { "epoch": 4.682544529262087, "grad_norm": 0.00437227663570504, "learning_rate": 1.222936614563691e-07, "loss": 0.0001, "step": 230030 }, { "epoch": 4.682748091603053, "grad_norm": 0.005600969624618832, "learning_rate": 1.2213751618755432e-07, "loss": 0.0, "step": 230040 }, { "epoch": 4.68295165394402, "grad_norm": 3.1441260840667216, "learning_rate": 1.2198146943345423e-07, "loss": 0.0091, "step": 230050 }, { "epoch": 4.683155216284987, "grad_norm": 0.011112737767103566, "learning_rate": 1.218255211972219e-07, "loss": 0.0006, "step": 230060 }, { "epoch": 4.683358778625954, "grad_norm": 0.00557452864614124, "learning_rate": 1.2166967148200593e-07, "loss": 0.0001, "step": 230070 }, { "epoch": 4.683562340966921, "grad_norm": 0.017578397548160676, "learning_rate": 1.2151392029095488e-07, "loss": 0.003, "step": 230080 }, { "epoch": 4.683765903307888, "grad_norm": 4.391269725360711, "learning_rate": 1.2135826762721293e-07, "loss": 0.0013, "step": 230090 }, { "epoch": 4.683969465648855, "grad_norm": 0.0005433104002798569, "learning_rate": 1.212027134939242e-07, "loss": 0.0003, "step": 230100 }, { "epoch": 4.6841730279898215, "grad_norm": 0.004994038054253794, "learning_rate": 1.2104725789423123e-07, "loss": 0.0243, "step": 230110 }, { "epoch": 4.684376590330789, "grad_norm": 0.005113565626188527, "learning_rate": 1.2089190083127255e-07, "loss": 0.0003, "step": 230120 }, { "epoch": 4.684580152671756, "grad_norm": 0.025094126158681243, "learning_rate": 1.2073664230818571e-07, "loss": 0.053, "step": 230130 }, { "epoch": 4.684783715012722, "grad_norm": 0.005333509483573541, "learning_rate": 1.2058148232810762e-07, "loss": 0.0041, "step": 230140 }, { "epoch": 4.68498727735369, "grad_norm": 0.002977144423084087, "learning_rate": 1.204264208941708e-07, "loss": 0.0302, "step": 230150 }, { "epoch": 4.685190839694656, "grad_norm": 0.1661905226400246, "learning_rate": 1.2027145800950768e-07, "loss": 0.0718, "step": 230160 }, { "epoch": 4.685394402035623, "grad_norm": 0.014242569312304615, "learning_rate": 1.2011659367724694e-07, "loss": 0.0001, "step": 230170 }, { "epoch": 4.685597964376591, "grad_norm": 0.0011758714622000606, "learning_rate": 1.1996182790051713e-07, "loss": 0.0002, "step": 230180 }, { "epoch": 4.685801526717557, "grad_norm": 0.04118379450917589, "learning_rate": 1.1980716068244357e-07, "loss": 0.0245, "step": 230190 }, { "epoch": 4.686005089058524, "grad_norm": 0.0014803683572570138, "learning_rate": 1.1965259202614986e-07, "loss": 0.0, "step": 230200 }, { "epoch": 4.686208651399491, "grad_norm": 0.0004331332601252166, "learning_rate": 1.1949812193475797e-07, "loss": 0.0429, "step": 230210 }, { "epoch": 4.686412213740458, "grad_norm": 0.0011378578183397173, "learning_rate": 1.1934375041138758e-07, "loss": 0.0244, "step": 230220 }, { "epoch": 4.686615776081425, "grad_norm": 0.0088708523035309, "learning_rate": 1.1918947745915621e-07, "loss": 0.0021, "step": 230230 }, { "epoch": 4.686819338422392, "grad_norm": 0.0034864439146399906, "learning_rate": 1.1903530308118028e-07, "loss": 0.0001, "step": 230240 }, { "epoch": 4.687022900763359, "grad_norm": 0.015680621009938573, "learning_rate": 1.1888122728057227e-07, "loss": 0.0032, "step": 230250 }, { "epoch": 4.6872264631043254, "grad_norm": 0.01840308378092909, "learning_rate": 1.1872725006044527e-07, "loss": 0.0004, "step": 230260 }, { "epoch": 4.687430025445293, "grad_norm": 0.011125974748338508, "learning_rate": 1.1857337142390902e-07, "loss": 0.0001, "step": 230270 }, { "epoch": 4.68763358778626, "grad_norm": 0.015573183286363687, "learning_rate": 1.1841959137406933e-07, "loss": 0.0001, "step": 230280 }, { "epoch": 4.687837150127226, "grad_norm": 0.041143351451805275, "learning_rate": 1.1826590991403431e-07, "loss": 0.0001, "step": 230290 }, { "epoch": 4.688040712468194, "grad_norm": 0.016926687819693545, "learning_rate": 1.1811232704690756e-07, "loss": 0.0004, "step": 230300 }, { "epoch": 4.68824427480916, "grad_norm": 0.0029472941421621493, "learning_rate": 1.1795884277578883e-07, "loss": 0.0001, "step": 230310 }, { "epoch": 4.688447837150127, "grad_norm": 0.012226544448806564, "learning_rate": 1.1780545710378011e-07, "loss": 0.0001, "step": 230320 }, { "epoch": 4.6886513994910946, "grad_norm": 0.005000201488414197, "learning_rate": 1.1765217003397832e-07, "loss": 0.0215, "step": 230330 }, { "epoch": 4.688854961832061, "grad_norm": 0.054309259065603875, "learning_rate": 1.1749898156947882e-07, "loss": 0.0001, "step": 230340 }, { "epoch": 4.689058524173028, "grad_norm": 0.05681353670047552, "learning_rate": 1.1734589171337685e-07, "loss": 0.0024, "step": 230350 }, { "epoch": 4.689262086513995, "grad_norm": 0.057280422744480676, "learning_rate": 1.1719290046876275e-07, "loss": 0.0019, "step": 230360 }, { "epoch": 4.689465648854962, "grad_norm": 0.8805852805323405, "learning_rate": 1.1704000783872737e-07, "loss": 0.0003, "step": 230370 }, { "epoch": 4.689669211195929, "grad_norm": 0.010306233808860633, "learning_rate": 1.1688721382635826e-07, "loss": 0.0268, "step": 230380 }, { "epoch": 4.689872773536896, "grad_norm": 0.0018954627066557015, "learning_rate": 1.1673451843474071e-07, "loss": 0.0162, "step": 230390 }, { "epoch": 4.690076335877863, "grad_norm": 0.02404716460485962, "learning_rate": 1.1658192166696003e-07, "loss": 0.0374, "step": 230400 }, { "epoch": 4.690279898218829, "grad_norm": 10.358017392174881, "learning_rate": 1.1642942352609655e-07, "loss": 0.0079, "step": 230410 }, { "epoch": 4.690483460559797, "grad_norm": 0.00651549613763896, "learning_rate": 1.162770240152311e-07, "loss": 0.0001, "step": 230420 }, { "epoch": 4.690687022900764, "grad_norm": 0.004252175641685197, "learning_rate": 1.1612472313744128e-07, "loss": 0.0153, "step": 230430 }, { "epoch": 4.69089058524173, "grad_norm": 34.94001391844842, "learning_rate": 1.1597252089580292e-07, "loss": 0.0455, "step": 230440 }, { "epoch": 4.691094147582697, "grad_norm": 0.00033044239088721666, "learning_rate": 1.1582041729339078e-07, "loss": 0.0001, "step": 230450 }, { "epoch": 4.691297709923664, "grad_norm": 0.0002507885783314929, "learning_rate": 1.156684123332752e-07, "loss": 0.0003, "step": 230460 }, { "epoch": 4.691501272264631, "grad_norm": 0.005390335070353899, "learning_rate": 1.1551650601852704e-07, "loss": 0.0067, "step": 230470 }, { "epoch": 4.691704834605598, "grad_norm": 0.00651639969415424, "learning_rate": 1.1536469835221498e-07, "loss": 0.0005, "step": 230480 }, { "epoch": 4.691908396946565, "grad_norm": 0.0025335849496972655, "learning_rate": 1.1521298933740377e-07, "loss": 0.0245, "step": 230490 }, { "epoch": 4.692111959287532, "grad_norm": 0.04160774055218152, "learning_rate": 1.1506137897715764e-07, "loss": 0.0001, "step": 230500 }, { "epoch": 4.692315521628498, "grad_norm": 0.00437406380790443, "learning_rate": 1.1490986727453912e-07, "loss": 0.0035, "step": 230510 }, { "epoch": 4.692519083969466, "grad_norm": 0.0013529840950967811, "learning_rate": 1.1475845423260746e-07, "loss": 0.0005, "step": 230520 }, { "epoch": 4.692722646310433, "grad_norm": 0.0012244247116825315, "learning_rate": 1.1460713985442129e-07, "loss": 0.0005, "step": 230530 }, { "epoch": 4.692926208651399, "grad_norm": 0.0016773134880654615, "learning_rate": 1.1445592414303597e-07, "loss": 0.0083, "step": 230540 }, { "epoch": 4.693129770992367, "grad_norm": 0.008462781243502452, "learning_rate": 1.1430480710150571e-07, "loss": 0.0005, "step": 230550 }, { "epoch": 4.693333333333333, "grad_norm": 0.0011546561425338803, "learning_rate": 1.1415378873288252e-07, "loss": 0.0007, "step": 230560 }, { "epoch": 4.6935368956743, "grad_norm": 7.389169959235916e-05, "learning_rate": 1.1400286904021673e-07, "loss": 0.0008, "step": 230570 }, { "epoch": 4.6937404580152675, "grad_norm": 0.003225875984108961, "learning_rate": 1.1385204802655648e-07, "loss": 0.0006, "step": 230580 }, { "epoch": 4.693944020356234, "grad_norm": 0.007249643344607163, "learning_rate": 1.1370132569494652e-07, "loss": 0.0422, "step": 230590 }, { "epoch": 4.694147582697201, "grad_norm": 0.016504056965800557, "learning_rate": 1.1355070204843277e-07, "loss": 0.0387, "step": 230600 }, { "epoch": 4.694351145038168, "grad_norm": 32.497187370303564, "learning_rate": 1.1340017709005558e-07, "loss": 0.0534, "step": 230610 }, { "epoch": 4.694554707379135, "grad_norm": 0.002207239897363734, "learning_rate": 1.1324975082285583e-07, "loss": 0.0001, "step": 230620 }, { "epoch": 4.694758269720102, "grad_norm": 0.013504671128834543, "learning_rate": 1.1309942324987166e-07, "loss": 0.0178, "step": 230630 }, { "epoch": 4.694961832061068, "grad_norm": 0.049165492458862176, "learning_rate": 1.1294919437413898e-07, "loss": 0.0001, "step": 230640 }, { "epoch": 4.695165394402036, "grad_norm": 0.004560677275388502, "learning_rate": 1.1279906419869147e-07, "loss": 0.0001, "step": 230650 }, { "epoch": 4.695368956743002, "grad_norm": 0.007958750468559617, "learning_rate": 1.1264903272656169e-07, "loss": 0.0, "step": 230660 }, { "epoch": 4.695572519083969, "grad_norm": 0.016097674022459298, "learning_rate": 1.1249909996078002e-07, "loss": 0.0005, "step": 230670 }, { "epoch": 4.6957760814249365, "grad_norm": 3.8380601586798457, "learning_rate": 1.123492659043729e-07, "loss": 0.001, "step": 230680 }, { "epoch": 4.695979643765903, "grad_norm": 0.00034459283462663393, "learning_rate": 1.1219953056036903e-07, "loss": 0.0014, "step": 230690 }, { "epoch": 4.69618320610687, "grad_norm": 0.038106347692835885, "learning_rate": 1.1204989393179045e-07, "loss": 0.0001, "step": 230700 }, { "epoch": 4.696386768447837, "grad_norm": 0.004523796708137227, "learning_rate": 1.1190035602165916e-07, "loss": 0.0032, "step": 230710 }, { "epoch": 4.696590330788804, "grad_norm": 0.008217277042316364, "learning_rate": 1.1175091683299722e-07, "loss": 0.0013, "step": 230720 }, { "epoch": 4.696793893129771, "grad_norm": 0.010452261877147056, "learning_rate": 1.1160157636882107e-07, "loss": 0.0079, "step": 230730 }, { "epoch": 4.696997455470738, "grad_norm": 0.015958155780915955, "learning_rate": 1.1145233463214722e-07, "loss": 0.0236, "step": 230740 }, { "epoch": 4.697201017811705, "grad_norm": 0.0009814960408123363, "learning_rate": 1.113031916259899e-07, "loss": 0.0002, "step": 230750 }, { "epoch": 4.697404580152671, "grad_norm": 0.001283813523010391, "learning_rate": 1.1115414735336116e-07, "loss": 0.0276, "step": 230760 }, { "epoch": 4.697608142493639, "grad_norm": 0.008176560657558597, "learning_rate": 1.1100520181727081e-07, "loss": 0.0739, "step": 230770 }, { "epoch": 4.6978117048346055, "grad_norm": 0.002411827259352894, "learning_rate": 1.1085635502072811e-07, "loss": 0.0023, "step": 230780 }, { "epoch": 4.698015267175572, "grad_norm": 0.010988673243994766, "learning_rate": 1.1070760696673788e-07, "loss": 0.0637, "step": 230790 }, { "epoch": 4.69821882951654, "grad_norm": 0.05465533451908918, "learning_rate": 1.1055895765830549e-07, "loss": 0.0274, "step": 230800 }, { "epoch": 4.698422391857506, "grad_norm": 0.0003422558219700381, "learning_rate": 1.1041040709843187e-07, "loss": 0.0001, "step": 230810 }, { "epoch": 4.698625954198473, "grad_norm": 0.004087337118014204, "learning_rate": 1.1026195529011852e-07, "loss": 0.0004, "step": 230820 }, { "epoch": 4.6988295165394405, "grad_norm": 0.021762283011606945, "learning_rate": 1.1011360223636247e-07, "loss": 0.0, "step": 230830 }, { "epoch": 4.699033078880407, "grad_norm": 0.013861362195225302, "learning_rate": 1.0996534794016023e-07, "loss": 0.0, "step": 230840 }, { "epoch": 4.699236641221374, "grad_norm": 0.002970062270177582, "learning_rate": 1.0981719240450661e-07, "loss": 0.0572, "step": 230850 }, { "epoch": 4.699440203562341, "grad_norm": 0.010170358726687565, "learning_rate": 1.0966913563239256e-07, "loss": 0.0011, "step": 230860 }, { "epoch": 4.699643765903308, "grad_norm": 8.98795664357367, "learning_rate": 1.0952117762680958e-07, "loss": 0.0722, "step": 230870 }, { "epoch": 4.6998473282442745, "grad_norm": 0.18001938306379042, "learning_rate": 1.0937331839074583e-07, "loss": 0.0238, "step": 230880 }, { "epoch": 4.700050890585242, "grad_norm": 0.037527325482594194, "learning_rate": 1.0922555792718559e-07, "loss": 0.0001, "step": 230890 }, { "epoch": 4.700254452926209, "grad_norm": 0.004444111865151783, "learning_rate": 1.090778962391159e-07, "loss": 0.0319, "step": 230900 }, { "epoch": 4.700458015267175, "grad_norm": 0.007218998084276498, "learning_rate": 1.0893033332951664e-07, "loss": 0.0001, "step": 230910 }, { "epoch": 4.700661577608143, "grad_norm": 0.0005450480974380027, "learning_rate": 1.087828692013687e-07, "loss": 0.0572, "step": 230920 }, { "epoch": 4.7008651399491095, "grad_norm": 0.018817385324189237, "learning_rate": 1.0863550385765142e-07, "loss": 0.0109, "step": 230930 }, { "epoch": 4.701068702290076, "grad_norm": 0.0009580567580081142, "learning_rate": 1.0848823730133962e-07, "loss": 0.0002, "step": 230940 }, { "epoch": 4.701272264631044, "grad_norm": 0.03065630327242124, "learning_rate": 1.0834106953540868e-07, "loss": 0.0001, "step": 230950 }, { "epoch": 4.70147582697201, "grad_norm": 0.1480916869089643, "learning_rate": 1.0819400056282958e-07, "loss": 0.0037, "step": 230960 }, { "epoch": 4.701679389312977, "grad_norm": 0.063912878821567, "learning_rate": 1.0804703038657382e-07, "loss": 0.0007, "step": 230970 }, { "epoch": 4.701882951653944, "grad_norm": 0.0043859631886761395, "learning_rate": 1.0790015900960849e-07, "loss": 0.0268, "step": 230980 }, { "epoch": 4.702086513994911, "grad_norm": 0.01617743753207274, "learning_rate": 1.0775338643490063e-07, "loss": 0.0015, "step": 230990 }, { "epoch": 4.702290076335878, "grad_norm": 0.015916097742791815, "learning_rate": 1.0760671266541456e-07, "loss": 0.0001, "step": 231000 }, { "epoch": 4.702493638676845, "grad_norm": 0.003477054793959049, "learning_rate": 1.0746013770411179e-07, "loss": 0.0001, "step": 231010 }, { "epoch": 4.702697201017812, "grad_norm": 0.0038565623326953483, "learning_rate": 1.0731366155395328e-07, "loss": 0.0269, "step": 231020 }, { "epoch": 4.7029007633587785, "grad_norm": 0.005933050647638392, "learning_rate": 1.0716728421789723e-07, "loss": 0.0007, "step": 231030 }, { "epoch": 4.703104325699746, "grad_norm": 0.01981644136658568, "learning_rate": 1.0702100569889961e-07, "loss": 0.0149, "step": 231040 }, { "epoch": 4.703307888040713, "grad_norm": 0.06968349103368325, "learning_rate": 1.0687482599991528e-07, "loss": 0.0085, "step": 231050 }, { "epoch": 4.703511450381679, "grad_norm": 0.024798907818721305, "learning_rate": 1.0672874512389631e-07, "loss": 0.0001, "step": 231060 }, { "epoch": 4.703715012722647, "grad_norm": 0.05661655695413368, "learning_rate": 1.0658276307379257e-07, "loss": 0.0004, "step": 231070 }, { "epoch": 4.703918575063613, "grad_norm": 0.0007580596117437115, "learning_rate": 1.0643687985255168e-07, "loss": 0.0011, "step": 231080 }, { "epoch": 4.70412213740458, "grad_norm": 0.35572836414857384, "learning_rate": 1.0629109546312245e-07, "loss": 0.0004, "step": 231090 }, { "epoch": 4.704325699745547, "grad_norm": 0.009214064048519148, "learning_rate": 1.0614540990844635e-07, "loss": 0.0199, "step": 231100 }, { "epoch": 4.704529262086514, "grad_norm": 0.0005387006988345252, "learning_rate": 1.0599982319146774e-07, "loss": 0.0001, "step": 231110 }, { "epoch": 4.704732824427481, "grad_norm": 0.0016866827616035009, "learning_rate": 1.0585433531512535e-07, "loss": 0.0088, "step": 231120 }, { "epoch": 4.7049363867684475, "grad_norm": 0.03537458089709644, "learning_rate": 1.057089462823585e-07, "loss": 0.0, "step": 231130 }, { "epoch": 4.705139949109415, "grad_norm": 0.05243250474777489, "learning_rate": 1.0556365609610375e-07, "loss": 0.0029, "step": 231140 }, { "epoch": 4.705343511450382, "grad_norm": 0.0012306883566162215, "learning_rate": 1.054184647592943e-07, "loss": 0.0306, "step": 231150 }, { "epoch": 4.705547073791348, "grad_norm": 0.006148405402208635, "learning_rate": 1.0527337227486389e-07, "loss": 0.0012, "step": 231160 }, { "epoch": 4.705750636132316, "grad_norm": 0.035693929238150755, "learning_rate": 1.0512837864574132e-07, "loss": 0.0276, "step": 231170 }, { "epoch": 4.7059541984732824, "grad_norm": 0.010573579810942094, "learning_rate": 1.049834838748559e-07, "loss": 0.0222, "step": 231180 }, { "epoch": 4.706157760814249, "grad_norm": 0.013207145371123519, "learning_rate": 1.0483868796513419e-07, "loss": 0.0004, "step": 231190 }, { "epoch": 4.706361323155217, "grad_norm": 0.0005264767688513454, "learning_rate": 1.0469399091949994e-07, "loss": 0.0003, "step": 231200 }, { "epoch": 4.706564885496183, "grad_norm": 0.0032304909981150805, "learning_rate": 1.0454939274087527e-07, "loss": 0.0002, "step": 231210 }, { "epoch": 4.70676844783715, "grad_norm": 0.0045881118436746525, "learning_rate": 1.0440489343218174e-07, "loss": 0.0, "step": 231220 }, { "epoch": 4.706972010178117, "grad_norm": 0.0022714822200240996, "learning_rate": 1.0426049299633589e-07, "loss": 0.0018, "step": 231230 }, { "epoch": 4.707175572519084, "grad_norm": 0.00935929467121939, "learning_rate": 1.041161914362554e-07, "loss": 0.0001, "step": 231240 }, { "epoch": 4.707379134860051, "grad_norm": 11.432214108196902, "learning_rate": 1.039719887548546e-07, "loss": 0.0803, "step": 231250 }, { "epoch": 4.707582697201018, "grad_norm": 0.0050351405641070966, "learning_rate": 1.0382788495504504e-07, "loss": 0.0188, "step": 231260 }, { "epoch": 4.707786259541985, "grad_norm": 0.0004276862041568193, "learning_rate": 1.0368388003973772e-07, "loss": 0.0014, "step": 231270 }, { "epoch": 4.7079898218829515, "grad_norm": 0.010405405105992183, "learning_rate": 1.0353997401184091e-07, "loss": 0.0002, "step": 231280 }, { "epoch": 4.708193384223918, "grad_norm": 0.07207123248597923, "learning_rate": 1.0339616687426057e-07, "loss": 0.0272, "step": 231290 }, { "epoch": 4.708396946564886, "grad_norm": 0.026802132750141525, "learning_rate": 1.0325245862990219e-07, "loss": 0.0004, "step": 231300 }, { "epoch": 4.708600508905852, "grad_norm": 10.56852402961473, "learning_rate": 1.0310884928166676e-07, "loss": 0.0478, "step": 231310 }, { "epoch": 4.708804071246819, "grad_norm": 0.007834666749995035, "learning_rate": 1.029653388324553e-07, "loss": 0.0231, "step": 231320 }, { "epoch": 4.709007633587786, "grad_norm": 0.0012222927679383271, "learning_rate": 1.028219272851666e-07, "loss": 0.0252, "step": 231330 }, { "epoch": 4.709211195928753, "grad_norm": 0.0005162171456198995, "learning_rate": 1.0267861464269613e-07, "loss": 0.0006, "step": 231340 }, { "epoch": 4.70941475826972, "grad_norm": 0.04631476257535847, "learning_rate": 1.0253540090793879e-07, "loss": 0.001, "step": 231350 }, { "epoch": 4.709618320610687, "grad_norm": 0.030272228351494207, "learning_rate": 1.0239228608378727e-07, "loss": 0.0515, "step": 231360 }, { "epoch": 4.709821882951654, "grad_norm": 0.0020860083374481873, "learning_rate": 1.0224927017313146e-07, "loss": 0.0001, "step": 231370 }, { "epoch": 4.7100254452926205, "grad_norm": 0.08916318519594493, "learning_rate": 1.0210635317885965e-07, "loss": 0.0002, "step": 231380 }, { "epoch": 4.710229007633588, "grad_norm": 0.0011730103518576944, "learning_rate": 1.0196353510385837e-07, "loss": 0.0004, "step": 231390 }, { "epoch": 4.710432569974555, "grad_norm": 0.17833236643722286, "learning_rate": 1.0182081595101312e-07, "loss": 0.0001, "step": 231400 }, { "epoch": 4.710636132315521, "grad_norm": 0.0013501193044670788, "learning_rate": 1.0167819572320381e-07, "loss": 0.0133, "step": 231410 }, { "epoch": 4.710839694656489, "grad_norm": 0.2552849913303861, "learning_rate": 1.0153567442331313e-07, "loss": 0.1173, "step": 231420 }, { "epoch": 4.711043256997455, "grad_norm": 0.1124522856338793, "learning_rate": 1.0139325205421935e-07, "loss": 0.0003, "step": 231430 }, { "epoch": 4.711246819338422, "grad_norm": 0.051967568509343275, "learning_rate": 1.0125092861879682e-07, "loss": 0.0276, "step": 231440 }, { "epoch": 4.71145038167939, "grad_norm": 0.0063092342305635755, "learning_rate": 1.0110870411992212e-07, "loss": 0.0406, "step": 231450 }, { "epoch": 4.711653944020356, "grad_norm": 0.012529773758636724, "learning_rate": 1.0096657856046688e-07, "loss": 0.0007, "step": 231460 }, { "epoch": 4.711857506361323, "grad_norm": 0.03164111999081794, "learning_rate": 1.0082455194330098e-07, "loss": 0.0392, "step": 231470 }, { "epoch": 4.71206106870229, "grad_norm": 0.00011700321617797953, "learning_rate": 1.0068262427129438e-07, "loss": 0.0002, "step": 231480 }, { "epoch": 4.712264631043257, "grad_norm": 0.0014109032297238118, "learning_rate": 1.0054079554731145e-07, "loss": 0.0, "step": 231490 }, { "epoch": 4.712468193384224, "grad_norm": 0.0012088654753560346, "learning_rate": 1.003990657742182e-07, "loss": 0.0001, "step": 231500 }, { "epoch": 4.712671755725191, "grad_norm": 0.13906939462937035, "learning_rate": 1.0025743495487628e-07, "loss": 0.0001, "step": 231510 }, { "epoch": 4.712875318066158, "grad_norm": 0.06296974038990143, "learning_rate": 1.0011590309214613e-07, "loss": 0.0212, "step": 231520 }, { "epoch": 4.713078880407124, "grad_norm": 0.058497491618316015, "learning_rate": 9.997447018888606e-08, "loss": 0.0005, "step": 231530 }, { "epoch": 4.713282442748092, "grad_norm": 0.0019165582453364053, "learning_rate": 9.983313624795321e-08, "loss": 0.0535, "step": 231540 }, { "epoch": 4.713486005089059, "grad_norm": 0.010551695579266409, "learning_rate": 9.969190127220141e-08, "loss": 0.0295, "step": 231550 }, { "epoch": 4.713689567430025, "grad_norm": 28.43707368308756, "learning_rate": 9.955076526448337e-08, "loss": 0.0453, "step": 231560 }, { "epoch": 4.713893129770993, "grad_norm": 0.007354976631264356, "learning_rate": 9.940972822764905e-08, "loss": 0.0271, "step": 231570 }, { "epoch": 4.714096692111959, "grad_norm": 0.003912307271410721, "learning_rate": 9.926879016454727e-08, "loss": 0.0321, "step": 231580 }, { "epoch": 4.714300254452926, "grad_norm": 0.017027764554883352, "learning_rate": 9.912795107802465e-08, "loss": 0.0, "step": 231590 }, { "epoch": 4.7145038167938935, "grad_norm": 0.014713255965578022, "learning_rate": 9.898721097092501e-08, "loss": 0.0003, "step": 231600 }, { "epoch": 4.71470737913486, "grad_norm": 0.010445646729439451, "learning_rate": 9.884656984609108e-08, "loss": 0.0004, "step": 231610 }, { "epoch": 4.714910941475827, "grad_norm": 4.2385380012025715, "learning_rate": 9.870602770636395e-08, "loss": 0.0096, "step": 231620 }, { "epoch": 4.715114503816794, "grad_norm": 0.12058840857372553, "learning_rate": 9.856558455458021e-08, "loss": 0.0002, "step": 231630 }, { "epoch": 4.715318066157761, "grad_norm": 0.01373073366763583, "learning_rate": 9.84252403935787e-08, "loss": 0.0073, "step": 231640 }, { "epoch": 4.715521628498728, "grad_norm": 0.0008926090733609708, "learning_rate": 9.828499522619217e-08, "loss": 0.0358, "step": 231650 }, { "epoch": 4.715725190839695, "grad_norm": 0.009853795808725495, "learning_rate": 9.814484905525278e-08, "loss": 0.0162, "step": 231660 }, { "epoch": 4.715928753180662, "grad_norm": 0.009014202344543325, "learning_rate": 9.800480188359273e-08, "loss": 0.0188, "step": 231670 }, { "epoch": 4.716132315521628, "grad_norm": 1.640621235106403e-08, "learning_rate": 9.78648537140392e-08, "loss": 0.095, "step": 231680 }, { "epoch": 4.716335877862596, "grad_norm": 0.013404415679911163, "learning_rate": 9.772500454941824e-08, "loss": 0.0005, "step": 231690 }, { "epoch": 4.7165394402035625, "grad_norm": 2.1539527947829975, "learning_rate": 9.758525439255596e-08, "loss": 0.0243, "step": 231700 }, { "epoch": 4.716743002544529, "grad_norm": 0.024449963780969326, "learning_rate": 9.744560324627339e-08, "loss": 0.04, "step": 231710 }, { "epoch": 4.716946564885496, "grad_norm": 0.002228676091802281, "learning_rate": 9.730605111339108e-08, "loss": 0.0002, "step": 231720 }, { "epoch": 4.717150127226463, "grad_norm": 3.811826759071421e-05, "learning_rate": 9.716659799672734e-08, "loss": 0.0296, "step": 231730 }, { "epoch": 4.71735368956743, "grad_norm": 0.005101670283823652, "learning_rate": 9.702724389909934e-08, "loss": 0.0001, "step": 231740 }, { "epoch": 4.717557251908397, "grad_norm": 0.0012338521920365728, "learning_rate": 9.688798882332151e-08, "loss": 0.0567, "step": 231750 }, { "epoch": 4.717760814249364, "grad_norm": 0.0020175131232303216, "learning_rate": 9.674883277220548e-08, "loss": 0.012, "step": 231760 }, { "epoch": 4.717964376590331, "grad_norm": 0.004445900225722748, "learning_rate": 9.660977574856234e-08, "loss": 0.0004, "step": 231770 }, { "epoch": 4.718167938931297, "grad_norm": 0.002442228782066041, "learning_rate": 9.647081775519985e-08, "loss": 0.0031, "step": 231780 }, { "epoch": 4.718371501272265, "grad_norm": 0.021174852023204568, "learning_rate": 9.633195879492463e-08, "loss": 0.0014, "step": 231790 }, { "epoch": 4.7185750636132315, "grad_norm": 0.16982642011114402, "learning_rate": 9.61931988705428e-08, "loss": 0.0174, "step": 231800 }, { "epoch": 4.718778625954198, "grad_norm": 0.030907883760317927, "learning_rate": 9.605453798485375e-08, "loss": 0.0066, "step": 231810 }, { "epoch": 4.718982188295166, "grad_norm": 0.001197031107493794, "learning_rate": 9.591597614066028e-08, "loss": 0.0318, "step": 231820 }, { "epoch": 4.719185750636132, "grad_norm": 0.007299053359136488, "learning_rate": 9.577751334076069e-08, "loss": 0.0001, "step": 231830 }, { "epoch": 4.719389312977099, "grad_norm": 0.003590598823281911, "learning_rate": 9.56391495879494e-08, "loss": 0.0001, "step": 231840 }, { "epoch": 4.7195928753180665, "grad_norm": 0.5735404449475936, "learning_rate": 9.550088488502362e-08, "loss": 0.0007, "step": 231850 }, { "epoch": 4.719796437659033, "grad_norm": 0.029325919790640368, "learning_rate": 9.536271923477391e-08, "loss": 0.0087, "step": 231860 }, { "epoch": 4.72, "grad_norm": 0.009359255349239908, "learning_rate": 9.522465263999025e-08, "loss": 0.0145, "step": 231870 }, { "epoch": 4.720203562340967, "grad_norm": 0.0038228492330965452, "learning_rate": 9.508668510346374e-08, "loss": 0.0001, "step": 231880 }, { "epoch": 4.720407124681934, "grad_norm": 0.0030420375193774473, "learning_rate": 9.494881662797828e-08, "loss": 0.0007, "step": 231890 }, { "epoch": 4.7206106870229005, "grad_norm": 0.0392591677606802, "learning_rate": 9.481104721631829e-08, "loss": 0.0303, "step": 231900 }, { "epoch": 4.720814249363868, "grad_norm": 0.009591369176658837, "learning_rate": 9.467337687126876e-08, "loss": 0.043, "step": 231910 }, { "epoch": 4.721017811704835, "grad_norm": 0.0045024028470346585, "learning_rate": 9.453580559560748e-08, "loss": 0.0001, "step": 231920 }, { "epoch": 4.721221374045801, "grad_norm": 0.001428775344475637, "learning_rate": 9.439833339211335e-08, "loss": 0.0001, "step": 231930 }, { "epoch": 4.721424936386768, "grad_norm": 0.012073050464679201, "learning_rate": 9.426096026356412e-08, "loss": 0.0004, "step": 231940 }, { "epoch": 4.7216284987277355, "grad_norm": 0.0032666330022732733, "learning_rate": 9.41236862127326e-08, "loss": 0.0171, "step": 231950 }, { "epoch": 4.721832061068702, "grad_norm": 0.0011439863118180534, "learning_rate": 9.398651124239266e-08, "loss": 0.0001, "step": 231960 }, { "epoch": 4.722035623409669, "grad_norm": 0.0027904836316732986, "learning_rate": 9.384943535531321e-08, "loss": 0.0099, "step": 231970 }, { "epoch": 4.722239185750636, "grad_norm": 0.005825930471542656, "learning_rate": 9.371245855426425e-08, "loss": 0.0053, "step": 231980 }, { "epoch": 4.722442748091603, "grad_norm": 0.004797869112139375, "learning_rate": 9.35755808420108e-08, "loss": 0.0001, "step": 231990 }, { "epoch": 4.7226463104325695, "grad_norm": 5.872121789503691, "learning_rate": 9.34388022213184e-08, "loss": 0.0061, "step": 232000 }, { "epoch": 4.722849872773537, "grad_norm": 21.855689113417643, "learning_rate": 9.330212269494931e-08, "loss": 0.0901, "step": 232010 }, { "epoch": 4.723053435114504, "grad_norm": 0.004616920512145382, "learning_rate": 9.316554226566298e-08, "loss": 0.083, "step": 232020 }, { "epoch": 4.72325699745547, "grad_norm": 0.08917128404827758, "learning_rate": 9.302906093621833e-08, "loss": 0.0002, "step": 232030 }, { "epoch": 4.723460559796438, "grad_norm": 0.007315485792174916, "learning_rate": 9.289267870937258e-08, "loss": 0.0001, "step": 232040 }, { "epoch": 4.7236641221374045, "grad_norm": 0.00020171401077384362, "learning_rate": 9.275639558787908e-08, "loss": 0.0001, "step": 232050 }, { "epoch": 4.723867684478371, "grad_norm": 0.0007661685928821732, "learning_rate": 9.262021157449064e-08, "loss": 0.0447, "step": 232060 }, { "epoch": 4.724071246819339, "grad_norm": 0.0011505318384689516, "learning_rate": 9.248412667195783e-08, "loss": 0.0, "step": 232070 }, { "epoch": 4.724274809160305, "grad_norm": 0.01136298575413866, "learning_rate": 9.234814088302901e-08, "loss": 0.0009, "step": 232080 }, { "epoch": 4.724478371501272, "grad_norm": 0.06625865982630139, "learning_rate": 9.221225421045033e-08, "loss": 0.0006, "step": 232090 }, { "epoch": 4.7246819338422394, "grad_norm": 0.010683661083190313, "learning_rate": 9.20764666569668e-08, "loss": 0.0007, "step": 232100 }, { "epoch": 4.724885496183206, "grad_norm": 0.006208759996318612, "learning_rate": 9.194077822531954e-08, "loss": 0.0001, "step": 232110 }, { "epoch": 4.725089058524173, "grad_norm": 0.022575581356321015, "learning_rate": 9.18051889182503e-08, "loss": 0.0001, "step": 232120 }, { "epoch": 4.72529262086514, "grad_norm": 0.0009079473065635631, "learning_rate": 9.166969873849685e-08, "loss": 0.0001, "step": 232130 }, { "epoch": 4.725496183206107, "grad_norm": 0.0010040313582704262, "learning_rate": 9.15343076887959e-08, "loss": 0.0079, "step": 232140 }, { "epoch": 4.7256997455470735, "grad_norm": 0.0002606616306986348, "learning_rate": 9.139901577188138e-08, "loss": 0.0001, "step": 232150 }, { "epoch": 4.725903307888041, "grad_norm": 0.012566927122388478, "learning_rate": 9.126382299048608e-08, "loss": 0.0003, "step": 232160 }, { "epoch": 4.726106870229008, "grad_norm": 0.013660523468898443, "learning_rate": 9.112872934734062e-08, "loss": 0.0006, "step": 232170 }, { "epoch": 4.726310432569974, "grad_norm": 0.0024044033940786944, "learning_rate": 9.099373484517283e-08, "loss": 0.0207, "step": 232180 }, { "epoch": 4.726513994910942, "grad_norm": 15.734666789995133, "learning_rate": 9.085883948670882e-08, "loss": 0.0423, "step": 232190 }, { "epoch": 4.7267175572519085, "grad_norm": 0.006307587553387149, "learning_rate": 9.072404327467421e-08, "loss": 0.0361, "step": 232200 }, { "epoch": 4.726921119592875, "grad_norm": 0.008882845051357963, "learning_rate": 9.058934621179016e-08, "loss": 0.0001, "step": 232210 }, { "epoch": 4.727124681933843, "grad_norm": 0.0012728777063663944, "learning_rate": 9.045474830077782e-08, "loss": 0.0074, "step": 232220 }, { "epoch": 4.727328244274809, "grad_norm": 0.0067913398873753646, "learning_rate": 9.032024954435559e-08, "loss": 0.0001, "step": 232230 }, { "epoch": 4.727531806615776, "grad_norm": 2.0767966264299518, "learning_rate": 9.01858499452385e-08, "loss": 0.0019, "step": 232240 }, { "epoch": 4.727735368956743, "grad_norm": 0.0984207016898063, "learning_rate": 9.005154950614325e-08, "loss": 0.0063, "step": 232250 }, { "epoch": 4.72793893129771, "grad_norm": 0.009862477266880562, "learning_rate": 8.991734822977994e-08, "loss": 0.0001, "step": 232260 }, { "epoch": 4.728142493638677, "grad_norm": 0.3025034067839873, "learning_rate": 8.97832461188597e-08, "loss": 0.0003, "step": 232270 }, { "epoch": 4.728346055979644, "grad_norm": 0.06885970194695953, "learning_rate": 8.964924317609203e-08, "loss": 0.0001, "step": 232280 }, { "epoch": 4.728549618320611, "grad_norm": 0.01704204020997402, "learning_rate": 8.9515339404182e-08, "loss": 0.0029, "step": 232290 }, { "epoch": 4.7287531806615775, "grad_norm": 0.02222519217910677, "learning_rate": 8.938153480583467e-08, "loss": 0.0005, "step": 232300 }, { "epoch": 4.728956743002545, "grad_norm": 0.011648765956386937, "learning_rate": 8.924782938375177e-08, "loss": 0.0001, "step": 232310 }, { "epoch": 4.729160305343512, "grad_norm": 0.022651771277173348, "learning_rate": 8.911422314063389e-08, "loss": 0.0152, "step": 232320 }, { "epoch": 4.729363867684478, "grad_norm": 0.0006099710372107585, "learning_rate": 8.898071607917946e-08, "loss": 0.0001, "step": 232330 }, { "epoch": 4.729567430025446, "grad_norm": 0.09791759340456141, "learning_rate": 8.884730820208465e-08, "loss": 0.0002, "step": 232340 }, { "epoch": 4.729770992366412, "grad_norm": 0.001744071211185207, "learning_rate": 8.871399951204451e-08, "loss": 0.0152, "step": 232350 }, { "epoch": 4.729974554707379, "grad_norm": 0.017075614541354662, "learning_rate": 8.858079001175024e-08, "loss": 0.0208, "step": 232360 }, { "epoch": 4.730178117048346, "grad_norm": 3.2743028405517505, "learning_rate": 8.844767970389301e-08, "loss": 0.0326, "step": 232370 }, { "epoch": 4.730381679389313, "grad_norm": 0.015458323540582788, "learning_rate": 8.831466859116123e-08, "loss": 0.0001, "step": 232380 }, { "epoch": 4.73058524173028, "grad_norm": 0.01371732650358012, "learning_rate": 8.818175667624051e-08, "loss": 0.0, "step": 232390 }, { "epoch": 4.7307888040712465, "grad_norm": 0.03235654219000407, "learning_rate": 8.80489439618154e-08, "loss": 0.0002, "step": 232400 }, { "epoch": 4.730992366412214, "grad_norm": 0.0017599462288694558, "learning_rate": 8.79162304505693e-08, "loss": 0.0005, "step": 232410 }, { "epoch": 4.731195928753181, "grad_norm": 0.024302016794993945, "learning_rate": 8.778361614518061e-08, "loss": 0.0001, "step": 232420 }, { "epoch": 4.731399491094147, "grad_norm": 0.13663808278772774, "learning_rate": 8.765110104832941e-08, "loss": 0.0011, "step": 232430 }, { "epoch": 4.731603053435115, "grad_norm": 0.001152013072520355, "learning_rate": 8.751868516269135e-08, "loss": 0.0154, "step": 232440 }, { "epoch": 4.731806615776081, "grad_norm": 0.0017892729960715717, "learning_rate": 8.738636849093984e-08, "loss": 0.0382, "step": 232450 }, { "epoch": 4.732010178117048, "grad_norm": 8.76107738136811, "learning_rate": 8.725415103574885e-08, "loss": 0.0284, "step": 232460 }, { "epoch": 4.732213740458016, "grad_norm": 0.0004238636624234593, "learning_rate": 8.712203279978737e-08, "loss": 0.0101, "step": 232470 }, { "epoch": 4.732417302798982, "grad_norm": 0.001915241137948848, "learning_rate": 8.699001378572435e-08, "loss": 0.0521, "step": 232480 }, { "epoch": 4.732620865139949, "grad_norm": 0.04871606024666936, "learning_rate": 8.685809399622602e-08, "loss": 0.0003, "step": 232490 }, { "epoch": 4.732824427480916, "grad_norm": 0.03237743224750681, "learning_rate": 8.672627343395634e-08, "loss": 0.02, "step": 232500 }, { "epoch": 4.733027989821883, "grad_norm": 0.00979994199844312, "learning_rate": 8.659455210157764e-08, "loss": 0.0003, "step": 232510 }, { "epoch": 4.73323155216285, "grad_norm": 0.0030026714694483705, "learning_rate": 8.64629300017511e-08, "loss": 0.0364, "step": 232520 }, { "epoch": 4.733435114503817, "grad_norm": 0.07196358501369218, "learning_rate": 8.633140713713351e-08, "loss": 0.0108, "step": 232530 }, { "epoch": 4.733638676844784, "grad_norm": 0.007604378035514725, "learning_rate": 8.619998351038217e-08, "loss": 0.0164, "step": 232540 }, { "epoch": 4.73384223918575, "grad_norm": 0.0135539334423466, "learning_rate": 8.606865912415108e-08, "loss": 0.0001, "step": 232550 }, { "epoch": 4.734045801526718, "grad_norm": 0.006705576053914352, "learning_rate": 8.593743398109255e-08, "loss": 0.0121, "step": 232560 }, { "epoch": 4.734249363867685, "grad_norm": 0.013504182345974189, "learning_rate": 8.580630808385725e-08, "loss": 0.0002, "step": 232570 }, { "epoch": 4.734452926208651, "grad_norm": 0.013028670641085711, "learning_rate": 8.567528143509252e-08, "loss": 0.0003, "step": 232580 }, { "epoch": 4.734656488549618, "grad_norm": 0.004804292476711308, "learning_rate": 8.55443540374451e-08, "loss": 0.0001, "step": 232590 }, { "epoch": 4.734860050890585, "grad_norm": 0.0005648272929907489, "learning_rate": 8.541352589355955e-08, "loss": 0.0001, "step": 232600 }, { "epoch": 4.735063613231552, "grad_norm": 0.0010215314507594439, "learning_rate": 8.52827970060771e-08, "loss": 0.0001, "step": 232610 }, { "epoch": 4.735267175572519, "grad_norm": 0.051097041189698086, "learning_rate": 8.515216737763953e-08, "loss": 0.0003, "step": 232620 }, { "epoch": 4.735470737913486, "grad_norm": 0.03935934605474469, "learning_rate": 8.502163701088362e-08, "loss": 0.0626, "step": 232630 }, { "epoch": 4.735674300254453, "grad_norm": 0.01204082587191657, "learning_rate": 8.489120590844612e-08, "loss": 0.0004, "step": 232640 }, { "epoch": 4.735877862595419, "grad_norm": 0.01189600777801876, "learning_rate": 8.476087407296162e-08, "loss": 0.0199, "step": 232650 }, { "epoch": 4.736081424936387, "grad_norm": 7.138967164801898, "learning_rate": 8.463064150706191e-08, "loss": 0.0307, "step": 232660 }, { "epoch": 4.736284987277354, "grad_norm": 0.0034229912801192345, "learning_rate": 8.450050821337763e-08, "loss": 0.0244, "step": 232670 }, { "epoch": 4.73648854961832, "grad_norm": 0.008926164604109037, "learning_rate": 8.437047419453614e-08, "loss": 0.0003, "step": 232680 }, { "epoch": 4.736692111959288, "grad_norm": 0.006095643983472617, "learning_rate": 8.42405394531648e-08, "loss": 0.0003, "step": 232690 }, { "epoch": 4.736895674300254, "grad_norm": 0.0029758577183622925, "learning_rate": 8.411070399188703e-08, "loss": 0.0008, "step": 232700 }, { "epoch": 4.737099236641221, "grad_norm": 0.048544591405404854, "learning_rate": 8.398096781332521e-08, "loss": 0.0003, "step": 232710 }, { "epoch": 4.7373027989821885, "grad_norm": 2.1821394497744198, "learning_rate": 8.385133092009889e-08, "loss": 0.0016, "step": 232720 }, { "epoch": 4.737506361323155, "grad_norm": 0.002100669334323966, "learning_rate": 8.372179331482766e-08, "loss": 0.0001, "step": 232730 }, { "epoch": 4.737709923664122, "grad_norm": 0.006848872215699255, "learning_rate": 8.359235500012663e-08, "loss": 0.0048, "step": 232740 }, { "epoch": 4.737913486005089, "grad_norm": 0.0019780644369741026, "learning_rate": 8.346301597861095e-08, "loss": 0.0002, "step": 232750 }, { "epoch": 4.738117048346056, "grad_norm": 0.014159282269676159, "learning_rate": 8.33337762528913e-08, "loss": 0.0381, "step": 232760 }, { "epoch": 4.738320610687023, "grad_norm": 0.002450822963385059, "learning_rate": 8.320463582557892e-08, "loss": 0.0002, "step": 232770 }, { "epoch": 4.73852417302799, "grad_norm": 8.21201332760453, "learning_rate": 8.307559469928173e-08, "loss": 0.064, "step": 232780 }, { "epoch": 4.738727735368957, "grad_norm": 0.004928517614755704, "learning_rate": 8.294665287660541e-08, "loss": 0.0503, "step": 232790 }, { "epoch": 4.738931297709923, "grad_norm": 0.0028045486355131742, "learning_rate": 8.281781036015568e-08, "loss": 0.0379, "step": 232800 }, { "epoch": 4.739134860050891, "grad_norm": 0.0008482224562493681, "learning_rate": 8.268906715253266e-08, "loss": 0.0207, "step": 232810 }, { "epoch": 4.7393384223918575, "grad_norm": 0.003063394434107914, "learning_rate": 8.256042325633707e-08, "loss": 0.028, "step": 232820 }, { "epoch": 4.739541984732824, "grad_norm": 0.12516003411663942, "learning_rate": 8.243187867416847e-08, "loss": 0.0184, "step": 232830 }, { "epoch": 4.739745547073792, "grad_norm": 0.001007587753743466, "learning_rate": 8.23034334086209e-08, "loss": 0.0001, "step": 232840 }, { "epoch": 4.739949109414758, "grad_norm": 0.0012771281577711718, "learning_rate": 8.217508746228953e-08, "loss": 0.0, "step": 232850 }, { "epoch": 4.740152671755725, "grad_norm": 0.0039627482688614785, "learning_rate": 8.204684083776671e-08, "loss": 0.0296, "step": 232860 }, { "epoch": 4.7403562340966925, "grad_norm": 0.004530588949850793, "learning_rate": 8.191869353764203e-08, "loss": 0.03, "step": 232870 }, { "epoch": 4.740559796437659, "grad_norm": 2.937334414004471, "learning_rate": 8.1790645564504e-08, "loss": 0.0064, "step": 232880 }, { "epoch": 4.740763358778626, "grad_norm": 0.027063135388673457, "learning_rate": 8.166269692093831e-08, "loss": 0.0001, "step": 232890 }, { "epoch": 4.740966921119593, "grad_norm": 0.005398667767311464, "learning_rate": 8.153484760952956e-08, "loss": 0.0005, "step": 232900 }, { "epoch": 4.74117048346056, "grad_norm": 0.0021400119513460924, "learning_rate": 8.140709763285903e-08, "loss": 0.021, "step": 232910 }, { "epoch": 4.7413740458015265, "grad_norm": 3.562376712785712, "learning_rate": 8.127944699350743e-08, "loss": 0.0018, "step": 232920 }, { "epoch": 4.741577608142494, "grad_norm": 0.23834066774748275, "learning_rate": 8.115189569405268e-08, "loss": 0.0001, "step": 232930 }, { "epoch": 4.741781170483461, "grad_norm": 0.03657317300760382, "learning_rate": 8.102444373707052e-08, "loss": 0.0001, "step": 232940 }, { "epoch": 4.741984732824427, "grad_norm": 0.0015923169830572039, "learning_rate": 8.089709112513555e-08, "loss": 0.0249, "step": 232950 }, { "epoch": 4.742188295165395, "grad_norm": 0.33388041080447484, "learning_rate": 8.076983786082016e-08, "loss": 0.0248, "step": 232960 }, { "epoch": 4.7423918575063615, "grad_norm": 0.004957011187227211, "learning_rate": 8.064268394669284e-08, "loss": 0.0024, "step": 232970 }, { "epoch": 4.742595419847328, "grad_norm": 0.0005343941723011826, "learning_rate": 8.05156293853232e-08, "loss": 0.0202, "step": 232980 }, { "epoch": 4.742798982188296, "grad_norm": 0.0003716662226574511, "learning_rate": 8.038867417927699e-08, "loss": 0.0267, "step": 232990 }, { "epoch": 4.743002544529262, "grad_norm": 0.040697020472947945, "learning_rate": 8.026181833111713e-08, "loss": 0.0215, "step": 233000 }, { "epoch": 4.743206106870229, "grad_norm": 0.001874792621599819, "learning_rate": 8.013506184340658e-08, "loss": 0.0001, "step": 233010 }, { "epoch": 4.743409669211196, "grad_norm": 0.0013536677490245269, "learning_rate": 8.000840471870552e-08, "loss": 0.0002, "step": 233020 }, { "epoch": 4.743613231552163, "grad_norm": 17.221750590349824, "learning_rate": 7.988184695957136e-08, "loss": 0.03, "step": 233030 }, { "epoch": 4.74381679389313, "grad_norm": 6.840041825691593, "learning_rate": 7.97553885685609e-08, "loss": 0.0228, "step": 233040 }, { "epoch": 4.744020356234096, "grad_norm": 0.01454015288234938, "learning_rate": 7.962902954822715e-08, "loss": 0.0003, "step": 233050 }, { "epoch": 4.744223918575064, "grad_norm": 0.0018544287905813612, "learning_rate": 7.950276990112249e-08, "loss": 0.0001, "step": 233060 }, { "epoch": 4.7444274809160305, "grad_norm": 0.005702347059793628, "learning_rate": 7.937660962979655e-08, "loss": 0.0122, "step": 233070 }, { "epoch": 4.744631043256997, "grad_norm": 0.008307906756465436, "learning_rate": 7.925054873679839e-08, "loss": 0.0071, "step": 233080 }, { "epoch": 4.744834605597965, "grad_norm": 0.0001878781394043868, "learning_rate": 7.91245872246732e-08, "loss": 0.0001, "step": 233090 }, { "epoch": 4.745038167938931, "grad_norm": 0.044341071663239556, "learning_rate": 7.899872509596452e-08, "loss": 0.0009, "step": 233100 }, { "epoch": 4.745241730279898, "grad_norm": 0.009235518310461582, "learning_rate": 7.887296235321528e-08, "loss": 0.0001, "step": 233110 }, { "epoch": 4.7454452926208655, "grad_norm": 0.007085308213308004, "learning_rate": 7.87472989989646e-08, "loss": 0.0478, "step": 233120 }, { "epoch": 4.745648854961832, "grad_norm": 0.0007857407872895535, "learning_rate": 7.862173503575099e-08, "loss": 0.0, "step": 233130 }, { "epoch": 4.745852417302799, "grad_norm": 0.012075215753463112, "learning_rate": 7.849627046611019e-08, "loss": 0.0, "step": 233140 }, { "epoch": 4.746055979643766, "grad_norm": 0.004467628432900852, "learning_rate": 7.837090529257574e-08, "loss": 0.0003, "step": 233150 }, { "epoch": 4.746259541984733, "grad_norm": 0.01617549666838953, "learning_rate": 7.824563951768005e-08, "loss": 0.0445, "step": 233160 }, { "epoch": 4.7464631043256995, "grad_norm": 0.006635460163066609, "learning_rate": 7.812047314395332e-08, "loss": 0.005, "step": 233170 }, { "epoch": 4.746666666666667, "grad_norm": 0.005024347818297693, "learning_rate": 7.799540617392242e-08, "loss": 0.0008, "step": 233180 }, { "epoch": 4.746870229007634, "grad_norm": 0.05704920570431508, "learning_rate": 7.787043861011367e-08, "loss": 0.0178, "step": 233190 }, { "epoch": 4.7470737913486, "grad_norm": 0.027802233090679307, "learning_rate": 7.774557045505171e-08, "loss": 0.0325, "step": 233200 }, { "epoch": 4.747277353689567, "grad_norm": 0.016781511806355034, "learning_rate": 7.76208017112573e-08, "loss": 0.0659, "step": 233210 }, { "epoch": 4.7474809160305345, "grad_norm": 10.590546834282089, "learning_rate": 7.749613238125008e-08, "loss": 0.0173, "step": 233220 }, { "epoch": 4.747684478371501, "grad_norm": 0.006035567140341993, "learning_rate": 7.737156246754973e-08, "loss": 0.0144, "step": 233230 }, { "epoch": 4.747888040712468, "grad_norm": 0.0119240561912465, "learning_rate": 7.724709197267033e-08, "loss": 0.0002, "step": 233240 }, { "epoch": 4.748091603053435, "grad_norm": 18.433855776359994, "learning_rate": 7.712272089912653e-08, "loss": 0.0249, "step": 233250 }, { "epoch": 4.748295165394402, "grad_norm": 0.02024865518330061, "learning_rate": 7.699844924943023e-08, "loss": 0.0002, "step": 233260 }, { "epoch": 4.7484987277353685, "grad_norm": 0.0013056046757872928, "learning_rate": 7.687427702609107e-08, "loss": 0.0005, "step": 233270 }, { "epoch": 4.748702290076336, "grad_norm": 8.799731712986485, "learning_rate": 7.67502042316165e-08, "loss": 0.0222, "step": 233280 }, { "epoch": 4.748905852417303, "grad_norm": 0.017614468878103303, "learning_rate": 7.662623086851284e-08, "loss": 0.0022, "step": 233290 }, { "epoch": 4.749109414758269, "grad_norm": 0.0032364627980116896, "learning_rate": 7.650235693928365e-08, "loss": 0.0033, "step": 233300 }, { "epoch": 4.749312977099237, "grad_norm": 0.13000235973861443, "learning_rate": 7.637858244643081e-08, "loss": 0.0264, "step": 233310 }, { "epoch": 4.7495165394402035, "grad_norm": 0.0008769162134442301, "learning_rate": 7.6254907392454e-08, "loss": 0.0001, "step": 233320 }, { "epoch": 4.74972010178117, "grad_norm": 5.923408414028669, "learning_rate": 7.613133177985121e-08, "loss": 0.0135, "step": 233330 }, { "epoch": 4.749923664122138, "grad_norm": 0.5635683636444521, "learning_rate": 7.600785561111768e-08, "loss": 0.0339, "step": 233340 }, { "epoch": 4.750127226463104, "grad_norm": 0.05105339747677153, "learning_rate": 7.588447888874806e-08, "loss": 0.0064, "step": 233350 }, { "epoch": 4.750330788804071, "grad_norm": 0.017027439645558716, "learning_rate": 7.57612016152337e-08, "loss": 0.0001, "step": 233360 }, { "epoch": 4.750534351145038, "grad_norm": 0.0028006366537152243, "learning_rate": 7.563802379306318e-08, "loss": 0.0593, "step": 233370 }, { "epoch": 4.750737913486005, "grad_norm": 0.13406463632653712, "learning_rate": 7.551494542472615e-08, "loss": 0.0001, "step": 233380 }, { "epoch": 4.750941475826972, "grad_norm": 0.010403107918438848, "learning_rate": 7.539196651270787e-08, "loss": 0.0001, "step": 233390 }, { "epoch": 4.751145038167939, "grad_norm": 0.005520148549333779, "learning_rate": 7.526908705949076e-08, "loss": 0.0004, "step": 233400 }, { "epoch": 4.751348600508906, "grad_norm": 0.0031980171194071288, "learning_rate": 7.514630706755843e-08, "loss": 0.0582, "step": 233410 }, { "epoch": 4.7515521628498725, "grad_norm": 3.7797497414212065, "learning_rate": 7.502362653938888e-08, "loss": 0.0004, "step": 233420 }, { "epoch": 4.75175572519084, "grad_norm": 0.011323652038061, "learning_rate": 7.490104547746013e-08, "loss": 0.0007, "step": 233430 }, { "epoch": 4.751959287531807, "grad_norm": 0.0009940141549805784, "learning_rate": 7.477856388424909e-08, "loss": 0.0031, "step": 233440 }, { "epoch": 4.752162849872773, "grad_norm": 0.028007366261387525, "learning_rate": 7.465618176222822e-08, "loss": 0.0001, "step": 233450 }, { "epoch": 4.752366412213741, "grad_norm": 0.0015955223848316748, "learning_rate": 7.453389911386999e-08, "loss": 0.0001, "step": 233460 }, { "epoch": 4.752569974554707, "grad_norm": 0.006386204927479005, "learning_rate": 7.441171594164298e-08, "loss": 0.0001, "step": 233470 }, { "epoch": 4.752773536895674, "grad_norm": 0.0024653577323871033, "learning_rate": 7.428963224801577e-08, "loss": 0.0553, "step": 233480 }, { "epoch": 4.752977099236642, "grad_norm": 0.0023982523406037575, "learning_rate": 7.416764803545362e-08, "loss": 0.0001, "step": 233490 }, { "epoch": 4.753180661577608, "grad_norm": 53.64480594686613, "learning_rate": 7.40457633064201e-08, "loss": 0.0267, "step": 233500 }, { "epoch": 4.753384223918575, "grad_norm": 0.005782047905775231, "learning_rate": 7.392397806337714e-08, "loss": 0.0145, "step": 233510 }, { "epoch": 4.753587786259542, "grad_norm": 0.003228631524660713, "learning_rate": 7.380229230878445e-08, "loss": 0.0001, "step": 233520 }, { "epoch": 4.753791348600509, "grad_norm": 0.004112840709932771, "learning_rate": 7.368070604509892e-08, "loss": 0.0003, "step": 233530 }, { "epoch": 4.753994910941476, "grad_norm": 0.0027371673543804347, "learning_rate": 7.355921927477749e-08, "loss": 0.0363, "step": 233540 }, { "epoch": 4.754198473282443, "grad_norm": 14.038195628425111, "learning_rate": 7.343783200027155e-08, "loss": 0.056, "step": 233550 }, { "epoch": 4.75440203562341, "grad_norm": 0.020808067156790896, "learning_rate": 7.331654422403466e-08, "loss": 0.0424, "step": 233560 }, { "epoch": 4.754605597964376, "grad_norm": 0.0006312059588259785, "learning_rate": 7.319535594851601e-08, "loss": 0.0049, "step": 233570 }, { "epoch": 4.754809160305344, "grad_norm": 0.03390802668980826, "learning_rate": 7.307426717616251e-08, "loss": 0.0002, "step": 233580 }, { "epoch": 4.755012722646311, "grad_norm": 0.1730839232015233, "learning_rate": 7.295327790941942e-08, "loss": 0.0002, "step": 233590 }, { "epoch": 4.755216284987277, "grad_norm": 0.02556348712079475, "learning_rate": 7.283238815073146e-08, "loss": 0.0081, "step": 233600 }, { "epoch": 4.755419847328245, "grad_norm": 0.006269086458013979, "learning_rate": 7.271159790253946e-08, "loss": 0.003, "step": 233610 }, { "epoch": 4.755623409669211, "grad_norm": 0.0007496050415689056, "learning_rate": 7.259090716728312e-08, "loss": 0.0401, "step": 233620 }, { "epoch": 4.755826972010178, "grad_norm": 0.017175176081948244, "learning_rate": 7.24703159473994e-08, "loss": 0.0384, "step": 233630 }, { "epoch": 4.7560305343511455, "grad_norm": 0.0002533777951227954, "learning_rate": 7.234982424532466e-08, "loss": 0.0001, "step": 233640 }, { "epoch": 4.756234096692112, "grad_norm": 0.002044085171914368, "learning_rate": 7.222943206349197e-08, "loss": 0.0001, "step": 233650 }, { "epoch": 4.756437659033079, "grad_norm": 0.7974780658466056, "learning_rate": 7.210913940433273e-08, "loss": 0.0162, "step": 233660 }, { "epoch": 4.756641221374045, "grad_norm": 0.0013911250450016886, "learning_rate": 7.198894627027663e-08, "loss": 0.0615, "step": 233670 }, { "epoch": 4.756844783715013, "grad_norm": 0.017174317139313385, "learning_rate": 7.186885266375122e-08, "loss": 0.0385, "step": 233680 }, { "epoch": 4.75704834605598, "grad_norm": 0.0019535865576588525, "learning_rate": 7.174885858718116e-08, "loss": 0.0001, "step": 233690 }, { "epoch": 4.757251908396946, "grad_norm": 9.981709978427347, "learning_rate": 7.16289640429907e-08, "loss": 0.0447, "step": 233700 }, { "epoch": 4.757455470737914, "grad_norm": 0.0002707059375214169, "learning_rate": 7.150916903360116e-08, "loss": 0.0037, "step": 233710 }, { "epoch": 4.75765903307888, "grad_norm": 0.29398214037070836, "learning_rate": 7.138947356143178e-08, "loss": 0.001, "step": 233720 }, { "epoch": 4.757862595419847, "grad_norm": 0.0029686169384120593, "learning_rate": 7.126987762890003e-08, "loss": 0.0071, "step": 233730 }, { "epoch": 4.7580661577608145, "grad_norm": 0.00431322450431734, "learning_rate": 7.115038123842067e-08, "loss": 0.0007, "step": 233740 }, { "epoch": 4.758269720101781, "grad_norm": 0.007785004654366282, "learning_rate": 7.103098439240786e-08, "loss": 0.0008, "step": 233750 }, { "epoch": 4.758473282442748, "grad_norm": 0.00210783253127572, "learning_rate": 7.091168709327356e-08, "loss": 0.0001, "step": 233760 }, { "epoch": 4.758676844783715, "grad_norm": 0.059362924184609325, "learning_rate": 7.07924893434253e-08, "loss": 0.0381, "step": 233770 }, { "epoch": 4.758880407124682, "grad_norm": 0.0008175129460390299, "learning_rate": 7.067339114527228e-08, "loss": 0.0039, "step": 233780 }, { "epoch": 4.759083969465649, "grad_norm": 0.006889359130302714, "learning_rate": 7.05543925012192e-08, "loss": 0.0296, "step": 233790 }, { "epoch": 4.759287531806616, "grad_norm": 0.0016636498791617834, "learning_rate": 7.043549341366807e-08, "loss": 0.0001, "step": 233800 }, { "epoch": 4.759491094147583, "grad_norm": 0.11058664147719552, "learning_rate": 7.031669388502249e-08, "loss": 0.0011, "step": 233810 }, { "epoch": 4.759694656488549, "grad_norm": 0.01725836498376987, "learning_rate": 7.019799391768056e-08, "loss": 0.0005, "step": 233820 }, { "epoch": 4.759898218829517, "grad_norm": 0.00528324799397996, "learning_rate": 7.00793935140387e-08, "loss": 0.0537, "step": 233830 }, { "epoch": 4.7601017811704835, "grad_norm": 0.03518721872190049, "learning_rate": 6.996089267649442e-08, "loss": 0.0067, "step": 233840 }, { "epoch": 4.76030534351145, "grad_norm": 0.0031047848915718387, "learning_rate": 6.984249140743915e-08, "loss": 0.0352, "step": 233850 }, { "epoch": 4.760508905852417, "grad_norm": 0.04592329559087483, "learning_rate": 6.972418970926487e-08, "loss": 0.0261, "step": 233860 }, { "epoch": 4.760712468193384, "grad_norm": 0.11389110871950978, "learning_rate": 6.960598758436076e-08, "loss": 0.0234, "step": 233870 }, { "epoch": 4.760916030534351, "grad_norm": 0.009043983173035616, "learning_rate": 6.948788503511383e-08, "loss": 0.0005, "step": 233880 }, { "epoch": 4.761119592875318, "grad_norm": 0.008262223307886868, "learning_rate": 6.936988206390993e-08, "loss": 0.0006, "step": 233890 }, { "epoch": 4.761323155216285, "grad_norm": 0.0020373092520833786, "learning_rate": 6.925197867313161e-08, "loss": 0.0002, "step": 233900 }, { "epoch": 4.761526717557252, "grad_norm": 0.0009817039973478011, "learning_rate": 6.913417486516027e-08, "loss": 0.0, "step": 233910 }, { "epoch": 4.761730279898218, "grad_norm": 0.010981960567265879, "learning_rate": 6.901647064237516e-08, "loss": 0.0001, "step": 233920 }, { "epoch": 4.761933842239186, "grad_norm": 0.010249105481533818, "learning_rate": 6.889886600715324e-08, "loss": 0.0002, "step": 233930 }, { "epoch": 4.7621374045801526, "grad_norm": 0.0042465281872203706, "learning_rate": 6.878136096187093e-08, "loss": 0.0326, "step": 233940 }, { "epoch": 4.762340966921119, "grad_norm": 0.0022416595349271273, "learning_rate": 6.866395550889915e-08, "loss": 0.0002, "step": 233950 }, { "epoch": 4.762544529262087, "grad_norm": 0.45074035898249976, "learning_rate": 6.854664965061098e-08, "loss": 0.0003, "step": 233960 }, { "epoch": 4.762748091603053, "grad_norm": 0.013896664473622089, "learning_rate": 6.842944338937507e-08, "loss": 0.0631, "step": 233970 }, { "epoch": 4.76295165394402, "grad_norm": 7.621859435781186, "learning_rate": 6.831233672755733e-08, "loss": 0.044, "step": 233980 }, { "epoch": 4.7631552162849875, "grad_norm": 0.03304993951549095, "learning_rate": 6.819532966752529e-08, "loss": 0.0002, "step": 233990 }, { "epoch": 4.763358778625954, "grad_norm": 0.003528746070263965, "learning_rate": 6.807842221163986e-08, "loss": 0.0471, "step": 234000 }, { "epoch": 4.763562340966921, "grad_norm": 0.00281771616179817, "learning_rate": 6.796161436226301e-08, "loss": 0.0011, "step": 234010 }, { "epoch": 4.763765903307888, "grad_norm": 0.0027793501834663522, "learning_rate": 6.784490612175398e-08, "loss": 0.0001, "step": 234020 }, { "epoch": 4.763969465648855, "grad_norm": 0.0014370717540129006, "learning_rate": 6.772829749246979e-08, "loss": 0.0342, "step": 234030 }, { "epoch": 4.764173027989822, "grad_norm": 0.058118860590976866, "learning_rate": 6.76117884767652e-08, "loss": 0.0001, "step": 234040 }, { "epoch": 4.764376590330789, "grad_norm": 0.009675682497340073, "learning_rate": 6.749537907699333e-08, "loss": 0.0321, "step": 234050 }, { "epoch": 4.764580152671756, "grad_norm": 0.002021787376362323, "learning_rate": 6.737906929550508e-08, "loss": 0.0113, "step": 234060 }, { "epoch": 4.764783715012722, "grad_norm": 0.008281491988168105, "learning_rate": 6.72628591346497e-08, "loss": 0.0001, "step": 234070 }, { "epoch": 4.76498727735369, "grad_norm": 0.02859435688233558, "learning_rate": 6.714674859677472e-08, "loss": 0.0001, "step": 234080 }, { "epoch": 4.7651908396946565, "grad_norm": 0.0012798139971849207, "learning_rate": 6.703073768422441e-08, "loss": 0.0481, "step": 234090 }, { "epoch": 4.765394402035623, "grad_norm": 0.000514655790444798, "learning_rate": 6.691482639934188e-08, "loss": 0.0002, "step": 234100 }, { "epoch": 4.765597964376591, "grad_norm": 0.011274093337227925, "learning_rate": 6.679901474446804e-08, "loss": 0.0021, "step": 234110 }, { "epoch": 4.765801526717557, "grad_norm": 0.044059241359831885, "learning_rate": 6.668330272194213e-08, "loss": 0.0001, "step": 234120 }, { "epoch": 4.766005089058524, "grad_norm": 0.004517867193256611, "learning_rate": 6.656769033410115e-08, "loss": 0.0001, "step": 234130 }, { "epoch": 4.7662086513994915, "grad_norm": 0.00044709289770767513, "learning_rate": 6.645217758327938e-08, "loss": 0.0389, "step": 234140 }, { "epoch": 4.766412213740458, "grad_norm": 0.03069281878802821, "learning_rate": 6.633676447181103e-08, "loss": 0.0, "step": 234150 }, { "epoch": 4.766615776081425, "grad_norm": 0.11970835905994749, "learning_rate": 6.622145100202593e-08, "loss": 0.0092, "step": 234160 }, { "epoch": 4.766819338422392, "grad_norm": 0.005319238510185602, "learning_rate": 6.610623717625276e-08, "loss": 0.0439, "step": 234170 }, { "epoch": 4.767022900763359, "grad_norm": 0.002260367176504221, "learning_rate": 6.599112299681965e-08, "loss": 0.0322, "step": 234180 }, { "epoch": 4.7672264631043255, "grad_norm": 0.0053767988847468915, "learning_rate": 6.587610846605031e-08, "loss": 0.0002, "step": 234190 }, { "epoch": 4.767430025445293, "grad_norm": 0.029249945332372147, "learning_rate": 6.576119358626843e-08, "loss": 0.0318, "step": 234200 }, { "epoch": 4.76763358778626, "grad_norm": 5.660474845608321, "learning_rate": 6.564637835979437e-08, "loss": 0.0191, "step": 234210 }, { "epoch": 4.767837150127226, "grad_norm": 0.003261134350213411, "learning_rate": 6.553166278894685e-08, "loss": 0.0818, "step": 234220 }, { "epoch": 4.768040712468194, "grad_norm": 0.0235429279105149, "learning_rate": 6.541704687604289e-08, "loss": 0.0029, "step": 234230 }, { "epoch": 4.7682442748091605, "grad_norm": 0.0011732688373720316, "learning_rate": 6.530253062339787e-08, "loss": 0.0004, "step": 234240 }, { "epoch": 4.768447837150127, "grad_norm": 0.0485309792050901, "learning_rate": 6.518811403332382e-08, "loss": 0.0007, "step": 234250 }, { "epoch": 4.768651399491095, "grad_norm": 0.008571534830071443, "learning_rate": 6.507379710813167e-08, "loss": 0.012, "step": 234260 }, { "epoch": 4.768854961832061, "grad_norm": 0.006693118782487283, "learning_rate": 6.495957985013013e-08, "loss": 0.0027, "step": 234270 }, { "epoch": 4.769058524173028, "grad_norm": 0.005794398043226993, "learning_rate": 6.48454622616268e-08, "loss": 0.0298, "step": 234280 }, { "epoch": 4.7692620865139945, "grad_norm": 0.0002751384195338028, "learning_rate": 6.47314443449254e-08, "loss": 0.0, "step": 234290 }, { "epoch": 4.769465648854962, "grad_norm": 0.015412133345231017, "learning_rate": 6.461752610232908e-08, "loss": 0.035, "step": 234300 }, { "epoch": 4.769669211195929, "grad_norm": 0.03589095460956952, "learning_rate": 6.450370753613877e-08, "loss": 0.0001, "step": 234310 }, { "epoch": 4.769872773536895, "grad_norm": 0.002655648768067875, "learning_rate": 6.438998864865265e-08, "loss": 0.0001, "step": 234320 }, { "epoch": 4.770076335877863, "grad_norm": 0.047553676311249966, "learning_rate": 6.427636944216776e-08, "loss": 0.017, "step": 234330 }, { "epoch": 4.7702798982188295, "grad_norm": 0.00014821230126222136, "learning_rate": 6.416284991897892e-08, "loss": 0.0149, "step": 234340 }, { "epoch": 4.770483460559796, "grad_norm": 0.002070802490367085, "learning_rate": 6.40494300813782e-08, "loss": 0.0041, "step": 234350 }, { "epoch": 4.770687022900764, "grad_norm": 0.00017262061987514642, "learning_rate": 6.393610993165766e-08, "loss": 0.0002, "step": 234360 }, { "epoch": 4.77089058524173, "grad_norm": 0.01537756283414676, "learning_rate": 6.382288947210436e-08, "loss": 0.0, "step": 234370 }, { "epoch": 4.771094147582697, "grad_norm": 0.003655806518937316, "learning_rate": 6.370976870500533e-08, "loss": 0.0002, "step": 234380 }, { "epoch": 4.771297709923664, "grad_norm": 0.00010976709303397765, "learning_rate": 6.3596747632646e-08, "loss": 0.0001, "step": 234390 }, { "epoch": 4.771501272264631, "grad_norm": 0.002375698141432633, "learning_rate": 6.348382625730786e-08, "loss": 0.0002, "step": 234400 }, { "epoch": 4.771704834605598, "grad_norm": 0.0028630067910570757, "learning_rate": 6.337100458127188e-08, "loss": 0.0076, "step": 234410 }, { "epoch": 4.771908396946565, "grad_norm": 0.00475916974034765, "learning_rate": 6.325828260681733e-08, "loss": 0.0001, "step": 234420 }, { "epoch": 4.772111959287532, "grad_norm": 0.0073294454235085064, "learning_rate": 6.314566033622016e-08, "loss": 0.0422, "step": 234430 }, { "epoch": 4.7723155216284985, "grad_norm": 0.009349829529846589, "learning_rate": 6.303313777175468e-08, "loss": 0.0, "step": 234440 }, { "epoch": 4.772519083969466, "grad_norm": 0.020339756312170474, "learning_rate": 6.292071491569407e-08, "loss": 0.0223, "step": 234450 }, { "epoch": 4.772722646310433, "grad_norm": 0.07064051649767583, "learning_rate": 6.280839177030818e-08, "loss": 0.0174, "step": 234460 }, { "epoch": 4.772926208651399, "grad_norm": 0.0004891642410830688, "learning_rate": 6.26961683378663e-08, "loss": 0.0279, "step": 234470 }, { "epoch": 4.773129770992367, "grad_norm": 0.2642091857395785, "learning_rate": 6.258404462063438e-08, "loss": 0.0002, "step": 234480 }, { "epoch": 4.773333333333333, "grad_norm": 0.0031594404358986842, "learning_rate": 6.247202062087676e-08, "loss": 0.0001, "step": 234490 }, { "epoch": 4.7735368956743, "grad_norm": 0.006062631131844676, "learning_rate": 6.236009634085661e-08, "loss": 0.0051, "step": 234500 }, { "epoch": 4.773740458015267, "grad_norm": 0.0007743417930903513, "learning_rate": 6.224827178283377e-08, "loss": 0.0001, "step": 234510 }, { "epoch": 4.773944020356234, "grad_norm": 0.0008230217640686225, "learning_rate": 6.213654694906702e-08, "loss": 0.0002, "step": 234520 }, { "epoch": 4.774147582697201, "grad_norm": 0.0003113002726226279, "learning_rate": 6.202492184181175e-08, "loss": 0.0836, "step": 234530 }, { "epoch": 4.7743511450381675, "grad_norm": 0.013482872996423628, "learning_rate": 6.191339646332395e-08, "loss": 0.0496, "step": 234540 }, { "epoch": 4.774554707379135, "grad_norm": 0.00249688794411969, "learning_rate": 6.18019708158557e-08, "loss": 0.0002, "step": 234550 }, { "epoch": 4.774758269720102, "grad_norm": 0.006442189949492275, "learning_rate": 6.169064490165633e-08, "loss": 0.0021, "step": 234560 }, { "epoch": 4.774961832061068, "grad_norm": 0.40908081507652694, "learning_rate": 6.157941872297513e-08, "loss": 0.0736, "step": 234570 }, { "epoch": 4.775165394402036, "grad_norm": 0.0006788760748535465, "learning_rate": 6.146829228205809e-08, "loss": 0.0001, "step": 234580 }, { "epoch": 4.775368956743002, "grad_norm": 0.026539967832394987, "learning_rate": 6.135726558115007e-08, "loss": 0.0354, "step": 234590 }, { "epoch": 4.775572519083969, "grad_norm": 0.0009061579328179763, "learning_rate": 6.124633862249263e-08, "loss": 0.0001, "step": 234600 }, { "epoch": 4.775776081424937, "grad_norm": 0.04007918180889637, "learning_rate": 6.113551140832674e-08, "loss": 0.0015, "step": 234610 }, { "epoch": 4.775979643765903, "grad_norm": 0.006563767173695551, "learning_rate": 6.102478394089006e-08, "loss": 0.0782, "step": 234620 }, { "epoch": 4.77618320610687, "grad_norm": 0.0004450379670239186, "learning_rate": 6.091415622241913e-08, "loss": 0.0, "step": 234630 }, { "epoch": 4.776386768447837, "grad_norm": 0.023343773365568622, "learning_rate": 6.08036282551483e-08, "loss": 0.0001, "step": 234640 }, { "epoch": 4.776590330788804, "grad_norm": 0.0001724932396481244, "learning_rate": 6.069320004131019e-08, "loss": 0.0267, "step": 234650 }, { "epoch": 4.776793893129771, "grad_norm": 0.020053538365669487, "learning_rate": 6.05828715831347e-08, "loss": 0.0523, "step": 234660 }, { "epoch": 4.776997455470738, "grad_norm": 0.00829378474649126, "learning_rate": 6.047264288285004e-08, "loss": 0.0288, "step": 234670 }, { "epoch": 4.777201017811705, "grad_norm": 0.22154592466843867, "learning_rate": 6.036251394268223e-08, "loss": 0.0003, "step": 234680 }, { "epoch": 4.777404580152671, "grad_norm": 0.0015569883468612214, "learning_rate": 6.025248476485557e-08, "loss": 0.0337, "step": 234690 }, { "epoch": 4.777608142493639, "grad_norm": 0.002538269955235182, "learning_rate": 6.014255535159219e-08, "loss": 0.0008, "step": 234700 }, { "epoch": 4.777811704834606, "grad_norm": 3.9080020337162353, "learning_rate": 6.00327257051131e-08, "loss": 0.0035, "step": 234710 }, { "epoch": 4.778015267175572, "grad_norm": 0.004676504626651808, "learning_rate": 5.992299582763483e-08, "loss": 0.0345, "step": 234720 }, { "epoch": 4.77821882951654, "grad_norm": 0.024097290253035128, "learning_rate": 5.981336572137509e-08, "loss": 0.0001, "step": 234730 }, { "epoch": 4.778422391857506, "grad_norm": 0.03142077303092189, "learning_rate": 5.970383538854763e-08, "loss": 0.0656, "step": 234740 }, { "epoch": 4.778625954198473, "grad_norm": 0.012463508067640867, "learning_rate": 5.9594404831363474e-08, "loss": 0.0001, "step": 234750 }, { "epoch": 4.7788295165394405, "grad_norm": 0.01346661247979604, "learning_rate": 5.9485074052033634e-08, "loss": 0.0021, "step": 234760 }, { "epoch": 4.779033078880407, "grad_norm": 0.010786126659368026, "learning_rate": 5.9375843052766336e-08, "loss": 0.017, "step": 234770 }, { "epoch": 4.779236641221374, "grad_norm": 0.0013778454059775055, "learning_rate": 5.9266711835767046e-08, "loss": 0.0003, "step": 234780 }, { "epoch": 4.779440203562341, "grad_norm": 0.006121783656011984, "learning_rate": 5.915768040324066e-08, "loss": 0.0008, "step": 234790 }, { "epoch": 4.779643765903308, "grad_norm": 16.4866659960395, "learning_rate": 5.9048748757388195e-08, "loss": 0.0517, "step": 234800 }, { "epoch": 4.779847328244275, "grad_norm": 0.007390952463341277, "learning_rate": 5.893991690041068e-08, "loss": 0.0001, "step": 234810 }, { "epoch": 4.780050890585242, "grad_norm": 0.12323924319398176, "learning_rate": 5.883118483450523e-08, "loss": 0.0001, "step": 234820 }, { "epoch": 4.780254452926209, "grad_norm": 0.026603211966939766, "learning_rate": 5.8722552561867876e-08, "loss": 0.0001, "step": 234830 }, { "epoch": 4.780458015267175, "grad_norm": 0.008104042527949613, "learning_rate": 5.8614020084693525e-08, "loss": 0.001, "step": 234840 }, { "epoch": 4.780661577608143, "grad_norm": 0.005203662147576568, "learning_rate": 5.850558740517265e-08, "loss": 0.0001, "step": 234850 }, { "epoch": 4.7808651399491096, "grad_norm": 0.001281844917454968, "learning_rate": 5.839725452549683e-08, "loss": 0.0061, "step": 234860 }, { "epoch": 4.781068702290076, "grad_norm": 0.0019030242867870038, "learning_rate": 5.828902144785265e-08, "loss": 0.0293, "step": 234870 }, { "epoch": 4.781272264631044, "grad_norm": 0.030109854573040218, "learning_rate": 5.818088817442724e-08, "loss": 0.0276, "step": 234880 }, { "epoch": 4.78147582697201, "grad_norm": 0.018892511026882042, "learning_rate": 5.807285470740332e-08, "loss": 0.0514, "step": 234890 }, { "epoch": 4.781679389312977, "grad_norm": 0.0027249543197356506, "learning_rate": 5.7964921048963005e-08, "loss": 0.0123, "step": 234900 }, { "epoch": 4.7818829516539445, "grad_norm": 0.025857390299284695, "learning_rate": 5.7857087201286796e-08, "loss": 0.0172, "step": 234910 }, { "epoch": 4.782086513994911, "grad_norm": 0.003700590216917385, "learning_rate": 5.774935316655239e-08, "loss": 0.0, "step": 234920 }, { "epoch": 4.782290076335878, "grad_norm": 0.0012762178935071158, "learning_rate": 5.764171894693471e-08, "loss": 0.0001, "step": 234930 }, { "epoch": 4.782493638676844, "grad_norm": 1.7149132079162148, "learning_rate": 5.7534184544608684e-08, "loss": 0.0006, "step": 234940 }, { "epoch": 4.782697201017812, "grad_norm": 0.010390941501204143, "learning_rate": 5.742674996174591e-08, "loss": 0.0151, "step": 234950 }, { "epoch": 4.782900763358779, "grad_norm": 0.03602967266931812, "learning_rate": 5.7319415200515206e-08, "loss": 0.0005, "step": 234960 }, { "epoch": 4.783104325699745, "grad_norm": 8.606395447055988, "learning_rate": 5.721218026308539e-08, "loss": 0.0969, "step": 234970 }, { "epoch": 4.783307888040713, "grad_norm": 0.017506380949898228, "learning_rate": 5.710504515162196e-08, "loss": 0.0149, "step": 234980 }, { "epoch": 4.783511450381679, "grad_norm": 0.021185046013743544, "learning_rate": 5.6998009868288185e-08, "loss": 0.0001, "step": 234990 }, { "epoch": 4.783715012722646, "grad_norm": 0.0029461853851692897, "learning_rate": 5.6891074415246216e-08, "loss": 0.026, "step": 235000 }, { "epoch": 4.7839185750636135, "grad_norm": 0.00805334522349897, "learning_rate": 5.6784238794656e-08, "loss": 0.0598, "step": 235010 }, { "epoch": 4.78412213740458, "grad_norm": 0.005820578591203202, "learning_rate": 5.667750300867469e-08, "loss": 0.0001, "step": 235020 }, { "epoch": 4.784325699745547, "grad_norm": 0.012995410780248929, "learning_rate": 5.6570867059458356e-08, "loss": 0.0005, "step": 235030 }, { "epoch": 4.784529262086514, "grad_norm": 0.00799650609604324, "learning_rate": 5.646433094916026e-08, "loss": 0.0001, "step": 235040 }, { "epoch": 4.784732824427481, "grad_norm": 0.0031259325781648924, "learning_rate": 5.635789467993202e-08, "loss": 0.0299, "step": 235050 }, { "epoch": 4.784936386768448, "grad_norm": 0.00037544647803743167, "learning_rate": 5.625155825392359e-08, "loss": 0.0045, "step": 235060 }, { "epoch": 4.785139949109415, "grad_norm": 0.0017728650074115375, "learning_rate": 5.6145321673282684e-08, "loss": 0.0091, "step": 235070 }, { "epoch": 4.785343511450382, "grad_norm": 0.004263045442549307, "learning_rate": 5.6039184940154256e-08, "loss": 0.0, "step": 235080 }, { "epoch": 4.785547073791348, "grad_norm": 0.009370527983376823, "learning_rate": 5.5933148056682706e-08, "loss": 0.0003, "step": 235090 }, { "epoch": 4.785750636132316, "grad_norm": 0.013358711757235133, "learning_rate": 5.5827211025009096e-08, "loss": 0.0318, "step": 235100 }, { "epoch": 4.7859541984732825, "grad_norm": 0.0013003465497157466, "learning_rate": 5.572137384727283e-08, "loss": 0.0001, "step": 235110 }, { "epoch": 4.786157760814249, "grad_norm": 0.0011401750045673196, "learning_rate": 5.561563652561164e-08, "loss": 0.0319, "step": 235120 }, { "epoch": 4.786361323155216, "grad_norm": 0.001685644662208073, "learning_rate": 5.5509999062161037e-08, "loss": 0.0326, "step": 235130 }, { "epoch": 4.786564885496183, "grad_norm": 0.004611149444169599, "learning_rate": 5.540446145905431e-08, "loss": 0.0305, "step": 235140 }, { "epoch": 4.78676844783715, "grad_norm": 0.0017326359164083545, "learning_rate": 5.5299023718423106e-08, "loss": 0.0002, "step": 235150 }, { "epoch": 4.786972010178117, "grad_norm": 0.0028348777904684706, "learning_rate": 5.519368584239682e-08, "loss": 0.0185, "step": 235160 }, { "epoch": 4.787175572519084, "grad_norm": 0.008975948251326768, "learning_rate": 5.508844783310319e-08, "loss": 0.0001, "step": 235170 }, { "epoch": 4.787379134860051, "grad_norm": 0.0013751486067261275, "learning_rate": 5.498330969266719e-08, "loss": 0.0, "step": 235180 }, { "epoch": 4.787582697201017, "grad_norm": 0.014597591541661464, "learning_rate": 5.487827142321212e-08, "loss": 0.0006, "step": 235190 }, { "epoch": 4.787786259541985, "grad_norm": 14.594334330445589, "learning_rate": 5.477333302686016e-08, "loss": 0.0313, "step": 235200 }, { "epoch": 4.7879898218829515, "grad_norm": 0.013688070592831943, "learning_rate": 5.466849450572964e-08, "loss": 0.0212, "step": 235210 }, { "epoch": 4.788193384223918, "grad_norm": 0.0034254203236206023, "learning_rate": 5.4563755861938295e-08, "loss": 0.0, "step": 235220 }, { "epoch": 4.788396946564886, "grad_norm": 0.1524824640740534, "learning_rate": 5.445911709760221e-08, "loss": 0.0004, "step": 235230 }, { "epoch": 4.788600508905852, "grad_norm": 0.000847376510706088, "learning_rate": 5.4354578214833586e-08, "loss": 0.0001, "step": 235240 }, { "epoch": 4.788804071246819, "grad_norm": 26.477487821196256, "learning_rate": 5.4250139215744626e-08, "loss": 0.1115, "step": 235250 }, { "epoch": 4.7890076335877865, "grad_norm": 0.025332899083259544, "learning_rate": 5.414580010244419e-08, "loss": 0.0223, "step": 235260 }, { "epoch": 4.789211195928753, "grad_norm": 0.9141034683221563, "learning_rate": 5.404156087703894e-08, "loss": 0.0003, "step": 235270 }, { "epoch": 4.78941475826972, "grad_norm": 0.1412200218097199, "learning_rate": 5.393742154163495e-08, "loss": 0.0333, "step": 235280 }, { "epoch": 4.789618320610687, "grad_norm": 0.013412421494224874, "learning_rate": 5.3833382098335555e-08, "loss": 0.0001, "step": 235290 }, { "epoch": 4.789821882951654, "grad_norm": 0.005204789622106406, "learning_rate": 5.3729442549240726e-08, "loss": 0.0006, "step": 235300 }, { "epoch": 4.7900254452926205, "grad_norm": 0.19866077269263577, "learning_rate": 5.362560289645158e-08, "loss": 0.0001, "step": 235310 }, { "epoch": 4.790229007633588, "grad_norm": 0.2787914582211359, "learning_rate": 5.35218631420642e-08, "loss": 0.0003, "step": 235320 }, { "epoch": 4.790432569974555, "grad_norm": 0.009908149827173668, "learning_rate": 5.3418223288173033e-08, "loss": 0.0095, "step": 235330 }, { "epoch": 4.790636132315521, "grad_norm": 0.017822858447826242, "learning_rate": 5.331468333687251e-08, "loss": 0.0002, "step": 235340 }, { "epoch": 4.790839694656489, "grad_norm": 0.0010785966778568256, "learning_rate": 5.321124329025318e-08, "loss": 0.0001, "step": 235350 }, { "epoch": 4.7910432569974555, "grad_norm": 0.008972907729465353, "learning_rate": 5.310790315040393e-08, "loss": 0.0001, "step": 235360 }, { "epoch": 4.791246819338422, "grad_norm": 0.005518526987212797, "learning_rate": 5.300466291941253e-08, "loss": 0.0001, "step": 235370 }, { "epoch": 4.79145038167939, "grad_norm": 0.0032407454857881035, "learning_rate": 5.290152259936399e-08, "loss": 0.03, "step": 235380 }, { "epoch": 4.791653944020356, "grad_norm": 0.16916919419746737, "learning_rate": 5.279848219234052e-08, "loss": 0.0246, "step": 235390 }, { "epoch": 4.791857506361323, "grad_norm": 0.009629665036727221, "learning_rate": 5.269554170042324e-08, "loss": 0.0086, "step": 235400 }, { "epoch": 4.79206106870229, "grad_norm": 0.0033188900782741087, "learning_rate": 5.2592701125692146e-08, "loss": 0.0201, "step": 235410 }, { "epoch": 4.792264631043257, "grad_norm": 0.010435071458483079, "learning_rate": 5.248996047022337e-08, "loss": 0.0001, "step": 235420 }, { "epoch": 4.792468193384224, "grad_norm": 0.010903479858784577, "learning_rate": 5.238731973609246e-08, "loss": 0.0404, "step": 235430 }, { "epoch": 4.792671755725191, "grad_norm": 0.13319144558796125, "learning_rate": 5.22847789253722e-08, "loss": 0.0312, "step": 235440 }, { "epoch": 4.792875318066158, "grad_norm": 9.998659174576929, "learning_rate": 5.2182338040133174e-08, "loss": 0.0317, "step": 235450 }, { "epoch": 4.7930788804071245, "grad_norm": 0.015534809343518977, "learning_rate": 5.207999708244427e-08, "loss": 0.0002, "step": 235460 }, { "epoch": 4.793282442748092, "grad_norm": 0.0011008910110840838, "learning_rate": 5.197775605437383e-08, "loss": 0.0, "step": 235470 }, { "epoch": 4.793486005089059, "grad_norm": 0.002437785498574673, "learning_rate": 5.18756149579841e-08, "loss": 0.0245, "step": 235480 }, { "epoch": 4.793689567430025, "grad_norm": 0.007866980221775406, "learning_rate": 5.177357379534009e-08, "loss": 0.0001, "step": 235490 }, { "epoch": 4.793893129770993, "grad_norm": 0.0007373955917402089, "learning_rate": 5.167163256850294e-08, "loss": 0.0004, "step": 235500 }, { "epoch": 4.794096692111959, "grad_norm": 0.22502604304931526, "learning_rate": 5.1569791279529324e-08, "loss": 0.0003, "step": 235510 }, { "epoch": 4.794300254452926, "grad_norm": 0.024934406080889947, "learning_rate": 5.1468049930477604e-08, "loss": 0.0427, "step": 235520 }, { "epoch": 4.794503816793894, "grad_norm": 0.011348528251792457, "learning_rate": 5.13664085234028e-08, "loss": 0.0001, "step": 235530 }, { "epoch": 4.79470737913486, "grad_norm": 0.0050706477338377425, "learning_rate": 5.1264867060356607e-08, "loss": 0.0001, "step": 235540 }, { "epoch": 4.794910941475827, "grad_norm": 0.0022288730381722676, "learning_rate": 5.116342554339071e-08, "loss": 0.0018, "step": 235550 }, { "epoch": 4.795114503816794, "grad_norm": 0.003354287689350337, "learning_rate": 5.1062083974552925e-08, "loss": 0.0001, "step": 235560 }, { "epoch": 4.795318066157761, "grad_norm": 3.8363657341545596, "learning_rate": 5.096084235589105e-08, "loss": 0.0024, "step": 235570 }, { "epoch": 4.795521628498728, "grad_norm": 0.02249647178056775, "learning_rate": 5.085970068944901e-08, "loss": 0.0003, "step": 235580 }, { "epoch": 4.795725190839694, "grad_norm": 0.0034556698289567233, "learning_rate": 5.075865897727017e-08, "loss": 0.0001, "step": 235590 }, { "epoch": 4.795928753180662, "grad_norm": 0.009797261573887371, "learning_rate": 5.065771722139456e-08, "loss": 0.0005, "step": 235600 }, { "epoch": 4.796132315521628, "grad_norm": 0.0042306437984769565, "learning_rate": 5.055687542386112e-08, "loss": 0.0079, "step": 235610 }, { "epoch": 4.796335877862595, "grad_norm": 14.048187702550049, "learning_rate": 5.045613358670598e-08, "loss": 0.0403, "step": 235620 }, { "epoch": 4.796539440203563, "grad_norm": 0.010570653235877173, "learning_rate": 5.035549171196474e-08, "loss": 0.0042, "step": 235630 }, { "epoch": 4.796743002544529, "grad_norm": 0.0015783627129827456, "learning_rate": 5.0254949801669114e-08, "loss": 0.0016, "step": 235640 }, { "epoch": 4.796946564885496, "grad_norm": 0.0076178046188490455, "learning_rate": 5.015450785785025e-08, "loss": 0.0002, "step": 235650 }, { "epoch": 4.797150127226463, "grad_norm": 0.016210470937034274, "learning_rate": 5.0054165882536534e-08, "loss": 0.0055, "step": 235660 }, { "epoch": 4.79735368956743, "grad_norm": 0.0011220479351236685, "learning_rate": 4.995392387775355e-08, "loss": 0.0002, "step": 235670 }, { "epoch": 4.797557251908397, "grad_norm": 0.00204717963205764, "learning_rate": 4.9853781845528025e-08, "loss": 0.0004, "step": 235680 }, { "epoch": 4.797760814249364, "grad_norm": 0.001313790122315558, "learning_rate": 4.975373978788001e-08, "loss": 0.0304, "step": 235690 }, { "epoch": 4.797964376590331, "grad_norm": 0.005214899007006915, "learning_rate": 4.965379770683121e-08, "loss": 0.0003, "step": 235700 }, { "epoch": 4.7981679389312974, "grad_norm": 0.021556890459575585, "learning_rate": 4.9553955604400575e-08, "loss": 0.0025, "step": 235710 }, { "epoch": 4.798371501272265, "grad_norm": 0.02982669252995895, "learning_rate": 4.945421348260371e-08, "loss": 0.0365, "step": 235720 }, { "epoch": 4.798575063613232, "grad_norm": 0.07622750101909696, "learning_rate": 4.935457134345456e-08, "loss": 0.0572, "step": 235730 }, { "epoch": 4.798778625954198, "grad_norm": 0.0018098669316795708, "learning_rate": 4.9255029188967074e-08, "loss": 0.019, "step": 235740 }, { "epoch": 4.798982188295166, "grad_norm": 0.0006018361775745037, "learning_rate": 4.9155587021150754e-08, "loss": 0.0094, "step": 235750 }, { "epoch": 4.799185750636132, "grad_norm": 0.017097321427869235, "learning_rate": 4.905624484201399e-08, "loss": 0.0001, "step": 235760 }, { "epoch": 4.799389312977099, "grad_norm": 0.004634118205749393, "learning_rate": 4.8957002653562403e-08, "loss": 0.0026, "step": 235770 }, { "epoch": 4.799592875318066, "grad_norm": 0.03831368022036677, "learning_rate": 4.885786045780161e-08, "loss": 0.0015, "step": 235780 }, { "epoch": 4.799796437659033, "grad_norm": 0.0019825376482569407, "learning_rate": 4.8758818256733345e-08, "loss": 0.0306, "step": 235790 }, { "epoch": 4.8, "grad_norm": 0.003250429464948829, "learning_rate": 4.8659876052358224e-08, "loss": 0.0232, "step": 235800 }, { "epoch": 4.8002035623409665, "grad_norm": 9.3228909492945e-05, "learning_rate": 4.856103384667354e-08, "loss": 0.0065, "step": 235810 }, { "epoch": 4.800407124681934, "grad_norm": 0.0010484402805739999, "learning_rate": 4.8462291641676596e-08, "loss": 0.0273, "step": 235820 }, { "epoch": 4.800610687022901, "grad_norm": 0.0055866586879855925, "learning_rate": 4.836364943936078e-08, "loss": 0.0, "step": 235830 }, { "epoch": 4.800814249363867, "grad_norm": 0.06302713976350863, "learning_rate": 4.826510724171951e-08, "loss": 0.0001, "step": 235840 }, { "epoch": 4.801017811704835, "grad_norm": 0.015778261830818152, "learning_rate": 4.816666505074119e-08, "loss": 0.0, "step": 235850 }, { "epoch": 4.801221374045801, "grad_norm": 0.016947259272410736, "learning_rate": 4.806832286841534e-08, "loss": 0.0128, "step": 235860 }, { "epoch": 4.801424936386768, "grad_norm": 0.004278111019391875, "learning_rate": 4.7970080696728146e-08, "loss": 0.0, "step": 235870 }, { "epoch": 4.801628498727736, "grad_norm": 0.01699919388605238, "learning_rate": 4.787193853766303e-08, "loss": 0.0231, "step": 235880 }, { "epoch": 4.801832061068702, "grad_norm": 0.0003438803368836816, "learning_rate": 4.777389639320229e-08, "loss": 0.0001, "step": 235890 }, { "epoch": 4.802035623409669, "grad_norm": 0.009267978635097996, "learning_rate": 4.767595426532656e-08, "loss": 0.0002, "step": 235900 }, { "epoch": 4.802239185750636, "grad_norm": 0.003842404124125926, "learning_rate": 4.757811215601316e-08, "loss": 0.0005, "step": 235910 }, { "epoch": 4.802442748091603, "grad_norm": 0.018211328587334725, "learning_rate": 4.748037006723827e-08, "loss": 0.0482, "step": 235920 }, { "epoch": 4.80264631043257, "grad_norm": 0.013109710589685854, "learning_rate": 4.738272800097643e-08, "loss": 0.015, "step": 235930 }, { "epoch": 4.802849872773537, "grad_norm": 0.001137287422208409, "learning_rate": 4.728518595919884e-08, "loss": 0.0001, "step": 235940 }, { "epoch": 4.803053435114504, "grad_norm": 0.05636081055714042, "learning_rate": 4.7187743943876705e-08, "loss": 0.0004, "step": 235950 }, { "epoch": 4.80325699745547, "grad_norm": 1.0527649185448373, "learning_rate": 4.709040195697734e-08, "loss": 0.0364, "step": 235960 }, { "epoch": 4.803460559796438, "grad_norm": 0.004484497753416098, "learning_rate": 4.699316000046583e-08, "loss": 0.0022, "step": 235970 }, { "epoch": 4.803664122137405, "grad_norm": 0.029497694992669105, "learning_rate": 4.6896018076307283e-08, "loss": 0.0366, "step": 235980 }, { "epoch": 4.803867684478371, "grad_norm": 0.0035500801826154793, "learning_rate": 4.6798976186462895e-08, "loss": 0.0002, "step": 235990 }, { "epoch": 4.804071246819339, "grad_norm": 19.79891007470129, "learning_rate": 4.670203433289333e-08, "loss": 0.0073, "step": 236000 }, { "epoch": 4.804274809160305, "grad_norm": 9.700563379543402, "learning_rate": 4.6605192517555906e-08, "loss": 0.0454, "step": 236010 }, { "epoch": 4.804478371501272, "grad_norm": 0.005785325528511119, "learning_rate": 4.6508450742406286e-08, "loss": 0.0, "step": 236020 }, { "epoch": 4.8046819338422395, "grad_norm": 0.011995590463936685, "learning_rate": 4.6411809009398454e-08, "loss": 0.0926, "step": 236030 }, { "epoch": 4.804885496183206, "grad_norm": 0.047371738324797026, "learning_rate": 4.6315267320484746e-08, "loss": 0.0002, "step": 236040 }, { "epoch": 4.805089058524173, "grad_norm": 0.011224841294027342, "learning_rate": 4.621882567761471e-08, "loss": 0.0008, "step": 236050 }, { "epoch": 4.80529262086514, "grad_norm": 0.000285121236775968, "learning_rate": 4.612248408273512e-08, "loss": 0.0015, "step": 236060 }, { "epoch": 4.805496183206107, "grad_norm": 0.002723993550050515, "learning_rate": 4.6026242537792755e-08, "loss": 0.0021, "step": 236070 }, { "epoch": 4.805699745547074, "grad_norm": 0.004465949051938338, "learning_rate": 4.593010104473161e-08, "loss": 0.0001, "step": 236080 }, { "epoch": 4.805903307888041, "grad_norm": 0.008564451437316186, "learning_rate": 4.5834059605492365e-08, "loss": 0.0601, "step": 236090 }, { "epoch": 4.806106870229008, "grad_norm": 0.002692257265239409, "learning_rate": 4.5738118222015124e-08, "loss": 0.0008, "step": 236100 }, { "epoch": 4.806310432569974, "grad_norm": 0.04142566501238103, "learning_rate": 4.5642276896237794e-08, "loss": 0.0249, "step": 236110 }, { "epoch": 4.806513994910942, "grad_norm": 0.05079191238766208, "learning_rate": 4.5546535630096035e-08, "loss": 0.0245, "step": 236120 }, { "epoch": 4.8067175572519085, "grad_norm": 0.004189675590167176, "learning_rate": 4.545089442552275e-08, "loss": 0.0001, "step": 236130 }, { "epoch": 4.806921119592875, "grad_norm": 0.004476449584975138, "learning_rate": 4.5355353284450844e-08, "loss": 0.0001, "step": 236140 }, { "epoch": 4.807124681933843, "grad_norm": 0.006333843566665052, "learning_rate": 4.5259912208808764e-08, "loss": 0.011, "step": 236150 }, { "epoch": 4.807328244274809, "grad_norm": 0.0025688981777381, "learning_rate": 4.516457120052387e-08, "loss": 0.0337, "step": 236160 }, { "epoch": 4.807531806615776, "grad_norm": 0.0010048715601839475, "learning_rate": 4.5069330261522935e-08, "loss": 0.0003, "step": 236170 }, { "epoch": 4.8077353689567435, "grad_norm": 0.006474896713890364, "learning_rate": 4.497418939372833e-08, "loss": 0.0229, "step": 236180 }, { "epoch": 4.80793893129771, "grad_norm": 0.5944405164296264, "learning_rate": 4.487914859906239e-08, "loss": 0.0003, "step": 236190 }, { "epoch": 4.808142493638677, "grad_norm": 0.9502832639837282, "learning_rate": 4.4784207879444156e-08, "loss": 0.0006, "step": 236200 }, { "epoch": 4.808346055979644, "grad_norm": 0.4898849563682308, "learning_rate": 4.468936723679096e-08, "loss": 0.0003, "step": 236210 }, { "epoch": 4.808549618320611, "grad_norm": 0.003207758533729351, "learning_rate": 4.45946266730185e-08, "loss": 0.0113, "step": 236220 }, { "epoch": 4.8087531806615775, "grad_norm": 0.0034343664257309617, "learning_rate": 4.44999861900397e-08, "loss": 0.0001, "step": 236230 }, { "epoch": 4.808956743002544, "grad_norm": 0.00019445596856140505, "learning_rate": 4.440544578976691e-08, "loss": 0.0848, "step": 236240 }, { "epoch": 4.809160305343512, "grad_norm": 3.509461773459037, "learning_rate": 4.4311005474108604e-08, "loss": 0.0227, "step": 236250 }, { "epoch": 4.809363867684478, "grad_norm": 0.008013689638335207, "learning_rate": 4.4216665244972146e-08, "loss": 0.0001, "step": 236260 }, { "epoch": 4.809567430025445, "grad_norm": 0.08764363020077712, "learning_rate": 4.412242510426379e-08, "loss": 0.0253, "step": 236270 }, { "epoch": 4.8097709923664125, "grad_norm": 0.25238445853253816, "learning_rate": 4.402828505388534e-08, "loss": 0.0004, "step": 236280 }, { "epoch": 4.809974554707379, "grad_norm": 0.10030160756180659, "learning_rate": 4.3934245095739734e-08, "loss": 0.012, "step": 236290 }, { "epoch": 4.810178117048346, "grad_norm": 0.0066202504339271835, "learning_rate": 4.384030523172489e-08, "loss": 0.0001, "step": 236300 }, { "epoch": 4.810381679389313, "grad_norm": 0.00549445772262014, "learning_rate": 4.3746465463738177e-08, "loss": 0.0, "step": 236310 }, { "epoch": 4.81058524173028, "grad_norm": 0.007344274956436921, "learning_rate": 4.365272579367641e-08, "loss": 0.0006, "step": 236320 }, { "epoch": 4.8107888040712465, "grad_norm": 0.3799286266332236, "learning_rate": 4.3559086223430855e-08, "loss": 0.0153, "step": 236330 }, { "epoch": 4.810992366412214, "grad_norm": 0.007844791031623673, "learning_rate": 4.346554675489334e-08, "loss": 0.0003, "step": 236340 }, { "epoch": 4.811195928753181, "grad_norm": 0.0003634095721047093, "learning_rate": 4.337210738995345e-08, "loss": 0.0, "step": 236350 }, { "epoch": 4.811399491094147, "grad_norm": 0.0009411365102469867, "learning_rate": 4.3278768130497475e-08, "loss": 0.0002, "step": 236360 }, { "epoch": 4.811603053435115, "grad_norm": 0.0013983270071233028, "learning_rate": 4.318552897841111e-08, "loss": 0.0001, "step": 236370 }, { "epoch": 4.8118066157760815, "grad_norm": 0.006709722755009434, "learning_rate": 4.309238993557729e-08, "loss": 0.0321, "step": 236380 }, { "epoch": 4.812010178117048, "grad_norm": 0.0021382522526801045, "learning_rate": 4.299935100387731e-08, "loss": 0.0325, "step": 236390 }, { "epoch": 4.812213740458016, "grad_norm": 0.07812887501483969, "learning_rate": 4.290641218518965e-08, "loss": 0.0749, "step": 236400 }, { "epoch": 4.812417302798982, "grad_norm": 0.0062093987641102, "learning_rate": 4.281357348139226e-08, "loss": 0.0255, "step": 236410 }, { "epoch": 4.812620865139949, "grad_norm": 0.004301482967356785, "learning_rate": 4.272083489435919e-08, "loss": 0.0002, "step": 236420 }, { "epoch": 4.8128244274809155, "grad_norm": 0.010307516968609267, "learning_rate": 4.262819642596394e-08, "loss": 0.0002, "step": 236430 }, { "epoch": 4.813027989821883, "grad_norm": 0.008461554722963113, "learning_rate": 4.253565807807725e-08, "loss": 0.0079, "step": 236440 }, { "epoch": 4.81323155216285, "grad_norm": 0.0010856987646314592, "learning_rate": 4.244321985256816e-08, "loss": 0.0002, "step": 236450 }, { "epoch": 4.813435114503816, "grad_norm": 0.005764581928241739, "learning_rate": 4.2350881751302976e-08, "loss": 0.0001, "step": 236460 }, { "epoch": 4.813638676844784, "grad_norm": 0.004055772429231111, "learning_rate": 4.2258643776147966e-08, "loss": 0.0314, "step": 236470 }, { "epoch": 4.8138422391857505, "grad_norm": 0.26448840387023925, "learning_rate": 4.216650592896498e-08, "loss": 0.0005, "step": 236480 }, { "epoch": 4.814045801526717, "grad_norm": 0.0015402434652852068, "learning_rate": 4.207446821161421e-08, "loss": 0.0001, "step": 236490 }, { "epoch": 4.814249363867685, "grad_norm": 0.02583833772360892, "learning_rate": 4.1982530625956364e-08, "loss": 0.0, "step": 236500 }, { "epoch": 4.814452926208651, "grad_norm": 0.010819425881164406, "learning_rate": 4.189069317384664e-08, "loss": 0.0311, "step": 236510 }, { "epoch": 4.814656488549618, "grad_norm": 0.007114446008843368, "learning_rate": 4.1798955857140775e-08, "loss": 0.0002, "step": 236520 }, { "epoch": 4.814860050890585, "grad_norm": 0.0004992888950776729, "learning_rate": 4.170731867769062e-08, "loss": 0.0008, "step": 236530 }, { "epoch": 4.815063613231552, "grad_norm": 0.007995434324066936, "learning_rate": 4.161578163734803e-08, "loss": 0.0015, "step": 236540 }, { "epoch": 4.815267175572519, "grad_norm": 0.005704900780916644, "learning_rate": 4.152434473796041e-08, "loss": 0.0128, "step": 236550 }, { "epoch": 4.815470737913486, "grad_norm": 0.005546668494688126, "learning_rate": 4.143300798137573e-08, "loss": 0.0526, "step": 236560 }, { "epoch": 4.815674300254453, "grad_norm": 0.0010598279200191385, "learning_rate": 4.134177136943751e-08, "loss": 0.04, "step": 236570 }, { "epoch": 4.8158778625954195, "grad_norm": 0.032680418648074167, "learning_rate": 4.125063490398928e-08, "loss": 0.0002, "step": 236580 }, { "epoch": 4.816081424936387, "grad_norm": 5.750393347470519, "learning_rate": 4.1159598586870685e-08, "loss": 0.1092, "step": 236590 }, { "epoch": 4.816284987277354, "grad_norm": 0.18225033060578477, "learning_rate": 4.106866241992191e-08, "loss": 0.0022, "step": 236600 }, { "epoch": 4.81648854961832, "grad_norm": 0.0036034215044932327, "learning_rate": 4.0977826404977605e-08, "loss": 0.0031, "step": 236610 }, { "epoch": 4.816692111959288, "grad_norm": 0.006794772207450127, "learning_rate": 4.0887090543874075e-08, "loss": 0.0001, "step": 236620 }, { "epoch": 4.8168956743002544, "grad_norm": 0.0005398013471149543, "learning_rate": 4.079645483844263e-08, "loss": 0.0321, "step": 236630 }, { "epoch": 4.817099236641221, "grad_norm": 0.03794745065717877, "learning_rate": 4.070591929051404e-08, "loss": 0.0502, "step": 236640 }, { "epoch": 4.817302798982189, "grad_norm": 0.0031215204237276285, "learning_rate": 4.061548390191683e-08, "loss": 0.0001, "step": 236650 }, { "epoch": 4.817506361323155, "grad_norm": 5.128606251375219, "learning_rate": 4.0525148674478434e-08, "loss": 0.0064, "step": 236660 }, { "epoch": 4.817709923664122, "grad_norm": 0.003786813666182851, "learning_rate": 4.043491361002183e-08, "loss": 0.0379, "step": 236670 }, { "epoch": 4.817913486005089, "grad_norm": 0.0620885784722085, "learning_rate": 4.034477871036946e-08, "loss": 0.0002, "step": 236680 }, { "epoch": 4.818117048346056, "grad_norm": 0.0026827462200490703, "learning_rate": 4.025474397734319e-08, "loss": 0.0462, "step": 236690 }, { "epoch": 4.818320610687023, "grad_norm": 0.006848988246392963, "learning_rate": 4.0164809412760463e-08, "loss": 0.0001, "step": 236700 }, { "epoch": 4.81852417302799, "grad_norm": 5.270668868192763, "learning_rate": 4.0074975018437044e-08, "loss": 0.0157, "step": 236710 }, { "epoch": 4.818727735368957, "grad_norm": 0.004738630167561967, "learning_rate": 3.9985240796188154e-08, "loss": 0.0224, "step": 236720 }, { "epoch": 4.8189312977099235, "grad_norm": 0.007946864259324194, "learning_rate": 3.9895606747825665e-08, "loss": 0.0001, "step": 236730 }, { "epoch": 4.819134860050891, "grad_norm": 0.6675067915308525, "learning_rate": 3.980607287515981e-08, "loss": 0.0006, "step": 236740 }, { "epoch": 4.819338422391858, "grad_norm": 0.0034018839942864514, "learning_rate": 3.9716639179999125e-08, "loss": 0.0001, "step": 236750 }, { "epoch": 4.819541984732824, "grad_norm": 0.0019812274325713905, "learning_rate": 3.962730566414996e-08, "loss": 0.0413, "step": 236760 }, { "epoch": 4.819745547073792, "grad_norm": 0.001816128795657975, "learning_rate": 3.953807232941587e-08, "loss": 0.0001, "step": 236770 }, { "epoch": 4.819949109414758, "grad_norm": 0.007288100262350043, "learning_rate": 3.944893917759984e-08, "loss": 0.0008, "step": 236780 }, { "epoch": 4.820152671755725, "grad_norm": 0.011452181744078409, "learning_rate": 3.9359906210501564e-08, "loss": 0.0242, "step": 236790 }, { "epoch": 4.820356234096693, "grad_norm": 4.150612247680927e-05, "learning_rate": 3.927097342991903e-08, "loss": 0.0103, "step": 236800 }, { "epoch": 4.820559796437659, "grad_norm": 0.009661140559017666, "learning_rate": 3.9182140837648594e-08, "loss": 0.0106, "step": 236810 }, { "epoch": 4.820763358778626, "grad_norm": 0.00687480943344937, "learning_rate": 3.909340843548437e-08, "loss": 0.0002, "step": 236820 }, { "epoch": 4.820966921119593, "grad_norm": 0.07664929256592556, "learning_rate": 3.900477622521826e-08, "loss": 0.003, "step": 236830 }, { "epoch": 4.82117048346056, "grad_norm": 0.00810112980495976, "learning_rate": 3.891624420864049e-08, "loss": 0.0002, "step": 236840 }, { "epoch": 4.821374045801527, "grad_norm": 0.009037342386061402, "learning_rate": 3.8827812387539075e-08, "loss": 0.0257, "step": 236850 }, { "epoch": 4.821577608142493, "grad_norm": 0.020916043271978834, "learning_rate": 3.873948076369982e-08, "loss": 0.0001, "step": 236860 }, { "epoch": 4.821781170483461, "grad_norm": 0.006960956941043037, "learning_rate": 3.865124933890685e-08, "loss": 0.0596, "step": 236870 }, { "epoch": 4.821984732824427, "grad_norm": 0.0010726386042356046, "learning_rate": 3.8563118114942064e-08, "loss": 0.0004, "step": 236880 }, { "epoch": 4.822188295165394, "grad_norm": 0.02538831755541913, "learning_rate": 3.847508709358516e-08, "loss": 0.0001, "step": 236890 }, { "epoch": 4.822391857506362, "grad_norm": 0.006571224610015187, "learning_rate": 3.838715627661471e-08, "loss": 0.012, "step": 236900 }, { "epoch": 4.822595419847328, "grad_norm": 0.0048807080967028255, "learning_rate": 3.8299325665805965e-08, "loss": 0.0001, "step": 236910 }, { "epoch": 4.822798982188295, "grad_norm": 0.0049947818789940505, "learning_rate": 3.82115952629325e-08, "loss": 0.0002, "step": 236920 }, { "epoch": 4.823002544529262, "grad_norm": 8.965858457615992, "learning_rate": 3.812396506976734e-08, "loss": 0.0398, "step": 236930 }, { "epoch": 4.823206106870229, "grad_norm": 0.00360642453295074, "learning_rate": 3.8036435088079085e-08, "loss": 0.0006, "step": 236940 }, { "epoch": 4.823409669211196, "grad_norm": 0.027975185700677203, "learning_rate": 3.79490053196363e-08, "loss": 0.0002, "step": 236950 }, { "epoch": 4.823613231552163, "grad_norm": 0.0023178292878970885, "learning_rate": 3.7861675766204257e-08, "loss": 0.0, "step": 236960 }, { "epoch": 4.82381679389313, "grad_norm": 0.0025010834164867135, "learning_rate": 3.7774446429547085e-08, "loss": 0.0, "step": 236970 }, { "epoch": 4.824020356234096, "grad_norm": 0.009727646052417655, "learning_rate": 3.768731731142561e-08, "loss": 0.0335, "step": 236980 }, { "epoch": 4.824223918575064, "grad_norm": 0.03333729530104756, "learning_rate": 3.760028841360064e-08, "loss": 0.0353, "step": 236990 }, { "epoch": 4.824427480916031, "grad_norm": 0.0018043969074848414, "learning_rate": 3.751335973782966e-08, "loss": 0.0351, "step": 237000 }, { "epoch": 4.824631043256997, "grad_norm": 0.00010482817615859066, "learning_rate": 3.7426531285867374e-08, "loss": 0.0001, "step": 237010 }, { "epoch": 4.824834605597965, "grad_norm": 34.45027570341931, "learning_rate": 3.733980305946849e-08, "loss": 0.0096, "step": 237020 }, { "epoch": 4.825038167938931, "grad_norm": 0.012348287650822687, "learning_rate": 3.725317506038439e-08, "loss": 0.0296, "step": 237030 }, { "epoch": 4.825241730279898, "grad_norm": 0.02182667072798579, "learning_rate": 3.7166647290363676e-08, "loss": 0.0295, "step": 237040 }, { "epoch": 4.8254452926208655, "grad_norm": 0.004599032817303738, "learning_rate": 3.7080219751154944e-08, "loss": 0.0001, "step": 237050 }, { "epoch": 4.825648854961832, "grad_norm": 0.04695695064256836, "learning_rate": 3.699389244450347e-08, "loss": 0.0001, "step": 237060 }, { "epoch": 4.825852417302799, "grad_norm": 0.00044917467699055996, "learning_rate": 3.690766537215229e-08, "loss": 0.0043, "step": 237070 }, { "epoch": 4.826055979643765, "grad_norm": 0.0028642058470122935, "learning_rate": 3.682153853584392e-08, "loss": 0.0003, "step": 237080 }, { "epoch": 4.826259541984733, "grad_norm": 0.0016256028363350727, "learning_rate": 3.6735511937316394e-08, "loss": 0.0003, "step": 237090 }, { "epoch": 4.8264631043257, "grad_norm": 0.013354978463284522, "learning_rate": 3.6649585578308334e-08, "loss": 0.0001, "step": 237100 }, { "epoch": 4.826666666666666, "grad_norm": 0.0036881286052445183, "learning_rate": 3.6563759460553904e-08, "loss": 0.0004, "step": 237110 }, { "epoch": 4.826870229007634, "grad_norm": 0.001913809588180861, "learning_rate": 3.647803358578783e-08, "loss": 0.0431, "step": 237120 }, { "epoch": 4.8270737913486, "grad_norm": 15.004896625656105, "learning_rate": 3.639240795574095e-08, "loss": 0.0191, "step": 237130 }, { "epoch": 4.827277353689567, "grad_norm": 0.018907855471007592, "learning_rate": 3.6306882572141875e-08, "loss": 0.0023, "step": 237140 }, { "epoch": 4.8274809160305345, "grad_norm": 0.014524420594410814, "learning_rate": 3.622145743671923e-08, "loss": 0.0002, "step": 237150 }, { "epoch": 4.827684478371501, "grad_norm": 0.0003241006960171356, "learning_rate": 3.613613255119719e-08, "loss": 0.0002, "step": 237160 }, { "epoch": 4.827888040712468, "grad_norm": 0.006799473977499927, "learning_rate": 3.605090791729937e-08, "loss": 0.0547, "step": 237170 }, { "epoch": 4.828091603053435, "grad_norm": 6.74399154028236, "learning_rate": 3.5965783536746627e-08, "loss": 0.1108, "step": 237180 }, { "epoch": 4.828295165394402, "grad_norm": 0.0069221108216185245, "learning_rate": 3.5880759411259233e-08, "loss": 0.0002, "step": 237190 }, { "epoch": 4.828498727735369, "grad_norm": 0.010579028415000392, "learning_rate": 3.579583554255306e-08, "loss": 0.013, "step": 237200 }, { "epoch": 4.828702290076336, "grad_norm": 0.0002015541965845984, "learning_rate": 3.571101193234394e-08, "loss": 0.0023, "step": 237210 }, { "epoch": 4.828905852417303, "grad_norm": 0.19280226466865036, "learning_rate": 3.562628858234496e-08, "loss": 0.0012, "step": 237220 }, { "epoch": 4.829109414758269, "grad_norm": 0.14891731334654382, "learning_rate": 3.554166549426696e-08, "loss": 0.0152, "step": 237230 }, { "epoch": 4.829312977099237, "grad_norm": 0.016008705623014298, "learning_rate": 3.54571426698197e-08, "loss": 0.0001, "step": 237240 }, { "epoch": 4.8295165394402035, "grad_norm": 0.02760819594932866, "learning_rate": 3.537272011070958e-08, "loss": 0.0308, "step": 237250 }, { "epoch": 4.82972010178117, "grad_norm": 0.0012078884375067687, "learning_rate": 3.528839781864135e-08, "loss": 0.0229, "step": 237260 }, { "epoch": 4.829923664122138, "grad_norm": 0.0009890949041667964, "learning_rate": 3.5204175795319205e-08, "loss": 0.0001, "step": 237270 }, { "epoch": 4.830127226463104, "grad_norm": 0.0050013982964681805, "learning_rate": 3.512005404244234e-08, "loss": 0.0041, "step": 237280 }, { "epoch": 4.830330788804071, "grad_norm": 0.5597915559189794, "learning_rate": 3.5036032561711064e-08, "loss": 0.0006, "step": 237290 }, { "epoch": 4.8305343511450385, "grad_norm": 0.0008891923626435308, "learning_rate": 3.4952111354822346e-08, "loss": 0.0002, "step": 237300 }, { "epoch": 4.830737913486005, "grad_norm": 0.002619525474146606, "learning_rate": 3.4868290423470394e-08, "loss": 0.0199, "step": 237310 }, { "epoch": 4.830941475826972, "grad_norm": 0.023504636180401946, "learning_rate": 3.4784569769348295e-08, "loss": 0.0001, "step": 237320 }, { "epoch": 4.831145038167939, "grad_norm": 0.0016808128174960679, "learning_rate": 3.4700949394146924e-08, "loss": 0.0006, "step": 237330 }, { "epoch": 4.831348600508906, "grad_norm": 0.19331947626215387, "learning_rate": 3.461742929955547e-08, "loss": 0.0003, "step": 237340 }, { "epoch": 4.8315521628498725, "grad_norm": 0.5220852915576001, "learning_rate": 3.453400948725982e-08, "loss": 0.0714, "step": 237350 }, { "epoch": 4.83175572519084, "grad_norm": 0.0037378887064461403, "learning_rate": 3.445068995894585e-08, "loss": 0.0009, "step": 237360 }, { "epoch": 4.831959287531807, "grad_norm": 0.049921947691784936, "learning_rate": 3.436747071629609e-08, "loss": 0.0119, "step": 237370 }, { "epoch": 4.832162849872773, "grad_norm": 0.003285498917547688, "learning_rate": 3.4284351760989766e-08, "loss": 0.0514, "step": 237380 }, { "epoch": 4.832366412213741, "grad_norm": 0.0032550158615963303, "learning_rate": 3.4201333094707746e-08, "loss": 0.0001, "step": 237390 }, { "epoch": 4.8325699745547075, "grad_norm": 0.034767107716875856, "learning_rate": 3.411841471912536e-08, "loss": 0.0004, "step": 237400 }, { "epoch": 4.832773536895674, "grad_norm": 0.008709697694042758, "learning_rate": 3.403559663591738e-08, "loss": 0.0003, "step": 237410 }, { "epoch": 4.832977099236642, "grad_norm": 0.0011382542997374135, "learning_rate": 3.395287884675691e-08, "loss": 0.0, "step": 237420 }, { "epoch": 4.833180661577608, "grad_norm": 0.01634328291375499, "learning_rate": 3.387026135331484e-08, "loss": 0.0003, "step": 237430 }, { "epoch": 4.833384223918575, "grad_norm": 0.00358460748046107, "learning_rate": 3.378774415725816e-08, "loss": 0.0001, "step": 237440 }, { "epoch": 4.833587786259542, "grad_norm": 0.07750191862662772, "learning_rate": 3.3705327260254994e-08, "loss": 0.0014, "step": 237450 }, { "epoch": 4.833791348600509, "grad_norm": 0.01403884393263831, "learning_rate": 3.3623010663969e-08, "loss": 0.0001, "step": 237460 }, { "epoch": 4.833994910941476, "grad_norm": 11.400481508057547, "learning_rate": 3.354079437006275e-08, "loss": 0.0723, "step": 237470 }, { "epoch": 4.834198473282443, "grad_norm": 0.041615231011622504, "learning_rate": 3.345867838019767e-08, "loss": 0.0002, "step": 237480 }, { "epoch": 4.83440203562341, "grad_norm": 0.0013634589332070467, "learning_rate": 3.3376662696030795e-08, "loss": 0.0008, "step": 237490 }, { "epoch": 4.8346055979643765, "grad_norm": 0.031987469140817876, "learning_rate": 3.329474731921967e-08, "loss": 0.0001, "step": 237500 }, { "epoch": 4.834809160305343, "grad_norm": 0.022088376454387843, "learning_rate": 3.321293225141797e-08, "loss": 0.0005, "step": 237510 }, { "epoch": 4.835012722646311, "grad_norm": 0.0021105749688250823, "learning_rate": 3.313121749427828e-08, "loss": 0.0001, "step": 237520 }, { "epoch": 4.835216284987277, "grad_norm": 0.0008445989802389855, "learning_rate": 3.304960304945093e-08, "loss": 0.0006, "step": 237530 }, { "epoch": 4.835419847328244, "grad_norm": 0.010205833162405965, "learning_rate": 3.296808891858405e-08, "loss": 0.0001, "step": 237540 }, { "epoch": 4.8356234096692114, "grad_norm": 0.00021547623717786977, "learning_rate": 3.2886675103324104e-08, "loss": 0.0315, "step": 237550 }, { "epoch": 4.835826972010178, "grad_norm": 0.0010502920636296569, "learning_rate": 3.2805361605315335e-08, "loss": 0.0001, "step": 237560 }, { "epoch": 4.836030534351145, "grad_norm": 0.004250339178054047, "learning_rate": 3.272414842620031e-08, "loss": 0.0002, "step": 237570 }, { "epoch": 4.836234096692112, "grad_norm": 0.0008726304126046166, "learning_rate": 3.264303556761883e-08, "loss": 0.011, "step": 237580 }, { "epoch": 4.836437659033079, "grad_norm": 0.029651901084611583, "learning_rate": 3.2562023031209036e-08, "loss": 0.0353, "step": 237590 }, { "epoch": 4.8366412213740455, "grad_norm": 31.829131001112952, "learning_rate": 3.248111081860683e-08, "loss": 0.0362, "step": 237600 }, { "epoch": 4.836844783715013, "grad_norm": 10.15809823563858, "learning_rate": 3.2400298931447584e-08, "loss": 0.0366, "step": 237610 }, { "epoch": 4.83704834605598, "grad_norm": 0.009496839470946504, "learning_rate": 3.231958737136165e-08, "loss": 0.0752, "step": 237620 }, { "epoch": 4.837251908396946, "grad_norm": 0.011578568108983618, "learning_rate": 3.223897613998051e-08, "loss": 0.0587, "step": 237630 }, { "epoch": 4.837455470737914, "grad_norm": 0.0011294169174662085, "learning_rate": 3.215846523893174e-08, "loss": 0.0055, "step": 237640 }, { "epoch": 4.8376590330788805, "grad_norm": 0.0010332164697127416, "learning_rate": 3.207805466984071e-08, "loss": 0.0015, "step": 237650 }, { "epoch": 4.837862595419847, "grad_norm": 12.001725270503423, "learning_rate": 3.1997744434332235e-08, "loss": 0.0329, "step": 237660 }, { "epoch": 4.838066157760815, "grad_norm": 0.01933326327938413, "learning_rate": 3.191753453402835e-08, "loss": 0.0001, "step": 237670 }, { "epoch": 4.838269720101781, "grad_norm": 0.0010327327353720306, "learning_rate": 3.183742497054887e-08, "loss": 0.0264, "step": 237680 }, { "epoch": 4.838473282442748, "grad_norm": 0.44174135656773417, "learning_rate": 3.1757415745510835e-08, "loss": 0.0179, "step": 237690 }, { "epoch": 4.8386768447837145, "grad_norm": 0.001209528683882578, "learning_rate": 3.167750686053128e-08, "loss": 0.0358, "step": 237700 }, { "epoch": 4.838880407124682, "grad_norm": 0.0004865543599583727, "learning_rate": 3.159769831722337e-08, "loss": 0.0001, "step": 237710 }, { "epoch": 4.839083969465649, "grad_norm": 0.00107184774032516, "learning_rate": 3.1517990117199695e-08, "loss": 0.0001, "step": 237720 }, { "epoch": 4.839287531806615, "grad_norm": 0.10038691181777869, "learning_rate": 3.1438382262068966e-08, "loss": 0.0254, "step": 237730 }, { "epoch": 4.839491094147583, "grad_norm": 0.0024919609712542343, "learning_rate": 3.1358874753439905e-08, "loss": 0.0004, "step": 237740 }, { "epoch": 4.8396946564885495, "grad_norm": 0.007828572368056105, "learning_rate": 3.1279467592917887e-08, "loss": 0.0002, "step": 237750 }, { "epoch": 4.839898218829516, "grad_norm": 0.001629758411906886, "learning_rate": 3.1200160782106634e-08, "loss": 0.0, "step": 237760 }, { "epoch": 4.840101781170484, "grad_norm": 0.00019917464293331345, "learning_rate": 3.11209543226082e-08, "loss": 0.0002, "step": 237770 }, { "epoch": 4.84030534351145, "grad_norm": 0.009629066993577042, "learning_rate": 3.10418482160213e-08, "loss": 0.0007, "step": 237780 }, { "epoch": 4.840508905852417, "grad_norm": 0.014756201313938202, "learning_rate": 3.0962842463944655e-08, "loss": 0.0001, "step": 237790 }, { "epoch": 4.840712468193384, "grad_norm": 33.63402572456145, "learning_rate": 3.0883937067973656e-08, "loss": 0.011, "step": 237800 }, { "epoch": 4.840916030534351, "grad_norm": 0.0022260916558104952, "learning_rate": 3.080513202970148e-08, "loss": 0.0026, "step": 237810 }, { "epoch": 4.841119592875318, "grad_norm": 0.03753975154340597, "learning_rate": 3.072642735072018e-08, "loss": 0.0631, "step": 237820 }, { "epoch": 4.841323155216285, "grad_norm": 0.008642794432328724, "learning_rate": 3.064782303261904e-08, "loss": 0.0051, "step": 237830 }, { "epoch": 4.841526717557252, "grad_norm": 0.0061337122226484105, "learning_rate": 3.0569319076985124e-08, "loss": 0.0376, "step": 237840 }, { "epoch": 4.8417302798982185, "grad_norm": 0.011365548197096634, "learning_rate": 3.049091548540495e-08, "loss": 0.0017, "step": 237850 }, { "epoch": 4.841933842239186, "grad_norm": 0.009769090886819426, "learning_rate": 3.0412612259461125e-08, "loss": 0.0002, "step": 237860 }, { "epoch": 4.842137404580153, "grad_norm": 0.018151572037144546, "learning_rate": 3.033440940073518e-08, "loss": 0.0013, "step": 237870 }, { "epoch": 4.842340966921119, "grad_norm": 0.028811738697638693, "learning_rate": 3.0256306910806946e-08, "loss": 0.0002, "step": 237880 }, { "epoch": 4.842544529262087, "grad_norm": 0.11042843370086121, "learning_rate": 3.0178304791253497e-08, "loss": 0.0138, "step": 237890 }, { "epoch": 4.842748091603053, "grad_norm": 0.0008092520262760281, "learning_rate": 3.010040304365025e-08, "loss": 0.0002, "step": 237900 }, { "epoch": 4.84295165394402, "grad_norm": 0.003678118441110891, "learning_rate": 3.002260166957094e-08, "loss": 0.0002, "step": 237910 }, { "epoch": 4.843155216284988, "grad_norm": 0.002820593805839649, "learning_rate": 2.9944900670585974e-08, "loss": 0.0, "step": 237920 }, { "epoch": 4.843358778625954, "grad_norm": 9.686444379745264, "learning_rate": 2.9867300048265216e-08, "loss": 0.031, "step": 237930 }, { "epoch": 4.843562340966921, "grad_norm": 0.0002970842232058557, "learning_rate": 2.9789799804175733e-08, "loss": 0.0001, "step": 237940 }, { "epoch": 4.843765903307888, "grad_norm": 0.11907905372545796, "learning_rate": 2.9712399939882952e-08, "loss": 0.0362, "step": 237950 }, { "epoch": 4.843969465648855, "grad_norm": 0.0013662691803817877, "learning_rate": 2.963510045695006e-08, "loss": 0.0002, "step": 237960 }, { "epoch": 4.844173027989822, "grad_norm": 0.009235008040418435, "learning_rate": 2.9557901356938034e-08, "loss": 0.0132, "step": 237970 }, { "epoch": 4.844376590330789, "grad_norm": 0.048994860386140995, "learning_rate": 2.948080264140618e-08, "loss": 0.0595, "step": 237980 }, { "epoch": 4.844580152671756, "grad_norm": 0.04760368130955368, "learning_rate": 2.9403804311911588e-08, "loss": 0.0002, "step": 237990 }, { "epoch": 4.844783715012722, "grad_norm": 0.04420109239853499, "learning_rate": 2.9326906370008567e-08, "loss": 0.0002, "step": 238000 }, { "epoch": 4.84498727735369, "grad_norm": 0.0008491389846768145, "learning_rate": 2.925010881725199e-08, "loss": 0.0002, "step": 238010 }, { "epoch": 4.845190839694657, "grad_norm": 0.004918133327364645, "learning_rate": 2.917341165519061e-08, "loss": 0.0004, "step": 238020 }, { "epoch": 4.845394402035623, "grad_norm": 0.009084775542710597, "learning_rate": 2.909681488537486e-08, "loss": 0.0062, "step": 238030 }, { "epoch": 4.845597964376591, "grad_norm": 0.0035923475808357416, "learning_rate": 2.902031850935183e-08, "loss": 0.0137, "step": 238040 }, { "epoch": 4.845801526717557, "grad_norm": 0.22242691879271934, "learning_rate": 2.8943922528665846e-08, "loss": 0.0497, "step": 238050 }, { "epoch": 4.846005089058524, "grad_norm": 0.00818217311362142, "learning_rate": 2.8867626944860115e-08, "loss": 0.029, "step": 238060 }, { "epoch": 4.8462086513994915, "grad_norm": 0.048866790362379, "learning_rate": 2.8791431759475073e-08, "loss": 0.0002, "step": 238070 }, { "epoch": 4.846412213740458, "grad_norm": 16.39724164813659, "learning_rate": 2.8715336974050045e-08, "loss": 0.0555, "step": 238080 }, { "epoch": 4.846615776081425, "grad_norm": 1.2131829305428374, "learning_rate": 2.8639342590122133e-08, "loss": 0.0416, "step": 238090 }, { "epoch": 4.846819338422392, "grad_norm": 0.003846669407957345, "learning_rate": 2.856344860922511e-08, "loss": 0.0001, "step": 238100 }, { "epoch": 4.847022900763359, "grad_norm": 0.001192490642255489, "learning_rate": 2.8487655032892747e-08, "loss": 0.042, "step": 238110 }, { "epoch": 4.847226463104326, "grad_norm": 0.004125821567211852, "learning_rate": 2.841196186265549e-08, "loss": 0.0103, "step": 238120 }, { "epoch": 4.847430025445293, "grad_norm": 0.026062258552460957, "learning_rate": 2.8336369100042117e-08, "loss": 0.0017, "step": 238130 }, { "epoch": 4.84763358778626, "grad_norm": 0.030702586864548718, "learning_rate": 2.826087674657918e-08, "loss": 0.0001, "step": 238140 }, { "epoch": 4.847837150127226, "grad_norm": 0.03311268512696889, "learning_rate": 2.8185484803791574e-08, "loss": 0.0004, "step": 238150 }, { "epoch": 4.848040712468193, "grad_norm": 0.004169513664788493, "learning_rate": 2.8110193273201415e-08, "loss": 0.0228, "step": 238160 }, { "epoch": 4.8482442748091605, "grad_norm": 0.009725646352079002, "learning_rate": 2.8035002156329704e-08, "loss": 0.0005, "step": 238170 }, { "epoch": 4.848447837150127, "grad_norm": 0.03503779811798866, "learning_rate": 2.7959911454694676e-08, "loss": 0.0001, "step": 238180 }, { "epoch": 4.848651399491094, "grad_norm": 0.00636736099076029, "learning_rate": 2.7884921169814005e-08, "loss": 0.0152, "step": 238190 }, { "epoch": 4.848854961832061, "grad_norm": 0.016236241694447484, "learning_rate": 2.7810031303200925e-08, "loss": 0.0389, "step": 238200 }, { "epoch": 4.849058524173028, "grad_norm": 0.0014840304022182703, "learning_rate": 2.773524185636811e-08, "loss": 0.0024, "step": 238210 }, { "epoch": 4.849262086513995, "grad_norm": 0.006913573214568628, "learning_rate": 2.766055283082658e-08, "loss": 0.0227, "step": 238220 }, { "epoch": 4.849465648854962, "grad_norm": 0.0063177249834303825, "learning_rate": 2.7585964228084573e-08, "loss": 0.0001, "step": 238230 }, { "epoch": 4.849669211195929, "grad_norm": 0.012593208522652178, "learning_rate": 2.7511476049648102e-08, "loss": 0.0009, "step": 238240 }, { "epoch": 4.849872773536895, "grad_norm": 0.024012884765522192, "learning_rate": 2.7437088297022073e-08, "loss": 0.0601, "step": 238250 }, { "epoch": 4.850076335877863, "grad_norm": 0.002866009224212541, "learning_rate": 2.7362800971708624e-08, "loss": 0.0, "step": 238260 }, { "epoch": 4.8502798982188295, "grad_norm": 0.02052468502169995, "learning_rate": 2.728861407520822e-08, "loss": 0.0, "step": 238270 }, { "epoch": 4.850483460559796, "grad_norm": 0.008206122811535474, "learning_rate": 2.7214527609019105e-08, "loss": 0.0, "step": 238280 }, { "epoch": 4.850687022900764, "grad_norm": 0.06735968788174546, "learning_rate": 2.7140541574637303e-08, "loss": 0.0004, "step": 238290 }, { "epoch": 4.85089058524173, "grad_norm": 0.043475558425935985, "learning_rate": 2.7066655973557732e-08, "loss": 0.0007, "step": 238300 }, { "epoch": 4.851094147582697, "grad_norm": 0.0013544624829223706, "learning_rate": 2.6992870807271977e-08, "loss": 0.032, "step": 238310 }, { "epoch": 4.8512977099236645, "grad_norm": 0.03830659565085836, "learning_rate": 2.6919186077269953e-08, "loss": 0.0002, "step": 238320 }, { "epoch": 4.851501272264631, "grad_norm": 0.17903135480882457, "learning_rate": 2.6845601785040475e-08, "loss": 0.0003, "step": 238330 }, { "epoch": 4.851704834605598, "grad_norm": 0.002697936724023198, "learning_rate": 2.6772117932069573e-08, "loss": 0.0213, "step": 238340 }, { "epoch": 4.851908396946564, "grad_norm": 0.03325901972064321, "learning_rate": 2.6698734519841065e-08, "loss": 0.0125, "step": 238350 }, { "epoch": 4.852111959287532, "grad_norm": 0.024356780980882713, "learning_rate": 2.6625451549837088e-08, "loss": 0.0012, "step": 238360 }, { "epoch": 4.8523155216284986, "grad_norm": 10.47006893229171, "learning_rate": 2.6552269023538134e-08, "loss": 0.0953, "step": 238370 }, { "epoch": 4.852519083969465, "grad_norm": 0.44830455386112117, "learning_rate": 2.6479186942421908e-08, "loss": 0.0011, "step": 238380 }, { "epoch": 4.852722646310433, "grad_norm": 45.660498338857884, "learning_rate": 2.6406205307963896e-08, "loss": 0.0284, "step": 238390 }, { "epoch": 4.852926208651399, "grad_norm": 8.899948163649604, "learning_rate": 2.633332412163847e-08, "loss": 0.04, "step": 238400 }, { "epoch": 4.853129770992366, "grad_norm": 0.023329007813234756, "learning_rate": 2.6260543384918346e-08, "loss": 0.0424, "step": 238410 }, { "epoch": 4.8533333333333335, "grad_norm": 0.45045528542088215, "learning_rate": 2.6187863099271794e-08, "loss": 0.032, "step": 238420 }, { "epoch": 4.8535368956743, "grad_norm": 0.003116278496001755, "learning_rate": 2.611528326616819e-08, "loss": 0.021, "step": 238430 }, { "epoch": 4.853740458015267, "grad_norm": 0.00046514337291431016, "learning_rate": 2.6042803887072476e-08, "loss": 0.0003, "step": 238440 }, { "epoch": 4.853944020356234, "grad_norm": 0.05505779335852868, "learning_rate": 2.597042496344848e-08, "loss": 0.0003, "step": 238450 }, { "epoch": 4.854147582697201, "grad_norm": 0.01694591462601829, "learning_rate": 2.5898146496758924e-08, "loss": 0.0232, "step": 238460 }, { "epoch": 4.854351145038168, "grad_norm": 0.0018749499723506053, "learning_rate": 2.5825968488462082e-08, "loss": 0.0547, "step": 238470 }, { "epoch": 4.854554707379135, "grad_norm": 0.024785941336389162, "learning_rate": 2.5753890940017344e-08, "loss": 0.0535, "step": 238480 }, { "epoch": 4.854758269720102, "grad_norm": 0.0010221122860202092, "learning_rate": 2.5681913852879102e-08, "loss": 0.0003, "step": 238490 }, { "epoch": 4.854961832061068, "grad_norm": 0.18979704620762122, "learning_rate": 2.5610037228501194e-08, "loss": 0.0005, "step": 238500 }, { "epoch": 4.855165394402036, "grad_norm": 0.00035194644146130903, "learning_rate": 2.553826106833579e-08, "loss": 0.0001, "step": 238510 }, { "epoch": 4.8553689567430025, "grad_norm": 0.004373906126508742, "learning_rate": 2.5466585373832286e-08, "loss": 0.0002, "step": 238520 }, { "epoch": 4.855572519083969, "grad_norm": 0.0032157132267200627, "learning_rate": 2.5395010146438414e-08, "loss": 0.054, "step": 238530 }, { "epoch": 4.855776081424937, "grad_norm": 0.005487824138706978, "learning_rate": 2.5323535387599686e-08, "loss": 0.0001, "step": 238540 }, { "epoch": 4.855979643765903, "grad_norm": 0.0008253912815403178, "learning_rate": 2.5252161098759388e-08, "loss": 0.0006, "step": 238550 }, { "epoch": 4.85618320610687, "grad_norm": 0.001590170117963225, "learning_rate": 2.5180887281359145e-08, "loss": 0.0167, "step": 238560 }, { "epoch": 4.8563867684478375, "grad_norm": 1.7521983502647411, "learning_rate": 2.5109713936837808e-08, "loss": 0.0014, "step": 238570 }, { "epoch": 4.856590330788804, "grad_norm": 0.001733576938758343, "learning_rate": 2.5038641066633675e-08, "loss": 0.0121, "step": 238580 }, { "epoch": 4.856793893129771, "grad_norm": 0.004457002544487813, "learning_rate": 2.4967668672182254e-08, "loss": 0.0, "step": 238590 }, { "epoch": 4.856997455470738, "grad_norm": 0.0032219900326180854, "learning_rate": 2.4896796754916853e-08, "loss": 0.0002, "step": 238600 }, { "epoch": 4.857201017811705, "grad_norm": 0.010240569665390933, "learning_rate": 2.4826025316267433e-08, "loss": 0.028, "step": 238610 }, { "epoch": 4.8574045801526715, "grad_norm": 0.0026780087307821977, "learning_rate": 2.4755354357665627e-08, "loss": 0.0001, "step": 238620 }, { "epoch": 4.857608142493639, "grad_norm": 0.0022464336187220995, "learning_rate": 2.4684783880536412e-08, "loss": 0.0019, "step": 238630 }, { "epoch": 4.857811704834606, "grad_norm": 0.0022237749704476155, "learning_rate": 2.461431388630697e-08, "loss": 0.0034, "step": 238640 }, { "epoch": 4.858015267175572, "grad_norm": 0.06998933734626647, "learning_rate": 2.4543944376399508e-08, "loss": 0.0001, "step": 238650 }, { "epoch": 4.85821882951654, "grad_norm": 0.0012548303677644515, "learning_rate": 2.4473675352235104e-08, "loss": 0.0, "step": 238660 }, { "epoch": 4.8584223918575065, "grad_norm": 0.0037911365810458847, "learning_rate": 2.440350681523318e-08, "loss": 0.008, "step": 238670 }, { "epoch": 4.858625954198473, "grad_norm": 0.006806807696530566, "learning_rate": 2.433343876681149e-08, "loss": 0.0006, "step": 238680 }, { "epoch": 4.858829516539441, "grad_norm": 0.001981331473170646, "learning_rate": 2.426347120838446e-08, "loss": 0.0, "step": 238690 }, { "epoch": 4.859033078880407, "grad_norm": 14.337136984565031, "learning_rate": 2.4193604141364847e-08, "loss": 0.0545, "step": 238700 }, { "epoch": 4.859236641221374, "grad_norm": 0.0006453185711036216, "learning_rate": 2.4123837567164853e-08, "loss": 0.0069, "step": 238710 }, { "epoch": 4.859440203562341, "grad_norm": 0.0008325057222942036, "learning_rate": 2.4054171487192245e-08, "loss": 0.0651, "step": 238720 }, { "epoch": 4.859643765903308, "grad_norm": 0.08858979325354172, "learning_rate": 2.3984605902854787e-08, "loss": 0.0003, "step": 238730 }, { "epoch": 4.859847328244275, "grad_norm": 0.002903451151719771, "learning_rate": 2.391514081555746e-08, "loss": 0.0618, "step": 238740 }, { "epoch": 4.860050890585242, "grad_norm": 0.004956705602705688, "learning_rate": 2.3845776226703586e-08, "loss": 0.0001, "step": 238750 }, { "epoch": 4.860254452926209, "grad_norm": 0.002687433839047923, "learning_rate": 2.377651213769261e-08, "loss": 0.0595, "step": 238760 }, { "epoch": 4.8604580152671755, "grad_norm": 0.00043428218351137756, "learning_rate": 2.3707348549924514e-08, "loss": 0.0001, "step": 238770 }, { "epoch": 4.860661577608143, "grad_norm": 0.004222881197526514, "learning_rate": 2.3638285464795964e-08, "loss": 0.0061, "step": 238780 }, { "epoch": 4.86086513994911, "grad_norm": 0.004711862036846568, "learning_rate": 2.3569322883701395e-08, "loss": 0.0021, "step": 238790 }, { "epoch": 4.861068702290076, "grad_norm": 0.003160567878301555, "learning_rate": 2.3500460808034696e-08, "loss": 0.0, "step": 238800 }, { "epoch": 4.861272264631043, "grad_norm": 0.00027711628422791615, "learning_rate": 2.3431699239185314e-08, "loss": 0.0, "step": 238810 }, { "epoch": 4.86147582697201, "grad_norm": 0.056085064930949106, "learning_rate": 2.3363038178542685e-08, "loss": 0.0012, "step": 238820 }, { "epoch": 4.861679389312977, "grad_norm": 0.011618231466593108, "learning_rate": 2.3294477627493485e-08, "loss": 0.0337, "step": 238830 }, { "epoch": 4.861882951653944, "grad_norm": 0.000760058048075404, "learning_rate": 2.3226017587422158e-08, "loss": 0.0004, "step": 238840 }, { "epoch": 4.862086513994911, "grad_norm": 3.963551190868973, "learning_rate": 2.3157658059711487e-08, "loss": 0.0039, "step": 238850 }, { "epoch": 4.862290076335878, "grad_norm": 0.02724917619846826, "learning_rate": 2.3089399045742033e-08, "loss": 0.0268, "step": 238860 }, { "epoch": 4.8624936386768445, "grad_norm": 0.004180856315145431, "learning_rate": 2.302124054689214e-08, "loss": 0.0003, "step": 238870 }, { "epoch": 4.862697201017812, "grad_norm": 0.6569638697768877, "learning_rate": 2.2953182564539044e-08, "loss": 0.0403, "step": 238880 }, { "epoch": 4.862900763358779, "grad_norm": 0.0007088771849627612, "learning_rate": 2.2885225100056642e-08, "loss": 0.0001, "step": 238890 }, { "epoch": 4.863104325699745, "grad_norm": 0.00837019078347865, "learning_rate": 2.2817368154817165e-08, "loss": 0.0006, "step": 238900 }, { "epoch": 4.863307888040713, "grad_norm": 6.676958278263228, "learning_rate": 2.2749611730191744e-08, "loss": 0.0438, "step": 238910 }, { "epoch": 4.863511450381679, "grad_norm": 0.11500032746424739, "learning_rate": 2.268195582754873e-08, "loss": 0.0013, "step": 238920 }, { "epoch": 4.863715012722646, "grad_norm": 0.01634784361838204, "learning_rate": 2.2614400448254804e-08, "loss": 0.0001, "step": 238930 }, { "epoch": 4.863918575063614, "grad_norm": 8.745850028553203, "learning_rate": 2.2546945593672766e-08, "loss": 0.0307, "step": 238940 }, { "epoch": 4.86412213740458, "grad_norm": 0.04868669745835273, "learning_rate": 2.2479591265167078e-08, "loss": 0.0034, "step": 238950 }, { "epoch": 4.864325699745547, "grad_norm": 0.00389198491962274, "learning_rate": 2.2412337464096655e-08, "loss": 0.0002, "step": 238960 }, { "epoch": 4.864529262086514, "grad_norm": 0.015989591879234506, "learning_rate": 2.2345184191819857e-08, "loss": 0.0001, "step": 238970 }, { "epoch": 4.864732824427481, "grad_norm": 1.3597986444390555, "learning_rate": 2.2278131449693373e-08, "loss": 0.0008, "step": 238980 }, { "epoch": 4.864936386768448, "grad_norm": 0.0012239624736829406, "learning_rate": 2.2211179239071678e-08, "loss": 0.0114, "step": 238990 }, { "epoch": 4.865139949109414, "grad_norm": 0.007400319429226861, "learning_rate": 2.2144327561306468e-08, "loss": 0.0003, "step": 239000 }, { "epoch": 4.865343511450382, "grad_norm": 0.016646982689620882, "learning_rate": 2.207757641774777e-08, "loss": 0.0251, "step": 239010 }, { "epoch": 4.865547073791348, "grad_norm": 0.02010904180647351, "learning_rate": 2.2010925809743953e-08, "loss": 0.028, "step": 239020 }, { "epoch": 4.865750636132315, "grad_norm": 0.007503024470751008, "learning_rate": 2.1944375738641167e-08, "loss": 0.0002, "step": 239030 }, { "epoch": 4.865954198473283, "grad_norm": 0.0014518211201509838, "learning_rate": 2.1877926205783328e-08, "loss": 0.0001, "step": 239040 }, { "epoch": 4.866157760814249, "grad_norm": 0.009385524849237338, "learning_rate": 2.1811577212512703e-08, "loss": 0.0001, "step": 239050 }, { "epoch": 4.866361323155216, "grad_norm": 0.014189683738225923, "learning_rate": 2.1745328760169327e-08, "loss": 0.0447, "step": 239060 }, { "epoch": 4.866564885496183, "grad_norm": 0.0028845326562228313, "learning_rate": 2.1679180850090464e-08, "loss": 0.0011, "step": 239070 }, { "epoch": 4.86676844783715, "grad_norm": 0.0029193379170878468, "learning_rate": 2.1613133483612825e-08, "loss": 0.0303, "step": 239080 }, { "epoch": 4.866972010178117, "grad_norm": 0.0068129336030294165, "learning_rate": 2.1547186662069784e-08, "loss": 0.0007, "step": 239090 }, { "epoch": 4.867175572519084, "grad_norm": 0.002660585919259204, "learning_rate": 2.148134038679417e-08, "loss": 0.0054, "step": 239100 }, { "epoch": 4.867379134860051, "grad_norm": 0.09648565253148808, "learning_rate": 2.1415594659114358e-08, "loss": 0.0625, "step": 239110 }, { "epoch": 4.867582697201017, "grad_norm": 0.2551327646619935, "learning_rate": 2.134994948035929e-08, "loss": 0.0002, "step": 239120 }, { "epoch": 4.867786259541985, "grad_norm": 0.032733660545943746, "learning_rate": 2.1284404851854567e-08, "loss": 0.0197, "step": 239130 }, { "epoch": 4.867989821882952, "grad_norm": 0.4579881020658779, "learning_rate": 2.1218960774924136e-08, "loss": 0.0007, "step": 239140 }, { "epoch": 4.868193384223918, "grad_norm": 2.5035827045500434e-07, "learning_rate": 2.115361725088916e-08, "loss": 0.0013, "step": 239150 }, { "epoch": 4.868396946564886, "grad_norm": 0.003090346795507828, "learning_rate": 2.1088374281069136e-08, "loss": 0.0002, "step": 239160 }, { "epoch": 4.868600508905852, "grad_norm": 0.130680399805401, "learning_rate": 2.1023231866783013e-08, "loss": 0.0387, "step": 239170 }, { "epoch": 4.868804071246819, "grad_norm": 0.04341616538321014, "learning_rate": 2.095819000934529e-08, "loss": 0.0001, "step": 239180 }, { "epoch": 4.8690076335877865, "grad_norm": 0.02748199313668337, "learning_rate": 2.089324871006937e-08, "loss": 0.0016, "step": 239190 }, { "epoch": 4.869211195928753, "grad_norm": 0.005592612502999621, "learning_rate": 2.082840797026753e-08, "loss": 0.0037, "step": 239200 }, { "epoch": 4.86941475826972, "grad_norm": 0.023392906072115336, "learning_rate": 2.0763667791249276e-08, "loss": 0.0182, "step": 239210 }, { "epoch": 4.869618320610687, "grad_norm": 0.021349522177411873, "learning_rate": 2.0699028174322456e-08, "loss": 0.0072, "step": 239220 }, { "epoch": 4.869821882951654, "grad_norm": 0.0023148377521696574, "learning_rate": 2.0634489120791023e-08, "loss": 0.0318, "step": 239230 }, { "epoch": 4.870025445292621, "grad_norm": 0.16039778667790477, "learning_rate": 2.0570050631960046e-08, "loss": 0.0055, "step": 239240 }, { "epoch": 4.870229007633588, "grad_norm": 0.003609117741882817, "learning_rate": 2.050571270913071e-08, "loss": 0.0002, "step": 239250 }, { "epoch": 4.870432569974555, "grad_norm": 0.03166799252722608, "learning_rate": 2.0441475353601415e-08, "loss": 0.0002, "step": 239260 }, { "epoch": 4.870636132315521, "grad_norm": 0.18210602442063858, "learning_rate": 2.0377338566670567e-08, "loss": 0.0019, "step": 239270 }, { "epoch": 4.870839694656489, "grad_norm": 0.040731539719644314, "learning_rate": 2.031330234963269e-08, "loss": 0.0247, "step": 239280 }, { "epoch": 4.8710432569974556, "grad_norm": 0.0008296444933722537, "learning_rate": 2.0249366703781748e-08, "loss": 0.0032, "step": 239290 }, { "epoch": 4.871246819338422, "grad_norm": 0.023326368545093654, "learning_rate": 2.018553163040893e-08, "loss": 0.0123, "step": 239300 }, { "epoch": 4.87145038167939, "grad_norm": 0.11125775560057435, "learning_rate": 2.0121797130802644e-08, "loss": 0.0004, "step": 239310 }, { "epoch": 4.871653944020356, "grad_norm": 0.00650821020001931, "learning_rate": 2.0058163206250758e-08, "loss": 0.0257, "step": 239320 }, { "epoch": 4.871857506361323, "grad_norm": 0.004525092410374491, "learning_rate": 1.9994629858038905e-08, "loss": 0.0259, "step": 239330 }, { "epoch": 4.8720610687022905, "grad_norm": 0.2510080935479987, "learning_rate": 1.9931197087449394e-08, "loss": 0.0031, "step": 239340 }, { "epoch": 4.872264631043257, "grad_norm": 0.001581139920532515, "learning_rate": 1.9867864895763424e-08, "loss": 0.0013, "step": 239350 }, { "epoch": 4.872468193384224, "grad_norm": 0.0030708300852488077, "learning_rate": 1.980463328426052e-08, "loss": 0.0009, "step": 239360 }, { "epoch": 4.872671755725191, "grad_norm": 0.05809800852529395, "learning_rate": 1.974150225421745e-08, "loss": 0.0001, "step": 239370 }, { "epoch": 4.872875318066158, "grad_norm": 0.0012366874817263359, "learning_rate": 1.9678471806909295e-08, "loss": 0.0002, "step": 239380 }, { "epoch": 4.873078880407125, "grad_norm": 0.07942438109020863, "learning_rate": 1.9615541943609484e-08, "loss": 0.0381, "step": 239390 }, { "epoch": 4.873282442748092, "grad_norm": 0.005486008314403812, "learning_rate": 1.955271266558756e-08, "loss": 0.0504, "step": 239400 }, { "epoch": 4.873486005089059, "grad_norm": 7.331132618635353, "learning_rate": 1.948998397411417e-08, "loss": 0.0357, "step": 239410 }, { "epoch": 4.873689567430025, "grad_norm": 0.0013179000568032727, "learning_rate": 1.9427355870454967e-08, "loss": 0.0221, "step": 239420 }, { "epoch": 4.873893129770992, "grad_norm": 0.0013340794094745018, "learning_rate": 1.9364828355875053e-08, "loss": 0.0156, "step": 239430 }, { "epoch": 4.8740966921119595, "grad_norm": 0.05262495467370045, "learning_rate": 1.9302401431637864e-08, "loss": 0.0001, "step": 239440 }, { "epoch": 4.874300254452926, "grad_norm": 0.009155864798516872, "learning_rate": 1.9240075099004053e-08, "loss": 0.0001, "step": 239450 }, { "epoch": 4.874503816793893, "grad_norm": 0.0021319163759600276, "learning_rate": 1.9177849359231503e-08, "loss": 0.0208, "step": 239460 }, { "epoch": 4.87470737913486, "grad_norm": 0.0036665497258358297, "learning_rate": 1.9115724213577546e-08, "loss": 0.0001, "step": 239470 }, { "epoch": 4.874910941475827, "grad_norm": 0.027326324723319303, "learning_rate": 1.9053699663297286e-08, "loss": 0.0, "step": 239480 }, { "epoch": 4.875114503816794, "grad_norm": 0.1773468004294967, "learning_rate": 1.8991775709642503e-08, "loss": 0.0148, "step": 239490 }, { "epoch": 4.875318066157761, "grad_norm": 0.009044739448717807, "learning_rate": 1.892995235386441e-08, "loss": 0.0157, "step": 239500 }, { "epoch": 4.875521628498728, "grad_norm": 0.00029348929993700825, "learning_rate": 1.886822959721202e-08, "loss": 0.0439, "step": 239510 }, { "epoch": 4.875725190839694, "grad_norm": 0.006861440771865271, "learning_rate": 1.8806607440930437e-08, "loss": 0.031, "step": 239520 }, { "epoch": 4.875928753180662, "grad_norm": 0.01688243384533414, "learning_rate": 1.874508588626589e-08, "loss": 0.0009, "step": 239530 }, { "epoch": 4.8761323155216285, "grad_norm": 0.1288408260994888, "learning_rate": 1.8683664934460167e-08, "loss": 0.0102, "step": 239540 }, { "epoch": 4.876335877862595, "grad_norm": 12.89303151242194, "learning_rate": 1.862234458675338e-08, "loss": 0.1034, "step": 239550 }, { "epoch": 4.876539440203563, "grad_norm": 0.04078551728266227, "learning_rate": 1.856112484438455e-08, "loss": 0.0176, "step": 239560 }, { "epoch": 4.876743002544529, "grad_norm": 0.06074558453610833, "learning_rate": 1.8500005708589897e-08, "loss": 0.0001, "step": 239570 }, { "epoch": 4.876946564885496, "grad_norm": 7.424645717686122e-07, "learning_rate": 1.843898718060344e-08, "loss": 0.0263, "step": 239580 }, { "epoch": 4.8771501272264635, "grad_norm": 0.0032443127283492603, "learning_rate": 1.8378069261658082e-08, "loss": 0.0001, "step": 239590 }, { "epoch": 4.87735368956743, "grad_norm": 0.013152575975162058, "learning_rate": 1.8317251952983394e-08, "loss": 0.0309, "step": 239600 }, { "epoch": 4.877557251908397, "grad_norm": 4.390131594546759, "learning_rate": 1.825653525580895e-08, "loss": 0.0133, "step": 239610 }, { "epoch": 4.877760814249364, "grad_norm": 0.2211099909975781, "learning_rate": 1.819591917135932e-08, "loss": 0.0249, "step": 239620 }, { "epoch": 4.877964376590331, "grad_norm": 0.0007382115275224179, "learning_rate": 1.81354037008602e-08, "loss": 0.0045, "step": 239630 }, { "epoch": 4.8781679389312975, "grad_norm": 0.02254251439703525, "learning_rate": 1.8074988845533382e-08, "loss": 0.0234, "step": 239640 }, { "epoch": 4.878371501272264, "grad_norm": 0.0008902180333383425, "learning_rate": 1.8014674606598448e-08, "loss": 0.0, "step": 239650 }, { "epoch": 4.878575063613232, "grad_norm": 1.506972358998533e-07, "learning_rate": 1.795446098527387e-08, "loss": 0.0003, "step": 239660 }, { "epoch": 4.878778625954198, "grad_norm": 0.0007841314746244115, "learning_rate": 1.7894347982775896e-08, "loss": 0.044, "step": 239670 }, { "epoch": 4.878982188295165, "grad_norm": 0.00025575935659934753, "learning_rate": 1.7834335600318554e-08, "loss": 0.0001, "step": 239680 }, { "epoch": 4.8791857506361325, "grad_norm": 0.01307188370421276, "learning_rate": 1.7774423839113652e-08, "loss": 0.055, "step": 239690 }, { "epoch": 4.879389312977099, "grad_norm": 0.015480004189439808, "learning_rate": 1.771461270037189e-08, "loss": 0.0028, "step": 239700 }, { "epoch": 4.879592875318066, "grad_norm": 0.001966252904502516, "learning_rate": 1.765490218530008e-08, "loss": 0.0005, "step": 239710 }, { "epoch": 4.879796437659033, "grad_norm": 0.0018026216744973901, "learning_rate": 1.7595292295104482e-08, "loss": 0.0302, "step": 239720 }, { "epoch": 4.88, "grad_norm": 0.03286334785438553, "learning_rate": 1.7535783030989682e-08, "loss": 0.0003, "step": 239730 }, { "epoch": 4.8802035623409665, "grad_norm": 0.012795280845410378, "learning_rate": 1.747637439415695e-08, "loss": 0.037, "step": 239740 }, { "epoch": 4.880407124681934, "grad_norm": 0.00018135687833848756, "learning_rate": 1.7417066385806425e-08, "loss": 0.0001, "step": 239750 }, { "epoch": 4.880610687022901, "grad_norm": 0.45908101447627775, "learning_rate": 1.7357859007136048e-08, "loss": 0.0032, "step": 239760 }, { "epoch": 4.880814249363867, "grad_norm": 0.006510155239365846, "learning_rate": 1.7298752259340414e-08, "loss": 0.0055, "step": 239770 }, { "epoch": 4.881017811704835, "grad_norm": 0.003532491408061372, "learning_rate": 1.7239746143615233e-08, "loss": 0.0003, "step": 239780 }, { "epoch": 4.8812213740458015, "grad_norm": 0.008956965183977306, "learning_rate": 1.718084066115011e-08, "loss": 0.0006, "step": 239790 }, { "epoch": 4.881424936386768, "grad_norm": 0.056419346976969056, "learning_rate": 1.7122035813136317e-08, "loss": 0.0001, "step": 239800 }, { "epoch": 4.881628498727736, "grad_norm": 0.009041868235965962, "learning_rate": 1.706333160076068e-08, "loss": 0.0001, "step": 239810 }, { "epoch": 4.881832061068702, "grad_norm": 0.026376780583032108, "learning_rate": 1.7004728025209472e-08, "loss": 0.0001, "step": 239820 }, { "epoch": 4.882035623409669, "grad_norm": 0.003359422282032388, "learning_rate": 1.6946225087665634e-08, "loss": 0.0044, "step": 239830 }, { "epoch": 4.882239185750636, "grad_norm": 0.0017644904918318436, "learning_rate": 1.688782278931045e-08, "loss": 0.0238, "step": 239840 }, { "epoch": 4.882442748091603, "grad_norm": 0.0070738138763432505, "learning_rate": 1.682952113132408e-08, "loss": 0.0002, "step": 239850 }, { "epoch": 4.88264631043257, "grad_norm": 0.000505815483912832, "learning_rate": 1.6771320114883917e-08, "loss": 0.0285, "step": 239860 }, { "epoch": 4.882849872773537, "grad_norm": 0.01657308028887947, "learning_rate": 1.6713219741165686e-08, "loss": 0.0003, "step": 239870 }, { "epoch": 4.883053435114504, "grad_norm": 0.0018888902800166588, "learning_rate": 1.6655220011341788e-08, "loss": 0.0001, "step": 239880 }, { "epoch": 4.8832569974554705, "grad_norm": 0.0785269362140384, "learning_rate": 1.6597320926584613e-08, "loss": 0.0008, "step": 239890 }, { "epoch": 4.883460559796438, "grad_norm": 0.007772410986552758, "learning_rate": 1.6539522488062675e-08, "loss": 0.0037, "step": 239900 }, { "epoch": 4.883664122137405, "grad_norm": 10.8701255523225, "learning_rate": 1.648182469694448e-08, "loss": 0.0824, "step": 239910 }, { "epoch": 4.883867684478371, "grad_norm": 0.0041750110291171585, "learning_rate": 1.64242275543941e-08, "loss": 0.0063, "step": 239920 }, { "epoch": 4.884071246819339, "grad_norm": 0.017999273909470054, "learning_rate": 1.6366731061575047e-08, "loss": 0.0001, "step": 239930 }, { "epoch": 4.884274809160305, "grad_norm": 0.00596979246489695, "learning_rate": 1.630933521964917e-08, "loss": 0.0002, "step": 239940 }, { "epoch": 4.884478371501272, "grad_norm": 0.013840797053951895, "learning_rate": 1.625204002977443e-08, "loss": 0.0007, "step": 239950 }, { "epoch": 4.88468193384224, "grad_norm": 0.2581669835887205, "learning_rate": 1.6194845493109902e-08, "loss": 0.0395, "step": 239960 }, { "epoch": 4.884885496183206, "grad_norm": 0.0049931910009808735, "learning_rate": 1.613775161080855e-08, "loss": 0.0001, "step": 239970 }, { "epoch": 4.885089058524173, "grad_norm": 0.0010841397417107666, "learning_rate": 1.6080758384025008e-08, "loss": 0.0001, "step": 239980 }, { "epoch": 4.88529262086514, "grad_norm": 0.001566722018312769, "learning_rate": 1.6023865813910023e-08, "loss": 0.0022, "step": 239990 }, { "epoch": 4.885496183206107, "grad_norm": 7.641207297253599, "learning_rate": 1.596707390161156e-08, "loss": 0.0442, "step": 240000 }, { "epoch": 4.885699745547074, "grad_norm": 0.006722934772573971, "learning_rate": 1.5910382648278156e-08, "loss": 0.0163, "step": 240010 }, { "epoch": 4.885903307888041, "grad_norm": 0.013499978862519522, "learning_rate": 1.5853792055053883e-08, "loss": 0.0, "step": 240020 }, { "epoch": 4.886106870229008, "grad_norm": 0.005218818802493569, "learning_rate": 1.5797302123081725e-08, "loss": 0.0002, "step": 240030 }, { "epoch": 4.886310432569974, "grad_norm": 0.19594834209869702, "learning_rate": 1.5740912853502433e-08, "loss": 0.0002, "step": 240040 }, { "epoch": 4.886513994910942, "grad_norm": 0.025915028663757667, "learning_rate": 1.5684624247455094e-08, "loss": 0.0472, "step": 240050 }, { "epoch": 4.886717557251909, "grad_norm": 0.004592224343691849, "learning_rate": 1.5628436306077132e-08, "loss": 0.0013, "step": 240060 }, { "epoch": 4.886921119592875, "grad_norm": 0.000315321038673046, "learning_rate": 1.5572349030502087e-08, "loss": 0.0, "step": 240070 }, { "epoch": 4.887124681933842, "grad_norm": 8.151645340117419, "learning_rate": 1.5516362421863496e-08, "loss": 0.0086, "step": 240080 }, { "epoch": 4.887328244274809, "grad_norm": 0.016071835284060487, "learning_rate": 1.5460476481292118e-08, "loss": 0.0053, "step": 240090 }, { "epoch": 4.887531806615776, "grad_norm": 0.00148115915251606, "learning_rate": 1.5404691209915946e-08, "loss": 0.0301, "step": 240100 }, { "epoch": 4.887735368956743, "grad_norm": 0.015275341823609538, "learning_rate": 1.534900660886296e-08, "loss": 0.0099, "step": 240110 }, { "epoch": 4.88793893129771, "grad_norm": 0.020716991083696158, "learning_rate": 1.529342267925671e-08, "loss": 0.0003, "step": 240120 }, { "epoch": 4.888142493638677, "grad_norm": 0.006364319933826629, "learning_rate": 1.5237939422219627e-08, "loss": 0.0031, "step": 240130 }, { "epoch": 4.8883460559796434, "grad_norm": 0.06242629474963471, "learning_rate": 1.518255683887304e-08, "loss": 0.0022, "step": 240140 }, { "epoch": 4.888549618320611, "grad_norm": 0.002080850559882777, "learning_rate": 1.5127274930334947e-08, "loss": 0.0001, "step": 240150 }, { "epoch": 4.888753180661578, "grad_norm": 0.0020087995238810643, "learning_rate": 1.5072093697722222e-08, "loss": 0.0575, "step": 240160 }, { "epoch": 4.888956743002544, "grad_norm": 0.0009022957536980031, "learning_rate": 1.501701314214843e-08, "loss": 0.0002, "step": 240170 }, { "epoch": 4.889160305343512, "grad_norm": 0.04803691327059974, "learning_rate": 1.496203326472767e-08, "loss": 0.0008, "step": 240180 }, { "epoch": 4.889363867684478, "grad_norm": 0.011966500533909498, "learning_rate": 1.4907154066568506e-08, "loss": 0.0308, "step": 240190 }, { "epoch": 4.889567430025445, "grad_norm": 0.25488293187090677, "learning_rate": 1.4852375548780606e-08, "loss": 0.0138, "step": 240200 }, { "epoch": 4.8897709923664125, "grad_norm": 0.002327971277343273, "learning_rate": 1.479769771246975e-08, "loss": 0.0058, "step": 240210 }, { "epoch": 4.889974554707379, "grad_norm": 9.25864588847427, "learning_rate": 1.4743120558740054e-08, "loss": 0.0383, "step": 240220 }, { "epoch": 4.890178117048346, "grad_norm": 0.0012474426201369243, "learning_rate": 1.468864408869397e-08, "loss": 0.0002, "step": 240230 }, { "epoch": 4.890381679389313, "grad_norm": 0.013703087446091337, "learning_rate": 1.4634268303432287e-08, "loss": 0.0268, "step": 240240 }, { "epoch": 4.89058524173028, "grad_norm": 0.0268109667712944, "learning_rate": 1.4579993204051901e-08, "loss": 0.0286, "step": 240250 }, { "epoch": 4.890788804071247, "grad_norm": 0.015415618545754418, "learning_rate": 1.452581879165027e-08, "loss": 0.0012, "step": 240260 }, { "epoch": 4.890992366412213, "grad_norm": 0.12693782098670137, "learning_rate": 1.4471745067320963e-08, "loss": 0.0004, "step": 240270 }, { "epoch": 4.891195928753181, "grad_norm": 0.020406637859231514, "learning_rate": 1.4417772032155886e-08, "loss": 0.0001, "step": 240280 }, { "epoch": 4.891399491094147, "grad_norm": 0.00457042887094027, "learning_rate": 1.4363899687245275e-08, "loss": 0.0461, "step": 240290 }, { "epoch": 4.891603053435114, "grad_norm": 0.004011771079532541, "learning_rate": 1.4310128033677151e-08, "loss": 0.003, "step": 240300 }, { "epoch": 4.891806615776082, "grad_norm": 0.0001509361636864791, "learning_rate": 1.4256457072537866e-08, "loss": 0.0002, "step": 240310 }, { "epoch": 4.892010178117048, "grad_norm": 0.000888893127463678, "learning_rate": 1.4202886804910444e-08, "loss": 0.0016, "step": 240320 }, { "epoch": 4.892213740458015, "grad_norm": 0.0060196930122375205, "learning_rate": 1.4149417231877905e-08, "loss": 0.0122, "step": 240330 }, { "epoch": 4.892417302798982, "grad_norm": 0.008686822825771556, "learning_rate": 1.4096048354519388e-08, "loss": 0.0001, "step": 240340 }, { "epoch": 4.892620865139949, "grad_norm": 4.537773075482343, "learning_rate": 1.4042780173912363e-08, "loss": 0.0024, "step": 240350 }, { "epoch": 4.892824427480916, "grad_norm": 0.01289314059626508, "learning_rate": 1.3989612691134302e-08, "loss": 0.028, "step": 240360 }, { "epoch": 4.893027989821883, "grad_norm": 0.0021675446079022108, "learning_rate": 1.3936545907257127e-08, "loss": 0.0268, "step": 240370 }, { "epoch": 4.89323155216285, "grad_norm": 0.005855185481847725, "learning_rate": 1.3883579823353865e-08, "loss": 0.0001, "step": 240380 }, { "epoch": 4.893435114503816, "grad_norm": 0.05923887667838244, "learning_rate": 1.3830714440493664e-08, "loss": 0.0003, "step": 240390 }, { "epoch": 4.893638676844784, "grad_norm": 0.0007198961054442786, "learning_rate": 1.3777949759744557e-08, "loss": 0.0453, "step": 240400 }, { "epoch": 4.893842239185751, "grad_norm": 0.00046667099563860577, "learning_rate": 1.3725285782171805e-08, "loss": 0.0214, "step": 240410 }, { "epoch": 4.894045801526717, "grad_norm": 0.0029720046670066403, "learning_rate": 1.3672722508838998e-08, "loss": 0.014, "step": 240420 }, { "epoch": 4.894249363867685, "grad_norm": 0.016349436556946075, "learning_rate": 1.3620259940808068e-08, "loss": 0.0237, "step": 240430 }, { "epoch": 4.894452926208651, "grad_norm": 0.02166489580606454, "learning_rate": 1.3567898079138164e-08, "loss": 0.0012, "step": 240440 }, { "epoch": 4.894656488549618, "grad_norm": 3.3035315608432394, "learning_rate": 1.3515636924887332e-08, "loss": 0.0008, "step": 240450 }, { "epoch": 4.8948600508905855, "grad_norm": 3.9318994392208495, "learning_rate": 1.3463476479110837e-08, "loss": 0.0624, "step": 240460 }, { "epoch": 4.895063613231552, "grad_norm": 0.0009889874290779997, "learning_rate": 1.3411416742861727e-08, "loss": 0.0, "step": 240470 }, { "epoch": 4.895267175572519, "grad_norm": 0.0014106203197446615, "learning_rate": 1.3359457717191938e-08, "loss": 0.0001, "step": 240480 }, { "epoch": 4.895470737913486, "grad_norm": 0.007304598732888973, "learning_rate": 1.3307599403150629e-08, "loss": 0.0, "step": 240490 }, { "epoch": 4.895674300254453, "grad_norm": 0.001983408589854472, "learning_rate": 1.3255841801784742e-08, "loss": 0.0001, "step": 240500 }, { "epoch": 4.89587786259542, "grad_norm": 0.01253665308280579, "learning_rate": 1.320418491414066e-08, "loss": 0.0001, "step": 240510 }, { "epoch": 4.896081424936387, "grad_norm": 0.15364302251670212, "learning_rate": 1.3152628741260887e-08, "loss": 0.0002, "step": 240520 }, { "epoch": 4.896284987277354, "grad_norm": 0.009671407694596052, "learning_rate": 1.3101173284186252e-08, "loss": 0.0001, "step": 240530 }, { "epoch": 4.89648854961832, "grad_norm": 0.0030082162607094347, "learning_rate": 1.3049818543957037e-08, "loss": 0.0168, "step": 240540 }, { "epoch": 4.896692111959288, "grad_norm": 0.005668207780558409, "learning_rate": 1.2998564521609635e-08, "loss": 0.0383, "step": 240550 }, { "epoch": 4.8968956743002545, "grad_norm": 0.004313026340916001, "learning_rate": 1.294741121817933e-08, "loss": 0.0294, "step": 240560 }, { "epoch": 4.897099236641221, "grad_norm": 0.0005874317168957919, "learning_rate": 1.2896358634699736e-08, "loss": 0.0009, "step": 240570 }, { "epoch": 4.897302798982189, "grad_norm": 0.37489958738954743, "learning_rate": 1.284540677220114e-08, "loss": 0.0216, "step": 240580 }, { "epoch": 4.897506361323155, "grad_norm": 0.0074331196944803285, "learning_rate": 1.279455563171328e-08, "loss": 0.0014, "step": 240590 }, { "epoch": 4.897709923664122, "grad_norm": 0.002729995836155218, "learning_rate": 1.274380521426255e-08, "loss": 0.0002, "step": 240600 }, { "epoch": 4.8979134860050895, "grad_norm": 0.030292373499138138, "learning_rate": 1.2693155520874245e-08, "loss": 0.0001, "step": 240610 }, { "epoch": 4.898117048346056, "grad_norm": 0.009259695263980803, "learning_rate": 1.2642606552571435e-08, "loss": 0.0604, "step": 240620 }, { "epoch": 4.898320610687023, "grad_norm": 0.0973459455715708, "learning_rate": 1.259215831037497e-08, "loss": 0.0001, "step": 240630 }, { "epoch": 4.89852417302799, "grad_norm": 0.004872004393678697, "learning_rate": 1.2541810795303477e-08, "loss": 0.0001, "step": 240640 }, { "epoch": 4.898727735368957, "grad_norm": 0.023307250577795497, "learning_rate": 1.2491564008373923e-08, "loss": 0.0402, "step": 240650 }, { "epoch": 4.8989312977099235, "grad_norm": 0.017509211550960538, "learning_rate": 1.2441417950601053e-08, "loss": 0.0, "step": 240660 }, { "epoch": 4.899134860050891, "grad_norm": 0.002723233566099378, "learning_rate": 1.2391372622997389e-08, "loss": 0.0184, "step": 240670 }, { "epoch": 4.899338422391858, "grad_norm": 0.0010040327037876438, "learning_rate": 1.2341428026574342e-08, "loss": 0.0, "step": 240680 }, { "epoch": 4.899541984732824, "grad_norm": 0.04633846410786973, "learning_rate": 1.2291584162339999e-08, "loss": 0.0001, "step": 240690 }, { "epoch": 4.899745547073792, "grad_norm": 0.00217843596095389, "learning_rate": 1.2241841031301327e-08, "loss": 0.0015, "step": 240700 }, { "epoch": 4.8999491094147585, "grad_norm": 0.0015843592777271705, "learning_rate": 1.2192198634463082e-08, "loss": 0.0312, "step": 240710 }, { "epoch": 4.900152671755725, "grad_norm": 0.0034146501932789245, "learning_rate": 1.2142656972827237e-08, "loss": 0.0735, "step": 240720 }, { "epoch": 4.900356234096692, "grad_norm": 0.019677665773499272, "learning_rate": 1.2093216047394662e-08, "loss": 0.0002, "step": 240730 }, { "epoch": 4.900559796437659, "grad_norm": 0.0004961773948873686, "learning_rate": 1.2043875859164555e-08, "loss": 0.0001, "step": 240740 }, { "epoch": 4.900763358778626, "grad_norm": 0.11483424735537945, "learning_rate": 1.1994636409132232e-08, "loss": 0.0117, "step": 240750 }, { "epoch": 4.9009669211195925, "grad_norm": 7.848025527887043, "learning_rate": 1.1945497698293007e-08, "loss": 0.002, "step": 240760 }, { "epoch": 4.90117048346056, "grad_norm": 0.010145554559759754, "learning_rate": 1.1896459727638864e-08, "loss": 0.0607, "step": 240770 }, { "epoch": 4.901374045801527, "grad_norm": 0.00013138681039518528, "learning_rate": 1.1847522498160679e-08, "loss": 0.0, "step": 240780 }, { "epoch": 4.901577608142493, "grad_norm": 0.1360975326794089, "learning_rate": 1.1798686010845995e-08, "loss": 0.0005, "step": 240790 }, { "epoch": 4.901781170483461, "grad_norm": 0.01496022943270945, "learning_rate": 1.1749950266681797e-08, "loss": 0.0123, "step": 240800 }, { "epoch": 4.9019847328244275, "grad_norm": 0.0011428584535646542, "learning_rate": 1.1701315266652303e-08, "loss": 0.0005, "step": 240810 }, { "epoch": 4.902188295165394, "grad_norm": 0.004165303300568298, "learning_rate": 1.1652781011739501e-08, "loss": 0.0003, "step": 240820 }, { "epoch": 4.902391857506362, "grad_norm": 0.004361795066690929, "learning_rate": 1.1604347502923719e-08, "loss": 0.0, "step": 240830 }, { "epoch": 4.902595419847328, "grad_norm": 0.0037899494223840235, "learning_rate": 1.155601474118362e-08, "loss": 0.0267, "step": 240840 }, { "epoch": 4.902798982188295, "grad_norm": 0.0017713427149652697, "learning_rate": 1.1507782727493976e-08, "loss": 0.0004, "step": 240850 }, { "epoch": 4.903002544529262, "grad_norm": 0.00954472420027744, "learning_rate": 1.1459651462830678e-08, "loss": 0.0004, "step": 240860 }, { "epoch": 4.903206106870229, "grad_norm": 0.06092908048635198, "learning_rate": 1.141162094816406e-08, "loss": 0.0002, "step": 240870 }, { "epoch": 4.903409669211196, "grad_norm": 0.202008288930681, "learning_rate": 1.1363691184465564e-08, "loss": 0.0004, "step": 240880 }, { "epoch": 4.903613231552163, "grad_norm": 0.005341226817921628, "learning_rate": 1.1315862172702751e-08, "loss": 0.0002, "step": 240890 }, { "epoch": 4.90381679389313, "grad_norm": 0.021485324305088038, "learning_rate": 1.1268133913840962e-08, "loss": 0.0135, "step": 240900 }, { "epoch": 4.9040203562340965, "grad_norm": 0.0037329270845290414, "learning_rate": 1.1220506408845533e-08, "loss": 0.0204, "step": 240910 }, { "epoch": 4.904223918575063, "grad_norm": 0.004955289521735269, "learning_rate": 1.1172979658676808e-08, "loss": 0.0026, "step": 240920 }, { "epoch": 4.904427480916031, "grad_norm": 0.005080506038886315, "learning_rate": 1.1125553664295131e-08, "loss": 0.0, "step": 240930 }, { "epoch": 4.904631043256997, "grad_norm": 0.02659745436323296, "learning_rate": 1.1078228426658622e-08, "loss": 0.0139, "step": 240940 }, { "epoch": 4.904834605597964, "grad_norm": 0.001266134807633481, "learning_rate": 1.1031003946723184e-08, "loss": 0.0239, "step": 240950 }, { "epoch": 4.905038167938931, "grad_norm": 0.0004223598727297789, "learning_rate": 1.09838802254425e-08, "loss": 0.0018, "step": 240960 }, { "epoch": 4.905241730279898, "grad_norm": 0.05645419525399036, "learning_rate": 1.093685726376803e-08, "loss": 0.0048, "step": 240970 }, { "epoch": 4.905445292620865, "grad_norm": 0.07606696556356533, "learning_rate": 1.088993506264957e-08, "loss": 0.0001, "step": 240980 }, { "epoch": 4.905648854961832, "grad_norm": 0.0015706024635253504, "learning_rate": 1.0843113623034695e-08, "loss": 0.0537, "step": 240990 }, { "epoch": 4.905852417302799, "grad_norm": 0.006366173270324845, "learning_rate": 1.0796392945869316e-08, "loss": 0.0989, "step": 241000 }, { "epoch": 4.9060559796437655, "grad_norm": 0.002865596453898501, "learning_rate": 1.0749773032096567e-08, "loss": 0.037, "step": 241010 }, { "epoch": 4.906259541984733, "grad_norm": 0.0011600041492394954, "learning_rate": 1.0703253882658471e-08, "loss": 0.0005, "step": 241020 }, { "epoch": 4.9064631043257, "grad_norm": 0.029114875346498724, "learning_rate": 1.0656835498494278e-08, "loss": 0.0002, "step": 241030 }, { "epoch": 4.906666666666666, "grad_norm": 0.0004896467799087428, "learning_rate": 1.061051788054157e-08, "loss": 0.0297, "step": 241040 }, { "epoch": 4.906870229007634, "grad_norm": 0.012571389846444828, "learning_rate": 1.0564301029735713e-08, "loss": 0.0019, "step": 241050 }, { "epoch": 4.9070737913486004, "grad_norm": 0.10605992692093306, "learning_rate": 1.0518184947009847e-08, "loss": 0.0301, "step": 241060 }, { "epoch": 4.907277353689567, "grad_norm": 0.004634109852105673, "learning_rate": 1.0472169633296004e-08, "loss": 0.0747, "step": 241070 }, { "epoch": 4.907480916030535, "grad_norm": 0.006612607960263821, "learning_rate": 1.0426255089522886e-08, "loss": 0.004, "step": 241080 }, { "epoch": 4.907684478371501, "grad_norm": 0.007930214501523888, "learning_rate": 1.0380441316618084e-08, "loss": 0.0457, "step": 241090 }, { "epoch": 4.907888040712468, "grad_norm": 0.6163794773496812, "learning_rate": 1.0334728315507525e-08, "loss": 0.0001, "step": 241100 }, { "epoch": 4.908091603053435, "grad_norm": 70.50712906512999, "learning_rate": 1.0289116087113248e-08, "loss": 0.048, "step": 241110 }, { "epoch": 4.908295165394402, "grad_norm": 0.311130589462016, "learning_rate": 1.024360463235674e-08, "loss": 0.0158, "step": 241120 }, { "epoch": 4.908498727735369, "grad_norm": 0.007144024873365484, "learning_rate": 1.019819395215782e-08, "loss": 0.0678, "step": 241130 }, { "epoch": 4.908702290076336, "grad_norm": 0.014499554414751823, "learning_rate": 1.0152884047432976e-08, "loss": 0.0002, "step": 241140 }, { "epoch": 4.908905852417303, "grad_norm": 0.007876771175672648, "learning_rate": 1.0107674919097588e-08, "loss": 0.0004, "step": 241150 }, { "epoch": 4.9091094147582695, "grad_norm": 0.03549499765412686, "learning_rate": 1.0062566568064259e-08, "loss": 0.0001, "step": 241160 }, { "epoch": 4.909312977099237, "grad_norm": 0.02520342829723427, "learning_rate": 1.0017558995245036e-08, "loss": 0.0653, "step": 241170 }, { "epoch": 4.909516539440204, "grad_norm": 0.5267179417301937, "learning_rate": 9.972652201547529e-09, "loss": 0.0005, "step": 241180 }, { "epoch": 4.90972010178117, "grad_norm": 0.0017019498022693234, "learning_rate": 9.927846187879897e-09, "loss": 0.0007, "step": 241190 }, { "epoch": 4.909923664122138, "grad_norm": 0.08047890012573954, "learning_rate": 9.88314095514642e-09, "loss": 0.0002, "step": 241200 }, { "epoch": 4.910127226463104, "grad_norm": 0.0032925540355819567, "learning_rate": 9.838536504250262e-09, "loss": 0.0003, "step": 241210 }, { "epoch": 4.910330788804071, "grad_norm": 0.009785846649910801, "learning_rate": 9.79403283609237e-09, "loss": 0.0003, "step": 241220 }, { "epoch": 4.910534351145039, "grad_norm": 0.023200007337512995, "learning_rate": 9.749629951570915e-09, "loss": 0.0003, "step": 241230 }, { "epoch": 4.910737913486005, "grad_norm": 0.0957834442410398, "learning_rate": 9.705327851582957e-09, "loss": 0.062, "step": 241240 }, { "epoch": 4.910941475826972, "grad_norm": 0.1343323052420777, "learning_rate": 9.66112653702389e-09, "loss": 0.0269, "step": 241250 }, { "epoch": 4.911145038167939, "grad_norm": 0.031428861990767665, "learning_rate": 9.617026008785779e-09, "loss": 0.0012, "step": 241260 }, { "epoch": 4.911348600508906, "grad_norm": 7.377983538471279, "learning_rate": 9.573026267759022e-09, "loss": 0.0199, "step": 241270 }, { "epoch": 4.911552162849873, "grad_norm": 0.23422913812029464, "learning_rate": 9.529127314832908e-09, "loss": 0.0607, "step": 241280 }, { "epoch": 4.91175572519084, "grad_norm": 0.005875325654762854, "learning_rate": 9.485329150893397e-09, "loss": 0.0003, "step": 241290 }, { "epoch": 4.911959287531807, "grad_norm": 0.0013932392221948156, "learning_rate": 9.441631776825888e-09, "loss": 0.0134, "step": 241300 }, { "epoch": 4.912162849872773, "grad_norm": 0.015794063186083285, "learning_rate": 9.3980351935119e-09, "loss": 0.066, "step": 241310 }, { "epoch": 4.912366412213741, "grad_norm": 0.011425034033624267, "learning_rate": 9.354539401832952e-09, "loss": 0.0003, "step": 241320 }, { "epoch": 4.912569974554708, "grad_norm": 0.007595312228175924, "learning_rate": 9.311144402666673e-09, "loss": 0.0118, "step": 241330 }, { "epoch": 4.912773536895674, "grad_norm": 0.009327573451747822, "learning_rate": 9.267850196890138e-09, "loss": 0.0001, "step": 241340 }, { "epoch": 4.912977099236642, "grad_norm": 0.004332643236220225, "learning_rate": 9.224656785377096e-09, "loss": 0.0002, "step": 241350 }, { "epoch": 4.913180661577608, "grad_norm": 0.0094259010662914, "learning_rate": 9.181564169000178e-09, "loss": 0.0295, "step": 241360 }, { "epoch": 4.913384223918575, "grad_norm": 0.0021445494549803337, "learning_rate": 9.13857234862925e-09, "loss": 0.0001, "step": 241370 }, { "epoch": 4.913587786259542, "grad_norm": 0.018997343694714446, "learning_rate": 9.09568132513361e-09, "loss": 0.0, "step": 241380 }, { "epoch": 4.913791348600509, "grad_norm": 0.09847630582081743, "learning_rate": 9.05289109937868e-09, "loss": 0.0078, "step": 241390 }, { "epoch": 4.913994910941476, "grad_norm": 0.005237960482783359, "learning_rate": 9.010201672229324e-09, "loss": 0.0001, "step": 241400 }, { "epoch": 4.914198473282442, "grad_norm": 0.013154794650975066, "learning_rate": 8.967613044547074e-09, "loss": 0.0002, "step": 241410 }, { "epoch": 4.91440203562341, "grad_norm": 0.002504330917382719, "learning_rate": 8.925125217191798e-09, "loss": 0.0002, "step": 241420 }, { "epoch": 4.914605597964377, "grad_norm": 8.705821047142123, "learning_rate": 8.88273819102281e-09, "loss": 0.0064, "step": 241430 }, { "epoch": 4.914809160305343, "grad_norm": 4.698362259087465, "learning_rate": 8.840451966894981e-09, "loss": 0.0025, "step": 241440 }, { "epoch": 4.915012722646311, "grad_norm": 0.001635761829985626, "learning_rate": 8.798266545663182e-09, "loss": 0.0323, "step": 241450 }, { "epoch": 4.915216284987277, "grad_norm": 0.3288633040457871, "learning_rate": 8.756181928178398e-09, "loss": 0.0002, "step": 241460 }, { "epoch": 4.915419847328244, "grad_norm": 0.0009140802412320949, "learning_rate": 8.714198115291616e-09, "loss": 0.0179, "step": 241470 }, { "epoch": 4.9156234096692115, "grad_norm": 0.06689369116490634, "learning_rate": 8.672315107850493e-09, "loss": 0.0324, "step": 241480 }, { "epoch": 4.915826972010178, "grad_norm": 0.0013381547105500002, "learning_rate": 8.630532906700462e-09, "loss": 0.0001, "step": 241490 }, { "epoch": 4.916030534351145, "grad_norm": 0.02562841080595907, "learning_rate": 8.588851512685848e-09, "loss": 0.0, "step": 241500 }, { "epoch": 4.916234096692112, "grad_norm": 0.0011383447734519736, "learning_rate": 8.547270926647644e-09, "loss": 0.0, "step": 241510 }, { "epoch": 4.916437659033079, "grad_norm": 0.4510867936311854, "learning_rate": 8.505791149426846e-09, "loss": 0.0673, "step": 241520 }, { "epoch": 4.916641221374046, "grad_norm": 0.005527681629941976, "learning_rate": 8.464412181860559e-09, "loss": 0.0001, "step": 241530 }, { "epoch": 4.916844783715013, "grad_norm": 0.005713669547149819, "learning_rate": 8.423134024784785e-09, "loss": 0.0185, "step": 241540 }, { "epoch": 4.91704834605598, "grad_norm": 0.004828530837791087, "learning_rate": 8.381956679032189e-09, "loss": 0.0002, "step": 241550 }, { "epoch": 4.917251908396946, "grad_norm": 0.0026795513858257477, "learning_rate": 8.340880145435438e-09, "loss": 0.0, "step": 241560 }, { "epoch": 4.917455470737913, "grad_norm": 0.021385054807997936, "learning_rate": 8.299904424823868e-09, "loss": 0.016, "step": 241570 }, { "epoch": 4.9176590330788805, "grad_norm": 0.02643826777557272, "learning_rate": 8.259029518025151e-09, "loss": 0.0049, "step": 241580 }, { "epoch": 4.917862595419847, "grad_norm": 0.00041206075558048003, "learning_rate": 8.21825542586474e-09, "loss": 0.0001, "step": 241590 }, { "epoch": 4.918066157760814, "grad_norm": 0.02372070716505942, "learning_rate": 8.177582149165863e-09, "loss": 0.0022, "step": 241600 }, { "epoch": 4.918269720101781, "grad_norm": 2.162089449304985e-05, "learning_rate": 8.137009688750086e-09, "loss": 0.0001, "step": 241610 }, { "epoch": 4.918473282442748, "grad_norm": 0.002960349188257428, "learning_rate": 8.096538045436753e-09, "loss": 0.0385, "step": 241620 }, { "epoch": 4.918676844783715, "grad_norm": 0.03256448150490012, "learning_rate": 8.056167220043542e-09, "loss": 0.0124, "step": 241630 }, { "epoch": 4.918880407124682, "grad_norm": 0.4398249209409418, "learning_rate": 8.01589721338536e-09, "loss": 0.0006, "step": 241640 }, { "epoch": 4.919083969465649, "grad_norm": 4.28551528156534e-08, "learning_rate": 7.975728026275997e-09, "loss": 0.0216, "step": 241650 }, { "epoch": 4.919287531806615, "grad_norm": 0.005898408437467981, "learning_rate": 7.935659659526473e-09, "loss": 0.0003, "step": 241660 }, { "epoch": 4.919491094147583, "grad_norm": 0.002000740478073853, "learning_rate": 7.89569211394614e-09, "loss": 0.0, "step": 241670 }, { "epoch": 4.9196946564885495, "grad_norm": 0.0025748359369872956, "learning_rate": 7.855825390341576e-09, "loss": 0.0001, "step": 241680 }, { "epoch": 4.919898218829516, "grad_norm": 0.007954268008088839, "learning_rate": 7.816059489518802e-09, "loss": 0.0005, "step": 241690 }, { "epoch": 4.920101781170484, "grad_norm": 3.596548047446907, "learning_rate": 7.776394412280508e-09, "loss": 0.0272, "step": 241700 }, { "epoch": 4.92030534351145, "grad_norm": 0.0011732474323536393, "learning_rate": 7.736830159428277e-09, "loss": 0.0, "step": 241710 }, { "epoch": 4.920508905852417, "grad_norm": 0.03378013649966975, "learning_rate": 7.697366731760358e-09, "loss": 0.0, "step": 241720 }, { "epoch": 4.9207124681933845, "grad_norm": 0.000368228924465241, "learning_rate": 7.658004130074449e-09, "loss": 0.0, "step": 241730 }, { "epoch": 4.920916030534351, "grad_norm": 0.022764240493734784, "learning_rate": 7.618742355164909e-09, "loss": 0.0001, "step": 241740 }, { "epoch": 4.921119592875318, "grad_norm": 0.003504591088561955, "learning_rate": 7.579581407825e-09, "loss": 0.0001, "step": 241750 }, { "epoch": 4.921323155216285, "grad_norm": 0.03164533655432823, "learning_rate": 7.540521288845748e-09, "loss": 0.0003, "step": 241760 }, { "epoch": 4.921526717557252, "grad_norm": 0.016826366644997325, "learning_rate": 7.501561999015972e-09, "loss": 0.0001, "step": 241770 }, { "epoch": 4.9217302798982185, "grad_norm": 0.0038853519269430466, "learning_rate": 7.462703539122262e-09, "loss": 0.0048, "step": 241780 }, { "epoch": 4.921933842239186, "grad_norm": 0.005033417519032391, "learning_rate": 7.423945909949548e-09, "loss": 0.0001, "step": 241790 }, { "epoch": 4.922137404580153, "grad_norm": 0.06550221844439953, "learning_rate": 7.385289112281091e-09, "loss": 0.0006, "step": 241800 }, { "epoch": 4.922340966921119, "grad_norm": 0.02627208544915735, "learning_rate": 7.3467331468968225e-09, "loss": 0.0008, "step": 241810 }, { "epoch": 4.922544529262087, "grad_norm": 0.07373287572135892, "learning_rate": 7.308278014575565e-09, "loss": 0.0312, "step": 241820 }, { "epoch": 4.9227480916030535, "grad_norm": 0.003111028985401963, "learning_rate": 7.2699237160944734e-09, "loss": 0.0001, "step": 241830 }, { "epoch": 4.92295165394402, "grad_norm": 0.1208312106031363, "learning_rate": 7.231670252227929e-09, "loss": 0.0001, "step": 241840 }, { "epoch": 4.923155216284988, "grad_norm": 0.0308438990099361, "learning_rate": 7.193517623748092e-09, "loss": 0.0016, "step": 241850 }, { "epoch": 4.923358778625954, "grad_norm": 0.007273143701425303, "learning_rate": 7.1554658314260115e-09, "loss": 0.0001, "step": 241860 }, { "epoch": 4.923562340966921, "grad_norm": 0.00104909821840081, "learning_rate": 7.117514876030518e-09, "loss": 0.0177, "step": 241870 }, { "epoch": 4.923765903307888, "grad_norm": 0.005726171136457649, "learning_rate": 7.079664758327109e-09, "loss": 0.0002, "step": 241880 }, { "epoch": 4.923969465648855, "grad_norm": 0.14259102637917187, "learning_rate": 7.0419154790807295e-09, "loss": 0.0005, "step": 241890 }, { "epoch": 4.924173027989822, "grad_norm": 0.00024174707463341796, "learning_rate": 7.004267039053547e-09, "loss": 0.0001, "step": 241900 }, { "epoch": 4.924376590330789, "grad_norm": 0.008018435445562194, "learning_rate": 6.9667194390066195e-09, "loss": 0.0002, "step": 241910 }, { "epoch": 4.924580152671756, "grad_norm": 0.01428024820446649, "learning_rate": 6.929272679697119e-09, "loss": 0.0002, "step": 241920 }, { "epoch": 4.9247837150127225, "grad_norm": 6.222146708062297e-05, "learning_rate": 6.8919267618827725e-09, "loss": 0.0005, "step": 241930 }, { "epoch": 4.92498727735369, "grad_norm": 0.004222697923723434, "learning_rate": 6.854681686316311e-09, "loss": 0.0005, "step": 241940 }, { "epoch": 4.925190839694657, "grad_norm": 0.008587364304747517, "learning_rate": 6.817537453751022e-09, "loss": 0.0002, "step": 241950 }, { "epoch": 4.925394402035623, "grad_norm": 0.00019587531508108511, "learning_rate": 6.78049406493686e-09, "loss": 0.0335, "step": 241960 }, { "epoch": 4.925597964376591, "grad_norm": 0.01713664075222434, "learning_rate": 6.7435515206215606e-09, "loss": 0.0381, "step": 241970 }, { "epoch": 4.925801526717557, "grad_norm": 0.006645870044789709, "learning_rate": 6.706709821551749e-09, "loss": 0.0001, "step": 241980 }, { "epoch": 4.926005089058524, "grad_norm": 0.004576616050506622, "learning_rate": 6.669968968471274e-09, "loss": 0.0265, "step": 241990 }, { "epoch": 4.926208651399491, "grad_norm": 0.9805573895526689, "learning_rate": 6.633328962121766e-09, "loss": 0.0003, "step": 242000 }, { "epoch": 4.926412213740458, "grad_norm": 5.012057418981072, "learning_rate": 6.596789803243742e-09, "loss": 0.0297, "step": 242010 }, { "epoch": 4.926615776081425, "grad_norm": 0.0010304776551525478, "learning_rate": 6.560351492575501e-09, "loss": 0.0403, "step": 242020 }, { "epoch": 4.9268193384223915, "grad_norm": 0.010258426690968969, "learning_rate": 6.524014030852011e-09, "loss": 0.0013, "step": 242030 }, { "epoch": 4.927022900763359, "grad_norm": 8.157676436433242, "learning_rate": 6.4877774188076835e-09, "loss": 0.0267, "step": 242040 }, { "epoch": 4.927226463104326, "grad_norm": 0.0032506625909721693, "learning_rate": 6.451641657174157e-09, "loss": 0.0001, "step": 242050 }, { "epoch": 4.927430025445292, "grad_norm": 0.000513246236713509, "learning_rate": 6.415606746681402e-09, "loss": 0.0, "step": 242060 }, { "epoch": 4.92763358778626, "grad_norm": 15.965503277551264, "learning_rate": 6.37967268805717e-09, "loss": 0.018, "step": 242070 }, { "epoch": 4.9278371501272265, "grad_norm": 0.003455382512187561, "learning_rate": 6.343839482027547e-09, "loss": 0.0007, "step": 242080 }, { "epoch": 4.928040712468193, "grad_norm": 0.0015383801618670456, "learning_rate": 6.308107129315289e-09, "loss": 0.0001, "step": 242090 }, { "epoch": 4.928244274809161, "grad_norm": 0.01511961404321167, "learning_rate": 6.272475630643149e-09, "loss": 0.0083, "step": 242100 }, { "epoch": 4.928447837150127, "grad_norm": 0.002035527865828948, "learning_rate": 6.236944986729998e-09, "loss": 0.0001, "step": 242110 }, { "epoch": 4.928651399491094, "grad_norm": 0.002117858028182782, "learning_rate": 6.201515198293595e-09, "loss": 0.0017, "step": 242120 }, { "epoch": 4.928854961832061, "grad_norm": 0.004970881359717702, "learning_rate": 6.166186266049479e-09, "loss": 0.0379, "step": 242130 }, { "epoch": 4.929058524173028, "grad_norm": 0.03579745389268214, "learning_rate": 6.130958190711522e-09, "loss": 0.0052, "step": 242140 }, { "epoch": 4.929262086513995, "grad_norm": 0.008096092207740773, "learning_rate": 6.095830972990269e-09, "loss": 0.0317, "step": 242150 }, { "epoch": 4.929465648854962, "grad_norm": 0.007914909030645522, "learning_rate": 6.0608046135962604e-09, "loss": 0.0071, "step": 242160 }, { "epoch": 4.929669211195929, "grad_norm": 0.009538552084268459, "learning_rate": 6.0258791132367104e-09, "loss": 0.0415, "step": 242170 }, { "epoch": 4.9298727735368955, "grad_norm": 0.025545559460136747, "learning_rate": 5.991054472616609e-09, "loss": 0.0002, "step": 242180 }, { "epoch": 4.930076335877863, "grad_norm": 0.003784091289012829, "learning_rate": 5.956330692439283e-09, "loss": 0.004, "step": 242190 }, { "epoch": 4.93027989821883, "grad_norm": 0.03101289425148556, "learning_rate": 5.9217077734058384e-09, "loss": 0.0331, "step": 242200 }, { "epoch": 4.930483460559796, "grad_norm": 0.010844584298337427, "learning_rate": 5.88718571621627e-09, "loss": 0.0033, "step": 242210 }, { "epoch": 4.930687022900763, "grad_norm": 0.005640069173682525, "learning_rate": 5.8527645215672445e-09, "loss": 0.0009, "step": 242220 }, { "epoch": 4.93089058524173, "grad_norm": 0.03141925600981, "learning_rate": 5.818444190154315e-09, "loss": 0.0004, "step": 242230 }, { "epoch": 4.931094147582697, "grad_norm": 0.002358444906714607, "learning_rate": 5.784224722670262e-09, "loss": 0.0015, "step": 242240 }, { "epoch": 4.931297709923664, "grad_norm": 0.0002037565919044095, "learning_rate": 5.750106119805643e-09, "loss": 0.0001, "step": 242250 }, { "epoch": 4.931501272264631, "grad_norm": 0.15956590395872416, "learning_rate": 5.7160883822510174e-09, "loss": 0.0017, "step": 242260 }, { "epoch": 4.931704834605598, "grad_norm": 0.15201441048313227, "learning_rate": 5.682171510691947e-09, "loss": 0.0002, "step": 242270 }, { "epoch": 4.9319083969465645, "grad_norm": 0.03023965928897673, "learning_rate": 5.6483555058145515e-09, "loss": 0.0008, "step": 242280 }, { "epoch": 4.932111959287532, "grad_norm": 6.020393154418333, "learning_rate": 5.6146403683010605e-09, "loss": 0.0013, "step": 242290 }, { "epoch": 4.932315521628499, "grad_norm": 9.884828442542652, "learning_rate": 5.581026098832598e-09, "loss": 0.078, "step": 242300 }, { "epoch": 4.932519083969465, "grad_norm": 10.610844011708949, "learning_rate": 5.547512698088064e-09, "loss": 0.079, "step": 242310 }, { "epoch": 4.932722646310433, "grad_norm": 0.007597290056622742, "learning_rate": 5.514100166744141e-09, "loss": 0.0053, "step": 242320 }, { "epoch": 4.932926208651399, "grad_norm": 0.007319784546454684, "learning_rate": 5.480788505476397e-09, "loss": 0.0002, "step": 242330 }, { "epoch": 4.933129770992366, "grad_norm": 0.017169291285934783, "learning_rate": 5.44757771495652e-09, "loss": 0.0423, "step": 242340 }, { "epoch": 4.933333333333334, "grad_norm": 0.02523417962651083, "learning_rate": 5.414467795855638e-09, "loss": 0.0002, "step": 242350 }, { "epoch": 4.9335368956743, "grad_norm": 0.12331913558524778, "learning_rate": 5.38145874884266e-09, "loss": 0.0219, "step": 242360 }, { "epoch": 4.933740458015267, "grad_norm": 0.02424177407292405, "learning_rate": 5.348550574584277e-09, "loss": 0.003, "step": 242370 }, { "epoch": 4.933944020356234, "grad_norm": 0.01188344885058321, "learning_rate": 5.31574327374551e-09, "loss": 0.0004, "step": 242380 }, { "epoch": 4.934147582697201, "grad_norm": 0.16026259431403186, "learning_rate": 5.2830368469874994e-09, "loss": 0.0002, "step": 242390 }, { "epoch": 4.934351145038168, "grad_norm": 0.0006038560752003252, "learning_rate": 5.250431294972491e-09, "loss": 0.0234, "step": 242400 }, { "epoch": 4.934554707379135, "grad_norm": 0.005490742376304241, "learning_rate": 5.217926618357738e-09, "loss": 0.0007, "step": 242410 }, { "epoch": 4.934758269720102, "grad_norm": 0.001675181142610647, "learning_rate": 5.185522817799938e-09, "loss": 0.0287, "step": 242420 }, { "epoch": 4.934961832061068, "grad_norm": 0.05115717553204782, "learning_rate": 5.15321989395412e-09, "loss": 0.0092, "step": 242430 }, { "epoch": 4.935165394402036, "grad_norm": 0.0035014661997538605, "learning_rate": 5.121017847472542e-09, "loss": 0.0001, "step": 242440 }, { "epoch": 4.935368956743003, "grad_norm": 0.05104132266411159, "learning_rate": 5.08891667900524e-09, "loss": 0.0135, "step": 242450 }, { "epoch": 4.935572519083969, "grad_norm": 0.5770678637737741, "learning_rate": 5.056916389200583e-09, "loss": 0.0263, "step": 242460 }, { "epoch": 4.935776081424937, "grad_norm": 0.05057723673290252, "learning_rate": 5.025016978704722e-09, "loss": 0.0001, "step": 242470 }, { "epoch": 4.935979643765903, "grad_norm": 0.003886184048873108, "learning_rate": 4.99321844816214e-09, "loss": 0.0116, "step": 242480 }, { "epoch": 4.93618320610687, "grad_norm": 0.018027230422110574, "learning_rate": 4.9615207982156576e-09, "loss": 0.0001, "step": 242490 }, { "epoch": 4.9363867684478375, "grad_norm": 7.761428022979726e-05, "learning_rate": 4.929924029504207e-09, "loss": 0.0001, "step": 242500 }, { "epoch": 4.936590330788804, "grad_norm": 0.0004371518215317679, "learning_rate": 4.898428142666722e-09, "loss": 0.0183, "step": 242510 }, { "epoch": 4.936793893129771, "grad_norm": 0.01623678649958911, "learning_rate": 4.867033138339361e-09, "loss": 0.0002, "step": 242520 }, { "epoch": 4.936997455470738, "grad_norm": 0.017174275629490074, "learning_rate": 4.835739017155505e-09, "loss": 0.0, "step": 242530 }, { "epoch": 4.937201017811705, "grad_norm": 0.2276534544366311, "learning_rate": 4.804545779747982e-09, "loss": 0.0003, "step": 242540 }, { "epoch": 4.937404580152672, "grad_norm": 0.020385491716543618, "learning_rate": 4.7734534267462884e-09, "loss": 0.0001, "step": 242550 }, { "epoch": 4.937608142493639, "grad_norm": 0.01748297740154028, "learning_rate": 4.742461958778255e-09, "loss": 0.0004, "step": 242560 }, { "epoch": 4.937811704834606, "grad_norm": 0.0006424080874228353, "learning_rate": 4.711571376470048e-09, "loss": 0.0737, "step": 242570 }, { "epoch": 4.938015267175572, "grad_norm": 0.034619864452365486, "learning_rate": 4.680781680446167e-09, "loss": 0.0001, "step": 242580 }, { "epoch": 4.93821882951654, "grad_norm": 3.4603200634640155e-05, "learning_rate": 4.650092871327228e-09, "loss": 0.013, "step": 242590 }, { "epoch": 4.9384223918575065, "grad_norm": 0.0007932770529094226, "learning_rate": 4.619504949733844e-09, "loss": 0.0311, "step": 242600 }, { "epoch": 4.938625954198473, "grad_norm": 0.0015933769021405726, "learning_rate": 4.589017916283301e-09, "loss": 0.0291, "step": 242610 }, { "epoch": 4.938829516539441, "grad_norm": 0.001287050943329446, "learning_rate": 4.558631771592325e-09, "loss": 0.0403, "step": 242620 }, { "epoch": 4.939033078880407, "grad_norm": 0.013611901130979721, "learning_rate": 4.5283465162732074e-09, "loss": 0.0389, "step": 242630 }, { "epoch": 4.939236641221374, "grad_norm": 0.0006250913812377299, "learning_rate": 4.498162150938234e-09, "loss": 0.0503, "step": 242640 }, { "epoch": 4.939440203562341, "grad_norm": 4.011274676063678, "learning_rate": 4.4680786761974735e-09, "loss": 0.0194, "step": 242650 }, { "epoch": 4.939643765903308, "grad_norm": 0.0013076065966278663, "learning_rate": 4.438096092657662e-09, "loss": 0.0211, "step": 242660 }, { "epoch": 4.939847328244275, "grad_norm": 0.0007835065283114557, "learning_rate": 4.408214400924982e-09, "loss": 0.0444, "step": 242670 }, { "epoch": 4.940050890585241, "grad_norm": 0.045063384408690454, "learning_rate": 4.378433601602283e-09, "loss": 0.0008, "step": 242680 }, { "epoch": 4.940254452926209, "grad_norm": 0.002725630755838194, "learning_rate": 4.348753695291863e-09, "loss": 0.0291, "step": 242690 }, { "epoch": 4.9404580152671755, "grad_norm": 0.0022305208615085352, "learning_rate": 4.319174682592686e-09, "loss": 0.0046, "step": 242700 }, { "epoch": 4.940661577608142, "grad_norm": 0.009098157992535553, "learning_rate": 4.289696564102053e-09, "loss": 0.0236, "step": 242710 }, { "epoch": 4.94086513994911, "grad_norm": 0.001862318263354785, "learning_rate": 4.260319340415042e-09, "loss": 0.0001, "step": 242720 }, { "epoch": 4.941068702290076, "grad_norm": 0.0019556954238333694, "learning_rate": 4.2310430121261795e-09, "loss": 0.0004, "step": 242730 }, { "epoch": 4.941272264631043, "grad_norm": 0.004286810009839077, "learning_rate": 4.201867579824992e-09, "loss": 0.0136, "step": 242740 }, { "epoch": 4.9414758269720105, "grad_norm": 0.001199860030919828, "learning_rate": 4.172793044102119e-09, "loss": 0.0001, "step": 242750 }, { "epoch": 4.941679389312977, "grad_norm": 0.02580802447415783, "learning_rate": 4.143819405544314e-09, "loss": 0.0914, "step": 242760 }, { "epoch": 4.941882951653944, "grad_norm": 0.20673818869088118, "learning_rate": 4.114946664736663e-09, "loss": 0.0193, "step": 242770 }, { "epoch": 4.942086513994911, "grad_norm": 0.0036449534994451925, "learning_rate": 4.086174822262035e-09, "loss": 0.0, "step": 242780 }, { "epoch": 4.942290076335878, "grad_norm": 0.006280800356408552, "learning_rate": 4.057503878702185e-09, "loss": 0.0008, "step": 242790 }, { "epoch": 4.9424936386768445, "grad_norm": 5.296666883673336, "learning_rate": 4.02893383463554e-09, "loss": 0.0559, "step": 242800 }, { "epoch": 4.942697201017812, "grad_norm": 0.09411685152769776, "learning_rate": 4.000464690639416e-09, "loss": 0.0007, "step": 242810 }, { "epoch": 4.942900763358779, "grad_norm": 0.04881923558560889, "learning_rate": 3.972096447288909e-09, "loss": 0.0003, "step": 242820 }, { "epoch": 4.943104325699745, "grad_norm": 0.0010922675284297443, "learning_rate": 3.9438291051563384e-09, "loss": 0.03, "step": 242830 }, { "epoch": 4.943307888040712, "grad_norm": 0.006989271515908952, "learning_rate": 3.915662664813469e-09, "loss": 0.0004, "step": 242840 }, { "epoch": 4.9435114503816795, "grad_norm": 0.0023870350124820636, "learning_rate": 3.88759712682818e-09, "loss": 0.0759, "step": 242850 }, { "epoch": 4.943715012722646, "grad_norm": 28.794783833715716, "learning_rate": 3.85963249176835e-09, "loss": 0.032, "step": 242860 }, { "epoch": 4.943918575063613, "grad_norm": 0.0028537455637573836, "learning_rate": 3.831768760197974e-09, "loss": 0.0169, "step": 242870 }, { "epoch": 4.94412213740458, "grad_norm": 0.0017237582186518485, "learning_rate": 3.804005932679933e-09, "loss": 0.0282, "step": 242880 }, { "epoch": 4.944325699745547, "grad_norm": 0.008154642466048865, "learning_rate": 3.776344009774891e-09, "loss": 0.0231, "step": 242890 }, { "epoch": 4.9445292620865136, "grad_norm": 0.001708425605858323, "learning_rate": 3.748782992041844e-09, "loss": 0.0212, "step": 242900 }, { "epoch": 4.944732824427481, "grad_norm": 0.12430062098134362, "learning_rate": 3.72132288003757e-09, "loss": 0.0036, "step": 242910 }, { "epoch": 4.944936386768448, "grad_norm": 0.3920690847751639, "learning_rate": 3.69396367431607e-09, "loss": 0.0174, "step": 242920 }, { "epoch": 4.945139949109414, "grad_norm": 12.574295515336768, "learning_rate": 3.666705375430235e-09, "loss": 0.0589, "step": 242930 }, { "epoch": 4.945343511450382, "grad_norm": 0.03697509361147129, "learning_rate": 3.6395479839301808e-09, "loss": 0.0001, "step": 242940 }, { "epoch": 4.9455470737913485, "grad_norm": 0.018436221924519134, "learning_rate": 3.612491500364912e-09, "loss": 0.0204, "step": 242950 }, { "epoch": 4.945750636132315, "grad_norm": 0.003715842274592787, "learning_rate": 3.585535925281214e-09, "loss": 0.0001, "step": 242960 }, { "epoch": 4.945954198473283, "grad_norm": 0.005317074389007591, "learning_rate": 3.5586812592225406e-09, "loss": 0.0002, "step": 242970 }, { "epoch": 4.946157760814249, "grad_norm": 0.00037299344518887753, "learning_rate": 3.531927502731236e-09, "loss": 0.0001, "step": 242980 }, { "epoch": 4.946361323155216, "grad_norm": 0.012669474270755529, "learning_rate": 3.505274656348534e-09, "loss": 0.0, "step": 242990 }, { "epoch": 4.9465648854961835, "grad_norm": 0.0470875244710781, "learning_rate": 3.4787227206123377e-09, "loss": 0.0024, "step": 243000 }, { "epoch": 4.94676844783715, "grad_norm": 1.6661957966479002, "learning_rate": 3.452271696058329e-09, "loss": 0.0166, "step": 243010 }, { "epoch": 4.946972010178117, "grad_norm": 0.0023478437193228457, "learning_rate": 3.4259215832216366e-09, "loss": 0.0458, "step": 243020 }, { "epoch": 4.947175572519084, "grad_norm": 0.0029686723310149922, "learning_rate": 3.399672382634056e-09, "loss": 0.0028, "step": 243030 }, { "epoch": 4.947379134860051, "grad_norm": 0.03647584659734006, "learning_rate": 3.3735240948251645e-09, "loss": 0.0159, "step": 243040 }, { "epoch": 4.9475826972010175, "grad_norm": 8.051998238098376, "learning_rate": 3.3474767203239835e-09, "loss": 0.0452, "step": 243050 }, { "epoch": 4.947786259541985, "grad_norm": 0.3076376844446838, "learning_rate": 3.3215302596562028e-09, "loss": 0.0178, "step": 243060 }, { "epoch": 4.947989821882952, "grad_norm": 0.007299440523960678, "learning_rate": 3.295684713345293e-09, "loss": 0.0, "step": 243070 }, { "epoch": 4.948193384223918, "grad_norm": 0.06580814746936867, "learning_rate": 3.269940081914169e-09, "loss": 0.0001, "step": 243080 }, { "epoch": 4.948396946564886, "grad_norm": 38.41642025846174, "learning_rate": 3.2442963658824155e-09, "loss": 0.0128, "step": 243090 }, { "epoch": 4.9486005089058525, "grad_norm": 0.1808520605225126, "learning_rate": 3.2187535657673965e-09, "loss": 0.033, "step": 243100 }, { "epoch": 4.948804071246819, "grad_norm": 0.03945962940715529, "learning_rate": 3.19331168208592e-09, "loss": 0.0002, "step": 243110 }, { "epoch": 4.949007633587787, "grad_norm": 0.15253734154821386, "learning_rate": 3.167970715351465e-09, "loss": 0.0254, "step": 243120 }, { "epoch": 4.949211195928753, "grad_norm": 0.022933809888056884, "learning_rate": 3.142730666075289e-09, "loss": 0.0256, "step": 243130 }, { "epoch": 4.94941475826972, "grad_norm": 0.003921775569868946, "learning_rate": 3.1175915347675388e-09, "loss": 0.0013, "step": 243140 }, { "epoch": 4.949618320610687, "grad_norm": 0.07599009910094005, "learning_rate": 3.0925533219361427e-09, "loss": 0.0001, "step": 243150 }, { "epoch": 4.949821882951654, "grad_norm": 0.14293692047262255, "learning_rate": 3.067616028086806e-09, "loss": 0.0088, "step": 243160 }, { "epoch": 4.950025445292621, "grad_norm": 0.005268851531985759, "learning_rate": 3.042779653722461e-09, "loss": 0.0001, "step": 243170 }, { "epoch": 4.950229007633588, "grad_norm": 0.1289676469395523, "learning_rate": 3.0180441993454823e-09, "loss": 0.0003, "step": 243180 }, { "epoch": 4.950432569974555, "grad_norm": 0.0036436201385342233, "learning_rate": 2.9934096654554712e-09, "loss": 0.0343, "step": 243190 }, { "epoch": 4.9506361323155215, "grad_norm": 0.005731604082496467, "learning_rate": 2.968876052549252e-09, "loss": 0.0535, "step": 243200 }, { "epoch": 4.950839694656489, "grad_norm": 6.470292954963663, "learning_rate": 2.944443361123095e-09, "loss": 0.0342, "step": 243210 }, { "epoch": 4.951043256997456, "grad_norm": 9.865245728762465e-05, "learning_rate": 2.9201115916693832e-09, "loss": 0.0002, "step": 243220 }, { "epoch": 4.951246819338422, "grad_norm": 0.00432840076028304, "learning_rate": 2.8958807446805015e-09, "loss": 0.0177, "step": 243230 }, { "epoch": 4.95145038167939, "grad_norm": 0.0017709555378311514, "learning_rate": 2.8717508206460575e-09, "loss": 0.0001, "step": 243240 }, { "epoch": 4.951653944020356, "grad_norm": 0.0028641416511480445, "learning_rate": 2.8477218200523295e-09, "loss": 0.0017, "step": 243250 }, { "epoch": 4.951857506361323, "grad_norm": 0.0035184187191526673, "learning_rate": 2.8237937433850393e-09, "loss": 0.023, "step": 243260 }, { "epoch": 4.952061068702291, "grad_norm": 0.006931448730678507, "learning_rate": 2.7999665911282447e-09, "loss": 0.0001, "step": 243270 }, { "epoch": 4.952264631043257, "grad_norm": 0.0007346388046705431, "learning_rate": 2.776240363761562e-09, "loss": 0.0559, "step": 243280 }, { "epoch": 4.952468193384224, "grad_norm": 9.582605677179546e-05, "learning_rate": 2.752615061765718e-09, "loss": 0.0022, "step": 243290 }, { "epoch": 4.9526717557251905, "grad_norm": 0.0019273161452324386, "learning_rate": 2.7290906856169975e-09, "loss": 0.0593, "step": 243300 }, { "epoch": 4.952875318066158, "grad_norm": 0.05824886575596876, "learning_rate": 2.7056672357905766e-09, "loss": 0.0349, "step": 243310 }, { "epoch": 4.953078880407125, "grad_norm": 0.00013893215971937472, "learning_rate": 2.6823447127599656e-09, "loss": 0.0425, "step": 243320 }, { "epoch": 4.953282442748091, "grad_norm": 0.0033510479757471274, "learning_rate": 2.6591231169958984e-09, "loss": 0.0302, "step": 243330 }, { "epoch": 4.953486005089059, "grad_norm": 0.016583651069172926, "learning_rate": 2.6360024489674453e-09, "loss": 0.0027, "step": 243340 }, { "epoch": 4.953689567430025, "grad_norm": 0.015850337880434533, "learning_rate": 2.612982709140899e-09, "loss": 0.0279, "step": 243350 }, { "epoch": 4.953893129770992, "grad_norm": 0.0005793793114500022, "learning_rate": 2.5900638979825533e-09, "loss": 0.0, "step": 243360 }, { "epoch": 4.95409669211196, "grad_norm": 0.03102656260059072, "learning_rate": 2.567246015953706e-09, "loss": 0.0034, "step": 243370 }, { "epoch": 4.954300254452926, "grad_norm": 0.00016547015692015257, "learning_rate": 2.54452906351621e-09, "loss": 0.0003, "step": 243380 }, { "epoch": 4.954503816793893, "grad_norm": 0.0019956540592953715, "learning_rate": 2.5219130411291424e-09, "loss": 0.0018, "step": 243390 }, { "epoch": 4.95470737913486, "grad_norm": 19.041950855245325, "learning_rate": 2.49939794924825e-09, "loss": 0.0704, "step": 243400 }, { "epoch": 4.954910941475827, "grad_norm": 0.10602394961291851, "learning_rate": 2.476983788328724e-09, "loss": 0.0017, "step": 243410 }, { "epoch": 4.955114503816794, "grad_norm": 0.1461995574112885, "learning_rate": 2.4546705588235355e-09, "loss": 0.0001, "step": 243420 }, { "epoch": 4.955318066157761, "grad_norm": 0.018320410013326814, "learning_rate": 2.4324582611828796e-09, "loss": 0.0634, "step": 243430 }, { "epoch": 4.955521628498728, "grad_norm": 0.028980373526287025, "learning_rate": 2.4103468958552865e-09, "loss": 0.0002, "step": 243440 }, { "epoch": 4.955725190839694, "grad_norm": 0.00784950216666017, "learning_rate": 2.3883364632881765e-09, "loss": 0.012, "step": 243450 }, { "epoch": 4.955928753180662, "grad_norm": 0.004151274252027059, "learning_rate": 2.3664269639250834e-09, "loss": 0.0001, "step": 243460 }, { "epoch": 4.956132315521629, "grad_norm": 0.0026997564280191984, "learning_rate": 2.3446183982089865e-09, "loss": 0.0061, "step": 243470 }, { "epoch": 4.956335877862595, "grad_norm": 0.003411405880467455, "learning_rate": 2.322910766580644e-09, "loss": 0.0153, "step": 243480 }, { "epoch": 4.956539440203562, "grad_norm": 0.00975103229192966, "learning_rate": 2.3013040694774837e-09, "loss": 0.0003, "step": 243490 }, { "epoch": 4.956743002544529, "grad_norm": 0.0017998619086441248, "learning_rate": 2.2797983073369334e-09, "loss": 0.0001, "step": 243500 }, { "epoch": 4.956946564885496, "grad_norm": 0.0015057771310530894, "learning_rate": 2.2583934805925354e-09, "loss": 0.0001, "step": 243510 }, { "epoch": 4.957150127226463, "grad_norm": 0.0020834202086644524, "learning_rate": 2.237089589676722e-09, "loss": 0.0002, "step": 243520 }, { "epoch": 4.95735368956743, "grad_norm": 4.4608617106147594, "learning_rate": 2.215886635020259e-09, "loss": 0.0335, "step": 243530 }, { "epoch": 4.957557251908397, "grad_norm": 0.013883548017453064, "learning_rate": 2.1947846170511376e-09, "loss": 0.0041, "step": 243540 }, { "epoch": 4.957760814249363, "grad_norm": 11.329466801539372, "learning_rate": 2.173783536195684e-09, "loss": 0.0701, "step": 243550 }, { "epoch": 4.957964376590331, "grad_norm": 0.004447726416987756, "learning_rate": 2.152883392877447e-09, "loss": 0.0004, "step": 243560 }, { "epoch": 4.958167938931298, "grad_norm": 0.007710283672786518, "learning_rate": 2.1320841875188682e-09, "loss": 0.0428, "step": 243570 }, { "epoch": 4.958371501272264, "grad_norm": 0.0005911688116854218, "learning_rate": 2.1113859205401656e-09, "loss": 0.0333, "step": 243580 }, { "epoch": 4.958575063613232, "grad_norm": 0.013486381269668703, "learning_rate": 2.0907885923593384e-09, "loss": 0.0002, "step": 243590 }, { "epoch": 4.958778625954198, "grad_norm": 0.003977165275309216, "learning_rate": 2.0702922033921657e-09, "loss": 0.0001, "step": 243600 }, { "epoch": 4.958982188295165, "grad_norm": 0.00696870915936069, "learning_rate": 2.0498967540533156e-09, "loss": 0.0015, "step": 243610 }, { "epoch": 4.9591857506361325, "grad_norm": 0.0004362296484354576, "learning_rate": 2.02960224475357e-09, "loss": 0.0149, "step": 243620 }, { "epoch": 4.959389312977099, "grad_norm": 0.00699046287348174, "learning_rate": 2.009408675903157e-09, "loss": 0.0305, "step": 243630 }, { "epoch": 4.959592875318066, "grad_norm": 0.004169029199997158, "learning_rate": 1.989316047910084e-09, "loss": 0.0001, "step": 243640 }, { "epoch": 4.959796437659033, "grad_norm": 7.78675067459135, "learning_rate": 1.969324361180691e-09, "loss": 0.0695, "step": 243650 }, { "epoch": 4.96, "grad_norm": 0.00735866636007725, "learning_rate": 1.9494336161179905e-09, "loss": 0.0007, "step": 243660 }, { "epoch": 4.960203562340967, "grad_norm": 0.01123610575184879, "learning_rate": 1.9296438131238825e-09, "loss": 0.0008, "step": 243670 }, { "epoch": 4.960407124681934, "grad_norm": 0.01972068387186143, "learning_rate": 1.9099549525980477e-09, "loss": 0.0453, "step": 243680 }, { "epoch": 4.960610687022901, "grad_norm": 0.0035899321319454226, "learning_rate": 1.8903670349379454e-09, "loss": 0.0006, "step": 243690 }, { "epoch": 4.960814249363867, "grad_norm": 0.0008686372399133173, "learning_rate": 1.8708800605393705e-09, "loss": 0.0367, "step": 243700 }, { "epoch": 4.961017811704835, "grad_norm": 0.03742893081409572, "learning_rate": 1.8514940297964523e-09, "loss": 0.012, "step": 243710 }, { "epoch": 4.9612213740458015, "grad_norm": 5.38536118959932e-05, "learning_rate": 1.832208943099989e-09, "loss": 0.0082, "step": 243720 }, { "epoch": 4.961424936386768, "grad_norm": 0.003856344350339973, "learning_rate": 1.8130248008391138e-09, "loss": 0.0277, "step": 243730 }, { "epoch": 4.961628498727736, "grad_norm": 0.0031741794148199363, "learning_rate": 1.7939416034024048e-09, "loss": 0.0002, "step": 243740 }, { "epoch": 4.961832061068702, "grad_norm": 0.0009779237803729216, "learning_rate": 1.7749593511745543e-09, "loss": 0.0002, "step": 243750 }, { "epoch": 4.962035623409669, "grad_norm": 0.007365674171745911, "learning_rate": 1.7560780445391445e-09, "loss": 0.0215, "step": 243760 }, { "epoch": 4.9622391857506365, "grad_norm": 4.830166035679479, "learning_rate": 1.737297683877537e-09, "loss": 0.0007, "step": 243770 }, { "epoch": 4.962442748091603, "grad_norm": 0.24934163610407845, "learning_rate": 1.7186182695683173e-09, "loss": 0.0133, "step": 243780 }, { "epoch": 4.96264631043257, "grad_norm": 0.13106058068510182, "learning_rate": 1.700039801990072e-09, "loss": 0.0314, "step": 243790 }, { "epoch": 4.962849872773537, "grad_norm": 0.08951767671072235, "learning_rate": 1.6815622815169464e-09, "loss": 0.0001, "step": 243800 }, { "epoch": 4.963053435114504, "grad_norm": 6.309750722744148, "learning_rate": 1.66318570852253e-09, "loss": 0.0123, "step": 243810 }, { "epoch": 4.9632569974554706, "grad_norm": 0.0007027540018102919, "learning_rate": 1.6449100833776378e-09, "loss": 0.0292, "step": 243820 }, { "epoch": 4.963460559796438, "grad_norm": 0.000735295957006643, "learning_rate": 1.6267354064519735e-09, "loss": 0.0443, "step": 243830 }, { "epoch": 4.963664122137405, "grad_norm": 0.009567321611091797, "learning_rate": 1.6086616781119114e-09, "loss": 0.0003, "step": 243840 }, { "epoch": 4.963867684478371, "grad_norm": 0.07071584566772278, "learning_rate": 1.5906888987227143e-09, "loss": 0.0001, "step": 243850 }, { "epoch": 4.964071246819339, "grad_norm": 0.0033274979809411634, "learning_rate": 1.5728170686479805e-09, "loss": 0.0004, "step": 243860 }, { "epoch": 4.9642748091603055, "grad_norm": 13.778814495131572, "learning_rate": 1.5550461882479772e-09, "loss": 0.0778, "step": 243870 }, { "epoch": 4.964478371501272, "grad_norm": 8.138532899567743e-05, "learning_rate": 1.5373762578813067e-09, "loss": 0.0444, "step": 243880 }, { "epoch": 4.96468193384224, "grad_norm": 0.000266836539555043, "learning_rate": 1.5198072779060159e-09, "loss": 0.0002, "step": 243890 }, { "epoch": 4.964885496183206, "grad_norm": 0.0007890737616540822, "learning_rate": 1.5023392486757105e-09, "loss": 0.048, "step": 243900 }, { "epoch": 4.965089058524173, "grad_norm": 0.010832795210329347, "learning_rate": 1.4849721705434416e-09, "loss": 0.0179, "step": 243910 }, { "epoch": 4.96529262086514, "grad_norm": 0.0007944716130185945, "learning_rate": 1.467706043860595e-09, "loss": 0.0009, "step": 243920 }, { "epoch": 4.965496183206107, "grad_norm": 0.0013335740891268308, "learning_rate": 1.4505408689757804e-09, "loss": 0.025, "step": 243930 }, { "epoch": 4.965699745547074, "grad_norm": 0.022736364407931732, "learning_rate": 1.4334766462348325e-09, "loss": 0.0365, "step": 243940 }, { "epoch": 4.96590330788804, "grad_norm": 0.0007116038898597671, "learning_rate": 1.4165133759830307e-09, "loss": 0.0006, "step": 243950 }, { "epoch": 4.966106870229008, "grad_norm": 0.005524819518275031, "learning_rate": 1.3996510585634338e-09, "loss": 0.0004, "step": 243960 }, { "epoch": 4.9663104325699745, "grad_norm": 0.0027367710154835197, "learning_rate": 1.3828896943152148e-09, "loss": 0.0001, "step": 243970 }, { "epoch": 4.966513994910941, "grad_norm": 0.00037380652756527295, "learning_rate": 1.3662292835781021e-09, "loss": 0.036, "step": 243980 }, { "epoch": 4.966717557251909, "grad_norm": 0.04649532017633662, "learning_rate": 1.3496698266884933e-09, "loss": 0.0223, "step": 243990 }, { "epoch": 4.966921119592875, "grad_norm": 0.01337003385934366, "learning_rate": 1.3332113239800104e-09, "loss": 0.0313, "step": 244000 }, { "epoch": 4.967124681933842, "grad_norm": 0.004112424515270242, "learning_rate": 1.3168537757857203e-09, "loss": 0.0244, "step": 244010 }, { "epoch": 4.9673282442748095, "grad_norm": 0.04831281726007491, "learning_rate": 1.3005971824359142e-09, "loss": 0.0058, "step": 244020 }, { "epoch": 4.967531806615776, "grad_norm": 0.012532759872162243, "learning_rate": 1.2844415442586633e-09, "loss": 0.0002, "step": 244030 }, { "epoch": 4.967735368956743, "grad_norm": 0.0169342872348739, "learning_rate": 1.2683868615803729e-09, "loss": 0.0235, "step": 244040 }, { "epoch": 4.96793893129771, "grad_norm": 0.0009179978460037655, "learning_rate": 1.2524331347257834e-09, "loss": 0.0127, "step": 244050 }, { "epoch": 4.968142493638677, "grad_norm": 0.012032904845750819, "learning_rate": 1.2365803640163044e-09, "loss": 0.0006, "step": 244060 }, { "epoch": 4.9683460559796435, "grad_norm": 0.001525348946676058, "learning_rate": 1.220828549772235e-09, "loss": 0.0032, "step": 244070 }, { "epoch": 4.968549618320611, "grad_norm": 0.003967435524760166, "learning_rate": 1.2051776923122093e-09, "loss": 0.0001, "step": 244080 }, { "epoch": 4.968753180661578, "grad_norm": 0.16253355083311347, "learning_rate": 1.1896277919515309e-09, "loss": 0.0004, "step": 244090 }, { "epoch": 4.968956743002544, "grad_norm": 0.0017515746629668913, "learning_rate": 1.1741788490055029e-09, "loss": 0.0002, "step": 244100 }, { "epoch": 4.969160305343512, "grad_norm": 0.006103213767648785, "learning_rate": 1.1588308637849877e-09, "loss": 0.0045, "step": 244110 }, { "epoch": 4.9693638676844785, "grad_norm": 0.012763889078048658, "learning_rate": 1.1435838366002927e-09, "loss": 0.0001, "step": 244120 }, { "epoch": 4.969567430025445, "grad_norm": 0.00022559003172734838, "learning_rate": 1.1284377677595049e-09, "loss": 0.0184, "step": 244130 }, { "epoch": 4.969770992366412, "grad_norm": 0.12766825741969662, "learning_rate": 1.1133926575684907e-09, "loss": 0.0236, "step": 244140 }, { "epoch": 4.969974554707379, "grad_norm": 3.2691260099227346, "learning_rate": 1.0984485063308959e-09, "loss": 0.093, "step": 244150 }, { "epoch": 4.970178117048346, "grad_norm": 0.0019336218566778306, "learning_rate": 1.0836053143487012e-09, "loss": 0.0001, "step": 244160 }, { "epoch": 4.9703816793893125, "grad_norm": 0.009332005615078147, "learning_rate": 1.0688630819216672e-09, "loss": 0.0382, "step": 244170 }, { "epoch": 4.97058524173028, "grad_norm": 0.011333087562327059, "learning_rate": 1.0542218093473333e-09, "loss": 0.0012, "step": 244180 }, { "epoch": 4.970788804071247, "grad_norm": 0.0018502761731991773, "learning_rate": 1.039681496921574e-09, "loss": 0.0, "step": 244190 }, { "epoch": 4.970992366412213, "grad_norm": 0.0032004147081957438, "learning_rate": 1.0252421449385986e-09, "loss": 0.046, "step": 244200 }, { "epoch": 4.971195928753181, "grad_norm": 0.009356337493476903, "learning_rate": 1.01090375368873e-09, "loss": 0.0001, "step": 244210 }, { "epoch": 4.9713994910941475, "grad_norm": 0.014432403894078969, "learning_rate": 9.96666323462847e-10, "loss": 0.0795, "step": 244220 }, { "epoch": 4.971603053435114, "grad_norm": 0.295275068522184, "learning_rate": 9.825298545479423e-10, "loss": 0.0208, "step": 244230 }, { "epoch": 4.971806615776082, "grad_norm": 0.015346001777981054, "learning_rate": 9.684943472293428e-10, "loss": 0.0002, "step": 244240 }, { "epoch": 4.972010178117048, "grad_norm": 0.5321329423618977, "learning_rate": 9.54559801790711e-10, "loss": 0.0007, "step": 244250 }, { "epoch": 4.972213740458015, "grad_norm": 0.08739523481459194, "learning_rate": 9.40726218513488e-10, "loss": 0.0003, "step": 244260 }, { "epoch": 4.972417302798982, "grad_norm": 0.003648468564278965, "learning_rate": 9.269935976774503e-10, "loss": 0.0378, "step": 244270 }, { "epoch": 4.972620865139949, "grad_norm": 0.0012861638513135033, "learning_rate": 9.133619395590431e-10, "loss": 0.0001, "step": 244280 }, { "epoch": 4.972824427480916, "grad_norm": 0.005056420583350768, "learning_rate": 8.998312444341572e-10, "loss": 0.0001, "step": 244290 }, { "epoch": 4.973027989821883, "grad_norm": 0.0034622858567095204, "learning_rate": 8.864015125759073e-10, "loss": 0.0001, "step": 244300 }, { "epoch": 4.97323155216285, "grad_norm": 0.019698490313111522, "learning_rate": 8.73072744255743e-10, "loss": 0.0001, "step": 244310 }, { "epoch": 4.9734351145038165, "grad_norm": 0.7826414166652199, "learning_rate": 8.598449397428932e-10, "loss": 0.0007, "step": 244320 }, { "epoch": 4.973638676844784, "grad_norm": 7.54089164871285, "learning_rate": 8.467180993038116e-10, "loss": 0.0071, "step": 244330 }, { "epoch": 4.973842239185751, "grad_norm": 0.13842129082294477, "learning_rate": 8.336922232043965e-10, "loss": 0.0002, "step": 244340 }, { "epoch": 4.974045801526717, "grad_norm": 0.0017466370817611533, "learning_rate": 8.207673117072156e-10, "loss": 0.0001, "step": 244350 }, { "epoch": 4.974249363867685, "grad_norm": 0.0012550894244572034, "learning_rate": 8.079433650737267e-10, "loss": 0.0365, "step": 244360 }, { "epoch": 4.974452926208651, "grad_norm": 0.01771009701208453, "learning_rate": 7.952203835626116e-10, "loss": 0.0001, "step": 244370 }, { "epoch": 4.974656488549618, "grad_norm": 0.029046978834794294, "learning_rate": 7.825983674314419e-10, "loss": 0.0264, "step": 244380 }, { "epoch": 4.974860050890586, "grad_norm": 0.002899213202316911, "learning_rate": 7.700773169344589e-10, "loss": 0.0349, "step": 244390 }, { "epoch": 4.975063613231552, "grad_norm": 0.0005092860313713931, "learning_rate": 7.576572323242382e-10, "loss": 0.0002, "step": 244400 }, { "epoch": 4.975267175572519, "grad_norm": 0.0008156112776223628, "learning_rate": 7.453381138522453e-10, "loss": 0.0023, "step": 244410 }, { "epoch": 4.975470737913486, "grad_norm": 0.07025780529032274, "learning_rate": 7.331199617677254e-10, "loss": 0.0109, "step": 244420 }, { "epoch": 4.975674300254453, "grad_norm": 0.005881153700701199, "learning_rate": 7.210027763160377e-10, "loss": 0.0002, "step": 244430 }, { "epoch": 4.97587786259542, "grad_norm": 0.009863262829742802, "learning_rate": 7.089865577430966e-10, "loss": 0.0148, "step": 244440 }, { "epoch": 4.976081424936387, "grad_norm": 0.0033897153544719216, "learning_rate": 6.970713062914858e-10, "loss": 0.0003, "step": 244450 }, { "epoch": 4.976284987277354, "grad_norm": 0.0010012186683148443, "learning_rate": 6.852570222015686e-10, "loss": 0.0002, "step": 244460 }, { "epoch": 4.97648854961832, "grad_norm": 0.021992852281511637, "learning_rate": 6.735437057114879e-10, "loss": 0.0001, "step": 244470 }, { "epoch": 4.976692111959288, "grad_norm": 0.02720311451704824, "learning_rate": 6.619313570582764e-10, "loss": 0.0002, "step": 244480 }, { "epoch": 4.976895674300255, "grad_norm": 1.5324669155562332, "learning_rate": 6.50419976476746e-10, "loss": 0.0005, "step": 244490 }, { "epoch": 4.977099236641221, "grad_norm": 0.006750000669343818, "learning_rate": 6.390095641989336e-10, "loss": 0.0001, "step": 244500 }, { "epoch": 4.977302798982189, "grad_norm": 0.0013845042797517022, "learning_rate": 6.277001204557654e-10, "loss": 0.0001, "step": 244510 }, { "epoch": 4.977506361323155, "grad_norm": 0.01601718275613808, "learning_rate": 6.164916454753922e-10, "loss": 0.0001, "step": 244520 }, { "epoch": 4.977709923664122, "grad_norm": 0.005094495092490101, "learning_rate": 6.053841394837446e-10, "loss": 0.001, "step": 244530 }, { "epoch": 4.9779134860050895, "grad_norm": 0.0026088134599232668, "learning_rate": 5.943776027056425e-10, "loss": 0.0001, "step": 244540 }, { "epoch": 4.978117048346056, "grad_norm": 0.6711682607679933, "learning_rate": 5.834720353631307e-10, "loss": 0.0004, "step": 244550 }, { "epoch": 4.978320610687023, "grad_norm": 0.007860488994319426, "learning_rate": 5.726674376765884e-10, "loss": 0.0024, "step": 244560 }, { "epoch": 4.978524173027989, "grad_norm": 0.0014393816763248332, "learning_rate": 5.619638098647295e-10, "loss": 0.0004, "step": 244570 }, { "epoch": 4.978727735368957, "grad_norm": 0.012608269511552403, "learning_rate": 5.513611521429374e-10, "loss": 0.015, "step": 244580 }, { "epoch": 4.978931297709924, "grad_norm": 0.0024660045276769537, "learning_rate": 5.40859464725485e-10, "loss": 0.0004, "step": 244590 }, { "epoch": 4.97913486005089, "grad_norm": 4.943013448307033e-06, "learning_rate": 5.3045874782498e-10, "loss": 0.02, "step": 244600 }, { "epoch": 4.979338422391858, "grad_norm": 0.003930176910764554, "learning_rate": 5.201590016506997e-10, "loss": 0.0002, "step": 244610 }, { "epoch": 4.979541984732824, "grad_norm": 0.00758865444607513, "learning_rate": 5.099602264113657e-10, "loss": 0.0081, "step": 244620 }, { "epoch": 4.979745547073791, "grad_norm": 0.0010467927910300203, "learning_rate": 4.998624223129245e-10, "loss": 0.0675, "step": 244630 }, { "epoch": 4.9799491094147585, "grad_norm": 0.0018225406980180063, "learning_rate": 4.898655895585469e-10, "loss": 0.0307, "step": 244640 }, { "epoch": 4.980152671755725, "grad_norm": 0.0014725323038052543, "learning_rate": 4.799697283508487e-10, "loss": 0.0001, "step": 244650 }, { "epoch": 4.980356234096692, "grad_norm": 0.005055795464957629, "learning_rate": 4.701748388896699e-10, "loss": 0.0007, "step": 244660 }, { "epoch": 4.980559796437659, "grad_norm": 0.01780802624268158, "learning_rate": 4.6048092137263024e-10, "loss": 0.0835, "step": 244670 }, { "epoch": 4.980763358778626, "grad_norm": 0.007171648914788955, "learning_rate": 4.50887975995129e-10, "loss": 0.0002, "step": 244680 }, { "epoch": 4.980966921119593, "grad_norm": 0.00029462736245045685, "learning_rate": 4.4139600295145525e-10, "loss": 0.0001, "step": 244690 }, { "epoch": 4.98117048346056, "grad_norm": 0.0035000311384733324, "learning_rate": 4.320050024336775e-10, "loss": 0.0001, "step": 244700 }, { "epoch": 4.981374045801527, "grad_norm": 0.004375438022205995, "learning_rate": 4.227149746305337e-10, "loss": 0.0099, "step": 244710 }, { "epoch": 4.981577608142493, "grad_norm": 0.0046647059711536955, "learning_rate": 4.135259197302066e-10, "loss": 0.0016, "step": 244720 }, { "epoch": 4.981781170483461, "grad_norm": 0.013746468118843095, "learning_rate": 4.0443783791810354e-10, "loss": 0.0455, "step": 244730 }, { "epoch": 4.9819847328244276, "grad_norm": 0.05424956843574483, "learning_rate": 3.9545072937741125e-10, "loss": 0.0001, "step": 244740 }, { "epoch": 4.982188295165394, "grad_norm": 0.0026101459601426537, "learning_rate": 3.8656459429076144e-10, "loss": 0.0001, "step": 244750 }, { "epoch": 4.982391857506362, "grad_norm": 0.00025147239227597676, "learning_rate": 3.777794328363449e-10, "loss": 0.0421, "step": 244760 }, { "epoch": 4.982595419847328, "grad_norm": 0.004237881334269804, "learning_rate": 3.690952451923524e-10, "loss": 0.0313, "step": 244770 }, { "epoch": 4.982798982188295, "grad_norm": 0.0033569049754098193, "learning_rate": 3.605120315341992e-10, "loss": 0.0038, "step": 244780 }, { "epoch": 4.983002544529262, "grad_norm": 0.006178583045514621, "learning_rate": 3.5202979203452503e-10, "loss": 0.0416, "step": 244790 }, { "epoch": 4.983206106870229, "grad_norm": 0.028568001107769147, "learning_rate": 3.436485268654144e-10, "loss": 0.0501, "step": 244800 }, { "epoch": 4.983409669211196, "grad_norm": 11.490373229651214, "learning_rate": 3.353682361956212e-10, "loss": 0.0289, "step": 244810 }, { "epoch": 4.983613231552162, "grad_norm": 0.001461817210582687, "learning_rate": 3.2718892019278916e-10, "loss": 0.0, "step": 244820 }, { "epoch": 4.98381679389313, "grad_norm": 0.0015378286594110021, "learning_rate": 3.1911057902178634e-10, "loss": 0.0129, "step": 244830 }, { "epoch": 4.984020356234097, "grad_norm": 0.0004064968238900308, "learning_rate": 3.1113321284581555e-10, "loss": 0.0107, "step": 244840 }, { "epoch": 4.984223918575063, "grad_norm": 0.027280896322849666, "learning_rate": 3.0325682182585913e-10, "loss": 0.002, "step": 244850 }, { "epoch": 4.984427480916031, "grad_norm": 8.749847162649326, "learning_rate": 2.954814061217892e-10, "loss": 0.0039, "step": 244860 }, { "epoch": 4.984631043256997, "grad_norm": 36.640210555633764, "learning_rate": 2.878069658895921e-10, "loss": 0.0643, "step": 244870 }, { "epoch": 4.984834605597964, "grad_norm": 0.0028898825713673405, "learning_rate": 2.8023350128469907e-10, "loss": 0.0002, "step": 244880 }, { "epoch": 4.9850381679389315, "grad_norm": 0.13945745197055304, "learning_rate": 2.7276101246032083e-10, "loss": 0.0005, "step": 244890 }, { "epoch": 4.985241730279898, "grad_norm": 0.5306097296502406, "learning_rate": 2.6538949956744776e-10, "loss": 0.0547, "step": 244900 }, { "epoch": 4.985445292620865, "grad_norm": 0.0003303821208611645, "learning_rate": 2.581189627542946e-10, "loss": 0.0003, "step": 244910 }, { "epoch": 4.985648854961832, "grad_norm": 0.005869861741614776, "learning_rate": 2.5094940216796595e-10, "loss": 0.0001, "step": 244920 }, { "epoch": 4.985852417302799, "grad_norm": 5.062503702157375, "learning_rate": 2.4388081795334586e-10, "loss": 0.0194, "step": 244930 }, { "epoch": 4.986055979643766, "grad_norm": 0.0061413607037247245, "learning_rate": 2.36913210253098e-10, "loss": 0.0001, "step": 244940 }, { "epoch": 4.986259541984733, "grad_norm": 29.9707971326293, "learning_rate": 2.3004657920822071e-10, "loss": 0.0166, "step": 244950 }, { "epoch": 4.9864631043257, "grad_norm": 0.004365874332158651, "learning_rate": 2.2328092495693676e-10, "loss": 0.0111, "step": 244960 }, { "epoch": 4.986666666666666, "grad_norm": 0.0010004606138857015, "learning_rate": 2.166162476363587e-10, "loss": 0.0001, "step": 244970 }, { "epoch": 4.986870229007634, "grad_norm": 0.018031641249445115, "learning_rate": 2.1005254738082348e-10, "loss": 0.0504, "step": 244980 }, { "epoch": 4.9870737913486005, "grad_norm": 0.0024163681078013567, "learning_rate": 2.0358982432300278e-10, "loss": 0.0098, "step": 244990 }, { "epoch": 4.987277353689567, "grad_norm": 0.06834255499939883, "learning_rate": 1.9722807859390293e-10, "loss": 0.0003, "step": 245000 }, { "epoch": 4.987480916030535, "grad_norm": 0.0008311219395753477, "learning_rate": 1.9096731032064441e-10, "loss": 0.0008, "step": 245010 }, { "epoch": 4.987684478371501, "grad_norm": 0.06876456945323363, "learning_rate": 1.8480751963090293e-10, "loss": 0.0005, "step": 245020 }, { "epoch": 4.987888040712468, "grad_norm": 5.422709154285586, "learning_rate": 1.7874870664846834e-10, "loss": 0.0198, "step": 245030 }, { "epoch": 4.9880916030534355, "grad_norm": 0.01862990255076491, "learning_rate": 1.7279087149657536e-10, "loss": 0.0015, "step": 245040 }, { "epoch": 4.988295165394402, "grad_norm": 0.44679690394379756, "learning_rate": 1.669340142940179e-10, "loss": 0.0247, "step": 245050 }, { "epoch": 4.988498727735369, "grad_norm": 0.01942436206093379, "learning_rate": 1.6117813516070002e-10, "loss": 0.0001, "step": 245060 }, { "epoch": 4.988702290076336, "grad_norm": 0.0059997375027198, "learning_rate": 1.5552323421152982e-10, "loss": 0.0003, "step": 245070 }, { "epoch": 4.988905852417303, "grad_norm": 0.004437017742095274, "learning_rate": 1.4996931156141536e-10, "loss": 0.0538, "step": 245080 }, { "epoch": 4.9891094147582695, "grad_norm": 0.020020718136001224, "learning_rate": 1.445163673230443e-10, "loss": 0.0482, "step": 245090 }, { "epoch": 4.989312977099237, "grad_norm": 0.034604769559229996, "learning_rate": 1.391644016052185e-10, "loss": 0.0004, "step": 245100 }, { "epoch": 4.989516539440204, "grad_norm": 0.02645538703576522, "learning_rate": 1.339134145172949e-10, "loss": 0.0002, "step": 245110 }, { "epoch": 4.98972010178117, "grad_norm": 0.0004482821522455762, "learning_rate": 1.2876340616418958e-10, "loss": 0.0009, "step": 245120 }, { "epoch": 4.989923664122138, "grad_norm": 0.022538209303687315, "learning_rate": 1.2371437665081865e-10, "loss": 0.0192, "step": 245130 }, { "epoch": 4.9901272264631045, "grad_norm": 0.12230330217050481, "learning_rate": 1.1876632607876748e-10, "loss": 0.0008, "step": 245140 }, { "epoch": 4.990330788804071, "grad_norm": 0.4051659735206697, "learning_rate": 1.1391925454795616e-10, "loss": 0.03, "step": 245150 }, { "epoch": 4.990534351145039, "grad_norm": 0.0031842254182001197, "learning_rate": 1.0917316215663942e-10, "loss": 0.0001, "step": 245160 }, { "epoch": 4.990737913486005, "grad_norm": 0.03440984791747218, "learning_rate": 1.0452804900029645e-10, "loss": 0.0352, "step": 245170 }, { "epoch": 4.990941475826972, "grad_norm": 10.125803589469546, "learning_rate": 9.998391517274109e-11, "loss": 0.0031, "step": 245180 }, { "epoch": 4.991145038167939, "grad_norm": 0.00037126613859599366, "learning_rate": 9.554076076556673e-11, "loss": 0.02, "step": 245190 }, { "epoch": 4.991348600508906, "grad_norm": 0.002246401299342861, "learning_rate": 9.119858586925656e-11, "loss": 0.0003, "step": 245200 }, { "epoch": 4.991552162849873, "grad_norm": 0.007412844309869771, "learning_rate": 8.695739057096308e-11, "loss": 0.0127, "step": 245210 }, { "epoch": 4.991755725190839, "grad_norm": 0.02167417535981786, "learning_rate": 8.281717495617347e-11, "loss": 0.0076, "step": 245220 }, { "epoch": 4.991959287531807, "grad_norm": 0.005383630427304506, "learning_rate": 7.877793910870957e-11, "loss": 0.0257, "step": 245230 }, { "epoch": 4.9921628498727735, "grad_norm": 0.011168146024088521, "learning_rate": 7.483968311017276e-11, "loss": 0.0427, "step": 245240 }, { "epoch": 4.99236641221374, "grad_norm": 0.002883107433112684, "learning_rate": 7.10024070404991e-11, "loss": 0.0417, "step": 245250 }, { "epoch": 4.992569974554708, "grad_norm": 6.391184658895263, "learning_rate": 6.726611097629399e-11, "loss": 0.0185, "step": 245260 }, { "epoch": 4.992773536895674, "grad_norm": 0.0036417584769236704, "learning_rate": 6.36307949936077e-11, "loss": 0.0439, "step": 245270 }, { "epoch": 4.992977099236641, "grad_norm": 0.07219391464097483, "learning_rate": 6.009645916571494e-11, "loss": 0.0121, "step": 245280 }, { "epoch": 4.993180661577608, "grad_norm": 0.042741117572984, "learning_rate": 5.666310356422511e-11, "loss": 0.0552, "step": 245290 }, { "epoch": 4.993384223918575, "grad_norm": 0.0017326152787996401, "learning_rate": 5.3330728258527144e-11, "loss": 0.0002, "step": 245300 }, { "epoch": 4.993587786259542, "grad_norm": 0.03946018375832276, "learning_rate": 5.0099333315234424e-11, "loss": 0.001, "step": 245310 }, { "epoch": 4.993791348600509, "grad_norm": 0.009507110530525999, "learning_rate": 4.696891880040522e-11, "loss": 0.0001, "step": 245320 }, { "epoch": 4.993994910941476, "grad_norm": 0.18641533183128753, "learning_rate": 4.3939484776767126e-11, "loss": 0.0006, "step": 245330 }, { "epoch": 4.9941984732824425, "grad_norm": 0.010804464625094871, "learning_rate": 4.1011031305937535e-11, "loss": 0.0382, "step": 245340 }, { "epoch": 4.99440203562341, "grad_norm": 0.0019638224240373036, "learning_rate": 3.818355844620314e-11, "loss": 0.0001, "step": 245350 }, { "epoch": 4.994605597964377, "grad_norm": 12.063288845729648, "learning_rate": 3.545706625585066e-11, "loss": 0.0427, "step": 245360 }, { "epoch": 4.994809160305343, "grad_norm": 0.07476268646209004, "learning_rate": 3.2831554789281016e-11, "loss": 0.0005, "step": 245370 }, { "epoch": 4.995012722646311, "grad_norm": 0.01134474321820369, "learning_rate": 3.030702409922981e-11, "loss": 0.002, "step": 245380 }, { "epoch": 4.995216284987277, "grad_norm": 0.0007279625438131273, "learning_rate": 2.7883474237322406e-11, "loss": 0.0013, "step": 245390 }, { "epoch": 4.995419847328244, "grad_norm": 0.0015919239829186816, "learning_rate": 2.5560905251853506e-11, "loss": 0.0001, "step": 245400 }, { "epoch": 4.995623409669211, "grad_norm": 0.23062655466881538, "learning_rate": 2.3339317190562703e-11, "loss": 0.0003, "step": 245410 }, { "epoch": 4.995826972010178, "grad_norm": 0.23582861724066453, "learning_rate": 2.1218710097858918e-11, "loss": 0.0457, "step": 245420 }, { "epoch": 4.996030534351145, "grad_norm": 0.01959913508212233, "learning_rate": 1.9199084016485736e-11, "loss": 0.0027, "step": 245430 }, { "epoch": 4.9962340966921115, "grad_norm": 0.011411944344840566, "learning_rate": 1.7280438986966298e-11, "loss": 0.0001, "step": 245440 }, { "epoch": 4.996437659033079, "grad_norm": 0.0013124204252334488, "learning_rate": 1.546277504871352e-11, "loss": 0.0002, "step": 245450 }, { "epoch": 4.996641221374046, "grad_norm": 0.005434849622029969, "learning_rate": 1.374609223780965e-11, "loss": 0.0001, "step": 245460 }, { "epoch": 4.996844783715012, "grad_norm": 0.001149484186195963, "learning_rate": 1.2130390589781827e-11, "loss": 0.0252, "step": 245470 }, { "epoch": 4.99704834605598, "grad_norm": 0.007763435103750524, "learning_rate": 1.0615670136271406e-11, "loss": 0.0001, "step": 245480 }, { "epoch": 4.997251908396946, "grad_norm": 9.502442525651432, "learning_rate": 9.20193090836463e-12, "loss": 0.033, "step": 245490 }, { "epoch": 4.997455470737913, "grad_norm": 0.00623871606542292, "learning_rate": 7.889172934927303e-12, "loss": 0.0344, "step": 245500 }, { "epoch": 4.997659033078881, "grad_norm": 0.04808586355619789, "learning_rate": 6.677396242049661e-12, "loss": 0.0001, "step": 245510 }, { "epoch": 4.997862595419847, "grad_norm": 0.00254733487332007, "learning_rate": 5.5666008536015006e-12, "loss": 0.0326, "step": 245520 }, { "epoch": 4.998066157760814, "grad_norm": 0.0006298350596682099, "learning_rate": 4.5567867934526164e-12, "loss": 0.0, "step": 245530 }, { "epoch": 4.998269720101781, "grad_norm": 0.002207513692468944, "learning_rate": 3.6479540810319125e-12, "loss": 0.0002, "step": 245540 }, { "epoch": 4.998473282442748, "grad_norm": 0.0021759812861868277, "learning_rate": 2.840102734658068e-12, "loss": 0.0002, "step": 245550 }, { "epoch": 4.998676844783715, "grad_norm": 0.028516558083826884, "learning_rate": 2.133232770984428e-12, "loss": 0.0134, "step": 245560 }, { "epoch": 4.998880407124682, "grad_norm": 6.7258078992549235, "learning_rate": 1.5273442038887809e-12, "loss": 0.0345, "step": 245570 }, { "epoch": 4.999083969465649, "grad_norm": 0.02782481692148548, "learning_rate": 1.0224370461386912e-12, "loss": 0.0416, "step": 245580 }, { "epoch": 4.9992875318066154, "grad_norm": 0.0035423152996696527, "learning_rate": 6.185113077261662e-13, "loss": 0.0009, "step": 245590 }, { "epoch": 4.999491094147583, "grad_norm": 0.002818433905174469, "learning_rate": 3.1556699697787854e-13, "loss": 0.0, "step": 245600 }, { "epoch": 4.99969465648855, "grad_norm": 0.0009956019072522818, "learning_rate": 1.1360411944494331e-13, "loss": 0.0669, "step": 245610 }, { "epoch": 4.999898218829516, "grad_norm": 0.00527856462034036, "learning_rate": 1.2622680123364206e-14, "loss": 0.0001, "step": 245620 }, { "epoch": 5.0, "step": 245625, "total_flos": 2908242111848448.0, "train_loss": 0.08464478309470846, "train_runtime": 267213.9379, "train_samples_per_second": 7.354, "train_steps_per_second": 0.919 } ], "logging_steps": 10, "max_steps": 245625, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2908242111848448.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }