{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9985721085197525, "eval_steps": 500, "global_step": 4200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007139457401237506, "grad_norm": 5.92962121963501, "learning_rate": 2.3809523809523813e-08, "loss": 0.8451, "step": 1 }, { "epoch": 0.0014278914802475012, "grad_norm": 5.9440741539001465, "learning_rate": 4.7619047619047627e-08, "loss": 0.8729, "step": 2 }, { "epoch": 0.002141837220371252, "grad_norm": 5.892368793487549, "learning_rate": 7.142857142857144e-08, "loss": 0.8606, "step": 3 }, { "epoch": 0.0028557829604950024, "grad_norm": 5.659056186676025, "learning_rate": 9.523809523809525e-08, "loss": 0.8407, "step": 4 }, { "epoch": 0.003569728700618753, "grad_norm": 5.844188213348389, "learning_rate": 1.1904761904761906e-07, "loss": 0.8452, "step": 5 }, { "epoch": 0.004283674440742504, "grad_norm": 5.90915060043335, "learning_rate": 1.4285714285714287e-07, "loss": 0.8868, "step": 6 }, { "epoch": 0.004997620180866254, "grad_norm": 5.481647491455078, "learning_rate": 1.6666666666666668e-07, "loss": 0.8141, "step": 7 }, { "epoch": 0.005711565920990005, "grad_norm": 5.7116594314575195, "learning_rate": 1.904761904761905e-07, "loss": 0.8447, "step": 8 }, { "epoch": 0.006425511661113755, "grad_norm": 5.570527076721191, "learning_rate": 2.142857142857143e-07, "loss": 0.8387, "step": 9 }, { "epoch": 0.007139457401237506, "grad_norm": 5.808752536773682, "learning_rate": 2.3809523809523811e-07, "loss": 0.8587, "step": 10 }, { "epoch": 0.007853403141361256, "grad_norm": 5.930179595947266, "learning_rate": 2.6190476190476194e-07, "loss": 0.839, "step": 11 }, { "epoch": 0.008567348881485007, "grad_norm": 5.679565906524658, "learning_rate": 2.8571428571428575e-07, "loss": 0.8077, "step": 12 }, { "epoch": 0.009281294621608758, "grad_norm": 5.761166572570801, "learning_rate": 3.0952380952380955e-07, "loss": 0.8668, "step": 13 }, { "epoch": 0.009995240361732508, "grad_norm": 5.815394878387451, "learning_rate": 3.3333333333333335e-07, "loss": 0.8546, "step": 14 }, { "epoch": 0.010709186101856259, "grad_norm": 5.309225082397461, "learning_rate": 3.5714285714285716e-07, "loss": 0.8335, "step": 15 }, { "epoch": 0.01142313184198001, "grad_norm": 5.556797504425049, "learning_rate": 3.80952380952381e-07, "loss": 0.8542, "step": 16 }, { "epoch": 0.01213707758210376, "grad_norm": 5.287019729614258, "learning_rate": 4.047619047619048e-07, "loss": 0.8342, "step": 17 }, { "epoch": 0.01285102332222751, "grad_norm": 5.2523603439331055, "learning_rate": 4.285714285714286e-07, "loss": 0.8129, "step": 18 }, { "epoch": 0.013564969062351261, "grad_norm": 5.378078937530518, "learning_rate": 4.523809523809524e-07, "loss": 0.856, "step": 19 }, { "epoch": 0.014278914802475012, "grad_norm": 4.596665382385254, "learning_rate": 4.7619047619047623e-07, "loss": 0.8339, "step": 20 }, { "epoch": 0.014992860542598763, "grad_norm": 4.389111042022705, "learning_rate": 5.000000000000001e-07, "loss": 0.8068, "step": 21 }, { "epoch": 0.015706806282722512, "grad_norm": 4.394775867462158, "learning_rate": 5.238095238095239e-07, "loss": 0.7912, "step": 22 }, { "epoch": 0.016420752022846263, "grad_norm": 4.271754741668701, "learning_rate": 5.476190476190477e-07, "loss": 0.7965, "step": 23 }, { "epoch": 0.017134697762970014, "grad_norm": 4.025062561035156, "learning_rate": 5.714285714285715e-07, "loss": 0.771, "step": 24 }, { "epoch": 0.017848643503093765, "grad_norm": 4.102192401885986, "learning_rate": 5.952380952380953e-07, "loss": 0.8178, "step": 25 }, { "epoch": 0.018562589243217516, "grad_norm": 3.942967176437378, "learning_rate": 6.190476190476191e-07, "loss": 0.779, "step": 26 }, { "epoch": 0.019276534983341267, "grad_norm": 3.0135536193847656, "learning_rate": 6.428571428571428e-07, "loss": 0.7607, "step": 27 }, { "epoch": 0.019990480723465015, "grad_norm": 2.3885884284973145, "learning_rate": 6.666666666666667e-07, "loss": 0.7602, "step": 28 }, { "epoch": 0.020704426463588766, "grad_norm": 2.330827474594116, "learning_rate": 6.904761904761906e-07, "loss": 0.7498, "step": 29 }, { "epoch": 0.021418372203712517, "grad_norm": 2.2703824043273926, "learning_rate": 7.142857142857143e-07, "loss": 0.7463, "step": 30 }, { "epoch": 0.022132317943836268, "grad_norm": 2.258985757827759, "learning_rate": 7.380952380952381e-07, "loss": 0.7543, "step": 31 }, { "epoch": 0.02284626368396002, "grad_norm": 2.1284327507019043, "learning_rate": 7.61904761904762e-07, "loss": 0.734, "step": 32 }, { "epoch": 0.02356020942408377, "grad_norm": 1.9912084341049194, "learning_rate": 7.857142857142857e-07, "loss": 0.7527, "step": 33 }, { "epoch": 0.02427415516420752, "grad_norm": 1.991990327835083, "learning_rate": 8.095238095238096e-07, "loss": 0.752, "step": 34 }, { "epoch": 0.024988100904331272, "grad_norm": 1.9211993217468262, "learning_rate": 8.333333333333333e-07, "loss": 0.7514, "step": 35 }, { "epoch": 0.02570204664445502, "grad_norm": 1.6093519926071167, "learning_rate": 8.571428571428572e-07, "loss": 0.7375, "step": 36 }, { "epoch": 0.02641599238457877, "grad_norm": 1.3489774465560913, "learning_rate": 8.80952380952381e-07, "loss": 0.7016, "step": 37 }, { "epoch": 0.027129938124702522, "grad_norm": 1.710343360900879, "learning_rate": 9.047619047619048e-07, "loss": 0.7326, "step": 38 }, { "epoch": 0.027843883864826273, "grad_norm": 2.0335943698883057, "learning_rate": 9.285714285714287e-07, "loss": 0.7421, "step": 39 }, { "epoch": 0.028557829604950024, "grad_norm": 2.149522304534912, "learning_rate": 9.523809523809525e-07, "loss": 0.7479, "step": 40 }, { "epoch": 0.029271775345073775, "grad_norm": 2.1640384197235107, "learning_rate": 9.761904761904764e-07, "loss": 0.6991, "step": 41 }, { "epoch": 0.029985721085197526, "grad_norm": 2.004243850708008, "learning_rate": 1.0000000000000002e-06, "loss": 0.6979, "step": 42 }, { "epoch": 0.030699666825321277, "grad_norm": 1.8973255157470703, "learning_rate": 1.023809523809524e-06, "loss": 0.7111, "step": 43 }, { "epoch": 0.031413612565445025, "grad_norm": 1.7284727096557617, "learning_rate": 1.0476190476190478e-06, "loss": 0.7074, "step": 44 }, { "epoch": 0.03212755830556878, "grad_norm": 1.5644124746322632, "learning_rate": 1.0714285714285714e-06, "loss": 0.6887, "step": 45 }, { "epoch": 0.03284150404569253, "grad_norm": 1.4279332160949707, "learning_rate": 1.0952380952380954e-06, "loss": 0.6999, "step": 46 }, { "epoch": 0.03355544978581628, "grad_norm": 1.2258559465408325, "learning_rate": 1.1190476190476192e-06, "loss": 0.6725, "step": 47 }, { "epoch": 0.03426939552594003, "grad_norm": 0.945966899394989, "learning_rate": 1.142857142857143e-06, "loss": 0.6825, "step": 48 }, { "epoch": 0.034983341266063776, "grad_norm": 0.9720449447631836, "learning_rate": 1.1666666666666668e-06, "loss": 0.6957, "step": 49 }, { "epoch": 0.03569728700618753, "grad_norm": 0.9771139621734619, "learning_rate": 1.1904761904761906e-06, "loss": 0.6824, "step": 50 }, { "epoch": 0.03641123274631128, "grad_norm": 1.0270413160324097, "learning_rate": 1.2142857142857144e-06, "loss": 0.651, "step": 51 }, { "epoch": 0.03712517848643503, "grad_norm": 1.0483615398406982, "learning_rate": 1.2380952380952382e-06, "loss": 0.6532, "step": 52 }, { "epoch": 0.03783912422655878, "grad_norm": 0.9663938879966736, "learning_rate": 1.261904761904762e-06, "loss": 0.6797, "step": 53 }, { "epoch": 0.038553069966682535, "grad_norm": 0.9398236274719238, "learning_rate": 1.2857142857142856e-06, "loss": 0.6345, "step": 54 }, { "epoch": 0.03926701570680628, "grad_norm": 0.80275559425354, "learning_rate": 1.3095238095238096e-06, "loss": 0.6389, "step": 55 }, { "epoch": 0.03998096144693003, "grad_norm": 0.7588368058204651, "learning_rate": 1.3333333333333334e-06, "loss": 0.6263, "step": 56 }, { "epoch": 0.040694907187053785, "grad_norm": 0.7626146078109741, "learning_rate": 1.3571428571428572e-06, "loss": 0.6462, "step": 57 }, { "epoch": 0.04140885292717753, "grad_norm": 0.8766148686408997, "learning_rate": 1.3809523809523812e-06, "loss": 0.6527, "step": 58 }, { "epoch": 0.04212279866730129, "grad_norm": 0.7159417271614075, "learning_rate": 1.4047619047619048e-06, "loss": 0.6383, "step": 59 }, { "epoch": 0.042836744407425034, "grad_norm": 0.6709489226341248, "learning_rate": 1.4285714285714286e-06, "loss": 0.6485, "step": 60 }, { "epoch": 0.04355069014754879, "grad_norm": 0.6112363934516907, "learning_rate": 1.4523809523809526e-06, "loss": 0.6254, "step": 61 }, { "epoch": 0.044264635887672536, "grad_norm": 0.5781610608100891, "learning_rate": 1.4761904761904762e-06, "loss": 0.6027, "step": 62 }, { "epoch": 0.04497858162779629, "grad_norm": 0.6682778000831604, "learning_rate": 1.5e-06, "loss": 0.6198, "step": 63 }, { "epoch": 0.04569252736792004, "grad_norm": 0.6872972249984741, "learning_rate": 1.523809523809524e-06, "loss": 0.6132, "step": 64 }, { "epoch": 0.046406473108043786, "grad_norm": 0.6571768522262573, "learning_rate": 1.5476190476190479e-06, "loss": 0.614, "step": 65 }, { "epoch": 0.04712041884816754, "grad_norm": 0.667121171951294, "learning_rate": 1.5714285714285714e-06, "loss": 0.6258, "step": 66 }, { "epoch": 0.04783436458829129, "grad_norm": 0.5724891424179077, "learning_rate": 1.5952380952380953e-06, "loss": 0.603, "step": 67 }, { "epoch": 0.04854831032841504, "grad_norm": 0.550451934337616, "learning_rate": 1.6190476190476193e-06, "loss": 0.6401, "step": 68 }, { "epoch": 0.04926225606853879, "grad_norm": 0.5058234333992004, "learning_rate": 1.642857142857143e-06, "loss": 0.6045, "step": 69 }, { "epoch": 0.049976201808662545, "grad_norm": 0.5202196836471558, "learning_rate": 1.6666666666666667e-06, "loss": 0.5719, "step": 70 }, { "epoch": 0.05069014754878629, "grad_norm": 0.547271728515625, "learning_rate": 1.6904761904761907e-06, "loss": 0.6136, "step": 71 }, { "epoch": 0.05140409328891004, "grad_norm": 0.489646852016449, "learning_rate": 1.7142857142857145e-06, "loss": 0.5694, "step": 72 }, { "epoch": 0.052118039029033794, "grad_norm": 0.532255232334137, "learning_rate": 1.738095238095238e-06, "loss": 0.6413, "step": 73 }, { "epoch": 0.05283198476915754, "grad_norm": 0.5723663568496704, "learning_rate": 1.761904761904762e-06, "loss": 0.562, "step": 74 }, { "epoch": 0.053545930509281296, "grad_norm": 0.515780508518219, "learning_rate": 1.7857142857142859e-06, "loss": 0.5919, "step": 75 }, { "epoch": 0.054259876249405044, "grad_norm": 0.4471990466117859, "learning_rate": 1.8095238095238097e-06, "loss": 0.5938, "step": 76 }, { "epoch": 0.0549738219895288, "grad_norm": 0.5089431405067444, "learning_rate": 1.8333333333333333e-06, "loss": 0.5873, "step": 77 }, { "epoch": 0.055687767729652546, "grad_norm": 0.4684774875640869, "learning_rate": 1.8571428571428573e-06, "loss": 0.5716, "step": 78 }, { "epoch": 0.0564017134697763, "grad_norm": 0.43487250804901123, "learning_rate": 1.880952380952381e-06, "loss": 0.5812, "step": 79 }, { "epoch": 0.05711565920990005, "grad_norm": 0.4487122893333435, "learning_rate": 1.904761904761905e-06, "loss": 0.6109, "step": 80 }, { "epoch": 0.057829604950023795, "grad_norm": 0.48797449469566345, "learning_rate": 1.928571428571429e-06, "loss": 0.5903, "step": 81 }, { "epoch": 0.05854355069014755, "grad_norm": 0.5499839782714844, "learning_rate": 1.9523809523809527e-06, "loss": 0.6071, "step": 82 }, { "epoch": 0.0592574964302713, "grad_norm": 0.47427046298980713, "learning_rate": 1.976190476190476e-06, "loss": 0.5892, "step": 83 }, { "epoch": 0.05997144217039505, "grad_norm": 0.45563632249832153, "learning_rate": 2.0000000000000003e-06, "loss": 0.5905, "step": 84 }, { "epoch": 0.0606853879105188, "grad_norm": 0.4781167805194855, "learning_rate": 2.023809523809524e-06, "loss": 0.6241, "step": 85 }, { "epoch": 0.061399333650642554, "grad_norm": 0.4708476662635803, "learning_rate": 2.047619047619048e-06, "loss": 0.563, "step": 86 }, { "epoch": 0.0621132793907663, "grad_norm": 0.43687382340431213, "learning_rate": 2.0714285714285717e-06, "loss": 0.5911, "step": 87 }, { "epoch": 0.06282722513089005, "grad_norm": 0.4474760890007019, "learning_rate": 2.0952380952380955e-06, "loss": 0.5965, "step": 88 }, { "epoch": 0.0635411708710138, "grad_norm": 0.42731088399887085, "learning_rate": 2.1190476190476194e-06, "loss": 0.5814, "step": 89 }, { "epoch": 0.06425511661113756, "grad_norm": 0.39258188009262085, "learning_rate": 2.1428571428571427e-06, "loss": 0.5617, "step": 90 }, { "epoch": 0.0649690623512613, "grad_norm": 0.45413362979888916, "learning_rate": 2.166666666666667e-06, "loss": 0.5784, "step": 91 }, { "epoch": 0.06568300809138505, "grad_norm": 0.47618427872657776, "learning_rate": 2.1904761904761908e-06, "loss": 0.5751, "step": 92 }, { "epoch": 0.0663969538315088, "grad_norm": 0.4495272636413574, "learning_rate": 2.2142857142857146e-06, "loss": 0.57, "step": 93 }, { "epoch": 0.06711089957163256, "grad_norm": 0.43923139572143555, "learning_rate": 2.2380952380952384e-06, "loss": 0.6067, "step": 94 }, { "epoch": 0.06782484531175631, "grad_norm": 0.4091683030128479, "learning_rate": 2.261904761904762e-06, "loss": 0.586, "step": 95 }, { "epoch": 0.06853879105188006, "grad_norm": 0.47703421115875244, "learning_rate": 2.285714285714286e-06, "loss": 0.6003, "step": 96 }, { "epoch": 0.0692527367920038, "grad_norm": 0.48331135511398315, "learning_rate": 2.3095238095238098e-06, "loss": 0.603, "step": 97 }, { "epoch": 0.06996668253212755, "grad_norm": 0.43075695633888245, "learning_rate": 2.3333333333333336e-06, "loss": 0.5547, "step": 98 }, { "epoch": 0.07068062827225131, "grad_norm": 0.42383721470832825, "learning_rate": 2.3571428571428574e-06, "loss": 0.5567, "step": 99 }, { "epoch": 0.07139457401237506, "grad_norm": 0.4112760126590729, "learning_rate": 2.380952380952381e-06, "loss": 0.5593, "step": 100 }, { "epoch": 0.07210851975249881, "grad_norm": 0.42717376351356506, "learning_rate": 2.404761904761905e-06, "loss": 0.5728, "step": 101 }, { "epoch": 0.07282246549262256, "grad_norm": 0.4443986415863037, "learning_rate": 2.428571428571429e-06, "loss": 0.5676, "step": 102 }, { "epoch": 0.07353641123274632, "grad_norm": 0.40651193261146545, "learning_rate": 2.4523809523809526e-06, "loss": 0.5485, "step": 103 }, { "epoch": 0.07425035697287007, "grad_norm": 0.4356403946876526, "learning_rate": 2.4761904761904764e-06, "loss": 0.5512, "step": 104 }, { "epoch": 0.07496430271299381, "grad_norm": 0.4706228971481323, "learning_rate": 2.5e-06, "loss": 0.5599, "step": 105 }, { "epoch": 0.07567824845311756, "grad_norm": 0.4296753704547882, "learning_rate": 2.523809523809524e-06, "loss": 0.5676, "step": 106 }, { "epoch": 0.07639219419324131, "grad_norm": 0.40761423110961914, "learning_rate": 2.547619047619048e-06, "loss": 0.5419, "step": 107 }, { "epoch": 0.07710613993336507, "grad_norm": 0.461953729391098, "learning_rate": 2.571428571428571e-06, "loss": 0.5773, "step": 108 }, { "epoch": 0.07782008567348882, "grad_norm": 0.4252876341342926, "learning_rate": 2.595238095238096e-06, "loss": 0.5419, "step": 109 }, { "epoch": 0.07853403141361257, "grad_norm": 0.4430479109287262, "learning_rate": 2.6190476190476192e-06, "loss": 0.5901, "step": 110 }, { "epoch": 0.07924797715373631, "grad_norm": 0.42605239152908325, "learning_rate": 2.642857142857143e-06, "loss": 0.5812, "step": 111 }, { "epoch": 0.07996192289386006, "grad_norm": 0.41223040223121643, "learning_rate": 2.666666666666667e-06, "loss": 0.5264, "step": 112 }, { "epoch": 0.08067586863398382, "grad_norm": 0.3804052472114563, "learning_rate": 2.6904761904761906e-06, "loss": 0.5855, "step": 113 }, { "epoch": 0.08138981437410757, "grad_norm": 0.4697934091091156, "learning_rate": 2.7142857142857144e-06, "loss": 0.5974, "step": 114 }, { "epoch": 0.08210376011423132, "grad_norm": 0.46152356266975403, "learning_rate": 2.7380952380952387e-06, "loss": 0.5177, "step": 115 }, { "epoch": 0.08281770585435506, "grad_norm": 0.4593653678894043, "learning_rate": 2.7619047619047625e-06, "loss": 0.574, "step": 116 }, { "epoch": 0.08353165159447883, "grad_norm": 0.4581179618835449, "learning_rate": 2.785714285714286e-06, "loss": 0.5476, "step": 117 }, { "epoch": 0.08424559733460257, "grad_norm": 0.4491896629333496, "learning_rate": 2.8095238095238096e-06, "loss": 0.5324, "step": 118 }, { "epoch": 0.08495954307472632, "grad_norm": 0.3953836262226105, "learning_rate": 2.8333333333333335e-06, "loss": 0.5423, "step": 119 }, { "epoch": 0.08567348881485007, "grad_norm": 0.47906434535980225, "learning_rate": 2.8571428571428573e-06, "loss": 0.5741, "step": 120 }, { "epoch": 0.08638743455497382, "grad_norm": 0.46572014689445496, "learning_rate": 2.880952380952381e-06, "loss": 0.5528, "step": 121 }, { "epoch": 0.08710138029509758, "grad_norm": 0.4415108859539032, "learning_rate": 2.9047619047619053e-06, "loss": 0.5505, "step": 122 }, { "epoch": 0.08781532603522132, "grad_norm": 0.43933913111686707, "learning_rate": 2.928571428571429e-06, "loss": 0.5694, "step": 123 }, { "epoch": 0.08852927177534507, "grad_norm": 0.4380364716053009, "learning_rate": 2.9523809523809525e-06, "loss": 0.5551, "step": 124 }, { "epoch": 0.08924321751546882, "grad_norm": 0.4099256992340088, "learning_rate": 2.9761904761904763e-06, "loss": 0.5383, "step": 125 }, { "epoch": 0.08995716325559258, "grad_norm": 0.42280951142311096, "learning_rate": 3e-06, "loss": 0.5216, "step": 126 }, { "epoch": 0.09067110899571633, "grad_norm": 0.44926342368125916, "learning_rate": 3.023809523809524e-06, "loss": 0.55, "step": 127 }, { "epoch": 0.09138505473584008, "grad_norm": 0.4238821864128113, "learning_rate": 3.047619047619048e-06, "loss": 0.5406, "step": 128 }, { "epoch": 0.09209900047596382, "grad_norm": 0.4177471399307251, "learning_rate": 3.071428571428572e-06, "loss": 0.5275, "step": 129 }, { "epoch": 0.09281294621608757, "grad_norm": 0.517320990562439, "learning_rate": 3.0952380952380957e-06, "loss": 0.5377, "step": 130 }, { "epoch": 0.09352689195621133, "grad_norm": 0.417407751083374, "learning_rate": 3.1190476190476195e-06, "loss": 0.5436, "step": 131 }, { "epoch": 0.09424083769633508, "grad_norm": 0.4375893771648407, "learning_rate": 3.142857142857143e-06, "loss": 0.5155, "step": 132 }, { "epoch": 0.09495478343645883, "grad_norm": 0.4427245259284973, "learning_rate": 3.1666666666666667e-06, "loss": 0.5707, "step": 133 }, { "epoch": 0.09566872917658258, "grad_norm": 0.4050052762031555, "learning_rate": 3.1904761904761905e-06, "loss": 0.5283, "step": 134 }, { "epoch": 0.09638267491670632, "grad_norm": 0.39988547563552856, "learning_rate": 3.2142857142857147e-06, "loss": 0.5449, "step": 135 }, { "epoch": 0.09709662065683008, "grad_norm": 0.49275848269462585, "learning_rate": 3.2380952380952385e-06, "loss": 0.5618, "step": 136 }, { "epoch": 0.09781056639695383, "grad_norm": 0.4768349528312683, "learning_rate": 3.2619047619047623e-06, "loss": 0.5601, "step": 137 }, { "epoch": 0.09852451213707758, "grad_norm": 0.449545294046402, "learning_rate": 3.285714285714286e-06, "loss": 0.5657, "step": 138 }, { "epoch": 0.09923845787720133, "grad_norm": 0.4421097934246063, "learning_rate": 3.3095238095238095e-06, "loss": 0.5315, "step": 139 }, { "epoch": 0.09995240361732509, "grad_norm": 0.4963396191596985, "learning_rate": 3.3333333333333333e-06, "loss": 0.5297, "step": 140 }, { "epoch": 0.10066634935744884, "grad_norm": 0.46313542127609253, "learning_rate": 3.357142857142857e-06, "loss": 0.5455, "step": 141 }, { "epoch": 0.10138029509757258, "grad_norm": 0.4632144570350647, "learning_rate": 3.3809523809523814e-06, "loss": 0.5464, "step": 142 }, { "epoch": 0.10209424083769633, "grad_norm": 0.4045509099960327, "learning_rate": 3.404761904761905e-06, "loss": 0.526, "step": 143 }, { "epoch": 0.10280818657782008, "grad_norm": 0.43662405014038086, "learning_rate": 3.428571428571429e-06, "loss": 0.5394, "step": 144 }, { "epoch": 0.10352213231794384, "grad_norm": 0.46929582953453064, "learning_rate": 3.4523809523809528e-06, "loss": 0.5409, "step": 145 }, { "epoch": 0.10423607805806759, "grad_norm": 0.4333263039588928, "learning_rate": 3.476190476190476e-06, "loss": 0.5493, "step": 146 }, { "epoch": 0.10495002379819134, "grad_norm": 0.42170342803001404, "learning_rate": 3.5e-06, "loss": 0.5227, "step": 147 }, { "epoch": 0.10566396953831508, "grad_norm": 0.45947131514549255, "learning_rate": 3.523809523809524e-06, "loss": 0.5383, "step": 148 }, { "epoch": 0.10637791527843884, "grad_norm": 0.4494132399559021, "learning_rate": 3.547619047619048e-06, "loss": 0.5296, "step": 149 }, { "epoch": 0.10709186101856259, "grad_norm": 0.5001965761184692, "learning_rate": 3.5714285714285718e-06, "loss": 0.5189, "step": 150 }, { "epoch": 0.10780580675868634, "grad_norm": 0.41522446274757385, "learning_rate": 3.5952380952380956e-06, "loss": 0.5711, "step": 151 }, { "epoch": 0.10851975249881009, "grad_norm": 0.3996860980987549, "learning_rate": 3.6190476190476194e-06, "loss": 0.5424, "step": 152 }, { "epoch": 0.10923369823893384, "grad_norm": 0.4289129674434662, "learning_rate": 3.642857142857143e-06, "loss": 0.5212, "step": 153 }, { "epoch": 0.1099476439790576, "grad_norm": 0.4885237216949463, "learning_rate": 3.6666666666666666e-06, "loss": 0.5285, "step": 154 }, { "epoch": 0.11066158971918134, "grad_norm": 0.4415178894996643, "learning_rate": 3.690476190476191e-06, "loss": 0.53, "step": 155 }, { "epoch": 0.11137553545930509, "grad_norm": 0.4392349123954773, "learning_rate": 3.7142857142857146e-06, "loss": 0.5144, "step": 156 }, { "epoch": 0.11208948119942884, "grad_norm": 0.44325530529022217, "learning_rate": 3.7380952380952384e-06, "loss": 0.5536, "step": 157 }, { "epoch": 0.1128034269395526, "grad_norm": 0.44300249218940735, "learning_rate": 3.761904761904762e-06, "loss": 0.5435, "step": 158 }, { "epoch": 0.11351737267967635, "grad_norm": 0.4076782464981079, "learning_rate": 3.785714285714286e-06, "loss": 0.512, "step": 159 }, { "epoch": 0.1142313184198001, "grad_norm": 0.4592754542827606, "learning_rate": 3.80952380952381e-06, "loss": 0.5399, "step": 160 }, { "epoch": 0.11494526415992384, "grad_norm": 0.4299003779888153, "learning_rate": 3.833333333333334e-06, "loss": 0.5253, "step": 161 }, { "epoch": 0.11565920990004759, "grad_norm": 0.4050465524196625, "learning_rate": 3.857142857142858e-06, "loss": 0.5236, "step": 162 }, { "epoch": 0.11637315564017135, "grad_norm": 0.4490654766559601, "learning_rate": 3.880952380952381e-06, "loss": 0.5333, "step": 163 }, { "epoch": 0.1170871013802951, "grad_norm": 0.4368576109409332, "learning_rate": 3.9047619047619055e-06, "loss": 0.5142, "step": 164 }, { "epoch": 0.11780104712041885, "grad_norm": 0.4316302537918091, "learning_rate": 3.928571428571429e-06, "loss": 0.5657, "step": 165 }, { "epoch": 0.1185149928605426, "grad_norm": 0.4359660744667053, "learning_rate": 3.952380952380952e-06, "loss": 0.5339, "step": 166 }, { "epoch": 0.11922893860066634, "grad_norm": 0.4642390012741089, "learning_rate": 3.9761904761904764e-06, "loss": 0.5104, "step": 167 }, { "epoch": 0.1199428843407901, "grad_norm": 0.4274469316005707, "learning_rate": 4.000000000000001e-06, "loss": 0.546, "step": 168 }, { "epoch": 0.12065683008091385, "grad_norm": 0.5196504592895508, "learning_rate": 4.023809523809524e-06, "loss": 0.5461, "step": 169 }, { "epoch": 0.1213707758210376, "grad_norm": 0.4546881914138794, "learning_rate": 4.047619047619048e-06, "loss": 0.5456, "step": 170 }, { "epoch": 0.12208472156116135, "grad_norm": 0.48085030913352966, "learning_rate": 4.071428571428572e-06, "loss": 0.5353, "step": 171 }, { "epoch": 0.12279866730128511, "grad_norm": 0.4451395571231842, "learning_rate": 4.095238095238096e-06, "loss": 0.5258, "step": 172 }, { "epoch": 0.12351261304140886, "grad_norm": 0.4326440095901489, "learning_rate": 4.119047619047619e-06, "loss": 0.519, "step": 173 }, { "epoch": 0.1242265587815326, "grad_norm": 0.4211156964302063, "learning_rate": 4.1428571428571435e-06, "loss": 0.5027, "step": 174 }, { "epoch": 0.12494050452165635, "grad_norm": 0.4523780941963196, "learning_rate": 4.166666666666667e-06, "loss": 0.5511, "step": 175 }, { "epoch": 0.1256544502617801, "grad_norm": 0.42376890778541565, "learning_rate": 4.190476190476191e-06, "loss": 0.5129, "step": 176 }, { "epoch": 0.12636839600190386, "grad_norm": 0.4197622537612915, "learning_rate": 4.2142857142857145e-06, "loss": 0.5241, "step": 177 }, { "epoch": 0.1270823417420276, "grad_norm": 0.44599732756614685, "learning_rate": 4.238095238095239e-06, "loss": 0.5267, "step": 178 }, { "epoch": 0.12779628748215136, "grad_norm": 0.4292154908180237, "learning_rate": 4.261904761904762e-06, "loss": 0.5289, "step": 179 }, { "epoch": 0.12851023322227512, "grad_norm": 0.44487264752388, "learning_rate": 4.2857142857142855e-06, "loss": 0.5142, "step": 180 }, { "epoch": 0.12922417896239885, "grad_norm": 0.43930426239967346, "learning_rate": 4.30952380952381e-06, "loss": 0.5365, "step": 181 }, { "epoch": 0.1299381247025226, "grad_norm": 0.3991073966026306, "learning_rate": 4.333333333333334e-06, "loss": 0.5343, "step": 182 }, { "epoch": 0.13065207044264635, "grad_norm": 0.4669043719768524, "learning_rate": 4.357142857142857e-06, "loss": 0.519, "step": 183 }, { "epoch": 0.1313660161827701, "grad_norm": 0.3896547555923462, "learning_rate": 4.3809523809523815e-06, "loss": 0.5128, "step": 184 }, { "epoch": 0.13207996192289387, "grad_norm": 0.4569891095161438, "learning_rate": 4.404761904761905e-06, "loss": 0.5178, "step": 185 }, { "epoch": 0.1327939076630176, "grad_norm": 0.4485086500644684, "learning_rate": 4.428571428571429e-06, "loss": 0.506, "step": 186 }, { "epoch": 0.13350785340314136, "grad_norm": 0.46973252296447754, "learning_rate": 4.4523809523809525e-06, "loss": 0.5054, "step": 187 }, { "epoch": 0.13422179914326512, "grad_norm": 0.4698057770729065, "learning_rate": 4.476190476190477e-06, "loss": 0.5313, "step": 188 }, { "epoch": 0.13493574488338886, "grad_norm": 0.4588364362716675, "learning_rate": 4.5e-06, "loss": 0.5242, "step": 189 }, { "epoch": 0.13564969062351262, "grad_norm": 0.42742547392845154, "learning_rate": 4.523809523809524e-06, "loss": 0.5207, "step": 190 }, { "epoch": 0.13636363636363635, "grad_norm": 0.4604097008705139, "learning_rate": 4.547619047619048e-06, "loss": 0.5283, "step": 191 }, { "epoch": 0.13707758210376011, "grad_norm": 0.44324547052383423, "learning_rate": 4.571428571428572e-06, "loss": 0.5295, "step": 192 }, { "epoch": 0.13779152784388388, "grad_norm": 0.43391361832618713, "learning_rate": 4.595238095238095e-06, "loss": 0.5105, "step": 193 }, { "epoch": 0.1385054735840076, "grad_norm": 0.3949018120765686, "learning_rate": 4.6190476190476196e-06, "loss": 0.5058, "step": 194 }, { "epoch": 0.13921941932413137, "grad_norm": 0.4204160273075104, "learning_rate": 4.642857142857144e-06, "loss": 0.5203, "step": 195 }, { "epoch": 0.1399333650642551, "grad_norm": 0.40288060903549194, "learning_rate": 4.666666666666667e-06, "loss": 0.5142, "step": 196 }, { "epoch": 0.14064731080437887, "grad_norm": 0.4147815704345703, "learning_rate": 4.6904761904761905e-06, "loss": 0.5357, "step": 197 }, { "epoch": 0.14136125654450263, "grad_norm": 0.4246997833251953, "learning_rate": 4.714285714285715e-06, "loss": 0.497, "step": 198 }, { "epoch": 0.14207520228462636, "grad_norm": 0.43200987577438354, "learning_rate": 4.738095238095238e-06, "loss": 0.4912, "step": 199 }, { "epoch": 0.14278914802475012, "grad_norm": 0.3852202296257019, "learning_rate": 4.761904761904762e-06, "loss": 0.5192, "step": 200 }, { "epoch": 0.14350309376487386, "grad_norm": 0.4455043375492096, "learning_rate": 4.785714285714287e-06, "loss": 0.5085, "step": 201 }, { "epoch": 0.14421703950499762, "grad_norm": 0.43952491879463196, "learning_rate": 4.80952380952381e-06, "loss": 0.5235, "step": 202 }, { "epoch": 0.14493098524512138, "grad_norm": 0.434721976518631, "learning_rate": 4.833333333333333e-06, "loss": 0.5282, "step": 203 }, { "epoch": 0.1456449309852451, "grad_norm": 0.4365811347961426, "learning_rate": 4.857142857142858e-06, "loss": 0.5219, "step": 204 }, { "epoch": 0.14635887672536887, "grad_norm": 0.44151750206947327, "learning_rate": 4.880952380952381e-06, "loss": 0.5409, "step": 205 }, { "epoch": 0.14707282246549264, "grad_norm": 0.43844419717788696, "learning_rate": 4.904761904761905e-06, "loss": 0.5563, "step": 206 }, { "epoch": 0.14778676820561637, "grad_norm": 0.4504668712615967, "learning_rate": 4.928571428571429e-06, "loss": 0.4964, "step": 207 }, { "epoch": 0.14850071394574013, "grad_norm": 0.4177759289741516, "learning_rate": 4.952380952380953e-06, "loss": 0.4992, "step": 208 }, { "epoch": 0.14921465968586387, "grad_norm": 0.4263685643672943, "learning_rate": 4.976190476190477e-06, "loss": 0.5233, "step": 209 }, { "epoch": 0.14992860542598763, "grad_norm": 0.465336412191391, "learning_rate": 5e-06, "loss": 0.5094, "step": 210 }, { "epoch": 0.1506425511661114, "grad_norm": 0.42533963918685913, "learning_rate": 5.023809523809524e-06, "loss": 0.5118, "step": 211 }, { "epoch": 0.15135649690623512, "grad_norm": 0.41367989778518677, "learning_rate": 5.047619047619048e-06, "loss": 0.4949, "step": 212 }, { "epoch": 0.15207044264635888, "grad_norm": 0.44342368841171265, "learning_rate": 5.071428571428571e-06, "loss": 0.5236, "step": 213 }, { "epoch": 0.15278438838648262, "grad_norm": 0.4493117928504944, "learning_rate": 5.095238095238096e-06, "loss": 0.5245, "step": 214 }, { "epoch": 0.15349833412660638, "grad_norm": 0.45809483528137207, "learning_rate": 5.119047619047619e-06, "loss": 0.5039, "step": 215 }, { "epoch": 0.15421227986673014, "grad_norm": 0.4311520457267761, "learning_rate": 5.142857142857142e-06, "loss": 0.5209, "step": 216 }, { "epoch": 0.15492622560685387, "grad_norm": 0.4178316593170166, "learning_rate": 5.1666666666666675e-06, "loss": 0.5004, "step": 217 }, { "epoch": 0.15564017134697763, "grad_norm": 0.4141157567501068, "learning_rate": 5.190476190476192e-06, "loss": 0.4986, "step": 218 }, { "epoch": 0.15635411708710137, "grad_norm": 0.43364667892456055, "learning_rate": 5.214285714285715e-06, "loss": 0.534, "step": 219 }, { "epoch": 0.15706806282722513, "grad_norm": 0.42537960410118103, "learning_rate": 5.2380952380952384e-06, "loss": 0.5056, "step": 220 }, { "epoch": 0.1577820085673489, "grad_norm": 0.40286320447921753, "learning_rate": 5.261904761904763e-06, "loss": 0.534, "step": 221 }, { "epoch": 0.15849595430747263, "grad_norm": 0.43042734265327454, "learning_rate": 5.285714285714286e-06, "loss": 0.4833, "step": 222 }, { "epoch": 0.1592099000475964, "grad_norm": 0.4420410394668579, "learning_rate": 5.30952380952381e-06, "loss": 0.499, "step": 223 }, { "epoch": 0.15992384578772012, "grad_norm": 0.5106959939002991, "learning_rate": 5.333333333333334e-06, "loss": 0.5623, "step": 224 }, { "epoch": 0.16063779152784388, "grad_norm": 0.39420950412750244, "learning_rate": 5.357142857142857e-06, "loss": 0.5168, "step": 225 }, { "epoch": 0.16135173726796764, "grad_norm": 0.44402945041656494, "learning_rate": 5.380952380952381e-06, "loss": 0.5084, "step": 226 }, { "epoch": 0.16206568300809138, "grad_norm": 0.4395456314086914, "learning_rate": 5.404761904761905e-06, "loss": 0.5156, "step": 227 }, { "epoch": 0.16277962874821514, "grad_norm": 0.4364016652107239, "learning_rate": 5.428571428571429e-06, "loss": 0.472, "step": 228 }, { "epoch": 0.1634935744883389, "grad_norm": 0.44030630588531494, "learning_rate": 5.452380952380952e-06, "loss": 0.5057, "step": 229 }, { "epoch": 0.16420752022846263, "grad_norm": 0.5160099864006042, "learning_rate": 5.476190476190477e-06, "loss": 0.4804, "step": 230 }, { "epoch": 0.1649214659685864, "grad_norm": 0.4410359263420105, "learning_rate": 5.500000000000001e-06, "loss": 0.4855, "step": 231 }, { "epoch": 0.16563541170871013, "grad_norm": 0.4528823792934418, "learning_rate": 5.523809523809525e-06, "loss": 0.4977, "step": 232 }, { "epoch": 0.1663493574488339, "grad_norm": 0.5081466436386108, "learning_rate": 5.547619047619048e-06, "loss": 0.523, "step": 233 }, { "epoch": 0.16706330318895765, "grad_norm": 0.4724820554256439, "learning_rate": 5.571428571428572e-06, "loss": 0.5248, "step": 234 }, { "epoch": 0.16777724892908139, "grad_norm": 0.4536125957965851, "learning_rate": 5.595238095238096e-06, "loss": 0.5188, "step": 235 }, { "epoch": 0.16849119466920515, "grad_norm": 0.5303017497062683, "learning_rate": 5.619047619047619e-06, "loss": 0.5429, "step": 236 }, { "epoch": 0.16920514040932888, "grad_norm": 0.5844816565513611, "learning_rate": 5.6428571428571435e-06, "loss": 0.5389, "step": 237 }, { "epoch": 0.16991908614945264, "grad_norm": 0.4750198423862457, "learning_rate": 5.666666666666667e-06, "loss": 0.5037, "step": 238 }, { "epoch": 0.1706330318895764, "grad_norm": 0.6005067825317383, "learning_rate": 5.690476190476191e-06, "loss": 0.5326, "step": 239 }, { "epoch": 0.17134697762970014, "grad_norm": 0.5071528553962708, "learning_rate": 5.7142857142857145e-06, "loss": 0.5183, "step": 240 }, { "epoch": 0.1720609233698239, "grad_norm": 0.5047202110290527, "learning_rate": 5.738095238095238e-06, "loss": 0.4998, "step": 241 }, { "epoch": 0.17277486910994763, "grad_norm": 0.4880475699901581, "learning_rate": 5.761904761904762e-06, "loss": 0.5165, "step": 242 }, { "epoch": 0.1734888148500714, "grad_norm": 0.4877784252166748, "learning_rate": 5.785714285714286e-06, "loss": 0.5104, "step": 243 }, { "epoch": 0.17420276059019515, "grad_norm": 0.4605950117111206, "learning_rate": 5.8095238095238106e-06, "loss": 0.5084, "step": 244 }, { "epoch": 0.1749167063303189, "grad_norm": 0.46828368306159973, "learning_rate": 5.833333333333334e-06, "loss": 0.5077, "step": 245 }, { "epoch": 0.17563065207044265, "grad_norm": 0.5146253705024719, "learning_rate": 5.857142857142858e-06, "loss": 0.4905, "step": 246 }, { "epoch": 0.17634459781056638, "grad_norm": 0.49939969182014465, "learning_rate": 5.8809523809523816e-06, "loss": 0.5121, "step": 247 }, { "epoch": 0.17705854355069015, "grad_norm": 0.4433838129043579, "learning_rate": 5.904761904761905e-06, "loss": 0.493, "step": 248 }, { "epoch": 0.1777724892908139, "grad_norm": 0.539391815662384, "learning_rate": 5.928571428571429e-06, "loss": 0.5068, "step": 249 }, { "epoch": 0.17848643503093764, "grad_norm": 0.485293447971344, "learning_rate": 5.9523809523809525e-06, "loss": 0.4798, "step": 250 }, { "epoch": 0.1792003807710614, "grad_norm": 0.4739006459712982, "learning_rate": 5.976190476190477e-06, "loss": 0.5115, "step": 251 }, { "epoch": 0.17991432651118516, "grad_norm": 0.48996254801750183, "learning_rate": 6e-06, "loss": 0.4955, "step": 252 }, { "epoch": 0.1806282722513089, "grad_norm": 0.44892409443855286, "learning_rate": 6.023809523809524e-06, "loss": 0.5111, "step": 253 }, { "epoch": 0.18134221799143266, "grad_norm": 0.49043112993240356, "learning_rate": 6.047619047619048e-06, "loss": 0.5378, "step": 254 }, { "epoch": 0.1820561637315564, "grad_norm": 0.4595518410205841, "learning_rate": 6.071428571428571e-06, "loss": 0.506, "step": 255 }, { "epoch": 0.18277010947168015, "grad_norm": 0.46342596411705017, "learning_rate": 6.095238095238096e-06, "loss": 0.5212, "step": 256 }, { "epoch": 0.18348405521180391, "grad_norm": 0.48837316036224365, "learning_rate": 6.11904761904762e-06, "loss": 0.5429, "step": 257 }, { "epoch": 0.18419800095192765, "grad_norm": 0.46363863348960876, "learning_rate": 6.142857142857144e-06, "loss": 0.5196, "step": 258 }, { "epoch": 0.1849119466920514, "grad_norm": 0.4528738558292389, "learning_rate": 6.166666666666667e-06, "loss": 0.4823, "step": 259 }, { "epoch": 0.18562589243217514, "grad_norm": 0.44764065742492676, "learning_rate": 6.1904761904761914e-06, "loss": 0.4711, "step": 260 }, { "epoch": 0.1863398381722989, "grad_norm": 0.4490417540073395, "learning_rate": 6.214285714285715e-06, "loss": 0.5073, "step": 261 }, { "epoch": 0.18705378391242267, "grad_norm": 0.43078622221946716, "learning_rate": 6.238095238095239e-06, "loss": 0.4916, "step": 262 }, { "epoch": 0.1877677296525464, "grad_norm": 0.4000595510005951, "learning_rate": 6.261904761904762e-06, "loss": 0.5113, "step": 263 }, { "epoch": 0.18848167539267016, "grad_norm": 0.42854049801826477, "learning_rate": 6.285714285714286e-06, "loss": 0.4561, "step": 264 }, { "epoch": 0.1891956211327939, "grad_norm": 0.47784942388534546, "learning_rate": 6.30952380952381e-06, "loss": 0.5311, "step": 265 }, { "epoch": 0.18990956687291766, "grad_norm": 0.4116208255290985, "learning_rate": 6.333333333333333e-06, "loss": 0.5347, "step": 266 }, { "epoch": 0.19062351261304142, "grad_norm": 0.4591863751411438, "learning_rate": 6.357142857142858e-06, "loss": 0.5099, "step": 267 }, { "epoch": 0.19133745835316515, "grad_norm": 0.39054515957832336, "learning_rate": 6.380952380952381e-06, "loss": 0.4876, "step": 268 }, { "epoch": 0.1920514040932889, "grad_norm": 0.4340115785598755, "learning_rate": 6.404761904761904e-06, "loss": 0.4843, "step": 269 }, { "epoch": 0.19276534983341265, "grad_norm": 0.4426612854003906, "learning_rate": 6.4285714285714295e-06, "loss": 0.5338, "step": 270 }, { "epoch": 0.1934792955735364, "grad_norm": 0.42602139711380005, "learning_rate": 6.452380952380954e-06, "loss": 0.5237, "step": 271 }, { "epoch": 0.19419324131366017, "grad_norm": 0.4232972264289856, "learning_rate": 6.476190476190477e-06, "loss": 0.5252, "step": 272 }, { "epoch": 0.1949071870537839, "grad_norm": 0.4852554202079773, "learning_rate": 6.5000000000000004e-06, "loss": 0.517, "step": 273 }, { "epoch": 0.19562113279390766, "grad_norm": 0.43604421615600586, "learning_rate": 6.523809523809525e-06, "loss": 0.4964, "step": 274 }, { "epoch": 0.19633507853403143, "grad_norm": 0.4551788568496704, "learning_rate": 6.547619047619048e-06, "loss": 0.5201, "step": 275 }, { "epoch": 0.19704902427415516, "grad_norm": 0.4512634575366974, "learning_rate": 6.571428571428572e-06, "loss": 0.488, "step": 276 }, { "epoch": 0.19776297001427892, "grad_norm": 0.46974584460258484, "learning_rate": 6.595238095238096e-06, "loss": 0.4814, "step": 277 }, { "epoch": 0.19847691575440266, "grad_norm": 0.4474380910396576, "learning_rate": 6.619047619047619e-06, "loss": 0.5111, "step": 278 }, { "epoch": 0.19919086149452642, "grad_norm": 0.4210040271282196, "learning_rate": 6.642857142857143e-06, "loss": 0.5117, "step": 279 }, { "epoch": 0.19990480723465018, "grad_norm": 0.46460166573524475, "learning_rate": 6.666666666666667e-06, "loss": 0.4986, "step": 280 }, { "epoch": 0.2006187529747739, "grad_norm": 0.4458734393119812, "learning_rate": 6.690476190476191e-06, "loss": 0.4804, "step": 281 }, { "epoch": 0.20133269871489767, "grad_norm": 0.49311551451683044, "learning_rate": 6.714285714285714e-06, "loss": 0.5338, "step": 282 }, { "epoch": 0.2020466444550214, "grad_norm": 0.40495166182518005, "learning_rate": 6.738095238095239e-06, "loss": 0.5168, "step": 283 }, { "epoch": 0.20276059019514517, "grad_norm": 0.4936951696872711, "learning_rate": 6.761904761904763e-06, "loss": 0.5414, "step": 284 }, { "epoch": 0.20347453593526893, "grad_norm": 0.40082433819770813, "learning_rate": 6.785714285714287e-06, "loss": 0.5074, "step": 285 }, { "epoch": 0.20418848167539266, "grad_norm": 0.4599921405315399, "learning_rate": 6.80952380952381e-06, "loss": 0.4903, "step": 286 }, { "epoch": 0.20490242741551642, "grad_norm": 0.41228535771369934, "learning_rate": 6.833333333333334e-06, "loss": 0.4837, "step": 287 }, { "epoch": 0.20561637315564016, "grad_norm": 0.4814947247505188, "learning_rate": 6.857142857142858e-06, "loss": 0.5089, "step": 288 }, { "epoch": 0.20633031889576392, "grad_norm": 0.4384603500366211, "learning_rate": 6.880952380952381e-06, "loss": 0.4972, "step": 289 }, { "epoch": 0.20704426463588768, "grad_norm": 0.4923464357852936, "learning_rate": 6.9047619047619055e-06, "loss": 0.4993, "step": 290 }, { "epoch": 0.20775821037601142, "grad_norm": 0.48079055547714233, "learning_rate": 6.928571428571429e-06, "loss": 0.5152, "step": 291 }, { "epoch": 0.20847215611613518, "grad_norm": 0.4485103189945221, "learning_rate": 6.952380952380952e-06, "loss": 0.4954, "step": 292 }, { "epoch": 0.20918610185625894, "grad_norm": 0.4864366352558136, "learning_rate": 6.9761904761904765e-06, "loss": 0.5077, "step": 293 }, { "epoch": 0.20990004759638267, "grad_norm": 0.4614764451980591, "learning_rate": 7e-06, "loss": 0.5063, "step": 294 }, { "epoch": 0.21061399333650643, "grad_norm": 0.5203495621681213, "learning_rate": 7.023809523809524e-06, "loss": 0.5017, "step": 295 }, { "epoch": 0.21132793907663017, "grad_norm": 0.4602290987968445, "learning_rate": 7.047619047619048e-06, "loss": 0.4894, "step": 296 }, { "epoch": 0.21204188481675393, "grad_norm": 0.465944766998291, "learning_rate": 7.0714285714285726e-06, "loss": 0.4774, "step": 297 }, { "epoch": 0.2127558305568777, "grad_norm": 0.48657429218292236, "learning_rate": 7.095238095238096e-06, "loss": 0.4867, "step": 298 }, { "epoch": 0.21346977629700142, "grad_norm": 0.4559839069843292, "learning_rate": 7.11904761904762e-06, "loss": 0.4853, "step": 299 }, { "epoch": 0.21418372203712518, "grad_norm": 0.48258113861083984, "learning_rate": 7.1428571428571436e-06, "loss": 0.4714, "step": 300 }, { "epoch": 0.21489766777724892, "grad_norm": 0.5259243845939636, "learning_rate": 7.166666666666667e-06, "loss": 0.4899, "step": 301 }, { "epoch": 0.21561161351737268, "grad_norm": 0.46037453413009644, "learning_rate": 7.190476190476191e-06, "loss": 0.523, "step": 302 }, { "epoch": 0.21632555925749644, "grad_norm": 0.5605643391609192, "learning_rate": 7.2142857142857145e-06, "loss": 0.4794, "step": 303 }, { "epoch": 0.21703950499762018, "grad_norm": 0.48208001255989075, "learning_rate": 7.238095238095239e-06, "loss": 0.515, "step": 304 }, { "epoch": 0.21775345073774394, "grad_norm": 0.5135701894760132, "learning_rate": 7.261904761904762e-06, "loss": 0.5084, "step": 305 }, { "epoch": 0.21846739647786767, "grad_norm": 0.5240781903266907, "learning_rate": 7.285714285714286e-06, "loss": 0.5062, "step": 306 }, { "epoch": 0.21918134221799143, "grad_norm": 0.5261046886444092, "learning_rate": 7.30952380952381e-06, "loss": 0.4895, "step": 307 }, { "epoch": 0.2198952879581152, "grad_norm": 0.4766820967197418, "learning_rate": 7.333333333333333e-06, "loss": 0.4883, "step": 308 }, { "epoch": 0.22060923369823893, "grad_norm": 0.540033221244812, "learning_rate": 7.357142857142858e-06, "loss": 0.4829, "step": 309 }, { "epoch": 0.2213231794383627, "grad_norm": 0.46773627400398254, "learning_rate": 7.380952380952382e-06, "loss": 0.4844, "step": 310 }, { "epoch": 0.22203712517848642, "grad_norm": 0.5059742331504822, "learning_rate": 7.404761904761906e-06, "loss": 0.5008, "step": 311 }, { "epoch": 0.22275107091861018, "grad_norm": 0.437052845954895, "learning_rate": 7.428571428571429e-06, "loss": 0.4681, "step": 312 }, { "epoch": 0.22346501665873394, "grad_norm": 0.5561332106590271, "learning_rate": 7.4523809523809534e-06, "loss": 0.533, "step": 313 }, { "epoch": 0.22417896239885768, "grad_norm": 0.4422554671764374, "learning_rate": 7.476190476190477e-06, "loss": 0.4847, "step": 314 }, { "epoch": 0.22489290813898144, "grad_norm": 0.535584568977356, "learning_rate": 7.500000000000001e-06, "loss": 0.5079, "step": 315 }, { "epoch": 0.2256068538791052, "grad_norm": 0.4610931873321533, "learning_rate": 7.523809523809524e-06, "loss": 0.4749, "step": 316 }, { "epoch": 0.22632079961922894, "grad_norm": 0.49137550592422485, "learning_rate": 7.547619047619048e-06, "loss": 0.4745, "step": 317 }, { "epoch": 0.2270347453593527, "grad_norm": 0.5180812478065491, "learning_rate": 7.571428571428572e-06, "loss": 0.5297, "step": 318 }, { "epoch": 0.22774869109947643, "grad_norm": 0.4868127703666687, "learning_rate": 7.595238095238095e-06, "loss": 0.4872, "step": 319 }, { "epoch": 0.2284626368396002, "grad_norm": 0.5120978355407715, "learning_rate": 7.61904761904762e-06, "loss": 0.4785, "step": 320 }, { "epoch": 0.22917658257972395, "grad_norm": 0.5709794759750366, "learning_rate": 7.642857142857143e-06, "loss": 0.5288, "step": 321 }, { "epoch": 0.2298905283198477, "grad_norm": 0.5280117988586426, "learning_rate": 7.666666666666667e-06, "loss": 0.4665, "step": 322 }, { "epoch": 0.23060447405997145, "grad_norm": 0.4743697941303253, "learning_rate": 7.690476190476191e-06, "loss": 0.489, "step": 323 }, { "epoch": 0.23131841980009518, "grad_norm": 0.6074149012565613, "learning_rate": 7.714285714285716e-06, "loss": 0.4954, "step": 324 }, { "epoch": 0.23203236554021894, "grad_norm": 0.45898741483688354, "learning_rate": 7.738095238095238e-06, "loss": 0.4984, "step": 325 }, { "epoch": 0.2327463112803427, "grad_norm": 0.5202960968017578, "learning_rate": 7.761904761904762e-06, "loss": 0.5072, "step": 326 }, { "epoch": 0.23346025702046644, "grad_norm": 0.5676241517066956, "learning_rate": 7.785714285714287e-06, "loss": 0.5383, "step": 327 }, { "epoch": 0.2341742027605902, "grad_norm": 0.4074823558330536, "learning_rate": 7.809523809523811e-06, "loss": 0.4632, "step": 328 }, { "epoch": 0.23488814850071393, "grad_norm": 0.4754447638988495, "learning_rate": 7.833333333333333e-06, "loss": 0.4774, "step": 329 }, { "epoch": 0.2356020942408377, "grad_norm": 0.46306535601615906, "learning_rate": 7.857142857142858e-06, "loss": 0.492, "step": 330 }, { "epoch": 0.23631603998096146, "grad_norm": 0.4984784722328186, "learning_rate": 7.880952380952382e-06, "loss": 0.4729, "step": 331 }, { "epoch": 0.2370299857210852, "grad_norm": 0.5318698883056641, "learning_rate": 7.904761904761904e-06, "loss": 0.5061, "step": 332 }, { "epoch": 0.23774393146120895, "grad_norm": 0.5411748290061951, "learning_rate": 7.928571428571429e-06, "loss": 0.4729, "step": 333 }, { "epoch": 0.23845787720133269, "grad_norm": 0.560531735420227, "learning_rate": 7.952380952380953e-06, "loss": 0.5121, "step": 334 }, { "epoch": 0.23917182294145645, "grad_norm": 0.3968646228313446, "learning_rate": 7.976190476190477e-06, "loss": 0.4945, "step": 335 }, { "epoch": 0.2398857686815802, "grad_norm": 0.5918635725975037, "learning_rate": 8.000000000000001e-06, "loss": 0.466, "step": 336 }, { "epoch": 0.24059971442170394, "grad_norm": 0.5013028383255005, "learning_rate": 8.023809523809526e-06, "loss": 0.4741, "step": 337 }, { "epoch": 0.2413136601618277, "grad_norm": 0.5348782539367676, "learning_rate": 8.047619047619048e-06, "loss": 0.5237, "step": 338 }, { "epoch": 0.24202760590195146, "grad_norm": 0.45027807354927063, "learning_rate": 8.071428571428572e-06, "loss": 0.4877, "step": 339 }, { "epoch": 0.2427415516420752, "grad_norm": 0.45286816358566284, "learning_rate": 8.095238095238097e-06, "loss": 0.5079, "step": 340 }, { "epoch": 0.24345549738219896, "grad_norm": 0.4660584628582001, "learning_rate": 8.119047619047619e-06, "loss": 0.487, "step": 341 }, { "epoch": 0.2441694431223227, "grad_norm": 0.46187111735343933, "learning_rate": 8.142857142857143e-06, "loss": 0.4701, "step": 342 }, { "epoch": 0.24488338886244645, "grad_norm": 0.45501235127449036, "learning_rate": 8.166666666666668e-06, "loss": 0.4666, "step": 343 }, { "epoch": 0.24559733460257022, "grad_norm": 0.46281898021698, "learning_rate": 8.190476190476192e-06, "loss": 0.459, "step": 344 }, { "epoch": 0.24631128034269395, "grad_norm": 0.520489513874054, "learning_rate": 8.214285714285714e-06, "loss": 0.4668, "step": 345 }, { "epoch": 0.2470252260828177, "grad_norm": 0.4572030305862427, "learning_rate": 8.238095238095239e-06, "loss": 0.4968, "step": 346 }, { "epoch": 0.24773917182294145, "grad_norm": 0.4570038914680481, "learning_rate": 8.261904761904763e-06, "loss": 0.4703, "step": 347 }, { "epoch": 0.2484531175630652, "grad_norm": 0.5626035928726196, "learning_rate": 8.285714285714287e-06, "loss": 0.4987, "step": 348 }, { "epoch": 0.24916706330318897, "grad_norm": 0.5220304727554321, "learning_rate": 8.309523809523811e-06, "loss": 0.4998, "step": 349 }, { "epoch": 0.2498810090433127, "grad_norm": 0.5331788659095764, "learning_rate": 8.333333333333334e-06, "loss": 0.4921, "step": 350 }, { "epoch": 0.25059495478343646, "grad_norm": 0.5733940601348877, "learning_rate": 8.357142857142858e-06, "loss": 0.4919, "step": 351 }, { "epoch": 0.2513089005235602, "grad_norm": 0.4944547414779663, "learning_rate": 8.380952380952382e-06, "loss": 0.4985, "step": 352 }, { "epoch": 0.252022846263684, "grad_norm": 0.6107597351074219, "learning_rate": 8.404761904761905e-06, "loss": 0.5079, "step": 353 }, { "epoch": 0.2527367920038077, "grad_norm": 0.4302752614021301, "learning_rate": 8.428571428571429e-06, "loss": 0.4737, "step": 354 }, { "epoch": 0.25345073774393145, "grad_norm": 0.5126711130142212, "learning_rate": 8.452380952380953e-06, "loss": 0.4892, "step": 355 }, { "epoch": 0.2541646834840552, "grad_norm": 0.4725133180618286, "learning_rate": 8.476190476190477e-06, "loss": 0.4948, "step": 356 }, { "epoch": 0.254878629224179, "grad_norm": 0.4334884583950043, "learning_rate": 8.5e-06, "loss": 0.4975, "step": 357 }, { "epoch": 0.2555925749643027, "grad_norm": 0.4992075562477112, "learning_rate": 8.523809523809524e-06, "loss": 0.476, "step": 358 }, { "epoch": 0.25630652070442644, "grad_norm": 0.43098992109298706, "learning_rate": 8.547619047619048e-06, "loss": 0.4993, "step": 359 }, { "epoch": 0.25702046644455023, "grad_norm": 0.4246855676174164, "learning_rate": 8.571428571428571e-06, "loss": 0.5008, "step": 360 }, { "epoch": 0.25773441218467397, "grad_norm": 0.49387508630752563, "learning_rate": 8.595238095238097e-06, "loss": 0.5034, "step": 361 }, { "epoch": 0.2584483579247977, "grad_norm": 0.4718877375125885, "learning_rate": 8.61904761904762e-06, "loss": 0.4741, "step": 362 }, { "epoch": 0.2591623036649215, "grad_norm": 0.4787861108779907, "learning_rate": 8.642857142857144e-06, "loss": 0.4919, "step": 363 }, { "epoch": 0.2598762494050452, "grad_norm": 0.4394989013671875, "learning_rate": 8.666666666666668e-06, "loss": 0.5007, "step": 364 }, { "epoch": 0.26059019514516896, "grad_norm": 0.4728846848011017, "learning_rate": 8.690476190476192e-06, "loss": 0.4907, "step": 365 }, { "epoch": 0.2613041408852927, "grad_norm": 0.42583903670310974, "learning_rate": 8.714285714285715e-06, "loss": 0.4848, "step": 366 }, { "epoch": 0.2620180866254165, "grad_norm": 0.43035927414894104, "learning_rate": 8.738095238095239e-06, "loss": 0.4562, "step": 367 }, { "epoch": 0.2627320323655402, "grad_norm": 0.5316163897514343, "learning_rate": 8.761904761904763e-06, "loss": 0.5105, "step": 368 }, { "epoch": 0.26344597810566395, "grad_norm": 0.4800125062465668, "learning_rate": 8.785714285714286e-06, "loss": 0.4922, "step": 369 }, { "epoch": 0.26415992384578774, "grad_norm": 0.5188619494438171, "learning_rate": 8.80952380952381e-06, "loss": 0.4906, "step": 370 }, { "epoch": 0.26487386958591147, "grad_norm": 0.5674293041229248, "learning_rate": 8.833333333333334e-06, "loss": 0.4945, "step": 371 }, { "epoch": 0.2655878153260352, "grad_norm": 0.5527657270431519, "learning_rate": 8.857142857142858e-06, "loss": 0.4862, "step": 372 }, { "epoch": 0.266301761066159, "grad_norm": 0.476783812046051, "learning_rate": 8.88095238095238e-06, "loss": 0.4987, "step": 373 }, { "epoch": 0.2670157068062827, "grad_norm": 0.5499778389930725, "learning_rate": 8.904761904761905e-06, "loss": 0.5124, "step": 374 }, { "epoch": 0.26772965254640646, "grad_norm": 0.4940359592437744, "learning_rate": 8.92857142857143e-06, "loss": 0.5121, "step": 375 }, { "epoch": 0.26844359828653025, "grad_norm": 0.5119591355323792, "learning_rate": 8.952380952380953e-06, "loss": 0.4762, "step": 376 }, { "epoch": 0.269157544026654, "grad_norm": 0.48889845609664917, "learning_rate": 8.976190476190478e-06, "loss": 0.5153, "step": 377 }, { "epoch": 0.2698714897667777, "grad_norm": 0.5048221349716187, "learning_rate": 9e-06, "loss": 0.509, "step": 378 }, { "epoch": 0.27058543550690145, "grad_norm": 0.46567416191101074, "learning_rate": 9.023809523809524e-06, "loss": 0.4687, "step": 379 }, { "epoch": 0.27129938124702524, "grad_norm": 0.5101912617683411, "learning_rate": 9.047619047619049e-06, "loss": 0.4961, "step": 380 }, { "epoch": 0.272013326987149, "grad_norm": 0.5018596053123474, "learning_rate": 9.071428571428573e-06, "loss": 0.4796, "step": 381 }, { "epoch": 0.2727272727272727, "grad_norm": 0.4413711130619049, "learning_rate": 9.095238095238095e-06, "loss": 0.5223, "step": 382 }, { "epoch": 0.2734412184673965, "grad_norm": 0.4603331983089447, "learning_rate": 9.11904761904762e-06, "loss": 0.4819, "step": 383 }, { "epoch": 0.27415516420752023, "grad_norm": 0.40550267696380615, "learning_rate": 9.142857142857144e-06, "loss": 0.487, "step": 384 }, { "epoch": 0.27486910994764396, "grad_norm": 0.47390803694725037, "learning_rate": 9.166666666666666e-06, "loss": 0.4759, "step": 385 }, { "epoch": 0.27558305568776775, "grad_norm": 0.49294808506965637, "learning_rate": 9.19047619047619e-06, "loss": 0.4817, "step": 386 }, { "epoch": 0.2762970014278915, "grad_norm": 0.4294382333755493, "learning_rate": 9.214285714285715e-06, "loss": 0.4914, "step": 387 }, { "epoch": 0.2770109471680152, "grad_norm": 0.600999116897583, "learning_rate": 9.238095238095239e-06, "loss": 0.5041, "step": 388 }, { "epoch": 0.27772489290813895, "grad_norm": 0.49100160598754883, "learning_rate": 9.261904761904763e-06, "loss": 0.4808, "step": 389 }, { "epoch": 0.27843883864826274, "grad_norm": 0.5513970851898193, "learning_rate": 9.285714285714288e-06, "loss": 0.4813, "step": 390 }, { "epoch": 0.2791527843883865, "grad_norm": 0.5193294286727905, "learning_rate": 9.30952380952381e-06, "loss": 0.4932, "step": 391 }, { "epoch": 0.2798667301285102, "grad_norm": 0.470844566822052, "learning_rate": 9.333333333333334e-06, "loss": 0.483, "step": 392 }, { "epoch": 0.280580675868634, "grad_norm": 0.4948735237121582, "learning_rate": 9.357142857142859e-06, "loss": 0.4849, "step": 393 }, { "epoch": 0.28129462160875773, "grad_norm": 0.5293798446655273, "learning_rate": 9.380952380952381e-06, "loss": 0.4973, "step": 394 }, { "epoch": 0.28200856734888147, "grad_norm": 0.4749852120876312, "learning_rate": 9.404761904761905e-06, "loss": 0.4867, "step": 395 }, { "epoch": 0.28272251308900526, "grad_norm": 0.528459906578064, "learning_rate": 9.42857142857143e-06, "loss": 0.4984, "step": 396 }, { "epoch": 0.283436458829129, "grad_norm": 0.5348254442214966, "learning_rate": 9.452380952380952e-06, "loss": 0.5014, "step": 397 }, { "epoch": 0.2841504045692527, "grad_norm": 0.528060793876648, "learning_rate": 9.476190476190476e-06, "loss": 0.4519, "step": 398 }, { "epoch": 0.2848643503093765, "grad_norm": 0.48265206813812256, "learning_rate": 9.5e-06, "loss": 0.4697, "step": 399 }, { "epoch": 0.28557829604950025, "grad_norm": 0.5073575973510742, "learning_rate": 9.523809523809525e-06, "loss": 0.4727, "step": 400 }, { "epoch": 0.286292241789624, "grad_norm": 0.4650314450263977, "learning_rate": 9.547619047619049e-06, "loss": 0.4896, "step": 401 }, { "epoch": 0.2870061875297477, "grad_norm": 0.4480634927749634, "learning_rate": 9.571428571428573e-06, "loss": 0.477, "step": 402 }, { "epoch": 0.2877201332698715, "grad_norm": 0.5474111437797546, "learning_rate": 9.595238095238096e-06, "loss": 0.5055, "step": 403 }, { "epoch": 0.28843407900999524, "grad_norm": 0.4966522455215454, "learning_rate": 9.61904761904762e-06, "loss": 0.4856, "step": 404 }, { "epoch": 0.28914802475011897, "grad_norm": 0.5535951256752014, "learning_rate": 9.642857142857144e-06, "loss": 0.5159, "step": 405 }, { "epoch": 0.28986197049024276, "grad_norm": 0.5100200176239014, "learning_rate": 9.666666666666667e-06, "loss": 0.489, "step": 406 }, { "epoch": 0.2905759162303665, "grad_norm": 0.5043646693229675, "learning_rate": 9.690476190476191e-06, "loss": 0.506, "step": 407 }, { "epoch": 0.2912898619704902, "grad_norm": 0.39624398946762085, "learning_rate": 9.714285714285715e-06, "loss": 0.4692, "step": 408 }, { "epoch": 0.292003807710614, "grad_norm": 0.4201776683330536, "learning_rate": 9.73809523809524e-06, "loss": 0.4696, "step": 409 }, { "epoch": 0.29271775345073775, "grad_norm": 0.4276765286922455, "learning_rate": 9.761904761904762e-06, "loss": 0.4699, "step": 410 }, { "epoch": 0.2934316991908615, "grad_norm": 0.47397249937057495, "learning_rate": 9.785714285714286e-06, "loss": 0.483, "step": 411 }, { "epoch": 0.2941456449309853, "grad_norm": 0.4943482279777527, "learning_rate": 9.80952380952381e-06, "loss": 0.5008, "step": 412 }, { "epoch": 0.294859590671109, "grad_norm": 0.46299171447753906, "learning_rate": 9.833333333333333e-06, "loss": 0.4854, "step": 413 }, { "epoch": 0.29557353641123274, "grad_norm": 0.45781195163726807, "learning_rate": 9.857142857142859e-06, "loss": 0.491, "step": 414 }, { "epoch": 0.2962874821513565, "grad_norm": 0.4925564229488373, "learning_rate": 9.880952380952381e-06, "loss": 0.4949, "step": 415 }, { "epoch": 0.29700142789148026, "grad_norm": 0.4176582396030426, "learning_rate": 9.904761904761906e-06, "loss": 0.4688, "step": 416 }, { "epoch": 0.297715373631604, "grad_norm": 0.43885573744773865, "learning_rate": 9.92857142857143e-06, "loss": 0.4663, "step": 417 }, { "epoch": 0.29842931937172773, "grad_norm": 0.449413001537323, "learning_rate": 9.952380952380954e-06, "loss": 0.472, "step": 418 }, { "epoch": 0.2991432651118515, "grad_norm": 0.4984172284603119, "learning_rate": 9.976190476190477e-06, "loss": 0.4881, "step": 419 }, { "epoch": 0.29985721085197525, "grad_norm": 0.4509439170360565, "learning_rate": 1e-05, "loss": 0.4986, "step": 420 }, { "epoch": 0.300571156592099, "grad_norm": 0.5232335329055786, "learning_rate": 9.999998273143979e-06, "loss": 0.5095, "step": 421 }, { "epoch": 0.3012851023322228, "grad_norm": 0.42692241072654724, "learning_rate": 9.999993092577106e-06, "loss": 0.4844, "step": 422 }, { "epoch": 0.3019990480723465, "grad_norm": 0.5039780139923096, "learning_rate": 9.99998445830296e-06, "loss": 0.5075, "step": 423 }, { "epoch": 0.30271299381247024, "grad_norm": 0.49364426732063293, "learning_rate": 9.999972370327508e-06, "loss": 0.4788, "step": 424 }, { "epoch": 0.303426939552594, "grad_norm": 0.5859333276748657, "learning_rate": 9.999956828659095e-06, "loss": 0.4844, "step": 425 }, { "epoch": 0.30414088529271777, "grad_norm": 0.4995706081390381, "learning_rate": 9.999937833308459e-06, "loss": 0.4683, "step": 426 }, { "epoch": 0.3048548310328415, "grad_norm": 0.44499126076698303, "learning_rate": 9.999915384288723e-06, "loss": 0.4912, "step": 427 }, { "epoch": 0.30556877677296523, "grad_norm": 0.5795122385025024, "learning_rate": 9.999889481615387e-06, "loss": 0.4826, "step": 428 }, { "epoch": 0.306282722513089, "grad_norm": 0.507599413394928, "learning_rate": 9.99986012530635e-06, "loss": 0.4635, "step": 429 }, { "epoch": 0.30699666825321276, "grad_norm": 0.4873568117618561, "learning_rate": 9.999827315381885e-06, "loss": 0.5029, "step": 430 }, { "epoch": 0.3077106139933365, "grad_norm": 0.5197048783302307, "learning_rate": 9.999791051864658e-06, "loss": 0.498, "step": 431 }, { "epoch": 0.3084245597334603, "grad_norm": 0.46478942036628723, "learning_rate": 9.999751334779716e-06, "loss": 0.4896, "step": 432 }, { "epoch": 0.309138505473584, "grad_norm": 0.4881684184074402, "learning_rate": 9.999708164154494e-06, "loss": 0.4995, "step": 433 }, { "epoch": 0.30985245121370775, "grad_norm": 0.5006881952285767, "learning_rate": 9.999661540018812e-06, "loss": 0.485, "step": 434 }, { "epoch": 0.31056639695383154, "grad_norm": 0.5424726605415344, "learning_rate": 9.999611462404874e-06, "loss": 0.4878, "step": 435 }, { "epoch": 0.31128034269395527, "grad_norm": 0.4550643861293793, "learning_rate": 9.999557931347273e-06, "loss": 0.5103, "step": 436 }, { "epoch": 0.311994288434079, "grad_norm": 0.5511481761932373, "learning_rate": 9.999500946882985e-06, "loss": 0.4853, "step": 437 }, { "epoch": 0.31270823417420274, "grad_norm": 0.422407865524292, "learning_rate": 9.999440509051367e-06, "loss": 0.5011, "step": 438 }, { "epoch": 0.3134221799143265, "grad_norm": 0.5483337640762329, "learning_rate": 9.999376617894173e-06, "loss": 0.4689, "step": 439 }, { "epoch": 0.31413612565445026, "grad_norm": 0.4893694221973419, "learning_rate": 9.99930927345553e-06, "loss": 0.4882, "step": 440 }, { "epoch": 0.314850071394574, "grad_norm": 0.5444978475570679, "learning_rate": 9.999238475781957e-06, "loss": 0.4626, "step": 441 }, { "epoch": 0.3155640171346978, "grad_norm": 0.6000845432281494, "learning_rate": 9.999164224922358e-06, "loss": 0.4612, "step": 442 }, { "epoch": 0.3162779628748215, "grad_norm": 0.42021629214286804, "learning_rate": 9.999086520928021e-06, "loss": 0.4634, "step": 443 }, { "epoch": 0.31699190861494525, "grad_norm": 0.5883877873420715, "learning_rate": 9.999005363852619e-06, "loss": 0.4989, "step": 444 }, { "epoch": 0.31770585435506904, "grad_norm": 0.4713692367076874, "learning_rate": 9.99892075375221e-06, "loss": 0.4624, "step": 445 }, { "epoch": 0.3184198000951928, "grad_norm": 0.47248777747154236, "learning_rate": 9.998832690685238e-06, "loss": 0.4684, "step": 446 }, { "epoch": 0.3191337458353165, "grad_norm": 0.5747678279876709, "learning_rate": 9.998741174712534e-06, "loss": 0.5149, "step": 447 }, { "epoch": 0.31984769157544024, "grad_norm": 0.541374921798706, "learning_rate": 9.99864620589731e-06, "loss": 0.4643, "step": 448 }, { "epoch": 0.32056163731556403, "grad_norm": 0.5375084280967712, "learning_rate": 9.998547784305163e-06, "loss": 0.4766, "step": 449 }, { "epoch": 0.32127558305568776, "grad_norm": 0.47498375177383423, "learning_rate": 9.998445910004082e-06, "loss": 0.4853, "step": 450 }, { "epoch": 0.3219895287958115, "grad_norm": 0.5587392449378967, "learning_rate": 9.998340583064431e-06, "loss": 0.4706, "step": 451 }, { "epoch": 0.3227034745359353, "grad_norm": 0.444881796836853, "learning_rate": 9.99823180355897e-06, "loss": 0.4741, "step": 452 }, { "epoch": 0.323417420276059, "grad_norm": 0.4607422947883606, "learning_rate": 9.99811957156283e-06, "loss": 0.4744, "step": 453 }, { "epoch": 0.32413136601618275, "grad_norm": 0.49619805812835693, "learning_rate": 9.998003887153538e-06, "loss": 0.4973, "step": 454 }, { "epoch": 0.32484531175630654, "grad_norm": 0.5151596665382385, "learning_rate": 9.997884750411004e-06, "loss": 0.4754, "step": 455 }, { "epoch": 0.3255592574964303, "grad_norm": 0.4870684742927551, "learning_rate": 9.997762161417517e-06, "loss": 0.4856, "step": 456 }, { "epoch": 0.326273203236554, "grad_norm": 0.456604927778244, "learning_rate": 9.997636120257758e-06, "loss": 0.49, "step": 457 }, { "epoch": 0.3269871489766778, "grad_norm": 0.5555675029754639, "learning_rate": 9.997506627018787e-06, "loss": 0.4832, "step": 458 }, { "epoch": 0.32770109471680153, "grad_norm": 0.4799937903881073, "learning_rate": 9.99737368179005e-06, "loss": 0.4986, "step": 459 }, { "epoch": 0.32841504045692527, "grad_norm": 0.47113171219825745, "learning_rate": 9.99723728466338e-06, "loss": 0.4708, "step": 460 }, { "epoch": 0.329128986197049, "grad_norm": 0.5660278797149658, "learning_rate": 9.997097435732989e-06, "loss": 0.5039, "step": 461 }, { "epoch": 0.3298429319371728, "grad_norm": 0.44431230425834656, "learning_rate": 9.99695413509548e-06, "loss": 0.5019, "step": 462 }, { "epoch": 0.3305568776772965, "grad_norm": 0.4687296450138092, "learning_rate": 9.996807382849835e-06, "loss": 0.4823, "step": 463 }, { "epoch": 0.33127082341742026, "grad_norm": 0.5167220234870911, "learning_rate": 9.996657179097422e-06, "loss": 0.4807, "step": 464 }, { "epoch": 0.33198476915754405, "grad_norm": 0.4486069977283478, "learning_rate": 9.996503523941994e-06, "loss": 0.468, "step": 465 }, { "epoch": 0.3326987148976678, "grad_norm": 0.5133646130561829, "learning_rate": 9.996346417489685e-06, "loss": 0.4933, "step": 466 }, { "epoch": 0.3334126606377915, "grad_norm": 0.5519976019859314, "learning_rate": 9.99618585984902e-06, "loss": 0.4614, "step": 467 }, { "epoch": 0.3341266063779153, "grad_norm": 0.4191671311855316, "learning_rate": 9.996021851130897e-06, "loss": 0.508, "step": 468 }, { "epoch": 0.33484055211803904, "grad_norm": 0.4850325882434845, "learning_rate": 9.995854391448607e-06, "loss": 0.4807, "step": 469 }, { "epoch": 0.33555449785816277, "grad_norm": 0.48098069429397583, "learning_rate": 9.995683480917821e-06, "loss": 0.4753, "step": 470 }, { "epoch": 0.3362684435982865, "grad_norm": 0.44790735840797424, "learning_rate": 9.995509119656595e-06, "loss": 0.4542, "step": 471 }, { "epoch": 0.3369823893384103, "grad_norm": 0.5097856521606445, "learning_rate": 9.995331307785365e-06, "loss": 0.5124, "step": 472 }, { "epoch": 0.337696335078534, "grad_norm": 0.46077197790145874, "learning_rate": 9.995150045426958e-06, "loss": 0.5075, "step": 473 }, { "epoch": 0.33841028081865776, "grad_norm": 0.44511741399765015, "learning_rate": 9.994965332706574e-06, "loss": 0.4753, "step": 474 }, { "epoch": 0.33912422655878155, "grad_norm": 0.42632752656936646, "learning_rate": 9.994777169751806e-06, "loss": 0.4921, "step": 475 }, { "epoch": 0.3398381722989053, "grad_norm": 0.4286082088947296, "learning_rate": 9.994585556692624e-06, "loss": 0.4769, "step": 476 }, { "epoch": 0.340552118039029, "grad_norm": 0.4093618094921112, "learning_rate": 9.994390493661384e-06, "loss": 0.4688, "step": 477 }, { "epoch": 0.3412660637791528, "grad_norm": 0.47100022435188293, "learning_rate": 9.994191980792823e-06, "loss": 0.4831, "step": 478 }, { "epoch": 0.34198000951927654, "grad_norm": 0.3992394208908081, "learning_rate": 9.993990018224065e-06, "loss": 0.4879, "step": 479 }, { "epoch": 0.3426939552594003, "grad_norm": 0.40426766872406006, "learning_rate": 9.993784606094612e-06, "loss": 0.4946, "step": 480 }, { "epoch": 0.34340790099952406, "grad_norm": 0.46871325373649597, "learning_rate": 9.993575744546354e-06, "loss": 0.4977, "step": 481 }, { "epoch": 0.3441218467396478, "grad_norm": 0.4214226007461548, "learning_rate": 9.993363433723555e-06, "loss": 0.4611, "step": 482 }, { "epoch": 0.34483579247977153, "grad_norm": 0.4775031805038452, "learning_rate": 9.993147673772869e-06, "loss": 0.4953, "step": 483 }, { "epoch": 0.34554973821989526, "grad_norm": 0.43553927540779114, "learning_rate": 9.992928464843335e-06, "loss": 0.4857, "step": 484 }, { "epoch": 0.34626368396001905, "grad_norm": 0.4599379599094391, "learning_rate": 9.992705807086364e-06, "loss": 0.503, "step": 485 }, { "epoch": 0.3469776297001428, "grad_norm": 0.4651959240436554, "learning_rate": 9.99247970065576e-06, "loss": 0.4861, "step": 486 }, { "epoch": 0.3476915754402665, "grad_norm": 0.4255892336368561, "learning_rate": 9.992250145707701e-06, "loss": 0.4991, "step": 487 }, { "epoch": 0.3484055211803903, "grad_norm": 0.40496301651000977, "learning_rate": 9.99201714240075e-06, "loss": 0.4721, "step": 488 }, { "epoch": 0.34911946692051404, "grad_norm": 0.40841564536094666, "learning_rate": 9.991780690895856e-06, "loss": 0.4704, "step": 489 }, { "epoch": 0.3498334126606378, "grad_norm": 0.41158539056777954, "learning_rate": 9.991540791356342e-06, "loss": 0.4886, "step": 490 }, { "epoch": 0.35054735840076157, "grad_norm": 0.3852746784687042, "learning_rate": 9.991297443947918e-06, "loss": 0.4926, "step": 491 }, { "epoch": 0.3512613041408853, "grad_norm": 0.4358777105808258, "learning_rate": 9.991050648838676e-06, "loss": 0.4555, "step": 492 }, { "epoch": 0.35197524988100903, "grad_norm": 0.41972124576568604, "learning_rate": 9.990800406199088e-06, "loss": 0.4622, "step": 493 }, { "epoch": 0.35268919562113277, "grad_norm": 0.467920184135437, "learning_rate": 9.990546716202003e-06, "loss": 0.491, "step": 494 }, { "epoch": 0.35340314136125656, "grad_norm": 0.48706600069999695, "learning_rate": 9.990289579022661e-06, "loss": 0.4979, "step": 495 }, { "epoch": 0.3541170871013803, "grad_norm": 0.47243496775627136, "learning_rate": 9.990028994838673e-06, "loss": 0.4725, "step": 496 }, { "epoch": 0.354831032841504, "grad_norm": 0.44609078764915466, "learning_rate": 9.989764963830038e-06, "loss": 0.4585, "step": 497 }, { "epoch": 0.3555449785816278, "grad_norm": 0.42003902792930603, "learning_rate": 9.989497486179132e-06, "loss": 0.4748, "step": 498 }, { "epoch": 0.35625892432175155, "grad_norm": 0.45294129848480225, "learning_rate": 9.989226562070715e-06, "loss": 0.479, "step": 499 }, { "epoch": 0.3569728700618753, "grad_norm": 0.5105749368667603, "learning_rate": 9.988952191691925e-06, "loss": 0.488, "step": 500 }, { "epoch": 0.35768681580199907, "grad_norm": 0.4143266975879669, "learning_rate": 9.98867437523228e-06, "loss": 0.465, "step": 501 }, { "epoch": 0.3584007615421228, "grad_norm": 0.45589834451675415, "learning_rate": 9.988393112883683e-06, "loss": 0.5281, "step": 502 }, { "epoch": 0.35911470728224654, "grad_norm": 0.5068339705467224, "learning_rate": 9.98810840484041e-06, "loss": 0.4712, "step": 503 }, { "epoch": 0.3598286530223703, "grad_norm": 0.5150161385536194, "learning_rate": 9.987820251299121e-06, "loss": 0.4831, "step": 504 }, { "epoch": 0.36054259876249406, "grad_norm": 0.4378601610660553, "learning_rate": 9.98752865245886e-06, "loss": 0.5046, "step": 505 }, { "epoch": 0.3612565445026178, "grad_norm": 0.41716739535331726, "learning_rate": 9.987233608521043e-06, "loss": 0.4555, "step": 506 }, { "epoch": 0.3619704902427415, "grad_norm": 0.4565330743789673, "learning_rate": 9.986935119689469e-06, "loss": 0.4807, "step": 507 }, { "epoch": 0.3626844359828653, "grad_norm": 0.4708004593849182, "learning_rate": 9.986633186170319e-06, "loss": 0.4776, "step": 508 }, { "epoch": 0.36339838172298905, "grad_norm": 0.48886993527412415, "learning_rate": 9.986327808172151e-06, "loss": 0.4984, "step": 509 }, { "epoch": 0.3641123274631128, "grad_norm": 0.45697659254074097, "learning_rate": 9.986018985905901e-06, "loss": 0.4713, "step": 510 }, { "epoch": 0.3648262732032366, "grad_norm": 0.5176493525505066, "learning_rate": 9.985706719584888e-06, "loss": 0.4966, "step": 511 }, { "epoch": 0.3655402189433603, "grad_norm": 0.4500645697116852, "learning_rate": 9.985391009424805e-06, "loss": 0.4765, "step": 512 }, { "epoch": 0.36625416468348404, "grad_norm": 0.4685465097427368, "learning_rate": 9.98507185564373e-06, "loss": 0.449, "step": 513 }, { "epoch": 0.36696811042360783, "grad_norm": 0.46868348121643066, "learning_rate": 9.98474925846211e-06, "loss": 0.4475, "step": 514 }, { "epoch": 0.36768205616373156, "grad_norm": 0.47851935029029846, "learning_rate": 9.984423218102783e-06, "loss": 0.4645, "step": 515 }, { "epoch": 0.3683960019038553, "grad_norm": 0.49869951605796814, "learning_rate": 9.984093734790955e-06, "loss": 0.4638, "step": 516 }, { "epoch": 0.36910994764397903, "grad_norm": 0.5251151919364929, "learning_rate": 9.983760808754215e-06, "loss": 0.5168, "step": 517 }, { "epoch": 0.3698238933841028, "grad_norm": 0.4916033446788788, "learning_rate": 9.98342444022253e-06, "loss": 0.4945, "step": 518 }, { "epoch": 0.37053783912422655, "grad_norm": 0.5256971120834351, "learning_rate": 9.983084629428244e-06, "loss": 0.4774, "step": 519 }, { "epoch": 0.3712517848643503, "grad_norm": 0.47000887989997864, "learning_rate": 9.982741376606077e-06, "loss": 0.4667, "step": 520 }, { "epoch": 0.3719657306044741, "grad_norm": 0.4650852382183075, "learning_rate": 9.982394681993131e-06, "loss": 0.4752, "step": 521 }, { "epoch": 0.3726796763445978, "grad_norm": 0.4615582227706909, "learning_rate": 9.98204454582888e-06, "loss": 0.4855, "step": 522 }, { "epoch": 0.37339362208472154, "grad_norm": 0.5258937478065491, "learning_rate": 9.98169096835518e-06, "loss": 0.4643, "step": 523 }, { "epoch": 0.37410756782484533, "grad_norm": 0.5281727313995361, "learning_rate": 9.98133394981626e-06, "loss": 0.4866, "step": 524 }, { "epoch": 0.37482151356496907, "grad_norm": 0.5025337338447571, "learning_rate": 9.980973490458728e-06, "loss": 0.4899, "step": 525 }, { "epoch": 0.3755354593050928, "grad_norm": 0.44381970167160034, "learning_rate": 9.98060959053157e-06, "loss": 0.4786, "step": 526 }, { "epoch": 0.3762494050452166, "grad_norm": 0.5522001385688782, "learning_rate": 9.980242250286147e-06, "loss": 0.4832, "step": 527 }, { "epoch": 0.3769633507853403, "grad_norm": 0.56991046667099, "learning_rate": 9.979871469976197e-06, "loss": 0.4646, "step": 528 }, { "epoch": 0.37767729652546406, "grad_norm": 0.4718845784664154, "learning_rate": 9.97949724985783e-06, "loss": 0.5014, "step": 529 }, { "epoch": 0.3783912422655878, "grad_norm": 0.43930402398109436, "learning_rate": 9.97911959018954e-06, "loss": 0.4684, "step": 530 }, { "epoch": 0.3791051880057116, "grad_norm": 0.6599938273429871, "learning_rate": 9.978738491232191e-06, "loss": 0.4709, "step": 531 }, { "epoch": 0.3798191337458353, "grad_norm": 0.5629675388336182, "learning_rate": 9.978353953249023e-06, "loss": 0.4749, "step": 532 }, { "epoch": 0.38053307948595905, "grad_norm": 0.536827564239502, "learning_rate": 9.977965976505654e-06, "loss": 0.4592, "step": 533 }, { "epoch": 0.38124702522608284, "grad_norm": 0.5417434573173523, "learning_rate": 9.977574561270075e-06, "loss": 0.4739, "step": 534 }, { "epoch": 0.38196097096620657, "grad_norm": 0.5453307628631592, "learning_rate": 9.977179707812656e-06, "loss": 0.4927, "step": 535 }, { "epoch": 0.3826749167063303, "grad_norm": 0.43357160687446594, "learning_rate": 9.976781416406136e-06, "loss": 0.4605, "step": 536 }, { "epoch": 0.3833888624464541, "grad_norm": 0.5322845578193665, "learning_rate": 9.976379687325633e-06, "loss": 0.47, "step": 537 }, { "epoch": 0.3841028081865778, "grad_norm": 0.462542861700058, "learning_rate": 9.975974520848636e-06, "loss": 0.4592, "step": 538 }, { "epoch": 0.38481675392670156, "grad_norm": 0.4661354720592499, "learning_rate": 9.975565917255017e-06, "loss": 0.4728, "step": 539 }, { "epoch": 0.3855306996668253, "grad_norm": 0.5488191246986389, "learning_rate": 9.975153876827008e-06, "loss": 0.4759, "step": 540 }, { "epoch": 0.3862446454069491, "grad_norm": 0.4788029193878174, "learning_rate": 9.974738399849226e-06, "loss": 0.47, "step": 541 }, { "epoch": 0.3869585911470728, "grad_norm": 0.5458348393440247, "learning_rate": 9.974319486608662e-06, "loss": 0.5071, "step": 542 }, { "epoch": 0.38767253688719655, "grad_norm": 0.4854185879230499, "learning_rate": 9.97389713739467e-06, "loss": 0.4777, "step": 543 }, { "epoch": 0.38838648262732034, "grad_norm": 0.45350560545921326, "learning_rate": 9.973471352498991e-06, "loss": 0.4658, "step": 544 }, { "epoch": 0.3891004283674441, "grad_norm": 0.46048983931541443, "learning_rate": 9.97304213221573e-06, "loss": 0.4755, "step": 545 }, { "epoch": 0.3898143741075678, "grad_norm": 0.4406314790248871, "learning_rate": 9.972609476841368e-06, "loss": 0.4764, "step": 546 }, { "epoch": 0.3905283198476916, "grad_norm": 0.43931612372398376, "learning_rate": 9.972173386674757e-06, "loss": 0.4848, "step": 547 }, { "epoch": 0.39124226558781533, "grad_norm": 0.427096962928772, "learning_rate": 9.971733862017126e-06, "loss": 0.4601, "step": 548 }, { "epoch": 0.39195621132793906, "grad_norm": 0.4327687919139862, "learning_rate": 9.97129090317207e-06, "loss": 0.4421, "step": 549 }, { "epoch": 0.39267015706806285, "grad_norm": 0.42941418290138245, "learning_rate": 9.97084451044556e-06, "loss": 0.4572, "step": 550 }, { "epoch": 0.3933841028081866, "grad_norm": 0.4368964731693268, "learning_rate": 9.970394684145944e-06, "loss": 0.4599, "step": 551 }, { "epoch": 0.3940980485483103, "grad_norm": 0.4132940471172333, "learning_rate": 9.969941424583926e-06, "loss": 0.4699, "step": 552 }, { "epoch": 0.39481199428843405, "grad_norm": 0.4635457396507263, "learning_rate": 9.9694847320726e-06, "loss": 0.4778, "step": 553 }, { "epoch": 0.39552594002855784, "grad_norm": 0.4288369417190552, "learning_rate": 9.96902460692742e-06, "loss": 0.4806, "step": 554 }, { "epoch": 0.3962398857686816, "grad_norm": 0.47607097029685974, "learning_rate": 9.968561049466214e-06, "loss": 0.4813, "step": 555 }, { "epoch": 0.3969538315088053, "grad_norm": 0.5849734544754028, "learning_rate": 9.96809406000918e-06, "loss": 0.4734, "step": 556 }, { "epoch": 0.3976677772489291, "grad_norm": 0.4433881640434265, "learning_rate": 9.967623638878889e-06, "loss": 0.4767, "step": 557 }, { "epoch": 0.39838172298905283, "grad_norm": 0.444789320230484, "learning_rate": 9.967149786400278e-06, "loss": 0.4695, "step": 558 }, { "epoch": 0.39909566872917657, "grad_norm": 0.442457377910614, "learning_rate": 9.966672502900661e-06, "loss": 0.4754, "step": 559 }, { "epoch": 0.39980961446930036, "grad_norm": 0.5156027674674988, "learning_rate": 9.966191788709716e-06, "loss": 0.4675, "step": 560 }, { "epoch": 0.4005235602094241, "grad_norm": 0.5144112706184387, "learning_rate": 9.965707644159492e-06, "loss": 0.4681, "step": 561 }, { "epoch": 0.4012375059495478, "grad_norm": 0.47684189677238464, "learning_rate": 9.96522006958441e-06, "loss": 0.5106, "step": 562 }, { "epoch": 0.40195145168967156, "grad_norm": 0.4687498211860657, "learning_rate": 9.964729065321255e-06, "loss": 0.4704, "step": 563 }, { "epoch": 0.40266539742979535, "grad_norm": 0.3886616826057434, "learning_rate": 9.964234631709188e-06, "loss": 0.453, "step": 564 }, { "epoch": 0.4033793431699191, "grad_norm": 0.4414128065109253, "learning_rate": 9.963736769089734e-06, "loss": 0.4795, "step": 565 }, { "epoch": 0.4040932889100428, "grad_norm": 0.39798232913017273, "learning_rate": 9.963235477806787e-06, "loss": 0.4492, "step": 566 }, { "epoch": 0.4048072346501666, "grad_norm": 0.4560062289237976, "learning_rate": 9.962730758206612e-06, "loss": 0.4643, "step": 567 }, { "epoch": 0.40552118039029034, "grad_norm": 0.4316386282444, "learning_rate": 9.962222610637837e-06, "loss": 0.4777, "step": 568 }, { "epoch": 0.40623512613041407, "grad_norm": 0.4441889822483063, "learning_rate": 9.961711035451466e-06, "loss": 0.4847, "step": 569 }, { "epoch": 0.40694907187053786, "grad_norm": 0.44680121541023254, "learning_rate": 9.961196033000862e-06, "loss": 0.4742, "step": 570 }, { "epoch": 0.4076630176106616, "grad_norm": 0.4722740948200226, "learning_rate": 9.96067760364176e-06, "loss": 0.4869, "step": 571 }, { "epoch": 0.4083769633507853, "grad_norm": 0.4118778705596924, "learning_rate": 9.96015574773226e-06, "loss": 0.4877, "step": 572 }, { "epoch": 0.4090909090909091, "grad_norm": 0.420208215713501, "learning_rate": 9.959630465632833e-06, "loss": 0.4507, "step": 573 }, { "epoch": 0.40980485483103285, "grad_norm": 0.4741220474243164, "learning_rate": 9.959101757706308e-06, "loss": 0.4931, "step": 574 }, { "epoch": 0.4105188005711566, "grad_norm": 0.44508472084999084, "learning_rate": 9.958569624317894e-06, "loss": 0.4664, "step": 575 }, { "epoch": 0.4112327463112803, "grad_norm": 0.5623816251754761, "learning_rate": 9.958034065835151e-06, "loss": 0.4784, "step": 576 }, { "epoch": 0.4119466920514041, "grad_norm": 0.431743860244751, "learning_rate": 9.957495082628017e-06, "loss": 0.4811, "step": 577 }, { "epoch": 0.41266063779152784, "grad_norm": 0.5305282473564148, "learning_rate": 9.956952675068788e-06, "loss": 0.465, "step": 578 }, { "epoch": 0.4133745835316516, "grad_norm": 0.4637860059738159, "learning_rate": 9.956406843532128e-06, "loss": 0.479, "step": 579 }, { "epoch": 0.41408852927177536, "grad_norm": 0.45113661885261536, "learning_rate": 9.955857588395065e-06, "loss": 0.5125, "step": 580 }, { "epoch": 0.4148024750118991, "grad_norm": 0.4810327887535095, "learning_rate": 9.955304910036993e-06, "loss": 0.4716, "step": 581 }, { "epoch": 0.41551642075202283, "grad_norm": 0.5344920754432678, "learning_rate": 9.954748808839675e-06, "loss": 0.4876, "step": 582 }, { "epoch": 0.4162303664921466, "grad_norm": 0.46465402841567993, "learning_rate": 9.954189285187228e-06, "loss": 0.4723, "step": 583 }, { "epoch": 0.41694431223227035, "grad_norm": 0.38246509432792664, "learning_rate": 9.95362633946614e-06, "loss": 0.4445, "step": 584 }, { "epoch": 0.4176582579723941, "grad_norm": 0.5566004514694214, "learning_rate": 9.953059972065264e-06, "loss": 0.4756, "step": 585 }, { "epoch": 0.4183722037125179, "grad_norm": 0.4260857105255127, "learning_rate": 9.952490183375812e-06, "loss": 0.4656, "step": 586 }, { "epoch": 0.4190861494526416, "grad_norm": 0.4514102637767792, "learning_rate": 9.951916973791361e-06, "loss": 0.4802, "step": 587 }, { "epoch": 0.41980009519276534, "grad_norm": 0.4042509198188782, "learning_rate": 9.951340343707852e-06, "loss": 0.4889, "step": 588 }, { "epoch": 0.4205140409328891, "grad_norm": 0.39018023014068604, "learning_rate": 9.950760293523586e-06, "loss": 0.4533, "step": 589 }, { "epoch": 0.42122798667301287, "grad_norm": 0.41814279556274414, "learning_rate": 9.950176823639233e-06, "loss": 0.4878, "step": 590 }, { "epoch": 0.4219419324131366, "grad_norm": 0.4082237184047699, "learning_rate": 9.949589934457815e-06, "loss": 0.4644, "step": 591 }, { "epoch": 0.42265587815326033, "grad_norm": 0.3996804654598236, "learning_rate": 9.948999626384725e-06, "loss": 0.4704, "step": 592 }, { "epoch": 0.4233698238933841, "grad_norm": 0.40315839648246765, "learning_rate": 9.94840589982771e-06, "loss": 0.5091, "step": 593 }, { "epoch": 0.42408376963350786, "grad_norm": 0.4186719059944153, "learning_rate": 9.947808755196886e-06, "loss": 0.4749, "step": 594 }, { "epoch": 0.4247977153736316, "grad_norm": 0.3960438668727875, "learning_rate": 9.947208192904722e-06, "loss": 0.4438, "step": 595 }, { "epoch": 0.4255116611137554, "grad_norm": 0.4563726484775543, "learning_rate": 9.946604213366058e-06, "loss": 0.4854, "step": 596 }, { "epoch": 0.4262256068538791, "grad_norm": 0.42055395245552063, "learning_rate": 9.945996816998082e-06, "loss": 0.4668, "step": 597 }, { "epoch": 0.42693955259400285, "grad_norm": 0.46389687061309814, "learning_rate": 9.945386004220352e-06, "loss": 0.4692, "step": 598 }, { "epoch": 0.4276534983341266, "grad_norm": 0.5103921890258789, "learning_rate": 9.944771775454781e-06, "loss": 0.4711, "step": 599 }, { "epoch": 0.42836744407425037, "grad_norm": 0.47864049673080444, "learning_rate": 9.944154131125643e-06, "loss": 0.4573, "step": 600 }, { "epoch": 0.4290813898143741, "grad_norm": 0.46442970633506775, "learning_rate": 9.943533071659573e-06, "loss": 0.471, "step": 601 }, { "epoch": 0.42979533555449784, "grad_norm": 0.45173966884613037, "learning_rate": 9.942908597485558e-06, "loss": 0.4843, "step": 602 }, { "epoch": 0.4305092812946216, "grad_norm": 0.5047414898872375, "learning_rate": 9.942280709034954e-06, "loss": 0.4934, "step": 603 }, { "epoch": 0.43122322703474536, "grad_norm": 0.3951266407966614, "learning_rate": 9.94164940674147e-06, "loss": 0.4587, "step": 604 }, { "epoch": 0.4319371727748691, "grad_norm": 0.4868263602256775, "learning_rate": 9.941014691041171e-06, "loss": 0.4849, "step": 605 }, { "epoch": 0.4326511185149929, "grad_norm": 0.4567371606826782, "learning_rate": 9.940376562372482e-06, "loss": 0.4919, "step": 606 }, { "epoch": 0.4333650642551166, "grad_norm": 0.4102000296115875, "learning_rate": 9.939735021176186e-06, "loss": 0.4762, "step": 607 }, { "epoch": 0.43407900999524035, "grad_norm": 0.46101275086402893, "learning_rate": 9.939090067895422e-06, "loss": 0.4591, "step": 608 }, { "epoch": 0.43479295573536414, "grad_norm": 0.4145568311214447, "learning_rate": 9.938441702975689e-06, "loss": 0.4288, "step": 609 }, { "epoch": 0.4355069014754879, "grad_norm": 0.49346286058425903, "learning_rate": 9.937789926864838e-06, "loss": 0.454, "step": 610 }, { "epoch": 0.4362208472156116, "grad_norm": 0.4239324927330017, "learning_rate": 9.93713474001308e-06, "loss": 0.4801, "step": 611 }, { "epoch": 0.43693479295573534, "grad_norm": 0.4895893633365631, "learning_rate": 9.936476142872979e-06, "loss": 0.4804, "step": 612 }, { "epoch": 0.43764873869585913, "grad_norm": 0.5118461847305298, "learning_rate": 9.935814135899456e-06, "loss": 0.4759, "step": 613 }, { "epoch": 0.43836268443598286, "grad_norm": 0.4282967746257782, "learning_rate": 9.935148719549788e-06, "loss": 0.4548, "step": 614 }, { "epoch": 0.4390766301761066, "grad_norm": 0.4933742582798004, "learning_rate": 9.934479894283607e-06, "loss": 0.4699, "step": 615 }, { "epoch": 0.4397905759162304, "grad_norm": 0.43877318501472473, "learning_rate": 9.933807660562898e-06, "loss": 0.4568, "step": 616 }, { "epoch": 0.4405045216563541, "grad_norm": 0.4344831705093384, "learning_rate": 9.933132018851998e-06, "loss": 0.4422, "step": 617 }, { "epoch": 0.44121846739647785, "grad_norm": 0.5046058297157288, "learning_rate": 9.932452969617607e-06, "loss": 0.4901, "step": 618 }, { "epoch": 0.44193241313660164, "grad_norm": 0.47470957040786743, "learning_rate": 9.931770513328771e-06, "loss": 0.4673, "step": 619 }, { "epoch": 0.4426463588767254, "grad_norm": 0.42409801483154297, "learning_rate": 9.931084650456892e-06, "loss": 0.4523, "step": 620 }, { "epoch": 0.4433603046168491, "grad_norm": 0.4726031422615051, "learning_rate": 9.930395381475723e-06, "loss": 0.5014, "step": 621 }, { "epoch": 0.44407425035697284, "grad_norm": 0.438292920589447, "learning_rate": 9.929702706861373e-06, "loss": 0.4666, "step": 622 }, { "epoch": 0.44478819609709663, "grad_norm": 0.4515282213687897, "learning_rate": 9.929006627092298e-06, "loss": 0.4761, "step": 623 }, { "epoch": 0.44550214183722037, "grad_norm": 0.4979286789894104, "learning_rate": 9.928307142649315e-06, "loss": 0.4707, "step": 624 }, { "epoch": 0.4462160875773441, "grad_norm": 0.451331228017807, "learning_rate": 9.927604254015586e-06, "loss": 0.4535, "step": 625 }, { "epoch": 0.4469300333174679, "grad_norm": 0.48635435104370117, "learning_rate": 9.926897961676625e-06, "loss": 0.4664, "step": 626 }, { "epoch": 0.4476439790575916, "grad_norm": 0.4747207760810852, "learning_rate": 9.926188266120297e-06, "loss": 0.47, "step": 627 }, { "epoch": 0.44835792479771536, "grad_norm": 0.4276427924633026, "learning_rate": 9.92547516783682e-06, "loss": 0.4464, "step": 628 }, { "epoch": 0.44907187053783915, "grad_norm": 0.45583784580230713, "learning_rate": 9.924758667318763e-06, "loss": 0.4704, "step": 629 }, { "epoch": 0.4497858162779629, "grad_norm": 0.4518415033817291, "learning_rate": 9.924038765061042e-06, "loss": 0.495, "step": 630 }, { "epoch": 0.4504997620180866, "grad_norm": 0.4921402633190155, "learning_rate": 9.923315461560923e-06, "loss": 0.4925, "step": 631 }, { "epoch": 0.4512137077582104, "grad_norm": 0.5025215148925781, "learning_rate": 9.92258875731802e-06, "loss": 0.4719, "step": 632 }, { "epoch": 0.45192765349833414, "grad_norm": 0.5026609897613525, "learning_rate": 9.921858652834306e-06, "loss": 0.4773, "step": 633 }, { "epoch": 0.45264159923845787, "grad_norm": 0.5369899272918701, "learning_rate": 9.92112514861409e-06, "loss": 0.4824, "step": 634 }, { "epoch": 0.4533555449785816, "grad_norm": 0.5237758159637451, "learning_rate": 9.920388245164033e-06, "loss": 0.4512, "step": 635 }, { "epoch": 0.4540694907187054, "grad_norm": 0.4766666293144226, "learning_rate": 9.91964794299315e-06, "loss": 0.4642, "step": 636 }, { "epoch": 0.4547834364588291, "grad_norm": 0.5207862257957458, "learning_rate": 9.918904242612794e-06, "loss": 0.4838, "step": 637 }, { "epoch": 0.45549738219895286, "grad_norm": 0.47551223635673523, "learning_rate": 9.918157144536676e-06, "loss": 0.4546, "step": 638 }, { "epoch": 0.45621132793907665, "grad_norm": 0.4940049350261688, "learning_rate": 9.917406649280843e-06, "loss": 0.4577, "step": 639 }, { "epoch": 0.4569252736792004, "grad_norm": 0.5475967526435852, "learning_rate": 9.916652757363698e-06, "loss": 0.4767, "step": 640 }, { "epoch": 0.4576392194193241, "grad_norm": 0.43662315607070923, "learning_rate": 9.915895469305984e-06, "loss": 0.4547, "step": 641 }, { "epoch": 0.4583531651594479, "grad_norm": 0.46715548634529114, "learning_rate": 9.915134785630793e-06, "loss": 0.4568, "step": 642 }, { "epoch": 0.45906711089957164, "grad_norm": 0.46888020634651184, "learning_rate": 9.914370706863559e-06, "loss": 0.4626, "step": 643 }, { "epoch": 0.4597810566396954, "grad_norm": 0.4800085425376892, "learning_rate": 9.913603233532067e-06, "loss": 0.4652, "step": 644 }, { "epoch": 0.4604950023798191, "grad_norm": 0.4387550354003906, "learning_rate": 9.912832366166443e-06, "loss": 0.5065, "step": 645 }, { "epoch": 0.4612089481199429, "grad_norm": 0.49363070726394653, "learning_rate": 9.912058105299155e-06, "loss": 0.4673, "step": 646 }, { "epoch": 0.46192289386006663, "grad_norm": 0.48655688762664795, "learning_rate": 9.91128045146502e-06, "loss": 0.4992, "step": 647 }, { "epoch": 0.46263683960019036, "grad_norm": 0.39251482486724854, "learning_rate": 9.910499405201195e-06, "loss": 0.4808, "step": 648 }, { "epoch": 0.46335078534031415, "grad_norm": 0.47780367732048035, "learning_rate": 9.909714967047183e-06, "loss": 0.4687, "step": 649 }, { "epoch": 0.4640647310804379, "grad_norm": 0.44201985001564026, "learning_rate": 9.90892713754483e-06, "loss": 0.4766, "step": 650 }, { "epoch": 0.4647786768205616, "grad_norm": 0.4053218960762024, "learning_rate": 9.908135917238321e-06, "loss": 0.4598, "step": 651 }, { "epoch": 0.4654926225606854, "grad_norm": 0.47375550866127014, "learning_rate": 9.907341306674185e-06, "loss": 0.4361, "step": 652 }, { "epoch": 0.46620656830080914, "grad_norm": 0.38820329308509827, "learning_rate": 9.906543306401296e-06, "loss": 0.4587, "step": 653 }, { "epoch": 0.4669205140409329, "grad_norm": 0.4250805974006653, "learning_rate": 9.905741916970863e-06, "loss": 0.4752, "step": 654 }, { "epoch": 0.46763445978105667, "grad_norm": 0.4992777407169342, "learning_rate": 9.904937138936443e-06, "loss": 0.4753, "step": 655 }, { "epoch": 0.4683484055211804, "grad_norm": 0.4264627993106842, "learning_rate": 9.90412897285393e-06, "loss": 0.4461, "step": 656 }, { "epoch": 0.46906235126130413, "grad_norm": 0.44459816813468933, "learning_rate": 9.903317419281557e-06, "loss": 0.4503, "step": 657 }, { "epoch": 0.46977629700142787, "grad_norm": 0.42337924242019653, "learning_rate": 9.902502478779897e-06, "loss": 0.4689, "step": 658 }, { "epoch": 0.47049024274155166, "grad_norm": 0.45474159717559814, "learning_rate": 9.901684151911868e-06, "loss": 0.4493, "step": 659 }, { "epoch": 0.4712041884816754, "grad_norm": 0.4849100410938263, "learning_rate": 9.900862439242719e-06, "loss": 0.4752, "step": 660 }, { "epoch": 0.4719181342217991, "grad_norm": 0.43573102355003357, "learning_rate": 9.900037341340046e-06, "loss": 0.4849, "step": 661 }, { "epoch": 0.4726320799619229, "grad_norm": 0.5287122130393982, "learning_rate": 9.899208858773776e-06, "loss": 0.4757, "step": 662 }, { "epoch": 0.47334602570204665, "grad_norm": 0.4180127680301666, "learning_rate": 9.898376992116179e-06, "loss": 0.4759, "step": 663 }, { "epoch": 0.4740599714421704, "grad_norm": 0.5111497044563293, "learning_rate": 9.897541741941858e-06, "loss": 0.4779, "step": 664 }, { "epoch": 0.47477391718229417, "grad_norm": 0.4259183704853058, "learning_rate": 9.896703108827758e-06, "loss": 0.4801, "step": 665 }, { "epoch": 0.4754878629224179, "grad_norm": 0.4456402659416199, "learning_rate": 9.895861093353159e-06, "loss": 0.4475, "step": 666 }, { "epoch": 0.47620180866254164, "grad_norm": 0.4718480706214905, "learning_rate": 9.895015696099674e-06, "loss": 0.4699, "step": 667 }, { "epoch": 0.47691575440266537, "grad_norm": 0.44722360372543335, "learning_rate": 9.894166917651256e-06, "loss": 0.4897, "step": 668 }, { "epoch": 0.47762970014278916, "grad_norm": 0.4801155924797058, "learning_rate": 9.893314758594192e-06, "loss": 0.4777, "step": 669 }, { "epoch": 0.4783436458829129, "grad_norm": 0.4650520980358124, "learning_rate": 9.892459219517108e-06, "loss": 0.4875, "step": 670 }, { "epoch": 0.4790575916230366, "grad_norm": 0.38936296105384827, "learning_rate": 9.891600301010956e-06, "loss": 0.4787, "step": 671 }, { "epoch": 0.4797715373631604, "grad_norm": 0.4634529650211334, "learning_rate": 9.890738003669029e-06, "loss": 0.4997, "step": 672 }, { "epoch": 0.48048548310328415, "grad_norm": 0.3680836260318756, "learning_rate": 9.889872328086953e-06, "loss": 0.4537, "step": 673 }, { "epoch": 0.4811994288434079, "grad_norm": 0.4153779447078705, "learning_rate": 9.889003274862687e-06, "loss": 0.4916, "step": 674 }, { "epoch": 0.4819133745835317, "grad_norm": 0.4367934763431549, "learning_rate": 9.888130844596525e-06, "loss": 0.4692, "step": 675 }, { "epoch": 0.4826273203236554, "grad_norm": 0.4408281743526459, "learning_rate": 9.887255037891085e-06, "loss": 0.4645, "step": 676 }, { "epoch": 0.48334126606377914, "grad_norm": 0.4769415855407715, "learning_rate": 9.886375855351332e-06, "loss": 0.4734, "step": 677 }, { "epoch": 0.48405521180390293, "grad_norm": 0.4199475347995758, "learning_rate": 9.885493297584548e-06, "loss": 0.4672, "step": 678 }, { "epoch": 0.48476915754402666, "grad_norm": 0.4803716540336609, "learning_rate": 9.884607365200355e-06, "loss": 0.4513, "step": 679 }, { "epoch": 0.4854831032841504, "grad_norm": 0.41991549730300903, "learning_rate": 9.883718058810708e-06, "loss": 0.4466, "step": 680 }, { "epoch": 0.48619704902427413, "grad_norm": 0.4356257915496826, "learning_rate": 9.882825379029883e-06, "loss": 0.4463, "step": 681 }, { "epoch": 0.4869109947643979, "grad_norm": 0.48920494318008423, "learning_rate": 9.881929326474496e-06, "loss": 0.4582, "step": 682 }, { "epoch": 0.48762494050452165, "grad_norm": 0.42219802737236023, "learning_rate": 9.881029901763485e-06, "loss": 0.4765, "step": 683 }, { "epoch": 0.4883388862446454, "grad_norm": 0.4805302023887634, "learning_rate": 9.880127105518122e-06, "loss": 0.4654, "step": 684 }, { "epoch": 0.4890528319847692, "grad_norm": 0.5203669667243958, "learning_rate": 9.87922093836201e-06, "loss": 0.4878, "step": 685 }, { "epoch": 0.4897667777248929, "grad_norm": 0.42576292157173157, "learning_rate": 9.878311400921072e-06, "loss": 0.4662, "step": 686 }, { "epoch": 0.49048072346501664, "grad_norm": 0.5348847508430481, "learning_rate": 9.877398493823567e-06, "loss": 0.4702, "step": 687 }, { "epoch": 0.49119466920514043, "grad_norm": 0.43158286809921265, "learning_rate": 9.876482217700078e-06, "loss": 0.4643, "step": 688 }, { "epoch": 0.49190861494526417, "grad_norm": 0.4631837010383606, "learning_rate": 9.875562573183518e-06, "loss": 0.4633, "step": 689 }, { "epoch": 0.4926225606853879, "grad_norm": 0.5671799182891846, "learning_rate": 9.874639560909118e-06, "loss": 0.4778, "step": 690 }, { "epoch": 0.49333650642551163, "grad_norm": 0.45904073119163513, "learning_rate": 9.873713181514448e-06, "loss": 0.4763, "step": 691 }, { "epoch": 0.4940504521656354, "grad_norm": 0.42761480808258057, "learning_rate": 9.872783435639397e-06, "loss": 0.4523, "step": 692 }, { "epoch": 0.49476439790575916, "grad_norm": 0.4533417522907257, "learning_rate": 9.871850323926178e-06, "loss": 0.4383, "step": 693 }, { "epoch": 0.4954783436458829, "grad_norm": 0.49110040068626404, "learning_rate": 9.87091384701933e-06, "loss": 0.4725, "step": 694 }, { "epoch": 0.4961922893860067, "grad_norm": 0.40232300758361816, "learning_rate": 9.869974005565719e-06, "loss": 0.4579, "step": 695 }, { "epoch": 0.4969062351261304, "grad_norm": 0.45771846175193787, "learning_rate": 9.869030800214531e-06, "loss": 0.5028, "step": 696 }, { "epoch": 0.49762018086625415, "grad_norm": 0.40337151288986206, "learning_rate": 9.868084231617283e-06, "loss": 0.4608, "step": 697 }, { "epoch": 0.49833412660637794, "grad_norm": 0.45426473021507263, "learning_rate": 9.867134300427806e-06, "loss": 0.4721, "step": 698 }, { "epoch": 0.49904807234650167, "grad_norm": 0.42217227816581726, "learning_rate": 9.866181007302258e-06, "loss": 0.4793, "step": 699 }, { "epoch": 0.4997620180866254, "grad_norm": 0.42799410223960876, "learning_rate": 9.86522435289912e-06, "loss": 0.4555, "step": 700 }, { "epoch": 0.5004759638267492, "grad_norm": 0.5040037631988525, "learning_rate": 9.864264337879194e-06, "loss": 0.4583, "step": 701 }, { "epoch": 0.5011899095668729, "grad_norm": 0.4595912992954254, "learning_rate": 9.863300962905602e-06, "loss": 0.4928, "step": 702 }, { "epoch": 0.5019038553069967, "grad_norm": 0.5728545784950256, "learning_rate": 9.862334228643788e-06, "loss": 0.4595, "step": 703 }, { "epoch": 0.5026178010471204, "grad_norm": 0.4412950873374939, "learning_rate": 9.861364135761518e-06, "loss": 0.4544, "step": 704 }, { "epoch": 0.5033317467872441, "grad_norm": 0.4280628561973572, "learning_rate": 9.860390684928873e-06, "loss": 0.4532, "step": 705 }, { "epoch": 0.504045692527368, "grad_norm": 0.4585987627506256, "learning_rate": 9.859413876818261e-06, "loss": 0.4752, "step": 706 }, { "epoch": 0.5047596382674917, "grad_norm": 0.45602765679359436, "learning_rate": 9.858433712104403e-06, "loss": 0.4813, "step": 707 }, { "epoch": 0.5054735840076154, "grad_norm": 0.38030165433883667, "learning_rate": 9.857450191464337e-06, "loss": 0.4677, "step": 708 }, { "epoch": 0.5061875297477392, "grad_norm": 0.4958546757698059, "learning_rate": 9.856463315577429e-06, "loss": 0.4942, "step": 709 }, { "epoch": 0.5069014754878629, "grad_norm": 0.49317288398742676, "learning_rate": 9.855473085125351e-06, "loss": 0.4771, "step": 710 }, { "epoch": 0.5076154212279866, "grad_norm": 0.4205581247806549, "learning_rate": 9.854479500792099e-06, "loss": 0.4569, "step": 711 }, { "epoch": 0.5083293669681104, "grad_norm": 0.41384023427963257, "learning_rate": 9.853482563263981e-06, "loss": 0.479, "step": 712 }, { "epoch": 0.5090433127082342, "grad_norm": 0.4902040660381317, "learning_rate": 9.852482273229629e-06, "loss": 0.4661, "step": 713 }, { "epoch": 0.509757258448358, "grad_norm": 0.4159619212150574, "learning_rate": 9.851478631379982e-06, "loss": 0.4778, "step": 714 }, { "epoch": 0.5104712041884817, "grad_norm": 0.42354169487953186, "learning_rate": 9.850471638408301e-06, "loss": 0.4275, "step": 715 }, { "epoch": 0.5111851499286054, "grad_norm": 0.5028774738311768, "learning_rate": 9.849461295010157e-06, "loss": 0.4884, "step": 716 }, { "epoch": 0.5118990956687292, "grad_norm": 0.3793300986289978, "learning_rate": 9.848447601883436e-06, "loss": 0.4399, "step": 717 }, { "epoch": 0.5126130414088529, "grad_norm": 0.4898541569709778, "learning_rate": 9.847430559728339e-06, "loss": 0.4569, "step": 718 }, { "epoch": 0.5133269871489767, "grad_norm": 0.4972122311592102, "learning_rate": 9.846410169247383e-06, "loss": 0.4861, "step": 719 }, { "epoch": 0.5140409328891005, "grad_norm": 0.3918731212615967, "learning_rate": 9.84538643114539e-06, "loss": 0.4613, "step": 720 }, { "epoch": 0.5147548786292242, "grad_norm": 0.48928704857826233, "learning_rate": 9.844359346129504e-06, "loss": 0.4632, "step": 721 }, { "epoch": 0.5154688243693479, "grad_norm": 0.4580102264881134, "learning_rate": 9.843328914909176e-06, "loss": 0.4954, "step": 722 }, { "epoch": 0.5161827701094717, "grad_norm": 0.4670655131340027, "learning_rate": 9.842295138196165e-06, "loss": 0.4595, "step": 723 }, { "epoch": 0.5168967158495954, "grad_norm": 0.43404439091682434, "learning_rate": 9.841258016704547e-06, "loss": 0.4735, "step": 724 }, { "epoch": 0.5176106615897191, "grad_norm": 0.41010597348213196, "learning_rate": 9.840217551150706e-06, "loss": 0.4556, "step": 725 }, { "epoch": 0.518324607329843, "grad_norm": 0.4328065812587738, "learning_rate": 9.839173742253334e-06, "loss": 0.49, "step": 726 }, { "epoch": 0.5190385530699667, "grad_norm": 0.5222164392471313, "learning_rate": 9.838126590733435e-06, "loss": 0.4984, "step": 727 }, { "epoch": 0.5197524988100904, "grad_norm": 0.4558221101760864, "learning_rate": 9.83707609731432e-06, "loss": 0.489, "step": 728 }, { "epoch": 0.5204664445502142, "grad_norm": 0.397320032119751, "learning_rate": 9.836022262721611e-06, "loss": 0.4861, "step": 729 }, { "epoch": 0.5211803902903379, "grad_norm": 0.49606046080589294, "learning_rate": 9.834965087683237e-06, "loss": 0.4493, "step": 730 }, { "epoch": 0.5218943360304616, "grad_norm": 0.5208548903465271, "learning_rate": 9.833904572929432e-06, "loss": 0.4644, "step": 731 }, { "epoch": 0.5226082817705854, "grad_norm": 0.4145772159099579, "learning_rate": 9.832840719192737e-06, "loss": 0.4662, "step": 732 }, { "epoch": 0.5233222275107092, "grad_norm": 0.5045866370201111, "learning_rate": 9.831773527208003e-06, "loss": 0.4591, "step": 733 }, { "epoch": 0.524036173250833, "grad_norm": 0.50401771068573, "learning_rate": 9.830702997712385e-06, "loss": 0.4589, "step": 734 }, { "epoch": 0.5247501189909567, "grad_norm": 0.45011526346206665, "learning_rate": 9.829629131445342e-06, "loss": 0.4585, "step": 735 }, { "epoch": 0.5254640647310804, "grad_norm": 0.41140392422676086, "learning_rate": 9.82855192914864e-06, "loss": 0.4377, "step": 736 }, { "epoch": 0.5261780104712042, "grad_norm": 0.4983551800251007, "learning_rate": 9.827471391566348e-06, "loss": 0.4802, "step": 737 }, { "epoch": 0.5268919562113279, "grad_norm": 0.44469282031059265, "learning_rate": 9.826387519444838e-06, "loss": 0.4587, "step": 738 }, { "epoch": 0.5276059019514517, "grad_norm": 0.4122444987297058, "learning_rate": 9.825300313532787e-06, "loss": 0.4507, "step": 739 }, { "epoch": 0.5283198476915755, "grad_norm": 0.4905400276184082, "learning_rate": 9.824209774581176e-06, "loss": 0.5037, "step": 740 }, { "epoch": 0.5290337934316992, "grad_norm": 0.48906081914901733, "learning_rate": 9.823115903343283e-06, "loss": 0.4752, "step": 741 }, { "epoch": 0.5297477391718229, "grad_norm": 0.4640292525291443, "learning_rate": 9.822018700574696e-06, "loss": 0.4824, "step": 742 }, { "epoch": 0.5304616849119467, "grad_norm": 0.4039747714996338, "learning_rate": 9.820918167033295e-06, "loss": 0.4531, "step": 743 }, { "epoch": 0.5311756306520704, "grad_norm": 0.41849735379219055, "learning_rate": 9.819814303479268e-06, "loss": 0.4562, "step": 744 }, { "epoch": 0.5318895763921941, "grad_norm": 0.42816245555877686, "learning_rate": 9.818707110675099e-06, "loss": 0.4981, "step": 745 }, { "epoch": 0.532603522132318, "grad_norm": 0.46538016200065613, "learning_rate": 9.817596589385572e-06, "loss": 0.4984, "step": 746 }, { "epoch": 0.5333174678724417, "grad_norm": 0.43954789638519287, "learning_rate": 9.816482740377775e-06, "loss": 0.4541, "step": 747 }, { "epoch": 0.5340314136125655, "grad_norm": 0.4497023820877075, "learning_rate": 9.815365564421086e-06, "loss": 0.4669, "step": 748 }, { "epoch": 0.5347453593526892, "grad_norm": 0.5067876577377319, "learning_rate": 9.81424506228719e-06, "loss": 0.4665, "step": 749 }, { "epoch": 0.5354593050928129, "grad_norm": 0.43730294704437256, "learning_rate": 9.81312123475006e-06, "loss": 0.4775, "step": 750 }, { "epoch": 0.5361732508329367, "grad_norm": 0.40336930751800537, "learning_rate": 9.811994082585979e-06, "loss": 0.459, "step": 751 }, { "epoch": 0.5368871965730605, "grad_norm": 0.40494504570961, "learning_rate": 9.810863606573512e-06, "loss": 0.4507, "step": 752 }, { "epoch": 0.5376011423131842, "grad_norm": 0.4006732106208801, "learning_rate": 9.80972980749353e-06, "loss": 0.4515, "step": 753 }, { "epoch": 0.538315088053308, "grad_norm": 0.40761899948120117, "learning_rate": 9.808592686129196e-06, "loss": 0.4798, "step": 754 }, { "epoch": 0.5390290337934317, "grad_norm": 0.41306209564208984, "learning_rate": 9.807452243265968e-06, "loss": 0.4833, "step": 755 }, { "epoch": 0.5397429795335554, "grad_norm": 0.44104501605033875, "learning_rate": 9.806308479691595e-06, "loss": 0.468, "step": 756 }, { "epoch": 0.5404569252736792, "grad_norm": 0.39887815713882446, "learning_rate": 9.805161396196128e-06, "loss": 0.4481, "step": 757 }, { "epoch": 0.5411708710138029, "grad_norm": 0.41797375679016113, "learning_rate": 9.804010993571902e-06, "loss": 0.4598, "step": 758 }, { "epoch": 0.5418848167539267, "grad_norm": 0.44974246621131897, "learning_rate": 9.802857272613552e-06, "loss": 0.4375, "step": 759 }, { "epoch": 0.5425987624940505, "grad_norm": 0.43062451481819153, "learning_rate": 9.801700234118e-06, "loss": 0.4641, "step": 760 }, { "epoch": 0.5433127082341742, "grad_norm": 0.44102635979652405, "learning_rate": 9.800539878884463e-06, "loss": 0.4625, "step": 761 }, { "epoch": 0.544026653974298, "grad_norm": 0.4689774513244629, "learning_rate": 9.799376207714446e-06, "loss": 0.4652, "step": 762 }, { "epoch": 0.5447405997144217, "grad_norm": 0.41966477036476135, "learning_rate": 9.798209221411748e-06, "loss": 0.4447, "step": 763 }, { "epoch": 0.5454545454545454, "grad_norm": 0.4193533658981323, "learning_rate": 9.797038920782453e-06, "loss": 0.4448, "step": 764 }, { "epoch": 0.5461684911946693, "grad_norm": 0.42513543367385864, "learning_rate": 9.795865306634939e-06, "loss": 0.4766, "step": 765 }, { "epoch": 0.546882436934793, "grad_norm": 0.44730156660079956, "learning_rate": 9.794688379779873e-06, "loss": 0.4464, "step": 766 }, { "epoch": 0.5475963826749167, "grad_norm": 0.45415976643562317, "learning_rate": 9.793508141030205e-06, "loss": 0.4621, "step": 767 }, { "epoch": 0.5483103284150405, "grad_norm": 0.47266384959220886, "learning_rate": 9.792324591201179e-06, "loss": 0.4506, "step": 768 }, { "epoch": 0.5490242741551642, "grad_norm": 0.46025076508522034, "learning_rate": 9.79113773111032e-06, "loss": 0.4697, "step": 769 }, { "epoch": 0.5497382198952879, "grad_norm": 0.5645651817321777, "learning_rate": 9.789947561577445e-06, "loss": 0.4671, "step": 770 }, { "epoch": 0.5504521656354117, "grad_norm": 0.4486807584762573, "learning_rate": 9.788754083424654e-06, "loss": 0.4791, "step": 771 }, { "epoch": 0.5511661113755355, "grad_norm": 0.5816744565963745, "learning_rate": 9.787557297476331e-06, "loss": 0.4856, "step": 772 }, { "epoch": 0.5518800571156592, "grad_norm": 0.45033740997314453, "learning_rate": 9.786357204559149e-06, "loss": 0.4739, "step": 773 }, { "epoch": 0.552594002855783, "grad_norm": 0.4652721881866455, "learning_rate": 9.785153805502062e-06, "loss": 0.4752, "step": 774 }, { "epoch": 0.5533079485959067, "grad_norm": 0.41499409079551697, "learning_rate": 9.783947101136311e-06, "loss": 0.4604, "step": 775 }, { "epoch": 0.5540218943360304, "grad_norm": 0.4741017818450928, "learning_rate": 9.782737092295414e-06, "loss": 0.4376, "step": 776 }, { "epoch": 0.5547358400761542, "grad_norm": 0.418442040681839, "learning_rate": 9.781523779815178e-06, "loss": 0.4273, "step": 777 }, { "epoch": 0.5554497858162779, "grad_norm": 0.4226670265197754, "learning_rate": 9.78030716453369e-06, "loss": 0.4793, "step": 778 }, { "epoch": 0.5561637315564018, "grad_norm": 0.4366421699523926, "learning_rate": 9.779087247291315e-06, "loss": 0.4667, "step": 779 }, { "epoch": 0.5568776772965255, "grad_norm": 0.47962209582328796, "learning_rate": 9.777864028930705e-06, "loss": 0.4674, "step": 780 }, { "epoch": 0.5575916230366492, "grad_norm": 0.49572330713272095, "learning_rate": 9.776637510296786e-06, "loss": 0.4854, "step": 781 }, { "epoch": 0.558305568776773, "grad_norm": 0.44364601373672485, "learning_rate": 9.775407692236767e-06, "loss": 0.488, "step": 782 }, { "epoch": 0.5590195145168967, "grad_norm": 0.5210701823234558, "learning_rate": 9.774174575600137e-06, "loss": 0.4372, "step": 783 }, { "epoch": 0.5597334602570204, "grad_norm": 0.5055267810821533, "learning_rate": 9.77293816123866e-06, "loss": 0.487, "step": 784 }, { "epoch": 0.5604474059971443, "grad_norm": 0.4046049118041992, "learning_rate": 9.771698450006382e-06, "loss": 0.4848, "step": 785 }, { "epoch": 0.561161351737268, "grad_norm": 0.49240782856941223, "learning_rate": 9.770455442759622e-06, "loss": 0.4553, "step": 786 }, { "epoch": 0.5618752974773917, "grad_norm": 0.4387914538383484, "learning_rate": 9.769209140356979e-06, "loss": 0.4515, "step": 787 }, { "epoch": 0.5625892432175155, "grad_norm": 0.40791699290275574, "learning_rate": 9.767959543659327e-06, "loss": 0.4711, "step": 788 }, { "epoch": 0.5633031889576392, "grad_norm": 0.4557393193244934, "learning_rate": 9.766706653529814e-06, "loss": 0.4549, "step": 789 }, { "epoch": 0.5640171346977629, "grad_norm": 0.4543333351612091, "learning_rate": 9.765450470833867e-06, "loss": 0.4651, "step": 790 }, { "epoch": 0.5647310804378867, "grad_norm": 0.3865351676940918, "learning_rate": 9.764190996439181e-06, "loss": 0.4317, "step": 791 }, { "epoch": 0.5654450261780105, "grad_norm": 0.4277508556842804, "learning_rate": 9.762928231215731e-06, "loss": 0.4681, "step": 792 }, { "epoch": 0.5661589719181342, "grad_norm": 0.4409734606742859, "learning_rate": 9.761662176035764e-06, "loss": 0.4556, "step": 793 }, { "epoch": 0.566872917658258, "grad_norm": 0.4007022976875305, "learning_rate": 9.760392831773793e-06, "loss": 0.4707, "step": 794 }, { "epoch": 0.5675868633983817, "grad_norm": 0.4557911157608032, "learning_rate": 9.759120199306613e-06, "loss": 0.4797, "step": 795 }, { "epoch": 0.5683008091385054, "grad_norm": 0.4491014778614044, "learning_rate": 9.757844279513282e-06, "loss": 0.4548, "step": 796 }, { "epoch": 0.5690147548786292, "grad_norm": 0.43383780121803284, "learning_rate": 9.756565073275133e-06, "loss": 0.4663, "step": 797 }, { "epoch": 0.569728700618753, "grad_norm": 0.43138614296913147, "learning_rate": 9.755282581475769e-06, "loss": 0.474, "step": 798 }, { "epoch": 0.5704426463588768, "grad_norm": 0.4633713364601135, "learning_rate": 9.753996805001059e-06, "loss": 0.4456, "step": 799 }, { "epoch": 0.5711565920990005, "grad_norm": 0.42670825123786926, "learning_rate": 9.752707744739146e-06, "loss": 0.449, "step": 800 }, { "epoch": 0.5718705378391242, "grad_norm": 0.45984652638435364, "learning_rate": 9.751415401580437e-06, "loss": 0.4928, "step": 801 }, { "epoch": 0.572584483579248, "grad_norm": 0.44748353958129883, "learning_rate": 9.750119776417608e-06, "loss": 0.4546, "step": 802 }, { "epoch": 0.5732984293193717, "grad_norm": 0.47593948245048523, "learning_rate": 9.748820870145604e-06, "loss": 0.4684, "step": 803 }, { "epoch": 0.5740123750594954, "grad_norm": 0.3857801854610443, "learning_rate": 9.747518683661632e-06, "loss": 0.4692, "step": 804 }, { "epoch": 0.5747263207996193, "grad_norm": 0.498096764087677, "learning_rate": 9.74621321786517e-06, "loss": 0.4477, "step": 805 }, { "epoch": 0.575440266539743, "grad_norm": 0.44641008973121643, "learning_rate": 9.744904473657958e-06, "loss": 0.4568, "step": 806 }, { "epoch": 0.5761542122798667, "grad_norm": 0.5136968493461609, "learning_rate": 9.743592451944e-06, "loss": 0.4471, "step": 807 }, { "epoch": 0.5768681580199905, "grad_norm": 0.46371763944625854, "learning_rate": 9.742277153629564e-06, "loss": 0.4613, "step": 808 }, { "epoch": 0.5775821037601142, "grad_norm": 0.456162691116333, "learning_rate": 9.740958579623188e-06, "loss": 0.4555, "step": 809 }, { "epoch": 0.5782960495002379, "grad_norm": 0.596610426902771, "learning_rate": 9.73963673083566e-06, "loss": 0.4682, "step": 810 }, { "epoch": 0.5790099952403618, "grad_norm": 0.4046292304992676, "learning_rate": 9.73831160818004e-06, "loss": 0.4521, "step": 811 }, { "epoch": 0.5797239409804855, "grad_norm": 0.5033577084541321, "learning_rate": 9.736983212571646e-06, "loss": 0.4638, "step": 812 }, { "epoch": 0.5804378867206093, "grad_norm": 0.473416268825531, "learning_rate": 9.73565154492806e-06, "loss": 0.4591, "step": 813 }, { "epoch": 0.581151832460733, "grad_norm": 0.4630573093891144, "learning_rate": 9.734316606169118e-06, "loss": 0.434, "step": 814 }, { "epoch": 0.5818657782008567, "grad_norm": 0.42755213379859924, "learning_rate": 9.732978397216918e-06, "loss": 0.4742, "step": 815 }, { "epoch": 0.5825797239409805, "grad_norm": 0.4827682673931122, "learning_rate": 9.731636918995821e-06, "loss": 0.4528, "step": 816 }, { "epoch": 0.5832936696811042, "grad_norm": 0.4831705093383789, "learning_rate": 9.730292172432442e-06, "loss": 0.4537, "step": 817 }, { "epoch": 0.584007615421228, "grad_norm": 0.4030603766441345, "learning_rate": 9.728944158455653e-06, "loss": 0.4658, "step": 818 }, { "epoch": 0.5847215611613518, "grad_norm": 0.4521293342113495, "learning_rate": 9.727592877996585e-06, "loss": 0.4308, "step": 819 }, { "epoch": 0.5854355069014755, "grad_norm": 0.535734236240387, "learning_rate": 9.726238331988625e-06, "loss": 0.4722, "step": 820 }, { "epoch": 0.5861494526415992, "grad_norm": 0.4401475489139557, "learning_rate": 9.724880521367415e-06, "loss": 0.486, "step": 821 }, { "epoch": 0.586863398381723, "grad_norm": 0.48872891068458557, "learning_rate": 9.723519447070854e-06, "loss": 0.4385, "step": 822 }, { "epoch": 0.5875773441218467, "grad_norm": 0.431827574968338, "learning_rate": 9.72215511003909e-06, "loss": 0.446, "step": 823 }, { "epoch": 0.5882912898619705, "grad_norm": 0.5041089057922363, "learning_rate": 9.720787511214533e-06, "loss": 0.4585, "step": 824 }, { "epoch": 0.5890052356020943, "grad_norm": 0.45105883479118347, "learning_rate": 9.719416651541839e-06, "loss": 0.4446, "step": 825 }, { "epoch": 0.589719181342218, "grad_norm": 0.4259766638278961, "learning_rate": 9.718042531967918e-06, "loss": 0.4339, "step": 826 }, { "epoch": 0.5904331270823417, "grad_norm": 0.5587831139564514, "learning_rate": 9.716665153441935e-06, "loss": 0.4464, "step": 827 }, { "epoch": 0.5911470728224655, "grad_norm": 0.46677905321121216, "learning_rate": 9.715284516915303e-06, "loss": 0.4613, "step": 828 }, { "epoch": 0.5918610185625892, "grad_norm": 0.4979769289493561, "learning_rate": 9.713900623341685e-06, "loss": 0.4481, "step": 829 }, { "epoch": 0.592574964302713, "grad_norm": 0.40785491466522217, "learning_rate": 9.712513473676997e-06, "loss": 0.4411, "step": 830 }, { "epoch": 0.5932889100428368, "grad_norm": 0.4314916431903839, "learning_rate": 9.7111230688794e-06, "loss": 0.4542, "step": 831 }, { "epoch": 0.5940028557829605, "grad_norm": 0.3709169030189514, "learning_rate": 9.709729409909308e-06, "loss": 0.4431, "step": 832 }, { "epoch": 0.5947168015230843, "grad_norm": 0.4327278435230255, "learning_rate": 9.708332497729378e-06, "loss": 0.4726, "step": 833 }, { "epoch": 0.595430747263208, "grad_norm": 0.4315388798713684, "learning_rate": 9.706932333304518e-06, "loss": 0.438, "step": 834 }, { "epoch": 0.5961446930033317, "grad_norm": 0.4537888765335083, "learning_rate": 9.705528917601878e-06, "loss": 0.4771, "step": 835 }, { "epoch": 0.5968586387434555, "grad_norm": 0.4020180404186249, "learning_rate": 9.704122251590862e-06, "loss": 0.4543, "step": 836 }, { "epoch": 0.5975725844835792, "grad_norm": 0.48662450909614563, "learning_rate": 9.702712336243109e-06, "loss": 0.4378, "step": 837 }, { "epoch": 0.598286530223703, "grad_norm": 0.451673299074173, "learning_rate": 9.70129917253251e-06, "loss": 0.4761, "step": 838 }, { "epoch": 0.5990004759638268, "grad_norm": 0.4521813690662384, "learning_rate": 9.699882761435195e-06, "loss": 0.4465, "step": 839 }, { "epoch": 0.5997144217039505, "grad_norm": 0.46379783749580383, "learning_rate": 9.698463103929542e-06, "loss": 0.4814, "step": 840 }, { "epoch": 0.6004283674440742, "grad_norm": 0.39723727107048035, "learning_rate": 9.697040200996168e-06, "loss": 0.4566, "step": 841 }, { "epoch": 0.601142313184198, "grad_norm": 0.3505880832672119, "learning_rate": 9.69561405361793e-06, "loss": 0.4607, "step": 842 }, { "epoch": 0.6018562589243217, "grad_norm": 0.4222604036331177, "learning_rate": 9.694184662779931e-06, "loss": 0.4735, "step": 843 }, { "epoch": 0.6025702046644456, "grad_norm": 0.4059349298477173, "learning_rate": 9.692752029469511e-06, "loss": 0.4485, "step": 844 }, { "epoch": 0.6032841504045693, "grad_norm": 0.3809026777744293, "learning_rate": 9.691316154676251e-06, "loss": 0.4418, "step": 845 }, { "epoch": 0.603998096144693, "grad_norm": 0.413377970457077, "learning_rate": 9.68987703939197e-06, "loss": 0.4392, "step": 846 }, { "epoch": 0.6047120418848168, "grad_norm": 0.42644205689430237, "learning_rate": 9.688434684610725e-06, "loss": 0.482, "step": 847 }, { "epoch": 0.6054259876249405, "grad_norm": 0.3915427029132843, "learning_rate": 9.686989091328814e-06, "loss": 0.4668, "step": 848 }, { "epoch": 0.6061399333650642, "grad_norm": 0.3805599808692932, "learning_rate": 9.685540260544768e-06, "loss": 0.438, "step": 849 }, { "epoch": 0.606853879105188, "grad_norm": 0.4175950586795807, "learning_rate": 9.684088193259356e-06, "loss": 0.4381, "step": 850 }, { "epoch": 0.6075678248453118, "grad_norm": 0.41853561997413635, "learning_rate": 9.682632890475584e-06, "loss": 0.4615, "step": 851 }, { "epoch": 0.6082817705854355, "grad_norm": 0.41240060329437256, "learning_rate": 9.681174353198687e-06, "loss": 0.4718, "step": 852 }, { "epoch": 0.6089957163255593, "grad_norm": 0.3953721821308136, "learning_rate": 9.679712582436142e-06, "loss": 0.4737, "step": 853 }, { "epoch": 0.609709662065683, "grad_norm": 0.4398781657218933, "learning_rate": 9.678247579197658e-06, "loss": 0.465, "step": 854 }, { "epoch": 0.6104236078058067, "grad_norm": 0.45831871032714844, "learning_rate": 9.67677934449517e-06, "loss": 0.48, "step": 855 }, { "epoch": 0.6111375535459305, "grad_norm": 0.38881903886795044, "learning_rate": 9.675307879342854e-06, "loss": 0.449, "step": 856 }, { "epoch": 0.6118514992860543, "grad_norm": 0.5169574022293091, "learning_rate": 9.673833184757112e-06, "loss": 0.4799, "step": 857 }, { "epoch": 0.612565445026178, "grad_norm": 0.4126818776130676, "learning_rate": 9.672355261756578e-06, "loss": 0.4581, "step": 858 }, { "epoch": 0.6132793907663018, "grad_norm": 0.3549620807170868, "learning_rate": 9.670874111362117e-06, "loss": 0.4428, "step": 859 }, { "epoch": 0.6139933365064255, "grad_norm": 0.5122111439704895, "learning_rate": 9.669389734596819e-06, "loss": 0.4374, "step": 860 }, { "epoch": 0.6147072822465492, "grad_norm": 0.39036473631858826, "learning_rate": 9.667902132486009e-06, "loss": 0.4233, "step": 861 }, { "epoch": 0.615421227986673, "grad_norm": 0.4144284725189209, "learning_rate": 9.666411306057237e-06, "loss": 0.4349, "step": 862 }, { "epoch": 0.6161351737267967, "grad_norm": 0.4126204252243042, "learning_rate": 9.664917256340279e-06, "loss": 0.473, "step": 863 }, { "epoch": 0.6168491194669206, "grad_norm": 0.3953283727169037, "learning_rate": 9.663419984367139e-06, "loss": 0.4556, "step": 864 }, { "epoch": 0.6175630652070443, "grad_norm": 0.4195423126220703, "learning_rate": 9.661919491172046e-06, "loss": 0.4474, "step": 865 }, { "epoch": 0.618277010947168, "grad_norm": 0.4812825620174408, "learning_rate": 9.660415777791454e-06, "loss": 0.462, "step": 866 }, { "epoch": 0.6189909566872918, "grad_norm": 0.38025158643722534, "learning_rate": 9.658908845264043e-06, "loss": 0.467, "step": 867 }, { "epoch": 0.6197049024274155, "grad_norm": 0.4076271951198578, "learning_rate": 9.657398694630713e-06, "loss": 0.454, "step": 868 }, { "epoch": 0.6204188481675392, "grad_norm": 0.4284236431121826, "learning_rate": 9.65588532693459e-06, "loss": 0.4532, "step": 869 }, { "epoch": 0.6211327939076631, "grad_norm": 0.46698468923568726, "learning_rate": 9.654368743221022e-06, "loss": 0.4481, "step": 870 }, { "epoch": 0.6218467396477868, "grad_norm": 0.4649007320404053, "learning_rate": 9.652848944537578e-06, "loss": 0.465, "step": 871 }, { "epoch": 0.6225606853879105, "grad_norm": 0.44577133655548096, "learning_rate": 9.651325931934046e-06, "loss": 0.4642, "step": 872 }, { "epoch": 0.6232746311280343, "grad_norm": 0.4520270824432373, "learning_rate": 9.649799706462435e-06, "loss": 0.464, "step": 873 }, { "epoch": 0.623988576868158, "grad_norm": 0.4870014488697052, "learning_rate": 9.648270269176974e-06, "loss": 0.4528, "step": 874 }, { "epoch": 0.6247025226082817, "grad_norm": 0.46805259585380554, "learning_rate": 9.646737621134112e-06, "loss": 0.4736, "step": 875 }, { "epoch": 0.6254164683484055, "grad_norm": 0.4292861223220825, "learning_rate": 9.645201763392513e-06, "loss": 0.4576, "step": 876 }, { "epoch": 0.6261304140885293, "grad_norm": 0.4367702305316925, "learning_rate": 9.64366269701306e-06, "loss": 0.4703, "step": 877 }, { "epoch": 0.626844359828653, "grad_norm": 0.4553101360797882, "learning_rate": 9.642120423058849e-06, "loss": 0.4577, "step": 878 }, { "epoch": 0.6275583055687768, "grad_norm": 0.39043128490448, "learning_rate": 9.640574942595195e-06, "loss": 0.4357, "step": 879 }, { "epoch": 0.6282722513089005, "grad_norm": 0.43497684597969055, "learning_rate": 9.639026256689628e-06, "loss": 0.4672, "step": 880 }, { "epoch": 0.6289861970490243, "grad_norm": 0.4343476891517639, "learning_rate": 9.63747436641189e-06, "loss": 0.4419, "step": 881 }, { "epoch": 0.629700142789148, "grad_norm": 0.4394531548023224, "learning_rate": 9.635919272833938e-06, "loss": 0.4694, "step": 882 }, { "epoch": 0.6304140885292717, "grad_norm": 0.47809767723083496, "learning_rate": 9.634360977029939e-06, "loss": 0.4593, "step": 883 }, { "epoch": 0.6311280342693956, "grad_norm": 0.4669482707977295, "learning_rate": 9.632799480076278e-06, "loss": 0.4379, "step": 884 }, { "epoch": 0.6318419800095193, "grad_norm": 0.4133824110031128, "learning_rate": 9.631234783051544e-06, "loss": 0.4564, "step": 885 }, { "epoch": 0.632555925749643, "grad_norm": 0.43426448106765747, "learning_rate": 9.629666887036542e-06, "loss": 0.4494, "step": 886 }, { "epoch": 0.6332698714897668, "grad_norm": 0.4651135802268982, "learning_rate": 9.62809579311428e-06, "loss": 0.4724, "step": 887 }, { "epoch": 0.6339838172298905, "grad_norm": 0.46176186203956604, "learning_rate": 9.626521502369984e-06, "loss": 0.4579, "step": 888 }, { "epoch": 0.6346977629700142, "grad_norm": 0.4672599732875824, "learning_rate": 9.62494401589108e-06, "loss": 0.4751, "step": 889 }, { "epoch": 0.6354117087101381, "grad_norm": 0.37788236141204834, "learning_rate": 9.623363334767208e-06, "loss": 0.4348, "step": 890 }, { "epoch": 0.6361256544502618, "grad_norm": 0.38841888308525085, "learning_rate": 9.621779460090209e-06, "loss": 0.4125, "step": 891 }, { "epoch": 0.6368396001903855, "grad_norm": 0.4386557340621948, "learning_rate": 9.620192392954134e-06, "loss": 0.4589, "step": 892 }, { "epoch": 0.6375535459305093, "grad_norm": 0.37221458554267883, "learning_rate": 9.618602134455235e-06, "loss": 0.4366, "step": 893 }, { "epoch": 0.638267491670633, "grad_norm": 0.42739078402519226, "learning_rate": 9.617008685691973e-06, "loss": 0.4332, "step": 894 }, { "epoch": 0.6389814374107567, "grad_norm": 0.4186476171016693, "learning_rate": 9.61541204776501e-06, "loss": 0.4507, "step": 895 }, { "epoch": 0.6396953831508805, "grad_norm": 0.4407652020454407, "learning_rate": 9.613812221777212e-06, "loss": 0.4506, "step": 896 }, { "epoch": 0.6404093288910043, "grad_norm": 0.4661284387111664, "learning_rate": 9.612209208833648e-06, "loss": 0.4723, "step": 897 }, { "epoch": 0.6411232746311281, "grad_norm": 0.45641493797302246, "learning_rate": 9.610603010041583e-06, "loss": 0.4567, "step": 898 }, { "epoch": 0.6418372203712518, "grad_norm": 0.4218883514404297, "learning_rate": 9.608993626510491e-06, "loss": 0.4545, "step": 899 }, { "epoch": 0.6425511661113755, "grad_norm": 0.5096099376678467, "learning_rate": 9.60738105935204e-06, "loss": 0.4898, "step": 900 }, { "epoch": 0.6432651118514993, "grad_norm": 0.44276461005210876, "learning_rate": 9.605765309680097e-06, "loss": 0.4521, "step": 901 }, { "epoch": 0.643979057591623, "grad_norm": 0.4752160608768463, "learning_rate": 9.60414637861073e-06, "loss": 0.4776, "step": 902 }, { "epoch": 0.6446930033317468, "grad_norm": 0.4094425141811371, "learning_rate": 9.602524267262202e-06, "loss": 0.4656, "step": 903 }, { "epoch": 0.6454069490718706, "grad_norm": 0.44830507040023804, "learning_rate": 9.600898976754977e-06, "loss": 0.434, "step": 904 }, { "epoch": 0.6461208948119943, "grad_norm": 0.4234859347343445, "learning_rate": 9.59927050821171e-06, "loss": 0.4504, "step": 905 }, { "epoch": 0.646834840552118, "grad_norm": 0.43991902470588684, "learning_rate": 9.597638862757255e-06, "loss": 0.484, "step": 906 }, { "epoch": 0.6475487862922418, "grad_norm": 0.4644470512866974, "learning_rate": 9.596004041518657e-06, "loss": 0.4592, "step": 907 }, { "epoch": 0.6482627320323655, "grad_norm": 0.40318650007247925, "learning_rate": 9.594366045625155e-06, "loss": 0.4382, "step": 908 }, { "epoch": 0.6489766777724892, "grad_norm": 0.41802096366882324, "learning_rate": 9.592724876208183e-06, "loss": 0.4764, "step": 909 }, { "epoch": 0.6496906235126131, "grad_norm": 0.4562220573425293, "learning_rate": 9.591080534401371e-06, "loss": 0.4651, "step": 910 }, { "epoch": 0.6504045692527368, "grad_norm": 0.38075128197669983, "learning_rate": 9.589433021340531e-06, "loss": 0.4433, "step": 911 }, { "epoch": 0.6511185149928606, "grad_norm": 0.3936757743358612, "learning_rate": 9.58778233816367e-06, "loss": 0.4573, "step": 912 }, { "epoch": 0.6518324607329843, "grad_norm": 0.4855842888355255, "learning_rate": 9.586128486010986e-06, "loss": 0.4728, "step": 913 }, { "epoch": 0.652546406473108, "grad_norm": 0.38145995140075684, "learning_rate": 9.584471466024865e-06, "loss": 0.4533, "step": 914 }, { "epoch": 0.6532603522132318, "grad_norm": 0.4232887625694275, "learning_rate": 9.582811279349881e-06, "loss": 0.4731, "step": 915 }, { "epoch": 0.6539742979533556, "grad_norm": 0.4045996069908142, "learning_rate": 9.581147927132797e-06, "loss": 0.4667, "step": 916 }, { "epoch": 0.6546882436934793, "grad_norm": 0.4121949076652527, "learning_rate": 9.579481410522556e-06, "loss": 0.4598, "step": 917 }, { "epoch": 0.6554021894336031, "grad_norm": 0.3911149203777313, "learning_rate": 9.577811730670297e-06, "loss": 0.4552, "step": 918 }, { "epoch": 0.6561161351737268, "grad_norm": 0.3826931416988373, "learning_rate": 9.576138888729334e-06, "loss": 0.4409, "step": 919 }, { "epoch": 0.6568300809138505, "grad_norm": 0.45705002546310425, "learning_rate": 9.574462885855173e-06, "loss": 0.4618, "step": 920 }, { "epoch": 0.6575440266539743, "grad_norm": 0.4200792908668518, "learning_rate": 9.572783723205502e-06, "loss": 0.444, "step": 921 }, { "epoch": 0.658257972394098, "grad_norm": 0.46073973178863525, "learning_rate": 9.571101401940186e-06, "loss": 0.4478, "step": 922 }, { "epoch": 0.6589719181342218, "grad_norm": 0.45487338304519653, "learning_rate": 9.569415923221275e-06, "loss": 0.4596, "step": 923 }, { "epoch": 0.6596858638743456, "grad_norm": 0.38112837076187134, "learning_rate": 9.567727288213005e-06, "loss": 0.4659, "step": 924 }, { "epoch": 0.6603998096144693, "grad_norm": 0.48497188091278076, "learning_rate": 9.566035498081785e-06, "loss": 0.4663, "step": 925 }, { "epoch": 0.661113755354593, "grad_norm": 0.40633150935173035, "learning_rate": 9.564340553996207e-06, "loss": 0.4492, "step": 926 }, { "epoch": 0.6618277010947168, "grad_norm": 0.388465017080307, "learning_rate": 9.56264245712704e-06, "loss": 0.4639, "step": 927 }, { "epoch": 0.6625416468348405, "grad_norm": 0.47613468766212463, "learning_rate": 9.560941208647231e-06, "loss": 0.4569, "step": 928 }, { "epoch": 0.6632555925749642, "grad_norm": 0.41201379895210266, "learning_rate": 9.559236809731907e-06, "loss": 0.4842, "step": 929 }, { "epoch": 0.6639695383150881, "grad_norm": 0.4550820291042328, "learning_rate": 9.557529261558367e-06, "loss": 0.4664, "step": 930 }, { "epoch": 0.6646834840552118, "grad_norm": 0.43166831135749817, "learning_rate": 9.555818565306086e-06, "loss": 0.444, "step": 931 }, { "epoch": 0.6653974297953356, "grad_norm": 0.46192118525505066, "learning_rate": 9.554104722156716e-06, "loss": 0.4592, "step": 932 }, { "epoch": 0.6661113755354593, "grad_norm": 0.5062450170516968, "learning_rate": 9.552387733294081e-06, "loss": 0.4564, "step": 933 }, { "epoch": 0.666825321275583, "grad_norm": 0.44456174969673157, "learning_rate": 9.550667599904176e-06, "loss": 0.4627, "step": 934 }, { "epoch": 0.6675392670157068, "grad_norm": 0.47239744663238525, "learning_rate": 9.548944323175173e-06, "loss": 0.4447, "step": 935 }, { "epoch": 0.6682532127558306, "grad_norm": 0.3833394944667816, "learning_rate": 9.547217904297411e-06, "loss": 0.4097, "step": 936 }, { "epoch": 0.6689671584959543, "grad_norm": 0.46193554997444153, "learning_rate": 9.545488344463401e-06, "loss": 0.4732, "step": 937 }, { "epoch": 0.6696811042360781, "grad_norm": 0.41425415873527527, "learning_rate": 9.543755644867823e-06, "loss": 0.456, "step": 938 }, { "epoch": 0.6703950499762018, "grad_norm": 0.43259185552597046, "learning_rate": 9.542019806707526e-06, "loss": 0.4404, "step": 939 }, { "epoch": 0.6711089957163255, "grad_norm": 0.4509962201118469, "learning_rate": 9.540280831181525e-06, "loss": 0.4472, "step": 940 }, { "epoch": 0.6718229414564493, "grad_norm": 0.40036627650260925, "learning_rate": 9.538538719491008e-06, "loss": 0.4653, "step": 941 }, { "epoch": 0.672536887196573, "grad_norm": 0.4233179986476898, "learning_rate": 9.536793472839325e-06, "loss": 0.4559, "step": 942 }, { "epoch": 0.6732508329366969, "grad_norm": 0.4642760157585144, "learning_rate": 9.53504509243199e-06, "loss": 0.4762, "step": 943 }, { "epoch": 0.6739647786768206, "grad_norm": 0.43304044008255005, "learning_rate": 9.533293579476684e-06, "loss": 0.4572, "step": 944 }, { "epoch": 0.6746787244169443, "grad_norm": 0.4402642250061035, "learning_rate": 9.531538935183252e-06, "loss": 0.4573, "step": 945 }, { "epoch": 0.675392670157068, "grad_norm": 0.4110449254512787, "learning_rate": 9.529781160763699e-06, "loss": 0.4667, "step": 946 }, { "epoch": 0.6761066158971918, "grad_norm": 0.4298001229763031, "learning_rate": 9.528020257432195e-06, "loss": 0.4624, "step": 947 }, { "epoch": 0.6768205616373155, "grad_norm": 0.43860822916030884, "learning_rate": 9.526256226405075e-06, "loss": 0.4726, "step": 948 }, { "epoch": 0.6775345073774394, "grad_norm": 0.4842575490474701, "learning_rate": 9.524489068900824e-06, "loss": 0.4611, "step": 949 }, { "epoch": 0.6782484531175631, "grad_norm": 0.4543883502483368, "learning_rate": 9.522718786140096e-06, "loss": 0.4831, "step": 950 }, { "epoch": 0.6789623988576868, "grad_norm": 0.4147496819496155, "learning_rate": 9.520945379345701e-06, "loss": 0.4659, "step": 951 }, { "epoch": 0.6796763445978106, "grad_norm": 0.39796245098114014, "learning_rate": 9.519168849742603e-06, "loss": 0.4554, "step": 952 }, { "epoch": 0.6803902903379343, "grad_norm": 0.38497060537338257, "learning_rate": 9.51738919855793e-06, "loss": 0.4692, "step": 953 }, { "epoch": 0.681104236078058, "grad_norm": 0.401654452085495, "learning_rate": 9.51560642702096e-06, "loss": 0.4469, "step": 954 }, { "epoch": 0.6818181818181818, "grad_norm": 0.38676926493644714, "learning_rate": 9.51382053636313e-06, "loss": 0.4581, "step": 955 }, { "epoch": 0.6825321275583056, "grad_norm": 0.3948328495025635, "learning_rate": 9.512031527818028e-06, "loss": 0.4261, "step": 956 }, { "epoch": 0.6832460732984293, "grad_norm": 0.39151304960250854, "learning_rate": 9.510239402621402e-06, "loss": 0.4917, "step": 957 }, { "epoch": 0.6839600190385531, "grad_norm": 0.4553239941596985, "learning_rate": 9.508444162011147e-06, "loss": 0.4287, "step": 958 }, { "epoch": 0.6846739647786768, "grad_norm": 0.42719265818595886, "learning_rate": 9.506645807227311e-06, "loss": 0.4641, "step": 959 }, { "epoch": 0.6853879105188005, "grad_norm": 0.40899378061294556, "learning_rate": 9.504844339512096e-06, "loss": 0.4487, "step": 960 }, { "epoch": 0.6861018562589243, "grad_norm": 0.45471325516700745, "learning_rate": 9.50303976010985e-06, "loss": 0.4886, "step": 961 }, { "epoch": 0.6868158019990481, "grad_norm": 0.3970225155353546, "learning_rate": 9.501232070267074e-06, "loss": 0.431, "step": 962 }, { "epoch": 0.6875297477391719, "grad_norm": 0.4096478223800659, "learning_rate": 9.499421271232416e-06, "loss": 0.4865, "step": 963 }, { "epoch": 0.6882436934792956, "grad_norm": 0.39732542634010315, "learning_rate": 9.49760736425667e-06, "loss": 0.4492, "step": 964 }, { "epoch": 0.6889576392194193, "grad_norm": 0.43579593300819397, "learning_rate": 9.495790350592782e-06, "loss": 0.4458, "step": 965 }, { "epoch": 0.6896715849595431, "grad_norm": 0.3609623908996582, "learning_rate": 9.493970231495836e-06, "loss": 0.4364, "step": 966 }, { "epoch": 0.6903855306996668, "grad_norm": 0.41585561633110046, "learning_rate": 9.492147008223067e-06, "loss": 0.4463, "step": 967 }, { "epoch": 0.6910994764397905, "grad_norm": 0.34034019708633423, "learning_rate": 9.490320682033854e-06, "loss": 0.4434, "step": 968 }, { "epoch": 0.6918134221799144, "grad_norm": 0.42653706669807434, "learning_rate": 9.488491254189718e-06, "loss": 0.4601, "step": 969 }, { "epoch": 0.6925273679200381, "grad_norm": 0.3717711567878723, "learning_rate": 9.486658725954321e-06, "loss": 0.443, "step": 970 }, { "epoch": 0.6932413136601618, "grad_norm": 0.4123340845108032, "learning_rate": 9.484823098593467e-06, "loss": 0.4682, "step": 971 }, { "epoch": 0.6939552594002856, "grad_norm": 0.4810003638267517, "learning_rate": 9.482984373375105e-06, "loss": 0.4466, "step": 972 }, { "epoch": 0.6946692051404093, "grad_norm": 0.3973073959350586, "learning_rate": 9.481142551569318e-06, "loss": 0.4563, "step": 973 }, { "epoch": 0.695383150880533, "grad_norm": 0.4730876088142395, "learning_rate": 9.479297634448331e-06, "loss": 0.4923, "step": 974 }, { "epoch": 0.6960970966206569, "grad_norm": 0.42745110392570496, "learning_rate": 9.477449623286505e-06, "loss": 0.4847, "step": 975 }, { "epoch": 0.6968110423607806, "grad_norm": 0.39569368958473206, "learning_rate": 9.475598519360345e-06, "loss": 0.4554, "step": 976 }, { "epoch": 0.6975249881009044, "grad_norm": 0.45202112197875977, "learning_rate": 9.47374432394848e-06, "loss": 0.4528, "step": 977 }, { "epoch": 0.6982389338410281, "grad_norm": 0.3970228433609009, "learning_rate": 9.471887038331686e-06, "loss": 0.442, "step": 978 }, { "epoch": 0.6989528795811518, "grad_norm": 0.4375821053981781, "learning_rate": 9.470026663792867e-06, "loss": 0.4448, "step": 979 }, { "epoch": 0.6996668253212756, "grad_norm": 0.42357251048088074, "learning_rate": 9.468163201617063e-06, "loss": 0.4447, "step": 980 }, { "epoch": 0.7003807710613993, "grad_norm": 0.4188991189002991, "learning_rate": 9.466296653091446e-06, "loss": 0.4665, "step": 981 }, { "epoch": 0.7010947168015231, "grad_norm": 0.40749698877334595, "learning_rate": 9.464427019505321e-06, "loss": 0.4744, "step": 982 }, { "epoch": 0.7018086625416469, "grad_norm": 0.47900545597076416, "learning_rate": 9.462554302150122e-06, "loss": 0.4145, "step": 983 }, { "epoch": 0.7025226082817706, "grad_norm": 0.42986226081848145, "learning_rate": 9.460678502319419e-06, "loss": 0.4618, "step": 984 }, { "epoch": 0.7032365540218943, "grad_norm": 0.3790907561779022, "learning_rate": 9.458799621308898e-06, "loss": 0.4712, "step": 985 }, { "epoch": 0.7039504997620181, "grad_norm": 0.42422616481781006, "learning_rate": 9.456917660416389e-06, "loss": 0.4583, "step": 986 }, { "epoch": 0.7046644455021418, "grad_norm": 0.44558125734329224, "learning_rate": 9.45503262094184e-06, "loss": 0.491, "step": 987 }, { "epoch": 0.7053783912422655, "grad_norm": 0.39622268080711365, "learning_rate": 9.453144504187327e-06, "loss": 0.4493, "step": 988 }, { "epoch": 0.7060923369823894, "grad_norm": 0.40596848726272583, "learning_rate": 9.451253311457052e-06, "loss": 0.4596, "step": 989 }, { "epoch": 0.7068062827225131, "grad_norm": 0.40984272956848145, "learning_rate": 9.449359044057344e-06, "loss": 0.431, "step": 990 }, { "epoch": 0.7075202284626368, "grad_norm": 0.3763715922832489, "learning_rate": 9.447461703296652e-06, "loss": 0.4289, "step": 991 }, { "epoch": 0.7082341742027606, "grad_norm": 0.42052599787712097, "learning_rate": 9.44556129048555e-06, "loss": 0.469, "step": 992 }, { "epoch": 0.7089481199428843, "grad_norm": 0.40017423033714294, "learning_rate": 9.443657806936735e-06, "loss": 0.4359, "step": 993 }, { "epoch": 0.709662065683008, "grad_norm": 0.4136017858982086, "learning_rate": 9.441751253965022e-06, "loss": 0.4572, "step": 994 }, { "epoch": 0.7103760114231319, "grad_norm": 0.4294152557849884, "learning_rate": 9.439841632887348e-06, "loss": 0.4513, "step": 995 }, { "epoch": 0.7110899571632556, "grad_norm": 0.4472765624523163, "learning_rate": 9.437928945022772e-06, "loss": 0.4625, "step": 996 }, { "epoch": 0.7118039029033794, "grad_norm": 0.4365465044975281, "learning_rate": 9.436013191692465e-06, "loss": 0.4165, "step": 997 }, { "epoch": 0.7125178486435031, "grad_norm": 0.4257107079029083, "learning_rate": 9.434094374219722e-06, "loss": 0.479, "step": 998 }, { "epoch": 0.7132317943836268, "grad_norm": 0.47689110040664673, "learning_rate": 9.432172493929949e-06, "loss": 0.4505, "step": 999 }, { "epoch": 0.7139457401237506, "grad_norm": 0.4511907696723938, "learning_rate": 9.430247552150673e-06, "loss": 0.4315, "step": 1000 }, { "epoch": 0.7146596858638743, "grad_norm": 0.4138486087322235, "learning_rate": 9.428319550211531e-06, "loss": 0.442, "step": 1001 }, { "epoch": 0.7153736316039981, "grad_norm": 0.5057360529899597, "learning_rate": 9.426388489444276e-06, "loss": 0.4672, "step": 1002 }, { "epoch": 0.7160875773441219, "grad_norm": 0.39573320746421814, "learning_rate": 9.424454371182774e-06, "loss": 0.4326, "step": 1003 }, { "epoch": 0.7168015230842456, "grad_norm": 0.44264817237854004, "learning_rate": 9.422517196763002e-06, "loss": 0.4489, "step": 1004 }, { "epoch": 0.7175154688243693, "grad_norm": 0.4479890465736389, "learning_rate": 9.420576967523049e-06, "loss": 0.4146, "step": 1005 }, { "epoch": 0.7182294145644931, "grad_norm": 0.44956323504447937, "learning_rate": 9.418633684803114e-06, "loss": 0.4258, "step": 1006 }, { "epoch": 0.7189433603046168, "grad_norm": 0.4050552248954773, "learning_rate": 9.416687349945504e-06, "loss": 0.423, "step": 1007 }, { "epoch": 0.7196573060447407, "grad_norm": 0.4073205888271332, "learning_rate": 9.414737964294636e-06, "loss": 0.4615, "step": 1008 }, { "epoch": 0.7203712517848644, "grad_norm": 0.4946090579032898, "learning_rate": 9.41278552919703e-06, "loss": 0.4385, "step": 1009 }, { "epoch": 0.7210851975249881, "grad_norm": 0.4424374997615814, "learning_rate": 9.410830046001321e-06, "loss": 0.452, "step": 1010 }, { "epoch": 0.7217991432651119, "grad_norm": 0.43435031175613403, "learning_rate": 9.408871516058241e-06, "loss": 0.4306, "step": 1011 }, { "epoch": 0.7225130890052356, "grad_norm": 0.44740062952041626, "learning_rate": 9.40690994072063e-06, "loss": 0.4497, "step": 1012 }, { "epoch": 0.7232270347453593, "grad_norm": 0.4270225465297699, "learning_rate": 9.404945321343431e-06, "loss": 0.4482, "step": 1013 }, { "epoch": 0.723940980485483, "grad_norm": 0.4370426833629608, "learning_rate": 9.40297765928369e-06, "loss": 0.4414, "step": 1014 }, { "epoch": 0.7246549262256069, "grad_norm": 0.45443597435951233, "learning_rate": 9.401006955900555e-06, "loss": 0.4643, "step": 1015 }, { "epoch": 0.7253688719657306, "grad_norm": 0.4412541687488556, "learning_rate": 9.399033212555276e-06, "loss": 0.4348, "step": 1016 }, { "epoch": 0.7260828177058544, "grad_norm": 0.47233814001083374, "learning_rate": 9.3970564306112e-06, "loss": 0.4477, "step": 1017 }, { "epoch": 0.7267967634459781, "grad_norm": 0.39398062229156494, "learning_rate": 9.39507661143377e-06, "loss": 0.4238, "step": 1018 }, { "epoch": 0.7275107091861018, "grad_norm": 0.4450756013393402, "learning_rate": 9.39309375639054e-06, "loss": 0.4784, "step": 1019 }, { "epoch": 0.7282246549262256, "grad_norm": 0.41000860929489136, "learning_rate": 9.391107866851143e-06, "loss": 0.4442, "step": 1020 }, { "epoch": 0.7289386006663494, "grad_norm": 0.38451582193374634, "learning_rate": 9.389118944187324e-06, "loss": 0.4506, "step": 1021 }, { "epoch": 0.7296525464064731, "grad_norm": 0.4028826057910919, "learning_rate": 9.38712698977291e-06, "loss": 0.4375, "step": 1022 }, { "epoch": 0.7303664921465969, "grad_norm": 0.42325252294540405, "learning_rate": 9.385132004983834e-06, "loss": 0.4381, "step": 1023 }, { "epoch": 0.7310804378867206, "grad_norm": 0.45173129439353943, "learning_rate": 9.383133991198113e-06, "loss": 0.4492, "step": 1024 }, { "epoch": 0.7317943836268443, "grad_norm": 0.4049539566040039, "learning_rate": 9.381132949795862e-06, "loss": 0.4618, "step": 1025 }, { "epoch": 0.7325083293669681, "grad_norm": 0.40364423394203186, "learning_rate": 9.379128882159283e-06, "loss": 0.4332, "step": 1026 }, { "epoch": 0.7332222751070918, "grad_norm": 0.40010765194892883, "learning_rate": 9.377121789672673e-06, "loss": 0.4407, "step": 1027 }, { "epoch": 0.7339362208472157, "grad_norm": 0.4244546890258789, "learning_rate": 9.375111673722415e-06, "loss": 0.427, "step": 1028 }, { "epoch": 0.7346501665873394, "grad_norm": 0.349141389131546, "learning_rate": 9.37309853569698e-06, "loss": 0.4416, "step": 1029 }, { "epoch": 0.7353641123274631, "grad_norm": 0.4288831055164337, "learning_rate": 9.37108237698693e-06, "loss": 0.4508, "step": 1030 }, { "epoch": 0.7360780580675869, "grad_norm": 0.35316914319992065, "learning_rate": 9.369063198984909e-06, "loss": 0.4277, "step": 1031 }, { "epoch": 0.7367920038077106, "grad_norm": 0.4101913571357727, "learning_rate": 9.36704100308565e-06, "loss": 0.4538, "step": 1032 }, { "epoch": 0.7375059495478343, "grad_norm": 0.3987557590007782, "learning_rate": 9.36501579068597e-06, "loss": 0.4403, "step": 1033 }, { "epoch": 0.7382198952879581, "grad_norm": 0.39294275641441345, "learning_rate": 9.362987563184767e-06, "loss": 0.472, "step": 1034 }, { "epoch": 0.7389338410280819, "grad_norm": 0.41065865755081177, "learning_rate": 9.360956321983028e-06, "loss": 0.4676, "step": 1035 }, { "epoch": 0.7396477867682056, "grad_norm": 0.4457060694694519, "learning_rate": 9.358922068483813e-06, "loss": 0.4629, "step": 1036 }, { "epoch": 0.7403617325083294, "grad_norm": 0.389983206987381, "learning_rate": 9.35688480409227e-06, "loss": 0.4795, "step": 1037 }, { "epoch": 0.7410756782484531, "grad_norm": 0.42484769225120544, "learning_rate": 9.354844530215621e-06, "loss": 0.4553, "step": 1038 }, { "epoch": 0.7417896239885768, "grad_norm": 0.4432097375392914, "learning_rate": 9.352801248263172e-06, "loss": 0.4608, "step": 1039 }, { "epoch": 0.7425035697287006, "grad_norm": 0.4870185852050781, "learning_rate": 9.350754959646306e-06, "loss": 0.4614, "step": 1040 }, { "epoch": 0.7432175154688244, "grad_norm": 0.38214534521102905, "learning_rate": 9.348705665778479e-06, "loss": 0.4546, "step": 1041 }, { "epoch": 0.7439314612089482, "grad_norm": 0.3865267038345337, "learning_rate": 9.346653368075223e-06, "loss": 0.4362, "step": 1042 }, { "epoch": 0.7446454069490719, "grad_norm": 0.39638006687164307, "learning_rate": 9.344598067954151e-06, "loss": 0.4522, "step": 1043 }, { "epoch": 0.7453593526891956, "grad_norm": 0.36356687545776367, "learning_rate": 9.342539766834945e-06, "loss": 0.4244, "step": 1044 }, { "epoch": 0.7460732984293194, "grad_norm": 0.3600682020187378, "learning_rate": 9.34047846613936e-06, "loss": 0.4471, "step": 1045 }, { "epoch": 0.7467872441694431, "grad_norm": 0.36762747168540955, "learning_rate": 9.338414167291225e-06, "loss": 0.4462, "step": 1046 }, { "epoch": 0.7475011899095668, "grad_norm": 0.3906828463077545, "learning_rate": 9.336346871716438e-06, "loss": 0.4198, "step": 1047 }, { "epoch": 0.7482151356496907, "grad_norm": 0.3782983720302582, "learning_rate": 9.334276580842966e-06, "loss": 0.4774, "step": 1048 }, { "epoch": 0.7489290813898144, "grad_norm": 0.4157158136367798, "learning_rate": 9.33220329610085e-06, "loss": 0.4396, "step": 1049 }, { "epoch": 0.7496430271299381, "grad_norm": 0.3996657431125641, "learning_rate": 9.330127018922195e-06, "loss": 0.4606, "step": 1050 }, { "epoch": 0.7503569728700619, "grad_norm": 0.37893790006637573, "learning_rate": 9.328047750741171e-06, "loss": 0.4509, "step": 1051 }, { "epoch": 0.7510709186101856, "grad_norm": 0.4385627806186676, "learning_rate": 9.325965492994018e-06, "loss": 0.4568, "step": 1052 }, { "epoch": 0.7517848643503093, "grad_norm": 0.3845553696155548, "learning_rate": 9.323880247119041e-06, "loss": 0.4605, "step": 1053 }, { "epoch": 0.7524988100904332, "grad_norm": 0.38276010751724243, "learning_rate": 9.321792014556608e-06, "loss": 0.433, "step": 1054 }, { "epoch": 0.7532127558305569, "grad_norm": 0.36342307925224304, "learning_rate": 9.31970079674915e-06, "loss": 0.4406, "step": 1055 }, { "epoch": 0.7539267015706806, "grad_norm": 0.36015015840530396, "learning_rate": 9.317606595141156e-06, "loss": 0.4449, "step": 1056 }, { "epoch": 0.7546406473108044, "grad_norm": 0.4197927713394165, "learning_rate": 9.315509411179182e-06, "loss": 0.4598, "step": 1057 }, { "epoch": 0.7553545930509281, "grad_norm": 0.36679941415786743, "learning_rate": 9.313409246311844e-06, "loss": 0.4717, "step": 1058 }, { "epoch": 0.7560685387910518, "grad_norm": 0.3999110162258148, "learning_rate": 9.311306101989814e-06, "loss": 0.4588, "step": 1059 }, { "epoch": 0.7567824845311756, "grad_norm": 0.418150931596756, "learning_rate": 9.309199979665821e-06, "loss": 0.4451, "step": 1060 }, { "epoch": 0.7574964302712994, "grad_norm": 0.38037365674972534, "learning_rate": 9.307090880794654e-06, "loss": 0.446, "step": 1061 }, { "epoch": 0.7582103760114232, "grad_norm": 0.37348586320877075, "learning_rate": 9.304978806833158e-06, "loss": 0.4598, "step": 1062 }, { "epoch": 0.7589243217515469, "grad_norm": 0.3958044648170471, "learning_rate": 9.302863759240231e-06, "loss": 0.4636, "step": 1063 }, { "epoch": 0.7596382674916706, "grad_norm": 0.43031439185142517, "learning_rate": 9.30074573947683e-06, "loss": 0.4353, "step": 1064 }, { "epoch": 0.7603522132317944, "grad_norm": 0.4074847400188446, "learning_rate": 9.298624749005953e-06, "loss": 0.4412, "step": 1065 }, { "epoch": 0.7610661589719181, "grad_norm": 0.42829662561416626, "learning_rate": 9.296500789292663e-06, "loss": 0.448, "step": 1066 }, { "epoch": 0.7617801047120419, "grad_norm": 0.4095578193664551, "learning_rate": 9.29437386180407e-06, "loss": 0.481, "step": 1067 }, { "epoch": 0.7624940504521657, "grad_norm": 0.41284245252609253, "learning_rate": 9.292243968009332e-06, "loss": 0.4389, "step": 1068 }, { "epoch": 0.7632079961922894, "grad_norm": 0.3740554451942444, "learning_rate": 9.290111109379656e-06, "loss": 0.4504, "step": 1069 }, { "epoch": 0.7639219419324131, "grad_norm": 0.4073421359062195, "learning_rate": 9.287975287388297e-06, "loss": 0.4458, "step": 1070 }, { "epoch": 0.7646358876725369, "grad_norm": 0.3934573531150818, "learning_rate": 9.285836503510562e-06, "loss": 0.4773, "step": 1071 }, { "epoch": 0.7653498334126606, "grad_norm": 0.4184890389442444, "learning_rate": 9.283694759223795e-06, "loss": 0.4745, "step": 1072 }, { "epoch": 0.7660637791527843, "grad_norm": 0.4062485098838806, "learning_rate": 9.281550056007395e-06, "loss": 0.4418, "step": 1073 }, { "epoch": 0.7667777248929082, "grad_norm": 0.3993505537509918, "learning_rate": 9.279402395342794e-06, "loss": 0.4546, "step": 1074 }, { "epoch": 0.7674916706330319, "grad_norm": 0.4744284749031067, "learning_rate": 9.277251778713475e-06, "loss": 0.4483, "step": 1075 }, { "epoch": 0.7682056163731557, "grad_norm": 0.6329030394554138, "learning_rate": 9.275098207604958e-06, "loss": 0.4423, "step": 1076 }, { "epoch": 0.7689195621132794, "grad_norm": 0.461624413728714, "learning_rate": 9.27294168350481e-06, "loss": 0.4544, "step": 1077 }, { "epoch": 0.7696335078534031, "grad_norm": 0.4024062156677246, "learning_rate": 9.27078220790263e-06, "loss": 0.4417, "step": 1078 }, { "epoch": 0.7703474535935269, "grad_norm": 0.40257468819618225, "learning_rate": 9.268619782290058e-06, "loss": 0.461, "step": 1079 }, { "epoch": 0.7710613993336506, "grad_norm": 0.4006763994693756, "learning_rate": 9.266454408160779e-06, "loss": 0.4367, "step": 1080 }, { "epoch": 0.7717753450737744, "grad_norm": 0.4012266993522644, "learning_rate": 9.264286087010504e-06, "loss": 0.4523, "step": 1081 }, { "epoch": 0.7724892908138982, "grad_norm": 0.39539411664009094, "learning_rate": 9.262114820336987e-06, "loss": 0.451, "step": 1082 }, { "epoch": 0.7732032365540219, "grad_norm": 0.3492133617401123, "learning_rate": 9.25994060964001e-06, "loss": 0.453, "step": 1083 }, { "epoch": 0.7739171822941456, "grad_norm": 0.4016281068325043, "learning_rate": 9.257763456421398e-06, "loss": 0.4703, "step": 1084 }, { "epoch": 0.7746311280342694, "grad_norm": 0.3659343421459198, "learning_rate": 9.255583362184998e-06, "loss": 0.4595, "step": 1085 }, { "epoch": 0.7753450737743931, "grad_norm": 0.4803147315979004, "learning_rate": 9.253400328436699e-06, "loss": 0.4549, "step": 1086 }, { "epoch": 0.776059019514517, "grad_norm": 0.39474865794181824, "learning_rate": 9.251214356684409e-06, "loss": 0.4466, "step": 1087 }, { "epoch": 0.7767729652546407, "grad_norm": 0.4754883050918579, "learning_rate": 9.249025448438076e-06, "loss": 0.4384, "step": 1088 }, { "epoch": 0.7774869109947644, "grad_norm": 0.4194841980934143, "learning_rate": 9.246833605209669e-06, "loss": 0.4519, "step": 1089 }, { "epoch": 0.7782008567348881, "grad_norm": 0.3891260623931885, "learning_rate": 9.244638828513189e-06, "loss": 0.4355, "step": 1090 }, { "epoch": 0.7789148024750119, "grad_norm": 0.48531225323677063, "learning_rate": 9.242441119864658e-06, "loss": 0.4505, "step": 1091 }, { "epoch": 0.7796287482151356, "grad_norm": 0.4491783082485199, "learning_rate": 9.24024048078213e-06, "loss": 0.4467, "step": 1092 }, { "epoch": 0.7803426939552593, "grad_norm": 0.43060213327407837, "learning_rate": 9.23803691278568e-06, "loss": 0.4687, "step": 1093 }, { "epoch": 0.7810566396953832, "grad_norm": 0.46565043926239014, "learning_rate": 9.235830417397404e-06, "loss": 0.4411, "step": 1094 }, { "epoch": 0.7817705854355069, "grad_norm": 0.47758641839027405, "learning_rate": 9.233620996141421e-06, "loss": 0.4402, "step": 1095 }, { "epoch": 0.7824845311756307, "grad_norm": 0.44061461091041565, "learning_rate": 9.231408650543875e-06, "loss": 0.4504, "step": 1096 }, { "epoch": 0.7831984769157544, "grad_norm": 0.431016206741333, "learning_rate": 9.229193382132926e-06, "loss": 0.426, "step": 1097 }, { "epoch": 0.7839124226558781, "grad_norm": 0.385990709066391, "learning_rate": 9.226975192438752e-06, "loss": 0.4221, "step": 1098 }, { "epoch": 0.7846263683960019, "grad_norm": 0.4171094000339508, "learning_rate": 9.224754082993553e-06, "loss": 0.44, "step": 1099 }, { "epoch": 0.7853403141361257, "grad_norm": 0.3991812765598297, "learning_rate": 9.22253005533154e-06, "loss": 0.4635, "step": 1100 }, { "epoch": 0.7860542598762494, "grad_norm": 0.4003165364265442, "learning_rate": 9.220303110988947e-06, "loss": 0.43, "step": 1101 }, { "epoch": 0.7867682056163732, "grad_norm": 0.4350619316101074, "learning_rate": 9.218073251504018e-06, "loss": 0.4448, "step": 1102 }, { "epoch": 0.7874821513564969, "grad_norm": 0.4046204686164856, "learning_rate": 9.21584047841701e-06, "loss": 0.4322, "step": 1103 }, { "epoch": 0.7881960970966206, "grad_norm": 0.4128446877002716, "learning_rate": 9.213604793270196e-06, "loss": 0.4492, "step": 1104 }, { "epoch": 0.7889100428367444, "grad_norm": 0.4479241967201233, "learning_rate": 9.211366197607858e-06, "loss": 0.4611, "step": 1105 }, { "epoch": 0.7896239885768681, "grad_norm": 0.3800888955593109, "learning_rate": 9.209124692976287e-06, "loss": 0.4459, "step": 1106 }, { "epoch": 0.790337934316992, "grad_norm": 0.4059661030769348, "learning_rate": 9.20688028092379e-06, "loss": 0.4507, "step": 1107 }, { "epoch": 0.7910518800571157, "grad_norm": 0.38508448004722595, "learning_rate": 9.204632963000671e-06, "loss": 0.4567, "step": 1108 }, { "epoch": 0.7917658257972394, "grad_norm": 0.38886934518814087, "learning_rate": 9.202382740759253e-06, "loss": 0.4482, "step": 1109 }, { "epoch": 0.7924797715373632, "grad_norm": 0.4030570089817047, "learning_rate": 9.200129615753858e-06, "loss": 0.4296, "step": 1110 }, { "epoch": 0.7931937172774869, "grad_norm": 0.4152039587497711, "learning_rate": 9.197873589540815e-06, "loss": 0.4635, "step": 1111 }, { "epoch": 0.7939076630176106, "grad_norm": 0.39336398243904114, "learning_rate": 9.195614663678458e-06, "loss": 0.4337, "step": 1112 }, { "epoch": 0.7946216087577345, "grad_norm": 0.46112725138664246, "learning_rate": 9.193352839727122e-06, "loss": 0.4793, "step": 1113 }, { "epoch": 0.7953355544978582, "grad_norm": 0.41267451643943787, "learning_rate": 9.191088119249143e-06, "loss": 0.4682, "step": 1114 }, { "epoch": 0.7960495002379819, "grad_norm": 0.40518462657928467, "learning_rate": 9.188820503808862e-06, "loss": 0.4561, "step": 1115 }, { "epoch": 0.7967634459781057, "grad_norm": 0.39974531531333923, "learning_rate": 9.186549994972618e-06, "loss": 0.4609, "step": 1116 }, { "epoch": 0.7974773917182294, "grad_norm": 0.3782057464122772, "learning_rate": 9.184276594308745e-06, "loss": 0.4569, "step": 1117 }, { "epoch": 0.7981913374583531, "grad_norm": 0.40960463881492615, "learning_rate": 9.182000303387579e-06, "loss": 0.48, "step": 1118 }, { "epoch": 0.7989052831984769, "grad_norm": 0.39368903636932373, "learning_rate": 9.179721123781448e-06, "loss": 0.4687, "step": 1119 }, { "epoch": 0.7996192289386007, "grad_norm": 0.4052329361438751, "learning_rate": 9.177439057064684e-06, "loss": 0.4558, "step": 1120 }, { "epoch": 0.8003331746787244, "grad_norm": 0.4367281198501587, "learning_rate": 9.175154104813599e-06, "loss": 0.463, "step": 1121 }, { "epoch": 0.8010471204188482, "grad_norm": 0.3932774066925049, "learning_rate": 9.172866268606514e-06, "loss": 0.4425, "step": 1122 }, { "epoch": 0.8017610661589719, "grad_norm": 0.4212522804737091, "learning_rate": 9.170575550023731e-06, "loss": 0.4625, "step": 1123 }, { "epoch": 0.8024750118990956, "grad_norm": 0.4118433892726898, "learning_rate": 9.168281950647545e-06, "loss": 0.4287, "step": 1124 }, { "epoch": 0.8031889576392194, "grad_norm": 0.39600682258605957, "learning_rate": 9.165985472062245e-06, "loss": 0.4328, "step": 1125 }, { "epoch": 0.8039029033793431, "grad_norm": 0.40919768810272217, "learning_rate": 9.163686115854105e-06, "loss": 0.4364, "step": 1126 }, { "epoch": 0.804616849119467, "grad_norm": 0.4236423373222351, "learning_rate": 9.16138388361139e-06, "loss": 0.4693, "step": 1127 }, { "epoch": 0.8053307948595907, "grad_norm": 0.44984033703804016, "learning_rate": 9.159078776924347e-06, "loss": 0.4504, "step": 1128 }, { "epoch": 0.8060447405997144, "grad_norm": 0.36435410380363464, "learning_rate": 9.156770797385209e-06, "loss": 0.4604, "step": 1129 }, { "epoch": 0.8067586863398382, "grad_norm": 0.37174874544143677, "learning_rate": 9.154459946588199e-06, "loss": 0.418, "step": 1130 }, { "epoch": 0.8074726320799619, "grad_norm": 0.40497592091560364, "learning_rate": 9.152146226129519e-06, "loss": 0.4751, "step": 1131 }, { "epoch": 0.8081865778200856, "grad_norm": 0.38946202397346497, "learning_rate": 9.149829637607354e-06, "loss": 0.4513, "step": 1132 }, { "epoch": 0.8089005235602095, "grad_norm": 0.3943304419517517, "learning_rate": 9.147510182621868e-06, "loss": 0.4425, "step": 1133 }, { "epoch": 0.8096144693003332, "grad_norm": 0.3692152798175812, "learning_rate": 9.145187862775208e-06, "loss": 0.4375, "step": 1134 }, { "epoch": 0.8103284150404569, "grad_norm": 0.3853449821472168, "learning_rate": 9.1428626796715e-06, "loss": 0.4534, "step": 1135 }, { "epoch": 0.8110423607805807, "grad_norm": 0.38231685757637024, "learning_rate": 9.140534634916847e-06, "loss": 0.4344, "step": 1136 }, { "epoch": 0.8117563065207044, "grad_norm": 0.4321335554122925, "learning_rate": 9.138203730119326e-06, "loss": 0.4407, "step": 1137 }, { "epoch": 0.8124702522608281, "grad_norm": 0.40988704562187195, "learning_rate": 9.13586996688899e-06, "loss": 0.4396, "step": 1138 }, { "epoch": 0.8131841980009519, "grad_norm": 0.4521836042404175, "learning_rate": 9.133533346837873e-06, "loss": 0.4622, "step": 1139 }, { "epoch": 0.8138981437410757, "grad_norm": 0.46127015352249146, "learning_rate": 9.131193871579975e-06, "loss": 0.4805, "step": 1140 }, { "epoch": 0.8146120894811995, "grad_norm": 0.40231549739837646, "learning_rate": 9.128851542731271e-06, "loss": 0.4596, "step": 1141 }, { "epoch": 0.8153260352213232, "grad_norm": 0.4261883795261383, "learning_rate": 9.126506361909709e-06, "loss": 0.4583, "step": 1142 }, { "epoch": 0.8160399809614469, "grad_norm": 0.42851459980010986, "learning_rate": 9.1241583307352e-06, "loss": 0.4457, "step": 1143 }, { "epoch": 0.8167539267015707, "grad_norm": 0.38919615745544434, "learning_rate": 9.121807450829633e-06, "loss": 0.4706, "step": 1144 }, { "epoch": 0.8174678724416944, "grad_norm": 0.4283595383167267, "learning_rate": 9.119453723816858e-06, "loss": 0.4363, "step": 1145 }, { "epoch": 0.8181818181818182, "grad_norm": 0.34559234976768494, "learning_rate": 9.117097151322697e-06, "loss": 0.448, "step": 1146 }, { "epoch": 0.818895763921942, "grad_norm": 0.41315630078315735, "learning_rate": 9.114737734974932e-06, "loss": 0.4775, "step": 1147 }, { "epoch": 0.8196097096620657, "grad_norm": 0.4023497402667999, "learning_rate": 9.112375476403313e-06, "loss": 0.4312, "step": 1148 }, { "epoch": 0.8203236554021894, "grad_norm": 0.3901861011981964, "learning_rate": 9.110010377239552e-06, "loss": 0.4384, "step": 1149 }, { "epoch": 0.8210376011423132, "grad_norm": 0.40413418412208557, "learning_rate": 9.107642439117322e-06, "loss": 0.4393, "step": 1150 }, { "epoch": 0.8217515468824369, "grad_norm": 0.441320538520813, "learning_rate": 9.105271663672259e-06, "loss": 0.4423, "step": 1151 }, { "epoch": 0.8224654926225606, "grad_norm": 0.4004063904285431, "learning_rate": 9.102898052541959e-06, "loss": 0.4527, "step": 1152 }, { "epoch": 0.8231794383626845, "grad_norm": 0.48579734563827515, "learning_rate": 9.100521607365975e-06, "loss": 0.4412, "step": 1153 }, { "epoch": 0.8238933841028082, "grad_norm": 0.392312616109848, "learning_rate": 9.09814232978582e-06, "loss": 0.4638, "step": 1154 }, { "epoch": 0.824607329842932, "grad_norm": 0.39549192786216736, "learning_rate": 9.09576022144496e-06, "loss": 0.4613, "step": 1155 }, { "epoch": 0.8253212755830557, "grad_norm": 0.4085342586040497, "learning_rate": 9.093375283988819e-06, "loss": 0.4463, "step": 1156 }, { "epoch": 0.8260352213231794, "grad_norm": 0.38182303309440613, "learning_rate": 9.090987519064774e-06, "loss": 0.4379, "step": 1157 }, { "epoch": 0.8267491670633031, "grad_norm": 0.37258681654930115, "learning_rate": 9.088596928322158e-06, "loss": 0.4367, "step": 1158 }, { "epoch": 0.827463112803427, "grad_norm": 0.4722216725349426, "learning_rate": 9.086203513412249e-06, "loss": 0.4537, "step": 1159 }, { "epoch": 0.8281770585435507, "grad_norm": 0.37080612778663635, "learning_rate": 9.083807275988285e-06, "loss": 0.436, "step": 1160 }, { "epoch": 0.8288910042836745, "grad_norm": 0.4227668344974518, "learning_rate": 9.081408217705446e-06, "loss": 0.4674, "step": 1161 }, { "epoch": 0.8296049500237982, "grad_norm": 0.39168256521224976, "learning_rate": 9.079006340220862e-06, "loss": 0.4491, "step": 1162 }, { "epoch": 0.8303188957639219, "grad_norm": 0.42359039187431335, "learning_rate": 9.076601645193612e-06, "loss": 0.4535, "step": 1163 }, { "epoch": 0.8310328415040457, "grad_norm": 0.40439915657043457, "learning_rate": 9.074194134284726e-06, "loss": 0.4363, "step": 1164 }, { "epoch": 0.8317467872441694, "grad_norm": 0.39631712436676025, "learning_rate": 9.071783809157168e-06, "loss": 0.4343, "step": 1165 }, { "epoch": 0.8324607329842932, "grad_norm": 0.4184314012527466, "learning_rate": 9.069370671475853e-06, "loss": 0.4557, "step": 1166 }, { "epoch": 0.833174678724417, "grad_norm": 0.4567212760448456, "learning_rate": 9.066954722907639e-06, "loss": 0.4346, "step": 1167 }, { "epoch": 0.8338886244645407, "grad_norm": 0.39337655901908875, "learning_rate": 9.064535965121324e-06, "loss": 0.4588, "step": 1168 }, { "epoch": 0.8346025702046644, "grad_norm": 0.42534512281417847, "learning_rate": 9.062114399787648e-06, "loss": 0.471, "step": 1169 }, { "epoch": 0.8353165159447882, "grad_norm": 0.4587640166282654, "learning_rate": 9.059690028579285e-06, "loss": 0.4666, "step": 1170 }, { "epoch": 0.8360304616849119, "grad_norm": 0.42202210426330566, "learning_rate": 9.057262853170851e-06, "loss": 0.4456, "step": 1171 }, { "epoch": 0.8367444074250358, "grad_norm": 0.34864142537117004, "learning_rate": 9.054832875238903e-06, "loss": 0.4319, "step": 1172 }, { "epoch": 0.8374583531651595, "grad_norm": 0.3956230878829956, "learning_rate": 9.052400096461928e-06, "loss": 0.4487, "step": 1173 }, { "epoch": 0.8381722989052832, "grad_norm": 0.38575971126556396, "learning_rate": 9.049964518520348e-06, "loss": 0.4522, "step": 1174 }, { "epoch": 0.838886244645407, "grad_norm": 0.4551747739315033, "learning_rate": 9.047526143096522e-06, "loss": 0.4434, "step": 1175 }, { "epoch": 0.8396001903855307, "grad_norm": 0.39886242151260376, "learning_rate": 9.045084971874738e-06, "loss": 0.4631, "step": 1176 }, { "epoch": 0.8403141361256544, "grad_norm": 0.3898126780986786, "learning_rate": 9.042641006541218e-06, "loss": 0.4434, "step": 1177 }, { "epoch": 0.8410280818657782, "grad_norm": 0.48534339666366577, "learning_rate": 9.04019424878411e-06, "loss": 0.4949, "step": 1178 }, { "epoch": 0.841742027605902, "grad_norm": 0.4195680320262909, "learning_rate": 9.037744700293497e-06, "loss": 0.4591, "step": 1179 }, { "epoch": 0.8424559733460257, "grad_norm": 0.39496639370918274, "learning_rate": 9.035292362761382e-06, "loss": 0.44, "step": 1180 }, { "epoch": 0.8431699190861495, "grad_norm": 0.3909156024456024, "learning_rate": 9.032837237881699e-06, "loss": 0.4484, "step": 1181 }, { "epoch": 0.8438838648262732, "grad_norm": 0.4194478988647461, "learning_rate": 9.030379327350311e-06, "loss": 0.4506, "step": 1182 }, { "epoch": 0.8445978105663969, "grad_norm": 0.4193781018257141, "learning_rate": 9.027918632864998e-06, "loss": 0.4855, "step": 1183 }, { "epoch": 0.8453117563065207, "grad_norm": 0.3767452538013458, "learning_rate": 9.025455156125466e-06, "loss": 0.4603, "step": 1184 }, { "epoch": 0.8460257020466444, "grad_norm": 0.4139699339866638, "learning_rate": 9.022988898833342e-06, "loss": 0.4681, "step": 1185 }, { "epoch": 0.8467396477867682, "grad_norm": 0.4191766381263733, "learning_rate": 9.020519862692176e-06, "loss": 0.4556, "step": 1186 }, { "epoch": 0.847453593526892, "grad_norm": 0.44919589161872864, "learning_rate": 9.018048049407437e-06, "loss": 0.4668, "step": 1187 }, { "epoch": 0.8481675392670157, "grad_norm": 0.38122469186782837, "learning_rate": 9.01557346068651e-06, "loss": 0.4157, "step": 1188 }, { "epoch": 0.8488814850071394, "grad_norm": 0.36956140398979187, "learning_rate": 9.013096098238697e-06, "loss": 0.4559, "step": 1189 }, { "epoch": 0.8495954307472632, "grad_norm": 0.41490405797958374, "learning_rate": 9.01061596377522e-06, "loss": 0.4633, "step": 1190 }, { "epoch": 0.8503093764873869, "grad_norm": 0.4113265872001648, "learning_rate": 9.00813305900921e-06, "loss": 0.427, "step": 1191 }, { "epoch": 0.8510233222275108, "grad_norm": 0.38517022132873535, "learning_rate": 9.005647385655718e-06, "loss": 0.4287, "step": 1192 }, { "epoch": 0.8517372679676345, "grad_norm": 0.4551391303539276, "learning_rate": 9.0031589454317e-06, "loss": 0.4452, "step": 1193 }, { "epoch": 0.8524512137077582, "grad_norm": 0.4008440673351288, "learning_rate": 9.000667740056033e-06, "loss": 0.4416, "step": 1194 }, { "epoch": 0.853165159447882, "grad_norm": 0.41434797644615173, "learning_rate": 8.998173771249491e-06, "loss": 0.4596, "step": 1195 }, { "epoch": 0.8538791051880057, "grad_norm": 0.4218859374523163, "learning_rate": 8.99567704073477e-06, "loss": 0.4437, "step": 1196 }, { "epoch": 0.8545930509281294, "grad_norm": 0.4249461889266968, "learning_rate": 8.993177550236464e-06, "loss": 0.4298, "step": 1197 }, { "epoch": 0.8553069966682532, "grad_norm": 0.3948894143104553, "learning_rate": 8.99067530148108e-06, "loss": 0.4515, "step": 1198 }, { "epoch": 0.856020942408377, "grad_norm": 0.3640841245651245, "learning_rate": 8.988170296197025e-06, "loss": 0.4369, "step": 1199 }, { "epoch": 0.8567348881485007, "grad_norm": 0.5157415270805359, "learning_rate": 8.985662536114614e-06, "loss": 0.4652, "step": 1200 }, { "epoch": 0.8574488338886245, "grad_norm": 0.38794252276420593, "learning_rate": 8.983152022966061e-06, "loss": 0.4511, "step": 1201 }, { "epoch": 0.8581627796287482, "grad_norm": 0.46351543068885803, "learning_rate": 8.980638758485486e-06, "loss": 0.4535, "step": 1202 }, { "epoch": 0.8588767253688719, "grad_norm": 0.4450928866863251, "learning_rate": 8.978122744408905e-06, "loss": 0.4577, "step": 1203 }, { "epoch": 0.8595906711089957, "grad_norm": 0.3972509801387787, "learning_rate": 8.97560398247424e-06, "loss": 0.4406, "step": 1204 }, { "epoch": 0.8603046168491195, "grad_norm": 0.4504891037940979, "learning_rate": 8.973082474421302e-06, "loss": 0.4966, "step": 1205 }, { "epoch": 0.8610185625892433, "grad_norm": 0.40321412682533264, "learning_rate": 8.970558221991806e-06, "loss": 0.4385, "step": 1206 }, { "epoch": 0.861732508329367, "grad_norm": 0.3613456189632416, "learning_rate": 8.968031226929362e-06, "loss": 0.4449, "step": 1207 }, { "epoch": 0.8624464540694907, "grad_norm": 0.40380731225013733, "learning_rate": 8.965501490979467e-06, "loss": 0.4811, "step": 1208 }, { "epoch": 0.8631603998096145, "grad_norm": 0.47398316860198975, "learning_rate": 8.962969015889522e-06, "loss": 0.4756, "step": 1209 }, { "epoch": 0.8638743455497382, "grad_norm": 0.38953328132629395, "learning_rate": 8.960433803408813e-06, "loss": 0.4462, "step": 1210 }, { "epoch": 0.8645882912898619, "grad_norm": 0.42078733444213867, "learning_rate": 8.957895855288517e-06, "loss": 0.4641, "step": 1211 }, { "epoch": 0.8653022370299858, "grad_norm": 0.4136643409729004, "learning_rate": 8.955355173281709e-06, "loss": 0.4449, "step": 1212 }, { "epoch": 0.8660161827701095, "grad_norm": 0.4238320291042328, "learning_rate": 8.952811759143337e-06, "loss": 0.4554, "step": 1213 }, { "epoch": 0.8667301285102332, "grad_norm": 0.39206674695014954, "learning_rate": 8.95026561463025e-06, "loss": 0.4676, "step": 1214 }, { "epoch": 0.867444074250357, "grad_norm": 0.42224228382110596, "learning_rate": 8.947716741501178e-06, "loss": 0.447, "step": 1215 }, { "epoch": 0.8681580199904807, "grad_norm": 0.34985020756721497, "learning_rate": 8.945165141516733e-06, "loss": 0.4236, "step": 1216 }, { "epoch": 0.8688719657306044, "grad_norm": 0.42906859517097473, "learning_rate": 8.942610816439419e-06, "loss": 0.4494, "step": 1217 }, { "epoch": 0.8695859114707283, "grad_norm": 0.41674482822418213, "learning_rate": 8.94005376803361e-06, "loss": 0.4439, "step": 1218 }, { "epoch": 0.870299857210852, "grad_norm": 0.40022775530815125, "learning_rate": 8.937493998065572e-06, "loss": 0.4841, "step": 1219 }, { "epoch": 0.8710138029509757, "grad_norm": 0.45178306102752686, "learning_rate": 8.934931508303446e-06, "loss": 0.4428, "step": 1220 }, { "epoch": 0.8717277486910995, "grad_norm": 0.3962271511554718, "learning_rate": 8.93236630051725e-06, "loss": 0.4225, "step": 1221 }, { "epoch": 0.8724416944312232, "grad_norm": 0.361479789018631, "learning_rate": 8.929798376478884e-06, "loss": 0.4597, "step": 1222 }, { "epoch": 0.873155640171347, "grad_norm": 0.3774799704551697, "learning_rate": 8.927227737962123e-06, "loss": 0.4631, "step": 1223 }, { "epoch": 0.8738695859114707, "grad_norm": 0.4508700370788574, "learning_rate": 8.924654386742613e-06, "loss": 0.4527, "step": 1224 }, { "epoch": 0.8745835316515945, "grad_norm": 0.3796118199825287, "learning_rate": 8.92207832459788e-06, "loss": 0.4519, "step": 1225 }, { "epoch": 0.8752974773917183, "grad_norm": 0.36993607878685, "learning_rate": 8.919499553307316e-06, "loss": 0.4483, "step": 1226 }, { "epoch": 0.876011423131842, "grad_norm": 0.4543451964855194, "learning_rate": 8.91691807465219e-06, "loss": 0.3984, "step": 1227 }, { "epoch": 0.8767253688719657, "grad_norm": 0.39216744899749756, "learning_rate": 8.91433389041564e-06, "loss": 0.4472, "step": 1228 }, { "epoch": 0.8774393146120895, "grad_norm": 0.38701125979423523, "learning_rate": 8.911747002382668e-06, "loss": 0.4247, "step": 1229 }, { "epoch": 0.8781532603522132, "grad_norm": 0.4125150740146637, "learning_rate": 8.90915741234015e-06, "loss": 0.4167, "step": 1230 }, { "epoch": 0.8788672060923369, "grad_norm": 0.44489938020706177, "learning_rate": 8.906565122076824e-06, "loss": 0.4726, "step": 1231 }, { "epoch": 0.8795811518324608, "grad_norm": 0.4452850818634033, "learning_rate": 8.903970133383297e-06, "loss": 0.4772, "step": 1232 }, { "epoch": 0.8802950975725845, "grad_norm": 0.4778824746608734, "learning_rate": 8.901372448052036e-06, "loss": 0.452, "step": 1233 }, { "epoch": 0.8810090433127082, "grad_norm": 0.43324387073516846, "learning_rate": 8.898772067877372e-06, "loss": 0.4282, "step": 1234 }, { "epoch": 0.881722989052832, "grad_norm": 0.41094642877578735, "learning_rate": 8.8961689946555e-06, "loss": 0.4406, "step": 1235 }, { "epoch": 0.8824369347929557, "grad_norm": 0.432242751121521, "learning_rate": 8.89356323018447e-06, "loss": 0.4408, "step": 1236 }, { "epoch": 0.8831508805330794, "grad_norm": 0.4760993719100952, "learning_rate": 8.890954776264198e-06, "loss": 0.4588, "step": 1237 }, { "epoch": 0.8838648262732033, "grad_norm": 0.37809139490127563, "learning_rate": 8.88834363469645e-06, "loss": 0.4268, "step": 1238 }, { "epoch": 0.884578772013327, "grad_norm": 0.42397189140319824, "learning_rate": 8.885729807284855e-06, "loss": 0.447, "step": 1239 }, { "epoch": 0.8852927177534508, "grad_norm": 0.40611135959625244, "learning_rate": 8.883113295834893e-06, "loss": 0.4188, "step": 1240 }, { "epoch": 0.8860066634935745, "grad_norm": 0.374917209148407, "learning_rate": 8.880494102153899e-06, "loss": 0.4203, "step": 1241 }, { "epoch": 0.8867206092336982, "grad_norm": 0.3778848648071289, "learning_rate": 8.877872228051061e-06, "loss": 0.4278, "step": 1242 }, { "epoch": 0.887434554973822, "grad_norm": 0.3666262626647949, "learning_rate": 8.875247675337422e-06, "loss": 0.4178, "step": 1243 }, { "epoch": 0.8881485007139457, "grad_norm": 0.38657715916633606, "learning_rate": 8.872620445825868e-06, "loss": 0.4421, "step": 1244 }, { "epoch": 0.8888624464540695, "grad_norm": 0.36203598976135254, "learning_rate": 8.869990541331137e-06, "loss": 0.4015, "step": 1245 }, { "epoch": 0.8895763921941933, "grad_norm": 0.3962545692920685, "learning_rate": 8.867357963669821e-06, "loss": 0.4438, "step": 1246 }, { "epoch": 0.890290337934317, "grad_norm": 0.38752034306526184, "learning_rate": 8.864722714660348e-06, "loss": 0.4358, "step": 1247 }, { "epoch": 0.8910042836744407, "grad_norm": 0.37031614780426025, "learning_rate": 8.862084796122998e-06, "loss": 0.4459, "step": 1248 }, { "epoch": 0.8917182294145645, "grad_norm": 0.4262847304344177, "learning_rate": 8.859444209879894e-06, "loss": 0.4596, "step": 1249 }, { "epoch": 0.8924321751546882, "grad_norm": 0.4591592848300934, "learning_rate": 8.856800957755e-06, "loss": 0.4709, "step": 1250 }, { "epoch": 0.893146120894812, "grad_norm": 0.39474329352378845, "learning_rate": 8.854155041574121e-06, "loss": 0.4807, "step": 1251 }, { "epoch": 0.8938600666349358, "grad_norm": 0.4128243923187256, "learning_rate": 8.851506463164906e-06, "loss": 0.4526, "step": 1252 }, { "epoch": 0.8945740123750595, "grad_norm": 0.5192345380783081, "learning_rate": 8.84885522435684e-06, "loss": 0.45, "step": 1253 }, { "epoch": 0.8952879581151832, "grad_norm": 0.40533649921417236, "learning_rate": 8.846201326981245e-06, "loss": 0.43, "step": 1254 }, { "epoch": 0.896001903855307, "grad_norm": 0.5018612742424011, "learning_rate": 8.84354477287128e-06, "loss": 0.4615, "step": 1255 }, { "epoch": 0.8967158495954307, "grad_norm": 0.42623400688171387, "learning_rate": 8.840885563861941e-06, "loss": 0.4665, "step": 1256 }, { "epoch": 0.8974297953355544, "grad_norm": 0.41789746284484863, "learning_rate": 8.838223701790057e-06, "loss": 0.4218, "step": 1257 }, { "epoch": 0.8981437410756783, "grad_norm": 0.3744523525238037, "learning_rate": 8.835559188494287e-06, "loss": 0.4535, "step": 1258 }, { "epoch": 0.898857686815802, "grad_norm": 0.3844186067581177, "learning_rate": 8.832892025815124e-06, "loss": 0.4471, "step": 1259 }, { "epoch": 0.8995716325559258, "grad_norm": 0.42066535353660583, "learning_rate": 8.83022221559489e-06, "loss": 0.4509, "step": 1260 }, { "epoch": 0.9002855782960495, "grad_norm": 0.3856178820133209, "learning_rate": 8.827549759677739e-06, "loss": 0.4513, "step": 1261 }, { "epoch": 0.9009995240361732, "grad_norm": 0.4100262522697449, "learning_rate": 8.824874659909645e-06, "loss": 0.4291, "step": 1262 }, { "epoch": 0.901713469776297, "grad_norm": 0.3996388912200928, "learning_rate": 8.822196918138416e-06, "loss": 0.4582, "step": 1263 }, { "epoch": 0.9024274155164208, "grad_norm": 0.4257446825504303, "learning_rate": 8.819516536213683e-06, "loss": 0.4492, "step": 1264 }, { "epoch": 0.9031413612565445, "grad_norm": 0.37579306960105896, "learning_rate": 8.816833515986896e-06, "loss": 0.4565, "step": 1265 }, { "epoch": 0.9038553069966683, "grad_norm": 0.4119375944137573, "learning_rate": 8.814147859311333e-06, "loss": 0.4687, "step": 1266 }, { "epoch": 0.904569252736792, "grad_norm": 0.3706452250480652, "learning_rate": 8.811459568042092e-06, "loss": 0.4185, "step": 1267 }, { "epoch": 0.9052831984769157, "grad_norm": 0.34958571195602417, "learning_rate": 8.808768644036086e-06, "loss": 0.4261, "step": 1268 }, { "epoch": 0.9059971442170395, "grad_norm": 0.4750383794307709, "learning_rate": 8.806075089152051e-06, "loss": 0.4736, "step": 1269 }, { "epoch": 0.9067110899571632, "grad_norm": 0.3738330602645874, "learning_rate": 8.803378905250544e-06, "loss": 0.4508, "step": 1270 }, { "epoch": 0.907425035697287, "grad_norm": 0.3889524042606354, "learning_rate": 8.80068009419393e-06, "loss": 0.4406, "step": 1271 }, { "epoch": 0.9081389814374108, "grad_norm": 0.41215723752975464, "learning_rate": 8.797978657846391e-06, "loss": 0.4183, "step": 1272 }, { "epoch": 0.9088529271775345, "grad_norm": 0.36004236340522766, "learning_rate": 8.795274598073927e-06, "loss": 0.4447, "step": 1273 }, { "epoch": 0.9095668729176583, "grad_norm": 0.387378454208374, "learning_rate": 8.792567916744346e-06, "loss": 0.4657, "step": 1274 }, { "epoch": 0.910280818657782, "grad_norm": 0.36724770069122314, "learning_rate": 8.789858615727266e-06, "loss": 0.4377, "step": 1275 }, { "epoch": 0.9109947643979057, "grad_norm": 0.3908044993877411, "learning_rate": 8.787146696894118e-06, "loss": 0.4451, "step": 1276 }, { "epoch": 0.9117087101380295, "grad_norm": 0.394031822681427, "learning_rate": 8.784432162118138e-06, "loss": 0.4607, "step": 1277 }, { "epoch": 0.9124226558781533, "grad_norm": 0.3770332634449005, "learning_rate": 8.781715013274369e-06, "loss": 0.4563, "step": 1278 }, { "epoch": 0.913136601618277, "grad_norm": 0.36583754420280457, "learning_rate": 8.778995252239664e-06, "loss": 0.4359, "step": 1279 }, { "epoch": 0.9138505473584008, "grad_norm": 0.3956010341644287, "learning_rate": 8.776272880892675e-06, "loss": 0.4524, "step": 1280 }, { "epoch": 0.9145644930985245, "grad_norm": 0.4308643639087677, "learning_rate": 8.773547901113862e-06, "loss": 0.4627, "step": 1281 }, { "epoch": 0.9152784388386482, "grad_norm": 0.3791871964931488, "learning_rate": 8.77082031478548e-06, "loss": 0.429, "step": 1282 }, { "epoch": 0.915992384578772, "grad_norm": 0.3777129054069519, "learning_rate": 8.768090123791591e-06, "loss": 0.4486, "step": 1283 }, { "epoch": 0.9167063303188958, "grad_norm": 0.39012211561203003, "learning_rate": 8.765357330018056e-06, "loss": 0.4526, "step": 1284 }, { "epoch": 0.9174202760590195, "grad_norm": 0.3985898196697235, "learning_rate": 8.762621935352526e-06, "loss": 0.4424, "step": 1285 }, { "epoch": 0.9181342217991433, "grad_norm": 0.4009990394115448, "learning_rate": 8.75988394168446e-06, "loss": 0.4314, "step": 1286 }, { "epoch": 0.918848167539267, "grad_norm": 0.4097512662410736, "learning_rate": 8.757143350905102e-06, "loss": 0.4355, "step": 1287 }, { "epoch": 0.9195621132793907, "grad_norm": 0.45964205265045166, "learning_rate": 8.754400164907496e-06, "loss": 0.4406, "step": 1288 }, { "epoch": 0.9202760590195145, "grad_norm": 0.3974362909793854, "learning_rate": 8.751654385586478e-06, "loss": 0.469, "step": 1289 }, { "epoch": 0.9209900047596382, "grad_norm": 0.39585229754447937, "learning_rate": 8.748906014838672e-06, "loss": 0.4118, "step": 1290 }, { "epoch": 0.9217039504997621, "grad_norm": 0.42526352405548096, "learning_rate": 8.746155054562496e-06, "loss": 0.4596, "step": 1291 }, { "epoch": 0.9224178962398858, "grad_norm": 0.3545001745223999, "learning_rate": 8.74340150665815e-06, "loss": 0.4325, "step": 1292 }, { "epoch": 0.9231318419800095, "grad_norm": 0.4815099835395813, "learning_rate": 8.740645373027635e-06, "loss": 0.4816, "step": 1293 }, { "epoch": 0.9238457877201333, "grad_norm": 0.3643430173397064, "learning_rate": 8.737886655574724e-06, "loss": 0.4176, "step": 1294 }, { "epoch": 0.924559733460257, "grad_norm": 0.3853975236415863, "learning_rate": 8.735125356204982e-06, "loss": 0.4428, "step": 1295 }, { "epoch": 0.9252736792003807, "grad_norm": 0.39786475896835327, "learning_rate": 8.732361476825752e-06, "loss": 0.4365, "step": 1296 }, { "epoch": 0.9259876249405046, "grad_norm": 0.38572025299072266, "learning_rate": 8.729595019346166e-06, "loss": 0.4727, "step": 1297 }, { "epoch": 0.9267015706806283, "grad_norm": 0.35422077775001526, "learning_rate": 8.726825985677132e-06, "loss": 0.4581, "step": 1298 }, { "epoch": 0.927415516420752, "grad_norm": 0.3958778381347656, "learning_rate": 8.724054377731342e-06, "loss": 0.4357, "step": 1299 }, { "epoch": 0.9281294621608758, "grad_norm": 0.38095352053642273, "learning_rate": 8.721280197423259e-06, "loss": 0.4379, "step": 1300 }, { "epoch": 0.9288434079009995, "grad_norm": 0.4009891450405121, "learning_rate": 8.71850344666913e-06, "loss": 0.4627, "step": 1301 }, { "epoch": 0.9295573536411232, "grad_norm": 0.44171562790870667, "learning_rate": 8.715724127386971e-06, "loss": 0.5014, "step": 1302 }, { "epoch": 0.930271299381247, "grad_norm": 0.39028841257095337, "learning_rate": 8.71294224149658e-06, "loss": 0.4744, "step": 1303 }, { "epoch": 0.9309852451213708, "grad_norm": 0.40115073323249817, "learning_rate": 8.710157790919522e-06, "loss": 0.465, "step": 1304 }, { "epoch": 0.9316991908614946, "grad_norm": 0.43890073895454407, "learning_rate": 8.707370777579134e-06, "loss": 0.4326, "step": 1305 }, { "epoch": 0.9324131366016183, "grad_norm": 0.3557293713092804, "learning_rate": 8.704581203400526e-06, "loss": 0.457, "step": 1306 }, { "epoch": 0.933127082341742, "grad_norm": 0.40797409415245056, "learning_rate": 8.701789070310574e-06, "loss": 0.4393, "step": 1307 }, { "epoch": 0.9338410280818658, "grad_norm": 0.3665221333503723, "learning_rate": 8.698994380237921e-06, "loss": 0.4487, "step": 1308 }, { "epoch": 0.9345549738219895, "grad_norm": 0.397717148065567, "learning_rate": 8.69619713511298e-06, "loss": 0.4539, "step": 1309 }, { "epoch": 0.9352689195621133, "grad_norm": 0.3946472704410553, "learning_rate": 8.69339733686793e-06, "loss": 0.4383, "step": 1310 }, { "epoch": 0.9359828653022371, "grad_norm": 0.4367827773094177, "learning_rate": 8.690594987436705e-06, "loss": 0.4285, "step": 1311 }, { "epoch": 0.9366968110423608, "grad_norm": 0.40816405415534973, "learning_rate": 8.687790088755008e-06, "loss": 0.4362, "step": 1312 }, { "epoch": 0.9374107567824845, "grad_norm": 0.39215004444122314, "learning_rate": 8.684982642760303e-06, "loss": 0.4526, "step": 1313 }, { "epoch": 0.9381247025226083, "grad_norm": 0.5099432468414307, "learning_rate": 8.68217265139181e-06, "loss": 0.4281, "step": 1314 }, { "epoch": 0.938838648262732, "grad_norm": 0.3847297132015228, "learning_rate": 8.679360116590512e-06, "loss": 0.4645, "step": 1315 }, { "epoch": 0.9395525940028557, "grad_norm": 0.3918679356575012, "learning_rate": 8.676545040299145e-06, "loss": 0.4474, "step": 1316 }, { "epoch": 0.9402665397429796, "grad_norm": 0.424439013004303, "learning_rate": 8.6737274244622e-06, "loss": 0.4409, "step": 1317 }, { "epoch": 0.9409804854831033, "grad_norm": 0.4015720784664154, "learning_rate": 8.670907271025923e-06, "loss": 0.4602, "step": 1318 }, { "epoch": 0.941694431223227, "grad_norm": 0.3929297924041748, "learning_rate": 8.668084581938318e-06, "loss": 0.4496, "step": 1319 }, { "epoch": 0.9424083769633508, "grad_norm": 0.3828696310520172, "learning_rate": 8.665259359149132e-06, "loss": 0.4292, "step": 1320 }, { "epoch": 0.9431223227034745, "grad_norm": 0.38161516189575195, "learning_rate": 8.662431604609868e-06, "loss": 0.4628, "step": 1321 }, { "epoch": 0.9438362684435982, "grad_norm": 0.4426814615726471, "learning_rate": 8.659601320273776e-06, "loss": 0.4471, "step": 1322 }, { "epoch": 0.9445502141837221, "grad_norm": 0.38170987367630005, "learning_rate": 8.656768508095853e-06, "loss": 0.4331, "step": 1323 }, { "epoch": 0.9452641599238458, "grad_norm": 0.3950255215167999, "learning_rate": 8.653933170032842e-06, "loss": 0.4399, "step": 1324 }, { "epoch": 0.9459781056639696, "grad_norm": 0.3660363256931305, "learning_rate": 8.651095308043232e-06, "loss": 0.4394, "step": 1325 }, { "epoch": 0.9466920514040933, "grad_norm": 0.4194352924823761, "learning_rate": 8.648254924087256e-06, "loss": 0.433, "step": 1326 }, { "epoch": 0.947405997144217, "grad_norm": 0.38351181149482727, "learning_rate": 8.645412020126885e-06, "loss": 0.4483, "step": 1327 }, { "epoch": 0.9481199428843408, "grad_norm": 0.36284881830215454, "learning_rate": 8.642566598125832e-06, "loss": 0.4525, "step": 1328 }, { "epoch": 0.9488338886244645, "grad_norm": 0.39078426361083984, "learning_rate": 8.639718660049556e-06, "loss": 0.4599, "step": 1329 }, { "epoch": 0.9495478343645883, "grad_norm": 0.4203556478023529, "learning_rate": 8.636868207865244e-06, "loss": 0.4597, "step": 1330 }, { "epoch": 0.9502617801047121, "grad_norm": 0.36322903633117676, "learning_rate": 8.634015243541827e-06, "loss": 0.4237, "step": 1331 }, { "epoch": 0.9509757258448358, "grad_norm": 0.4135705530643463, "learning_rate": 8.631159769049965e-06, "loss": 0.4435, "step": 1332 }, { "epoch": 0.9516896715849595, "grad_norm": 0.3880682587623596, "learning_rate": 8.62830178636206e-06, "loss": 0.438, "step": 1333 }, { "epoch": 0.9524036173250833, "grad_norm": 0.4601151645183563, "learning_rate": 8.62544129745224e-06, "loss": 0.4559, "step": 1334 }, { "epoch": 0.953117563065207, "grad_norm": 0.3529123365879059, "learning_rate": 8.622578304296364e-06, "loss": 0.4266, "step": 1335 }, { "epoch": 0.9538315088053307, "grad_norm": 0.41550639271736145, "learning_rate": 8.619712808872025e-06, "loss": 0.4471, "step": 1336 }, { "epoch": 0.9545454545454546, "grad_norm": 0.3943754732608795, "learning_rate": 8.61684481315854e-06, "loss": 0.4297, "step": 1337 }, { "epoch": 0.9552594002855783, "grad_norm": 0.4022347331047058, "learning_rate": 8.613974319136959e-06, "loss": 0.4425, "step": 1338 }, { "epoch": 0.955973346025702, "grad_norm": 0.3545350730419159, "learning_rate": 8.61110132879005e-06, "loss": 0.4356, "step": 1339 }, { "epoch": 0.9566872917658258, "grad_norm": 0.37980467081069946, "learning_rate": 8.608225844102312e-06, "loss": 0.4481, "step": 1340 }, { "epoch": 0.9574012375059495, "grad_norm": 0.40926894545555115, "learning_rate": 8.605347867059963e-06, "loss": 0.431, "step": 1341 }, { "epoch": 0.9581151832460733, "grad_norm": 0.36129409074783325, "learning_rate": 8.602467399650942e-06, "loss": 0.4179, "step": 1342 }, { "epoch": 0.9588291289861971, "grad_norm": 0.4124484956264496, "learning_rate": 8.599584443864915e-06, "loss": 0.4567, "step": 1343 }, { "epoch": 0.9595430747263208, "grad_norm": 0.392179399728775, "learning_rate": 8.596699001693257e-06, "loss": 0.4357, "step": 1344 }, { "epoch": 0.9602570204664446, "grad_norm": 0.38509485125541687, "learning_rate": 8.593811075129065e-06, "loss": 0.4266, "step": 1345 }, { "epoch": 0.9609709662065683, "grad_norm": 0.39666932821273804, "learning_rate": 8.59092066616716e-06, "loss": 0.4285, "step": 1346 }, { "epoch": 0.961684911946692, "grad_norm": 0.4083783030509949, "learning_rate": 8.58802777680406e-06, "loss": 0.4449, "step": 1347 }, { "epoch": 0.9623988576868158, "grad_norm": 0.39439642429351807, "learning_rate": 8.585132409038013e-06, "loss": 0.4419, "step": 1348 }, { "epoch": 0.9631128034269395, "grad_norm": 0.36515992879867554, "learning_rate": 8.582234564868968e-06, "loss": 0.4458, "step": 1349 }, { "epoch": 0.9638267491670633, "grad_norm": 0.4330118000507355, "learning_rate": 8.579334246298593e-06, "loss": 0.4556, "step": 1350 }, { "epoch": 0.9645406949071871, "grad_norm": 0.4189104437828064, "learning_rate": 8.576431455330258e-06, "loss": 0.4295, "step": 1351 }, { "epoch": 0.9652546406473108, "grad_norm": 0.4075847864151001, "learning_rate": 8.573526193969047e-06, "loss": 0.4557, "step": 1352 }, { "epoch": 0.9659685863874345, "grad_norm": 0.3876395523548126, "learning_rate": 8.570618464221741e-06, "loss": 0.4332, "step": 1353 }, { "epoch": 0.9666825321275583, "grad_norm": 0.3956856429576874, "learning_rate": 8.567708268096839e-06, "loss": 0.47, "step": 1354 }, { "epoch": 0.967396477867682, "grad_norm": 0.42093825340270996, "learning_rate": 8.564795607604535e-06, "loss": 0.4631, "step": 1355 }, { "epoch": 0.9681104236078059, "grad_norm": 0.37383702397346497, "learning_rate": 8.561880484756726e-06, "loss": 0.4341, "step": 1356 }, { "epoch": 0.9688243693479296, "grad_norm": 0.40793636441230774, "learning_rate": 8.558962901567009e-06, "loss": 0.4521, "step": 1357 }, { "epoch": 0.9695383150880533, "grad_norm": 0.3996295630931854, "learning_rate": 8.556042860050686e-06, "loss": 0.4179, "step": 1358 }, { "epoch": 0.9702522608281771, "grad_norm": 0.4541391432285309, "learning_rate": 8.553120362224754e-06, "loss": 0.453, "step": 1359 }, { "epoch": 0.9709662065683008, "grad_norm": 0.4304084777832031, "learning_rate": 8.550195410107903e-06, "loss": 0.4505, "step": 1360 }, { "epoch": 0.9716801523084245, "grad_norm": 0.4393538534641266, "learning_rate": 8.547268005720523e-06, "loss": 0.4433, "step": 1361 }, { "epoch": 0.9723940980485483, "grad_norm": 0.37188974022865295, "learning_rate": 8.544338151084697e-06, "loss": 0.413, "step": 1362 }, { "epoch": 0.9731080437886721, "grad_norm": 0.4409099817276001, "learning_rate": 8.541405848224199e-06, "loss": 0.4483, "step": 1363 }, { "epoch": 0.9738219895287958, "grad_norm": 0.39124223589897156, "learning_rate": 8.538471099164494e-06, "loss": 0.4302, "step": 1364 }, { "epoch": 0.9745359352689196, "grad_norm": 0.4356656074523926, "learning_rate": 8.535533905932739e-06, "loss": 0.4148, "step": 1365 }, { "epoch": 0.9752498810090433, "grad_norm": 0.37344440817832947, "learning_rate": 8.532594270557777e-06, "loss": 0.4492, "step": 1366 }, { "epoch": 0.975963826749167, "grad_norm": 0.44505396485328674, "learning_rate": 8.52965219507014e-06, "loss": 0.4499, "step": 1367 }, { "epoch": 0.9766777724892908, "grad_norm": 0.43665894865989685, "learning_rate": 8.526707681502045e-06, "loss": 0.4277, "step": 1368 }, { "epoch": 0.9773917182294146, "grad_norm": 0.3779570460319519, "learning_rate": 8.52376073188739e-06, "loss": 0.4457, "step": 1369 }, { "epoch": 0.9781056639695384, "grad_norm": 0.4030458331108093, "learning_rate": 8.52081134826176e-06, "loss": 0.4422, "step": 1370 }, { "epoch": 0.9788196097096621, "grad_norm": 0.3873618543148041, "learning_rate": 8.517859532662418e-06, "loss": 0.4513, "step": 1371 }, { "epoch": 0.9795335554497858, "grad_norm": 0.3586464822292328, "learning_rate": 8.51490528712831e-06, "loss": 0.425, "step": 1372 }, { "epoch": 0.9802475011899096, "grad_norm": 0.4088030755519867, "learning_rate": 8.511948613700056e-06, "loss": 0.4427, "step": 1373 }, { "epoch": 0.9809614469300333, "grad_norm": 0.3710865080356598, "learning_rate": 8.508989514419959e-06, "loss": 0.4199, "step": 1374 }, { "epoch": 0.981675392670157, "grad_norm": 0.4322734773159027, "learning_rate": 8.50602799133199e-06, "loss": 0.4339, "step": 1375 }, { "epoch": 0.9823893384102809, "grad_norm": 0.40171775221824646, "learning_rate": 8.503064046481802e-06, "loss": 0.4458, "step": 1376 }, { "epoch": 0.9831032841504046, "grad_norm": 0.4186509847640991, "learning_rate": 8.500097681916717e-06, "loss": 0.4433, "step": 1377 }, { "epoch": 0.9838172298905283, "grad_norm": 0.36876121163368225, "learning_rate": 8.497128899685728e-06, "loss": 0.4437, "step": 1378 }, { "epoch": 0.9845311756306521, "grad_norm": 0.38206976652145386, "learning_rate": 8.4941577018395e-06, "loss": 0.4347, "step": 1379 }, { "epoch": 0.9852451213707758, "grad_norm": 0.34068116545677185, "learning_rate": 8.491184090430365e-06, "loss": 0.4452, "step": 1380 }, { "epoch": 0.9859590671108995, "grad_norm": 0.3805212080478668, "learning_rate": 8.48820806751232e-06, "loss": 0.4296, "step": 1381 }, { "epoch": 0.9866730128510233, "grad_norm": 0.38589203357696533, "learning_rate": 8.485229635141034e-06, "loss": 0.4423, "step": 1382 }, { "epoch": 0.9873869585911471, "grad_norm": 0.3786391019821167, "learning_rate": 8.482248795373835e-06, "loss": 0.409, "step": 1383 }, { "epoch": 0.9881009043312708, "grad_norm": 0.3747534453868866, "learning_rate": 8.479265550269714e-06, "loss": 0.4552, "step": 1384 }, { "epoch": 0.9888148500713946, "grad_norm": 0.3907141387462616, "learning_rate": 8.476279901889328e-06, "loss": 0.4298, "step": 1385 }, { "epoch": 0.9895287958115183, "grad_norm": 0.377784788608551, "learning_rate": 8.473291852294986e-06, "loss": 0.4352, "step": 1386 }, { "epoch": 0.990242741551642, "grad_norm": 0.3770368695259094, "learning_rate": 8.470301403550667e-06, "loss": 0.4392, "step": 1387 }, { "epoch": 0.9909566872917658, "grad_norm": 0.37844574451446533, "learning_rate": 8.467308557721996e-06, "loss": 0.4348, "step": 1388 }, { "epoch": 0.9916706330318896, "grad_norm": 0.38033434748649597, "learning_rate": 8.46431331687626e-06, "loss": 0.4514, "step": 1389 }, { "epoch": 0.9923845787720134, "grad_norm": 0.36625564098358154, "learning_rate": 8.461315683082398e-06, "loss": 0.4406, "step": 1390 }, { "epoch": 0.9930985245121371, "grad_norm": 0.41841375827789307, "learning_rate": 8.458315658411006e-06, "loss": 0.4543, "step": 1391 }, { "epoch": 0.9938124702522608, "grad_norm": 0.3993852436542511, "learning_rate": 8.455313244934324e-06, "loss": 0.4526, "step": 1392 }, { "epoch": 0.9945264159923846, "grad_norm": 0.36040499806404114, "learning_rate": 8.452308444726249e-06, "loss": 0.4179, "step": 1393 }, { "epoch": 0.9952403617325083, "grad_norm": 0.3533748686313629, "learning_rate": 8.449301259862324e-06, "loss": 0.4313, "step": 1394 }, { "epoch": 0.995954307472632, "grad_norm": 0.37252476811408997, "learning_rate": 8.446291692419735e-06, "loss": 0.4538, "step": 1395 }, { "epoch": 0.9966682532127559, "grad_norm": 0.3555397689342499, "learning_rate": 8.443279744477324e-06, "loss": 0.4277, "step": 1396 }, { "epoch": 0.9973821989528796, "grad_norm": 0.3759317994117737, "learning_rate": 8.440265418115568e-06, "loss": 0.4418, "step": 1397 }, { "epoch": 0.9980961446930033, "grad_norm": 0.35843491554260254, "learning_rate": 8.437248715416591e-06, "loss": 0.4356, "step": 1398 }, { "epoch": 0.9988100904331271, "grad_norm": 0.3813358247280121, "learning_rate": 8.434229638464156e-06, "loss": 0.4405, "step": 1399 }, { "epoch": 0.9995240361732508, "grad_norm": 0.3811368942260742, "learning_rate": 8.43120818934367e-06, "loss": 0.4489, "step": 1400 }, { "epoch": 1.0002379819133747, "grad_norm": 0.5345582365989685, "learning_rate": 8.428184370142171e-06, "loss": 0.5747, "step": 1401 }, { "epoch": 1.0009519276534984, "grad_norm": 0.37325209379196167, "learning_rate": 8.425158182948346e-06, "loss": 0.4312, "step": 1402 }, { "epoch": 1.0016658733936221, "grad_norm": 0.39232245087623596, "learning_rate": 8.422129629852505e-06, "loss": 0.4311, "step": 1403 }, { "epoch": 1.0023798191337459, "grad_norm": 0.4088703989982605, "learning_rate": 8.4190987129466e-06, "loss": 0.401, "step": 1404 }, { "epoch": 1.0030937648738696, "grad_norm": 0.3927323818206787, "learning_rate": 8.416065434324216e-06, "loss": 0.4203, "step": 1405 }, { "epoch": 1.0038077106139933, "grad_norm": 0.40728285908699036, "learning_rate": 8.413029796080568e-06, "loss": 0.3995, "step": 1406 }, { "epoch": 1.004521656354117, "grad_norm": 0.4336508512496948, "learning_rate": 8.409991800312493e-06, "loss": 0.4244, "step": 1407 }, { "epoch": 1.0052356020942408, "grad_norm": 0.422789067029953, "learning_rate": 8.40695144911847e-06, "loss": 0.4513, "step": 1408 }, { "epoch": 1.0059495478343645, "grad_norm": 0.42884382605552673, "learning_rate": 8.403908744598596e-06, "loss": 0.4072, "step": 1409 }, { "epoch": 1.0066634935744883, "grad_norm": 0.36188215017318726, "learning_rate": 8.400863688854598e-06, "loss": 0.3806, "step": 1410 }, { "epoch": 1.007377439314612, "grad_norm": 0.40400391817092896, "learning_rate": 8.397816283989822e-06, "loss": 0.4136, "step": 1411 }, { "epoch": 1.008091385054736, "grad_norm": 0.3689432442188263, "learning_rate": 8.394766532109243e-06, "loss": 0.4093, "step": 1412 }, { "epoch": 1.0088053307948597, "grad_norm": 0.349376916885376, "learning_rate": 8.391714435319452e-06, "loss": 0.404, "step": 1413 }, { "epoch": 1.0095192765349834, "grad_norm": 0.39513668417930603, "learning_rate": 8.388659995728662e-06, "loss": 0.4151, "step": 1414 }, { "epoch": 1.0102332222751071, "grad_norm": 0.38644829392433167, "learning_rate": 8.385603215446704e-06, "loss": 0.4312, "step": 1415 }, { "epoch": 1.0109471680152309, "grad_norm": 0.3854246139526367, "learning_rate": 8.382544096585028e-06, "loss": 0.4192, "step": 1416 }, { "epoch": 1.0116611137553546, "grad_norm": 0.43690580129623413, "learning_rate": 8.379482641256692e-06, "loss": 0.4299, "step": 1417 }, { "epoch": 1.0123750594954783, "grad_norm": 0.3660273253917694, "learning_rate": 8.376418851576377e-06, "loss": 0.392, "step": 1418 }, { "epoch": 1.013089005235602, "grad_norm": 0.39697685837745667, "learning_rate": 8.373352729660373e-06, "loss": 0.4573, "step": 1419 }, { "epoch": 1.0138029509757258, "grad_norm": 0.3791833817958832, "learning_rate": 8.370284277626576e-06, "loss": 0.396, "step": 1420 }, { "epoch": 1.0145168967158495, "grad_norm": 0.40672361850738525, "learning_rate": 8.367213497594501e-06, "loss": 0.4378, "step": 1421 }, { "epoch": 1.0152308424559733, "grad_norm": 0.3776877820491791, "learning_rate": 8.364140391685265e-06, "loss": 0.4215, "step": 1422 }, { "epoch": 1.015944788196097, "grad_norm": 0.38980382680892944, "learning_rate": 8.361064962021592e-06, "loss": 0.4435, "step": 1423 }, { "epoch": 1.0166587339362207, "grad_norm": 0.3333980143070221, "learning_rate": 8.357987210727807e-06, "loss": 0.3761, "step": 1424 }, { "epoch": 1.0173726796763447, "grad_norm": 0.41519951820373535, "learning_rate": 8.35490713992985e-06, "loss": 0.4205, "step": 1425 }, { "epoch": 1.0180866254164684, "grad_norm": 0.3882196843624115, "learning_rate": 8.351824751755257e-06, "loss": 0.4344, "step": 1426 }, { "epoch": 1.0188005711565922, "grad_norm": 0.4083569347858429, "learning_rate": 8.348740048333156e-06, "loss": 0.4255, "step": 1427 }, { "epoch": 1.019514516896716, "grad_norm": 0.45800071954727173, "learning_rate": 8.345653031794292e-06, "loss": 0.4417, "step": 1428 }, { "epoch": 1.0202284626368396, "grad_norm": 0.38050535321235657, "learning_rate": 8.342563704270993e-06, "loss": 0.4042, "step": 1429 }, { "epoch": 1.0209424083769634, "grad_norm": 0.3935188055038452, "learning_rate": 8.339472067897187e-06, "loss": 0.3835, "step": 1430 }, { "epoch": 1.021656354117087, "grad_norm": 0.411724716424942, "learning_rate": 8.336378124808404e-06, "loss": 0.4143, "step": 1431 }, { "epoch": 1.0223702998572108, "grad_norm": 0.4121796488761902, "learning_rate": 8.333281877141758e-06, "loss": 0.432, "step": 1432 }, { "epoch": 1.0230842455973346, "grad_norm": 0.3897634744644165, "learning_rate": 8.330183327035959e-06, "loss": 0.3818, "step": 1433 }, { "epoch": 1.0237981913374583, "grad_norm": 0.43437254428863525, "learning_rate": 8.327082476631307e-06, "loss": 0.4134, "step": 1434 }, { "epoch": 1.024512137077582, "grad_norm": 0.399063378572464, "learning_rate": 8.323979328069689e-06, "loss": 0.4094, "step": 1435 }, { "epoch": 1.0252260828177058, "grad_norm": 0.36286845803260803, "learning_rate": 8.320873883494586e-06, "loss": 0.3945, "step": 1436 }, { "epoch": 1.0259400285578295, "grad_norm": 0.40683797001838684, "learning_rate": 8.317766145051057e-06, "loss": 0.389, "step": 1437 }, { "epoch": 1.0266539742979535, "grad_norm": 0.39179933071136475, "learning_rate": 8.314656114885748e-06, "loss": 0.4168, "step": 1438 }, { "epoch": 1.0273679200380772, "grad_norm": 0.4295940399169922, "learning_rate": 8.311543795146893e-06, "loss": 0.425, "step": 1439 }, { "epoch": 1.028081865778201, "grad_norm": 0.4460819959640503, "learning_rate": 8.308429187984298e-06, "loss": 0.4377, "step": 1440 }, { "epoch": 1.0287958115183247, "grad_norm": 0.39134350419044495, "learning_rate": 8.305312295549358e-06, "loss": 0.3829, "step": 1441 }, { "epoch": 1.0295097572584484, "grad_norm": 0.4578056335449219, "learning_rate": 8.302193119995038e-06, "loss": 0.4456, "step": 1442 }, { "epoch": 1.0302237029985721, "grad_norm": 0.36667466163635254, "learning_rate": 8.299071663475892e-06, "loss": 0.4024, "step": 1443 }, { "epoch": 1.0309376487386959, "grad_norm": 0.36832356452941895, "learning_rate": 8.295947928148037e-06, "loss": 0.3721, "step": 1444 }, { "epoch": 1.0316515944788196, "grad_norm": 0.40222519636154175, "learning_rate": 8.292821916169171e-06, "loss": 0.4354, "step": 1445 }, { "epoch": 1.0323655402189433, "grad_norm": 0.400464802980423, "learning_rate": 8.289693629698564e-06, "loss": 0.3996, "step": 1446 }, { "epoch": 1.033079485959067, "grad_norm": 0.3613945543766022, "learning_rate": 8.286563070897054e-06, "loss": 0.3834, "step": 1447 }, { "epoch": 1.0337934316991908, "grad_norm": 0.39636075496673584, "learning_rate": 8.283430241927053e-06, "loss": 0.4261, "step": 1448 }, { "epoch": 1.0345073774393145, "grad_norm": 0.4131576120853424, "learning_rate": 8.280295144952537e-06, "loss": 0.4526, "step": 1449 }, { "epoch": 1.0352213231794383, "grad_norm": 0.3799155354499817, "learning_rate": 8.277157782139051e-06, "loss": 0.4001, "step": 1450 }, { "epoch": 1.035935268919562, "grad_norm": 0.3989335298538208, "learning_rate": 8.274018155653705e-06, "loss": 0.4199, "step": 1451 }, { "epoch": 1.036649214659686, "grad_norm": 0.3860228955745697, "learning_rate": 8.270876267665173e-06, "loss": 0.4067, "step": 1452 }, { "epoch": 1.0373631603998097, "grad_norm": 0.40510717034339905, "learning_rate": 8.267732120343688e-06, "loss": 0.4158, "step": 1453 }, { "epoch": 1.0380771061399334, "grad_norm": 0.3777405321598053, "learning_rate": 8.264585715861048e-06, "loss": 0.4089, "step": 1454 }, { "epoch": 1.0387910518800572, "grad_norm": 0.38435274362564087, "learning_rate": 8.261437056390607e-06, "loss": 0.4121, "step": 1455 }, { "epoch": 1.039504997620181, "grad_norm": 0.403801828622818, "learning_rate": 8.258286144107277e-06, "loss": 0.4269, "step": 1456 }, { "epoch": 1.0402189433603046, "grad_norm": 0.35973072052001953, "learning_rate": 8.255132981187527e-06, "loss": 0.3692, "step": 1457 }, { "epoch": 1.0409328891004284, "grad_norm": 0.3795086741447449, "learning_rate": 8.251977569809383e-06, "loss": 0.4229, "step": 1458 }, { "epoch": 1.041646834840552, "grad_norm": 0.38045817613601685, "learning_rate": 8.248819912152417e-06, "loss": 0.3924, "step": 1459 }, { "epoch": 1.0423607805806758, "grad_norm": 0.39325088262557983, "learning_rate": 8.24566001039776e-06, "loss": 0.3902, "step": 1460 }, { "epoch": 1.0430747263207996, "grad_norm": 0.41978752613067627, "learning_rate": 8.242497866728089e-06, "loss": 0.4377, "step": 1461 }, { "epoch": 1.0437886720609233, "grad_norm": 0.38929447531700134, "learning_rate": 8.239333483327633e-06, "loss": 0.3918, "step": 1462 }, { "epoch": 1.044502617801047, "grad_norm": 0.3676670491695404, "learning_rate": 8.236166862382163e-06, "loss": 0.419, "step": 1463 }, { "epoch": 1.045216563541171, "grad_norm": 0.37409210205078125, "learning_rate": 8.232998006078998e-06, "loss": 0.408, "step": 1464 }, { "epoch": 1.0459305092812947, "grad_norm": 0.3794994652271271, "learning_rate": 8.229826916607005e-06, "loss": 0.4172, "step": 1465 }, { "epoch": 1.0466444550214185, "grad_norm": 0.3833736479282379, "learning_rate": 8.226653596156588e-06, "loss": 0.4091, "step": 1466 }, { "epoch": 1.0473584007615422, "grad_norm": 0.4618484079837799, "learning_rate": 8.223478046919693e-06, "loss": 0.4393, "step": 1467 }, { "epoch": 1.048072346501666, "grad_norm": 0.42562198638916016, "learning_rate": 8.220300271089806e-06, "loss": 0.3836, "step": 1468 }, { "epoch": 1.0487862922417897, "grad_norm": 0.4015915095806122, "learning_rate": 8.217120270861954e-06, "loss": 0.4126, "step": 1469 }, { "epoch": 1.0495002379819134, "grad_norm": 0.39297306537628174, "learning_rate": 8.213938048432697e-06, "loss": 0.3919, "step": 1470 }, { "epoch": 1.0502141837220371, "grad_norm": 0.4398539662361145, "learning_rate": 8.210753606000131e-06, "loss": 0.4309, "step": 1471 }, { "epoch": 1.0509281294621609, "grad_norm": 0.40727490186691284, "learning_rate": 8.207566945763885e-06, "loss": 0.4494, "step": 1472 }, { "epoch": 1.0516420752022846, "grad_norm": 0.38790008425712585, "learning_rate": 8.204378069925121e-06, "loss": 0.4066, "step": 1473 }, { "epoch": 1.0523560209424083, "grad_norm": 0.3996787965297699, "learning_rate": 8.20118698068653e-06, "loss": 0.3927, "step": 1474 }, { "epoch": 1.053069966682532, "grad_norm": 0.3849998712539673, "learning_rate": 8.197993680252334e-06, "loss": 0.4306, "step": 1475 }, { "epoch": 1.0537839124226558, "grad_norm": 0.43130505084991455, "learning_rate": 8.19479817082828e-06, "loss": 0.391, "step": 1476 }, { "epoch": 1.0544978581627795, "grad_norm": 0.41345125436782837, "learning_rate": 8.191600454621642e-06, "loss": 0.4189, "step": 1477 }, { "epoch": 1.0552118039029035, "grad_norm": 0.40189459919929504, "learning_rate": 8.188400533841217e-06, "loss": 0.3806, "step": 1478 }, { "epoch": 1.0559257496430272, "grad_norm": 0.3925422430038452, "learning_rate": 8.18519841069733e-06, "loss": 0.4309, "step": 1479 }, { "epoch": 1.056639695383151, "grad_norm": 0.3721925616264343, "learning_rate": 8.181994087401819e-06, "loss": 0.3967, "step": 1480 }, { "epoch": 1.0573536411232747, "grad_norm": 0.3983113467693329, "learning_rate": 8.178787566168047e-06, "loss": 0.4111, "step": 1481 }, { "epoch": 1.0580675868633984, "grad_norm": 0.44313845038414, "learning_rate": 8.175578849210894e-06, "loss": 0.4062, "step": 1482 }, { "epoch": 1.0587815326035221, "grad_norm": 0.40648433566093445, "learning_rate": 8.172367938746759e-06, "loss": 0.4049, "step": 1483 }, { "epoch": 1.0594954783436459, "grad_norm": 0.4045593738555908, "learning_rate": 8.16915483699355e-06, "loss": 0.4183, "step": 1484 }, { "epoch": 1.0602094240837696, "grad_norm": 0.4168120324611664, "learning_rate": 8.165939546170701e-06, "loss": 0.3991, "step": 1485 }, { "epoch": 1.0609233698238933, "grad_norm": 0.42281922698020935, "learning_rate": 8.162722068499139e-06, "loss": 0.4368, "step": 1486 }, { "epoch": 1.061637315564017, "grad_norm": 0.4092094600200653, "learning_rate": 8.159502406201319e-06, "loss": 0.3983, "step": 1487 }, { "epoch": 1.0623512613041408, "grad_norm": 0.4540458023548126, "learning_rate": 8.156280561501196e-06, "loss": 0.4375, "step": 1488 }, { "epoch": 1.0630652070442645, "grad_norm": 0.42783260345458984, "learning_rate": 8.153056536624235e-06, "loss": 0.3922, "step": 1489 }, { "epoch": 1.0637791527843885, "grad_norm": 0.47128456830978394, "learning_rate": 8.149830333797407e-06, "loss": 0.4335, "step": 1490 }, { "epoch": 1.0644930985245122, "grad_norm": 0.414855420589447, "learning_rate": 8.146601955249187e-06, "loss": 0.3868, "step": 1491 }, { "epoch": 1.065207044264636, "grad_norm": 0.4660300612449646, "learning_rate": 8.143371403209555e-06, "loss": 0.4101, "step": 1492 }, { "epoch": 1.0659209900047597, "grad_norm": 0.4201982021331787, "learning_rate": 8.140138679909987e-06, "loss": 0.397, "step": 1493 }, { "epoch": 1.0666349357448834, "grad_norm": 0.3941434621810913, "learning_rate": 8.136903787583464e-06, "loss": 0.4318, "step": 1494 }, { "epoch": 1.0673488814850072, "grad_norm": 0.482112318277359, "learning_rate": 8.13366672846446e-06, "loss": 0.4266, "step": 1495 }, { "epoch": 1.068062827225131, "grad_norm": 0.3776969611644745, "learning_rate": 8.130427504788956e-06, "loss": 0.4002, "step": 1496 }, { "epoch": 1.0687767729652546, "grad_norm": 0.537226140499115, "learning_rate": 8.127186118794415e-06, "loss": 0.4693, "step": 1497 }, { "epoch": 1.0694907187053784, "grad_norm": 0.40249496698379517, "learning_rate": 8.123942572719801e-06, "loss": 0.3829, "step": 1498 }, { "epoch": 1.070204664445502, "grad_norm": 0.39786240458488464, "learning_rate": 8.12069686880557e-06, "loss": 0.4047, "step": 1499 }, { "epoch": 1.0709186101856258, "grad_norm": 0.4172275960445404, "learning_rate": 8.117449009293668e-06, "loss": 0.4201, "step": 1500 }, { "epoch": 1.0716325559257496, "grad_norm": 0.387802392244339, "learning_rate": 8.114198996427527e-06, "loss": 0.3885, "step": 1501 }, { "epoch": 1.0723465016658733, "grad_norm": 0.4489603042602539, "learning_rate": 8.110946832452071e-06, "loss": 0.4476, "step": 1502 }, { "epoch": 1.073060447405997, "grad_norm": 0.46110326051712036, "learning_rate": 8.107692519613705e-06, "loss": 0.3891, "step": 1503 }, { "epoch": 1.073774393146121, "grad_norm": 0.34547945857048035, "learning_rate": 8.104436060160324e-06, "loss": 0.3968, "step": 1504 }, { "epoch": 1.0744883388862447, "grad_norm": 0.4861612617969513, "learning_rate": 8.101177456341301e-06, "loss": 0.3892, "step": 1505 }, { "epoch": 1.0752022846263685, "grad_norm": 0.4647684395313263, "learning_rate": 8.097916710407491e-06, "loss": 0.4555, "step": 1506 }, { "epoch": 1.0759162303664922, "grad_norm": 0.3550173342227936, "learning_rate": 8.094653824611232e-06, "loss": 0.3626, "step": 1507 }, { "epoch": 1.076630176106616, "grad_norm": 0.46954384446144104, "learning_rate": 8.091388801206334e-06, "loss": 0.4262, "step": 1508 }, { "epoch": 1.0773441218467397, "grad_norm": 0.40155744552612305, "learning_rate": 8.08812164244809e-06, "loss": 0.3965, "step": 1509 }, { "epoch": 1.0780580675868634, "grad_norm": 0.38685140013694763, "learning_rate": 8.084852350593264e-06, "loss": 0.4068, "step": 1510 }, { "epoch": 1.0787720133269871, "grad_norm": 0.3838564157485962, "learning_rate": 8.081580927900095e-06, "loss": 0.4183, "step": 1511 }, { "epoch": 1.0794859590671109, "grad_norm": 0.5106685757637024, "learning_rate": 8.078307376628292e-06, "loss": 0.426, "step": 1512 }, { "epoch": 1.0801999048072346, "grad_norm": 0.381047785282135, "learning_rate": 8.075031699039037e-06, "loss": 0.3941, "step": 1513 }, { "epoch": 1.0809138505473583, "grad_norm": 0.39671704173088074, "learning_rate": 8.071753897394981e-06, "loss": 0.4047, "step": 1514 }, { "epoch": 1.081627796287482, "grad_norm": 0.48401710391044617, "learning_rate": 8.068473973960238e-06, "loss": 0.4121, "step": 1515 }, { "epoch": 1.0823417420276058, "grad_norm": 0.3880790174007416, "learning_rate": 8.065191931000391e-06, "loss": 0.3631, "step": 1516 }, { "epoch": 1.0830556877677298, "grad_norm": 0.4391993582248688, "learning_rate": 8.061907770782486e-06, "loss": 0.4537, "step": 1517 }, { "epoch": 1.0837696335078535, "grad_norm": 0.43650227785110474, "learning_rate": 8.058621495575032e-06, "loss": 0.3736, "step": 1518 }, { "epoch": 1.0844835792479772, "grad_norm": 0.43206265568733215, "learning_rate": 8.055333107648e-06, "loss": 0.4608, "step": 1519 }, { "epoch": 1.085197524988101, "grad_norm": 0.3654104173183441, "learning_rate": 8.052042609272817e-06, "loss": 0.3862, "step": 1520 }, { "epoch": 1.0859114707282247, "grad_norm": 0.3691144585609436, "learning_rate": 8.04875000272237e-06, "loss": 0.3762, "step": 1521 }, { "epoch": 1.0866254164683484, "grad_norm": 0.4204148054122925, "learning_rate": 8.045455290271003e-06, "loss": 0.4909, "step": 1522 }, { "epoch": 1.0873393622084722, "grad_norm": 0.34364989399909973, "learning_rate": 8.042158474194513e-06, "loss": 0.376, "step": 1523 }, { "epoch": 1.088053307948596, "grad_norm": 0.43049412965774536, "learning_rate": 8.038859556770152e-06, "loss": 0.407, "step": 1524 }, { "epoch": 1.0887672536887196, "grad_norm": 0.36369580030441284, "learning_rate": 8.035558540276618e-06, "loss": 0.3948, "step": 1525 }, { "epoch": 1.0894811994288434, "grad_norm": 0.40714097023010254, "learning_rate": 8.032255426994069e-06, "loss": 0.393, "step": 1526 }, { "epoch": 1.090195145168967, "grad_norm": 0.3799701929092407, "learning_rate": 8.0289502192041e-06, "loss": 0.4239, "step": 1527 }, { "epoch": 1.0909090909090908, "grad_norm": 0.36732184886932373, "learning_rate": 8.025642919189763e-06, "loss": 0.4197, "step": 1528 }, { "epoch": 1.0916230366492146, "grad_norm": 0.39801672101020813, "learning_rate": 8.022333529235547e-06, "loss": 0.4199, "step": 1529 }, { "epoch": 1.0923369823893383, "grad_norm": 0.4001610279083252, "learning_rate": 8.019022051627387e-06, "loss": 0.4672, "step": 1530 }, { "epoch": 1.0930509281294623, "grad_norm": 0.3986048698425293, "learning_rate": 8.015708488652666e-06, "loss": 0.3987, "step": 1531 }, { "epoch": 1.093764873869586, "grad_norm": 0.3734215795993805, "learning_rate": 8.012392842600198e-06, "loss": 0.4199, "step": 1532 }, { "epoch": 1.0944788196097097, "grad_norm": 0.3625553548336029, "learning_rate": 8.009075115760243e-06, "loss": 0.3864, "step": 1533 }, { "epoch": 1.0951927653498335, "grad_norm": 0.401052862405777, "learning_rate": 8.005755310424493e-06, "loss": 0.4607, "step": 1534 }, { "epoch": 1.0959067110899572, "grad_norm": 0.42043641209602356, "learning_rate": 8.00243342888608e-06, "loss": 0.3984, "step": 1535 }, { "epoch": 1.096620656830081, "grad_norm": 0.3619014024734497, "learning_rate": 7.99910947343957e-06, "loss": 0.4175, "step": 1536 }, { "epoch": 1.0973346025702047, "grad_norm": 0.35864993929862976, "learning_rate": 7.995783446380957e-06, "loss": 0.4057, "step": 1537 }, { "epoch": 1.0980485483103284, "grad_norm": 0.3963184058666229, "learning_rate": 7.992455350007668e-06, "loss": 0.4369, "step": 1538 }, { "epoch": 1.0987624940504521, "grad_norm": 0.4310768246650696, "learning_rate": 7.989125186618566e-06, "loss": 0.4464, "step": 1539 }, { "epoch": 1.0994764397905759, "grad_norm": 0.3571033477783203, "learning_rate": 7.985792958513932e-06, "loss": 0.4109, "step": 1540 }, { "epoch": 1.1001903855306996, "grad_norm": 0.4611467123031616, "learning_rate": 7.982458667995477e-06, "loss": 0.4373, "step": 1541 }, { "epoch": 1.1009043312708233, "grad_norm": 0.41237887740135193, "learning_rate": 7.979122317366337e-06, "loss": 0.4477, "step": 1542 }, { "epoch": 1.1016182770109473, "grad_norm": 0.382065087556839, "learning_rate": 7.975783908931073e-06, "loss": 0.3844, "step": 1543 }, { "epoch": 1.102332222751071, "grad_norm": 0.42400848865509033, "learning_rate": 7.972443444995663e-06, "loss": 0.4202, "step": 1544 }, { "epoch": 1.1030461684911947, "grad_norm": 0.3919365704059601, "learning_rate": 7.969100927867508e-06, "loss": 0.3871, "step": 1545 }, { "epoch": 1.1037601142313185, "grad_norm": 0.4259241819381714, "learning_rate": 7.965756359855428e-06, "loss": 0.4576, "step": 1546 }, { "epoch": 1.1044740599714422, "grad_norm": 0.4084315001964569, "learning_rate": 7.962409743269654e-06, "loss": 0.4094, "step": 1547 }, { "epoch": 1.105188005711566, "grad_norm": 0.40495678782463074, "learning_rate": 7.95906108042184e-06, "loss": 0.4419, "step": 1548 }, { "epoch": 1.1059019514516897, "grad_norm": 0.3817271292209625, "learning_rate": 7.955710373625047e-06, "loss": 0.3967, "step": 1549 }, { "epoch": 1.1066158971918134, "grad_norm": 0.3514828383922577, "learning_rate": 7.952357625193749e-06, "loss": 0.3558, "step": 1550 }, { "epoch": 1.1073298429319371, "grad_norm": 0.41378253698349, "learning_rate": 7.949002837443836e-06, "loss": 0.4659, "step": 1551 }, { "epoch": 1.1080437886720609, "grad_norm": 0.38074102997779846, "learning_rate": 7.9456460126926e-06, "loss": 0.3839, "step": 1552 }, { "epoch": 1.1087577344121846, "grad_norm": 0.4404522776603699, "learning_rate": 7.942287153258741e-06, "loss": 0.4066, "step": 1553 }, { "epoch": 1.1094716801523083, "grad_norm": 0.38182923197746277, "learning_rate": 7.938926261462366e-06, "loss": 0.42, "step": 1554 }, { "epoch": 1.110185625892432, "grad_norm": 0.3753544092178345, "learning_rate": 7.935563339624988e-06, "loss": 0.4144, "step": 1555 }, { "epoch": 1.1108995716325558, "grad_norm": 0.4211865961551666, "learning_rate": 7.932198390069515e-06, "loss": 0.3694, "step": 1556 }, { "epoch": 1.1116135173726798, "grad_norm": 0.3534238040447235, "learning_rate": 7.928831415120265e-06, "loss": 0.4091, "step": 1557 }, { "epoch": 1.1123274631128035, "grad_norm": 0.41923439502716064, "learning_rate": 7.925462417102949e-06, "loss": 0.4126, "step": 1558 }, { "epoch": 1.1130414088529272, "grad_norm": 0.40439584851264954, "learning_rate": 7.922091398344674e-06, "loss": 0.4218, "step": 1559 }, { "epoch": 1.113755354593051, "grad_norm": 0.3760434687137604, "learning_rate": 7.918718361173951e-06, "loss": 0.4371, "step": 1560 }, { "epoch": 1.1144693003331747, "grad_norm": 0.3603569567203522, "learning_rate": 7.915343307920674e-06, "loss": 0.386, "step": 1561 }, { "epoch": 1.1151832460732984, "grad_norm": 0.36292415857315063, "learning_rate": 7.91196624091614e-06, "loss": 0.4062, "step": 1562 }, { "epoch": 1.1158971918134222, "grad_norm": 0.34848788380622864, "learning_rate": 7.90858716249303e-06, "loss": 0.3635, "step": 1563 }, { "epoch": 1.116611137553546, "grad_norm": 0.3804885149002075, "learning_rate": 7.905206074985416e-06, "loss": 0.419, "step": 1564 }, { "epoch": 1.1173250832936696, "grad_norm": 0.34398943185806274, "learning_rate": 7.901822980728761e-06, "loss": 0.3799, "step": 1565 }, { "epoch": 1.1180390290337934, "grad_norm": 0.3910611867904663, "learning_rate": 7.898437882059913e-06, "loss": 0.42, "step": 1566 }, { "epoch": 1.118752974773917, "grad_norm": 0.38644319772720337, "learning_rate": 7.895050781317097e-06, "loss": 0.3973, "step": 1567 }, { "epoch": 1.1194669205140408, "grad_norm": 0.3647618889808655, "learning_rate": 7.891661680839932e-06, "loss": 0.3719, "step": 1568 }, { "epoch": 1.1201808662541648, "grad_norm": 0.40835559368133545, "learning_rate": 7.888270582969415e-06, "loss": 0.4619, "step": 1569 }, { "epoch": 1.1208948119942885, "grad_norm": 0.35694777965545654, "learning_rate": 7.884877490047915e-06, "loss": 0.3926, "step": 1570 }, { "epoch": 1.1216087577344123, "grad_norm": 0.4006486237049103, "learning_rate": 7.88148240441919e-06, "loss": 0.4522, "step": 1571 }, { "epoch": 1.122322703474536, "grad_norm": 0.36353668570518494, "learning_rate": 7.87808532842837e-06, "loss": 0.3991, "step": 1572 }, { "epoch": 1.1230366492146597, "grad_norm": 0.38042980432510376, "learning_rate": 7.874686264421954e-06, "loss": 0.4152, "step": 1573 }, { "epoch": 1.1237505949547835, "grad_norm": 0.35065579414367676, "learning_rate": 7.871285214747825e-06, "loss": 0.4001, "step": 1574 }, { "epoch": 1.1244645406949072, "grad_norm": 0.3555498421192169, "learning_rate": 7.86788218175523e-06, "loss": 0.4168, "step": 1575 }, { "epoch": 1.125178486435031, "grad_norm": 0.3762018084526062, "learning_rate": 7.86447716779479e-06, "loss": 0.4367, "step": 1576 }, { "epoch": 1.1258924321751547, "grad_norm": 0.37621209025382996, "learning_rate": 7.861070175218492e-06, "loss": 0.3772, "step": 1577 }, { "epoch": 1.1266063779152784, "grad_norm": 0.3675478398799896, "learning_rate": 7.857661206379687e-06, "loss": 0.4091, "step": 1578 }, { "epoch": 1.1273203236554021, "grad_norm": 0.3727535307407379, "learning_rate": 7.8542502636331e-06, "loss": 0.4103, "step": 1579 }, { "epoch": 1.1280342693955259, "grad_norm": 0.3393477201461792, "learning_rate": 7.85083734933481e-06, "loss": 0.3862, "step": 1580 }, { "epoch": 1.1287482151356496, "grad_norm": 0.3449508547782898, "learning_rate": 7.84742246584226e-06, "loss": 0.412, "step": 1581 }, { "epoch": 1.1294621608757733, "grad_norm": 0.3551158607006073, "learning_rate": 7.84400561551426e-06, "loss": 0.389, "step": 1582 }, { "epoch": 1.130176106615897, "grad_norm": 0.3817702531814575, "learning_rate": 7.84058680071097e-06, "loss": 0.4369, "step": 1583 }, { "epoch": 1.130890052356021, "grad_norm": 0.3495345711708069, "learning_rate": 7.83716602379391e-06, "loss": 0.416, "step": 1584 }, { "epoch": 1.1316039980961448, "grad_norm": 0.37218794226646423, "learning_rate": 7.833743287125958e-06, "loss": 0.4293, "step": 1585 }, { "epoch": 1.1323179438362685, "grad_norm": 0.3814103305339813, "learning_rate": 7.83031859307134e-06, "loss": 0.3957, "step": 1586 }, { "epoch": 1.1330318895763922, "grad_norm": 0.37465646862983704, "learning_rate": 7.826891943995641e-06, "loss": 0.4246, "step": 1587 }, { "epoch": 1.133745835316516, "grad_norm": 0.4248572885990143, "learning_rate": 7.823463342265793e-06, "loss": 0.4445, "step": 1588 }, { "epoch": 1.1344597810566397, "grad_norm": 0.39941906929016113, "learning_rate": 7.820032790250073e-06, "loss": 0.3944, "step": 1589 }, { "epoch": 1.1351737267967634, "grad_norm": 0.3663010001182556, "learning_rate": 7.81660029031811e-06, "loss": 0.4074, "step": 1590 }, { "epoch": 1.1358876725368872, "grad_norm": 0.3688174784183502, "learning_rate": 7.81316584484088e-06, "loss": 0.3912, "step": 1591 }, { "epoch": 1.136601618277011, "grad_norm": 0.40535426139831543, "learning_rate": 7.809729456190699e-06, "loss": 0.3852, "step": 1592 }, { "epoch": 1.1373155640171346, "grad_norm": 0.40426114201545715, "learning_rate": 7.806291126741222e-06, "loss": 0.4218, "step": 1593 }, { "epoch": 1.1380295097572584, "grad_norm": 0.3818063735961914, "learning_rate": 7.802850858867454e-06, "loss": 0.4405, "step": 1594 }, { "epoch": 1.1387434554973823, "grad_norm": 0.3690115213394165, "learning_rate": 7.799408654945733e-06, "loss": 0.3278, "step": 1595 }, { "epoch": 1.139457401237506, "grad_norm": 0.4518817961215973, "learning_rate": 7.795964517353734e-06, "loss": 0.4364, "step": 1596 }, { "epoch": 1.1401713469776298, "grad_norm": 0.3729475736618042, "learning_rate": 7.79251844847047e-06, "loss": 0.3736, "step": 1597 }, { "epoch": 1.1408852927177535, "grad_norm": 0.39113327860832214, "learning_rate": 7.789070450676288e-06, "loss": 0.4289, "step": 1598 }, { "epoch": 1.1415992384578773, "grad_norm": 0.37770453095436096, "learning_rate": 7.785620526352862e-06, "loss": 0.4061, "step": 1599 }, { "epoch": 1.142313184198001, "grad_norm": 0.41695982217788696, "learning_rate": 7.782168677883206e-06, "loss": 0.4049, "step": 1600 }, { "epoch": 1.1430271299381247, "grad_norm": 0.3699249029159546, "learning_rate": 7.778714907651655e-06, "loss": 0.377, "step": 1601 }, { "epoch": 1.1437410756782485, "grad_norm": 0.3985188603401184, "learning_rate": 7.775259218043876e-06, "loss": 0.3902, "step": 1602 }, { "epoch": 1.1444550214183722, "grad_norm": 0.37457019090652466, "learning_rate": 7.771801611446859e-06, "loss": 0.4034, "step": 1603 }, { "epoch": 1.145168967158496, "grad_norm": 0.38990771770477295, "learning_rate": 7.768342090248919e-06, "loss": 0.4023, "step": 1604 }, { "epoch": 1.1458829128986197, "grad_norm": 0.39454373717308044, "learning_rate": 7.764880656839698e-06, "loss": 0.4217, "step": 1605 }, { "epoch": 1.1465968586387434, "grad_norm": 0.39274829626083374, "learning_rate": 7.76141731361015e-06, "loss": 0.4349, "step": 1606 }, { "epoch": 1.1473108043788671, "grad_norm": 0.4037812054157257, "learning_rate": 7.757952062952559e-06, "loss": 0.4002, "step": 1607 }, { "epoch": 1.1480247501189909, "grad_norm": 0.3890760540962219, "learning_rate": 7.754484907260513e-06, "loss": 0.4528, "step": 1608 }, { "epoch": 1.1487386958591146, "grad_norm": 0.35251176357269287, "learning_rate": 7.751015848928929e-06, "loss": 0.3956, "step": 1609 }, { "epoch": 1.1494526415992385, "grad_norm": 0.37196555733680725, "learning_rate": 7.747544890354031e-06, "loss": 0.391, "step": 1610 }, { "epoch": 1.1501665873393623, "grad_norm": 0.3829929232597351, "learning_rate": 7.744072033933356e-06, "loss": 0.4551, "step": 1611 }, { "epoch": 1.150880533079486, "grad_norm": 0.3760228157043457, "learning_rate": 7.740597282065756e-06, "loss": 0.3996, "step": 1612 }, { "epoch": 1.1515944788196097, "grad_norm": 0.3621516823768616, "learning_rate": 7.737120637151389e-06, "loss": 0.4005, "step": 1613 }, { "epoch": 1.1523084245597335, "grad_norm": 0.372494101524353, "learning_rate": 7.733642101591719e-06, "loss": 0.4168, "step": 1614 }, { "epoch": 1.1530223702998572, "grad_norm": 0.43177199363708496, "learning_rate": 7.730161677789518e-06, "loss": 0.4104, "step": 1615 }, { "epoch": 1.153736316039981, "grad_norm": 0.3601280152797699, "learning_rate": 7.726679368148863e-06, "loss": 0.4147, "step": 1616 }, { "epoch": 1.1544502617801047, "grad_norm": 0.3906410336494446, "learning_rate": 7.723195175075136e-06, "loss": 0.4077, "step": 1617 }, { "epoch": 1.1551642075202284, "grad_norm": 0.47527292370796204, "learning_rate": 7.719709100975012e-06, "loss": 0.438, "step": 1618 }, { "epoch": 1.1558781532603521, "grad_norm": 0.34364214539527893, "learning_rate": 7.716221148256475e-06, "loss": 0.4129, "step": 1619 }, { "epoch": 1.1565920990004759, "grad_norm": 0.4309101700782776, "learning_rate": 7.712731319328798e-06, "loss": 0.4282, "step": 1620 }, { "epoch": 1.1573060447405998, "grad_norm": 0.3382113575935364, "learning_rate": 7.709239616602556e-06, "loss": 0.3579, "step": 1621 }, { "epoch": 1.1580199904807236, "grad_norm": 0.4186495244503021, "learning_rate": 7.705746042489614e-06, "loss": 0.4983, "step": 1622 }, { "epoch": 1.1587339362208473, "grad_norm": 0.35274824500083923, "learning_rate": 7.702250599403133e-06, "loss": 0.3583, "step": 1623 }, { "epoch": 1.159447881960971, "grad_norm": 0.36097633838653564, "learning_rate": 7.698753289757565e-06, "loss": 0.4016, "step": 1624 }, { "epoch": 1.1601618277010948, "grad_norm": 0.3825134038925171, "learning_rate": 7.69525411596865e-06, "loss": 0.4221, "step": 1625 }, { "epoch": 1.1608757734412185, "grad_norm": 0.3673611283302307, "learning_rate": 7.691753080453413e-06, "loss": 0.4155, "step": 1626 }, { "epoch": 1.1615897191813422, "grad_norm": 0.4128561019897461, "learning_rate": 7.688250185630168e-06, "loss": 0.4067, "step": 1627 }, { "epoch": 1.162303664921466, "grad_norm": 0.35096636414527893, "learning_rate": 7.684745433918519e-06, "loss": 0.3845, "step": 1628 }, { "epoch": 1.1630176106615897, "grad_norm": 0.4037291407585144, "learning_rate": 7.681238827739338e-06, "loss": 0.4468, "step": 1629 }, { "epoch": 1.1637315564017134, "grad_norm": 0.41492077708244324, "learning_rate": 7.677730369514792e-06, "loss": 0.4304, "step": 1630 }, { "epoch": 1.1644455021418372, "grad_norm": 0.39384976029396057, "learning_rate": 7.674220061668323e-06, "loss": 0.4374, "step": 1631 }, { "epoch": 1.165159447881961, "grad_norm": 0.3649975657463074, "learning_rate": 7.670707906624644e-06, "loss": 0.3786, "step": 1632 }, { "epoch": 1.1658733936220846, "grad_norm": 0.416906476020813, "learning_rate": 7.667193906809754e-06, "loss": 0.4156, "step": 1633 }, { "epoch": 1.1665873393622084, "grad_norm": 0.36093437671661377, "learning_rate": 7.66367806465092e-06, "loss": 0.3854, "step": 1634 }, { "epoch": 1.167301285102332, "grad_norm": 0.3907586336135864, "learning_rate": 7.660160382576683e-06, "loss": 0.4447, "step": 1635 }, { "epoch": 1.168015230842456, "grad_norm": 0.4116952121257782, "learning_rate": 7.656640863016857e-06, "loss": 0.4083, "step": 1636 }, { "epoch": 1.1687291765825798, "grad_norm": 0.41316860914230347, "learning_rate": 7.653119508402522e-06, "loss": 0.454, "step": 1637 }, { "epoch": 1.1694431223227035, "grad_norm": 0.3817954659461975, "learning_rate": 7.649596321166024e-06, "loss": 0.3978, "step": 1638 }, { "epoch": 1.1701570680628273, "grad_norm": 0.4007527232170105, "learning_rate": 7.646071303740985e-06, "loss": 0.4668, "step": 1639 }, { "epoch": 1.170871013802951, "grad_norm": 0.4378969073295593, "learning_rate": 7.642544458562278e-06, "loss": 0.4335, "step": 1640 }, { "epoch": 1.1715849595430747, "grad_norm": 0.35937026143074036, "learning_rate": 7.639015788066046e-06, "loss": 0.3793, "step": 1641 }, { "epoch": 1.1722989052831985, "grad_norm": 0.4054730534553528, "learning_rate": 7.635485294689693e-06, "loss": 0.4334, "step": 1642 }, { "epoch": 1.1730128510233222, "grad_norm": 0.37875932455062866, "learning_rate": 7.631952980871879e-06, "loss": 0.4169, "step": 1643 }, { "epoch": 1.173726796763446, "grad_norm": 0.3923840820789337, "learning_rate": 7.628418849052523e-06, "loss": 0.4209, "step": 1644 }, { "epoch": 1.1744407425035697, "grad_norm": 0.3628324270248413, "learning_rate": 7.624882901672801e-06, "loss": 0.3874, "step": 1645 }, { "epoch": 1.1751546882436934, "grad_norm": 0.4371251165866852, "learning_rate": 7.6213451411751405e-06, "loss": 0.4328, "step": 1646 }, { "epoch": 1.1758686339838171, "grad_norm": 0.3731955885887146, "learning_rate": 7.617805570003223e-06, "loss": 0.4023, "step": 1647 }, { "epoch": 1.176582579723941, "grad_norm": 0.3900730311870575, "learning_rate": 7.614264190601981e-06, "loss": 0.3869, "step": 1648 }, { "epoch": 1.1772965254640648, "grad_norm": 0.35364001989364624, "learning_rate": 7.610721005417594e-06, "loss": 0.3795, "step": 1649 }, { "epoch": 1.1780104712041886, "grad_norm": 0.39277687668800354, "learning_rate": 7.607176016897491e-06, "loss": 0.4281, "step": 1650 }, { "epoch": 1.1787244169443123, "grad_norm": 0.3438258767127991, "learning_rate": 7.603629227490347e-06, "loss": 0.3782, "step": 1651 }, { "epoch": 1.179438362684436, "grad_norm": 0.38156962394714355, "learning_rate": 7.600080639646077e-06, "loss": 0.4037, "step": 1652 }, { "epoch": 1.1801523084245598, "grad_norm": 0.36124229431152344, "learning_rate": 7.596530255815846e-06, "loss": 0.4229, "step": 1653 }, { "epoch": 1.1808662541646835, "grad_norm": 0.3726867735385895, "learning_rate": 7.59297807845205e-06, "loss": 0.4098, "step": 1654 }, { "epoch": 1.1815801999048072, "grad_norm": 0.3814711570739746, "learning_rate": 7.58942411000833e-06, "loss": 0.394, "step": 1655 }, { "epoch": 1.182294145644931, "grad_norm": 0.3889271318912506, "learning_rate": 7.585868352939564e-06, "loss": 0.3993, "step": 1656 }, { "epoch": 1.1830080913850547, "grad_norm": 0.3860906958580017, "learning_rate": 7.5823108097018625e-06, "loss": 0.4257, "step": 1657 }, { "epoch": 1.1837220371251784, "grad_norm": 0.39376792311668396, "learning_rate": 7.578751482752572e-06, "loss": 0.4314, "step": 1658 }, { "epoch": 1.1844359828653022, "grad_norm": 0.3638084828853607, "learning_rate": 7.575190374550272e-06, "loss": 0.418, "step": 1659 }, { "epoch": 1.185149928605426, "grad_norm": 0.3763420879840851, "learning_rate": 7.571627487554769e-06, "loss": 0.3653, "step": 1660 }, { "epoch": 1.1858638743455496, "grad_norm": 0.40781906247138977, "learning_rate": 7.5680628242271e-06, "loss": 0.4161, "step": 1661 }, { "epoch": 1.1865778200856736, "grad_norm": 0.3526462912559509, "learning_rate": 7.564496387029532e-06, "loss": 0.4155, "step": 1662 }, { "epoch": 1.1872917658257973, "grad_norm": 0.35759782791137695, "learning_rate": 7.5609281784255505e-06, "loss": 0.4041, "step": 1663 }, { "epoch": 1.188005711565921, "grad_norm": 0.3475222885608673, "learning_rate": 7.5573582008798706e-06, "loss": 0.4187, "step": 1664 }, { "epoch": 1.1887196573060448, "grad_norm": 0.37290212512016296, "learning_rate": 7.553786456858429e-06, "loss": 0.4216, "step": 1665 }, { "epoch": 1.1894336030461685, "grad_norm": 0.3464251160621643, "learning_rate": 7.550212948828377e-06, "loss": 0.3661, "step": 1666 }, { "epoch": 1.1901475487862923, "grad_norm": 0.3699226975440979, "learning_rate": 7.546637679258091e-06, "loss": 0.3938, "step": 1667 }, { "epoch": 1.190861494526416, "grad_norm": 0.39688393473625183, "learning_rate": 7.543060650617159e-06, "loss": 0.4507, "step": 1668 }, { "epoch": 1.1915754402665397, "grad_norm": 0.37305453419685364, "learning_rate": 7.539481865376388e-06, "loss": 0.3862, "step": 1669 }, { "epoch": 1.1922893860066635, "grad_norm": 0.4015500843524933, "learning_rate": 7.535901326007796e-06, "loss": 0.4008, "step": 1670 }, { "epoch": 1.1930033317467872, "grad_norm": 0.3755965232849121, "learning_rate": 7.532319034984614e-06, "loss": 0.4101, "step": 1671 }, { "epoch": 1.193717277486911, "grad_norm": 0.3713371753692627, "learning_rate": 7.528734994781284e-06, "loss": 0.3834, "step": 1672 }, { "epoch": 1.1944312232270347, "grad_norm": 0.3709106743335724, "learning_rate": 7.5251492078734515e-06, "loss": 0.4007, "step": 1673 }, { "epoch": 1.1951451689671586, "grad_norm": 0.3653619587421417, "learning_rate": 7.521561676737972e-06, "loss": 0.473, "step": 1674 }, { "epoch": 1.1958591147072823, "grad_norm": 0.3679652214050293, "learning_rate": 7.517972403852905e-06, "loss": 0.4119, "step": 1675 }, { "epoch": 1.196573060447406, "grad_norm": 0.40264463424682617, "learning_rate": 7.514381391697518e-06, "loss": 0.4307, "step": 1676 }, { "epoch": 1.1972870061875298, "grad_norm": 0.35908249020576477, "learning_rate": 7.510788642752269e-06, "loss": 0.4101, "step": 1677 }, { "epoch": 1.1980009519276535, "grad_norm": 0.3540298044681549, "learning_rate": 7.507194159498827e-06, "loss": 0.4127, "step": 1678 }, { "epoch": 1.1987148976677773, "grad_norm": 0.3890514075756073, "learning_rate": 7.503597944420051e-06, "loss": 0.4493, "step": 1679 }, { "epoch": 1.199428843407901, "grad_norm": 0.3609951436519623, "learning_rate": 7.500000000000001e-06, "loss": 0.4148, "step": 1680 }, { "epoch": 1.2001427891480247, "grad_norm": 0.3549725115299225, "learning_rate": 7.496400328723929e-06, "loss": 0.3789, "step": 1681 }, { "epoch": 1.2008567348881485, "grad_norm": 0.3998273015022278, "learning_rate": 7.49279893307828e-06, "loss": 0.419, "step": 1682 }, { "epoch": 1.2015706806282722, "grad_norm": 0.36520156264305115, "learning_rate": 7.489195815550692e-06, "loss": 0.4035, "step": 1683 }, { "epoch": 1.202284626368396, "grad_norm": 0.3489760756492615, "learning_rate": 7.485590978629991e-06, "loss": 0.4122, "step": 1684 }, { "epoch": 1.2029985721085197, "grad_norm": 0.32319897413253784, "learning_rate": 7.48198442480619e-06, "loss": 0.4054, "step": 1685 }, { "epoch": 1.2037125178486434, "grad_norm": 0.40094175934791565, "learning_rate": 7.478376156570489e-06, "loss": 0.3985, "step": 1686 }, { "epoch": 1.2044264635887671, "grad_norm": 0.41882187128067017, "learning_rate": 7.4747661764152716e-06, "loss": 0.4552, "step": 1687 }, { "epoch": 1.2051404093288909, "grad_norm": 0.35747718811035156, "learning_rate": 7.471154486834105e-06, "loss": 0.4093, "step": 1688 }, { "epoch": 1.2058543550690148, "grad_norm": 0.3718704283237457, "learning_rate": 7.467541090321735e-06, "loss": 0.4358, "step": 1689 }, { "epoch": 1.2065683008091386, "grad_norm": 0.3605864942073822, "learning_rate": 7.463925989374089e-06, "loss": 0.3745, "step": 1690 }, { "epoch": 1.2072822465492623, "grad_norm": 0.47202691435813904, "learning_rate": 7.46030918648827e-06, "loss": 0.4598, "step": 1691 }, { "epoch": 1.207996192289386, "grad_norm": 0.3646314740180969, "learning_rate": 7.456690684162557e-06, "loss": 0.3647, "step": 1692 }, { "epoch": 1.2087101380295098, "grad_norm": 0.4154495298862457, "learning_rate": 7.453070484896404e-06, "loss": 0.4304, "step": 1693 }, { "epoch": 1.2094240837696335, "grad_norm": 0.3968041241168976, "learning_rate": 7.449448591190436e-06, "loss": 0.4194, "step": 1694 }, { "epoch": 1.2101380295097572, "grad_norm": 0.4121806025505066, "learning_rate": 7.445825005546448e-06, "loss": 0.4135, "step": 1695 }, { "epoch": 1.210851975249881, "grad_norm": 0.3678217828273773, "learning_rate": 7.442199730467403e-06, "loss": 0.4168, "step": 1696 }, { "epoch": 1.2115659209900047, "grad_norm": 0.381674200296402, "learning_rate": 7.438572768457435e-06, "loss": 0.4244, "step": 1697 }, { "epoch": 1.2122798667301284, "grad_norm": 0.3847369849681854, "learning_rate": 7.434944122021837e-06, "loss": 0.4167, "step": 1698 }, { "epoch": 1.2129938124702522, "grad_norm": 0.3897620737552643, "learning_rate": 7.431313793667072e-06, "loss": 0.4203, "step": 1699 }, { "epoch": 1.2137077582103761, "grad_norm": 0.3665939271450043, "learning_rate": 7.4276817859007615e-06, "loss": 0.424, "step": 1700 }, { "epoch": 1.2144217039504999, "grad_norm": 0.37564751505851746, "learning_rate": 7.424048101231687e-06, "loss": 0.4468, "step": 1701 }, { "epoch": 1.2151356496906236, "grad_norm": 0.35756975412368774, "learning_rate": 7.420412742169787e-06, "loss": 0.3933, "step": 1702 }, { "epoch": 1.2158495954307473, "grad_norm": 0.3804989159107208, "learning_rate": 7.41677571122616e-06, "loss": 0.4069, "step": 1703 }, { "epoch": 1.216563541170871, "grad_norm": 0.3934292793273926, "learning_rate": 7.413137010913055e-06, "loss": 0.4091, "step": 1704 }, { "epoch": 1.2172774869109948, "grad_norm": 0.41200676560401917, "learning_rate": 7.40949664374388e-06, "loss": 0.4153, "step": 1705 }, { "epoch": 1.2179914326511185, "grad_norm": 0.38602107763290405, "learning_rate": 7.40585461223319e-06, "loss": 0.3985, "step": 1706 }, { "epoch": 1.2187053783912423, "grad_norm": 0.3960776925086975, "learning_rate": 7.4022109188966895e-06, "loss": 0.4501, "step": 1707 }, { "epoch": 1.219419324131366, "grad_norm": 0.39100953936576843, "learning_rate": 7.398565566251232e-06, "loss": 0.4011, "step": 1708 }, { "epoch": 1.2201332698714897, "grad_norm": 0.4206654131412506, "learning_rate": 7.394918556814819e-06, "loss": 0.4281, "step": 1709 }, { "epoch": 1.2208472156116135, "grad_norm": 0.4027411639690399, "learning_rate": 7.391269893106592e-06, "loss": 0.4104, "step": 1710 }, { "epoch": 1.2215611613517372, "grad_norm": 0.36591699719429016, "learning_rate": 7.38761957764684e-06, "loss": 0.3688, "step": 1711 }, { "epoch": 1.222275107091861, "grad_norm": 0.37746813893318176, "learning_rate": 7.383967612956988e-06, "loss": 0.3966, "step": 1712 }, { "epoch": 1.2229890528319847, "grad_norm": 0.3523162305355072, "learning_rate": 7.3803140015596065e-06, "loss": 0.3793, "step": 1713 }, { "epoch": 1.2237029985721084, "grad_norm": 0.42229917645454407, "learning_rate": 7.376658745978399e-06, "loss": 0.4299, "step": 1714 }, { "epoch": 1.2244169443122324, "grad_norm": 0.37795141339302063, "learning_rate": 7.373001848738203e-06, "loss": 0.4016, "step": 1715 }, { "epoch": 1.225130890052356, "grad_norm": 0.39933252334594727, "learning_rate": 7.369343312364994e-06, "loss": 0.4181, "step": 1716 }, { "epoch": 1.2258448357924798, "grad_norm": 0.3269639015197754, "learning_rate": 7.36568313938588e-06, "loss": 0.3813, "step": 1717 }, { "epoch": 1.2265587815326036, "grad_norm": 0.36302849650382996, "learning_rate": 7.3620213323290925e-06, "loss": 0.4548, "step": 1718 }, { "epoch": 1.2272727272727273, "grad_norm": 0.36343181133270264, "learning_rate": 7.358357893724003e-06, "loss": 0.3947, "step": 1719 }, { "epoch": 1.227986673012851, "grad_norm": 0.3895154893398285, "learning_rate": 7.354692826101102e-06, "loss": 0.4131, "step": 1720 }, { "epoch": 1.2287006187529748, "grad_norm": 0.35956868529319763, "learning_rate": 7.351026131992005e-06, "loss": 0.3826, "step": 1721 }, { "epoch": 1.2294145644930985, "grad_norm": 0.3585433065891266, "learning_rate": 7.347357813929455e-06, "loss": 0.4118, "step": 1722 }, { "epoch": 1.2301285102332222, "grad_norm": 0.40509283542633057, "learning_rate": 7.343687874447314e-06, "loss": 0.4487, "step": 1723 }, { "epoch": 1.230842455973346, "grad_norm": 0.3875187635421753, "learning_rate": 7.340016316080565e-06, "loss": 0.427, "step": 1724 }, { "epoch": 1.2315564017134697, "grad_norm": 0.3712376356124878, "learning_rate": 7.336343141365311e-06, "loss": 0.3752, "step": 1725 }, { "epoch": 1.2322703474535937, "grad_norm": 0.4046606421470642, "learning_rate": 7.332668352838766e-06, "loss": 0.4167, "step": 1726 }, { "epoch": 1.2329842931937174, "grad_norm": 0.4665222465991974, "learning_rate": 7.328991953039266e-06, "loss": 0.4028, "step": 1727 }, { "epoch": 1.2336982389338411, "grad_norm": 0.3878833055496216, "learning_rate": 7.3253139445062535e-06, "loss": 0.4032, "step": 1728 }, { "epoch": 1.2344121846739649, "grad_norm": 0.4618808627128601, "learning_rate": 7.321634329780286e-06, "loss": 0.4616, "step": 1729 }, { "epoch": 1.2351261304140886, "grad_norm": 0.39210790395736694, "learning_rate": 7.317953111403029e-06, "loss": 0.3938, "step": 1730 }, { "epoch": 1.2358400761542123, "grad_norm": 0.40029633045196533, "learning_rate": 7.314270291917256e-06, "loss": 0.4077, "step": 1731 }, { "epoch": 1.236554021894336, "grad_norm": 0.36452963948249817, "learning_rate": 7.310585873866849e-06, "loss": 0.3966, "step": 1732 }, { "epoch": 1.2372679676344598, "grad_norm": 0.47701600193977356, "learning_rate": 7.3068998597967885e-06, "loss": 0.4054, "step": 1733 }, { "epoch": 1.2379819133745835, "grad_norm": 0.3986196517944336, "learning_rate": 7.303212252253163e-06, "loss": 0.4318, "step": 1734 }, { "epoch": 1.2386958591147073, "grad_norm": 0.4075613021850586, "learning_rate": 7.299523053783157e-06, "loss": 0.4228, "step": 1735 }, { "epoch": 1.239409804854831, "grad_norm": 0.38738906383514404, "learning_rate": 7.295832266935059e-06, "loss": 0.429, "step": 1736 }, { "epoch": 1.2401237505949547, "grad_norm": 0.34400275349617004, "learning_rate": 7.29213989425825e-06, "loss": 0.3625, "step": 1737 }, { "epoch": 1.2408376963350785, "grad_norm": 0.4018247127532959, "learning_rate": 7.288445938303211e-06, "loss": 0.4189, "step": 1738 }, { "epoch": 1.2415516420752022, "grad_norm": 0.3847181499004364, "learning_rate": 7.2847504016215105e-06, "loss": 0.3959, "step": 1739 }, { "epoch": 1.242265587815326, "grad_norm": 0.3555474579334259, "learning_rate": 7.281053286765816e-06, "loss": 0.3864, "step": 1740 }, { "epoch": 1.2429795335554499, "grad_norm": 0.3967037498950958, "learning_rate": 7.277354596289878e-06, "loss": 0.4172, "step": 1741 }, { "epoch": 1.2436934792955736, "grad_norm": 0.38541585206985474, "learning_rate": 7.273654332748541e-06, "loss": 0.4286, "step": 1742 }, { "epoch": 1.2444074250356973, "grad_norm": 0.42293480038642883, "learning_rate": 7.269952498697734e-06, "loss": 0.4575, "step": 1743 }, { "epoch": 1.245121370775821, "grad_norm": 0.3557571768760681, "learning_rate": 7.266249096694471e-06, "loss": 0.373, "step": 1744 }, { "epoch": 1.2458353165159448, "grad_norm": 0.36732858419418335, "learning_rate": 7.262544129296848e-06, "loss": 0.4083, "step": 1745 }, { "epoch": 1.2465492622560685, "grad_norm": 0.4440584182739258, "learning_rate": 7.258837599064043e-06, "loss": 0.4603, "step": 1746 }, { "epoch": 1.2472632079961923, "grad_norm": 0.3568522036075592, "learning_rate": 7.255129508556312e-06, "loss": 0.3879, "step": 1747 }, { "epoch": 1.247977153736316, "grad_norm": 0.3794586658477783, "learning_rate": 7.251419860334994e-06, "loss": 0.4216, "step": 1748 }, { "epoch": 1.2486910994764397, "grad_norm": 0.37130436301231384, "learning_rate": 7.247708656962498e-06, "loss": 0.3719, "step": 1749 }, { "epoch": 1.2494050452165635, "grad_norm": 0.41925621032714844, "learning_rate": 7.243995901002312e-06, "loss": 0.4196, "step": 1750 }, { "epoch": 1.2501189909566872, "grad_norm": 0.33625364303588867, "learning_rate": 7.240281595018991e-06, "loss": 0.3807, "step": 1751 }, { "epoch": 1.2508329366968112, "grad_norm": 0.3737693727016449, "learning_rate": 7.236565741578163e-06, "loss": 0.4239, "step": 1752 }, { "epoch": 1.251546882436935, "grad_norm": 0.4040364623069763, "learning_rate": 7.23284834324653e-06, "loss": 0.3852, "step": 1753 }, { "epoch": 1.2522608281770586, "grad_norm": 0.3605290353298187, "learning_rate": 7.229129402591852e-06, "loss": 0.387, "step": 1754 }, { "epoch": 1.2529747739171824, "grad_norm": 0.3255966007709503, "learning_rate": 7.225408922182962e-06, "loss": 0.4027, "step": 1755 }, { "epoch": 1.253688719657306, "grad_norm": 0.3581578731536865, "learning_rate": 7.221686904589754e-06, "loss": 0.4138, "step": 1756 }, { "epoch": 1.2544026653974298, "grad_norm": 0.36623701453208923, "learning_rate": 7.217963352383182e-06, "loss": 0.4023, "step": 1757 }, { "epoch": 1.2551166111375536, "grad_norm": 0.32047656178474426, "learning_rate": 7.214238268135258e-06, "loss": 0.3896, "step": 1758 }, { "epoch": 1.2558305568776773, "grad_norm": 0.3591682016849518, "learning_rate": 7.210511654419062e-06, "loss": 0.4037, "step": 1759 }, { "epoch": 1.256544502617801, "grad_norm": 0.34747225046157837, "learning_rate": 7.206783513808721e-06, "loss": 0.3967, "step": 1760 }, { "epoch": 1.2572584483579248, "grad_norm": 0.3492237329483032, "learning_rate": 7.203053848879419e-06, "loss": 0.4369, "step": 1761 }, { "epoch": 1.2579723940980485, "grad_norm": 0.3685934841632843, "learning_rate": 7.199322662207396e-06, "loss": 0.3933, "step": 1762 }, { "epoch": 1.2586863398381722, "grad_norm": 0.4155377745628357, "learning_rate": 7.1955899563699405e-06, "loss": 0.4232, "step": 1763 }, { "epoch": 1.259400285578296, "grad_norm": 0.4029749631881714, "learning_rate": 7.191855733945388e-06, "loss": 0.4368, "step": 1764 }, { "epoch": 1.2601142313184197, "grad_norm": 0.39092421531677246, "learning_rate": 7.188119997513127e-06, "loss": 0.3789, "step": 1765 }, { "epoch": 1.2608281770585434, "grad_norm": 0.4011591076850891, "learning_rate": 7.184382749653589e-06, "loss": 0.3989, "step": 1766 }, { "epoch": 1.2615421227986672, "grad_norm": 0.3564746677875519, "learning_rate": 7.180643992948247e-06, "loss": 0.3948, "step": 1767 }, { "epoch": 1.2622560685387911, "grad_norm": 0.4585202634334564, "learning_rate": 7.176903729979622e-06, "loss": 0.4088, "step": 1768 }, { "epoch": 1.2629700142789149, "grad_norm": 0.37669700384140015, "learning_rate": 7.173161963331271e-06, "loss": 0.3838, "step": 1769 }, { "epoch": 1.2636839600190386, "grad_norm": 0.34163612127304077, "learning_rate": 7.169418695587791e-06, "loss": 0.3769, "step": 1770 }, { "epoch": 1.2643979057591623, "grad_norm": 0.38727229833602905, "learning_rate": 7.165673929334816e-06, "loss": 0.4042, "step": 1771 }, { "epoch": 1.265111851499286, "grad_norm": 0.38921433687210083, "learning_rate": 7.161927667159013e-06, "loss": 0.413, "step": 1772 }, { "epoch": 1.2658257972394098, "grad_norm": 0.3988471031188965, "learning_rate": 7.158179911648087e-06, "loss": 0.4257, "step": 1773 }, { "epoch": 1.2665397429795335, "grad_norm": 0.36354073882102966, "learning_rate": 7.15443066539077e-06, "loss": 0.4261, "step": 1774 }, { "epoch": 1.2672536887196573, "grad_norm": 0.39231181144714355, "learning_rate": 7.150679930976826e-06, "loss": 0.3796, "step": 1775 }, { "epoch": 1.267967634459781, "grad_norm": 0.4442611634731293, "learning_rate": 7.146927710997047e-06, "loss": 0.41, "step": 1776 }, { "epoch": 1.2686815801999047, "grad_norm": 0.37000223994255066, "learning_rate": 7.143174008043248e-06, "loss": 0.4192, "step": 1777 }, { "epoch": 1.2693955259400287, "grad_norm": 0.39494144916534424, "learning_rate": 7.1394188247082715e-06, "loss": 0.3975, "step": 1778 }, { "epoch": 1.2701094716801524, "grad_norm": 0.3675936758518219, "learning_rate": 7.135662163585984e-06, "loss": 0.3994, "step": 1779 }, { "epoch": 1.2708234174202762, "grad_norm": 0.34165385365486145, "learning_rate": 7.1319040272712705e-06, "loss": 0.3977, "step": 1780 }, { "epoch": 1.2715373631604, "grad_norm": 0.3635388910770416, "learning_rate": 7.128144418360033e-06, "loss": 0.4042, "step": 1781 }, { "epoch": 1.2722513089005236, "grad_norm": 0.41613712906837463, "learning_rate": 7.124383339449193e-06, "loss": 0.4481, "step": 1782 }, { "epoch": 1.2729652546406474, "grad_norm": 0.34609201550483704, "learning_rate": 7.120620793136689e-06, "loss": 0.3998, "step": 1783 }, { "epoch": 1.273679200380771, "grad_norm": 0.44059431552886963, "learning_rate": 7.116856782021469e-06, "loss": 0.4402, "step": 1784 }, { "epoch": 1.2743931461208948, "grad_norm": 0.31442174315452576, "learning_rate": 7.113091308703498e-06, "loss": 0.3864, "step": 1785 }, { "epoch": 1.2751070918610186, "grad_norm": 0.34456944465637207, "learning_rate": 7.109324375783746e-06, "loss": 0.3792, "step": 1786 }, { "epoch": 1.2758210376011423, "grad_norm": 0.3726886510848999, "learning_rate": 7.105555985864194e-06, "loss": 0.4207, "step": 1787 }, { "epoch": 1.276534983341266, "grad_norm": 0.3595294654369354, "learning_rate": 7.101786141547829e-06, "loss": 0.3943, "step": 1788 }, { "epoch": 1.2772489290813898, "grad_norm": 0.3850803077220917, "learning_rate": 7.09801484543864e-06, "loss": 0.4952, "step": 1789 }, { "epoch": 1.2779628748215135, "grad_norm": 0.30809205770492554, "learning_rate": 7.094242100141625e-06, "loss": 0.3709, "step": 1790 }, { "epoch": 1.2786768205616372, "grad_norm": 0.4388459026813507, "learning_rate": 7.090467908262777e-06, "loss": 0.4728, "step": 1791 }, { "epoch": 1.279390766301761, "grad_norm": 0.3715416193008423, "learning_rate": 7.08669227240909e-06, "loss": 0.3797, "step": 1792 }, { "epoch": 1.2801047120418847, "grad_norm": 0.3442803919315338, "learning_rate": 7.082915195188558e-06, "loss": 0.4075, "step": 1793 }, { "epoch": 1.2808186577820084, "grad_norm": 0.4321364164352417, "learning_rate": 7.079136679210165e-06, "loss": 0.4859, "step": 1794 }, { "epoch": 1.2815326035221324, "grad_norm": 0.34529802203178406, "learning_rate": 7.075356727083894e-06, "loss": 0.3789, "step": 1795 }, { "epoch": 1.2822465492622561, "grad_norm": 0.35416677594184875, "learning_rate": 7.07157534142072e-06, "loss": 0.3727, "step": 1796 }, { "epoch": 1.2829604950023799, "grad_norm": 0.43859609961509705, "learning_rate": 7.067792524832604e-06, "loss": 0.4709, "step": 1797 }, { "epoch": 1.2836744407425036, "grad_norm": 0.3799041211605072, "learning_rate": 7.064008279932499e-06, "loss": 0.4112, "step": 1798 }, { "epoch": 1.2843883864826273, "grad_norm": 0.390602171421051, "learning_rate": 7.060222609334343e-06, "loss": 0.4157, "step": 1799 }, { "epoch": 1.285102332222751, "grad_norm": 0.35931912064552307, "learning_rate": 7.056435515653059e-06, "loss": 0.3806, "step": 1800 }, { "epoch": 1.2858162779628748, "grad_norm": 0.3904794752597809, "learning_rate": 7.0526470015045536e-06, "loss": 0.3906, "step": 1801 }, { "epoch": 1.2865302237029985, "grad_norm": 0.38831472396850586, "learning_rate": 7.048857069505714e-06, "loss": 0.4259, "step": 1802 }, { "epoch": 1.2872441694431223, "grad_norm": 0.3882738947868347, "learning_rate": 7.045065722274407e-06, "loss": 0.4472, "step": 1803 }, { "epoch": 1.2879581151832462, "grad_norm": 0.38649091124534607, "learning_rate": 7.041272962429478e-06, "loss": 0.4158, "step": 1804 }, { "epoch": 1.28867206092337, "grad_norm": 0.4356970191001892, "learning_rate": 7.037478792590744e-06, "loss": 0.4045, "step": 1805 }, { "epoch": 1.2893860066634937, "grad_norm": 0.3897852301597595, "learning_rate": 7.033683215379002e-06, "loss": 0.3765, "step": 1806 }, { "epoch": 1.2900999524036174, "grad_norm": 0.38683855533599854, "learning_rate": 7.029886233416017e-06, "loss": 0.3645, "step": 1807 }, { "epoch": 1.2908138981437411, "grad_norm": 0.46600252389907837, "learning_rate": 7.026087849324527e-06, "loss": 0.4421, "step": 1808 }, { "epoch": 1.2915278438838649, "grad_norm": 0.42398691177368164, "learning_rate": 7.022288065728233e-06, "loss": 0.4187, "step": 1809 }, { "epoch": 1.2922417896239886, "grad_norm": 0.375620037317276, "learning_rate": 7.0184868852518114e-06, "loss": 0.3639, "step": 1810 }, { "epoch": 1.2929557353641123, "grad_norm": 0.43513545393943787, "learning_rate": 7.014684310520897e-06, "loss": 0.4157, "step": 1811 }, { "epoch": 1.293669681104236, "grad_norm": 0.41871434450149536, "learning_rate": 7.010880344162087e-06, "loss": 0.4003, "step": 1812 }, { "epoch": 1.2943836268443598, "grad_norm": 0.3969043791294098, "learning_rate": 7.007074988802946e-06, "loss": 0.4467, "step": 1813 }, { "epoch": 1.2950975725844835, "grad_norm": 0.3862881660461426, "learning_rate": 7.003268247071994e-06, "loss": 0.3901, "step": 1814 }, { "epoch": 1.2958115183246073, "grad_norm": 0.44554799795150757, "learning_rate": 6.999460121598704e-06, "loss": 0.4611, "step": 1815 }, { "epoch": 1.296525464064731, "grad_norm": 0.3812718093395233, "learning_rate": 6.995650615013516e-06, "loss": 0.3818, "step": 1816 }, { "epoch": 1.2972394098048547, "grad_norm": 0.39190050959587097, "learning_rate": 6.991839729947817e-06, "loss": 0.4431, "step": 1817 }, { "epoch": 1.2979533555449785, "grad_norm": 0.4073682725429535, "learning_rate": 6.988027469033943e-06, "loss": 0.3875, "step": 1818 }, { "epoch": 1.2986673012851022, "grad_norm": 0.3678056597709656, "learning_rate": 6.984213834905186e-06, "loss": 0.3862, "step": 1819 }, { "epoch": 1.299381247025226, "grad_norm": 0.3448074758052826, "learning_rate": 6.980398830195785e-06, "loss": 0.4014, "step": 1820 }, { "epoch": 1.30009519276535, "grad_norm": 0.3786395192146301, "learning_rate": 6.976582457540926e-06, "loss": 0.4005, "step": 1821 }, { "epoch": 1.3008091385054736, "grad_norm": 0.36747899651527405, "learning_rate": 6.972764719576739e-06, "loss": 0.3857, "step": 1822 }, { "epoch": 1.3015230842455974, "grad_norm": 0.3883817195892334, "learning_rate": 6.968945618940299e-06, "loss": 0.4314, "step": 1823 }, { "epoch": 1.302237029985721, "grad_norm": 0.3571496605873108, "learning_rate": 6.965125158269619e-06, "loss": 0.4296, "step": 1824 }, { "epoch": 1.3029509757258448, "grad_norm": 0.37855738401412964, "learning_rate": 6.961303340203653e-06, "loss": 0.3876, "step": 1825 }, { "epoch": 1.3036649214659686, "grad_norm": 0.3811725676059723, "learning_rate": 6.957480167382294e-06, "loss": 0.416, "step": 1826 }, { "epoch": 1.3043788672060923, "grad_norm": 0.35796719789505005, "learning_rate": 6.953655642446368e-06, "loss": 0.4283, "step": 1827 }, { "epoch": 1.305092812946216, "grad_norm": 0.4055135250091553, "learning_rate": 6.94982976803764e-06, "loss": 0.4014, "step": 1828 }, { "epoch": 1.3058067586863398, "grad_norm": 0.39228639006614685, "learning_rate": 6.9460025467988e-06, "loss": 0.4451, "step": 1829 }, { "epoch": 1.3065207044264635, "grad_norm": 0.32330313324928284, "learning_rate": 6.942173981373474e-06, "loss": 0.3691, "step": 1830 }, { "epoch": 1.3072346501665875, "grad_norm": 0.418790340423584, "learning_rate": 6.938344074406214e-06, "loss": 0.4142, "step": 1831 }, { "epoch": 1.3079485959067112, "grad_norm": 0.367402583360672, "learning_rate": 6.934512828542498e-06, "loss": 0.3889, "step": 1832 }, { "epoch": 1.308662541646835, "grad_norm": 0.370441198348999, "learning_rate": 6.930680246428732e-06, "loss": 0.4113, "step": 1833 }, { "epoch": 1.3093764873869587, "grad_norm": 0.41884833574295044, "learning_rate": 6.9268463307122425e-06, "loss": 0.4326, "step": 1834 }, { "epoch": 1.3100904331270824, "grad_norm": 0.3725357949733734, "learning_rate": 6.923011084041276e-06, "loss": 0.4412, "step": 1835 }, { "epoch": 1.3108043788672061, "grad_norm": 0.3965980112552643, "learning_rate": 6.919174509065003e-06, "loss": 0.3895, "step": 1836 }, { "epoch": 1.3115183246073299, "grad_norm": 0.4142155945301056, "learning_rate": 6.915336608433505e-06, "loss": 0.4221, "step": 1837 }, { "epoch": 1.3122322703474536, "grad_norm": 0.3294416666030884, "learning_rate": 6.911497384797785e-06, "loss": 0.3906, "step": 1838 }, { "epoch": 1.3129462160875773, "grad_norm": 0.4166727066040039, "learning_rate": 6.907656840809758e-06, "loss": 0.4199, "step": 1839 }, { "epoch": 1.313660161827701, "grad_norm": 0.4000997543334961, "learning_rate": 6.903814979122249e-06, "loss": 0.4222, "step": 1840 }, { "epoch": 1.3143741075678248, "grad_norm": 0.3874818980693817, "learning_rate": 6.8999718023889965e-06, "loss": 0.4092, "step": 1841 }, { "epoch": 1.3150880533079485, "grad_norm": 0.32716643810272217, "learning_rate": 6.896127313264643e-06, "loss": 0.3629, "step": 1842 }, { "epoch": 1.3158019990480723, "grad_norm": 0.41060400009155273, "learning_rate": 6.8922815144047425e-06, "loss": 0.4472, "step": 1843 }, { "epoch": 1.316515944788196, "grad_norm": 0.41023683547973633, "learning_rate": 6.888434408465751e-06, "loss": 0.4241, "step": 1844 }, { "epoch": 1.3172298905283197, "grad_norm": 0.38187751173973083, "learning_rate": 6.8845859981050265e-06, "loss": 0.4286, "step": 1845 }, { "epoch": 1.3179438362684435, "grad_norm": 0.3486166000366211, "learning_rate": 6.880736285980832e-06, "loss": 0.4078, "step": 1846 }, { "epoch": 1.3186577820085674, "grad_norm": 0.4209003150463104, "learning_rate": 6.876885274752325e-06, "loss": 0.4244, "step": 1847 }, { "epoch": 1.3193717277486912, "grad_norm": 0.382060706615448, "learning_rate": 6.873032967079562e-06, "loss": 0.4522, "step": 1848 }, { "epoch": 1.320085673488815, "grad_norm": 0.39987072348594666, "learning_rate": 6.869179365623494e-06, "loss": 0.4483, "step": 1849 }, { "epoch": 1.3207996192289386, "grad_norm": 0.35256335139274597, "learning_rate": 6.86532447304597e-06, "loss": 0.4058, "step": 1850 }, { "epoch": 1.3215135649690624, "grad_norm": 0.3744581937789917, "learning_rate": 6.8614682920097265e-06, "loss": 0.4128, "step": 1851 }, { "epoch": 1.322227510709186, "grad_norm": 0.36044371128082275, "learning_rate": 6.85761082517839e-06, "loss": 0.4217, "step": 1852 }, { "epoch": 1.3229414564493098, "grad_norm": 0.36062249541282654, "learning_rate": 6.85375207521648e-06, "loss": 0.3987, "step": 1853 }, { "epoch": 1.3236554021894336, "grad_norm": 0.34385916590690613, "learning_rate": 6.8498920447893955e-06, "loss": 0.388, "step": 1854 }, { "epoch": 1.3243693479295573, "grad_norm": 0.35270482301712036, "learning_rate": 6.8460307365634225e-06, "loss": 0.4398, "step": 1855 }, { "epoch": 1.325083293669681, "grad_norm": 0.37861794233322144, "learning_rate": 6.842168153205734e-06, "loss": 0.4172, "step": 1856 }, { "epoch": 1.325797239409805, "grad_norm": 0.37961673736572266, "learning_rate": 6.838304297384377e-06, "loss": 0.4374, "step": 1857 }, { "epoch": 1.3265111851499287, "grad_norm": 0.34836646914482117, "learning_rate": 6.834439171768281e-06, "loss": 0.39, "step": 1858 }, { "epoch": 1.3272251308900525, "grad_norm": 0.40485402941703796, "learning_rate": 6.830572779027255e-06, "loss": 0.4005, "step": 1859 }, { "epoch": 1.3279390766301762, "grad_norm": 0.40433064103126526, "learning_rate": 6.8267051218319766e-06, "loss": 0.4153, "step": 1860 }, { "epoch": 1.3286530223703, "grad_norm": 0.3619265854358673, "learning_rate": 6.8228362028540014e-06, "loss": 0.4239, "step": 1861 }, { "epoch": 1.3293669681104237, "grad_norm": 0.4338117837905884, "learning_rate": 6.818966024765758e-06, "loss": 0.4213, "step": 1862 }, { "epoch": 1.3300809138505474, "grad_norm": 0.4010458290576935, "learning_rate": 6.8150945902405415e-06, "loss": 0.4415, "step": 1863 }, { "epoch": 1.3307948595906711, "grad_norm": 0.3375040590763092, "learning_rate": 6.811221901952514e-06, "loss": 0.3888, "step": 1864 }, { "epoch": 1.3315088053307949, "grad_norm": 0.3807043135166168, "learning_rate": 6.80734796257671e-06, "loss": 0.4173, "step": 1865 }, { "epoch": 1.3322227510709186, "grad_norm": 0.3496513068675995, "learning_rate": 6.8034727747890195e-06, "loss": 0.3743, "step": 1866 }, { "epoch": 1.3329366968110423, "grad_norm": 0.3672002851963043, "learning_rate": 6.799596341266199e-06, "loss": 0.4275, "step": 1867 }, { "epoch": 1.333650642551166, "grad_norm": 0.3625110387802124, "learning_rate": 6.795718664685869e-06, "loss": 0.4364, "step": 1868 }, { "epoch": 1.3343645882912898, "grad_norm": 0.3347302973270416, "learning_rate": 6.7918397477265e-06, "loss": 0.4037, "step": 1869 }, { "epoch": 1.3350785340314135, "grad_norm": 0.39025142788887024, "learning_rate": 6.787959593067431e-06, "loss": 0.4127, "step": 1870 }, { "epoch": 1.3357924797715373, "grad_norm": 0.38774973154067993, "learning_rate": 6.784078203388846e-06, "loss": 0.479, "step": 1871 }, { "epoch": 1.336506425511661, "grad_norm": 0.39791107177734375, "learning_rate": 6.780195581371785e-06, "loss": 0.4576, "step": 1872 }, { "epoch": 1.337220371251785, "grad_norm": 0.3305912911891937, "learning_rate": 6.776311729698139e-06, "loss": 0.3979, "step": 1873 }, { "epoch": 1.3379343169919087, "grad_norm": 0.35750141739845276, "learning_rate": 6.772426651050651e-06, "loss": 0.4139, "step": 1874 }, { "epoch": 1.3386482627320324, "grad_norm": 0.34659498929977417, "learning_rate": 6.768540348112908e-06, "loss": 0.3854, "step": 1875 }, { "epoch": 1.3393622084721561, "grad_norm": 0.398612380027771, "learning_rate": 6.7646528235693445e-06, "loss": 0.4225, "step": 1876 }, { "epoch": 1.3400761542122799, "grad_norm": 0.3540426790714264, "learning_rate": 6.7607640801052395e-06, "loss": 0.3986, "step": 1877 }, { "epoch": 1.3407900999524036, "grad_norm": 0.33179301023483276, "learning_rate": 6.7568741204067145e-06, "loss": 0.3972, "step": 1878 }, { "epoch": 1.3415040456925273, "grad_norm": 0.41746652126312256, "learning_rate": 6.752982947160727e-06, "loss": 0.417, "step": 1879 }, { "epoch": 1.342217991432651, "grad_norm": 0.3637026250362396, "learning_rate": 6.749090563055075e-06, "loss": 0.4144, "step": 1880 }, { "epoch": 1.3429319371727748, "grad_norm": 0.35406988859176636, "learning_rate": 6.745196970778394e-06, "loss": 0.4351, "step": 1881 }, { "epoch": 1.3436458829128985, "grad_norm": 0.37934255599975586, "learning_rate": 6.741302173020155e-06, "loss": 0.3905, "step": 1882 }, { "epoch": 1.3443598286530225, "grad_norm": 0.3637227714061737, "learning_rate": 6.737406172470657e-06, "loss": 0.3812, "step": 1883 }, { "epoch": 1.3450737743931462, "grad_norm": 0.3569844961166382, "learning_rate": 6.733508971821037e-06, "loss": 0.4604, "step": 1884 }, { "epoch": 1.34578772013327, "grad_norm": 0.34184494614601135, "learning_rate": 6.729610573763252e-06, "loss": 0.3964, "step": 1885 }, { "epoch": 1.3465016658733937, "grad_norm": 0.38159283995628357, "learning_rate": 6.7257109809900945e-06, "loss": 0.4291, "step": 1886 }, { "epoch": 1.3472156116135174, "grad_norm": 0.34077188372612, "learning_rate": 6.721810196195176e-06, "loss": 0.4164, "step": 1887 }, { "epoch": 1.3479295573536412, "grad_norm": 0.3269563615322113, "learning_rate": 6.7179082220729355e-06, "loss": 0.3909, "step": 1888 }, { "epoch": 1.348643503093765, "grad_norm": 0.37459084391593933, "learning_rate": 6.714005061318633e-06, "loss": 0.4369, "step": 1889 }, { "epoch": 1.3493574488338886, "grad_norm": 0.36838868260383606, "learning_rate": 6.710100716628345e-06, "loss": 0.4033, "step": 1890 }, { "epoch": 1.3500713945740124, "grad_norm": 0.35317298769950867, "learning_rate": 6.706195190698969e-06, "loss": 0.3994, "step": 1891 }, { "epoch": 1.350785340314136, "grad_norm": 0.3870333135128021, "learning_rate": 6.702288486228215e-06, "loss": 0.4526, "step": 1892 }, { "epoch": 1.3514992860542598, "grad_norm": 0.39125704765319824, "learning_rate": 6.698380605914614e-06, "loss": 0.3971, "step": 1893 }, { "epoch": 1.3522132317943836, "grad_norm": 0.3511878252029419, "learning_rate": 6.694471552457502e-06, "loss": 0.3636, "step": 1894 }, { "epoch": 1.3529271775345073, "grad_norm": 0.3698835074901581, "learning_rate": 6.6905613285570295e-06, "loss": 0.4076, "step": 1895 }, { "epoch": 1.353641123274631, "grad_norm": 0.3352327346801758, "learning_rate": 6.686649936914151e-06, "loss": 0.377, "step": 1896 }, { "epoch": 1.3543550690147548, "grad_norm": 0.36856481432914734, "learning_rate": 6.682737380230633e-06, "loss": 0.4219, "step": 1897 }, { "epoch": 1.3550690147548785, "grad_norm": 0.3999020457267761, "learning_rate": 6.678823661209043e-06, "loss": 0.4242, "step": 1898 }, { "epoch": 1.3557829604950022, "grad_norm": 0.3534005582332611, "learning_rate": 6.6749087825527535e-06, "loss": 0.4113, "step": 1899 }, { "epoch": 1.3564969062351262, "grad_norm": 0.3533548414707184, "learning_rate": 6.6709927469659385e-06, "loss": 0.3628, "step": 1900 }, { "epoch": 1.35721085197525, "grad_norm": 0.36851775646209717, "learning_rate": 6.667075557153568e-06, "loss": 0.3888, "step": 1901 }, { "epoch": 1.3579247977153737, "grad_norm": 0.3680386543273926, "learning_rate": 6.6631572158214105e-06, "loss": 0.4185, "step": 1902 }, { "epoch": 1.3586387434554974, "grad_norm": 0.35744547843933105, "learning_rate": 6.659237725676033e-06, "loss": 0.4146, "step": 1903 }, { "epoch": 1.3593526891956211, "grad_norm": 0.37310701608657837, "learning_rate": 6.655317089424791e-06, "loss": 0.3977, "step": 1904 }, { "epoch": 1.3600666349357449, "grad_norm": 0.37334948778152466, "learning_rate": 6.651395309775837e-06, "loss": 0.4268, "step": 1905 }, { "epoch": 1.3607805806758686, "grad_norm": 0.35113611817359924, "learning_rate": 6.6474723894381075e-06, "loss": 0.3911, "step": 1906 }, { "epoch": 1.3614945264159923, "grad_norm": 0.3269369304180145, "learning_rate": 6.6435483311213325e-06, "loss": 0.3755, "step": 1907 }, { "epoch": 1.362208472156116, "grad_norm": 0.37147027254104614, "learning_rate": 6.639623137536023e-06, "loss": 0.4296, "step": 1908 }, { "epoch": 1.36292241789624, "grad_norm": 0.32802414894104004, "learning_rate": 6.635696811393478e-06, "loss": 0.4028, "step": 1909 }, { "epoch": 1.3636363636363638, "grad_norm": 0.38549864292144775, "learning_rate": 6.631769355405779e-06, "loss": 0.4164, "step": 1910 }, { "epoch": 1.3643503093764875, "grad_norm": 0.3523275554180145, "learning_rate": 6.627840772285784e-06, "loss": 0.3696, "step": 1911 }, { "epoch": 1.3650642551166112, "grad_norm": 0.31300631165504456, "learning_rate": 6.623911064747133e-06, "loss": 0.3589, "step": 1912 }, { "epoch": 1.365778200856735, "grad_norm": 0.41396498680114746, "learning_rate": 6.619980235504242e-06, "loss": 0.418, "step": 1913 }, { "epoch": 1.3664921465968587, "grad_norm": 0.36761870980262756, "learning_rate": 6.616048287272301e-06, "loss": 0.4356, "step": 1914 }, { "epoch": 1.3672060923369824, "grad_norm": 0.38788318634033203, "learning_rate": 6.612115222767272e-06, "loss": 0.4018, "step": 1915 }, { "epoch": 1.3679200380771062, "grad_norm": 0.32709187269210815, "learning_rate": 6.608181044705893e-06, "loss": 0.3927, "step": 1916 }, { "epoch": 1.36863398381723, "grad_norm": 0.37265634536743164, "learning_rate": 6.604245755805665e-06, "loss": 0.4141, "step": 1917 }, { "epoch": 1.3693479295573536, "grad_norm": 0.37378859519958496, "learning_rate": 6.600309358784858e-06, "loss": 0.4202, "step": 1918 }, { "epoch": 1.3700618752974774, "grad_norm": 0.362530916929245, "learning_rate": 6.596371856362511e-06, "loss": 0.4054, "step": 1919 }, { "epoch": 1.370775821037601, "grad_norm": 0.35513415932655334, "learning_rate": 6.592433251258423e-06, "loss": 0.3938, "step": 1920 }, { "epoch": 1.3714897667777248, "grad_norm": 0.35260745882987976, "learning_rate": 6.588493546193155e-06, "loss": 0.4057, "step": 1921 }, { "epoch": 1.3722037125178486, "grad_norm": 0.36095914244651794, "learning_rate": 6.584552743888028e-06, "loss": 0.4405, "step": 1922 }, { "epoch": 1.3729176582579723, "grad_norm": 0.3495786190032959, "learning_rate": 6.5806108470651235e-06, "loss": 0.3555, "step": 1923 }, { "epoch": 1.373631603998096, "grad_norm": 0.39985379576683044, "learning_rate": 6.576667858447272e-06, "loss": 0.4321, "step": 1924 }, { "epoch": 1.3743455497382198, "grad_norm": 0.36370280385017395, "learning_rate": 6.572723780758069e-06, "loss": 0.4091, "step": 1925 }, { "epoch": 1.3750594954783437, "grad_norm": 0.37978029251098633, "learning_rate": 6.568778616721853e-06, "loss": 0.436, "step": 1926 }, { "epoch": 1.3757734412184675, "grad_norm": 0.3587864339351654, "learning_rate": 6.564832369063716e-06, "loss": 0.3799, "step": 1927 }, { "epoch": 1.3764873869585912, "grad_norm": 0.36326754093170166, "learning_rate": 6.560885040509499e-06, "loss": 0.4655, "step": 1928 }, { "epoch": 1.377201332698715, "grad_norm": 0.34549418091773987, "learning_rate": 6.556936633785788e-06, "loss": 0.4033, "step": 1929 }, { "epoch": 1.3779152784388387, "grad_norm": 0.3384488523006439, "learning_rate": 6.552987151619919e-06, "loss": 0.4083, "step": 1930 }, { "epoch": 1.3786292241789624, "grad_norm": 0.3659460246562958, "learning_rate": 6.549036596739964e-06, "loss": 0.3736, "step": 1931 }, { "epoch": 1.3793431699190861, "grad_norm": 0.3481779098510742, "learning_rate": 6.545084971874738e-06, "loss": 0.4375, "step": 1932 }, { "epoch": 1.3800571156592099, "grad_norm": 0.324264258146286, "learning_rate": 6.5411322797537965e-06, "loss": 0.4002, "step": 1933 }, { "epoch": 1.3807710613993336, "grad_norm": 0.35723620653152466, "learning_rate": 6.5371785231074326e-06, "loss": 0.4417, "step": 1934 }, { "epoch": 1.3814850071394573, "grad_norm": 0.3799307644367218, "learning_rate": 6.5332237046666725e-06, "loss": 0.3957, "step": 1935 }, { "epoch": 1.3821989528795813, "grad_norm": 0.31929489970207214, "learning_rate": 6.529267827163277e-06, "loss": 0.3888, "step": 1936 }, { "epoch": 1.382912898619705, "grad_norm": 0.3532329797744751, "learning_rate": 6.52531089332974e-06, "loss": 0.4021, "step": 1937 }, { "epoch": 1.3836268443598287, "grad_norm": 0.38118523359298706, "learning_rate": 6.521352905899283e-06, "loss": 0.3974, "step": 1938 }, { "epoch": 1.3843407900999525, "grad_norm": 0.3301306664943695, "learning_rate": 6.517393867605854e-06, "loss": 0.3749, "step": 1939 }, { "epoch": 1.3850547358400762, "grad_norm": 0.3724460303783417, "learning_rate": 6.513433781184131e-06, "loss": 0.4086, "step": 1940 }, { "epoch": 1.3857686815802, "grad_norm": 0.3756787180900574, "learning_rate": 6.509472649369511e-06, "loss": 0.419, "step": 1941 }, { "epoch": 1.3864826273203237, "grad_norm": 0.3553272783756256, "learning_rate": 6.505510474898118e-06, "loss": 0.4407, "step": 1942 }, { "epoch": 1.3871965730604474, "grad_norm": 0.3663780391216278, "learning_rate": 6.501547260506793e-06, "loss": 0.4247, "step": 1943 }, { "epoch": 1.3879105188005711, "grad_norm": 0.3457871973514557, "learning_rate": 6.497583008933097e-06, "loss": 0.4078, "step": 1944 }, { "epoch": 1.3886244645406949, "grad_norm": 0.3948150873184204, "learning_rate": 6.493617722915306e-06, "loss": 0.4033, "step": 1945 }, { "epoch": 1.3893384102808186, "grad_norm": 0.3669643998146057, "learning_rate": 6.48965140519241e-06, "loss": 0.4173, "step": 1946 }, { "epoch": 1.3900523560209423, "grad_norm": 0.3367150127887726, "learning_rate": 6.485684058504116e-06, "loss": 0.3962, "step": 1947 }, { "epoch": 1.390766301761066, "grad_norm": 0.38365331292152405, "learning_rate": 6.481715685590836e-06, "loss": 0.419, "step": 1948 }, { "epoch": 1.3914802475011898, "grad_norm": 0.35009509325027466, "learning_rate": 6.477746289193695e-06, "loss": 0.4091, "step": 1949 }, { "epoch": 1.3921941932413135, "grad_norm": 0.3549378514289856, "learning_rate": 6.473775872054522e-06, "loss": 0.4062, "step": 1950 }, { "epoch": 1.3929081389814373, "grad_norm": 0.32682183384895325, "learning_rate": 6.469804436915854e-06, "loss": 0.4202, "step": 1951 }, { "epoch": 1.3936220847215612, "grad_norm": 0.3582307696342468, "learning_rate": 6.4658319865209276e-06, "loss": 0.425, "step": 1952 }, { "epoch": 1.394336030461685, "grad_norm": 0.35364171862602234, "learning_rate": 6.461858523613684e-06, "loss": 0.4005, "step": 1953 }, { "epoch": 1.3950499762018087, "grad_norm": 0.36491861939430237, "learning_rate": 6.457884050938763e-06, "loss": 0.4532, "step": 1954 }, { "epoch": 1.3957639219419324, "grad_norm": 0.31203487515449524, "learning_rate": 6.453908571241501e-06, "loss": 0.3569, "step": 1955 }, { "epoch": 1.3964778676820562, "grad_norm": 0.3858880400657654, "learning_rate": 6.449932087267932e-06, "loss": 0.4331, "step": 1956 }, { "epoch": 1.39719181342218, "grad_norm": 0.31933534145355225, "learning_rate": 6.445954601764779e-06, "loss": 0.3892, "step": 1957 }, { "epoch": 1.3979057591623036, "grad_norm": 0.368465781211853, "learning_rate": 6.4419761174794604e-06, "loss": 0.4338, "step": 1958 }, { "epoch": 1.3986197049024274, "grad_norm": 0.3526532053947449, "learning_rate": 6.437996637160086e-06, "loss": 0.4074, "step": 1959 }, { "epoch": 1.399333650642551, "grad_norm": 0.3498420715332031, "learning_rate": 6.434016163555452e-06, "loss": 0.3926, "step": 1960 }, { "epoch": 1.4000475963826748, "grad_norm": 0.33886945247650146, "learning_rate": 6.430034699415038e-06, "loss": 0.4197, "step": 1961 }, { "epoch": 1.4007615421227988, "grad_norm": 0.30931320786476135, "learning_rate": 6.426052247489012e-06, "loss": 0.3768, "step": 1962 }, { "epoch": 1.4014754878629225, "grad_norm": 0.3435583710670471, "learning_rate": 6.42206881052822e-06, "loss": 0.4074, "step": 1963 }, { "epoch": 1.4021894336030463, "grad_norm": 0.33796557784080505, "learning_rate": 6.4180843912841925e-06, "loss": 0.3813, "step": 1964 }, { "epoch": 1.40290337934317, "grad_norm": 0.36256465315818787, "learning_rate": 6.414098992509138e-06, "loss": 0.4599, "step": 1965 }, { "epoch": 1.4036173250832937, "grad_norm": 0.38860318064689636, "learning_rate": 6.410112616955938e-06, "loss": 0.3866, "step": 1966 }, { "epoch": 1.4043312708234175, "grad_norm": 0.3422498106956482, "learning_rate": 6.4061252673781534e-06, "loss": 0.3936, "step": 1967 }, { "epoch": 1.4050452165635412, "grad_norm": 0.3462320864200592, "learning_rate": 6.402136946530014e-06, "loss": 0.3962, "step": 1968 }, { "epoch": 1.405759162303665, "grad_norm": 0.434007465839386, "learning_rate": 6.3981476571664235e-06, "loss": 0.4577, "step": 1969 }, { "epoch": 1.4064731080437887, "grad_norm": 0.3438386619091034, "learning_rate": 6.394157402042952e-06, "loss": 0.3755, "step": 1970 }, { "epoch": 1.4071870537839124, "grad_norm": 0.3627864122390747, "learning_rate": 6.390166183915839e-06, "loss": 0.4237, "step": 1971 }, { "epoch": 1.4079009995240361, "grad_norm": 0.37217527627944946, "learning_rate": 6.386174005541986e-06, "loss": 0.3969, "step": 1972 }, { "epoch": 1.4086149452641599, "grad_norm": 0.329129695892334, "learning_rate": 6.3821808696789626e-06, "loss": 0.3907, "step": 1973 }, { "epoch": 1.4093288910042836, "grad_norm": 0.37056654691696167, "learning_rate": 6.378186779084996e-06, "loss": 0.441, "step": 1974 }, { "epoch": 1.4100428367444073, "grad_norm": 0.35464170575141907, "learning_rate": 6.374191736518974e-06, "loss": 0.4142, "step": 1975 }, { "epoch": 1.410756782484531, "grad_norm": 0.33439695835113525, "learning_rate": 6.3701957447404426e-06, "loss": 0.3855, "step": 1976 }, { "epoch": 1.4114707282246548, "grad_norm": 0.341579794883728, "learning_rate": 6.3661988065096015e-06, "loss": 0.4004, "step": 1977 }, { "epoch": 1.4121846739647788, "grad_norm": 0.3402107357978821, "learning_rate": 6.362200924587305e-06, "loss": 0.4072, "step": 1978 }, { "epoch": 1.4128986197049025, "grad_norm": 0.3357406556606293, "learning_rate": 6.3582021017350605e-06, "loss": 0.3911, "step": 1979 }, { "epoch": 1.4136125654450262, "grad_norm": 0.3719256818294525, "learning_rate": 6.354202340715027e-06, "loss": 0.417, "step": 1980 }, { "epoch": 1.41432651118515, "grad_norm": 0.3486076295375824, "learning_rate": 6.350201644290005e-06, "loss": 0.4172, "step": 1981 }, { "epoch": 1.4150404569252737, "grad_norm": 0.36275514960289, "learning_rate": 6.346200015223447e-06, "loss": 0.4288, "step": 1982 }, { "epoch": 1.4157544026653974, "grad_norm": 0.3578275442123413, "learning_rate": 6.342197456279449e-06, "loss": 0.396, "step": 1983 }, { "epoch": 1.4164683484055212, "grad_norm": 0.3395306169986725, "learning_rate": 6.338193970222745e-06, "loss": 0.4183, "step": 1984 }, { "epoch": 1.417182294145645, "grad_norm": 0.37304335832595825, "learning_rate": 6.334189559818715e-06, "loss": 0.4305, "step": 1985 }, { "epoch": 1.4178962398857686, "grad_norm": 0.3537655174732208, "learning_rate": 6.330184227833376e-06, "loss": 0.3784, "step": 1986 }, { "epoch": 1.4186101856258924, "grad_norm": 0.35809946060180664, "learning_rate": 6.32617797703338e-06, "loss": 0.4321, "step": 1987 }, { "epoch": 1.4193241313660163, "grad_norm": 0.34437334537506104, "learning_rate": 6.322170810186013e-06, "loss": 0.3814, "step": 1988 }, { "epoch": 1.42003807710614, "grad_norm": 0.3429303467273712, "learning_rate": 6.318162730059194e-06, "loss": 0.4025, "step": 1989 }, { "epoch": 1.4207520228462638, "grad_norm": 0.35968297719955444, "learning_rate": 6.314153739421477e-06, "loss": 0.4264, "step": 1990 }, { "epoch": 1.4214659685863875, "grad_norm": 0.3684230148792267, "learning_rate": 6.310143841042041e-06, "loss": 0.4041, "step": 1991 }, { "epoch": 1.4221799143265113, "grad_norm": 0.3452284336090088, "learning_rate": 6.306133037690693e-06, "loss": 0.3903, "step": 1992 }, { "epoch": 1.422893860066635, "grad_norm": 0.329992413520813, "learning_rate": 6.302121332137864e-06, "loss": 0.3923, "step": 1993 }, { "epoch": 1.4236078058067587, "grad_norm": 0.31407803297042847, "learning_rate": 6.298108727154608e-06, "loss": 0.3883, "step": 1994 }, { "epoch": 1.4243217515468825, "grad_norm": 0.3176577389240265, "learning_rate": 6.294095225512604e-06, "loss": 0.4062, "step": 1995 }, { "epoch": 1.4250356972870062, "grad_norm": 0.31628450751304626, "learning_rate": 6.290080829984147e-06, "loss": 0.3775, "step": 1996 }, { "epoch": 1.42574964302713, "grad_norm": 0.32859933376312256, "learning_rate": 6.28606554334215e-06, "loss": 0.3825, "step": 1997 }, { "epoch": 1.4264635887672537, "grad_norm": 0.3605019748210907, "learning_rate": 6.282049368360143e-06, "loss": 0.4308, "step": 1998 }, { "epoch": 1.4271775345073774, "grad_norm": 0.3657609820365906, "learning_rate": 6.278032307812266e-06, "loss": 0.4358, "step": 1999 }, { "epoch": 1.4278914802475011, "grad_norm": 0.3083842694759369, "learning_rate": 6.274014364473274e-06, "loss": 0.3967, "step": 2000 }, { "epoch": 1.4286054259876249, "grad_norm": 0.33848610520362854, "learning_rate": 6.269995541118531e-06, "loss": 0.4061, "step": 2001 }, { "epoch": 1.4293193717277486, "grad_norm": 0.35883381962776184, "learning_rate": 6.26597584052401e-06, "loss": 0.4239, "step": 2002 }, { "epoch": 1.4300333174678723, "grad_norm": 0.3638203740119934, "learning_rate": 6.261955265466286e-06, "loss": 0.4327, "step": 2003 }, { "epoch": 1.430747263207996, "grad_norm": 0.3389233350753784, "learning_rate": 6.257933818722544e-06, "loss": 0.3789, "step": 2004 }, { "epoch": 1.43146120894812, "grad_norm": 0.38248327374458313, "learning_rate": 6.253911503070564e-06, "loss": 0.4163, "step": 2005 }, { "epoch": 1.4321751546882437, "grad_norm": 0.3613067865371704, "learning_rate": 6.249888321288733e-06, "loss": 0.363, "step": 2006 }, { "epoch": 1.4328891004283675, "grad_norm": 0.3565087616443634, "learning_rate": 6.245864276156033e-06, "loss": 0.4046, "step": 2007 }, { "epoch": 1.4336030461684912, "grad_norm": 0.35029342770576477, "learning_rate": 6.241839370452041e-06, "loss": 0.4206, "step": 2008 }, { "epoch": 1.434316991908615, "grad_norm": 0.35269707441329956, "learning_rate": 6.237813606956931e-06, "loss": 0.4292, "step": 2009 }, { "epoch": 1.4350309376487387, "grad_norm": 0.3705071806907654, "learning_rate": 6.233786988451468e-06, "loss": 0.4005, "step": 2010 }, { "epoch": 1.4357448833888624, "grad_norm": 0.3276744484901428, "learning_rate": 6.229759517717011e-06, "loss": 0.4194, "step": 2011 }, { "epoch": 1.4364588291289861, "grad_norm": 0.3599443733692169, "learning_rate": 6.225731197535501e-06, "loss": 0.3649, "step": 2012 }, { "epoch": 1.4371727748691099, "grad_norm": 0.37531203031539917, "learning_rate": 6.2217020306894705e-06, "loss": 0.4516, "step": 2013 }, { "epoch": 1.4378867206092336, "grad_norm": 0.3284858465194702, "learning_rate": 6.217672019962038e-06, "loss": 0.3821, "step": 2014 }, { "epoch": 1.4386006663493576, "grad_norm": 0.37694182991981506, "learning_rate": 6.2136411681369e-06, "loss": 0.4171, "step": 2015 }, { "epoch": 1.4393146120894813, "grad_norm": 0.38982346653938293, "learning_rate": 6.209609477998339e-06, "loss": 0.4165, "step": 2016 }, { "epoch": 1.440028557829605, "grad_norm": 0.3539135754108429, "learning_rate": 6.205576952331215e-06, "loss": 0.4204, "step": 2017 }, { "epoch": 1.4407425035697288, "grad_norm": 0.392172634601593, "learning_rate": 6.20154359392096e-06, "loss": 0.4484, "step": 2018 }, { "epoch": 1.4414564493098525, "grad_norm": 0.35314634442329407, "learning_rate": 6.19750940555359e-06, "loss": 0.4096, "step": 2019 }, { "epoch": 1.4421703950499762, "grad_norm": 0.3628053069114685, "learning_rate": 6.19347439001569e-06, "loss": 0.3999, "step": 2020 }, { "epoch": 1.4428843407901, "grad_norm": 0.34841248393058777, "learning_rate": 6.1894385500944135e-06, "loss": 0.4079, "step": 2021 }, { "epoch": 1.4435982865302237, "grad_norm": 0.3513837158679962, "learning_rate": 6.185401888577488e-06, "loss": 0.3663, "step": 2022 }, { "epoch": 1.4443122322703474, "grad_norm": 0.383858859539032, "learning_rate": 6.181364408253209e-06, "loss": 0.4554, "step": 2023 }, { "epoch": 1.4450261780104712, "grad_norm": 0.376427561044693, "learning_rate": 6.177326111910429e-06, "loss": 0.4134, "step": 2024 }, { "epoch": 1.445740123750595, "grad_norm": 0.3537364900112152, "learning_rate": 6.173287002338577e-06, "loss": 0.3653, "step": 2025 }, { "epoch": 1.4464540694907186, "grad_norm": 0.39380761981010437, "learning_rate": 6.169247082327634e-06, "loss": 0.4121, "step": 2026 }, { "epoch": 1.4471680152308424, "grad_norm": 0.3496025800704956, "learning_rate": 6.165206354668145e-06, "loss": 0.4071, "step": 2027 }, { "epoch": 1.447881960970966, "grad_norm": 0.3332447111606598, "learning_rate": 6.161164822151213e-06, "loss": 0.3841, "step": 2028 }, { "epoch": 1.4485959067110898, "grad_norm": 0.4223122000694275, "learning_rate": 6.1571224875684945e-06, "loss": 0.4572, "step": 2029 }, { "epoch": 1.4493098524512136, "grad_norm": 0.3698405623435974, "learning_rate": 6.153079353712201e-06, "loss": 0.3763, "step": 2030 }, { "epoch": 1.4500237981913375, "grad_norm": 0.39172181487083435, "learning_rate": 6.1490354233750986e-06, "loss": 0.4037, "step": 2031 }, { "epoch": 1.4507377439314613, "grad_norm": 0.3611370921134949, "learning_rate": 6.144990699350498e-06, "loss": 0.4131, "step": 2032 }, { "epoch": 1.451451689671585, "grad_norm": 0.34716346859931946, "learning_rate": 6.140945184432265e-06, "loss": 0.4161, "step": 2033 }, { "epoch": 1.4521656354117087, "grad_norm": 0.3890252411365509, "learning_rate": 6.136898881414807e-06, "loss": 0.4154, "step": 2034 }, { "epoch": 1.4528795811518325, "grad_norm": 0.3421449661254883, "learning_rate": 6.132851793093079e-06, "loss": 0.3955, "step": 2035 }, { "epoch": 1.4535935268919562, "grad_norm": 0.3730386793613434, "learning_rate": 6.128803922262573e-06, "loss": 0.426, "step": 2036 }, { "epoch": 1.45430747263208, "grad_norm": 0.4055565595626831, "learning_rate": 6.124755271719326e-06, "loss": 0.4092, "step": 2037 }, { "epoch": 1.4550214183722037, "grad_norm": 0.3415382504463196, "learning_rate": 6.120705844259913e-06, "loss": 0.4003, "step": 2038 }, { "epoch": 1.4557353641123274, "grad_norm": 0.3615740239620209, "learning_rate": 6.116655642681447e-06, "loss": 0.411, "step": 2039 }, { "epoch": 1.4564493098524511, "grad_norm": 0.34904801845550537, "learning_rate": 6.112604669781572e-06, "loss": 0.4223, "step": 2040 }, { "epoch": 1.457163255592575, "grad_norm": 0.34463950991630554, "learning_rate": 6.108552928358469e-06, "loss": 0.3621, "step": 2041 }, { "epoch": 1.4578772013326988, "grad_norm": 0.3858923017978668, "learning_rate": 6.104500421210845e-06, "loss": 0.4358, "step": 2042 }, { "epoch": 1.4585911470728226, "grad_norm": 0.3242444694042206, "learning_rate": 6.100447151137939e-06, "loss": 0.3664, "step": 2043 }, { "epoch": 1.4593050928129463, "grad_norm": 0.36653679609298706, "learning_rate": 6.0963931209395165e-06, "loss": 0.4315, "step": 2044 }, { "epoch": 1.46001903855307, "grad_norm": 0.3501462936401367, "learning_rate": 6.0923383334158704e-06, "loss": 0.353, "step": 2045 }, { "epoch": 1.4607329842931938, "grad_norm": 0.3852728605270386, "learning_rate": 6.088282791367812e-06, "loss": 0.4044, "step": 2046 }, { "epoch": 1.4614469300333175, "grad_norm": 0.31130918860435486, "learning_rate": 6.084226497596677e-06, "loss": 0.4097, "step": 2047 }, { "epoch": 1.4621608757734412, "grad_norm": 0.36546358466148376, "learning_rate": 6.08016945490432e-06, "loss": 0.4375, "step": 2048 }, { "epoch": 1.462874821513565, "grad_norm": 0.33770808577537537, "learning_rate": 6.076111666093111e-06, "loss": 0.3902, "step": 2049 }, { "epoch": 1.4635887672536887, "grad_norm": 0.35034501552581787, "learning_rate": 6.0720531339659386e-06, "loss": 0.4449, "step": 2050 }, { "epoch": 1.4643027129938124, "grad_norm": 0.33929842710494995, "learning_rate": 6.0679938613262015e-06, "loss": 0.4018, "step": 2051 }, { "epoch": 1.4650166587339362, "grad_norm": 0.3582986891269684, "learning_rate": 6.063933850977811e-06, "loss": 0.4169, "step": 2052 }, { "epoch": 1.46573060447406, "grad_norm": 0.35435494780540466, "learning_rate": 6.059873105725191e-06, "loss": 0.389, "step": 2053 }, { "epoch": 1.4664445502141836, "grad_norm": 0.312693327665329, "learning_rate": 6.0558116283732696e-06, "loss": 0.3455, "step": 2054 }, { "epoch": 1.4671584959543074, "grad_norm": 0.38524195551872253, "learning_rate": 6.05174942172748e-06, "loss": 0.4144, "step": 2055 }, { "epoch": 1.467872441694431, "grad_norm": 0.3746894896030426, "learning_rate": 6.047686488593761e-06, "loss": 0.3868, "step": 2056 }, { "epoch": 1.468586387434555, "grad_norm": 0.3590252995491028, "learning_rate": 6.043622831778554e-06, "loss": 0.3988, "step": 2057 }, { "epoch": 1.4693003331746788, "grad_norm": 0.40093719959259033, "learning_rate": 6.039558454088796e-06, "loss": 0.4428, "step": 2058 }, { "epoch": 1.4700142789148025, "grad_norm": 0.33961015939712524, "learning_rate": 6.035493358331932e-06, "loss": 0.3674, "step": 2059 }, { "epoch": 1.4707282246549263, "grad_norm": 0.39494115114212036, "learning_rate": 6.031427547315889e-06, "loss": 0.4098, "step": 2060 }, { "epoch": 1.47144217039505, "grad_norm": 0.3565913438796997, "learning_rate": 6.027361023849096e-06, "loss": 0.4026, "step": 2061 }, { "epoch": 1.4721561161351737, "grad_norm": 0.38847047090530396, "learning_rate": 6.023293790740476e-06, "loss": 0.4184, "step": 2062 }, { "epoch": 1.4728700618752975, "grad_norm": 0.3412530720233917, "learning_rate": 6.019225850799439e-06, "loss": 0.4128, "step": 2063 }, { "epoch": 1.4735840076154212, "grad_norm": 0.37124529480934143, "learning_rate": 6.015157206835881e-06, "loss": 0.4295, "step": 2064 }, { "epoch": 1.474297953355545, "grad_norm": 0.38594958186149597, "learning_rate": 6.011087861660191e-06, "loss": 0.4039, "step": 2065 }, { "epoch": 1.4750118990956687, "grad_norm": 0.3629513680934906, "learning_rate": 6.007017818083234e-06, "loss": 0.4016, "step": 2066 }, { "epoch": 1.4757258448357926, "grad_norm": 0.357436865568161, "learning_rate": 6.002947078916365e-06, "loss": 0.4058, "step": 2067 }, { "epoch": 1.4764397905759163, "grad_norm": 0.35907819867134094, "learning_rate": 5.998875646971414e-06, "loss": 0.4106, "step": 2068 }, { "epoch": 1.47715373631604, "grad_norm": 0.374222993850708, "learning_rate": 5.994803525060691e-06, "loss": 0.3928, "step": 2069 }, { "epoch": 1.4778676820561638, "grad_norm": 0.3433668613433838, "learning_rate": 5.990730715996989e-06, "loss": 0.4474, "step": 2070 }, { "epoch": 1.4785816277962875, "grad_norm": 0.3939042091369629, "learning_rate": 5.986657222593562e-06, "loss": 0.414, "step": 2071 }, { "epoch": 1.4792955735364113, "grad_norm": 0.36146366596221924, "learning_rate": 5.982583047664151e-06, "loss": 0.4043, "step": 2072 }, { "epoch": 1.480009519276535, "grad_norm": 0.3630291521549225, "learning_rate": 5.978508194022958e-06, "loss": 0.3951, "step": 2073 }, { "epoch": 1.4807234650166587, "grad_norm": 0.3917105793952942, "learning_rate": 5.9744326644846585e-06, "loss": 0.4438, "step": 2074 }, { "epoch": 1.4814374107567825, "grad_norm": 0.3557332456111908, "learning_rate": 5.970356461864392e-06, "loss": 0.4064, "step": 2075 }, { "epoch": 1.4821513564969062, "grad_norm": 0.33721092343330383, "learning_rate": 5.9662795889777666e-06, "loss": 0.4022, "step": 2076 }, { "epoch": 1.48286530223703, "grad_norm": 0.416286438703537, "learning_rate": 5.962202048640851e-06, "loss": 0.4324, "step": 2077 }, { "epoch": 1.4835792479771537, "grad_norm": 0.33623647689819336, "learning_rate": 5.958123843670174e-06, "loss": 0.3996, "step": 2078 }, { "epoch": 1.4842931937172774, "grad_norm": 0.34796154499053955, "learning_rate": 5.954044976882725e-06, "loss": 0.3756, "step": 2079 }, { "epoch": 1.4850071394574011, "grad_norm": 0.3673635721206665, "learning_rate": 5.949965451095952e-06, "loss": 0.3967, "step": 2080 }, { "epoch": 1.4857210851975249, "grad_norm": 0.32628753781318665, "learning_rate": 5.945885269127753e-06, "loss": 0.4211, "step": 2081 }, { "epoch": 1.4864350309376486, "grad_norm": 0.3244740962982178, "learning_rate": 5.941804433796485e-06, "loss": 0.393, "step": 2082 }, { "epoch": 1.4871489766777726, "grad_norm": 0.3392585813999176, "learning_rate": 5.9377229479209555e-06, "loss": 0.3794, "step": 2083 }, { "epoch": 1.4878629224178963, "grad_norm": 0.3746250867843628, "learning_rate": 5.933640814320417e-06, "loss": 0.4105, "step": 2084 }, { "epoch": 1.48857686815802, "grad_norm": 0.3503364622592926, "learning_rate": 5.929558035814574e-06, "loss": 0.4439, "step": 2085 }, { "epoch": 1.4892908138981438, "grad_norm": 0.3200962245464325, "learning_rate": 5.925474615223573e-06, "loss": 0.3888, "step": 2086 }, { "epoch": 1.4900047596382675, "grad_norm": 0.36670830845832825, "learning_rate": 5.921390555368008e-06, "loss": 0.4194, "step": 2087 }, { "epoch": 1.4907187053783912, "grad_norm": 0.34690016508102417, "learning_rate": 5.917305859068912e-06, "loss": 0.3743, "step": 2088 }, { "epoch": 1.491432651118515, "grad_norm": 0.4056900441646576, "learning_rate": 5.913220529147757e-06, "loss": 0.4462, "step": 2089 }, { "epoch": 1.4921465968586387, "grad_norm": 0.33290964365005493, "learning_rate": 5.909134568426455e-06, "loss": 0.3827, "step": 2090 }, { "epoch": 1.4928605425987624, "grad_norm": 0.3391304910182953, "learning_rate": 5.90504797972735e-06, "loss": 0.3972, "step": 2091 }, { "epoch": 1.4935744883388862, "grad_norm": 0.3788953423500061, "learning_rate": 5.900960765873223e-06, "loss": 0.3981, "step": 2092 }, { "epoch": 1.4942884340790101, "grad_norm": 0.34349584579467773, "learning_rate": 5.896872929687287e-06, "loss": 0.3786, "step": 2093 }, { "epoch": 1.4950023798191339, "grad_norm": 0.3473876118659973, "learning_rate": 5.892784473993184e-06, "loss": 0.394, "step": 2094 }, { "epoch": 1.4957163255592576, "grad_norm": 0.4311414957046509, "learning_rate": 5.888695401614982e-06, "loss": 0.4298, "step": 2095 }, { "epoch": 1.4964302712993813, "grad_norm": 0.3799305558204651, "learning_rate": 5.884605715377179e-06, "loss": 0.4582, "step": 2096 }, { "epoch": 1.497144217039505, "grad_norm": 0.365782231092453, "learning_rate": 5.880515418104692e-06, "loss": 0.3873, "step": 2097 }, { "epoch": 1.4978581627796288, "grad_norm": 0.38105812668800354, "learning_rate": 5.876424512622863e-06, "loss": 0.3901, "step": 2098 }, { "epoch": 1.4985721085197525, "grad_norm": 0.3469472825527191, "learning_rate": 5.872333001757458e-06, "loss": 0.4039, "step": 2099 }, { "epoch": 1.4992860542598763, "grad_norm": 0.3738333284854889, "learning_rate": 5.8682408883346535e-06, "loss": 0.3851, "step": 2100 }, { "epoch": 1.5, "grad_norm": 0.3460516929626465, "learning_rate": 5.864148175181045e-06, "loss": 0.3948, "step": 2101 }, { "epoch": 1.5007139457401237, "grad_norm": 0.3423824906349182, "learning_rate": 5.860054865123646e-06, "loss": 0.4013, "step": 2102 }, { "epoch": 1.5014278914802475, "grad_norm": 0.3496379554271698, "learning_rate": 5.855960960989877e-06, "loss": 0.3845, "step": 2103 }, { "epoch": 1.5021418372203712, "grad_norm": 0.36114436388015747, "learning_rate": 5.8518664656075704e-06, "loss": 0.4011, "step": 2104 }, { "epoch": 1.502855782960495, "grad_norm": 0.35141241550445557, "learning_rate": 5.847771381804972e-06, "loss": 0.3899, "step": 2105 }, { "epoch": 1.5035697287006187, "grad_norm": 0.41466590762138367, "learning_rate": 5.843675712410724e-06, "loss": 0.4159, "step": 2106 }, { "epoch": 1.5042836744407424, "grad_norm": 0.37981557846069336, "learning_rate": 5.839579460253887e-06, "loss": 0.3605, "step": 2107 }, { "epoch": 1.5049976201808661, "grad_norm": 0.3980327248573303, "learning_rate": 5.835482628163909e-06, "loss": 0.4455, "step": 2108 }, { "epoch": 1.5057115659209899, "grad_norm": 0.3998209536075592, "learning_rate": 5.8313852189706465e-06, "loss": 0.4452, "step": 2109 }, { "epoch": 1.5064255116611136, "grad_norm": 0.3505743741989136, "learning_rate": 5.827287235504356e-06, "loss": 0.3934, "step": 2110 }, { "epoch": 1.5071394574012376, "grad_norm": 0.36259138584136963, "learning_rate": 5.8231886805956895e-06, "loss": 0.422, "step": 2111 }, { "epoch": 1.5078534031413613, "grad_norm": 0.36310410499572754, "learning_rate": 5.819089557075689e-06, "loss": 0.4281, "step": 2112 }, { "epoch": 1.508567348881485, "grad_norm": 0.3366665542125702, "learning_rate": 5.814989867775796e-06, "loss": 0.4034, "step": 2113 }, { "epoch": 1.5092812946216088, "grad_norm": 0.3621787130832672, "learning_rate": 5.810889615527839e-06, "loss": 0.4069, "step": 2114 }, { "epoch": 1.5099952403617325, "grad_norm": 0.3826106786727905, "learning_rate": 5.806788803164034e-06, "loss": 0.4023, "step": 2115 }, { "epoch": 1.5107091861018562, "grad_norm": 0.38729381561279297, "learning_rate": 5.802687433516989e-06, "loss": 0.3686, "step": 2116 }, { "epoch": 1.51142313184198, "grad_norm": 0.37689220905303955, "learning_rate": 5.798585509419692e-06, "loss": 0.3938, "step": 2117 }, { "epoch": 1.512137077582104, "grad_norm": 0.41571810841560364, "learning_rate": 5.7944830337055165e-06, "loss": 0.4209, "step": 2118 }, { "epoch": 1.5128510233222277, "grad_norm": 0.3941282033920288, "learning_rate": 5.790380009208217e-06, "loss": 0.3942, "step": 2119 }, { "epoch": 1.5135649690623514, "grad_norm": 0.4243156611919403, "learning_rate": 5.786276438761928e-06, "loss": 0.423, "step": 2120 }, { "epoch": 1.5142789148024751, "grad_norm": 0.35655471682548523, "learning_rate": 5.782172325201155e-06, "loss": 0.4374, "step": 2121 }, { "epoch": 1.5149928605425989, "grad_norm": 0.36241453886032104, "learning_rate": 5.778067671360788e-06, "loss": 0.4071, "step": 2122 }, { "epoch": 1.5157068062827226, "grad_norm": 0.3614761531352997, "learning_rate": 5.773962480076081e-06, "loss": 0.4015, "step": 2123 }, { "epoch": 1.5164207520228463, "grad_norm": 0.37037381529808044, "learning_rate": 5.769856754182668e-06, "loss": 0.3639, "step": 2124 }, { "epoch": 1.51713469776297, "grad_norm": 0.38032224774360657, "learning_rate": 5.765750496516547e-06, "loss": 0.4305, "step": 2125 }, { "epoch": 1.5178486435030938, "grad_norm": 0.3618186414241791, "learning_rate": 5.7616437099140825e-06, "loss": 0.3665, "step": 2126 }, { "epoch": 1.5185625892432175, "grad_norm": 0.34980762004852295, "learning_rate": 5.7575363972120066e-06, "loss": 0.4367, "step": 2127 }, { "epoch": 1.5192765349833413, "grad_norm": 0.4108301103115082, "learning_rate": 5.753428561247416e-06, "loss": 0.4255, "step": 2128 }, { "epoch": 1.519990480723465, "grad_norm": 0.42617541551589966, "learning_rate": 5.749320204857766e-06, "loss": 0.4266, "step": 2129 }, { "epoch": 1.5207044264635887, "grad_norm": 0.3605881333351135, "learning_rate": 5.745211330880872e-06, "loss": 0.4209, "step": 2130 }, { "epoch": 1.5214183722037125, "grad_norm": 0.4171713590621948, "learning_rate": 5.74110194215491e-06, "loss": 0.4297, "step": 2131 }, { "epoch": 1.5221323179438362, "grad_norm": 0.42024245858192444, "learning_rate": 5.736992041518407e-06, "loss": 0.4021, "step": 2132 }, { "epoch": 1.52284626368396, "grad_norm": 0.3973669111728668, "learning_rate": 5.732881631810245e-06, "loss": 0.4534, "step": 2133 }, { "epoch": 1.5235602094240837, "grad_norm": 0.413713276386261, "learning_rate": 5.7287707158696604e-06, "loss": 0.4344, "step": 2134 }, { "epoch": 1.5242741551642074, "grad_norm": 0.43302005529403687, "learning_rate": 5.724659296536234e-06, "loss": 0.3963, "step": 2135 }, { "epoch": 1.5249881009043311, "grad_norm": 0.4023389220237732, "learning_rate": 5.720547376649901e-06, "loss": 0.4047, "step": 2136 }, { "epoch": 1.5257020466444549, "grad_norm": 0.3362114429473877, "learning_rate": 5.716434959050938e-06, "loss": 0.4059, "step": 2137 }, { "epoch": 1.5264159923845788, "grad_norm": 0.366486132144928, "learning_rate": 5.712322046579965e-06, "loss": 0.431, "step": 2138 }, { "epoch": 1.5271299381247025, "grad_norm": 0.357882022857666, "learning_rate": 5.708208642077946e-06, "loss": 0.3862, "step": 2139 }, { "epoch": 1.5278438838648263, "grad_norm": 0.37006518244743347, "learning_rate": 5.7040947483861845e-06, "loss": 0.4083, "step": 2140 }, { "epoch": 1.52855782960495, "grad_norm": 0.3698999285697937, "learning_rate": 5.699980368346318e-06, "loss": 0.3953, "step": 2141 }, { "epoch": 1.5292717753450737, "grad_norm": 0.3669086694717407, "learning_rate": 5.695865504800328e-06, "loss": 0.3537, "step": 2142 }, { "epoch": 1.5299857210851975, "grad_norm": 0.36121225357055664, "learning_rate": 5.6917501605905225e-06, "loss": 0.4035, "step": 2143 }, { "epoch": 1.5306996668253214, "grad_norm": 0.3722722828388214, "learning_rate": 5.687634338559544e-06, "loss": 0.4014, "step": 2144 }, { "epoch": 1.5314136125654452, "grad_norm": 0.3517853915691376, "learning_rate": 5.683518041550368e-06, "loss": 0.4299, "step": 2145 }, { "epoch": 1.532127558305569, "grad_norm": 0.3317107558250427, "learning_rate": 5.679401272406292e-06, "loss": 0.3566, "step": 2146 }, { "epoch": 1.5328415040456926, "grad_norm": 0.33763495087623596, "learning_rate": 5.675284033970945e-06, "loss": 0.3974, "step": 2147 }, { "epoch": 1.5335554497858164, "grad_norm": 0.34236279129981995, "learning_rate": 5.671166329088278e-06, "loss": 0.4193, "step": 2148 }, { "epoch": 1.53426939552594, "grad_norm": 0.37215691804885864, "learning_rate": 5.667048160602564e-06, "loss": 0.3771, "step": 2149 }, { "epoch": 1.5349833412660638, "grad_norm": 0.3563275635242462, "learning_rate": 5.6629295313583975e-06, "loss": 0.3859, "step": 2150 }, { "epoch": 1.5356972870061876, "grad_norm": 0.35603615641593933, "learning_rate": 5.658810444200689e-06, "loss": 0.4327, "step": 2151 }, { "epoch": 1.5364112327463113, "grad_norm": 0.33763042092323303, "learning_rate": 5.654690901974667e-06, "loss": 0.4078, "step": 2152 }, { "epoch": 1.537125178486435, "grad_norm": 0.3508383631706238, "learning_rate": 5.6505709075258755e-06, "loss": 0.3643, "step": 2153 }, { "epoch": 1.5378391242265588, "grad_norm": 0.35984811186790466, "learning_rate": 5.646450463700167e-06, "loss": 0.4052, "step": 2154 }, { "epoch": 1.5385530699666825, "grad_norm": 0.358154833316803, "learning_rate": 5.642329573343708e-06, "loss": 0.4229, "step": 2155 }, { "epoch": 1.5392670157068062, "grad_norm": 0.35609620809555054, "learning_rate": 5.638208239302975e-06, "loss": 0.4151, "step": 2156 }, { "epoch": 1.53998096144693, "grad_norm": 0.37116876244544983, "learning_rate": 5.634086464424743e-06, "loss": 0.4155, "step": 2157 }, { "epoch": 1.5406949071870537, "grad_norm": 0.3857714831829071, "learning_rate": 5.6299642515560995e-06, "loss": 0.4217, "step": 2158 }, { "epoch": 1.5414088529271774, "grad_norm": 0.3410700559616089, "learning_rate": 5.625841603544431e-06, "loss": 0.3745, "step": 2159 }, { "epoch": 1.5421227986673012, "grad_norm": 0.34167835116386414, "learning_rate": 5.621718523237427e-06, "loss": 0.4016, "step": 2160 }, { "epoch": 1.542836744407425, "grad_norm": 0.339898020029068, "learning_rate": 5.617595013483071e-06, "loss": 0.3918, "step": 2161 }, { "epoch": 1.5435506901475486, "grad_norm": 0.3627465069293976, "learning_rate": 5.613471077129651e-06, "loss": 0.3706, "step": 2162 }, { "epoch": 1.5442646358876724, "grad_norm": 0.3800169825553894, "learning_rate": 5.609346717025738e-06, "loss": 0.4259, "step": 2163 }, { "epoch": 1.5449785816277963, "grad_norm": 0.35475343465805054, "learning_rate": 5.605221936020208e-06, "loss": 0.4081, "step": 2164 }, { "epoch": 1.54569252736792, "grad_norm": 0.3703308403491974, "learning_rate": 5.60109673696222e-06, "loss": 0.4139, "step": 2165 }, { "epoch": 1.5464064731080438, "grad_norm": 0.35251644253730774, "learning_rate": 5.596971122701221e-06, "loss": 0.3884, "step": 2166 }, { "epoch": 1.5471204188481675, "grad_norm": 0.3585383892059326, "learning_rate": 5.592845096086954e-06, "loss": 0.4006, "step": 2167 }, { "epoch": 1.5478343645882913, "grad_norm": 0.3612227737903595, "learning_rate": 5.588718659969437e-06, "loss": 0.4214, "step": 2168 }, { "epoch": 1.548548310328415, "grad_norm": 0.3503778576850891, "learning_rate": 5.584591817198974e-06, "loss": 0.3759, "step": 2169 }, { "epoch": 1.549262256068539, "grad_norm": 0.36421290040016174, "learning_rate": 5.5804645706261515e-06, "loss": 0.4493, "step": 2170 }, { "epoch": 1.5499762018086627, "grad_norm": 0.3235544264316559, "learning_rate": 5.576336923101834e-06, "loss": 0.3851, "step": 2171 }, { "epoch": 1.5506901475487864, "grad_norm": 0.3135702311992645, "learning_rate": 5.57220887747716e-06, "loss": 0.3686, "step": 2172 }, { "epoch": 1.5514040932889102, "grad_norm": 0.3337264657020569, "learning_rate": 5.568080436603549e-06, "loss": 0.4045, "step": 2173 }, { "epoch": 1.552118039029034, "grad_norm": 0.33957186341285706, "learning_rate": 5.5639516033326894e-06, "loss": 0.361, "step": 2174 }, { "epoch": 1.5528319847691576, "grad_norm": 0.3477252721786499, "learning_rate": 5.559822380516539e-06, "loss": 0.4222, "step": 2175 }, { "epoch": 1.5535459305092814, "grad_norm": 0.3503718972206116, "learning_rate": 5.555692771007331e-06, "loss": 0.4287, "step": 2176 }, { "epoch": 1.554259876249405, "grad_norm": 0.34887033700942993, "learning_rate": 5.55156277765756e-06, "loss": 0.3946, "step": 2177 }, { "epoch": 1.5549738219895288, "grad_norm": 0.3624603748321533, "learning_rate": 5.547432403319986e-06, "loss": 0.4253, "step": 2178 }, { "epoch": 1.5556877677296526, "grad_norm": 0.34626489877700806, "learning_rate": 5.543301650847634e-06, "loss": 0.4048, "step": 2179 }, { "epoch": 1.5564017134697763, "grad_norm": 0.3127381503582001, "learning_rate": 5.539170523093794e-06, "loss": 0.4037, "step": 2180 }, { "epoch": 1.5571156592099, "grad_norm": 0.31926631927490234, "learning_rate": 5.535039022912007e-06, "loss": 0.3642, "step": 2181 }, { "epoch": 1.5578296049500238, "grad_norm": 0.4056514501571655, "learning_rate": 5.530907153156077e-06, "loss": 0.4113, "step": 2182 }, { "epoch": 1.5585435506901475, "grad_norm": 0.39462563395500183, "learning_rate": 5.5267749166800584e-06, "loss": 0.4669, "step": 2183 }, { "epoch": 1.5592574964302712, "grad_norm": 0.34105804562568665, "learning_rate": 5.522642316338268e-06, "loss": 0.3596, "step": 2184 }, { "epoch": 1.559971442170395, "grad_norm": 0.35658231377601624, "learning_rate": 5.518509354985264e-06, "loss": 0.4324, "step": 2185 }, { "epoch": 1.5606853879105187, "grad_norm": 0.3688192367553711, "learning_rate": 5.51437603547586e-06, "loss": 0.4054, "step": 2186 }, { "epoch": 1.5613993336506424, "grad_norm": 0.3739490509033203, "learning_rate": 5.510242360665114e-06, "loss": 0.4495, "step": 2187 }, { "epoch": 1.5621132793907662, "grad_norm": 0.33576780557632446, "learning_rate": 5.50610833340833e-06, "loss": 0.3872, "step": 2188 }, { "epoch": 1.56282722513089, "grad_norm": 0.38252565264701843, "learning_rate": 5.501973956561056e-06, "loss": 0.3807, "step": 2189 }, { "epoch": 1.5635411708710139, "grad_norm": 0.3414037823677063, "learning_rate": 5.497839232979084e-06, "loss": 0.4023, "step": 2190 }, { "epoch": 1.5642551166111376, "grad_norm": 0.3222062587738037, "learning_rate": 5.493704165518439e-06, "loss": 0.4243, "step": 2191 }, { "epoch": 1.5649690623512613, "grad_norm": 0.3245667815208435, "learning_rate": 5.4895687570353916e-06, "loss": 0.3902, "step": 2192 }, { "epoch": 1.565683008091385, "grad_norm": 0.3504859507083893, "learning_rate": 5.485433010386442e-06, "loss": 0.4211, "step": 2193 }, { "epoch": 1.5663969538315088, "grad_norm": 0.3204061686992645, "learning_rate": 5.4812969284283245e-06, "loss": 0.3447, "step": 2194 }, { "epoch": 1.5671108995716325, "grad_norm": 0.3694224953651428, "learning_rate": 5.477160514018007e-06, "loss": 0.4136, "step": 2195 }, { "epoch": 1.5678248453117563, "grad_norm": 0.3556981682777405, "learning_rate": 5.473023770012686e-06, "loss": 0.4069, "step": 2196 }, { "epoch": 1.5685387910518802, "grad_norm": 0.33332112431526184, "learning_rate": 5.468886699269789e-06, "loss": 0.4062, "step": 2197 }, { "epoch": 1.569252736792004, "grad_norm": 0.3688619136810303, "learning_rate": 5.464749304646963e-06, "loss": 0.3905, "step": 2198 }, { "epoch": 1.5699666825321277, "grad_norm": 0.3583354651927948, "learning_rate": 5.4606115890020815e-06, "loss": 0.3998, "step": 2199 }, { "epoch": 1.5706806282722514, "grad_norm": 0.3502436876296997, "learning_rate": 5.456473555193242e-06, "loss": 0.4332, "step": 2200 }, { "epoch": 1.5713945740123751, "grad_norm": 0.33699703216552734, "learning_rate": 5.452335206078761e-06, "loss": 0.4095, "step": 2201 }, { "epoch": 1.5721085197524989, "grad_norm": 0.38349008560180664, "learning_rate": 5.448196544517168e-06, "loss": 0.4196, "step": 2202 }, { "epoch": 1.5728224654926226, "grad_norm": 0.36898112297058105, "learning_rate": 5.444057573367215e-06, "loss": 0.3853, "step": 2203 }, { "epoch": 1.5735364112327463, "grad_norm": 0.3622547686100006, "learning_rate": 5.439918295487866e-06, "loss": 0.4328, "step": 2204 }, { "epoch": 1.57425035697287, "grad_norm": 0.3394806981086731, "learning_rate": 5.435778713738292e-06, "loss": 0.4412, "step": 2205 }, { "epoch": 1.5749643027129938, "grad_norm": 0.35249483585357666, "learning_rate": 5.431638830977879e-06, "loss": 0.3955, "step": 2206 }, { "epoch": 1.5756782484531175, "grad_norm": 0.3730209767818451, "learning_rate": 5.427498650066222e-06, "loss": 0.3911, "step": 2207 }, { "epoch": 1.5763921941932413, "grad_norm": 0.35215914249420166, "learning_rate": 5.423358173863117e-06, "loss": 0.3797, "step": 2208 }, { "epoch": 1.577106139933365, "grad_norm": 0.33042001724243164, "learning_rate": 5.419217405228566e-06, "loss": 0.4337, "step": 2209 }, { "epoch": 1.5778200856734887, "grad_norm": 0.34554529190063477, "learning_rate": 5.415076347022777e-06, "loss": 0.4239, "step": 2210 }, { "epoch": 1.5785340314136125, "grad_norm": 0.3281601667404175, "learning_rate": 5.4109350021061526e-06, "loss": 0.3868, "step": 2211 }, { "epoch": 1.5792479771537362, "grad_norm": 0.3243865370750427, "learning_rate": 5.406793373339292e-06, "loss": 0.4179, "step": 2212 }, { "epoch": 1.57996192289386, "grad_norm": 0.34284284710884094, "learning_rate": 5.402651463582998e-06, "loss": 0.3915, "step": 2213 }, { "epoch": 1.5806758686339837, "grad_norm": 0.34193703532218933, "learning_rate": 5.398509275698263e-06, "loss": 0.3948, "step": 2214 }, { "epoch": 1.5813898143741074, "grad_norm": 0.3674015402793884, "learning_rate": 5.39436681254627e-06, "loss": 0.4437, "step": 2215 }, { "epoch": 1.5821037601142314, "grad_norm": 0.3464122712612152, "learning_rate": 5.390224076988396e-06, "loss": 0.37, "step": 2216 }, { "epoch": 1.582817705854355, "grad_norm": 0.3683827221393585, "learning_rate": 5.386081071886204e-06, "loss": 0.4005, "step": 2217 }, { "epoch": 1.5835316515944788, "grad_norm": 0.3645051121711731, "learning_rate": 5.38193780010144e-06, "loss": 0.3857, "step": 2218 }, { "epoch": 1.5842455973346026, "grad_norm": 0.3438183069229126, "learning_rate": 5.377794264496041e-06, "loss": 0.4036, "step": 2219 }, { "epoch": 1.5849595430747263, "grad_norm": 0.3767929673194885, "learning_rate": 5.373650467932122e-06, "loss": 0.4152, "step": 2220 }, { "epoch": 1.58567348881485, "grad_norm": 0.36624839901924133, "learning_rate": 5.3695064132719775e-06, "loss": 0.4193, "step": 2221 }, { "epoch": 1.5863874345549738, "grad_norm": 0.31180405616760254, "learning_rate": 5.365362103378085e-06, "loss": 0.3746, "step": 2222 }, { "epoch": 1.5871013802950977, "grad_norm": 0.3444020450115204, "learning_rate": 5.361217541113093e-06, "loss": 0.4269, "step": 2223 }, { "epoch": 1.5878153260352215, "grad_norm": 0.35388970375061035, "learning_rate": 5.357072729339825e-06, "loss": 0.4107, "step": 2224 }, { "epoch": 1.5885292717753452, "grad_norm": 0.3733987808227539, "learning_rate": 5.352927670921282e-06, "loss": 0.4171, "step": 2225 }, { "epoch": 1.589243217515469, "grad_norm": 0.3456484079360962, "learning_rate": 5.348782368720627e-06, "loss": 0.4416, "step": 2226 }, { "epoch": 1.5899571632555927, "grad_norm": 0.37477704882621765, "learning_rate": 5.344636825601199e-06, "loss": 0.4425, "step": 2227 }, { "epoch": 1.5906711089957164, "grad_norm": 0.33790329098701477, "learning_rate": 5.3404910444265015e-06, "loss": 0.3998, "step": 2228 }, { "epoch": 1.5913850547358401, "grad_norm": 0.38368159532546997, "learning_rate": 5.336345028060199e-06, "loss": 0.443, "step": 2229 }, { "epoch": 1.5920990004759639, "grad_norm": 0.3402498960494995, "learning_rate": 5.332198779366123e-06, "loss": 0.3628, "step": 2230 }, { "epoch": 1.5928129462160876, "grad_norm": 0.32691264152526855, "learning_rate": 5.3280523012082595e-06, "loss": 0.4661, "step": 2231 }, { "epoch": 1.5935268919562113, "grad_norm": 0.31241607666015625, "learning_rate": 5.323905596450759e-06, "loss": 0.3629, "step": 2232 }, { "epoch": 1.594240837696335, "grad_norm": 0.36928802728652954, "learning_rate": 5.319758667957929e-06, "loss": 0.3754, "step": 2233 }, { "epoch": 1.5949547834364588, "grad_norm": 0.37047064304351807, "learning_rate": 5.315611518594225e-06, "loss": 0.448, "step": 2234 }, { "epoch": 1.5956687291765825, "grad_norm": 0.3221254348754883, "learning_rate": 5.311464151224261e-06, "loss": 0.3703, "step": 2235 }, { "epoch": 1.5963826749167063, "grad_norm": 0.3976373076438904, "learning_rate": 5.307316568712799e-06, "loss": 0.4262, "step": 2236 }, { "epoch": 1.59709662065683, "grad_norm": 0.3264545500278473, "learning_rate": 5.30316877392475e-06, "loss": 0.3954, "step": 2237 }, { "epoch": 1.5978105663969537, "grad_norm": 0.34977129101753235, "learning_rate": 5.299020769725172e-06, "loss": 0.4214, "step": 2238 }, { "epoch": 1.5985245121370775, "grad_norm": 0.31404078006744385, "learning_rate": 5.294872558979266e-06, "loss": 0.3798, "step": 2239 }, { "epoch": 1.5992384578772012, "grad_norm": 0.3606621325016022, "learning_rate": 5.290724144552379e-06, "loss": 0.4158, "step": 2240 }, { "epoch": 1.599952403617325, "grad_norm": 0.35547080636024475, "learning_rate": 5.286575529309997e-06, "loss": 0.3974, "step": 2241 }, { "epoch": 1.6006663493574487, "grad_norm": 0.3685660660266876, "learning_rate": 5.282426716117743e-06, "loss": 0.4143, "step": 2242 }, { "epoch": 1.6013802950975726, "grad_norm": 0.3483262062072754, "learning_rate": 5.278277707841379e-06, "loss": 0.3797, "step": 2243 }, { "epoch": 1.6020942408376964, "grad_norm": 0.3279109299182892, "learning_rate": 5.274128507346801e-06, "loss": 0.4043, "step": 2244 }, { "epoch": 1.60280818657782, "grad_norm": 0.33113181591033936, "learning_rate": 5.269979117500037e-06, "loss": 0.3901, "step": 2245 }, { "epoch": 1.6035221323179438, "grad_norm": 0.35114434361457825, "learning_rate": 5.2658295411672475e-06, "loss": 0.4162, "step": 2246 }, { "epoch": 1.6042360780580676, "grad_norm": 0.39909791946411133, "learning_rate": 5.2616797812147205e-06, "loss": 0.4344, "step": 2247 }, { "epoch": 1.6049500237981913, "grad_norm": 0.34979450702667236, "learning_rate": 5.25752984050887e-06, "loss": 0.3803, "step": 2248 }, { "epoch": 1.6056639695383152, "grad_norm": 0.3491116464138031, "learning_rate": 5.253379721916237e-06, "loss": 0.3919, "step": 2249 }, { "epoch": 1.606377915278439, "grad_norm": 0.3503110408782959, "learning_rate": 5.249229428303486e-06, "loss": 0.3951, "step": 2250 }, { "epoch": 1.6070918610185627, "grad_norm": 0.3487466871738434, "learning_rate": 5.2450789625374e-06, "loss": 0.4085, "step": 2251 }, { "epoch": 1.6078058067586865, "grad_norm": 0.31986698508262634, "learning_rate": 5.240928327484879e-06, "loss": 0.3831, "step": 2252 }, { "epoch": 1.6085197524988102, "grad_norm": 0.40220242738723755, "learning_rate": 5.2367775260129465e-06, "loss": 0.4472, "step": 2253 }, { "epoch": 1.609233698238934, "grad_norm": 0.3703799545764923, "learning_rate": 5.232626560988735e-06, "loss": 0.401, "step": 2254 }, { "epoch": 1.6099476439790577, "grad_norm": 0.34934136271476746, "learning_rate": 5.228475435279491e-06, "loss": 0.3959, "step": 2255 }, { "epoch": 1.6106615897191814, "grad_norm": 0.3568724989891052, "learning_rate": 5.224324151752575e-06, "loss": 0.4046, "step": 2256 }, { "epoch": 1.6113755354593051, "grad_norm": 0.3557667136192322, "learning_rate": 5.220172713275455e-06, "loss": 0.3625, "step": 2257 }, { "epoch": 1.6120894811994289, "grad_norm": 0.3317587673664093, "learning_rate": 5.216021122715703e-06, "loss": 0.4097, "step": 2258 }, { "epoch": 1.6128034269395526, "grad_norm": 0.3524550497531891, "learning_rate": 5.211869382941003e-06, "loss": 0.4279, "step": 2259 }, { "epoch": 1.6135173726796763, "grad_norm": 0.36658185720443726, "learning_rate": 5.207717496819134e-06, "loss": 0.3856, "step": 2260 }, { "epoch": 1.6142313184198, "grad_norm": 0.34933528304100037, "learning_rate": 5.20356546721798e-06, "loss": 0.4184, "step": 2261 }, { "epoch": 1.6149452641599238, "grad_norm": 0.3549487590789795, "learning_rate": 5.199413297005525e-06, "loss": 0.4097, "step": 2262 }, { "epoch": 1.6156592099000475, "grad_norm": 0.3420690894126892, "learning_rate": 5.195260989049849e-06, "loss": 0.3851, "step": 2263 }, { "epoch": 1.6163731556401713, "grad_norm": 0.38678160309791565, "learning_rate": 5.191108546219128e-06, "loss": 0.4107, "step": 2264 }, { "epoch": 1.617087101380295, "grad_norm": 0.31952106952667236, "learning_rate": 5.18695597138163e-06, "loss": 0.3824, "step": 2265 }, { "epoch": 1.6178010471204187, "grad_norm": 0.383065789937973, "learning_rate": 5.182803267405712e-06, "loss": 0.4244, "step": 2266 }, { "epoch": 1.6185149928605425, "grad_norm": 0.35406607389450073, "learning_rate": 5.1786504371598255e-06, "loss": 0.3654, "step": 2267 }, { "epoch": 1.6192289386006662, "grad_norm": 0.33852270245552063, "learning_rate": 5.174497483512506e-06, "loss": 0.4511, "step": 2268 }, { "epoch": 1.6199428843407901, "grad_norm": 0.34085220098495483, "learning_rate": 5.170344409332373e-06, "loss": 0.4007, "step": 2269 }, { "epoch": 1.6206568300809139, "grad_norm": 0.3740547001361847, "learning_rate": 5.166191217488134e-06, "loss": 0.4402, "step": 2270 }, { "epoch": 1.6213707758210376, "grad_norm": 0.36671262979507446, "learning_rate": 5.162037910848573e-06, "loss": 0.4173, "step": 2271 }, { "epoch": 1.6220847215611613, "grad_norm": 0.38146135210990906, "learning_rate": 5.157884492282555e-06, "loss": 0.4502, "step": 2272 }, { "epoch": 1.622798667301285, "grad_norm": 0.35387131571769714, "learning_rate": 5.1537309646590225e-06, "loss": 0.3725, "step": 2273 }, { "epoch": 1.6235126130414088, "grad_norm": 0.3768306076526642, "learning_rate": 5.1495773308469935e-06, "loss": 0.4228, "step": 2274 }, { "epoch": 1.6242265587815328, "grad_norm": 0.36740046739578247, "learning_rate": 5.145423593715558e-06, "loss": 0.4029, "step": 2275 }, { "epoch": 1.6249405045216565, "grad_norm": 0.35674813389778137, "learning_rate": 5.141269756133879e-06, "loss": 0.414, "step": 2276 }, { "epoch": 1.6256544502617802, "grad_norm": 0.3489223122596741, "learning_rate": 5.137115820971189e-06, "loss": 0.3801, "step": 2277 }, { "epoch": 1.626368396001904, "grad_norm": 0.3352114260196686, "learning_rate": 5.132961791096787e-06, "loss": 0.3768, "step": 2278 }, { "epoch": 1.6270823417420277, "grad_norm": 0.34129598736763, "learning_rate": 5.128807669380036e-06, "loss": 0.3913, "step": 2279 }, { "epoch": 1.6277962874821514, "grad_norm": 0.40645116567611694, "learning_rate": 5.1246534586903655e-06, "loss": 0.4221, "step": 2280 }, { "epoch": 1.6285102332222752, "grad_norm": 0.36963215470314026, "learning_rate": 5.120499161897265e-06, "loss": 0.4177, "step": 2281 }, { "epoch": 1.629224178962399, "grad_norm": 0.40515610575675964, "learning_rate": 5.116344781870282e-06, "loss": 0.4161, "step": 2282 }, { "epoch": 1.6299381247025226, "grad_norm": 0.38429901003837585, "learning_rate": 5.112190321479026e-06, "loss": 0.4096, "step": 2283 }, { "epoch": 1.6306520704426464, "grad_norm": 0.3842037618160248, "learning_rate": 5.108035783593156e-06, "loss": 0.408, "step": 2284 }, { "epoch": 1.63136601618277, "grad_norm": 0.3345833122730255, "learning_rate": 5.103881171082389e-06, "loss": 0.3866, "step": 2285 }, { "epoch": 1.6320799619228938, "grad_norm": 0.3899039924144745, "learning_rate": 5.099726486816491e-06, "loss": 0.3989, "step": 2286 }, { "epoch": 1.6327939076630176, "grad_norm": 0.32292819023132324, "learning_rate": 5.095571733665279e-06, "loss": 0.3956, "step": 2287 }, { "epoch": 1.6335078534031413, "grad_norm": 0.3459334969520569, "learning_rate": 5.091416914498619e-06, "loss": 0.4292, "step": 2288 }, { "epoch": 1.634221799143265, "grad_norm": 0.34531986713409424, "learning_rate": 5.087262032186418e-06, "loss": 0.3973, "step": 2289 }, { "epoch": 1.6349357448833888, "grad_norm": 0.36314842104911804, "learning_rate": 5.083107089598632e-06, "loss": 0.3839, "step": 2290 }, { "epoch": 1.6356496906235125, "grad_norm": 0.332551509141922, "learning_rate": 5.0789520896052535e-06, "loss": 0.4129, "step": 2291 }, { "epoch": 1.6363636363636362, "grad_norm": 0.3246923089027405, "learning_rate": 5.074797035076319e-06, "loss": 0.4026, "step": 2292 }, { "epoch": 1.63707758210376, "grad_norm": 0.3947613537311554, "learning_rate": 5.0706419288819e-06, "loss": 0.3962, "step": 2293 }, { "epoch": 1.6377915278438837, "grad_norm": 0.35120731592178345, "learning_rate": 5.066486773892105e-06, "loss": 0.4399, "step": 2294 }, { "epoch": 1.6385054735840077, "grad_norm": 0.35956594347953796, "learning_rate": 5.062331572977076e-06, "loss": 0.3778, "step": 2295 }, { "epoch": 1.6392194193241314, "grad_norm": 0.3389890491962433, "learning_rate": 5.0581763290069865e-06, "loss": 0.4069, "step": 2296 }, { "epoch": 1.6399333650642551, "grad_norm": 0.34975293278694153, "learning_rate": 5.054021044852039e-06, "loss": 0.3985, "step": 2297 }, { "epoch": 1.6406473108043789, "grad_norm": 0.3551579415798187, "learning_rate": 5.049865723382463e-06, "loss": 0.422, "step": 2298 }, { "epoch": 1.6413612565445026, "grad_norm": 0.33525100350379944, "learning_rate": 5.045710367468519e-06, "loss": 0.4553, "step": 2299 }, { "epoch": 1.6420752022846263, "grad_norm": 0.3438290059566498, "learning_rate": 5.041554979980487e-06, "loss": 0.3877, "step": 2300 }, { "epoch": 1.64278914802475, "grad_norm": 0.3539755642414093, "learning_rate": 5.037399563788665e-06, "loss": 0.4009, "step": 2301 }, { "epoch": 1.643503093764874, "grad_norm": 0.367981880903244, "learning_rate": 5.033244121763381e-06, "loss": 0.4498, "step": 2302 }, { "epoch": 1.6442170395049978, "grad_norm": 0.32630619406700134, "learning_rate": 5.02908865677497e-06, "loss": 0.4209, "step": 2303 }, { "epoch": 1.6449309852451215, "grad_norm": 0.3233901560306549, "learning_rate": 5.024933171693791e-06, "loss": 0.3799, "step": 2304 }, { "epoch": 1.6456449309852452, "grad_norm": 0.337588906288147, "learning_rate": 5.020777669390213e-06, "loss": 0.389, "step": 2305 }, { "epoch": 1.646358876725369, "grad_norm": 0.37902921438217163, "learning_rate": 5.016622152734617e-06, "loss": 0.4292, "step": 2306 }, { "epoch": 1.6470728224654927, "grad_norm": 0.3178088665008545, "learning_rate": 5.012466624597396e-06, "loss": 0.3653, "step": 2307 }, { "epoch": 1.6477867682056164, "grad_norm": 0.33749425411224365, "learning_rate": 5.008311087848949e-06, "loss": 0.3894, "step": 2308 }, { "epoch": 1.6485007139457402, "grad_norm": 0.34140825271606445, "learning_rate": 5.00415554535968e-06, "loss": 0.4293, "step": 2309 }, { "epoch": 1.649214659685864, "grad_norm": 0.33500322699546814, "learning_rate": 5e-06, "loss": 0.3601, "step": 2310 }, { "epoch": 1.6499286054259876, "grad_norm": 0.38107994198799133, "learning_rate": 4.99584445464032e-06, "loss": 0.4315, "step": 2311 }, { "epoch": 1.6506425511661114, "grad_norm": 0.3267035186290741, "learning_rate": 4.9916889121510535e-06, "loss": 0.3757, "step": 2312 }, { "epoch": 1.651356496906235, "grad_norm": 0.37362051010131836, "learning_rate": 4.987533375402605e-06, "loss": 0.4222, "step": 2313 }, { "epoch": 1.6520704426463588, "grad_norm": 0.4011008143424988, "learning_rate": 4.983377847265384e-06, "loss": 0.4362, "step": 2314 }, { "epoch": 1.6527843883864826, "grad_norm": 0.3170570433139801, "learning_rate": 4.979222330609789e-06, "loss": 0.404, "step": 2315 }, { "epoch": 1.6534983341266063, "grad_norm": 0.3368811011314392, "learning_rate": 4.9750668283062104e-06, "loss": 0.4114, "step": 2316 }, { "epoch": 1.65421227986673, "grad_norm": 0.342215359210968, "learning_rate": 4.970911343225031e-06, "loss": 0.4014, "step": 2317 }, { "epoch": 1.6549262256068538, "grad_norm": 0.378783643245697, "learning_rate": 4.966755878236622e-06, "loss": 0.4189, "step": 2318 }, { "epoch": 1.6556401713469775, "grad_norm": 0.35180729627609253, "learning_rate": 4.962600436211336e-06, "loss": 0.3576, "step": 2319 }, { "epoch": 1.6563541170871012, "grad_norm": 0.34756550192832947, "learning_rate": 4.958445020019516e-06, "loss": 0.3837, "step": 2320 }, { "epoch": 1.6570680628272252, "grad_norm": 0.3368312120437622, "learning_rate": 4.954289632531483e-06, "loss": 0.4216, "step": 2321 }, { "epoch": 1.657782008567349, "grad_norm": 0.32443884015083313, "learning_rate": 4.950134276617538e-06, "loss": 0.3929, "step": 2322 }, { "epoch": 1.6584959543074727, "grad_norm": 0.35953250527381897, "learning_rate": 4.945978955147963e-06, "loss": 0.343, "step": 2323 }, { "epoch": 1.6592099000475964, "grad_norm": 0.35918259620666504, "learning_rate": 4.941823670993016e-06, "loss": 0.3973, "step": 2324 }, { "epoch": 1.6599238457877201, "grad_norm": 0.3187091648578644, "learning_rate": 4.937668427022925e-06, "loss": 0.4108, "step": 2325 }, { "epoch": 1.6606377915278439, "grad_norm": 0.36014169454574585, "learning_rate": 4.933513226107896e-06, "loss": 0.3879, "step": 2326 }, { "epoch": 1.6613517372679676, "grad_norm": 0.3237893581390381, "learning_rate": 4.9293580711181025e-06, "loss": 0.3483, "step": 2327 }, { "epoch": 1.6620656830080915, "grad_norm": 0.36595889925956726, "learning_rate": 4.9252029649236835e-06, "loss": 0.4508, "step": 2328 }, { "epoch": 1.6627796287482153, "grad_norm": 0.32836344838142395, "learning_rate": 4.921047910394747e-06, "loss": 0.3581, "step": 2329 }, { "epoch": 1.663493574488339, "grad_norm": 0.3779946267604828, "learning_rate": 4.916892910401369e-06, "loss": 0.4342, "step": 2330 }, { "epoch": 1.6642075202284627, "grad_norm": 0.36291930079460144, "learning_rate": 4.9127379678135825e-06, "loss": 0.3661, "step": 2331 }, { "epoch": 1.6649214659685865, "grad_norm": 0.35671043395996094, "learning_rate": 4.908583085501383e-06, "loss": 0.4482, "step": 2332 }, { "epoch": 1.6656354117087102, "grad_norm": 0.35664284229278564, "learning_rate": 4.904428266334721e-06, "loss": 0.4266, "step": 2333 }, { "epoch": 1.666349357448834, "grad_norm": 0.3274058401584625, "learning_rate": 4.900273513183511e-06, "loss": 0.3707, "step": 2334 }, { "epoch": 1.6670633031889577, "grad_norm": 0.335305392742157, "learning_rate": 4.8961188289176125e-06, "loss": 0.386, "step": 2335 }, { "epoch": 1.6677772489290814, "grad_norm": 0.3449561297893524, "learning_rate": 4.891964216406844e-06, "loss": 0.393, "step": 2336 }, { "epoch": 1.6684911946692051, "grad_norm": 0.35806503891944885, "learning_rate": 4.887809678520976e-06, "loss": 0.4301, "step": 2337 }, { "epoch": 1.6692051404093289, "grad_norm": 0.33603689074516296, "learning_rate": 4.883655218129719e-06, "loss": 0.4476, "step": 2338 }, { "epoch": 1.6699190861494526, "grad_norm": 0.341680645942688, "learning_rate": 4.879500838102736e-06, "loss": 0.3413, "step": 2339 }, { "epoch": 1.6706330318895763, "grad_norm": 0.36178961396217346, "learning_rate": 4.875346541309637e-06, "loss": 0.4269, "step": 2340 }, { "epoch": 1.6713469776297, "grad_norm": 0.3612090051174164, "learning_rate": 4.871192330619965e-06, "loss": 0.4253, "step": 2341 }, { "epoch": 1.6720609233698238, "grad_norm": 0.3311202824115753, "learning_rate": 4.867038208903214e-06, "loss": 0.4126, "step": 2342 }, { "epoch": 1.6727748691099475, "grad_norm": 0.34334930777549744, "learning_rate": 4.862884179028813e-06, "loss": 0.3836, "step": 2343 }, { "epoch": 1.6734888148500713, "grad_norm": 0.34345197677612305, "learning_rate": 4.858730243866123e-06, "loss": 0.4025, "step": 2344 }, { "epoch": 1.674202760590195, "grad_norm": 0.3519206941127777, "learning_rate": 4.854576406284443e-06, "loss": 0.4021, "step": 2345 }, { "epoch": 1.6749167063303187, "grad_norm": 0.31659042835235596, "learning_rate": 4.850422669153009e-06, "loss": 0.3879, "step": 2346 }, { "epoch": 1.6756306520704425, "grad_norm": 0.35398629307746887, "learning_rate": 4.846269035340978e-06, "loss": 0.3848, "step": 2347 }, { "epoch": 1.6763445978105664, "grad_norm": 0.321573406457901, "learning_rate": 4.842115507717446e-06, "loss": 0.3988, "step": 2348 }, { "epoch": 1.6770585435506902, "grad_norm": 0.3152107000350952, "learning_rate": 4.8379620891514284e-06, "loss": 0.3771, "step": 2349 }, { "epoch": 1.677772489290814, "grad_norm": 0.3402288854122162, "learning_rate": 4.833808782511867e-06, "loss": 0.4317, "step": 2350 }, { "epoch": 1.6784864350309376, "grad_norm": 0.35215920209884644, "learning_rate": 4.829655590667628e-06, "loss": 0.4178, "step": 2351 }, { "epoch": 1.6792003807710614, "grad_norm": 0.3289930522441864, "learning_rate": 4.825502516487497e-06, "loss": 0.3826, "step": 2352 }, { "epoch": 1.679914326511185, "grad_norm": 0.35873106122016907, "learning_rate": 4.821349562840176e-06, "loss": 0.4167, "step": 2353 }, { "epoch": 1.680628272251309, "grad_norm": 0.3567160665988922, "learning_rate": 4.817196732594288e-06, "loss": 0.4106, "step": 2354 }, { "epoch": 1.6813422179914328, "grad_norm": 0.3772673010826111, "learning_rate": 4.8130440286183725e-06, "loss": 0.4047, "step": 2355 }, { "epoch": 1.6820561637315565, "grad_norm": 0.3293762803077698, "learning_rate": 4.808891453780874e-06, "loss": 0.3689, "step": 2356 }, { "epoch": 1.6827701094716803, "grad_norm": 0.37742963433265686, "learning_rate": 4.804739010950151e-06, "loss": 0.4334, "step": 2357 }, { "epoch": 1.683484055211804, "grad_norm": 0.3590449094772339, "learning_rate": 4.800586702994477e-06, "loss": 0.404, "step": 2358 }, { "epoch": 1.6841980009519277, "grad_norm": 0.3676163852214813, "learning_rate": 4.796434532782021e-06, "loss": 0.4075, "step": 2359 }, { "epoch": 1.6849119466920515, "grad_norm": 0.36676090955734253, "learning_rate": 4.792282503180867e-06, "loss": 0.4419, "step": 2360 }, { "epoch": 1.6856258924321752, "grad_norm": 0.35442525148391724, "learning_rate": 4.788130617058999e-06, "loss": 0.3995, "step": 2361 }, { "epoch": 1.686339838172299, "grad_norm": 0.35397449135780334, "learning_rate": 4.783978877284298e-06, "loss": 0.3647, "step": 2362 }, { "epoch": 1.6870537839124227, "grad_norm": 0.361934632062912, "learning_rate": 4.7798272867245465e-06, "loss": 0.4158, "step": 2363 }, { "epoch": 1.6877677296525464, "grad_norm": 0.32651641964912415, "learning_rate": 4.775675848247427e-06, "loss": 0.3752, "step": 2364 }, { "epoch": 1.6884816753926701, "grad_norm": 0.3381231427192688, "learning_rate": 4.7715245647205105e-06, "loss": 0.4283, "step": 2365 }, { "epoch": 1.6891956211327939, "grad_norm": 0.3292731046676636, "learning_rate": 4.767373439011267e-06, "loss": 0.3898, "step": 2366 }, { "epoch": 1.6899095668729176, "grad_norm": 0.35277608036994934, "learning_rate": 4.763222473987056e-06, "loss": 0.4034, "step": 2367 }, { "epoch": 1.6906235126130413, "grad_norm": 0.36514002084732056, "learning_rate": 4.7590716725151236e-06, "loss": 0.3936, "step": 2368 }, { "epoch": 1.691337458353165, "grad_norm": 0.35467636585235596, "learning_rate": 4.754921037462602e-06, "loss": 0.4062, "step": 2369 }, { "epoch": 1.6920514040932888, "grad_norm": 0.3361594080924988, "learning_rate": 4.750770571696514e-06, "loss": 0.3966, "step": 2370 }, { "epoch": 1.6927653498334125, "grad_norm": 0.359636127948761, "learning_rate": 4.746620278083764e-06, "loss": 0.4196, "step": 2371 }, { "epoch": 1.6934792955735363, "grad_norm": 0.3286466896533966, "learning_rate": 4.742470159491131e-06, "loss": 0.3171, "step": 2372 }, { "epoch": 1.69419324131366, "grad_norm": 0.3444349467754364, "learning_rate": 4.738320218785281e-06, "loss": 0.4133, "step": 2373 }, { "epoch": 1.694907187053784, "grad_norm": 0.3428444564342499, "learning_rate": 4.734170458832754e-06, "loss": 0.4067, "step": 2374 }, { "epoch": 1.6956211327939077, "grad_norm": 0.3500819206237793, "learning_rate": 4.730020882499964e-06, "loss": 0.3873, "step": 2375 }, { "epoch": 1.6963350785340314, "grad_norm": 0.32237178087234497, "learning_rate": 4.7258714926532e-06, "loss": 0.381, "step": 2376 }, { "epoch": 1.6970490242741552, "grad_norm": 0.3393082916736603, "learning_rate": 4.721722292158622e-06, "loss": 0.4434, "step": 2377 }, { "epoch": 1.697762970014279, "grad_norm": 0.33411160111427307, "learning_rate": 4.717573283882258e-06, "loss": 0.3727, "step": 2378 }, { "epoch": 1.6984769157544026, "grad_norm": 0.36042270064353943, "learning_rate": 4.713424470690004e-06, "loss": 0.4004, "step": 2379 }, { "epoch": 1.6991908614945264, "grad_norm": 0.3321034610271454, "learning_rate": 4.7092758554476215e-06, "loss": 0.412, "step": 2380 }, { "epoch": 1.6999048072346503, "grad_norm": 0.36317363381385803, "learning_rate": 4.7051274410207345e-06, "loss": 0.4599, "step": 2381 }, { "epoch": 1.700618752974774, "grad_norm": 0.3286013901233673, "learning_rate": 4.700979230274829e-06, "loss": 0.3733, "step": 2382 }, { "epoch": 1.7013326987148978, "grad_norm": 0.3682527542114258, "learning_rate": 4.696831226075252e-06, "loss": 0.4094, "step": 2383 }, { "epoch": 1.7020466444550215, "grad_norm": 0.3964429795742035, "learning_rate": 4.692683431287202e-06, "loss": 0.4374, "step": 2384 }, { "epoch": 1.7027605901951453, "grad_norm": 0.34076234698295593, "learning_rate": 4.6885358487757396e-06, "loss": 0.3888, "step": 2385 }, { "epoch": 1.703474535935269, "grad_norm": 0.3605630099773407, "learning_rate": 4.684388481405776e-06, "loss": 0.4141, "step": 2386 }, { "epoch": 1.7041884816753927, "grad_norm": 0.34162020683288574, "learning_rate": 4.680241332042072e-06, "loss": 0.3793, "step": 2387 }, { "epoch": 1.7049024274155165, "grad_norm": 0.3704993724822998, "learning_rate": 4.676094403549241e-06, "loss": 0.4189, "step": 2388 }, { "epoch": 1.7056163731556402, "grad_norm": 0.3190763294696808, "learning_rate": 4.671947698791743e-06, "loss": 0.3467, "step": 2389 }, { "epoch": 1.706330318895764, "grad_norm": 0.37106820940971375, "learning_rate": 4.66780122063388e-06, "loss": 0.439, "step": 2390 }, { "epoch": 1.7070442646358877, "grad_norm": 0.3428013324737549, "learning_rate": 4.663654971939802e-06, "loss": 0.42, "step": 2391 }, { "epoch": 1.7077582103760114, "grad_norm": 0.3192331790924072, "learning_rate": 4.6595089555735e-06, "loss": 0.397, "step": 2392 }, { "epoch": 1.7084721561161351, "grad_norm": 0.332592636346817, "learning_rate": 4.655363174398802e-06, "loss": 0.3908, "step": 2393 }, { "epoch": 1.7091861018562589, "grad_norm": 0.3250824809074402, "learning_rate": 4.651217631279374e-06, "loss": 0.3778, "step": 2394 }, { "epoch": 1.7099000475963826, "grad_norm": 0.33837831020355225, "learning_rate": 4.647072329078721e-06, "loss": 0.4307, "step": 2395 }, { "epoch": 1.7106139933365063, "grad_norm": 0.3450653851032257, "learning_rate": 4.642927270660176e-06, "loss": 0.3781, "step": 2396 }, { "epoch": 1.71132793907663, "grad_norm": 0.3521951138973236, "learning_rate": 4.638782458886908e-06, "loss": 0.3909, "step": 2397 }, { "epoch": 1.7120418848167538, "grad_norm": 0.3873155415058136, "learning_rate": 4.634637896621916e-06, "loss": 0.4433, "step": 2398 }, { "epoch": 1.7127558305568775, "grad_norm": 0.2969878613948822, "learning_rate": 4.630493586728023e-06, "loss": 0.3724, "step": 2399 }, { "epoch": 1.7134697762970015, "grad_norm": 0.3676137626171112, "learning_rate": 4.626349532067879e-06, "loss": 0.3961, "step": 2400 }, { "epoch": 1.7141837220371252, "grad_norm": 0.3561798930168152, "learning_rate": 4.6222057355039614e-06, "loss": 0.4248, "step": 2401 }, { "epoch": 1.714897667777249, "grad_norm": 0.33322757482528687, "learning_rate": 4.618062199898561e-06, "loss": 0.3618, "step": 2402 }, { "epoch": 1.7156116135173727, "grad_norm": 0.3990393579006195, "learning_rate": 4.613918928113797e-06, "loss": 0.4398, "step": 2403 }, { "epoch": 1.7163255592574964, "grad_norm": 0.3661271929740906, "learning_rate": 4.6097759230116055e-06, "loss": 0.4191, "step": 2404 }, { "epoch": 1.7170395049976201, "grad_norm": 0.34667643904685974, "learning_rate": 4.605633187453732e-06, "loss": 0.4062, "step": 2405 }, { "epoch": 1.7177534507377439, "grad_norm": 0.36835265159606934, "learning_rate": 4.601490724301738e-06, "loss": 0.4209, "step": 2406 }, { "epoch": 1.7184673964778678, "grad_norm": 0.33424878120422363, "learning_rate": 4.597348536417004e-06, "loss": 0.4084, "step": 2407 }, { "epoch": 1.7191813422179916, "grad_norm": 0.3625214695930481, "learning_rate": 4.59320662666071e-06, "loss": 0.4223, "step": 2408 }, { "epoch": 1.7198952879581153, "grad_norm": 0.36519524455070496, "learning_rate": 4.589064997893849e-06, "loss": 0.3699, "step": 2409 }, { "epoch": 1.720609233698239, "grad_norm": 0.3668571412563324, "learning_rate": 4.584923652977224e-06, "loss": 0.4295, "step": 2410 }, { "epoch": 1.7213231794383628, "grad_norm": 0.3399532735347748, "learning_rate": 4.580782594771435e-06, "loss": 0.4202, "step": 2411 }, { "epoch": 1.7220371251784865, "grad_norm": 0.37981271743774414, "learning_rate": 4.576641826136884e-06, "loss": 0.4041, "step": 2412 }, { "epoch": 1.7227510709186102, "grad_norm": 0.3738254904747009, "learning_rate": 4.572501349933778e-06, "loss": 0.4011, "step": 2413 }, { "epoch": 1.723465016658734, "grad_norm": 0.36144548654556274, "learning_rate": 4.568361169022122e-06, "loss": 0.4337, "step": 2414 }, { "epoch": 1.7241789623988577, "grad_norm": 0.340559184551239, "learning_rate": 4.564221286261709e-06, "loss": 0.4005, "step": 2415 }, { "epoch": 1.7248929081389814, "grad_norm": 0.3306848406791687, "learning_rate": 4.560081704512136e-06, "loss": 0.3343, "step": 2416 }, { "epoch": 1.7256068538791052, "grad_norm": 0.3644857704639435, "learning_rate": 4.555942426632786e-06, "loss": 0.3874, "step": 2417 }, { "epoch": 1.726320799619229, "grad_norm": 0.34086793661117554, "learning_rate": 4.551803455482833e-06, "loss": 0.4021, "step": 2418 }, { "epoch": 1.7270347453593526, "grad_norm": 0.38071125745773315, "learning_rate": 4.54766479392124e-06, "loss": 0.4351, "step": 2419 }, { "epoch": 1.7277486910994764, "grad_norm": 0.29987892508506775, "learning_rate": 4.5435264448067595e-06, "loss": 0.3707, "step": 2420 }, { "epoch": 1.7284626368396, "grad_norm": 0.34249168634414673, "learning_rate": 4.539388410997919e-06, "loss": 0.3725, "step": 2421 }, { "epoch": 1.7291765825797238, "grad_norm": 0.3889201879501343, "learning_rate": 4.53525069535304e-06, "loss": 0.4412, "step": 2422 }, { "epoch": 1.7298905283198476, "grad_norm": 0.3178822696208954, "learning_rate": 4.5311133007302145e-06, "loss": 0.3601, "step": 2423 }, { "epoch": 1.7306044740599713, "grad_norm": 0.3740610182285309, "learning_rate": 4.526976229987315e-06, "loss": 0.4467, "step": 2424 }, { "epoch": 1.731318419800095, "grad_norm": 0.32239678502082825, "learning_rate": 4.522839485981994e-06, "loss": 0.3523, "step": 2425 }, { "epoch": 1.732032365540219, "grad_norm": 0.32924291491508484, "learning_rate": 4.518703071571678e-06, "loss": 0.3858, "step": 2426 }, { "epoch": 1.7327463112803427, "grad_norm": 0.38538095355033875, "learning_rate": 4.51456698961356e-06, "loss": 0.4229, "step": 2427 }, { "epoch": 1.7334602570204665, "grad_norm": 0.3466036319732666, "learning_rate": 4.510431242964609e-06, "loss": 0.4125, "step": 2428 }, { "epoch": 1.7341742027605902, "grad_norm": 0.3497263193130493, "learning_rate": 4.506295834481561e-06, "loss": 0.4075, "step": 2429 }, { "epoch": 1.734888148500714, "grad_norm": 0.36979472637176514, "learning_rate": 4.502160767020918e-06, "loss": 0.4101, "step": 2430 }, { "epoch": 1.7356020942408377, "grad_norm": 0.3881022334098816, "learning_rate": 4.498026043438944e-06, "loss": 0.4154, "step": 2431 }, { "epoch": 1.7363160399809614, "grad_norm": 0.31425800919532776, "learning_rate": 4.493891666591672e-06, "loss": 0.3552, "step": 2432 }, { "epoch": 1.7370299857210854, "grad_norm": 0.3413712680339813, "learning_rate": 4.489757639334888e-06, "loss": 0.4178, "step": 2433 }, { "epoch": 1.737743931461209, "grad_norm": 0.37211716175079346, "learning_rate": 4.4856239645241414e-06, "loss": 0.3947, "step": 2434 }, { "epoch": 1.7384578772013328, "grad_norm": 0.3779730796813965, "learning_rate": 4.4814906450147376e-06, "loss": 0.4121, "step": 2435 }, { "epoch": 1.7391718229414566, "grad_norm": 0.3327236473560333, "learning_rate": 4.477357683661734e-06, "loss": 0.4026, "step": 2436 }, { "epoch": 1.7398857686815803, "grad_norm": 0.3717908263206482, "learning_rate": 4.4732250833199415e-06, "loss": 0.4218, "step": 2437 }, { "epoch": 1.740599714421704, "grad_norm": 0.3644159436225891, "learning_rate": 4.469092846843927e-06, "loss": 0.4047, "step": 2438 }, { "epoch": 1.7413136601618278, "grad_norm": 0.3502219319343567, "learning_rate": 4.464960977087995e-06, "loss": 0.399, "step": 2439 }, { "epoch": 1.7420276059019515, "grad_norm": 0.3129276931285858, "learning_rate": 4.460829476906208e-06, "loss": 0.3846, "step": 2440 }, { "epoch": 1.7427415516420752, "grad_norm": 0.3460855484008789, "learning_rate": 4.456698349152367e-06, "loss": 0.3776, "step": 2441 }, { "epoch": 1.743455497382199, "grad_norm": 0.34521669149398804, "learning_rate": 4.452567596680016e-06, "loss": 0.3881, "step": 2442 }, { "epoch": 1.7441694431223227, "grad_norm": 0.36042702198028564, "learning_rate": 4.448437222342441e-06, "loss": 0.4166, "step": 2443 }, { "epoch": 1.7448833888624464, "grad_norm": 0.2955246865749359, "learning_rate": 4.444307228992671e-06, "loss": 0.3353, "step": 2444 }, { "epoch": 1.7455973346025702, "grad_norm": 0.3577134609222412, "learning_rate": 4.4401776194834615e-06, "loss": 0.4594, "step": 2445 }, { "epoch": 1.746311280342694, "grad_norm": 0.317241907119751, "learning_rate": 4.436048396667312e-06, "loss": 0.4168, "step": 2446 }, { "epoch": 1.7470252260828176, "grad_norm": 0.3356296420097351, "learning_rate": 4.431919563396453e-06, "loss": 0.4159, "step": 2447 }, { "epoch": 1.7477391718229414, "grad_norm": 0.3535829782485962, "learning_rate": 4.427791122522841e-06, "loss": 0.3894, "step": 2448 }, { "epoch": 1.748453117563065, "grad_norm": 0.36325082182884216, "learning_rate": 4.4236630768981675e-06, "loss": 0.4165, "step": 2449 }, { "epoch": 1.7491670633031888, "grad_norm": 0.3590885400772095, "learning_rate": 4.4195354293738484e-06, "loss": 0.4384, "step": 2450 }, { "epoch": 1.7498810090433126, "grad_norm": 0.3331025242805481, "learning_rate": 4.415408182801027e-06, "loss": 0.3894, "step": 2451 }, { "epoch": 1.7505949547834363, "grad_norm": 0.31428200006484985, "learning_rate": 4.411281340030564e-06, "loss": 0.3577, "step": 2452 }, { "epoch": 1.7513089005235603, "grad_norm": 0.3396282494068146, "learning_rate": 4.407154903913046e-06, "loss": 0.4225, "step": 2453 }, { "epoch": 1.752022846263684, "grad_norm": 0.33742010593414307, "learning_rate": 4.4030288772987795e-06, "loss": 0.4149, "step": 2454 }, { "epoch": 1.7527367920038077, "grad_norm": 0.3010783791542053, "learning_rate": 4.398903263037783e-06, "loss": 0.379, "step": 2455 }, { "epoch": 1.7534507377439315, "grad_norm": 0.32578739523887634, "learning_rate": 4.394778063979793e-06, "loss": 0.411, "step": 2456 }, { "epoch": 1.7541646834840552, "grad_norm": 0.3173413574695587, "learning_rate": 4.390653282974264e-06, "loss": 0.3739, "step": 2457 }, { "epoch": 1.754878629224179, "grad_norm": 0.3598727285861969, "learning_rate": 4.386528922870351e-06, "loss": 0.5025, "step": 2458 }, { "epoch": 1.7555925749643029, "grad_norm": 0.34099191427230835, "learning_rate": 4.38240498651693e-06, "loss": 0.3785, "step": 2459 }, { "epoch": 1.7563065207044266, "grad_norm": 0.32162144780158997, "learning_rate": 4.3782814767625755e-06, "loss": 0.3955, "step": 2460 }, { "epoch": 1.7570204664445503, "grad_norm": 0.32530152797698975, "learning_rate": 4.37415839645557e-06, "loss": 0.3828, "step": 2461 }, { "epoch": 1.757734412184674, "grad_norm": 0.3669569194316864, "learning_rate": 4.370035748443901e-06, "loss": 0.3976, "step": 2462 }, { "epoch": 1.7584483579247978, "grad_norm": 0.3419501781463623, "learning_rate": 4.3659135355752595e-06, "loss": 0.394, "step": 2463 }, { "epoch": 1.7591623036649215, "grad_norm": 0.34542661905288696, "learning_rate": 4.361791760697027e-06, "loss": 0.4234, "step": 2464 }, { "epoch": 1.7598762494050453, "grad_norm": 0.34894928336143494, "learning_rate": 4.357670426656293e-06, "loss": 0.3809, "step": 2465 }, { "epoch": 1.760590195145169, "grad_norm": 0.3376394510269165, "learning_rate": 4.353549536299835e-06, "loss": 0.4296, "step": 2466 }, { "epoch": 1.7613041408852927, "grad_norm": 0.32814472913742065, "learning_rate": 4.349429092474127e-06, "loss": 0.3804, "step": 2467 }, { "epoch": 1.7620180866254165, "grad_norm": 0.34881216287612915, "learning_rate": 4.345309098025333e-06, "loss": 0.3831, "step": 2468 }, { "epoch": 1.7627320323655402, "grad_norm": 0.32883837819099426, "learning_rate": 4.341189555799313e-06, "loss": 0.3756, "step": 2469 }, { "epoch": 1.763445978105664, "grad_norm": 0.3483749330043793, "learning_rate": 4.337070468641604e-06, "loss": 0.412, "step": 2470 }, { "epoch": 1.7641599238457877, "grad_norm": 0.372975617647171, "learning_rate": 4.3329518393974365e-06, "loss": 0.4052, "step": 2471 }, { "epoch": 1.7648738695859114, "grad_norm": 0.33447033166885376, "learning_rate": 4.3288336709117246e-06, "loss": 0.3744, "step": 2472 }, { "epoch": 1.7655878153260351, "grad_norm": 0.35076579451560974, "learning_rate": 4.324715966029056e-06, "loss": 0.4004, "step": 2473 }, { "epoch": 1.7663017610661589, "grad_norm": 0.3350413143634796, "learning_rate": 4.320598727593709e-06, "loss": 0.4132, "step": 2474 }, { "epoch": 1.7670157068062826, "grad_norm": 0.34914371371269226, "learning_rate": 4.316481958449634e-06, "loss": 0.4099, "step": 2475 }, { "epoch": 1.7677296525464063, "grad_norm": 0.33007118105888367, "learning_rate": 4.3123656614404565e-06, "loss": 0.331, "step": 2476 }, { "epoch": 1.76844359828653, "grad_norm": 0.372249037027359, "learning_rate": 4.308249839409479e-06, "loss": 0.4703, "step": 2477 }, { "epoch": 1.7691575440266538, "grad_norm": 0.3268132209777832, "learning_rate": 4.304134495199675e-06, "loss": 0.4142, "step": 2478 }, { "epoch": 1.7698714897667778, "grad_norm": 0.3392791450023651, "learning_rate": 4.300019631653683e-06, "loss": 0.36, "step": 2479 }, { "epoch": 1.7705854355069015, "grad_norm": 0.3742843270301819, "learning_rate": 4.295905251613817e-06, "loss": 0.4241, "step": 2480 }, { "epoch": 1.7712993812470252, "grad_norm": 0.3393654227256775, "learning_rate": 4.291791357922056e-06, "loss": 0.4068, "step": 2481 }, { "epoch": 1.772013326987149, "grad_norm": 0.3616999685764313, "learning_rate": 4.2876779534200364e-06, "loss": 0.3917, "step": 2482 }, { "epoch": 1.7727272727272727, "grad_norm": 0.38674452900886536, "learning_rate": 4.283565040949063e-06, "loss": 0.4206, "step": 2483 }, { "epoch": 1.7734412184673964, "grad_norm": 0.3651033341884613, "learning_rate": 4.279452623350101e-06, "loss": 0.4222, "step": 2484 }, { "epoch": 1.7741551642075202, "grad_norm": 0.36025992035865784, "learning_rate": 4.275340703463767e-06, "loss": 0.395, "step": 2485 }, { "epoch": 1.7748691099476441, "grad_norm": 0.35400283336639404, "learning_rate": 4.271229284130341e-06, "loss": 0.4091, "step": 2486 }, { "epoch": 1.7755830556877679, "grad_norm": 0.3544522821903229, "learning_rate": 4.267118368189757e-06, "loss": 0.3926, "step": 2487 }, { "epoch": 1.7762970014278916, "grad_norm": 0.3587086796760559, "learning_rate": 4.263007958481595e-06, "loss": 0.4033, "step": 2488 }, { "epoch": 1.7770109471680153, "grad_norm": 0.37722691893577576, "learning_rate": 4.258898057845092e-06, "loss": 0.4865, "step": 2489 }, { "epoch": 1.777724892908139, "grad_norm": 0.3273109495639801, "learning_rate": 4.254788669119127e-06, "loss": 0.3859, "step": 2490 }, { "epoch": 1.7784388386482628, "grad_norm": 0.3620944917201996, "learning_rate": 4.250679795142236e-06, "loss": 0.4276, "step": 2491 }, { "epoch": 1.7791527843883865, "grad_norm": 0.3316320776939392, "learning_rate": 4.246571438752585e-06, "loss": 0.336, "step": 2492 }, { "epoch": 1.7798667301285103, "grad_norm": 0.33982622623443604, "learning_rate": 4.2424636027879926e-06, "loss": 0.3843, "step": 2493 }, { "epoch": 1.780580675868634, "grad_norm": 0.3198906481266022, "learning_rate": 4.238356290085919e-06, "loss": 0.3855, "step": 2494 }, { "epoch": 1.7812946216087577, "grad_norm": 0.3823298513889313, "learning_rate": 4.234249503483455e-06, "loss": 0.4633, "step": 2495 }, { "epoch": 1.7820085673488815, "grad_norm": 0.31077656149864197, "learning_rate": 4.230143245817332e-06, "loss": 0.3644, "step": 2496 }, { "epoch": 1.7827225130890052, "grad_norm": 0.3257931172847748, "learning_rate": 4.22603751992392e-06, "loss": 0.4068, "step": 2497 }, { "epoch": 1.783436458829129, "grad_norm": 0.3512572944164276, "learning_rate": 4.221932328639214e-06, "loss": 0.4157, "step": 2498 }, { "epoch": 1.7841504045692527, "grad_norm": 0.37562867999076843, "learning_rate": 4.217827674798845e-06, "loss": 0.4189, "step": 2499 }, { "epoch": 1.7848643503093764, "grad_norm": 0.337578147649765, "learning_rate": 4.213723561238074e-06, "loss": 0.3911, "step": 2500 }, { "epoch": 1.7855782960495001, "grad_norm": 0.36933717131614685, "learning_rate": 4.209619990791784e-06, "loss": 0.3942, "step": 2501 }, { "epoch": 1.7862922417896239, "grad_norm": 0.37764301896095276, "learning_rate": 4.205516966294484e-06, "loss": 0.4003, "step": 2502 }, { "epoch": 1.7870061875297476, "grad_norm": 0.34868499636650085, "learning_rate": 4.20141449058031e-06, "loss": 0.3913, "step": 2503 }, { "epoch": 1.7877201332698713, "grad_norm": 0.34320011734962463, "learning_rate": 4.197312566483013e-06, "loss": 0.3925, "step": 2504 }, { "epoch": 1.7884340790099953, "grad_norm": 0.3451811671257019, "learning_rate": 4.193211196835967e-06, "loss": 0.4398, "step": 2505 }, { "epoch": 1.789148024750119, "grad_norm": 0.3084588646888733, "learning_rate": 4.189110384472164e-06, "loss": 0.3507, "step": 2506 }, { "epoch": 1.7898619704902428, "grad_norm": 0.33123138546943665, "learning_rate": 4.185010132224207e-06, "loss": 0.4012, "step": 2507 }, { "epoch": 1.7905759162303665, "grad_norm": 0.3442211449146271, "learning_rate": 4.180910442924312e-06, "loss": 0.4517, "step": 2508 }, { "epoch": 1.7912898619704902, "grad_norm": 0.31605038046836853, "learning_rate": 4.176811319404314e-06, "loss": 0.3331, "step": 2509 }, { "epoch": 1.792003807710614, "grad_norm": 0.329583078622818, "learning_rate": 4.172712764495645e-06, "loss": 0.3828, "step": 2510 }, { "epoch": 1.7927177534507377, "grad_norm": 0.35639241337776184, "learning_rate": 4.1686147810293534e-06, "loss": 0.4083, "step": 2511 }, { "epoch": 1.7934316991908617, "grad_norm": 0.3272187113761902, "learning_rate": 4.164517371836094e-06, "loss": 0.3968, "step": 2512 }, { "epoch": 1.7941456449309854, "grad_norm": 0.3498457670211792, "learning_rate": 4.160420539746115e-06, "loss": 0.4546, "step": 2513 }, { "epoch": 1.7948595906711091, "grad_norm": 0.3613307774066925, "learning_rate": 4.156324287589276e-06, "loss": 0.4362, "step": 2514 }, { "epoch": 1.7955735364112329, "grad_norm": 0.2902338206768036, "learning_rate": 4.152228618195031e-06, "loss": 0.3739, "step": 2515 }, { "epoch": 1.7962874821513566, "grad_norm": 0.3426540791988373, "learning_rate": 4.148133534392431e-06, "loss": 0.444, "step": 2516 }, { "epoch": 1.7970014278914803, "grad_norm": 0.31677335500717163, "learning_rate": 4.144039039010125e-06, "loss": 0.3582, "step": 2517 }, { "epoch": 1.797715373631604, "grad_norm": 0.34726718068122864, "learning_rate": 4.139945134876358e-06, "loss": 0.4435, "step": 2518 }, { "epoch": 1.7984293193717278, "grad_norm": 0.32019302248954773, "learning_rate": 4.135851824818956e-06, "loss": 0.4059, "step": 2519 }, { "epoch": 1.7991432651118515, "grad_norm": 0.39268526434898376, "learning_rate": 4.131759111665349e-06, "loss": 0.4088, "step": 2520 }, { "epoch": 1.7998572108519753, "grad_norm": 0.34862571954727173, "learning_rate": 4.127666998242545e-06, "loss": 0.3989, "step": 2521 }, { "epoch": 1.800571156592099, "grad_norm": 0.390438437461853, "learning_rate": 4.1235754873771375e-06, "loss": 0.4393, "step": 2522 }, { "epoch": 1.8012851023322227, "grad_norm": 0.3502418100833893, "learning_rate": 4.119484581895309e-06, "loss": 0.3942, "step": 2523 }, { "epoch": 1.8019990480723465, "grad_norm": 0.3409385085105896, "learning_rate": 4.115394284622824e-06, "loss": 0.4107, "step": 2524 }, { "epoch": 1.8027129938124702, "grad_norm": 0.3645883798599243, "learning_rate": 4.111304598385019e-06, "loss": 0.4289, "step": 2525 }, { "epoch": 1.803426939552594, "grad_norm": 0.3311411142349243, "learning_rate": 4.107215526006818e-06, "loss": 0.375, "step": 2526 }, { "epoch": 1.8041408852927177, "grad_norm": 0.3695546090602875, "learning_rate": 4.103127070312713e-06, "loss": 0.4239, "step": 2527 }, { "epoch": 1.8048548310328414, "grad_norm": 0.3528454005718231, "learning_rate": 4.099039234126778e-06, "loss": 0.394, "step": 2528 }, { "epoch": 1.8055687767729651, "grad_norm": 0.36127981543540955, "learning_rate": 4.094952020272651e-06, "loss": 0.3875, "step": 2529 }, { "epoch": 1.8062827225130889, "grad_norm": 0.3554092049598694, "learning_rate": 4.090865431573547e-06, "loss": 0.4638, "step": 2530 }, { "epoch": 1.8069966682532126, "grad_norm": 0.32327067852020264, "learning_rate": 4.086779470852244e-06, "loss": 0.3602, "step": 2531 }, { "epoch": 1.8077106139933365, "grad_norm": 0.31297779083251953, "learning_rate": 4.0826941409310885e-06, "loss": 0.3722, "step": 2532 }, { "epoch": 1.8084245597334603, "grad_norm": 0.3364746868610382, "learning_rate": 4.078609444631992e-06, "loss": 0.3442, "step": 2533 }, { "epoch": 1.809138505473584, "grad_norm": 0.347519189119339, "learning_rate": 4.074525384776428e-06, "loss": 0.4161, "step": 2534 }, { "epoch": 1.8098524512137077, "grad_norm": 0.34925100207328796, "learning_rate": 4.070441964185428e-06, "loss": 0.4054, "step": 2535 }, { "epoch": 1.8105663969538315, "grad_norm": 0.34900379180908203, "learning_rate": 4.066359185679584e-06, "loss": 0.4606, "step": 2536 }, { "epoch": 1.8112803426939552, "grad_norm": 0.3364696800708771, "learning_rate": 4.062277052079047e-06, "loss": 0.3969, "step": 2537 }, { "epoch": 1.8119942884340792, "grad_norm": 0.36224013566970825, "learning_rate": 4.058195566203516e-06, "loss": 0.3958, "step": 2538 }, { "epoch": 1.812708234174203, "grad_norm": 0.3928415775299072, "learning_rate": 4.054114730872248e-06, "loss": 0.4314, "step": 2539 }, { "epoch": 1.8134221799143266, "grad_norm": 0.31084972620010376, "learning_rate": 4.0500345489040515e-06, "loss": 0.3825, "step": 2540 }, { "epoch": 1.8141361256544504, "grad_norm": 0.34033969044685364, "learning_rate": 4.045955023117276e-06, "loss": 0.4246, "step": 2541 }, { "epoch": 1.814850071394574, "grad_norm": 0.37132012844085693, "learning_rate": 4.041876156329827e-06, "loss": 0.421, "step": 2542 }, { "epoch": 1.8155640171346978, "grad_norm": 0.3730315864086151, "learning_rate": 4.037797951359151e-06, "loss": 0.3967, "step": 2543 }, { "epoch": 1.8162779628748216, "grad_norm": 0.3217448592185974, "learning_rate": 4.033720411022235e-06, "loss": 0.3743, "step": 2544 }, { "epoch": 1.8169919086149453, "grad_norm": 0.3895328938961029, "learning_rate": 4.029643538135608e-06, "loss": 0.4214, "step": 2545 }, { "epoch": 1.817705854355069, "grad_norm": 0.37244653701782227, "learning_rate": 4.025567335515343e-06, "loss": 0.4009, "step": 2546 }, { "epoch": 1.8184198000951928, "grad_norm": 0.3507479131221771, "learning_rate": 4.021491805977043e-06, "loss": 0.4027, "step": 2547 }, { "epoch": 1.8191337458353165, "grad_norm": 0.36194708943367004, "learning_rate": 4.017416952335849e-06, "loss": 0.3954, "step": 2548 }, { "epoch": 1.8198476915754402, "grad_norm": 0.36172065138816833, "learning_rate": 4.013342777406438e-06, "loss": 0.4414, "step": 2549 }, { "epoch": 1.820561637315564, "grad_norm": 0.3398033380508423, "learning_rate": 4.009269284003014e-06, "loss": 0.3477, "step": 2550 }, { "epoch": 1.8212755830556877, "grad_norm": 0.3803071677684784, "learning_rate": 4.005196474939308e-06, "loss": 0.4085, "step": 2551 }, { "epoch": 1.8219895287958114, "grad_norm": 0.3490172326564789, "learning_rate": 4.0011243530285885e-06, "loss": 0.3509, "step": 2552 }, { "epoch": 1.8227034745359352, "grad_norm": 0.41603758931159973, "learning_rate": 3.997052921083637e-06, "loss": 0.4219, "step": 2553 }, { "epoch": 1.823417420276059, "grad_norm": 0.3504122793674469, "learning_rate": 3.992982181916766e-06, "loss": 0.4129, "step": 2554 }, { "epoch": 1.8241313660161826, "grad_norm": 0.327274352312088, "learning_rate": 3.988912138339812e-06, "loss": 0.3837, "step": 2555 }, { "epoch": 1.8248453117563064, "grad_norm": 0.3749498724937439, "learning_rate": 3.98484279316412e-06, "loss": 0.3638, "step": 2556 }, { "epoch": 1.82555925749643, "grad_norm": 0.37317296862602234, "learning_rate": 3.980774149200562e-06, "loss": 0.3839, "step": 2557 }, { "epoch": 1.826273203236554, "grad_norm": 0.38238510489463806, "learning_rate": 3.976706209259526e-06, "loss": 0.4514, "step": 2558 }, { "epoch": 1.8269871489766778, "grad_norm": 0.34494420886039734, "learning_rate": 3.9726389761509055e-06, "loss": 0.4041, "step": 2559 }, { "epoch": 1.8277010947168015, "grad_norm": 0.333314448595047, "learning_rate": 3.968572452684113e-06, "loss": 0.3727, "step": 2560 }, { "epoch": 1.8284150404569253, "grad_norm": 0.37005898356437683, "learning_rate": 3.964506641668071e-06, "loss": 0.4129, "step": 2561 }, { "epoch": 1.829128986197049, "grad_norm": 0.3525717258453369, "learning_rate": 3.960441545911205e-06, "loss": 0.3775, "step": 2562 }, { "epoch": 1.8298429319371727, "grad_norm": 0.35084694623947144, "learning_rate": 3.956377168221448e-06, "loss": 0.3812, "step": 2563 }, { "epoch": 1.8305568776772967, "grad_norm": 0.3380087614059448, "learning_rate": 3.952313511406242e-06, "loss": 0.3929, "step": 2564 }, { "epoch": 1.8312708234174204, "grad_norm": 0.3470844328403473, "learning_rate": 3.948250578272522e-06, "loss": 0.3764, "step": 2565 }, { "epoch": 1.8319847691575442, "grad_norm": 0.3730376362800598, "learning_rate": 3.944188371626731e-06, "loss": 0.4307, "step": 2566 }, { "epoch": 1.832698714897668, "grad_norm": 0.32073792815208435, "learning_rate": 3.94012689427481e-06, "loss": 0.3686, "step": 2567 }, { "epoch": 1.8334126606377916, "grad_norm": 0.3110658526420593, "learning_rate": 3.936066149022191e-06, "loss": 0.3646, "step": 2568 }, { "epoch": 1.8341266063779154, "grad_norm": 0.3258930444717407, "learning_rate": 3.932006138673801e-06, "loss": 0.4172, "step": 2569 }, { "epoch": 1.834840552118039, "grad_norm": 0.3104999363422394, "learning_rate": 3.927946866034062e-06, "loss": 0.3898, "step": 2570 }, { "epoch": 1.8355544978581628, "grad_norm": 0.3800426423549652, "learning_rate": 3.923888333906891e-06, "loss": 0.4114, "step": 2571 }, { "epoch": 1.8362684435982866, "grad_norm": 0.3481191098690033, "learning_rate": 3.919830545095681e-06, "loss": 0.4057, "step": 2572 }, { "epoch": 1.8369823893384103, "grad_norm": 0.3255986273288727, "learning_rate": 3.915773502403324e-06, "loss": 0.385, "step": 2573 }, { "epoch": 1.837696335078534, "grad_norm": 0.3377329409122467, "learning_rate": 3.911717208632189e-06, "loss": 0.418, "step": 2574 }, { "epoch": 1.8384102808186578, "grad_norm": 0.32500702142715454, "learning_rate": 3.907661666584131e-06, "loss": 0.395, "step": 2575 }, { "epoch": 1.8391242265587815, "grad_norm": 0.38208460807800293, "learning_rate": 3.903606879060483e-06, "loss": 0.4094, "step": 2576 }, { "epoch": 1.8398381722989052, "grad_norm": 0.3570519685745239, "learning_rate": 3.8995528488620635e-06, "loss": 0.3939, "step": 2577 }, { "epoch": 1.840552118039029, "grad_norm": 0.3424563705921173, "learning_rate": 3.895499578789157e-06, "loss": 0.3977, "step": 2578 }, { "epoch": 1.8412660637791527, "grad_norm": 0.3924601674079895, "learning_rate": 3.8914470716415325e-06, "loss": 0.3921, "step": 2579 }, { "epoch": 1.8419800095192764, "grad_norm": 0.3678261637687683, "learning_rate": 3.887395330218429e-06, "loss": 0.3463, "step": 2580 }, { "epoch": 1.8426939552594002, "grad_norm": 0.3710252344608307, "learning_rate": 3.883344357318554e-06, "loss": 0.4216, "step": 2581 }, { "epoch": 1.843407900999524, "grad_norm": 0.34346652030944824, "learning_rate": 3.879294155740087e-06, "loss": 0.4496, "step": 2582 }, { "epoch": 1.8441218467396476, "grad_norm": 0.33560842275619507, "learning_rate": 3.875244728280676e-06, "loss": 0.3613, "step": 2583 }, { "epoch": 1.8448357924797716, "grad_norm": 0.38952013850212097, "learning_rate": 3.871196077737429e-06, "loss": 0.4163, "step": 2584 }, { "epoch": 1.8455497382198953, "grad_norm": 0.35172751545906067, "learning_rate": 3.867148206906924e-06, "loss": 0.363, "step": 2585 }, { "epoch": 1.846263683960019, "grad_norm": 0.3174920678138733, "learning_rate": 3.863101118585193e-06, "loss": 0.3825, "step": 2586 }, { "epoch": 1.8469776297001428, "grad_norm": 0.32433655858039856, "learning_rate": 3.859054815567736e-06, "loss": 0.4214, "step": 2587 }, { "epoch": 1.8476915754402665, "grad_norm": 0.31507205963134766, "learning_rate": 3.855009300649502e-06, "loss": 0.3814, "step": 2588 }, { "epoch": 1.8484055211803903, "grad_norm": 0.32086804509162903, "learning_rate": 3.850964576624904e-06, "loss": 0.3818, "step": 2589 }, { "epoch": 1.849119466920514, "grad_norm": 0.3562743365764618, "learning_rate": 3.8469206462878e-06, "loss": 0.4317, "step": 2590 }, { "epoch": 1.849833412660638, "grad_norm": 0.3326963484287262, "learning_rate": 3.842877512431506e-06, "loss": 0.3941, "step": 2591 }, { "epoch": 1.8505473584007617, "grad_norm": 0.33907628059387207, "learning_rate": 3.8388351778487884e-06, "loss": 0.4116, "step": 2592 }, { "epoch": 1.8512613041408854, "grad_norm": 0.337594211101532, "learning_rate": 3.8347936453318555e-06, "loss": 0.3852, "step": 2593 }, { "epoch": 1.8519752498810091, "grad_norm": 0.3695349097251892, "learning_rate": 3.830752917672366e-06, "loss": 0.4317, "step": 2594 }, { "epoch": 1.8526891956211329, "grad_norm": 0.3424777686595917, "learning_rate": 3.826712997661426e-06, "loss": 0.3836, "step": 2595 }, { "epoch": 1.8534031413612566, "grad_norm": 0.33053985238075256, "learning_rate": 3.822673888089572e-06, "loss": 0.3808, "step": 2596 }, { "epoch": 1.8541170871013803, "grad_norm": 0.318733811378479, "learning_rate": 3.818635591746794e-06, "loss": 0.4063, "step": 2597 }, { "epoch": 1.854831032841504, "grad_norm": 0.363445907831192, "learning_rate": 3.8145981114225135e-06, "loss": 0.4066, "step": 2598 }, { "epoch": 1.8555449785816278, "grad_norm": 0.3627270460128784, "learning_rate": 3.8105614499055886e-06, "loss": 0.4131, "step": 2599 }, { "epoch": 1.8562589243217515, "grad_norm": 0.3468954563140869, "learning_rate": 3.806525609984312e-06, "loss": 0.4218, "step": 2600 }, { "epoch": 1.8569728700618753, "grad_norm": 0.30909064412117004, "learning_rate": 3.8024905944464118e-06, "loss": 0.3565, "step": 2601 }, { "epoch": 1.857686815801999, "grad_norm": 0.3370901644229889, "learning_rate": 3.7984564060790415e-06, "loss": 0.4062, "step": 2602 }, { "epoch": 1.8584007615421227, "grad_norm": 0.35261860489845276, "learning_rate": 3.794423047668787e-06, "loss": 0.4236, "step": 2603 }, { "epoch": 1.8591147072822465, "grad_norm": 0.3496216833591461, "learning_rate": 3.790390522001662e-06, "loss": 0.3924, "step": 2604 }, { "epoch": 1.8598286530223702, "grad_norm": 0.3503456115722656, "learning_rate": 3.7863588318631017e-06, "loss": 0.3538, "step": 2605 }, { "epoch": 1.860542598762494, "grad_norm": 0.36206671595573425, "learning_rate": 3.7823279800379636e-06, "loss": 0.4035, "step": 2606 }, { "epoch": 1.8612565445026177, "grad_norm": 0.3261309862136841, "learning_rate": 3.778297969310529e-06, "loss": 0.4179, "step": 2607 }, { "epoch": 1.8619704902427414, "grad_norm": 0.31791186332702637, "learning_rate": 3.7742688024645007e-06, "loss": 0.3627, "step": 2608 }, { "epoch": 1.8626844359828651, "grad_norm": 0.35057365894317627, "learning_rate": 3.7702404822829903e-06, "loss": 0.3795, "step": 2609 }, { "epoch": 1.863398381722989, "grad_norm": 0.3621506989002228, "learning_rate": 3.7662130115485317e-06, "loss": 0.4575, "step": 2610 }, { "epoch": 1.8641123274631128, "grad_norm": 0.3776465952396393, "learning_rate": 3.7621863930430715e-06, "loss": 0.4033, "step": 2611 }, { "epoch": 1.8648262732032366, "grad_norm": 0.3196949362754822, "learning_rate": 3.7581606295479605e-06, "loss": 0.3766, "step": 2612 }, { "epoch": 1.8655402189433603, "grad_norm": 0.3334537446498871, "learning_rate": 3.754135723843968e-06, "loss": 0.3931, "step": 2613 }, { "epoch": 1.866254164683484, "grad_norm": 0.3653191030025482, "learning_rate": 3.7501116787112685e-06, "loss": 0.4233, "step": 2614 }, { "epoch": 1.8669681104236078, "grad_norm": 0.359526127576828, "learning_rate": 3.7460884969294363e-06, "loss": 0.3985, "step": 2615 }, { "epoch": 1.8676820561637315, "grad_norm": 0.3111197352409363, "learning_rate": 3.7420661812774577e-06, "loss": 0.4144, "step": 2616 }, { "epoch": 1.8683960019038555, "grad_norm": 0.3205115795135498, "learning_rate": 3.738044734533716e-06, "loss": 0.4364, "step": 2617 }, { "epoch": 1.8691099476439792, "grad_norm": 0.3079449534416199, "learning_rate": 3.7340241594759917e-06, "loss": 0.3819, "step": 2618 }, { "epoch": 1.869823893384103, "grad_norm": 0.3420986533164978, "learning_rate": 3.7300044588814692e-06, "loss": 0.4023, "step": 2619 }, { "epoch": 1.8705378391242267, "grad_norm": 0.31430569291114807, "learning_rate": 3.7259856355267275e-06, "loss": 0.3692, "step": 2620 }, { "epoch": 1.8712517848643504, "grad_norm": 0.3226533830165863, "learning_rate": 3.7219676921877358e-06, "loss": 0.3461, "step": 2621 }, { "epoch": 1.8719657306044741, "grad_norm": 0.34140661358833313, "learning_rate": 3.7179506316398584e-06, "loss": 0.4161, "step": 2622 }, { "epoch": 1.8726796763445979, "grad_norm": 0.33984991908073425, "learning_rate": 3.7139344566578517e-06, "loss": 0.4507, "step": 2623 }, { "epoch": 1.8733936220847216, "grad_norm": 0.29591548442840576, "learning_rate": 3.7099191700158545e-06, "loss": 0.3597, "step": 2624 }, { "epoch": 1.8741075678248453, "grad_norm": 0.3785572648048401, "learning_rate": 3.705904774487396e-06, "loss": 0.4139, "step": 2625 }, { "epoch": 1.874821513564969, "grad_norm": 0.3536435663700104, "learning_rate": 3.7018912728453937e-06, "loss": 0.3847, "step": 2626 }, { "epoch": 1.8755354593050928, "grad_norm": 0.32978391647338867, "learning_rate": 3.697878667862138e-06, "loss": 0.3913, "step": 2627 }, { "epoch": 1.8762494050452165, "grad_norm": 0.34351620078086853, "learning_rate": 3.6938669623093086e-06, "loss": 0.4068, "step": 2628 }, { "epoch": 1.8769633507853403, "grad_norm": 0.3169800937175751, "learning_rate": 3.6898561589579612e-06, "loss": 0.3521, "step": 2629 }, { "epoch": 1.877677296525464, "grad_norm": 0.39101311564445496, "learning_rate": 3.685846260578524e-06, "loss": 0.4162, "step": 2630 }, { "epoch": 1.8783912422655877, "grad_norm": 0.36165180802345276, "learning_rate": 3.6818372699408067e-06, "loss": 0.4331, "step": 2631 }, { "epoch": 1.8791051880057115, "grad_norm": 0.3273307681083679, "learning_rate": 3.6778291898139907e-06, "loss": 0.4017, "step": 2632 }, { "epoch": 1.8798191337458352, "grad_norm": 0.33569034934043884, "learning_rate": 3.673822022966622e-06, "loss": 0.4074, "step": 2633 }, { "epoch": 1.880533079485959, "grad_norm": 0.33353856205940247, "learning_rate": 3.669815772166625e-06, "loss": 0.3566, "step": 2634 }, { "epoch": 1.8812470252260827, "grad_norm": 0.39036422967910767, "learning_rate": 3.6658104401812857e-06, "loss": 0.3953, "step": 2635 }, { "epoch": 1.8819609709662064, "grad_norm": 0.32230857014656067, "learning_rate": 3.661806029777257e-06, "loss": 0.3916, "step": 2636 }, { "epoch": 1.8826749167063304, "grad_norm": 0.36008816957473755, "learning_rate": 3.6578025437205535e-06, "loss": 0.435, "step": 2637 }, { "epoch": 1.883388862446454, "grad_norm": 0.31351229548454285, "learning_rate": 3.6537999847765556e-06, "loss": 0.362, "step": 2638 }, { "epoch": 1.8841028081865778, "grad_norm": 0.3540533483028412, "learning_rate": 3.649798355709997e-06, "loss": 0.4151, "step": 2639 }, { "epoch": 1.8848167539267016, "grad_norm": 0.35034236311912537, "learning_rate": 3.6457976592849753e-06, "loss": 0.3855, "step": 2640 }, { "epoch": 1.8855306996668253, "grad_norm": 0.35808390378952026, "learning_rate": 3.6417978982649403e-06, "loss": 0.3939, "step": 2641 }, { "epoch": 1.886244645406949, "grad_norm": 0.3309548795223236, "learning_rate": 3.6377990754126967e-06, "loss": 0.3875, "step": 2642 }, { "epoch": 1.886958591147073, "grad_norm": 0.3313486874103546, "learning_rate": 3.6338011934904006e-06, "loss": 0.4178, "step": 2643 }, { "epoch": 1.8876725368871967, "grad_norm": 0.31396785378456116, "learning_rate": 3.6298042552595604e-06, "loss": 0.3614, "step": 2644 }, { "epoch": 1.8883864826273205, "grad_norm": 0.36250850558280945, "learning_rate": 3.6258082634810267e-06, "loss": 0.438, "step": 2645 }, { "epoch": 1.8891004283674442, "grad_norm": 0.3316774070262909, "learning_rate": 3.6218132209150047e-06, "loss": 0.4037, "step": 2646 }, { "epoch": 1.889814374107568, "grad_norm": 0.3296377658843994, "learning_rate": 3.6178191303210374e-06, "loss": 0.3627, "step": 2647 }, { "epoch": 1.8905283198476917, "grad_norm": 0.3638305962085724, "learning_rate": 3.613825994458016e-06, "loss": 0.4037, "step": 2648 }, { "epoch": 1.8912422655878154, "grad_norm": 0.3395373225212097, "learning_rate": 3.609833816084163e-06, "loss": 0.426, "step": 2649 }, { "epoch": 1.8919562113279391, "grad_norm": 0.3101867735385895, "learning_rate": 3.6058425979570482e-06, "loss": 0.3803, "step": 2650 }, { "epoch": 1.8926701570680629, "grad_norm": 0.3457593023777008, "learning_rate": 3.601852342833578e-06, "loss": 0.4133, "step": 2651 }, { "epoch": 1.8933841028081866, "grad_norm": 0.30962780117988586, "learning_rate": 3.5978630534699873e-06, "loss": 0.4132, "step": 2652 }, { "epoch": 1.8940980485483103, "grad_norm": 0.32597219944000244, "learning_rate": 3.593874732621847e-06, "loss": 0.4209, "step": 2653 }, { "epoch": 1.894811994288434, "grad_norm": 0.3234803378582001, "learning_rate": 3.5898873830440633e-06, "loss": 0.3887, "step": 2654 }, { "epoch": 1.8955259400285578, "grad_norm": 0.3560456931591034, "learning_rate": 3.585901007490863e-06, "loss": 0.4052, "step": 2655 }, { "epoch": 1.8962398857686815, "grad_norm": 0.3263135552406311, "learning_rate": 3.5819156087158075e-06, "loss": 0.3626, "step": 2656 }, { "epoch": 1.8969538315088053, "grad_norm": 0.3771098554134369, "learning_rate": 3.5779311894717817e-06, "loss": 0.4695, "step": 2657 }, { "epoch": 1.897667777248929, "grad_norm": 0.32952868938446045, "learning_rate": 3.57394775251099e-06, "loss": 0.3925, "step": 2658 }, { "epoch": 1.8983817229890527, "grad_norm": 0.3412013649940491, "learning_rate": 3.569965300584963e-06, "loss": 0.3766, "step": 2659 }, { "epoch": 1.8990956687291765, "grad_norm": 0.3511653542518616, "learning_rate": 3.5659838364445505e-06, "loss": 0.4095, "step": 2660 }, { "epoch": 1.8998096144693002, "grad_norm": 0.3400176465511322, "learning_rate": 3.562003362839914e-06, "loss": 0.3478, "step": 2661 }, { "epoch": 1.900523560209424, "grad_norm": 0.3423818349838257, "learning_rate": 3.558023882520539e-06, "loss": 0.3965, "step": 2662 }, { "epoch": 1.9012375059495479, "grad_norm": 0.354904443025589, "learning_rate": 3.554045398235223e-06, "loss": 0.4021, "step": 2663 }, { "epoch": 1.9019514516896716, "grad_norm": 0.32639190554618835, "learning_rate": 3.550067912732069e-06, "loss": 0.3389, "step": 2664 }, { "epoch": 1.9026653974297953, "grad_norm": 0.3169628977775574, "learning_rate": 3.546091428758499e-06, "loss": 0.3842, "step": 2665 }, { "epoch": 1.903379343169919, "grad_norm": 0.3665022552013397, "learning_rate": 3.542115949061239e-06, "loss": 0.4283, "step": 2666 }, { "epoch": 1.9040932889100428, "grad_norm": 0.31405603885650635, "learning_rate": 3.538141476386317e-06, "loss": 0.3872, "step": 2667 }, { "epoch": 1.9048072346501665, "grad_norm": 0.3408525288105011, "learning_rate": 3.5341680134790732e-06, "loss": 0.428, "step": 2668 }, { "epoch": 1.9055211803902905, "grad_norm": 0.3233490586280823, "learning_rate": 3.5301955630841487e-06, "loss": 0.3777, "step": 2669 }, { "epoch": 1.9062351261304142, "grad_norm": 0.34976205229759216, "learning_rate": 3.526224127945479e-06, "loss": 0.4325, "step": 2670 }, { "epoch": 1.906949071870538, "grad_norm": 0.32051005959510803, "learning_rate": 3.5222537108063065e-06, "loss": 0.369, "step": 2671 }, { "epoch": 1.9076630176106617, "grad_norm": 0.33221110701560974, "learning_rate": 3.518284314409166e-06, "loss": 0.3945, "step": 2672 }, { "epoch": 1.9083769633507854, "grad_norm": 0.34792762994766235, "learning_rate": 3.5143159414958854e-06, "loss": 0.4045, "step": 2673 }, { "epoch": 1.9090909090909092, "grad_norm": 0.30886080861091614, "learning_rate": 3.51034859480759e-06, "loss": 0.3978, "step": 2674 }, { "epoch": 1.909804854831033, "grad_norm": 0.3593558371067047, "learning_rate": 3.5063822770846965e-06, "loss": 0.4079, "step": 2675 }, { "epoch": 1.9105188005711566, "grad_norm": 0.3191942274570465, "learning_rate": 3.502416991066904e-06, "loss": 0.3841, "step": 2676 }, { "epoch": 1.9112327463112804, "grad_norm": 0.3379144072532654, "learning_rate": 3.4984527394932076e-06, "loss": 0.4044, "step": 2677 }, { "epoch": 1.911946692051404, "grad_norm": 0.3301568925380707, "learning_rate": 3.4944895251018847e-06, "loss": 0.4073, "step": 2678 }, { "epoch": 1.9126606377915278, "grad_norm": 0.31947019696235657, "learning_rate": 3.4905273506304904e-06, "loss": 0.4058, "step": 2679 }, { "epoch": 1.9133745835316516, "grad_norm": 0.31290411949157715, "learning_rate": 3.4865662188158713e-06, "loss": 0.4123, "step": 2680 }, { "epoch": 1.9140885292717753, "grad_norm": 0.31579068303108215, "learning_rate": 3.4826061323941483e-06, "loss": 0.384, "step": 2681 }, { "epoch": 1.914802475011899, "grad_norm": 0.33633798360824585, "learning_rate": 3.478647094100719e-06, "loss": 0.4159, "step": 2682 }, { "epoch": 1.9155164207520228, "grad_norm": 0.308641254901886, "learning_rate": 3.474689106670261e-06, "loss": 0.3937, "step": 2683 }, { "epoch": 1.9162303664921465, "grad_norm": 0.34956514835357666, "learning_rate": 3.470732172836725e-06, "loss": 0.4157, "step": 2684 }, { "epoch": 1.9169443122322702, "grad_norm": 0.3082904517650604, "learning_rate": 3.4667762953333296e-06, "loss": 0.4107, "step": 2685 }, { "epoch": 1.917658257972394, "grad_norm": 0.3159489035606384, "learning_rate": 3.4628214768925683e-06, "loss": 0.3868, "step": 2686 }, { "epoch": 1.9183722037125177, "grad_norm": 0.3198615610599518, "learning_rate": 3.4588677202462035e-06, "loss": 0.4145, "step": 2687 }, { "epoch": 1.9190861494526414, "grad_norm": 0.3316042125225067, "learning_rate": 3.4549150281252635e-06, "loss": 0.3755, "step": 2688 }, { "epoch": 1.9198000951927654, "grad_norm": 0.3327986001968384, "learning_rate": 3.4509634032600383e-06, "loss": 0.3871, "step": 2689 }, { "epoch": 1.9205140409328891, "grad_norm": 0.3489210903644562, "learning_rate": 3.4470128483800813e-06, "loss": 0.4289, "step": 2690 }, { "epoch": 1.9212279866730129, "grad_norm": 0.3444793224334717, "learning_rate": 3.443063366214212e-06, "loss": 0.4237, "step": 2691 }, { "epoch": 1.9219419324131366, "grad_norm": 0.28600072860717773, "learning_rate": 3.4391149594905015e-06, "loss": 0.3521, "step": 2692 }, { "epoch": 1.9226558781532603, "grad_norm": 0.34374305605888367, "learning_rate": 3.4351676309362847e-06, "loss": 0.4072, "step": 2693 }, { "epoch": 1.923369823893384, "grad_norm": 0.35586097836494446, "learning_rate": 3.4312213832781487e-06, "loss": 0.4179, "step": 2694 }, { "epoch": 1.9240837696335078, "grad_norm": 0.3325978219509125, "learning_rate": 3.427276219241933e-06, "loss": 0.4076, "step": 2695 }, { "epoch": 1.9247977153736318, "grad_norm": 0.3291131258010864, "learning_rate": 3.4233321415527275e-06, "loss": 0.4379, "step": 2696 }, { "epoch": 1.9255116611137555, "grad_norm": 0.3179616928100586, "learning_rate": 3.41938915293488e-06, "loss": 0.3511, "step": 2697 }, { "epoch": 1.9262256068538792, "grad_norm": 0.33295807242393494, "learning_rate": 3.4154472561119734e-06, "loss": 0.3968, "step": 2698 }, { "epoch": 1.926939552594003, "grad_norm": 0.34130918979644775, "learning_rate": 3.4115064538068453e-06, "loss": 0.4262, "step": 2699 }, { "epoch": 1.9276534983341267, "grad_norm": 0.3363751769065857, "learning_rate": 3.4075667487415785e-06, "loss": 0.4047, "step": 2700 }, { "epoch": 1.9283674440742504, "grad_norm": 0.3074435293674469, "learning_rate": 3.4036281436374906e-06, "loss": 0.3751, "step": 2701 }, { "epoch": 1.9290813898143742, "grad_norm": 0.31051886081695557, "learning_rate": 3.399690641215142e-06, "loss": 0.3881, "step": 2702 }, { "epoch": 1.929795335554498, "grad_norm": 0.3172960579395294, "learning_rate": 3.3957542441943375e-06, "loss": 0.3941, "step": 2703 }, { "epoch": 1.9305092812946216, "grad_norm": 0.32382404804229736, "learning_rate": 3.391818955294108e-06, "loss": 0.4251, "step": 2704 }, { "epoch": 1.9312232270347454, "grad_norm": 0.2926674485206604, "learning_rate": 3.3878847772327273e-06, "loss": 0.3885, "step": 2705 }, { "epoch": 1.931937172774869, "grad_norm": 0.3440132737159729, "learning_rate": 3.383951712727701e-06, "loss": 0.4305, "step": 2706 }, { "epoch": 1.9326511185149928, "grad_norm": 0.3291775584220886, "learning_rate": 3.38001976449576e-06, "loss": 0.3837, "step": 2707 }, { "epoch": 1.9333650642551166, "grad_norm": 0.34621894359588623, "learning_rate": 3.376088935252868e-06, "loss": 0.4357, "step": 2708 }, { "epoch": 1.9340790099952403, "grad_norm": 0.3427930772304535, "learning_rate": 3.372159227714218e-06, "loss": 0.393, "step": 2709 }, { "epoch": 1.934792955735364, "grad_norm": 0.3142802119255066, "learning_rate": 3.3682306445942224e-06, "loss": 0.3867, "step": 2710 }, { "epoch": 1.9355069014754878, "grad_norm": 0.3301985561847687, "learning_rate": 3.3643031886065224e-06, "loss": 0.3791, "step": 2711 }, { "epoch": 1.9362208472156115, "grad_norm": 0.4013395607471466, "learning_rate": 3.3603768624639786e-06, "loss": 0.3992, "step": 2712 }, { "epoch": 1.9369347929557352, "grad_norm": 0.34850311279296875, "learning_rate": 3.3564516688786696e-06, "loss": 0.3817, "step": 2713 }, { "epoch": 1.937648738695859, "grad_norm": 0.3343808054924011, "learning_rate": 3.352527610561894e-06, "loss": 0.4355, "step": 2714 }, { "epoch": 1.938362684435983, "grad_norm": 0.30471181869506836, "learning_rate": 3.3486046902241663e-06, "loss": 0.357, "step": 2715 }, { "epoch": 1.9390766301761067, "grad_norm": 0.34242889285087585, "learning_rate": 3.3446829105752103e-06, "loss": 0.3988, "step": 2716 }, { "epoch": 1.9397905759162304, "grad_norm": 0.36961522698402405, "learning_rate": 3.340762274323968e-06, "loss": 0.4129, "step": 2717 }, { "epoch": 1.9405045216563541, "grad_norm": 0.37949737906455994, "learning_rate": 3.336842784178591e-06, "loss": 0.406, "step": 2718 }, { "epoch": 1.9412184673964779, "grad_norm": 0.36104169487953186, "learning_rate": 3.3329244428464335e-06, "loss": 0.4061, "step": 2719 }, { "epoch": 1.9419324131366016, "grad_norm": 0.3235034942626953, "learning_rate": 3.3290072530340628e-06, "loss": 0.3857, "step": 2720 }, { "epoch": 1.9426463588767253, "grad_norm": 0.3346574306488037, "learning_rate": 3.325091217447248e-06, "loss": 0.3552, "step": 2721 }, { "epoch": 1.9433603046168493, "grad_norm": 0.35284703969955444, "learning_rate": 3.3211763387909585e-06, "loss": 0.3763, "step": 2722 }, { "epoch": 1.944074250356973, "grad_norm": 0.340466171503067, "learning_rate": 3.317262619769368e-06, "loss": 0.4068, "step": 2723 }, { "epoch": 1.9447881960970967, "grad_norm": 0.30032333731651306, "learning_rate": 3.3133500630858507e-06, "loss": 0.3876, "step": 2724 }, { "epoch": 1.9455021418372205, "grad_norm": 0.3499583601951599, "learning_rate": 3.3094386714429726e-06, "loss": 0.4344, "step": 2725 }, { "epoch": 1.9462160875773442, "grad_norm": 0.34192773699760437, "learning_rate": 3.3055284475424987e-06, "loss": 0.4092, "step": 2726 }, { "epoch": 1.946930033317468, "grad_norm": 0.31276407837867737, "learning_rate": 3.3016193940853857e-06, "loss": 0.3434, "step": 2727 }, { "epoch": 1.9476439790575917, "grad_norm": 0.3291288912296295, "learning_rate": 3.297711513771786e-06, "loss": 0.4044, "step": 2728 }, { "epoch": 1.9483579247977154, "grad_norm": 0.309345006942749, "learning_rate": 3.2938048093010326e-06, "loss": 0.4035, "step": 2729 }, { "epoch": 1.9490718705378391, "grad_norm": 0.354292631149292, "learning_rate": 3.289899283371657e-06, "loss": 0.412, "step": 2730 }, { "epoch": 1.9497858162779629, "grad_norm": 0.33752211928367615, "learning_rate": 3.285994938681369e-06, "loss": 0.401, "step": 2731 }, { "epoch": 1.9504997620180866, "grad_norm": 0.34491583704948425, "learning_rate": 3.2820917779270657e-06, "loss": 0.4043, "step": 2732 }, { "epoch": 1.9512137077582103, "grad_norm": 0.3418349623680115, "learning_rate": 3.2781898038048242e-06, "loss": 0.3856, "step": 2733 }, { "epoch": 1.951927653498334, "grad_norm": 0.36076417565345764, "learning_rate": 3.2742890190099075e-06, "loss": 0.3973, "step": 2734 }, { "epoch": 1.9526415992384578, "grad_norm": 0.3258986473083496, "learning_rate": 3.2703894262367487e-06, "loss": 0.3582, "step": 2735 }, { "epoch": 1.9533555449785815, "grad_norm": 0.32206496596336365, "learning_rate": 3.266491028178964e-06, "loss": 0.3754, "step": 2736 }, { "epoch": 1.9540694907187053, "grad_norm": 0.3765884041786194, "learning_rate": 3.2625938275293436e-06, "loss": 0.4496, "step": 2737 }, { "epoch": 1.954783436458829, "grad_norm": 0.3062964677810669, "learning_rate": 3.258697826979847e-06, "loss": 0.3738, "step": 2738 }, { "epoch": 1.9554973821989527, "grad_norm": 0.331343412399292, "learning_rate": 3.2548030292216067e-06, "loss": 0.4347, "step": 2739 }, { "epoch": 1.9562113279390765, "grad_norm": 0.353507936000824, "learning_rate": 3.250909436944928e-06, "loss": 0.3856, "step": 2740 }, { "epoch": 1.9569252736792002, "grad_norm": 0.34858042001724243, "learning_rate": 3.2470170528392754e-06, "loss": 0.4296, "step": 2741 }, { "epoch": 1.9576392194193242, "grad_norm": 0.32925644516944885, "learning_rate": 3.2431258795932863e-06, "loss": 0.392, "step": 2742 }, { "epoch": 1.958353165159448, "grad_norm": 0.35432252287864685, "learning_rate": 3.239235919894761e-06, "loss": 0.3873, "step": 2743 }, { "epoch": 1.9590671108995716, "grad_norm": 0.40140846371650696, "learning_rate": 3.2353471764306567e-06, "loss": 0.4303, "step": 2744 }, { "epoch": 1.9597810566396954, "grad_norm": 0.3634323179721832, "learning_rate": 3.231459651887093e-06, "loss": 0.3649, "step": 2745 }, { "epoch": 1.960495002379819, "grad_norm": 0.3198097050189972, "learning_rate": 3.2275733489493517e-06, "loss": 0.395, "step": 2746 }, { "epoch": 1.9612089481199428, "grad_norm": 0.36029326915740967, "learning_rate": 3.2236882703018624e-06, "loss": 0.4312, "step": 2747 }, { "epoch": 1.9619228938600668, "grad_norm": 0.3343370854854584, "learning_rate": 3.219804418628216e-06, "loss": 0.412, "step": 2748 }, { "epoch": 1.9626368396001905, "grad_norm": 0.3367076516151428, "learning_rate": 3.2159217966111557e-06, "loss": 0.4422, "step": 2749 }, { "epoch": 1.9633507853403143, "grad_norm": 0.343839168548584, "learning_rate": 3.2120404069325695e-06, "loss": 0.4009, "step": 2750 }, { "epoch": 1.964064731080438, "grad_norm": 0.38663601875305176, "learning_rate": 3.2081602522734987e-06, "loss": 0.398, "step": 2751 }, { "epoch": 1.9647786768205617, "grad_norm": 0.35041967034339905, "learning_rate": 3.2042813353141333e-06, "loss": 0.408, "step": 2752 }, { "epoch": 1.9654926225606855, "grad_norm": 0.31860053539276123, "learning_rate": 3.200403658733802e-06, "loss": 0.3469, "step": 2753 }, { "epoch": 1.9662065683008092, "grad_norm": 0.3243362605571747, "learning_rate": 3.1965272252109817e-06, "loss": 0.3997, "step": 2754 }, { "epoch": 1.966920514040933, "grad_norm": 0.32138147950172424, "learning_rate": 3.1926520374232928e-06, "loss": 0.3739, "step": 2755 }, { "epoch": 1.9676344597810567, "grad_norm": 0.3615894317626953, "learning_rate": 3.188778098047487e-06, "loss": 0.3998, "step": 2756 }, { "epoch": 1.9683484055211804, "grad_norm": 0.3547836244106293, "learning_rate": 3.18490540975946e-06, "loss": 0.4003, "step": 2757 }, { "epoch": 1.9690623512613041, "grad_norm": 0.32483166456222534, "learning_rate": 3.1810339752342446e-06, "loss": 0.3847, "step": 2758 }, { "epoch": 1.9697762970014279, "grad_norm": 0.3487589657306671, "learning_rate": 3.1771637971460002e-06, "loss": 0.4183, "step": 2759 }, { "epoch": 1.9704902427415516, "grad_norm": 0.29587888717651367, "learning_rate": 3.173294878168025e-06, "loss": 0.3467, "step": 2760 }, { "epoch": 1.9712041884816753, "grad_norm": 0.3272700309753418, "learning_rate": 3.1694272209727476e-06, "loss": 0.4394, "step": 2761 }, { "epoch": 1.971918134221799, "grad_norm": 0.34951549768447876, "learning_rate": 3.1655608282317207e-06, "loss": 0.3702, "step": 2762 }, { "epoch": 1.9726320799619228, "grad_norm": 0.35404255986213684, "learning_rate": 3.161695702615625e-06, "loss": 0.4222, "step": 2763 }, { "epoch": 1.9733460257020465, "grad_norm": 0.3310895264148712, "learning_rate": 3.1578318467942672e-06, "loss": 0.4073, "step": 2764 }, { "epoch": 1.9740599714421703, "grad_norm": 0.3019251823425293, "learning_rate": 3.1539692634365788e-06, "loss": 0.3671, "step": 2765 }, { "epoch": 1.974773917182294, "grad_norm": 0.3396937847137451, "learning_rate": 3.150107955210606e-06, "loss": 0.4093, "step": 2766 }, { "epoch": 1.9754878629224177, "grad_norm": 0.32615596055984497, "learning_rate": 3.146247924783521e-06, "loss": 0.3885, "step": 2767 }, { "epoch": 1.9762018086625417, "grad_norm": 0.30689287185668945, "learning_rate": 3.1423891748216106e-06, "loss": 0.3747, "step": 2768 }, { "epoch": 1.9769157544026654, "grad_norm": 0.3135201036930084, "learning_rate": 3.1385317079902743e-06, "loss": 0.4127, "step": 2769 }, { "epoch": 1.9776297001427892, "grad_norm": 0.34364086389541626, "learning_rate": 3.1346755269540303e-06, "loss": 0.3895, "step": 2770 }, { "epoch": 1.978343645882913, "grad_norm": 0.31011825799942017, "learning_rate": 3.1308206343765073e-06, "loss": 0.3878, "step": 2771 }, { "epoch": 1.9790575916230366, "grad_norm": 0.3036888539791107, "learning_rate": 3.12696703292044e-06, "loss": 0.3661, "step": 2772 }, { "epoch": 1.9797715373631604, "grad_norm": 0.34809258580207825, "learning_rate": 3.123114725247677e-06, "loss": 0.4243, "step": 2773 }, { "epoch": 1.9804854831032843, "grad_norm": 0.3421609401702881, "learning_rate": 3.1192637140191694e-06, "loss": 0.3764, "step": 2774 }, { "epoch": 1.981199428843408, "grad_norm": 0.3157767057418823, "learning_rate": 3.1154140018949743e-06, "loss": 0.362, "step": 2775 }, { "epoch": 1.9819133745835318, "grad_norm": 0.30387505888938904, "learning_rate": 3.1115655915342497e-06, "loss": 0.3837, "step": 2776 }, { "epoch": 1.9826273203236555, "grad_norm": 0.3134278357028961, "learning_rate": 3.107718485595259e-06, "loss": 0.3866, "step": 2777 }, { "epoch": 1.9833412660637793, "grad_norm": 0.3175162374973297, "learning_rate": 3.1038726867353587e-06, "loss": 0.3713, "step": 2778 }, { "epoch": 1.984055211803903, "grad_norm": 0.3354339003562927, "learning_rate": 3.100028197611006e-06, "loss": 0.4085, "step": 2779 }, { "epoch": 1.9847691575440267, "grad_norm": 0.3511711359024048, "learning_rate": 3.0961850208777527e-06, "loss": 0.4145, "step": 2780 }, { "epoch": 1.9854831032841505, "grad_norm": 0.32675349712371826, "learning_rate": 3.092343159190244e-06, "loss": 0.3797, "step": 2781 }, { "epoch": 1.9861970490242742, "grad_norm": 0.355646014213562, "learning_rate": 3.0885026152022156e-06, "loss": 0.4173, "step": 2782 }, { "epoch": 1.986910994764398, "grad_norm": 0.314433753490448, "learning_rate": 3.084663391566497e-06, "loss": 0.3851, "step": 2783 }, { "epoch": 1.9876249405045217, "grad_norm": 0.3188086450099945, "learning_rate": 3.0808254909349987e-06, "loss": 0.3914, "step": 2784 }, { "epoch": 1.9883388862446454, "grad_norm": 0.35387277603149414, "learning_rate": 3.0769889159587253e-06, "loss": 0.4096, "step": 2785 }, { "epoch": 1.9890528319847691, "grad_norm": 0.3307502269744873, "learning_rate": 3.0731536692877596e-06, "loss": 0.4056, "step": 2786 }, { "epoch": 1.9897667777248929, "grad_norm": 0.3306613266468048, "learning_rate": 3.0693197535712695e-06, "loss": 0.3847, "step": 2787 }, { "epoch": 1.9904807234650166, "grad_norm": 0.33310389518737793, "learning_rate": 3.0654871714575023e-06, "loss": 0.4047, "step": 2788 }, { "epoch": 1.9911946692051403, "grad_norm": 0.3723585307598114, "learning_rate": 3.0616559255937882e-06, "loss": 0.3969, "step": 2789 }, { "epoch": 1.991908614945264, "grad_norm": 0.34806403517723083, "learning_rate": 3.057826018626527e-06, "loss": 0.4032, "step": 2790 }, { "epoch": 1.9926225606853878, "grad_norm": 0.3327779471874237, "learning_rate": 3.0539974532012007e-06, "loss": 0.3596, "step": 2791 }, { "epoch": 1.9933365064255115, "grad_norm": 0.35434260964393616, "learning_rate": 3.0501702319623617e-06, "loss": 0.4319, "step": 2792 }, { "epoch": 1.9940504521656353, "grad_norm": 0.3428080379962921, "learning_rate": 3.0463443575536324e-06, "loss": 0.4099, "step": 2793 }, { "epoch": 1.9947643979057592, "grad_norm": 0.3464294373989105, "learning_rate": 3.0425198326177064e-06, "loss": 0.4035, "step": 2794 }, { "epoch": 1.995478343645883, "grad_norm": 0.3144712746143341, "learning_rate": 3.0386966597963487e-06, "loss": 0.3883, "step": 2795 }, { "epoch": 1.9961922893860067, "grad_norm": 0.31975188851356506, "learning_rate": 3.0348748417303826e-06, "loss": 0.3889, "step": 2796 }, { "epoch": 1.9969062351261304, "grad_norm": 0.3571605980396271, "learning_rate": 3.0310543810597015e-06, "loss": 0.4142, "step": 2797 }, { "epoch": 1.9976201808662541, "grad_norm": 0.33142516016960144, "learning_rate": 3.027235280423262e-06, "loss": 0.4068, "step": 2798 }, { "epoch": 1.9983341266063779, "grad_norm": 0.3385022282600403, "learning_rate": 3.023417542459076e-06, "loss": 0.4147, "step": 2799 }, { "epoch": 1.9990480723465016, "grad_norm": 0.32061508297920227, "learning_rate": 3.019601169804216e-06, "loss": 0.3677, "step": 2800 }, { "epoch": 1.9997620180866256, "grad_norm": 0.3904738426208496, "learning_rate": 3.0157861650948174e-06, "loss": 0.5067, "step": 2801 }, { "epoch": 2.0004759638267493, "grad_norm": 0.3931441605091095, "learning_rate": 3.0119725309660595e-06, "loss": 0.4322, "step": 2802 }, { "epoch": 2.001189909566873, "grad_norm": 0.3576829135417938, "learning_rate": 3.0081602700521844e-06, "loss": 0.3796, "step": 2803 }, { "epoch": 2.0019038553069968, "grad_norm": 0.3527168929576874, "learning_rate": 3.0043493849864835e-06, "loss": 0.3711, "step": 2804 }, { "epoch": 2.0026178010471205, "grad_norm": 0.32481375336647034, "learning_rate": 3.000539878401296e-06, "loss": 0.3805, "step": 2805 }, { "epoch": 2.0033317467872442, "grad_norm": 0.34065234661102295, "learning_rate": 2.9967317529280076e-06, "loss": 0.3944, "step": 2806 }, { "epoch": 2.004045692527368, "grad_norm": 0.3529455363750458, "learning_rate": 2.9929250111970533e-06, "loss": 0.3401, "step": 2807 }, { "epoch": 2.0047596382674917, "grad_norm": 0.374421089887619, "learning_rate": 2.989119655837913e-06, "loss": 0.4079, "step": 2808 }, { "epoch": 2.0054735840076154, "grad_norm": 0.31809762120246887, "learning_rate": 2.985315689479104e-06, "loss": 0.3073, "step": 2809 }, { "epoch": 2.006187529747739, "grad_norm": 0.3399421274662018, "learning_rate": 2.981513114748189e-06, "loss": 0.4141, "step": 2810 }, { "epoch": 2.006901475487863, "grad_norm": 0.31755927205085754, "learning_rate": 2.9777119342717686e-06, "loss": 0.3627, "step": 2811 }, { "epoch": 2.0076154212279866, "grad_norm": 0.33894312381744385, "learning_rate": 2.973912150675475e-06, "loss": 0.3582, "step": 2812 }, { "epoch": 2.0083293669681104, "grad_norm": 0.33166781067848206, "learning_rate": 2.970113766583983e-06, "loss": 0.3913, "step": 2813 }, { "epoch": 2.009043312708234, "grad_norm": 0.32689499855041504, "learning_rate": 2.966316784621e-06, "loss": 0.3428, "step": 2814 }, { "epoch": 2.009757258448358, "grad_norm": 0.3428614139556885, "learning_rate": 2.962521207409257e-06, "loss": 0.3901, "step": 2815 }, { "epoch": 2.0104712041884816, "grad_norm": 0.32499849796295166, "learning_rate": 2.9587270375705244e-06, "loss": 0.3304, "step": 2816 }, { "epoch": 2.0111851499286053, "grad_norm": 0.3256402313709259, "learning_rate": 2.9549342777255955e-06, "loss": 0.3847, "step": 2817 }, { "epoch": 2.011899095668729, "grad_norm": 0.32548293471336365, "learning_rate": 2.951142930494288e-06, "loss": 0.3917, "step": 2818 }, { "epoch": 2.0126130414088528, "grad_norm": 0.35149604082107544, "learning_rate": 2.9473529984954473e-06, "loss": 0.4061, "step": 2819 }, { "epoch": 2.0133269871489765, "grad_norm": 0.3234248459339142, "learning_rate": 2.9435644843469434e-06, "loss": 0.3654, "step": 2820 }, { "epoch": 2.0140409328891002, "grad_norm": 0.32377517223358154, "learning_rate": 2.9397773906656584e-06, "loss": 0.3692, "step": 2821 }, { "epoch": 2.014754878629224, "grad_norm": 0.32497715950012207, "learning_rate": 2.9359917200675026e-06, "loss": 0.3603, "step": 2822 }, { "epoch": 2.0154688243693477, "grad_norm": 0.34514883160591125, "learning_rate": 2.932207475167398e-06, "loss": 0.3492, "step": 2823 }, { "epoch": 2.016182770109472, "grad_norm": 0.33020538091659546, "learning_rate": 2.9284246585792812e-06, "loss": 0.4227, "step": 2824 }, { "epoch": 2.0168967158495956, "grad_norm": 0.2869771420955658, "learning_rate": 2.9246432729161057e-06, "loss": 0.3373, "step": 2825 }, { "epoch": 2.0176106615897194, "grad_norm": 0.3282347023487091, "learning_rate": 2.9208633207898372e-06, "loss": 0.4039, "step": 2826 }, { "epoch": 2.018324607329843, "grad_norm": 0.31377074122428894, "learning_rate": 2.917084804811445e-06, "loss": 0.3854, "step": 2827 }, { "epoch": 2.019038553069967, "grad_norm": 0.34053948521614075, "learning_rate": 2.9133077275909112e-06, "loss": 0.4001, "step": 2828 }, { "epoch": 2.0197524988100906, "grad_norm": 0.3334048092365265, "learning_rate": 2.9095320917372256e-06, "loss": 0.3659, "step": 2829 }, { "epoch": 2.0204664445502143, "grad_norm": 0.30989015102386475, "learning_rate": 2.905757899858377e-06, "loss": 0.3609, "step": 2830 }, { "epoch": 2.021180390290338, "grad_norm": 0.31467336416244507, "learning_rate": 2.90198515456136e-06, "loss": 0.3403, "step": 2831 }, { "epoch": 2.0218943360304618, "grad_norm": 0.32294028997421265, "learning_rate": 2.8982138584521734e-06, "loss": 0.3517, "step": 2832 }, { "epoch": 2.0226082817705855, "grad_norm": 0.3486914038658142, "learning_rate": 2.8944440141358077e-06, "loss": 0.3973, "step": 2833 }, { "epoch": 2.0233222275107092, "grad_norm": 0.32210004329681396, "learning_rate": 2.890675624216255e-06, "loss": 0.3605, "step": 2834 }, { "epoch": 2.024036173250833, "grad_norm": 0.31832388043403625, "learning_rate": 2.886908691296504e-06, "loss": 0.3734, "step": 2835 }, { "epoch": 2.0247501189909567, "grad_norm": 0.3215055465698242, "learning_rate": 2.8831432179785314e-06, "loss": 0.416, "step": 2836 }, { "epoch": 2.0254640647310804, "grad_norm": 0.3239387571811676, "learning_rate": 2.879379206863313e-06, "loss": 0.3462, "step": 2837 }, { "epoch": 2.026178010471204, "grad_norm": 0.32534006237983704, "learning_rate": 2.8756166605508085e-06, "loss": 0.4208, "step": 2838 }, { "epoch": 2.026891956211328, "grad_norm": 0.3187818229198456, "learning_rate": 2.8718555816399703e-06, "loss": 0.3518, "step": 2839 }, { "epoch": 2.0276059019514516, "grad_norm": 0.3610284626483917, "learning_rate": 2.8680959727287316e-06, "loss": 0.3862, "step": 2840 }, { "epoch": 2.0283198476915754, "grad_norm": 0.30586907267570496, "learning_rate": 2.8643378364140186e-06, "loss": 0.3776, "step": 2841 }, { "epoch": 2.029033793431699, "grad_norm": 0.3440910875797272, "learning_rate": 2.86058117529173e-06, "loss": 0.376, "step": 2842 }, { "epoch": 2.029747739171823, "grad_norm": 0.3523302376270294, "learning_rate": 2.856825991956753e-06, "loss": 0.387, "step": 2843 }, { "epoch": 2.0304616849119466, "grad_norm": 0.32681137323379517, "learning_rate": 2.853072289002954e-06, "loss": 0.3537, "step": 2844 }, { "epoch": 2.0311756306520703, "grad_norm": 0.34205162525177, "learning_rate": 2.8493200690231746e-06, "loss": 0.3913, "step": 2845 }, { "epoch": 2.031889576392194, "grad_norm": 0.3374054431915283, "learning_rate": 2.84556933460923e-06, "loss": 0.3851, "step": 2846 }, { "epoch": 2.0326035221323178, "grad_norm": 0.31228959560394287, "learning_rate": 2.841820088351912e-06, "loss": 0.3238, "step": 2847 }, { "epoch": 2.0333174678724415, "grad_norm": 0.33500954508781433, "learning_rate": 2.8380723328409872e-06, "loss": 0.3983, "step": 2848 }, { "epoch": 2.0340314136125652, "grad_norm": 0.3212590515613556, "learning_rate": 2.8343260706651864e-06, "loss": 0.3571, "step": 2849 }, { "epoch": 2.0347453593526894, "grad_norm": 0.3359088599681854, "learning_rate": 2.83058130441221e-06, "loss": 0.3775, "step": 2850 }, { "epoch": 2.035459305092813, "grad_norm": 0.33782464265823364, "learning_rate": 2.826838036668731e-06, "loss": 0.3958, "step": 2851 }, { "epoch": 2.036173250832937, "grad_norm": 0.2921976149082184, "learning_rate": 2.823096270020379e-06, "loss": 0.3705, "step": 2852 }, { "epoch": 2.0368871965730606, "grad_norm": 0.322043240070343, "learning_rate": 2.8193560070517535e-06, "loss": 0.3523, "step": 2853 }, { "epoch": 2.0376011423131843, "grad_norm": 0.3190116882324219, "learning_rate": 2.815617250346414e-06, "loss": 0.3883, "step": 2854 }, { "epoch": 2.038315088053308, "grad_norm": 0.31682243943214417, "learning_rate": 2.8118800024868743e-06, "loss": 0.3581, "step": 2855 }, { "epoch": 2.039029033793432, "grad_norm": 0.3415047824382782, "learning_rate": 2.8081442660546126e-06, "loss": 0.3976, "step": 2856 }, { "epoch": 2.0397429795335555, "grad_norm": 0.3135128319263458, "learning_rate": 2.8044100436300624e-06, "loss": 0.3613, "step": 2857 }, { "epoch": 2.0404569252736793, "grad_norm": 0.32922354340553284, "learning_rate": 2.8006773377926043e-06, "loss": 0.3615, "step": 2858 }, { "epoch": 2.041170871013803, "grad_norm": 0.3411819040775299, "learning_rate": 2.7969461511205807e-06, "loss": 0.3597, "step": 2859 }, { "epoch": 2.0418848167539267, "grad_norm": 0.31412526965141296, "learning_rate": 2.7932164861912805e-06, "loss": 0.3268, "step": 2860 }, { "epoch": 2.0425987624940505, "grad_norm": 0.3252978026866913, "learning_rate": 2.7894883455809385e-06, "loss": 0.3494, "step": 2861 }, { "epoch": 2.043312708234174, "grad_norm": 0.2894049882888794, "learning_rate": 2.7857617318647434e-06, "loss": 0.3129, "step": 2862 }, { "epoch": 2.044026653974298, "grad_norm": 0.3302081823348999, "learning_rate": 2.7820366476168224e-06, "loss": 0.3761, "step": 2863 }, { "epoch": 2.0447405997144217, "grad_norm": 0.36394643783569336, "learning_rate": 2.7783130954102484e-06, "loss": 0.3899, "step": 2864 }, { "epoch": 2.0454545454545454, "grad_norm": 0.33841872215270996, "learning_rate": 2.774591077817038e-06, "loss": 0.3529, "step": 2865 }, { "epoch": 2.046168491194669, "grad_norm": 0.34495580196380615, "learning_rate": 2.7708705974081496e-06, "loss": 0.3662, "step": 2866 }, { "epoch": 2.046882436934793, "grad_norm": 0.3340053856372833, "learning_rate": 2.7671516567534717e-06, "loss": 0.3511, "step": 2867 }, { "epoch": 2.0475963826749166, "grad_norm": 0.3594892919063568, "learning_rate": 2.7634342584218364e-06, "loss": 0.4038, "step": 2868 }, { "epoch": 2.0483103284150403, "grad_norm": 0.30271533131599426, "learning_rate": 2.759718404981012e-06, "loss": 0.3226, "step": 2869 }, { "epoch": 2.049024274155164, "grad_norm": 0.36532679200172424, "learning_rate": 2.7560040989976894e-06, "loss": 0.4321, "step": 2870 }, { "epoch": 2.049738219895288, "grad_norm": 0.3087964355945587, "learning_rate": 2.752291343037501e-06, "loss": 0.3009, "step": 2871 }, { "epoch": 2.0504521656354115, "grad_norm": 0.3254118859767914, "learning_rate": 2.7485801396650067e-06, "loss": 0.3577, "step": 2872 }, { "epoch": 2.0511661113755353, "grad_norm": 0.34323474764823914, "learning_rate": 2.7448704914436873e-06, "loss": 0.3595, "step": 2873 }, { "epoch": 2.051880057115659, "grad_norm": 0.3922349214553833, "learning_rate": 2.7411624009359592e-06, "loss": 0.4048, "step": 2874 }, { "epoch": 2.0525940028557828, "grad_norm": 0.32388171553611755, "learning_rate": 2.737455870703155e-06, "loss": 0.352, "step": 2875 }, { "epoch": 2.053307948595907, "grad_norm": 0.30658167600631714, "learning_rate": 2.733750903305531e-06, "loss": 0.3532, "step": 2876 }, { "epoch": 2.0540218943360307, "grad_norm": 0.31151166558265686, "learning_rate": 2.7300475013022666e-06, "loss": 0.3498, "step": 2877 }, { "epoch": 2.0547358400761544, "grad_norm": 0.3791300356388092, "learning_rate": 2.726345667251461e-06, "loss": 0.3941, "step": 2878 }, { "epoch": 2.055449785816278, "grad_norm": 0.3261881470680237, "learning_rate": 2.7226454037101237e-06, "loss": 0.3647, "step": 2879 }, { "epoch": 2.056163731556402, "grad_norm": 0.32827094197273254, "learning_rate": 2.718946713234185e-06, "loss": 0.3139, "step": 2880 }, { "epoch": 2.0568776772965256, "grad_norm": 0.3149397075176239, "learning_rate": 2.7152495983784886e-06, "loss": 0.3331, "step": 2881 }, { "epoch": 2.0575916230366493, "grad_norm": 0.3625166118144989, "learning_rate": 2.7115540616967906e-06, "loss": 0.4047, "step": 2882 }, { "epoch": 2.058305568776773, "grad_norm": 0.3173932731151581, "learning_rate": 2.7078601057417497e-06, "loss": 0.3465, "step": 2883 }, { "epoch": 2.059019514516897, "grad_norm": 0.32341521978378296, "learning_rate": 2.7041677330649408e-06, "loss": 0.3852, "step": 2884 }, { "epoch": 2.0597334602570205, "grad_norm": 0.3321298658847809, "learning_rate": 2.7004769462168432e-06, "loss": 0.3367, "step": 2885 }, { "epoch": 2.0604474059971443, "grad_norm": 0.3493439555168152, "learning_rate": 2.6967877477468394e-06, "loss": 0.3718, "step": 2886 }, { "epoch": 2.061161351737268, "grad_norm": 0.338044673204422, "learning_rate": 2.693100140203213e-06, "loss": 0.3919, "step": 2887 }, { "epoch": 2.0618752974773917, "grad_norm": 0.316736102104187, "learning_rate": 2.6894141261331542e-06, "loss": 0.3603, "step": 2888 }, { "epoch": 2.0625892432175155, "grad_norm": 0.32039907574653625, "learning_rate": 2.685729708082745e-06, "loss": 0.3674, "step": 2889 }, { "epoch": 2.063303188957639, "grad_norm": 0.33011144399642944, "learning_rate": 2.682046888596972e-06, "loss": 0.3503, "step": 2890 }, { "epoch": 2.064017134697763, "grad_norm": 0.30614227056503296, "learning_rate": 2.678365670219716e-06, "loss": 0.3322, "step": 2891 }, { "epoch": 2.0647310804378867, "grad_norm": 0.32778844237327576, "learning_rate": 2.674686055493748e-06, "loss": 0.3646, "step": 2892 }, { "epoch": 2.0654450261780104, "grad_norm": 0.3296724259853363, "learning_rate": 2.6710080469607345e-06, "loss": 0.3754, "step": 2893 }, { "epoch": 2.066158971918134, "grad_norm": 0.30955633521080017, "learning_rate": 2.667331647161235e-06, "loss": 0.3633, "step": 2894 }, { "epoch": 2.066872917658258, "grad_norm": 0.3208116292953491, "learning_rate": 2.66365685863469e-06, "loss": 0.3627, "step": 2895 }, { "epoch": 2.0675868633983816, "grad_norm": 0.3449697196483612, "learning_rate": 2.659983683919434e-06, "loss": 0.3827, "step": 2896 }, { "epoch": 2.0683008091385053, "grad_norm": 0.34081757068634033, "learning_rate": 2.656312125552687e-06, "loss": 0.3716, "step": 2897 }, { "epoch": 2.069014754878629, "grad_norm": 0.36976614594459534, "learning_rate": 2.6526421860705474e-06, "loss": 0.4218, "step": 2898 }, { "epoch": 2.069728700618753, "grad_norm": 0.3277587890625, "learning_rate": 2.648973868007997e-06, "loss": 0.3424, "step": 2899 }, { "epoch": 2.0704426463588765, "grad_norm": 0.3127264678478241, "learning_rate": 2.645307173898901e-06, "loss": 0.3473, "step": 2900 }, { "epoch": 2.0711565920990003, "grad_norm": 0.3181946575641632, "learning_rate": 2.6416421062759984e-06, "loss": 0.3911, "step": 2901 }, { "epoch": 2.071870537839124, "grad_norm": 0.33409684896469116, "learning_rate": 2.6379786676709075e-06, "loss": 0.3922, "step": 2902 }, { "epoch": 2.072584483579248, "grad_norm": 0.3017069697380066, "learning_rate": 2.6343168606141235e-06, "loss": 0.3427, "step": 2903 }, { "epoch": 2.073298429319372, "grad_norm": 0.31502681970596313, "learning_rate": 2.6306566876350072e-06, "loss": 0.4033, "step": 2904 }, { "epoch": 2.0740123750594956, "grad_norm": 0.32331734895706177, "learning_rate": 2.626998151261798e-06, "loss": 0.3712, "step": 2905 }, { "epoch": 2.0747263207996194, "grad_norm": 0.32161715626716614, "learning_rate": 2.623341254021603e-06, "loss": 0.3717, "step": 2906 }, { "epoch": 2.075440266539743, "grad_norm": 0.3139287233352661, "learning_rate": 2.619685998440393e-06, "loss": 0.3885, "step": 2907 }, { "epoch": 2.076154212279867, "grad_norm": 0.332526296377182, "learning_rate": 2.616032387043011e-06, "loss": 0.3633, "step": 2908 }, { "epoch": 2.0768681580199906, "grad_norm": 0.332086443901062, "learning_rate": 2.612380422353162e-06, "loss": 0.3883, "step": 2909 }, { "epoch": 2.0775821037601143, "grad_norm": 0.3098534643650055, "learning_rate": 2.608730106893411e-06, "loss": 0.3215, "step": 2910 }, { "epoch": 2.078296049500238, "grad_norm": 0.3275844156742096, "learning_rate": 2.6050814431851834e-06, "loss": 0.3807, "step": 2911 }, { "epoch": 2.079009995240362, "grad_norm": 0.3280472755432129, "learning_rate": 2.601434433748771e-06, "loss": 0.3895, "step": 2912 }, { "epoch": 2.0797239409804855, "grad_norm": 0.35528305172920227, "learning_rate": 2.5977890811033135e-06, "loss": 0.376, "step": 2913 }, { "epoch": 2.0804378867206093, "grad_norm": 0.32367658615112305, "learning_rate": 2.594145387766812e-06, "loss": 0.3541, "step": 2914 }, { "epoch": 2.081151832460733, "grad_norm": 0.31749770045280457, "learning_rate": 2.5905033562561223e-06, "loss": 0.3883, "step": 2915 }, { "epoch": 2.0818657782008567, "grad_norm": 0.31539180874824524, "learning_rate": 2.5868629890869467e-06, "loss": 0.3786, "step": 2916 }, { "epoch": 2.0825797239409805, "grad_norm": 0.3298123776912689, "learning_rate": 2.5832242887738422e-06, "loss": 0.3823, "step": 2917 }, { "epoch": 2.083293669681104, "grad_norm": 0.35312312841415405, "learning_rate": 2.579587257830216e-06, "loss": 0.3823, "step": 2918 }, { "epoch": 2.084007615421228, "grad_norm": 0.30093055963516235, "learning_rate": 2.5759518987683154e-06, "loss": 0.325, "step": 2919 }, { "epoch": 2.0847215611613517, "grad_norm": 0.3052996098995209, "learning_rate": 2.5723182140992385e-06, "loss": 0.3669, "step": 2920 }, { "epoch": 2.0854355069014754, "grad_norm": 0.3126366436481476, "learning_rate": 2.5686862063329286e-06, "loss": 0.3967, "step": 2921 }, { "epoch": 2.086149452641599, "grad_norm": 0.2951839864253998, "learning_rate": 2.5650558779781635e-06, "loss": 0.3463, "step": 2922 }, { "epoch": 2.086863398381723, "grad_norm": 0.3427375555038452, "learning_rate": 2.561427231542568e-06, "loss": 0.3917, "step": 2923 }, { "epoch": 2.0875773441218466, "grad_norm": 0.3199326992034912, "learning_rate": 2.5578002695325986e-06, "loss": 0.3947, "step": 2924 }, { "epoch": 2.0882912898619703, "grad_norm": 0.3011264204978943, "learning_rate": 2.5541749944535554e-06, "loss": 0.3625, "step": 2925 }, { "epoch": 2.089005235602094, "grad_norm": 0.33290159702301025, "learning_rate": 2.550551408809566e-06, "loss": 0.3676, "step": 2926 }, { "epoch": 2.089719181342218, "grad_norm": 0.3149368464946747, "learning_rate": 2.546929515103596e-06, "loss": 0.3675, "step": 2927 }, { "epoch": 2.090433127082342, "grad_norm": 0.30464065074920654, "learning_rate": 2.543309315837444e-06, "loss": 0.3823, "step": 2928 }, { "epoch": 2.0911470728224657, "grad_norm": 0.29481202363967896, "learning_rate": 2.5396908135117303e-06, "loss": 0.3775, "step": 2929 }, { "epoch": 2.0918610185625894, "grad_norm": 0.312491238117218, "learning_rate": 2.536074010625911e-06, "loss": 0.3492, "step": 2930 }, { "epoch": 2.092574964302713, "grad_norm": 0.3197564482688904, "learning_rate": 2.532458909678266e-06, "loss": 0.3929, "step": 2931 }, { "epoch": 2.093288910042837, "grad_norm": 0.3151457905769348, "learning_rate": 2.528845513165896e-06, "loss": 0.3706, "step": 2932 }, { "epoch": 2.0940028557829606, "grad_norm": 0.30442100763320923, "learning_rate": 2.52523382358473e-06, "loss": 0.3184, "step": 2933 }, { "epoch": 2.0947168015230844, "grad_norm": 0.3386186361312866, "learning_rate": 2.521623843429512e-06, "loss": 0.3567, "step": 2934 }, { "epoch": 2.095430747263208, "grad_norm": 0.2996649146080017, "learning_rate": 2.518015575193812e-06, "loss": 0.3455, "step": 2935 }, { "epoch": 2.096144693003332, "grad_norm": 0.3128938674926758, "learning_rate": 2.5144090213700103e-06, "loss": 0.4126, "step": 2936 }, { "epoch": 2.0968586387434556, "grad_norm": 0.2985534071922302, "learning_rate": 2.5108041844493104e-06, "loss": 0.3293, "step": 2937 }, { "epoch": 2.0975725844835793, "grad_norm": 0.33922985196113586, "learning_rate": 2.5072010669217215e-06, "loss": 0.4119, "step": 2938 }, { "epoch": 2.098286530223703, "grad_norm": 0.3101251721382141, "learning_rate": 2.5035996712760724e-06, "loss": 0.3933, "step": 2939 }, { "epoch": 2.0990004759638268, "grad_norm": 0.3222533166408539, "learning_rate": 2.5000000000000015e-06, "loss": 0.3565, "step": 2940 }, { "epoch": 2.0997144217039505, "grad_norm": 0.3285093307495117, "learning_rate": 2.49640205557995e-06, "loss": 0.3861, "step": 2941 }, { "epoch": 2.1004283674440742, "grad_norm": 0.3525807559490204, "learning_rate": 2.4928058405011734e-06, "loss": 0.3357, "step": 2942 }, { "epoch": 2.101142313184198, "grad_norm": 0.3126453161239624, "learning_rate": 2.4892113572477324e-06, "loss": 0.3698, "step": 2943 }, { "epoch": 2.1018562589243217, "grad_norm": 0.3374863564968109, "learning_rate": 2.4856186083024836e-06, "loss": 0.3972, "step": 2944 }, { "epoch": 2.1025702046644454, "grad_norm": 0.3245698809623718, "learning_rate": 2.4820275961470935e-06, "loss": 0.3733, "step": 2945 }, { "epoch": 2.103284150404569, "grad_norm": 0.3359304368495941, "learning_rate": 2.4784383232620297e-06, "loss": 0.3717, "step": 2946 }, { "epoch": 2.103998096144693, "grad_norm": 0.3152310252189636, "learning_rate": 2.4748507921265514e-06, "loss": 0.4004, "step": 2947 }, { "epoch": 2.1047120418848166, "grad_norm": 0.3081190884113312, "learning_rate": 2.4712650052187174e-06, "loss": 0.3386, "step": 2948 }, { "epoch": 2.1054259876249404, "grad_norm": 0.32439371943473816, "learning_rate": 2.467680965015387e-06, "loss": 0.3507, "step": 2949 }, { "epoch": 2.106139933365064, "grad_norm": 0.3029186427593231, "learning_rate": 2.464098673992205e-06, "loss": 0.3694, "step": 2950 }, { "epoch": 2.106853879105188, "grad_norm": 0.2983914017677307, "learning_rate": 2.4605181346236127e-06, "loss": 0.3592, "step": 2951 }, { "epoch": 2.1075678248453116, "grad_norm": 0.3135688900947571, "learning_rate": 2.4569393493828433e-06, "loss": 0.3775, "step": 2952 }, { "epoch": 2.1082817705854353, "grad_norm": 0.31310468912124634, "learning_rate": 2.453362320741911e-06, "loss": 0.3517, "step": 2953 }, { "epoch": 2.108995716325559, "grad_norm": 0.3362148106098175, "learning_rate": 2.4497870511716237e-06, "loss": 0.3752, "step": 2954 }, { "epoch": 2.1097096620656832, "grad_norm": 0.31584399938583374, "learning_rate": 2.4462135431415736e-06, "loss": 0.4204, "step": 2955 }, { "epoch": 2.110423607805807, "grad_norm": 0.30653998255729675, "learning_rate": 2.4426417991201294e-06, "loss": 0.3633, "step": 2956 }, { "epoch": 2.1111375535459307, "grad_norm": 0.32680588960647583, "learning_rate": 2.4390718215744495e-06, "loss": 0.3686, "step": 2957 }, { "epoch": 2.1118514992860544, "grad_norm": 0.3296738564968109, "learning_rate": 2.43550361297047e-06, "loss": 0.3616, "step": 2958 }, { "epoch": 2.112565445026178, "grad_norm": 0.33208590745925903, "learning_rate": 2.4319371757729017e-06, "loss": 0.3851, "step": 2959 }, { "epoch": 2.113279390766302, "grad_norm": 0.3219974637031555, "learning_rate": 2.428372512445233e-06, "loss": 0.4017, "step": 2960 }, { "epoch": 2.1139933365064256, "grad_norm": 0.3091762065887451, "learning_rate": 2.424809625449729e-06, "loss": 0.3394, "step": 2961 }, { "epoch": 2.1147072822465494, "grad_norm": 0.31664803624153137, "learning_rate": 2.4212485172474293e-06, "loss": 0.3686, "step": 2962 }, { "epoch": 2.115421227986673, "grad_norm": 0.3165169954299927, "learning_rate": 2.4176891902981388e-06, "loss": 0.3603, "step": 2963 }, { "epoch": 2.116135173726797, "grad_norm": 0.30552223324775696, "learning_rate": 2.4141316470604362e-06, "loss": 0.3472, "step": 2964 }, { "epoch": 2.1168491194669206, "grad_norm": 0.3311840891838074, "learning_rate": 2.4105758899916714e-06, "loss": 0.4036, "step": 2965 }, { "epoch": 2.1175630652070443, "grad_norm": 0.3210717737674713, "learning_rate": 2.4070219215479513e-06, "loss": 0.3797, "step": 2966 }, { "epoch": 2.118277010947168, "grad_norm": 0.3172813951969147, "learning_rate": 2.403469744184154e-06, "loss": 0.3507, "step": 2967 }, { "epoch": 2.1189909566872918, "grad_norm": 0.31716054677963257, "learning_rate": 2.3999193603539234e-06, "loss": 0.3649, "step": 2968 }, { "epoch": 2.1197049024274155, "grad_norm": 0.33042779564857483, "learning_rate": 2.3963707725096537e-06, "loss": 0.3433, "step": 2969 }, { "epoch": 2.1204188481675392, "grad_norm": 0.3609652519226074, "learning_rate": 2.39282398310251e-06, "loss": 0.4026, "step": 2970 }, { "epoch": 2.121132793907663, "grad_norm": 0.31170910596847534, "learning_rate": 2.3892789945824092e-06, "loss": 0.3792, "step": 2971 }, { "epoch": 2.1218467396477867, "grad_norm": 0.29822126030921936, "learning_rate": 2.3857358093980217e-06, "loss": 0.3638, "step": 2972 }, { "epoch": 2.1225606853879104, "grad_norm": 0.2937508523464203, "learning_rate": 2.382194429996778e-06, "loss": 0.3474, "step": 2973 }, { "epoch": 2.123274631128034, "grad_norm": 0.33520248532295227, "learning_rate": 2.3786548588248616e-06, "loss": 0.3644, "step": 2974 }, { "epoch": 2.123988576868158, "grad_norm": 0.35710224509239197, "learning_rate": 2.3751170983272e-06, "loss": 0.3986, "step": 2975 }, { "epoch": 2.1247025226082816, "grad_norm": 0.30845192074775696, "learning_rate": 2.371581150947476e-06, "loss": 0.364, "step": 2976 }, { "epoch": 2.1254164683484054, "grad_norm": 0.3203703761100769, "learning_rate": 2.368047019128122e-06, "loss": 0.3799, "step": 2977 }, { "epoch": 2.126130414088529, "grad_norm": 0.3418649435043335, "learning_rate": 2.3645147053103074e-06, "loss": 0.3396, "step": 2978 }, { "epoch": 2.126844359828653, "grad_norm": 0.3112621009349823, "learning_rate": 2.3609842119339533e-06, "loss": 0.3437, "step": 2979 }, { "epoch": 2.127558305568777, "grad_norm": 0.3102470636367798, "learning_rate": 2.357455541437723e-06, "loss": 0.3765, "step": 2980 }, { "epoch": 2.1282722513089007, "grad_norm": 0.34279024600982666, "learning_rate": 2.353928696259016e-06, "loss": 0.3696, "step": 2981 }, { "epoch": 2.1289861970490245, "grad_norm": 0.326668381690979, "learning_rate": 2.3504036788339763e-06, "loss": 0.3817, "step": 2982 }, { "epoch": 2.129700142789148, "grad_norm": 0.3056515157222748, "learning_rate": 2.3468804915974797e-06, "loss": 0.3731, "step": 2983 }, { "epoch": 2.130414088529272, "grad_norm": 0.3362802565097809, "learning_rate": 2.3433591369831455e-06, "loss": 0.361, "step": 2984 }, { "epoch": 2.1311280342693957, "grad_norm": 0.3101626932621002, "learning_rate": 2.339839617423318e-06, "loss": 0.3482, "step": 2985 }, { "epoch": 2.1318419800095194, "grad_norm": 0.3425101637840271, "learning_rate": 2.3363219353490822e-06, "loss": 0.3975, "step": 2986 }, { "epoch": 2.132555925749643, "grad_norm": 0.29521870613098145, "learning_rate": 2.3328060931902473e-06, "loss": 0.3362, "step": 2987 }, { "epoch": 2.133269871489767, "grad_norm": 0.3089696168899536, "learning_rate": 2.3292920933753566e-06, "loss": 0.3685, "step": 2988 }, { "epoch": 2.1339838172298906, "grad_norm": 0.3213994801044464, "learning_rate": 2.32577993833168e-06, "loss": 0.351, "step": 2989 }, { "epoch": 2.1346977629700143, "grad_norm": 0.3801293671131134, "learning_rate": 2.3222696304852084e-06, "loss": 0.4131, "step": 2990 }, { "epoch": 2.135411708710138, "grad_norm": 0.33047300577163696, "learning_rate": 2.3187611722606616e-06, "loss": 0.3708, "step": 2991 }, { "epoch": 2.136125654450262, "grad_norm": 0.3248124122619629, "learning_rate": 2.3152545660814835e-06, "loss": 0.3259, "step": 2992 }, { "epoch": 2.1368396001903855, "grad_norm": 0.36247482895851135, "learning_rate": 2.3117498143698314e-06, "loss": 0.377, "step": 2993 }, { "epoch": 2.1375535459305093, "grad_norm": 0.32065916061401367, "learning_rate": 2.3082469195465893e-06, "loss": 0.3431, "step": 2994 }, { "epoch": 2.138267491670633, "grad_norm": 0.35036545991897583, "learning_rate": 2.3047458840313526e-06, "loss": 0.3841, "step": 2995 }, { "epoch": 2.1389814374107567, "grad_norm": 0.3257358968257904, "learning_rate": 2.3012467102424373e-06, "loss": 0.3761, "step": 2996 }, { "epoch": 2.1396953831508805, "grad_norm": 0.3185453414916992, "learning_rate": 2.297749400596868e-06, "loss": 0.3257, "step": 2997 }, { "epoch": 2.140409328891004, "grad_norm": 0.3565068542957306, "learning_rate": 2.294253957510389e-06, "loss": 0.3806, "step": 2998 }, { "epoch": 2.141123274631128, "grad_norm": 0.3489561378955841, "learning_rate": 2.2907603833974466e-06, "loss": 0.3643, "step": 2999 }, { "epoch": 2.1418372203712517, "grad_norm": 0.3509209454059601, "learning_rate": 2.2872686806712037e-06, "loss": 0.3726, "step": 3000 }, { "epoch": 2.1425511661113754, "grad_norm": 0.3153849244117737, "learning_rate": 2.2837788517435256e-06, "loss": 0.3558, "step": 3001 }, { "epoch": 2.143265111851499, "grad_norm": 0.3244629204273224, "learning_rate": 2.2802908990249884e-06, "loss": 0.3757, "step": 3002 }, { "epoch": 2.143979057591623, "grad_norm": 0.34377601742744446, "learning_rate": 2.2768048249248648e-06, "loss": 0.3731, "step": 3003 }, { "epoch": 2.1446930033317466, "grad_norm": 0.3283791244029999, "learning_rate": 2.2733206318511354e-06, "loss": 0.3735, "step": 3004 }, { "epoch": 2.1454069490718704, "grad_norm": 0.32368412613868713, "learning_rate": 2.269838322210483e-06, "loss": 0.3506, "step": 3005 }, { "epoch": 2.146120894811994, "grad_norm": 0.32579505443573, "learning_rate": 2.2663578984082826e-06, "loss": 0.3696, "step": 3006 }, { "epoch": 2.146834840552118, "grad_norm": 0.30534011125564575, "learning_rate": 2.262879362848613e-06, "loss": 0.3573, "step": 3007 }, { "epoch": 2.147548786292242, "grad_norm": 0.28667911887168884, "learning_rate": 2.259402717934246e-06, "loss": 0.3101, "step": 3008 }, { "epoch": 2.1482627320323657, "grad_norm": 0.3237725496292114, "learning_rate": 2.2559279660666444e-06, "loss": 0.3503, "step": 3009 }, { "epoch": 2.1489766777724895, "grad_norm": 0.3522917628288269, "learning_rate": 2.2524551096459703e-06, "loss": 0.3882, "step": 3010 }, { "epoch": 2.149690623512613, "grad_norm": 0.3335150182247162, "learning_rate": 2.248984151071073e-06, "loss": 0.3554, "step": 3011 }, { "epoch": 2.150404569252737, "grad_norm": 0.32317355275154114, "learning_rate": 2.245515092739488e-06, "loss": 0.3674, "step": 3012 }, { "epoch": 2.1511185149928607, "grad_norm": 0.2993209958076477, "learning_rate": 2.2420479370474423e-06, "loss": 0.3488, "step": 3013 }, { "epoch": 2.1518324607329844, "grad_norm": 0.33227065205574036, "learning_rate": 2.23858268638985e-06, "loss": 0.3527, "step": 3014 }, { "epoch": 2.152546406473108, "grad_norm": 0.32486099004745483, "learning_rate": 2.235119343160303e-06, "loss": 0.3684, "step": 3015 }, { "epoch": 2.153260352213232, "grad_norm": 0.3444325029850006, "learning_rate": 2.23165790975108e-06, "loss": 0.4213, "step": 3016 }, { "epoch": 2.1539742979533556, "grad_norm": 0.30067622661590576, "learning_rate": 2.2281983885531428e-06, "loss": 0.338, "step": 3017 }, { "epoch": 2.1546882436934793, "grad_norm": 0.30825135111808777, "learning_rate": 2.224740781956126e-06, "loss": 0.3732, "step": 3018 }, { "epoch": 2.155402189433603, "grad_norm": 0.32050010561943054, "learning_rate": 2.221285092348347e-06, "loss": 0.3863, "step": 3019 }, { "epoch": 2.156116135173727, "grad_norm": 0.2852170765399933, "learning_rate": 2.217831322116797e-06, "loss": 0.3682, "step": 3020 }, { "epoch": 2.1568300809138505, "grad_norm": 0.29628071188926697, "learning_rate": 2.214379473647139e-06, "loss": 0.37, "step": 3021 }, { "epoch": 2.1575440266539743, "grad_norm": 0.3545869290828705, "learning_rate": 2.2109295493237138e-06, "loss": 0.4151, "step": 3022 }, { "epoch": 2.158257972394098, "grad_norm": 0.3333930969238281, "learning_rate": 2.2074815515295313e-06, "loss": 0.3722, "step": 3023 }, { "epoch": 2.1589719181342217, "grad_norm": 0.31420156359672546, "learning_rate": 2.204035482646267e-06, "loss": 0.367, "step": 3024 }, { "epoch": 2.1596858638743455, "grad_norm": 0.2862323820590973, "learning_rate": 2.2005913450542673e-06, "loss": 0.3683, "step": 3025 }, { "epoch": 2.160399809614469, "grad_norm": 0.33538225293159485, "learning_rate": 2.1971491411325475e-06, "loss": 0.4204, "step": 3026 }, { "epoch": 2.161113755354593, "grad_norm": 0.3571988046169281, "learning_rate": 2.1937088732587785e-06, "loss": 0.3838, "step": 3027 }, { "epoch": 2.1618277010947167, "grad_norm": 0.3179175853729248, "learning_rate": 2.190270543809303e-06, "loss": 0.3331, "step": 3028 }, { "epoch": 2.1625416468348404, "grad_norm": 0.3122272491455078, "learning_rate": 2.1868341551591214e-06, "loss": 0.3561, "step": 3029 }, { "epoch": 2.163255592574964, "grad_norm": 0.30867642164230347, "learning_rate": 2.1833997096818897e-06, "loss": 0.3897, "step": 3030 }, { "epoch": 2.163969538315088, "grad_norm": 0.3206799328327179, "learning_rate": 2.1799672097499293e-06, "loss": 0.403, "step": 3031 }, { "epoch": 2.1646834840552116, "grad_norm": 0.3090355396270752, "learning_rate": 2.1765366577342083e-06, "loss": 0.3287, "step": 3032 }, { "epoch": 2.165397429795336, "grad_norm": 0.337970495223999, "learning_rate": 2.17310805600436e-06, "loss": 0.4067, "step": 3033 }, { "epoch": 2.1661113755354595, "grad_norm": 0.3263612389564514, "learning_rate": 2.1696814069286605e-06, "loss": 0.3523, "step": 3034 }, { "epoch": 2.1668253212755832, "grad_norm": 0.3213244676589966, "learning_rate": 2.1662567128740453e-06, "loss": 0.3577, "step": 3035 }, { "epoch": 2.167539267015707, "grad_norm": 0.3519528806209564, "learning_rate": 2.162833976206092e-06, "loss": 0.4047, "step": 3036 }, { "epoch": 2.1682532127558307, "grad_norm": 0.3240700662136078, "learning_rate": 2.1594131992890315e-06, "loss": 0.3749, "step": 3037 }, { "epoch": 2.1689671584959545, "grad_norm": 0.32179099321365356, "learning_rate": 2.155994384485742e-06, "loss": 0.3589, "step": 3038 }, { "epoch": 2.169681104236078, "grad_norm": 0.3170776963233948, "learning_rate": 2.1525775341577404e-06, "loss": 0.3553, "step": 3039 }, { "epoch": 2.170395049976202, "grad_norm": 0.31676310300827026, "learning_rate": 2.1491626506651914e-06, "loss": 0.3731, "step": 3040 }, { "epoch": 2.1711089957163257, "grad_norm": 0.29801782965660095, "learning_rate": 2.1457497363669e-06, "loss": 0.3512, "step": 3041 }, { "epoch": 2.1718229414564494, "grad_norm": 0.32548806071281433, "learning_rate": 2.1423387936203125e-06, "loss": 0.3822, "step": 3042 }, { "epoch": 2.172536887196573, "grad_norm": 0.31850847601890564, "learning_rate": 2.1389298247815104e-06, "loss": 0.3599, "step": 3043 }, { "epoch": 2.173250832936697, "grad_norm": 0.2923532724380493, "learning_rate": 2.13552283220521e-06, "loss": 0.3599, "step": 3044 }, { "epoch": 2.1739647786768206, "grad_norm": 0.33477306365966797, "learning_rate": 2.132117818244771e-06, "loss": 0.3803, "step": 3045 }, { "epoch": 2.1746787244169443, "grad_norm": 0.32199007272720337, "learning_rate": 2.1287147852521763e-06, "loss": 0.3724, "step": 3046 }, { "epoch": 2.175392670157068, "grad_norm": 0.3474103510379791, "learning_rate": 2.125313735578047e-06, "loss": 0.4329, "step": 3047 }, { "epoch": 2.176106615897192, "grad_norm": 0.30923348665237427, "learning_rate": 2.1219146715716332e-06, "loss": 0.3441, "step": 3048 }, { "epoch": 2.1768205616373155, "grad_norm": 0.3164384961128235, "learning_rate": 2.1185175955808106e-06, "loss": 0.3836, "step": 3049 }, { "epoch": 2.1775345073774393, "grad_norm": 0.305702805519104, "learning_rate": 2.115122509952085e-06, "loss": 0.38, "step": 3050 }, { "epoch": 2.178248453117563, "grad_norm": 0.3233560025691986, "learning_rate": 2.1117294170305876e-06, "loss": 0.3839, "step": 3051 }, { "epoch": 2.1789623988576867, "grad_norm": 0.33852988481521606, "learning_rate": 2.1083383191600676e-06, "loss": 0.3631, "step": 3052 }, { "epoch": 2.1796763445978105, "grad_norm": 0.2952287495136261, "learning_rate": 2.1049492186829025e-06, "loss": 0.3198, "step": 3053 }, { "epoch": 2.180390290337934, "grad_norm": 0.3451196551322937, "learning_rate": 2.1015621179400893e-06, "loss": 0.3726, "step": 3054 }, { "epoch": 2.181104236078058, "grad_norm": 0.3434827923774719, "learning_rate": 2.098177019271238e-06, "loss": 0.3737, "step": 3055 }, { "epoch": 2.1818181818181817, "grad_norm": 0.30263781547546387, "learning_rate": 2.0947939250145843e-06, "loss": 0.3437, "step": 3056 }, { "epoch": 2.1825321275583054, "grad_norm": 0.3148142695426941, "learning_rate": 2.0914128375069724e-06, "loss": 0.3972, "step": 3057 }, { "epoch": 2.183246073298429, "grad_norm": 0.33719977736473083, "learning_rate": 2.0880337590838617e-06, "loss": 0.3289, "step": 3058 }, { "epoch": 2.183960019038553, "grad_norm": 0.32223108410835266, "learning_rate": 2.0846566920793265e-06, "loss": 0.3547, "step": 3059 }, { "epoch": 2.1846739647786766, "grad_norm": 0.32754549384117126, "learning_rate": 2.081281638826052e-06, "loss": 0.3582, "step": 3060 }, { "epoch": 2.1853879105188008, "grad_norm": 0.32013681530952454, "learning_rate": 2.0779086016553267e-06, "loss": 0.3941, "step": 3061 }, { "epoch": 2.1861018562589245, "grad_norm": 0.31953102350234985, "learning_rate": 2.0745375828970527e-06, "loss": 0.3882, "step": 3062 }, { "epoch": 2.1868158019990482, "grad_norm": 0.3050871789455414, "learning_rate": 2.071168584879736e-06, "loss": 0.354, "step": 3063 }, { "epoch": 2.187529747739172, "grad_norm": 0.29990777373313904, "learning_rate": 2.0678016099304853e-06, "loss": 0.3672, "step": 3064 }, { "epoch": 2.1882436934792957, "grad_norm": 0.31668567657470703, "learning_rate": 2.0644366603750133e-06, "loss": 0.3736, "step": 3065 }, { "epoch": 2.1889576392194194, "grad_norm": 0.3234982192516327, "learning_rate": 2.061073738537635e-06, "loss": 0.3998, "step": 3066 }, { "epoch": 2.189671584959543, "grad_norm": 0.30414310097694397, "learning_rate": 2.0577128467412597e-06, "loss": 0.3861, "step": 3067 }, { "epoch": 2.190385530699667, "grad_norm": 0.3158671259880066, "learning_rate": 2.054353987307402e-06, "loss": 0.331, "step": 3068 }, { "epoch": 2.1910994764397906, "grad_norm": 0.3521653413772583, "learning_rate": 2.050997162556166e-06, "loss": 0.3949, "step": 3069 }, { "epoch": 2.1918134221799144, "grad_norm": 0.2971859574317932, "learning_rate": 2.047642374806252e-06, "loss": 0.3263, "step": 3070 }, { "epoch": 2.192527367920038, "grad_norm": 0.3339986801147461, "learning_rate": 2.0442896263749547e-06, "loss": 0.3831, "step": 3071 }, { "epoch": 2.193241313660162, "grad_norm": 0.3340981900691986, "learning_rate": 2.0409389195781627e-06, "loss": 0.392, "step": 3072 }, { "epoch": 2.1939552594002856, "grad_norm": 0.30113399028778076, "learning_rate": 2.0375902567303474e-06, "loss": 0.3816, "step": 3073 }, { "epoch": 2.1946692051404093, "grad_norm": 0.2953292727470398, "learning_rate": 2.034243640144573e-06, "loss": 0.3356, "step": 3074 }, { "epoch": 2.195383150880533, "grad_norm": 0.3522700071334839, "learning_rate": 2.030899072132493e-06, "loss": 0.4171, "step": 3075 }, { "epoch": 2.1960970966206568, "grad_norm": 0.2872333228588104, "learning_rate": 2.0275565550043376e-06, "loss": 0.3319, "step": 3076 }, { "epoch": 2.1968110423607805, "grad_norm": 0.31446170806884766, "learning_rate": 2.0242160910689274e-06, "loss": 0.3673, "step": 3077 }, { "epoch": 2.1975249881009042, "grad_norm": 0.3301360011100769, "learning_rate": 2.0208776826336617e-06, "loss": 0.3792, "step": 3078 }, { "epoch": 2.198238933841028, "grad_norm": 0.310077965259552, "learning_rate": 2.0175413320045238e-06, "loss": 0.3505, "step": 3079 }, { "epoch": 2.1989528795811517, "grad_norm": 0.32037970423698425, "learning_rate": 2.0142070414860704e-06, "loss": 0.3496, "step": 3080 }, { "epoch": 2.1996668253212754, "grad_norm": 0.3258970081806183, "learning_rate": 2.0108748133814347e-06, "loss": 0.3595, "step": 3081 }, { "epoch": 2.200380771061399, "grad_norm": 0.3277747333049774, "learning_rate": 2.007544649992333e-06, "loss": 0.3609, "step": 3082 }, { "epoch": 2.201094716801523, "grad_norm": 0.3119693398475647, "learning_rate": 2.004216553619045e-06, "loss": 0.3347, "step": 3083 }, { "epoch": 2.2018086625416466, "grad_norm": 0.3624415099620819, "learning_rate": 2.0008905265604316e-06, "loss": 0.395, "step": 3084 }, { "epoch": 2.2025226082817704, "grad_norm": 0.334560364484787, "learning_rate": 1.9975665711139214e-06, "loss": 0.403, "step": 3085 }, { "epoch": 2.2032365540218946, "grad_norm": 0.30009621381759644, "learning_rate": 1.994244689575508e-06, "loss": 0.322, "step": 3086 }, { "epoch": 2.2039504997620183, "grad_norm": 0.3466108739376068, "learning_rate": 1.990924884239758e-06, "loss": 0.4077, "step": 3087 }, { "epoch": 2.204664445502142, "grad_norm": 0.34048327803611755, "learning_rate": 1.9876071573998036e-06, "loss": 0.3863, "step": 3088 }, { "epoch": 2.2053783912422658, "grad_norm": 0.3180732727050781, "learning_rate": 1.9842915113473348e-06, "loss": 0.3925, "step": 3089 }, { "epoch": 2.2060923369823895, "grad_norm": 0.3187166452407837, "learning_rate": 1.980977948372612e-06, "loss": 0.3594, "step": 3090 }, { "epoch": 2.2068062827225132, "grad_norm": 0.29573261737823486, "learning_rate": 1.977666470764455e-06, "loss": 0.3374, "step": 3091 }, { "epoch": 2.207520228462637, "grad_norm": 0.3188989758491516, "learning_rate": 1.9743570808102402e-06, "loss": 0.3973, "step": 3092 }, { "epoch": 2.2082341742027607, "grad_norm": 0.3084259033203125, "learning_rate": 1.971049780795901e-06, "loss": 0.3693, "step": 3093 }, { "epoch": 2.2089481199428844, "grad_norm": 0.317142516374588, "learning_rate": 1.9677445730059348e-06, "loss": 0.3754, "step": 3094 }, { "epoch": 2.209662065683008, "grad_norm": 0.3306886553764343, "learning_rate": 1.9644414597233834e-06, "loss": 0.3489, "step": 3095 }, { "epoch": 2.210376011423132, "grad_norm": 0.33401167392730713, "learning_rate": 1.9611404432298505e-06, "loss": 0.3242, "step": 3096 }, { "epoch": 2.2110899571632556, "grad_norm": 0.3521692156791687, "learning_rate": 1.9578415258054897e-06, "loss": 0.3775, "step": 3097 }, { "epoch": 2.2118039029033794, "grad_norm": 0.34567514061927795, "learning_rate": 1.9545447097289984e-06, "loss": 0.3756, "step": 3098 }, { "epoch": 2.212517848643503, "grad_norm": 0.3099464774131775, "learning_rate": 1.9512499972776303e-06, "loss": 0.347, "step": 3099 }, { "epoch": 2.213231794383627, "grad_norm": 0.3200329840183258, "learning_rate": 1.947957390727185e-06, "loss": 0.3814, "step": 3100 }, { "epoch": 2.2139457401237506, "grad_norm": 0.29725712537765503, "learning_rate": 1.9446668923520014e-06, "loss": 0.3687, "step": 3101 }, { "epoch": 2.2146596858638743, "grad_norm": 0.3141818344593048, "learning_rate": 1.941378504424968e-06, "loss": 0.3802, "step": 3102 }, { "epoch": 2.215373631603998, "grad_norm": 0.3589174151420593, "learning_rate": 1.9380922292175153e-06, "loss": 0.4075, "step": 3103 }, { "epoch": 2.2160875773441218, "grad_norm": 0.3046371638774872, "learning_rate": 1.9348080689996112e-06, "loss": 0.3417, "step": 3104 }, { "epoch": 2.2168015230842455, "grad_norm": 0.306972473859787, "learning_rate": 1.9315260260397638e-06, "loss": 0.3211, "step": 3105 }, { "epoch": 2.2175154688243692, "grad_norm": 0.30683740973472595, "learning_rate": 1.9282461026050214e-06, "loss": 0.3494, "step": 3106 }, { "epoch": 2.218229414564493, "grad_norm": 0.33157116174697876, "learning_rate": 1.924968300960964e-06, "loss": 0.3917, "step": 3107 }, { "epoch": 2.2189433603046167, "grad_norm": 0.30851563811302185, "learning_rate": 1.9216926233717087e-06, "loss": 0.3725, "step": 3108 }, { "epoch": 2.2196573060447404, "grad_norm": 0.3226998448371887, "learning_rate": 1.918419072099908e-06, "loss": 0.343, "step": 3109 }, { "epoch": 2.220371251784864, "grad_norm": 0.3411026895046234, "learning_rate": 1.9151476494067376e-06, "loss": 0.3987, "step": 3110 }, { "epoch": 2.221085197524988, "grad_norm": 0.30215105414390564, "learning_rate": 1.911878357551911e-06, "loss": 0.3184, "step": 3111 }, { "epoch": 2.2217991432651116, "grad_norm": 0.32328107953071594, "learning_rate": 1.9086111987936677e-06, "loss": 0.3769, "step": 3112 }, { "epoch": 2.222513089005236, "grad_norm": 0.3313390016555786, "learning_rate": 1.9053461753887698e-06, "loss": 0.3668, "step": 3113 }, { "epoch": 2.2232270347453595, "grad_norm": 0.33170783519744873, "learning_rate": 1.902083289592509e-06, "loss": 0.3862, "step": 3114 }, { "epoch": 2.2239409804854833, "grad_norm": 0.29292234778404236, "learning_rate": 1.8988225436587005e-06, "loss": 0.3537, "step": 3115 }, { "epoch": 2.224654926225607, "grad_norm": 0.3177589178085327, "learning_rate": 1.8955639398396759e-06, "loss": 0.3825, "step": 3116 }, { "epoch": 2.2253688719657307, "grad_norm": 0.3067724406719208, "learning_rate": 1.892307480386295e-06, "loss": 0.3763, "step": 3117 }, { "epoch": 2.2260828177058545, "grad_norm": 0.3392550051212311, "learning_rate": 1.8890531675479296e-06, "loss": 0.4188, "step": 3118 }, { "epoch": 2.226796763445978, "grad_norm": 0.30106204748153687, "learning_rate": 1.885801003572474e-06, "loss": 0.3553, "step": 3119 }, { "epoch": 2.227510709186102, "grad_norm": 0.3266313374042511, "learning_rate": 1.8825509907063328e-06, "loss": 0.378, "step": 3120 }, { "epoch": 2.2282246549262257, "grad_norm": 0.3099631071090698, "learning_rate": 1.8793031311944294e-06, "loss": 0.3854, "step": 3121 }, { "epoch": 2.2289386006663494, "grad_norm": 0.3301216661930084, "learning_rate": 1.8760574272802002e-06, "loss": 0.3458, "step": 3122 }, { "epoch": 2.229652546406473, "grad_norm": 0.34616562724113464, "learning_rate": 1.8728138812055863e-06, "loss": 0.3729, "step": 3123 }, { "epoch": 2.230366492146597, "grad_norm": 0.3208323121070862, "learning_rate": 1.8695724952110445e-06, "loss": 0.3746, "step": 3124 }, { "epoch": 2.2310804378867206, "grad_norm": 0.3121297359466553, "learning_rate": 1.8663332715355399e-06, "loss": 0.3818, "step": 3125 }, { "epoch": 2.2317943836268443, "grad_norm": 0.3545406460762024, "learning_rate": 1.8630962124165376e-06, "loss": 0.4066, "step": 3126 }, { "epoch": 2.232508329366968, "grad_norm": 0.31082019209861755, "learning_rate": 1.8598613200900145e-06, "loss": 0.3441, "step": 3127 }, { "epoch": 2.233222275107092, "grad_norm": 0.32405564188957214, "learning_rate": 1.8566285967904462e-06, "loss": 0.3377, "step": 3128 }, { "epoch": 2.2339362208472155, "grad_norm": 0.31234249472618103, "learning_rate": 1.8533980447508138e-06, "loss": 0.3803, "step": 3129 }, { "epoch": 2.2346501665873393, "grad_norm": 0.34129035472869873, "learning_rate": 1.8501696662025937e-06, "loss": 0.4078, "step": 3130 }, { "epoch": 2.235364112327463, "grad_norm": 0.3159407675266266, "learning_rate": 1.8469434633757672e-06, "loss": 0.3738, "step": 3131 }, { "epoch": 2.2360780580675867, "grad_norm": 0.29553547501564026, "learning_rate": 1.843719438498806e-06, "loss": 0.344, "step": 3132 }, { "epoch": 2.2367920038077105, "grad_norm": 0.33445867896080017, "learning_rate": 1.8404975937986825e-06, "loss": 0.3757, "step": 3133 }, { "epoch": 2.237505949547834, "grad_norm": 0.2955077290534973, "learning_rate": 1.8372779315008627e-06, "loss": 0.3409, "step": 3134 }, { "epoch": 2.238219895287958, "grad_norm": 0.3233815133571625, "learning_rate": 1.8340604538293017e-06, "loss": 0.3786, "step": 3135 }, { "epoch": 2.2389338410280817, "grad_norm": 0.3229714334011078, "learning_rate": 1.8308451630064484e-06, "loss": 0.3565, "step": 3136 }, { "epoch": 2.2396477867682054, "grad_norm": 0.34022411704063416, "learning_rate": 1.8276320612532421e-06, "loss": 0.4025, "step": 3137 }, { "epoch": 2.2403617325083296, "grad_norm": 0.33247148990631104, "learning_rate": 1.8244211507891064e-06, "loss": 0.3925, "step": 3138 }, { "epoch": 2.2410756782484533, "grad_norm": 0.3093615472316742, "learning_rate": 1.8212124338319538e-06, "loss": 0.3405, "step": 3139 }, { "epoch": 2.241789623988577, "grad_norm": 0.3300589919090271, "learning_rate": 1.8180059125981826e-06, "loss": 0.3738, "step": 3140 }, { "epoch": 2.242503569728701, "grad_norm": 0.3333210349082947, "learning_rate": 1.8148015893026727e-06, "loss": 0.3742, "step": 3141 }, { "epoch": 2.2432175154688245, "grad_norm": 0.3170190751552582, "learning_rate": 1.8115994661587832e-06, "loss": 0.3785, "step": 3142 }, { "epoch": 2.2439314612089483, "grad_norm": 0.30098360776901245, "learning_rate": 1.8083995453783604e-06, "loss": 0.38, "step": 3143 }, { "epoch": 2.244645406949072, "grad_norm": 0.30730128288269043, "learning_rate": 1.8052018291717216e-06, "loss": 0.384, "step": 3144 }, { "epoch": 2.2453593526891957, "grad_norm": 0.30871880054473877, "learning_rate": 1.8020063197476667e-06, "loss": 0.3398, "step": 3145 }, { "epoch": 2.2460732984293195, "grad_norm": 0.3424612581729889, "learning_rate": 1.7988130193134712e-06, "loss": 0.3767, "step": 3146 }, { "epoch": 2.246787244169443, "grad_norm": 0.32774800062179565, "learning_rate": 1.7956219300748796e-06, "loss": 0.3757, "step": 3147 }, { "epoch": 2.247501189909567, "grad_norm": 0.3173527121543884, "learning_rate": 1.7924330542361151e-06, "loss": 0.3347, "step": 3148 }, { "epoch": 2.2482151356496907, "grad_norm": 0.3313104808330536, "learning_rate": 1.78924639399987e-06, "loss": 0.3728, "step": 3149 }, { "epoch": 2.2489290813898144, "grad_norm": 0.2881266176700592, "learning_rate": 1.7860619515673034e-06, "loss": 0.3658, "step": 3150 }, { "epoch": 2.249643027129938, "grad_norm": 0.2980828881263733, "learning_rate": 1.7828797291380456e-06, "loss": 0.3651, "step": 3151 }, { "epoch": 2.250356972870062, "grad_norm": 0.32741037011146545, "learning_rate": 1.7796997289101947e-06, "loss": 0.3624, "step": 3152 }, { "epoch": 2.2510709186101856, "grad_norm": 0.3535408079624176, "learning_rate": 1.7765219530803101e-06, "loss": 0.3917, "step": 3153 }, { "epoch": 2.2517848643503093, "grad_norm": 0.3289972245693207, "learning_rate": 1.7733464038434145e-06, "loss": 0.3856, "step": 3154 }, { "epoch": 2.252498810090433, "grad_norm": 0.32745999097824097, "learning_rate": 1.770173083392997e-06, "loss": 0.3585, "step": 3155 }, { "epoch": 2.253212755830557, "grad_norm": 0.3297242820262909, "learning_rate": 1.7670019939210025e-06, "loss": 0.3879, "step": 3156 }, { "epoch": 2.2539267015706805, "grad_norm": 0.296766459941864, "learning_rate": 1.7638331376178385e-06, "loss": 0.3758, "step": 3157 }, { "epoch": 2.2546406473108043, "grad_norm": 0.30405062437057495, "learning_rate": 1.7606665166723674e-06, "loss": 0.3604, "step": 3158 }, { "epoch": 2.255354593050928, "grad_norm": 0.32584893703460693, "learning_rate": 1.7575021332719117e-06, "loss": 0.3625, "step": 3159 }, { "epoch": 2.2560685387910517, "grad_norm": 0.32027190923690796, "learning_rate": 1.7543399896022406e-06, "loss": 0.3428, "step": 3160 }, { "epoch": 2.2567824845311755, "grad_norm": 0.30972662568092346, "learning_rate": 1.7511800878475832e-06, "loss": 0.3556, "step": 3161 }, { "epoch": 2.257496430271299, "grad_norm": 0.3106229305267334, "learning_rate": 1.748022430190619e-06, "loss": 0.4015, "step": 3162 }, { "epoch": 2.258210376011423, "grad_norm": 0.2998780310153961, "learning_rate": 1.7448670188124727e-06, "loss": 0.3714, "step": 3163 }, { "epoch": 2.2589243217515467, "grad_norm": 0.3022858500480652, "learning_rate": 1.7417138558927244e-06, "loss": 0.3518, "step": 3164 }, { "epoch": 2.2596382674916704, "grad_norm": 0.3597790598869324, "learning_rate": 1.7385629436093958e-06, "loss": 0.3965, "step": 3165 }, { "epoch": 2.260352213231794, "grad_norm": 0.31283387541770935, "learning_rate": 1.7354142841389537e-06, "loss": 0.3751, "step": 3166 }, { "epoch": 2.2610661589719183, "grad_norm": 0.3165439963340759, "learning_rate": 1.7322678796563124e-06, "loss": 0.4092, "step": 3167 }, { "epoch": 2.261780104712042, "grad_norm": 0.30886438488960266, "learning_rate": 1.7291237323348287e-06, "loss": 0.365, "step": 3168 }, { "epoch": 2.262494050452166, "grad_norm": 0.32916638255119324, "learning_rate": 1.7259818443462955e-06, "loss": 0.3981, "step": 3169 }, { "epoch": 2.2632079961922895, "grad_norm": 0.30567121505737305, "learning_rate": 1.7228422178609488e-06, "loss": 0.3584, "step": 3170 }, { "epoch": 2.2639219419324133, "grad_norm": 0.3113281726837158, "learning_rate": 1.7197048550474643e-06, "loss": 0.3738, "step": 3171 }, { "epoch": 2.264635887672537, "grad_norm": 0.31449511647224426, "learning_rate": 1.716569758072948e-06, "loss": 0.3759, "step": 3172 }, { "epoch": 2.2653498334126607, "grad_norm": 0.30202192068099976, "learning_rate": 1.7134369291029456e-06, "loss": 0.3606, "step": 3173 }, { "epoch": 2.2660637791527845, "grad_norm": 0.308538556098938, "learning_rate": 1.7103063703014372e-06, "loss": 0.3687, "step": 3174 }, { "epoch": 2.266777724892908, "grad_norm": 0.33726397156715393, "learning_rate": 1.707178083830829e-06, "loss": 0.3973, "step": 3175 }, { "epoch": 2.267491670633032, "grad_norm": 0.32160940766334534, "learning_rate": 1.7040520718519644e-06, "loss": 0.3269, "step": 3176 }, { "epoch": 2.2682056163731557, "grad_norm": 0.3278031647205353, "learning_rate": 1.7009283365241086e-06, "loss": 0.3866, "step": 3177 }, { "epoch": 2.2689195621132794, "grad_norm": 0.3194325268268585, "learning_rate": 1.6978068800049624e-06, "loss": 0.357, "step": 3178 }, { "epoch": 2.269633507853403, "grad_norm": 0.31157365441322327, "learning_rate": 1.6946877044506443e-06, "loss": 0.3646, "step": 3179 }, { "epoch": 2.270347453593527, "grad_norm": 0.3117707371711731, "learning_rate": 1.6915708120157042e-06, "loss": 0.3769, "step": 3180 }, { "epoch": 2.2710613993336506, "grad_norm": 0.35465526580810547, "learning_rate": 1.6884562048531089e-06, "loss": 0.3991, "step": 3181 }, { "epoch": 2.2717753450737743, "grad_norm": 0.3193246126174927, "learning_rate": 1.6853438851142517e-06, "loss": 0.3508, "step": 3182 }, { "epoch": 2.272489290813898, "grad_norm": 0.3156697154045105, "learning_rate": 1.6822338549489447e-06, "loss": 0.3304, "step": 3183 }, { "epoch": 2.273203236554022, "grad_norm": 0.33044975996017456, "learning_rate": 1.679126116505415e-06, "loss": 0.356, "step": 3184 }, { "epoch": 2.2739171822941455, "grad_norm": 0.3125957250595093, "learning_rate": 1.6760206719303107e-06, "loss": 0.349, "step": 3185 }, { "epoch": 2.2746311280342693, "grad_norm": 0.320894330739975, "learning_rate": 1.6729175233686957e-06, "loss": 0.3882, "step": 3186 }, { "epoch": 2.275345073774393, "grad_norm": 0.3128517270088196, "learning_rate": 1.6698166729640425e-06, "loss": 0.3581, "step": 3187 }, { "epoch": 2.2760590195145167, "grad_norm": 0.3427741527557373, "learning_rate": 1.666718122858244e-06, "loss": 0.3777, "step": 3188 }, { "epoch": 2.2767729652546405, "grad_norm": 0.31792664527893066, "learning_rate": 1.6636218751915973e-06, "loss": 0.347, "step": 3189 }, { "epoch": 2.2774869109947646, "grad_norm": 0.327566534280777, "learning_rate": 1.6605279321028138e-06, "loss": 0.3415, "step": 3190 }, { "epoch": 2.2782008567348884, "grad_norm": 0.32561957836151123, "learning_rate": 1.6574362957290091e-06, "loss": 0.3835, "step": 3191 }, { "epoch": 2.278914802475012, "grad_norm": 0.3182944357395172, "learning_rate": 1.6543469682057105e-06, "loss": 0.3779, "step": 3192 }, { "epoch": 2.279628748215136, "grad_norm": 0.36268308758735657, "learning_rate": 1.6512599516668443e-06, "loss": 0.3957, "step": 3193 }, { "epoch": 2.2803426939552596, "grad_norm": 0.3296567499637604, "learning_rate": 1.648175248244745e-06, "loss": 0.377, "step": 3194 }, { "epoch": 2.2810566396953833, "grad_norm": 0.29413026571273804, "learning_rate": 1.6450928600701505e-06, "loss": 0.3089, "step": 3195 }, { "epoch": 2.281770585435507, "grad_norm": 0.33130383491516113, "learning_rate": 1.6420127892721926e-06, "loss": 0.3723, "step": 3196 }, { "epoch": 2.2824845311756308, "grad_norm": 0.34188342094421387, "learning_rate": 1.6389350379784097e-06, "loss": 0.373, "step": 3197 }, { "epoch": 2.2831984769157545, "grad_norm": 0.3285056948661804, "learning_rate": 1.6358596083147342e-06, "loss": 0.357, "step": 3198 }, { "epoch": 2.2839124226558782, "grad_norm": 0.30042386054992676, "learning_rate": 1.6327865024054984e-06, "loss": 0.3555, "step": 3199 }, { "epoch": 2.284626368396002, "grad_norm": 0.32933545112609863, "learning_rate": 1.6297157223734228e-06, "loss": 0.3717, "step": 3200 }, { "epoch": 2.2853403141361257, "grad_norm": 0.30113887786865234, "learning_rate": 1.6266472703396286e-06, "loss": 0.376, "step": 3201 }, { "epoch": 2.2860542598762494, "grad_norm": 0.3111390769481659, "learning_rate": 1.6235811484236247e-06, "loss": 0.3877, "step": 3202 }, { "epoch": 2.286768205616373, "grad_norm": 0.3044652044773102, "learning_rate": 1.6205173587433094e-06, "loss": 0.3292, "step": 3203 }, { "epoch": 2.287482151356497, "grad_norm": 0.3641783893108368, "learning_rate": 1.617455903414974e-06, "loss": 0.416, "step": 3204 }, { "epoch": 2.2881960970966206, "grad_norm": 0.30634692311286926, "learning_rate": 1.614396784553297e-06, "loss": 0.3503, "step": 3205 }, { "epoch": 2.2889100428367444, "grad_norm": 0.31968289613723755, "learning_rate": 1.611340004271339e-06, "loss": 0.3976, "step": 3206 }, { "epoch": 2.289623988576868, "grad_norm": 0.3156765401363373, "learning_rate": 1.6082855646805485e-06, "loss": 0.4039, "step": 3207 }, { "epoch": 2.290337934316992, "grad_norm": 0.3067564070224762, "learning_rate": 1.6052334678907583e-06, "loss": 0.384, "step": 3208 }, { "epoch": 2.2910518800571156, "grad_norm": 0.2873035967350006, "learning_rate": 1.6021837160101783e-06, "loss": 0.3675, "step": 3209 }, { "epoch": 2.2917658257972393, "grad_norm": 0.33663269877433777, "learning_rate": 1.5991363111454023e-06, "loss": 0.4024, "step": 3210 }, { "epoch": 2.292479771537363, "grad_norm": 0.2841077148914337, "learning_rate": 1.5960912554014047e-06, "loss": 0.3436, "step": 3211 }, { "epoch": 2.2931937172774868, "grad_norm": 0.3052608370780945, "learning_rate": 1.5930485508815302e-06, "loss": 0.3546, "step": 3212 }, { "epoch": 2.2939076630176105, "grad_norm": 0.3115614652633667, "learning_rate": 1.5900081996875083e-06, "loss": 0.3619, "step": 3213 }, { "epoch": 2.2946216087577342, "grad_norm": 0.32519757747650146, "learning_rate": 1.5869702039194357e-06, "loss": 0.3706, "step": 3214 }, { "epoch": 2.295335554497858, "grad_norm": 0.35075098276138306, "learning_rate": 1.5839345656757844e-06, "loss": 0.406, "step": 3215 }, { "epoch": 2.2960495002379817, "grad_norm": 0.30977508425712585, "learning_rate": 1.5809012870533996e-06, "loss": 0.3589, "step": 3216 }, { "epoch": 2.2967634459781054, "grad_norm": 0.3378070890903473, "learning_rate": 1.5778703701474969e-06, "loss": 0.4321, "step": 3217 }, { "epoch": 2.297477391718229, "grad_norm": 0.3020024597644806, "learning_rate": 1.574841817051656e-06, "loss": 0.3279, "step": 3218 }, { "epoch": 2.2981913374583534, "grad_norm": 0.3483741879463196, "learning_rate": 1.571815629857829e-06, "loss": 0.3836, "step": 3219 }, { "epoch": 2.298905283198477, "grad_norm": 0.29773107171058655, "learning_rate": 1.5687918106563326e-06, "loss": 0.3217, "step": 3220 }, { "epoch": 2.299619228938601, "grad_norm": 0.2972620725631714, "learning_rate": 1.565770361535845e-06, "loss": 0.3501, "step": 3221 }, { "epoch": 2.3003331746787246, "grad_norm": 0.29882651567459106, "learning_rate": 1.5627512845834092e-06, "loss": 0.3737, "step": 3222 }, { "epoch": 2.3010471204188483, "grad_norm": 0.30821913480758667, "learning_rate": 1.5597345818844323e-06, "loss": 0.3889, "step": 3223 }, { "epoch": 2.301761066158972, "grad_norm": 0.31530851125717163, "learning_rate": 1.5567202555226756e-06, "loss": 0.3706, "step": 3224 }, { "epoch": 2.3024750118990958, "grad_norm": 0.3204335570335388, "learning_rate": 1.553708307580265e-06, "loss": 0.3429, "step": 3225 }, { "epoch": 2.3031889576392195, "grad_norm": 0.31654226779937744, "learning_rate": 1.5506987401376794e-06, "loss": 0.3723, "step": 3226 }, { "epoch": 2.3039029033793432, "grad_norm": 0.3099775016307831, "learning_rate": 1.5476915552737532e-06, "loss": 0.3474, "step": 3227 }, { "epoch": 2.304616849119467, "grad_norm": 0.34430843591690063, "learning_rate": 1.544686755065677e-06, "loss": 0.4094, "step": 3228 }, { "epoch": 2.3053307948595907, "grad_norm": 0.3079422116279602, "learning_rate": 1.5416843415889965e-06, "loss": 0.3485, "step": 3229 }, { "epoch": 2.3060447405997144, "grad_norm": 0.32381075620651245, "learning_rate": 1.5386843169176025e-06, "loss": 0.4008, "step": 3230 }, { "epoch": 2.306758686339838, "grad_norm": 0.2838936150074005, "learning_rate": 1.535686683123741e-06, "loss": 0.3122, "step": 3231 }, { "epoch": 2.307472632079962, "grad_norm": 0.3262771964073181, "learning_rate": 1.532691442278006e-06, "loss": 0.3737, "step": 3232 }, { "epoch": 2.3081865778200856, "grad_norm": 0.3015210032463074, "learning_rate": 1.5296985964493344e-06, "loss": 0.3562, "step": 3233 }, { "epoch": 2.3089005235602094, "grad_norm": 0.3247354030609131, "learning_rate": 1.5267081477050132e-06, "loss": 0.3711, "step": 3234 }, { "epoch": 2.309614469300333, "grad_norm": 0.3244098722934723, "learning_rate": 1.5237200981106741e-06, "loss": 0.3794, "step": 3235 }, { "epoch": 2.310328415040457, "grad_norm": 0.27465254068374634, "learning_rate": 1.5207344497302862e-06, "loss": 0.3126, "step": 3236 }, { "epoch": 2.3110423607805806, "grad_norm": 0.30310410261154175, "learning_rate": 1.5177512046261667e-06, "loss": 0.3802, "step": 3237 }, { "epoch": 2.3117563065207043, "grad_norm": 0.30037254095077515, "learning_rate": 1.5147703648589663e-06, "loss": 0.4109, "step": 3238 }, { "epoch": 2.312470252260828, "grad_norm": 0.29890236258506775, "learning_rate": 1.5117919324876807e-06, "loss": 0.3428, "step": 3239 }, { "epoch": 2.3131841980009518, "grad_norm": 0.3358297049999237, "learning_rate": 1.5088159095696365e-06, "loss": 0.3511, "step": 3240 }, { "epoch": 2.3138981437410755, "grad_norm": 0.34607386589050293, "learning_rate": 1.5058422981604998e-06, "loss": 0.368, "step": 3241 }, { "epoch": 2.3146120894811997, "grad_norm": 0.3394908905029297, "learning_rate": 1.5028711003142725e-06, "loss": 0.3886, "step": 3242 }, { "epoch": 2.3153260352213234, "grad_norm": 0.3194403052330017, "learning_rate": 1.4999023180832834e-06, "loss": 0.3942, "step": 3243 }, { "epoch": 2.316039980961447, "grad_norm": 0.32160699367523193, "learning_rate": 1.4969359535181977e-06, "loss": 0.3638, "step": 3244 }, { "epoch": 2.316753926701571, "grad_norm": 0.331973135471344, "learning_rate": 1.4939720086680116e-06, "loss": 0.3372, "step": 3245 }, { "epoch": 2.3174678724416946, "grad_norm": 0.3297324478626251, "learning_rate": 1.4910104855800429e-06, "loss": 0.4044, "step": 3246 }, { "epoch": 2.3181818181818183, "grad_norm": 0.2832113802433014, "learning_rate": 1.4880513862999441e-06, "loss": 0.3088, "step": 3247 }, { "epoch": 2.318895763921942, "grad_norm": 0.3290097713470459, "learning_rate": 1.4850947128716914e-06, "loss": 0.4239, "step": 3248 }, { "epoch": 2.319609709662066, "grad_norm": 0.30601778626441956, "learning_rate": 1.4821404673375838e-06, "loss": 0.3354, "step": 3249 }, { "epoch": 2.3203236554021895, "grad_norm": 0.32143434882164, "learning_rate": 1.4791886517382415e-06, "loss": 0.3958, "step": 3250 }, { "epoch": 2.3210376011423133, "grad_norm": 0.30371996760368347, "learning_rate": 1.476239268112612e-06, "loss": 0.3359, "step": 3251 }, { "epoch": 2.321751546882437, "grad_norm": 0.3014746308326721, "learning_rate": 1.4732923184979563e-06, "loss": 0.3512, "step": 3252 }, { "epoch": 2.3224654926225607, "grad_norm": 0.3074491620063782, "learning_rate": 1.4703478049298604e-06, "loss": 0.357, "step": 3253 }, { "epoch": 2.3231794383626845, "grad_norm": 0.3306367099285126, "learning_rate": 1.4674057294422245e-06, "loss": 0.4214, "step": 3254 }, { "epoch": 2.323893384102808, "grad_norm": 0.31929782032966614, "learning_rate": 1.4644660940672628e-06, "loss": 0.3738, "step": 3255 }, { "epoch": 2.324607329842932, "grad_norm": 0.3244269788265228, "learning_rate": 1.4615289008355077e-06, "loss": 0.3999, "step": 3256 }, { "epoch": 2.3253212755830557, "grad_norm": 0.28336966037750244, "learning_rate": 1.458594151775804e-06, "loss": 0.3367, "step": 3257 }, { "epoch": 2.3260352213231794, "grad_norm": 0.32148173451423645, "learning_rate": 1.455661848915305e-06, "loss": 0.3889, "step": 3258 }, { "epoch": 2.326749167063303, "grad_norm": 0.32394590973854065, "learning_rate": 1.4527319942794776e-06, "loss": 0.3661, "step": 3259 }, { "epoch": 2.327463112803427, "grad_norm": 0.3378584384918213, "learning_rate": 1.4498045898920988e-06, "loss": 0.3915, "step": 3260 }, { "epoch": 2.3281770585435506, "grad_norm": 0.3018362522125244, "learning_rate": 1.446879637775247e-06, "loss": 0.3243, "step": 3261 }, { "epoch": 2.3288910042836743, "grad_norm": 0.32550713419914246, "learning_rate": 1.4439571399493146e-06, "loss": 0.4207, "step": 3262 }, { "epoch": 2.329604950023798, "grad_norm": 0.31254905462265015, "learning_rate": 1.441037098432993e-06, "loss": 0.3775, "step": 3263 }, { "epoch": 2.330318895763922, "grad_norm": 0.3186340928077698, "learning_rate": 1.438119515243277e-06, "loss": 0.3697, "step": 3264 }, { "epoch": 2.3310328415040455, "grad_norm": 0.3248949944972992, "learning_rate": 1.4352043923954666e-06, "loss": 0.3969, "step": 3265 }, { "epoch": 2.3317467872441693, "grad_norm": 0.31037217378616333, "learning_rate": 1.4322917319031626e-06, "loss": 0.3639, "step": 3266 }, { "epoch": 2.332460732984293, "grad_norm": 0.32751545310020447, "learning_rate": 1.4293815357782592e-06, "loss": 0.3635, "step": 3267 }, { "epoch": 2.3331746787244168, "grad_norm": 0.32149484753608704, "learning_rate": 1.4264738060309551e-06, "loss": 0.4181, "step": 3268 }, { "epoch": 2.3338886244645405, "grad_norm": 0.29442915320396423, "learning_rate": 1.4235685446697433e-06, "loss": 0.3691, "step": 3269 }, { "epoch": 2.334602570204664, "grad_norm": 0.29248425364494324, "learning_rate": 1.4206657537014078e-06, "loss": 0.3623, "step": 3270 }, { "epoch": 2.335316515944788, "grad_norm": 0.31392431259155273, "learning_rate": 1.4177654351310316e-06, "loss": 0.3946, "step": 3271 }, { "epoch": 2.336030461684912, "grad_norm": 0.3032830059528351, "learning_rate": 1.414867590961989e-06, "loss": 0.348, "step": 3272 }, { "epoch": 2.336744407425036, "grad_norm": 0.3233291208744049, "learning_rate": 1.4119722231959405e-06, "loss": 0.3767, "step": 3273 }, { "epoch": 2.3374583531651596, "grad_norm": 0.3411926329135895, "learning_rate": 1.409079333832843e-06, "loss": 0.3726, "step": 3274 }, { "epoch": 2.3381722989052833, "grad_norm": 0.3385508954524994, "learning_rate": 1.4061889248709343e-06, "loss": 0.4065, "step": 3275 }, { "epoch": 2.338886244645407, "grad_norm": 0.29721567034721375, "learning_rate": 1.4033009983067454e-06, "loss": 0.3701, "step": 3276 }, { "epoch": 2.339600190385531, "grad_norm": 0.2942933142185211, "learning_rate": 1.4004155561350874e-06, "loss": 0.3549, "step": 3277 }, { "epoch": 2.3403141361256545, "grad_norm": 0.3077782094478607, "learning_rate": 1.397532600349058e-06, "loss": 0.3702, "step": 3278 }, { "epoch": 2.3410280818657783, "grad_norm": 0.297897070646286, "learning_rate": 1.3946521329400397e-06, "loss": 0.3337, "step": 3279 }, { "epoch": 2.341742027605902, "grad_norm": 0.3552298843860626, "learning_rate": 1.3917741558976894e-06, "loss": 0.4012, "step": 3280 }, { "epoch": 2.3424559733460257, "grad_norm": 0.3262600600719452, "learning_rate": 1.3888986712099506e-06, "loss": 0.3949, "step": 3281 }, { "epoch": 2.3431699190861495, "grad_norm": 0.29478880763053894, "learning_rate": 1.3860256808630429e-06, "loss": 0.3378, "step": 3282 }, { "epoch": 2.343883864826273, "grad_norm": 0.31052976846694946, "learning_rate": 1.38315518684146e-06, "loss": 0.3568, "step": 3283 }, { "epoch": 2.344597810566397, "grad_norm": 0.31571969389915466, "learning_rate": 1.3802871911279752e-06, "loss": 0.3937, "step": 3284 }, { "epoch": 2.3453117563065207, "grad_norm": 0.2926078140735626, "learning_rate": 1.3774216957036368e-06, "loss": 0.3726, "step": 3285 }, { "epoch": 2.3460257020466444, "grad_norm": 0.3061274588108063, "learning_rate": 1.3745587025477619e-06, "loss": 0.4083, "step": 3286 }, { "epoch": 2.346739647786768, "grad_norm": 0.29176071286201477, "learning_rate": 1.3716982136379402e-06, "loss": 0.3303, "step": 3287 }, { "epoch": 2.347453593526892, "grad_norm": 0.3038147985935211, "learning_rate": 1.3688402309500353e-06, "loss": 0.351, "step": 3288 }, { "epoch": 2.3481675392670156, "grad_norm": 0.31772053241729736, "learning_rate": 1.3659847564581746e-06, "loss": 0.3914, "step": 3289 }, { "epoch": 2.3488814850071393, "grad_norm": 0.3005426824092865, "learning_rate": 1.3631317921347564e-06, "loss": 0.3594, "step": 3290 }, { "epoch": 2.349595430747263, "grad_norm": 0.3125799894332886, "learning_rate": 1.360281339950446e-06, "loss": 0.3798, "step": 3291 }, { "epoch": 2.350309376487387, "grad_norm": 0.28415918350219727, "learning_rate": 1.3574334018741685e-06, "loss": 0.3412, "step": 3292 }, { "epoch": 2.3510233222275105, "grad_norm": 0.33047863841056824, "learning_rate": 1.3545879798731165e-06, "loss": 0.3732, "step": 3293 }, { "epoch": 2.3517372679676343, "grad_norm": 0.30380329489707947, "learning_rate": 1.351745075912746e-06, "loss": 0.3431, "step": 3294 }, { "epoch": 2.3524512137077584, "grad_norm": 0.35068100690841675, "learning_rate": 1.3489046919567683e-06, "loss": 0.4081, "step": 3295 }, { "epoch": 2.353165159447882, "grad_norm": 0.3263034224510193, "learning_rate": 1.3460668299671576e-06, "loss": 0.3712, "step": 3296 }, { "epoch": 2.353879105188006, "grad_norm": 0.3307804763317108, "learning_rate": 1.3432314919041478e-06, "loss": 0.4023, "step": 3297 }, { "epoch": 2.3545930509281296, "grad_norm": 0.33391454815864563, "learning_rate": 1.3403986797262254e-06, "loss": 0.351, "step": 3298 }, { "epoch": 2.3553069966682534, "grad_norm": 0.33166882395744324, "learning_rate": 1.3375683953901325e-06, "loss": 0.3701, "step": 3299 }, { "epoch": 2.356020942408377, "grad_norm": 0.3363358974456787, "learning_rate": 1.3347406408508695e-06, "loss": 0.3709, "step": 3300 }, { "epoch": 2.356734888148501, "grad_norm": 0.2978459298610687, "learning_rate": 1.3319154180616838e-06, "loss": 0.3442, "step": 3301 }, { "epoch": 2.3574488338886246, "grad_norm": 0.2825360894203186, "learning_rate": 1.3290927289740773e-06, "loss": 0.3739, "step": 3302 }, { "epoch": 2.3581627796287483, "grad_norm": 0.31953367590904236, "learning_rate": 1.326272575537803e-06, "loss": 0.3934, "step": 3303 }, { "epoch": 2.358876725368872, "grad_norm": 0.33753442764282227, "learning_rate": 1.3234549597008572e-06, "loss": 0.3744, "step": 3304 }, { "epoch": 2.359590671108996, "grad_norm": 0.32387807965278625, "learning_rate": 1.3206398834094886e-06, "loss": 0.3441, "step": 3305 }, { "epoch": 2.3603046168491195, "grad_norm": 0.3406265079975128, "learning_rate": 1.317827348608191e-06, "loss": 0.4013, "step": 3306 }, { "epoch": 2.3610185625892433, "grad_norm": 0.3092511296272278, "learning_rate": 1.3150173572396985e-06, "loss": 0.35, "step": 3307 }, { "epoch": 2.361732508329367, "grad_norm": 0.32707446813583374, "learning_rate": 1.3122099112449926e-06, "loss": 0.3914, "step": 3308 }, { "epoch": 2.3624464540694907, "grad_norm": 0.307940810918808, "learning_rate": 1.3094050125632973e-06, "loss": 0.3528, "step": 3309 }, { "epoch": 2.3631603998096145, "grad_norm": 0.3110640347003937, "learning_rate": 1.3066026631320733e-06, "loss": 0.3871, "step": 3310 }, { "epoch": 2.363874345549738, "grad_norm": 0.3150292932987213, "learning_rate": 1.3038028648870204e-06, "loss": 0.3696, "step": 3311 }, { "epoch": 2.364588291289862, "grad_norm": 0.3511156439781189, "learning_rate": 1.3010056197620813e-06, "loss": 0.3904, "step": 3312 }, { "epoch": 2.3653022370299857, "grad_norm": 0.30848604440689087, "learning_rate": 1.298210929689429e-06, "loss": 0.3467, "step": 3313 }, { "epoch": 2.3660161827701094, "grad_norm": 0.33009716868400574, "learning_rate": 1.2954187965994758e-06, "loss": 0.392, "step": 3314 }, { "epoch": 2.366730128510233, "grad_norm": 0.3421522378921509, "learning_rate": 1.2926292224208664e-06, "loss": 0.3925, "step": 3315 }, { "epoch": 2.367444074250357, "grad_norm": 0.2989375591278076, "learning_rate": 1.2898422090804792e-06, "loss": 0.334, "step": 3316 }, { "epoch": 2.3681580199904806, "grad_norm": 0.340727835893631, "learning_rate": 1.28705775850342e-06, "loss": 0.4213, "step": 3317 }, { "epoch": 2.3688719657306043, "grad_norm": 0.28640398383140564, "learning_rate": 1.2842758726130283e-06, "loss": 0.3416, "step": 3318 }, { "epoch": 2.369585911470728, "grad_norm": 0.28490397334098816, "learning_rate": 1.281496553330872e-06, "loss": 0.358, "step": 3319 }, { "epoch": 2.370299857210852, "grad_norm": 0.3013041913509369, "learning_rate": 1.2787198025767417e-06, "loss": 0.3557, "step": 3320 }, { "epoch": 2.3710138029509755, "grad_norm": 0.3352706730365753, "learning_rate": 1.27594562226866e-06, "loss": 0.3686, "step": 3321 }, { "epoch": 2.3717277486910993, "grad_norm": 0.3300008475780487, "learning_rate": 1.2731740143228683e-06, "loss": 0.4106, "step": 3322 }, { "epoch": 2.372441694431223, "grad_norm": 0.31866806745529175, "learning_rate": 1.270404980653836e-06, "loss": 0.3703, "step": 3323 }, { "epoch": 2.373155640171347, "grad_norm": 0.34348803758621216, "learning_rate": 1.2676385231742493e-06, "loss": 0.3405, "step": 3324 }, { "epoch": 2.373869585911471, "grad_norm": 0.31288599967956543, "learning_rate": 1.264874643795021e-06, "loss": 0.3752, "step": 3325 }, { "epoch": 2.3745835316515946, "grad_norm": 0.303318589925766, "learning_rate": 1.262113344425277e-06, "loss": 0.4022, "step": 3326 }, { "epoch": 2.3752974773917184, "grad_norm": 0.3101738691329956, "learning_rate": 1.259354626972365e-06, "loss": 0.37, "step": 3327 }, { "epoch": 2.376011423131842, "grad_norm": 0.3248431384563446, "learning_rate": 1.2565984933418495e-06, "loss": 0.3812, "step": 3328 }, { "epoch": 2.376725368871966, "grad_norm": 0.2997415065765381, "learning_rate": 1.2538449454375063e-06, "loss": 0.3792, "step": 3329 }, { "epoch": 2.3774393146120896, "grad_norm": 0.3076637089252472, "learning_rate": 1.2510939851613285e-06, "loss": 0.3873, "step": 3330 }, { "epoch": 2.3781532603522133, "grad_norm": 0.3082558214664459, "learning_rate": 1.2483456144135237e-06, "loss": 0.3567, "step": 3331 }, { "epoch": 2.378867206092337, "grad_norm": 0.29710885882377625, "learning_rate": 1.2455998350925042e-06, "loss": 0.3866, "step": 3332 }, { "epoch": 2.3795811518324608, "grad_norm": 0.2991340160369873, "learning_rate": 1.242856649094899e-06, "loss": 0.3766, "step": 3333 }, { "epoch": 2.3802950975725845, "grad_norm": 0.3272038996219635, "learning_rate": 1.2401160583155407e-06, "loss": 0.4044, "step": 3334 }, { "epoch": 2.3810090433127082, "grad_norm": 0.312645822763443, "learning_rate": 1.2373780646474748e-06, "loss": 0.3807, "step": 3335 }, { "epoch": 2.381722989052832, "grad_norm": 0.2990264296531677, "learning_rate": 1.234642669981946e-06, "loss": 0.3806, "step": 3336 }, { "epoch": 2.3824369347929557, "grad_norm": 0.30579274892807007, "learning_rate": 1.2319098762084104e-06, "loss": 0.3441, "step": 3337 }, { "epoch": 2.3831508805330794, "grad_norm": 0.31674233078956604, "learning_rate": 1.2291796852145216e-06, "loss": 0.3376, "step": 3338 }, { "epoch": 2.383864826273203, "grad_norm": 0.33739492297172546, "learning_rate": 1.22645209888614e-06, "loss": 0.3851, "step": 3339 }, { "epoch": 2.384578772013327, "grad_norm": 0.29859209060668945, "learning_rate": 1.223727119107327e-06, "loss": 0.3468, "step": 3340 }, { "epoch": 2.3852927177534506, "grad_norm": 0.3249276280403137, "learning_rate": 1.2210047477603376e-06, "loss": 0.3531, "step": 3341 }, { "epoch": 2.3860066634935744, "grad_norm": 0.31469109654426575, "learning_rate": 1.218284986725632e-06, "loss": 0.4288, "step": 3342 }, { "epoch": 2.386720609233698, "grad_norm": 0.32021811604499817, "learning_rate": 1.2155678378818652e-06, "loss": 0.378, "step": 3343 }, { "epoch": 2.387434554973822, "grad_norm": 0.3337295651435852, "learning_rate": 1.2128533031058838e-06, "loss": 0.4129, "step": 3344 }, { "epoch": 2.3881485007139456, "grad_norm": 0.317818820476532, "learning_rate": 1.2101413842727345e-06, "loss": 0.358, "step": 3345 }, { "epoch": 2.3888624464540693, "grad_norm": 0.3047606647014618, "learning_rate": 1.2074320832556558e-06, "loss": 0.3442, "step": 3346 }, { "epoch": 2.3895763921941935, "grad_norm": 0.3096768260002136, "learning_rate": 1.2047254019260745e-06, "loss": 0.3932, "step": 3347 }, { "epoch": 2.390290337934317, "grad_norm": 0.31763818860054016, "learning_rate": 1.2020213421536103e-06, "loss": 0.3706, "step": 3348 }, { "epoch": 2.391004283674441, "grad_norm": 0.3003665506839752, "learning_rate": 1.1993199058060734e-06, "loss": 0.3444, "step": 3349 }, { "epoch": 2.3917182294145647, "grad_norm": 0.3185729384422302, "learning_rate": 1.1966210947494583e-06, "loss": 0.3573, "step": 3350 }, { "epoch": 2.3924321751546884, "grad_norm": 0.3179927170276642, "learning_rate": 1.1939249108479495e-06, "loss": 0.3869, "step": 3351 }, { "epoch": 2.393146120894812, "grad_norm": 0.31660208106040955, "learning_rate": 1.1912313559639172e-06, "loss": 0.3702, "step": 3352 }, { "epoch": 2.393860066634936, "grad_norm": 0.3210875988006592, "learning_rate": 1.1885404319579108e-06, "loss": 0.3842, "step": 3353 }, { "epoch": 2.3945740123750596, "grad_norm": 0.2978332042694092, "learning_rate": 1.1858521406886674e-06, "loss": 0.3625, "step": 3354 }, { "epoch": 2.3952879581151834, "grad_norm": 0.3231465518474579, "learning_rate": 1.1831664840131035e-06, "loss": 0.4017, "step": 3355 }, { "epoch": 2.396001903855307, "grad_norm": 0.32814693450927734, "learning_rate": 1.1804834637863176e-06, "loss": 0.367, "step": 3356 }, { "epoch": 2.396715849595431, "grad_norm": 0.3598434031009674, "learning_rate": 1.1778030818615827e-06, "loss": 0.3595, "step": 3357 }, { "epoch": 2.3974297953355546, "grad_norm": 0.32664820551872253, "learning_rate": 1.1751253400903551e-06, "loss": 0.3887, "step": 3358 }, { "epoch": 2.3981437410756783, "grad_norm": 0.28529471158981323, "learning_rate": 1.1724502403222631e-06, "loss": 0.3144, "step": 3359 }, { "epoch": 2.398857686815802, "grad_norm": 0.3144751489162445, "learning_rate": 1.1697777844051105e-06, "loss": 0.3638, "step": 3360 }, { "epoch": 2.3995716325559258, "grad_norm": 0.2745177745819092, "learning_rate": 1.1671079741848767e-06, "loss": 0.3053, "step": 3361 }, { "epoch": 2.4002855782960495, "grad_norm": 0.3060501217842102, "learning_rate": 1.1644408115057155e-06, "loss": 0.3871, "step": 3362 }, { "epoch": 2.4009995240361732, "grad_norm": 0.3019615411758423, "learning_rate": 1.1617762982099446e-06, "loss": 0.3791, "step": 3363 }, { "epoch": 2.401713469776297, "grad_norm": 0.3165585398674011, "learning_rate": 1.159114436138059e-06, "loss": 0.3618, "step": 3364 }, { "epoch": 2.4024274155164207, "grad_norm": 0.3041638135910034, "learning_rate": 1.1564552271287217e-06, "loss": 0.3536, "step": 3365 }, { "epoch": 2.4031413612565444, "grad_norm": 0.33302342891693115, "learning_rate": 1.1537986730187567e-06, "loss": 0.4107, "step": 3366 }, { "epoch": 2.403855306996668, "grad_norm": 0.3171966075897217, "learning_rate": 1.1511447756431604e-06, "loss": 0.3559, "step": 3367 }, { "epoch": 2.404569252736792, "grad_norm": 0.33768272399902344, "learning_rate": 1.1484935368350946e-06, "loss": 0.3856, "step": 3368 }, { "epoch": 2.4052831984769156, "grad_norm": 0.29210901260375977, "learning_rate": 1.145844958425879e-06, "loss": 0.3582, "step": 3369 }, { "epoch": 2.4059971442170394, "grad_norm": 0.29756268858909607, "learning_rate": 1.1431990422450018e-06, "loss": 0.3602, "step": 3370 }, { "epoch": 2.406711089957163, "grad_norm": 0.30788642168045044, "learning_rate": 1.1405557901201087e-06, "loss": 0.3884, "step": 3371 }, { "epoch": 2.407425035697287, "grad_norm": 0.28679800033569336, "learning_rate": 1.137915203877003e-06, "loss": 0.333, "step": 3372 }, { "epoch": 2.4081389814374106, "grad_norm": 0.3128231167793274, "learning_rate": 1.1352772853396533e-06, "loss": 0.3935, "step": 3373 }, { "epoch": 2.4088529271775343, "grad_norm": 0.3085997402667999, "learning_rate": 1.132642036330181e-06, "loss": 0.3791, "step": 3374 }, { "epoch": 2.409566872917658, "grad_norm": 0.3110702931880951, "learning_rate": 1.1300094586688632e-06, "loss": 0.3234, "step": 3375 }, { "epoch": 2.4102808186577818, "grad_norm": 0.34995603561401367, "learning_rate": 1.1273795541741334e-06, "loss": 0.3828, "step": 3376 }, { "epoch": 2.410994764397906, "grad_norm": 0.31205251812934875, "learning_rate": 1.1247523246625802e-06, "loss": 0.3804, "step": 3377 }, { "epoch": 2.4117087101380297, "grad_norm": 0.306048721075058, "learning_rate": 1.1221277719489387e-06, "loss": 0.35, "step": 3378 }, { "epoch": 2.4124226558781534, "grad_norm": 0.31352463364601135, "learning_rate": 1.1195058978461016e-06, "loss": 0.3941, "step": 3379 }, { "epoch": 2.413136601618277, "grad_norm": 0.2967744469642639, "learning_rate": 1.1168867041651082e-06, "loss": 0.3415, "step": 3380 }, { "epoch": 2.413850547358401, "grad_norm": 0.3305203318595886, "learning_rate": 1.1142701927151456e-06, "loss": 0.4058, "step": 3381 }, { "epoch": 2.4145644930985246, "grad_norm": 0.3068082928657532, "learning_rate": 1.1116563653035507e-06, "loss": 0.3133, "step": 3382 }, { "epoch": 2.4152784388386483, "grad_norm": 0.31327566504478455, "learning_rate": 1.1090452237358028e-06, "loss": 0.3952, "step": 3383 }, { "epoch": 2.415992384578772, "grad_norm": 0.3131173253059387, "learning_rate": 1.1064367698155303e-06, "loss": 0.3901, "step": 3384 }, { "epoch": 2.416706330318896, "grad_norm": 0.30159395933151245, "learning_rate": 1.103831005344501e-06, "loss": 0.3652, "step": 3385 }, { "epoch": 2.4174202760590195, "grad_norm": 0.33973464369773865, "learning_rate": 1.1012279321226294e-06, "loss": 0.4176, "step": 3386 }, { "epoch": 2.4181342217991433, "grad_norm": 0.3095953166484833, "learning_rate": 1.0986275519479657e-06, "loss": 0.3751, "step": 3387 }, { "epoch": 2.418848167539267, "grad_norm": 0.2902759909629822, "learning_rate": 1.096029866616704e-06, "loss": 0.3178, "step": 3388 }, { "epoch": 2.4195621132793907, "grad_norm": 0.29413366317749023, "learning_rate": 1.0934348779231774e-06, "loss": 0.3372, "step": 3389 }, { "epoch": 2.4202760590195145, "grad_norm": 0.32598602771759033, "learning_rate": 1.0908425876598512e-06, "loss": 0.3879, "step": 3390 }, { "epoch": 2.420990004759638, "grad_norm": 0.3080146014690399, "learning_rate": 1.0882529976173327e-06, "loss": 0.3493, "step": 3391 }, { "epoch": 2.421703950499762, "grad_norm": 0.31416693329811096, "learning_rate": 1.0856661095843617e-06, "loss": 0.3927, "step": 3392 }, { "epoch": 2.4224178962398857, "grad_norm": 0.32720959186553955, "learning_rate": 1.0830819253478104e-06, "loss": 0.4019, "step": 3393 }, { "epoch": 2.4231318419800094, "grad_norm": 0.3119010925292969, "learning_rate": 1.0805004466926855e-06, "loss": 0.3378, "step": 3394 }, { "epoch": 2.423845787720133, "grad_norm": 0.30900704860687256, "learning_rate": 1.0779216754021215e-06, "loss": 0.4119, "step": 3395 }, { "epoch": 2.424559733460257, "grad_norm": 0.3077006936073303, "learning_rate": 1.0753456132573886e-06, "loss": 0.3956, "step": 3396 }, { "epoch": 2.4252736792003806, "grad_norm": 0.33242538571357727, "learning_rate": 1.0727722620378788e-06, "loss": 0.3725, "step": 3397 }, { "epoch": 2.4259876249405044, "grad_norm": 0.29311856627464294, "learning_rate": 1.070201623521116e-06, "loss": 0.3526, "step": 3398 }, { "epoch": 2.426701570680628, "grad_norm": 0.3037789762020111, "learning_rate": 1.0676336994827513e-06, "loss": 0.3667, "step": 3399 }, { "epoch": 2.4274155164207523, "grad_norm": 0.321086049079895, "learning_rate": 1.065068491696556e-06, "loss": 0.3816, "step": 3400 }, { "epoch": 2.428129462160876, "grad_norm": 0.3206118941307068, "learning_rate": 1.062506001934428e-06, "loss": 0.3899, "step": 3401 }, { "epoch": 2.4288434079009997, "grad_norm": 0.2675626277923584, "learning_rate": 1.0599462319663906e-06, "loss": 0.3523, "step": 3402 }, { "epoch": 2.4295573536411235, "grad_norm": 0.2748230993747711, "learning_rate": 1.0573891835605821e-06, "loss": 0.3673, "step": 3403 }, { "epoch": 2.430271299381247, "grad_norm": 0.3058761656284332, "learning_rate": 1.0548348584832656e-06, "loss": 0.3589, "step": 3404 }, { "epoch": 2.430985245121371, "grad_norm": 0.3071015477180481, "learning_rate": 1.0522832584988235e-06, "loss": 0.3968, "step": 3405 }, { "epoch": 2.4316991908614947, "grad_norm": 0.3143406808376312, "learning_rate": 1.0497343853697506e-06, "loss": 0.3633, "step": 3406 }, { "epoch": 2.4324131366016184, "grad_norm": 0.3190564811229706, "learning_rate": 1.0471882408566648e-06, "loss": 0.3791, "step": 3407 }, { "epoch": 2.433127082341742, "grad_norm": 0.33089426159858704, "learning_rate": 1.044644826718295e-06, "loss": 0.4113, "step": 3408 }, { "epoch": 2.433841028081866, "grad_norm": 0.30120334029197693, "learning_rate": 1.042104144711484e-06, "loss": 0.3485, "step": 3409 }, { "epoch": 2.4345549738219896, "grad_norm": 0.30974772572517395, "learning_rate": 1.0395661965911891e-06, "loss": 0.3561, "step": 3410 }, { "epoch": 2.4352689195621133, "grad_norm": 0.31071510910987854, "learning_rate": 1.0370309841104803e-06, "loss": 0.3341, "step": 3411 }, { "epoch": 2.435982865302237, "grad_norm": 0.31992340087890625, "learning_rate": 1.0344985090205344e-06, "loss": 0.3296, "step": 3412 }, { "epoch": 2.436696811042361, "grad_norm": 0.3343741297721863, "learning_rate": 1.0319687730706402e-06, "loss": 0.3776, "step": 3413 }, { "epoch": 2.4374107567824845, "grad_norm": 0.3292313814163208, "learning_rate": 1.029441778008195e-06, "loss": 0.4162, "step": 3414 }, { "epoch": 2.4381247025226083, "grad_norm": 0.2816011905670166, "learning_rate": 1.0269175255786984e-06, "loss": 0.3661, "step": 3415 }, { "epoch": 2.438838648262732, "grad_norm": 0.3160713315010071, "learning_rate": 1.0243960175257605e-06, "loss": 0.3536, "step": 3416 }, { "epoch": 2.4395525940028557, "grad_norm": 0.318091481924057, "learning_rate": 1.0218772555910955e-06, "loss": 0.3725, "step": 3417 }, { "epoch": 2.4402665397429795, "grad_norm": 0.3013283312320709, "learning_rate": 1.0193612415145154e-06, "loss": 0.3571, "step": 3418 }, { "epoch": 2.440980485483103, "grad_norm": 0.31924453377723694, "learning_rate": 1.0168479770339406e-06, "loss": 0.4006, "step": 3419 }, { "epoch": 2.441694431223227, "grad_norm": 0.2906936705112457, "learning_rate": 1.0143374638853892e-06, "loss": 0.3593, "step": 3420 }, { "epoch": 2.4424083769633507, "grad_norm": 0.2821863293647766, "learning_rate": 1.0118297038029767e-06, "loss": 0.3642, "step": 3421 }, { "epoch": 2.4431223227034744, "grad_norm": 0.3557227849960327, "learning_rate": 1.0093246985189208e-06, "loss": 0.4166, "step": 3422 }, { "epoch": 2.443836268443598, "grad_norm": 0.326776921749115, "learning_rate": 1.006822449763537e-06, "loss": 0.3807, "step": 3423 }, { "epoch": 2.444550214183722, "grad_norm": 0.31994393467903137, "learning_rate": 1.004322959265231e-06, "loss": 0.3692, "step": 3424 }, { "epoch": 2.4452641599238456, "grad_norm": 0.30462610721588135, "learning_rate": 1.0018262287505088e-06, "loss": 0.3505, "step": 3425 }, { "epoch": 2.4459781056639693, "grad_norm": 0.30507490038871765, "learning_rate": 9.993322599439692e-07, "loss": 0.3838, "step": 3426 }, { "epoch": 2.446692051404093, "grad_norm": 0.2906094491481781, "learning_rate": 9.968410545683e-07, "loss": 0.3643, "step": 3427 }, { "epoch": 2.447405997144217, "grad_norm": 0.33424970507621765, "learning_rate": 9.943526143442827e-07, "loss": 0.4192, "step": 3428 }, { "epoch": 2.448119942884341, "grad_norm": 0.3227403461933136, "learning_rate": 9.918669409907904e-07, "loss": 0.3807, "step": 3429 }, { "epoch": 2.4488338886244647, "grad_norm": 0.28227698802948, "learning_rate": 9.893840362247809e-07, "loss": 0.3467, "step": 3430 }, { "epoch": 2.4495478343645885, "grad_norm": 0.30288466811180115, "learning_rate": 9.869039017613042e-07, "loss": 0.3515, "step": 3431 }, { "epoch": 2.450261780104712, "grad_norm": 0.3284318149089813, "learning_rate": 9.844265393134927e-07, "loss": 0.3883, "step": 3432 }, { "epoch": 2.450975725844836, "grad_norm": 0.343831866979599, "learning_rate": 9.819519505925646e-07, "loss": 0.3681, "step": 3433 }, { "epoch": 2.4516896715849597, "grad_norm": 0.3015079200267792, "learning_rate": 9.794801373078238e-07, "loss": 0.3105, "step": 3434 }, { "epoch": 2.4524036173250834, "grad_norm": 0.3243652880191803, "learning_rate": 9.770111011666582e-07, "loss": 0.3599, "step": 3435 }, { "epoch": 2.453117563065207, "grad_norm": 0.31435662508010864, "learning_rate": 9.745448438745358e-07, "loss": 0.3937, "step": 3436 }, { "epoch": 2.453831508805331, "grad_norm": 0.30853769183158875, "learning_rate": 9.720813671350033e-07, "loss": 0.3715, "step": 3437 }, { "epoch": 2.4545454545454546, "grad_norm": 0.29245731234550476, "learning_rate": 9.696206726496893e-07, "loss": 0.3667, "step": 3438 }, { "epoch": 2.4552594002855783, "grad_norm": 0.2932073473930359, "learning_rate": 9.671627621183011e-07, "loss": 0.3499, "step": 3439 }, { "epoch": 2.455973346025702, "grad_norm": 0.3090064227581024, "learning_rate": 9.647076372386195e-07, "loss": 0.3765, "step": 3440 }, { "epoch": 2.456687291765826, "grad_norm": 0.28787750005722046, "learning_rate": 9.622552997065043e-07, "loss": 0.3493, "step": 3441 }, { "epoch": 2.4574012375059495, "grad_norm": 0.30215704441070557, "learning_rate": 9.59805751215891e-07, "loss": 0.3567, "step": 3442 }, { "epoch": 2.4581151832460733, "grad_norm": 0.3329910337924957, "learning_rate": 9.573589934587845e-07, "loss": 0.3525, "step": 3443 }, { "epoch": 2.458829128986197, "grad_norm": 0.3027404844760895, "learning_rate": 9.549150281252633e-07, "loss": 0.3363, "step": 3444 }, { "epoch": 2.4595430747263207, "grad_norm": 0.3099161982536316, "learning_rate": 9.524738569034803e-07, "loss": 0.387, "step": 3445 }, { "epoch": 2.4602570204664445, "grad_norm": 0.2897385060787201, "learning_rate": 9.500354814796537e-07, "loss": 0.3842, "step": 3446 }, { "epoch": 2.460970966206568, "grad_norm": 0.314109206199646, "learning_rate": 9.47599903538074e-07, "loss": 0.3821, "step": 3447 }, { "epoch": 2.461684911946692, "grad_norm": 0.3401591181755066, "learning_rate": 9.451671247610988e-07, "loss": 0.3851, "step": 3448 }, { "epoch": 2.4623988576868157, "grad_norm": 0.3195832371711731, "learning_rate": 9.427371468291502e-07, "loss": 0.3662, "step": 3449 }, { "epoch": 2.4631128034269394, "grad_norm": 0.32316702604293823, "learning_rate": 9.403099714207175e-07, "loss": 0.388, "step": 3450 }, { "epoch": 2.463826749167063, "grad_norm": 0.2973496615886688, "learning_rate": 9.378856002123549e-07, "loss": 0.3277, "step": 3451 }, { "epoch": 2.4645406949071873, "grad_norm": 0.3444820046424866, "learning_rate": 9.354640348786765e-07, "loss": 0.3928, "step": 3452 }, { "epoch": 2.465254640647311, "grad_norm": 0.3048448860645294, "learning_rate": 9.330452770923604e-07, "loss": 0.3511, "step": 3453 }, { "epoch": 2.4659685863874348, "grad_norm": 0.32515281438827515, "learning_rate": 9.306293285241475e-07, "loss": 0.3379, "step": 3454 }, { "epoch": 2.4666825321275585, "grad_norm": 0.30910608172416687, "learning_rate": 9.282161908428344e-07, "loss": 0.3621, "step": 3455 }, { "epoch": 2.4673964778676822, "grad_norm": 0.3356781303882599, "learning_rate": 9.258058657152763e-07, "loss": 0.3751, "step": 3456 }, { "epoch": 2.468110423607806, "grad_norm": 0.3142489492893219, "learning_rate": 9.233983548063891e-07, "loss": 0.3825, "step": 3457 }, { "epoch": 2.4688243693479297, "grad_norm": 0.31302130222320557, "learning_rate": 9.209936597791407e-07, "loss": 0.3896, "step": 3458 }, { "epoch": 2.4695383150880534, "grad_norm": 0.2984788715839386, "learning_rate": 9.185917822945567e-07, "loss": 0.3272, "step": 3459 }, { "epoch": 2.470252260828177, "grad_norm": 0.3399263918399811, "learning_rate": 9.161927240117174e-07, "loss": 0.3686, "step": 3460 }, { "epoch": 2.470966206568301, "grad_norm": 0.3151572346687317, "learning_rate": 9.137964865877519e-07, "loss": 0.3568, "step": 3461 }, { "epoch": 2.4716801523084246, "grad_norm": 0.30553174018859863, "learning_rate": 9.114030716778433e-07, "loss": 0.3568, "step": 3462 }, { "epoch": 2.4723940980485484, "grad_norm": 0.31021836400032043, "learning_rate": 9.090124809352268e-07, "loss": 0.3898, "step": 3463 }, { "epoch": 2.473108043788672, "grad_norm": 0.3267972469329834, "learning_rate": 9.066247160111818e-07, "loss": 0.3848, "step": 3464 }, { "epoch": 2.473821989528796, "grad_norm": 0.33588090538978577, "learning_rate": 9.042397785550405e-07, "loss": 0.3628, "step": 3465 }, { "epoch": 2.4745359352689196, "grad_norm": 0.3279980719089508, "learning_rate": 9.018576702141818e-07, "loss": 0.3538, "step": 3466 }, { "epoch": 2.4752498810090433, "grad_norm": 0.3124123811721802, "learning_rate": 8.994783926340255e-07, "loss": 0.3392, "step": 3467 }, { "epoch": 2.475963826749167, "grad_norm": 0.32125115394592285, "learning_rate": 8.971019474580428e-07, "loss": 0.3831, "step": 3468 }, { "epoch": 2.4766777724892908, "grad_norm": 0.3388424813747406, "learning_rate": 8.947283363277437e-07, "loss": 0.3604, "step": 3469 }, { "epoch": 2.4773917182294145, "grad_norm": 0.30818405747413635, "learning_rate": 8.923575608826812e-07, "loss": 0.3748, "step": 3470 }, { "epoch": 2.4781056639695382, "grad_norm": 0.28909996151924133, "learning_rate": 8.899896227604509e-07, "loss": 0.3605, "step": 3471 }, { "epoch": 2.478819609709662, "grad_norm": 0.33838319778442383, "learning_rate": 8.876245235966884e-07, "loss": 0.419, "step": 3472 }, { "epoch": 2.4795335554497857, "grad_norm": 0.30171722173690796, "learning_rate": 8.852622650250698e-07, "loss": 0.3518, "step": 3473 }, { "epoch": 2.4802475011899094, "grad_norm": 0.32410743832588196, "learning_rate": 8.82902848677304e-07, "loss": 0.3624, "step": 3474 }, { "epoch": 2.480961446930033, "grad_norm": 0.30025041103363037, "learning_rate": 8.805462761831418e-07, "loss": 0.3452, "step": 3475 }, { "epoch": 2.481675392670157, "grad_norm": 0.34161391854286194, "learning_rate": 8.78192549170368e-07, "loss": 0.4164, "step": 3476 }, { "epoch": 2.4823893384102806, "grad_norm": 0.30657052993774414, "learning_rate": 8.758416692648008e-07, "loss": 0.3458, "step": 3477 }, { "epoch": 2.4831032841504044, "grad_norm": 0.3140174150466919, "learning_rate": 8.734936380902936e-07, "loss": 0.4125, "step": 3478 }, { "epoch": 2.483817229890528, "grad_norm": 0.28678399324417114, "learning_rate": 8.711484572687295e-07, "loss": 0.3756, "step": 3479 }, { "epoch": 2.484531175630652, "grad_norm": 0.29673588275909424, "learning_rate": 8.688061284200266e-07, "loss": 0.3893, "step": 3480 }, { "epoch": 2.4852451213707756, "grad_norm": 0.32084956765174866, "learning_rate": 8.66466653162128e-07, "loss": 0.3942, "step": 3481 }, { "epoch": 2.4859590671108998, "grad_norm": 0.311459481716156, "learning_rate": 8.641300331110114e-07, "loss": 0.3897, "step": 3482 }, { "epoch": 2.4866730128510235, "grad_norm": 0.2973622679710388, "learning_rate": 8.617962698806764e-07, "loss": 0.3513, "step": 3483 }, { "epoch": 2.4873869585911472, "grad_norm": 0.3269909918308258, "learning_rate": 8.594653650831541e-07, "loss": 0.379, "step": 3484 }, { "epoch": 2.488100904331271, "grad_norm": 0.3451862931251526, "learning_rate": 8.571373203285005e-07, "loss": 0.3737, "step": 3485 }, { "epoch": 2.4888148500713947, "grad_norm": 0.30511486530303955, "learning_rate": 8.54812137224792e-07, "loss": 0.3228, "step": 3486 }, { "epoch": 2.4895287958115184, "grad_norm": 0.34069424867630005, "learning_rate": 8.52489817378132e-07, "loss": 0.3912, "step": 3487 }, { "epoch": 2.490242741551642, "grad_norm": 0.31860655546188354, "learning_rate": 8.501703623926472e-07, "loss": 0.3598, "step": 3488 }, { "epoch": 2.490956687291766, "grad_norm": 0.3021690547466278, "learning_rate": 8.478537738704811e-07, "loss": 0.3289, "step": 3489 }, { "epoch": 2.4916706330318896, "grad_norm": 0.3339398205280304, "learning_rate": 8.455400534118008e-07, "loss": 0.3557, "step": 3490 }, { "epoch": 2.4923845787720134, "grad_norm": 0.3131773769855499, "learning_rate": 8.432292026147921e-07, "loss": 0.3945, "step": 3491 }, { "epoch": 2.493098524512137, "grad_norm": 0.3184497356414795, "learning_rate": 8.409212230756564e-07, "loss": 0.3714, "step": 3492 }, { "epoch": 2.493812470252261, "grad_norm": 0.2926839590072632, "learning_rate": 8.38616116388612e-07, "loss": 0.3954, "step": 3493 }, { "epoch": 2.4945264159923846, "grad_norm": 0.2987339496612549, "learning_rate": 8.363138841458962e-07, "loss": 0.3211, "step": 3494 }, { "epoch": 2.4952403617325083, "grad_norm": 0.3151412010192871, "learning_rate": 8.340145279377559e-07, "loss": 0.3939, "step": 3495 }, { "epoch": 2.495954307472632, "grad_norm": 0.32360076904296875, "learning_rate": 8.317180493524557e-07, "loss": 0.3623, "step": 3496 }, { "epoch": 2.4966682532127558, "grad_norm": 0.3221840262413025, "learning_rate": 8.294244499762716e-07, "loss": 0.3354, "step": 3497 }, { "epoch": 2.4973821989528795, "grad_norm": 0.3085176646709442, "learning_rate": 8.271337313934869e-07, "loss": 0.3607, "step": 3498 }, { "epoch": 2.4980961446930032, "grad_norm": 0.3141801953315735, "learning_rate": 8.248458951864008e-07, "loss": 0.3611, "step": 3499 }, { "epoch": 2.498810090433127, "grad_norm": 0.3071938455104828, "learning_rate": 8.225609429353187e-07, "loss": 0.3855, "step": 3500 }, { "epoch": 2.4995240361732507, "grad_norm": 0.3037102222442627, "learning_rate": 8.202788762185515e-07, "loss": 0.362, "step": 3501 }, { "epoch": 2.5002379819133744, "grad_norm": 0.29588326811790466, "learning_rate": 8.179996966124221e-07, "loss": 0.3829, "step": 3502 }, { "epoch": 2.5009519276534986, "grad_norm": 0.30879026651382446, "learning_rate": 8.15723405691256e-07, "loss": 0.3659, "step": 3503 }, { "epoch": 2.5016658733936223, "grad_norm": 0.300559401512146, "learning_rate": 8.134500050273841e-07, "loss": 0.3496, "step": 3504 }, { "epoch": 2.502379819133746, "grad_norm": 0.3210066258907318, "learning_rate": 8.111794961911384e-07, "loss": 0.362, "step": 3505 }, { "epoch": 2.50309376487387, "grad_norm": 0.3171641528606415, "learning_rate": 8.089118807508589e-07, "loss": 0.4023, "step": 3506 }, { "epoch": 2.5038077106139935, "grad_norm": 0.31272703409194946, "learning_rate": 8.066471602728804e-07, "loss": 0.3956, "step": 3507 }, { "epoch": 2.5045216563541173, "grad_norm": 0.3074282705783844, "learning_rate": 8.043853363215437e-07, "loss": 0.3819, "step": 3508 }, { "epoch": 2.505235602094241, "grad_norm": 0.2964726388454437, "learning_rate": 8.021264104591864e-07, "loss": 0.3379, "step": 3509 }, { "epoch": 2.5059495478343647, "grad_norm": 0.31418004631996155, "learning_rate": 7.99870384246143e-07, "loss": 0.4144, "step": 3510 }, { "epoch": 2.5066634935744885, "grad_norm": 0.2892567217350006, "learning_rate": 7.976172592407478e-07, "loss": 0.3503, "step": 3511 }, { "epoch": 2.507377439314612, "grad_norm": 0.2915591895580292, "learning_rate": 7.953670369993288e-07, "loss": 0.3885, "step": 3512 }, { "epoch": 2.508091385054736, "grad_norm": 0.3271644711494446, "learning_rate": 7.931197190762119e-07, "loss": 0.3345, "step": 3513 }, { "epoch": 2.5088053307948597, "grad_norm": 0.33039069175720215, "learning_rate": 7.908753070237124e-07, "loss": 0.4025, "step": 3514 }, { "epoch": 2.5095192765349834, "grad_norm": 0.3027612864971161, "learning_rate": 7.886338023921431e-07, "loss": 0.3258, "step": 3515 }, { "epoch": 2.510233222275107, "grad_norm": 0.28781068325042725, "learning_rate": 7.863952067298042e-07, "loss": 0.3518, "step": 3516 }, { "epoch": 2.510947168015231, "grad_norm": 0.29141703248023987, "learning_rate": 7.841595215829906e-07, "loss": 0.3911, "step": 3517 }, { "epoch": 2.5116611137553546, "grad_norm": 0.3199761211872101, "learning_rate": 7.81926748495983e-07, "loss": 0.4119, "step": 3518 }, { "epoch": 2.5123750594954783, "grad_norm": 0.304118812084198, "learning_rate": 7.796968890110546e-07, "loss": 0.3815, "step": 3519 }, { "epoch": 2.513089005235602, "grad_norm": 0.31006479263305664, "learning_rate": 7.774699446684608e-07, "loss": 0.3701, "step": 3520 }, { "epoch": 2.513802950975726, "grad_norm": 0.30093875527381897, "learning_rate": 7.752459170064491e-07, "loss": 0.3771, "step": 3521 }, { "epoch": 2.5145168967158495, "grad_norm": 0.302006334066391, "learning_rate": 7.730248075612501e-07, "loss": 0.3626, "step": 3522 }, { "epoch": 2.5152308424559733, "grad_norm": 0.30582284927368164, "learning_rate": 7.708066178670759e-07, "loss": 0.4145, "step": 3523 }, { "epoch": 2.515944788196097, "grad_norm": 0.2994190752506256, "learning_rate": 7.685913494561253e-07, "loss": 0.3467, "step": 3524 }, { "epoch": 2.5166587339362207, "grad_norm": 0.31135112047195435, "learning_rate": 7.663790038585794e-07, "loss": 0.3573, "step": 3525 }, { "epoch": 2.5173726796763445, "grad_norm": 0.32380354404449463, "learning_rate": 7.641695826025975e-07, "loss": 0.3628, "step": 3526 }, { "epoch": 2.518086625416468, "grad_norm": 0.2904009222984314, "learning_rate": 7.619630872143214e-07, "loss": 0.3729, "step": 3527 }, { "epoch": 2.518800571156592, "grad_norm": 0.33915069699287415, "learning_rate": 7.597595192178702e-07, "loss": 0.4275, "step": 3528 }, { "epoch": 2.5195145168967157, "grad_norm": 0.2896716594696045, "learning_rate": 7.575588801353433e-07, "loss": 0.3252, "step": 3529 }, { "epoch": 2.5202284626368394, "grad_norm": 0.3203714191913605, "learning_rate": 7.553611714868136e-07, "loss": 0.377, "step": 3530 }, { "epoch": 2.520942408376963, "grad_norm": 0.32726237177848816, "learning_rate": 7.531663947903334e-07, "loss": 0.3582, "step": 3531 }, { "epoch": 2.521656354117087, "grad_norm": 0.3003333806991577, "learning_rate": 7.50974551561926e-07, "loss": 0.3614, "step": 3532 }, { "epoch": 2.5223702998572106, "grad_norm": 0.3061165511608124, "learning_rate": 7.487856433155916e-07, "loss": 0.3786, "step": 3533 }, { "epoch": 2.5230842455973344, "grad_norm": 0.27165549993515015, "learning_rate": 7.465996715633028e-07, "loss": 0.32, "step": 3534 }, { "epoch": 2.523798191337458, "grad_norm": 0.31679636240005493, "learning_rate": 7.444166378150014e-07, "loss": 0.4112, "step": 3535 }, { "epoch": 2.5245121370775823, "grad_norm": 0.30517980456352234, "learning_rate": 7.422365435786022e-07, "loss": 0.3921, "step": 3536 }, { "epoch": 2.525226082817706, "grad_norm": 0.29721736907958984, "learning_rate": 7.4005939035999e-07, "loss": 0.3487, "step": 3537 }, { "epoch": 2.5259400285578297, "grad_norm": 0.32969555258750916, "learning_rate": 7.378851796630143e-07, "loss": 0.4262, "step": 3538 }, { "epoch": 2.5266539742979535, "grad_norm": 0.27905789017677307, "learning_rate": 7.357139129894958e-07, "loss": 0.3419, "step": 3539 }, { "epoch": 2.527367920038077, "grad_norm": 0.32592764496803284, "learning_rate": 7.33545591839222e-07, "loss": 0.3985, "step": 3540 }, { "epoch": 2.528081865778201, "grad_norm": 0.29951608180999756, "learning_rate": 7.313802177099427e-07, "loss": 0.3588, "step": 3541 }, { "epoch": 2.5287958115183247, "grad_norm": 0.2855447232723236, "learning_rate": 7.292177920973726e-07, "loss": 0.3568, "step": 3542 }, { "epoch": 2.5295097572584484, "grad_norm": 0.29610127210617065, "learning_rate": 7.270583164951928e-07, "loss": 0.3251, "step": 3543 }, { "epoch": 2.530223702998572, "grad_norm": 0.316863089799881, "learning_rate": 7.24901792395043e-07, "loss": 0.4198, "step": 3544 }, { "epoch": 2.530937648738696, "grad_norm": 0.2946541905403137, "learning_rate": 7.227482212865267e-07, "loss": 0.3764, "step": 3545 }, { "epoch": 2.5316515944788196, "grad_norm": 0.32039329409599304, "learning_rate": 7.205976046572083e-07, "loss": 0.4108, "step": 3546 }, { "epoch": 2.5323655402189433, "grad_norm": 0.32215002179145813, "learning_rate": 7.184499439926068e-07, "loss": 0.3458, "step": 3547 }, { "epoch": 2.533079485959067, "grad_norm": 0.34685611724853516, "learning_rate": 7.163052407762045e-07, "loss": 0.3457, "step": 3548 }, { "epoch": 2.533793431699191, "grad_norm": 0.30680808424949646, "learning_rate": 7.141634964894389e-07, "loss": 0.3821, "step": 3549 }, { "epoch": 2.5345073774393145, "grad_norm": 0.3051254451274872, "learning_rate": 7.120247126117025e-07, "loss": 0.3939, "step": 3550 }, { "epoch": 2.5352213231794383, "grad_norm": 0.3300296366214752, "learning_rate": 7.098888906203449e-07, "loss": 0.4082, "step": 3551 }, { "epoch": 2.535935268919562, "grad_norm": 0.3147153854370117, "learning_rate": 7.077560319906696e-07, "loss": 0.3815, "step": 3552 }, { "epoch": 2.5366492146596857, "grad_norm": 0.29201430082321167, "learning_rate": 7.056261381959318e-07, "loss": 0.3438, "step": 3553 }, { "epoch": 2.5373631603998095, "grad_norm": 0.32107019424438477, "learning_rate": 7.03499210707338e-07, "loss": 0.3627, "step": 3554 }, { "epoch": 2.538077106139933, "grad_norm": 0.3023436367511749, "learning_rate": 7.013752509940486e-07, "loss": 0.3242, "step": 3555 }, { "epoch": 2.5387910518800574, "grad_norm": 0.32993465662002563, "learning_rate": 6.992542605231739e-07, "loss": 0.3708, "step": 3556 }, { "epoch": 2.539504997620181, "grad_norm": 0.3207978904247284, "learning_rate": 6.971362407597693e-07, "loss": 0.3868, "step": 3557 }, { "epoch": 2.540218943360305, "grad_norm": 0.29421359300613403, "learning_rate": 6.950211931668421e-07, "loss": 0.3543, "step": 3558 }, { "epoch": 2.5409328891004286, "grad_norm": 0.2812667191028595, "learning_rate": 6.929091192053472e-07, "loss": 0.3156, "step": 3559 }, { "epoch": 2.5416468348405523, "grad_norm": 0.3159504234790802, "learning_rate": 6.908000203341802e-07, "loss": 0.4062, "step": 3560 }, { "epoch": 2.542360780580676, "grad_norm": 0.298666775226593, "learning_rate": 6.88693898010187e-07, "loss": 0.3497, "step": 3561 }, { "epoch": 2.5430747263208, "grad_norm": 0.3116026222705841, "learning_rate": 6.865907536881566e-07, "loss": 0.3615, "step": 3562 }, { "epoch": 2.5437886720609235, "grad_norm": 0.312075674533844, "learning_rate": 6.84490588820818e-07, "loss": 0.3656, "step": 3563 }, { "epoch": 2.5445026178010473, "grad_norm": 0.2998380661010742, "learning_rate": 6.82393404858846e-07, "loss": 0.3722, "step": 3564 }, { "epoch": 2.545216563541171, "grad_norm": 0.30636659264564514, "learning_rate": 6.802992032508537e-07, "loss": 0.3593, "step": 3565 }, { "epoch": 2.5459305092812947, "grad_norm": 0.30200645327568054, "learning_rate": 6.782079854433931e-07, "loss": 0.3927, "step": 3566 }, { "epoch": 2.5466444550214185, "grad_norm": 0.3280206024646759, "learning_rate": 6.761197528809593e-07, "loss": 0.3779, "step": 3567 }, { "epoch": 2.547358400761542, "grad_norm": 0.29718366265296936, "learning_rate": 6.740345070059829e-07, "loss": 0.3596, "step": 3568 }, { "epoch": 2.548072346501666, "grad_norm": 0.32445019483566284, "learning_rate": 6.719522492588304e-07, "loss": 0.3567, "step": 3569 }, { "epoch": 2.5487862922417897, "grad_norm": 0.3212509751319885, "learning_rate": 6.698729810778065e-07, "loss": 0.4019, "step": 3570 }, { "epoch": 2.5495002379819134, "grad_norm": 0.29165974259376526, "learning_rate": 6.677967038991512e-07, "loss": 0.3565, "step": 3571 }, { "epoch": 2.550214183722037, "grad_norm": 0.2943155765533447, "learning_rate": 6.657234191570344e-07, "loss": 0.3646, "step": 3572 }, { "epoch": 2.550928129462161, "grad_norm": 0.32746297121047974, "learning_rate": 6.636531282835629e-07, "loss": 0.4199, "step": 3573 }, { "epoch": 2.5516420752022846, "grad_norm": 0.29913613200187683, "learning_rate": 6.61585832708776e-07, "loss": 0.3544, "step": 3574 }, { "epoch": 2.5523560209424083, "grad_norm": 0.28172141313552856, "learning_rate": 6.595215338606398e-07, "loss": 0.3271, "step": 3575 }, { "epoch": 2.553069966682532, "grad_norm": 0.3085053265094757, "learning_rate": 6.574602331650559e-07, "loss": 0.3791, "step": 3576 }, { "epoch": 2.553783912422656, "grad_norm": 0.2954999804496765, "learning_rate": 6.554019320458494e-07, "loss": 0.4051, "step": 3577 }, { "epoch": 2.5544978581627795, "grad_norm": 0.2846652865409851, "learning_rate": 6.533466319247783e-07, "loss": 0.3558, "step": 3578 }, { "epoch": 2.5552118039029033, "grad_norm": 0.30584219098091125, "learning_rate": 6.512943342215234e-07, "loss": 0.3725, "step": 3579 }, { "epoch": 2.555925749643027, "grad_norm": 0.3108086884021759, "learning_rate": 6.492450403536959e-07, "loss": 0.4009, "step": 3580 }, { "epoch": 2.5566396953831507, "grad_norm": 0.3267786204814911, "learning_rate": 6.471987517368283e-07, "loss": 0.3496, "step": 3581 }, { "epoch": 2.5573536411232745, "grad_norm": 0.31788596510887146, "learning_rate": 6.451554697843798e-07, "loss": 0.3595, "step": 3582 }, { "epoch": 2.558067586863398, "grad_norm": 0.3139001727104187, "learning_rate": 6.431151959077325e-07, "loss": 0.3545, "step": 3583 }, { "epoch": 2.558781532603522, "grad_norm": 0.29583656787872314, "learning_rate": 6.410779315161885e-07, "loss": 0.3784, "step": 3584 }, { "epoch": 2.5594954783436457, "grad_norm": 0.3204035758972168, "learning_rate": 6.390436780169735e-07, "loss": 0.3854, "step": 3585 }, { "epoch": 2.5602094240837694, "grad_norm": 0.2888199985027313, "learning_rate": 6.370124368152336e-07, "loss": 0.3348, "step": 3586 }, { "epoch": 2.560923369823893, "grad_norm": 0.300418496131897, "learning_rate": 6.349842093140318e-07, "loss": 0.3818, "step": 3587 }, { "epoch": 2.561637315564017, "grad_norm": 0.30239689350128174, "learning_rate": 6.329589969143518e-07, "loss": 0.3665, "step": 3588 }, { "epoch": 2.562351261304141, "grad_norm": 0.30291351675987244, "learning_rate": 6.309368010150929e-07, "loss": 0.387, "step": 3589 }, { "epoch": 2.5630652070442648, "grad_norm": 0.3048894703388214, "learning_rate": 6.289176230130728e-07, "loss": 0.3645, "step": 3590 }, { "epoch": 2.5637791527843885, "grad_norm": 0.3024040758609772, "learning_rate": 6.269014643030214e-07, "loss": 0.3547, "step": 3591 }, { "epoch": 2.5644930985245122, "grad_norm": 0.3426075577735901, "learning_rate": 6.248883262775862e-07, "loss": 0.3978, "step": 3592 }, { "epoch": 2.565207044264636, "grad_norm": 0.296692818403244, "learning_rate": 6.228782103273284e-07, "loss": 0.3678, "step": 3593 }, { "epoch": 2.5659209900047597, "grad_norm": 0.299581378698349, "learning_rate": 6.208711178407173e-07, "loss": 0.3494, "step": 3594 }, { "epoch": 2.5666349357448834, "grad_norm": 0.32905223965644836, "learning_rate": 6.188670502041389e-07, "loss": 0.4262, "step": 3595 }, { "epoch": 2.567348881485007, "grad_norm": 0.28314927220344543, "learning_rate": 6.16866008801888e-07, "loss": 0.359, "step": 3596 }, { "epoch": 2.568062827225131, "grad_norm": 0.30851250886917114, "learning_rate": 6.148679950161673e-07, "loss": 0.3758, "step": 3597 }, { "epoch": 2.5687767729652546, "grad_norm": 0.2848406136035919, "learning_rate": 6.128730102270897e-07, "loss": 0.3764, "step": 3598 }, { "epoch": 2.5694907187053784, "grad_norm": 0.3006265163421631, "learning_rate": 6.108810558126782e-07, "loss": 0.3552, "step": 3599 }, { "epoch": 2.570204664445502, "grad_norm": 0.3147828280925751, "learning_rate": 6.088921331488568e-07, "loss": 0.3736, "step": 3600 }, { "epoch": 2.570918610185626, "grad_norm": 0.3130388557910919, "learning_rate": 6.06906243609462e-07, "loss": 0.3775, "step": 3601 }, { "epoch": 2.5716325559257496, "grad_norm": 0.30322539806365967, "learning_rate": 6.049233885662298e-07, "loss": 0.3672, "step": 3602 }, { "epoch": 2.5723465016658733, "grad_norm": 0.2886291444301605, "learning_rate": 6.029435693888019e-07, "loss": 0.3723, "step": 3603 }, { "epoch": 2.573060447405997, "grad_norm": 0.30935853719711304, "learning_rate": 6.009667874447244e-07, "loss": 0.3929, "step": 3604 }, { "epoch": 2.5737743931461208, "grad_norm": 0.29253673553466797, "learning_rate": 5.989930440994451e-07, "loss": 0.344, "step": 3605 }, { "epoch": 2.5744883388862445, "grad_norm": 0.3019779324531555, "learning_rate": 5.9702234071631e-07, "loss": 0.3638, "step": 3606 }, { "epoch": 2.5752022846263682, "grad_norm": 0.3005354106426239, "learning_rate": 5.950546786565697e-07, "loss": 0.3864, "step": 3607 }, { "epoch": 2.5759162303664924, "grad_norm": 0.31299349665641785, "learning_rate": 5.930900592793715e-07, "loss": 0.3648, "step": 3608 }, { "epoch": 2.576630176106616, "grad_norm": 0.3043883740901947, "learning_rate": 5.911284839417597e-07, "loss": 0.3968, "step": 3609 }, { "epoch": 2.57734412184674, "grad_norm": 0.28809016942977905, "learning_rate": 5.891699539986789e-07, "loss": 0.3315, "step": 3610 }, { "epoch": 2.5780580675868636, "grad_norm": 0.33970195055007935, "learning_rate": 5.872144708029698e-07, "loss": 0.4135, "step": 3611 }, { "epoch": 2.5787720133269874, "grad_norm": 0.2996990978717804, "learning_rate": 5.852620357053651e-07, "loss": 0.338, "step": 3612 }, { "epoch": 2.579485959067111, "grad_norm": 0.30268245935440063, "learning_rate": 5.833126500544966e-07, "loss": 0.3853, "step": 3613 }, { "epoch": 2.580199904807235, "grad_norm": 0.2929946184158325, "learning_rate": 5.813663151968874e-07, "loss": 0.337, "step": 3614 }, { "epoch": 2.5809138505473586, "grad_norm": 0.3376908600330353, "learning_rate": 5.794230324769518e-07, "loss": 0.3977, "step": 3615 }, { "epoch": 2.5816277962874823, "grad_norm": 0.2990003228187561, "learning_rate": 5.774828032369983e-07, "loss": 0.3496, "step": 3616 }, { "epoch": 2.582341742027606, "grad_norm": 0.31677478551864624, "learning_rate": 5.755456288172273e-07, "loss": 0.3834, "step": 3617 }, { "epoch": 2.5830556877677298, "grad_norm": 0.284882515668869, "learning_rate": 5.736115105557249e-07, "loss": 0.3805, "step": 3618 }, { "epoch": 2.5837696335078535, "grad_norm": 0.31591904163360596, "learning_rate": 5.716804497884698e-07, "loss": 0.316, "step": 3619 }, { "epoch": 2.5844835792479772, "grad_norm": 0.32895562052726746, "learning_rate": 5.697524478493288e-07, "loss": 0.4086, "step": 3620 }, { "epoch": 2.585197524988101, "grad_norm": 0.28695952892303467, "learning_rate": 5.678275060700517e-07, "loss": 0.3669, "step": 3621 }, { "epoch": 2.5859114707282247, "grad_norm": 0.32626351714134216, "learning_rate": 5.659056257802792e-07, "loss": 0.3772, "step": 3622 }, { "epoch": 2.5866254164683484, "grad_norm": 0.32430630922317505, "learning_rate": 5.639868083075362e-07, "loss": 0.3691, "step": 3623 }, { "epoch": 2.587339362208472, "grad_norm": 0.30264976620674133, "learning_rate": 5.620710549772295e-07, "loss": 0.3533, "step": 3624 }, { "epoch": 2.588053307948596, "grad_norm": 0.29625600576400757, "learning_rate": 5.601583671126532e-07, "loss": 0.3603, "step": 3625 }, { "epoch": 2.5887672536887196, "grad_norm": 0.3076716959476471, "learning_rate": 5.582487460349806e-07, "loss": 0.3732, "step": 3626 }, { "epoch": 2.5894811994288434, "grad_norm": 0.3200216591358185, "learning_rate": 5.563421930632674e-07, "loss": 0.3858, "step": 3627 }, { "epoch": 2.590195145168967, "grad_norm": 0.3065534234046936, "learning_rate": 5.54438709514451e-07, "loss": 0.3455, "step": 3628 }, { "epoch": 2.590909090909091, "grad_norm": 0.29809001088142395, "learning_rate": 5.525382967033499e-07, "loss": 0.3529, "step": 3629 }, { "epoch": 2.5916230366492146, "grad_norm": 0.31359627842903137, "learning_rate": 5.506409559426573e-07, "loss": 0.4065, "step": 3630 }, { "epoch": 2.5923369823893383, "grad_norm": 0.30515092611312866, "learning_rate": 5.487466885429482e-07, "loss": 0.3525, "step": 3631 }, { "epoch": 2.593050928129462, "grad_norm": 0.30810022354125977, "learning_rate": 5.468554958126737e-07, "loss": 0.4007, "step": 3632 }, { "epoch": 2.5937648738695858, "grad_norm": 0.3144661486148834, "learning_rate": 5.449673790581611e-07, "loss": 0.3346, "step": 3633 }, { "epoch": 2.5944788196097095, "grad_norm": 0.28544148802757263, "learning_rate": 5.43082339583611e-07, "loss": 0.3501, "step": 3634 }, { "epoch": 2.5951927653498332, "grad_norm": 0.2924717962741852, "learning_rate": 5.412003786911013e-07, "loss": 0.3449, "step": 3635 }, { "epoch": 2.595906711089957, "grad_norm": 0.2951527237892151, "learning_rate": 5.393214976805833e-07, "loss": 0.3704, "step": 3636 }, { "epoch": 2.5966206568300807, "grad_norm": 0.2910362482070923, "learning_rate": 5.374456978498782e-07, "loss": 0.375, "step": 3637 }, { "epoch": 2.5973346025702044, "grad_norm": 0.300446093082428, "learning_rate": 5.355729804946802e-07, "loss": 0.3839, "step": 3638 }, { "epoch": 2.598048548310328, "grad_norm": 0.2869141697883606, "learning_rate": 5.337033469085562e-07, "loss": 0.3625, "step": 3639 }, { "epoch": 2.598762494050452, "grad_norm": 0.3143072724342346, "learning_rate": 5.318367983829393e-07, "loss": 0.3747, "step": 3640 }, { "epoch": 2.599476439790576, "grad_norm": 0.30844494700431824, "learning_rate": 5.299733362071347e-07, "loss": 0.4175, "step": 3641 }, { "epoch": 2.6001903855307, "grad_norm": 0.3012448251247406, "learning_rate": 5.281129616683167e-07, "loss": 0.3437, "step": 3642 }, { "epoch": 2.6009043312708235, "grad_norm": 0.316679984331131, "learning_rate": 5.262556760515214e-07, "loss": 0.3224, "step": 3643 }, { "epoch": 2.6016182770109473, "grad_norm": 0.31293144822120667, "learning_rate": 5.244014806396569e-07, "loss": 0.3821, "step": 3644 }, { "epoch": 2.602332222751071, "grad_norm": 0.2957480549812317, "learning_rate": 5.225503767134954e-07, "loss": 0.3761, "step": 3645 }, { "epoch": 2.6030461684911947, "grad_norm": 0.3122027516365051, "learning_rate": 5.207023655516702e-07, "loss": 0.3573, "step": 3646 }, { "epoch": 2.6037601142313185, "grad_norm": 0.2911969721317291, "learning_rate": 5.188574484306829e-07, "loss": 0.3402, "step": 3647 }, { "epoch": 2.604474059971442, "grad_norm": 0.35041293501853943, "learning_rate": 5.17015626624896e-07, "loss": 0.4858, "step": 3648 }, { "epoch": 2.605188005711566, "grad_norm": 0.3334674835205078, "learning_rate": 5.151769014065339e-07, "loss": 0.3604, "step": 3649 }, { "epoch": 2.6059019514516897, "grad_norm": 0.31498730182647705, "learning_rate": 5.133412740456805e-07, "loss": 0.3331, "step": 3650 }, { "epoch": 2.6066158971918134, "grad_norm": 0.2920898199081421, "learning_rate": 5.115087458102841e-07, "loss": 0.3769, "step": 3651 }, { "epoch": 2.607329842931937, "grad_norm": 0.2975864112377167, "learning_rate": 5.096793179661463e-07, "loss": 0.3852, "step": 3652 }, { "epoch": 2.608043788672061, "grad_norm": 0.2980819046497345, "learning_rate": 5.078529917769331e-07, "loss": 0.3792, "step": 3653 }, { "epoch": 2.6087577344121846, "grad_norm": 0.3214935064315796, "learning_rate": 5.06029768504166e-07, "loss": 0.3522, "step": 3654 }, { "epoch": 2.6094716801523083, "grad_norm": 0.2958112359046936, "learning_rate": 5.042096494072196e-07, "loss": 0.3654, "step": 3655 }, { "epoch": 2.610185625892432, "grad_norm": 0.29468655586242676, "learning_rate": 5.023926357433296e-07, "loss": 0.3275, "step": 3656 }, { "epoch": 2.610899571632556, "grad_norm": 0.28689315915107727, "learning_rate": 5.005787287675851e-07, "loss": 0.3693, "step": 3657 }, { "epoch": 2.6116135173726795, "grad_norm": 0.33735138177871704, "learning_rate": 4.987679297329262e-07, "loss": 0.4226, "step": 3658 }, { "epoch": 2.6123274631128033, "grad_norm": 0.29257240891456604, "learning_rate": 4.969602398901496e-07, "loss": 0.3527, "step": 3659 }, { "epoch": 2.613041408852927, "grad_norm": 0.29605260491371155, "learning_rate": 4.951556604879049e-07, "loss": 0.3484, "step": 3660 }, { "epoch": 2.613755354593051, "grad_norm": 0.3032779097557068, "learning_rate": 4.933541927726887e-07, "loss": 0.3995, "step": 3661 }, { "epoch": 2.614469300333175, "grad_norm": 0.3002890348434448, "learning_rate": 4.915558379888541e-07, "loss": 0.39, "step": 3662 }, { "epoch": 2.6151832460732987, "grad_norm": 0.3181798756122589, "learning_rate": 4.897605973785996e-07, "loss": 0.3454, "step": 3663 }, { "epoch": 2.6158971918134224, "grad_norm": 0.2974728047847748, "learning_rate": 4.879684721819728e-07, "loss": 0.3758, "step": 3664 }, { "epoch": 2.616611137553546, "grad_norm": 0.2877800464630127, "learning_rate": 4.861794636368721e-07, "loss": 0.3833, "step": 3665 }, { "epoch": 2.61732508329367, "grad_norm": 0.2953026294708252, "learning_rate": 4.843935729790422e-07, "loss": 0.3649, "step": 3666 }, { "epoch": 2.6180390290337936, "grad_norm": 0.30656909942626953, "learning_rate": 4.826108014420716e-07, "loss": 0.3446, "step": 3667 }, { "epoch": 2.6187529747739173, "grad_norm": 0.3072724938392639, "learning_rate": 4.808311502573976e-07, "loss": 0.3914, "step": 3668 }, { "epoch": 2.619466920514041, "grad_norm": 0.2891867160797119, "learning_rate": 4.790546206542995e-07, "loss": 0.3613, "step": 3669 }, { "epoch": 2.620180866254165, "grad_norm": 0.33532649278640747, "learning_rate": 4.772812138599043e-07, "loss": 0.4447, "step": 3670 }, { "epoch": 2.6208948119942885, "grad_norm": 0.28417283296585083, "learning_rate": 4.755109310991762e-07, "loss": 0.3273, "step": 3671 }, { "epoch": 2.6216087577344123, "grad_norm": 0.32238489389419556, "learning_rate": 4.737437735949263e-07, "loss": 0.376, "step": 3672 }, { "epoch": 2.622322703474536, "grad_norm": 0.3075103163719177, "learning_rate": 4.7197974256780466e-07, "loss": 0.34, "step": 3673 }, { "epoch": 2.6230366492146597, "grad_norm": 0.29550448060035706, "learning_rate": 4.702188392363027e-07, "loss": 0.3809, "step": 3674 }, { "epoch": 2.6237505949547835, "grad_norm": 0.2783593237400055, "learning_rate": 4.6846106481675035e-07, "loss": 0.3366, "step": 3675 }, { "epoch": 2.624464540694907, "grad_norm": 0.32389917969703674, "learning_rate": 4.6670642052331793e-07, "loss": 0.3814, "step": 3676 }, { "epoch": 2.625178486435031, "grad_norm": 0.3062639832496643, "learning_rate": 4.6495490756801156e-07, "loss": 0.391, "step": 3677 }, { "epoch": 2.6258924321751547, "grad_norm": 0.2854464650154114, "learning_rate": 4.632065271606756e-07, "loss": 0.3466, "step": 3678 }, { "epoch": 2.6266063779152784, "grad_norm": 0.308683842420578, "learning_rate": 4.614612805089924e-07, "loss": 0.3712, "step": 3679 }, { "epoch": 2.627320323655402, "grad_norm": 0.3106927275657654, "learning_rate": 4.5971916881847543e-07, "loss": 0.3484, "step": 3680 }, { "epoch": 2.628034269395526, "grad_norm": 0.31912803649902344, "learning_rate": 4.57980193292476e-07, "loss": 0.4002, "step": 3681 }, { "epoch": 2.6287482151356496, "grad_norm": 0.30328992009162903, "learning_rate": 4.562443551321788e-07, "loss": 0.3418, "step": 3682 }, { "epoch": 2.6294621608757733, "grad_norm": 0.33357417583465576, "learning_rate": 4.545116555366003e-07, "loss": 0.3823, "step": 3683 }, { "epoch": 2.630176106615897, "grad_norm": 0.31552422046661377, "learning_rate": 4.5278209570258914e-07, "loss": 0.4166, "step": 3684 }, { "epoch": 2.630890052356021, "grad_norm": 0.2928459942340851, "learning_rate": 4.5105567682482756e-07, "loss": 0.3701, "step": 3685 }, { "epoch": 2.6316039980961445, "grad_norm": 0.3127056658267975, "learning_rate": 4.4933240009582493e-07, "loss": 0.3975, "step": 3686 }, { "epoch": 2.6323179438362683, "grad_norm": 0.27527666091918945, "learning_rate": 4.4761226670592074e-07, "loss": 0.3083, "step": 3687 }, { "epoch": 2.633031889576392, "grad_norm": 0.302788645029068, "learning_rate": 4.458952778432857e-07, "loss": 0.3714, "step": 3688 }, { "epoch": 2.6337458353165157, "grad_norm": 0.3079182207584381, "learning_rate": 4.4418143469391497e-07, "loss": 0.3476, "step": 3689 }, { "epoch": 2.6344597810566395, "grad_norm": 0.3199690282344818, "learning_rate": 4.4247073844163434e-07, "loss": 0.3974, "step": 3690 }, { "epoch": 2.635173726796763, "grad_norm": 0.3018459677696228, "learning_rate": 4.407631902680942e-07, "loss": 0.3787, "step": 3691 }, { "epoch": 2.635887672536887, "grad_norm": 0.30932334065437317, "learning_rate": 4.390587913527694e-07, "loss": 0.414, "step": 3692 }, { "epoch": 2.6366016182770107, "grad_norm": 0.29435425996780396, "learning_rate": 4.3735754287296097e-07, "loss": 0.3699, "step": 3693 }, { "epoch": 2.637315564017135, "grad_norm": 0.2944483458995819, "learning_rate": 4.356594460037944e-07, "loss": 0.3332, "step": 3694 }, { "epoch": 2.6380295097572586, "grad_norm": 0.30148088932037354, "learning_rate": 4.3396450191821593e-07, "loss": 0.3797, "step": 3695 }, { "epoch": 2.6387434554973823, "grad_norm": 0.28691935539245605, "learning_rate": 4.322727117869951e-07, "loss": 0.3293, "step": 3696 }, { "epoch": 2.639457401237506, "grad_norm": 0.31295567750930786, "learning_rate": 4.3058407677872503e-07, "loss": 0.3625, "step": 3697 }, { "epoch": 2.64017134697763, "grad_norm": 0.31189775466918945, "learning_rate": 4.288985980598165e-07, "loss": 0.4043, "step": 3698 }, { "epoch": 2.6408852927177535, "grad_norm": 0.29426148533821106, "learning_rate": 4.2721627679449983e-07, "loss": 0.3507, "step": 3699 }, { "epoch": 2.6415992384578773, "grad_norm": 0.31887397170066833, "learning_rate": 4.255371141448272e-07, "loss": 0.4047, "step": 3700 }, { "epoch": 2.642313184198001, "grad_norm": 0.28193363547325134, "learning_rate": 4.23861111270667e-07, "loss": 0.3108, "step": 3701 }, { "epoch": 2.6430271299381247, "grad_norm": 0.3038823902606964, "learning_rate": 4.221882693297047e-07, "loss": 0.3948, "step": 3702 }, { "epoch": 2.6437410756782485, "grad_norm": 0.2852167785167694, "learning_rate": 4.205185894774455e-07, "loss": 0.3271, "step": 3703 }, { "epoch": 2.644455021418372, "grad_norm": 0.33730706572532654, "learning_rate": 4.188520728672052e-07, "loss": 0.3685, "step": 3704 }, { "epoch": 2.645168967158496, "grad_norm": 0.3018828332424164, "learning_rate": 4.171887206501191e-07, "loss": 0.3546, "step": 3705 }, { "epoch": 2.6458829128986197, "grad_norm": 0.3150060176849365, "learning_rate": 4.155285339751358e-07, "loss": 0.3601, "step": 3706 }, { "epoch": 2.6465968586387434, "grad_norm": 0.31857651472091675, "learning_rate": 4.138715139890148e-07, "loss": 0.3741, "step": 3707 }, { "epoch": 2.647310804378867, "grad_norm": 0.28811800479888916, "learning_rate": 4.122176618363305e-07, "loss": 0.3437, "step": 3708 }, { "epoch": 2.648024750118991, "grad_norm": 0.288809210062027, "learning_rate": 4.105669786594707e-07, "loss": 0.3463, "step": 3709 }, { "epoch": 2.6487386958591146, "grad_norm": 0.31533658504486084, "learning_rate": 4.089194655986306e-07, "loss": 0.3771, "step": 3710 }, { "epoch": 2.6494526415992383, "grad_norm": 0.2931728959083557, "learning_rate": 4.0727512379181653e-07, "loss": 0.3854, "step": 3711 }, { "epoch": 2.650166587339362, "grad_norm": 0.2897021472454071, "learning_rate": 4.0563395437484666e-07, "loss": 0.3298, "step": 3712 }, { "epoch": 2.6508805330794862, "grad_norm": 0.31492966413497925, "learning_rate": 4.039959584813463e-07, "loss": 0.377, "step": 3713 }, { "epoch": 2.65159447881961, "grad_norm": 0.30231308937072754, "learning_rate": 4.0236113724274716e-07, "loss": 0.4167, "step": 3714 }, { "epoch": 2.6523084245597337, "grad_norm": 0.2868257164955139, "learning_rate": 4.0072949178829033e-07, "loss": 0.3683, "step": 3715 }, { "epoch": 2.6530223702998574, "grad_norm": 0.3047792315483093, "learning_rate": 3.991010232450243e-07, "loss": 0.3536, "step": 3716 }, { "epoch": 2.653736316039981, "grad_norm": 0.30674707889556885, "learning_rate": 3.9747573273779816e-07, "loss": 0.3846, "step": 3717 }, { "epoch": 2.654450261780105, "grad_norm": 0.31017085909843445, "learning_rate": 3.958536213892711e-07, "loss": 0.3734, "step": 3718 }, { "epoch": 2.6551642075202286, "grad_norm": 0.28236037492752075, "learning_rate": 3.9423469031990457e-07, "loss": 0.3268, "step": 3719 }, { "epoch": 2.6558781532603524, "grad_norm": 0.2824125289916992, "learning_rate": 3.9261894064796136e-07, "loss": 0.3509, "step": 3720 }, { "epoch": 2.656592099000476, "grad_norm": 0.3249209523200989, "learning_rate": 3.910063734895103e-07, "loss": 0.3646, "step": 3721 }, { "epoch": 2.6573060447406, "grad_norm": 0.3102090060710907, "learning_rate": 3.893969899584171e-07, "loss": 0.3759, "step": 3722 }, { "epoch": 2.6580199904807236, "grad_norm": 0.3029686212539673, "learning_rate": 3.877907911663542e-07, "loss": 0.3645, "step": 3723 }, { "epoch": 2.6587339362208473, "grad_norm": 0.3064883053302765, "learning_rate": 3.8618777822278854e-07, "loss": 0.3456, "step": 3724 }, { "epoch": 2.659447881960971, "grad_norm": 0.30878615379333496, "learning_rate": 3.84587952234991e-07, "loss": 0.3912, "step": 3725 }, { "epoch": 2.6601618277010948, "grad_norm": 0.3083043396472931, "learning_rate": 3.8299131430802826e-07, "loss": 0.3337, "step": 3726 }, { "epoch": 2.6608757734412185, "grad_norm": 0.32280251383781433, "learning_rate": 3.8139786554476643e-07, "loss": 0.3771, "step": 3727 }, { "epoch": 2.6615897191813422, "grad_norm": 0.3169936239719391, "learning_rate": 3.798076070458684e-07, "loss": 0.3836, "step": 3728 }, { "epoch": 2.662303664921466, "grad_norm": 0.3001805543899536, "learning_rate": 3.782205399097916e-07, "loss": 0.3551, "step": 3729 }, { "epoch": 2.6630176106615897, "grad_norm": 0.32167062163352966, "learning_rate": 3.766366652327924e-07, "loss": 0.3748, "step": 3730 }, { "epoch": 2.6637315564017134, "grad_norm": 0.31623542308807373, "learning_rate": 3.750559841089196e-07, "loss": 0.3744, "step": 3731 }, { "epoch": 2.664445502141837, "grad_norm": 0.30210402607917786, "learning_rate": 3.734784976300165e-07, "loss": 0.4017, "step": 3732 }, { "epoch": 2.665159447881961, "grad_norm": 0.3178146481513977, "learning_rate": 3.719042068857204e-07, "loss": 0.3838, "step": 3733 }, { "epoch": 2.6658733936220846, "grad_norm": 0.2987056374549866, "learning_rate": 3.7033311296345966e-07, "loss": 0.3725, "step": 3734 }, { "epoch": 2.6665873393622084, "grad_norm": 0.29920873045921326, "learning_rate": 3.687652169484568e-07, "loss": 0.3539, "step": 3735 }, { "epoch": 2.667301285102332, "grad_norm": 0.2934167981147766, "learning_rate": 3.6720051992372276e-07, "loss": 0.3686, "step": 3736 }, { "epoch": 2.668015230842456, "grad_norm": 0.323991984128952, "learning_rate": 3.656390229700613e-07, "loss": 0.3828, "step": 3737 }, { "epoch": 2.6687291765825796, "grad_norm": 0.2951498031616211, "learning_rate": 3.6408072716606346e-07, "loss": 0.3499, "step": 3738 }, { "epoch": 2.6694431223227033, "grad_norm": 0.30960437655448914, "learning_rate": 3.6252563358811053e-07, "loss": 0.3765, "step": 3739 }, { "epoch": 2.670157068062827, "grad_norm": 0.2979672849178314, "learning_rate": 3.6097374331037326e-07, "loss": 0.369, "step": 3740 }, { "epoch": 2.6708710138029508, "grad_norm": 0.30336520075798035, "learning_rate": 3.5942505740480583e-07, "loss": 0.4003, "step": 3741 }, { "epoch": 2.6715849595430745, "grad_norm": 0.31211620569229126, "learning_rate": 3.5787957694115194e-07, "loss": 0.3549, "step": 3742 }, { "epoch": 2.6722989052831982, "grad_norm": 0.3104884624481201, "learning_rate": 3.563373029869416e-07, "loss": 0.3155, "step": 3743 }, { "epoch": 2.673012851023322, "grad_norm": 0.32453739643096924, "learning_rate": 3.5479823660748703e-07, "loss": 0.3473, "step": 3744 }, { "epoch": 2.6737267967634457, "grad_norm": 0.2963000237941742, "learning_rate": 3.5326237886588734e-07, "loss": 0.3783, "step": 3745 }, { "epoch": 2.67444074250357, "grad_norm": 0.31814685463905334, "learning_rate": 3.517297308230261e-07, "loss": 0.3922, "step": 3746 }, { "epoch": 2.6751546882436936, "grad_norm": 0.30926641821861267, "learning_rate": 3.5020029353756703e-07, "loss": 0.3666, "step": 3747 }, { "epoch": 2.6758686339838174, "grad_norm": 0.30093926191329956, "learning_rate": 3.486740680659562e-07, "loss": 0.3343, "step": 3748 }, { "epoch": 2.676582579723941, "grad_norm": 0.31517595052719116, "learning_rate": 3.4715105546242355e-07, "loss": 0.3414, "step": 3749 }, { "epoch": 2.677296525464065, "grad_norm": 0.3034377992153168, "learning_rate": 3.4563125677897936e-07, "loss": 0.3645, "step": 3750 }, { "epoch": 2.6780104712041886, "grad_norm": 0.3013376295566559, "learning_rate": 3.44114673065411e-07, "loss": 0.3294, "step": 3751 }, { "epoch": 2.6787244169443123, "grad_norm": 0.2953805923461914, "learning_rate": 3.426013053692878e-07, "loss": 0.3459, "step": 3752 }, { "epoch": 2.679438362684436, "grad_norm": 0.31488651037216187, "learning_rate": 3.4109115473595855e-07, "loss": 0.3909, "step": 3753 }, { "epoch": 2.6801523084245598, "grad_norm": 0.31173524260520935, "learning_rate": 3.395842222085466e-07, "loss": 0.3651, "step": 3754 }, { "epoch": 2.6808662541646835, "grad_norm": 0.3002777695655823, "learning_rate": 3.380805088279543e-07, "loss": 0.3144, "step": 3755 }, { "epoch": 2.6815801999048072, "grad_norm": 0.3115241229534149, "learning_rate": 3.365800156328619e-07, "loss": 0.3668, "step": 3756 }, { "epoch": 2.682294145644931, "grad_norm": 0.3055487871170044, "learning_rate": 3.3508274365972146e-07, "loss": 0.4015, "step": 3757 }, { "epoch": 2.6830080913850547, "grad_norm": 0.2932371497154236, "learning_rate": 3.3358869394276406e-07, "loss": 0.3669, "step": 3758 }, { "epoch": 2.6837220371251784, "grad_norm": 0.3015128970146179, "learning_rate": 3.320978675139919e-07, "loss": 0.3991, "step": 3759 }, { "epoch": 2.684435982865302, "grad_norm": 0.3131522536277771, "learning_rate": 3.306102654031823e-07, "loss": 0.3487, "step": 3760 }, { "epoch": 2.685149928605426, "grad_norm": 0.29915347695350647, "learning_rate": 3.291258886378851e-07, "loss": 0.3665, "step": 3761 }, { "epoch": 2.6858638743455496, "grad_norm": 0.29916951060295105, "learning_rate": 3.276447382434228e-07, "loss": 0.3551, "step": 3762 }, { "epoch": 2.6865778200856734, "grad_norm": 0.32663577795028687, "learning_rate": 3.261668152428882e-07, "loss": 0.3383, "step": 3763 }, { "epoch": 2.687291765825797, "grad_norm": 0.3132264018058777, "learning_rate": 3.2469212065714573e-07, "loss": 0.3539, "step": 3764 }, { "epoch": 2.688005711565921, "grad_norm": 0.2805797755718231, "learning_rate": 3.2322065550483005e-07, "loss": 0.3803, "step": 3765 }, { "epoch": 2.688719657306045, "grad_norm": 0.2826303541660309, "learning_rate": 3.2175242080234314e-07, "loss": 0.34, "step": 3766 }, { "epoch": 2.6894336030461687, "grad_norm": 0.31121203303337097, "learning_rate": 3.202874175638576e-07, "loss": 0.4128, "step": 3767 }, { "epoch": 2.6901475487862925, "grad_norm": 0.2801654636859894, "learning_rate": 3.18825646801314e-07, "loss": 0.3685, "step": 3768 }, { "epoch": 2.690861494526416, "grad_norm": 0.33013811707496643, "learning_rate": 3.173671095244185e-07, "loss": 0.3848, "step": 3769 }, { "epoch": 2.69157544026654, "grad_norm": 0.31304869055747986, "learning_rate": 3.1591180674064584e-07, "loss": 0.3577, "step": 3770 }, { "epoch": 2.6922893860066637, "grad_norm": 0.30329757928848267, "learning_rate": 3.14459739455234e-07, "loss": 0.349, "step": 3771 }, { "epoch": 2.6930033317467874, "grad_norm": 0.2993893325328827, "learning_rate": 3.1301090867118746e-07, "loss": 0.3567, "step": 3772 }, { "epoch": 2.693717277486911, "grad_norm": 0.2847723364830017, "learning_rate": 3.1156531538927615e-07, "loss": 0.3572, "step": 3773 }, { "epoch": 2.694431223227035, "grad_norm": 0.2865867018699646, "learning_rate": 3.101229606080319e-07, "loss": 0.35, "step": 3774 }, { "epoch": 2.6951451689671586, "grad_norm": 0.3215136229991913, "learning_rate": 3.086838453237506e-07, "loss": 0.3784, "step": 3775 }, { "epoch": 2.6958591147072823, "grad_norm": 0.3159521520137787, "learning_rate": 3.0724797053048927e-07, "loss": 0.3962, "step": 3776 }, { "epoch": 2.696573060447406, "grad_norm": 0.31209874153137207, "learning_rate": 3.0581533722006953e-07, "loss": 0.3881, "step": 3777 }, { "epoch": 2.69728700618753, "grad_norm": 0.29009518027305603, "learning_rate": 3.043859463820703e-07, "loss": 0.3578, "step": 3778 }, { "epoch": 2.6980009519276535, "grad_norm": 0.30552032589912415, "learning_rate": 3.02959799003833e-07, "loss": 0.3846, "step": 3779 }, { "epoch": 2.6987148976677773, "grad_norm": 0.29027968645095825, "learning_rate": 3.015368960704584e-07, "loss": 0.3248, "step": 3780 }, { "epoch": 2.699428843407901, "grad_norm": 0.3323717415332794, "learning_rate": 3.001172385648049e-07, "loss": 0.366, "step": 3781 }, { "epoch": 2.7001427891480247, "grad_norm": 0.3399321734905243, "learning_rate": 2.9870082746749186e-07, "loss": 0.3981, "step": 3782 }, { "epoch": 2.7008567348881485, "grad_norm": 0.2915847897529602, "learning_rate": 2.972876637568922e-07, "loss": 0.3082, "step": 3783 }, { "epoch": 2.701570680628272, "grad_norm": 0.3229154050350189, "learning_rate": 2.9587774840914016e-07, "loss": 0.3725, "step": 3784 }, { "epoch": 2.702284626368396, "grad_norm": 0.31973370909690857, "learning_rate": 2.944710823981228e-07, "loss": 0.384, "step": 3785 }, { "epoch": 2.7029985721085197, "grad_norm": 0.31713223457336426, "learning_rate": 2.930676666954846e-07, "loss": 0.3852, "step": 3786 }, { "epoch": 2.7037125178486434, "grad_norm": 0.31682252883911133, "learning_rate": 2.916675022706239e-07, "loss": 0.3771, "step": 3787 }, { "epoch": 2.704426463588767, "grad_norm": 0.2986239194869995, "learning_rate": 2.9027059009069323e-07, "loss": 0.3462, "step": 3788 }, { "epoch": 2.705140409328891, "grad_norm": 0.31223469972610474, "learning_rate": 2.8887693112060025e-07, "loss": 0.3719, "step": 3789 }, { "epoch": 2.7058543550690146, "grad_norm": 0.3022516965866089, "learning_rate": 2.8748652632300367e-07, "loss": 0.3687, "step": 3790 }, { "epoch": 2.7065683008091383, "grad_norm": 0.31703799962997437, "learning_rate": 2.8609937665831534e-07, "loss": 0.3824, "step": 3791 }, { "epoch": 2.707282246549262, "grad_norm": 0.30523934960365295, "learning_rate": 2.847154830846971e-07, "loss": 0.3593, "step": 3792 }, { "epoch": 2.707996192289386, "grad_norm": 0.31959813833236694, "learning_rate": 2.833348465580654e-07, "loss": 0.391, "step": 3793 }, { "epoch": 2.7087101380295096, "grad_norm": 0.33256033062934875, "learning_rate": 2.819574680320825e-07, "loss": 0.3693, "step": 3794 }, { "epoch": 2.7094240837696333, "grad_norm": 0.3145621716976166, "learning_rate": 2.8058334845816214e-07, "loss": 0.3686, "step": 3795 }, { "epoch": 2.710138029509757, "grad_norm": 0.2953834533691406, "learning_rate": 2.792124887854686e-07, "loss": 0.3428, "step": 3796 }, { "epoch": 2.7108519752498808, "grad_norm": 0.31604906916618347, "learning_rate": 2.778448899609104e-07, "loss": 0.4095, "step": 3797 }, { "epoch": 2.7115659209900045, "grad_norm": 0.30829960107803345, "learning_rate": 2.7648055292914754e-07, "loss": 0.3451, "step": 3798 }, { "epoch": 2.7122798667301287, "grad_norm": 0.2921428084373474, "learning_rate": 2.751194786325861e-07, "loss": 0.3429, "step": 3799 }, { "epoch": 2.7129938124702524, "grad_norm": 0.30675119161605835, "learning_rate": 2.737616680113758e-07, "loss": 0.3735, "step": 3800 }, { "epoch": 2.713707758210376, "grad_norm": 0.31168919801712036, "learning_rate": 2.724071220034158e-07, "loss": 0.3904, "step": 3801 }, { "epoch": 2.7144217039505, "grad_norm": 0.3015439510345459, "learning_rate": 2.7105584154434815e-07, "loss": 0.3617, "step": 3802 }, { "epoch": 2.7151356496906236, "grad_norm": 0.29646819829940796, "learning_rate": 2.697078275675591e-07, "loss": 0.3891, "step": 3803 }, { "epoch": 2.7158495954307473, "grad_norm": 0.3098753094673157, "learning_rate": 2.6836308100417874e-07, "loss": 0.3445, "step": 3804 }, { "epoch": 2.716563541170871, "grad_norm": 0.30142447352409363, "learning_rate": 2.6702160278308186e-07, "loss": 0.4271, "step": 3805 }, { "epoch": 2.717277486910995, "grad_norm": 0.2950383722782135, "learning_rate": 2.6568339383088284e-07, "loss": 0.3443, "step": 3806 }, { "epoch": 2.7179914326511185, "grad_norm": 0.31868675351142883, "learning_rate": 2.6434845507194106e-07, "loss": 0.3394, "step": 3807 }, { "epoch": 2.7187053783912423, "grad_norm": 0.3120635151863098, "learning_rate": 2.63016787428354e-07, "loss": 0.3721, "step": 3808 }, { "epoch": 2.719419324131366, "grad_norm": 0.29681941866874695, "learning_rate": 2.6168839181996087e-07, "loss": 0.3612, "step": 3809 }, { "epoch": 2.7201332698714897, "grad_norm": 0.3011421859264374, "learning_rate": 2.6036326916434153e-07, "loss": 0.3706, "step": 3810 }, { "epoch": 2.7208472156116135, "grad_norm": 0.2980944514274597, "learning_rate": 2.5904142037681434e-07, "loss": 0.3458, "step": 3811 }, { "epoch": 2.721561161351737, "grad_norm": 0.3272477090358734, "learning_rate": 2.5772284637043563e-07, "loss": 0.4198, "step": 3812 }, { "epoch": 2.722275107091861, "grad_norm": 0.3016315698623657, "learning_rate": 2.564075480560013e-07, "loss": 0.3198, "step": 3813 }, { "epoch": 2.7229890528319847, "grad_norm": 0.30579838156700134, "learning_rate": 2.5509552634204347e-07, "loss": 0.3828, "step": 3814 }, { "epoch": 2.7237029985721084, "grad_norm": 0.31047382950782776, "learning_rate": 2.5378678213483057e-07, "loss": 0.3254, "step": 3815 }, { "epoch": 2.724416944312232, "grad_norm": 0.30288568139076233, "learning_rate": 2.524813163383683e-07, "loss": 0.3658, "step": 3816 }, { "epoch": 2.725130890052356, "grad_norm": 0.3133848011493683, "learning_rate": 2.511791298543975e-07, "loss": 0.3671, "step": 3817 }, { "epoch": 2.72584483579248, "grad_norm": 0.3034319281578064, "learning_rate": 2.498802235823922e-07, "loss": 0.3452, "step": 3818 }, { "epoch": 2.726558781532604, "grad_norm": 0.31440210342407227, "learning_rate": 2.485845984195645e-07, "loss": 0.3641, "step": 3819 }, { "epoch": 2.7272727272727275, "grad_norm": 0.3061026930809021, "learning_rate": 2.472922552608559e-07, "loss": 0.3853, "step": 3820 }, { "epoch": 2.7279866730128512, "grad_norm": 0.3054577112197876, "learning_rate": 2.4600319499894163e-07, "loss": 0.38, "step": 3821 }, { "epoch": 2.728700618752975, "grad_norm": 0.27194616198539734, "learning_rate": 2.447174185242324e-07, "loss": 0.3364, "step": 3822 }, { "epoch": 2.7294145644930987, "grad_norm": 0.3238633871078491, "learning_rate": 2.434349267248681e-07, "loss": 0.3972, "step": 3823 }, { "epoch": 2.7301285102332224, "grad_norm": 0.3119550347328186, "learning_rate": 2.421557204867192e-07, "loss": 0.3381, "step": 3824 }, { "epoch": 2.730842455973346, "grad_norm": 0.2963503301143646, "learning_rate": 2.4087980069338825e-07, "loss": 0.3921, "step": 3825 }, { "epoch": 2.73155640171347, "grad_norm": 0.29276329278945923, "learning_rate": 2.396071682262069e-07, "loss": 0.3956, "step": 3826 }, { "epoch": 2.7322703474535937, "grad_norm": 0.28879213333129883, "learning_rate": 2.3833782396423755e-07, "loss": 0.3217, "step": 3827 }, { "epoch": 2.7329842931937174, "grad_norm": 0.3138888478279114, "learning_rate": 2.3707176878426886e-07, "loss": 0.3567, "step": 3828 }, { "epoch": 2.733698238933841, "grad_norm": 0.31228840351104736, "learning_rate": 2.3580900356081903e-07, "loss": 0.379, "step": 3829 }, { "epoch": 2.734412184673965, "grad_norm": 0.2943858504295349, "learning_rate": 2.3454952916613482e-07, "loss": 0.3672, "step": 3830 }, { "epoch": 2.7351261304140886, "grad_norm": 0.292011022567749, "learning_rate": 2.3329334647018696e-07, "loss": 0.3803, "step": 3831 }, { "epoch": 2.7358400761542123, "grad_norm": 0.29467684030532837, "learning_rate": 2.3204045634067418e-07, "loss": 0.347, "step": 3832 }, { "epoch": 2.736554021894336, "grad_norm": 0.2988191843032837, "learning_rate": 2.307908596430225e-07, "loss": 0.3398, "step": 3833 }, { "epoch": 2.73726796763446, "grad_norm": 0.30540773272514343, "learning_rate": 2.2954455724037873e-07, "loss": 0.4086, "step": 3834 }, { "epoch": 2.7379819133745835, "grad_norm": 0.2983843982219696, "learning_rate": 2.283015499936192e-07, "loss": 0.3748, "step": 3835 }, { "epoch": 2.7386958591147073, "grad_norm": 0.30421552062034607, "learning_rate": 2.2706183876134047e-07, "loss": 0.3945, "step": 3836 }, { "epoch": 2.739409804854831, "grad_norm": 0.3092510402202606, "learning_rate": 2.2582542439986422e-07, "loss": 0.3737, "step": 3837 }, { "epoch": 2.7401237505949547, "grad_norm": 0.2995096445083618, "learning_rate": 2.2459230776323336e-07, "loss": 0.3758, "step": 3838 }, { "epoch": 2.7408376963350785, "grad_norm": 0.2941546142101288, "learning_rate": 2.233624897032155e-07, "loss": 0.363, "step": 3839 }, { "epoch": 2.741551642075202, "grad_norm": 0.2851475477218628, "learning_rate": 2.2213597106929608e-07, "loss": 0.3775, "step": 3840 }, { "epoch": 2.742265587815326, "grad_norm": 0.28896117210388184, "learning_rate": 2.2091275270868516e-07, "loss": 0.3389, "step": 3841 }, { "epoch": 2.7429795335554497, "grad_norm": 0.30135759711265564, "learning_rate": 2.1969283546631137e-07, "loss": 0.3388, "step": 3842 }, { "epoch": 2.7436934792955734, "grad_norm": 0.3196756839752197, "learning_rate": 2.1847622018482283e-07, "loss": 0.4263, "step": 3843 }, { "epoch": 2.744407425035697, "grad_norm": 0.27675604820251465, "learning_rate": 2.1726290770458735e-07, "loss": 0.3318, "step": 3844 }, { "epoch": 2.745121370775821, "grad_norm": 0.2976396381855011, "learning_rate": 2.1605289886369118e-07, "loss": 0.3585, "step": 3845 }, { "epoch": 2.7458353165159446, "grad_norm": 0.34482303261756897, "learning_rate": 2.1484619449793854e-07, "loss": 0.3786, "step": 3846 }, { "epoch": 2.7465492622560683, "grad_norm": 0.30008551478385925, "learning_rate": 2.136427954408521e-07, "loss": 0.3305, "step": 3847 }, { "epoch": 2.747263207996192, "grad_norm": 0.30408453941345215, "learning_rate": 2.1244270252367028e-07, "loss": 0.3779, "step": 3848 }, { "epoch": 2.747977153736316, "grad_norm": 0.29275399446487427, "learning_rate": 2.1124591657534776e-07, "loss": 0.397, "step": 3849 }, { "epoch": 2.7486910994764395, "grad_norm": 0.29636693000793457, "learning_rate": 2.1005243842255552e-07, "loss": 0.3604, "step": 3850 }, { "epoch": 2.7494050452165637, "grad_norm": 0.2914973497390747, "learning_rate": 2.088622688896802e-07, "loss": 0.3998, "step": 3851 }, { "epoch": 2.7501189909566874, "grad_norm": 0.3002171218395233, "learning_rate": 2.0767540879882143e-07, "loss": 0.3455, "step": 3852 }, { "epoch": 2.750832936696811, "grad_norm": 0.3068557679653168, "learning_rate": 2.064918589697945e-07, "loss": 0.3834, "step": 3853 }, { "epoch": 2.751546882436935, "grad_norm": 0.27624115347862244, "learning_rate": 2.053116202201272e-07, "loss": 0.3571, "step": 3854 }, { "epoch": 2.7522608281770586, "grad_norm": 0.32101619243621826, "learning_rate": 2.041346933650612e-07, "loss": 0.4079, "step": 3855 }, { "epoch": 2.7529747739171824, "grad_norm": 0.3046807646751404, "learning_rate": 2.0296107921754793e-07, "loss": 0.3686, "step": 3856 }, { "epoch": 2.753688719657306, "grad_norm": 0.3071230351924896, "learning_rate": 2.0179077858825445e-07, "loss": 0.3612, "step": 3857 }, { "epoch": 2.75440266539743, "grad_norm": 0.31157734990119934, "learning_rate": 2.006237922855553e-07, "loss": 0.3793, "step": 3858 }, { "epoch": 2.7551166111375536, "grad_norm": 0.2894992232322693, "learning_rate": 1.9946012111553837e-07, "loss": 0.3215, "step": 3859 }, { "epoch": 2.7558305568776773, "grad_norm": 0.30772948265075684, "learning_rate": 1.982997658820013e-07, "loss": 0.3985, "step": 3860 }, { "epoch": 2.756544502617801, "grad_norm": 0.3119879961013794, "learning_rate": 1.9714272738644957e-07, "loss": 0.3959, "step": 3861 }, { "epoch": 2.7572584483579248, "grad_norm": 0.2899867594242096, "learning_rate": 1.9598900642809894e-07, "loss": 0.3313, "step": 3862 }, { "epoch": 2.7579723940980485, "grad_norm": 0.3102521300315857, "learning_rate": 1.9483860380387408e-07, "loss": 0.3826, "step": 3863 }, { "epoch": 2.7586863398381722, "grad_norm": 0.32955992221832275, "learning_rate": 1.9369152030840553e-07, "loss": 0.3335, "step": 3864 }, { "epoch": 2.759400285578296, "grad_norm": 0.2985744774341583, "learning_rate": 1.925477567340339e-07, "loss": 0.358, "step": 3865 }, { "epoch": 2.7601142313184197, "grad_norm": 0.2953208088874817, "learning_rate": 1.9140731387080502e-07, "loss": 0.3838, "step": 3866 }, { "epoch": 2.7608281770585434, "grad_norm": 0.3025439977645874, "learning_rate": 1.9027019250647038e-07, "loss": 0.3751, "step": 3867 }, { "epoch": 2.761542122798667, "grad_norm": 0.3142061233520508, "learning_rate": 1.8913639342648893e-07, "loss": 0.3218, "step": 3868 }, { "epoch": 2.762256068538791, "grad_norm": 0.30441147089004517, "learning_rate": 1.8800591741402252e-07, "loss": 0.3541, "step": 3869 }, { "epoch": 2.7629700142789146, "grad_norm": 0.305136501789093, "learning_rate": 1.8687876524993987e-07, "loss": 0.3539, "step": 3870 }, { "epoch": 2.763683960019039, "grad_norm": 0.2955913841724396, "learning_rate": 1.8575493771281205e-07, "loss": 0.3441, "step": 3871 }, { "epoch": 2.7643979057591626, "grad_norm": 0.3164345920085907, "learning_rate": 1.846344355789148e-07, "loss": 0.3571, "step": 3872 }, { "epoch": 2.7651118514992863, "grad_norm": 0.28818097710609436, "learning_rate": 1.8351725962222733e-07, "loss": 0.3331, "step": 3873 }, { "epoch": 2.76582579723941, "grad_norm": 0.3159824311733246, "learning_rate": 1.8240341061442902e-07, "loss": 0.3845, "step": 3874 }, { "epoch": 2.7665397429795338, "grad_norm": 0.2835760712623596, "learning_rate": 1.8129288932490276e-07, "loss": 0.3327, "step": 3875 }, { "epoch": 2.7672536887196575, "grad_norm": 0.30045443773269653, "learning_rate": 1.801856965207338e-07, "loss": 0.3972, "step": 3876 }, { "epoch": 2.7679676344597812, "grad_norm": 0.2830030918121338, "learning_rate": 1.79081832966706e-07, "loss": 0.329, "step": 3877 }, { "epoch": 2.768681580199905, "grad_norm": 0.31319954991340637, "learning_rate": 1.779812994253055e-07, "loss": 0.4287, "step": 3878 }, { "epoch": 2.7693955259400287, "grad_norm": 0.3135215640068054, "learning_rate": 1.7688409665671702e-07, "loss": 0.3564, "step": 3879 }, { "epoch": 2.7701094716801524, "grad_norm": 0.295306533575058, "learning_rate": 1.757902254188254e-07, "loss": 0.3938, "step": 3880 }, { "epoch": 2.770823417420276, "grad_norm": 0.29264670610427856, "learning_rate": 1.7469968646721347e-07, "loss": 0.3849, "step": 3881 }, { "epoch": 2.7715373631604, "grad_norm": 0.30202674865722656, "learning_rate": 1.7361248055516366e-07, "loss": 0.3473, "step": 3882 }, { "epoch": 2.7722513089005236, "grad_norm": 0.31755128502845764, "learning_rate": 1.725286084336536e-07, "loss": 0.3974, "step": 3883 }, { "epoch": 2.7729652546406474, "grad_norm": 0.29102423787117004, "learning_rate": 1.7144807085136105e-07, "loss": 0.3405, "step": 3884 }, { "epoch": 2.773679200380771, "grad_norm": 0.28180640935897827, "learning_rate": 1.7037086855465902e-07, "loss": 0.3562, "step": 3885 }, { "epoch": 2.774393146120895, "grad_norm": 0.29380422830581665, "learning_rate": 1.6929700228761614e-07, "loss": 0.3742, "step": 3886 }, { "epoch": 2.7751070918610186, "grad_norm": 0.31835418939590454, "learning_rate": 1.6822647279199744e-07, "loss": 0.3655, "step": 3887 }, { "epoch": 2.7758210376011423, "grad_norm": 0.2986754775047302, "learning_rate": 1.6715928080726417e-07, "loss": 0.3597, "step": 3888 }, { "epoch": 2.776534983341266, "grad_norm": 0.3163442313671112, "learning_rate": 1.6609542707057001e-07, "loss": 0.3811, "step": 3889 }, { "epoch": 2.7772489290813898, "grad_norm": 0.2792378067970276, "learning_rate": 1.6503491231676382e-07, "loss": 0.3556, "step": 3890 }, { "epoch": 2.7779628748215135, "grad_norm": 0.2988012433052063, "learning_rate": 1.6397773727838906e-07, "loss": 0.4133, "step": 3891 }, { "epoch": 2.7786768205616372, "grad_norm": 0.29000410437583923, "learning_rate": 1.6292390268568103e-07, "loss": 0.3521, "step": 3892 }, { "epoch": 2.779390766301761, "grad_norm": 0.283653199672699, "learning_rate": 1.6187340926656636e-07, "loss": 0.3452, "step": 3893 }, { "epoch": 2.7801047120418847, "grad_norm": 0.29850804805755615, "learning_rate": 1.6082625774666793e-07, "loss": 0.3726, "step": 3894 }, { "epoch": 2.7808186577820084, "grad_norm": 0.30088678002357483, "learning_rate": 1.5978244884929607e-07, "loss": 0.3559, "step": 3895 }, { "epoch": 2.781532603522132, "grad_norm": 0.31188687682151794, "learning_rate": 1.5874198329545398e-07, "loss": 0.3937, "step": 3896 }, { "epoch": 2.782246549262256, "grad_norm": 0.2820137143135071, "learning_rate": 1.5770486180383627e-07, "loss": 0.3329, "step": 3897 }, { "epoch": 2.7829604950023796, "grad_norm": 0.3276325762271881, "learning_rate": 1.5667108509082597e-07, "loss": 0.364, "step": 3898 }, { "epoch": 2.7836744407425034, "grad_norm": 0.29837656021118164, "learning_rate": 1.556406538704963e-07, "loss": 0.3639, "step": 3899 }, { "epoch": 2.784388386482627, "grad_norm": 0.325461208820343, "learning_rate": 1.5461356885461077e-07, "loss": 0.3728, "step": 3900 }, { "epoch": 2.785102332222751, "grad_norm": 0.3138081431388855, "learning_rate": 1.53589830752619e-07, "loss": 0.3396, "step": 3901 }, { "epoch": 2.7858162779628746, "grad_norm": 0.3118140697479248, "learning_rate": 1.5256944027166155e-07, "loss": 0.3692, "step": 3902 }, { "epoch": 2.7865302237029983, "grad_norm": 0.3180035650730133, "learning_rate": 1.5155239811656562e-07, "loss": 0.3998, "step": 3903 }, { "epoch": 2.7872441694431225, "grad_norm": 0.2886551320552826, "learning_rate": 1.505387049898449e-07, "loss": 0.3238, "step": 3904 }, { "epoch": 2.787958115183246, "grad_norm": 0.3178371787071228, "learning_rate": 1.4952836159169982e-07, "loss": 0.4063, "step": 3905 }, { "epoch": 2.78867206092337, "grad_norm": 0.2857176959514618, "learning_rate": 1.4852136862001766e-07, "loss": 0.3235, "step": 3906 }, { "epoch": 2.7893860066634937, "grad_norm": 0.30487060546875, "learning_rate": 1.4751772677037146e-07, "loss": 0.4104, "step": 3907 }, { "epoch": 2.7900999524036174, "grad_norm": 0.2952791750431061, "learning_rate": 1.4651743673601894e-07, "loss": 0.3395, "step": 3908 }, { "epoch": 2.790813898143741, "grad_norm": 0.29363325238227844, "learning_rate": 1.455204992079029e-07, "loss": 0.3654, "step": 3909 }, { "epoch": 2.791527843883865, "grad_norm": 0.28805431723594666, "learning_rate": 1.4452691487465087e-07, "loss": 0.3513, "step": 3910 }, { "epoch": 2.7922417896239886, "grad_norm": 0.28454339504241943, "learning_rate": 1.4353668442257218e-07, "loss": 0.3941, "step": 3911 }, { "epoch": 2.7929557353641123, "grad_norm": 0.29833346605300903, "learning_rate": 1.4254980853566248e-07, "loss": 0.3883, "step": 3912 }, { "epoch": 2.793669681104236, "grad_norm": 0.30358925461769104, "learning_rate": 1.4156628789559924e-07, "loss": 0.3702, "step": 3913 }, { "epoch": 2.79438362684436, "grad_norm": 0.2881988286972046, "learning_rate": 1.4058612318173958e-07, "loss": 0.3717, "step": 3914 }, { "epoch": 2.7950975725844835, "grad_norm": 0.3052288889884949, "learning_rate": 1.3960931507112752e-07, "loss": 0.3656, "step": 3915 }, { "epoch": 2.7958115183246073, "grad_norm": 0.297738641500473, "learning_rate": 1.3863586423848385e-07, "loss": 0.3685, "step": 3916 }, { "epoch": 2.796525464064731, "grad_norm": 0.2931293547153473, "learning_rate": 1.3766577135621296e-07, "loss": 0.3452, "step": 3917 }, { "epoch": 2.7972394098048547, "grad_norm": 0.297857403755188, "learning_rate": 1.3669903709439936e-07, "loss": 0.3477, "step": 3918 }, { "epoch": 2.7979533555449785, "grad_norm": 0.28006646037101746, "learning_rate": 1.357356621208078e-07, "loss": 0.354, "step": 3919 }, { "epoch": 2.798667301285102, "grad_norm": 0.3015158474445343, "learning_rate": 1.3477564710088097e-07, "loss": 0.345, "step": 3920 }, { "epoch": 2.799381247025226, "grad_norm": 0.3026409447193146, "learning_rate": 1.338189926977429e-07, "loss": 0.3781, "step": 3921 }, { "epoch": 2.8000951927653497, "grad_norm": 0.31797653436660767, "learning_rate": 1.3286569957219552e-07, "loss": 0.4261, "step": 3922 }, { "epoch": 2.800809138505474, "grad_norm": 0.27088630199432373, "learning_rate": 1.3191576838271768e-07, "loss": 0.3344, "step": 3923 }, { "epoch": 2.8015230842455976, "grad_norm": 0.2948342561721802, "learning_rate": 1.3096919978546842e-07, "loss": 0.366, "step": 3924 }, { "epoch": 2.8022370299857213, "grad_norm": 0.30651864409446716, "learning_rate": 1.3002599443428243e-07, "loss": 0.3792, "step": 3925 }, { "epoch": 2.802950975725845, "grad_norm": 0.32652926445007324, "learning_rate": 1.290861529806714e-07, "loss": 0.3964, "step": 3926 }, { "epoch": 2.803664921465969, "grad_norm": 0.289427250623703, "learning_rate": 1.2814967607382433e-07, "loss": 0.3361, "step": 3927 }, { "epoch": 2.8043788672060925, "grad_norm": 0.27916446328163147, "learning_rate": 1.2721656436060382e-07, "loss": 0.3622, "step": 3928 }, { "epoch": 2.8050928129462163, "grad_norm": 0.2994278073310852, "learning_rate": 1.2628681848555203e-07, "loss": 0.3484, "step": 3929 }, { "epoch": 2.80580675868634, "grad_norm": 0.31146278977394104, "learning_rate": 1.253604390908819e-07, "loss": 0.3836, "step": 3930 }, { "epoch": 2.8065207044264637, "grad_norm": 0.29927945137023926, "learning_rate": 1.2443742681648441e-07, "loss": 0.3637, "step": 3931 }, { "epoch": 2.8072346501665875, "grad_norm": 0.3102878928184509, "learning_rate": 1.2351778229992228e-07, "loss": 0.3691, "step": 3932 }, { "epoch": 2.807948595906711, "grad_norm": 0.2995920479297638, "learning_rate": 1.226015061764335e-07, "loss": 0.3336, "step": 3933 }, { "epoch": 2.808662541646835, "grad_norm": 0.3170377016067505, "learning_rate": 1.2168859907892904e-07, "loss": 0.355, "step": 3934 }, { "epoch": 2.8093764873869587, "grad_norm": 0.2962314188480377, "learning_rate": 1.2077906163799168e-07, "loss": 0.3432, "step": 3935 }, { "epoch": 2.8100904331270824, "grad_norm": 0.284684956073761, "learning_rate": 1.1987289448187777e-07, "loss": 0.3839, "step": 3936 }, { "epoch": 2.810804378867206, "grad_norm": 0.3038073778152466, "learning_rate": 1.1897009823651662e-07, "loss": 0.3497, "step": 3937 }, { "epoch": 2.81151832460733, "grad_norm": 0.3111512064933777, "learning_rate": 1.1807067352550605e-07, "loss": 0.4045, "step": 3938 }, { "epoch": 2.8122322703474536, "grad_norm": 0.3764260709285736, "learning_rate": 1.1717462097011856e-07, "loss": 0.3445, "step": 3939 }, { "epoch": 2.8129462160875773, "grad_norm": 0.31522512435913086, "learning_rate": 1.1628194118929403e-07, "loss": 0.392, "step": 3940 }, { "epoch": 2.813660161827701, "grad_norm": 0.2930360734462738, "learning_rate": 1.1539263479964535e-07, "loss": 0.3321, "step": 3941 }, { "epoch": 2.814374107567825, "grad_norm": 0.3091942369937897, "learning_rate": 1.1450670241545392e-07, "loss": 0.3796, "step": 3942 }, { "epoch": 2.8150880533079485, "grad_norm": 0.3162194490432739, "learning_rate": 1.1362414464867078e-07, "loss": 0.372, "step": 3943 }, { "epoch": 2.8158019990480723, "grad_norm": 0.2845645546913147, "learning_rate": 1.127449621089155e-07, "loss": 0.3618, "step": 3944 }, { "epoch": 2.816515944788196, "grad_norm": 0.2950558364391327, "learning_rate": 1.1186915540347732e-07, "loss": 0.3796, "step": 3945 }, { "epoch": 2.8172298905283197, "grad_norm": 0.30604612827301025, "learning_rate": 1.1099672513731286e-07, "loss": 0.3874, "step": 3946 }, { "epoch": 2.8179438362684435, "grad_norm": 0.29252833127975464, "learning_rate": 1.101276719130473e-07, "loss": 0.3926, "step": 3947 }, { "epoch": 2.818657782008567, "grad_norm": 0.317072331905365, "learning_rate": 1.0926199633097156e-07, "loss": 0.4049, "step": 3948 }, { "epoch": 2.819371727748691, "grad_norm": 0.2999995946884155, "learning_rate": 1.0839969898904512e-07, "loss": 0.3361, "step": 3949 }, { "epoch": 2.8200856734888147, "grad_norm": 0.32751181721687317, "learning_rate": 1.0754078048289374e-07, "loss": 0.3898, "step": 3950 }, { "epoch": 2.8207996192289384, "grad_norm": 0.28487688302993774, "learning_rate": 1.0668524140580783e-07, "loss": 0.3482, "step": 3951 }, { "epoch": 2.821513564969062, "grad_norm": 0.2982807755470276, "learning_rate": 1.0583308234874523e-07, "loss": 0.3705, "step": 3952 }, { "epoch": 2.822227510709186, "grad_norm": 0.3108339011669159, "learning_rate": 1.0498430390032787e-07, "loss": 0.3835, "step": 3953 }, { "epoch": 2.8229414564493096, "grad_norm": 0.31574100255966187, "learning_rate": 1.041389066468429e-07, "loss": 0.3542, "step": 3954 }, { "epoch": 2.8236554021894333, "grad_norm": 0.30693361163139343, "learning_rate": 1.0329689117224262e-07, "loss": 0.3645, "step": 3955 }, { "epoch": 2.8243693479295575, "grad_norm": 0.3111601173877716, "learning_rate": 1.0245825805814291e-07, "loss": 0.4038, "step": 3956 }, { "epoch": 2.8250832936696813, "grad_norm": 0.29228663444519043, "learning_rate": 1.0162300788382263e-07, "loss": 0.3454, "step": 3957 }, { "epoch": 2.825797239409805, "grad_norm": 0.2987278401851654, "learning_rate": 1.0079114122622413e-07, "loss": 0.3605, "step": 3958 }, { "epoch": 2.8265111851499287, "grad_norm": 0.30149176716804504, "learning_rate": 9.996265865995502e-08, "loss": 0.361, "step": 3959 }, { "epoch": 2.8272251308900525, "grad_norm": 0.28053945302963257, "learning_rate": 9.913756075728088e-08, "loss": 0.3549, "step": 3960 }, { "epoch": 2.827939076630176, "grad_norm": 0.2931215167045593, "learning_rate": 9.8315848088133e-08, "loss": 0.3508, "step": 3961 }, { "epoch": 2.8286530223703, "grad_norm": 0.3159455358982086, "learning_rate": 9.749752122010347e-08, "loss": 0.355, "step": 3962 }, { "epoch": 2.8293669681104237, "grad_norm": 0.331102192401886, "learning_rate": 9.66825807184446e-08, "loss": 0.3588, "step": 3963 }, { "epoch": 2.8300809138505474, "grad_norm": 0.32204729318618774, "learning_rate": 9.587102714607166e-08, "loss": 0.3535, "step": 3964 }, { "epoch": 2.830794859590671, "grad_norm": 0.2800059914588928, "learning_rate": 9.506286106355734e-08, "loss": 0.3555, "step": 3965 }, { "epoch": 2.831508805330795, "grad_norm": 0.2847030758857727, "learning_rate": 9.42580830291373e-08, "loss": 0.3633, "step": 3966 }, { "epoch": 2.8322227510709186, "grad_norm": 0.29922446608543396, "learning_rate": 9.345669359870524e-08, "loss": 0.3891, "step": 3967 }, { "epoch": 2.8329366968110423, "grad_norm": 0.2982969880104065, "learning_rate": 9.265869332581556e-08, "loss": 0.3452, "step": 3968 }, { "epoch": 2.833650642551166, "grad_norm": 0.2680341601371765, "learning_rate": 9.186408276168012e-08, "loss": 0.3528, "step": 3969 }, { "epoch": 2.83436458829129, "grad_norm": 0.28607505559921265, "learning_rate": 9.1072862455171e-08, "loss": 0.3665, "step": 3970 }, { "epoch": 2.8350785340314135, "grad_norm": 0.2893790304660797, "learning_rate": 9.028503295281709e-08, "loss": 0.3814, "step": 3971 }, { "epoch": 2.8357924797715373, "grad_norm": 0.30229854583740234, "learning_rate": 8.950059479880591e-08, "loss": 0.3694, "step": 3972 }, { "epoch": 2.836506425511661, "grad_norm": 0.3041836619377136, "learning_rate": 8.871954853498121e-08, "loss": 0.3944, "step": 3973 }, { "epoch": 2.8372203712517847, "grad_norm": 0.29845723509788513, "learning_rate": 8.794189470084646e-08, "loss": 0.3455, "step": 3974 }, { "epoch": 2.8379343169919085, "grad_norm": 0.2935575246810913, "learning_rate": 8.716763383355863e-08, "loss": 0.3529, "step": 3975 }, { "epoch": 2.8386482627320326, "grad_norm": 0.30347976088523865, "learning_rate": 8.639676646793382e-08, "loss": 0.3748, "step": 3976 }, { "epoch": 2.8393622084721564, "grad_norm": 0.32004985213279724, "learning_rate": 8.562929313644164e-08, "loss": 0.3757, "step": 3977 }, { "epoch": 2.84007615421228, "grad_norm": 0.2899249196052551, "learning_rate": 8.486521436920914e-08, "loss": 0.357, "step": 3978 }, { "epoch": 2.840790099952404, "grad_norm": 0.3054234981536865, "learning_rate": 8.410453069401692e-08, "loss": 0.3405, "step": 3979 }, { "epoch": 2.8415040456925276, "grad_norm": 0.29941344261169434, "learning_rate": 8.334724263630301e-08, "loss": 0.3467, "step": 3980 }, { "epoch": 2.8422179914326513, "grad_norm": 0.2900296151638031, "learning_rate": 8.25933507191573e-08, "loss": 0.3616, "step": 3981 }, { "epoch": 2.842931937172775, "grad_norm": 0.26120689511299133, "learning_rate": 8.184285546332549e-08, "loss": 0.3453, "step": 3982 }, { "epoch": 2.8436458829128988, "grad_norm": 0.2924225330352783, "learning_rate": 8.109575738720621e-08, "loss": 0.3564, "step": 3983 }, { "epoch": 2.8443598286530225, "grad_norm": 0.3105575740337372, "learning_rate": 8.035205700685167e-08, "loss": 0.3909, "step": 3984 }, { "epoch": 2.8450737743931462, "grad_norm": 0.31312569975852966, "learning_rate": 7.961175483596762e-08, "loss": 0.3573, "step": 3985 }, { "epoch": 2.84578772013327, "grad_norm": 0.28318580985069275, "learning_rate": 7.887485138591166e-08, "loss": 0.3543, "step": 3986 }, { "epoch": 2.8465016658733937, "grad_norm": 0.30636337399482727, "learning_rate": 7.8141347165695e-08, "loss": 0.4119, "step": 3987 }, { "epoch": 2.8472156116135174, "grad_norm": 0.2794308364391327, "learning_rate": 7.741124268197952e-08, "loss": 0.3582, "step": 3988 }, { "epoch": 2.847929557353641, "grad_norm": 0.2926535904407501, "learning_rate": 7.668453843907908e-08, "loss": 0.3969, "step": 3989 }, { "epoch": 2.848643503093765, "grad_norm": 0.2826480269432068, "learning_rate": 7.59612349389599e-08, "loss": 0.3611, "step": 3990 }, { "epoch": 2.8493574488338886, "grad_norm": 0.27983716130256653, "learning_rate": 7.52413326812379e-08, "loss": 0.3586, "step": 3991 }, { "epoch": 2.8500713945740124, "grad_norm": 0.28637367486953735, "learning_rate": 7.452483216317973e-08, "loss": 0.3683, "step": 3992 }, { "epoch": 2.850785340314136, "grad_norm": 0.2805359363555908, "learning_rate": 7.381173387970397e-08, "loss": 0.3226, "step": 3993 }, { "epoch": 2.85149928605426, "grad_norm": 0.3101789355278015, "learning_rate": 7.310203832337659e-08, "loss": 0.3961, "step": 3994 }, { "epoch": 2.8522132317943836, "grad_norm": 0.29535412788391113, "learning_rate": 7.23957459844149e-08, "loss": 0.3751, "step": 3995 }, { "epoch": 2.8529271775345073, "grad_norm": 0.31685930490493774, "learning_rate": 7.169285735068531e-08, "loss": 0.393, "step": 3996 }, { "epoch": 2.853641123274631, "grad_norm": 0.30512019991874695, "learning_rate": 7.09933729077017e-08, "loss": 0.3856, "step": 3997 }, { "epoch": 2.8543550690147548, "grad_norm": 0.30056971311569214, "learning_rate": 7.029729313862866e-08, "loss": 0.3253, "step": 3998 }, { "epoch": 2.8550690147548785, "grad_norm": 0.31153038144111633, "learning_rate": 6.960461852427824e-08, "loss": 0.3901, "step": 3999 }, { "epoch": 2.8557829604950022, "grad_norm": 0.28380703926086426, "learning_rate": 6.891534954310886e-08, "loss": 0.3514, "step": 4000 }, { "epoch": 2.856496906235126, "grad_norm": 0.2855680584907532, "learning_rate": 6.822948667122909e-08, "loss": 0.3837, "step": 4001 }, { "epoch": 2.8572108519752497, "grad_norm": 0.2921585440635681, "learning_rate": 6.75470303823933e-08, "loss": 0.3752, "step": 4002 }, { "epoch": 2.8579247977153734, "grad_norm": 0.296125590801239, "learning_rate": 6.68679811480022e-08, "loss": 0.3669, "step": 4003 }, { "epoch": 2.858638743455497, "grad_norm": 0.31194302439689636, "learning_rate": 6.61923394371039e-08, "loss": 0.3695, "step": 4004 }, { "epoch": 2.859352689195621, "grad_norm": 0.28786715865135193, "learning_rate": 6.552010571639456e-08, "loss": 0.3399, "step": 4005 }, { "epoch": 2.8600666349357446, "grad_norm": 0.30232834815979004, "learning_rate": 6.485128045021216e-08, "loss": 0.3472, "step": 4006 }, { "epoch": 2.8607805806758684, "grad_norm": 0.3109003007411957, "learning_rate": 6.418586410054384e-08, "loss": 0.3741, "step": 4007 }, { "epoch": 2.861494526415992, "grad_norm": 0.30633026361465454, "learning_rate": 6.352385712702191e-08, "loss": 0.3977, "step": 4008 }, { "epoch": 2.8622084721561163, "grad_norm": 0.2983093857765198, "learning_rate": 6.286525998692061e-08, "loss": 0.3713, "step": 4009 }, { "epoch": 2.86292241789624, "grad_norm": 0.28383803367614746, "learning_rate": 6.221007313516159e-08, "loss": 0.3953, "step": 4010 }, { "epoch": 2.8636363636363638, "grad_norm": 0.2911945581436157, "learning_rate": 6.15582970243117e-08, "loss": 0.3535, "step": 4011 }, { "epoch": 2.8643503093764875, "grad_norm": 0.3133157193660736, "learning_rate": 6.090993210457807e-08, "loss": 0.3609, "step": 4012 }, { "epoch": 2.8650642551166112, "grad_norm": 0.32112395763397217, "learning_rate": 6.026497882381521e-08, "loss": 0.3867, "step": 4013 }, { "epoch": 2.865778200856735, "grad_norm": 0.3035886585712433, "learning_rate": 5.96234376275201e-08, "loss": 0.3744, "step": 4014 }, { "epoch": 2.8664921465968587, "grad_norm": 0.2940291166305542, "learning_rate": 5.8985308958831033e-08, "loss": 0.3541, "step": 4015 }, { "epoch": 2.8672060923369824, "grad_norm": 0.3086344003677368, "learning_rate": 5.835059325853098e-08, "loss": 0.3424, "step": 4016 }, { "epoch": 2.867920038077106, "grad_norm": 0.2896854877471924, "learning_rate": 5.7719290965045914e-08, "loss": 0.3503, "step": 4017 }, { "epoch": 2.86863398381723, "grad_norm": 0.29668599367141724, "learning_rate": 5.709140251444201e-08, "loss": 0.3929, "step": 4018 }, { "epoch": 2.8693479295573536, "grad_norm": 0.29737210273742676, "learning_rate": 5.646692834042844e-08, "loss": 0.3522, "step": 4019 }, { "epoch": 2.8700618752974774, "grad_norm": 0.30855172872543335, "learning_rate": 5.584586887435739e-08, "loss": 0.3508, "step": 4020 }, { "epoch": 2.870775821037601, "grad_norm": 0.2710255980491638, "learning_rate": 5.5228224545219585e-08, "loss": 0.2755, "step": 4021 }, { "epoch": 2.871489766777725, "grad_norm": 0.33105331659317017, "learning_rate": 5.461399577964877e-08, "loss": 0.4468, "step": 4022 }, { "epoch": 2.8722037125178486, "grad_norm": 0.3074122667312622, "learning_rate": 5.400318300191831e-08, "loss": 0.3505, "step": 4023 }, { "epoch": 2.8729176582579723, "grad_norm": 0.3044474124908447, "learning_rate": 5.339578663394296e-08, "loss": 0.3878, "step": 4024 }, { "epoch": 2.873631603998096, "grad_norm": 0.29707637429237366, "learning_rate": 5.279180709527765e-08, "loss": 0.3729, "step": 4025 }, { "epoch": 2.8743455497382198, "grad_norm": 0.2858903408050537, "learning_rate": 5.219124480311533e-08, "loss": 0.3542, "step": 4026 }, { "epoch": 2.8750594954783435, "grad_norm": 0.2898145318031311, "learning_rate": 5.159410017229083e-08, "loss": 0.3674, "step": 4027 }, { "epoch": 2.8757734412184672, "grad_norm": 0.2955038249492645, "learning_rate": 5.100037361527699e-08, "loss": 0.3876, "step": 4028 }, { "epoch": 2.8764873869585914, "grad_norm": 0.2803758382797241, "learning_rate": 5.041006554218519e-08, "loss": 0.3195, "step": 4029 }, { "epoch": 2.877201332698715, "grad_norm": 0.3063618540763855, "learning_rate": 4.9823176360768166e-08, "loss": 0.4, "step": 4030 }, { "epoch": 2.877915278438839, "grad_norm": 0.29846450686454773, "learning_rate": 4.92397064764133e-08, "loss": 0.3755, "step": 4031 }, { "epoch": 2.8786292241789626, "grad_norm": 0.28261780738830566, "learning_rate": 4.865965629214819e-08, "loss": 0.3648, "step": 4032 }, { "epoch": 2.8793431699190863, "grad_norm": 0.3061164319515228, "learning_rate": 4.8083026208639583e-08, "loss": 0.3649, "step": 4033 }, { "epoch": 2.88005711565921, "grad_norm": 0.3082464039325714, "learning_rate": 4.75098166241883e-08, "loss": 0.3606, "step": 4034 }, { "epoch": 2.880771061399334, "grad_norm": 0.2908499538898468, "learning_rate": 4.694002793473596e-08, "loss": 0.3681, "step": 4035 }, { "epoch": 2.8814850071394575, "grad_norm": 0.3145562410354614, "learning_rate": 4.6373660533859945e-08, "loss": 0.3612, "step": 4036 }, { "epoch": 2.8821989528795813, "grad_norm": 0.29564693570137024, "learning_rate": 4.581071481277288e-08, "loss": 0.3458, "step": 4037 }, { "epoch": 2.882912898619705, "grad_norm": 0.2876262068748474, "learning_rate": 4.52511911603265e-08, "loss": 0.3492, "step": 4038 }, { "epoch": 2.8836268443598287, "grad_norm": 0.2912510931491852, "learning_rate": 4.469508996300664e-08, "loss": 0.3695, "step": 4039 }, { "epoch": 2.8843407900999525, "grad_norm": 0.2950400710105896, "learning_rate": 4.41424116049366e-08, "loss": 0.3813, "step": 4040 }, { "epoch": 2.885054735840076, "grad_norm": 0.310129851102829, "learning_rate": 4.3593156467873765e-08, "loss": 0.3807, "step": 4041 }, { "epoch": 2.8857686815802, "grad_norm": 0.33167052268981934, "learning_rate": 4.3047324931213555e-08, "loss": 0.3822, "step": 4042 }, { "epoch": 2.8864826273203237, "grad_norm": 0.28467097878456116, "learning_rate": 4.250491737198381e-08, "loss": 0.3336, "step": 4043 }, { "epoch": 2.8871965730604474, "grad_norm": 0.31464964151382446, "learning_rate": 4.196593416484873e-08, "loss": 0.4021, "step": 4044 }, { "epoch": 2.887910518800571, "grad_norm": 0.3144361972808838, "learning_rate": 4.1430375682107174e-08, "loss": 0.3687, "step": 4045 }, { "epoch": 2.888624464540695, "grad_norm": 0.3030063509941101, "learning_rate": 4.0898242293691546e-08, "loss": 0.4185, "step": 4046 }, { "epoch": 2.8893384102808186, "grad_norm": 0.27903440594673157, "learning_rate": 4.036953436716895e-08, "loss": 0.344, "step": 4047 }, { "epoch": 2.8900523560209423, "grad_norm": 0.2965116500854492, "learning_rate": 3.9844252267741136e-08, "loss": 0.3864, "step": 4048 }, { "epoch": 2.890766301761066, "grad_norm": 0.3105154037475586, "learning_rate": 3.932239635824231e-08, "loss": 0.3529, "step": 4049 }, { "epoch": 2.89148024750119, "grad_norm": 0.3090529143810272, "learning_rate": 3.8803966999139686e-08, "loss": 0.3816, "step": 4050 }, { "epoch": 2.8921941932413135, "grad_norm": 0.326323539018631, "learning_rate": 3.82889645485357e-08, "loss": 0.397, "step": 4051 }, { "epoch": 2.8929081389814373, "grad_norm": 0.2921806275844574, "learning_rate": 3.777738936216358e-08, "loss": 0.3627, "step": 4052 }, { "epoch": 2.893622084721561, "grad_norm": 0.3116381764411926, "learning_rate": 3.726924179339009e-08, "loss": 0.3194, "step": 4053 }, { "epoch": 2.8943360304616848, "grad_norm": 0.3331836462020874, "learning_rate": 3.676452219321447e-08, "loss": 0.3778, "step": 4054 }, { "epoch": 2.8950499762018085, "grad_norm": 0.3125240206718445, "learning_rate": 3.626323091026785e-08, "loss": 0.3853, "step": 4055 }, { "epoch": 2.895763921941932, "grad_norm": 0.2981298565864563, "learning_rate": 3.576536829081323e-08, "loss": 0.3878, "step": 4056 }, { "epoch": 2.896477867682056, "grad_norm": 0.3085692524909973, "learning_rate": 3.527093467874609e-08, "loss": 0.3562, "step": 4057 }, { "epoch": 2.8971918134221797, "grad_norm": 0.2861967980861664, "learning_rate": 3.477993041559213e-08, "loss": 0.3529, "step": 4058 }, { "epoch": 2.8979057591623034, "grad_norm": 0.31298914551734924, "learning_rate": 3.429235584050894e-08, "loss": 0.3722, "step": 4059 }, { "epoch": 2.898619704902427, "grad_norm": 0.2981667220592499, "learning_rate": 3.3808211290284886e-08, "loss": 0.3554, "step": 4060 }, { "epoch": 2.8993336506425513, "grad_norm": 0.29355674982070923, "learning_rate": 3.332749709933969e-08, "loss": 0.3608, "step": 4061 }, { "epoch": 2.900047596382675, "grad_norm": 0.29736241698265076, "learning_rate": 3.285021359972218e-08, "loss": 0.3855, "step": 4062 }, { "epoch": 2.900761542122799, "grad_norm": 0.3185316026210785, "learning_rate": 3.2376361121112534e-08, "loss": 0.3506, "step": 4063 }, { "epoch": 2.9014754878629225, "grad_norm": 0.3075784742832184, "learning_rate": 3.190593999082114e-08, "loss": 0.3682, "step": 4064 }, { "epoch": 2.9021894336030463, "grad_norm": 0.2970426380634308, "learning_rate": 3.143895053378698e-08, "loss": 0.3883, "step": 4065 }, { "epoch": 2.90290337934317, "grad_norm": 0.31235459446907043, "learning_rate": 3.097539307258035e-08, "loss": 0.3768, "step": 4066 }, { "epoch": 2.9036173250832937, "grad_norm": 0.29221293330192566, "learning_rate": 3.0515267927400116e-08, "loss": 0.3342, "step": 4067 }, { "epoch": 2.9043312708234175, "grad_norm": 0.31332316994667053, "learning_rate": 3.005857541607371e-08, "loss": 0.4031, "step": 4068 }, { "epoch": 2.905045216563541, "grad_norm": 0.2921530604362488, "learning_rate": 2.9605315854058236e-08, "loss": 0.3543, "step": 4069 }, { "epoch": 2.905759162303665, "grad_norm": 0.2932598888874054, "learning_rate": 2.9155489554439364e-08, "loss": 0.3604, "step": 4070 }, { "epoch": 2.9064731080437887, "grad_norm": 0.2865261733531952, "learning_rate": 2.8709096827930773e-08, "loss": 0.3732, "step": 4071 }, { "epoch": 2.9071870537839124, "grad_norm": 0.29261383414268494, "learning_rate": 2.8266137982875807e-08, "loss": 0.3847, "step": 4072 }, { "epoch": 2.907900999524036, "grad_norm": 0.294680655002594, "learning_rate": 2.7826613325243613e-08, "loss": 0.3754, "step": 4073 }, { "epoch": 2.90861494526416, "grad_norm": 0.2924196422100067, "learning_rate": 2.7390523158633552e-08, "loss": 0.3752, "step": 4074 }, { "epoch": 2.9093288910042836, "grad_norm": 0.31545501947402954, "learning_rate": 2.6957867784270787e-08, "loss": 0.3606, "step": 4075 }, { "epoch": 2.9100428367444073, "grad_norm": 0.2918698191642761, "learning_rate": 2.6528647501009585e-08, "loss": 0.3242, "step": 4076 }, { "epoch": 2.910756782484531, "grad_norm": 0.29167595505714417, "learning_rate": 2.6102862605330016e-08, "loss": 0.3661, "step": 4077 }, { "epoch": 2.911470728224655, "grad_norm": 0.29906314611434937, "learning_rate": 2.5680513391340144e-08, "loss": 0.4, "step": 4078 }, { "epoch": 2.9121846739647785, "grad_norm": 0.2863021492958069, "learning_rate": 2.5261600150773836e-08, "loss": 0.3428, "step": 4079 }, { "epoch": 2.9128986197049023, "grad_norm": 0.309415727853775, "learning_rate": 2.4846123172992953e-08, "loss": 0.3877, "step": 4080 }, { "epoch": 2.9136125654450264, "grad_norm": 0.29645100235939026, "learning_rate": 2.44340827449846e-08, "loss": 0.3454, "step": 4081 }, { "epoch": 2.91432651118515, "grad_norm": 0.31893154978752136, "learning_rate": 2.4025479151363327e-08, "loss": 0.4118, "step": 4082 }, { "epoch": 2.915040456925274, "grad_norm": 0.301846981048584, "learning_rate": 2.3620312674367818e-08, "loss": 0.3896, "step": 4083 }, { "epoch": 2.9157544026653976, "grad_norm": 0.2998887598514557, "learning_rate": 2.3218583593864196e-08, "loss": 0.3785, "step": 4084 }, { "epoch": 2.9164683484055214, "grad_norm": 0.29651299118995667, "learning_rate": 2.2820292187344384e-08, "loss": 0.3663, "step": 4085 }, { "epoch": 2.917182294145645, "grad_norm": 0.30653518438339233, "learning_rate": 2.242543872992442e-08, "loss": 0.3831, "step": 4086 }, { "epoch": 2.917896239885769, "grad_norm": 0.287043035030365, "learning_rate": 2.203402349434669e-08, "loss": 0.3551, "step": 4087 }, { "epoch": 2.9186101856258926, "grad_norm": 0.28153863549232483, "learning_rate": 2.1646046750978255e-08, "loss": 0.3674, "step": 4088 }, { "epoch": 2.9193241313660163, "grad_norm": 0.30492496490478516, "learning_rate": 2.1261508767810856e-08, "loss": 0.3824, "step": 4089 }, { "epoch": 2.92003807710614, "grad_norm": 0.2987598180770874, "learning_rate": 2.088040981046091e-08, "loss": 0.3677, "step": 4090 }, { "epoch": 2.920752022846264, "grad_norm": 0.3143036663532257, "learning_rate": 2.0502750142170624e-08, "loss": 0.3251, "step": 4091 }, { "epoch": 2.9214659685863875, "grad_norm": 0.3007151782512665, "learning_rate": 2.012853002380466e-08, "loss": 0.3837, "step": 4092 }, { "epoch": 2.9221799143265113, "grad_norm": 0.3009974956512451, "learning_rate": 1.975774971385347e-08, "loss": 0.4108, "step": 4093 }, { "epoch": 2.922893860066635, "grad_norm": 0.29071536660194397, "learning_rate": 1.939040946842996e-08, "loss": 0.346, "step": 4094 }, { "epoch": 2.9236078058067587, "grad_norm": 0.3091032803058624, "learning_rate": 1.9026509541272276e-08, "loss": 0.4029, "step": 4095 }, { "epoch": 2.9243217515468825, "grad_norm": 0.314525842666626, "learning_rate": 1.8666050183741013e-08, "loss": 0.3611, "step": 4096 }, { "epoch": 2.925035697287006, "grad_norm": 0.3078586459159851, "learning_rate": 1.8309031644821452e-08, "loss": 0.3667, "step": 4097 }, { "epoch": 2.92574964302713, "grad_norm": 0.2910337448120117, "learning_rate": 1.7955454171120766e-08, "loss": 0.3605, "step": 4098 }, { "epoch": 2.9264635887672537, "grad_norm": 0.3213263750076294, "learning_rate": 1.760531800686971e-08, "loss": 0.3762, "step": 4099 }, { "epoch": 2.9271775345073774, "grad_norm": 0.3135104477405548, "learning_rate": 1.725862339392259e-08, "loss": 0.3586, "step": 4100 }, { "epoch": 2.927891480247501, "grad_norm": 0.3039046823978424, "learning_rate": 1.6915370571756185e-08, "loss": 0.3755, "step": 4101 }, { "epoch": 2.928605425987625, "grad_norm": 0.30872318148612976, "learning_rate": 1.657555977746972e-08, "loss": 0.3689, "step": 4102 }, { "epoch": 2.9293193717277486, "grad_norm": 0.3062438666820526, "learning_rate": 1.623919124578488e-08, "loss": 0.3646, "step": 4103 }, { "epoch": 2.9300333174678723, "grad_norm": 0.30002203583717346, "learning_rate": 1.590626520904526e-08, "loss": 0.3955, "step": 4104 }, { "epoch": 2.930747263207996, "grad_norm": 0.31022265553474426, "learning_rate": 1.5576781897218006e-08, "loss": 0.3659, "step": 4105 }, { "epoch": 2.93146120894812, "grad_norm": 0.3073256313800812, "learning_rate": 1.5250741537889968e-08, "loss": 0.3787, "step": 4106 }, { "epoch": 2.9321751546882435, "grad_norm": 0.3139854967594147, "learning_rate": 1.4928144356272102e-08, "loss": 0.3702, "step": 4107 }, { "epoch": 2.9328891004283673, "grad_norm": 0.26805540919303894, "learning_rate": 1.4608990575195048e-08, "loss": 0.329, "step": 4108 }, { "epoch": 2.933603046168491, "grad_norm": 0.3089768588542938, "learning_rate": 1.429328041511302e-08, "loss": 0.3646, "step": 4109 }, { "epoch": 2.9343169919086147, "grad_norm": 0.3060535192489624, "learning_rate": 1.3981014094099354e-08, "loss": 0.3738, "step": 4110 }, { "epoch": 2.9350309376487385, "grad_norm": 0.27764949202537537, "learning_rate": 1.3672191827849846e-08, "loss": 0.3437, "step": 4111 }, { "epoch": 2.935744883388862, "grad_norm": 0.2817898988723755, "learning_rate": 1.3366813829681636e-08, "loss": 0.3607, "step": 4112 }, { "epoch": 2.936458829128986, "grad_norm": 0.2730650007724762, "learning_rate": 1.3064880310531548e-08, "loss": 0.3748, "step": 4113 }, { "epoch": 2.93717277486911, "grad_norm": 0.29343491792678833, "learning_rate": 1.2766391478958862e-08, "loss": 0.3611, "step": 4114 }, { "epoch": 2.937886720609234, "grad_norm": 0.28810158371925354, "learning_rate": 1.2471347541140876e-08, "loss": 0.362, "step": 4115 }, { "epoch": 2.9386006663493576, "grad_norm": 0.29350900650024414, "learning_rate": 1.2179748700879013e-08, "loss": 0.3844, "step": 4116 }, { "epoch": 2.9393146120894813, "grad_norm": 0.30602988600730896, "learning_rate": 1.189159515959104e-08, "loss": 0.394, "step": 4117 }, { "epoch": 2.940028557829605, "grad_norm": 0.30116716027259827, "learning_rate": 1.1606887116317744e-08, "loss": 0.3573, "step": 4118 }, { "epoch": 2.9407425035697288, "grad_norm": 0.3191863000392914, "learning_rate": 1.132562476771959e-08, "loss": 0.406, "step": 4119 }, { "epoch": 2.9414564493098525, "grad_norm": 0.3122248947620392, "learning_rate": 1.1047808308075059e-08, "loss": 0.342, "step": 4120 }, { "epoch": 2.9421703950499762, "grad_norm": 0.31657132506370544, "learning_rate": 1.0773437929285091e-08, "loss": 0.3827, "step": 4121 }, { "epoch": 2.9428843407901, "grad_norm": 0.30848103761672974, "learning_rate": 1.0502513820868088e-08, "loss": 0.3975, "step": 4122 }, { "epoch": 2.9435982865302237, "grad_norm": 0.2856195867061615, "learning_rate": 1.0235036169963241e-08, "loss": 0.3537, "step": 4123 }, { "epoch": 2.9443122322703474, "grad_norm": 0.2787690758705139, "learning_rate": 9.971005161327763e-09, "loss": 0.3368, "step": 4124 }, { "epoch": 2.945026178010471, "grad_norm": 0.30873897671699524, "learning_rate": 9.710420977340763e-09, "loss": 0.3829, "step": 4125 }, { "epoch": 2.945740123750595, "grad_norm": 0.30935895442962646, "learning_rate": 9.453283797997147e-09, "loss": 0.3791, "step": 4126 }, { "epoch": 2.9464540694907186, "grad_norm": 0.28779593110084534, "learning_rate": 9.199593800913731e-09, "loss": 0.3473, "step": 4127 }, { "epoch": 2.9471680152308424, "grad_norm": 0.30554649233818054, "learning_rate": 8.949351161324227e-09, "loss": 0.359, "step": 4128 }, { "epoch": 2.947881960970966, "grad_norm": 0.30918651819229126, "learning_rate": 8.702556052082034e-09, "loss": 0.3527, "step": 4129 }, { "epoch": 2.94859590671109, "grad_norm": 0.3200169801712036, "learning_rate": 8.459208643659122e-09, "loss": 0.4057, "step": 4130 }, { "epoch": 2.9493098524512136, "grad_norm": 0.3170667290687561, "learning_rate": 8.219309104145478e-09, "loss": 0.3748, "step": 4131 }, { "epoch": 2.9500237981913373, "grad_norm": 0.29655998945236206, "learning_rate": 7.982857599250216e-09, "loss": 0.3263, "step": 4132 }, { "epoch": 2.950737743931461, "grad_norm": 0.3056013882160187, "learning_rate": 7.749854292300462e-09, "loss": 0.3568, "step": 4133 }, { "epoch": 2.951451689671585, "grad_norm": 0.32344985008239746, "learning_rate": 7.520299344241366e-09, "loss": 0.3548, "step": 4134 }, { "epoch": 2.952165635411709, "grad_norm": 0.30256274342536926, "learning_rate": 7.294192913636089e-09, "loss": 0.389, "step": 4135 }, { "epoch": 2.9528795811518327, "grad_norm": 0.30093616247177124, "learning_rate": 7.071535156666365e-09, "loss": 0.3576, "step": 4136 }, { "epoch": 2.9535935268919564, "grad_norm": 0.30580538511276245, "learning_rate": 6.852326227130835e-09, "loss": 0.3831, "step": 4137 }, { "epoch": 2.95430747263208, "grad_norm": 0.3115174472332001, "learning_rate": 6.6365662764467095e-09, "loss": 0.3605, "step": 4138 }, { "epoch": 2.955021418372204, "grad_norm": 0.2973778247833252, "learning_rate": 6.424255453648109e-09, "loss": 0.3658, "step": 4139 }, { "epoch": 2.9557353641123276, "grad_norm": 0.3145565390586853, "learning_rate": 6.215393905388278e-09, "loss": 0.3797, "step": 4140 }, { "epoch": 2.9564493098524514, "grad_norm": 0.30767589807510376, "learning_rate": 6.009981775935703e-09, "loss": 0.3502, "step": 4141 }, { "epoch": 2.957163255592575, "grad_norm": 0.2916697859764099, "learning_rate": 5.80801920717744e-09, "loss": 0.3501, "step": 4142 }, { "epoch": 2.957877201332699, "grad_norm": 0.28827783465385437, "learning_rate": 5.609506338617454e-09, "loss": 0.3574, "step": 4143 }, { "epoch": 2.9585911470728226, "grad_norm": 0.2837104797363281, "learning_rate": 5.414443307377171e-09, "loss": 0.3519, "step": 4144 }, { "epoch": 2.9593050928129463, "grad_norm": 0.2797496020793915, "learning_rate": 5.222830248195476e-09, "loss": 0.3264, "step": 4145 }, { "epoch": 2.96001903855307, "grad_norm": 0.3090255856513977, "learning_rate": 5.034667293427053e-09, "loss": 0.4206, "step": 4146 }, { "epoch": 2.9607329842931938, "grad_norm": 0.2901042401790619, "learning_rate": 4.849954573043492e-09, "loss": 0.3576, "step": 4147 }, { "epoch": 2.9614469300333175, "grad_norm": 0.29129812121391296, "learning_rate": 4.6686922146349515e-09, "loss": 0.3285, "step": 4148 }, { "epoch": 2.9621608757734412, "grad_norm": 0.30441591143608093, "learning_rate": 4.490880343405724e-09, "loss": 0.3874, "step": 4149 }, { "epoch": 2.962874821513565, "grad_norm": 0.2982679307460785, "learning_rate": 4.316519082179227e-09, "loss": 0.3927, "step": 4150 }, { "epoch": 2.9635887672536887, "grad_norm": 0.2749045789241791, "learning_rate": 4.145608551393565e-09, "loss": 0.3601, "step": 4151 }, { "epoch": 2.9643027129938124, "grad_norm": 0.3097539246082306, "learning_rate": 3.978148869103748e-09, "loss": 0.3849, "step": 4152 }, { "epoch": 2.965016658733936, "grad_norm": 0.2841283679008484, "learning_rate": 3.814140150981693e-09, "loss": 0.3763, "step": 4153 }, { "epoch": 2.96573060447406, "grad_norm": 0.28789088129997253, "learning_rate": 3.6535825103145573e-09, "loss": 0.3348, "step": 4154 }, { "epoch": 2.9664445502141836, "grad_norm": 0.31086236238479614, "learning_rate": 3.496476058006959e-09, "loss": 0.4071, "step": 4155 }, { "epoch": 2.9671584959543074, "grad_norm": 0.3198659121990204, "learning_rate": 3.3428209025793135e-09, "loss": 0.4083, "step": 4156 }, { "epoch": 2.967872441694431, "grad_norm": 0.2744852304458618, "learning_rate": 3.192617150166166e-09, "loss": 0.3486, "step": 4157 }, { "epoch": 2.968586387434555, "grad_norm": 0.2967083752155304, "learning_rate": 3.0458649045211897e-09, "loss": 0.3874, "step": 4158 }, { "epoch": 2.9693003331746786, "grad_norm": 0.2966892123222351, "learning_rate": 2.9025642670121866e-09, "loss": 0.367, "step": 4159 }, { "epoch": 2.9700142789148023, "grad_norm": 0.2850406765937805, "learning_rate": 2.7627153366222014e-09, "loss": 0.3481, "step": 4160 }, { "epoch": 2.970728224654926, "grad_norm": 0.27064603567123413, "learning_rate": 2.626318209951184e-09, "loss": 0.3412, "step": 4161 }, { "epoch": 2.9714421703950498, "grad_norm": 0.3141019642353058, "learning_rate": 2.493372981214326e-09, "loss": 0.4028, "step": 4162 }, { "epoch": 2.9721561161351735, "grad_norm": 0.28853628039360046, "learning_rate": 2.363879742243169e-09, "loss": 0.3613, "step": 4163 }, { "epoch": 2.9728700618752972, "grad_norm": 0.2989036440849304, "learning_rate": 2.237838582483387e-09, "loss": 0.3554, "step": 4164 }, { "epoch": 2.973584007615421, "grad_norm": 0.27963072061538696, "learning_rate": 2.1152495889970035e-09, "loss": 0.382, "step": 4165 }, { "epoch": 2.974297953355545, "grad_norm": 0.4804932475090027, "learning_rate": 1.9961128464623947e-09, "loss": 0.3688, "step": 4166 }, { "epoch": 2.975011899095669, "grad_norm": 0.3062093257904053, "learning_rate": 1.880428437170956e-09, "loss": 0.3748, "step": 4167 }, { "epoch": 2.9757258448357926, "grad_norm": 0.2991393208503723, "learning_rate": 1.7681964410320995e-09, "loss": 0.351, "step": 4168 }, { "epoch": 2.9764397905759163, "grad_norm": 0.3044251799583435, "learning_rate": 1.6594169355682587e-09, "loss": 0.4658, "step": 4169 }, { "epoch": 2.97715373631604, "grad_norm": 0.2792556583881378, "learning_rate": 1.5540899959187727e-09, "loss": 0.3299, "step": 4170 }, { "epoch": 2.977867682056164, "grad_norm": 0.3030499517917633, "learning_rate": 1.4522156948365563e-09, "loss": 0.3737, "step": 4171 }, { "epoch": 2.9785816277962875, "grad_norm": 0.30088430643081665, "learning_rate": 1.3537941026914302e-09, "loss": 0.3827, "step": 4172 }, { "epoch": 2.9792955735364113, "grad_norm": 0.32762452960014343, "learning_rate": 1.2588252874673469e-09, "loss": 0.3992, "step": 4173 }, { "epoch": 2.980009519276535, "grad_norm": 0.3005044460296631, "learning_rate": 1.1673093147623882e-09, "loss": 0.3718, "step": 4174 }, { "epoch": 2.9807234650166587, "grad_norm": 0.2889971435070038, "learning_rate": 1.0792462477909881e-09, "loss": 0.3547, "step": 4175 }, { "epoch": 2.9814374107567825, "grad_norm": 0.30376550555229187, "learning_rate": 9.946361473822664e-10, "loss": 0.3814, "step": 4176 }, { "epoch": 2.982151356496906, "grad_norm": 0.28302001953125, "learning_rate": 9.134790719800279e-10, "loss": 0.3414, "step": 4177 }, { "epoch": 2.98286530223703, "grad_norm": 0.31932854652404785, "learning_rate": 8.357750776427643e-10, "loss": 0.3663, "step": 4178 }, { "epoch": 2.9835792479771537, "grad_norm": 0.3090726137161255, "learning_rate": 7.615242180436521e-10, "loss": 0.3294, "step": 4179 }, { "epoch": 2.9842931937172774, "grad_norm": 0.30943745374679565, "learning_rate": 6.907265444716649e-10, "loss": 0.3804, "step": 4180 }, { "epoch": 2.985007139457401, "grad_norm": 0.2823165953159332, "learning_rate": 6.233821058287959e-10, "loss": 0.3586, "step": 4181 }, { "epoch": 2.985721085197525, "grad_norm": 0.29489150643348694, "learning_rate": 5.594909486328348e-10, "loss": 0.3943, "step": 4182 }, { "epoch": 2.9864350309376486, "grad_norm": 0.3001711666584015, "learning_rate": 4.990531170168123e-10, "loss": 0.3687, "step": 4183 }, { "epoch": 2.9871489766777723, "grad_norm": 0.3157098889350891, "learning_rate": 4.4206865272733436e-10, "loss": 0.3831, "step": 4184 }, { "epoch": 2.987862922417896, "grad_norm": 0.2860705256462097, "learning_rate": 3.885375951256931e-10, "loss": 0.3613, "step": 4185 }, { "epoch": 2.9885768681580203, "grad_norm": 0.30765339732170105, "learning_rate": 3.384599811889766e-10, "loss": 0.3482, "step": 4186 }, { "epoch": 2.989290813898144, "grad_norm": 0.3248096704483032, "learning_rate": 2.918358455067383e-10, "loss": 0.4231, "step": 4187 }, { "epoch": 2.9900047596382677, "grad_norm": 0.28265517950057983, "learning_rate": 2.486652202848827e-10, "loss": 0.3682, "step": 4188 }, { "epoch": 2.9907187053783915, "grad_norm": 0.2769085466861725, "learning_rate": 2.0894813534289015e-10, "loss": 0.3479, "step": 4189 }, { "epoch": 2.991432651118515, "grad_norm": 0.2951517105102539, "learning_rate": 1.7268461811548176e-10, "loss": 0.3504, "step": 4190 }, { "epoch": 2.992146596858639, "grad_norm": 0.2968785762786865, "learning_rate": 1.398746936509543e-10, "loss": 0.337, "step": 4191 }, { "epoch": 2.9928605425987627, "grad_norm": 0.2969133257865906, "learning_rate": 1.1051838461340059e-10, "loss": 0.3768, "step": 4192 }, { "epoch": 2.9935744883388864, "grad_norm": 0.2951071560382843, "learning_rate": 8.461571127882373e-11, "loss": 0.3861, "step": 4193 }, { "epoch": 2.99428843407901, "grad_norm": 0.29289186000823975, "learning_rate": 6.216669154068822e-11, "loss": 0.3467, "step": 4194 }, { "epoch": 2.995002379819134, "grad_norm": 0.31209808588027954, "learning_rate": 4.317134090547903e-11, "loss": 0.3992, "step": 4195 }, { "epoch": 2.9957163255592576, "grad_norm": 0.29162687063217163, "learning_rate": 2.7629672493256766e-11, "loss": 0.3388, "step": 4196 }, { "epoch": 2.9964302712993813, "grad_norm": 0.32298973202705383, "learning_rate": 1.5541697039878067e-11, "loss": 0.3832, "step": 4197 }, { "epoch": 2.997144217039505, "grad_norm": 0.29815611243247986, "learning_rate": 6.907422894775195e-12, "loss": 0.3908, "step": 4198 }, { "epoch": 2.997858162779629, "grad_norm": 0.31458815932273865, "learning_rate": 1.7268560220662366e-12, "loss": 0.3642, "step": 4199 }, { "epoch": 2.9985721085197525, "grad_norm": 0.3017503321170807, "learning_rate": 0.0, "loss": 0.3458, "step": 4200 }, { "epoch": 2.9985721085197525, "step": 4200, "total_flos": 5158286395867136.0, "train_loss": 0.4208235884138516, "train_runtime": 82361.54, "train_samples_per_second": 4.897, "train_steps_per_second": 0.051 } ], "logging_steps": 1.0, "max_steps": 4200, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5158286395867136.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }