{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 18516, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016202203499675956, "grad_norm": 9.86984634399414, "learning_rate": 8.992805755395684e-09, "loss": 0.5557, "step": 1 }, { "epoch": 0.0003240440699935191, "grad_norm": 8.46564769744873, "learning_rate": 1.798561151079137e-08, "loss": 0.5138, "step": 2 }, { "epoch": 0.0004860661049902787, "grad_norm": 8.7678861618042, "learning_rate": 2.6978417266187054e-08, "loss": 0.5389, "step": 3 }, { "epoch": 0.0006480881399870382, "grad_norm": 8.85426139831543, "learning_rate": 3.597122302158274e-08, "loss": 0.5323, "step": 4 }, { "epoch": 0.0008101101749837978, "grad_norm": 8.901697158813477, "learning_rate": 4.496402877697842e-08, "loss": 0.5217, "step": 5 }, { "epoch": 0.0009721322099805574, "grad_norm": 8.482805252075195, "learning_rate": 5.395683453237411e-08, "loss": 0.5235, "step": 6 }, { "epoch": 0.001134154244977317, "grad_norm": 8.641008377075195, "learning_rate": 6.294964028776979e-08, "loss": 0.5277, "step": 7 }, { "epoch": 0.0012961762799740765, "grad_norm": 9.918697357177734, "learning_rate": 7.194244604316547e-08, "loss": 0.5525, "step": 8 }, { "epoch": 0.001458198314970836, "grad_norm": 8.959720611572266, "learning_rate": 8.093525179856116e-08, "loss": 0.5073, "step": 9 }, { "epoch": 0.0016202203499675956, "grad_norm": 8.834793090820312, "learning_rate": 8.992805755395684e-08, "loss": 0.529, "step": 10 }, { "epoch": 0.0017822423849643552, "grad_norm": 8.80196762084961, "learning_rate": 9.892086330935252e-08, "loss": 0.5193, "step": 11 }, { "epoch": 0.0019442644199611147, "grad_norm": 8.514031410217285, "learning_rate": 1.0791366906474822e-07, "loss": 0.5291, "step": 12 }, { "epoch": 0.002106286454957874, "grad_norm": 8.85981559753418, "learning_rate": 1.169064748201439e-07, "loss": 0.5294, "step": 13 }, { "epoch": 0.002268308489954634, "grad_norm": 8.460282325744629, "learning_rate": 1.2589928057553958e-07, "loss": 0.5154, "step": 14 }, { "epoch": 0.002430330524951393, "grad_norm": 9.40372371673584, "learning_rate": 1.3489208633093525e-07, "loss": 0.5348, "step": 15 }, { "epoch": 0.002592352559948153, "grad_norm": 8.342942237854004, "learning_rate": 1.4388489208633095e-07, "loss": 0.5128, "step": 16 }, { "epoch": 0.0027543745949449123, "grad_norm": 8.372694969177246, "learning_rate": 1.5287769784172664e-07, "loss": 0.5387, "step": 17 }, { "epoch": 0.002916396629941672, "grad_norm": 7.830298900604248, "learning_rate": 1.618705035971223e-07, "loss": 0.4864, "step": 18 }, { "epoch": 0.0030784186649384314, "grad_norm": 7.605618000030518, "learning_rate": 1.70863309352518e-07, "loss": 0.5016, "step": 19 }, { "epoch": 0.0032404406999351912, "grad_norm": 7.3603997230529785, "learning_rate": 1.7985611510791368e-07, "loss": 0.4962, "step": 20 }, { "epoch": 0.0034024627349319506, "grad_norm": 8.310635566711426, "learning_rate": 1.8884892086330937e-07, "loss": 0.4981, "step": 21 }, { "epoch": 0.0035644847699287103, "grad_norm": 7.44785737991333, "learning_rate": 1.9784172661870504e-07, "loss": 0.4864, "step": 22 }, { "epoch": 0.0037265068049254697, "grad_norm": 6.7629008293151855, "learning_rate": 2.0683453237410074e-07, "loss": 0.4597, "step": 23 }, { "epoch": 0.0038885288399222295, "grad_norm": 6.164111137390137, "learning_rate": 2.1582733812949643e-07, "loss": 0.4475, "step": 24 }, { "epoch": 0.004050550874918989, "grad_norm": 6.299708843231201, "learning_rate": 2.248201438848921e-07, "loss": 0.4362, "step": 25 }, { "epoch": 0.004212572909915748, "grad_norm": 6.691930294036865, "learning_rate": 2.338129496402878e-07, "loss": 0.4647, "step": 26 }, { "epoch": 0.004374594944912508, "grad_norm": 6.3759846687316895, "learning_rate": 2.428057553956835e-07, "loss": 0.4502, "step": 27 }, { "epoch": 0.004536616979909268, "grad_norm": 6.641443729400635, "learning_rate": 2.5179856115107916e-07, "loss": 0.4535, "step": 28 }, { "epoch": 0.004698639014906027, "grad_norm": 6.435218811035156, "learning_rate": 2.6079136690647483e-07, "loss": 0.4315, "step": 29 }, { "epoch": 0.004860661049902786, "grad_norm": 5.86301851272583, "learning_rate": 2.697841726618705e-07, "loss": 0.4106, "step": 30 }, { "epoch": 0.005022683084899547, "grad_norm": 5.353550910949707, "learning_rate": 2.787769784172662e-07, "loss": 0.3607, "step": 31 }, { "epoch": 0.005184705119896306, "grad_norm": 4.887706756591797, "learning_rate": 2.877697841726619e-07, "loss": 0.36, "step": 32 }, { "epoch": 0.005346727154893065, "grad_norm": 4.950960159301758, "learning_rate": 2.9676258992805756e-07, "loss": 0.3418, "step": 33 }, { "epoch": 0.005508749189889825, "grad_norm": 5.3128581047058105, "learning_rate": 3.057553956834533e-07, "loss": 0.3229, "step": 34 }, { "epoch": 0.005670771224886585, "grad_norm": 4.85807466506958, "learning_rate": 3.1474820143884896e-07, "loss": 0.2946, "step": 35 }, { "epoch": 0.005832793259883344, "grad_norm": 4.823163986206055, "learning_rate": 3.237410071942446e-07, "loss": 0.304, "step": 36 }, { "epoch": 0.0059948152948801035, "grad_norm": 4.9618239402771, "learning_rate": 3.3273381294964035e-07, "loss": 0.3032, "step": 37 }, { "epoch": 0.006156837329876863, "grad_norm": 4.626596927642822, "learning_rate": 3.41726618705036e-07, "loss": 0.272, "step": 38 }, { "epoch": 0.006318859364873623, "grad_norm": 3.9511983394622803, "learning_rate": 3.5071942446043163e-07, "loss": 0.2683, "step": 39 }, { "epoch": 0.0064808813998703824, "grad_norm": 3.7039501667022705, "learning_rate": 3.5971223021582736e-07, "loss": 0.2829, "step": 40 }, { "epoch": 0.006642903434867142, "grad_norm": 3.3785510063171387, "learning_rate": 3.68705035971223e-07, "loss": 0.2598, "step": 41 }, { "epoch": 0.006804925469863901, "grad_norm": 3.1743481159210205, "learning_rate": 3.7769784172661875e-07, "loss": 0.2307, "step": 42 }, { "epoch": 0.006966947504860661, "grad_norm": 3.37454891204834, "learning_rate": 3.8669064748201447e-07, "loss": 0.2391, "step": 43 }, { "epoch": 0.007128969539857421, "grad_norm": 3.0130465030670166, "learning_rate": 3.956834532374101e-07, "loss": 0.2258, "step": 44 }, { "epoch": 0.00729099157485418, "grad_norm": 2.937732458114624, "learning_rate": 4.0467625899280576e-07, "loss": 0.22, "step": 45 }, { "epoch": 0.007453013609850939, "grad_norm": 3.2253363132476807, "learning_rate": 4.136690647482015e-07, "loss": 0.2128, "step": 46 }, { "epoch": 0.0076150356448477, "grad_norm": 2.944216012954712, "learning_rate": 4.2266187050359715e-07, "loss": 0.2199, "step": 47 }, { "epoch": 0.007777057679844459, "grad_norm": 2.8392646312713623, "learning_rate": 4.3165467625899287e-07, "loss": 0.2185, "step": 48 }, { "epoch": 0.007939079714841219, "grad_norm": 2.9585957527160645, "learning_rate": 4.406474820143885e-07, "loss": 0.2416, "step": 49 }, { "epoch": 0.008101101749837978, "grad_norm": 2.977691173553467, "learning_rate": 4.496402877697842e-07, "loss": 0.2219, "step": 50 }, { "epoch": 0.008263123784834738, "grad_norm": 2.6376028060913086, "learning_rate": 4.586330935251799e-07, "loss": 0.2271, "step": 51 }, { "epoch": 0.008425145819831496, "grad_norm": 2.8397135734558105, "learning_rate": 4.676258992805756e-07, "loss": 0.2227, "step": 52 }, { "epoch": 0.008587167854828257, "grad_norm": 2.7388408184051514, "learning_rate": 4.7661870503597127e-07, "loss": 0.1965, "step": 53 }, { "epoch": 0.008749189889825017, "grad_norm": 2.7062389850616455, "learning_rate": 4.85611510791367e-07, "loss": 0.1991, "step": 54 }, { "epoch": 0.008911211924821775, "grad_norm": 2.688417911529541, "learning_rate": 4.946043165467626e-07, "loss": 0.2121, "step": 55 }, { "epoch": 0.009073233959818535, "grad_norm": 2.418215751647949, "learning_rate": 5.035971223021583e-07, "loss": 0.2104, "step": 56 }, { "epoch": 0.009235255994815296, "grad_norm": 2.9777352809906006, "learning_rate": 5.12589928057554e-07, "loss": 0.2145, "step": 57 }, { "epoch": 0.009397278029812054, "grad_norm": 2.8849077224731445, "learning_rate": 5.215827338129497e-07, "loss": 0.1984, "step": 58 }, { "epoch": 0.009559300064808814, "grad_norm": 2.7535386085510254, "learning_rate": 5.305755395683454e-07, "loss": 0.2159, "step": 59 }, { "epoch": 0.009721322099805573, "grad_norm": 3.017183780670166, "learning_rate": 5.39568345323741e-07, "loss": 0.2485, "step": 60 }, { "epoch": 0.009883344134802333, "grad_norm": 3.307050943374634, "learning_rate": 5.485611510791367e-07, "loss": 0.21, "step": 61 }, { "epoch": 0.010045366169799093, "grad_norm": 2.6310977935791016, "learning_rate": 5.575539568345325e-07, "loss": 0.1888, "step": 62 }, { "epoch": 0.010207388204795852, "grad_norm": 2.3625879287719727, "learning_rate": 5.665467625899281e-07, "loss": 0.1938, "step": 63 }, { "epoch": 0.010369410239792612, "grad_norm": 3.2340619564056396, "learning_rate": 5.755395683453238e-07, "loss": 0.1812, "step": 64 }, { "epoch": 0.010531432274789372, "grad_norm": 2.1760573387145996, "learning_rate": 5.845323741007194e-07, "loss": 0.18, "step": 65 }, { "epoch": 0.01069345430978613, "grad_norm": 2.495835542678833, "learning_rate": 5.935251798561151e-07, "loss": 0.2233, "step": 66 }, { "epoch": 0.01085547634478289, "grad_norm": 2.425354242324829, "learning_rate": 6.025179856115109e-07, "loss": 0.2017, "step": 67 }, { "epoch": 0.01101749837977965, "grad_norm": 2.386017322540283, "learning_rate": 6.115107913669066e-07, "loss": 0.1897, "step": 68 }, { "epoch": 0.01117952041477641, "grad_norm": 2.476449728012085, "learning_rate": 6.205035971223022e-07, "loss": 0.2163, "step": 69 }, { "epoch": 0.01134154244977317, "grad_norm": 2.3602700233459473, "learning_rate": 6.294964028776979e-07, "loss": 0.1795, "step": 70 }, { "epoch": 0.011503564484769928, "grad_norm": 2.5833189487457275, "learning_rate": 6.384892086330936e-07, "loss": 0.215, "step": 71 }, { "epoch": 0.011665586519766688, "grad_norm": 2.3865418434143066, "learning_rate": 6.474820143884893e-07, "loss": 0.171, "step": 72 }, { "epoch": 0.011827608554763449, "grad_norm": 2.407026529312134, "learning_rate": 6.564748201438849e-07, "loss": 0.1766, "step": 73 }, { "epoch": 0.011989630589760207, "grad_norm": 2.3541834354400635, "learning_rate": 6.654676258992807e-07, "loss": 0.1703, "step": 74 }, { "epoch": 0.012151652624756967, "grad_norm": 2.7565481662750244, "learning_rate": 6.744604316546763e-07, "loss": 0.1833, "step": 75 }, { "epoch": 0.012313674659753726, "grad_norm": 2.53861665725708, "learning_rate": 6.83453237410072e-07, "loss": 0.1967, "step": 76 }, { "epoch": 0.012475696694750486, "grad_norm": 2.920691728591919, "learning_rate": 6.924460431654677e-07, "loss": 0.1945, "step": 77 }, { "epoch": 0.012637718729747246, "grad_norm": 2.714219093322754, "learning_rate": 7.014388489208633e-07, "loss": 0.1961, "step": 78 }, { "epoch": 0.012799740764744005, "grad_norm": 2.2343974113464355, "learning_rate": 7.104316546762591e-07, "loss": 0.1868, "step": 79 }, { "epoch": 0.012961762799740765, "grad_norm": 2.333791971206665, "learning_rate": 7.194244604316547e-07, "loss": 0.1705, "step": 80 }, { "epoch": 0.013123784834737525, "grad_norm": 2.554150104522705, "learning_rate": 7.284172661870504e-07, "loss": 0.1822, "step": 81 }, { "epoch": 0.013285806869734284, "grad_norm": 2.51990008354187, "learning_rate": 7.37410071942446e-07, "loss": 0.1889, "step": 82 }, { "epoch": 0.013447828904731044, "grad_norm": 2.2917540073394775, "learning_rate": 7.464028776978418e-07, "loss": 0.1788, "step": 83 }, { "epoch": 0.013609850939727802, "grad_norm": 2.376124620437622, "learning_rate": 7.553956834532375e-07, "loss": 0.1995, "step": 84 }, { "epoch": 0.013771872974724562, "grad_norm": 2.521143674850464, "learning_rate": 7.643884892086331e-07, "loss": 0.1691, "step": 85 }, { "epoch": 0.013933895009721323, "grad_norm": 2.2666220664978027, "learning_rate": 7.733812949640289e-07, "loss": 0.1831, "step": 86 }, { "epoch": 0.014095917044718081, "grad_norm": 2.2533257007598877, "learning_rate": 7.823741007194246e-07, "loss": 0.1726, "step": 87 }, { "epoch": 0.014257939079714841, "grad_norm": 2.371137857437134, "learning_rate": 7.913669064748202e-07, "loss": 0.182, "step": 88 }, { "epoch": 0.014419961114711602, "grad_norm": 2.547879934310913, "learning_rate": 8.003597122302159e-07, "loss": 0.1908, "step": 89 }, { "epoch": 0.01458198314970836, "grad_norm": 2.433431625366211, "learning_rate": 8.093525179856115e-07, "loss": 0.1951, "step": 90 }, { "epoch": 0.01474400518470512, "grad_norm": 2.1704154014587402, "learning_rate": 8.183453237410073e-07, "loss": 0.1545, "step": 91 }, { "epoch": 0.014906027219701879, "grad_norm": 2.624864339828491, "learning_rate": 8.27338129496403e-07, "loss": 0.1969, "step": 92 }, { "epoch": 0.015068049254698639, "grad_norm": 2.3828327655792236, "learning_rate": 8.363309352517986e-07, "loss": 0.165, "step": 93 }, { "epoch": 0.0152300712896954, "grad_norm": 2.242358922958374, "learning_rate": 8.453237410071943e-07, "loss": 0.1761, "step": 94 }, { "epoch": 0.015392093324692158, "grad_norm": 2.126227378845215, "learning_rate": 8.543165467625899e-07, "loss": 0.1626, "step": 95 }, { "epoch": 0.015554115359688918, "grad_norm": 2.2046821117401123, "learning_rate": 8.633093525179857e-07, "loss": 0.1867, "step": 96 }, { "epoch": 0.015716137394685678, "grad_norm": 2.2523837089538574, "learning_rate": 8.723021582733814e-07, "loss": 0.1755, "step": 97 }, { "epoch": 0.015878159429682438, "grad_norm": 2.326197862625122, "learning_rate": 8.81294964028777e-07, "loss": 0.1811, "step": 98 }, { "epoch": 0.016040181464679195, "grad_norm": 2.4861881732940674, "learning_rate": 8.902877697841728e-07, "loss": 0.2073, "step": 99 }, { "epoch": 0.016202203499675955, "grad_norm": 2.3390986919403076, "learning_rate": 8.992805755395684e-07, "loss": 0.1749, "step": 100 }, { "epoch": 0.016364225534672715, "grad_norm": 2.372002124786377, "learning_rate": 9.082733812949641e-07, "loss": 0.1778, "step": 101 }, { "epoch": 0.016526247569669476, "grad_norm": 2.4759466648101807, "learning_rate": 9.172661870503598e-07, "loss": 0.1649, "step": 102 }, { "epoch": 0.016688269604666236, "grad_norm": 2.2869863510131836, "learning_rate": 9.262589928057554e-07, "loss": 0.159, "step": 103 }, { "epoch": 0.016850291639662993, "grad_norm": 2.3882856369018555, "learning_rate": 9.352517985611512e-07, "loss": 0.1893, "step": 104 }, { "epoch": 0.017012313674659753, "grad_norm": 2.316922187805176, "learning_rate": 9.442446043165468e-07, "loss": 0.1737, "step": 105 }, { "epoch": 0.017174335709656513, "grad_norm": 2.4775216579437256, "learning_rate": 9.532374100719425e-07, "loss": 0.204, "step": 106 }, { "epoch": 0.017336357744653273, "grad_norm": 2.44256329536438, "learning_rate": 9.622302158273383e-07, "loss": 0.203, "step": 107 }, { "epoch": 0.017498379779650033, "grad_norm": 2.3133935928344727, "learning_rate": 9.71223021582734e-07, "loss": 0.1842, "step": 108 }, { "epoch": 0.017660401814646794, "grad_norm": 2.498054265975952, "learning_rate": 9.802158273381295e-07, "loss": 0.169, "step": 109 }, { "epoch": 0.01782242384964355, "grad_norm": 2.51688814163208, "learning_rate": 9.892086330935252e-07, "loss": 0.1845, "step": 110 }, { "epoch": 0.01798444588464031, "grad_norm": 2.356818437576294, "learning_rate": 9.98201438848921e-07, "loss": 0.1835, "step": 111 }, { "epoch": 0.01814646791963707, "grad_norm": 2.1603775024414062, "learning_rate": 1.0071942446043167e-06, "loss": 0.155, "step": 112 }, { "epoch": 0.01830848995463383, "grad_norm": 2.509432554244995, "learning_rate": 1.0161870503597124e-06, "loss": 0.2281, "step": 113 }, { "epoch": 0.01847051198963059, "grad_norm": 2.5537493228912354, "learning_rate": 1.025179856115108e-06, "loss": 0.1793, "step": 114 }, { "epoch": 0.018632534024627348, "grad_norm": 2.246959924697876, "learning_rate": 1.0341726618705036e-06, "loss": 0.1801, "step": 115 }, { "epoch": 0.018794556059624108, "grad_norm": 2.19378662109375, "learning_rate": 1.0431654676258993e-06, "loss": 0.1637, "step": 116 }, { "epoch": 0.01895657809462087, "grad_norm": 2.3158321380615234, "learning_rate": 1.052158273381295e-06, "loss": 0.1617, "step": 117 }, { "epoch": 0.01911860012961763, "grad_norm": 2.4354169368743896, "learning_rate": 1.0611510791366908e-06, "loss": 0.1779, "step": 118 }, { "epoch": 0.01928062216461439, "grad_norm": 2.5059328079223633, "learning_rate": 1.0701438848920865e-06, "loss": 0.1832, "step": 119 }, { "epoch": 0.019442644199611146, "grad_norm": 2.449944019317627, "learning_rate": 1.079136690647482e-06, "loss": 0.2071, "step": 120 }, { "epoch": 0.019604666234607906, "grad_norm": 2.3459267616271973, "learning_rate": 1.0881294964028777e-06, "loss": 0.1757, "step": 121 }, { "epoch": 0.019766688269604666, "grad_norm": 2.456517457962036, "learning_rate": 1.0971223021582735e-06, "loss": 0.1804, "step": 122 }, { "epoch": 0.019928710304601426, "grad_norm": 2.350886344909668, "learning_rate": 1.1061151079136692e-06, "loss": 0.1675, "step": 123 }, { "epoch": 0.020090732339598186, "grad_norm": 2.2044262886047363, "learning_rate": 1.115107913669065e-06, "loss": 0.1666, "step": 124 }, { "epoch": 0.020252754374594947, "grad_norm": 2.2976362705230713, "learning_rate": 1.1241007194244604e-06, "loss": 0.1797, "step": 125 }, { "epoch": 0.020414776409591703, "grad_norm": 2.1294548511505127, "learning_rate": 1.1330935251798561e-06, "loss": 0.1529, "step": 126 }, { "epoch": 0.020576798444588464, "grad_norm": 2.205601453781128, "learning_rate": 1.1420863309352519e-06, "loss": 0.1641, "step": 127 }, { "epoch": 0.020738820479585224, "grad_norm": 2.2161824703216553, "learning_rate": 1.1510791366906476e-06, "loss": 0.1617, "step": 128 }, { "epoch": 0.020900842514581984, "grad_norm": 2.266568422317505, "learning_rate": 1.1600719424460433e-06, "loss": 0.1746, "step": 129 }, { "epoch": 0.021062864549578744, "grad_norm": 2.3664584159851074, "learning_rate": 1.1690647482014388e-06, "loss": 0.1777, "step": 130 }, { "epoch": 0.0212248865845755, "grad_norm": 2.2521674633026123, "learning_rate": 1.1780575539568347e-06, "loss": 0.1707, "step": 131 }, { "epoch": 0.02138690861957226, "grad_norm": 2.394850254058838, "learning_rate": 1.1870503597122303e-06, "loss": 0.1856, "step": 132 }, { "epoch": 0.02154893065456902, "grad_norm": 2.332915782928467, "learning_rate": 1.196043165467626e-06, "loss": 0.1818, "step": 133 }, { "epoch": 0.02171095268956578, "grad_norm": 2.307866334915161, "learning_rate": 1.2050359712230217e-06, "loss": 0.1799, "step": 134 }, { "epoch": 0.021872974724562542, "grad_norm": 2.2809898853302, "learning_rate": 1.2140287769784172e-06, "loss": 0.1762, "step": 135 }, { "epoch": 0.0220349967595593, "grad_norm": 2.1759355068206787, "learning_rate": 1.2230215827338131e-06, "loss": 0.1522, "step": 136 }, { "epoch": 0.02219701879455606, "grad_norm": 2.172473907470703, "learning_rate": 1.2320143884892087e-06, "loss": 0.1693, "step": 137 }, { "epoch": 0.02235904082955282, "grad_norm": 2.1808273792266846, "learning_rate": 1.2410071942446044e-06, "loss": 0.1549, "step": 138 }, { "epoch": 0.02252106286454958, "grad_norm": 2.164473533630371, "learning_rate": 1.25e-06, "loss": 0.1798, "step": 139 }, { "epoch": 0.02268308489954634, "grad_norm": 2.2718453407287598, "learning_rate": 1.2589928057553958e-06, "loss": 0.1606, "step": 140 }, { "epoch": 0.022845106934543096, "grad_norm": 2.1400561332702637, "learning_rate": 1.2679856115107913e-06, "loss": 0.1428, "step": 141 }, { "epoch": 0.023007128969539856, "grad_norm": 2.193063497543335, "learning_rate": 1.2769784172661873e-06, "loss": 0.1729, "step": 142 }, { "epoch": 0.023169151004536617, "grad_norm": 2.2718231678009033, "learning_rate": 1.285971223021583e-06, "loss": 0.1803, "step": 143 }, { "epoch": 0.023331173039533377, "grad_norm": 2.3830406665802, "learning_rate": 1.2949640287769785e-06, "loss": 0.1688, "step": 144 }, { "epoch": 0.023493195074530137, "grad_norm": 2.2217133045196533, "learning_rate": 1.3039568345323742e-06, "loss": 0.172, "step": 145 }, { "epoch": 0.023655217109526897, "grad_norm": 2.429953098297119, "learning_rate": 1.3129496402877697e-06, "loss": 0.1761, "step": 146 }, { "epoch": 0.023817239144523654, "grad_norm": 2.481562614440918, "learning_rate": 1.3219424460431657e-06, "loss": 0.1777, "step": 147 }, { "epoch": 0.023979261179520414, "grad_norm": 2.3909571170806885, "learning_rate": 1.3309352517985614e-06, "loss": 0.1814, "step": 148 }, { "epoch": 0.024141283214517174, "grad_norm": 2.3645970821380615, "learning_rate": 1.339928057553957e-06, "loss": 0.179, "step": 149 }, { "epoch": 0.024303305249513935, "grad_norm": 2.5163588523864746, "learning_rate": 1.3489208633093526e-06, "loss": 0.2024, "step": 150 }, { "epoch": 0.024465327284510695, "grad_norm": 2.2522170543670654, "learning_rate": 1.3579136690647481e-06, "loss": 0.1844, "step": 151 }, { "epoch": 0.02462734931950745, "grad_norm": 2.276763677597046, "learning_rate": 1.366906474820144e-06, "loss": 0.1934, "step": 152 }, { "epoch": 0.02478937135450421, "grad_norm": 2.1778862476348877, "learning_rate": 1.3758992805755398e-06, "loss": 0.166, "step": 153 }, { "epoch": 0.024951393389500972, "grad_norm": 2.162675619125366, "learning_rate": 1.3848920863309353e-06, "loss": 0.1823, "step": 154 }, { "epoch": 0.025113415424497732, "grad_norm": 2.212240695953369, "learning_rate": 1.393884892086331e-06, "loss": 0.1814, "step": 155 }, { "epoch": 0.025275437459494492, "grad_norm": 2.283370018005371, "learning_rate": 1.4028776978417265e-06, "loss": 0.1821, "step": 156 }, { "epoch": 0.02543745949449125, "grad_norm": 2.1461737155914307, "learning_rate": 1.4118705035971225e-06, "loss": 0.1689, "step": 157 }, { "epoch": 0.02559948152948801, "grad_norm": 2.1343040466308594, "learning_rate": 1.4208633093525182e-06, "loss": 0.1754, "step": 158 }, { "epoch": 0.02576150356448477, "grad_norm": 2.200503349304199, "learning_rate": 1.4298561151079137e-06, "loss": 0.1824, "step": 159 }, { "epoch": 0.02592352559948153, "grad_norm": 2.3421995639801025, "learning_rate": 1.4388489208633094e-06, "loss": 0.1768, "step": 160 }, { "epoch": 0.02608554763447829, "grad_norm": 2.1087827682495117, "learning_rate": 1.447841726618705e-06, "loss": 0.1929, "step": 161 }, { "epoch": 0.02624756966947505, "grad_norm": 2.4311277866363525, "learning_rate": 1.4568345323741009e-06, "loss": 0.1845, "step": 162 }, { "epoch": 0.026409591704471807, "grad_norm": 2.0372049808502197, "learning_rate": 1.4658273381294966e-06, "loss": 0.163, "step": 163 }, { "epoch": 0.026571613739468567, "grad_norm": 2.1894679069519043, "learning_rate": 1.474820143884892e-06, "loss": 0.1511, "step": 164 }, { "epoch": 0.026733635774465327, "grad_norm": 2.3050854206085205, "learning_rate": 1.4838129496402878e-06, "loss": 0.1867, "step": 165 }, { "epoch": 0.026895657809462088, "grad_norm": 2.4033379554748535, "learning_rate": 1.4928057553956835e-06, "loss": 0.1739, "step": 166 }, { "epoch": 0.027057679844458848, "grad_norm": 2.3603515625, "learning_rate": 1.5017985611510793e-06, "loss": 0.162, "step": 167 }, { "epoch": 0.027219701879455604, "grad_norm": 1.9225950241088867, "learning_rate": 1.510791366906475e-06, "loss": 0.1444, "step": 168 }, { "epoch": 0.027381723914452365, "grad_norm": 2.3583033084869385, "learning_rate": 1.5197841726618707e-06, "loss": 0.1942, "step": 169 }, { "epoch": 0.027543745949449125, "grad_norm": 2.416661262512207, "learning_rate": 1.5287769784172662e-06, "loss": 0.1778, "step": 170 }, { "epoch": 0.027705767984445885, "grad_norm": 2.0838210582733154, "learning_rate": 1.537769784172662e-06, "loss": 0.1581, "step": 171 }, { "epoch": 0.027867790019442645, "grad_norm": 2.3810360431671143, "learning_rate": 1.5467625899280579e-06, "loss": 0.174, "step": 172 }, { "epoch": 0.028029812054439402, "grad_norm": 2.243232011795044, "learning_rate": 1.5557553956834534e-06, "loss": 0.1563, "step": 173 }, { "epoch": 0.028191834089436162, "grad_norm": 2.2563958168029785, "learning_rate": 1.5647482014388491e-06, "loss": 0.1719, "step": 174 }, { "epoch": 0.028353856124432922, "grad_norm": 2.283281087875366, "learning_rate": 1.5737410071942446e-06, "loss": 0.1607, "step": 175 }, { "epoch": 0.028515878159429683, "grad_norm": 2.166625499725342, "learning_rate": 1.5827338129496403e-06, "loss": 0.1615, "step": 176 }, { "epoch": 0.028677900194426443, "grad_norm": 2.2912237644195557, "learning_rate": 1.5917266187050363e-06, "loss": 0.1731, "step": 177 }, { "epoch": 0.028839922229423203, "grad_norm": 2.196793794631958, "learning_rate": 1.6007194244604318e-06, "loss": 0.1644, "step": 178 }, { "epoch": 0.02900194426441996, "grad_norm": 2.193145513534546, "learning_rate": 1.6097122302158275e-06, "loss": 0.1632, "step": 179 }, { "epoch": 0.02916396629941672, "grad_norm": 2.420989990234375, "learning_rate": 1.618705035971223e-06, "loss": 0.1953, "step": 180 }, { "epoch": 0.02932598833441348, "grad_norm": 2.0741348266601562, "learning_rate": 1.6276978417266187e-06, "loss": 0.1573, "step": 181 }, { "epoch": 0.02948801036941024, "grad_norm": 2.2281997203826904, "learning_rate": 1.6366906474820147e-06, "loss": 0.1897, "step": 182 }, { "epoch": 0.029650032404407, "grad_norm": 2.163466453552246, "learning_rate": 1.6456834532374102e-06, "loss": 0.1742, "step": 183 }, { "epoch": 0.029812054439403757, "grad_norm": 2.054077625274658, "learning_rate": 1.654676258992806e-06, "loss": 0.1741, "step": 184 }, { "epoch": 0.029974076474400518, "grad_norm": 2.1084864139556885, "learning_rate": 1.6636690647482014e-06, "loss": 0.1887, "step": 185 }, { "epoch": 0.030136098509397278, "grad_norm": 2.160194158554077, "learning_rate": 1.6726618705035971e-06, "loss": 0.162, "step": 186 }, { "epoch": 0.030298120544394038, "grad_norm": 2.227296829223633, "learning_rate": 1.681654676258993e-06, "loss": 0.1717, "step": 187 }, { "epoch": 0.0304601425793908, "grad_norm": 2.0688178539276123, "learning_rate": 1.6906474820143886e-06, "loss": 0.1705, "step": 188 }, { "epoch": 0.030622164614387555, "grad_norm": 2.081521511077881, "learning_rate": 1.6996402877697843e-06, "loss": 0.1759, "step": 189 }, { "epoch": 0.030784186649384315, "grad_norm": 2.2949635982513428, "learning_rate": 1.7086330935251798e-06, "loss": 0.1693, "step": 190 }, { "epoch": 0.030946208684381075, "grad_norm": 2.3229682445526123, "learning_rate": 1.7176258992805755e-06, "loss": 0.186, "step": 191 }, { "epoch": 0.031108230719377836, "grad_norm": 2.3723998069763184, "learning_rate": 1.7266187050359715e-06, "loss": 0.1902, "step": 192 }, { "epoch": 0.031270252754374596, "grad_norm": 2.150329828262329, "learning_rate": 1.735611510791367e-06, "loss": 0.1723, "step": 193 }, { "epoch": 0.031432274789371356, "grad_norm": 2.097501277923584, "learning_rate": 1.7446043165467627e-06, "loss": 0.165, "step": 194 }, { "epoch": 0.031594296824368116, "grad_norm": 2.10372257232666, "learning_rate": 1.7535971223021584e-06, "loss": 0.1578, "step": 195 }, { "epoch": 0.031756318859364877, "grad_norm": 2.1476476192474365, "learning_rate": 1.762589928057554e-06, "loss": 0.1662, "step": 196 }, { "epoch": 0.03191834089436163, "grad_norm": 2.1310970783233643, "learning_rate": 1.7715827338129499e-06, "loss": 0.1655, "step": 197 }, { "epoch": 0.03208036292935839, "grad_norm": 2.2113521099090576, "learning_rate": 1.7805755395683456e-06, "loss": 0.1758, "step": 198 }, { "epoch": 0.03224238496435515, "grad_norm": 2.0751709938049316, "learning_rate": 1.7895683453237411e-06, "loss": 0.1585, "step": 199 }, { "epoch": 0.03240440699935191, "grad_norm": 1.9745714664459229, "learning_rate": 1.7985611510791368e-06, "loss": 0.1666, "step": 200 }, { "epoch": 0.03256642903434867, "grad_norm": 2.1305060386657715, "learning_rate": 1.8075539568345323e-06, "loss": 0.1843, "step": 201 }, { "epoch": 0.03272845106934543, "grad_norm": 2.0196950435638428, "learning_rate": 1.8165467625899283e-06, "loss": 0.1768, "step": 202 }, { "epoch": 0.03289047310434219, "grad_norm": 2.2363412380218506, "learning_rate": 1.825539568345324e-06, "loss": 0.169, "step": 203 }, { "epoch": 0.03305249513933895, "grad_norm": 2.118345260620117, "learning_rate": 1.8345323741007195e-06, "loss": 0.1486, "step": 204 }, { "epoch": 0.03321451717433571, "grad_norm": 2.0574629306793213, "learning_rate": 1.8435251798561152e-06, "loss": 0.1794, "step": 205 }, { "epoch": 0.03337653920933247, "grad_norm": 1.975506067276001, "learning_rate": 1.8525179856115107e-06, "loss": 0.1608, "step": 206 }, { "epoch": 0.03353856124432923, "grad_norm": 2.1187586784362793, "learning_rate": 1.8615107913669067e-06, "loss": 0.1514, "step": 207 }, { "epoch": 0.033700583279325985, "grad_norm": 2.031742811203003, "learning_rate": 1.8705035971223024e-06, "loss": 0.1567, "step": 208 }, { "epoch": 0.033862605314322745, "grad_norm": 2.198672294616699, "learning_rate": 1.879496402877698e-06, "loss": 0.1709, "step": 209 }, { "epoch": 0.034024627349319506, "grad_norm": 1.9215576648712158, "learning_rate": 1.8884892086330936e-06, "loss": 0.1466, "step": 210 }, { "epoch": 0.034186649384316266, "grad_norm": 2.0358407497406006, "learning_rate": 1.8974820143884896e-06, "loss": 0.1641, "step": 211 }, { "epoch": 0.034348671419313026, "grad_norm": 2.2609496116638184, "learning_rate": 1.906474820143885e-06, "loss": 0.1607, "step": 212 }, { "epoch": 0.034510693454309786, "grad_norm": 2.0340802669525146, "learning_rate": 1.915467625899281e-06, "loss": 0.1605, "step": 213 }, { "epoch": 0.034672715489306546, "grad_norm": 2.190073013305664, "learning_rate": 1.9244604316546765e-06, "loss": 0.1893, "step": 214 }, { "epoch": 0.03483473752430331, "grad_norm": 2.129892587661743, "learning_rate": 1.933453237410072e-06, "loss": 0.1657, "step": 215 }, { "epoch": 0.03499675955930007, "grad_norm": 1.9545798301696777, "learning_rate": 1.942446043165468e-06, "loss": 0.1698, "step": 216 }, { "epoch": 0.03515878159429683, "grad_norm": 1.9465980529785156, "learning_rate": 1.9514388489208637e-06, "loss": 0.1571, "step": 217 }, { "epoch": 0.03532080362929359, "grad_norm": 2.116178512573242, "learning_rate": 1.960431654676259e-06, "loss": 0.1581, "step": 218 }, { "epoch": 0.03548282566429034, "grad_norm": 2.0396151542663574, "learning_rate": 1.9694244604316547e-06, "loss": 0.1637, "step": 219 }, { "epoch": 0.0356448476992871, "grad_norm": 2.1016218662261963, "learning_rate": 1.9784172661870504e-06, "loss": 0.1707, "step": 220 }, { "epoch": 0.03580686973428386, "grad_norm": 2.122370719909668, "learning_rate": 1.987410071942446e-06, "loss": 0.1701, "step": 221 }, { "epoch": 0.03596889176928062, "grad_norm": 2.2020812034606934, "learning_rate": 1.996402877697842e-06, "loss": 0.1755, "step": 222 }, { "epoch": 0.03613091380427738, "grad_norm": 2.160726547241211, "learning_rate": 2.0053956834532376e-06, "loss": 0.1704, "step": 223 }, { "epoch": 0.03629293583927414, "grad_norm": 2.121961832046509, "learning_rate": 2.0143884892086333e-06, "loss": 0.1631, "step": 224 }, { "epoch": 0.0364549578742709, "grad_norm": 2.045151472091675, "learning_rate": 2.023381294964029e-06, "loss": 0.1794, "step": 225 }, { "epoch": 0.03661697990926766, "grad_norm": 1.982482671737671, "learning_rate": 2.0323741007194248e-06, "loss": 0.1649, "step": 226 }, { "epoch": 0.03677900194426442, "grad_norm": 2.0936903953552246, "learning_rate": 2.0413669064748205e-06, "loss": 0.1894, "step": 227 }, { "epoch": 0.03694102397926118, "grad_norm": 1.9307271242141724, "learning_rate": 2.050359712230216e-06, "loss": 0.1658, "step": 228 }, { "epoch": 0.037103046014257936, "grad_norm": 2.0920655727386475, "learning_rate": 2.0593525179856115e-06, "loss": 0.1823, "step": 229 }, { "epoch": 0.037265068049254696, "grad_norm": 2.037468194961548, "learning_rate": 2.0683453237410072e-06, "loss": 0.1854, "step": 230 }, { "epoch": 0.037427090084251456, "grad_norm": 2.0791573524475098, "learning_rate": 2.0773381294964034e-06, "loss": 0.1668, "step": 231 }, { "epoch": 0.037589112119248216, "grad_norm": 2.020608425140381, "learning_rate": 2.0863309352517987e-06, "loss": 0.1664, "step": 232 }, { "epoch": 0.03775113415424498, "grad_norm": 2.0509209632873535, "learning_rate": 2.0953237410071944e-06, "loss": 0.1655, "step": 233 }, { "epoch": 0.03791315618924174, "grad_norm": 1.895331621170044, "learning_rate": 2.10431654676259e-06, "loss": 0.1671, "step": 234 }, { "epoch": 0.0380751782242385, "grad_norm": 1.8676968812942505, "learning_rate": 2.113309352517986e-06, "loss": 0.1499, "step": 235 }, { "epoch": 0.03823720025923526, "grad_norm": 1.9968868494033813, "learning_rate": 2.1223021582733816e-06, "loss": 0.1704, "step": 236 }, { "epoch": 0.03839922229423202, "grad_norm": 2.107897996902466, "learning_rate": 2.1312949640287773e-06, "loss": 0.1738, "step": 237 }, { "epoch": 0.03856124432922878, "grad_norm": 2.0445058345794678, "learning_rate": 2.140287769784173e-06, "loss": 0.1809, "step": 238 }, { "epoch": 0.03872326636422554, "grad_norm": 2.110241413116455, "learning_rate": 2.1492805755395683e-06, "loss": 0.1731, "step": 239 }, { "epoch": 0.03888528839922229, "grad_norm": 2.0265743732452393, "learning_rate": 2.158273381294964e-06, "loss": 0.1581, "step": 240 }, { "epoch": 0.03904731043421905, "grad_norm": 1.8976444005966187, "learning_rate": 2.16726618705036e-06, "loss": 0.1622, "step": 241 }, { "epoch": 0.03920933246921581, "grad_norm": 1.967790126800537, "learning_rate": 2.1762589928057555e-06, "loss": 0.1639, "step": 242 }, { "epoch": 0.03937135450421257, "grad_norm": 2.0550568103790283, "learning_rate": 2.185251798561151e-06, "loss": 0.1786, "step": 243 }, { "epoch": 0.03953337653920933, "grad_norm": 1.9519095420837402, "learning_rate": 2.194244604316547e-06, "loss": 0.1772, "step": 244 }, { "epoch": 0.03969539857420609, "grad_norm": 2.0229897499084473, "learning_rate": 2.2032374100719426e-06, "loss": 0.1746, "step": 245 }, { "epoch": 0.03985742060920285, "grad_norm": 2.133192777633667, "learning_rate": 2.2122302158273384e-06, "loss": 0.172, "step": 246 }, { "epoch": 0.04001944264419961, "grad_norm": 2.323986053466797, "learning_rate": 2.221223021582734e-06, "loss": 0.1845, "step": 247 }, { "epoch": 0.04018146467919637, "grad_norm": 2.0216121673583984, "learning_rate": 2.23021582733813e-06, "loss": 0.1844, "step": 248 }, { "epoch": 0.04034348671419313, "grad_norm": 1.8960579633712769, "learning_rate": 2.2392086330935255e-06, "loss": 0.1468, "step": 249 }, { "epoch": 0.04050550874918989, "grad_norm": 1.971975564956665, "learning_rate": 2.248201438848921e-06, "loss": 0.1822, "step": 250 }, { "epoch": 0.040667530784186647, "grad_norm": 1.8761088848114014, "learning_rate": 2.257194244604317e-06, "loss": 0.1613, "step": 251 }, { "epoch": 0.04082955281918341, "grad_norm": 1.8940060138702393, "learning_rate": 2.2661870503597123e-06, "loss": 0.1827, "step": 252 }, { "epoch": 0.04099157485418017, "grad_norm": 1.880953311920166, "learning_rate": 2.275179856115108e-06, "loss": 0.1738, "step": 253 }, { "epoch": 0.04115359688917693, "grad_norm": 2.1408498287200928, "learning_rate": 2.2841726618705037e-06, "loss": 0.2066, "step": 254 }, { "epoch": 0.04131561892417369, "grad_norm": 2.4467737674713135, "learning_rate": 2.2931654676258994e-06, "loss": 0.1646, "step": 255 }, { "epoch": 0.04147764095917045, "grad_norm": 1.8072450160980225, "learning_rate": 2.302158273381295e-06, "loss": 0.1475, "step": 256 }, { "epoch": 0.04163966299416721, "grad_norm": 2.035788059234619, "learning_rate": 2.311151079136691e-06, "loss": 0.1742, "step": 257 }, { "epoch": 0.04180168502916397, "grad_norm": 2.0450069904327393, "learning_rate": 2.3201438848920866e-06, "loss": 0.1733, "step": 258 }, { "epoch": 0.04196370706416073, "grad_norm": 2.0818724632263184, "learning_rate": 2.3291366906474823e-06, "loss": 0.1743, "step": 259 }, { "epoch": 0.04212572909915749, "grad_norm": 2.11169695854187, "learning_rate": 2.3381294964028776e-06, "loss": 0.1632, "step": 260 }, { "epoch": 0.04228775113415424, "grad_norm": 2.079584836959839, "learning_rate": 2.3471223021582738e-06, "loss": 0.1868, "step": 261 }, { "epoch": 0.042449773169151, "grad_norm": 2.0339434146881104, "learning_rate": 2.3561151079136695e-06, "loss": 0.1536, "step": 262 }, { "epoch": 0.04261179520414776, "grad_norm": 2.0371828079223633, "learning_rate": 2.365107913669065e-06, "loss": 0.174, "step": 263 }, { "epoch": 0.04277381723914452, "grad_norm": 2.0146055221557617, "learning_rate": 2.3741007194244605e-06, "loss": 0.1599, "step": 264 }, { "epoch": 0.04293583927414128, "grad_norm": 2.0458879470825195, "learning_rate": 2.3830935251798562e-06, "loss": 0.1722, "step": 265 }, { "epoch": 0.04309786130913804, "grad_norm": 1.9001797437667847, "learning_rate": 2.392086330935252e-06, "loss": 0.1519, "step": 266 }, { "epoch": 0.0432598833441348, "grad_norm": 1.9941308498382568, "learning_rate": 2.4010791366906477e-06, "loss": 0.1796, "step": 267 }, { "epoch": 0.04342190537913156, "grad_norm": 2.0200061798095703, "learning_rate": 2.4100719424460434e-06, "loss": 0.1746, "step": 268 }, { "epoch": 0.04358392741412832, "grad_norm": 2.166887044906616, "learning_rate": 2.419064748201439e-06, "loss": 0.1712, "step": 269 }, { "epoch": 0.043745949449125084, "grad_norm": 2.011035680770874, "learning_rate": 2.4280575539568344e-06, "loss": 0.1726, "step": 270 }, { "epoch": 0.043907971484121844, "grad_norm": 2.070662021636963, "learning_rate": 2.4370503597122306e-06, "loss": 0.1877, "step": 271 }, { "epoch": 0.0440699935191186, "grad_norm": 1.8755451440811157, "learning_rate": 2.4460431654676263e-06, "loss": 0.153, "step": 272 }, { "epoch": 0.04423201555411536, "grad_norm": 2.0150034427642822, "learning_rate": 2.4550359712230216e-06, "loss": 0.1857, "step": 273 }, { "epoch": 0.04439403758911212, "grad_norm": 1.969179391860962, "learning_rate": 2.4640287769784173e-06, "loss": 0.1758, "step": 274 }, { "epoch": 0.04455605962410888, "grad_norm": 1.8629865646362305, "learning_rate": 2.473021582733813e-06, "loss": 0.1643, "step": 275 }, { "epoch": 0.04471808165910564, "grad_norm": 1.8298242092132568, "learning_rate": 2.4820143884892088e-06, "loss": 0.1666, "step": 276 }, { "epoch": 0.0448801036941024, "grad_norm": 1.870679259300232, "learning_rate": 2.4910071942446045e-06, "loss": 0.1613, "step": 277 }, { "epoch": 0.04504212572909916, "grad_norm": 2.0010008811950684, "learning_rate": 2.5e-06, "loss": 0.171, "step": 278 }, { "epoch": 0.04520414776409592, "grad_norm": 2.0288240909576416, "learning_rate": 2.508992805755396e-06, "loss": 0.1759, "step": 279 }, { "epoch": 0.04536616979909268, "grad_norm": 1.911431908607483, "learning_rate": 2.5179856115107916e-06, "loss": 0.1709, "step": 280 }, { "epoch": 0.04552819183408944, "grad_norm": 2.0871028900146484, "learning_rate": 2.526978417266187e-06, "loss": 0.1817, "step": 281 }, { "epoch": 0.04569021386908619, "grad_norm": 1.9303252696990967, "learning_rate": 2.5359712230215827e-06, "loss": 0.189, "step": 282 }, { "epoch": 0.04585223590408295, "grad_norm": 2.0121030807495117, "learning_rate": 2.544964028776979e-06, "loss": 0.1956, "step": 283 }, { "epoch": 0.04601425793907971, "grad_norm": 1.8766732215881348, "learning_rate": 2.5539568345323745e-06, "loss": 0.1731, "step": 284 }, { "epoch": 0.04617627997407647, "grad_norm": 1.7872287034988403, "learning_rate": 2.5629496402877703e-06, "loss": 0.1672, "step": 285 }, { "epoch": 0.04633830200907323, "grad_norm": 2.2016873359680176, "learning_rate": 2.571942446043166e-06, "loss": 0.1942, "step": 286 }, { "epoch": 0.04650032404406999, "grad_norm": 2.001664400100708, "learning_rate": 2.5809352517985613e-06, "loss": 0.1779, "step": 287 }, { "epoch": 0.046662346079066754, "grad_norm": 1.881262183189392, "learning_rate": 2.589928057553957e-06, "loss": 0.1621, "step": 288 }, { "epoch": 0.046824368114063514, "grad_norm": 2.1064798831939697, "learning_rate": 2.5989208633093527e-06, "loss": 0.1788, "step": 289 }, { "epoch": 0.046986390149060274, "grad_norm": 1.8403074741363525, "learning_rate": 2.6079136690647484e-06, "loss": 0.1627, "step": 290 }, { "epoch": 0.047148412184057034, "grad_norm": 2.0622360706329346, "learning_rate": 2.6169064748201437e-06, "loss": 0.1742, "step": 291 }, { "epoch": 0.047310434219053794, "grad_norm": 1.878718376159668, "learning_rate": 2.6258992805755395e-06, "loss": 0.1509, "step": 292 }, { "epoch": 0.04747245625405055, "grad_norm": 2.0211925506591797, "learning_rate": 2.6348920863309356e-06, "loss": 0.1923, "step": 293 }, { "epoch": 0.04763447828904731, "grad_norm": 2.1039490699768066, "learning_rate": 2.6438848920863313e-06, "loss": 0.1733, "step": 294 }, { "epoch": 0.04779650032404407, "grad_norm": 2.026010751724243, "learning_rate": 2.652877697841727e-06, "loss": 0.1759, "step": 295 }, { "epoch": 0.04795852235904083, "grad_norm": 2.0125303268432617, "learning_rate": 2.6618705035971228e-06, "loss": 0.1844, "step": 296 }, { "epoch": 0.04812054439403759, "grad_norm": 1.8597307205200195, "learning_rate": 2.670863309352518e-06, "loss": 0.1637, "step": 297 }, { "epoch": 0.04828256642903435, "grad_norm": 1.8322033882141113, "learning_rate": 2.679856115107914e-06, "loss": 0.1638, "step": 298 }, { "epoch": 0.04844458846403111, "grad_norm": 1.9159802198410034, "learning_rate": 2.6888489208633095e-06, "loss": 0.1668, "step": 299 }, { "epoch": 0.04860661049902787, "grad_norm": 1.653609037399292, "learning_rate": 2.6978417266187052e-06, "loss": 0.1523, "step": 300 }, { "epoch": 0.04876863253402463, "grad_norm": 1.8605982065200806, "learning_rate": 2.706834532374101e-06, "loss": 0.1641, "step": 301 }, { "epoch": 0.04893065456902139, "grad_norm": 1.9447520971298218, "learning_rate": 2.7158273381294963e-06, "loss": 0.1795, "step": 302 }, { "epoch": 0.04909267660401815, "grad_norm": 2.1139204502105713, "learning_rate": 2.7248201438848924e-06, "loss": 0.1839, "step": 303 }, { "epoch": 0.0492546986390149, "grad_norm": 2.0103182792663574, "learning_rate": 2.733812949640288e-06, "loss": 0.1707, "step": 304 }, { "epoch": 0.04941672067401166, "grad_norm": 1.9638346433639526, "learning_rate": 2.742805755395684e-06, "loss": 0.1548, "step": 305 }, { "epoch": 0.04957874270900842, "grad_norm": 2.0801048278808594, "learning_rate": 2.7517985611510796e-06, "loss": 0.1813, "step": 306 }, { "epoch": 0.049740764744005184, "grad_norm": 1.848359227180481, "learning_rate": 2.760791366906475e-06, "loss": 0.1655, "step": 307 }, { "epoch": 0.049902786779001944, "grad_norm": 1.85602605342865, "learning_rate": 2.7697841726618706e-06, "loss": 0.1789, "step": 308 }, { "epoch": 0.050064808813998704, "grad_norm": 1.8783005475997925, "learning_rate": 2.7787769784172663e-06, "loss": 0.1753, "step": 309 }, { "epoch": 0.050226830848995464, "grad_norm": 1.981858253479004, "learning_rate": 2.787769784172662e-06, "loss": 0.1798, "step": 310 }, { "epoch": 0.050388852883992225, "grad_norm": 2.0939548015594482, "learning_rate": 2.7967625899280578e-06, "loss": 0.1923, "step": 311 }, { "epoch": 0.050550874918988985, "grad_norm": 2.0538747310638428, "learning_rate": 2.805755395683453e-06, "loss": 0.1869, "step": 312 }, { "epoch": 0.050712896953985745, "grad_norm": 1.807354211807251, "learning_rate": 2.8147482014388492e-06, "loss": 0.1719, "step": 313 }, { "epoch": 0.0508749189889825, "grad_norm": 2.057577610015869, "learning_rate": 2.823741007194245e-06, "loss": 0.1939, "step": 314 }, { "epoch": 0.05103694102397926, "grad_norm": 1.821679949760437, "learning_rate": 2.8327338129496407e-06, "loss": 0.1564, "step": 315 }, { "epoch": 0.05119896305897602, "grad_norm": 1.8081183433532715, "learning_rate": 2.8417266187050364e-06, "loss": 0.1717, "step": 316 }, { "epoch": 0.05136098509397278, "grad_norm": 1.8699404001235962, "learning_rate": 2.850719424460432e-06, "loss": 0.173, "step": 317 }, { "epoch": 0.05152300712896954, "grad_norm": 2.0023691654205322, "learning_rate": 2.8597122302158274e-06, "loss": 0.1764, "step": 318 }, { "epoch": 0.0516850291639663, "grad_norm": 2.0363411903381348, "learning_rate": 2.868705035971223e-06, "loss": 0.1744, "step": 319 }, { "epoch": 0.05184705119896306, "grad_norm": 1.8756717443466187, "learning_rate": 2.877697841726619e-06, "loss": 0.176, "step": 320 }, { "epoch": 0.05200907323395982, "grad_norm": 2.1065685749053955, "learning_rate": 2.8866906474820146e-06, "loss": 0.1713, "step": 321 }, { "epoch": 0.05217109526895658, "grad_norm": 1.929309606552124, "learning_rate": 2.89568345323741e-06, "loss": 0.1705, "step": 322 }, { "epoch": 0.05233311730395334, "grad_norm": 1.8109393119812012, "learning_rate": 2.9046762589928064e-06, "loss": 0.1771, "step": 323 }, { "epoch": 0.0524951393389501, "grad_norm": 1.9056837558746338, "learning_rate": 2.9136690647482017e-06, "loss": 0.1714, "step": 324 }, { "epoch": 0.052657161373946854, "grad_norm": 1.8842450380325317, "learning_rate": 2.9226618705035975e-06, "loss": 0.1938, "step": 325 }, { "epoch": 0.052819183408943614, "grad_norm": 1.9185986518859863, "learning_rate": 2.931654676258993e-06, "loss": 0.1771, "step": 326 }, { "epoch": 0.052981205443940374, "grad_norm": 1.8425815105438232, "learning_rate": 2.940647482014389e-06, "loss": 0.19, "step": 327 }, { "epoch": 0.053143227478937134, "grad_norm": 1.9350950717926025, "learning_rate": 2.949640287769784e-06, "loss": 0.187, "step": 328 }, { "epoch": 0.053305249513933894, "grad_norm": 1.8640246391296387, "learning_rate": 2.95863309352518e-06, "loss": 0.1677, "step": 329 }, { "epoch": 0.053467271548930655, "grad_norm": 1.8789522647857666, "learning_rate": 2.9676258992805756e-06, "loss": 0.1823, "step": 330 }, { "epoch": 0.053629293583927415, "grad_norm": 1.8093454837799072, "learning_rate": 2.9766187050359714e-06, "loss": 0.1587, "step": 331 }, { "epoch": 0.053791315618924175, "grad_norm": 1.8029776811599731, "learning_rate": 2.985611510791367e-06, "loss": 0.1723, "step": 332 }, { "epoch": 0.053953337653920935, "grad_norm": 1.8550293445587158, "learning_rate": 2.9946043165467632e-06, "loss": 0.1948, "step": 333 }, { "epoch": 0.054115359688917695, "grad_norm": 1.8107119798660278, "learning_rate": 3.0035971223021585e-06, "loss": 0.172, "step": 334 }, { "epoch": 0.054277381723914456, "grad_norm": 1.7598873376846313, "learning_rate": 3.0125899280575543e-06, "loss": 0.1709, "step": 335 }, { "epoch": 0.05443940375891121, "grad_norm": 1.8725013732910156, "learning_rate": 3.02158273381295e-06, "loss": 0.2019, "step": 336 }, { "epoch": 0.05460142579390797, "grad_norm": 1.7645277976989746, "learning_rate": 3.0305755395683457e-06, "loss": 0.1677, "step": 337 }, { "epoch": 0.05476344782890473, "grad_norm": 1.922553300857544, "learning_rate": 3.0395683453237414e-06, "loss": 0.1788, "step": 338 }, { "epoch": 0.05492546986390149, "grad_norm": 1.7409249544143677, "learning_rate": 3.0485611510791367e-06, "loss": 0.1629, "step": 339 }, { "epoch": 0.05508749189889825, "grad_norm": 1.914994478225708, "learning_rate": 3.0575539568345324e-06, "loss": 0.1757, "step": 340 }, { "epoch": 0.05524951393389501, "grad_norm": 1.8456170558929443, "learning_rate": 3.066546762589928e-06, "loss": 0.1906, "step": 341 }, { "epoch": 0.05541153596889177, "grad_norm": 1.949918270111084, "learning_rate": 3.075539568345324e-06, "loss": 0.2056, "step": 342 }, { "epoch": 0.05557355800388853, "grad_norm": 1.8012248277664185, "learning_rate": 3.08453237410072e-06, "loss": 0.1719, "step": 343 }, { "epoch": 0.05573558003888529, "grad_norm": 1.942472219467163, "learning_rate": 3.0935251798561158e-06, "loss": 0.1834, "step": 344 }, { "epoch": 0.05589760207388205, "grad_norm": 1.8514517545700073, "learning_rate": 3.102517985611511e-06, "loss": 0.1911, "step": 345 }, { "epoch": 0.056059624108878804, "grad_norm": 1.7124977111816406, "learning_rate": 3.1115107913669068e-06, "loss": 0.1656, "step": 346 }, { "epoch": 0.056221646143875564, "grad_norm": 1.6776280403137207, "learning_rate": 3.1205035971223025e-06, "loss": 0.1741, "step": 347 }, { "epoch": 0.056383668178872325, "grad_norm": 2.2318480014801025, "learning_rate": 3.1294964028776982e-06, "loss": 0.219, "step": 348 }, { "epoch": 0.056545690213869085, "grad_norm": 1.7956312894821167, "learning_rate": 3.1384892086330935e-06, "loss": 0.1731, "step": 349 }, { "epoch": 0.056707712248865845, "grad_norm": 1.813264012336731, "learning_rate": 3.1474820143884892e-06, "loss": 0.1798, "step": 350 }, { "epoch": 0.056869734283862605, "grad_norm": 1.6817779541015625, "learning_rate": 3.156474820143885e-06, "loss": 0.1615, "step": 351 }, { "epoch": 0.057031756318859365, "grad_norm": 1.8656824827194214, "learning_rate": 3.1654676258992807e-06, "loss": 0.1682, "step": 352 }, { "epoch": 0.057193778353856126, "grad_norm": 1.817460536956787, "learning_rate": 3.174460431654677e-06, "loss": 0.1608, "step": 353 }, { "epoch": 0.057355800388852886, "grad_norm": 1.9450856447219849, "learning_rate": 3.1834532374100726e-06, "loss": 0.1892, "step": 354 }, { "epoch": 0.057517822423849646, "grad_norm": 1.7994742393493652, "learning_rate": 3.192446043165468e-06, "loss": 0.1634, "step": 355 }, { "epoch": 0.057679844458846406, "grad_norm": 1.8007848262786865, "learning_rate": 3.2014388489208636e-06, "loss": 0.1731, "step": 356 }, { "epoch": 0.05784186649384316, "grad_norm": 1.920350432395935, "learning_rate": 3.2104316546762593e-06, "loss": 0.174, "step": 357 }, { "epoch": 0.05800388852883992, "grad_norm": 1.810903549194336, "learning_rate": 3.219424460431655e-06, "loss": 0.1828, "step": 358 }, { "epoch": 0.05816591056383668, "grad_norm": 1.9278273582458496, "learning_rate": 3.2284172661870507e-06, "loss": 0.1881, "step": 359 }, { "epoch": 0.05832793259883344, "grad_norm": 1.7868634462356567, "learning_rate": 3.237410071942446e-06, "loss": 0.1719, "step": 360 }, { "epoch": 0.0584899546338302, "grad_norm": 1.6021710634231567, "learning_rate": 3.2464028776978418e-06, "loss": 0.1478, "step": 361 }, { "epoch": 0.05865197666882696, "grad_norm": 1.6602813005447388, "learning_rate": 3.2553956834532375e-06, "loss": 0.1527, "step": 362 }, { "epoch": 0.05881399870382372, "grad_norm": 1.8697694540023804, "learning_rate": 3.2643884892086336e-06, "loss": 0.1809, "step": 363 }, { "epoch": 0.05897602073882048, "grad_norm": 1.9650862216949463, "learning_rate": 3.2733812949640294e-06, "loss": 0.1869, "step": 364 }, { "epoch": 0.05913804277381724, "grad_norm": 1.7278015613555908, "learning_rate": 3.2823741007194247e-06, "loss": 0.1734, "step": 365 }, { "epoch": 0.059300064808814, "grad_norm": 1.995110273361206, "learning_rate": 3.2913669064748204e-06, "loss": 0.1962, "step": 366 }, { "epoch": 0.05946208684381076, "grad_norm": 1.8341810703277588, "learning_rate": 3.300359712230216e-06, "loss": 0.1917, "step": 367 }, { "epoch": 0.059624108878807515, "grad_norm": 1.8125361204147339, "learning_rate": 3.309352517985612e-06, "loss": 0.1794, "step": 368 }, { "epoch": 0.059786130913804275, "grad_norm": 1.6951332092285156, "learning_rate": 3.3183453237410075e-06, "loss": 0.1623, "step": 369 }, { "epoch": 0.059948152948801035, "grad_norm": 1.753805160522461, "learning_rate": 3.327338129496403e-06, "loss": 0.1757, "step": 370 }, { "epoch": 0.060110174983797796, "grad_norm": 1.876604676246643, "learning_rate": 3.3363309352517986e-06, "loss": 0.2007, "step": 371 }, { "epoch": 0.060272197018794556, "grad_norm": 1.8833791017532349, "learning_rate": 3.3453237410071943e-06, "loss": 0.19, "step": 372 }, { "epoch": 0.060434219053791316, "grad_norm": 1.769794225692749, "learning_rate": 3.3543165467625904e-06, "loss": 0.1679, "step": 373 }, { "epoch": 0.060596241088788076, "grad_norm": 1.955523133277893, "learning_rate": 3.363309352517986e-06, "loss": 0.1937, "step": 374 }, { "epoch": 0.060758263123784836, "grad_norm": 1.8921654224395752, "learning_rate": 3.372302158273382e-06, "loss": 0.1607, "step": 375 }, { "epoch": 0.0609202851587816, "grad_norm": 1.7015023231506348, "learning_rate": 3.381294964028777e-06, "loss": 0.1699, "step": 376 }, { "epoch": 0.06108230719377836, "grad_norm": 1.8324452638626099, "learning_rate": 3.390287769784173e-06, "loss": 0.1949, "step": 377 }, { "epoch": 0.06124432922877511, "grad_norm": 1.611916422843933, "learning_rate": 3.3992805755395686e-06, "loss": 0.1381, "step": 378 }, { "epoch": 0.06140635126377187, "grad_norm": 1.8987873792648315, "learning_rate": 3.4082733812949643e-06, "loss": 0.1813, "step": 379 }, { "epoch": 0.06156837329876863, "grad_norm": 1.7634623050689697, "learning_rate": 3.4172661870503596e-06, "loss": 0.1739, "step": 380 }, { "epoch": 0.06173039533376539, "grad_norm": 1.7649253606796265, "learning_rate": 3.4262589928057554e-06, "loss": 0.1654, "step": 381 }, { "epoch": 0.06189241736876215, "grad_norm": 1.7400226593017578, "learning_rate": 3.435251798561151e-06, "loss": 0.1646, "step": 382 }, { "epoch": 0.06205443940375891, "grad_norm": 1.8828028440475464, "learning_rate": 3.4442446043165472e-06, "loss": 0.1785, "step": 383 }, { "epoch": 0.06221646143875567, "grad_norm": 1.9339855909347534, "learning_rate": 3.453237410071943e-06, "loss": 0.1811, "step": 384 }, { "epoch": 0.06237848347375243, "grad_norm": 1.6471163034439087, "learning_rate": 3.4622302158273387e-06, "loss": 0.1637, "step": 385 }, { "epoch": 0.06254050550874919, "grad_norm": 1.9495569467544556, "learning_rate": 3.471223021582734e-06, "loss": 0.1963, "step": 386 }, { "epoch": 0.06270252754374595, "grad_norm": 1.7704451084136963, "learning_rate": 3.4802158273381297e-06, "loss": 0.1774, "step": 387 }, { "epoch": 0.06286454957874271, "grad_norm": 1.7431086301803589, "learning_rate": 3.4892086330935254e-06, "loss": 0.1647, "step": 388 }, { "epoch": 0.06302657161373947, "grad_norm": 2.1561217308044434, "learning_rate": 3.498201438848921e-06, "loss": 0.1727, "step": 389 }, { "epoch": 0.06318859364873623, "grad_norm": 1.6407549381256104, "learning_rate": 3.507194244604317e-06, "loss": 0.1625, "step": 390 }, { "epoch": 0.06335061568373299, "grad_norm": 1.7267616987228394, "learning_rate": 3.516187050359712e-06, "loss": 0.1668, "step": 391 }, { "epoch": 0.06351263771872975, "grad_norm": 1.9636503458023071, "learning_rate": 3.525179856115108e-06, "loss": 0.2013, "step": 392 }, { "epoch": 0.0636746597537265, "grad_norm": 1.6342748403549194, "learning_rate": 3.534172661870504e-06, "loss": 0.1827, "step": 393 }, { "epoch": 0.06383668178872326, "grad_norm": 1.8311208486557007, "learning_rate": 3.5431654676258998e-06, "loss": 0.1811, "step": 394 }, { "epoch": 0.06399870382372003, "grad_norm": 1.904611587524414, "learning_rate": 3.5521582733812955e-06, "loss": 0.2114, "step": 395 }, { "epoch": 0.06416072585871678, "grad_norm": 1.7322005033493042, "learning_rate": 3.561151079136691e-06, "loss": 0.1734, "step": 396 }, { "epoch": 0.06432274789371355, "grad_norm": 1.7229621410369873, "learning_rate": 3.5701438848920865e-06, "loss": 0.1606, "step": 397 }, { "epoch": 0.0644847699287103, "grad_norm": 1.8113038539886475, "learning_rate": 3.5791366906474822e-06, "loss": 0.191, "step": 398 }, { "epoch": 0.06464679196370707, "grad_norm": 1.7715518474578857, "learning_rate": 3.588129496402878e-06, "loss": 0.1914, "step": 399 }, { "epoch": 0.06480881399870382, "grad_norm": 1.8682818412780762, "learning_rate": 3.5971223021582737e-06, "loss": 0.1771, "step": 400 }, { "epoch": 0.06497083603370059, "grad_norm": 1.6986286640167236, "learning_rate": 3.606115107913669e-06, "loss": 0.1693, "step": 401 }, { "epoch": 0.06513285806869734, "grad_norm": 1.7627536058425903, "learning_rate": 3.6151079136690647e-06, "loss": 0.1719, "step": 402 }, { "epoch": 0.06529488010369411, "grad_norm": 1.707191824913025, "learning_rate": 3.624100719424461e-06, "loss": 0.1713, "step": 403 }, { "epoch": 0.06545690213869086, "grad_norm": 14.93448543548584, "learning_rate": 3.6330935251798566e-06, "loss": 0.1574, "step": 404 }, { "epoch": 0.06561892417368761, "grad_norm": 2.0957789421081543, "learning_rate": 3.6420863309352523e-06, "loss": 0.2031, "step": 405 }, { "epoch": 0.06578094620868438, "grad_norm": 1.8755544424057007, "learning_rate": 3.651079136690648e-06, "loss": 0.1766, "step": 406 }, { "epoch": 0.06594296824368114, "grad_norm": 1.6951358318328857, "learning_rate": 3.6600719424460433e-06, "loss": 0.1615, "step": 407 }, { "epoch": 0.0661049902786779, "grad_norm": 1.621053695678711, "learning_rate": 3.669064748201439e-06, "loss": 0.1667, "step": 408 }, { "epoch": 0.06626701231367466, "grad_norm": 1.7325719594955444, "learning_rate": 3.6780575539568347e-06, "loss": 0.1811, "step": 409 }, { "epoch": 0.06642903434867142, "grad_norm": 1.7083348035812378, "learning_rate": 3.6870503597122305e-06, "loss": 0.1689, "step": 410 }, { "epoch": 0.06659105638366818, "grad_norm": 1.5969374179840088, "learning_rate": 3.696043165467626e-06, "loss": 0.1649, "step": 411 }, { "epoch": 0.06675307841866494, "grad_norm": 1.688307285308838, "learning_rate": 3.7050359712230215e-06, "loss": 0.1681, "step": 412 }, { "epoch": 0.0669151004536617, "grad_norm": 1.976590871810913, "learning_rate": 3.7140287769784176e-06, "loss": 0.2102, "step": 413 }, { "epoch": 0.06707712248865846, "grad_norm": 1.8404573202133179, "learning_rate": 3.7230215827338134e-06, "loss": 0.1617, "step": 414 }, { "epoch": 0.06723914452365522, "grad_norm": 1.8087718486785889, "learning_rate": 3.732014388489209e-06, "loss": 0.1652, "step": 415 }, { "epoch": 0.06740116655865197, "grad_norm": 1.8954182863235474, "learning_rate": 3.741007194244605e-06, "loss": 0.1803, "step": 416 }, { "epoch": 0.06756318859364874, "grad_norm": 1.7707210779190063, "learning_rate": 3.7500000000000005e-06, "loss": 0.1833, "step": 417 }, { "epoch": 0.06772521062864549, "grad_norm": 1.5861374139785767, "learning_rate": 3.758992805755396e-06, "loss": 0.1485, "step": 418 }, { "epoch": 0.06788723266364226, "grad_norm": 1.7552915811538696, "learning_rate": 3.7679856115107915e-06, "loss": 0.1783, "step": 419 }, { "epoch": 0.06804925469863901, "grad_norm": 1.6234557628631592, "learning_rate": 3.7769784172661873e-06, "loss": 0.1765, "step": 420 }, { "epoch": 0.06821127673363578, "grad_norm": 1.9008007049560547, "learning_rate": 3.785971223021583e-06, "loss": 0.1839, "step": 421 }, { "epoch": 0.06837329876863253, "grad_norm": 1.8210786581039429, "learning_rate": 3.794964028776979e-06, "loss": 0.1912, "step": 422 }, { "epoch": 0.0685353208036293, "grad_norm": 1.6907942295074463, "learning_rate": 3.8039568345323744e-06, "loss": 0.1651, "step": 423 }, { "epoch": 0.06869734283862605, "grad_norm": 1.691689372062683, "learning_rate": 3.81294964028777e-06, "loss": 0.176, "step": 424 }, { "epoch": 0.06885936487362282, "grad_norm": 1.6513557434082031, "learning_rate": 3.821942446043166e-06, "loss": 0.173, "step": 425 }, { "epoch": 0.06902138690861957, "grad_norm": 1.5421315431594849, "learning_rate": 3.830935251798562e-06, "loss": 0.1508, "step": 426 }, { "epoch": 0.06918340894361633, "grad_norm": 1.7500141859054565, "learning_rate": 3.839928057553957e-06, "loss": 0.195, "step": 427 }, { "epoch": 0.06934543097861309, "grad_norm": 1.5468772649765015, "learning_rate": 3.848920863309353e-06, "loss": 0.142, "step": 428 }, { "epoch": 0.06950745301360985, "grad_norm": 1.614610195159912, "learning_rate": 3.857913669064748e-06, "loss": 0.1648, "step": 429 }, { "epoch": 0.06966947504860661, "grad_norm": 1.802154541015625, "learning_rate": 3.866906474820144e-06, "loss": 0.185, "step": 430 }, { "epoch": 0.06983149708360337, "grad_norm": 1.7224853038787842, "learning_rate": 3.87589928057554e-06, "loss": 0.1658, "step": 431 }, { "epoch": 0.06999351911860013, "grad_norm": 1.7054455280303955, "learning_rate": 3.884892086330936e-06, "loss": 0.166, "step": 432 }, { "epoch": 0.07015554115359689, "grad_norm": 1.712424397468567, "learning_rate": 3.893884892086331e-06, "loss": 0.1694, "step": 433 }, { "epoch": 0.07031756318859365, "grad_norm": 1.892596960067749, "learning_rate": 3.902877697841727e-06, "loss": 0.1748, "step": 434 }, { "epoch": 0.07047958522359041, "grad_norm": 1.7858827114105225, "learning_rate": 3.911870503597123e-06, "loss": 0.1781, "step": 435 }, { "epoch": 0.07064160725858717, "grad_norm": 1.9324816465377808, "learning_rate": 3.920863309352518e-06, "loss": 0.1967, "step": 436 }, { "epoch": 0.07080362929358393, "grad_norm": 1.74705970287323, "learning_rate": 3.929856115107914e-06, "loss": 0.1813, "step": 437 }, { "epoch": 0.07096565132858068, "grad_norm": 1.7450978755950928, "learning_rate": 3.938848920863309e-06, "loss": 0.181, "step": 438 }, { "epoch": 0.07112767336357745, "grad_norm": 1.758083701133728, "learning_rate": 3.9478417266187056e-06, "loss": 0.1903, "step": 439 }, { "epoch": 0.0712896953985742, "grad_norm": 1.6639132499694824, "learning_rate": 3.956834532374101e-06, "loss": 0.1826, "step": 440 }, { "epoch": 0.07145171743357097, "grad_norm": 1.6857917308807373, "learning_rate": 3.965827338129496e-06, "loss": 0.1705, "step": 441 }, { "epoch": 0.07161373946856772, "grad_norm": 1.6692376136779785, "learning_rate": 3.974820143884892e-06, "loss": 0.1877, "step": 442 }, { "epoch": 0.07177576150356449, "grad_norm": 1.5959900617599487, "learning_rate": 3.9838129496402885e-06, "loss": 0.1659, "step": 443 }, { "epoch": 0.07193778353856124, "grad_norm": 1.768189549446106, "learning_rate": 3.992805755395684e-06, "loss": 0.1933, "step": 444 }, { "epoch": 0.07209980557355801, "grad_norm": 1.590326189994812, "learning_rate": 4.00179856115108e-06, "loss": 0.1662, "step": 445 }, { "epoch": 0.07226182760855476, "grad_norm": 1.768247365951538, "learning_rate": 4.010791366906475e-06, "loss": 0.1964, "step": 446 }, { "epoch": 0.07242384964355152, "grad_norm": 1.639091968536377, "learning_rate": 4.0197841726618705e-06, "loss": 0.1858, "step": 447 }, { "epoch": 0.07258587167854828, "grad_norm": 1.7374097108840942, "learning_rate": 4.028776978417267e-06, "loss": 0.1829, "step": 448 }, { "epoch": 0.07274789371354504, "grad_norm": 1.716292381286621, "learning_rate": 4.037769784172662e-06, "loss": 0.1936, "step": 449 }, { "epoch": 0.0729099157485418, "grad_norm": 1.5779844522476196, "learning_rate": 4.046762589928058e-06, "loss": 0.1685, "step": 450 }, { "epoch": 0.07307193778353856, "grad_norm": 1.6292905807495117, "learning_rate": 4.055755395683453e-06, "loss": 0.1623, "step": 451 }, { "epoch": 0.07323395981853532, "grad_norm": 1.4410508871078491, "learning_rate": 4.0647482014388495e-06, "loss": 0.1431, "step": 452 }, { "epoch": 0.07339598185353208, "grad_norm": 1.6054296493530273, "learning_rate": 4.073741007194245e-06, "loss": 0.1682, "step": 453 }, { "epoch": 0.07355800388852884, "grad_norm": 1.6179120540618896, "learning_rate": 4.082733812949641e-06, "loss": 0.1837, "step": 454 }, { "epoch": 0.0737200259235256, "grad_norm": 1.683802604675293, "learning_rate": 4.091726618705036e-06, "loss": 0.1766, "step": 455 }, { "epoch": 0.07388204795852236, "grad_norm": 1.5937223434448242, "learning_rate": 4.100719424460432e-06, "loss": 0.1603, "step": 456 }, { "epoch": 0.07404406999351912, "grad_norm": 1.7861487865447998, "learning_rate": 4.109712230215828e-06, "loss": 0.1868, "step": 457 }, { "epoch": 0.07420609202851587, "grad_norm": 1.786029577255249, "learning_rate": 4.118705035971223e-06, "loss": 0.1933, "step": 458 }, { "epoch": 0.07436811406351264, "grad_norm": 1.6774612665176392, "learning_rate": 4.127697841726619e-06, "loss": 0.1834, "step": 459 }, { "epoch": 0.07453013609850939, "grad_norm": 1.8457497358322144, "learning_rate": 4.1366906474820145e-06, "loss": 0.1785, "step": 460 }, { "epoch": 0.07469215813350616, "grad_norm": 1.6226444244384766, "learning_rate": 4.14568345323741e-06, "loss": 0.1689, "step": 461 }, { "epoch": 0.07485418016850291, "grad_norm": 1.7107115983963013, "learning_rate": 4.154676258992807e-06, "loss": 0.1993, "step": 462 }, { "epoch": 0.07501620220349968, "grad_norm": 1.7487287521362305, "learning_rate": 4.163669064748202e-06, "loss": 0.2032, "step": 463 }, { "epoch": 0.07517822423849643, "grad_norm": 1.8493058681488037, "learning_rate": 4.172661870503597e-06, "loss": 0.1937, "step": 464 }, { "epoch": 0.0753402462734932, "grad_norm": 1.7110384702682495, "learning_rate": 4.1816546762589935e-06, "loss": 0.1755, "step": 465 }, { "epoch": 0.07550226830848995, "grad_norm": 1.6556897163391113, "learning_rate": 4.190647482014389e-06, "loss": 0.1961, "step": 466 }, { "epoch": 0.07566429034348672, "grad_norm": 1.5604274272918701, "learning_rate": 4.199640287769784e-06, "loss": 0.1725, "step": 467 }, { "epoch": 0.07582631237848347, "grad_norm": 1.639374017715454, "learning_rate": 4.20863309352518e-06, "loss": 0.1795, "step": 468 }, { "epoch": 0.07598833441348023, "grad_norm": 1.582411289215088, "learning_rate": 4.2176258992805755e-06, "loss": 0.1786, "step": 469 }, { "epoch": 0.076150356448477, "grad_norm": 1.62992262840271, "learning_rate": 4.226618705035972e-06, "loss": 0.1732, "step": 470 }, { "epoch": 0.07631237848347375, "grad_norm": 1.8388803005218506, "learning_rate": 4.235611510791367e-06, "loss": 0.208, "step": 471 }, { "epoch": 0.07647440051847051, "grad_norm": 1.8594791889190674, "learning_rate": 4.244604316546763e-06, "loss": 0.2148, "step": 472 }, { "epoch": 0.07663642255346727, "grad_norm": 1.814803957939148, "learning_rate": 4.2535971223021584e-06, "loss": 0.2031, "step": 473 }, { "epoch": 0.07679844458846403, "grad_norm": 1.6284650564193726, "learning_rate": 4.2625899280575546e-06, "loss": 0.1865, "step": 474 }, { "epoch": 0.07696046662346079, "grad_norm": 1.8454334735870361, "learning_rate": 4.27158273381295e-06, "loss": 0.1818, "step": 475 }, { "epoch": 0.07712248865845756, "grad_norm": 1.8718208074569702, "learning_rate": 4.280575539568346e-06, "loss": 0.1912, "step": 476 }, { "epoch": 0.07728451069345431, "grad_norm": 1.4540497064590454, "learning_rate": 4.289568345323741e-06, "loss": 0.1595, "step": 477 }, { "epoch": 0.07744653272845108, "grad_norm": 1.749086618423462, "learning_rate": 4.298561151079137e-06, "loss": 0.2013, "step": 478 }, { "epoch": 0.07760855476344783, "grad_norm": 1.8641357421875, "learning_rate": 4.307553956834533e-06, "loss": 0.202, "step": 479 }, { "epoch": 0.07777057679844458, "grad_norm": 1.670167326927185, "learning_rate": 4.316546762589928e-06, "loss": 0.1815, "step": 480 }, { "epoch": 0.07793259883344135, "grad_norm": 1.800274133682251, "learning_rate": 4.325539568345324e-06, "loss": 0.2006, "step": 481 }, { "epoch": 0.0780946208684381, "grad_norm": 2.035557985305786, "learning_rate": 4.33453237410072e-06, "loss": 0.1903, "step": 482 }, { "epoch": 0.07825664290343487, "grad_norm": 1.7821377515792847, "learning_rate": 4.343525179856116e-06, "loss": 0.2089, "step": 483 }, { "epoch": 0.07841866493843162, "grad_norm": 1.605576515197754, "learning_rate": 4.352517985611511e-06, "loss": 0.1625, "step": 484 }, { "epoch": 0.07858068697342839, "grad_norm": 1.7494524717330933, "learning_rate": 4.361510791366907e-06, "loss": 0.1915, "step": 485 }, { "epoch": 0.07874270900842514, "grad_norm": 1.4881764650344849, "learning_rate": 4.370503597122302e-06, "loss": 0.1512, "step": 486 }, { "epoch": 0.07890473104342191, "grad_norm": 1.9464272260665894, "learning_rate": 4.3794964028776985e-06, "loss": 0.1752, "step": 487 }, { "epoch": 0.07906675307841866, "grad_norm": 1.8107142448425293, "learning_rate": 4.388489208633094e-06, "loss": 0.1953, "step": 488 }, { "epoch": 0.07922877511341543, "grad_norm": 1.7034633159637451, "learning_rate": 4.397482014388489e-06, "loss": 0.1875, "step": 489 }, { "epoch": 0.07939079714841218, "grad_norm": 1.7011704444885254, "learning_rate": 4.406474820143885e-06, "loss": 0.1942, "step": 490 }, { "epoch": 0.07955281918340894, "grad_norm": 1.567685604095459, "learning_rate": 4.415467625899281e-06, "loss": 0.1869, "step": 491 }, { "epoch": 0.0797148412184057, "grad_norm": 1.5715806484222412, "learning_rate": 4.424460431654677e-06, "loss": 0.1618, "step": 492 }, { "epoch": 0.07987686325340246, "grad_norm": 1.6158721446990967, "learning_rate": 4.433453237410073e-06, "loss": 0.1903, "step": 493 }, { "epoch": 0.08003888528839923, "grad_norm": 1.7132471799850464, "learning_rate": 4.442446043165468e-06, "loss": 0.1761, "step": 494 }, { "epoch": 0.08020090732339598, "grad_norm": 1.561064600944519, "learning_rate": 4.4514388489208635e-06, "loss": 0.1644, "step": 495 }, { "epoch": 0.08036292935839275, "grad_norm": 1.6423125267028809, "learning_rate": 4.46043165467626e-06, "loss": 0.1724, "step": 496 }, { "epoch": 0.0805249513933895, "grad_norm": 1.5064512491226196, "learning_rate": 4.469424460431655e-06, "loss": 0.1421, "step": 497 }, { "epoch": 0.08068697342838627, "grad_norm": 1.592776894569397, "learning_rate": 4.478417266187051e-06, "loss": 0.1831, "step": 498 }, { "epoch": 0.08084899546338302, "grad_norm": 1.378753423690796, "learning_rate": 4.487410071942446e-06, "loss": 0.1469, "step": 499 }, { "epoch": 0.08101101749837979, "grad_norm": 1.6191986799240112, "learning_rate": 4.496402877697842e-06, "loss": 0.1797, "step": 500 }, { "epoch": 0.08117303953337654, "grad_norm": 1.6818901300430298, "learning_rate": 4.505395683453238e-06, "loss": 0.1666, "step": 501 }, { "epoch": 0.08133506156837329, "grad_norm": 1.6711454391479492, "learning_rate": 4.514388489208634e-06, "loss": 0.1883, "step": 502 }, { "epoch": 0.08149708360337006, "grad_norm": 1.5778876543045044, "learning_rate": 4.523381294964029e-06, "loss": 0.1556, "step": 503 }, { "epoch": 0.08165910563836681, "grad_norm": 1.751618504524231, "learning_rate": 4.5323741007194245e-06, "loss": 0.1773, "step": 504 }, { "epoch": 0.08182112767336358, "grad_norm": 1.6356712579727173, "learning_rate": 4.541366906474821e-06, "loss": 0.1674, "step": 505 }, { "epoch": 0.08198314970836033, "grad_norm": 1.717413306236267, "learning_rate": 4.550359712230216e-06, "loss": 0.2015, "step": 506 }, { "epoch": 0.0821451717433571, "grad_norm": 1.6795759201049805, "learning_rate": 4.559352517985612e-06, "loss": 0.1858, "step": 507 }, { "epoch": 0.08230719377835385, "grad_norm": 1.4658695459365845, "learning_rate": 4.5683453237410074e-06, "loss": 0.1719, "step": 508 }, { "epoch": 0.08246921581335062, "grad_norm": 1.865307331085205, "learning_rate": 4.577338129496403e-06, "loss": 0.1978, "step": 509 }, { "epoch": 0.08263123784834737, "grad_norm": 1.8304200172424316, "learning_rate": 4.586330935251799e-06, "loss": 0.1906, "step": 510 }, { "epoch": 0.08279325988334413, "grad_norm": 1.795590877532959, "learning_rate": 4.595323741007194e-06, "loss": 0.1827, "step": 511 }, { "epoch": 0.0829552819183409, "grad_norm": 1.6314994096755981, "learning_rate": 4.60431654676259e-06, "loss": 0.1725, "step": 512 }, { "epoch": 0.08311730395333765, "grad_norm": 1.6502032279968262, "learning_rate": 4.6133093525179865e-06, "loss": 0.1895, "step": 513 }, { "epoch": 0.08327932598833442, "grad_norm": 1.5785597562789917, "learning_rate": 4.622302158273382e-06, "loss": 0.1602, "step": 514 }, { "epoch": 0.08344134802333117, "grad_norm": 1.8144105672836304, "learning_rate": 4.631294964028777e-06, "loss": 0.174, "step": 515 }, { "epoch": 0.08360337005832794, "grad_norm": 1.689868688583374, "learning_rate": 4.640287769784173e-06, "loss": 0.176, "step": 516 }, { "epoch": 0.08376539209332469, "grad_norm": 1.7384047508239746, "learning_rate": 4.6492805755395685e-06, "loss": 0.1739, "step": 517 }, { "epoch": 0.08392741412832146, "grad_norm": 1.7412197589874268, "learning_rate": 4.658273381294965e-06, "loss": 0.1957, "step": 518 }, { "epoch": 0.08408943616331821, "grad_norm": 1.6650651693344116, "learning_rate": 4.66726618705036e-06, "loss": 0.184, "step": 519 }, { "epoch": 0.08425145819831498, "grad_norm": 1.7459502220153809, "learning_rate": 4.676258992805755e-06, "loss": 0.1772, "step": 520 }, { "epoch": 0.08441348023331173, "grad_norm": 1.6877442598342896, "learning_rate": 4.685251798561151e-06, "loss": 0.1872, "step": 521 }, { "epoch": 0.08457550226830848, "grad_norm": 1.6881842613220215, "learning_rate": 4.6942446043165475e-06, "loss": 0.1841, "step": 522 }, { "epoch": 0.08473752430330525, "grad_norm": 1.4388700723648071, "learning_rate": 4.703237410071943e-06, "loss": 0.1652, "step": 523 }, { "epoch": 0.084899546338302, "grad_norm": 1.5876359939575195, "learning_rate": 4.712230215827339e-06, "loss": 0.2057, "step": 524 }, { "epoch": 0.08506156837329877, "grad_norm": 1.647616982460022, "learning_rate": 4.721223021582734e-06, "loss": 0.186, "step": 525 }, { "epoch": 0.08522359040829552, "grad_norm": 1.5112206935882568, "learning_rate": 4.73021582733813e-06, "loss": 0.1697, "step": 526 }, { "epoch": 0.08538561244329229, "grad_norm": 1.553218126296997, "learning_rate": 4.739208633093526e-06, "loss": 0.1871, "step": 527 }, { "epoch": 0.08554763447828904, "grad_norm": 1.5007338523864746, "learning_rate": 4.748201438848921e-06, "loss": 0.1555, "step": 528 }, { "epoch": 0.08570965651328581, "grad_norm": 1.6641744375228882, "learning_rate": 4.757194244604317e-06, "loss": 0.1909, "step": 529 }, { "epoch": 0.08587167854828257, "grad_norm": 1.8256597518920898, "learning_rate": 4.7661870503597125e-06, "loss": 0.1872, "step": 530 }, { "epoch": 0.08603370058327933, "grad_norm": 1.818845272064209, "learning_rate": 4.775179856115108e-06, "loss": 0.1928, "step": 531 }, { "epoch": 0.08619572261827609, "grad_norm": 1.664866328239441, "learning_rate": 4.784172661870504e-06, "loss": 0.167, "step": 532 }, { "epoch": 0.08635774465327284, "grad_norm": 1.511879563331604, "learning_rate": 4.7931654676259e-06, "loss": 0.1692, "step": 533 }, { "epoch": 0.0865197666882696, "grad_norm": 1.680817723274231, "learning_rate": 4.802158273381295e-06, "loss": 0.1897, "step": 534 }, { "epoch": 0.08668178872326636, "grad_norm": 1.5544551610946655, "learning_rate": 4.8111510791366915e-06, "loss": 0.1597, "step": 535 }, { "epoch": 0.08684381075826313, "grad_norm": 1.759607195854187, "learning_rate": 4.820143884892087e-06, "loss": 0.1925, "step": 536 }, { "epoch": 0.08700583279325988, "grad_norm": 1.6217458248138428, "learning_rate": 4.829136690647482e-06, "loss": 0.1897, "step": 537 }, { "epoch": 0.08716785482825665, "grad_norm": 1.574110507965088, "learning_rate": 4.838129496402878e-06, "loss": 0.1898, "step": 538 }, { "epoch": 0.0873298768632534, "grad_norm": 1.4586173295974731, "learning_rate": 4.8471223021582736e-06, "loss": 0.1767, "step": 539 }, { "epoch": 0.08749189889825017, "grad_norm": 1.533091425895691, "learning_rate": 4.856115107913669e-06, "loss": 0.1819, "step": 540 }, { "epoch": 0.08765392093324692, "grad_norm": 1.4116888046264648, "learning_rate": 4.865107913669065e-06, "loss": 0.1611, "step": 541 }, { "epoch": 0.08781594296824369, "grad_norm": 1.6544139385223389, "learning_rate": 4.874100719424461e-06, "loss": 0.1787, "step": 542 }, { "epoch": 0.08797796500324044, "grad_norm": 1.6531710624694824, "learning_rate": 4.8830935251798564e-06, "loss": 0.1961, "step": 543 }, { "epoch": 0.0881399870382372, "grad_norm": 1.4757072925567627, "learning_rate": 4.892086330935253e-06, "loss": 0.1713, "step": 544 }, { "epoch": 0.08830200907323396, "grad_norm": 1.6193426847457886, "learning_rate": 4.901079136690648e-06, "loss": 0.1845, "step": 545 }, { "epoch": 0.08846403110823071, "grad_norm": 1.7032020092010498, "learning_rate": 4.910071942446043e-06, "loss": 0.189, "step": 546 }, { "epoch": 0.08862605314322748, "grad_norm": 1.4911900758743286, "learning_rate": 4.919064748201439e-06, "loss": 0.1592, "step": 547 }, { "epoch": 0.08878807517822424, "grad_norm": 1.6090925931930542, "learning_rate": 4.928057553956835e-06, "loss": 0.1983, "step": 548 }, { "epoch": 0.088950097213221, "grad_norm": 1.876451015472412, "learning_rate": 4.937050359712231e-06, "loss": 0.2078, "step": 549 }, { "epoch": 0.08911211924821776, "grad_norm": 1.5491797924041748, "learning_rate": 4.946043165467626e-06, "loss": 0.178, "step": 550 }, { "epoch": 0.08927414128321452, "grad_norm": 1.6761187314987183, "learning_rate": 4.955035971223021e-06, "loss": 0.2046, "step": 551 }, { "epoch": 0.08943616331821128, "grad_norm": 1.6212235689163208, "learning_rate": 4.9640287769784175e-06, "loss": 0.2055, "step": 552 }, { "epoch": 0.08959818535320804, "grad_norm": 1.6363742351531982, "learning_rate": 4.973021582733814e-06, "loss": 0.1966, "step": 553 }, { "epoch": 0.0897602073882048, "grad_norm": 1.580815076828003, "learning_rate": 4.982014388489209e-06, "loss": 0.2054, "step": 554 }, { "epoch": 0.08992222942320155, "grad_norm": 1.9822163581848145, "learning_rate": 4.991007194244605e-06, "loss": 0.155, "step": 555 }, { "epoch": 0.09008425145819832, "grad_norm": 1.7965415716171265, "learning_rate": 5e-06, "loss": 0.1808, "step": 556 }, { "epoch": 0.09024627349319507, "grad_norm": 1.4814484119415283, "learning_rate": 4.999999961753026e-06, "loss": 0.1716, "step": 557 }, { "epoch": 0.09040829552819184, "grad_norm": 1.5439770221710205, "learning_rate": 4.999999847012101e-06, "loss": 0.1672, "step": 558 }, { "epoch": 0.09057031756318859, "grad_norm": 1.5221511125564575, "learning_rate": 4.999999655777232e-06, "loss": 0.1829, "step": 559 }, { "epoch": 0.09073233959818536, "grad_norm": 1.8477576971054077, "learning_rate": 4.9999993880484235e-06, "loss": 0.1827, "step": 560 }, { "epoch": 0.09089436163318211, "grad_norm": 1.5962644815444946, "learning_rate": 4.999999043825682e-06, "loss": 0.1675, "step": 561 }, { "epoch": 0.09105638366817888, "grad_norm": 1.5648473501205444, "learning_rate": 4.999998623109022e-06, "loss": 0.1645, "step": 562 }, { "epoch": 0.09121840570317563, "grad_norm": 1.4602282047271729, "learning_rate": 4.999998125898452e-06, "loss": 0.1595, "step": 563 }, { "epoch": 0.09138042773817238, "grad_norm": 2.079158306121826, "learning_rate": 4.99999755219399e-06, "loss": 0.2142, "step": 564 }, { "epoch": 0.09154244977316915, "grad_norm": 1.9085109233856201, "learning_rate": 4.9999969019956526e-06, "loss": 0.1923, "step": 565 }, { "epoch": 0.0917044718081659, "grad_norm": 1.8776715993881226, "learning_rate": 4.9999961753034595e-06, "loss": 0.1935, "step": 566 }, { "epoch": 0.09186649384316267, "grad_norm": 1.6320695877075195, "learning_rate": 4.9999953721174345e-06, "loss": 0.1881, "step": 567 }, { "epoch": 0.09202851587815943, "grad_norm": 1.6475578546524048, "learning_rate": 4.9999944924376e-06, "loss": 0.191, "step": 568 }, { "epoch": 0.09219053791315619, "grad_norm": 1.4152305126190186, "learning_rate": 4.9999935362639844e-06, "loss": 0.1728, "step": 569 }, { "epoch": 0.09235255994815295, "grad_norm": 1.6144766807556152, "learning_rate": 4.999992503596616e-06, "loss": 0.1727, "step": 570 }, { "epoch": 0.09251458198314971, "grad_norm": 1.455117106437683, "learning_rate": 4.999991394435527e-06, "loss": 0.1658, "step": 571 }, { "epoch": 0.09267660401814647, "grad_norm": 1.536605715751648, "learning_rate": 4.999990208780751e-06, "loss": 0.1875, "step": 572 }, { "epoch": 0.09283862605314323, "grad_norm": 1.4941188097000122, "learning_rate": 4.999988946632326e-06, "loss": 0.1732, "step": 573 }, { "epoch": 0.09300064808813999, "grad_norm": 1.8162550926208496, "learning_rate": 4.999987607990287e-06, "loss": 0.2139, "step": 574 }, { "epoch": 0.09316267012313674, "grad_norm": 1.8714874982833862, "learning_rate": 4.9999861928546786e-06, "loss": 0.2113, "step": 575 }, { "epoch": 0.09332469215813351, "grad_norm": 1.6756885051727295, "learning_rate": 4.999984701225542e-06, "loss": 0.1855, "step": 576 }, { "epoch": 0.09348671419313026, "grad_norm": 1.7077648639678955, "learning_rate": 4.999983133102923e-06, "loss": 0.184, "step": 577 }, { "epoch": 0.09364873622812703, "grad_norm": 1.6993731260299683, "learning_rate": 4.9999814884868705e-06, "loss": 0.2088, "step": 578 }, { "epoch": 0.09381075826312378, "grad_norm": 1.6294496059417725, "learning_rate": 4.999979767377434e-06, "loss": 0.1909, "step": 579 }, { "epoch": 0.09397278029812055, "grad_norm": 1.4724843502044678, "learning_rate": 4.999977969774666e-06, "loss": 0.1786, "step": 580 }, { "epoch": 0.0941348023331173, "grad_norm": 1.5849987268447876, "learning_rate": 4.999976095678622e-06, "loss": 0.1942, "step": 581 }, { "epoch": 0.09429682436811407, "grad_norm": 1.393203854560852, "learning_rate": 4.99997414508936e-06, "loss": 0.157, "step": 582 }, { "epoch": 0.09445884640311082, "grad_norm": 1.5272077322006226, "learning_rate": 4.999972118006939e-06, "loss": 0.1809, "step": 583 }, { "epoch": 0.09462086843810759, "grad_norm": 1.4402399063110352, "learning_rate": 4.999970014431421e-06, "loss": 0.187, "step": 584 }, { "epoch": 0.09478289047310434, "grad_norm": 1.433086633682251, "learning_rate": 4.99996783436287e-06, "loss": 0.179, "step": 585 }, { "epoch": 0.0949449125081011, "grad_norm": 1.463783860206604, "learning_rate": 4.999965577801354e-06, "loss": 0.1703, "step": 586 }, { "epoch": 0.09510693454309786, "grad_norm": 1.4071027040481567, "learning_rate": 4.9999632447469395e-06, "loss": 0.1708, "step": 587 }, { "epoch": 0.09526895657809462, "grad_norm": 1.5153921842575073, "learning_rate": 4.999960835199701e-06, "loss": 0.1786, "step": 588 }, { "epoch": 0.09543097861309138, "grad_norm": 1.577370047569275, "learning_rate": 4.999958349159709e-06, "loss": 0.178, "step": 589 }, { "epoch": 0.09559300064808814, "grad_norm": 1.562628984451294, "learning_rate": 4.999955786627042e-06, "loss": 0.1793, "step": 590 }, { "epoch": 0.0957550226830849, "grad_norm": 1.4154744148254395, "learning_rate": 4.999953147601779e-06, "loss": 0.1573, "step": 591 }, { "epoch": 0.09591704471808166, "grad_norm": 1.473583459854126, "learning_rate": 4.999950432083998e-06, "loss": 0.1735, "step": 592 }, { "epoch": 0.09607906675307842, "grad_norm": 1.5967344045639038, "learning_rate": 4.999947640073784e-06, "loss": 0.1798, "step": 593 }, { "epoch": 0.09624108878807518, "grad_norm": 1.5491890907287598, "learning_rate": 4.999944771571222e-06, "loss": 0.1899, "step": 594 }, { "epoch": 0.09640311082307194, "grad_norm": 1.5898832082748413, "learning_rate": 4.9999418265764e-06, "loss": 0.1825, "step": 595 }, { "epoch": 0.0965651328580687, "grad_norm": 1.4132360219955444, "learning_rate": 4.999938805089407e-06, "loss": 0.1663, "step": 596 }, { "epoch": 0.09672715489306545, "grad_norm": 1.5513783693313599, "learning_rate": 4.999935707110337e-06, "loss": 0.2075, "step": 597 }, { "epoch": 0.09688917692806222, "grad_norm": 1.444082498550415, "learning_rate": 4.999932532639285e-06, "loss": 0.1501, "step": 598 }, { "epoch": 0.09705119896305897, "grad_norm": 1.5398008823394775, "learning_rate": 4.999929281676346e-06, "loss": 0.1837, "step": 599 }, { "epoch": 0.09721322099805574, "grad_norm": 1.4911479949951172, "learning_rate": 4.99992595422162e-06, "loss": 0.1966, "step": 600 }, { "epoch": 0.09737524303305249, "grad_norm": 1.8352231979370117, "learning_rate": 4.99992255027521e-06, "loss": 0.201, "step": 601 }, { "epoch": 0.09753726506804926, "grad_norm": 1.491248369216919, "learning_rate": 4.9999190698372216e-06, "loss": 0.187, "step": 602 }, { "epoch": 0.09769928710304601, "grad_norm": 1.4229164123535156, "learning_rate": 4.999915512907757e-06, "loss": 0.1733, "step": 603 }, { "epoch": 0.09786130913804278, "grad_norm": 1.624070167541504, "learning_rate": 4.9999118794869285e-06, "loss": 0.1749, "step": 604 }, { "epoch": 0.09802333117303953, "grad_norm": 1.3550504446029663, "learning_rate": 4.999908169574846e-06, "loss": 0.1683, "step": 605 }, { "epoch": 0.0981853532080363, "grad_norm": 1.5067567825317383, "learning_rate": 4.999904383171623e-06, "loss": 0.1891, "step": 606 }, { "epoch": 0.09834737524303305, "grad_norm": 1.5590304136276245, "learning_rate": 4.999900520277376e-06, "loss": 0.1831, "step": 607 }, { "epoch": 0.0985093972780298, "grad_norm": 1.4621655941009521, "learning_rate": 4.999896580892221e-06, "loss": 0.1648, "step": 608 }, { "epoch": 0.09867141931302657, "grad_norm": 1.5438154935836792, "learning_rate": 4.999892565016282e-06, "loss": 0.1728, "step": 609 }, { "epoch": 0.09883344134802333, "grad_norm": 1.5466684103012085, "learning_rate": 4.99988847264968e-06, "loss": 0.1915, "step": 610 }, { "epoch": 0.0989954633830201, "grad_norm": 1.430253267288208, "learning_rate": 4.99988430379254e-06, "loss": 0.1577, "step": 611 }, { "epoch": 0.09915748541801685, "grad_norm": 1.4443409442901611, "learning_rate": 4.99988005844499e-06, "loss": 0.1668, "step": 612 }, { "epoch": 0.09931950745301361, "grad_norm": 1.5595300197601318, "learning_rate": 4.999875736607159e-06, "loss": 0.1686, "step": 613 }, { "epoch": 0.09948152948801037, "grad_norm": 1.4867337942123413, "learning_rate": 4.999871338279181e-06, "loss": 0.1906, "step": 614 }, { "epoch": 0.09964355152300713, "grad_norm": 1.4922622442245483, "learning_rate": 4.99986686346119e-06, "loss": 0.1727, "step": 615 }, { "epoch": 0.09980557355800389, "grad_norm": 1.4832661151885986, "learning_rate": 4.999862312153322e-06, "loss": 0.1703, "step": 616 }, { "epoch": 0.09996759559300065, "grad_norm": 1.4716413021087646, "learning_rate": 4.999857684355716e-06, "loss": 0.1704, "step": 617 }, { "epoch": 0.10012961762799741, "grad_norm": 1.4800324440002441, "learning_rate": 4.999852980068516e-06, "loss": 0.1604, "step": 618 }, { "epoch": 0.10029163966299416, "grad_norm": 1.4337764978408813, "learning_rate": 4.999848199291863e-06, "loss": 0.1785, "step": 619 }, { "epoch": 0.10045366169799093, "grad_norm": 1.373763918876648, "learning_rate": 4.9998433420259055e-06, "loss": 0.1532, "step": 620 }, { "epoch": 0.10061568373298768, "grad_norm": 1.4618251323699951, "learning_rate": 4.999838408270791e-06, "loss": 0.1849, "step": 621 }, { "epoch": 0.10077770576798445, "grad_norm": 1.356284499168396, "learning_rate": 4.99983339802667e-06, "loss": 0.1555, "step": 622 }, { "epoch": 0.1009397278029812, "grad_norm": 1.548274040222168, "learning_rate": 4.999828311293697e-06, "loss": 0.1913, "step": 623 }, { "epoch": 0.10110174983797797, "grad_norm": 1.549202799797058, "learning_rate": 4.999823148072027e-06, "loss": 0.1909, "step": 624 }, { "epoch": 0.10126377187297472, "grad_norm": 1.5712025165557861, "learning_rate": 4.999817908361818e-06, "loss": 0.1814, "step": 625 }, { "epoch": 0.10142579390797149, "grad_norm": 1.3882803916931152, "learning_rate": 4.999812592163232e-06, "loss": 0.1725, "step": 626 }, { "epoch": 0.10158781594296824, "grad_norm": 1.4438263177871704, "learning_rate": 4.999807199476428e-06, "loss": 0.1663, "step": 627 }, { "epoch": 0.101749837977965, "grad_norm": 1.430675745010376, "learning_rate": 4.9998017303015735e-06, "loss": 0.1685, "step": 628 }, { "epoch": 0.10191186001296176, "grad_norm": 1.514258623123169, "learning_rate": 4.999796184638836e-06, "loss": 0.2044, "step": 629 }, { "epoch": 0.10207388204795852, "grad_norm": 1.3589094877243042, "learning_rate": 4.999790562488385e-06, "loss": 0.1636, "step": 630 }, { "epoch": 0.10223590408295528, "grad_norm": 1.56080961227417, "learning_rate": 4.999784863850391e-06, "loss": 0.1925, "step": 631 }, { "epoch": 0.10239792611795204, "grad_norm": 1.3032417297363281, "learning_rate": 4.999779088725031e-06, "loss": 0.1394, "step": 632 }, { "epoch": 0.1025599481529488, "grad_norm": 1.3875499963760376, "learning_rate": 4.999773237112479e-06, "loss": 0.1593, "step": 633 }, { "epoch": 0.10272197018794556, "grad_norm": 1.6035758256912231, "learning_rate": 4.999767309012916e-06, "loss": 0.1822, "step": 634 }, { "epoch": 0.10288399222294232, "grad_norm": 1.519217610359192, "learning_rate": 4.999761304426523e-06, "loss": 0.187, "step": 635 }, { "epoch": 0.10304601425793908, "grad_norm": 1.6218081712722778, "learning_rate": 4.999755223353483e-06, "loss": 0.1963, "step": 636 }, { "epoch": 0.10320803629293585, "grad_norm": 1.4830374717712402, "learning_rate": 4.999749065793982e-06, "loss": 0.1656, "step": 637 }, { "epoch": 0.1033700583279326, "grad_norm": 1.5597121715545654, "learning_rate": 4.9997428317482086e-06, "loss": 0.1817, "step": 638 }, { "epoch": 0.10353208036292935, "grad_norm": 1.3938210010528564, "learning_rate": 4.999736521216355e-06, "loss": 0.1594, "step": 639 }, { "epoch": 0.10369410239792612, "grad_norm": 1.3650377988815308, "learning_rate": 4.999730134198612e-06, "loss": 0.1661, "step": 640 }, { "epoch": 0.10385612443292287, "grad_norm": 1.493725061416626, "learning_rate": 4.999723670695177e-06, "loss": 0.173, "step": 641 }, { "epoch": 0.10401814646791964, "grad_norm": 1.5311143398284912, "learning_rate": 4.999717130706247e-06, "loss": 0.1933, "step": 642 }, { "epoch": 0.10418016850291639, "grad_norm": 1.5196541547775269, "learning_rate": 4.9997105142320205e-06, "loss": 0.1797, "step": 643 }, { "epoch": 0.10434219053791316, "grad_norm": 1.541266679763794, "learning_rate": 4.999703821272702e-06, "loss": 0.1813, "step": 644 }, { "epoch": 0.10450421257290991, "grad_norm": 1.65276038646698, "learning_rate": 4.999697051828497e-06, "loss": 0.2055, "step": 645 }, { "epoch": 0.10466623460790668, "grad_norm": 1.6365846395492554, "learning_rate": 4.99969020589961e-06, "loss": 0.2004, "step": 646 }, { "epoch": 0.10482825664290343, "grad_norm": 1.675693154335022, "learning_rate": 4.999683283486252e-06, "loss": 0.1797, "step": 647 }, { "epoch": 0.1049902786779002, "grad_norm": 1.5289831161499023, "learning_rate": 4.999676284588635e-06, "loss": 0.2035, "step": 648 }, { "epoch": 0.10515230071289695, "grad_norm": 1.4459340572357178, "learning_rate": 4.9996692092069735e-06, "loss": 0.1682, "step": 649 }, { "epoch": 0.10531432274789371, "grad_norm": 1.7202799320220947, "learning_rate": 4.999662057341482e-06, "loss": 0.2091, "step": 650 }, { "epoch": 0.10547634478289047, "grad_norm": 1.4932163953781128, "learning_rate": 4.999654828992382e-06, "loss": 0.1856, "step": 651 }, { "epoch": 0.10563836681788723, "grad_norm": 1.3993346691131592, "learning_rate": 4.999647524159892e-06, "loss": 0.1592, "step": 652 }, { "epoch": 0.105800388852884, "grad_norm": 1.7462083101272583, "learning_rate": 4.999640142844237e-06, "loss": 0.1681, "step": 653 }, { "epoch": 0.10596241088788075, "grad_norm": 1.5770518779754639, "learning_rate": 4.9996326850456435e-06, "loss": 0.1914, "step": 654 }, { "epoch": 0.10612443292287752, "grad_norm": 1.3480045795440674, "learning_rate": 4.9996251507643375e-06, "loss": 0.1521, "step": 655 }, { "epoch": 0.10628645495787427, "grad_norm": 1.5072104930877686, "learning_rate": 4.999617540000552e-06, "loss": 0.1811, "step": 656 }, { "epoch": 0.10644847699287104, "grad_norm": 1.4140270948410034, "learning_rate": 4.9996098527545184e-06, "loss": 0.1599, "step": 657 }, { "epoch": 0.10661049902786779, "grad_norm": 1.3946372270584106, "learning_rate": 4.999602089026472e-06, "loss": 0.1585, "step": 658 }, { "epoch": 0.10677252106286456, "grad_norm": 1.7381147146224976, "learning_rate": 4.9995942488166506e-06, "loss": 0.2138, "step": 659 }, { "epoch": 0.10693454309786131, "grad_norm": 1.5091066360473633, "learning_rate": 4.999586332125294e-06, "loss": 0.1769, "step": 660 }, { "epoch": 0.10709656513285806, "grad_norm": 1.4993563890457153, "learning_rate": 4.999578338952646e-06, "loss": 0.1819, "step": 661 }, { "epoch": 0.10725858716785483, "grad_norm": 1.6799975633621216, "learning_rate": 4.9995702692989476e-06, "loss": 0.2105, "step": 662 }, { "epoch": 0.10742060920285158, "grad_norm": 1.5190070867538452, "learning_rate": 4.999562123164448e-06, "loss": 0.1842, "step": 663 }, { "epoch": 0.10758263123784835, "grad_norm": 1.4410516023635864, "learning_rate": 4.999553900549398e-06, "loss": 0.1664, "step": 664 }, { "epoch": 0.1077446532728451, "grad_norm": 1.4741100072860718, "learning_rate": 4.999545601454046e-06, "loss": 0.1726, "step": 665 }, { "epoch": 0.10790667530784187, "grad_norm": 1.4260085821151733, "learning_rate": 4.999537225878648e-06, "loss": 0.1803, "step": 666 }, { "epoch": 0.10806869734283862, "grad_norm": 1.4949939250946045, "learning_rate": 4.999528773823459e-06, "loss": 0.1779, "step": 667 }, { "epoch": 0.10823071937783539, "grad_norm": 1.3150126934051514, "learning_rate": 4.999520245288739e-06, "loss": 0.1637, "step": 668 }, { "epoch": 0.10839274141283214, "grad_norm": 1.3088539838790894, "learning_rate": 4.999511640274748e-06, "loss": 0.157, "step": 669 }, { "epoch": 0.10855476344782891, "grad_norm": 1.4893115758895874, "learning_rate": 4.999502958781749e-06, "loss": 0.1793, "step": 670 }, { "epoch": 0.10871678548282566, "grad_norm": 1.5916615724563599, "learning_rate": 4.999494200810009e-06, "loss": 0.1923, "step": 671 }, { "epoch": 0.10887880751782242, "grad_norm": 1.4635965824127197, "learning_rate": 4.999485366359794e-06, "loss": 0.1867, "step": 672 }, { "epoch": 0.10904082955281919, "grad_norm": 1.5369232892990112, "learning_rate": 4.999476455431377e-06, "loss": 0.175, "step": 673 }, { "epoch": 0.10920285158781594, "grad_norm": 1.5748496055603027, "learning_rate": 4.999467468025028e-06, "loss": 0.2062, "step": 674 }, { "epoch": 0.1093648736228127, "grad_norm": 1.5589791536331177, "learning_rate": 4.999458404141023e-06, "loss": 0.1822, "step": 675 }, { "epoch": 0.10952689565780946, "grad_norm": 1.4582257270812988, "learning_rate": 4.99944926377964e-06, "loss": 0.1925, "step": 676 }, { "epoch": 0.10968891769280623, "grad_norm": 1.5575858354568481, "learning_rate": 4.9994400469411575e-06, "loss": 0.1785, "step": 677 }, { "epoch": 0.10985093972780298, "grad_norm": 1.3714617490768433, "learning_rate": 4.999430753625858e-06, "loss": 0.1631, "step": 678 }, { "epoch": 0.11001296176279975, "grad_norm": 1.4028851985931396, "learning_rate": 4.999421383834027e-06, "loss": 0.1758, "step": 679 }, { "epoch": 0.1101749837977965, "grad_norm": 1.395218014717102, "learning_rate": 4.999411937565949e-06, "loss": 0.1711, "step": 680 }, { "epoch": 0.11033700583279327, "grad_norm": 1.4894384145736694, "learning_rate": 4.999402414821915e-06, "loss": 0.197, "step": 681 }, { "epoch": 0.11049902786779002, "grad_norm": 1.489985466003418, "learning_rate": 4.999392815602214e-06, "loss": 0.1664, "step": 682 }, { "epoch": 0.11066104990278677, "grad_norm": 1.4633452892303467, "learning_rate": 4.9993831399071425e-06, "loss": 0.1793, "step": 683 }, { "epoch": 0.11082307193778354, "grad_norm": 1.4508930444717407, "learning_rate": 4.999373387736996e-06, "loss": 0.1914, "step": 684 }, { "epoch": 0.1109850939727803, "grad_norm": 1.3373337984085083, "learning_rate": 4.999363559092071e-06, "loss": 0.1516, "step": 685 }, { "epoch": 0.11114711600777706, "grad_norm": 1.4252420663833618, "learning_rate": 4.999353653972669e-06, "loss": 0.1849, "step": 686 }, { "epoch": 0.11130913804277381, "grad_norm": 1.383408546447754, "learning_rate": 4.999343672379095e-06, "loss": 0.162, "step": 687 }, { "epoch": 0.11147116007777058, "grad_norm": 1.477158546447754, "learning_rate": 4.999333614311652e-06, "loss": 0.1832, "step": 688 }, { "epoch": 0.11163318211276733, "grad_norm": 1.374411940574646, "learning_rate": 4.999323479770649e-06, "loss": 0.1662, "step": 689 }, { "epoch": 0.1117952041477641, "grad_norm": 1.3238674402236938, "learning_rate": 4.999313268756396e-06, "loss": 0.156, "step": 690 }, { "epoch": 0.11195722618276086, "grad_norm": 1.4178396463394165, "learning_rate": 4.999302981269204e-06, "loss": 0.165, "step": 691 }, { "epoch": 0.11211924821775761, "grad_norm": 1.5902644395828247, "learning_rate": 4.99929261730939e-06, "loss": 0.1938, "step": 692 }, { "epoch": 0.11228127025275438, "grad_norm": 1.420102596282959, "learning_rate": 4.999282176877271e-06, "loss": 0.1818, "step": 693 }, { "epoch": 0.11244329228775113, "grad_norm": 1.4864150285720825, "learning_rate": 4.999271659973164e-06, "loss": 0.1992, "step": 694 }, { "epoch": 0.1126053143227479, "grad_norm": 1.5267049074172974, "learning_rate": 4.999261066597393e-06, "loss": 0.1995, "step": 695 }, { "epoch": 0.11276733635774465, "grad_norm": 1.4048455953598022, "learning_rate": 4.999250396750281e-06, "loss": 0.1719, "step": 696 }, { "epoch": 0.11292935839274142, "grad_norm": 1.4921525716781616, "learning_rate": 4.999239650432155e-06, "loss": 0.191, "step": 697 }, { "epoch": 0.11309138042773817, "grad_norm": 1.41100013256073, "learning_rate": 4.999228827643344e-06, "loss": 0.1837, "step": 698 }, { "epoch": 0.11325340246273494, "grad_norm": 1.3732647895812988, "learning_rate": 4.999217928384179e-06, "loss": 0.1936, "step": 699 }, { "epoch": 0.11341542449773169, "grad_norm": 1.556138277053833, "learning_rate": 4.999206952654993e-06, "loss": 0.1966, "step": 700 }, { "epoch": 0.11357744653272846, "grad_norm": 1.5490831136703491, "learning_rate": 4.9991959004561225e-06, "loss": 0.1914, "step": 701 }, { "epoch": 0.11373946856772521, "grad_norm": 1.3613611459732056, "learning_rate": 4.999184771787905e-06, "loss": 0.1692, "step": 702 }, { "epoch": 0.11390149060272196, "grad_norm": 1.4281145334243774, "learning_rate": 4.999173566650682e-06, "loss": 0.1671, "step": 703 }, { "epoch": 0.11406351263771873, "grad_norm": 1.629227638244629, "learning_rate": 4.999162285044795e-06, "loss": 0.1739, "step": 704 }, { "epoch": 0.11422553467271548, "grad_norm": 1.5268644094467163, "learning_rate": 4.999150926970591e-06, "loss": 0.1809, "step": 705 }, { "epoch": 0.11438755670771225, "grad_norm": 1.445254921913147, "learning_rate": 4.9991394924284155e-06, "loss": 0.1684, "step": 706 }, { "epoch": 0.114549578742709, "grad_norm": 1.533254861831665, "learning_rate": 4.99912798141862e-06, "loss": 0.1923, "step": 707 }, { "epoch": 0.11471160077770577, "grad_norm": 1.3670967817306519, "learning_rate": 4.999116393941556e-06, "loss": 0.1548, "step": 708 }, { "epoch": 0.11487362281270252, "grad_norm": 1.3976548910140991, "learning_rate": 4.999104729997577e-06, "loss": 0.1821, "step": 709 }, { "epoch": 0.11503564484769929, "grad_norm": 1.4151368141174316, "learning_rate": 4.999092989587042e-06, "loss": 0.188, "step": 710 }, { "epoch": 0.11519766688269605, "grad_norm": 1.2826131582260132, "learning_rate": 4.999081172710309e-06, "loss": 0.152, "step": 711 }, { "epoch": 0.11535968891769281, "grad_norm": 1.463115930557251, "learning_rate": 4.9990692793677395e-06, "loss": 0.1712, "step": 712 }, { "epoch": 0.11552171095268957, "grad_norm": 1.4324864149093628, "learning_rate": 4.999057309559698e-06, "loss": 0.1749, "step": 713 }, { "epoch": 0.11568373298768632, "grad_norm": 1.2447808980941772, "learning_rate": 4.999045263286551e-06, "loss": 0.1539, "step": 714 }, { "epoch": 0.11584575502268309, "grad_norm": 1.3643749952316284, "learning_rate": 4.999033140548666e-06, "loss": 0.165, "step": 715 }, { "epoch": 0.11600777705767984, "grad_norm": 1.491412878036499, "learning_rate": 4.999020941346414e-06, "loss": 0.1752, "step": 716 }, { "epoch": 0.1161697990926766, "grad_norm": 1.363407850265503, "learning_rate": 4.999008665680169e-06, "loss": 0.1652, "step": 717 }, { "epoch": 0.11633182112767336, "grad_norm": 1.3879029750823975, "learning_rate": 4.998996313550306e-06, "loss": 0.1635, "step": 718 }, { "epoch": 0.11649384316267013, "grad_norm": 1.5233573913574219, "learning_rate": 4.9989838849572035e-06, "loss": 0.175, "step": 719 }, { "epoch": 0.11665586519766688, "grad_norm": 1.3929238319396973, "learning_rate": 4.998971379901242e-06, "loss": 0.1714, "step": 720 }, { "epoch": 0.11681788723266365, "grad_norm": 1.4243627786636353, "learning_rate": 4.9989587983828036e-06, "loss": 0.1931, "step": 721 }, { "epoch": 0.1169799092676604, "grad_norm": 1.6758102178573608, "learning_rate": 4.998946140402273e-06, "loss": 0.1698, "step": 722 }, { "epoch": 0.11714193130265717, "grad_norm": 1.4985467195510864, "learning_rate": 4.998933405960038e-06, "loss": 0.194, "step": 723 }, { "epoch": 0.11730395333765392, "grad_norm": 1.500422716140747, "learning_rate": 4.998920595056488e-06, "loss": 0.1828, "step": 724 }, { "epoch": 0.11746597537265067, "grad_norm": 1.4340198040008545, "learning_rate": 4.998907707692015e-06, "loss": 0.1903, "step": 725 }, { "epoch": 0.11762799740764744, "grad_norm": 1.3363415002822876, "learning_rate": 4.998894743867013e-06, "loss": 0.1711, "step": 726 }, { "epoch": 0.1177900194426442, "grad_norm": 1.4468626976013184, "learning_rate": 4.998881703581879e-06, "loss": 0.17, "step": 727 }, { "epoch": 0.11795204147764096, "grad_norm": 1.3693926334381104, "learning_rate": 4.998868586837013e-06, "loss": 0.183, "step": 728 }, { "epoch": 0.11811406351263772, "grad_norm": 1.2546919584274292, "learning_rate": 4.998855393632815e-06, "loss": 0.1606, "step": 729 }, { "epoch": 0.11827608554763448, "grad_norm": 1.3889665603637695, "learning_rate": 4.998842123969689e-06, "loss": 0.1593, "step": 730 }, { "epoch": 0.11843810758263124, "grad_norm": 1.3932158946990967, "learning_rate": 4.998828777848041e-06, "loss": 0.1779, "step": 731 }, { "epoch": 0.118600129617628, "grad_norm": 1.3757567405700684, "learning_rate": 4.998815355268279e-06, "loss": 0.1707, "step": 732 }, { "epoch": 0.11876215165262476, "grad_norm": 1.5059839487075806, "learning_rate": 4.998801856230815e-06, "loss": 0.1737, "step": 733 }, { "epoch": 0.11892417368762152, "grad_norm": 1.3769277334213257, "learning_rate": 4.998788280736061e-06, "loss": 0.1745, "step": 734 }, { "epoch": 0.11908619572261828, "grad_norm": 1.4347563982009888, "learning_rate": 4.998774628784432e-06, "loss": 0.1948, "step": 735 }, { "epoch": 0.11924821775761503, "grad_norm": 1.40513277053833, "learning_rate": 4.998760900376347e-06, "loss": 0.1795, "step": 736 }, { "epoch": 0.1194102397926118, "grad_norm": 1.2432270050048828, "learning_rate": 4.998747095512225e-06, "loss": 0.1416, "step": 737 }, { "epoch": 0.11957226182760855, "grad_norm": 1.4356383085250854, "learning_rate": 4.99873321419249e-06, "loss": 0.1925, "step": 738 }, { "epoch": 0.11973428386260532, "grad_norm": 1.1989301443099976, "learning_rate": 4.998719256417563e-06, "loss": 0.1551, "step": 739 }, { "epoch": 0.11989630589760207, "grad_norm": 1.339026689529419, "learning_rate": 4.998705222187875e-06, "loss": 0.1952, "step": 740 }, { "epoch": 0.12005832793259884, "grad_norm": 1.408982276916504, "learning_rate": 4.998691111503854e-06, "loss": 0.1869, "step": 741 }, { "epoch": 0.12022034996759559, "grad_norm": 1.28006112575531, "learning_rate": 4.998676924365931e-06, "loss": 0.1713, "step": 742 }, { "epoch": 0.12038237200259236, "grad_norm": 1.423261046409607, "learning_rate": 4.998662660774541e-06, "loss": 0.1713, "step": 743 }, { "epoch": 0.12054439403758911, "grad_norm": 1.3350955247879028, "learning_rate": 4.998648320730121e-06, "loss": 0.156, "step": 744 }, { "epoch": 0.12070641607258586, "grad_norm": 1.3964903354644775, "learning_rate": 4.998633904233108e-06, "loss": 0.1827, "step": 745 }, { "epoch": 0.12086843810758263, "grad_norm": 1.4213593006134033, "learning_rate": 4.998619411283945e-06, "loss": 0.1742, "step": 746 }, { "epoch": 0.12103046014257939, "grad_norm": 1.2968708276748657, "learning_rate": 4.998604841883073e-06, "loss": 0.1623, "step": 747 }, { "epoch": 0.12119248217757615, "grad_norm": 1.5439331531524658, "learning_rate": 4.998590196030942e-06, "loss": 0.2048, "step": 748 }, { "epoch": 0.1213545042125729, "grad_norm": 1.4168784618377686, "learning_rate": 4.998575473727995e-06, "loss": 0.1814, "step": 749 }, { "epoch": 0.12151652624756967, "grad_norm": 1.568098545074463, "learning_rate": 4.998560674974686e-06, "loss": 0.184, "step": 750 }, { "epoch": 0.12167854828256643, "grad_norm": 1.4985617399215698, "learning_rate": 4.998545799771466e-06, "loss": 0.1938, "step": 751 }, { "epoch": 0.1218405703175632, "grad_norm": 1.5311156511306763, "learning_rate": 4.998530848118792e-06, "loss": 0.1802, "step": 752 }, { "epoch": 0.12200259235255995, "grad_norm": 1.499812126159668, "learning_rate": 4.99851582001712e-06, "loss": 0.1726, "step": 753 }, { "epoch": 0.12216461438755671, "grad_norm": 1.4239556789398193, "learning_rate": 4.99850071546691e-06, "loss": 0.2014, "step": 754 }, { "epoch": 0.12232663642255347, "grad_norm": 1.3375072479248047, "learning_rate": 4.998485534468624e-06, "loss": 0.176, "step": 755 }, { "epoch": 0.12248865845755022, "grad_norm": 1.356996774673462, "learning_rate": 4.998470277022728e-06, "loss": 0.1734, "step": 756 }, { "epoch": 0.12265068049254699, "grad_norm": 1.4485342502593994, "learning_rate": 4.998454943129687e-06, "loss": 0.1824, "step": 757 }, { "epoch": 0.12281270252754374, "grad_norm": 1.4348751306533813, "learning_rate": 4.99843953278997e-06, "loss": 0.1857, "step": 758 }, { "epoch": 0.12297472456254051, "grad_norm": 1.4200366735458374, "learning_rate": 4.998424046004051e-06, "loss": 0.1772, "step": 759 }, { "epoch": 0.12313674659753726, "grad_norm": 1.4113013744354248, "learning_rate": 4.998408482772401e-06, "loss": 0.1823, "step": 760 }, { "epoch": 0.12329876863253403, "grad_norm": 1.5233389139175415, "learning_rate": 4.9983928430954986e-06, "loss": 0.1756, "step": 761 }, { "epoch": 0.12346079066753078, "grad_norm": 1.6090972423553467, "learning_rate": 4.99837712697382e-06, "loss": 0.1848, "step": 762 }, { "epoch": 0.12362281270252755, "grad_norm": 1.318171501159668, "learning_rate": 4.998361334407849e-06, "loss": 0.1513, "step": 763 }, { "epoch": 0.1237848347375243, "grad_norm": 1.4973655939102173, "learning_rate": 4.998345465398066e-06, "loss": 0.1755, "step": 764 }, { "epoch": 0.12394685677252107, "grad_norm": 1.4733107089996338, "learning_rate": 4.998329519944957e-06, "loss": 0.1767, "step": 765 }, { "epoch": 0.12410887880751782, "grad_norm": 1.373856544494629, "learning_rate": 4.998313498049011e-06, "loss": 0.1895, "step": 766 }, { "epoch": 0.12427090084251458, "grad_norm": 1.2825859785079956, "learning_rate": 4.998297399710718e-06, "loss": 0.1642, "step": 767 }, { "epoch": 0.12443292287751134, "grad_norm": 1.416802167892456, "learning_rate": 4.9982812249305704e-06, "loss": 0.1892, "step": 768 }, { "epoch": 0.1245949449125081, "grad_norm": 1.3358246088027954, "learning_rate": 4.998264973709063e-06, "loss": 0.1655, "step": 769 }, { "epoch": 0.12475696694750486, "grad_norm": 1.3493046760559082, "learning_rate": 4.998248646046693e-06, "loss": 0.1602, "step": 770 }, { "epoch": 0.12491898898250162, "grad_norm": 1.3179484605789185, "learning_rate": 4.99823224194396e-06, "loss": 0.1788, "step": 771 }, { "epoch": 0.12508101101749838, "grad_norm": 1.3762329816818237, "learning_rate": 4.998215761401366e-06, "loss": 0.1727, "step": 772 }, { "epoch": 0.12524303305249515, "grad_norm": 1.5525070428848267, "learning_rate": 4.998199204419415e-06, "loss": 0.1876, "step": 773 }, { "epoch": 0.1254050550874919, "grad_norm": 1.4245859384536743, "learning_rate": 4.9981825709986145e-06, "loss": 0.1737, "step": 774 }, { "epoch": 0.12556707712248866, "grad_norm": 1.5142040252685547, "learning_rate": 4.998165861139472e-06, "loss": 0.1775, "step": 775 }, { "epoch": 0.12572909915748542, "grad_norm": 1.4016090631484985, "learning_rate": 4.9981490748425e-06, "loss": 0.1676, "step": 776 }, { "epoch": 0.12589112119248216, "grad_norm": 1.5504176616668701, "learning_rate": 4.998132212108212e-06, "loss": 0.1861, "step": 777 }, { "epoch": 0.12605314322747893, "grad_norm": 1.3723235130310059, "learning_rate": 4.998115272937123e-06, "loss": 0.1657, "step": 778 }, { "epoch": 0.1262151652624757, "grad_norm": 1.4091416597366333, "learning_rate": 4.998098257329753e-06, "loss": 0.1698, "step": 779 }, { "epoch": 0.12637718729747247, "grad_norm": 1.3922086954116821, "learning_rate": 4.998081165286621e-06, "loss": 0.1828, "step": 780 }, { "epoch": 0.1265392093324692, "grad_norm": 1.3854937553405762, "learning_rate": 4.998063996808251e-06, "loss": 0.1805, "step": 781 }, { "epoch": 0.12670123136746597, "grad_norm": 1.359626293182373, "learning_rate": 4.9980467518951666e-06, "loss": 0.1816, "step": 782 }, { "epoch": 0.12686325340246274, "grad_norm": 1.2333866357803345, "learning_rate": 4.998029430547898e-06, "loss": 0.1541, "step": 783 }, { "epoch": 0.1270252754374595, "grad_norm": 1.4757193326950073, "learning_rate": 4.998012032766974e-06, "loss": 0.1925, "step": 784 }, { "epoch": 0.12718729747245625, "grad_norm": 1.434784173965454, "learning_rate": 4.997994558552926e-06, "loss": 0.1815, "step": 785 }, { "epoch": 0.127349319507453, "grad_norm": 1.4703211784362793, "learning_rate": 4.997977007906291e-06, "loss": 0.1768, "step": 786 }, { "epoch": 0.12751134154244978, "grad_norm": 1.414862871170044, "learning_rate": 4.997959380827603e-06, "loss": 0.1674, "step": 787 }, { "epoch": 0.12767336357744652, "grad_norm": 1.548573613166809, "learning_rate": 4.997941677317403e-06, "loss": 0.197, "step": 788 }, { "epoch": 0.1278353856124433, "grad_norm": 1.4064699411392212, "learning_rate": 4.997923897376233e-06, "loss": 0.1857, "step": 789 }, { "epoch": 0.12799740764744005, "grad_norm": 1.2528257369995117, "learning_rate": 4.997906041004637e-06, "loss": 0.1574, "step": 790 }, { "epoch": 0.12815942968243682, "grad_norm": 1.3743071556091309, "learning_rate": 4.9978881082031605e-06, "loss": 0.1833, "step": 791 }, { "epoch": 0.12832145171743356, "grad_norm": 1.437005639076233, "learning_rate": 4.997870098972353e-06, "loss": 0.1712, "step": 792 }, { "epoch": 0.12848347375243033, "grad_norm": 1.3489969968795776, "learning_rate": 4.997852013312765e-06, "loss": 0.1772, "step": 793 }, { "epoch": 0.1286454957874271, "grad_norm": 1.6645337343215942, "learning_rate": 4.99783385122495e-06, "loss": 0.1913, "step": 794 }, { "epoch": 0.12880751782242386, "grad_norm": 1.3913012742996216, "learning_rate": 4.997815612709463e-06, "loss": 0.1605, "step": 795 }, { "epoch": 0.1289695398574206, "grad_norm": 1.3065690994262695, "learning_rate": 4.997797297766864e-06, "loss": 0.1604, "step": 796 }, { "epoch": 0.12913156189241737, "grad_norm": 1.439260721206665, "learning_rate": 4.997778906397713e-06, "loss": 0.1689, "step": 797 }, { "epoch": 0.12929358392741414, "grad_norm": 1.3989150524139404, "learning_rate": 4.9977604386025704e-06, "loss": 0.1724, "step": 798 }, { "epoch": 0.12945560596241087, "grad_norm": 1.5063525438308716, "learning_rate": 4.9977418943820036e-06, "loss": 0.1865, "step": 799 }, { "epoch": 0.12961762799740764, "grad_norm": 1.3701797723770142, "learning_rate": 4.997723273736579e-06, "loss": 0.1755, "step": 800 }, { "epoch": 0.1297796500324044, "grad_norm": 2.1302490234375, "learning_rate": 4.997704576666867e-06, "loss": 0.1756, "step": 801 }, { "epoch": 0.12994167206740118, "grad_norm": 1.360653042793274, "learning_rate": 4.9976858031734375e-06, "loss": 0.1637, "step": 802 }, { "epoch": 0.13010369410239792, "grad_norm": 1.3926098346710205, "learning_rate": 4.997666953256869e-06, "loss": 0.1664, "step": 803 }, { "epoch": 0.13026571613739468, "grad_norm": 1.2852853536605835, "learning_rate": 4.9976480269177345e-06, "loss": 0.1714, "step": 804 }, { "epoch": 0.13042773817239145, "grad_norm": 1.3100239038467407, "learning_rate": 4.997629024156615e-06, "loss": 0.167, "step": 805 }, { "epoch": 0.13058976020738822, "grad_norm": 1.3834192752838135, "learning_rate": 4.997609944974092e-06, "loss": 0.1626, "step": 806 }, { "epoch": 0.13075178224238496, "grad_norm": 1.1783868074417114, "learning_rate": 4.997590789370749e-06, "loss": 0.1597, "step": 807 }, { "epoch": 0.13091380427738172, "grad_norm": 1.45823073387146, "learning_rate": 4.99757155734717e-06, "loss": 0.1965, "step": 808 }, { "epoch": 0.1310758263123785, "grad_norm": 1.4000074863433838, "learning_rate": 4.9975522489039474e-06, "loss": 0.1707, "step": 809 }, { "epoch": 0.13123784834737523, "grad_norm": 1.4006067514419556, "learning_rate": 4.997532864041669e-06, "loss": 0.1683, "step": 810 }, { "epoch": 0.131399870382372, "grad_norm": 1.3444918394088745, "learning_rate": 4.99751340276093e-06, "loss": 0.1902, "step": 811 }, { "epoch": 0.13156189241736876, "grad_norm": 1.2597421407699585, "learning_rate": 4.997493865062323e-06, "loss": 0.1666, "step": 812 }, { "epoch": 0.13172391445236553, "grad_norm": 1.7309461832046509, "learning_rate": 4.997474250946448e-06, "loss": 0.1891, "step": 813 }, { "epoch": 0.13188593648736227, "grad_norm": 1.5406413078308105, "learning_rate": 4.9974545604139055e-06, "loss": 0.1793, "step": 814 }, { "epoch": 0.13204795852235904, "grad_norm": 1.402835726737976, "learning_rate": 4.9974347934652965e-06, "loss": 0.1846, "step": 815 }, { "epoch": 0.1322099805573558, "grad_norm": 1.3662683963775635, "learning_rate": 4.997414950101227e-06, "loss": 0.1669, "step": 816 }, { "epoch": 0.13237200259235257, "grad_norm": 1.3234673738479614, "learning_rate": 4.9973950303223026e-06, "loss": 0.1713, "step": 817 }, { "epoch": 0.1325340246273493, "grad_norm": 1.6133899688720703, "learning_rate": 4.997375034129135e-06, "loss": 0.1928, "step": 818 }, { "epoch": 0.13269604666234608, "grad_norm": 1.3357648849487305, "learning_rate": 4.997354961522335e-06, "loss": 0.1872, "step": 819 }, { "epoch": 0.13285806869734285, "grad_norm": 1.4020048379898071, "learning_rate": 4.997334812502516e-06, "loss": 0.1831, "step": 820 }, { "epoch": 0.13302009073233959, "grad_norm": 1.350357174873352, "learning_rate": 4.997314587070295e-06, "loss": 0.166, "step": 821 }, { "epoch": 0.13318211276733635, "grad_norm": 1.5430233478546143, "learning_rate": 4.9972942852262915e-06, "loss": 0.1769, "step": 822 }, { "epoch": 0.13334413480233312, "grad_norm": 1.4370311498641968, "learning_rate": 4.997273906971126e-06, "loss": 0.1876, "step": 823 }, { "epoch": 0.1335061568373299, "grad_norm": 1.2412606477737427, "learning_rate": 4.997253452305423e-06, "loss": 0.1494, "step": 824 }, { "epoch": 0.13366817887232663, "grad_norm": 1.3403682708740234, "learning_rate": 4.9972329212298065e-06, "loss": 0.18, "step": 825 }, { "epoch": 0.1338302009073234, "grad_norm": 1.2844048738479614, "learning_rate": 4.9972123137449065e-06, "loss": 0.1618, "step": 826 }, { "epoch": 0.13399222294232016, "grad_norm": 1.5786724090576172, "learning_rate": 4.997191629851352e-06, "loss": 0.1946, "step": 827 }, { "epoch": 0.13415424497731693, "grad_norm": 1.384547472000122, "learning_rate": 4.997170869549778e-06, "loss": 0.1725, "step": 828 }, { "epoch": 0.13431626701231367, "grad_norm": 1.4201945066452026, "learning_rate": 4.997150032840818e-06, "loss": 0.1766, "step": 829 }, { "epoch": 0.13447828904731043, "grad_norm": 1.369523048400879, "learning_rate": 4.99712911972511e-06, "loss": 0.1735, "step": 830 }, { "epoch": 0.1346403110823072, "grad_norm": 1.2436349391937256, "learning_rate": 4.997108130203293e-06, "loss": 0.1599, "step": 831 }, { "epoch": 0.13480233311730394, "grad_norm": 1.3385329246520996, "learning_rate": 4.99708706427601e-06, "loss": 0.1676, "step": 832 }, { "epoch": 0.1349643551523007, "grad_norm": 1.412265419960022, "learning_rate": 4.997065921943907e-06, "loss": 0.1745, "step": 833 }, { "epoch": 0.13512637718729748, "grad_norm": 1.3985753059387207, "learning_rate": 4.997044703207629e-06, "loss": 0.154, "step": 834 }, { "epoch": 0.13528839922229424, "grad_norm": 1.3470317125320435, "learning_rate": 4.9970234080678244e-06, "loss": 0.1806, "step": 835 }, { "epoch": 0.13545042125729098, "grad_norm": 1.3285126686096191, "learning_rate": 4.9970020365251485e-06, "loss": 0.179, "step": 836 }, { "epoch": 0.13561244329228775, "grad_norm": 1.4892990589141846, "learning_rate": 4.9969805885802515e-06, "loss": 0.1902, "step": 837 }, { "epoch": 0.13577446532728452, "grad_norm": 1.373530626296997, "learning_rate": 4.996959064233792e-06, "loss": 0.1539, "step": 838 }, { "epoch": 0.13593648736228128, "grad_norm": 1.3297441005706787, "learning_rate": 4.996937463486427e-06, "loss": 0.1704, "step": 839 }, { "epoch": 0.13609850939727802, "grad_norm": 1.343121886253357, "learning_rate": 4.996915786338818e-06, "loss": 0.1592, "step": 840 }, { "epoch": 0.1362605314322748, "grad_norm": 1.4662681818008423, "learning_rate": 4.9968940327916284e-06, "loss": 0.1863, "step": 841 }, { "epoch": 0.13642255346727156, "grad_norm": 1.4304791688919067, "learning_rate": 4.9968722028455245e-06, "loss": 0.1775, "step": 842 }, { "epoch": 0.1365845755022683, "grad_norm": 1.3115490674972534, "learning_rate": 4.996850296501172e-06, "loss": 0.163, "step": 843 }, { "epoch": 0.13674659753726506, "grad_norm": 1.5014792680740356, "learning_rate": 4.996828313759245e-06, "loss": 0.1856, "step": 844 }, { "epoch": 0.13690861957226183, "grad_norm": 1.2926596403121948, "learning_rate": 4.996806254620411e-06, "loss": 0.1664, "step": 845 }, { "epoch": 0.1370706416072586, "grad_norm": 1.501706600189209, "learning_rate": 4.99678411908535e-06, "loss": 0.1922, "step": 846 }, { "epoch": 0.13723266364225534, "grad_norm": 1.2282168865203857, "learning_rate": 4.996761907154736e-06, "loss": 0.1679, "step": 847 }, { "epoch": 0.1373946856772521, "grad_norm": 1.315477967262268, "learning_rate": 4.996739618829251e-06, "loss": 0.1789, "step": 848 }, { "epoch": 0.13755670771224887, "grad_norm": 1.2631007432937622, "learning_rate": 4.996717254109574e-06, "loss": 0.1654, "step": 849 }, { "epoch": 0.13771872974724564, "grad_norm": 1.4962035417556763, "learning_rate": 4.996694812996391e-06, "loss": 0.2064, "step": 850 }, { "epoch": 0.13788075178224238, "grad_norm": 1.2344715595245361, "learning_rate": 4.99667229549039e-06, "loss": 0.163, "step": 851 }, { "epoch": 0.13804277381723914, "grad_norm": 1.3445398807525635, "learning_rate": 4.996649701592258e-06, "loss": 0.1601, "step": 852 }, { "epoch": 0.1382047958522359, "grad_norm": 1.3226261138916016, "learning_rate": 4.996627031302686e-06, "loss": 0.1853, "step": 853 }, { "epoch": 0.13836681788723265, "grad_norm": 1.4674776792526245, "learning_rate": 4.99660428462237e-06, "loss": 0.2106, "step": 854 }, { "epoch": 0.13852883992222942, "grad_norm": 1.2431912422180176, "learning_rate": 4.996581461552003e-06, "loss": 0.1622, "step": 855 }, { "epoch": 0.13869086195722619, "grad_norm": 1.3208400011062622, "learning_rate": 4.996558562092286e-06, "loss": 0.1614, "step": 856 }, { "epoch": 0.13885288399222295, "grad_norm": 1.3402066230773926, "learning_rate": 4.996535586243918e-06, "loss": 0.1581, "step": 857 }, { "epoch": 0.1390149060272197, "grad_norm": 1.3583393096923828, "learning_rate": 4.996512534007602e-06, "loss": 0.1659, "step": 858 }, { "epoch": 0.13917692806221646, "grad_norm": 1.4143537282943726, "learning_rate": 4.9964894053840455e-06, "loss": 0.1879, "step": 859 }, { "epoch": 0.13933895009721323, "grad_norm": 1.3818471431732178, "learning_rate": 4.996466200373954e-06, "loss": 0.182, "step": 860 }, { "epoch": 0.13950097213221, "grad_norm": 1.3754279613494873, "learning_rate": 4.996442918978038e-06, "loss": 0.1816, "step": 861 }, { "epoch": 0.13966299416720673, "grad_norm": 1.2652978897094727, "learning_rate": 4.99641956119701e-06, "loss": 0.1495, "step": 862 }, { "epoch": 0.1398250162022035, "grad_norm": 1.4501652717590332, "learning_rate": 4.996396127031584e-06, "loss": 0.1854, "step": 863 }, { "epoch": 0.13998703823720027, "grad_norm": 1.2763866186141968, "learning_rate": 4.996372616482478e-06, "loss": 0.1654, "step": 864 }, { "epoch": 0.140149060272197, "grad_norm": 1.335204839706421, "learning_rate": 4.996349029550411e-06, "loss": 0.1533, "step": 865 }, { "epoch": 0.14031108230719377, "grad_norm": 1.3689930438995361, "learning_rate": 4.996325366236105e-06, "loss": 0.1826, "step": 866 }, { "epoch": 0.14047310434219054, "grad_norm": 1.2703019380569458, "learning_rate": 4.996301626540284e-06, "loss": 0.1686, "step": 867 }, { "epoch": 0.1406351263771873, "grad_norm": 1.4817618131637573, "learning_rate": 4.996277810463675e-06, "loss": 0.2053, "step": 868 }, { "epoch": 0.14079714841218405, "grad_norm": 1.4522895812988281, "learning_rate": 4.996253918007004e-06, "loss": 0.1929, "step": 869 }, { "epoch": 0.14095917044718081, "grad_norm": 1.3986607789993286, "learning_rate": 4.996229949171004e-06, "loss": 0.1761, "step": 870 }, { "epoch": 0.14112119248217758, "grad_norm": 1.342969298362732, "learning_rate": 4.996205903956409e-06, "loss": 0.177, "step": 871 }, { "epoch": 0.14128321451717435, "grad_norm": 1.3436371088027954, "learning_rate": 4.996181782363955e-06, "loss": 0.1776, "step": 872 }, { "epoch": 0.1414452365521711, "grad_norm": 1.4952949285507202, "learning_rate": 4.996157584394378e-06, "loss": 0.1983, "step": 873 }, { "epoch": 0.14160725858716786, "grad_norm": 1.2056357860565186, "learning_rate": 4.99613331004842e-06, "loss": 0.1559, "step": 874 }, { "epoch": 0.14176928062216462, "grad_norm": 1.2493481636047363, "learning_rate": 4.996108959326823e-06, "loss": 0.1677, "step": 875 }, { "epoch": 0.14193130265716136, "grad_norm": 1.348042607307434, "learning_rate": 4.996084532230332e-06, "loss": 0.1691, "step": 876 }, { "epoch": 0.14209332469215813, "grad_norm": 1.3877140283584595, "learning_rate": 4.996060028759695e-06, "loss": 0.1829, "step": 877 }, { "epoch": 0.1422553467271549, "grad_norm": 1.3358656167984009, "learning_rate": 4.996035448915661e-06, "loss": 0.1798, "step": 878 }, { "epoch": 0.14241736876215166, "grad_norm": 1.5458526611328125, "learning_rate": 4.996010792698983e-06, "loss": 0.2146, "step": 879 }, { "epoch": 0.1425793907971484, "grad_norm": 1.238255500793457, "learning_rate": 4.995986060110415e-06, "loss": 0.1554, "step": 880 }, { "epoch": 0.14274141283214517, "grad_norm": 1.332350730895996, "learning_rate": 4.995961251150714e-06, "loss": 0.1563, "step": 881 }, { "epoch": 0.14290343486714194, "grad_norm": 1.4522160291671753, "learning_rate": 4.995936365820638e-06, "loss": 0.194, "step": 882 }, { "epoch": 0.1430654569021387, "grad_norm": 1.2477561235427856, "learning_rate": 4.99591140412095e-06, "loss": 0.1582, "step": 883 }, { "epoch": 0.14322747893713544, "grad_norm": 1.5550086498260498, "learning_rate": 4.9958863660524125e-06, "loss": 0.1858, "step": 884 }, { "epoch": 0.1433895009721322, "grad_norm": 1.1642705202102661, "learning_rate": 4.995861251615792e-06, "loss": 0.1353, "step": 885 }, { "epoch": 0.14355152300712898, "grad_norm": 1.1860448122024536, "learning_rate": 4.995836060811859e-06, "loss": 0.1516, "step": 886 }, { "epoch": 0.14371354504212572, "grad_norm": 1.2857884168624878, "learning_rate": 4.99581079364138e-06, "loss": 0.165, "step": 887 }, { "epoch": 0.14387556707712248, "grad_norm": 1.3009867668151855, "learning_rate": 4.995785450105131e-06, "loss": 0.1873, "step": 888 }, { "epoch": 0.14403758911211925, "grad_norm": 1.2586079835891724, "learning_rate": 4.995760030203888e-06, "loss": 0.1622, "step": 889 }, { "epoch": 0.14419961114711602, "grad_norm": 1.3150060176849365, "learning_rate": 4.995734533938427e-06, "loss": 0.1756, "step": 890 }, { "epoch": 0.14436163318211276, "grad_norm": 1.2379282712936401, "learning_rate": 4.995708961309528e-06, "loss": 0.1527, "step": 891 }, { "epoch": 0.14452365521710953, "grad_norm": 1.3145521879196167, "learning_rate": 4.995683312317975e-06, "loss": 0.1756, "step": 892 }, { "epoch": 0.1446856772521063, "grad_norm": 1.420900583267212, "learning_rate": 4.9956575869645515e-06, "loss": 0.1738, "step": 893 }, { "epoch": 0.14484769928710303, "grad_norm": 1.2677338123321533, "learning_rate": 4.995631785250046e-06, "loss": 0.1759, "step": 894 }, { "epoch": 0.1450097213220998, "grad_norm": 1.1973481178283691, "learning_rate": 4.995605907175247e-06, "loss": 0.1592, "step": 895 }, { "epoch": 0.14517174335709657, "grad_norm": 1.342222809791565, "learning_rate": 4.9955799527409465e-06, "loss": 0.1733, "step": 896 }, { "epoch": 0.14533376539209333, "grad_norm": 1.3735847473144531, "learning_rate": 4.995553921947938e-06, "loss": 0.1729, "step": 897 }, { "epoch": 0.14549578742709007, "grad_norm": 1.2551465034484863, "learning_rate": 4.99552781479702e-06, "loss": 0.1819, "step": 898 }, { "epoch": 0.14565780946208684, "grad_norm": 1.5449222326278687, "learning_rate": 4.995501631288989e-06, "loss": 0.1913, "step": 899 }, { "epoch": 0.1458198314970836, "grad_norm": 1.166682243347168, "learning_rate": 4.995475371424648e-06, "loss": 0.1512, "step": 900 }, { "epoch": 0.14598185353208037, "grad_norm": 1.2635172605514526, "learning_rate": 4.995449035204798e-06, "loss": 0.1575, "step": 901 }, { "epoch": 0.14614387556707711, "grad_norm": 1.3577876091003418, "learning_rate": 4.995422622630247e-06, "loss": 0.1916, "step": 902 }, { "epoch": 0.14630589760207388, "grad_norm": 1.3075361251831055, "learning_rate": 4.995396133701803e-06, "loss": 0.1454, "step": 903 }, { "epoch": 0.14646791963707065, "grad_norm": 1.339972972869873, "learning_rate": 4.995369568420276e-06, "loss": 0.1741, "step": 904 }, { "epoch": 0.1466299416720674, "grad_norm": 1.459546685218811, "learning_rate": 4.995342926786478e-06, "loss": 0.1882, "step": 905 }, { "epoch": 0.14679196370706415, "grad_norm": 1.2182546854019165, "learning_rate": 4.995316208801226e-06, "loss": 0.1348, "step": 906 }, { "epoch": 0.14695398574206092, "grad_norm": 1.376412272453308, "learning_rate": 4.995289414465337e-06, "loss": 0.1798, "step": 907 }, { "epoch": 0.1471160077770577, "grad_norm": 1.489270806312561, "learning_rate": 4.99526254377963e-06, "loss": 0.1869, "step": 908 }, { "epoch": 0.14727802981205443, "grad_norm": 1.320319652557373, "learning_rate": 4.9952355967449265e-06, "loss": 0.1671, "step": 909 }, { "epoch": 0.1474400518470512, "grad_norm": 1.327093243598938, "learning_rate": 4.995208573362053e-06, "loss": 0.1765, "step": 910 }, { "epoch": 0.14760207388204796, "grad_norm": 1.3886643648147583, "learning_rate": 4.995181473631836e-06, "loss": 0.1717, "step": 911 }, { "epoch": 0.14776409591704473, "grad_norm": 1.3381421566009521, "learning_rate": 4.995154297555103e-06, "loss": 0.1716, "step": 912 }, { "epoch": 0.14792611795204147, "grad_norm": 1.1352941989898682, "learning_rate": 4.995127045132687e-06, "loss": 0.1547, "step": 913 }, { "epoch": 0.14808813998703824, "grad_norm": 1.4500410556793213, "learning_rate": 4.995099716365421e-06, "loss": 0.1913, "step": 914 }, { "epoch": 0.148250162022035, "grad_norm": 1.28703773021698, "learning_rate": 4.995072311254144e-06, "loss": 0.18, "step": 915 }, { "epoch": 0.14841218405703174, "grad_norm": 1.2766846418380737, "learning_rate": 4.995044829799689e-06, "loss": 0.1769, "step": 916 }, { "epoch": 0.1485742060920285, "grad_norm": 1.2691501379013062, "learning_rate": 4.995017272002902e-06, "loss": 0.1684, "step": 917 }, { "epoch": 0.14873622812702528, "grad_norm": 1.4125738143920898, "learning_rate": 4.994989637864624e-06, "loss": 0.1809, "step": 918 }, { "epoch": 0.14889825016202204, "grad_norm": 1.3057345151901245, "learning_rate": 4.994961927385701e-06, "loss": 0.1608, "step": 919 }, { "epoch": 0.14906027219701878, "grad_norm": 1.4931777715682983, "learning_rate": 4.99493414056698e-06, "loss": 0.1883, "step": 920 }, { "epoch": 0.14922229423201555, "grad_norm": 1.4381946325302124, "learning_rate": 4.994906277409313e-06, "loss": 0.1773, "step": 921 }, { "epoch": 0.14938431626701232, "grad_norm": 1.3123935461044312, "learning_rate": 4.99487833791355e-06, "loss": 0.159, "step": 922 }, { "epoch": 0.14954633830200909, "grad_norm": 1.396132469177246, "learning_rate": 4.994850322080549e-06, "loss": 0.1704, "step": 923 }, { "epoch": 0.14970836033700582, "grad_norm": 1.267357349395752, "learning_rate": 4.9948222299111644e-06, "loss": 0.1605, "step": 924 }, { "epoch": 0.1498703823720026, "grad_norm": 1.2908340692520142, "learning_rate": 4.994794061406258e-06, "loss": 0.1625, "step": 925 }, { "epoch": 0.15003240440699936, "grad_norm": 1.3178255558013916, "learning_rate": 4.994765816566689e-06, "loss": 0.176, "step": 926 }, { "epoch": 0.1501944264419961, "grad_norm": 1.2020256519317627, "learning_rate": 4.994737495393325e-06, "loss": 0.1565, "step": 927 }, { "epoch": 0.15035644847699287, "grad_norm": 1.346628189086914, "learning_rate": 4.994709097887029e-06, "loss": 0.1868, "step": 928 }, { "epoch": 0.15051847051198963, "grad_norm": 1.440252661705017, "learning_rate": 4.994680624048674e-06, "loss": 0.184, "step": 929 }, { "epoch": 0.1506804925469864, "grad_norm": 1.4660731554031372, "learning_rate": 4.994652073879127e-06, "loss": 0.192, "step": 930 }, { "epoch": 0.15084251458198314, "grad_norm": 1.4855326414108276, "learning_rate": 4.9946234473792645e-06, "loss": 0.1839, "step": 931 }, { "epoch": 0.1510045366169799, "grad_norm": 1.2965097427368164, "learning_rate": 4.994594744549961e-06, "loss": 0.1671, "step": 932 }, { "epoch": 0.15116655865197667, "grad_norm": 1.3417192697525024, "learning_rate": 4.994565965392094e-06, "loss": 0.1841, "step": 933 }, { "epoch": 0.15132858068697344, "grad_norm": 1.2108187675476074, "learning_rate": 4.994537109906546e-06, "loss": 0.1594, "step": 934 }, { "epoch": 0.15149060272197018, "grad_norm": 1.4936498403549194, "learning_rate": 4.994508178094199e-06, "loss": 0.1838, "step": 935 }, { "epoch": 0.15165262475696695, "grad_norm": 1.2515209913253784, "learning_rate": 4.99447916995594e-06, "loss": 0.1531, "step": 936 }, { "epoch": 0.15181464679196371, "grad_norm": 1.2355049848556519, "learning_rate": 4.994450085492653e-06, "loss": 0.1466, "step": 937 }, { "epoch": 0.15197666882696045, "grad_norm": 1.317088007926941, "learning_rate": 4.99442092470523e-06, "loss": 0.1851, "step": 938 }, { "epoch": 0.15213869086195722, "grad_norm": 1.262994408607483, "learning_rate": 4.994391687594564e-06, "loss": 0.1656, "step": 939 }, { "epoch": 0.152300712896954, "grad_norm": 1.1988916397094727, "learning_rate": 4.994362374161548e-06, "loss": 0.1457, "step": 940 }, { "epoch": 0.15246273493195076, "grad_norm": 1.2624685764312744, "learning_rate": 4.99433298440708e-06, "loss": 0.1759, "step": 941 }, { "epoch": 0.1526247569669475, "grad_norm": 1.4372855424880981, "learning_rate": 4.994303518332059e-06, "loss": 0.1792, "step": 942 }, { "epoch": 0.15278677900194426, "grad_norm": 1.3524651527404785, "learning_rate": 4.994273975937386e-06, "loss": 0.1956, "step": 943 }, { "epoch": 0.15294880103694103, "grad_norm": 1.2189857959747314, "learning_rate": 4.994244357223965e-06, "loss": 0.1646, "step": 944 }, { "epoch": 0.1531108230719378, "grad_norm": 1.5901044607162476, "learning_rate": 4.994214662192704e-06, "loss": 0.1821, "step": 945 }, { "epoch": 0.15327284510693454, "grad_norm": 1.2014747858047485, "learning_rate": 4.994184890844509e-06, "loss": 0.1585, "step": 946 }, { "epoch": 0.1534348671419313, "grad_norm": 1.2449418306350708, "learning_rate": 4.994155043180292e-06, "loss": 0.1766, "step": 947 }, { "epoch": 0.15359688917692807, "grad_norm": 1.1856237649917603, "learning_rate": 4.9941251192009665e-06, "loss": 0.1561, "step": 948 }, { "epoch": 0.1537589112119248, "grad_norm": 1.3287744522094727, "learning_rate": 4.994095118907449e-06, "loss": 0.18, "step": 949 }, { "epoch": 0.15392093324692158, "grad_norm": 1.4006623029708862, "learning_rate": 4.994065042300655e-06, "loss": 0.1852, "step": 950 }, { "epoch": 0.15408295528191834, "grad_norm": 1.3355299234390259, "learning_rate": 4.994034889381508e-06, "loss": 0.1653, "step": 951 }, { "epoch": 0.1542449773169151, "grad_norm": 1.5296586751937866, "learning_rate": 4.994004660150927e-06, "loss": 0.2171, "step": 952 }, { "epoch": 0.15440699935191185, "grad_norm": 1.447704553604126, "learning_rate": 4.99397435460984e-06, "loss": 0.188, "step": 953 }, { "epoch": 0.15456902138690862, "grad_norm": 1.272879958152771, "learning_rate": 4.993943972759173e-06, "loss": 0.1728, "step": 954 }, { "epoch": 0.15473104342190538, "grad_norm": 1.1389333009719849, "learning_rate": 4.9939135145998554e-06, "loss": 0.1572, "step": 955 }, { "epoch": 0.15489306545690215, "grad_norm": 1.3290883302688599, "learning_rate": 4.993882980132819e-06, "loss": 0.1665, "step": 956 }, { "epoch": 0.1550550874918989, "grad_norm": 1.3863415718078613, "learning_rate": 4.993852369358999e-06, "loss": 0.1757, "step": 957 }, { "epoch": 0.15521710952689566, "grad_norm": 1.211903691291809, "learning_rate": 4.993821682279332e-06, "loss": 0.1507, "step": 958 }, { "epoch": 0.15537913156189243, "grad_norm": 1.2453619241714478, "learning_rate": 4.9937909188947555e-06, "loss": 0.1578, "step": 959 }, { "epoch": 0.15554115359688916, "grad_norm": 1.4014263153076172, "learning_rate": 4.993760079206212e-06, "loss": 0.1949, "step": 960 }, { "epoch": 0.15570317563188593, "grad_norm": 1.3202704191207886, "learning_rate": 4.993729163214644e-06, "loss": 0.1744, "step": 961 }, { "epoch": 0.1558651976668827, "grad_norm": 1.3204492330551147, "learning_rate": 4.993698170920999e-06, "loss": 0.1833, "step": 962 }, { "epoch": 0.15602721970187947, "grad_norm": 1.3719549179077148, "learning_rate": 4.993667102326226e-06, "loss": 0.1733, "step": 963 }, { "epoch": 0.1561892417368762, "grad_norm": 1.2820831537246704, "learning_rate": 4.993635957431273e-06, "loss": 0.18, "step": 964 }, { "epoch": 0.15635126377187297, "grad_norm": 1.2792859077453613, "learning_rate": 4.993604736237094e-06, "loss": 0.1643, "step": 965 }, { "epoch": 0.15651328580686974, "grad_norm": 1.3071776628494263, "learning_rate": 4.993573438744645e-06, "loss": 0.1543, "step": 966 }, { "epoch": 0.1566753078418665, "grad_norm": 1.3182742595672607, "learning_rate": 4.993542064954883e-06, "loss": 0.1719, "step": 967 }, { "epoch": 0.15683732987686325, "grad_norm": 1.3087525367736816, "learning_rate": 4.993510614868767e-06, "loss": 0.1889, "step": 968 }, { "epoch": 0.15699935191186, "grad_norm": 1.2305779457092285, "learning_rate": 4.993479088487262e-06, "loss": 0.1651, "step": 969 }, { "epoch": 0.15716137394685678, "grad_norm": 1.1964472532272339, "learning_rate": 4.99344748581133e-06, "loss": 0.1523, "step": 970 }, { "epoch": 0.15732339598185352, "grad_norm": 1.23350989818573, "learning_rate": 4.993415806841939e-06, "loss": 0.16, "step": 971 }, { "epoch": 0.1574854180168503, "grad_norm": 1.3825608491897583, "learning_rate": 4.993384051580059e-06, "loss": 0.1884, "step": 972 }, { "epoch": 0.15764744005184705, "grad_norm": 1.4116489887237549, "learning_rate": 4.993352220026661e-06, "loss": 0.1763, "step": 973 }, { "epoch": 0.15780946208684382, "grad_norm": 1.418979287147522, "learning_rate": 4.993320312182718e-06, "loss": 0.1982, "step": 974 }, { "epoch": 0.15797148412184056, "grad_norm": 1.3567852973937988, "learning_rate": 4.993288328049208e-06, "loss": 0.1845, "step": 975 }, { "epoch": 0.15813350615683733, "grad_norm": 1.257607340812683, "learning_rate": 4.993256267627108e-06, "loss": 0.1626, "step": 976 }, { "epoch": 0.1582955281918341, "grad_norm": 1.2353687286376953, "learning_rate": 4.993224130917399e-06, "loss": 0.1695, "step": 977 }, { "epoch": 0.15845755022683086, "grad_norm": 1.4237691164016724, "learning_rate": 4.993191917921066e-06, "loss": 0.1841, "step": 978 }, { "epoch": 0.1586195722618276, "grad_norm": 1.2207448482513428, "learning_rate": 4.9931596286390935e-06, "loss": 0.1518, "step": 979 }, { "epoch": 0.15878159429682437, "grad_norm": 1.366822600364685, "learning_rate": 4.9931272630724704e-06, "loss": 0.1751, "step": 980 }, { "epoch": 0.15894361633182114, "grad_norm": 1.3193942308425903, "learning_rate": 4.993094821222186e-06, "loss": 0.1615, "step": 981 }, { "epoch": 0.15910563836681788, "grad_norm": 1.361041784286499, "learning_rate": 4.993062303089233e-06, "loss": 0.1669, "step": 982 }, { "epoch": 0.15926766040181464, "grad_norm": 1.4017630815505981, "learning_rate": 4.993029708674607e-06, "loss": 0.1801, "step": 983 }, { "epoch": 0.1594296824368114, "grad_norm": 1.3844646215438843, "learning_rate": 4.992997037979304e-06, "loss": 0.1771, "step": 984 }, { "epoch": 0.15959170447180818, "grad_norm": 1.2512445449829102, "learning_rate": 4.992964291004326e-06, "loss": 0.1692, "step": 985 }, { "epoch": 0.15975372650680492, "grad_norm": 1.4162150621414185, "learning_rate": 4.992931467750673e-06, "loss": 0.1696, "step": 986 }, { "epoch": 0.15991574854180168, "grad_norm": 1.2711294889450073, "learning_rate": 4.99289856821935e-06, "loss": 0.1628, "step": 987 }, { "epoch": 0.16007777057679845, "grad_norm": 1.318920373916626, "learning_rate": 4.992865592411362e-06, "loss": 0.1504, "step": 988 }, { "epoch": 0.16023979261179522, "grad_norm": 1.3679457902908325, "learning_rate": 4.992832540327721e-06, "loss": 0.1738, "step": 989 }, { "epoch": 0.16040181464679196, "grad_norm": 1.2336037158966064, "learning_rate": 4.992799411969436e-06, "loss": 0.1659, "step": 990 }, { "epoch": 0.16056383668178872, "grad_norm": 1.1132322549819946, "learning_rate": 4.992766207337523e-06, "loss": 0.1369, "step": 991 }, { "epoch": 0.1607258587167855, "grad_norm": 1.2798656225204468, "learning_rate": 4.992732926432995e-06, "loss": 0.1702, "step": 992 }, { "epoch": 0.16088788075178223, "grad_norm": 1.3788708448410034, "learning_rate": 4.992699569256872e-06, "loss": 0.1978, "step": 993 }, { "epoch": 0.161049902786779, "grad_norm": 1.2766364812850952, "learning_rate": 4.9926661358101745e-06, "loss": 0.1856, "step": 994 }, { "epoch": 0.16121192482177576, "grad_norm": 1.371200680732727, "learning_rate": 4.992632626093926e-06, "loss": 0.1999, "step": 995 }, { "epoch": 0.16137394685677253, "grad_norm": 1.2448960542678833, "learning_rate": 4.9925990401091505e-06, "loss": 0.1574, "step": 996 }, { "epoch": 0.16153596889176927, "grad_norm": 1.2014307975769043, "learning_rate": 4.992565377856876e-06, "loss": 0.1572, "step": 997 }, { "epoch": 0.16169799092676604, "grad_norm": 1.3043875694274902, "learning_rate": 4.992531639338133e-06, "loss": 0.1746, "step": 998 }, { "epoch": 0.1618600129617628, "grad_norm": 1.3030551671981812, "learning_rate": 4.992497824553954e-06, "loss": 0.172, "step": 999 }, { "epoch": 0.16202203499675957, "grad_norm": 1.3272135257720947, "learning_rate": 4.992463933505374e-06, "loss": 0.1755, "step": 1000 }, { "epoch": 0.1621840570317563, "grad_norm": 1.2103444337844849, "learning_rate": 4.992429966193428e-06, "loss": 0.1522, "step": 1001 }, { "epoch": 0.16234607906675308, "grad_norm": 1.353193759918213, "learning_rate": 4.9923959226191574e-06, "loss": 0.1796, "step": 1002 }, { "epoch": 0.16250810110174985, "grad_norm": 1.339188814163208, "learning_rate": 4.992361802783603e-06, "loss": 0.1926, "step": 1003 }, { "epoch": 0.16267012313674659, "grad_norm": 1.269451379776001, "learning_rate": 4.992327606687808e-06, "loss": 0.1597, "step": 1004 }, { "epoch": 0.16283214517174335, "grad_norm": 1.325924277305603, "learning_rate": 4.992293334332821e-06, "loss": 0.1785, "step": 1005 }, { "epoch": 0.16299416720674012, "grad_norm": 1.2406764030456543, "learning_rate": 4.992258985719688e-06, "loss": 0.1658, "step": 1006 }, { "epoch": 0.1631561892417369, "grad_norm": 1.3176758289337158, "learning_rate": 4.992224560849461e-06, "loss": 0.1802, "step": 1007 }, { "epoch": 0.16331821127673363, "grad_norm": 1.3824093341827393, "learning_rate": 4.992190059723194e-06, "loss": 0.1961, "step": 1008 }, { "epoch": 0.1634802333117304, "grad_norm": 1.223380446434021, "learning_rate": 4.9921554823419424e-06, "loss": 0.1567, "step": 1009 }, { "epoch": 0.16364225534672716, "grad_norm": 1.3019189834594727, "learning_rate": 4.992120828706763e-06, "loss": 0.1695, "step": 1010 }, { "epoch": 0.1638042773817239, "grad_norm": 1.3338146209716797, "learning_rate": 4.9920860988187185e-06, "loss": 0.1876, "step": 1011 }, { "epoch": 0.16396629941672067, "grad_norm": 1.167526364326477, "learning_rate": 4.99205129267887e-06, "loss": 0.1609, "step": 1012 }, { "epoch": 0.16412832145171743, "grad_norm": 1.1966924667358398, "learning_rate": 4.9920164102882816e-06, "loss": 0.1576, "step": 1013 }, { "epoch": 0.1642903434867142, "grad_norm": 1.1444998979568481, "learning_rate": 4.991981451648022e-06, "loss": 0.1646, "step": 1014 }, { "epoch": 0.16445236552171094, "grad_norm": 1.1742830276489258, "learning_rate": 4.99194641675916e-06, "loss": 0.1607, "step": 1015 }, { "epoch": 0.1646143875567077, "grad_norm": 1.199351191520691, "learning_rate": 4.9919113056227685e-06, "loss": 0.1506, "step": 1016 }, { "epoch": 0.16477640959170448, "grad_norm": 1.2436610460281372, "learning_rate": 4.991876118239922e-06, "loss": 0.1683, "step": 1017 }, { "epoch": 0.16493843162670124, "grad_norm": 1.322636365890503, "learning_rate": 4.991840854611696e-06, "loss": 0.1757, "step": 1018 }, { "epoch": 0.16510045366169798, "grad_norm": 1.06990647315979, "learning_rate": 4.99180551473917e-06, "loss": 0.1369, "step": 1019 }, { "epoch": 0.16526247569669475, "grad_norm": 1.193261981010437, "learning_rate": 4.991770098623425e-06, "loss": 0.1608, "step": 1020 }, { "epoch": 0.16542449773169152, "grad_norm": 1.2813093662261963, "learning_rate": 4.991734606265544e-06, "loss": 0.1531, "step": 1021 }, { "epoch": 0.16558651976668826, "grad_norm": 1.2579412460327148, "learning_rate": 4.9916990376666156e-06, "loss": 0.1675, "step": 1022 }, { "epoch": 0.16574854180168502, "grad_norm": 1.3288764953613281, "learning_rate": 4.991663392827726e-06, "loss": 0.1501, "step": 1023 }, { "epoch": 0.1659105638366818, "grad_norm": 1.4220918416976929, "learning_rate": 4.991627671749966e-06, "loss": 0.1591, "step": 1024 }, { "epoch": 0.16607258587167856, "grad_norm": 1.4772206544876099, "learning_rate": 4.991591874434429e-06, "loss": 0.1787, "step": 1025 }, { "epoch": 0.1662346079066753, "grad_norm": 1.2587242126464844, "learning_rate": 4.9915560008822105e-06, "loss": 0.164, "step": 1026 }, { "epoch": 0.16639662994167206, "grad_norm": 1.2085176706314087, "learning_rate": 4.991520051094407e-06, "loss": 0.1701, "step": 1027 }, { "epoch": 0.16655865197666883, "grad_norm": 1.2200427055358887, "learning_rate": 4.99148402507212e-06, "loss": 0.1538, "step": 1028 }, { "epoch": 0.1667206740116656, "grad_norm": 1.2087466716766357, "learning_rate": 4.991447922816451e-06, "loss": 0.1672, "step": 1029 }, { "epoch": 0.16688269604666234, "grad_norm": 1.2888163328170776, "learning_rate": 4.991411744328505e-06, "loss": 0.1578, "step": 1030 }, { "epoch": 0.1670447180816591, "grad_norm": 1.478442668914795, "learning_rate": 4.991375489609388e-06, "loss": 0.1984, "step": 1031 }, { "epoch": 0.16720674011665587, "grad_norm": 1.3319884538650513, "learning_rate": 4.991339158660211e-06, "loss": 0.1584, "step": 1032 }, { "epoch": 0.1673687621516526, "grad_norm": 1.1861016750335693, "learning_rate": 4.991302751482084e-06, "loss": 0.1581, "step": 1033 }, { "epoch": 0.16753078418664938, "grad_norm": 1.2733880281448364, "learning_rate": 4.991266268076121e-06, "loss": 0.1757, "step": 1034 }, { "epoch": 0.16769280622164615, "grad_norm": 1.2338616847991943, "learning_rate": 4.99122970844344e-06, "loss": 0.1733, "step": 1035 }, { "epoch": 0.1678548282566429, "grad_norm": 1.305107831954956, "learning_rate": 4.991193072585158e-06, "loss": 0.1722, "step": 1036 }, { "epoch": 0.16801685029163965, "grad_norm": 1.245059847831726, "learning_rate": 4.991156360502397e-06, "loss": 0.1636, "step": 1037 }, { "epoch": 0.16817887232663642, "grad_norm": 1.260838270187378, "learning_rate": 4.99111957219628e-06, "loss": 0.1777, "step": 1038 }, { "epoch": 0.1683408943616332, "grad_norm": 1.13496994972229, "learning_rate": 4.9910827076679325e-06, "loss": 0.1579, "step": 1039 }, { "epoch": 0.16850291639662995, "grad_norm": 1.2134323120117188, "learning_rate": 4.991045766918482e-06, "loss": 0.1666, "step": 1040 }, { "epoch": 0.1686649384316267, "grad_norm": 1.0484683513641357, "learning_rate": 4.9910087499490585e-06, "loss": 0.1457, "step": 1041 }, { "epoch": 0.16882696046662346, "grad_norm": 1.2500617504119873, "learning_rate": 4.990971656760797e-06, "loss": 0.1704, "step": 1042 }, { "epoch": 0.16898898250162023, "grad_norm": 1.296579360961914, "learning_rate": 4.990934487354831e-06, "loss": 0.1739, "step": 1043 }, { "epoch": 0.16915100453661697, "grad_norm": 1.2237029075622559, "learning_rate": 4.990897241732296e-06, "loss": 0.1624, "step": 1044 }, { "epoch": 0.16931302657161373, "grad_norm": 1.361970067024231, "learning_rate": 4.9908599198943346e-06, "loss": 0.193, "step": 1045 }, { "epoch": 0.1694750486066105, "grad_norm": 1.2445952892303467, "learning_rate": 4.990822521842086e-06, "loss": 0.1788, "step": 1046 }, { "epoch": 0.16963707064160727, "grad_norm": 1.3412961959838867, "learning_rate": 4.990785047576697e-06, "loss": 0.1853, "step": 1047 }, { "epoch": 0.169799092676604, "grad_norm": 1.2915765047073364, "learning_rate": 4.990747497099312e-06, "loss": 0.182, "step": 1048 }, { "epoch": 0.16996111471160077, "grad_norm": 1.2325940132141113, "learning_rate": 4.990709870411082e-06, "loss": 0.1495, "step": 1049 }, { "epoch": 0.17012313674659754, "grad_norm": 1.2738209962844849, "learning_rate": 4.990672167513158e-06, "loss": 0.1584, "step": 1050 }, { "epoch": 0.1702851587815943, "grad_norm": 1.3789118528366089, "learning_rate": 4.990634388406692e-06, "loss": 0.1932, "step": 1051 }, { "epoch": 0.17044718081659105, "grad_norm": 1.3370823860168457, "learning_rate": 4.990596533092841e-06, "loss": 0.1788, "step": 1052 }, { "epoch": 0.17060920285158782, "grad_norm": 1.381735920906067, "learning_rate": 4.990558601572764e-06, "loss": 0.189, "step": 1053 }, { "epoch": 0.17077122488658458, "grad_norm": 1.2468198537826538, "learning_rate": 4.9905205938476195e-06, "loss": 0.1635, "step": 1054 }, { "epoch": 0.17093324692158132, "grad_norm": 1.2447727918624878, "learning_rate": 4.990482509918572e-06, "loss": 0.1759, "step": 1055 }, { "epoch": 0.1710952689565781, "grad_norm": 1.266304612159729, "learning_rate": 4.990444349786788e-06, "loss": 0.186, "step": 1056 }, { "epoch": 0.17125729099157486, "grad_norm": 1.2319304943084717, "learning_rate": 4.990406113453433e-06, "loss": 0.1781, "step": 1057 }, { "epoch": 0.17141931302657162, "grad_norm": 1.234866738319397, "learning_rate": 4.990367800919677e-06, "loss": 0.1719, "step": 1058 }, { "epoch": 0.17158133506156836, "grad_norm": 1.0335838794708252, "learning_rate": 4.990329412186693e-06, "loss": 0.1407, "step": 1059 }, { "epoch": 0.17174335709656513, "grad_norm": 1.2708618640899658, "learning_rate": 4.990290947255656e-06, "loss": 0.1635, "step": 1060 }, { "epoch": 0.1719053791315619, "grad_norm": 1.3228340148925781, "learning_rate": 4.990252406127742e-06, "loss": 0.186, "step": 1061 }, { "epoch": 0.17206740116655866, "grad_norm": 1.2613164186477661, "learning_rate": 4.9902137888041304e-06, "loss": 0.1698, "step": 1062 }, { "epoch": 0.1722294232015554, "grad_norm": 1.2189115285873413, "learning_rate": 4.990175095286003e-06, "loss": 0.1622, "step": 1063 }, { "epoch": 0.17239144523655217, "grad_norm": 1.113931655883789, "learning_rate": 4.990136325574545e-06, "loss": 0.1433, "step": 1064 }, { "epoch": 0.17255346727154894, "grad_norm": 1.1805305480957031, "learning_rate": 4.9900974796709405e-06, "loss": 0.1345, "step": 1065 }, { "epoch": 0.17271548930654568, "grad_norm": 1.3531560897827148, "learning_rate": 4.990058557576379e-06, "loss": 0.175, "step": 1066 }, { "epoch": 0.17287751134154244, "grad_norm": 1.403969168663025, "learning_rate": 4.990019559292052e-06, "loss": 0.1674, "step": 1067 }, { "epoch": 0.1730395333765392, "grad_norm": 1.4083645343780518, "learning_rate": 4.989980484819152e-06, "loss": 0.1722, "step": 1068 }, { "epoch": 0.17320155541153598, "grad_norm": 1.2746031284332275, "learning_rate": 4.989941334158874e-06, "loss": 0.1638, "step": 1069 }, { "epoch": 0.17336357744653272, "grad_norm": 1.2782847881317139, "learning_rate": 4.9899021073124175e-06, "loss": 0.1733, "step": 1070 }, { "epoch": 0.17352559948152949, "grad_norm": 1.4043595790863037, "learning_rate": 4.989862804280982e-06, "loss": 0.1756, "step": 1071 }, { "epoch": 0.17368762151652625, "grad_norm": 1.3001036643981934, "learning_rate": 4.989823425065769e-06, "loss": 0.1839, "step": 1072 }, { "epoch": 0.17384964355152302, "grad_norm": 1.2596933841705322, "learning_rate": 4.989783969667986e-06, "loss": 0.175, "step": 1073 }, { "epoch": 0.17401166558651976, "grad_norm": 1.2950873374938965, "learning_rate": 4.989744438088838e-06, "loss": 0.1684, "step": 1074 }, { "epoch": 0.17417368762151653, "grad_norm": 1.1493442058563232, "learning_rate": 4.989704830329535e-06, "loss": 0.1672, "step": 1075 }, { "epoch": 0.1743357096565133, "grad_norm": 1.1817610263824463, "learning_rate": 4.98966514639129e-06, "loss": 0.1617, "step": 1076 }, { "epoch": 0.17449773169151003, "grad_norm": 1.269439697265625, "learning_rate": 4.989625386275315e-06, "loss": 0.1821, "step": 1077 }, { "epoch": 0.1746597537265068, "grad_norm": 1.2774385213851929, "learning_rate": 4.98958554998283e-06, "loss": 0.1772, "step": 1078 }, { "epoch": 0.17482177576150357, "grad_norm": 1.3013243675231934, "learning_rate": 4.98954563751505e-06, "loss": 0.1776, "step": 1079 }, { "epoch": 0.17498379779650033, "grad_norm": 1.1634021997451782, "learning_rate": 4.989505648873198e-06, "loss": 0.164, "step": 1080 }, { "epoch": 0.17514581983149707, "grad_norm": 1.296128273010254, "learning_rate": 4.989465584058499e-06, "loss": 0.1689, "step": 1081 }, { "epoch": 0.17530784186649384, "grad_norm": 1.4235268831253052, "learning_rate": 4.989425443072177e-06, "loss": 0.2038, "step": 1082 }, { "epoch": 0.1754698639014906, "grad_norm": 1.384175181388855, "learning_rate": 4.989385225915461e-06, "loss": 0.1739, "step": 1083 }, { "epoch": 0.17563188593648738, "grad_norm": 1.1942464113235474, "learning_rate": 4.9893449325895804e-06, "loss": 0.1599, "step": 1084 }, { "epoch": 0.17579390797148411, "grad_norm": 1.3841326236724854, "learning_rate": 4.989304563095769e-06, "loss": 0.1975, "step": 1085 }, { "epoch": 0.17595593000648088, "grad_norm": 1.2877038717269897, "learning_rate": 4.989264117435263e-06, "loss": 0.1825, "step": 1086 }, { "epoch": 0.17611795204147765, "grad_norm": 1.3684138059616089, "learning_rate": 4.9892235956092985e-06, "loss": 0.1802, "step": 1087 }, { "epoch": 0.1762799740764744, "grad_norm": 1.2257750034332275, "learning_rate": 4.9891829976191155e-06, "loss": 0.169, "step": 1088 }, { "epoch": 0.17644199611147116, "grad_norm": 1.281955599784851, "learning_rate": 4.989142323465957e-06, "loss": 0.1884, "step": 1089 }, { "epoch": 0.17660401814646792, "grad_norm": 1.2138808965682983, "learning_rate": 4.9891015731510665e-06, "loss": 0.1738, "step": 1090 }, { "epoch": 0.1767660401814647, "grad_norm": 1.185698390007019, "learning_rate": 4.989060746675691e-06, "loss": 0.1527, "step": 1091 }, { "epoch": 0.17692806221646143, "grad_norm": 1.1625752449035645, "learning_rate": 4.989019844041081e-06, "loss": 0.1573, "step": 1092 }, { "epoch": 0.1770900842514582, "grad_norm": 1.1147958040237427, "learning_rate": 4.988978865248486e-06, "loss": 0.1471, "step": 1093 }, { "epoch": 0.17725210628645496, "grad_norm": 1.0958008766174316, "learning_rate": 4.988937810299161e-06, "loss": 0.1473, "step": 1094 }, { "epoch": 0.17741412832145173, "grad_norm": 1.2946974039077759, "learning_rate": 4.988896679194363e-06, "loss": 0.169, "step": 1095 }, { "epoch": 0.17757615035644847, "grad_norm": 1.2533652782440186, "learning_rate": 4.98885547193535e-06, "loss": 0.1622, "step": 1096 }, { "epoch": 0.17773817239144524, "grad_norm": 1.2509846687316895, "learning_rate": 4.988814188523381e-06, "loss": 0.1657, "step": 1097 }, { "epoch": 0.177900194426442, "grad_norm": 1.2330974340438843, "learning_rate": 4.988772828959722e-06, "loss": 0.171, "step": 1098 }, { "epoch": 0.17806221646143874, "grad_norm": 1.1308923959732056, "learning_rate": 4.988731393245636e-06, "loss": 0.1486, "step": 1099 }, { "epoch": 0.1782242384964355, "grad_norm": 1.2318450212478638, "learning_rate": 4.988689881382392e-06, "loss": 0.1681, "step": 1100 }, { "epoch": 0.17838626053143228, "grad_norm": 1.292179822921753, "learning_rate": 4.988648293371262e-06, "loss": 0.1751, "step": 1101 }, { "epoch": 0.17854828256642905, "grad_norm": 1.2476286888122559, "learning_rate": 4.988606629213515e-06, "loss": 0.1792, "step": 1102 }, { "epoch": 0.17871030460142578, "grad_norm": 1.4195069074630737, "learning_rate": 4.988564888910428e-06, "loss": 0.1311, "step": 1103 }, { "epoch": 0.17887232663642255, "grad_norm": 1.1690012216567993, "learning_rate": 4.9885230724632775e-06, "loss": 0.1554, "step": 1104 }, { "epoch": 0.17903434867141932, "grad_norm": 1.2006351947784424, "learning_rate": 4.988481179873342e-06, "loss": 0.1589, "step": 1105 }, { "epoch": 0.17919637070641609, "grad_norm": 1.189113974571228, "learning_rate": 4.9884392111419056e-06, "loss": 0.1631, "step": 1106 }, { "epoch": 0.17935839274141283, "grad_norm": 1.1603453159332275, "learning_rate": 4.9883971662702514e-06, "loss": 0.1682, "step": 1107 }, { "epoch": 0.1795204147764096, "grad_norm": 1.2347701787948608, "learning_rate": 4.988355045259665e-06, "loss": 0.1641, "step": 1108 }, { "epoch": 0.17968243681140636, "grad_norm": 1.1546489000320435, "learning_rate": 4.988312848111436e-06, "loss": 0.1586, "step": 1109 }, { "epoch": 0.1798444588464031, "grad_norm": 1.1792272329330444, "learning_rate": 4.988270574826857e-06, "loss": 0.1579, "step": 1110 }, { "epoch": 0.18000648088139987, "grad_norm": 1.2593525648117065, "learning_rate": 4.988228225407218e-06, "loss": 0.1624, "step": 1111 }, { "epoch": 0.18016850291639663, "grad_norm": 1.1456053256988525, "learning_rate": 4.9881857998538175e-06, "loss": 0.1591, "step": 1112 }, { "epoch": 0.1803305249513934, "grad_norm": 1.169472336769104, "learning_rate": 4.988143298167952e-06, "loss": 0.1534, "step": 1113 }, { "epoch": 0.18049254698639014, "grad_norm": 1.1957638263702393, "learning_rate": 4.988100720350924e-06, "loss": 0.1684, "step": 1114 }, { "epoch": 0.1806545690213869, "grad_norm": 1.2542282342910767, "learning_rate": 4.988058066404035e-06, "loss": 0.1566, "step": 1115 }, { "epoch": 0.18081659105638367, "grad_norm": 1.421115517616272, "learning_rate": 4.988015336328589e-06, "loss": 0.1874, "step": 1116 }, { "epoch": 0.18097861309138044, "grad_norm": 1.3274586200714111, "learning_rate": 4.987972530125895e-06, "loss": 0.19, "step": 1117 }, { "epoch": 0.18114063512637718, "grad_norm": 1.2618639469146729, "learning_rate": 4.987929647797263e-06, "loss": 0.1596, "step": 1118 }, { "epoch": 0.18130265716137395, "grad_norm": 1.172289490699768, "learning_rate": 4.987886689344003e-06, "loss": 0.1407, "step": 1119 }, { "epoch": 0.18146467919637072, "grad_norm": 1.2448173761367798, "learning_rate": 4.987843654767432e-06, "loss": 0.1548, "step": 1120 }, { "epoch": 0.18162670123136745, "grad_norm": 1.3023630380630493, "learning_rate": 4.987800544068866e-06, "loss": 0.1638, "step": 1121 }, { "epoch": 0.18178872326636422, "grad_norm": 1.1977190971374512, "learning_rate": 4.987757357249623e-06, "loss": 0.1713, "step": 1122 }, { "epoch": 0.181950745301361, "grad_norm": 1.3710788488388062, "learning_rate": 4.987714094311026e-06, "loss": 0.1915, "step": 1123 }, { "epoch": 0.18211276733635776, "grad_norm": 1.228145956993103, "learning_rate": 4.987670755254397e-06, "loss": 0.1672, "step": 1124 }, { "epoch": 0.1822747893713545, "grad_norm": 1.3164801597595215, "learning_rate": 4.9876273400810636e-06, "loss": 0.2021, "step": 1125 }, { "epoch": 0.18243681140635126, "grad_norm": 1.2684440612792969, "learning_rate": 4.987583848792353e-06, "loss": 0.1668, "step": 1126 }, { "epoch": 0.18259883344134803, "grad_norm": 1.2884535789489746, "learning_rate": 4.987540281389596e-06, "loss": 0.1717, "step": 1127 }, { "epoch": 0.18276085547634477, "grad_norm": 1.170680284500122, "learning_rate": 4.987496637874127e-06, "loss": 0.1524, "step": 1128 }, { "epoch": 0.18292287751134154, "grad_norm": 1.2426053285598755, "learning_rate": 4.98745291824728e-06, "loss": 0.167, "step": 1129 }, { "epoch": 0.1830848995463383, "grad_norm": 1.2498632669448853, "learning_rate": 4.987409122510394e-06, "loss": 0.1702, "step": 1130 }, { "epoch": 0.18324692158133507, "grad_norm": 1.2569835186004639, "learning_rate": 4.987365250664807e-06, "loss": 0.1658, "step": 1131 }, { "epoch": 0.1834089436163318, "grad_norm": 1.327354073524475, "learning_rate": 4.9873213027118635e-06, "loss": 0.1838, "step": 1132 }, { "epoch": 0.18357096565132858, "grad_norm": 1.0752174854278564, "learning_rate": 4.987277278652907e-06, "loss": 0.1393, "step": 1133 }, { "epoch": 0.18373298768632534, "grad_norm": 1.2634954452514648, "learning_rate": 4.987233178489285e-06, "loss": 0.1603, "step": 1134 }, { "epoch": 0.1838950097213221, "grad_norm": 1.1911259889602661, "learning_rate": 4.987189002222347e-06, "loss": 0.1575, "step": 1135 }, { "epoch": 0.18405703175631885, "grad_norm": 1.2841031551361084, "learning_rate": 4.987144749853444e-06, "loss": 0.1747, "step": 1136 }, { "epoch": 0.18421905379131562, "grad_norm": 1.2230887413024902, "learning_rate": 4.987100421383931e-06, "loss": 0.1785, "step": 1137 }, { "epoch": 0.18438107582631239, "grad_norm": 1.2518471479415894, "learning_rate": 4.987056016815163e-06, "loss": 0.1764, "step": 1138 }, { "epoch": 0.18454309786130912, "grad_norm": 1.0959880352020264, "learning_rate": 4.9870115361485e-06, "loss": 0.1398, "step": 1139 }, { "epoch": 0.1847051198963059, "grad_norm": 1.3243485689163208, "learning_rate": 4.986966979385302e-06, "loss": 0.1744, "step": 1140 }, { "epoch": 0.18486714193130266, "grad_norm": 1.2652283906936646, "learning_rate": 4.986922346526933e-06, "loss": 0.1814, "step": 1141 }, { "epoch": 0.18502916396629943, "grad_norm": 1.2872614860534668, "learning_rate": 4.986877637574758e-06, "loss": 0.1784, "step": 1142 }, { "epoch": 0.18519118600129617, "grad_norm": 1.1612954139709473, "learning_rate": 4.9868328525301465e-06, "loss": 0.1584, "step": 1143 }, { "epoch": 0.18535320803629293, "grad_norm": 1.3280037641525269, "learning_rate": 4.986787991394467e-06, "loss": 0.1737, "step": 1144 }, { "epoch": 0.1855152300712897, "grad_norm": 1.0931044816970825, "learning_rate": 4.986743054169093e-06, "loss": 0.1389, "step": 1145 }, { "epoch": 0.18567725210628647, "grad_norm": 1.2529737949371338, "learning_rate": 4.9866980408554e-06, "loss": 0.1537, "step": 1146 }, { "epoch": 0.1858392741412832, "grad_norm": 1.2056132555007935, "learning_rate": 4.986652951454764e-06, "loss": 0.1531, "step": 1147 }, { "epoch": 0.18600129617627997, "grad_norm": 1.2318496704101562, "learning_rate": 4.986607785968565e-06, "loss": 0.1839, "step": 1148 }, { "epoch": 0.18616331821127674, "grad_norm": 1.3150043487548828, "learning_rate": 4.9865625443981854e-06, "loss": 0.1764, "step": 1149 }, { "epoch": 0.18632534024627348, "grad_norm": 1.3862760066986084, "learning_rate": 4.986517226745009e-06, "loss": 0.1665, "step": 1150 }, { "epoch": 0.18648736228127025, "grad_norm": 1.3067452907562256, "learning_rate": 4.986471833010423e-06, "loss": 0.1704, "step": 1151 }, { "epoch": 0.18664938431626701, "grad_norm": 1.1937528848648071, "learning_rate": 4.9864263631958165e-06, "loss": 0.1661, "step": 1152 }, { "epoch": 0.18681140635126378, "grad_norm": 1.2431349754333496, "learning_rate": 4.986380817302579e-06, "loss": 0.1721, "step": 1153 }, { "epoch": 0.18697342838626052, "grad_norm": 1.0870198011398315, "learning_rate": 4.986335195332107e-06, "loss": 0.1445, "step": 1154 }, { "epoch": 0.1871354504212573, "grad_norm": 1.366522192955017, "learning_rate": 4.986289497285794e-06, "loss": 0.1665, "step": 1155 }, { "epoch": 0.18729747245625405, "grad_norm": 1.1426249742507935, "learning_rate": 4.986243723165039e-06, "loss": 0.1526, "step": 1156 }, { "epoch": 0.18745949449125082, "grad_norm": 1.2970033884048462, "learning_rate": 4.986197872971244e-06, "loss": 0.18, "step": 1157 }, { "epoch": 0.18762151652624756, "grad_norm": 1.2247062921524048, "learning_rate": 4.9861519467058094e-06, "loss": 0.1613, "step": 1158 }, { "epoch": 0.18778353856124433, "grad_norm": 1.169364333152771, "learning_rate": 4.986105944370142e-06, "loss": 0.1621, "step": 1159 }, { "epoch": 0.1879455605962411, "grad_norm": 1.2808866500854492, "learning_rate": 4.986059865965649e-06, "loss": 0.1978, "step": 1160 }, { "epoch": 0.18810758263123784, "grad_norm": 1.209956169128418, "learning_rate": 4.986013711493739e-06, "loss": 0.1637, "step": 1161 }, { "epoch": 0.1882696046662346, "grad_norm": 1.3208413124084473, "learning_rate": 4.985967480955827e-06, "loss": 0.181, "step": 1162 }, { "epoch": 0.18843162670123137, "grad_norm": 1.1594798564910889, "learning_rate": 4.985921174353325e-06, "loss": 0.1576, "step": 1163 }, { "epoch": 0.18859364873622814, "grad_norm": 1.7212146520614624, "learning_rate": 4.9858747916876515e-06, "loss": 0.1835, "step": 1164 }, { "epoch": 0.18875567077122488, "grad_norm": 1.0957105159759521, "learning_rate": 4.985828332960225e-06, "loss": 0.1462, "step": 1165 }, { "epoch": 0.18891769280622164, "grad_norm": 1.3084644079208374, "learning_rate": 4.985781798172467e-06, "loss": 0.1791, "step": 1166 }, { "epoch": 0.1890797148412184, "grad_norm": 1.1999437808990479, "learning_rate": 4.985735187325802e-06, "loss": 0.1615, "step": 1167 }, { "epoch": 0.18924173687621518, "grad_norm": 1.319326400756836, "learning_rate": 4.9856885004216545e-06, "loss": 0.1777, "step": 1168 }, { "epoch": 0.18940375891121192, "grad_norm": 1.5149074792861938, "learning_rate": 4.985641737461455e-06, "loss": 0.1957, "step": 1169 }, { "epoch": 0.18956578094620868, "grad_norm": 1.3002344369888306, "learning_rate": 4.985594898446633e-06, "loss": 0.1655, "step": 1170 }, { "epoch": 0.18972780298120545, "grad_norm": 1.3267287015914917, "learning_rate": 4.985547983378622e-06, "loss": 0.1698, "step": 1171 }, { "epoch": 0.1898898250162022, "grad_norm": 1.2608460187911987, "learning_rate": 4.9855009922588585e-06, "loss": 0.1638, "step": 1172 }, { "epoch": 0.19005184705119896, "grad_norm": 1.1155411005020142, "learning_rate": 4.985453925088779e-06, "loss": 0.1507, "step": 1173 }, { "epoch": 0.19021386908619572, "grad_norm": 1.2303216457366943, "learning_rate": 4.985406781869824e-06, "loss": 0.1799, "step": 1174 }, { "epoch": 0.1903758911211925, "grad_norm": 1.1471197605133057, "learning_rate": 4.985359562603436e-06, "loss": 0.1512, "step": 1175 }, { "epoch": 0.19053791315618923, "grad_norm": 1.1257221698760986, "learning_rate": 4.98531226729106e-06, "loss": 0.1563, "step": 1176 }, { "epoch": 0.190699935191186, "grad_norm": 1.333609700202942, "learning_rate": 4.985264895934142e-06, "loss": 0.2002, "step": 1177 }, { "epoch": 0.19086195722618277, "grad_norm": 1.2045459747314453, "learning_rate": 4.985217448534134e-06, "loss": 0.1613, "step": 1178 }, { "epoch": 0.19102397926117953, "grad_norm": 1.1349177360534668, "learning_rate": 4.985169925092485e-06, "loss": 0.1533, "step": 1179 }, { "epoch": 0.19118600129617627, "grad_norm": 1.0895230770111084, "learning_rate": 4.985122325610651e-06, "loss": 0.1478, "step": 1180 }, { "epoch": 0.19134802333117304, "grad_norm": 1.375070571899414, "learning_rate": 4.985074650090087e-06, "loss": 0.1824, "step": 1181 }, { "epoch": 0.1915100453661698, "grad_norm": 1.3039453029632568, "learning_rate": 4.985026898532253e-06, "loss": 0.1727, "step": 1182 }, { "epoch": 0.19167206740116655, "grad_norm": 1.2111849784851074, "learning_rate": 4.984979070938609e-06, "loss": 0.1557, "step": 1183 }, { "epoch": 0.1918340894361633, "grad_norm": 1.2889275550842285, "learning_rate": 4.98493116731062e-06, "loss": 0.1659, "step": 1184 }, { "epoch": 0.19199611147116008, "grad_norm": 1.2837518453598022, "learning_rate": 4.98488318764975e-06, "loss": 0.1584, "step": 1185 }, { "epoch": 0.19215813350615685, "grad_norm": 1.1638296842575073, "learning_rate": 4.984835131957468e-06, "loss": 0.1552, "step": 1186 }, { "epoch": 0.1923201555411536, "grad_norm": 1.2172795534133911, "learning_rate": 4.9847870002352435e-06, "loss": 0.1586, "step": 1187 }, { "epoch": 0.19248217757615035, "grad_norm": 1.3987014293670654, "learning_rate": 4.98473879248455e-06, "loss": 0.1872, "step": 1188 }, { "epoch": 0.19264419961114712, "grad_norm": 1.2352248430252075, "learning_rate": 4.984690508706863e-06, "loss": 0.1545, "step": 1189 }, { "epoch": 0.1928062216461439, "grad_norm": 1.2587782144546509, "learning_rate": 4.984642148903659e-06, "loss": 0.1632, "step": 1190 }, { "epoch": 0.19296824368114063, "grad_norm": 1.3047451972961426, "learning_rate": 4.9845937130764185e-06, "loss": 0.1715, "step": 1191 }, { "epoch": 0.1931302657161374, "grad_norm": 1.393811583518982, "learning_rate": 4.984545201226623e-06, "loss": 0.1814, "step": 1192 }, { "epoch": 0.19329228775113416, "grad_norm": 1.2711546421051025, "learning_rate": 4.984496613355756e-06, "loss": 0.1545, "step": 1193 }, { "epoch": 0.1934543097861309, "grad_norm": 1.268141269683838, "learning_rate": 4.984447949465305e-06, "loss": 0.1825, "step": 1194 }, { "epoch": 0.19361633182112767, "grad_norm": 1.3329243659973145, "learning_rate": 4.984399209556759e-06, "loss": 0.2102, "step": 1195 }, { "epoch": 0.19377835385612444, "grad_norm": 1.380758285522461, "learning_rate": 4.98435039363161e-06, "loss": 0.1462, "step": 1196 }, { "epoch": 0.1939403758911212, "grad_norm": 1.252043604850769, "learning_rate": 4.98430150169135e-06, "loss": 0.1578, "step": 1197 }, { "epoch": 0.19410239792611794, "grad_norm": 1.2641617059707642, "learning_rate": 4.984252533737477e-06, "loss": 0.177, "step": 1198 }, { "epoch": 0.1942644199611147, "grad_norm": 1.1985224485397339, "learning_rate": 4.984203489771488e-06, "loss": 0.1531, "step": 1199 }, { "epoch": 0.19442644199611148, "grad_norm": 1.3280110359191895, "learning_rate": 4.984154369794883e-06, "loss": 0.181, "step": 1200 }, { "epoch": 0.19458846403110824, "grad_norm": 1.196886658668518, "learning_rate": 4.9841051738091675e-06, "loss": 0.1658, "step": 1201 }, { "epoch": 0.19475048606610498, "grad_norm": 1.1714789867401123, "learning_rate": 4.984055901815844e-06, "loss": 0.1547, "step": 1202 }, { "epoch": 0.19491250810110175, "grad_norm": 1.390263557434082, "learning_rate": 4.984006553816421e-06, "loss": 0.1754, "step": 1203 }, { "epoch": 0.19507453013609852, "grad_norm": 1.1799697875976562, "learning_rate": 4.983957129812409e-06, "loss": 0.147, "step": 1204 }, { "epoch": 0.19523655217109526, "grad_norm": 1.1202303171157837, "learning_rate": 4.983907629805319e-06, "loss": 0.1609, "step": 1205 }, { "epoch": 0.19539857420609202, "grad_norm": 1.2552509307861328, "learning_rate": 4.9838580537966676e-06, "loss": 0.1653, "step": 1206 }, { "epoch": 0.1955605962410888, "grad_norm": 1.516097068786621, "learning_rate": 4.98380840178797e-06, "loss": 0.1858, "step": 1207 }, { "epoch": 0.19572261827608556, "grad_norm": 1.360250473022461, "learning_rate": 4.983758673780747e-06, "loss": 0.1848, "step": 1208 }, { "epoch": 0.1958846403110823, "grad_norm": 1.223142385482788, "learning_rate": 4.983708869776518e-06, "loss": 0.1666, "step": 1209 }, { "epoch": 0.19604666234607906, "grad_norm": 1.1533637046813965, "learning_rate": 4.9836589897768084e-06, "loss": 0.1673, "step": 1210 }, { "epoch": 0.19620868438107583, "grad_norm": 1.2063921689987183, "learning_rate": 4.983609033783144e-06, "loss": 0.165, "step": 1211 }, { "epoch": 0.1963707064160726, "grad_norm": 1.1458781957626343, "learning_rate": 4.983559001797054e-06, "loss": 0.1422, "step": 1212 }, { "epoch": 0.19653272845106934, "grad_norm": 1.3407319784164429, "learning_rate": 4.9835088938200674e-06, "loss": 0.2003, "step": 1213 }, { "epoch": 0.1966947504860661, "grad_norm": 1.2016749382019043, "learning_rate": 4.983458709853719e-06, "loss": 0.1774, "step": 1214 }, { "epoch": 0.19685677252106287, "grad_norm": 1.1205006837844849, "learning_rate": 4.983408449899545e-06, "loss": 0.1567, "step": 1215 }, { "epoch": 0.1970187945560596, "grad_norm": 1.1485540866851807, "learning_rate": 4.9833581139590814e-06, "loss": 0.1703, "step": 1216 }, { "epoch": 0.19718081659105638, "grad_norm": 1.086588978767395, "learning_rate": 4.983307702033869e-06, "loss": 0.1514, "step": 1217 }, { "epoch": 0.19734283862605315, "grad_norm": 1.2290334701538086, "learning_rate": 4.983257214125451e-06, "loss": 0.1737, "step": 1218 }, { "epoch": 0.1975048606610499, "grad_norm": 1.3126085996627808, "learning_rate": 4.98320665023537e-06, "loss": 0.1807, "step": 1219 }, { "epoch": 0.19766688269604665, "grad_norm": 1.2392646074295044, "learning_rate": 4.9831560103651765e-06, "loss": 0.184, "step": 1220 }, { "epoch": 0.19782890473104342, "grad_norm": 1.257860779762268, "learning_rate": 4.983105294516418e-06, "loss": 0.1823, "step": 1221 }, { "epoch": 0.1979909267660402, "grad_norm": 1.3138630390167236, "learning_rate": 4.983054502690646e-06, "loss": 0.1887, "step": 1222 }, { "epoch": 0.19815294880103695, "grad_norm": 1.1415594816207886, "learning_rate": 4.983003634889415e-06, "loss": 0.1509, "step": 1223 }, { "epoch": 0.1983149708360337, "grad_norm": 1.1062023639678955, "learning_rate": 4.9829526911142825e-06, "loss": 0.1551, "step": 1224 }, { "epoch": 0.19847699287103046, "grad_norm": 1.069726824760437, "learning_rate": 4.982901671366805e-06, "loss": 0.1436, "step": 1225 }, { "epoch": 0.19863901490602723, "grad_norm": 1.1246789693832397, "learning_rate": 4.982850575648545e-06, "loss": 0.1428, "step": 1226 }, { "epoch": 0.19880103694102397, "grad_norm": 1.2873611450195312, "learning_rate": 4.982799403961067e-06, "loss": 0.1658, "step": 1227 }, { "epoch": 0.19896305897602073, "grad_norm": 1.4319779872894287, "learning_rate": 4.982748156305934e-06, "loss": 0.1705, "step": 1228 }, { "epoch": 0.1991250810110175, "grad_norm": 1.3176896572113037, "learning_rate": 4.982696832684716e-06, "loss": 0.1681, "step": 1229 }, { "epoch": 0.19928710304601427, "grad_norm": 1.3779314756393433, "learning_rate": 4.982645433098984e-06, "loss": 0.1747, "step": 1230 }, { "epoch": 0.199449125081011, "grad_norm": 1.322016954421997, "learning_rate": 4.982593957550308e-06, "loss": 0.1696, "step": 1231 }, { "epoch": 0.19961114711600778, "grad_norm": 1.2837717533111572, "learning_rate": 4.982542406040266e-06, "loss": 0.1536, "step": 1232 }, { "epoch": 0.19977316915100454, "grad_norm": 1.2116730213165283, "learning_rate": 4.982490778570434e-06, "loss": 0.174, "step": 1233 }, { "epoch": 0.1999351911860013, "grad_norm": 1.3564155101776123, "learning_rate": 4.98243907514239e-06, "loss": 0.1875, "step": 1234 }, { "epoch": 0.20009721322099805, "grad_norm": 1.15635085105896, "learning_rate": 4.982387295757719e-06, "loss": 0.1603, "step": 1235 }, { "epoch": 0.20025923525599482, "grad_norm": 1.2285187244415283, "learning_rate": 4.982335440418004e-06, "loss": 0.1765, "step": 1236 }, { "epoch": 0.20042125729099158, "grad_norm": 1.1799185276031494, "learning_rate": 4.982283509124831e-06, "loss": 0.1598, "step": 1237 }, { "epoch": 0.20058327932598832, "grad_norm": 1.2755019664764404, "learning_rate": 4.98223150187979e-06, "loss": 0.1781, "step": 1238 }, { "epoch": 0.2007453013609851, "grad_norm": 1.2312284708023071, "learning_rate": 4.982179418684473e-06, "loss": 0.1673, "step": 1239 }, { "epoch": 0.20090732339598186, "grad_norm": 1.0840716361999512, "learning_rate": 4.982127259540471e-06, "loss": 0.1442, "step": 1240 }, { "epoch": 0.20106934543097862, "grad_norm": 1.2539925575256348, "learning_rate": 4.9820750244493825e-06, "loss": 0.157, "step": 1241 }, { "epoch": 0.20123136746597536, "grad_norm": 1.1770260334014893, "learning_rate": 4.9820227134128045e-06, "loss": 0.1677, "step": 1242 }, { "epoch": 0.20139338950097213, "grad_norm": 1.3164162635803223, "learning_rate": 4.9819703264323375e-06, "loss": 0.163, "step": 1243 }, { "epoch": 0.2015554115359689, "grad_norm": 1.240263819694519, "learning_rate": 4.981917863509585e-06, "loss": 0.1804, "step": 1244 }, { "epoch": 0.20171743357096567, "grad_norm": 1.2952622175216675, "learning_rate": 4.981865324646152e-06, "loss": 0.1781, "step": 1245 }, { "epoch": 0.2018794556059624, "grad_norm": 1.1874738931655884, "learning_rate": 4.981812709843646e-06, "loss": 0.1665, "step": 1246 }, { "epoch": 0.20204147764095917, "grad_norm": 1.251424789428711, "learning_rate": 4.981760019103677e-06, "loss": 0.1743, "step": 1247 }, { "epoch": 0.20220349967595594, "grad_norm": 1.243334174156189, "learning_rate": 4.981707252427857e-06, "loss": 0.166, "step": 1248 }, { "epoch": 0.20236552171095268, "grad_norm": 1.208656668663025, "learning_rate": 4.981654409817801e-06, "loss": 0.1696, "step": 1249 }, { "epoch": 0.20252754374594945, "grad_norm": 1.227049708366394, "learning_rate": 4.981601491275125e-06, "loss": 0.1641, "step": 1250 }, { "epoch": 0.2026895657809462, "grad_norm": 1.4524271488189697, "learning_rate": 4.981548496801449e-06, "loss": 0.1648, "step": 1251 }, { "epoch": 0.20285158781594298, "grad_norm": 1.2255809307098389, "learning_rate": 4.981495426398395e-06, "loss": 0.1596, "step": 1252 }, { "epoch": 0.20301360985093972, "grad_norm": 1.2606754302978516, "learning_rate": 4.981442280067585e-06, "loss": 0.1745, "step": 1253 }, { "epoch": 0.2031756318859365, "grad_norm": 1.145128846168518, "learning_rate": 4.981389057810647e-06, "loss": 0.1593, "step": 1254 }, { "epoch": 0.20333765392093325, "grad_norm": 1.2478601932525635, "learning_rate": 4.981335759629208e-06, "loss": 0.1837, "step": 1255 }, { "epoch": 0.20349967595593, "grad_norm": 1.1817132234573364, "learning_rate": 4.9812823855248996e-06, "loss": 0.152, "step": 1256 }, { "epoch": 0.20366169799092676, "grad_norm": 1.2523393630981445, "learning_rate": 4.981228935499355e-06, "loss": 0.1685, "step": 1257 }, { "epoch": 0.20382372002592353, "grad_norm": 1.1938599348068237, "learning_rate": 4.98117540955421e-06, "loss": 0.1645, "step": 1258 }, { "epoch": 0.2039857420609203, "grad_norm": 1.2206389904022217, "learning_rate": 4.981121807691101e-06, "loss": 0.1622, "step": 1259 }, { "epoch": 0.20414776409591703, "grad_norm": 1.2022802829742432, "learning_rate": 4.981068129911669e-06, "loss": 0.1563, "step": 1260 }, { "epoch": 0.2043097861309138, "grad_norm": 1.244292974472046, "learning_rate": 4.981014376217556e-06, "loss": 0.1593, "step": 1261 }, { "epoch": 0.20447180816591057, "grad_norm": 1.2590563297271729, "learning_rate": 4.980960546610408e-06, "loss": 0.1762, "step": 1262 }, { "epoch": 0.20463383020090734, "grad_norm": 1.3502979278564453, "learning_rate": 4.98090664109187e-06, "loss": 0.1802, "step": 1263 }, { "epoch": 0.20479585223590407, "grad_norm": 1.1320891380310059, "learning_rate": 4.980852659663593e-06, "loss": 0.1658, "step": 1264 }, { "epoch": 0.20495787427090084, "grad_norm": 1.092449426651001, "learning_rate": 4.980798602327228e-06, "loss": 0.1452, "step": 1265 }, { "epoch": 0.2051198963058976, "grad_norm": 1.2534480094909668, "learning_rate": 4.9807444690844296e-06, "loss": 0.1628, "step": 1266 }, { "epoch": 0.20528191834089435, "grad_norm": 1.4840904474258423, "learning_rate": 4.980690259936853e-06, "loss": 0.1727, "step": 1267 }, { "epoch": 0.20544394037589112, "grad_norm": 1.1676522493362427, "learning_rate": 4.980635974886158e-06, "loss": 0.1553, "step": 1268 }, { "epoch": 0.20560596241088788, "grad_norm": 1.1992971897125244, "learning_rate": 4.980581613934005e-06, "loss": 0.1611, "step": 1269 }, { "epoch": 0.20576798444588465, "grad_norm": 1.3434457778930664, "learning_rate": 4.980527177082058e-06, "loss": 0.2013, "step": 1270 }, { "epoch": 0.2059300064808814, "grad_norm": 1.306654691696167, "learning_rate": 4.980472664331982e-06, "loss": 0.196, "step": 1271 }, { "epoch": 0.20609202851587816, "grad_norm": 1.187699317932129, "learning_rate": 4.980418075685445e-06, "loss": 0.1684, "step": 1272 }, { "epoch": 0.20625405055087492, "grad_norm": 1.3280266523361206, "learning_rate": 4.980363411144117e-06, "loss": 0.1887, "step": 1273 }, { "epoch": 0.2064160725858717, "grad_norm": 1.3123143911361694, "learning_rate": 4.980308670709671e-06, "loss": 0.1653, "step": 1274 }, { "epoch": 0.20657809462086843, "grad_norm": 1.1524012088775635, "learning_rate": 4.980253854383782e-06, "loss": 0.1647, "step": 1275 }, { "epoch": 0.2067401166558652, "grad_norm": 1.745810866355896, "learning_rate": 4.980198962168128e-06, "loss": 0.1887, "step": 1276 }, { "epoch": 0.20690213869086196, "grad_norm": 1.2560205459594727, "learning_rate": 4.980143994064387e-06, "loss": 0.1829, "step": 1277 }, { "epoch": 0.2070641607258587, "grad_norm": 1.1559257507324219, "learning_rate": 4.9800889500742415e-06, "loss": 0.1694, "step": 1278 }, { "epoch": 0.20722618276085547, "grad_norm": 1.3864670991897583, "learning_rate": 4.980033830199376e-06, "loss": 0.1933, "step": 1279 }, { "epoch": 0.20738820479585224, "grad_norm": 1.1323646306991577, "learning_rate": 4.979978634441477e-06, "loss": 0.1587, "step": 1280 }, { "epoch": 0.207550226830849, "grad_norm": 1.1979477405548096, "learning_rate": 4.979923362802233e-06, "loss": 0.1691, "step": 1281 }, { "epoch": 0.20771224886584574, "grad_norm": 1.0671766996383667, "learning_rate": 4.979868015283336e-06, "loss": 0.1499, "step": 1282 }, { "epoch": 0.2078742709008425, "grad_norm": 1.2722365856170654, "learning_rate": 4.979812591886478e-06, "loss": 0.1702, "step": 1283 }, { "epoch": 0.20803629293583928, "grad_norm": 1.3761050701141357, "learning_rate": 4.979757092613357e-06, "loss": 0.1916, "step": 1284 }, { "epoch": 0.20819831497083605, "grad_norm": 1.2674570083618164, "learning_rate": 4.9797015174656685e-06, "loss": 0.1792, "step": 1285 }, { "epoch": 0.20836033700583279, "grad_norm": 1.072959065437317, "learning_rate": 4.979645866445114e-06, "loss": 0.1454, "step": 1286 }, { "epoch": 0.20852235904082955, "grad_norm": 1.4338332414627075, "learning_rate": 4.979590139553398e-06, "loss": 0.1976, "step": 1287 }, { "epoch": 0.20868438107582632, "grad_norm": 1.3137916326522827, "learning_rate": 4.9795343367922235e-06, "loss": 0.1822, "step": 1288 }, { "epoch": 0.20884640311082306, "grad_norm": 1.1669158935546875, "learning_rate": 4.9794784581632986e-06, "loss": 0.1585, "step": 1289 }, { "epoch": 0.20900842514581983, "grad_norm": 1.2235841751098633, "learning_rate": 4.979422503668334e-06, "loss": 0.1558, "step": 1290 }, { "epoch": 0.2091704471808166, "grad_norm": 1.178096890449524, "learning_rate": 4.97936647330904e-06, "loss": 0.1593, "step": 1291 }, { "epoch": 0.20933246921581336, "grad_norm": 1.1539371013641357, "learning_rate": 4.979310367087132e-06, "loss": 0.1526, "step": 1292 }, { "epoch": 0.2094944912508101, "grad_norm": 1.1756463050842285, "learning_rate": 4.979254185004327e-06, "loss": 0.1486, "step": 1293 }, { "epoch": 0.20965651328580687, "grad_norm": 1.4390196800231934, "learning_rate": 4.979197927062343e-06, "loss": 0.1937, "step": 1294 }, { "epoch": 0.20981853532080363, "grad_norm": 1.195878505706787, "learning_rate": 4.979141593262902e-06, "loss": 0.1656, "step": 1295 }, { "epoch": 0.2099805573558004, "grad_norm": 1.1376088857650757, "learning_rate": 4.979085183607728e-06, "loss": 0.1668, "step": 1296 }, { "epoch": 0.21014257939079714, "grad_norm": 1.0837515592575073, "learning_rate": 4.979028698098546e-06, "loss": 0.1448, "step": 1297 }, { "epoch": 0.2103046014257939, "grad_norm": 1.2839235067367554, "learning_rate": 4.978972136737086e-06, "loss": 0.1886, "step": 1298 }, { "epoch": 0.21046662346079067, "grad_norm": 1.239888072013855, "learning_rate": 4.978915499525077e-06, "loss": 0.175, "step": 1299 }, { "epoch": 0.21062864549578741, "grad_norm": 1.2794440984725952, "learning_rate": 4.978858786464252e-06, "loss": 0.1718, "step": 1300 }, { "epoch": 0.21079066753078418, "grad_norm": 1.2917277812957764, "learning_rate": 4.978801997556348e-06, "loss": 0.1761, "step": 1301 }, { "epoch": 0.21095268956578095, "grad_norm": 1.2395626306533813, "learning_rate": 4.978745132803101e-06, "loss": 0.1647, "step": 1302 }, { "epoch": 0.21111471160077772, "grad_norm": 1.2468279600143433, "learning_rate": 4.9786881922062515e-06, "loss": 0.1574, "step": 1303 }, { "epoch": 0.21127673363577446, "grad_norm": 1.3926787376403809, "learning_rate": 4.9786311757675425e-06, "loss": 0.2085, "step": 1304 }, { "epoch": 0.21143875567077122, "grad_norm": 1.1423304080963135, "learning_rate": 4.978574083488716e-06, "loss": 0.1471, "step": 1305 }, { "epoch": 0.211600777705768, "grad_norm": 1.2583112716674805, "learning_rate": 4.978516915371522e-06, "loss": 0.1673, "step": 1306 }, { "epoch": 0.21176279974076476, "grad_norm": 1.2514091730117798, "learning_rate": 4.978459671417707e-06, "loss": 0.1794, "step": 1307 }, { "epoch": 0.2119248217757615, "grad_norm": 1.5359858274459839, "learning_rate": 4.978402351629024e-06, "loss": 0.1582, "step": 1308 }, { "epoch": 0.21208684381075826, "grad_norm": 1.161242127418518, "learning_rate": 4.978344956007227e-06, "loss": 0.1542, "step": 1309 }, { "epoch": 0.21224886584575503, "grad_norm": 1.2749699354171753, "learning_rate": 4.9782874845540715e-06, "loss": 0.1607, "step": 1310 }, { "epoch": 0.21241088788075177, "grad_norm": 1.3189657926559448, "learning_rate": 4.978229937271317e-06, "loss": 0.1844, "step": 1311 }, { "epoch": 0.21257290991574854, "grad_norm": 1.31024169921875, "learning_rate": 4.978172314160724e-06, "loss": 0.1992, "step": 1312 }, { "epoch": 0.2127349319507453, "grad_norm": 1.2073999643325806, "learning_rate": 4.978114615224055e-06, "loss": 0.1685, "step": 1313 }, { "epoch": 0.21289695398574207, "grad_norm": 1.292680263519287, "learning_rate": 4.9780568404630746e-06, "loss": 0.1758, "step": 1314 }, { "epoch": 0.2130589760207388, "grad_norm": 1.2444231510162354, "learning_rate": 4.977998989879552e-06, "loss": 0.1771, "step": 1315 }, { "epoch": 0.21322099805573558, "grad_norm": 1.1644980907440186, "learning_rate": 4.977941063475258e-06, "loss": 0.1637, "step": 1316 }, { "epoch": 0.21338302009073234, "grad_norm": 1.2748589515686035, "learning_rate": 4.977883061251962e-06, "loss": 0.1813, "step": 1317 }, { "epoch": 0.2135450421257291, "grad_norm": 1.2317109107971191, "learning_rate": 4.977824983211443e-06, "loss": 0.1654, "step": 1318 }, { "epoch": 0.21370706416072585, "grad_norm": 1.2727543115615845, "learning_rate": 4.977766829355474e-06, "loss": 0.1848, "step": 1319 }, { "epoch": 0.21386908619572262, "grad_norm": 1.2605267763137817, "learning_rate": 4.977708599685837e-06, "loss": 0.1881, "step": 1320 }, { "epoch": 0.21403110823071939, "grad_norm": 1.2069200277328491, "learning_rate": 4.977650294204313e-06, "loss": 0.1601, "step": 1321 }, { "epoch": 0.21419313026571613, "grad_norm": 1.2258086204528809, "learning_rate": 4.977591912912685e-06, "loss": 0.1709, "step": 1322 }, { "epoch": 0.2143551523007129, "grad_norm": 1.255131721496582, "learning_rate": 4.977533455812741e-06, "loss": 0.173, "step": 1323 }, { "epoch": 0.21451717433570966, "grad_norm": 1.2011017799377441, "learning_rate": 4.977474922906268e-06, "loss": 0.1784, "step": 1324 }, { "epoch": 0.21467919637070643, "grad_norm": 1.1094778776168823, "learning_rate": 4.977416314195058e-06, "loss": 0.1653, "step": 1325 }, { "epoch": 0.21484121840570317, "grad_norm": 1.3300212621688843, "learning_rate": 4.977357629680903e-06, "loss": 0.178, "step": 1326 }, { "epoch": 0.21500324044069993, "grad_norm": 1.190017819404602, "learning_rate": 4.977298869365601e-06, "loss": 0.1671, "step": 1327 }, { "epoch": 0.2151652624756967, "grad_norm": 1.2279374599456787, "learning_rate": 4.977240033250948e-06, "loss": 0.1738, "step": 1328 }, { "epoch": 0.21532728451069347, "grad_norm": 1.1839739084243774, "learning_rate": 4.977181121338745e-06, "loss": 0.1663, "step": 1329 }, { "epoch": 0.2154893065456902, "grad_norm": 1.1655634641647339, "learning_rate": 4.977122133630795e-06, "loss": 0.148, "step": 1330 }, { "epoch": 0.21565132858068697, "grad_norm": 1.151414394378662, "learning_rate": 4.977063070128902e-06, "loss": 0.1716, "step": 1331 }, { "epoch": 0.21581335061568374, "grad_norm": 1.125185489654541, "learning_rate": 4.9770039308348725e-06, "loss": 0.1489, "step": 1332 }, { "epoch": 0.21597537265068048, "grad_norm": 1.1280821561813354, "learning_rate": 4.976944715750517e-06, "loss": 0.1624, "step": 1333 }, { "epoch": 0.21613739468567725, "grad_norm": 1.2168023586273193, "learning_rate": 4.9768854248776475e-06, "loss": 0.168, "step": 1334 }, { "epoch": 0.21629941672067401, "grad_norm": 1.1852880716323853, "learning_rate": 4.976826058218079e-06, "loss": 0.1585, "step": 1335 }, { "epoch": 0.21646143875567078, "grad_norm": 1.210832953453064, "learning_rate": 4.976766615773626e-06, "loss": 0.1665, "step": 1336 }, { "epoch": 0.21662346079066752, "grad_norm": 1.44858980178833, "learning_rate": 4.9767070975461075e-06, "loss": 0.1918, "step": 1337 }, { "epoch": 0.2167854828256643, "grad_norm": 1.1260679960250854, "learning_rate": 4.976647503537347e-06, "loss": 0.1658, "step": 1338 }, { "epoch": 0.21694750486066106, "grad_norm": 1.1670085191726685, "learning_rate": 4.976587833749164e-06, "loss": 0.1604, "step": 1339 }, { "epoch": 0.21710952689565782, "grad_norm": 1.2441238164901733, "learning_rate": 4.9765280881833885e-06, "loss": 0.1674, "step": 1340 }, { "epoch": 0.21727154893065456, "grad_norm": 1.1147247552871704, "learning_rate": 4.976468266841846e-06, "loss": 0.1441, "step": 1341 }, { "epoch": 0.21743357096565133, "grad_norm": 1.1170032024383545, "learning_rate": 4.976408369726368e-06, "loss": 0.153, "step": 1342 }, { "epoch": 0.2175955930006481, "grad_norm": 1.2686792612075806, "learning_rate": 4.976348396838786e-06, "loss": 0.1759, "step": 1343 }, { "epoch": 0.21775761503564484, "grad_norm": 1.1871846914291382, "learning_rate": 4.976288348180935e-06, "loss": 0.1524, "step": 1344 }, { "epoch": 0.2179196370706416, "grad_norm": 1.2280350923538208, "learning_rate": 4.976228223754654e-06, "loss": 0.1729, "step": 1345 }, { "epoch": 0.21808165910563837, "grad_norm": 1.1394003629684448, "learning_rate": 4.976168023561782e-06, "loss": 0.1569, "step": 1346 }, { "epoch": 0.21824368114063514, "grad_norm": 1.1886025667190552, "learning_rate": 4.976107747604161e-06, "loss": 0.1539, "step": 1347 }, { "epoch": 0.21840570317563188, "grad_norm": 1.199474811553955, "learning_rate": 4.976047395883634e-06, "loss": 0.1721, "step": 1348 }, { "epoch": 0.21856772521062864, "grad_norm": 1.3025383949279785, "learning_rate": 4.975986968402048e-06, "loss": 0.1944, "step": 1349 }, { "epoch": 0.2187297472456254, "grad_norm": 1.2892481088638306, "learning_rate": 4.975926465161254e-06, "loss": 0.1628, "step": 1350 }, { "epoch": 0.21889176928062218, "grad_norm": 1.1720067262649536, "learning_rate": 4.975865886163101e-06, "loss": 0.1611, "step": 1351 }, { "epoch": 0.21905379131561892, "grad_norm": 1.284433364868164, "learning_rate": 4.975805231409444e-06, "loss": 0.1986, "step": 1352 }, { "epoch": 0.21921581335061568, "grad_norm": 1.3091256618499756, "learning_rate": 4.975744500902138e-06, "loss": 0.173, "step": 1353 }, { "epoch": 0.21937783538561245, "grad_norm": 1.1260159015655518, "learning_rate": 4.975683694643041e-06, "loss": 0.1485, "step": 1354 }, { "epoch": 0.2195398574206092, "grad_norm": 1.1246238946914673, "learning_rate": 4.975622812634014e-06, "loss": 0.1565, "step": 1355 }, { "epoch": 0.21970187945560596, "grad_norm": 1.2171014547348022, "learning_rate": 4.97556185487692e-06, "loss": 0.1701, "step": 1356 }, { "epoch": 0.21986390149060273, "grad_norm": 1.202234148979187, "learning_rate": 4.975500821373624e-06, "loss": 0.146, "step": 1357 }, { "epoch": 0.2200259235255995, "grad_norm": 1.1403602361679077, "learning_rate": 4.9754397121259935e-06, "loss": 0.1585, "step": 1358 }, { "epoch": 0.22018794556059623, "grad_norm": 1.2086261510849, "learning_rate": 4.975378527135899e-06, "loss": 0.166, "step": 1359 }, { "epoch": 0.220349967595593, "grad_norm": 1.2435903549194336, "learning_rate": 4.975317266405211e-06, "loss": 0.1783, "step": 1360 }, { "epoch": 0.22051198963058977, "grad_norm": 1.0744258165359497, "learning_rate": 4.975255929935805e-06, "loss": 0.1591, "step": 1361 }, { "epoch": 0.22067401166558653, "grad_norm": 1.2490445375442505, "learning_rate": 4.975194517729557e-06, "loss": 0.1816, "step": 1362 }, { "epoch": 0.22083603370058327, "grad_norm": 1.1408127546310425, "learning_rate": 4.975133029788347e-06, "loss": 0.1566, "step": 1363 }, { "epoch": 0.22099805573558004, "grad_norm": 1.2081525325775146, "learning_rate": 4.975071466114057e-06, "loss": 0.1567, "step": 1364 }, { "epoch": 0.2211600777705768, "grad_norm": 1.1004225015640259, "learning_rate": 4.975009826708568e-06, "loss": 0.1483, "step": 1365 }, { "epoch": 0.22132209980557355, "grad_norm": 1.3118537664413452, "learning_rate": 4.974948111573768e-06, "loss": 0.1818, "step": 1366 }, { "epoch": 0.2214841218405703, "grad_norm": 1.1786134243011475, "learning_rate": 4.974886320711546e-06, "loss": 0.1716, "step": 1367 }, { "epoch": 0.22164614387556708, "grad_norm": 1.2705552577972412, "learning_rate": 4.9748244541237915e-06, "loss": 0.1852, "step": 1368 }, { "epoch": 0.22180816591056385, "grad_norm": 1.1799037456512451, "learning_rate": 4.974762511812398e-06, "loss": 0.1414, "step": 1369 }, { "epoch": 0.2219701879455606, "grad_norm": 1.1992617845535278, "learning_rate": 4.97470049377926e-06, "loss": 0.1662, "step": 1370 }, { "epoch": 0.22213220998055735, "grad_norm": 1.1669738292694092, "learning_rate": 4.974638400026275e-06, "loss": 0.1544, "step": 1371 }, { "epoch": 0.22229423201555412, "grad_norm": 1.2163314819335938, "learning_rate": 4.974576230555344e-06, "loss": 0.1643, "step": 1372 }, { "epoch": 0.22245625405055086, "grad_norm": 1.3139532804489136, "learning_rate": 4.9745139853683685e-06, "loss": 0.1617, "step": 1373 }, { "epoch": 0.22261827608554763, "grad_norm": 1.1525776386260986, "learning_rate": 4.974451664467253e-06, "loss": 0.1752, "step": 1374 }, { "epoch": 0.2227802981205444, "grad_norm": 1.110068917274475, "learning_rate": 4.974389267853905e-06, "loss": 0.1511, "step": 1375 }, { "epoch": 0.22294232015554116, "grad_norm": 1.1273125410079956, "learning_rate": 4.974326795530234e-06, "loss": 0.1528, "step": 1376 }, { "epoch": 0.2231043421905379, "grad_norm": 1.3362386226654053, "learning_rate": 4.97426424749815e-06, "loss": 0.2061, "step": 1377 }, { "epoch": 0.22326636422553467, "grad_norm": 1.0781415700912476, "learning_rate": 4.974201623759568e-06, "loss": 0.1522, "step": 1378 }, { "epoch": 0.22342838626053144, "grad_norm": 1.1233181953430176, "learning_rate": 4.974138924316403e-06, "loss": 0.1601, "step": 1379 }, { "epoch": 0.2235904082955282, "grad_norm": 1.0733990669250488, "learning_rate": 4.974076149170575e-06, "loss": 0.1613, "step": 1380 }, { "epoch": 0.22375243033052494, "grad_norm": 0.9839252233505249, "learning_rate": 4.9740132983240036e-06, "loss": 0.1275, "step": 1381 }, { "epoch": 0.2239144523655217, "grad_norm": 1.2110671997070312, "learning_rate": 4.973950371778612e-06, "loss": 0.1764, "step": 1382 }, { "epoch": 0.22407647440051848, "grad_norm": 1.2692327499389648, "learning_rate": 4.973887369536326e-06, "loss": 0.1741, "step": 1383 }, { "epoch": 0.22423849643551522, "grad_norm": 1.226586937904358, "learning_rate": 4.973824291599074e-06, "loss": 0.1578, "step": 1384 }, { "epoch": 0.22440051847051198, "grad_norm": 1.1944162845611572, "learning_rate": 4.973761137968784e-06, "loss": 0.1543, "step": 1385 }, { "epoch": 0.22456254050550875, "grad_norm": 1.3723398447036743, "learning_rate": 4.973697908647391e-06, "loss": 0.1834, "step": 1386 }, { "epoch": 0.22472456254050552, "grad_norm": 1.196521520614624, "learning_rate": 4.973634603636828e-06, "loss": 0.1519, "step": 1387 }, { "epoch": 0.22488658457550226, "grad_norm": 1.104246735572815, "learning_rate": 4.973571222939031e-06, "loss": 0.1552, "step": 1388 }, { "epoch": 0.22504860661049902, "grad_norm": 1.137224555015564, "learning_rate": 4.973507766555941e-06, "loss": 0.1628, "step": 1389 }, { "epoch": 0.2252106286454958, "grad_norm": 1.2728477716445923, "learning_rate": 4.973444234489499e-06, "loss": 0.1687, "step": 1390 }, { "epoch": 0.22537265068049256, "grad_norm": 1.2384727001190186, "learning_rate": 4.97338062674165e-06, "loss": 0.1916, "step": 1391 }, { "epoch": 0.2255346727154893, "grad_norm": 1.280981183052063, "learning_rate": 4.973316943314338e-06, "loss": 0.1612, "step": 1392 }, { "epoch": 0.22569669475048607, "grad_norm": 1.1161082983016968, "learning_rate": 4.9732531842095135e-06, "loss": 0.1507, "step": 1393 }, { "epoch": 0.22585871678548283, "grad_norm": 1.3085819482803345, "learning_rate": 4.9731893494291275e-06, "loss": 0.192, "step": 1394 }, { "epoch": 0.22602073882047957, "grad_norm": 1.1078639030456543, "learning_rate": 4.973125438975131e-06, "loss": 0.1516, "step": 1395 }, { "epoch": 0.22618276085547634, "grad_norm": 1.1671632528305054, "learning_rate": 4.973061452849481e-06, "loss": 0.1693, "step": 1396 }, { "epoch": 0.2263447828904731, "grad_norm": 1.103091835975647, "learning_rate": 4.9729973910541365e-06, "loss": 0.1415, "step": 1397 }, { "epoch": 0.22650680492546987, "grad_norm": 1.1498920917510986, "learning_rate": 4.972933253591056e-06, "loss": 0.1562, "step": 1398 }, { "epoch": 0.2266688269604666, "grad_norm": 1.1860241889953613, "learning_rate": 4.972869040462202e-06, "loss": 0.1641, "step": 1399 }, { "epoch": 0.22683084899546338, "grad_norm": 1.2506409883499146, "learning_rate": 4.972804751669539e-06, "loss": 0.1622, "step": 1400 }, { "epoch": 0.22699287103046015, "grad_norm": 1.1482832431793213, "learning_rate": 4.9727403872150345e-06, "loss": 0.1506, "step": 1401 }, { "epoch": 0.22715489306545691, "grad_norm": 1.3503156900405884, "learning_rate": 4.972675947100659e-06, "loss": 0.2058, "step": 1402 }, { "epoch": 0.22731691510045365, "grad_norm": 1.2772489786148071, "learning_rate": 4.972611431328384e-06, "loss": 0.1633, "step": 1403 }, { "epoch": 0.22747893713545042, "grad_norm": 1.2101973295211792, "learning_rate": 4.972546839900181e-06, "loss": 0.1793, "step": 1404 }, { "epoch": 0.2276409591704472, "grad_norm": 1.225361943244934, "learning_rate": 4.972482172818029e-06, "loss": 0.174, "step": 1405 }, { "epoch": 0.22780298120544393, "grad_norm": 1.211272120475769, "learning_rate": 4.972417430083906e-06, "loss": 0.1821, "step": 1406 }, { "epoch": 0.2279650032404407, "grad_norm": 1.1716586351394653, "learning_rate": 4.9723526116997925e-06, "loss": 0.1567, "step": 1407 }, { "epoch": 0.22812702527543746, "grad_norm": 1.2331879138946533, "learning_rate": 4.972287717667672e-06, "loss": 0.1849, "step": 1408 }, { "epoch": 0.22828904731043423, "grad_norm": 1.1068236827850342, "learning_rate": 4.972222747989531e-06, "loss": 0.1506, "step": 1409 }, { "epoch": 0.22845106934543097, "grad_norm": 1.0929367542266846, "learning_rate": 4.972157702667356e-06, "loss": 0.1625, "step": 1410 }, { "epoch": 0.22861309138042774, "grad_norm": 1.1465107202529907, "learning_rate": 4.972092581703138e-06, "loss": 0.1676, "step": 1411 }, { "epoch": 0.2287751134154245, "grad_norm": 1.1976622343063354, "learning_rate": 4.972027385098868e-06, "loss": 0.1546, "step": 1412 }, { "epoch": 0.22893713545042127, "grad_norm": 1.201499104499817, "learning_rate": 4.971962112856543e-06, "loss": 0.1639, "step": 1413 }, { "epoch": 0.229099157485418, "grad_norm": 1.1784822940826416, "learning_rate": 4.97189676497816e-06, "loss": 0.1709, "step": 1414 }, { "epoch": 0.22926117952041478, "grad_norm": 1.2148628234863281, "learning_rate": 4.971831341465718e-06, "loss": 0.1641, "step": 1415 }, { "epoch": 0.22942320155541154, "grad_norm": 1.1798665523529053, "learning_rate": 4.971765842321218e-06, "loss": 0.1761, "step": 1416 }, { "epoch": 0.22958522359040828, "grad_norm": 1.0728332996368408, "learning_rate": 4.9717002675466645e-06, "loss": 0.1542, "step": 1417 }, { "epoch": 0.22974724562540505, "grad_norm": 1.094524621963501, "learning_rate": 4.971634617144065e-06, "loss": 0.1603, "step": 1418 }, { "epoch": 0.22990926766040182, "grad_norm": 1.1839401721954346, "learning_rate": 4.971568891115427e-06, "loss": 0.1517, "step": 1419 }, { "epoch": 0.23007128969539858, "grad_norm": 1.2270963191986084, "learning_rate": 4.971503089462762e-06, "loss": 0.1751, "step": 1420 }, { "epoch": 0.23023331173039532, "grad_norm": 1.342659831047058, "learning_rate": 4.971437212188084e-06, "loss": 0.1662, "step": 1421 }, { "epoch": 0.2303953337653921, "grad_norm": 1.0489001274108887, "learning_rate": 4.9713712592934075e-06, "loss": 0.1384, "step": 1422 }, { "epoch": 0.23055735580038886, "grad_norm": 1.2548742294311523, "learning_rate": 4.971305230780751e-06, "loss": 0.1829, "step": 1423 }, { "epoch": 0.23071937783538563, "grad_norm": 1.1241710186004639, "learning_rate": 4.971239126652135e-06, "loss": 0.1583, "step": 1424 }, { "epoch": 0.23088139987038236, "grad_norm": 1.1561769247055054, "learning_rate": 4.971172946909582e-06, "loss": 0.1747, "step": 1425 }, { "epoch": 0.23104342190537913, "grad_norm": 1.1572365760803223, "learning_rate": 4.971106691555116e-06, "loss": 0.1447, "step": 1426 }, { "epoch": 0.2312054439403759, "grad_norm": 1.766800880432129, "learning_rate": 4.971040360590767e-06, "loss": 0.1769, "step": 1427 }, { "epoch": 0.23136746597537264, "grad_norm": 1.1585261821746826, "learning_rate": 4.9709739540185616e-06, "loss": 0.1782, "step": 1428 }, { "epoch": 0.2315294880103694, "grad_norm": 0.9941073656082153, "learning_rate": 4.9709074718405335e-06, "loss": 0.1433, "step": 1429 }, { "epoch": 0.23169151004536617, "grad_norm": 1.3293631076812744, "learning_rate": 4.970840914058716e-06, "loss": 0.1575, "step": 1430 }, { "epoch": 0.23185353208036294, "grad_norm": 1.0774224996566772, "learning_rate": 4.970774280675146e-06, "loss": 0.1464, "step": 1431 }, { "epoch": 0.23201555411535968, "grad_norm": 1.1900949478149414, "learning_rate": 4.970707571691862e-06, "loss": 0.1418, "step": 1432 }, { "epoch": 0.23217757615035645, "grad_norm": 1.2267847061157227, "learning_rate": 4.9706407871109056e-06, "loss": 0.1644, "step": 1433 }, { "epoch": 0.2323395981853532, "grad_norm": 1.3006565570831299, "learning_rate": 4.970573926934319e-06, "loss": 0.1772, "step": 1434 }, { "epoch": 0.23250162022034998, "grad_norm": 1.324775218963623, "learning_rate": 4.97050699116415e-06, "loss": 0.1835, "step": 1435 }, { "epoch": 0.23266364225534672, "grad_norm": 1.1360862255096436, "learning_rate": 4.970439979802445e-06, "loss": 0.1578, "step": 1436 }, { "epoch": 0.2328256642903435, "grad_norm": 1.1616891622543335, "learning_rate": 4.970372892851255e-06, "loss": 0.1525, "step": 1437 }, { "epoch": 0.23298768632534025, "grad_norm": 1.2094240188598633, "learning_rate": 4.970305730312632e-06, "loss": 0.1678, "step": 1438 }, { "epoch": 0.233149708360337, "grad_norm": 1.3469047546386719, "learning_rate": 4.970238492188633e-06, "loss": 0.1741, "step": 1439 }, { "epoch": 0.23331173039533376, "grad_norm": 1.2809699773788452, "learning_rate": 4.9701711784813135e-06, "loss": 0.1724, "step": 1440 }, { "epoch": 0.23347375243033053, "grad_norm": 1.1660102605819702, "learning_rate": 4.970103789192734e-06, "loss": 0.1461, "step": 1441 }, { "epoch": 0.2336357744653273, "grad_norm": 1.1002432107925415, "learning_rate": 4.970036324324955e-06, "loss": 0.1546, "step": 1442 }, { "epoch": 0.23379779650032403, "grad_norm": 1.0596157312393188, "learning_rate": 4.9699687838800425e-06, "loss": 0.1511, "step": 1443 }, { "epoch": 0.2339598185353208, "grad_norm": 1.231737732887268, "learning_rate": 4.969901167860063e-06, "loss": 0.1683, "step": 1444 }, { "epoch": 0.23412184057031757, "grad_norm": 1.239596962928772, "learning_rate": 4.969833476267084e-06, "loss": 0.1763, "step": 1445 }, { "epoch": 0.23428386260531434, "grad_norm": 1.0822583436965942, "learning_rate": 4.969765709103177e-06, "loss": 0.1484, "step": 1446 }, { "epoch": 0.23444588464031108, "grad_norm": 1.0738798379898071, "learning_rate": 4.969697866370417e-06, "loss": 0.1508, "step": 1447 }, { "epoch": 0.23460790667530784, "grad_norm": 1.1625072956085205, "learning_rate": 4.9696299480708785e-06, "loss": 0.1582, "step": 1448 }, { "epoch": 0.2347699287103046, "grad_norm": 1.0520604848861694, "learning_rate": 4.969561954206641e-06, "loss": 0.1381, "step": 1449 }, { "epoch": 0.23493195074530135, "grad_norm": 1.2117533683776855, "learning_rate": 4.969493884779783e-06, "loss": 0.1823, "step": 1450 }, { "epoch": 0.23509397278029812, "grad_norm": 1.217532753944397, "learning_rate": 4.969425739792388e-06, "loss": 0.1778, "step": 1451 }, { "epoch": 0.23525599481529488, "grad_norm": 1.1805583238601685, "learning_rate": 4.969357519246542e-06, "loss": 0.164, "step": 1452 }, { "epoch": 0.23541801685029165, "grad_norm": 1.243613600730896, "learning_rate": 4.96928922314433e-06, "loss": 0.1634, "step": 1453 }, { "epoch": 0.2355800388852884, "grad_norm": 1.224203109741211, "learning_rate": 4.9692208514878445e-06, "loss": 0.1688, "step": 1454 }, { "epoch": 0.23574206092028516, "grad_norm": 1.2090548276901245, "learning_rate": 4.9691524042791765e-06, "loss": 0.178, "step": 1455 }, { "epoch": 0.23590408295528192, "grad_norm": 1.1282353401184082, "learning_rate": 4.96908388152042e-06, "loss": 0.1615, "step": 1456 }, { "epoch": 0.2360661049902787, "grad_norm": 1.0806899070739746, "learning_rate": 4.9690152832136715e-06, "loss": 0.1528, "step": 1457 }, { "epoch": 0.23622812702527543, "grad_norm": 1.3501883745193481, "learning_rate": 4.968946609361031e-06, "loss": 0.1745, "step": 1458 }, { "epoch": 0.2363901490602722, "grad_norm": 1.1516779661178589, "learning_rate": 4.968877859964598e-06, "loss": 0.1427, "step": 1459 }, { "epoch": 0.23655217109526896, "grad_norm": 1.0786315202713013, "learning_rate": 4.968809035026477e-06, "loss": 0.1552, "step": 1460 }, { "epoch": 0.2367141931302657, "grad_norm": 1.1482332944869995, "learning_rate": 4.968740134548775e-06, "loss": 0.168, "step": 1461 }, { "epoch": 0.23687621516526247, "grad_norm": 1.3289909362792969, "learning_rate": 4.968671158533599e-06, "loss": 0.1726, "step": 1462 }, { "epoch": 0.23703823720025924, "grad_norm": 1.1058714389801025, "learning_rate": 4.968602106983059e-06, "loss": 0.1569, "step": 1463 }, { "epoch": 0.237200259235256, "grad_norm": 1.113430142402649, "learning_rate": 4.968532979899269e-06, "loss": 0.1542, "step": 1464 }, { "epoch": 0.23736228127025275, "grad_norm": 1.465322732925415, "learning_rate": 4.968463777284342e-06, "loss": 0.1846, "step": 1465 }, { "epoch": 0.2375243033052495, "grad_norm": 1.1434563398361206, "learning_rate": 4.9683944991403985e-06, "loss": 0.1574, "step": 1466 }, { "epoch": 0.23768632534024628, "grad_norm": 1.3530895709991455, "learning_rate": 4.9683251454695565e-06, "loss": 0.1646, "step": 1467 }, { "epoch": 0.23784834737524305, "grad_norm": 1.193445086479187, "learning_rate": 4.968255716273938e-06, "loss": 0.1696, "step": 1468 }, { "epoch": 0.23801036941023979, "grad_norm": 1.2225266695022583, "learning_rate": 4.968186211555668e-06, "loss": 0.1669, "step": 1469 }, { "epoch": 0.23817239144523655, "grad_norm": 1.0870074033737183, "learning_rate": 4.968116631316873e-06, "loss": 0.1507, "step": 1470 }, { "epoch": 0.23833441348023332, "grad_norm": 1.3210666179656982, "learning_rate": 4.968046975559681e-06, "loss": 0.1747, "step": 1471 }, { "epoch": 0.23849643551523006, "grad_norm": 1.0983004570007324, "learning_rate": 4.967977244286225e-06, "loss": 0.1512, "step": 1472 }, { "epoch": 0.23865845755022683, "grad_norm": 1.1378045082092285, "learning_rate": 4.9679074374986365e-06, "loss": 0.1599, "step": 1473 }, { "epoch": 0.2388204795852236, "grad_norm": 0.968702495098114, "learning_rate": 4.967837555199054e-06, "loss": 0.1294, "step": 1474 }, { "epoch": 0.23898250162022036, "grad_norm": 1.1442790031433105, "learning_rate": 4.967767597389613e-06, "loss": 0.1666, "step": 1475 }, { "epoch": 0.2391445236552171, "grad_norm": 1.2723668813705444, "learning_rate": 4.967697564072457e-06, "loss": 0.1767, "step": 1476 }, { "epoch": 0.23930654569021387, "grad_norm": 1.1569876670837402, "learning_rate": 4.967627455249726e-06, "loss": 0.1688, "step": 1477 }, { "epoch": 0.23946856772521063, "grad_norm": 1.2262758016586304, "learning_rate": 4.9675572709235665e-06, "loss": 0.1873, "step": 1478 }, { "epoch": 0.2396305897602074, "grad_norm": 1.2375082969665527, "learning_rate": 4.967487011096126e-06, "loss": 0.1713, "step": 1479 }, { "epoch": 0.23979261179520414, "grad_norm": 1.2592830657958984, "learning_rate": 4.967416675769555e-06, "loss": 0.1847, "step": 1480 }, { "epoch": 0.2399546338302009, "grad_norm": 1.3204195499420166, "learning_rate": 4.9673462649460045e-06, "loss": 0.1656, "step": 1481 }, { "epoch": 0.24011665586519768, "grad_norm": 1.1137760877609253, "learning_rate": 4.967275778627628e-06, "loss": 0.1659, "step": 1482 }, { "epoch": 0.24027867790019442, "grad_norm": 1.2828960418701172, "learning_rate": 4.967205216816584e-06, "loss": 0.1922, "step": 1483 }, { "epoch": 0.24044069993519118, "grad_norm": 1.2552210092544556, "learning_rate": 4.967134579515032e-06, "loss": 0.1616, "step": 1484 }, { "epoch": 0.24060272197018795, "grad_norm": 1.251631259918213, "learning_rate": 4.96706386672513e-06, "loss": 0.1807, "step": 1485 }, { "epoch": 0.24076474400518472, "grad_norm": 1.195192575454712, "learning_rate": 4.966993078449046e-06, "loss": 0.1639, "step": 1486 }, { "epoch": 0.24092676604018146, "grad_norm": 1.0965896844863892, "learning_rate": 4.966922214688943e-06, "loss": 0.1566, "step": 1487 }, { "epoch": 0.24108878807517822, "grad_norm": 1.3898707628250122, "learning_rate": 4.96685127544699e-06, "loss": 0.1726, "step": 1488 }, { "epoch": 0.241250810110175, "grad_norm": 1.1594412326812744, "learning_rate": 4.966780260725357e-06, "loss": 0.1645, "step": 1489 }, { "epoch": 0.24141283214517173, "grad_norm": 1.1280404329299927, "learning_rate": 4.966709170526219e-06, "loss": 0.1496, "step": 1490 }, { "epoch": 0.2415748541801685, "grad_norm": 1.0832349061965942, "learning_rate": 4.966638004851748e-06, "loss": 0.1577, "step": 1491 }, { "epoch": 0.24173687621516526, "grad_norm": 1.2211941480636597, "learning_rate": 4.966566763704124e-06, "loss": 0.1856, "step": 1492 }, { "epoch": 0.24189889825016203, "grad_norm": 1.1475859880447388, "learning_rate": 4.9664954470855265e-06, "loss": 0.1602, "step": 1493 }, { "epoch": 0.24206092028515877, "grad_norm": 1.326402187347412, "learning_rate": 4.966424054998137e-06, "loss": 0.2013, "step": 1494 }, { "epoch": 0.24222294232015554, "grad_norm": 1.3355193138122559, "learning_rate": 4.96635258744414e-06, "loss": 0.1864, "step": 1495 }, { "epoch": 0.2423849643551523, "grad_norm": 1.110781192779541, "learning_rate": 4.966281044425722e-06, "loss": 0.1592, "step": 1496 }, { "epoch": 0.24254698639014907, "grad_norm": 1.1372196674346924, "learning_rate": 4.966209425945072e-06, "loss": 0.1666, "step": 1497 }, { "epoch": 0.2427090084251458, "grad_norm": 1.1487059593200684, "learning_rate": 4.9661377320043815e-06, "loss": 0.167, "step": 1498 }, { "epoch": 0.24287103046014258, "grad_norm": 1.1017019748687744, "learning_rate": 4.966065962605845e-06, "loss": 0.1661, "step": 1499 }, { "epoch": 0.24303305249513935, "grad_norm": 1.0528111457824707, "learning_rate": 4.965994117751658e-06, "loss": 0.1547, "step": 1500 }, { "epoch": 0.24319507453013609, "grad_norm": 1.1754587888717651, "learning_rate": 4.965922197444017e-06, "loss": 0.1734, "step": 1501 }, { "epoch": 0.24335709656513285, "grad_norm": 1.20737886428833, "learning_rate": 4.965850201685126e-06, "loss": 0.1809, "step": 1502 }, { "epoch": 0.24351911860012962, "grad_norm": 1.1585618257522583, "learning_rate": 4.965778130477185e-06, "loss": 0.1683, "step": 1503 }, { "epoch": 0.2436811406351264, "grad_norm": 1.1678732633590698, "learning_rate": 4.965705983822401e-06, "loss": 0.1567, "step": 1504 }, { "epoch": 0.24384316267012313, "grad_norm": 1.127969741821289, "learning_rate": 4.965633761722981e-06, "loss": 0.1636, "step": 1505 }, { "epoch": 0.2440051847051199, "grad_norm": 1.1678990125656128, "learning_rate": 4.965561464181134e-06, "loss": 0.1616, "step": 1506 }, { "epoch": 0.24416720674011666, "grad_norm": 1.166166067123413, "learning_rate": 4.965489091199073e-06, "loss": 0.1483, "step": 1507 }, { "epoch": 0.24432922877511343, "grad_norm": 1.092760443687439, "learning_rate": 4.965416642779012e-06, "loss": 0.1541, "step": 1508 }, { "epoch": 0.24449125081011017, "grad_norm": 1.230621099472046, "learning_rate": 4.965344118923168e-06, "loss": 0.1603, "step": 1509 }, { "epoch": 0.24465327284510693, "grad_norm": 1.213835597038269, "learning_rate": 4.96527151963376e-06, "loss": 0.1719, "step": 1510 }, { "epoch": 0.2448152948801037, "grad_norm": 1.032114863395691, "learning_rate": 4.965198844913009e-06, "loss": 0.1459, "step": 1511 }, { "epoch": 0.24497731691510044, "grad_norm": 1.3471325635910034, "learning_rate": 4.9651260947631395e-06, "loss": 0.1784, "step": 1512 }, { "epoch": 0.2451393389500972, "grad_norm": 1.0477313995361328, "learning_rate": 4.965053269186378e-06, "loss": 0.1542, "step": 1513 }, { "epoch": 0.24530136098509397, "grad_norm": 1.0779989957809448, "learning_rate": 4.9649803681849495e-06, "loss": 0.1431, "step": 1514 }, { "epoch": 0.24546338302009074, "grad_norm": 1.123201847076416, "learning_rate": 4.964907391761088e-06, "loss": 0.1537, "step": 1515 }, { "epoch": 0.24562540505508748, "grad_norm": 1.2395498752593994, "learning_rate": 4.9648343399170254e-06, "loss": 0.1803, "step": 1516 }, { "epoch": 0.24578742709008425, "grad_norm": 1.1589041948318481, "learning_rate": 4.964761212654997e-06, "loss": 0.1604, "step": 1517 }, { "epoch": 0.24594944912508102, "grad_norm": 1.2080649137496948, "learning_rate": 4.964688009977239e-06, "loss": 0.1731, "step": 1518 }, { "epoch": 0.24611147116007778, "grad_norm": 1.0178292989730835, "learning_rate": 4.964614731885994e-06, "loss": 0.1335, "step": 1519 }, { "epoch": 0.24627349319507452, "grad_norm": 1.047829508781433, "learning_rate": 4.9645413783835006e-06, "loss": 0.1516, "step": 1520 }, { "epoch": 0.2464355152300713, "grad_norm": 1.1087828874588013, "learning_rate": 4.964467949472006e-06, "loss": 0.1596, "step": 1521 }, { "epoch": 0.24659753726506806, "grad_norm": 1.0549379587173462, "learning_rate": 4.964394445153756e-06, "loss": 0.1287, "step": 1522 }, { "epoch": 0.2467595593000648, "grad_norm": 1.2687568664550781, "learning_rate": 4.964320865431001e-06, "loss": 0.1743, "step": 1523 }, { "epoch": 0.24692158133506156, "grad_norm": 1.0614700317382812, "learning_rate": 4.964247210305989e-06, "loss": 0.1444, "step": 1524 }, { "epoch": 0.24708360337005833, "grad_norm": 1.253021478652954, "learning_rate": 4.964173479780976e-06, "loss": 0.15, "step": 1525 }, { "epoch": 0.2472456254050551, "grad_norm": 1.270143747329712, "learning_rate": 4.964099673858219e-06, "loss": 0.1641, "step": 1526 }, { "epoch": 0.24740764744005184, "grad_norm": 1.1180580854415894, "learning_rate": 4.964025792539974e-06, "loss": 0.1566, "step": 1527 }, { "epoch": 0.2475696694750486, "grad_norm": 1.1324844360351562, "learning_rate": 4.963951835828503e-06, "loss": 0.1518, "step": 1528 }, { "epoch": 0.24773169151004537, "grad_norm": 1.225598931312561, "learning_rate": 4.963877803726068e-06, "loss": 0.1681, "step": 1529 }, { "epoch": 0.24789371354504214, "grad_norm": 1.3392179012298584, "learning_rate": 4.963803696234935e-06, "loss": 0.1815, "step": 1530 }, { "epoch": 0.24805573558003888, "grad_norm": 1.1699576377868652, "learning_rate": 4.963729513357372e-06, "loss": 0.164, "step": 1531 }, { "epoch": 0.24821775761503564, "grad_norm": 1.1221762895584106, "learning_rate": 4.9636552550956465e-06, "loss": 0.1688, "step": 1532 }, { "epoch": 0.2483797796500324, "grad_norm": 1.1934576034545898, "learning_rate": 4.963580921452033e-06, "loss": 0.1814, "step": 1533 }, { "epoch": 0.24854180168502915, "grad_norm": 1.0234527587890625, "learning_rate": 4.963506512428804e-06, "loss": 0.1457, "step": 1534 }, { "epoch": 0.24870382372002592, "grad_norm": 1.1598873138427734, "learning_rate": 4.963432028028239e-06, "loss": 0.1653, "step": 1535 }, { "epoch": 0.24886584575502269, "grad_norm": 1.1791259050369263, "learning_rate": 4.963357468252614e-06, "loss": 0.1833, "step": 1536 }, { "epoch": 0.24902786779001945, "grad_norm": 1.2526721954345703, "learning_rate": 4.9632828331042124e-06, "loss": 0.1743, "step": 1537 }, { "epoch": 0.2491898898250162, "grad_norm": 1.1005678176879883, "learning_rate": 4.9632081225853165e-06, "loss": 0.179, "step": 1538 }, { "epoch": 0.24935191186001296, "grad_norm": 1.1455602645874023, "learning_rate": 4.963133336698214e-06, "loss": 0.17, "step": 1539 }, { "epoch": 0.24951393389500973, "grad_norm": 1.3969601392745972, "learning_rate": 4.9630584754451906e-06, "loss": 0.1942, "step": 1540 }, { "epoch": 0.2496759559300065, "grad_norm": 1.1257600784301758, "learning_rate": 4.962983538828539e-06, "loss": 0.1532, "step": 1541 }, { "epoch": 0.24983797796500323, "grad_norm": 1.151532769203186, "learning_rate": 4.962908526850552e-06, "loss": 0.1781, "step": 1542 }, { "epoch": 0.25, "grad_norm": 1.063228964805603, "learning_rate": 4.962833439513524e-06, "loss": 0.157, "step": 1543 }, { "epoch": 0.25016202203499677, "grad_norm": 1.101438283920288, "learning_rate": 4.962758276819752e-06, "loss": 0.155, "step": 1544 }, { "epoch": 0.25032404406999353, "grad_norm": 1.0311859846115112, "learning_rate": 4.9626830387715365e-06, "loss": 0.1524, "step": 1545 }, { "epoch": 0.2504860661049903, "grad_norm": 1.2852880954742432, "learning_rate": 4.9626077253711805e-06, "loss": 0.1775, "step": 1546 }, { "epoch": 0.250648088139987, "grad_norm": 1.198421835899353, "learning_rate": 4.962532336620987e-06, "loss": 0.1595, "step": 1547 }, { "epoch": 0.2508101101749838, "grad_norm": 1.1567708253860474, "learning_rate": 4.962456872523263e-06, "loss": 0.1514, "step": 1548 }, { "epoch": 0.25097213220998055, "grad_norm": 1.1411212682724, "learning_rate": 4.9623813330803174e-06, "loss": 0.1753, "step": 1549 }, { "epoch": 0.2511341542449773, "grad_norm": 1.034947395324707, "learning_rate": 4.962305718294462e-06, "loss": 0.1484, "step": 1550 }, { "epoch": 0.2512961762799741, "grad_norm": 1.2145742177963257, "learning_rate": 4.962230028168011e-06, "loss": 0.1791, "step": 1551 }, { "epoch": 0.25145819831497085, "grad_norm": 1.173612356185913, "learning_rate": 4.96215426270328e-06, "loss": 0.1555, "step": 1552 }, { "epoch": 0.2516202203499676, "grad_norm": 1.1682580709457397, "learning_rate": 4.9620784219025855e-06, "loss": 0.1707, "step": 1553 }, { "epoch": 0.2517822423849643, "grad_norm": 1.1669775247573853, "learning_rate": 4.962002505768251e-06, "loss": 0.172, "step": 1554 }, { "epoch": 0.2519442644199611, "grad_norm": 1.1306649446487427, "learning_rate": 4.961926514302597e-06, "loss": 0.1706, "step": 1555 }, { "epoch": 0.25210628645495786, "grad_norm": 1.0562387704849243, "learning_rate": 4.961850447507948e-06, "loss": 0.1599, "step": 1556 }, { "epoch": 0.25226830848995463, "grad_norm": 1.000566005706787, "learning_rate": 4.961774305386634e-06, "loss": 0.142, "step": 1557 }, { "epoch": 0.2524303305249514, "grad_norm": 1.1344536542892456, "learning_rate": 4.961698087940984e-06, "loss": 0.1568, "step": 1558 }, { "epoch": 0.25259235255994816, "grad_norm": 1.1511046886444092, "learning_rate": 4.961621795173329e-06, "loss": 0.1594, "step": 1559 }, { "epoch": 0.25275437459494493, "grad_norm": 1.1231741905212402, "learning_rate": 4.961545427086006e-06, "loss": 0.1734, "step": 1560 }, { "epoch": 0.2529163966299417, "grad_norm": 1.2015700340270996, "learning_rate": 4.961468983681347e-06, "loss": 0.1664, "step": 1561 }, { "epoch": 0.2530784186649384, "grad_norm": 1.1765875816345215, "learning_rate": 4.961392464961695e-06, "loss": 0.1562, "step": 1562 }, { "epoch": 0.2532404406999352, "grad_norm": 1.1523048877716064, "learning_rate": 4.96131587092939e-06, "loss": 0.1658, "step": 1563 }, { "epoch": 0.25340246273493194, "grad_norm": 1.113807201385498, "learning_rate": 4.961239201586776e-06, "loss": 0.1567, "step": 1564 }, { "epoch": 0.2535644847699287, "grad_norm": 1.1596148014068604, "learning_rate": 4.961162456936199e-06, "loss": 0.1667, "step": 1565 }, { "epoch": 0.2537265068049255, "grad_norm": 1.4176827669143677, "learning_rate": 4.961085636980005e-06, "loss": 0.2077, "step": 1566 }, { "epoch": 0.25388852883992225, "grad_norm": 1.307410717010498, "learning_rate": 4.961008741720546e-06, "loss": 0.1839, "step": 1567 }, { "epoch": 0.254050550874919, "grad_norm": 1.387787938117981, "learning_rate": 4.960931771160177e-06, "loss": 0.1534, "step": 1568 }, { "epoch": 0.2542125729099157, "grad_norm": 1.2094498872756958, "learning_rate": 4.96085472530125e-06, "loss": 0.1716, "step": 1569 }, { "epoch": 0.2543745949449125, "grad_norm": 1.3138521909713745, "learning_rate": 4.960777604146124e-06, "loss": 0.1826, "step": 1570 }, { "epoch": 0.25453661697990926, "grad_norm": 1.0649223327636719, "learning_rate": 4.960700407697158e-06, "loss": 0.1628, "step": 1571 }, { "epoch": 0.254698639014906, "grad_norm": 1.0295525789260864, "learning_rate": 4.9606231359567146e-06, "loss": 0.1441, "step": 1572 }, { "epoch": 0.2548606610499028, "grad_norm": 1.0389995574951172, "learning_rate": 4.960545788927158e-06, "loss": 0.1559, "step": 1573 }, { "epoch": 0.25502268308489956, "grad_norm": 1.1343345642089844, "learning_rate": 4.960468366610854e-06, "loss": 0.1656, "step": 1574 }, { "epoch": 0.2551847051198963, "grad_norm": 1.055141568183899, "learning_rate": 4.960390869010172e-06, "loss": 0.1614, "step": 1575 }, { "epoch": 0.25534672715489304, "grad_norm": 1.1201953887939453, "learning_rate": 4.960313296127485e-06, "loss": 0.1514, "step": 1576 }, { "epoch": 0.2555087491898898, "grad_norm": 1.041771650314331, "learning_rate": 4.960235647965163e-06, "loss": 0.1388, "step": 1577 }, { "epoch": 0.2556707712248866, "grad_norm": 1.19291353225708, "learning_rate": 4.960157924525585e-06, "loss": 0.1508, "step": 1578 }, { "epoch": 0.25583279325988334, "grad_norm": 1.1116414070129395, "learning_rate": 4.960080125811127e-06, "loss": 0.1493, "step": 1579 }, { "epoch": 0.2559948152948801, "grad_norm": 1.1531809568405151, "learning_rate": 4.960002251824172e-06, "loss": 0.1732, "step": 1580 }, { "epoch": 0.2561568373298769, "grad_norm": 1.1807605028152466, "learning_rate": 4.9599243025671e-06, "loss": 0.1722, "step": 1581 }, { "epoch": 0.25631885936487364, "grad_norm": 1.049898624420166, "learning_rate": 4.959846278042298e-06, "loss": 0.1554, "step": 1582 }, { "epoch": 0.2564808813998704, "grad_norm": 1.3067052364349365, "learning_rate": 4.959768178252152e-06, "loss": 0.194, "step": 1583 }, { "epoch": 0.2566429034348671, "grad_norm": 1.1992570161819458, "learning_rate": 4.959690003199052e-06, "loss": 0.1739, "step": 1584 }, { "epoch": 0.2568049254698639, "grad_norm": 1.368794560432434, "learning_rate": 4.959611752885392e-06, "loss": 0.174, "step": 1585 }, { "epoch": 0.25696694750486065, "grad_norm": 1.3984448909759521, "learning_rate": 4.959533427313562e-06, "loss": 0.1909, "step": 1586 }, { "epoch": 0.2571289695398574, "grad_norm": 1.0769413709640503, "learning_rate": 4.959455026485963e-06, "loss": 0.1573, "step": 1587 }, { "epoch": 0.2572909915748542, "grad_norm": 1.1236642599105835, "learning_rate": 4.95937655040499e-06, "loss": 0.1559, "step": 1588 }, { "epoch": 0.25745301360985096, "grad_norm": 1.0431153774261475, "learning_rate": 4.959297999073048e-06, "loss": 0.1562, "step": 1589 }, { "epoch": 0.2576150356448477, "grad_norm": 1.1195615530014038, "learning_rate": 4.959219372492539e-06, "loss": 0.1543, "step": 1590 }, { "epoch": 0.25777705767984443, "grad_norm": 1.070062518119812, "learning_rate": 4.959140670665867e-06, "loss": 0.1548, "step": 1591 }, { "epoch": 0.2579390797148412, "grad_norm": 1.1161715984344482, "learning_rate": 4.9590618935954415e-06, "loss": 0.1677, "step": 1592 }, { "epoch": 0.25810110174983797, "grad_norm": 1.1165508031845093, "learning_rate": 4.958983041283674e-06, "loss": 0.1623, "step": 1593 }, { "epoch": 0.25826312378483474, "grad_norm": 1.2309761047363281, "learning_rate": 4.958904113732975e-06, "loss": 0.1937, "step": 1594 }, { "epoch": 0.2584251458198315, "grad_norm": 1.0280898809432983, "learning_rate": 4.958825110945761e-06, "loss": 0.145, "step": 1595 }, { "epoch": 0.25858716785482827, "grad_norm": 1.1085617542266846, "learning_rate": 4.958746032924449e-06, "loss": 0.1624, "step": 1596 }, { "epoch": 0.25874918988982504, "grad_norm": 1.162490963935852, "learning_rate": 4.958666879671458e-06, "loss": 0.1569, "step": 1597 }, { "epoch": 0.25891121192482175, "grad_norm": 1.267038106918335, "learning_rate": 4.95858765118921e-06, "loss": 0.1804, "step": 1598 }, { "epoch": 0.2590732339598185, "grad_norm": 1.0841212272644043, "learning_rate": 4.9585083474801296e-06, "loss": 0.1546, "step": 1599 }, { "epoch": 0.2592352559948153, "grad_norm": 1.171931266784668, "learning_rate": 4.9584289685466444e-06, "loss": 0.1688, "step": 1600 }, { "epoch": 0.25939727802981205, "grad_norm": 1.20844304561615, "learning_rate": 4.9583495143911804e-06, "loss": 0.1752, "step": 1601 }, { "epoch": 0.2595593000648088, "grad_norm": 1.2079849243164062, "learning_rate": 4.958269985016172e-06, "loss": 0.1573, "step": 1602 }, { "epoch": 0.2597213220998056, "grad_norm": 1.101150631904602, "learning_rate": 4.95819038042405e-06, "loss": 0.1602, "step": 1603 }, { "epoch": 0.25988334413480235, "grad_norm": 1.0884321928024292, "learning_rate": 4.958110700617251e-06, "loss": 0.1451, "step": 1604 }, { "epoch": 0.2600453661697991, "grad_norm": 1.1064330339431763, "learning_rate": 4.958030945598213e-06, "loss": 0.1497, "step": 1605 }, { "epoch": 0.26020738820479583, "grad_norm": 1.055196762084961, "learning_rate": 4.957951115369378e-06, "loss": 0.1493, "step": 1606 }, { "epoch": 0.2603694102397926, "grad_norm": 1.1254777908325195, "learning_rate": 4.957871209933185e-06, "loss": 0.156, "step": 1607 }, { "epoch": 0.26053143227478937, "grad_norm": 1.1287553310394287, "learning_rate": 4.957791229292082e-06, "loss": 0.1537, "step": 1608 }, { "epoch": 0.26069345430978613, "grad_norm": 1.1099610328674316, "learning_rate": 4.957711173448515e-06, "loss": 0.1447, "step": 1609 }, { "epoch": 0.2608554763447829, "grad_norm": 1.1488322019577026, "learning_rate": 4.957631042404934e-06, "loss": 0.1814, "step": 1610 }, { "epoch": 0.26101749837977967, "grad_norm": 1.169036626815796, "learning_rate": 4.957550836163789e-06, "loss": 0.1601, "step": 1611 }, { "epoch": 0.26117952041477643, "grad_norm": 1.2050291299819946, "learning_rate": 4.957470554727536e-06, "loss": 0.1633, "step": 1612 }, { "epoch": 0.26134154244977315, "grad_norm": 1.1404060125350952, "learning_rate": 4.9573901980986315e-06, "loss": 0.1632, "step": 1613 }, { "epoch": 0.2615035644847699, "grad_norm": 1.3234813213348389, "learning_rate": 4.9573097662795344e-06, "loss": 0.1938, "step": 1614 }, { "epoch": 0.2616655865197667, "grad_norm": 1.1954011917114258, "learning_rate": 4.957229259272705e-06, "loss": 0.1611, "step": 1615 }, { "epoch": 0.26182760855476345, "grad_norm": 1.1279500722885132, "learning_rate": 4.957148677080605e-06, "loss": 0.1471, "step": 1616 }, { "epoch": 0.2619896305897602, "grad_norm": 1.2045884132385254, "learning_rate": 4.957068019705703e-06, "loss": 0.1625, "step": 1617 }, { "epoch": 0.262151652624757, "grad_norm": 1.2116997241973877, "learning_rate": 4.956987287150465e-06, "loss": 0.1822, "step": 1618 }, { "epoch": 0.26231367465975375, "grad_norm": 1.1517181396484375, "learning_rate": 4.956906479417361e-06, "loss": 0.1566, "step": 1619 }, { "epoch": 0.26247569669475046, "grad_norm": 1.1915812492370605, "learning_rate": 4.956825596508867e-06, "loss": 0.1808, "step": 1620 }, { "epoch": 0.2626377187297472, "grad_norm": 1.0590485334396362, "learning_rate": 4.9567446384274525e-06, "loss": 0.153, "step": 1621 }, { "epoch": 0.262799740764744, "grad_norm": 1.147382140159607, "learning_rate": 4.956663605175599e-06, "loss": 0.1621, "step": 1622 }, { "epoch": 0.26296176279974076, "grad_norm": 1.2645186185836792, "learning_rate": 4.956582496755783e-06, "loss": 0.1875, "step": 1623 }, { "epoch": 0.26312378483473753, "grad_norm": 1.0937161445617676, "learning_rate": 4.956501313170487e-06, "loss": 0.1671, "step": 1624 }, { "epoch": 0.2632858068697343, "grad_norm": 1.2296099662780762, "learning_rate": 4.956420054422197e-06, "loss": 0.1887, "step": 1625 }, { "epoch": 0.26344782890473106, "grad_norm": 1.030358910560608, "learning_rate": 4.956338720513397e-06, "loss": 0.1567, "step": 1626 }, { "epoch": 0.26360985093972783, "grad_norm": 1.3956369161605835, "learning_rate": 4.956257311446576e-06, "loss": 0.1818, "step": 1627 }, { "epoch": 0.26377187297472454, "grad_norm": 1.2601412534713745, "learning_rate": 4.956175827224226e-06, "loss": 0.1668, "step": 1628 }, { "epoch": 0.2639338950097213, "grad_norm": 1.13845694065094, "learning_rate": 4.956094267848839e-06, "loss": 0.1688, "step": 1629 }, { "epoch": 0.2640959170447181, "grad_norm": 1.1298080682754517, "learning_rate": 4.956012633322912e-06, "loss": 0.1605, "step": 1630 }, { "epoch": 0.26425793907971484, "grad_norm": 1.1088753938674927, "learning_rate": 4.955930923648941e-06, "loss": 0.1386, "step": 1631 }, { "epoch": 0.2644199611147116, "grad_norm": 1.1304455995559692, "learning_rate": 4.955849138829428e-06, "loss": 0.1563, "step": 1632 }, { "epoch": 0.2645819831497084, "grad_norm": 1.1293634176254272, "learning_rate": 4.955767278866872e-06, "loss": 0.1387, "step": 1633 }, { "epoch": 0.26474400518470514, "grad_norm": 1.0126081705093384, "learning_rate": 4.955685343763782e-06, "loss": 0.1385, "step": 1634 }, { "epoch": 0.26490602721970186, "grad_norm": 1.0980356931686401, "learning_rate": 4.955603333522663e-06, "loss": 0.1575, "step": 1635 }, { "epoch": 0.2650680492546986, "grad_norm": 1.0100072622299194, "learning_rate": 4.9555212481460245e-06, "loss": 0.1492, "step": 1636 }, { "epoch": 0.2652300712896954, "grad_norm": 1.275562047958374, "learning_rate": 4.955439087636378e-06, "loss": 0.1708, "step": 1637 }, { "epoch": 0.26539209332469216, "grad_norm": 1.1762036085128784, "learning_rate": 4.955356851996236e-06, "loss": 0.1598, "step": 1638 }, { "epoch": 0.2655541153596889, "grad_norm": 1.199687123298645, "learning_rate": 4.9552745412281175e-06, "loss": 0.1696, "step": 1639 }, { "epoch": 0.2657161373946857, "grad_norm": 1.060361385345459, "learning_rate": 4.955192155334539e-06, "loss": 0.1561, "step": 1640 }, { "epoch": 0.26587815942968246, "grad_norm": 1.3154797554016113, "learning_rate": 4.955109694318024e-06, "loss": 0.1891, "step": 1641 }, { "epoch": 0.26604018146467917, "grad_norm": 1.0718739032745361, "learning_rate": 4.955027158181092e-06, "loss": 0.1487, "step": 1642 }, { "epoch": 0.26620220349967594, "grad_norm": 1.0659395456314087, "learning_rate": 4.95494454692627e-06, "loss": 0.15, "step": 1643 }, { "epoch": 0.2663642255346727, "grad_norm": 1.4435391426086426, "learning_rate": 4.9548618605560855e-06, "loss": 0.1911, "step": 1644 }, { "epoch": 0.26652624756966947, "grad_norm": 1.1285501718521118, "learning_rate": 4.954779099073069e-06, "loss": 0.1573, "step": 1645 }, { "epoch": 0.26668826960466624, "grad_norm": 1.2026793956756592, "learning_rate": 4.954696262479753e-06, "loss": 0.1823, "step": 1646 }, { "epoch": 0.266850291639663, "grad_norm": 1.1052802801132202, "learning_rate": 4.954613350778671e-06, "loss": 0.1497, "step": 1647 }, { "epoch": 0.2670123136746598, "grad_norm": 1.0569524765014648, "learning_rate": 4.954530363972361e-06, "loss": 0.1615, "step": 1648 }, { "epoch": 0.26717433570965654, "grad_norm": 1.3057336807250977, "learning_rate": 4.954447302063362e-06, "loss": 0.1856, "step": 1649 }, { "epoch": 0.26733635774465325, "grad_norm": 1.0749009847640991, "learning_rate": 4.954364165054214e-06, "loss": 0.169, "step": 1650 }, { "epoch": 0.26749837977965, "grad_norm": 1.1229180097579956, "learning_rate": 4.954280952947463e-06, "loss": 0.1572, "step": 1651 }, { "epoch": 0.2676604018146468, "grad_norm": 1.134561538696289, "learning_rate": 4.9541976657456535e-06, "loss": 0.1618, "step": 1652 }, { "epoch": 0.26782242384964355, "grad_norm": 1.229466199874878, "learning_rate": 4.954114303451335e-06, "loss": 0.1516, "step": 1653 }, { "epoch": 0.2679844458846403, "grad_norm": 1.3184914588928223, "learning_rate": 4.954030866067057e-06, "loss": 0.1565, "step": 1654 }, { "epoch": 0.2681464679196371, "grad_norm": 1.1177244186401367, "learning_rate": 4.953947353595374e-06, "loss": 0.1492, "step": 1655 }, { "epoch": 0.26830848995463386, "grad_norm": 1.136872410774231, "learning_rate": 4.95386376603884e-06, "loss": 0.1586, "step": 1656 }, { "epoch": 0.26847051198963057, "grad_norm": 1.8951784372329712, "learning_rate": 4.953780103400012e-06, "loss": 0.1676, "step": 1657 }, { "epoch": 0.26863253402462733, "grad_norm": 1.2328137159347534, "learning_rate": 4.953696365681452e-06, "loss": 0.1778, "step": 1658 }, { "epoch": 0.2687945560596241, "grad_norm": 1.0323786735534668, "learning_rate": 4.953612552885721e-06, "loss": 0.1374, "step": 1659 }, { "epoch": 0.26895657809462087, "grad_norm": 1.2623248100280762, "learning_rate": 4.953528665015383e-06, "loss": 0.1787, "step": 1660 }, { "epoch": 0.26911860012961764, "grad_norm": 1.1066336631774902, "learning_rate": 4.953444702073006e-06, "loss": 0.1546, "step": 1661 }, { "epoch": 0.2692806221646144, "grad_norm": 1.1255744695663452, "learning_rate": 4.953360664061159e-06, "loss": 0.1574, "step": 1662 }, { "epoch": 0.26944264419961117, "grad_norm": 1.143039584159851, "learning_rate": 4.953276550982411e-06, "loss": 0.1585, "step": 1663 }, { "epoch": 0.2696046662346079, "grad_norm": 1.1908190250396729, "learning_rate": 4.953192362839338e-06, "loss": 0.1744, "step": 1664 }, { "epoch": 0.26976668826960465, "grad_norm": 1.1476013660430908, "learning_rate": 4.953108099634516e-06, "loss": 0.1698, "step": 1665 }, { "epoch": 0.2699287103046014, "grad_norm": 1.0105767250061035, "learning_rate": 4.953023761370521e-06, "loss": 0.1454, "step": 1666 }, { "epoch": 0.2700907323395982, "grad_norm": 0.9987475872039795, "learning_rate": 4.9529393480499365e-06, "loss": 0.1345, "step": 1667 }, { "epoch": 0.27025275437459495, "grad_norm": 1.234141230583191, "learning_rate": 4.952854859675343e-06, "loss": 0.1642, "step": 1668 }, { "epoch": 0.2704147764095917, "grad_norm": 1.2194494009017944, "learning_rate": 4.952770296249328e-06, "loss": 0.1766, "step": 1669 }, { "epoch": 0.2705767984445885, "grad_norm": 1.22120201587677, "learning_rate": 4.952685657774476e-06, "loss": 0.1875, "step": 1670 }, { "epoch": 0.2707388204795852, "grad_norm": 1.0285940170288086, "learning_rate": 4.952600944253379e-06, "loss": 0.1544, "step": 1671 }, { "epoch": 0.27090084251458196, "grad_norm": 1.1357102394104004, "learning_rate": 4.952516155688628e-06, "loss": 0.1668, "step": 1672 }, { "epoch": 0.27106286454957873, "grad_norm": 1.0606294870376587, "learning_rate": 4.952431292082818e-06, "loss": 0.1639, "step": 1673 }, { "epoch": 0.2712248865845755, "grad_norm": 1.2481400966644287, "learning_rate": 4.9523463534385444e-06, "loss": 0.1709, "step": 1674 }, { "epoch": 0.27138690861957226, "grad_norm": 1.1794558763504028, "learning_rate": 4.9522613397584075e-06, "loss": 0.1749, "step": 1675 }, { "epoch": 0.27154893065456903, "grad_norm": 1.0960800647735596, "learning_rate": 4.952176251045008e-06, "loss": 0.1427, "step": 1676 }, { "epoch": 0.2717109526895658, "grad_norm": 1.2068989276885986, "learning_rate": 4.95209108730095e-06, "loss": 0.1591, "step": 1677 }, { "epoch": 0.27187297472456257, "grad_norm": 1.1335726976394653, "learning_rate": 4.952005848528838e-06, "loss": 0.1549, "step": 1678 }, { "epoch": 0.2720349967595593, "grad_norm": 1.113123893737793, "learning_rate": 4.95192053473128e-06, "loss": 0.1502, "step": 1679 }, { "epoch": 0.27219701879455604, "grad_norm": 1.2119156122207642, "learning_rate": 4.951835145910888e-06, "loss": 0.1636, "step": 1680 }, { "epoch": 0.2723590408295528, "grad_norm": 1.0948169231414795, "learning_rate": 4.951749682070274e-06, "loss": 0.1543, "step": 1681 }, { "epoch": 0.2725210628645496, "grad_norm": 1.4161688089370728, "learning_rate": 4.951664143212053e-06, "loss": 0.1859, "step": 1682 }, { "epoch": 0.27268308489954635, "grad_norm": 1.244466781616211, "learning_rate": 4.951578529338842e-06, "loss": 0.1875, "step": 1683 }, { "epoch": 0.2728451069345431, "grad_norm": 2.2999372482299805, "learning_rate": 4.95149284045326e-06, "loss": 0.1494, "step": 1684 }, { "epoch": 0.2730071289695399, "grad_norm": 1.1153371334075928, "learning_rate": 4.95140707655793e-06, "loss": 0.1467, "step": 1685 }, { "epoch": 0.2731691510045366, "grad_norm": 1.0886139869689941, "learning_rate": 4.951321237655477e-06, "loss": 0.148, "step": 1686 }, { "epoch": 0.27333117303953336, "grad_norm": 1.16849684715271, "learning_rate": 4.951235323748524e-06, "loss": 0.1876, "step": 1687 }, { "epoch": 0.2734931950745301, "grad_norm": 1.0493582487106323, "learning_rate": 4.951149334839703e-06, "loss": 0.1584, "step": 1688 }, { "epoch": 0.2736552171095269, "grad_norm": 1.146984577178955, "learning_rate": 4.951063270931644e-06, "loss": 0.1778, "step": 1689 }, { "epoch": 0.27381723914452366, "grad_norm": 1.1503607034683228, "learning_rate": 4.950977132026981e-06, "loss": 0.1555, "step": 1690 }, { "epoch": 0.27397926117952043, "grad_norm": 1.2447178363800049, "learning_rate": 4.950890918128348e-06, "loss": 0.1876, "step": 1691 }, { "epoch": 0.2741412832145172, "grad_norm": 1.197851300239563, "learning_rate": 4.9508046292383846e-06, "loss": 0.1763, "step": 1692 }, { "epoch": 0.2743033052495139, "grad_norm": 1.1843172311782837, "learning_rate": 4.950718265359729e-06, "loss": 0.166, "step": 1693 }, { "epoch": 0.2744653272845107, "grad_norm": 1.0466846227645874, "learning_rate": 4.950631826495027e-06, "loss": 0.1472, "step": 1694 }, { "epoch": 0.27462734931950744, "grad_norm": 1.1562830209732056, "learning_rate": 4.950545312646921e-06, "loss": 0.1596, "step": 1695 }, { "epoch": 0.2747893713545042, "grad_norm": 1.1788415908813477, "learning_rate": 4.950458723818058e-06, "loss": 0.1586, "step": 1696 }, { "epoch": 0.274951393389501, "grad_norm": 1.238731861114502, "learning_rate": 4.9503720600110884e-06, "loss": 0.1626, "step": 1697 }, { "epoch": 0.27511341542449774, "grad_norm": 1.1631656885147095, "learning_rate": 4.950285321228664e-06, "loss": 0.1508, "step": 1698 }, { "epoch": 0.2752754374594945, "grad_norm": 1.2769675254821777, "learning_rate": 4.950198507473438e-06, "loss": 0.1555, "step": 1699 }, { "epoch": 0.2754374594944913, "grad_norm": 1.2280595302581787, "learning_rate": 4.950111618748067e-06, "loss": 0.1416, "step": 1700 }, { "epoch": 0.275599481529488, "grad_norm": 1.3224934339523315, "learning_rate": 4.95002465505521e-06, "loss": 0.1673, "step": 1701 }, { "epoch": 0.27576150356448476, "grad_norm": 1.2435288429260254, "learning_rate": 4.949937616397527e-06, "loss": 0.1776, "step": 1702 }, { "epoch": 0.2759235255994815, "grad_norm": 1.2028788328170776, "learning_rate": 4.949850502777681e-06, "loss": 0.1704, "step": 1703 }, { "epoch": 0.2760855476344783, "grad_norm": 1.0093039274215698, "learning_rate": 4.949763314198339e-06, "loss": 0.1424, "step": 1704 }, { "epoch": 0.27624756966947506, "grad_norm": 1.1535528898239136, "learning_rate": 4.949676050662169e-06, "loss": 0.1564, "step": 1705 }, { "epoch": 0.2764095917044718, "grad_norm": 1.1377063989639282, "learning_rate": 4.949588712171838e-06, "loss": 0.1687, "step": 1706 }, { "epoch": 0.2765716137394686, "grad_norm": 1.1593130826950073, "learning_rate": 4.949501298730021e-06, "loss": 0.1611, "step": 1707 }, { "epoch": 0.2767336357744653, "grad_norm": 1.2367503643035889, "learning_rate": 4.949413810339392e-06, "loss": 0.1803, "step": 1708 }, { "epoch": 0.27689565780946207, "grad_norm": 1.1531428098678589, "learning_rate": 4.9493262470026286e-06, "loss": 0.183, "step": 1709 }, { "epoch": 0.27705767984445884, "grad_norm": 1.1906907558441162, "learning_rate": 4.949238608722408e-06, "loss": 0.1486, "step": 1710 }, { "epoch": 0.2772197018794556, "grad_norm": 1.1683154106140137, "learning_rate": 4.949150895501414e-06, "loss": 0.1657, "step": 1711 }, { "epoch": 0.27738172391445237, "grad_norm": 1.2222694158554077, "learning_rate": 4.949063107342329e-06, "loss": 0.1983, "step": 1712 }, { "epoch": 0.27754374594944914, "grad_norm": 1.0516256093978882, "learning_rate": 4.948975244247839e-06, "loss": 0.1497, "step": 1713 }, { "epoch": 0.2777057679844459, "grad_norm": 1.630317211151123, "learning_rate": 4.948887306220634e-06, "loss": 0.2039, "step": 1714 }, { "epoch": 0.2778677900194426, "grad_norm": 1.3137788772583008, "learning_rate": 4.948799293263403e-06, "loss": 0.1633, "step": 1715 }, { "epoch": 0.2780298120544394, "grad_norm": 1.0964162349700928, "learning_rate": 4.94871120537884e-06, "loss": 0.1566, "step": 1716 }, { "epoch": 0.27819183408943615, "grad_norm": 1.437134027481079, "learning_rate": 4.948623042569639e-06, "loss": 0.1775, "step": 1717 }, { "epoch": 0.2783538561244329, "grad_norm": 1.0482769012451172, "learning_rate": 4.9485348048385e-06, "loss": 0.1494, "step": 1718 }, { "epoch": 0.2785158781594297, "grad_norm": 1.0254579782485962, "learning_rate": 4.94844649218812e-06, "loss": 0.1427, "step": 1719 }, { "epoch": 0.27867790019442645, "grad_norm": 1.1733806133270264, "learning_rate": 4.9483581046212025e-06, "loss": 0.1733, "step": 1720 }, { "epoch": 0.2788399222294232, "grad_norm": 1.1621792316436768, "learning_rate": 4.948269642140453e-06, "loss": 0.1532, "step": 1721 }, { "epoch": 0.27900194426442, "grad_norm": 1.2027387619018555, "learning_rate": 4.948181104748576e-06, "loss": 0.1711, "step": 1722 }, { "epoch": 0.2791639662994167, "grad_norm": 1.3323115110397339, "learning_rate": 4.9480924924482824e-06, "loss": 0.185, "step": 1723 }, { "epoch": 0.27932598833441347, "grad_norm": 1.1466425657272339, "learning_rate": 4.948003805242282e-06, "loss": 0.1606, "step": 1724 }, { "epoch": 0.27948801036941023, "grad_norm": 1.1077498197555542, "learning_rate": 4.94791504313329e-06, "loss": 0.1582, "step": 1725 }, { "epoch": 0.279650032404407, "grad_norm": 1.1000559329986572, "learning_rate": 4.9478262061240216e-06, "loss": 0.1451, "step": 1726 }, { "epoch": 0.27981205443940377, "grad_norm": 1.2915170192718506, "learning_rate": 4.9477372942171945e-06, "loss": 0.1759, "step": 1727 }, { "epoch": 0.27997407647440054, "grad_norm": 1.1661245822906494, "learning_rate": 4.947648307415529e-06, "loss": 0.1615, "step": 1728 }, { "epoch": 0.2801360985093973, "grad_norm": 1.1373041868209839, "learning_rate": 4.947559245721749e-06, "loss": 0.1458, "step": 1729 }, { "epoch": 0.280298120544394, "grad_norm": 1.0064786672592163, "learning_rate": 4.947470109138579e-06, "loss": 0.1409, "step": 1730 }, { "epoch": 0.2804601425793908, "grad_norm": 0.9815242290496826, "learning_rate": 4.947380897668747e-06, "loss": 0.1361, "step": 1731 }, { "epoch": 0.28062216461438755, "grad_norm": 0.9623909592628479, "learning_rate": 4.947291611314981e-06, "loss": 0.1412, "step": 1732 }, { "epoch": 0.2807841866493843, "grad_norm": 1.241988182067871, "learning_rate": 4.947202250080015e-06, "loss": 0.1754, "step": 1733 }, { "epoch": 0.2809462086843811, "grad_norm": 1.213842749595642, "learning_rate": 4.9471128139665826e-06, "loss": 0.1757, "step": 1734 }, { "epoch": 0.28110823071937785, "grad_norm": 1.181797742843628, "learning_rate": 4.9470233029774195e-06, "loss": 0.1591, "step": 1735 }, { "epoch": 0.2812702527543746, "grad_norm": 1.0789086818695068, "learning_rate": 4.9469337171152645e-06, "loss": 0.1524, "step": 1736 }, { "epoch": 0.28143227478937133, "grad_norm": 1.2921925783157349, "learning_rate": 4.94684405638286e-06, "loss": 0.1836, "step": 1737 }, { "epoch": 0.2815942968243681, "grad_norm": 1.1767724752426147, "learning_rate": 4.946754320782948e-06, "loss": 0.1598, "step": 1738 }, { "epoch": 0.28175631885936486, "grad_norm": 1.1282929182052612, "learning_rate": 4.946664510318275e-06, "loss": 0.1573, "step": 1739 }, { "epoch": 0.28191834089436163, "grad_norm": 1.316778302192688, "learning_rate": 4.946574624991589e-06, "loss": 0.1816, "step": 1740 }, { "epoch": 0.2820803629293584, "grad_norm": 1.234459638595581, "learning_rate": 4.9464846648056396e-06, "loss": 0.1854, "step": 1741 }, { "epoch": 0.28224238496435516, "grad_norm": 1.1687872409820557, "learning_rate": 4.946394629763181e-06, "loss": 0.1638, "step": 1742 }, { "epoch": 0.28240440699935193, "grad_norm": 1.0062087774276733, "learning_rate": 4.946304519866966e-06, "loss": 0.1454, "step": 1743 }, { "epoch": 0.2825664290343487, "grad_norm": 1.048824429512024, "learning_rate": 4.946214335119752e-06, "loss": 0.1415, "step": 1744 }, { "epoch": 0.2827284510693454, "grad_norm": 1.0653513669967651, "learning_rate": 4.9461240755243e-06, "loss": 0.1454, "step": 1745 }, { "epoch": 0.2828904731043422, "grad_norm": 1.1784061193466187, "learning_rate": 4.94603374108337e-06, "loss": 0.1578, "step": 1746 }, { "epoch": 0.28305249513933894, "grad_norm": 1.1248995065689087, "learning_rate": 4.945943331799728e-06, "loss": 0.1534, "step": 1747 }, { "epoch": 0.2832145171743357, "grad_norm": 1.266843557357788, "learning_rate": 4.945852847676138e-06, "loss": 0.1917, "step": 1748 }, { "epoch": 0.2833765392093325, "grad_norm": 1.1719672679901123, "learning_rate": 4.945762288715371e-06, "loss": 0.1702, "step": 1749 }, { "epoch": 0.28353856124432925, "grad_norm": 1.1670610904693604, "learning_rate": 4.945671654920195e-06, "loss": 0.1615, "step": 1750 }, { "epoch": 0.283700583279326, "grad_norm": 1.1604700088500977, "learning_rate": 4.945580946293386e-06, "loss": 0.1599, "step": 1751 }, { "epoch": 0.2838626053143227, "grad_norm": 1.085565447807312, "learning_rate": 4.945490162837718e-06, "loss": 0.1558, "step": 1752 }, { "epoch": 0.2840246273493195, "grad_norm": 1.1299009323120117, "learning_rate": 4.945399304555968e-06, "loss": 0.1584, "step": 1753 }, { "epoch": 0.28418664938431626, "grad_norm": 1.1173944473266602, "learning_rate": 4.945308371450919e-06, "loss": 0.1524, "step": 1754 }, { "epoch": 0.284348671419313, "grad_norm": 0.8910510540008545, "learning_rate": 4.945217363525349e-06, "loss": 0.1184, "step": 1755 }, { "epoch": 0.2845106934543098, "grad_norm": 1.2654694318771362, "learning_rate": 4.945126280782047e-06, "loss": 0.2026, "step": 1756 }, { "epoch": 0.28467271548930656, "grad_norm": 1.1252206563949585, "learning_rate": 4.945035123223797e-06, "loss": 0.1871, "step": 1757 }, { "epoch": 0.2848347375243033, "grad_norm": 1.245661973953247, "learning_rate": 4.944943890853389e-06, "loss": 0.1596, "step": 1758 }, { "epoch": 0.28499675955930004, "grad_norm": 1.2262191772460938, "learning_rate": 4.944852583673615e-06, "loss": 0.1797, "step": 1759 }, { "epoch": 0.2851587815942968, "grad_norm": 1.0503751039505005, "learning_rate": 4.944761201687268e-06, "loss": 0.1437, "step": 1760 }, { "epoch": 0.2853208036292936, "grad_norm": 1.460869550704956, "learning_rate": 4.944669744897144e-06, "loss": 0.1576, "step": 1761 }, { "epoch": 0.28548282566429034, "grad_norm": 1.1839721202850342, "learning_rate": 4.944578213306043e-06, "loss": 0.1463, "step": 1762 }, { "epoch": 0.2856448476992871, "grad_norm": 1.1517478227615356, "learning_rate": 4.944486606916764e-06, "loss": 0.1873, "step": 1763 }, { "epoch": 0.2858068697342839, "grad_norm": 1.1901956796646118, "learning_rate": 4.94439492573211e-06, "loss": 0.159, "step": 1764 }, { "epoch": 0.28596889176928064, "grad_norm": 1.0697051286697388, "learning_rate": 4.944303169754887e-06, "loss": 0.1668, "step": 1765 }, { "epoch": 0.2861309138042774, "grad_norm": 1.2297983169555664, "learning_rate": 4.944211338987901e-06, "loss": 0.1695, "step": 1766 }, { "epoch": 0.2862929358392741, "grad_norm": 1.160058617591858, "learning_rate": 4.944119433433964e-06, "loss": 0.1674, "step": 1767 }, { "epoch": 0.2864549578742709, "grad_norm": 1.1287837028503418, "learning_rate": 4.944027453095887e-06, "loss": 0.1578, "step": 1768 }, { "epoch": 0.28661697990926766, "grad_norm": 1.0510386228561401, "learning_rate": 4.943935397976484e-06, "loss": 0.154, "step": 1769 }, { "epoch": 0.2867790019442644, "grad_norm": 1.118772268295288, "learning_rate": 4.943843268078572e-06, "loss": 0.151, "step": 1770 }, { "epoch": 0.2869410239792612, "grad_norm": 1.0504902601242065, "learning_rate": 4.94375106340497e-06, "loss": 0.1569, "step": 1771 }, { "epoch": 0.28710304601425796, "grad_norm": 1.1878551244735718, "learning_rate": 4.9436587839585e-06, "loss": 0.1696, "step": 1772 }, { "epoch": 0.2872650680492547, "grad_norm": 1.1703705787658691, "learning_rate": 4.9435664297419836e-06, "loss": 0.1505, "step": 1773 }, { "epoch": 0.28742709008425144, "grad_norm": 1.1289130449295044, "learning_rate": 4.9434740007582485e-06, "loss": 0.1568, "step": 1774 }, { "epoch": 0.2875891121192482, "grad_norm": 1.1618683338165283, "learning_rate": 4.943381497010122e-06, "loss": 0.1708, "step": 1775 }, { "epoch": 0.28775113415424497, "grad_norm": 1.0598828792572021, "learning_rate": 4.943288918500434e-06, "loss": 0.1608, "step": 1776 }, { "epoch": 0.28791315618924174, "grad_norm": 1.1021727323532104, "learning_rate": 4.943196265232018e-06, "loss": 0.1598, "step": 1777 }, { "epoch": 0.2880751782242385, "grad_norm": 1.0794800519943237, "learning_rate": 4.94310353720771e-06, "loss": 0.1445, "step": 1778 }, { "epoch": 0.28823720025923527, "grad_norm": 1.0673820972442627, "learning_rate": 4.9430107344303445e-06, "loss": 0.1494, "step": 1779 }, { "epoch": 0.28839922229423204, "grad_norm": 1.0401991605758667, "learning_rate": 4.942917856902763e-06, "loss": 0.1386, "step": 1780 }, { "epoch": 0.28856124432922875, "grad_norm": 1.2189371585845947, "learning_rate": 4.9428249046278065e-06, "loss": 0.1655, "step": 1781 }, { "epoch": 0.2887232663642255, "grad_norm": 1.2600358724594116, "learning_rate": 4.942731877608319e-06, "loss": 0.1771, "step": 1782 }, { "epoch": 0.2888852883992223, "grad_norm": 1.2140158414840698, "learning_rate": 4.942638775847149e-06, "loss": 0.1741, "step": 1783 }, { "epoch": 0.28904731043421905, "grad_norm": 1.0172653198242188, "learning_rate": 4.942545599347142e-06, "loss": 0.1417, "step": 1784 }, { "epoch": 0.2892093324692158, "grad_norm": 1.2738704681396484, "learning_rate": 4.942452348111151e-06, "loss": 0.1731, "step": 1785 }, { "epoch": 0.2893713545042126, "grad_norm": 1.1915194988250732, "learning_rate": 4.942359022142028e-06, "loss": 0.1663, "step": 1786 }, { "epoch": 0.28953337653920935, "grad_norm": 1.04640531539917, "learning_rate": 4.94226562144263e-06, "loss": 0.1371, "step": 1787 }, { "epoch": 0.28969539857420606, "grad_norm": 1.054979681968689, "learning_rate": 4.942172146015814e-06, "loss": 0.1486, "step": 1788 }, { "epoch": 0.28985742060920283, "grad_norm": 1.1150860786437988, "learning_rate": 4.942078595864441e-06, "loss": 0.1521, "step": 1789 }, { "epoch": 0.2900194426441996, "grad_norm": 1.2064958810806274, "learning_rate": 4.941984970991372e-06, "loss": 0.1682, "step": 1790 }, { "epoch": 0.29018146467919637, "grad_norm": 1.0891704559326172, "learning_rate": 4.941891271399473e-06, "loss": 0.1581, "step": 1791 }, { "epoch": 0.29034348671419313, "grad_norm": 1.2679988145828247, "learning_rate": 4.9417974970916096e-06, "loss": 0.1671, "step": 1792 }, { "epoch": 0.2905055087491899, "grad_norm": 1.0891644954681396, "learning_rate": 4.941703648070653e-06, "loss": 0.1621, "step": 1793 }, { "epoch": 0.29066753078418667, "grad_norm": 0.9570609331130981, "learning_rate": 4.9416097243394725e-06, "loss": 0.1428, "step": 1794 }, { "epoch": 0.29082955281918343, "grad_norm": 1.0233310461044312, "learning_rate": 4.941515725900943e-06, "loss": 0.158, "step": 1795 }, { "epoch": 0.29099157485418015, "grad_norm": 1.1454052925109863, "learning_rate": 4.94142165275794e-06, "loss": 0.1722, "step": 1796 }, { "epoch": 0.2911535968891769, "grad_norm": 1.0904475450515747, "learning_rate": 4.941327504913344e-06, "loss": 0.164, "step": 1797 }, { "epoch": 0.2913156189241737, "grad_norm": 1.0754122734069824, "learning_rate": 4.941233282370034e-06, "loss": 0.1507, "step": 1798 }, { "epoch": 0.29147764095917045, "grad_norm": 1.1219291687011719, "learning_rate": 4.941138985130893e-06, "loss": 0.1742, "step": 1799 }, { "epoch": 0.2916396629941672, "grad_norm": 1.2050334215164185, "learning_rate": 4.941044613198807e-06, "loss": 0.1694, "step": 1800 }, { "epoch": 0.291801685029164, "grad_norm": 1.0843576192855835, "learning_rate": 4.940950166576661e-06, "loss": 0.1558, "step": 1801 }, { "epoch": 0.29196370706416075, "grad_norm": 1.1150481700897217, "learning_rate": 4.940855645267349e-06, "loss": 0.1559, "step": 1802 }, { "epoch": 0.29212572909915746, "grad_norm": 1.0459260940551758, "learning_rate": 4.94076104927376e-06, "loss": 0.1608, "step": 1803 }, { "epoch": 0.29228775113415423, "grad_norm": 1.014258623123169, "learning_rate": 4.94066637859879e-06, "loss": 0.133, "step": 1804 }, { "epoch": 0.292449773169151, "grad_norm": 1.1400861740112305, "learning_rate": 4.940571633245335e-06, "loss": 0.1778, "step": 1805 }, { "epoch": 0.29261179520414776, "grad_norm": 1.1275112628936768, "learning_rate": 4.940476813216294e-06, "loss": 0.1555, "step": 1806 }, { "epoch": 0.29277381723914453, "grad_norm": 1.0518170595169067, "learning_rate": 4.940381918514568e-06, "loss": 0.159, "step": 1807 }, { "epoch": 0.2929358392741413, "grad_norm": 1.0463672876358032, "learning_rate": 4.940286949143061e-06, "loss": 0.1614, "step": 1808 }, { "epoch": 0.29309786130913806, "grad_norm": 1.200708270072937, "learning_rate": 4.94019190510468e-06, "loss": 0.1809, "step": 1809 }, { "epoch": 0.2932598833441348, "grad_norm": 1.2362070083618164, "learning_rate": 4.940096786402331e-06, "loss": 0.1431, "step": 1810 }, { "epoch": 0.29342190537913154, "grad_norm": 1.1330715417861938, "learning_rate": 4.940001593038925e-06, "loss": 0.1431, "step": 1811 }, { "epoch": 0.2935839274141283, "grad_norm": 1.2720601558685303, "learning_rate": 4.939906325017374e-06, "loss": 0.1729, "step": 1812 }, { "epoch": 0.2937459494491251, "grad_norm": 1.2702455520629883, "learning_rate": 4.939810982340595e-06, "loss": 0.1722, "step": 1813 }, { "epoch": 0.29390797148412184, "grad_norm": 1.1870192289352417, "learning_rate": 4.939715565011504e-06, "loss": 0.1647, "step": 1814 }, { "epoch": 0.2940699935191186, "grad_norm": 1.2095550298690796, "learning_rate": 4.939620073033021e-06, "loss": 0.1647, "step": 1815 }, { "epoch": 0.2942320155541154, "grad_norm": 1.1870856285095215, "learning_rate": 4.939524506408068e-06, "loss": 0.1554, "step": 1816 }, { "epoch": 0.29439403758911215, "grad_norm": 1.080191969871521, "learning_rate": 4.939428865139568e-06, "loss": 0.1572, "step": 1817 }, { "epoch": 0.29455605962410886, "grad_norm": 1.1094964742660522, "learning_rate": 4.939333149230447e-06, "loss": 0.1623, "step": 1818 }, { "epoch": 0.2947180816591056, "grad_norm": 1.0105398893356323, "learning_rate": 4.939237358683636e-06, "loss": 0.1591, "step": 1819 }, { "epoch": 0.2948801036941024, "grad_norm": 1.1917641162872314, "learning_rate": 4.9391414935020656e-06, "loss": 0.1728, "step": 1820 }, { "epoch": 0.29504212572909916, "grad_norm": 1.0402076244354248, "learning_rate": 4.939045553688666e-06, "loss": 0.1525, "step": 1821 }, { "epoch": 0.2952041477640959, "grad_norm": 1.235533356666565, "learning_rate": 4.938949539246376e-06, "loss": 0.1522, "step": 1822 }, { "epoch": 0.2953661697990927, "grad_norm": 0.9965629577636719, "learning_rate": 4.9388534501781325e-06, "loss": 0.1515, "step": 1823 }, { "epoch": 0.29552819183408946, "grad_norm": 1.0826505422592163, "learning_rate": 4.938757286486874e-06, "loss": 0.14, "step": 1824 }, { "epoch": 0.29569021386908617, "grad_norm": 1.1033453941345215, "learning_rate": 4.938661048175545e-06, "loss": 0.1591, "step": 1825 }, { "epoch": 0.29585223590408294, "grad_norm": 1.169182538986206, "learning_rate": 4.93856473524709e-06, "loss": 0.154, "step": 1826 }, { "epoch": 0.2960142579390797, "grad_norm": 1.1595622301101685, "learning_rate": 4.938468347704455e-06, "loss": 0.1506, "step": 1827 }, { "epoch": 0.2961762799740765, "grad_norm": 1.1768068075180054, "learning_rate": 4.938371885550589e-06, "loss": 0.1738, "step": 1828 }, { "epoch": 0.29633830200907324, "grad_norm": 1.0577977895736694, "learning_rate": 4.938275348788443e-06, "loss": 0.1349, "step": 1829 }, { "epoch": 0.29650032404407, "grad_norm": 1.1384469270706177, "learning_rate": 4.938178737420974e-06, "loss": 0.1506, "step": 1830 }, { "epoch": 0.2966623460790668, "grad_norm": 1.1064151525497437, "learning_rate": 4.938082051451135e-06, "loss": 0.1583, "step": 1831 }, { "epoch": 0.2968243681140635, "grad_norm": 1.0915504693984985, "learning_rate": 4.937985290881886e-06, "loss": 0.1642, "step": 1832 }, { "epoch": 0.29698639014906025, "grad_norm": 0.9604697823524475, "learning_rate": 4.937888455716186e-06, "loss": 0.1342, "step": 1833 }, { "epoch": 0.297148412184057, "grad_norm": 1.1359755992889404, "learning_rate": 4.9377915459569995e-06, "loss": 0.161, "step": 1834 }, { "epoch": 0.2973104342190538, "grad_norm": 1.0202888250350952, "learning_rate": 4.93769456160729e-06, "loss": 0.144, "step": 1835 }, { "epoch": 0.29747245625405055, "grad_norm": 1.0656861066818237, "learning_rate": 4.937597502670027e-06, "loss": 0.1468, "step": 1836 }, { "epoch": 0.2976344782890473, "grad_norm": 1.2268567085266113, "learning_rate": 4.937500369148179e-06, "loss": 0.1796, "step": 1837 }, { "epoch": 0.2977965003240441, "grad_norm": 1.1543471813201904, "learning_rate": 4.9374031610447185e-06, "loss": 0.1666, "step": 1838 }, { "epoch": 0.29795852235904086, "grad_norm": 1.1135607957839966, "learning_rate": 4.9373058783626195e-06, "loss": 0.1525, "step": 1839 }, { "epoch": 0.29812054439403757, "grad_norm": 0.995367169380188, "learning_rate": 4.937208521104858e-06, "loss": 0.1383, "step": 1840 }, { "epoch": 0.29828256642903433, "grad_norm": 1.0049978494644165, "learning_rate": 4.9371110892744146e-06, "loss": 0.1365, "step": 1841 }, { "epoch": 0.2984445884640311, "grad_norm": 1.1172624826431274, "learning_rate": 4.937013582874269e-06, "loss": 0.1624, "step": 1842 }, { "epoch": 0.29860661049902787, "grad_norm": 1.1392813920974731, "learning_rate": 4.936916001907406e-06, "loss": 0.1725, "step": 1843 }, { "epoch": 0.29876863253402464, "grad_norm": 1.1414449214935303, "learning_rate": 4.93681834637681e-06, "loss": 0.1639, "step": 1844 }, { "epoch": 0.2989306545690214, "grad_norm": 1.2181705236434937, "learning_rate": 4.9367206162854695e-06, "loss": 0.1662, "step": 1845 }, { "epoch": 0.29909267660401817, "grad_norm": 1.1003068685531616, "learning_rate": 4.936622811636376e-06, "loss": 0.1654, "step": 1846 }, { "epoch": 0.2992546986390149, "grad_norm": 1.1408618688583374, "learning_rate": 4.93652493243252e-06, "loss": 0.1622, "step": 1847 }, { "epoch": 0.29941672067401165, "grad_norm": 1.5164649486541748, "learning_rate": 4.936426978676897e-06, "loss": 0.1466, "step": 1848 }, { "epoch": 0.2995787427090084, "grad_norm": 1.0539342164993286, "learning_rate": 4.9363289503725055e-06, "loss": 0.1605, "step": 1849 }, { "epoch": 0.2997407647440052, "grad_norm": 1.0239052772521973, "learning_rate": 4.936230847522343e-06, "loss": 0.1398, "step": 1850 }, { "epoch": 0.29990278677900195, "grad_norm": 0.9674493670463562, "learning_rate": 4.9361326701294124e-06, "loss": 0.1389, "step": 1851 }, { "epoch": 0.3000648088139987, "grad_norm": 1.2970346212387085, "learning_rate": 4.936034418196718e-06, "loss": 0.1836, "step": 1852 }, { "epoch": 0.3002268308489955, "grad_norm": 1.1987262964248657, "learning_rate": 4.935936091727264e-06, "loss": 0.1852, "step": 1853 }, { "epoch": 0.3003888528839922, "grad_norm": 1.051525354385376, "learning_rate": 4.935837690724063e-06, "loss": 0.1552, "step": 1854 }, { "epoch": 0.30055087491898896, "grad_norm": 1.2423964738845825, "learning_rate": 4.9357392151901204e-06, "loss": 0.1768, "step": 1855 }, { "epoch": 0.30071289695398573, "grad_norm": 1.0378652811050415, "learning_rate": 4.935640665128454e-06, "loss": 0.1486, "step": 1856 }, { "epoch": 0.3008749189889825, "grad_norm": 0.9993836879730225, "learning_rate": 4.935542040542077e-06, "loss": 0.1506, "step": 1857 }, { "epoch": 0.30103694102397927, "grad_norm": 1.056410789489746, "learning_rate": 4.935443341434008e-06, "loss": 0.1442, "step": 1858 }, { "epoch": 0.30119896305897603, "grad_norm": 1.1144357919692993, "learning_rate": 4.935344567807265e-06, "loss": 0.1679, "step": 1859 }, { "epoch": 0.3013609850939728, "grad_norm": 0.9949638247489929, "learning_rate": 4.935245719664873e-06, "loss": 0.1405, "step": 1860 }, { "epoch": 0.30152300712896957, "grad_norm": 1.090957760810852, "learning_rate": 4.935146797009854e-06, "loss": 0.1663, "step": 1861 }, { "epoch": 0.3016850291639663, "grad_norm": 1.0422848463058472, "learning_rate": 4.935047799845238e-06, "loss": 0.1413, "step": 1862 }, { "epoch": 0.30184705119896305, "grad_norm": 1.0720633268356323, "learning_rate": 4.93494872817405e-06, "loss": 0.1583, "step": 1863 }, { "epoch": 0.3020090732339598, "grad_norm": 1.1506516933441162, "learning_rate": 4.9348495819993235e-06, "loss": 0.1554, "step": 1864 }, { "epoch": 0.3021710952689566, "grad_norm": 1.3008663654327393, "learning_rate": 4.934750361324092e-06, "loss": 0.1636, "step": 1865 }, { "epoch": 0.30233311730395335, "grad_norm": 1.1331379413604736, "learning_rate": 4.9346510661513924e-06, "loss": 0.149, "step": 1866 }, { "epoch": 0.3024951393389501, "grad_norm": 1.184205174446106, "learning_rate": 4.934551696484262e-06, "loss": 0.1718, "step": 1867 }, { "epoch": 0.3026571613739469, "grad_norm": 1.1299738883972168, "learning_rate": 4.93445225232574e-06, "loss": 0.1697, "step": 1868 }, { "epoch": 0.3028191834089436, "grad_norm": 1.1674683094024658, "learning_rate": 4.934352733678871e-06, "loss": 0.1672, "step": 1869 }, { "epoch": 0.30298120544394036, "grad_norm": 1.0782699584960938, "learning_rate": 4.9342531405467e-06, "loss": 0.165, "step": 1870 }, { "epoch": 0.3031432274789371, "grad_norm": 1.0888804197311401, "learning_rate": 4.934153472932272e-06, "loss": 0.1549, "step": 1871 }, { "epoch": 0.3033052495139339, "grad_norm": 1.1337841749191284, "learning_rate": 4.934053730838639e-06, "loss": 0.1743, "step": 1872 }, { "epoch": 0.30346727154893066, "grad_norm": 1.0406060218811035, "learning_rate": 4.933953914268853e-06, "loss": 0.1504, "step": 1873 }, { "epoch": 0.30362929358392743, "grad_norm": 1.1255491971969604, "learning_rate": 4.9338540232259664e-06, "loss": 0.1566, "step": 1874 }, { "epoch": 0.3037913156189242, "grad_norm": 1.169628381729126, "learning_rate": 4.933754057713037e-06, "loss": 0.1566, "step": 1875 }, { "epoch": 0.3039533376539209, "grad_norm": 1.4170974493026733, "learning_rate": 4.9336540177331225e-06, "loss": 0.1623, "step": 1876 }, { "epoch": 0.3041153596889177, "grad_norm": 1.1771212816238403, "learning_rate": 4.933553903289285e-06, "loss": 0.1438, "step": 1877 }, { "epoch": 0.30427738172391444, "grad_norm": 1.251401662826538, "learning_rate": 4.9334537143845876e-06, "loss": 0.1762, "step": 1878 }, { "epoch": 0.3044394037589112, "grad_norm": 1.1183122396469116, "learning_rate": 4.933353451022094e-06, "loss": 0.1617, "step": 1879 }, { "epoch": 0.304601425793908, "grad_norm": 1.187208652496338, "learning_rate": 4.933253113204874e-06, "loss": 0.1815, "step": 1880 }, { "epoch": 0.30476344782890474, "grad_norm": 1.1539027690887451, "learning_rate": 4.933152700935997e-06, "loss": 0.1718, "step": 1881 }, { "epoch": 0.3049254698639015, "grad_norm": 1.0791608095169067, "learning_rate": 4.933052214218535e-06, "loss": 0.1562, "step": 1882 }, { "epoch": 0.3050874918988983, "grad_norm": 1.1433926820755005, "learning_rate": 4.932951653055564e-06, "loss": 0.1717, "step": 1883 }, { "epoch": 0.305249513933895, "grad_norm": 1.2988801002502441, "learning_rate": 4.93285101745016e-06, "loss": 0.1815, "step": 1884 }, { "epoch": 0.30541153596889176, "grad_norm": 1.1645463705062866, "learning_rate": 4.932750307405402e-06, "loss": 0.1674, "step": 1885 }, { "epoch": 0.3055735580038885, "grad_norm": 1.1217708587646484, "learning_rate": 4.932649522924372e-06, "loss": 0.1636, "step": 1886 }, { "epoch": 0.3057355800388853, "grad_norm": 1.087047815322876, "learning_rate": 4.932548664010153e-06, "loss": 0.1462, "step": 1887 }, { "epoch": 0.30589760207388206, "grad_norm": 1.0934066772460938, "learning_rate": 4.932447730665832e-06, "loss": 0.176, "step": 1888 }, { "epoch": 0.3060596241088788, "grad_norm": 1.0176986455917358, "learning_rate": 4.9323467228944965e-06, "loss": 0.1458, "step": 1889 }, { "epoch": 0.3062216461438756, "grad_norm": 0.9754590392112732, "learning_rate": 4.932245640699238e-06, "loss": 0.1338, "step": 1890 }, { "epoch": 0.3063836681788723, "grad_norm": 1.1034765243530273, "learning_rate": 4.932144484083148e-06, "loss": 0.16, "step": 1891 }, { "epoch": 0.30654569021386907, "grad_norm": 1.0700440406799316, "learning_rate": 4.932043253049323e-06, "loss": 0.1531, "step": 1892 }, { "epoch": 0.30670771224886584, "grad_norm": 1.2314177751541138, "learning_rate": 4.93194194760086e-06, "loss": 0.1607, "step": 1893 }, { "epoch": 0.3068697342838626, "grad_norm": 1.1649889945983887, "learning_rate": 4.931840567740858e-06, "loss": 0.1783, "step": 1894 }, { "epoch": 0.3070317563188594, "grad_norm": 1.099457025527954, "learning_rate": 4.9317391134724195e-06, "loss": 0.1763, "step": 1895 }, { "epoch": 0.30719377835385614, "grad_norm": 1.0466156005859375, "learning_rate": 4.93163758479865e-06, "loss": 0.1406, "step": 1896 }, { "epoch": 0.3073558003888529, "grad_norm": 1.3971292972564697, "learning_rate": 4.931535981722654e-06, "loss": 0.1728, "step": 1897 }, { "epoch": 0.3075178224238496, "grad_norm": 1.2202054262161255, "learning_rate": 4.931434304247541e-06, "loss": 0.1857, "step": 1898 }, { "epoch": 0.3076798444588464, "grad_norm": 1.1348539590835571, "learning_rate": 4.931332552376422e-06, "loss": 0.1688, "step": 1899 }, { "epoch": 0.30784186649384315, "grad_norm": 1.024888038635254, "learning_rate": 4.931230726112412e-06, "loss": 0.1495, "step": 1900 }, { "epoch": 0.3080038885288399, "grad_norm": 1.1341696977615356, "learning_rate": 4.931128825458623e-06, "loss": 0.1586, "step": 1901 }, { "epoch": 0.3081659105638367, "grad_norm": 1.094712495803833, "learning_rate": 4.9310268504181764e-06, "loss": 0.1744, "step": 1902 }, { "epoch": 0.30832793259883345, "grad_norm": 1.1410554647445679, "learning_rate": 4.930924800994192e-06, "loss": 0.1684, "step": 1903 }, { "epoch": 0.3084899546338302, "grad_norm": 1.1105761528015137, "learning_rate": 4.930822677189791e-06, "loss": 0.1649, "step": 1904 }, { "epoch": 0.30865197666882693, "grad_norm": 0.9734887480735779, "learning_rate": 4.930720479008098e-06, "loss": 0.1453, "step": 1905 }, { "epoch": 0.3088139987038237, "grad_norm": 1.0480237007141113, "learning_rate": 4.93061820645224e-06, "loss": 0.1511, "step": 1906 }, { "epoch": 0.30897602073882047, "grad_norm": 1.168448567390442, "learning_rate": 4.930515859525348e-06, "loss": 0.151, "step": 1907 }, { "epoch": 0.30913804277381723, "grad_norm": 1.0188759565353394, "learning_rate": 4.930413438230552e-06, "loss": 0.1355, "step": 1908 }, { "epoch": 0.309300064808814, "grad_norm": 1.064790964126587, "learning_rate": 4.930310942570987e-06, "loss": 0.1482, "step": 1909 }, { "epoch": 0.30946208684381077, "grad_norm": 1.1251689195632935, "learning_rate": 4.930208372549787e-06, "loss": 0.1616, "step": 1910 }, { "epoch": 0.30962410887880754, "grad_norm": 1.050378441810608, "learning_rate": 4.930105728170093e-06, "loss": 0.1405, "step": 1911 }, { "epoch": 0.3097861309138043, "grad_norm": 1.0885461568832397, "learning_rate": 4.930003009435043e-06, "loss": 0.1513, "step": 1912 }, { "epoch": 0.309948152948801, "grad_norm": 1.0849794149398804, "learning_rate": 4.929900216347783e-06, "loss": 0.1529, "step": 1913 }, { "epoch": 0.3101101749837978, "grad_norm": 1.1687252521514893, "learning_rate": 4.9297973489114565e-06, "loss": 0.1697, "step": 1914 }, { "epoch": 0.31027219701879455, "grad_norm": 1.0808390378952026, "learning_rate": 4.929694407129211e-06, "loss": 0.1474, "step": 1915 }, { "epoch": 0.3104342190537913, "grad_norm": 1.0042861700057983, "learning_rate": 4.929591391004196e-06, "loss": 0.1445, "step": 1916 }, { "epoch": 0.3105962410887881, "grad_norm": 1.0821775197982788, "learning_rate": 4.929488300539564e-06, "loss": 0.1594, "step": 1917 }, { "epoch": 0.31075826312378485, "grad_norm": 1.214570164680481, "learning_rate": 4.929385135738469e-06, "loss": 0.1792, "step": 1918 }, { "epoch": 0.3109202851587816, "grad_norm": 1.0570710897445679, "learning_rate": 4.929281896604068e-06, "loss": 0.1638, "step": 1919 }, { "epoch": 0.31108230719377833, "grad_norm": 0.8534372448921204, "learning_rate": 4.92917858313952e-06, "loss": 0.121, "step": 1920 }, { "epoch": 0.3112443292287751, "grad_norm": 1.1174410581588745, "learning_rate": 4.9290751953479856e-06, "loss": 0.1623, "step": 1921 }, { "epoch": 0.31140635126377186, "grad_norm": 1.2499920129776, "learning_rate": 4.928971733232628e-06, "loss": 0.1853, "step": 1922 }, { "epoch": 0.31156837329876863, "grad_norm": 0.961334764957428, "learning_rate": 4.928868196796615e-06, "loss": 0.1464, "step": 1923 }, { "epoch": 0.3117303953337654, "grad_norm": 1.0711408853530884, "learning_rate": 4.928764586043111e-06, "loss": 0.1597, "step": 1924 }, { "epoch": 0.31189241736876216, "grad_norm": 1.0463709831237793, "learning_rate": 4.928660900975289e-06, "loss": 0.1594, "step": 1925 }, { "epoch": 0.31205443940375893, "grad_norm": 1.1414794921875, "learning_rate": 4.9285571415963205e-06, "loss": 0.1703, "step": 1926 }, { "epoch": 0.31221646143875564, "grad_norm": 1.1475234031677246, "learning_rate": 4.928453307909381e-06, "loss": 0.177, "step": 1927 }, { "epoch": 0.3123784834737524, "grad_norm": 0.9466526508331299, "learning_rate": 4.928349399917646e-06, "loss": 0.1405, "step": 1928 }, { "epoch": 0.3125405055087492, "grad_norm": 1.0631221532821655, "learning_rate": 4.928245417624297e-06, "loss": 0.1643, "step": 1929 }, { "epoch": 0.31270252754374595, "grad_norm": 1.1224422454833984, "learning_rate": 4.928141361032513e-06, "loss": 0.1689, "step": 1930 }, { "epoch": 0.3128645495787427, "grad_norm": 1.0592671632766724, "learning_rate": 4.928037230145481e-06, "loss": 0.1495, "step": 1931 }, { "epoch": 0.3130265716137395, "grad_norm": 1.071808099746704, "learning_rate": 4.927933024966385e-06, "loss": 0.1601, "step": 1932 }, { "epoch": 0.31318859364873625, "grad_norm": 1.0835838317871094, "learning_rate": 4.927828745498414e-06, "loss": 0.1623, "step": 1933 }, { "epoch": 0.313350615683733, "grad_norm": 1.1354283094406128, "learning_rate": 4.927724391744758e-06, "loss": 0.145, "step": 1934 }, { "epoch": 0.3135126377187297, "grad_norm": 1.1181296110153198, "learning_rate": 4.9276199637086106e-06, "loss": 0.1633, "step": 1935 }, { "epoch": 0.3136746597537265, "grad_norm": 1.1335471868515015, "learning_rate": 4.927515461393167e-06, "loss": 0.1455, "step": 1936 }, { "epoch": 0.31383668178872326, "grad_norm": 1.091052770614624, "learning_rate": 4.927410884801626e-06, "loss": 0.1619, "step": 1937 }, { "epoch": 0.31399870382372, "grad_norm": 1.1227446794509888, "learning_rate": 4.927306233937185e-06, "loss": 0.1757, "step": 1938 }, { "epoch": 0.3141607258587168, "grad_norm": 0.9883224368095398, "learning_rate": 4.927201508803048e-06, "loss": 0.1359, "step": 1939 }, { "epoch": 0.31432274789371356, "grad_norm": 1.1337419748306274, "learning_rate": 4.927096709402417e-06, "loss": 0.1501, "step": 1940 }, { "epoch": 0.31448476992871033, "grad_norm": 1.0160683393478394, "learning_rate": 4.9269918357385015e-06, "loss": 0.143, "step": 1941 }, { "epoch": 0.31464679196370704, "grad_norm": 1.1797884702682495, "learning_rate": 4.926886887814509e-06, "loss": 0.1696, "step": 1942 }, { "epoch": 0.3148088139987038, "grad_norm": 1.0038928985595703, "learning_rate": 4.92678186563365e-06, "loss": 0.1523, "step": 1943 }, { "epoch": 0.3149708360337006, "grad_norm": 1.1223050355911255, "learning_rate": 4.926676769199139e-06, "loss": 0.1695, "step": 1944 }, { "epoch": 0.31513285806869734, "grad_norm": 1.1693401336669922, "learning_rate": 4.9265715985141914e-06, "loss": 0.1676, "step": 1945 }, { "epoch": 0.3152948801036941, "grad_norm": 1.0387099981307983, "learning_rate": 4.9264663535820256e-06, "loss": 0.1502, "step": 1946 }, { "epoch": 0.3154569021386909, "grad_norm": 1.1783015727996826, "learning_rate": 4.926361034405861e-06, "loss": 0.1697, "step": 1947 }, { "epoch": 0.31561892417368764, "grad_norm": 1.137247920036316, "learning_rate": 4.926255640988919e-06, "loss": 0.1691, "step": 1948 }, { "epoch": 0.31578094620868435, "grad_norm": 1.0531809329986572, "learning_rate": 4.926150173334427e-06, "loss": 0.1597, "step": 1949 }, { "epoch": 0.3159429682436811, "grad_norm": 1.0189039707183838, "learning_rate": 4.926044631445611e-06, "loss": 0.1597, "step": 1950 }, { "epoch": 0.3161049902786779, "grad_norm": 1.1217700242996216, "learning_rate": 4.9259390153257006e-06, "loss": 0.1609, "step": 1951 }, { "epoch": 0.31626701231367466, "grad_norm": 1.0535531044006348, "learning_rate": 4.925833324977926e-06, "loss": 0.1584, "step": 1952 }, { "epoch": 0.3164290343486714, "grad_norm": 1.066357970237732, "learning_rate": 4.925727560405522e-06, "loss": 0.1622, "step": 1953 }, { "epoch": 0.3165910563836682, "grad_norm": 1.065629005432129, "learning_rate": 4.925621721611726e-06, "loss": 0.1709, "step": 1954 }, { "epoch": 0.31675307841866496, "grad_norm": 1.0723075866699219, "learning_rate": 4.925515808599774e-06, "loss": 0.165, "step": 1955 }, { "epoch": 0.3169151004536617, "grad_norm": 1.1291669607162476, "learning_rate": 4.925409821372908e-06, "loss": 0.1531, "step": 1956 }, { "epoch": 0.31707712248865844, "grad_norm": 1.150839924812317, "learning_rate": 4.925303759934372e-06, "loss": 0.1676, "step": 1957 }, { "epoch": 0.3172391445236552, "grad_norm": 1.0867832899093628, "learning_rate": 4.925197624287409e-06, "loss": 0.1749, "step": 1958 }, { "epoch": 0.31740116655865197, "grad_norm": 1.1858301162719727, "learning_rate": 4.925091414435268e-06, "loss": 0.1802, "step": 1959 }, { "epoch": 0.31756318859364874, "grad_norm": 1.1005200147628784, "learning_rate": 4.924985130381198e-06, "loss": 0.1735, "step": 1960 }, { "epoch": 0.3177252106286455, "grad_norm": 1.015781044960022, "learning_rate": 4.924878772128452e-06, "loss": 0.1494, "step": 1961 }, { "epoch": 0.31788723266364227, "grad_norm": 1.1282938718795776, "learning_rate": 4.924772339680283e-06, "loss": 0.1528, "step": 1962 }, { "epoch": 0.31804925469863904, "grad_norm": 1.103499412536621, "learning_rate": 4.9246658330399474e-06, "loss": 0.1722, "step": 1963 }, { "epoch": 0.31821127673363575, "grad_norm": 1.0378453731536865, "learning_rate": 4.9245592522107065e-06, "loss": 0.1648, "step": 1964 }, { "epoch": 0.3183732987686325, "grad_norm": 1.1685363054275513, "learning_rate": 4.924452597195819e-06, "loss": 0.1796, "step": 1965 }, { "epoch": 0.3185353208036293, "grad_norm": 1.0074273347854614, "learning_rate": 4.92434586799855e-06, "loss": 0.1525, "step": 1966 }, { "epoch": 0.31869734283862605, "grad_norm": 0.9873900413513184, "learning_rate": 4.924239064622163e-06, "loss": 0.146, "step": 1967 }, { "epoch": 0.3188593648736228, "grad_norm": 1.1330924034118652, "learning_rate": 4.924132187069928e-06, "loss": 0.1609, "step": 1968 }, { "epoch": 0.3190213869086196, "grad_norm": 1.1219487190246582, "learning_rate": 4.924025235345114e-06, "loss": 0.1646, "step": 1969 }, { "epoch": 0.31918340894361635, "grad_norm": 1.068410873413086, "learning_rate": 4.923918209450994e-06, "loss": 0.154, "step": 1970 }, { "epoch": 0.31934543097861307, "grad_norm": 1.062517762184143, "learning_rate": 4.923811109390843e-06, "loss": 0.1454, "step": 1971 }, { "epoch": 0.31950745301360983, "grad_norm": 1.0570393800735474, "learning_rate": 4.9237039351679365e-06, "loss": 0.1551, "step": 1972 }, { "epoch": 0.3196694750486066, "grad_norm": 1.120102882385254, "learning_rate": 4.923596686785556e-06, "loss": 0.1628, "step": 1973 }, { "epoch": 0.31983149708360337, "grad_norm": 1.1168919801712036, "learning_rate": 4.923489364246981e-06, "loss": 0.1715, "step": 1974 }, { "epoch": 0.31999351911860013, "grad_norm": 1.2127281427383423, "learning_rate": 4.923381967555496e-06, "loss": 0.1598, "step": 1975 }, { "epoch": 0.3201555411535969, "grad_norm": 1.1189041137695312, "learning_rate": 4.923274496714387e-06, "loss": 0.1604, "step": 1976 }, { "epoch": 0.32031756318859367, "grad_norm": 1.0939730405807495, "learning_rate": 4.923166951726945e-06, "loss": 0.1557, "step": 1977 }, { "epoch": 0.32047958522359044, "grad_norm": 1.0992319583892822, "learning_rate": 4.923059332596456e-06, "loss": 0.1615, "step": 1978 }, { "epoch": 0.32064160725858715, "grad_norm": 1.0739459991455078, "learning_rate": 4.922951639326215e-06, "loss": 0.1537, "step": 1979 }, { "epoch": 0.3208036292935839, "grad_norm": 1.1589041948318481, "learning_rate": 4.922843871919518e-06, "loss": 0.1663, "step": 1980 }, { "epoch": 0.3209656513285807, "grad_norm": 1.0140794515609741, "learning_rate": 4.922736030379662e-06, "loss": 0.1435, "step": 1981 }, { "epoch": 0.32112767336357745, "grad_norm": 1.1577235460281372, "learning_rate": 4.922628114709945e-06, "loss": 0.1815, "step": 1982 }, { "epoch": 0.3212896953985742, "grad_norm": 0.9740235805511475, "learning_rate": 4.922520124913672e-06, "loss": 0.1442, "step": 1983 }, { "epoch": 0.321451717433571, "grad_norm": 1.3087810277938843, "learning_rate": 4.922412060994145e-06, "loss": 0.1556, "step": 1984 }, { "epoch": 0.32161373946856775, "grad_norm": 1.1714460849761963, "learning_rate": 4.922303922954671e-06, "loss": 0.1828, "step": 1985 }, { "epoch": 0.32177576150356446, "grad_norm": 0.9607410430908203, "learning_rate": 4.922195710798559e-06, "loss": 0.1292, "step": 1986 }, { "epoch": 0.32193778353856123, "grad_norm": 1.0208156108856201, "learning_rate": 4.9220874245291194e-06, "loss": 0.143, "step": 1987 }, { "epoch": 0.322099805573558, "grad_norm": 1.004679560661316, "learning_rate": 4.9219790641496656e-06, "loss": 0.1454, "step": 1988 }, { "epoch": 0.32226182760855476, "grad_norm": 1.2176995277404785, "learning_rate": 4.921870629663514e-06, "loss": 0.1631, "step": 1989 }, { "epoch": 0.32242384964355153, "grad_norm": 1.067450761795044, "learning_rate": 4.9217621210739826e-06, "loss": 0.1605, "step": 1990 }, { "epoch": 0.3225858716785483, "grad_norm": 1.1650382280349731, "learning_rate": 4.92165353838439e-06, "loss": 0.1758, "step": 1991 }, { "epoch": 0.32274789371354506, "grad_norm": 1.0696114301681519, "learning_rate": 4.921544881598059e-06, "loss": 0.1483, "step": 1992 }, { "epoch": 0.3229099157485418, "grad_norm": 1.1148940324783325, "learning_rate": 4.921436150718316e-06, "loss": 0.1585, "step": 1993 }, { "epoch": 0.32307193778353854, "grad_norm": 1.0661381483078003, "learning_rate": 4.921327345748486e-06, "loss": 0.1714, "step": 1994 }, { "epoch": 0.3232339598185353, "grad_norm": 1.0887075662612915, "learning_rate": 4.921218466691898e-06, "loss": 0.1494, "step": 1995 }, { "epoch": 0.3233959818535321, "grad_norm": 1.2316148281097412, "learning_rate": 4.921109513551885e-06, "loss": 0.1681, "step": 1996 }, { "epoch": 0.32355800388852884, "grad_norm": 0.982879102230072, "learning_rate": 4.92100048633178e-06, "loss": 0.1363, "step": 1997 }, { "epoch": 0.3237200259235256, "grad_norm": 1.0092542171478271, "learning_rate": 4.920891385034918e-06, "loss": 0.1371, "step": 1998 }, { "epoch": 0.3238820479585224, "grad_norm": 1.0851917266845703, "learning_rate": 4.9207822096646385e-06, "loss": 0.144, "step": 1999 }, { "epoch": 0.32404406999351915, "grad_norm": 0.9787352681159973, "learning_rate": 4.920672960224282e-06, "loss": 0.146, "step": 2000 }, { "epoch": 0.32420609202851586, "grad_norm": 1.1132274866104126, "learning_rate": 4.92056363671719e-06, "loss": 0.1693, "step": 2001 }, { "epoch": 0.3243681140635126, "grad_norm": 1.1033443212509155, "learning_rate": 4.920454239146709e-06, "loss": 0.1584, "step": 2002 }, { "epoch": 0.3245301360985094, "grad_norm": 1.1150498390197754, "learning_rate": 4.920344767516186e-06, "loss": 0.1472, "step": 2003 }, { "epoch": 0.32469215813350616, "grad_norm": 1.13274347782135, "learning_rate": 4.92023522182897e-06, "loss": 0.1729, "step": 2004 }, { "epoch": 0.3248541801685029, "grad_norm": 0.9556041359901428, "learning_rate": 4.920125602088412e-06, "loss": 0.1365, "step": 2005 }, { "epoch": 0.3250162022034997, "grad_norm": 0.979815661907196, "learning_rate": 4.9200159082978685e-06, "loss": 0.1376, "step": 2006 }, { "epoch": 0.32517822423849646, "grad_norm": 1.1807565689086914, "learning_rate": 4.919906140460693e-06, "loss": 0.1665, "step": 2007 }, { "epoch": 0.32534024627349317, "grad_norm": 1.0704436302185059, "learning_rate": 4.919796298580247e-06, "loss": 0.158, "step": 2008 }, { "epoch": 0.32550226830848994, "grad_norm": 1.0892672538757324, "learning_rate": 4.919686382659889e-06, "loss": 0.1461, "step": 2009 }, { "epoch": 0.3256642903434867, "grad_norm": 1.1033027172088623, "learning_rate": 4.919576392702984e-06, "loss": 0.1592, "step": 2010 }, { "epoch": 0.3258263123784835, "grad_norm": 1.2258738279342651, "learning_rate": 4.919466328712897e-06, "loss": 0.1911, "step": 2011 }, { "epoch": 0.32598833441348024, "grad_norm": 0.9703921675682068, "learning_rate": 4.9193561906929945e-06, "loss": 0.1397, "step": 2012 }, { "epoch": 0.326150356448477, "grad_norm": 1.1200995445251465, "learning_rate": 4.919245978646648e-06, "loss": 0.1705, "step": 2013 }, { "epoch": 0.3263123784834738, "grad_norm": 1.0445135831832886, "learning_rate": 4.919135692577229e-06, "loss": 0.1488, "step": 2014 }, { "epoch": 0.3264744005184705, "grad_norm": 0.9916481375694275, "learning_rate": 4.919025332488111e-06, "loss": 0.134, "step": 2015 }, { "epoch": 0.32663642255346725, "grad_norm": 1.1209403276443481, "learning_rate": 4.918914898382673e-06, "loss": 0.1423, "step": 2016 }, { "epoch": 0.326798444588464, "grad_norm": 0.954210638999939, "learning_rate": 4.918804390264292e-06, "loss": 0.1461, "step": 2017 }, { "epoch": 0.3269604666234608, "grad_norm": 1.1691701412200928, "learning_rate": 4.91869380813635e-06, "loss": 0.1693, "step": 2018 }, { "epoch": 0.32712248865845756, "grad_norm": 1.2081804275512695, "learning_rate": 4.918583152002231e-06, "loss": 0.1663, "step": 2019 }, { "epoch": 0.3272845106934543, "grad_norm": 1.0673002004623413, "learning_rate": 4.91847242186532e-06, "loss": 0.1625, "step": 2020 }, { "epoch": 0.3274465327284511, "grad_norm": 1.0329972505569458, "learning_rate": 4.918361617729006e-06, "loss": 0.1458, "step": 2021 }, { "epoch": 0.3276085547634478, "grad_norm": 1.0796326398849487, "learning_rate": 4.918250739596678e-06, "loss": 0.1394, "step": 2022 }, { "epoch": 0.32777057679844457, "grad_norm": 1.0734831094741821, "learning_rate": 4.91813978747173e-06, "loss": 0.1557, "step": 2023 }, { "epoch": 0.32793259883344134, "grad_norm": 1.1349564790725708, "learning_rate": 4.918028761357557e-06, "loss": 0.1694, "step": 2024 }, { "epoch": 0.3280946208684381, "grad_norm": 1.027542233467102, "learning_rate": 4.917917661257554e-06, "loss": 0.1479, "step": 2025 }, { "epoch": 0.32825664290343487, "grad_norm": 1.1970611810684204, "learning_rate": 4.917806487175123e-06, "loss": 0.1549, "step": 2026 }, { "epoch": 0.32841866493843164, "grad_norm": 1.0326762199401855, "learning_rate": 4.917695239113665e-06, "loss": 0.1454, "step": 2027 }, { "epoch": 0.3285806869734284, "grad_norm": 1.1361476182937622, "learning_rate": 4.917583917076581e-06, "loss": 0.1529, "step": 2028 }, { "epoch": 0.32874270900842517, "grad_norm": 1.0551778078079224, "learning_rate": 4.917472521067281e-06, "loss": 0.155, "step": 2029 }, { "epoch": 0.3289047310434219, "grad_norm": 1.0310840606689453, "learning_rate": 4.917361051089172e-06, "loss": 0.1433, "step": 2030 }, { "epoch": 0.32906675307841865, "grad_norm": 1.0534615516662598, "learning_rate": 4.917249507145665e-06, "loss": 0.1675, "step": 2031 }, { "epoch": 0.3292287751134154, "grad_norm": 1.0969111919403076, "learning_rate": 4.917137889240172e-06, "loss": 0.1741, "step": 2032 }, { "epoch": 0.3293907971484122, "grad_norm": 1.0823769569396973, "learning_rate": 4.91702619737611e-06, "loss": 0.1621, "step": 2033 }, { "epoch": 0.32955281918340895, "grad_norm": 1.058327317237854, "learning_rate": 4.916914431556895e-06, "loss": 0.1553, "step": 2034 }, { "epoch": 0.3297148412184057, "grad_norm": 1.0909065008163452, "learning_rate": 4.9168025917859465e-06, "loss": 0.1621, "step": 2035 }, { "epoch": 0.3298768632534025, "grad_norm": 1.1126458644866943, "learning_rate": 4.916690678066688e-06, "loss": 0.1628, "step": 2036 }, { "epoch": 0.3300388852883992, "grad_norm": 1.1230723857879639, "learning_rate": 4.916578690402542e-06, "loss": 0.1714, "step": 2037 }, { "epoch": 0.33020090732339596, "grad_norm": 1.2173502445220947, "learning_rate": 4.916466628796938e-06, "loss": 0.1657, "step": 2038 }, { "epoch": 0.33036292935839273, "grad_norm": 0.9629350900650024, "learning_rate": 4.916354493253301e-06, "loss": 0.1262, "step": 2039 }, { "epoch": 0.3305249513933895, "grad_norm": 1.159567952156067, "learning_rate": 4.9162422837750654e-06, "loss": 0.1867, "step": 2040 }, { "epoch": 0.33068697342838627, "grad_norm": 1.039483904838562, "learning_rate": 4.916130000365662e-06, "loss": 0.1369, "step": 2041 }, { "epoch": 0.33084899546338303, "grad_norm": 1.0981976985931396, "learning_rate": 4.916017643028529e-06, "loss": 0.1542, "step": 2042 }, { "epoch": 0.3310110174983798, "grad_norm": 1.1774072647094727, "learning_rate": 4.915905211767101e-06, "loss": 0.1406, "step": 2043 }, { "epoch": 0.3311730395333765, "grad_norm": 1.048284649848938, "learning_rate": 4.915792706584821e-06, "loss": 0.1514, "step": 2044 }, { "epoch": 0.3313350615683733, "grad_norm": 1.050802230834961, "learning_rate": 4.9156801274851295e-06, "loss": 0.1515, "step": 2045 }, { "epoch": 0.33149708360337005, "grad_norm": 1.0515334606170654, "learning_rate": 4.9155674744714725e-06, "loss": 0.1386, "step": 2046 }, { "epoch": 0.3316591056383668, "grad_norm": 0.9953398704528809, "learning_rate": 4.915454747547296e-06, "loss": 0.1465, "step": 2047 }, { "epoch": 0.3318211276733636, "grad_norm": 0.9482908844947815, "learning_rate": 4.91534194671605e-06, "loss": 0.14, "step": 2048 }, { "epoch": 0.33198314970836035, "grad_norm": 1.0365067720413208, "learning_rate": 4.915229071981186e-06, "loss": 0.1446, "step": 2049 }, { "epoch": 0.3321451717433571, "grad_norm": 1.1342229843139648, "learning_rate": 4.915116123346155e-06, "loss": 0.1796, "step": 2050 }, { "epoch": 0.3323071937783539, "grad_norm": 1.1378239393234253, "learning_rate": 4.915003100814417e-06, "loss": 0.1679, "step": 2051 }, { "epoch": 0.3324692158133506, "grad_norm": 1.1816859245300293, "learning_rate": 4.9148900043894275e-06, "loss": 0.185, "step": 2052 }, { "epoch": 0.33263123784834736, "grad_norm": 1.2354105710983276, "learning_rate": 4.9147768340746486e-06, "loss": 0.1913, "step": 2053 }, { "epoch": 0.33279325988334413, "grad_norm": 1.0721561908721924, "learning_rate": 4.914663589873541e-06, "loss": 0.156, "step": 2054 }, { "epoch": 0.3329552819183409, "grad_norm": 1.0587486028671265, "learning_rate": 4.914550271789572e-06, "loss": 0.1607, "step": 2055 }, { "epoch": 0.33311730395333766, "grad_norm": 1.0155506134033203, "learning_rate": 4.914436879826207e-06, "loss": 0.1528, "step": 2056 }, { "epoch": 0.33327932598833443, "grad_norm": 1.0975433588027954, "learning_rate": 4.914323413986917e-06, "loss": 0.1606, "step": 2057 }, { "epoch": 0.3334413480233312, "grad_norm": 1.12659752368927, "learning_rate": 4.9142098742751726e-06, "loss": 0.1677, "step": 2058 }, { "epoch": 0.3336033700583279, "grad_norm": 1.0046424865722656, "learning_rate": 4.914096260694449e-06, "loss": 0.163, "step": 2059 }, { "epoch": 0.3337653920933247, "grad_norm": 1.103347897529602, "learning_rate": 4.9139825732482205e-06, "loss": 0.1583, "step": 2060 }, { "epoch": 0.33392741412832144, "grad_norm": 1.109889268875122, "learning_rate": 4.913868811939968e-06, "loss": 0.162, "step": 2061 }, { "epoch": 0.3340894361633182, "grad_norm": 1.036608338356018, "learning_rate": 4.91375497677317e-06, "loss": 0.1487, "step": 2062 }, { "epoch": 0.334251458198315, "grad_norm": 1.199803113937378, "learning_rate": 4.913641067751313e-06, "loss": 0.1858, "step": 2063 }, { "epoch": 0.33441348023331174, "grad_norm": 1.304969072341919, "learning_rate": 4.913527084877879e-06, "loss": 0.1704, "step": 2064 }, { "epoch": 0.3345755022683085, "grad_norm": 1.1703951358795166, "learning_rate": 4.913413028156358e-06, "loss": 0.1688, "step": 2065 }, { "epoch": 0.3347375243033052, "grad_norm": 1.0002027750015259, "learning_rate": 4.913298897590237e-06, "loss": 0.1413, "step": 2066 }, { "epoch": 0.334899546338302, "grad_norm": 1.1760587692260742, "learning_rate": 4.913184693183011e-06, "loss": 0.177, "step": 2067 }, { "epoch": 0.33506156837329876, "grad_norm": 1.080187439918518, "learning_rate": 4.913070414938172e-06, "loss": 0.1556, "step": 2068 }, { "epoch": 0.3352235904082955, "grad_norm": 1.124131202697754, "learning_rate": 4.912956062859219e-06, "loss": 0.1701, "step": 2069 }, { "epoch": 0.3353856124432923, "grad_norm": 1.0667952299118042, "learning_rate": 4.912841636949649e-06, "loss": 0.1503, "step": 2070 }, { "epoch": 0.33554763447828906, "grad_norm": 1.2242783308029175, "learning_rate": 4.912727137212964e-06, "loss": 0.1686, "step": 2071 }, { "epoch": 0.3357096565132858, "grad_norm": 1.0249295234680176, "learning_rate": 4.912612563652667e-06, "loss": 0.1517, "step": 2072 }, { "epoch": 0.3358716785482826, "grad_norm": 1.5885037183761597, "learning_rate": 4.912497916272264e-06, "loss": 0.1591, "step": 2073 }, { "epoch": 0.3360337005832793, "grad_norm": 1.040554404258728, "learning_rate": 4.912383195075264e-06, "loss": 0.1571, "step": 2074 }, { "epoch": 0.33619572261827607, "grad_norm": 1.0694094896316528, "learning_rate": 4.912268400065175e-06, "loss": 0.1511, "step": 2075 }, { "epoch": 0.33635774465327284, "grad_norm": 2.055079460144043, "learning_rate": 4.912153531245511e-06, "loss": 0.1697, "step": 2076 }, { "epoch": 0.3365197666882696, "grad_norm": 1.066028356552124, "learning_rate": 4.912038588619786e-06, "loss": 0.1572, "step": 2077 }, { "epoch": 0.3366817887232664, "grad_norm": 1.1714937686920166, "learning_rate": 4.9119235721915174e-06, "loss": 0.1764, "step": 2078 }, { "epoch": 0.33684381075826314, "grad_norm": 1.0577641725540161, "learning_rate": 4.911808481964224e-06, "loss": 0.1411, "step": 2079 }, { "epoch": 0.3370058327932599, "grad_norm": 1.0684007406234741, "learning_rate": 4.911693317941428e-06, "loss": 0.154, "step": 2080 }, { "epoch": 0.3371678548282566, "grad_norm": 1.107500672340393, "learning_rate": 4.911578080126652e-06, "loss": 0.1549, "step": 2081 }, { "epoch": 0.3373298768632534, "grad_norm": 1.0562461614608765, "learning_rate": 4.911462768523423e-06, "loss": 0.1475, "step": 2082 }, { "epoch": 0.33749189889825015, "grad_norm": 1.016905426979065, "learning_rate": 4.911347383135269e-06, "loss": 0.1391, "step": 2083 }, { "epoch": 0.3376539209332469, "grad_norm": 1.1356723308563232, "learning_rate": 4.9112319239657204e-06, "loss": 0.1747, "step": 2084 }, { "epoch": 0.3378159429682437, "grad_norm": 1.1550018787384033, "learning_rate": 4.91111639101831e-06, "loss": 0.1509, "step": 2085 }, { "epoch": 0.33797796500324045, "grad_norm": 1.108013391494751, "learning_rate": 4.911000784296572e-06, "loss": 0.1712, "step": 2086 }, { "epoch": 0.3381399870382372, "grad_norm": 1.057617425918579, "learning_rate": 4.910885103804046e-06, "loss": 0.1624, "step": 2087 }, { "epoch": 0.33830200907323393, "grad_norm": 1.2254884243011475, "learning_rate": 4.910769349544269e-06, "loss": 0.1485, "step": 2088 }, { "epoch": 0.3384640311082307, "grad_norm": 1.2375078201293945, "learning_rate": 4.910653521520784e-06, "loss": 0.1587, "step": 2089 }, { "epoch": 0.33862605314322747, "grad_norm": 1.054060459136963, "learning_rate": 4.9105376197371355e-06, "loss": 0.1391, "step": 2090 }, { "epoch": 0.33878807517822424, "grad_norm": 1.0424585342407227, "learning_rate": 4.910421644196868e-06, "loss": 0.1532, "step": 2091 }, { "epoch": 0.338950097213221, "grad_norm": 1.091741681098938, "learning_rate": 4.9103055949035326e-06, "loss": 0.1619, "step": 2092 }, { "epoch": 0.33911211924821777, "grad_norm": 1.0904366970062256, "learning_rate": 4.910189471860678e-06, "loss": 0.1675, "step": 2093 }, { "epoch": 0.33927414128321454, "grad_norm": 1.1803327798843384, "learning_rate": 4.910073275071858e-06, "loss": 0.1856, "step": 2094 }, { "epoch": 0.3394361633182113, "grad_norm": 1.1525360345840454, "learning_rate": 4.909957004540629e-06, "loss": 0.1703, "step": 2095 }, { "epoch": 0.339598185353208, "grad_norm": 1.070910096168518, "learning_rate": 4.909840660270547e-06, "loss": 0.1768, "step": 2096 }, { "epoch": 0.3397602073882048, "grad_norm": 0.9406764507293701, "learning_rate": 4.909724242265172e-06, "loss": 0.1367, "step": 2097 }, { "epoch": 0.33992222942320155, "grad_norm": 1.0764198303222656, "learning_rate": 4.909607750528068e-06, "loss": 0.1583, "step": 2098 }, { "epoch": 0.3400842514581983, "grad_norm": 0.9914731383323669, "learning_rate": 4.909491185062797e-06, "loss": 0.1486, "step": 2099 }, { "epoch": 0.3402462734931951, "grad_norm": 1.056030511856079, "learning_rate": 4.909374545872927e-06, "loss": 0.1546, "step": 2100 }, { "epoch": 0.34040829552819185, "grad_norm": 1.0676145553588867, "learning_rate": 4.909257832962026e-06, "loss": 0.1284, "step": 2101 }, { "epoch": 0.3405703175631886, "grad_norm": 1.0468038320541382, "learning_rate": 4.909141046333666e-06, "loss": 0.1365, "step": 2102 }, { "epoch": 0.34073233959818533, "grad_norm": 1.0132776498794556, "learning_rate": 4.90902418599142e-06, "loss": 0.1521, "step": 2103 }, { "epoch": 0.3408943616331821, "grad_norm": 1.1214451789855957, "learning_rate": 4.908907251938864e-06, "loss": 0.1561, "step": 2104 }, { "epoch": 0.34105638366817886, "grad_norm": 1.1212339401245117, "learning_rate": 4.9087902441795745e-06, "loss": 0.161, "step": 2105 }, { "epoch": 0.34121840570317563, "grad_norm": 1.0603835582733154, "learning_rate": 4.908673162717133e-06, "loss": 0.1596, "step": 2106 }, { "epoch": 0.3413804277381724, "grad_norm": 1.171034812927246, "learning_rate": 4.908556007555122e-06, "loss": 0.1534, "step": 2107 }, { "epoch": 0.34154244977316917, "grad_norm": 1.055219292640686, "learning_rate": 4.908438778697125e-06, "loss": 0.1511, "step": 2108 }, { "epoch": 0.34170447180816593, "grad_norm": 1.1049814224243164, "learning_rate": 4.90832147614673e-06, "loss": 0.1679, "step": 2109 }, { "epoch": 0.34186649384316264, "grad_norm": 1.144884705543518, "learning_rate": 4.908204099907527e-06, "loss": 0.1635, "step": 2110 }, { "epoch": 0.3420285158781594, "grad_norm": 1.1654030084609985, "learning_rate": 4.908086649983105e-06, "loss": 0.1663, "step": 2111 }, { "epoch": 0.3421905379131562, "grad_norm": 1.1463876962661743, "learning_rate": 4.907969126377059e-06, "loss": 0.1601, "step": 2112 }, { "epoch": 0.34235255994815295, "grad_norm": 1.105735182762146, "learning_rate": 4.9078515290929855e-06, "loss": 0.1573, "step": 2113 }, { "epoch": 0.3425145819831497, "grad_norm": 1.1214326620101929, "learning_rate": 4.907733858134482e-06, "loss": 0.1705, "step": 2114 }, { "epoch": 0.3426766040181465, "grad_norm": 0.9656097292900085, "learning_rate": 4.907616113505149e-06, "loss": 0.1477, "step": 2115 }, { "epoch": 0.34283862605314325, "grad_norm": 1.0422648191452026, "learning_rate": 4.907498295208589e-06, "loss": 0.1577, "step": 2116 }, { "epoch": 0.34300064808814, "grad_norm": 1.0416967868804932, "learning_rate": 4.907380403248408e-06, "loss": 0.1674, "step": 2117 }, { "epoch": 0.3431626701231367, "grad_norm": 1.0211541652679443, "learning_rate": 4.907262437628211e-06, "loss": 0.1615, "step": 2118 }, { "epoch": 0.3433246921581335, "grad_norm": 1.0481899976730347, "learning_rate": 4.90714439835161e-06, "loss": 0.1624, "step": 2119 }, { "epoch": 0.34348671419313026, "grad_norm": 0.9321041703224182, "learning_rate": 4.907026285422215e-06, "loss": 0.149, "step": 2120 }, { "epoch": 0.343648736228127, "grad_norm": 1.062423825263977, "learning_rate": 4.9069080988436405e-06, "loss": 0.1582, "step": 2121 }, { "epoch": 0.3438107582631238, "grad_norm": 1.213869571685791, "learning_rate": 4.906789838619504e-06, "loss": 0.1728, "step": 2122 }, { "epoch": 0.34397278029812056, "grad_norm": 1.011985421180725, "learning_rate": 4.9066715047534205e-06, "loss": 0.1525, "step": 2123 }, { "epoch": 0.34413480233311733, "grad_norm": 1.0821608304977417, "learning_rate": 4.906553097249015e-06, "loss": 0.1607, "step": 2124 }, { "epoch": 0.34429682436811404, "grad_norm": 0.9949021339416504, "learning_rate": 4.906434616109907e-06, "loss": 0.1306, "step": 2125 }, { "epoch": 0.3444588464031108, "grad_norm": 1.1297369003295898, "learning_rate": 4.906316061339724e-06, "loss": 0.1498, "step": 2126 }, { "epoch": 0.3446208684381076, "grad_norm": 1.0807654857635498, "learning_rate": 4.906197432942093e-06, "loss": 0.1492, "step": 2127 }, { "epoch": 0.34478289047310434, "grad_norm": 1.2625532150268555, "learning_rate": 4.9060787309206436e-06, "loss": 0.1784, "step": 2128 }, { "epoch": 0.3449449125081011, "grad_norm": 1.1583614349365234, "learning_rate": 4.905959955279007e-06, "loss": 0.1652, "step": 2129 }, { "epoch": 0.3451069345430979, "grad_norm": 1.1260557174682617, "learning_rate": 4.905841106020818e-06, "loss": 0.1649, "step": 2130 }, { "epoch": 0.34526895657809464, "grad_norm": 1.1886639595031738, "learning_rate": 4.905722183149714e-06, "loss": 0.1724, "step": 2131 }, { "epoch": 0.34543097861309136, "grad_norm": 1.138461709022522, "learning_rate": 4.905603186669332e-06, "loss": 0.177, "step": 2132 }, { "epoch": 0.3455930006480881, "grad_norm": 1.1662522554397583, "learning_rate": 4.905484116583314e-06, "loss": 0.1436, "step": 2133 }, { "epoch": 0.3457550226830849, "grad_norm": 1.0118292570114136, "learning_rate": 4.905364972895304e-06, "loss": 0.1573, "step": 2134 }, { "epoch": 0.34591704471808166, "grad_norm": 0.9873135685920715, "learning_rate": 4.905245755608946e-06, "loss": 0.1484, "step": 2135 }, { "epoch": 0.3460790667530784, "grad_norm": 1.1140849590301514, "learning_rate": 4.9051264647278886e-06, "loss": 0.1773, "step": 2136 }, { "epoch": 0.3462410887880752, "grad_norm": 1.053364872932434, "learning_rate": 4.9050071002557815e-06, "loss": 0.1462, "step": 2137 }, { "epoch": 0.34640311082307196, "grad_norm": 1.0225468873977661, "learning_rate": 4.904887662196277e-06, "loss": 0.165, "step": 2138 }, { "epoch": 0.34656513285806867, "grad_norm": 1.1107808351516724, "learning_rate": 4.90476815055303e-06, "loss": 0.1427, "step": 2139 }, { "epoch": 0.34672715489306544, "grad_norm": 1.0493844747543335, "learning_rate": 4.904648565329697e-06, "loss": 0.1416, "step": 2140 }, { "epoch": 0.3468891769280622, "grad_norm": 1.1383299827575684, "learning_rate": 4.9045289065299375e-06, "loss": 0.1915, "step": 2141 }, { "epoch": 0.34705119896305897, "grad_norm": 1.1016839742660522, "learning_rate": 4.904409174157412e-06, "loss": 0.1629, "step": 2142 }, { "epoch": 0.34721322099805574, "grad_norm": 1.0556694269180298, "learning_rate": 4.904289368215784e-06, "loss": 0.17, "step": 2143 }, { "epoch": 0.3473752430330525, "grad_norm": 1.0352901220321655, "learning_rate": 4.90416948870872e-06, "loss": 0.149, "step": 2144 }, { "epoch": 0.3475372650680493, "grad_norm": 1.0276658535003662, "learning_rate": 4.9040495356398874e-06, "loss": 0.1466, "step": 2145 }, { "epoch": 0.34769928710304604, "grad_norm": 1.1721683740615845, "learning_rate": 4.903929509012957e-06, "loss": 0.1652, "step": 2146 }, { "epoch": 0.34786130913804275, "grad_norm": 1.0136381387710571, "learning_rate": 4.903809408831601e-06, "loss": 0.1425, "step": 2147 }, { "epoch": 0.3480233311730395, "grad_norm": 1.0642050504684448, "learning_rate": 4.9036892350994935e-06, "loss": 0.1454, "step": 2148 }, { "epoch": 0.3481853532080363, "grad_norm": 1.0709917545318604, "learning_rate": 4.903568987820313e-06, "loss": 0.1478, "step": 2149 }, { "epoch": 0.34834737524303305, "grad_norm": 1.0638947486877441, "learning_rate": 4.9034486669977375e-06, "loss": 0.1625, "step": 2150 }, { "epoch": 0.3485093972780298, "grad_norm": 1.0096039772033691, "learning_rate": 4.903328272635449e-06, "loss": 0.1462, "step": 2151 }, { "epoch": 0.3486714193130266, "grad_norm": 1.1130759716033936, "learning_rate": 4.903207804737132e-06, "loss": 0.1488, "step": 2152 }, { "epoch": 0.34883344134802335, "grad_norm": 1.105833649635315, "learning_rate": 4.9030872633064715e-06, "loss": 0.1669, "step": 2153 }, { "epoch": 0.34899546338302007, "grad_norm": 1.2638037204742432, "learning_rate": 4.902966648347156e-06, "loss": 0.1613, "step": 2154 }, { "epoch": 0.34915748541801683, "grad_norm": 0.990151584148407, "learning_rate": 4.902845959862876e-06, "loss": 0.1456, "step": 2155 }, { "epoch": 0.3493195074530136, "grad_norm": 1.108185887336731, "learning_rate": 4.902725197857325e-06, "loss": 0.1522, "step": 2156 }, { "epoch": 0.34948152948801037, "grad_norm": 0.9440566897392273, "learning_rate": 4.902604362334197e-06, "loss": 0.1266, "step": 2157 }, { "epoch": 0.34964355152300713, "grad_norm": 1.2154903411865234, "learning_rate": 4.902483453297189e-06, "loss": 0.1911, "step": 2158 }, { "epoch": 0.3498055735580039, "grad_norm": 1.0809717178344727, "learning_rate": 4.902362470750002e-06, "loss": 0.1564, "step": 2159 }, { "epoch": 0.34996759559300067, "grad_norm": 1.0994709730148315, "learning_rate": 4.902241414696337e-06, "loss": 0.1707, "step": 2160 }, { "epoch": 0.3501296176279974, "grad_norm": 1.0126726627349854, "learning_rate": 4.902120285139898e-06, "loss": 0.1451, "step": 2161 }, { "epoch": 0.35029163966299415, "grad_norm": 1.068403720855713, "learning_rate": 4.901999082084391e-06, "loss": 0.1578, "step": 2162 }, { "epoch": 0.3504536616979909, "grad_norm": 0.9774585962295532, "learning_rate": 4.901877805533525e-06, "loss": 0.1406, "step": 2163 }, { "epoch": 0.3506156837329877, "grad_norm": 1.0109790563583374, "learning_rate": 4.901756455491011e-06, "loss": 0.1425, "step": 2164 }, { "epoch": 0.35077770576798445, "grad_norm": 1.182918667793274, "learning_rate": 4.901635031960561e-06, "loss": 0.1952, "step": 2165 }, { "epoch": 0.3509397278029812, "grad_norm": 0.9629144668579102, "learning_rate": 4.901513534945891e-06, "loss": 0.1487, "step": 2166 }, { "epoch": 0.351101749837978, "grad_norm": 1.0952849388122559, "learning_rate": 4.901391964450718e-06, "loss": 0.171, "step": 2167 }, { "epoch": 0.35126377187297475, "grad_norm": 1.0250297784805298, "learning_rate": 4.901270320478763e-06, "loss": 0.1532, "step": 2168 }, { "epoch": 0.35142579390797146, "grad_norm": 1.1184073686599731, "learning_rate": 4.901148603033747e-06, "loss": 0.1641, "step": 2169 }, { "epoch": 0.35158781594296823, "grad_norm": 1.0301399230957031, "learning_rate": 4.901026812119394e-06, "loss": 0.1515, "step": 2170 }, { "epoch": 0.351749837977965, "grad_norm": 1.188010573387146, "learning_rate": 4.900904947739431e-06, "loss": 0.1565, "step": 2171 }, { "epoch": 0.35191186001296176, "grad_norm": 1.0016369819641113, "learning_rate": 4.9007830098975875e-06, "loss": 0.1523, "step": 2172 }, { "epoch": 0.35207388204795853, "grad_norm": 1.049594521522522, "learning_rate": 4.9006609985975925e-06, "loss": 0.1426, "step": 2173 }, { "epoch": 0.3522359040829553, "grad_norm": 1.197577714920044, "learning_rate": 4.900538913843181e-06, "loss": 0.1812, "step": 2174 }, { "epoch": 0.35239792611795207, "grad_norm": 1.06314218044281, "learning_rate": 4.900416755638087e-06, "loss": 0.1601, "step": 2175 }, { "epoch": 0.3525599481529488, "grad_norm": 1.0126889944076538, "learning_rate": 4.900294523986051e-06, "loss": 0.1472, "step": 2176 }, { "epoch": 0.35272197018794554, "grad_norm": 1.1102066040039062, "learning_rate": 4.900172218890809e-06, "loss": 0.1699, "step": 2177 }, { "epoch": 0.3528839922229423, "grad_norm": 1.1450811624526978, "learning_rate": 4.900049840356107e-06, "loss": 0.1804, "step": 2178 }, { "epoch": 0.3530460142579391, "grad_norm": 1.1297409534454346, "learning_rate": 4.899927388385688e-06, "loss": 0.1678, "step": 2179 }, { "epoch": 0.35320803629293585, "grad_norm": 1.0835684537887573, "learning_rate": 4.899804862983298e-06, "loss": 0.173, "step": 2180 }, { "epoch": 0.3533700583279326, "grad_norm": 1.0187311172485352, "learning_rate": 4.899682264152686e-06, "loss": 0.1353, "step": 2181 }, { "epoch": 0.3535320803629294, "grad_norm": 0.9485295414924622, "learning_rate": 4.899559591897604e-06, "loss": 0.1516, "step": 2182 }, { "epoch": 0.3536941023979261, "grad_norm": 0.9085932970046997, "learning_rate": 4.899436846221807e-06, "loss": 0.148, "step": 2183 }, { "epoch": 0.35385612443292286, "grad_norm": 1.1273099184036255, "learning_rate": 4.899314027129047e-06, "loss": 0.1685, "step": 2184 }, { "epoch": 0.3540181464679196, "grad_norm": 1.0366731882095337, "learning_rate": 4.899191134623086e-06, "loss": 0.1542, "step": 2185 }, { "epoch": 0.3541801685029164, "grad_norm": 1.049047589302063, "learning_rate": 4.89906816870768e-06, "loss": 0.172, "step": 2186 }, { "epoch": 0.35434219053791316, "grad_norm": 1.1898120641708374, "learning_rate": 4.8989451293865955e-06, "loss": 0.1729, "step": 2187 }, { "epoch": 0.3545042125729099, "grad_norm": 1.0170189142227173, "learning_rate": 4.898822016663595e-06, "loss": 0.1539, "step": 2188 }, { "epoch": 0.3546662346079067, "grad_norm": 0.9323112368583679, "learning_rate": 4.8986988305424445e-06, "loss": 0.1392, "step": 2189 }, { "epoch": 0.35482825664290346, "grad_norm": 1.097751259803772, "learning_rate": 4.898575571026916e-06, "loss": 0.1697, "step": 2190 }, { "epoch": 0.3549902786779002, "grad_norm": 1.2580288648605347, "learning_rate": 4.898452238120779e-06, "loss": 0.1925, "step": 2191 }, { "epoch": 0.35515230071289694, "grad_norm": 1.1584991216659546, "learning_rate": 4.898328831827808e-06, "loss": 0.1696, "step": 2192 }, { "epoch": 0.3553143227478937, "grad_norm": 1.0582317113876343, "learning_rate": 4.898205352151777e-06, "loss": 0.1498, "step": 2193 }, { "epoch": 0.3554763447828905, "grad_norm": 1.039017915725708, "learning_rate": 4.898081799096467e-06, "loss": 0.1458, "step": 2194 }, { "epoch": 0.35563836681788724, "grad_norm": 1.237728476524353, "learning_rate": 4.897958172665658e-06, "loss": 0.1588, "step": 2195 }, { "epoch": 0.355800388852884, "grad_norm": 0.9922459125518799, "learning_rate": 4.897834472863131e-06, "loss": 0.146, "step": 2196 }, { "epoch": 0.3559624108878808, "grad_norm": 1.1066429615020752, "learning_rate": 4.897710699692672e-06, "loss": 0.1492, "step": 2197 }, { "epoch": 0.3561244329228775, "grad_norm": 1.1246432065963745, "learning_rate": 4.897586853158067e-06, "loss": 0.1644, "step": 2198 }, { "epoch": 0.35628645495787425, "grad_norm": 1.1196368932724, "learning_rate": 4.897462933263107e-06, "loss": 0.1517, "step": 2199 }, { "epoch": 0.356448476992871, "grad_norm": 1.1763758659362793, "learning_rate": 4.897338940011583e-06, "loss": 0.169, "step": 2200 }, { "epoch": 0.3566104990278678, "grad_norm": 1.0583901405334473, "learning_rate": 4.897214873407289e-06, "loss": 0.1462, "step": 2201 }, { "epoch": 0.35677252106286456, "grad_norm": 0.9856936931610107, "learning_rate": 4.897090733454021e-06, "loss": 0.1408, "step": 2202 }, { "epoch": 0.3569345430978613, "grad_norm": 1.0380568504333496, "learning_rate": 4.8969665201555775e-06, "loss": 0.1692, "step": 2203 }, { "epoch": 0.3570965651328581, "grad_norm": 1.0581004619598389, "learning_rate": 4.896842233515759e-06, "loss": 0.1714, "step": 2204 }, { "epoch": 0.3572585871678548, "grad_norm": 1.2082271575927734, "learning_rate": 4.896717873538368e-06, "loss": 0.1791, "step": 2205 }, { "epoch": 0.35742060920285157, "grad_norm": 1.0210479497909546, "learning_rate": 4.89659344022721e-06, "loss": 0.1652, "step": 2206 }, { "epoch": 0.35758263123784834, "grad_norm": 0.9992541074752808, "learning_rate": 4.896468933586094e-06, "loss": 0.162, "step": 2207 }, { "epoch": 0.3577446532728451, "grad_norm": 1.0144470930099487, "learning_rate": 4.896344353618826e-06, "loss": 0.1597, "step": 2208 }, { "epoch": 0.35790667530784187, "grad_norm": 1.1150000095367432, "learning_rate": 4.89621970032922e-06, "loss": 0.167, "step": 2209 }, { "epoch": 0.35806869734283864, "grad_norm": 1.0578536987304688, "learning_rate": 4.896094973721091e-06, "loss": 0.1402, "step": 2210 }, { "epoch": 0.3582307193778354, "grad_norm": 0.9122249484062195, "learning_rate": 4.8959701737982535e-06, "loss": 0.1362, "step": 2211 }, { "epoch": 0.35839274141283217, "grad_norm": 1.0043046474456787, "learning_rate": 4.8958453005645265e-06, "loss": 0.1537, "step": 2212 }, { "epoch": 0.3585547634478289, "grad_norm": 1.1452409029006958, "learning_rate": 4.895720354023732e-06, "loss": 0.1239, "step": 2213 }, { "epoch": 0.35871678548282565, "grad_norm": 1.0220675468444824, "learning_rate": 4.895595334179692e-06, "loss": 0.1461, "step": 2214 }, { "epoch": 0.3588788075178224, "grad_norm": 1.3352243900299072, "learning_rate": 4.895470241036232e-06, "loss": 0.2061, "step": 2215 }, { "epoch": 0.3590408295528192, "grad_norm": 1.1318044662475586, "learning_rate": 4.89534507459718e-06, "loss": 0.1479, "step": 2216 }, { "epoch": 0.35920285158781595, "grad_norm": 1.2025889158248901, "learning_rate": 4.895219834866364e-06, "loss": 0.1721, "step": 2217 }, { "epoch": 0.3593648736228127, "grad_norm": 1.0799037218093872, "learning_rate": 4.895094521847617e-06, "loss": 0.1504, "step": 2218 }, { "epoch": 0.3595268956578095, "grad_norm": 0.9984576106071472, "learning_rate": 4.894969135544776e-06, "loss": 0.1485, "step": 2219 }, { "epoch": 0.3596889176928062, "grad_norm": 1.1945985555648804, "learning_rate": 4.894843675961673e-06, "loss": 0.1784, "step": 2220 }, { "epoch": 0.35985093972780297, "grad_norm": 0.962485671043396, "learning_rate": 4.894718143102151e-06, "loss": 0.1345, "step": 2221 }, { "epoch": 0.36001296176279973, "grad_norm": 0.9608977437019348, "learning_rate": 4.894592536970047e-06, "loss": 0.144, "step": 2222 }, { "epoch": 0.3601749837977965, "grad_norm": 1.0343858003616333, "learning_rate": 4.894466857569207e-06, "loss": 0.1507, "step": 2223 }, { "epoch": 0.36033700583279327, "grad_norm": 1.0216684341430664, "learning_rate": 4.894341104903476e-06, "loss": 0.1354, "step": 2224 }, { "epoch": 0.36049902786779003, "grad_norm": 1.18091881275177, "learning_rate": 4.8942152789767e-06, "loss": 0.1631, "step": 2225 }, { "epoch": 0.3606610499027868, "grad_norm": 1.0483477115631104, "learning_rate": 4.894089379792731e-06, "loss": 0.1607, "step": 2226 }, { "epoch": 0.3608230719377835, "grad_norm": 1.1260876655578613, "learning_rate": 4.893963407355422e-06, "loss": 0.1611, "step": 2227 }, { "epoch": 0.3609850939727803, "grad_norm": 0.9986515045166016, "learning_rate": 4.893837361668624e-06, "loss": 0.1565, "step": 2228 }, { "epoch": 0.36114711600777705, "grad_norm": 1.1207728385925293, "learning_rate": 4.893711242736197e-06, "loss": 0.1611, "step": 2229 }, { "epoch": 0.3613091380427738, "grad_norm": 1.1995974779129028, "learning_rate": 4.8935850505619985e-06, "loss": 0.1705, "step": 2230 }, { "epoch": 0.3614711600777706, "grad_norm": 1.1663684844970703, "learning_rate": 4.893458785149889e-06, "loss": 0.1841, "step": 2231 }, { "epoch": 0.36163318211276735, "grad_norm": 0.9084141850471497, "learning_rate": 4.8933324465037334e-06, "loss": 0.1425, "step": 2232 }, { "epoch": 0.3617952041477641, "grad_norm": 0.9766511917114258, "learning_rate": 4.893206034627397e-06, "loss": 0.1585, "step": 2233 }, { "epoch": 0.3619572261827609, "grad_norm": 1.0167397260665894, "learning_rate": 4.893079549524747e-06, "loss": 0.167, "step": 2234 }, { "epoch": 0.3621192482177576, "grad_norm": 1.0378706455230713, "learning_rate": 4.892952991199654e-06, "loss": 0.1502, "step": 2235 }, { "epoch": 0.36228127025275436, "grad_norm": 0.9949929118156433, "learning_rate": 4.89282635965599e-06, "loss": 0.1482, "step": 2236 }, { "epoch": 0.36244329228775113, "grad_norm": 1.006990909576416, "learning_rate": 4.89269965489763e-06, "loss": 0.1504, "step": 2237 }, { "epoch": 0.3626053143227479, "grad_norm": 0.947759747505188, "learning_rate": 4.8925728769284504e-06, "loss": 0.1436, "step": 2238 }, { "epoch": 0.36276733635774466, "grad_norm": 0.9912741184234619, "learning_rate": 4.892446025752332e-06, "loss": 0.1401, "step": 2239 }, { "epoch": 0.36292935839274143, "grad_norm": 0.9576963782310486, "learning_rate": 4.892319101373154e-06, "loss": 0.1406, "step": 2240 }, { "epoch": 0.3630913804277382, "grad_norm": 1.0038036108016968, "learning_rate": 4.892192103794801e-06, "loss": 0.1418, "step": 2241 }, { "epoch": 0.3632534024627349, "grad_norm": 1.1145362854003906, "learning_rate": 4.892065033021158e-06, "loss": 0.1654, "step": 2242 }, { "epoch": 0.3634154244977317, "grad_norm": 1.185004472732544, "learning_rate": 4.8919378890561145e-06, "loss": 0.1686, "step": 2243 }, { "epoch": 0.36357744653272844, "grad_norm": 1.0970295667648315, "learning_rate": 4.8918106719035594e-06, "loss": 0.1504, "step": 2244 }, { "epoch": 0.3637394685677252, "grad_norm": 1.0193977355957031, "learning_rate": 4.891683381567386e-06, "loss": 0.1405, "step": 2245 }, { "epoch": 0.363901490602722, "grad_norm": 0.9931899309158325, "learning_rate": 4.891556018051489e-06, "loss": 0.1569, "step": 2246 }, { "epoch": 0.36406351263771874, "grad_norm": 1.1715176105499268, "learning_rate": 4.891428581359764e-06, "loss": 0.1799, "step": 2247 }, { "epoch": 0.3642255346727155, "grad_norm": 1.0806795358657837, "learning_rate": 4.891301071496113e-06, "loss": 0.1547, "step": 2248 }, { "epoch": 0.3643875567077122, "grad_norm": 1.0172020196914673, "learning_rate": 4.891173488464436e-06, "loss": 0.1409, "step": 2249 }, { "epoch": 0.364549578742709, "grad_norm": 0.9692556858062744, "learning_rate": 4.891045832268637e-06, "loss": 0.1519, "step": 2250 }, { "epoch": 0.36471160077770576, "grad_norm": 1.108995795249939, "learning_rate": 4.890918102912621e-06, "loss": 0.1695, "step": 2251 }, { "epoch": 0.3648736228127025, "grad_norm": 1.081821322441101, "learning_rate": 4.890790300400297e-06, "loss": 0.1719, "step": 2252 }, { "epoch": 0.3650356448476993, "grad_norm": 1.0040013790130615, "learning_rate": 4.890662424735576e-06, "loss": 0.1603, "step": 2253 }, { "epoch": 0.36519766688269606, "grad_norm": 1.0947988033294678, "learning_rate": 4.8905344759223696e-06, "loss": 0.1791, "step": 2254 }, { "epoch": 0.3653596889176928, "grad_norm": 1.1201766729354858, "learning_rate": 4.890406453964594e-06, "loss": 0.1646, "step": 2255 }, { "epoch": 0.36552171095268954, "grad_norm": 1.0785022974014282, "learning_rate": 4.890278358866165e-06, "loss": 0.1617, "step": 2256 }, { "epoch": 0.3656837329876863, "grad_norm": 1.1495450735092163, "learning_rate": 4.8901501906310024e-06, "loss": 0.1674, "step": 2257 }, { "epoch": 0.3658457550226831, "grad_norm": 1.0032192468643188, "learning_rate": 4.890021949263027e-06, "loss": 0.1427, "step": 2258 }, { "epoch": 0.36600777705767984, "grad_norm": 1.148429274559021, "learning_rate": 4.889893634766165e-06, "loss": 0.1654, "step": 2259 }, { "epoch": 0.3661697990926766, "grad_norm": 1.0179355144500732, "learning_rate": 4.889765247144341e-06, "loss": 0.1496, "step": 2260 }, { "epoch": 0.3663318211276734, "grad_norm": 0.9481959939002991, "learning_rate": 4.889636786401484e-06, "loss": 0.1423, "step": 2261 }, { "epoch": 0.36649384316267014, "grad_norm": 1.1533952951431274, "learning_rate": 4.889508252541524e-06, "loss": 0.1582, "step": 2262 }, { "epoch": 0.3666558651976669, "grad_norm": 1.0846741199493408, "learning_rate": 4.889379645568394e-06, "loss": 0.1599, "step": 2263 }, { "epoch": 0.3668178872326636, "grad_norm": 1.2179301977157593, "learning_rate": 4.889250965486029e-06, "loss": 0.1812, "step": 2264 }, { "epoch": 0.3669799092676604, "grad_norm": 0.9992818832397461, "learning_rate": 4.889122212298366e-06, "loss": 0.138, "step": 2265 }, { "epoch": 0.36714193130265715, "grad_norm": 1.1005460023880005, "learning_rate": 4.888993386009345e-06, "loss": 0.1519, "step": 2266 }, { "epoch": 0.3673039533376539, "grad_norm": 1.0345803499221802, "learning_rate": 4.888864486622907e-06, "loss": 0.1539, "step": 2267 }, { "epoch": 0.3674659753726507, "grad_norm": 1.0553728342056274, "learning_rate": 4.888735514142998e-06, "loss": 0.1459, "step": 2268 }, { "epoch": 0.36762799740764746, "grad_norm": 1.0219756364822388, "learning_rate": 4.888606468573562e-06, "loss": 0.1459, "step": 2269 }, { "epoch": 0.3677900194426442, "grad_norm": 1.0199123620986938, "learning_rate": 4.8884773499185485e-06, "loss": 0.1466, "step": 2270 }, { "epoch": 0.36795204147764093, "grad_norm": 0.9618728160858154, "learning_rate": 4.888348158181908e-06, "loss": 0.1478, "step": 2271 }, { "epoch": 0.3681140635126377, "grad_norm": 1.018974781036377, "learning_rate": 4.8882188933675935e-06, "loss": 0.1493, "step": 2272 }, { "epoch": 0.36827608554763447, "grad_norm": 1.1692665815353394, "learning_rate": 4.888089555479561e-06, "loss": 0.1571, "step": 2273 }, { "epoch": 0.36843810758263124, "grad_norm": 1.0094574689865112, "learning_rate": 4.887960144521766e-06, "loss": 0.1413, "step": 2274 }, { "epoch": 0.368600129617628, "grad_norm": 1.0901294946670532, "learning_rate": 4.8878306604981704e-06, "loss": 0.1495, "step": 2275 }, { "epoch": 0.36876215165262477, "grad_norm": 1.043537974357605, "learning_rate": 4.887701103412734e-06, "loss": 0.1641, "step": 2276 }, { "epoch": 0.36892417368762154, "grad_norm": 1.0010807514190674, "learning_rate": 4.887571473269422e-06, "loss": 0.1562, "step": 2277 }, { "epoch": 0.36908619572261825, "grad_norm": 0.9946657419204712, "learning_rate": 4.8874417700722025e-06, "loss": 0.1406, "step": 2278 }, { "epoch": 0.369248217757615, "grad_norm": 1.007975697517395, "learning_rate": 4.887311993825041e-06, "loss": 0.1448, "step": 2279 }, { "epoch": 0.3694102397926118, "grad_norm": 1.072035312652588, "learning_rate": 4.887182144531909e-06, "loss": 0.1686, "step": 2280 }, { "epoch": 0.36957226182760855, "grad_norm": 2.578970432281494, "learning_rate": 4.887052222196782e-06, "loss": 0.1594, "step": 2281 }, { "epoch": 0.3697342838626053, "grad_norm": 0.9519162774085999, "learning_rate": 4.886922226823632e-06, "loss": 0.1369, "step": 2282 }, { "epoch": 0.3698963058976021, "grad_norm": 1.0015640258789062, "learning_rate": 4.886792158416439e-06, "loss": 0.1469, "step": 2283 }, { "epoch": 0.37005832793259885, "grad_norm": 0.9746085405349731, "learning_rate": 4.8866620169791815e-06, "loss": 0.1342, "step": 2284 }, { "epoch": 0.3702203499675956, "grad_norm": 1.0695668458938599, "learning_rate": 4.886531802515842e-06, "loss": 0.1645, "step": 2285 }, { "epoch": 0.37038237200259233, "grad_norm": 1.1742403507232666, "learning_rate": 4.886401515030404e-06, "loss": 0.1668, "step": 2286 }, { "epoch": 0.3705443940375891, "grad_norm": 1.0946766138076782, "learning_rate": 4.886271154526856e-06, "loss": 0.1486, "step": 2287 }, { "epoch": 0.37070641607258586, "grad_norm": 1.0523529052734375, "learning_rate": 4.886140721009184e-06, "loss": 0.1458, "step": 2288 }, { "epoch": 0.37086843810758263, "grad_norm": 1.1310120820999146, "learning_rate": 4.88601021448138e-06, "loss": 0.1568, "step": 2289 }, { "epoch": 0.3710304601425794, "grad_norm": 1.176133155822754, "learning_rate": 4.885879634947439e-06, "loss": 0.1723, "step": 2290 }, { "epoch": 0.37119248217757617, "grad_norm": 1.064237117767334, "learning_rate": 4.8857489824113544e-06, "loss": 0.1507, "step": 2291 }, { "epoch": 0.37135450421257293, "grad_norm": 1.0737059116363525, "learning_rate": 4.885618256877123e-06, "loss": 0.1391, "step": 2292 }, { "epoch": 0.37151652624756965, "grad_norm": 1.0706037282943726, "learning_rate": 4.885487458348748e-06, "loss": 0.1442, "step": 2293 }, { "epoch": 0.3716785482825664, "grad_norm": 1.3316296339035034, "learning_rate": 4.885356586830229e-06, "loss": 0.1965, "step": 2294 }, { "epoch": 0.3718405703175632, "grad_norm": 0.9738705158233643, "learning_rate": 4.8852256423255706e-06, "loss": 0.1412, "step": 2295 }, { "epoch": 0.37200259235255995, "grad_norm": 0.9631845355033875, "learning_rate": 4.8850946248387795e-06, "loss": 0.1446, "step": 2296 }, { "epoch": 0.3721646143875567, "grad_norm": 1.002110481262207, "learning_rate": 4.884963534373864e-06, "loss": 0.1489, "step": 2297 }, { "epoch": 0.3723266364225535, "grad_norm": 0.8810906410217285, "learning_rate": 4.884832370934838e-06, "loss": 0.1249, "step": 2298 }, { "epoch": 0.37248865845755025, "grad_norm": 1.2521817684173584, "learning_rate": 4.88470113452571e-06, "loss": 0.155, "step": 2299 }, { "epoch": 0.37265068049254696, "grad_norm": 1.0234705209732056, "learning_rate": 4.8845698251505e-06, "loss": 0.1481, "step": 2300 }, { "epoch": 0.3728127025275437, "grad_norm": 1.0289057493209839, "learning_rate": 4.884438442813223e-06, "loss": 0.1226, "step": 2301 }, { "epoch": 0.3729747245625405, "grad_norm": 1.0109435319900513, "learning_rate": 4.8843069875179005e-06, "loss": 0.1527, "step": 2302 }, { "epoch": 0.37313674659753726, "grad_norm": 1.0381628274917603, "learning_rate": 4.884175459268554e-06, "loss": 0.1689, "step": 2303 }, { "epoch": 0.37329876863253403, "grad_norm": 1.0785562992095947, "learning_rate": 4.884043858069208e-06, "loss": 0.1507, "step": 2304 }, { "epoch": 0.3734607906675308, "grad_norm": 1.0197018384933472, "learning_rate": 4.883912183923889e-06, "loss": 0.1564, "step": 2305 }, { "epoch": 0.37362281270252756, "grad_norm": 1.1440730094909668, "learning_rate": 4.883780436836627e-06, "loss": 0.1594, "step": 2306 }, { "epoch": 0.37378483473752433, "grad_norm": 1.0093541145324707, "learning_rate": 4.883648616811451e-06, "loss": 0.1565, "step": 2307 }, { "epoch": 0.37394685677252104, "grad_norm": 1.0944989919662476, "learning_rate": 4.883516723852396e-06, "loss": 0.1549, "step": 2308 }, { "epoch": 0.3741088788075178, "grad_norm": 1.0716201066970825, "learning_rate": 4.883384757963498e-06, "loss": 0.1666, "step": 2309 }, { "epoch": 0.3742709008425146, "grad_norm": 0.946195125579834, "learning_rate": 4.883252719148794e-06, "loss": 0.1422, "step": 2310 }, { "epoch": 0.37443292287751134, "grad_norm": 0.9485779404640198, "learning_rate": 4.883120607412323e-06, "loss": 0.1536, "step": 2311 }, { "epoch": 0.3745949449125081, "grad_norm": 1.0378730297088623, "learning_rate": 4.8829884227581294e-06, "loss": 0.1448, "step": 2312 }, { "epoch": 0.3747569669475049, "grad_norm": 1.0789496898651123, "learning_rate": 4.882856165190256e-06, "loss": 0.1549, "step": 2313 }, { "epoch": 0.37491898898250164, "grad_norm": 1.0196281671524048, "learning_rate": 4.88272383471275e-06, "loss": 0.1511, "step": 2314 }, { "epoch": 0.37508101101749836, "grad_norm": 0.9408444762229919, "learning_rate": 4.882591431329662e-06, "loss": 0.1301, "step": 2315 }, { "epoch": 0.3752430330524951, "grad_norm": 1.0712037086486816, "learning_rate": 4.8824589550450415e-06, "loss": 0.1601, "step": 2316 }, { "epoch": 0.3754050550874919, "grad_norm": 0.8943222165107727, "learning_rate": 4.8823264058629426e-06, "loss": 0.1318, "step": 2317 }, { "epoch": 0.37556707712248866, "grad_norm": 0.9707005620002747, "learning_rate": 4.882193783787421e-06, "loss": 0.1476, "step": 2318 }, { "epoch": 0.3757290991574854, "grad_norm": 1.0680534839630127, "learning_rate": 4.882061088822534e-06, "loss": 0.1753, "step": 2319 }, { "epoch": 0.3758911211924822, "grad_norm": 1.0867925882339478, "learning_rate": 4.881928320972342e-06, "loss": 0.1571, "step": 2320 }, { "epoch": 0.37605314322747896, "grad_norm": 1.1130952835083008, "learning_rate": 4.881795480240908e-06, "loss": 0.169, "step": 2321 }, { "epoch": 0.37621516526247567, "grad_norm": 1.034469485282898, "learning_rate": 4.881662566632296e-06, "loss": 0.1709, "step": 2322 }, { "epoch": 0.37637718729747244, "grad_norm": 1.1244481801986694, "learning_rate": 4.881529580150573e-06, "loss": 0.1594, "step": 2323 }, { "epoch": 0.3765392093324692, "grad_norm": 1.132101058959961, "learning_rate": 4.881396520799808e-06, "loss": 0.1757, "step": 2324 }, { "epoch": 0.37670123136746597, "grad_norm": 1.098602294921875, "learning_rate": 4.881263388584072e-06, "loss": 0.154, "step": 2325 }, { "epoch": 0.37686325340246274, "grad_norm": 0.9612120389938354, "learning_rate": 4.8811301835074384e-06, "loss": 0.1427, "step": 2326 }, { "epoch": 0.3770252754374595, "grad_norm": 1.0000402927398682, "learning_rate": 4.880996905573985e-06, "loss": 0.1531, "step": 2327 }, { "epoch": 0.3771872974724563, "grad_norm": 1.076193928718567, "learning_rate": 4.880863554787787e-06, "loss": 0.1791, "step": 2328 }, { "epoch": 0.37734931950745304, "grad_norm": 0.9946293234825134, "learning_rate": 4.8807301311529266e-06, "loss": 0.1507, "step": 2329 }, { "epoch": 0.37751134154244975, "grad_norm": 0.9795472025871277, "learning_rate": 4.880596634673484e-06, "loss": 0.1498, "step": 2330 }, { "epoch": 0.3776733635774465, "grad_norm": 1.1108335256576538, "learning_rate": 4.880463065353547e-06, "loss": 0.1735, "step": 2331 }, { "epoch": 0.3778353856124433, "grad_norm": 1.0525665283203125, "learning_rate": 4.8803294231972e-06, "loss": 0.1655, "step": 2332 }, { "epoch": 0.37799740764744005, "grad_norm": 1.1461604833602905, "learning_rate": 4.880195708208533e-06, "loss": 0.1622, "step": 2333 }, { "epoch": 0.3781594296824368, "grad_norm": 1.1616642475128174, "learning_rate": 4.8800619203916376e-06, "loss": 0.1538, "step": 2334 }, { "epoch": 0.3783214517174336, "grad_norm": 1.3006972074508667, "learning_rate": 4.879928059750606e-06, "loss": 0.1981, "step": 2335 }, { "epoch": 0.37848347375243035, "grad_norm": 1.2075080871582031, "learning_rate": 4.8797941262895365e-06, "loss": 0.1655, "step": 2336 }, { "epoch": 0.37864549578742707, "grad_norm": 1.0310864448547363, "learning_rate": 4.8796601200125245e-06, "loss": 0.1668, "step": 2337 }, { "epoch": 0.37880751782242383, "grad_norm": 1.1267403364181519, "learning_rate": 4.8795260409236725e-06, "loss": 0.1574, "step": 2338 }, { "epoch": 0.3789695398574206, "grad_norm": 1.0787062644958496, "learning_rate": 4.879391889027081e-06, "loss": 0.1417, "step": 2339 }, { "epoch": 0.37913156189241737, "grad_norm": 0.9812328219413757, "learning_rate": 4.879257664326856e-06, "loss": 0.1436, "step": 2340 }, { "epoch": 0.37929358392741414, "grad_norm": 1.0274299383163452, "learning_rate": 4.8791233668271055e-06, "loss": 0.1532, "step": 2341 }, { "epoch": 0.3794556059624109, "grad_norm": 0.916479766368866, "learning_rate": 4.8789889965319355e-06, "loss": 0.1369, "step": 2342 }, { "epoch": 0.37961762799740767, "grad_norm": 1.0698994398117065, "learning_rate": 4.87885455344546e-06, "loss": 0.1632, "step": 2343 }, { "epoch": 0.3797796500324044, "grad_norm": 1.1658228635787964, "learning_rate": 4.878720037571792e-06, "loss": 0.1735, "step": 2344 }, { "epoch": 0.37994167206740115, "grad_norm": 1.0319538116455078, "learning_rate": 4.878585448915047e-06, "loss": 0.1601, "step": 2345 }, { "epoch": 0.3801036941023979, "grad_norm": 1.0444625616073608, "learning_rate": 4.878450787479344e-06, "loss": 0.1496, "step": 2346 }, { "epoch": 0.3802657161373947, "grad_norm": 0.9932622909545898, "learning_rate": 4.8783160532688026e-06, "loss": 0.1479, "step": 2347 }, { "epoch": 0.38042773817239145, "grad_norm": 1.0771230459213257, "learning_rate": 4.878181246287544e-06, "loss": 0.172, "step": 2348 }, { "epoch": 0.3805897602073882, "grad_norm": 0.9821139574050903, "learning_rate": 4.878046366539696e-06, "loss": 0.1355, "step": 2349 }, { "epoch": 0.380751782242385, "grad_norm": 1.1744256019592285, "learning_rate": 4.877911414029382e-06, "loss": 0.1728, "step": 2350 }, { "epoch": 0.38091380427738175, "grad_norm": 1.0486180782318115, "learning_rate": 4.877776388760735e-06, "loss": 0.151, "step": 2351 }, { "epoch": 0.38107582631237846, "grad_norm": 0.974438488483429, "learning_rate": 4.8776412907378845e-06, "loss": 0.1425, "step": 2352 }, { "epoch": 0.38123784834737523, "grad_norm": 0.9871600270271301, "learning_rate": 4.877506119964964e-06, "loss": 0.1509, "step": 2353 }, { "epoch": 0.381399870382372, "grad_norm": 1.0788719654083252, "learning_rate": 4.877370876446109e-06, "loss": 0.1502, "step": 2354 }, { "epoch": 0.38156189241736876, "grad_norm": 0.8554217219352722, "learning_rate": 4.877235560185459e-06, "loss": 0.1317, "step": 2355 }, { "epoch": 0.38172391445236553, "grad_norm": 1.0223811864852905, "learning_rate": 4.877100171187154e-06, "loss": 0.1618, "step": 2356 }, { "epoch": 0.3818859364873623, "grad_norm": 1.042266607284546, "learning_rate": 4.876964709455335e-06, "loss": 0.1565, "step": 2357 }, { "epoch": 0.38204795852235907, "grad_norm": 1.0862993001937866, "learning_rate": 4.876829174994149e-06, "loss": 0.1689, "step": 2358 }, { "epoch": 0.3822099805573558, "grad_norm": 1.0016382932662964, "learning_rate": 4.876693567807741e-06, "loss": 0.1428, "step": 2359 }, { "epoch": 0.38237200259235254, "grad_norm": 1.009976863861084, "learning_rate": 4.8765578879002625e-06, "loss": 0.1525, "step": 2360 }, { "epoch": 0.3825340246273493, "grad_norm": 0.9850345253944397, "learning_rate": 4.876422135275863e-06, "loss": 0.1373, "step": 2361 }, { "epoch": 0.3826960466623461, "grad_norm": 1.0292307138442993, "learning_rate": 4.8762863099386984e-06, "loss": 0.1444, "step": 2362 }, { "epoch": 0.38285806869734285, "grad_norm": 0.9896308779716492, "learning_rate": 4.876150411892922e-06, "loss": 0.1527, "step": 2363 }, { "epoch": 0.3830200907323396, "grad_norm": 1.091084599494934, "learning_rate": 4.876014441142693e-06, "loss": 0.1541, "step": 2364 }, { "epoch": 0.3831821127673364, "grad_norm": 1.06158447265625, "learning_rate": 4.875878397692172e-06, "loss": 0.1555, "step": 2365 }, { "epoch": 0.3833441348023331, "grad_norm": 1.0953679084777832, "learning_rate": 4.8757422815455215e-06, "loss": 0.1643, "step": 2366 }, { "epoch": 0.38350615683732986, "grad_norm": 1.0464844703674316, "learning_rate": 4.8756060927069075e-06, "loss": 0.1512, "step": 2367 }, { "epoch": 0.3836681788723266, "grad_norm": 1.0464715957641602, "learning_rate": 4.875469831180495e-06, "loss": 0.1573, "step": 2368 }, { "epoch": 0.3838302009073234, "grad_norm": 0.9663591384887695, "learning_rate": 4.875333496970454e-06, "loss": 0.1446, "step": 2369 }, { "epoch": 0.38399222294232016, "grad_norm": 1.192948341369629, "learning_rate": 4.875197090080957e-06, "loss": 0.1528, "step": 2370 }, { "epoch": 0.3841542449773169, "grad_norm": 1.0319154262542725, "learning_rate": 4.875060610516176e-06, "loss": 0.1489, "step": 2371 }, { "epoch": 0.3843162670123137, "grad_norm": 1.1265735626220703, "learning_rate": 4.874924058280288e-06, "loss": 0.1558, "step": 2372 }, { "epoch": 0.38447828904731046, "grad_norm": 1.1732511520385742, "learning_rate": 4.874787433377472e-06, "loss": 0.1631, "step": 2373 }, { "epoch": 0.3846403110823072, "grad_norm": 1.2255806922912598, "learning_rate": 4.874650735811906e-06, "loss": 0.1717, "step": 2374 }, { "epoch": 0.38480233311730394, "grad_norm": 1.0163358449935913, "learning_rate": 4.874513965587775e-06, "loss": 0.1531, "step": 2375 }, { "epoch": 0.3849643551523007, "grad_norm": 1.047625184059143, "learning_rate": 4.874377122709263e-06, "loss": 0.1702, "step": 2376 }, { "epoch": 0.3851263771872975, "grad_norm": 1.0815023183822632, "learning_rate": 4.874240207180556e-06, "loss": 0.1649, "step": 2377 }, { "epoch": 0.38528839922229424, "grad_norm": 0.9695912003517151, "learning_rate": 4.874103219005845e-06, "loss": 0.1392, "step": 2378 }, { "epoch": 0.385450421257291, "grad_norm": 1.0727660655975342, "learning_rate": 4.873966158189321e-06, "loss": 0.1428, "step": 2379 }, { "epoch": 0.3856124432922878, "grad_norm": 1.184160590171814, "learning_rate": 4.873829024735176e-06, "loss": 0.1532, "step": 2380 }, { "epoch": 0.3857744653272845, "grad_norm": 0.9503794312477112, "learning_rate": 4.873691818647609e-06, "loss": 0.1313, "step": 2381 }, { "epoch": 0.38593648736228126, "grad_norm": 1.0387951135635376, "learning_rate": 4.873554539930815e-06, "loss": 0.1699, "step": 2382 }, { "epoch": 0.386098509397278, "grad_norm": 1.1206920146942139, "learning_rate": 4.873417188588997e-06, "loss": 0.1645, "step": 2383 }, { "epoch": 0.3862605314322748, "grad_norm": 1.0586580038070679, "learning_rate": 4.873279764626357e-06, "loss": 0.1505, "step": 2384 }, { "epoch": 0.38642255346727156, "grad_norm": 1.080195665359497, "learning_rate": 4.873142268047099e-06, "loss": 0.1518, "step": 2385 }, { "epoch": 0.3865845755022683, "grad_norm": 1.0671923160552979, "learning_rate": 4.87300469885543e-06, "loss": 0.17, "step": 2386 }, { "epoch": 0.3867465975372651, "grad_norm": 0.9395395517349243, "learning_rate": 4.87286705705556e-06, "loss": 0.1497, "step": 2387 }, { "epoch": 0.3869086195722618, "grad_norm": 1.0905933380126953, "learning_rate": 4.872729342651701e-06, "loss": 0.1711, "step": 2388 }, { "epoch": 0.38707064160725857, "grad_norm": 1.1106778383255005, "learning_rate": 4.8725915556480655e-06, "loss": 0.1736, "step": 2389 }, { "epoch": 0.38723266364225534, "grad_norm": 1.1426775455474854, "learning_rate": 4.87245369604887e-06, "loss": 0.178, "step": 2390 }, { "epoch": 0.3873946856772521, "grad_norm": 1.0927367210388184, "learning_rate": 4.872315763858332e-06, "loss": 0.1624, "step": 2391 }, { "epoch": 0.38755670771224887, "grad_norm": 1.1219305992126465, "learning_rate": 4.872177759080673e-06, "loss": 0.1561, "step": 2392 }, { "epoch": 0.38771872974724564, "grad_norm": 1.0665841102600098, "learning_rate": 4.872039681720116e-06, "loss": 0.1651, "step": 2393 }, { "epoch": 0.3878807517822424, "grad_norm": 1.0562465190887451, "learning_rate": 4.8719015317808835e-06, "loss": 0.1674, "step": 2394 }, { "epoch": 0.3880427738172391, "grad_norm": 1.0711188316345215, "learning_rate": 4.8717633092672045e-06, "loss": 0.1534, "step": 2395 }, { "epoch": 0.3882047958522359, "grad_norm": 1.0176469087600708, "learning_rate": 4.871625014183308e-06, "loss": 0.1558, "step": 2396 }, { "epoch": 0.38836681788723265, "grad_norm": 1.0027252435684204, "learning_rate": 4.871486646533425e-06, "loss": 0.1381, "step": 2397 }, { "epoch": 0.3885288399222294, "grad_norm": 1.0068401098251343, "learning_rate": 4.8713482063217895e-06, "loss": 0.1579, "step": 2398 }, { "epoch": 0.3886908619572262, "grad_norm": 0.9579773545265198, "learning_rate": 4.871209693552638e-06, "loss": 0.1335, "step": 2399 }, { "epoch": 0.38885288399222295, "grad_norm": 1.1602519750595093, "learning_rate": 4.871071108230208e-06, "loss": 0.1684, "step": 2400 }, { "epoch": 0.3890149060272197, "grad_norm": 1.4036099910736084, "learning_rate": 4.87093245035874e-06, "loss": 0.1646, "step": 2401 }, { "epoch": 0.3891769280622165, "grad_norm": 1.1225903034210205, "learning_rate": 4.8707937199424756e-06, "loss": 0.1643, "step": 2402 }, { "epoch": 0.3893389500972132, "grad_norm": 1.1143039464950562, "learning_rate": 4.870654916985661e-06, "loss": 0.1577, "step": 2403 }, { "epoch": 0.38950097213220997, "grad_norm": 1.080872893333435, "learning_rate": 4.870516041492543e-06, "loss": 0.1504, "step": 2404 }, { "epoch": 0.38966299416720673, "grad_norm": 1.2829567193984985, "learning_rate": 4.870377093467371e-06, "loss": 0.1817, "step": 2405 }, { "epoch": 0.3898250162022035, "grad_norm": 1.2042837142944336, "learning_rate": 4.870238072914396e-06, "loss": 0.1548, "step": 2406 }, { "epoch": 0.38998703823720027, "grad_norm": 0.9398330450057983, "learning_rate": 4.870098979837871e-06, "loss": 0.1389, "step": 2407 }, { "epoch": 0.39014906027219703, "grad_norm": 1.0796722173690796, "learning_rate": 4.869959814242054e-06, "loss": 0.157, "step": 2408 }, { "epoch": 0.3903110823071938, "grad_norm": 1.1259878873825073, "learning_rate": 4.869820576131202e-06, "loss": 0.153, "step": 2409 }, { "epoch": 0.3904731043421905, "grad_norm": 1.019061803817749, "learning_rate": 4.8696812655095744e-06, "loss": 0.1538, "step": 2410 }, { "epoch": 0.3906351263771873, "grad_norm": 0.9436379075050354, "learning_rate": 4.869541882381435e-06, "loss": 0.1485, "step": 2411 }, { "epoch": 0.39079714841218405, "grad_norm": 1.071523904800415, "learning_rate": 4.869402426751048e-06, "loss": 0.1594, "step": 2412 }, { "epoch": 0.3909591704471808, "grad_norm": 1.1812318563461304, "learning_rate": 4.86926289862268e-06, "loss": 0.1701, "step": 2413 }, { "epoch": 0.3911211924821776, "grad_norm": 1.1143845319747925, "learning_rate": 4.8691232980006015e-06, "loss": 0.1499, "step": 2414 }, { "epoch": 0.39128321451717435, "grad_norm": 1.0867897272109985, "learning_rate": 4.868983624889083e-06, "loss": 0.1617, "step": 2415 }, { "epoch": 0.3914452365521711, "grad_norm": 1.0865579843521118, "learning_rate": 4.868843879292399e-06, "loss": 0.1634, "step": 2416 }, { "epoch": 0.39160725858716783, "grad_norm": 0.9552433490753174, "learning_rate": 4.868704061214824e-06, "loss": 0.1485, "step": 2417 }, { "epoch": 0.3917692806221646, "grad_norm": 1.0676617622375488, "learning_rate": 4.868564170660637e-06, "loss": 0.1744, "step": 2418 }, { "epoch": 0.39193130265716136, "grad_norm": 0.9854423403739929, "learning_rate": 4.868424207634118e-06, "loss": 0.1511, "step": 2419 }, { "epoch": 0.39209332469215813, "grad_norm": 0.8792446851730347, "learning_rate": 4.868284172139551e-06, "loss": 0.1247, "step": 2420 }, { "epoch": 0.3922553467271549, "grad_norm": 1.0896074771881104, "learning_rate": 4.868144064181218e-06, "loss": 0.1551, "step": 2421 }, { "epoch": 0.39241736876215166, "grad_norm": 1.0454200506210327, "learning_rate": 4.868003883763408e-06, "loss": 0.1417, "step": 2422 }, { "epoch": 0.39257939079714843, "grad_norm": 0.9985229969024658, "learning_rate": 4.8678636308904095e-06, "loss": 0.1619, "step": 2423 }, { "epoch": 0.3927414128321452, "grad_norm": 0.9544141888618469, "learning_rate": 4.867723305566514e-06, "loss": 0.1435, "step": 2424 }, { "epoch": 0.3929034348671419, "grad_norm": 1.029191017150879, "learning_rate": 4.867582907796016e-06, "loss": 0.1544, "step": 2425 }, { "epoch": 0.3930654569021387, "grad_norm": 1.372963786125183, "learning_rate": 4.86744243758321e-06, "loss": 0.1815, "step": 2426 }, { "epoch": 0.39322747893713544, "grad_norm": 1.0754982233047485, "learning_rate": 4.867301894932394e-06, "loss": 0.1632, "step": 2427 }, { "epoch": 0.3933895009721322, "grad_norm": 1.3161104917526245, "learning_rate": 4.8671612798478685e-06, "loss": 0.1773, "step": 2428 }, { "epoch": 0.393551523007129, "grad_norm": 1.0129326581954956, "learning_rate": 4.867020592333937e-06, "loss": 0.1519, "step": 2429 }, { "epoch": 0.39371354504212575, "grad_norm": 0.9305555820465088, "learning_rate": 4.866879832394903e-06, "loss": 0.1458, "step": 2430 }, { "epoch": 0.3938755670771225, "grad_norm": 1.048586130142212, "learning_rate": 4.866739000035074e-06, "loss": 0.1473, "step": 2431 }, { "epoch": 0.3940375891121192, "grad_norm": 1.050463318824768, "learning_rate": 4.86659809525876e-06, "loss": 0.1673, "step": 2432 }, { "epoch": 0.394199611147116, "grad_norm": 0.9650267958641052, "learning_rate": 4.86645711807027e-06, "loss": 0.1479, "step": 2433 }, { "epoch": 0.39436163318211276, "grad_norm": 1.1384695768356323, "learning_rate": 4.866316068473919e-06, "loss": 0.1779, "step": 2434 }, { "epoch": 0.3945236552171095, "grad_norm": 1.1191463470458984, "learning_rate": 4.866174946474023e-06, "loss": 0.1442, "step": 2435 }, { "epoch": 0.3946856772521063, "grad_norm": 1.047663688659668, "learning_rate": 4.8660337520749e-06, "loss": 0.1437, "step": 2436 }, { "epoch": 0.39484769928710306, "grad_norm": 1.0196114778518677, "learning_rate": 4.865892485280869e-06, "loss": 0.1477, "step": 2437 }, { "epoch": 0.3950097213220998, "grad_norm": 0.9830042719841003, "learning_rate": 4.865751146096255e-06, "loss": 0.1492, "step": 2438 }, { "epoch": 0.39517174335709654, "grad_norm": 1.0708327293395996, "learning_rate": 4.865609734525379e-06, "loss": 0.1685, "step": 2439 }, { "epoch": 0.3953337653920933, "grad_norm": 1.0699785947799683, "learning_rate": 4.865468250572571e-06, "loss": 0.173, "step": 2440 }, { "epoch": 0.3954957874270901, "grad_norm": 0.9510377049446106, "learning_rate": 4.8653266942421585e-06, "loss": 0.1457, "step": 2441 }, { "epoch": 0.39565780946208684, "grad_norm": 0.93174147605896, "learning_rate": 4.865185065538472e-06, "loss": 0.1385, "step": 2442 }, { "epoch": 0.3958198314970836, "grad_norm": 0.9724693894386292, "learning_rate": 4.865043364465848e-06, "loss": 0.1609, "step": 2443 }, { "epoch": 0.3959818535320804, "grad_norm": 0.9042225480079651, "learning_rate": 4.86490159102862e-06, "loss": 0.1339, "step": 2444 }, { "epoch": 0.39614387556707714, "grad_norm": 1.1967670917510986, "learning_rate": 4.864759745231126e-06, "loss": 0.1431, "step": 2445 }, { "epoch": 0.3963058976020739, "grad_norm": 1.0818557739257812, "learning_rate": 4.8646178270777055e-06, "loss": 0.1786, "step": 2446 }, { "epoch": 0.3964679196370706, "grad_norm": 1.0691944360733032, "learning_rate": 4.864475836572703e-06, "loss": 0.1758, "step": 2447 }, { "epoch": 0.3966299416720674, "grad_norm": 0.9906453490257263, "learning_rate": 4.864333773720461e-06, "loss": 0.1508, "step": 2448 }, { "epoch": 0.39679196370706415, "grad_norm": 0.993923008441925, "learning_rate": 4.864191638525328e-06, "loss": 0.1369, "step": 2449 }, { "epoch": 0.3969539857420609, "grad_norm": 1.0036680698394775, "learning_rate": 4.8640494309916506e-06, "loss": 0.1497, "step": 2450 }, { "epoch": 0.3971160077770577, "grad_norm": 0.9663056135177612, "learning_rate": 4.863907151123782e-06, "loss": 0.1443, "step": 2451 }, { "epoch": 0.39727802981205446, "grad_norm": 1.1457011699676514, "learning_rate": 4.863764798926076e-06, "loss": 0.1601, "step": 2452 }, { "epoch": 0.3974400518470512, "grad_norm": 1.0735100507736206, "learning_rate": 4.863622374402887e-06, "loss": 0.1587, "step": 2453 }, { "epoch": 0.39760207388204793, "grad_norm": 1.3038159608840942, "learning_rate": 4.863479877558573e-06, "loss": 0.2004, "step": 2454 }, { "epoch": 0.3977640959170447, "grad_norm": 1.0412776470184326, "learning_rate": 4.863337308397495e-06, "loss": 0.1383, "step": 2455 }, { "epoch": 0.39792611795204147, "grad_norm": 1.0914403200149536, "learning_rate": 4.863194666924013e-06, "loss": 0.1643, "step": 2456 }, { "epoch": 0.39808813998703824, "grad_norm": 0.943895697593689, "learning_rate": 4.863051953142494e-06, "loss": 0.1351, "step": 2457 }, { "epoch": 0.398250162022035, "grad_norm": 1.0537207126617432, "learning_rate": 4.862909167057304e-06, "loss": 0.1631, "step": 2458 }, { "epoch": 0.39841218405703177, "grad_norm": 1.0041303634643555, "learning_rate": 4.862766308672811e-06, "loss": 0.1668, "step": 2459 }, { "epoch": 0.39857420609202854, "grad_norm": 0.9804761409759521, "learning_rate": 4.862623377993387e-06, "loss": 0.1657, "step": 2460 }, { "epoch": 0.39873622812702525, "grad_norm": 0.928268551826477, "learning_rate": 4.862480375023405e-06, "loss": 0.1355, "step": 2461 }, { "epoch": 0.398898250162022, "grad_norm": 1.046536922454834, "learning_rate": 4.862337299767241e-06, "loss": 0.1444, "step": 2462 }, { "epoch": 0.3990602721970188, "grad_norm": 1.052215814590454, "learning_rate": 4.862194152229271e-06, "loss": 0.1699, "step": 2463 }, { "epoch": 0.39922229423201555, "grad_norm": 0.983517050743103, "learning_rate": 4.862050932413878e-06, "loss": 0.1326, "step": 2464 }, { "epoch": 0.3993843162670123, "grad_norm": 1.0024183988571167, "learning_rate": 4.861907640325442e-06, "loss": 0.1512, "step": 2465 }, { "epoch": 0.3995463383020091, "grad_norm": 1.0899640321731567, "learning_rate": 4.8617642759683474e-06, "loss": 0.1622, "step": 2466 }, { "epoch": 0.39970836033700585, "grad_norm": 1.0080574750900269, "learning_rate": 4.861620839346982e-06, "loss": 0.147, "step": 2467 }, { "epoch": 0.3998703823720026, "grad_norm": 1.098242163658142, "learning_rate": 4.861477330465734e-06, "loss": 0.1628, "step": 2468 }, { "epoch": 0.40003240440699933, "grad_norm": 1.0178964138031006, "learning_rate": 4.861333749328993e-06, "loss": 0.1432, "step": 2469 }, { "epoch": 0.4001944264419961, "grad_norm": 1.0105563402175903, "learning_rate": 4.861190095941155e-06, "loss": 0.148, "step": 2470 }, { "epoch": 0.40035644847699287, "grad_norm": 1.041428565979004, "learning_rate": 4.861046370306613e-06, "loss": 0.15, "step": 2471 }, { "epoch": 0.40051847051198963, "grad_norm": 1.3379833698272705, "learning_rate": 4.860902572429767e-06, "loss": 0.17, "step": 2472 }, { "epoch": 0.4006804925469864, "grad_norm": 1.0125343799591064, "learning_rate": 4.8607587023150145e-06, "loss": 0.1607, "step": 2473 }, { "epoch": 0.40084251458198317, "grad_norm": 0.9886438846588135, "learning_rate": 4.86061475996676e-06, "loss": 0.1524, "step": 2474 }, { "epoch": 0.40100453661697993, "grad_norm": 1.094799280166626, "learning_rate": 4.860470745389405e-06, "loss": 0.1548, "step": 2475 }, { "epoch": 0.40116655865197665, "grad_norm": 0.9762154221534729, "learning_rate": 4.860326658587358e-06, "loss": 0.1545, "step": 2476 }, { "epoch": 0.4013285806869734, "grad_norm": 1.0075474977493286, "learning_rate": 4.860182499565027e-06, "loss": 0.1675, "step": 2477 }, { "epoch": 0.4014906027219702, "grad_norm": 0.9806322455406189, "learning_rate": 4.860038268326823e-06, "loss": 0.1658, "step": 2478 }, { "epoch": 0.40165262475696695, "grad_norm": 0.9867870211601257, "learning_rate": 4.859893964877159e-06, "loss": 0.1424, "step": 2479 }, { "epoch": 0.4018146467919637, "grad_norm": 1.0405807495117188, "learning_rate": 4.85974958922045e-06, "loss": 0.1644, "step": 2480 }, { "epoch": 0.4019766688269605, "grad_norm": 0.9794811606407166, "learning_rate": 4.8596051413611155e-06, "loss": 0.1547, "step": 2481 }, { "epoch": 0.40213869086195725, "grad_norm": 1.097029209136963, "learning_rate": 4.859460621303572e-06, "loss": 0.1774, "step": 2482 }, { "epoch": 0.40230071289695396, "grad_norm": 0.9748839139938354, "learning_rate": 4.859316029052245e-06, "loss": 0.1384, "step": 2483 }, { "epoch": 0.4024627349319507, "grad_norm": 1.0535310506820679, "learning_rate": 4.859171364611556e-06, "loss": 0.1546, "step": 2484 }, { "epoch": 0.4026247569669475, "grad_norm": 0.8713586330413818, "learning_rate": 4.859026627985933e-06, "loss": 0.1416, "step": 2485 }, { "epoch": 0.40278677900194426, "grad_norm": 1.1586978435516357, "learning_rate": 4.8588818191798035e-06, "loss": 0.1747, "step": 2486 }, { "epoch": 0.40294880103694103, "grad_norm": 1.052422285079956, "learning_rate": 4.858736938197599e-06, "loss": 0.1516, "step": 2487 }, { "epoch": 0.4031108230719378, "grad_norm": 0.9205193519592285, "learning_rate": 4.858591985043751e-06, "loss": 0.1432, "step": 2488 }, { "epoch": 0.40327284510693456, "grad_norm": 0.9910753965377808, "learning_rate": 4.858446959722698e-06, "loss": 0.1499, "step": 2489 }, { "epoch": 0.40343486714193133, "grad_norm": 1.0436080694198608, "learning_rate": 4.858301862238874e-06, "loss": 0.1561, "step": 2490 }, { "epoch": 0.40359688917692804, "grad_norm": 1.0225828886032104, "learning_rate": 4.858156692596721e-06, "loss": 0.151, "step": 2491 }, { "epoch": 0.4037589112119248, "grad_norm": 1.093070387840271, "learning_rate": 4.858011450800678e-06, "loss": 0.1608, "step": 2492 }, { "epoch": 0.4039209332469216, "grad_norm": 1.1206576824188232, "learning_rate": 4.857866136855192e-06, "loss": 0.1707, "step": 2493 }, { "epoch": 0.40408295528191834, "grad_norm": 0.9448593854904175, "learning_rate": 4.857720750764708e-06, "loss": 0.1395, "step": 2494 }, { "epoch": 0.4042449773169151, "grad_norm": 0.9939359426498413, "learning_rate": 4.857575292533675e-06, "loss": 0.1436, "step": 2495 }, { "epoch": 0.4044069993519119, "grad_norm": 1.074859857559204, "learning_rate": 4.857429762166543e-06, "loss": 0.1645, "step": 2496 }, { "epoch": 0.40456902138690864, "grad_norm": 0.9814433455467224, "learning_rate": 4.857284159667766e-06, "loss": 0.1486, "step": 2497 }, { "epoch": 0.40473104342190536, "grad_norm": 1.1380925178527832, "learning_rate": 4.857138485041797e-06, "loss": 0.1672, "step": 2498 }, { "epoch": 0.4048930654569021, "grad_norm": 1.0334879159927368, "learning_rate": 4.8569927382930945e-06, "loss": 0.147, "step": 2499 }, { "epoch": 0.4050550874918989, "grad_norm": 1.037170171737671, "learning_rate": 4.856846919426118e-06, "loss": 0.1521, "step": 2500 }, { "epoch": 0.40521710952689566, "grad_norm": 0.9876829385757446, "learning_rate": 4.85670102844533e-06, "loss": 0.1543, "step": 2501 }, { "epoch": 0.4053791315618924, "grad_norm": 1.055932879447937, "learning_rate": 4.856555065355193e-06, "loss": 0.1641, "step": 2502 }, { "epoch": 0.4055411535968892, "grad_norm": 0.9136426448822021, "learning_rate": 4.856409030160174e-06, "loss": 0.1424, "step": 2503 }, { "epoch": 0.40570317563188596, "grad_norm": 1.0281522274017334, "learning_rate": 4.856262922864741e-06, "loss": 0.1646, "step": 2504 }, { "epoch": 0.40586519766688267, "grad_norm": 0.9944785237312317, "learning_rate": 4.8561167434733655e-06, "loss": 0.144, "step": 2505 }, { "epoch": 0.40602721970187944, "grad_norm": 1.1220792531967163, "learning_rate": 4.855970491990518e-06, "loss": 0.1582, "step": 2506 }, { "epoch": 0.4061892417368762, "grad_norm": 0.8896439671516418, "learning_rate": 4.855824168420675e-06, "loss": 0.1386, "step": 2507 }, { "epoch": 0.406351263771873, "grad_norm": 1.0499117374420166, "learning_rate": 4.855677772768315e-06, "loss": 0.1566, "step": 2508 }, { "epoch": 0.40651328580686974, "grad_norm": 0.9450101256370544, "learning_rate": 4.855531305037914e-06, "loss": 0.1459, "step": 2509 }, { "epoch": 0.4066753078418665, "grad_norm": 1.0647412538528442, "learning_rate": 4.855384765233956e-06, "loss": 0.1569, "step": 2510 }, { "epoch": 0.4068373298768633, "grad_norm": 0.9091509580612183, "learning_rate": 4.855238153360924e-06, "loss": 0.1313, "step": 2511 }, { "epoch": 0.40699935191186, "grad_norm": 1.0979092121124268, "learning_rate": 4.8550914694233045e-06, "loss": 0.1662, "step": 2512 }, { "epoch": 0.40716137394685675, "grad_norm": 1.062098503112793, "learning_rate": 4.854944713425585e-06, "loss": 0.1572, "step": 2513 }, { "epoch": 0.4073233959818535, "grad_norm": 0.9755269289016724, "learning_rate": 4.854797885372255e-06, "loss": 0.146, "step": 2514 }, { "epoch": 0.4074854180168503, "grad_norm": 1.0798227787017822, "learning_rate": 4.854650985267809e-06, "loss": 0.1669, "step": 2515 }, { "epoch": 0.40764744005184705, "grad_norm": 1.0882648229599, "learning_rate": 4.854504013116741e-06, "loss": 0.163, "step": 2516 }, { "epoch": 0.4078094620868438, "grad_norm": 0.9996983408927917, "learning_rate": 4.854356968923549e-06, "loss": 0.1403, "step": 2517 }, { "epoch": 0.4079714841218406, "grad_norm": 1.076771855354309, "learning_rate": 4.8542098526927304e-06, "loss": 0.1401, "step": 2518 }, { "epoch": 0.40813350615683736, "grad_norm": 1.0781205892562866, "learning_rate": 4.854062664428787e-06, "loss": 0.1554, "step": 2519 }, { "epoch": 0.40829552819183407, "grad_norm": 0.9651884436607361, "learning_rate": 4.853915404136223e-06, "loss": 0.1401, "step": 2520 }, { "epoch": 0.40845755022683083, "grad_norm": 1.0154225826263428, "learning_rate": 4.853768071819544e-06, "loss": 0.1669, "step": 2521 }, { "epoch": 0.4086195722618276, "grad_norm": 0.9365798830986023, "learning_rate": 4.853620667483259e-06, "loss": 0.1321, "step": 2522 }, { "epoch": 0.40878159429682437, "grad_norm": 0.9837552309036255, "learning_rate": 4.8534731911318755e-06, "loss": 0.1488, "step": 2523 }, { "epoch": 0.40894361633182114, "grad_norm": 1.0226497650146484, "learning_rate": 4.853325642769908e-06, "loss": 0.1504, "step": 2524 }, { "epoch": 0.4091056383668179, "grad_norm": 1.206057071685791, "learning_rate": 4.853178022401872e-06, "loss": 0.1996, "step": 2525 }, { "epoch": 0.40926766040181467, "grad_norm": 1.0094561576843262, "learning_rate": 4.853030330032283e-06, "loss": 0.1633, "step": 2526 }, { "epoch": 0.4094296824368114, "grad_norm": 1.0124329328536987, "learning_rate": 4.8528825656656585e-06, "loss": 0.158, "step": 2527 }, { "epoch": 0.40959170447180815, "grad_norm": 0.9299939274787903, "learning_rate": 4.852734729306523e-06, "loss": 0.1398, "step": 2528 }, { "epoch": 0.4097537265068049, "grad_norm": 1.040216088294983, "learning_rate": 4.852586820959398e-06, "loss": 0.1592, "step": 2529 }, { "epoch": 0.4099157485418017, "grad_norm": 1.0537763833999634, "learning_rate": 4.852438840628808e-06, "loss": 0.1607, "step": 2530 }, { "epoch": 0.41007777057679845, "grad_norm": 0.9010412693023682, "learning_rate": 4.852290788319284e-06, "loss": 0.1365, "step": 2531 }, { "epoch": 0.4102397926117952, "grad_norm": 0.9371026754379272, "learning_rate": 4.852142664035353e-06, "loss": 0.1374, "step": 2532 }, { "epoch": 0.410401814646792, "grad_norm": 0.8150733113288879, "learning_rate": 4.8519944677815495e-06, "loss": 0.1214, "step": 2533 }, { "epoch": 0.4105638366817887, "grad_norm": 1.083972454071045, "learning_rate": 4.8518461995624064e-06, "loss": 0.1656, "step": 2534 }, { "epoch": 0.41072585871678546, "grad_norm": 0.9189485907554626, "learning_rate": 4.851697859382461e-06, "loss": 0.1295, "step": 2535 }, { "epoch": 0.41088788075178223, "grad_norm": 1.0893805027008057, "learning_rate": 4.851549447246253e-06, "loss": 0.1678, "step": 2536 }, { "epoch": 0.411049902786779, "grad_norm": 1.0611486434936523, "learning_rate": 4.8514009631583215e-06, "loss": 0.1484, "step": 2537 }, { "epoch": 0.41121192482177576, "grad_norm": 1.0211429595947266, "learning_rate": 4.851252407123211e-06, "loss": 0.1585, "step": 2538 }, { "epoch": 0.41137394685677253, "grad_norm": 1.0858979225158691, "learning_rate": 4.851103779145467e-06, "loss": 0.1493, "step": 2539 }, { "epoch": 0.4115359688917693, "grad_norm": 1.152523398399353, "learning_rate": 4.850955079229637e-06, "loss": 0.1583, "step": 2540 }, { "epoch": 0.41169799092676607, "grad_norm": 1.043539047241211, "learning_rate": 4.8508063073802715e-06, "loss": 0.1549, "step": 2541 }, { "epoch": 0.4118600129617628, "grad_norm": 0.9617857933044434, "learning_rate": 4.850657463601921e-06, "loss": 0.1388, "step": 2542 }, { "epoch": 0.41202203499675955, "grad_norm": 0.9477269649505615, "learning_rate": 4.85050854789914e-06, "loss": 0.1371, "step": 2543 }, { "epoch": 0.4121840570317563, "grad_norm": 1.0493178367614746, "learning_rate": 4.850359560276486e-06, "loss": 0.1549, "step": 2544 }, { "epoch": 0.4123460790667531, "grad_norm": 0.9363582134246826, "learning_rate": 4.850210500738518e-06, "loss": 0.1428, "step": 2545 }, { "epoch": 0.41250810110174985, "grad_norm": 0.9234378337860107, "learning_rate": 4.850061369289795e-06, "loss": 0.1389, "step": 2546 }, { "epoch": 0.4126701231367466, "grad_norm": 1.1004607677459717, "learning_rate": 4.849912165934882e-06, "loss": 0.166, "step": 2547 }, { "epoch": 0.4128321451717434, "grad_norm": 1.073050856590271, "learning_rate": 4.8497628906783425e-06, "loss": 0.1658, "step": 2548 }, { "epoch": 0.4129941672067401, "grad_norm": 1.1251699924468994, "learning_rate": 4.849613543524746e-06, "loss": 0.1618, "step": 2549 }, { "epoch": 0.41315618924173686, "grad_norm": 0.9501916766166687, "learning_rate": 4.84946412447866e-06, "loss": 0.1468, "step": 2550 }, { "epoch": 0.4133182112767336, "grad_norm": 1.087240219116211, "learning_rate": 4.849314633544659e-06, "loss": 0.1596, "step": 2551 }, { "epoch": 0.4134802333117304, "grad_norm": 1.0362616777420044, "learning_rate": 4.849165070727313e-06, "loss": 0.1707, "step": 2552 }, { "epoch": 0.41364225534672716, "grad_norm": 0.9123900532722473, "learning_rate": 4.849015436031202e-06, "loss": 0.1363, "step": 2553 }, { "epoch": 0.41380427738172393, "grad_norm": 0.9124921560287476, "learning_rate": 4.848865729460903e-06, "loss": 0.1288, "step": 2554 }, { "epoch": 0.4139662994167207, "grad_norm": 0.9334890842437744, "learning_rate": 4.848715951020997e-06, "loss": 0.1289, "step": 2555 }, { "epoch": 0.4141283214517174, "grad_norm": 1.121118187904358, "learning_rate": 4.848566100716066e-06, "loss": 0.1475, "step": 2556 }, { "epoch": 0.4142903434867142, "grad_norm": 1.0138769149780273, "learning_rate": 4.848416178550697e-06, "loss": 0.1556, "step": 2557 }, { "epoch": 0.41445236552171094, "grad_norm": 1.0010759830474854, "learning_rate": 4.848266184529475e-06, "loss": 0.151, "step": 2558 }, { "epoch": 0.4146143875567077, "grad_norm": 1.0928181409835815, "learning_rate": 4.848116118656991e-06, "loss": 0.1721, "step": 2559 }, { "epoch": 0.4147764095917045, "grad_norm": 1.068526029586792, "learning_rate": 4.847965980937836e-06, "loss": 0.1644, "step": 2560 }, { "epoch": 0.41493843162670124, "grad_norm": 1.0553364753723145, "learning_rate": 4.847815771376604e-06, "loss": 0.1501, "step": 2561 }, { "epoch": 0.415100453661698, "grad_norm": 0.9555181860923767, "learning_rate": 4.847665489977891e-06, "loss": 0.1336, "step": 2562 }, { "epoch": 0.4152624756966948, "grad_norm": 0.970905601978302, "learning_rate": 4.847515136746295e-06, "loss": 0.1426, "step": 2563 }, { "epoch": 0.4154244977316915, "grad_norm": 1.1288583278656006, "learning_rate": 4.847364711686417e-06, "loss": 0.1637, "step": 2564 }, { "epoch": 0.41558651976668826, "grad_norm": 1.0185610055923462, "learning_rate": 4.8472142148028585e-06, "loss": 0.1561, "step": 2565 }, { "epoch": 0.415748541801685, "grad_norm": 1.0335227251052856, "learning_rate": 4.847063646100226e-06, "loss": 0.1576, "step": 2566 }, { "epoch": 0.4159105638366818, "grad_norm": 0.9629384875297546, "learning_rate": 4.846913005583125e-06, "loss": 0.1378, "step": 2567 }, { "epoch": 0.41607258587167856, "grad_norm": 1.0787895917892456, "learning_rate": 4.846762293256167e-06, "loss": 0.1794, "step": 2568 }, { "epoch": 0.4162346079066753, "grad_norm": 1.1242948770523071, "learning_rate": 4.84661150912396e-06, "loss": 0.1591, "step": 2569 }, { "epoch": 0.4163966299416721, "grad_norm": 0.9214066863059998, "learning_rate": 4.846460653191121e-06, "loss": 0.1397, "step": 2570 }, { "epoch": 0.4165586519766688, "grad_norm": 1.106182336807251, "learning_rate": 4.846309725462264e-06, "loss": 0.1657, "step": 2571 }, { "epoch": 0.41672067401166557, "grad_norm": 1.0790849924087524, "learning_rate": 4.846158725942006e-06, "loss": 0.1565, "step": 2572 }, { "epoch": 0.41688269604666234, "grad_norm": 1.0249879360198975, "learning_rate": 4.84600765463497e-06, "loss": 0.1579, "step": 2573 }, { "epoch": 0.4170447180816591, "grad_norm": 1.0132466554641724, "learning_rate": 4.845856511545777e-06, "loss": 0.1618, "step": 2574 }, { "epoch": 0.41720674011665587, "grad_norm": 1.0668222904205322, "learning_rate": 4.845705296679051e-06, "loss": 0.1762, "step": 2575 }, { "epoch": 0.41736876215165264, "grad_norm": 1.0214135646820068, "learning_rate": 4.84555401003942e-06, "loss": 0.1508, "step": 2576 }, { "epoch": 0.4175307841866494, "grad_norm": 0.97760009765625, "learning_rate": 4.845402651631512e-06, "loss": 0.1552, "step": 2577 }, { "epoch": 0.4176928062216461, "grad_norm": 0.9772970080375671, "learning_rate": 4.845251221459958e-06, "loss": 0.1478, "step": 2578 }, { "epoch": 0.4178548282566429, "grad_norm": 0.999789297580719, "learning_rate": 4.845099719529393e-06, "loss": 0.1563, "step": 2579 }, { "epoch": 0.41801685029163965, "grad_norm": 1.1174688339233398, "learning_rate": 4.844948145844452e-06, "loss": 0.1767, "step": 2580 }, { "epoch": 0.4181788723266364, "grad_norm": 0.9995019435882568, "learning_rate": 4.844796500409771e-06, "loss": 0.1353, "step": 2581 }, { "epoch": 0.4183408943616332, "grad_norm": 0.9696946144104004, "learning_rate": 4.844644783229993e-06, "loss": 0.1431, "step": 2582 }, { "epoch": 0.41850291639662995, "grad_norm": 1.0892887115478516, "learning_rate": 4.844492994309757e-06, "loss": 0.1597, "step": 2583 }, { "epoch": 0.4186649384316267, "grad_norm": 1.0031565427780151, "learning_rate": 4.844341133653709e-06, "loss": 0.1602, "step": 2584 }, { "epoch": 0.4188269604666235, "grad_norm": 1.021321177482605, "learning_rate": 4.844189201266497e-06, "loss": 0.162, "step": 2585 }, { "epoch": 0.4189889825016202, "grad_norm": 1.1473654508590698, "learning_rate": 4.844037197152767e-06, "loss": 0.1854, "step": 2586 }, { "epoch": 0.41915100453661697, "grad_norm": 1.1398388147354126, "learning_rate": 4.8438851213171715e-06, "loss": 0.1643, "step": 2587 }, { "epoch": 0.41931302657161373, "grad_norm": 0.9572474956512451, "learning_rate": 4.843732973764363e-06, "loss": 0.1537, "step": 2588 }, { "epoch": 0.4194750486066105, "grad_norm": 0.9969721436500549, "learning_rate": 4.843580754498999e-06, "loss": 0.1557, "step": 2589 }, { "epoch": 0.41963707064160727, "grad_norm": 0.9691717624664307, "learning_rate": 4.8434284635257335e-06, "loss": 0.156, "step": 2590 }, { "epoch": 0.41979909267660404, "grad_norm": 1.0504859685897827, "learning_rate": 4.8432761008492284e-06, "loss": 0.1416, "step": 2591 }, { "epoch": 0.4199611147116008, "grad_norm": 1.0523217916488647, "learning_rate": 4.843123666474146e-06, "loss": 0.1704, "step": 2592 }, { "epoch": 0.4201231367465975, "grad_norm": 1.1369895935058594, "learning_rate": 4.842971160405149e-06, "loss": 0.1514, "step": 2593 }, { "epoch": 0.4202851587815943, "grad_norm": 1.0184272527694702, "learning_rate": 4.842818582646904e-06, "loss": 0.1512, "step": 2594 }, { "epoch": 0.42044718081659105, "grad_norm": 1.0141220092773438, "learning_rate": 4.84266593320408e-06, "loss": 0.1563, "step": 2595 }, { "epoch": 0.4206092028515878, "grad_norm": 1.072980284690857, "learning_rate": 4.842513212081348e-06, "loss": 0.1629, "step": 2596 }, { "epoch": 0.4207712248865846, "grad_norm": 1.1694176197052002, "learning_rate": 4.842360419283381e-06, "loss": 0.169, "step": 2597 }, { "epoch": 0.42093324692158135, "grad_norm": 1.0028104782104492, "learning_rate": 4.8422075548148525e-06, "loss": 0.1462, "step": 2598 }, { "epoch": 0.4210952689565781, "grad_norm": 0.9839292764663696, "learning_rate": 4.84205461868044e-06, "loss": 0.1486, "step": 2599 }, { "epoch": 0.42125729099157483, "grad_norm": 0.9316514730453491, "learning_rate": 4.841901610884826e-06, "loss": 0.14, "step": 2600 }, { "epoch": 0.4214193130265716, "grad_norm": 1.1105822324752808, "learning_rate": 4.8417485314326895e-06, "loss": 0.1726, "step": 2601 }, { "epoch": 0.42158133506156836, "grad_norm": 0.9990409016609192, "learning_rate": 4.841595380328714e-06, "loss": 0.1669, "step": 2602 }, { "epoch": 0.42174335709656513, "grad_norm": 0.978840172290802, "learning_rate": 4.841442157577587e-06, "loss": 0.1542, "step": 2603 }, { "epoch": 0.4219053791315619, "grad_norm": 1.0527600049972534, "learning_rate": 4.841288863183996e-06, "loss": 0.1514, "step": 2604 }, { "epoch": 0.42206740116655866, "grad_norm": 1.0424004793167114, "learning_rate": 4.8411354971526316e-06, "loss": 0.1565, "step": 2605 }, { "epoch": 0.42222942320155543, "grad_norm": 0.9171922206878662, "learning_rate": 4.840982059488186e-06, "loss": 0.1424, "step": 2606 }, { "epoch": 0.4223914452365522, "grad_norm": 0.9491299986839294, "learning_rate": 4.840828550195355e-06, "loss": 0.14, "step": 2607 }, { "epoch": 0.4225534672715489, "grad_norm": 1.1257144212722778, "learning_rate": 4.840674969278836e-06, "loss": 0.1776, "step": 2608 }, { "epoch": 0.4227154893065457, "grad_norm": 0.9474332928657532, "learning_rate": 4.840521316743326e-06, "loss": 0.1463, "step": 2609 }, { "epoch": 0.42287751134154244, "grad_norm": 1.0280464887619019, "learning_rate": 4.8403675925935275e-06, "loss": 0.1528, "step": 2610 }, { "epoch": 0.4230395333765392, "grad_norm": 1.010370135307312, "learning_rate": 4.840213796834145e-06, "loss": 0.1519, "step": 2611 }, { "epoch": 0.423201555411536, "grad_norm": 1.0161646604537964, "learning_rate": 4.8400599294698825e-06, "loss": 0.1563, "step": 2612 }, { "epoch": 0.42336357744653275, "grad_norm": 1.0096008777618408, "learning_rate": 4.83990599050545e-06, "loss": 0.1559, "step": 2613 }, { "epoch": 0.4235255994815295, "grad_norm": 0.9370619654655457, "learning_rate": 4.839751979945556e-06, "loss": 0.122, "step": 2614 }, { "epoch": 0.4236876215165262, "grad_norm": 0.9001135230064392, "learning_rate": 4.839597897794915e-06, "loss": 0.133, "step": 2615 }, { "epoch": 0.423849643551523, "grad_norm": 0.9671546816825867, "learning_rate": 4.839443744058238e-06, "loss": 0.1472, "step": 2616 }, { "epoch": 0.42401166558651976, "grad_norm": 0.9636013507843018, "learning_rate": 4.839289518740245e-06, "loss": 0.1435, "step": 2617 }, { "epoch": 0.4241736876215165, "grad_norm": 1.114350438117981, "learning_rate": 4.839135221845654e-06, "loss": 0.1679, "step": 2618 }, { "epoch": 0.4243357096565133, "grad_norm": 1.0020900964736938, "learning_rate": 4.838980853379184e-06, "loss": 0.1374, "step": 2619 }, { "epoch": 0.42449773169151006, "grad_norm": 1.0408121347427368, "learning_rate": 4.838826413345561e-06, "loss": 0.1569, "step": 2620 }, { "epoch": 0.42465975372650683, "grad_norm": 1.0487539768218994, "learning_rate": 4.83867190174951e-06, "loss": 0.1568, "step": 2621 }, { "epoch": 0.42482177576150354, "grad_norm": 1.0748343467712402, "learning_rate": 4.838517318595758e-06, "loss": 0.1628, "step": 2622 }, { "epoch": 0.4249837977965003, "grad_norm": 1.2027356624603271, "learning_rate": 4.8383626638890355e-06, "loss": 0.1863, "step": 2623 }, { "epoch": 0.4251458198314971, "grad_norm": 1.07537841796875, "learning_rate": 4.838207937634074e-06, "loss": 0.1492, "step": 2624 }, { "epoch": 0.42530784186649384, "grad_norm": 1.2119613885879517, "learning_rate": 4.838053139835608e-06, "loss": 0.1757, "step": 2625 }, { "epoch": 0.4254698639014906, "grad_norm": 0.9143966436386108, "learning_rate": 4.837898270498374e-06, "loss": 0.1334, "step": 2626 }, { "epoch": 0.4256318859364874, "grad_norm": 1.0104649066925049, "learning_rate": 4.83774332962711e-06, "loss": 0.1594, "step": 2627 }, { "epoch": 0.42579390797148414, "grad_norm": 0.9676437377929688, "learning_rate": 4.837588317226558e-06, "loss": 0.1587, "step": 2628 }, { "epoch": 0.42595593000648085, "grad_norm": 0.922566831111908, "learning_rate": 4.837433233301461e-06, "loss": 0.1529, "step": 2629 }, { "epoch": 0.4261179520414776, "grad_norm": 1.1346608400344849, "learning_rate": 4.837278077856562e-06, "loss": 0.1793, "step": 2630 }, { "epoch": 0.4262799740764744, "grad_norm": 0.9662337303161621, "learning_rate": 4.837122850896611e-06, "loss": 0.1447, "step": 2631 }, { "epoch": 0.42644199611147116, "grad_norm": 1.0667489767074585, "learning_rate": 4.836967552426355e-06, "loss": 0.1784, "step": 2632 }, { "epoch": 0.4266040181464679, "grad_norm": 1.030396580696106, "learning_rate": 4.836812182450549e-06, "loss": 0.1485, "step": 2633 }, { "epoch": 0.4267660401814647, "grad_norm": 0.9403509497642517, "learning_rate": 4.836656740973944e-06, "loss": 0.1476, "step": 2634 }, { "epoch": 0.42692806221646146, "grad_norm": 1.0584341287612915, "learning_rate": 4.836501228001298e-06, "loss": 0.1499, "step": 2635 }, { "epoch": 0.4270900842514582, "grad_norm": 1.0108885765075684, "learning_rate": 4.836345643537368e-06, "loss": 0.1485, "step": 2636 }, { "epoch": 0.42725210628645494, "grad_norm": 0.9687297940254211, "learning_rate": 4.8361899875869165e-06, "loss": 0.1516, "step": 2637 }, { "epoch": 0.4274141283214517, "grad_norm": 0.9748333096504211, "learning_rate": 4.836034260154704e-06, "loss": 0.1523, "step": 2638 }, { "epoch": 0.42757615035644847, "grad_norm": 1.1141382455825806, "learning_rate": 4.835878461245496e-06, "loss": 0.1485, "step": 2639 }, { "epoch": 0.42773817239144524, "grad_norm": 0.8912030458450317, "learning_rate": 4.83572259086406e-06, "loss": 0.133, "step": 2640 }, { "epoch": 0.427900194426442, "grad_norm": 1.0493932962417603, "learning_rate": 4.835566649015165e-06, "loss": 0.1706, "step": 2641 }, { "epoch": 0.42806221646143877, "grad_norm": 1.024950385093689, "learning_rate": 4.835410635703582e-06, "loss": 0.1362, "step": 2642 }, { "epoch": 0.42822423849643554, "grad_norm": 1.094906210899353, "learning_rate": 4.8352545509340865e-06, "loss": 0.1505, "step": 2643 }, { "epoch": 0.42838626053143225, "grad_norm": 1.0383245944976807, "learning_rate": 4.835098394711451e-06, "loss": 0.1569, "step": 2644 }, { "epoch": 0.428548282566429, "grad_norm": 0.9308538436889648, "learning_rate": 4.834942167040457e-06, "loss": 0.131, "step": 2645 }, { "epoch": 0.4287103046014258, "grad_norm": 0.9128456711769104, "learning_rate": 4.834785867925883e-06, "loss": 0.1431, "step": 2646 }, { "epoch": 0.42887232663642255, "grad_norm": 1.047548770904541, "learning_rate": 4.8346294973725115e-06, "loss": 0.1592, "step": 2647 }, { "epoch": 0.4290343486714193, "grad_norm": 0.9378132820129395, "learning_rate": 4.8344730553851275e-06, "loss": 0.146, "step": 2648 }, { "epoch": 0.4291963707064161, "grad_norm": 1.0160073041915894, "learning_rate": 4.8343165419685155e-06, "loss": 0.1598, "step": 2649 }, { "epoch": 0.42935839274141285, "grad_norm": 1.0141408443450928, "learning_rate": 4.834159957127468e-06, "loss": 0.1496, "step": 2650 }, { "epoch": 0.42952041477640956, "grad_norm": 1.0546174049377441, "learning_rate": 4.834003300866773e-06, "loss": 0.1479, "step": 2651 }, { "epoch": 0.42968243681140633, "grad_norm": 1.0627154111862183, "learning_rate": 4.833846573191227e-06, "loss": 0.1585, "step": 2652 }, { "epoch": 0.4298444588464031, "grad_norm": 1.0541164875030518, "learning_rate": 4.833689774105622e-06, "loss": 0.1551, "step": 2653 }, { "epoch": 0.43000648088139987, "grad_norm": 0.9814350008964539, "learning_rate": 4.833532903614758e-06, "loss": 0.1397, "step": 2654 }, { "epoch": 0.43016850291639663, "grad_norm": 1.0309319496154785, "learning_rate": 4.8333759617234344e-06, "loss": 0.1603, "step": 2655 }, { "epoch": 0.4303305249513934, "grad_norm": 1.044171929359436, "learning_rate": 4.833218948436453e-06, "loss": 0.1588, "step": 2656 }, { "epoch": 0.43049254698639017, "grad_norm": 1.0367193222045898, "learning_rate": 4.833061863758618e-06, "loss": 0.1734, "step": 2657 }, { "epoch": 0.43065456902138693, "grad_norm": 0.9801927804946899, "learning_rate": 4.832904707694736e-06, "loss": 0.1459, "step": 2658 }, { "epoch": 0.43081659105638365, "grad_norm": 0.9928318858146667, "learning_rate": 4.8327474802496145e-06, "loss": 0.1478, "step": 2659 }, { "epoch": 0.4309786130913804, "grad_norm": 0.9557510614395142, "learning_rate": 4.832590181428066e-06, "loss": 0.1511, "step": 2660 }, { "epoch": 0.4311406351263772, "grad_norm": 1.0317496061325073, "learning_rate": 4.832432811234902e-06, "loss": 0.1646, "step": 2661 }, { "epoch": 0.43130265716137395, "grad_norm": 1.0222221612930298, "learning_rate": 4.832275369674939e-06, "loss": 0.1483, "step": 2662 }, { "epoch": 0.4314646791963707, "grad_norm": 0.9168278574943542, "learning_rate": 4.832117856752994e-06, "loss": 0.1329, "step": 2663 }, { "epoch": 0.4316267012313675, "grad_norm": 0.9790104031562805, "learning_rate": 4.831960272473886e-06, "loss": 0.1395, "step": 2664 }, { "epoch": 0.43178872326636425, "grad_norm": 0.9434686303138733, "learning_rate": 4.831802616842436e-06, "loss": 0.138, "step": 2665 }, { "epoch": 0.43195074530136096, "grad_norm": 1.0316851139068604, "learning_rate": 4.831644889863471e-06, "loss": 0.1684, "step": 2666 }, { "epoch": 0.43211276733635773, "grad_norm": 1.1002922058105469, "learning_rate": 4.831487091541812e-06, "loss": 0.151, "step": 2667 }, { "epoch": 0.4322747893713545, "grad_norm": 1.2835469245910645, "learning_rate": 4.831329221882291e-06, "loss": 0.1845, "step": 2668 }, { "epoch": 0.43243681140635126, "grad_norm": 1.0473268032073975, "learning_rate": 4.831171280889739e-06, "loss": 0.1447, "step": 2669 }, { "epoch": 0.43259883344134803, "grad_norm": 1.0189570188522339, "learning_rate": 4.831013268568986e-06, "loss": 0.1545, "step": 2670 }, { "epoch": 0.4327608554763448, "grad_norm": 1.0731713771820068, "learning_rate": 4.830855184924868e-06, "loss": 0.1656, "step": 2671 }, { "epoch": 0.43292287751134156, "grad_norm": 0.955833911895752, "learning_rate": 4.830697029962222e-06, "loss": 0.1447, "step": 2672 }, { "epoch": 0.4330848995463383, "grad_norm": 1.0004793405532837, "learning_rate": 4.830538803685887e-06, "loss": 0.1471, "step": 2673 }, { "epoch": 0.43324692158133504, "grad_norm": 1.0488044023513794, "learning_rate": 4.830380506100704e-06, "loss": 0.1557, "step": 2674 }, { "epoch": 0.4334089436163318, "grad_norm": 1.1116876602172852, "learning_rate": 4.830222137211518e-06, "loss": 0.1584, "step": 2675 }, { "epoch": 0.4335709656513286, "grad_norm": 0.9907661080360413, "learning_rate": 4.830063697023173e-06, "loss": 0.1414, "step": 2676 }, { "epoch": 0.43373298768632534, "grad_norm": 1.0337917804718018, "learning_rate": 4.829905185540517e-06, "loss": 0.1692, "step": 2677 }, { "epoch": 0.4338950097213221, "grad_norm": 0.9875041246414185, "learning_rate": 4.829746602768401e-06, "loss": 0.1462, "step": 2678 }, { "epoch": 0.4340570317563189, "grad_norm": 1.0087636709213257, "learning_rate": 4.829587948711677e-06, "loss": 0.1484, "step": 2679 }, { "epoch": 0.43421905379131565, "grad_norm": 1.0348845720291138, "learning_rate": 4.8294292233752e-06, "loss": 0.1457, "step": 2680 }, { "epoch": 0.43438107582631236, "grad_norm": 0.8487588167190552, "learning_rate": 4.829270426763824e-06, "loss": 0.1168, "step": 2681 }, { "epoch": 0.4345430978613091, "grad_norm": 1.086271047592163, "learning_rate": 4.829111558882411e-06, "loss": 0.1501, "step": 2682 }, { "epoch": 0.4347051198963059, "grad_norm": 0.9329110980033875, "learning_rate": 4.828952619735821e-06, "loss": 0.1482, "step": 2683 }, { "epoch": 0.43486714193130266, "grad_norm": 1.0108580589294434, "learning_rate": 4.828793609328916e-06, "loss": 0.1457, "step": 2684 }, { "epoch": 0.4350291639662994, "grad_norm": 1.005904197692871, "learning_rate": 4.828634527666562e-06, "loss": 0.1391, "step": 2685 }, { "epoch": 0.4351911860012962, "grad_norm": 0.9748127460479736, "learning_rate": 4.828475374753627e-06, "loss": 0.1528, "step": 2686 }, { "epoch": 0.43535320803629296, "grad_norm": 1.1028996706008911, "learning_rate": 4.82831615059498e-06, "loss": 0.1504, "step": 2687 }, { "epoch": 0.43551523007128967, "grad_norm": 1.1191354990005493, "learning_rate": 4.828156855195493e-06, "loss": 0.1613, "step": 2688 }, { "epoch": 0.43567725210628644, "grad_norm": 0.9752424955368042, "learning_rate": 4.827997488560041e-06, "loss": 0.1479, "step": 2689 }, { "epoch": 0.4358392741412832, "grad_norm": 1.1131165027618408, "learning_rate": 4.827838050693499e-06, "loss": 0.1846, "step": 2690 }, { "epoch": 0.43600129617628, "grad_norm": 0.9194788336753845, "learning_rate": 4.827678541600747e-06, "loss": 0.1299, "step": 2691 }, { "epoch": 0.43616331821127674, "grad_norm": 1.0693033933639526, "learning_rate": 4.827518961286663e-06, "loss": 0.1686, "step": 2692 }, { "epoch": 0.4363253402462735, "grad_norm": 1.0003294944763184, "learning_rate": 4.827359309756132e-06, "loss": 0.1645, "step": 2693 }, { "epoch": 0.4364873622812703, "grad_norm": 1.1136189699172974, "learning_rate": 4.827199587014038e-06, "loss": 0.1632, "step": 2694 }, { "epoch": 0.436649384316267, "grad_norm": 0.9603433012962341, "learning_rate": 4.8270397930652685e-06, "loss": 0.1411, "step": 2695 }, { "epoch": 0.43681140635126375, "grad_norm": 1.076905369758606, "learning_rate": 4.826879927914713e-06, "loss": 0.1593, "step": 2696 }, { "epoch": 0.4369734283862605, "grad_norm": 0.9769685864448547, "learning_rate": 4.826719991567262e-06, "loss": 0.1612, "step": 2697 }, { "epoch": 0.4371354504212573, "grad_norm": 0.9933087825775146, "learning_rate": 4.82655998402781e-06, "loss": 0.1777, "step": 2698 }, { "epoch": 0.43729747245625405, "grad_norm": 0.9159439206123352, "learning_rate": 4.826399905301252e-06, "loss": 0.1424, "step": 2699 }, { "epoch": 0.4374594944912508, "grad_norm": 0.9449082016944885, "learning_rate": 4.826239755392488e-06, "loss": 0.1445, "step": 2700 }, { "epoch": 0.4376215165262476, "grad_norm": 1.004356861114502, "learning_rate": 4.826079534306417e-06, "loss": 0.1589, "step": 2701 }, { "epoch": 0.43778353856124436, "grad_norm": 1.0486918687820435, "learning_rate": 4.8259192420479395e-06, "loss": 0.1625, "step": 2702 }, { "epoch": 0.43794556059624107, "grad_norm": 1.0001510381698608, "learning_rate": 4.825758878621963e-06, "loss": 0.1594, "step": 2703 }, { "epoch": 0.43810758263123784, "grad_norm": 1.1367384195327759, "learning_rate": 4.825598444033393e-06, "loss": 0.1772, "step": 2704 }, { "epoch": 0.4382696046662346, "grad_norm": 1.0560551881790161, "learning_rate": 4.825437938287139e-06, "loss": 0.164, "step": 2705 }, { "epoch": 0.43843162670123137, "grad_norm": 0.9951310157775879, "learning_rate": 4.82527736138811e-06, "loss": 0.1679, "step": 2706 }, { "epoch": 0.43859364873622814, "grad_norm": 1.0460946559906006, "learning_rate": 4.825116713341223e-06, "loss": 0.1679, "step": 2707 }, { "epoch": 0.4387556707712249, "grad_norm": 1.0097354650497437, "learning_rate": 4.824955994151389e-06, "loss": 0.1549, "step": 2708 }, { "epoch": 0.43891769280622167, "grad_norm": 0.9873436093330383, "learning_rate": 4.824795203823529e-06, "loss": 0.1522, "step": 2709 }, { "epoch": 0.4390797148412184, "grad_norm": 0.9897306561470032, "learning_rate": 4.824634342362561e-06, "loss": 0.1603, "step": 2710 }, { "epoch": 0.43924173687621515, "grad_norm": 1.0298486948013306, "learning_rate": 4.824473409773408e-06, "loss": 0.1711, "step": 2711 }, { "epoch": 0.4394037589112119, "grad_norm": 1.0957099199295044, "learning_rate": 4.824312406060995e-06, "loss": 0.1806, "step": 2712 }, { "epoch": 0.4395657809462087, "grad_norm": 0.9915971755981445, "learning_rate": 4.824151331230245e-06, "loss": 0.1612, "step": 2713 }, { "epoch": 0.43972780298120545, "grad_norm": 0.9229410886764526, "learning_rate": 4.82399018528609e-06, "loss": 0.1444, "step": 2714 }, { "epoch": 0.4398898250162022, "grad_norm": 1.0651377439498901, "learning_rate": 4.823828968233459e-06, "loss": 0.1627, "step": 2715 }, { "epoch": 0.440051847051199, "grad_norm": 0.9680843353271484, "learning_rate": 4.823667680077285e-06, "loss": 0.1354, "step": 2716 }, { "epoch": 0.4402138690861957, "grad_norm": 1.1376385688781738, "learning_rate": 4.823506320822503e-06, "loss": 0.1696, "step": 2717 }, { "epoch": 0.44037589112119246, "grad_norm": 0.9446072578430176, "learning_rate": 4.8233448904740505e-06, "loss": 0.1395, "step": 2718 }, { "epoch": 0.44053791315618923, "grad_norm": 1.1791229248046875, "learning_rate": 4.823183389036867e-06, "loss": 0.1819, "step": 2719 }, { "epoch": 0.440699935191186, "grad_norm": 1.0275377035140991, "learning_rate": 4.823021816515893e-06, "loss": 0.1589, "step": 2720 }, { "epoch": 0.44086195722618277, "grad_norm": 1.0309733152389526, "learning_rate": 4.822860172916074e-06, "loss": 0.1415, "step": 2721 }, { "epoch": 0.44102397926117953, "grad_norm": 1.0780889987945557, "learning_rate": 4.8226984582423545e-06, "loss": 0.1693, "step": 2722 }, { "epoch": 0.4411860012961763, "grad_norm": 1.0635453462600708, "learning_rate": 4.8225366724996826e-06, "loss": 0.1393, "step": 2723 }, { "epoch": 0.44134802333117307, "grad_norm": 0.916522204875946, "learning_rate": 4.82237481569301e-06, "loss": 0.1435, "step": 2724 }, { "epoch": 0.4415100453661698, "grad_norm": 0.949371337890625, "learning_rate": 4.822212887827287e-06, "loss": 0.1441, "step": 2725 }, { "epoch": 0.44167206740116655, "grad_norm": 0.9767130613327026, "learning_rate": 4.822050888907469e-06, "loss": 0.1539, "step": 2726 }, { "epoch": 0.4418340894361633, "grad_norm": 0.9982846975326538, "learning_rate": 4.8218888189385145e-06, "loss": 0.1326, "step": 2727 }, { "epoch": 0.4419961114711601, "grad_norm": 0.99046790599823, "learning_rate": 4.82172667792538e-06, "loss": 0.1545, "step": 2728 }, { "epoch": 0.44215813350615685, "grad_norm": 0.9805136919021606, "learning_rate": 4.821564465873027e-06, "loss": 0.1505, "step": 2729 }, { "epoch": 0.4423201555411536, "grad_norm": 0.9637601375579834, "learning_rate": 4.821402182786421e-06, "loss": 0.1468, "step": 2730 }, { "epoch": 0.4424821775761504, "grad_norm": 1.0324764251708984, "learning_rate": 4.821239828670525e-06, "loss": 0.1655, "step": 2731 }, { "epoch": 0.4426441996111471, "grad_norm": 1.0406447649002075, "learning_rate": 4.8210774035303085e-06, "loss": 0.1528, "step": 2732 }, { "epoch": 0.44280622164614386, "grad_norm": 1.000382661819458, "learning_rate": 4.820914907370739e-06, "loss": 0.1424, "step": 2733 }, { "epoch": 0.4429682436811406, "grad_norm": 1.0306026935577393, "learning_rate": 4.82075234019679e-06, "loss": 0.1612, "step": 2734 }, { "epoch": 0.4431302657161374, "grad_norm": 0.9841228723526001, "learning_rate": 4.820589702013436e-06, "loss": 0.1515, "step": 2735 }, { "epoch": 0.44329228775113416, "grad_norm": 0.9914823770523071, "learning_rate": 4.820426992825653e-06, "loss": 0.1586, "step": 2736 }, { "epoch": 0.44345430978613093, "grad_norm": 1.0662131309509277, "learning_rate": 4.820264212638419e-06, "loss": 0.1617, "step": 2737 }, { "epoch": 0.4436163318211277, "grad_norm": 1.1415921449661255, "learning_rate": 4.820101361456715e-06, "loss": 0.1786, "step": 2738 }, { "epoch": 0.4437783538561244, "grad_norm": 1.0659246444702148, "learning_rate": 4.819938439285524e-06, "loss": 0.1655, "step": 2739 }, { "epoch": 0.4439403758911212, "grad_norm": 1.2058014869689941, "learning_rate": 4.819775446129832e-06, "loss": 0.1672, "step": 2740 }, { "epoch": 0.44410239792611794, "grad_norm": 1.0413726568222046, "learning_rate": 4.819612381994624e-06, "loss": 0.1361, "step": 2741 }, { "epoch": 0.4442644199611147, "grad_norm": 0.9703432321548462, "learning_rate": 4.8194492468848895e-06, "loss": 0.156, "step": 2742 }, { "epoch": 0.4444264419961115, "grad_norm": 1.1094372272491455, "learning_rate": 4.819286040805622e-06, "loss": 0.1517, "step": 2743 }, { "epoch": 0.44458846403110824, "grad_norm": 1.0455727577209473, "learning_rate": 4.8191227637618145e-06, "loss": 0.1484, "step": 2744 }, { "epoch": 0.444750486066105, "grad_norm": 0.9904317855834961, "learning_rate": 4.818959415758463e-06, "loss": 0.1528, "step": 2745 }, { "epoch": 0.4449125081011017, "grad_norm": 0.9994639158248901, "learning_rate": 4.818795996800564e-06, "loss": 0.1466, "step": 2746 }, { "epoch": 0.4450745301360985, "grad_norm": 1.0804579257965088, "learning_rate": 4.818632506893119e-06, "loss": 0.1715, "step": 2747 }, { "epoch": 0.44523655217109526, "grad_norm": 1.024933099746704, "learning_rate": 4.8184689460411306e-06, "loss": 0.1411, "step": 2748 }, { "epoch": 0.445398574206092, "grad_norm": 1.01506507396698, "learning_rate": 4.8183053142496025e-06, "loss": 0.1706, "step": 2749 }, { "epoch": 0.4455605962410888, "grad_norm": 0.9807841777801514, "learning_rate": 4.818141611523543e-06, "loss": 0.1517, "step": 2750 }, { "epoch": 0.44572261827608556, "grad_norm": 0.8835573196411133, "learning_rate": 4.81797783786796e-06, "loss": 0.1406, "step": 2751 }, { "epoch": 0.4458846403110823, "grad_norm": 0.9974625110626221, "learning_rate": 4.817813993287863e-06, "loss": 0.1729, "step": 2752 }, { "epoch": 0.4460466623460791, "grad_norm": 1.12760329246521, "learning_rate": 4.817650077788268e-06, "loss": 0.1616, "step": 2753 }, { "epoch": 0.4462086843810758, "grad_norm": 1.0579090118408203, "learning_rate": 4.817486091374189e-06, "loss": 0.138, "step": 2754 }, { "epoch": 0.44637070641607257, "grad_norm": 0.9531564116477966, "learning_rate": 4.817322034050645e-06, "loss": 0.1276, "step": 2755 }, { "epoch": 0.44653272845106934, "grad_norm": 0.9502880573272705, "learning_rate": 4.817157905822652e-06, "loss": 0.1437, "step": 2756 }, { "epoch": 0.4466947504860661, "grad_norm": 1.0466476678848267, "learning_rate": 4.816993706695237e-06, "loss": 0.1648, "step": 2757 }, { "epoch": 0.4468567725210629, "grad_norm": 1.0479638576507568, "learning_rate": 4.816829436673421e-06, "loss": 0.1539, "step": 2758 }, { "epoch": 0.44701879455605964, "grad_norm": 1.0833531618118286, "learning_rate": 4.81666509576223e-06, "loss": 0.1675, "step": 2759 }, { "epoch": 0.4471808165910564, "grad_norm": 0.9449465274810791, "learning_rate": 4.816500683966694e-06, "loss": 0.1481, "step": 2760 }, { "epoch": 0.4473428386260531, "grad_norm": 1.0143202543258667, "learning_rate": 4.816336201291842e-06, "loss": 0.152, "step": 2761 }, { "epoch": 0.4475048606610499, "grad_norm": 1.0159945487976074, "learning_rate": 4.816171647742708e-06, "loss": 0.1393, "step": 2762 }, { "epoch": 0.44766688269604665, "grad_norm": 1.0358235836029053, "learning_rate": 4.816007023324327e-06, "loss": 0.1563, "step": 2763 }, { "epoch": 0.4478289047310434, "grad_norm": 0.9171221852302551, "learning_rate": 4.815842328041736e-06, "loss": 0.1449, "step": 2764 }, { "epoch": 0.4479909267660402, "grad_norm": 1.0671533346176147, "learning_rate": 4.815677561899973e-06, "loss": 0.1561, "step": 2765 }, { "epoch": 0.44815294880103695, "grad_norm": 1.0144374370574951, "learning_rate": 4.815512724904081e-06, "loss": 0.1648, "step": 2766 }, { "epoch": 0.4483149708360337, "grad_norm": 0.9934388399124146, "learning_rate": 4.815347817059103e-06, "loss": 0.143, "step": 2767 }, { "epoch": 0.44847699287103043, "grad_norm": 0.9061841368675232, "learning_rate": 4.815182838370085e-06, "loss": 0.139, "step": 2768 }, { "epoch": 0.4486390149060272, "grad_norm": 1.0116674900054932, "learning_rate": 4.815017788842075e-06, "loss": 0.1504, "step": 2769 }, { "epoch": 0.44880103694102397, "grad_norm": 1.012621283531189, "learning_rate": 4.814852668480122e-06, "loss": 0.164, "step": 2770 }, { "epoch": 0.44896305897602073, "grad_norm": 0.9846920967102051, "learning_rate": 4.81468747728928e-06, "loss": 0.1494, "step": 2771 }, { "epoch": 0.4491250810110175, "grad_norm": 0.9715894460678101, "learning_rate": 4.814522215274603e-06, "loss": 0.1446, "step": 2772 }, { "epoch": 0.44928710304601427, "grad_norm": 1.0353641510009766, "learning_rate": 4.814356882441147e-06, "loss": 0.1578, "step": 2773 }, { "epoch": 0.44944912508101104, "grad_norm": 0.9785324931144714, "learning_rate": 4.81419147879397e-06, "loss": 0.1495, "step": 2774 }, { "epoch": 0.4496111471160078, "grad_norm": 1.190711259841919, "learning_rate": 4.814026004338135e-06, "loss": 0.1844, "step": 2775 }, { "epoch": 0.4497731691510045, "grad_norm": 1.0003420114517212, "learning_rate": 4.813860459078703e-06, "loss": 0.1485, "step": 2776 }, { "epoch": 0.4499351911860013, "grad_norm": 0.9324052929878235, "learning_rate": 4.8136948430207415e-06, "loss": 0.1421, "step": 2777 }, { "epoch": 0.45009721322099805, "grad_norm": 1.0191559791564941, "learning_rate": 4.813529156169317e-06, "loss": 0.1593, "step": 2778 }, { "epoch": 0.4502592352559948, "grad_norm": 1.0456205606460571, "learning_rate": 4.813363398529498e-06, "loss": 0.1552, "step": 2779 }, { "epoch": 0.4504212572909916, "grad_norm": 0.9817255735397339, "learning_rate": 4.813197570106357e-06, "loss": 0.1554, "step": 2780 }, { "epoch": 0.45058327932598835, "grad_norm": 1.0291125774383545, "learning_rate": 4.813031670904969e-06, "loss": 0.1549, "step": 2781 }, { "epoch": 0.4507453013609851, "grad_norm": 0.9215115904808044, "learning_rate": 4.8128657009304096e-06, "loss": 0.1342, "step": 2782 }, { "epoch": 0.45090732339598183, "grad_norm": 0.9075199961662292, "learning_rate": 4.8126996601877564e-06, "loss": 0.1471, "step": 2783 }, { "epoch": 0.4510693454309786, "grad_norm": 0.9484758377075195, "learning_rate": 4.8125335486820905e-06, "loss": 0.1392, "step": 2784 }, { "epoch": 0.45123136746597536, "grad_norm": 1.10005521774292, "learning_rate": 4.812367366418493e-06, "loss": 0.162, "step": 2785 }, { "epoch": 0.45139338950097213, "grad_norm": 0.9598970413208008, "learning_rate": 4.8122011134020505e-06, "loss": 0.1256, "step": 2786 }, { "epoch": 0.4515554115359689, "grad_norm": 1.010698914527893, "learning_rate": 4.81203478963785e-06, "loss": 0.1521, "step": 2787 }, { "epoch": 0.45171743357096567, "grad_norm": 0.8810874819755554, "learning_rate": 4.8118683951309795e-06, "loss": 0.1325, "step": 2788 }, { "epoch": 0.45187945560596243, "grad_norm": 1.020269751548767, "learning_rate": 4.811701929886531e-06, "loss": 0.1457, "step": 2789 }, { "epoch": 0.45204147764095914, "grad_norm": 0.9705826640129089, "learning_rate": 4.811535393909598e-06, "loss": 0.1452, "step": 2790 }, { "epoch": 0.4522034996759559, "grad_norm": 1.204943060874939, "learning_rate": 4.811368787205275e-06, "loss": 0.1884, "step": 2791 }, { "epoch": 0.4523655217109527, "grad_norm": 1.057442545890808, "learning_rate": 4.811202109778661e-06, "loss": 0.1642, "step": 2792 }, { "epoch": 0.45252754374594945, "grad_norm": 1.0493812561035156, "learning_rate": 4.811035361634855e-06, "loss": 0.1466, "step": 2793 }, { "epoch": 0.4526895657809462, "grad_norm": 1.0692343711853027, "learning_rate": 4.810868542778959e-06, "loss": 0.1522, "step": 2794 }, { "epoch": 0.452851587815943, "grad_norm": 0.98219895362854, "learning_rate": 4.8107016532160784e-06, "loss": 0.1417, "step": 2795 }, { "epoch": 0.45301360985093975, "grad_norm": 1.0239708423614502, "learning_rate": 4.81053469295132e-06, "loss": 0.1593, "step": 2796 }, { "epoch": 0.4531756318859365, "grad_norm": 0.94671630859375, "learning_rate": 4.81036766198979e-06, "loss": 0.1411, "step": 2797 }, { "epoch": 0.4533376539209332, "grad_norm": 1.0224705934524536, "learning_rate": 4.810200560336601e-06, "loss": 0.1615, "step": 2798 }, { "epoch": 0.45349967595593, "grad_norm": 0.970841109752655, "learning_rate": 4.810033387996865e-06, "loss": 0.1588, "step": 2799 }, { "epoch": 0.45366169799092676, "grad_norm": 1.008506417274475, "learning_rate": 4.809866144975699e-06, "loss": 0.1653, "step": 2800 }, { "epoch": 0.4538237200259235, "grad_norm": 0.9327491521835327, "learning_rate": 4.809698831278217e-06, "loss": 0.1419, "step": 2801 }, { "epoch": 0.4539857420609203, "grad_norm": 0.9707223176956177, "learning_rate": 4.809531446909541e-06, "loss": 0.161, "step": 2802 }, { "epoch": 0.45414776409591706, "grad_norm": 1.0070552825927734, "learning_rate": 4.8093639918747915e-06, "loss": 0.1703, "step": 2803 }, { "epoch": 0.45430978613091383, "grad_norm": 1.0490121841430664, "learning_rate": 4.8091964661790926e-06, "loss": 0.1556, "step": 2804 }, { "epoch": 0.45447180816591054, "grad_norm": 1.058758020401001, "learning_rate": 4.80902886982757e-06, "loss": 0.1536, "step": 2805 }, { "epoch": 0.4546338302009073, "grad_norm": 0.9229541420936584, "learning_rate": 4.808861202825351e-06, "loss": 0.1402, "step": 2806 }, { "epoch": 0.4547958522359041, "grad_norm": 1.017575979232788, "learning_rate": 4.8086934651775675e-06, "loss": 0.1572, "step": 2807 }, { "epoch": 0.45495787427090084, "grad_norm": 0.8822991847991943, "learning_rate": 4.80852565688935e-06, "loss": 0.1379, "step": 2808 }, { "epoch": 0.4551198963058976, "grad_norm": 0.9270743131637573, "learning_rate": 4.8083577779658344e-06, "loss": 0.1278, "step": 2809 }, { "epoch": 0.4552819183408944, "grad_norm": 0.9304889440536499, "learning_rate": 4.808189828412157e-06, "loss": 0.1374, "step": 2810 }, { "epoch": 0.45544394037589114, "grad_norm": 0.9572131633758545, "learning_rate": 4.8080218082334566e-06, "loss": 0.1386, "step": 2811 }, { "epoch": 0.45560596241088785, "grad_norm": 0.9808574318885803, "learning_rate": 4.807853717434874e-06, "loss": 0.1678, "step": 2812 }, { "epoch": 0.4557679844458846, "grad_norm": 1.0128886699676514, "learning_rate": 4.807685556021552e-06, "loss": 0.1619, "step": 2813 }, { "epoch": 0.4559300064808814, "grad_norm": 1.1356350183486938, "learning_rate": 4.807517323998637e-06, "loss": 0.1639, "step": 2814 }, { "epoch": 0.45609202851587816, "grad_norm": 0.9979428052902222, "learning_rate": 4.807349021371276e-06, "loss": 0.1587, "step": 2815 }, { "epoch": 0.4562540505508749, "grad_norm": 0.9667853116989136, "learning_rate": 4.8071806481446194e-06, "loss": 0.1401, "step": 2816 }, { "epoch": 0.4564160725858717, "grad_norm": 0.9587422013282776, "learning_rate": 4.807012204323817e-06, "loss": 0.1515, "step": 2817 }, { "epoch": 0.45657809462086846, "grad_norm": 0.9749378561973572, "learning_rate": 4.806843689914025e-06, "loss": 0.1488, "step": 2818 }, { "epoch": 0.4567401166558652, "grad_norm": 1.0103932619094849, "learning_rate": 4.806675104920397e-06, "loss": 0.1579, "step": 2819 }, { "epoch": 0.45690213869086194, "grad_norm": 1.1067167520523071, "learning_rate": 4.806506449348094e-06, "loss": 0.1602, "step": 2820 }, { "epoch": 0.4570641607258587, "grad_norm": 1.0808192491531372, "learning_rate": 4.8063377232022755e-06, "loss": 0.1781, "step": 2821 }, { "epoch": 0.45722618276085547, "grad_norm": 0.9388707280158997, "learning_rate": 4.8061689264881036e-06, "loss": 0.1362, "step": 2822 }, { "epoch": 0.45738820479585224, "grad_norm": 0.9397252202033997, "learning_rate": 4.806000059210744e-06, "loss": 0.1385, "step": 2823 }, { "epoch": 0.457550226830849, "grad_norm": 1.0616799592971802, "learning_rate": 4.805831121375361e-06, "loss": 0.1753, "step": 2824 }, { "epoch": 0.45771224886584577, "grad_norm": 0.973735511302948, "learning_rate": 4.805662112987127e-06, "loss": 0.152, "step": 2825 }, { "epoch": 0.45787427090084254, "grad_norm": 0.9995198845863342, "learning_rate": 4.805493034051212e-06, "loss": 0.1607, "step": 2826 }, { "epoch": 0.45803629293583925, "grad_norm": 0.9708908200263977, "learning_rate": 4.80532388457279e-06, "loss": 0.141, "step": 2827 }, { "epoch": 0.458198314970836, "grad_norm": 1.112001895904541, "learning_rate": 4.805154664557034e-06, "loss": 0.1623, "step": 2828 }, { "epoch": 0.4583603370058328, "grad_norm": 0.9360017776489258, "learning_rate": 4.804985374009125e-06, "loss": 0.1294, "step": 2829 }, { "epoch": 0.45852235904082955, "grad_norm": 0.8773815631866455, "learning_rate": 4.804816012934242e-06, "loss": 0.1369, "step": 2830 }, { "epoch": 0.4586843810758263, "grad_norm": 0.9873615503311157, "learning_rate": 4.8046465813375655e-06, "loss": 0.1471, "step": 2831 }, { "epoch": 0.4588464031108231, "grad_norm": 1.0159286260604858, "learning_rate": 4.8044770792242815e-06, "loss": 0.1654, "step": 2832 }, { "epoch": 0.45900842514581985, "grad_norm": 0.9192469120025635, "learning_rate": 4.8043075065995755e-06, "loss": 0.1376, "step": 2833 }, { "epoch": 0.45917044718081657, "grad_norm": 0.8338161110877991, "learning_rate": 4.8041378634686355e-06, "loss": 0.13, "step": 2834 }, { "epoch": 0.45933246921581333, "grad_norm": 0.9531093835830688, "learning_rate": 4.803968149836653e-06, "loss": 0.1326, "step": 2835 }, { "epoch": 0.4594944912508101, "grad_norm": 1.2418529987335205, "learning_rate": 4.803798365708821e-06, "loss": 0.1606, "step": 2836 }, { "epoch": 0.45965651328580687, "grad_norm": 0.956653892993927, "learning_rate": 4.803628511090333e-06, "loss": 0.1383, "step": 2837 }, { "epoch": 0.45981853532080363, "grad_norm": 0.9836128950119019, "learning_rate": 4.803458585986389e-06, "loss": 0.1557, "step": 2838 }, { "epoch": 0.4599805573558004, "grad_norm": 1.1878564357757568, "learning_rate": 4.803288590402185e-06, "loss": 0.1793, "step": 2839 }, { "epoch": 0.46014257939079717, "grad_norm": 1.0045480728149414, "learning_rate": 4.803118524342925e-06, "loss": 0.1658, "step": 2840 }, { "epoch": 0.46030460142579394, "grad_norm": 1.1326467990875244, "learning_rate": 4.802948387813812e-06, "loss": 0.1524, "step": 2841 }, { "epoch": 0.46046662346079065, "grad_norm": 0.9452898502349854, "learning_rate": 4.80277818082005e-06, "loss": 0.149, "step": 2842 }, { "epoch": 0.4606286454957874, "grad_norm": 1.0143297910690308, "learning_rate": 4.802607903366849e-06, "loss": 0.1439, "step": 2843 }, { "epoch": 0.4607906675307842, "grad_norm": 0.9265870451927185, "learning_rate": 4.802437555459418e-06, "loss": 0.1461, "step": 2844 }, { "epoch": 0.46095268956578095, "grad_norm": 0.9968574643135071, "learning_rate": 4.80226713710297e-06, "loss": 0.1549, "step": 2845 }, { "epoch": 0.4611147116007777, "grad_norm": 0.9609559774398804, "learning_rate": 4.802096648302718e-06, "loss": 0.1511, "step": 2846 }, { "epoch": 0.4612767336357745, "grad_norm": 1.0482757091522217, "learning_rate": 4.8019260890638805e-06, "loss": 0.1849, "step": 2847 }, { "epoch": 0.46143875567077125, "grad_norm": 1.261745572090149, "learning_rate": 4.801755459391675e-06, "loss": 0.2021, "step": 2848 }, { "epoch": 0.46160077770576796, "grad_norm": 1.0810171365737915, "learning_rate": 4.801584759291323e-06, "loss": 0.1696, "step": 2849 }, { "epoch": 0.46176279974076473, "grad_norm": 1.0707732439041138, "learning_rate": 4.801413988768047e-06, "loss": 0.1727, "step": 2850 }, { "epoch": 0.4619248217757615, "grad_norm": 1.021913766860962, "learning_rate": 4.8012431478270716e-06, "loss": 0.1585, "step": 2851 }, { "epoch": 0.46208684381075826, "grad_norm": 0.9242175817489624, "learning_rate": 4.801072236473625e-06, "loss": 0.1406, "step": 2852 }, { "epoch": 0.46224886584575503, "grad_norm": 0.953057587146759, "learning_rate": 4.800901254712936e-06, "loss": 0.1605, "step": 2853 }, { "epoch": 0.4624108878807518, "grad_norm": 0.9746292233467102, "learning_rate": 4.800730202550237e-06, "loss": 0.141, "step": 2854 }, { "epoch": 0.46257290991574856, "grad_norm": 0.9918842315673828, "learning_rate": 4.800559079990762e-06, "loss": 0.1639, "step": 2855 }, { "epoch": 0.4627349319507453, "grad_norm": 0.9181191325187683, "learning_rate": 4.800387887039747e-06, "loss": 0.1396, "step": 2856 }, { "epoch": 0.46289695398574204, "grad_norm": 1.0667685270309448, "learning_rate": 4.800216623702428e-06, "loss": 0.1612, "step": 2857 }, { "epoch": 0.4630589760207388, "grad_norm": 0.8903344869613647, "learning_rate": 4.800045289984047e-06, "loss": 0.1219, "step": 2858 }, { "epoch": 0.4632209980557356, "grad_norm": 1.1288483142852783, "learning_rate": 4.7998738858898475e-06, "loss": 0.1552, "step": 2859 }, { "epoch": 0.46338302009073234, "grad_norm": 0.9569337964057922, "learning_rate": 4.799702411425071e-06, "loss": 0.1304, "step": 2860 }, { "epoch": 0.4635450421257291, "grad_norm": 0.9571936130523682, "learning_rate": 4.799530866594967e-06, "loss": 0.1502, "step": 2861 }, { "epoch": 0.4637070641607259, "grad_norm": 0.955163836479187, "learning_rate": 4.7993592514047825e-06, "loss": 0.1475, "step": 2862 }, { "epoch": 0.4638690861957226, "grad_norm": 0.8739729523658752, "learning_rate": 4.79918756585977e-06, "loss": 0.1334, "step": 2863 }, { "epoch": 0.46403110823071936, "grad_norm": 1.0791646242141724, "learning_rate": 4.7990158099651815e-06, "loss": 0.1593, "step": 2864 }, { "epoch": 0.4641931302657161, "grad_norm": 1.0804657936096191, "learning_rate": 4.798843983726272e-06, "loss": 0.148, "step": 2865 }, { "epoch": 0.4643551523007129, "grad_norm": 0.9415281414985657, "learning_rate": 4.798672087148301e-06, "loss": 0.1481, "step": 2866 }, { "epoch": 0.46451717433570966, "grad_norm": 0.9962906837463379, "learning_rate": 4.798500120236526e-06, "loss": 0.154, "step": 2867 }, { "epoch": 0.4646791963707064, "grad_norm": 1.016023874282837, "learning_rate": 4.79832808299621e-06, "loss": 0.1592, "step": 2868 }, { "epoch": 0.4648412184057032, "grad_norm": 0.8804002404212952, "learning_rate": 4.7981559754326154e-06, "loss": 0.1345, "step": 2869 }, { "epoch": 0.46500324044069996, "grad_norm": 0.9187561869621277, "learning_rate": 4.797983797551011e-06, "loss": 0.1399, "step": 2870 }, { "epoch": 0.4651652624756967, "grad_norm": 0.9533474445343018, "learning_rate": 4.797811549356662e-06, "loss": 0.1513, "step": 2871 }, { "epoch": 0.46532728451069344, "grad_norm": 1.0123627185821533, "learning_rate": 4.7976392308548416e-06, "loss": 0.1563, "step": 2872 }, { "epoch": 0.4654893065456902, "grad_norm": 1.0109021663665771, "learning_rate": 4.7974668420508195e-06, "loss": 0.1527, "step": 2873 }, { "epoch": 0.465651328580687, "grad_norm": 1.1355345249176025, "learning_rate": 4.797294382949873e-06, "loss": 0.1783, "step": 2874 }, { "epoch": 0.46581335061568374, "grad_norm": 1.0185506343841553, "learning_rate": 4.797121853557277e-06, "loss": 0.1591, "step": 2875 }, { "epoch": 0.4659753726506805, "grad_norm": 0.9530416131019592, "learning_rate": 4.796949253878311e-06, "loss": 0.1403, "step": 2876 }, { "epoch": 0.4661373946856773, "grad_norm": 0.9338066577911377, "learning_rate": 4.796776583918256e-06, "loss": 0.1505, "step": 2877 }, { "epoch": 0.466299416720674, "grad_norm": 0.8232749700546265, "learning_rate": 4.796603843682397e-06, "loss": 0.1251, "step": 2878 }, { "epoch": 0.46646143875567075, "grad_norm": 1.013901710510254, "learning_rate": 4.7964310331760174e-06, "loss": 0.1506, "step": 2879 }, { "epoch": 0.4666234607906675, "grad_norm": 1.042008399963379, "learning_rate": 4.796258152404406e-06, "loss": 0.1578, "step": 2880 }, { "epoch": 0.4667854828256643, "grad_norm": 1.0777783393859863, "learning_rate": 4.796085201372852e-06, "loss": 0.1805, "step": 2881 }, { "epoch": 0.46694750486066106, "grad_norm": 0.9231262803077698, "learning_rate": 4.795912180086646e-06, "loss": 0.1404, "step": 2882 }, { "epoch": 0.4671095268956578, "grad_norm": 0.972801685333252, "learning_rate": 4.795739088551084e-06, "loss": 0.1478, "step": 2883 }, { "epoch": 0.4672715489306546, "grad_norm": 0.9988305568695068, "learning_rate": 4.795565926771461e-06, "loss": 0.1507, "step": 2884 }, { "epoch": 0.4674335709656513, "grad_norm": 0.9387997388839722, "learning_rate": 4.795392694753077e-06, "loss": 0.1514, "step": 2885 }, { "epoch": 0.46759559300064807, "grad_norm": 0.9761515855789185, "learning_rate": 4.79521939250123e-06, "loss": 0.1604, "step": 2886 }, { "epoch": 0.46775761503564484, "grad_norm": 0.9907017946243286, "learning_rate": 4.7950460200212244e-06, "loss": 0.1448, "step": 2887 }, { "epoch": 0.4679196370706416, "grad_norm": 1.0086826086044312, "learning_rate": 4.7948725773183645e-06, "loss": 0.1539, "step": 2888 }, { "epoch": 0.46808165910563837, "grad_norm": 0.9991224408149719, "learning_rate": 4.794699064397957e-06, "loss": 0.156, "step": 2889 }, { "epoch": 0.46824368114063514, "grad_norm": 1.0062789916992188, "learning_rate": 4.794525481265312e-06, "loss": 0.1594, "step": 2890 }, { "epoch": 0.4684057031756319, "grad_norm": 1.0728275775909424, "learning_rate": 4.794351827925739e-06, "loss": 0.156, "step": 2891 }, { "epoch": 0.46856772521062867, "grad_norm": 0.9946532249450684, "learning_rate": 4.794178104384554e-06, "loss": 0.1754, "step": 2892 }, { "epoch": 0.4687297472456254, "grad_norm": 1.0101428031921387, "learning_rate": 4.794004310647069e-06, "loss": 0.1571, "step": 2893 }, { "epoch": 0.46889176928062215, "grad_norm": 0.8463723063468933, "learning_rate": 4.7938304467186036e-06, "loss": 0.1326, "step": 2894 }, { "epoch": 0.4690537913156189, "grad_norm": 0.8931572437286377, "learning_rate": 4.793656512604478e-06, "loss": 0.1415, "step": 2895 }, { "epoch": 0.4692158133506157, "grad_norm": 1.0302973985671997, "learning_rate": 4.793482508310014e-06, "loss": 0.1637, "step": 2896 }, { "epoch": 0.46937783538561245, "grad_norm": 1.800075888633728, "learning_rate": 4.793308433840534e-06, "loss": 0.1476, "step": 2897 }, { "epoch": 0.4695398574206092, "grad_norm": 0.8768447041511536, "learning_rate": 4.793134289201367e-06, "loss": 0.1269, "step": 2898 }, { "epoch": 0.469701879455606, "grad_norm": 0.9154813289642334, "learning_rate": 4.792960074397839e-06, "loss": 0.1353, "step": 2899 }, { "epoch": 0.4698639014906027, "grad_norm": 1.0366954803466797, "learning_rate": 4.792785789435283e-06, "loss": 0.1506, "step": 2900 }, { "epoch": 0.47002592352559946, "grad_norm": 0.8922173380851746, "learning_rate": 4.792611434319029e-06, "loss": 0.1285, "step": 2901 }, { "epoch": 0.47018794556059623, "grad_norm": 0.9748638272285461, "learning_rate": 4.792437009054413e-06, "loss": 0.1438, "step": 2902 }, { "epoch": 0.470349967595593, "grad_norm": 1.032471776008606, "learning_rate": 4.792262513646773e-06, "loss": 0.1685, "step": 2903 }, { "epoch": 0.47051198963058977, "grad_norm": 1.1114881038665771, "learning_rate": 4.792087948101447e-06, "loss": 0.1312, "step": 2904 }, { "epoch": 0.47067401166558653, "grad_norm": 2.0958445072174072, "learning_rate": 4.791913312423776e-06, "loss": 0.1668, "step": 2905 }, { "epoch": 0.4708360337005833, "grad_norm": 1.0381637811660767, "learning_rate": 4.791738606619105e-06, "loss": 0.1517, "step": 2906 }, { "epoch": 0.47099805573558, "grad_norm": 0.963405191898346, "learning_rate": 4.7915638306927775e-06, "loss": 0.1395, "step": 2907 }, { "epoch": 0.4711600777705768, "grad_norm": 1.0255495309829712, "learning_rate": 4.791388984650143e-06, "loss": 0.1537, "step": 2908 }, { "epoch": 0.47132209980557355, "grad_norm": 0.9075080156326294, "learning_rate": 4.79121406849655e-06, "loss": 0.142, "step": 2909 }, { "epoch": 0.4714841218405703, "grad_norm": 0.981643557548523, "learning_rate": 4.791039082237352e-06, "loss": 0.1381, "step": 2910 }, { "epoch": 0.4716461438755671, "grad_norm": 1.0010422468185425, "learning_rate": 4.790864025877902e-06, "loss": 0.1668, "step": 2911 }, { "epoch": 0.47180816591056385, "grad_norm": 1.1024540662765503, "learning_rate": 4.790688899423556e-06, "loss": 0.1753, "step": 2912 }, { "epoch": 0.4719701879455606, "grad_norm": 0.8568621277809143, "learning_rate": 4.790513702879673e-06, "loss": 0.1248, "step": 2913 }, { "epoch": 0.4721322099805574, "grad_norm": 0.8953239917755127, "learning_rate": 4.7903384362516135e-06, "loss": 0.1436, "step": 2914 }, { "epoch": 0.4722942320155541, "grad_norm": 1.0069905519485474, "learning_rate": 4.790163099544741e-06, "loss": 0.1673, "step": 2915 }, { "epoch": 0.47245625405055086, "grad_norm": 0.9999399185180664, "learning_rate": 4.78998769276442e-06, "loss": 0.1499, "step": 2916 }, { "epoch": 0.47261827608554763, "grad_norm": 1.05617356300354, "learning_rate": 4.789812215916016e-06, "loss": 0.1648, "step": 2917 }, { "epoch": 0.4727802981205444, "grad_norm": 0.9993578195571899, "learning_rate": 4.7896366690049016e-06, "loss": 0.1546, "step": 2918 }, { "epoch": 0.47294232015554116, "grad_norm": 0.8876897692680359, "learning_rate": 4.789461052036444e-06, "loss": 0.1327, "step": 2919 }, { "epoch": 0.47310434219053793, "grad_norm": 0.9634594321250916, "learning_rate": 4.789285365016019e-06, "loss": 0.1576, "step": 2920 }, { "epoch": 0.4732663642255347, "grad_norm": 0.8948929905891418, "learning_rate": 4.7891096079490016e-06, "loss": 0.137, "step": 2921 }, { "epoch": 0.4734283862605314, "grad_norm": 1.0438789129257202, "learning_rate": 4.788933780840771e-06, "loss": 0.1549, "step": 2922 }, { "epoch": 0.4735904082955282, "grad_norm": 1.0046937465667725, "learning_rate": 4.7887578836967045e-06, "loss": 0.1338, "step": 2923 }, { "epoch": 0.47375243033052494, "grad_norm": 0.9940471649169922, "learning_rate": 4.788581916522186e-06, "loss": 0.1524, "step": 2924 }, { "epoch": 0.4739144523655217, "grad_norm": 0.9437041878700256, "learning_rate": 4.788405879322599e-06, "loss": 0.1619, "step": 2925 }, { "epoch": 0.4740764744005185, "grad_norm": 0.9506624937057495, "learning_rate": 4.78822977210333e-06, "loss": 0.1405, "step": 2926 }, { "epoch": 0.47423849643551524, "grad_norm": 1.014735460281372, "learning_rate": 4.788053594869767e-06, "loss": 0.132, "step": 2927 }, { "epoch": 0.474400518470512, "grad_norm": 1.1211403608322144, "learning_rate": 4.787877347627302e-06, "loss": 0.1818, "step": 2928 }, { "epoch": 0.4745625405055087, "grad_norm": 1.0037801265716553, "learning_rate": 4.787701030381326e-06, "loss": 0.1575, "step": 2929 }, { "epoch": 0.4747245625405055, "grad_norm": 0.911635160446167, "learning_rate": 4.787524643137235e-06, "loss": 0.1325, "step": 2930 }, { "epoch": 0.47488658457550226, "grad_norm": 0.9881319999694824, "learning_rate": 4.7873481859004245e-06, "loss": 0.1486, "step": 2931 }, { "epoch": 0.475048606610499, "grad_norm": 1.0285166501998901, "learning_rate": 4.7871716586762965e-06, "loss": 0.1545, "step": 2932 }, { "epoch": 0.4752106286454958, "grad_norm": 1.2436772584915161, "learning_rate": 4.786995061470249e-06, "loss": 0.1594, "step": 2933 }, { "epoch": 0.47537265068049256, "grad_norm": 1.0951595306396484, "learning_rate": 4.786818394287688e-06, "loss": 0.1712, "step": 2934 }, { "epoch": 0.4755346727154893, "grad_norm": 1.0446057319641113, "learning_rate": 4.786641657134017e-06, "loss": 0.1583, "step": 2935 }, { "epoch": 0.4756966947504861, "grad_norm": 0.9601659774780273, "learning_rate": 4.786464850014646e-06, "loss": 0.1574, "step": 2936 }, { "epoch": 0.4758587167854828, "grad_norm": 0.9458787441253662, "learning_rate": 4.786287972934984e-06, "loss": 0.1306, "step": 2937 }, { "epoch": 0.47602073882047957, "grad_norm": 1.0759549140930176, "learning_rate": 4.786111025900442e-06, "loss": 0.1646, "step": 2938 }, { "epoch": 0.47618276085547634, "grad_norm": 0.9733848571777344, "learning_rate": 4.785934008916435e-06, "loss": 0.1616, "step": 2939 }, { "epoch": 0.4763447828904731, "grad_norm": 0.9360319375991821, "learning_rate": 4.785756921988379e-06, "loss": 0.1504, "step": 2940 }, { "epoch": 0.4765068049254699, "grad_norm": 0.9589405059814453, "learning_rate": 4.785579765121693e-06, "loss": 0.1566, "step": 2941 }, { "epoch": 0.47666882696046664, "grad_norm": 1.0184801816940308, "learning_rate": 4.785402538321798e-06, "loss": 0.1502, "step": 2942 }, { "epoch": 0.4768308489954634, "grad_norm": 1.011683702468872, "learning_rate": 4.785225241594114e-06, "loss": 0.165, "step": 2943 }, { "epoch": 0.4769928710304601, "grad_norm": 0.9309144020080566, "learning_rate": 4.785047874944069e-06, "loss": 0.1353, "step": 2944 }, { "epoch": 0.4771548930654569, "grad_norm": 1.050207495689392, "learning_rate": 4.7848704383770875e-06, "loss": 0.1605, "step": 2945 }, { "epoch": 0.47731691510045365, "grad_norm": 0.9079956412315369, "learning_rate": 4.784692931898601e-06, "loss": 0.1404, "step": 2946 }, { "epoch": 0.4774789371354504, "grad_norm": 0.9349779486656189, "learning_rate": 4.784515355514039e-06, "loss": 0.1332, "step": 2947 }, { "epoch": 0.4776409591704472, "grad_norm": 1.089605689048767, "learning_rate": 4.7843377092288365e-06, "loss": 0.1575, "step": 2948 }, { "epoch": 0.47780298120544396, "grad_norm": 1.0942819118499756, "learning_rate": 4.784159993048427e-06, "loss": 0.1675, "step": 2949 }, { "epoch": 0.4779650032404407, "grad_norm": 0.9137088656425476, "learning_rate": 4.7839822069782505e-06, "loss": 0.1388, "step": 2950 }, { "epoch": 0.47812702527543743, "grad_norm": 0.9597694277763367, "learning_rate": 4.783804351023745e-06, "loss": 0.155, "step": 2951 }, { "epoch": 0.4782890473104342, "grad_norm": 0.9747596383094788, "learning_rate": 4.783626425190353e-06, "loss": 0.1504, "step": 2952 }, { "epoch": 0.47845106934543097, "grad_norm": 0.9618208408355713, "learning_rate": 4.783448429483518e-06, "loss": 0.1479, "step": 2953 }, { "epoch": 0.47861309138042774, "grad_norm": 0.9732487201690674, "learning_rate": 4.783270363908687e-06, "loss": 0.1515, "step": 2954 }, { "epoch": 0.4787751134154245, "grad_norm": 1.075685977935791, "learning_rate": 4.78309222847131e-06, "loss": 0.1613, "step": 2955 }, { "epoch": 0.47893713545042127, "grad_norm": 0.9794430136680603, "learning_rate": 4.782914023176834e-06, "loss": 0.1425, "step": 2956 }, { "epoch": 0.47909915748541804, "grad_norm": 1.0761573314666748, "learning_rate": 4.782735748030714e-06, "loss": 0.1697, "step": 2957 }, { "epoch": 0.4792611795204148, "grad_norm": 1.1438524723052979, "learning_rate": 4.782557403038404e-06, "loss": 0.1605, "step": 2958 }, { "epoch": 0.4794232015554115, "grad_norm": 0.9872993230819702, "learning_rate": 4.782378988205362e-06, "loss": 0.1534, "step": 2959 }, { "epoch": 0.4795852235904083, "grad_norm": 0.964551568031311, "learning_rate": 4.7822005035370455e-06, "loss": 0.1568, "step": 2960 }, { "epoch": 0.47974724562540505, "grad_norm": 0.9149985313415527, "learning_rate": 4.782021949038916e-06, "loss": 0.138, "step": 2961 }, { "epoch": 0.4799092676604018, "grad_norm": 0.9830363392829895, "learning_rate": 4.781843324716437e-06, "loss": 0.1573, "step": 2962 }, { "epoch": 0.4800712896953986, "grad_norm": 0.9936883449554443, "learning_rate": 4.781664630575076e-06, "loss": 0.1575, "step": 2963 }, { "epoch": 0.48023331173039535, "grad_norm": 1.0400924682617188, "learning_rate": 4.7814858666202975e-06, "loss": 0.1731, "step": 2964 }, { "epoch": 0.4803953337653921, "grad_norm": 0.9391010999679565, "learning_rate": 4.781307032857573e-06, "loss": 0.1357, "step": 2965 }, { "epoch": 0.48055735580038883, "grad_norm": 1.0018044710159302, "learning_rate": 4.781128129292374e-06, "loss": 0.1614, "step": 2966 }, { "epoch": 0.4807193778353856, "grad_norm": 0.8832952976226807, "learning_rate": 4.780949155930174e-06, "loss": 0.142, "step": 2967 }, { "epoch": 0.48088139987038236, "grad_norm": 1.0228934288024902, "learning_rate": 4.7807701127764506e-06, "loss": 0.166, "step": 2968 }, { "epoch": 0.48104342190537913, "grad_norm": 0.8861875534057617, "learning_rate": 4.78059099983668e-06, "loss": 0.1308, "step": 2969 }, { "epoch": 0.4812054439403759, "grad_norm": 1.0241353511810303, "learning_rate": 4.780411817116344e-06, "loss": 0.167, "step": 2970 }, { "epoch": 0.48136746597537267, "grad_norm": 1.0048408508300781, "learning_rate": 4.7802325646209255e-06, "loss": 0.1637, "step": 2971 }, { "epoch": 0.48152948801036943, "grad_norm": 0.9971985816955566, "learning_rate": 4.780053242355908e-06, "loss": 0.1671, "step": 2972 }, { "epoch": 0.48169151004536614, "grad_norm": 1.110739827156067, "learning_rate": 4.779873850326778e-06, "loss": 0.1776, "step": 2973 }, { "epoch": 0.4818535320803629, "grad_norm": 0.9932035207748413, "learning_rate": 4.779694388539027e-06, "loss": 0.1502, "step": 2974 }, { "epoch": 0.4820155541153597, "grad_norm": 0.9594526290893555, "learning_rate": 4.779514856998144e-06, "loss": 0.1553, "step": 2975 }, { "epoch": 0.48217757615035645, "grad_norm": 0.9843863844871521, "learning_rate": 4.779335255709623e-06, "loss": 0.1436, "step": 2976 }, { "epoch": 0.4823395981853532, "grad_norm": 1.0879184007644653, "learning_rate": 4.7791555846789585e-06, "loss": 0.1591, "step": 2977 }, { "epoch": 0.48250162022035, "grad_norm": 1.059070348739624, "learning_rate": 4.778975843911649e-06, "loss": 0.1468, "step": 2978 }, { "epoch": 0.48266364225534675, "grad_norm": 0.9296880960464478, "learning_rate": 4.778796033413193e-06, "loss": 0.1387, "step": 2979 }, { "epoch": 0.48282566429034346, "grad_norm": 1.030938982963562, "learning_rate": 4.778616153189093e-06, "loss": 0.1576, "step": 2980 }, { "epoch": 0.4829876863253402, "grad_norm": 0.967088520526886, "learning_rate": 4.778436203244853e-06, "loss": 0.1516, "step": 2981 }, { "epoch": 0.483149708360337, "grad_norm": 0.9913309216499329, "learning_rate": 4.7782561835859795e-06, "loss": 0.1429, "step": 2982 }, { "epoch": 0.48331173039533376, "grad_norm": 1.0217386484146118, "learning_rate": 4.77807609421798e-06, "loss": 0.1632, "step": 2983 }, { "epoch": 0.48347375243033053, "grad_norm": 0.9644971489906311, "learning_rate": 4.777895935146364e-06, "loss": 0.1548, "step": 2984 }, { "epoch": 0.4836357744653273, "grad_norm": 0.9488604068756104, "learning_rate": 4.777715706376645e-06, "loss": 0.1591, "step": 2985 }, { "epoch": 0.48379779650032406, "grad_norm": 1.0006232261657715, "learning_rate": 4.777535407914338e-06, "loss": 0.1522, "step": 2986 }, { "epoch": 0.48395981853532083, "grad_norm": 1.0294928550720215, "learning_rate": 4.777355039764958e-06, "loss": 0.1633, "step": 2987 }, { "epoch": 0.48412184057031754, "grad_norm": 0.8555730581283569, "learning_rate": 4.777174601934026e-06, "loss": 0.1297, "step": 2988 }, { "epoch": 0.4842838626053143, "grad_norm": 0.8943042755126953, "learning_rate": 4.776994094427061e-06, "loss": 0.1294, "step": 2989 }, { "epoch": 0.4844458846403111, "grad_norm": 0.976116418838501, "learning_rate": 4.776813517249588e-06, "loss": 0.1681, "step": 2990 }, { "epoch": 0.48460790667530784, "grad_norm": 0.9380887746810913, "learning_rate": 4.77663287040713e-06, "loss": 0.156, "step": 2991 }, { "epoch": 0.4847699287103046, "grad_norm": 0.9756038188934326, "learning_rate": 4.776452153905216e-06, "loss": 0.1531, "step": 2992 }, { "epoch": 0.4849319507453014, "grad_norm": 0.9774467945098877, "learning_rate": 4.776271367749375e-06, "loss": 0.1688, "step": 2993 }, { "epoch": 0.48509397278029814, "grad_norm": 1.0772470235824585, "learning_rate": 4.776090511945139e-06, "loss": 0.174, "step": 2994 }, { "epoch": 0.48525599481529486, "grad_norm": 0.983753502368927, "learning_rate": 4.77590958649804e-06, "loss": 0.1422, "step": 2995 }, { "epoch": 0.4854180168502916, "grad_norm": 1.1349477767944336, "learning_rate": 4.775728591413616e-06, "loss": 0.1654, "step": 2996 }, { "epoch": 0.4855800388852884, "grad_norm": 1.0424829721450806, "learning_rate": 4.775547526697405e-06, "loss": 0.1515, "step": 2997 }, { "epoch": 0.48574206092028516, "grad_norm": 1.055623173713684, "learning_rate": 4.775366392354946e-06, "loss": 0.1482, "step": 2998 }, { "epoch": 0.4859040829552819, "grad_norm": 0.9909132122993469, "learning_rate": 4.775185188391781e-06, "loss": 0.1529, "step": 2999 }, { "epoch": 0.4860661049902787, "grad_norm": 0.981861412525177, "learning_rate": 4.775003914813456e-06, "loss": 0.1505, "step": 3000 }, { "epoch": 0.48622812702527546, "grad_norm": 0.9199234247207642, "learning_rate": 4.774822571625516e-06, "loss": 0.1376, "step": 3001 }, { "epoch": 0.48639014906027217, "grad_norm": 1.0599560737609863, "learning_rate": 4.7746411588335105e-06, "loss": 0.1656, "step": 3002 }, { "epoch": 0.48655217109526894, "grad_norm": 0.9341553449630737, "learning_rate": 4.774459676442991e-06, "loss": 0.1573, "step": 3003 }, { "epoch": 0.4867141931302657, "grad_norm": 1.0569862127304077, "learning_rate": 4.774278124459509e-06, "loss": 0.1664, "step": 3004 }, { "epoch": 0.48687621516526247, "grad_norm": 0.970799446105957, "learning_rate": 4.774096502888619e-06, "loss": 0.1452, "step": 3005 }, { "epoch": 0.48703823720025924, "grad_norm": 1.03872811794281, "learning_rate": 4.773914811735879e-06, "loss": 0.173, "step": 3006 }, { "epoch": 0.487200259235256, "grad_norm": 0.8187316656112671, "learning_rate": 4.773733051006849e-06, "loss": 0.1248, "step": 3007 }, { "epoch": 0.4873622812702528, "grad_norm": 0.9771019816398621, "learning_rate": 4.773551220707091e-06, "loss": 0.1625, "step": 3008 }, { "epoch": 0.48752430330524954, "grad_norm": 0.9272666573524475, "learning_rate": 4.773369320842167e-06, "loss": 0.1544, "step": 3009 }, { "epoch": 0.48768632534024625, "grad_norm": 0.886873185634613, "learning_rate": 4.773187351417643e-06, "loss": 0.1422, "step": 3010 }, { "epoch": 0.487848347375243, "grad_norm": 1.0471525192260742, "learning_rate": 4.773005312439087e-06, "loss": 0.1653, "step": 3011 }, { "epoch": 0.4880103694102398, "grad_norm": 1.0167343616485596, "learning_rate": 4.772823203912069e-06, "loss": 0.1555, "step": 3012 }, { "epoch": 0.48817239144523655, "grad_norm": 0.9511725902557373, "learning_rate": 4.7726410258421616e-06, "loss": 0.1502, "step": 3013 }, { "epoch": 0.4883344134802333, "grad_norm": 1.0081369876861572, "learning_rate": 4.772458778234938e-06, "loss": 0.1521, "step": 3014 }, { "epoch": 0.4884964355152301, "grad_norm": 1.0319949388504028, "learning_rate": 4.772276461095975e-06, "loss": 0.1658, "step": 3015 }, { "epoch": 0.48865845755022685, "grad_norm": 1.0191717147827148, "learning_rate": 4.772094074430852e-06, "loss": 0.1671, "step": 3016 }, { "epoch": 0.48882047958522357, "grad_norm": 0.9789111614227295, "learning_rate": 4.771911618245148e-06, "loss": 0.1515, "step": 3017 }, { "epoch": 0.48898250162022033, "grad_norm": 1.0732017755508423, "learning_rate": 4.771729092544446e-06, "loss": 0.1736, "step": 3018 }, { "epoch": 0.4891445236552171, "grad_norm": 0.9397895932197571, "learning_rate": 4.771546497334331e-06, "loss": 0.1399, "step": 3019 }, { "epoch": 0.48930654569021387, "grad_norm": 0.9154207706451416, "learning_rate": 4.771363832620391e-06, "loss": 0.146, "step": 3020 }, { "epoch": 0.48946856772521063, "grad_norm": 0.9853832125663757, "learning_rate": 4.771181098408214e-06, "loss": 0.1648, "step": 3021 }, { "epoch": 0.4896305897602074, "grad_norm": 1.054186463356018, "learning_rate": 4.770998294703392e-06, "loss": 0.1758, "step": 3022 }, { "epoch": 0.48979261179520417, "grad_norm": 1.047492504119873, "learning_rate": 4.770815421511517e-06, "loss": 0.1563, "step": 3023 }, { "epoch": 0.4899546338302009, "grad_norm": 0.9914636015892029, "learning_rate": 4.7706324788381865e-06, "loss": 0.1574, "step": 3024 }, { "epoch": 0.49011665586519765, "grad_norm": 1.0021421909332275, "learning_rate": 4.770449466688996e-06, "loss": 0.1657, "step": 3025 }, { "epoch": 0.4902786779001944, "grad_norm": 0.9965320229530334, "learning_rate": 4.770266385069547e-06, "loss": 0.1498, "step": 3026 }, { "epoch": 0.4904406999351912, "grad_norm": 1.0368542671203613, "learning_rate": 4.77008323398544e-06, "loss": 0.1701, "step": 3027 }, { "epoch": 0.49060272197018795, "grad_norm": 0.9540989398956299, "learning_rate": 4.769900013442279e-06, "loss": 0.1525, "step": 3028 }, { "epoch": 0.4907647440051847, "grad_norm": 0.9963111877441406, "learning_rate": 4.769716723445672e-06, "loss": 0.1578, "step": 3029 }, { "epoch": 0.4909267660401815, "grad_norm": 0.9703772664070129, "learning_rate": 4.769533364001225e-06, "loss": 0.1462, "step": 3030 }, { "epoch": 0.49108878807517825, "grad_norm": 1.1421406269073486, "learning_rate": 4.769349935114549e-06, "loss": 0.1572, "step": 3031 }, { "epoch": 0.49125081011017496, "grad_norm": 0.9838953018188477, "learning_rate": 4.769166436791257e-06, "loss": 0.1629, "step": 3032 }, { "epoch": 0.49141283214517173, "grad_norm": 0.8788158893585205, "learning_rate": 4.768982869036964e-06, "loss": 0.1342, "step": 3033 }, { "epoch": 0.4915748541801685, "grad_norm": 0.9803071618080139, "learning_rate": 4.768799231857285e-06, "loss": 0.1427, "step": 3034 }, { "epoch": 0.49173687621516526, "grad_norm": 0.9650258421897888, "learning_rate": 4.76861552525784e-06, "loss": 0.1321, "step": 3035 }, { "epoch": 0.49189889825016203, "grad_norm": 0.854787290096283, "learning_rate": 4.768431749244251e-06, "loss": 0.1314, "step": 3036 }, { "epoch": 0.4920609202851588, "grad_norm": 1.0625677108764648, "learning_rate": 4.768247903822139e-06, "loss": 0.1617, "step": 3037 }, { "epoch": 0.49222294232015557, "grad_norm": 1.0747294425964355, "learning_rate": 4.76806398899713e-06, "loss": 0.1586, "step": 3038 }, { "epoch": 0.4923849643551523, "grad_norm": 1.0550459623336792, "learning_rate": 4.767880004774853e-06, "loss": 0.1567, "step": 3039 }, { "epoch": 0.49254698639014904, "grad_norm": 1.0270591974258423, "learning_rate": 4.767695951160934e-06, "loss": 0.1546, "step": 3040 }, { "epoch": 0.4927090084251458, "grad_norm": 0.9791101813316345, "learning_rate": 4.767511828161008e-06, "loss": 0.1442, "step": 3041 }, { "epoch": 0.4928710304601426, "grad_norm": 1.110756754875183, "learning_rate": 4.767327635780707e-06, "loss": 0.1766, "step": 3042 }, { "epoch": 0.49303305249513935, "grad_norm": 1.0451438426971436, "learning_rate": 4.7671433740256664e-06, "loss": 0.1476, "step": 3043 }, { "epoch": 0.4931950745301361, "grad_norm": 1.0751953125, "learning_rate": 4.7669590429015265e-06, "loss": 0.1748, "step": 3044 }, { "epoch": 0.4933570965651329, "grad_norm": 0.9066848158836365, "learning_rate": 4.766774642413925e-06, "loss": 0.1459, "step": 3045 }, { "epoch": 0.4935191186001296, "grad_norm": 0.9705883860588074, "learning_rate": 4.7665901725685045e-06, "loss": 0.1495, "step": 3046 }, { "epoch": 0.49368114063512636, "grad_norm": 1.0257165431976318, "learning_rate": 4.76640563337091e-06, "loss": 0.1661, "step": 3047 }, { "epoch": 0.4938431626701231, "grad_norm": 0.933527410030365, "learning_rate": 4.766221024826788e-06, "loss": 0.1444, "step": 3048 }, { "epoch": 0.4940051847051199, "grad_norm": 0.9257835745811462, "learning_rate": 4.766036346941787e-06, "loss": 0.1586, "step": 3049 }, { "epoch": 0.49416720674011666, "grad_norm": 1.0416429042816162, "learning_rate": 4.765851599721557e-06, "loss": 0.1379, "step": 3050 }, { "epoch": 0.4943292287751134, "grad_norm": 1.0839523077011108, "learning_rate": 4.7656667831717514e-06, "loss": 0.1593, "step": 3051 }, { "epoch": 0.4944912508101102, "grad_norm": 1.0328129529953003, "learning_rate": 4.765481897298025e-06, "loss": 0.1518, "step": 3052 }, { "epoch": 0.49465327284510696, "grad_norm": 1.078291654586792, "learning_rate": 4.765296942106035e-06, "loss": 0.1678, "step": 3053 }, { "epoch": 0.4948152948801037, "grad_norm": 0.9669474363327026, "learning_rate": 4.76511191760144e-06, "loss": 0.1472, "step": 3054 }, { "epoch": 0.49497731691510044, "grad_norm": 0.9748349189758301, "learning_rate": 4.764926823789903e-06, "loss": 0.1488, "step": 3055 }, { "epoch": 0.4951393389500972, "grad_norm": 0.9624823927879333, "learning_rate": 4.764741660677085e-06, "loss": 0.1376, "step": 3056 }, { "epoch": 0.495301360985094, "grad_norm": 1.2067596912384033, "learning_rate": 4.7645564282686534e-06, "loss": 0.189, "step": 3057 }, { "epoch": 0.49546338302009074, "grad_norm": 1.1277415752410889, "learning_rate": 4.764371126570275e-06, "loss": 0.1515, "step": 3058 }, { "epoch": 0.4956254050550875, "grad_norm": 1.0058308839797974, "learning_rate": 4.76418575558762e-06, "loss": 0.1585, "step": 3059 }, { "epoch": 0.4957874270900843, "grad_norm": 0.9348163604736328, "learning_rate": 4.76400031532636e-06, "loss": 0.1388, "step": 3060 }, { "epoch": 0.495949449125081, "grad_norm": 0.9315893054008484, "learning_rate": 4.763814805792169e-06, "loss": 0.1327, "step": 3061 }, { "epoch": 0.49611147116007775, "grad_norm": 0.9435446262359619, "learning_rate": 4.763629226990724e-06, "loss": 0.1536, "step": 3062 }, { "epoch": 0.4962734931950745, "grad_norm": 1.0565837621688843, "learning_rate": 4.763443578927701e-06, "loss": 0.1814, "step": 3063 }, { "epoch": 0.4964355152300713, "grad_norm": 0.9601361155509949, "learning_rate": 4.763257861608783e-06, "loss": 0.1412, "step": 3064 }, { "epoch": 0.49659753726506806, "grad_norm": 0.9786501526832581, "learning_rate": 4.763072075039651e-06, "loss": 0.1485, "step": 3065 }, { "epoch": 0.4967595593000648, "grad_norm": 0.9912653565406799, "learning_rate": 4.762886219225991e-06, "loss": 0.1415, "step": 3066 }, { "epoch": 0.4969215813350616, "grad_norm": 0.8689770102500916, "learning_rate": 4.762700294173487e-06, "loss": 0.1392, "step": 3067 }, { "epoch": 0.4970836033700583, "grad_norm": 1.0026378631591797, "learning_rate": 4.762514299887831e-06, "loss": 0.1389, "step": 3068 }, { "epoch": 0.49724562540505507, "grad_norm": 0.907612144947052, "learning_rate": 4.762328236374713e-06, "loss": 0.1334, "step": 3069 }, { "epoch": 0.49740764744005184, "grad_norm": 1.0089789628982544, "learning_rate": 4.762142103639824e-06, "loss": 0.1653, "step": 3070 }, { "epoch": 0.4975696694750486, "grad_norm": 1.0177544355392456, "learning_rate": 4.761955901688862e-06, "loss": 0.1704, "step": 3071 }, { "epoch": 0.49773169151004537, "grad_norm": 1.010158658027649, "learning_rate": 4.761769630527523e-06, "loss": 0.1641, "step": 3072 }, { "epoch": 0.49789371354504214, "grad_norm": 0.8676300644874573, "learning_rate": 4.761583290161507e-06, "loss": 0.1422, "step": 3073 }, { "epoch": 0.4980557355800389, "grad_norm": 0.9401457905769348, "learning_rate": 4.761396880596515e-06, "loss": 0.1441, "step": 3074 }, { "epoch": 0.4982177576150357, "grad_norm": 1.0235344171524048, "learning_rate": 4.761210401838251e-06, "loss": 0.1493, "step": 3075 }, { "epoch": 0.4983797796500324, "grad_norm": 0.9820840358734131, "learning_rate": 4.76102385389242e-06, "loss": 0.135, "step": 3076 }, { "epoch": 0.49854180168502915, "grad_norm": 0.96462482213974, "learning_rate": 4.760837236764731e-06, "loss": 0.1605, "step": 3077 }, { "epoch": 0.4987038237200259, "grad_norm": 0.929373025894165, "learning_rate": 4.760650550460895e-06, "loss": 0.1183, "step": 3078 }, { "epoch": 0.4988658457550227, "grad_norm": 0.9171031713485718, "learning_rate": 4.760463794986622e-06, "loss": 0.1457, "step": 3079 }, { "epoch": 0.49902786779001945, "grad_norm": 0.9517503976821899, "learning_rate": 4.760276970347627e-06, "loss": 0.136, "step": 3080 }, { "epoch": 0.4991898898250162, "grad_norm": 0.9725329875946045, "learning_rate": 4.760090076549626e-06, "loss": 0.152, "step": 3081 }, { "epoch": 0.499351911860013, "grad_norm": 0.9958840012550354, "learning_rate": 4.759903113598338e-06, "loss": 0.1427, "step": 3082 }, { "epoch": 0.4995139338950097, "grad_norm": 1.0073212385177612, "learning_rate": 4.759716081499484e-06, "loss": 0.154, "step": 3083 }, { "epoch": 0.49967595593000647, "grad_norm": 0.9372782707214355, "learning_rate": 4.759528980258786e-06, "loss": 0.1594, "step": 3084 }, { "epoch": 0.49983797796500323, "grad_norm": 0.9391396641731262, "learning_rate": 4.7593418098819695e-06, "loss": 0.1285, "step": 3085 }, { "epoch": 0.5, "grad_norm": 0.9143754839897156, "learning_rate": 4.759154570374761e-06, "loss": 0.1312, "step": 3086 }, { "epoch": 0.5001620220349967, "grad_norm": 1.103759765625, "learning_rate": 4.75896726174289e-06, "loss": 0.1854, "step": 3087 }, { "epoch": 0.5003240440699935, "grad_norm": 0.9957083463668823, "learning_rate": 4.758779883992087e-06, "loss": 0.1459, "step": 3088 }, { "epoch": 0.5004860661049902, "grad_norm": 1.0584758520126343, "learning_rate": 4.758592437128086e-06, "loss": 0.1514, "step": 3089 }, { "epoch": 0.5006480881399871, "grad_norm": 0.9559106230735779, "learning_rate": 4.758404921156622e-06, "loss": 0.1483, "step": 3090 }, { "epoch": 0.5008101101749838, "grad_norm": 1.1724073886871338, "learning_rate": 4.7582173360834326e-06, "loss": 0.1854, "step": 3091 }, { "epoch": 0.5009721322099806, "grad_norm": 1.016783595085144, "learning_rate": 4.7580296819142565e-06, "loss": 0.1646, "step": 3092 }, { "epoch": 0.5011341542449773, "grad_norm": 0.9434977769851685, "learning_rate": 4.757841958654838e-06, "loss": 0.1467, "step": 3093 }, { "epoch": 0.501296176279974, "grad_norm": 0.8541172742843628, "learning_rate": 4.757654166310919e-06, "loss": 0.1325, "step": 3094 }, { "epoch": 0.5014581983149708, "grad_norm": 0.8900263905525208, "learning_rate": 4.757466304888245e-06, "loss": 0.1388, "step": 3095 }, { "epoch": 0.5016202203499676, "grad_norm": 0.9139759540557861, "learning_rate": 4.757278374392567e-06, "loss": 0.1381, "step": 3096 }, { "epoch": 0.5017822423849644, "grad_norm": 0.8878260254859924, "learning_rate": 4.757090374829631e-06, "loss": 0.1404, "step": 3097 }, { "epoch": 0.5019442644199611, "grad_norm": 1.07386314868927, "learning_rate": 4.7569023062051936e-06, "loss": 0.1747, "step": 3098 }, { "epoch": 0.5021062864549579, "grad_norm": 0.9467965960502625, "learning_rate": 4.756714168525006e-06, "loss": 0.1505, "step": 3099 }, { "epoch": 0.5022683084899546, "grad_norm": 0.9320406913757324, "learning_rate": 4.756525961794826e-06, "loss": 0.1414, "step": 3100 }, { "epoch": 0.5024303305249513, "grad_norm": 0.9513562321662903, "learning_rate": 4.756337686020413e-06, "loss": 0.1516, "step": 3101 }, { "epoch": 0.5025923525599482, "grad_norm": 0.9535584449768066, "learning_rate": 4.756149341207526e-06, "loss": 0.1461, "step": 3102 }, { "epoch": 0.5027543745949449, "grad_norm": 0.9998999834060669, "learning_rate": 4.75596092736193e-06, "loss": 0.1684, "step": 3103 }, { "epoch": 0.5029163966299417, "grad_norm": 1.0532805919647217, "learning_rate": 4.755772444489388e-06, "loss": 0.1307, "step": 3104 }, { "epoch": 0.5030784186649384, "grad_norm": 0.9271810054779053, "learning_rate": 4.7555838925956686e-06, "loss": 0.1446, "step": 3105 }, { "epoch": 0.5032404406999352, "grad_norm": 1.0862815380096436, "learning_rate": 4.75539527168654e-06, "loss": 0.1484, "step": 3106 }, { "epoch": 0.5034024627349319, "grad_norm": 1.0652508735656738, "learning_rate": 4.755206581767775e-06, "loss": 0.1749, "step": 3107 }, { "epoch": 0.5035644847699287, "grad_norm": 0.9044068455696106, "learning_rate": 4.755017822845145e-06, "loss": 0.1525, "step": 3108 }, { "epoch": 0.5037265068049255, "grad_norm": 0.9966428279876709, "learning_rate": 4.754828994924428e-06, "loss": 0.1536, "step": 3109 }, { "epoch": 0.5038885288399222, "grad_norm": 1.0621579885482788, "learning_rate": 4.754640098011399e-06, "loss": 0.1773, "step": 3110 }, { "epoch": 0.504050550874919, "grad_norm": 1.0683941841125488, "learning_rate": 4.754451132111839e-06, "loss": 0.1586, "step": 3111 }, { "epoch": 0.5042125729099157, "grad_norm": 0.7734267115592957, "learning_rate": 4.754262097231531e-06, "loss": 0.1281, "step": 3112 }, { "epoch": 0.5043745949449125, "grad_norm": 1.0103263854980469, "learning_rate": 4.754072993376258e-06, "loss": 0.1583, "step": 3113 }, { "epoch": 0.5045366169799093, "grad_norm": 0.9807709455490112, "learning_rate": 4.753883820551806e-06, "loss": 0.148, "step": 3114 }, { "epoch": 0.5046986390149061, "grad_norm": 1.0581525564193726, "learning_rate": 4.753694578763963e-06, "loss": 0.1637, "step": 3115 }, { "epoch": 0.5048606610499028, "grad_norm": 1.050081491470337, "learning_rate": 4.75350526801852e-06, "loss": 0.1468, "step": 3116 }, { "epoch": 0.5050226830848995, "grad_norm": 0.991671621799469, "learning_rate": 4.753315888321269e-06, "loss": 0.1562, "step": 3117 }, { "epoch": 0.5051847051198963, "grad_norm": 0.9450017809867859, "learning_rate": 4.753126439678005e-06, "loss": 0.1554, "step": 3118 }, { "epoch": 0.505346727154893, "grad_norm": 0.8860989809036255, "learning_rate": 4.752936922094524e-06, "loss": 0.1439, "step": 3119 }, { "epoch": 0.5055087491898899, "grad_norm": 1.0846068859100342, "learning_rate": 4.752747335576626e-06, "loss": 0.1612, "step": 3120 }, { "epoch": 0.5056707712248866, "grad_norm": 1.1103551387786865, "learning_rate": 4.75255768013011e-06, "loss": 0.1659, "step": 3121 }, { "epoch": 0.5058327932598834, "grad_norm": 0.9545550346374512, "learning_rate": 4.752367955760781e-06, "loss": 0.1395, "step": 3122 }, { "epoch": 0.5059948152948801, "grad_norm": 1.050429344177246, "learning_rate": 4.752178162474443e-06, "loss": 0.1587, "step": 3123 }, { "epoch": 0.5061568373298768, "grad_norm": 0.9017760157585144, "learning_rate": 4.751988300276903e-06, "loss": 0.1398, "step": 3124 }, { "epoch": 0.5063188593648736, "grad_norm": 1.020270586013794, "learning_rate": 4.751798369173971e-06, "loss": 0.1452, "step": 3125 }, { "epoch": 0.5064808813998704, "grad_norm": 1.0533658266067505, "learning_rate": 4.751608369171458e-06, "loss": 0.1577, "step": 3126 }, { "epoch": 0.5066429034348672, "grad_norm": 0.9330847263336182, "learning_rate": 4.751418300275178e-06, "loss": 0.142, "step": 3127 }, { "epoch": 0.5068049254698639, "grad_norm": 0.9158099293708801, "learning_rate": 4.751228162490946e-06, "loss": 0.1455, "step": 3128 }, { "epoch": 0.5069669475048607, "grad_norm": 0.9073064923286438, "learning_rate": 4.75103795582458e-06, "loss": 0.136, "step": 3129 }, { "epoch": 0.5071289695398574, "grad_norm": 1.0237650871276855, "learning_rate": 4.750847680281901e-06, "loss": 0.1599, "step": 3130 }, { "epoch": 0.5072909915748541, "grad_norm": 0.9799794554710388, "learning_rate": 4.750657335868728e-06, "loss": 0.1526, "step": 3131 }, { "epoch": 0.507453013609851, "grad_norm": 0.9370033740997314, "learning_rate": 4.750466922590888e-06, "loss": 0.1508, "step": 3132 }, { "epoch": 0.5076150356448477, "grad_norm": 0.8960480093955994, "learning_rate": 4.750276440454207e-06, "loss": 0.1249, "step": 3133 }, { "epoch": 0.5077770576798445, "grad_norm": 0.9389232993125916, "learning_rate": 4.750085889464512e-06, "loss": 0.1578, "step": 3134 }, { "epoch": 0.5079390797148412, "grad_norm": 0.9818000197410583, "learning_rate": 4.749895269627633e-06, "loss": 0.1501, "step": 3135 }, { "epoch": 0.508101101749838, "grad_norm": 0.9019086956977844, "learning_rate": 4.749704580949404e-06, "loss": 0.1541, "step": 3136 }, { "epoch": 0.5082631237848347, "grad_norm": 1.0174736976623535, "learning_rate": 4.749513823435659e-06, "loss": 0.1599, "step": 3137 }, { "epoch": 0.5084251458198314, "grad_norm": 1.0276374816894531, "learning_rate": 4.749322997092235e-06, "loss": 0.1574, "step": 3138 }, { "epoch": 0.5085871678548283, "grad_norm": 0.8995428681373596, "learning_rate": 4.74913210192497e-06, "loss": 0.1356, "step": 3139 }, { "epoch": 0.508749189889825, "grad_norm": 1.0277477502822876, "learning_rate": 4.748941137939706e-06, "loss": 0.1604, "step": 3140 }, { "epoch": 0.5089112119248218, "grad_norm": 1.0544497966766357, "learning_rate": 4.748750105142285e-06, "loss": 0.1749, "step": 3141 }, { "epoch": 0.5090732339598185, "grad_norm": 0.8580374717712402, "learning_rate": 4.748559003538553e-06, "loss": 0.1397, "step": 3142 }, { "epoch": 0.5092352559948153, "grad_norm": 1.0036252737045288, "learning_rate": 4.748367833134357e-06, "loss": 0.1463, "step": 3143 }, { "epoch": 0.509397278029812, "grad_norm": 0.8418014049530029, "learning_rate": 4.748176593935546e-06, "loss": 0.1334, "step": 3144 }, { "epoch": 0.5095593000648088, "grad_norm": 1.012681245803833, "learning_rate": 4.747985285947972e-06, "loss": 0.1758, "step": 3145 }, { "epoch": 0.5097213220998056, "grad_norm": 0.9466359615325928, "learning_rate": 4.7477939091774885e-06, "loss": 0.1552, "step": 3146 }, { "epoch": 0.5098833441348023, "grad_norm": 1.010433316230774, "learning_rate": 4.74760246362995e-06, "loss": 0.158, "step": 3147 }, { "epoch": 0.5100453661697991, "grad_norm": 0.9007865786552429, "learning_rate": 4.7474109493112154e-06, "loss": 0.1287, "step": 3148 }, { "epoch": 0.5102073882047958, "grad_norm": 0.9816288352012634, "learning_rate": 4.747219366227145e-06, "loss": 0.147, "step": 3149 }, { "epoch": 0.5103694102397927, "grad_norm": 1.0334168672561646, "learning_rate": 4.7470277143836e-06, "loss": 0.1545, "step": 3150 }, { "epoch": 0.5105314322747894, "grad_norm": 0.978326678276062, "learning_rate": 4.746835993786445e-06, "loss": 0.1715, "step": 3151 }, { "epoch": 0.5106934543097861, "grad_norm": 0.9503743052482605, "learning_rate": 4.746644204441545e-06, "loss": 0.147, "step": 3152 }, { "epoch": 0.5108554763447829, "grad_norm": 0.8829200863838196, "learning_rate": 4.7464523463547695e-06, "loss": 0.1584, "step": 3153 }, { "epoch": 0.5110174983797796, "grad_norm": 1.0434906482696533, "learning_rate": 4.746260419531989e-06, "loss": 0.1657, "step": 3154 }, { "epoch": 0.5111795204147764, "grad_norm": 0.9752653241157532, "learning_rate": 4.746068423979074e-06, "loss": 0.1518, "step": 3155 }, { "epoch": 0.5113415424497731, "grad_norm": 1.0544825792312622, "learning_rate": 4.745876359701902e-06, "loss": 0.1599, "step": 3156 }, { "epoch": 0.51150356448477, "grad_norm": 1.0769002437591553, "learning_rate": 4.745684226706348e-06, "loss": 0.154, "step": 3157 }, { "epoch": 0.5116655865197667, "grad_norm": 0.8938611745834351, "learning_rate": 4.745492024998291e-06, "loss": 0.1473, "step": 3158 }, { "epoch": 0.5118276085547635, "grad_norm": 0.9698961973190308, "learning_rate": 4.745299754583612e-06, "loss": 0.149, "step": 3159 }, { "epoch": 0.5119896305897602, "grad_norm": 0.9677153825759888, "learning_rate": 4.745107415468194e-06, "loss": 0.1546, "step": 3160 }, { "epoch": 0.5121516526247569, "grad_norm": 1.0147168636322021, "learning_rate": 4.744915007657922e-06, "loss": 0.1603, "step": 3161 }, { "epoch": 0.5123136746597537, "grad_norm": 1.016696572303772, "learning_rate": 4.744722531158683e-06, "loss": 0.1499, "step": 3162 }, { "epoch": 0.5124756966947505, "grad_norm": 0.9147832989692688, "learning_rate": 4.744529985976368e-06, "loss": 0.1331, "step": 3163 }, { "epoch": 0.5126377187297473, "grad_norm": 1.0391688346862793, "learning_rate": 4.744337372116866e-06, "loss": 0.1582, "step": 3164 }, { "epoch": 0.512799740764744, "grad_norm": 0.8867128491401672, "learning_rate": 4.744144689586072e-06, "loss": 0.135, "step": 3165 }, { "epoch": 0.5129617627997408, "grad_norm": 0.9849241971969604, "learning_rate": 4.743951938389881e-06, "loss": 0.1613, "step": 3166 }, { "epoch": 0.5131237848347375, "grad_norm": 1.0392513275146484, "learning_rate": 4.743759118534191e-06, "loss": 0.1329, "step": 3167 }, { "epoch": 0.5132858068697342, "grad_norm": 0.9350460767745972, "learning_rate": 4.743566230024902e-06, "loss": 0.146, "step": 3168 }, { "epoch": 0.5134478289047311, "grad_norm": 1.038048505783081, "learning_rate": 4.743373272867916e-06, "loss": 0.1474, "step": 3169 }, { "epoch": 0.5136098509397278, "grad_norm": 0.8922699689865112, "learning_rate": 4.7431802470691355e-06, "loss": 0.1342, "step": 3170 }, { "epoch": 0.5137718729747246, "grad_norm": 1.0452051162719727, "learning_rate": 4.742987152634469e-06, "loss": 0.1422, "step": 3171 }, { "epoch": 0.5139338950097213, "grad_norm": 0.9297380447387695, "learning_rate": 4.7427939895698235e-06, "loss": 0.1562, "step": 3172 }, { "epoch": 0.5140959170447181, "grad_norm": 1.0185930728912354, "learning_rate": 4.7426007578811085e-06, "loss": 0.1658, "step": 3173 }, { "epoch": 0.5142579390797148, "grad_norm": 0.8730944991111755, "learning_rate": 4.742407457574238e-06, "loss": 0.1304, "step": 3174 }, { "epoch": 0.5144199611147116, "grad_norm": 1.0207183361053467, "learning_rate": 4.742214088655126e-06, "loss": 0.1488, "step": 3175 }, { "epoch": 0.5145819831497084, "grad_norm": 0.978741466999054, "learning_rate": 4.7420206511296885e-06, "loss": 0.1657, "step": 3176 }, { "epoch": 0.5147440051847051, "grad_norm": 0.8174691200256348, "learning_rate": 4.7418271450038444e-06, "loss": 0.1233, "step": 3177 }, { "epoch": 0.5149060272197019, "grad_norm": 0.9762808084487915, "learning_rate": 4.7416335702835155e-06, "loss": 0.1507, "step": 3178 }, { "epoch": 0.5150680492546986, "grad_norm": 0.8684691786766052, "learning_rate": 4.7414399269746235e-06, "loss": 0.1492, "step": 3179 }, { "epoch": 0.5152300712896954, "grad_norm": 0.9834610223770142, "learning_rate": 4.741246215083094e-06, "loss": 0.1429, "step": 3180 }, { "epoch": 0.5153920933246922, "grad_norm": 0.9805695414543152, "learning_rate": 4.741052434614854e-06, "loss": 0.1474, "step": 3181 }, { "epoch": 0.5155541153596889, "grad_norm": 1.0447092056274414, "learning_rate": 4.740858585575832e-06, "loss": 0.1786, "step": 3182 }, { "epoch": 0.5157161373946857, "grad_norm": 1.1218931674957275, "learning_rate": 4.740664667971962e-06, "loss": 0.1708, "step": 3183 }, { "epoch": 0.5158781594296824, "grad_norm": 0.8798757195472717, "learning_rate": 4.7404706818091736e-06, "loss": 0.1393, "step": 3184 }, { "epoch": 0.5160401814646792, "grad_norm": 0.8483942747116089, "learning_rate": 4.740276627093405e-06, "loss": 0.1447, "step": 3185 }, { "epoch": 0.5162022034996759, "grad_norm": 1.1056780815124512, "learning_rate": 4.740082503830593e-06, "loss": 0.1656, "step": 3186 }, { "epoch": 0.5163642255346728, "grad_norm": 0.8028669953346252, "learning_rate": 4.739888312026677e-06, "loss": 0.1112, "step": 3187 }, { "epoch": 0.5165262475696695, "grad_norm": 1.00840163230896, "learning_rate": 4.7396940516875996e-06, "loss": 0.1385, "step": 3188 }, { "epoch": 0.5166882696046662, "grad_norm": 1.072086215019226, "learning_rate": 4.739499722819304e-06, "loss": 0.1697, "step": 3189 }, { "epoch": 0.516850291639663, "grad_norm": 0.9224776029586792, "learning_rate": 4.739305325427736e-06, "loss": 0.1426, "step": 3190 }, { "epoch": 0.5170123136746597, "grad_norm": 0.8584015369415283, "learning_rate": 4.739110859518844e-06, "loss": 0.1288, "step": 3191 }, { "epoch": 0.5171743357096565, "grad_norm": 1.027614712715149, "learning_rate": 4.738916325098579e-06, "loss": 0.1508, "step": 3192 }, { "epoch": 0.5173363577446533, "grad_norm": 1.064180612564087, "learning_rate": 4.738721722172891e-06, "loss": 0.1641, "step": 3193 }, { "epoch": 0.5174983797796501, "grad_norm": 0.8860505223274231, "learning_rate": 4.738527050747738e-06, "loss": 0.1344, "step": 3194 }, { "epoch": 0.5176604018146468, "grad_norm": 0.8795318603515625, "learning_rate": 4.738332310829073e-06, "loss": 0.1374, "step": 3195 }, { "epoch": 0.5178224238496435, "grad_norm": 1.0304006338119507, "learning_rate": 4.738137502422856e-06, "loss": 0.1534, "step": 3196 }, { "epoch": 0.5179844458846403, "grad_norm": 1.116926670074463, "learning_rate": 4.737942625535048e-06, "loss": 0.1539, "step": 3197 }, { "epoch": 0.518146467919637, "grad_norm": 0.8668916821479797, "learning_rate": 4.737747680171611e-06, "loss": 0.1338, "step": 3198 }, { "epoch": 0.5183084899546339, "grad_norm": 1.1048635244369507, "learning_rate": 4.737552666338511e-06, "loss": 0.1753, "step": 3199 }, { "epoch": 0.5184705119896306, "grad_norm": 0.8767505884170532, "learning_rate": 4.737357584041713e-06, "loss": 0.1328, "step": 3200 }, { "epoch": 0.5186325340246274, "grad_norm": 0.9243037700653076, "learning_rate": 4.737162433287188e-06, "loss": 0.1562, "step": 3201 }, { "epoch": 0.5187945560596241, "grad_norm": 1.0530647039413452, "learning_rate": 4.7369672140809065e-06, "loss": 0.1638, "step": 3202 }, { "epoch": 0.5189565780946209, "grad_norm": 0.9306735396385193, "learning_rate": 4.736771926428841e-06, "loss": 0.1411, "step": 3203 }, { "epoch": 0.5191186001296176, "grad_norm": 1.0215457677841187, "learning_rate": 4.736576570336968e-06, "loss": 0.1399, "step": 3204 }, { "epoch": 0.5192806221646143, "grad_norm": 0.9567215442657471, "learning_rate": 4.736381145811264e-06, "loss": 0.1628, "step": 3205 }, { "epoch": 0.5194426441996112, "grad_norm": 0.9945108294487, "learning_rate": 4.736185652857709e-06, "loss": 0.1668, "step": 3206 }, { "epoch": 0.5196046662346079, "grad_norm": 0.9826516509056091, "learning_rate": 4.735990091482284e-06, "loss": 0.1528, "step": 3207 }, { "epoch": 0.5197666882696047, "grad_norm": 0.8901511430740356, "learning_rate": 4.7357944616909745e-06, "loss": 0.14, "step": 3208 }, { "epoch": 0.5199287103046014, "grad_norm": 0.9824097156524658, "learning_rate": 4.735598763489764e-06, "loss": 0.1584, "step": 3209 }, { "epoch": 0.5200907323395982, "grad_norm": 0.8953663110733032, "learning_rate": 4.735402996884642e-06, "loss": 0.1347, "step": 3210 }, { "epoch": 0.520252754374595, "grad_norm": 0.8961206674575806, "learning_rate": 4.735207161881596e-06, "loss": 0.1368, "step": 3211 }, { "epoch": 0.5204147764095917, "grad_norm": 0.9631252884864807, "learning_rate": 4.7350112584866225e-06, "loss": 0.1638, "step": 3212 }, { "epoch": 0.5205767984445885, "grad_norm": 0.7925556898117065, "learning_rate": 4.734815286705712e-06, "loss": 0.13, "step": 3213 }, { "epoch": 0.5207388204795852, "grad_norm": 1.1014782190322876, "learning_rate": 4.734619246544862e-06, "loss": 0.1628, "step": 3214 }, { "epoch": 0.520900842514582, "grad_norm": 0.9577755928039551, "learning_rate": 4.73442313801007e-06, "loss": 0.1562, "step": 3215 }, { "epoch": 0.5210628645495787, "grad_norm": 0.9577489495277405, "learning_rate": 4.734226961107338e-06, "loss": 0.1627, "step": 3216 }, { "epoch": 0.5212248865845756, "grad_norm": 0.9195597171783447, "learning_rate": 4.734030715842667e-06, "loss": 0.1485, "step": 3217 }, { "epoch": 0.5213869086195723, "grad_norm": 1.0537441968917847, "learning_rate": 4.733834402222064e-06, "loss": 0.1548, "step": 3218 }, { "epoch": 0.521548930654569, "grad_norm": 1.0351758003234863, "learning_rate": 4.733638020251532e-06, "loss": 0.1574, "step": 3219 }, { "epoch": 0.5217109526895658, "grad_norm": 0.934990406036377, "learning_rate": 4.7334415699370825e-06, "loss": 0.1425, "step": 3220 }, { "epoch": 0.5218729747245625, "grad_norm": 1.002612590789795, "learning_rate": 4.733245051284727e-06, "loss": 0.1538, "step": 3221 }, { "epoch": 0.5220349967595593, "grad_norm": 1.078823208808899, "learning_rate": 4.733048464300476e-06, "loss": 0.1703, "step": 3222 }, { "epoch": 0.522197018794556, "grad_norm": 0.9427096843719482, "learning_rate": 4.732851808990346e-06, "loss": 0.1432, "step": 3223 }, { "epoch": 0.5223590408295529, "grad_norm": 0.9111664891242981, "learning_rate": 4.732655085360355e-06, "loss": 0.1408, "step": 3224 }, { "epoch": 0.5225210628645496, "grad_norm": 1.06932532787323, "learning_rate": 4.732458293416519e-06, "loss": 0.1691, "step": 3225 }, { "epoch": 0.5226830848995463, "grad_norm": 1.0022755861282349, "learning_rate": 4.7322614331648645e-06, "loss": 0.161, "step": 3226 }, { "epoch": 0.5228451069345431, "grad_norm": 0.9708278775215149, "learning_rate": 4.73206450461141e-06, "loss": 0.1496, "step": 3227 }, { "epoch": 0.5230071289695398, "grad_norm": 1.0193688869476318, "learning_rate": 4.731867507762184e-06, "loss": 0.1484, "step": 3228 }, { "epoch": 0.5231691510045366, "grad_norm": 0.90104079246521, "learning_rate": 4.731670442623214e-06, "loss": 0.1321, "step": 3229 }, { "epoch": 0.5233311730395334, "grad_norm": 1.0102612972259521, "learning_rate": 4.731473309200528e-06, "loss": 0.1552, "step": 3230 }, { "epoch": 0.5234931950745302, "grad_norm": 0.8935184478759766, "learning_rate": 4.731276107500159e-06, "loss": 0.1449, "step": 3231 }, { "epoch": 0.5236552171095269, "grad_norm": 0.8871548771858215, "learning_rate": 4.731078837528141e-06, "loss": 0.1353, "step": 3232 }, { "epoch": 0.5238172391445236, "grad_norm": 1.071672797203064, "learning_rate": 4.730881499290509e-06, "loss": 0.1683, "step": 3233 }, { "epoch": 0.5239792611795204, "grad_norm": 0.872079074382782, "learning_rate": 4.730684092793302e-06, "loss": 0.1411, "step": 3234 }, { "epoch": 0.5241412832145171, "grad_norm": 0.931596040725708, "learning_rate": 4.73048661804256e-06, "loss": 0.1633, "step": 3235 }, { "epoch": 0.524303305249514, "grad_norm": 0.8818705081939697, "learning_rate": 4.730289075044326e-06, "loss": 0.1281, "step": 3236 }, { "epoch": 0.5244653272845107, "grad_norm": 0.9318626523017883, "learning_rate": 4.730091463804642e-06, "loss": 0.1438, "step": 3237 }, { "epoch": 0.5246273493195075, "grad_norm": 1.0269670486450195, "learning_rate": 4.729893784329557e-06, "loss": 0.1696, "step": 3238 }, { "epoch": 0.5247893713545042, "grad_norm": 0.8802597522735596, "learning_rate": 4.729696036625119e-06, "loss": 0.1319, "step": 3239 }, { "epoch": 0.5249513933895009, "grad_norm": 0.9144219756126404, "learning_rate": 4.729498220697377e-06, "loss": 0.1412, "step": 3240 }, { "epoch": 0.5251134154244977, "grad_norm": 0.9358303546905518, "learning_rate": 4.729300336552385e-06, "loss": 0.1604, "step": 3241 }, { "epoch": 0.5252754374594945, "grad_norm": 0.8559423089027405, "learning_rate": 4.729102384196197e-06, "loss": 0.1352, "step": 3242 }, { "epoch": 0.5254374594944913, "grad_norm": 0.9450469017028809, "learning_rate": 4.728904363634871e-06, "loss": 0.1367, "step": 3243 }, { "epoch": 0.525599481529488, "grad_norm": 0.9907295107841492, "learning_rate": 4.728706274874465e-06, "loss": 0.1418, "step": 3244 }, { "epoch": 0.5257615035644848, "grad_norm": 0.9787341356277466, "learning_rate": 4.72850811792104e-06, "loss": 0.1519, "step": 3245 }, { "epoch": 0.5259235255994815, "grad_norm": 0.9200270771980286, "learning_rate": 4.72830989278066e-06, "loss": 0.1517, "step": 3246 }, { "epoch": 0.5260855476344782, "grad_norm": 0.9773831367492676, "learning_rate": 4.72811159945939e-06, "loss": 0.1637, "step": 3247 }, { "epoch": 0.5262475696694751, "grad_norm": 1.007265329360962, "learning_rate": 4.727913237963296e-06, "loss": 0.1624, "step": 3248 }, { "epoch": 0.5264095917044718, "grad_norm": 0.9802579283714294, "learning_rate": 4.7277148082984495e-06, "loss": 0.1675, "step": 3249 }, { "epoch": 0.5265716137394686, "grad_norm": 0.9074774980545044, "learning_rate": 4.72751631047092e-06, "loss": 0.1461, "step": 3250 }, { "epoch": 0.5267336357744653, "grad_norm": 1.1058282852172852, "learning_rate": 4.727317744486783e-06, "loss": 0.1912, "step": 3251 }, { "epoch": 0.5268956578094621, "grad_norm": 0.9917949438095093, "learning_rate": 4.727119110352112e-06, "loss": 0.1563, "step": 3252 }, { "epoch": 0.5270576798444588, "grad_norm": 0.9093613624572754, "learning_rate": 4.726920408072985e-06, "loss": 0.1402, "step": 3253 }, { "epoch": 0.5272197018794557, "grad_norm": 0.9544733166694641, "learning_rate": 4.726721637655484e-06, "loss": 0.1594, "step": 3254 }, { "epoch": 0.5273817239144524, "grad_norm": 0.920921802520752, "learning_rate": 4.726522799105689e-06, "loss": 0.152, "step": 3255 }, { "epoch": 0.5275437459494491, "grad_norm": 1.037856101989746, "learning_rate": 4.7263238924296835e-06, "loss": 0.1575, "step": 3256 }, { "epoch": 0.5277057679844459, "grad_norm": 0.9055295586585999, "learning_rate": 4.726124917633556e-06, "loss": 0.1405, "step": 3257 }, { "epoch": 0.5278677900194426, "grad_norm": 0.952745258808136, "learning_rate": 4.725925874723393e-06, "loss": 0.1487, "step": 3258 }, { "epoch": 0.5280298120544394, "grad_norm": 1.02610445022583, "learning_rate": 4.725726763705284e-06, "loss": 0.1586, "step": 3259 }, { "epoch": 0.5281918340894362, "grad_norm": 0.9188981652259827, "learning_rate": 4.725527584585322e-06, "loss": 0.1264, "step": 3260 }, { "epoch": 0.528353856124433, "grad_norm": 1.0480659008026123, "learning_rate": 4.725328337369602e-06, "loss": 0.149, "step": 3261 }, { "epoch": 0.5285158781594297, "grad_norm": 0.9283803105354309, "learning_rate": 4.725129022064221e-06, "loss": 0.1442, "step": 3262 }, { "epoch": 0.5286779001944264, "grad_norm": 0.8994842171669006, "learning_rate": 4.7249296386752754e-06, "loss": 0.1296, "step": 3263 }, { "epoch": 0.5288399222294232, "grad_norm": 0.9448267817497253, "learning_rate": 4.724730187208868e-06, "loss": 0.1365, "step": 3264 }, { "epoch": 0.5290019442644199, "grad_norm": 1.0473707914352417, "learning_rate": 4.7245306676711e-06, "loss": 0.1619, "step": 3265 }, { "epoch": 0.5291639662994168, "grad_norm": 0.9334203004837036, "learning_rate": 4.724331080068077e-06, "loss": 0.1456, "step": 3266 }, { "epoch": 0.5293259883344135, "grad_norm": 1.053633451461792, "learning_rate": 4.724131424405906e-06, "loss": 0.1759, "step": 3267 }, { "epoch": 0.5294880103694103, "grad_norm": 0.9441131949424744, "learning_rate": 4.723931700690695e-06, "loss": 0.1525, "step": 3268 }, { "epoch": 0.529650032404407, "grad_norm": 0.875206470489502, "learning_rate": 4.723731908928556e-06, "loss": 0.1461, "step": 3269 }, { "epoch": 0.5298120544394037, "grad_norm": 0.9882826805114746, "learning_rate": 4.7235320491256026e-06, "loss": 0.1414, "step": 3270 }, { "epoch": 0.5299740764744005, "grad_norm": 0.8946253061294556, "learning_rate": 4.723332121287949e-06, "loss": 0.1441, "step": 3271 }, { "epoch": 0.5301360985093972, "grad_norm": 0.8672228455543518, "learning_rate": 4.723132125421712e-06, "loss": 0.1381, "step": 3272 }, { "epoch": 0.5302981205443941, "grad_norm": 0.9539285898208618, "learning_rate": 4.7229320615330136e-06, "loss": 0.1544, "step": 3273 }, { "epoch": 0.5304601425793908, "grad_norm": 1.0140817165374756, "learning_rate": 4.722731929627971e-06, "loss": 0.1621, "step": 3274 }, { "epoch": 0.5306221646143876, "grad_norm": 1.0421953201293945, "learning_rate": 4.7225317297127125e-06, "loss": 0.1633, "step": 3275 }, { "epoch": 0.5307841866493843, "grad_norm": 1.0012775659561157, "learning_rate": 4.722331461793361e-06, "loss": 0.1464, "step": 3276 }, { "epoch": 0.530946208684381, "grad_norm": 0.9863828420639038, "learning_rate": 4.722131125876044e-06, "loss": 0.1476, "step": 3277 }, { "epoch": 0.5311082307193778, "grad_norm": 0.9448386430740356, "learning_rate": 4.721930721966893e-06, "loss": 0.1432, "step": 3278 }, { "epoch": 0.5312702527543746, "grad_norm": 0.898563802242279, "learning_rate": 4.721730250072038e-06, "loss": 0.1365, "step": 3279 }, { "epoch": 0.5314322747893714, "grad_norm": 0.8489564061164856, "learning_rate": 4.7215297101976145e-06, "loss": 0.1388, "step": 3280 }, { "epoch": 0.5315942968243681, "grad_norm": 0.9576735496520996, "learning_rate": 4.721329102349757e-06, "loss": 0.1527, "step": 3281 }, { "epoch": 0.5317563188593649, "grad_norm": 0.9543820023536682, "learning_rate": 4.721128426534605e-06, "loss": 0.1396, "step": 3282 }, { "epoch": 0.5319183408943616, "grad_norm": 1.031876564025879, "learning_rate": 4.720927682758298e-06, "loss": 0.1608, "step": 3283 }, { "epoch": 0.5320803629293583, "grad_norm": 0.9371171593666077, "learning_rate": 4.720726871026978e-06, "loss": 0.1366, "step": 3284 }, { "epoch": 0.5322423849643552, "grad_norm": 0.9507832527160645, "learning_rate": 4.720525991346791e-06, "loss": 0.1423, "step": 3285 }, { "epoch": 0.5324044069993519, "grad_norm": 1.2056124210357666, "learning_rate": 4.720325043723881e-06, "loss": 0.1797, "step": 3286 }, { "epoch": 0.5325664290343487, "grad_norm": 1.0787984132766724, "learning_rate": 4.720124028164399e-06, "loss": 0.1548, "step": 3287 }, { "epoch": 0.5327284510693454, "grad_norm": 1.0400218963623047, "learning_rate": 4.719922944674494e-06, "loss": 0.1581, "step": 3288 }, { "epoch": 0.5328904731043422, "grad_norm": 0.9209017753601074, "learning_rate": 4.719721793260318e-06, "loss": 0.1439, "step": 3289 }, { "epoch": 0.5330524951393389, "grad_norm": 0.8602655529975891, "learning_rate": 4.719520573928028e-06, "loss": 0.1304, "step": 3290 }, { "epoch": 0.5332145171743357, "grad_norm": 0.9528794884681702, "learning_rate": 4.719319286683779e-06, "loss": 0.1422, "step": 3291 }, { "epoch": 0.5333765392093325, "grad_norm": 1.055066466331482, "learning_rate": 4.71911793153373e-06, "loss": 0.1698, "step": 3292 }, { "epoch": 0.5335385612443292, "grad_norm": 0.9883852601051331, "learning_rate": 4.718916508484043e-06, "loss": 0.1581, "step": 3293 }, { "epoch": 0.533700583279326, "grad_norm": 0.9446104764938354, "learning_rate": 4.7187150175408805e-06, "loss": 0.1551, "step": 3294 }, { "epoch": 0.5338626053143227, "grad_norm": 1.1086978912353516, "learning_rate": 4.7185134587104075e-06, "loss": 0.1783, "step": 3295 }, { "epoch": 0.5340246273493195, "grad_norm": 0.9049498438835144, "learning_rate": 4.718311831998792e-06, "loss": 0.146, "step": 3296 }, { "epoch": 0.5341866493843163, "grad_norm": 1.0368142127990723, "learning_rate": 4.718110137412201e-06, "loss": 0.1789, "step": 3297 }, { "epoch": 0.5343486714193131, "grad_norm": 0.9038468599319458, "learning_rate": 4.71790837495681e-06, "loss": 0.1441, "step": 3298 }, { "epoch": 0.5345106934543098, "grad_norm": 0.9993532299995422, "learning_rate": 4.717706544638788e-06, "loss": 0.1552, "step": 3299 }, { "epoch": 0.5346727154893065, "grad_norm": 0.8008403778076172, "learning_rate": 4.717504646464314e-06, "loss": 0.1378, "step": 3300 }, { "epoch": 0.5348347375243033, "grad_norm": 0.9350391030311584, "learning_rate": 4.717302680439563e-06, "loss": 0.1507, "step": 3301 }, { "epoch": 0.5349967595593, "grad_norm": 0.8926771879196167, "learning_rate": 4.717100646570716e-06, "loss": 0.1515, "step": 3302 }, { "epoch": 0.5351587815942969, "grad_norm": 0.8540582060813904, "learning_rate": 4.716898544863954e-06, "loss": 0.1348, "step": 3303 }, { "epoch": 0.5353208036292936, "grad_norm": 0.9826602935791016, "learning_rate": 4.7166963753254616e-06, "loss": 0.1562, "step": 3304 }, { "epoch": 0.5354828256642904, "grad_norm": 0.9145976901054382, "learning_rate": 4.716494137961425e-06, "loss": 0.1367, "step": 3305 }, { "epoch": 0.5356448476992871, "grad_norm": 0.8795981407165527, "learning_rate": 4.716291832778031e-06, "loss": 0.1399, "step": 3306 }, { "epoch": 0.5358068697342838, "grad_norm": 0.932141125202179, "learning_rate": 4.71608945978147e-06, "loss": 0.1474, "step": 3307 }, { "epoch": 0.5359688917692806, "grad_norm": 0.9054761528968811, "learning_rate": 4.715887018977935e-06, "loss": 0.1486, "step": 3308 }, { "epoch": 0.5361309138042774, "grad_norm": 0.9789231419563293, "learning_rate": 4.715684510373619e-06, "loss": 0.1493, "step": 3309 }, { "epoch": 0.5362929358392742, "grad_norm": 0.9172061681747437, "learning_rate": 4.715481933974719e-06, "loss": 0.1352, "step": 3310 }, { "epoch": 0.5364549578742709, "grad_norm": 0.9078491926193237, "learning_rate": 4.715279289787434e-06, "loss": 0.141, "step": 3311 }, { "epoch": 0.5366169799092677, "grad_norm": 1.0831925868988037, "learning_rate": 4.715076577817963e-06, "loss": 0.153, "step": 3312 }, { "epoch": 0.5367790019442644, "grad_norm": 1.0742647647857666, "learning_rate": 4.714873798072509e-06, "loss": 0.165, "step": 3313 }, { "epoch": 0.5369410239792611, "grad_norm": 1.015164852142334, "learning_rate": 4.714670950557276e-06, "loss": 0.1408, "step": 3314 }, { "epoch": 0.537103046014258, "grad_norm": 1.0785647630691528, "learning_rate": 4.714468035278473e-06, "loss": 0.1709, "step": 3315 }, { "epoch": 0.5372650680492547, "grad_norm": 1.0501564741134644, "learning_rate": 4.714265052242306e-06, "loss": 0.1692, "step": 3316 }, { "epoch": 0.5374270900842515, "grad_norm": 0.8828537464141846, "learning_rate": 4.714062001454986e-06, "loss": 0.1389, "step": 3317 }, { "epoch": 0.5375891121192482, "grad_norm": 1.0610477924346924, "learning_rate": 4.7138588829227285e-06, "loss": 0.1675, "step": 3318 }, { "epoch": 0.537751134154245, "grad_norm": 0.8431143760681152, "learning_rate": 4.713655696651746e-06, "loss": 0.1328, "step": 3319 }, { "epoch": 0.5379131561892417, "grad_norm": 1.091422438621521, "learning_rate": 4.713452442648255e-06, "loss": 0.1691, "step": 3320 }, { "epoch": 0.5380751782242384, "grad_norm": 0.9422255158424377, "learning_rate": 4.713249120918476e-06, "loss": 0.1448, "step": 3321 }, { "epoch": 0.5382372002592353, "grad_norm": 0.9391464591026306, "learning_rate": 4.7130457314686316e-06, "loss": 0.1406, "step": 3322 }, { "epoch": 0.538399222294232, "grad_norm": 0.9336527585983276, "learning_rate": 4.712842274304942e-06, "loss": 0.1559, "step": 3323 }, { "epoch": 0.5385612443292288, "grad_norm": 1.0600395202636719, "learning_rate": 4.712638749433634e-06, "loss": 0.1621, "step": 3324 }, { "epoch": 0.5387232663642255, "grad_norm": 0.8601197004318237, "learning_rate": 4.712435156860934e-06, "loss": 0.1338, "step": 3325 }, { "epoch": 0.5388852883992223, "grad_norm": 0.9096711277961731, "learning_rate": 4.7122314965930724e-06, "loss": 0.1335, "step": 3326 }, { "epoch": 0.539047310434219, "grad_norm": 0.9492406845092773, "learning_rate": 4.712027768636282e-06, "loss": 0.1475, "step": 3327 }, { "epoch": 0.5392093324692158, "grad_norm": 1.070809245109558, "learning_rate": 4.711823972996793e-06, "loss": 0.1636, "step": 3328 }, { "epoch": 0.5393713545042126, "grad_norm": 1.0436028242111206, "learning_rate": 4.711620109680843e-06, "loss": 0.1788, "step": 3329 }, { "epoch": 0.5395333765392093, "grad_norm": 0.9128246307373047, "learning_rate": 4.711416178694671e-06, "loss": 0.1435, "step": 3330 }, { "epoch": 0.5396953985742061, "grad_norm": 0.9323081970214844, "learning_rate": 4.7112121800445146e-06, "loss": 0.1534, "step": 3331 }, { "epoch": 0.5398574206092028, "grad_norm": 1.0300610065460205, "learning_rate": 4.711008113736617e-06, "loss": 0.1807, "step": 3332 }, { "epoch": 0.5400194426441997, "grad_norm": 0.9709299206733704, "learning_rate": 4.710803979777221e-06, "loss": 0.1371, "step": 3333 }, { "epoch": 0.5401814646791964, "grad_norm": 0.9480953812599182, "learning_rate": 4.710599778172575e-06, "loss": 0.1549, "step": 3334 }, { "epoch": 0.5403434867141931, "grad_norm": 0.871699869632721, "learning_rate": 4.710395508928923e-06, "loss": 0.1341, "step": 3335 }, { "epoch": 0.5405055087491899, "grad_norm": 1.0588428974151611, "learning_rate": 4.7101911720525186e-06, "loss": 0.1603, "step": 3336 }, { "epoch": 0.5406675307841866, "grad_norm": 0.9104177951812744, "learning_rate": 4.709986767549612e-06, "loss": 0.1527, "step": 3337 }, { "epoch": 0.5408295528191834, "grad_norm": 0.8339903950691223, "learning_rate": 4.70978229542646e-06, "loss": 0.1166, "step": 3338 }, { "epoch": 0.5409915748541801, "grad_norm": 1.0834025144577026, "learning_rate": 4.709577755689316e-06, "loss": 0.1623, "step": 3339 }, { "epoch": 0.541153596889177, "grad_norm": 0.9392449259757996, "learning_rate": 4.709373148344441e-06, "loss": 0.1518, "step": 3340 }, { "epoch": 0.5413156189241737, "grad_norm": 1.014581561088562, "learning_rate": 4.709168473398094e-06, "loss": 0.1585, "step": 3341 }, { "epoch": 0.5414776409591704, "grad_norm": 0.9345294833183289, "learning_rate": 4.708963730856536e-06, "loss": 0.1385, "step": 3342 }, { "epoch": 0.5416396629941672, "grad_norm": 0.975787341594696, "learning_rate": 4.708758920726036e-06, "loss": 0.1515, "step": 3343 }, { "epoch": 0.5418016850291639, "grad_norm": 0.9591774940490723, "learning_rate": 4.708554043012857e-06, "loss": 0.1466, "step": 3344 }, { "epoch": 0.5419637070641607, "grad_norm": 0.9647694230079651, "learning_rate": 4.708349097723268e-06, "loss": 0.15, "step": 3345 }, { "epoch": 0.5421257290991575, "grad_norm": 1.0015144348144531, "learning_rate": 4.708144084863541e-06, "loss": 0.1497, "step": 3346 }, { "epoch": 0.5422877511341543, "grad_norm": 0.9959015846252441, "learning_rate": 4.70793900443995e-06, "loss": 0.1338, "step": 3347 }, { "epoch": 0.542449773169151, "grad_norm": 1.017600655555725, "learning_rate": 4.707733856458767e-06, "loss": 0.1643, "step": 3348 }, { "epoch": 0.5426117952041478, "grad_norm": 1.165099859237671, "learning_rate": 4.707528640926271e-06, "loss": 0.1774, "step": 3349 }, { "epoch": 0.5427738172391445, "grad_norm": 1.0708073377609253, "learning_rate": 4.707323357848741e-06, "loss": 0.174, "step": 3350 }, { "epoch": 0.5429358392741412, "grad_norm": 0.931759238243103, "learning_rate": 4.707118007232457e-06, "loss": 0.142, "step": 3351 }, { "epoch": 0.5430978613091381, "grad_norm": 1.1331273317337036, "learning_rate": 4.706912589083704e-06, "loss": 0.171, "step": 3352 }, { "epoch": 0.5432598833441348, "grad_norm": 0.9179681539535522, "learning_rate": 4.706707103408767e-06, "loss": 0.1463, "step": 3353 }, { "epoch": 0.5434219053791316, "grad_norm": 0.9513891935348511, "learning_rate": 4.706501550213932e-06, "loss": 0.1706, "step": 3354 }, { "epoch": 0.5435839274141283, "grad_norm": 0.9615473747253418, "learning_rate": 4.706295929505489e-06, "loss": 0.1522, "step": 3355 }, { "epoch": 0.5437459494491251, "grad_norm": 0.9769582152366638, "learning_rate": 4.70609024128973e-06, "loss": 0.1464, "step": 3356 }, { "epoch": 0.5439079714841218, "grad_norm": 0.9356233477592468, "learning_rate": 4.705884485572948e-06, "loss": 0.1442, "step": 3357 }, { "epoch": 0.5440699935191186, "grad_norm": 1.0967140197753906, "learning_rate": 4.7056786623614395e-06, "loss": 0.1843, "step": 3358 }, { "epoch": 0.5442320155541154, "grad_norm": 1.0115594863891602, "learning_rate": 4.705472771661501e-06, "loss": 0.1405, "step": 3359 }, { "epoch": 0.5443940375891121, "grad_norm": 1.0418258905410767, "learning_rate": 4.705266813479434e-06, "loss": 0.1626, "step": 3360 }, { "epoch": 0.5445560596241089, "grad_norm": 0.8964266180992126, "learning_rate": 4.7050607878215375e-06, "loss": 0.1398, "step": 3361 }, { "epoch": 0.5447180816591056, "grad_norm": 0.9070065021514893, "learning_rate": 4.704854694694117e-06, "loss": 0.137, "step": 3362 }, { "epoch": 0.5448801036941024, "grad_norm": 0.9841972589492798, "learning_rate": 4.704648534103479e-06, "loss": 0.1477, "step": 3363 }, { "epoch": 0.5450421257290992, "grad_norm": 0.9104343056678772, "learning_rate": 4.704442306055932e-06, "loss": 0.1468, "step": 3364 }, { "epoch": 0.5452041477640959, "grad_norm": 0.8985049724578857, "learning_rate": 4.704236010557784e-06, "loss": 0.1294, "step": 3365 }, { "epoch": 0.5453661697990927, "grad_norm": 0.9941574335098267, "learning_rate": 4.704029647615348e-06, "loss": 0.1539, "step": 3366 }, { "epoch": 0.5455281918340894, "grad_norm": 1.0449243783950806, "learning_rate": 4.7038232172349394e-06, "loss": 0.1488, "step": 3367 }, { "epoch": 0.5456902138690862, "grad_norm": 1.0465726852416992, "learning_rate": 4.703616719422873e-06, "loss": 0.1555, "step": 3368 }, { "epoch": 0.5458522359040829, "grad_norm": 1.0622889995574951, "learning_rate": 4.703410154185467e-06, "loss": 0.1572, "step": 3369 }, { "epoch": 0.5460142579390798, "grad_norm": 0.9786834716796875, "learning_rate": 4.703203521529044e-06, "loss": 0.1558, "step": 3370 }, { "epoch": 0.5461762799740765, "grad_norm": 1.072187066078186, "learning_rate": 4.702996821459923e-06, "loss": 0.1553, "step": 3371 }, { "epoch": 0.5463383020090732, "grad_norm": 0.9539135098457336, "learning_rate": 4.702790053984432e-06, "loss": 0.1469, "step": 3372 }, { "epoch": 0.54650032404407, "grad_norm": 0.9775049686431885, "learning_rate": 4.702583219108895e-06, "loss": 0.154, "step": 3373 }, { "epoch": 0.5466623460790667, "grad_norm": 0.8917540907859802, "learning_rate": 4.702376316839642e-06, "loss": 0.1335, "step": 3374 }, { "epoch": 0.5468243681140635, "grad_norm": 0.9353879690170288, "learning_rate": 4.7021693471830035e-06, "loss": 0.1562, "step": 3375 }, { "epoch": 0.5469863901490603, "grad_norm": 0.9763373732566833, "learning_rate": 4.701962310145312e-06, "loss": 0.1546, "step": 3376 }, { "epoch": 0.5471484121840571, "grad_norm": 0.9837629795074463, "learning_rate": 4.701755205732902e-06, "loss": 0.1645, "step": 3377 }, { "epoch": 0.5473104342190538, "grad_norm": 0.8612603545188904, "learning_rate": 4.7015480339521115e-06, "loss": 0.1393, "step": 3378 }, { "epoch": 0.5474724562540505, "grad_norm": 0.8714431524276733, "learning_rate": 4.701340794809278e-06, "loss": 0.1376, "step": 3379 }, { "epoch": 0.5476344782890473, "grad_norm": 0.9637095332145691, "learning_rate": 4.701133488310744e-06, "loss": 0.1527, "step": 3380 }, { "epoch": 0.547796500324044, "grad_norm": 1.0376760959625244, "learning_rate": 4.700926114462852e-06, "loss": 0.1709, "step": 3381 }, { "epoch": 0.5479585223590409, "grad_norm": 0.9160656332969666, "learning_rate": 4.700718673271947e-06, "loss": 0.1439, "step": 3382 }, { "epoch": 0.5481205443940376, "grad_norm": 0.8436545729637146, "learning_rate": 4.700511164744376e-06, "loss": 0.1203, "step": 3383 }, { "epoch": 0.5482825664290344, "grad_norm": 0.9517103433609009, "learning_rate": 4.700303588886489e-06, "loss": 0.1516, "step": 3384 }, { "epoch": 0.5484445884640311, "grad_norm": 0.9557342529296875, "learning_rate": 4.700095945704636e-06, "loss": 0.1502, "step": 3385 }, { "epoch": 0.5486066104990278, "grad_norm": 0.9375651478767395, "learning_rate": 4.699888235205172e-06, "loss": 0.1455, "step": 3386 }, { "epoch": 0.5487686325340246, "grad_norm": 0.9998489022254944, "learning_rate": 4.699680457394451e-06, "loss": 0.1429, "step": 3387 }, { "epoch": 0.5489306545690213, "grad_norm": 0.9671725630760193, "learning_rate": 4.699472612278831e-06, "loss": 0.1548, "step": 3388 }, { "epoch": 0.5490926766040182, "grad_norm": 1.063137412071228, "learning_rate": 4.699264699864672e-06, "loss": 0.1602, "step": 3389 }, { "epoch": 0.5492546986390149, "grad_norm": 1.0313401222229004, "learning_rate": 4.699056720158336e-06, "loss": 0.1663, "step": 3390 }, { "epoch": 0.5494167206740117, "grad_norm": 1.0677765607833862, "learning_rate": 4.698848673166185e-06, "loss": 0.1436, "step": 3391 }, { "epoch": 0.5495787427090084, "grad_norm": 1.0899100303649902, "learning_rate": 4.698640558894586e-06, "loss": 0.1608, "step": 3392 }, { "epoch": 0.5497407647440052, "grad_norm": 0.9777064323425293, "learning_rate": 4.6984323773499066e-06, "loss": 0.1537, "step": 3393 }, { "epoch": 0.549902786779002, "grad_norm": 1.0204713344573975, "learning_rate": 4.698224128538517e-06, "loss": 0.1535, "step": 3394 }, { "epoch": 0.5500648088139987, "grad_norm": 0.9061859846115112, "learning_rate": 4.698015812466787e-06, "loss": 0.1429, "step": 3395 }, { "epoch": 0.5502268308489955, "grad_norm": 0.9810669422149658, "learning_rate": 4.6978074291410936e-06, "loss": 0.1544, "step": 3396 }, { "epoch": 0.5503888528839922, "grad_norm": 0.9356314539909363, "learning_rate": 4.697598978567811e-06, "loss": 0.1427, "step": 3397 }, { "epoch": 0.550550874918989, "grad_norm": 1.0116984844207764, "learning_rate": 4.697390460753318e-06, "loss": 0.148, "step": 3398 }, { "epoch": 0.5507128969539857, "grad_norm": 0.9845331907272339, "learning_rate": 4.697181875703995e-06, "loss": 0.1722, "step": 3399 }, { "epoch": 0.5508749189889826, "grad_norm": 1.221174716949463, "learning_rate": 4.696973223426224e-06, "loss": 0.1573, "step": 3400 }, { "epoch": 0.5510369410239793, "grad_norm": 0.8805899620056152, "learning_rate": 4.696764503926387e-06, "loss": 0.137, "step": 3401 }, { "epoch": 0.551198963058976, "grad_norm": 0.8739351630210876, "learning_rate": 4.696555717210873e-06, "loss": 0.125, "step": 3402 }, { "epoch": 0.5513609850939728, "grad_norm": 0.8733137249946594, "learning_rate": 4.696346863286071e-06, "loss": 0.1306, "step": 3403 }, { "epoch": 0.5515230071289695, "grad_norm": 0.967108428478241, "learning_rate": 4.6961379421583685e-06, "loss": 0.1513, "step": 3404 }, { "epoch": 0.5516850291639663, "grad_norm": 0.9212470054626465, "learning_rate": 4.69592895383416e-06, "loss": 0.1431, "step": 3405 }, { "epoch": 0.551847051198963, "grad_norm": 0.9542239904403687, "learning_rate": 4.695719898319839e-06, "loss": 0.149, "step": 3406 }, { "epoch": 0.5520090732339599, "grad_norm": 0.8492709994316101, "learning_rate": 4.6955107756218035e-06, "loss": 0.1331, "step": 3407 }, { "epoch": 0.5521710952689566, "grad_norm": 0.9666154384613037, "learning_rate": 4.695301585746451e-06, "loss": 0.1457, "step": 3408 }, { "epoch": 0.5523331173039533, "grad_norm": 0.9427182674407959, "learning_rate": 4.695092328700182e-06, "loss": 0.1505, "step": 3409 }, { "epoch": 0.5524951393389501, "grad_norm": 0.9889812469482422, "learning_rate": 4.6948830044894016e-06, "loss": 0.1627, "step": 3410 }, { "epoch": 0.5526571613739468, "grad_norm": 1.1829040050506592, "learning_rate": 4.694673613120511e-06, "loss": 0.1849, "step": 3411 }, { "epoch": 0.5528191834089436, "grad_norm": 0.8475874662399292, "learning_rate": 4.6944641545999194e-06, "loss": 0.133, "step": 3412 }, { "epoch": 0.5529812054439404, "grad_norm": 0.9569193720817566, "learning_rate": 4.694254628934035e-06, "loss": 0.1498, "step": 3413 }, { "epoch": 0.5531432274789372, "grad_norm": 1.0036123991012573, "learning_rate": 4.694045036129269e-06, "loss": 0.1409, "step": 3414 }, { "epoch": 0.5533052495139339, "grad_norm": 0.9770514369010925, "learning_rate": 4.6938353761920345e-06, "loss": 0.1484, "step": 3415 }, { "epoch": 0.5534672715489306, "grad_norm": 0.9479352235794067, "learning_rate": 4.693625649128746e-06, "loss": 0.1481, "step": 3416 }, { "epoch": 0.5536292935839274, "grad_norm": 1.000531792640686, "learning_rate": 4.693415854945822e-06, "loss": 0.145, "step": 3417 }, { "epoch": 0.5537913156189241, "grad_norm": 1.1228997707366943, "learning_rate": 4.69320599364968e-06, "loss": 0.1561, "step": 3418 }, { "epoch": 0.553953337653921, "grad_norm": 0.9270069003105164, "learning_rate": 4.692996065246742e-06, "loss": 0.1384, "step": 3419 }, { "epoch": 0.5541153596889177, "grad_norm": 1.013025164604187, "learning_rate": 4.692786069743432e-06, "loss": 0.1832, "step": 3420 }, { "epoch": 0.5542773817239145, "grad_norm": 0.9863829612731934, "learning_rate": 4.692576007146175e-06, "loss": 0.149, "step": 3421 }, { "epoch": 0.5544394037589112, "grad_norm": 0.9370028376579285, "learning_rate": 4.692365877461397e-06, "loss": 0.1392, "step": 3422 }, { "epoch": 0.5546014257939079, "grad_norm": 1.1558516025543213, "learning_rate": 4.692155680695529e-06, "loss": 0.1732, "step": 3423 }, { "epoch": 0.5547634478289047, "grad_norm": 0.8533956408500671, "learning_rate": 4.691945416855002e-06, "loss": 0.1409, "step": 3424 }, { "epoch": 0.5549254698639015, "grad_norm": 1.1222232580184937, "learning_rate": 4.69173508594625e-06, "loss": 0.1489, "step": 3425 }, { "epoch": 0.5550874918988983, "grad_norm": 1.032179594039917, "learning_rate": 4.6915246879757084e-06, "loss": 0.162, "step": 3426 }, { "epoch": 0.555249513933895, "grad_norm": 1.0500115156173706, "learning_rate": 4.691314222949814e-06, "loss": 0.1491, "step": 3427 }, { "epoch": 0.5554115359688918, "grad_norm": 0.944101095199585, "learning_rate": 4.691103690875007e-06, "loss": 0.1565, "step": 3428 }, { "epoch": 0.5555735580038885, "grad_norm": 0.9684193134307861, "learning_rate": 4.690893091757731e-06, "loss": 0.1523, "step": 3429 }, { "epoch": 0.5557355800388852, "grad_norm": 0.9088751077651978, "learning_rate": 4.690682425604427e-06, "loss": 0.1326, "step": 3430 }, { "epoch": 0.5558976020738821, "grad_norm": 0.8529621958732605, "learning_rate": 4.6904716924215425e-06, "loss": 0.1359, "step": 3431 }, { "epoch": 0.5560596241088788, "grad_norm": 1.1265101432800293, "learning_rate": 4.690260892215525e-06, "loss": 0.1779, "step": 3432 }, { "epoch": 0.5562216461438756, "grad_norm": 0.9452961087226868, "learning_rate": 4.690050024992825e-06, "loss": 0.1409, "step": 3433 }, { "epoch": 0.5563836681788723, "grad_norm": 0.9862555265426636, "learning_rate": 4.689839090759893e-06, "loss": 0.1692, "step": 3434 }, { "epoch": 0.5565456902138691, "grad_norm": 0.9753914475440979, "learning_rate": 4.689628089523185e-06, "loss": 0.1589, "step": 3435 }, { "epoch": 0.5567077122488658, "grad_norm": 0.8871638178825378, "learning_rate": 4.689417021289157e-06, "loss": 0.1438, "step": 3436 }, { "epoch": 0.5568697342838627, "grad_norm": 0.890839159488678, "learning_rate": 4.689205886064265e-06, "loss": 0.1378, "step": 3437 }, { "epoch": 0.5570317563188594, "grad_norm": 1.1297328472137451, "learning_rate": 4.68899468385497e-06, "loss": 0.1829, "step": 3438 }, { "epoch": 0.5571937783538561, "grad_norm": 1.0578821897506714, "learning_rate": 4.6887834146677365e-06, "loss": 0.1538, "step": 3439 }, { "epoch": 0.5573558003888529, "grad_norm": 0.9971956014633179, "learning_rate": 4.688572078509027e-06, "loss": 0.1608, "step": 3440 }, { "epoch": 0.5575178224238496, "grad_norm": 1.0291696786880493, "learning_rate": 4.688360675385308e-06, "loss": 0.1569, "step": 3441 }, { "epoch": 0.5576798444588464, "grad_norm": 0.9045324921607971, "learning_rate": 4.688149205303048e-06, "loss": 0.1374, "step": 3442 }, { "epoch": 0.5578418664938432, "grad_norm": 1.0575282573699951, "learning_rate": 4.687937668268718e-06, "loss": 0.1511, "step": 3443 }, { "epoch": 0.55800388852884, "grad_norm": 0.9568415880203247, "learning_rate": 4.687726064288789e-06, "loss": 0.1441, "step": 3444 }, { "epoch": 0.5581659105638367, "grad_norm": 0.9325804710388184, "learning_rate": 4.687514393369738e-06, "loss": 0.1417, "step": 3445 }, { "epoch": 0.5583279325988334, "grad_norm": 1.066925048828125, "learning_rate": 4.6873026555180386e-06, "loss": 0.1646, "step": 3446 }, { "epoch": 0.5584899546338302, "grad_norm": 0.9176453948020935, "learning_rate": 4.687090850740172e-06, "loss": 0.1265, "step": 3447 }, { "epoch": 0.5586519766688269, "grad_norm": 0.9668508768081665, "learning_rate": 4.6868789790426185e-06, "loss": 0.1433, "step": 3448 }, { "epoch": 0.5588139987038238, "grad_norm": 0.8948216438293457, "learning_rate": 4.68666704043186e-06, "loss": 0.1344, "step": 3449 }, { "epoch": 0.5589760207388205, "grad_norm": 0.9884295463562012, "learning_rate": 4.6864550349143815e-06, "loss": 0.1584, "step": 3450 }, { "epoch": 0.5591380427738173, "grad_norm": 0.8515805006027222, "learning_rate": 4.6862429624966695e-06, "loss": 0.144, "step": 3451 }, { "epoch": 0.559300064808814, "grad_norm": 0.9054228663444519, "learning_rate": 4.686030823185215e-06, "loss": 0.1445, "step": 3452 }, { "epoch": 0.5594620868438107, "grad_norm": 1.0499194860458374, "learning_rate": 4.685818616986506e-06, "loss": 0.1536, "step": 3453 }, { "epoch": 0.5596241088788075, "grad_norm": 0.8390993475914001, "learning_rate": 4.685606343907038e-06, "loss": 0.1301, "step": 3454 }, { "epoch": 0.5597861309138042, "grad_norm": 0.996111273765564, "learning_rate": 4.685394003953304e-06, "loss": 0.1515, "step": 3455 }, { "epoch": 0.5599481529488011, "grad_norm": 1.1226462125778198, "learning_rate": 4.685181597131802e-06, "loss": 0.1795, "step": 3456 }, { "epoch": 0.5601101749837978, "grad_norm": 1.1062334775924683, "learning_rate": 4.684969123449032e-06, "loss": 0.1587, "step": 3457 }, { "epoch": 0.5602721970187946, "grad_norm": 0.9502385258674622, "learning_rate": 4.684756582911494e-06, "loss": 0.1506, "step": 3458 }, { "epoch": 0.5604342190537913, "grad_norm": 0.9273852109909058, "learning_rate": 4.684543975525691e-06, "loss": 0.1421, "step": 3459 }, { "epoch": 0.560596241088788, "grad_norm": 1.0246306657791138, "learning_rate": 4.6843313012981295e-06, "loss": 0.1696, "step": 3460 }, { "epoch": 0.5607582631237849, "grad_norm": 0.9162903428077698, "learning_rate": 4.684118560235315e-06, "loss": 0.1468, "step": 3461 }, { "epoch": 0.5609202851587816, "grad_norm": 1.003145456314087, "learning_rate": 4.6839057523437606e-06, "loss": 0.1484, "step": 3462 }, { "epoch": 0.5610823071937784, "grad_norm": 0.9194111824035645, "learning_rate": 4.683692877629973e-06, "loss": 0.1415, "step": 3463 }, { "epoch": 0.5612443292287751, "grad_norm": 1.1066536903381348, "learning_rate": 4.683479936100468e-06, "loss": 0.1734, "step": 3464 }, { "epoch": 0.5614063512637719, "grad_norm": 0.9368934631347656, "learning_rate": 4.683266927761762e-06, "loss": 0.1652, "step": 3465 }, { "epoch": 0.5615683732987686, "grad_norm": 0.9979063868522644, "learning_rate": 4.68305385262037e-06, "loss": 0.1536, "step": 3466 }, { "epoch": 0.5617303953337653, "grad_norm": 0.9077597856521606, "learning_rate": 4.6828407106828135e-06, "loss": 0.1353, "step": 3467 }, { "epoch": 0.5618924173687622, "grad_norm": 1.0348340272903442, "learning_rate": 4.682627501955614e-06, "loss": 0.1543, "step": 3468 }, { "epoch": 0.5620544394037589, "grad_norm": 0.8664389252662659, "learning_rate": 4.6824142264452945e-06, "loss": 0.1277, "step": 3469 }, { "epoch": 0.5622164614387557, "grad_norm": 0.9749402403831482, "learning_rate": 4.682200884158381e-06, "loss": 0.1528, "step": 3470 }, { "epoch": 0.5623784834737524, "grad_norm": 0.9896659255027771, "learning_rate": 4.6819874751014015e-06, "loss": 0.1463, "step": 3471 }, { "epoch": 0.5625405055087492, "grad_norm": 0.9059292674064636, "learning_rate": 4.6817739992808855e-06, "loss": 0.1469, "step": 3472 }, { "epoch": 0.562702527543746, "grad_norm": 0.8704636693000793, "learning_rate": 4.681560456703364e-06, "loss": 0.1475, "step": 3473 }, { "epoch": 0.5628645495787427, "grad_norm": 0.9798029065132141, "learning_rate": 4.681346847375373e-06, "loss": 0.1693, "step": 3474 }, { "epoch": 0.5630265716137395, "grad_norm": 0.971039891242981, "learning_rate": 4.681133171303447e-06, "loss": 0.1549, "step": 3475 }, { "epoch": 0.5631885936487362, "grad_norm": 0.9427266120910645, "learning_rate": 4.6809194284941236e-06, "loss": 0.157, "step": 3476 }, { "epoch": 0.563350615683733, "grad_norm": 1.0199015140533447, "learning_rate": 4.680705618953944e-06, "loss": 0.1631, "step": 3477 }, { "epoch": 0.5635126377187297, "grad_norm": 0.9573736786842346, "learning_rate": 4.6804917426894495e-06, "loss": 0.1644, "step": 3478 }, { "epoch": 0.5636746597537265, "grad_norm": 0.992607831954956, "learning_rate": 4.680277799707185e-06, "loss": 0.1475, "step": 3479 }, { "epoch": 0.5638366817887233, "grad_norm": 0.8607563376426697, "learning_rate": 4.6800637900136944e-06, "loss": 0.1461, "step": 3480 }, { "epoch": 0.56399870382372, "grad_norm": 0.8701785206794739, "learning_rate": 4.6798497136155286e-06, "loss": 0.1307, "step": 3481 }, { "epoch": 0.5641607258587168, "grad_norm": 0.8611844778060913, "learning_rate": 4.679635570519236e-06, "loss": 0.1326, "step": 3482 }, { "epoch": 0.5643227478937135, "grad_norm": 1.2296451330184937, "learning_rate": 4.679421360731371e-06, "loss": 0.1533, "step": 3483 }, { "epoch": 0.5644847699287103, "grad_norm": 0.9519492983818054, "learning_rate": 4.6792070842584855e-06, "loss": 0.1453, "step": 3484 }, { "epoch": 0.564646791963707, "grad_norm": 0.9066160321235657, "learning_rate": 4.678992741107136e-06, "loss": 0.1469, "step": 3485 }, { "epoch": 0.5648088139987039, "grad_norm": 1.1821013689041138, "learning_rate": 4.678778331283883e-06, "loss": 0.1911, "step": 3486 }, { "epoch": 0.5649708360337006, "grad_norm": 0.9712387919425964, "learning_rate": 4.678563854795285e-06, "loss": 0.166, "step": 3487 }, { "epoch": 0.5651328580686974, "grad_norm": 0.964177668094635, "learning_rate": 4.678349311647905e-06, "loss": 0.1541, "step": 3488 }, { "epoch": 0.5652948801036941, "grad_norm": 0.864924967288971, "learning_rate": 4.678134701848308e-06, "loss": 0.1395, "step": 3489 }, { "epoch": 0.5654569021386908, "grad_norm": 0.9088456630706787, "learning_rate": 4.67792002540306e-06, "loss": 0.1444, "step": 3490 }, { "epoch": 0.5656189241736876, "grad_norm": 0.9394357204437256, "learning_rate": 4.67770528231873e-06, "loss": 0.1482, "step": 3491 }, { "epoch": 0.5657809462086844, "grad_norm": 0.8865934014320374, "learning_rate": 4.677490472601888e-06, "loss": 0.1456, "step": 3492 }, { "epoch": 0.5659429682436812, "grad_norm": 1.0510127544403076, "learning_rate": 4.677275596259107e-06, "loss": 0.1593, "step": 3493 }, { "epoch": 0.5661049902786779, "grad_norm": 0.8334269523620605, "learning_rate": 4.677060653296961e-06, "loss": 0.123, "step": 3494 }, { "epoch": 0.5662670123136747, "grad_norm": 1.0426408052444458, "learning_rate": 4.676845643722028e-06, "loss": 0.1603, "step": 3495 }, { "epoch": 0.5664290343486714, "grad_norm": 0.9913350343704224, "learning_rate": 4.676630567540886e-06, "loss": 0.1548, "step": 3496 }, { "epoch": 0.5665910563836681, "grad_norm": 0.9299079775810242, "learning_rate": 4.676415424760115e-06, "loss": 0.1387, "step": 3497 }, { "epoch": 0.566753078418665, "grad_norm": 0.8800061345100403, "learning_rate": 4.6762002153863e-06, "loss": 0.1399, "step": 3498 }, { "epoch": 0.5669151004536617, "grad_norm": 0.827441930770874, "learning_rate": 4.675984939426026e-06, "loss": 0.1308, "step": 3499 }, { "epoch": 0.5670771224886585, "grad_norm": 0.914867639541626, "learning_rate": 4.675769596885877e-06, "loss": 0.1541, "step": 3500 }, { "epoch": 0.5672391445236552, "grad_norm": 0.7989079356193542, "learning_rate": 4.675554187772444e-06, "loss": 0.1272, "step": 3501 }, { "epoch": 0.567401166558652, "grad_norm": 1.044179916381836, "learning_rate": 4.675338712092316e-06, "loss": 0.1792, "step": 3502 }, { "epoch": 0.5675631885936487, "grad_norm": 1.00692617893219, "learning_rate": 4.67512316985209e-06, "loss": 0.1667, "step": 3503 }, { "epoch": 0.5677252106286454, "grad_norm": 0.9069098830223083, "learning_rate": 4.674907561058358e-06, "loss": 0.1426, "step": 3504 }, { "epoch": 0.5678872326636423, "grad_norm": 0.9729463458061218, "learning_rate": 4.674691885717717e-06, "loss": 0.1551, "step": 3505 }, { "epoch": 0.568049254698639, "grad_norm": 0.8880230188369751, "learning_rate": 4.674476143836768e-06, "loss": 0.1346, "step": 3506 }, { "epoch": 0.5682112767336358, "grad_norm": 0.9867011904716492, "learning_rate": 4.6742603354221105e-06, "loss": 0.1409, "step": 3507 }, { "epoch": 0.5683732987686325, "grad_norm": 0.9959049820899963, "learning_rate": 4.674044460480348e-06, "loss": 0.1525, "step": 3508 }, { "epoch": 0.5685353208036293, "grad_norm": 0.9901296496391296, "learning_rate": 4.6738285190180865e-06, "loss": 0.145, "step": 3509 }, { "epoch": 0.568697342838626, "grad_norm": 0.9741671085357666, "learning_rate": 4.673612511041933e-06, "loss": 0.157, "step": 3510 }, { "epoch": 0.5688593648736228, "grad_norm": 0.9612037539482117, "learning_rate": 4.673396436558497e-06, "loss": 0.1535, "step": 3511 }, { "epoch": 0.5690213869086196, "grad_norm": 1.0780646800994873, "learning_rate": 4.673180295574389e-06, "loss": 0.1713, "step": 3512 }, { "epoch": 0.5691834089436163, "grad_norm": 1.152421236038208, "learning_rate": 4.672964088096223e-06, "loss": 0.1358, "step": 3513 }, { "epoch": 0.5693454309786131, "grad_norm": 0.9687821269035339, "learning_rate": 4.672747814130615e-06, "loss": 0.157, "step": 3514 }, { "epoch": 0.5695074530136098, "grad_norm": 1.0094846487045288, "learning_rate": 4.6725314736841806e-06, "loss": 0.1582, "step": 3515 }, { "epoch": 0.5696694750486067, "grad_norm": 1.0530569553375244, "learning_rate": 4.672315066763542e-06, "loss": 0.1764, "step": 3516 }, { "epoch": 0.5698314970836034, "grad_norm": 0.8865777254104614, "learning_rate": 4.672098593375319e-06, "loss": 0.1384, "step": 3517 }, { "epoch": 0.5699935191186001, "grad_norm": 0.9916114211082458, "learning_rate": 4.671882053526135e-06, "loss": 0.1531, "step": 3518 }, { "epoch": 0.5701555411535969, "grad_norm": 0.9261521100997925, "learning_rate": 4.671665447222615e-06, "loss": 0.1444, "step": 3519 }, { "epoch": 0.5703175631885936, "grad_norm": 0.8022376894950867, "learning_rate": 4.671448774471389e-06, "loss": 0.1337, "step": 3520 }, { "epoch": 0.5704795852235904, "grad_norm": 0.8965422511100769, "learning_rate": 4.671232035279085e-06, "loss": 0.136, "step": 3521 }, { "epoch": 0.5706416072585871, "grad_norm": 0.9427691102027893, "learning_rate": 4.671015229652335e-06, "loss": 0.1415, "step": 3522 }, { "epoch": 0.570803629293584, "grad_norm": 0.9803239107131958, "learning_rate": 4.6707983575977724e-06, "loss": 0.1568, "step": 3523 }, { "epoch": 0.5709656513285807, "grad_norm": 1.0181081295013428, "learning_rate": 4.670581419122034e-06, "loss": 0.1559, "step": 3524 }, { "epoch": 0.5711276733635774, "grad_norm": 0.9415149688720703, "learning_rate": 4.670364414231756e-06, "loss": 0.1375, "step": 3525 }, { "epoch": 0.5712896953985742, "grad_norm": 0.9571231007575989, "learning_rate": 4.67014734293358e-06, "loss": 0.1489, "step": 3526 }, { "epoch": 0.5714517174335709, "grad_norm": 0.9342589378356934, "learning_rate": 4.669930205234146e-06, "loss": 0.1547, "step": 3527 }, { "epoch": 0.5716137394685677, "grad_norm": 1.1925525665283203, "learning_rate": 4.6697130011401e-06, "loss": 0.1662, "step": 3528 }, { "epoch": 0.5717757615035645, "grad_norm": 0.8905578851699829, "learning_rate": 4.669495730658086e-06, "loss": 0.1552, "step": 3529 }, { "epoch": 0.5719377835385613, "grad_norm": 0.839648425579071, "learning_rate": 4.669278393794753e-06, "loss": 0.1288, "step": 3530 }, { "epoch": 0.572099805573558, "grad_norm": 0.937215268611908, "learning_rate": 4.669060990556751e-06, "loss": 0.1486, "step": 3531 }, { "epoch": 0.5722618276085548, "grad_norm": 1.0918093919754028, "learning_rate": 4.6688435209507305e-06, "loss": 0.1545, "step": 3532 }, { "epoch": 0.5724238496435515, "grad_norm": 1.0010573863983154, "learning_rate": 4.668625984983347e-06, "loss": 0.1635, "step": 3533 }, { "epoch": 0.5725858716785482, "grad_norm": 0.9082236289978027, "learning_rate": 4.668408382661257e-06, "loss": 0.136, "step": 3534 }, { "epoch": 0.5727478937135451, "grad_norm": 0.9376429319381714, "learning_rate": 4.6681907139911185e-06, "loss": 0.1462, "step": 3535 }, { "epoch": 0.5729099157485418, "grad_norm": 0.9339603185653687, "learning_rate": 4.66797297897959e-06, "loss": 0.1397, "step": 3536 }, { "epoch": 0.5730719377835386, "grad_norm": 0.9145956635475159, "learning_rate": 4.667755177633335e-06, "loss": 0.1423, "step": 3537 }, { "epoch": 0.5732339598185353, "grad_norm": 0.9105459451675415, "learning_rate": 4.667537309959018e-06, "loss": 0.1487, "step": 3538 }, { "epoch": 0.5733959818535321, "grad_norm": 0.9851331114768982, "learning_rate": 4.667319375963304e-06, "loss": 0.15, "step": 3539 }, { "epoch": 0.5735580038885288, "grad_norm": 0.9845294952392578, "learning_rate": 4.667101375652862e-06, "loss": 0.1442, "step": 3540 }, { "epoch": 0.5737200259235256, "grad_norm": 0.8981902599334717, "learning_rate": 4.666883309034362e-06, "loss": 0.143, "step": 3541 }, { "epoch": 0.5738820479585224, "grad_norm": 1.1543735265731812, "learning_rate": 4.666665176114477e-06, "loss": 0.1609, "step": 3542 }, { "epoch": 0.5740440699935191, "grad_norm": 0.9345577359199524, "learning_rate": 4.666446976899881e-06, "loss": 0.1438, "step": 3543 }, { "epoch": 0.5742060920285159, "grad_norm": 0.9611945152282715, "learning_rate": 4.666228711397249e-06, "loss": 0.1577, "step": 3544 }, { "epoch": 0.5743681140635126, "grad_norm": 0.9625419974327087, "learning_rate": 4.6660103796132615e-06, "loss": 0.1464, "step": 3545 }, { "epoch": 0.5745301360985094, "grad_norm": 0.9665799140930176, "learning_rate": 4.665791981554598e-06, "loss": 0.1501, "step": 3546 }, { "epoch": 0.5746921581335062, "grad_norm": 1.2185211181640625, "learning_rate": 4.665573517227942e-06, "loss": 0.1475, "step": 3547 }, { "epoch": 0.5748541801685029, "grad_norm": 0.938981294631958, "learning_rate": 4.665354986639975e-06, "loss": 0.1545, "step": 3548 }, { "epoch": 0.5750162022034997, "grad_norm": 0.9329091310501099, "learning_rate": 4.665136389797387e-06, "loss": 0.1437, "step": 3549 }, { "epoch": 0.5751782242384964, "grad_norm": 1.1116046905517578, "learning_rate": 4.664917726706864e-06, "loss": 0.1516, "step": 3550 }, { "epoch": 0.5753402462734932, "grad_norm": 0.8297967910766602, "learning_rate": 4.664698997375098e-06, "loss": 0.1187, "step": 3551 }, { "epoch": 0.5755022683084899, "grad_norm": 0.9134968519210815, "learning_rate": 4.6644802018087806e-06, "loss": 0.1374, "step": 3552 }, { "epoch": 0.5756642903434868, "grad_norm": 1.091581106185913, "learning_rate": 4.664261340014608e-06, "loss": 0.1626, "step": 3553 }, { "epoch": 0.5758263123784835, "grad_norm": 0.9347933530807495, "learning_rate": 4.664042411999276e-06, "loss": 0.139, "step": 3554 }, { "epoch": 0.5759883344134802, "grad_norm": 1.0377494096755981, "learning_rate": 4.663823417769482e-06, "loss": 0.1703, "step": 3555 }, { "epoch": 0.576150356448477, "grad_norm": 0.9047291874885559, "learning_rate": 4.663604357331928e-06, "loss": 0.14, "step": 3556 }, { "epoch": 0.5763123784834737, "grad_norm": 0.9950608611106873, "learning_rate": 4.663385230693316e-06, "loss": 0.1631, "step": 3557 }, { "epoch": 0.5764744005184705, "grad_norm": 0.8619599938392639, "learning_rate": 4.6631660378603526e-06, "loss": 0.1388, "step": 3558 }, { "epoch": 0.5766364225534673, "grad_norm": 0.9685336947441101, "learning_rate": 4.662946778839742e-06, "loss": 0.1649, "step": 3559 }, { "epoch": 0.5767984445884641, "grad_norm": 0.9717000722885132, "learning_rate": 4.662727453638195e-06, "loss": 0.1489, "step": 3560 }, { "epoch": 0.5769604666234608, "grad_norm": 0.8928928375244141, "learning_rate": 4.662508062262421e-06, "loss": 0.1475, "step": 3561 }, { "epoch": 0.5771224886584575, "grad_norm": 0.8069142699241638, "learning_rate": 4.662288604719134e-06, "loss": 0.125, "step": 3562 }, { "epoch": 0.5772845106934543, "grad_norm": 0.8762704133987427, "learning_rate": 4.662069081015047e-06, "loss": 0.1369, "step": 3563 }, { "epoch": 0.577446532728451, "grad_norm": 0.9283084869384766, "learning_rate": 4.66184949115688e-06, "loss": 0.1376, "step": 3564 }, { "epoch": 0.5776085547634479, "grad_norm": 1.0266906023025513, "learning_rate": 4.66162983515135e-06, "loss": 0.1532, "step": 3565 }, { "epoch": 0.5777705767984446, "grad_norm": 1.1265392303466797, "learning_rate": 4.661410113005177e-06, "loss": 0.1586, "step": 3566 }, { "epoch": 0.5779325988334414, "grad_norm": 0.9027408361434937, "learning_rate": 4.661190324725085e-06, "loss": 0.1427, "step": 3567 }, { "epoch": 0.5780946208684381, "grad_norm": 0.9591796398162842, "learning_rate": 4.6609704703178e-06, "loss": 0.1442, "step": 3568 }, { "epoch": 0.5782566429034348, "grad_norm": 1.0705912113189697, "learning_rate": 4.6607505497900475e-06, "loss": 0.156, "step": 3569 }, { "epoch": 0.5784186649384316, "grad_norm": 1.016605257987976, "learning_rate": 4.660530563148557e-06, "loss": 0.1494, "step": 3570 }, { "epoch": 0.5785806869734283, "grad_norm": 0.9418259263038635, "learning_rate": 4.66031051040006e-06, "loss": 0.144, "step": 3571 }, { "epoch": 0.5787427090084252, "grad_norm": 1.0305404663085938, "learning_rate": 4.66009039155129e-06, "loss": 0.1371, "step": 3572 }, { "epoch": 0.5789047310434219, "grad_norm": 0.9588339328765869, "learning_rate": 4.65987020660898e-06, "loss": 0.1428, "step": 3573 }, { "epoch": 0.5790667530784187, "grad_norm": 0.9257709980010986, "learning_rate": 4.659649955579869e-06, "loss": 0.1238, "step": 3574 }, { "epoch": 0.5792287751134154, "grad_norm": 0.9726823568344116, "learning_rate": 4.659429638470695e-06, "loss": 0.1537, "step": 3575 }, { "epoch": 0.5793907971484121, "grad_norm": 0.880847156047821, "learning_rate": 4.659209255288201e-06, "loss": 0.1404, "step": 3576 }, { "epoch": 0.579552819183409, "grad_norm": 0.9704412817955017, "learning_rate": 4.658988806039129e-06, "loss": 0.1457, "step": 3577 }, { "epoch": 0.5797148412184057, "grad_norm": 0.9277657866477966, "learning_rate": 4.658768290730222e-06, "loss": 0.1439, "step": 3578 }, { "epoch": 0.5798768632534025, "grad_norm": 1.1359479427337646, "learning_rate": 4.658547709368232e-06, "loss": 0.181, "step": 3579 }, { "epoch": 0.5800388852883992, "grad_norm": 1.0015472173690796, "learning_rate": 4.658327061959904e-06, "loss": 0.148, "step": 3580 }, { "epoch": 0.580200907323396, "grad_norm": 0.9326437711715698, "learning_rate": 4.658106348511992e-06, "loss": 0.1476, "step": 3581 }, { "epoch": 0.5803629293583927, "grad_norm": 1.0903030633926392, "learning_rate": 4.6578855690312474e-06, "loss": 0.1691, "step": 3582 }, { "epoch": 0.5805249513933896, "grad_norm": 0.8469444513320923, "learning_rate": 4.657664723524426e-06, "loss": 0.1308, "step": 3583 }, { "epoch": 0.5806869734283863, "grad_norm": 0.9137740135192871, "learning_rate": 4.657443811998287e-06, "loss": 0.1478, "step": 3584 }, { "epoch": 0.580848995463383, "grad_norm": 0.9702494740486145, "learning_rate": 4.657222834459588e-06, "loss": 0.1455, "step": 3585 }, { "epoch": 0.5810110174983798, "grad_norm": 0.9724593162536621, "learning_rate": 4.65700179091509e-06, "loss": 0.1515, "step": 3586 }, { "epoch": 0.5811730395333765, "grad_norm": 0.8013084530830383, "learning_rate": 4.6567806813715575e-06, "loss": 0.1202, "step": 3587 }, { "epoch": 0.5813350615683733, "grad_norm": 0.9419142007827759, "learning_rate": 4.656559505835755e-06, "loss": 0.1389, "step": 3588 }, { "epoch": 0.58149708360337, "grad_norm": 0.9750432372093201, "learning_rate": 4.6563382643144505e-06, "loss": 0.1489, "step": 3589 }, { "epoch": 0.5816591056383669, "grad_norm": 0.8804764151573181, "learning_rate": 4.656116956814414e-06, "loss": 0.1381, "step": 3590 }, { "epoch": 0.5818211276733636, "grad_norm": 0.9245219230651855, "learning_rate": 4.655895583342415e-06, "loss": 0.1461, "step": 3591 }, { "epoch": 0.5819831497083603, "grad_norm": 0.8934546113014221, "learning_rate": 4.655674143905229e-06, "loss": 0.141, "step": 3592 }, { "epoch": 0.5821451717433571, "grad_norm": 0.992603600025177, "learning_rate": 4.655452638509631e-06, "loss": 0.1639, "step": 3593 }, { "epoch": 0.5823071937783538, "grad_norm": 1.0013014078140259, "learning_rate": 4.655231067162398e-06, "loss": 0.1502, "step": 3594 }, { "epoch": 0.5824692158133506, "grad_norm": 0.9464520812034607, "learning_rate": 4.655009429870311e-06, "loss": 0.1467, "step": 3595 }, { "epoch": 0.5826312378483474, "grad_norm": 0.8180822730064392, "learning_rate": 4.65478772664015e-06, "loss": 0.1202, "step": 3596 }, { "epoch": 0.5827932598833442, "grad_norm": 0.7912185788154602, "learning_rate": 4.6545659574786985e-06, "loss": 0.1202, "step": 3597 }, { "epoch": 0.5829552819183409, "grad_norm": 0.8654948472976685, "learning_rate": 4.654344122392742e-06, "loss": 0.1296, "step": 3598 }, { "epoch": 0.5831173039533376, "grad_norm": 1.000624418258667, "learning_rate": 4.65412222138907e-06, "loss": 0.1421, "step": 3599 }, { "epoch": 0.5832793259883344, "grad_norm": 0.9236934185028076, "learning_rate": 4.6539002544744705e-06, "loss": 0.139, "step": 3600 }, { "epoch": 0.5834413480233311, "grad_norm": 0.8793966174125671, "learning_rate": 4.653678221655735e-06, "loss": 0.1356, "step": 3601 }, { "epoch": 0.583603370058328, "grad_norm": 0.9714186787605286, "learning_rate": 4.653456122939659e-06, "loss": 0.132, "step": 3602 }, { "epoch": 0.5837653920933247, "grad_norm": 1.067306399345398, "learning_rate": 4.653233958333036e-06, "loss": 0.159, "step": 3603 }, { "epoch": 0.5839274141283215, "grad_norm": 0.8334851264953613, "learning_rate": 4.653011727842665e-06, "loss": 0.1267, "step": 3604 }, { "epoch": 0.5840894361633182, "grad_norm": 0.8716873526573181, "learning_rate": 4.652789431475346e-06, "loss": 0.1423, "step": 3605 }, { "epoch": 0.5842514581983149, "grad_norm": 0.8517516851425171, "learning_rate": 4.652567069237877e-06, "loss": 0.1191, "step": 3606 }, { "epoch": 0.5844134802333117, "grad_norm": 0.9621455669403076, "learning_rate": 4.652344641137068e-06, "loss": 0.1457, "step": 3607 }, { "epoch": 0.5845755022683085, "grad_norm": 1.0080509185791016, "learning_rate": 4.652122147179721e-06, "loss": 0.1642, "step": 3608 }, { "epoch": 0.5847375243033053, "grad_norm": 0.8625085353851318, "learning_rate": 4.6518995873726434e-06, "loss": 0.1353, "step": 3609 }, { "epoch": 0.584899546338302, "grad_norm": 0.8965188264846802, "learning_rate": 4.651676961722647e-06, "loss": 0.1345, "step": 3610 }, { "epoch": 0.5850615683732988, "grad_norm": 1.0228919982910156, "learning_rate": 4.651454270236541e-06, "loss": 0.1283, "step": 3611 }, { "epoch": 0.5852235904082955, "grad_norm": 1.0728081464767456, "learning_rate": 4.651231512921142e-06, "loss": 0.1795, "step": 3612 }, { "epoch": 0.5853856124432922, "grad_norm": 1.0229250192642212, "learning_rate": 4.651008689783264e-06, "loss": 0.1516, "step": 3613 }, { "epoch": 0.5855476344782891, "grad_norm": 0.8845334649085999, "learning_rate": 4.650785800829726e-06, "loss": 0.1361, "step": 3614 }, { "epoch": 0.5857096565132858, "grad_norm": 0.9456861615180969, "learning_rate": 4.650562846067347e-06, "loss": 0.159, "step": 3615 }, { "epoch": 0.5858716785482826, "grad_norm": 1.1445139646530151, "learning_rate": 4.650339825502949e-06, "loss": 0.163, "step": 3616 }, { "epoch": 0.5860337005832793, "grad_norm": 0.9295792579650879, "learning_rate": 4.650116739143356e-06, "loss": 0.1346, "step": 3617 }, { "epoch": 0.5861957226182761, "grad_norm": 0.9515857696533203, "learning_rate": 4.6498935869953945e-06, "loss": 0.1431, "step": 3618 }, { "epoch": 0.5863577446532728, "grad_norm": 0.8937877416610718, "learning_rate": 4.649670369065891e-06, "loss": 0.1438, "step": 3619 }, { "epoch": 0.5865197666882696, "grad_norm": 0.9099476337432861, "learning_rate": 4.649447085361677e-06, "loss": 0.1447, "step": 3620 }, { "epoch": 0.5866817887232664, "grad_norm": 0.973203182220459, "learning_rate": 4.649223735889583e-06, "loss": 0.1532, "step": 3621 }, { "epoch": 0.5868438107582631, "grad_norm": 0.8838689923286438, "learning_rate": 4.649000320656445e-06, "loss": 0.1507, "step": 3622 }, { "epoch": 0.5870058327932599, "grad_norm": 0.8679401874542236, "learning_rate": 4.6487768396690965e-06, "loss": 0.1281, "step": 3623 }, { "epoch": 0.5871678548282566, "grad_norm": 0.9477259516716003, "learning_rate": 4.648553292934377e-06, "loss": 0.1491, "step": 3624 }, { "epoch": 0.5873298768632534, "grad_norm": 0.923620879650116, "learning_rate": 4.648329680459127e-06, "loss": 0.1535, "step": 3625 }, { "epoch": 0.5874918988982502, "grad_norm": 0.9671549797058105, "learning_rate": 4.648106002250186e-06, "loss": 0.1598, "step": 3626 }, { "epoch": 0.587653920933247, "grad_norm": 0.845306396484375, "learning_rate": 4.6478822583144015e-06, "loss": 0.1349, "step": 3627 }, { "epoch": 0.5878159429682437, "grad_norm": 1.0081162452697754, "learning_rate": 4.647658448658616e-06, "loss": 0.1665, "step": 3628 }, { "epoch": 0.5879779650032404, "grad_norm": 0.9138731360435486, "learning_rate": 4.64743457328968e-06, "loss": 0.1496, "step": 3629 }, { "epoch": 0.5881399870382372, "grad_norm": 0.9138461351394653, "learning_rate": 4.647210632214443e-06, "loss": 0.1446, "step": 3630 }, { "epoch": 0.5883020090732339, "grad_norm": 0.8777061104774475, "learning_rate": 4.6469866254397564e-06, "loss": 0.1461, "step": 3631 }, { "epoch": 0.5884640311082308, "grad_norm": 0.984879732131958, "learning_rate": 4.646762552972475e-06, "loss": 0.1488, "step": 3632 }, { "epoch": 0.5886260531432275, "grad_norm": 0.9393163919448853, "learning_rate": 4.646538414819454e-06, "loss": 0.1398, "step": 3633 }, { "epoch": 0.5887880751782243, "grad_norm": 0.8989644646644592, "learning_rate": 4.646314210987552e-06, "loss": 0.1317, "step": 3634 }, { "epoch": 0.588950097213221, "grad_norm": 1.0422638654708862, "learning_rate": 4.646089941483629e-06, "loss": 0.1567, "step": 3635 }, { "epoch": 0.5891121192482177, "grad_norm": 0.9822480082511902, "learning_rate": 4.645865606314548e-06, "loss": 0.1557, "step": 3636 }, { "epoch": 0.5892741412832145, "grad_norm": 1.0314334630966187, "learning_rate": 4.645641205487172e-06, "loss": 0.1502, "step": 3637 }, { "epoch": 0.5894361633182112, "grad_norm": 1.0244547128677368, "learning_rate": 4.645416739008367e-06, "loss": 0.159, "step": 3638 }, { "epoch": 0.5895981853532081, "grad_norm": 1.0546983480453491, "learning_rate": 4.645192206885003e-06, "loss": 0.1466, "step": 3639 }, { "epoch": 0.5897602073882048, "grad_norm": 0.9143233895301819, "learning_rate": 4.644967609123947e-06, "loss": 0.1341, "step": 3640 }, { "epoch": 0.5899222294232016, "grad_norm": 0.9980403184890747, "learning_rate": 4.644742945732074e-06, "loss": 0.1516, "step": 3641 }, { "epoch": 0.5900842514581983, "grad_norm": 0.9705789685249329, "learning_rate": 4.644518216716256e-06, "loss": 0.1653, "step": 3642 }, { "epoch": 0.590246273493195, "grad_norm": 0.951261579990387, "learning_rate": 4.6442934220833716e-06, "loss": 0.1499, "step": 3643 }, { "epoch": 0.5904082955281919, "grad_norm": 0.9710079431533813, "learning_rate": 4.644068561840297e-06, "loss": 0.1623, "step": 3644 }, { "epoch": 0.5905703175631886, "grad_norm": 0.9166867733001709, "learning_rate": 4.643843635993913e-06, "loss": 0.1437, "step": 3645 }, { "epoch": 0.5907323395981854, "grad_norm": 0.9674573540687561, "learning_rate": 4.643618644551101e-06, "loss": 0.1575, "step": 3646 }, { "epoch": 0.5908943616331821, "grad_norm": 0.9722270369529724, "learning_rate": 4.643393587518747e-06, "loss": 0.1585, "step": 3647 }, { "epoch": 0.5910563836681789, "grad_norm": 0.8995879888534546, "learning_rate": 4.643168464903736e-06, "loss": 0.1396, "step": 3648 }, { "epoch": 0.5912184057031756, "grad_norm": 0.9309381246566772, "learning_rate": 4.642943276712956e-06, "loss": 0.1465, "step": 3649 }, { "epoch": 0.5913804277381723, "grad_norm": 0.9563421010971069, "learning_rate": 4.642718022953297e-06, "loss": 0.1389, "step": 3650 }, { "epoch": 0.5915424497731692, "grad_norm": 0.9059107303619385, "learning_rate": 4.642492703631652e-06, "loss": 0.1399, "step": 3651 }, { "epoch": 0.5917044718081659, "grad_norm": 1.094211220741272, "learning_rate": 4.642267318754915e-06, "loss": 0.1802, "step": 3652 }, { "epoch": 0.5918664938431627, "grad_norm": 0.952056348323822, "learning_rate": 4.6420418683299825e-06, "loss": 0.1658, "step": 3653 }, { "epoch": 0.5920285158781594, "grad_norm": 0.9111037254333496, "learning_rate": 4.641816352363753e-06, "loss": 0.1377, "step": 3654 }, { "epoch": 0.5921905379131562, "grad_norm": 0.8917416334152222, "learning_rate": 4.641590770863126e-06, "loss": 0.1374, "step": 3655 }, { "epoch": 0.592352559948153, "grad_norm": 0.9995194673538208, "learning_rate": 4.641365123835004e-06, "loss": 0.1524, "step": 3656 }, { "epoch": 0.5925145819831497, "grad_norm": 0.972366213798523, "learning_rate": 4.641139411286291e-06, "loss": 0.1565, "step": 3657 }, { "epoch": 0.5926766040181465, "grad_norm": 1.033241868019104, "learning_rate": 4.640913633223893e-06, "loss": 0.1777, "step": 3658 }, { "epoch": 0.5928386260531432, "grad_norm": 0.9050754904747009, "learning_rate": 4.640687789654719e-06, "loss": 0.1473, "step": 3659 }, { "epoch": 0.59300064808814, "grad_norm": 0.962814211845398, "learning_rate": 4.64046188058568e-06, "loss": 0.1508, "step": 3660 }, { "epoch": 0.5931626701231367, "grad_norm": 1.0121870040893555, "learning_rate": 4.640235906023686e-06, "loss": 0.1686, "step": 3661 }, { "epoch": 0.5933246921581335, "grad_norm": 0.9426042437553406, "learning_rate": 4.6400098659756525e-06, "loss": 0.1478, "step": 3662 }, { "epoch": 0.5934867141931303, "grad_norm": 1.017554759979248, "learning_rate": 4.639783760448497e-06, "loss": 0.1571, "step": 3663 }, { "epoch": 0.593648736228127, "grad_norm": 0.9146802425384521, "learning_rate": 4.639557589449135e-06, "loss": 0.1372, "step": 3664 }, { "epoch": 0.5938107582631238, "grad_norm": 0.938179612159729, "learning_rate": 4.6393313529844895e-06, "loss": 0.1441, "step": 3665 }, { "epoch": 0.5939727802981205, "grad_norm": 0.8877557516098022, "learning_rate": 4.639105051061481e-06, "loss": 0.1409, "step": 3666 }, { "epoch": 0.5941348023331173, "grad_norm": 0.9249974489212036, "learning_rate": 4.638878683687036e-06, "loss": 0.1561, "step": 3667 }, { "epoch": 0.594296824368114, "grad_norm": 0.906252384185791, "learning_rate": 4.638652250868078e-06, "loss": 0.1418, "step": 3668 }, { "epoch": 0.5944588464031109, "grad_norm": 0.9417980909347534, "learning_rate": 4.638425752611536e-06, "loss": 0.1536, "step": 3669 }, { "epoch": 0.5946208684381076, "grad_norm": 0.8455900549888611, "learning_rate": 4.6381991889243416e-06, "loss": 0.1347, "step": 3670 }, { "epoch": 0.5947828904731044, "grad_norm": 1.0007245540618896, "learning_rate": 4.6379725598134265e-06, "loss": 0.1662, "step": 3671 }, { "epoch": 0.5949449125081011, "grad_norm": 0.8600866198539734, "learning_rate": 4.637745865285725e-06, "loss": 0.1389, "step": 3672 }, { "epoch": 0.5951069345430978, "grad_norm": 0.9751458764076233, "learning_rate": 4.637519105348173e-06, "loss": 0.154, "step": 3673 }, { "epoch": 0.5952689565780946, "grad_norm": 0.9357625842094421, "learning_rate": 4.637292280007709e-06, "loss": 0.1592, "step": 3674 }, { "epoch": 0.5954309786130914, "grad_norm": 0.8890631794929504, "learning_rate": 4.637065389271274e-06, "loss": 0.137, "step": 3675 }, { "epoch": 0.5955930006480882, "grad_norm": 0.8981730341911316, "learning_rate": 4.6368384331458085e-06, "loss": 0.1411, "step": 3676 }, { "epoch": 0.5957550226830849, "grad_norm": 0.987389862537384, "learning_rate": 4.636611411638259e-06, "loss": 0.1455, "step": 3677 }, { "epoch": 0.5959170447180817, "grad_norm": 1.0186084508895874, "learning_rate": 4.63638432475557e-06, "loss": 0.1597, "step": 3678 }, { "epoch": 0.5960790667530784, "grad_norm": 1.0408918857574463, "learning_rate": 4.636157172504692e-06, "loss": 0.1647, "step": 3679 }, { "epoch": 0.5962410887880751, "grad_norm": 0.9388018250465393, "learning_rate": 4.635929954892572e-06, "loss": 0.1402, "step": 3680 }, { "epoch": 0.596403110823072, "grad_norm": 0.8321965932846069, "learning_rate": 4.635702671926166e-06, "loss": 0.1253, "step": 3681 }, { "epoch": 0.5965651328580687, "grad_norm": 0.8523208498954773, "learning_rate": 4.6354753236124254e-06, "loss": 0.1384, "step": 3682 }, { "epoch": 0.5967271548930655, "grad_norm": 1.0064256191253662, "learning_rate": 4.635247909958308e-06, "loss": 0.1559, "step": 3683 }, { "epoch": 0.5968891769280622, "grad_norm": 0.8501699566841125, "learning_rate": 4.635020430970771e-06, "loss": 0.1278, "step": 3684 }, { "epoch": 0.597051198963059, "grad_norm": 0.8985577821731567, "learning_rate": 4.634792886656777e-06, "loss": 0.1393, "step": 3685 }, { "epoch": 0.5972132209980557, "grad_norm": 1.0347974300384521, "learning_rate": 4.6345652770232856e-06, "loss": 0.1612, "step": 3686 }, { "epoch": 0.5973752430330524, "grad_norm": 0.8934628367424011, "learning_rate": 4.634337602077263e-06, "loss": 0.1367, "step": 3687 }, { "epoch": 0.5975372650680493, "grad_norm": 1.1091103553771973, "learning_rate": 4.6341098618256745e-06, "loss": 0.16, "step": 3688 }, { "epoch": 0.597699287103046, "grad_norm": 1.0096241235733032, "learning_rate": 4.633882056275488e-06, "loss": 0.1496, "step": 3689 }, { "epoch": 0.5978613091380428, "grad_norm": 0.793545126914978, "learning_rate": 4.633654185433676e-06, "loss": 0.1135, "step": 3690 }, { "epoch": 0.5980233311730395, "grad_norm": 0.9871333241462708, "learning_rate": 4.633426249307208e-06, "loss": 0.1621, "step": 3691 }, { "epoch": 0.5981853532080363, "grad_norm": 0.9502182602882385, "learning_rate": 4.63319824790306e-06, "loss": 0.1588, "step": 3692 }, { "epoch": 0.598347375243033, "grad_norm": 0.9651210904121399, "learning_rate": 4.632970181228208e-06, "loss": 0.1581, "step": 3693 }, { "epoch": 0.5985093972780298, "grad_norm": 0.91889488697052, "learning_rate": 4.6327420492896295e-06, "loss": 0.1405, "step": 3694 }, { "epoch": 0.5986714193130266, "grad_norm": 0.9224652051925659, "learning_rate": 4.632513852094306e-06, "loss": 0.1439, "step": 3695 }, { "epoch": 0.5988334413480233, "grad_norm": 1.0088536739349365, "learning_rate": 4.632285589649219e-06, "loss": 0.1434, "step": 3696 }, { "epoch": 0.5989954633830201, "grad_norm": 0.9861193299293518, "learning_rate": 4.632057261961353e-06, "loss": 0.1488, "step": 3697 }, { "epoch": 0.5991574854180168, "grad_norm": 0.9628931879997253, "learning_rate": 4.631828869037694e-06, "loss": 0.1572, "step": 3698 }, { "epoch": 0.5993195074530137, "grad_norm": 0.9910879135131836, "learning_rate": 4.631600410885231e-06, "loss": 0.1529, "step": 3699 }, { "epoch": 0.5994815294880104, "grad_norm": 0.876059889793396, "learning_rate": 4.631371887510954e-06, "loss": 0.1401, "step": 3700 }, { "epoch": 0.5996435515230071, "grad_norm": 0.9525460004806519, "learning_rate": 4.6311432989218545e-06, "loss": 0.1607, "step": 3701 }, { "epoch": 0.5998055735580039, "grad_norm": 0.8962662816047668, "learning_rate": 4.630914645124928e-06, "loss": 0.155, "step": 3702 }, { "epoch": 0.5999675955930006, "grad_norm": 0.852394163608551, "learning_rate": 4.630685926127169e-06, "loss": 0.1339, "step": 3703 }, { "epoch": 0.6001296176279974, "grad_norm": 1.0351753234863281, "learning_rate": 4.630457141935577e-06, "loss": 0.1648, "step": 3704 }, { "epoch": 0.6002916396629941, "grad_norm": 0.8926696181297302, "learning_rate": 4.630228292557153e-06, "loss": 0.1472, "step": 3705 }, { "epoch": 0.600453661697991, "grad_norm": 0.8851702213287354, "learning_rate": 4.629999377998898e-06, "loss": 0.1355, "step": 3706 }, { "epoch": 0.6006156837329877, "grad_norm": 0.9555678367614746, "learning_rate": 4.629770398267815e-06, "loss": 0.1617, "step": 3707 }, { "epoch": 0.6007777057679844, "grad_norm": 0.979743242263794, "learning_rate": 4.629541353370914e-06, "loss": 0.1756, "step": 3708 }, { "epoch": 0.6009397278029812, "grad_norm": 0.8310829997062683, "learning_rate": 4.6293122433152e-06, "loss": 0.127, "step": 3709 }, { "epoch": 0.6011017498379779, "grad_norm": 0.9136250615119934, "learning_rate": 4.629083068107684e-06, "loss": 0.1452, "step": 3710 }, { "epoch": 0.6012637718729748, "grad_norm": 0.915210485458374, "learning_rate": 4.628853827755378e-06, "loss": 0.1557, "step": 3711 }, { "epoch": 0.6014257939079715, "grad_norm": 0.902146577835083, "learning_rate": 4.628624522265298e-06, "loss": 0.1351, "step": 3712 }, { "epoch": 0.6015878159429683, "grad_norm": 0.8265898823738098, "learning_rate": 4.628395151644458e-06, "loss": 0.1417, "step": 3713 }, { "epoch": 0.601749837977965, "grad_norm": 1.038116693496704, "learning_rate": 4.628165715899877e-06, "loss": 0.1567, "step": 3714 }, { "epoch": 0.6019118600129617, "grad_norm": 1.0465978384017944, "learning_rate": 4.6279362150385755e-06, "loss": 0.1566, "step": 3715 }, { "epoch": 0.6020738820479585, "grad_norm": 0.9295293688774109, "learning_rate": 4.627706649067575e-06, "loss": 0.1515, "step": 3716 }, { "epoch": 0.6022359040829552, "grad_norm": 0.8935041427612305, "learning_rate": 4.6274770179939e-06, "loss": 0.1467, "step": 3717 }, { "epoch": 0.6023979261179521, "grad_norm": 1.1107021570205688, "learning_rate": 4.627247321824576e-06, "loss": 0.1565, "step": 3718 }, { "epoch": 0.6025599481529488, "grad_norm": 0.9548209309577942, "learning_rate": 4.627017560566633e-06, "loss": 0.1464, "step": 3719 }, { "epoch": 0.6027219701879456, "grad_norm": 0.8951996564865112, "learning_rate": 4.6267877342271e-06, "loss": 0.1415, "step": 3720 }, { "epoch": 0.6028839922229423, "grad_norm": 0.8305820226669312, "learning_rate": 4.626557842813008e-06, "loss": 0.1319, "step": 3721 }, { "epoch": 0.6030460142579391, "grad_norm": 0.8860799074172974, "learning_rate": 4.626327886331392e-06, "loss": 0.1335, "step": 3722 }, { "epoch": 0.6032080362929358, "grad_norm": 0.9841209053993225, "learning_rate": 4.626097864789289e-06, "loss": 0.1727, "step": 3723 }, { "epoch": 0.6033700583279326, "grad_norm": 0.8832147717475891, "learning_rate": 4.625867778193737e-06, "loss": 0.1418, "step": 3724 }, { "epoch": 0.6035320803629294, "grad_norm": 0.9186353087425232, "learning_rate": 4.625637626551774e-06, "loss": 0.1462, "step": 3725 }, { "epoch": 0.6036941023979261, "grad_norm": 0.8324099779129028, "learning_rate": 4.625407409870444e-06, "loss": 0.1441, "step": 3726 }, { "epoch": 0.6038561244329229, "grad_norm": 0.7835507988929749, "learning_rate": 4.625177128156791e-06, "loss": 0.1322, "step": 3727 }, { "epoch": 0.6040181464679196, "grad_norm": 0.9128600358963013, "learning_rate": 4.624946781417861e-06, "loss": 0.1451, "step": 3728 }, { "epoch": 0.6041801685029164, "grad_norm": 0.7894352078437805, "learning_rate": 4.624716369660701e-06, "loss": 0.1217, "step": 3729 }, { "epoch": 0.6043421905379132, "grad_norm": 0.8810085654258728, "learning_rate": 4.624485892892363e-06, "loss": 0.1463, "step": 3730 }, { "epoch": 0.6045042125729099, "grad_norm": 1.0495206117630005, "learning_rate": 4.624255351119897e-06, "loss": 0.1634, "step": 3731 }, { "epoch": 0.6046662346079067, "grad_norm": 1.0064868927001953, "learning_rate": 4.624024744350358e-06, "loss": 0.1705, "step": 3732 }, { "epoch": 0.6048282566429034, "grad_norm": 0.985504150390625, "learning_rate": 4.6237940725908014e-06, "loss": 0.1366, "step": 3733 }, { "epoch": 0.6049902786779002, "grad_norm": 0.9360939264297485, "learning_rate": 4.623563335848286e-06, "loss": 0.1567, "step": 3734 }, { "epoch": 0.6051523007128969, "grad_norm": 0.9163661003112793, "learning_rate": 4.623332534129872e-06, "loss": 0.1453, "step": 3735 }, { "epoch": 0.6053143227478938, "grad_norm": 1.0676296949386597, "learning_rate": 4.62310166744262e-06, "loss": 0.1633, "step": 3736 }, { "epoch": 0.6054763447828905, "grad_norm": 0.9336465001106262, "learning_rate": 4.622870735793595e-06, "loss": 0.1483, "step": 3737 }, { "epoch": 0.6056383668178872, "grad_norm": 0.8538773655891418, "learning_rate": 4.622639739189863e-06, "loss": 0.128, "step": 3738 }, { "epoch": 0.605800388852884, "grad_norm": 0.9462392926216125, "learning_rate": 4.622408677638491e-06, "loss": 0.1465, "step": 3739 }, { "epoch": 0.6059624108878807, "grad_norm": 0.965332567691803, "learning_rate": 4.62217755114655e-06, "loss": 0.1309, "step": 3740 }, { "epoch": 0.6061244329228775, "grad_norm": 0.9652532339096069, "learning_rate": 4.621946359721112e-06, "loss": 0.1488, "step": 3741 }, { "epoch": 0.6062864549578743, "grad_norm": 0.9327961802482605, "learning_rate": 4.62171510336925e-06, "loss": 0.1618, "step": 3742 }, { "epoch": 0.6064484769928711, "grad_norm": 0.9430671334266663, "learning_rate": 4.621483782098041e-06, "loss": 0.1497, "step": 3743 }, { "epoch": 0.6066104990278678, "grad_norm": 1.0052106380462646, "learning_rate": 4.621252395914561e-06, "loss": 0.1474, "step": 3744 }, { "epoch": 0.6067725210628645, "grad_norm": 0.8685867786407471, "learning_rate": 4.621020944825891e-06, "loss": 0.1379, "step": 3745 }, { "epoch": 0.6069345430978613, "grad_norm": 0.9735004305839539, "learning_rate": 4.620789428839114e-06, "loss": 0.1687, "step": 3746 }, { "epoch": 0.607096565132858, "grad_norm": 0.8425636291503906, "learning_rate": 4.6205578479613125e-06, "loss": 0.1331, "step": 3747 }, { "epoch": 0.6072585871678549, "grad_norm": 0.9645400047302246, "learning_rate": 4.620326202199572e-06, "loss": 0.1448, "step": 3748 }, { "epoch": 0.6074206092028516, "grad_norm": 0.8548397421836853, "learning_rate": 4.62009449156098e-06, "loss": 0.139, "step": 3749 }, { "epoch": 0.6075826312378484, "grad_norm": 0.92023104429245, "learning_rate": 4.619862716052629e-06, "loss": 0.1418, "step": 3750 }, { "epoch": 0.6077446532728451, "grad_norm": 1.001118540763855, "learning_rate": 4.6196308756816075e-06, "loss": 0.1444, "step": 3751 }, { "epoch": 0.6079066753078418, "grad_norm": 1.030665397644043, "learning_rate": 4.6193989704550105e-06, "loss": 0.1619, "step": 3752 }, { "epoch": 0.6080686973428386, "grad_norm": 0.9593086242675781, "learning_rate": 4.619167000379934e-06, "loss": 0.1527, "step": 3753 }, { "epoch": 0.6082307193778353, "grad_norm": 0.9325686097145081, "learning_rate": 4.6189349654634766e-06, "loss": 0.1493, "step": 3754 }, { "epoch": 0.6083927414128322, "grad_norm": 1.058605670928955, "learning_rate": 4.618702865712736e-06, "loss": 0.1687, "step": 3755 }, { "epoch": 0.6085547634478289, "grad_norm": 0.9116065502166748, "learning_rate": 4.618470701134815e-06, "loss": 0.1231, "step": 3756 }, { "epoch": 0.6087167854828257, "grad_norm": 1.0380024909973145, "learning_rate": 4.6182384717368174e-06, "loss": 0.1602, "step": 3757 }, { "epoch": 0.6088788075178224, "grad_norm": 1.04635488986969, "learning_rate": 4.618006177525849e-06, "loss": 0.1619, "step": 3758 }, { "epoch": 0.6090408295528191, "grad_norm": 0.9052918553352356, "learning_rate": 4.617773818509016e-06, "loss": 0.1408, "step": 3759 }, { "epoch": 0.609202851587816, "grad_norm": 0.8943411111831665, "learning_rate": 4.61754139469343e-06, "loss": 0.1294, "step": 3760 }, { "epoch": 0.6093648736228127, "grad_norm": 0.9428607225418091, "learning_rate": 4.617308906086201e-06, "loss": 0.1561, "step": 3761 }, { "epoch": 0.6095268956578095, "grad_norm": 0.9149422645568848, "learning_rate": 4.6170763526944425e-06, "loss": 0.1324, "step": 3762 }, { "epoch": 0.6096889176928062, "grad_norm": 0.9159535765647888, "learning_rate": 4.616843734525272e-06, "loss": 0.1459, "step": 3763 }, { "epoch": 0.609850939727803, "grad_norm": 0.915641188621521, "learning_rate": 4.616611051585806e-06, "loss": 0.1408, "step": 3764 }, { "epoch": 0.6100129617627997, "grad_norm": 0.9071922898292542, "learning_rate": 4.616378303883163e-06, "loss": 0.145, "step": 3765 }, { "epoch": 0.6101749837977966, "grad_norm": 0.8846275806427002, "learning_rate": 4.6161454914244665e-06, "loss": 0.1196, "step": 3766 }, { "epoch": 0.6103370058327933, "grad_norm": 0.8655915856361389, "learning_rate": 4.615912614216838e-06, "loss": 0.1349, "step": 3767 }, { "epoch": 0.61049902786779, "grad_norm": 0.8834684491157532, "learning_rate": 4.615679672267405e-06, "loss": 0.144, "step": 3768 }, { "epoch": 0.6106610499027868, "grad_norm": 0.9629760384559631, "learning_rate": 4.615446665583293e-06, "loss": 0.149, "step": 3769 }, { "epoch": 0.6108230719377835, "grad_norm": 0.8233225345611572, "learning_rate": 4.615213594171633e-06, "loss": 0.1256, "step": 3770 }, { "epoch": 0.6109850939727803, "grad_norm": 0.8711490631103516, "learning_rate": 4.6149804580395555e-06, "loss": 0.1413, "step": 3771 }, { "epoch": 0.611147116007777, "grad_norm": 0.8631579875946045, "learning_rate": 4.614747257194194e-06, "loss": 0.1279, "step": 3772 }, { "epoch": 0.6113091380427739, "grad_norm": 1.0351872444152832, "learning_rate": 4.614513991642684e-06, "loss": 0.1701, "step": 3773 }, { "epoch": 0.6114711600777706, "grad_norm": 0.8989577293395996, "learning_rate": 4.614280661392163e-06, "loss": 0.1516, "step": 3774 }, { "epoch": 0.6116331821127673, "grad_norm": 0.9219076037406921, "learning_rate": 4.61404726644977e-06, "loss": 0.1482, "step": 3775 }, { "epoch": 0.6117952041477641, "grad_norm": 1.016165852546692, "learning_rate": 4.613813806822647e-06, "loss": 0.1632, "step": 3776 }, { "epoch": 0.6119572261827608, "grad_norm": 0.8687697649002075, "learning_rate": 4.613580282517936e-06, "loss": 0.1438, "step": 3777 }, { "epoch": 0.6121192482177576, "grad_norm": 0.941464900970459, "learning_rate": 4.613346693542784e-06, "loss": 0.1477, "step": 3778 }, { "epoch": 0.6122812702527544, "grad_norm": 0.8832119703292847, "learning_rate": 4.613113039904337e-06, "loss": 0.1303, "step": 3779 }, { "epoch": 0.6124432922877512, "grad_norm": 0.837990403175354, "learning_rate": 4.6128793216097445e-06, "loss": 0.1366, "step": 3780 }, { "epoch": 0.6126053143227479, "grad_norm": 0.8604587912559509, "learning_rate": 4.612645538666157e-06, "loss": 0.1297, "step": 3781 }, { "epoch": 0.6127673363577446, "grad_norm": 0.9468358755111694, "learning_rate": 4.61241169108073e-06, "loss": 0.1561, "step": 3782 }, { "epoch": 0.6129293583927414, "grad_norm": 0.8678791522979736, "learning_rate": 4.612177778860617e-06, "loss": 0.1352, "step": 3783 }, { "epoch": 0.6130913804277381, "grad_norm": 0.8907611966133118, "learning_rate": 4.611943802012975e-06, "loss": 0.1373, "step": 3784 }, { "epoch": 0.613253402462735, "grad_norm": 0.8536321520805359, "learning_rate": 4.611709760544963e-06, "loss": 0.1302, "step": 3785 }, { "epoch": 0.6134154244977317, "grad_norm": 0.8262412548065186, "learning_rate": 4.611475654463743e-06, "loss": 0.1426, "step": 3786 }, { "epoch": 0.6135774465327285, "grad_norm": 0.9103348255157471, "learning_rate": 4.611241483776478e-06, "loss": 0.1445, "step": 3787 }, { "epoch": 0.6137394685677252, "grad_norm": 0.9351794123649597, "learning_rate": 4.6110072484903326e-06, "loss": 0.1353, "step": 3788 }, { "epoch": 0.6139014906027219, "grad_norm": 1.0249840021133423, "learning_rate": 4.610772948612473e-06, "loss": 0.1455, "step": 3789 }, { "epoch": 0.6140635126377187, "grad_norm": 0.9844290018081665, "learning_rate": 4.610538584150071e-06, "loss": 0.1643, "step": 3790 }, { "epoch": 0.6142255346727155, "grad_norm": 0.9268943667411804, "learning_rate": 4.6103041551102935e-06, "loss": 0.1388, "step": 3791 }, { "epoch": 0.6143875567077123, "grad_norm": 0.9610852599143982, "learning_rate": 4.610069661500317e-06, "loss": 0.1476, "step": 3792 }, { "epoch": 0.614549578742709, "grad_norm": 0.9968616366386414, "learning_rate": 4.609835103327315e-06, "loss": 0.1548, "step": 3793 }, { "epoch": 0.6147116007777058, "grad_norm": 0.9769883155822754, "learning_rate": 4.609600480598464e-06, "loss": 0.1619, "step": 3794 }, { "epoch": 0.6148736228127025, "grad_norm": 0.9648511409759521, "learning_rate": 4.609365793320944e-06, "loss": 0.1464, "step": 3795 }, { "epoch": 0.6150356448476992, "grad_norm": 0.9245080351829529, "learning_rate": 4.6091310415019355e-06, "loss": 0.1612, "step": 3796 }, { "epoch": 0.6151976668826961, "grad_norm": 0.8954278230667114, "learning_rate": 4.608896225148621e-06, "loss": 0.1421, "step": 3797 }, { "epoch": 0.6153596889176928, "grad_norm": 1.078444004058838, "learning_rate": 4.608661344268185e-06, "loss": 0.1863, "step": 3798 }, { "epoch": 0.6155217109526896, "grad_norm": 0.7994515299797058, "learning_rate": 4.608426398867815e-06, "loss": 0.1162, "step": 3799 }, { "epoch": 0.6156837329876863, "grad_norm": 0.8549524545669556, "learning_rate": 4.608191388954699e-06, "loss": 0.1416, "step": 3800 }, { "epoch": 0.6158457550226831, "grad_norm": 0.8880654573440552, "learning_rate": 4.607956314536029e-06, "loss": 0.1308, "step": 3801 }, { "epoch": 0.6160077770576798, "grad_norm": 0.9206951856613159, "learning_rate": 4.607721175618997e-06, "loss": 0.1543, "step": 3802 }, { "epoch": 0.6161697990926766, "grad_norm": 1.0166141986846924, "learning_rate": 4.607485972210797e-06, "loss": 0.1533, "step": 3803 }, { "epoch": 0.6163318211276734, "grad_norm": 0.9442442655563354, "learning_rate": 4.6072507043186265e-06, "loss": 0.1442, "step": 3804 }, { "epoch": 0.6164938431626701, "grad_norm": 1.1184486150741577, "learning_rate": 4.607015371949683e-06, "loss": 0.158, "step": 3805 }, { "epoch": 0.6166558651976669, "grad_norm": 0.9163842797279358, "learning_rate": 4.60677997511117e-06, "loss": 0.142, "step": 3806 }, { "epoch": 0.6168178872326636, "grad_norm": 0.8781442642211914, "learning_rate": 4.606544513810287e-06, "loss": 0.1387, "step": 3807 }, { "epoch": 0.6169799092676604, "grad_norm": 0.9204709529876709, "learning_rate": 4.606308988054239e-06, "loss": 0.1549, "step": 3808 }, { "epoch": 0.6171419313026572, "grad_norm": 0.9622353315353394, "learning_rate": 4.606073397850234e-06, "loss": 0.1534, "step": 3809 }, { "epoch": 0.6173039533376539, "grad_norm": 1.0953677892684937, "learning_rate": 4.605837743205479e-06, "loss": 0.144, "step": 3810 }, { "epoch": 0.6174659753726507, "grad_norm": 0.8764045834541321, "learning_rate": 4.6056020241271855e-06, "loss": 0.1359, "step": 3811 }, { "epoch": 0.6176279974076474, "grad_norm": 0.9213500022888184, "learning_rate": 4.605366240622565e-06, "loss": 0.1521, "step": 3812 }, { "epoch": 0.6177900194426442, "grad_norm": 0.9212419986724854, "learning_rate": 4.605130392698833e-06, "loss": 0.1425, "step": 3813 }, { "epoch": 0.6179520414776409, "grad_norm": 0.9497610330581665, "learning_rate": 4.604894480363205e-06, "loss": 0.1536, "step": 3814 }, { "epoch": 0.6181140635126378, "grad_norm": 0.9440318942070007, "learning_rate": 4.6046585036229005e-06, "loss": 0.1481, "step": 3815 }, { "epoch": 0.6182760855476345, "grad_norm": 0.8853862881660461, "learning_rate": 4.604422462485138e-06, "loss": 0.141, "step": 3816 }, { "epoch": 0.6184381075826313, "grad_norm": 0.9499521851539612, "learning_rate": 4.604186356957141e-06, "loss": 0.147, "step": 3817 }, { "epoch": 0.618600129617628, "grad_norm": 0.8915985822677612, "learning_rate": 4.603950187046134e-06, "loss": 0.1515, "step": 3818 }, { "epoch": 0.6187621516526247, "grad_norm": 0.9126549959182739, "learning_rate": 4.6037139527593424e-06, "loss": 0.1505, "step": 3819 }, { "epoch": 0.6189241736876215, "grad_norm": 1.0714317560195923, "learning_rate": 4.603477654103994e-06, "loss": 0.171, "step": 3820 }, { "epoch": 0.6190861957226182, "grad_norm": 1.0061242580413818, "learning_rate": 4.60324129108732e-06, "loss": 0.1606, "step": 3821 }, { "epoch": 0.6192482177576151, "grad_norm": 1.0254570245742798, "learning_rate": 4.603004863716553e-06, "loss": 0.177, "step": 3822 }, { "epoch": 0.6194102397926118, "grad_norm": 0.7951987981796265, "learning_rate": 4.602768371998925e-06, "loss": 0.1198, "step": 3823 }, { "epoch": 0.6195722618276086, "grad_norm": 0.9415546655654907, "learning_rate": 4.602531815941676e-06, "loss": 0.1581, "step": 3824 }, { "epoch": 0.6197342838626053, "grad_norm": 0.9311978220939636, "learning_rate": 4.602295195552039e-06, "loss": 0.1474, "step": 3825 }, { "epoch": 0.619896305897602, "grad_norm": 0.8883768320083618, "learning_rate": 4.602058510837257e-06, "loss": 0.1383, "step": 3826 }, { "epoch": 0.6200583279325989, "grad_norm": 1.01768958568573, "learning_rate": 4.601821761804572e-06, "loss": 0.1721, "step": 3827 }, { "epoch": 0.6202203499675956, "grad_norm": 0.9163593053817749, "learning_rate": 4.6015849484612265e-06, "loss": 0.1258, "step": 3828 }, { "epoch": 0.6203823720025924, "grad_norm": 0.9427400231361389, "learning_rate": 4.601348070814468e-06, "loss": 0.1308, "step": 3829 }, { "epoch": 0.6205443940375891, "grad_norm": 1.179629921913147, "learning_rate": 4.601111128871544e-06, "loss": 0.1838, "step": 3830 }, { "epoch": 0.6207064160725859, "grad_norm": 1.0116398334503174, "learning_rate": 4.600874122639703e-06, "loss": 0.147, "step": 3831 }, { "epoch": 0.6208684381075826, "grad_norm": 0.8127241134643555, "learning_rate": 4.600637052126199e-06, "loss": 0.1333, "step": 3832 }, { "epoch": 0.6210304601425793, "grad_norm": 0.9073969125747681, "learning_rate": 4.600399917338284e-06, "loss": 0.1369, "step": 3833 }, { "epoch": 0.6211924821775762, "grad_norm": 0.8607815504074097, "learning_rate": 4.600162718283215e-06, "loss": 0.1357, "step": 3834 }, { "epoch": 0.6213545042125729, "grad_norm": 0.9723119735717773, "learning_rate": 4.5999254549682484e-06, "loss": 0.1456, "step": 3835 }, { "epoch": 0.6215165262475697, "grad_norm": 0.9572594165802002, "learning_rate": 4.599688127400645e-06, "loss": 0.1548, "step": 3836 }, { "epoch": 0.6216785482825664, "grad_norm": 0.9637323021888733, "learning_rate": 4.599450735587666e-06, "loss": 0.1533, "step": 3837 }, { "epoch": 0.6218405703175632, "grad_norm": 0.9875199794769287, "learning_rate": 4.599213279536575e-06, "loss": 0.159, "step": 3838 }, { "epoch": 0.62200259235256, "grad_norm": 0.9891331195831299, "learning_rate": 4.598975759254638e-06, "loss": 0.1594, "step": 3839 }, { "epoch": 0.6221646143875567, "grad_norm": 0.941134512424469, "learning_rate": 4.598738174749121e-06, "loss": 0.1466, "step": 3840 }, { "epoch": 0.6223266364225535, "grad_norm": 0.8841784596443176, "learning_rate": 4.598500526027296e-06, "loss": 0.1571, "step": 3841 }, { "epoch": 0.6224886584575502, "grad_norm": 0.9083060622215271, "learning_rate": 4.598262813096432e-06, "loss": 0.1543, "step": 3842 }, { "epoch": 0.622650680492547, "grad_norm": 0.8198730945587158, "learning_rate": 4.598025035963805e-06, "loss": 0.1286, "step": 3843 }, { "epoch": 0.6228127025275437, "grad_norm": 0.9744411706924438, "learning_rate": 4.597787194636688e-06, "loss": 0.1523, "step": 3844 }, { "epoch": 0.6229747245625405, "grad_norm": 0.8268557190895081, "learning_rate": 4.597549289122361e-06, "loss": 0.1294, "step": 3845 }, { "epoch": 0.6231367465975373, "grad_norm": 1.0291190147399902, "learning_rate": 4.597311319428099e-06, "loss": 0.1556, "step": 3846 }, { "epoch": 0.623298768632534, "grad_norm": 0.8949023485183716, "learning_rate": 4.597073285561188e-06, "loss": 0.1354, "step": 3847 }, { "epoch": 0.6234607906675308, "grad_norm": 0.9648371338844299, "learning_rate": 4.596835187528908e-06, "loss": 0.1517, "step": 3848 }, { "epoch": 0.6236228127025275, "grad_norm": 1.00344717502594, "learning_rate": 4.596597025338547e-06, "loss": 0.1659, "step": 3849 }, { "epoch": 0.6237848347375243, "grad_norm": 0.8660650849342346, "learning_rate": 4.59635879899739e-06, "loss": 0.1356, "step": 3850 }, { "epoch": 0.623946856772521, "grad_norm": 1.0011287927627563, "learning_rate": 4.596120508512727e-06, "loss": 0.1452, "step": 3851 }, { "epoch": 0.6241088788075179, "grad_norm": 0.8602931499481201, "learning_rate": 4.595882153891849e-06, "loss": 0.1315, "step": 3852 }, { "epoch": 0.6242709008425146, "grad_norm": 0.8699659705162048, "learning_rate": 4.595643735142049e-06, "loss": 0.1395, "step": 3853 }, { "epoch": 0.6244329228775113, "grad_norm": 0.782909631729126, "learning_rate": 4.595405252270622e-06, "loss": 0.1228, "step": 3854 }, { "epoch": 0.6245949449125081, "grad_norm": 0.9927259683609009, "learning_rate": 4.595166705284864e-06, "loss": 0.1533, "step": 3855 }, { "epoch": 0.6247569669475048, "grad_norm": 0.9355255365371704, "learning_rate": 4.594928094192076e-06, "loss": 0.1478, "step": 3856 }, { "epoch": 0.6249189889825016, "grad_norm": 0.9448959231376648, "learning_rate": 4.594689418999558e-06, "loss": 0.1456, "step": 3857 }, { "epoch": 0.6250810110174984, "grad_norm": 0.8999738097190857, "learning_rate": 4.594450679714613e-06, "loss": 0.144, "step": 3858 }, { "epoch": 0.6252430330524952, "grad_norm": 0.852476954460144, "learning_rate": 4.594211876344545e-06, "loss": 0.1304, "step": 3859 }, { "epoch": 0.6254050550874919, "grad_norm": 0.8585977554321289, "learning_rate": 4.593973008896662e-06, "loss": 0.14, "step": 3860 }, { "epoch": 0.6255670771224887, "grad_norm": 0.8411082625389099, "learning_rate": 4.593734077378273e-06, "loss": 0.1301, "step": 3861 }, { "epoch": 0.6257290991574854, "grad_norm": 0.9660947322845459, "learning_rate": 4.593495081796686e-06, "loss": 0.1478, "step": 3862 }, { "epoch": 0.6258911211924821, "grad_norm": 0.9528719186782837, "learning_rate": 4.593256022159217e-06, "loss": 0.141, "step": 3863 }, { "epoch": 0.626053143227479, "grad_norm": 0.8987707495689392, "learning_rate": 4.59301689847318e-06, "loss": 0.1377, "step": 3864 }, { "epoch": 0.6262151652624757, "grad_norm": 0.8041302561759949, "learning_rate": 4.592777710745889e-06, "loss": 0.1266, "step": 3865 }, { "epoch": 0.6263771872974725, "grad_norm": 0.9248653650283813, "learning_rate": 4.592538458984666e-06, "loss": 0.1373, "step": 3866 }, { "epoch": 0.6265392093324692, "grad_norm": 0.9307569265365601, "learning_rate": 4.592299143196829e-06, "loss": 0.1531, "step": 3867 }, { "epoch": 0.626701231367466, "grad_norm": 1.1055402755737305, "learning_rate": 4.5920597633897015e-06, "loss": 0.1396, "step": 3868 }, { "epoch": 0.6268632534024627, "grad_norm": 0.9206196069717407, "learning_rate": 4.591820319570609e-06, "loss": 0.1401, "step": 3869 }, { "epoch": 0.6270252754374595, "grad_norm": 0.8944006562232971, "learning_rate": 4.5915808117468766e-06, "loss": 0.1455, "step": 3870 }, { "epoch": 0.6271872974724563, "grad_norm": 0.87538743019104, "learning_rate": 4.591341239925831e-06, "loss": 0.1393, "step": 3871 }, { "epoch": 0.627349319507453, "grad_norm": 1.0033069849014282, "learning_rate": 4.591101604114807e-06, "loss": 0.1531, "step": 3872 }, { "epoch": 0.6275113415424498, "grad_norm": 1.0902032852172852, "learning_rate": 4.590861904321133e-06, "loss": 0.1743, "step": 3873 }, { "epoch": 0.6276733635774465, "grad_norm": 1.0226129293441772, "learning_rate": 4.590622140552144e-06, "loss": 0.1443, "step": 3874 }, { "epoch": 0.6278353856124433, "grad_norm": 0.9415045380592346, "learning_rate": 4.590382312815178e-06, "loss": 0.1412, "step": 3875 }, { "epoch": 0.62799740764744, "grad_norm": 1.0171313285827637, "learning_rate": 4.5901424211175715e-06, "loss": 0.1658, "step": 3876 }, { "epoch": 0.6281594296824368, "grad_norm": 0.9125556945800781, "learning_rate": 4.589902465466665e-06, "loss": 0.1417, "step": 3877 }, { "epoch": 0.6283214517174336, "grad_norm": 0.9008194804191589, "learning_rate": 4.5896624458698e-06, "loss": 0.1405, "step": 3878 }, { "epoch": 0.6284834737524303, "grad_norm": 0.9593300819396973, "learning_rate": 4.589422362334321e-06, "loss": 0.1403, "step": 3879 }, { "epoch": 0.6286454957874271, "grad_norm": 0.9999030232429504, "learning_rate": 4.5891822148675745e-06, "loss": 0.1457, "step": 3880 }, { "epoch": 0.6288075178224238, "grad_norm": 0.9375879764556885, "learning_rate": 4.588942003476907e-06, "loss": 0.1425, "step": 3881 }, { "epoch": 0.6289695398574207, "grad_norm": 0.8549643754959106, "learning_rate": 4.588701728169671e-06, "loss": 0.1354, "step": 3882 }, { "epoch": 0.6291315618924174, "grad_norm": 0.9586682915687561, "learning_rate": 4.588461388953216e-06, "loss": 0.1707, "step": 3883 }, { "epoch": 0.6292935839274141, "grad_norm": 0.8284324407577515, "learning_rate": 4.5882209858348956e-06, "loss": 0.1326, "step": 3884 }, { "epoch": 0.6294556059624109, "grad_norm": 0.9033730030059814, "learning_rate": 4.587980518822067e-06, "loss": 0.1452, "step": 3885 }, { "epoch": 0.6296176279974076, "grad_norm": 0.9468452334403992, "learning_rate": 4.587739987922087e-06, "loss": 0.1466, "step": 3886 }, { "epoch": 0.6297796500324044, "grad_norm": 0.97198885679245, "learning_rate": 4.587499393142316e-06, "loss": 0.1459, "step": 3887 }, { "epoch": 0.6299416720674011, "grad_norm": 1.0194365978240967, "learning_rate": 4.587258734490115e-06, "loss": 0.1597, "step": 3888 }, { "epoch": 0.630103694102398, "grad_norm": 0.9163743257522583, "learning_rate": 4.587018011972848e-06, "loss": 0.1414, "step": 3889 }, { "epoch": 0.6302657161373947, "grad_norm": 0.7911040186882019, "learning_rate": 4.586777225597881e-06, "loss": 0.1259, "step": 3890 }, { "epoch": 0.6304277381723914, "grad_norm": 0.9265354871749878, "learning_rate": 4.58653637537258e-06, "loss": 0.1403, "step": 3891 }, { "epoch": 0.6305897602073882, "grad_norm": 0.9925926923751831, "learning_rate": 4.586295461304315e-06, "loss": 0.1521, "step": 3892 }, { "epoch": 0.6307517822423849, "grad_norm": 1.0709103345870972, "learning_rate": 4.586054483400459e-06, "loss": 0.1772, "step": 3893 }, { "epoch": 0.6309138042773818, "grad_norm": 0.9420156478881836, "learning_rate": 4.585813441668383e-06, "loss": 0.1495, "step": 3894 }, { "epoch": 0.6310758263123785, "grad_norm": 0.840775728225708, "learning_rate": 4.585572336115463e-06, "loss": 0.1301, "step": 3895 }, { "epoch": 0.6312378483473753, "grad_norm": 0.8908707499504089, "learning_rate": 4.585331166749077e-06, "loss": 0.1455, "step": 3896 }, { "epoch": 0.631399870382372, "grad_norm": 0.9626536965370178, "learning_rate": 4.5850899335766034e-06, "loss": 0.1543, "step": 3897 }, { "epoch": 0.6315618924173687, "grad_norm": 0.8213000297546387, "learning_rate": 4.584848636605423e-06, "loss": 0.133, "step": 3898 }, { "epoch": 0.6317239144523655, "grad_norm": 0.9156107306480408, "learning_rate": 4.584607275842921e-06, "loss": 0.1383, "step": 3899 }, { "epoch": 0.6318859364873622, "grad_norm": 0.9265947341918945, "learning_rate": 4.58436585129648e-06, "loss": 0.1336, "step": 3900 }, { "epoch": 0.6320479585223591, "grad_norm": 0.9135920405387878, "learning_rate": 4.584124362973488e-06, "loss": 0.1347, "step": 3901 }, { "epoch": 0.6322099805573558, "grad_norm": 1.0750758647918701, "learning_rate": 4.583882810881334e-06, "loss": 0.1476, "step": 3902 }, { "epoch": 0.6323720025923526, "grad_norm": 1.0379818677902222, "learning_rate": 4.583641195027409e-06, "loss": 0.1563, "step": 3903 }, { "epoch": 0.6325340246273493, "grad_norm": 0.9261550903320312, "learning_rate": 4.583399515419106e-06, "loss": 0.139, "step": 3904 }, { "epoch": 0.6326960466623461, "grad_norm": 0.9920796155929565, "learning_rate": 4.58315777206382e-06, "loss": 0.1358, "step": 3905 }, { "epoch": 0.6328580686973428, "grad_norm": 0.9471383690834045, "learning_rate": 4.582915964968946e-06, "loss": 0.1418, "step": 3906 }, { "epoch": 0.6330200907323396, "grad_norm": 1.0869243144989014, "learning_rate": 4.582674094141885e-06, "loss": 0.166, "step": 3907 }, { "epoch": 0.6331821127673364, "grad_norm": 0.9842541813850403, "learning_rate": 4.582432159590037e-06, "loss": 0.1565, "step": 3908 }, { "epoch": 0.6333441348023331, "grad_norm": 1.0878725051879883, "learning_rate": 4.582190161320803e-06, "loss": 0.173, "step": 3909 }, { "epoch": 0.6335061568373299, "grad_norm": 0.9759945869445801, "learning_rate": 4.58194809934159e-06, "loss": 0.1576, "step": 3910 }, { "epoch": 0.6336681788723266, "grad_norm": 0.9469559192657471, "learning_rate": 4.581705973659803e-06, "loss": 0.1445, "step": 3911 }, { "epoch": 0.6338302009073234, "grad_norm": 0.9183310866355896, "learning_rate": 4.5814637842828506e-06, "loss": 0.1411, "step": 3912 }, { "epoch": 0.6339922229423202, "grad_norm": 1.0365331172943115, "learning_rate": 4.581221531218144e-06, "loss": 0.154, "step": 3913 }, { "epoch": 0.6341542449773169, "grad_norm": 0.9276525974273682, "learning_rate": 4.580979214473095e-06, "loss": 0.1452, "step": 3914 }, { "epoch": 0.6343162670123137, "grad_norm": 0.9412997961044312, "learning_rate": 4.580736834055117e-06, "loss": 0.1418, "step": 3915 }, { "epoch": 0.6344782890473104, "grad_norm": 0.906779408454895, "learning_rate": 4.580494389971628e-06, "loss": 0.152, "step": 3916 }, { "epoch": 0.6346403110823072, "grad_norm": 0.8911001682281494, "learning_rate": 4.580251882230045e-06, "loss": 0.1523, "step": 3917 }, { "epoch": 0.6348023331173039, "grad_norm": 0.9510197639465332, "learning_rate": 4.580009310837789e-06, "loss": 0.1451, "step": 3918 }, { "epoch": 0.6349643551523008, "grad_norm": 0.9477640986442566, "learning_rate": 4.579766675802281e-06, "loss": 0.1485, "step": 3919 }, { "epoch": 0.6351263771872975, "grad_norm": 0.9456864595413208, "learning_rate": 4.579523977130946e-06, "loss": 0.1489, "step": 3920 }, { "epoch": 0.6352883992222942, "grad_norm": 0.8579930663108826, "learning_rate": 4.57928121483121e-06, "loss": 0.1462, "step": 3921 }, { "epoch": 0.635450421257291, "grad_norm": 0.8796806931495667, "learning_rate": 4.579038388910499e-06, "loss": 0.1349, "step": 3922 }, { "epoch": 0.6356124432922877, "grad_norm": 0.9751520156860352, "learning_rate": 4.578795499376246e-06, "loss": 0.1583, "step": 3923 }, { "epoch": 0.6357744653272845, "grad_norm": 0.9027172327041626, "learning_rate": 4.578552546235882e-06, "loss": 0.1483, "step": 3924 }, { "epoch": 0.6359364873622813, "grad_norm": 0.9351487159729004, "learning_rate": 4.578309529496839e-06, "loss": 0.1398, "step": 3925 }, { "epoch": 0.6360985093972781, "grad_norm": 0.99167400598526, "learning_rate": 4.578066449166554e-06, "loss": 0.172, "step": 3926 }, { "epoch": 0.6362605314322748, "grad_norm": 1.0527704954147339, "learning_rate": 4.577823305252464e-06, "loss": 0.1422, "step": 3927 }, { "epoch": 0.6364225534672715, "grad_norm": 0.8460220694541931, "learning_rate": 4.57758009776201e-06, "loss": 0.1374, "step": 3928 }, { "epoch": 0.6365845755022683, "grad_norm": 0.8465033769607544, "learning_rate": 4.577336826702631e-06, "loss": 0.1408, "step": 3929 }, { "epoch": 0.636746597537265, "grad_norm": 0.9382408261299133, "learning_rate": 4.577093492081774e-06, "loss": 0.1412, "step": 3930 }, { "epoch": 0.6369086195722619, "grad_norm": 0.9168792366981506, "learning_rate": 4.576850093906881e-06, "loss": 0.143, "step": 3931 }, { "epoch": 0.6370706416072586, "grad_norm": 0.8806384205818176, "learning_rate": 4.576606632185403e-06, "loss": 0.1321, "step": 3932 }, { "epoch": 0.6372326636422554, "grad_norm": 0.8602378964424133, "learning_rate": 4.576363106924785e-06, "loss": 0.122, "step": 3933 }, { "epoch": 0.6373946856772521, "grad_norm": 0.9462723135948181, "learning_rate": 4.576119518132483e-06, "loss": 0.1421, "step": 3934 }, { "epoch": 0.6375567077122488, "grad_norm": 0.9258326292037964, "learning_rate": 4.5758758658159465e-06, "loss": 0.1483, "step": 3935 }, { "epoch": 0.6377187297472456, "grad_norm": 0.9369986653327942, "learning_rate": 4.575632149982631e-06, "loss": 0.1457, "step": 3936 }, { "epoch": 0.6378807517822424, "grad_norm": 0.8889022469520569, "learning_rate": 4.575388370639997e-06, "loss": 0.1267, "step": 3937 }, { "epoch": 0.6380427738172392, "grad_norm": 0.9667481780052185, "learning_rate": 4.5751445277955e-06, "loss": 0.1417, "step": 3938 }, { "epoch": 0.6382047958522359, "grad_norm": 0.9078060388565063, "learning_rate": 4.574900621456602e-06, "loss": 0.1371, "step": 3939 }, { "epoch": 0.6383668178872327, "grad_norm": 0.9612594246864319, "learning_rate": 4.574656651630767e-06, "loss": 0.1446, "step": 3940 }, { "epoch": 0.6385288399222294, "grad_norm": 0.9944157004356384, "learning_rate": 4.574412618325458e-06, "loss": 0.146, "step": 3941 }, { "epoch": 0.6386908619572261, "grad_norm": 0.9619837999343872, "learning_rate": 4.574168521548144e-06, "loss": 0.1485, "step": 3942 }, { "epoch": 0.638852883992223, "grad_norm": 1.0251742601394653, "learning_rate": 4.5739243613062915e-06, "loss": 0.1626, "step": 3943 }, { "epoch": 0.6390149060272197, "grad_norm": 1.0261211395263672, "learning_rate": 4.573680137607373e-06, "loss": 0.1482, "step": 3944 }, { "epoch": 0.6391769280622165, "grad_norm": 0.9001795053482056, "learning_rate": 4.57343585045886e-06, "loss": 0.1326, "step": 3945 }, { "epoch": 0.6393389500972132, "grad_norm": 1.0013598203659058, "learning_rate": 4.573191499868228e-06, "loss": 0.1509, "step": 3946 }, { "epoch": 0.63950097213221, "grad_norm": 1.0281734466552734, "learning_rate": 4.572947085842952e-06, "loss": 0.1414, "step": 3947 }, { "epoch": 0.6396629941672067, "grad_norm": 0.9392046928405762, "learning_rate": 4.572702608390513e-06, "loss": 0.1421, "step": 3948 }, { "epoch": 0.6398250162022034, "grad_norm": 0.9891103506088257, "learning_rate": 4.57245806751839e-06, "loss": 0.1592, "step": 3949 }, { "epoch": 0.6399870382372003, "grad_norm": 0.8271520137786865, "learning_rate": 4.572213463234065e-06, "loss": 0.1301, "step": 3950 }, { "epoch": 0.640149060272197, "grad_norm": 0.8745349645614624, "learning_rate": 4.571968795545023e-06, "loss": 0.1386, "step": 3951 }, { "epoch": 0.6403110823071938, "grad_norm": 0.9697176218032837, "learning_rate": 4.5717240644587495e-06, "loss": 0.1628, "step": 3952 }, { "epoch": 0.6404731043421905, "grad_norm": 1.1860628128051758, "learning_rate": 4.571479269982734e-06, "loss": 0.1444, "step": 3953 }, { "epoch": 0.6406351263771873, "grad_norm": 0.9648879766464233, "learning_rate": 4.571234412124464e-06, "loss": 0.1604, "step": 3954 }, { "epoch": 0.640797148412184, "grad_norm": 0.9528911709785461, "learning_rate": 4.570989490891434e-06, "loss": 0.1569, "step": 3955 }, { "epoch": 0.6409591704471809, "grad_norm": 0.8974754214286804, "learning_rate": 4.570744506291138e-06, "loss": 0.1485, "step": 3956 }, { "epoch": 0.6411211924821776, "grad_norm": 0.8654069304466248, "learning_rate": 4.570499458331071e-06, "loss": 0.14, "step": 3957 }, { "epoch": 0.6412832145171743, "grad_norm": 0.906300961971283, "learning_rate": 4.570254347018731e-06, "loss": 0.1508, "step": 3958 }, { "epoch": 0.6414452365521711, "grad_norm": 1.011763572692871, "learning_rate": 4.570009172361617e-06, "loss": 0.1649, "step": 3959 }, { "epoch": 0.6416072585871678, "grad_norm": 0.956451952457428, "learning_rate": 4.5697639343672325e-06, "loss": 0.1585, "step": 3960 }, { "epoch": 0.6417692806221647, "grad_norm": 0.8895651698112488, "learning_rate": 4.569518633043081e-06, "loss": 0.1425, "step": 3961 }, { "epoch": 0.6419313026571614, "grad_norm": 0.922642171382904, "learning_rate": 4.569273268396667e-06, "loss": 0.1554, "step": 3962 }, { "epoch": 0.6420933246921582, "grad_norm": 0.8361677527427673, "learning_rate": 4.569027840435498e-06, "loss": 0.1271, "step": 3963 }, { "epoch": 0.6422553467271549, "grad_norm": 0.762519121170044, "learning_rate": 4.568782349167084e-06, "loss": 0.1148, "step": 3964 }, { "epoch": 0.6424173687621516, "grad_norm": 1.0175034999847412, "learning_rate": 4.568536794598937e-06, "loss": 0.1408, "step": 3965 }, { "epoch": 0.6425793907971484, "grad_norm": 1.0413769483566284, "learning_rate": 4.56829117673857e-06, "loss": 0.17, "step": 3966 }, { "epoch": 0.6427414128321451, "grad_norm": 0.8488859534263611, "learning_rate": 4.568045495593497e-06, "loss": 0.1435, "step": 3967 }, { "epoch": 0.642903434867142, "grad_norm": 0.8258789777755737, "learning_rate": 4.567799751171237e-06, "loss": 0.1261, "step": 3968 }, { "epoch": 0.6430654569021387, "grad_norm": 0.8980733752250671, "learning_rate": 4.567553943479309e-06, "loss": 0.1372, "step": 3969 }, { "epoch": 0.6432274789371355, "grad_norm": 0.8406203389167786, "learning_rate": 4.567308072525233e-06, "loss": 0.122, "step": 3970 }, { "epoch": 0.6433895009721322, "grad_norm": 0.9138239622116089, "learning_rate": 4.567062138316534e-06, "loss": 0.1483, "step": 3971 }, { "epoch": 0.6435515230071289, "grad_norm": 0.9522116780281067, "learning_rate": 4.566816140860735e-06, "loss": 0.1437, "step": 3972 }, { "epoch": 0.6437135450421257, "grad_norm": 0.8937114477157593, "learning_rate": 4.566570080165363e-06, "loss": 0.1242, "step": 3973 }, { "epoch": 0.6438755670771225, "grad_norm": 0.9830919504165649, "learning_rate": 4.566323956237948e-06, "loss": 0.1407, "step": 3974 }, { "epoch": 0.6440375891121193, "grad_norm": 1.0365149974822998, "learning_rate": 4.566077769086022e-06, "loss": 0.1556, "step": 3975 }, { "epoch": 0.644199611147116, "grad_norm": 0.9378160238265991, "learning_rate": 4.565831518717114e-06, "loss": 0.1469, "step": 3976 }, { "epoch": 0.6443616331821128, "grad_norm": 0.9110143184661865, "learning_rate": 4.565585205138761e-06, "loss": 0.1368, "step": 3977 }, { "epoch": 0.6445236552171095, "grad_norm": 0.936230480670929, "learning_rate": 4.5653388283585e-06, "loss": 0.1328, "step": 3978 }, { "epoch": 0.6446856772521062, "grad_norm": 1.0089045763015747, "learning_rate": 4.565092388383869e-06, "loss": 0.1427, "step": 3979 }, { "epoch": 0.6448476992871031, "grad_norm": 0.9380981922149658, "learning_rate": 4.564845885222407e-06, "loss": 0.1466, "step": 3980 }, { "epoch": 0.6450097213220998, "grad_norm": 0.9660612344741821, "learning_rate": 4.564599318881659e-06, "loss": 0.1387, "step": 3981 }, { "epoch": 0.6451717433570966, "grad_norm": 0.8815035223960876, "learning_rate": 4.564352689369168e-06, "loss": 0.1407, "step": 3982 }, { "epoch": 0.6453337653920933, "grad_norm": 0.8399519324302673, "learning_rate": 4.56410599669248e-06, "loss": 0.1371, "step": 3983 }, { "epoch": 0.6454957874270901, "grad_norm": 0.965580403804779, "learning_rate": 4.563859240859144e-06, "loss": 0.1474, "step": 3984 }, { "epoch": 0.6456578094620868, "grad_norm": 0.9897104501724243, "learning_rate": 4.5636124218767095e-06, "loss": 0.1633, "step": 3985 }, { "epoch": 0.6458198314970836, "grad_norm": 1.1038873195648193, "learning_rate": 4.563365539752728e-06, "loss": 0.1622, "step": 3986 }, { "epoch": 0.6459818535320804, "grad_norm": 0.9341737031936646, "learning_rate": 4.563118594494755e-06, "loss": 0.1524, "step": 3987 }, { "epoch": 0.6461438755670771, "grad_norm": 1.0048877000808716, "learning_rate": 4.5628715861103455e-06, "loss": 0.1549, "step": 3988 }, { "epoch": 0.6463058976020739, "grad_norm": 0.8541507124900818, "learning_rate": 4.562624514607058e-06, "loss": 0.1366, "step": 3989 }, { "epoch": 0.6464679196370706, "grad_norm": 0.8617687821388245, "learning_rate": 4.562377379992451e-06, "loss": 0.1352, "step": 3990 }, { "epoch": 0.6466299416720674, "grad_norm": 0.8900352120399475, "learning_rate": 4.5621301822740875e-06, "loss": 0.1451, "step": 3991 }, { "epoch": 0.6467919637070642, "grad_norm": 0.917232871055603, "learning_rate": 4.56188292145953e-06, "loss": 0.1397, "step": 3992 }, { "epoch": 0.6469539857420609, "grad_norm": 0.8828876614570618, "learning_rate": 4.5616355975563456e-06, "loss": 0.1346, "step": 3993 }, { "epoch": 0.6471160077770577, "grad_norm": 1.0017690658569336, "learning_rate": 4.561388210572101e-06, "loss": 0.1667, "step": 3994 }, { "epoch": 0.6472780298120544, "grad_norm": 0.9873506426811218, "learning_rate": 4.561140760514365e-06, "loss": 0.1538, "step": 3995 }, { "epoch": 0.6474400518470512, "grad_norm": 0.9482616782188416, "learning_rate": 4.56089324739071e-06, "loss": 0.1576, "step": 3996 }, { "epoch": 0.6476020738820479, "grad_norm": 0.8878632187843323, "learning_rate": 4.560645671208709e-06, "loss": 0.1395, "step": 3997 }, { "epoch": 0.6477640959170448, "grad_norm": 1.057747483253479, "learning_rate": 4.560398031975937e-06, "loss": 0.1718, "step": 3998 }, { "epoch": 0.6479261179520415, "grad_norm": 0.8953585028648376, "learning_rate": 4.560150329699971e-06, "loss": 0.1451, "step": 3999 }, { "epoch": 0.6480881399870383, "grad_norm": 0.900346577167511, "learning_rate": 4.55990256438839e-06, "loss": 0.1447, "step": 4000 }, { "epoch": 0.648250162022035, "grad_norm": 0.8216709494590759, "learning_rate": 4.559654736048776e-06, "loss": 0.1327, "step": 4001 }, { "epoch": 0.6484121840570317, "grad_norm": 0.850792646408081, "learning_rate": 4.559406844688711e-06, "loss": 0.1341, "step": 4002 }, { "epoch": 0.6485742060920285, "grad_norm": 0.9442715644836426, "learning_rate": 4.5591588903157816e-06, "loss": 0.1442, "step": 4003 }, { "epoch": 0.6487362281270252, "grad_norm": 0.8908452391624451, "learning_rate": 4.558910872937572e-06, "loss": 0.1427, "step": 4004 }, { "epoch": 0.6488982501620221, "grad_norm": 0.9406809210777283, "learning_rate": 4.558662792561672e-06, "loss": 0.1616, "step": 4005 }, { "epoch": 0.6490602721970188, "grad_norm": 0.8570629954338074, "learning_rate": 4.558414649195673e-06, "loss": 0.1443, "step": 4006 }, { "epoch": 0.6492222942320156, "grad_norm": 0.8232815861701965, "learning_rate": 4.558166442847166e-06, "loss": 0.1351, "step": 4007 }, { "epoch": 0.6493843162670123, "grad_norm": 0.9287505745887756, "learning_rate": 4.557918173523747e-06, "loss": 0.1492, "step": 4008 }, { "epoch": 0.649546338302009, "grad_norm": 0.9767974615097046, "learning_rate": 4.557669841233013e-06, "loss": 0.1583, "step": 4009 }, { "epoch": 0.6497083603370059, "grad_norm": 0.8061318397521973, "learning_rate": 4.55742144598256e-06, "loss": 0.1257, "step": 4010 }, { "epoch": 0.6498703823720026, "grad_norm": 0.8351037502288818, "learning_rate": 4.557172987779991e-06, "loss": 0.143, "step": 4011 }, { "epoch": 0.6500324044069994, "grad_norm": 0.8559525609016418, "learning_rate": 4.5569244666329055e-06, "loss": 0.1358, "step": 4012 }, { "epoch": 0.6501944264419961, "grad_norm": 0.9949284195899963, "learning_rate": 4.556675882548909e-06, "loss": 0.1463, "step": 4013 }, { "epoch": 0.6503564484769929, "grad_norm": 0.8767020106315613, "learning_rate": 4.5564272355356085e-06, "loss": 0.1366, "step": 4014 }, { "epoch": 0.6505184705119896, "grad_norm": 0.9822929501533508, "learning_rate": 4.556178525600611e-06, "loss": 0.1454, "step": 4015 }, { "epoch": 0.6506804925469863, "grad_norm": 0.8953209519386292, "learning_rate": 4.555929752751526e-06, "loss": 0.1393, "step": 4016 }, { "epoch": 0.6508425145819832, "grad_norm": 0.9238772988319397, "learning_rate": 4.555680916995965e-06, "loss": 0.15, "step": 4017 }, { "epoch": 0.6510045366169799, "grad_norm": 0.8017407655715942, "learning_rate": 4.5554320183415435e-06, "loss": 0.1256, "step": 4018 }, { "epoch": 0.6511665586519767, "grad_norm": 0.9731351137161255, "learning_rate": 4.555183056795877e-06, "loss": 0.1371, "step": 4019 }, { "epoch": 0.6513285806869734, "grad_norm": 0.8913125991821289, "learning_rate": 4.5549340323665815e-06, "loss": 0.1392, "step": 4020 }, { "epoch": 0.6514906027219702, "grad_norm": 0.9241263270378113, "learning_rate": 4.5546849450612774e-06, "loss": 0.1483, "step": 4021 }, { "epoch": 0.651652624756967, "grad_norm": 0.925363302230835, "learning_rate": 4.554435794887586e-06, "loss": 0.1376, "step": 4022 }, { "epoch": 0.6518146467919637, "grad_norm": 1.0443159341812134, "learning_rate": 4.5541865818531315e-06, "loss": 0.1505, "step": 4023 }, { "epoch": 0.6519766688269605, "grad_norm": 0.9155001044273376, "learning_rate": 4.553937305965539e-06, "loss": 0.1499, "step": 4024 }, { "epoch": 0.6521386908619572, "grad_norm": 0.920136034488678, "learning_rate": 4.5536879672324345e-06, "loss": 0.1312, "step": 4025 }, { "epoch": 0.652300712896954, "grad_norm": 0.8369788527488708, "learning_rate": 4.553438565661448e-06, "loss": 0.1225, "step": 4026 }, { "epoch": 0.6524627349319507, "grad_norm": 0.9391274452209473, "learning_rate": 4.553189101260211e-06, "loss": 0.1424, "step": 4027 }, { "epoch": 0.6526247569669476, "grad_norm": 0.9724919199943542, "learning_rate": 4.552939574036356e-06, "loss": 0.1591, "step": 4028 }, { "epoch": 0.6527867790019443, "grad_norm": 0.8575350642204285, "learning_rate": 4.552689983997519e-06, "loss": 0.1452, "step": 4029 }, { "epoch": 0.652948801036941, "grad_norm": 0.9005478620529175, "learning_rate": 4.552440331151334e-06, "loss": 0.152, "step": 4030 }, { "epoch": 0.6531108230719378, "grad_norm": 0.9679160118103027, "learning_rate": 4.552190615505444e-06, "loss": 0.1539, "step": 4031 }, { "epoch": 0.6532728451069345, "grad_norm": 0.8400557041168213, "learning_rate": 4.551940837067486e-06, "loss": 0.1406, "step": 4032 }, { "epoch": 0.6534348671419313, "grad_norm": 0.9820280075073242, "learning_rate": 4.551690995845104e-06, "loss": 0.1618, "step": 4033 }, { "epoch": 0.653596889176928, "grad_norm": 0.862453818321228, "learning_rate": 4.551441091845942e-06, "loss": 0.1438, "step": 4034 }, { "epoch": 0.6537589112119249, "grad_norm": 0.8491960167884827, "learning_rate": 4.551191125077647e-06, "loss": 0.1303, "step": 4035 }, { "epoch": 0.6539209332469216, "grad_norm": 0.8754890561103821, "learning_rate": 4.550941095547869e-06, "loss": 0.1446, "step": 4036 }, { "epoch": 0.6540829552819183, "grad_norm": 0.9268251657485962, "learning_rate": 4.550691003264256e-06, "loss": 0.1556, "step": 4037 }, { "epoch": 0.6542449773169151, "grad_norm": 0.8505957126617432, "learning_rate": 4.55044084823446e-06, "loss": 0.1343, "step": 4038 }, { "epoch": 0.6544069993519118, "grad_norm": 0.8797240853309631, "learning_rate": 4.550190630466137e-06, "loss": 0.1445, "step": 4039 }, { "epoch": 0.6545690213869086, "grad_norm": 0.8217586278915405, "learning_rate": 4.5499403499669415e-06, "loss": 0.1309, "step": 4040 }, { "epoch": 0.6547310434219054, "grad_norm": 1.0774375200271606, "learning_rate": 4.549690006744531e-06, "loss": 0.1524, "step": 4041 }, { "epoch": 0.6548930654569022, "grad_norm": 0.9880624413490295, "learning_rate": 4.549439600806568e-06, "loss": 0.1523, "step": 4042 }, { "epoch": 0.6550550874918989, "grad_norm": 0.9190874695777893, "learning_rate": 4.549189132160713e-06, "loss": 0.133, "step": 4043 }, { "epoch": 0.6552171095268956, "grad_norm": 0.9123296141624451, "learning_rate": 4.548938600814629e-06, "loss": 0.1377, "step": 4044 }, { "epoch": 0.6553791315618924, "grad_norm": 1.0104409456253052, "learning_rate": 4.548688006775981e-06, "loss": 0.1488, "step": 4045 }, { "epoch": 0.6555411535968891, "grad_norm": 0.955862283706665, "learning_rate": 4.5484373500524395e-06, "loss": 0.1588, "step": 4046 }, { "epoch": 0.655703175631886, "grad_norm": 0.7813193202018738, "learning_rate": 4.548186630651671e-06, "loss": 0.1188, "step": 4047 }, { "epoch": 0.6558651976668827, "grad_norm": 1.0729082822799683, "learning_rate": 4.547935848581349e-06, "loss": 0.178, "step": 4048 }, { "epoch": 0.6560272197018795, "grad_norm": 0.8658989071846008, "learning_rate": 4.547685003849145e-06, "loss": 0.1391, "step": 4049 }, { "epoch": 0.6561892417368762, "grad_norm": 1.0283029079437256, "learning_rate": 4.5474340964627365e-06, "loss": 0.1565, "step": 4050 }, { "epoch": 0.656351263771873, "grad_norm": 0.9804076552391052, "learning_rate": 4.547183126429798e-06, "loss": 0.1667, "step": 4051 }, { "epoch": 0.6565132858068697, "grad_norm": 0.7968271374702454, "learning_rate": 4.5469320937580105e-06, "loss": 0.1288, "step": 4052 }, { "epoch": 0.6566753078418665, "grad_norm": 0.9019933938980103, "learning_rate": 4.546680998455054e-06, "loss": 0.1439, "step": 4053 }, { "epoch": 0.6568373298768633, "grad_norm": 0.8464137315750122, "learning_rate": 4.546429840528612e-06, "loss": 0.1405, "step": 4054 }, { "epoch": 0.65699935191186, "grad_norm": 0.8122446537017822, "learning_rate": 4.54617861998637e-06, "loss": 0.1338, "step": 4055 }, { "epoch": 0.6571613739468568, "grad_norm": 0.9290385246276855, "learning_rate": 4.545927336836013e-06, "loss": 0.1323, "step": 4056 }, { "epoch": 0.6573233959818535, "grad_norm": 0.9269915223121643, "learning_rate": 4.545675991085231e-06, "loss": 0.1515, "step": 4057 }, { "epoch": 0.6574854180168503, "grad_norm": 1.0693535804748535, "learning_rate": 4.545424582741714e-06, "loss": 0.1695, "step": 4058 }, { "epoch": 0.657647440051847, "grad_norm": 0.9048995971679688, "learning_rate": 4.545173111813154e-06, "loss": 0.1444, "step": 4059 }, { "epoch": 0.6578094620868438, "grad_norm": 0.8983332514762878, "learning_rate": 4.544921578307246e-06, "loss": 0.1323, "step": 4060 }, { "epoch": 0.6579714841218406, "grad_norm": 1.1153713464736938, "learning_rate": 4.544669982231688e-06, "loss": 0.1742, "step": 4061 }, { "epoch": 0.6581335061568373, "grad_norm": 0.9735933542251587, "learning_rate": 4.544418323594175e-06, "loss": 0.1485, "step": 4062 }, { "epoch": 0.6582955281918341, "grad_norm": 0.9374943375587463, "learning_rate": 4.544166602402409e-06, "loss": 0.144, "step": 4063 }, { "epoch": 0.6584575502268308, "grad_norm": 0.9201976656913757, "learning_rate": 4.543914818664092e-06, "loss": 0.1497, "step": 4064 }, { "epoch": 0.6586195722618277, "grad_norm": 1.0155818462371826, "learning_rate": 4.543662972386927e-06, "loss": 0.1638, "step": 4065 }, { "epoch": 0.6587815942968244, "grad_norm": 0.913718044757843, "learning_rate": 4.543411063578621e-06, "loss": 0.1466, "step": 4066 }, { "epoch": 0.6589436163318211, "grad_norm": 0.813508927822113, "learning_rate": 4.5431590922468815e-06, "loss": 0.1324, "step": 4067 }, { "epoch": 0.6591056383668179, "grad_norm": 0.7492986917495728, "learning_rate": 4.5429070583994185e-06, "loss": 0.1263, "step": 4068 }, { "epoch": 0.6592676604018146, "grad_norm": 0.8122268915176392, "learning_rate": 4.542654962043943e-06, "loss": 0.1274, "step": 4069 }, { "epoch": 0.6594296824368114, "grad_norm": 0.8873049020767212, "learning_rate": 4.542402803188168e-06, "loss": 0.1413, "step": 4070 }, { "epoch": 0.6595917044718081, "grad_norm": 0.9349288940429688, "learning_rate": 4.542150581839811e-06, "loss": 0.1534, "step": 4071 }, { "epoch": 0.659753726506805, "grad_norm": 0.8754650354385376, "learning_rate": 4.5418982980065874e-06, "loss": 0.1503, "step": 4072 }, { "epoch": 0.6599157485418017, "grad_norm": 0.8489086627960205, "learning_rate": 4.541645951696217e-06, "loss": 0.1353, "step": 4073 }, { "epoch": 0.6600777705767984, "grad_norm": 0.8746541142463684, "learning_rate": 4.541393542916423e-06, "loss": 0.1541, "step": 4074 }, { "epoch": 0.6602397926117952, "grad_norm": 0.9938592910766602, "learning_rate": 4.541141071674924e-06, "loss": 0.1663, "step": 4075 }, { "epoch": 0.6604018146467919, "grad_norm": 0.9091808199882507, "learning_rate": 4.540888537979449e-06, "loss": 0.1333, "step": 4076 }, { "epoch": 0.6605638366817888, "grad_norm": 0.8329112529754639, "learning_rate": 4.540635941837723e-06, "loss": 0.1314, "step": 4077 }, { "epoch": 0.6607258587167855, "grad_norm": 0.8982328176498413, "learning_rate": 4.540383283257477e-06, "loss": 0.1536, "step": 4078 }, { "epoch": 0.6608878807517823, "grad_norm": 0.9931498169898987, "learning_rate": 4.540130562246439e-06, "loss": 0.1613, "step": 4079 }, { "epoch": 0.661049902786779, "grad_norm": 0.9610978364944458, "learning_rate": 4.539877778812342e-06, "loss": 0.1464, "step": 4080 }, { "epoch": 0.6612119248217757, "grad_norm": 0.8849239349365234, "learning_rate": 4.539624932962923e-06, "loss": 0.1327, "step": 4081 }, { "epoch": 0.6613739468567725, "grad_norm": 0.9079961776733398, "learning_rate": 4.539372024705916e-06, "loss": 0.1298, "step": 4082 }, { "epoch": 0.6615359688917692, "grad_norm": 0.8636612296104431, "learning_rate": 4.5391190540490595e-06, "loss": 0.1354, "step": 4083 }, { "epoch": 0.6616979909267661, "grad_norm": 1.0267540216445923, "learning_rate": 4.538866021000096e-06, "loss": 0.1476, "step": 4084 }, { "epoch": 0.6618600129617628, "grad_norm": 0.9484116435050964, "learning_rate": 4.538612925566765e-06, "loss": 0.1392, "step": 4085 }, { "epoch": 0.6620220349967596, "grad_norm": 0.9751802086830139, "learning_rate": 4.538359767756813e-06, "loss": 0.1462, "step": 4086 }, { "epoch": 0.6621840570317563, "grad_norm": 0.9586076140403748, "learning_rate": 4.538106547577984e-06, "loss": 0.1576, "step": 4087 }, { "epoch": 0.662346079066753, "grad_norm": 0.9770367741584778, "learning_rate": 4.537853265038027e-06, "loss": 0.1509, "step": 4088 }, { "epoch": 0.6625081011017498, "grad_norm": 0.9630382657051086, "learning_rate": 4.537599920144692e-06, "loss": 0.1464, "step": 4089 }, { "epoch": 0.6626701231367466, "grad_norm": 0.9468212127685547, "learning_rate": 4.537346512905729e-06, "loss": 0.1563, "step": 4090 }, { "epoch": 0.6628321451717434, "grad_norm": 1.0076836347579956, "learning_rate": 4.537093043328894e-06, "loss": 0.1756, "step": 4091 }, { "epoch": 0.6629941672067401, "grad_norm": 0.879906415939331, "learning_rate": 4.536839511421941e-06, "loss": 0.1328, "step": 4092 }, { "epoch": 0.6631561892417369, "grad_norm": 0.8465386629104614, "learning_rate": 4.536585917192629e-06, "loss": 0.1299, "step": 4093 }, { "epoch": 0.6633182112767336, "grad_norm": 0.960082471370697, "learning_rate": 4.536332260648716e-06, "loss": 0.1401, "step": 4094 }, { "epoch": 0.6634802333117304, "grad_norm": 0.8401903510093689, "learning_rate": 4.536078541797964e-06, "loss": 0.1229, "step": 4095 }, { "epoch": 0.6636422553467272, "grad_norm": 1.0268646478652954, "learning_rate": 4.535824760648135e-06, "loss": 0.1713, "step": 4096 }, { "epoch": 0.6638042773817239, "grad_norm": 0.8968318104743958, "learning_rate": 4.535570917206995e-06, "loss": 0.1454, "step": 4097 }, { "epoch": 0.6639662994167207, "grad_norm": 0.9677649736404419, "learning_rate": 4.535317011482311e-06, "loss": 0.1556, "step": 4098 }, { "epoch": 0.6641283214517174, "grad_norm": 0.9355975389480591, "learning_rate": 4.535063043481852e-06, "loss": 0.1549, "step": 4099 }, { "epoch": 0.6642903434867142, "grad_norm": 0.8558212518692017, "learning_rate": 4.534809013213389e-06, "loss": 0.1448, "step": 4100 }, { "epoch": 0.6644523655217109, "grad_norm": 0.8515587449073792, "learning_rate": 4.534554920684694e-06, "loss": 0.1419, "step": 4101 }, { "epoch": 0.6646143875567078, "grad_norm": 0.807841956615448, "learning_rate": 4.534300765903542e-06, "loss": 0.1292, "step": 4102 }, { "epoch": 0.6647764095917045, "grad_norm": 0.8981359601020813, "learning_rate": 4.534046548877709e-06, "loss": 0.1352, "step": 4103 }, { "epoch": 0.6649384316267012, "grad_norm": 0.893144965171814, "learning_rate": 4.533792269614974e-06, "loss": 0.1303, "step": 4104 }, { "epoch": 0.665100453661698, "grad_norm": 0.9923272728919983, "learning_rate": 4.533537928123118e-06, "loss": 0.1647, "step": 4105 }, { "epoch": 0.6652624756966947, "grad_norm": 0.8646336793899536, "learning_rate": 4.533283524409922e-06, "loss": 0.1513, "step": 4106 }, { "epoch": 0.6654244977316915, "grad_norm": 0.952130138874054, "learning_rate": 4.53302905848317e-06, "loss": 0.1633, "step": 4107 }, { "epoch": 0.6655865197666883, "grad_norm": 0.8783090710639954, "learning_rate": 4.53277453035065e-06, "loss": 0.1316, "step": 4108 }, { "epoch": 0.6657485418016851, "grad_norm": 0.8041051626205444, "learning_rate": 4.532519940020148e-06, "loss": 0.1197, "step": 4109 }, { "epoch": 0.6659105638366818, "grad_norm": 0.9836557507514954, "learning_rate": 4.532265287499454e-06, "loss": 0.1687, "step": 4110 }, { "epoch": 0.6660725858716785, "grad_norm": 0.8173419833183289, "learning_rate": 4.532010572796361e-06, "loss": 0.1313, "step": 4111 }, { "epoch": 0.6662346079066753, "grad_norm": 0.8361591696739197, "learning_rate": 4.531755795918661e-06, "loss": 0.128, "step": 4112 }, { "epoch": 0.666396629941672, "grad_norm": 0.9987451434135437, "learning_rate": 4.531500956874151e-06, "loss": 0.1556, "step": 4113 }, { "epoch": 0.6665586519766689, "grad_norm": 0.9050056338310242, "learning_rate": 4.531246055670627e-06, "loss": 0.1359, "step": 4114 }, { "epoch": 0.6667206740116656, "grad_norm": 0.8897657990455627, "learning_rate": 4.53099109231589e-06, "loss": 0.1415, "step": 4115 }, { "epoch": 0.6668826960466624, "grad_norm": 1.0086582899093628, "learning_rate": 4.53073606681774e-06, "loss": 0.1757, "step": 4116 }, { "epoch": 0.6670447180816591, "grad_norm": 0.9577541351318359, "learning_rate": 4.530480979183981e-06, "loss": 0.1415, "step": 4117 }, { "epoch": 0.6672067401166558, "grad_norm": 0.8575126528739929, "learning_rate": 4.530225829422418e-06, "loss": 0.1258, "step": 4118 }, { "epoch": 0.6673687621516526, "grad_norm": 0.9692230224609375, "learning_rate": 4.529970617540857e-06, "loss": 0.1456, "step": 4119 }, { "epoch": 0.6675307841866494, "grad_norm": 0.8811309337615967, "learning_rate": 4.529715343547107e-06, "loss": 0.1295, "step": 4120 }, { "epoch": 0.6676928062216462, "grad_norm": 0.9429895281791687, "learning_rate": 4.529460007448981e-06, "loss": 0.1667, "step": 4121 }, { "epoch": 0.6678548282566429, "grad_norm": 0.9843415021896362, "learning_rate": 4.5292046092542885e-06, "loss": 0.1663, "step": 4122 }, { "epoch": 0.6680168502916397, "grad_norm": 0.8283978700637817, "learning_rate": 4.528949148970846e-06, "loss": 0.1327, "step": 4123 }, { "epoch": 0.6681788723266364, "grad_norm": 0.9020947217941284, "learning_rate": 4.52869362660647e-06, "loss": 0.1461, "step": 4124 }, { "epoch": 0.6683408943616331, "grad_norm": 0.8452586531639099, "learning_rate": 4.528438042168978e-06, "loss": 0.145, "step": 4125 }, { "epoch": 0.66850291639663, "grad_norm": 0.914142370223999, "learning_rate": 4.5281823956661905e-06, "loss": 0.1416, "step": 4126 }, { "epoch": 0.6686649384316267, "grad_norm": 0.9229021072387695, "learning_rate": 4.52792668710593e-06, "loss": 0.1534, "step": 4127 }, { "epoch": 0.6688269604666235, "grad_norm": 0.8286415338516235, "learning_rate": 4.527670916496021e-06, "loss": 0.1303, "step": 4128 }, { "epoch": 0.6689889825016202, "grad_norm": 1.0366954803466797, "learning_rate": 4.5274150838442875e-06, "loss": 0.1616, "step": 4129 }, { "epoch": 0.669151004536617, "grad_norm": 0.886921226978302, "learning_rate": 4.52715918915856e-06, "loss": 0.1416, "step": 4130 }, { "epoch": 0.6693130265716137, "grad_norm": 0.9015594124794006, "learning_rate": 4.5269032324466656e-06, "loss": 0.1492, "step": 4131 }, { "epoch": 0.6694750486066104, "grad_norm": 0.8952576518058777, "learning_rate": 4.526647213716438e-06, "loss": 0.1392, "step": 4132 }, { "epoch": 0.6696370706416073, "grad_norm": 0.8199195861816406, "learning_rate": 4.526391132975711e-06, "loss": 0.1342, "step": 4133 }, { "epoch": 0.669799092676604, "grad_norm": 0.822807252407074, "learning_rate": 4.526134990232317e-06, "loss": 0.1377, "step": 4134 }, { "epoch": 0.6699611147116008, "grad_norm": 0.8194425702095032, "learning_rate": 4.525878785494097e-06, "loss": 0.1341, "step": 4135 }, { "epoch": 0.6701231367465975, "grad_norm": 1.0225645303726196, "learning_rate": 4.525622518768888e-06, "loss": 0.1631, "step": 4136 }, { "epoch": 0.6702851587815943, "grad_norm": 0.8572471737861633, "learning_rate": 4.5253661900645315e-06, "loss": 0.1328, "step": 4137 }, { "epoch": 0.670447180816591, "grad_norm": 0.9486353397369385, "learning_rate": 4.5251097993888726e-06, "loss": 0.1543, "step": 4138 }, { "epoch": 0.6706092028515879, "grad_norm": 0.9721882343292236, "learning_rate": 4.524853346749753e-06, "loss": 0.1558, "step": 4139 }, { "epoch": 0.6707712248865846, "grad_norm": 0.8730854988098145, "learning_rate": 4.524596832155022e-06, "loss": 0.1354, "step": 4140 }, { "epoch": 0.6709332469215813, "grad_norm": 0.8489865660667419, "learning_rate": 4.524340255612526e-06, "loss": 0.1314, "step": 4141 }, { "epoch": 0.6710952689565781, "grad_norm": 0.8910239934921265, "learning_rate": 4.524083617130118e-06, "loss": 0.1545, "step": 4142 }, { "epoch": 0.6712572909915748, "grad_norm": 0.7663037776947021, "learning_rate": 4.523826916715649e-06, "loss": 0.1255, "step": 4143 }, { "epoch": 0.6714193130265717, "grad_norm": 0.9194484353065491, "learning_rate": 4.523570154376975e-06, "loss": 0.1457, "step": 4144 }, { "epoch": 0.6715813350615684, "grad_norm": 0.9784338474273682, "learning_rate": 4.52331333012195e-06, "loss": 0.1645, "step": 4145 }, { "epoch": 0.6717433570965652, "grad_norm": 0.8883211612701416, "learning_rate": 4.5230564439584335e-06, "loss": 0.1359, "step": 4146 }, { "epoch": 0.6719053791315619, "grad_norm": 0.9348551034927368, "learning_rate": 4.522799495894286e-06, "loss": 0.1549, "step": 4147 }, { "epoch": 0.6720674011665586, "grad_norm": 0.76348876953125, "learning_rate": 4.522542485937369e-06, "loss": 0.116, "step": 4148 }, { "epoch": 0.6722294232015554, "grad_norm": 0.8616440296173096, "learning_rate": 4.522285414095547e-06, "loss": 0.1296, "step": 4149 }, { "epoch": 0.6723914452365521, "grad_norm": 0.901292622089386, "learning_rate": 4.522028280376683e-06, "loss": 0.1495, "step": 4150 }, { "epoch": 0.672553467271549, "grad_norm": 0.8072662353515625, "learning_rate": 4.521771084788649e-06, "loss": 0.1316, "step": 4151 }, { "epoch": 0.6727154893065457, "grad_norm": 0.9773443937301636, "learning_rate": 4.521513827339311e-06, "loss": 0.164, "step": 4152 }, { "epoch": 0.6728775113415425, "grad_norm": 0.8923728466033936, "learning_rate": 4.521256508036543e-06, "loss": 0.1437, "step": 4153 }, { "epoch": 0.6730395333765392, "grad_norm": 1.0206551551818848, "learning_rate": 4.5209991268882165e-06, "loss": 0.1608, "step": 4154 }, { "epoch": 0.6732015554115359, "grad_norm": 0.9942020177841187, "learning_rate": 4.520741683902208e-06, "loss": 0.1553, "step": 4155 }, { "epoch": 0.6733635774465327, "grad_norm": 0.9606051445007324, "learning_rate": 4.520484179086394e-06, "loss": 0.1577, "step": 4156 }, { "epoch": 0.6735255994815295, "grad_norm": 0.9637233018875122, "learning_rate": 4.520226612448653e-06, "loss": 0.1646, "step": 4157 }, { "epoch": 0.6736876215165263, "grad_norm": 0.8612246513366699, "learning_rate": 4.519968983996867e-06, "loss": 0.1358, "step": 4158 }, { "epoch": 0.673849643551523, "grad_norm": 0.9165799617767334, "learning_rate": 4.519711293738918e-06, "loss": 0.154, "step": 4159 }, { "epoch": 0.6740116655865198, "grad_norm": 0.853792130947113, "learning_rate": 4.519453541682691e-06, "loss": 0.1378, "step": 4160 }, { "epoch": 0.6741736876215165, "grad_norm": 0.9091629385948181, "learning_rate": 4.519195727836073e-06, "loss": 0.1311, "step": 4161 }, { "epoch": 0.6743357096565132, "grad_norm": 1.033930778503418, "learning_rate": 4.518937852206952e-06, "loss": 0.1692, "step": 4162 }, { "epoch": 0.6744977316915101, "grad_norm": 0.9184512495994568, "learning_rate": 4.518679914803218e-06, "loss": 0.1417, "step": 4163 }, { "epoch": 0.6746597537265068, "grad_norm": 0.8692227602005005, "learning_rate": 4.518421915632764e-06, "loss": 0.1491, "step": 4164 }, { "epoch": 0.6748217757615036, "grad_norm": 0.817550778388977, "learning_rate": 4.518163854703484e-06, "loss": 0.1407, "step": 4165 }, { "epoch": 0.6749837977965003, "grad_norm": 0.8003614544868469, "learning_rate": 4.5179057320232735e-06, "loss": 0.126, "step": 4166 }, { "epoch": 0.6751458198314971, "grad_norm": 0.847512423992157, "learning_rate": 4.517647547600032e-06, "loss": 0.1203, "step": 4167 }, { "epoch": 0.6753078418664938, "grad_norm": 0.9135223031044006, "learning_rate": 4.517389301441657e-06, "loss": 0.1311, "step": 4168 }, { "epoch": 0.6754698639014906, "grad_norm": 0.9252517819404602, "learning_rate": 4.517130993556051e-06, "loss": 0.1576, "step": 4169 }, { "epoch": 0.6756318859364874, "grad_norm": 0.8751834034919739, "learning_rate": 4.51687262395112e-06, "loss": 0.1332, "step": 4170 }, { "epoch": 0.6757939079714841, "grad_norm": 0.8638715147972107, "learning_rate": 4.516614192634765e-06, "loss": 0.1313, "step": 4171 }, { "epoch": 0.6759559300064809, "grad_norm": 0.910304844379425, "learning_rate": 4.516355699614897e-06, "loss": 0.1441, "step": 4172 }, { "epoch": 0.6761179520414776, "grad_norm": 0.8590049743652344, "learning_rate": 4.516097144899424e-06, "loss": 0.1324, "step": 4173 }, { "epoch": 0.6762799740764744, "grad_norm": 0.9081874489784241, "learning_rate": 4.515838528496257e-06, "loss": 0.1493, "step": 4174 }, { "epoch": 0.6764419961114712, "grad_norm": 0.9282753467559814, "learning_rate": 4.51557985041331e-06, "loss": 0.1555, "step": 4175 }, { "epoch": 0.6766040181464679, "grad_norm": 0.9835655093193054, "learning_rate": 4.5153211106584965e-06, "loss": 0.1559, "step": 4176 }, { "epoch": 0.6767660401814647, "grad_norm": 1.0135793685913086, "learning_rate": 4.515062309239734e-06, "loss": 0.1568, "step": 4177 }, { "epoch": 0.6769280622164614, "grad_norm": 0.8695417046546936, "learning_rate": 4.514803446164941e-06, "loss": 0.1355, "step": 4178 }, { "epoch": 0.6770900842514582, "grad_norm": 0.7575681805610657, "learning_rate": 4.514544521442039e-06, "loss": 0.1289, "step": 4179 }, { "epoch": 0.6772521062864549, "grad_norm": 0.9172993302345276, "learning_rate": 4.514285535078949e-06, "loss": 0.1565, "step": 4180 }, { "epoch": 0.6774141283214518, "grad_norm": 0.8855761885643005, "learning_rate": 4.5140264870835974e-06, "loss": 0.1476, "step": 4181 }, { "epoch": 0.6775761503564485, "grad_norm": 0.9483667016029358, "learning_rate": 4.513767377463908e-06, "loss": 0.1516, "step": 4182 }, { "epoch": 0.6777381723914452, "grad_norm": 0.8638037443161011, "learning_rate": 4.51350820622781e-06, "loss": 0.1359, "step": 4183 }, { "epoch": 0.677900194426442, "grad_norm": 0.8898397088050842, "learning_rate": 4.513248973383234e-06, "loss": 0.1323, "step": 4184 }, { "epoch": 0.6780622164614387, "grad_norm": 0.9830394983291626, "learning_rate": 4.512989678938111e-06, "loss": 0.1544, "step": 4185 }, { "epoch": 0.6782242384964355, "grad_norm": 0.8503865599632263, "learning_rate": 4.512730322900375e-06, "loss": 0.1273, "step": 4186 }, { "epoch": 0.6783862605314323, "grad_norm": 1.0824391841888428, "learning_rate": 4.5124709052779626e-06, "loss": 0.1775, "step": 4187 }, { "epoch": 0.6785482825664291, "grad_norm": 0.9633022546768188, "learning_rate": 4.51221142607881e-06, "loss": 0.1557, "step": 4188 }, { "epoch": 0.6787103046014258, "grad_norm": 0.9733066558837891, "learning_rate": 4.511951885310858e-06, "loss": 0.1404, "step": 4189 }, { "epoch": 0.6788723266364226, "grad_norm": 1.0073535442352295, "learning_rate": 4.511692282982047e-06, "loss": 0.1678, "step": 4190 }, { "epoch": 0.6790343486714193, "grad_norm": 0.8856287002563477, "learning_rate": 4.511432619100319e-06, "loss": 0.131, "step": 4191 }, { "epoch": 0.679196370706416, "grad_norm": 0.896113395690918, "learning_rate": 4.511172893673621e-06, "loss": 0.1373, "step": 4192 }, { "epoch": 0.6793583927414129, "grad_norm": 0.9079231023788452, "learning_rate": 4.5109131067099e-06, "loss": 0.1411, "step": 4193 }, { "epoch": 0.6795204147764096, "grad_norm": 0.7973667979240417, "learning_rate": 4.510653258217103e-06, "loss": 0.1338, "step": 4194 }, { "epoch": 0.6796824368114064, "grad_norm": 0.9222817420959473, "learning_rate": 4.510393348203184e-06, "loss": 0.1556, "step": 4195 }, { "epoch": 0.6798444588464031, "grad_norm": 1.0345180034637451, "learning_rate": 4.5101333766760926e-06, "loss": 0.1483, "step": 4196 }, { "epoch": 0.6800064808813999, "grad_norm": 0.8539285659790039, "learning_rate": 4.509873343643783e-06, "loss": 0.1385, "step": 4197 }, { "epoch": 0.6801685029163966, "grad_norm": 0.8832521438598633, "learning_rate": 4.509613249114215e-06, "loss": 0.1512, "step": 4198 }, { "epoch": 0.6803305249513933, "grad_norm": 1.0024913549423218, "learning_rate": 4.509353093095344e-06, "loss": 0.1768, "step": 4199 }, { "epoch": 0.6804925469863902, "grad_norm": 0.8852626085281372, "learning_rate": 4.509092875595131e-06, "loss": 0.1321, "step": 4200 }, { "epoch": 0.6806545690213869, "grad_norm": 0.8801068663597107, "learning_rate": 4.5088325966215375e-06, "loss": 0.147, "step": 4201 }, { "epoch": 0.6808165910563837, "grad_norm": 0.850282609462738, "learning_rate": 4.508572256182528e-06, "loss": 0.1439, "step": 4202 }, { "epoch": 0.6809786130913804, "grad_norm": 0.880113959312439, "learning_rate": 4.508311854286068e-06, "loss": 0.1409, "step": 4203 }, { "epoch": 0.6811406351263772, "grad_norm": 0.895704984664917, "learning_rate": 4.508051390940125e-06, "loss": 0.1374, "step": 4204 }, { "epoch": 0.681302657161374, "grad_norm": 0.9211687445640564, "learning_rate": 4.507790866152669e-06, "loss": 0.1412, "step": 4205 }, { "epoch": 0.6814646791963707, "grad_norm": 0.7838913798332214, "learning_rate": 4.507530279931673e-06, "loss": 0.1248, "step": 4206 }, { "epoch": 0.6816267012313675, "grad_norm": 0.923410952091217, "learning_rate": 4.507269632285106e-06, "loss": 0.1537, "step": 4207 }, { "epoch": 0.6817887232663642, "grad_norm": 0.8629544377326965, "learning_rate": 4.5070089232209465e-06, "loss": 0.1259, "step": 4208 }, { "epoch": 0.681950745301361, "grad_norm": 1.0048764944076538, "learning_rate": 4.506748152747171e-06, "loss": 0.1619, "step": 4209 }, { "epoch": 0.6821127673363577, "grad_norm": 1.133623719215393, "learning_rate": 4.506487320871758e-06, "loss": 0.1596, "step": 4210 }, { "epoch": 0.6822747893713546, "grad_norm": 0.979425311088562, "learning_rate": 4.50622642760269e-06, "loss": 0.1672, "step": 4211 }, { "epoch": 0.6824368114063513, "grad_norm": 0.9712530374526978, "learning_rate": 4.5059654729479474e-06, "loss": 0.1245, "step": 4212 }, { "epoch": 0.682598833441348, "grad_norm": 0.7488314509391785, "learning_rate": 4.505704456915515e-06, "loss": 0.1177, "step": 4213 }, { "epoch": 0.6827608554763448, "grad_norm": 0.8927283883094788, "learning_rate": 4.505443379513381e-06, "loss": 0.1482, "step": 4214 }, { "epoch": 0.6829228775113415, "grad_norm": 0.9022080898284912, "learning_rate": 4.5051822407495314e-06, "loss": 0.1382, "step": 4215 }, { "epoch": 0.6830848995463383, "grad_norm": 1.0110646486282349, "learning_rate": 4.5049210406319585e-06, "loss": 0.1619, "step": 4216 }, { "epoch": 0.683246921581335, "grad_norm": 0.8575459122657776, "learning_rate": 4.504659779168654e-06, "loss": 0.147, "step": 4217 }, { "epoch": 0.6834089436163319, "grad_norm": 0.9155558943748474, "learning_rate": 4.5043984563676105e-06, "loss": 0.1438, "step": 4218 }, { "epoch": 0.6835709656513286, "grad_norm": 0.9820864796638489, "learning_rate": 4.504137072236825e-06, "loss": 0.1536, "step": 4219 }, { "epoch": 0.6837329876863253, "grad_norm": 0.8118696808815002, "learning_rate": 4.503875626784295e-06, "loss": 0.1275, "step": 4220 }, { "epoch": 0.6838950097213221, "grad_norm": 0.9118869304656982, "learning_rate": 4.5036141200180206e-06, "loss": 0.1514, "step": 4221 }, { "epoch": 0.6840570317563188, "grad_norm": 0.895452082157135, "learning_rate": 4.503352551946003e-06, "loss": 0.1496, "step": 4222 }, { "epoch": 0.6842190537913156, "grad_norm": 0.8009963035583496, "learning_rate": 4.503090922576244e-06, "loss": 0.1173, "step": 4223 }, { "epoch": 0.6843810758263124, "grad_norm": 0.8450411558151245, "learning_rate": 4.5028292319167515e-06, "loss": 0.1401, "step": 4224 }, { "epoch": 0.6845430978613092, "grad_norm": 0.884223997592926, "learning_rate": 4.5025674799755306e-06, "loss": 0.1356, "step": 4225 }, { "epoch": 0.6847051198963059, "grad_norm": 1.1534299850463867, "learning_rate": 4.502305666760592e-06, "loss": 0.1775, "step": 4226 }, { "epoch": 0.6848671419313026, "grad_norm": 0.9515455365180969, "learning_rate": 4.502043792279943e-06, "loss": 0.1428, "step": 4227 }, { "epoch": 0.6850291639662994, "grad_norm": 0.9897893071174622, "learning_rate": 4.501781856541601e-06, "loss": 0.1498, "step": 4228 }, { "epoch": 0.6851911860012961, "grad_norm": 0.8131996393203735, "learning_rate": 4.501519859553578e-06, "loss": 0.1341, "step": 4229 }, { "epoch": 0.685353208036293, "grad_norm": 0.8663079142570496, "learning_rate": 4.50125780132389e-06, "loss": 0.1286, "step": 4230 }, { "epoch": 0.6855152300712897, "grad_norm": 0.8376840949058533, "learning_rate": 4.500995681860557e-06, "loss": 0.1301, "step": 4231 }, { "epoch": 0.6856772521062865, "grad_norm": 0.8951122164726257, "learning_rate": 4.500733501171599e-06, "loss": 0.1411, "step": 4232 }, { "epoch": 0.6858392741412832, "grad_norm": 1.0260791778564453, "learning_rate": 4.500471259265037e-06, "loss": 0.1705, "step": 4233 }, { "epoch": 0.68600129617628, "grad_norm": 1.0505539178848267, "learning_rate": 4.500208956148895e-06, "loss": 0.1712, "step": 4234 }, { "epoch": 0.6861633182112767, "grad_norm": 1.0583857297897339, "learning_rate": 4.4999465918312e-06, "loss": 0.1724, "step": 4235 }, { "epoch": 0.6863253402462735, "grad_norm": 0.8932731747627258, "learning_rate": 4.499684166319978e-06, "loss": 0.1523, "step": 4236 }, { "epoch": 0.6864873622812703, "grad_norm": 0.9509609937667847, "learning_rate": 4.499421679623261e-06, "loss": 0.1455, "step": 4237 }, { "epoch": 0.686649384316267, "grad_norm": 0.9110451340675354, "learning_rate": 4.499159131749079e-06, "loss": 0.1367, "step": 4238 }, { "epoch": 0.6868114063512638, "grad_norm": 0.9273454546928406, "learning_rate": 4.498896522705465e-06, "loss": 0.1397, "step": 4239 }, { "epoch": 0.6869734283862605, "grad_norm": 0.8918704390525818, "learning_rate": 4.498633852500455e-06, "loss": 0.1376, "step": 4240 }, { "epoch": 0.6871354504212573, "grad_norm": 0.9055836200714111, "learning_rate": 4.4983711211420844e-06, "loss": 0.1428, "step": 4241 }, { "epoch": 0.687297472456254, "grad_norm": 0.9966475963592529, "learning_rate": 4.498108328638395e-06, "loss": 0.1578, "step": 4242 }, { "epoch": 0.6874594944912508, "grad_norm": 0.954372227191925, "learning_rate": 4.497845474997425e-06, "loss": 0.1375, "step": 4243 }, { "epoch": 0.6876215165262476, "grad_norm": 1.3921228647232056, "learning_rate": 4.4975825602272185e-06, "loss": 0.1365, "step": 4244 }, { "epoch": 0.6877835385612443, "grad_norm": 0.9259489178657532, "learning_rate": 4.497319584335821e-06, "loss": 0.1462, "step": 4245 }, { "epoch": 0.6879455605962411, "grad_norm": 0.9492351412773132, "learning_rate": 4.497056547331276e-06, "loss": 0.149, "step": 4246 }, { "epoch": 0.6881075826312378, "grad_norm": 0.9279522895812988, "learning_rate": 4.496793449221634e-06, "loss": 0.1434, "step": 4247 }, { "epoch": 0.6882696046662347, "grad_norm": 1.0066229104995728, "learning_rate": 4.496530290014945e-06, "loss": 0.1634, "step": 4248 }, { "epoch": 0.6884316267012314, "grad_norm": 0.853121817111969, "learning_rate": 4.496267069719259e-06, "loss": 0.1469, "step": 4249 }, { "epoch": 0.6885936487362281, "grad_norm": 1.041290521621704, "learning_rate": 4.496003788342633e-06, "loss": 0.1604, "step": 4250 }, { "epoch": 0.6887556707712249, "grad_norm": 1.0122541189193726, "learning_rate": 4.495740445893121e-06, "loss": 0.1334, "step": 4251 }, { "epoch": 0.6889176928062216, "grad_norm": 1.0344403982162476, "learning_rate": 4.495477042378781e-06, "loss": 0.1643, "step": 4252 }, { "epoch": 0.6890797148412184, "grad_norm": 0.811650276184082, "learning_rate": 4.495213577807672e-06, "loss": 0.1343, "step": 4253 }, { "epoch": 0.6892417368762151, "grad_norm": 0.842697024345398, "learning_rate": 4.494950052187857e-06, "loss": 0.1383, "step": 4254 }, { "epoch": 0.689403758911212, "grad_norm": 0.8029190897941589, "learning_rate": 4.494686465527397e-06, "loss": 0.1306, "step": 4255 }, { "epoch": 0.6895657809462087, "grad_norm": 0.8258494734764099, "learning_rate": 4.494422817834359e-06, "loss": 0.1334, "step": 4256 }, { "epoch": 0.6897278029812054, "grad_norm": 0.8976909518241882, "learning_rate": 4.494159109116809e-06, "loss": 0.1548, "step": 4257 }, { "epoch": 0.6898898250162022, "grad_norm": 0.8805419206619263, "learning_rate": 4.493895339382815e-06, "loss": 0.1286, "step": 4258 }, { "epoch": 0.6900518470511989, "grad_norm": 0.7982555627822876, "learning_rate": 4.49363150864045e-06, "loss": 0.1213, "step": 4259 }, { "epoch": 0.6902138690861958, "grad_norm": 1.0019187927246094, "learning_rate": 4.493367616897785e-06, "loss": 0.1542, "step": 4260 }, { "epoch": 0.6903758911211925, "grad_norm": 0.832612931728363, "learning_rate": 4.4931036641628946e-06, "loss": 0.1404, "step": 4261 }, { "epoch": 0.6905379131561893, "grad_norm": 0.9622425436973572, "learning_rate": 4.4928396504438555e-06, "loss": 0.1418, "step": 4262 }, { "epoch": 0.690699935191186, "grad_norm": 0.8642094731330872, "learning_rate": 4.492575575748746e-06, "loss": 0.1437, "step": 4263 }, { "epoch": 0.6908619572261827, "grad_norm": 1.0327246189117432, "learning_rate": 4.4923114400856445e-06, "loss": 0.1633, "step": 4264 }, { "epoch": 0.6910239792611795, "grad_norm": 1.0118496417999268, "learning_rate": 4.492047243462636e-06, "loss": 0.1497, "step": 4265 }, { "epoch": 0.6911860012961762, "grad_norm": 0.860984742641449, "learning_rate": 4.491782985887802e-06, "loss": 0.1416, "step": 4266 }, { "epoch": 0.6913480233311731, "grad_norm": 0.9283857941627502, "learning_rate": 4.491518667369228e-06, "loss": 0.1471, "step": 4267 }, { "epoch": 0.6915100453661698, "grad_norm": 0.9157022833824158, "learning_rate": 4.491254287915003e-06, "loss": 0.1482, "step": 4268 }, { "epoch": 0.6916720674011666, "grad_norm": 0.9066836833953857, "learning_rate": 4.490989847533214e-06, "loss": 0.1549, "step": 4269 }, { "epoch": 0.6918340894361633, "grad_norm": 0.7749550938606262, "learning_rate": 4.490725346231954e-06, "loss": 0.1114, "step": 4270 }, { "epoch": 0.69199611147116, "grad_norm": 0.7799788117408752, "learning_rate": 4.490460784019317e-06, "loss": 0.1278, "step": 4271 }, { "epoch": 0.6921581335061568, "grad_norm": 1.0473695993423462, "learning_rate": 4.4901961609033965e-06, "loss": 0.19, "step": 4272 }, { "epoch": 0.6923201555411536, "grad_norm": 0.9344850778579712, "learning_rate": 4.489931476892289e-06, "loss": 0.1531, "step": 4273 }, { "epoch": 0.6924821775761504, "grad_norm": 0.990973711013794, "learning_rate": 4.489666731994095e-06, "loss": 0.1538, "step": 4274 }, { "epoch": 0.6926441996111471, "grad_norm": 0.9593062400817871, "learning_rate": 4.4894019262169134e-06, "loss": 0.1559, "step": 4275 }, { "epoch": 0.6928062216461439, "grad_norm": 0.8276656866073608, "learning_rate": 4.489137059568847e-06, "loss": 0.1293, "step": 4276 }, { "epoch": 0.6929682436811406, "grad_norm": 0.8525620698928833, "learning_rate": 4.488872132058001e-06, "loss": 0.1359, "step": 4277 }, { "epoch": 0.6931302657161373, "grad_norm": 0.933274507522583, "learning_rate": 4.48860714369248e-06, "loss": 0.1463, "step": 4278 }, { "epoch": 0.6932922877511342, "grad_norm": 1.014924168586731, "learning_rate": 4.4883420944803925e-06, "loss": 0.1627, "step": 4279 }, { "epoch": 0.6934543097861309, "grad_norm": 0.9470938444137573, "learning_rate": 4.488076984429849e-06, "loss": 0.1612, "step": 4280 }, { "epoch": 0.6936163318211277, "grad_norm": 0.861634373664856, "learning_rate": 4.48781181354896e-06, "loss": 0.1445, "step": 4281 }, { "epoch": 0.6937783538561244, "grad_norm": 0.8474523425102234, "learning_rate": 4.4875465818458404e-06, "loss": 0.1379, "step": 4282 }, { "epoch": 0.6939403758911212, "grad_norm": 0.8774752616882324, "learning_rate": 4.487281289328605e-06, "loss": 0.1374, "step": 4283 }, { "epoch": 0.6941023979261179, "grad_norm": 0.8172398805618286, "learning_rate": 4.4870159360053725e-06, "loss": 0.1332, "step": 4284 }, { "epoch": 0.6942644199611148, "grad_norm": 0.8533669114112854, "learning_rate": 4.4867505218842596e-06, "loss": 0.1421, "step": 4285 }, { "epoch": 0.6944264419961115, "grad_norm": 0.909589946269989, "learning_rate": 4.4864850469733886e-06, "loss": 0.1532, "step": 4286 }, { "epoch": 0.6945884640311082, "grad_norm": 0.7759212255477905, "learning_rate": 4.486219511280883e-06, "loss": 0.1142, "step": 4287 }, { "epoch": 0.694750486066105, "grad_norm": 0.9085809588432312, "learning_rate": 4.485953914814867e-06, "loss": 0.1354, "step": 4288 }, { "epoch": 0.6949125081011017, "grad_norm": 0.857865035533905, "learning_rate": 4.485688257583467e-06, "loss": 0.1398, "step": 4289 }, { "epoch": 0.6950745301360985, "grad_norm": 0.8624712824821472, "learning_rate": 4.485422539594811e-06, "loss": 0.1342, "step": 4290 }, { "epoch": 0.6952365521710953, "grad_norm": 0.8267756104469299, "learning_rate": 4.48515676085703e-06, "loss": 0.1381, "step": 4291 }, { "epoch": 0.6953985742060921, "grad_norm": 0.9393563270568848, "learning_rate": 4.4848909213782566e-06, "loss": 0.1485, "step": 4292 }, { "epoch": 0.6955605962410888, "grad_norm": 0.9284929037094116, "learning_rate": 4.484625021166624e-06, "loss": 0.1549, "step": 4293 }, { "epoch": 0.6957226182760855, "grad_norm": 0.9308271408081055, "learning_rate": 4.484359060230269e-06, "loss": 0.1443, "step": 4294 }, { "epoch": 0.6958846403110823, "grad_norm": 0.9445673227310181, "learning_rate": 4.484093038577329e-06, "loss": 0.127, "step": 4295 }, { "epoch": 0.696046662346079, "grad_norm": 0.8204240798950195, "learning_rate": 4.483826956215942e-06, "loss": 0.1279, "step": 4296 }, { "epoch": 0.6962086843810759, "grad_norm": 0.9389863610267639, "learning_rate": 4.483560813154252e-06, "loss": 0.1493, "step": 4297 }, { "epoch": 0.6963707064160726, "grad_norm": 0.8122605085372925, "learning_rate": 4.4832946094004e-06, "loss": 0.1314, "step": 4298 }, { "epoch": 0.6965327284510694, "grad_norm": 0.9366967082023621, "learning_rate": 4.483028344962534e-06, "loss": 0.1396, "step": 4299 }, { "epoch": 0.6966947504860661, "grad_norm": 0.8493170738220215, "learning_rate": 4.482762019848799e-06, "loss": 0.1372, "step": 4300 }, { "epoch": 0.6968567725210628, "grad_norm": 0.9560868740081787, "learning_rate": 4.482495634067344e-06, "loss": 0.1642, "step": 4301 }, { "epoch": 0.6970187945560596, "grad_norm": 0.8868750333786011, "learning_rate": 4.48222918762632e-06, "loss": 0.1407, "step": 4302 }, { "epoch": 0.6971808165910564, "grad_norm": 0.8307478427886963, "learning_rate": 4.48196268053388e-06, "loss": 0.136, "step": 4303 }, { "epoch": 0.6973428386260532, "grad_norm": 0.923088014125824, "learning_rate": 4.481696112798179e-06, "loss": 0.1425, "step": 4304 }, { "epoch": 0.6975048606610499, "grad_norm": 0.8834422826766968, "learning_rate": 4.481429484427372e-06, "loss": 0.1395, "step": 4305 }, { "epoch": 0.6976668826960467, "grad_norm": 0.9474985599517822, "learning_rate": 4.481162795429618e-06, "loss": 0.153, "step": 4306 }, { "epoch": 0.6978289047310434, "grad_norm": 0.9343580603599548, "learning_rate": 4.480896045813076e-06, "loss": 0.145, "step": 4307 }, { "epoch": 0.6979909267660401, "grad_norm": 0.746100127696991, "learning_rate": 4.480629235585909e-06, "loss": 0.1194, "step": 4308 }, { "epoch": 0.698152948801037, "grad_norm": 0.9636530876159668, "learning_rate": 4.480362364756281e-06, "loss": 0.1526, "step": 4309 }, { "epoch": 0.6983149708360337, "grad_norm": 0.7834259271621704, "learning_rate": 4.480095433332357e-06, "loss": 0.1296, "step": 4310 }, { "epoch": 0.6984769928710305, "grad_norm": 0.9413818717002869, "learning_rate": 4.479828441322304e-06, "loss": 0.1542, "step": 4311 }, { "epoch": 0.6986390149060272, "grad_norm": 0.9346475005149841, "learning_rate": 4.4795613887342916e-06, "loss": 0.1429, "step": 4312 }, { "epoch": 0.698801036941024, "grad_norm": 0.917719841003418, "learning_rate": 4.479294275576492e-06, "loss": 0.1366, "step": 4313 }, { "epoch": 0.6989630589760207, "grad_norm": 0.8321374654769897, "learning_rate": 4.479027101857076e-06, "loss": 0.1395, "step": 4314 }, { "epoch": 0.6991250810110174, "grad_norm": 0.9253593683242798, "learning_rate": 4.478759867584221e-06, "loss": 0.1371, "step": 4315 }, { "epoch": 0.6992871030460143, "grad_norm": 0.8277562856674194, "learning_rate": 4.4784925727661025e-06, "loss": 0.1368, "step": 4316 }, { "epoch": 0.699449125081011, "grad_norm": 1.0059376955032349, "learning_rate": 4.478225217410898e-06, "loss": 0.1577, "step": 4317 }, { "epoch": 0.6996111471160078, "grad_norm": 0.9551058411598206, "learning_rate": 4.47795780152679e-06, "loss": 0.1414, "step": 4318 }, { "epoch": 0.6997731691510045, "grad_norm": 1.059830665588379, "learning_rate": 4.47769032512196e-06, "loss": 0.1566, "step": 4319 }, { "epoch": 0.6999351911860013, "grad_norm": 0.8938872814178467, "learning_rate": 4.477422788204592e-06, "loss": 0.1362, "step": 4320 }, { "epoch": 0.700097213220998, "grad_norm": 0.9738820195198059, "learning_rate": 4.4771551907828714e-06, "loss": 0.1627, "step": 4321 }, { "epoch": 0.7002592352559948, "grad_norm": 0.812980592250824, "learning_rate": 4.476887532864986e-06, "loss": 0.1344, "step": 4322 }, { "epoch": 0.7004212572909916, "grad_norm": 0.9212116599082947, "learning_rate": 4.476619814459128e-06, "loss": 0.1346, "step": 4323 }, { "epoch": 0.7005832793259883, "grad_norm": 0.7404506802558899, "learning_rate": 4.476352035573486e-06, "loss": 0.1101, "step": 4324 }, { "epoch": 0.7007453013609851, "grad_norm": 0.8756342530250549, "learning_rate": 4.4760841962162535e-06, "loss": 0.1492, "step": 4325 }, { "epoch": 0.7009073233959818, "grad_norm": 0.9470415711402893, "learning_rate": 4.475816296395627e-06, "loss": 0.1547, "step": 4326 }, { "epoch": 0.7010693454309787, "grad_norm": 0.7365562915802002, "learning_rate": 4.475548336119804e-06, "loss": 0.1169, "step": 4327 }, { "epoch": 0.7012313674659754, "grad_norm": 0.9501516819000244, "learning_rate": 4.475280315396982e-06, "loss": 0.1503, "step": 4328 }, { "epoch": 0.7013933895009722, "grad_norm": 0.8905579447746277, "learning_rate": 4.475012234235363e-06, "loss": 0.1577, "step": 4329 }, { "epoch": 0.7015554115359689, "grad_norm": 0.9142157435417175, "learning_rate": 4.474744092643149e-06, "loss": 0.1435, "step": 4330 }, { "epoch": 0.7017174335709656, "grad_norm": 0.8763073086738586, "learning_rate": 4.474475890628545e-06, "loss": 0.1384, "step": 4331 }, { "epoch": 0.7018794556059624, "grad_norm": 0.8825966715812683, "learning_rate": 4.474207628199756e-06, "loss": 0.1515, "step": 4332 }, { "epoch": 0.7020414776409591, "grad_norm": 0.876340389251709, "learning_rate": 4.473939305364991e-06, "loss": 0.1513, "step": 4333 }, { "epoch": 0.702203499675956, "grad_norm": 0.8109468221664429, "learning_rate": 4.47367092213246e-06, "loss": 0.1298, "step": 4334 }, { "epoch": 0.7023655217109527, "grad_norm": 0.9166135787963867, "learning_rate": 4.473402478510376e-06, "loss": 0.1428, "step": 4335 }, { "epoch": 0.7025275437459495, "grad_norm": 0.838951051235199, "learning_rate": 4.473133974506951e-06, "loss": 0.1315, "step": 4336 }, { "epoch": 0.7026895657809462, "grad_norm": 0.866460382938385, "learning_rate": 4.472865410130401e-06, "loss": 0.1348, "step": 4337 }, { "epoch": 0.7028515878159429, "grad_norm": 0.8910289406776428, "learning_rate": 4.472596785388944e-06, "loss": 0.1366, "step": 4338 }, { "epoch": 0.7030136098509397, "grad_norm": 1.0762733221054077, "learning_rate": 4.472328100290799e-06, "loss": 0.173, "step": 4339 }, { "epoch": 0.7031756318859365, "grad_norm": 0.9918913841247559, "learning_rate": 4.472059354844187e-06, "loss": 0.1718, "step": 4340 }, { "epoch": 0.7033376539209333, "grad_norm": 0.9252434968948364, "learning_rate": 4.471790549057332e-06, "loss": 0.147, "step": 4341 }, { "epoch": 0.70349967595593, "grad_norm": 0.9707101583480835, "learning_rate": 4.4715216829384566e-06, "loss": 0.1484, "step": 4342 }, { "epoch": 0.7036616979909268, "grad_norm": 0.9649327397346497, "learning_rate": 4.471252756495789e-06, "loss": 0.1663, "step": 4343 }, { "epoch": 0.7038237200259235, "grad_norm": 0.8951583504676819, "learning_rate": 4.470983769737557e-06, "loss": 0.1424, "step": 4344 }, { "epoch": 0.7039857420609202, "grad_norm": 0.885223388671875, "learning_rate": 4.470714722671992e-06, "loss": 0.1515, "step": 4345 }, { "epoch": 0.7041477640959171, "grad_norm": 1.0771913528442383, "learning_rate": 4.470445615307325e-06, "loss": 0.179, "step": 4346 }, { "epoch": 0.7043097861309138, "grad_norm": 1.0075687170028687, "learning_rate": 4.470176447651791e-06, "loss": 0.1614, "step": 4347 }, { "epoch": 0.7044718081659106, "grad_norm": 0.9713208675384521, "learning_rate": 4.4699072197136255e-06, "loss": 0.1478, "step": 4348 }, { "epoch": 0.7046338302009073, "grad_norm": 0.9104232788085938, "learning_rate": 4.469637931501066e-06, "loss": 0.1519, "step": 4349 }, { "epoch": 0.7047958522359041, "grad_norm": 0.9640597105026245, "learning_rate": 4.469368583022352e-06, "loss": 0.172, "step": 4350 }, { "epoch": 0.7049578742709008, "grad_norm": 0.966541588306427, "learning_rate": 4.469099174285725e-06, "loss": 0.1659, "step": 4351 }, { "epoch": 0.7051198963058976, "grad_norm": 0.9105446338653564, "learning_rate": 4.468829705299429e-06, "loss": 0.1513, "step": 4352 }, { "epoch": 0.7052819183408944, "grad_norm": 0.916776180267334, "learning_rate": 4.4685601760717075e-06, "loss": 0.1533, "step": 4353 }, { "epoch": 0.7054439403758911, "grad_norm": 0.8211317658424377, "learning_rate": 4.4682905866108094e-06, "loss": 0.1411, "step": 4354 }, { "epoch": 0.7056059624108879, "grad_norm": 0.908372163772583, "learning_rate": 4.468020936924983e-06, "loss": 0.1392, "step": 4355 }, { "epoch": 0.7057679844458846, "grad_norm": 0.9611108303070068, "learning_rate": 4.467751227022478e-06, "loss": 0.1592, "step": 4356 }, { "epoch": 0.7059300064808814, "grad_norm": 0.8858509063720703, "learning_rate": 4.467481456911547e-06, "loss": 0.1414, "step": 4357 }, { "epoch": 0.7060920285158782, "grad_norm": 0.8831855654716492, "learning_rate": 4.467211626600444e-06, "loss": 0.1331, "step": 4358 }, { "epoch": 0.7062540505508749, "grad_norm": 0.8268503546714783, "learning_rate": 4.466941736097427e-06, "loss": 0.1466, "step": 4359 }, { "epoch": 0.7064160725858717, "grad_norm": 0.952812135219574, "learning_rate": 4.466671785410752e-06, "loss": 0.1511, "step": 4360 }, { "epoch": 0.7065780946208684, "grad_norm": 0.9500433802604675, "learning_rate": 4.46640177454868e-06, "loss": 0.1439, "step": 4361 }, { "epoch": 0.7067401166558652, "grad_norm": 0.8875350952148438, "learning_rate": 4.4661317035194716e-06, "loss": 0.145, "step": 4362 }, { "epoch": 0.7069021386908619, "grad_norm": 0.8836730718612671, "learning_rate": 4.465861572331392e-06, "loss": 0.1386, "step": 4363 }, { "epoch": 0.7070641607258588, "grad_norm": 0.9382752180099487, "learning_rate": 4.4655913809927045e-06, "loss": 0.1529, "step": 4364 }, { "epoch": 0.7072261827608555, "grad_norm": 0.968842625617981, "learning_rate": 4.465321129511678e-06, "loss": 0.1463, "step": 4365 }, { "epoch": 0.7073882047958522, "grad_norm": 0.9342696666717529, "learning_rate": 4.4650508178965814e-06, "loss": 0.1376, "step": 4366 }, { "epoch": 0.707550226830849, "grad_norm": 1.0476247072219849, "learning_rate": 4.464780446155684e-06, "loss": 0.1389, "step": 4367 }, { "epoch": 0.7077122488658457, "grad_norm": 0.9861329197883606, "learning_rate": 4.464510014297261e-06, "loss": 0.1561, "step": 4368 }, { "epoch": 0.7078742709008425, "grad_norm": 0.8797351717948914, "learning_rate": 4.464239522329585e-06, "loss": 0.1307, "step": 4369 }, { "epoch": 0.7080362929358393, "grad_norm": 0.9944042563438416, "learning_rate": 4.4639689702609326e-06, "loss": 0.1431, "step": 4370 }, { "epoch": 0.7081983149708361, "grad_norm": 0.8967122435569763, "learning_rate": 4.463698358099583e-06, "loss": 0.1483, "step": 4371 }, { "epoch": 0.7083603370058328, "grad_norm": 0.9998703598976135, "learning_rate": 4.463427685853815e-06, "loss": 0.1347, "step": 4372 }, { "epoch": 0.7085223590408296, "grad_norm": 0.9268200397491455, "learning_rate": 4.463156953531912e-06, "loss": 0.1591, "step": 4373 }, { "epoch": 0.7086843810758263, "grad_norm": 0.8729379177093506, "learning_rate": 4.462886161142157e-06, "loss": 0.1271, "step": 4374 }, { "epoch": 0.708846403110823, "grad_norm": 0.8896871209144592, "learning_rate": 4.462615308692835e-06, "loss": 0.1595, "step": 4375 }, { "epoch": 0.7090084251458199, "grad_norm": 0.8534110188484192, "learning_rate": 4.4623443961922334e-06, "loss": 0.1321, "step": 4376 }, { "epoch": 0.7091704471808166, "grad_norm": 0.9220858216285706, "learning_rate": 4.462073423648643e-06, "loss": 0.1584, "step": 4377 }, { "epoch": 0.7093324692158134, "grad_norm": 0.8980048298835754, "learning_rate": 4.461802391070354e-06, "loss": 0.1432, "step": 4378 }, { "epoch": 0.7094944912508101, "grad_norm": 0.8806954622268677, "learning_rate": 4.461531298465659e-06, "loss": 0.1342, "step": 4379 }, { "epoch": 0.7096565132858069, "grad_norm": 0.9965754747390747, "learning_rate": 4.4612601458428525e-06, "loss": 0.1537, "step": 4380 }, { "epoch": 0.7098185353208036, "grad_norm": 0.9695927500724792, "learning_rate": 4.460988933210233e-06, "loss": 0.1357, "step": 4381 }, { "epoch": 0.7099805573558003, "grad_norm": 0.9649356007575989, "learning_rate": 4.460717660576097e-06, "loss": 0.1534, "step": 4382 }, { "epoch": 0.7101425793907972, "grad_norm": 0.9325234293937683, "learning_rate": 4.460446327948745e-06, "loss": 0.1443, "step": 4383 }, { "epoch": 0.7103046014257939, "grad_norm": 1.038757562637329, "learning_rate": 4.46017493533648e-06, "loss": 0.1623, "step": 4384 }, { "epoch": 0.7104666234607907, "grad_norm": 0.959611177444458, "learning_rate": 4.459903482747605e-06, "loss": 0.1428, "step": 4385 }, { "epoch": 0.7106286454957874, "grad_norm": 0.9437382817268372, "learning_rate": 4.459631970190428e-06, "loss": 0.1429, "step": 4386 }, { "epoch": 0.7107906675307842, "grad_norm": 0.9380490779876709, "learning_rate": 4.459360397673253e-06, "loss": 0.1459, "step": 4387 }, { "epoch": 0.710952689565781, "grad_norm": 0.8897026777267456, "learning_rate": 4.4590887652043925e-06, "loss": 0.1397, "step": 4388 }, { "epoch": 0.7111147116007777, "grad_norm": 0.9225705862045288, "learning_rate": 4.458817072792155e-06, "loss": 0.1516, "step": 4389 }, { "epoch": 0.7112767336357745, "grad_norm": 0.8232748508453369, "learning_rate": 4.458545320444857e-06, "loss": 0.1326, "step": 4390 }, { "epoch": 0.7114387556707712, "grad_norm": 0.9818866848945618, "learning_rate": 4.458273508170812e-06, "loss": 0.1557, "step": 4391 }, { "epoch": 0.711600777705768, "grad_norm": 1.086909294128418, "learning_rate": 4.458001635978335e-06, "loss": 0.1492, "step": 4392 }, { "epoch": 0.7117627997407647, "grad_norm": 0.8664153814315796, "learning_rate": 4.457729703875749e-06, "loss": 0.135, "step": 4393 }, { "epoch": 0.7119248217757616, "grad_norm": 0.8988549113273621, "learning_rate": 4.457457711871369e-06, "loss": 0.1473, "step": 4394 }, { "epoch": 0.7120868438107583, "grad_norm": 0.9167562127113342, "learning_rate": 4.45718565997352e-06, "loss": 0.1553, "step": 4395 }, { "epoch": 0.712248865845755, "grad_norm": 0.8664883971214294, "learning_rate": 4.4569135481905274e-06, "loss": 0.1404, "step": 4396 }, { "epoch": 0.7124108878807518, "grad_norm": 0.7986405491828918, "learning_rate": 4.456641376530715e-06, "loss": 0.1239, "step": 4397 }, { "epoch": 0.7125729099157485, "grad_norm": 0.9306362867355347, "learning_rate": 4.456369145002412e-06, "loss": 0.1489, "step": 4398 }, { "epoch": 0.7127349319507453, "grad_norm": 0.9109500050544739, "learning_rate": 4.456096853613948e-06, "loss": 0.1439, "step": 4399 }, { "epoch": 0.712896953985742, "grad_norm": 0.8157075643539429, "learning_rate": 4.455824502373653e-06, "loss": 0.1308, "step": 4400 }, { "epoch": 0.7130589760207389, "grad_norm": 0.8615176677703857, "learning_rate": 4.4555520912898616e-06, "loss": 0.1299, "step": 4401 }, { "epoch": 0.7132209980557356, "grad_norm": 0.9204617142677307, "learning_rate": 4.455279620370908e-06, "loss": 0.1405, "step": 4402 }, { "epoch": 0.7133830200907323, "grad_norm": 0.8997926115989685, "learning_rate": 4.455007089625131e-06, "loss": 0.1319, "step": 4403 }, { "epoch": 0.7135450421257291, "grad_norm": 0.861057698726654, "learning_rate": 4.454734499060867e-06, "loss": 0.1556, "step": 4404 }, { "epoch": 0.7137070641607258, "grad_norm": 0.9755337238311768, "learning_rate": 4.4544618486864575e-06, "loss": 0.1606, "step": 4405 }, { "epoch": 0.7138690861957226, "grad_norm": 0.9111999869346619, "learning_rate": 4.454189138510246e-06, "loss": 0.1339, "step": 4406 }, { "epoch": 0.7140311082307194, "grad_norm": 0.8962233662605286, "learning_rate": 4.4539163685405755e-06, "loss": 0.1345, "step": 4407 }, { "epoch": 0.7141931302657162, "grad_norm": 0.9078991413116455, "learning_rate": 4.453643538785793e-06, "loss": 0.1504, "step": 4408 }, { "epoch": 0.7143551523007129, "grad_norm": 0.9566717743873596, "learning_rate": 4.453370649254245e-06, "loss": 0.1546, "step": 4409 }, { "epoch": 0.7145171743357096, "grad_norm": 0.8746940493583679, "learning_rate": 4.453097699954282e-06, "loss": 0.147, "step": 4410 }, { "epoch": 0.7146791963707064, "grad_norm": 0.9663369059562683, "learning_rate": 4.452824690894257e-06, "loss": 0.1594, "step": 4411 }, { "epoch": 0.7148412184057031, "grad_norm": 0.9418163895606995, "learning_rate": 4.452551622082522e-06, "loss": 0.1533, "step": 4412 }, { "epoch": 0.7150032404407, "grad_norm": 0.8881242871284485, "learning_rate": 4.452278493527431e-06, "loss": 0.1353, "step": 4413 }, { "epoch": 0.7151652624756967, "grad_norm": 0.9275038838386536, "learning_rate": 4.452005305237344e-06, "loss": 0.1411, "step": 4414 }, { "epoch": 0.7153272845106935, "grad_norm": 1.107954740524292, "learning_rate": 4.451732057220618e-06, "loss": 0.1572, "step": 4415 }, { "epoch": 0.7154893065456902, "grad_norm": 0.9651256799697876, "learning_rate": 4.451458749485614e-06, "loss": 0.153, "step": 4416 }, { "epoch": 0.7156513285806869, "grad_norm": 0.8925125598907471, "learning_rate": 4.451185382040695e-06, "loss": 0.1423, "step": 4417 }, { "epoch": 0.7158133506156837, "grad_norm": 0.8692678213119507, "learning_rate": 4.4509119548942245e-06, "loss": 0.1448, "step": 4418 }, { "epoch": 0.7159753726506805, "grad_norm": 0.9533895254135132, "learning_rate": 4.45063846805457e-06, "loss": 0.1495, "step": 4419 }, { "epoch": 0.7161373946856773, "grad_norm": 0.8917126059532166, "learning_rate": 4.450364921530099e-06, "loss": 0.1437, "step": 4420 }, { "epoch": 0.716299416720674, "grad_norm": 0.7850384712219238, "learning_rate": 4.450091315329181e-06, "loss": 0.1339, "step": 4421 }, { "epoch": 0.7164614387556708, "grad_norm": 1.0266042947769165, "learning_rate": 4.449817649460187e-06, "loss": 0.1571, "step": 4422 }, { "epoch": 0.7166234607906675, "grad_norm": 1.084693193435669, "learning_rate": 4.449543923931493e-06, "loss": 0.1458, "step": 4423 }, { "epoch": 0.7167854828256643, "grad_norm": 0.9740076065063477, "learning_rate": 4.449270138751471e-06, "loss": 0.1395, "step": 4424 }, { "epoch": 0.7169475048606611, "grad_norm": 0.8249508738517761, "learning_rate": 4.4489962939285015e-06, "loss": 0.133, "step": 4425 }, { "epoch": 0.7171095268956578, "grad_norm": 0.9032569527626038, "learning_rate": 4.4487223894709606e-06, "loss": 0.1395, "step": 4426 }, { "epoch": 0.7172715489306546, "grad_norm": 0.7856298089027405, "learning_rate": 4.448448425387231e-06, "loss": 0.1226, "step": 4427 }, { "epoch": 0.7174335709656513, "grad_norm": 0.8503175377845764, "learning_rate": 4.448174401685694e-06, "loss": 0.1409, "step": 4428 }, { "epoch": 0.7175955930006481, "grad_norm": 0.9140015244483948, "learning_rate": 4.447900318374736e-06, "loss": 0.1488, "step": 4429 }, { "epoch": 0.7177576150356448, "grad_norm": 0.8657589554786682, "learning_rate": 4.447626175462741e-06, "loss": 0.1395, "step": 4430 }, { "epoch": 0.7179196370706417, "grad_norm": 0.8597119450569153, "learning_rate": 4.447351972958099e-06, "loss": 0.1285, "step": 4431 }, { "epoch": 0.7180816591056384, "grad_norm": 1.0180467367172241, "learning_rate": 4.447077710869199e-06, "loss": 0.1668, "step": 4432 }, { "epoch": 0.7182436811406351, "grad_norm": 0.924322783946991, "learning_rate": 4.446803389204433e-06, "loss": 0.1472, "step": 4433 }, { "epoch": 0.7184057031756319, "grad_norm": 0.9639684557914734, "learning_rate": 4.4465290079721935e-06, "loss": 0.1419, "step": 4434 }, { "epoch": 0.7185677252106286, "grad_norm": 0.9265577793121338, "learning_rate": 4.446254567180877e-06, "loss": 0.1461, "step": 4435 }, { "epoch": 0.7187297472456254, "grad_norm": 0.9008076190948486, "learning_rate": 4.445980066838882e-06, "loss": 0.1408, "step": 4436 }, { "epoch": 0.7188917692806222, "grad_norm": 0.8096259236335754, "learning_rate": 4.445705506954605e-06, "loss": 0.1268, "step": 4437 }, { "epoch": 0.719053791315619, "grad_norm": 0.9201697111129761, "learning_rate": 4.4454308875364486e-06, "loss": 0.1484, "step": 4438 }, { "epoch": 0.7192158133506157, "grad_norm": 1.0293805599212646, "learning_rate": 4.445156208592814e-06, "loss": 0.1722, "step": 4439 }, { "epoch": 0.7193778353856124, "grad_norm": 0.9077914357185364, "learning_rate": 4.444881470132108e-06, "loss": 0.1476, "step": 4440 }, { "epoch": 0.7195398574206092, "grad_norm": 0.9666748046875, "learning_rate": 4.444606672162735e-06, "loss": 0.1537, "step": 4441 }, { "epoch": 0.7197018794556059, "grad_norm": 0.942487895488739, "learning_rate": 4.444331814693103e-06, "loss": 0.1257, "step": 4442 }, { "epoch": 0.7198639014906028, "grad_norm": 0.9764305353164673, "learning_rate": 4.444056897731622e-06, "loss": 0.1494, "step": 4443 }, { "epoch": 0.7200259235255995, "grad_norm": 0.8661373257637024, "learning_rate": 4.443781921286706e-06, "loss": 0.1331, "step": 4444 }, { "epoch": 0.7201879455605963, "grad_norm": 0.8567464351654053, "learning_rate": 4.443506885366767e-06, "loss": 0.143, "step": 4445 }, { "epoch": 0.720349967595593, "grad_norm": 0.8924309015274048, "learning_rate": 4.4432317899802205e-06, "loss": 0.1297, "step": 4446 }, { "epoch": 0.7205119896305897, "grad_norm": 0.874181866645813, "learning_rate": 4.442956635135482e-06, "loss": 0.1448, "step": 4447 }, { "epoch": 0.7206740116655865, "grad_norm": 0.9796168804168701, "learning_rate": 4.442681420840974e-06, "loss": 0.1361, "step": 4448 }, { "epoch": 0.7208360337005832, "grad_norm": 0.9041408896446228, "learning_rate": 4.442406147105116e-06, "loss": 0.1405, "step": 4449 }, { "epoch": 0.7209980557355801, "grad_norm": 0.9344844222068787, "learning_rate": 4.44213081393633e-06, "loss": 0.1571, "step": 4450 }, { "epoch": 0.7211600777705768, "grad_norm": 0.927335798740387, "learning_rate": 4.4418554213430405e-06, "loss": 0.1417, "step": 4451 }, { "epoch": 0.7213220998055736, "grad_norm": 0.7550799250602722, "learning_rate": 4.441579969333675e-06, "loss": 0.1111, "step": 4452 }, { "epoch": 0.7214841218405703, "grad_norm": 0.8267915844917297, "learning_rate": 4.44130445791666e-06, "loss": 0.1308, "step": 4453 }, { "epoch": 0.721646143875567, "grad_norm": 1.0004557371139526, "learning_rate": 4.441028887100427e-06, "loss": 0.1523, "step": 4454 }, { "epoch": 0.7218081659105638, "grad_norm": 0.9250260591506958, "learning_rate": 4.440753256893408e-06, "loss": 0.1553, "step": 4455 }, { "epoch": 0.7219701879455606, "grad_norm": 0.8540483713150024, "learning_rate": 4.4404775673040346e-06, "loss": 0.156, "step": 4456 }, { "epoch": 0.7221322099805574, "grad_norm": 0.8714447617530823, "learning_rate": 4.4402018183407435e-06, "loss": 0.1415, "step": 4457 }, { "epoch": 0.7222942320155541, "grad_norm": 0.9081652164459229, "learning_rate": 4.4399260100119726e-06, "loss": 0.148, "step": 4458 }, { "epoch": 0.7224562540505509, "grad_norm": 0.9180371165275574, "learning_rate": 4.439650142326161e-06, "loss": 0.1537, "step": 4459 }, { "epoch": 0.7226182760855476, "grad_norm": 0.9707509279251099, "learning_rate": 4.439374215291748e-06, "loss": 0.1333, "step": 4460 }, { "epoch": 0.7227802981205443, "grad_norm": 0.9013661742210388, "learning_rate": 4.439098228917177e-06, "loss": 0.1352, "step": 4461 }, { "epoch": 0.7229423201555412, "grad_norm": 0.8960440158843994, "learning_rate": 4.438822183210894e-06, "loss": 0.1514, "step": 4462 }, { "epoch": 0.7231043421905379, "grad_norm": 0.8287968635559082, "learning_rate": 4.4385460781813426e-06, "loss": 0.1281, "step": 4463 }, { "epoch": 0.7232663642255347, "grad_norm": 0.8026749491691589, "learning_rate": 4.438269913836972e-06, "loss": 0.1293, "step": 4464 }, { "epoch": 0.7234283862605314, "grad_norm": 0.9157952070236206, "learning_rate": 4.437993690186234e-06, "loss": 0.1577, "step": 4465 }, { "epoch": 0.7235904082955282, "grad_norm": 0.8798143863677979, "learning_rate": 4.437717407237578e-06, "loss": 0.1296, "step": 4466 }, { "epoch": 0.7237524303305249, "grad_norm": 0.8337207436561584, "learning_rate": 4.437441064999459e-06, "loss": 0.1351, "step": 4467 }, { "epoch": 0.7239144523655218, "grad_norm": 0.9726783037185669, "learning_rate": 4.437164663480332e-06, "loss": 0.1518, "step": 4468 }, { "epoch": 0.7240764744005185, "grad_norm": 0.9185786843299866, "learning_rate": 4.436888202688654e-06, "loss": 0.1466, "step": 4469 }, { "epoch": 0.7242384964355152, "grad_norm": 1.0107709169387817, "learning_rate": 4.436611682632884e-06, "loss": 0.1403, "step": 4470 }, { "epoch": 0.724400518470512, "grad_norm": 0.886813759803772, "learning_rate": 4.436335103321484e-06, "loss": 0.1544, "step": 4471 }, { "epoch": 0.7245625405055087, "grad_norm": 0.8717353343963623, "learning_rate": 4.436058464762915e-06, "loss": 0.1332, "step": 4472 }, { "epoch": 0.7247245625405055, "grad_norm": 0.9517882466316223, "learning_rate": 4.435781766965641e-06, "loss": 0.1582, "step": 4473 }, { "epoch": 0.7248865845755023, "grad_norm": 0.946277379989624, "learning_rate": 4.435505009938131e-06, "loss": 0.1437, "step": 4474 }, { "epoch": 0.7250486066104991, "grad_norm": 0.970646858215332, "learning_rate": 4.435228193688851e-06, "loss": 0.1502, "step": 4475 }, { "epoch": 0.7252106286454958, "grad_norm": 0.8949056267738342, "learning_rate": 4.434951318226272e-06, "loss": 0.1389, "step": 4476 }, { "epoch": 0.7253726506804925, "grad_norm": 0.8123460412025452, "learning_rate": 4.434674383558865e-06, "loss": 0.1244, "step": 4477 }, { "epoch": 0.7255346727154893, "grad_norm": 0.8786303400993347, "learning_rate": 4.434397389695102e-06, "loss": 0.1267, "step": 4478 }, { "epoch": 0.725696694750486, "grad_norm": 0.9474332332611084, "learning_rate": 4.434120336643462e-06, "loss": 0.1479, "step": 4479 }, { "epoch": 0.7258587167854829, "grad_norm": 0.917175829410553, "learning_rate": 4.433843224412419e-06, "loss": 0.1538, "step": 4480 }, { "epoch": 0.7260207388204796, "grad_norm": 0.8738764524459839, "learning_rate": 4.433566053010454e-06, "loss": 0.1279, "step": 4481 }, { "epoch": 0.7261827608554764, "grad_norm": 0.9041066765785217, "learning_rate": 4.4332888224460466e-06, "loss": 0.1384, "step": 4482 }, { "epoch": 0.7263447828904731, "grad_norm": 0.8361867070198059, "learning_rate": 4.433011532727679e-06, "loss": 0.1305, "step": 4483 }, { "epoch": 0.7265068049254698, "grad_norm": 0.9306924343109131, "learning_rate": 4.432734183863837e-06, "loss": 0.1426, "step": 4484 }, { "epoch": 0.7266688269604666, "grad_norm": 0.9259305596351624, "learning_rate": 4.432456775863006e-06, "loss": 0.1332, "step": 4485 }, { "epoch": 0.7268308489954634, "grad_norm": 0.8822481036186218, "learning_rate": 4.432179308733674e-06, "loss": 0.1458, "step": 4486 }, { "epoch": 0.7269928710304602, "grad_norm": 0.922392725944519, "learning_rate": 4.4319017824843315e-06, "loss": 0.1534, "step": 4487 }, { "epoch": 0.7271548930654569, "grad_norm": 0.8622284531593323, "learning_rate": 4.43162419712347e-06, "loss": 0.1243, "step": 4488 }, { "epoch": 0.7273169151004537, "grad_norm": 0.9705631136894226, "learning_rate": 4.431346552659581e-06, "loss": 0.1426, "step": 4489 }, { "epoch": 0.7274789371354504, "grad_norm": 0.9161760807037354, "learning_rate": 4.431068849101162e-06, "loss": 0.1513, "step": 4490 }, { "epoch": 0.7276409591704471, "grad_norm": 0.9033561944961548, "learning_rate": 4.430791086456709e-06, "loss": 0.133, "step": 4491 }, { "epoch": 0.727802981205444, "grad_norm": 0.9011300206184387, "learning_rate": 4.4305132647347215e-06, "loss": 0.1466, "step": 4492 }, { "epoch": 0.7279650032404407, "grad_norm": 1.0322880744934082, "learning_rate": 4.4302353839437e-06, "loss": 0.1472, "step": 4493 }, { "epoch": 0.7281270252754375, "grad_norm": 0.9922388195991516, "learning_rate": 4.429957444092146e-06, "loss": 0.1596, "step": 4494 }, { "epoch": 0.7282890473104342, "grad_norm": 1.0610861778259277, "learning_rate": 4.4296794451885665e-06, "loss": 0.178, "step": 4495 }, { "epoch": 0.728451069345431, "grad_norm": 0.9445720314979553, "learning_rate": 4.429401387241464e-06, "loss": 0.1644, "step": 4496 }, { "epoch": 0.7286130913804277, "grad_norm": 1.009913444519043, "learning_rate": 4.429123270259348e-06, "loss": 0.1726, "step": 4497 }, { "epoch": 0.7287751134154244, "grad_norm": 0.9682244658470154, "learning_rate": 4.428845094250729e-06, "loss": 0.1474, "step": 4498 }, { "epoch": 0.7289371354504213, "grad_norm": 0.9171390533447266, "learning_rate": 4.4285668592241186e-06, "loss": 0.1586, "step": 4499 }, { "epoch": 0.729099157485418, "grad_norm": 0.9223143458366394, "learning_rate": 4.428288565188028e-06, "loss": 0.1357, "step": 4500 }, { "epoch": 0.7292611795204148, "grad_norm": 0.9437743425369263, "learning_rate": 4.4280102121509734e-06, "loss": 0.1514, "step": 4501 }, { "epoch": 0.7294232015554115, "grad_norm": 0.8216745853424072, "learning_rate": 4.427731800121473e-06, "loss": 0.1336, "step": 4502 }, { "epoch": 0.7295852235904083, "grad_norm": 0.8366663455963135, "learning_rate": 4.427453329108045e-06, "loss": 0.1307, "step": 4503 }, { "epoch": 0.729747245625405, "grad_norm": 0.8991605043411255, "learning_rate": 4.427174799119208e-06, "loss": 0.151, "step": 4504 }, { "epoch": 0.7299092676604018, "grad_norm": 0.7747397422790527, "learning_rate": 4.426896210163487e-06, "loss": 0.1273, "step": 4505 }, { "epoch": 0.7300712896953986, "grad_norm": 1.0024263858795166, "learning_rate": 4.426617562249405e-06, "loss": 0.1545, "step": 4506 }, { "epoch": 0.7302333117303953, "grad_norm": 0.8834472298622131, "learning_rate": 4.426338855385487e-06, "loss": 0.1525, "step": 4507 }, { "epoch": 0.7303953337653921, "grad_norm": 0.8129113912582397, "learning_rate": 4.426060089580262e-06, "loss": 0.1393, "step": 4508 }, { "epoch": 0.7305573558003888, "grad_norm": 1.0072026252746582, "learning_rate": 4.42578126484226e-06, "loss": 0.1591, "step": 4509 }, { "epoch": 0.7307193778353857, "grad_norm": 0.9830167889595032, "learning_rate": 4.42550238118001e-06, "loss": 0.1498, "step": 4510 }, { "epoch": 0.7308813998703824, "grad_norm": 0.8229345083236694, "learning_rate": 4.425223438602047e-06, "loss": 0.133, "step": 4511 }, { "epoch": 0.7310434219053791, "grad_norm": 0.8896669745445251, "learning_rate": 4.424944437116907e-06, "loss": 0.1397, "step": 4512 }, { "epoch": 0.7312054439403759, "grad_norm": 0.9277743101119995, "learning_rate": 4.424665376733125e-06, "loss": 0.1421, "step": 4513 }, { "epoch": 0.7313674659753726, "grad_norm": 0.9299547076225281, "learning_rate": 4.424386257459241e-06, "loss": 0.1507, "step": 4514 }, { "epoch": 0.7315294880103694, "grad_norm": 0.9168359637260437, "learning_rate": 4.424107079303793e-06, "loss": 0.149, "step": 4515 }, { "epoch": 0.7316915100453661, "grad_norm": 0.994986891746521, "learning_rate": 4.423827842275325e-06, "loss": 0.1692, "step": 4516 }, { "epoch": 0.731853532080363, "grad_norm": 1.0216580629348755, "learning_rate": 4.42354854638238e-06, "loss": 0.1601, "step": 4517 }, { "epoch": 0.7320155541153597, "grad_norm": 0.8891658782958984, "learning_rate": 4.4232691916335055e-06, "loss": 0.1446, "step": 4518 }, { "epoch": 0.7321775761503565, "grad_norm": 0.8627836108207703, "learning_rate": 4.422989778037248e-06, "loss": 0.1371, "step": 4519 }, { "epoch": 0.7323395981853532, "grad_norm": 0.8696764707565308, "learning_rate": 4.422710305602156e-06, "loss": 0.1213, "step": 4520 }, { "epoch": 0.7325016202203499, "grad_norm": 0.8861494064331055, "learning_rate": 4.422430774336782e-06, "loss": 0.1454, "step": 4521 }, { "epoch": 0.7326636422553467, "grad_norm": 0.9271621108055115, "learning_rate": 4.422151184249679e-06, "loss": 0.1513, "step": 4522 }, { "epoch": 0.7328256642903435, "grad_norm": 0.8663860559463501, "learning_rate": 4.4218715353494e-06, "loss": 0.1484, "step": 4523 }, { "epoch": 0.7329876863253403, "grad_norm": 0.8715428113937378, "learning_rate": 4.421591827644503e-06, "loss": 0.1486, "step": 4524 }, { "epoch": 0.733149708360337, "grad_norm": 0.8773646950721741, "learning_rate": 4.4213120611435475e-06, "loss": 0.1462, "step": 4525 }, { "epoch": 0.7333117303953338, "grad_norm": 0.775646984577179, "learning_rate": 4.4210322358550915e-06, "loss": 0.1371, "step": 4526 }, { "epoch": 0.7334737524303305, "grad_norm": 0.9878357648849487, "learning_rate": 4.420752351787698e-06, "loss": 0.1479, "step": 4527 }, { "epoch": 0.7336357744653272, "grad_norm": 1.0782580375671387, "learning_rate": 4.420472408949931e-06, "loss": 0.1573, "step": 4528 }, { "epoch": 0.7337977965003241, "grad_norm": 0.8356068134307861, "learning_rate": 4.420192407350355e-06, "loss": 0.1349, "step": 4529 }, { "epoch": 0.7339598185353208, "grad_norm": 0.8317657113075256, "learning_rate": 4.419912346997539e-06, "loss": 0.122, "step": 4530 }, { "epoch": 0.7341218405703176, "grad_norm": 0.8698350787162781, "learning_rate": 4.4196322279000506e-06, "loss": 0.1496, "step": 4531 }, { "epoch": 0.7342838626053143, "grad_norm": 0.7942752242088318, "learning_rate": 4.419352050066462e-06, "loss": 0.1128, "step": 4532 }, { "epoch": 0.7344458846403111, "grad_norm": 1.0515409708023071, "learning_rate": 4.419071813505345e-06, "loss": 0.165, "step": 4533 }, { "epoch": 0.7346079066753078, "grad_norm": 0.9110780358314514, "learning_rate": 4.418791518225275e-06, "loss": 0.1305, "step": 4534 }, { "epoch": 0.7347699287103046, "grad_norm": 0.7942949533462524, "learning_rate": 4.4185111642348276e-06, "loss": 0.1108, "step": 4535 }, { "epoch": 0.7349319507453014, "grad_norm": 0.920792818069458, "learning_rate": 4.418230751542581e-06, "loss": 0.1577, "step": 4536 }, { "epoch": 0.7350939727802981, "grad_norm": 1.1174674034118652, "learning_rate": 4.417950280157115e-06, "loss": 0.1558, "step": 4537 }, { "epoch": 0.7352559948152949, "grad_norm": 0.9814813137054443, "learning_rate": 4.417669750087014e-06, "loss": 0.1552, "step": 4538 }, { "epoch": 0.7354180168502916, "grad_norm": 0.7974295020103455, "learning_rate": 4.417389161340857e-06, "loss": 0.1274, "step": 4539 }, { "epoch": 0.7355800388852884, "grad_norm": 0.9223327040672302, "learning_rate": 4.417108513927233e-06, "loss": 0.1425, "step": 4540 }, { "epoch": 0.7357420609202852, "grad_norm": 0.8545518517494202, "learning_rate": 4.416827807854727e-06, "loss": 0.1264, "step": 4541 }, { "epoch": 0.7359040829552819, "grad_norm": 0.8425375819206238, "learning_rate": 4.416547043131929e-06, "loss": 0.1276, "step": 4542 }, { "epoch": 0.7360661049902787, "grad_norm": 0.8444299697875977, "learning_rate": 4.416266219767429e-06, "loss": 0.1357, "step": 4543 }, { "epoch": 0.7362281270252754, "grad_norm": 0.8412787914276123, "learning_rate": 4.41598533776982e-06, "loss": 0.135, "step": 4544 }, { "epoch": 0.7363901490602722, "grad_norm": 0.9908415079116821, "learning_rate": 4.415704397147698e-06, "loss": 0.1655, "step": 4545 }, { "epoch": 0.7365521710952689, "grad_norm": 0.8918123245239258, "learning_rate": 4.415423397909655e-06, "loss": 0.1396, "step": 4546 }, { "epoch": 0.7367141931302658, "grad_norm": 0.8907253742218018, "learning_rate": 4.4151423400642925e-06, "loss": 0.1421, "step": 4547 }, { "epoch": 0.7368762151652625, "grad_norm": 0.9191277027130127, "learning_rate": 4.414861223620209e-06, "loss": 0.1483, "step": 4548 }, { "epoch": 0.7370382372002592, "grad_norm": 0.7574411034584045, "learning_rate": 4.414580048586005e-06, "loss": 0.1211, "step": 4549 }, { "epoch": 0.737200259235256, "grad_norm": 0.7837011218070984, "learning_rate": 4.414298814970286e-06, "loss": 0.1293, "step": 4550 }, { "epoch": 0.7373622812702527, "grad_norm": 0.8395361304283142, "learning_rate": 4.414017522781655e-06, "loss": 0.1331, "step": 4551 }, { "epoch": 0.7375243033052495, "grad_norm": 0.9138143658638, "learning_rate": 4.41373617202872e-06, "loss": 0.1533, "step": 4552 }, { "epoch": 0.7376863253402463, "grad_norm": 0.8615904450416565, "learning_rate": 4.413454762720088e-06, "loss": 0.1347, "step": 4553 }, { "epoch": 0.7378483473752431, "grad_norm": 0.8940994739532471, "learning_rate": 4.413173294864373e-06, "loss": 0.134, "step": 4554 }, { "epoch": 0.7380103694102398, "grad_norm": 0.9082682132720947, "learning_rate": 4.412891768470183e-06, "loss": 0.1427, "step": 4555 }, { "epoch": 0.7381723914452365, "grad_norm": 0.978569507598877, "learning_rate": 4.412610183546135e-06, "loss": 0.149, "step": 4556 }, { "epoch": 0.7383344134802333, "grad_norm": 0.9564186930656433, "learning_rate": 4.412328540100843e-06, "loss": 0.1526, "step": 4557 }, { "epoch": 0.73849643551523, "grad_norm": 0.756712794303894, "learning_rate": 4.412046838142927e-06, "loss": 0.1162, "step": 4558 }, { "epoch": 0.7386584575502269, "grad_norm": 0.9216713309288025, "learning_rate": 4.411765077681003e-06, "loss": 0.1544, "step": 4559 }, { "epoch": 0.7388204795852236, "grad_norm": 0.7456929087638855, "learning_rate": 4.411483258723695e-06, "loss": 0.1176, "step": 4560 }, { "epoch": 0.7389825016202204, "grad_norm": 0.8160392045974731, "learning_rate": 4.411201381279625e-06, "loss": 0.1369, "step": 4561 }, { "epoch": 0.7391445236552171, "grad_norm": 0.8832926750183105, "learning_rate": 4.410919445357418e-06, "loss": 0.1453, "step": 4562 }, { "epoch": 0.7393065456902139, "grad_norm": 0.9234940409660339, "learning_rate": 4.410637450965699e-06, "loss": 0.1416, "step": 4563 }, { "epoch": 0.7394685677252106, "grad_norm": 0.9423860311508179, "learning_rate": 4.410355398113099e-06, "loss": 0.1502, "step": 4564 }, { "epoch": 0.7396305897602073, "grad_norm": 0.9716334939002991, "learning_rate": 4.410073286808247e-06, "loss": 0.1462, "step": 4565 }, { "epoch": 0.7397926117952042, "grad_norm": 0.8593721985816956, "learning_rate": 4.409791117059773e-06, "loss": 0.1287, "step": 4566 }, { "epoch": 0.7399546338302009, "grad_norm": 0.9580432176589966, "learning_rate": 4.409508888876313e-06, "loss": 0.1527, "step": 4567 }, { "epoch": 0.7401166558651977, "grad_norm": 0.8143693804740906, "learning_rate": 4.409226602266503e-06, "loss": 0.1272, "step": 4568 }, { "epoch": 0.7402786779001944, "grad_norm": 0.8784132599830627, "learning_rate": 4.408944257238979e-06, "loss": 0.1321, "step": 4569 }, { "epoch": 0.7404406999351912, "grad_norm": 0.9013129472732544, "learning_rate": 4.408661853802379e-06, "loss": 0.1524, "step": 4570 }, { "epoch": 0.740602721970188, "grad_norm": 0.8545945286750793, "learning_rate": 4.408379391965346e-06, "loss": 0.1461, "step": 4571 }, { "epoch": 0.7407647440051847, "grad_norm": 0.9265003800392151, "learning_rate": 4.408096871736522e-06, "loss": 0.1592, "step": 4572 }, { "epoch": 0.7409267660401815, "grad_norm": 0.901270866394043, "learning_rate": 4.407814293124551e-06, "loss": 0.1615, "step": 4573 }, { "epoch": 0.7410887880751782, "grad_norm": 0.9868318438529968, "learning_rate": 4.407531656138079e-06, "loss": 0.1498, "step": 4574 }, { "epoch": 0.741250810110175, "grad_norm": 0.847172737121582, "learning_rate": 4.407248960785756e-06, "loss": 0.1408, "step": 4575 }, { "epoch": 0.7414128321451717, "grad_norm": 0.941987156867981, "learning_rate": 4.406966207076229e-06, "loss": 0.1502, "step": 4576 }, { "epoch": 0.7415748541801686, "grad_norm": 0.8444817662239075, "learning_rate": 4.406683395018151e-06, "loss": 0.1493, "step": 4577 }, { "epoch": 0.7417368762151653, "grad_norm": 0.7697441577911377, "learning_rate": 4.406400524620174e-06, "loss": 0.1207, "step": 4578 }, { "epoch": 0.741898898250162, "grad_norm": 0.8291786909103394, "learning_rate": 4.406117595890956e-06, "loss": 0.1369, "step": 4579 }, { "epoch": 0.7420609202851588, "grad_norm": 0.8335320353507996, "learning_rate": 4.405834608839152e-06, "loss": 0.1313, "step": 4580 }, { "epoch": 0.7422229423201555, "grad_norm": 0.8066971302032471, "learning_rate": 4.405551563473421e-06, "loss": 0.1419, "step": 4581 }, { "epoch": 0.7423849643551523, "grad_norm": 0.9154806137084961, "learning_rate": 4.405268459802423e-06, "loss": 0.1421, "step": 4582 }, { "epoch": 0.742546986390149, "grad_norm": 0.8322879672050476, "learning_rate": 4.404985297834821e-06, "loss": 0.1335, "step": 4583 }, { "epoch": 0.7427090084251459, "grad_norm": 0.9182136058807373, "learning_rate": 4.404702077579279e-06, "loss": 0.1531, "step": 4584 }, { "epoch": 0.7428710304601426, "grad_norm": 0.9670420289039612, "learning_rate": 4.404418799044463e-06, "loss": 0.1728, "step": 4585 }, { "epoch": 0.7430330524951393, "grad_norm": 1.0733063220977783, "learning_rate": 4.4041354622390395e-06, "loss": 0.1469, "step": 4586 }, { "epoch": 0.7431950745301361, "grad_norm": 0.7664371132850647, "learning_rate": 4.40385206717168e-06, "loss": 0.1228, "step": 4587 }, { "epoch": 0.7433570965651328, "grad_norm": 0.8765845894813538, "learning_rate": 4.403568613851054e-06, "loss": 0.1436, "step": 4588 }, { "epoch": 0.7435191186001296, "grad_norm": 0.9102659225463867, "learning_rate": 4.403285102285835e-06, "loss": 0.148, "step": 4589 }, { "epoch": 0.7436811406351264, "grad_norm": 1.0098685026168823, "learning_rate": 4.403001532484697e-06, "loss": 0.1589, "step": 4590 }, { "epoch": 0.7438431626701232, "grad_norm": 0.912421703338623, "learning_rate": 4.402717904456318e-06, "loss": 0.1474, "step": 4591 }, { "epoch": 0.7440051847051199, "grad_norm": 0.960193932056427, "learning_rate": 4.4024342182093745e-06, "loss": 0.1535, "step": 4592 }, { "epoch": 0.7441672067401166, "grad_norm": 0.8692451119422913, "learning_rate": 4.402150473752549e-06, "loss": 0.1506, "step": 4593 }, { "epoch": 0.7443292287751134, "grad_norm": 0.9402160048484802, "learning_rate": 4.401866671094522e-06, "loss": 0.156, "step": 4594 }, { "epoch": 0.7444912508101101, "grad_norm": 0.8891216516494751, "learning_rate": 4.401582810243977e-06, "loss": 0.1464, "step": 4595 }, { "epoch": 0.744653272845107, "grad_norm": 0.9088370203971863, "learning_rate": 4.4012988912096e-06, "loss": 0.137, "step": 4596 }, { "epoch": 0.7448152948801037, "grad_norm": 0.8495668172836304, "learning_rate": 4.401014914000078e-06, "loss": 0.1337, "step": 4597 }, { "epoch": 0.7449773169151005, "grad_norm": 0.8261443376541138, "learning_rate": 4.4007308786241e-06, "loss": 0.1217, "step": 4598 }, { "epoch": 0.7451393389500972, "grad_norm": 0.8053951263427734, "learning_rate": 4.400446785090356e-06, "loss": 0.1249, "step": 4599 }, { "epoch": 0.7453013609850939, "grad_norm": 0.8850007653236389, "learning_rate": 4.40016263340754e-06, "loss": 0.1391, "step": 4600 }, { "epoch": 0.7454633830200907, "grad_norm": 0.8763007521629333, "learning_rate": 4.399878423584345e-06, "loss": 0.1301, "step": 4601 }, { "epoch": 0.7456254050550875, "grad_norm": 0.9268038272857666, "learning_rate": 4.399594155629469e-06, "loss": 0.135, "step": 4602 }, { "epoch": 0.7457874270900843, "grad_norm": 0.8517520427703857, "learning_rate": 4.3993098295516085e-06, "loss": 0.1419, "step": 4603 }, { "epoch": 0.745949449125081, "grad_norm": 0.8787991404533386, "learning_rate": 4.3990254453594634e-06, "loss": 0.1233, "step": 4604 }, { "epoch": 0.7461114711600778, "grad_norm": 0.9639225602149963, "learning_rate": 4.398741003061735e-06, "loss": 0.1407, "step": 4605 }, { "epoch": 0.7462734931950745, "grad_norm": 0.8817579746246338, "learning_rate": 4.398456502667127e-06, "loss": 0.134, "step": 4606 }, { "epoch": 0.7464355152300713, "grad_norm": 0.9382551908493042, "learning_rate": 4.398171944184344e-06, "loss": 0.1614, "step": 4607 }, { "epoch": 0.7465975372650681, "grad_norm": 0.917212963104248, "learning_rate": 4.397887327622093e-06, "loss": 0.1304, "step": 4608 }, { "epoch": 0.7467595593000648, "grad_norm": 1.028524398803711, "learning_rate": 4.397602652989083e-06, "loss": 0.1568, "step": 4609 }, { "epoch": 0.7469215813350616, "grad_norm": 0.9571022391319275, "learning_rate": 4.397317920294023e-06, "loss": 0.1503, "step": 4610 }, { "epoch": 0.7470836033700583, "grad_norm": 0.9256038069725037, "learning_rate": 4.397033129545627e-06, "loss": 0.1341, "step": 4611 }, { "epoch": 0.7472456254050551, "grad_norm": 0.8719132542610168, "learning_rate": 4.396748280752608e-06, "loss": 0.1495, "step": 4612 }, { "epoch": 0.7474076474400518, "grad_norm": 0.8998635411262512, "learning_rate": 4.39646337392368e-06, "loss": 0.1421, "step": 4613 }, { "epoch": 0.7475696694750487, "grad_norm": 0.9811137914657593, "learning_rate": 4.396178409067564e-06, "loss": 0.1531, "step": 4614 }, { "epoch": 0.7477316915100454, "grad_norm": 0.962475061416626, "learning_rate": 4.395893386192976e-06, "loss": 0.1606, "step": 4615 }, { "epoch": 0.7478937135450421, "grad_norm": 0.8668140172958374, "learning_rate": 4.395608305308639e-06, "loss": 0.1432, "step": 4616 }, { "epoch": 0.7480557355800389, "grad_norm": 1.009864330291748, "learning_rate": 4.3953231664232755e-06, "loss": 0.1518, "step": 4617 }, { "epoch": 0.7482177576150356, "grad_norm": 1.0338926315307617, "learning_rate": 4.395037969545609e-06, "loss": 0.1602, "step": 4618 }, { "epoch": 0.7483797796500324, "grad_norm": 0.937615692615509, "learning_rate": 4.394752714684367e-06, "loss": 0.1508, "step": 4619 }, { "epoch": 0.7485418016850292, "grad_norm": 1.010367512702942, "learning_rate": 4.394467401848277e-06, "loss": 0.1512, "step": 4620 }, { "epoch": 0.748703823720026, "grad_norm": 0.8209222555160522, "learning_rate": 4.394182031046069e-06, "loss": 0.1412, "step": 4621 }, { "epoch": 0.7488658457550227, "grad_norm": 0.8631600141525269, "learning_rate": 4.393896602286475e-06, "loss": 0.1371, "step": 4622 }, { "epoch": 0.7490278677900194, "grad_norm": 0.7968021631240845, "learning_rate": 4.393611115578228e-06, "loss": 0.1263, "step": 4623 }, { "epoch": 0.7491898898250162, "grad_norm": 0.9319000244140625, "learning_rate": 4.3933255709300635e-06, "loss": 0.158, "step": 4624 }, { "epoch": 0.7493519118600129, "grad_norm": 0.8134043216705322, "learning_rate": 4.393039968350718e-06, "loss": 0.1389, "step": 4625 }, { "epoch": 0.7495139338950098, "grad_norm": 0.968559205532074, "learning_rate": 4.3927543078489295e-06, "loss": 0.1761, "step": 4626 }, { "epoch": 0.7496759559300065, "grad_norm": 0.8765543103218079, "learning_rate": 4.392468589433441e-06, "loss": 0.1496, "step": 4627 }, { "epoch": 0.7498379779650033, "grad_norm": 0.9812090992927551, "learning_rate": 4.392182813112993e-06, "loss": 0.1566, "step": 4628 }, { "epoch": 0.75, "grad_norm": 0.9258951544761658, "learning_rate": 4.3918969788963295e-06, "loss": 0.1479, "step": 4629 }, { "epoch": 0.7501620220349967, "grad_norm": 0.8851484656333923, "learning_rate": 4.391611086792198e-06, "loss": 0.1463, "step": 4630 }, { "epoch": 0.7503240440699935, "grad_norm": 1.0019028186798096, "learning_rate": 4.391325136809344e-06, "loss": 0.1457, "step": 4631 }, { "epoch": 0.7504860661049902, "grad_norm": 0.8665192723274231, "learning_rate": 4.391039128956517e-06, "loss": 0.1279, "step": 4632 }, { "epoch": 0.7506480881399871, "grad_norm": 0.8466840982437134, "learning_rate": 4.39075306324247e-06, "loss": 0.1273, "step": 4633 }, { "epoch": 0.7508101101749838, "grad_norm": 0.9541529417037964, "learning_rate": 4.390466939675954e-06, "loss": 0.1667, "step": 4634 }, { "epoch": 0.7509721322099806, "grad_norm": 1.0194523334503174, "learning_rate": 4.390180758265725e-06, "loss": 0.1509, "step": 4635 }, { "epoch": 0.7511341542449773, "grad_norm": 0.9222609996795654, "learning_rate": 4.389894519020539e-06, "loss": 0.1455, "step": 4636 }, { "epoch": 0.751296176279974, "grad_norm": 0.9126465320587158, "learning_rate": 4.389608221949153e-06, "loss": 0.1303, "step": 4637 }, { "epoch": 0.7514581983149708, "grad_norm": 0.8073568344116211, "learning_rate": 4.38932186706033e-06, "loss": 0.1254, "step": 4638 }, { "epoch": 0.7516202203499676, "grad_norm": 0.9920159578323364, "learning_rate": 4.389035454362829e-06, "loss": 0.1476, "step": 4639 }, { "epoch": 0.7517822423849644, "grad_norm": 0.8352819085121155, "learning_rate": 4.388748983865414e-06, "loss": 0.1289, "step": 4640 }, { "epoch": 0.7519442644199611, "grad_norm": 0.9489824175834656, "learning_rate": 4.388462455576852e-06, "loss": 0.1533, "step": 4641 }, { "epoch": 0.7521062864549579, "grad_norm": 0.9182109832763672, "learning_rate": 4.388175869505908e-06, "loss": 0.1494, "step": 4642 }, { "epoch": 0.7522683084899546, "grad_norm": 0.9489102959632874, "learning_rate": 4.387889225661352e-06, "loss": 0.1317, "step": 4643 }, { "epoch": 0.7524303305249513, "grad_norm": 0.887995183467865, "learning_rate": 4.387602524051954e-06, "loss": 0.1433, "step": 4644 }, { "epoch": 0.7525923525599482, "grad_norm": 0.934482753276825, "learning_rate": 4.387315764686487e-06, "loss": 0.1458, "step": 4645 }, { "epoch": 0.7527543745949449, "grad_norm": 0.757559061050415, "learning_rate": 4.387028947573724e-06, "loss": 0.1199, "step": 4646 }, { "epoch": 0.7529163966299417, "grad_norm": 0.8067179322242737, "learning_rate": 4.386742072722443e-06, "loss": 0.1321, "step": 4647 }, { "epoch": 0.7530784186649384, "grad_norm": 0.9727403521537781, "learning_rate": 4.3864551401414195e-06, "loss": 0.1172, "step": 4648 }, { "epoch": 0.7532404406999352, "grad_norm": 0.9205012917518616, "learning_rate": 4.386168149839434e-06, "loss": 0.1555, "step": 4649 }, { "epoch": 0.7534024627349319, "grad_norm": 0.8873549103736877, "learning_rate": 4.385881101825268e-06, "loss": 0.1476, "step": 4650 }, { "epoch": 0.7535644847699287, "grad_norm": 0.9281837940216064, "learning_rate": 4.3855939961077034e-06, "loss": 0.1385, "step": 4651 }, { "epoch": 0.7537265068049255, "grad_norm": 1.0171685218811035, "learning_rate": 4.385306832695526e-06, "loss": 0.1635, "step": 4652 }, { "epoch": 0.7538885288399222, "grad_norm": 0.8333146572113037, "learning_rate": 4.385019611597522e-06, "loss": 0.1398, "step": 4653 }, { "epoch": 0.754050550874919, "grad_norm": 0.9079596400260925, "learning_rate": 4.384732332822479e-06, "loss": 0.1396, "step": 4654 }, { "epoch": 0.7542125729099157, "grad_norm": 0.8788375854492188, "learning_rate": 4.384444996379188e-06, "loss": 0.1384, "step": 4655 }, { "epoch": 0.7543745949449125, "grad_norm": 0.8239230513572693, "learning_rate": 4.38415760227644e-06, "loss": 0.1278, "step": 4656 }, { "epoch": 0.7545366169799093, "grad_norm": 0.8959946036338806, "learning_rate": 4.383870150523029e-06, "loss": 0.1499, "step": 4657 }, { "epoch": 0.7546986390149061, "grad_norm": 0.8564679026603699, "learning_rate": 4.38358264112775e-06, "loss": 0.1381, "step": 4658 }, { "epoch": 0.7548606610499028, "grad_norm": 0.8459985256195068, "learning_rate": 4.383295074099402e-06, "loss": 0.1358, "step": 4659 }, { "epoch": 0.7550226830848995, "grad_norm": 0.9368695020675659, "learning_rate": 4.3830074494467815e-06, "loss": 0.1419, "step": 4660 }, { "epoch": 0.7551847051198963, "grad_norm": 0.8994658589363098, "learning_rate": 4.382719767178689e-06, "loss": 0.1558, "step": 4661 }, { "epoch": 0.755346727154893, "grad_norm": 0.9667396545410156, "learning_rate": 4.382432027303928e-06, "loss": 0.1627, "step": 4662 }, { "epoch": 0.7555087491898899, "grad_norm": 0.8609565496444702, "learning_rate": 4.382144229831302e-06, "loss": 0.1351, "step": 4663 }, { "epoch": 0.7556707712248866, "grad_norm": 0.845554769039154, "learning_rate": 4.381856374769617e-06, "loss": 0.1273, "step": 4664 }, { "epoch": 0.7558327932598834, "grad_norm": 0.9373748302459717, "learning_rate": 4.3815684621276824e-06, "loss": 0.1663, "step": 4665 }, { "epoch": 0.7559948152948801, "grad_norm": 0.8447197079658508, "learning_rate": 4.3812804919143055e-06, "loss": 0.1337, "step": 4666 }, { "epoch": 0.7561568373298768, "grad_norm": 0.8827223777770996, "learning_rate": 4.380992464138298e-06, "loss": 0.1419, "step": 4667 }, { "epoch": 0.7563188593648736, "grad_norm": 0.8952583074569702, "learning_rate": 4.380704378808473e-06, "loss": 0.1249, "step": 4668 }, { "epoch": 0.7564808813998704, "grad_norm": 0.8964098691940308, "learning_rate": 4.380416235933646e-06, "loss": 0.1544, "step": 4669 }, { "epoch": 0.7566429034348672, "grad_norm": 1.0078829526901245, "learning_rate": 4.380128035522632e-06, "loss": 0.1629, "step": 4670 }, { "epoch": 0.7568049254698639, "grad_norm": 0.9272462725639343, "learning_rate": 4.379839777584249e-06, "loss": 0.158, "step": 4671 }, { "epoch": 0.7569669475048607, "grad_norm": 0.9298140406608582, "learning_rate": 4.379551462127319e-06, "loss": 0.1442, "step": 4672 }, { "epoch": 0.7571289695398574, "grad_norm": 1.0363284349441528, "learning_rate": 4.3792630891606635e-06, "loss": 0.1739, "step": 4673 }, { "epoch": 0.7572909915748541, "grad_norm": 0.9505801796913147, "learning_rate": 4.3789746586931034e-06, "loss": 0.1657, "step": 4674 }, { "epoch": 0.757453013609851, "grad_norm": 1.0468703508377075, "learning_rate": 4.3786861707334676e-06, "loss": 0.1587, "step": 4675 }, { "epoch": 0.7576150356448477, "grad_norm": 0.8523300886154175, "learning_rate": 4.37839762529058e-06, "loss": 0.1465, "step": 4676 }, { "epoch": 0.7577770576798445, "grad_norm": 0.9114488363265991, "learning_rate": 4.378109022373272e-06, "loss": 0.127, "step": 4677 }, { "epoch": 0.7579390797148412, "grad_norm": 1.036751627922058, "learning_rate": 4.3778203619903716e-06, "loss": 0.1341, "step": 4678 }, { "epoch": 0.758101101749838, "grad_norm": 1.0055261850357056, "learning_rate": 4.377531644150712e-06, "loss": 0.1549, "step": 4679 }, { "epoch": 0.7582631237848347, "grad_norm": 1.0141489505767822, "learning_rate": 4.3772428688631285e-06, "loss": 0.1552, "step": 4680 }, { "epoch": 0.7584251458198314, "grad_norm": 0.9310604929924011, "learning_rate": 4.376954036136456e-06, "loss": 0.1502, "step": 4681 }, { "epoch": 0.7585871678548283, "grad_norm": 0.7873426675796509, "learning_rate": 4.376665145979532e-06, "loss": 0.1249, "step": 4682 }, { "epoch": 0.758749189889825, "grad_norm": 0.8380732536315918, "learning_rate": 4.376376198401195e-06, "loss": 0.154, "step": 4683 }, { "epoch": 0.7589112119248218, "grad_norm": 0.762469470500946, "learning_rate": 4.376087193410289e-06, "loss": 0.1214, "step": 4684 }, { "epoch": 0.7590732339598185, "grad_norm": 0.868675947189331, "learning_rate": 4.375798131015654e-06, "loss": 0.1446, "step": 4685 }, { "epoch": 0.7592352559948153, "grad_norm": 0.998455286026001, "learning_rate": 4.375509011226135e-06, "loss": 0.1611, "step": 4686 }, { "epoch": 0.759397278029812, "grad_norm": 1.0500266551971436, "learning_rate": 4.3752198340505795e-06, "loss": 0.1398, "step": 4687 }, { "epoch": 0.7595593000648088, "grad_norm": 0.8594135642051697, "learning_rate": 4.374930599497835e-06, "loss": 0.1354, "step": 4688 }, { "epoch": 0.7597213220998056, "grad_norm": 0.8801107406616211, "learning_rate": 4.374641307576751e-06, "loss": 0.1432, "step": 4689 }, { "epoch": 0.7598833441348023, "grad_norm": 0.8573765158653259, "learning_rate": 4.37435195829618e-06, "loss": 0.1404, "step": 4690 }, { "epoch": 0.7600453661697991, "grad_norm": 0.8854019045829773, "learning_rate": 4.3740625516649755e-06, "loss": 0.1406, "step": 4691 }, { "epoch": 0.7602073882047958, "grad_norm": 0.899628758430481, "learning_rate": 4.373773087691992e-06, "loss": 0.1331, "step": 4692 }, { "epoch": 0.7603694102397927, "grad_norm": 0.9409993886947632, "learning_rate": 4.373483566386086e-06, "loss": 0.1582, "step": 4693 }, { "epoch": 0.7605314322747894, "grad_norm": 0.9930229783058167, "learning_rate": 4.373193987756116e-06, "loss": 0.1317, "step": 4694 }, { "epoch": 0.7606934543097861, "grad_norm": 0.8028172254562378, "learning_rate": 4.372904351810943e-06, "loss": 0.1267, "step": 4695 }, { "epoch": 0.7608554763447829, "grad_norm": 0.8876671195030212, "learning_rate": 4.3726146585594296e-06, "loss": 0.1482, "step": 4696 }, { "epoch": 0.7610174983797796, "grad_norm": 0.8735641837120056, "learning_rate": 4.3723249080104395e-06, "loss": 0.1362, "step": 4697 }, { "epoch": 0.7611795204147764, "grad_norm": 1.0168266296386719, "learning_rate": 4.372035100172838e-06, "loss": 0.1535, "step": 4698 }, { "epoch": 0.7613415424497731, "grad_norm": 0.9453420042991638, "learning_rate": 4.371745235055492e-06, "loss": 0.1625, "step": 4699 }, { "epoch": 0.76150356448477, "grad_norm": 0.8696652054786682, "learning_rate": 4.371455312667272e-06, "loss": 0.1469, "step": 4700 }, { "epoch": 0.7616655865197667, "grad_norm": 0.8512628078460693, "learning_rate": 4.371165333017049e-06, "loss": 0.1464, "step": 4701 }, { "epoch": 0.7618276085547635, "grad_norm": 0.9412540793418884, "learning_rate": 4.370875296113694e-06, "loss": 0.1495, "step": 4702 }, { "epoch": 0.7619896305897602, "grad_norm": 0.9683601260185242, "learning_rate": 4.370585201966082e-06, "loss": 0.1696, "step": 4703 }, { "epoch": 0.7621516526247569, "grad_norm": 0.9484912753105164, "learning_rate": 4.370295050583091e-06, "loss": 0.162, "step": 4704 }, { "epoch": 0.7623136746597537, "grad_norm": 0.8469820022583008, "learning_rate": 4.370004841973596e-06, "loss": 0.1332, "step": 4705 }, { "epoch": 0.7624756966947505, "grad_norm": 0.9001607894897461, "learning_rate": 4.3697145761464785e-06, "loss": 0.1468, "step": 4706 }, { "epoch": 0.7626377187297473, "grad_norm": 0.8998278975486755, "learning_rate": 4.36942425311062e-06, "loss": 0.1493, "step": 4707 }, { "epoch": 0.762799740764744, "grad_norm": 0.9656628966331482, "learning_rate": 4.369133872874903e-06, "loss": 0.1528, "step": 4708 }, { "epoch": 0.7629617627997408, "grad_norm": 1.009851098060608, "learning_rate": 4.368843435448213e-06, "loss": 0.1392, "step": 4709 }, { "epoch": 0.7631237848347375, "grad_norm": 0.9109621644020081, "learning_rate": 4.368552940839436e-06, "loss": 0.1571, "step": 4710 }, { "epoch": 0.7632858068697342, "grad_norm": 0.9232974052429199, "learning_rate": 4.368262389057462e-06, "loss": 0.1551, "step": 4711 }, { "epoch": 0.7634478289047311, "grad_norm": 0.8760313987731934, "learning_rate": 4.367971780111179e-06, "loss": 0.1587, "step": 4712 }, { "epoch": 0.7636098509397278, "grad_norm": 0.945943295955658, "learning_rate": 4.36768111400948e-06, "loss": 0.1431, "step": 4713 }, { "epoch": 0.7637718729747246, "grad_norm": 0.8429069519042969, "learning_rate": 4.367390390761258e-06, "loss": 0.1377, "step": 4714 }, { "epoch": 0.7639338950097213, "grad_norm": 0.8647720813751221, "learning_rate": 4.367099610375409e-06, "loss": 0.1262, "step": 4715 }, { "epoch": 0.7640959170447181, "grad_norm": 0.8221338987350464, "learning_rate": 4.3668087728608314e-06, "loss": 0.1269, "step": 4716 }, { "epoch": 0.7642579390797148, "grad_norm": 0.8592271208763123, "learning_rate": 4.366517878226423e-06, "loss": 0.1344, "step": 4717 }, { "epoch": 0.7644199611147116, "grad_norm": 0.8569756150245667, "learning_rate": 4.366226926481083e-06, "loss": 0.1417, "step": 4718 }, { "epoch": 0.7645819831497084, "grad_norm": 0.8143072128295898, "learning_rate": 4.365935917633716e-06, "loss": 0.1295, "step": 4719 }, { "epoch": 0.7647440051847051, "grad_norm": 0.9709054827690125, "learning_rate": 4.365644851693226e-06, "loss": 0.1597, "step": 4720 }, { "epoch": 0.7649060272197019, "grad_norm": 0.8787278532981873, "learning_rate": 4.365353728668518e-06, "loss": 0.14, "step": 4721 }, { "epoch": 0.7650680492546986, "grad_norm": 0.945436418056488, "learning_rate": 4.3650625485685e-06, "loss": 0.153, "step": 4722 }, { "epoch": 0.7652300712896954, "grad_norm": 0.9423735737800598, "learning_rate": 4.3647713114020805e-06, "loss": 0.1324, "step": 4723 }, { "epoch": 0.7653920933246922, "grad_norm": 0.9254855513572693, "learning_rate": 4.364480017178172e-06, "loss": 0.149, "step": 4724 }, { "epoch": 0.7655541153596889, "grad_norm": 0.8730553984642029, "learning_rate": 4.364188665905687e-06, "loss": 0.1386, "step": 4725 }, { "epoch": 0.7657161373946857, "grad_norm": 0.8605020046234131, "learning_rate": 4.36389725759354e-06, "loss": 0.1381, "step": 4726 }, { "epoch": 0.7658781594296824, "grad_norm": 1.0751097202301025, "learning_rate": 4.363605792250648e-06, "loss": 0.1849, "step": 4727 }, { "epoch": 0.7660401814646792, "grad_norm": 0.9074150919914246, "learning_rate": 4.363314269885928e-06, "loss": 0.1377, "step": 4728 }, { "epoch": 0.7662022034996759, "grad_norm": 0.8164258599281311, "learning_rate": 4.363022690508301e-06, "loss": 0.1357, "step": 4729 }, { "epoch": 0.7663642255346728, "grad_norm": 0.9147708415985107, "learning_rate": 4.362731054126687e-06, "loss": 0.1494, "step": 4730 }, { "epoch": 0.7665262475696695, "grad_norm": 0.9929783344268799, "learning_rate": 4.362439360750012e-06, "loss": 0.162, "step": 4731 }, { "epoch": 0.7666882696046662, "grad_norm": 0.977254331111908, "learning_rate": 4.362147610387198e-06, "loss": 0.1586, "step": 4732 }, { "epoch": 0.766850291639663, "grad_norm": 0.9575954079627991, "learning_rate": 4.361855803047175e-06, "loss": 0.1561, "step": 4733 }, { "epoch": 0.7670123136746597, "grad_norm": 0.8501836657524109, "learning_rate": 4.361563938738869e-06, "loss": 0.138, "step": 4734 }, { "epoch": 0.7671743357096565, "grad_norm": 0.8874675035476685, "learning_rate": 4.361272017471212e-06, "loss": 0.1503, "step": 4735 }, { "epoch": 0.7673363577446533, "grad_norm": 0.830808699131012, "learning_rate": 4.3609800392531345e-06, "loss": 0.1304, "step": 4736 }, { "epoch": 0.7674983797796501, "grad_norm": 0.8549823760986328, "learning_rate": 4.3606880040935714e-06, "loss": 0.1475, "step": 4737 }, { "epoch": 0.7676604018146468, "grad_norm": 0.9000952243804932, "learning_rate": 4.36039591200146e-06, "loss": 0.1549, "step": 4738 }, { "epoch": 0.7678224238496435, "grad_norm": 0.9087912440299988, "learning_rate": 4.360103762985734e-06, "loss": 0.1516, "step": 4739 }, { "epoch": 0.7679844458846403, "grad_norm": 0.9787373542785645, "learning_rate": 4.359811557055335e-06, "loss": 0.172, "step": 4740 }, { "epoch": 0.768146467919637, "grad_norm": 0.8458462953567505, "learning_rate": 4.359519294219201e-06, "loss": 0.1462, "step": 4741 }, { "epoch": 0.7683084899546339, "grad_norm": 0.8422055840492249, "learning_rate": 4.3592269744862794e-06, "loss": 0.1269, "step": 4742 }, { "epoch": 0.7684705119896306, "grad_norm": 0.8210686445236206, "learning_rate": 4.35893459786551e-06, "loss": 0.1425, "step": 4743 }, { "epoch": 0.7686325340246274, "grad_norm": 0.9287550449371338, "learning_rate": 4.3586421643658404e-06, "loss": 0.1574, "step": 4744 }, { "epoch": 0.7687945560596241, "grad_norm": 0.9414339065551758, "learning_rate": 4.3583496739962195e-06, "loss": 0.1515, "step": 4745 }, { "epoch": 0.7689565780946209, "grad_norm": 0.8130404353141785, "learning_rate": 4.3580571267655945e-06, "loss": 0.1309, "step": 4746 }, { "epoch": 0.7691186001296176, "grad_norm": 0.7749109864234924, "learning_rate": 4.357764522682919e-06, "loss": 0.1363, "step": 4747 }, { "epoch": 0.7692806221646143, "grad_norm": 0.874220609664917, "learning_rate": 4.357471861757144e-06, "loss": 0.1415, "step": 4748 }, { "epoch": 0.7694426441996112, "grad_norm": 0.9805276393890381, "learning_rate": 4.357179143997225e-06, "loss": 0.1563, "step": 4749 }, { "epoch": 0.7696046662346079, "grad_norm": 0.9434357285499573, "learning_rate": 4.3568863694121185e-06, "loss": 0.1492, "step": 4750 }, { "epoch": 0.7697666882696047, "grad_norm": 0.968651294708252, "learning_rate": 4.356593538010783e-06, "loss": 0.1611, "step": 4751 }, { "epoch": 0.7699287103046014, "grad_norm": 0.7928199172019958, "learning_rate": 4.356300649802178e-06, "loss": 0.1264, "step": 4752 }, { "epoch": 0.7700907323395982, "grad_norm": 0.8838950991630554, "learning_rate": 4.356007704795265e-06, "loss": 0.142, "step": 4753 }, { "epoch": 0.770252754374595, "grad_norm": 0.8881464004516602, "learning_rate": 4.355714702999008e-06, "loss": 0.1438, "step": 4754 }, { "epoch": 0.7704147764095917, "grad_norm": 0.938313364982605, "learning_rate": 4.355421644422372e-06, "loss": 0.1423, "step": 4755 }, { "epoch": 0.7705767984445885, "grad_norm": 0.892318069934845, "learning_rate": 4.355128529074323e-06, "loss": 0.1451, "step": 4756 }, { "epoch": 0.7707388204795852, "grad_norm": 0.8982798457145691, "learning_rate": 4.354835356963831e-06, "loss": 0.1375, "step": 4757 }, { "epoch": 0.770900842514582, "grad_norm": 0.8019870519638062, "learning_rate": 4.354542128099866e-06, "loss": 0.1271, "step": 4758 }, { "epoch": 0.7710628645495787, "grad_norm": 0.8931732773780823, "learning_rate": 4.354248842491399e-06, "loss": 0.1481, "step": 4759 }, { "epoch": 0.7712248865845756, "grad_norm": 0.9127657413482666, "learning_rate": 4.353955500147405e-06, "loss": 0.1466, "step": 4760 }, { "epoch": 0.7713869086195723, "grad_norm": 0.9457529783248901, "learning_rate": 4.353662101076859e-06, "loss": 0.1488, "step": 4761 }, { "epoch": 0.771548930654569, "grad_norm": 0.9101020097732544, "learning_rate": 4.353368645288738e-06, "loss": 0.1464, "step": 4762 }, { "epoch": 0.7717109526895658, "grad_norm": 0.9181843400001526, "learning_rate": 4.353075132792023e-06, "loss": 0.1379, "step": 4763 }, { "epoch": 0.7718729747245625, "grad_norm": 0.8580861687660217, "learning_rate": 4.352781563595691e-06, "loss": 0.145, "step": 4764 }, { "epoch": 0.7720349967595593, "grad_norm": 0.8708130717277527, "learning_rate": 4.352487937708729e-06, "loss": 0.1536, "step": 4765 }, { "epoch": 0.772197018794556, "grad_norm": 0.8263076543807983, "learning_rate": 4.352194255140118e-06, "loss": 0.1327, "step": 4766 }, { "epoch": 0.7723590408295529, "grad_norm": 0.9026300311088562, "learning_rate": 4.351900515898846e-06, "loss": 0.1477, "step": 4767 }, { "epoch": 0.7725210628645496, "grad_norm": 0.9587908387184143, "learning_rate": 4.351606719993899e-06, "loss": 0.1537, "step": 4768 }, { "epoch": 0.7726830848995463, "grad_norm": 0.78399258852005, "learning_rate": 4.3513128674342665e-06, "loss": 0.1308, "step": 4769 }, { "epoch": 0.7728451069345431, "grad_norm": 0.8863502144813538, "learning_rate": 4.351018958228941e-06, "loss": 0.1467, "step": 4770 }, { "epoch": 0.7730071289695398, "grad_norm": 0.9153334498405457, "learning_rate": 4.350724992386915e-06, "loss": 0.1646, "step": 4771 }, { "epoch": 0.7731691510045366, "grad_norm": 0.8299160003662109, "learning_rate": 4.350430969917182e-06, "loss": 0.1437, "step": 4772 }, { "epoch": 0.7733311730395334, "grad_norm": 0.9863147139549255, "learning_rate": 4.35013689082874e-06, "loss": 0.1435, "step": 4773 }, { "epoch": 0.7734931950745302, "grad_norm": 0.9008839130401611, "learning_rate": 4.349842755130587e-06, "loss": 0.1472, "step": 4774 }, { "epoch": 0.7736552171095269, "grad_norm": 0.9455453157424927, "learning_rate": 4.349548562831721e-06, "loss": 0.1493, "step": 4775 }, { "epoch": 0.7738172391445236, "grad_norm": 0.9574447870254517, "learning_rate": 4.349254313941146e-06, "loss": 0.1572, "step": 4776 }, { "epoch": 0.7739792611795204, "grad_norm": 0.9903965592384338, "learning_rate": 4.348960008467863e-06, "loss": 0.1688, "step": 4777 }, { "epoch": 0.7741412832145171, "grad_norm": 0.8893287181854248, "learning_rate": 4.3486656464208785e-06, "loss": 0.142, "step": 4778 }, { "epoch": 0.774303305249514, "grad_norm": 0.8900960087776184, "learning_rate": 4.348371227809199e-06, "loss": 0.1408, "step": 4779 }, { "epoch": 0.7744653272845107, "grad_norm": 0.9652172923088074, "learning_rate": 4.348076752641834e-06, "loss": 0.1675, "step": 4780 }, { "epoch": 0.7746273493195075, "grad_norm": 0.8140687346458435, "learning_rate": 4.34778222092779e-06, "loss": 0.1229, "step": 4781 }, { "epoch": 0.7747893713545042, "grad_norm": 0.7977285385131836, "learning_rate": 4.347487632676084e-06, "loss": 0.122, "step": 4782 }, { "epoch": 0.7749513933895009, "grad_norm": 0.8936629891395569, "learning_rate": 4.347192987895726e-06, "loss": 0.147, "step": 4783 }, { "epoch": 0.7751134154244977, "grad_norm": 0.811756432056427, "learning_rate": 4.346898286595733e-06, "loss": 0.126, "step": 4784 }, { "epoch": 0.7752754374594945, "grad_norm": 0.8740372657775879, "learning_rate": 4.346603528785122e-06, "loss": 0.1515, "step": 4785 }, { "epoch": 0.7754374594944913, "grad_norm": 0.8331345319747925, "learning_rate": 4.3463087144729115e-06, "loss": 0.1341, "step": 4786 }, { "epoch": 0.775599481529488, "grad_norm": 0.8488640785217285, "learning_rate": 4.346013843668122e-06, "loss": 0.1361, "step": 4787 }, { "epoch": 0.7757615035644848, "grad_norm": 0.9174832701683044, "learning_rate": 4.3457189163797776e-06, "loss": 0.1343, "step": 4788 }, { "epoch": 0.7759235255994815, "grad_norm": 0.8470262289047241, "learning_rate": 4.345423932616899e-06, "loss": 0.1319, "step": 4789 }, { "epoch": 0.7760855476344782, "grad_norm": 0.8204308152198792, "learning_rate": 4.345128892388515e-06, "loss": 0.1331, "step": 4790 }, { "epoch": 0.7762475696694751, "grad_norm": 0.8398123383522034, "learning_rate": 4.344833795703652e-06, "loss": 0.1382, "step": 4791 }, { "epoch": 0.7764095917044718, "grad_norm": 0.9840171933174133, "learning_rate": 4.344538642571339e-06, "loss": 0.1512, "step": 4792 }, { "epoch": 0.7765716137394686, "grad_norm": 0.8746889233589172, "learning_rate": 4.3442434330006075e-06, "loss": 0.1425, "step": 4793 }, { "epoch": 0.7767336357744653, "grad_norm": 0.8782622218132019, "learning_rate": 4.3439481670004895e-06, "loss": 0.1317, "step": 4794 }, { "epoch": 0.7768956578094621, "grad_norm": 0.8894055485725403, "learning_rate": 4.34365284458002e-06, "loss": 0.1424, "step": 4795 }, { "epoch": 0.7770576798444588, "grad_norm": 0.8703826069831848, "learning_rate": 4.343357465748235e-06, "loss": 0.1309, "step": 4796 }, { "epoch": 0.7772197018794557, "grad_norm": 0.8910863399505615, "learning_rate": 4.343062030514172e-06, "loss": 0.1431, "step": 4797 }, { "epoch": 0.7773817239144524, "grad_norm": 0.9824634194374084, "learning_rate": 4.342766538886872e-06, "loss": 0.1456, "step": 4798 }, { "epoch": 0.7775437459494491, "grad_norm": 0.862075686454773, "learning_rate": 4.342470990875375e-06, "loss": 0.1338, "step": 4799 }, { "epoch": 0.7777057679844459, "grad_norm": 0.7366591095924377, "learning_rate": 4.342175386488724e-06, "loss": 0.1238, "step": 4800 }, { "epoch": 0.7778677900194426, "grad_norm": 0.8682642579078674, "learning_rate": 4.341879725735965e-06, "loss": 0.1458, "step": 4801 }, { "epoch": 0.7780298120544394, "grad_norm": 1.0655760765075684, "learning_rate": 4.341584008626143e-06, "loss": 0.1728, "step": 4802 }, { "epoch": 0.7781918340894362, "grad_norm": 0.9638577103614807, "learning_rate": 4.341288235168306e-06, "loss": 0.1636, "step": 4803 }, { "epoch": 0.778353856124433, "grad_norm": 0.9434710741043091, "learning_rate": 4.340992405371506e-06, "loss": 0.1469, "step": 4804 }, { "epoch": 0.7785158781594297, "grad_norm": 0.9497054219245911, "learning_rate": 4.340696519244794e-06, "loss": 0.1343, "step": 4805 }, { "epoch": 0.7786779001944264, "grad_norm": 0.8136816620826721, "learning_rate": 4.340400576797221e-06, "loss": 0.1239, "step": 4806 }, { "epoch": 0.7788399222294232, "grad_norm": 0.81424480676651, "learning_rate": 4.340104578037846e-06, "loss": 0.1231, "step": 4807 }, { "epoch": 0.7790019442644199, "grad_norm": 0.8290922045707703, "learning_rate": 4.339808522975722e-06, "loss": 0.1283, "step": 4808 }, { "epoch": 0.7791639662994168, "grad_norm": 0.9101153016090393, "learning_rate": 4.339512411619912e-06, "loss": 0.1419, "step": 4809 }, { "epoch": 0.7793259883344135, "grad_norm": 0.8872000575065613, "learning_rate": 4.339216243979471e-06, "loss": 0.1378, "step": 4810 }, { "epoch": 0.7794880103694103, "grad_norm": 0.9886912703514099, "learning_rate": 4.338920020063465e-06, "loss": 0.1599, "step": 4811 }, { "epoch": 0.779650032404407, "grad_norm": 0.7226459383964539, "learning_rate": 4.3386237398809576e-06, "loss": 0.1087, "step": 4812 }, { "epoch": 0.7798120544394037, "grad_norm": 0.9056746959686279, "learning_rate": 4.338327403441012e-06, "loss": 0.1485, "step": 4813 }, { "epoch": 0.7799740764744005, "grad_norm": 0.831541121006012, "learning_rate": 4.338031010752696e-06, "loss": 0.1161, "step": 4814 }, { "epoch": 0.7801360985093972, "grad_norm": 0.9287052750587463, "learning_rate": 4.337734561825079e-06, "loss": 0.1564, "step": 4815 }, { "epoch": 0.7802981205443941, "grad_norm": 0.9070348739624023, "learning_rate": 4.337438056667233e-06, "loss": 0.1533, "step": 4816 }, { "epoch": 0.7804601425793908, "grad_norm": 0.8945810794830322, "learning_rate": 4.337141495288228e-06, "loss": 0.141, "step": 4817 }, { "epoch": 0.7806221646143876, "grad_norm": 0.9323962926864624, "learning_rate": 4.336844877697139e-06, "loss": 0.1525, "step": 4818 }, { "epoch": 0.7807841866493843, "grad_norm": 0.8296406269073486, "learning_rate": 4.336548203903042e-06, "loss": 0.1358, "step": 4819 }, { "epoch": 0.780946208684381, "grad_norm": 0.8179200887680054, "learning_rate": 4.336251473915015e-06, "loss": 0.138, "step": 4820 }, { "epoch": 0.7811082307193778, "grad_norm": 0.8966086506843567, "learning_rate": 4.335954687742136e-06, "loss": 0.1401, "step": 4821 }, { "epoch": 0.7812702527543746, "grad_norm": 1.0740383863449097, "learning_rate": 4.335657845393486e-06, "loss": 0.1471, "step": 4822 }, { "epoch": 0.7814322747893714, "grad_norm": 0.9319535493850708, "learning_rate": 4.335360946878148e-06, "loss": 0.1565, "step": 4823 }, { "epoch": 0.7815942968243681, "grad_norm": 0.9597566723823547, "learning_rate": 4.335063992205207e-06, "loss": 0.1614, "step": 4824 }, { "epoch": 0.7817563188593649, "grad_norm": 0.8953394293785095, "learning_rate": 4.334766981383749e-06, "loss": 0.1495, "step": 4825 }, { "epoch": 0.7819183408943616, "grad_norm": 0.8695473074913025, "learning_rate": 4.3344699144228605e-06, "loss": 0.1353, "step": 4826 }, { "epoch": 0.7820803629293583, "grad_norm": 0.8189257383346558, "learning_rate": 4.334172791331633e-06, "loss": 0.1269, "step": 4827 }, { "epoch": 0.7822423849643552, "grad_norm": 0.8564877510070801, "learning_rate": 4.333875612119156e-06, "loss": 0.1302, "step": 4828 }, { "epoch": 0.7824044069993519, "grad_norm": 0.8532876968383789, "learning_rate": 4.3335783767945235e-06, "loss": 0.1405, "step": 4829 }, { "epoch": 0.7825664290343487, "grad_norm": 0.9211379289627075, "learning_rate": 4.333281085366829e-06, "loss": 0.1515, "step": 4830 }, { "epoch": 0.7827284510693454, "grad_norm": 0.8540376424789429, "learning_rate": 4.332983737845171e-06, "loss": 0.1295, "step": 4831 }, { "epoch": 0.7828904731043422, "grad_norm": 0.886938750743866, "learning_rate": 4.332686334238646e-06, "loss": 0.1318, "step": 4832 }, { "epoch": 0.7830524951393389, "grad_norm": 0.8990640640258789, "learning_rate": 4.3323888745563544e-06, "loss": 0.1463, "step": 4833 }, { "epoch": 0.7832145171743357, "grad_norm": 0.8602086305618286, "learning_rate": 4.332091358807397e-06, "loss": 0.1309, "step": 4834 }, { "epoch": 0.7833765392093325, "grad_norm": 1.16105318069458, "learning_rate": 4.331793787000878e-06, "loss": 0.1761, "step": 4835 }, { "epoch": 0.7835385612443292, "grad_norm": 0.9003329277038574, "learning_rate": 4.3314961591459015e-06, "loss": 0.1451, "step": 4836 }, { "epoch": 0.783700583279326, "grad_norm": 0.7860771417617798, "learning_rate": 4.3311984752515745e-06, "loss": 0.1227, "step": 4837 }, { "epoch": 0.7838626053143227, "grad_norm": 0.7542071342468262, "learning_rate": 4.330900735327006e-06, "loss": 0.1211, "step": 4838 }, { "epoch": 0.7840246273493195, "grad_norm": 0.9031782150268555, "learning_rate": 4.330602939381306e-06, "loss": 0.1391, "step": 4839 }, { "epoch": 0.7841866493843163, "grad_norm": 0.8484506607055664, "learning_rate": 4.330305087423585e-06, "loss": 0.1333, "step": 4840 }, { "epoch": 0.7843486714193131, "grad_norm": 1.0116180181503296, "learning_rate": 4.3300071794629585e-06, "loss": 0.1477, "step": 4841 }, { "epoch": 0.7845106934543098, "grad_norm": 0.8738240003585815, "learning_rate": 4.329709215508541e-06, "loss": 0.1412, "step": 4842 }, { "epoch": 0.7846727154893065, "grad_norm": 0.8802550435066223, "learning_rate": 4.329411195569448e-06, "loss": 0.1366, "step": 4843 }, { "epoch": 0.7848347375243033, "grad_norm": 1.0167311429977417, "learning_rate": 4.329113119654801e-06, "loss": 0.1548, "step": 4844 }, { "epoch": 0.7849967595593, "grad_norm": 0.9607383608818054, "learning_rate": 4.328814987773718e-06, "loss": 0.1532, "step": 4845 }, { "epoch": 0.7851587815942969, "grad_norm": 0.8469568490982056, "learning_rate": 4.328516799935323e-06, "loss": 0.1294, "step": 4846 }, { "epoch": 0.7853208036292936, "grad_norm": 0.8837965130805969, "learning_rate": 4.328218556148738e-06, "loss": 0.1422, "step": 4847 }, { "epoch": 0.7854828256642904, "grad_norm": 1.0237815380096436, "learning_rate": 4.327920256423089e-06, "loss": 0.165, "step": 4848 }, { "epoch": 0.7856448476992871, "grad_norm": 0.8523423671722412, "learning_rate": 4.327621900767504e-06, "loss": 0.1523, "step": 4849 }, { "epoch": 0.7858068697342838, "grad_norm": 0.831685483455658, "learning_rate": 4.3273234891911135e-06, "loss": 0.137, "step": 4850 }, { "epoch": 0.7859688917692806, "grad_norm": 1.195661187171936, "learning_rate": 4.327025021703044e-06, "loss": 0.146, "step": 4851 }, { "epoch": 0.7861309138042774, "grad_norm": 0.8539892435073853, "learning_rate": 4.3267264983124304e-06, "loss": 0.1462, "step": 4852 }, { "epoch": 0.7862929358392742, "grad_norm": 0.8452509045600891, "learning_rate": 4.326427919028407e-06, "loss": 0.1356, "step": 4853 }, { "epoch": 0.7864549578742709, "grad_norm": 0.9559705853462219, "learning_rate": 4.326129283860109e-06, "loss": 0.1544, "step": 4854 }, { "epoch": 0.7866169799092677, "grad_norm": 0.9361833333969116, "learning_rate": 4.325830592816675e-06, "loss": 0.1396, "step": 4855 }, { "epoch": 0.7867790019442644, "grad_norm": 0.9473642110824585, "learning_rate": 4.3255318459072415e-06, "loss": 0.1549, "step": 4856 }, { "epoch": 0.7869410239792611, "grad_norm": 0.8166648149490356, "learning_rate": 4.325233043140952e-06, "loss": 0.1309, "step": 4857 }, { "epoch": 0.787103046014258, "grad_norm": 0.8858399987220764, "learning_rate": 4.324934184526949e-06, "loss": 0.1317, "step": 4858 }, { "epoch": 0.7872650680492547, "grad_norm": 0.9922860264778137, "learning_rate": 4.324635270074375e-06, "loss": 0.1545, "step": 4859 }, { "epoch": 0.7874270900842515, "grad_norm": 0.9596473574638367, "learning_rate": 4.324336299792378e-06, "loss": 0.163, "step": 4860 }, { "epoch": 0.7875891121192482, "grad_norm": 0.8286867737770081, "learning_rate": 4.3240372736901044e-06, "loss": 0.1262, "step": 4861 }, { "epoch": 0.787751134154245, "grad_norm": 0.8577548861503601, "learning_rate": 4.3237381917767054e-06, "loss": 0.1487, "step": 4862 }, { "epoch": 0.7879131561892417, "grad_norm": 0.9122594594955444, "learning_rate": 4.323439054061331e-06, "loss": 0.1592, "step": 4863 }, { "epoch": 0.7880751782242384, "grad_norm": 0.7763839364051819, "learning_rate": 4.323139860553133e-06, "loss": 0.1135, "step": 4864 }, { "epoch": 0.7882372002592353, "grad_norm": 0.8384743332862854, "learning_rate": 4.3228406112612686e-06, "loss": 0.1249, "step": 4865 }, { "epoch": 0.788399222294232, "grad_norm": 0.9874293804168701, "learning_rate": 4.3225413061948915e-06, "loss": 0.1608, "step": 4866 }, { "epoch": 0.7885612443292288, "grad_norm": 0.8717676997184753, "learning_rate": 4.322241945363161e-06, "loss": 0.1304, "step": 4867 }, { "epoch": 0.7887232663642255, "grad_norm": 0.8286629319190979, "learning_rate": 4.321942528775238e-06, "loss": 0.1303, "step": 4868 }, { "epoch": 0.7888852883992223, "grad_norm": 0.7938746809959412, "learning_rate": 4.3216430564402815e-06, "loss": 0.1231, "step": 4869 }, { "epoch": 0.789047310434219, "grad_norm": 1.0110461711883545, "learning_rate": 4.3213435283674556e-06, "loss": 0.149, "step": 4870 }, { "epoch": 0.7892093324692158, "grad_norm": 0.8735683560371399, "learning_rate": 4.3210439445659255e-06, "loss": 0.1276, "step": 4871 }, { "epoch": 0.7893713545042126, "grad_norm": 1.0407592058181763, "learning_rate": 4.320744305044858e-06, "loss": 0.1411, "step": 4872 }, { "epoch": 0.7895333765392093, "grad_norm": 0.9165899753570557, "learning_rate": 4.3204446098134215e-06, "loss": 0.1503, "step": 4873 }, { "epoch": 0.7896953985742061, "grad_norm": 0.9459313154220581, "learning_rate": 4.320144858880784e-06, "loss": 0.1372, "step": 4874 }, { "epoch": 0.7898574206092028, "grad_norm": 0.8336299061775208, "learning_rate": 4.319845052256119e-06, "loss": 0.1305, "step": 4875 }, { "epoch": 0.7900194426441997, "grad_norm": 0.928596019744873, "learning_rate": 4.319545189948599e-06, "loss": 0.1473, "step": 4876 }, { "epoch": 0.7901814646791964, "grad_norm": 0.8122962713241577, "learning_rate": 4.3192452719674e-06, "loss": 0.1276, "step": 4877 }, { "epoch": 0.7903434867141931, "grad_norm": 0.7669326066970825, "learning_rate": 4.318945298321698e-06, "loss": 0.1314, "step": 4878 }, { "epoch": 0.7905055087491899, "grad_norm": 0.8381757140159607, "learning_rate": 4.318645269020671e-06, "loss": 0.1312, "step": 4879 }, { "epoch": 0.7906675307841866, "grad_norm": 0.8036909103393555, "learning_rate": 4.3183451840735e-06, "loss": 0.1092, "step": 4880 }, { "epoch": 0.7908295528191834, "grad_norm": 0.8607558608055115, "learning_rate": 4.318045043489367e-06, "loss": 0.137, "step": 4881 }, { "epoch": 0.7909915748541801, "grad_norm": 0.8910214900970459, "learning_rate": 4.3177448472774566e-06, "loss": 0.153, "step": 4882 }, { "epoch": 0.791153596889177, "grad_norm": 0.8832724094390869, "learning_rate": 4.317444595446951e-06, "loss": 0.148, "step": 4883 }, { "epoch": 0.7913156189241737, "grad_norm": 0.9177297353744507, "learning_rate": 4.317144288007039e-06, "loss": 0.1537, "step": 4884 }, { "epoch": 0.7914776409591704, "grad_norm": 0.9235965609550476, "learning_rate": 4.316843924966909e-06, "loss": 0.1404, "step": 4885 }, { "epoch": 0.7916396629941672, "grad_norm": 0.8111236691474915, "learning_rate": 4.316543506335752e-06, "loss": 0.1276, "step": 4886 }, { "epoch": 0.7918016850291639, "grad_norm": 0.8697571158409119, "learning_rate": 4.31624303212276e-06, "loss": 0.1325, "step": 4887 }, { "epoch": 0.7919637070641607, "grad_norm": 0.8985753059387207, "learning_rate": 4.315942502337126e-06, "loss": 0.1418, "step": 4888 }, { "epoch": 0.7921257290991575, "grad_norm": 0.8289467096328735, "learning_rate": 4.315641916988046e-06, "loss": 0.1169, "step": 4889 }, { "epoch": 0.7922877511341543, "grad_norm": 1.004921555519104, "learning_rate": 4.315341276084717e-06, "loss": 0.1646, "step": 4890 }, { "epoch": 0.792449773169151, "grad_norm": 0.9182339906692505, "learning_rate": 4.315040579636339e-06, "loss": 0.1565, "step": 4891 }, { "epoch": 0.7926117952041478, "grad_norm": 0.8737165927886963, "learning_rate": 4.3147398276521105e-06, "loss": 0.1278, "step": 4892 }, { "epoch": 0.7927738172391445, "grad_norm": 0.8931154608726501, "learning_rate": 4.314439020141235e-06, "loss": 0.145, "step": 4893 }, { "epoch": 0.7929358392741412, "grad_norm": 0.9454561471939087, "learning_rate": 4.314138157112916e-06, "loss": 0.1496, "step": 4894 }, { "epoch": 0.7930978613091381, "grad_norm": 0.8674540519714355, "learning_rate": 4.313837238576361e-06, "loss": 0.1277, "step": 4895 }, { "epoch": 0.7932598833441348, "grad_norm": 0.8071511387825012, "learning_rate": 4.313536264540774e-06, "loss": 0.1258, "step": 4896 }, { "epoch": 0.7934219053791316, "grad_norm": 0.9354859590530396, "learning_rate": 4.313235235015367e-06, "loss": 0.1537, "step": 4897 }, { "epoch": 0.7935839274141283, "grad_norm": 0.9247655272483826, "learning_rate": 4.312934150009351e-06, "loss": 0.1535, "step": 4898 }, { "epoch": 0.7937459494491251, "grad_norm": 0.8361133933067322, "learning_rate": 4.312633009531935e-06, "loss": 0.1397, "step": 4899 }, { "epoch": 0.7939079714841218, "grad_norm": 1.0772885084152222, "learning_rate": 4.3123318135923355e-06, "loss": 0.1683, "step": 4900 }, { "epoch": 0.7940699935191186, "grad_norm": 0.7447131872177124, "learning_rate": 4.312030562199769e-06, "loss": 0.1155, "step": 4901 }, { "epoch": 0.7942320155541154, "grad_norm": 0.7897053360939026, "learning_rate": 4.311729255363453e-06, "loss": 0.1229, "step": 4902 }, { "epoch": 0.7943940375891121, "grad_norm": 0.8198691606521606, "learning_rate": 4.3114278930926055e-06, "loss": 0.1323, "step": 4903 }, { "epoch": 0.7945560596241089, "grad_norm": 0.8728782534599304, "learning_rate": 4.3111264753964475e-06, "loss": 0.1368, "step": 4904 }, { "epoch": 0.7947180816591056, "grad_norm": 0.8745116591453552, "learning_rate": 4.3108250022842026e-06, "loss": 0.1475, "step": 4905 }, { "epoch": 0.7948801036941024, "grad_norm": 0.8484328389167786, "learning_rate": 4.310523473765095e-06, "loss": 0.1349, "step": 4906 }, { "epoch": 0.7950421257290992, "grad_norm": 0.8352507948875427, "learning_rate": 4.31022188984835e-06, "loss": 0.1349, "step": 4907 }, { "epoch": 0.7952041477640959, "grad_norm": 0.9030001759529114, "learning_rate": 4.309920250543196e-06, "loss": 0.1343, "step": 4908 }, { "epoch": 0.7953661697990927, "grad_norm": 0.9628517627716064, "learning_rate": 4.3096185558588625e-06, "loss": 0.158, "step": 4909 }, { "epoch": 0.7955281918340894, "grad_norm": 0.84720778465271, "learning_rate": 4.30931680580458e-06, "loss": 0.1336, "step": 4910 }, { "epoch": 0.7956902138690862, "grad_norm": 0.9344910383224487, "learning_rate": 4.309015000389583e-06, "loss": 0.1469, "step": 4911 }, { "epoch": 0.7958522359040829, "grad_norm": 0.8999947309494019, "learning_rate": 4.308713139623103e-06, "loss": 0.1324, "step": 4912 }, { "epoch": 0.7960142579390798, "grad_norm": 0.9617306590080261, "learning_rate": 4.308411223514378e-06, "loss": 0.1633, "step": 4913 }, { "epoch": 0.7961762799740765, "grad_norm": 0.8142529726028442, "learning_rate": 4.308109252072647e-06, "loss": 0.1394, "step": 4914 }, { "epoch": 0.7963383020090732, "grad_norm": 0.837073028087616, "learning_rate": 4.307807225307148e-06, "loss": 0.1277, "step": 4915 }, { "epoch": 0.79650032404407, "grad_norm": 0.9646647572517395, "learning_rate": 4.307505143227122e-06, "loss": 0.1675, "step": 4916 }, { "epoch": 0.7966623460790667, "grad_norm": 0.893146812915802, "learning_rate": 4.307203005841813e-06, "loss": 0.146, "step": 4917 }, { "epoch": 0.7968243681140635, "grad_norm": 0.9233022928237915, "learning_rate": 4.306900813160466e-06, "loss": 0.1535, "step": 4918 }, { "epoch": 0.7969863901490603, "grad_norm": 0.8315757513046265, "learning_rate": 4.306598565192327e-06, "loss": 0.133, "step": 4919 }, { "epoch": 0.7971484121840571, "grad_norm": 0.8391256332397461, "learning_rate": 4.306296261946643e-06, "loss": 0.1378, "step": 4920 }, { "epoch": 0.7973104342190538, "grad_norm": 0.9489231109619141, "learning_rate": 4.305993903432664e-06, "loss": 0.1461, "step": 4921 }, { "epoch": 0.7974724562540505, "grad_norm": 0.8281417489051819, "learning_rate": 4.305691489659643e-06, "loss": 0.1266, "step": 4922 }, { "epoch": 0.7976344782890473, "grad_norm": 0.8485118746757507, "learning_rate": 4.305389020636832e-06, "loss": 0.1373, "step": 4923 }, { "epoch": 0.797796500324044, "grad_norm": 0.8453590869903564, "learning_rate": 4.3050864963734854e-06, "loss": 0.1371, "step": 4924 }, { "epoch": 0.7979585223590409, "grad_norm": 0.8031056523323059, "learning_rate": 4.304783916878861e-06, "loss": 0.1262, "step": 4925 }, { "epoch": 0.7981205443940376, "grad_norm": 0.9986342191696167, "learning_rate": 4.304481282162215e-06, "loss": 0.1689, "step": 4926 }, { "epoch": 0.7982825664290344, "grad_norm": 0.8515965938568115, "learning_rate": 4.304178592232809e-06, "loss": 0.1373, "step": 4927 }, { "epoch": 0.7984445884640311, "grad_norm": 0.9090455174446106, "learning_rate": 4.3038758470999056e-06, "loss": 0.1508, "step": 4928 }, { "epoch": 0.7986066104990278, "grad_norm": 0.8485004305839539, "learning_rate": 4.303573046772765e-06, "loss": 0.1513, "step": 4929 }, { "epoch": 0.7987686325340246, "grad_norm": 1.051450490951538, "learning_rate": 4.303270191260654e-06, "loss": 0.1781, "step": 4930 }, { "epoch": 0.7989306545690213, "grad_norm": 0.8531157374382019, "learning_rate": 4.302967280572839e-06, "loss": 0.1348, "step": 4931 }, { "epoch": 0.7990926766040182, "grad_norm": 0.7763565182685852, "learning_rate": 4.302664314718588e-06, "loss": 0.1182, "step": 4932 }, { "epoch": 0.7992546986390149, "grad_norm": 0.9212713837623596, "learning_rate": 4.302361293707172e-06, "loss": 0.1209, "step": 4933 }, { "epoch": 0.7994167206740117, "grad_norm": 0.8668341040611267, "learning_rate": 4.302058217547862e-06, "loss": 0.1316, "step": 4934 }, { "epoch": 0.7995787427090084, "grad_norm": 1.0723251104354858, "learning_rate": 4.3017550862499314e-06, "loss": 0.1604, "step": 4935 }, { "epoch": 0.7997407647440052, "grad_norm": 0.9705539345741272, "learning_rate": 4.301451899822655e-06, "loss": 0.1541, "step": 4936 }, { "epoch": 0.799902786779002, "grad_norm": 0.8101187348365784, "learning_rate": 4.30114865827531e-06, "loss": 0.1226, "step": 4937 }, { "epoch": 0.8000648088139987, "grad_norm": 0.9303157925605774, "learning_rate": 4.3008453616171746e-06, "loss": 0.1661, "step": 4938 }, { "epoch": 0.8002268308489955, "grad_norm": 0.8247880339622498, "learning_rate": 4.300542009857529e-06, "loss": 0.1287, "step": 4939 }, { "epoch": 0.8003888528839922, "grad_norm": 0.8863083124160767, "learning_rate": 4.300238603005656e-06, "loss": 0.1557, "step": 4940 }, { "epoch": 0.800550874918989, "grad_norm": 0.9141151905059814, "learning_rate": 4.299935141070837e-06, "loss": 0.1515, "step": 4941 }, { "epoch": 0.8007128969539857, "grad_norm": 0.925873875617981, "learning_rate": 4.299631624062359e-06, "loss": 0.1467, "step": 4942 }, { "epoch": 0.8008749189889826, "grad_norm": 0.9137710332870483, "learning_rate": 4.299328051989509e-06, "loss": 0.1325, "step": 4943 }, { "epoch": 0.8010369410239793, "grad_norm": 1.1716556549072266, "learning_rate": 4.299024424861574e-06, "loss": 0.1653, "step": 4944 }, { "epoch": 0.801198963058976, "grad_norm": 1.0240533351898193, "learning_rate": 4.298720742687846e-06, "loss": 0.1516, "step": 4945 }, { "epoch": 0.8013609850939728, "grad_norm": 0.8595703840255737, "learning_rate": 4.298417005477616e-06, "loss": 0.1388, "step": 4946 }, { "epoch": 0.8015230071289695, "grad_norm": 0.817602813243866, "learning_rate": 4.298113213240176e-06, "loss": 0.1203, "step": 4947 }, { "epoch": 0.8016850291639663, "grad_norm": 0.9021492004394531, "learning_rate": 4.2978093659848255e-06, "loss": 0.1397, "step": 4948 }, { "epoch": 0.801847051198963, "grad_norm": 0.756421685218811, "learning_rate": 4.297505463720857e-06, "loss": 0.1294, "step": 4949 }, { "epoch": 0.8020090732339599, "grad_norm": 0.8492708206176758, "learning_rate": 4.2972015064575726e-06, "loss": 0.146, "step": 4950 }, { "epoch": 0.8021710952689566, "grad_norm": 0.8680381774902344, "learning_rate": 4.29689749420427e-06, "loss": 0.1556, "step": 4951 }, { "epoch": 0.8023331173039533, "grad_norm": 0.7963783144950867, "learning_rate": 4.2965934269702535e-06, "loss": 0.1291, "step": 4952 }, { "epoch": 0.8024951393389501, "grad_norm": 0.8368898630142212, "learning_rate": 4.296289304764825e-06, "loss": 0.1396, "step": 4953 }, { "epoch": 0.8026571613739468, "grad_norm": 0.951833188533783, "learning_rate": 4.295985127597291e-06, "loss": 0.1592, "step": 4954 }, { "epoch": 0.8028191834089436, "grad_norm": 0.8976386785507202, "learning_rate": 4.295680895476959e-06, "loss": 0.1454, "step": 4955 }, { "epoch": 0.8029812054439404, "grad_norm": 0.8689819574356079, "learning_rate": 4.295376608413136e-06, "loss": 0.1393, "step": 4956 }, { "epoch": 0.8031432274789372, "grad_norm": 0.9349297285079956, "learning_rate": 4.295072266415135e-06, "loss": 0.1617, "step": 4957 }, { "epoch": 0.8033052495139339, "grad_norm": 0.9541299939155579, "learning_rate": 4.294767869492265e-06, "loss": 0.1545, "step": 4958 }, { "epoch": 0.8034672715489306, "grad_norm": 0.9361594915390015, "learning_rate": 4.294463417653842e-06, "loss": 0.156, "step": 4959 }, { "epoch": 0.8036292935839274, "grad_norm": 0.8772572875022888, "learning_rate": 4.294158910909181e-06, "loss": 0.14, "step": 4960 }, { "epoch": 0.8037913156189241, "grad_norm": 0.84719318151474, "learning_rate": 4.2938543492676e-06, "loss": 0.1324, "step": 4961 }, { "epoch": 0.803953337653921, "grad_norm": 0.7897992730140686, "learning_rate": 4.293549732738415e-06, "loss": 0.1266, "step": 4962 }, { "epoch": 0.8041153596889177, "grad_norm": 0.9460259079933167, "learning_rate": 4.29324506133095e-06, "loss": 0.1521, "step": 4963 }, { "epoch": 0.8042773817239145, "grad_norm": 0.8699532747268677, "learning_rate": 4.2929403350545255e-06, "loss": 0.1435, "step": 4964 }, { "epoch": 0.8044394037589112, "grad_norm": 0.9408538341522217, "learning_rate": 4.292635553918466e-06, "loss": 0.1431, "step": 4965 }, { "epoch": 0.8046014257939079, "grad_norm": 0.885382354259491, "learning_rate": 4.292330717932095e-06, "loss": 0.1411, "step": 4966 }, { "epoch": 0.8047634478289047, "grad_norm": 0.9126858115196228, "learning_rate": 4.292025827104744e-06, "loss": 0.1511, "step": 4967 }, { "epoch": 0.8049254698639015, "grad_norm": 0.8485498428344727, "learning_rate": 4.2917208814457364e-06, "loss": 0.1325, "step": 4968 }, { "epoch": 0.8050874918988983, "grad_norm": 0.9165229797363281, "learning_rate": 4.291415880964407e-06, "loss": 0.1521, "step": 4969 }, { "epoch": 0.805249513933895, "grad_norm": 0.811354398727417, "learning_rate": 4.291110825670087e-06, "loss": 0.1297, "step": 4970 }, { "epoch": 0.8054115359688918, "grad_norm": 0.8046636581420898, "learning_rate": 4.29080571557211e-06, "loss": 0.1192, "step": 4971 }, { "epoch": 0.8055735580038885, "grad_norm": 0.8413184881210327, "learning_rate": 4.290500550679811e-06, "loss": 0.1316, "step": 4972 }, { "epoch": 0.8057355800388852, "grad_norm": 0.9264662861824036, "learning_rate": 4.290195331002529e-06, "loss": 0.15, "step": 4973 }, { "epoch": 0.8058976020738821, "grad_norm": 0.917666494846344, "learning_rate": 4.289890056549603e-06, "loss": 0.1395, "step": 4974 }, { "epoch": 0.8060596241088788, "grad_norm": 0.9180327653884888, "learning_rate": 4.2895847273303705e-06, "loss": 0.1427, "step": 4975 }, { "epoch": 0.8062216461438756, "grad_norm": 0.902094841003418, "learning_rate": 4.289279343354178e-06, "loss": 0.1421, "step": 4976 }, { "epoch": 0.8063836681788723, "grad_norm": 0.8890548944473267, "learning_rate": 4.288973904630366e-06, "loss": 0.134, "step": 4977 }, { "epoch": 0.8065456902138691, "grad_norm": 0.8452367186546326, "learning_rate": 4.288668411168283e-06, "loss": 0.1293, "step": 4978 }, { "epoch": 0.8067077122488658, "grad_norm": 0.9811566472053528, "learning_rate": 4.288362862977274e-06, "loss": 0.1597, "step": 4979 }, { "epoch": 0.8068697342838627, "grad_norm": 0.9972265958786011, "learning_rate": 4.28805726006669e-06, "loss": 0.1607, "step": 4980 }, { "epoch": 0.8070317563188594, "grad_norm": 0.9556589722633362, "learning_rate": 4.287751602445881e-06, "loss": 0.1415, "step": 4981 }, { "epoch": 0.8071937783538561, "grad_norm": 0.9611114859580994, "learning_rate": 4.287445890124198e-06, "loss": 0.1601, "step": 4982 }, { "epoch": 0.8073558003888529, "grad_norm": 0.8882191181182861, "learning_rate": 4.287140123110998e-06, "loss": 0.1359, "step": 4983 }, { "epoch": 0.8075178224238496, "grad_norm": 0.9331012964248657, "learning_rate": 4.286834301415634e-06, "loss": 0.1498, "step": 4984 }, { "epoch": 0.8076798444588464, "grad_norm": 0.8011325001716614, "learning_rate": 4.286528425047464e-06, "loss": 0.1285, "step": 4985 }, { "epoch": 0.8078418664938432, "grad_norm": 0.9134606719017029, "learning_rate": 4.286222494015848e-06, "loss": 0.155, "step": 4986 }, { "epoch": 0.80800388852884, "grad_norm": 0.8747284412384033, "learning_rate": 4.285916508330146e-06, "loss": 0.143, "step": 4987 }, { "epoch": 0.8081659105638367, "grad_norm": 0.8503855466842651, "learning_rate": 4.285610467999722e-06, "loss": 0.1233, "step": 4988 }, { "epoch": 0.8083279325988334, "grad_norm": 0.8365163207054138, "learning_rate": 4.285304373033938e-06, "loss": 0.1299, "step": 4989 }, { "epoch": 0.8084899546338302, "grad_norm": 0.7736684083938599, "learning_rate": 4.28499822344216e-06, "loss": 0.1336, "step": 4990 }, { "epoch": 0.8086519766688269, "grad_norm": 0.7120151519775391, "learning_rate": 4.284692019233756e-06, "loss": 0.1262, "step": 4991 }, { "epoch": 0.8088139987038238, "grad_norm": 0.987061083316803, "learning_rate": 4.2843857604180955e-06, "loss": 0.1582, "step": 4992 }, { "epoch": 0.8089760207388205, "grad_norm": 0.804357647895813, "learning_rate": 4.2840794470045484e-06, "loss": 0.1307, "step": 4993 }, { "epoch": 0.8091380427738173, "grad_norm": 0.9282926917076111, "learning_rate": 4.283773079002488e-06, "loss": 0.1554, "step": 4994 }, { "epoch": 0.809300064808814, "grad_norm": 0.8444812297821045, "learning_rate": 4.283466656421289e-06, "loss": 0.1413, "step": 4995 }, { "epoch": 0.8094620868438107, "grad_norm": 0.9229618310928345, "learning_rate": 4.283160179270325e-06, "loss": 0.1442, "step": 4996 }, { "epoch": 0.8096241088788075, "grad_norm": 0.8210486173629761, "learning_rate": 4.282853647558974e-06, "loss": 0.1277, "step": 4997 }, { "epoch": 0.8097861309138042, "grad_norm": 0.8737907409667969, "learning_rate": 4.282547061296618e-06, "loss": 0.1377, "step": 4998 }, { "epoch": 0.8099481529488011, "grad_norm": 0.8875065445899963, "learning_rate": 4.2822404204926334e-06, "loss": 0.1343, "step": 4999 }, { "epoch": 0.8101101749837978, "grad_norm": 0.8774368762969971, "learning_rate": 4.281933725156406e-06, "loss": 0.1423, "step": 5000 }, { "epoch": 0.8102721970187946, "grad_norm": 0.8778204917907715, "learning_rate": 4.281626975297319e-06, "loss": 0.1279, "step": 5001 }, { "epoch": 0.8104342190537913, "grad_norm": 0.8505150079727173, "learning_rate": 4.281320170924758e-06, "loss": 0.1398, "step": 5002 }, { "epoch": 0.810596241088788, "grad_norm": 0.7801287770271301, "learning_rate": 4.281013312048109e-06, "loss": 0.1218, "step": 5003 }, { "epoch": 0.8107582631237849, "grad_norm": 0.9131519794464111, "learning_rate": 4.280706398676764e-06, "loss": 0.1413, "step": 5004 }, { "epoch": 0.8109202851587816, "grad_norm": 0.8857017159461975, "learning_rate": 4.280399430820112e-06, "loss": 0.1381, "step": 5005 }, { "epoch": 0.8110823071937784, "grad_norm": 0.8741483092308044, "learning_rate": 4.2800924084875465e-06, "loss": 0.1353, "step": 5006 }, { "epoch": 0.8112443292287751, "grad_norm": 0.816307783126831, "learning_rate": 4.27978533168846e-06, "loss": 0.1354, "step": 5007 }, { "epoch": 0.8114063512637719, "grad_norm": 0.8606013059616089, "learning_rate": 4.27947820043225e-06, "loss": 0.1452, "step": 5008 }, { "epoch": 0.8115683732987686, "grad_norm": 0.999919593334198, "learning_rate": 4.279171014728314e-06, "loss": 0.1466, "step": 5009 }, { "epoch": 0.8117303953337653, "grad_norm": 0.8366535305976868, "learning_rate": 4.278863774586049e-06, "loss": 0.1267, "step": 5010 }, { "epoch": 0.8118924173687622, "grad_norm": 0.9078406691551208, "learning_rate": 4.278556480014858e-06, "loss": 0.1422, "step": 5011 }, { "epoch": 0.8120544394037589, "grad_norm": 0.9016123414039612, "learning_rate": 4.2782491310241426e-06, "loss": 0.1574, "step": 5012 }, { "epoch": 0.8122164614387557, "grad_norm": 0.7560967206954956, "learning_rate": 4.277941727623307e-06, "loss": 0.1211, "step": 5013 }, { "epoch": 0.8123784834737524, "grad_norm": 0.8945764303207397, "learning_rate": 4.2776342698217575e-06, "loss": 0.148, "step": 5014 }, { "epoch": 0.8125405055087492, "grad_norm": 0.8035917282104492, "learning_rate": 4.277326757628901e-06, "loss": 0.1312, "step": 5015 }, { "epoch": 0.812702527543746, "grad_norm": 0.925362229347229, "learning_rate": 4.277019191054146e-06, "loss": 0.1421, "step": 5016 }, { "epoch": 0.8128645495787427, "grad_norm": 0.8655949831008911, "learning_rate": 4.276711570106905e-06, "loss": 0.1454, "step": 5017 }, { "epoch": 0.8130265716137395, "grad_norm": 0.9567270874977112, "learning_rate": 4.276403894796589e-06, "loss": 0.1528, "step": 5018 }, { "epoch": 0.8131885936487362, "grad_norm": 0.8218672275543213, "learning_rate": 4.276096165132613e-06, "loss": 0.1435, "step": 5019 }, { "epoch": 0.813350615683733, "grad_norm": 0.9071885943412781, "learning_rate": 4.275788381124393e-06, "loss": 0.1343, "step": 5020 }, { "epoch": 0.8135126377187297, "grad_norm": 0.801608681678772, "learning_rate": 4.2754805427813455e-06, "loss": 0.138, "step": 5021 }, { "epoch": 0.8136746597537265, "grad_norm": 0.960382878780365, "learning_rate": 4.275172650112889e-06, "loss": 0.159, "step": 5022 }, { "epoch": 0.8138366817887233, "grad_norm": 0.8618503212928772, "learning_rate": 4.274864703128446e-06, "loss": 0.1346, "step": 5023 }, { "epoch": 0.81399870382372, "grad_norm": 0.8866019248962402, "learning_rate": 4.274556701837438e-06, "loss": 0.1402, "step": 5024 }, { "epoch": 0.8141607258587168, "grad_norm": 0.8554643988609314, "learning_rate": 4.27424864624929e-06, "loss": 0.1376, "step": 5025 }, { "epoch": 0.8143227478937135, "grad_norm": 0.8216258883476257, "learning_rate": 4.273940536373426e-06, "loss": 0.1221, "step": 5026 }, { "epoch": 0.8144847699287103, "grad_norm": 1.0153284072875977, "learning_rate": 4.273632372219274e-06, "loss": 0.1503, "step": 5027 }, { "epoch": 0.814646791963707, "grad_norm": 0.8892536759376526, "learning_rate": 4.273324153796264e-06, "loss": 0.1476, "step": 5028 }, { "epoch": 0.8148088139987039, "grad_norm": 0.9032856225967407, "learning_rate": 4.273015881113827e-06, "loss": 0.1476, "step": 5029 }, { "epoch": 0.8149708360337006, "grad_norm": 0.8178720474243164, "learning_rate": 4.2727075541813945e-06, "loss": 0.136, "step": 5030 }, { "epoch": 0.8151328580686974, "grad_norm": 0.8843294382095337, "learning_rate": 4.2723991730084e-06, "loss": 0.152, "step": 5031 }, { "epoch": 0.8152948801036941, "grad_norm": 0.8823347091674805, "learning_rate": 4.27209073760428e-06, "loss": 0.1439, "step": 5032 }, { "epoch": 0.8154569021386908, "grad_norm": 0.886072039604187, "learning_rate": 4.271782247978473e-06, "loss": 0.1416, "step": 5033 }, { "epoch": 0.8156189241736876, "grad_norm": 0.8647177815437317, "learning_rate": 4.271473704140415e-06, "loss": 0.1335, "step": 5034 }, { "epoch": 0.8157809462086844, "grad_norm": 0.8254427313804626, "learning_rate": 4.271165106099549e-06, "loss": 0.1284, "step": 5035 }, { "epoch": 0.8159429682436812, "grad_norm": 0.8761928081512451, "learning_rate": 4.270856453865318e-06, "loss": 0.1494, "step": 5036 }, { "epoch": 0.8161049902786779, "grad_norm": 0.8046637773513794, "learning_rate": 4.2705477474471645e-06, "loss": 0.1394, "step": 5037 }, { "epoch": 0.8162670123136747, "grad_norm": 0.8774124383926392, "learning_rate": 4.270238986854534e-06, "loss": 0.1426, "step": 5038 }, { "epoch": 0.8164290343486714, "grad_norm": 0.7928304672241211, "learning_rate": 4.269930172096875e-06, "loss": 0.1345, "step": 5039 }, { "epoch": 0.8165910563836681, "grad_norm": 0.7432117462158203, "learning_rate": 4.2696213031836355e-06, "loss": 0.1129, "step": 5040 }, { "epoch": 0.816753078418665, "grad_norm": 0.860908031463623, "learning_rate": 4.269312380124268e-06, "loss": 0.1405, "step": 5041 }, { "epoch": 0.8169151004536617, "grad_norm": 0.9895366430282593, "learning_rate": 4.2690034029282214e-06, "loss": 0.1602, "step": 5042 }, { "epoch": 0.8170771224886585, "grad_norm": 0.9045162200927734, "learning_rate": 4.268694371604952e-06, "loss": 0.1551, "step": 5043 }, { "epoch": 0.8172391445236552, "grad_norm": 0.8091841340065002, "learning_rate": 4.268385286163915e-06, "loss": 0.1301, "step": 5044 }, { "epoch": 0.817401166558652, "grad_norm": 0.8731530904769897, "learning_rate": 4.2680761466145685e-06, "loss": 0.1509, "step": 5045 }, { "epoch": 0.8175631885936487, "grad_norm": 1.0045936107635498, "learning_rate": 4.267766952966369e-06, "loss": 0.1587, "step": 5046 }, { "epoch": 0.8177252106286454, "grad_norm": 0.7668003439903259, "learning_rate": 4.267457705228781e-06, "loss": 0.1308, "step": 5047 }, { "epoch": 0.8178872326636423, "grad_norm": 0.9017691612243652, "learning_rate": 4.267148403411261e-06, "loss": 0.1451, "step": 5048 }, { "epoch": 0.818049254698639, "grad_norm": 0.8749794363975525, "learning_rate": 4.266839047523279e-06, "loss": 0.1391, "step": 5049 }, { "epoch": 0.8182112767336358, "grad_norm": 0.7766733765602112, "learning_rate": 4.266529637574297e-06, "loss": 0.1278, "step": 5050 }, { "epoch": 0.8183732987686325, "grad_norm": 0.8146058320999146, "learning_rate": 4.266220173573783e-06, "loss": 0.1348, "step": 5051 }, { "epoch": 0.8185353208036293, "grad_norm": 0.8790404796600342, "learning_rate": 4.265910655531206e-06, "loss": 0.1364, "step": 5052 }, { "epoch": 0.818697342838626, "grad_norm": 0.8823887705802917, "learning_rate": 4.265601083456036e-06, "loss": 0.1339, "step": 5053 }, { "epoch": 0.8188593648736228, "grad_norm": 0.8041099309921265, "learning_rate": 4.265291457357746e-06, "loss": 0.1253, "step": 5054 }, { "epoch": 0.8190213869086196, "grad_norm": 0.9392186403274536, "learning_rate": 4.264981777245809e-06, "loss": 0.1374, "step": 5055 }, { "epoch": 0.8191834089436163, "grad_norm": 0.8520307540893555, "learning_rate": 4.2646720431297006e-06, "loss": 0.1322, "step": 5056 }, { "epoch": 0.8193454309786131, "grad_norm": 0.8722667694091797, "learning_rate": 4.264362255018898e-06, "loss": 0.1467, "step": 5057 }, { "epoch": 0.8195074530136098, "grad_norm": 0.8683182001113892, "learning_rate": 4.2640524129228815e-06, "loss": 0.1249, "step": 5058 }, { "epoch": 0.8196694750486067, "grad_norm": 0.8108190298080444, "learning_rate": 4.263742516851128e-06, "loss": 0.125, "step": 5059 }, { "epoch": 0.8198314970836034, "grad_norm": 1.2350842952728271, "learning_rate": 4.263432566813123e-06, "loss": 0.159, "step": 5060 }, { "epoch": 0.8199935191186001, "grad_norm": 1.0421861410140991, "learning_rate": 4.263122562818349e-06, "loss": 0.1681, "step": 5061 }, { "epoch": 0.8201555411535969, "grad_norm": 0.8750341534614563, "learning_rate": 4.262812504876291e-06, "loss": 0.1288, "step": 5062 }, { "epoch": 0.8203175631885936, "grad_norm": 0.8388615846633911, "learning_rate": 4.262502392996436e-06, "loss": 0.1289, "step": 5063 }, { "epoch": 0.8204795852235904, "grad_norm": 0.865910530090332, "learning_rate": 4.262192227188273e-06, "loss": 0.1329, "step": 5064 }, { "epoch": 0.8206416072585871, "grad_norm": 0.9594810009002686, "learning_rate": 4.261882007461292e-06, "loss": 0.153, "step": 5065 }, { "epoch": 0.820803629293584, "grad_norm": 0.8188930153846741, "learning_rate": 4.261571733824986e-06, "loss": 0.1377, "step": 5066 }, { "epoch": 0.8209656513285807, "grad_norm": 0.9599230289459229, "learning_rate": 4.261261406288847e-06, "loss": 0.1379, "step": 5067 }, { "epoch": 0.8211276733635774, "grad_norm": 0.9220377802848816, "learning_rate": 4.260951024862372e-06, "loss": 0.1523, "step": 5068 }, { "epoch": 0.8212896953985742, "grad_norm": 0.8499447703361511, "learning_rate": 4.2606405895550565e-06, "loss": 0.1336, "step": 5069 }, { "epoch": 0.8214517174335709, "grad_norm": 0.9115306735038757, "learning_rate": 4.2603301003763994e-06, "loss": 0.1438, "step": 5070 }, { "epoch": 0.8216137394685677, "grad_norm": 0.9516029357910156, "learning_rate": 4.260019557335902e-06, "loss": 0.153, "step": 5071 }, { "epoch": 0.8217757615035645, "grad_norm": 0.7999269962310791, "learning_rate": 4.259708960443065e-06, "loss": 0.1234, "step": 5072 }, { "epoch": 0.8219377835385613, "grad_norm": 0.8605833053588867, "learning_rate": 4.259398309707392e-06, "loss": 0.1404, "step": 5073 }, { "epoch": 0.822099805573558, "grad_norm": 0.8901498317718506, "learning_rate": 4.259087605138388e-06, "loss": 0.1481, "step": 5074 }, { "epoch": 0.8222618276085548, "grad_norm": 0.8830893039703369, "learning_rate": 4.258776846745561e-06, "loss": 0.1479, "step": 5075 }, { "epoch": 0.8224238496435515, "grad_norm": 0.9161304831504822, "learning_rate": 4.2584660345384176e-06, "loss": 0.1487, "step": 5076 }, { "epoch": 0.8225858716785482, "grad_norm": 0.8231655955314636, "learning_rate": 4.25815516852647e-06, "loss": 0.1412, "step": 5077 }, { "epoch": 0.8227478937135451, "grad_norm": 0.7701215744018555, "learning_rate": 4.257844248719229e-06, "loss": 0.1308, "step": 5078 }, { "epoch": 0.8229099157485418, "grad_norm": 0.9185945987701416, "learning_rate": 4.257533275126206e-06, "loss": 0.1517, "step": 5079 }, { "epoch": 0.8230719377835386, "grad_norm": 0.9465458989143372, "learning_rate": 4.25722224775692e-06, "loss": 0.1686, "step": 5080 }, { "epoch": 0.8232339598185353, "grad_norm": 0.8534404635429382, "learning_rate": 4.256911166620885e-06, "loss": 0.1425, "step": 5081 }, { "epoch": 0.8233959818535321, "grad_norm": 0.8880043625831604, "learning_rate": 4.25660003172762e-06, "loss": 0.1425, "step": 5082 }, { "epoch": 0.8235580038885288, "grad_norm": 1.02664053440094, "learning_rate": 4.256288843086645e-06, "loss": 0.1425, "step": 5083 }, { "epoch": 0.8237200259235256, "grad_norm": 0.8933080434799194, "learning_rate": 4.255977600707481e-06, "loss": 0.1529, "step": 5084 }, { "epoch": 0.8238820479585224, "grad_norm": 0.9547656774520874, "learning_rate": 4.255666304599653e-06, "loss": 0.157, "step": 5085 }, { "epoch": 0.8240440699935191, "grad_norm": 0.8259825706481934, "learning_rate": 4.255354954772684e-06, "loss": 0.1333, "step": 5086 }, { "epoch": 0.8242060920285159, "grad_norm": 0.7905333638191223, "learning_rate": 4.255043551236101e-06, "loss": 0.1245, "step": 5087 }, { "epoch": 0.8243681140635126, "grad_norm": 0.8866136074066162, "learning_rate": 4.2547320939994315e-06, "loss": 0.147, "step": 5088 }, { "epoch": 0.8245301360985094, "grad_norm": 1.0445470809936523, "learning_rate": 4.254420583072209e-06, "loss": 0.1631, "step": 5089 }, { "epoch": 0.8246921581335062, "grad_norm": 0.865193247795105, "learning_rate": 4.25410901846396e-06, "loss": 0.1331, "step": 5090 }, { "epoch": 0.8248541801685029, "grad_norm": 0.8681652545928955, "learning_rate": 4.25379740018422e-06, "loss": 0.1356, "step": 5091 }, { "epoch": 0.8250162022034997, "grad_norm": 0.803639829158783, "learning_rate": 4.253485728242525e-06, "loss": 0.1306, "step": 5092 }, { "epoch": 0.8251782242384964, "grad_norm": 0.8816442489624023, "learning_rate": 4.253174002648409e-06, "loss": 0.1355, "step": 5093 }, { "epoch": 0.8253402462734932, "grad_norm": 0.8702470660209656, "learning_rate": 4.252862223411412e-06, "loss": 0.1454, "step": 5094 }, { "epoch": 0.8255022683084899, "grad_norm": 0.7982497811317444, "learning_rate": 4.2525503905410715e-06, "loss": 0.1281, "step": 5095 }, { "epoch": 0.8256642903434868, "grad_norm": 0.8184255957603455, "learning_rate": 4.252238504046931e-06, "loss": 0.1397, "step": 5096 }, { "epoch": 0.8258263123784835, "grad_norm": 0.8369611501693726, "learning_rate": 4.251926563938533e-06, "loss": 0.1266, "step": 5097 }, { "epoch": 0.8259883344134802, "grad_norm": 0.7753140926361084, "learning_rate": 4.251614570225421e-06, "loss": 0.1157, "step": 5098 }, { "epoch": 0.826150356448477, "grad_norm": 1.0960832834243774, "learning_rate": 4.251302522917142e-06, "loss": 0.1757, "step": 5099 }, { "epoch": 0.8263123784834737, "grad_norm": 0.8341733813285828, "learning_rate": 4.250990422023243e-06, "loss": 0.1455, "step": 5100 }, { "epoch": 0.8264744005184705, "grad_norm": 0.8899952173233032, "learning_rate": 4.250678267553277e-06, "loss": 0.1514, "step": 5101 }, { "epoch": 0.8266364225534673, "grad_norm": 0.850158154964447, "learning_rate": 4.250366059516791e-06, "loss": 0.1365, "step": 5102 }, { "epoch": 0.8267984445884641, "grad_norm": 0.8187429904937744, "learning_rate": 4.250053797923339e-06, "loss": 0.1324, "step": 5103 }, { "epoch": 0.8269604666234608, "grad_norm": 0.9219319224357605, "learning_rate": 4.249741482782476e-06, "loss": 0.1514, "step": 5104 }, { "epoch": 0.8271224886584575, "grad_norm": 0.9627091884613037, "learning_rate": 4.249429114103758e-06, "loss": 0.1689, "step": 5105 }, { "epoch": 0.8272845106934543, "grad_norm": 0.7787367105484009, "learning_rate": 4.249116691896743e-06, "loss": 0.1235, "step": 5106 }, { "epoch": 0.827446532728451, "grad_norm": 0.8211469054222107, "learning_rate": 4.24880421617099e-06, "loss": 0.1301, "step": 5107 }, { "epoch": 0.8276085547634479, "grad_norm": 0.8499755859375, "learning_rate": 4.248491686936059e-06, "loss": 0.138, "step": 5108 }, { "epoch": 0.8277705767984446, "grad_norm": 0.8932889699935913, "learning_rate": 4.248179104201515e-06, "loss": 0.1618, "step": 5109 }, { "epoch": 0.8279325988334414, "grad_norm": 0.8149641156196594, "learning_rate": 4.2478664679769196e-06, "loss": 0.1358, "step": 5110 }, { "epoch": 0.8280946208684381, "grad_norm": 0.8986614942550659, "learning_rate": 4.247553778271841e-06, "loss": 0.1365, "step": 5111 }, { "epoch": 0.8282566429034348, "grad_norm": 0.9837633371353149, "learning_rate": 4.247241035095846e-06, "loss": 0.1583, "step": 5112 }, { "epoch": 0.8284186649384316, "grad_norm": 0.7755539417266846, "learning_rate": 4.246928238458503e-06, "loss": 0.1245, "step": 5113 }, { "epoch": 0.8285806869734283, "grad_norm": 0.8996148109436035, "learning_rate": 4.246615388369384e-06, "loss": 0.1397, "step": 5114 }, { "epoch": 0.8287427090084252, "grad_norm": 1.0767894983291626, "learning_rate": 4.24630248483806e-06, "loss": 0.1456, "step": 5115 }, { "epoch": 0.8289047310434219, "grad_norm": 0.9750000238418579, "learning_rate": 4.245989527874107e-06, "loss": 0.1515, "step": 5116 }, { "epoch": 0.8290667530784187, "grad_norm": 0.9112747311592102, "learning_rate": 4.245676517487098e-06, "loss": 0.1469, "step": 5117 }, { "epoch": 0.8292287751134154, "grad_norm": 0.7812881469726562, "learning_rate": 4.245363453686614e-06, "loss": 0.1129, "step": 5118 }, { "epoch": 0.8293907971484121, "grad_norm": 0.838598370552063, "learning_rate": 4.245050336482231e-06, "loss": 0.1267, "step": 5119 }, { "epoch": 0.829552819183409, "grad_norm": 0.9569753408432007, "learning_rate": 4.24473716588353e-06, "loss": 0.1627, "step": 5120 }, { "epoch": 0.8297148412184057, "grad_norm": 0.9220561385154724, "learning_rate": 4.244423941900095e-06, "loss": 0.1537, "step": 5121 }, { "epoch": 0.8298768632534025, "grad_norm": 0.9660263061523438, "learning_rate": 4.2441106645415085e-06, "loss": 0.1571, "step": 5122 }, { "epoch": 0.8300388852883992, "grad_norm": 0.9250351190567017, "learning_rate": 4.243797333817356e-06, "loss": 0.1553, "step": 5123 }, { "epoch": 0.830200907323396, "grad_norm": 0.8232597708702087, "learning_rate": 4.243483949737225e-06, "loss": 0.1277, "step": 5124 }, { "epoch": 0.8303629293583927, "grad_norm": 0.9091588854789734, "learning_rate": 4.2431705123107045e-06, "loss": 0.1436, "step": 5125 }, { "epoch": 0.8305249513933896, "grad_norm": 0.9420716762542725, "learning_rate": 4.242857021547385e-06, "loss": 0.1722, "step": 5126 }, { "epoch": 0.8306869734283863, "grad_norm": 0.8245367407798767, "learning_rate": 4.242543477456858e-06, "loss": 0.1339, "step": 5127 }, { "epoch": 0.830848995463383, "grad_norm": 0.8320567011833191, "learning_rate": 4.242229880048718e-06, "loss": 0.1509, "step": 5128 }, { "epoch": 0.8310110174983798, "grad_norm": 0.8776043057441711, "learning_rate": 4.241916229332559e-06, "loss": 0.148, "step": 5129 }, { "epoch": 0.8311730395333765, "grad_norm": 0.8687680959701538, "learning_rate": 4.241602525317979e-06, "loss": 0.1468, "step": 5130 }, { "epoch": 0.8313350615683733, "grad_norm": 1.0185246467590332, "learning_rate": 4.241288768014576e-06, "loss": 0.1771, "step": 5131 }, { "epoch": 0.83149708360337, "grad_norm": 0.9902436137199402, "learning_rate": 4.240974957431951e-06, "loss": 0.1443, "step": 5132 }, { "epoch": 0.8316591056383669, "grad_norm": 0.865923285484314, "learning_rate": 4.240661093579705e-06, "loss": 0.1428, "step": 5133 }, { "epoch": 0.8318211276733636, "grad_norm": 0.8009111881256104, "learning_rate": 4.240347176467442e-06, "loss": 0.1209, "step": 5134 }, { "epoch": 0.8319831497083603, "grad_norm": 0.8269287943840027, "learning_rate": 4.240033206104767e-06, "loss": 0.1354, "step": 5135 }, { "epoch": 0.8321451717433571, "grad_norm": 0.8295719623565674, "learning_rate": 4.2397191825012865e-06, "loss": 0.1359, "step": 5136 }, { "epoch": 0.8323071937783538, "grad_norm": 0.9293026328086853, "learning_rate": 4.23940510566661e-06, "loss": 0.1586, "step": 5137 }, { "epoch": 0.8324692158133506, "grad_norm": 0.8136693239212036, "learning_rate": 4.239090975610346e-06, "loss": 0.1386, "step": 5138 }, { "epoch": 0.8326312378483474, "grad_norm": 0.8754627108573914, "learning_rate": 4.238776792342106e-06, "loss": 0.1393, "step": 5139 }, { "epoch": 0.8327932598833442, "grad_norm": 0.8677736520767212, "learning_rate": 4.2384625558715045e-06, "loss": 0.1352, "step": 5140 }, { "epoch": 0.8329552819183409, "grad_norm": 0.8522474765777588, "learning_rate": 4.2381482662081555e-06, "loss": 0.1534, "step": 5141 }, { "epoch": 0.8331173039533376, "grad_norm": 0.7467418313026428, "learning_rate": 4.237833923361676e-06, "loss": 0.1175, "step": 5142 }, { "epoch": 0.8332793259883344, "grad_norm": 0.905116856098175, "learning_rate": 4.237519527341684e-06, "loss": 0.1425, "step": 5143 }, { "epoch": 0.8334413480233311, "grad_norm": 1.009063720703125, "learning_rate": 4.237205078157799e-06, "loss": 0.1333, "step": 5144 }, { "epoch": 0.833603370058328, "grad_norm": 0.7382147908210754, "learning_rate": 4.2368905758196436e-06, "loss": 0.1214, "step": 5145 }, { "epoch": 0.8337653920933247, "grad_norm": 1.0095723867416382, "learning_rate": 4.236576020336838e-06, "loss": 0.1717, "step": 5146 }, { "epoch": 0.8339274141283215, "grad_norm": 0.9047743082046509, "learning_rate": 4.236261411719009e-06, "loss": 0.1274, "step": 5147 }, { "epoch": 0.8340894361633182, "grad_norm": 0.8646296262741089, "learning_rate": 4.235946749975783e-06, "loss": 0.152, "step": 5148 }, { "epoch": 0.8342514581983149, "grad_norm": 0.8817318677902222, "learning_rate": 4.235632035116788e-06, "loss": 0.1393, "step": 5149 }, { "epoch": 0.8344134802333117, "grad_norm": 0.9330871105194092, "learning_rate": 4.235317267151652e-06, "loss": 0.1393, "step": 5150 }, { "epoch": 0.8345755022683085, "grad_norm": 0.9412763714790344, "learning_rate": 4.235002446090007e-06, "loss": 0.167, "step": 5151 }, { "epoch": 0.8347375243033053, "grad_norm": 0.8925046920776367, "learning_rate": 4.234687571941486e-06, "loss": 0.1515, "step": 5152 }, { "epoch": 0.834899546338302, "grad_norm": 0.7668562531471252, "learning_rate": 4.234372644715723e-06, "loss": 0.1195, "step": 5153 }, { "epoch": 0.8350615683732988, "grad_norm": 0.8491714596748352, "learning_rate": 4.234057664422354e-06, "loss": 0.1408, "step": 5154 }, { "epoch": 0.8352235904082955, "grad_norm": 0.8507294654846191, "learning_rate": 4.233742631071017e-06, "loss": 0.1333, "step": 5155 }, { "epoch": 0.8353856124432922, "grad_norm": 0.9354631900787354, "learning_rate": 4.2334275446713515e-06, "loss": 0.1507, "step": 5156 }, { "epoch": 0.8355476344782891, "grad_norm": 0.8924568295478821, "learning_rate": 4.233112405232998e-06, "loss": 0.1431, "step": 5157 }, { "epoch": 0.8357096565132858, "grad_norm": 0.8328578472137451, "learning_rate": 4.232797212765598e-06, "loss": 0.1397, "step": 5158 }, { "epoch": 0.8358716785482826, "grad_norm": 0.8072636127471924, "learning_rate": 4.2324819672787976e-06, "loss": 0.1458, "step": 5159 }, { "epoch": 0.8360337005832793, "grad_norm": 0.813198447227478, "learning_rate": 4.2321666687822405e-06, "loss": 0.1361, "step": 5160 }, { "epoch": 0.8361957226182761, "grad_norm": 0.8552424311637878, "learning_rate": 4.231851317285576e-06, "loss": 0.1433, "step": 5161 }, { "epoch": 0.8363577446532728, "grad_norm": 0.8645519018173218, "learning_rate": 4.231535912798452e-06, "loss": 0.1511, "step": 5162 }, { "epoch": 0.8365197666882696, "grad_norm": 0.8857008814811707, "learning_rate": 4.2312204553305195e-06, "loss": 0.1396, "step": 5163 }, { "epoch": 0.8366817887232664, "grad_norm": 0.9378755688667297, "learning_rate": 4.23090494489143e-06, "loss": 0.1589, "step": 5164 }, { "epoch": 0.8368438107582631, "grad_norm": 0.8300303220748901, "learning_rate": 4.230589381490837e-06, "loss": 0.1385, "step": 5165 }, { "epoch": 0.8370058327932599, "grad_norm": 0.8071770668029785, "learning_rate": 4.230273765138399e-06, "loss": 0.1184, "step": 5166 }, { "epoch": 0.8371678548282566, "grad_norm": 0.7939083576202393, "learning_rate": 4.229958095843769e-06, "loss": 0.1276, "step": 5167 }, { "epoch": 0.8373298768632534, "grad_norm": 0.8759216070175171, "learning_rate": 4.229642373616609e-06, "loss": 0.1323, "step": 5168 }, { "epoch": 0.8374918988982502, "grad_norm": 0.8885746598243713, "learning_rate": 4.2293265984665775e-06, "loss": 0.159, "step": 5169 }, { "epoch": 0.837653920933247, "grad_norm": 0.9272924065589905, "learning_rate": 4.229010770403337e-06, "loss": 0.1432, "step": 5170 }, { "epoch": 0.8378159429682437, "grad_norm": 0.7635154724121094, "learning_rate": 4.228694889436552e-06, "loss": 0.1129, "step": 5171 }, { "epoch": 0.8379779650032404, "grad_norm": 0.9018572568893433, "learning_rate": 4.228378955575885e-06, "loss": 0.1413, "step": 5172 }, { "epoch": 0.8381399870382372, "grad_norm": 0.8228177428245544, "learning_rate": 4.228062968831006e-06, "loss": 0.1292, "step": 5173 }, { "epoch": 0.8383020090732339, "grad_norm": 0.7802498936653137, "learning_rate": 4.227746929211582e-06, "loss": 0.1169, "step": 5174 }, { "epoch": 0.8384640311082308, "grad_norm": 0.8804471492767334, "learning_rate": 4.227430836727282e-06, "loss": 0.1482, "step": 5175 }, { "epoch": 0.8386260531432275, "grad_norm": 0.8976423740386963, "learning_rate": 4.227114691387779e-06, "loss": 0.1446, "step": 5176 }, { "epoch": 0.8387880751782243, "grad_norm": 0.8988388180732727, "learning_rate": 4.226798493202746e-06, "loss": 0.1433, "step": 5177 }, { "epoch": 0.838950097213221, "grad_norm": 0.8048079013824463, "learning_rate": 4.226482242181859e-06, "loss": 0.1248, "step": 5178 }, { "epoch": 0.8391121192482177, "grad_norm": 0.9093589782714844, "learning_rate": 4.226165938334792e-06, "loss": 0.1449, "step": 5179 }, { "epoch": 0.8392741412832145, "grad_norm": 0.9186120629310608, "learning_rate": 4.225849581671225e-06, "loss": 0.1496, "step": 5180 }, { "epoch": 0.8394361633182112, "grad_norm": 0.9053645730018616, "learning_rate": 4.225533172200837e-06, "loss": 0.1418, "step": 5181 }, { "epoch": 0.8395981853532081, "grad_norm": 0.8142003417015076, "learning_rate": 4.225216709933309e-06, "loss": 0.132, "step": 5182 }, { "epoch": 0.8397602073882048, "grad_norm": 0.8198449611663818, "learning_rate": 4.224900194878326e-06, "loss": 0.1387, "step": 5183 }, { "epoch": 0.8399222294232016, "grad_norm": 0.8121005892753601, "learning_rate": 4.2245836270455706e-06, "loss": 0.1237, "step": 5184 }, { "epoch": 0.8400842514581983, "grad_norm": 0.8428120017051697, "learning_rate": 4.22426700644473e-06, "loss": 0.1441, "step": 5185 }, { "epoch": 0.840246273493195, "grad_norm": 0.9493716955184937, "learning_rate": 4.223950333085492e-06, "loss": 0.1443, "step": 5186 }, { "epoch": 0.8404082955281919, "grad_norm": 0.794461190700531, "learning_rate": 4.2236336069775445e-06, "loss": 0.1181, "step": 5187 }, { "epoch": 0.8405703175631886, "grad_norm": 0.9470990300178528, "learning_rate": 4.223316828130581e-06, "loss": 0.1715, "step": 5188 }, { "epoch": 0.8407323395981854, "grad_norm": 0.7863299250602722, "learning_rate": 4.222999996554291e-06, "loss": 0.1198, "step": 5189 }, { "epoch": 0.8408943616331821, "grad_norm": 0.916057288646698, "learning_rate": 4.222683112258372e-06, "loss": 0.148, "step": 5190 }, { "epoch": 0.8410563836681789, "grad_norm": 0.8652752041816711, "learning_rate": 4.222366175252519e-06, "loss": 0.1299, "step": 5191 }, { "epoch": 0.8412184057031756, "grad_norm": 0.8845123648643494, "learning_rate": 4.222049185546428e-06, "loss": 0.1356, "step": 5192 }, { "epoch": 0.8413804277381723, "grad_norm": 0.9572479724884033, "learning_rate": 4.2217321431498e-06, "loss": 0.1432, "step": 5193 }, { "epoch": 0.8415424497731692, "grad_norm": 0.885009765625, "learning_rate": 4.221415048072335e-06, "loss": 0.1171, "step": 5194 }, { "epoch": 0.8417044718081659, "grad_norm": 1.277252435684204, "learning_rate": 4.221097900323735e-06, "loss": 0.1719, "step": 5195 }, { "epoch": 0.8418664938431627, "grad_norm": 1.0181325674057007, "learning_rate": 4.220780699913704e-06, "loss": 0.18, "step": 5196 }, { "epoch": 0.8420285158781594, "grad_norm": 0.9635038375854492, "learning_rate": 4.220463446851948e-06, "loss": 0.1485, "step": 5197 }, { "epoch": 0.8421905379131562, "grad_norm": 0.8910326361656189, "learning_rate": 4.220146141148174e-06, "loss": 0.1497, "step": 5198 }, { "epoch": 0.842352559948153, "grad_norm": 0.8969202041625977, "learning_rate": 4.2198287828120905e-06, "loss": 0.1315, "step": 5199 }, { "epoch": 0.8425145819831497, "grad_norm": 0.8396177291870117, "learning_rate": 4.219511371853408e-06, "loss": 0.1371, "step": 5200 }, { "epoch": 0.8426766040181465, "grad_norm": 0.8654994964599609, "learning_rate": 4.21919390828184e-06, "loss": 0.1358, "step": 5201 }, { "epoch": 0.8428386260531432, "grad_norm": 0.877016544342041, "learning_rate": 4.2188763921070974e-06, "loss": 0.1539, "step": 5202 }, { "epoch": 0.84300064808814, "grad_norm": 0.7908318042755127, "learning_rate": 4.218558823338898e-06, "loss": 0.1257, "step": 5203 }, { "epoch": 0.8431626701231367, "grad_norm": 0.9762617349624634, "learning_rate": 4.2182412019869556e-06, "loss": 0.1717, "step": 5204 }, { "epoch": 0.8433246921581335, "grad_norm": 0.8188162446022034, "learning_rate": 4.217923528060992e-06, "loss": 0.1281, "step": 5205 }, { "epoch": 0.8434867141931303, "grad_norm": 0.8744212985038757, "learning_rate": 4.217605801570725e-06, "loss": 0.1317, "step": 5206 }, { "epoch": 0.843648736228127, "grad_norm": 1.0196107625961304, "learning_rate": 4.217288022525877e-06, "loss": 0.1875, "step": 5207 }, { "epoch": 0.8438107582631238, "grad_norm": 0.7780096530914307, "learning_rate": 4.216970190936171e-06, "loss": 0.1387, "step": 5208 }, { "epoch": 0.8439727802981205, "grad_norm": 0.9348713755607605, "learning_rate": 4.216652306811333e-06, "loss": 0.1568, "step": 5209 }, { "epoch": 0.8441348023331173, "grad_norm": 0.8681674599647522, "learning_rate": 4.2163343701610884e-06, "loss": 0.1436, "step": 5210 }, { "epoch": 0.844296824368114, "grad_norm": 0.8520421981811523, "learning_rate": 4.216016380995166e-06, "loss": 0.1438, "step": 5211 }, { "epoch": 0.8444588464031109, "grad_norm": 0.8671303391456604, "learning_rate": 4.215698339323294e-06, "loss": 0.1472, "step": 5212 }, { "epoch": 0.8446208684381076, "grad_norm": 0.9922376275062561, "learning_rate": 4.215380245155205e-06, "loss": 0.1798, "step": 5213 }, { "epoch": 0.8447828904731044, "grad_norm": 0.8721550703048706, "learning_rate": 4.215062098500632e-06, "loss": 0.1332, "step": 5214 }, { "epoch": 0.8449449125081011, "grad_norm": 0.8614236116409302, "learning_rate": 4.214743899369309e-06, "loss": 0.1504, "step": 5215 }, { "epoch": 0.8451069345430978, "grad_norm": 0.8543869853019714, "learning_rate": 4.214425647770972e-06, "loss": 0.1403, "step": 5216 }, { "epoch": 0.8452689565780946, "grad_norm": 0.7991150617599487, "learning_rate": 4.21410734371536e-06, "loss": 0.1359, "step": 5217 }, { "epoch": 0.8454309786130914, "grad_norm": 0.9248189926147461, "learning_rate": 4.213788987212211e-06, "loss": 0.1542, "step": 5218 }, { "epoch": 0.8455930006480882, "grad_norm": 0.950682520866394, "learning_rate": 4.213470578271265e-06, "loss": 0.169, "step": 5219 }, { "epoch": 0.8457550226830849, "grad_norm": 0.9078273177146912, "learning_rate": 4.213152116902267e-06, "loss": 0.1633, "step": 5220 }, { "epoch": 0.8459170447180817, "grad_norm": 0.8999932408332825, "learning_rate": 4.21283360311496e-06, "loss": 0.1556, "step": 5221 }, { "epoch": 0.8460790667530784, "grad_norm": 0.7840633988380432, "learning_rate": 4.212515036919089e-06, "loss": 0.1242, "step": 5222 }, { "epoch": 0.8462410887880751, "grad_norm": 0.8409323692321777, "learning_rate": 4.212196418324402e-06, "loss": 0.1342, "step": 5223 }, { "epoch": 0.846403110823072, "grad_norm": 0.7986196279525757, "learning_rate": 4.211877747340649e-06, "loss": 0.1334, "step": 5224 }, { "epoch": 0.8465651328580687, "grad_norm": 0.8227624297142029, "learning_rate": 4.211559023977579e-06, "loss": 0.1352, "step": 5225 }, { "epoch": 0.8467271548930655, "grad_norm": 0.7546223402023315, "learning_rate": 4.211240248244945e-06, "loss": 0.1311, "step": 5226 }, { "epoch": 0.8468891769280622, "grad_norm": 0.7613492012023926, "learning_rate": 4.2109214201525e-06, "loss": 0.1243, "step": 5227 }, { "epoch": 0.847051198963059, "grad_norm": 0.9470206499099731, "learning_rate": 4.21060253971e-06, "loss": 0.1578, "step": 5228 }, { "epoch": 0.8472132209980557, "grad_norm": 0.716661274433136, "learning_rate": 4.210283606927203e-06, "loss": 0.1045, "step": 5229 }, { "epoch": 0.8473752430330524, "grad_norm": 0.8112010359764099, "learning_rate": 4.2099646218138655e-06, "loss": 0.1199, "step": 5230 }, { "epoch": 0.8475372650680493, "grad_norm": 0.9128946661949158, "learning_rate": 4.209645584379748e-06, "loss": 0.1431, "step": 5231 }, { "epoch": 0.847699287103046, "grad_norm": 1.0120484828948975, "learning_rate": 4.209326494634614e-06, "loss": 0.1603, "step": 5232 }, { "epoch": 0.8478613091380428, "grad_norm": 0.7939407229423523, "learning_rate": 4.209007352588226e-06, "loss": 0.1269, "step": 5233 }, { "epoch": 0.8480233311730395, "grad_norm": 0.8421609401702881, "learning_rate": 4.208688158250348e-06, "loss": 0.1258, "step": 5234 }, { "epoch": 0.8481853532080363, "grad_norm": 0.8701285719871521, "learning_rate": 4.208368911630747e-06, "loss": 0.1282, "step": 5235 }, { "epoch": 0.848347375243033, "grad_norm": 0.9212756752967834, "learning_rate": 4.2080496127391914e-06, "loss": 0.13, "step": 5236 }, { "epoch": 0.8485093972780298, "grad_norm": 0.8270485997200012, "learning_rate": 4.207730261585452e-06, "loss": 0.1358, "step": 5237 }, { "epoch": 0.8486714193130266, "grad_norm": 0.8947330713272095, "learning_rate": 4.207410858179298e-06, "loss": 0.1462, "step": 5238 }, { "epoch": 0.8488334413480233, "grad_norm": 0.7907639741897583, "learning_rate": 4.207091402530504e-06, "loss": 0.1296, "step": 5239 }, { "epoch": 0.8489954633830201, "grad_norm": 0.956722617149353, "learning_rate": 4.206771894648846e-06, "loss": 0.157, "step": 5240 }, { "epoch": 0.8491574854180168, "grad_norm": 0.8899962306022644, "learning_rate": 4.206452334544096e-06, "loss": 0.1576, "step": 5241 }, { "epoch": 0.8493195074530137, "grad_norm": 0.8392747640609741, "learning_rate": 4.206132722226035e-06, "loss": 0.132, "step": 5242 }, { "epoch": 0.8494815294880104, "grad_norm": 0.9162569642066956, "learning_rate": 4.205813057704441e-06, "loss": 0.1577, "step": 5243 }, { "epoch": 0.8496435515230071, "grad_norm": 0.9247191548347473, "learning_rate": 4.205493340989096e-06, "loss": 0.1536, "step": 5244 }, { "epoch": 0.8498055735580039, "grad_norm": 0.8115431666374207, "learning_rate": 4.2051735720897815e-06, "loss": 0.1302, "step": 5245 }, { "epoch": 0.8499675955930006, "grad_norm": 0.8264175653457642, "learning_rate": 4.204853751016282e-06, "loss": 0.1469, "step": 5246 }, { "epoch": 0.8501296176279974, "grad_norm": 0.8077431321144104, "learning_rate": 4.2045338777783844e-06, "loss": 0.141, "step": 5247 }, { "epoch": 0.8502916396629941, "grad_norm": 0.9603082537651062, "learning_rate": 4.204213952385875e-06, "loss": 0.1644, "step": 5248 }, { "epoch": 0.850453661697991, "grad_norm": 0.8778827786445618, "learning_rate": 4.2038939748485416e-06, "loss": 0.1475, "step": 5249 }, { "epoch": 0.8506156837329877, "grad_norm": 0.7682041525840759, "learning_rate": 4.203573945176177e-06, "loss": 0.1167, "step": 5250 }, { "epoch": 0.8507777057679844, "grad_norm": 0.8808218836784363, "learning_rate": 4.203253863378571e-06, "loss": 0.137, "step": 5251 }, { "epoch": 0.8509397278029812, "grad_norm": 0.7965406775474548, "learning_rate": 4.202933729465519e-06, "loss": 0.131, "step": 5252 }, { "epoch": 0.8511017498379779, "grad_norm": 0.8025760650634766, "learning_rate": 4.202613543446817e-06, "loss": 0.1314, "step": 5253 }, { "epoch": 0.8512637718729748, "grad_norm": 0.9073064923286438, "learning_rate": 4.20229330533226e-06, "loss": 0.1461, "step": 5254 }, { "epoch": 0.8514257939079715, "grad_norm": 0.90974360704422, "learning_rate": 4.201973015131647e-06, "loss": 0.1437, "step": 5255 }, { "epoch": 0.8515878159429683, "grad_norm": 0.9711911082267761, "learning_rate": 4.201652672854779e-06, "loss": 0.1507, "step": 5256 }, { "epoch": 0.851749837977965, "grad_norm": 0.9854370355606079, "learning_rate": 4.2013322785114574e-06, "loss": 0.1656, "step": 5257 }, { "epoch": 0.8519118600129617, "grad_norm": 0.7474818229675293, "learning_rate": 4.201011832111485e-06, "loss": 0.1203, "step": 5258 }, { "epoch": 0.8520738820479585, "grad_norm": 0.90165776014328, "learning_rate": 4.200691333664666e-06, "loss": 0.1446, "step": 5259 }, { "epoch": 0.8522359040829552, "grad_norm": 0.8739867210388184, "learning_rate": 4.2003707831808086e-06, "loss": 0.1355, "step": 5260 }, { "epoch": 0.8523979261179521, "grad_norm": 0.9299155473709106, "learning_rate": 4.20005018066972e-06, "loss": 0.1332, "step": 5261 }, { "epoch": 0.8525599481529488, "grad_norm": 0.8256843686103821, "learning_rate": 4.199729526141209e-06, "loss": 0.1293, "step": 5262 }, { "epoch": 0.8527219701879456, "grad_norm": 0.8616979122161865, "learning_rate": 4.199408819605089e-06, "loss": 0.1399, "step": 5263 }, { "epoch": 0.8528839922229423, "grad_norm": 0.9190691709518433, "learning_rate": 4.199088061071172e-06, "loss": 0.1357, "step": 5264 }, { "epoch": 0.8530460142579391, "grad_norm": 0.9460611939430237, "learning_rate": 4.19876725054927e-06, "loss": 0.1527, "step": 5265 }, { "epoch": 0.8532080362929358, "grad_norm": 0.9079461693763733, "learning_rate": 4.198446388049203e-06, "loss": 0.1596, "step": 5266 }, { "epoch": 0.8533700583279326, "grad_norm": 0.7783133387565613, "learning_rate": 4.198125473580786e-06, "loss": 0.1377, "step": 5267 }, { "epoch": 0.8535320803629294, "grad_norm": 0.9109153151512146, "learning_rate": 4.197804507153838e-06, "loss": 0.1472, "step": 5268 }, { "epoch": 0.8536941023979261, "grad_norm": 0.8502582907676697, "learning_rate": 4.197483488778182e-06, "loss": 0.137, "step": 5269 }, { "epoch": 0.8538561244329229, "grad_norm": 0.7953091859817505, "learning_rate": 4.197162418463639e-06, "loss": 0.1279, "step": 5270 }, { "epoch": 0.8540181464679196, "grad_norm": 0.8548662662506104, "learning_rate": 4.196841296220033e-06, "loss": 0.1379, "step": 5271 }, { "epoch": 0.8541801685029164, "grad_norm": 0.8967195749282837, "learning_rate": 4.1965201220571895e-06, "loss": 0.1413, "step": 5272 }, { "epoch": 0.8543421905379132, "grad_norm": 0.771815836429596, "learning_rate": 4.1961988959849355e-06, "loss": 0.1281, "step": 5273 }, { "epoch": 0.8545042125729099, "grad_norm": 1.0291990041732788, "learning_rate": 4.1958776180131e-06, "loss": 0.1764, "step": 5274 }, { "epoch": 0.8546662346079067, "grad_norm": 0.8116664290428162, "learning_rate": 4.195556288151513e-06, "loss": 0.1252, "step": 5275 }, { "epoch": 0.8548282566429034, "grad_norm": 0.8838712573051453, "learning_rate": 4.1952349064100074e-06, "loss": 0.1545, "step": 5276 }, { "epoch": 0.8549902786779002, "grad_norm": 0.8008884787559509, "learning_rate": 4.194913472798415e-06, "loss": 0.1244, "step": 5277 }, { "epoch": 0.8551523007128969, "grad_norm": 0.890133261680603, "learning_rate": 4.194591987326574e-06, "loss": 0.1484, "step": 5278 }, { "epoch": 0.8553143227478938, "grad_norm": 0.8935836553573608, "learning_rate": 4.194270450004317e-06, "loss": 0.1325, "step": 5279 }, { "epoch": 0.8554763447828905, "grad_norm": 0.888762354850769, "learning_rate": 4.193948860841485e-06, "loss": 0.1455, "step": 5280 }, { "epoch": 0.8556383668178872, "grad_norm": 0.7954049706459045, "learning_rate": 4.193627219847918e-06, "loss": 0.1257, "step": 5281 }, { "epoch": 0.855800388852884, "grad_norm": 0.8666353821754456, "learning_rate": 4.193305527033456e-06, "loss": 0.1473, "step": 5282 }, { "epoch": 0.8559624108878807, "grad_norm": 0.8803139328956604, "learning_rate": 4.192983782407941e-06, "loss": 0.1557, "step": 5283 }, { "epoch": 0.8561244329228775, "grad_norm": 0.81354159116745, "learning_rate": 4.192661985981221e-06, "loss": 0.1376, "step": 5284 }, { "epoch": 0.8562864549578743, "grad_norm": 0.936042070388794, "learning_rate": 4.19234013776314e-06, "loss": 0.1426, "step": 5285 }, { "epoch": 0.8564484769928711, "grad_norm": 0.8939327597618103, "learning_rate": 4.192018237763547e-06, "loss": 0.1422, "step": 5286 }, { "epoch": 0.8566104990278678, "grad_norm": 0.806371808052063, "learning_rate": 4.19169628599229e-06, "loss": 0.1313, "step": 5287 }, { "epoch": 0.8567725210628645, "grad_norm": 0.9480794072151184, "learning_rate": 4.19137428245922e-06, "loss": 0.1468, "step": 5288 }, { "epoch": 0.8569345430978613, "grad_norm": 0.8664402365684509, "learning_rate": 4.191052227174189e-06, "loss": 0.1419, "step": 5289 }, { "epoch": 0.857096565132858, "grad_norm": 0.8385558724403381, "learning_rate": 4.190730120147054e-06, "loss": 0.1322, "step": 5290 }, { "epoch": 0.8572585871678549, "grad_norm": 0.8843077421188354, "learning_rate": 4.190407961387668e-06, "loss": 0.1497, "step": 5291 }, { "epoch": 0.8574206092028516, "grad_norm": 0.8863962292671204, "learning_rate": 4.190085750905889e-06, "loss": 0.1397, "step": 5292 }, { "epoch": 0.8575826312378484, "grad_norm": 0.8218333125114441, "learning_rate": 4.189763488711576e-06, "loss": 0.123, "step": 5293 }, { "epoch": 0.8577446532728451, "grad_norm": 0.8600748777389526, "learning_rate": 4.189441174814589e-06, "loss": 0.1472, "step": 5294 }, { "epoch": 0.8579066753078418, "grad_norm": 0.7937850952148438, "learning_rate": 4.189118809224792e-06, "loss": 0.1289, "step": 5295 }, { "epoch": 0.8580686973428386, "grad_norm": 0.8559263348579407, "learning_rate": 4.188796391952046e-06, "loss": 0.1388, "step": 5296 }, { "epoch": 0.8582307193778353, "grad_norm": 0.8307374119758606, "learning_rate": 4.1884739230062165e-06, "loss": 0.1346, "step": 5297 }, { "epoch": 0.8583927414128322, "grad_norm": 0.8185672760009766, "learning_rate": 4.188151402397172e-06, "loss": 0.1309, "step": 5298 }, { "epoch": 0.8585547634478289, "grad_norm": 0.8880597949028015, "learning_rate": 4.187828830134779e-06, "loss": 0.1561, "step": 5299 }, { "epoch": 0.8587167854828257, "grad_norm": 0.7549867630004883, "learning_rate": 4.187506206228909e-06, "loss": 0.1112, "step": 5300 }, { "epoch": 0.8588788075178224, "grad_norm": 0.9183105230331421, "learning_rate": 4.187183530689433e-06, "loss": 0.1465, "step": 5301 }, { "epoch": 0.8590408295528191, "grad_norm": 0.8254534602165222, "learning_rate": 4.1868608035262225e-06, "loss": 0.1268, "step": 5302 }, { "epoch": 0.859202851587816, "grad_norm": 0.8823205232620239, "learning_rate": 4.186538024749155e-06, "loss": 0.1542, "step": 5303 }, { "epoch": 0.8593648736228127, "grad_norm": 0.793454110622406, "learning_rate": 4.186215194368105e-06, "loss": 0.1343, "step": 5304 }, { "epoch": 0.8595268956578095, "grad_norm": 0.820057213306427, "learning_rate": 4.18589231239295e-06, "loss": 0.1341, "step": 5305 }, { "epoch": 0.8596889176928062, "grad_norm": 0.7398642301559448, "learning_rate": 4.18556937883357e-06, "loss": 0.1248, "step": 5306 }, { "epoch": 0.859850939727803, "grad_norm": 0.8161394596099854, "learning_rate": 4.185246393699847e-06, "loss": 0.131, "step": 5307 }, { "epoch": 0.8600129617627997, "grad_norm": 0.9015643000602722, "learning_rate": 4.184923357001661e-06, "loss": 0.1234, "step": 5308 }, { "epoch": 0.8601749837977966, "grad_norm": 0.8390319347381592, "learning_rate": 4.184600268748899e-06, "loss": 0.1388, "step": 5309 }, { "epoch": 0.8603370058327933, "grad_norm": 0.9225760102272034, "learning_rate": 4.184277128951445e-06, "loss": 0.1541, "step": 5310 }, { "epoch": 0.86049902786779, "grad_norm": 0.8592953085899353, "learning_rate": 4.183953937619187e-06, "loss": 0.1292, "step": 5311 }, { "epoch": 0.8606610499027868, "grad_norm": 0.9123047590255737, "learning_rate": 4.1836306947620135e-06, "loss": 0.152, "step": 5312 }, { "epoch": 0.8608230719377835, "grad_norm": 0.903361439704895, "learning_rate": 4.183307400389815e-06, "loss": 0.1498, "step": 5313 }, { "epoch": 0.8609850939727803, "grad_norm": 0.8942160606384277, "learning_rate": 4.182984054512483e-06, "loss": 0.1397, "step": 5314 }, { "epoch": 0.861147116007777, "grad_norm": 0.9034626483917236, "learning_rate": 4.1826606571399134e-06, "loss": 0.1574, "step": 5315 }, { "epoch": 0.8613091380427739, "grad_norm": 0.8866369128227234, "learning_rate": 4.182337208281998e-06, "loss": 0.1503, "step": 5316 }, { "epoch": 0.8614711600777706, "grad_norm": 0.8173493146896362, "learning_rate": 4.182013707948635e-06, "loss": 0.1335, "step": 5317 }, { "epoch": 0.8616331821127673, "grad_norm": 0.9902571439743042, "learning_rate": 4.181690156149724e-06, "loss": 0.158, "step": 5318 }, { "epoch": 0.8617952041477641, "grad_norm": 0.8877143859863281, "learning_rate": 4.181366552895163e-06, "loss": 0.1396, "step": 5319 }, { "epoch": 0.8619572261827608, "grad_norm": 0.9022709131240845, "learning_rate": 4.1810428981948555e-06, "loss": 0.145, "step": 5320 }, { "epoch": 0.8621192482177576, "grad_norm": 0.8367879986763, "learning_rate": 4.180719192058702e-06, "loss": 0.135, "step": 5321 }, { "epoch": 0.8622812702527544, "grad_norm": 0.8607010245323181, "learning_rate": 4.1803954344966095e-06, "loss": 0.1522, "step": 5322 }, { "epoch": 0.8624432922877512, "grad_norm": 0.9045525193214417, "learning_rate": 4.180071625518482e-06, "loss": 0.1469, "step": 5323 }, { "epoch": 0.8626053143227479, "grad_norm": 0.7549746632575989, "learning_rate": 4.17974776513423e-06, "loss": 0.1298, "step": 5324 }, { "epoch": 0.8627673363577446, "grad_norm": 0.8301894068717957, "learning_rate": 4.17942385335376e-06, "loss": 0.13, "step": 5325 }, { "epoch": 0.8629293583927414, "grad_norm": 0.9236185550689697, "learning_rate": 4.179099890186985e-06, "loss": 0.1521, "step": 5326 }, { "epoch": 0.8630913804277381, "grad_norm": 0.8897588849067688, "learning_rate": 4.1787758756438166e-06, "loss": 0.1416, "step": 5327 }, { "epoch": 0.863253402462735, "grad_norm": 0.8857617974281311, "learning_rate": 4.178451809734168e-06, "loss": 0.1507, "step": 5328 }, { "epoch": 0.8634154244977317, "grad_norm": 0.7978904247283936, "learning_rate": 4.178127692467957e-06, "loss": 0.1303, "step": 5329 }, { "epoch": 0.8635774465327285, "grad_norm": 0.880963921546936, "learning_rate": 4.1778035238550995e-06, "loss": 0.1519, "step": 5330 }, { "epoch": 0.8637394685677252, "grad_norm": 0.9408493638038635, "learning_rate": 4.177479303905514e-06, "loss": 0.1644, "step": 5331 }, { "epoch": 0.8639014906027219, "grad_norm": 0.8565788269042969, "learning_rate": 4.177155032629122e-06, "loss": 0.1322, "step": 5332 }, { "epoch": 0.8640635126377187, "grad_norm": 0.8621842861175537, "learning_rate": 4.176830710035843e-06, "loss": 0.1456, "step": 5333 }, { "epoch": 0.8642255346727155, "grad_norm": 1.0310484170913696, "learning_rate": 4.176506336135603e-06, "loss": 0.1557, "step": 5334 }, { "epoch": 0.8643875567077123, "grad_norm": 0.9071840643882751, "learning_rate": 4.176181910938326e-06, "loss": 0.154, "step": 5335 }, { "epoch": 0.864549578742709, "grad_norm": 0.8645082116127014, "learning_rate": 4.175857434453939e-06, "loss": 0.1404, "step": 5336 }, { "epoch": 0.8647116007777058, "grad_norm": 0.9229341149330139, "learning_rate": 4.1755329066923705e-06, "loss": 0.1429, "step": 5337 }, { "epoch": 0.8648736228127025, "grad_norm": 0.9101964831352234, "learning_rate": 4.175208327663549e-06, "loss": 0.1418, "step": 5338 }, { "epoch": 0.8650356448476992, "grad_norm": 0.8676378726959229, "learning_rate": 4.1748836973774075e-06, "loss": 0.1481, "step": 5339 }, { "epoch": 0.8651976668826961, "grad_norm": 0.8613499402999878, "learning_rate": 4.174559015843878e-06, "loss": 0.147, "step": 5340 }, { "epoch": 0.8653596889176928, "grad_norm": 0.8355089426040649, "learning_rate": 4.174234283072894e-06, "loss": 0.1394, "step": 5341 }, { "epoch": 0.8655217109526896, "grad_norm": 0.8457769751548767, "learning_rate": 4.173909499074392e-06, "loss": 0.1457, "step": 5342 }, { "epoch": 0.8656837329876863, "grad_norm": 0.7920171618461609, "learning_rate": 4.173584663858311e-06, "loss": 0.115, "step": 5343 }, { "epoch": 0.8658457550226831, "grad_norm": 0.7907648682594299, "learning_rate": 4.173259777434589e-06, "loss": 0.1376, "step": 5344 }, { "epoch": 0.8660077770576798, "grad_norm": 0.8158167004585266, "learning_rate": 4.172934839813168e-06, "loss": 0.1325, "step": 5345 }, { "epoch": 0.8661697990926766, "grad_norm": 0.8299226760864258, "learning_rate": 4.1726098510039894e-06, "loss": 0.1327, "step": 5346 }, { "epoch": 0.8663318211276734, "grad_norm": 0.7698670029640198, "learning_rate": 4.172284811016996e-06, "loss": 0.1341, "step": 5347 }, { "epoch": 0.8664938431626701, "grad_norm": 0.9853094816207886, "learning_rate": 4.171959719862134e-06, "loss": 0.1616, "step": 5348 }, { "epoch": 0.8666558651976669, "grad_norm": 0.7773309350013733, "learning_rate": 4.171634577549351e-06, "loss": 0.1279, "step": 5349 }, { "epoch": 0.8668178872326636, "grad_norm": 0.8574365377426147, "learning_rate": 4.171309384088596e-06, "loss": 0.1493, "step": 5350 }, { "epoch": 0.8669799092676604, "grad_norm": 0.8746230006217957, "learning_rate": 4.170984139489817e-06, "loss": 0.1465, "step": 5351 }, { "epoch": 0.8671419313026572, "grad_norm": 0.9685440063476562, "learning_rate": 4.170658843762968e-06, "loss": 0.1518, "step": 5352 }, { "epoch": 0.8673039533376539, "grad_norm": 0.930526077747345, "learning_rate": 4.170333496918001e-06, "loss": 0.1746, "step": 5353 }, { "epoch": 0.8674659753726507, "grad_norm": 0.8363896608352661, "learning_rate": 4.170008098964871e-06, "loss": 0.1354, "step": 5354 }, { "epoch": 0.8676279974076474, "grad_norm": 0.8706420063972473, "learning_rate": 4.1696826499135345e-06, "loss": 0.1363, "step": 5355 }, { "epoch": 0.8677900194426442, "grad_norm": 0.8633265495300293, "learning_rate": 4.169357149773949e-06, "loss": 0.1452, "step": 5356 }, { "epoch": 0.8679520414776409, "grad_norm": 0.8911048173904419, "learning_rate": 4.169031598556076e-06, "loss": 0.1588, "step": 5357 }, { "epoch": 0.8681140635126378, "grad_norm": 0.8662182092666626, "learning_rate": 4.168705996269874e-06, "loss": 0.148, "step": 5358 }, { "epoch": 0.8682760855476345, "grad_norm": 0.9110774397850037, "learning_rate": 4.168380342925307e-06, "loss": 0.1447, "step": 5359 }, { "epoch": 0.8684381075826313, "grad_norm": 0.7783262729644775, "learning_rate": 4.168054638532338e-06, "loss": 0.1278, "step": 5360 }, { "epoch": 0.868600129617628, "grad_norm": 0.8996500372886658, "learning_rate": 4.167728883100935e-06, "loss": 0.1546, "step": 5361 }, { "epoch": 0.8687621516526247, "grad_norm": 0.8864397406578064, "learning_rate": 4.167403076641063e-06, "loss": 0.1348, "step": 5362 }, { "epoch": 0.8689241736876215, "grad_norm": 0.7758836150169373, "learning_rate": 4.167077219162693e-06, "loss": 0.1296, "step": 5363 }, { "epoch": 0.8690861957226182, "grad_norm": 0.8843368291854858, "learning_rate": 4.166751310675793e-06, "loss": 0.1553, "step": 5364 }, { "epoch": 0.8692482177576151, "grad_norm": 0.7979202270507812, "learning_rate": 4.166425351190337e-06, "loss": 0.1344, "step": 5365 }, { "epoch": 0.8694102397926118, "grad_norm": 0.7076486945152283, "learning_rate": 4.166099340716298e-06, "loss": 0.1014, "step": 5366 }, { "epoch": 0.8695722618276086, "grad_norm": 0.8106122612953186, "learning_rate": 4.165773279263651e-06, "loss": 0.1328, "step": 5367 }, { "epoch": 0.8697342838626053, "grad_norm": 0.7798807621002197, "learning_rate": 4.165447166842373e-06, "loss": 0.1404, "step": 5368 }, { "epoch": 0.869896305897602, "grad_norm": 0.776866614818573, "learning_rate": 4.165121003462441e-06, "loss": 0.1164, "step": 5369 }, { "epoch": 0.8700583279325989, "grad_norm": 0.8250222206115723, "learning_rate": 4.164794789133837e-06, "loss": 0.1363, "step": 5370 }, { "epoch": 0.8702203499675956, "grad_norm": 0.8319069147109985, "learning_rate": 4.164468523866541e-06, "loss": 0.1417, "step": 5371 }, { "epoch": 0.8703823720025924, "grad_norm": 0.919909656047821, "learning_rate": 4.164142207670536e-06, "loss": 0.1611, "step": 5372 }, { "epoch": 0.8705443940375891, "grad_norm": 0.8057491183280945, "learning_rate": 4.163815840555806e-06, "loss": 0.1266, "step": 5373 }, { "epoch": 0.8707064160725859, "grad_norm": 0.8016455769538879, "learning_rate": 4.163489422532338e-06, "loss": 0.1256, "step": 5374 }, { "epoch": 0.8708684381075826, "grad_norm": 0.976750910282135, "learning_rate": 4.1631629536101195e-06, "loss": 0.1645, "step": 5375 }, { "epoch": 0.8710304601425793, "grad_norm": 0.9348950982093811, "learning_rate": 4.162836433799139e-06, "loss": 0.1425, "step": 5376 }, { "epoch": 0.8711924821775762, "grad_norm": 0.7779992818832397, "learning_rate": 4.162509863109389e-06, "loss": 0.1247, "step": 5377 }, { "epoch": 0.8713545042125729, "grad_norm": 0.9029181003570557, "learning_rate": 4.162183241550858e-06, "loss": 0.141, "step": 5378 }, { "epoch": 0.8715165262475697, "grad_norm": 0.8280603885650635, "learning_rate": 4.1618565691335434e-06, "loss": 0.1285, "step": 5379 }, { "epoch": 0.8716785482825664, "grad_norm": 0.792360782623291, "learning_rate": 4.161529845867439e-06, "loss": 0.128, "step": 5380 }, { "epoch": 0.8718405703175632, "grad_norm": 0.9347276091575623, "learning_rate": 4.161203071762543e-06, "loss": 0.1447, "step": 5381 }, { "epoch": 0.87200259235256, "grad_norm": 0.9125970602035522, "learning_rate": 4.160876246828853e-06, "loss": 0.1381, "step": 5382 }, { "epoch": 0.8721646143875567, "grad_norm": 0.8670451045036316, "learning_rate": 4.160549371076369e-06, "loss": 0.1382, "step": 5383 }, { "epoch": 0.8723266364225535, "grad_norm": 0.8932413458824158, "learning_rate": 4.160222444515092e-06, "loss": 0.1445, "step": 5384 }, { "epoch": 0.8724886584575502, "grad_norm": 0.8334319591522217, "learning_rate": 4.159895467155026e-06, "loss": 0.1404, "step": 5385 }, { "epoch": 0.872650680492547, "grad_norm": 0.8121306896209717, "learning_rate": 4.159568439006176e-06, "loss": 0.1379, "step": 5386 }, { "epoch": 0.8728127025275437, "grad_norm": 0.8688573837280273, "learning_rate": 4.159241360078548e-06, "loss": 0.1456, "step": 5387 }, { "epoch": 0.8729747245625405, "grad_norm": 0.8056637644767761, "learning_rate": 4.1589142303821485e-06, "loss": 0.1377, "step": 5388 }, { "epoch": 0.8731367465975373, "grad_norm": 0.8120809197425842, "learning_rate": 4.15858704992699e-06, "loss": 0.1306, "step": 5389 }, { "epoch": 0.873298768632534, "grad_norm": 0.8274014592170715, "learning_rate": 4.158259818723079e-06, "loss": 0.1463, "step": 5390 }, { "epoch": 0.8734607906675308, "grad_norm": 0.9297308921813965, "learning_rate": 4.157932536780432e-06, "loss": 0.145, "step": 5391 }, { "epoch": 0.8736228127025275, "grad_norm": 0.8492165803909302, "learning_rate": 4.157605204109062e-06, "loss": 0.1381, "step": 5392 }, { "epoch": 0.8737848347375243, "grad_norm": 0.8986421823501587, "learning_rate": 4.157277820718983e-06, "loss": 0.145, "step": 5393 }, { "epoch": 0.873946856772521, "grad_norm": 0.8207853436470032, "learning_rate": 4.156950386620214e-06, "loss": 0.1245, "step": 5394 }, { "epoch": 0.8741088788075179, "grad_norm": 0.8820725679397583, "learning_rate": 4.156622901822772e-06, "loss": 0.1459, "step": 5395 }, { "epoch": 0.8742709008425146, "grad_norm": 0.8960903286933899, "learning_rate": 4.156295366336679e-06, "loss": 0.1464, "step": 5396 }, { "epoch": 0.8744329228775113, "grad_norm": 0.8151302337646484, "learning_rate": 4.1559677801719554e-06, "loss": 0.1344, "step": 5397 }, { "epoch": 0.8745949449125081, "grad_norm": 0.8475366830825806, "learning_rate": 4.155640143338625e-06, "loss": 0.1303, "step": 5398 }, { "epoch": 0.8747569669475048, "grad_norm": 0.8408517241477966, "learning_rate": 4.155312455846714e-06, "loss": 0.1476, "step": 5399 }, { "epoch": 0.8749189889825016, "grad_norm": 0.8452920317649841, "learning_rate": 4.154984717706246e-06, "loss": 0.143, "step": 5400 }, { "epoch": 0.8750810110174984, "grad_norm": 0.8104416728019714, "learning_rate": 4.154656928927252e-06, "loss": 0.1317, "step": 5401 }, { "epoch": 0.8752430330524952, "grad_norm": 0.8546898365020752, "learning_rate": 4.15432908951976e-06, "loss": 0.1447, "step": 5402 }, { "epoch": 0.8754050550874919, "grad_norm": 0.9186674952507019, "learning_rate": 4.1540011994938e-06, "loss": 0.1344, "step": 5403 }, { "epoch": 0.8755670771224887, "grad_norm": 0.8319907188415527, "learning_rate": 4.153673258859406e-06, "loss": 0.1459, "step": 5404 }, { "epoch": 0.8757290991574854, "grad_norm": 0.8684608340263367, "learning_rate": 4.153345267626614e-06, "loss": 0.1466, "step": 5405 }, { "epoch": 0.8758911211924821, "grad_norm": 0.8214244246482849, "learning_rate": 4.153017225805456e-06, "loss": 0.1257, "step": 5406 }, { "epoch": 0.876053143227479, "grad_norm": 0.9074698090553284, "learning_rate": 4.152689133405971e-06, "loss": 0.1296, "step": 5407 }, { "epoch": 0.8762151652624757, "grad_norm": 0.8300620317459106, "learning_rate": 4.1523609904382e-06, "loss": 0.1331, "step": 5408 }, { "epoch": 0.8763771872974725, "grad_norm": 0.8042019009590149, "learning_rate": 4.152032796912179e-06, "loss": 0.1265, "step": 5409 }, { "epoch": 0.8765392093324692, "grad_norm": 0.8547518253326416, "learning_rate": 4.1517045528379544e-06, "loss": 0.138, "step": 5410 }, { "epoch": 0.876701231367466, "grad_norm": 0.9264114499092102, "learning_rate": 4.1513762582255655e-06, "loss": 0.1608, "step": 5411 }, { "epoch": 0.8768632534024627, "grad_norm": 0.7929533123970032, "learning_rate": 4.151047913085061e-06, "loss": 0.129, "step": 5412 }, { "epoch": 0.8770252754374595, "grad_norm": 0.9087817072868347, "learning_rate": 4.150719517426485e-06, "loss": 0.1529, "step": 5413 }, { "epoch": 0.8771872974724563, "grad_norm": 0.8429016470909119, "learning_rate": 4.150391071259886e-06, "loss": 0.1428, "step": 5414 }, { "epoch": 0.877349319507453, "grad_norm": 0.9927331209182739, "learning_rate": 4.1500625745953145e-06, "loss": 0.1493, "step": 5415 }, { "epoch": 0.8775113415424498, "grad_norm": 0.7533746361732483, "learning_rate": 4.149734027442821e-06, "loss": 0.1202, "step": 5416 }, { "epoch": 0.8776733635774465, "grad_norm": 0.8377231359481812, "learning_rate": 4.14940542981246e-06, "loss": 0.1399, "step": 5417 }, { "epoch": 0.8778353856124433, "grad_norm": 1.0180152654647827, "learning_rate": 4.149076781714283e-06, "loss": 0.1536, "step": 5418 }, { "epoch": 0.87799740764744, "grad_norm": 0.9475023746490479, "learning_rate": 4.148748083158347e-06, "loss": 0.1683, "step": 5419 }, { "epoch": 0.8781594296824368, "grad_norm": 0.863429844379425, "learning_rate": 4.1484193341547106e-06, "loss": 0.1407, "step": 5420 }, { "epoch": 0.8783214517174336, "grad_norm": 0.7526499032974243, "learning_rate": 4.14809053471343e-06, "loss": 0.1262, "step": 5421 }, { "epoch": 0.8784834737524303, "grad_norm": 0.851190984249115, "learning_rate": 4.147761684844569e-06, "loss": 0.1361, "step": 5422 }, { "epoch": 0.8786454957874271, "grad_norm": 0.823888897895813, "learning_rate": 4.147432784558188e-06, "loss": 0.1298, "step": 5423 }, { "epoch": 0.8788075178224238, "grad_norm": 0.8875905871391296, "learning_rate": 4.147103833864349e-06, "loss": 0.1347, "step": 5424 }, { "epoch": 0.8789695398574207, "grad_norm": 0.9177705645561218, "learning_rate": 4.146774832773119e-06, "loss": 0.1559, "step": 5425 }, { "epoch": 0.8791315618924174, "grad_norm": 0.8365907669067383, "learning_rate": 4.146445781294566e-06, "loss": 0.1295, "step": 5426 }, { "epoch": 0.8792935839274141, "grad_norm": 0.8464487791061401, "learning_rate": 4.146116679438754e-06, "loss": 0.135, "step": 5427 }, { "epoch": 0.8794556059624109, "grad_norm": 0.8880855441093445, "learning_rate": 4.145787527215757e-06, "loss": 0.1451, "step": 5428 }, { "epoch": 0.8796176279974076, "grad_norm": 0.991811215877533, "learning_rate": 4.145458324635643e-06, "loss": 0.1647, "step": 5429 }, { "epoch": 0.8797796500324044, "grad_norm": 0.7490970492362976, "learning_rate": 4.145129071708487e-06, "loss": 0.1117, "step": 5430 }, { "epoch": 0.8799416720674011, "grad_norm": 1.0946390628814697, "learning_rate": 4.144799768444362e-06, "loss": 0.1776, "step": 5431 }, { "epoch": 0.880103694102398, "grad_norm": 0.8403367400169373, "learning_rate": 4.144470414853345e-06, "loss": 0.148, "step": 5432 }, { "epoch": 0.8802657161373947, "grad_norm": 0.9710992574691772, "learning_rate": 4.1441410109455126e-06, "loss": 0.1562, "step": 5433 }, { "epoch": 0.8804277381723914, "grad_norm": 0.8550183176994324, "learning_rate": 4.143811556730944e-06, "loss": 0.1501, "step": 5434 }, { "epoch": 0.8805897602073882, "grad_norm": 0.92138671875, "learning_rate": 4.143482052219719e-06, "loss": 0.1599, "step": 5435 }, { "epoch": 0.8807517822423849, "grad_norm": 0.8856903910636902, "learning_rate": 4.143152497421922e-06, "loss": 0.1497, "step": 5436 }, { "epoch": 0.8809138042773818, "grad_norm": 0.7469887733459473, "learning_rate": 4.142822892347634e-06, "loss": 0.1194, "step": 5437 }, { "epoch": 0.8810758263123785, "grad_norm": 0.8389981389045715, "learning_rate": 4.142493237006941e-06, "loss": 0.1428, "step": 5438 }, { "epoch": 0.8812378483473753, "grad_norm": 0.7978473901748657, "learning_rate": 4.14216353140993e-06, "loss": 0.1363, "step": 5439 }, { "epoch": 0.881399870382372, "grad_norm": 0.9018765687942505, "learning_rate": 4.141833775566688e-06, "loss": 0.141, "step": 5440 }, { "epoch": 0.8815618924173687, "grad_norm": 0.9037135243415833, "learning_rate": 4.141503969487307e-06, "loss": 0.1424, "step": 5441 }, { "epoch": 0.8817239144523655, "grad_norm": 0.9119111895561218, "learning_rate": 4.1411741131818765e-06, "loss": 0.1625, "step": 5442 }, { "epoch": 0.8818859364873622, "grad_norm": 0.796134352684021, "learning_rate": 4.140844206660489e-06, "loss": 0.1313, "step": 5443 }, { "epoch": 0.8820479585223591, "grad_norm": 0.7966582179069519, "learning_rate": 4.14051424993324e-06, "loss": 0.1299, "step": 5444 }, { "epoch": 0.8822099805573558, "grad_norm": 0.9936891794204712, "learning_rate": 4.140184243010225e-06, "loss": 0.156, "step": 5445 }, { "epoch": 0.8823720025923526, "grad_norm": 0.7525246739387512, "learning_rate": 4.1398541859015405e-06, "loss": 0.1276, "step": 5446 }, { "epoch": 0.8825340246273493, "grad_norm": 0.8001253604888916, "learning_rate": 4.139524078617287e-06, "loss": 0.1233, "step": 5447 }, { "epoch": 0.8826960466623461, "grad_norm": 0.8744903206825256, "learning_rate": 4.139193921167565e-06, "loss": 0.1431, "step": 5448 }, { "epoch": 0.8828580686973428, "grad_norm": 0.8717635273933411, "learning_rate": 4.138863713562475e-06, "loss": 0.1468, "step": 5449 }, { "epoch": 0.8830200907323396, "grad_norm": 0.8670765161514282, "learning_rate": 4.138533455812121e-06, "loss": 0.1377, "step": 5450 }, { "epoch": 0.8831821127673364, "grad_norm": 0.9572997689247131, "learning_rate": 4.1382031479266084e-06, "loss": 0.1574, "step": 5451 }, { "epoch": 0.8833441348023331, "grad_norm": 0.9534028768539429, "learning_rate": 4.137872789916044e-06, "loss": 0.176, "step": 5452 }, { "epoch": 0.8835061568373299, "grad_norm": 1.0097438097000122, "learning_rate": 4.137542381790537e-06, "loss": 0.1481, "step": 5453 }, { "epoch": 0.8836681788723266, "grad_norm": 0.7446777820587158, "learning_rate": 4.137211923560195e-06, "loss": 0.1169, "step": 5454 }, { "epoch": 0.8838302009073234, "grad_norm": 0.7878828644752502, "learning_rate": 4.13688141523513e-06, "loss": 0.1216, "step": 5455 }, { "epoch": 0.8839922229423202, "grad_norm": 0.9208990931510925, "learning_rate": 4.136550856825455e-06, "loss": 0.1597, "step": 5456 }, { "epoch": 0.8841542449773169, "grad_norm": 0.872546911239624, "learning_rate": 4.136220248341284e-06, "loss": 0.1296, "step": 5457 }, { "epoch": 0.8843162670123137, "grad_norm": 0.9872225522994995, "learning_rate": 4.135889589792733e-06, "loss": 0.1712, "step": 5458 }, { "epoch": 0.8844782890473104, "grad_norm": 0.8968865871429443, "learning_rate": 4.135558881189919e-06, "loss": 0.1464, "step": 5459 }, { "epoch": 0.8846403110823072, "grad_norm": 0.883246898651123, "learning_rate": 4.135228122542962e-06, "loss": 0.1519, "step": 5460 }, { "epoch": 0.8848023331173039, "grad_norm": 0.9164681434631348, "learning_rate": 4.134897313861981e-06, "loss": 0.1489, "step": 5461 }, { "epoch": 0.8849643551523008, "grad_norm": 0.8244009017944336, "learning_rate": 4.1345664551570985e-06, "loss": 0.1366, "step": 5462 }, { "epoch": 0.8851263771872975, "grad_norm": 0.8056451082229614, "learning_rate": 4.134235546438439e-06, "loss": 0.1461, "step": 5463 }, { "epoch": 0.8852883992222942, "grad_norm": 0.8311920762062073, "learning_rate": 4.133904587716126e-06, "loss": 0.1495, "step": 5464 }, { "epoch": 0.885450421257291, "grad_norm": 0.9182047843933105, "learning_rate": 4.133573579000286e-06, "loss": 0.1392, "step": 5465 }, { "epoch": 0.8856124432922877, "grad_norm": 0.8886732459068298, "learning_rate": 4.133242520301049e-06, "loss": 0.133, "step": 5466 }, { "epoch": 0.8857744653272845, "grad_norm": 0.6573583483695984, "learning_rate": 4.1329114116285415e-06, "loss": 0.1026, "step": 5467 }, { "epoch": 0.8859364873622813, "grad_norm": 0.8312880992889404, "learning_rate": 4.132580252992898e-06, "loss": 0.1333, "step": 5468 }, { "epoch": 0.8860985093972781, "grad_norm": 0.8526415228843689, "learning_rate": 4.132249044404249e-06, "loss": 0.1405, "step": 5469 }, { "epoch": 0.8862605314322748, "grad_norm": 0.9522249102592468, "learning_rate": 4.131917785872728e-06, "loss": 0.1444, "step": 5470 }, { "epoch": 0.8864225534672715, "grad_norm": 0.8345977067947388, "learning_rate": 4.131586477408473e-06, "loss": 0.1337, "step": 5471 }, { "epoch": 0.8865845755022683, "grad_norm": 0.8408584594726562, "learning_rate": 4.13125511902162e-06, "loss": 0.1444, "step": 5472 }, { "epoch": 0.886746597537265, "grad_norm": 0.8456746935844421, "learning_rate": 4.1309237107223086e-06, "loss": 0.1427, "step": 5473 }, { "epoch": 0.8869086195722619, "grad_norm": 0.8888773918151855, "learning_rate": 4.130592252520677e-06, "loss": 0.1442, "step": 5474 }, { "epoch": 0.8870706416072586, "grad_norm": 0.9684158563613892, "learning_rate": 4.13026074442687e-06, "loss": 0.1564, "step": 5475 }, { "epoch": 0.8872326636422554, "grad_norm": 0.9485166072845459, "learning_rate": 4.129929186451028e-06, "loss": 0.1569, "step": 5476 }, { "epoch": 0.8873946856772521, "grad_norm": 0.9848198294639587, "learning_rate": 4.129597578603298e-06, "loss": 0.1365, "step": 5477 }, { "epoch": 0.8875567077122488, "grad_norm": 0.9694206118583679, "learning_rate": 4.129265920893826e-06, "loss": 0.1594, "step": 5478 }, { "epoch": 0.8877187297472456, "grad_norm": 0.7698529958724976, "learning_rate": 4.128934213332759e-06, "loss": 0.1278, "step": 5479 }, { "epoch": 0.8878807517822424, "grad_norm": 0.814104437828064, "learning_rate": 4.128602455930247e-06, "loss": 0.1389, "step": 5480 }, { "epoch": 0.8880427738172392, "grad_norm": 0.8766177892684937, "learning_rate": 4.128270648696441e-06, "loss": 0.1513, "step": 5481 }, { "epoch": 0.8882047958522359, "grad_norm": 0.796972393989563, "learning_rate": 4.127938791641493e-06, "loss": 0.1341, "step": 5482 }, { "epoch": 0.8883668178872327, "grad_norm": 0.8404687643051147, "learning_rate": 4.127606884775559e-06, "loss": 0.1433, "step": 5483 }, { "epoch": 0.8885288399222294, "grad_norm": 0.8364979028701782, "learning_rate": 4.127274928108792e-06, "loss": 0.1228, "step": 5484 }, { "epoch": 0.8886908619572261, "grad_norm": 0.872944176197052, "learning_rate": 4.12694292165135e-06, "loss": 0.1582, "step": 5485 }, { "epoch": 0.888852883992223, "grad_norm": 0.8381401300430298, "learning_rate": 4.126610865413392e-06, "loss": 0.1407, "step": 5486 }, { "epoch": 0.8890149060272197, "grad_norm": 0.8389954566955566, "learning_rate": 4.126278759405078e-06, "loss": 0.1522, "step": 5487 }, { "epoch": 0.8891769280622165, "grad_norm": 0.8609594702720642, "learning_rate": 4.125946603636569e-06, "loss": 0.149, "step": 5488 }, { "epoch": 0.8893389500972132, "grad_norm": 0.9145193099975586, "learning_rate": 4.12561439811803e-06, "loss": 0.1501, "step": 5489 }, { "epoch": 0.88950097213221, "grad_norm": 0.8599084615707397, "learning_rate": 4.125282142859622e-06, "loss": 0.1438, "step": 5490 }, { "epoch": 0.8896629941672067, "grad_norm": 0.8774310946464539, "learning_rate": 4.124949837871516e-06, "loss": 0.1433, "step": 5491 }, { "epoch": 0.8898250162022034, "grad_norm": 0.8296245336532593, "learning_rate": 4.124617483163876e-06, "loss": 0.1127, "step": 5492 }, { "epoch": 0.8899870382372003, "grad_norm": 0.8327217102050781, "learning_rate": 4.124285078746872e-06, "loss": 0.1337, "step": 5493 }, { "epoch": 0.890149060272197, "grad_norm": 0.8418365120887756, "learning_rate": 4.123952624630676e-06, "loss": 0.1461, "step": 5494 }, { "epoch": 0.8903110823071938, "grad_norm": 0.8435823917388916, "learning_rate": 4.123620120825459e-06, "loss": 0.1376, "step": 5495 }, { "epoch": 0.8904731043421905, "grad_norm": 0.8990997076034546, "learning_rate": 4.123287567341396e-06, "loss": 0.1331, "step": 5496 }, { "epoch": 0.8906351263771873, "grad_norm": 0.9464835524559021, "learning_rate": 4.122954964188662e-06, "loss": 0.1385, "step": 5497 }, { "epoch": 0.890797148412184, "grad_norm": 0.8224294781684875, "learning_rate": 4.122622311377433e-06, "loss": 0.1298, "step": 5498 }, { "epoch": 0.8909591704471809, "grad_norm": 0.8208863735198975, "learning_rate": 4.122289608917888e-06, "loss": 0.1374, "step": 5499 }, { "epoch": 0.8911211924821776, "grad_norm": 0.7936856746673584, "learning_rate": 4.121956856820207e-06, "loss": 0.1262, "step": 5500 }, { "epoch": 0.8912832145171743, "grad_norm": 0.8944551348686218, "learning_rate": 4.121624055094571e-06, "loss": 0.1464, "step": 5501 }, { "epoch": 0.8914452365521711, "grad_norm": 0.8844398260116577, "learning_rate": 4.1212912037511634e-06, "loss": 0.1422, "step": 5502 }, { "epoch": 0.8916072585871678, "grad_norm": 0.790374755859375, "learning_rate": 4.120958302800169e-06, "loss": 0.1264, "step": 5503 }, { "epoch": 0.8917692806221647, "grad_norm": 0.838116466999054, "learning_rate": 4.1206253522517725e-06, "loss": 0.1279, "step": 5504 }, { "epoch": 0.8919313026571614, "grad_norm": 0.9478814601898193, "learning_rate": 4.120292352116162e-06, "loss": 0.1509, "step": 5505 }, { "epoch": 0.8920933246921582, "grad_norm": 0.8016614317893982, "learning_rate": 4.119959302403527e-06, "loss": 0.1353, "step": 5506 }, { "epoch": 0.8922553467271549, "grad_norm": 0.7916797399520874, "learning_rate": 4.119626203124056e-06, "loss": 0.1264, "step": 5507 }, { "epoch": 0.8924173687621516, "grad_norm": 0.8813158273696899, "learning_rate": 4.119293054287945e-06, "loss": 0.1237, "step": 5508 }, { "epoch": 0.8925793907971484, "grad_norm": 0.8696245551109314, "learning_rate": 4.118959855905383e-06, "loss": 0.1459, "step": 5509 }, { "epoch": 0.8927414128321451, "grad_norm": 0.7300010919570923, "learning_rate": 4.118626607986569e-06, "loss": 0.1244, "step": 5510 }, { "epoch": 0.892903434867142, "grad_norm": 0.8156650066375732, "learning_rate": 4.118293310541697e-06, "loss": 0.1353, "step": 5511 }, { "epoch": 0.8930654569021387, "grad_norm": 0.7121819853782654, "learning_rate": 4.1179599635809654e-06, "loss": 0.1122, "step": 5512 }, { "epoch": 0.8932274789371355, "grad_norm": 0.8799218535423279, "learning_rate": 4.117626567114575e-06, "loss": 0.1382, "step": 5513 }, { "epoch": 0.8933895009721322, "grad_norm": 0.863865315914154, "learning_rate": 4.1172931211527254e-06, "loss": 0.1367, "step": 5514 }, { "epoch": 0.8935515230071289, "grad_norm": 0.9362986087799072, "learning_rate": 4.116959625705621e-06, "loss": 0.151, "step": 5515 }, { "epoch": 0.8937135450421257, "grad_norm": 1.1234683990478516, "learning_rate": 4.116626080783464e-06, "loss": 0.1598, "step": 5516 }, { "epoch": 0.8938755670771225, "grad_norm": 0.8734990954399109, "learning_rate": 4.116292486396463e-06, "loss": 0.148, "step": 5517 }, { "epoch": 0.8940375891121193, "grad_norm": 0.8898338079452515, "learning_rate": 4.1159588425548215e-06, "loss": 0.1485, "step": 5518 }, { "epoch": 0.894199611147116, "grad_norm": 0.9480844140052795, "learning_rate": 4.1156251492687505e-06, "loss": 0.1463, "step": 5519 }, { "epoch": 0.8943616331821128, "grad_norm": 0.8957088589668274, "learning_rate": 4.11529140654846e-06, "loss": 0.1609, "step": 5520 }, { "epoch": 0.8945236552171095, "grad_norm": 0.8064636588096619, "learning_rate": 4.114957614404161e-06, "loss": 0.1282, "step": 5521 }, { "epoch": 0.8946856772521062, "grad_norm": 0.9250004291534424, "learning_rate": 4.114623772846067e-06, "loss": 0.1473, "step": 5522 }, { "epoch": 0.8948476992871031, "grad_norm": 0.8078949451446533, "learning_rate": 4.114289881884394e-06, "loss": 0.1319, "step": 5523 }, { "epoch": 0.8950097213220998, "grad_norm": 0.7498173713684082, "learning_rate": 4.113955941529355e-06, "loss": 0.1088, "step": 5524 }, { "epoch": 0.8951717433570966, "grad_norm": 0.7994401454925537, "learning_rate": 4.1136219517911715e-06, "loss": 0.1244, "step": 5525 }, { "epoch": 0.8953337653920933, "grad_norm": 1.1339514255523682, "learning_rate": 4.113287912680061e-06, "loss": 0.1646, "step": 5526 }, { "epoch": 0.8954957874270901, "grad_norm": 0.9239411950111389, "learning_rate": 4.112953824206244e-06, "loss": 0.1468, "step": 5527 }, { "epoch": 0.8956578094620868, "grad_norm": 0.9831513166427612, "learning_rate": 4.112619686379944e-06, "loss": 0.1582, "step": 5528 }, { "epoch": 0.8958198314970836, "grad_norm": 0.8446866869926453, "learning_rate": 4.112285499211383e-06, "loss": 0.1353, "step": 5529 }, { "epoch": 0.8959818535320804, "grad_norm": 0.8573483228683472, "learning_rate": 4.111951262710788e-06, "loss": 0.1379, "step": 5530 }, { "epoch": 0.8961438755670771, "grad_norm": 0.9121212959289551, "learning_rate": 4.111616976888385e-06, "loss": 0.1577, "step": 5531 }, { "epoch": 0.8963058976020739, "grad_norm": 0.8051736950874329, "learning_rate": 4.111282641754403e-06, "loss": 0.1327, "step": 5532 }, { "epoch": 0.8964679196370706, "grad_norm": 0.7908130288124084, "learning_rate": 4.1109482573190705e-06, "loss": 0.1223, "step": 5533 }, { "epoch": 0.8966299416720674, "grad_norm": 0.8380170464515686, "learning_rate": 4.110613823592621e-06, "loss": 0.1249, "step": 5534 }, { "epoch": 0.8967919637070642, "grad_norm": 0.9545575380325317, "learning_rate": 4.110279340585285e-06, "loss": 0.1605, "step": 5535 }, { "epoch": 0.8969539857420609, "grad_norm": 0.8907868266105652, "learning_rate": 4.109944808307298e-06, "loss": 0.1562, "step": 5536 }, { "epoch": 0.8971160077770577, "grad_norm": 0.8784735202789307, "learning_rate": 4.109610226768897e-06, "loss": 0.1361, "step": 5537 }, { "epoch": 0.8972780298120544, "grad_norm": 0.8206544518470764, "learning_rate": 4.109275595980316e-06, "loss": 0.1343, "step": 5538 }, { "epoch": 0.8974400518470512, "grad_norm": 0.7294670939445496, "learning_rate": 4.108940915951798e-06, "loss": 0.1152, "step": 5539 }, { "epoch": 0.8976020738820479, "grad_norm": 0.9412605166435242, "learning_rate": 4.108606186693582e-06, "loss": 0.1527, "step": 5540 }, { "epoch": 0.8977640959170448, "grad_norm": 0.8774265050888062, "learning_rate": 4.1082714082159084e-06, "loss": 0.147, "step": 5541 }, { "epoch": 0.8979261179520415, "grad_norm": 0.8635554313659668, "learning_rate": 4.1079365805290214e-06, "loss": 0.1484, "step": 5542 }, { "epoch": 0.8980881399870383, "grad_norm": 0.9252685904502869, "learning_rate": 4.107601703643167e-06, "loss": 0.1404, "step": 5543 }, { "epoch": 0.898250162022035, "grad_norm": 0.9969372749328613, "learning_rate": 4.10726677756859e-06, "loss": 0.1578, "step": 5544 }, { "epoch": 0.8984121840570317, "grad_norm": 0.9096860289573669, "learning_rate": 4.1069318023155405e-06, "loss": 0.1433, "step": 5545 }, { "epoch": 0.8985742060920285, "grad_norm": 0.8402926325798035, "learning_rate": 4.106596777894265e-06, "loss": 0.1256, "step": 5546 }, { "epoch": 0.8987362281270252, "grad_norm": 0.8750645518302917, "learning_rate": 4.106261704315017e-06, "loss": 0.1416, "step": 5547 }, { "epoch": 0.8988982501620221, "grad_norm": 0.7757678627967834, "learning_rate": 4.105926581588046e-06, "loss": 0.1162, "step": 5548 }, { "epoch": 0.8990602721970188, "grad_norm": 1.0043946504592896, "learning_rate": 4.10559140972361e-06, "loss": 0.1457, "step": 5549 }, { "epoch": 0.8992222942320156, "grad_norm": 0.8587821125984192, "learning_rate": 4.105256188731962e-06, "loss": 0.1499, "step": 5550 }, { "epoch": 0.8993843162670123, "grad_norm": 0.8745139241218567, "learning_rate": 4.104920918623359e-06, "loss": 0.1486, "step": 5551 }, { "epoch": 0.899546338302009, "grad_norm": 0.8056247234344482, "learning_rate": 4.104585599408059e-06, "loss": 0.1264, "step": 5552 }, { "epoch": 0.8997083603370059, "grad_norm": 0.9480019211769104, "learning_rate": 4.104250231096324e-06, "loss": 0.1563, "step": 5553 }, { "epoch": 0.8998703823720026, "grad_norm": 0.8790834546089172, "learning_rate": 4.1039148136984134e-06, "loss": 0.1446, "step": 5554 }, { "epoch": 0.9000324044069994, "grad_norm": 0.8392221927642822, "learning_rate": 4.1035793472245905e-06, "loss": 0.1363, "step": 5555 }, { "epoch": 0.9001944264419961, "grad_norm": 0.9883176684379578, "learning_rate": 4.103243831685121e-06, "loss": 0.1532, "step": 5556 }, { "epoch": 0.9003564484769929, "grad_norm": 0.9498316049575806, "learning_rate": 4.102908267090269e-06, "loss": 0.1664, "step": 5557 }, { "epoch": 0.9005184705119896, "grad_norm": 1.109565258026123, "learning_rate": 4.102572653450304e-06, "loss": 0.1679, "step": 5558 }, { "epoch": 0.9006804925469863, "grad_norm": 1.8112865686416626, "learning_rate": 4.102236990775493e-06, "loss": 0.1594, "step": 5559 }, { "epoch": 0.9008425145819832, "grad_norm": 0.8903408050537109, "learning_rate": 4.101901279076108e-06, "loss": 0.1408, "step": 5560 }, { "epoch": 0.9010045366169799, "grad_norm": 0.8736206293106079, "learning_rate": 4.101565518362421e-06, "loss": 0.1407, "step": 5561 }, { "epoch": 0.9011665586519767, "grad_norm": 0.8691003322601318, "learning_rate": 4.101229708644704e-06, "loss": 0.1545, "step": 5562 }, { "epoch": 0.9013285806869734, "grad_norm": 0.951155960559845, "learning_rate": 4.100893849933234e-06, "loss": 0.1373, "step": 5563 }, { "epoch": 0.9014906027219702, "grad_norm": 0.774764358997345, "learning_rate": 4.100557942238284e-06, "loss": 0.1234, "step": 5564 }, { "epoch": 0.901652624756967, "grad_norm": 0.8135146498680115, "learning_rate": 4.100221985570137e-06, "loss": 0.1403, "step": 5565 }, { "epoch": 0.9018146467919637, "grad_norm": 0.8030052781105042, "learning_rate": 4.099885979939068e-06, "loss": 0.1311, "step": 5566 }, { "epoch": 0.9019766688269605, "grad_norm": 0.7571940422058105, "learning_rate": 4.099549925355359e-06, "loss": 0.1109, "step": 5567 }, { "epoch": 0.9021386908619572, "grad_norm": 0.7888020277023315, "learning_rate": 4.099213821829295e-06, "loss": 0.1194, "step": 5568 }, { "epoch": 0.902300712896954, "grad_norm": 0.9889339208602905, "learning_rate": 4.098877669371156e-06, "loss": 0.1492, "step": 5569 }, { "epoch": 0.9024627349319507, "grad_norm": 0.8497052192687988, "learning_rate": 4.098541467991231e-06, "loss": 0.1194, "step": 5570 }, { "epoch": 0.9026247569669476, "grad_norm": 0.8576518297195435, "learning_rate": 4.098205217699806e-06, "loss": 0.1441, "step": 5571 }, { "epoch": 0.9027867790019443, "grad_norm": 0.8945521712303162, "learning_rate": 4.097868918507168e-06, "loss": 0.1584, "step": 5572 }, { "epoch": 0.902948801036941, "grad_norm": 0.9076288342475891, "learning_rate": 4.097532570423608e-06, "loss": 0.1563, "step": 5573 }, { "epoch": 0.9031108230719378, "grad_norm": 0.8376015424728394, "learning_rate": 4.097196173459417e-06, "loss": 0.1199, "step": 5574 }, { "epoch": 0.9032728451069345, "grad_norm": 0.9018917679786682, "learning_rate": 4.096859727624889e-06, "loss": 0.1483, "step": 5575 }, { "epoch": 0.9034348671419313, "grad_norm": 0.8175559639930725, "learning_rate": 4.0965232329303175e-06, "loss": 0.1333, "step": 5576 }, { "epoch": 0.903596889176928, "grad_norm": 0.8534868955612183, "learning_rate": 4.096186689385997e-06, "loss": 0.136, "step": 5577 }, { "epoch": 0.9037589112119249, "grad_norm": 0.8894876837730408, "learning_rate": 4.095850097002228e-06, "loss": 0.146, "step": 5578 }, { "epoch": 0.9039209332469216, "grad_norm": 0.852396547794342, "learning_rate": 4.095513455789307e-06, "loss": 0.1268, "step": 5579 }, { "epoch": 0.9040829552819183, "grad_norm": 0.9027949571609497, "learning_rate": 4.095176765757537e-06, "loss": 0.1505, "step": 5580 }, { "epoch": 0.9042449773169151, "grad_norm": 0.9837702512741089, "learning_rate": 4.094840026917217e-06, "loss": 0.154, "step": 5581 }, { "epoch": 0.9044069993519118, "grad_norm": 0.7658412456512451, "learning_rate": 4.094503239278652e-06, "loss": 0.1292, "step": 5582 }, { "epoch": 0.9045690213869086, "grad_norm": 0.8777854442596436, "learning_rate": 4.094166402852146e-06, "loss": 0.1327, "step": 5583 }, { "epoch": 0.9047310434219054, "grad_norm": 0.9733120799064636, "learning_rate": 4.0938295176480055e-06, "loss": 0.1326, "step": 5584 }, { "epoch": 0.9048930654569022, "grad_norm": 0.9805978536605835, "learning_rate": 4.09349258367654e-06, "loss": 0.1428, "step": 5585 }, { "epoch": 0.9050550874918989, "grad_norm": 0.8723875284194946, "learning_rate": 4.093155600948057e-06, "loss": 0.1496, "step": 5586 }, { "epoch": 0.9052171095268956, "grad_norm": 0.8509448766708374, "learning_rate": 4.092818569472869e-06, "loss": 0.1371, "step": 5587 }, { "epoch": 0.9053791315618924, "grad_norm": 1.0776581764221191, "learning_rate": 4.092481489261285e-06, "loss": 0.1783, "step": 5588 }, { "epoch": 0.9055411535968891, "grad_norm": 0.8364329934120178, "learning_rate": 4.0921443603236235e-06, "loss": 0.1295, "step": 5589 }, { "epoch": 0.905703175631886, "grad_norm": 0.8056396245956421, "learning_rate": 4.0918071826701966e-06, "loss": 0.1196, "step": 5590 }, { "epoch": 0.9058651976668827, "grad_norm": 0.8377984166145325, "learning_rate": 4.0914699563113214e-06, "loss": 0.1338, "step": 5591 }, { "epoch": 0.9060272197018795, "grad_norm": 0.9143077731132507, "learning_rate": 4.091132681257317e-06, "loss": 0.1505, "step": 5592 }, { "epoch": 0.9061892417368762, "grad_norm": 0.874538242816925, "learning_rate": 4.0907953575185035e-06, "loss": 0.1465, "step": 5593 }, { "epoch": 0.906351263771873, "grad_norm": 0.9015555381774902, "learning_rate": 4.090457985105202e-06, "loss": 0.1463, "step": 5594 }, { "epoch": 0.9065132858068697, "grad_norm": 0.8938778638839722, "learning_rate": 4.090120564027734e-06, "loss": 0.167, "step": 5595 }, { "epoch": 0.9066753078418665, "grad_norm": 0.7953081130981445, "learning_rate": 4.089783094296425e-06, "loss": 0.1215, "step": 5596 }, { "epoch": 0.9068373298768633, "grad_norm": 0.9266294836997986, "learning_rate": 4.0894455759216015e-06, "loss": 0.1584, "step": 5597 }, { "epoch": 0.90699935191186, "grad_norm": 0.9055389165878296, "learning_rate": 4.089108008913589e-06, "loss": 0.1614, "step": 5598 }, { "epoch": 0.9071613739468568, "grad_norm": 0.9142413139343262, "learning_rate": 4.088770393282717e-06, "loss": 0.1326, "step": 5599 }, { "epoch": 0.9073233959818535, "grad_norm": 0.922514796257019, "learning_rate": 4.088432729039316e-06, "loss": 0.1363, "step": 5600 }, { "epoch": 0.9074854180168503, "grad_norm": 0.8099052906036377, "learning_rate": 4.088095016193717e-06, "loss": 0.1321, "step": 5601 }, { "epoch": 0.907647440051847, "grad_norm": 0.7604457139968872, "learning_rate": 4.087757254756254e-06, "loss": 0.1283, "step": 5602 }, { "epoch": 0.9078094620868438, "grad_norm": 0.8297195434570312, "learning_rate": 4.087419444737261e-06, "loss": 0.1483, "step": 5603 }, { "epoch": 0.9079714841218406, "grad_norm": 0.7264284491539001, "learning_rate": 4.087081586147075e-06, "loss": 0.1193, "step": 5604 }, { "epoch": 0.9081335061568373, "grad_norm": 0.7358347177505493, "learning_rate": 4.086743678996032e-06, "loss": 0.1254, "step": 5605 }, { "epoch": 0.9082955281918341, "grad_norm": 0.7477496862411499, "learning_rate": 4.086405723294474e-06, "loss": 0.1247, "step": 5606 }, { "epoch": 0.9084575502268308, "grad_norm": 0.8757005929946899, "learning_rate": 4.086067719052739e-06, "loss": 0.1475, "step": 5607 }, { "epoch": 0.9086195722618277, "grad_norm": 0.7548813223838806, "learning_rate": 4.0857296662811696e-06, "loss": 0.1146, "step": 5608 }, { "epoch": 0.9087815942968244, "grad_norm": 0.9473977088928223, "learning_rate": 4.08539156499011e-06, "loss": 0.134, "step": 5609 }, { "epoch": 0.9089436163318211, "grad_norm": 0.8978647589683533, "learning_rate": 4.085053415189905e-06, "loss": 0.151, "step": 5610 }, { "epoch": 0.9091056383668179, "grad_norm": 0.8081299662590027, "learning_rate": 4.084715216890902e-06, "loss": 0.1178, "step": 5611 }, { "epoch": 0.9092676604018146, "grad_norm": 0.8348207473754883, "learning_rate": 4.084376970103448e-06, "loss": 0.1308, "step": 5612 }, { "epoch": 0.9094296824368114, "grad_norm": 1.0297025442123413, "learning_rate": 4.0840386748378914e-06, "loss": 0.1464, "step": 5613 }, { "epoch": 0.9095917044718081, "grad_norm": 0.7381079196929932, "learning_rate": 4.0837003311045865e-06, "loss": 0.1112, "step": 5614 }, { "epoch": 0.909753726506805, "grad_norm": 0.8947216868400574, "learning_rate": 4.083361938913884e-06, "loss": 0.1424, "step": 5615 }, { "epoch": 0.9099157485418017, "grad_norm": 0.9418050050735474, "learning_rate": 4.083023498276136e-06, "loss": 0.1445, "step": 5616 }, { "epoch": 0.9100777705767984, "grad_norm": 0.8582934141159058, "learning_rate": 4.0826850092017015e-06, "loss": 0.1446, "step": 5617 }, { "epoch": 0.9102397926117952, "grad_norm": 0.8298516273498535, "learning_rate": 4.082346471700935e-06, "loss": 0.1382, "step": 5618 }, { "epoch": 0.9104018146467919, "grad_norm": 0.8987380862236023, "learning_rate": 4.082007885784196e-06, "loss": 0.1405, "step": 5619 }, { "epoch": 0.9105638366817888, "grad_norm": 0.7453068494796753, "learning_rate": 4.081669251461844e-06, "loss": 0.1175, "step": 5620 }, { "epoch": 0.9107258587167855, "grad_norm": 0.9048896431922913, "learning_rate": 4.08133056874424e-06, "loss": 0.1491, "step": 5621 }, { "epoch": 0.9108878807517823, "grad_norm": 0.9545086026191711, "learning_rate": 4.080991837641748e-06, "loss": 0.1519, "step": 5622 }, { "epoch": 0.911049902786779, "grad_norm": 0.8169971704483032, "learning_rate": 4.0806530581647305e-06, "loss": 0.1244, "step": 5623 }, { "epoch": 0.9112119248217757, "grad_norm": 0.7835274338722229, "learning_rate": 4.080314230323556e-06, "loss": 0.1404, "step": 5624 }, { "epoch": 0.9113739468567725, "grad_norm": 0.9390804171562195, "learning_rate": 4.07997535412859e-06, "loss": 0.1438, "step": 5625 }, { "epoch": 0.9115359688917692, "grad_norm": 0.7569699883460999, "learning_rate": 4.079636429590201e-06, "loss": 0.1186, "step": 5626 }, { "epoch": 0.9116979909267661, "grad_norm": 0.9515724778175354, "learning_rate": 4.07929745671876e-06, "loss": 0.1582, "step": 5627 }, { "epoch": 0.9118600129617628, "grad_norm": 0.892797589302063, "learning_rate": 4.07895843552464e-06, "loss": 0.1337, "step": 5628 }, { "epoch": 0.9120220349967596, "grad_norm": 0.9236721396446228, "learning_rate": 4.078619366018212e-06, "loss": 0.1684, "step": 5629 }, { "epoch": 0.9121840570317563, "grad_norm": 0.8024287223815918, "learning_rate": 4.078280248209851e-06, "loss": 0.1236, "step": 5630 }, { "epoch": 0.912346079066753, "grad_norm": 0.8392508029937744, "learning_rate": 4.077941082109934e-06, "loss": 0.1306, "step": 5631 }, { "epoch": 0.9125081011017498, "grad_norm": 0.9787663221359253, "learning_rate": 4.077601867728839e-06, "loss": 0.1641, "step": 5632 }, { "epoch": 0.9126701231367466, "grad_norm": 0.7922909259796143, "learning_rate": 4.077262605076943e-06, "loss": 0.1286, "step": 5633 }, { "epoch": 0.9128321451717434, "grad_norm": 0.7346786260604858, "learning_rate": 4.07692329416463e-06, "loss": 0.1231, "step": 5634 }, { "epoch": 0.9129941672067401, "grad_norm": 0.8033357262611389, "learning_rate": 4.07658393500228e-06, "loss": 0.1274, "step": 5635 }, { "epoch": 0.9131561892417369, "grad_norm": 0.8313119411468506, "learning_rate": 4.0762445276002765e-06, "loss": 0.1404, "step": 5636 }, { "epoch": 0.9133182112767336, "grad_norm": 0.7644381523132324, "learning_rate": 4.075905071969005e-06, "loss": 0.1241, "step": 5637 }, { "epoch": 0.9134802333117304, "grad_norm": 0.8910484313964844, "learning_rate": 4.075565568118852e-06, "loss": 0.1439, "step": 5638 }, { "epoch": 0.9136422553467272, "grad_norm": 0.8554601669311523, "learning_rate": 4.075226016060205e-06, "loss": 0.1388, "step": 5639 }, { "epoch": 0.9138042773817239, "grad_norm": 0.8526726365089417, "learning_rate": 4.074886415803454e-06, "loss": 0.1508, "step": 5640 }, { "epoch": 0.9139662994167207, "grad_norm": 0.8573095798492432, "learning_rate": 4.07454676735899e-06, "loss": 0.1487, "step": 5641 }, { "epoch": 0.9141283214517174, "grad_norm": 0.7731853723526001, "learning_rate": 4.074207070737205e-06, "loss": 0.1233, "step": 5642 }, { "epoch": 0.9142903434867142, "grad_norm": 0.9754795432090759, "learning_rate": 4.073867325948494e-06, "loss": 0.1559, "step": 5643 }, { "epoch": 0.9144523655217109, "grad_norm": 0.8023756146430969, "learning_rate": 4.07352753300325e-06, "loss": 0.1269, "step": 5644 }, { "epoch": 0.9146143875567078, "grad_norm": 0.9316949248313904, "learning_rate": 4.073187691911873e-06, "loss": 0.146, "step": 5645 }, { "epoch": 0.9147764095917045, "grad_norm": 0.8113452196121216, "learning_rate": 4.072847802684758e-06, "loss": 0.1353, "step": 5646 }, { "epoch": 0.9149384316267012, "grad_norm": 0.8404650092124939, "learning_rate": 4.072507865332308e-06, "loss": 0.1431, "step": 5647 }, { "epoch": 0.915100453661698, "grad_norm": 0.9183782935142517, "learning_rate": 4.072167879864922e-06, "loss": 0.1453, "step": 5648 }, { "epoch": 0.9152624756966947, "grad_norm": 0.8436623811721802, "learning_rate": 4.071827846293004e-06, "loss": 0.1267, "step": 5649 }, { "epoch": 0.9154244977316915, "grad_norm": 0.9088074564933777, "learning_rate": 4.071487764626957e-06, "loss": 0.1451, "step": 5650 }, { "epoch": 0.9155865197666883, "grad_norm": 0.8326597213745117, "learning_rate": 4.071147634877187e-06, "loss": 0.1361, "step": 5651 }, { "epoch": 0.9157485418016851, "grad_norm": 0.850050687789917, "learning_rate": 4.070807457054102e-06, "loss": 0.1234, "step": 5652 }, { "epoch": 0.9159105638366818, "grad_norm": 0.8121395707130432, "learning_rate": 4.07046723116811e-06, "loss": 0.1254, "step": 5653 }, { "epoch": 0.9160725858716785, "grad_norm": 0.8910734057426453, "learning_rate": 4.070126957229622e-06, "loss": 0.1436, "step": 5654 }, { "epoch": 0.9162346079066753, "grad_norm": 0.8616095781326294, "learning_rate": 4.0697866352490475e-06, "loss": 0.1444, "step": 5655 }, { "epoch": 0.916396629941672, "grad_norm": 0.9634042978286743, "learning_rate": 4.069446265236801e-06, "loss": 0.1737, "step": 5656 }, { "epoch": 0.9165586519766689, "grad_norm": 0.8356743454933167, "learning_rate": 4.0691058472032975e-06, "loss": 0.1345, "step": 5657 }, { "epoch": 0.9167206740116656, "grad_norm": 0.8691932559013367, "learning_rate": 4.068765381158951e-06, "loss": 0.1398, "step": 5658 }, { "epoch": 0.9168826960466624, "grad_norm": 0.9177868366241455, "learning_rate": 4.068424867114181e-06, "loss": 0.1573, "step": 5659 }, { "epoch": 0.9170447180816591, "grad_norm": 0.802828311920166, "learning_rate": 4.068084305079406e-06, "loss": 0.1188, "step": 5660 }, { "epoch": 0.9172067401166558, "grad_norm": 0.920111358165741, "learning_rate": 4.067743695065045e-06, "loss": 0.1502, "step": 5661 }, { "epoch": 0.9173687621516526, "grad_norm": 0.872612714767456, "learning_rate": 4.067403037081522e-06, "loss": 0.1551, "step": 5662 }, { "epoch": 0.9175307841866494, "grad_norm": 0.7936202883720398, "learning_rate": 4.0670623311392575e-06, "loss": 0.1376, "step": 5663 }, { "epoch": 0.9176928062216462, "grad_norm": 0.8577109575271606, "learning_rate": 4.06672157724868e-06, "loss": 0.1404, "step": 5664 }, { "epoch": 0.9178548282566429, "grad_norm": 0.7876363396644592, "learning_rate": 4.066380775420211e-06, "loss": 0.1236, "step": 5665 }, { "epoch": 0.9180168502916397, "grad_norm": 0.9405772089958191, "learning_rate": 4.066039925664283e-06, "loss": 0.1358, "step": 5666 }, { "epoch": 0.9181788723266364, "grad_norm": 0.8037310242652893, "learning_rate": 4.065699027991322e-06, "loss": 0.1344, "step": 5667 }, { "epoch": 0.9183408943616331, "grad_norm": 0.9089890718460083, "learning_rate": 4.06535808241176e-06, "loss": 0.1502, "step": 5668 }, { "epoch": 0.91850291639663, "grad_norm": 0.7645153999328613, "learning_rate": 4.065017088936028e-06, "loss": 0.1121, "step": 5669 }, { "epoch": 0.9186649384316267, "grad_norm": 0.8740200400352478, "learning_rate": 4.064676047574561e-06, "loss": 0.1576, "step": 5670 }, { "epoch": 0.9188269604666235, "grad_norm": 0.8704571723937988, "learning_rate": 4.064334958337794e-06, "loss": 0.1325, "step": 5671 }, { "epoch": 0.9189889825016202, "grad_norm": 0.9229740500450134, "learning_rate": 4.063993821236162e-06, "loss": 0.1428, "step": 5672 }, { "epoch": 0.919151004536617, "grad_norm": 1.0061372518539429, "learning_rate": 4.063652636280105e-06, "loss": 0.1629, "step": 5673 }, { "epoch": 0.9193130265716137, "grad_norm": 0.8159903883934021, "learning_rate": 4.063311403480061e-06, "loss": 0.1316, "step": 5674 }, { "epoch": 0.9194750486066104, "grad_norm": 0.91038578748703, "learning_rate": 4.06297012284647e-06, "loss": 0.1555, "step": 5675 }, { "epoch": 0.9196370706416073, "grad_norm": 0.9407716989517212, "learning_rate": 4.0626287943897765e-06, "loss": 0.1476, "step": 5676 }, { "epoch": 0.919799092676604, "grad_norm": 0.8589774370193481, "learning_rate": 4.062287418120423e-06, "loss": 0.1333, "step": 5677 }, { "epoch": 0.9199611147116008, "grad_norm": 0.8598899841308594, "learning_rate": 4.061945994048855e-06, "loss": 0.1264, "step": 5678 }, { "epoch": 0.9201231367465975, "grad_norm": 0.8190400004386902, "learning_rate": 4.06160452218552e-06, "loss": 0.1291, "step": 5679 }, { "epoch": 0.9202851587815943, "grad_norm": 0.9325578808784485, "learning_rate": 4.061263002540865e-06, "loss": 0.15, "step": 5680 }, { "epoch": 0.920447180816591, "grad_norm": 0.8312631845474243, "learning_rate": 4.060921435125341e-06, "loss": 0.1357, "step": 5681 }, { "epoch": 0.9206092028515879, "grad_norm": 0.8420397043228149, "learning_rate": 4.060579819949398e-06, "loss": 0.1447, "step": 5682 }, { "epoch": 0.9207712248865846, "grad_norm": 0.7866024971008301, "learning_rate": 4.06023815702349e-06, "loss": 0.1397, "step": 5683 }, { "epoch": 0.9209332469215813, "grad_norm": 0.8895429372787476, "learning_rate": 4.059896446358068e-06, "loss": 0.1463, "step": 5684 }, { "epoch": 0.9210952689565781, "grad_norm": 0.7228549122810364, "learning_rate": 4.059554687963591e-06, "loss": 0.1146, "step": 5685 }, { "epoch": 0.9212572909915748, "grad_norm": 0.7941901087760925, "learning_rate": 4.059212881850515e-06, "loss": 0.1317, "step": 5686 }, { "epoch": 0.9214193130265717, "grad_norm": 0.7205981016159058, "learning_rate": 4.058871028029296e-06, "loss": 0.1143, "step": 5687 }, { "epoch": 0.9215813350615684, "grad_norm": 0.934319257736206, "learning_rate": 4.0585291265103985e-06, "loss": 0.1565, "step": 5688 }, { "epoch": 0.9217433570965652, "grad_norm": 0.8551682233810425, "learning_rate": 4.05818717730428e-06, "loss": 0.141, "step": 5689 }, { "epoch": 0.9219053791315619, "grad_norm": 0.9247624278068542, "learning_rate": 4.057845180421405e-06, "loss": 0.1454, "step": 5690 }, { "epoch": 0.9220674011665586, "grad_norm": 0.8106127381324768, "learning_rate": 4.057503135872237e-06, "loss": 0.1259, "step": 5691 }, { "epoch": 0.9222294232015554, "grad_norm": 0.8142223954200745, "learning_rate": 4.057161043667243e-06, "loss": 0.1383, "step": 5692 }, { "epoch": 0.9223914452365521, "grad_norm": 0.7778276801109314, "learning_rate": 4.056818903816888e-06, "loss": 0.1258, "step": 5693 }, { "epoch": 0.922553467271549, "grad_norm": 0.9407106041908264, "learning_rate": 4.056476716331643e-06, "loss": 0.1488, "step": 5694 }, { "epoch": 0.9227154893065457, "grad_norm": 0.8241350054740906, "learning_rate": 4.056134481221977e-06, "loss": 0.1358, "step": 5695 }, { "epoch": 0.9228775113415425, "grad_norm": 0.9014641046524048, "learning_rate": 4.05579219849836e-06, "loss": 0.1459, "step": 5696 }, { "epoch": 0.9230395333765392, "grad_norm": 0.7756420969963074, "learning_rate": 4.055449868171269e-06, "loss": 0.1265, "step": 5697 }, { "epoch": 0.9232015554115359, "grad_norm": 0.9437333345413208, "learning_rate": 4.055107490251175e-06, "loss": 0.1625, "step": 5698 }, { "epoch": 0.9233635774465327, "grad_norm": 0.9008864164352417, "learning_rate": 4.054765064748554e-06, "loss": 0.1386, "step": 5699 }, { "epoch": 0.9235255994815295, "grad_norm": 0.8397160768508911, "learning_rate": 4.054422591673887e-06, "loss": 0.1367, "step": 5700 }, { "epoch": 0.9236876215165263, "grad_norm": 0.8157175183296204, "learning_rate": 4.054080071037649e-06, "loss": 0.1287, "step": 5701 }, { "epoch": 0.923849643551523, "grad_norm": 0.8612614870071411, "learning_rate": 4.0537375028503225e-06, "loss": 0.1411, "step": 5702 }, { "epoch": 0.9240116655865198, "grad_norm": 0.933879554271698, "learning_rate": 4.053394887122387e-06, "loss": 0.1231, "step": 5703 }, { "epoch": 0.9241736876215165, "grad_norm": 0.8254159688949585, "learning_rate": 4.053052223864328e-06, "loss": 0.1382, "step": 5704 }, { "epoch": 0.9243357096565132, "grad_norm": 0.9842225313186646, "learning_rate": 4.052709513086629e-06, "loss": 0.161, "step": 5705 }, { "epoch": 0.9244977316915101, "grad_norm": 0.8593588471412659, "learning_rate": 4.052366754799776e-06, "loss": 0.1222, "step": 5706 }, { "epoch": 0.9246597537265068, "grad_norm": 0.7498965859413147, "learning_rate": 4.052023949014257e-06, "loss": 0.1276, "step": 5707 }, { "epoch": 0.9248217757615036, "grad_norm": 0.9924911856651306, "learning_rate": 4.051681095740561e-06, "loss": 0.1548, "step": 5708 }, { "epoch": 0.9249837977965003, "grad_norm": 0.8889843821525574, "learning_rate": 4.051338194989179e-06, "loss": 0.1553, "step": 5709 }, { "epoch": 0.9251458198314971, "grad_norm": 0.8717356324195862, "learning_rate": 4.050995246770602e-06, "loss": 0.146, "step": 5710 }, { "epoch": 0.9253078418664938, "grad_norm": 0.8971138596534729, "learning_rate": 4.050652251095324e-06, "loss": 0.1565, "step": 5711 }, { "epoch": 0.9254698639014906, "grad_norm": 0.8394131660461426, "learning_rate": 4.05030920797384e-06, "loss": 0.1485, "step": 5712 }, { "epoch": 0.9256318859364874, "grad_norm": 0.8194043636322021, "learning_rate": 4.049966117416645e-06, "loss": 0.1376, "step": 5713 }, { "epoch": 0.9257939079714841, "grad_norm": 0.9140671491622925, "learning_rate": 4.049622979434239e-06, "loss": 0.1417, "step": 5714 }, { "epoch": 0.9259559300064809, "grad_norm": 0.7816025018692017, "learning_rate": 4.049279794037118e-06, "loss": 0.1382, "step": 5715 }, { "epoch": 0.9261179520414776, "grad_norm": 0.9249107241630554, "learning_rate": 4.0489365612357854e-06, "loss": 0.1547, "step": 5716 }, { "epoch": 0.9262799740764744, "grad_norm": 0.8584994673728943, "learning_rate": 4.048593281040743e-06, "loss": 0.129, "step": 5717 }, { "epoch": 0.9264419961114712, "grad_norm": 0.8270529508590698, "learning_rate": 4.0482499534624934e-06, "loss": 0.1382, "step": 5718 }, { "epoch": 0.9266040181464679, "grad_norm": 0.8987339735031128, "learning_rate": 4.047906578511542e-06, "loss": 0.1495, "step": 5719 }, { "epoch": 0.9267660401814647, "grad_norm": 0.7987256050109863, "learning_rate": 4.047563156198394e-06, "loss": 0.1449, "step": 5720 }, { "epoch": 0.9269280622164614, "grad_norm": 0.746697723865509, "learning_rate": 4.047219686533559e-06, "loss": 0.1149, "step": 5721 }, { "epoch": 0.9270900842514582, "grad_norm": 0.860031008720398, "learning_rate": 4.046876169527547e-06, "loss": 0.1473, "step": 5722 }, { "epoch": 0.9272521062864549, "grad_norm": 0.837323784828186, "learning_rate": 4.046532605190866e-06, "loss": 0.1438, "step": 5723 }, { "epoch": 0.9274141283214518, "grad_norm": 0.8923266530036926, "learning_rate": 4.04618899353403e-06, "loss": 0.1301, "step": 5724 }, { "epoch": 0.9275761503564485, "grad_norm": 0.9036858677864075, "learning_rate": 4.045845334567553e-06, "loss": 0.14, "step": 5725 }, { "epoch": 0.9277381723914452, "grad_norm": 0.8130469918251038, "learning_rate": 4.04550162830195e-06, "loss": 0.1394, "step": 5726 }, { "epoch": 0.927900194426442, "grad_norm": 0.9334059953689575, "learning_rate": 4.045157874747737e-06, "loss": 0.1509, "step": 5727 }, { "epoch": 0.9280622164614387, "grad_norm": 0.8468704223632812, "learning_rate": 4.044814073915432e-06, "loss": 0.1319, "step": 5728 }, { "epoch": 0.9282242384964355, "grad_norm": 0.875339150428772, "learning_rate": 4.0444702258155545e-06, "loss": 0.1525, "step": 5729 }, { "epoch": 0.9283862605314323, "grad_norm": 1.0649487972259521, "learning_rate": 4.044126330458626e-06, "loss": 0.1664, "step": 5730 }, { "epoch": 0.9285482825664291, "grad_norm": 0.8550140261650085, "learning_rate": 4.043782387855169e-06, "loss": 0.1398, "step": 5731 }, { "epoch": 0.9287103046014258, "grad_norm": 0.7701947689056396, "learning_rate": 4.0434383980157055e-06, "loss": 0.1186, "step": 5732 }, { "epoch": 0.9288723266364226, "grad_norm": 0.7681037783622742, "learning_rate": 4.043094360950763e-06, "loss": 0.1364, "step": 5733 }, { "epoch": 0.9290343486714193, "grad_norm": 0.8084831833839417, "learning_rate": 4.042750276670867e-06, "loss": 0.1258, "step": 5734 }, { "epoch": 0.929196370706416, "grad_norm": 0.824355959892273, "learning_rate": 4.042406145186546e-06, "loss": 0.1295, "step": 5735 }, { "epoch": 0.9293583927414129, "grad_norm": 0.9090822339057922, "learning_rate": 4.04206196650833e-06, "loss": 0.1374, "step": 5736 }, { "epoch": 0.9295204147764096, "grad_norm": 0.7409492135047913, "learning_rate": 4.041717740646749e-06, "loss": 0.1126, "step": 5737 }, { "epoch": 0.9296824368114064, "grad_norm": 0.8566955327987671, "learning_rate": 4.041373467612337e-06, "loss": 0.1297, "step": 5738 }, { "epoch": 0.9298444588464031, "grad_norm": 0.8296953439712524, "learning_rate": 4.0410291474156246e-06, "loss": 0.1296, "step": 5739 }, { "epoch": 0.9300064808813999, "grad_norm": 0.8668819665908813, "learning_rate": 4.0406847800671515e-06, "loss": 0.1438, "step": 5740 }, { "epoch": 0.9301685029163966, "grad_norm": 0.8064781427383423, "learning_rate": 4.040340365577452e-06, "loss": 0.1289, "step": 5741 }, { "epoch": 0.9303305249513933, "grad_norm": 0.9188829064369202, "learning_rate": 4.0399959039570646e-06, "loss": 0.1417, "step": 5742 }, { "epoch": 0.9304925469863902, "grad_norm": 0.9218767881393433, "learning_rate": 4.039651395216529e-06, "loss": 0.1355, "step": 5743 }, { "epoch": 0.9306545690213869, "grad_norm": 1.0256109237670898, "learning_rate": 4.039306839366387e-06, "loss": 0.1755, "step": 5744 }, { "epoch": 0.9308165910563837, "grad_norm": 0.7666441202163696, "learning_rate": 4.038962236417181e-06, "loss": 0.1259, "step": 5745 }, { "epoch": 0.9309786130913804, "grad_norm": 0.7907612323760986, "learning_rate": 4.038617586379455e-06, "loss": 0.1331, "step": 5746 }, { "epoch": 0.9311406351263772, "grad_norm": 0.8179818391799927, "learning_rate": 4.0382728892637535e-06, "loss": 0.1369, "step": 5747 }, { "epoch": 0.931302657161374, "grad_norm": 0.8541617393493652, "learning_rate": 4.0379281450806255e-06, "loss": 0.1495, "step": 5748 }, { "epoch": 0.9314646791963707, "grad_norm": 0.7930795550346375, "learning_rate": 4.037583353840616e-06, "loss": 0.1259, "step": 5749 }, { "epoch": 0.9316267012313675, "grad_norm": 0.8774799108505249, "learning_rate": 4.037238515554278e-06, "loss": 0.1422, "step": 5750 }, { "epoch": 0.9317887232663642, "grad_norm": 0.7685874700546265, "learning_rate": 4.03689363023216e-06, "loss": 0.137, "step": 5751 }, { "epoch": 0.931950745301361, "grad_norm": 0.8230496644973755, "learning_rate": 4.0365486978848176e-06, "loss": 0.1345, "step": 5752 }, { "epoch": 0.9321127673363577, "grad_norm": 0.7255528569221497, "learning_rate": 4.0362037185228045e-06, "loss": 0.1223, "step": 5753 }, { "epoch": 0.9322747893713546, "grad_norm": 0.8288103938102722, "learning_rate": 4.035858692156673e-06, "loss": 0.1412, "step": 5754 }, { "epoch": 0.9324368114063513, "grad_norm": 0.7872039675712585, "learning_rate": 4.035513618796983e-06, "loss": 0.1254, "step": 5755 }, { "epoch": 0.932598833441348, "grad_norm": 0.7703442573547363, "learning_rate": 4.035168498454292e-06, "loss": 0.1177, "step": 5756 }, { "epoch": 0.9327608554763448, "grad_norm": 0.7858262658119202, "learning_rate": 4.034823331139161e-06, "loss": 0.123, "step": 5757 }, { "epoch": 0.9329228775113415, "grad_norm": 0.9467945694923401, "learning_rate": 4.034478116862149e-06, "loss": 0.1372, "step": 5758 }, { "epoch": 0.9330848995463383, "grad_norm": 0.7315438985824585, "learning_rate": 4.03413285563382e-06, "loss": 0.1219, "step": 5759 }, { "epoch": 0.933246921581335, "grad_norm": 0.8702850937843323, "learning_rate": 4.033787547464738e-06, "loss": 0.1397, "step": 5760 }, { "epoch": 0.9334089436163319, "grad_norm": 0.9242250919342041, "learning_rate": 4.03344219236547e-06, "loss": 0.1486, "step": 5761 }, { "epoch": 0.9335709656513286, "grad_norm": 0.8591980934143066, "learning_rate": 4.033096790346581e-06, "loss": 0.1383, "step": 5762 }, { "epoch": 0.9337329876863253, "grad_norm": 0.8047747611999512, "learning_rate": 4.03275134141864e-06, "loss": 0.127, "step": 5763 }, { "epoch": 0.9338950097213221, "grad_norm": 0.8145176768302917, "learning_rate": 4.032405845592218e-06, "loss": 0.1187, "step": 5764 }, { "epoch": 0.9340570317563188, "grad_norm": 0.8953231573104858, "learning_rate": 4.0320603028778845e-06, "loss": 0.1398, "step": 5765 }, { "epoch": 0.9342190537913156, "grad_norm": 0.8610212206840515, "learning_rate": 4.0317147132862135e-06, "loss": 0.1355, "step": 5766 }, { "epoch": 0.9343810758263124, "grad_norm": 0.7924541234970093, "learning_rate": 4.031369076827779e-06, "loss": 0.1243, "step": 5767 }, { "epoch": 0.9345430978613092, "grad_norm": 0.8468953371047974, "learning_rate": 4.031023393513157e-06, "loss": 0.1312, "step": 5768 }, { "epoch": 0.9347051198963059, "grad_norm": 0.8163899779319763, "learning_rate": 4.030677663352924e-06, "loss": 0.1254, "step": 5769 }, { "epoch": 0.9348671419313026, "grad_norm": 0.8453715443611145, "learning_rate": 4.030331886357659e-06, "loss": 0.1306, "step": 5770 }, { "epoch": 0.9350291639662994, "grad_norm": 0.8978089094161987, "learning_rate": 4.0299860625379405e-06, "loss": 0.1512, "step": 5771 }, { "epoch": 0.9351911860012961, "grad_norm": 0.8564348816871643, "learning_rate": 4.029640191904352e-06, "loss": 0.147, "step": 5772 }, { "epoch": 0.935353208036293, "grad_norm": 0.771431028842926, "learning_rate": 4.029294274467475e-06, "loss": 0.1263, "step": 5773 }, { "epoch": 0.9355152300712897, "grad_norm": 0.7943999767303467, "learning_rate": 4.028948310237893e-06, "loss": 0.1132, "step": 5774 }, { "epoch": 0.9356772521062865, "grad_norm": 0.8058924078941345, "learning_rate": 4.028602299226194e-06, "loss": 0.1315, "step": 5775 }, { "epoch": 0.9358392741412832, "grad_norm": 0.9111430048942566, "learning_rate": 4.0282562414429635e-06, "loss": 0.1582, "step": 5776 }, { "epoch": 0.93600129617628, "grad_norm": 0.8768683075904846, "learning_rate": 4.02791013689879e-06, "loss": 0.147, "step": 5777 }, { "epoch": 0.9361633182112767, "grad_norm": 0.7665877342224121, "learning_rate": 4.027563985604264e-06, "loss": 0.1359, "step": 5778 }, { "epoch": 0.9363253402462735, "grad_norm": 0.9200119972229004, "learning_rate": 4.027217787569977e-06, "loss": 0.1459, "step": 5779 }, { "epoch": 0.9364873622812703, "grad_norm": 0.9305282831192017, "learning_rate": 4.026871542806521e-06, "loss": 0.1408, "step": 5780 }, { "epoch": 0.936649384316267, "grad_norm": 0.7743881344795227, "learning_rate": 4.026525251324491e-06, "loss": 0.1419, "step": 5781 }, { "epoch": 0.9368114063512638, "grad_norm": 0.8831008672714233, "learning_rate": 4.026178913134482e-06, "loss": 0.1248, "step": 5782 }, { "epoch": 0.9369734283862605, "grad_norm": 0.9593657851219177, "learning_rate": 4.025832528247092e-06, "loss": 0.1668, "step": 5783 }, { "epoch": 0.9371354504212573, "grad_norm": 0.8962870836257935, "learning_rate": 4.02548609667292e-06, "loss": 0.1516, "step": 5784 }, { "epoch": 0.937297472456254, "grad_norm": 0.8342962265014648, "learning_rate": 4.025139618422563e-06, "loss": 0.1261, "step": 5785 }, { "epoch": 0.9374594944912508, "grad_norm": 0.8116139769554138, "learning_rate": 4.024793093506626e-06, "loss": 0.1366, "step": 5786 }, { "epoch": 0.9376215165262476, "grad_norm": 0.8429117798805237, "learning_rate": 4.024446521935709e-06, "loss": 0.1453, "step": 5787 }, { "epoch": 0.9377835385612443, "grad_norm": 0.8313448429107666, "learning_rate": 4.024099903720419e-06, "loss": 0.1356, "step": 5788 }, { "epoch": 0.9379455605962411, "grad_norm": 0.8871910572052002, "learning_rate": 4.023753238871359e-06, "loss": 0.1517, "step": 5789 }, { "epoch": 0.9381075826312378, "grad_norm": 0.7346284985542297, "learning_rate": 4.023406527399137e-06, "loss": 0.1137, "step": 5790 }, { "epoch": 0.9382696046662347, "grad_norm": 0.7601230144500732, "learning_rate": 4.023059769314363e-06, "loss": 0.127, "step": 5791 }, { "epoch": 0.9384316267012314, "grad_norm": 0.7658340930938721, "learning_rate": 4.022712964627645e-06, "loss": 0.1183, "step": 5792 }, { "epoch": 0.9385936487362281, "grad_norm": 0.7574445605278015, "learning_rate": 4.022366113349596e-06, "loss": 0.1352, "step": 5793 }, { "epoch": 0.9387556707712249, "grad_norm": 0.9390842914581299, "learning_rate": 4.022019215490827e-06, "loss": 0.1511, "step": 5794 }, { "epoch": 0.9389176928062216, "grad_norm": 0.8839474320411682, "learning_rate": 4.021672271061955e-06, "loss": 0.1477, "step": 5795 }, { "epoch": 0.9390797148412184, "grad_norm": 0.7684565186500549, "learning_rate": 4.021325280073592e-06, "loss": 0.1171, "step": 5796 }, { "epoch": 0.9392417368762151, "grad_norm": 0.9148947596549988, "learning_rate": 4.020978242536357e-06, "loss": 0.1434, "step": 5797 }, { "epoch": 0.939403758911212, "grad_norm": 0.8702596426010132, "learning_rate": 4.0206311584608705e-06, "loss": 0.1391, "step": 5798 }, { "epoch": 0.9395657809462087, "grad_norm": 0.8113718628883362, "learning_rate": 4.020284027857748e-06, "loss": 0.1407, "step": 5799 }, { "epoch": 0.9397278029812054, "grad_norm": 1.005882978439331, "learning_rate": 4.019936850737615e-06, "loss": 0.1302, "step": 5800 }, { "epoch": 0.9398898250162022, "grad_norm": 0.8756170272827148, "learning_rate": 4.019589627111092e-06, "loss": 0.1336, "step": 5801 }, { "epoch": 0.9400518470511989, "grad_norm": 0.8532119393348694, "learning_rate": 4.019242356988803e-06, "loss": 0.1408, "step": 5802 }, { "epoch": 0.9402138690861958, "grad_norm": 1.0048043727874756, "learning_rate": 4.018895040381375e-06, "loss": 0.1587, "step": 5803 }, { "epoch": 0.9403758911211925, "grad_norm": 0.9122523069381714, "learning_rate": 4.018547677299434e-06, "loss": 0.1292, "step": 5804 }, { "epoch": 0.9405379131561893, "grad_norm": 0.8948111534118652, "learning_rate": 4.018200267753609e-06, "loss": 0.1612, "step": 5805 }, { "epoch": 0.940699935191186, "grad_norm": 0.7902446985244751, "learning_rate": 4.01785281175453e-06, "loss": 0.1225, "step": 5806 }, { "epoch": 0.9408619572261827, "grad_norm": 0.9093374013900757, "learning_rate": 4.017505309312829e-06, "loss": 0.1477, "step": 5807 }, { "epoch": 0.9410239792611795, "grad_norm": 0.9980859756469727, "learning_rate": 4.017157760439136e-06, "loss": 0.1574, "step": 5808 }, { "epoch": 0.9411860012961762, "grad_norm": 0.8467405438423157, "learning_rate": 4.0168101651440885e-06, "loss": 0.139, "step": 5809 }, { "epoch": 0.9413480233311731, "grad_norm": 0.7334152460098267, "learning_rate": 4.01646252343832e-06, "loss": 0.1238, "step": 5810 }, { "epoch": 0.9415100453661698, "grad_norm": 0.8250093460083008, "learning_rate": 4.016114835332467e-06, "loss": 0.1381, "step": 5811 }, { "epoch": 0.9416720674011666, "grad_norm": 0.8436107039451599, "learning_rate": 4.015767100837171e-06, "loss": 0.1459, "step": 5812 }, { "epoch": 0.9418340894361633, "grad_norm": 0.8007429242134094, "learning_rate": 4.015419319963069e-06, "loss": 0.1275, "step": 5813 }, { "epoch": 0.94199611147116, "grad_norm": 0.7847580909729004, "learning_rate": 4.015071492720802e-06, "loss": 0.1295, "step": 5814 }, { "epoch": 0.9421581335061568, "grad_norm": 0.8911027312278748, "learning_rate": 4.014723619121015e-06, "loss": 0.1504, "step": 5815 }, { "epoch": 0.9423201555411536, "grad_norm": 0.811799168586731, "learning_rate": 4.014375699174351e-06, "loss": 0.1243, "step": 5816 }, { "epoch": 0.9424821775761504, "grad_norm": 0.8510453701019287, "learning_rate": 4.014027732891454e-06, "loss": 0.1331, "step": 5817 }, { "epoch": 0.9426441996111471, "grad_norm": 0.9293267726898193, "learning_rate": 4.013679720282973e-06, "loss": 0.1531, "step": 5818 }, { "epoch": 0.9428062216461439, "grad_norm": 0.8839837312698364, "learning_rate": 4.013331661359556e-06, "loss": 0.158, "step": 5819 }, { "epoch": 0.9429682436811406, "grad_norm": 0.7744541764259338, "learning_rate": 4.012983556131852e-06, "loss": 0.1346, "step": 5820 }, { "epoch": 0.9431302657161373, "grad_norm": 0.8198082447052002, "learning_rate": 4.012635404610512e-06, "loss": 0.1272, "step": 5821 }, { "epoch": 0.9432922877511342, "grad_norm": 0.9199150204658508, "learning_rate": 4.01228720680619e-06, "loss": 0.145, "step": 5822 }, { "epoch": 0.9434543097861309, "grad_norm": 0.9087578058242798, "learning_rate": 4.011938962729538e-06, "loss": 0.1471, "step": 5823 }, { "epoch": 0.9436163318211277, "grad_norm": 0.7970622181892395, "learning_rate": 4.011590672391213e-06, "loss": 0.1335, "step": 5824 }, { "epoch": 0.9437783538561244, "grad_norm": 0.9035589694976807, "learning_rate": 4.011242335801872e-06, "loss": 0.1493, "step": 5825 }, { "epoch": 0.9439403758911212, "grad_norm": 0.825301468372345, "learning_rate": 4.010893952972173e-06, "loss": 0.1256, "step": 5826 }, { "epoch": 0.9441023979261179, "grad_norm": 0.8092053532600403, "learning_rate": 4.010545523912775e-06, "loss": 0.1258, "step": 5827 }, { "epoch": 0.9442644199611148, "grad_norm": 0.9639272689819336, "learning_rate": 4.010197048634338e-06, "loss": 0.1487, "step": 5828 }, { "epoch": 0.9444264419961115, "grad_norm": 1.0004661083221436, "learning_rate": 4.009848527147527e-06, "loss": 0.1527, "step": 5829 }, { "epoch": 0.9445884640311082, "grad_norm": 0.8995679616928101, "learning_rate": 4.009499959463005e-06, "loss": 0.1308, "step": 5830 }, { "epoch": 0.944750486066105, "grad_norm": 0.9287495613098145, "learning_rate": 4.009151345591437e-06, "loss": 0.1367, "step": 5831 }, { "epoch": 0.9449125081011017, "grad_norm": 0.9027858376502991, "learning_rate": 4.00880268554349e-06, "loss": 0.1273, "step": 5832 }, { "epoch": 0.9450745301360985, "grad_norm": 0.8884682655334473, "learning_rate": 4.008453979329832e-06, "loss": 0.144, "step": 5833 }, { "epoch": 0.9452365521710953, "grad_norm": 0.932540774345398, "learning_rate": 4.008105226961132e-06, "loss": 0.1585, "step": 5834 }, { "epoch": 0.9453985742060921, "grad_norm": 0.7685552835464478, "learning_rate": 4.0077564284480625e-06, "loss": 0.1161, "step": 5835 }, { "epoch": 0.9455605962410888, "grad_norm": 0.830932080745697, "learning_rate": 4.007407583801295e-06, "loss": 0.1369, "step": 5836 }, { "epoch": 0.9457226182760855, "grad_norm": 0.6946200132369995, "learning_rate": 4.007058693031502e-06, "loss": 0.1087, "step": 5837 }, { "epoch": 0.9458846403110823, "grad_norm": 0.8081919550895691, "learning_rate": 4.006709756149362e-06, "loss": 0.1276, "step": 5838 }, { "epoch": 0.946046662346079, "grad_norm": 0.9541810154914856, "learning_rate": 4.0063607731655486e-06, "loss": 0.1513, "step": 5839 }, { "epoch": 0.9462086843810759, "grad_norm": 0.8741765022277832, "learning_rate": 4.006011744090741e-06, "loss": 0.151, "step": 5840 }, { "epoch": 0.9463707064160726, "grad_norm": 0.8932287096977234, "learning_rate": 4.005662668935618e-06, "loss": 0.1519, "step": 5841 }, { "epoch": 0.9465327284510694, "grad_norm": 0.7929561138153076, "learning_rate": 4.005313547710861e-06, "loss": 0.1335, "step": 5842 }, { "epoch": 0.9466947504860661, "grad_norm": 0.8250575661659241, "learning_rate": 4.004964380427153e-06, "loss": 0.1288, "step": 5843 }, { "epoch": 0.9468567725210628, "grad_norm": 0.8816642761230469, "learning_rate": 4.004615167095176e-06, "loss": 0.144, "step": 5844 }, { "epoch": 0.9470187945560596, "grad_norm": 0.7737101316452026, "learning_rate": 4.004265907725616e-06, "loss": 0.1375, "step": 5845 }, { "epoch": 0.9471808165910564, "grad_norm": 0.851394534111023, "learning_rate": 4.003916602329161e-06, "loss": 0.1353, "step": 5846 }, { "epoch": 0.9473428386260532, "grad_norm": 0.8754749894142151, "learning_rate": 4.003567250916496e-06, "loss": 0.1465, "step": 5847 }, { "epoch": 0.9475048606610499, "grad_norm": 0.8369944095611572, "learning_rate": 4.0032178534983115e-06, "loss": 0.1441, "step": 5848 }, { "epoch": 0.9476668826960467, "grad_norm": 0.9141656160354614, "learning_rate": 4.0028684100852986e-06, "loss": 0.1613, "step": 5849 }, { "epoch": 0.9478289047310434, "grad_norm": 0.8459381461143494, "learning_rate": 4.00251892068815e-06, "loss": 0.1346, "step": 5850 }, { "epoch": 0.9479909267660401, "grad_norm": 0.8450573086738586, "learning_rate": 4.002169385317558e-06, "loss": 0.1378, "step": 5851 }, { "epoch": 0.948152948801037, "grad_norm": 0.8298230767250061, "learning_rate": 4.001819803984218e-06, "loss": 0.1431, "step": 5852 }, { "epoch": 0.9483149708360337, "grad_norm": 0.7699709534645081, "learning_rate": 4.001470176698826e-06, "loss": 0.132, "step": 5853 }, { "epoch": 0.9484769928710305, "grad_norm": 0.7468587160110474, "learning_rate": 4.00112050347208e-06, "loss": 0.1125, "step": 5854 }, { "epoch": 0.9486390149060272, "grad_norm": 0.8313590288162231, "learning_rate": 4.00077078431468e-06, "loss": 0.1382, "step": 5855 }, { "epoch": 0.948801036941024, "grad_norm": 0.9513286352157593, "learning_rate": 4.000421019237326e-06, "loss": 0.1512, "step": 5856 }, { "epoch": 0.9489630589760207, "grad_norm": 0.9002991914749146, "learning_rate": 4.000071208250719e-06, "loss": 0.1662, "step": 5857 }, { "epoch": 0.9491250810110174, "grad_norm": 0.9496574401855469, "learning_rate": 3.999721351365563e-06, "loss": 0.1571, "step": 5858 }, { "epoch": 0.9492871030460143, "grad_norm": 0.9157005548477173, "learning_rate": 3.999371448592563e-06, "loss": 0.1489, "step": 5859 }, { "epoch": 0.949449125081011, "grad_norm": 0.8769446015357971, "learning_rate": 3.999021499942425e-06, "loss": 0.1484, "step": 5860 }, { "epoch": 0.9496111471160078, "grad_norm": 0.8674890398979187, "learning_rate": 3.9986715054258575e-06, "loss": 0.1579, "step": 5861 }, { "epoch": 0.9497731691510045, "grad_norm": 0.766059935092926, "learning_rate": 3.998321465053568e-06, "loss": 0.13, "step": 5862 }, { "epoch": 0.9499351911860013, "grad_norm": 0.8092472553253174, "learning_rate": 3.997971378836268e-06, "loss": 0.1296, "step": 5863 }, { "epoch": 0.950097213220998, "grad_norm": 0.8249281048774719, "learning_rate": 3.9976212467846674e-06, "loss": 0.1396, "step": 5864 }, { "epoch": 0.9502592352559948, "grad_norm": 0.782484769821167, "learning_rate": 3.997271068909483e-06, "loss": 0.1208, "step": 5865 }, { "epoch": 0.9504212572909916, "grad_norm": 0.7566808462142944, "learning_rate": 3.996920845221425e-06, "loss": 0.1269, "step": 5866 }, { "epoch": 0.9505832793259883, "grad_norm": 0.7882890105247498, "learning_rate": 3.9965705757312136e-06, "loss": 0.1227, "step": 5867 }, { "epoch": 0.9507453013609851, "grad_norm": 0.8613394498825073, "learning_rate": 3.996220260449563e-06, "loss": 0.1334, "step": 5868 }, { "epoch": 0.9509073233959818, "grad_norm": 0.9516828060150146, "learning_rate": 3.9958698993871935e-06, "loss": 0.1641, "step": 5869 }, { "epoch": 0.9510693454309787, "grad_norm": 0.8244841694831848, "learning_rate": 3.9955194925548245e-06, "loss": 0.1327, "step": 5870 }, { "epoch": 0.9512313674659754, "grad_norm": 0.7766361832618713, "learning_rate": 3.995169039963179e-06, "loss": 0.1201, "step": 5871 }, { "epoch": 0.9513933895009722, "grad_norm": 0.8498024344444275, "learning_rate": 3.994818541622979e-06, "loss": 0.1465, "step": 5872 }, { "epoch": 0.9515554115359689, "grad_norm": 0.9167964458465576, "learning_rate": 3.994467997544948e-06, "loss": 0.1403, "step": 5873 }, { "epoch": 0.9517174335709656, "grad_norm": 1.072054386138916, "learning_rate": 3.994117407739814e-06, "loss": 0.1786, "step": 5874 }, { "epoch": 0.9518794556059624, "grad_norm": 0.8586252331733704, "learning_rate": 3.993766772218303e-06, "loss": 0.1465, "step": 5875 }, { "epoch": 0.9520414776409591, "grad_norm": 0.8002846837043762, "learning_rate": 3.993416090991143e-06, "loss": 0.1172, "step": 5876 }, { "epoch": 0.952203499675956, "grad_norm": 0.7997404932975769, "learning_rate": 3.9930653640690655e-06, "loss": 0.125, "step": 5877 }, { "epoch": 0.9523655217109527, "grad_norm": 0.8767850399017334, "learning_rate": 3.992714591462799e-06, "loss": 0.1404, "step": 5878 }, { "epoch": 0.9525275437459495, "grad_norm": 0.8756327629089355, "learning_rate": 3.992363773183081e-06, "loss": 0.1434, "step": 5879 }, { "epoch": 0.9526895657809462, "grad_norm": 0.8966729044914246, "learning_rate": 3.992012909240641e-06, "loss": 0.1502, "step": 5880 }, { "epoch": 0.9528515878159429, "grad_norm": 0.7201352715492249, "learning_rate": 3.991661999646218e-06, "loss": 0.1114, "step": 5881 }, { "epoch": 0.9530136098509397, "grad_norm": 0.8088727593421936, "learning_rate": 3.991311044410546e-06, "loss": 0.1477, "step": 5882 }, { "epoch": 0.9531756318859365, "grad_norm": 0.8761907815933228, "learning_rate": 3.9909600435443665e-06, "loss": 0.1541, "step": 5883 }, { "epoch": 0.9533376539209333, "grad_norm": 0.7636457681655884, "learning_rate": 3.990608997058416e-06, "loss": 0.1187, "step": 5884 }, { "epoch": 0.95349967595593, "grad_norm": 0.7406367063522339, "learning_rate": 3.9902579049634385e-06, "loss": 0.1194, "step": 5885 }, { "epoch": 0.9536616979909268, "grad_norm": 0.8607000112533569, "learning_rate": 3.989906767270175e-06, "loss": 0.1342, "step": 5886 }, { "epoch": 0.9538237200259235, "grad_norm": 0.8230603933334351, "learning_rate": 3.98955558398937e-06, "loss": 0.1441, "step": 5887 }, { "epoch": 0.9539857420609202, "grad_norm": 0.8925058841705322, "learning_rate": 3.989204355131769e-06, "loss": 0.1424, "step": 5888 }, { "epoch": 0.9541477640959171, "grad_norm": 0.9012094140052795, "learning_rate": 3.98885308070812e-06, "loss": 0.1487, "step": 5889 }, { "epoch": 0.9543097861309138, "grad_norm": 0.818138599395752, "learning_rate": 3.988501760729168e-06, "loss": 0.1225, "step": 5890 }, { "epoch": 0.9544718081659106, "grad_norm": 0.8324544429779053, "learning_rate": 3.988150395205665e-06, "loss": 0.1278, "step": 5891 }, { "epoch": 0.9546338302009073, "grad_norm": 0.8996634483337402, "learning_rate": 3.98779898414836e-06, "loss": 0.1454, "step": 5892 }, { "epoch": 0.9547958522359041, "grad_norm": 0.9417580962181091, "learning_rate": 3.987447527568007e-06, "loss": 0.1461, "step": 5893 }, { "epoch": 0.9549578742709008, "grad_norm": 0.9308149814605713, "learning_rate": 3.98709602547536e-06, "loss": 0.154, "step": 5894 }, { "epoch": 0.9551198963058976, "grad_norm": 0.8258563876152039, "learning_rate": 3.986744477881172e-06, "loss": 0.1396, "step": 5895 }, { "epoch": 0.9552819183408944, "grad_norm": 0.9481770396232605, "learning_rate": 3.986392884796202e-06, "loss": 0.1515, "step": 5896 }, { "epoch": 0.9554439403758911, "grad_norm": 0.8203932046890259, "learning_rate": 3.986041246231206e-06, "loss": 0.1401, "step": 5897 }, { "epoch": 0.9556059624108879, "grad_norm": 0.8350275158882141, "learning_rate": 3.9856895621969435e-06, "loss": 0.1318, "step": 5898 }, { "epoch": 0.9557679844458846, "grad_norm": 0.8285270929336548, "learning_rate": 3.985337832704177e-06, "loss": 0.139, "step": 5899 }, { "epoch": 0.9559300064808814, "grad_norm": 0.7445301413536072, "learning_rate": 3.984986057763667e-06, "loss": 0.1271, "step": 5900 }, { "epoch": 0.9560920285158782, "grad_norm": 0.8104132413864136, "learning_rate": 3.984634237386177e-06, "loss": 0.1355, "step": 5901 }, { "epoch": 0.9562540505508749, "grad_norm": 0.9375022649765015, "learning_rate": 3.984282371582472e-06, "loss": 0.1473, "step": 5902 }, { "epoch": 0.9564160725858717, "grad_norm": 0.7736932635307312, "learning_rate": 3.983930460363318e-06, "loss": 0.1289, "step": 5903 }, { "epoch": 0.9565780946208684, "grad_norm": 0.8169133067131042, "learning_rate": 3.983578503739483e-06, "loss": 0.1125, "step": 5904 }, { "epoch": 0.9567401166558652, "grad_norm": 0.9903810620307922, "learning_rate": 3.983226501721736e-06, "loss": 0.1406, "step": 5905 }, { "epoch": 0.9569021386908619, "grad_norm": 0.8533335328102112, "learning_rate": 3.982874454320849e-06, "loss": 0.135, "step": 5906 }, { "epoch": 0.9570641607258588, "grad_norm": 0.8362215161323547, "learning_rate": 3.98252236154759e-06, "loss": 0.1378, "step": 5907 }, { "epoch": 0.9572261827608555, "grad_norm": 0.8589804768562317, "learning_rate": 3.982170223412735e-06, "loss": 0.1484, "step": 5908 }, { "epoch": 0.9573882047958522, "grad_norm": 0.8295727372169495, "learning_rate": 3.981818039927058e-06, "loss": 0.147, "step": 5909 }, { "epoch": 0.957550226830849, "grad_norm": 0.9022241234779358, "learning_rate": 3.981465811101335e-06, "loss": 0.1553, "step": 5910 }, { "epoch": 0.9577122488658457, "grad_norm": 0.8496821522712708, "learning_rate": 3.981113536946344e-06, "loss": 0.1388, "step": 5911 }, { "epoch": 0.9578742709008425, "grad_norm": 0.7762805819511414, "learning_rate": 3.9807612174728615e-06, "loss": 0.1212, "step": 5912 }, { "epoch": 0.9580362929358393, "grad_norm": 0.7569793462753296, "learning_rate": 3.9804088526916706e-06, "loss": 0.1175, "step": 5913 }, { "epoch": 0.9581983149708361, "grad_norm": 1.0253901481628418, "learning_rate": 3.98005644261355e-06, "loss": 0.1636, "step": 5914 }, { "epoch": 0.9583603370058328, "grad_norm": 0.7959587574005127, "learning_rate": 3.979703987249285e-06, "loss": 0.1359, "step": 5915 }, { "epoch": 0.9585223590408296, "grad_norm": 0.8208194971084595, "learning_rate": 3.979351486609659e-06, "loss": 0.1304, "step": 5916 }, { "epoch": 0.9586843810758263, "grad_norm": 0.8786490559577942, "learning_rate": 3.978998940705456e-06, "loss": 0.144, "step": 5917 }, { "epoch": 0.958846403110823, "grad_norm": 0.8570854663848877, "learning_rate": 3.978646349547466e-06, "loss": 0.1458, "step": 5918 }, { "epoch": 0.9590084251458199, "grad_norm": 0.8755070567131042, "learning_rate": 3.978293713146475e-06, "loss": 0.1521, "step": 5919 }, { "epoch": 0.9591704471808166, "grad_norm": 0.8440230488777161, "learning_rate": 3.977941031513275e-06, "loss": 0.1334, "step": 5920 }, { "epoch": 0.9593324692158134, "grad_norm": 0.7312674522399902, "learning_rate": 3.977588304658654e-06, "loss": 0.1117, "step": 5921 }, { "epoch": 0.9594944912508101, "grad_norm": 0.9204146265983582, "learning_rate": 3.977235532593408e-06, "loss": 0.1492, "step": 5922 }, { "epoch": 0.9596565132858069, "grad_norm": 0.7905224561691284, "learning_rate": 3.9768827153283295e-06, "loss": 0.128, "step": 5923 }, { "epoch": 0.9598185353208036, "grad_norm": 0.8433038592338562, "learning_rate": 3.976529852874214e-06, "loss": 0.1244, "step": 5924 }, { "epoch": 0.9599805573558003, "grad_norm": 0.8728172183036804, "learning_rate": 3.976176945241857e-06, "loss": 0.1406, "step": 5925 }, { "epoch": 0.9601425793907972, "grad_norm": 0.9237974286079407, "learning_rate": 3.975823992442058e-06, "loss": 0.1445, "step": 5926 }, { "epoch": 0.9603046014257939, "grad_norm": 0.8911517262458801, "learning_rate": 3.9754709944856175e-06, "loss": 0.1443, "step": 5927 }, { "epoch": 0.9604666234607907, "grad_norm": 0.9263610243797302, "learning_rate": 3.975117951383334e-06, "loss": 0.1428, "step": 5928 }, { "epoch": 0.9606286454957874, "grad_norm": 0.7457212209701538, "learning_rate": 3.974764863146012e-06, "loss": 0.1228, "step": 5929 }, { "epoch": 0.9607906675307842, "grad_norm": 0.9272271990776062, "learning_rate": 3.974411729784453e-06, "loss": 0.1505, "step": 5930 }, { "epoch": 0.960952689565781, "grad_norm": 0.9309628009796143, "learning_rate": 3.974058551309463e-06, "loss": 0.1544, "step": 5931 }, { "epoch": 0.9611147116007777, "grad_norm": 0.8147203326225281, "learning_rate": 3.973705327731849e-06, "loss": 0.1389, "step": 5932 }, { "epoch": 0.9612767336357745, "grad_norm": 0.7795599699020386, "learning_rate": 3.9733520590624185e-06, "loss": 0.1237, "step": 5933 }, { "epoch": 0.9614387556707712, "grad_norm": 0.804670512676239, "learning_rate": 3.97299874531198e-06, "loss": 0.139, "step": 5934 }, { "epoch": 0.961600777705768, "grad_norm": 0.8195658326148987, "learning_rate": 3.972645386491345e-06, "loss": 0.1495, "step": 5935 }, { "epoch": 0.9617627997407647, "grad_norm": 0.7763250470161438, "learning_rate": 3.972291982611325e-06, "loss": 0.1301, "step": 5936 }, { "epoch": 0.9619248217757616, "grad_norm": 1.2038568258285522, "learning_rate": 3.971938533682732e-06, "loss": 0.1845, "step": 5937 }, { "epoch": 0.9620868438107583, "grad_norm": 0.8428332805633545, "learning_rate": 3.971585039716382e-06, "loss": 0.1433, "step": 5938 }, { "epoch": 0.962248865845755, "grad_norm": 0.8756902813911438, "learning_rate": 3.971231500723093e-06, "loss": 0.148, "step": 5939 }, { "epoch": 0.9624108878807518, "grad_norm": 0.8229630589485168, "learning_rate": 3.970877916713678e-06, "loss": 0.1428, "step": 5940 }, { "epoch": 0.9625729099157485, "grad_norm": 0.8338178992271423, "learning_rate": 3.97052428769896e-06, "loss": 0.1336, "step": 5941 }, { "epoch": 0.9627349319507453, "grad_norm": 0.8867552280426025, "learning_rate": 3.9701706136897564e-06, "loss": 0.1449, "step": 5942 }, { "epoch": 0.962896953985742, "grad_norm": 0.8758612275123596, "learning_rate": 3.96981689469689e-06, "loss": 0.1592, "step": 5943 }, { "epoch": 0.9630589760207389, "grad_norm": 0.8010472059249878, "learning_rate": 3.969463130731183e-06, "loss": 0.1295, "step": 5944 }, { "epoch": 0.9632209980557356, "grad_norm": 0.9243650436401367, "learning_rate": 3.969109321803461e-06, "loss": 0.1523, "step": 5945 }, { "epoch": 0.9633830200907323, "grad_norm": 0.8313032984733582, "learning_rate": 3.968755467924549e-06, "loss": 0.1382, "step": 5946 }, { "epoch": 0.9635450421257291, "grad_norm": 0.8543305397033691, "learning_rate": 3.9684015691052736e-06, "loss": 0.143, "step": 5947 }, { "epoch": 0.9637070641607258, "grad_norm": 0.8567736148834229, "learning_rate": 3.968047625356463e-06, "loss": 0.1359, "step": 5948 }, { "epoch": 0.9638690861957226, "grad_norm": 0.9071134328842163, "learning_rate": 3.967693636688948e-06, "loss": 0.1325, "step": 5949 }, { "epoch": 0.9640311082307194, "grad_norm": 0.7577062249183655, "learning_rate": 3.96733960311356e-06, "loss": 0.1227, "step": 5950 }, { "epoch": 0.9641931302657162, "grad_norm": 0.9051762223243713, "learning_rate": 3.966985524641132e-06, "loss": 0.1367, "step": 5951 }, { "epoch": 0.9643551523007129, "grad_norm": 0.8882315754890442, "learning_rate": 3.966631401282495e-06, "loss": 0.1543, "step": 5952 }, { "epoch": 0.9645171743357096, "grad_norm": 0.8317487835884094, "learning_rate": 3.966277233048487e-06, "loss": 0.1269, "step": 5953 }, { "epoch": 0.9646791963707064, "grad_norm": 0.8450263738632202, "learning_rate": 3.965923019949944e-06, "loss": 0.1332, "step": 5954 }, { "epoch": 0.9648412184057031, "grad_norm": 0.9014632105827332, "learning_rate": 3.965568761997704e-06, "loss": 0.1534, "step": 5955 }, { "epoch": 0.9650032404407, "grad_norm": 0.8284904956817627, "learning_rate": 3.965214459202607e-06, "loss": 0.1298, "step": 5956 }, { "epoch": 0.9651652624756967, "grad_norm": 0.7886707782745361, "learning_rate": 3.964860111575493e-06, "loss": 0.1353, "step": 5957 }, { "epoch": 0.9653272845106935, "grad_norm": 0.8882688879966736, "learning_rate": 3.964505719127205e-06, "loss": 0.1421, "step": 5958 }, { "epoch": 0.9654893065456902, "grad_norm": 0.8411626219749451, "learning_rate": 3.964151281868585e-06, "loss": 0.1326, "step": 5959 }, { "epoch": 0.9656513285806869, "grad_norm": 0.9165163636207581, "learning_rate": 3.963796799810479e-06, "loss": 0.1466, "step": 5960 }, { "epoch": 0.9658133506156837, "grad_norm": 0.7748314142227173, "learning_rate": 3.963442272963735e-06, "loss": 0.133, "step": 5961 }, { "epoch": 0.9659753726506805, "grad_norm": 0.8058779835700989, "learning_rate": 3.9630877013391964e-06, "loss": 0.1459, "step": 5962 }, { "epoch": 0.9661373946856773, "grad_norm": 0.9120404720306396, "learning_rate": 3.962733084947717e-06, "loss": 0.1459, "step": 5963 }, { "epoch": 0.966299416720674, "grad_norm": 0.8716152906417847, "learning_rate": 3.962378423800143e-06, "loss": 0.1448, "step": 5964 }, { "epoch": 0.9664614387556708, "grad_norm": 0.8008790612220764, "learning_rate": 3.962023717907329e-06, "loss": 0.131, "step": 5965 }, { "epoch": 0.9666234607906675, "grad_norm": 0.7553631067276001, "learning_rate": 3.961668967280128e-06, "loss": 0.1304, "step": 5966 }, { "epoch": 0.9667854828256643, "grad_norm": 0.9899448752403259, "learning_rate": 3.961314171929392e-06, "loss": 0.1312, "step": 5967 }, { "epoch": 0.9669475048606611, "grad_norm": 0.7708137631416321, "learning_rate": 3.96095933186598e-06, "loss": 0.1328, "step": 5968 }, { "epoch": 0.9671095268956578, "grad_norm": 0.7999005317687988, "learning_rate": 3.960604447100747e-06, "loss": 0.1344, "step": 5969 }, { "epoch": 0.9672715489306546, "grad_norm": 0.7382838129997253, "learning_rate": 3.960249517644553e-06, "loss": 0.1226, "step": 5970 }, { "epoch": 0.9674335709656513, "grad_norm": 1.0125248432159424, "learning_rate": 3.959894543508258e-06, "loss": 0.1487, "step": 5971 }, { "epoch": 0.9675955930006481, "grad_norm": 0.871058464050293, "learning_rate": 3.959539524702722e-06, "loss": 0.133, "step": 5972 }, { "epoch": 0.9677576150356448, "grad_norm": 0.7588186264038086, "learning_rate": 3.9591844612388095e-06, "loss": 0.1224, "step": 5973 }, { "epoch": 0.9679196370706417, "grad_norm": 0.7937440276145935, "learning_rate": 3.958829353127383e-06, "loss": 0.1285, "step": 5974 }, { "epoch": 0.9680816591056384, "grad_norm": 0.8277894258499146, "learning_rate": 3.958474200379309e-06, "loss": 0.136, "step": 5975 }, { "epoch": 0.9682436811406351, "grad_norm": 0.8382477760314941, "learning_rate": 3.958119003005453e-06, "loss": 0.1454, "step": 5976 }, { "epoch": 0.9684057031756319, "grad_norm": 0.8663250207901001, "learning_rate": 3.9577637610166855e-06, "loss": 0.1466, "step": 5977 }, { "epoch": 0.9685677252106286, "grad_norm": 0.9586002230644226, "learning_rate": 3.9574084744238735e-06, "loss": 0.1648, "step": 5978 }, { "epoch": 0.9687297472456254, "grad_norm": 0.7380560636520386, "learning_rate": 3.95705314323789e-06, "loss": 0.1182, "step": 5979 }, { "epoch": 0.9688917692806222, "grad_norm": 0.8138405084609985, "learning_rate": 3.956697767469606e-06, "loss": 0.1381, "step": 5980 }, { "epoch": 0.969053791315619, "grad_norm": 0.8596041202545166, "learning_rate": 3.956342347129894e-06, "loss": 0.1617, "step": 5981 }, { "epoch": 0.9692158133506157, "grad_norm": 0.83723384141922, "learning_rate": 3.955986882229632e-06, "loss": 0.1415, "step": 5982 }, { "epoch": 0.9693778353856124, "grad_norm": 0.8260191679000854, "learning_rate": 3.955631372779694e-06, "loss": 0.1442, "step": 5983 }, { "epoch": 0.9695398574206092, "grad_norm": 0.776252806186676, "learning_rate": 3.95527581879096e-06, "loss": 0.134, "step": 5984 }, { "epoch": 0.9697018794556059, "grad_norm": 0.8889530301094055, "learning_rate": 3.954920220274307e-06, "loss": 0.1459, "step": 5985 }, { "epoch": 0.9698639014906028, "grad_norm": 0.9564645886421204, "learning_rate": 3.954564577240615e-06, "loss": 0.164, "step": 5986 }, { "epoch": 0.9700259235255995, "grad_norm": 0.7842409610748291, "learning_rate": 3.954208889700768e-06, "loss": 0.1222, "step": 5987 }, { "epoch": 0.9701879455605963, "grad_norm": 0.8455632328987122, "learning_rate": 3.9538531576656465e-06, "loss": 0.1347, "step": 5988 }, { "epoch": 0.970349967595593, "grad_norm": 0.688930869102478, "learning_rate": 3.953497381146139e-06, "loss": 0.1216, "step": 5989 }, { "epoch": 0.9705119896305897, "grad_norm": 1.0175490379333496, "learning_rate": 3.953141560153128e-06, "loss": 0.1671, "step": 5990 }, { "epoch": 0.9706740116655865, "grad_norm": 0.9077379703521729, "learning_rate": 3.952785694697502e-06, "loss": 0.1554, "step": 5991 }, { "epoch": 0.9708360337005832, "grad_norm": 0.8699192404747009, "learning_rate": 3.952429784790148e-06, "loss": 0.1375, "step": 5992 }, { "epoch": 0.9709980557355801, "grad_norm": 0.8834347724914551, "learning_rate": 3.952073830441959e-06, "loss": 0.1395, "step": 5993 }, { "epoch": 0.9711600777705768, "grad_norm": 0.8686573505401611, "learning_rate": 3.951717831663825e-06, "loss": 0.1358, "step": 5994 }, { "epoch": 0.9713220998055736, "grad_norm": 1.110762357711792, "learning_rate": 3.951361788466636e-06, "loss": 0.1464, "step": 5995 }, { "epoch": 0.9714841218405703, "grad_norm": 0.8450319170951843, "learning_rate": 3.951005700861291e-06, "loss": 0.1428, "step": 5996 }, { "epoch": 0.971646143875567, "grad_norm": 0.9132416844367981, "learning_rate": 3.950649568858682e-06, "loss": 0.1431, "step": 5997 }, { "epoch": 0.9718081659105638, "grad_norm": 0.695837140083313, "learning_rate": 3.9502933924697076e-06, "loss": 0.1142, "step": 5998 }, { "epoch": 0.9719701879455606, "grad_norm": 0.8655622005462646, "learning_rate": 3.949937171705264e-06, "loss": 0.1355, "step": 5999 }, { "epoch": 0.9721322099805574, "grad_norm": 0.7984117865562439, "learning_rate": 3.949580906576252e-06, "loss": 0.1272, "step": 6000 }, { "epoch": 0.9722942320155541, "grad_norm": 0.8514125347137451, "learning_rate": 3.949224597093572e-06, "loss": 0.1446, "step": 6001 }, { "epoch": 0.9724562540505509, "grad_norm": 0.9616315960884094, "learning_rate": 3.948868243268127e-06, "loss": 0.1523, "step": 6002 }, { "epoch": 0.9726182760855476, "grad_norm": 0.9208241105079651, "learning_rate": 3.948511845110819e-06, "loss": 0.1457, "step": 6003 }, { "epoch": 0.9727802981205443, "grad_norm": 0.7493133544921875, "learning_rate": 3.948155402632554e-06, "loss": 0.1158, "step": 6004 }, { "epoch": 0.9729423201555412, "grad_norm": 1.0176682472229004, "learning_rate": 3.947798915844239e-06, "loss": 0.1731, "step": 6005 }, { "epoch": 0.9731043421905379, "grad_norm": 0.8895021080970764, "learning_rate": 3.94744238475678e-06, "loss": 0.1423, "step": 6006 }, { "epoch": 0.9732663642255347, "grad_norm": 0.7985418438911438, "learning_rate": 3.947085809381087e-06, "loss": 0.1244, "step": 6007 }, { "epoch": 0.9734283862605314, "grad_norm": 0.7558802366256714, "learning_rate": 3.94672918972807e-06, "loss": 0.1223, "step": 6008 }, { "epoch": 0.9735904082955282, "grad_norm": 0.8864288330078125, "learning_rate": 3.946372525808641e-06, "loss": 0.1423, "step": 6009 }, { "epoch": 0.9737524303305249, "grad_norm": 0.9449102282524109, "learning_rate": 3.946015817633714e-06, "loss": 0.1252, "step": 6010 }, { "epoch": 0.9739144523655218, "grad_norm": 0.7771594524383545, "learning_rate": 3.9456590652142005e-06, "loss": 0.1204, "step": 6011 }, { "epoch": 0.9740764744005185, "grad_norm": 0.7628992199897766, "learning_rate": 3.945302268561019e-06, "loss": 0.1282, "step": 6012 }, { "epoch": 0.9742384964355152, "grad_norm": 0.8803264498710632, "learning_rate": 3.944945427685085e-06, "loss": 0.1308, "step": 6013 }, { "epoch": 0.974400518470512, "grad_norm": 0.7819192409515381, "learning_rate": 3.944588542597319e-06, "loss": 0.1302, "step": 6014 }, { "epoch": 0.9745625405055087, "grad_norm": 0.9194301962852478, "learning_rate": 3.944231613308637e-06, "loss": 0.1479, "step": 6015 }, { "epoch": 0.9747245625405055, "grad_norm": 0.9355913996696472, "learning_rate": 3.943874639829964e-06, "loss": 0.1463, "step": 6016 }, { "epoch": 0.9748865845755023, "grad_norm": 0.8575900793075562, "learning_rate": 3.9435176221722215e-06, "loss": 0.125, "step": 6017 }, { "epoch": 0.9750486066104991, "grad_norm": 0.9341332912445068, "learning_rate": 3.943160560346332e-06, "loss": 0.1502, "step": 6018 }, { "epoch": 0.9752106286454958, "grad_norm": 0.9694084525108337, "learning_rate": 3.942803454363224e-06, "loss": 0.1459, "step": 6019 }, { "epoch": 0.9753726506804925, "grad_norm": 0.7550426721572876, "learning_rate": 3.942446304233819e-06, "loss": 0.1141, "step": 6020 }, { "epoch": 0.9755346727154893, "grad_norm": 0.890663743019104, "learning_rate": 3.942089109969049e-06, "loss": 0.144, "step": 6021 }, { "epoch": 0.975696694750486, "grad_norm": 0.7910590171813965, "learning_rate": 3.941731871579842e-06, "loss": 0.1293, "step": 6022 }, { "epoch": 0.9758587167854829, "grad_norm": 0.9037308096885681, "learning_rate": 3.941374589077128e-06, "loss": 0.1524, "step": 6023 }, { "epoch": 0.9760207388204796, "grad_norm": 0.9725434184074402, "learning_rate": 3.94101726247184e-06, "loss": 0.1665, "step": 6024 }, { "epoch": 0.9761827608554764, "grad_norm": 0.8812149167060852, "learning_rate": 3.940659891774912e-06, "loss": 0.139, "step": 6025 }, { "epoch": 0.9763447828904731, "grad_norm": 0.9266581535339355, "learning_rate": 3.9403024769972766e-06, "loss": 0.1425, "step": 6026 }, { "epoch": 0.9765068049254698, "grad_norm": 0.7862972617149353, "learning_rate": 3.939945018149871e-06, "loss": 0.1288, "step": 6027 }, { "epoch": 0.9766688269604666, "grad_norm": 0.7440429925918579, "learning_rate": 3.939587515243632e-06, "loss": 0.1236, "step": 6028 }, { "epoch": 0.9768308489954634, "grad_norm": 0.9152294397354126, "learning_rate": 3.9392299682894995e-06, "loss": 0.1558, "step": 6029 }, { "epoch": 0.9769928710304602, "grad_norm": 0.907541811466217, "learning_rate": 3.938872377298413e-06, "loss": 0.1379, "step": 6030 }, { "epoch": 0.9771548930654569, "grad_norm": 0.8130697011947632, "learning_rate": 3.938514742281313e-06, "loss": 0.1342, "step": 6031 }, { "epoch": 0.9773169151004537, "grad_norm": 0.7944709658622742, "learning_rate": 3.938157063249144e-06, "loss": 0.1385, "step": 6032 }, { "epoch": 0.9774789371354504, "grad_norm": 0.8079535365104675, "learning_rate": 3.937799340212849e-06, "loss": 0.1355, "step": 6033 }, { "epoch": 0.9776409591704471, "grad_norm": 0.8324079513549805, "learning_rate": 3.937441573183373e-06, "loss": 0.1358, "step": 6034 }, { "epoch": 0.977802981205444, "grad_norm": 0.868264377117157, "learning_rate": 3.937083762171663e-06, "loss": 0.1393, "step": 6035 }, { "epoch": 0.9779650032404407, "grad_norm": 0.7842541933059692, "learning_rate": 3.936725907188668e-06, "loss": 0.1252, "step": 6036 }, { "epoch": 0.9781270252754375, "grad_norm": 0.9624860286712646, "learning_rate": 3.936368008245337e-06, "loss": 0.1542, "step": 6037 }, { "epoch": 0.9782890473104342, "grad_norm": 0.866369366645813, "learning_rate": 3.936010065352622e-06, "loss": 0.1383, "step": 6038 }, { "epoch": 0.978451069345431, "grad_norm": 0.8309353590011597, "learning_rate": 3.935652078521473e-06, "loss": 0.1299, "step": 6039 }, { "epoch": 0.9786130913804277, "grad_norm": 0.8431254029273987, "learning_rate": 3.935294047762844e-06, "loss": 0.1346, "step": 6040 }, { "epoch": 0.9787751134154244, "grad_norm": 0.8997429609298706, "learning_rate": 3.934935973087691e-06, "loss": 0.1351, "step": 6041 }, { "epoch": 0.9789371354504213, "grad_norm": 0.9118685722351074, "learning_rate": 3.93457785450697e-06, "loss": 0.1442, "step": 6042 }, { "epoch": 0.979099157485418, "grad_norm": 0.8979122638702393, "learning_rate": 3.934219692031639e-06, "loss": 0.1526, "step": 6043 }, { "epoch": 0.9792611795204148, "grad_norm": 0.8222776055335999, "learning_rate": 3.933861485672656e-06, "loss": 0.1327, "step": 6044 }, { "epoch": 0.9794232015554115, "grad_norm": 0.854524552822113, "learning_rate": 3.9335032354409794e-06, "loss": 0.1402, "step": 6045 }, { "epoch": 0.9795852235904083, "grad_norm": 0.8611454367637634, "learning_rate": 3.933144941347574e-06, "loss": 0.1437, "step": 6046 }, { "epoch": 0.979747245625405, "grad_norm": 0.8141959309577942, "learning_rate": 3.9327866034034025e-06, "loss": 0.1492, "step": 6047 }, { "epoch": 0.9799092676604018, "grad_norm": 0.7863458395004272, "learning_rate": 3.932428221619427e-06, "loss": 0.1275, "step": 6048 }, { "epoch": 0.9800712896953986, "grad_norm": 0.8340588808059692, "learning_rate": 3.9320697960066155e-06, "loss": 0.1455, "step": 6049 }, { "epoch": 0.9802333117303953, "grad_norm": 0.9195173978805542, "learning_rate": 3.931711326575933e-06, "loss": 0.1282, "step": 6050 }, { "epoch": 0.9803953337653921, "grad_norm": 0.8525646328926086, "learning_rate": 3.931352813338348e-06, "loss": 0.1431, "step": 6051 }, { "epoch": 0.9805573558003888, "grad_norm": 0.8366729617118835, "learning_rate": 3.9309942563048315e-06, "loss": 0.1291, "step": 6052 }, { "epoch": 0.9807193778353857, "grad_norm": 0.8262105584144592, "learning_rate": 3.930635655486353e-06, "loss": 0.1377, "step": 6053 }, { "epoch": 0.9808813998703824, "grad_norm": 0.9952908158302307, "learning_rate": 3.930277010893887e-06, "loss": 0.1637, "step": 6054 }, { "epoch": 0.9810434219053791, "grad_norm": 0.8296589851379395, "learning_rate": 3.929918322538404e-06, "loss": 0.1395, "step": 6055 }, { "epoch": 0.9812054439403759, "grad_norm": 0.8813459873199463, "learning_rate": 3.929559590430881e-06, "loss": 0.1525, "step": 6056 }, { "epoch": 0.9813674659753726, "grad_norm": 2.0823469161987305, "learning_rate": 3.9292008145822955e-06, "loss": 0.1402, "step": 6057 }, { "epoch": 0.9815294880103694, "grad_norm": 0.8811355233192444, "learning_rate": 3.928841995003622e-06, "loss": 0.1431, "step": 6058 }, { "epoch": 0.9816915100453661, "grad_norm": 0.7101410627365112, "learning_rate": 3.928483131705842e-06, "loss": 0.1126, "step": 6059 }, { "epoch": 0.981853532080363, "grad_norm": 0.7249727249145508, "learning_rate": 3.928124224699935e-06, "loss": 0.1111, "step": 6060 }, { "epoch": 0.9820155541153597, "grad_norm": 1.166760802268982, "learning_rate": 3.927765273996882e-06, "loss": 0.1405, "step": 6061 }, { "epoch": 0.9821775761503565, "grad_norm": 0.7682209610939026, "learning_rate": 3.927406279607668e-06, "loss": 0.1365, "step": 6062 }, { "epoch": 0.9823395981853532, "grad_norm": 0.916122317314148, "learning_rate": 3.927047241543275e-06, "loss": 0.1388, "step": 6063 }, { "epoch": 0.9825016202203499, "grad_norm": 0.9284025430679321, "learning_rate": 3.92668815981469e-06, "loss": 0.1475, "step": 6064 }, { "epoch": 0.9826636422553467, "grad_norm": 0.9036963582038879, "learning_rate": 3.9263290344329e-06, "loss": 0.1482, "step": 6065 }, { "epoch": 0.9828256642903435, "grad_norm": 0.9499265551567078, "learning_rate": 3.925969865408893e-06, "loss": 0.1494, "step": 6066 }, { "epoch": 0.9829876863253403, "grad_norm": 0.8025641441345215, "learning_rate": 3.925610652753659e-06, "loss": 0.1319, "step": 6067 }, { "epoch": 0.983149708360337, "grad_norm": 0.840201199054718, "learning_rate": 3.925251396478189e-06, "loss": 0.1483, "step": 6068 }, { "epoch": 0.9833117303953338, "grad_norm": 0.9733452796936035, "learning_rate": 3.924892096593476e-06, "loss": 0.1581, "step": 6069 }, { "epoch": 0.9834737524303305, "grad_norm": 0.7932161688804626, "learning_rate": 3.9245327531105115e-06, "loss": 0.1362, "step": 6070 }, { "epoch": 0.9836357744653272, "grad_norm": 0.9006250500679016, "learning_rate": 3.924173366040294e-06, "loss": 0.1351, "step": 6071 }, { "epoch": 0.9837977965003241, "grad_norm": 0.913811206817627, "learning_rate": 3.923813935393816e-06, "loss": 0.1392, "step": 6072 }, { "epoch": 0.9839598185353208, "grad_norm": 0.7927339673042297, "learning_rate": 3.923454461182078e-06, "loss": 0.1271, "step": 6073 }, { "epoch": 0.9841218405703176, "grad_norm": 0.8521562218666077, "learning_rate": 3.923094943416078e-06, "loss": 0.1226, "step": 6074 }, { "epoch": 0.9842838626053143, "grad_norm": 1.042307734489441, "learning_rate": 3.922735382106817e-06, "loss": 0.1802, "step": 6075 }, { "epoch": 0.9844458846403111, "grad_norm": 0.9505312442779541, "learning_rate": 3.922375777265296e-06, "loss": 0.1345, "step": 6076 }, { "epoch": 0.9846079066753078, "grad_norm": 0.8530185222625732, "learning_rate": 3.922016128902519e-06, "loss": 0.1367, "step": 6077 }, { "epoch": 0.9847699287103046, "grad_norm": 0.8222798705101013, "learning_rate": 3.921656437029488e-06, "loss": 0.1481, "step": 6078 }, { "epoch": 0.9849319507453014, "grad_norm": 1.2158775329589844, "learning_rate": 3.921296701657211e-06, "loss": 0.1225, "step": 6079 }, { "epoch": 0.9850939727802981, "grad_norm": 2.2171168327331543, "learning_rate": 3.9209369227966945e-06, "loss": 0.1395, "step": 6080 }, { "epoch": 0.9852559948152949, "grad_norm": 0.7408256530761719, "learning_rate": 3.920577100458948e-06, "loss": 0.1185, "step": 6081 }, { "epoch": 0.9854180168502916, "grad_norm": 0.8751834034919739, "learning_rate": 3.920217234654978e-06, "loss": 0.1487, "step": 6082 }, { "epoch": 0.9855800388852884, "grad_norm": 0.9367472529411316, "learning_rate": 3.919857325395799e-06, "loss": 0.1458, "step": 6083 }, { "epoch": 0.9857420609202852, "grad_norm": 0.9146280288696289, "learning_rate": 3.919497372692421e-06, "loss": 0.1509, "step": 6084 }, { "epoch": 0.9859040829552819, "grad_norm": 0.8548406958580017, "learning_rate": 3.919137376555859e-06, "loss": 0.1375, "step": 6085 }, { "epoch": 0.9860661049902787, "grad_norm": 0.9017359614372253, "learning_rate": 3.918777336997127e-06, "loss": 0.1334, "step": 6086 }, { "epoch": 0.9862281270252754, "grad_norm": 0.6784223318099976, "learning_rate": 3.918417254027243e-06, "loss": 0.1084, "step": 6087 }, { "epoch": 0.9863901490602722, "grad_norm": 0.7751449346542358, "learning_rate": 3.918057127657222e-06, "loss": 0.1193, "step": 6088 }, { "epoch": 0.9865521710952689, "grad_norm": 0.9636908769607544, "learning_rate": 3.917696957898085e-06, "loss": 0.1814, "step": 6089 }, { "epoch": 0.9867141931302658, "grad_norm": 0.8328016996383667, "learning_rate": 3.9173367447608525e-06, "loss": 0.1353, "step": 6090 }, { "epoch": 0.9868762151652625, "grad_norm": 0.9201585054397583, "learning_rate": 3.9169764882565445e-06, "loss": 0.13, "step": 6091 }, { "epoch": 0.9870382372002592, "grad_norm": 0.8333777189254761, "learning_rate": 3.916616188396185e-06, "loss": 0.1367, "step": 6092 }, { "epoch": 0.987200259235256, "grad_norm": 0.8652458190917969, "learning_rate": 3.916255845190799e-06, "loss": 0.1338, "step": 6093 }, { "epoch": 0.9873622812702527, "grad_norm": 0.9521787762641907, "learning_rate": 3.915895458651411e-06, "loss": 0.1585, "step": 6094 }, { "epoch": 0.9875243033052495, "grad_norm": 0.70457923412323, "learning_rate": 3.915535028789049e-06, "loss": 0.1024, "step": 6095 }, { "epoch": 0.9876863253402463, "grad_norm": 0.9282441735267639, "learning_rate": 3.9151745556147404e-06, "loss": 0.1636, "step": 6096 }, { "epoch": 0.9878483473752431, "grad_norm": 0.8370496034622192, "learning_rate": 3.914814039139515e-06, "loss": 0.13, "step": 6097 }, { "epoch": 0.9880103694102398, "grad_norm": 0.8045435547828674, "learning_rate": 3.914453479374403e-06, "loss": 0.1413, "step": 6098 }, { "epoch": 0.9881723914452365, "grad_norm": 0.8645136952400208, "learning_rate": 3.914092876330439e-06, "loss": 0.1422, "step": 6099 }, { "epoch": 0.9883344134802333, "grad_norm": 0.8007838726043701, "learning_rate": 3.913732230018654e-06, "loss": 0.1364, "step": 6100 }, { "epoch": 0.98849643551523, "grad_norm": 0.9162132143974304, "learning_rate": 3.913371540450084e-06, "loss": 0.1577, "step": 6101 }, { "epoch": 0.9886584575502269, "grad_norm": 0.8496082425117493, "learning_rate": 3.913010807635765e-06, "loss": 0.1321, "step": 6102 }, { "epoch": 0.9888204795852236, "grad_norm": 0.7729654312133789, "learning_rate": 3.912650031586734e-06, "loss": 0.1298, "step": 6103 }, { "epoch": 0.9889825016202204, "grad_norm": 0.7477056980133057, "learning_rate": 3.9122892123140324e-06, "loss": 0.111, "step": 6104 }, { "epoch": 0.9891445236552171, "grad_norm": 0.937633752822876, "learning_rate": 3.911928349828697e-06, "loss": 0.1427, "step": 6105 }, { "epoch": 0.9893065456902139, "grad_norm": 0.8032740354537964, "learning_rate": 3.911567444141771e-06, "loss": 0.1345, "step": 6106 }, { "epoch": 0.9894685677252106, "grad_norm": 0.8660422563552856, "learning_rate": 3.911206495264299e-06, "loss": 0.1322, "step": 6107 }, { "epoch": 0.9896305897602073, "grad_norm": 0.987167477607727, "learning_rate": 3.910845503207322e-06, "loss": 0.1606, "step": 6108 }, { "epoch": 0.9897926117952042, "grad_norm": 0.7517658472061157, "learning_rate": 3.910484467981886e-06, "loss": 0.1172, "step": 6109 }, { "epoch": 0.9899546338302009, "grad_norm": 0.8762201070785522, "learning_rate": 3.9101233895990396e-06, "loss": 0.145, "step": 6110 }, { "epoch": 0.9901166558651977, "grad_norm": 0.7897788882255554, "learning_rate": 3.9097622680698296e-06, "loss": 0.1191, "step": 6111 }, { "epoch": 0.9902786779001944, "grad_norm": 0.8457385301589966, "learning_rate": 3.909401103405307e-06, "loss": 0.1304, "step": 6112 }, { "epoch": 0.9904406999351912, "grad_norm": 0.8108502626419067, "learning_rate": 3.9090398956165194e-06, "loss": 0.1161, "step": 6113 }, { "epoch": 0.990602721970188, "grad_norm": 0.9371310472488403, "learning_rate": 3.908678644714522e-06, "loss": 0.1596, "step": 6114 }, { "epoch": 0.9907647440051847, "grad_norm": 0.8349683284759521, "learning_rate": 3.908317350710366e-06, "loss": 0.1356, "step": 6115 }, { "epoch": 0.9909267660401815, "grad_norm": 0.8057882785797119, "learning_rate": 3.907956013615108e-06, "loss": 0.1202, "step": 6116 }, { "epoch": 0.9910887880751782, "grad_norm": 0.8097211718559265, "learning_rate": 3.907594633439803e-06, "loss": 0.1269, "step": 6117 }, { "epoch": 0.991250810110175, "grad_norm": 0.8821548819541931, "learning_rate": 3.907233210195508e-06, "loss": 0.1455, "step": 6118 }, { "epoch": 0.9914128321451717, "grad_norm": 0.88327556848526, "learning_rate": 3.906871743893283e-06, "loss": 0.143, "step": 6119 }, { "epoch": 0.9915748541801686, "grad_norm": 0.7924086451530457, "learning_rate": 3.906510234544186e-06, "loss": 0.1278, "step": 6120 }, { "epoch": 0.9917368762151653, "grad_norm": 0.8668779134750366, "learning_rate": 3.906148682159281e-06, "loss": 0.1289, "step": 6121 }, { "epoch": 0.991898898250162, "grad_norm": 0.8895964026451111, "learning_rate": 3.905787086749628e-06, "loss": 0.1447, "step": 6122 }, { "epoch": 0.9920609202851588, "grad_norm": 0.8018289804458618, "learning_rate": 3.905425448326293e-06, "loss": 0.1378, "step": 6123 }, { "epoch": 0.9922229423201555, "grad_norm": 0.8257730603218079, "learning_rate": 3.90506376690034e-06, "loss": 0.1348, "step": 6124 }, { "epoch": 0.9923849643551523, "grad_norm": 0.8249266743659973, "learning_rate": 3.9047020424828355e-06, "loss": 0.1194, "step": 6125 }, { "epoch": 0.992546986390149, "grad_norm": 0.8011137247085571, "learning_rate": 3.904340275084848e-06, "loss": 0.1286, "step": 6126 }, { "epoch": 0.9927090084251459, "grad_norm": 0.8544657230377197, "learning_rate": 3.903978464717446e-06, "loss": 0.1367, "step": 6127 }, { "epoch": 0.9928710304601426, "grad_norm": 0.8813843131065369, "learning_rate": 3.9036166113917015e-06, "loss": 0.1451, "step": 6128 }, { "epoch": 0.9930330524951393, "grad_norm": 0.7664751410484314, "learning_rate": 3.903254715118686e-06, "loss": 0.1218, "step": 6129 }, { "epoch": 0.9931950745301361, "grad_norm": 0.7715081572532654, "learning_rate": 3.90289277590947e-06, "loss": 0.1336, "step": 6130 }, { "epoch": 0.9933570965651328, "grad_norm": 0.8148114085197449, "learning_rate": 3.902530793775132e-06, "loss": 0.1296, "step": 6131 }, { "epoch": 0.9935191186001296, "grad_norm": 0.8578192591667175, "learning_rate": 3.902168768726745e-06, "loss": 0.1366, "step": 6132 }, { "epoch": 0.9936811406351264, "grad_norm": 0.7358187437057495, "learning_rate": 3.9018067007753865e-06, "loss": 0.1169, "step": 6133 }, { "epoch": 0.9938431626701232, "grad_norm": 0.7255628108978271, "learning_rate": 3.9014445899321355e-06, "loss": 0.1157, "step": 6134 }, { "epoch": 0.9940051847051199, "grad_norm": 0.7971196174621582, "learning_rate": 3.901082436208071e-06, "loss": 0.1348, "step": 6135 }, { "epoch": 0.9941672067401166, "grad_norm": 0.800977349281311, "learning_rate": 3.900720239614275e-06, "loss": 0.1239, "step": 6136 }, { "epoch": 0.9943292287751134, "grad_norm": 0.8563368320465088, "learning_rate": 3.90035800016183e-06, "loss": 0.1358, "step": 6137 }, { "epoch": 0.9944912508101101, "grad_norm": 0.7976403832435608, "learning_rate": 3.899995717861818e-06, "loss": 0.1327, "step": 6138 }, { "epoch": 0.994653272845107, "grad_norm": 0.7980660200119019, "learning_rate": 3.899633392725325e-06, "loss": 0.1272, "step": 6139 }, { "epoch": 0.9948152948801037, "grad_norm": 0.7953473329544067, "learning_rate": 3.899271024763438e-06, "loss": 0.135, "step": 6140 }, { "epoch": 0.9949773169151005, "grad_norm": 0.7407287955284119, "learning_rate": 3.898908613987243e-06, "loss": 0.119, "step": 6141 }, { "epoch": 0.9951393389500972, "grad_norm": 0.8958144187927246, "learning_rate": 3.89854616040783e-06, "loss": 0.1443, "step": 6142 }, { "epoch": 0.9953013609850939, "grad_norm": 0.7830373644828796, "learning_rate": 3.898183664036289e-06, "loss": 0.1207, "step": 6143 }, { "epoch": 0.9954633830200907, "grad_norm": 0.7853714227676392, "learning_rate": 3.897821124883711e-06, "loss": 0.1148, "step": 6144 }, { "epoch": 0.9956254050550875, "grad_norm": 0.9703736305236816, "learning_rate": 3.89745854296119e-06, "loss": 0.1581, "step": 6145 }, { "epoch": 0.9957874270900843, "grad_norm": 0.6939820051193237, "learning_rate": 3.897095918279818e-06, "loss": 0.11, "step": 6146 }, { "epoch": 0.995949449125081, "grad_norm": 1.0396939516067505, "learning_rate": 3.896733250850694e-06, "loss": 0.1744, "step": 6147 }, { "epoch": 0.9961114711600778, "grad_norm": 0.8816990256309509, "learning_rate": 3.896370540684911e-06, "loss": 0.1486, "step": 6148 }, { "epoch": 0.9962734931950745, "grad_norm": 0.8170563578605652, "learning_rate": 3.896007787793569e-06, "loss": 0.1283, "step": 6149 }, { "epoch": 0.9964355152300713, "grad_norm": 0.7984526753425598, "learning_rate": 3.895644992187767e-06, "loss": 0.1279, "step": 6150 }, { "epoch": 0.9965975372650681, "grad_norm": 0.8505795001983643, "learning_rate": 3.895282153878606e-06, "loss": 0.1496, "step": 6151 }, { "epoch": 0.9967595593000648, "grad_norm": 0.8687652349472046, "learning_rate": 3.894919272877187e-06, "loss": 0.1546, "step": 6152 }, { "epoch": 0.9969215813350616, "grad_norm": 0.7245486974716187, "learning_rate": 3.894556349194613e-06, "loss": 0.1203, "step": 6153 }, { "epoch": 0.9970836033700583, "grad_norm": 0.8453713059425354, "learning_rate": 3.894193382841991e-06, "loss": 0.1318, "step": 6154 }, { "epoch": 0.9972456254050551, "grad_norm": 0.7163641452789307, "learning_rate": 3.893830373830425e-06, "loss": 0.1158, "step": 6155 }, { "epoch": 0.9974076474400518, "grad_norm": 0.8621683120727539, "learning_rate": 3.893467322171022e-06, "loss": 0.1385, "step": 6156 }, { "epoch": 0.9975696694750487, "grad_norm": 0.7639792561531067, "learning_rate": 3.893104227874892e-06, "loss": 0.1332, "step": 6157 }, { "epoch": 0.9977316915100454, "grad_norm": 0.8099455237388611, "learning_rate": 3.892741090953143e-06, "loss": 0.1299, "step": 6158 }, { "epoch": 0.9978937135450421, "grad_norm": 1.0411269664764404, "learning_rate": 3.892377911416888e-06, "loss": 0.1516, "step": 6159 }, { "epoch": 0.9980557355800389, "grad_norm": 0.8103548288345337, "learning_rate": 3.892014689277238e-06, "loss": 0.1205, "step": 6160 }, { "epoch": 0.9982177576150356, "grad_norm": 0.7883999943733215, "learning_rate": 3.891651424545307e-06, "loss": 0.1273, "step": 6161 }, { "epoch": 0.9983797796500324, "grad_norm": 0.8199430704116821, "learning_rate": 3.891288117232209e-06, "loss": 0.1318, "step": 6162 }, { "epoch": 0.9985418016850292, "grad_norm": 0.8376681208610535, "learning_rate": 3.890924767349062e-06, "loss": 0.1385, "step": 6163 }, { "epoch": 0.998703823720026, "grad_norm": 0.8806131482124329, "learning_rate": 3.890561374906985e-06, "loss": 0.1461, "step": 6164 }, { "epoch": 0.9988658457550227, "grad_norm": 0.7740499377250671, "learning_rate": 3.8901979399170935e-06, "loss": 0.1256, "step": 6165 }, { "epoch": 0.9990278677900194, "grad_norm": 0.8058324456214905, "learning_rate": 3.889834462390509e-06, "loss": 0.123, "step": 6166 }, { "epoch": 0.9991898898250162, "grad_norm": 0.8365480303764343, "learning_rate": 3.889470942338354e-06, "loss": 0.1291, "step": 6167 }, { "epoch": 0.9993519118600129, "grad_norm": 0.8258197903633118, "learning_rate": 3.889107379771749e-06, "loss": 0.1342, "step": 6168 }, { "epoch": 0.9995139338950098, "grad_norm": 0.8261362314224243, "learning_rate": 3.888743774701822e-06, "loss": 0.1371, "step": 6169 }, { "epoch": 0.9996759559300065, "grad_norm": 0.8388069868087769, "learning_rate": 3.888380127139695e-06, "loss": 0.1249, "step": 6170 }, { "epoch": 0.9998379779650033, "grad_norm": 0.9523016214370728, "learning_rate": 3.888016437096497e-06, "loss": 0.1439, "step": 6171 }, { "epoch": 1.0, "grad_norm": 0.9021637439727783, "learning_rate": 3.887652704583354e-06, "loss": 0.1394, "step": 6172 }, { "epoch": 1.0001620220349967, "grad_norm": 0.7438717484474182, "learning_rate": 3.887288929611396e-06, "loss": 0.1082, "step": 6173 }, { "epoch": 1.0003240440699934, "grad_norm": 0.7213918566703796, "learning_rate": 3.886925112191754e-06, "loss": 0.1127, "step": 6174 }, { "epoch": 1.0004860661049904, "grad_norm": 0.6617137789726257, "learning_rate": 3.88656125233556e-06, "loss": 0.0903, "step": 6175 }, { "epoch": 1.000648088139987, "grad_norm": 0.8376255631446838, "learning_rate": 3.886197350053948e-06, "loss": 0.1015, "step": 6176 }, { "epoch": 1.0008101101749838, "grad_norm": 0.7548001408576965, "learning_rate": 3.88583340535805e-06, "loss": 0.1017, "step": 6177 }, { "epoch": 1.0009721322099805, "grad_norm": 0.7314820885658264, "learning_rate": 3.885469418259005e-06, "loss": 0.1011, "step": 6178 }, { "epoch": 1.0011341542449774, "grad_norm": 0.7919092178344727, "learning_rate": 3.885105388767948e-06, "loss": 0.1008, "step": 6179 }, { "epoch": 1.0012961762799741, "grad_norm": 0.7691347002983093, "learning_rate": 3.8847413168960175e-06, "loss": 0.1067, "step": 6180 }, { "epoch": 1.0014581983149708, "grad_norm": 0.7515387535095215, "learning_rate": 3.884377202654354e-06, "loss": 0.0979, "step": 6181 }, { "epoch": 1.0016202203499676, "grad_norm": 0.819832980632782, "learning_rate": 3.884013046054098e-06, "loss": 0.1017, "step": 6182 }, { "epoch": 1.0017822423849643, "grad_norm": 0.7462260723114014, "learning_rate": 3.883648847106393e-06, "loss": 0.1009, "step": 6183 }, { "epoch": 1.0019442644199612, "grad_norm": 0.8221039772033691, "learning_rate": 3.8832846058223814e-06, "loss": 0.1054, "step": 6184 }, { "epoch": 1.002106286454958, "grad_norm": 0.8322626948356628, "learning_rate": 3.882920322213207e-06, "loss": 0.0982, "step": 6185 }, { "epoch": 1.0022683084899546, "grad_norm": 0.8119961023330688, "learning_rate": 3.882555996290019e-06, "loss": 0.0944, "step": 6186 }, { "epoch": 1.0024303305249513, "grad_norm": 1.05998957157135, "learning_rate": 3.882191628063962e-06, "loss": 0.1197, "step": 6187 }, { "epoch": 1.002592352559948, "grad_norm": 0.9906516075134277, "learning_rate": 3.881827217546187e-06, "loss": 0.1164, "step": 6188 }, { "epoch": 1.002754374594945, "grad_norm": 0.8134022355079651, "learning_rate": 3.881462764747842e-06, "loss": 0.0937, "step": 6189 }, { "epoch": 1.0029163966299417, "grad_norm": 1.1540935039520264, "learning_rate": 3.881098269680081e-06, "loss": 0.1009, "step": 6190 }, { "epoch": 1.0030784186649384, "grad_norm": 0.8985638618469238, "learning_rate": 3.880733732354054e-06, "loss": 0.1034, "step": 6191 }, { "epoch": 1.0032404406999351, "grad_norm": 0.9259430766105652, "learning_rate": 3.880369152780916e-06, "loss": 0.1073, "step": 6192 }, { "epoch": 1.003402462734932, "grad_norm": 0.8292042016983032, "learning_rate": 3.880004530971823e-06, "loss": 0.1135, "step": 6193 }, { "epoch": 1.0035644847699288, "grad_norm": 0.7862805128097534, "learning_rate": 3.879639866937931e-06, "loss": 0.0931, "step": 6194 }, { "epoch": 1.0037265068049255, "grad_norm": 0.7968874573707581, "learning_rate": 3.879275160690397e-06, "loss": 0.1072, "step": 6195 }, { "epoch": 1.0038885288399222, "grad_norm": 0.7896031141281128, "learning_rate": 3.8789104122403815e-06, "loss": 0.0998, "step": 6196 }, { "epoch": 1.004050550874919, "grad_norm": 0.8073359727859497, "learning_rate": 3.878545621599043e-06, "loss": 0.1079, "step": 6197 }, { "epoch": 1.0042125729099158, "grad_norm": 0.871297299861908, "learning_rate": 3.878180788777546e-06, "loss": 0.116, "step": 6198 }, { "epoch": 1.0043745949449125, "grad_norm": 0.7919323444366455, "learning_rate": 3.877815913787052e-06, "loss": 0.1082, "step": 6199 }, { "epoch": 1.0045366169799093, "grad_norm": 0.8027322292327881, "learning_rate": 3.877450996638725e-06, "loss": 0.1072, "step": 6200 }, { "epoch": 1.004698639014906, "grad_norm": 0.7986040115356445, "learning_rate": 3.87708603734373e-06, "loss": 0.1078, "step": 6201 }, { "epoch": 1.0048606610499027, "grad_norm": 0.7671363949775696, "learning_rate": 3.876721035913236e-06, "loss": 0.1029, "step": 6202 }, { "epoch": 1.0050226830848996, "grad_norm": 0.7083774209022522, "learning_rate": 3.87635599235841e-06, "loss": 0.0964, "step": 6203 }, { "epoch": 1.0051847051198963, "grad_norm": 0.7173096537590027, "learning_rate": 3.87599090669042e-06, "loss": 0.0972, "step": 6204 }, { "epoch": 1.005346727154893, "grad_norm": 0.8262728452682495, "learning_rate": 3.8756257789204384e-06, "loss": 0.1103, "step": 6205 }, { "epoch": 1.0055087491898898, "grad_norm": 0.783673107624054, "learning_rate": 3.875260609059638e-06, "loss": 0.0982, "step": 6206 }, { "epoch": 1.0056707712248867, "grad_norm": 0.7423332929611206, "learning_rate": 3.8748953971191895e-06, "loss": 0.089, "step": 6207 }, { "epoch": 1.0058327932598834, "grad_norm": 0.7428287267684937, "learning_rate": 3.87453014311027e-06, "loss": 0.0988, "step": 6208 }, { "epoch": 1.00599481529488, "grad_norm": 0.886810302734375, "learning_rate": 3.874164847044054e-06, "loss": 0.1142, "step": 6209 }, { "epoch": 1.0061568373298768, "grad_norm": 0.8856390714645386, "learning_rate": 3.87379950893172e-06, "loss": 0.1035, "step": 6210 }, { "epoch": 1.0063188593648735, "grad_norm": 0.7835367321968079, "learning_rate": 3.873434128784444e-06, "loss": 0.1062, "step": 6211 }, { "epoch": 1.0064808813998705, "grad_norm": 0.9305019974708557, "learning_rate": 3.8730687066134086e-06, "loss": 0.1045, "step": 6212 }, { "epoch": 1.0066429034348672, "grad_norm": 1.0636266469955444, "learning_rate": 3.872703242429794e-06, "loss": 0.1275, "step": 6213 }, { "epoch": 1.0068049254698639, "grad_norm": 0.8035116195678711, "learning_rate": 3.8723377362447805e-06, "loss": 0.1081, "step": 6214 }, { "epoch": 1.0069669475048606, "grad_norm": 0.7572904825210571, "learning_rate": 3.871972188069554e-06, "loss": 0.0901, "step": 6215 }, { "epoch": 1.0071289695398573, "grad_norm": 0.8394489288330078, "learning_rate": 3.871606597915298e-06, "loss": 0.0988, "step": 6216 }, { "epoch": 1.0072909915748542, "grad_norm": 0.9537179470062256, "learning_rate": 3.871240965793201e-06, "loss": 0.1117, "step": 6217 }, { "epoch": 1.007453013609851, "grad_norm": 0.8602259755134583, "learning_rate": 3.870875291714448e-06, "loss": 0.1031, "step": 6218 }, { "epoch": 1.0076150356448477, "grad_norm": 0.894371747970581, "learning_rate": 3.870509575690228e-06, "loss": 0.117, "step": 6219 }, { "epoch": 1.0077770576798444, "grad_norm": 0.8317716121673584, "learning_rate": 3.870143817731732e-06, "loss": 0.104, "step": 6220 }, { "epoch": 1.0079390797148413, "grad_norm": 0.8154535293579102, "learning_rate": 3.86977801785015e-06, "loss": 0.1053, "step": 6221 }, { "epoch": 1.008101101749838, "grad_norm": 0.9335169196128845, "learning_rate": 3.8694121760566765e-06, "loss": 0.1125, "step": 6222 }, { "epoch": 1.0082631237848347, "grad_norm": 0.8863696455955505, "learning_rate": 3.869046292362504e-06, "loss": 0.1095, "step": 6223 }, { "epoch": 1.0084251458198314, "grad_norm": 0.7653059363365173, "learning_rate": 3.868680366778828e-06, "loss": 0.0922, "step": 6224 }, { "epoch": 1.0085871678548282, "grad_norm": 0.7611356973648071, "learning_rate": 3.868314399316845e-06, "loss": 0.1027, "step": 6225 }, { "epoch": 1.008749189889825, "grad_norm": 0.9434992671012878, "learning_rate": 3.867948389987752e-06, "loss": 0.1091, "step": 6226 }, { "epoch": 1.0089112119248218, "grad_norm": 0.8308627605438232, "learning_rate": 3.86758233880275e-06, "loss": 0.1032, "step": 6227 }, { "epoch": 1.0090732339598185, "grad_norm": 1.1444343328475952, "learning_rate": 3.8672162457730365e-06, "loss": 0.1183, "step": 6228 }, { "epoch": 1.0092352559948152, "grad_norm": 0.9171835780143738, "learning_rate": 3.866850110909816e-06, "loss": 0.1025, "step": 6229 }, { "epoch": 1.0093972780298122, "grad_norm": 0.8272443413734436, "learning_rate": 3.866483934224288e-06, "loss": 0.1077, "step": 6230 }, { "epoch": 1.0095593000648089, "grad_norm": 0.7675562500953674, "learning_rate": 3.866117715727659e-06, "loss": 0.0994, "step": 6231 }, { "epoch": 1.0097213220998056, "grad_norm": 0.8303428888320923, "learning_rate": 3.865751455431134e-06, "loss": 0.1019, "step": 6232 }, { "epoch": 1.0098833441348023, "grad_norm": 0.9183821082115173, "learning_rate": 3.86538515334592e-06, "loss": 0.1054, "step": 6233 }, { "epoch": 1.010045366169799, "grad_norm": 0.8656824827194214, "learning_rate": 3.865018809483224e-06, "loss": 0.1171, "step": 6234 }, { "epoch": 1.010207388204796, "grad_norm": 0.8201124668121338, "learning_rate": 3.864652423854256e-06, "loss": 0.1118, "step": 6235 }, { "epoch": 1.0103694102397927, "grad_norm": 0.8729208111763, "learning_rate": 3.864285996470226e-06, "loss": 0.1078, "step": 6236 }, { "epoch": 1.0105314322747894, "grad_norm": 0.8456924557685852, "learning_rate": 3.863919527342346e-06, "loss": 0.1091, "step": 6237 }, { "epoch": 1.010693454309786, "grad_norm": 0.9836832284927368, "learning_rate": 3.863553016481829e-06, "loss": 0.1226, "step": 6238 }, { "epoch": 1.0108554763447828, "grad_norm": 0.7698848843574524, "learning_rate": 3.863186463899891e-06, "loss": 0.1032, "step": 6239 }, { "epoch": 1.0110174983797797, "grad_norm": 0.8878111839294434, "learning_rate": 3.862819869607743e-06, "loss": 0.1039, "step": 6240 }, { "epoch": 1.0111795204147764, "grad_norm": 0.7739588618278503, "learning_rate": 3.862453233616608e-06, "loss": 0.1052, "step": 6241 }, { "epoch": 1.0113415424497731, "grad_norm": 0.7633801698684692, "learning_rate": 3.862086555937699e-06, "loss": 0.0993, "step": 6242 }, { "epoch": 1.0115035644847699, "grad_norm": 0.8758237957954407, "learning_rate": 3.861719836582239e-06, "loss": 0.1156, "step": 6243 }, { "epoch": 1.0116655865197668, "grad_norm": 0.8216294050216675, "learning_rate": 3.861353075561446e-06, "loss": 0.1035, "step": 6244 }, { "epoch": 1.0118276085547635, "grad_norm": 0.9009713530540466, "learning_rate": 3.860986272886545e-06, "loss": 0.1155, "step": 6245 }, { "epoch": 1.0119896305897602, "grad_norm": 0.8081684112548828, "learning_rate": 3.860619428568756e-06, "loss": 0.1059, "step": 6246 }, { "epoch": 1.012151652624757, "grad_norm": 0.6876736283302307, "learning_rate": 3.860252542619305e-06, "loss": 0.0884, "step": 6247 }, { "epoch": 1.0123136746597536, "grad_norm": 0.8331660032272339, "learning_rate": 3.859885615049419e-06, "loss": 0.1155, "step": 6248 }, { "epoch": 1.0124756966947506, "grad_norm": 0.8493173122406006, "learning_rate": 3.859518645870323e-06, "loss": 0.1121, "step": 6249 }, { "epoch": 1.0126377187297473, "grad_norm": 0.7302095293998718, "learning_rate": 3.8591516350932476e-06, "loss": 0.0871, "step": 6250 }, { "epoch": 1.012799740764744, "grad_norm": 0.8454660773277283, "learning_rate": 3.85878458272942e-06, "loss": 0.1086, "step": 6251 }, { "epoch": 1.0129617627997407, "grad_norm": 0.786479651927948, "learning_rate": 3.8584174887900735e-06, "loss": 0.0989, "step": 6252 }, { "epoch": 1.0131237848347374, "grad_norm": 0.8221831917762756, "learning_rate": 3.858050353286439e-06, "loss": 0.104, "step": 6253 }, { "epoch": 1.0132858068697344, "grad_norm": 0.7293557524681091, "learning_rate": 3.8576831762297495e-06, "loss": 0.0906, "step": 6254 }, { "epoch": 1.013447828904731, "grad_norm": 0.7902601361274719, "learning_rate": 3.85731595763124e-06, "loss": 0.1, "step": 6255 }, { "epoch": 1.0136098509397278, "grad_norm": 0.8392426371574402, "learning_rate": 3.856948697502148e-06, "loss": 0.1109, "step": 6256 }, { "epoch": 1.0137718729747245, "grad_norm": 0.7996932864189148, "learning_rate": 3.856581395853709e-06, "loss": 0.1005, "step": 6257 }, { "epoch": 1.0139338950097214, "grad_norm": 0.7863364815711975, "learning_rate": 3.8562140526971625e-06, "loss": 0.0929, "step": 6258 }, { "epoch": 1.0140959170447181, "grad_norm": 0.7616758942604065, "learning_rate": 3.855846668043747e-06, "loss": 0.0958, "step": 6259 }, { "epoch": 1.0142579390797148, "grad_norm": 0.8799887299537659, "learning_rate": 3.855479241904705e-06, "loss": 0.0998, "step": 6260 }, { "epoch": 1.0144199611147116, "grad_norm": 0.8284506797790527, "learning_rate": 3.855111774291279e-06, "loss": 0.1061, "step": 6261 }, { "epoch": 1.0145819831497083, "grad_norm": 0.7687853574752808, "learning_rate": 3.8547442652147115e-06, "loss": 0.0868, "step": 6262 }, { "epoch": 1.0147440051847052, "grad_norm": 0.8260930776596069, "learning_rate": 3.854376714686249e-06, "loss": 0.1064, "step": 6263 }, { "epoch": 1.014906027219702, "grad_norm": 0.7864606380462646, "learning_rate": 3.854009122717135e-06, "loss": 0.1048, "step": 6264 }, { "epoch": 1.0150680492546986, "grad_norm": 0.8198474645614624, "learning_rate": 3.853641489318619e-06, "loss": 0.1052, "step": 6265 }, { "epoch": 1.0152300712896953, "grad_norm": 0.8373472690582275, "learning_rate": 3.8532738145019484e-06, "loss": 0.1046, "step": 6266 }, { "epoch": 1.0153920933246923, "grad_norm": 0.8073359131813049, "learning_rate": 3.8529060982783756e-06, "loss": 0.0921, "step": 6267 }, { "epoch": 1.015554115359689, "grad_norm": 1.0620416402816772, "learning_rate": 3.852538340659149e-06, "loss": 0.1189, "step": 6268 }, { "epoch": 1.0157161373946857, "grad_norm": 0.9277188181877136, "learning_rate": 3.852170541655523e-06, "loss": 0.111, "step": 6269 }, { "epoch": 1.0158781594296824, "grad_norm": 0.8073578476905823, "learning_rate": 3.85180270127875e-06, "loss": 0.0924, "step": 6270 }, { "epoch": 1.0160401814646791, "grad_norm": 0.8376033902168274, "learning_rate": 3.8514348195400854e-06, "loss": 0.1046, "step": 6271 }, { "epoch": 1.016202203499676, "grad_norm": 0.7943440675735474, "learning_rate": 3.851066896450787e-06, "loss": 0.0997, "step": 6272 }, { "epoch": 1.0163642255346728, "grad_norm": 0.8442563414573669, "learning_rate": 3.85069893202211e-06, "loss": 0.1051, "step": 6273 }, { "epoch": 1.0165262475696695, "grad_norm": 0.8523194789886475, "learning_rate": 3.850330926265314e-06, "loss": 0.1097, "step": 6274 }, { "epoch": 1.0166882696046662, "grad_norm": 0.6826653480529785, "learning_rate": 3.849962879191661e-06, "loss": 0.0823, "step": 6275 }, { "epoch": 1.016850291639663, "grad_norm": 0.782434344291687, "learning_rate": 3.849594790812409e-06, "loss": 0.1043, "step": 6276 }, { "epoch": 1.0170123136746598, "grad_norm": 0.8440970182418823, "learning_rate": 3.849226661138823e-06, "loss": 0.1049, "step": 6277 }, { "epoch": 1.0171743357096565, "grad_norm": 0.809670090675354, "learning_rate": 3.848858490182167e-06, "loss": 0.1051, "step": 6278 }, { "epoch": 1.0173363577446533, "grad_norm": 0.7815456390380859, "learning_rate": 3.848490277953704e-06, "loss": 0.0917, "step": 6279 }, { "epoch": 1.01749837977965, "grad_norm": 0.8251093626022339, "learning_rate": 3.8481220244647025e-06, "loss": 0.1014, "step": 6280 }, { "epoch": 1.017660401814647, "grad_norm": 0.7042380571365356, "learning_rate": 3.84775372972643e-06, "loss": 0.078, "step": 6281 }, { "epoch": 1.0178224238496436, "grad_norm": 0.795159101486206, "learning_rate": 3.847385393750154e-06, "loss": 0.1048, "step": 6282 }, { "epoch": 1.0179844458846403, "grad_norm": 0.8827878832817078, "learning_rate": 3.847017016547146e-06, "loss": 0.1123, "step": 6283 }, { "epoch": 1.018146467919637, "grad_norm": 0.7256340980529785, "learning_rate": 3.846648598128677e-06, "loss": 0.0877, "step": 6284 }, { "epoch": 1.0183084899546337, "grad_norm": 1.0416500568389893, "learning_rate": 3.846280138506019e-06, "loss": 0.1112, "step": 6285 }, { "epoch": 1.0184705119896307, "grad_norm": 0.8687002062797546, "learning_rate": 3.8459116376904475e-06, "loss": 0.1075, "step": 6286 }, { "epoch": 1.0186325340246274, "grad_norm": 0.8167076706886292, "learning_rate": 3.845543095693236e-06, "loss": 0.0987, "step": 6287 }, { "epoch": 1.018794556059624, "grad_norm": 0.7660663723945618, "learning_rate": 3.8451745125256635e-06, "loss": 0.1017, "step": 6288 }, { "epoch": 1.0189565780946208, "grad_norm": 0.80263751745224, "learning_rate": 3.8448058881990055e-06, "loss": 0.0947, "step": 6289 }, { "epoch": 1.0191186001296175, "grad_norm": 0.9206402897834778, "learning_rate": 3.8444372227245415e-06, "loss": 0.1107, "step": 6290 }, { "epoch": 1.0192806221646145, "grad_norm": 0.9525105953216553, "learning_rate": 3.8440685161135514e-06, "loss": 0.1077, "step": 6291 }, { "epoch": 1.0194426441996112, "grad_norm": 0.7992419004440308, "learning_rate": 3.843699768377318e-06, "loss": 0.1066, "step": 6292 }, { "epoch": 1.0196046662346079, "grad_norm": 0.7668746709823608, "learning_rate": 3.843330979527124e-06, "loss": 0.0952, "step": 6293 }, { "epoch": 1.0197666882696046, "grad_norm": 0.8306681513786316, "learning_rate": 3.842962149574252e-06, "loss": 0.1061, "step": 6294 }, { "epoch": 1.0199287103046015, "grad_norm": 0.9119642376899719, "learning_rate": 3.8425932785299875e-06, "loss": 0.1174, "step": 6295 }, { "epoch": 1.0200907323395982, "grad_norm": 0.8632789254188538, "learning_rate": 3.842224366405619e-06, "loss": 0.1145, "step": 6296 }, { "epoch": 1.020252754374595, "grad_norm": 0.8445510864257812, "learning_rate": 3.841855413212432e-06, "loss": 0.1029, "step": 6297 }, { "epoch": 1.0204147764095917, "grad_norm": 0.8951784372329712, "learning_rate": 3.841486418961717e-06, "loss": 0.1144, "step": 6298 }, { "epoch": 1.0205767984445884, "grad_norm": 1.5688501596450806, "learning_rate": 3.841117383664763e-06, "loss": 0.0966, "step": 6299 }, { "epoch": 1.0207388204795853, "grad_norm": 0.9988122582435608, "learning_rate": 3.840748307332865e-06, "loss": 0.1283, "step": 6300 }, { "epoch": 1.020900842514582, "grad_norm": 0.7491373419761658, "learning_rate": 3.84037918997731e-06, "loss": 0.0965, "step": 6301 }, { "epoch": 1.0210628645495787, "grad_norm": 0.8068737387657166, "learning_rate": 3.840010031609398e-06, "loss": 0.0956, "step": 6302 }, { "epoch": 1.0212248865845754, "grad_norm": 0.8668238520622253, "learning_rate": 3.839640832240421e-06, "loss": 0.1107, "step": 6303 }, { "epoch": 1.0213869086195722, "grad_norm": 0.9297085404396057, "learning_rate": 3.8392715918816755e-06, "loss": 0.1196, "step": 6304 }, { "epoch": 1.021548930654569, "grad_norm": 0.7360817193984985, "learning_rate": 3.8389023105444625e-06, "loss": 0.0893, "step": 6305 }, { "epoch": 1.0217109526895658, "grad_norm": 0.8899022340774536, "learning_rate": 3.838532988240077e-06, "loss": 0.1111, "step": 6306 }, { "epoch": 1.0218729747245625, "grad_norm": 0.6964313387870789, "learning_rate": 3.838163624979822e-06, "loss": 0.0904, "step": 6307 }, { "epoch": 1.0220349967595592, "grad_norm": 0.8330636024475098, "learning_rate": 3.837794220774998e-06, "loss": 0.0966, "step": 6308 }, { "epoch": 1.0221970187945562, "grad_norm": 0.9185472726821899, "learning_rate": 3.837424775636908e-06, "loss": 0.1158, "step": 6309 }, { "epoch": 1.0223590408295529, "grad_norm": 0.8500825762748718, "learning_rate": 3.8370552895768565e-06, "loss": 0.1024, "step": 6310 }, { "epoch": 1.0225210628645496, "grad_norm": 0.9668059349060059, "learning_rate": 3.836685762606149e-06, "loss": 0.1048, "step": 6311 }, { "epoch": 1.0226830848995463, "grad_norm": 0.9085898399353027, "learning_rate": 3.836316194736093e-06, "loss": 0.1038, "step": 6312 }, { "epoch": 1.022845106934543, "grad_norm": 0.829475998878479, "learning_rate": 3.8359465859779934e-06, "loss": 0.1031, "step": 6313 }, { "epoch": 1.02300712896954, "grad_norm": 0.946996808052063, "learning_rate": 3.835576936343162e-06, "loss": 0.107, "step": 6314 }, { "epoch": 1.0231691510045366, "grad_norm": 0.9723039865493774, "learning_rate": 3.835207245842908e-06, "loss": 0.1155, "step": 6315 }, { "epoch": 1.0233311730395334, "grad_norm": 0.9182832837104797, "learning_rate": 3.8348375144885445e-06, "loss": 0.1155, "step": 6316 }, { "epoch": 1.02349319507453, "grad_norm": 1.0076240301132202, "learning_rate": 3.834467742291382e-06, "loss": 0.12, "step": 6317 }, { "epoch": 1.023655217109527, "grad_norm": 0.7207759618759155, "learning_rate": 3.834097929262737e-06, "loss": 0.0801, "step": 6318 }, { "epoch": 1.0238172391445237, "grad_norm": 0.9244574904441833, "learning_rate": 3.833728075413923e-06, "loss": 0.1174, "step": 6319 }, { "epoch": 1.0239792611795204, "grad_norm": 0.7919167280197144, "learning_rate": 3.833358180756258e-06, "loss": 0.0981, "step": 6320 }, { "epoch": 1.0241412832145171, "grad_norm": 0.7644730806350708, "learning_rate": 3.832988245301058e-06, "loss": 0.0937, "step": 6321 }, { "epoch": 1.0243033052495139, "grad_norm": 0.8419947624206543, "learning_rate": 3.832618269059645e-06, "loss": 0.0974, "step": 6322 }, { "epoch": 1.0244653272845108, "grad_norm": 0.7853063941001892, "learning_rate": 3.832248252043338e-06, "loss": 0.0995, "step": 6323 }, { "epoch": 1.0246273493195075, "grad_norm": 0.9024096727371216, "learning_rate": 3.831878194263458e-06, "loss": 0.1134, "step": 6324 }, { "epoch": 1.0247893713545042, "grad_norm": 0.8228088021278381, "learning_rate": 3.831508095731328e-06, "loss": 0.1003, "step": 6325 }, { "epoch": 1.024951393389501, "grad_norm": 0.798202395439148, "learning_rate": 3.831137956458272e-06, "loss": 0.0996, "step": 6326 }, { "epoch": 1.0251134154244976, "grad_norm": 1.0323954820632935, "learning_rate": 3.830767776455617e-06, "loss": 0.1177, "step": 6327 }, { "epoch": 1.0252754374594946, "grad_norm": 0.8482728004455566, "learning_rate": 3.830397555734687e-06, "loss": 0.1058, "step": 6328 }, { "epoch": 1.0254374594944913, "grad_norm": 0.8211125731468201, "learning_rate": 3.830027294306813e-06, "loss": 0.1168, "step": 6329 }, { "epoch": 1.025599481529488, "grad_norm": 0.8774312138557434, "learning_rate": 3.8296569921833214e-06, "loss": 0.1134, "step": 6330 }, { "epoch": 1.0257615035644847, "grad_norm": 0.8460497856140137, "learning_rate": 3.829286649375544e-06, "loss": 0.1064, "step": 6331 }, { "epoch": 1.0259235255994816, "grad_norm": 0.8636878728866577, "learning_rate": 3.8289162658948114e-06, "loss": 0.0956, "step": 6332 }, { "epoch": 1.0260855476344783, "grad_norm": 0.8535585403442383, "learning_rate": 3.828545841752457e-06, "loss": 0.1093, "step": 6333 }, { "epoch": 1.026247569669475, "grad_norm": 0.9328159093856812, "learning_rate": 3.828175376959815e-06, "loss": 0.1123, "step": 6334 }, { "epoch": 1.0264095917044718, "grad_norm": 0.8551136255264282, "learning_rate": 3.827804871528221e-06, "loss": 0.0988, "step": 6335 }, { "epoch": 1.0265716137394685, "grad_norm": 0.7710674405097961, "learning_rate": 3.827434325469011e-06, "loss": 0.0962, "step": 6336 }, { "epoch": 1.0267336357744654, "grad_norm": 0.7422839999198914, "learning_rate": 3.827063738793523e-06, "loss": 0.0941, "step": 6337 }, { "epoch": 1.0268956578094621, "grad_norm": 0.8090795874595642, "learning_rate": 3.8266931115130955e-06, "loss": 0.0914, "step": 6338 }, { "epoch": 1.0270576798444588, "grad_norm": 0.8428863286972046, "learning_rate": 3.82632244363907e-06, "loss": 0.1066, "step": 6339 }, { "epoch": 1.0272197018794555, "grad_norm": 0.8122255802154541, "learning_rate": 3.8259517351827866e-06, "loss": 0.1008, "step": 6340 }, { "epoch": 1.0273817239144523, "grad_norm": 0.9258431792259216, "learning_rate": 3.8255809861555895e-06, "loss": 0.1109, "step": 6341 }, { "epoch": 1.0275437459494492, "grad_norm": 0.886329174041748, "learning_rate": 3.825210196568823e-06, "loss": 0.1144, "step": 6342 }, { "epoch": 1.027705767984446, "grad_norm": 0.9017022848129272, "learning_rate": 3.824839366433829e-06, "loss": 0.1152, "step": 6343 }, { "epoch": 1.0278677900194426, "grad_norm": 0.8935456871986389, "learning_rate": 3.824468495761958e-06, "loss": 0.0945, "step": 6344 }, { "epoch": 1.0280298120544393, "grad_norm": 0.8333491683006287, "learning_rate": 3.824097584564556e-06, "loss": 0.1041, "step": 6345 }, { "epoch": 1.0281918340894363, "grad_norm": 0.7439111471176147, "learning_rate": 3.823726632852972e-06, "loss": 0.0931, "step": 6346 }, { "epoch": 1.028353856124433, "grad_norm": 0.8706629276275635, "learning_rate": 3.823355640638557e-06, "loss": 0.1069, "step": 6347 }, { "epoch": 1.0285158781594297, "grad_norm": 0.8351131081581116, "learning_rate": 3.822984607932661e-06, "loss": 0.1065, "step": 6348 }, { "epoch": 1.0286779001944264, "grad_norm": 0.7511221766471863, "learning_rate": 3.822613534746638e-06, "loss": 0.0905, "step": 6349 }, { "epoch": 1.028839922229423, "grad_norm": 0.9029641151428223, "learning_rate": 3.8222424210918404e-06, "loss": 0.1051, "step": 6350 }, { "epoch": 1.02900194426442, "grad_norm": 0.770819902420044, "learning_rate": 3.821871266979626e-06, "loss": 0.0997, "step": 6351 }, { "epoch": 1.0291639662994168, "grad_norm": 0.9414584040641785, "learning_rate": 3.821500072421349e-06, "loss": 0.1117, "step": 6352 }, { "epoch": 1.0293259883344135, "grad_norm": 0.836219072341919, "learning_rate": 3.821128837428368e-06, "loss": 0.1091, "step": 6353 }, { "epoch": 1.0294880103694102, "grad_norm": 0.9013179540634155, "learning_rate": 3.820757562012042e-06, "loss": 0.1025, "step": 6354 }, { "epoch": 1.029650032404407, "grad_norm": 0.7845829725265503, "learning_rate": 3.82038624618373e-06, "loss": 0.1003, "step": 6355 }, { "epoch": 1.0298120544394038, "grad_norm": 0.7390191555023193, "learning_rate": 3.820014889954794e-06, "loss": 0.0994, "step": 6356 }, { "epoch": 1.0299740764744005, "grad_norm": 0.8984519243240356, "learning_rate": 3.819643493336598e-06, "loss": 0.1132, "step": 6357 }, { "epoch": 1.0301360985093972, "grad_norm": 0.8943284749984741, "learning_rate": 3.819272056340504e-06, "loss": 0.1086, "step": 6358 }, { "epoch": 1.030298120544394, "grad_norm": 0.9379931688308716, "learning_rate": 3.818900578977877e-06, "loss": 0.1062, "step": 6359 }, { "epoch": 1.030460142579391, "grad_norm": 0.8385069370269775, "learning_rate": 3.818529061260084e-06, "loss": 0.1045, "step": 6360 }, { "epoch": 1.0306221646143876, "grad_norm": 0.8240720629692078, "learning_rate": 3.8181575031984935e-06, "loss": 0.0985, "step": 6361 }, { "epoch": 1.0307841866493843, "grad_norm": 0.8299695253372192, "learning_rate": 3.817785904804473e-06, "loss": 0.1021, "step": 6362 }, { "epoch": 1.030946208684381, "grad_norm": 0.7998603582382202, "learning_rate": 3.817414266089392e-06, "loss": 0.1041, "step": 6363 }, { "epoch": 1.0311082307193777, "grad_norm": 0.9111914038658142, "learning_rate": 3.817042587064623e-06, "loss": 0.1054, "step": 6364 }, { "epoch": 1.0312702527543747, "grad_norm": 0.7303438186645508, "learning_rate": 3.816670867741538e-06, "loss": 0.0943, "step": 6365 }, { "epoch": 1.0314322747893714, "grad_norm": 0.8028538227081299, "learning_rate": 3.81629910813151e-06, "loss": 0.0959, "step": 6366 }, { "epoch": 1.031594296824368, "grad_norm": 0.7954698204994202, "learning_rate": 3.815927308245917e-06, "loss": 0.1083, "step": 6367 }, { "epoch": 1.0317563188593648, "grad_norm": 0.7787458896636963, "learning_rate": 3.815555468096131e-06, "loss": 0.1001, "step": 6368 }, { "epoch": 1.0319183408943617, "grad_norm": 0.8813876509666443, "learning_rate": 3.815183587693531e-06, "loss": 0.1022, "step": 6369 }, { "epoch": 1.0320803629293585, "grad_norm": 0.9205014705657959, "learning_rate": 3.814811667049497e-06, "loss": 0.1054, "step": 6370 }, { "epoch": 1.0322423849643552, "grad_norm": 0.7909181714057922, "learning_rate": 3.8144397061754066e-06, "loss": 0.1005, "step": 6371 }, { "epoch": 1.0324044069993519, "grad_norm": 0.7991738319396973, "learning_rate": 3.814067705082643e-06, "loss": 0.0988, "step": 6372 }, { "epoch": 1.0325664290343486, "grad_norm": 0.8876237869262695, "learning_rate": 3.8136956637825878e-06, "loss": 0.1064, "step": 6373 }, { "epoch": 1.0327284510693455, "grad_norm": 1.0412876605987549, "learning_rate": 3.8133235822866234e-06, "loss": 0.1069, "step": 6374 }, { "epoch": 1.0328904731043422, "grad_norm": 0.9145109057426453, "learning_rate": 3.812951460606136e-06, "loss": 0.0998, "step": 6375 }, { "epoch": 1.033052495139339, "grad_norm": 0.7650631070137024, "learning_rate": 3.812579298752511e-06, "loss": 0.0925, "step": 6376 }, { "epoch": 1.0332145171743357, "grad_norm": 0.7742469310760498, "learning_rate": 3.812207096737137e-06, "loss": 0.0951, "step": 6377 }, { "epoch": 1.0333765392093324, "grad_norm": 0.900032639503479, "learning_rate": 3.8118348545714e-06, "loss": 0.1075, "step": 6378 }, { "epoch": 1.0335385612443293, "grad_norm": 0.8306341767311096, "learning_rate": 3.811462572266691e-06, "loss": 0.1033, "step": 6379 }, { "epoch": 1.033700583279326, "grad_norm": 0.9517765641212463, "learning_rate": 3.8110902498344023e-06, "loss": 0.1076, "step": 6380 }, { "epoch": 1.0338626053143227, "grad_norm": 0.8399965763092041, "learning_rate": 3.810717887285923e-06, "loss": 0.0931, "step": 6381 }, { "epoch": 1.0340246273493194, "grad_norm": 1.0168367624282837, "learning_rate": 3.8103454846326493e-06, "loss": 0.103, "step": 6382 }, { "epoch": 1.0341866493843164, "grad_norm": 0.8459004163742065, "learning_rate": 3.8099730418859743e-06, "loss": 0.1079, "step": 6383 }, { "epoch": 1.034348671419313, "grad_norm": 0.8548713326454163, "learning_rate": 3.809600559057295e-06, "loss": 0.0932, "step": 6384 }, { "epoch": 1.0345106934543098, "grad_norm": 0.986228346824646, "learning_rate": 3.809228036158007e-06, "loss": 0.1074, "step": 6385 }, { "epoch": 1.0346727154893065, "grad_norm": 0.919900119304657, "learning_rate": 3.80885547319951e-06, "loss": 0.1051, "step": 6386 }, { "epoch": 1.0348347375243032, "grad_norm": 0.8741225004196167, "learning_rate": 3.808482870193202e-06, "loss": 0.1084, "step": 6387 }, { "epoch": 1.0349967595593002, "grad_norm": 0.898740828037262, "learning_rate": 3.808110227150485e-06, "loss": 0.0994, "step": 6388 }, { "epoch": 1.0351587815942969, "grad_norm": 0.8765122890472412, "learning_rate": 3.8077375440827613e-06, "loss": 0.1025, "step": 6389 }, { "epoch": 1.0353208036292936, "grad_norm": 0.8248468637466431, "learning_rate": 3.8073648210014323e-06, "loss": 0.0955, "step": 6390 }, { "epoch": 1.0354828256642903, "grad_norm": 0.7942231297492981, "learning_rate": 3.8069920579179042e-06, "loss": 0.0982, "step": 6391 }, { "epoch": 1.035644847699287, "grad_norm": 0.8907127380371094, "learning_rate": 3.806619254843582e-06, "loss": 0.1016, "step": 6392 }, { "epoch": 1.035806869734284, "grad_norm": 0.9660011529922485, "learning_rate": 3.806246411789872e-06, "loss": 0.1185, "step": 6393 }, { "epoch": 1.0359688917692806, "grad_norm": 0.7657979130744934, "learning_rate": 3.8058735287681835e-06, "loss": 0.0972, "step": 6394 }, { "epoch": 1.0361309138042774, "grad_norm": 0.8937506675720215, "learning_rate": 3.8055006057899254e-06, "loss": 0.1065, "step": 6395 }, { "epoch": 1.036292935839274, "grad_norm": 0.789402425289154, "learning_rate": 3.8051276428665074e-06, "loss": 0.091, "step": 6396 }, { "epoch": 1.036454957874271, "grad_norm": 0.9826111197471619, "learning_rate": 3.8047546400093425e-06, "loss": 0.1106, "step": 6397 }, { "epoch": 1.0366169799092677, "grad_norm": 0.8755872249603271, "learning_rate": 3.8043815972298424e-06, "loss": 0.114, "step": 6398 }, { "epoch": 1.0367790019442644, "grad_norm": 0.8057990074157715, "learning_rate": 3.8040085145394224e-06, "loss": 0.1052, "step": 6399 }, { "epoch": 1.0369410239792611, "grad_norm": 0.8804965615272522, "learning_rate": 3.8036353919494973e-06, "loss": 0.1134, "step": 6400 }, { "epoch": 1.0371030460142578, "grad_norm": 0.8010181784629822, "learning_rate": 3.8032622294714837e-06, "loss": 0.1062, "step": 6401 }, { "epoch": 1.0372650680492548, "grad_norm": 0.7778938412666321, "learning_rate": 3.8028890271168e-06, "loss": 0.0924, "step": 6402 }, { "epoch": 1.0374270900842515, "grad_norm": 0.7856367826461792, "learning_rate": 3.8025157848968653e-06, "loss": 0.1058, "step": 6403 }, { "epoch": 1.0375891121192482, "grad_norm": 0.6976110339164734, "learning_rate": 3.8021425028230994e-06, "loss": 0.0898, "step": 6404 }, { "epoch": 1.037751134154245, "grad_norm": 0.7459751963615417, "learning_rate": 3.8017691809069234e-06, "loss": 0.0889, "step": 6405 }, { "epoch": 1.0379131561892416, "grad_norm": 0.7587976455688477, "learning_rate": 3.801395819159761e-06, "loss": 0.0969, "step": 6406 }, { "epoch": 1.0380751782242386, "grad_norm": 0.7939549684524536, "learning_rate": 3.8010224175930366e-06, "loss": 0.0979, "step": 6407 }, { "epoch": 1.0382372002592353, "grad_norm": 0.7911587953567505, "learning_rate": 3.8006489762181744e-06, "loss": 0.0932, "step": 6408 }, { "epoch": 1.038399222294232, "grad_norm": 0.9099779725074768, "learning_rate": 3.8002754950466004e-06, "loss": 0.1058, "step": 6409 }, { "epoch": 1.0385612443292287, "grad_norm": 0.7849684953689575, "learning_rate": 3.7999019740897423e-06, "loss": 0.0961, "step": 6410 }, { "epoch": 1.0387232663642256, "grad_norm": 0.8940638303756714, "learning_rate": 3.7995284133590317e-06, "loss": 0.0876, "step": 6411 }, { "epoch": 1.0388852883992223, "grad_norm": 0.9506855607032776, "learning_rate": 3.799154812865894e-06, "loss": 0.1111, "step": 6412 }, { "epoch": 1.039047310434219, "grad_norm": 0.8368770480155945, "learning_rate": 3.798781172621765e-06, "loss": 0.1079, "step": 6413 }, { "epoch": 1.0392093324692158, "grad_norm": 0.8906238079071045, "learning_rate": 3.7984074926380733e-06, "loss": 0.1126, "step": 6414 }, { "epoch": 1.0393713545042125, "grad_norm": 0.950920581817627, "learning_rate": 3.7980337729262555e-06, "loss": 0.1118, "step": 6415 }, { "epoch": 1.0395333765392094, "grad_norm": 0.9369775652885437, "learning_rate": 3.7976600134977455e-06, "loss": 0.1117, "step": 6416 }, { "epoch": 1.0396953985742061, "grad_norm": 0.8446056246757507, "learning_rate": 3.7972862143639788e-06, "loss": 0.0977, "step": 6417 }, { "epoch": 1.0398574206092028, "grad_norm": 0.930582582950592, "learning_rate": 3.7969123755363935e-06, "loss": 0.1135, "step": 6418 }, { "epoch": 1.0400194426441995, "grad_norm": 0.7642794251441956, "learning_rate": 3.796538497026428e-06, "loss": 0.0935, "step": 6419 }, { "epoch": 1.0401814646791965, "grad_norm": 0.9868995547294617, "learning_rate": 3.7961645788455225e-06, "loss": 0.1143, "step": 6420 }, { "epoch": 1.0403434867141932, "grad_norm": 1.0182535648345947, "learning_rate": 3.7957906210051173e-06, "loss": 0.0991, "step": 6421 }, { "epoch": 1.04050550874919, "grad_norm": 0.9137951731681824, "learning_rate": 3.7954166235166545e-06, "loss": 0.1159, "step": 6422 }, { "epoch": 1.0406675307841866, "grad_norm": 0.9183018207550049, "learning_rate": 3.795042586391578e-06, "loss": 0.09, "step": 6423 }, { "epoch": 1.0408295528191833, "grad_norm": 0.856632649898529, "learning_rate": 3.794668509641332e-06, "loss": 0.1037, "step": 6424 }, { "epoch": 1.0409915748541803, "grad_norm": 0.8316843509674072, "learning_rate": 3.7942943932773636e-06, "loss": 0.1061, "step": 6425 }, { "epoch": 1.041153596889177, "grad_norm": 0.8080172538757324, "learning_rate": 3.793920237311118e-06, "loss": 0.1112, "step": 6426 }, { "epoch": 1.0413156189241737, "grad_norm": 0.8021390438079834, "learning_rate": 3.793546041754044e-06, "loss": 0.1035, "step": 6427 }, { "epoch": 1.0414776409591704, "grad_norm": 0.8733294010162354, "learning_rate": 3.793171806617593e-06, "loss": 0.1044, "step": 6428 }, { "epoch": 1.041639662994167, "grad_norm": 0.8201737403869629, "learning_rate": 3.7927975319132133e-06, "loss": 0.0958, "step": 6429 }, { "epoch": 1.041801685029164, "grad_norm": 0.7947893142700195, "learning_rate": 3.7924232176523574e-06, "loss": 0.1048, "step": 6430 }, { "epoch": 1.0419637070641607, "grad_norm": 0.839570939540863, "learning_rate": 3.7920488638464788e-06, "loss": 0.1012, "step": 6431 }, { "epoch": 1.0421257290991575, "grad_norm": 0.8176724910736084, "learning_rate": 3.7916744705070318e-06, "loss": 0.0941, "step": 6432 }, { "epoch": 1.0422877511341542, "grad_norm": 0.7297854423522949, "learning_rate": 3.7913000376454713e-06, "loss": 0.0962, "step": 6433 }, { "epoch": 1.042449773169151, "grad_norm": 0.7584792375564575, "learning_rate": 3.790925565273255e-06, "loss": 0.0948, "step": 6434 }, { "epoch": 1.0426117952041478, "grad_norm": 0.7733738422393799, "learning_rate": 3.790551053401841e-06, "loss": 0.0947, "step": 6435 }, { "epoch": 1.0427738172391445, "grad_norm": 0.8243172764778137, "learning_rate": 3.790176502042686e-06, "loss": 0.1108, "step": 6436 }, { "epoch": 1.0429358392741412, "grad_norm": 0.8829347491264343, "learning_rate": 3.7898019112072537e-06, "loss": 0.114, "step": 6437 }, { "epoch": 1.043097861309138, "grad_norm": 0.8396970629692078, "learning_rate": 3.789427280907004e-06, "loss": 0.1037, "step": 6438 }, { "epoch": 1.0432598833441349, "grad_norm": 0.8729767203330994, "learning_rate": 3.7890526111534e-06, "loss": 0.107, "step": 6439 }, { "epoch": 1.0434219053791316, "grad_norm": 0.9895196557044983, "learning_rate": 3.7886779019579045e-06, "loss": 0.1178, "step": 6440 }, { "epoch": 1.0435839274141283, "grad_norm": 0.8271682262420654, "learning_rate": 3.788303153331985e-06, "loss": 0.1127, "step": 6441 }, { "epoch": 1.043745949449125, "grad_norm": 0.8002727031707764, "learning_rate": 3.787928365287106e-06, "loss": 0.1027, "step": 6442 }, { "epoch": 1.043907971484122, "grad_norm": 0.788192629814148, "learning_rate": 3.7875535378347356e-06, "loss": 0.1054, "step": 6443 }, { "epoch": 1.0440699935191187, "grad_norm": 0.8381497263908386, "learning_rate": 3.7871786709863435e-06, "loss": 0.1006, "step": 6444 }, { "epoch": 1.0442320155541154, "grad_norm": 0.9270142912864685, "learning_rate": 3.7868037647533977e-06, "loss": 0.1152, "step": 6445 }, { "epoch": 1.044394037589112, "grad_norm": 0.8742038011550903, "learning_rate": 3.7864288191473718e-06, "loss": 0.11, "step": 6446 }, { "epoch": 1.0445560596241088, "grad_norm": 0.7907035946846008, "learning_rate": 3.786053834179737e-06, "loss": 0.105, "step": 6447 }, { "epoch": 1.0447180816591057, "grad_norm": 0.7882545590400696, "learning_rate": 3.7856788098619667e-06, "loss": 0.1039, "step": 6448 }, { "epoch": 1.0448801036941024, "grad_norm": 0.880588710308075, "learning_rate": 3.7853037462055366e-06, "loss": 0.1136, "step": 6449 }, { "epoch": 1.0450421257290992, "grad_norm": 0.7238618731498718, "learning_rate": 3.7849286432219216e-06, "loss": 0.0888, "step": 6450 }, { "epoch": 1.0452041477640959, "grad_norm": 0.887286365032196, "learning_rate": 3.7845535009226e-06, "loss": 0.107, "step": 6451 }, { "epoch": 1.0453661697990926, "grad_norm": 0.8213676810264587, "learning_rate": 3.78417831931905e-06, "loss": 0.1036, "step": 6452 }, { "epoch": 1.0455281918340895, "grad_norm": 0.8090675473213196, "learning_rate": 3.783803098422751e-06, "loss": 0.0995, "step": 6453 }, { "epoch": 1.0456902138690862, "grad_norm": 0.8054291605949402, "learning_rate": 3.783427838245184e-06, "loss": 0.1053, "step": 6454 }, { "epoch": 1.045852235904083, "grad_norm": 0.9021458625793457, "learning_rate": 3.78305253879783e-06, "loss": 0.1142, "step": 6455 }, { "epoch": 1.0460142579390797, "grad_norm": 0.710974395275116, "learning_rate": 3.7826772000921742e-06, "loss": 0.0924, "step": 6456 }, { "epoch": 1.0461762799740764, "grad_norm": 0.8471440672874451, "learning_rate": 3.7823018221397e-06, "loss": 0.1111, "step": 6457 }, { "epoch": 1.0463383020090733, "grad_norm": 0.8694987297058105, "learning_rate": 3.781926404951893e-06, "loss": 0.1016, "step": 6458 }, { "epoch": 1.04650032404407, "grad_norm": 0.8285094499588013, "learning_rate": 3.78155094854024e-06, "loss": 0.1016, "step": 6459 }, { "epoch": 1.0466623460790667, "grad_norm": 0.9631251692771912, "learning_rate": 3.7811754529162294e-06, "loss": 0.1083, "step": 6460 }, { "epoch": 1.0468243681140634, "grad_norm": 0.7672160267829895, "learning_rate": 3.7807999180913514e-06, "loss": 0.0994, "step": 6461 }, { "epoch": 1.0469863901490604, "grad_norm": 0.8148001432418823, "learning_rate": 3.7804243440770936e-06, "loss": 0.1043, "step": 6462 }, { "epoch": 1.047148412184057, "grad_norm": 0.9620845317840576, "learning_rate": 3.780048730884951e-06, "loss": 0.1056, "step": 6463 }, { "epoch": 1.0473104342190538, "grad_norm": 0.840048611164093, "learning_rate": 3.779673078526414e-06, "loss": 0.1031, "step": 6464 }, { "epoch": 1.0474724562540505, "grad_norm": 1.0370488166809082, "learning_rate": 3.7792973870129773e-06, "loss": 0.1211, "step": 6465 }, { "epoch": 1.0476344782890472, "grad_norm": 0.7531951665878296, "learning_rate": 3.7789216563561373e-06, "loss": 0.0909, "step": 6466 }, { "epoch": 1.0477965003240441, "grad_norm": 0.9405876994132996, "learning_rate": 3.7785458865673885e-06, "loss": 0.1147, "step": 6467 }, { "epoch": 1.0479585223590409, "grad_norm": 1.0181210041046143, "learning_rate": 3.778170077658231e-06, "loss": 0.1209, "step": 6468 }, { "epoch": 1.0481205443940376, "grad_norm": 0.8447110056877136, "learning_rate": 3.7777942296401606e-06, "loss": 0.1024, "step": 6469 }, { "epoch": 1.0482825664290343, "grad_norm": 0.8490682244300842, "learning_rate": 3.77741834252468e-06, "loss": 0.1012, "step": 6470 }, { "epoch": 1.0484445884640312, "grad_norm": 0.8966073393821716, "learning_rate": 3.777042416323289e-06, "loss": 0.104, "step": 6471 }, { "epoch": 1.048606610499028, "grad_norm": 0.7608264088630676, "learning_rate": 3.7766664510474903e-06, "loss": 0.0988, "step": 6472 }, { "epoch": 1.0487686325340246, "grad_norm": 0.9018314480781555, "learning_rate": 3.776290446708789e-06, "loss": 0.1082, "step": 6473 }, { "epoch": 1.0489306545690213, "grad_norm": 0.8391294479370117, "learning_rate": 3.775914403318687e-06, "loss": 0.1015, "step": 6474 }, { "epoch": 1.049092676604018, "grad_norm": 0.8135133981704712, "learning_rate": 3.7755383208886923e-06, "loss": 0.095, "step": 6475 }, { "epoch": 1.049254698639015, "grad_norm": 0.8267797231674194, "learning_rate": 3.7751621994303123e-06, "loss": 0.0941, "step": 6476 }, { "epoch": 1.0494167206740117, "grad_norm": 0.7961505055427551, "learning_rate": 3.774786038955054e-06, "loss": 0.1048, "step": 6477 }, { "epoch": 1.0495787427090084, "grad_norm": 0.7784950137138367, "learning_rate": 3.7744098394744287e-06, "loss": 0.1042, "step": 6478 }, { "epoch": 1.0497407647440051, "grad_norm": 0.8187713623046875, "learning_rate": 3.774033600999946e-06, "loss": 0.1076, "step": 6479 }, { "epoch": 1.0499027867790018, "grad_norm": 0.8849846720695496, "learning_rate": 3.7736573235431174e-06, "loss": 0.1115, "step": 6480 }, { "epoch": 1.0500648088139988, "grad_norm": 0.830732524394989, "learning_rate": 3.773281007115458e-06, "loss": 0.1076, "step": 6481 }, { "epoch": 1.0502268308489955, "grad_norm": 0.8834472298622131, "learning_rate": 3.7729046517284805e-06, "loss": 0.1195, "step": 6482 }, { "epoch": 1.0503888528839922, "grad_norm": 0.8346211910247803, "learning_rate": 3.7725282573937015e-06, "loss": 0.112, "step": 6483 }, { "epoch": 1.050550874918989, "grad_norm": 0.7828723192214966, "learning_rate": 3.7721518241226375e-06, "loss": 0.0984, "step": 6484 }, { "epoch": 1.0507128969539858, "grad_norm": 0.8663693070411682, "learning_rate": 3.7717753519268053e-06, "loss": 0.1056, "step": 6485 }, { "epoch": 1.0508749189889826, "grad_norm": 0.819918155670166, "learning_rate": 3.771398840817725e-06, "loss": 0.0987, "step": 6486 }, { "epoch": 1.0510369410239793, "grad_norm": 0.7646182179450989, "learning_rate": 3.771022290806917e-06, "loss": 0.0878, "step": 6487 }, { "epoch": 1.051198963058976, "grad_norm": 0.8001990914344788, "learning_rate": 3.770645701905904e-06, "loss": 0.095, "step": 6488 }, { "epoch": 1.0513609850939727, "grad_norm": 0.9029562473297119, "learning_rate": 3.770269074126206e-06, "loss": 0.1188, "step": 6489 }, { "epoch": 1.0515230071289696, "grad_norm": 0.9364879727363586, "learning_rate": 3.7698924074793484e-06, "loss": 0.1128, "step": 6490 }, { "epoch": 1.0516850291639663, "grad_norm": 0.8705113530158997, "learning_rate": 3.769515701976856e-06, "loss": 0.0952, "step": 6491 }, { "epoch": 1.051847051198963, "grad_norm": 0.8115032911300659, "learning_rate": 3.7691389576302567e-06, "loss": 0.099, "step": 6492 }, { "epoch": 1.0520090732339598, "grad_norm": 0.8196799159049988, "learning_rate": 3.7687621744510756e-06, "loss": 0.0995, "step": 6493 }, { "epoch": 1.0521710952689567, "grad_norm": 0.8506253957748413, "learning_rate": 3.768385352450842e-06, "loss": 0.0996, "step": 6494 }, { "epoch": 1.0523331173039534, "grad_norm": 0.7713782787322998, "learning_rate": 3.7680084916410876e-06, "loss": 0.0889, "step": 6495 }, { "epoch": 1.0524951393389501, "grad_norm": 0.8281717896461487, "learning_rate": 3.7676315920333396e-06, "loss": 0.1026, "step": 6496 }, { "epoch": 1.0526571613739468, "grad_norm": 0.870488703250885, "learning_rate": 3.7672546536391343e-06, "loss": 0.1065, "step": 6497 }, { "epoch": 1.0528191834089435, "grad_norm": 0.9622962474822998, "learning_rate": 3.7668776764700023e-06, "loss": 0.1097, "step": 6498 }, { "epoch": 1.0529812054439405, "grad_norm": 0.8624140620231628, "learning_rate": 3.76650066053748e-06, "loss": 0.1093, "step": 6499 }, { "epoch": 1.0531432274789372, "grad_norm": 0.9349178671836853, "learning_rate": 3.766123605853101e-06, "loss": 0.115, "step": 6500 }, { "epoch": 1.053305249513934, "grad_norm": 0.8151592016220093, "learning_rate": 3.7657465124284047e-06, "loss": 0.0977, "step": 6501 }, { "epoch": 1.0534672715489306, "grad_norm": 0.9163764715194702, "learning_rate": 3.765369380274928e-06, "loss": 0.1155, "step": 6502 }, { "epoch": 1.0536292935839273, "grad_norm": 0.8974437117576599, "learning_rate": 3.76499220940421e-06, "loss": 0.1149, "step": 6503 }, { "epoch": 1.0537913156189243, "grad_norm": 0.8582158088684082, "learning_rate": 3.7646149998277924e-06, "loss": 0.1047, "step": 6504 }, { "epoch": 1.053953337653921, "grad_norm": 0.8871607184410095, "learning_rate": 3.7642377515572153e-06, "loss": 0.0993, "step": 6505 }, { "epoch": 1.0541153596889177, "grad_norm": 0.8530762791633606, "learning_rate": 3.7638604646040232e-06, "loss": 0.1047, "step": 6506 }, { "epoch": 1.0542773817239144, "grad_norm": 0.8357167840003967, "learning_rate": 3.763483138979759e-06, "loss": 0.111, "step": 6507 }, { "epoch": 1.054439403758911, "grad_norm": 0.8622068166732788, "learning_rate": 3.763105774695968e-06, "loss": 0.111, "step": 6508 }, { "epoch": 1.054601425793908, "grad_norm": 0.8668839335441589, "learning_rate": 3.762728371764197e-06, "loss": 0.102, "step": 6509 }, { "epoch": 1.0547634478289047, "grad_norm": 0.7583625316619873, "learning_rate": 3.7623509301959935e-06, "loss": 0.102, "step": 6510 }, { "epoch": 1.0549254698639015, "grad_norm": 0.7609004974365234, "learning_rate": 3.761973450002907e-06, "loss": 0.0888, "step": 6511 }, { "epoch": 1.0550874918988982, "grad_norm": 0.8587732911109924, "learning_rate": 3.7615959311964865e-06, "loss": 0.1126, "step": 6512 }, { "epoch": 1.055249513933895, "grad_norm": 0.8386414647102356, "learning_rate": 3.7612183737882833e-06, "loss": 0.1107, "step": 6513 }, { "epoch": 1.0554115359688918, "grad_norm": 0.7540522813796997, "learning_rate": 3.760840777789851e-06, "loss": 0.0933, "step": 6514 }, { "epoch": 1.0555735580038885, "grad_norm": 0.7534650564193726, "learning_rate": 3.7604631432127413e-06, "loss": 0.0938, "step": 6515 }, { "epoch": 1.0557355800388852, "grad_norm": 0.831891655921936, "learning_rate": 3.7600854700685095e-06, "loss": 0.108, "step": 6516 }, { "epoch": 1.055897602073882, "grad_norm": 0.8216171264648438, "learning_rate": 3.7597077583687115e-06, "loss": 0.1109, "step": 6517 }, { "epoch": 1.0560596241088789, "grad_norm": 0.8234738707542419, "learning_rate": 3.759330008124905e-06, "loss": 0.1066, "step": 6518 }, { "epoch": 1.0562216461438756, "grad_norm": 0.799088180065155, "learning_rate": 3.7589522193486476e-06, "loss": 0.0963, "step": 6519 }, { "epoch": 1.0563836681788723, "grad_norm": 0.850976288318634, "learning_rate": 3.7585743920514985e-06, "loss": 0.1049, "step": 6520 }, { "epoch": 1.056545690213869, "grad_norm": 0.8160610198974609, "learning_rate": 3.7581965262450193e-06, "loss": 0.1048, "step": 6521 }, { "epoch": 1.056707712248866, "grad_norm": 0.850570023059845, "learning_rate": 3.757818621940771e-06, "loss": 0.1042, "step": 6522 }, { "epoch": 1.0568697342838627, "grad_norm": 0.7830667495727539, "learning_rate": 3.7574406791503167e-06, "loss": 0.1023, "step": 6523 }, { "epoch": 1.0570317563188594, "grad_norm": 0.831290066242218, "learning_rate": 3.7570626978852203e-06, "loss": 0.1041, "step": 6524 }, { "epoch": 1.057193778353856, "grad_norm": 0.9002595543861389, "learning_rate": 3.7566846781570476e-06, "loss": 0.1133, "step": 6525 }, { "epoch": 1.0573558003888528, "grad_norm": 1.0122647285461426, "learning_rate": 3.7563066199773645e-06, "loss": 0.1241, "step": 6526 }, { "epoch": 1.0575178224238497, "grad_norm": 0.858625054359436, "learning_rate": 3.75592852335774e-06, "loss": 0.1033, "step": 6527 }, { "epoch": 1.0576798444588464, "grad_norm": 0.7898378372192383, "learning_rate": 3.7555503883097414e-06, "loss": 0.1033, "step": 6528 }, { "epoch": 1.0578418664938432, "grad_norm": 0.8522166609764099, "learning_rate": 3.755172214844939e-06, "loss": 0.1046, "step": 6529 }, { "epoch": 1.0580038885288399, "grad_norm": 0.8669360280036926, "learning_rate": 3.7547940029749054e-06, "loss": 0.1087, "step": 6530 }, { "epoch": 1.0581659105638366, "grad_norm": 0.8492510914802551, "learning_rate": 3.7544157527112103e-06, "loss": 0.1089, "step": 6531 }, { "epoch": 1.0583279325988335, "grad_norm": 0.7833178043365479, "learning_rate": 3.75403746406543e-06, "loss": 0.0964, "step": 6532 }, { "epoch": 1.0584899546338302, "grad_norm": 0.8604576587677002, "learning_rate": 3.7536591370491373e-06, "loss": 0.1123, "step": 6533 }, { "epoch": 1.058651976668827, "grad_norm": 0.7791811227798462, "learning_rate": 3.7532807716739082e-06, "loss": 0.1048, "step": 6534 }, { "epoch": 1.0588139987038236, "grad_norm": 0.8196133375167847, "learning_rate": 3.7529023679513217e-06, "loss": 0.109, "step": 6535 }, { "epoch": 1.0589760207388206, "grad_norm": 0.7942836284637451, "learning_rate": 3.752523925892954e-06, "loss": 0.0982, "step": 6536 }, { "epoch": 1.0591380427738173, "grad_norm": 0.7868502140045166, "learning_rate": 3.7521454455103857e-06, "loss": 0.1046, "step": 6537 }, { "epoch": 1.059300064808814, "grad_norm": 0.8380435109138489, "learning_rate": 3.7517669268151967e-06, "loss": 0.1071, "step": 6538 }, { "epoch": 1.0594620868438107, "grad_norm": 0.6852514743804932, "learning_rate": 3.751388369818969e-06, "loss": 0.0811, "step": 6539 }, { "epoch": 1.0596241088788074, "grad_norm": 0.7679120302200317, "learning_rate": 3.751009774533285e-06, "loss": 0.0952, "step": 6540 }, { "epoch": 1.0597861309138044, "grad_norm": 0.8072113394737244, "learning_rate": 3.7506311409697295e-06, "loss": 0.1035, "step": 6541 }, { "epoch": 1.059948152948801, "grad_norm": 1.01187002658844, "learning_rate": 3.7502524691398877e-06, "loss": 0.131, "step": 6542 }, { "epoch": 1.0601101749837978, "grad_norm": 0.8866243958473206, "learning_rate": 3.7498737590553465e-06, "loss": 0.1028, "step": 6543 }, { "epoch": 1.0602721970187945, "grad_norm": 0.9462065696716309, "learning_rate": 3.7494950107276917e-06, "loss": 0.1029, "step": 6544 }, { "epoch": 1.0604342190537914, "grad_norm": 0.7820582985877991, "learning_rate": 3.749116224168514e-06, "loss": 0.0942, "step": 6545 }, { "epoch": 1.0605962410887881, "grad_norm": 0.9271798133850098, "learning_rate": 3.7487373993894027e-06, "loss": 0.1145, "step": 6546 }, { "epoch": 1.0607582631237849, "grad_norm": 0.8751158714294434, "learning_rate": 3.748358536401949e-06, "loss": 0.105, "step": 6547 }, { "epoch": 1.0609202851587816, "grad_norm": 0.8312302231788635, "learning_rate": 3.7479796352177445e-06, "loss": 0.0998, "step": 6548 }, { "epoch": 1.0610823071937783, "grad_norm": 0.7504762411117554, "learning_rate": 3.7476006958483835e-06, "loss": 0.0974, "step": 6549 }, { "epoch": 1.0612443292287752, "grad_norm": 0.8843858242034912, "learning_rate": 3.7472217183054605e-06, "loss": 0.1055, "step": 6550 }, { "epoch": 1.061406351263772, "grad_norm": 0.9589284658432007, "learning_rate": 3.7468427026005705e-06, "loss": 0.1222, "step": 6551 }, { "epoch": 1.0615683732987686, "grad_norm": 0.8354429602622986, "learning_rate": 3.7464636487453122e-06, "loss": 0.1088, "step": 6552 }, { "epoch": 1.0617303953337653, "grad_norm": 0.7289818525314331, "learning_rate": 3.7460845567512817e-06, "loss": 0.0927, "step": 6553 }, { "epoch": 1.061892417368762, "grad_norm": 0.8219195604324341, "learning_rate": 3.74570542663008e-06, "loss": 0.1049, "step": 6554 }, { "epoch": 1.062054439403759, "grad_norm": 0.8895604610443115, "learning_rate": 3.745326258393306e-06, "loss": 0.1069, "step": 6555 }, { "epoch": 1.0622164614387557, "grad_norm": 0.8279933929443359, "learning_rate": 3.744947052052562e-06, "loss": 0.1081, "step": 6556 }, { "epoch": 1.0623784834737524, "grad_norm": 0.7519248127937317, "learning_rate": 3.744567807619451e-06, "loss": 0.0904, "step": 6557 }, { "epoch": 1.0625405055087491, "grad_norm": 0.7404701113700867, "learning_rate": 3.7441885251055774e-06, "loss": 0.0901, "step": 6558 }, { "epoch": 1.0627025275437458, "grad_norm": 0.8219286203384399, "learning_rate": 3.743809204522546e-06, "loss": 0.1059, "step": 6559 }, { "epoch": 1.0628645495787428, "grad_norm": 0.8403288125991821, "learning_rate": 3.7434298458819622e-06, "loss": 0.1126, "step": 6560 }, { "epoch": 1.0630265716137395, "grad_norm": 0.742680549621582, "learning_rate": 3.743050449195435e-06, "loss": 0.0932, "step": 6561 }, { "epoch": 1.0631885936487362, "grad_norm": 0.8612416386604309, "learning_rate": 3.7426710144745717e-06, "loss": 0.1066, "step": 6562 }, { "epoch": 1.063350615683733, "grad_norm": 0.8878268599510193, "learning_rate": 3.7422915417309825e-06, "loss": 0.1157, "step": 6563 }, { "epoch": 1.0635126377187298, "grad_norm": 0.8379852771759033, "learning_rate": 3.7419120309762787e-06, "loss": 0.1026, "step": 6564 }, { "epoch": 1.0636746597537265, "grad_norm": 0.8249161839485168, "learning_rate": 3.7415324822220717e-06, "loss": 0.1052, "step": 6565 }, { "epoch": 1.0638366817887233, "grad_norm": 0.8621309399604797, "learning_rate": 3.7411528954799752e-06, "loss": 0.1006, "step": 6566 }, { "epoch": 1.06399870382372, "grad_norm": 0.860681414604187, "learning_rate": 3.740773270761604e-06, "loss": 0.103, "step": 6567 }, { "epoch": 1.0641607258587167, "grad_norm": 0.995371401309967, "learning_rate": 3.740393608078573e-06, "loss": 0.1238, "step": 6568 }, { "epoch": 1.0643227478937136, "grad_norm": 0.8341780304908752, "learning_rate": 3.7400139074424997e-06, "loss": 0.1076, "step": 6569 }, { "epoch": 1.0644847699287103, "grad_norm": 0.8263168931007385, "learning_rate": 3.739634168865001e-06, "loss": 0.1026, "step": 6570 }, { "epoch": 1.064646791963707, "grad_norm": 0.959525465965271, "learning_rate": 3.7392543923576974e-06, "loss": 0.1143, "step": 6571 }, { "epoch": 1.0648088139987038, "grad_norm": 0.8506279587745667, "learning_rate": 3.738874577932208e-06, "loss": 0.1033, "step": 6572 }, { "epoch": 1.0649708360337007, "grad_norm": 0.8389852046966553, "learning_rate": 3.7384947256001534e-06, "loss": 0.1043, "step": 6573 }, { "epoch": 1.0651328580686974, "grad_norm": 0.7704646587371826, "learning_rate": 3.738114835373159e-06, "loss": 0.0953, "step": 6574 }, { "epoch": 1.065294880103694, "grad_norm": 0.8785123229026794, "learning_rate": 3.7377349072628457e-06, "loss": 0.1146, "step": 6575 }, { "epoch": 1.0654569021386908, "grad_norm": 0.7751638889312744, "learning_rate": 3.73735494128084e-06, "loss": 0.0961, "step": 6576 }, { "epoch": 1.0656189241736875, "grad_norm": 0.8245144486427307, "learning_rate": 3.7369749374387677e-06, "loss": 0.108, "step": 6577 }, { "epoch": 1.0657809462086845, "grad_norm": 0.8200885653495789, "learning_rate": 3.736594895748255e-06, "loss": 0.0974, "step": 6578 }, { "epoch": 1.0659429682436812, "grad_norm": 0.8444353342056274, "learning_rate": 3.7362148162209315e-06, "loss": 0.0993, "step": 6579 }, { "epoch": 1.0661049902786779, "grad_norm": 0.822864830493927, "learning_rate": 3.7358346988684258e-06, "loss": 0.1042, "step": 6580 }, { "epoch": 1.0662670123136746, "grad_norm": 0.8917192220687866, "learning_rate": 3.73545454370237e-06, "loss": 0.1073, "step": 6581 }, { "epoch": 1.0664290343486713, "grad_norm": 1.0070949792861938, "learning_rate": 3.735074350734393e-06, "loss": 0.1275, "step": 6582 }, { "epoch": 1.0665910563836682, "grad_norm": 0.8910343647003174, "learning_rate": 3.7346941199761317e-06, "loss": 0.1139, "step": 6583 }, { "epoch": 1.066753078418665, "grad_norm": 0.8535321950912476, "learning_rate": 3.734313851439217e-06, "loss": 0.103, "step": 6584 }, { "epoch": 1.0669151004536617, "grad_norm": 0.8377382159233093, "learning_rate": 3.7339335451352864e-06, "loss": 0.1053, "step": 6585 }, { "epoch": 1.0670771224886584, "grad_norm": 0.8273501992225647, "learning_rate": 3.7335532010759747e-06, "loss": 0.1034, "step": 6586 }, { "epoch": 1.0672391445236553, "grad_norm": 0.9124737977981567, "learning_rate": 3.73317281927292e-06, "loss": 0.1059, "step": 6587 }, { "epoch": 1.067401166558652, "grad_norm": 0.8772713541984558, "learning_rate": 3.732792399737761e-06, "loss": 0.1118, "step": 6588 }, { "epoch": 1.0675631885936487, "grad_norm": 0.7443407773971558, "learning_rate": 3.7324119424821387e-06, "loss": 0.0956, "step": 6589 }, { "epoch": 1.0677252106286454, "grad_norm": 0.7781869173049927, "learning_rate": 3.7320314475176933e-06, "loss": 0.0921, "step": 6590 }, { "epoch": 1.0678872326636422, "grad_norm": 1.039441466331482, "learning_rate": 3.7316509148560664e-06, "loss": 0.1076, "step": 6591 }, { "epoch": 1.068049254698639, "grad_norm": 0.7879117131233215, "learning_rate": 3.731270344508903e-06, "loss": 0.0951, "step": 6592 }, { "epoch": 1.0682112767336358, "grad_norm": 0.8069095015525818, "learning_rate": 3.730889736487846e-06, "loss": 0.103, "step": 6593 }, { "epoch": 1.0683732987686325, "grad_norm": 0.7860719561576843, "learning_rate": 3.7305090908045422e-06, "loss": 0.0989, "step": 6594 }, { "epoch": 1.0685353208036292, "grad_norm": 0.7485154271125793, "learning_rate": 3.7301284074706372e-06, "loss": 0.0916, "step": 6595 }, { "epoch": 1.0686973428386262, "grad_norm": 0.8204521536827087, "learning_rate": 3.7297476864977805e-06, "loss": 0.1062, "step": 6596 }, { "epoch": 1.0688593648736229, "grad_norm": 0.7526237964630127, "learning_rate": 3.72936692789762e-06, "loss": 0.0921, "step": 6597 }, { "epoch": 1.0690213869086196, "grad_norm": 0.7098335027694702, "learning_rate": 3.7289861316818077e-06, "loss": 0.0926, "step": 6598 }, { "epoch": 1.0691834089436163, "grad_norm": 0.8343385457992554, "learning_rate": 3.7286052978619926e-06, "loss": 0.1039, "step": 6599 }, { "epoch": 1.069345430978613, "grad_norm": 0.8440389633178711, "learning_rate": 3.728224426449829e-06, "loss": 0.1092, "step": 6600 }, { "epoch": 1.06950745301361, "grad_norm": 0.9151831865310669, "learning_rate": 3.72784351745697e-06, "loss": 0.1205, "step": 6601 }, { "epoch": 1.0696694750486067, "grad_norm": 0.8347547650337219, "learning_rate": 3.7274625708950706e-06, "loss": 0.0992, "step": 6602 }, { "epoch": 1.0698314970836034, "grad_norm": 0.9815458655357361, "learning_rate": 3.727081586775787e-06, "loss": 0.1116, "step": 6603 }, { "epoch": 1.0699935191186, "grad_norm": 0.7796982526779175, "learning_rate": 3.7267005651107763e-06, "loss": 0.1061, "step": 6604 }, { "epoch": 1.0701555411535968, "grad_norm": 0.7754164338111877, "learning_rate": 3.7263195059116973e-06, "loss": 0.1041, "step": 6605 }, { "epoch": 1.0703175631885937, "grad_norm": 0.8905292749404907, "learning_rate": 3.7259384091902085e-06, "loss": 0.1112, "step": 6606 }, { "epoch": 1.0704795852235904, "grad_norm": 0.7484425902366638, "learning_rate": 3.7255572749579716e-06, "loss": 0.0919, "step": 6607 }, { "epoch": 1.0706416072585871, "grad_norm": 0.863379955291748, "learning_rate": 3.7251761032266475e-06, "loss": 0.1023, "step": 6608 }, { "epoch": 1.0708036292935839, "grad_norm": 0.9683666229248047, "learning_rate": 3.7247948940078996e-06, "loss": 0.1008, "step": 6609 }, { "epoch": 1.0709656513285806, "grad_norm": 0.9362031817436218, "learning_rate": 3.7244136473133924e-06, "loss": 0.1129, "step": 6610 }, { "epoch": 1.0711276733635775, "grad_norm": 0.8648001551628113, "learning_rate": 3.72403236315479e-06, "loss": 0.1067, "step": 6611 }, { "epoch": 1.0712896953985742, "grad_norm": 0.8078235387802124, "learning_rate": 3.7236510415437598e-06, "loss": 0.1048, "step": 6612 }, { "epoch": 1.071451717433571, "grad_norm": 0.7274784445762634, "learning_rate": 3.7232696824919685e-06, "loss": 0.0971, "step": 6613 }, { "epoch": 1.0716137394685676, "grad_norm": 0.820236325263977, "learning_rate": 3.7228882860110856e-06, "loss": 0.0984, "step": 6614 }, { "epoch": 1.0717757615035646, "grad_norm": 0.8109707832336426, "learning_rate": 3.7225068521127793e-06, "loss": 0.1016, "step": 6615 }, { "epoch": 1.0719377835385613, "grad_norm": 0.8222618103027344, "learning_rate": 3.7221253808087234e-06, "loss": 0.1114, "step": 6616 }, { "epoch": 1.072099805573558, "grad_norm": 0.7346887588500977, "learning_rate": 3.7217438721105876e-06, "loss": 0.0923, "step": 6617 }, { "epoch": 1.0722618276085547, "grad_norm": 0.7791684865951538, "learning_rate": 3.721362326030046e-06, "loss": 0.1004, "step": 6618 }, { "epoch": 1.0724238496435514, "grad_norm": 0.8646988868713379, "learning_rate": 3.7209807425787724e-06, "loss": 0.1047, "step": 6619 }, { "epoch": 1.0725858716785484, "grad_norm": 0.7894246578216553, "learning_rate": 3.720599121768443e-06, "loss": 0.0985, "step": 6620 }, { "epoch": 1.072747893713545, "grad_norm": 0.9039403796195984, "learning_rate": 3.720217463610735e-06, "loss": 0.1114, "step": 6621 }, { "epoch": 1.0729099157485418, "grad_norm": 0.8473367094993591, "learning_rate": 3.7198357681173247e-06, "loss": 0.1048, "step": 6622 }, { "epoch": 1.0730719377835385, "grad_norm": 0.8533846735954285, "learning_rate": 3.719454035299892e-06, "loss": 0.1001, "step": 6623 }, { "epoch": 1.0732339598185354, "grad_norm": 0.8822036981582642, "learning_rate": 3.7190722651701166e-06, "loss": 0.1085, "step": 6624 }, { "epoch": 1.0733959818535321, "grad_norm": 0.74126797914505, "learning_rate": 3.7186904577396805e-06, "loss": 0.0936, "step": 6625 }, { "epoch": 1.0735580038885288, "grad_norm": 1.021998643875122, "learning_rate": 3.718308613020265e-06, "loss": 0.1299, "step": 6626 }, { "epoch": 1.0737200259235256, "grad_norm": 0.7277312278747559, "learning_rate": 3.7179267310235544e-06, "loss": 0.0948, "step": 6627 }, { "epoch": 1.0738820479585223, "grad_norm": 0.8704293966293335, "learning_rate": 3.717544811761233e-06, "loss": 0.1089, "step": 6628 }, { "epoch": 1.0740440699935192, "grad_norm": 0.7847870588302612, "learning_rate": 3.717162855244988e-06, "loss": 0.0992, "step": 6629 }, { "epoch": 1.074206092028516, "grad_norm": 0.8332440853118896, "learning_rate": 3.716780861486503e-06, "loss": 0.1022, "step": 6630 }, { "epoch": 1.0743681140635126, "grad_norm": 0.8781189322471619, "learning_rate": 3.7163988304974704e-06, "loss": 0.1105, "step": 6631 }, { "epoch": 1.0745301360985093, "grad_norm": 0.9302778244018555, "learning_rate": 3.716016762289576e-06, "loss": 0.1082, "step": 6632 }, { "epoch": 1.074692158133506, "grad_norm": 0.7837849855422974, "learning_rate": 3.715634656874511e-06, "loss": 0.096, "step": 6633 }, { "epoch": 1.074854180168503, "grad_norm": 0.7850882411003113, "learning_rate": 3.7152525142639682e-06, "loss": 0.1039, "step": 6634 }, { "epoch": 1.0750162022034997, "grad_norm": 0.8564865589141846, "learning_rate": 3.7148703344696386e-06, "loss": 0.1117, "step": 6635 }, { "epoch": 1.0751782242384964, "grad_norm": 0.9498354196548462, "learning_rate": 3.7144881175032178e-06, "loss": 0.1151, "step": 6636 }, { "epoch": 1.0753402462734931, "grad_norm": 0.949449896812439, "learning_rate": 3.714105863376398e-06, "loss": 0.1191, "step": 6637 }, { "epoch": 1.07550226830849, "grad_norm": 0.8418267369270325, "learning_rate": 3.713723572100878e-06, "loss": 0.1067, "step": 6638 }, { "epoch": 1.0756642903434868, "grad_norm": 0.7026755809783936, "learning_rate": 3.713341243688353e-06, "loss": 0.091, "step": 6639 }, { "epoch": 1.0758263123784835, "grad_norm": 0.7640417218208313, "learning_rate": 3.7129588781505232e-06, "loss": 0.0962, "step": 6640 }, { "epoch": 1.0759883344134802, "grad_norm": 0.7686159610748291, "learning_rate": 3.7125764754990864e-06, "loss": 0.0942, "step": 6641 }, { "epoch": 1.076150356448477, "grad_norm": 0.9323800206184387, "learning_rate": 3.7121940357457438e-06, "loss": 0.1188, "step": 6642 }, { "epoch": 1.0763123784834738, "grad_norm": 0.801908016204834, "learning_rate": 3.7118115589021976e-06, "loss": 0.0964, "step": 6643 }, { "epoch": 1.0764744005184705, "grad_norm": 0.7992369532585144, "learning_rate": 3.7114290449801493e-06, "loss": 0.0955, "step": 6644 }, { "epoch": 1.0766364225534673, "grad_norm": 0.8944490551948547, "learning_rate": 3.711046493991305e-06, "loss": 0.1083, "step": 6645 }, { "epoch": 1.076798444588464, "grad_norm": 0.8392427563667297, "learning_rate": 3.7106639059473675e-06, "loss": 0.1101, "step": 6646 }, { "epoch": 1.076960466623461, "grad_norm": 0.8066583871841431, "learning_rate": 3.7102812808600452e-06, "loss": 0.0912, "step": 6647 }, { "epoch": 1.0771224886584576, "grad_norm": 0.8310062885284424, "learning_rate": 3.7098986187410447e-06, "loss": 0.1061, "step": 6648 }, { "epoch": 1.0772845106934543, "grad_norm": 0.9091915488243103, "learning_rate": 3.7095159196020736e-06, "loss": 0.1213, "step": 6649 }, { "epoch": 1.077446532728451, "grad_norm": 0.8808692693710327, "learning_rate": 3.7091331834548427e-06, "loss": 0.1142, "step": 6650 }, { "epoch": 1.0776085547634477, "grad_norm": 0.8686168789863586, "learning_rate": 3.708750410311062e-06, "loss": 0.1112, "step": 6651 }, { "epoch": 1.0777705767984447, "grad_norm": 0.8946408629417419, "learning_rate": 3.7083676001824443e-06, "loss": 0.1145, "step": 6652 }, { "epoch": 1.0779325988334414, "grad_norm": 0.8658775091171265, "learning_rate": 3.7079847530807023e-06, "loss": 0.1079, "step": 6653 }, { "epoch": 1.078094620868438, "grad_norm": 0.8047525882720947, "learning_rate": 3.70760186901755e-06, "loss": 0.0999, "step": 6654 }, { "epoch": 1.0782566429034348, "grad_norm": 0.7709795236587524, "learning_rate": 3.7072189480047027e-06, "loss": 0.0851, "step": 6655 }, { "epoch": 1.0784186649384315, "grad_norm": 0.9803235530853271, "learning_rate": 3.706835990053877e-06, "loss": 0.1154, "step": 6656 }, { "epoch": 1.0785806869734285, "grad_norm": 0.8865209817886353, "learning_rate": 3.7064529951767905e-06, "loss": 0.1043, "step": 6657 }, { "epoch": 1.0787427090084252, "grad_norm": 0.8521233797073364, "learning_rate": 3.7060699633851615e-06, "loss": 0.1057, "step": 6658 }, { "epoch": 1.0789047310434219, "grad_norm": 0.8935887217521667, "learning_rate": 3.705686894690711e-06, "loss": 0.1043, "step": 6659 }, { "epoch": 1.0790667530784186, "grad_norm": 0.7139661908149719, "learning_rate": 3.7053037891051596e-06, "loss": 0.0868, "step": 6660 }, { "epoch": 1.0792287751134155, "grad_norm": 0.8877597451210022, "learning_rate": 3.7049206466402278e-06, "loss": 0.1123, "step": 6661 }, { "epoch": 1.0793907971484122, "grad_norm": 0.8162228465080261, "learning_rate": 3.704537467307641e-06, "loss": 0.1032, "step": 6662 }, { "epoch": 1.079552819183409, "grad_norm": 0.8396819829940796, "learning_rate": 3.704154251119122e-06, "loss": 0.1027, "step": 6663 }, { "epoch": 1.0797148412184057, "grad_norm": 0.8406090140342712, "learning_rate": 3.7037709980863974e-06, "loss": 0.0978, "step": 6664 }, { "epoch": 1.0798768632534024, "grad_norm": 0.9074726700782776, "learning_rate": 3.703387708221193e-06, "loss": 0.1035, "step": 6665 }, { "epoch": 1.0800388852883993, "grad_norm": 0.9996922612190247, "learning_rate": 3.703004381535237e-06, "loss": 0.1203, "step": 6666 }, { "epoch": 1.080200907323396, "grad_norm": 0.8596282005310059, "learning_rate": 3.7026210180402588e-06, "loss": 0.1132, "step": 6667 }, { "epoch": 1.0803629293583927, "grad_norm": 0.7845479249954224, "learning_rate": 3.7022376177479863e-06, "loss": 0.0991, "step": 6668 }, { "epoch": 1.0805249513933894, "grad_norm": 0.8344932794570923, "learning_rate": 3.701854180670153e-06, "loss": 0.1019, "step": 6669 }, { "epoch": 1.0806869734283864, "grad_norm": 0.8211415410041809, "learning_rate": 3.7014707068184895e-06, "loss": 0.1064, "step": 6670 }, { "epoch": 1.080848995463383, "grad_norm": 0.7003439664840698, "learning_rate": 3.7010871962047314e-06, "loss": 0.0901, "step": 6671 }, { "epoch": 1.0810110174983798, "grad_norm": 0.8313621282577515, "learning_rate": 3.70070364884061e-06, "loss": 0.0949, "step": 6672 }, { "epoch": 1.0811730395333765, "grad_norm": 0.9178413152694702, "learning_rate": 3.7003200647378634e-06, "loss": 0.1111, "step": 6673 }, { "epoch": 1.0813350615683732, "grad_norm": 0.8344830870628357, "learning_rate": 3.6999364439082274e-06, "loss": 0.1049, "step": 6674 }, { "epoch": 1.0814970836033702, "grad_norm": 0.8041746616363525, "learning_rate": 3.69955278636344e-06, "loss": 0.1088, "step": 6675 }, { "epoch": 1.0816591056383669, "grad_norm": 0.8075587153434753, "learning_rate": 3.6991690921152407e-06, "loss": 0.1055, "step": 6676 }, { "epoch": 1.0818211276733636, "grad_norm": 0.729253351688385, "learning_rate": 3.6987853611753686e-06, "loss": 0.0843, "step": 6677 }, { "epoch": 1.0819831497083603, "grad_norm": 0.9236080646514893, "learning_rate": 3.698401593555565e-06, "loss": 0.117, "step": 6678 }, { "epoch": 1.082145171743357, "grad_norm": 0.9156818985939026, "learning_rate": 3.6980177892675735e-06, "loss": 0.1183, "step": 6679 }, { "epoch": 1.082307193778354, "grad_norm": 0.8869015574455261, "learning_rate": 3.697633948323136e-06, "loss": 0.103, "step": 6680 }, { "epoch": 1.0824692158133506, "grad_norm": 0.7443458437919617, "learning_rate": 3.6972500707339986e-06, "loss": 0.0958, "step": 6681 }, { "epoch": 1.0826312378483474, "grad_norm": 0.8404899835586548, "learning_rate": 3.6968661565119062e-06, "loss": 0.1051, "step": 6682 }, { "epoch": 1.082793259883344, "grad_norm": 0.8908175826072693, "learning_rate": 3.6964822056686057e-06, "loss": 0.108, "step": 6683 }, { "epoch": 1.0829552819183408, "grad_norm": 0.8125652074813843, "learning_rate": 3.6960982182158458e-06, "loss": 0.1058, "step": 6684 }, { "epoch": 1.0831173039533377, "grad_norm": 1.0027449131011963, "learning_rate": 3.695714194165374e-06, "loss": 0.1219, "step": 6685 }, { "epoch": 1.0832793259883344, "grad_norm": 0.8607674837112427, "learning_rate": 3.6953301335289415e-06, "loss": 0.1124, "step": 6686 }, { "epoch": 1.0834413480233311, "grad_norm": 0.8296065926551819, "learning_rate": 3.6949460363183e-06, "loss": 0.114, "step": 6687 }, { "epoch": 1.0836033700583279, "grad_norm": 0.923507571220398, "learning_rate": 3.6945619025452006e-06, "loss": 0.1124, "step": 6688 }, { "epoch": 1.0837653920933248, "grad_norm": 0.8684310913085938, "learning_rate": 3.694177732221399e-06, "loss": 0.1112, "step": 6689 }, { "epoch": 1.0839274141283215, "grad_norm": 0.7922837138175964, "learning_rate": 3.6937935253586475e-06, "loss": 0.1036, "step": 6690 }, { "epoch": 1.0840894361633182, "grad_norm": 0.7880176305770874, "learning_rate": 3.693409281968704e-06, "loss": 0.0899, "step": 6691 }, { "epoch": 1.084251458198315, "grad_norm": 0.8277053833007812, "learning_rate": 3.6930250020633237e-06, "loss": 0.1015, "step": 6692 }, { "epoch": 1.0844134802333116, "grad_norm": 0.8327934145927429, "learning_rate": 3.692640685654266e-06, "loss": 0.1002, "step": 6693 }, { "epoch": 1.0845755022683086, "grad_norm": 0.8471828699111938, "learning_rate": 3.692256332753289e-06, "loss": 0.1098, "step": 6694 }, { "epoch": 1.0847375243033053, "grad_norm": 0.8789288401603699, "learning_rate": 3.691871943372154e-06, "loss": 0.1028, "step": 6695 }, { "epoch": 1.084899546338302, "grad_norm": 0.8185938000679016, "learning_rate": 3.691487517522621e-06, "loss": 0.1081, "step": 6696 }, { "epoch": 1.0850615683732987, "grad_norm": 0.8919969797134399, "learning_rate": 3.691103055216454e-06, "loss": 0.1175, "step": 6697 }, { "epoch": 1.0852235904082956, "grad_norm": 0.8887705206871033, "learning_rate": 3.690718556465416e-06, "loss": 0.1059, "step": 6698 }, { "epoch": 1.0853856124432923, "grad_norm": 0.8780819773674011, "learning_rate": 3.690334021281271e-06, "loss": 0.0972, "step": 6699 }, { "epoch": 1.085547634478289, "grad_norm": 0.9211699962615967, "learning_rate": 3.689949449675786e-06, "loss": 0.1204, "step": 6700 }, { "epoch": 1.0857096565132858, "grad_norm": 0.8538796901702881, "learning_rate": 3.6895648416607273e-06, "loss": 0.1136, "step": 6701 }, { "epoch": 1.0858716785482825, "grad_norm": 0.9533454775810242, "learning_rate": 3.689180197247863e-06, "loss": 0.1214, "step": 6702 }, { "epoch": 1.0860337005832794, "grad_norm": 0.7428584098815918, "learning_rate": 3.6887955164489626e-06, "loss": 0.0825, "step": 6703 }, { "epoch": 1.0861957226182761, "grad_norm": 0.8387787342071533, "learning_rate": 3.688410799275796e-06, "loss": 0.105, "step": 6704 }, { "epoch": 1.0863577446532728, "grad_norm": 0.9467548131942749, "learning_rate": 3.6880260457401353e-06, "loss": 0.1085, "step": 6705 }, { "epoch": 1.0865197666882696, "grad_norm": 0.88310307264328, "learning_rate": 3.6876412558537524e-06, "loss": 0.1143, "step": 6706 }, { "epoch": 1.0866817887232663, "grad_norm": 0.8482463955879211, "learning_rate": 3.6872564296284214e-06, "loss": 0.1039, "step": 6707 }, { "epoch": 1.0868438107582632, "grad_norm": 0.8076966404914856, "learning_rate": 3.686871567075916e-06, "loss": 0.1022, "step": 6708 }, { "epoch": 1.08700583279326, "grad_norm": 0.8349097967147827, "learning_rate": 3.686486668208013e-06, "loss": 0.1116, "step": 6709 }, { "epoch": 1.0871678548282566, "grad_norm": 0.8799702525138855, "learning_rate": 3.6861017330364897e-06, "loss": 0.1052, "step": 6710 }, { "epoch": 1.0873298768632533, "grad_norm": 1.1798293590545654, "learning_rate": 3.685716761573123e-06, "loss": 0.1036, "step": 6711 }, { "epoch": 1.0874918988982503, "grad_norm": 0.907839298248291, "learning_rate": 3.685331753829693e-06, "loss": 0.1081, "step": 6712 }, { "epoch": 1.087653920933247, "grad_norm": 0.8660617470741272, "learning_rate": 3.68494670981798e-06, "loss": 0.1047, "step": 6713 }, { "epoch": 1.0878159429682437, "grad_norm": 0.8517418503761292, "learning_rate": 3.684561629549765e-06, "loss": 0.0974, "step": 6714 }, { "epoch": 1.0879779650032404, "grad_norm": 0.862423300743103, "learning_rate": 3.684176513036831e-06, "loss": 0.1084, "step": 6715 }, { "epoch": 1.088139987038237, "grad_norm": 0.6887312531471252, "learning_rate": 3.6837913602909615e-06, "loss": 0.0862, "step": 6716 }, { "epoch": 1.088302009073234, "grad_norm": 0.7741922736167908, "learning_rate": 3.683406171323941e-06, "loss": 0.0962, "step": 6717 }, { "epoch": 1.0884640311082308, "grad_norm": 0.8818541169166565, "learning_rate": 3.6830209461475554e-06, "loss": 0.1033, "step": 6718 }, { "epoch": 1.0886260531432275, "grad_norm": 0.8155554533004761, "learning_rate": 3.682635684773591e-06, "loss": 0.1041, "step": 6719 }, { "epoch": 1.0887880751782242, "grad_norm": 0.86274254322052, "learning_rate": 3.6822503872138377e-06, "loss": 0.1046, "step": 6720 }, { "epoch": 1.0889500972132211, "grad_norm": 0.8824654817581177, "learning_rate": 3.681865053480082e-06, "loss": 0.1147, "step": 6721 }, { "epoch": 1.0891121192482178, "grad_norm": 0.7932639122009277, "learning_rate": 3.6814796835841172e-06, "loss": 0.1008, "step": 6722 }, { "epoch": 1.0892741412832145, "grad_norm": 0.7773560285568237, "learning_rate": 3.681094277537732e-06, "loss": 0.1036, "step": 6723 }, { "epoch": 1.0894361633182112, "grad_norm": 0.7581130862236023, "learning_rate": 3.6807088353527216e-06, "loss": 0.0872, "step": 6724 }, { "epoch": 1.089598185353208, "grad_norm": 0.8676707148551941, "learning_rate": 3.680323357040877e-06, "loss": 0.1011, "step": 6725 }, { "epoch": 1.089760207388205, "grad_norm": 0.8844422101974487, "learning_rate": 3.6799378426139942e-06, "loss": 0.1222, "step": 6726 }, { "epoch": 1.0899222294232016, "grad_norm": 0.7242690920829773, "learning_rate": 3.679552292083869e-06, "loss": 0.0894, "step": 6727 }, { "epoch": 1.0900842514581983, "grad_norm": 0.7637506127357483, "learning_rate": 3.679166705462298e-06, "loss": 0.0938, "step": 6728 }, { "epoch": 1.090246273493195, "grad_norm": 0.9128175973892212, "learning_rate": 3.67878108276108e-06, "loss": 0.1092, "step": 6729 }, { "epoch": 1.0904082955281917, "grad_norm": 0.7904967069625854, "learning_rate": 3.6783954239920118e-06, "loss": 0.0979, "step": 6730 }, { "epoch": 1.0905703175631887, "grad_norm": 0.827260434627533, "learning_rate": 3.678009729166897e-06, "loss": 0.1045, "step": 6731 }, { "epoch": 1.0907323395981854, "grad_norm": 0.7866484522819519, "learning_rate": 3.677623998297534e-06, "loss": 0.0973, "step": 6732 }, { "epoch": 1.090894361633182, "grad_norm": 0.8426867723464966, "learning_rate": 3.677238231395727e-06, "loss": 0.1104, "step": 6733 }, { "epoch": 1.0910563836681788, "grad_norm": 0.7677156329154968, "learning_rate": 3.6768524284732794e-06, "loss": 0.0942, "step": 6734 }, { "epoch": 1.0912184057031755, "grad_norm": 1.0549900531768799, "learning_rate": 3.676466589541995e-06, "loss": 0.1023, "step": 6735 }, { "epoch": 1.0913804277381725, "grad_norm": 0.7646496295928955, "learning_rate": 3.6760807146136796e-06, "loss": 0.0946, "step": 6736 }, { "epoch": 1.0915424497731692, "grad_norm": 0.8362929224967957, "learning_rate": 3.6756948037001406e-06, "loss": 0.1068, "step": 6737 }, { "epoch": 1.0917044718081659, "grad_norm": 0.940573513507843, "learning_rate": 3.675308856813186e-06, "loss": 0.1196, "step": 6738 }, { "epoch": 1.0918664938431626, "grad_norm": 0.9485230445861816, "learning_rate": 3.674922873964625e-06, "loss": 0.1154, "step": 6739 }, { "epoch": 1.0920285158781595, "grad_norm": 0.8467913866043091, "learning_rate": 3.6745368551662663e-06, "loss": 0.0991, "step": 6740 }, { "epoch": 1.0921905379131562, "grad_norm": 0.8293401002883911, "learning_rate": 3.6741508004299227e-06, "loss": 0.1055, "step": 6741 }, { "epoch": 1.092352559948153, "grad_norm": 0.9706871509552002, "learning_rate": 3.6737647097674056e-06, "loss": 0.1058, "step": 6742 }, { "epoch": 1.0925145819831497, "grad_norm": 0.9424450993537903, "learning_rate": 3.673378583190529e-06, "loss": 0.1142, "step": 6743 }, { "epoch": 1.0926766040181464, "grad_norm": 0.8551537990570068, "learning_rate": 3.6729924207111077e-06, "loss": 0.1007, "step": 6744 }, { "epoch": 1.0928386260531433, "grad_norm": 0.8865105509757996, "learning_rate": 3.6726062223409563e-06, "loss": 0.1095, "step": 6745 }, { "epoch": 1.09300064808814, "grad_norm": 0.7189201712608337, "learning_rate": 3.6722199880918928e-06, "loss": 0.091, "step": 6746 }, { "epoch": 1.0931626701231367, "grad_norm": 0.8280571699142456, "learning_rate": 3.6718337179757336e-06, "loss": 0.1077, "step": 6747 }, { "epoch": 1.0933246921581334, "grad_norm": 0.9577053189277649, "learning_rate": 3.6714474120042993e-06, "loss": 0.1168, "step": 6748 }, { "epoch": 1.0934867141931304, "grad_norm": 0.8207298517227173, "learning_rate": 3.6710610701894088e-06, "loss": 0.1025, "step": 6749 }, { "epoch": 1.093648736228127, "grad_norm": 0.78386390209198, "learning_rate": 3.6706746925428833e-06, "loss": 0.1061, "step": 6750 }, { "epoch": 1.0938107582631238, "grad_norm": 0.799771249294281, "learning_rate": 3.6702882790765453e-06, "loss": 0.0973, "step": 6751 }, { "epoch": 1.0939727802981205, "grad_norm": 0.824006199836731, "learning_rate": 3.6699018298022173e-06, "loss": 0.1105, "step": 6752 }, { "epoch": 1.0941348023331172, "grad_norm": 0.8404650092124939, "learning_rate": 3.6695153447317254e-06, "loss": 0.1, "step": 6753 }, { "epoch": 1.0942968243681142, "grad_norm": 0.7487378120422363, "learning_rate": 3.6691288238768928e-06, "loss": 0.0998, "step": 6754 }, { "epoch": 1.0944588464031109, "grad_norm": 0.9007918834686279, "learning_rate": 3.6687422672495493e-06, "loss": 0.1164, "step": 6755 }, { "epoch": 1.0946208684381076, "grad_norm": 0.7542750835418701, "learning_rate": 3.6683556748615196e-06, "loss": 0.0965, "step": 6756 }, { "epoch": 1.0947828904731043, "grad_norm": 0.8000967502593994, "learning_rate": 3.6679690467246337e-06, "loss": 0.1096, "step": 6757 }, { "epoch": 1.094944912508101, "grad_norm": 0.7331892251968384, "learning_rate": 3.667582382850721e-06, "loss": 0.0891, "step": 6758 }, { "epoch": 1.095106934543098, "grad_norm": 0.790254533290863, "learning_rate": 3.6671956832516136e-06, "loss": 0.097, "step": 6759 }, { "epoch": 1.0952689565780946, "grad_norm": 0.7792706489562988, "learning_rate": 3.6668089479391433e-06, "loss": 0.0989, "step": 6760 }, { "epoch": 1.0954309786130914, "grad_norm": 0.8442437648773193, "learning_rate": 3.6664221769251414e-06, "loss": 0.1006, "step": 6761 }, { "epoch": 1.095593000648088, "grad_norm": 0.928298830986023, "learning_rate": 3.666035370221445e-06, "loss": 0.103, "step": 6762 }, { "epoch": 1.095755022683085, "grad_norm": 0.8222261071205139, "learning_rate": 3.6656485278398866e-06, "loss": 0.1058, "step": 6763 }, { "epoch": 1.0959170447180817, "grad_norm": 0.7384744882583618, "learning_rate": 3.665261649792305e-06, "loss": 0.0896, "step": 6764 }, { "epoch": 1.0960790667530784, "grad_norm": 0.836329460144043, "learning_rate": 3.664874736090537e-06, "loss": 0.1059, "step": 6765 }, { "epoch": 1.0962410887880751, "grad_norm": 1.0209158658981323, "learning_rate": 3.66448778674642e-06, "loss": 0.1111, "step": 6766 }, { "epoch": 1.0964031108230718, "grad_norm": 0.7318083047866821, "learning_rate": 3.6641008017717957e-06, "loss": 0.0875, "step": 6767 }, { "epoch": 1.0965651328580688, "grad_norm": 0.7760915160179138, "learning_rate": 3.663713781178504e-06, "loss": 0.0986, "step": 6768 }, { "epoch": 1.0967271548930655, "grad_norm": 0.7997096180915833, "learning_rate": 3.6633267249783854e-06, "loss": 0.0943, "step": 6769 }, { "epoch": 1.0968891769280622, "grad_norm": 0.8263211846351624, "learning_rate": 3.6629396331832854e-06, "loss": 0.0919, "step": 6770 }, { "epoch": 1.097051198963059, "grad_norm": 0.9232663512229919, "learning_rate": 3.6625525058050467e-06, "loss": 0.1063, "step": 6771 }, { "epoch": 1.0972132209980558, "grad_norm": 0.9478903412818909, "learning_rate": 3.6621653428555144e-06, "loss": 0.1076, "step": 6772 }, { "epoch": 1.0973752430330526, "grad_norm": 0.8772745132446289, "learning_rate": 3.661778144346535e-06, "loss": 0.1131, "step": 6773 }, { "epoch": 1.0975372650680493, "grad_norm": 0.8307527899742126, "learning_rate": 3.661390910289956e-06, "loss": 0.1062, "step": 6774 }, { "epoch": 1.097699287103046, "grad_norm": 0.8453426361083984, "learning_rate": 3.661003640697626e-06, "loss": 0.0974, "step": 6775 }, { "epoch": 1.0978613091380427, "grad_norm": 1.1416118144989014, "learning_rate": 3.6606163355813935e-06, "loss": 0.1092, "step": 6776 }, { "epoch": 1.0980233311730396, "grad_norm": 0.8578381538391113, "learning_rate": 3.66022899495311e-06, "loss": 0.1045, "step": 6777 }, { "epoch": 1.0981853532080363, "grad_norm": 0.7609858512878418, "learning_rate": 3.6598416188246265e-06, "loss": 0.0962, "step": 6778 }, { "epoch": 1.098347375243033, "grad_norm": 0.8034330606460571, "learning_rate": 3.659454207207798e-06, "loss": 0.1032, "step": 6779 }, { "epoch": 1.0985093972780298, "grad_norm": 0.9241167306900024, "learning_rate": 3.6590667601144748e-06, "loss": 0.1066, "step": 6780 }, { "epoch": 1.0986714193130265, "grad_norm": 0.7677343487739563, "learning_rate": 3.6586792775565137e-06, "loss": 0.0913, "step": 6781 }, { "epoch": 1.0988334413480234, "grad_norm": 1.0235810279846191, "learning_rate": 3.6582917595457718e-06, "loss": 0.1226, "step": 6782 }, { "epoch": 1.0989954633830201, "grad_norm": 0.7913617491722107, "learning_rate": 3.657904206094104e-06, "loss": 0.1079, "step": 6783 }, { "epoch": 1.0991574854180168, "grad_norm": 0.7789272665977478, "learning_rate": 3.6575166172133703e-06, "loss": 0.0998, "step": 6784 }, { "epoch": 1.0993195074530135, "grad_norm": 0.7309193015098572, "learning_rate": 3.657128992915428e-06, "loss": 0.0935, "step": 6785 }, { "epoch": 1.0994815294880103, "grad_norm": 0.9271590113639832, "learning_rate": 3.6567413332121402e-06, "loss": 0.1065, "step": 6786 }, { "epoch": 1.0996435515230072, "grad_norm": 0.8196094632148743, "learning_rate": 3.6563536381153663e-06, "loss": 0.1061, "step": 6787 }, { "epoch": 1.099805573558004, "grad_norm": 0.8200319409370422, "learning_rate": 3.655965907636969e-06, "loss": 0.1075, "step": 6788 }, { "epoch": 1.0999675955930006, "grad_norm": 0.8080103993415833, "learning_rate": 3.6555781417888126e-06, "loss": 0.106, "step": 6789 }, { "epoch": 1.1001296176279973, "grad_norm": 0.8300761580467224, "learning_rate": 3.6551903405827615e-06, "loss": 0.111, "step": 6790 }, { "epoch": 1.1002916396629943, "grad_norm": 0.887389600276947, "learning_rate": 3.6548025040306816e-06, "loss": 0.1125, "step": 6791 }, { "epoch": 1.100453661697991, "grad_norm": 0.6711512207984924, "learning_rate": 3.6544146321444397e-06, "loss": 0.0879, "step": 6792 }, { "epoch": 1.1006156837329877, "grad_norm": 0.7568984627723694, "learning_rate": 3.654026724935904e-06, "loss": 0.0932, "step": 6793 }, { "epoch": 1.1007777057679844, "grad_norm": 0.8204541206359863, "learning_rate": 3.653638782416943e-06, "loss": 0.1042, "step": 6794 }, { "epoch": 1.100939727802981, "grad_norm": 0.8643372058868408, "learning_rate": 3.6532508045994262e-06, "loss": 0.1071, "step": 6795 }, { "epoch": 1.101101749837978, "grad_norm": 0.8105908036231995, "learning_rate": 3.6528627914952263e-06, "loss": 0.094, "step": 6796 }, { "epoch": 1.1012637718729748, "grad_norm": 0.8288483023643494, "learning_rate": 3.6524747431162148e-06, "loss": 0.1059, "step": 6797 }, { "epoch": 1.1014257939079715, "grad_norm": 0.8761540651321411, "learning_rate": 3.652086659474265e-06, "loss": 0.1127, "step": 6798 }, { "epoch": 1.1015878159429682, "grad_norm": 0.8513060808181763, "learning_rate": 3.651698540581252e-06, "loss": 0.1, "step": 6799 }, { "epoch": 1.101749837977965, "grad_norm": 0.791142463684082, "learning_rate": 3.6513103864490497e-06, "loss": 0.1036, "step": 6800 }, { "epoch": 1.1019118600129618, "grad_norm": 0.7884705662727356, "learning_rate": 3.6509221970895365e-06, "loss": 0.0956, "step": 6801 }, { "epoch": 1.1020738820479585, "grad_norm": 0.8608754277229309, "learning_rate": 3.650533972514589e-06, "loss": 0.1153, "step": 6802 }, { "epoch": 1.1022359040829552, "grad_norm": 0.7507930397987366, "learning_rate": 3.6501457127360863e-06, "loss": 0.0989, "step": 6803 }, { "epoch": 1.102397926117952, "grad_norm": 0.9590283036231995, "learning_rate": 3.6497574177659073e-06, "loss": 0.1085, "step": 6804 }, { "epoch": 1.1025599481529489, "grad_norm": 0.805808424949646, "learning_rate": 3.6493690876159343e-06, "loss": 0.0971, "step": 6805 }, { "epoch": 1.1027219701879456, "grad_norm": 0.7817717790603638, "learning_rate": 3.6489807222980487e-06, "loss": 0.0987, "step": 6806 }, { "epoch": 1.1028839922229423, "grad_norm": 0.8280859589576721, "learning_rate": 3.648592321824133e-06, "loss": 0.1153, "step": 6807 }, { "epoch": 1.103046014257939, "grad_norm": 0.9206019639968872, "learning_rate": 3.648203886206073e-06, "loss": 0.1135, "step": 6808 }, { "epoch": 1.1032080362929357, "grad_norm": 0.8165472745895386, "learning_rate": 3.647815415455751e-06, "loss": 0.1011, "step": 6809 }, { "epoch": 1.1033700583279327, "grad_norm": 0.8732919096946716, "learning_rate": 3.6474269095850568e-06, "loss": 0.1107, "step": 6810 }, { "epoch": 1.1035320803629294, "grad_norm": 0.9100021719932556, "learning_rate": 3.647038368605875e-06, "loss": 0.1159, "step": 6811 }, { "epoch": 1.103694102397926, "grad_norm": 0.8154706358909607, "learning_rate": 3.646649792530094e-06, "loss": 0.1027, "step": 6812 }, { "epoch": 1.1038561244329228, "grad_norm": 0.8055473566055298, "learning_rate": 3.646261181369605e-06, "loss": 0.1039, "step": 6813 }, { "epoch": 1.1040181464679197, "grad_norm": 0.7719459533691406, "learning_rate": 3.645872535136298e-06, "loss": 0.0941, "step": 6814 }, { "epoch": 1.1041801685029164, "grad_norm": 0.8571450710296631, "learning_rate": 3.6454838538420645e-06, "loss": 0.1105, "step": 6815 }, { "epoch": 1.1043421905379132, "grad_norm": 0.8617749214172363, "learning_rate": 3.6450951374987958e-06, "loss": 0.1069, "step": 6816 }, { "epoch": 1.1045042125729099, "grad_norm": 0.7611509561538696, "learning_rate": 3.6447063861183886e-06, "loss": 0.1015, "step": 6817 }, { "epoch": 1.1046662346079066, "grad_norm": 0.7744086980819702, "learning_rate": 3.6443175997127354e-06, "loss": 0.094, "step": 6818 }, { "epoch": 1.1048282566429035, "grad_norm": 0.7558098435401917, "learning_rate": 3.6439287782937328e-06, "loss": 0.0985, "step": 6819 }, { "epoch": 1.1049902786779002, "grad_norm": 0.8518016934394836, "learning_rate": 3.6435399218732776e-06, "loss": 0.1097, "step": 6820 }, { "epoch": 1.105152300712897, "grad_norm": 0.789505660533905, "learning_rate": 3.6431510304632683e-06, "loss": 0.1019, "step": 6821 }, { "epoch": 1.1053143227478937, "grad_norm": 0.9358975291252136, "learning_rate": 3.642762104075604e-06, "loss": 0.117, "step": 6822 }, { "epoch": 1.1054763447828906, "grad_norm": 0.9172407388687134, "learning_rate": 3.642373142722185e-06, "loss": 0.1055, "step": 6823 }, { "epoch": 1.1056383668178873, "grad_norm": 0.8525999784469604, "learning_rate": 3.641984146414912e-06, "loss": 0.1054, "step": 6824 }, { "epoch": 1.105800388852884, "grad_norm": 0.8727070093154907, "learning_rate": 3.6415951151656874e-06, "loss": 0.108, "step": 6825 }, { "epoch": 1.1059624108878807, "grad_norm": 0.8490038514137268, "learning_rate": 3.6412060489864155e-06, "loss": 0.1079, "step": 6826 }, { "epoch": 1.1061244329228774, "grad_norm": 0.8071814179420471, "learning_rate": 3.6408169478889997e-06, "loss": 0.1039, "step": 6827 }, { "epoch": 1.1062864549578744, "grad_norm": 0.8197512030601501, "learning_rate": 3.640427811885346e-06, "loss": 0.0976, "step": 6828 }, { "epoch": 1.106448476992871, "grad_norm": 0.8252851963043213, "learning_rate": 3.640038640987361e-06, "loss": 0.0959, "step": 6829 }, { "epoch": 1.1066104990278678, "grad_norm": 0.9495880603790283, "learning_rate": 3.639649435206953e-06, "loss": 0.11, "step": 6830 }, { "epoch": 1.1067725210628645, "grad_norm": 0.9515889286994934, "learning_rate": 3.639260194556029e-06, "loss": 0.1006, "step": 6831 }, { "epoch": 1.1069345430978612, "grad_norm": 0.9411259889602661, "learning_rate": 3.6388709190465018e-06, "loss": 0.1134, "step": 6832 }, { "epoch": 1.1070965651328581, "grad_norm": 1.127980351448059, "learning_rate": 3.638481608690279e-06, "loss": 0.1175, "step": 6833 }, { "epoch": 1.1072585871678549, "grad_norm": 0.776599109172821, "learning_rate": 3.638092263499274e-06, "loss": 0.0971, "step": 6834 }, { "epoch": 1.1074206092028516, "grad_norm": 1.0031170845031738, "learning_rate": 3.637702883485401e-06, "loss": 0.1137, "step": 6835 }, { "epoch": 1.1075826312378483, "grad_norm": 0.817516028881073, "learning_rate": 3.6373134686605722e-06, "loss": 0.1019, "step": 6836 }, { "epoch": 1.107744653272845, "grad_norm": 0.7830043435096741, "learning_rate": 3.636924019036704e-06, "loss": 0.0928, "step": 6837 }, { "epoch": 1.107906675307842, "grad_norm": 0.7438381314277649, "learning_rate": 3.6365345346257112e-06, "loss": 0.0897, "step": 6838 }, { "epoch": 1.1080686973428386, "grad_norm": 0.8136735558509827, "learning_rate": 3.636145015439513e-06, "loss": 0.1014, "step": 6839 }, { "epoch": 1.1082307193778353, "grad_norm": 0.8344704508781433, "learning_rate": 3.635755461490026e-06, "loss": 0.0962, "step": 6840 }, { "epoch": 1.108392741412832, "grad_norm": 1.0317944288253784, "learning_rate": 3.635365872789171e-06, "loss": 0.1298, "step": 6841 }, { "epoch": 1.108554763447829, "grad_norm": 0.8629235029220581, "learning_rate": 3.634976249348867e-06, "loss": 0.1053, "step": 6842 }, { "epoch": 1.1087167854828257, "grad_norm": 0.8366121649742126, "learning_rate": 3.6345865911810373e-06, "loss": 0.1031, "step": 6843 }, { "epoch": 1.1088788075178224, "grad_norm": 0.7734202146530151, "learning_rate": 3.6341968982976027e-06, "loss": 0.0979, "step": 6844 }, { "epoch": 1.1090408295528191, "grad_norm": 0.9085724353790283, "learning_rate": 3.633807170710488e-06, "loss": 0.1118, "step": 6845 }, { "epoch": 1.1092028515878158, "grad_norm": 0.8711262941360474, "learning_rate": 3.6334174084316186e-06, "loss": 0.1035, "step": 6846 }, { "epoch": 1.1093648736228128, "grad_norm": 0.7932325005531311, "learning_rate": 3.6330276114729185e-06, "loss": 0.1024, "step": 6847 }, { "epoch": 1.1095268956578095, "grad_norm": 0.8877636194229126, "learning_rate": 3.632637779846315e-06, "loss": 0.0966, "step": 6848 }, { "epoch": 1.1096889176928062, "grad_norm": 0.7809078693389893, "learning_rate": 3.6322479135637366e-06, "loss": 0.1062, "step": 6849 }, { "epoch": 1.109850939727803, "grad_norm": 0.7734099626541138, "learning_rate": 3.6318580126371124e-06, "loss": 0.0957, "step": 6850 }, { "epoch": 1.1100129617627998, "grad_norm": 0.811259925365448, "learning_rate": 3.6314680770783717e-06, "loss": 0.108, "step": 6851 }, { "epoch": 1.1101749837977966, "grad_norm": 0.9896193742752075, "learning_rate": 3.631078106899446e-06, "loss": 0.1288, "step": 6852 }, { "epoch": 1.1103370058327933, "grad_norm": 0.7642198801040649, "learning_rate": 3.6306881021122675e-06, "loss": 0.0916, "step": 6853 }, { "epoch": 1.11049902786779, "grad_norm": 0.774316668510437, "learning_rate": 3.630298062728769e-06, "loss": 0.1009, "step": 6854 }, { "epoch": 1.1106610499027867, "grad_norm": 0.9205217361450195, "learning_rate": 3.629907988760886e-06, "loss": 0.1038, "step": 6855 }, { "epoch": 1.1108230719377836, "grad_norm": 0.8705753087997437, "learning_rate": 3.6295178802205515e-06, "loss": 0.1142, "step": 6856 }, { "epoch": 1.1109850939727803, "grad_norm": 0.8856770992279053, "learning_rate": 3.6291277371197042e-06, "loss": 0.1111, "step": 6857 }, { "epoch": 1.111147116007777, "grad_norm": 0.7869232296943665, "learning_rate": 3.62873755947028e-06, "loss": 0.1016, "step": 6858 }, { "epoch": 1.1113091380427738, "grad_norm": 0.8357443809509277, "learning_rate": 3.628347347284218e-06, "loss": 0.1095, "step": 6859 }, { "epoch": 1.1114711600777705, "grad_norm": 0.8363984227180481, "learning_rate": 3.6279571005734583e-06, "loss": 0.0984, "step": 6860 }, { "epoch": 1.1116331821127674, "grad_norm": 0.9447827339172363, "learning_rate": 3.627566819349941e-06, "loss": 0.1097, "step": 6861 }, { "epoch": 1.1117952041477641, "grad_norm": 0.9603884220123291, "learning_rate": 3.6271765036256064e-06, "loss": 0.1168, "step": 6862 }, { "epoch": 1.1119572261827608, "grad_norm": 0.7766293883323669, "learning_rate": 3.6267861534124e-06, "loss": 0.0919, "step": 6863 }, { "epoch": 1.1121192482177575, "grad_norm": 0.6767370700836182, "learning_rate": 3.6263957687222633e-06, "loss": 0.0832, "step": 6864 }, { "epoch": 1.1122812702527545, "grad_norm": 0.8346736431121826, "learning_rate": 3.6260053495671423e-06, "loss": 0.1018, "step": 6865 }, { "epoch": 1.1124432922877512, "grad_norm": 0.871659517288208, "learning_rate": 3.625614895958982e-06, "loss": 0.1006, "step": 6866 }, { "epoch": 1.112605314322748, "grad_norm": 0.6866405010223389, "learning_rate": 3.6252244079097296e-06, "loss": 0.0874, "step": 6867 }, { "epoch": 1.1127673363577446, "grad_norm": 0.8131086230278015, "learning_rate": 3.624833885431334e-06, "loss": 0.1062, "step": 6868 }, { "epoch": 1.1129293583927413, "grad_norm": 0.9282816052436829, "learning_rate": 3.6244433285357433e-06, "loss": 0.1105, "step": 6869 }, { "epoch": 1.1130913804277383, "grad_norm": 0.7851607799530029, "learning_rate": 3.624052737234908e-06, "loss": 0.1028, "step": 6870 }, { "epoch": 1.113253402462735, "grad_norm": 0.7424038648605347, "learning_rate": 3.623662111540779e-06, "loss": 0.0843, "step": 6871 }, { "epoch": 1.1134154244977317, "grad_norm": 0.8808372616767883, "learning_rate": 3.6232714514653082e-06, "loss": 0.1105, "step": 6872 }, { "epoch": 1.1135774465327284, "grad_norm": 0.9049409627914429, "learning_rate": 3.62288075702045e-06, "loss": 0.1022, "step": 6873 }, { "epoch": 1.1137394685677253, "grad_norm": 0.8752559423446655, "learning_rate": 3.6224900282181574e-06, "loss": 0.1034, "step": 6874 }, { "epoch": 1.113901490602722, "grad_norm": 0.7354977130889893, "learning_rate": 3.622099265070386e-06, "loss": 0.0939, "step": 6875 }, { "epoch": 1.1140635126377187, "grad_norm": 0.9659753441810608, "learning_rate": 3.6217084675890935e-06, "loss": 0.1085, "step": 6876 }, { "epoch": 1.1142255346727155, "grad_norm": 0.8423113226890564, "learning_rate": 3.6213176357862364e-06, "loss": 0.1078, "step": 6877 }, { "epoch": 1.1143875567077122, "grad_norm": 0.8920717239379883, "learning_rate": 3.6209267696737723e-06, "loss": 0.111, "step": 6878 }, { "epoch": 1.114549578742709, "grad_norm": 0.9556100964546204, "learning_rate": 3.6205358692636618e-06, "loss": 0.1241, "step": 6879 }, { "epoch": 1.1147116007777058, "grad_norm": 0.8358135223388672, "learning_rate": 3.6201449345678657e-06, "loss": 0.1113, "step": 6880 }, { "epoch": 1.1148736228127025, "grad_norm": 0.9949773550033569, "learning_rate": 3.6197539655983447e-06, "loss": 0.1158, "step": 6881 }, { "epoch": 1.1150356448476992, "grad_norm": 0.7863255739212036, "learning_rate": 3.6193629623670627e-06, "loss": 0.101, "step": 6882 }, { "epoch": 1.115197666882696, "grad_norm": 0.8213348388671875, "learning_rate": 3.6189719248859827e-06, "loss": 0.0921, "step": 6883 }, { "epoch": 1.1153596889176929, "grad_norm": 0.841783344745636, "learning_rate": 3.6185808531670695e-06, "loss": 0.1075, "step": 6884 }, { "epoch": 1.1155217109526896, "grad_norm": 0.8018286228179932, "learning_rate": 3.61818974722229e-06, "loss": 0.1009, "step": 6885 }, { "epoch": 1.1156837329876863, "grad_norm": 0.826497495174408, "learning_rate": 3.617798607063609e-06, "loss": 0.0976, "step": 6886 }, { "epoch": 1.115845755022683, "grad_norm": 0.796943187713623, "learning_rate": 3.6174074327029964e-06, "loss": 0.1033, "step": 6887 }, { "epoch": 1.1160077770576797, "grad_norm": 0.8021527528762817, "learning_rate": 3.61701622415242e-06, "loss": 0.1055, "step": 6888 }, { "epoch": 1.1161697990926767, "grad_norm": 0.7664604187011719, "learning_rate": 3.6166249814238503e-06, "loss": 0.0922, "step": 6889 }, { "epoch": 1.1163318211276734, "grad_norm": 0.7481946349143982, "learning_rate": 3.616233704529259e-06, "loss": 0.0954, "step": 6890 }, { "epoch": 1.11649384316267, "grad_norm": 0.7271841168403625, "learning_rate": 3.6158423934806164e-06, "loss": 0.09, "step": 6891 }, { "epoch": 1.1166558651976668, "grad_norm": 1.0113109350204468, "learning_rate": 3.6154510482898973e-06, "loss": 0.1189, "step": 6892 }, { "epoch": 1.1168178872326637, "grad_norm": 0.7146120667457581, "learning_rate": 3.6150596689690753e-06, "loss": 0.0896, "step": 6893 }, { "epoch": 1.1169799092676604, "grad_norm": 0.8736560344696045, "learning_rate": 3.6146682555301266e-06, "loss": 0.111, "step": 6894 }, { "epoch": 1.1171419313026572, "grad_norm": 0.9498534202575684, "learning_rate": 3.614276807985026e-06, "loss": 0.1128, "step": 6895 }, { "epoch": 1.1173039533376539, "grad_norm": 0.9466055631637573, "learning_rate": 3.613885326345752e-06, "loss": 0.1196, "step": 6896 }, { "epoch": 1.1174659753726506, "grad_norm": 0.8748520016670227, "learning_rate": 3.6134938106242823e-06, "loss": 0.1088, "step": 6897 }, { "epoch": 1.1176279974076475, "grad_norm": 1.0820103883743286, "learning_rate": 3.6131022608325973e-06, "loss": 0.1119, "step": 6898 }, { "epoch": 1.1177900194426442, "grad_norm": 0.7906978726387024, "learning_rate": 3.6127106769826763e-06, "loss": 0.1029, "step": 6899 }, { "epoch": 1.117952041477641, "grad_norm": 0.7698807120323181, "learning_rate": 3.6123190590865e-06, "loss": 0.0988, "step": 6900 }, { "epoch": 1.1181140635126376, "grad_norm": 0.8202066421508789, "learning_rate": 3.6119274071560545e-06, "loss": 0.1104, "step": 6901 }, { "epoch": 1.1182760855476346, "grad_norm": 0.969008207321167, "learning_rate": 3.6115357212033196e-06, "loss": 0.1047, "step": 6902 }, { "epoch": 1.1184381075826313, "grad_norm": 0.7530080676078796, "learning_rate": 3.611144001240282e-06, "loss": 0.0888, "step": 6903 }, { "epoch": 1.118600129617628, "grad_norm": 0.8667441010475159, "learning_rate": 3.610752247278927e-06, "loss": 0.1088, "step": 6904 }, { "epoch": 1.1187621516526247, "grad_norm": 0.7977768778800964, "learning_rate": 3.610360459331241e-06, "loss": 0.1009, "step": 6905 }, { "epoch": 1.1189241736876214, "grad_norm": 0.8574357032775879, "learning_rate": 3.609968637409212e-06, "loss": 0.1125, "step": 6906 }, { "epoch": 1.1190861957226184, "grad_norm": 0.8181487321853638, "learning_rate": 3.609576781524829e-06, "loss": 0.1047, "step": 6907 }, { "epoch": 1.119248217757615, "grad_norm": 0.8313112258911133, "learning_rate": 3.6091848916900816e-06, "loss": 0.1016, "step": 6908 }, { "epoch": 1.1194102397926118, "grad_norm": 0.7930647730827332, "learning_rate": 3.6087929679169603e-06, "loss": 0.1034, "step": 6909 }, { "epoch": 1.1195722618276085, "grad_norm": 0.9460997581481934, "learning_rate": 3.6084010102174576e-06, "loss": 0.1059, "step": 6910 }, { "epoch": 1.1197342838626052, "grad_norm": 0.9013634920120239, "learning_rate": 3.6080090186035664e-06, "loss": 0.1091, "step": 6911 }, { "epoch": 1.1198963058976021, "grad_norm": 0.905096173286438, "learning_rate": 3.6076169930872805e-06, "loss": 0.1161, "step": 6912 }, { "epoch": 1.1200583279325989, "grad_norm": 0.7841644883155823, "learning_rate": 3.607224933680595e-06, "loss": 0.106, "step": 6913 }, { "epoch": 1.1202203499675956, "grad_norm": 0.8874266743659973, "learning_rate": 3.606832840395506e-06, "loss": 0.1087, "step": 6914 }, { "epoch": 1.1203823720025923, "grad_norm": 0.7959884405136108, "learning_rate": 3.60644071324401e-06, "loss": 0.1002, "step": 6915 }, { "epoch": 1.1205443940375892, "grad_norm": 0.8270828127861023, "learning_rate": 3.6060485522381067e-06, "loss": 0.1056, "step": 6916 }, { "epoch": 1.120706416072586, "grad_norm": 0.7726624011993408, "learning_rate": 3.6056563573897927e-06, "loss": 0.0999, "step": 6917 }, { "epoch": 1.1208684381075826, "grad_norm": 0.9138970375061035, "learning_rate": 3.605264128711072e-06, "loss": 0.1154, "step": 6918 }, { "epoch": 1.1210304601425793, "grad_norm": 0.8613343238830566, "learning_rate": 3.604871866213942e-06, "loss": 0.111, "step": 6919 }, { "epoch": 1.121192482177576, "grad_norm": 0.855797290802002, "learning_rate": 3.6044795699104074e-06, "loss": 0.1185, "step": 6920 }, { "epoch": 1.121354504212573, "grad_norm": 0.7498341202735901, "learning_rate": 3.6040872398124705e-06, "loss": 0.0892, "step": 6921 }, { "epoch": 1.1215165262475697, "grad_norm": 0.7906498312950134, "learning_rate": 3.6036948759321357e-06, "loss": 0.0972, "step": 6922 }, { "epoch": 1.1216785482825664, "grad_norm": 0.8063822388648987, "learning_rate": 3.603302478281409e-06, "loss": 0.1016, "step": 6923 }, { "epoch": 1.1218405703175631, "grad_norm": 0.7980828881263733, "learning_rate": 3.6029100468722954e-06, "loss": 0.0919, "step": 6924 }, { "epoch": 1.12200259235256, "grad_norm": 0.7965691685676575, "learning_rate": 3.6025175817168046e-06, "loss": 0.0889, "step": 6925 }, { "epoch": 1.1221646143875568, "grad_norm": 0.8087196350097656, "learning_rate": 3.602125082826944e-06, "loss": 0.0985, "step": 6926 }, { "epoch": 1.1223266364225535, "grad_norm": 0.9410887956619263, "learning_rate": 3.6017325502147215e-06, "loss": 0.101, "step": 6927 }, { "epoch": 1.1224886584575502, "grad_norm": 0.8947740197181702, "learning_rate": 3.60133998389215e-06, "loss": 0.111, "step": 6928 }, { "epoch": 1.122650680492547, "grad_norm": 0.7792215347290039, "learning_rate": 3.6009473838712405e-06, "loss": 0.0952, "step": 6929 }, { "epoch": 1.1228127025275438, "grad_norm": 0.9168524146080017, "learning_rate": 3.600554750164005e-06, "loss": 0.1141, "step": 6930 }, { "epoch": 1.1229747245625405, "grad_norm": 0.8868537545204163, "learning_rate": 3.600162082782457e-06, "loss": 0.1154, "step": 6931 }, { "epoch": 1.1231367465975373, "grad_norm": 0.8856688737869263, "learning_rate": 3.5997693817386128e-06, "loss": 0.0962, "step": 6932 }, { "epoch": 1.123298768632534, "grad_norm": 0.8674602508544922, "learning_rate": 3.5993766470444856e-06, "loss": 0.096, "step": 6933 }, { "epoch": 1.1234607906675307, "grad_norm": 0.8470571041107178, "learning_rate": 3.598983878712094e-06, "loss": 0.1131, "step": 6934 }, { "epoch": 1.1236228127025276, "grad_norm": 0.9755722284317017, "learning_rate": 3.598591076753455e-06, "loss": 0.1109, "step": 6935 }, { "epoch": 1.1237848347375243, "grad_norm": 0.8115435838699341, "learning_rate": 3.598198241180588e-06, "loss": 0.1044, "step": 6936 }, { "epoch": 1.123946856772521, "grad_norm": 0.7542571425437927, "learning_rate": 3.5978053720055117e-06, "loss": 0.097, "step": 6937 }, { "epoch": 1.1241088788075178, "grad_norm": 0.7188454270362854, "learning_rate": 3.597412469240248e-06, "loss": 0.086, "step": 6938 }, { "epoch": 1.1242709008425145, "grad_norm": 0.807829737663269, "learning_rate": 3.5970195328968183e-06, "loss": 0.1037, "step": 6939 }, { "epoch": 1.1244329228775114, "grad_norm": 0.7683031558990479, "learning_rate": 3.5966265629872466e-06, "loss": 0.1012, "step": 6940 }, { "epoch": 1.124594944912508, "grad_norm": 0.805733323097229, "learning_rate": 3.5962335595235547e-06, "loss": 0.1005, "step": 6941 }, { "epoch": 1.1247569669475048, "grad_norm": 0.8244421482086182, "learning_rate": 3.595840522517769e-06, "loss": 0.1096, "step": 6942 }, { "epoch": 1.1249189889825015, "grad_norm": 0.7452391386032104, "learning_rate": 3.5954474519819155e-06, "loss": 0.1078, "step": 6943 }, { "epoch": 1.1250810110174985, "grad_norm": 0.8935032486915588, "learning_rate": 3.5950543479280205e-06, "loss": 0.1056, "step": 6944 }, { "epoch": 1.1252430330524952, "grad_norm": 0.7510908246040344, "learning_rate": 3.5946612103681135e-06, "loss": 0.0906, "step": 6945 }, { "epoch": 1.125405055087492, "grad_norm": 0.7809535264968872, "learning_rate": 3.5942680393142203e-06, "loss": 0.1014, "step": 6946 }, { "epoch": 1.1255670771224886, "grad_norm": 0.9290381073951721, "learning_rate": 3.5938748347783754e-06, "loss": 0.12, "step": 6947 }, { "epoch": 1.1257290991574855, "grad_norm": 0.7380486726760864, "learning_rate": 3.593481596772606e-06, "loss": 0.0862, "step": 6948 }, { "epoch": 1.1258911211924822, "grad_norm": 0.7775296568870544, "learning_rate": 3.593088325308947e-06, "loss": 0.1076, "step": 6949 }, { "epoch": 1.126053143227479, "grad_norm": 0.8212987184524536, "learning_rate": 3.5926950203994303e-06, "loss": 0.0965, "step": 6950 }, { "epoch": 1.1262151652624757, "grad_norm": 0.758618175983429, "learning_rate": 3.5923016820560904e-06, "loss": 0.0958, "step": 6951 }, { "epoch": 1.1263771872974724, "grad_norm": 0.7440875172615051, "learning_rate": 3.5919083102909615e-06, "loss": 0.092, "step": 6952 }, { "epoch": 1.1265392093324693, "grad_norm": 0.755118727684021, "learning_rate": 3.5915149051160812e-06, "loss": 0.0912, "step": 6953 }, { "epoch": 1.126701231367466, "grad_norm": 0.9674204587936401, "learning_rate": 3.591121466543487e-06, "loss": 0.119, "step": 6954 }, { "epoch": 1.1268632534024627, "grad_norm": 0.7956207990646362, "learning_rate": 3.5907279945852145e-06, "loss": 0.0993, "step": 6955 }, { "epoch": 1.1270252754374595, "grad_norm": 0.8555571436882019, "learning_rate": 3.5903344892533067e-06, "loss": 0.1116, "step": 6956 }, { "epoch": 1.1271872974724562, "grad_norm": 0.741603434085846, "learning_rate": 3.5899409505598014e-06, "loss": 0.0924, "step": 6957 }, { "epoch": 1.127349319507453, "grad_norm": 0.8872222900390625, "learning_rate": 3.5895473785167407e-06, "loss": 0.0995, "step": 6958 }, { "epoch": 1.1275113415424498, "grad_norm": 0.8348520398139954, "learning_rate": 3.589153773136167e-06, "loss": 0.1014, "step": 6959 }, { "epoch": 1.1276733635774465, "grad_norm": 0.8107140064239502, "learning_rate": 3.5887601344301228e-06, "loss": 0.1088, "step": 6960 }, { "epoch": 1.1278353856124432, "grad_norm": 0.8019354939460754, "learning_rate": 3.588366462410654e-06, "loss": 0.1078, "step": 6961 }, { "epoch": 1.12799740764744, "grad_norm": 0.8682474493980408, "learning_rate": 3.587972757089805e-06, "loss": 0.1116, "step": 6962 }, { "epoch": 1.1281594296824369, "grad_norm": 0.9506396055221558, "learning_rate": 3.5875790184796228e-06, "loss": 0.1215, "step": 6963 }, { "epoch": 1.1283214517174336, "grad_norm": 0.8694068789482117, "learning_rate": 3.587185246592154e-06, "loss": 0.0923, "step": 6964 }, { "epoch": 1.1284834737524303, "grad_norm": 0.8598493933677673, "learning_rate": 3.5867914414394478e-06, "loss": 0.1113, "step": 6965 }, { "epoch": 1.128645495787427, "grad_norm": 0.7127923965454102, "learning_rate": 3.5863976030335535e-06, "loss": 0.0917, "step": 6966 }, { "epoch": 1.128807517822424, "grad_norm": 0.9294934272766113, "learning_rate": 3.5860037313865216e-06, "loss": 0.1139, "step": 6967 }, { "epoch": 1.1289695398574207, "grad_norm": 0.7821537256240845, "learning_rate": 3.5856098265104033e-06, "loss": 0.1006, "step": 6968 }, { "epoch": 1.1291315618924174, "grad_norm": 0.7920688986778259, "learning_rate": 3.5852158884172523e-06, "loss": 0.0979, "step": 6969 }, { "epoch": 1.129293583927414, "grad_norm": 0.772294819355011, "learning_rate": 3.58482191711912e-06, "loss": 0.0933, "step": 6970 }, { "epoch": 1.1294556059624108, "grad_norm": 0.7953448295593262, "learning_rate": 3.5844279126280635e-06, "loss": 0.1057, "step": 6971 }, { "epoch": 1.1296176279974077, "grad_norm": 0.7928208112716675, "learning_rate": 3.5840338749561365e-06, "loss": 0.1052, "step": 6972 }, { "epoch": 1.1297796500324044, "grad_norm": 0.885570228099823, "learning_rate": 3.5836398041153962e-06, "loss": 0.106, "step": 6973 }, { "epoch": 1.1299416720674011, "grad_norm": 0.8886401057243347, "learning_rate": 3.5832457001179e-06, "loss": 0.107, "step": 6974 }, { "epoch": 1.1301036941023979, "grad_norm": 0.6969801783561707, "learning_rate": 3.5828515629757073e-06, "loss": 0.0911, "step": 6975 }, { "epoch": 1.1302657161373948, "grad_norm": 0.8328415155410767, "learning_rate": 3.582457392700878e-06, "loss": 0.1073, "step": 6976 }, { "epoch": 1.1304277381723915, "grad_norm": 0.8379804491996765, "learning_rate": 3.5820631893054703e-06, "loss": 0.114, "step": 6977 }, { "epoch": 1.1305897602073882, "grad_norm": 0.8626766204833984, "learning_rate": 3.5816689528015485e-06, "loss": 0.1088, "step": 6978 }, { "epoch": 1.130751782242385, "grad_norm": 0.7580254077911377, "learning_rate": 3.5812746832011734e-06, "loss": 0.0952, "step": 6979 }, { "epoch": 1.1309138042773816, "grad_norm": 0.9701016545295715, "learning_rate": 3.580880380516411e-06, "loss": 0.1154, "step": 6980 }, { "epoch": 1.1310758263123786, "grad_norm": 0.8925403356552124, "learning_rate": 3.5804860447593236e-06, "loss": 0.1066, "step": 6981 }, { "epoch": 1.1312378483473753, "grad_norm": 0.861957848072052, "learning_rate": 3.5800916759419784e-06, "loss": 0.0999, "step": 6982 }, { "epoch": 1.131399870382372, "grad_norm": 0.9451190233230591, "learning_rate": 3.579697274076442e-06, "loss": 0.1148, "step": 6983 }, { "epoch": 1.1315618924173687, "grad_norm": 0.8173058032989502, "learning_rate": 3.579302839174781e-06, "loss": 0.1155, "step": 6984 }, { "epoch": 1.1317239144523654, "grad_norm": 0.9785115718841553, "learning_rate": 3.578908371249066e-06, "loss": 0.12, "step": 6985 }, { "epoch": 1.1318859364873624, "grad_norm": 0.8944734334945679, "learning_rate": 3.578513870311365e-06, "loss": 0.1105, "step": 6986 }, { "epoch": 1.132047958522359, "grad_norm": 0.8797488808631897, "learning_rate": 3.57811933637375e-06, "loss": 0.1095, "step": 6987 }, { "epoch": 1.1322099805573558, "grad_norm": 0.8068286180496216, "learning_rate": 3.577724769448292e-06, "loss": 0.1002, "step": 6988 }, { "epoch": 1.1323720025923525, "grad_norm": 0.993378758430481, "learning_rate": 3.577330169547064e-06, "loss": 0.1254, "step": 6989 }, { "epoch": 1.1325340246273492, "grad_norm": 0.7981913089752197, "learning_rate": 3.57693553668214e-06, "loss": 0.1031, "step": 6990 }, { "epoch": 1.1326960466623461, "grad_norm": 0.8486551642417908, "learning_rate": 3.5765408708655946e-06, "loss": 0.1028, "step": 6991 }, { "epoch": 1.1328580686973428, "grad_norm": 0.8483676910400391, "learning_rate": 3.5761461721095037e-06, "loss": 0.1061, "step": 6992 }, { "epoch": 1.1330200907323396, "grad_norm": 0.8058338165283203, "learning_rate": 3.5757514404259447e-06, "loss": 0.1053, "step": 6993 }, { "epoch": 1.1331821127673363, "grad_norm": 0.9978876709938049, "learning_rate": 3.575356675826995e-06, "loss": 0.1242, "step": 6994 }, { "epoch": 1.1333441348023332, "grad_norm": 0.8573361039161682, "learning_rate": 3.5749618783247326e-06, "loss": 0.1093, "step": 6995 }, { "epoch": 1.13350615683733, "grad_norm": 0.8889877200126648, "learning_rate": 3.574567047931238e-06, "loss": 0.1067, "step": 6996 }, { "epoch": 1.1336681788723266, "grad_norm": 0.7973397970199585, "learning_rate": 3.5741721846585926e-06, "loss": 0.0991, "step": 6997 }, { "epoch": 1.1338302009073233, "grad_norm": 0.8841112852096558, "learning_rate": 3.5737772885188777e-06, "loss": 0.1111, "step": 6998 }, { "epoch": 1.1339922229423203, "grad_norm": 0.8456192016601562, "learning_rate": 3.5733823595241758e-06, "loss": 0.111, "step": 6999 }, { "epoch": 1.134154244977317, "grad_norm": 1.4436651468276978, "learning_rate": 3.5729873976865726e-06, "loss": 0.0931, "step": 7000 }, { "epoch": 1.1343162670123137, "grad_norm": 0.8534703850746155, "learning_rate": 3.5725924030181498e-06, "loss": 0.1073, "step": 7001 }, { "epoch": 1.1344782890473104, "grad_norm": 0.8066534996032715, "learning_rate": 3.5721973755309963e-06, "loss": 0.0973, "step": 7002 }, { "epoch": 1.1346403110823071, "grad_norm": 0.8632422685623169, "learning_rate": 3.571802315237197e-06, "loss": 0.1128, "step": 7003 }, { "epoch": 1.134802333117304, "grad_norm": 0.9532308578491211, "learning_rate": 3.5714072221488414e-06, "loss": 0.1121, "step": 7004 }, { "epoch": 1.1349643551523008, "grad_norm": 0.8398197889328003, "learning_rate": 3.571012096278017e-06, "loss": 0.1081, "step": 7005 }, { "epoch": 1.1351263771872975, "grad_norm": 0.7505828142166138, "learning_rate": 3.5706169376368143e-06, "loss": 0.0898, "step": 7006 }, { "epoch": 1.1352883992222942, "grad_norm": 0.8282100558280945, "learning_rate": 3.570221746237325e-06, "loss": 0.1008, "step": 7007 }, { "epoch": 1.135450421257291, "grad_norm": 0.8855400681495667, "learning_rate": 3.5698265220916388e-06, "loss": 0.118, "step": 7008 }, { "epoch": 1.1356124432922878, "grad_norm": 0.8593195080757141, "learning_rate": 3.5694312652118513e-06, "loss": 0.1072, "step": 7009 }, { "epoch": 1.1357744653272845, "grad_norm": 0.9126600027084351, "learning_rate": 3.5690359756100532e-06, "loss": 0.1101, "step": 7010 }, { "epoch": 1.1359364873622813, "grad_norm": 0.7796016335487366, "learning_rate": 3.5686406532983432e-06, "loss": 0.1009, "step": 7011 }, { "epoch": 1.136098509397278, "grad_norm": 0.822391152381897, "learning_rate": 3.5682452982888143e-06, "loss": 0.1099, "step": 7012 }, { "epoch": 1.1362605314322747, "grad_norm": 0.8462624549865723, "learning_rate": 3.5678499105935642e-06, "loss": 0.1055, "step": 7013 }, { "epoch": 1.1364225534672716, "grad_norm": 0.8373414278030396, "learning_rate": 3.5674544902246916e-06, "loss": 0.1144, "step": 7014 }, { "epoch": 1.1365845755022683, "grad_norm": 0.8660956621170044, "learning_rate": 3.567059037194294e-06, "loss": 0.1096, "step": 7015 }, { "epoch": 1.136746597537265, "grad_norm": 0.841236412525177, "learning_rate": 3.566663551514473e-06, "loss": 0.1058, "step": 7016 }, { "epoch": 1.1369086195722617, "grad_norm": 0.8246383666992188, "learning_rate": 3.5662680331973282e-06, "loss": 0.1015, "step": 7017 }, { "epoch": 1.1370706416072587, "grad_norm": 0.8704448342323303, "learning_rate": 3.5658724822549624e-06, "loss": 0.1055, "step": 7018 }, { "epoch": 1.1372326636422554, "grad_norm": 0.7135106325149536, "learning_rate": 3.565476898699477e-06, "loss": 0.0863, "step": 7019 }, { "epoch": 1.137394685677252, "grad_norm": 0.7876743674278259, "learning_rate": 3.5650812825429774e-06, "loss": 0.1019, "step": 7020 }, { "epoch": 1.1375567077122488, "grad_norm": 0.773827850818634, "learning_rate": 3.564685633797568e-06, "loss": 0.0966, "step": 7021 }, { "epoch": 1.1377187297472457, "grad_norm": 0.8527294397354126, "learning_rate": 3.5642899524753548e-06, "loss": 0.1016, "step": 7022 }, { "epoch": 1.1378807517822425, "grad_norm": 0.8665226697921753, "learning_rate": 3.5638942385884443e-06, "loss": 0.1061, "step": 7023 }, { "epoch": 1.1380427738172392, "grad_norm": 0.7744306325912476, "learning_rate": 3.5634984921489455e-06, "loss": 0.0958, "step": 7024 }, { "epoch": 1.1382047958522359, "grad_norm": 0.7468297481536865, "learning_rate": 3.563102713168966e-06, "loss": 0.0925, "step": 7025 }, { "epoch": 1.1383668178872326, "grad_norm": 0.7687481641769409, "learning_rate": 3.562706901660616e-06, "loss": 0.0983, "step": 7026 }, { "epoch": 1.1385288399222295, "grad_norm": 0.899817943572998, "learning_rate": 3.5623110576360065e-06, "loss": 0.1092, "step": 7027 }, { "epoch": 1.1386908619572262, "grad_norm": 0.8666191101074219, "learning_rate": 3.561915181107249e-06, "loss": 0.1056, "step": 7028 }, { "epoch": 1.138852883992223, "grad_norm": 0.8024566173553467, "learning_rate": 3.5615192720864572e-06, "loss": 0.1007, "step": 7029 }, { "epoch": 1.1390149060272197, "grad_norm": 0.7231770753860474, "learning_rate": 3.561123330585744e-06, "loss": 0.0895, "step": 7030 }, { "epoch": 1.1391769280622164, "grad_norm": 0.9641432762145996, "learning_rate": 3.5607273566172255e-06, "loss": 0.1041, "step": 7031 }, { "epoch": 1.1393389500972133, "grad_norm": 0.7931097745895386, "learning_rate": 3.560331350193016e-06, "loss": 0.1088, "step": 7032 }, { "epoch": 1.13950097213221, "grad_norm": 0.8860135674476624, "learning_rate": 3.5599353113252343e-06, "loss": 0.1082, "step": 7033 }, { "epoch": 1.1396629941672067, "grad_norm": 0.8670450448989868, "learning_rate": 3.5595392400259963e-06, "loss": 0.1046, "step": 7034 }, { "epoch": 1.1398250162022034, "grad_norm": 0.7805025577545166, "learning_rate": 3.5591431363074214e-06, "loss": 0.0926, "step": 7035 }, { "epoch": 1.1399870382372002, "grad_norm": 0.8997758626937866, "learning_rate": 3.55874700018163e-06, "loss": 0.1134, "step": 7036 }, { "epoch": 1.140149060272197, "grad_norm": 0.799332857131958, "learning_rate": 3.558350831660742e-06, "loss": 0.0953, "step": 7037 }, { "epoch": 1.1403110823071938, "grad_norm": 0.8448926210403442, "learning_rate": 3.5579546307568807e-06, "loss": 0.1027, "step": 7038 }, { "epoch": 1.1404731043421905, "grad_norm": 0.8926445245742798, "learning_rate": 3.557558397482167e-06, "loss": 0.1064, "step": 7039 }, { "epoch": 1.1406351263771872, "grad_norm": 0.9030429124832153, "learning_rate": 3.557162131848726e-06, "loss": 0.1143, "step": 7040 }, { "epoch": 1.140797148412184, "grad_norm": 0.8533268570899963, "learning_rate": 3.556765833868682e-06, "loss": 0.1089, "step": 7041 }, { "epoch": 1.1409591704471809, "grad_norm": 0.9436481595039368, "learning_rate": 3.5563695035541607e-06, "loss": 0.1096, "step": 7042 }, { "epoch": 1.1411211924821776, "grad_norm": 0.7506798505783081, "learning_rate": 3.5559731409172887e-06, "loss": 0.0839, "step": 7043 }, { "epoch": 1.1412832145171743, "grad_norm": 0.858508288860321, "learning_rate": 3.5555767459701946e-06, "loss": 0.1047, "step": 7044 }, { "epoch": 1.141445236552171, "grad_norm": 0.7885152697563171, "learning_rate": 3.5551803187250062e-06, "loss": 0.1009, "step": 7045 }, { "epoch": 1.141607258587168, "grad_norm": 0.7987487316131592, "learning_rate": 3.554783859193853e-06, "loss": 0.0999, "step": 7046 }, { "epoch": 1.1417692806221647, "grad_norm": 0.8181909322738647, "learning_rate": 3.5543873673888676e-06, "loss": 0.0995, "step": 7047 }, { "epoch": 1.1419313026571614, "grad_norm": 0.8372443914413452, "learning_rate": 3.5539908433221793e-06, "loss": 0.1056, "step": 7048 }, { "epoch": 1.142093324692158, "grad_norm": 0.851033627986908, "learning_rate": 3.553594287005922e-06, "loss": 0.1099, "step": 7049 }, { "epoch": 1.142255346727155, "grad_norm": 0.7364274859428406, "learning_rate": 3.553197698452229e-06, "loss": 0.098, "step": 7050 }, { "epoch": 1.1424173687621517, "grad_norm": 0.8522658348083496, "learning_rate": 3.5528010776732354e-06, "loss": 0.1083, "step": 7051 }, { "epoch": 1.1425793907971484, "grad_norm": 0.8110232949256897, "learning_rate": 3.5524044246810764e-06, "loss": 0.0991, "step": 7052 }, { "epoch": 1.1427414128321451, "grad_norm": 0.9086326956748962, "learning_rate": 3.5520077394878883e-06, "loss": 0.1123, "step": 7053 }, { "epoch": 1.1429034348671419, "grad_norm": 0.9316560626029968, "learning_rate": 3.5516110221058096e-06, "loss": 0.1043, "step": 7054 }, { "epoch": 1.1430654569021388, "grad_norm": 0.8712682723999023, "learning_rate": 3.551214272546979e-06, "loss": 0.1018, "step": 7055 }, { "epoch": 1.1432274789371355, "grad_norm": 0.8193374872207642, "learning_rate": 3.550817490823535e-06, "loss": 0.1063, "step": 7056 }, { "epoch": 1.1433895009721322, "grad_norm": 0.7940148115158081, "learning_rate": 3.550420676947619e-06, "loss": 0.0989, "step": 7057 }, { "epoch": 1.143551523007129, "grad_norm": 1.0240070819854736, "learning_rate": 3.5500238309313717e-06, "loss": 0.0994, "step": 7058 }, { "epoch": 1.1437135450421256, "grad_norm": 0.8923311233520508, "learning_rate": 3.549626952786937e-06, "loss": 0.1026, "step": 7059 }, { "epoch": 1.1438755670771226, "grad_norm": 0.8232337236404419, "learning_rate": 3.5492300425264574e-06, "loss": 0.099, "step": 7060 }, { "epoch": 1.1440375891121193, "grad_norm": 0.8222522139549255, "learning_rate": 3.548833100162077e-06, "loss": 0.0961, "step": 7061 }, { "epoch": 1.144199611147116, "grad_norm": 0.8657082319259644, "learning_rate": 3.5484361257059425e-06, "loss": 0.1066, "step": 7062 }, { "epoch": 1.1443616331821127, "grad_norm": 0.9332712888717651, "learning_rate": 3.548039119170199e-06, "loss": 0.1138, "step": 7063 }, { "epoch": 1.1445236552171094, "grad_norm": 1.0337094068527222, "learning_rate": 3.5476420805669953e-06, "loss": 0.1053, "step": 7064 }, { "epoch": 1.1446856772521063, "grad_norm": 0.874484658241272, "learning_rate": 3.5472450099084786e-06, "loss": 0.1125, "step": 7065 }, { "epoch": 1.144847699287103, "grad_norm": 0.7913826704025269, "learning_rate": 3.5468479072067996e-06, "loss": 0.1014, "step": 7066 }, { "epoch": 1.1450097213220998, "grad_norm": 0.8043907880783081, "learning_rate": 3.5464507724741076e-06, "loss": 0.1029, "step": 7067 }, { "epoch": 1.1451717433570965, "grad_norm": 0.833845317363739, "learning_rate": 3.5460536057225542e-06, "loss": 0.1024, "step": 7068 }, { "epoch": 1.1453337653920934, "grad_norm": 0.8379101753234863, "learning_rate": 3.545656406964292e-06, "loss": 0.1051, "step": 7069 }, { "epoch": 1.1454957874270901, "grad_norm": 0.7530349493026733, "learning_rate": 3.545259176211474e-06, "loss": 0.0899, "step": 7070 }, { "epoch": 1.1456578094620868, "grad_norm": 0.7313252687454224, "learning_rate": 3.5448619134762552e-06, "loss": 0.09, "step": 7071 }, { "epoch": 1.1458198314970836, "grad_norm": 0.8761491179466248, "learning_rate": 3.5444646187707897e-06, "loss": 0.106, "step": 7072 }, { "epoch": 1.1459818535320805, "grad_norm": 0.8301008343696594, "learning_rate": 3.544067292107235e-06, "loss": 0.1041, "step": 7073 }, { "epoch": 1.1461438755670772, "grad_norm": 0.8845946788787842, "learning_rate": 3.5436699334977476e-06, "loss": 0.1079, "step": 7074 }, { "epoch": 1.146305897602074, "grad_norm": 0.9687334299087524, "learning_rate": 3.5432725429544856e-06, "loss": 0.1142, "step": 7075 }, { "epoch": 1.1464679196370706, "grad_norm": 0.9008753299713135, "learning_rate": 3.5428751204896083e-06, "loss": 0.1243, "step": 7076 }, { "epoch": 1.1466299416720673, "grad_norm": 0.8190997838973999, "learning_rate": 3.5424776661152766e-06, "loss": 0.1043, "step": 7077 }, { "epoch": 1.1467919637070643, "grad_norm": 0.7773595452308655, "learning_rate": 3.542080179843651e-06, "loss": 0.0952, "step": 7078 }, { "epoch": 1.146953985742061, "grad_norm": 0.8167815208435059, "learning_rate": 3.5416826616868938e-06, "loss": 0.1005, "step": 7079 }, { "epoch": 1.1471160077770577, "grad_norm": 0.881354570388794, "learning_rate": 3.5412851116571673e-06, "loss": 0.1099, "step": 7080 }, { "epoch": 1.1472780298120544, "grad_norm": 0.9158351421356201, "learning_rate": 3.5408875297666366e-06, "loss": 0.1125, "step": 7081 }, { "epoch": 1.1474400518470511, "grad_norm": 0.8541279435157776, "learning_rate": 3.5404899160274664e-06, "loss": 0.0947, "step": 7082 }, { "epoch": 1.147602073882048, "grad_norm": 0.9512801170349121, "learning_rate": 3.5400922704518225e-06, "loss": 0.1134, "step": 7083 }, { "epoch": 1.1477640959170448, "grad_norm": 0.8026208281517029, "learning_rate": 3.5396945930518722e-06, "loss": 0.1004, "step": 7084 }, { "epoch": 1.1479261179520415, "grad_norm": 0.8425459861755371, "learning_rate": 3.5392968838397836e-06, "loss": 0.1065, "step": 7085 }, { "epoch": 1.1480881399870382, "grad_norm": 0.9077960252761841, "learning_rate": 3.538899142827726e-06, "loss": 0.1203, "step": 7086 }, { "epoch": 1.148250162022035, "grad_norm": 0.8521792888641357, "learning_rate": 3.538501370027867e-06, "loss": 0.1073, "step": 7087 }, { "epoch": 1.1484121840570318, "grad_norm": 0.7184135913848877, "learning_rate": 3.538103565452381e-06, "loss": 0.0894, "step": 7088 }, { "epoch": 1.1485742060920285, "grad_norm": 0.8005532622337341, "learning_rate": 3.537705729113437e-06, "loss": 0.1096, "step": 7089 }, { "epoch": 1.1487362281270252, "grad_norm": 0.8464052081108093, "learning_rate": 3.537307861023209e-06, "loss": 0.1035, "step": 7090 }, { "epoch": 1.148898250162022, "grad_norm": 0.7864227890968323, "learning_rate": 3.536909961193872e-06, "loss": 0.0955, "step": 7091 }, { "epoch": 1.1490602721970187, "grad_norm": 0.923495352268219, "learning_rate": 3.536512029637597e-06, "loss": 0.1104, "step": 7092 }, { "epoch": 1.1492222942320156, "grad_norm": 0.732354998588562, "learning_rate": 3.5361140663665644e-06, "loss": 0.0921, "step": 7093 }, { "epoch": 1.1493843162670123, "grad_norm": 0.8492215871810913, "learning_rate": 3.5357160713929473e-06, "loss": 0.1059, "step": 7094 }, { "epoch": 1.149546338302009, "grad_norm": 0.899074137210846, "learning_rate": 3.5353180447289253e-06, "loss": 0.1078, "step": 7095 }, { "epoch": 1.1497083603370057, "grad_norm": 0.8724895119667053, "learning_rate": 3.534919986386676e-06, "loss": 0.1126, "step": 7096 }, { "epoch": 1.1498703823720027, "grad_norm": 0.9067556262016296, "learning_rate": 3.534521896378381e-06, "loss": 0.1107, "step": 7097 }, { "epoch": 1.1500324044069994, "grad_norm": 0.8136131763458252, "learning_rate": 3.5341237747162183e-06, "loss": 0.1047, "step": 7098 }, { "epoch": 1.150194426441996, "grad_norm": 0.8796071410179138, "learning_rate": 3.533725621412371e-06, "loss": 0.1117, "step": 7099 }, { "epoch": 1.1503564484769928, "grad_norm": 0.8252900242805481, "learning_rate": 3.533327436479021e-06, "loss": 0.1106, "step": 7100 }, { "epoch": 1.1505184705119897, "grad_norm": 0.8371667861938477, "learning_rate": 3.5329292199283522e-06, "loss": 0.1012, "step": 7101 }, { "epoch": 1.1506804925469865, "grad_norm": 0.8886693716049194, "learning_rate": 3.53253097177255e-06, "loss": 0.1118, "step": 7102 }, { "epoch": 1.1508425145819832, "grad_norm": 0.8799384832382202, "learning_rate": 3.532132692023797e-06, "loss": 0.114, "step": 7103 }, { "epoch": 1.1510045366169799, "grad_norm": 0.815346896648407, "learning_rate": 3.531734380694282e-06, "loss": 0.1071, "step": 7104 }, { "epoch": 1.1511665586519766, "grad_norm": 0.8964957594871521, "learning_rate": 3.531336037796192e-06, "loss": 0.1094, "step": 7105 }, { "epoch": 1.1513285806869735, "grad_norm": 0.8892197012901306, "learning_rate": 3.5309376633417146e-06, "loss": 0.1091, "step": 7106 }, { "epoch": 1.1514906027219702, "grad_norm": 0.9133774042129517, "learning_rate": 3.5305392573430398e-06, "loss": 0.1154, "step": 7107 }, { "epoch": 1.151652624756967, "grad_norm": 1.0447365045547485, "learning_rate": 3.530140819812357e-06, "loss": 0.1256, "step": 7108 }, { "epoch": 1.1518146467919637, "grad_norm": 0.8976051807403564, "learning_rate": 3.5297423507618582e-06, "loss": 0.107, "step": 7109 }, { "epoch": 1.1519766688269604, "grad_norm": 0.7730125784873962, "learning_rate": 3.5293438502037363e-06, "loss": 0.0979, "step": 7110 }, { "epoch": 1.1521386908619573, "grad_norm": 0.9154132008552551, "learning_rate": 3.5289453181501832e-06, "loss": 0.1154, "step": 7111 }, { "epoch": 1.152300712896954, "grad_norm": 0.731338620185852, "learning_rate": 3.5285467546133926e-06, "loss": 0.0924, "step": 7112 }, { "epoch": 1.1524627349319507, "grad_norm": 0.8675764203071594, "learning_rate": 3.5281481596055613e-06, "loss": 0.1181, "step": 7113 }, { "epoch": 1.1526247569669474, "grad_norm": 0.7357743382453918, "learning_rate": 3.5277495331388835e-06, "loss": 0.0895, "step": 7114 }, { "epoch": 1.1527867790019442, "grad_norm": 0.8133664131164551, "learning_rate": 3.527350875225558e-06, "loss": 0.1026, "step": 7115 }, { "epoch": 1.152948801036941, "grad_norm": 0.8403448462486267, "learning_rate": 3.526952185877781e-06, "loss": 0.1002, "step": 7116 }, { "epoch": 1.1531108230719378, "grad_norm": 0.7883726358413696, "learning_rate": 3.5265534651077527e-06, "loss": 0.1038, "step": 7117 }, { "epoch": 1.1532728451069345, "grad_norm": 0.9349051117897034, "learning_rate": 3.526154712927672e-06, "loss": 0.1239, "step": 7118 }, { "epoch": 1.1534348671419312, "grad_norm": 0.8447469472885132, "learning_rate": 3.525755929349741e-06, "loss": 0.1049, "step": 7119 }, { "epoch": 1.1535968891769282, "grad_norm": 0.8494859933853149, "learning_rate": 3.525357114386161e-06, "loss": 0.1031, "step": 7120 }, { "epoch": 1.1537589112119249, "grad_norm": 0.7881488800048828, "learning_rate": 3.5249582680491346e-06, "loss": 0.102, "step": 7121 }, { "epoch": 1.1539209332469216, "grad_norm": 0.7983292937278748, "learning_rate": 3.524559390350865e-06, "loss": 0.0992, "step": 7122 }, { "epoch": 1.1540829552819183, "grad_norm": 0.8363736867904663, "learning_rate": 3.5241604813035577e-06, "loss": 0.1017, "step": 7123 }, { "epoch": 1.1542449773169152, "grad_norm": 0.9091150760650635, "learning_rate": 3.523761540919418e-06, "loss": 0.116, "step": 7124 }, { "epoch": 1.154406999351912, "grad_norm": 0.8165498971939087, "learning_rate": 3.5233625692106525e-06, "loss": 0.1047, "step": 7125 }, { "epoch": 1.1545690213869086, "grad_norm": 0.7147705554962158, "learning_rate": 3.5229635661894696e-06, "loss": 0.0823, "step": 7126 }, { "epoch": 1.1547310434219054, "grad_norm": 0.7142750024795532, "learning_rate": 3.5225645318680768e-06, "loss": 0.0944, "step": 7127 }, { "epoch": 1.154893065456902, "grad_norm": 0.795220673084259, "learning_rate": 3.5221654662586837e-06, "loss": 0.0923, "step": 7128 }, { "epoch": 1.155055087491899, "grad_norm": 0.7979345321655273, "learning_rate": 3.5217663693735006e-06, "loss": 0.0994, "step": 7129 }, { "epoch": 1.1552171095268957, "grad_norm": 0.856704592704773, "learning_rate": 3.521367241224739e-06, "loss": 0.1009, "step": 7130 }, { "epoch": 1.1553791315618924, "grad_norm": 0.7614887356758118, "learning_rate": 3.5209680818246124e-06, "loss": 0.0994, "step": 7131 }, { "epoch": 1.1555411535968891, "grad_norm": 0.8549346923828125, "learning_rate": 3.5205688911853326e-06, "loss": 0.098, "step": 7132 }, { "epoch": 1.1557031756318858, "grad_norm": 0.7560814619064331, "learning_rate": 3.520169669319115e-06, "loss": 0.0975, "step": 7133 }, { "epoch": 1.1558651976668828, "grad_norm": 0.754621148109436, "learning_rate": 3.5197704162381742e-06, "loss": 0.0918, "step": 7134 }, { "epoch": 1.1560272197018795, "grad_norm": 0.878743588924408, "learning_rate": 3.5193711319547257e-06, "loss": 0.1169, "step": 7135 }, { "epoch": 1.1561892417368762, "grad_norm": 0.7919618487358093, "learning_rate": 3.5189718164809884e-06, "loss": 0.1018, "step": 7136 }, { "epoch": 1.156351263771873, "grad_norm": 0.8777773976325989, "learning_rate": 3.5185724698291784e-06, "loss": 0.1095, "step": 7137 }, { "epoch": 1.1565132858068696, "grad_norm": 0.9273558855056763, "learning_rate": 3.5181730920115165e-06, "loss": 0.1212, "step": 7138 }, { "epoch": 1.1566753078418666, "grad_norm": 0.7581186890602112, "learning_rate": 3.5177736830402213e-06, "loss": 0.1012, "step": 7139 }, { "epoch": 1.1568373298768633, "grad_norm": 0.8174877762794495, "learning_rate": 3.517374242927514e-06, "loss": 0.107, "step": 7140 }, { "epoch": 1.15699935191186, "grad_norm": 0.8098114132881165, "learning_rate": 3.5169747716856186e-06, "loss": 0.1038, "step": 7141 }, { "epoch": 1.1571613739468567, "grad_norm": 0.8155732154846191, "learning_rate": 3.516575269326755e-06, "loss": 0.0981, "step": 7142 }, { "epoch": 1.1573233959818534, "grad_norm": 0.8303235769271851, "learning_rate": 3.5161757358631482e-06, "loss": 0.1058, "step": 7143 }, { "epoch": 1.1574854180168503, "grad_norm": 0.7622935771942139, "learning_rate": 3.515776171307023e-06, "loss": 0.0971, "step": 7144 }, { "epoch": 1.157647440051847, "grad_norm": 0.7291949391365051, "learning_rate": 3.5153765756706054e-06, "loss": 0.0866, "step": 7145 }, { "epoch": 1.1578094620868438, "grad_norm": 0.7386295199394226, "learning_rate": 3.5149769489661216e-06, "loss": 0.095, "step": 7146 }, { "epoch": 1.1579714841218405, "grad_norm": 0.7064818143844604, "learning_rate": 3.514577291205799e-06, "loss": 0.0891, "step": 7147 }, { "epoch": 1.1581335061568374, "grad_norm": 0.6918752193450928, "learning_rate": 3.5141776024018676e-06, "loss": 0.0936, "step": 7148 }, { "epoch": 1.1582955281918341, "grad_norm": 0.750155508518219, "learning_rate": 3.5137778825665542e-06, "loss": 0.0925, "step": 7149 }, { "epoch": 1.1584575502268308, "grad_norm": 0.7644116878509521, "learning_rate": 3.513378131712092e-06, "loss": 0.0917, "step": 7150 }, { "epoch": 1.1586195722618275, "grad_norm": 0.8276258707046509, "learning_rate": 3.5129783498507114e-06, "loss": 0.0993, "step": 7151 }, { "epoch": 1.1587815942968245, "grad_norm": 0.9577162265777588, "learning_rate": 3.5125785369946442e-06, "loss": 0.1086, "step": 7152 }, { "epoch": 1.1589436163318212, "grad_norm": 0.9136195182800293, "learning_rate": 3.512178693156124e-06, "loss": 0.1052, "step": 7153 }, { "epoch": 1.159105638366818, "grad_norm": 0.9026337265968323, "learning_rate": 3.5117788183473856e-06, "loss": 0.1135, "step": 7154 }, { "epoch": 1.1592676604018146, "grad_norm": 0.836702823638916, "learning_rate": 3.511378912580664e-06, "loss": 0.106, "step": 7155 }, { "epoch": 1.1594296824368113, "grad_norm": 0.7680188417434692, "learning_rate": 3.5109789758681944e-06, "loss": 0.0917, "step": 7156 }, { "epoch": 1.1595917044718083, "grad_norm": 0.9207019209861755, "learning_rate": 3.5105790082222157e-06, "loss": 0.1087, "step": 7157 }, { "epoch": 1.159753726506805, "grad_norm": 0.7964971661567688, "learning_rate": 3.5101790096549643e-06, "loss": 0.0916, "step": 7158 }, { "epoch": 1.1599157485418017, "grad_norm": 0.8500689268112183, "learning_rate": 3.5097789801786797e-06, "loss": 0.1081, "step": 7159 }, { "epoch": 1.1600777705767984, "grad_norm": 0.8392435908317566, "learning_rate": 3.509378919805602e-06, "loss": 0.095, "step": 7160 }, { "epoch": 1.160239792611795, "grad_norm": 0.7941946983337402, "learning_rate": 3.508978828547972e-06, "loss": 0.0961, "step": 7161 }, { "epoch": 1.160401814646792, "grad_norm": 0.800559937953949, "learning_rate": 3.5085787064180317e-06, "loss": 0.1083, "step": 7162 }, { "epoch": 1.1605638366817888, "grad_norm": 0.8886862993240356, "learning_rate": 3.5081785534280233e-06, "loss": 0.1146, "step": 7163 }, { "epoch": 1.1607258587167855, "grad_norm": 0.8028414249420166, "learning_rate": 3.5077783695901917e-06, "loss": 0.1006, "step": 7164 }, { "epoch": 1.1608878807517822, "grad_norm": 0.7642043828964233, "learning_rate": 3.507378154916781e-06, "loss": 0.0978, "step": 7165 }, { "epoch": 1.1610499027867789, "grad_norm": 0.7976152896881104, "learning_rate": 3.506977909420035e-06, "loss": 0.0988, "step": 7166 }, { "epoch": 1.1612119248217758, "grad_norm": 0.8192991018295288, "learning_rate": 3.5065776331122027e-06, "loss": 0.1036, "step": 7167 }, { "epoch": 1.1613739468567725, "grad_norm": 0.7200789451599121, "learning_rate": 3.506177326005531e-06, "loss": 0.0982, "step": 7168 }, { "epoch": 1.1615359688917692, "grad_norm": 0.7286558151245117, "learning_rate": 3.5057769881122674e-06, "loss": 0.1023, "step": 7169 }, { "epoch": 1.161697990926766, "grad_norm": 0.9604185223579407, "learning_rate": 3.5053766194446626e-06, "loss": 0.1162, "step": 7170 }, { "epoch": 1.161860012961763, "grad_norm": 0.8336222171783447, "learning_rate": 3.504976220014965e-06, "loss": 0.098, "step": 7171 }, { "epoch": 1.1620220349967596, "grad_norm": 0.7740049362182617, "learning_rate": 3.504575789835428e-06, "loss": 0.1082, "step": 7172 }, { "epoch": 1.1621840570317563, "grad_norm": 0.8649178147315979, "learning_rate": 3.5041753289183033e-06, "loss": 0.107, "step": 7173 }, { "epoch": 1.162346079066753, "grad_norm": 0.8420133590698242, "learning_rate": 3.503774837275843e-06, "loss": 0.1032, "step": 7174 }, { "epoch": 1.16250810110175, "grad_norm": 0.8649719953536987, "learning_rate": 3.5033743149203013e-06, "loss": 0.1087, "step": 7175 }, { "epoch": 1.1626701231367467, "grad_norm": 0.8566991686820984, "learning_rate": 3.5029737618639344e-06, "loss": 0.0972, "step": 7176 }, { "epoch": 1.1628321451717434, "grad_norm": 0.7619419097900391, "learning_rate": 3.5025731781189976e-06, "loss": 0.0886, "step": 7177 }, { "epoch": 1.16299416720674, "grad_norm": 0.7711379528045654, "learning_rate": 3.5021725636977466e-06, "loss": 0.0956, "step": 7178 }, { "epoch": 1.1631561892417368, "grad_norm": 0.9927428364753723, "learning_rate": 3.501771918612442e-06, "loss": 0.1124, "step": 7179 }, { "epoch": 1.1633182112767337, "grad_norm": 0.776023268699646, "learning_rate": 3.5013712428753392e-06, "loss": 0.0964, "step": 7180 }, { "epoch": 1.1634802333117304, "grad_norm": 0.9973410367965698, "learning_rate": 3.500970536498701e-06, "loss": 0.1263, "step": 7181 }, { "epoch": 1.1636422553467272, "grad_norm": 0.8526068329811096, "learning_rate": 3.500569799494786e-06, "loss": 0.1016, "step": 7182 }, { "epoch": 1.1638042773817239, "grad_norm": 0.8753652572631836, "learning_rate": 3.500169031875857e-06, "loss": 0.0948, "step": 7183 }, { "epoch": 1.1639662994167206, "grad_norm": 0.8186274170875549, "learning_rate": 3.4997682336541756e-06, "loss": 0.1025, "step": 7184 }, { "epoch": 1.1641283214517175, "grad_norm": 0.7919983267784119, "learning_rate": 3.4993674048420056e-06, "loss": 0.1053, "step": 7185 }, { "epoch": 1.1642903434867142, "grad_norm": 0.8245944976806641, "learning_rate": 3.498966545451612e-06, "loss": 0.0985, "step": 7186 }, { "epoch": 1.164452365521711, "grad_norm": 0.8587673902511597, "learning_rate": 3.4985656554952583e-06, "loss": 0.1062, "step": 7187 }, { "epoch": 1.1646143875567077, "grad_norm": 0.9467253088951111, "learning_rate": 3.4981647349852137e-06, "loss": 0.1154, "step": 7188 }, { "epoch": 1.1647764095917044, "grad_norm": 0.8093448877334595, "learning_rate": 3.497763783933743e-06, "loss": 0.1013, "step": 7189 }, { "epoch": 1.1649384316267013, "grad_norm": 0.8070668578147888, "learning_rate": 3.4973628023531146e-06, "loss": 0.0967, "step": 7190 }, { "epoch": 1.165100453661698, "grad_norm": 0.8548862934112549, "learning_rate": 3.4969617902555984e-06, "loss": 0.0957, "step": 7191 }, { "epoch": 1.1652624756966947, "grad_norm": 0.8574987649917603, "learning_rate": 3.496560747653464e-06, "loss": 0.0881, "step": 7192 }, { "epoch": 1.1654244977316914, "grad_norm": 0.7991284132003784, "learning_rate": 3.496159674558982e-06, "loss": 0.0937, "step": 7193 }, { "epoch": 1.1655865197666881, "grad_norm": 0.9537588357925415, "learning_rate": 3.4957585709844254e-06, "loss": 0.1166, "step": 7194 }, { "epoch": 1.165748541801685, "grad_norm": 0.7716676592826843, "learning_rate": 3.495357436942066e-06, "loss": 0.0916, "step": 7195 }, { "epoch": 1.1659105638366818, "grad_norm": 0.8516098856925964, "learning_rate": 3.494956272444177e-06, "loss": 0.1054, "step": 7196 }, { "epoch": 1.1660725858716785, "grad_norm": 0.7702400088310242, "learning_rate": 3.4945550775030346e-06, "loss": 0.0919, "step": 7197 }, { "epoch": 1.1662346079066752, "grad_norm": 0.8230445981025696, "learning_rate": 3.494153852130913e-06, "loss": 0.102, "step": 7198 }, { "epoch": 1.1663966299416721, "grad_norm": 0.8861101269721985, "learning_rate": 3.49375259634009e-06, "loss": 0.1092, "step": 7199 }, { "epoch": 1.1665586519766689, "grad_norm": 0.8347850441932678, "learning_rate": 3.4933513101428416e-06, "loss": 0.1064, "step": 7200 }, { "epoch": 1.1667206740116656, "grad_norm": 0.7879016399383545, "learning_rate": 3.492949993551448e-06, "loss": 0.0927, "step": 7201 }, { "epoch": 1.1668826960466623, "grad_norm": 0.8113725781440735, "learning_rate": 3.4925486465781865e-06, "loss": 0.0954, "step": 7202 }, { "epoch": 1.1670447180816592, "grad_norm": 0.709119439125061, "learning_rate": 3.492147269235339e-06, "loss": 0.092, "step": 7203 }, { "epoch": 1.167206740116656, "grad_norm": 1.0254310369491577, "learning_rate": 3.4917458615351853e-06, "loss": 0.1181, "step": 7204 }, { "epoch": 1.1673687621516526, "grad_norm": 0.7908617258071899, "learning_rate": 3.4913444234900092e-06, "loss": 0.0991, "step": 7205 }, { "epoch": 1.1675307841866494, "grad_norm": 0.9039345979690552, "learning_rate": 3.490942955112092e-06, "loss": 0.1172, "step": 7206 }, { "epoch": 1.167692806221646, "grad_norm": 0.7758670449256897, "learning_rate": 3.4905414564137187e-06, "loss": 0.0894, "step": 7207 }, { "epoch": 1.167854828256643, "grad_norm": 0.9767671823501587, "learning_rate": 3.490139927407174e-06, "loss": 0.1139, "step": 7208 }, { "epoch": 1.1680168502916397, "grad_norm": 1.0379667282104492, "learning_rate": 3.489738368104743e-06, "loss": 0.1256, "step": 7209 }, { "epoch": 1.1681788723266364, "grad_norm": 0.9443620443344116, "learning_rate": 3.4893367785187137e-06, "loss": 0.1217, "step": 7210 }, { "epoch": 1.1683408943616331, "grad_norm": 0.8020828366279602, "learning_rate": 3.488935158661373e-06, "loss": 0.1089, "step": 7211 }, { "epoch": 1.1685029163966298, "grad_norm": 0.8911034464836121, "learning_rate": 3.4885335085450095e-06, "loss": 0.1104, "step": 7212 }, { "epoch": 1.1686649384316268, "grad_norm": 0.8075478672981262, "learning_rate": 3.4881318281819134e-06, "loss": 0.0951, "step": 7213 }, { "epoch": 1.1688269604666235, "grad_norm": 0.9641019701957703, "learning_rate": 3.4877301175843735e-06, "loss": 0.1115, "step": 7214 }, { "epoch": 1.1689889825016202, "grad_norm": 0.8437277674674988, "learning_rate": 3.4873283767646828e-06, "loss": 0.1074, "step": 7215 }, { "epoch": 1.169151004536617, "grad_norm": 0.7787491679191589, "learning_rate": 3.486926605735133e-06, "loss": 0.0975, "step": 7216 }, { "epoch": 1.1693130265716136, "grad_norm": 0.9039609432220459, "learning_rate": 3.486524804508018e-06, "loss": 0.1196, "step": 7217 }, { "epoch": 1.1694750486066106, "grad_norm": 0.8110989928245544, "learning_rate": 3.486122973095631e-06, "loss": 0.0996, "step": 7218 }, { "epoch": 1.1696370706416073, "grad_norm": 0.7526705265045166, "learning_rate": 3.485721111510267e-06, "loss": 0.1038, "step": 7219 }, { "epoch": 1.169799092676604, "grad_norm": 0.9018827676773071, "learning_rate": 3.4853192197642226e-06, "loss": 0.124, "step": 7220 }, { "epoch": 1.1699611147116007, "grad_norm": 0.8486402034759521, "learning_rate": 3.4849172978697942e-06, "loss": 0.1047, "step": 7221 }, { "epoch": 1.1701231367465976, "grad_norm": 0.9394757151603699, "learning_rate": 3.48451534583928e-06, "loss": 0.1075, "step": 7222 }, { "epoch": 1.1702851587815943, "grad_norm": 0.8895528316497803, "learning_rate": 3.4841133636849787e-06, "loss": 0.1223, "step": 7223 }, { "epoch": 1.170447180816591, "grad_norm": 0.8455293774604797, "learning_rate": 3.4837113514191907e-06, "loss": 0.1074, "step": 7224 }, { "epoch": 1.1706092028515878, "grad_norm": 0.9192505478858948, "learning_rate": 3.483309309054216e-06, "loss": 0.1201, "step": 7225 }, { "epoch": 1.1707712248865847, "grad_norm": 0.9902666211128235, "learning_rate": 3.482907236602354e-06, "loss": 0.1139, "step": 7226 }, { "epoch": 1.1709332469215814, "grad_norm": 0.8750571608543396, "learning_rate": 3.4825051340759114e-06, "loss": 0.1164, "step": 7227 }, { "epoch": 1.1710952689565781, "grad_norm": 0.793886125087738, "learning_rate": 3.4821030014871886e-06, "loss": 0.1018, "step": 7228 }, { "epoch": 1.1712572909915748, "grad_norm": 0.824790358543396, "learning_rate": 3.48170083884849e-06, "loss": 0.1069, "step": 7229 }, { "epoch": 1.1714193130265715, "grad_norm": 0.8293957114219666, "learning_rate": 3.481298646172122e-06, "loss": 0.1003, "step": 7230 }, { "epoch": 1.1715813350615685, "grad_norm": 0.8428707718849182, "learning_rate": 3.4808964234703903e-06, "loss": 0.0993, "step": 7231 }, { "epoch": 1.1717433570965652, "grad_norm": 0.8865581154823303, "learning_rate": 3.480494170755602e-06, "loss": 0.1059, "step": 7232 }, { "epoch": 1.171905379131562, "grad_norm": 0.8242313265800476, "learning_rate": 3.4800918880400635e-06, "loss": 0.105, "step": 7233 }, { "epoch": 1.1720674011665586, "grad_norm": 0.843008279800415, "learning_rate": 3.479689575336086e-06, "loss": 0.099, "step": 7234 }, { "epoch": 1.1722294232015553, "grad_norm": 0.7857580780982971, "learning_rate": 3.479287232655978e-06, "loss": 0.0967, "step": 7235 }, { "epoch": 1.1723914452365523, "grad_norm": 0.8480691313743591, "learning_rate": 3.4788848600120507e-06, "loss": 0.0949, "step": 7236 }, { "epoch": 1.172553467271549, "grad_norm": 0.783110499382019, "learning_rate": 3.4784824574166153e-06, "loss": 0.0997, "step": 7237 }, { "epoch": 1.1727154893065457, "grad_norm": 0.8678748607635498, "learning_rate": 3.4780800248819847e-06, "loss": 0.0947, "step": 7238 }, { "epoch": 1.1728775113415424, "grad_norm": 0.8238358497619629, "learning_rate": 3.477677562420472e-06, "loss": 0.1107, "step": 7239 }, { "epoch": 1.173039533376539, "grad_norm": 0.8292809724807739, "learning_rate": 3.4772750700443923e-06, "loss": 0.1047, "step": 7240 }, { "epoch": 1.173201555411536, "grad_norm": 0.8215393424034119, "learning_rate": 3.47687254776606e-06, "loss": 0.0902, "step": 7241 }, { "epoch": 1.1733635774465327, "grad_norm": 0.9231199622154236, "learning_rate": 3.476469995597792e-06, "loss": 0.1207, "step": 7242 }, { "epoch": 1.1735255994815295, "grad_norm": 0.7816325426101685, "learning_rate": 3.476067413551906e-06, "loss": 0.0951, "step": 7243 }, { "epoch": 1.1736876215165262, "grad_norm": 0.8484027981758118, "learning_rate": 3.4756648016407175e-06, "loss": 0.103, "step": 7244 }, { "epoch": 1.173849643551523, "grad_norm": 0.7972317337989807, "learning_rate": 3.475262159876548e-06, "loss": 0.1049, "step": 7245 }, { "epoch": 1.1740116655865198, "grad_norm": 0.8663620948791504, "learning_rate": 3.4748594882717163e-06, "loss": 0.0965, "step": 7246 }, { "epoch": 1.1741736876215165, "grad_norm": 0.9606160521507263, "learning_rate": 3.4744567868385432e-06, "loss": 0.1068, "step": 7247 }, { "epoch": 1.1743357096565132, "grad_norm": 0.9100252389907837, "learning_rate": 3.474054055589351e-06, "loss": 0.1114, "step": 7248 }, { "epoch": 1.17449773169151, "grad_norm": 0.7617812156677246, "learning_rate": 3.473651294536462e-06, "loss": 0.1033, "step": 7249 }, { "epoch": 1.1746597537265069, "grad_norm": 0.7390443086624146, "learning_rate": 3.473248503692199e-06, "loss": 0.0894, "step": 7250 }, { "epoch": 1.1748217757615036, "grad_norm": 0.9591291546821594, "learning_rate": 3.4728456830688873e-06, "loss": 0.1099, "step": 7251 }, { "epoch": 1.1749837977965003, "grad_norm": 0.8032423853874207, "learning_rate": 3.472442832678852e-06, "loss": 0.1055, "step": 7252 }, { "epoch": 1.175145819831497, "grad_norm": 0.8758203387260437, "learning_rate": 3.472039952534419e-06, "loss": 0.107, "step": 7253 }, { "epoch": 1.175307841866494, "grad_norm": 0.8825302124023438, "learning_rate": 3.471637042647916e-06, "loss": 0.1115, "step": 7254 }, { "epoch": 1.1754698639014907, "grad_norm": 0.9467746019363403, "learning_rate": 3.471234103031671e-06, "loss": 0.122, "step": 7255 }, { "epoch": 1.1756318859364874, "grad_norm": 0.8400065302848816, "learning_rate": 3.470831133698013e-06, "loss": 0.1034, "step": 7256 }, { "epoch": 1.175793907971484, "grad_norm": 0.8600103259086609, "learning_rate": 3.4704281346592703e-06, "loss": 0.1021, "step": 7257 }, { "epoch": 1.1759559300064808, "grad_norm": 1.0073620080947876, "learning_rate": 3.470025105927777e-06, "loss": 0.1043, "step": 7258 }, { "epoch": 1.1761179520414777, "grad_norm": 0.8043482303619385, "learning_rate": 3.4696220475158615e-06, "loss": 0.0953, "step": 7259 }, { "epoch": 1.1762799740764744, "grad_norm": 0.9232493042945862, "learning_rate": 3.4692189594358578e-06, "loss": 0.1104, "step": 7260 }, { "epoch": 1.1764419961114712, "grad_norm": 0.8835765719413757, "learning_rate": 3.4688158417000993e-06, "loss": 0.1025, "step": 7261 }, { "epoch": 1.1766040181464679, "grad_norm": 0.9452476501464844, "learning_rate": 3.468412694320921e-06, "loss": 0.1119, "step": 7262 }, { "epoch": 1.1767660401814646, "grad_norm": 0.7869393825531006, "learning_rate": 3.468009517310659e-06, "loss": 0.0965, "step": 7263 }, { "epoch": 1.1769280622164615, "grad_norm": 1.0424165725708008, "learning_rate": 3.467606310681646e-06, "loss": 0.1077, "step": 7264 }, { "epoch": 1.1770900842514582, "grad_norm": 0.8021036982536316, "learning_rate": 3.4672030744462224e-06, "loss": 0.1049, "step": 7265 }, { "epoch": 1.177252106286455, "grad_norm": 0.830315113067627, "learning_rate": 3.4667998086167253e-06, "loss": 0.099, "step": 7266 }, { "epoch": 1.1774141283214516, "grad_norm": 0.93221116065979, "learning_rate": 3.4663965132054943e-06, "loss": 0.1029, "step": 7267 }, { "epoch": 1.1775761503564484, "grad_norm": 0.9032275080680847, "learning_rate": 3.465993188224868e-06, "loss": 0.1074, "step": 7268 }, { "epoch": 1.1777381723914453, "grad_norm": 0.8030032515525818, "learning_rate": 3.465589833687188e-06, "loss": 0.0987, "step": 7269 }, { "epoch": 1.177900194426442, "grad_norm": 0.7802344560623169, "learning_rate": 3.4651864496047952e-06, "loss": 0.0968, "step": 7270 }, { "epoch": 1.1780622164614387, "grad_norm": 0.816124439239502, "learning_rate": 3.4647830359900335e-06, "loss": 0.1089, "step": 7271 }, { "epoch": 1.1782242384964354, "grad_norm": 0.6987655758857727, "learning_rate": 3.464379592855246e-06, "loss": 0.0939, "step": 7272 }, { "epoch": 1.1783862605314324, "grad_norm": 0.9841788411140442, "learning_rate": 3.463976120212776e-06, "loss": 0.1049, "step": 7273 }, { "epoch": 1.178548282566429, "grad_norm": 0.9158380627632141, "learning_rate": 3.4635726180749698e-06, "loss": 0.1152, "step": 7274 }, { "epoch": 1.1787103046014258, "grad_norm": 0.7727981805801392, "learning_rate": 3.4631690864541723e-06, "loss": 0.1027, "step": 7275 }, { "epoch": 1.1788723266364225, "grad_norm": 0.7879157662391663, "learning_rate": 3.4627655253627324e-06, "loss": 0.1041, "step": 7276 }, { "epoch": 1.1790343486714194, "grad_norm": 0.8352596759796143, "learning_rate": 3.4623619348129973e-06, "loss": 0.1104, "step": 7277 }, { "epoch": 1.1791963707064161, "grad_norm": 0.8539366126060486, "learning_rate": 3.461958314817316e-06, "loss": 0.1097, "step": 7278 }, { "epoch": 1.1793583927414129, "grad_norm": 0.8420810699462891, "learning_rate": 3.461554665388038e-06, "loss": 0.111, "step": 7279 }, { "epoch": 1.1795204147764096, "grad_norm": 0.864422082901001, "learning_rate": 3.4611509865375143e-06, "loss": 0.0987, "step": 7280 }, { "epoch": 1.1796824368114063, "grad_norm": 0.9048283100128174, "learning_rate": 3.460747278278096e-06, "loss": 0.1139, "step": 7281 }, { "epoch": 1.1798444588464032, "grad_norm": 0.9229883551597595, "learning_rate": 3.4603435406221356e-06, "loss": 0.1075, "step": 7282 }, { "epoch": 1.1800064808814, "grad_norm": 0.8494004011154175, "learning_rate": 3.4599397735819877e-06, "loss": 0.1002, "step": 7283 }, { "epoch": 1.1801685029163966, "grad_norm": 0.926969051361084, "learning_rate": 3.4595359771700055e-06, "loss": 0.1131, "step": 7284 }, { "epoch": 1.1803305249513933, "grad_norm": 0.9146028757095337, "learning_rate": 3.459132151398544e-06, "loss": 0.1114, "step": 7285 }, { "epoch": 1.18049254698639, "grad_norm": 0.937514066696167, "learning_rate": 3.4587282962799602e-06, "loss": 0.1124, "step": 7286 }, { "epoch": 1.180654569021387, "grad_norm": 0.7594295144081116, "learning_rate": 3.4583244118266107e-06, "loss": 0.0934, "step": 7287 }, { "epoch": 1.1808165910563837, "grad_norm": 0.8070241212844849, "learning_rate": 3.4579204980508525e-06, "loss": 0.0971, "step": 7288 }, { "epoch": 1.1809786130913804, "grad_norm": 0.8179810047149658, "learning_rate": 3.4575165549650463e-06, "loss": 0.0992, "step": 7289 }, { "epoch": 1.1811406351263771, "grad_norm": 0.8048136830329895, "learning_rate": 3.45711258258155e-06, "loss": 0.1008, "step": 7290 }, { "epoch": 1.1813026571613738, "grad_norm": 0.755553662776947, "learning_rate": 3.4567085809127247e-06, "loss": 0.0869, "step": 7291 }, { "epoch": 1.1814646791963708, "grad_norm": 0.9157342314720154, "learning_rate": 3.4563045499709324e-06, "loss": 0.118, "step": 7292 }, { "epoch": 1.1816267012313675, "grad_norm": 1.028659462928772, "learning_rate": 3.455900489768535e-06, "loss": 0.1275, "step": 7293 }, { "epoch": 1.1817887232663642, "grad_norm": 0.9139890074729919, "learning_rate": 3.455496400317896e-06, "loss": 0.1034, "step": 7294 }, { "epoch": 1.181950745301361, "grad_norm": 0.8444374799728394, "learning_rate": 3.455092281631379e-06, "loss": 0.1173, "step": 7295 }, { "epoch": 1.1821127673363578, "grad_norm": 0.8052892088890076, "learning_rate": 3.45468813372135e-06, "loss": 0.097, "step": 7296 }, { "epoch": 1.1822747893713546, "grad_norm": 0.7657241225242615, "learning_rate": 3.4542839566001728e-06, "loss": 0.0919, "step": 7297 }, { "epoch": 1.1824368114063513, "grad_norm": 0.8102367520332336, "learning_rate": 3.453879750280218e-06, "loss": 0.1024, "step": 7298 }, { "epoch": 1.182598833441348, "grad_norm": 0.7548478245735168, "learning_rate": 3.45347551477385e-06, "loss": 0.0999, "step": 7299 }, { "epoch": 1.1827608554763447, "grad_norm": 0.8339917659759521, "learning_rate": 3.4530712500934393e-06, "loss": 0.0953, "step": 7300 }, { "epoch": 1.1829228775113416, "grad_norm": 0.7205444574356079, "learning_rate": 3.4526669562513536e-06, "loss": 0.0898, "step": 7301 }, { "epoch": 1.1830848995463383, "grad_norm": 0.8617227077484131, "learning_rate": 3.4522626332599657e-06, "loss": 0.1154, "step": 7302 }, { "epoch": 1.183246921581335, "grad_norm": 0.7841586470603943, "learning_rate": 3.4518582811316455e-06, "loss": 0.1032, "step": 7303 }, { "epoch": 1.1834089436163318, "grad_norm": 0.9288532733917236, "learning_rate": 3.451453899878765e-06, "loss": 0.1166, "step": 7304 }, { "epoch": 1.1835709656513287, "grad_norm": 0.7976135611534119, "learning_rate": 3.4510494895136975e-06, "loss": 0.1023, "step": 7305 }, { "epoch": 1.1837329876863254, "grad_norm": 0.8904582858085632, "learning_rate": 3.450645050048817e-06, "loss": 0.1189, "step": 7306 }, { "epoch": 1.1838950097213221, "grad_norm": 0.8351088166236877, "learning_rate": 3.450240581496499e-06, "loss": 0.0989, "step": 7307 }, { "epoch": 1.1840570317563188, "grad_norm": 0.8244988322257996, "learning_rate": 3.449836083869118e-06, "loss": 0.1055, "step": 7308 }, { "epoch": 1.1842190537913155, "grad_norm": 0.7345836758613586, "learning_rate": 3.449431557179052e-06, "loss": 0.0927, "step": 7309 }, { "epoch": 1.1843810758263125, "grad_norm": 0.8357600569725037, "learning_rate": 3.449027001438678e-06, "loss": 0.1007, "step": 7310 }, { "epoch": 1.1845430978613092, "grad_norm": 0.7918616533279419, "learning_rate": 3.4486224166603743e-06, "loss": 0.0971, "step": 7311 }, { "epoch": 1.184705119896306, "grad_norm": 0.8694443702697754, "learning_rate": 3.44821780285652e-06, "loss": 0.109, "step": 7312 }, { "epoch": 1.1848671419313026, "grad_norm": 0.8621920943260193, "learning_rate": 3.447813160039496e-06, "loss": 0.106, "step": 7313 }, { "epoch": 1.1850291639662993, "grad_norm": 0.8037292957305908, "learning_rate": 3.4474084882216826e-06, "loss": 0.1025, "step": 7314 }, { "epoch": 1.1851911860012962, "grad_norm": 0.8269169330596924, "learning_rate": 3.447003787415462e-06, "loss": 0.114, "step": 7315 }, { "epoch": 1.185353208036293, "grad_norm": 0.9500218629837036, "learning_rate": 3.4465990576332177e-06, "loss": 0.114, "step": 7316 }, { "epoch": 1.1855152300712897, "grad_norm": 0.9417467713356018, "learning_rate": 3.4461942988873322e-06, "loss": 0.1184, "step": 7317 }, { "epoch": 1.1856772521062864, "grad_norm": 0.8411504030227661, "learning_rate": 3.445789511190192e-06, "loss": 0.1066, "step": 7318 }, { "epoch": 1.185839274141283, "grad_norm": 0.8924190402030945, "learning_rate": 3.44538469455418e-06, "loss": 0.102, "step": 7319 }, { "epoch": 1.18600129617628, "grad_norm": 0.977077305316925, "learning_rate": 3.4449798489916856e-06, "loss": 0.1224, "step": 7320 }, { "epoch": 1.1861633182112767, "grad_norm": 0.949083149433136, "learning_rate": 3.444574974515094e-06, "loss": 0.1256, "step": 7321 }, { "epoch": 1.1863253402462735, "grad_norm": 0.8046411871910095, "learning_rate": 3.444170071136794e-06, "loss": 0.098, "step": 7322 }, { "epoch": 1.1864873622812702, "grad_norm": 0.918962299823761, "learning_rate": 3.443765138869175e-06, "loss": 0.1103, "step": 7323 }, { "epoch": 1.186649384316267, "grad_norm": 0.9088669419288635, "learning_rate": 3.4433601777246263e-06, "loss": 0.1128, "step": 7324 }, { "epoch": 1.1868114063512638, "grad_norm": 0.8256091475486755, "learning_rate": 3.4429551877155396e-06, "loss": 0.1039, "step": 7325 }, { "epoch": 1.1869734283862605, "grad_norm": 0.8927512168884277, "learning_rate": 3.442550168854305e-06, "loss": 0.1106, "step": 7326 }, { "epoch": 1.1871354504212572, "grad_norm": 0.7968646883964539, "learning_rate": 3.442145121153317e-06, "loss": 0.102, "step": 7327 }, { "epoch": 1.1872974724562542, "grad_norm": 0.8282347321510315, "learning_rate": 3.4417400446249684e-06, "loss": 0.1074, "step": 7328 }, { "epoch": 1.1874594944912509, "grad_norm": 0.7620131969451904, "learning_rate": 3.4413349392816526e-06, "loss": 0.0978, "step": 7329 }, { "epoch": 1.1876215165262476, "grad_norm": 0.7769449353218079, "learning_rate": 3.440929805135766e-06, "loss": 0.0909, "step": 7330 }, { "epoch": 1.1877835385612443, "grad_norm": 0.8219037055969238, "learning_rate": 3.440524642199704e-06, "loss": 0.1035, "step": 7331 }, { "epoch": 1.187945560596241, "grad_norm": 0.9052240252494812, "learning_rate": 3.440119450485865e-06, "loss": 0.1266, "step": 7332 }, { "epoch": 1.188107582631238, "grad_norm": 0.8714154958724976, "learning_rate": 3.439714230006645e-06, "loss": 0.1143, "step": 7333 }, { "epoch": 1.1882696046662347, "grad_norm": 0.7879825234413147, "learning_rate": 3.439308980774444e-06, "loss": 0.0973, "step": 7334 }, { "epoch": 1.1884316267012314, "grad_norm": 0.8592246770858765, "learning_rate": 3.4389037028016615e-06, "loss": 0.1139, "step": 7335 }, { "epoch": 1.188593648736228, "grad_norm": 0.7850585579872131, "learning_rate": 3.438498396100697e-06, "loss": 0.1065, "step": 7336 }, { "epoch": 1.1887556707712248, "grad_norm": 0.8654761910438538, "learning_rate": 3.438093060683953e-06, "loss": 0.1017, "step": 7337 }, { "epoch": 1.1889176928062217, "grad_norm": 0.6961141228675842, "learning_rate": 3.4376876965638317e-06, "loss": 0.0819, "step": 7338 }, { "epoch": 1.1890797148412184, "grad_norm": 0.9045152068138123, "learning_rate": 3.4372823037527357e-06, "loss": 0.1117, "step": 7339 }, { "epoch": 1.1892417368762151, "grad_norm": 0.9146777391433716, "learning_rate": 3.4368768822630705e-06, "loss": 0.1137, "step": 7340 }, { "epoch": 1.1894037589112119, "grad_norm": 0.7968778014183044, "learning_rate": 3.4364714321072384e-06, "loss": 0.0937, "step": 7341 }, { "epoch": 1.1895657809462086, "grad_norm": 0.8366844654083252, "learning_rate": 3.4360659532976475e-06, "loss": 0.1011, "step": 7342 }, { "epoch": 1.1897278029812055, "grad_norm": 0.9418715238571167, "learning_rate": 3.4356604458467034e-06, "loss": 0.1189, "step": 7343 }, { "epoch": 1.1898898250162022, "grad_norm": 0.8848615288734436, "learning_rate": 3.435254909766814e-06, "loss": 0.1043, "step": 7344 }, { "epoch": 1.190051847051199, "grad_norm": 0.7343488931655884, "learning_rate": 3.434849345070388e-06, "loss": 0.1001, "step": 7345 }, { "epoch": 1.1902138690861956, "grad_norm": 0.9168867468833923, "learning_rate": 3.4344437517698336e-06, "loss": 0.1032, "step": 7346 }, { "epoch": 1.1903758911211926, "grad_norm": 0.7533358931541443, "learning_rate": 3.4340381298775628e-06, "loss": 0.0936, "step": 7347 }, { "epoch": 1.1905379131561893, "grad_norm": 0.8484786152839661, "learning_rate": 3.433632479405984e-06, "loss": 0.1138, "step": 7348 }, { "epoch": 1.190699935191186, "grad_norm": 0.7753875255584717, "learning_rate": 3.4332268003675117e-06, "loss": 0.0983, "step": 7349 }, { "epoch": 1.1908619572261827, "grad_norm": 0.7216082215309143, "learning_rate": 3.4328210927745577e-06, "loss": 0.0931, "step": 7350 }, { "epoch": 1.1910239792611796, "grad_norm": 0.8276437520980835, "learning_rate": 3.432415356639536e-06, "loss": 0.1084, "step": 7351 }, { "epoch": 1.1911860012961764, "grad_norm": 0.9757489562034607, "learning_rate": 3.4320095919748596e-06, "loss": 0.1231, "step": 7352 }, { "epoch": 1.191348023331173, "grad_norm": 0.760451078414917, "learning_rate": 3.4316037987929456e-06, "loss": 0.0991, "step": 7353 }, { "epoch": 1.1915100453661698, "grad_norm": 0.7524585127830505, "learning_rate": 3.43119797710621e-06, "loss": 0.0946, "step": 7354 }, { "epoch": 1.1916720674011665, "grad_norm": 0.9145233631134033, "learning_rate": 3.4307921269270694e-06, "loss": 0.1226, "step": 7355 }, { "epoch": 1.1918340894361634, "grad_norm": 0.8342117667198181, "learning_rate": 3.4303862482679435e-06, "loss": 0.1062, "step": 7356 }, { "epoch": 1.1919961114711601, "grad_norm": 0.9443584680557251, "learning_rate": 3.429980341141248e-06, "loss": 0.1181, "step": 7357 }, { "epoch": 1.1921581335061568, "grad_norm": 0.7842267155647278, "learning_rate": 3.429574405559406e-06, "loss": 0.1026, "step": 7358 }, { "epoch": 1.1923201555411536, "grad_norm": 0.8545981645584106, "learning_rate": 3.429168441534836e-06, "loss": 0.1103, "step": 7359 }, { "epoch": 1.1924821775761503, "grad_norm": 0.8200758695602417, "learning_rate": 3.4287624490799605e-06, "loss": 0.1054, "step": 7360 }, { "epoch": 1.1926441996111472, "grad_norm": 0.8782671093940735, "learning_rate": 3.428356428207201e-06, "loss": 0.1025, "step": 7361 }, { "epoch": 1.192806221646144, "grad_norm": 0.841168999671936, "learning_rate": 3.4279503789289824e-06, "loss": 0.1002, "step": 7362 }, { "epoch": 1.1929682436811406, "grad_norm": 0.7707106471061707, "learning_rate": 3.427544301257727e-06, "loss": 0.0937, "step": 7363 }, { "epoch": 1.1931302657161373, "grad_norm": 1.0922257900238037, "learning_rate": 3.4271381952058607e-06, "loss": 0.1023, "step": 7364 }, { "epoch": 1.193292287751134, "grad_norm": 0.8869131803512573, "learning_rate": 3.4267320607858094e-06, "loss": 0.1113, "step": 7365 }, { "epoch": 1.193454309786131, "grad_norm": 0.8469724655151367, "learning_rate": 3.42632589801e-06, "loss": 0.1005, "step": 7366 }, { "epoch": 1.1936163318211277, "grad_norm": 0.9287112951278687, "learning_rate": 3.42591970689086e-06, "loss": 0.1235, "step": 7367 }, { "epoch": 1.1937783538561244, "grad_norm": 0.7303425669670105, "learning_rate": 3.425513487440817e-06, "loss": 0.0855, "step": 7368 }, { "epoch": 1.1939403758911211, "grad_norm": 0.7840588688850403, "learning_rate": 3.425107239672301e-06, "loss": 0.1109, "step": 7369 }, { "epoch": 1.1941023979261178, "grad_norm": 0.9029366970062256, "learning_rate": 3.4247009635977425e-06, "loss": 0.1064, "step": 7370 }, { "epoch": 1.1942644199611148, "grad_norm": 0.9053102731704712, "learning_rate": 3.4242946592295724e-06, "loss": 0.1151, "step": 7371 }, { "epoch": 1.1944264419961115, "grad_norm": 0.7992795705795288, "learning_rate": 3.4238883265802215e-06, "loss": 0.1023, "step": 7372 }, { "epoch": 1.1945884640311082, "grad_norm": 0.7910366058349609, "learning_rate": 3.423481965662125e-06, "loss": 0.1009, "step": 7373 }, { "epoch": 1.194750486066105, "grad_norm": 0.756036102771759, "learning_rate": 3.4230755764877133e-06, "loss": 0.0944, "step": 7374 }, { "epoch": 1.1949125081011018, "grad_norm": 0.8976595401763916, "learning_rate": 3.4226691590694244e-06, "loss": 0.1048, "step": 7375 }, { "epoch": 1.1950745301360985, "grad_norm": 0.8526069521903992, "learning_rate": 3.4222627134196917e-06, "loss": 0.1115, "step": 7376 }, { "epoch": 1.1952365521710953, "grad_norm": 0.8503440022468567, "learning_rate": 3.4218562395509513e-06, "loss": 0.1178, "step": 7377 }, { "epoch": 1.195398574206092, "grad_norm": 0.8521484732627869, "learning_rate": 3.4214497374756415e-06, "loss": 0.1027, "step": 7378 }, { "epoch": 1.195560596241089, "grad_norm": 0.8041279315948486, "learning_rate": 3.421043207206199e-06, "loss": 0.0938, "step": 7379 }, { "epoch": 1.1957226182760856, "grad_norm": 0.7933258414268494, "learning_rate": 3.4206366487550637e-06, "loss": 0.0986, "step": 7380 }, { "epoch": 1.1958846403110823, "grad_norm": 0.7659348845481873, "learning_rate": 3.420230062134674e-06, "loss": 0.0948, "step": 7381 }, { "epoch": 1.196046662346079, "grad_norm": 0.766547679901123, "learning_rate": 3.419823447357472e-06, "loss": 0.0984, "step": 7382 }, { "epoch": 1.1962086843810757, "grad_norm": 0.9550457000732422, "learning_rate": 3.419416804435899e-06, "loss": 0.1161, "step": 7383 }, { "epoch": 1.1963707064160727, "grad_norm": 0.8955060243606567, "learning_rate": 3.4190101333823956e-06, "loss": 0.1151, "step": 7384 }, { "epoch": 1.1965327284510694, "grad_norm": 0.8570713996887207, "learning_rate": 3.4186034342094066e-06, "loss": 0.1072, "step": 7385 }, { "epoch": 1.196694750486066, "grad_norm": 0.9379814863204956, "learning_rate": 3.4181967069293754e-06, "loss": 0.1144, "step": 7386 }, { "epoch": 1.1968567725210628, "grad_norm": 0.8272217512130737, "learning_rate": 3.417789951554747e-06, "loss": 0.1065, "step": 7387 }, { "epoch": 1.1970187945560595, "grad_norm": 0.8084843754768372, "learning_rate": 3.417383168097967e-06, "loss": 0.0922, "step": 7388 }, { "epoch": 1.1971808165910565, "grad_norm": 0.894687294960022, "learning_rate": 3.4169763565714824e-06, "loss": 0.1148, "step": 7389 }, { "epoch": 1.1973428386260532, "grad_norm": 0.7344521284103394, "learning_rate": 3.41656951698774e-06, "loss": 0.0919, "step": 7390 }, { "epoch": 1.1975048606610499, "grad_norm": 0.7177636623382568, "learning_rate": 3.416162649359189e-06, "loss": 0.0873, "step": 7391 }, { "epoch": 1.1976668826960466, "grad_norm": 0.8915464282035828, "learning_rate": 3.4157557536982773e-06, "loss": 0.1083, "step": 7392 }, { "epoch": 1.1978289047310433, "grad_norm": 0.8106177449226379, "learning_rate": 3.4153488300174557e-06, "loss": 0.0969, "step": 7393 }, { "epoch": 1.1979909267660402, "grad_norm": 0.8630214929580688, "learning_rate": 3.414941878329175e-06, "loss": 0.1064, "step": 7394 }, { "epoch": 1.198152948801037, "grad_norm": 0.9472280144691467, "learning_rate": 3.4145348986458874e-06, "loss": 0.1136, "step": 7395 }, { "epoch": 1.1983149708360337, "grad_norm": 0.795714795589447, "learning_rate": 3.4141278909800444e-06, "loss": 0.0959, "step": 7396 }, { "epoch": 1.1984769928710304, "grad_norm": 0.8350076675415039, "learning_rate": 3.4137208553441008e-06, "loss": 0.0958, "step": 7397 }, { "epoch": 1.1986390149060273, "grad_norm": 0.9593220353126526, "learning_rate": 3.41331379175051e-06, "loss": 0.1109, "step": 7398 }, { "epoch": 1.198801036941024, "grad_norm": 0.7994752526283264, "learning_rate": 3.4129067002117266e-06, "loss": 0.1018, "step": 7399 }, { "epoch": 1.1989630589760207, "grad_norm": 0.7729980945587158, "learning_rate": 3.4124995807402082e-06, "loss": 0.0983, "step": 7400 }, { "epoch": 1.1991250810110174, "grad_norm": 0.8964594602584839, "learning_rate": 3.412092433348411e-06, "loss": 0.1145, "step": 7401 }, { "epoch": 1.1992871030460144, "grad_norm": 0.8727504014968872, "learning_rate": 3.4116852580487925e-06, "loss": 0.1036, "step": 7402 }, { "epoch": 1.199449125081011, "grad_norm": 0.8404155373573303, "learning_rate": 3.4112780548538097e-06, "loss": 0.101, "step": 7403 }, { "epoch": 1.1996111471160078, "grad_norm": 0.7900668382644653, "learning_rate": 3.4108708237759258e-06, "loss": 0.096, "step": 7404 }, { "epoch": 1.1997731691510045, "grad_norm": 1.0467677116394043, "learning_rate": 3.4104635648275975e-06, "loss": 0.1102, "step": 7405 }, { "epoch": 1.1999351911860012, "grad_norm": 0.7542744874954224, "learning_rate": 3.4100562780212887e-06, "loss": 0.0906, "step": 7406 }, { "epoch": 1.2000972132209982, "grad_norm": 0.898759663105011, "learning_rate": 3.409648963369459e-06, "loss": 0.1133, "step": 7407 }, { "epoch": 1.2002592352559949, "grad_norm": 0.758513867855072, "learning_rate": 3.4092416208845723e-06, "loss": 0.0935, "step": 7408 }, { "epoch": 1.2004212572909916, "grad_norm": 0.8105527758598328, "learning_rate": 3.408834250579093e-06, "loss": 0.1066, "step": 7409 }, { "epoch": 1.2005832793259883, "grad_norm": 0.8187546730041504, "learning_rate": 3.4084268524654847e-06, "loss": 0.1023, "step": 7410 }, { "epoch": 1.200745301360985, "grad_norm": 0.8059993386268616, "learning_rate": 3.4080194265562133e-06, "loss": 0.1112, "step": 7411 }, { "epoch": 1.200907323395982, "grad_norm": 0.8363770246505737, "learning_rate": 3.407611972863744e-06, "loss": 0.1069, "step": 7412 }, { "epoch": 1.2010693454309787, "grad_norm": 0.7790720462799072, "learning_rate": 3.407204491400546e-06, "loss": 0.1021, "step": 7413 }, { "epoch": 1.2012313674659754, "grad_norm": 0.8233287930488586, "learning_rate": 3.406796982179085e-06, "loss": 0.1054, "step": 7414 }, { "epoch": 1.201393389500972, "grad_norm": 0.8368380665779114, "learning_rate": 3.4063894452118313e-06, "loss": 0.1095, "step": 7415 }, { "epoch": 1.2015554115359688, "grad_norm": 0.8810364007949829, "learning_rate": 3.4059818805112534e-06, "loss": 0.1126, "step": 7416 }, { "epoch": 1.2017174335709657, "grad_norm": 0.9108153581619263, "learning_rate": 3.4055742880898223e-06, "loss": 0.1095, "step": 7417 }, { "epoch": 1.2018794556059624, "grad_norm": 0.7670288681983948, "learning_rate": 3.4051666679600105e-06, "loss": 0.0969, "step": 7418 }, { "epoch": 1.2020414776409591, "grad_norm": 0.6589247584342957, "learning_rate": 3.404759020134288e-06, "loss": 0.0842, "step": 7419 }, { "epoch": 1.2022034996759559, "grad_norm": 0.7312752604484558, "learning_rate": 3.40435134462513e-06, "loss": 0.0972, "step": 7420 }, { "epoch": 1.2023655217109526, "grad_norm": 0.8262923359870911, "learning_rate": 3.403943641445008e-06, "loss": 0.1024, "step": 7421 }, { "epoch": 1.2025275437459495, "grad_norm": 0.8193771243095398, "learning_rate": 3.403535910606399e-06, "loss": 0.1016, "step": 7422 }, { "epoch": 1.2026895657809462, "grad_norm": 0.7638645768165588, "learning_rate": 3.4031281521217772e-06, "loss": 0.0947, "step": 7423 }, { "epoch": 1.202851587815943, "grad_norm": 0.9505733847618103, "learning_rate": 3.4027203660036202e-06, "loss": 0.1089, "step": 7424 }, { "epoch": 1.2030136098509396, "grad_norm": 0.7956478595733643, "learning_rate": 3.402312552264404e-06, "loss": 0.0925, "step": 7425 }, { "epoch": 1.2031756318859366, "grad_norm": 0.7707629203796387, "learning_rate": 3.4019047109166077e-06, "loss": 0.0984, "step": 7426 }, { "epoch": 1.2033376539209333, "grad_norm": 0.8056702613830566, "learning_rate": 3.401496841972709e-06, "loss": 0.1076, "step": 7427 }, { "epoch": 1.20349967595593, "grad_norm": 0.7400745749473572, "learning_rate": 3.401088945445189e-06, "loss": 0.0873, "step": 7428 }, { "epoch": 1.2036616979909267, "grad_norm": 0.891022801399231, "learning_rate": 3.400681021346528e-06, "loss": 0.1001, "step": 7429 }, { "epoch": 1.2038237200259236, "grad_norm": 0.8220235705375671, "learning_rate": 3.4002730696892073e-06, "loss": 0.1081, "step": 7430 }, { "epoch": 1.2039857420609203, "grad_norm": 0.8286675810813904, "learning_rate": 3.399865090485709e-06, "loss": 0.1008, "step": 7431 }, { "epoch": 1.204147764095917, "grad_norm": 0.8980154991149902, "learning_rate": 3.3994570837485163e-06, "loss": 0.1087, "step": 7432 }, { "epoch": 1.2043097861309138, "grad_norm": 1.0518361330032349, "learning_rate": 3.3990490494901148e-06, "loss": 0.1222, "step": 7433 }, { "epoch": 1.2044718081659105, "grad_norm": 0.9503573179244995, "learning_rate": 3.3986409877229863e-06, "loss": 0.1152, "step": 7434 }, { "epoch": 1.2046338302009074, "grad_norm": 0.7729112505912781, "learning_rate": 3.3982328984596196e-06, "loss": 0.0995, "step": 7435 }, { "epoch": 1.2047958522359041, "grad_norm": 0.8068438172340393, "learning_rate": 3.3978247817124986e-06, "loss": 0.0985, "step": 7436 }, { "epoch": 1.2049578742709008, "grad_norm": 0.8518000245094299, "learning_rate": 3.3974166374941137e-06, "loss": 0.11, "step": 7437 }, { "epoch": 1.2051198963058976, "grad_norm": 0.9522218704223633, "learning_rate": 3.39700846581695e-06, "loss": 0.1175, "step": 7438 }, { "epoch": 1.2052819183408943, "grad_norm": 0.724781334400177, "learning_rate": 3.396600266693498e-06, "loss": 0.0842, "step": 7439 }, { "epoch": 1.2054439403758912, "grad_norm": 0.7208175659179688, "learning_rate": 3.3961920401362488e-06, "loss": 0.0929, "step": 7440 }, { "epoch": 1.205605962410888, "grad_norm": 0.8319954872131348, "learning_rate": 3.395783786157691e-06, "loss": 0.1001, "step": 7441 }, { "epoch": 1.2057679844458846, "grad_norm": 0.8171935081481934, "learning_rate": 3.3953755047703174e-06, "loss": 0.104, "step": 7442 }, { "epoch": 1.2059300064808813, "grad_norm": 0.7243193984031677, "learning_rate": 3.394967195986619e-06, "loss": 0.0957, "step": 7443 }, { "epoch": 1.206092028515878, "grad_norm": 0.8252148628234863, "learning_rate": 3.394558859819092e-06, "loss": 0.1097, "step": 7444 }, { "epoch": 1.206254050550875, "grad_norm": 0.8779266476631165, "learning_rate": 3.3941504962802273e-06, "loss": 0.1172, "step": 7445 }, { "epoch": 1.2064160725858717, "grad_norm": 0.8700554370880127, "learning_rate": 3.393742105382522e-06, "loss": 0.1084, "step": 7446 }, { "epoch": 1.2065780946208684, "grad_norm": 0.9085803627967834, "learning_rate": 3.393333687138471e-06, "loss": 0.1156, "step": 7447 }, { "epoch": 1.2067401166558651, "grad_norm": 0.938339352607727, "learning_rate": 3.3929252415605708e-06, "loss": 0.1032, "step": 7448 }, { "epoch": 1.206902138690862, "grad_norm": 0.8974847197532654, "learning_rate": 3.392516768661319e-06, "loss": 0.11, "step": 7449 }, { "epoch": 1.2070641607258588, "grad_norm": 0.9363797903060913, "learning_rate": 3.3921082684532143e-06, "loss": 0.112, "step": 7450 }, { "epoch": 1.2072261827608555, "grad_norm": 0.7975560426712036, "learning_rate": 3.3916997409487552e-06, "loss": 0.1048, "step": 7451 }, { "epoch": 1.2073882047958522, "grad_norm": 0.7326340675354004, "learning_rate": 3.391291186160441e-06, "loss": 0.0983, "step": 7452 }, { "epoch": 1.2075502268308491, "grad_norm": 0.8057294487953186, "learning_rate": 3.390882604100775e-06, "loss": 0.0962, "step": 7453 }, { "epoch": 1.2077122488658458, "grad_norm": 0.7411153316497803, "learning_rate": 3.3904739947822556e-06, "loss": 0.0974, "step": 7454 }, { "epoch": 1.2078742709008425, "grad_norm": 0.822479248046875, "learning_rate": 3.3900653582173883e-06, "loss": 0.1059, "step": 7455 }, { "epoch": 1.2080362929358393, "grad_norm": 0.9575856924057007, "learning_rate": 3.3896566944186737e-06, "loss": 0.1204, "step": 7456 }, { "epoch": 1.208198314970836, "grad_norm": 0.7944145798683167, "learning_rate": 3.3892480033986186e-06, "loss": 0.1, "step": 7457 }, { "epoch": 1.208360337005833, "grad_norm": 0.6548885107040405, "learning_rate": 3.388839285169725e-06, "loss": 0.0808, "step": 7458 }, { "epoch": 1.2085223590408296, "grad_norm": 0.9237521886825562, "learning_rate": 3.3884305397445017e-06, "loss": 0.1132, "step": 7459 }, { "epoch": 1.2086843810758263, "grad_norm": 0.8143603205680847, "learning_rate": 3.3880217671354527e-06, "loss": 0.0988, "step": 7460 }, { "epoch": 1.208846403110823, "grad_norm": 0.8373053073883057, "learning_rate": 3.3876129673550873e-06, "loss": 0.1062, "step": 7461 }, { "epoch": 1.2090084251458197, "grad_norm": 0.8747492432594299, "learning_rate": 3.3872041404159124e-06, "loss": 0.0978, "step": 7462 }, { "epoch": 1.2091704471808167, "grad_norm": 0.9876968860626221, "learning_rate": 3.386795286330438e-06, "loss": 0.1106, "step": 7463 }, { "epoch": 1.2093324692158134, "grad_norm": 0.8512849807739258, "learning_rate": 3.3863864051111744e-06, "loss": 0.1119, "step": 7464 }, { "epoch": 1.20949449125081, "grad_norm": 0.816228449344635, "learning_rate": 3.385977496770631e-06, "loss": 0.113, "step": 7465 }, { "epoch": 1.2096565132858068, "grad_norm": 0.8686400055885315, "learning_rate": 3.385568561321321e-06, "loss": 0.1065, "step": 7466 }, { "epoch": 1.2098185353208035, "grad_norm": 0.9017913937568665, "learning_rate": 3.385159598775755e-06, "loss": 0.105, "step": 7467 }, { "epoch": 1.2099805573558005, "grad_norm": 0.8263371586799622, "learning_rate": 3.3847506091464487e-06, "loss": 0.1017, "step": 7468 }, { "epoch": 1.2101425793907972, "grad_norm": 0.8818573355674744, "learning_rate": 3.3843415924459146e-06, "loss": 0.1109, "step": 7469 }, { "epoch": 1.2103046014257939, "grad_norm": 0.8400024771690369, "learning_rate": 3.383932548686667e-06, "loss": 0.1062, "step": 7470 }, { "epoch": 1.2104666234607906, "grad_norm": 0.8324894309043884, "learning_rate": 3.3835234778812232e-06, "loss": 0.1058, "step": 7471 }, { "epoch": 1.2106286454957873, "grad_norm": 0.8616311550140381, "learning_rate": 3.3831143800420983e-06, "loss": 0.1095, "step": 7472 }, { "epoch": 1.2107906675307842, "grad_norm": 0.821205735206604, "learning_rate": 3.3827052551818113e-06, "loss": 0.1111, "step": 7473 }, { "epoch": 1.210952689565781, "grad_norm": 0.794984757900238, "learning_rate": 3.3822961033128793e-06, "loss": 0.1011, "step": 7474 }, { "epoch": 1.2111147116007777, "grad_norm": 0.7533223032951355, "learning_rate": 3.3818869244478214e-06, "loss": 0.0949, "step": 7475 }, { "epoch": 1.2112767336357744, "grad_norm": 0.8010703921318054, "learning_rate": 3.3814777185991577e-06, "loss": 0.1074, "step": 7476 }, { "epoch": 1.2114387556707713, "grad_norm": 0.7717397212982178, "learning_rate": 3.3810684857794097e-06, "loss": 0.0989, "step": 7477 }, { "epoch": 1.211600777705768, "grad_norm": 0.8146265745162964, "learning_rate": 3.380659226001097e-06, "loss": 0.1, "step": 7478 }, { "epoch": 1.2117627997407647, "grad_norm": 0.7482508420944214, "learning_rate": 3.380249939276744e-06, "loss": 0.0988, "step": 7479 }, { "epoch": 1.2119248217757614, "grad_norm": 0.8667011260986328, "learning_rate": 3.3798406256188725e-06, "loss": 0.1127, "step": 7480 }, { "epoch": 1.2120868438107584, "grad_norm": 0.8829639554023743, "learning_rate": 3.379431285040008e-06, "loss": 0.1111, "step": 7481 }, { "epoch": 1.212248865845755, "grad_norm": 0.7957995533943176, "learning_rate": 3.3790219175526733e-06, "loss": 0.1036, "step": 7482 }, { "epoch": 1.2124108878807518, "grad_norm": 0.844648003578186, "learning_rate": 3.3786125231693955e-06, "loss": 0.0985, "step": 7483 }, { "epoch": 1.2125729099157485, "grad_norm": 0.8586723804473877, "learning_rate": 3.3782031019027006e-06, "loss": 0.1066, "step": 7484 }, { "epoch": 1.2127349319507452, "grad_norm": 0.8165646195411682, "learning_rate": 3.3777936537651162e-06, "loss": 0.1031, "step": 7485 }, { "epoch": 1.2128969539857422, "grad_norm": 0.825261652469635, "learning_rate": 3.3773841787691708e-06, "loss": 0.0991, "step": 7486 }, { "epoch": 1.2130589760207389, "grad_norm": 0.8294909000396729, "learning_rate": 3.3769746769273915e-06, "loss": 0.1069, "step": 7487 }, { "epoch": 1.2132209980557356, "grad_norm": 0.8192000389099121, "learning_rate": 3.3765651482523097e-06, "loss": 0.0998, "step": 7488 }, { "epoch": 1.2133830200907323, "grad_norm": 0.7805353999137878, "learning_rate": 3.3761555927564553e-06, "loss": 0.0919, "step": 7489 }, { "epoch": 1.213545042125729, "grad_norm": 0.8784085512161255, "learning_rate": 3.375746010452361e-06, "loss": 0.117, "step": 7490 }, { "epoch": 1.213707064160726, "grad_norm": 0.9832420945167542, "learning_rate": 3.375336401352557e-06, "loss": 0.115, "step": 7491 }, { "epoch": 1.2138690861957226, "grad_norm": 0.8437249064445496, "learning_rate": 3.374926765469578e-06, "loss": 0.1156, "step": 7492 }, { "epoch": 1.2140311082307194, "grad_norm": 0.8112325668334961, "learning_rate": 3.374517102815958e-06, "loss": 0.0974, "step": 7493 }, { "epoch": 1.214193130265716, "grad_norm": 0.8753647208213806, "learning_rate": 3.3741074134042297e-06, "loss": 0.1087, "step": 7494 }, { "epoch": 1.2143551523007128, "grad_norm": 0.9637497067451477, "learning_rate": 3.373697697246931e-06, "loss": 0.1211, "step": 7495 }, { "epoch": 1.2145171743357097, "grad_norm": 0.8705383539199829, "learning_rate": 3.3732879543565955e-06, "loss": 0.1168, "step": 7496 }, { "epoch": 1.2146791963707064, "grad_norm": 0.7167220115661621, "learning_rate": 3.372878184745764e-06, "loss": 0.0967, "step": 7497 }, { "epoch": 1.2148412184057031, "grad_norm": 0.8306147456169128, "learning_rate": 3.3724683884269702e-06, "loss": 0.1044, "step": 7498 }, { "epoch": 1.2150032404406998, "grad_norm": 0.9196276068687439, "learning_rate": 3.3720585654127564e-06, "loss": 0.1258, "step": 7499 }, { "epoch": 1.2151652624756968, "grad_norm": 0.8953840732574463, "learning_rate": 3.37164871571566e-06, "loss": 0.1224, "step": 7500 }, { "epoch": 1.2153272845106935, "grad_norm": 0.8981062173843384, "learning_rate": 3.3712388393482224e-06, "loss": 0.1102, "step": 7501 }, { "epoch": 1.2154893065456902, "grad_norm": 0.7924917340278625, "learning_rate": 3.370828936322985e-06, "loss": 0.1097, "step": 7502 }, { "epoch": 1.215651328580687, "grad_norm": 0.806941568851471, "learning_rate": 3.37041900665249e-06, "loss": 0.104, "step": 7503 }, { "epoch": 1.2158133506156839, "grad_norm": 0.7318122982978821, "learning_rate": 3.3700090503492795e-06, "loss": 0.0876, "step": 7504 }, { "epoch": 1.2159753726506806, "grad_norm": 0.7857891917228699, "learning_rate": 3.369599067425897e-06, "loss": 0.1066, "step": 7505 }, { "epoch": 1.2161373946856773, "grad_norm": 0.7979137897491455, "learning_rate": 3.3691890578948876e-06, "loss": 0.1032, "step": 7506 }, { "epoch": 1.216299416720674, "grad_norm": 0.8069559931755066, "learning_rate": 3.3687790217687966e-06, "loss": 0.1047, "step": 7507 }, { "epoch": 1.2164614387556707, "grad_norm": 0.9257494211196899, "learning_rate": 3.36836895906017e-06, "loss": 0.1059, "step": 7508 }, { "epoch": 1.2166234607906676, "grad_norm": 0.8408833146095276, "learning_rate": 3.367958869781554e-06, "loss": 0.0991, "step": 7509 }, { "epoch": 1.2167854828256643, "grad_norm": 0.7625467777252197, "learning_rate": 3.3675487539454972e-06, "loss": 0.102, "step": 7510 }, { "epoch": 1.216947504860661, "grad_norm": 0.7763059735298157, "learning_rate": 3.367138611564548e-06, "loss": 0.1047, "step": 7511 }, { "epoch": 1.2171095268956578, "grad_norm": 0.872353196144104, "learning_rate": 3.3667284426512565e-06, "loss": 0.1122, "step": 7512 }, { "epoch": 1.2172715489306545, "grad_norm": 0.8491617441177368, "learning_rate": 3.366318247218171e-06, "loss": 0.0998, "step": 7513 }, { "epoch": 1.2174335709656514, "grad_norm": 0.8177269101142883, "learning_rate": 3.3659080252778446e-06, "loss": 0.1037, "step": 7514 }, { "epoch": 1.2175955930006481, "grad_norm": 0.7532639503479004, "learning_rate": 3.3654977768428276e-06, "loss": 0.0947, "step": 7515 }, { "epoch": 1.2177576150356448, "grad_norm": 0.7339494824409485, "learning_rate": 3.365087501925673e-06, "loss": 0.1038, "step": 7516 }, { "epoch": 1.2179196370706415, "grad_norm": 0.9405337572097778, "learning_rate": 3.364677200538935e-06, "loss": 0.1128, "step": 7517 }, { "epoch": 1.2180816591056383, "grad_norm": 0.8983637094497681, "learning_rate": 3.3642668726951657e-06, "loss": 0.1095, "step": 7518 }, { "epoch": 1.2182436811406352, "grad_norm": 0.9077023863792419, "learning_rate": 3.363856518406923e-06, "loss": 0.1199, "step": 7519 }, { "epoch": 1.218405703175632, "grad_norm": 0.8318829536437988, "learning_rate": 3.36344613768676e-06, "loss": 0.0994, "step": 7520 }, { "epoch": 1.2185677252106286, "grad_norm": 0.9089265465736389, "learning_rate": 3.3630357305472363e-06, "loss": 0.1143, "step": 7521 }, { "epoch": 1.2187297472456253, "grad_norm": 0.8133178353309631, "learning_rate": 3.362625297000906e-06, "loss": 0.0978, "step": 7522 }, { "epoch": 1.2188917692806223, "grad_norm": 0.8779920339584351, "learning_rate": 3.3622148370603305e-06, "loss": 0.0998, "step": 7523 }, { "epoch": 1.219053791315619, "grad_norm": 0.8766910433769226, "learning_rate": 3.3618043507380673e-06, "loss": 0.1154, "step": 7524 }, { "epoch": 1.2192158133506157, "grad_norm": 0.8510516285896301, "learning_rate": 3.3613938380466758e-06, "loss": 0.1042, "step": 7525 }, { "epoch": 1.2193778353856124, "grad_norm": 0.8500891327857971, "learning_rate": 3.3609832989987178e-06, "loss": 0.1009, "step": 7526 }, { "epoch": 1.219539857420609, "grad_norm": 0.8215943574905396, "learning_rate": 3.360572733606754e-06, "loss": 0.1003, "step": 7527 }, { "epoch": 1.219701879455606, "grad_norm": 0.8531811833381653, "learning_rate": 3.360162141883348e-06, "loss": 0.1103, "step": 7528 }, { "epoch": 1.2198639014906028, "grad_norm": 0.7764996290206909, "learning_rate": 3.359751523841062e-06, "loss": 0.0973, "step": 7529 }, { "epoch": 1.2200259235255995, "grad_norm": 0.7268168926239014, "learning_rate": 3.3593408794924585e-06, "loss": 0.0937, "step": 7530 }, { "epoch": 1.2201879455605962, "grad_norm": 0.7981647253036499, "learning_rate": 3.358930208850105e-06, "loss": 0.1023, "step": 7531 }, { "epoch": 1.220349967595593, "grad_norm": 0.7587500214576721, "learning_rate": 3.358519511926565e-06, "loss": 0.1031, "step": 7532 }, { "epoch": 1.2205119896305898, "grad_norm": 0.8148356676101685, "learning_rate": 3.358108788734406e-06, "loss": 0.0965, "step": 7533 }, { "epoch": 1.2206740116655865, "grad_norm": 0.7737621665000916, "learning_rate": 3.357698039286194e-06, "loss": 0.0945, "step": 7534 }, { "epoch": 1.2208360337005832, "grad_norm": 0.8707150816917419, "learning_rate": 3.3572872635944982e-06, "loss": 0.1078, "step": 7535 }, { "epoch": 1.22099805573558, "grad_norm": 0.8325858116149902, "learning_rate": 3.356876461671887e-06, "loss": 0.1106, "step": 7536 }, { "epoch": 1.221160077770577, "grad_norm": 0.8393840789794922, "learning_rate": 3.3564656335309293e-06, "loss": 0.1064, "step": 7537 }, { "epoch": 1.2213220998055736, "grad_norm": 0.8071523308753967, "learning_rate": 3.3560547791841957e-06, "loss": 0.1005, "step": 7538 }, { "epoch": 1.2214841218405703, "grad_norm": 0.845231831073761, "learning_rate": 3.3556438986442574e-06, "loss": 0.1026, "step": 7539 }, { "epoch": 1.221646143875567, "grad_norm": 0.8520015478134155, "learning_rate": 3.3552329919236865e-06, "loss": 0.0976, "step": 7540 }, { "epoch": 1.2218081659105637, "grad_norm": 0.9010183215141296, "learning_rate": 3.3548220590350563e-06, "loss": 0.1105, "step": 7541 }, { "epoch": 1.2219701879455607, "grad_norm": 0.7583953142166138, "learning_rate": 3.3544110999909385e-06, "loss": 0.0901, "step": 7542 }, { "epoch": 1.2221322099805574, "grad_norm": 0.7752031683921814, "learning_rate": 3.354000114803909e-06, "loss": 0.099, "step": 7543 }, { "epoch": 1.222294232015554, "grad_norm": 0.8464565873146057, "learning_rate": 3.3535891034865433e-06, "loss": 0.0952, "step": 7544 }, { "epoch": 1.2224562540505508, "grad_norm": 0.8120436668395996, "learning_rate": 3.3531780660514164e-06, "loss": 0.098, "step": 7545 }, { "epoch": 1.2226182760855475, "grad_norm": 0.7880799770355225, "learning_rate": 3.3527670025111046e-06, "loss": 0.1071, "step": 7546 }, { "epoch": 1.2227802981205445, "grad_norm": 0.8038657903671265, "learning_rate": 3.352355912878187e-06, "loss": 0.0996, "step": 7547 }, { "epoch": 1.2229423201555412, "grad_norm": 0.7888135313987732, "learning_rate": 3.3519447971652407e-06, "loss": 0.089, "step": 7548 }, { "epoch": 1.2231043421905379, "grad_norm": 0.7552701830863953, "learning_rate": 3.3515336553848454e-06, "loss": 0.0972, "step": 7549 }, { "epoch": 1.2232663642255346, "grad_norm": 0.720099151134491, "learning_rate": 3.351122487549582e-06, "loss": 0.0838, "step": 7550 }, { "epoch": 1.2234283862605315, "grad_norm": 0.8370726108551025, "learning_rate": 3.350711293672029e-06, "loss": 0.1043, "step": 7551 }, { "epoch": 1.2235904082955282, "grad_norm": 0.8462739586830139, "learning_rate": 3.3503000737647696e-06, "loss": 0.1001, "step": 7552 }, { "epoch": 1.223752430330525, "grad_norm": 0.8034543991088867, "learning_rate": 3.349888827840385e-06, "loss": 0.0989, "step": 7553 }, { "epoch": 1.2239144523655217, "grad_norm": 0.9257510304450989, "learning_rate": 3.349477555911459e-06, "loss": 0.1208, "step": 7554 }, { "epoch": 1.2240764744005186, "grad_norm": 0.79314786195755, "learning_rate": 3.349066257990576e-06, "loss": 0.0993, "step": 7555 }, { "epoch": 1.2242384964355153, "grad_norm": 0.8348277807235718, "learning_rate": 3.3486549340903196e-06, "loss": 0.096, "step": 7556 }, { "epoch": 1.224400518470512, "grad_norm": 0.7744476199150085, "learning_rate": 3.3482435842232763e-06, "loss": 0.1027, "step": 7557 }, { "epoch": 1.2245625405055087, "grad_norm": 0.9177954792976379, "learning_rate": 3.3478322084020322e-06, "loss": 0.1159, "step": 7558 }, { "epoch": 1.2247245625405054, "grad_norm": 0.8459001779556274, "learning_rate": 3.3474208066391747e-06, "loss": 0.0988, "step": 7559 }, { "epoch": 1.2248865845755024, "grad_norm": 0.8714538812637329, "learning_rate": 3.34700937894729e-06, "loss": 0.1032, "step": 7560 }, { "epoch": 1.225048606610499, "grad_norm": 0.887595534324646, "learning_rate": 3.3465979253389685e-06, "loss": 0.1079, "step": 7561 }, { "epoch": 1.2252106286454958, "grad_norm": 0.8663240075111389, "learning_rate": 3.3461864458267996e-06, "loss": 0.1049, "step": 7562 }, { "epoch": 1.2253726506804925, "grad_norm": 0.7935128211975098, "learning_rate": 3.3457749404233724e-06, "loss": 0.1011, "step": 7563 }, { "epoch": 1.2255346727154892, "grad_norm": 0.8313045501708984, "learning_rate": 3.3453634091412795e-06, "loss": 0.093, "step": 7564 }, { "epoch": 1.2256966947504861, "grad_norm": 0.8548048138618469, "learning_rate": 3.3449518519931123e-06, "loss": 0.1095, "step": 7565 }, { "epoch": 1.2258587167854829, "grad_norm": 0.8249443173408508, "learning_rate": 3.344540268991462e-06, "loss": 0.1114, "step": 7566 }, { "epoch": 1.2260207388204796, "grad_norm": 0.850307285785675, "learning_rate": 3.344128660148924e-06, "loss": 0.1191, "step": 7567 }, { "epoch": 1.2261827608554763, "grad_norm": 0.9916208386421204, "learning_rate": 3.343717025478092e-06, "loss": 0.1222, "step": 7568 }, { "epoch": 1.226344782890473, "grad_norm": 0.825520932674408, "learning_rate": 3.3433053649915603e-06, "loss": 0.1055, "step": 7569 }, { "epoch": 1.22650680492547, "grad_norm": 0.7133052349090576, "learning_rate": 3.342893678701925e-06, "loss": 0.0932, "step": 7570 }, { "epoch": 1.2266688269604666, "grad_norm": 0.7931620478630066, "learning_rate": 3.3424819666217834e-06, "loss": 0.0977, "step": 7571 }, { "epoch": 1.2268308489954634, "grad_norm": 0.80473393201828, "learning_rate": 3.3420702287637325e-06, "loss": 0.106, "step": 7572 }, { "epoch": 1.22699287103046, "grad_norm": 1.0347249507904053, "learning_rate": 3.3416584651403696e-06, "loss": 0.132, "step": 7573 }, { "epoch": 1.227154893065457, "grad_norm": 0.8027249574661255, "learning_rate": 3.341246675764295e-06, "loss": 0.1066, "step": 7574 }, { "epoch": 1.2273169151004537, "grad_norm": 0.7861801385879517, "learning_rate": 3.3408348606481078e-06, "loss": 0.0994, "step": 7575 }, { "epoch": 1.2274789371354504, "grad_norm": 0.943520188331604, "learning_rate": 3.3404230198044085e-06, "loss": 0.1147, "step": 7576 }, { "epoch": 1.2276409591704471, "grad_norm": 0.9658901691436768, "learning_rate": 3.340011153245799e-06, "loss": 0.1152, "step": 7577 }, { "epoch": 1.2278029812054438, "grad_norm": 0.7559435367584229, "learning_rate": 3.3395992609848804e-06, "loss": 0.0998, "step": 7578 }, { "epoch": 1.2279650032404408, "grad_norm": 0.8726533651351929, "learning_rate": 3.339187343034257e-06, "loss": 0.1108, "step": 7579 }, { "epoch": 1.2281270252754375, "grad_norm": 0.7859377264976501, "learning_rate": 3.338775399406531e-06, "loss": 0.102, "step": 7580 }, { "epoch": 1.2282890473104342, "grad_norm": 0.8078779578208923, "learning_rate": 3.338363430114308e-06, "loss": 0.0931, "step": 7581 }, { "epoch": 1.228451069345431, "grad_norm": 0.9209697842597961, "learning_rate": 3.3379514351701924e-06, "loss": 0.112, "step": 7582 }, { "epoch": 1.2286130913804278, "grad_norm": 0.8043226599693298, "learning_rate": 3.3375394145867912e-06, "loss": 0.0952, "step": 7583 }, { "epoch": 1.2287751134154246, "grad_norm": 0.8135362267494202, "learning_rate": 3.3371273683767102e-06, "loss": 0.1067, "step": 7584 }, { "epoch": 1.2289371354504213, "grad_norm": 0.7498790621757507, "learning_rate": 3.336715296552558e-06, "loss": 0.0919, "step": 7585 }, { "epoch": 1.229099157485418, "grad_norm": 0.8408077359199524, "learning_rate": 3.3363031991269423e-06, "loss": 0.101, "step": 7586 }, { "epoch": 1.2292611795204147, "grad_norm": 0.8685369491577148, "learning_rate": 3.3358910761124724e-06, "loss": 0.1081, "step": 7587 }, { "epoch": 1.2294232015554116, "grad_norm": 0.9170325994491577, "learning_rate": 3.3354789275217587e-06, "loss": 0.1099, "step": 7588 }, { "epoch": 1.2295852235904083, "grad_norm": 0.8715429306030273, "learning_rate": 3.3350667533674108e-06, "loss": 0.108, "step": 7589 }, { "epoch": 1.229747245625405, "grad_norm": 0.870263397693634, "learning_rate": 3.3346545536620425e-06, "loss": 0.1057, "step": 7590 }, { "epoch": 1.2299092676604018, "grad_norm": 0.7764174938201904, "learning_rate": 3.334242328418264e-06, "loss": 0.1045, "step": 7591 }, { "epoch": 1.2300712896953985, "grad_norm": 0.8364241719245911, "learning_rate": 3.3338300776486886e-06, "loss": 0.1014, "step": 7592 }, { "epoch": 1.2302333117303954, "grad_norm": 0.8532553315162659, "learning_rate": 3.333417801365931e-06, "loss": 0.1043, "step": 7593 }, { "epoch": 1.2303953337653921, "grad_norm": 0.795436441898346, "learning_rate": 3.3330054995826056e-06, "loss": 0.1042, "step": 7594 }, { "epoch": 1.2305573558003888, "grad_norm": 0.7866604328155518, "learning_rate": 3.332593172311328e-06, "loss": 0.0932, "step": 7595 }, { "epoch": 1.2307193778353855, "grad_norm": 0.8465556502342224, "learning_rate": 3.3321808195647144e-06, "loss": 0.1037, "step": 7596 }, { "epoch": 1.2308813998703823, "grad_norm": 0.8134123086929321, "learning_rate": 3.33176844135538e-06, "loss": 0.1038, "step": 7597 }, { "epoch": 1.2310434219053792, "grad_norm": 0.7845098972320557, "learning_rate": 3.3313560376959456e-06, "loss": 0.103, "step": 7598 }, { "epoch": 1.231205443940376, "grad_norm": 0.8044149279594421, "learning_rate": 3.330943608599028e-06, "loss": 0.0938, "step": 7599 }, { "epoch": 1.2313674659753726, "grad_norm": 0.7864116430282593, "learning_rate": 3.3305311540772467e-06, "loss": 0.0998, "step": 7600 }, { "epoch": 1.2315294880103693, "grad_norm": 0.8183262944221497, "learning_rate": 3.3301186741432217e-06, "loss": 0.0995, "step": 7601 }, { "epoch": 1.2316915100453663, "grad_norm": 0.9076040983200073, "learning_rate": 3.3297061688095746e-06, "loss": 0.1081, "step": 7602 }, { "epoch": 1.231853532080363, "grad_norm": 0.6733155250549316, "learning_rate": 3.3292936380889262e-06, "loss": 0.0878, "step": 7603 }, { "epoch": 1.2320155541153597, "grad_norm": 0.8160568475723267, "learning_rate": 3.3288810819938995e-06, "loss": 0.1036, "step": 7604 }, { "epoch": 1.2321775761503564, "grad_norm": 0.757789134979248, "learning_rate": 3.3284685005371176e-06, "loss": 0.0993, "step": 7605 }, { "epoch": 1.2323395981853533, "grad_norm": 0.8649821877479553, "learning_rate": 3.3280558937312037e-06, "loss": 0.1062, "step": 7606 }, { "epoch": 1.23250162022035, "grad_norm": 0.8108959794044495, "learning_rate": 3.3276432615887843e-06, "loss": 0.0959, "step": 7607 }, { "epoch": 1.2326636422553467, "grad_norm": 0.7675016522407532, "learning_rate": 3.327230604122484e-06, "loss": 0.0941, "step": 7608 }, { "epoch": 1.2328256642903435, "grad_norm": 0.8105819225311279, "learning_rate": 3.326817921344928e-06, "loss": 0.1078, "step": 7609 }, { "epoch": 1.2329876863253402, "grad_norm": 0.7653875946998596, "learning_rate": 3.326405213268745e-06, "loss": 0.0968, "step": 7610 }, { "epoch": 1.233149708360337, "grad_norm": 0.7907019257545471, "learning_rate": 3.3259924799065628e-06, "loss": 0.1014, "step": 7611 }, { "epoch": 1.2333117303953338, "grad_norm": 0.9260424971580505, "learning_rate": 3.3255797212710095e-06, "loss": 0.1051, "step": 7612 }, { "epoch": 1.2334737524303305, "grad_norm": 0.8345485925674438, "learning_rate": 3.325166937374714e-06, "loss": 0.1032, "step": 7613 }, { "epoch": 1.2336357744653272, "grad_norm": 0.9643160104751587, "learning_rate": 3.3247541282303082e-06, "loss": 0.1185, "step": 7614 }, { "epoch": 1.233797796500324, "grad_norm": 0.8535207509994507, "learning_rate": 3.3243412938504205e-06, "loss": 0.1094, "step": 7615 }, { "epoch": 1.2339598185353209, "grad_norm": 0.8478245735168457, "learning_rate": 3.3239284342476852e-06, "loss": 0.1035, "step": 7616 }, { "epoch": 1.2341218405703176, "grad_norm": 0.8517696261405945, "learning_rate": 3.3235155494347325e-06, "loss": 0.1113, "step": 7617 }, { "epoch": 1.2342838626053143, "grad_norm": 1.065172791481018, "learning_rate": 3.3231026394241983e-06, "loss": 0.1272, "step": 7618 }, { "epoch": 1.234445884640311, "grad_norm": 0.9062070250511169, "learning_rate": 3.3226897042287145e-06, "loss": 0.1209, "step": 7619 }, { "epoch": 1.2346079066753077, "grad_norm": 0.8547554612159729, "learning_rate": 3.3222767438609166e-06, "loss": 0.1091, "step": 7620 }, { "epoch": 1.2347699287103047, "grad_norm": 0.7908268570899963, "learning_rate": 3.32186375833344e-06, "loss": 0.0999, "step": 7621 }, { "epoch": 1.2349319507453014, "grad_norm": 0.7787481546401978, "learning_rate": 3.321450747658922e-06, "loss": 0.0974, "step": 7622 }, { "epoch": 1.235093972780298, "grad_norm": 0.8876661062240601, "learning_rate": 3.321037711849998e-06, "loss": 0.1232, "step": 7623 }, { "epoch": 1.2352559948152948, "grad_norm": 0.8357555270195007, "learning_rate": 3.3206246509193076e-06, "loss": 0.1095, "step": 7624 }, { "epoch": 1.2354180168502917, "grad_norm": 0.8862370848655701, "learning_rate": 3.320211564879488e-06, "loss": 0.1157, "step": 7625 }, { "epoch": 1.2355800388852884, "grad_norm": 0.835319459438324, "learning_rate": 3.3197984537431797e-06, "loss": 0.1024, "step": 7626 }, { "epoch": 1.2357420609202852, "grad_norm": 0.8422128558158875, "learning_rate": 3.319385317523024e-06, "loss": 0.1071, "step": 7627 }, { "epoch": 1.2359040829552819, "grad_norm": 0.7388719916343689, "learning_rate": 3.3189721562316585e-06, "loss": 0.0847, "step": 7628 }, { "epoch": 1.2360661049902788, "grad_norm": 0.7805131673812866, "learning_rate": 3.318558969881728e-06, "loss": 0.0995, "step": 7629 }, { "epoch": 1.2362281270252755, "grad_norm": 1.0341185331344604, "learning_rate": 3.3181457584858736e-06, "loss": 0.1243, "step": 7630 }, { "epoch": 1.2363901490602722, "grad_norm": 0.7121812701225281, "learning_rate": 3.3177325220567385e-06, "loss": 0.0965, "step": 7631 }, { "epoch": 1.236552171095269, "grad_norm": 0.7928653955459595, "learning_rate": 3.3173192606069673e-06, "loss": 0.1043, "step": 7632 }, { "epoch": 1.2367141931302656, "grad_norm": 0.7301590442657471, "learning_rate": 3.316905974149205e-06, "loss": 0.0967, "step": 7633 }, { "epoch": 1.2368762151652626, "grad_norm": 0.9105049967765808, "learning_rate": 3.316492662696097e-06, "loss": 0.1122, "step": 7634 }, { "epoch": 1.2370382372002593, "grad_norm": 0.8258975148200989, "learning_rate": 3.3160793262602882e-06, "loss": 0.1109, "step": 7635 }, { "epoch": 1.237200259235256, "grad_norm": 0.9241932034492493, "learning_rate": 3.3156659648544276e-06, "loss": 0.1102, "step": 7636 }, { "epoch": 1.2373622812702527, "grad_norm": 0.8554189205169678, "learning_rate": 3.315252578491162e-06, "loss": 0.1104, "step": 7637 }, { "epoch": 1.2375243033052494, "grad_norm": 0.9000004529953003, "learning_rate": 3.314839167183141e-06, "loss": 0.1058, "step": 7638 }, { "epoch": 1.2376863253402464, "grad_norm": 0.7197251319885254, "learning_rate": 3.3144257309430127e-06, "loss": 0.0863, "step": 7639 }, { "epoch": 1.237848347375243, "grad_norm": 0.948573887348175, "learning_rate": 3.3140122697834287e-06, "loss": 0.1086, "step": 7640 }, { "epoch": 1.2380103694102398, "grad_norm": 0.8102221488952637, "learning_rate": 3.3135987837170386e-06, "loss": 0.1001, "step": 7641 }, { "epoch": 1.2381723914452365, "grad_norm": 0.7119641304016113, "learning_rate": 3.3131852727564947e-06, "loss": 0.0848, "step": 7642 }, { "epoch": 1.2383344134802332, "grad_norm": 0.8429257869720459, "learning_rate": 3.31277173691445e-06, "loss": 0.1068, "step": 7643 }, { "epoch": 1.2384964355152301, "grad_norm": 0.7876508831977844, "learning_rate": 3.3123581762035557e-06, "loss": 0.0945, "step": 7644 }, { "epoch": 1.2386584575502269, "grad_norm": 0.9862732291221619, "learning_rate": 3.311944590636468e-06, "loss": 0.1212, "step": 7645 }, { "epoch": 1.2388204795852236, "grad_norm": 0.8856245279312134, "learning_rate": 3.31153098022584e-06, "loss": 0.1084, "step": 7646 }, { "epoch": 1.2389825016202203, "grad_norm": 0.9834122657775879, "learning_rate": 3.3111173449843283e-06, "loss": 0.1125, "step": 7647 }, { "epoch": 1.239144523655217, "grad_norm": 0.7890782356262207, "learning_rate": 3.3107036849245883e-06, "loss": 0.0953, "step": 7648 }, { "epoch": 1.239306545690214, "grad_norm": 0.8613621592521667, "learning_rate": 3.310290000059278e-06, "loss": 0.1074, "step": 7649 }, { "epoch": 1.2394685677252106, "grad_norm": 0.8193936944007874, "learning_rate": 3.309876290401054e-06, "loss": 0.1, "step": 7650 }, { "epoch": 1.2396305897602073, "grad_norm": 0.7670435309410095, "learning_rate": 3.309462555962576e-06, "loss": 0.0993, "step": 7651 }, { "epoch": 1.239792611795204, "grad_norm": 0.9082610011100769, "learning_rate": 3.309048796756503e-06, "loss": 0.1142, "step": 7652 }, { "epoch": 1.239954633830201, "grad_norm": 0.9768080115318298, "learning_rate": 3.3086350127954935e-06, "loss": 0.1186, "step": 7653 }, { "epoch": 1.2401166558651977, "grad_norm": 0.7574945092201233, "learning_rate": 3.3082212040922103e-06, "loss": 0.0913, "step": 7654 }, { "epoch": 1.2402786779001944, "grad_norm": 0.8311034440994263, "learning_rate": 3.3078073706593133e-06, "loss": 0.1038, "step": 7655 }, { "epoch": 1.2404406999351911, "grad_norm": 0.7912598252296448, "learning_rate": 3.307393512509466e-06, "loss": 0.1065, "step": 7656 }, { "epoch": 1.240602721970188, "grad_norm": 0.8380460143089294, "learning_rate": 3.3069796296553316e-06, "loss": 0.1073, "step": 7657 }, { "epoch": 1.2407647440051848, "grad_norm": 0.8099128007888794, "learning_rate": 3.3065657221095732e-06, "loss": 0.0964, "step": 7658 }, { "epoch": 1.2409267660401815, "grad_norm": 0.6533287167549133, "learning_rate": 3.306151789884855e-06, "loss": 0.0861, "step": 7659 }, { "epoch": 1.2410887880751782, "grad_norm": 0.6852719187736511, "learning_rate": 3.3057378329938432e-06, "loss": 0.0837, "step": 7660 }, { "epoch": 1.241250810110175, "grad_norm": 0.8555914163589478, "learning_rate": 3.305323851449203e-06, "loss": 0.1047, "step": 7661 }, { "epoch": 1.2414128321451718, "grad_norm": 0.8162693381309509, "learning_rate": 3.304909845263603e-06, "loss": 0.1062, "step": 7662 }, { "epoch": 1.2415748541801686, "grad_norm": 0.8287134766578674, "learning_rate": 3.3044958144497086e-06, "loss": 0.1038, "step": 7663 }, { "epoch": 1.2417368762151653, "grad_norm": 0.7691091299057007, "learning_rate": 3.3040817590201897e-06, "loss": 0.0984, "step": 7664 }, { "epoch": 1.241898898250162, "grad_norm": 0.8376115560531616, "learning_rate": 3.3036676789877146e-06, "loss": 0.1, "step": 7665 }, { "epoch": 1.2420609202851587, "grad_norm": 0.7699323296546936, "learning_rate": 3.303253574364953e-06, "loss": 0.0892, "step": 7666 }, { "epoch": 1.2422229423201556, "grad_norm": 0.8108561038970947, "learning_rate": 3.3028394451645766e-06, "loss": 0.1031, "step": 7667 }, { "epoch": 1.2423849643551523, "grad_norm": 0.8874189257621765, "learning_rate": 3.3024252913992548e-06, "loss": 0.11, "step": 7668 }, { "epoch": 1.242546986390149, "grad_norm": 0.7574527859687805, "learning_rate": 3.3020111130816623e-06, "loss": 0.0989, "step": 7669 }, { "epoch": 1.2427090084251458, "grad_norm": 0.9647727608680725, "learning_rate": 3.3015969102244704e-06, "loss": 0.1051, "step": 7670 }, { "epoch": 1.2428710304601425, "grad_norm": 0.9136444926261902, "learning_rate": 3.3011826828403527e-06, "loss": 0.1138, "step": 7671 }, { "epoch": 1.2430330524951394, "grad_norm": 0.9177752733230591, "learning_rate": 3.300768430941983e-06, "loss": 0.1238, "step": 7672 }, { "epoch": 1.2431950745301361, "grad_norm": 0.7569882869720459, "learning_rate": 3.3003541545420377e-06, "loss": 0.1009, "step": 7673 }, { "epoch": 1.2433570965651328, "grad_norm": 0.7180430889129639, "learning_rate": 3.299939853653192e-06, "loss": 0.0873, "step": 7674 }, { "epoch": 1.2435191186001295, "grad_norm": 0.7186658978462219, "learning_rate": 3.2995255282881227e-06, "loss": 0.0835, "step": 7675 }, { "epoch": 1.2436811406351265, "grad_norm": 0.8784341812133789, "learning_rate": 3.299111178459507e-06, "loss": 0.1175, "step": 7676 }, { "epoch": 1.2438431626701232, "grad_norm": 0.8925139307975769, "learning_rate": 3.2986968041800234e-06, "loss": 0.1186, "step": 7677 }, { "epoch": 1.24400518470512, "grad_norm": 0.8931295275688171, "learning_rate": 3.29828240546235e-06, "loss": 0.1053, "step": 7678 }, { "epoch": 1.2441672067401166, "grad_norm": 0.8283528685569763, "learning_rate": 3.297867982319166e-06, "loss": 0.1039, "step": 7679 }, { "epoch": 1.2443292287751135, "grad_norm": 0.9566637873649597, "learning_rate": 3.297453534763154e-06, "loss": 0.1112, "step": 7680 }, { "epoch": 1.2444912508101102, "grad_norm": 0.8479618430137634, "learning_rate": 3.2970390628069924e-06, "loss": 0.107, "step": 7681 }, { "epoch": 1.244653272845107, "grad_norm": 0.7277421951293945, "learning_rate": 3.2966245664633654e-06, "loss": 0.0957, "step": 7682 }, { "epoch": 1.2448152948801037, "grad_norm": 0.7973564863204956, "learning_rate": 3.296210045744954e-06, "loss": 0.0933, "step": 7683 }, { "epoch": 1.2449773169151004, "grad_norm": 0.8752889037132263, "learning_rate": 3.295795500664442e-06, "loss": 0.1146, "step": 7684 }, { "epoch": 1.2451393389500973, "grad_norm": 0.772816002368927, "learning_rate": 3.295380931234513e-06, "loss": 0.0988, "step": 7685 }, { "epoch": 1.245301360985094, "grad_norm": 1.0557066202163696, "learning_rate": 3.294966337467853e-06, "loss": 0.1175, "step": 7686 }, { "epoch": 1.2454633830200907, "grad_norm": 0.7834747433662415, "learning_rate": 3.294551719377147e-06, "loss": 0.1024, "step": 7687 }, { "epoch": 1.2456254050550875, "grad_norm": 0.8686286807060242, "learning_rate": 3.2941370769750804e-06, "loss": 0.1102, "step": 7688 }, { "epoch": 1.2457874270900842, "grad_norm": 0.8577167391777039, "learning_rate": 3.2937224102743414e-06, "loss": 0.1075, "step": 7689 }, { "epoch": 1.245949449125081, "grad_norm": 0.712014377117157, "learning_rate": 3.293307719287617e-06, "loss": 0.0949, "step": 7690 }, { "epoch": 1.2461114711600778, "grad_norm": 0.7292968034744263, "learning_rate": 3.292893004027597e-06, "loss": 0.0953, "step": 7691 }, { "epoch": 1.2462734931950745, "grad_norm": 0.8187505006790161, "learning_rate": 3.2924782645069684e-06, "loss": 0.0978, "step": 7692 }, { "epoch": 1.2464355152300712, "grad_norm": 0.8897111415863037, "learning_rate": 3.292063500738424e-06, "loss": 0.1111, "step": 7693 }, { "epoch": 1.246597537265068, "grad_norm": 0.806907057762146, "learning_rate": 3.291648712734653e-06, "loss": 0.096, "step": 7694 }, { "epoch": 1.2467595593000649, "grad_norm": 0.8400275111198425, "learning_rate": 3.2912339005083473e-06, "loss": 0.0945, "step": 7695 }, { "epoch": 1.2469215813350616, "grad_norm": 0.8764445185661316, "learning_rate": 3.290819064072198e-06, "loss": 0.1005, "step": 7696 }, { "epoch": 1.2470836033700583, "grad_norm": 0.994835615158081, "learning_rate": 3.2904042034389e-06, "loss": 0.1262, "step": 7697 }, { "epoch": 1.247245625405055, "grad_norm": 0.8427457213401794, "learning_rate": 3.289989318621146e-06, "loss": 0.1038, "step": 7698 }, { "epoch": 1.2474076474400517, "grad_norm": 0.8053460717201233, "learning_rate": 3.289574409631631e-06, "loss": 0.0978, "step": 7699 }, { "epoch": 1.2475696694750487, "grad_norm": 0.9428670406341553, "learning_rate": 3.289159476483049e-06, "loss": 0.1128, "step": 7700 }, { "epoch": 1.2477316915100454, "grad_norm": 0.9878700375556946, "learning_rate": 3.288744519188097e-06, "loss": 0.1245, "step": 7701 }, { "epoch": 1.247893713545042, "grad_norm": 0.84808748960495, "learning_rate": 3.2883295377594716e-06, "loss": 0.0883, "step": 7702 }, { "epoch": 1.2480557355800388, "grad_norm": 0.8799005746841431, "learning_rate": 3.2879145322098694e-06, "loss": 0.1154, "step": 7703 }, { "epoch": 1.2482177576150357, "grad_norm": 0.9856189489364624, "learning_rate": 3.2874995025519897e-06, "loss": 0.115, "step": 7704 }, { "epoch": 1.2483797796500324, "grad_norm": 0.798912525177002, "learning_rate": 3.2870844487985307e-06, "loss": 0.0959, "step": 7705 }, { "epoch": 1.2485418016850292, "grad_norm": 0.8618741035461426, "learning_rate": 3.2866693709621933e-06, "loss": 0.1131, "step": 7706 }, { "epoch": 1.2487038237200259, "grad_norm": 0.8063377737998962, "learning_rate": 3.2862542690556765e-06, "loss": 0.1011, "step": 7707 }, { "epoch": 1.2488658457550228, "grad_norm": 0.7270522713661194, "learning_rate": 3.285839143091681e-06, "loss": 0.098, "step": 7708 }, { "epoch": 1.2490278677900195, "grad_norm": 0.7814246416091919, "learning_rate": 3.2854239930829097e-06, "loss": 0.098, "step": 7709 }, { "epoch": 1.2491898898250162, "grad_norm": 0.7192318439483643, "learning_rate": 3.2850088190420647e-06, "loss": 0.0922, "step": 7710 }, { "epoch": 1.249351911860013, "grad_norm": 0.7969322204589844, "learning_rate": 3.284593620981851e-06, "loss": 0.1072, "step": 7711 }, { "epoch": 1.2495139338950096, "grad_norm": 0.8182503581047058, "learning_rate": 3.284178398914969e-06, "loss": 0.1014, "step": 7712 }, { "epoch": 1.2496759559300066, "grad_norm": 0.9507556557655334, "learning_rate": 3.283763152854127e-06, "loss": 0.1117, "step": 7713 }, { "epoch": 1.2498379779650033, "grad_norm": 0.7872381210327148, "learning_rate": 3.283347882812028e-06, "loss": 0.0951, "step": 7714 }, { "epoch": 1.25, "grad_norm": 0.7765859961509705, "learning_rate": 3.282932588801381e-06, "loss": 0.0978, "step": 7715 }, { "epoch": 1.2501620220349967, "grad_norm": 0.9354822635650635, "learning_rate": 3.282517270834891e-06, "loss": 0.1167, "step": 7716 }, { "epoch": 1.2503240440699934, "grad_norm": 0.8700661659240723, "learning_rate": 3.2821019289252654e-06, "loss": 0.1112, "step": 7717 }, { "epoch": 1.2504860661049904, "grad_norm": 0.8417790532112122, "learning_rate": 3.281686563085214e-06, "loss": 0.1065, "step": 7718 }, { "epoch": 1.250648088139987, "grad_norm": 0.8531923294067383, "learning_rate": 3.2812711733274453e-06, "loss": 0.1171, "step": 7719 }, { "epoch": 1.2508101101749838, "grad_norm": 0.8502379059791565, "learning_rate": 3.28085575966467e-06, "loss": 0.1064, "step": 7720 }, { "epoch": 1.2509721322099805, "grad_norm": 0.9494119882583618, "learning_rate": 3.280440322109597e-06, "loss": 0.1099, "step": 7721 }, { "epoch": 1.2511341542449772, "grad_norm": 0.7425954341888428, "learning_rate": 3.2800248606749395e-06, "loss": 0.0846, "step": 7722 }, { "epoch": 1.2512961762799741, "grad_norm": 0.7303003668785095, "learning_rate": 3.2796093753734087e-06, "loss": 0.098, "step": 7723 }, { "epoch": 1.2514581983149708, "grad_norm": 0.9115877151489258, "learning_rate": 3.2791938662177174e-06, "loss": 0.1093, "step": 7724 }, { "epoch": 1.2516202203499676, "grad_norm": 0.7848078608512878, "learning_rate": 3.2787783332205796e-06, "loss": 0.0934, "step": 7725 }, { "epoch": 1.2517822423849643, "grad_norm": 0.8890565037727356, "learning_rate": 3.278362776394709e-06, "loss": 0.1088, "step": 7726 }, { "epoch": 1.251944264419961, "grad_norm": 1.0119807720184326, "learning_rate": 3.277947195752822e-06, "loss": 0.1263, "step": 7727 }, { "epoch": 1.252106286454958, "grad_norm": 0.8432857394218445, "learning_rate": 3.277531591307632e-06, "loss": 0.1109, "step": 7728 }, { "epoch": 1.2522683084899546, "grad_norm": 0.8839412331581116, "learning_rate": 3.2771159630718584e-06, "loss": 0.1058, "step": 7729 }, { "epoch": 1.2524303305249513, "grad_norm": 0.8032359480857849, "learning_rate": 3.2767003110582164e-06, "loss": 0.0991, "step": 7730 }, { "epoch": 1.2525923525599483, "grad_norm": 0.7420846819877625, "learning_rate": 3.276284635279424e-06, "loss": 0.0962, "step": 7731 }, { "epoch": 1.252754374594945, "grad_norm": 0.7983898520469666, "learning_rate": 3.275868935748201e-06, "loss": 0.1003, "step": 7732 }, { "epoch": 1.2529163966299417, "grad_norm": 0.8384861946105957, "learning_rate": 3.2754532124772653e-06, "loss": 0.1081, "step": 7733 }, { "epoch": 1.2530784186649384, "grad_norm": 0.8961964845657349, "learning_rate": 3.2750374654793387e-06, "loss": 0.1239, "step": 7734 }, { "epoch": 1.2532404406999351, "grad_norm": 0.818870484828949, "learning_rate": 3.2746216947671405e-06, "loss": 0.1061, "step": 7735 }, { "epoch": 1.253402462734932, "grad_norm": 0.8016375303268433, "learning_rate": 3.2742059003533933e-06, "loss": 0.0954, "step": 7736 }, { "epoch": 1.2535644847699288, "grad_norm": 0.9985673427581787, "learning_rate": 3.2737900822508197e-06, "loss": 0.1111, "step": 7737 }, { "epoch": 1.2537265068049255, "grad_norm": 0.8439716100692749, "learning_rate": 3.2733742404721413e-06, "loss": 0.1127, "step": 7738 }, { "epoch": 1.2538885288399222, "grad_norm": 0.7654055953025818, "learning_rate": 3.272958375030083e-06, "loss": 0.1004, "step": 7739 }, { "epoch": 1.254050550874919, "grad_norm": 0.7196124792098999, "learning_rate": 3.272542485937369e-06, "loss": 0.0928, "step": 7740 }, { "epoch": 1.2542125729099158, "grad_norm": 0.9231361150741577, "learning_rate": 3.272126573206724e-06, "loss": 0.1016, "step": 7741 }, { "epoch": 1.2543745949449125, "grad_norm": 0.7543466687202454, "learning_rate": 3.2717106368508755e-06, "loss": 0.0929, "step": 7742 }, { "epoch": 1.2545366169799093, "grad_norm": 0.8236292600631714, "learning_rate": 3.271294676882548e-06, "loss": 0.1005, "step": 7743 }, { "epoch": 1.254698639014906, "grad_norm": 0.8038961887359619, "learning_rate": 3.27087869331447e-06, "loss": 0.101, "step": 7744 }, { "epoch": 1.2548606610499027, "grad_norm": 0.8604749441146851, "learning_rate": 3.270462686159369e-06, "loss": 0.1037, "step": 7745 }, { "epoch": 1.2550226830848996, "grad_norm": 0.744813859462738, "learning_rate": 3.2700466554299755e-06, "loss": 0.0934, "step": 7746 }, { "epoch": 1.2551847051198963, "grad_norm": 0.8217744827270508, "learning_rate": 3.2696306011390167e-06, "loss": 0.106, "step": 7747 }, { "epoch": 1.255346727154893, "grad_norm": 0.755609929561615, "learning_rate": 3.2692145232992244e-06, "loss": 0.0933, "step": 7748 }, { "epoch": 1.2555087491898898, "grad_norm": 0.7776917219161987, "learning_rate": 3.2687984219233295e-06, "loss": 0.1035, "step": 7749 }, { "epoch": 1.2556707712248865, "grad_norm": 0.8656284809112549, "learning_rate": 3.268382297024063e-06, "loss": 0.111, "step": 7750 }, { "epoch": 1.2558327932598834, "grad_norm": 0.8041141629219055, "learning_rate": 3.2679661486141577e-06, "loss": 0.1009, "step": 7751 }, { "epoch": 1.25599481529488, "grad_norm": 0.7365778088569641, "learning_rate": 3.2675499767063464e-06, "loss": 0.0935, "step": 7752 }, { "epoch": 1.2561568373298768, "grad_norm": 0.8223831057548523, "learning_rate": 3.267133781313364e-06, "loss": 0.1034, "step": 7753 }, { "epoch": 1.2563188593648738, "grad_norm": 0.8573729395866394, "learning_rate": 3.266717562447944e-06, "loss": 0.1009, "step": 7754 }, { "epoch": 1.2564808813998705, "grad_norm": 0.9449803829193115, "learning_rate": 3.2663013201228216e-06, "loss": 0.1201, "step": 7755 }, { "epoch": 1.2566429034348672, "grad_norm": 0.7694631814956665, "learning_rate": 3.2658850543507336e-06, "loss": 0.0973, "step": 7756 }, { "epoch": 1.2568049254698639, "grad_norm": 1.0734034776687622, "learning_rate": 3.265468765144416e-06, "loss": 0.1069, "step": 7757 }, { "epoch": 1.2569669475048606, "grad_norm": 0.9590340852737427, "learning_rate": 3.2650524525166064e-06, "loss": 0.1212, "step": 7758 }, { "epoch": 1.2571289695398575, "grad_norm": 0.7816064357757568, "learning_rate": 3.264636116480044e-06, "loss": 0.1041, "step": 7759 }, { "epoch": 1.2572909915748542, "grad_norm": 0.745184600353241, "learning_rate": 3.2642197570474665e-06, "loss": 0.0875, "step": 7760 }, { "epoch": 1.257453013609851, "grad_norm": 0.8741956949234009, "learning_rate": 3.2638033742316137e-06, "loss": 0.1039, "step": 7761 }, { "epoch": 1.2576150356448477, "grad_norm": 0.9357261061668396, "learning_rate": 3.263386968045226e-06, "loss": 0.1157, "step": 7762 }, { "epoch": 1.2577770576798444, "grad_norm": 0.773931086063385, "learning_rate": 3.2629705385010445e-06, "loss": 0.0962, "step": 7763 }, { "epoch": 1.2579390797148413, "grad_norm": 0.8367682695388794, "learning_rate": 3.262554085611811e-06, "loss": 0.1009, "step": 7764 }, { "epoch": 1.258101101749838, "grad_norm": 0.8734476566314697, "learning_rate": 3.2621376093902675e-06, "loss": 0.1155, "step": 7765 }, { "epoch": 1.2582631237848347, "grad_norm": 0.9115141034126282, "learning_rate": 3.261721109849158e-06, "loss": 0.1112, "step": 7766 }, { "epoch": 1.2584251458198314, "grad_norm": 0.7026832103729248, "learning_rate": 3.261304587001225e-06, "loss": 0.0882, "step": 7767 }, { "epoch": 1.2585871678548282, "grad_norm": 0.7548426389694214, "learning_rate": 3.2608880408592148e-06, "loss": 0.097, "step": 7768 }, { "epoch": 1.258749189889825, "grad_norm": 0.736499011516571, "learning_rate": 3.2604714714358716e-06, "loss": 0.0918, "step": 7769 }, { "epoch": 1.2589112119248218, "grad_norm": 0.7644399404525757, "learning_rate": 3.2600548787439413e-06, "loss": 0.094, "step": 7770 }, { "epoch": 1.2590732339598185, "grad_norm": 0.8792965412139893, "learning_rate": 3.2596382627961714e-06, "loss": 0.1082, "step": 7771 }, { "epoch": 1.2592352559948152, "grad_norm": 0.8653140664100647, "learning_rate": 3.2592216236053086e-06, "loss": 0.1051, "step": 7772 }, { "epoch": 1.259397278029812, "grad_norm": 0.898324728012085, "learning_rate": 3.2588049611841023e-06, "loss": 0.107, "step": 7773 }, { "epoch": 1.2595593000648089, "grad_norm": 0.8365474343299866, "learning_rate": 3.2583882755452994e-06, "loss": 0.102, "step": 7774 }, { "epoch": 1.2597213220998056, "grad_norm": 0.8603896498680115, "learning_rate": 3.2579715667016516e-06, "loss": 0.1069, "step": 7775 }, { "epoch": 1.2598833441348023, "grad_norm": 0.8619314432144165, "learning_rate": 3.257554834665907e-06, "loss": 0.1045, "step": 7776 }, { "epoch": 1.2600453661697992, "grad_norm": 0.7768551111221313, "learning_rate": 3.2571380794508183e-06, "loss": 0.0992, "step": 7777 }, { "epoch": 1.2602073882047957, "grad_norm": 0.8619160056114197, "learning_rate": 3.2567213010691367e-06, "loss": 0.1118, "step": 7778 }, { "epoch": 1.2603694102397927, "grad_norm": 0.9219297170639038, "learning_rate": 3.256304499533614e-06, "loss": 0.1117, "step": 7779 }, { "epoch": 1.2605314322747894, "grad_norm": 0.8448036313056946, "learning_rate": 3.255887674857004e-06, "loss": 0.104, "step": 7780 }, { "epoch": 1.260693454309786, "grad_norm": 0.80354905128479, "learning_rate": 3.255470827052061e-06, "loss": 0.1035, "step": 7781 }, { "epoch": 1.260855476344783, "grad_norm": 0.7997376918792725, "learning_rate": 3.2550539561315385e-06, "loss": 0.1034, "step": 7782 }, { "epoch": 1.2610174983797797, "grad_norm": 0.7824645042419434, "learning_rate": 3.2546370621081912e-06, "loss": 0.0989, "step": 7783 }, { "epoch": 1.2611795204147764, "grad_norm": 0.757111668586731, "learning_rate": 3.2542201449947774e-06, "loss": 0.1016, "step": 7784 }, { "epoch": 1.2613415424497731, "grad_norm": 0.8208655714988708, "learning_rate": 3.253803204804052e-06, "loss": 0.1116, "step": 7785 }, { "epoch": 1.2615035644847699, "grad_norm": 0.817868709564209, "learning_rate": 3.2533862415487723e-06, "loss": 0.1056, "step": 7786 }, { "epoch": 1.2616655865197668, "grad_norm": 0.7902513146400452, "learning_rate": 3.252969255241697e-06, "loss": 0.1007, "step": 7787 }, { "epoch": 1.2618276085547635, "grad_norm": 0.8338719010353088, "learning_rate": 3.2525522458955843e-06, "loss": 0.1024, "step": 7788 }, { "epoch": 1.2619896305897602, "grad_norm": 0.8464298844337463, "learning_rate": 3.2521352135231944e-06, "loss": 0.1108, "step": 7789 }, { "epoch": 1.262151652624757, "grad_norm": 0.809252142906189, "learning_rate": 3.251718158137287e-06, "loss": 0.1044, "step": 7790 }, { "epoch": 1.2623136746597536, "grad_norm": 0.6791431903839111, "learning_rate": 3.2513010797506236e-06, "loss": 0.0965, "step": 7791 }, { "epoch": 1.2624756966947506, "grad_norm": 0.7555492520332336, "learning_rate": 3.2508839783759642e-06, "loss": 0.1014, "step": 7792 }, { "epoch": 1.2626377187297473, "grad_norm": 0.6891449093818665, "learning_rate": 3.2504668540260732e-06, "loss": 0.088, "step": 7793 }, { "epoch": 1.262799740764744, "grad_norm": 0.770533561706543, "learning_rate": 3.2500497067137116e-06, "loss": 0.1035, "step": 7794 }, { "epoch": 1.2629617627997407, "grad_norm": 0.7673627138137817, "learning_rate": 3.2496325364516444e-06, "loss": 0.0951, "step": 7795 }, { "epoch": 1.2631237848347374, "grad_norm": 0.8320962190628052, "learning_rate": 3.2492153432526356e-06, "loss": 0.1048, "step": 7796 }, { "epoch": 1.2632858068697344, "grad_norm": 0.8069162368774414, "learning_rate": 3.248798127129451e-06, "loss": 0.1025, "step": 7797 }, { "epoch": 1.263447828904731, "grad_norm": 0.9537956118583679, "learning_rate": 3.2483808880948552e-06, "loss": 0.1023, "step": 7798 }, { "epoch": 1.2636098509397278, "grad_norm": 0.9524184465408325, "learning_rate": 3.2479636261616156e-06, "loss": 0.1296, "step": 7799 }, { "epoch": 1.2637718729747245, "grad_norm": 0.8504661321640015, "learning_rate": 3.2475463413424983e-06, "loss": 0.1071, "step": 7800 }, { "epoch": 1.2639338950097212, "grad_norm": 0.8363580703735352, "learning_rate": 3.247129033650273e-06, "loss": 0.0971, "step": 7801 }, { "epoch": 1.2640959170447181, "grad_norm": 0.7588921785354614, "learning_rate": 3.246711703097707e-06, "loss": 0.0923, "step": 7802 }, { "epoch": 1.2642579390797148, "grad_norm": 0.7907635569572449, "learning_rate": 3.2462943496975696e-06, "loss": 0.1052, "step": 7803 }, { "epoch": 1.2644199611147116, "grad_norm": 0.8892641067504883, "learning_rate": 3.2458769734626315e-06, "loss": 0.1188, "step": 7804 }, { "epoch": 1.2645819831497085, "grad_norm": 0.7947612404823303, "learning_rate": 3.245459574405662e-06, "loss": 0.1109, "step": 7805 }, { "epoch": 1.2647440051847052, "grad_norm": 0.8161318898200989, "learning_rate": 3.245042152539435e-06, "loss": 0.1071, "step": 7806 }, { "epoch": 1.264906027219702, "grad_norm": 0.8670127987861633, "learning_rate": 3.2446247078767195e-06, "loss": 0.1086, "step": 7807 }, { "epoch": 1.2650680492546986, "grad_norm": 0.8083024621009827, "learning_rate": 3.2442072404302917e-06, "loss": 0.1044, "step": 7808 }, { "epoch": 1.2652300712896953, "grad_norm": 0.7659672498703003, "learning_rate": 3.243789750212922e-06, "loss": 0.097, "step": 7809 }, { "epoch": 1.2653920933246923, "grad_norm": 0.7633987069129944, "learning_rate": 3.243372237237386e-06, "loss": 0.0838, "step": 7810 }, { "epoch": 1.265554115359689, "grad_norm": 0.7828187942504883, "learning_rate": 3.2429547015164585e-06, "loss": 0.0969, "step": 7811 }, { "epoch": 1.2657161373946857, "grad_norm": 0.9652653932571411, "learning_rate": 3.2425371430629155e-06, "loss": 0.1092, "step": 7812 }, { "epoch": 1.2658781594296824, "grad_norm": 0.807366669178009, "learning_rate": 3.242119561889533e-06, "loss": 0.0976, "step": 7813 }, { "epoch": 1.2660401814646791, "grad_norm": 0.8526018857955933, "learning_rate": 3.241701958009087e-06, "loss": 0.1037, "step": 7814 }, { "epoch": 1.266202203499676, "grad_norm": 0.9918192625045776, "learning_rate": 3.2412843314343566e-06, "loss": 0.1089, "step": 7815 }, { "epoch": 1.2663642255346728, "grad_norm": 0.7938478589057922, "learning_rate": 3.2408666821781186e-06, "loss": 0.1041, "step": 7816 }, { "epoch": 1.2665262475696695, "grad_norm": 0.8537562489509583, "learning_rate": 3.2404490102531536e-06, "loss": 0.1142, "step": 7817 }, { "epoch": 1.2666882696046662, "grad_norm": 0.858700692653656, "learning_rate": 3.2400313156722414e-06, "loss": 0.1092, "step": 7818 }, { "epoch": 1.266850291639663, "grad_norm": 0.8112673163414001, "learning_rate": 3.2396135984481607e-06, "loss": 0.097, "step": 7819 }, { "epoch": 1.2670123136746598, "grad_norm": 0.7495766282081604, "learning_rate": 3.2391958585936946e-06, "loss": 0.1006, "step": 7820 }, { "epoch": 1.2671743357096565, "grad_norm": 0.8106545805931091, "learning_rate": 3.2387780961216237e-06, "loss": 0.1012, "step": 7821 }, { "epoch": 1.2673363577446533, "grad_norm": 0.7898968458175659, "learning_rate": 3.2383603110447304e-06, "loss": 0.0889, "step": 7822 }, { "epoch": 1.26749837977965, "grad_norm": 0.8760552406311035, "learning_rate": 3.237942503375799e-06, "loss": 0.1115, "step": 7823 }, { "epoch": 1.2676604018146467, "grad_norm": 0.7963511943817139, "learning_rate": 3.2375246731276122e-06, "loss": 0.1045, "step": 7824 }, { "epoch": 1.2678224238496436, "grad_norm": 0.8547530770301819, "learning_rate": 3.237106820312956e-06, "loss": 0.1028, "step": 7825 }, { "epoch": 1.2679844458846403, "grad_norm": 0.749346137046814, "learning_rate": 3.236688944944614e-06, "loss": 0.0918, "step": 7826 }, { "epoch": 1.268146467919637, "grad_norm": 0.6958228945732117, "learning_rate": 3.2362710470353737e-06, "loss": 0.087, "step": 7827 }, { "epoch": 1.268308489954634, "grad_norm": 0.9344890713691711, "learning_rate": 3.2358531265980207e-06, "loss": 0.1125, "step": 7828 }, { "epoch": 1.2684705119896305, "grad_norm": 0.8283147215843201, "learning_rate": 3.2354351836453423e-06, "loss": 0.097, "step": 7829 }, { "epoch": 1.2686325340246274, "grad_norm": 0.9189488291740417, "learning_rate": 3.2350172181901283e-06, "loss": 0.1061, "step": 7830 }, { "epoch": 1.268794556059624, "grad_norm": 0.8217201232910156, "learning_rate": 3.234599230245165e-06, "loss": 0.1026, "step": 7831 }, { "epoch": 1.2689565780946208, "grad_norm": 0.9388702511787415, "learning_rate": 3.2341812198232437e-06, "loss": 0.1086, "step": 7832 }, { "epoch": 1.2691186001296177, "grad_norm": 0.7720819115638733, "learning_rate": 3.2337631869371534e-06, "loss": 0.1009, "step": 7833 }, { "epoch": 1.2692806221646145, "grad_norm": 0.9018075466156006, "learning_rate": 3.2333451315996857e-06, "loss": 0.1147, "step": 7834 }, { "epoch": 1.2694426441996112, "grad_norm": 0.7737125754356384, "learning_rate": 3.2329270538236313e-06, "loss": 0.098, "step": 7835 }, { "epoch": 1.2696046662346079, "grad_norm": 0.9752976298332214, "learning_rate": 3.232508953621782e-06, "loss": 0.1188, "step": 7836 }, { "epoch": 1.2697666882696046, "grad_norm": 0.8050476908683777, "learning_rate": 3.232090831006932e-06, "loss": 0.1102, "step": 7837 }, { "epoch": 1.2699287103046015, "grad_norm": 0.8317782878875732, "learning_rate": 3.231672685991874e-06, "loss": 0.0991, "step": 7838 }, { "epoch": 1.2700907323395982, "grad_norm": 0.8194529414176941, "learning_rate": 3.231254518589403e-06, "loss": 0.1068, "step": 7839 }, { "epoch": 1.270252754374595, "grad_norm": 0.8640487194061279, "learning_rate": 3.2308363288123128e-06, "loss": 0.1159, "step": 7840 }, { "epoch": 1.2704147764095917, "grad_norm": 0.7963087558746338, "learning_rate": 3.2304181166733993e-06, "loss": 0.1065, "step": 7841 }, { "epoch": 1.2705767984445884, "grad_norm": 1.0018774271011353, "learning_rate": 3.2299998821854593e-06, "loss": 0.1209, "step": 7842 }, { "epoch": 1.2707388204795853, "grad_norm": 0.7300251722335815, "learning_rate": 3.2295816253612897e-06, "loss": 0.0895, "step": 7843 }, { "epoch": 1.270900842514582, "grad_norm": 0.7629234790802002, "learning_rate": 3.229163346213688e-06, "loss": 0.1015, "step": 7844 }, { "epoch": 1.2710628645495787, "grad_norm": 0.7023546099662781, "learning_rate": 3.2287450447554526e-06, "loss": 0.0956, "step": 7845 }, { "epoch": 1.2712248865845754, "grad_norm": 0.8268736600875854, "learning_rate": 3.228326720999382e-06, "loss": 0.1019, "step": 7846 }, { "epoch": 1.2713869086195722, "grad_norm": 0.7980479001998901, "learning_rate": 3.227908374958276e-06, "loss": 0.1008, "step": 7847 }, { "epoch": 1.271548930654569, "grad_norm": 0.7699779868125916, "learning_rate": 3.2274900066449355e-06, "loss": 0.096, "step": 7848 }, { "epoch": 1.2717109526895658, "grad_norm": 0.9146498441696167, "learning_rate": 3.2270716160721612e-06, "loss": 0.1143, "step": 7849 }, { "epoch": 1.2718729747245625, "grad_norm": 0.8463569283485413, "learning_rate": 3.2266532032527548e-06, "loss": 0.105, "step": 7850 }, { "epoch": 1.2720349967595592, "grad_norm": 0.8177133202552795, "learning_rate": 3.2262347681995187e-06, "loss": 0.1015, "step": 7851 }, { "epoch": 1.272197018794556, "grad_norm": 0.8679166436195374, "learning_rate": 3.225816310925257e-06, "loss": 0.0998, "step": 7852 }, { "epoch": 1.2723590408295529, "grad_norm": 0.7825645804405212, "learning_rate": 3.2253978314427716e-06, "loss": 0.1001, "step": 7853 }, { "epoch": 1.2725210628645496, "grad_norm": 0.8941231966018677, "learning_rate": 3.224979329764869e-06, "loss": 0.0982, "step": 7854 }, { "epoch": 1.2726830848995463, "grad_norm": 0.8882502913475037, "learning_rate": 3.2245608059043525e-06, "loss": 0.1052, "step": 7855 }, { "epoch": 1.2728451069345432, "grad_norm": 0.7035767436027527, "learning_rate": 3.224142259874029e-06, "loss": 0.0808, "step": 7856 }, { "epoch": 1.27300712896954, "grad_norm": 0.837063193321228, "learning_rate": 3.2237236916867047e-06, "loss": 0.1036, "step": 7857 }, { "epoch": 1.2731691510045366, "grad_norm": 0.8827104568481445, "learning_rate": 3.223305101355187e-06, "loss": 0.1098, "step": 7858 }, { "epoch": 1.2733311730395334, "grad_norm": 0.8305638432502747, "learning_rate": 3.2228864888922838e-06, "loss": 0.1051, "step": 7859 }, { "epoch": 1.27349319507453, "grad_norm": 0.9665209054946899, "learning_rate": 3.2224678543108024e-06, "loss": 0.1178, "step": 7860 }, { "epoch": 1.273655217109527, "grad_norm": 0.8135932683944702, "learning_rate": 3.222049197623554e-06, "loss": 0.0927, "step": 7861 }, { "epoch": 1.2738172391445237, "grad_norm": 0.8016318082809448, "learning_rate": 3.221630518843347e-06, "loss": 0.1005, "step": 7862 }, { "epoch": 1.2739792611795204, "grad_norm": 0.8300334811210632, "learning_rate": 3.2212118179829925e-06, "loss": 0.1057, "step": 7863 }, { "epoch": 1.2741412832145171, "grad_norm": 0.7906784415245056, "learning_rate": 3.2207930950553017e-06, "loss": 0.1001, "step": 7864 }, { "epoch": 1.2743033052495139, "grad_norm": 0.8609299063682556, "learning_rate": 3.2203743500730867e-06, "loss": 0.1019, "step": 7865 }, { "epoch": 1.2744653272845108, "grad_norm": 0.783918023109436, "learning_rate": 3.2199555830491597e-06, "loss": 0.0979, "step": 7866 }, { "epoch": 1.2746273493195075, "grad_norm": 0.9125205278396606, "learning_rate": 3.219536793996334e-06, "loss": 0.1175, "step": 7867 }, { "epoch": 1.2747893713545042, "grad_norm": 0.8157711029052734, "learning_rate": 3.2191179829274244e-06, "loss": 0.0942, "step": 7868 }, { "epoch": 1.274951393389501, "grad_norm": 0.9252278208732605, "learning_rate": 3.218699149855244e-06, "loss": 0.1111, "step": 7869 }, { "epoch": 1.2751134154244976, "grad_norm": 0.8688713908195496, "learning_rate": 3.2182802947926086e-06, "loss": 0.1111, "step": 7870 }, { "epoch": 1.2752754374594946, "grad_norm": 0.841914176940918, "learning_rate": 3.217861417752335e-06, "loss": 0.1005, "step": 7871 }, { "epoch": 1.2754374594944913, "grad_norm": 0.8829323053359985, "learning_rate": 3.2174425187472387e-06, "loss": 0.1121, "step": 7872 }, { "epoch": 1.275599481529488, "grad_norm": 0.8816058039665222, "learning_rate": 3.2170235977901375e-06, "loss": 0.1215, "step": 7873 }, { "epoch": 1.2757615035644847, "grad_norm": 0.7793697118759155, "learning_rate": 3.2166046548938497e-06, "loss": 0.1007, "step": 7874 }, { "epoch": 1.2759235255994814, "grad_norm": 0.7956479787826538, "learning_rate": 3.216185690071193e-06, "loss": 0.1034, "step": 7875 }, { "epoch": 1.2760855476344783, "grad_norm": 0.80839604139328, "learning_rate": 3.215766703334988e-06, "loss": 0.1001, "step": 7876 }, { "epoch": 1.276247569669475, "grad_norm": 0.8526255488395691, "learning_rate": 3.215347694698054e-06, "loss": 0.1031, "step": 7877 }, { "epoch": 1.2764095917044718, "grad_norm": 0.8941551446914673, "learning_rate": 3.214928664173211e-06, "loss": 0.1132, "step": 7878 }, { "epoch": 1.2765716137394687, "grad_norm": 0.7045158743858337, "learning_rate": 3.2145096117732823e-06, "loss": 0.0901, "step": 7879 }, { "epoch": 1.2767336357744652, "grad_norm": 0.9706689715385437, "learning_rate": 3.2140905375110875e-06, "loss": 0.1244, "step": 7880 }, { "epoch": 1.2768956578094621, "grad_norm": 0.9292829036712646, "learning_rate": 3.21367144139945e-06, "loss": 0.1131, "step": 7881 }, { "epoch": 1.2770576798444588, "grad_norm": 0.8300909996032715, "learning_rate": 3.2132523234511943e-06, "loss": 0.1164, "step": 7882 }, { "epoch": 1.2772197018794555, "grad_norm": 0.7891339659690857, "learning_rate": 3.2128331836791436e-06, "loss": 0.1007, "step": 7883 }, { "epoch": 1.2773817239144525, "grad_norm": 0.6846839189529419, "learning_rate": 3.2124140220961215e-06, "loss": 0.0874, "step": 7884 }, { "epoch": 1.2775437459494492, "grad_norm": 0.8767536282539368, "learning_rate": 3.211994838714955e-06, "loss": 0.1121, "step": 7885 }, { "epoch": 1.277705767984446, "grad_norm": 0.891893744468689, "learning_rate": 3.2115756335484694e-06, "loss": 0.113, "step": 7886 }, { "epoch": 1.2778677900194426, "grad_norm": 0.8657979965209961, "learning_rate": 3.2111564066094913e-06, "loss": 0.1176, "step": 7887 }, { "epoch": 1.2780298120544393, "grad_norm": 0.8780067563056946, "learning_rate": 3.210737157910848e-06, "loss": 0.111, "step": 7888 }, { "epoch": 1.2781918340894363, "grad_norm": 0.9224578738212585, "learning_rate": 3.2103178874653677e-06, "loss": 0.1151, "step": 7889 }, { "epoch": 1.278353856124433, "grad_norm": 0.7573640942573547, "learning_rate": 3.2098985952858796e-06, "loss": 0.1087, "step": 7890 }, { "epoch": 1.2785158781594297, "grad_norm": 0.8327637910842896, "learning_rate": 3.2094792813852116e-06, "loss": 0.1107, "step": 7891 }, { "epoch": 1.2786779001944264, "grad_norm": 0.9042618870735168, "learning_rate": 3.209059945776195e-06, "loss": 0.1194, "step": 7892 }, { "epoch": 1.278839922229423, "grad_norm": 0.9340540766716003, "learning_rate": 3.2086405884716592e-06, "loss": 0.1316, "step": 7893 }, { "epoch": 1.27900194426442, "grad_norm": 0.7579165697097778, "learning_rate": 3.2082212094844374e-06, "loss": 0.099, "step": 7894 }, { "epoch": 1.2791639662994168, "grad_norm": 0.7476134896278381, "learning_rate": 3.20780180882736e-06, "loss": 0.1061, "step": 7895 }, { "epoch": 1.2793259883344135, "grad_norm": 0.8922802209854126, "learning_rate": 3.20738238651326e-06, "loss": 0.1033, "step": 7896 }, { "epoch": 1.2794880103694102, "grad_norm": 0.8457995057106018, "learning_rate": 3.2069629425549705e-06, "loss": 0.1123, "step": 7897 }, { "epoch": 1.279650032404407, "grad_norm": 0.7586995959281921, "learning_rate": 3.206543476965326e-06, "loss": 0.0922, "step": 7898 }, { "epoch": 1.2798120544394038, "grad_norm": 0.9203418493270874, "learning_rate": 3.2061239897571613e-06, "loss": 0.1247, "step": 7899 }, { "epoch": 1.2799740764744005, "grad_norm": 0.8178397417068481, "learning_rate": 3.2057044809433108e-06, "loss": 0.1039, "step": 7900 }, { "epoch": 1.2801360985093972, "grad_norm": 0.794354259967804, "learning_rate": 3.2052849505366113e-06, "loss": 0.1085, "step": 7901 }, { "epoch": 1.280298120544394, "grad_norm": 0.7944015264511108, "learning_rate": 3.2048653985498985e-06, "loss": 0.1099, "step": 7902 }, { "epoch": 1.2804601425793907, "grad_norm": 0.8562796115875244, "learning_rate": 3.2044458249960108e-06, "loss": 0.1018, "step": 7903 }, { "epoch": 1.2806221646143876, "grad_norm": 0.9842815399169922, "learning_rate": 3.204026229887785e-06, "loss": 0.12, "step": 7904 }, { "epoch": 1.2807841866493843, "grad_norm": 0.7473559379577637, "learning_rate": 3.2036066132380606e-06, "loss": 0.0911, "step": 7905 }, { "epoch": 1.280946208684381, "grad_norm": 0.7652568221092224, "learning_rate": 3.203186975059677e-06, "loss": 0.1059, "step": 7906 }, { "epoch": 1.281108230719378, "grad_norm": 0.8993542790412903, "learning_rate": 3.2027673153654733e-06, "loss": 0.112, "step": 7907 }, { "epoch": 1.2812702527543747, "grad_norm": 0.8552115559577942, "learning_rate": 3.2023476341682902e-06, "loss": 0.1073, "step": 7908 }, { "epoch": 1.2814322747893714, "grad_norm": 0.7751871347427368, "learning_rate": 3.2019279314809694e-06, "loss": 0.0883, "step": 7909 }, { "epoch": 1.281594296824368, "grad_norm": 0.891760528087616, "learning_rate": 3.2015082073163524e-06, "loss": 0.114, "step": 7910 }, { "epoch": 1.2817563188593648, "grad_norm": 0.8688755035400391, "learning_rate": 3.201088461687282e-06, "loss": 0.1083, "step": 7911 }, { "epoch": 1.2819183408943617, "grad_norm": 0.8324447274208069, "learning_rate": 3.2006686946066012e-06, "loss": 0.107, "step": 7912 }, { "epoch": 1.2820803629293585, "grad_norm": 0.779424250125885, "learning_rate": 3.2002489060871534e-06, "loss": 0.0916, "step": 7913 }, { "epoch": 1.2822423849643552, "grad_norm": 0.7420961260795593, "learning_rate": 3.1998290961417844e-06, "loss": 0.0904, "step": 7914 }, { "epoch": 1.2824044069993519, "grad_norm": 0.8292026519775391, "learning_rate": 3.199409264783338e-06, "loss": 0.0986, "step": 7915 }, { "epoch": 1.2825664290343486, "grad_norm": 0.8516577482223511, "learning_rate": 3.1989894120246613e-06, "loss": 0.103, "step": 7916 }, { "epoch": 1.2827284510693455, "grad_norm": 0.7372321486473083, "learning_rate": 3.1985695378786e-06, "loss": 0.0895, "step": 7917 }, { "epoch": 1.2828904731043422, "grad_norm": 0.8905530571937561, "learning_rate": 3.1981496423580012e-06, "loss": 0.1178, "step": 7918 }, { "epoch": 1.283052495139339, "grad_norm": 0.819083571434021, "learning_rate": 3.1977297254757124e-06, "loss": 0.1058, "step": 7919 }, { "epoch": 1.2832145171743357, "grad_norm": 0.8953108787536621, "learning_rate": 3.1973097872445828e-06, "loss": 0.1166, "step": 7920 }, { "epoch": 1.2833765392093324, "grad_norm": 0.7964316606521606, "learning_rate": 3.196889827677462e-06, "loss": 0.0898, "step": 7921 }, { "epoch": 1.2835385612443293, "grad_norm": 0.8181843161582947, "learning_rate": 3.1964698467871976e-06, "loss": 0.0913, "step": 7922 }, { "epoch": 1.283700583279326, "grad_norm": 0.8924493789672852, "learning_rate": 3.1960498445866423e-06, "loss": 0.1187, "step": 7923 }, { "epoch": 1.2838626053143227, "grad_norm": 0.9556626677513123, "learning_rate": 3.1956298210886454e-06, "loss": 0.1147, "step": 7924 }, { "epoch": 1.2840246273493194, "grad_norm": 0.8342770934104919, "learning_rate": 3.1952097763060595e-06, "loss": 0.1027, "step": 7925 }, { "epoch": 1.2841866493843161, "grad_norm": 0.7476953864097595, "learning_rate": 3.1947897102517374e-06, "loss": 0.0885, "step": 7926 }, { "epoch": 1.284348671419313, "grad_norm": 0.8038281798362732, "learning_rate": 3.1943696229385307e-06, "loss": 0.1014, "step": 7927 }, { "epoch": 1.2845106934543098, "grad_norm": 0.9089291095733643, "learning_rate": 3.1939495143792944e-06, "loss": 0.1119, "step": 7928 }, { "epoch": 1.2846727154893065, "grad_norm": 0.9920743107795715, "learning_rate": 3.193529384586882e-06, "loss": 0.1191, "step": 7929 }, { "epoch": 1.2848347375243034, "grad_norm": 0.7742721438407898, "learning_rate": 3.1931092335741497e-06, "loss": 0.0964, "step": 7930 }, { "epoch": 1.2849967595593, "grad_norm": 0.8795700073242188, "learning_rate": 3.1926890613539513e-06, "loss": 0.1061, "step": 7931 }, { "epoch": 1.2851587815942969, "grad_norm": 0.8437364101409912, "learning_rate": 3.192268867939144e-06, "loss": 0.1071, "step": 7932 }, { "epoch": 1.2853208036292936, "grad_norm": 0.8689562678337097, "learning_rate": 3.191848653342584e-06, "loss": 0.1101, "step": 7933 }, { "epoch": 1.2854828256642903, "grad_norm": 0.8216108679771423, "learning_rate": 3.1914284175771303e-06, "loss": 0.1049, "step": 7934 }, { "epoch": 1.2856448476992872, "grad_norm": 0.8345628380775452, "learning_rate": 3.19100816065564e-06, "loss": 0.0987, "step": 7935 }, { "epoch": 1.285806869734284, "grad_norm": 0.830443799495697, "learning_rate": 3.1905878825909726e-06, "loss": 0.107, "step": 7936 }, { "epoch": 1.2859688917692806, "grad_norm": 0.7912009358406067, "learning_rate": 3.190167583395986e-06, "loss": 0.1001, "step": 7937 }, { "epoch": 1.2861309138042774, "grad_norm": 0.8424001932144165, "learning_rate": 3.189747263083543e-06, "loss": 0.1061, "step": 7938 }, { "epoch": 1.286292935839274, "grad_norm": 0.8694502115249634, "learning_rate": 3.1893269216665017e-06, "loss": 0.1152, "step": 7939 }, { "epoch": 1.286454957874271, "grad_norm": 0.8436936736106873, "learning_rate": 3.188906559157725e-06, "loss": 0.1028, "step": 7940 }, { "epoch": 1.2866169799092677, "grad_norm": 0.7603984475135803, "learning_rate": 3.188486175570075e-06, "loss": 0.0907, "step": 7941 }, { "epoch": 1.2867790019442644, "grad_norm": 0.8339808583259583, "learning_rate": 3.1880657709164144e-06, "loss": 0.107, "step": 7942 }, { "epoch": 1.2869410239792611, "grad_norm": 0.8520210385322571, "learning_rate": 3.187645345209606e-06, "loss": 0.1049, "step": 7943 }, { "epoch": 1.2871030460142578, "grad_norm": 0.8916787505149841, "learning_rate": 3.1872248984625135e-06, "loss": 0.108, "step": 7944 }, { "epoch": 1.2872650680492548, "grad_norm": 0.9074912667274475, "learning_rate": 3.1868044306880037e-06, "loss": 0.1019, "step": 7945 }, { "epoch": 1.2874270900842515, "grad_norm": 0.9418996572494507, "learning_rate": 3.1863839418989385e-06, "loss": 0.1114, "step": 7946 }, { "epoch": 1.2875891121192482, "grad_norm": 0.7757999897003174, "learning_rate": 3.185963432108187e-06, "loss": 0.0916, "step": 7947 }, { "epoch": 1.287751134154245, "grad_norm": 0.8359844088554382, "learning_rate": 3.185542901328613e-06, "loss": 0.1024, "step": 7948 }, { "epoch": 1.2879131561892416, "grad_norm": 1.3665879964828491, "learning_rate": 3.185122349573087e-06, "loss": 0.1411, "step": 7949 }, { "epoch": 1.2880751782242386, "grad_norm": 0.7807172536849976, "learning_rate": 3.184701776854474e-06, "loss": 0.104, "step": 7950 }, { "epoch": 1.2882372002592353, "grad_norm": 0.7108926177024841, "learning_rate": 3.1842811831856444e-06, "loss": 0.0831, "step": 7951 }, { "epoch": 1.288399222294232, "grad_norm": 0.7794750332832336, "learning_rate": 3.1838605685794665e-06, "loss": 0.1034, "step": 7952 }, { "epoch": 1.2885612443292287, "grad_norm": 0.8731608986854553, "learning_rate": 3.183439933048809e-06, "loss": 0.0996, "step": 7953 }, { "epoch": 1.2887232663642254, "grad_norm": 0.8640766143798828, "learning_rate": 3.1830192766065445e-06, "loss": 0.1089, "step": 7954 }, { "epoch": 1.2888852883992223, "grad_norm": 0.8059561848640442, "learning_rate": 3.1825985992655422e-06, "loss": 0.0989, "step": 7955 }, { "epoch": 1.289047310434219, "grad_norm": 0.889413595199585, "learning_rate": 3.1821779010386755e-06, "loss": 0.1129, "step": 7956 }, { "epoch": 1.2892093324692158, "grad_norm": 0.7468616366386414, "learning_rate": 3.181757181938815e-06, "loss": 0.0968, "step": 7957 }, { "epoch": 1.2893713545042127, "grad_norm": 0.7996143102645874, "learning_rate": 3.181336441978835e-06, "loss": 0.0968, "step": 7958 }, { "epoch": 1.2895333765392094, "grad_norm": 0.8068464398384094, "learning_rate": 3.1809156811716084e-06, "loss": 0.0977, "step": 7959 }, { "epoch": 1.2896953985742061, "grad_norm": 0.7814459800720215, "learning_rate": 3.18049489953001e-06, "loss": 0.1007, "step": 7960 }, { "epoch": 1.2898574206092028, "grad_norm": 0.7772566080093384, "learning_rate": 3.180074097066914e-06, "loss": 0.0983, "step": 7961 }, { "epoch": 1.2900194426441995, "grad_norm": 0.7291167974472046, "learning_rate": 3.1796532737951975e-06, "loss": 0.0959, "step": 7962 }, { "epoch": 1.2901814646791965, "grad_norm": 0.7671595215797424, "learning_rate": 3.1792324297277345e-06, "loss": 0.0991, "step": 7963 }, { "epoch": 1.2903434867141932, "grad_norm": 0.7343456745147705, "learning_rate": 3.1788115648774033e-06, "loss": 0.0958, "step": 7964 }, { "epoch": 1.29050550874919, "grad_norm": 0.8339682817459106, "learning_rate": 3.1783906792570805e-06, "loss": 0.1065, "step": 7965 }, { "epoch": 1.2906675307841866, "grad_norm": 0.9149320125579834, "learning_rate": 3.177969772879645e-06, "loss": 0.1138, "step": 7966 }, { "epoch": 1.2908295528191833, "grad_norm": 0.8161713480949402, "learning_rate": 3.1775488457579756e-06, "loss": 0.1041, "step": 7967 }, { "epoch": 1.2909915748541803, "grad_norm": 0.8028397560119629, "learning_rate": 3.1771278979049496e-06, "loss": 0.0971, "step": 7968 }, { "epoch": 1.291153596889177, "grad_norm": 0.7893111109733582, "learning_rate": 3.1767069293334502e-06, "loss": 0.0984, "step": 7969 }, { "epoch": 1.2913156189241737, "grad_norm": 0.9668081402778625, "learning_rate": 3.176285940056355e-06, "loss": 0.1123, "step": 7970 }, { "epoch": 1.2914776409591704, "grad_norm": 0.896835207939148, "learning_rate": 3.1758649300865473e-06, "loss": 0.1062, "step": 7971 }, { "epoch": 1.291639662994167, "grad_norm": 0.8189420700073242, "learning_rate": 3.1754438994369087e-06, "loss": 0.1035, "step": 7972 }, { "epoch": 1.291801685029164, "grad_norm": 0.8541057109832764, "learning_rate": 3.1750228481203206e-06, "loss": 0.107, "step": 7973 }, { "epoch": 1.2919637070641607, "grad_norm": 0.7424408793449402, "learning_rate": 3.174601776149668e-06, "loss": 0.0974, "step": 7974 }, { "epoch": 1.2921257290991575, "grad_norm": 0.9748353958129883, "learning_rate": 3.174180683537832e-06, "loss": 0.1157, "step": 7975 }, { "epoch": 1.2922877511341542, "grad_norm": 0.9521284699440002, "learning_rate": 3.1737595702976996e-06, "loss": 0.1038, "step": 7976 }, { "epoch": 1.2924497731691509, "grad_norm": 0.9242678284645081, "learning_rate": 3.1733384364421536e-06, "loss": 0.1133, "step": 7977 }, { "epoch": 1.2926117952041478, "grad_norm": 0.8057260513305664, "learning_rate": 3.1729172819840825e-06, "loss": 0.0995, "step": 7978 }, { "epoch": 1.2927738172391445, "grad_norm": 0.7817633152008057, "learning_rate": 3.17249610693637e-06, "loss": 0.1035, "step": 7979 }, { "epoch": 1.2929358392741412, "grad_norm": 0.8198245763778687, "learning_rate": 3.1720749113119045e-06, "loss": 0.1063, "step": 7980 }, { "epoch": 1.2930978613091382, "grad_norm": 0.7432113885879517, "learning_rate": 3.1716536951235727e-06, "loss": 0.0965, "step": 7981 }, { "epoch": 1.2932598833441347, "grad_norm": 0.7126169800758362, "learning_rate": 3.1712324583842637e-06, "loss": 0.0963, "step": 7982 }, { "epoch": 1.2934219053791316, "grad_norm": 0.7479450702667236, "learning_rate": 3.1708112011068647e-06, "loss": 0.1006, "step": 7983 }, { "epoch": 1.2935839274141283, "grad_norm": 0.8944701552391052, "learning_rate": 3.1703899233042675e-06, "loss": 0.1105, "step": 7984 }, { "epoch": 1.293745949449125, "grad_norm": 0.8011032938957214, "learning_rate": 3.1699686249893614e-06, "loss": 0.1094, "step": 7985 }, { "epoch": 1.293907971484122, "grad_norm": 0.7931639552116394, "learning_rate": 3.1695473061750353e-06, "loss": 0.1034, "step": 7986 }, { "epoch": 1.2940699935191187, "grad_norm": 0.8425852060317993, "learning_rate": 3.1691259668741823e-06, "loss": 0.0978, "step": 7987 }, { "epoch": 1.2942320155541154, "grad_norm": 0.7550665736198425, "learning_rate": 3.1687046070996942e-06, "loss": 0.0942, "step": 7988 }, { "epoch": 1.294394037589112, "grad_norm": 0.8623692393302917, "learning_rate": 3.168283226864463e-06, "loss": 0.111, "step": 7989 }, { "epoch": 1.2945560596241088, "grad_norm": 0.8119961619377136, "learning_rate": 3.1678618261813828e-06, "loss": 0.1057, "step": 7990 }, { "epoch": 1.2947180816591057, "grad_norm": 0.9069503545761108, "learning_rate": 3.1674404050633465e-06, "loss": 0.1106, "step": 7991 }, { "epoch": 1.2948801036941024, "grad_norm": 0.716511070728302, "learning_rate": 3.167018963523249e-06, "loss": 0.0845, "step": 7992 }, { "epoch": 1.2950421257290992, "grad_norm": 0.8659084439277649, "learning_rate": 3.166597501573986e-06, "loss": 0.1073, "step": 7993 }, { "epoch": 1.2952041477640959, "grad_norm": 0.9604227542877197, "learning_rate": 3.1661760192284518e-06, "loss": 0.1231, "step": 7994 }, { "epoch": 1.2953661697990926, "grad_norm": 0.869018018245697, "learning_rate": 3.165754516499544e-06, "loss": 0.0979, "step": 7995 }, { "epoch": 1.2955281918340895, "grad_norm": 0.8611443638801575, "learning_rate": 3.165332993400159e-06, "loss": 0.1135, "step": 7996 }, { "epoch": 1.2956902138690862, "grad_norm": 0.8204544186592102, "learning_rate": 3.1649114499431944e-06, "loss": 0.108, "step": 7997 }, { "epoch": 1.295852235904083, "grad_norm": 0.9157761931419373, "learning_rate": 3.1644898861415484e-06, "loss": 0.1186, "step": 7998 }, { "epoch": 1.2960142579390797, "grad_norm": 0.8155969381332397, "learning_rate": 3.1640683020081196e-06, "loss": 0.1094, "step": 7999 }, { "epoch": 1.2961762799740764, "grad_norm": 0.9700890779495239, "learning_rate": 3.163646697555809e-06, "loss": 0.1176, "step": 8000 }, { "epoch": 1.2963383020090733, "grad_norm": 0.9052625894546509, "learning_rate": 3.163225072797514e-06, "loss": 0.1113, "step": 8001 }, { "epoch": 1.29650032404407, "grad_norm": 0.8415457010269165, "learning_rate": 3.1628034277461376e-06, "loss": 0.103, "step": 8002 }, { "epoch": 1.2966623460790667, "grad_norm": 0.9100468158721924, "learning_rate": 3.1623817624145804e-06, "loss": 0.1048, "step": 8003 }, { "epoch": 1.2968243681140634, "grad_norm": 0.8173115849494934, "learning_rate": 3.161960076815743e-06, "loss": 0.0973, "step": 8004 }, { "epoch": 1.2969863901490601, "grad_norm": 0.8509911298751831, "learning_rate": 3.1615383709625303e-06, "loss": 0.1052, "step": 8005 }, { "epoch": 1.297148412184057, "grad_norm": 0.8229318261146545, "learning_rate": 3.1611166448678445e-06, "loss": 0.1083, "step": 8006 }, { "epoch": 1.2973104342190538, "grad_norm": 0.7618302702903748, "learning_rate": 3.1606948985445884e-06, "loss": 0.1004, "step": 8007 }, { "epoch": 1.2974724562540505, "grad_norm": 0.7803520560264587, "learning_rate": 3.1602731320056675e-06, "loss": 0.0968, "step": 8008 }, { "epoch": 1.2976344782890474, "grad_norm": 1.071578860282898, "learning_rate": 3.1598513452639867e-06, "loss": 0.1186, "step": 8009 }, { "epoch": 1.2977965003240441, "grad_norm": 0.9254783987998962, "learning_rate": 3.159429538332452e-06, "loss": 0.1081, "step": 8010 }, { "epoch": 1.2979585223590409, "grad_norm": 0.8896194100379944, "learning_rate": 3.1590077112239685e-06, "loss": 0.1153, "step": 8011 }, { "epoch": 1.2981205443940376, "grad_norm": 0.8195780515670776, "learning_rate": 3.1585858639514444e-06, "loss": 0.1114, "step": 8012 }, { "epoch": 1.2982825664290343, "grad_norm": 0.869594931602478, "learning_rate": 3.158163996527786e-06, "loss": 0.104, "step": 8013 }, { "epoch": 1.2984445884640312, "grad_norm": 0.9179608225822449, "learning_rate": 3.1577421089659023e-06, "loss": 0.1009, "step": 8014 }, { "epoch": 1.298606610499028, "grad_norm": 0.8011884689331055, "learning_rate": 3.157320201278702e-06, "loss": 0.0954, "step": 8015 }, { "epoch": 1.2987686325340246, "grad_norm": 0.8461195826530457, "learning_rate": 3.1568982734790943e-06, "loss": 0.108, "step": 8016 }, { "epoch": 1.2989306545690213, "grad_norm": 0.7561697959899902, "learning_rate": 3.1564763255799886e-06, "loss": 0.0965, "step": 8017 }, { "epoch": 1.299092676604018, "grad_norm": 0.8390095233917236, "learning_rate": 3.1560543575942958e-06, "loss": 0.1127, "step": 8018 }, { "epoch": 1.299254698639015, "grad_norm": 0.9161712527275085, "learning_rate": 3.155632369534928e-06, "loss": 0.1163, "step": 8019 }, { "epoch": 1.2994167206740117, "grad_norm": 0.8636978268623352, "learning_rate": 3.1552103614147955e-06, "loss": 0.1006, "step": 8020 }, { "epoch": 1.2995787427090084, "grad_norm": 0.7662655115127563, "learning_rate": 3.154788333246812e-06, "loss": 0.093, "step": 8021 }, { "epoch": 1.2997407647440051, "grad_norm": 0.7853266000747681, "learning_rate": 3.1543662850438905e-06, "loss": 0.0991, "step": 8022 }, { "epoch": 1.2999027867790018, "grad_norm": 0.8680589199066162, "learning_rate": 3.153944216818943e-06, "loss": 0.1214, "step": 8023 }, { "epoch": 1.3000648088139988, "grad_norm": 0.7828645706176758, "learning_rate": 3.1535221285848866e-06, "loss": 0.1039, "step": 8024 }, { "epoch": 1.3002268308489955, "grad_norm": 0.8467226624488831, "learning_rate": 3.1531000203546336e-06, "loss": 0.1025, "step": 8025 }, { "epoch": 1.3003888528839922, "grad_norm": 0.8064707517623901, "learning_rate": 3.1526778921411006e-06, "loss": 0.1102, "step": 8026 }, { "epoch": 1.300550874918989, "grad_norm": 0.8857673406600952, "learning_rate": 3.152255743957203e-06, "loss": 0.1082, "step": 8027 }, { "epoch": 1.3007128969539856, "grad_norm": 0.7488471865653992, "learning_rate": 3.151833575815859e-06, "loss": 0.1013, "step": 8028 }, { "epoch": 1.3008749189889826, "grad_norm": 0.7051052451133728, "learning_rate": 3.1514113877299844e-06, "loss": 0.087, "step": 8029 }, { "epoch": 1.3010369410239793, "grad_norm": 0.8191677927970886, "learning_rate": 3.1509891797124977e-06, "loss": 0.1059, "step": 8030 }, { "epoch": 1.301198963058976, "grad_norm": 0.8188061118125916, "learning_rate": 3.150566951776318e-06, "loss": 0.1143, "step": 8031 }, { "epoch": 1.301360985093973, "grad_norm": 0.9264690279960632, "learning_rate": 3.150144703934363e-06, "loss": 0.1195, "step": 8032 }, { "epoch": 1.3015230071289696, "grad_norm": 0.786266565322876, "learning_rate": 3.1497224361995544e-06, "loss": 0.1015, "step": 8033 }, { "epoch": 1.3016850291639663, "grad_norm": 0.9195615649223328, "learning_rate": 3.149300148584811e-06, "loss": 0.1219, "step": 8034 }, { "epoch": 1.301847051198963, "grad_norm": 0.8377419114112854, "learning_rate": 3.1488778411030547e-06, "loss": 0.1083, "step": 8035 }, { "epoch": 1.3020090732339598, "grad_norm": 0.7512086629867554, "learning_rate": 3.1484555137672063e-06, "loss": 0.0884, "step": 8036 }, { "epoch": 1.3021710952689567, "grad_norm": 0.7718841433525085, "learning_rate": 3.148033166590188e-06, "loss": 0.0941, "step": 8037 }, { "epoch": 1.3023331173039534, "grad_norm": 0.7764375805854797, "learning_rate": 3.147610799584924e-06, "loss": 0.1025, "step": 8038 }, { "epoch": 1.3024951393389501, "grad_norm": 0.9073190689086914, "learning_rate": 3.147188412764336e-06, "loss": 0.1038, "step": 8039 }, { "epoch": 1.3026571613739468, "grad_norm": 0.794582724571228, "learning_rate": 3.1467660061413497e-06, "loss": 0.0986, "step": 8040 }, { "epoch": 1.3028191834089435, "grad_norm": 0.8445225358009338, "learning_rate": 3.1463435797288876e-06, "loss": 0.11, "step": 8041 }, { "epoch": 1.3029812054439405, "grad_norm": 0.8097845315933228, "learning_rate": 3.1459211335398765e-06, "loss": 0.1001, "step": 8042 }, { "epoch": 1.3031432274789372, "grad_norm": 0.8664547801017761, "learning_rate": 3.1454986675872417e-06, "loss": 0.1093, "step": 8043 }, { "epoch": 1.303305249513934, "grad_norm": 0.9166635274887085, "learning_rate": 3.14507618188391e-06, "loss": 0.1105, "step": 8044 }, { "epoch": 1.3034672715489306, "grad_norm": 0.8189054727554321, "learning_rate": 3.1446536764428083e-06, "loss": 0.1063, "step": 8045 }, { "epoch": 1.3036292935839273, "grad_norm": 0.8049901723861694, "learning_rate": 3.144231151276864e-06, "loss": 0.1105, "step": 8046 }, { "epoch": 1.3037913156189243, "grad_norm": 0.8540433049201965, "learning_rate": 3.1438086063990054e-06, "loss": 0.1084, "step": 8047 }, { "epoch": 1.303953337653921, "grad_norm": 0.8205851912498474, "learning_rate": 3.143386041822162e-06, "loss": 0.0978, "step": 8048 }, { "epoch": 1.3041153596889177, "grad_norm": 0.9036335945129395, "learning_rate": 3.1429634575592617e-06, "loss": 0.1028, "step": 8049 }, { "epoch": 1.3042773817239144, "grad_norm": 0.8568320870399475, "learning_rate": 3.142540853623236e-06, "loss": 0.1099, "step": 8050 }, { "epoch": 1.304439403758911, "grad_norm": 0.9346566796302795, "learning_rate": 3.1421182300270146e-06, "loss": 0.1128, "step": 8051 }, { "epoch": 1.304601425793908, "grad_norm": 0.8423227667808533, "learning_rate": 3.14169558678353e-06, "loss": 0.1033, "step": 8052 }, { "epoch": 1.3047634478289047, "grad_norm": 0.8017847537994385, "learning_rate": 3.1412729239057133e-06, "loss": 0.0991, "step": 8053 }, { "epoch": 1.3049254698639015, "grad_norm": 0.9350404739379883, "learning_rate": 3.1408502414064963e-06, "loss": 0.1108, "step": 8054 }, { "epoch": 1.3050874918988984, "grad_norm": 0.7774401307106018, "learning_rate": 3.140427539298814e-06, "loss": 0.0955, "step": 8055 }, { "epoch": 1.3052495139338949, "grad_norm": 0.7763550281524658, "learning_rate": 3.140004817595597e-06, "loss": 0.0892, "step": 8056 }, { "epoch": 1.3054115359688918, "grad_norm": 0.8174300193786621, "learning_rate": 3.139582076309783e-06, "loss": 0.1017, "step": 8057 }, { "epoch": 1.3055735580038885, "grad_norm": 0.7830010056495667, "learning_rate": 3.1391593154543043e-06, "loss": 0.0931, "step": 8058 }, { "epoch": 1.3057355800388852, "grad_norm": 0.8983414769172668, "learning_rate": 3.1387365350420973e-06, "loss": 0.1045, "step": 8059 }, { "epoch": 1.3058976020738822, "grad_norm": 0.8986459374427795, "learning_rate": 3.138313735086099e-06, "loss": 0.1145, "step": 8060 }, { "epoch": 1.3060596241088789, "grad_norm": 0.924191415309906, "learning_rate": 3.137890915599243e-06, "loss": 0.1091, "step": 8061 }, { "epoch": 1.3062216461438756, "grad_norm": 0.9674059152603149, "learning_rate": 3.137468076594471e-06, "loss": 0.1148, "step": 8062 }, { "epoch": 1.3063836681788723, "grad_norm": 0.8017002940177917, "learning_rate": 3.1370452180847165e-06, "loss": 0.108, "step": 8063 }, { "epoch": 1.306545690213869, "grad_norm": 0.8276110291481018, "learning_rate": 3.1366223400829215e-06, "loss": 0.1081, "step": 8064 }, { "epoch": 1.306707712248866, "grad_norm": 0.8368842005729675, "learning_rate": 3.136199442602023e-06, "loss": 0.1065, "step": 8065 }, { "epoch": 1.3068697342838627, "grad_norm": 0.8028512597084045, "learning_rate": 3.135776525654961e-06, "loss": 0.0926, "step": 8066 }, { "epoch": 1.3070317563188594, "grad_norm": 0.7708948850631714, "learning_rate": 3.135353589254676e-06, "loss": 0.0956, "step": 8067 }, { "epoch": 1.307193778353856, "grad_norm": 0.7509301900863647, "learning_rate": 3.1349306334141084e-06, "loss": 0.1024, "step": 8068 }, { "epoch": 1.3073558003888528, "grad_norm": 0.8279680609703064, "learning_rate": 3.1345076581462007e-06, "loss": 0.1069, "step": 8069 }, { "epoch": 1.3075178224238497, "grad_norm": 0.763145387172699, "learning_rate": 3.134084663463894e-06, "loss": 0.097, "step": 8070 }, { "epoch": 1.3076798444588464, "grad_norm": 0.8883261680603027, "learning_rate": 3.1336616493801305e-06, "loss": 0.1074, "step": 8071 }, { "epoch": 1.3078418664938432, "grad_norm": 0.9554191827774048, "learning_rate": 3.1332386159078536e-06, "loss": 0.1057, "step": 8072 }, { "epoch": 1.3080038885288399, "grad_norm": 0.9759199023246765, "learning_rate": 3.132815563060008e-06, "loss": 0.1251, "step": 8073 }, { "epoch": 1.3081659105638366, "grad_norm": 0.7938733696937561, "learning_rate": 3.132392490849537e-06, "loss": 0.0819, "step": 8074 }, { "epoch": 1.3083279325988335, "grad_norm": 0.7426988482475281, "learning_rate": 3.1319693992893874e-06, "loss": 0.089, "step": 8075 }, { "epoch": 1.3084899546338302, "grad_norm": 0.8131446838378906, "learning_rate": 3.1315462883925026e-06, "loss": 0.0958, "step": 8076 }, { "epoch": 1.308651976668827, "grad_norm": 0.9161220192909241, "learning_rate": 3.1311231581718303e-06, "loss": 0.1206, "step": 8077 }, { "epoch": 1.3088139987038236, "grad_norm": 0.834787905216217, "learning_rate": 3.1307000086403162e-06, "loss": 0.1093, "step": 8078 }, { "epoch": 1.3089760207388204, "grad_norm": 1.0066626071929932, "learning_rate": 3.1302768398109077e-06, "loss": 0.1017, "step": 8079 }, { "epoch": 1.3091380427738173, "grad_norm": 0.7865386605262756, "learning_rate": 3.1298536516965537e-06, "loss": 0.1025, "step": 8080 }, { "epoch": 1.309300064808814, "grad_norm": 0.8171322345733643, "learning_rate": 3.129430444310202e-06, "loss": 0.1062, "step": 8081 }, { "epoch": 1.3094620868438107, "grad_norm": 0.850459635257721, "learning_rate": 3.129007217664802e-06, "loss": 0.107, "step": 8082 }, { "epoch": 1.3096241088788076, "grad_norm": 0.8040875792503357, "learning_rate": 3.128583971773303e-06, "loss": 0.1006, "step": 8083 }, { "epoch": 1.3097861309138044, "grad_norm": 0.7739570736885071, "learning_rate": 3.1281607066486565e-06, "loss": 0.0991, "step": 8084 }, { "epoch": 1.309948152948801, "grad_norm": 0.7959015369415283, "learning_rate": 3.127737422303811e-06, "loss": 0.097, "step": 8085 }, { "epoch": 1.3101101749837978, "grad_norm": 0.8150349259376526, "learning_rate": 3.127314118751721e-06, "loss": 0.0953, "step": 8086 }, { "epoch": 1.3102721970187945, "grad_norm": 0.8278794884681702, "learning_rate": 3.1268907960053356e-06, "loss": 0.0999, "step": 8087 }, { "epoch": 1.3104342190537914, "grad_norm": 0.7229242324829102, "learning_rate": 3.12646745407761e-06, "loss": 0.0914, "step": 8088 }, { "epoch": 1.3105962410887881, "grad_norm": 0.9541682004928589, "learning_rate": 3.126044092981496e-06, "loss": 0.1125, "step": 8089 }, { "epoch": 1.3107582631237849, "grad_norm": 0.8512731790542603, "learning_rate": 3.1256207127299475e-06, "loss": 0.1065, "step": 8090 }, { "epoch": 1.3109202851587816, "grad_norm": 0.9198841452598572, "learning_rate": 3.12519731333592e-06, "loss": 0.1117, "step": 8091 }, { "epoch": 1.3110823071937783, "grad_norm": 0.7064549326896667, "learning_rate": 3.124773894812367e-06, "loss": 0.0941, "step": 8092 }, { "epoch": 1.3112443292287752, "grad_norm": 0.806461751461029, "learning_rate": 3.124350457172245e-06, "loss": 0.0987, "step": 8093 }, { "epoch": 1.311406351263772, "grad_norm": 0.7722499370574951, "learning_rate": 3.123927000428509e-06, "loss": 0.1029, "step": 8094 }, { "epoch": 1.3115683732987686, "grad_norm": 0.9062454104423523, "learning_rate": 3.123503524594118e-06, "loss": 0.1205, "step": 8095 }, { "epoch": 1.3117303953337653, "grad_norm": 0.9097535610198975, "learning_rate": 3.123080029682027e-06, "loss": 0.1028, "step": 8096 }, { "epoch": 1.311892417368762, "grad_norm": 0.9619364738464355, "learning_rate": 3.1226565157051953e-06, "loss": 0.116, "step": 8097 }, { "epoch": 1.312054439403759, "grad_norm": 0.845770001411438, "learning_rate": 3.1222329826765806e-06, "loss": 0.1112, "step": 8098 }, { "epoch": 1.3122164614387557, "grad_norm": 0.7698118686676025, "learning_rate": 3.121809430609143e-06, "loss": 0.101, "step": 8099 }, { "epoch": 1.3123784834737524, "grad_norm": 0.9154854416847229, "learning_rate": 3.121385859515842e-06, "loss": 0.101, "step": 8100 }, { "epoch": 1.3125405055087491, "grad_norm": 0.9142758250236511, "learning_rate": 3.1209622694096362e-06, "loss": 0.1169, "step": 8101 }, { "epoch": 1.3127025275437458, "grad_norm": 0.8378267288208008, "learning_rate": 3.1205386603034886e-06, "loss": 0.1112, "step": 8102 }, { "epoch": 1.3128645495787428, "grad_norm": 0.8595819473266602, "learning_rate": 3.1201150322103593e-06, "loss": 0.1037, "step": 8103 }, { "epoch": 1.3130265716137395, "grad_norm": 0.9312635660171509, "learning_rate": 3.1196913851432108e-06, "loss": 0.1021, "step": 8104 }, { "epoch": 1.3131885936487362, "grad_norm": 0.7517058253288269, "learning_rate": 3.119267719115005e-06, "loss": 0.0987, "step": 8105 }, { "epoch": 1.3133506156837331, "grad_norm": 0.7930306196212769, "learning_rate": 3.1188440341387063e-06, "loss": 0.0988, "step": 8106 }, { "epoch": 1.3135126377187296, "grad_norm": 0.692436933517456, "learning_rate": 3.1184203302272775e-06, "loss": 0.0828, "step": 8107 }, { "epoch": 1.3136746597537265, "grad_norm": 0.7574872970581055, "learning_rate": 3.1179966073936837e-06, "loss": 0.0962, "step": 8108 }, { "epoch": 1.3138366817887233, "grad_norm": 0.758668065071106, "learning_rate": 3.1175728656508874e-06, "loss": 0.1004, "step": 8109 }, { "epoch": 1.31399870382372, "grad_norm": 0.7493375539779663, "learning_rate": 3.117149105011858e-06, "loss": 0.0977, "step": 8110 }, { "epoch": 1.314160725858717, "grad_norm": 0.7759285569190979, "learning_rate": 3.1167253254895584e-06, "loss": 0.1007, "step": 8111 }, { "epoch": 1.3143227478937136, "grad_norm": 0.8017150163650513, "learning_rate": 3.1163015270969567e-06, "loss": 0.101, "step": 8112 }, { "epoch": 1.3144847699287103, "grad_norm": 0.9174550771713257, "learning_rate": 3.1158777098470194e-06, "loss": 0.1181, "step": 8113 }, { "epoch": 1.314646791963707, "grad_norm": 1.0454412698745728, "learning_rate": 3.115453873752714e-06, "loss": 0.1151, "step": 8114 }, { "epoch": 1.3148088139987038, "grad_norm": 0.9942188262939453, "learning_rate": 3.115030018827011e-06, "loss": 0.1252, "step": 8115 }, { "epoch": 1.3149708360337007, "grad_norm": 0.8930821418762207, "learning_rate": 3.114606145082876e-06, "loss": 0.1013, "step": 8116 }, { "epoch": 1.3151328580686974, "grad_norm": 0.8020307421684265, "learning_rate": 3.1141822525332815e-06, "loss": 0.0981, "step": 8117 }, { "epoch": 1.315294880103694, "grad_norm": 0.9933139681816101, "learning_rate": 3.1137583411911954e-06, "loss": 0.1177, "step": 8118 }, { "epoch": 1.3154569021386908, "grad_norm": 0.9036347270011902, "learning_rate": 3.113334411069591e-06, "loss": 0.1096, "step": 8119 }, { "epoch": 1.3156189241736875, "grad_norm": 0.874993622303009, "learning_rate": 3.1129104621814365e-06, "loss": 0.1011, "step": 8120 }, { "epoch": 1.3157809462086845, "grad_norm": 0.870242178440094, "learning_rate": 3.112486494539705e-06, "loss": 0.1122, "step": 8121 }, { "epoch": 1.3159429682436812, "grad_norm": 0.7542531490325928, "learning_rate": 3.1120625081573696e-06, "loss": 0.0923, "step": 8122 }, { "epoch": 1.3161049902786779, "grad_norm": 0.8048637509346008, "learning_rate": 3.111638503047402e-06, "loss": 0.0955, "step": 8123 }, { "epoch": 1.3162670123136746, "grad_norm": 0.7695650458335876, "learning_rate": 3.1112144792227774e-06, "loss": 0.0995, "step": 8124 }, { "epoch": 1.3164290343486713, "grad_norm": 0.8860523700714111, "learning_rate": 3.110790436696468e-06, "loss": 0.113, "step": 8125 }, { "epoch": 1.3165910563836682, "grad_norm": 0.7619615197181702, "learning_rate": 3.1103663754814493e-06, "loss": 0.1018, "step": 8126 }, { "epoch": 1.316753078418665, "grad_norm": 1.0961564779281616, "learning_rate": 3.1099422955906965e-06, "loss": 0.1428, "step": 8127 }, { "epoch": 1.3169151004536617, "grad_norm": 0.8678660988807678, "learning_rate": 3.109518197037186e-06, "loss": 0.1052, "step": 8128 }, { "epoch": 1.3170771224886584, "grad_norm": 0.8276517987251282, "learning_rate": 3.109094079833893e-06, "loss": 0.0979, "step": 8129 }, { "epoch": 1.317239144523655, "grad_norm": 0.8221682906150818, "learning_rate": 3.1086699439937957e-06, "loss": 0.101, "step": 8130 }, { "epoch": 1.317401166558652, "grad_norm": 0.7636736631393433, "learning_rate": 3.1082457895298705e-06, "loss": 0.0981, "step": 8131 }, { "epoch": 1.3175631885936487, "grad_norm": 0.7403510212898254, "learning_rate": 3.1078216164550966e-06, "loss": 0.1002, "step": 8132 }, { "epoch": 1.3177252106286454, "grad_norm": 0.9044564962387085, "learning_rate": 3.1073974247824523e-06, "loss": 0.1244, "step": 8133 }, { "epoch": 1.3178872326636424, "grad_norm": 0.7922056913375854, "learning_rate": 3.1069732145249166e-06, "loss": 0.1003, "step": 8134 }, { "epoch": 1.318049254698639, "grad_norm": 0.8038402795791626, "learning_rate": 3.106548985695469e-06, "loss": 0.1019, "step": 8135 }, { "epoch": 1.3182112767336358, "grad_norm": 0.7442052364349365, "learning_rate": 3.1061247383070905e-06, "loss": 0.0865, "step": 8136 }, { "epoch": 1.3183732987686325, "grad_norm": 0.6717336177825928, "learning_rate": 3.105700472372762e-06, "loss": 0.0862, "step": 8137 }, { "epoch": 1.3185353208036292, "grad_norm": 0.7845094799995422, "learning_rate": 3.1052761879054637e-06, "loss": 0.1059, "step": 8138 }, { "epoch": 1.3186973428386262, "grad_norm": 0.9265906810760498, "learning_rate": 3.1048518849181795e-06, "loss": 0.1059, "step": 8139 }, { "epoch": 1.3188593648736229, "grad_norm": 0.7679669260978699, "learning_rate": 3.1044275634238913e-06, "loss": 0.097, "step": 8140 }, { "epoch": 1.3190213869086196, "grad_norm": 0.7996553182601929, "learning_rate": 3.1040032234355827e-06, "loss": 0.0952, "step": 8141 }, { "epoch": 1.3191834089436163, "grad_norm": 0.8960670232772827, "learning_rate": 3.103578864966237e-06, "loss": 0.1098, "step": 8142 }, { "epoch": 1.319345430978613, "grad_norm": 0.8924859166145325, "learning_rate": 3.1031544880288384e-06, "loss": 0.1093, "step": 8143 }, { "epoch": 1.31950745301361, "grad_norm": 0.874876856803894, "learning_rate": 3.1027300926363723e-06, "loss": 0.1064, "step": 8144 }, { "epoch": 1.3196694750486067, "grad_norm": 1.0655916929244995, "learning_rate": 3.1023056788018234e-06, "loss": 0.1288, "step": 8145 }, { "epoch": 1.3198314970836034, "grad_norm": 0.8831046223640442, "learning_rate": 3.1018812465381796e-06, "loss": 0.1017, "step": 8146 }, { "epoch": 1.3199935191186, "grad_norm": 0.7947402596473694, "learning_rate": 3.1014567958584246e-06, "loss": 0.0977, "step": 8147 }, { "epoch": 1.3201555411535968, "grad_norm": 0.945785641670227, "learning_rate": 3.1010323267755486e-06, "loss": 0.1148, "step": 8148 }, { "epoch": 1.3203175631885937, "grad_norm": 0.8066475987434387, "learning_rate": 3.1006078393025366e-06, "loss": 0.1053, "step": 8149 }, { "epoch": 1.3204795852235904, "grad_norm": 0.8218502998352051, "learning_rate": 3.100183333452379e-06, "loss": 0.0961, "step": 8150 }, { "epoch": 1.3206416072585871, "grad_norm": 0.7845996618270874, "learning_rate": 3.0997588092380636e-06, "loss": 0.1035, "step": 8151 }, { "epoch": 1.3208036292935839, "grad_norm": 0.7798967957496643, "learning_rate": 3.0993342666725803e-06, "loss": 0.095, "step": 8152 }, { "epoch": 1.3209656513285806, "grad_norm": 0.8794483542442322, "learning_rate": 3.0989097057689175e-06, "loss": 0.1131, "step": 8153 }, { "epoch": 1.3211276733635775, "grad_norm": 0.8552145957946777, "learning_rate": 3.0984851265400683e-06, "loss": 0.0966, "step": 8154 }, { "epoch": 1.3212896953985742, "grad_norm": 0.8511651158332825, "learning_rate": 3.098060528999023e-06, "loss": 0.102, "step": 8155 }, { "epoch": 1.321451717433571, "grad_norm": 0.8631334900856018, "learning_rate": 3.097635913158772e-06, "loss": 0.1088, "step": 8156 }, { "epoch": 1.3216137394685679, "grad_norm": 0.6932099461555481, "learning_rate": 3.0972112790323076e-06, "loss": 0.0885, "step": 8157 }, { "epoch": 1.3217757615035644, "grad_norm": 0.8453887104988098, "learning_rate": 3.096786626632624e-06, "loss": 0.111, "step": 8158 }, { "epoch": 1.3219377835385613, "grad_norm": 0.8994348049163818, "learning_rate": 3.0963619559727143e-06, "loss": 0.1123, "step": 8159 }, { "epoch": 1.322099805573558, "grad_norm": 0.869174599647522, "learning_rate": 3.0959372670655714e-06, "loss": 0.1109, "step": 8160 }, { "epoch": 1.3222618276085547, "grad_norm": 0.7786162495613098, "learning_rate": 3.09551255992419e-06, "loss": 0.0986, "step": 8161 }, { "epoch": 1.3224238496435516, "grad_norm": 0.8360916972160339, "learning_rate": 3.0950878345615654e-06, "loss": 0.1046, "step": 8162 }, { "epoch": 1.3225858716785484, "grad_norm": 0.8206416368484497, "learning_rate": 3.0946630909906943e-06, "loss": 0.1086, "step": 8163 }, { "epoch": 1.322747893713545, "grad_norm": 0.904058575630188, "learning_rate": 3.0942383292245704e-06, "loss": 0.1145, "step": 8164 }, { "epoch": 1.3229099157485418, "grad_norm": 0.8536134362220764, "learning_rate": 3.0938135492761923e-06, "loss": 0.1122, "step": 8165 }, { "epoch": 1.3230719377835385, "grad_norm": 0.7881593108177185, "learning_rate": 3.0933887511585564e-06, "loss": 0.0954, "step": 8166 }, { "epoch": 1.3232339598185354, "grad_norm": 0.8875266909599304, "learning_rate": 3.0929639348846604e-06, "loss": 0.1132, "step": 8167 }, { "epoch": 1.3233959818535321, "grad_norm": 0.8193500638008118, "learning_rate": 3.0925391004675037e-06, "loss": 0.1013, "step": 8168 }, { "epoch": 1.3235580038885288, "grad_norm": 0.8682994842529297, "learning_rate": 3.0921142479200833e-06, "loss": 0.1098, "step": 8169 }, { "epoch": 1.3237200259235256, "grad_norm": 0.7923477292060852, "learning_rate": 3.0916893772554006e-06, "loss": 0.0961, "step": 8170 }, { "epoch": 1.3238820479585223, "grad_norm": 0.8617781400680542, "learning_rate": 3.0912644884864547e-06, "loss": 0.1053, "step": 8171 }, { "epoch": 1.3240440699935192, "grad_norm": 0.7619832158088684, "learning_rate": 3.0908395816262466e-06, "loss": 0.1016, "step": 8172 }, { "epoch": 1.324206092028516, "grad_norm": 0.8054799437522888, "learning_rate": 3.0904146566877762e-06, "loss": 0.1082, "step": 8173 }, { "epoch": 1.3243681140635126, "grad_norm": 0.8365015387535095, "learning_rate": 3.0899897136840468e-06, "loss": 0.1075, "step": 8174 }, { "epoch": 1.3245301360985093, "grad_norm": 0.753804087638855, "learning_rate": 3.0895647526280598e-06, "loss": 0.0947, "step": 8175 }, { "epoch": 1.324692158133506, "grad_norm": 0.9248984456062317, "learning_rate": 3.0891397735328176e-06, "loss": 0.1242, "step": 8176 }, { "epoch": 1.324854180168503, "grad_norm": 0.7784146070480347, "learning_rate": 3.088714776411325e-06, "loss": 0.1127, "step": 8177 }, { "epoch": 1.3250162022034997, "grad_norm": 0.8793583512306213, "learning_rate": 3.088289761276584e-06, "loss": 0.09, "step": 8178 }, { "epoch": 1.3251782242384964, "grad_norm": 0.7790801525115967, "learning_rate": 3.0878647281416007e-06, "loss": 0.0928, "step": 8179 }, { "epoch": 1.3253402462734931, "grad_norm": 0.8331507444381714, "learning_rate": 3.0874396770193785e-06, "loss": 0.1003, "step": 8180 }, { "epoch": 1.3255022683084898, "grad_norm": 0.8366468548774719, "learning_rate": 3.0870146079229245e-06, "loss": 0.1221, "step": 8181 }, { "epoch": 1.3256642903434868, "grad_norm": 0.8266817331314087, "learning_rate": 3.0865895208652436e-06, "loss": 0.1096, "step": 8182 }, { "epoch": 1.3258263123784835, "grad_norm": 0.8993229866027832, "learning_rate": 3.086164415859343e-06, "loss": 0.1211, "step": 8183 }, { "epoch": 1.3259883344134802, "grad_norm": 0.8876615762710571, "learning_rate": 3.0857392929182296e-06, "loss": 0.1092, "step": 8184 }, { "epoch": 1.3261503564484771, "grad_norm": 0.7923975586891174, "learning_rate": 3.085314152054911e-06, "loss": 0.1, "step": 8185 }, { "epoch": 1.3263123784834738, "grad_norm": 0.7979410886764526, "learning_rate": 3.084888993282397e-06, "loss": 0.1007, "step": 8186 }, { "epoch": 1.3264744005184705, "grad_norm": 0.9142311215400696, "learning_rate": 3.0844638166136943e-06, "loss": 0.1154, "step": 8187 }, { "epoch": 1.3266364225534673, "grad_norm": 0.8483611941337585, "learning_rate": 3.0840386220618137e-06, "loss": 0.1061, "step": 8188 }, { "epoch": 1.326798444588464, "grad_norm": 0.9049779772758484, "learning_rate": 3.0836134096397642e-06, "loss": 0.1039, "step": 8189 }, { "epoch": 1.326960466623461, "grad_norm": 0.8768754601478577, "learning_rate": 3.083188179360556e-06, "loss": 0.1141, "step": 8190 }, { "epoch": 1.3271224886584576, "grad_norm": 0.7958570718765259, "learning_rate": 3.082762931237202e-06, "loss": 0.1006, "step": 8191 }, { "epoch": 1.3272845106934543, "grad_norm": 0.8159347176551819, "learning_rate": 3.0823376652827123e-06, "loss": 0.1026, "step": 8192 }, { "epoch": 1.327446532728451, "grad_norm": 0.8997524976730347, "learning_rate": 3.081912381510099e-06, "loss": 0.1096, "step": 8193 }, { "epoch": 1.3276085547634477, "grad_norm": 0.8138853311538696, "learning_rate": 3.0814870799323748e-06, "loss": 0.1031, "step": 8194 }, { "epoch": 1.3277705767984447, "grad_norm": 0.8049913048744202, "learning_rate": 3.0810617605625538e-06, "loss": 0.0991, "step": 8195 }, { "epoch": 1.3279325988334414, "grad_norm": 0.7610349059104919, "learning_rate": 3.080636423413649e-06, "loss": 0.1038, "step": 8196 }, { "epoch": 1.328094620868438, "grad_norm": 0.8136640191078186, "learning_rate": 3.0802110684986742e-06, "loss": 0.101, "step": 8197 }, { "epoch": 1.3282566429034348, "grad_norm": 0.8479365706443787, "learning_rate": 3.079785695830645e-06, "loss": 0.1213, "step": 8198 }, { "epoch": 1.3284186649384315, "grad_norm": 0.8573151230812073, "learning_rate": 3.0793603054225767e-06, "loss": 0.1135, "step": 8199 }, { "epoch": 1.3285806869734285, "grad_norm": 0.8498213887214661, "learning_rate": 3.0789348972874844e-06, "loss": 0.1053, "step": 8200 }, { "epoch": 1.3287427090084252, "grad_norm": 0.764045774936676, "learning_rate": 3.078509471438386e-06, "loss": 0.1018, "step": 8201 }, { "epoch": 1.3289047310434219, "grad_norm": 0.7893896102905273, "learning_rate": 3.0780840278882974e-06, "loss": 0.0983, "step": 8202 }, { "epoch": 1.3290667530784186, "grad_norm": 0.7424217462539673, "learning_rate": 3.0776585666502367e-06, "loss": 0.093, "step": 8203 }, { "epoch": 1.3292287751134153, "grad_norm": 0.8020110726356506, "learning_rate": 3.077233087737222e-06, "loss": 0.1035, "step": 8204 }, { "epoch": 1.3293907971484122, "grad_norm": 0.7891095280647278, "learning_rate": 3.0768075911622712e-06, "loss": 0.1097, "step": 8205 }, { "epoch": 1.329552819183409, "grad_norm": 0.769567608833313, "learning_rate": 3.0763820769384038e-06, "loss": 0.0996, "step": 8206 }, { "epoch": 1.3297148412184057, "grad_norm": 0.8365778923034668, "learning_rate": 3.07595654507864e-06, "loss": 0.0994, "step": 8207 }, { "epoch": 1.3298768632534026, "grad_norm": 0.8200099468231201, "learning_rate": 3.0755309955960007e-06, "loss": 0.104, "step": 8208 }, { "epoch": 1.330038885288399, "grad_norm": 0.7496293783187866, "learning_rate": 3.0751054285035037e-06, "loss": 0.1011, "step": 8209 }, { "epoch": 1.330200907323396, "grad_norm": 0.9090161919593811, "learning_rate": 3.074679843814174e-06, "loss": 0.1109, "step": 8210 }, { "epoch": 1.3303629293583927, "grad_norm": 0.8106396198272705, "learning_rate": 3.0742542415410307e-06, "loss": 0.0995, "step": 8211 }, { "epoch": 1.3305249513933894, "grad_norm": 0.9422435164451599, "learning_rate": 3.073828621697098e-06, "loss": 0.1133, "step": 8212 }, { "epoch": 1.3306869734283864, "grad_norm": 0.8159134387969971, "learning_rate": 3.0734029842953976e-06, "loss": 0.1045, "step": 8213 }, { "epoch": 1.330848995463383, "grad_norm": 0.8325510621070862, "learning_rate": 3.072977329348954e-06, "loss": 0.1119, "step": 8214 }, { "epoch": 1.3310110174983798, "grad_norm": 0.8749605417251587, "learning_rate": 3.07255165687079e-06, "loss": 0.1091, "step": 8215 }, { "epoch": 1.3311730395333765, "grad_norm": 0.8472917079925537, "learning_rate": 3.072125966873932e-06, "loss": 0.11, "step": 8216 }, { "epoch": 1.3313350615683732, "grad_norm": 0.9041043519973755, "learning_rate": 3.0717002593714027e-06, "loss": 0.1122, "step": 8217 }, { "epoch": 1.3314970836033702, "grad_norm": 0.9191260933876038, "learning_rate": 3.0712745343762295e-06, "loss": 0.1157, "step": 8218 }, { "epoch": 1.3316591056383669, "grad_norm": 0.8039606809616089, "learning_rate": 3.070848791901438e-06, "loss": 0.1042, "step": 8219 }, { "epoch": 1.3318211276733636, "grad_norm": 0.8702144026756287, "learning_rate": 3.0704230319600547e-06, "loss": 0.1087, "step": 8220 }, { "epoch": 1.3319831497083603, "grad_norm": 0.7728626132011414, "learning_rate": 3.0699972545651067e-06, "loss": 0.0971, "step": 8221 }, { "epoch": 1.332145171743357, "grad_norm": 0.7619557976722717, "learning_rate": 3.069571459729623e-06, "loss": 0.092, "step": 8222 }, { "epoch": 1.332307193778354, "grad_norm": 0.7611822485923767, "learning_rate": 3.069145647466631e-06, "loss": 0.1038, "step": 8223 }, { "epoch": 1.3324692158133506, "grad_norm": 0.7991523146629333, "learning_rate": 3.068719817789158e-06, "loss": 0.1018, "step": 8224 }, { "epoch": 1.3326312378483474, "grad_norm": 0.7888767719268799, "learning_rate": 3.0682939707102366e-06, "loss": 0.0988, "step": 8225 }, { "epoch": 1.332793259883344, "grad_norm": 0.7734952569007874, "learning_rate": 3.067868106242894e-06, "loss": 0.0969, "step": 8226 }, { "epoch": 1.3329552819183408, "grad_norm": 0.848461925983429, "learning_rate": 3.0674422244001616e-06, "loss": 0.1006, "step": 8227 }, { "epoch": 1.3331173039533377, "grad_norm": 0.7484978437423706, "learning_rate": 3.0670163251950703e-06, "loss": 0.0981, "step": 8228 }, { "epoch": 1.3332793259883344, "grad_norm": 0.7978498935699463, "learning_rate": 3.0665904086406516e-06, "loss": 0.1004, "step": 8229 }, { "epoch": 1.3334413480233311, "grad_norm": 0.8219764828681946, "learning_rate": 3.0661644747499385e-06, "loss": 0.0978, "step": 8230 }, { "epoch": 1.3336033700583279, "grad_norm": 0.910052478313446, "learning_rate": 3.065738523535961e-06, "loss": 0.1144, "step": 8231 }, { "epoch": 1.3337653920933246, "grad_norm": 0.7298288941383362, "learning_rate": 3.0653125550117547e-06, "loss": 0.0957, "step": 8232 }, { "epoch": 1.3339274141283215, "grad_norm": 0.7882039546966553, "learning_rate": 3.064886569190352e-06, "loss": 0.101, "step": 8233 }, { "epoch": 1.3340894361633182, "grad_norm": 0.8351643085479736, "learning_rate": 3.0644605660847875e-06, "loss": 0.1027, "step": 8234 }, { "epoch": 1.334251458198315, "grad_norm": 0.8841098546981812, "learning_rate": 3.0640345457080955e-06, "loss": 0.1066, "step": 8235 }, { "epoch": 1.3344134802333119, "grad_norm": 0.9468151330947876, "learning_rate": 3.0636085080733113e-06, "loss": 0.1148, "step": 8236 }, { "epoch": 1.3345755022683086, "grad_norm": 0.7528395056724548, "learning_rate": 3.0631824531934707e-06, "loss": 0.0928, "step": 8237 }, { "epoch": 1.3347375243033053, "grad_norm": 0.81801837682724, "learning_rate": 3.0627563810816097e-06, "loss": 0.0954, "step": 8238 }, { "epoch": 1.334899546338302, "grad_norm": 0.9190750122070312, "learning_rate": 3.0623302917507657e-06, "loss": 0.1095, "step": 8239 }, { "epoch": 1.3350615683732987, "grad_norm": 0.8107596039772034, "learning_rate": 3.0619041852139746e-06, "loss": 0.1023, "step": 8240 }, { "epoch": 1.3352235904082956, "grad_norm": 0.8642371296882629, "learning_rate": 3.0614780614842764e-06, "loss": 0.1095, "step": 8241 }, { "epoch": 1.3353856124432923, "grad_norm": 0.92511385679245, "learning_rate": 3.061051920574708e-06, "loss": 0.1094, "step": 8242 }, { "epoch": 1.335547634478289, "grad_norm": 0.87492835521698, "learning_rate": 3.0606257624983082e-06, "loss": 0.1073, "step": 8243 }, { "epoch": 1.3357096565132858, "grad_norm": 0.7280792593955994, "learning_rate": 3.0601995872681167e-06, "loss": 0.0947, "step": 8244 }, { "epoch": 1.3358716785482825, "grad_norm": 0.7862234711647034, "learning_rate": 3.0597733948971737e-06, "loss": 0.092, "step": 8245 }, { "epoch": 1.3360337005832794, "grad_norm": 0.9407415390014648, "learning_rate": 3.0593471853985197e-06, "loss": 0.1128, "step": 8246 }, { "epoch": 1.3361957226182761, "grad_norm": 0.7237008213996887, "learning_rate": 3.0589209587851954e-06, "loss": 0.0908, "step": 8247 }, { "epoch": 1.3363577446532728, "grad_norm": 0.8024490475654602, "learning_rate": 3.058494715070242e-06, "loss": 0.096, "step": 8248 }, { "epoch": 1.3365197666882696, "grad_norm": 0.7283364534378052, "learning_rate": 3.0580684542667016e-06, "loss": 0.0916, "step": 8249 }, { "epoch": 1.3366817887232663, "grad_norm": 0.8358945250511169, "learning_rate": 3.0576421763876174e-06, "loss": 0.0954, "step": 8250 }, { "epoch": 1.3368438107582632, "grad_norm": 0.9684699773788452, "learning_rate": 3.0572158814460323e-06, "loss": 0.126, "step": 8251 }, { "epoch": 1.33700583279326, "grad_norm": 0.7843111157417297, "learning_rate": 3.056789569454989e-06, "loss": 0.099, "step": 8252 }, { "epoch": 1.3371678548282566, "grad_norm": 0.7540812492370605, "learning_rate": 3.056363240427533e-06, "loss": 0.0931, "step": 8253 }, { "epoch": 1.3373298768632533, "grad_norm": 0.9374097585678101, "learning_rate": 3.055936894376708e-06, "loss": 0.1093, "step": 8254 }, { "epoch": 1.33749189889825, "grad_norm": 0.8523105978965759, "learning_rate": 3.0555105313155587e-06, "loss": 0.102, "step": 8255 }, { "epoch": 1.337653920933247, "grad_norm": 0.914612352848053, "learning_rate": 3.055084151257133e-06, "loss": 0.1071, "step": 8256 }, { "epoch": 1.3378159429682437, "grad_norm": 0.8243370056152344, "learning_rate": 3.0546577542144734e-06, "loss": 0.1144, "step": 8257 }, { "epoch": 1.3379779650032404, "grad_norm": 0.7740969061851501, "learning_rate": 3.054231340200631e-06, "loss": 0.0894, "step": 8258 }, { "epoch": 1.3381399870382373, "grad_norm": 0.6962242722511292, "learning_rate": 3.053804909228649e-06, "loss": 0.0889, "step": 8259 }, { "epoch": 1.3383020090732338, "grad_norm": 0.7744547128677368, "learning_rate": 3.053378461311578e-06, "loss": 0.0924, "step": 8260 }, { "epoch": 1.3384640311082308, "grad_norm": 0.8196523189544678, "learning_rate": 3.052951996462465e-06, "loss": 0.0982, "step": 8261 }, { "epoch": 1.3386260531432275, "grad_norm": 0.7271378040313721, "learning_rate": 3.0525255146943582e-06, "loss": 0.0969, "step": 8262 }, { "epoch": 1.3387880751782242, "grad_norm": 0.7864364981651306, "learning_rate": 3.052099016020309e-06, "loss": 0.0988, "step": 8263 }, { "epoch": 1.3389500972132211, "grad_norm": 0.7747987508773804, "learning_rate": 3.0516725004533648e-06, "loss": 0.0925, "step": 8264 }, { "epoch": 1.3391121192482178, "grad_norm": 0.8016791343688965, "learning_rate": 3.0512459680065785e-06, "loss": 0.1004, "step": 8265 }, { "epoch": 1.3392741412832145, "grad_norm": 0.8657497763633728, "learning_rate": 3.0508194186929983e-06, "loss": 0.1072, "step": 8266 }, { "epoch": 1.3394361633182112, "grad_norm": 0.8073965311050415, "learning_rate": 3.0503928525256775e-06, "loss": 0.098, "step": 8267 }, { "epoch": 1.339598185353208, "grad_norm": 0.7711490988731384, "learning_rate": 3.0499662695176675e-06, "loss": 0.1004, "step": 8268 }, { "epoch": 1.339760207388205, "grad_norm": 0.7585821151733398, "learning_rate": 3.04953966968202e-06, "loss": 0.0958, "step": 8269 }, { "epoch": 1.3399222294232016, "grad_norm": 0.7577686309814453, "learning_rate": 3.0491130530317887e-06, "loss": 0.0968, "step": 8270 }, { "epoch": 1.3400842514581983, "grad_norm": 0.7601026296615601, "learning_rate": 3.048686419580027e-06, "loss": 0.0991, "step": 8271 }, { "epoch": 1.340246273493195, "grad_norm": 0.9418452382087708, "learning_rate": 3.0482597693397887e-06, "loss": 0.1168, "step": 8272 }, { "epoch": 1.3404082955281917, "grad_norm": 0.8291217684745789, "learning_rate": 3.047833102324128e-06, "loss": 0.1024, "step": 8273 }, { "epoch": 1.3405703175631887, "grad_norm": 0.8256798982620239, "learning_rate": 3.0474064185461e-06, "loss": 0.1015, "step": 8274 }, { "epoch": 1.3407323395981854, "grad_norm": 0.823760986328125, "learning_rate": 3.0469797180187606e-06, "loss": 0.1101, "step": 8275 }, { "epoch": 1.340894361633182, "grad_norm": 0.7810642123222351, "learning_rate": 3.0465530007551646e-06, "loss": 0.1018, "step": 8276 }, { "epoch": 1.3410563836681788, "grad_norm": 0.8536295890808105, "learning_rate": 3.04612626676837e-06, "loss": 0.1047, "step": 8277 }, { "epoch": 1.3412184057031755, "grad_norm": 0.837985098361969, "learning_rate": 3.0456995160714344e-06, "loss": 0.1103, "step": 8278 }, { "epoch": 1.3413804277381725, "grad_norm": 0.765397846698761, "learning_rate": 3.0452727486774118e-06, "loss": 0.0972, "step": 8279 }, { "epoch": 1.3415424497731692, "grad_norm": 0.8630374073982239, "learning_rate": 3.044845964599365e-06, "loss": 0.0982, "step": 8280 }, { "epoch": 1.3417044718081659, "grad_norm": 0.7347887754440308, "learning_rate": 3.044419163850349e-06, "loss": 0.0935, "step": 8281 }, { "epoch": 1.3418664938431626, "grad_norm": 0.773932158946991, "learning_rate": 3.043992346443424e-06, "loss": 0.0996, "step": 8282 }, { "epoch": 1.3420285158781593, "grad_norm": 0.77265864610672, "learning_rate": 3.04356551239165e-06, "loss": 0.0913, "step": 8283 }, { "epoch": 1.3421905379131562, "grad_norm": 0.7659427523612976, "learning_rate": 3.043138661708086e-06, "loss": 0.1001, "step": 8284 }, { "epoch": 1.342352559948153, "grad_norm": 0.8944420218467712, "learning_rate": 3.0427117944057943e-06, "loss": 0.1002, "step": 8285 }, { "epoch": 1.3425145819831497, "grad_norm": 0.8467785716056824, "learning_rate": 3.042284910497834e-06, "loss": 0.0995, "step": 8286 }, { "epoch": 1.3426766040181466, "grad_norm": 0.9520701766014099, "learning_rate": 3.0418580099972687e-06, "loss": 0.1151, "step": 8287 }, { "epoch": 1.3428386260531433, "grad_norm": 0.7847069501876831, "learning_rate": 3.0414310929171587e-06, "loss": 0.1078, "step": 8288 }, { "epoch": 1.34300064808814, "grad_norm": 0.9613496661186218, "learning_rate": 3.0410041592705687e-06, "loss": 0.1154, "step": 8289 }, { "epoch": 1.3431626701231367, "grad_norm": 0.8269860148429871, "learning_rate": 3.04057720907056e-06, "loss": 0.096, "step": 8290 }, { "epoch": 1.3433246921581334, "grad_norm": 0.8524847626686096, "learning_rate": 3.0401502423301966e-06, "loss": 0.111, "step": 8291 }, { "epoch": 1.3434867141931304, "grad_norm": 0.7680644392967224, "learning_rate": 3.039723259062543e-06, "loss": 0.0969, "step": 8292 }, { "epoch": 1.343648736228127, "grad_norm": 0.8680287003517151, "learning_rate": 3.0392962592806635e-06, "loss": 0.1138, "step": 8293 }, { "epoch": 1.3438107582631238, "grad_norm": 0.8054776191711426, "learning_rate": 3.0388692429976247e-06, "loss": 0.0998, "step": 8294 }, { "epoch": 1.3439727802981205, "grad_norm": 0.8584299087524414, "learning_rate": 3.03844221022649e-06, "loss": 0.1017, "step": 8295 }, { "epoch": 1.3441348023331172, "grad_norm": 0.9022834300994873, "learning_rate": 3.038015160980327e-06, "loss": 0.1122, "step": 8296 }, { "epoch": 1.3442968243681142, "grad_norm": 0.9513327479362488, "learning_rate": 3.037588095272202e-06, "loss": 0.1153, "step": 8297 }, { "epoch": 1.3444588464031109, "grad_norm": 0.7637044787406921, "learning_rate": 3.0371610131151823e-06, "loss": 0.1051, "step": 8298 }, { "epoch": 1.3446208684381076, "grad_norm": 0.8572631478309631, "learning_rate": 3.0367339145223352e-06, "loss": 0.1051, "step": 8299 }, { "epoch": 1.3447828904731043, "grad_norm": 0.7625088095664978, "learning_rate": 3.0363067995067297e-06, "loss": 0.1026, "step": 8300 }, { "epoch": 1.344944912508101, "grad_norm": 0.9297232627868652, "learning_rate": 3.0358796680814333e-06, "loss": 0.1002, "step": 8301 }, { "epoch": 1.345106934543098, "grad_norm": 0.8700084090232849, "learning_rate": 3.035452520259517e-06, "loss": 0.1198, "step": 8302 }, { "epoch": 1.3452689565780946, "grad_norm": 0.8522663116455078, "learning_rate": 3.035025356054049e-06, "loss": 0.106, "step": 8303 }, { "epoch": 1.3454309786130914, "grad_norm": 0.7052852511405945, "learning_rate": 3.034598175478099e-06, "loss": 0.0937, "step": 8304 }, { "epoch": 1.345593000648088, "grad_norm": 0.8086321353912354, "learning_rate": 3.034170978544739e-06, "loss": 0.1022, "step": 8305 }, { "epoch": 1.3457550226830848, "grad_norm": 0.8114489912986755, "learning_rate": 3.03374376526704e-06, "loss": 0.1035, "step": 8306 }, { "epoch": 1.3459170447180817, "grad_norm": 0.8548650741577148, "learning_rate": 3.033316535658073e-06, "loss": 0.1087, "step": 8307 }, { "epoch": 1.3460790667530784, "grad_norm": 1.0227341651916504, "learning_rate": 3.0328892897309105e-06, "loss": 0.1139, "step": 8308 }, { "epoch": 1.3462410887880751, "grad_norm": 0.9773898720741272, "learning_rate": 3.032462027498626e-06, "loss": 0.1205, "step": 8309 }, { "epoch": 1.346403110823072, "grad_norm": 0.853813111782074, "learning_rate": 3.0320347489742905e-06, "loss": 0.1066, "step": 8310 }, { "epoch": 1.3465651328580686, "grad_norm": 0.7821661829948425, "learning_rate": 3.0316074541709813e-06, "loss": 0.105, "step": 8311 }, { "epoch": 1.3467271548930655, "grad_norm": 0.7272818684577942, "learning_rate": 3.031180143101769e-06, "loss": 0.0946, "step": 8312 }, { "epoch": 1.3468891769280622, "grad_norm": 0.8688091039657593, "learning_rate": 3.0307528157797306e-06, "loss": 0.1046, "step": 8313 }, { "epoch": 1.347051198963059, "grad_norm": 0.8635636568069458, "learning_rate": 3.03032547221794e-06, "loss": 0.1076, "step": 8314 }, { "epoch": 1.3472132209980558, "grad_norm": 0.8457701802253723, "learning_rate": 3.029898112429473e-06, "loss": 0.1164, "step": 8315 }, { "epoch": 1.3473752430330526, "grad_norm": 0.8496267199516296, "learning_rate": 3.0294707364274066e-06, "loss": 0.1043, "step": 8316 }, { "epoch": 1.3475372650680493, "grad_norm": 0.7697789669036865, "learning_rate": 3.0290433442248163e-06, "loss": 0.0969, "step": 8317 }, { "epoch": 1.347699287103046, "grad_norm": 0.8936092257499695, "learning_rate": 3.028615935834781e-06, "loss": 0.1069, "step": 8318 }, { "epoch": 1.3478613091380427, "grad_norm": 0.7584058046340942, "learning_rate": 3.028188511270376e-06, "loss": 0.0984, "step": 8319 }, { "epoch": 1.3480233311730396, "grad_norm": 0.7890931963920593, "learning_rate": 3.027761070544682e-06, "loss": 0.1024, "step": 8320 }, { "epoch": 1.3481853532080363, "grad_norm": 0.7327829599380493, "learning_rate": 3.027333613670775e-06, "loss": 0.09, "step": 8321 }, { "epoch": 1.348347375243033, "grad_norm": 0.7482281923294067, "learning_rate": 3.026906140661737e-06, "loss": 0.102, "step": 8322 }, { "epoch": 1.3485093972780298, "grad_norm": 0.7569482922554016, "learning_rate": 3.0264786515306453e-06, "loss": 0.096, "step": 8323 }, { "epoch": 1.3486714193130265, "grad_norm": 0.7618415355682373, "learning_rate": 3.026051146290581e-06, "loss": 0.0961, "step": 8324 }, { "epoch": 1.3488334413480234, "grad_norm": 0.8373454213142395, "learning_rate": 3.0256236249546256e-06, "loss": 0.103, "step": 8325 }, { "epoch": 1.3489954633830201, "grad_norm": 0.9048294425010681, "learning_rate": 3.025196087535858e-06, "loss": 0.111, "step": 8326 }, { "epoch": 1.3491574854180168, "grad_norm": 0.8331298232078552, "learning_rate": 3.024768534047362e-06, "loss": 0.1084, "step": 8327 }, { "epoch": 1.3493195074530135, "grad_norm": 0.8242894411087036, "learning_rate": 3.024340964502218e-06, "loss": 0.1079, "step": 8328 }, { "epoch": 1.3494815294880103, "grad_norm": 0.7231031060218811, "learning_rate": 3.0239133789135094e-06, "loss": 0.0966, "step": 8329 }, { "epoch": 1.3496435515230072, "grad_norm": 0.7646691799163818, "learning_rate": 3.0234857772943197e-06, "loss": 0.0967, "step": 8330 }, { "epoch": 1.349805573558004, "grad_norm": 0.7422172427177429, "learning_rate": 3.023058159657732e-06, "loss": 0.0914, "step": 8331 }, { "epoch": 1.3499675955930006, "grad_norm": 0.7762441635131836, "learning_rate": 3.0226305260168298e-06, "loss": 0.0945, "step": 8332 }, { "epoch": 1.3501296176279973, "grad_norm": 0.9666473865509033, "learning_rate": 3.0222028763846994e-06, "loss": 0.1287, "step": 8333 }, { "epoch": 1.350291639662994, "grad_norm": 0.8605021834373474, "learning_rate": 3.0217752107744237e-06, "loss": 0.1062, "step": 8334 }, { "epoch": 1.350453661697991, "grad_norm": 0.877007246017456, "learning_rate": 3.0213475291990897e-06, "loss": 0.1046, "step": 8335 }, { "epoch": 1.3506156837329877, "grad_norm": 0.7591307163238525, "learning_rate": 3.0209198316717825e-06, "loss": 0.0909, "step": 8336 }, { "epoch": 1.3507777057679844, "grad_norm": 0.9145157337188721, "learning_rate": 3.020492118205589e-06, "loss": 0.1147, "step": 8337 }, { "epoch": 1.3509397278029813, "grad_norm": 0.8037732243537903, "learning_rate": 3.0200643888135973e-06, "loss": 0.1012, "step": 8338 }, { "epoch": 1.351101749837978, "grad_norm": 0.9152517318725586, "learning_rate": 3.0196366435088926e-06, "loss": 0.115, "step": 8339 }, { "epoch": 1.3512637718729748, "grad_norm": 0.868812620639801, "learning_rate": 3.019208882304565e-06, "loss": 0.1139, "step": 8340 }, { "epoch": 1.3514257939079715, "grad_norm": 0.7427473068237305, "learning_rate": 3.018781105213701e-06, "loss": 0.0896, "step": 8341 }, { "epoch": 1.3515878159429682, "grad_norm": 0.7403958439826965, "learning_rate": 3.0183533122493917e-06, "loss": 0.0943, "step": 8342 }, { "epoch": 1.351749837977965, "grad_norm": 0.8998346328735352, "learning_rate": 3.017925503424725e-06, "loss": 0.1174, "step": 8343 }, { "epoch": 1.3519118600129618, "grad_norm": 0.8514041304588318, "learning_rate": 3.017497678752791e-06, "loss": 0.1092, "step": 8344 }, { "epoch": 1.3520738820479585, "grad_norm": 0.8314045667648315, "learning_rate": 3.0170698382466805e-06, "loss": 0.1074, "step": 8345 }, { "epoch": 1.3522359040829552, "grad_norm": 0.9838075041770935, "learning_rate": 3.016641981919485e-06, "loss": 0.111, "step": 8346 }, { "epoch": 1.352397926117952, "grad_norm": 0.9298703670501709, "learning_rate": 3.0162141097842943e-06, "loss": 0.1102, "step": 8347 }, { "epoch": 1.3525599481529489, "grad_norm": 0.7722398638725281, "learning_rate": 3.0157862218542004e-06, "loss": 0.0921, "step": 8348 }, { "epoch": 1.3527219701879456, "grad_norm": 0.9279158711433411, "learning_rate": 3.015358318142298e-06, "loss": 0.1113, "step": 8349 }, { "epoch": 1.3528839922229423, "grad_norm": 0.8341646194458008, "learning_rate": 3.0149303986616772e-06, "loss": 0.1125, "step": 8350 }, { "epoch": 1.353046014257939, "grad_norm": 0.8737770915031433, "learning_rate": 3.0145024634254323e-06, "loss": 0.1119, "step": 8351 }, { "epoch": 1.3532080362929357, "grad_norm": 0.8188356757164001, "learning_rate": 3.014074512446657e-06, "loss": 0.0972, "step": 8352 }, { "epoch": 1.3533700583279327, "grad_norm": 0.8271098136901855, "learning_rate": 3.0136465457384454e-06, "loss": 0.1017, "step": 8353 }, { "epoch": 1.3535320803629294, "grad_norm": 0.852016031742096, "learning_rate": 3.0132185633138934e-06, "loss": 0.115, "step": 8354 }, { "epoch": 1.353694102397926, "grad_norm": 0.8249271512031555, "learning_rate": 3.0127905651860946e-06, "loss": 0.0977, "step": 8355 }, { "epoch": 1.3538561244329228, "grad_norm": 0.7700305581092834, "learning_rate": 3.0123625513681463e-06, "loss": 0.1055, "step": 8356 }, { "epoch": 1.3540181464679195, "grad_norm": 0.9442518353462219, "learning_rate": 3.0119345218731433e-06, "loss": 0.1065, "step": 8357 }, { "epoch": 1.3541801685029164, "grad_norm": 0.8089423179626465, "learning_rate": 3.0115064767141827e-06, "loss": 0.0996, "step": 8358 }, { "epoch": 1.3543421905379132, "grad_norm": 0.9584595561027527, "learning_rate": 3.0110784159043614e-06, "loss": 0.1212, "step": 8359 }, { "epoch": 1.3545042125729099, "grad_norm": 0.7914947867393494, "learning_rate": 3.0106503394567775e-06, "loss": 0.1007, "step": 8360 }, { "epoch": 1.3546662346079068, "grad_norm": 0.7712557911872864, "learning_rate": 3.0102222473845296e-06, "loss": 0.1009, "step": 8361 }, { "epoch": 1.3548282566429035, "grad_norm": 0.9304143190383911, "learning_rate": 3.0097941397007156e-06, "loss": 0.1097, "step": 8362 }, { "epoch": 1.3549902786779002, "grad_norm": 0.7734033465385437, "learning_rate": 3.0093660164184333e-06, "loss": 0.0984, "step": 8363 }, { "epoch": 1.355152300712897, "grad_norm": 0.7931289672851562, "learning_rate": 3.008937877550785e-06, "loss": 0.1012, "step": 8364 }, { "epoch": 1.3553143227478937, "grad_norm": 0.8496924638748169, "learning_rate": 3.008509723110869e-06, "loss": 0.1057, "step": 8365 }, { "epoch": 1.3554763447828906, "grad_norm": 0.8549742102622986, "learning_rate": 3.008081553111786e-06, "loss": 0.1145, "step": 8366 }, { "epoch": 1.3556383668178873, "grad_norm": 0.8030797243118286, "learning_rate": 3.007653367566636e-06, "loss": 0.1055, "step": 8367 }, { "epoch": 1.355800388852884, "grad_norm": 0.9936630725860596, "learning_rate": 3.0072251664885222e-06, "loss": 0.1309, "step": 8368 }, { "epoch": 1.3559624108878807, "grad_norm": 0.8453781604766846, "learning_rate": 3.0067969498905463e-06, "loss": 0.0999, "step": 8369 }, { "epoch": 1.3561244329228774, "grad_norm": 0.8510198593139648, "learning_rate": 3.006368717785809e-06, "loss": 0.103, "step": 8370 }, { "epoch": 1.3562864549578744, "grad_norm": 0.770231306552887, "learning_rate": 3.0059404701874157e-06, "loss": 0.0957, "step": 8371 }, { "epoch": 1.356448476992871, "grad_norm": 0.7896842956542969, "learning_rate": 3.005512207108467e-06, "loss": 0.0984, "step": 8372 }, { "epoch": 1.3566104990278678, "grad_norm": 0.7394090294837952, "learning_rate": 3.005083928562069e-06, "loss": 0.094, "step": 8373 }, { "epoch": 1.3567725210628645, "grad_norm": 0.8859434127807617, "learning_rate": 3.004655634561325e-06, "loss": 0.0986, "step": 8374 }, { "epoch": 1.3569345430978612, "grad_norm": 1.0016928911209106, "learning_rate": 3.004227325119339e-06, "loss": 0.1174, "step": 8375 }, { "epoch": 1.3570965651328581, "grad_norm": 0.7929229736328125, "learning_rate": 3.003799000249218e-06, "loss": 0.1043, "step": 8376 }, { "epoch": 1.3572585871678549, "grad_norm": 0.8156124949455261, "learning_rate": 3.0033706599640665e-06, "loss": 0.106, "step": 8377 }, { "epoch": 1.3574206092028516, "grad_norm": 0.8548935055732727, "learning_rate": 3.002942304276991e-06, "loss": 0.111, "step": 8378 }, { "epoch": 1.3575826312378483, "grad_norm": 0.8992919325828552, "learning_rate": 3.0025139332010976e-06, "loss": 0.0987, "step": 8379 }, { "epoch": 1.357744653272845, "grad_norm": 0.8693149089813232, "learning_rate": 3.002085546749495e-06, "loss": 0.1025, "step": 8380 }, { "epoch": 1.357906675307842, "grad_norm": 0.8654988408088684, "learning_rate": 3.0016571449352882e-06, "loss": 0.0976, "step": 8381 }, { "epoch": 1.3580686973428386, "grad_norm": 0.8404430747032166, "learning_rate": 3.001228727771588e-06, "loss": 0.1063, "step": 8382 }, { "epoch": 1.3582307193778353, "grad_norm": 0.9719839692115784, "learning_rate": 3.0008002952715008e-06, "loss": 0.121, "step": 8383 }, { "epoch": 1.3583927414128323, "grad_norm": 0.9208077788352966, "learning_rate": 3.000371847448137e-06, "loss": 0.11, "step": 8384 }, { "epoch": 1.3585547634478288, "grad_norm": 0.7984682321548462, "learning_rate": 2.9999433843146055e-06, "loss": 0.0949, "step": 8385 }, { "epoch": 1.3587167854828257, "grad_norm": 0.8321592807769775, "learning_rate": 2.9995149058840157e-06, "loss": 0.1012, "step": 8386 }, { "epoch": 1.3588788075178224, "grad_norm": 0.8269400596618652, "learning_rate": 2.9990864121694795e-06, "loss": 0.1062, "step": 8387 }, { "epoch": 1.3590408295528191, "grad_norm": 0.862639844417572, "learning_rate": 2.998657903184107e-06, "loss": 0.0967, "step": 8388 }, { "epoch": 1.359202851587816, "grad_norm": 0.8576428890228271, "learning_rate": 2.9982293789410083e-06, "loss": 0.0987, "step": 8389 }, { "epoch": 1.3593648736228128, "grad_norm": 0.9003967642784119, "learning_rate": 2.9978008394532966e-06, "loss": 0.1054, "step": 8390 }, { "epoch": 1.3595268956578095, "grad_norm": 0.8770749568939209, "learning_rate": 2.997372284734084e-06, "loss": 0.1018, "step": 8391 }, { "epoch": 1.3596889176928062, "grad_norm": 0.8922051191329956, "learning_rate": 2.996943714796483e-06, "loss": 0.1078, "step": 8392 }, { "epoch": 1.359850939727803, "grad_norm": 0.8432496190071106, "learning_rate": 2.9965151296536076e-06, "loss": 0.1048, "step": 8393 }, { "epoch": 1.3600129617627998, "grad_norm": 0.8538945913314819, "learning_rate": 2.9960865293185697e-06, "loss": 0.1141, "step": 8394 }, { "epoch": 1.3601749837977966, "grad_norm": 0.9138311147689819, "learning_rate": 2.9956579138044857e-06, "loss": 0.1115, "step": 8395 }, { "epoch": 1.3603370058327933, "grad_norm": 0.8055441379547119, "learning_rate": 2.995229283124468e-06, "loss": 0.0974, "step": 8396 }, { "epoch": 1.36049902786779, "grad_norm": 0.7506464719772339, "learning_rate": 2.9948006372916332e-06, "loss": 0.0969, "step": 8397 }, { "epoch": 1.3606610499027867, "grad_norm": 0.7606310248374939, "learning_rate": 2.994371976319096e-06, "loss": 0.1064, "step": 8398 }, { "epoch": 1.3608230719377836, "grad_norm": 0.8419187068939209, "learning_rate": 2.993943300219973e-06, "loss": 0.1104, "step": 8399 }, { "epoch": 1.3609850939727803, "grad_norm": 0.7976471781730652, "learning_rate": 2.993514609007381e-06, "loss": 0.0947, "step": 8400 }, { "epoch": 1.361147116007777, "grad_norm": 0.935541033744812, "learning_rate": 2.993085902694434e-06, "loss": 0.1214, "step": 8401 }, { "epoch": 1.3613091380427738, "grad_norm": 1.0250697135925293, "learning_rate": 2.992657181294254e-06, "loss": 0.1132, "step": 8402 }, { "epoch": 1.3614711600777705, "grad_norm": 0.7330549955368042, "learning_rate": 2.9922284448199548e-06, "loss": 0.0954, "step": 8403 }, { "epoch": 1.3616331821127674, "grad_norm": 0.7346470355987549, "learning_rate": 2.9917996932846572e-06, "loss": 0.0933, "step": 8404 }, { "epoch": 1.3617952041477641, "grad_norm": 0.8301489353179932, "learning_rate": 2.991370926701479e-06, "loss": 0.0975, "step": 8405 }, { "epoch": 1.3619572261827608, "grad_norm": 0.8129703402519226, "learning_rate": 2.99094214508354e-06, "loss": 0.0998, "step": 8406 }, { "epoch": 1.3621192482177575, "grad_norm": 0.9655615091323853, "learning_rate": 2.9905133484439585e-06, "loss": 0.1182, "step": 8407 }, { "epoch": 1.3622812702527543, "grad_norm": 0.8164478540420532, "learning_rate": 2.990084536795856e-06, "loss": 0.1012, "step": 8408 }, { "epoch": 1.3624432922877512, "grad_norm": 0.7988752722740173, "learning_rate": 2.989655710152353e-06, "loss": 0.095, "step": 8409 }, { "epoch": 1.362605314322748, "grad_norm": 0.7468502521514893, "learning_rate": 2.989226868526569e-06, "loss": 0.09, "step": 8410 }, { "epoch": 1.3627673363577446, "grad_norm": 0.816019594669342, "learning_rate": 2.9887980119316284e-06, "loss": 0.0967, "step": 8411 }, { "epoch": 1.3629293583927415, "grad_norm": 0.7134393453598022, "learning_rate": 2.98836914038065e-06, "loss": 0.093, "step": 8412 }, { "epoch": 1.3630913804277383, "grad_norm": 0.8280972242355347, "learning_rate": 2.9879402538867584e-06, "loss": 0.0959, "step": 8413 }, { "epoch": 1.363253402462735, "grad_norm": 0.9556366801261902, "learning_rate": 2.987511352463076e-06, "loss": 0.1118, "step": 8414 }, { "epoch": 1.3634154244977317, "grad_norm": 0.9054288268089294, "learning_rate": 2.9870824361227257e-06, "loss": 0.1048, "step": 8415 }, { "epoch": 1.3635774465327284, "grad_norm": 0.9046202898025513, "learning_rate": 2.9866535048788314e-06, "loss": 0.1089, "step": 8416 }, { "epoch": 1.3637394685677253, "grad_norm": 0.8369488716125488, "learning_rate": 2.986224558744519e-06, "loss": 0.1042, "step": 8417 }, { "epoch": 1.363901490602722, "grad_norm": 0.710746705532074, "learning_rate": 2.9857955977329095e-06, "loss": 0.0917, "step": 8418 }, { "epoch": 1.3640635126377187, "grad_norm": 0.8769986629486084, "learning_rate": 2.985366621857132e-06, "loss": 0.0997, "step": 8419 }, { "epoch": 1.3642255346727155, "grad_norm": 0.8238828182220459, "learning_rate": 2.9849376311303095e-06, "loss": 0.1029, "step": 8420 }, { "epoch": 1.3643875567077122, "grad_norm": 0.7568957209587097, "learning_rate": 2.9845086255655692e-06, "loss": 0.0914, "step": 8421 }, { "epoch": 1.364549578742709, "grad_norm": 0.7604775428771973, "learning_rate": 2.984079605176038e-06, "loss": 0.1007, "step": 8422 }, { "epoch": 1.3647116007777058, "grad_norm": 0.9038381576538086, "learning_rate": 2.9836505699748414e-06, "loss": 0.1094, "step": 8423 }, { "epoch": 1.3648736228127025, "grad_norm": 0.9362145066261292, "learning_rate": 2.9832215199751085e-06, "loss": 0.1154, "step": 8424 }, { "epoch": 1.3650356448476992, "grad_norm": 0.8526574969291687, "learning_rate": 2.9827924551899657e-06, "loss": 0.1054, "step": 8425 }, { "epoch": 1.365197666882696, "grad_norm": 0.8114335536956787, "learning_rate": 2.9823633756325433e-06, "loss": 0.1028, "step": 8426 }, { "epoch": 1.3653596889176929, "grad_norm": 0.814687967300415, "learning_rate": 2.9819342813159674e-06, "loss": 0.0995, "step": 8427 }, { "epoch": 1.3655217109526896, "grad_norm": 0.7649007439613342, "learning_rate": 2.9815051722533707e-06, "loss": 0.0909, "step": 8428 }, { "epoch": 1.3656837329876863, "grad_norm": 0.9378827214241028, "learning_rate": 2.9810760484578794e-06, "loss": 0.1072, "step": 8429 }, { "epoch": 1.365845755022683, "grad_norm": 0.841758668422699, "learning_rate": 2.9806469099426254e-06, "loss": 0.1043, "step": 8430 }, { "epoch": 1.3660077770576797, "grad_norm": 0.7740828990936279, "learning_rate": 2.98021775672074e-06, "loss": 0.0929, "step": 8431 }, { "epoch": 1.3661697990926767, "grad_norm": 0.7625635266304016, "learning_rate": 2.9797885888053517e-06, "loss": 0.0944, "step": 8432 }, { "epoch": 1.3663318211276734, "grad_norm": 0.8351330757141113, "learning_rate": 2.9793594062095955e-06, "loss": 0.1061, "step": 8433 }, { "epoch": 1.36649384316267, "grad_norm": 0.791860044002533, "learning_rate": 2.9789302089466e-06, "loss": 0.0981, "step": 8434 }, { "epoch": 1.366655865197667, "grad_norm": 0.8011177778244019, "learning_rate": 2.9785009970294997e-06, "loss": 0.1084, "step": 8435 }, { "epoch": 1.3668178872326635, "grad_norm": 0.9865897297859192, "learning_rate": 2.978071770471427e-06, "loss": 0.1206, "step": 8436 }, { "epoch": 1.3669799092676604, "grad_norm": 0.9098086357116699, "learning_rate": 2.977642529285515e-06, "loss": 0.1129, "step": 8437 }, { "epoch": 1.3671419313026572, "grad_norm": 0.9035070538520813, "learning_rate": 2.9772132734848974e-06, "loss": 0.111, "step": 8438 }, { "epoch": 1.3673039533376539, "grad_norm": 0.8441570401191711, "learning_rate": 2.9767840030827082e-06, "loss": 0.1142, "step": 8439 }, { "epoch": 1.3674659753726508, "grad_norm": 0.857961118221283, "learning_rate": 2.9763547180920825e-06, "loss": 0.108, "step": 8440 }, { "epoch": 1.3676279974076475, "grad_norm": 0.7608411312103271, "learning_rate": 2.9759254185261555e-06, "loss": 0.0939, "step": 8441 }, { "epoch": 1.3677900194426442, "grad_norm": 0.8588743209838867, "learning_rate": 2.9754961043980623e-06, "loss": 0.1139, "step": 8442 }, { "epoch": 1.367952041477641, "grad_norm": 0.771369993686676, "learning_rate": 2.9750667757209385e-06, "loss": 0.1037, "step": 8443 }, { "epoch": 1.3681140635126376, "grad_norm": 0.8773581385612488, "learning_rate": 2.9746374325079213e-06, "loss": 0.1133, "step": 8444 }, { "epoch": 1.3682760855476346, "grad_norm": 0.798323392868042, "learning_rate": 2.9742080747721473e-06, "loss": 0.1001, "step": 8445 }, { "epoch": 1.3684381075826313, "grad_norm": 0.826897144317627, "learning_rate": 2.973778702526754e-06, "loss": 0.1104, "step": 8446 }, { "epoch": 1.368600129617628, "grad_norm": 0.8716105222702026, "learning_rate": 2.973349315784878e-06, "loss": 0.1109, "step": 8447 }, { "epoch": 1.3687621516526247, "grad_norm": 0.9297152757644653, "learning_rate": 2.97291991455966e-06, "loss": 0.1176, "step": 8448 }, { "epoch": 1.3689241736876214, "grad_norm": 0.8426374197006226, "learning_rate": 2.9724904988642357e-06, "loss": 0.0943, "step": 8449 }, { "epoch": 1.3690861957226184, "grad_norm": 0.7848527431488037, "learning_rate": 2.9720610687117462e-06, "loss": 0.1075, "step": 8450 }, { "epoch": 1.369248217757615, "grad_norm": 0.822970986366272, "learning_rate": 2.9716316241153303e-06, "loss": 0.1023, "step": 8451 }, { "epoch": 1.3694102397926118, "grad_norm": 0.8297960162162781, "learning_rate": 2.971202165088128e-06, "loss": 0.1, "step": 8452 }, { "epoch": 1.3695722618276085, "grad_norm": 0.799751341342926, "learning_rate": 2.9707726916432793e-06, "loss": 0.1051, "step": 8453 }, { "epoch": 1.3697342838626052, "grad_norm": 0.8356010317802429, "learning_rate": 2.9703432037939255e-06, "loss": 0.1089, "step": 8454 }, { "epoch": 1.3698963058976021, "grad_norm": 0.8152604699134827, "learning_rate": 2.969913701553209e-06, "loss": 0.1082, "step": 8455 }, { "epoch": 1.3700583279325989, "grad_norm": 0.8205832839012146, "learning_rate": 2.9694841849342688e-06, "loss": 0.1016, "step": 8456 }, { "epoch": 1.3702203499675956, "grad_norm": 0.9030212163925171, "learning_rate": 2.9690546539502496e-06, "loss": 0.1063, "step": 8457 }, { "epoch": 1.3703823720025923, "grad_norm": 0.8552389144897461, "learning_rate": 2.9686251086142927e-06, "loss": 0.1158, "step": 8458 }, { "epoch": 1.370544394037589, "grad_norm": 0.8218772411346436, "learning_rate": 2.968195548939542e-06, "loss": 0.1007, "step": 8459 }, { "epoch": 1.370706416072586, "grad_norm": 0.8865529894828796, "learning_rate": 2.9677659749391404e-06, "loss": 0.1133, "step": 8460 }, { "epoch": 1.3708684381075826, "grad_norm": 0.8135964274406433, "learning_rate": 2.967336386626232e-06, "loss": 0.0994, "step": 8461 }, { "epoch": 1.3710304601425793, "grad_norm": 0.7750198841094971, "learning_rate": 2.9669067840139603e-06, "loss": 0.1008, "step": 8462 }, { "epoch": 1.3711924821775763, "grad_norm": 0.7373397946357727, "learning_rate": 2.966477167115472e-06, "loss": 0.0951, "step": 8463 }, { "epoch": 1.371354504212573, "grad_norm": 0.8936923146247864, "learning_rate": 2.9660475359439113e-06, "loss": 0.1062, "step": 8464 }, { "epoch": 1.3715165262475697, "grad_norm": 0.8021456003189087, "learning_rate": 2.9656178905124222e-06, "loss": 0.102, "step": 8465 }, { "epoch": 1.3716785482825664, "grad_norm": 0.921617865562439, "learning_rate": 2.965188230834154e-06, "loss": 0.1193, "step": 8466 }, { "epoch": 1.3718405703175631, "grad_norm": 0.8052314519882202, "learning_rate": 2.9647585569222516e-06, "loss": 0.0948, "step": 8467 }, { "epoch": 1.37200259235256, "grad_norm": 1.5328847169876099, "learning_rate": 2.9643288687898614e-06, "loss": 0.1096, "step": 8468 }, { "epoch": 1.3721646143875568, "grad_norm": 0.905802309513092, "learning_rate": 2.9638991664501314e-06, "loss": 0.1174, "step": 8469 }, { "epoch": 1.3723266364225535, "grad_norm": 0.80458664894104, "learning_rate": 2.96346944991621e-06, "loss": 0.0972, "step": 8470 }, { "epoch": 1.3724886584575502, "grad_norm": 0.8991355299949646, "learning_rate": 2.9630397192012445e-06, "loss": 0.1096, "step": 8471 }, { "epoch": 1.372650680492547, "grad_norm": 1.1109977960586548, "learning_rate": 2.962609974318385e-06, "loss": 0.1276, "step": 8472 }, { "epoch": 1.3728127025275438, "grad_norm": 0.8459230065345764, "learning_rate": 2.962180215280779e-06, "loss": 0.107, "step": 8473 }, { "epoch": 1.3729747245625405, "grad_norm": 0.8833880424499512, "learning_rate": 2.961750442101577e-06, "loss": 0.106, "step": 8474 }, { "epoch": 1.3731367465975373, "grad_norm": 0.8358442783355713, "learning_rate": 2.9613206547939287e-06, "loss": 0.1001, "step": 8475 }, { "epoch": 1.373298768632534, "grad_norm": 0.8312876224517822, "learning_rate": 2.9608908533709852e-06, "loss": 0.104, "step": 8476 }, { "epoch": 1.3734607906675307, "grad_norm": 0.7915255427360535, "learning_rate": 2.9604610378458965e-06, "loss": 0.0953, "step": 8477 }, { "epoch": 1.3736228127025276, "grad_norm": 0.7739593386650085, "learning_rate": 2.9600312082318144e-06, "loss": 0.1022, "step": 8478 }, { "epoch": 1.3737848347375243, "grad_norm": 0.9240787029266357, "learning_rate": 2.9596013645418913e-06, "loss": 0.1201, "step": 8479 }, { "epoch": 1.373946856772521, "grad_norm": 0.7890738248825073, "learning_rate": 2.9591715067892777e-06, "loss": 0.0993, "step": 8480 }, { "epoch": 1.3741088788075178, "grad_norm": 0.7773252725601196, "learning_rate": 2.9587416349871277e-06, "loss": 0.095, "step": 8481 }, { "epoch": 1.3742709008425145, "grad_norm": 0.888715386390686, "learning_rate": 2.958311749148594e-06, "loss": 0.1149, "step": 8482 }, { "epoch": 1.3744329228775114, "grad_norm": 0.8670759797096252, "learning_rate": 2.9578818492868293e-06, "loss": 0.1132, "step": 8483 }, { "epoch": 1.374594944912508, "grad_norm": 0.8122788667678833, "learning_rate": 2.9574519354149884e-06, "loss": 0.0999, "step": 8484 }, { "epoch": 1.3747569669475048, "grad_norm": 0.9208338260650635, "learning_rate": 2.9570220075462254e-06, "loss": 0.1139, "step": 8485 }, { "epoch": 1.3749189889825018, "grad_norm": 0.7937371134757996, "learning_rate": 2.9565920656936947e-06, "loss": 0.1054, "step": 8486 }, { "epoch": 1.3750810110174982, "grad_norm": 0.9310660362243652, "learning_rate": 2.956162109870551e-06, "loss": 0.1085, "step": 8487 }, { "epoch": 1.3752430330524952, "grad_norm": 0.8265377283096313, "learning_rate": 2.9557321400899524e-06, "loss": 0.1031, "step": 8488 }, { "epoch": 1.375405055087492, "grad_norm": 0.7675789594650269, "learning_rate": 2.9553021563650514e-06, "loss": 0.0969, "step": 8489 }, { "epoch": 1.3755670771224886, "grad_norm": 0.8487404584884644, "learning_rate": 2.9548721587090075e-06, "loss": 0.1223, "step": 8490 }, { "epoch": 1.3757290991574855, "grad_norm": 0.8002118468284607, "learning_rate": 2.9544421471349753e-06, "loss": 0.0937, "step": 8491 }, { "epoch": 1.3758911211924822, "grad_norm": 0.7949121594429016, "learning_rate": 2.954012121656114e-06, "loss": 0.0965, "step": 8492 }, { "epoch": 1.376053143227479, "grad_norm": 0.8120603561401367, "learning_rate": 2.9535820822855797e-06, "loss": 0.0894, "step": 8493 }, { "epoch": 1.3762151652624757, "grad_norm": 0.7125270366668701, "learning_rate": 2.9531520290365316e-06, "loss": 0.0855, "step": 8494 }, { "epoch": 1.3763771872974724, "grad_norm": 0.819965124130249, "learning_rate": 2.9527219619221293e-06, "loss": 0.0934, "step": 8495 }, { "epoch": 1.3765392093324693, "grad_norm": 0.8064921498298645, "learning_rate": 2.952291880955529e-06, "loss": 0.0961, "step": 8496 }, { "epoch": 1.376701231367466, "grad_norm": 0.7966877818107605, "learning_rate": 2.9518617861498924e-06, "loss": 0.0986, "step": 8497 }, { "epoch": 1.3768632534024627, "grad_norm": 0.8604177236557007, "learning_rate": 2.9514316775183777e-06, "loss": 0.1135, "step": 8498 }, { "epoch": 1.3770252754374595, "grad_norm": 0.8813721537590027, "learning_rate": 2.9510015550741467e-06, "loss": 0.108, "step": 8499 }, { "epoch": 1.3771872974724562, "grad_norm": 0.8800192475318909, "learning_rate": 2.950571418830359e-06, "loss": 0.1143, "step": 8500 }, { "epoch": 1.377349319507453, "grad_norm": 0.8808828592300415, "learning_rate": 2.950141268800177e-06, "loss": 0.1095, "step": 8501 }, { "epoch": 1.3775113415424498, "grad_norm": 0.7128430008888245, "learning_rate": 2.949711104996761e-06, "loss": 0.0887, "step": 8502 }, { "epoch": 1.3776733635774465, "grad_norm": 0.7968690395355225, "learning_rate": 2.9492809274332745e-06, "loss": 0.0957, "step": 8503 }, { "epoch": 1.3778353856124432, "grad_norm": 0.8926577568054199, "learning_rate": 2.948850736122878e-06, "loss": 0.1194, "step": 8504 }, { "epoch": 1.37799740764744, "grad_norm": 0.9188405871391296, "learning_rate": 2.948420531078735e-06, "loss": 0.1107, "step": 8505 }, { "epoch": 1.3781594296824369, "grad_norm": 0.7922679781913757, "learning_rate": 2.947990312314009e-06, "loss": 0.0989, "step": 8506 }, { "epoch": 1.3783214517174336, "grad_norm": 0.9483537077903748, "learning_rate": 2.9475600798418636e-06, "loss": 0.1224, "step": 8507 }, { "epoch": 1.3784834737524303, "grad_norm": 0.9121973514556885, "learning_rate": 2.9471298336754633e-06, "loss": 0.1141, "step": 8508 }, { "epoch": 1.378645495787427, "grad_norm": 0.8246192932128906, "learning_rate": 2.9466995738279715e-06, "loss": 0.1031, "step": 8509 }, { "epoch": 1.3788075178224237, "grad_norm": 0.8705335259437561, "learning_rate": 2.9462693003125544e-06, "loss": 0.1033, "step": 8510 }, { "epoch": 1.3789695398574207, "grad_norm": 0.7904585003852844, "learning_rate": 2.9458390131423754e-06, "loss": 0.0992, "step": 8511 }, { "epoch": 1.3791315618924174, "grad_norm": 0.8808677196502686, "learning_rate": 2.945408712330603e-06, "loss": 0.1059, "step": 8512 }, { "epoch": 1.379293583927414, "grad_norm": 1.1480685472488403, "learning_rate": 2.944978397890401e-06, "loss": 0.1, "step": 8513 }, { "epoch": 1.379455605962411, "grad_norm": 0.8541058301925659, "learning_rate": 2.944548069834937e-06, "loss": 0.1072, "step": 8514 }, { "epoch": 1.3796176279974077, "grad_norm": 0.7569074630737305, "learning_rate": 2.9441177281773783e-06, "loss": 0.1054, "step": 8515 }, { "epoch": 1.3797796500324044, "grad_norm": 0.8691943287849426, "learning_rate": 2.943687372930891e-06, "loss": 0.1116, "step": 8516 }, { "epoch": 1.3799416720674011, "grad_norm": 0.8004394173622131, "learning_rate": 2.943257004108645e-06, "loss": 0.1001, "step": 8517 }, { "epoch": 1.3801036941023979, "grad_norm": 0.8240222930908203, "learning_rate": 2.942826621723806e-06, "loss": 0.1008, "step": 8518 }, { "epoch": 1.3802657161373948, "grad_norm": 0.8319725394248962, "learning_rate": 2.942396225789545e-06, "loss": 0.1021, "step": 8519 }, { "epoch": 1.3804277381723915, "grad_norm": 0.8528062701225281, "learning_rate": 2.9419658163190295e-06, "loss": 0.1136, "step": 8520 }, { "epoch": 1.3805897602073882, "grad_norm": 0.7287147641181946, "learning_rate": 2.9415353933254297e-06, "loss": 0.0952, "step": 8521 }, { "epoch": 1.380751782242385, "grad_norm": 0.8255961537361145, "learning_rate": 2.9411049568219153e-06, "loss": 0.1072, "step": 8522 }, { "epoch": 1.3809138042773816, "grad_norm": 0.7438040971755981, "learning_rate": 2.940674506821657e-06, "loss": 0.0972, "step": 8523 }, { "epoch": 1.3810758263123786, "grad_norm": 0.7560163736343384, "learning_rate": 2.9402440433378247e-06, "loss": 0.0906, "step": 8524 }, { "epoch": 1.3812378483473753, "grad_norm": 0.952171802520752, "learning_rate": 2.9398135663835904e-06, "loss": 0.1007, "step": 8525 }, { "epoch": 1.381399870382372, "grad_norm": 0.7956331968307495, "learning_rate": 2.939383075972125e-06, "loss": 0.1024, "step": 8526 }, { "epoch": 1.3815618924173687, "grad_norm": 0.8440989255905151, "learning_rate": 2.9389525721166013e-06, "loss": 0.1028, "step": 8527 }, { "epoch": 1.3817239144523654, "grad_norm": 1.5592060089111328, "learning_rate": 2.9385220548301906e-06, "loss": 0.1023, "step": 8528 }, { "epoch": 1.3818859364873624, "grad_norm": 0.8761277794837952, "learning_rate": 2.9380915241260665e-06, "loss": 0.0997, "step": 8529 }, { "epoch": 1.382047958522359, "grad_norm": 0.8244863748550415, "learning_rate": 2.937660980017402e-06, "loss": 0.1096, "step": 8530 }, { "epoch": 1.3822099805573558, "grad_norm": 0.7663264870643616, "learning_rate": 2.9372304225173703e-06, "loss": 0.0926, "step": 8531 }, { "epoch": 1.3823720025923525, "grad_norm": 0.8507534861564636, "learning_rate": 2.936799851639146e-06, "loss": 0.1017, "step": 8532 }, { "epoch": 1.3825340246273492, "grad_norm": 0.9161567091941833, "learning_rate": 2.936369267395903e-06, "loss": 0.1143, "step": 8533 }, { "epoch": 1.3826960466623461, "grad_norm": 0.8694104552268982, "learning_rate": 2.9359386698008172e-06, "loss": 0.1074, "step": 8534 }, { "epoch": 1.3828580686973428, "grad_norm": 0.853909969329834, "learning_rate": 2.9355080588670626e-06, "loss": 0.1031, "step": 8535 }, { "epoch": 1.3830200907323396, "grad_norm": 0.7998418211936951, "learning_rate": 2.935077434607815e-06, "loss": 0.1022, "step": 8536 }, { "epoch": 1.3831821127673365, "grad_norm": 0.9292730689048767, "learning_rate": 2.934646797036251e-06, "loss": 0.1037, "step": 8537 }, { "epoch": 1.383344134802333, "grad_norm": 0.7879573106765747, "learning_rate": 2.9342161461655468e-06, "loss": 0.095, "step": 8538 }, { "epoch": 1.38350615683733, "grad_norm": 0.9311516284942627, "learning_rate": 2.9337854820088797e-06, "loss": 0.1086, "step": 8539 }, { "epoch": 1.3836681788723266, "grad_norm": 0.8644829392433167, "learning_rate": 2.9333548045794253e-06, "loss": 0.1038, "step": 8540 }, { "epoch": 1.3838302009073233, "grad_norm": 0.8739936947822571, "learning_rate": 2.9329241138903642e-06, "loss": 0.1093, "step": 8541 }, { "epoch": 1.3839922229423203, "grad_norm": 0.7543818354606628, "learning_rate": 2.9324934099548713e-06, "loss": 0.0904, "step": 8542 }, { "epoch": 1.384154244977317, "grad_norm": 0.7458750009536743, "learning_rate": 2.9320626927861283e-06, "loss": 0.0913, "step": 8543 }, { "epoch": 1.3843162670123137, "grad_norm": 0.8349683284759521, "learning_rate": 2.931631962397311e-06, "loss": 0.1058, "step": 8544 }, { "epoch": 1.3844782890473104, "grad_norm": 1.0070006847381592, "learning_rate": 2.9312012188016014e-06, "loss": 0.1281, "step": 8545 }, { "epoch": 1.3846403110823071, "grad_norm": 0.8791117072105408, "learning_rate": 2.9307704620121775e-06, "loss": 0.1125, "step": 8546 }, { "epoch": 1.384802333117304, "grad_norm": 0.8105389475822449, "learning_rate": 2.9303396920422196e-06, "loss": 0.1058, "step": 8547 }, { "epoch": 1.3849643551523008, "grad_norm": 0.8246057629585266, "learning_rate": 2.9299089089049092e-06, "loss": 0.1031, "step": 8548 }, { "epoch": 1.3851263771872975, "grad_norm": 0.9281556606292725, "learning_rate": 2.9294781126134254e-06, "loss": 0.1114, "step": 8549 }, { "epoch": 1.3852883992222942, "grad_norm": 0.885179340839386, "learning_rate": 2.929047303180952e-06, "loss": 0.1128, "step": 8550 }, { "epoch": 1.385450421257291, "grad_norm": 0.7432131767272949, "learning_rate": 2.9286164806206683e-06, "loss": 0.0927, "step": 8551 }, { "epoch": 1.3856124432922878, "grad_norm": 0.7628340721130371, "learning_rate": 2.9281856449457587e-06, "loss": 0.1045, "step": 8552 }, { "epoch": 1.3857744653272845, "grad_norm": 0.8845008015632629, "learning_rate": 2.9277547961694037e-06, "loss": 0.1161, "step": 8553 }, { "epoch": 1.3859364873622813, "grad_norm": 0.7263888120651245, "learning_rate": 2.927323934304787e-06, "loss": 0.095, "step": 8554 }, { "epoch": 1.386098509397278, "grad_norm": 0.9359971284866333, "learning_rate": 2.9268930593650926e-06, "loss": 0.1342, "step": 8555 }, { "epoch": 1.3862605314322747, "grad_norm": 0.8538017272949219, "learning_rate": 2.926462171363503e-06, "loss": 0.1154, "step": 8556 }, { "epoch": 1.3864225534672716, "grad_norm": 0.8223465085029602, "learning_rate": 2.9260312703132037e-06, "loss": 0.1037, "step": 8557 }, { "epoch": 1.3865845755022683, "grad_norm": 0.7046705484390259, "learning_rate": 2.9256003562273784e-06, "loss": 0.0974, "step": 8558 }, { "epoch": 1.386746597537265, "grad_norm": 0.8450808525085449, "learning_rate": 2.9251694291192113e-06, "loss": 0.1041, "step": 8559 }, { "epoch": 1.3869086195722617, "grad_norm": 0.7554501295089722, "learning_rate": 2.924738489001889e-06, "loss": 0.0989, "step": 8560 }, { "epoch": 1.3870706416072585, "grad_norm": 0.7961752414703369, "learning_rate": 2.924307535888597e-06, "loss": 0.0983, "step": 8561 }, { "epoch": 1.3872326636422554, "grad_norm": 0.7975520491600037, "learning_rate": 2.923876569792521e-06, "loss": 0.1012, "step": 8562 }, { "epoch": 1.387394685677252, "grad_norm": 0.980924129486084, "learning_rate": 2.923445590726848e-06, "loss": 0.1208, "step": 8563 }, { "epoch": 1.3875567077122488, "grad_norm": 0.92117840051651, "learning_rate": 2.923014598704764e-06, "loss": 0.1097, "step": 8564 }, { "epoch": 1.3877187297472457, "grad_norm": 0.801445484161377, "learning_rate": 2.922583593739458e-06, "loss": 0.103, "step": 8565 }, { "epoch": 1.3878807517822425, "grad_norm": 0.9321786761283875, "learning_rate": 2.9221525758441155e-06, "loss": 0.1197, "step": 8566 }, { "epoch": 1.3880427738172392, "grad_norm": 0.7319613099098206, "learning_rate": 2.921721545031927e-06, "loss": 0.0961, "step": 8567 }, { "epoch": 1.3882047958522359, "grad_norm": 0.7948114275932312, "learning_rate": 2.9212905013160784e-06, "loss": 0.1095, "step": 8568 }, { "epoch": 1.3883668178872326, "grad_norm": 0.7754814624786377, "learning_rate": 2.920859444709761e-06, "loss": 0.0997, "step": 8569 }, { "epoch": 1.3885288399222295, "grad_norm": 0.9290817975997925, "learning_rate": 2.920428375226163e-06, "loss": 0.101, "step": 8570 }, { "epoch": 1.3886908619572262, "grad_norm": 0.761486291885376, "learning_rate": 2.919997292878474e-06, "loss": 0.0954, "step": 8571 }, { "epoch": 1.388852883992223, "grad_norm": 0.672540009021759, "learning_rate": 2.9195661976798838e-06, "loss": 0.0868, "step": 8572 }, { "epoch": 1.3890149060272197, "grad_norm": 0.8108746409416199, "learning_rate": 2.919135089643583e-06, "loss": 0.0953, "step": 8573 }, { "epoch": 1.3891769280622164, "grad_norm": 1.304598093032837, "learning_rate": 2.918703968782764e-06, "loss": 0.0915, "step": 8574 }, { "epoch": 1.3893389500972133, "grad_norm": 0.9010568261146545, "learning_rate": 2.918272835110616e-06, "loss": 0.1142, "step": 8575 }, { "epoch": 1.38950097213221, "grad_norm": 0.7341850399971008, "learning_rate": 2.9178416886403318e-06, "loss": 0.0943, "step": 8576 }, { "epoch": 1.3896629941672067, "grad_norm": 0.8488278388977051, "learning_rate": 2.9174105293851025e-06, "loss": 0.1099, "step": 8577 }, { "epoch": 1.3898250162022034, "grad_norm": 0.9017966389656067, "learning_rate": 2.916979357358121e-06, "loss": 0.1107, "step": 8578 }, { "epoch": 1.3899870382372002, "grad_norm": 0.7202101945877075, "learning_rate": 2.916548172572581e-06, "loss": 0.098, "step": 8579 }, { "epoch": 1.390149060272197, "grad_norm": 0.857900857925415, "learning_rate": 2.9161169750416746e-06, "loss": 0.1077, "step": 8580 }, { "epoch": 1.3903110823071938, "grad_norm": 0.8433865904808044, "learning_rate": 2.9156857647785964e-06, "loss": 0.1083, "step": 8581 }, { "epoch": 1.3904731043421905, "grad_norm": 0.8308601975440979, "learning_rate": 2.915254541796539e-06, "loss": 0.1088, "step": 8582 }, { "epoch": 1.3906351263771872, "grad_norm": 0.77713942527771, "learning_rate": 2.9148233061086973e-06, "loss": 0.1047, "step": 8583 }, { "epoch": 1.390797148412184, "grad_norm": 0.6983367204666138, "learning_rate": 2.914392057728267e-06, "loss": 0.0904, "step": 8584 }, { "epoch": 1.3909591704471809, "grad_norm": 0.844786524772644, "learning_rate": 2.913960796668442e-06, "loss": 0.1129, "step": 8585 }, { "epoch": 1.3911211924821776, "grad_norm": 0.756144106388092, "learning_rate": 2.913529522942418e-06, "loss": 0.1032, "step": 8586 }, { "epoch": 1.3912832145171743, "grad_norm": 0.7644606828689575, "learning_rate": 2.9130982365633926e-06, "loss": 0.1047, "step": 8587 }, { "epoch": 1.3914452365521712, "grad_norm": 0.8337986469268799, "learning_rate": 2.9126669375445595e-06, "loss": 0.1056, "step": 8588 }, { "epoch": 1.3916072585871677, "grad_norm": 0.7386319041252136, "learning_rate": 2.912235625899118e-06, "loss": 0.1003, "step": 8589 }, { "epoch": 1.3917692806221647, "grad_norm": 0.8051424026489258, "learning_rate": 2.911804301640263e-06, "loss": 0.1058, "step": 8590 }, { "epoch": 1.3919313026571614, "grad_norm": 0.8822247982025146, "learning_rate": 2.9113729647811935e-06, "loss": 0.1107, "step": 8591 }, { "epoch": 1.392093324692158, "grad_norm": 0.8278679251670837, "learning_rate": 2.910941615335106e-06, "loss": 0.115, "step": 8592 }, { "epoch": 1.392255346727155, "grad_norm": 0.8865328431129456, "learning_rate": 2.9105102533152e-06, "loss": 0.1151, "step": 8593 }, { "epoch": 1.3924173687621517, "grad_norm": 0.8504626750946045, "learning_rate": 2.9100788787346746e-06, "loss": 0.1131, "step": 8594 }, { "epoch": 1.3925793907971484, "grad_norm": 0.8320386409759521, "learning_rate": 2.9096474916067264e-06, "loss": 0.1116, "step": 8595 }, { "epoch": 1.3927414128321451, "grad_norm": 0.8168127536773682, "learning_rate": 2.9092160919445566e-06, "loss": 0.1058, "step": 8596 }, { "epoch": 1.3929034348671419, "grad_norm": 0.8525434136390686, "learning_rate": 2.9087846797613645e-06, "loss": 0.0983, "step": 8597 }, { "epoch": 1.3930654569021388, "grad_norm": 0.8819026947021484, "learning_rate": 2.9083532550703515e-06, "loss": 0.1126, "step": 8598 }, { "epoch": 1.3932274789371355, "grad_norm": 0.7245476841926575, "learning_rate": 2.907921817884716e-06, "loss": 0.0919, "step": 8599 }, { "epoch": 1.3933895009721322, "grad_norm": 0.7397482395172119, "learning_rate": 2.9074903682176607e-06, "loss": 0.1035, "step": 8600 }, { "epoch": 1.393551523007129, "grad_norm": 0.7988397479057312, "learning_rate": 2.907058906082386e-06, "loss": 0.1043, "step": 8601 }, { "epoch": 1.3937135450421256, "grad_norm": 0.9405267238616943, "learning_rate": 2.906627431492094e-06, "loss": 0.1193, "step": 8602 }, { "epoch": 1.3938755670771226, "grad_norm": 0.766861617565155, "learning_rate": 2.9061959444599867e-06, "loss": 0.0918, "step": 8603 }, { "epoch": 1.3940375891121193, "grad_norm": 0.8565654754638672, "learning_rate": 2.9057644449992655e-06, "loss": 0.1066, "step": 8604 }, { "epoch": 1.394199611147116, "grad_norm": 0.8554398417472839, "learning_rate": 2.9053329331231356e-06, "loss": 0.1046, "step": 8605 }, { "epoch": 1.3943616331821127, "grad_norm": 0.8772569298744202, "learning_rate": 2.904901408844798e-06, "loss": 0.1049, "step": 8606 }, { "epoch": 1.3945236552171094, "grad_norm": 0.8543737530708313, "learning_rate": 2.904469872177458e-06, "loss": 0.1047, "step": 8607 }, { "epoch": 1.3946856772521063, "grad_norm": 0.8698646426200867, "learning_rate": 2.9040383231343173e-06, "loss": 0.1063, "step": 8608 }, { "epoch": 1.394847699287103, "grad_norm": 0.8685057163238525, "learning_rate": 2.9036067617285825e-06, "loss": 0.1025, "step": 8609 }, { "epoch": 1.3950097213220998, "grad_norm": 0.7601488828659058, "learning_rate": 2.903175187973457e-06, "loss": 0.0914, "step": 8610 }, { "epoch": 1.3951717433570965, "grad_norm": 0.7056039571762085, "learning_rate": 2.902743601882147e-06, "loss": 0.0907, "step": 8611 }, { "epoch": 1.3953337653920932, "grad_norm": 0.9211429357528687, "learning_rate": 2.9023120034678575e-06, "loss": 0.1088, "step": 8612 }, { "epoch": 1.3954957874270901, "grad_norm": 0.8788020610809326, "learning_rate": 2.9018803927437946e-06, "loss": 0.117, "step": 8613 }, { "epoch": 1.3956578094620868, "grad_norm": 0.7978447675704956, "learning_rate": 2.901448769723163e-06, "loss": 0.1123, "step": 8614 }, { "epoch": 1.3958198314970836, "grad_norm": 0.8556854724884033, "learning_rate": 2.901017134419171e-06, "loss": 0.1007, "step": 8615 }, { "epoch": 1.3959818535320805, "grad_norm": 0.8050543069839478, "learning_rate": 2.900585486845026e-06, "loss": 0.0957, "step": 8616 }, { "epoch": 1.3961438755670772, "grad_norm": 0.8602048754692078, "learning_rate": 2.900153827013933e-06, "loss": 0.1133, "step": 8617 }, { "epoch": 1.396305897602074, "grad_norm": 0.8299257159233093, "learning_rate": 2.8997221549391025e-06, "loss": 0.112, "step": 8618 }, { "epoch": 1.3964679196370706, "grad_norm": 0.9464213848114014, "learning_rate": 2.8992904706337406e-06, "loss": 0.1197, "step": 8619 }, { "epoch": 1.3966299416720673, "grad_norm": 0.7960741519927979, "learning_rate": 2.8988587741110575e-06, "loss": 0.097, "step": 8620 }, { "epoch": 1.3967919637070643, "grad_norm": 0.7059590220451355, "learning_rate": 2.89842706538426e-06, "loss": 0.0874, "step": 8621 }, { "epoch": 1.396953985742061, "grad_norm": 0.7564810514450073, "learning_rate": 2.8979953444665585e-06, "loss": 0.0873, "step": 8622 }, { "epoch": 1.3971160077770577, "grad_norm": 0.7459419965744019, "learning_rate": 2.8975636113711637e-06, "loss": 0.0916, "step": 8623 }, { "epoch": 1.3972780298120544, "grad_norm": 0.8232574462890625, "learning_rate": 2.8971318661112836e-06, "loss": 0.0907, "step": 8624 }, { "epoch": 1.3974400518470511, "grad_norm": 0.8205267190933228, "learning_rate": 2.89670010870013e-06, "loss": 0.1081, "step": 8625 }, { "epoch": 1.397602073882048, "grad_norm": 0.8244900703430176, "learning_rate": 2.896268339150912e-06, "loss": 0.0983, "step": 8626 }, { "epoch": 1.3977640959170448, "grad_norm": 0.7982784509658813, "learning_rate": 2.8958365574768434e-06, "loss": 0.0898, "step": 8627 }, { "epoch": 1.3979261179520415, "grad_norm": 0.8250177502632141, "learning_rate": 2.895404763691132e-06, "loss": 0.1079, "step": 8628 }, { "epoch": 1.3980881399870382, "grad_norm": 0.828636646270752, "learning_rate": 2.8949729578069936e-06, "loss": 0.105, "step": 8629 }, { "epoch": 1.398250162022035, "grad_norm": 0.9336274266242981, "learning_rate": 2.894541139837638e-06, "loss": 0.1116, "step": 8630 }, { "epoch": 1.3984121840570318, "grad_norm": 0.760558545589447, "learning_rate": 2.8941093097962776e-06, "loss": 0.0934, "step": 8631 }, { "epoch": 1.3985742060920285, "grad_norm": 0.7663840055465698, "learning_rate": 2.8936774676961264e-06, "loss": 0.1026, "step": 8632 }, { "epoch": 1.3987362281270252, "grad_norm": 0.8325403928756714, "learning_rate": 2.893245613550397e-06, "loss": 0.1097, "step": 8633 }, { "epoch": 1.398898250162022, "grad_norm": 0.8603909611701965, "learning_rate": 2.892813747372305e-06, "loss": 0.1057, "step": 8634 }, { "epoch": 1.3990602721970187, "grad_norm": 0.859589159488678, "learning_rate": 2.892381869175061e-06, "loss": 0.1124, "step": 8635 }, { "epoch": 1.3992222942320156, "grad_norm": 0.9289984703063965, "learning_rate": 2.891949978971883e-06, "loss": 0.1186, "step": 8636 }, { "epoch": 1.3993843162670123, "grad_norm": 0.7763188481330872, "learning_rate": 2.891518076775983e-06, "loss": 0.0958, "step": 8637 }, { "epoch": 1.399546338302009, "grad_norm": 0.8290835618972778, "learning_rate": 2.8910861626005774e-06, "loss": 0.112, "step": 8638 }, { "epoch": 1.399708360337006, "grad_norm": 0.7143067121505737, "learning_rate": 2.890654236458882e-06, "loss": 0.0856, "step": 8639 }, { "epoch": 1.3998703823720027, "grad_norm": 0.8053386211395264, "learning_rate": 2.890222298364112e-06, "loss": 0.0909, "step": 8640 }, { "epoch": 1.4000324044069994, "grad_norm": 0.9519616961479187, "learning_rate": 2.8897903483294844e-06, "loss": 0.1239, "step": 8641 }, { "epoch": 1.400194426441996, "grad_norm": 0.7467250227928162, "learning_rate": 2.8893583863682157e-06, "loss": 0.0984, "step": 8642 }, { "epoch": 1.4003564484769928, "grad_norm": 0.7794899940490723, "learning_rate": 2.8889264124935217e-06, "loss": 0.1038, "step": 8643 }, { "epoch": 1.4005184705119897, "grad_norm": 0.8558558821678162, "learning_rate": 2.888494426718621e-06, "loss": 0.1056, "step": 8644 }, { "epoch": 1.4006804925469865, "grad_norm": 0.9650840759277344, "learning_rate": 2.888062429056731e-06, "loss": 0.1189, "step": 8645 }, { "epoch": 1.4008425145819832, "grad_norm": 1.1352195739746094, "learning_rate": 2.8876304195210697e-06, "loss": 0.1153, "step": 8646 }, { "epoch": 1.4010045366169799, "grad_norm": 0.755615770816803, "learning_rate": 2.8871983981248556e-06, "loss": 0.098, "step": 8647 }, { "epoch": 1.4011665586519766, "grad_norm": 0.7980273365974426, "learning_rate": 2.8867663648813077e-06, "loss": 0.1014, "step": 8648 }, { "epoch": 1.4013285806869735, "grad_norm": 0.9927170872688293, "learning_rate": 2.8863343198036453e-06, "loss": 0.116, "step": 8649 }, { "epoch": 1.4014906027219702, "grad_norm": 0.8513253331184387, "learning_rate": 2.885902262905087e-06, "loss": 0.1084, "step": 8650 }, { "epoch": 1.401652624756967, "grad_norm": 0.849332332611084, "learning_rate": 2.885470194198854e-06, "loss": 0.1006, "step": 8651 }, { "epoch": 1.4018146467919637, "grad_norm": 0.7961606979370117, "learning_rate": 2.885038113698165e-06, "loss": 0.0955, "step": 8652 }, { "epoch": 1.4019766688269604, "grad_norm": 0.7978127002716064, "learning_rate": 2.8846060214162426e-06, "loss": 0.1055, "step": 8653 }, { "epoch": 1.4021386908619573, "grad_norm": 0.9275472164154053, "learning_rate": 2.8841739173663057e-06, "loss": 0.1165, "step": 8654 }, { "epoch": 1.402300712896954, "grad_norm": 0.9332693815231323, "learning_rate": 2.883741801561577e-06, "loss": 0.1121, "step": 8655 }, { "epoch": 1.4024627349319507, "grad_norm": 0.7103630304336548, "learning_rate": 2.883309674015278e-06, "loss": 0.0933, "step": 8656 }, { "epoch": 1.4026247569669474, "grad_norm": 0.8611851930618286, "learning_rate": 2.8828775347406295e-06, "loss": 0.1044, "step": 8657 }, { "epoch": 1.4027867790019442, "grad_norm": 0.7644463181495667, "learning_rate": 2.8824453837508563e-06, "loss": 0.0959, "step": 8658 }, { "epoch": 1.402948801036941, "grad_norm": 0.7625099420547485, "learning_rate": 2.882013221059179e-06, "loss": 0.0867, "step": 8659 }, { "epoch": 1.4031108230719378, "grad_norm": 0.9080460667610168, "learning_rate": 2.8815810466788225e-06, "loss": 0.109, "step": 8660 }, { "epoch": 1.4032728451069345, "grad_norm": 0.8725658059120178, "learning_rate": 2.881148860623009e-06, "loss": 0.1114, "step": 8661 }, { "epoch": 1.4034348671419314, "grad_norm": 0.9554914832115173, "learning_rate": 2.8807166629049623e-06, "loss": 0.1124, "step": 8662 }, { "epoch": 1.403596889176928, "grad_norm": 0.9919114708900452, "learning_rate": 2.880284453537907e-06, "loss": 0.1289, "step": 8663 }, { "epoch": 1.4037589112119249, "grad_norm": 0.8158717751502991, "learning_rate": 2.8798522325350683e-06, "loss": 0.1049, "step": 8664 }, { "epoch": 1.4039209332469216, "grad_norm": 0.8903204202651978, "learning_rate": 2.8794199999096708e-06, "loss": 0.105, "step": 8665 }, { "epoch": 1.4040829552819183, "grad_norm": 0.7267638444900513, "learning_rate": 2.8789877556749383e-06, "loss": 0.0889, "step": 8666 }, { "epoch": 1.4042449773169152, "grad_norm": 0.7867768406867981, "learning_rate": 2.8785554998440983e-06, "loss": 0.0943, "step": 8667 }, { "epoch": 1.404406999351912, "grad_norm": 0.7844392657279968, "learning_rate": 2.8781232324303758e-06, "loss": 0.1015, "step": 8668 }, { "epoch": 1.4045690213869086, "grad_norm": 0.8047603964805603, "learning_rate": 2.8776909534469976e-06, "loss": 0.1025, "step": 8669 }, { "epoch": 1.4047310434219054, "grad_norm": 0.85113525390625, "learning_rate": 2.8772586629071902e-06, "loss": 0.1092, "step": 8670 }, { "epoch": 1.404893065456902, "grad_norm": 0.7649945020675659, "learning_rate": 2.8768263608241805e-06, "loss": 0.1005, "step": 8671 }, { "epoch": 1.405055087491899, "grad_norm": 0.8279699683189392, "learning_rate": 2.876394047211196e-06, "loss": 0.1035, "step": 8672 }, { "epoch": 1.4052171095268957, "grad_norm": 0.8674699664115906, "learning_rate": 2.8759617220814654e-06, "loss": 0.1056, "step": 8673 }, { "epoch": 1.4053791315618924, "grad_norm": 0.7891427278518677, "learning_rate": 2.875529385448215e-06, "loss": 0.0974, "step": 8674 }, { "epoch": 1.4055411535968891, "grad_norm": 0.7829881906509399, "learning_rate": 2.8750970373246745e-06, "loss": 0.096, "step": 8675 }, { "epoch": 1.4057031756318858, "grad_norm": 0.8128500580787659, "learning_rate": 2.8746646777240724e-06, "loss": 0.1085, "step": 8676 }, { "epoch": 1.4058651976668828, "grad_norm": 0.9597805142402649, "learning_rate": 2.874232306659638e-06, "loss": 0.12, "step": 8677 }, { "epoch": 1.4060272197018795, "grad_norm": 0.7913134694099426, "learning_rate": 2.8737999241446e-06, "loss": 0.102, "step": 8678 }, { "epoch": 1.4061892417368762, "grad_norm": 0.8597573637962341, "learning_rate": 2.8733675301921893e-06, "loss": 0.1114, "step": 8679 }, { "epoch": 1.406351263771873, "grad_norm": 0.8230311274528503, "learning_rate": 2.8729351248156364e-06, "loss": 0.1063, "step": 8680 }, { "epoch": 1.4065132858068696, "grad_norm": 0.9289247989654541, "learning_rate": 2.87250270802817e-06, "loss": 0.1094, "step": 8681 }, { "epoch": 1.4066753078418666, "grad_norm": 0.647177517414093, "learning_rate": 2.872070279843023e-06, "loss": 0.0788, "step": 8682 }, { "epoch": 1.4068373298768633, "grad_norm": 0.850144624710083, "learning_rate": 2.871637840273425e-06, "loss": 0.1101, "step": 8683 }, { "epoch": 1.40699935191186, "grad_norm": 0.8800421357154846, "learning_rate": 2.8712053893326088e-06, "loss": 0.1125, "step": 8684 }, { "epoch": 1.4071613739468567, "grad_norm": 0.9055140614509583, "learning_rate": 2.8707729270338058e-06, "loss": 0.1144, "step": 8685 }, { "epoch": 1.4073233959818534, "grad_norm": 0.9264971017837524, "learning_rate": 2.8703404533902492e-06, "loss": 0.1058, "step": 8686 }, { "epoch": 1.4074854180168503, "grad_norm": 0.8147197365760803, "learning_rate": 2.869907968415171e-06, "loss": 0.1054, "step": 8687 }, { "epoch": 1.407647440051847, "grad_norm": 0.9164337515830994, "learning_rate": 2.8694754721218027e-06, "loss": 0.1209, "step": 8688 }, { "epoch": 1.4078094620868438, "grad_norm": 0.8125103116035461, "learning_rate": 2.8690429645233808e-06, "loss": 0.0983, "step": 8689 }, { "epoch": 1.4079714841218407, "grad_norm": 0.7318618297576904, "learning_rate": 2.8686104456331356e-06, "loss": 0.0946, "step": 8690 }, { "epoch": 1.4081335061568374, "grad_norm": 0.7763205170631409, "learning_rate": 2.868177915464304e-06, "loss": 0.0966, "step": 8691 }, { "epoch": 1.4082955281918341, "grad_norm": 0.7962520122528076, "learning_rate": 2.8677453740301185e-06, "loss": 0.0979, "step": 8692 }, { "epoch": 1.4084575502268308, "grad_norm": 0.8198848366737366, "learning_rate": 2.867312821343815e-06, "loss": 0.0941, "step": 8693 }, { "epoch": 1.4086195722618275, "grad_norm": 0.8297473192214966, "learning_rate": 2.8668802574186277e-06, "loss": 0.1059, "step": 8694 }, { "epoch": 1.4087815942968245, "grad_norm": 0.9335203766822815, "learning_rate": 2.866447682267792e-06, "loss": 0.1144, "step": 8695 }, { "epoch": 1.4089436163318212, "grad_norm": 0.8151853084564209, "learning_rate": 2.8660150959045456e-06, "loss": 0.1041, "step": 8696 }, { "epoch": 1.409105638366818, "grad_norm": 0.8563622832298279, "learning_rate": 2.8655824983421217e-06, "loss": 0.1103, "step": 8697 }, { "epoch": 1.4092676604018146, "grad_norm": 0.8096403479576111, "learning_rate": 2.865149889593758e-06, "loss": 0.1094, "step": 8698 }, { "epoch": 1.4094296824368113, "grad_norm": 0.859600841999054, "learning_rate": 2.8647172696726917e-06, "loss": 0.1041, "step": 8699 }, { "epoch": 1.4095917044718083, "grad_norm": 0.8782287836074829, "learning_rate": 2.8642846385921593e-06, "loss": 0.1014, "step": 8700 }, { "epoch": 1.409753726506805, "grad_norm": 0.8271836638450623, "learning_rate": 2.8638519963653987e-06, "loss": 0.1045, "step": 8701 }, { "epoch": 1.4099157485418017, "grad_norm": 0.7663862705230713, "learning_rate": 2.863419343005647e-06, "loss": 0.1, "step": 8702 }, { "epoch": 1.4100777705767984, "grad_norm": 0.9826388955116272, "learning_rate": 2.8629866785261435e-06, "loss": 0.1191, "step": 8703 }, { "epoch": 1.410239792611795, "grad_norm": 0.7487360239028931, "learning_rate": 2.8625540029401262e-06, "loss": 0.0963, "step": 8704 }, { "epoch": 1.410401814646792, "grad_norm": 0.8938003778457642, "learning_rate": 2.862121316260833e-06, "loss": 0.107, "step": 8705 }, { "epoch": 1.4105638366817888, "grad_norm": 0.8720892071723938, "learning_rate": 2.8616886185015046e-06, "loss": 0.1147, "step": 8706 }, { "epoch": 1.4107258587167855, "grad_norm": 0.8877646327018738, "learning_rate": 2.8612559096753797e-06, "loss": 0.1134, "step": 8707 }, { "epoch": 1.4108878807517822, "grad_norm": 0.8477475643157959, "learning_rate": 2.860823189795697e-06, "loss": 0.1173, "step": 8708 }, { "epoch": 1.4110499027867789, "grad_norm": 0.8832873106002808, "learning_rate": 2.8603904588756996e-06, "loss": 0.1127, "step": 8709 }, { "epoch": 1.4112119248217758, "grad_norm": 0.8365153074264526, "learning_rate": 2.859957716928625e-06, "loss": 0.1029, "step": 8710 }, { "epoch": 1.4113739468567725, "grad_norm": 0.861546516418457, "learning_rate": 2.8595249639677164e-06, "loss": 0.1085, "step": 8711 }, { "epoch": 1.4115359688917692, "grad_norm": 0.8814380168914795, "learning_rate": 2.8590922000062125e-06, "loss": 0.1142, "step": 8712 }, { "epoch": 1.4116979909267662, "grad_norm": 0.8180842995643616, "learning_rate": 2.858659425057357e-06, "loss": 0.1038, "step": 8713 }, { "epoch": 1.4118600129617627, "grad_norm": 0.7602577209472656, "learning_rate": 2.858226639134391e-06, "loss": 0.0989, "step": 8714 }, { "epoch": 1.4120220349967596, "grad_norm": 0.8542532324790955, "learning_rate": 2.8577938422505573e-06, "loss": 0.1079, "step": 8715 }, { "epoch": 1.4121840570317563, "grad_norm": 0.7752845883369446, "learning_rate": 2.8573610344190978e-06, "loss": 0.1, "step": 8716 }, { "epoch": 1.412346079066753, "grad_norm": 0.8004235029220581, "learning_rate": 2.8569282156532548e-06, "loss": 0.1044, "step": 8717 }, { "epoch": 1.41250810110175, "grad_norm": 0.904018759727478, "learning_rate": 2.8564953859662725e-06, "loss": 0.1214, "step": 8718 }, { "epoch": 1.4126701231367467, "grad_norm": 0.8129547238349915, "learning_rate": 2.8560625453713935e-06, "loss": 0.1058, "step": 8719 }, { "epoch": 1.4128321451717434, "grad_norm": 0.8800033926963806, "learning_rate": 2.8556296938818632e-06, "loss": 0.0912, "step": 8720 }, { "epoch": 1.41299416720674, "grad_norm": 0.7877295017242432, "learning_rate": 2.8551968315109246e-06, "loss": 0.0965, "step": 8721 }, { "epoch": 1.4131561892417368, "grad_norm": 0.9171632528305054, "learning_rate": 2.8547639582718223e-06, "loss": 0.1216, "step": 8722 }, { "epoch": 1.4133182112767337, "grad_norm": 0.766700804233551, "learning_rate": 2.8543310741778013e-06, "loss": 0.0901, "step": 8723 }, { "epoch": 1.4134802333117304, "grad_norm": 0.8369571566581726, "learning_rate": 2.853898179242107e-06, "loss": 0.1022, "step": 8724 }, { "epoch": 1.4136422553467272, "grad_norm": 0.7450268268585205, "learning_rate": 2.853465273477985e-06, "loss": 0.0961, "step": 8725 }, { "epoch": 1.4138042773817239, "grad_norm": 0.8542775511741638, "learning_rate": 2.8530323568986805e-06, "loss": 0.1118, "step": 8726 }, { "epoch": 1.4139662994167206, "grad_norm": 0.7954638004302979, "learning_rate": 2.852599429517441e-06, "loss": 0.104, "step": 8727 }, { "epoch": 1.4141283214517175, "grad_norm": 0.7494431138038635, "learning_rate": 2.8521664913475123e-06, "loss": 0.0874, "step": 8728 }, { "epoch": 1.4142903434867142, "grad_norm": 0.8307495713233948, "learning_rate": 2.8517335424021404e-06, "loss": 0.0994, "step": 8729 }, { "epoch": 1.414452365521711, "grad_norm": 0.8603584170341492, "learning_rate": 2.8513005826945733e-06, "loss": 0.11, "step": 8730 }, { "epoch": 1.4146143875567077, "grad_norm": 0.9381308555603027, "learning_rate": 2.850867612238059e-06, "loss": 0.1049, "step": 8731 }, { "epoch": 1.4147764095917044, "grad_norm": 0.7319527864456177, "learning_rate": 2.8504346310458446e-06, "loss": 0.0898, "step": 8732 }, { "epoch": 1.4149384316267013, "grad_norm": 0.8726157546043396, "learning_rate": 2.850001639131179e-06, "loss": 0.1157, "step": 8733 }, { "epoch": 1.415100453661698, "grad_norm": 0.6778660416603088, "learning_rate": 2.8495686365073096e-06, "loss": 0.088, "step": 8734 }, { "epoch": 1.4152624756966947, "grad_norm": 0.9084062576293945, "learning_rate": 2.849135623187486e-06, "loss": 0.1115, "step": 8735 }, { "epoch": 1.4154244977316914, "grad_norm": 0.8225180506706238, "learning_rate": 2.848702599184957e-06, "loss": 0.1022, "step": 8736 }, { "epoch": 1.4155865197666881, "grad_norm": 0.7956673502922058, "learning_rate": 2.8482695645129725e-06, "loss": 0.0988, "step": 8737 }, { "epoch": 1.415748541801685, "grad_norm": 0.710880696773529, "learning_rate": 2.8478365191847824e-06, "loss": 0.0868, "step": 8738 }, { "epoch": 1.4159105638366818, "grad_norm": 0.7995860576629639, "learning_rate": 2.8474034632136365e-06, "loss": 0.1029, "step": 8739 }, { "epoch": 1.4160725858716785, "grad_norm": 0.8467408418655396, "learning_rate": 2.8469703966127853e-06, "loss": 0.1085, "step": 8740 }, { "epoch": 1.4162346079066754, "grad_norm": 0.8162935972213745, "learning_rate": 2.8465373193954794e-06, "loss": 0.1065, "step": 8741 }, { "epoch": 1.4163966299416721, "grad_norm": 0.8570012450218201, "learning_rate": 2.8461042315749706e-06, "loss": 0.1045, "step": 8742 }, { "epoch": 1.4165586519766689, "grad_norm": 0.7713600397109985, "learning_rate": 2.8456711331645085e-06, "loss": 0.0956, "step": 8743 }, { "epoch": 1.4167206740116656, "grad_norm": 0.7573341131210327, "learning_rate": 2.845238024177348e-06, "loss": 0.098, "step": 8744 }, { "epoch": 1.4168826960466623, "grad_norm": 0.8098436594009399, "learning_rate": 2.8448049046267377e-06, "loss": 0.0979, "step": 8745 }, { "epoch": 1.4170447180816592, "grad_norm": 0.8652111887931824, "learning_rate": 2.8443717745259335e-06, "loss": 0.1117, "step": 8746 }, { "epoch": 1.417206740116656, "grad_norm": 0.8840187788009644, "learning_rate": 2.843938633888186e-06, "loss": 0.1069, "step": 8747 }, { "epoch": 1.4173687621516526, "grad_norm": 0.7981458902359009, "learning_rate": 2.8435054827267476e-06, "loss": 0.1045, "step": 8748 }, { "epoch": 1.4175307841866494, "grad_norm": 0.8698453903198242, "learning_rate": 2.843072321054873e-06, "loss": 0.1061, "step": 8749 }, { "epoch": 1.417692806221646, "grad_norm": 0.9237022995948792, "learning_rate": 2.8426391488858163e-06, "loss": 0.1168, "step": 8750 }, { "epoch": 1.417854828256643, "grad_norm": 0.8793653249740601, "learning_rate": 2.8422059662328306e-06, "loss": 0.117, "step": 8751 }, { "epoch": 1.4180168502916397, "grad_norm": 0.7878406047821045, "learning_rate": 2.8417727731091705e-06, "loss": 0.1027, "step": 8752 }, { "epoch": 1.4181788723266364, "grad_norm": 0.9198635816574097, "learning_rate": 2.84133956952809e-06, "loss": 0.1156, "step": 8753 }, { "epoch": 1.4183408943616331, "grad_norm": 0.8705915212631226, "learning_rate": 2.840906355502845e-06, "loss": 0.1141, "step": 8754 }, { "epoch": 1.4185029163966298, "grad_norm": 0.8760009407997131, "learning_rate": 2.8404731310466904e-06, "loss": 0.1222, "step": 8755 }, { "epoch": 1.4186649384316268, "grad_norm": 0.8203396201133728, "learning_rate": 2.840039896172882e-06, "loss": 0.1032, "step": 8756 }, { "epoch": 1.4188269604666235, "grad_norm": 0.7818074226379395, "learning_rate": 2.8396066508946757e-06, "loss": 0.1038, "step": 8757 }, { "epoch": 1.4189889825016202, "grad_norm": 0.7406920194625854, "learning_rate": 2.8391733952253277e-06, "loss": 0.0913, "step": 8758 }, { "epoch": 1.419151004536617, "grad_norm": 0.8644748330116272, "learning_rate": 2.8387401291780953e-06, "loss": 0.1071, "step": 8759 }, { "epoch": 1.4193130265716136, "grad_norm": 0.8011223077774048, "learning_rate": 2.838306852766234e-06, "loss": 0.1088, "step": 8760 }, { "epoch": 1.4194750486066106, "grad_norm": 0.9043474197387695, "learning_rate": 2.8378735660030015e-06, "loss": 0.1187, "step": 8761 }, { "epoch": 1.4196370706416073, "grad_norm": 0.7650865316390991, "learning_rate": 2.8374402689016557e-06, "loss": 0.0959, "step": 8762 }, { "epoch": 1.419799092676604, "grad_norm": 0.7926430702209473, "learning_rate": 2.8370069614754543e-06, "loss": 0.094, "step": 8763 }, { "epoch": 1.419961114711601, "grad_norm": 0.8434385061264038, "learning_rate": 2.8365736437376555e-06, "loss": 0.1034, "step": 8764 }, { "epoch": 1.4201231367465974, "grad_norm": 0.7807652950286865, "learning_rate": 2.836140315701517e-06, "loss": 0.1004, "step": 8765 }, { "epoch": 1.4202851587815943, "grad_norm": 0.9462375640869141, "learning_rate": 2.8357069773802996e-06, "loss": 0.1111, "step": 8766 }, { "epoch": 1.420447180816591, "grad_norm": 0.8678317666053772, "learning_rate": 2.8352736287872593e-06, "loss": 0.1101, "step": 8767 }, { "epoch": 1.4206092028515878, "grad_norm": 0.8266333937644958, "learning_rate": 2.834840269935659e-06, "loss": 0.1046, "step": 8768 }, { "epoch": 1.4207712248865847, "grad_norm": 0.8634325265884399, "learning_rate": 2.8344069008387565e-06, "loss": 0.1141, "step": 8769 }, { "epoch": 1.4209332469215814, "grad_norm": 0.7722366452217102, "learning_rate": 2.833973521509812e-06, "loss": 0.0944, "step": 8770 }, { "epoch": 1.4210952689565781, "grad_norm": 0.7650349736213684, "learning_rate": 2.8335401319620855e-06, "loss": 0.1033, "step": 8771 }, { "epoch": 1.4212572909915748, "grad_norm": 0.94795161485672, "learning_rate": 2.833106732208838e-06, "loss": 0.1154, "step": 8772 }, { "epoch": 1.4214193130265715, "grad_norm": 0.8152647614479065, "learning_rate": 2.832673322263331e-06, "loss": 0.1029, "step": 8773 }, { "epoch": 1.4215813350615685, "grad_norm": 0.843647301197052, "learning_rate": 2.8322399021388248e-06, "loss": 0.1097, "step": 8774 }, { "epoch": 1.4217433570965652, "grad_norm": 0.8536216616630554, "learning_rate": 2.8318064718485826e-06, "loss": 0.1011, "step": 8775 }, { "epoch": 1.421905379131562, "grad_norm": 0.8077113628387451, "learning_rate": 2.8313730314058645e-06, "loss": 0.103, "step": 8776 }, { "epoch": 1.4220674011665586, "grad_norm": 0.964287281036377, "learning_rate": 2.830939580823934e-06, "loss": 0.1125, "step": 8777 }, { "epoch": 1.4222294232015553, "grad_norm": 0.8470258712768555, "learning_rate": 2.830506120116053e-06, "loss": 0.1101, "step": 8778 }, { "epoch": 1.4223914452365523, "grad_norm": 1.022905707359314, "learning_rate": 2.8300726492954845e-06, "loss": 0.1195, "step": 8779 }, { "epoch": 1.422553467271549, "grad_norm": 0.8365886211395264, "learning_rate": 2.8296391683754916e-06, "loss": 0.107, "step": 8780 }, { "epoch": 1.4227154893065457, "grad_norm": 0.8074929118156433, "learning_rate": 2.829205677369338e-06, "loss": 0.1062, "step": 8781 }, { "epoch": 1.4228775113415424, "grad_norm": 0.8210521340370178, "learning_rate": 2.8287721762902877e-06, "loss": 0.1056, "step": 8782 }, { "epoch": 1.423039533376539, "grad_norm": 0.845281183719635, "learning_rate": 2.8283386651516037e-06, "loss": 0.1036, "step": 8783 }, { "epoch": 1.423201555411536, "grad_norm": 0.7844056487083435, "learning_rate": 2.8279051439665516e-06, "loss": 0.0968, "step": 8784 }, { "epoch": 1.4233635774465327, "grad_norm": 0.8035227656364441, "learning_rate": 2.8274716127483955e-06, "loss": 0.1052, "step": 8785 }, { "epoch": 1.4235255994815295, "grad_norm": 0.8737231492996216, "learning_rate": 2.8270380715104e-06, "loss": 0.114, "step": 8786 }, { "epoch": 1.4236876215165262, "grad_norm": 0.8196536302566528, "learning_rate": 2.8266045202658316e-06, "loss": 0.1088, "step": 8787 }, { "epoch": 1.4238496435515229, "grad_norm": 0.7950606942176819, "learning_rate": 2.826170959027956e-06, "loss": 0.0977, "step": 8788 }, { "epoch": 1.4240116655865198, "grad_norm": 0.8498073816299438, "learning_rate": 2.8257373878100363e-06, "loss": 0.1049, "step": 8789 }, { "epoch": 1.4241736876215165, "grad_norm": 0.8204236030578613, "learning_rate": 2.8253038066253423e-06, "loss": 0.1023, "step": 8790 }, { "epoch": 1.4243357096565132, "grad_norm": 0.9035735130310059, "learning_rate": 2.8248702154871387e-06, "loss": 0.1169, "step": 8791 }, { "epoch": 1.4244977316915102, "grad_norm": 0.9391461610794067, "learning_rate": 2.8244366144086926e-06, "loss": 0.1122, "step": 8792 }, { "epoch": 1.4246597537265069, "grad_norm": 0.8815183639526367, "learning_rate": 2.824003003403271e-06, "loss": 0.1041, "step": 8793 }, { "epoch": 1.4248217757615036, "grad_norm": 0.7553703784942627, "learning_rate": 2.823569382484142e-06, "loss": 0.0949, "step": 8794 }, { "epoch": 1.4249837977965003, "grad_norm": 0.8771671652793884, "learning_rate": 2.823135751664573e-06, "loss": 0.1142, "step": 8795 }, { "epoch": 1.425145819831497, "grad_norm": 0.845828652381897, "learning_rate": 2.822702110957831e-06, "loss": 0.1074, "step": 8796 }, { "epoch": 1.425307841866494, "grad_norm": 0.823070764541626, "learning_rate": 2.8222684603771867e-06, "loss": 0.1042, "step": 8797 }, { "epoch": 1.4254698639014907, "grad_norm": 0.8956811428070068, "learning_rate": 2.8218347999359066e-06, "loss": 0.1038, "step": 8798 }, { "epoch": 1.4256318859364874, "grad_norm": 0.9194256067276001, "learning_rate": 2.821401129647261e-06, "loss": 0.1194, "step": 8799 }, { "epoch": 1.425793907971484, "grad_norm": 0.787661612033844, "learning_rate": 2.8209674495245177e-06, "loss": 0.0979, "step": 8800 }, { "epoch": 1.4259559300064808, "grad_norm": 0.8394095301628113, "learning_rate": 2.820533759580948e-06, "loss": 0.1078, "step": 8801 }, { "epoch": 1.4261179520414777, "grad_norm": 0.7722134590148926, "learning_rate": 2.82010005982982e-06, "loss": 0.1064, "step": 8802 }, { "epoch": 1.4262799740764744, "grad_norm": 0.841996967792511, "learning_rate": 2.8196663502844057e-06, "loss": 0.0996, "step": 8803 }, { "epoch": 1.4264419961114712, "grad_norm": 0.9296277165412903, "learning_rate": 2.819232630957975e-06, "loss": 0.1196, "step": 8804 }, { "epoch": 1.4266040181464679, "grad_norm": 0.8732765316963196, "learning_rate": 2.8187989018637967e-06, "loss": 0.1097, "step": 8805 }, { "epoch": 1.4267660401814646, "grad_norm": 0.9206928014755249, "learning_rate": 2.818365163015145e-06, "loss": 0.1229, "step": 8806 }, { "epoch": 1.4269280622164615, "grad_norm": 0.853338897228241, "learning_rate": 2.817931414425289e-06, "loss": 0.0942, "step": 8807 }, { "epoch": 1.4270900842514582, "grad_norm": 0.8418228030204773, "learning_rate": 2.8174976561075013e-06, "loss": 0.1125, "step": 8808 }, { "epoch": 1.427252106286455, "grad_norm": 0.9058745503425598, "learning_rate": 2.8170638880750534e-06, "loss": 0.1103, "step": 8809 }, { "epoch": 1.4274141283214516, "grad_norm": 0.8129554986953735, "learning_rate": 2.816630110341218e-06, "loss": 0.1046, "step": 8810 }, { "epoch": 1.4275761503564484, "grad_norm": 0.7814062237739563, "learning_rate": 2.8161963229192677e-06, "loss": 0.103, "step": 8811 }, { "epoch": 1.4277381723914453, "grad_norm": 0.7614774703979492, "learning_rate": 2.8157625258224746e-06, "loss": 0.0985, "step": 8812 }, { "epoch": 1.427900194426442, "grad_norm": 0.8251438140869141, "learning_rate": 2.8153287190641133e-06, "loss": 0.105, "step": 8813 }, { "epoch": 1.4280622164614387, "grad_norm": 0.8020395040512085, "learning_rate": 2.814894902657456e-06, "loss": 0.1027, "step": 8814 }, { "epoch": 1.4282242384964356, "grad_norm": 0.7605652213096619, "learning_rate": 2.8144610766157758e-06, "loss": 0.0941, "step": 8815 }, { "epoch": 1.4283862605314321, "grad_norm": 0.7266712784767151, "learning_rate": 2.814027240952348e-06, "loss": 0.085, "step": 8816 }, { "epoch": 1.428548282566429, "grad_norm": 0.8249565958976746, "learning_rate": 2.813593395680447e-06, "loss": 0.1041, "step": 8817 }, { "epoch": 1.4287103046014258, "grad_norm": 1.0275297164916992, "learning_rate": 2.8131595408133467e-06, "loss": 0.1263, "step": 8818 }, { "epoch": 1.4288723266364225, "grad_norm": 1.5727760791778564, "learning_rate": 2.812725676364322e-06, "loss": 0.1006, "step": 8819 }, { "epoch": 1.4290343486714194, "grad_norm": 0.8301246762275696, "learning_rate": 2.8122918023466485e-06, "loss": 0.1036, "step": 8820 }, { "epoch": 1.4291963707064161, "grad_norm": 0.8229403495788574, "learning_rate": 2.811857918773602e-06, "loss": 0.0966, "step": 8821 }, { "epoch": 1.4293583927414129, "grad_norm": 0.794026255607605, "learning_rate": 2.811424025658458e-06, "loss": 0.1077, "step": 8822 }, { "epoch": 1.4295204147764096, "grad_norm": 0.6794479489326477, "learning_rate": 2.810990123014492e-06, "loss": 0.0826, "step": 8823 }, { "epoch": 1.4296824368114063, "grad_norm": 0.759669303894043, "learning_rate": 2.8105562108549807e-06, "loss": 0.0996, "step": 8824 }, { "epoch": 1.4298444588464032, "grad_norm": 0.8474135398864746, "learning_rate": 2.8101222891932013e-06, "loss": 0.1085, "step": 8825 }, { "epoch": 1.4300064808814, "grad_norm": 0.9885803461074829, "learning_rate": 2.80968835804243e-06, "loss": 0.121, "step": 8826 }, { "epoch": 1.4301685029163966, "grad_norm": 0.8411709070205688, "learning_rate": 2.809254417415944e-06, "loss": 0.1085, "step": 8827 }, { "epoch": 1.4303305249513933, "grad_norm": 1.034460425376892, "learning_rate": 2.808820467327022e-06, "loss": 0.1044, "step": 8828 }, { "epoch": 1.43049254698639, "grad_norm": 0.9514307975769043, "learning_rate": 2.8083865077889404e-06, "loss": 0.1225, "step": 8829 }, { "epoch": 1.430654569021387, "grad_norm": 0.7484989762306213, "learning_rate": 2.8079525388149787e-06, "loss": 0.0957, "step": 8830 }, { "epoch": 1.4308165910563837, "grad_norm": 0.8544020652770996, "learning_rate": 2.807518560418414e-06, "loss": 0.1155, "step": 8831 }, { "epoch": 1.4309786130913804, "grad_norm": 0.8093070387840271, "learning_rate": 2.8070845726125257e-06, "loss": 0.1042, "step": 8832 }, { "epoch": 1.4311406351263771, "grad_norm": 0.7702214121818542, "learning_rate": 2.806650575410592e-06, "loss": 0.0942, "step": 8833 }, { "epoch": 1.4313026571613738, "grad_norm": 0.793833315372467, "learning_rate": 2.8062165688258934e-06, "loss": 0.0903, "step": 8834 }, { "epoch": 1.4314646791963708, "grad_norm": 0.9053971171379089, "learning_rate": 2.8057825528717093e-06, "loss": 0.1259, "step": 8835 }, { "epoch": 1.4316267012313675, "grad_norm": 0.9572237730026245, "learning_rate": 2.8053485275613177e-06, "loss": 0.111, "step": 8836 }, { "epoch": 1.4317887232663642, "grad_norm": 0.7796591520309448, "learning_rate": 2.804914492908001e-06, "loss": 0.0959, "step": 8837 }, { "epoch": 1.431950745301361, "grad_norm": 0.733897864818573, "learning_rate": 2.804480448925039e-06, "loss": 0.0852, "step": 8838 }, { "epoch": 1.4321127673363576, "grad_norm": 0.7287041544914246, "learning_rate": 2.8040463956257113e-06, "loss": 0.0934, "step": 8839 }, { "epoch": 1.4322747893713546, "grad_norm": 0.8480597138404846, "learning_rate": 2.8036123330233e-06, "loss": 0.1066, "step": 8840 }, { "epoch": 1.4324368114063513, "grad_norm": 0.8591471314430237, "learning_rate": 2.8031782611310863e-06, "loss": 0.1044, "step": 8841 }, { "epoch": 1.432598833441348, "grad_norm": 0.7007931470870972, "learning_rate": 2.802744179962351e-06, "loss": 0.0905, "step": 8842 }, { "epoch": 1.432760855476345, "grad_norm": 0.7390332818031311, "learning_rate": 2.802310089530377e-06, "loss": 0.0923, "step": 8843 }, { "epoch": 1.4329228775113416, "grad_norm": 0.8909687995910645, "learning_rate": 2.801875989848446e-06, "loss": 0.1139, "step": 8844 }, { "epoch": 1.4330848995463383, "grad_norm": 0.795418918132782, "learning_rate": 2.801441880929839e-06, "loss": 0.103, "step": 8845 }, { "epoch": 1.433246921581335, "grad_norm": 0.7816963195800781, "learning_rate": 2.8010077627878414e-06, "loss": 0.1, "step": 8846 }, { "epoch": 1.4334089436163318, "grad_norm": 0.8099395632743835, "learning_rate": 2.8005736354357338e-06, "loss": 0.094, "step": 8847 }, { "epoch": 1.4335709656513287, "grad_norm": 0.7562690377235413, "learning_rate": 2.8001394988868003e-06, "loss": 0.0969, "step": 8848 }, { "epoch": 1.4337329876863254, "grad_norm": 0.9833892583847046, "learning_rate": 2.7997053531543246e-06, "loss": 0.1045, "step": 8849 }, { "epoch": 1.4338950097213221, "grad_norm": 0.8875625133514404, "learning_rate": 2.7992711982515908e-06, "loss": 0.1067, "step": 8850 }, { "epoch": 1.4340570317563188, "grad_norm": 0.7625311613082886, "learning_rate": 2.7988370341918814e-06, "loss": 0.0928, "step": 8851 }, { "epoch": 1.4342190537913155, "grad_norm": 0.8266885876655579, "learning_rate": 2.798402860988483e-06, "loss": 0.1055, "step": 8852 }, { "epoch": 1.4343810758263125, "grad_norm": 0.8304306864738464, "learning_rate": 2.7979686786546784e-06, "loss": 0.1017, "step": 8853 }, { "epoch": 1.4345430978613092, "grad_norm": 0.8403946161270142, "learning_rate": 2.797534487203755e-06, "loss": 0.0894, "step": 8854 }, { "epoch": 1.434705119896306, "grad_norm": 0.7953592538833618, "learning_rate": 2.7971002866489944e-06, "loss": 0.098, "step": 8855 }, { "epoch": 1.4348671419313026, "grad_norm": 0.7994452714920044, "learning_rate": 2.7966660770036845e-06, "loss": 0.1017, "step": 8856 }, { "epoch": 1.4350291639662993, "grad_norm": 0.8061219453811646, "learning_rate": 2.7962318582811113e-06, "loss": 0.1053, "step": 8857 }, { "epoch": 1.4351911860012962, "grad_norm": 0.9328574538230896, "learning_rate": 2.795797630494559e-06, "loss": 0.1206, "step": 8858 }, { "epoch": 1.435353208036293, "grad_norm": 0.8536462187767029, "learning_rate": 2.795363393657316e-06, "loss": 0.1042, "step": 8859 }, { "epoch": 1.4355152300712897, "grad_norm": 0.7858737111091614, "learning_rate": 2.7949291477826666e-06, "loss": 0.0977, "step": 8860 }, { "epoch": 1.4356772521062864, "grad_norm": 0.887958824634552, "learning_rate": 2.7944948928839007e-06, "loss": 0.1106, "step": 8861 }, { "epoch": 1.435839274141283, "grad_norm": 0.817033588886261, "learning_rate": 2.7940606289743026e-06, "loss": 0.0977, "step": 8862 }, { "epoch": 1.43600129617628, "grad_norm": 0.8055084943771362, "learning_rate": 2.793626356067161e-06, "loss": 0.0928, "step": 8863 }, { "epoch": 1.4361633182112767, "grad_norm": 0.8124540448188782, "learning_rate": 2.793192074175764e-06, "loss": 0.0977, "step": 8864 }, { "epoch": 1.4363253402462735, "grad_norm": 0.9017152786254883, "learning_rate": 2.7927577833133984e-06, "loss": 0.1147, "step": 8865 }, { "epoch": 1.4364873622812704, "grad_norm": 0.8836957812309265, "learning_rate": 2.792323483493354e-06, "loss": 0.1094, "step": 8866 }, { "epoch": 1.4366493843162669, "grad_norm": 0.8717904686927795, "learning_rate": 2.791889174728918e-06, "loss": 0.1102, "step": 8867 }, { "epoch": 1.4368114063512638, "grad_norm": 0.7438794374465942, "learning_rate": 2.791454857033379e-06, "loss": 0.0957, "step": 8868 }, { "epoch": 1.4369734283862605, "grad_norm": 0.7357698678970337, "learning_rate": 2.7910205304200273e-06, "loss": 0.0928, "step": 8869 }, { "epoch": 1.4371354504212572, "grad_norm": 0.8071755170822144, "learning_rate": 2.790586194902151e-06, "loss": 0.0974, "step": 8870 }, { "epoch": 1.4372974724562542, "grad_norm": 0.8086465001106262, "learning_rate": 2.790151850493041e-06, "loss": 0.1087, "step": 8871 }, { "epoch": 1.4374594944912509, "grad_norm": 0.7857526540756226, "learning_rate": 2.789717497205986e-06, "loss": 0.0962, "step": 8872 }, { "epoch": 1.4376215165262476, "grad_norm": 0.8768362998962402, "learning_rate": 2.789283135054277e-06, "loss": 0.0975, "step": 8873 }, { "epoch": 1.4377835385612443, "grad_norm": 0.8221284747123718, "learning_rate": 2.7888487640512046e-06, "loss": 0.0953, "step": 8874 }, { "epoch": 1.437945560596241, "grad_norm": 0.9030830264091492, "learning_rate": 2.7884143842100573e-06, "loss": 0.1122, "step": 8875 }, { "epoch": 1.438107582631238, "grad_norm": 0.9299379587173462, "learning_rate": 2.78797999554413e-06, "loss": 0.1183, "step": 8876 }, { "epoch": 1.4382696046662347, "grad_norm": 0.7504619359970093, "learning_rate": 2.7875455980667106e-06, "loss": 0.0942, "step": 8877 }, { "epoch": 1.4384316267012314, "grad_norm": 0.7830450534820557, "learning_rate": 2.787111191791092e-06, "loss": 0.0983, "step": 8878 }, { "epoch": 1.438593648736228, "grad_norm": 0.7624955177307129, "learning_rate": 2.786676776730566e-06, "loss": 0.0961, "step": 8879 }, { "epoch": 1.4387556707712248, "grad_norm": 0.7818362712860107, "learning_rate": 2.7862423528984233e-06, "loss": 0.1087, "step": 8880 }, { "epoch": 1.4389176928062217, "grad_norm": 0.8774171471595764, "learning_rate": 2.7858079203079587e-06, "loss": 0.1192, "step": 8881 }, { "epoch": 1.4390797148412184, "grad_norm": 0.7342621088027954, "learning_rate": 2.7853734789724618e-06, "loss": 0.0933, "step": 8882 }, { "epoch": 1.4392417368762151, "grad_norm": 0.7560212016105652, "learning_rate": 2.7849390289052287e-06, "loss": 0.0867, "step": 8883 }, { "epoch": 1.4394037589112119, "grad_norm": 1.23517644405365, "learning_rate": 2.7845045701195494e-06, "loss": 0.1019, "step": 8884 }, { "epoch": 1.4395657809462086, "grad_norm": 0.9435912370681763, "learning_rate": 2.78407010262872e-06, "loss": 0.1217, "step": 8885 }, { "epoch": 1.4397278029812055, "grad_norm": 0.9272345304489136, "learning_rate": 2.7836356264460316e-06, "loss": 0.1167, "step": 8886 }, { "epoch": 1.4398898250162022, "grad_norm": 0.8792025446891785, "learning_rate": 2.7832011415847802e-06, "loss": 0.1165, "step": 8887 }, { "epoch": 1.440051847051199, "grad_norm": 0.7143024206161499, "learning_rate": 2.7827666480582593e-06, "loss": 0.0933, "step": 8888 }, { "epoch": 1.4402138690861956, "grad_norm": 0.8933820724487305, "learning_rate": 2.782332145879763e-06, "loss": 0.1105, "step": 8889 }, { "epoch": 1.4403758911211924, "grad_norm": 1.0869182348251343, "learning_rate": 2.7818976350625864e-06, "loss": 0.0966, "step": 8890 }, { "epoch": 1.4405379131561893, "grad_norm": 0.7515729665756226, "learning_rate": 2.781463115620024e-06, "loss": 0.0954, "step": 8891 }, { "epoch": 1.440699935191186, "grad_norm": 0.6556647419929504, "learning_rate": 2.781028587565372e-06, "loss": 0.0812, "step": 8892 }, { "epoch": 1.4408619572261827, "grad_norm": 0.8055543899536133, "learning_rate": 2.780594050911925e-06, "loss": 0.1076, "step": 8893 }, { "epoch": 1.4410239792611796, "grad_norm": 0.8170286417007446, "learning_rate": 2.780159505672979e-06, "loss": 0.1098, "step": 8894 }, { "epoch": 1.4411860012961764, "grad_norm": 0.8746091723442078, "learning_rate": 2.7797249518618304e-06, "loss": 0.1001, "step": 8895 }, { "epoch": 1.441348023331173, "grad_norm": 0.8125444650650024, "learning_rate": 2.7792903894917746e-06, "loss": 0.1104, "step": 8896 }, { "epoch": 1.4415100453661698, "grad_norm": 0.8494876623153687, "learning_rate": 2.778855818576109e-06, "loss": 0.1037, "step": 8897 }, { "epoch": 1.4416720674011665, "grad_norm": 0.8670206069946289, "learning_rate": 2.7784212391281307e-06, "loss": 0.1096, "step": 8898 }, { "epoch": 1.4418340894361634, "grad_norm": 0.7742645144462585, "learning_rate": 2.777986651161136e-06, "loss": 0.0943, "step": 8899 }, { "epoch": 1.4419961114711601, "grad_norm": 0.8516318798065186, "learning_rate": 2.7775520546884216e-06, "loss": 0.1088, "step": 8900 }, { "epoch": 1.4421581335061568, "grad_norm": 0.8558171987533569, "learning_rate": 2.7771174497232867e-06, "loss": 0.0966, "step": 8901 }, { "epoch": 1.4423201555411536, "grad_norm": 0.8049555420875549, "learning_rate": 2.7766828362790283e-06, "loss": 0.1069, "step": 8902 }, { "epoch": 1.4424821775761503, "grad_norm": 0.8404254913330078, "learning_rate": 2.776248214368945e-06, "loss": 0.1062, "step": 8903 }, { "epoch": 1.4426441996111472, "grad_norm": 0.8144481182098389, "learning_rate": 2.7758135840063344e-06, "loss": 0.1033, "step": 8904 }, { "epoch": 1.442806221646144, "grad_norm": 0.8399234414100647, "learning_rate": 2.7753789452044965e-06, "loss": 0.1119, "step": 8905 }, { "epoch": 1.4429682436811406, "grad_norm": 0.8735939264297485, "learning_rate": 2.7749442979767276e-06, "loss": 0.1109, "step": 8906 }, { "epoch": 1.4431302657161373, "grad_norm": 0.7629880309104919, "learning_rate": 2.7745096423363304e-06, "loss": 0.096, "step": 8907 }, { "epoch": 1.443292287751134, "grad_norm": 0.9925363659858704, "learning_rate": 2.7740749782966016e-06, "loss": 0.1199, "step": 8908 }, { "epoch": 1.443454309786131, "grad_norm": 0.8804534077644348, "learning_rate": 2.7736403058708418e-06, "loss": 0.1041, "step": 8909 }, { "epoch": 1.4436163318211277, "grad_norm": 0.8817947506904602, "learning_rate": 2.7732056250723505e-06, "loss": 0.1021, "step": 8910 }, { "epoch": 1.4437783538561244, "grad_norm": 0.8750995397567749, "learning_rate": 2.7727709359144285e-06, "loss": 0.1109, "step": 8911 }, { "epoch": 1.4439403758911211, "grad_norm": 0.8417905569076538, "learning_rate": 2.7723362384103757e-06, "loss": 0.0968, "step": 8912 }, { "epoch": 1.4441023979261178, "grad_norm": 0.8828087449073792, "learning_rate": 2.771901532573493e-06, "loss": 0.1105, "step": 8913 }, { "epoch": 1.4442644199611148, "grad_norm": 0.8812400102615356, "learning_rate": 2.771466818417082e-06, "loss": 0.1137, "step": 8914 }, { "epoch": 1.4444264419961115, "grad_norm": 1.0078636407852173, "learning_rate": 2.7710320959544425e-06, "loss": 0.1276, "step": 8915 }, { "epoch": 1.4445884640311082, "grad_norm": 0.8423323035240173, "learning_rate": 2.7705973651988777e-06, "loss": 0.0995, "step": 8916 }, { "epoch": 1.4447504860661051, "grad_norm": 0.6651060581207275, "learning_rate": 2.7701626261636878e-06, "loss": 0.0942, "step": 8917 }, { "epoch": 1.4449125081011016, "grad_norm": 0.8124906420707703, "learning_rate": 2.769727878862175e-06, "loss": 0.0977, "step": 8918 }, { "epoch": 1.4450745301360985, "grad_norm": 0.784057080745697, "learning_rate": 2.7692931233076424e-06, "loss": 0.0979, "step": 8919 }, { "epoch": 1.4452365521710953, "grad_norm": 0.825549840927124, "learning_rate": 2.768858359513392e-06, "loss": 0.1119, "step": 8920 }, { "epoch": 1.445398574206092, "grad_norm": 0.7953062653541565, "learning_rate": 2.7684235874927264e-06, "loss": 0.1072, "step": 8921 }, { "epoch": 1.445560596241089, "grad_norm": 0.8686825633049011, "learning_rate": 2.767988807258948e-06, "loss": 0.103, "step": 8922 }, { "epoch": 1.4457226182760856, "grad_norm": 0.8071175813674927, "learning_rate": 2.7675540188253606e-06, "loss": 0.1091, "step": 8923 }, { "epoch": 1.4458846403110823, "grad_norm": 0.8495711088180542, "learning_rate": 2.7671192222052685e-06, "loss": 0.1146, "step": 8924 }, { "epoch": 1.446046662346079, "grad_norm": 0.7676787376403809, "learning_rate": 2.7666844174119738e-06, "loss": 0.0902, "step": 8925 }, { "epoch": 1.4462086843810757, "grad_norm": 0.950420618057251, "learning_rate": 2.7662496044587817e-06, "loss": 0.1138, "step": 8926 }, { "epoch": 1.4463707064160727, "grad_norm": 0.7517897486686707, "learning_rate": 2.765814783358996e-06, "loss": 0.094, "step": 8927 }, { "epoch": 1.4465327284510694, "grad_norm": 0.8687278032302856, "learning_rate": 2.765379954125921e-06, "loss": 0.1039, "step": 8928 }, { "epoch": 1.446694750486066, "grad_norm": 0.9656805992126465, "learning_rate": 2.764945116772862e-06, "loss": 0.1127, "step": 8929 }, { "epoch": 1.4468567725210628, "grad_norm": 0.9119703769683838, "learning_rate": 2.764510271313123e-06, "loss": 0.117, "step": 8930 }, { "epoch": 1.4470187945560595, "grad_norm": 0.7118834853172302, "learning_rate": 2.7640754177600105e-06, "loss": 0.0926, "step": 8931 }, { "epoch": 1.4471808165910565, "grad_norm": 0.7804062962532043, "learning_rate": 2.7636405561268286e-06, "loss": 0.1061, "step": 8932 }, { "epoch": 1.4473428386260532, "grad_norm": 0.8037486672401428, "learning_rate": 2.763205686426884e-06, "loss": 0.1024, "step": 8933 }, { "epoch": 1.4475048606610499, "grad_norm": 0.9153349995613098, "learning_rate": 2.7627708086734827e-06, "loss": 0.1165, "step": 8934 }, { "epoch": 1.4476668826960466, "grad_norm": 0.8715000748634338, "learning_rate": 2.7623359228799295e-06, "loss": 0.1049, "step": 8935 }, { "epoch": 1.4478289047310433, "grad_norm": 1.018729329109192, "learning_rate": 2.7619010290595333e-06, "loss": 0.1107, "step": 8936 }, { "epoch": 1.4479909267660402, "grad_norm": 0.7773792743682861, "learning_rate": 2.761466127225598e-06, "loss": 0.0909, "step": 8937 }, { "epoch": 1.448152948801037, "grad_norm": 0.722775399684906, "learning_rate": 2.7610312173914334e-06, "loss": 0.0939, "step": 8938 }, { "epoch": 1.4483149708360337, "grad_norm": 0.8534601926803589, "learning_rate": 2.760596299570344e-06, "loss": 0.0987, "step": 8939 }, { "epoch": 1.4484769928710304, "grad_norm": 0.81373131275177, "learning_rate": 2.760161373775639e-06, "loss": 0.101, "step": 8940 }, { "epoch": 1.448639014906027, "grad_norm": 0.8647781014442444, "learning_rate": 2.7597264400206255e-06, "loss": 0.1064, "step": 8941 }, { "epoch": 1.448801036941024, "grad_norm": 0.9041435122489929, "learning_rate": 2.7592914983186113e-06, "loss": 0.104, "step": 8942 }, { "epoch": 1.4489630589760207, "grad_norm": 0.7579626441001892, "learning_rate": 2.7588565486829054e-06, "loss": 0.1023, "step": 8943 }, { "epoch": 1.4491250810110174, "grad_norm": 0.7907978892326355, "learning_rate": 2.758421591126814e-06, "loss": 0.0998, "step": 8944 }, { "epoch": 1.4492871030460144, "grad_norm": 0.7959480285644531, "learning_rate": 2.757986625663649e-06, "loss": 0.1035, "step": 8945 }, { "epoch": 1.449449125081011, "grad_norm": 0.8328735828399658, "learning_rate": 2.757551652306717e-06, "loss": 0.1081, "step": 8946 }, { "epoch": 1.4496111471160078, "grad_norm": 0.8220754265785217, "learning_rate": 2.757116671069327e-06, "loss": 0.0985, "step": 8947 }, { "epoch": 1.4497731691510045, "grad_norm": 0.7394413352012634, "learning_rate": 2.7566816819647897e-06, "loss": 0.0931, "step": 8948 }, { "epoch": 1.4499351911860012, "grad_norm": 0.8674811124801636, "learning_rate": 2.756246685006414e-06, "loss": 0.1102, "step": 8949 }, { "epoch": 1.4500972132209982, "grad_norm": 0.944241464138031, "learning_rate": 2.7558116802075095e-06, "loss": 0.1129, "step": 8950 }, { "epoch": 1.4502592352559949, "grad_norm": 0.8318021297454834, "learning_rate": 2.755376667581387e-06, "loss": 0.1066, "step": 8951 }, { "epoch": 1.4504212572909916, "grad_norm": 0.7419362664222717, "learning_rate": 2.754941647141357e-06, "loss": 0.0957, "step": 8952 }, { "epoch": 1.4505832793259883, "grad_norm": 0.7914148569107056, "learning_rate": 2.754506618900729e-06, "loss": 0.1047, "step": 8953 }, { "epoch": 1.450745301360985, "grad_norm": 0.8463177680969238, "learning_rate": 2.754071582872814e-06, "loss": 0.1088, "step": 8954 }, { "epoch": 1.450907323395982, "grad_norm": 0.9250964522361755, "learning_rate": 2.753636539070924e-06, "loss": 0.118, "step": 8955 }, { "epoch": 1.4510693454309787, "grad_norm": 0.8688222169876099, "learning_rate": 2.753201487508369e-06, "loss": 0.0961, "step": 8956 }, { "epoch": 1.4512313674659754, "grad_norm": 0.8172348737716675, "learning_rate": 2.752766428198462e-06, "loss": 0.1063, "step": 8957 }, { "epoch": 1.451393389500972, "grad_norm": 0.8788450360298157, "learning_rate": 2.7523313611545133e-06, "loss": 0.1075, "step": 8958 }, { "epoch": 1.4515554115359688, "grad_norm": 0.8048200011253357, "learning_rate": 2.7518962863898356e-06, "loss": 0.1012, "step": 8959 }, { "epoch": 1.4517174335709657, "grad_norm": 0.8715628981590271, "learning_rate": 2.7514612039177422e-06, "loss": 0.1027, "step": 8960 }, { "epoch": 1.4518794556059624, "grad_norm": 0.9554488658905029, "learning_rate": 2.7510261137515437e-06, "loss": 0.1079, "step": 8961 }, { "epoch": 1.4520414776409591, "grad_norm": 0.908086895942688, "learning_rate": 2.7505910159045534e-06, "loss": 0.1199, "step": 8962 }, { "epoch": 1.4522034996759559, "grad_norm": 0.7679954171180725, "learning_rate": 2.750155910390085e-06, "loss": 0.0917, "step": 8963 }, { "epoch": 1.4523655217109526, "grad_norm": 0.8922917246818542, "learning_rate": 2.74972079722145e-06, "loss": 0.1107, "step": 8964 }, { "epoch": 1.4525275437459495, "grad_norm": 0.8348422050476074, "learning_rate": 2.7492856764119644e-06, "loss": 0.1075, "step": 8965 }, { "epoch": 1.4526895657809462, "grad_norm": 0.852973997592926, "learning_rate": 2.7488505479749395e-06, "loss": 0.1131, "step": 8966 }, { "epoch": 1.452851587815943, "grad_norm": 0.8772859573364258, "learning_rate": 2.7484154119236906e-06, "loss": 0.1085, "step": 8967 }, { "epoch": 1.4530136098509399, "grad_norm": 0.8057224154472351, "learning_rate": 2.74798026827153e-06, "loss": 0.1, "step": 8968 }, { "epoch": 1.4531756318859366, "grad_norm": 0.8312748670578003, "learning_rate": 2.7475451170317748e-06, "loss": 0.1064, "step": 8969 }, { "epoch": 1.4533376539209333, "grad_norm": 0.8106637001037598, "learning_rate": 2.747109958217737e-06, "loss": 0.106, "step": 8970 }, { "epoch": 1.45349967595593, "grad_norm": 0.8003639578819275, "learning_rate": 2.7466747918427326e-06, "loss": 0.0978, "step": 8971 }, { "epoch": 1.4536616979909267, "grad_norm": 0.8317378759384155, "learning_rate": 2.746239617920077e-06, "loss": 0.0995, "step": 8972 }, { "epoch": 1.4538237200259236, "grad_norm": 0.8461683988571167, "learning_rate": 2.7458044364630844e-06, "loss": 0.0959, "step": 8973 }, { "epoch": 1.4539857420609203, "grad_norm": 0.9303465485572815, "learning_rate": 2.745369247485072e-06, "loss": 0.1011, "step": 8974 }, { "epoch": 1.454147764095917, "grad_norm": 0.7978488802909851, "learning_rate": 2.7449340509993526e-06, "loss": 0.0928, "step": 8975 }, { "epoch": 1.4543097861309138, "grad_norm": 0.7847487330436707, "learning_rate": 2.7444988470192457e-06, "loss": 0.0917, "step": 8976 }, { "epoch": 1.4544718081659105, "grad_norm": 0.7997845411300659, "learning_rate": 2.744063635558065e-06, "loss": 0.0966, "step": 8977 }, { "epoch": 1.4546338302009074, "grad_norm": 0.7787443995475769, "learning_rate": 2.743628416629128e-06, "loss": 0.0986, "step": 8978 }, { "epoch": 1.4547958522359041, "grad_norm": 0.9163244366645813, "learning_rate": 2.7431931902457504e-06, "loss": 0.1012, "step": 8979 }, { "epoch": 1.4549578742709008, "grad_norm": 0.7863480448722839, "learning_rate": 2.7427579564212496e-06, "loss": 0.095, "step": 8980 }, { "epoch": 1.4551198963058976, "grad_norm": 0.8067967295646667, "learning_rate": 2.7423227151689436e-06, "loss": 0.0963, "step": 8981 }, { "epoch": 1.4552819183408943, "grad_norm": 0.8217342495918274, "learning_rate": 2.7418874665021483e-06, "loss": 0.1071, "step": 8982 }, { "epoch": 1.4554439403758912, "grad_norm": 0.754622757434845, "learning_rate": 2.7414522104341827e-06, "loss": 0.0997, "step": 8983 }, { "epoch": 1.455605962410888, "grad_norm": 0.8360654711723328, "learning_rate": 2.7410169469783632e-06, "loss": 0.1013, "step": 8984 }, { "epoch": 1.4557679844458846, "grad_norm": 0.8221597075462341, "learning_rate": 2.740581676148008e-06, "loss": 0.101, "step": 8985 }, { "epoch": 1.4559300064808813, "grad_norm": 0.9259410500526428, "learning_rate": 2.7401463979564365e-06, "loss": 0.1103, "step": 8986 }, { "epoch": 1.456092028515878, "grad_norm": 0.8288673162460327, "learning_rate": 2.739711112416966e-06, "loss": 0.0889, "step": 8987 }, { "epoch": 1.456254050550875, "grad_norm": 0.8852518796920776, "learning_rate": 2.7392758195429153e-06, "loss": 0.1054, "step": 8988 }, { "epoch": 1.4564160725858717, "grad_norm": 0.7813482284545898, "learning_rate": 2.738840519347604e-06, "loss": 0.104, "step": 8989 }, { "epoch": 1.4565780946208684, "grad_norm": 0.753773033618927, "learning_rate": 2.73840521184435e-06, "loss": 0.0911, "step": 8990 }, { "epoch": 1.4567401166558653, "grad_norm": 0.8681461215019226, "learning_rate": 2.737969897046475e-06, "loss": 0.107, "step": 8991 }, { "epoch": 1.4569021386908618, "grad_norm": 0.8012184500694275, "learning_rate": 2.737534574967295e-06, "loss": 0.1021, "step": 8992 }, { "epoch": 1.4570641607258588, "grad_norm": 0.9006675481796265, "learning_rate": 2.7370992456201333e-06, "loss": 0.1089, "step": 8993 }, { "epoch": 1.4572261827608555, "grad_norm": 0.8169563412666321, "learning_rate": 2.7366639090183076e-06, "loss": 0.1066, "step": 8994 }, { "epoch": 1.4573882047958522, "grad_norm": 0.9039627313613892, "learning_rate": 2.7362285651751396e-06, "loss": 0.1084, "step": 8995 }, { "epoch": 1.4575502268308491, "grad_norm": 1.059561014175415, "learning_rate": 2.7357932141039494e-06, "loss": 0.1334, "step": 8996 }, { "epoch": 1.4577122488658458, "grad_norm": 0.8857733011245728, "learning_rate": 2.7353578558180566e-06, "loss": 0.1026, "step": 8997 }, { "epoch": 1.4578742709008425, "grad_norm": 0.9009446501731873, "learning_rate": 2.7349224903307836e-06, "loss": 0.1097, "step": 8998 }, { "epoch": 1.4580362929358393, "grad_norm": 0.794425368309021, "learning_rate": 2.7344871176554498e-06, "loss": 0.0953, "step": 8999 }, { "epoch": 1.458198314970836, "grad_norm": 0.752216637134552, "learning_rate": 2.734051737805379e-06, "loss": 0.1114, "step": 9000 }, { "epoch": 1.458360337005833, "grad_norm": 0.8077507019042969, "learning_rate": 2.733616350793891e-06, "loss": 0.0994, "step": 9001 }, { "epoch": 1.4585223590408296, "grad_norm": 0.8075626492500305, "learning_rate": 2.733180956634308e-06, "loss": 0.1092, "step": 9002 }, { "epoch": 1.4586843810758263, "grad_norm": 0.8624060153961182, "learning_rate": 2.7327455553399523e-06, "loss": 0.1134, "step": 9003 }, { "epoch": 1.458846403110823, "grad_norm": 0.8211466073989868, "learning_rate": 2.7323101469241454e-06, "loss": 0.102, "step": 9004 }, { "epoch": 1.4590084251458197, "grad_norm": 0.838489830493927, "learning_rate": 2.7318747314002108e-06, "loss": 0.1028, "step": 9005 }, { "epoch": 1.4591704471808167, "grad_norm": 0.7504817247390747, "learning_rate": 2.7314393087814693e-06, "loss": 0.103, "step": 9006 }, { "epoch": 1.4593324692158134, "grad_norm": 0.8312764167785645, "learning_rate": 2.731003879081246e-06, "loss": 0.1052, "step": 9007 }, { "epoch": 1.45949449125081, "grad_norm": 0.7648634910583496, "learning_rate": 2.7305684423128633e-06, "loss": 0.0971, "step": 9008 }, { "epoch": 1.4596565132858068, "grad_norm": 0.7204511165618896, "learning_rate": 2.7301329984896435e-06, "loss": 0.088, "step": 9009 }, { "epoch": 1.4598185353208035, "grad_norm": 0.8412272334098816, "learning_rate": 2.729697547624911e-06, "loss": 0.1028, "step": 9010 }, { "epoch": 1.4599805573558005, "grad_norm": 0.7355968952178955, "learning_rate": 2.7292620897319892e-06, "loss": 0.0942, "step": 9011 }, { "epoch": 1.4601425793907972, "grad_norm": 0.8538913726806641, "learning_rate": 2.7288266248242025e-06, "loss": 0.0954, "step": 9012 }, { "epoch": 1.4603046014257939, "grad_norm": 0.7657922506332397, "learning_rate": 2.7283911529148753e-06, "loss": 0.1011, "step": 9013 }, { "epoch": 1.4604666234607906, "grad_norm": 0.951962947845459, "learning_rate": 2.7279556740173306e-06, "loss": 0.1025, "step": 9014 }, { "epoch": 1.4606286454957873, "grad_norm": 0.7168994545936584, "learning_rate": 2.727520188144895e-06, "loss": 0.0845, "step": 9015 }, { "epoch": 1.4607906675307842, "grad_norm": 0.7955917716026306, "learning_rate": 2.7270846953108913e-06, "loss": 0.0934, "step": 9016 }, { "epoch": 1.460952689565781, "grad_norm": 0.8339834213256836, "learning_rate": 2.7266491955286457e-06, "loss": 0.1052, "step": 9017 }, { "epoch": 1.4611147116007777, "grad_norm": 0.9418988823890686, "learning_rate": 2.7262136888114833e-06, "loss": 0.1146, "step": 9018 }, { "epoch": 1.4612767336357746, "grad_norm": 0.7261226177215576, "learning_rate": 2.725778175172729e-06, "loss": 0.0869, "step": 9019 }, { "epoch": 1.4614387556707713, "grad_norm": 1.1076301336288452, "learning_rate": 2.72534265462571e-06, "loss": 0.1158, "step": 9020 }, { "epoch": 1.461600777705768, "grad_norm": 0.8176051378250122, "learning_rate": 2.7249071271837503e-06, "loss": 0.1012, "step": 9021 }, { "epoch": 1.4617627997407647, "grad_norm": 0.7100493311882019, "learning_rate": 2.7244715928601774e-06, "loss": 0.088, "step": 9022 }, { "epoch": 1.4619248217757614, "grad_norm": 0.8428002595901489, "learning_rate": 2.7240360516683155e-06, "loss": 0.0962, "step": 9023 }, { "epoch": 1.4620868438107584, "grad_norm": 0.8279069066047668, "learning_rate": 2.723600503621494e-06, "loss": 0.1123, "step": 9024 }, { "epoch": 1.462248865845755, "grad_norm": 0.8103442788124084, "learning_rate": 2.723164948733038e-06, "loss": 0.1006, "step": 9025 }, { "epoch": 1.4624108878807518, "grad_norm": 0.8227192163467407, "learning_rate": 2.7227293870162742e-06, "loss": 0.1054, "step": 9026 }, { "epoch": 1.4625729099157485, "grad_norm": 0.7976576089859009, "learning_rate": 2.7222938184845304e-06, "loss": 0.0935, "step": 9027 }, { "epoch": 1.4627349319507452, "grad_norm": 0.8542677164077759, "learning_rate": 2.721858243151133e-06, "loss": 0.1126, "step": 9028 }, { "epoch": 1.4628969539857422, "grad_norm": 0.913292407989502, "learning_rate": 2.7214226610294114e-06, "loss": 0.1131, "step": 9029 }, { "epoch": 1.4630589760207389, "grad_norm": 0.9596574902534485, "learning_rate": 2.7209870721326915e-06, "loss": 0.1056, "step": 9030 }, { "epoch": 1.4632209980557356, "grad_norm": 0.8938264846801758, "learning_rate": 2.7205514764743025e-06, "loss": 0.1035, "step": 9031 }, { "epoch": 1.4633830200907323, "grad_norm": 0.7663517594337463, "learning_rate": 2.7201158740675714e-06, "loss": 0.0973, "step": 9032 }, { "epoch": 1.463545042125729, "grad_norm": 0.8649742007255554, "learning_rate": 2.7196802649258273e-06, "loss": 0.0919, "step": 9033 }, { "epoch": 1.463707064160726, "grad_norm": 0.9753785729408264, "learning_rate": 2.719244649062399e-06, "loss": 0.1166, "step": 9034 }, { "epoch": 1.4638690861957226, "grad_norm": 0.7970572710037231, "learning_rate": 2.7188090264906147e-06, "loss": 0.0942, "step": 9035 }, { "epoch": 1.4640311082307194, "grad_norm": 0.8879532217979431, "learning_rate": 2.718373397223804e-06, "loss": 0.1078, "step": 9036 }, { "epoch": 1.464193130265716, "grad_norm": 0.7902569770812988, "learning_rate": 2.7179377612752954e-06, "loss": 0.1028, "step": 9037 }, { "epoch": 1.4643551523007128, "grad_norm": 0.7548561096191406, "learning_rate": 2.71750211865842e-06, "loss": 0.0948, "step": 9038 }, { "epoch": 1.4645171743357097, "grad_norm": 0.7446286082267761, "learning_rate": 2.7170664693865045e-06, "loss": 0.0925, "step": 9039 }, { "epoch": 1.4646791963707064, "grad_norm": 0.7989212274551392, "learning_rate": 2.7166308134728814e-06, "loss": 0.0981, "step": 9040 }, { "epoch": 1.4648412184057031, "grad_norm": 0.8018403053283691, "learning_rate": 2.7161951509308785e-06, "loss": 0.0979, "step": 9041 }, { "epoch": 1.4650032404407, "grad_norm": 0.7907342314720154, "learning_rate": 2.715759481773828e-06, "loss": 0.1051, "step": 9042 }, { "epoch": 1.4651652624756966, "grad_norm": 0.8176236152648926, "learning_rate": 2.7153238060150592e-06, "loss": 0.1035, "step": 9043 }, { "epoch": 1.4653272845106935, "grad_norm": 0.8785366415977478, "learning_rate": 2.7148881236679035e-06, "loss": 0.1067, "step": 9044 }, { "epoch": 1.4654893065456902, "grad_norm": 0.8045976161956787, "learning_rate": 2.7144524347456906e-06, "loss": 0.1027, "step": 9045 }, { "epoch": 1.465651328580687, "grad_norm": 0.7513163685798645, "learning_rate": 2.7140167392617527e-06, "loss": 0.0968, "step": 9046 }, { "epoch": 1.4658133506156839, "grad_norm": 0.9477952122688293, "learning_rate": 2.7135810372294204e-06, "loss": 0.1219, "step": 9047 }, { "epoch": 1.4659753726506806, "grad_norm": 0.7233965992927551, "learning_rate": 2.7131453286620253e-06, "loss": 0.0966, "step": 9048 }, { "epoch": 1.4661373946856773, "grad_norm": 0.8609042763710022, "learning_rate": 2.7127096135728987e-06, "loss": 0.1081, "step": 9049 }, { "epoch": 1.466299416720674, "grad_norm": 0.7274024486541748, "learning_rate": 2.712273891975372e-06, "loss": 0.097, "step": 9050 }, { "epoch": 1.4664614387556707, "grad_norm": 0.8457092046737671, "learning_rate": 2.7118381638827795e-06, "loss": 0.1131, "step": 9051 }, { "epoch": 1.4666234607906676, "grad_norm": 0.8323071599006653, "learning_rate": 2.7114024293084502e-06, "loss": 0.1064, "step": 9052 }, { "epoch": 1.4667854828256643, "grad_norm": 0.6933931112289429, "learning_rate": 2.710966688265719e-06, "loss": 0.0793, "step": 9053 }, { "epoch": 1.466947504860661, "grad_norm": 0.8105365633964539, "learning_rate": 2.710530940767917e-06, "loss": 0.0991, "step": 9054 }, { "epoch": 1.4671095268956578, "grad_norm": 0.7702953815460205, "learning_rate": 2.7100951868283785e-06, "loss": 0.1003, "step": 9055 }, { "epoch": 1.4672715489306545, "grad_norm": 0.8761657476425171, "learning_rate": 2.7096594264604357e-06, "loss": 0.1091, "step": 9056 }, { "epoch": 1.4674335709656514, "grad_norm": 0.8410671353340149, "learning_rate": 2.709223659677421e-06, "loss": 0.1059, "step": 9057 }, { "epoch": 1.4675955930006481, "grad_norm": 0.7424046993255615, "learning_rate": 2.7087878864926696e-06, "loss": 0.0873, "step": 9058 }, { "epoch": 1.4677576150356448, "grad_norm": 0.8431706428527832, "learning_rate": 2.7083521069195134e-06, "loss": 0.1037, "step": 9059 }, { "epoch": 1.4679196370706415, "grad_norm": 0.7804480195045471, "learning_rate": 2.707916320971288e-06, "loss": 0.0958, "step": 9060 }, { "epoch": 1.4680816591056383, "grad_norm": 0.7562078237533569, "learning_rate": 2.707480528661325e-06, "loss": 0.0974, "step": 9061 }, { "epoch": 1.4682436811406352, "grad_norm": 0.7641584277153015, "learning_rate": 2.7070447300029607e-06, "loss": 0.095, "step": 9062 }, { "epoch": 1.468405703175632, "grad_norm": 0.9363652467727661, "learning_rate": 2.7066089250095284e-06, "loss": 0.1097, "step": 9063 }, { "epoch": 1.4685677252106286, "grad_norm": 1.033353567123413, "learning_rate": 2.706173113694363e-06, "loss": 0.1097, "step": 9064 }, { "epoch": 1.4687297472456253, "grad_norm": 0.8495287299156189, "learning_rate": 2.705737296070799e-06, "loss": 0.1042, "step": 9065 }, { "epoch": 1.468891769280622, "grad_norm": 0.7492153644561768, "learning_rate": 2.705301472152172e-06, "loss": 0.0886, "step": 9066 }, { "epoch": 1.469053791315619, "grad_norm": 0.9077339768409729, "learning_rate": 2.7048656419518168e-06, "loss": 0.1071, "step": 9067 }, { "epoch": 1.4692158133506157, "grad_norm": 0.9256390333175659, "learning_rate": 2.7044298054830687e-06, "loss": 0.1117, "step": 9068 }, { "epoch": 1.4693778353856124, "grad_norm": 0.8640137910842896, "learning_rate": 2.703993962759263e-06, "loss": 0.1049, "step": 9069 }, { "epoch": 1.4695398574206093, "grad_norm": 0.8645317554473877, "learning_rate": 2.703558113793736e-06, "loss": 0.1036, "step": 9070 }, { "epoch": 1.469701879455606, "grad_norm": 0.8858664631843567, "learning_rate": 2.703122258599823e-06, "loss": 0.1113, "step": 9071 }, { "epoch": 1.4698639014906028, "grad_norm": 0.856381893157959, "learning_rate": 2.7026863971908607e-06, "loss": 0.0984, "step": 9072 }, { "epoch": 1.4700259235255995, "grad_norm": 0.8103935718536377, "learning_rate": 2.702250529580185e-06, "loss": 0.1053, "step": 9073 }, { "epoch": 1.4701879455605962, "grad_norm": 0.8574365377426147, "learning_rate": 2.7018146557811325e-06, "loss": 0.0997, "step": 9074 }, { "epoch": 1.470349967595593, "grad_norm": 0.8630319833755493, "learning_rate": 2.70137877580704e-06, "loss": 0.1014, "step": 9075 }, { "epoch": 1.4705119896305898, "grad_norm": 0.8821179866790771, "learning_rate": 2.7009428896712443e-06, "loss": 0.1108, "step": 9076 }, { "epoch": 1.4706740116655865, "grad_norm": 0.8524888157844543, "learning_rate": 2.7005069973870823e-06, "loss": 0.1072, "step": 9077 }, { "epoch": 1.4708360337005832, "grad_norm": 0.790708601474762, "learning_rate": 2.700071098967892e-06, "loss": 0.0959, "step": 9078 }, { "epoch": 1.47099805573558, "grad_norm": 0.7711350917816162, "learning_rate": 2.6996351944270096e-06, "loss": 0.0944, "step": 9079 }, { "epoch": 1.471160077770577, "grad_norm": 0.862248957157135, "learning_rate": 2.699199283777773e-06, "loss": 0.1142, "step": 9080 }, { "epoch": 1.4713220998055736, "grad_norm": 0.7187860608100891, "learning_rate": 2.698763367033521e-06, "loss": 0.0918, "step": 9081 }, { "epoch": 1.4714841218405703, "grad_norm": 0.7436991333961487, "learning_rate": 2.6983274442075914e-06, "loss": 0.0882, "step": 9082 }, { "epoch": 1.471646143875567, "grad_norm": 0.7763605117797852, "learning_rate": 2.6978915153133207e-06, "loss": 0.1046, "step": 9083 }, { "epoch": 1.4718081659105637, "grad_norm": 0.8496381044387817, "learning_rate": 2.69745558036405e-06, "loss": 0.1087, "step": 9084 }, { "epoch": 1.4719701879455607, "grad_norm": 0.8233307003974915, "learning_rate": 2.6970196393731146e-06, "loss": 0.1056, "step": 9085 }, { "epoch": 1.4721322099805574, "grad_norm": 0.806120753288269, "learning_rate": 2.6965836923538568e-06, "loss": 0.1013, "step": 9086 }, { "epoch": 1.472294232015554, "grad_norm": 0.7680891156196594, "learning_rate": 2.696147739319613e-06, "loss": 0.0902, "step": 9087 }, { "epoch": 1.4724562540505508, "grad_norm": 0.8429840803146362, "learning_rate": 2.695711780283723e-06, "loss": 0.1087, "step": 9088 }, { "epoch": 1.4726182760855475, "grad_norm": 0.8204013109207153, "learning_rate": 2.695275815259526e-06, "loss": 0.1044, "step": 9089 }, { "epoch": 1.4727802981205445, "grad_norm": 0.7914881110191345, "learning_rate": 2.694839844260361e-06, "loss": 0.1021, "step": 9090 }, { "epoch": 1.4729423201555412, "grad_norm": 0.8489986658096313, "learning_rate": 2.69440386729957e-06, "loss": 0.1064, "step": 9091 }, { "epoch": 1.4731043421905379, "grad_norm": 0.8216438293457031, "learning_rate": 2.6939678843904897e-06, "loss": 0.1061, "step": 9092 }, { "epoch": 1.4732663642255348, "grad_norm": 0.8662644028663635, "learning_rate": 2.6935318955464624e-06, "loss": 0.1127, "step": 9093 }, { "epoch": 1.4734283862605313, "grad_norm": 0.9734071493148804, "learning_rate": 2.6930959007808268e-06, "loss": 0.127, "step": 9094 }, { "epoch": 1.4735904082955282, "grad_norm": 0.9388130307197571, "learning_rate": 2.692659900106924e-06, "loss": 0.111, "step": 9095 }, { "epoch": 1.473752430330525, "grad_norm": 0.8008855581283569, "learning_rate": 2.6922238935380946e-06, "loss": 0.1069, "step": 9096 }, { "epoch": 1.4739144523655217, "grad_norm": 0.8032642602920532, "learning_rate": 2.691787881087679e-06, "loss": 0.0985, "step": 9097 }, { "epoch": 1.4740764744005186, "grad_norm": 0.7424758672714233, "learning_rate": 2.691351862769018e-06, "loss": 0.1027, "step": 9098 }, { "epoch": 1.4742384964355153, "grad_norm": 0.8054472208023071, "learning_rate": 2.6909158385954544e-06, "loss": 0.105, "step": 9099 }, { "epoch": 1.474400518470512, "grad_norm": 0.7597915530204773, "learning_rate": 2.6904798085803276e-06, "loss": 0.0986, "step": 9100 }, { "epoch": 1.4745625405055087, "grad_norm": 0.7698268294334412, "learning_rate": 2.6900437727369793e-06, "loss": 0.0944, "step": 9101 }, { "epoch": 1.4747245625405054, "grad_norm": 0.7732874155044556, "learning_rate": 2.689607731078751e-06, "loss": 0.1, "step": 9102 }, { "epoch": 1.4748865845755024, "grad_norm": 0.7452662587165833, "learning_rate": 2.6891716836189857e-06, "loss": 0.0938, "step": 9103 }, { "epoch": 1.475048606610499, "grad_norm": 0.7833341956138611, "learning_rate": 2.688735630371024e-06, "loss": 0.1036, "step": 9104 }, { "epoch": 1.4752106286454958, "grad_norm": 0.7782242894172668, "learning_rate": 2.6882995713482097e-06, "loss": 0.0966, "step": 9105 }, { "epoch": 1.4753726506804925, "grad_norm": 0.7549180388450623, "learning_rate": 2.6878635065638843e-06, "loss": 0.1022, "step": 9106 }, { "epoch": 1.4755346727154892, "grad_norm": 0.8578351736068726, "learning_rate": 2.687427436031389e-06, "loss": 0.1039, "step": 9107 }, { "epoch": 1.4756966947504861, "grad_norm": 0.7809894680976868, "learning_rate": 2.6869913597640686e-06, "loss": 0.101, "step": 9108 }, { "epoch": 1.4758587167854829, "grad_norm": 0.7138332724571228, "learning_rate": 2.6865552777752644e-06, "loss": 0.0917, "step": 9109 }, { "epoch": 1.4760207388204796, "grad_norm": 0.8516620397567749, "learning_rate": 2.6861191900783213e-06, "loss": 0.1023, "step": 9110 }, { "epoch": 1.4761827608554763, "grad_norm": 0.8884662389755249, "learning_rate": 2.6856830966865804e-06, "loss": 0.1067, "step": 9111 }, { "epoch": 1.476344782890473, "grad_norm": 0.9057988524436951, "learning_rate": 2.685246997613386e-06, "loss": 0.1059, "step": 9112 }, { "epoch": 1.47650680492547, "grad_norm": 0.7630183696746826, "learning_rate": 2.684810892872083e-06, "loss": 0.1064, "step": 9113 }, { "epoch": 1.4766688269604666, "grad_norm": 0.751277506351471, "learning_rate": 2.6843747824760125e-06, "loss": 0.0947, "step": 9114 }, { "epoch": 1.4768308489954634, "grad_norm": 0.8783870935440063, "learning_rate": 2.683938666438521e-06, "loss": 0.1032, "step": 9115 }, { "epoch": 1.47699287103046, "grad_norm": 0.8420021533966064, "learning_rate": 2.6835025447729495e-06, "loss": 0.0947, "step": 9116 }, { "epoch": 1.4771548930654568, "grad_norm": 0.7928836941719055, "learning_rate": 2.6830664174926465e-06, "loss": 0.1003, "step": 9117 }, { "epoch": 1.4773169151004537, "grad_norm": 0.7881826162338257, "learning_rate": 2.682630284610953e-06, "loss": 0.1031, "step": 9118 }, { "epoch": 1.4774789371354504, "grad_norm": 0.7746166586875916, "learning_rate": 2.682194146141215e-06, "loss": 0.0952, "step": 9119 }, { "epoch": 1.4776409591704471, "grad_norm": 0.8245477676391602, "learning_rate": 2.6817580020967767e-06, "loss": 0.1023, "step": 9120 }, { "epoch": 1.477802981205444, "grad_norm": 0.8022861480712891, "learning_rate": 2.6813218524909836e-06, "loss": 0.0994, "step": 9121 }, { "epoch": 1.4779650032404408, "grad_norm": 0.8441992402076721, "learning_rate": 2.680885697337181e-06, "loss": 0.0999, "step": 9122 }, { "epoch": 1.4781270252754375, "grad_norm": 0.7806877493858337, "learning_rate": 2.6804495366487132e-06, "loss": 0.1001, "step": 9123 }, { "epoch": 1.4782890473104342, "grad_norm": 0.6983300447463989, "learning_rate": 2.6800133704389263e-06, "loss": 0.0843, "step": 9124 }, { "epoch": 1.478451069345431, "grad_norm": 0.8451159596443176, "learning_rate": 2.679577198721166e-06, "loss": 0.1103, "step": 9125 }, { "epoch": 1.4786130913804278, "grad_norm": 0.7998707890510559, "learning_rate": 2.6791410215087783e-06, "loss": 0.0962, "step": 9126 }, { "epoch": 1.4787751134154246, "grad_norm": 0.8116523027420044, "learning_rate": 2.678704838815108e-06, "loss": 0.1025, "step": 9127 }, { "epoch": 1.4789371354504213, "grad_norm": 0.7786074876785278, "learning_rate": 2.678268650653503e-06, "loss": 0.0974, "step": 9128 }, { "epoch": 1.479099157485418, "grad_norm": 0.8149200081825256, "learning_rate": 2.6778324570373083e-06, "loss": 0.1001, "step": 9129 }, { "epoch": 1.4792611795204147, "grad_norm": 0.8207477331161499, "learning_rate": 2.6773962579798713e-06, "loss": 0.0947, "step": 9130 }, { "epoch": 1.4794232015554116, "grad_norm": 0.8058983683586121, "learning_rate": 2.676960053494538e-06, "loss": 0.107, "step": 9131 }, { "epoch": 1.4795852235904083, "grad_norm": 0.9266466498374939, "learning_rate": 2.6765238435946543e-06, "loss": 0.1122, "step": 9132 }, { "epoch": 1.479747245625405, "grad_norm": 0.9578770995140076, "learning_rate": 2.676087628293569e-06, "loss": 0.1173, "step": 9133 }, { "epoch": 1.4799092676604018, "grad_norm": 0.8516537547111511, "learning_rate": 2.675651407604628e-06, "loss": 0.1006, "step": 9134 }, { "epoch": 1.4800712896953985, "grad_norm": 0.8952048420906067, "learning_rate": 2.6752151815411797e-06, "loss": 0.1053, "step": 9135 }, { "epoch": 1.4802333117303954, "grad_norm": 0.694008469581604, "learning_rate": 2.67477895011657e-06, "loss": 0.0859, "step": 9136 }, { "epoch": 1.4803953337653921, "grad_norm": 0.8128898739814758, "learning_rate": 2.674342713344148e-06, "loss": 0.1016, "step": 9137 }, { "epoch": 1.4805573558003888, "grad_norm": 0.8021711111068726, "learning_rate": 2.6739064712372596e-06, "loss": 0.1034, "step": 9138 }, { "epoch": 1.4807193778353855, "grad_norm": 0.9167004823684692, "learning_rate": 2.6734702238092557e-06, "loss": 0.1221, "step": 9139 }, { "epoch": 1.4808813998703823, "grad_norm": 0.8511152863502502, "learning_rate": 2.6730339710734815e-06, "loss": 0.1096, "step": 9140 }, { "epoch": 1.4810434219053792, "grad_norm": 0.9012813568115234, "learning_rate": 2.6725977130432877e-06, "loss": 0.114, "step": 9141 }, { "epoch": 1.481205443940376, "grad_norm": 0.7799967527389526, "learning_rate": 2.672161449732021e-06, "loss": 0.0945, "step": 9142 }, { "epoch": 1.4813674659753726, "grad_norm": 0.8400177359580994, "learning_rate": 2.6717251811530304e-06, "loss": 0.1052, "step": 9143 }, { "epoch": 1.4815294880103695, "grad_norm": 0.7064500451087952, "learning_rate": 2.671288907319666e-06, "loss": 0.0897, "step": 9144 }, { "epoch": 1.481691510045366, "grad_norm": 0.8531193733215332, "learning_rate": 2.6708526282452734e-06, "loss": 0.1076, "step": 9145 }, { "epoch": 1.481853532080363, "grad_norm": 0.8008085489273071, "learning_rate": 2.670416343943205e-06, "loss": 0.1068, "step": 9146 }, { "epoch": 1.4820155541153597, "grad_norm": 0.7878074645996094, "learning_rate": 2.669980054426809e-06, "loss": 0.0997, "step": 9147 }, { "epoch": 1.4821775761503564, "grad_norm": 0.8357486724853516, "learning_rate": 2.669543759709434e-06, "loss": 0.1048, "step": 9148 }, { "epoch": 1.4823395981853533, "grad_norm": 0.7349844574928284, "learning_rate": 2.669107459804431e-06, "loss": 0.0923, "step": 9149 }, { "epoch": 1.48250162022035, "grad_norm": 0.7902621030807495, "learning_rate": 2.668671154725149e-06, "loss": 0.1033, "step": 9150 }, { "epoch": 1.4826636422553467, "grad_norm": 0.7515067458152771, "learning_rate": 2.6682348444849373e-06, "loss": 0.0931, "step": 9151 }, { "epoch": 1.4828256642903435, "grad_norm": 0.798224151134491, "learning_rate": 2.6677985290971464e-06, "loss": 0.102, "step": 9152 }, { "epoch": 1.4829876863253402, "grad_norm": 0.8462631702423096, "learning_rate": 2.6673622085751275e-06, "loss": 0.0963, "step": 9153 }, { "epoch": 1.483149708360337, "grad_norm": 0.7932848334312439, "learning_rate": 2.666925882932229e-06, "loss": 0.1096, "step": 9154 }, { "epoch": 1.4833117303953338, "grad_norm": 0.9925400614738464, "learning_rate": 2.666489552181803e-06, "loss": 0.0995, "step": 9155 }, { "epoch": 1.4834737524303305, "grad_norm": 0.8233938813209534, "learning_rate": 2.6660532163371995e-06, "loss": 0.097, "step": 9156 }, { "epoch": 1.4836357744653272, "grad_norm": 0.819572925567627, "learning_rate": 2.6656168754117697e-06, "loss": 0.1022, "step": 9157 }, { "epoch": 1.483797796500324, "grad_norm": 0.7211984992027283, "learning_rate": 2.665180529418863e-06, "loss": 0.09, "step": 9158 }, { "epoch": 1.4839598185353209, "grad_norm": 0.7187896966934204, "learning_rate": 2.6647441783718335e-06, "loss": 0.0809, "step": 9159 }, { "epoch": 1.4841218405703176, "grad_norm": 0.7895532250404358, "learning_rate": 2.6643078222840295e-06, "loss": 0.1055, "step": 9160 }, { "epoch": 1.4842838626053143, "grad_norm": 0.8991692066192627, "learning_rate": 2.663871461168805e-06, "loss": 0.1035, "step": 9161 }, { "epoch": 1.484445884640311, "grad_norm": 0.9207788109779358, "learning_rate": 2.6634350950395096e-06, "loss": 0.1109, "step": 9162 }, { "epoch": 1.4846079066753077, "grad_norm": 0.8399395942687988, "learning_rate": 2.6629987239094956e-06, "loss": 0.1089, "step": 9163 }, { "epoch": 1.4847699287103047, "grad_norm": 0.8203895688056946, "learning_rate": 2.662562347792116e-06, "loss": 0.0992, "step": 9164 }, { "epoch": 1.4849319507453014, "grad_norm": 0.7525299787521362, "learning_rate": 2.662125966700721e-06, "loss": 0.0946, "step": 9165 }, { "epoch": 1.485093972780298, "grad_norm": 0.9387903213500977, "learning_rate": 2.6616895806486644e-06, "loss": 0.118, "step": 9166 }, { "epoch": 1.4852559948152948, "grad_norm": 0.8505684733390808, "learning_rate": 2.661253189649297e-06, "loss": 0.1121, "step": 9167 }, { "epoch": 1.4854180168502915, "grad_norm": 0.8152345418930054, "learning_rate": 2.6608167937159735e-06, "loss": 0.1032, "step": 9168 }, { "epoch": 1.4855800388852884, "grad_norm": 0.7628380060195923, "learning_rate": 2.6603803928620436e-06, "loss": 0.1009, "step": 9169 }, { "epoch": 1.4857420609202852, "grad_norm": 0.8258995413780212, "learning_rate": 2.6599439871008636e-06, "loss": 0.1027, "step": 9170 }, { "epoch": 1.4859040829552819, "grad_norm": 0.7622812986373901, "learning_rate": 2.6595075764457838e-06, "loss": 0.0986, "step": 9171 }, { "epoch": 1.4860661049902788, "grad_norm": 0.7521347999572754, "learning_rate": 2.659071160910158e-06, "loss": 0.0953, "step": 9172 }, { "epoch": 1.4862281270252755, "grad_norm": 0.7840893864631653, "learning_rate": 2.6586347405073398e-06, "loss": 0.0931, "step": 9173 }, { "epoch": 1.4863901490602722, "grad_norm": 0.7666772603988647, "learning_rate": 2.6581983152506825e-06, "loss": 0.0981, "step": 9174 }, { "epoch": 1.486552171095269, "grad_norm": 0.8099150061607361, "learning_rate": 2.65776188515354e-06, "loss": 0.0986, "step": 9175 }, { "epoch": 1.4867141931302656, "grad_norm": 0.8243248462677002, "learning_rate": 2.6573254502292644e-06, "loss": 0.1039, "step": 9176 }, { "epoch": 1.4868762151652626, "grad_norm": 0.799664318561554, "learning_rate": 2.6568890104912123e-06, "loss": 0.1003, "step": 9177 }, { "epoch": 1.4870382372002593, "grad_norm": 0.7624625563621521, "learning_rate": 2.656452565952735e-06, "loss": 0.1001, "step": 9178 }, { "epoch": 1.487200259235256, "grad_norm": 0.835109531879425, "learning_rate": 2.656016116627188e-06, "loss": 0.1002, "step": 9179 }, { "epoch": 1.4873622812702527, "grad_norm": 0.7921520471572876, "learning_rate": 2.6555796625279257e-06, "loss": 0.0996, "step": 9180 }, { "epoch": 1.4875243033052494, "grad_norm": 0.8584132790565491, "learning_rate": 2.6551432036683017e-06, "loss": 0.1059, "step": 9181 }, { "epoch": 1.4876863253402464, "grad_norm": 0.8519357442855835, "learning_rate": 2.6547067400616717e-06, "loss": 0.0943, "step": 9182 }, { "epoch": 1.487848347375243, "grad_norm": 0.8248621225357056, "learning_rate": 2.654270271721389e-06, "loss": 0.0998, "step": 9183 }, { "epoch": 1.4880103694102398, "grad_norm": 0.8023591041564941, "learning_rate": 2.6538337986608105e-06, "loss": 0.0943, "step": 9184 }, { "epoch": 1.4881723914452365, "grad_norm": 0.8533332347869873, "learning_rate": 2.6533973208932893e-06, "loss": 0.1109, "step": 9185 }, { "epoch": 1.4883344134802332, "grad_norm": 0.7940263748168945, "learning_rate": 2.6529608384321815e-06, "loss": 0.1073, "step": 9186 }, { "epoch": 1.4884964355152301, "grad_norm": 0.8785202503204346, "learning_rate": 2.6525243512908423e-06, "loss": 0.1016, "step": 9187 }, { "epoch": 1.4886584575502269, "grad_norm": 0.7625306844711304, "learning_rate": 2.6520878594826268e-06, "loss": 0.0974, "step": 9188 }, { "epoch": 1.4888204795852236, "grad_norm": 0.8531996607780457, "learning_rate": 2.6516513630208906e-06, "loss": 0.1111, "step": 9189 }, { "epoch": 1.4889825016202203, "grad_norm": 0.8253282904624939, "learning_rate": 2.651214861918991e-06, "loss": 0.1047, "step": 9190 }, { "epoch": 1.489144523655217, "grad_norm": 0.8038217425346375, "learning_rate": 2.6507783561902804e-06, "loss": 0.1079, "step": 9191 }, { "epoch": 1.489306545690214, "grad_norm": 0.9079601168632507, "learning_rate": 2.6503418458481188e-06, "loss": 0.1085, "step": 9192 }, { "epoch": 1.4894685677252106, "grad_norm": 0.7232383489608765, "learning_rate": 2.6499053309058593e-06, "loss": 0.0884, "step": 9193 }, { "epoch": 1.4896305897602073, "grad_norm": 0.7744395136833191, "learning_rate": 2.649468811376861e-06, "loss": 0.099, "step": 9194 }, { "epoch": 1.4897926117952043, "grad_norm": 0.8593305349349976, "learning_rate": 2.649032287274477e-06, "loss": 0.1131, "step": 9195 }, { "epoch": 1.4899546338302008, "grad_norm": 0.7993839383125305, "learning_rate": 2.6485957586120664e-06, "loss": 0.1102, "step": 9196 }, { "epoch": 1.4901166558651977, "grad_norm": 0.7705836296081543, "learning_rate": 2.6481592254029854e-06, "loss": 0.0971, "step": 9197 }, { "epoch": 1.4902786779001944, "grad_norm": 0.816331148147583, "learning_rate": 2.6477226876605903e-06, "loss": 0.1041, "step": 9198 }, { "epoch": 1.4904406999351911, "grad_norm": 0.853085458278656, "learning_rate": 2.647286145398239e-06, "loss": 0.1112, "step": 9199 }, { "epoch": 1.490602721970188, "grad_norm": 0.7312716841697693, "learning_rate": 2.646849598629287e-06, "loss": 0.0957, "step": 9200 }, { "epoch": 1.4907647440051848, "grad_norm": 0.8262644410133362, "learning_rate": 2.646413047367094e-06, "loss": 0.107, "step": 9201 }, { "epoch": 1.4909267660401815, "grad_norm": 0.8195114731788635, "learning_rate": 2.645976491625015e-06, "loss": 0.1021, "step": 9202 }, { "epoch": 1.4910887880751782, "grad_norm": 0.813556432723999, "learning_rate": 2.6455399314164095e-06, "loss": 0.1105, "step": 9203 }, { "epoch": 1.491250810110175, "grad_norm": 0.8205865025520325, "learning_rate": 2.645103366754633e-06, "loss": 0.1, "step": 9204 }, { "epoch": 1.4914128321451718, "grad_norm": 0.8455175161361694, "learning_rate": 2.644666797653046e-06, "loss": 0.1036, "step": 9205 }, { "epoch": 1.4915748541801686, "grad_norm": 0.7989739775657654, "learning_rate": 2.6442302241250047e-06, "loss": 0.1039, "step": 9206 }, { "epoch": 1.4917368762151653, "grad_norm": 0.8118696808815002, "learning_rate": 2.643793646183867e-06, "loss": 0.0953, "step": 9207 }, { "epoch": 1.491898898250162, "grad_norm": 0.7257483005523682, "learning_rate": 2.6433570638429923e-06, "loss": 0.0943, "step": 9208 }, { "epoch": 1.4920609202851587, "grad_norm": 0.6711858510971069, "learning_rate": 2.6429204771157384e-06, "loss": 0.0901, "step": 9209 }, { "epoch": 1.4922229423201556, "grad_norm": 0.7894310355186462, "learning_rate": 2.6424838860154633e-06, "loss": 0.104, "step": 9210 }, { "epoch": 1.4923849643551523, "grad_norm": 0.7803335785865784, "learning_rate": 2.6420472905555262e-06, "loss": 0.0985, "step": 9211 }, { "epoch": 1.492546986390149, "grad_norm": 0.7969242334365845, "learning_rate": 2.641610690749286e-06, "loss": 0.1104, "step": 9212 }, { "epoch": 1.4927090084251458, "grad_norm": 0.9152027368545532, "learning_rate": 2.641174086610101e-06, "loss": 0.1166, "step": 9213 }, { "epoch": 1.4928710304601425, "grad_norm": 0.883071780204773, "learning_rate": 2.640737478151331e-06, "loss": 0.114, "step": 9214 }, { "epoch": 1.4930330524951394, "grad_norm": 0.7122350335121155, "learning_rate": 2.6403008653863343e-06, "loss": 0.0903, "step": 9215 }, { "epoch": 1.4931950745301361, "grad_norm": 0.8668462634086609, "learning_rate": 2.6398642483284716e-06, "loss": 0.1001, "step": 9216 }, { "epoch": 1.4933570965651328, "grad_norm": 0.8878929615020752, "learning_rate": 2.6394276269911003e-06, "loss": 0.1123, "step": 9217 }, { "epoch": 1.4935191186001295, "grad_norm": 0.7911863327026367, "learning_rate": 2.6389910013875814e-06, "loss": 0.1051, "step": 9218 }, { "epoch": 1.4936811406351262, "grad_norm": 0.9753386378288269, "learning_rate": 2.638554371531274e-06, "loss": 0.0956, "step": 9219 }, { "epoch": 1.4938431626701232, "grad_norm": 0.7793456315994263, "learning_rate": 2.638117737435538e-06, "loss": 0.0986, "step": 9220 }, { "epoch": 1.49400518470512, "grad_norm": 0.8406729698181152, "learning_rate": 2.637681099113735e-06, "loss": 0.1014, "step": 9221 }, { "epoch": 1.4941672067401166, "grad_norm": 0.7772307395935059, "learning_rate": 2.637244456579221e-06, "loss": 0.0949, "step": 9222 }, { "epoch": 1.4943292287751135, "grad_norm": 0.8179559111595154, "learning_rate": 2.636807809845361e-06, "loss": 0.1041, "step": 9223 }, { "epoch": 1.4944912508101102, "grad_norm": 0.8753211498260498, "learning_rate": 2.6363711589255115e-06, "loss": 0.1051, "step": 9224 }, { "epoch": 1.494653272845107, "grad_norm": 0.8662494421005249, "learning_rate": 2.6359345038330352e-06, "loss": 0.1087, "step": 9225 }, { "epoch": 1.4948152948801037, "grad_norm": 0.7673994898796082, "learning_rate": 2.6354978445812923e-06, "loss": 0.0913, "step": 9226 }, { "epoch": 1.4949773169151004, "grad_norm": 0.8224777579307556, "learning_rate": 2.6350611811836428e-06, "loss": 0.1055, "step": 9227 }, { "epoch": 1.4951393389500973, "grad_norm": 0.8199198246002197, "learning_rate": 2.6346245136534483e-06, "loss": 0.0981, "step": 9228 }, { "epoch": 1.495301360985094, "grad_norm": 0.7925064563751221, "learning_rate": 2.6341878420040694e-06, "loss": 0.0915, "step": 9229 }, { "epoch": 1.4954633830200907, "grad_norm": 0.7900672554969788, "learning_rate": 2.6337511662488678e-06, "loss": 0.1045, "step": 9230 }, { "epoch": 1.4956254050550875, "grad_norm": 0.8776764869689941, "learning_rate": 2.6333144864012027e-06, "loss": 0.1074, "step": 9231 }, { "epoch": 1.4957874270900842, "grad_norm": 0.9464653730392456, "learning_rate": 2.6328778024744384e-06, "loss": 0.1074, "step": 9232 }, { "epoch": 1.495949449125081, "grad_norm": 0.7300053834915161, "learning_rate": 2.6324411144819345e-06, "loss": 0.0959, "step": 9233 }, { "epoch": 1.4961114711600778, "grad_norm": 0.793613851070404, "learning_rate": 2.6320044224370526e-06, "loss": 0.1007, "step": 9234 }, { "epoch": 1.4962734931950745, "grad_norm": 0.7878672480583191, "learning_rate": 2.631567726353155e-06, "loss": 0.1016, "step": 9235 }, { "epoch": 1.4964355152300712, "grad_norm": 0.7945536971092224, "learning_rate": 2.6311310262436035e-06, "loss": 0.0929, "step": 9236 }, { "epoch": 1.496597537265068, "grad_norm": 0.9132923483848572, "learning_rate": 2.6306943221217597e-06, "loss": 0.114, "step": 9237 }, { "epoch": 1.4967595593000649, "grad_norm": 0.9040144681930542, "learning_rate": 2.6302576140009866e-06, "loss": 0.117, "step": 9238 }, { "epoch": 1.4969215813350616, "grad_norm": 0.8708134889602661, "learning_rate": 2.629820901894645e-06, "loss": 0.1075, "step": 9239 }, { "epoch": 1.4970836033700583, "grad_norm": 0.7671794295310974, "learning_rate": 2.6293841858160983e-06, "loss": 0.0955, "step": 9240 }, { "epoch": 1.497245625405055, "grad_norm": 0.7187214493751526, "learning_rate": 2.6289474657787084e-06, "loss": 0.0901, "step": 9241 }, { "epoch": 1.4974076474400517, "grad_norm": 0.8010789752006531, "learning_rate": 2.6285107417958385e-06, "loss": 0.1064, "step": 9242 }, { "epoch": 1.4975696694750487, "grad_norm": 0.7064031958580017, "learning_rate": 2.6280740138808503e-06, "loss": 0.0918, "step": 9243 }, { "epoch": 1.4977316915100454, "grad_norm": 0.7605817317962646, "learning_rate": 2.6276372820471073e-06, "loss": 0.0905, "step": 9244 }, { "epoch": 1.497893713545042, "grad_norm": 0.8160539269447327, "learning_rate": 2.6272005463079732e-06, "loss": 0.1117, "step": 9245 }, { "epoch": 1.498055735580039, "grad_norm": 0.7572857141494751, "learning_rate": 2.6267638066768087e-06, "loss": 0.0911, "step": 9246 }, { "epoch": 1.4982177576150357, "grad_norm": 0.7898995876312256, "learning_rate": 2.6263270631669796e-06, "loss": 0.0998, "step": 9247 }, { "epoch": 1.4983797796500324, "grad_norm": 0.8465975522994995, "learning_rate": 2.625890315791848e-06, "loss": 0.1061, "step": 9248 }, { "epoch": 1.4985418016850292, "grad_norm": 0.7732163667678833, "learning_rate": 2.6254535645647772e-06, "loss": 0.0983, "step": 9249 }, { "epoch": 1.4987038237200259, "grad_norm": 0.7840855717658997, "learning_rate": 2.625016809499131e-06, "loss": 0.1021, "step": 9250 }, { "epoch": 1.4988658457550228, "grad_norm": 0.7952522039413452, "learning_rate": 2.6245800506082727e-06, "loss": 0.0972, "step": 9251 }, { "epoch": 1.4990278677900195, "grad_norm": 0.7457720637321472, "learning_rate": 2.6241432879055667e-06, "loss": 0.0979, "step": 9252 }, { "epoch": 1.4991898898250162, "grad_norm": 0.8276432752609253, "learning_rate": 2.6237065214043754e-06, "loss": 0.108, "step": 9253 }, { "epoch": 1.499351911860013, "grad_norm": 0.7755012512207031, "learning_rate": 2.6232697511180654e-06, "loss": 0.0939, "step": 9254 }, { "epoch": 1.4995139338950096, "grad_norm": 0.7345067858695984, "learning_rate": 2.622832977059998e-06, "loss": 0.0905, "step": 9255 }, { "epoch": 1.4996759559300066, "grad_norm": 0.8585684299468994, "learning_rate": 2.6223961992435406e-06, "loss": 0.1049, "step": 9256 }, { "epoch": 1.4998379779650033, "grad_norm": 0.729276180267334, "learning_rate": 2.621959417682054e-06, "loss": 0.0873, "step": 9257 }, { "epoch": 1.5, "grad_norm": 0.8658743500709534, "learning_rate": 2.6215226323889048e-06, "loss": 0.1089, "step": 9258 }, { "epoch": 1.5001620220349967, "grad_norm": 0.7544455528259277, "learning_rate": 2.621085843377457e-06, "loss": 0.0943, "step": 9259 }, { "epoch": 1.5003240440699934, "grad_norm": 0.7996721863746643, "learning_rate": 2.620649050661076e-06, "loss": 0.0983, "step": 9260 }, { "epoch": 1.5004860661049904, "grad_norm": 0.8732225894927979, "learning_rate": 2.620212254253126e-06, "loss": 0.1048, "step": 9261 }, { "epoch": 1.500648088139987, "grad_norm": 0.8265805244445801, "learning_rate": 2.6197754541669714e-06, "loss": 0.1041, "step": 9262 }, { "epoch": 1.5008101101749838, "grad_norm": 0.8330001831054688, "learning_rate": 2.6193386504159777e-06, "loss": 0.1099, "step": 9263 }, { "epoch": 1.5009721322099807, "grad_norm": 0.780525267124176, "learning_rate": 2.6189018430135106e-06, "loss": 0.0999, "step": 9264 }, { "epoch": 1.5011341542449772, "grad_norm": 0.9457441568374634, "learning_rate": 2.618465031972935e-06, "loss": 0.1189, "step": 9265 }, { "epoch": 1.5012961762799741, "grad_norm": 0.8373966813087463, "learning_rate": 2.6180282173076156e-06, "loss": 0.1104, "step": 9266 }, { "epoch": 1.5014581983149708, "grad_norm": 0.7619836330413818, "learning_rate": 2.6175913990309184e-06, "loss": 0.0991, "step": 9267 }, { "epoch": 1.5016202203499676, "grad_norm": 0.7752346992492676, "learning_rate": 2.6171545771562085e-06, "loss": 0.0955, "step": 9268 }, { "epoch": 1.5017822423849645, "grad_norm": 0.8263670206069946, "learning_rate": 2.6167177516968536e-06, "loss": 0.0997, "step": 9269 }, { "epoch": 1.501944264419961, "grad_norm": 0.796293318271637, "learning_rate": 2.6162809226662167e-06, "loss": 0.1056, "step": 9270 }, { "epoch": 1.502106286454958, "grad_norm": 0.7782325148582458, "learning_rate": 2.615844090077665e-06, "loss": 0.0962, "step": 9271 }, { "epoch": 1.5022683084899546, "grad_norm": 0.9106411337852478, "learning_rate": 2.6154072539445645e-06, "loss": 0.1008, "step": 9272 }, { "epoch": 1.5024303305249513, "grad_norm": 0.8080524802207947, "learning_rate": 2.614970414280281e-06, "loss": 0.1053, "step": 9273 }, { "epoch": 1.5025923525599483, "grad_norm": 0.9011566638946533, "learning_rate": 2.6145335710981817e-06, "loss": 0.1223, "step": 9274 }, { "epoch": 1.5027543745949448, "grad_norm": 0.7807307243347168, "learning_rate": 2.6140967244116322e-06, "loss": 0.0926, "step": 9275 }, { "epoch": 1.5029163966299417, "grad_norm": 0.8064615726470947, "learning_rate": 2.613659874233999e-06, "loss": 0.0976, "step": 9276 }, { "epoch": 1.5030784186649384, "grad_norm": 0.8541350960731506, "learning_rate": 2.6132230205786483e-06, "loss": 0.1034, "step": 9277 }, { "epoch": 1.5032404406999351, "grad_norm": 0.8118453621864319, "learning_rate": 2.612786163458948e-06, "loss": 0.1009, "step": 9278 }, { "epoch": 1.503402462734932, "grad_norm": 0.852788507938385, "learning_rate": 2.6123493028882634e-06, "loss": 0.1146, "step": 9279 }, { "epoch": 1.5035644847699285, "grad_norm": 0.7566162347793579, "learning_rate": 2.611912438879962e-06, "loss": 0.0975, "step": 9280 }, { "epoch": 1.5037265068049255, "grad_norm": 0.9245677590370178, "learning_rate": 2.611475571447411e-06, "loss": 0.1078, "step": 9281 }, { "epoch": 1.5038885288399222, "grad_norm": 0.9976536631584167, "learning_rate": 2.611038700603977e-06, "loss": 0.116, "step": 9282 }, { "epoch": 1.504050550874919, "grad_norm": 0.8083858489990234, "learning_rate": 2.6106018263630283e-06, "loss": 0.1036, "step": 9283 }, { "epoch": 1.5042125729099158, "grad_norm": 0.7877292037010193, "learning_rate": 2.6101649487379304e-06, "loss": 0.0926, "step": 9284 }, { "epoch": 1.5043745949449125, "grad_norm": 0.9315115809440613, "learning_rate": 2.609728067742053e-06, "loss": 0.1173, "step": 9285 }, { "epoch": 1.5045366169799093, "grad_norm": 0.8206225037574768, "learning_rate": 2.6092911833887602e-06, "loss": 0.1077, "step": 9286 }, { "epoch": 1.5046986390149062, "grad_norm": 0.805716335773468, "learning_rate": 2.6088542956914233e-06, "loss": 0.1041, "step": 9287 }, { "epoch": 1.5048606610499027, "grad_norm": 0.9114910364151001, "learning_rate": 2.6084174046634075e-06, "loss": 0.1181, "step": 9288 }, { "epoch": 1.5050226830848996, "grad_norm": 0.8441805243492126, "learning_rate": 2.607980510318082e-06, "loss": 0.1069, "step": 9289 }, { "epoch": 1.5051847051198963, "grad_norm": 0.8596044182777405, "learning_rate": 2.607543612668814e-06, "loss": 0.101, "step": 9290 }, { "epoch": 1.505346727154893, "grad_norm": 0.8306862711906433, "learning_rate": 2.6071067117289717e-06, "loss": 0.1047, "step": 9291 }, { "epoch": 1.50550874918989, "grad_norm": 0.7705062031745911, "learning_rate": 2.6066698075119237e-06, "loss": 0.0974, "step": 9292 }, { "epoch": 1.5056707712248865, "grad_norm": 0.8047751784324646, "learning_rate": 2.606232900031037e-06, "loss": 0.1029, "step": 9293 }, { "epoch": 1.5058327932598834, "grad_norm": 0.7562994360923767, "learning_rate": 2.605795989299681e-06, "loss": 0.0957, "step": 9294 }, { "epoch": 1.50599481529488, "grad_norm": 0.9327423572540283, "learning_rate": 2.6053590753312237e-06, "loss": 0.0989, "step": 9295 }, { "epoch": 1.5061568373298768, "grad_norm": 0.7977178692817688, "learning_rate": 2.604922158139033e-06, "loss": 0.1054, "step": 9296 }, { "epoch": 1.5063188593648738, "grad_norm": 0.7948964238166809, "learning_rate": 2.6044852377364794e-06, "loss": 0.0938, "step": 9297 }, { "epoch": 1.5064808813998702, "grad_norm": 0.8069312572479248, "learning_rate": 2.6040483141369293e-06, "loss": 0.0934, "step": 9298 }, { "epoch": 1.5066429034348672, "grad_norm": 0.850750744342804, "learning_rate": 2.6036113873537526e-06, "loss": 0.0958, "step": 9299 }, { "epoch": 1.5068049254698639, "grad_norm": 0.8110750317573547, "learning_rate": 2.603174457400319e-06, "loss": 0.1068, "step": 9300 }, { "epoch": 1.5069669475048606, "grad_norm": 0.8430596590042114, "learning_rate": 2.602737524289996e-06, "loss": 0.0992, "step": 9301 }, { "epoch": 1.5071289695398575, "grad_norm": 0.8320333361625671, "learning_rate": 2.602300588036154e-06, "loss": 0.1031, "step": 9302 }, { "epoch": 1.507290991574854, "grad_norm": 0.8486113548278809, "learning_rate": 2.6018636486521615e-06, "loss": 0.1004, "step": 9303 }, { "epoch": 1.507453013609851, "grad_norm": 0.7198436260223389, "learning_rate": 2.6014267061513875e-06, "loss": 0.0936, "step": 9304 }, { "epoch": 1.5076150356448477, "grad_norm": 0.7572947144508362, "learning_rate": 2.6009897605472022e-06, "loss": 0.094, "step": 9305 }, { "epoch": 1.5077770576798444, "grad_norm": 0.8611274361610413, "learning_rate": 2.6005528118529738e-06, "loss": 0.1026, "step": 9306 }, { "epoch": 1.5079390797148413, "grad_norm": 0.904730498790741, "learning_rate": 2.6001158600820735e-06, "loss": 0.1111, "step": 9307 }, { "epoch": 1.508101101749838, "grad_norm": 0.8339022994041443, "learning_rate": 2.5996789052478693e-06, "loss": 0.1016, "step": 9308 }, { "epoch": 1.5082631237848347, "grad_norm": 0.8966616988182068, "learning_rate": 2.5992419473637327e-06, "loss": 0.1131, "step": 9309 }, { "epoch": 1.5084251458198314, "grad_norm": 0.7851499319076538, "learning_rate": 2.5988049864430314e-06, "loss": 0.1015, "step": 9310 }, { "epoch": 1.5085871678548282, "grad_norm": 0.8004862666130066, "learning_rate": 2.598368022499138e-06, "loss": 0.0977, "step": 9311 }, { "epoch": 1.508749189889825, "grad_norm": 0.7532079219818115, "learning_rate": 2.597931055545421e-06, "loss": 0.0939, "step": 9312 }, { "epoch": 1.5089112119248218, "grad_norm": 0.8671373128890991, "learning_rate": 2.59749408559525e-06, "loss": 0.1082, "step": 9313 }, { "epoch": 1.5090732339598185, "grad_norm": 0.7479811906814575, "learning_rate": 2.597057112661997e-06, "loss": 0.0937, "step": 9314 }, { "epoch": 1.5092352559948155, "grad_norm": 0.7849185466766357, "learning_rate": 2.59662013675903e-06, "loss": 0.1082, "step": 9315 }, { "epoch": 1.509397278029812, "grad_norm": 0.7421824336051941, "learning_rate": 2.5961831578997214e-06, "loss": 0.1007, "step": 9316 }, { "epoch": 1.5095593000648089, "grad_norm": 0.752606213092804, "learning_rate": 2.5957461760974407e-06, "loss": 0.0929, "step": 9317 }, { "epoch": 1.5097213220998056, "grad_norm": 0.8576077222824097, "learning_rate": 2.5953091913655586e-06, "loss": 0.1081, "step": 9318 }, { "epoch": 1.5098833441348023, "grad_norm": 0.7942414879798889, "learning_rate": 2.594872203717446e-06, "loss": 0.1102, "step": 9319 }, { "epoch": 1.5100453661697992, "grad_norm": 0.7361174821853638, "learning_rate": 2.594435213166473e-06, "loss": 0.0973, "step": 9320 }, { "epoch": 1.5102073882047957, "grad_norm": 0.7839969992637634, "learning_rate": 2.5939982197260115e-06, "loss": 0.0995, "step": 9321 }, { "epoch": 1.5103694102397927, "grad_norm": 0.9291492104530334, "learning_rate": 2.593561223409432e-06, "loss": 0.1089, "step": 9322 }, { "epoch": 1.5105314322747894, "grad_norm": 0.8989511132240295, "learning_rate": 2.5931242242301054e-06, "loss": 0.1078, "step": 9323 }, { "epoch": 1.510693454309786, "grad_norm": 0.8241267800331116, "learning_rate": 2.592687222201403e-06, "loss": 0.1092, "step": 9324 }, { "epoch": 1.510855476344783, "grad_norm": 0.7994397878646851, "learning_rate": 2.592250217336696e-06, "loss": 0.1023, "step": 9325 }, { "epoch": 1.5110174983797795, "grad_norm": 0.777235746383667, "learning_rate": 2.5918132096493552e-06, "loss": 0.0913, "step": 9326 }, { "epoch": 1.5111795204147764, "grad_norm": 0.7653077840805054, "learning_rate": 2.5913761991527527e-06, "loss": 0.0998, "step": 9327 }, { "epoch": 1.5113415424497731, "grad_norm": 0.9247298836708069, "learning_rate": 2.5909391858602596e-06, "loss": 0.114, "step": 9328 }, { "epoch": 1.5115035644847699, "grad_norm": 0.8177992701530457, "learning_rate": 2.590502169785247e-06, "loss": 0.0999, "step": 9329 }, { "epoch": 1.5116655865197668, "grad_norm": 0.7962645292282104, "learning_rate": 2.5900651509410875e-06, "loss": 0.1064, "step": 9330 }, { "epoch": 1.5118276085547635, "grad_norm": 0.8084893822669983, "learning_rate": 2.589628129341153e-06, "loss": 0.1103, "step": 9331 }, { "epoch": 1.5119896305897602, "grad_norm": 0.7229411005973816, "learning_rate": 2.5891911049988133e-06, "loss": 0.089, "step": 9332 }, { "epoch": 1.512151652624757, "grad_norm": 1.190307855606079, "learning_rate": 2.588754077927442e-06, "loss": 0.1183, "step": 9333 }, { "epoch": 1.5123136746597536, "grad_norm": 0.746012806892395, "learning_rate": 2.5883170481404112e-06, "loss": 0.0924, "step": 9334 }, { "epoch": 1.5124756966947506, "grad_norm": 0.8450835347175598, "learning_rate": 2.5878800156510925e-06, "loss": 0.1091, "step": 9335 }, { "epoch": 1.5126377187297473, "grad_norm": 0.7555860280990601, "learning_rate": 2.587442980472858e-06, "loss": 0.0875, "step": 9336 }, { "epoch": 1.512799740764744, "grad_norm": 0.8183492422103882, "learning_rate": 2.5870059426190787e-06, "loss": 0.0995, "step": 9337 }, { "epoch": 1.512961762799741, "grad_norm": 0.8046535849571228, "learning_rate": 2.5865689021031292e-06, "loss": 0.0967, "step": 9338 }, { "epoch": 1.5131237848347374, "grad_norm": 0.8389936685562134, "learning_rate": 2.5861318589383806e-06, "loss": 0.0875, "step": 9339 }, { "epoch": 1.5132858068697344, "grad_norm": 0.9016577005386353, "learning_rate": 2.5856948131382055e-06, "loss": 0.1099, "step": 9340 }, { "epoch": 1.513447828904731, "grad_norm": 0.8551927804946899, "learning_rate": 2.585257764715976e-06, "loss": 0.1044, "step": 9341 }, { "epoch": 1.5136098509397278, "grad_norm": 0.8470696210861206, "learning_rate": 2.584820713685066e-06, "loss": 0.1048, "step": 9342 }, { "epoch": 1.5137718729747247, "grad_norm": 0.9146429896354675, "learning_rate": 2.5843836600588474e-06, "loss": 0.0846, "step": 9343 }, { "epoch": 1.5139338950097212, "grad_norm": 0.8819512128829956, "learning_rate": 2.5839466038506927e-06, "loss": 0.1131, "step": 9344 }, { "epoch": 1.5140959170447181, "grad_norm": 0.7611178755760193, "learning_rate": 2.583509545073975e-06, "loss": 0.0911, "step": 9345 }, { "epoch": 1.5142579390797148, "grad_norm": 0.8343874216079712, "learning_rate": 2.5830724837420675e-06, "loss": 0.1025, "step": 9346 }, { "epoch": 1.5144199611147116, "grad_norm": 0.8855875134468079, "learning_rate": 2.5826354198683433e-06, "loss": 0.1137, "step": 9347 }, { "epoch": 1.5145819831497085, "grad_norm": 0.8919705152511597, "learning_rate": 2.582198353466175e-06, "loss": 0.1163, "step": 9348 }, { "epoch": 1.514744005184705, "grad_norm": 0.7063683271408081, "learning_rate": 2.5817612845489355e-06, "loss": 0.0853, "step": 9349 }, { "epoch": 1.514906027219702, "grad_norm": 0.9223026633262634, "learning_rate": 2.5813242131299986e-06, "loss": 0.1116, "step": 9350 }, { "epoch": 1.5150680492546986, "grad_norm": 0.7677808403968811, "learning_rate": 2.580887139222738e-06, "loss": 0.0966, "step": 9351 }, { "epoch": 1.5152300712896953, "grad_norm": 0.8146395087242126, "learning_rate": 2.5804500628405265e-06, "loss": 0.0944, "step": 9352 }, { "epoch": 1.5153920933246923, "grad_norm": 0.804042637348175, "learning_rate": 2.580012983996738e-06, "loss": 0.1033, "step": 9353 }, { "epoch": 1.5155541153596888, "grad_norm": 0.9203242659568787, "learning_rate": 2.5795759027047457e-06, "loss": 0.1108, "step": 9354 }, { "epoch": 1.5157161373946857, "grad_norm": 0.8421667218208313, "learning_rate": 2.5791388189779237e-06, "loss": 0.0992, "step": 9355 }, { "epoch": 1.5158781594296824, "grad_norm": 0.8935291767120361, "learning_rate": 2.578701732829645e-06, "loss": 0.1045, "step": 9356 }, { "epoch": 1.5160401814646791, "grad_norm": 0.8606316447257996, "learning_rate": 2.5782646442732833e-06, "loss": 0.1096, "step": 9357 }, { "epoch": 1.516202203499676, "grad_norm": 0.8459725975990295, "learning_rate": 2.5778275533222135e-06, "loss": 0.1099, "step": 9358 }, { "epoch": 1.5163642255346728, "grad_norm": 0.7800662517547607, "learning_rate": 2.5773904599898087e-06, "loss": 0.0971, "step": 9359 }, { "epoch": 1.5165262475696695, "grad_norm": 0.8285083770751953, "learning_rate": 2.5769533642894433e-06, "loss": 0.1019, "step": 9360 }, { "epoch": 1.5166882696046662, "grad_norm": 0.8594668507575989, "learning_rate": 2.5765162662344906e-06, "loss": 0.1067, "step": 9361 }, { "epoch": 1.516850291639663, "grad_norm": 0.7574124932289124, "learning_rate": 2.576079165838326e-06, "loss": 0.0894, "step": 9362 }, { "epoch": 1.5170123136746598, "grad_norm": 0.8705188632011414, "learning_rate": 2.575642063114322e-06, "loss": 0.1104, "step": 9363 }, { "epoch": 1.5171743357096565, "grad_norm": 0.8245108723640442, "learning_rate": 2.5752049580758555e-06, "loss": 0.104, "step": 9364 }, { "epoch": 1.5173363577446533, "grad_norm": 0.9124253988265991, "learning_rate": 2.5747678507362977e-06, "loss": 0.1215, "step": 9365 }, { "epoch": 1.5174983797796502, "grad_norm": 0.8030577898025513, "learning_rate": 2.5743307411090255e-06, "loss": 0.1064, "step": 9366 }, { "epoch": 1.5176604018146467, "grad_norm": 0.7944320440292358, "learning_rate": 2.5738936292074122e-06, "loss": 0.0991, "step": 9367 }, { "epoch": 1.5178224238496436, "grad_norm": 0.7724756598472595, "learning_rate": 2.5734565150448325e-06, "loss": 0.1032, "step": 9368 }, { "epoch": 1.5179844458846403, "grad_norm": 0.7421293258666992, "learning_rate": 2.5730193986346623e-06, "loss": 0.0941, "step": 9369 }, { "epoch": 1.518146467919637, "grad_norm": 0.7390490770339966, "learning_rate": 2.5725822799902738e-06, "loss": 0.0941, "step": 9370 }, { "epoch": 1.518308489954634, "grad_norm": 0.8915257453918457, "learning_rate": 2.572145159125044e-06, "loss": 0.1067, "step": 9371 }, { "epoch": 1.5184705119896305, "grad_norm": 0.8176239132881165, "learning_rate": 2.5717080360523464e-06, "loss": 0.1001, "step": 9372 }, { "epoch": 1.5186325340246274, "grad_norm": 0.7526306509971619, "learning_rate": 2.571270910785557e-06, "loss": 0.1013, "step": 9373 }, { "epoch": 1.518794556059624, "grad_norm": 0.7733591794967651, "learning_rate": 2.57083378333805e-06, "loss": 0.1024, "step": 9374 }, { "epoch": 1.5189565780946208, "grad_norm": 0.7548816204071045, "learning_rate": 2.5703966537232006e-06, "loss": 0.104, "step": 9375 }, { "epoch": 1.5191186001296177, "grad_norm": 0.8029460906982422, "learning_rate": 2.5699595219543838e-06, "loss": 0.1031, "step": 9376 }, { "epoch": 1.5192806221646142, "grad_norm": 0.912986695766449, "learning_rate": 2.569522388044975e-06, "loss": 0.1102, "step": 9377 }, { "epoch": 1.5194426441996112, "grad_norm": 0.8626551032066345, "learning_rate": 2.5690852520083496e-06, "loss": 0.1074, "step": 9378 }, { "epoch": 1.5196046662346079, "grad_norm": 0.7937400937080383, "learning_rate": 2.5686481138578824e-06, "loss": 0.0928, "step": 9379 }, { "epoch": 1.5197666882696046, "grad_norm": 0.7942370772361755, "learning_rate": 2.5682109736069492e-06, "loss": 0.101, "step": 9380 }, { "epoch": 1.5199287103046015, "grad_norm": 0.8187708854675293, "learning_rate": 2.5677738312689248e-06, "loss": 0.1009, "step": 9381 }, { "epoch": 1.5200907323395982, "grad_norm": 1.0334949493408203, "learning_rate": 2.5673366868571858e-06, "loss": 0.1277, "step": 9382 }, { "epoch": 1.520252754374595, "grad_norm": 0.7634156942367554, "learning_rate": 2.5668995403851065e-06, "loss": 0.0979, "step": 9383 }, { "epoch": 1.5204147764095917, "grad_norm": 0.7355403304100037, "learning_rate": 2.566462391866064e-06, "loss": 0.0939, "step": 9384 }, { "epoch": 1.5205767984445884, "grad_norm": 0.8203917145729065, "learning_rate": 2.5660252413134323e-06, "loss": 0.103, "step": 9385 }, { "epoch": 1.5207388204795853, "grad_norm": 0.8019030690193176, "learning_rate": 2.5655880887405893e-06, "loss": 0.1089, "step": 9386 }, { "epoch": 1.520900842514582, "grad_norm": 0.8758410215377808, "learning_rate": 2.565150934160908e-06, "loss": 0.1171, "step": 9387 }, { "epoch": 1.5210628645495787, "grad_norm": 0.8551106452941895, "learning_rate": 2.564713777587767e-06, "loss": 0.1006, "step": 9388 }, { "epoch": 1.5212248865845757, "grad_norm": 0.7470712065696716, "learning_rate": 2.5642766190345396e-06, "loss": 0.0948, "step": 9389 }, { "epoch": 1.5213869086195722, "grad_norm": 0.845240592956543, "learning_rate": 2.5638394585146044e-06, "loss": 0.0975, "step": 9390 }, { "epoch": 1.521548930654569, "grad_norm": 0.9042078256607056, "learning_rate": 2.5634022960413362e-06, "loss": 0.1057, "step": 9391 }, { "epoch": 1.5217109526895658, "grad_norm": 0.844704270362854, "learning_rate": 2.56296513162811e-06, "loss": 0.1008, "step": 9392 }, { "epoch": 1.5218729747245625, "grad_norm": 0.8592380881309509, "learning_rate": 2.5625279652883043e-06, "loss": 0.1054, "step": 9393 }, { "epoch": 1.5220349967595594, "grad_norm": 0.7636672258377075, "learning_rate": 2.5620907970352937e-06, "loss": 0.0947, "step": 9394 }, { "epoch": 1.522197018794556, "grad_norm": 0.887532114982605, "learning_rate": 2.5616536268824555e-06, "loss": 0.1053, "step": 9395 }, { "epoch": 1.5223590408295529, "grad_norm": 0.9102559089660645, "learning_rate": 2.561216454843165e-06, "loss": 0.1074, "step": 9396 }, { "epoch": 1.5225210628645496, "grad_norm": 0.9450837969779968, "learning_rate": 2.560779280930799e-06, "loss": 0.1085, "step": 9397 }, { "epoch": 1.5226830848995463, "grad_norm": 0.7672545313835144, "learning_rate": 2.5603421051587344e-06, "loss": 0.0921, "step": 9398 }, { "epoch": 1.5228451069345432, "grad_norm": 0.8284415006637573, "learning_rate": 2.559904927540347e-06, "loss": 0.1048, "step": 9399 }, { "epoch": 1.5230071289695397, "grad_norm": 0.856656014919281, "learning_rate": 2.5594677480890152e-06, "loss": 0.1047, "step": 9400 }, { "epoch": 1.5231691510045366, "grad_norm": 0.791228711605072, "learning_rate": 2.559030566818112e-06, "loss": 0.1041, "step": 9401 }, { "epoch": 1.5233311730395334, "grad_norm": 0.8752877116203308, "learning_rate": 2.558593383741018e-06, "loss": 0.0996, "step": 9402 }, { "epoch": 1.52349319507453, "grad_norm": 0.8415939211845398, "learning_rate": 2.558156198871108e-06, "loss": 0.0964, "step": 9403 }, { "epoch": 1.523655217109527, "grad_norm": 0.8422085046768188, "learning_rate": 2.5577190122217583e-06, "loss": 0.1056, "step": 9404 }, { "epoch": 1.5238172391445235, "grad_norm": 0.7601490616798401, "learning_rate": 2.557281823806347e-06, "loss": 0.0986, "step": 9405 }, { "epoch": 1.5239792611795204, "grad_norm": 0.8581531047821045, "learning_rate": 2.55684463363825e-06, "loss": 0.1018, "step": 9406 }, { "epoch": 1.5241412832145171, "grad_norm": 0.7569452524185181, "learning_rate": 2.5564074417308454e-06, "loss": 0.0993, "step": 9407 }, { "epoch": 1.5243033052495139, "grad_norm": 0.7859471440315247, "learning_rate": 2.5559702480975094e-06, "loss": 0.0949, "step": 9408 }, { "epoch": 1.5244653272845108, "grad_norm": 0.7991012930870056, "learning_rate": 2.5555330527516197e-06, "loss": 0.1051, "step": 9409 }, { "epoch": 1.5246273493195075, "grad_norm": 0.8928097486495972, "learning_rate": 2.5550958557065523e-06, "loss": 0.0995, "step": 9410 }, { "epoch": 1.5247893713545042, "grad_norm": 1.0804157257080078, "learning_rate": 2.554658656975686e-06, "loss": 0.1198, "step": 9411 }, { "epoch": 1.524951393389501, "grad_norm": 0.8879733681678772, "learning_rate": 2.554221456572396e-06, "loss": 0.1064, "step": 9412 }, { "epoch": 1.5251134154244976, "grad_norm": 0.8105085492134094, "learning_rate": 2.553784254510061e-06, "loss": 0.104, "step": 9413 }, { "epoch": 1.5252754374594946, "grad_norm": 0.7311967015266418, "learning_rate": 2.553347050802058e-06, "loss": 0.0945, "step": 9414 }, { "epoch": 1.5254374594944913, "grad_norm": 0.8524883985519409, "learning_rate": 2.5529098454617644e-06, "loss": 0.1129, "step": 9415 }, { "epoch": 1.525599481529488, "grad_norm": 0.7317702174186707, "learning_rate": 2.552472638502557e-06, "loss": 0.0935, "step": 9416 }, { "epoch": 1.525761503564485, "grad_norm": 0.8908723592758179, "learning_rate": 2.5520354299378145e-06, "loss": 0.1069, "step": 9417 }, { "epoch": 1.5259235255994814, "grad_norm": 0.8989855051040649, "learning_rate": 2.5515982197809142e-06, "loss": 0.1121, "step": 9418 }, { "epoch": 1.5260855476344783, "grad_norm": 0.9153575897216797, "learning_rate": 2.5511610080452322e-06, "loss": 0.1078, "step": 9419 }, { "epoch": 1.526247569669475, "grad_norm": 1.041412353515625, "learning_rate": 2.5507237947441478e-06, "loss": 0.1138, "step": 9420 }, { "epoch": 1.5264095917044718, "grad_norm": 0.935231626033783, "learning_rate": 2.5502865798910377e-06, "loss": 0.1152, "step": 9421 }, { "epoch": 1.5265716137394687, "grad_norm": 0.7345618009567261, "learning_rate": 2.5498493634992803e-06, "loss": 0.0955, "step": 9422 }, { "epoch": 1.5267336357744652, "grad_norm": 0.8384798765182495, "learning_rate": 2.5494121455822526e-06, "loss": 0.1117, "step": 9423 }, { "epoch": 1.5268956578094621, "grad_norm": 0.8286236524581909, "learning_rate": 2.5489749261533333e-06, "loss": 0.104, "step": 9424 }, { "epoch": 1.5270576798444588, "grad_norm": 0.7071642875671387, "learning_rate": 2.5485377052258987e-06, "loss": 0.0943, "step": 9425 }, { "epoch": 1.5272197018794555, "grad_norm": 0.7644874453544617, "learning_rate": 2.548100482813329e-06, "loss": 0.0962, "step": 9426 }, { "epoch": 1.5273817239144525, "grad_norm": 0.8506636619567871, "learning_rate": 2.547663258929001e-06, "loss": 0.1085, "step": 9427 }, { "epoch": 1.527543745949449, "grad_norm": 0.7493010759353638, "learning_rate": 2.5472260335862915e-06, "loss": 0.0899, "step": 9428 }, { "epoch": 1.527705767984446, "grad_norm": 0.7209338545799255, "learning_rate": 2.5467888067985803e-06, "loss": 0.0993, "step": 9429 }, { "epoch": 1.5278677900194426, "grad_norm": 0.9093368053436279, "learning_rate": 2.546351578579245e-06, "loss": 0.1087, "step": 9430 }, { "epoch": 1.5280298120544393, "grad_norm": 0.7698819637298584, "learning_rate": 2.545914348941664e-06, "loss": 0.0959, "step": 9431 }, { "epoch": 1.5281918340894363, "grad_norm": 0.8163896799087524, "learning_rate": 2.545477117899213e-06, "loss": 0.0969, "step": 9432 }, { "epoch": 1.528353856124433, "grad_norm": 0.7964690327644348, "learning_rate": 2.5450398854652747e-06, "loss": 0.1028, "step": 9433 }, { "epoch": 1.5285158781594297, "grad_norm": 0.894778311252594, "learning_rate": 2.5446026516532235e-06, "loss": 0.111, "step": 9434 }, { "epoch": 1.5286779001944264, "grad_norm": 0.903476893901825, "learning_rate": 2.5441654164764396e-06, "loss": 0.1154, "step": 9435 }, { "epoch": 1.528839922229423, "grad_norm": 0.8074314594268799, "learning_rate": 2.5437281799483005e-06, "loss": 0.1042, "step": 9436 }, { "epoch": 1.52900194426442, "grad_norm": 0.844743013381958, "learning_rate": 2.543290942082185e-06, "loss": 0.1131, "step": 9437 }, { "epoch": 1.5291639662994168, "grad_norm": 0.8503636121749878, "learning_rate": 2.542853702891471e-06, "loss": 0.1046, "step": 9438 }, { "epoch": 1.5293259883344135, "grad_norm": 0.8960352540016174, "learning_rate": 2.542416462389539e-06, "loss": 0.1142, "step": 9439 }, { "epoch": 1.5294880103694104, "grad_norm": 0.7933899164199829, "learning_rate": 2.541979220589765e-06, "loss": 0.0976, "step": 9440 }, { "epoch": 1.529650032404407, "grad_norm": 0.8900126814842224, "learning_rate": 2.5415419775055277e-06, "loss": 0.1091, "step": 9441 }, { "epoch": 1.5298120544394038, "grad_norm": 0.7652503848075867, "learning_rate": 2.541104733150207e-06, "loss": 0.0999, "step": 9442 }, { "epoch": 1.5299740764744005, "grad_norm": 0.818130373954773, "learning_rate": 2.5406674875371807e-06, "loss": 0.1101, "step": 9443 }, { "epoch": 1.5301360985093972, "grad_norm": 0.7544717788696289, "learning_rate": 2.540230240679828e-06, "loss": 0.0881, "step": 9444 }, { "epoch": 1.5302981205443942, "grad_norm": 0.856606662273407, "learning_rate": 2.539792992591527e-06, "loss": 0.1124, "step": 9445 }, { "epoch": 1.5304601425793907, "grad_norm": 0.7668637633323669, "learning_rate": 2.5393557432856575e-06, "loss": 0.0951, "step": 9446 }, { "epoch": 1.5306221646143876, "grad_norm": 0.8169387578964233, "learning_rate": 2.538918492775596e-06, "loss": 0.1015, "step": 9447 }, { "epoch": 1.5307841866493843, "grad_norm": 0.7206966876983643, "learning_rate": 2.5384812410747244e-06, "loss": 0.0873, "step": 9448 }, { "epoch": 1.530946208684381, "grad_norm": 0.803081750869751, "learning_rate": 2.5380439881964185e-06, "loss": 0.093, "step": 9449 }, { "epoch": 1.531108230719378, "grad_norm": 0.9361199736595154, "learning_rate": 2.53760673415406e-06, "loss": 0.1114, "step": 9450 }, { "epoch": 1.5312702527543745, "grad_norm": 0.7436367273330688, "learning_rate": 2.537169478961026e-06, "loss": 0.0945, "step": 9451 }, { "epoch": 1.5314322747893714, "grad_norm": 0.8524910807609558, "learning_rate": 2.5367322226306956e-06, "loss": 0.1094, "step": 9452 }, { "epoch": 1.531594296824368, "grad_norm": 0.8166900873184204, "learning_rate": 2.5362949651764484e-06, "loss": 0.1032, "step": 9453 }, { "epoch": 1.5317563188593648, "grad_norm": 0.8203127384185791, "learning_rate": 2.5358577066116622e-06, "loss": 0.1051, "step": 9454 }, { "epoch": 1.5319183408943617, "grad_norm": 0.7983186841011047, "learning_rate": 2.5354204469497185e-06, "loss": 0.1018, "step": 9455 }, { "epoch": 1.5320803629293582, "grad_norm": 0.7971822023391724, "learning_rate": 2.534983186203993e-06, "loss": 0.1028, "step": 9456 }, { "epoch": 1.5322423849643552, "grad_norm": 0.7759663462638855, "learning_rate": 2.5345459243878684e-06, "loss": 0.0967, "step": 9457 }, { "epoch": 1.5324044069993519, "grad_norm": 0.8662815093994141, "learning_rate": 2.5341086615147207e-06, "loss": 0.1036, "step": 9458 }, { "epoch": 1.5325664290343486, "grad_norm": 0.8577237725257874, "learning_rate": 2.5336713975979315e-06, "loss": 0.1091, "step": 9459 }, { "epoch": 1.5327284510693455, "grad_norm": 0.713832676410675, "learning_rate": 2.5332341326508786e-06, "loss": 0.0945, "step": 9460 }, { "epoch": 1.5328904731043422, "grad_norm": 0.7215037941932678, "learning_rate": 2.532796866686942e-06, "loss": 0.0919, "step": 9461 }, { "epoch": 1.533052495139339, "grad_norm": 0.7585532665252686, "learning_rate": 2.5323595997195005e-06, "loss": 0.098, "step": 9462 }, { "epoch": 1.5332145171743357, "grad_norm": 0.7682742476463318, "learning_rate": 2.5319223317619333e-06, "loss": 0.0952, "step": 9463 }, { "epoch": 1.5333765392093324, "grad_norm": 0.7284139394760132, "learning_rate": 2.53148506282762e-06, "loss": 0.0969, "step": 9464 }, { "epoch": 1.5335385612443293, "grad_norm": 0.8855341076850891, "learning_rate": 2.5310477929299402e-06, "loss": 0.1052, "step": 9465 }, { "epoch": 1.533700583279326, "grad_norm": 0.7966269254684448, "learning_rate": 2.530610522082273e-06, "loss": 0.1066, "step": 9466 }, { "epoch": 1.5338626053143227, "grad_norm": 0.8642690181732178, "learning_rate": 2.5301732502979977e-06, "loss": 0.1016, "step": 9467 }, { "epoch": 1.5340246273493197, "grad_norm": 1.0195221900939941, "learning_rate": 2.529735977590494e-06, "loss": 0.1063, "step": 9468 }, { "epoch": 1.5341866493843161, "grad_norm": 0.9482093453407288, "learning_rate": 2.5292987039731415e-06, "loss": 0.116, "step": 9469 }, { "epoch": 1.534348671419313, "grad_norm": 0.8184714317321777, "learning_rate": 2.52886142945932e-06, "loss": 0.0935, "step": 9470 }, { "epoch": 1.5345106934543098, "grad_norm": 0.9412416219711304, "learning_rate": 2.5284241540624077e-06, "loss": 0.1175, "step": 9471 }, { "epoch": 1.5346727154893065, "grad_norm": 0.9039766192436218, "learning_rate": 2.527986877795786e-06, "loss": 0.1045, "step": 9472 }, { "epoch": 1.5348347375243034, "grad_norm": 0.7086098790168762, "learning_rate": 2.527549600672833e-06, "loss": 0.0866, "step": 9473 }, { "epoch": 1.5349967595593, "grad_norm": 0.7739736437797546, "learning_rate": 2.527112322706929e-06, "loss": 0.1019, "step": 9474 }, { "epoch": 1.5351587815942969, "grad_norm": 0.8489776253700256, "learning_rate": 2.5266750439114533e-06, "loss": 0.1016, "step": 9475 }, { "epoch": 1.5353208036292936, "grad_norm": 0.8796210885047913, "learning_rate": 2.526237764299786e-06, "loss": 0.1076, "step": 9476 }, { "epoch": 1.5354828256642903, "grad_norm": 0.7349861264228821, "learning_rate": 2.525800483885307e-06, "loss": 0.0936, "step": 9477 }, { "epoch": 1.5356448476992872, "grad_norm": 0.833838701248169, "learning_rate": 2.5253632026813945e-06, "loss": 0.1022, "step": 9478 }, { "epoch": 1.5358068697342837, "grad_norm": 0.7381966710090637, "learning_rate": 2.524925920701431e-06, "loss": 0.0966, "step": 9479 }, { "epoch": 1.5359688917692806, "grad_norm": 0.8320499658584595, "learning_rate": 2.524488637958793e-06, "loss": 0.1105, "step": 9480 }, { "epoch": 1.5361309138042774, "grad_norm": 0.9006655216217041, "learning_rate": 2.5240513544668634e-06, "loss": 0.11, "step": 9481 }, { "epoch": 1.536292935839274, "grad_norm": 0.910051703453064, "learning_rate": 2.5236140702390194e-06, "loss": 0.1015, "step": 9482 }, { "epoch": 1.536454957874271, "grad_norm": 0.788629949092865, "learning_rate": 2.5231767852886424e-06, "loss": 0.0979, "step": 9483 }, { "epoch": 1.5366169799092677, "grad_norm": 0.8495368957519531, "learning_rate": 2.522739499629112e-06, "loss": 0.0949, "step": 9484 }, { "epoch": 1.5367790019442644, "grad_norm": 0.7806404232978821, "learning_rate": 2.522302213273808e-06, "loss": 0.1007, "step": 9485 }, { "epoch": 1.5369410239792611, "grad_norm": 0.8518389463424683, "learning_rate": 2.5218649262361104e-06, "loss": 0.1034, "step": 9486 }, { "epoch": 1.5371030460142578, "grad_norm": 0.9559544324874878, "learning_rate": 2.521427638529398e-06, "loss": 0.1126, "step": 9487 }, { "epoch": 1.5372650680492548, "grad_norm": 0.7953001260757446, "learning_rate": 2.520990350167053e-06, "loss": 0.0949, "step": 9488 }, { "epoch": 1.5374270900842515, "grad_norm": 0.8259668946266174, "learning_rate": 2.5205530611624537e-06, "loss": 0.0983, "step": 9489 }, { "epoch": 1.5375891121192482, "grad_norm": 0.9413937330245972, "learning_rate": 2.5201157715289796e-06, "loss": 0.1171, "step": 9490 }, { "epoch": 1.5377511341542451, "grad_norm": 0.7779377698898315, "learning_rate": 2.5196784812800125e-06, "loss": 0.1009, "step": 9491 }, { "epoch": 1.5379131561892416, "grad_norm": 0.7650463581085205, "learning_rate": 2.519241190428931e-06, "loss": 0.0964, "step": 9492 }, { "epoch": 1.5380751782242386, "grad_norm": 0.8391408920288086, "learning_rate": 2.5188038989891154e-06, "loss": 0.1014, "step": 9493 }, { "epoch": 1.5382372002592353, "grad_norm": 0.9325923323631287, "learning_rate": 2.518366606973947e-06, "loss": 0.1169, "step": 9494 }, { "epoch": 1.538399222294232, "grad_norm": 0.8795773983001709, "learning_rate": 2.5179293143968048e-06, "loss": 0.1103, "step": 9495 }, { "epoch": 1.538561244329229, "grad_norm": 0.9822404384613037, "learning_rate": 2.517492021271068e-06, "loss": 0.1189, "step": 9496 }, { "epoch": 1.5387232663642254, "grad_norm": 0.8007500171661377, "learning_rate": 2.517054727610118e-06, "loss": 0.1077, "step": 9497 }, { "epoch": 1.5388852883992223, "grad_norm": 0.7420747876167297, "learning_rate": 2.5166174334273347e-06, "loss": 0.0917, "step": 9498 }, { "epoch": 1.539047310434219, "grad_norm": 0.7478721141815186, "learning_rate": 2.5161801387360986e-06, "loss": 0.1011, "step": 9499 }, { "epoch": 1.5392093324692158, "grad_norm": 0.7703060507774353, "learning_rate": 2.5157428435497887e-06, "loss": 0.0919, "step": 9500 }, { "epoch": 1.5393713545042127, "grad_norm": 0.8613007068634033, "learning_rate": 2.515305547881787e-06, "loss": 0.1087, "step": 9501 }, { "epoch": 1.5395333765392092, "grad_norm": 0.7977644205093384, "learning_rate": 2.5148682517454707e-06, "loss": 0.1016, "step": 9502 }, { "epoch": 1.5396953985742061, "grad_norm": 0.8988528251647949, "learning_rate": 2.5144309551542233e-06, "loss": 0.1072, "step": 9503 }, { "epoch": 1.5398574206092028, "grad_norm": 0.7992510795593262, "learning_rate": 2.5139936581214235e-06, "loss": 0.0993, "step": 9504 }, { "epoch": 1.5400194426441995, "grad_norm": 0.7492799758911133, "learning_rate": 2.513556360660451e-06, "loss": 0.0916, "step": 9505 }, { "epoch": 1.5401814646791965, "grad_norm": 0.6518522500991821, "learning_rate": 2.5131190627846875e-06, "loss": 0.0852, "step": 9506 }, { "epoch": 1.540343486714193, "grad_norm": 0.8497022390365601, "learning_rate": 2.512681764507512e-06, "loss": 0.109, "step": 9507 }, { "epoch": 1.54050550874919, "grad_norm": 0.756066083908081, "learning_rate": 2.512244465842305e-06, "loss": 0.1012, "step": 9508 }, { "epoch": 1.5406675307841866, "grad_norm": 0.8988474607467651, "learning_rate": 2.511807166802447e-06, "loss": 0.1147, "step": 9509 }, { "epoch": 1.5408295528191833, "grad_norm": 0.7836807370185852, "learning_rate": 2.5113698674013186e-06, "loss": 0.0989, "step": 9510 }, { "epoch": 1.5409915748541803, "grad_norm": 0.8765679597854614, "learning_rate": 2.510932567652299e-06, "loss": 0.1124, "step": 9511 }, { "epoch": 1.541153596889177, "grad_norm": 0.8468899130821228, "learning_rate": 2.5104952675687706e-06, "loss": 0.1, "step": 9512 }, { "epoch": 1.5413156189241737, "grad_norm": 0.8771848082542419, "learning_rate": 2.5100579671641114e-06, "loss": 0.1102, "step": 9513 }, { "epoch": 1.5414776409591704, "grad_norm": 0.8875617980957031, "learning_rate": 2.509620666451703e-06, "loss": 0.1088, "step": 9514 }, { "epoch": 1.541639662994167, "grad_norm": 0.7843181490898132, "learning_rate": 2.5091833654449254e-06, "loss": 0.0942, "step": 9515 }, { "epoch": 1.541801685029164, "grad_norm": 0.9153252243995667, "learning_rate": 2.5087460641571594e-06, "loss": 0.1122, "step": 9516 }, { "epoch": 1.5419637070641607, "grad_norm": 0.8474258184432983, "learning_rate": 2.5083087626017847e-06, "loss": 0.1033, "step": 9517 }, { "epoch": 1.5421257290991575, "grad_norm": 0.8334153294563293, "learning_rate": 2.5078714607921825e-06, "loss": 0.106, "step": 9518 }, { "epoch": 1.5422877511341544, "grad_norm": 0.8846608400344849, "learning_rate": 2.507434158741732e-06, "loss": 0.1001, "step": 9519 }, { "epoch": 1.5424497731691509, "grad_norm": 0.8367900848388672, "learning_rate": 2.506996856463814e-06, "loss": 0.1098, "step": 9520 }, { "epoch": 1.5426117952041478, "grad_norm": 0.7622178792953491, "learning_rate": 2.5065595539718098e-06, "loss": 0.0984, "step": 9521 }, { "epoch": 1.5427738172391445, "grad_norm": 0.7733889818191528, "learning_rate": 2.506122251279099e-06, "loss": 0.1089, "step": 9522 }, { "epoch": 1.5429358392741412, "grad_norm": 0.9063085913658142, "learning_rate": 2.5056849483990614e-06, "loss": 0.1027, "step": 9523 }, { "epoch": 1.5430978613091382, "grad_norm": 0.9877629280090332, "learning_rate": 2.5052476453450788e-06, "loss": 0.1207, "step": 9524 }, { "epoch": 1.5432598833441347, "grad_norm": 0.8588782548904419, "learning_rate": 2.5048103421305313e-06, "loss": 0.1079, "step": 9525 }, { "epoch": 1.5434219053791316, "grad_norm": 0.7452812194824219, "learning_rate": 2.504373038768799e-06, "loss": 0.0914, "step": 9526 }, { "epoch": 1.5435839274141283, "grad_norm": 0.8148205876350403, "learning_rate": 2.5039357352732613e-06, "loss": 0.109, "step": 9527 }, { "epoch": 1.543745949449125, "grad_norm": 0.9041098356246948, "learning_rate": 2.5034984316573003e-06, "loss": 0.1128, "step": 9528 }, { "epoch": 1.543907971484122, "grad_norm": 0.8906643986701965, "learning_rate": 2.5030611279342955e-06, "loss": 0.123, "step": 9529 }, { "epoch": 1.5440699935191184, "grad_norm": 0.8250147700309753, "learning_rate": 2.5026238241176283e-06, "loss": 0.1044, "step": 9530 }, { "epoch": 1.5442320155541154, "grad_norm": 0.8400077819824219, "learning_rate": 2.502186520220677e-06, "loss": 0.1025, "step": 9531 }, { "epoch": 1.544394037589112, "grad_norm": 0.8428972959518433, "learning_rate": 2.5017492162568246e-06, "loss": 0.0909, "step": 9532 }, { "epoch": 1.5445560596241088, "grad_norm": 0.7809198498725891, "learning_rate": 2.5013119122394495e-06, "loss": 0.0961, "step": 9533 }, { "epoch": 1.5447180816591057, "grad_norm": 0.9663342237472534, "learning_rate": 2.5008746081819345e-06, "loss": 0.1164, "step": 9534 }, { "epoch": 1.5448801036941024, "grad_norm": 0.9518924355506897, "learning_rate": 2.5004373040976574e-06, "loss": 0.1091, "step": 9535 }, { "epoch": 1.5450421257290992, "grad_norm": 0.8073295950889587, "learning_rate": 2.5e-06, "loss": 0.1027, "step": 9536 }, { "epoch": 1.5452041477640959, "grad_norm": 0.8112584948539734, "learning_rate": 2.499562695902343e-06, "loss": 0.0952, "step": 9537 }, { "epoch": 1.5453661697990926, "grad_norm": 0.8796746730804443, "learning_rate": 2.4991253918180668e-06, "loss": 0.1113, "step": 9538 }, { "epoch": 1.5455281918340895, "grad_norm": 0.7987788915634155, "learning_rate": 2.4986880877605504e-06, "loss": 0.0995, "step": 9539 }, { "epoch": 1.5456902138690862, "grad_norm": 0.924967348575592, "learning_rate": 2.498250783743176e-06, "loss": 0.1205, "step": 9540 }, { "epoch": 1.545852235904083, "grad_norm": 0.9094009399414062, "learning_rate": 2.497813479779324e-06, "loss": 0.1179, "step": 9541 }, { "epoch": 1.5460142579390799, "grad_norm": 0.8155476450920105, "learning_rate": 2.4973761758823734e-06, "loss": 0.1004, "step": 9542 }, { "epoch": 1.5461762799740764, "grad_norm": 0.7931246161460876, "learning_rate": 2.4969388720657058e-06, "loss": 0.103, "step": 9543 }, { "epoch": 1.5463383020090733, "grad_norm": 0.8180692791938782, "learning_rate": 2.4965015683427005e-06, "loss": 0.1055, "step": 9544 }, { "epoch": 1.54650032404407, "grad_norm": 0.7871511578559875, "learning_rate": 2.4960642647267395e-06, "loss": 0.0943, "step": 9545 }, { "epoch": 1.5466623460790667, "grad_norm": 0.8144401907920837, "learning_rate": 2.4956269612312025e-06, "loss": 0.1029, "step": 9546 }, { "epoch": 1.5468243681140637, "grad_norm": 0.7799917459487915, "learning_rate": 2.49518965786947e-06, "loss": 0.1033, "step": 9547 }, { "epoch": 1.5469863901490601, "grad_norm": 0.7506991028785706, "learning_rate": 2.494752354654921e-06, "loss": 0.1017, "step": 9548 }, { "epoch": 1.547148412184057, "grad_norm": 0.80491042137146, "learning_rate": 2.4943150516009386e-06, "loss": 0.0994, "step": 9549 }, { "epoch": 1.5473104342190538, "grad_norm": 0.7802474498748779, "learning_rate": 2.4938777487209022e-06, "loss": 0.1002, "step": 9550 }, { "epoch": 1.5474724562540505, "grad_norm": 0.9787641167640686, "learning_rate": 2.493440446028191e-06, "loss": 0.1154, "step": 9551 }, { "epoch": 1.5476344782890474, "grad_norm": 0.9254834055900574, "learning_rate": 2.493003143536187e-06, "loss": 0.118, "step": 9552 }, { "epoch": 1.547796500324044, "grad_norm": 0.8243023157119751, "learning_rate": 2.492565841258268e-06, "loss": 0.1069, "step": 9553 }, { "epoch": 1.5479585223590409, "grad_norm": 0.8549127578735352, "learning_rate": 2.4921285392078184e-06, "loss": 0.1025, "step": 9554 }, { "epoch": 1.5481205443940376, "grad_norm": 0.8300154805183411, "learning_rate": 2.4916912373982157e-06, "loss": 0.1019, "step": 9555 }, { "epoch": 1.5482825664290343, "grad_norm": 0.7590198516845703, "learning_rate": 2.491253935842842e-06, "loss": 0.0964, "step": 9556 }, { "epoch": 1.5484445884640312, "grad_norm": 0.899855375289917, "learning_rate": 2.490816634555075e-06, "loss": 0.1141, "step": 9557 }, { "epoch": 1.5486066104990277, "grad_norm": 0.7786848545074463, "learning_rate": 2.490379333548297e-06, "loss": 0.1019, "step": 9558 }, { "epoch": 1.5487686325340246, "grad_norm": 0.7648515105247498, "learning_rate": 2.489942032835889e-06, "loss": 0.0955, "step": 9559 }, { "epoch": 1.5489306545690213, "grad_norm": 1.0073561668395996, "learning_rate": 2.4895047324312303e-06, "loss": 0.1208, "step": 9560 }, { "epoch": 1.549092676604018, "grad_norm": 0.7584699392318726, "learning_rate": 2.4890674323477016e-06, "loss": 0.0963, "step": 9561 }, { "epoch": 1.549254698639015, "grad_norm": 0.757125735282898, "learning_rate": 2.4886301325986827e-06, "loss": 0.104, "step": 9562 }, { "epoch": 1.5494167206740117, "grad_norm": 0.7259702682495117, "learning_rate": 2.4881928331975534e-06, "loss": 0.0944, "step": 9563 }, { "epoch": 1.5495787427090084, "grad_norm": 0.786858320236206, "learning_rate": 2.4877555341576955e-06, "loss": 0.0979, "step": 9564 }, { "epoch": 1.5497407647440054, "grad_norm": 0.7230560183525085, "learning_rate": 2.487318235492489e-06, "loss": 0.0913, "step": 9565 }, { "epoch": 1.5499027867790018, "grad_norm": 0.7546066641807556, "learning_rate": 2.4868809372153137e-06, "loss": 0.0931, "step": 9566 }, { "epoch": 1.5500648088139988, "grad_norm": 0.8449350595474243, "learning_rate": 2.48644363933955e-06, "loss": 0.1074, "step": 9567 }, { "epoch": 1.5502268308489955, "grad_norm": 0.8704739212989807, "learning_rate": 2.4860063418785773e-06, "loss": 0.1049, "step": 9568 }, { "epoch": 1.5503888528839922, "grad_norm": 0.7751917839050293, "learning_rate": 2.485569044845777e-06, "loss": 0.0923, "step": 9569 }, { "epoch": 1.5505508749189891, "grad_norm": 0.7371068596839905, "learning_rate": 2.4851317482545297e-06, "loss": 0.0853, "step": 9570 }, { "epoch": 1.5507128969539856, "grad_norm": 0.8631237745285034, "learning_rate": 2.4846944521182144e-06, "loss": 0.1069, "step": 9571 }, { "epoch": 1.5508749189889826, "grad_norm": 0.8396697044372559, "learning_rate": 2.4842571564502117e-06, "loss": 0.1098, "step": 9572 }, { "epoch": 1.5510369410239793, "grad_norm": 0.7827080488204956, "learning_rate": 2.4838198612639018e-06, "loss": 0.1022, "step": 9573 }, { "epoch": 1.551198963058976, "grad_norm": 0.7799431681632996, "learning_rate": 2.4833825665726657e-06, "loss": 0.0985, "step": 9574 }, { "epoch": 1.551360985093973, "grad_norm": 0.8115366101264954, "learning_rate": 2.4829452723898824e-06, "loss": 0.1082, "step": 9575 }, { "epoch": 1.5515230071289694, "grad_norm": 0.7954683899879456, "learning_rate": 2.482507978728933e-06, "loss": 0.1012, "step": 9576 }, { "epoch": 1.5516850291639663, "grad_norm": 0.8462672233581543, "learning_rate": 2.482070685603196e-06, "loss": 0.1127, "step": 9577 }, { "epoch": 1.551847051198963, "grad_norm": 0.7555440664291382, "learning_rate": 2.4816333930260535e-06, "loss": 0.1004, "step": 9578 }, { "epoch": 1.5520090732339598, "grad_norm": 0.7217851281166077, "learning_rate": 2.481196101010885e-06, "loss": 0.0943, "step": 9579 }, { "epoch": 1.5521710952689567, "grad_norm": 0.8560697436332703, "learning_rate": 2.4807588095710696e-06, "loss": 0.1011, "step": 9580 }, { "epoch": 1.5523331173039532, "grad_norm": 0.8755968809127808, "learning_rate": 2.4803215187199883e-06, "loss": 0.0974, "step": 9581 }, { "epoch": 1.5524951393389501, "grad_norm": 0.7938658595085144, "learning_rate": 2.4798842284710203e-06, "loss": 0.1006, "step": 9582 }, { "epoch": 1.5526571613739468, "grad_norm": 0.8489831686019897, "learning_rate": 2.4794469388375476e-06, "loss": 0.0943, "step": 9583 }, { "epoch": 1.5528191834089435, "grad_norm": 0.963909924030304, "learning_rate": 2.4790096498329477e-06, "loss": 0.1159, "step": 9584 }, { "epoch": 1.5529812054439405, "grad_norm": 0.8376120924949646, "learning_rate": 2.4785723614706025e-06, "loss": 0.1055, "step": 9585 }, { "epoch": 1.5531432274789372, "grad_norm": 0.9356231689453125, "learning_rate": 2.478135073763891e-06, "loss": 0.1079, "step": 9586 }, { "epoch": 1.553305249513934, "grad_norm": 0.904419481754303, "learning_rate": 2.477697786726192e-06, "loss": 0.112, "step": 9587 }, { "epoch": 1.5534672715489306, "grad_norm": 0.7490609288215637, "learning_rate": 2.4772605003708885e-06, "loss": 0.0986, "step": 9588 }, { "epoch": 1.5536292935839273, "grad_norm": 0.7533844113349915, "learning_rate": 2.476823214711358e-06, "loss": 0.0991, "step": 9589 }, { "epoch": 1.5537913156189243, "grad_norm": 0.8684905767440796, "learning_rate": 2.476385929760981e-06, "loss": 0.1085, "step": 9590 }, { "epoch": 1.553953337653921, "grad_norm": 0.8226639032363892, "learning_rate": 2.475948645533138e-06, "loss": 0.1083, "step": 9591 }, { "epoch": 1.5541153596889177, "grad_norm": 0.8018856644630432, "learning_rate": 2.475511362041207e-06, "loss": 0.1005, "step": 9592 }, { "epoch": 1.5542773817239146, "grad_norm": 0.6188633441925049, "learning_rate": 2.47507407929857e-06, "loss": 0.0776, "step": 9593 }, { "epoch": 1.554439403758911, "grad_norm": 0.7864198088645935, "learning_rate": 2.4746367973186063e-06, "loss": 0.0957, "step": 9594 }, { "epoch": 1.554601425793908, "grad_norm": 0.7992034554481506, "learning_rate": 2.474199516114694e-06, "loss": 0.099, "step": 9595 }, { "epoch": 1.5547634478289047, "grad_norm": 0.849092423915863, "learning_rate": 2.473762235700214e-06, "loss": 0.1055, "step": 9596 }, { "epoch": 1.5549254698639015, "grad_norm": 0.709578275680542, "learning_rate": 2.473324956088547e-06, "loss": 0.0904, "step": 9597 }, { "epoch": 1.5550874918988984, "grad_norm": 0.7516464591026306, "learning_rate": 2.472887677293072e-06, "loss": 0.0981, "step": 9598 }, { "epoch": 1.5552495139338949, "grad_norm": 0.7759402394294739, "learning_rate": 2.472450399327168e-06, "loss": 0.0952, "step": 9599 }, { "epoch": 1.5554115359688918, "grad_norm": 0.7998428344726562, "learning_rate": 2.4720131222042156e-06, "loss": 0.1004, "step": 9600 }, { "epoch": 1.5555735580038885, "grad_norm": 0.8491755127906799, "learning_rate": 2.4715758459375923e-06, "loss": 0.1057, "step": 9601 }, { "epoch": 1.5557355800388852, "grad_norm": 0.8094433546066284, "learning_rate": 2.4711385705406805e-06, "loss": 0.0951, "step": 9602 }, { "epoch": 1.5558976020738822, "grad_norm": 0.7939477562904358, "learning_rate": 2.4707012960268594e-06, "loss": 0.1063, "step": 9603 }, { "epoch": 1.5560596241088787, "grad_norm": 1.0064921379089355, "learning_rate": 2.4702640224095066e-06, "loss": 0.1098, "step": 9604 }, { "epoch": 1.5562216461438756, "grad_norm": 0.7896994948387146, "learning_rate": 2.4698267497020035e-06, "loss": 0.1027, "step": 9605 }, { "epoch": 1.5563836681788723, "grad_norm": 0.9108518362045288, "learning_rate": 2.469389477917727e-06, "loss": 0.115, "step": 9606 }, { "epoch": 1.556545690213869, "grad_norm": 0.7819544076919556, "learning_rate": 2.4689522070700606e-06, "loss": 0.0948, "step": 9607 }, { "epoch": 1.556707712248866, "grad_norm": 0.7864106297492981, "learning_rate": 2.4685149371723806e-06, "loss": 0.1019, "step": 9608 }, { "epoch": 1.5568697342838627, "grad_norm": 0.7853366136550903, "learning_rate": 2.468077668238068e-06, "loss": 0.1051, "step": 9609 }, { "epoch": 1.5570317563188594, "grad_norm": 0.7699379324913025, "learning_rate": 2.467640400280501e-06, "loss": 0.0979, "step": 9610 }, { "epoch": 1.557193778353856, "grad_norm": 0.7062657475471497, "learning_rate": 2.4672031333130584e-06, "loss": 0.0885, "step": 9611 }, { "epoch": 1.5573558003888528, "grad_norm": 0.8259475231170654, "learning_rate": 2.466765867349122e-06, "loss": 0.0942, "step": 9612 }, { "epoch": 1.5575178224238497, "grad_norm": 0.875845193862915, "learning_rate": 2.466328602402069e-06, "loss": 0.1193, "step": 9613 }, { "epoch": 1.5576798444588464, "grad_norm": 0.7817234396934509, "learning_rate": 2.46589133848528e-06, "loss": 0.0976, "step": 9614 }, { "epoch": 1.5578418664938432, "grad_norm": 0.8780922889709473, "learning_rate": 2.465454075612132e-06, "loss": 0.1128, "step": 9615 }, { "epoch": 1.55800388852884, "grad_norm": 0.7593608498573303, "learning_rate": 2.465016813796007e-06, "loss": 0.1004, "step": 9616 }, { "epoch": 1.5581659105638366, "grad_norm": 0.9103496074676514, "learning_rate": 2.4645795530502823e-06, "loss": 0.1147, "step": 9617 }, { "epoch": 1.5583279325988335, "grad_norm": 0.7615711688995361, "learning_rate": 2.464142293388338e-06, "loss": 0.0939, "step": 9618 }, { "epoch": 1.5584899546338302, "grad_norm": 0.7604543566703796, "learning_rate": 2.463705034823553e-06, "loss": 0.0899, "step": 9619 }, { "epoch": 1.558651976668827, "grad_norm": 0.7992773056030273, "learning_rate": 2.4632677773693048e-06, "loss": 0.0994, "step": 9620 }, { "epoch": 1.5588139987038239, "grad_norm": 0.7582528591156006, "learning_rate": 2.462830521038975e-06, "loss": 0.0977, "step": 9621 }, { "epoch": 1.5589760207388204, "grad_norm": 0.7414041757583618, "learning_rate": 2.4623932658459406e-06, "loss": 0.0937, "step": 9622 }, { "epoch": 1.5591380427738173, "grad_norm": 0.8438722491264343, "learning_rate": 2.461956011803582e-06, "loss": 0.1067, "step": 9623 }, { "epoch": 1.559300064808814, "grad_norm": 0.8454424142837524, "learning_rate": 2.461518758925277e-06, "loss": 0.107, "step": 9624 }, { "epoch": 1.5594620868438107, "grad_norm": 0.7657983303070068, "learning_rate": 2.461081507224404e-06, "loss": 0.0993, "step": 9625 }, { "epoch": 1.5596241088788076, "grad_norm": 0.8123411536216736, "learning_rate": 2.4606442567143434e-06, "loss": 0.0954, "step": 9626 }, { "epoch": 1.5597861309138041, "grad_norm": 0.8028444051742554, "learning_rate": 2.460207007408474e-06, "loss": 0.0998, "step": 9627 }, { "epoch": 1.559948152948801, "grad_norm": 0.8465455174446106, "learning_rate": 2.4597697593201728e-06, "loss": 0.1055, "step": 9628 }, { "epoch": 1.5601101749837978, "grad_norm": 0.7659565210342407, "learning_rate": 2.4593325124628206e-06, "loss": 0.0974, "step": 9629 }, { "epoch": 1.5602721970187945, "grad_norm": 0.7613688707351685, "learning_rate": 2.4588952668497937e-06, "loss": 0.0921, "step": 9630 }, { "epoch": 1.5604342190537914, "grad_norm": 0.8725959062576294, "learning_rate": 2.458458022494473e-06, "loss": 0.0952, "step": 9631 }, { "epoch": 1.560596241088788, "grad_norm": 0.7647036910057068, "learning_rate": 2.4580207794102364e-06, "loss": 0.1001, "step": 9632 }, { "epoch": 1.5607582631237849, "grad_norm": 0.7943955659866333, "learning_rate": 2.4575835376104624e-06, "loss": 0.098, "step": 9633 }, { "epoch": 1.5609202851587816, "grad_norm": 0.7511798739433289, "learning_rate": 2.4571462971085293e-06, "loss": 0.0927, "step": 9634 }, { "epoch": 1.5610823071937783, "grad_norm": 0.8319833874702454, "learning_rate": 2.456709057917815e-06, "loss": 0.0968, "step": 9635 }, { "epoch": 1.5612443292287752, "grad_norm": 0.8254125118255615, "learning_rate": 2.4562718200517003e-06, "loss": 0.0988, "step": 9636 }, { "epoch": 1.561406351263772, "grad_norm": 0.7782612442970276, "learning_rate": 2.4558345835235613e-06, "loss": 0.1042, "step": 9637 }, { "epoch": 1.5615683732987686, "grad_norm": 0.8092712163925171, "learning_rate": 2.4553973483467778e-06, "loss": 0.1003, "step": 9638 }, { "epoch": 1.5617303953337653, "grad_norm": 0.8870753049850464, "learning_rate": 2.454960114534726e-06, "loss": 0.1047, "step": 9639 }, { "epoch": 1.561892417368762, "grad_norm": 0.7420865297317505, "learning_rate": 2.454522882100787e-06, "loss": 0.0899, "step": 9640 }, { "epoch": 1.562054439403759, "grad_norm": 0.8217720985412598, "learning_rate": 2.4540856510583374e-06, "loss": 0.1038, "step": 9641 }, { "epoch": 1.5622164614387557, "grad_norm": 0.7875351309776306, "learning_rate": 2.453648421420756e-06, "loss": 0.0947, "step": 9642 }, { "epoch": 1.5623784834737524, "grad_norm": 0.8513997793197632, "learning_rate": 2.4532111932014205e-06, "loss": 0.1039, "step": 9643 }, { "epoch": 1.5625405055087493, "grad_norm": 0.814285159111023, "learning_rate": 2.4527739664137085e-06, "loss": 0.0976, "step": 9644 }, { "epoch": 1.5627025275437458, "grad_norm": 0.8172099590301514, "learning_rate": 2.4523367410710004e-06, "loss": 0.096, "step": 9645 }, { "epoch": 1.5628645495787428, "grad_norm": 0.7031353116035461, "learning_rate": 2.4518995171866717e-06, "loss": 0.0891, "step": 9646 }, { "epoch": 1.5630265716137395, "grad_norm": 0.9233510494232178, "learning_rate": 2.451462294774102e-06, "loss": 0.1044, "step": 9647 }, { "epoch": 1.5631885936487362, "grad_norm": 0.9054529070854187, "learning_rate": 2.451025073846668e-06, "loss": 0.1108, "step": 9648 }, { "epoch": 1.5633506156837331, "grad_norm": 0.8287831544876099, "learning_rate": 2.450587854417748e-06, "loss": 0.1046, "step": 9649 }, { "epoch": 1.5635126377187296, "grad_norm": 0.9027771949768066, "learning_rate": 2.45015063650072e-06, "loss": 0.1089, "step": 9650 }, { "epoch": 1.5636746597537265, "grad_norm": 0.7133320569992065, "learning_rate": 2.449713420108963e-06, "loss": 0.0931, "step": 9651 }, { "epoch": 1.5638366817887233, "grad_norm": 0.8660880327224731, "learning_rate": 2.449276205255853e-06, "loss": 0.1106, "step": 9652 }, { "epoch": 1.56399870382372, "grad_norm": 0.779906690120697, "learning_rate": 2.448838991954769e-06, "loss": 0.0977, "step": 9653 }, { "epoch": 1.564160725858717, "grad_norm": 0.9731188416481018, "learning_rate": 2.448401780219087e-06, "loss": 0.1092, "step": 9654 }, { "epoch": 1.5643227478937134, "grad_norm": 0.8192594647407532, "learning_rate": 2.447964570062186e-06, "loss": 0.0919, "step": 9655 }, { "epoch": 1.5644847699287103, "grad_norm": 0.8036348223686218, "learning_rate": 2.4475273614974437e-06, "loss": 0.0945, "step": 9656 }, { "epoch": 1.564646791963707, "grad_norm": 0.8575658202171326, "learning_rate": 2.447090154538237e-06, "loss": 0.1013, "step": 9657 }, { "epoch": 1.5648088139987038, "grad_norm": 0.7713079452514648, "learning_rate": 2.4466529491979437e-06, "loss": 0.1026, "step": 9658 }, { "epoch": 1.5649708360337007, "grad_norm": 0.7967318892478943, "learning_rate": 2.4462157454899393e-06, "loss": 0.0993, "step": 9659 }, { "epoch": 1.5651328580686974, "grad_norm": 0.8018705248832703, "learning_rate": 2.445778543427605e-06, "loss": 0.1044, "step": 9660 }, { "epoch": 1.565294880103694, "grad_norm": 0.7897207140922546, "learning_rate": 2.445341343024315e-06, "loss": 0.1001, "step": 9661 }, { "epoch": 1.5654569021386908, "grad_norm": 0.818270206451416, "learning_rate": 2.4449041442934485e-06, "loss": 0.092, "step": 9662 }, { "epoch": 1.5656189241736875, "grad_norm": 0.9617789387702942, "learning_rate": 2.4444669472483807e-06, "loss": 0.1061, "step": 9663 }, { "epoch": 1.5657809462086845, "grad_norm": 0.7740570902824402, "learning_rate": 2.4440297519024906e-06, "loss": 0.1002, "step": 9664 }, { "epoch": 1.5659429682436812, "grad_norm": 0.8566815853118896, "learning_rate": 2.443592558269155e-06, "loss": 0.1084, "step": 9665 }, { "epoch": 1.5661049902786779, "grad_norm": 0.8553325533866882, "learning_rate": 2.4431553663617502e-06, "loss": 0.1038, "step": 9666 }, { "epoch": 1.5662670123136748, "grad_norm": 0.7744235396385193, "learning_rate": 2.4427181761936535e-06, "loss": 0.0937, "step": 9667 }, { "epoch": 1.5664290343486713, "grad_norm": 0.8114905953407288, "learning_rate": 2.4422809877782417e-06, "loss": 0.1026, "step": 9668 }, { "epoch": 1.5665910563836682, "grad_norm": 0.8174008727073669, "learning_rate": 2.4418438011288926e-06, "loss": 0.0945, "step": 9669 }, { "epoch": 1.566753078418665, "grad_norm": 0.9269281625747681, "learning_rate": 2.4414066162589823e-06, "loss": 0.1222, "step": 9670 }, { "epoch": 1.5669151004536617, "grad_norm": 0.7899512052536011, "learning_rate": 2.4409694331818884e-06, "loss": 0.1009, "step": 9671 }, { "epoch": 1.5670771224886586, "grad_norm": 0.7267419099807739, "learning_rate": 2.4405322519109864e-06, "loss": 0.0993, "step": 9672 }, { "epoch": 1.567239144523655, "grad_norm": 0.8098709583282471, "learning_rate": 2.4400950724596527e-06, "loss": 0.0992, "step": 9673 }, { "epoch": 1.567401166558652, "grad_norm": 0.7339199185371399, "learning_rate": 2.4396578948412664e-06, "loss": 0.1005, "step": 9674 }, { "epoch": 1.5675631885936487, "grad_norm": 0.7503772377967834, "learning_rate": 2.4392207190692015e-06, "loss": 0.1032, "step": 9675 }, { "epoch": 1.5677252106286454, "grad_norm": 0.8887887001037598, "learning_rate": 2.4387835451568355e-06, "loss": 0.12, "step": 9676 }, { "epoch": 1.5678872326636424, "grad_norm": 0.8450667262077332, "learning_rate": 2.4383463731175457e-06, "loss": 0.108, "step": 9677 }, { "epoch": 1.5680492546986389, "grad_norm": 0.9332994818687439, "learning_rate": 2.4379092029647067e-06, "loss": 0.1086, "step": 9678 }, { "epoch": 1.5682112767336358, "grad_norm": 0.7733569741249084, "learning_rate": 2.437472034711696e-06, "loss": 0.0988, "step": 9679 }, { "epoch": 1.5683732987686325, "grad_norm": 0.806662917137146, "learning_rate": 2.4370348683718906e-06, "loss": 0.0925, "step": 9680 }, { "epoch": 1.5685353208036292, "grad_norm": 0.8055416941642761, "learning_rate": 2.436597703958665e-06, "loss": 0.108, "step": 9681 }, { "epoch": 1.5686973428386262, "grad_norm": 0.7006134390830994, "learning_rate": 2.436160541485396e-06, "loss": 0.0885, "step": 9682 }, { "epoch": 1.5688593648736227, "grad_norm": 0.8832897543907166, "learning_rate": 2.4357233809654608e-06, "loss": 0.1098, "step": 9683 }, { "epoch": 1.5690213869086196, "grad_norm": 0.7593520879745483, "learning_rate": 2.4352862224122344e-06, "loss": 0.0944, "step": 9684 }, { "epoch": 1.5691834089436163, "grad_norm": 0.8768290877342224, "learning_rate": 2.4348490658390924e-06, "loss": 0.114, "step": 9685 }, { "epoch": 1.569345430978613, "grad_norm": 0.7737889289855957, "learning_rate": 2.4344119112594124e-06, "loss": 0.0968, "step": 9686 }, { "epoch": 1.56950745301361, "grad_norm": 0.6443475484848022, "learning_rate": 2.4339747586865677e-06, "loss": 0.0738, "step": 9687 }, { "epoch": 1.5696694750486067, "grad_norm": 0.7420233488082886, "learning_rate": 2.4335376081339364e-06, "loss": 0.0994, "step": 9688 }, { "epoch": 1.5698314970836034, "grad_norm": 0.8285709023475647, "learning_rate": 2.433100459614894e-06, "loss": 0.1032, "step": 9689 }, { "epoch": 1.5699935191186, "grad_norm": 0.8334299921989441, "learning_rate": 2.4326633131428147e-06, "loss": 0.1027, "step": 9690 }, { "epoch": 1.5701555411535968, "grad_norm": 0.7420817613601685, "learning_rate": 2.432226168731076e-06, "loss": 0.0909, "step": 9691 }, { "epoch": 1.5703175631885937, "grad_norm": 0.7946280241012573, "learning_rate": 2.4317890263930516e-06, "loss": 0.0953, "step": 9692 }, { "epoch": 1.5704795852235904, "grad_norm": 0.8967946767807007, "learning_rate": 2.431351886142118e-06, "loss": 0.1133, "step": 9693 }, { "epoch": 1.5706416072585871, "grad_norm": 0.7146013379096985, "learning_rate": 2.430914747991651e-06, "loss": 0.0835, "step": 9694 }, { "epoch": 1.570803629293584, "grad_norm": 0.9583059549331665, "learning_rate": 2.430477611955026e-06, "loss": 0.1078, "step": 9695 }, { "epoch": 1.5709656513285806, "grad_norm": 0.8162189722061157, "learning_rate": 2.430040478045617e-06, "loss": 0.1036, "step": 9696 }, { "epoch": 1.5711276733635775, "grad_norm": 0.8183581829071045, "learning_rate": 2.4296033462768e-06, "loss": 0.0958, "step": 9697 }, { "epoch": 1.5712896953985742, "grad_norm": 0.7942171096801758, "learning_rate": 2.429166216661951e-06, "loss": 0.0945, "step": 9698 }, { "epoch": 1.571451717433571, "grad_norm": 0.9565669298171997, "learning_rate": 2.4287290892144434e-06, "loss": 0.1132, "step": 9699 }, { "epoch": 1.5716137394685679, "grad_norm": 0.7852692604064941, "learning_rate": 2.4282919639476544e-06, "loss": 0.0944, "step": 9700 }, { "epoch": 1.5717757615035644, "grad_norm": 0.8304145336151123, "learning_rate": 2.427854840874957e-06, "loss": 0.092, "step": 9701 }, { "epoch": 1.5719377835385613, "grad_norm": 0.869964599609375, "learning_rate": 2.4274177200097266e-06, "loss": 0.1011, "step": 9702 }, { "epoch": 1.572099805573558, "grad_norm": 0.9880481958389282, "learning_rate": 2.4269806013653385e-06, "loss": 0.1098, "step": 9703 }, { "epoch": 1.5722618276085547, "grad_norm": 0.8023558259010315, "learning_rate": 2.426543484955168e-06, "loss": 0.1075, "step": 9704 }, { "epoch": 1.5724238496435516, "grad_norm": 0.8949816823005676, "learning_rate": 2.426106370792588e-06, "loss": 0.1046, "step": 9705 }, { "epoch": 1.5725858716785481, "grad_norm": 1.0093536376953125, "learning_rate": 2.425669258890975e-06, "loss": 0.1205, "step": 9706 }, { "epoch": 1.572747893713545, "grad_norm": 0.8207192420959473, "learning_rate": 2.4252321492637027e-06, "loss": 0.1094, "step": 9707 }, { "epoch": 1.5729099157485418, "grad_norm": 0.9045940637588501, "learning_rate": 2.4247950419241457e-06, "loss": 0.1129, "step": 9708 }, { "epoch": 1.5730719377835385, "grad_norm": 0.9074610471725464, "learning_rate": 2.4243579368856787e-06, "loss": 0.1103, "step": 9709 }, { "epoch": 1.5732339598185354, "grad_norm": 0.7920365333557129, "learning_rate": 2.4239208341616755e-06, "loss": 0.099, "step": 9710 }, { "epoch": 1.5733959818535321, "grad_norm": 0.9034141898155212, "learning_rate": 2.4234837337655098e-06, "loss": 0.1067, "step": 9711 }, { "epoch": 1.5735580038885288, "grad_norm": 0.7888884544372559, "learning_rate": 2.4230466357105575e-06, "loss": 0.1013, "step": 9712 }, { "epoch": 1.5737200259235256, "grad_norm": 0.8596574068069458, "learning_rate": 2.422609540010192e-06, "loss": 0.1108, "step": 9713 }, { "epoch": 1.5738820479585223, "grad_norm": 0.9732898473739624, "learning_rate": 2.4221724466777874e-06, "loss": 0.0983, "step": 9714 }, { "epoch": 1.5740440699935192, "grad_norm": 0.8367780447006226, "learning_rate": 2.421735355726718e-06, "loss": 0.1024, "step": 9715 }, { "epoch": 1.574206092028516, "grad_norm": 0.7838643193244934, "learning_rate": 2.421298267170356e-06, "loss": 0.0972, "step": 9716 }, { "epoch": 1.5743681140635126, "grad_norm": 0.9189515709877014, "learning_rate": 2.420861181022077e-06, "loss": 0.1078, "step": 9717 }, { "epoch": 1.5745301360985096, "grad_norm": 0.812330424785614, "learning_rate": 2.420424097295255e-06, "loss": 0.1065, "step": 9718 }, { "epoch": 1.574692158133506, "grad_norm": 0.7901645302772522, "learning_rate": 2.419987016003263e-06, "loss": 0.0998, "step": 9719 }, { "epoch": 1.574854180168503, "grad_norm": 0.9493116736412048, "learning_rate": 2.419549937159474e-06, "loss": 0.1202, "step": 9720 }, { "epoch": 1.5750162022034997, "grad_norm": 0.873347282409668, "learning_rate": 2.419112860777262e-06, "loss": 0.0935, "step": 9721 }, { "epoch": 1.5751782242384964, "grad_norm": 0.7348765730857849, "learning_rate": 2.418675786870002e-06, "loss": 0.0989, "step": 9722 }, { "epoch": 1.5753402462734933, "grad_norm": 0.9190308451652527, "learning_rate": 2.4182387154510653e-06, "loss": 0.1148, "step": 9723 }, { "epoch": 1.5755022683084898, "grad_norm": 0.8104141354560852, "learning_rate": 2.4178016465338266e-06, "loss": 0.102, "step": 9724 }, { "epoch": 1.5756642903434868, "grad_norm": 0.826193630695343, "learning_rate": 2.417364580131658e-06, "loss": 0.1042, "step": 9725 }, { "epoch": 1.5758263123784835, "grad_norm": 0.7655068635940552, "learning_rate": 2.416927516257933e-06, "loss": 0.0976, "step": 9726 }, { "epoch": 1.5759883344134802, "grad_norm": 0.8392152190208435, "learning_rate": 2.416490454926026e-06, "loss": 0.1102, "step": 9727 }, { "epoch": 1.5761503564484771, "grad_norm": 0.7975762486457825, "learning_rate": 2.416053396149308e-06, "loss": 0.1035, "step": 9728 }, { "epoch": 1.5763123784834736, "grad_norm": 0.8951084613800049, "learning_rate": 2.4156163399411534e-06, "loss": 0.0971, "step": 9729 }, { "epoch": 1.5764744005184705, "grad_norm": 0.8403456211090088, "learning_rate": 2.415179286314934e-06, "loss": 0.1045, "step": 9730 }, { "epoch": 1.5766364225534673, "grad_norm": 0.8686217665672302, "learning_rate": 2.414742235284024e-06, "loss": 0.1051, "step": 9731 }, { "epoch": 1.576798444588464, "grad_norm": 0.810461699962616, "learning_rate": 2.414305186861795e-06, "loss": 0.107, "step": 9732 }, { "epoch": 1.576960466623461, "grad_norm": 0.8022754788398743, "learning_rate": 2.4138681410616206e-06, "loss": 0.1036, "step": 9733 }, { "epoch": 1.5771224886584574, "grad_norm": 0.8617199063301086, "learning_rate": 2.4134310978968716e-06, "loss": 0.112, "step": 9734 }, { "epoch": 1.5772845106934543, "grad_norm": 0.8537782430648804, "learning_rate": 2.4129940573809213e-06, "loss": 0.1043, "step": 9735 }, { "epoch": 1.577446532728451, "grad_norm": 0.7416249513626099, "learning_rate": 2.412557019527143e-06, "loss": 0.0956, "step": 9736 }, { "epoch": 1.5776085547634477, "grad_norm": 0.8986727595329285, "learning_rate": 2.4121199843489084e-06, "loss": 0.1091, "step": 9737 }, { "epoch": 1.5777705767984447, "grad_norm": 0.7465632557868958, "learning_rate": 2.4116829518595896e-06, "loss": 0.0912, "step": 9738 }, { "epoch": 1.5779325988334414, "grad_norm": 0.7991532683372498, "learning_rate": 2.4112459220725588e-06, "loss": 0.0968, "step": 9739 }, { "epoch": 1.578094620868438, "grad_norm": 0.7432222366333008, "learning_rate": 2.410808895001187e-06, "loss": 0.0916, "step": 9740 }, { "epoch": 1.5782566429034348, "grad_norm": 0.8202318549156189, "learning_rate": 2.4103718706588477e-06, "loss": 0.106, "step": 9741 }, { "epoch": 1.5784186649384315, "grad_norm": 0.8392171263694763, "learning_rate": 2.409934849058913e-06, "loss": 0.1007, "step": 9742 }, { "epoch": 1.5785806869734285, "grad_norm": 0.8795257806777954, "learning_rate": 2.4094978302147533e-06, "loss": 0.106, "step": 9743 }, { "epoch": 1.5787427090084252, "grad_norm": 0.8620293140411377, "learning_rate": 2.4090608141397417e-06, "loss": 0.1091, "step": 9744 }, { "epoch": 1.5789047310434219, "grad_norm": 0.7913221120834351, "learning_rate": 2.4086238008472473e-06, "loss": 0.1002, "step": 9745 }, { "epoch": 1.5790667530784188, "grad_norm": 0.8350681662559509, "learning_rate": 2.408186790350645e-06, "loss": 0.0996, "step": 9746 }, { "epoch": 1.5792287751134153, "grad_norm": 0.6608853936195374, "learning_rate": 2.4077497826633045e-06, "loss": 0.0797, "step": 9747 }, { "epoch": 1.5793907971484122, "grad_norm": 0.8556117415428162, "learning_rate": 2.4073127777985982e-06, "loss": 0.1055, "step": 9748 }, { "epoch": 1.579552819183409, "grad_norm": 0.8157343864440918, "learning_rate": 2.4068757757698954e-06, "loss": 0.0863, "step": 9749 }, { "epoch": 1.5797148412184057, "grad_norm": 0.8553090691566467, "learning_rate": 2.406438776590568e-06, "loss": 0.0932, "step": 9750 }, { "epoch": 1.5798768632534026, "grad_norm": 0.8190301656723022, "learning_rate": 2.406001780273989e-06, "loss": 0.1047, "step": 9751 }, { "epoch": 1.580038885288399, "grad_norm": 0.7428902983665466, "learning_rate": 2.4055647868335273e-06, "loss": 0.0918, "step": 9752 }, { "epoch": 1.580200907323396, "grad_norm": 0.7674239277839661, "learning_rate": 2.4051277962825555e-06, "loss": 0.0956, "step": 9753 }, { "epoch": 1.5803629293583927, "grad_norm": 0.7433567047119141, "learning_rate": 2.404690808634442e-06, "loss": 0.0922, "step": 9754 }, { "epoch": 1.5805249513933894, "grad_norm": 0.8673844337463379, "learning_rate": 2.40425382390256e-06, "loss": 0.1007, "step": 9755 }, { "epoch": 1.5806869734283864, "grad_norm": 0.8878071904182434, "learning_rate": 2.4038168421002795e-06, "loss": 0.1117, "step": 9756 }, { "epoch": 1.5808489954633829, "grad_norm": 0.81288743019104, "learning_rate": 2.403379863240971e-06, "loss": 0.104, "step": 9757 }, { "epoch": 1.5810110174983798, "grad_norm": 0.8590624332427979, "learning_rate": 2.4029428873380044e-06, "loss": 0.1045, "step": 9758 }, { "epoch": 1.5811730395333765, "grad_norm": 0.8249778151512146, "learning_rate": 2.40250591440475e-06, "loss": 0.0995, "step": 9759 }, { "epoch": 1.5813350615683732, "grad_norm": 0.747291624546051, "learning_rate": 2.4020689444545796e-06, "loss": 0.0925, "step": 9760 }, { "epoch": 1.5814970836033702, "grad_norm": 0.8458421230316162, "learning_rate": 2.4016319775008623e-06, "loss": 0.1052, "step": 9761 }, { "epoch": 1.5816591056383669, "grad_norm": 0.7993534803390503, "learning_rate": 2.401195013556969e-06, "loss": 0.1025, "step": 9762 }, { "epoch": 1.5818211276733636, "grad_norm": 0.8762085437774658, "learning_rate": 2.4007580526362685e-06, "loss": 0.1053, "step": 9763 }, { "epoch": 1.5819831497083603, "grad_norm": 0.9038422107696533, "learning_rate": 2.400321094752131e-06, "loss": 0.1019, "step": 9764 }, { "epoch": 1.582145171743357, "grad_norm": 0.793109655380249, "learning_rate": 2.399884139917927e-06, "loss": 0.0976, "step": 9765 }, { "epoch": 1.582307193778354, "grad_norm": 0.8598572611808777, "learning_rate": 2.399447188147027e-06, "loss": 0.103, "step": 9766 }, { "epoch": 1.5824692158133506, "grad_norm": 0.7793141007423401, "learning_rate": 2.399010239452799e-06, "loss": 0.0916, "step": 9767 }, { "epoch": 1.5826312378483474, "grad_norm": 0.8122705817222595, "learning_rate": 2.3985732938486137e-06, "loss": 0.0946, "step": 9768 }, { "epoch": 1.5827932598833443, "grad_norm": 0.8980499505996704, "learning_rate": 2.3981363513478394e-06, "loss": 0.1131, "step": 9769 }, { "epoch": 1.5829552819183408, "grad_norm": 0.8115260004997253, "learning_rate": 2.3976994119638464e-06, "loss": 0.1007, "step": 9770 }, { "epoch": 1.5831173039533377, "grad_norm": 0.766045868396759, "learning_rate": 2.3972624757100044e-06, "loss": 0.1038, "step": 9771 }, { "epoch": 1.5832793259883344, "grad_norm": 0.8186202049255371, "learning_rate": 2.3968255425996817e-06, "loss": 0.1021, "step": 9772 }, { "epoch": 1.5834413480233311, "grad_norm": 0.7019743919372559, "learning_rate": 2.396388612646247e-06, "loss": 0.0887, "step": 9773 }, { "epoch": 1.583603370058328, "grad_norm": 0.9179200530052185, "learning_rate": 2.3959516858630707e-06, "loss": 0.1108, "step": 9774 }, { "epoch": 1.5837653920933246, "grad_norm": 0.8982585668563843, "learning_rate": 2.395514762263522e-06, "loss": 0.1048, "step": 9775 }, { "epoch": 1.5839274141283215, "grad_norm": 0.8416269421577454, "learning_rate": 2.3950778418609676e-06, "loss": 0.1036, "step": 9776 }, { "epoch": 1.5840894361633182, "grad_norm": 0.6837180256843567, "learning_rate": 2.3946409246687775e-06, "loss": 0.083, "step": 9777 }, { "epoch": 1.584251458198315, "grad_norm": 0.7735916376113892, "learning_rate": 2.39420401070032e-06, "loss": 0.0908, "step": 9778 }, { "epoch": 1.5844134802333119, "grad_norm": 0.894906759262085, "learning_rate": 2.3937670999689634e-06, "loss": 0.1112, "step": 9779 }, { "epoch": 1.5845755022683083, "grad_norm": 0.8407071232795715, "learning_rate": 2.3933301924880768e-06, "loss": 0.1083, "step": 9780 }, { "epoch": 1.5847375243033053, "grad_norm": 0.7255405187606812, "learning_rate": 2.392893288271029e-06, "loss": 0.088, "step": 9781 }, { "epoch": 1.584899546338302, "grad_norm": 0.8209105134010315, "learning_rate": 2.3924563873311868e-06, "loss": 0.104, "step": 9782 }, { "epoch": 1.5850615683732987, "grad_norm": 0.8289636373519897, "learning_rate": 2.3920194896819183e-06, "loss": 0.1019, "step": 9783 }, { "epoch": 1.5852235904082956, "grad_norm": 0.8864776492118835, "learning_rate": 2.391582595336593e-06, "loss": 0.1073, "step": 9784 }, { "epoch": 1.5853856124432921, "grad_norm": 0.8953683376312256, "learning_rate": 2.391145704308577e-06, "loss": 0.1121, "step": 9785 }, { "epoch": 1.585547634478289, "grad_norm": 0.8129318356513977, "learning_rate": 2.3907088166112406e-06, "loss": 0.1012, "step": 9786 }, { "epoch": 1.5857096565132858, "grad_norm": 0.7446659207344055, "learning_rate": 2.3902719322579487e-06, "loss": 0.0929, "step": 9787 }, { "epoch": 1.5858716785482825, "grad_norm": 0.8847704529762268, "learning_rate": 2.3898350512620696e-06, "loss": 0.1074, "step": 9788 }, { "epoch": 1.5860337005832794, "grad_norm": 0.8319103121757507, "learning_rate": 2.389398173636972e-06, "loss": 0.1071, "step": 9789 }, { "epoch": 1.5861957226182761, "grad_norm": 0.884433388710022, "learning_rate": 2.3889612993960233e-06, "loss": 0.1163, "step": 9790 }, { "epoch": 1.5863577446532728, "grad_norm": 0.813884973526001, "learning_rate": 2.3885244285525892e-06, "loss": 0.1017, "step": 9791 }, { "epoch": 1.5865197666882696, "grad_norm": 0.8731319308280945, "learning_rate": 2.3880875611200387e-06, "loss": 0.1089, "step": 9792 }, { "epoch": 1.5866817887232663, "grad_norm": 0.7939242720603943, "learning_rate": 2.387650697111737e-06, "loss": 0.1039, "step": 9793 }, { "epoch": 1.5868438107582632, "grad_norm": 0.7616491317749023, "learning_rate": 2.3872138365410525e-06, "loss": 0.1019, "step": 9794 }, { "epoch": 1.58700583279326, "grad_norm": 0.8765544295310974, "learning_rate": 2.386776979421352e-06, "loss": 0.1133, "step": 9795 }, { "epoch": 1.5871678548282566, "grad_norm": 0.8053047060966492, "learning_rate": 2.3863401257660016e-06, "loss": 0.0989, "step": 9796 }, { "epoch": 1.5873298768632536, "grad_norm": 0.7080039381980896, "learning_rate": 2.3859032755883677e-06, "loss": 0.0945, "step": 9797 }, { "epoch": 1.58749189889825, "grad_norm": 0.7942770719528198, "learning_rate": 2.3854664289018182e-06, "loss": 0.1005, "step": 9798 }, { "epoch": 1.587653920933247, "grad_norm": 0.8453302383422852, "learning_rate": 2.3850295857197193e-06, "loss": 0.1095, "step": 9799 }, { "epoch": 1.5878159429682437, "grad_norm": 0.9450112581253052, "learning_rate": 2.3845927460554363e-06, "loss": 0.1154, "step": 9800 }, { "epoch": 1.5879779650032404, "grad_norm": 0.8308724164962769, "learning_rate": 2.3841559099223363e-06, "loss": 0.1083, "step": 9801 }, { "epoch": 1.5881399870382373, "grad_norm": 0.8302947878837585, "learning_rate": 2.383719077333784e-06, "loss": 0.0977, "step": 9802 }, { "epoch": 1.5883020090732338, "grad_norm": 0.8785433173179626, "learning_rate": 2.3832822483031477e-06, "loss": 0.1076, "step": 9803 }, { "epoch": 1.5884640311082308, "grad_norm": 0.7806341648101807, "learning_rate": 2.382845422843792e-06, "loss": 0.1086, "step": 9804 }, { "epoch": 1.5886260531432275, "grad_norm": 0.737848699092865, "learning_rate": 2.382408600969083e-06, "loss": 0.0928, "step": 9805 }, { "epoch": 1.5887880751782242, "grad_norm": 0.8146355748176575, "learning_rate": 2.381971782692386e-06, "loss": 0.1076, "step": 9806 }, { "epoch": 1.5889500972132211, "grad_norm": 0.7324455380439758, "learning_rate": 2.3815349680270654e-06, "loss": 0.0893, "step": 9807 }, { "epoch": 1.5891121192482176, "grad_norm": 0.781540036201477, "learning_rate": 2.3810981569864898e-06, "loss": 0.097, "step": 9808 }, { "epoch": 1.5892741412832145, "grad_norm": 0.7014777660369873, "learning_rate": 2.3806613495840227e-06, "loss": 0.0886, "step": 9809 }, { "epoch": 1.5894361633182112, "grad_norm": 0.8267843723297119, "learning_rate": 2.38022454583303e-06, "loss": 0.1021, "step": 9810 }, { "epoch": 1.589598185353208, "grad_norm": 0.7938618063926697, "learning_rate": 2.379787745746875e-06, "loss": 0.0953, "step": 9811 }, { "epoch": 1.589760207388205, "grad_norm": 0.9035655856132507, "learning_rate": 2.379350949338924e-06, "loss": 0.1138, "step": 9812 }, { "epoch": 1.5899222294232016, "grad_norm": 0.7727793455123901, "learning_rate": 2.3789141566225437e-06, "loss": 0.0966, "step": 9813 }, { "epoch": 1.5900842514581983, "grad_norm": 1.0545909404754639, "learning_rate": 2.378477367611096e-06, "loss": 0.12, "step": 9814 }, { "epoch": 1.590246273493195, "grad_norm": 0.8968018889427185, "learning_rate": 2.378040582317947e-06, "loss": 0.109, "step": 9815 }, { "epoch": 1.5904082955281917, "grad_norm": 0.7647299766540527, "learning_rate": 2.377603800756461e-06, "loss": 0.0939, "step": 9816 }, { "epoch": 1.5905703175631887, "grad_norm": 0.8128365278244019, "learning_rate": 2.377167022940002e-06, "loss": 0.1, "step": 9817 }, { "epoch": 1.5907323395981854, "grad_norm": 0.7933886051177979, "learning_rate": 2.376730248881935e-06, "loss": 0.1007, "step": 9818 }, { "epoch": 1.590894361633182, "grad_norm": 0.8362367153167725, "learning_rate": 2.376293478595625e-06, "loss": 0.1051, "step": 9819 }, { "epoch": 1.591056383668179, "grad_norm": 0.7798475027084351, "learning_rate": 2.3758567120944345e-06, "loss": 0.0938, "step": 9820 }, { "epoch": 1.5912184057031755, "grad_norm": 0.8067512512207031, "learning_rate": 2.3754199493917277e-06, "loss": 0.0933, "step": 9821 }, { "epoch": 1.5913804277381725, "grad_norm": 0.7828888893127441, "learning_rate": 2.3749831905008704e-06, "loss": 0.1024, "step": 9822 }, { "epoch": 1.5915424497731692, "grad_norm": 0.7774432897567749, "learning_rate": 2.3745464354352236e-06, "loss": 0.0959, "step": 9823 }, { "epoch": 1.5917044718081659, "grad_norm": 0.7905421257019043, "learning_rate": 2.374109684208153e-06, "loss": 0.0848, "step": 9824 }, { "epoch": 1.5918664938431628, "grad_norm": 0.9159008264541626, "learning_rate": 2.3736729368330212e-06, "loss": 0.1114, "step": 9825 }, { "epoch": 1.5920285158781593, "grad_norm": 0.9248816967010498, "learning_rate": 2.3732361933231917e-06, "loss": 0.0909, "step": 9826 }, { "epoch": 1.5921905379131562, "grad_norm": 0.7748779654502869, "learning_rate": 2.3727994536920276e-06, "loss": 0.0993, "step": 9827 }, { "epoch": 1.592352559948153, "grad_norm": 0.8885847926139832, "learning_rate": 2.3723627179528935e-06, "loss": 0.1007, "step": 9828 }, { "epoch": 1.5925145819831497, "grad_norm": 0.7419458031654358, "learning_rate": 2.3719259861191506e-06, "loss": 0.0938, "step": 9829 }, { "epoch": 1.5926766040181466, "grad_norm": 0.7908516526222229, "learning_rate": 2.371489258204163e-06, "loss": 0.0994, "step": 9830 }, { "epoch": 1.592838626053143, "grad_norm": 0.8385130763053894, "learning_rate": 2.3710525342212925e-06, "loss": 0.1031, "step": 9831 }, { "epoch": 1.59300064808814, "grad_norm": 0.7185788750648499, "learning_rate": 2.3706158141839025e-06, "loss": 0.0933, "step": 9832 }, { "epoch": 1.5931626701231367, "grad_norm": 0.7869822382926941, "learning_rate": 2.3701790981053556e-06, "loss": 0.0997, "step": 9833 }, { "epoch": 1.5933246921581334, "grad_norm": 0.881678581237793, "learning_rate": 2.3697423859990147e-06, "loss": 0.1069, "step": 9834 }, { "epoch": 1.5934867141931304, "grad_norm": 0.9156317710876465, "learning_rate": 2.3693056778782407e-06, "loss": 0.1083, "step": 9835 }, { "epoch": 1.5936487362281269, "grad_norm": 0.9538189768791199, "learning_rate": 2.3688689737563965e-06, "loss": 0.0896, "step": 9836 }, { "epoch": 1.5938107582631238, "grad_norm": 0.7372860312461853, "learning_rate": 2.3684322736468457e-06, "loss": 0.0993, "step": 9837 }, { "epoch": 1.5939727802981205, "grad_norm": 0.8044807314872742, "learning_rate": 2.367995577562948e-06, "loss": 0.1033, "step": 9838 }, { "epoch": 1.5941348023331172, "grad_norm": 0.8724144697189331, "learning_rate": 2.3675588855180668e-06, "loss": 0.1125, "step": 9839 }, { "epoch": 1.5942968243681142, "grad_norm": 0.7166265249252319, "learning_rate": 2.3671221975255616e-06, "loss": 0.0858, "step": 9840 }, { "epoch": 1.5944588464031109, "grad_norm": 0.9747242331504822, "learning_rate": 2.3666855135987972e-06, "loss": 0.1067, "step": 9841 }, { "epoch": 1.5946208684381076, "grad_norm": 0.8712856769561768, "learning_rate": 2.366248833751133e-06, "loss": 0.1019, "step": 9842 }, { "epoch": 1.5947828904731045, "grad_norm": 0.8115395903587341, "learning_rate": 2.3658121579959314e-06, "loss": 0.0982, "step": 9843 }, { "epoch": 1.594944912508101, "grad_norm": 0.9539070129394531, "learning_rate": 2.365375486346552e-06, "loss": 0.1215, "step": 9844 }, { "epoch": 1.595106934543098, "grad_norm": 0.870464026927948, "learning_rate": 2.3649388188163572e-06, "loss": 0.1112, "step": 9845 }, { "epoch": 1.5952689565780946, "grad_norm": 0.7583725452423096, "learning_rate": 2.3645021554187086e-06, "loss": 0.0935, "step": 9846 }, { "epoch": 1.5954309786130914, "grad_norm": 0.8613632321357727, "learning_rate": 2.364065496166965e-06, "loss": 0.1064, "step": 9847 }, { "epoch": 1.5955930006480883, "grad_norm": 0.8895267844200134, "learning_rate": 2.3636288410744894e-06, "loss": 0.1075, "step": 9848 }, { "epoch": 1.5957550226830848, "grad_norm": 0.7960240840911865, "learning_rate": 2.36319219015464e-06, "loss": 0.0979, "step": 9849 }, { "epoch": 1.5959170447180817, "grad_norm": 0.7139129042625427, "learning_rate": 2.3627555434207787e-06, "loss": 0.0916, "step": 9850 }, { "epoch": 1.5960790667530784, "grad_norm": 0.9061529636383057, "learning_rate": 2.3623189008862664e-06, "loss": 0.1055, "step": 9851 }, { "epoch": 1.5962410887880751, "grad_norm": 0.8803797364234924, "learning_rate": 2.3618822625644624e-06, "loss": 0.1099, "step": 9852 }, { "epoch": 1.596403110823072, "grad_norm": 1.062333106994629, "learning_rate": 2.3614456284687267e-06, "loss": 0.1194, "step": 9853 }, { "epoch": 1.5965651328580686, "grad_norm": 0.7200803756713867, "learning_rate": 2.36100899861242e-06, "loss": 0.0902, "step": 9854 }, { "epoch": 1.5967271548930655, "grad_norm": 0.7464329600334167, "learning_rate": 2.3605723730089e-06, "loss": 0.0964, "step": 9855 }, { "epoch": 1.5968891769280622, "grad_norm": 0.743965744972229, "learning_rate": 2.3601357516715297e-06, "loss": 0.0935, "step": 9856 }, { "epoch": 1.597051198963059, "grad_norm": 0.8007190227508545, "learning_rate": 2.3596991346136666e-06, "loss": 0.1018, "step": 9857 }, { "epoch": 1.5972132209980558, "grad_norm": 0.7653955817222595, "learning_rate": 2.35926252184867e-06, "loss": 0.0963, "step": 9858 }, { "epoch": 1.5973752430330523, "grad_norm": 0.9055894017219543, "learning_rate": 2.3588259133898995e-06, "loss": 0.1082, "step": 9859 }, { "epoch": 1.5975372650680493, "grad_norm": 0.8475822806358337, "learning_rate": 2.3583893092507144e-06, "loss": 0.1045, "step": 9860 }, { "epoch": 1.597699287103046, "grad_norm": 0.8215590119361877, "learning_rate": 2.357952709444474e-06, "loss": 0.1003, "step": 9861 }, { "epoch": 1.5978613091380427, "grad_norm": 0.7612400650978088, "learning_rate": 2.3575161139845375e-06, "loss": 0.0869, "step": 9862 }, { "epoch": 1.5980233311730396, "grad_norm": 0.7190806865692139, "learning_rate": 2.357079522884263e-06, "loss": 0.0876, "step": 9863 }, { "epoch": 1.5981853532080363, "grad_norm": 0.8134793043136597, "learning_rate": 2.356642936157008e-06, "loss": 0.0957, "step": 9864 }, { "epoch": 1.598347375243033, "grad_norm": 0.8630086779594421, "learning_rate": 2.3562063538161332e-06, "loss": 0.1002, "step": 9865 }, { "epoch": 1.5985093972780298, "grad_norm": 0.7363705635070801, "learning_rate": 2.3557697758749966e-06, "loss": 0.0914, "step": 9866 }, { "epoch": 1.5986714193130265, "grad_norm": 0.9773649573326111, "learning_rate": 2.355333202346955e-06, "loss": 0.1158, "step": 9867 }, { "epoch": 1.5988334413480234, "grad_norm": 0.8616942167282104, "learning_rate": 2.3548966332453673e-06, "loss": 0.1058, "step": 9868 }, { "epoch": 1.5989954633830201, "grad_norm": 0.78786700963974, "learning_rate": 2.354460068583591e-06, "loss": 0.1064, "step": 9869 }, { "epoch": 1.5991574854180168, "grad_norm": 0.8264607787132263, "learning_rate": 2.3540235083749853e-06, "loss": 0.0996, "step": 9870 }, { "epoch": 1.5993195074530138, "grad_norm": 0.8713717460632324, "learning_rate": 2.3535869526329067e-06, "loss": 0.0998, "step": 9871 }, { "epoch": 1.5994815294880103, "grad_norm": 0.8369178175926208, "learning_rate": 2.3531504013707134e-06, "loss": 0.1026, "step": 9872 }, { "epoch": 1.5996435515230072, "grad_norm": 0.8778187036514282, "learning_rate": 2.3527138546017623e-06, "loss": 0.1061, "step": 9873 }, { "epoch": 1.599805573558004, "grad_norm": 0.7916249632835388, "learning_rate": 2.35227731233941e-06, "loss": 0.0948, "step": 9874 }, { "epoch": 1.5999675955930006, "grad_norm": 0.8178213238716125, "learning_rate": 2.3518407745970155e-06, "loss": 0.1131, "step": 9875 }, { "epoch": 1.6001296176279975, "grad_norm": 0.7969405055046082, "learning_rate": 2.3514042413879344e-06, "loss": 0.0874, "step": 9876 }, { "epoch": 1.600291639662994, "grad_norm": 0.9230097532272339, "learning_rate": 2.3509677127255233e-06, "loss": 0.1043, "step": 9877 }, { "epoch": 1.600453661697991, "grad_norm": 0.8090982437133789, "learning_rate": 2.350531188623141e-06, "loss": 0.0954, "step": 9878 }, { "epoch": 1.6006156837329877, "grad_norm": 0.8020190000534058, "learning_rate": 2.3500946690941407e-06, "loss": 0.1007, "step": 9879 }, { "epoch": 1.6007777057679844, "grad_norm": 0.8556835651397705, "learning_rate": 2.349658154151882e-06, "loss": 0.105, "step": 9880 }, { "epoch": 1.6009397278029813, "grad_norm": 0.7550347447395325, "learning_rate": 2.34922164380972e-06, "loss": 0.0951, "step": 9881 }, { "epoch": 1.6011017498379778, "grad_norm": 0.8687736988067627, "learning_rate": 2.3487851380810106e-06, "loss": 0.1147, "step": 9882 }, { "epoch": 1.6012637718729748, "grad_norm": 0.8163692951202393, "learning_rate": 2.3483486369791106e-06, "loss": 0.0996, "step": 9883 }, { "epoch": 1.6014257939079715, "grad_norm": 0.9547244310379028, "learning_rate": 2.3479121405173736e-06, "loss": 0.1209, "step": 9884 }, { "epoch": 1.6015878159429682, "grad_norm": 0.7805865406990051, "learning_rate": 2.3474756487091586e-06, "loss": 0.1022, "step": 9885 }, { "epoch": 1.601749837977965, "grad_norm": 0.7808001041412354, "learning_rate": 2.347039161567819e-06, "loss": 0.1002, "step": 9886 }, { "epoch": 1.6019118600129616, "grad_norm": 0.8721002340316772, "learning_rate": 2.346602679106712e-06, "loss": 0.1171, "step": 9887 }, { "epoch": 1.6020738820479585, "grad_norm": 0.8056678175926208, "learning_rate": 2.34616620133919e-06, "loss": 0.0971, "step": 9888 }, { "epoch": 1.6022359040829552, "grad_norm": 0.7318929433822632, "learning_rate": 2.345729728278611e-06, "loss": 0.1024, "step": 9889 }, { "epoch": 1.602397926117952, "grad_norm": 0.7396207451820374, "learning_rate": 2.345293259938329e-06, "loss": 0.0944, "step": 9890 }, { "epoch": 1.6025599481529489, "grad_norm": 0.8219144344329834, "learning_rate": 2.3448567963316987e-06, "loss": 0.1008, "step": 9891 }, { "epoch": 1.6027219701879456, "grad_norm": 0.7780699133872986, "learning_rate": 2.3444203374720755e-06, "loss": 0.0938, "step": 9892 }, { "epoch": 1.6028839922229423, "grad_norm": 0.7455930709838867, "learning_rate": 2.3439838833728122e-06, "loss": 0.0948, "step": 9893 }, { "epoch": 1.6030460142579392, "grad_norm": 0.8056654334068298, "learning_rate": 2.3435474340472657e-06, "loss": 0.098, "step": 9894 }, { "epoch": 1.6032080362929357, "grad_norm": 0.7863870859146118, "learning_rate": 2.3431109895087886e-06, "loss": 0.1008, "step": 9895 }, { "epoch": 1.6033700583279327, "grad_norm": 0.7881834506988525, "learning_rate": 2.3426745497707364e-06, "loss": 0.1083, "step": 9896 }, { "epoch": 1.6035320803629294, "grad_norm": 0.8019961714744568, "learning_rate": 2.3422381148464614e-06, "loss": 0.0984, "step": 9897 }, { "epoch": 1.603694102397926, "grad_norm": 0.8999032974243164, "learning_rate": 2.341801684749318e-06, "loss": 0.1112, "step": 9898 }, { "epoch": 1.603856124432923, "grad_norm": 0.7355244755744934, "learning_rate": 2.341365259492661e-06, "loss": 0.0951, "step": 9899 }, { "epoch": 1.6040181464679195, "grad_norm": 0.8507843017578125, "learning_rate": 2.3409288390898427e-06, "loss": 0.1107, "step": 9900 }, { "epoch": 1.6041801685029164, "grad_norm": 0.7582950592041016, "learning_rate": 2.3404924235542175e-06, "loss": 0.0901, "step": 9901 }, { "epoch": 1.6043421905379132, "grad_norm": 0.9285005927085876, "learning_rate": 2.3400560128991377e-06, "loss": 0.1125, "step": 9902 }, { "epoch": 1.6045042125729099, "grad_norm": 0.7797400951385498, "learning_rate": 2.3396196071379563e-06, "loss": 0.1004, "step": 9903 }, { "epoch": 1.6046662346079068, "grad_norm": 0.8635115623474121, "learning_rate": 2.3391832062840273e-06, "loss": 0.1106, "step": 9904 }, { "epoch": 1.6048282566429033, "grad_norm": 0.7101535797119141, "learning_rate": 2.3387468103507037e-06, "loss": 0.0905, "step": 9905 }, { "epoch": 1.6049902786779002, "grad_norm": 0.8763110041618347, "learning_rate": 2.338310419351337e-06, "loss": 0.1038, "step": 9906 }, { "epoch": 1.605152300712897, "grad_norm": 0.6906706094741821, "learning_rate": 2.3378740332992794e-06, "loss": 0.0851, "step": 9907 }, { "epoch": 1.6053143227478937, "grad_norm": 0.7445785403251648, "learning_rate": 2.3374376522078852e-06, "loss": 0.0896, "step": 9908 }, { "epoch": 1.6054763447828906, "grad_norm": 0.9417932629585266, "learning_rate": 2.337001276090505e-06, "loss": 0.1059, "step": 9909 }, { "epoch": 1.605638366817887, "grad_norm": 0.8038756847381592, "learning_rate": 2.3365649049604917e-06, "loss": 0.0981, "step": 9910 }, { "epoch": 1.605800388852884, "grad_norm": 0.903634786605835, "learning_rate": 2.3361285388311963e-06, "loss": 0.1137, "step": 9911 }, { "epoch": 1.6059624108878807, "grad_norm": 1.037934422492981, "learning_rate": 2.3356921777159705e-06, "loss": 0.1227, "step": 9912 }, { "epoch": 1.6061244329228774, "grad_norm": 0.8287703990936279, "learning_rate": 2.335255821628167e-06, "loss": 0.0974, "step": 9913 }, { "epoch": 1.6062864549578744, "grad_norm": 0.8133599758148193, "learning_rate": 2.334819470581137e-06, "loss": 0.0945, "step": 9914 }, { "epoch": 1.606448476992871, "grad_norm": 0.8264515995979309, "learning_rate": 2.3343831245882316e-06, "loss": 0.108, "step": 9915 }, { "epoch": 1.6066104990278678, "grad_norm": 0.7875205874443054, "learning_rate": 2.3339467836628018e-06, "loss": 0.0986, "step": 9916 }, { "epoch": 1.6067725210628645, "grad_norm": 0.784372091293335, "learning_rate": 2.333510447818198e-06, "loss": 0.1, "step": 9917 }, { "epoch": 1.6069345430978612, "grad_norm": 0.9532830715179443, "learning_rate": 2.3330741170677713e-06, "loss": 0.1321, "step": 9918 }, { "epoch": 1.6070965651328581, "grad_norm": 0.8498433828353882, "learning_rate": 2.3326377914248733e-06, "loss": 0.1047, "step": 9919 }, { "epoch": 1.6072585871678549, "grad_norm": 0.9842066168785095, "learning_rate": 2.3322014709028545e-06, "loss": 0.1192, "step": 9920 }, { "epoch": 1.6074206092028516, "grad_norm": 0.6417335867881775, "learning_rate": 2.3317651555150636e-06, "loss": 0.0823, "step": 9921 }, { "epoch": 1.6075826312378485, "grad_norm": 0.8965574502944946, "learning_rate": 2.3313288452748515e-06, "loss": 0.1156, "step": 9922 }, { "epoch": 1.607744653272845, "grad_norm": 0.810722291469574, "learning_rate": 2.3308925401955694e-06, "loss": 0.0982, "step": 9923 }, { "epoch": 1.607906675307842, "grad_norm": 0.8201885223388672, "learning_rate": 2.3304562402905662e-06, "loss": 0.1068, "step": 9924 }, { "epoch": 1.6080686973428386, "grad_norm": 0.8500423431396484, "learning_rate": 2.3300199455731922e-06, "loss": 0.1053, "step": 9925 }, { "epoch": 1.6082307193778353, "grad_norm": 0.8203928470611572, "learning_rate": 2.329583656056796e-06, "loss": 0.0957, "step": 9926 }, { "epoch": 1.6083927414128323, "grad_norm": 0.7739825248718262, "learning_rate": 2.329147371754727e-06, "loss": 0.1001, "step": 9927 }, { "epoch": 1.6085547634478288, "grad_norm": 0.8251372575759888, "learning_rate": 2.3287110926803354e-06, "loss": 0.1153, "step": 9928 }, { "epoch": 1.6087167854828257, "grad_norm": 0.7434987425804138, "learning_rate": 2.3282748188469704e-06, "loss": 0.0985, "step": 9929 }, { "epoch": 1.6088788075178224, "grad_norm": 1.1180413961410522, "learning_rate": 2.32783855026798e-06, "loss": 0.1098, "step": 9930 }, { "epoch": 1.6090408295528191, "grad_norm": 0.8994811177253723, "learning_rate": 2.3274022869567123e-06, "loss": 0.1136, "step": 9931 }, { "epoch": 1.609202851587816, "grad_norm": 0.8187955021858215, "learning_rate": 2.3269660289265184e-06, "loss": 0.1092, "step": 9932 }, { "epoch": 1.6093648736228126, "grad_norm": 0.894491970539093, "learning_rate": 2.3265297761907447e-06, "loss": 0.1171, "step": 9933 }, { "epoch": 1.6095268956578095, "grad_norm": 0.9280018210411072, "learning_rate": 2.3260935287627408e-06, "loss": 0.1186, "step": 9934 }, { "epoch": 1.6096889176928062, "grad_norm": 0.9082358479499817, "learning_rate": 2.3256572866558533e-06, "loss": 0.1149, "step": 9935 }, { "epoch": 1.609850939727803, "grad_norm": 0.8570298552513123, "learning_rate": 2.3252210498834306e-06, "loss": 0.1134, "step": 9936 }, { "epoch": 1.6100129617627998, "grad_norm": 0.8263987898826599, "learning_rate": 2.3247848184588208e-06, "loss": 0.0962, "step": 9937 }, { "epoch": 1.6101749837977966, "grad_norm": 0.8012087345123291, "learning_rate": 2.3243485923953725e-06, "loss": 0.1005, "step": 9938 }, { "epoch": 1.6103370058327933, "grad_norm": 0.8303671479225159, "learning_rate": 2.323912371706432e-06, "loss": 0.1127, "step": 9939 }, { "epoch": 1.61049902786779, "grad_norm": 0.9363792538642883, "learning_rate": 2.323476156405347e-06, "loss": 0.1046, "step": 9940 }, { "epoch": 1.6106610499027867, "grad_norm": 0.8461704254150391, "learning_rate": 2.323039946505463e-06, "loss": 0.1076, "step": 9941 }, { "epoch": 1.6108230719377836, "grad_norm": 0.7788037061691284, "learning_rate": 2.3226037420201296e-06, "loss": 0.0963, "step": 9942 }, { "epoch": 1.6109850939727803, "grad_norm": 0.9676366448402405, "learning_rate": 2.3221675429626925e-06, "loss": 0.1045, "step": 9943 }, { "epoch": 1.611147116007777, "grad_norm": 0.9113138318061829, "learning_rate": 2.3217313493464977e-06, "loss": 0.108, "step": 9944 }, { "epoch": 1.611309138042774, "grad_norm": 0.6841544508934021, "learning_rate": 2.3212951611848927e-06, "loss": 0.0867, "step": 9945 }, { "epoch": 1.6114711600777705, "grad_norm": 0.7147634625434875, "learning_rate": 2.320858978491222e-06, "loss": 0.0938, "step": 9946 }, { "epoch": 1.6116331821127674, "grad_norm": 0.8967075347900391, "learning_rate": 2.3204228012788346e-06, "loss": 0.1185, "step": 9947 }, { "epoch": 1.6117952041477641, "grad_norm": 0.7635133266448975, "learning_rate": 2.319986629561074e-06, "loss": 0.0941, "step": 9948 }, { "epoch": 1.6119572261827608, "grad_norm": 0.9004403352737427, "learning_rate": 2.319550463351288e-06, "loss": 0.1137, "step": 9949 }, { "epoch": 1.6121192482177578, "grad_norm": 0.7615978121757507, "learning_rate": 2.3191143026628206e-06, "loss": 0.0906, "step": 9950 }, { "epoch": 1.6122812702527543, "grad_norm": 0.8137632608413696, "learning_rate": 2.3186781475090168e-06, "loss": 0.0992, "step": 9951 }, { "epoch": 1.6124432922877512, "grad_norm": 0.8467097282409668, "learning_rate": 2.318241997903224e-06, "loss": 0.1082, "step": 9952 }, { "epoch": 1.612605314322748, "grad_norm": 0.8358951210975647, "learning_rate": 2.317805853858786e-06, "loss": 0.1054, "step": 9953 }, { "epoch": 1.6127673363577446, "grad_norm": 0.8378834128379822, "learning_rate": 2.3173697153890486e-06, "loss": 0.1095, "step": 9954 }, { "epoch": 1.6129293583927415, "grad_norm": 0.7304298281669617, "learning_rate": 2.316933582507354e-06, "loss": 0.0923, "step": 9955 }, { "epoch": 1.613091380427738, "grad_norm": 0.7708770632743835, "learning_rate": 2.31649745522705e-06, "loss": 0.0938, "step": 9956 }, { "epoch": 1.613253402462735, "grad_norm": 0.8786473870277405, "learning_rate": 2.31606133356148e-06, "loss": 0.111, "step": 9957 }, { "epoch": 1.6134154244977317, "grad_norm": 0.724189281463623, "learning_rate": 2.3156252175239883e-06, "loss": 0.0913, "step": 9958 }, { "epoch": 1.6135774465327284, "grad_norm": 0.6984865665435791, "learning_rate": 2.3151891071279183e-06, "loss": 0.0833, "step": 9959 }, { "epoch": 1.6137394685677253, "grad_norm": 0.833328902721405, "learning_rate": 2.3147530023866136e-06, "loss": 0.1095, "step": 9960 }, { "epoch": 1.6139014906027218, "grad_norm": 0.8859132528305054, "learning_rate": 2.3143169033134204e-06, "loss": 0.1241, "step": 9961 }, { "epoch": 1.6140635126377187, "grad_norm": 0.8652672171592712, "learning_rate": 2.3138808099216796e-06, "loss": 0.1024, "step": 9962 }, { "epoch": 1.6142255346727155, "grad_norm": 0.7666911482810974, "learning_rate": 2.313444722224736e-06, "loss": 0.0994, "step": 9963 }, { "epoch": 1.6143875567077122, "grad_norm": 0.6945036053657532, "learning_rate": 2.3130086402359327e-06, "loss": 0.0851, "step": 9964 }, { "epoch": 1.614549578742709, "grad_norm": 0.8811757564544678, "learning_rate": 2.3125725639686116e-06, "loss": 0.11, "step": 9965 }, { "epoch": 1.6147116007777058, "grad_norm": 0.7738437056541443, "learning_rate": 2.312136493436117e-06, "loss": 0.0957, "step": 9966 }, { "epoch": 1.6148736228127025, "grad_norm": 0.8249509334564209, "learning_rate": 2.311700428651791e-06, "loss": 0.1109, "step": 9967 }, { "epoch": 1.6150356448476992, "grad_norm": 0.8863063454627991, "learning_rate": 2.311264369628976e-06, "loss": 0.1033, "step": 9968 }, { "epoch": 1.615197666882696, "grad_norm": 0.7870522141456604, "learning_rate": 2.3108283163810155e-06, "loss": 0.0972, "step": 9969 }, { "epoch": 1.6153596889176929, "grad_norm": 0.8956674337387085, "learning_rate": 2.3103922689212494e-06, "loss": 0.1076, "step": 9970 }, { "epoch": 1.6155217109526896, "grad_norm": 0.7711880207061768, "learning_rate": 2.3099562272630216e-06, "loss": 0.0966, "step": 9971 }, { "epoch": 1.6156837329876863, "grad_norm": 0.8216359615325928, "learning_rate": 2.3095201914196732e-06, "loss": 0.1039, "step": 9972 }, { "epoch": 1.6158457550226832, "grad_norm": 0.7300341725349426, "learning_rate": 2.309084161404547e-06, "loss": 0.0933, "step": 9973 }, { "epoch": 1.6160077770576797, "grad_norm": 0.783735454082489, "learning_rate": 2.308648137230982e-06, "loss": 0.0987, "step": 9974 }, { "epoch": 1.6161697990926767, "grad_norm": 0.8075628876686096, "learning_rate": 2.3082121189123213e-06, "loss": 0.1045, "step": 9975 }, { "epoch": 1.6163318211276734, "grad_norm": 0.8673627972602844, "learning_rate": 2.3077761064619062e-06, "loss": 0.1028, "step": 9976 }, { "epoch": 1.61649384316267, "grad_norm": 0.8210455179214478, "learning_rate": 2.307340099893077e-06, "loss": 0.1024, "step": 9977 }, { "epoch": 1.616655865197667, "grad_norm": 0.803597092628479, "learning_rate": 2.3069040992191745e-06, "loss": 0.0922, "step": 9978 }, { "epoch": 1.6168178872326635, "grad_norm": 0.8294203877449036, "learning_rate": 2.3064681044535385e-06, "loss": 0.1002, "step": 9979 }, { "epoch": 1.6169799092676604, "grad_norm": 0.7242622971534729, "learning_rate": 2.3060321156095107e-06, "loss": 0.0904, "step": 9980 }, { "epoch": 1.6171419313026572, "grad_norm": 0.6747761964797974, "learning_rate": 2.305596132700431e-06, "loss": 0.0841, "step": 9981 }, { "epoch": 1.6173039533376539, "grad_norm": 0.7614217400550842, "learning_rate": 2.3051601557396393e-06, "loss": 0.0929, "step": 9982 }, { "epoch": 1.6174659753726508, "grad_norm": 0.8482229113578796, "learning_rate": 2.304724184740475e-06, "loss": 0.1065, "step": 9983 }, { "epoch": 1.6176279974076473, "grad_norm": 0.8954651951789856, "learning_rate": 2.3042882197162776e-06, "loss": 0.1151, "step": 9984 }, { "epoch": 1.6177900194426442, "grad_norm": 0.9330970048904419, "learning_rate": 2.3038522606803882e-06, "loss": 0.1117, "step": 9985 }, { "epoch": 1.617952041477641, "grad_norm": 0.8899834156036377, "learning_rate": 2.303416307646144e-06, "loss": 0.106, "step": 9986 }, { "epoch": 1.6181140635126376, "grad_norm": 0.8274690508842468, "learning_rate": 2.302980360626886e-06, "loss": 0.1017, "step": 9987 }, { "epoch": 1.6182760855476346, "grad_norm": 0.8075361251831055, "learning_rate": 2.3025444196359513e-06, "loss": 0.1055, "step": 9988 }, { "epoch": 1.6184381075826313, "grad_norm": 0.8113299012184143, "learning_rate": 2.3021084846866793e-06, "loss": 0.1039, "step": 9989 }, { "epoch": 1.618600129617628, "grad_norm": 0.7555704712867737, "learning_rate": 2.3016725557924095e-06, "loss": 0.0869, "step": 9990 }, { "epoch": 1.6187621516526247, "grad_norm": 0.8604114055633545, "learning_rate": 2.3012366329664794e-06, "loss": 0.109, "step": 9991 }, { "epoch": 1.6189241736876214, "grad_norm": 0.8491653800010681, "learning_rate": 2.3008007162222273e-06, "loss": 0.1071, "step": 9992 }, { "epoch": 1.6190861957226184, "grad_norm": 0.7831771969795227, "learning_rate": 2.3003648055729917e-06, "loss": 0.0946, "step": 9993 }, { "epoch": 1.619248217757615, "grad_norm": 0.8524945378303528, "learning_rate": 2.2999289010321092e-06, "loss": 0.1057, "step": 9994 }, { "epoch": 1.6194102397926118, "grad_norm": 0.7014240622520447, "learning_rate": 2.299493002612918e-06, "loss": 0.092, "step": 9995 }, { "epoch": 1.6195722618276087, "grad_norm": 0.8205536007881165, "learning_rate": 2.299057110328757e-06, "loss": 0.1018, "step": 9996 }, { "epoch": 1.6197342838626052, "grad_norm": 0.810691773891449, "learning_rate": 2.298621224192961e-06, "loss": 0.0999, "step": 9997 }, { "epoch": 1.6198963058976021, "grad_norm": 0.7380390763282776, "learning_rate": 2.298185344218868e-06, "loss": 0.0948, "step": 9998 }, { "epoch": 1.6200583279325989, "grad_norm": 0.8573938012123108, "learning_rate": 2.297749470419815e-06, "loss": 0.0986, "step": 9999 }, { "epoch": 1.6202203499675956, "grad_norm": 0.749604344367981, "learning_rate": 2.29731360280914e-06, "loss": 0.1046, "step": 10000 }, { "epoch": 1.6203823720025925, "grad_norm": 0.8711916208267212, "learning_rate": 2.2968777414001773e-06, "loss": 0.1106, "step": 10001 }, { "epoch": 1.620544394037589, "grad_norm": 0.9774971008300781, "learning_rate": 2.2964418862062655e-06, "loss": 0.1258, "step": 10002 }, { "epoch": 1.620706416072586, "grad_norm": 0.9113691449165344, "learning_rate": 2.2960060372407377e-06, "loss": 0.1038, "step": 10003 }, { "epoch": 1.6208684381075826, "grad_norm": 0.7974822521209717, "learning_rate": 2.2955701945169317e-06, "loss": 0.1024, "step": 10004 }, { "epoch": 1.6210304601425793, "grad_norm": 0.7355794310569763, "learning_rate": 2.295134358048184e-06, "loss": 0.093, "step": 10005 }, { "epoch": 1.6211924821775763, "grad_norm": 0.8737595677375793, "learning_rate": 2.294698527847829e-06, "loss": 0.1173, "step": 10006 }, { "epoch": 1.6213545042125728, "grad_norm": 0.8428897261619568, "learning_rate": 2.2942627039292016e-06, "loss": 0.1065, "step": 10007 }, { "epoch": 1.6215165262475697, "grad_norm": 0.7106808423995972, "learning_rate": 2.2938268863056373e-06, "loss": 0.0899, "step": 10008 }, { "epoch": 1.6216785482825664, "grad_norm": 0.989214301109314, "learning_rate": 2.2933910749904724e-06, "loss": 0.1183, "step": 10009 }, { "epoch": 1.6218405703175631, "grad_norm": 0.7430517077445984, "learning_rate": 2.29295526999704e-06, "loss": 0.0899, "step": 10010 }, { "epoch": 1.62200259235256, "grad_norm": 0.9804396033287048, "learning_rate": 2.292519471338676e-06, "loss": 0.1184, "step": 10011 }, { "epoch": 1.6221646143875565, "grad_norm": 0.7713508009910583, "learning_rate": 2.2920836790287134e-06, "loss": 0.0946, "step": 10012 }, { "epoch": 1.6223266364225535, "grad_norm": 0.7687755227088928, "learning_rate": 2.2916478930804865e-06, "loss": 0.1005, "step": 10013 }, { "epoch": 1.6224886584575502, "grad_norm": 0.7584219574928284, "learning_rate": 2.291212113507331e-06, "loss": 0.0925, "step": 10014 }, { "epoch": 1.622650680492547, "grad_norm": 0.6680698394775391, "learning_rate": 2.2907763403225793e-06, "loss": 0.0855, "step": 10015 }, { "epoch": 1.6228127025275438, "grad_norm": 0.7672422528266907, "learning_rate": 2.290340573539565e-06, "loss": 0.0994, "step": 10016 }, { "epoch": 1.6229747245625405, "grad_norm": 0.8922812938690186, "learning_rate": 2.2899048131716223e-06, "loss": 0.1152, "step": 10017 }, { "epoch": 1.6231367465975373, "grad_norm": 0.8483216166496277, "learning_rate": 2.2894690592320827e-06, "loss": 0.1128, "step": 10018 }, { "epoch": 1.623298768632534, "grad_norm": 0.6468700766563416, "learning_rate": 2.2890333117342813e-06, "loss": 0.086, "step": 10019 }, { "epoch": 1.6234607906675307, "grad_norm": 0.7255584001541138, "learning_rate": 2.2885975706915506e-06, "loss": 0.0884, "step": 10020 }, { "epoch": 1.6236228127025276, "grad_norm": 0.8776664137840271, "learning_rate": 2.288161836117222e-06, "loss": 0.1049, "step": 10021 }, { "epoch": 1.6237848347375243, "grad_norm": 0.750275731086731, "learning_rate": 2.287726108024628e-06, "loss": 0.0964, "step": 10022 }, { "epoch": 1.623946856772521, "grad_norm": 0.9269810318946838, "learning_rate": 2.2872903864271017e-06, "loss": 0.112, "step": 10023 }, { "epoch": 1.624108878807518, "grad_norm": 0.8907431364059448, "learning_rate": 2.2868546713379755e-06, "loss": 0.1046, "step": 10024 }, { "epoch": 1.6242709008425145, "grad_norm": 0.8910091519355774, "learning_rate": 2.28641896277058e-06, "loss": 0.0983, "step": 10025 }, { "epoch": 1.6244329228775114, "grad_norm": 0.8178685307502747, "learning_rate": 2.285983260738248e-06, "loss": 0.1033, "step": 10026 }, { "epoch": 1.624594944912508, "grad_norm": 0.8685728907585144, "learning_rate": 2.2855475652543094e-06, "loss": 0.1143, "step": 10027 }, { "epoch": 1.6247569669475048, "grad_norm": 0.996419370174408, "learning_rate": 2.285111876332097e-06, "loss": 0.111, "step": 10028 }, { "epoch": 1.6249189889825018, "grad_norm": 0.9297820925712585, "learning_rate": 2.284676193984941e-06, "loss": 0.1146, "step": 10029 }, { "epoch": 1.6250810110174982, "grad_norm": 0.7859435677528381, "learning_rate": 2.2842405182261725e-06, "loss": 0.0975, "step": 10030 }, { "epoch": 1.6252430330524952, "grad_norm": 0.8805785775184631, "learning_rate": 2.2838048490691223e-06, "loss": 0.104, "step": 10031 }, { "epoch": 1.625405055087492, "grad_norm": 0.8301118612289429, "learning_rate": 2.283369186527119e-06, "loss": 0.1087, "step": 10032 }, { "epoch": 1.6255670771224886, "grad_norm": 0.7605770230293274, "learning_rate": 2.282933530613496e-06, "loss": 0.0949, "step": 10033 }, { "epoch": 1.6257290991574855, "grad_norm": 0.8895880579948425, "learning_rate": 2.282497881341581e-06, "loss": 0.1106, "step": 10034 }, { "epoch": 1.625891121192482, "grad_norm": 0.8832073211669922, "learning_rate": 2.282062238724705e-06, "loss": 0.1151, "step": 10035 }, { "epoch": 1.626053143227479, "grad_norm": 0.6511034965515137, "learning_rate": 2.2816266027761965e-06, "loss": 0.0846, "step": 10036 }, { "epoch": 1.6262151652624757, "grad_norm": 0.7454072833061218, "learning_rate": 2.2811909735093853e-06, "loss": 0.0911, "step": 10037 }, { "epoch": 1.6263771872974724, "grad_norm": 0.9449716806411743, "learning_rate": 2.280755350937602e-06, "loss": 0.1066, "step": 10038 }, { "epoch": 1.6265392093324693, "grad_norm": 0.7572311162948608, "learning_rate": 2.280319735074173e-06, "loss": 0.0937, "step": 10039 }, { "epoch": 1.626701231367466, "grad_norm": 0.8121317028999329, "learning_rate": 2.27988412593243e-06, "loss": 0.1019, "step": 10040 }, { "epoch": 1.6268632534024627, "grad_norm": 0.8382424712181091, "learning_rate": 2.279448523525699e-06, "loss": 0.1101, "step": 10041 }, { "epoch": 1.6270252754374595, "grad_norm": 0.7908496856689453, "learning_rate": 2.279012927867309e-06, "loss": 0.0963, "step": 10042 }, { "epoch": 1.6271872974724562, "grad_norm": 0.8468271493911743, "learning_rate": 2.278577338970589e-06, "loss": 0.1045, "step": 10043 }, { "epoch": 1.627349319507453, "grad_norm": 0.8646363019943237, "learning_rate": 2.2781417568488677e-06, "loss": 0.1113, "step": 10044 }, { "epoch": 1.6275113415424498, "grad_norm": 0.8169398307800293, "learning_rate": 2.2777061815154705e-06, "loss": 0.1056, "step": 10045 }, { "epoch": 1.6276733635774465, "grad_norm": 0.8818169832229614, "learning_rate": 2.277270612983726e-06, "loss": 0.1123, "step": 10046 }, { "epoch": 1.6278353856124435, "grad_norm": 0.8572877049446106, "learning_rate": 2.276835051266963e-06, "loss": 0.0976, "step": 10047 }, { "epoch": 1.62799740764744, "grad_norm": 0.8052610754966736, "learning_rate": 2.2763994963785066e-06, "loss": 0.1041, "step": 10048 }, { "epoch": 1.6281594296824369, "grad_norm": 0.7382464408874512, "learning_rate": 2.275963948331685e-06, "loss": 0.0905, "step": 10049 }, { "epoch": 1.6283214517174336, "grad_norm": 0.7890332341194153, "learning_rate": 2.2755284071398243e-06, "loss": 0.1029, "step": 10050 }, { "epoch": 1.6284834737524303, "grad_norm": 0.794537365436554, "learning_rate": 2.27509287281625e-06, "loss": 0.0951, "step": 10051 }, { "epoch": 1.6286454957874272, "grad_norm": 0.7802528738975525, "learning_rate": 2.2746573453742905e-06, "loss": 0.0916, "step": 10052 }, { "epoch": 1.6288075178224237, "grad_norm": 0.8495976328849792, "learning_rate": 2.2742218248272714e-06, "loss": 0.1044, "step": 10053 }, { "epoch": 1.6289695398574207, "grad_norm": 0.8333568572998047, "learning_rate": 2.2737863111885175e-06, "loss": 0.1017, "step": 10054 }, { "epoch": 1.6291315618924174, "grad_norm": 0.7988643646240234, "learning_rate": 2.273350804471355e-06, "loss": 0.1004, "step": 10055 }, { "epoch": 1.629293583927414, "grad_norm": 0.8200765252113342, "learning_rate": 2.2729153046891095e-06, "loss": 0.1005, "step": 10056 }, { "epoch": 1.629455605962411, "grad_norm": 0.7785143852233887, "learning_rate": 2.272479811855106e-06, "loss": 0.0918, "step": 10057 }, { "epoch": 1.6296176279974075, "grad_norm": 0.7829576730728149, "learning_rate": 2.2720443259826702e-06, "loss": 0.0891, "step": 10058 }, { "epoch": 1.6297796500324044, "grad_norm": 0.8104037046432495, "learning_rate": 2.271608847085126e-06, "loss": 0.0981, "step": 10059 }, { "epoch": 1.6299416720674011, "grad_norm": 0.9911179542541504, "learning_rate": 2.2711733751757983e-06, "loss": 0.1145, "step": 10060 }, { "epoch": 1.6301036941023979, "grad_norm": 0.7678511142730713, "learning_rate": 2.270737910268011e-06, "loss": 0.0952, "step": 10061 }, { "epoch": 1.6302657161373948, "grad_norm": 0.9023809432983398, "learning_rate": 2.27030245237509e-06, "loss": 0.0994, "step": 10062 }, { "epoch": 1.6304277381723913, "grad_norm": 0.8956974148750305, "learning_rate": 2.2698670015103574e-06, "loss": 0.1128, "step": 10063 }, { "epoch": 1.6305897602073882, "grad_norm": 0.8878682255744934, "learning_rate": 2.2694315576871384e-06, "loss": 0.0983, "step": 10064 }, { "epoch": 1.630751782242385, "grad_norm": 0.7095435857772827, "learning_rate": 2.2689961209187543e-06, "loss": 0.0903, "step": 10065 }, { "epoch": 1.6309138042773816, "grad_norm": 0.8130254149436951, "learning_rate": 2.268560691218531e-06, "loss": 0.0988, "step": 10066 }, { "epoch": 1.6310758263123786, "grad_norm": 0.8411709070205688, "learning_rate": 2.26812526859979e-06, "loss": 0.1038, "step": 10067 }, { "epoch": 1.6312378483473753, "grad_norm": 0.9909535050392151, "learning_rate": 2.2676898530758554e-06, "loss": 0.12, "step": 10068 }, { "epoch": 1.631399870382372, "grad_norm": 0.8369352221488953, "learning_rate": 2.2672544446600485e-06, "loss": 0.0975, "step": 10069 }, { "epoch": 1.6315618924173687, "grad_norm": 0.8436316847801208, "learning_rate": 2.266819043365692e-06, "loss": 0.102, "step": 10070 }, { "epoch": 1.6317239144523654, "grad_norm": 0.7915019392967224, "learning_rate": 2.2663836492061097e-06, "loss": 0.1006, "step": 10071 }, { "epoch": 1.6318859364873624, "grad_norm": 0.6947129964828491, "learning_rate": 2.265948262194621e-06, "loss": 0.0943, "step": 10072 }, { "epoch": 1.632047958522359, "grad_norm": 0.738064169883728, "learning_rate": 2.2655128823445507e-06, "loss": 0.0989, "step": 10073 }, { "epoch": 1.6322099805573558, "grad_norm": 0.9031091928482056, "learning_rate": 2.2650775096692176e-06, "loss": 0.1185, "step": 10074 }, { "epoch": 1.6323720025923527, "grad_norm": 0.736219048500061, "learning_rate": 2.264642144181944e-06, "loss": 0.0998, "step": 10075 }, { "epoch": 1.6325340246273492, "grad_norm": 0.7197458744049072, "learning_rate": 2.2642067858960514e-06, "loss": 0.0991, "step": 10076 }, { "epoch": 1.6326960466623461, "grad_norm": 0.8605630993843079, "learning_rate": 2.263771434824861e-06, "loss": 0.1069, "step": 10077 }, { "epoch": 1.6328580686973428, "grad_norm": 0.7979494333267212, "learning_rate": 2.263336090981693e-06, "loss": 0.0967, "step": 10078 }, { "epoch": 1.6330200907323396, "grad_norm": 0.8447449207305908, "learning_rate": 2.262900754379868e-06, "loss": 0.099, "step": 10079 }, { "epoch": 1.6331821127673365, "grad_norm": 0.9383016228675842, "learning_rate": 2.2624654250327054e-06, "loss": 0.1159, "step": 10080 }, { "epoch": 1.633344134802333, "grad_norm": 0.8210467100143433, "learning_rate": 2.2620301029535264e-06, "loss": 0.1078, "step": 10081 }, { "epoch": 1.63350615683733, "grad_norm": 0.8023772239685059, "learning_rate": 2.2615947881556506e-06, "loss": 0.101, "step": 10082 }, { "epoch": 1.6336681788723266, "grad_norm": 0.8335037231445312, "learning_rate": 2.2611594806523975e-06, "loss": 0.103, "step": 10083 }, { "epoch": 1.6338302009073233, "grad_norm": 0.7512911558151245, "learning_rate": 2.2607241804570864e-06, "loss": 0.0998, "step": 10084 }, { "epoch": 1.6339922229423203, "grad_norm": 0.8822583556175232, "learning_rate": 2.2602888875830346e-06, "loss": 0.1034, "step": 10085 }, { "epoch": 1.6341542449773168, "grad_norm": 0.830302357673645, "learning_rate": 2.2598536020435644e-06, "loss": 0.1027, "step": 10086 }, { "epoch": 1.6343162670123137, "grad_norm": 0.7789014577865601, "learning_rate": 2.2594183238519923e-06, "loss": 0.0965, "step": 10087 }, { "epoch": 1.6344782890473104, "grad_norm": 0.7837060689926147, "learning_rate": 2.258983053021638e-06, "loss": 0.1004, "step": 10088 }, { "epoch": 1.6346403110823071, "grad_norm": 0.8886387348175049, "learning_rate": 2.258547789565818e-06, "loss": 0.1169, "step": 10089 }, { "epoch": 1.634802333117304, "grad_norm": 0.9173817038536072, "learning_rate": 2.2581125334978517e-06, "loss": 0.1102, "step": 10090 }, { "epoch": 1.6349643551523008, "grad_norm": 0.8326734304428101, "learning_rate": 2.2576772848310572e-06, "loss": 0.1016, "step": 10091 }, { "epoch": 1.6351263771872975, "grad_norm": 0.7596706748008728, "learning_rate": 2.257242043578751e-06, "loss": 0.0957, "step": 10092 }, { "epoch": 1.6352883992222942, "grad_norm": 0.7185407876968384, "learning_rate": 2.256806809754251e-06, "loss": 0.084, "step": 10093 }, { "epoch": 1.635450421257291, "grad_norm": 0.8314585089683533, "learning_rate": 2.2563715833708726e-06, "loss": 0.1079, "step": 10094 }, { "epoch": 1.6356124432922878, "grad_norm": 0.804526686668396, "learning_rate": 2.2559363644419357e-06, "loss": 0.1031, "step": 10095 }, { "epoch": 1.6357744653272845, "grad_norm": 0.7893649935722351, "learning_rate": 2.255501152980755e-06, "loss": 0.0935, "step": 10096 }, { "epoch": 1.6359364873622813, "grad_norm": 0.9371634721755981, "learning_rate": 2.255065949000648e-06, "loss": 0.1163, "step": 10097 }, { "epoch": 1.6360985093972782, "grad_norm": 0.8292739987373352, "learning_rate": 2.2546307525149293e-06, "loss": 0.1, "step": 10098 }, { "epoch": 1.6362605314322747, "grad_norm": 0.7563901543617249, "learning_rate": 2.2541955635369156e-06, "loss": 0.0997, "step": 10099 }, { "epoch": 1.6364225534672716, "grad_norm": 0.8337042927742004, "learning_rate": 2.253760382079924e-06, "loss": 0.1052, "step": 10100 }, { "epoch": 1.6365845755022683, "grad_norm": 0.7527819275856018, "learning_rate": 2.253325208157268e-06, "loss": 0.0978, "step": 10101 }, { "epoch": 1.636746597537265, "grad_norm": 0.8317774534225464, "learning_rate": 2.2528900417822636e-06, "loss": 0.1005, "step": 10102 }, { "epoch": 1.636908619572262, "grad_norm": 0.8075408935546875, "learning_rate": 2.252454882968227e-06, "loss": 0.1078, "step": 10103 }, { "epoch": 1.6370706416072585, "grad_norm": 0.9008114337921143, "learning_rate": 2.2520197317284702e-06, "loss": 0.1172, "step": 10104 }, { "epoch": 1.6372326636422554, "grad_norm": 0.8030856251716614, "learning_rate": 2.2515845880763102e-06, "loss": 0.099, "step": 10105 }, { "epoch": 1.637394685677252, "grad_norm": 0.8751403093338013, "learning_rate": 2.2511494520250613e-06, "loss": 0.1087, "step": 10106 }, { "epoch": 1.6375567077122488, "grad_norm": 0.8014907836914062, "learning_rate": 2.2507143235880364e-06, "loss": 0.0864, "step": 10107 }, { "epoch": 1.6377187297472457, "grad_norm": 0.7244542241096497, "learning_rate": 2.2502792027785508e-06, "loss": 0.0942, "step": 10108 }, { "epoch": 1.6378807517822422, "grad_norm": 0.9491717219352722, "learning_rate": 2.249844089609916e-06, "loss": 0.119, "step": 10109 }, { "epoch": 1.6380427738172392, "grad_norm": 0.730972170829773, "learning_rate": 2.249408984095447e-06, "loss": 0.0864, "step": 10110 }, { "epoch": 1.6382047958522359, "grad_norm": 0.7497561573982239, "learning_rate": 2.248973886248457e-06, "loss": 0.1, "step": 10111 }, { "epoch": 1.6383668178872326, "grad_norm": 0.8914108276367188, "learning_rate": 2.248538796082259e-06, "loss": 0.1044, "step": 10112 }, { "epoch": 1.6385288399222295, "grad_norm": 0.8104687929153442, "learning_rate": 2.248103713610164e-06, "loss": 0.104, "step": 10113 }, { "epoch": 1.638690861957226, "grad_norm": 0.851868748664856, "learning_rate": 2.2476686388454867e-06, "loss": 0.1021, "step": 10114 }, { "epoch": 1.638852883992223, "grad_norm": 0.8389064073562622, "learning_rate": 2.247233571801539e-06, "loss": 0.1069, "step": 10115 }, { "epoch": 1.6390149060272197, "grad_norm": 0.7837998867034912, "learning_rate": 2.2467985124916314e-06, "loss": 0.0959, "step": 10116 }, { "epoch": 1.6391769280622164, "grad_norm": 0.7479491233825684, "learning_rate": 2.2463634609290776e-06, "loss": 0.1026, "step": 10117 }, { "epoch": 1.6393389500972133, "grad_norm": 0.7724059820175171, "learning_rate": 2.2459284171271863e-06, "loss": 0.1027, "step": 10118 }, { "epoch": 1.63950097213221, "grad_norm": 0.7824938893318176, "learning_rate": 2.245493381099272e-06, "loss": 0.1015, "step": 10119 }, { "epoch": 1.6396629941672067, "grad_norm": 0.8370165824890137, "learning_rate": 2.2450583528586437e-06, "loss": 0.112, "step": 10120 }, { "epoch": 1.6398250162022034, "grad_norm": 0.8152546286582947, "learning_rate": 2.244623332418614e-06, "loss": 0.1056, "step": 10121 }, { "epoch": 1.6399870382372002, "grad_norm": 0.7992122173309326, "learning_rate": 2.244188319792491e-06, "loss": 0.0924, "step": 10122 }, { "epoch": 1.640149060272197, "grad_norm": 0.7578029036521912, "learning_rate": 2.243753314993586e-06, "loss": 0.0947, "step": 10123 }, { "epoch": 1.6403110823071938, "grad_norm": 0.8097305297851562, "learning_rate": 2.243318318035211e-06, "loss": 0.1012, "step": 10124 }, { "epoch": 1.6404731043421905, "grad_norm": 0.7099080681800842, "learning_rate": 2.2428833289306735e-06, "loss": 0.0909, "step": 10125 }, { "epoch": 1.6406351263771874, "grad_norm": 0.8755943179130554, "learning_rate": 2.2424483476932847e-06, "loss": 0.1099, "step": 10126 }, { "epoch": 1.640797148412184, "grad_norm": 0.7338682413101196, "learning_rate": 2.2420133743363524e-06, "loss": 0.0978, "step": 10127 }, { "epoch": 1.6409591704471809, "grad_norm": 0.7446590065956116, "learning_rate": 2.241578408873186e-06, "loss": 0.0917, "step": 10128 }, { "epoch": 1.6411211924821776, "grad_norm": 0.8787501454353333, "learning_rate": 2.2411434513170955e-06, "loss": 0.1037, "step": 10129 }, { "epoch": 1.6412832145171743, "grad_norm": 0.73140549659729, "learning_rate": 2.2407085016813895e-06, "loss": 0.0915, "step": 10130 }, { "epoch": 1.6414452365521712, "grad_norm": 0.6761218905448914, "learning_rate": 2.2402735599793754e-06, "loss": 0.0846, "step": 10131 }, { "epoch": 1.6416072585871677, "grad_norm": 0.9457839131355286, "learning_rate": 2.239838626224361e-06, "loss": 0.1162, "step": 10132 }, { "epoch": 1.6417692806221647, "grad_norm": 0.7481329441070557, "learning_rate": 2.2394037004296566e-06, "loss": 0.0894, "step": 10133 }, { "epoch": 1.6419313026571614, "grad_norm": 0.8582826852798462, "learning_rate": 2.2389687826085675e-06, "loss": 0.1031, "step": 10134 }, { "epoch": 1.642093324692158, "grad_norm": 0.9034931063652039, "learning_rate": 2.2385338727744027e-06, "loss": 0.1094, "step": 10135 }, { "epoch": 1.642255346727155, "grad_norm": 0.8110890984535217, "learning_rate": 2.238098970940468e-06, "loss": 0.0943, "step": 10136 }, { "epoch": 1.6424173687621515, "grad_norm": 0.7057623863220215, "learning_rate": 2.23766407712007e-06, "loss": 0.0871, "step": 10137 }, { "epoch": 1.6425793907971484, "grad_norm": 0.8530978560447693, "learning_rate": 2.2372291913265177e-06, "loss": 0.0934, "step": 10138 }, { "epoch": 1.6427414128321451, "grad_norm": 0.8763788342475891, "learning_rate": 2.2367943135731164e-06, "loss": 0.1104, "step": 10139 }, { "epoch": 1.6429034348671419, "grad_norm": 0.8446993827819824, "learning_rate": 2.236359443873172e-06, "loss": 0.1086, "step": 10140 }, { "epoch": 1.6430654569021388, "grad_norm": 0.9074305891990662, "learning_rate": 2.2359245822399908e-06, "loss": 0.1015, "step": 10141 }, { "epoch": 1.6432274789371355, "grad_norm": 0.8572295904159546, "learning_rate": 2.2354897286868773e-06, "loss": 0.1, "step": 10142 }, { "epoch": 1.6433895009721322, "grad_norm": 0.8138932585716248, "learning_rate": 2.2350548832271386e-06, "loss": 0.0973, "step": 10143 }, { "epoch": 1.643551523007129, "grad_norm": 0.9844521880149841, "learning_rate": 2.23462004587408e-06, "loss": 0.1023, "step": 10144 }, { "epoch": 1.6437135450421256, "grad_norm": 0.794780433177948, "learning_rate": 2.2341852166410048e-06, "loss": 0.0918, "step": 10145 }, { "epoch": 1.6438755670771226, "grad_norm": 0.8207656145095825, "learning_rate": 2.233750395541219e-06, "loss": 0.0913, "step": 10146 }, { "epoch": 1.6440375891121193, "grad_norm": 0.7891299724578857, "learning_rate": 2.233315582588026e-06, "loss": 0.0977, "step": 10147 }, { "epoch": 1.644199611147116, "grad_norm": 0.7756161689758301, "learning_rate": 2.2328807777947323e-06, "loss": 0.094, "step": 10148 }, { "epoch": 1.644361633182113, "grad_norm": 0.9181487560272217, "learning_rate": 2.23244598117464e-06, "loss": 0.1011, "step": 10149 }, { "epoch": 1.6445236552171094, "grad_norm": 0.7459124326705933, "learning_rate": 2.232011192741053e-06, "loss": 0.0858, "step": 10150 }, { "epoch": 1.6446856772521063, "grad_norm": 0.8103591203689575, "learning_rate": 2.231576412507275e-06, "loss": 0.1069, "step": 10151 }, { "epoch": 1.644847699287103, "grad_norm": 0.9799508452415466, "learning_rate": 2.2311416404866085e-06, "loss": 0.1186, "step": 10152 }, { "epoch": 1.6450097213220998, "grad_norm": 0.7545525431632996, "learning_rate": 2.2307068766923584e-06, "loss": 0.0958, "step": 10153 }, { "epoch": 1.6451717433570967, "grad_norm": 0.8151463866233826, "learning_rate": 2.2302721211378254e-06, "loss": 0.0976, "step": 10154 }, { "epoch": 1.6453337653920932, "grad_norm": 0.8132693767547607, "learning_rate": 2.229837373836313e-06, "loss": 0.1047, "step": 10155 }, { "epoch": 1.6454957874270901, "grad_norm": 0.6447371244430542, "learning_rate": 2.2294026348011223e-06, "loss": 0.0838, "step": 10156 }, { "epoch": 1.6456578094620868, "grad_norm": 0.8918012380599976, "learning_rate": 2.228967904045558e-06, "loss": 0.1196, "step": 10157 }, { "epoch": 1.6458198314970836, "grad_norm": 0.7891850471496582, "learning_rate": 2.2285331815829187e-06, "loss": 0.1002, "step": 10158 }, { "epoch": 1.6459818535320805, "grad_norm": 0.7595945000648499, "learning_rate": 2.2280984674265077e-06, "loss": 0.0927, "step": 10159 }, { "epoch": 1.646143875567077, "grad_norm": 0.8276386857032776, "learning_rate": 2.227663761589625e-06, "loss": 0.1031, "step": 10160 }, { "epoch": 1.646305897602074, "grad_norm": 0.8383728861808777, "learning_rate": 2.227229064085572e-06, "loss": 0.0979, "step": 10161 }, { "epoch": 1.6464679196370706, "grad_norm": 0.6650486588478088, "learning_rate": 2.2267943749276503e-06, "loss": 0.0886, "step": 10162 }, { "epoch": 1.6466299416720673, "grad_norm": 0.7667943835258484, "learning_rate": 2.2263596941291595e-06, "loss": 0.0959, "step": 10163 }, { "epoch": 1.6467919637070643, "grad_norm": 0.9279943108558655, "learning_rate": 2.225925021703399e-06, "loss": 0.1164, "step": 10164 }, { "epoch": 1.6469539857420608, "grad_norm": 0.608851969242096, "learning_rate": 2.2254903576636713e-06, "loss": 0.076, "step": 10165 }, { "epoch": 1.6471160077770577, "grad_norm": 0.7714102268218994, "learning_rate": 2.2250557020232724e-06, "loss": 0.0894, "step": 10166 }, { "epoch": 1.6472780298120544, "grad_norm": 0.9038023352622986, "learning_rate": 2.2246210547955043e-06, "loss": 0.1038, "step": 10167 }, { "epoch": 1.6474400518470511, "grad_norm": 0.8809518814086914, "learning_rate": 2.2241864159936664e-06, "loss": 0.1081, "step": 10168 }, { "epoch": 1.647602073882048, "grad_norm": 0.7901051640510559, "learning_rate": 2.2237517856310558e-06, "loss": 0.0955, "step": 10169 }, { "epoch": 1.6477640959170448, "grad_norm": 0.8911927938461304, "learning_rate": 2.223317163720973e-06, "loss": 0.1086, "step": 10170 }, { "epoch": 1.6479261179520415, "grad_norm": 0.7963345050811768, "learning_rate": 2.2228825502767133e-06, "loss": 0.0976, "step": 10171 }, { "epoch": 1.6480881399870384, "grad_norm": 0.902242124080658, "learning_rate": 2.222447945311579e-06, "loss": 0.1149, "step": 10172 }, { "epoch": 1.648250162022035, "grad_norm": 0.8996853232383728, "learning_rate": 2.2220133488388652e-06, "loss": 0.118, "step": 10173 }, { "epoch": 1.6484121840570318, "grad_norm": 0.9371250867843628, "learning_rate": 2.2215787608718706e-06, "loss": 0.1137, "step": 10174 }, { "epoch": 1.6485742060920285, "grad_norm": 0.9435984492301941, "learning_rate": 2.221144181423892e-06, "loss": 0.1131, "step": 10175 }, { "epoch": 1.6487362281270252, "grad_norm": 0.7899986505508423, "learning_rate": 2.220709610508226e-06, "loss": 0.095, "step": 10176 }, { "epoch": 1.6488982501620222, "grad_norm": 0.7958756685256958, "learning_rate": 2.220275048138171e-06, "loss": 0.096, "step": 10177 }, { "epoch": 1.6490602721970187, "grad_norm": 0.7749899625778198, "learning_rate": 2.2198404943270217e-06, "loss": 0.0984, "step": 10178 }, { "epoch": 1.6492222942320156, "grad_norm": 0.7689141631126404, "learning_rate": 2.2194059490880764e-06, "loss": 0.0908, "step": 10179 }, { "epoch": 1.6493843162670123, "grad_norm": 0.869452714920044, "learning_rate": 2.218971412434628e-06, "loss": 0.1048, "step": 10180 }, { "epoch": 1.649546338302009, "grad_norm": 0.840836226940155, "learning_rate": 2.2185368843799764e-06, "loss": 0.1033, "step": 10181 }, { "epoch": 1.649708360337006, "grad_norm": 0.8797898888587952, "learning_rate": 2.218102364937414e-06, "loss": 0.1093, "step": 10182 }, { "epoch": 1.6498703823720025, "grad_norm": 0.7758781909942627, "learning_rate": 2.217667854120238e-06, "loss": 0.094, "step": 10183 }, { "epoch": 1.6500324044069994, "grad_norm": 0.830178439617157, "learning_rate": 2.2172333519417415e-06, "loss": 0.1035, "step": 10184 }, { "epoch": 1.650194426441996, "grad_norm": 0.8420971632003784, "learning_rate": 2.2167988584152198e-06, "loss": 0.1108, "step": 10185 }, { "epoch": 1.6503564484769928, "grad_norm": 0.8024832606315613, "learning_rate": 2.2163643735539688e-06, "loss": 0.0986, "step": 10186 }, { "epoch": 1.6505184705119897, "grad_norm": 0.8306187391281128, "learning_rate": 2.215929897371281e-06, "loss": 0.1036, "step": 10187 }, { "epoch": 1.6506804925469862, "grad_norm": 0.7863216400146484, "learning_rate": 2.2154954298804514e-06, "loss": 0.1019, "step": 10188 }, { "epoch": 1.6508425145819832, "grad_norm": 0.7025604248046875, "learning_rate": 2.215060971094773e-06, "loss": 0.0838, "step": 10189 }, { "epoch": 1.6510045366169799, "grad_norm": 0.9769557118415833, "learning_rate": 2.214626521027538e-06, "loss": 0.0856, "step": 10190 }, { "epoch": 1.6511665586519766, "grad_norm": 0.7868211269378662, "learning_rate": 2.214192079692042e-06, "loss": 0.0988, "step": 10191 }, { "epoch": 1.6513285806869735, "grad_norm": 0.8536260724067688, "learning_rate": 2.213757647101577e-06, "loss": 0.1069, "step": 10192 }, { "epoch": 1.6514906027219702, "grad_norm": 0.8652046322822571, "learning_rate": 2.2133232232694354e-06, "loss": 0.1003, "step": 10193 }, { "epoch": 1.651652624756967, "grad_norm": 0.7852055430412292, "learning_rate": 2.2128888082089093e-06, "loss": 0.0992, "step": 10194 }, { "epoch": 1.6518146467919637, "grad_norm": 0.7507728338241577, "learning_rate": 2.2124544019332898e-06, "loss": 0.0894, "step": 10195 }, { "epoch": 1.6519766688269604, "grad_norm": 0.8173365592956543, "learning_rate": 2.2120200044558705e-06, "loss": 0.1081, "step": 10196 }, { "epoch": 1.6521386908619573, "grad_norm": 0.9333679676055908, "learning_rate": 2.211585615789943e-06, "loss": 0.1155, "step": 10197 }, { "epoch": 1.652300712896954, "grad_norm": 0.861423671245575, "learning_rate": 2.2111512359487967e-06, "loss": 0.0958, "step": 10198 }, { "epoch": 1.6524627349319507, "grad_norm": 0.7481124997138977, "learning_rate": 2.2107168649457233e-06, "loss": 0.097, "step": 10199 }, { "epoch": 1.6526247569669477, "grad_norm": 0.8404136896133423, "learning_rate": 2.2102825027940143e-06, "loss": 0.0984, "step": 10200 }, { "epoch": 1.6527867790019442, "grad_norm": 0.7924706339836121, "learning_rate": 2.20984814950696e-06, "loss": 0.103, "step": 10201 }, { "epoch": 1.652948801036941, "grad_norm": 0.8639355301856995, "learning_rate": 2.2094138050978496e-06, "loss": 0.1058, "step": 10202 }, { "epoch": 1.6531108230719378, "grad_norm": 0.7134700417518616, "learning_rate": 2.2089794695799744e-06, "loss": 0.093, "step": 10203 }, { "epoch": 1.6532728451069345, "grad_norm": 0.7675890922546387, "learning_rate": 2.2085451429666215e-06, "loss": 0.0912, "step": 10204 }, { "epoch": 1.6534348671419314, "grad_norm": 0.9659287929534912, "learning_rate": 2.208110825271083e-06, "loss": 0.1052, "step": 10205 }, { "epoch": 1.653596889176928, "grad_norm": 0.821732759475708, "learning_rate": 2.207676516506647e-06, "loss": 0.0996, "step": 10206 }, { "epoch": 1.6537589112119249, "grad_norm": 0.8499715924263, "learning_rate": 2.2072422166866024e-06, "loss": 0.1036, "step": 10207 }, { "epoch": 1.6539209332469216, "grad_norm": 0.8570707440376282, "learning_rate": 2.206807925824237e-06, "loss": 0.0909, "step": 10208 }, { "epoch": 1.6540829552819183, "grad_norm": 0.8913640975952148, "learning_rate": 2.206373643932839e-06, "loss": 0.1169, "step": 10209 }, { "epoch": 1.6542449773169152, "grad_norm": 0.7439228296279907, "learning_rate": 2.205939371025698e-06, "loss": 0.0964, "step": 10210 }, { "epoch": 1.6544069993519117, "grad_norm": 0.8434868454933167, "learning_rate": 2.2055051071161e-06, "loss": 0.1139, "step": 10211 }, { "epoch": 1.6545690213869086, "grad_norm": 0.8420630097389221, "learning_rate": 2.205070852217334e-06, "loss": 0.1057, "step": 10212 }, { "epoch": 1.6547310434219054, "grad_norm": 1.035088062286377, "learning_rate": 2.204636606342685e-06, "loss": 0.112, "step": 10213 }, { "epoch": 1.654893065456902, "grad_norm": 0.7095579504966736, "learning_rate": 2.204202369505441e-06, "loss": 0.0954, "step": 10214 }, { "epoch": 1.655055087491899, "grad_norm": 0.8334465622901917, "learning_rate": 2.2037681417188895e-06, "loss": 0.1064, "step": 10215 }, { "epoch": 1.6552171095268955, "grad_norm": 0.7914650440216064, "learning_rate": 2.203333922996316e-06, "loss": 0.1005, "step": 10216 }, { "epoch": 1.6553791315618924, "grad_norm": 0.6868200898170471, "learning_rate": 2.2028997133510065e-06, "loss": 0.0868, "step": 10217 }, { "epoch": 1.6555411535968891, "grad_norm": 0.7120256423950195, "learning_rate": 2.202465512796247e-06, "loss": 0.0839, "step": 10218 }, { "epoch": 1.6557031756318858, "grad_norm": 0.8592171669006348, "learning_rate": 2.2020313213453216e-06, "loss": 0.1126, "step": 10219 }, { "epoch": 1.6558651976668828, "grad_norm": 0.8605828285217285, "learning_rate": 2.2015971390115172e-06, "loss": 0.0982, "step": 10220 }, { "epoch": 1.6560272197018795, "grad_norm": 0.75331050157547, "learning_rate": 2.2011629658081194e-06, "loss": 0.0965, "step": 10221 }, { "epoch": 1.6561892417368762, "grad_norm": 0.8181818127632141, "learning_rate": 2.2007288017484105e-06, "loss": 0.0948, "step": 10222 }, { "epoch": 1.6563512637718731, "grad_norm": 0.8317630290985107, "learning_rate": 2.2002946468456758e-06, "loss": 0.0963, "step": 10223 }, { "epoch": 1.6565132858068696, "grad_norm": 0.8039714694023132, "learning_rate": 2.1998605011131997e-06, "loss": 0.0992, "step": 10224 }, { "epoch": 1.6566753078418666, "grad_norm": 0.8288991451263428, "learning_rate": 2.199426364564267e-06, "loss": 0.1062, "step": 10225 }, { "epoch": 1.6568373298768633, "grad_norm": 0.781379222869873, "learning_rate": 2.19899223721216e-06, "loss": 0.0961, "step": 10226 }, { "epoch": 1.65699935191186, "grad_norm": 0.8361613750457764, "learning_rate": 2.1985581190701617e-06, "loss": 0.1011, "step": 10227 }, { "epoch": 1.657161373946857, "grad_norm": 0.8540022373199463, "learning_rate": 2.1981240101515548e-06, "loss": 0.1012, "step": 10228 }, { "epoch": 1.6573233959818534, "grad_norm": 0.7892126441001892, "learning_rate": 2.197689910469623e-06, "loss": 0.087, "step": 10229 }, { "epoch": 1.6574854180168503, "grad_norm": 0.9751836061477661, "learning_rate": 2.1972558200376497e-06, "loss": 0.1122, "step": 10230 }, { "epoch": 1.657647440051847, "grad_norm": 0.9051089882850647, "learning_rate": 2.1968217388689145e-06, "loss": 0.1043, "step": 10231 }, { "epoch": 1.6578094620868438, "grad_norm": 0.783577024936676, "learning_rate": 2.1963876669767008e-06, "loss": 0.094, "step": 10232 }, { "epoch": 1.6579714841218407, "grad_norm": 0.835684597492218, "learning_rate": 2.1959536043742887e-06, "loss": 0.0975, "step": 10233 }, { "epoch": 1.6581335061568372, "grad_norm": 0.8699052929878235, "learning_rate": 2.1955195510749614e-06, "loss": 0.1112, "step": 10234 }, { "epoch": 1.6582955281918341, "grad_norm": 0.8392834067344666, "learning_rate": 2.1950855070919992e-06, "loss": 0.1118, "step": 10235 }, { "epoch": 1.6584575502268308, "grad_norm": 0.7662588357925415, "learning_rate": 2.1946514724386827e-06, "loss": 0.1046, "step": 10236 }, { "epoch": 1.6586195722618275, "grad_norm": 0.8604794144630432, "learning_rate": 2.194217447128292e-06, "loss": 0.1106, "step": 10237 }, { "epoch": 1.6587815942968245, "grad_norm": 0.7735698223114014, "learning_rate": 2.1937834311741066e-06, "loss": 0.091, "step": 10238 }, { "epoch": 1.658943616331821, "grad_norm": 0.8237504959106445, "learning_rate": 2.1933494245894087e-06, "loss": 0.0916, "step": 10239 }, { "epoch": 1.659105638366818, "grad_norm": 0.7927703857421875, "learning_rate": 2.192915427387475e-06, "loss": 0.0967, "step": 10240 }, { "epoch": 1.6592676604018146, "grad_norm": 0.7646491527557373, "learning_rate": 2.1924814395815875e-06, "loss": 0.0996, "step": 10241 }, { "epoch": 1.6594296824368113, "grad_norm": 0.7301465272903442, "learning_rate": 2.1920474611850225e-06, "loss": 0.0896, "step": 10242 }, { "epoch": 1.6595917044718083, "grad_norm": 0.7497079372406006, "learning_rate": 2.19161349221106e-06, "loss": 0.0904, "step": 10243 }, { "epoch": 1.659753726506805, "grad_norm": 0.8112010955810547, "learning_rate": 2.1911795326729784e-06, "loss": 0.0977, "step": 10244 }, { "epoch": 1.6599157485418017, "grad_norm": 0.7948684692382812, "learning_rate": 2.1907455825840568e-06, "loss": 0.0968, "step": 10245 }, { "epoch": 1.6600777705767984, "grad_norm": 0.7585175633430481, "learning_rate": 2.190311641957571e-06, "loss": 0.0896, "step": 10246 }, { "epoch": 1.660239792611795, "grad_norm": 0.7399813532829285, "learning_rate": 2.189877710806799e-06, "loss": 0.0879, "step": 10247 }, { "epoch": 1.660401814646792, "grad_norm": 0.7585265636444092, "learning_rate": 2.18944378914502e-06, "loss": 0.0918, "step": 10248 }, { "epoch": 1.6605638366817888, "grad_norm": 0.9892318844795227, "learning_rate": 2.189009876985509e-06, "loss": 0.1184, "step": 10249 }, { "epoch": 1.6607258587167855, "grad_norm": 0.8371285200119019, "learning_rate": 2.188575974341543e-06, "loss": 0.103, "step": 10250 }, { "epoch": 1.6608878807517824, "grad_norm": 0.7434375882148743, "learning_rate": 2.188142081226399e-06, "loss": 0.0905, "step": 10251 }, { "epoch": 1.6610499027867789, "grad_norm": 0.7424872517585754, "learning_rate": 2.1877081976533515e-06, "loss": 0.0832, "step": 10252 }, { "epoch": 1.6612119248217758, "grad_norm": 0.9748948812484741, "learning_rate": 2.1872743236356783e-06, "loss": 0.1224, "step": 10253 }, { "epoch": 1.6613739468567725, "grad_norm": 0.7499638199806213, "learning_rate": 2.186840459186654e-06, "loss": 0.0857, "step": 10254 }, { "epoch": 1.6615359688917692, "grad_norm": 0.8377159237861633, "learning_rate": 2.186406604319554e-06, "loss": 0.0981, "step": 10255 }, { "epoch": 1.6616979909267662, "grad_norm": 0.8437566161155701, "learning_rate": 2.185972759047653e-06, "loss": 0.0917, "step": 10256 }, { "epoch": 1.6618600129617627, "grad_norm": 0.9447170495986938, "learning_rate": 2.185538923384225e-06, "loss": 0.1206, "step": 10257 }, { "epoch": 1.6620220349967596, "grad_norm": 0.9525909423828125, "learning_rate": 2.1851050973425454e-06, "loss": 0.1178, "step": 10258 }, { "epoch": 1.6621840570317563, "grad_norm": 0.8417654633522034, "learning_rate": 2.1846712809358876e-06, "loss": 0.0978, "step": 10259 }, { "epoch": 1.662346079066753, "grad_norm": 0.8003092408180237, "learning_rate": 2.1842374741775262e-06, "loss": 0.1017, "step": 10260 }, { "epoch": 1.66250810110175, "grad_norm": 0.7186848521232605, "learning_rate": 2.183803677080733e-06, "loss": 0.0841, "step": 10261 }, { "epoch": 1.6626701231367464, "grad_norm": 0.8107516765594482, "learning_rate": 2.1833698896587816e-06, "loss": 0.0997, "step": 10262 }, { "epoch": 1.6628321451717434, "grad_norm": 0.852218508720398, "learning_rate": 2.182936111924947e-06, "loss": 0.1055, "step": 10263 }, { "epoch": 1.66299416720674, "grad_norm": 0.7575379610061646, "learning_rate": 2.1825023438924995e-06, "loss": 0.095, "step": 10264 }, { "epoch": 1.6631561892417368, "grad_norm": 0.7673975825309753, "learning_rate": 2.182068585574712e-06, "loss": 0.1017, "step": 10265 }, { "epoch": 1.6633182112767337, "grad_norm": 0.8737732172012329, "learning_rate": 2.1816348369848555e-06, "loss": 0.1138, "step": 10266 }, { "epoch": 1.6634802333117304, "grad_norm": 0.7601016163825989, "learning_rate": 2.1812010981362033e-06, "loss": 0.0916, "step": 10267 }, { "epoch": 1.6636422553467272, "grad_norm": 0.7540645003318787, "learning_rate": 2.180767369042026e-06, "loss": 0.1005, "step": 10268 }, { "epoch": 1.6638042773817239, "grad_norm": 0.7578513622283936, "learning_rate": 2.180333649715595e-06, "loss": 0.0875, "step": 10269 }, { "epoch": 1.6639662994167206, "grad_norm": 0.682778000831604, "learning_rate": 2.1798999401701802e-06, "loss": 0.0829, "step": 10270 }, { "epoch": 1.6641283214517175, "grad_norm": 0.7566827535629272, "learning_rate": 2.1794662404190526e-06, "loss": 0.0932, "step": 10271 }, { "epoch": 1.6642903434867142, "grad_norm": 0.7969744205474854, "learning_rate": 2.1790325504754827e-06, "loss": 0.099, "step": 10272 }, { "epoch": 1.664452365521711, "grad_norm": 0.7715543508529663, "learning_rate": 2.17859887035274e-06, "loss": 0.0954, "step": 10273 }, { "epoch": 1.6646143875567079, "grad_norm": 0.9869740009307861, "learning_rate": 2.1781652000640947e-06, "loss": 0.1277, "step": 10274 }, { "epoch": 1.6647764095917044, "grad_norm": 0.7891120910644531, "learning_rate": 2.1777315396228145e-06, "loss": 0.1008, "step": 10275 }, { "epoch": 1.6649384316267013, "grad_norm": 0.7361397743225098, "learning_rate": 2.177297889042169e-06, "loss": 0.0878, "step": 10276 }, { "epoch": 1.665100453661698, "grad_norm": 0.8581258654594421, "learning_rate": 2.1768642483354274e-06, "loss": 0.103, "step": 10277 }, { "epoch": 1.6652624756966947, "grad_norm": 0.8369101285934448, "learning_rate": 2.1764306175158588e-06, "loss": 0.0994, "step": 10278 }, { "epoch": 1.6654244977316917, "grad_norm": 1.0208182334899902, "learning_rate": 2.1759969965967293e-06, "loss": 0.1056, "step": 10279 }, { "epoch": 1.6655865197666881, "grad_norm": 0.717728316783905, "learning_rate": 2.1755633855913086e-06, "loss": 0.0869, "step": 10280 }, { "epoch": 1.665748541801685, "grad_norm": 0.7904380559921265, "learning_rate": 2.175129784512862e-06, "loss": 0.0956, "step": 10281 }, { "epoch": 1.6659105638366818, "grad_norm": 0.8746015429496765, "learning_rate": 2.174696193374658e-06, "loss": 0.1046, "step": 10282 }, { "epoch": 1.6660725858716785, "grad_norm": 0.7690415978431702, "learning_rate": 2.1742626121899645e-06, "loss": 0.0932, "step": 10283 }, { "epoch": 1.6662346079066754, "grad_norm": 0.8206772208213806, "learning_rate": 2.173829040972046e-06, "loss": 0.107, "step": 10284 }, { "epoch": 1.666396629941672, "grad_norm": 0.830139696598053, "learning_rate": 2.1733954797341692e-06, "loss": 0.1108, "step": 10285 }, { "epoch": 1.6665586519766689, "grad_norm": 0.7665162086486816, "learning_rate": 2.1729619284896e-06, "loss": 0.096, "step": 10286 }, { "epoch": 1.6667206740116656, "grad_norm": 0.9206477403640747, "learning_rate": 2.1725283872516053e-06, "loss": 0.1022, "step": 10287 }, { "epoch": 1.6668826960466623, "grad_norm": 0.7268409729003906, "learning_rate": 2.1720948560334492e-06, "loss": 0.0966, "step": 10288 }, { "epoch": 1.6670447180816592, "grad_norm": 0.7334338426589966, "learning_rate": 2.171661334848397e-06, "loss": 0.0948, "step": 10289 }, { "epoch": 1.6672067401166557, "grad_norm": 0.661564826965332, "learning_rate": 2.171227823709713e-06, "loss": 0.0766, "step": 10290 }, { "epoch": 1.6673687621516526, "grad_norm": 1.0224709510803223, "learning_rate": 2.1707943226306626e-06, "loss": 0.1268, "step": 10291 }, { "epoch": 1.6675307841866494, "grad_norm": 0.9280935525894165, "learning_rate": 2.1703608316245092e-06, "loss": 0.1148, "step": 10292 }, { "epoch": 1.667692806221646, "grad_norm": 0.8443484306335449, "learning_rate": 2.1699273507045163e-06, "loss": 0.1064, "step": 10293 }, { "epoch": 1.667854828256643, "grad_norm": 0.8637576103210449, "learning_rate": 2.169493879883948e-06, "loss": 0.1034, "step": 10294 }, { "epoch": 1.6680168502916397, "grad_norm": 0.8016158938407898, "learning_rate": 2.169060419176066e-06, "loss": 0.0982, "step": 10295 }, { "epoch": 1.6681788723266364, "grad_norm": 0.7897706627845764, "learning_rate": 2.168626968594136e-06, "loss": 0.0963, "step": 10296 }, { "epoch": 1.6683408943616331, "grad_norm": 0.7999807596206665, "learning_rate": 2.1681935281514182e-06, "loss": 0.0962, "step": 10297 }, { "epoch": 1.6685029163966298, "grad_norm": 0.8183321356773376, "learning_rate": 2.167760097861176e-06, "loss": 0.1015, "step": 10298 }, { "epoch": 1.6686649384316268, "grad_norm": 0.7776053547859192, "learning_rate": 2.16732667773667e-06, "loss": 0.0998, "step": 10299 }, { "epoch": 1.6688269604666235, "grad_norm": 0.983116626739502, "learning_rate": 2.1668932677911624e-06, "loss": 0.1021, "step": 10300 }, { "epoch": 1.6689889825016202, "grad_norm": 0.8453471064567566, "learning_rate": 2.1664598680379158e-06, "loss": 0.0946, "step": 10301 }, { "epoch": 1.6691510045366171, "grad_norm": 0.7629480957984924, "learning_rate": 2.166026478490189e-06, "loss": 0.097, "step": 10302 }, { "epoch": 1.6693130265716136, "grad_norm": 0.7451595664024353, "learning_rate": 2.1655930991612443e-06, "loss": 0.0916, "step": 10303 }, { "epoch": 1.6694750486066106, "grad_norm": 0.7532944083213806, "learning_rate": 2.1651597300643418e-06, "loss": 0.0916, "step": 10304 }, { "epoch": 1.6696370706416073, "grad_norm": 0.7477149963378906, "learning_rate": 2.1647263712127402e-06, "loss": 0.0871, "step": 10305 }, { "epoch": 1.669799092676604, "grad_norm": 0.7762525081634521, "learning_rate": 2.1642930226197012e-06, "loss": 0.0979, "step": 10306 }, { "epoch": 1.669961114711601, "grad_norm": 0.8461878299713135, "learning_rate": 2.1638596842984834e-06, "loss": 0.0979, "step": 10307 }, { "epoch": 1.6701231367465974, "grad_norm": 0.9171335697174072, "learning_rate": 2.1634263562623454e-06, "loss": 0.105, "step": 10308 }, { "epoch": 1.6702851587815943, "grad_norm": 0.7819655537605286, "learning_rate": 2.162993038524547e-06, "loss": 0.0919, "step": 10309 }, { "epoch": 1.670447180816591, "grad_norm": 0.8039749264717102, "learning_rate": 2.162559731098345e-06, "loss": 0.1012, "step": 10310 }, { "epoch": 1.6706092028515878, "grad_norm": 0.7070164084434509, "learning_rate": 2.162126433996999e-06, "loss": 0.085, "step": 10311 }, { "epoch": 1.6707712248865847, "grad_norm": 0.6756847500801086, "learning_rate": 2.161693147233767e-06, "loss": 0.0854, "step": 10312 }, { "epoch": 1.6709332469215812, "grad_norm": 0.8924721479415894, "learning_rate": 2.161259870821906e-06, "loss": 0.1165, "step": 10313 }, { "epoch": 1.6710952689565781, "grad_norm": 0.8302946090698242, "learning_rate": 2.1608266047746723e-06, "loss": 0.1093, "step": 10314 }, { "epoch": 1.6712572909915748, "grad_norm": 0.9837422370910645, "learning_rate": 2.1603933491053243e-06, "loss": 0.1093, "step": 10315 }, { "epoch": 1.6714193130265715, "grad_norm": 0.8880236744880676, "learning_rate": 2.1599601038271186e-06, "loss": 0.1147, "step": 10316 }, { "epoch": 1.6715813350615685, "grad_norm": 0.7705572247505188, "learning_rate": 2.1595268689533105e-06, "loss": 0.0923, "step": 10317 }, { "epoch": 1.6717433570965652, "grad_norm": 0.834516167640686, "learning_rate": 2.1590936444971563e-06, "loss": 0.0956, "step": 10318 }, { "epoch": 1.671905379131562, "grad_norm": 0.8553721904754639, "learning_rate": 2.15866043047191e-06, "loss": 0.1118, "step": 10319 }, { "epoch": 1.6720674011665586, "grad_norm": 0.899256706237793, "learning_rate": 2.1582272268908307e-06, "loss": 0.1104, "step": 10320 }, { "epoch": 1.6722294232015553, "grad_norm": 0.8428982496261597, "learning_rate": 2.1577940337671698e-06, "loss": 0.108, "step": 10321 }, { "epoch": 1.6723914452365523, "grad_norm": 0.8363228440284729, "learning_rate": 2.1573608511141845e-06, "loss": 0.105, "step": 10322 }, { "epoch": 1.672553467271549, "grad_norm": 0.7880693674087524, "learning_rate": 2.1569276789451273e-06, "loss": 0.1044, "step": 10323 }, { "epoch": 1.6727154893065457, "grad_norm": 0.7894632816314697, "learning_rate": 2.1564945172732523e-06, "loss": 0.0986, "step": 10324 }, { "epoch": 1.6728775113415426, "grad_norm": 0.8343285918235779, "learning_rate": 2.1560613661118154e-06, "loss": 0.1084, "step": 10325 }, { "epoch": 1.673039533376539, "grad_norm": 0.8286653757095337, "learning_rate": 2.155628225474067e-06, "loss": 0.1083, "step": 10326 }, { "epoch": 1.673201555411536, "grad_norm": 0.7715649008750916, "learning_rate": 2.1551950953732627e-06, "loss": 0.1026, "step": 10327 }, { "epoch": 1.6733635774465327, "grad_norm": 0.7973687648773193, "learning_rate": 2.154761975822653e-06, "loss": 0.1054, "step": 10328 }, { "epoch": 1.6735255994815295, "grad_norm": 0.7777444124221802, "learning_rate": 2.1543288668354914e-06, "loss": 0.0949, "step": 10329 }, { "epoch": 1.6736876215165264, "grad_norm": 0.8096343278884888, "learning_rate": 2.1538957684250303e-06, "loss": 0.1013, "step": 10330 }, { "epoch": 1.6738496435515229, "grad_norm": 0.751850962638855, "learning_rate": 2.153462680604522e-06, "loss": 0.0921, "step": 10331 }, { "epoch": 1.6740116655865198, "grad_norm": 0.8717265725135803, "learning_rate": 2.1530296033872155e-06, "loss": 0.1096, "step": 10332 }, { "epoch": 1.6741736876215165, "grad_norm": 0.8423851728439331, "learning_rate": 2.152596536786364e-06, "loss": 0.1089, "step": 10333 }, { "epoch": 1.6743357096565132, "grad_norm": 0.7620172500610352, "learning_rate": 2.152163480815218e-06, "loss": 0.095, "step": 10334 }, { "epoch": 1.6744977316915102, "grad_norm": 0.7917490005493164, "learning_rate": 2.151730435487028e-06, "loss": 0.0961, "step": 10335 }, { "epoch": 1.6746597537265067, "grad_norm": 0.8136554956436157, "learning_rate": 2.151297400815044e-06, "loss": 0.1113, "step": 10336 }, { "epoch": 1.6748217757615036, "grad_norm": 0.8516972064971924, "learning_rate": 2.150864376812515e-06, "loss": 0.1168, "step": 10337 }, { "epoch": 1.6749837977965003, "grad_norm": 0.7402398586273193, "learning_rate": 2.150431363492691e-06, "loss": 0.0879, "step": 10338 }, { "epoch": 1.675145819831497, "grad_norm": 0.7780933976173401, "learning_rate": 2.1499983608688217e-06, "loss": 0.0975, "step": 10339 }, { "epoch": 1.675307841866494, "grad_norm": 0.8871686458587646, "learning_rate": 2.1495653689541562e-06, "loss": 0.1125, "step": 10340 }, { "epoch": 1.6754698639014904, "grad_norm": 0.854103147983551, "learning_rate": 2.149132387761942e-06, "loss": 0.113, "step": 10341 }, { "epoch": 1.6756318859364874, "grad_norm": 0.8291221857070923, "learning_rate": 2.1486994173054276e-06, "loss": 0.1044, "step": 10342 }, { "epoch": 1.675793907971484, "grad_norm": 0.838173508644104, "learning_rate": 2.14826645759786e-06, "loss": 0.1034, "step": 10343 }, { "epoch": 1.6759559300064808, "grad_norm": 0.7588802576065063, "learning_rate": 2.1478335086524885e-06, "loss": 0.1, "step": 10344 }, { "epoch": 1.6761179520414777, "grad_norm": 0.7420892119407654, "learning_rate": 2.14740057048256e-06, "loss": 0.0877, "step": 10345 }, { "epoch": 1.6762799740764744, "grad_norm": 0.823940634727478, "learning_rate": 2.14696764310132e-06, "loss": 0.1046, "step": 10346 }, { "epoch": 1.6764419961114712, "grad_norm": 0.8017604351043701, "learning_rate": 2.146534726522016e-06, "loss": 0.1034, "step": 10347 }, { "epoch": 1.6766040181464679, "grad_norm": 0.8648914694786072, "learning_rate": 2.1461018207578932e-06, "loss": 0.1073, "step": 10348 }, { "epoch": 1.6767660401814646, "grad_norm": 0.7010998129844666, "learning_rate": 2.145668925822199e-06, "loss": 0.0923, "step": 10349 }, { "epoch": 1.6769280622164615, "grad_norm": 0.7532062530517578, "learning_rate": 2.1452360417281786e-06, "loss": 0.0892, "step": 10350 }, { "epoch": 1.6770900842514582, "grad_norm": 0.7723690867424011, "learning_rate": 2.1448031684890767e-06, "loss": 0.0971, "step": 10351 }, { "epoch": 1.677252106286455, "grad_norm": 0.7597529292106628, "learning_rate": 2.144370306118138e-06, "loss": 0.095, "step": 10352 }, { "epoch": 1.6774141283214519, "grad_norm": 0.9111921787261963, "learning_rate": 2.1439374546286065e-06, "loss": 0.0962, "step": 10353 }, { "epoch": 1.6775761503564484, "grad_norm": 0.7498681545257568, "learning_rate": 2.143504614033728e-06, "loss": 0.0917, "step": 10354 }, { "epoch": 1.6777381723914453, "grad_norm": 0.7228400707244873, "learning_rate": 2.143071784346746e-06, "loss": 0.0927, "step": 10355 }, { "epoch": 1.677900194426442, "grad_norm": 0.6969931721687317, "learning_rate": 2.142638965580903e-06, "loss": 0.0866, "step": 10356 }, { "epoch": 1.6780622164614387, "grad_norm": 0.8031118512153625, "learning_rate": 2.1422061577494427e-06, "loss": 0.1015, "step": 10357 }, { "epoch": 1.6782242384964356, "grad_norm": 0.8556954264640808, "learning_rate": 2.141773360865609e-06, "loss": 0.1002, "step": 10358 }, { "epoch": 1.6783862605314321, "grad_norm": 0.8014951348304749, "learning_rate": 2.1413405749426432e-06, "loss": 0.0944, "step": 10359 }, { "epoch": 1.678548282566429, "grad_norm": 1.1695481538772583, "learning_rate": 2.1409077999937883e-06, "loss": 0.1157, "step": 10360 }, { "epoch": 1.6787103046014258, "grad_norm": 0.8790541887283325, "learning_rate": 2.1404750360322852e-06, "loss": 0.1036, "step": 10361 }, { "epoch": 1.6788723266364225, "grad_norm": 0.8709704279899597, "learning_rate": 2.1400422830713752e-06, "loss": 0.1126, "step": 10362 }, { "epoch": 1.6790343486714194, "grad_norm": 0.8556258678436279, "learning_rate": 2.139609541124301e-06, "loss": 0.1063, "step": 10363 }, { "epoch": 1.679196370706416, "grad_norm": 0.779319703578949, "learning_rate": 2.1391768102043032e-06, "loss": 0.0961, "step": 10364 }, { "epoch": 1.6793583927414129, "grad_norm": 0.8869314789772034, "learning_rate": 2.138744090324621e-06, "loss": 0.1104, "step": 10365 }, { "epoch": 1.6795204147764096, "grad_norm": 0.9177558422088623, "learning_rate": 2.1383113814984967e-06, "loss": 0.1191, "step": 10366 }, { "epoch": 1.6796824368114063, "grad_norm": 0.7949991822242737, "learning_rate": 2.1378786837391673e-06, "loss": 0.1051, "step": 10367 }, { "epoch": 1.6798444588464032, "grad_norm": 0.8216454982757568, "learning_rate": 2.137445997059874e-06, "loss": 0.0968, "step": 10368 }, { "epoch": 1.6800064808814, "grad_norm": 0.7865923643112183, "learning_rate": 2.1370133214738573e-06, "loss": 0.1002, "step": 10369 }, { "epoch": 1.6801685029163966, "grad_norm": 0.7961848378181458, "learning_rate": 2.1365806569943533e-06, "loss": 0.0945, "step": 10370 }, { "epoch": 1.6803305249513933, "grad_norm": 0.843292236328125, "learning_rate": 2.1361480036346025e-06, "loss": 0.1049, "step": 10371 }, { "epoch": 1.68049254698639, "grad_norm": 0.9252605438232422, "learning_rate": 2.1357153614078407e-06, "loss": 0.114, "step": 10372 }, { "epoch": 1.680654569021387, "grad_norm": 0.7975597977638245, "learning_rate": 2.135282730327309e-06, "loss": 0.0955, "step": 10373 }, { "epoch": 1.6808165910563837, "grad_norm": 0.9786580204963684, "learning_rate": 2.1348501104062423e-06, "loss": 0.1155, "step": 10374 }, { "epoch": 1.6809786130913804, "grad_norm": 0.9285010695457458, "learning_rate": 2.1344175016578796e-06, "loss": 0.1207, "step": 10375 }, { "epoch": 1.6811406351263773, "grad_norm": 0.7795856595039368, "learning_rate": 2.1339849040954556e-06, "loss": 0.0978, "step": 10376 }, { "epoch": 1.6813026571613738, "grad_norm": 1.0095703601837158, "learning_rate": 2.133552317732208e-06, "loss": 0.1229, "step": 10377 }, { "epoch": 1.6814646791963708, "grad_norm": 0.7144725918769836, "learning_rate": 2.133119742581373e-06, "loss": 0.0896, "step": 10378 }, { "epoch": 1.6816267012313675, "grad_norm": 0.7718963623046875, "learning_rate": 2.1326871786561856e-06, "loss": 0.0949, "step": 10379 }, { "epoch": 1.6817887232663642, "grad_norm": 0.8956825733184814, "learning_rate": 2.1322546259698823e-06, "loss": 0.1074, "step": 10380 }, { "epoch": 1.6819507453013611, "grad_norm": 0.8126304745674133, "learning_rate": 2.131822084535696e-06, "loss": 0.1, "step": 10381 }, { "epoch": 1.6821127673363576, "grad_norm": 0.8905256986618042, "learning_rate": 2.1313895543668644e-06, "loss": 0.1035, "step": 10382 }, { "epoch": 1.6822747893713546, "grad_norm": 0.7790430188179016, "learning_rate": 2.13095703547662e-06, "loss": 0.0894, "step": 10383 }, { "epoch": 1.6824368114063513, "grad_norm": 0.7982741594314575, "learning_rate": 2.1305245278781977e-06, "loss": 0.1081, "step": 10384 }, { "epoch": 1.682598833441348, "grad_norm": 0.790199875831604, "learning_rate": 2.1300920315848307e-06, "loss": 0.1041, "step": 10385 }, { "epoch": 1.682760855476345, "grad_norm": 0.8125954866409302, "learning_rate": 2.129659546609751e-06, "loss": 0.0963, "step": 10386 }, { "epoch": 1.6829228775113414, "grad_norm": 0.9038329124450684, "learning_rate": 2.1292270729661946e-06, "loss": 0.1123, "step": 10387 }, { "epoch": 1.6830848995463383, "grad_norm": 0.8060694336891174, "learning_rate": 2.1287946106673916e-06, "loss": 0.0908, "step": 10388 }, { "epoch": 1.683246921581335, "grad_norm": 0.7719234824180603, "learning_rate": 2.128362159726576e-06, "loss": 0.097, "step": 10389 }, { "epoch": 1.6834089436163318, "grad_norm": 1.3537698984146118, "learning_rate": 2.1279297201569787e-06, "loss": 0.0971, "step": 10390 }, { "epoch": 1.6835709656513287, "grad_norm": 0.8270101547241211, "learning_rate": 2.1274972919718305e-06, "loss": 0.1124, "step": 10391 }, { "epoch": 1.6837329876863252, "grad_norm": 0.8727843761444092, "learning_rate": 2.127064875184365e-06, "loss": 0.1085, "step": 10392 }, { "epoch": 1.6838950097213221, "grad_norm": 0.8102154731750488, "learning_rate": 2.1266324698078116e-06, "loss": 0.0979, "step": 10393 }, { "epoch": 1.6840570317563188, "grad_norm": 0.7432061433792114, "learning_rate": 2.126200075855401e-06, "loss": 0.0951, "step": 10394 }, { "epoch": 1.6842190537913155, "grad_norm": 0.7901977300643921, "learning_rate": 2.1257676933403637e-06, "loss": 0.1051, "step": 10395 }, { "epoch": 1.6843810758263125, "grad_norm": 0.84058678150177, "learning_rate": 2.125335322275928e-06, "loss": 0.11, "step": 10396 }, { "epoch": 1.6845430978613092, "grad_norm": 0.6626092195510864, "learning_rate": 2.124902962675326e-06, "loss": 0.0884, "step": 10397 }, { "epoch": 1.684705119896306, "grad_norm": 0.7434554100036621, "learning_rate": 2.1244706145517853e-06, "loss": 0.0998, "step": 10398 }, { "epoch": 1.6848671419313026, "grad_norm": 0.7729745507240295, "learning_rate": 2.124038277918536e-06, "loss": 0.0976, "step": 10399 }, { "epoch": 1.6850291639662993, "grad_norm": 0.6964062452316284, "learning_rate": 2.1236059527888044e-06, "loss": 0.0883, "step": 10400 }, { "epoch": 1.6851911860012962, "grad_norm": 0.8118695616722107, "learning_rate": 2.1231736391758195e-06, "loss": 0.1021, "step": 10401 }, { "epoch": 1.685353208036293, "grad_norm": 0.7241361737251282, "learning_rate": 2.1227413370928106e-06, "loss": 0.0955, "step": 10402 }, { "epoch": 1.6855152300712897, "grad_norm": 0.8646603226661682, "learning_rate": 2.1223090465530032e-06, "loss": 0.0978, "step": 10403 }, { "epoch": 1.6856772521062866, "grad_norm": 0.8380978107452393, "learning_rate": 2.1218767675696255e-06, "loss": 0.1027, "step": 10404 }, { "epoch": 1.685839274141283, "grad_norm": 0.8121195435523987, "learning_rate": 2.1214445001559025e-06, "loss": 0.0994, "step": 10405 }, { "epoch": 1.68600129617628, "grad_norm": 0.7910208106040955, "learning_rate": 2.1210122443250625e-06, "loss": 0.0954, "step": 10406 }, { "epoch": 1.6861633182112767, "grad_norm": 0.8031769394874573, "learning_rate": 2.1205800000903305e-06, "loss": 0.1046, "step": 10407 }, { "epoch": 1.6863253402462735, "grad_norm": 0.7870137095451355, "learning_rate": 2.1201477674649326e-06, "loss": 0.1009, "step": 10408 }, { "epoch": 1.6864873622812704, "grad_norm": 0.8784118294715881, "learning_rate": 2.1197155464620934e-06, "loss": 0.1071, "step": 10409 }, { "epoch": 1.6866493843162669, "grad_norm": 0.7897698283195496, "learning_rate": 2.119283337095038e-06, "loss": 0.0943, "step": 10410 }, { "epoch": 1.6868114063512638, "grad_norm": 0.770966112613678, "learning_rate": 2.118851139376992e-06, "loss": 0.0914, "step": 10411 }, { "epoch": 1.6869734283862605, "grad_norm": 0.8083310127258301, "learning_rate": 2.1184189533211783e-06, "loss": 0.099, "step": 10412 }, { "epoch": 1.6871354504212572, "grad_norm": 0.8235817551612854, "learning_rate": 2.117986778940822e-06, "loss": 0.1128, "step": 10413 }, { "epoch": 1.6872974724562542, "grad_norm": 0.7883199453353882, "learning_rate": 2.117554616249145e-06, "loss": 0.0981, "step": 10414 }, { "epoch": 1.6874594944912507, "grad_norm": 0.8687751293182373, "learning_rate": 2.11712246525937e-06, "loss": 0.1017, "step": 10415 }, { "epoch": 1.6876215165262476, "grad_norm": 0.863337516784668, "learning_rate": 2.1166903259847228e-06, "loss": 0.111, "step": 10416 }, { "epoch": 1.6877835385612443, "grad_norm": 0.7061415314674377, "learning_rate": 2.116258198438424e-06, "loss": 0.0925, "step": 10417 }, { "epoch": 1.687945560596241, "grad_norm": 0.7574256658554077, "learning_rate": 2.115826082633695e-06, "loss": 0.0906, "step": 10418 }, { "epoch": 1.688107582631238, "grad_norm": 0.7754940390586853, "learning_rate": 2.115393978583759e-06, "loss": 0.1083, "step": 10419 }, { "epoch": 1.6882696046662347, "grad_norm": 0.799916684627533, "learning_rate": 2.114961886301835e-06, "loss": 0.0945, "step": 10420 }, { "epoch": 1.6884316267012314, "grad_norm": 0.9458333849906921, "learning_rate": 2.114529805801147e-06, "loss": 0.1102, "step": 10421 }, { "epoch": 1.688593648736228, "grad_norm": 0.7674143314361572, "learning_rate": 2.114097737094914e-06, "loss": 0.0925, "step": 10422 }, { "epoch": 1.6887556707712248, "grad_norm": 1.0405738353729248, "learning_rate": 2.1136656801963556e-06, "loss": 0.1039, "step": 10423 }, { "epoch": 1.6889176928062217, "grad_norm": 0.9508488178253174, "learning_rate": 2.1132336351186923e-06, "loss": 0.1144, "step": 10424 }, { "epoch": 1.6890797148412184, "grad_norm": 0.8733676075935364, "learning_rate": 2.1128016018751444e-06, "loss": 0.1035, "step": 10425 }, { "epoch": 1.6892417368762151, "grad_norm": 0.8814464807510376, "learning_rate": 2.1123695804789307e-06, "loss": 0.1017, "step": 10426 }, { "epoch": 1.689403758911212, "grad_norm": 0.7157515287399292, "learning_rate": 2.1119375709432696e-06, "loss": 0.0853, "step": 10427 }, { "epoch": 1.6895657809462086, "grad_norm": 0.8367986083030701, "learning_rate": 2.11150557328138e-06, "loss": 0.1023, "step": 10428 }, { "epoch": 1.6897278029812055, "grad_norm": 0.8281162977218628, "learning_rate": 2.1110735875064787e-06, "loss": 0.1074, "step": 10429 }, { "epoch": 1.6898898250162022, "grad_norm": 0.8819671273231506, "learning_rate": 2.110641613631785e-06, "loss": 0.1114, "step": 10430 }, { "epoch": 1.690051847051199, "grad_norm": 0.796600341796875, "learning_rate": 2.1102096516705165e-06, "loss": 0.099, "step": 10431 }, { "epoch": 1.6902138690861959, "grad_norm": 0.8326752185821533, "learning_rate": 2.109777701635889e-06, "loss": 0.1044, "step": 10432 }, { "epoch": 1.6903758911211924, "grad_norm": 0.9205048084259033, "learning_rate": 2.109345763541119e-06, "loss": 0.1107, "step": 10433 }, { "epoch": 1.6905379131561893, "grad_norm": 0.799943745136261, "learning_rate": 2.1089138373994226e-06, "loss": 0.1003, "step": 10434 }, { "epoch": 1.690699935191186, "grad_norm": 0.8054278492927551, "learning_rate": 2.1084819232240177e-06, "loss": 0.0972, "step": 10435 }, { "epoch": 1.6908619572261827, "grad_norm": 0.7532472014427185, "learning_rate": 2.108050021028118e-06, "loss": 0.0929, "step": 10436 }, { "epoch": 1.6910239792611796, "grad_norm": 0.7412585020065308, "learning_rate": 2.1076181308249396e-06, "loss": 0.0968, "step": 10437 }, { "epoch": 1.6911860012961761, "grad_norm": 0.8518087267875671, "learning_rate": 2.1071862526276963e-06, "loss": 0.0933, "step": 10438 }, { "epoch": 1.691348023331173, "grad_norm": 0.7559210658073425, "learning_rate": 2.1067543864496028e-06, "loss": 0.0815, "step": 10439 }, { "epoch": 1.6915100453661698, "grad_norm": 0.7951977849006653, "learning_rate": 2.1063225323038744e-06, "loss": 0.0965, "step": 10440 }, { "epoch": 1.6916720674011665, "grad_norm": 0.7670897841453552, "learning_rate": 2.1058906902037228e-06, "loss": 0.0946, "step": 10441 }, { "epoch": 1.6918340894361634, "grad_norm": 0.7800466418266296, "learning_rate": 2.1054588601623634e-06, "loss": 0.0971, "step": 10442 }, { "epoch": 1.69199611147116, "grad_norm": 0.872276246547699, "learning_rate": 2.1050270421930077e-06, "loss": 0.0954, "step": 10443 }, { "epoch": 1.6921581335061568, "grad_norm": 0.8790923357009888, "learning_rate": 2.104595236308868e-06, "loss": 0.1148, "step": 10444 }, { "epoch": 1.6923201555411536, "grad_norm": 0.8432591557502747, "learning_rate": 2.104163442523158e-06, "loss": 0.1034, "step": 10445 }, { "epoch": 1.6924821775761503, "grad_norm": 0.7778016328811646, "learning_rate": 2.1037316608490886e-06, "loss": 0.0907, "step": 10446 }, { "epoch": 1.6926441996111472, "grad_norm": 0.8325784206390381, "learning_rate": 2.1032998912998712e-06, "loss": 0.1069, "step": 10447 }, { "epoch": 1.692806221646144, "grad_norm": 0.789277195930481, "learning_rate": 2.1028681338887164e-06, "loss": 0.1051, "step": 10448 }, { "epoch": 1.6929682436811406, "grad_norm": 0.8284058570861816, "learning_rate": 2.1024363886288375e-06, "loss": 0.11, "step": 10449 }, { "epoch": 1.6931302657161373, "grad_norm": 0.8092173337936401, "learning_rate": 2.102004655533442e-06, "loss": 0.1038, "step": 10450 }, { "epoch": 1.693292287751134, "grad_norm": 0.7652307152748108, "learning_rate": 2.1015729346157406e-06, "loss": 0.097, "step": 10451 }, { "epoch": 1.693454309786131, "grad_norm": 0.6917365789413452, "learning_rate": 2.101141225888944e-06, "loss": 0.082, "step": 10452 }, { "epoch": 1.6936163318211277, "grad_norm": 0.770603597164154, "learning_rate": 2.10070952936626e-06, "loss": 0.0931, "step": 10453 }, { "epoch": 1.6937783538561244, "grad_norm": 0.8083109855651855, "learning_rate": 2.100277845060898e-06, "loss": 0.1012, "step": 10454 }, { "epoch": 1.6939403758911213, "grad_norm": 0.7694876194000244, "learning_rate": 2.0998461729860675e-06, "loss": 0.0965, "step": 10455 }, { "epoch": 1.6941023979261178, "grad_norm": 0.7325409054756165, "learning_rate": 2.0994145131549755e-06, "loss": 0.0911, "step": 10456 }, { "epoch": 1.6942644199611148, "grad_norm": 0.8317882418632507, "learning_rate": 2.09898286558083e-06, "loss": 0.099, "step": 10457 }, { "epoch": 1.6944264419961115, "grad_norm": 0.8942394852638245, "learning_rate": 2.0985512302768366e-06, "loss": 0.1108, "step": 10458 }, { "epoch": 1.6945884640311082, "grad_norm": 0.850330114364624, "learning_rate": 2.0981196072562067e-06, "loss": 0.1008, "step": 10459 }, { "epoch": 1.6947504860661051, "grad_norm": 0.7139987945556641, "learning_rate": 2.097687996532143e-06, "loss": 0.077, "step": 10460 }, { "epoch": 1.6949125081011016, "grad_norm": 0.7986920475959778, "learning_rate": 2.097256398117854e-06, "loss": 0.0954, "step": 10461 }, { "epoch": 1.6950745301360985, "grad_norm": 0.7126197218894958, "learning_rate": 2.0968248120265433e-06, "loss": 0.083, "step": 10462 }, { "epoch": 1.6952365521710953, "grad_norm": 0.8080899715423584, "learning_rate": 2.0963932382714175e-06, "loss": 0.1012, "step": 10463 }, { "epoch": 1.695398574206092, "grad_norm": 0.9293273687362671, "learning_rate": 2.095961676865683e-06, "loss": 0.1071, "step": 10464 }, { "epoch": 1.695560596241089, "grad_norm": 0.7605911493301392, "learning_rate": 2.0955301278225433e-06, "loss": 0.1003, "step": 10465 }, { "epoch": 1.6957226182760854, "grad_norm": 0.7792356014251709, "learning_rate": 2.095098591155203e-06, "loss": 0.0969, "step": 10466 }, { "epoch": 1.6958846403110823, "grad_norm": 0.7824127674102783, "learning_rate": 2.0946670668768652e-06, "loss": 0.0951, "step": 10467 }, { "epoch": 1.696046662346079, "grad_norm": 0.8019390106201172, "learning_rate": 2.094235555000734e-06, "loss": 0.0993, "step": 10468 }, { "epoch": 1.6962086843810757, "grad_norm": 0.8202789425849915, "learning_rate": 2.0938040555400137e-06, "loss": 0.0955, "step": 10469 }, { "epoch": 1.6963707064160727, "grad_norm": 0.8047801852226257, "learning_rate": 2.093372568507907e-06, "loss": 0.0991, "step": 10470 }, { "epoch": 1.6965327284510694, "grad_norm": 0.7065772414207458, "learning_rate": 2.0929410939176147e-06, "loss": 0.0858, "step": 10471 }, { "epoch": 1.696694750486066, "grad_norm": 0.8477860689163208, "learning_rate": 2.0925096317823393e-06, "loss": 0.097, "step": 10472 }, { "epoch": 1.6968567725210628, "grad_norm": 0.7459856867790222, "learning_rate": 2.0920781821152843e-06, "loss": 0.0914, "step": 10473 }, { "epoch": 1.6970187945560595, "grad_norm": 0.8197339773178101, "learning_rate": 2.091646744929649e-06, "loss": 0.099, "step": 10474 }, { "epoch": 1.6971808165910565, "grad_norm": 0.7804821133613586, "learning_rate": 2.091215320238636e-06, "loss": 0.101, "step": 10475 }, { "epoch": 1.6973428386260532, "grad_norm": 0.7543721795082092, "learning_rate": 2.0907839080554443e-06, "loss": 0.0866, "step": 10476 }, { "epoch": 1.6975048606610499, "grad_norm": 0.8172874450683594, "learning_rate": 2.090352508393274e-06, "loss": 0.1058, "step": 10477 }, { "epoch": 1.6976668826960468, "grad_norm": 0.8281826972961426, "learning_rate": 2.0899211212653262e-06, "loss": 0.0992, "step": 10478 }, { "epoch": 1.6978289047310433, "grad_norm": 0.784435510635376, "learning_rate": 2.0894897466848007e-06, "loss": 0.098, "step": 10479 }, { "epoch": 1.6979909267660402, "grad_norm": 0.871392548084259, "learning_rate": 2.0890583846648945e-06, "loss": 0.1029, "step": 10480 }, { "epoch": 1.698152948801037, "grad_norm": 0.7556376457214355, "learning_rate": 2.0886270352188082e-06, "loss": 0.0959, "step": 10481 }, { "epoch": 1.6983149708360337, "grad_norm": 0.7945913672447205, "learning_rate": 2.0881956983597375e-06, "loss": 0.0944, "step": 10482 }, { "epoch": 1.6984769928710306, "grad_norm": 0.8585154414176941, "learning_rate": 2.0877643741008828e-06, "loss": 0.106, "step": 10483 }, { "epoch": 1.698639014906027, "grad_norm": 0.8008867502212524, "learning_rate": 2.087333062455441e-06, "loss": 0.0965, "step": 10484 }, { "epoch": 1.698801036941024, "grad_norm": 0.8277882933616638, "learning_rate": 2.0869017634366087e-06, "loss": 0.1087, "step": 10485 }, { "epoch": 1.6989630589760207, "grad_norm": 0.781322717666626, "learning_rate": 2.0864704770575824e-06, "loss": 0.1053, "step": 10486 }, { "epoch": 1.6991250810110174, "grad_norm": 0.8764674663543701, "learning_rate": 2.0860392033315584e-06, "loss": 0.1127, "step": 10487 }, { "epoch": 1.6992871030460144, "grad_norm": 0.8520863652229309, "learning_rate": 2.085607942271734e-06, "loss": 0.0996, "step": 10488 }, { "epoch": 1.6994491250810109, "grad_norm": 0.8383838534355164, "learning_rate": 2.085176693891303e-06, "loss": 0.1004, "step": 10489 }, { "epoch": 1.6996111471160078, "grad_norm": 0.8455978631973267, "learning_rate": 2.0847454582034625e-06, "loss": 0.0961, "step": 10490 }, { "epoch": 1.6997731691510045, "grad_norm": 0.8698278665542603, "learning_rate": 2.084314235221405e-06, "loss": 0.109, "step": 10491 }, { "epoch": 1.6999351911860012, "grad_norm": 0.7847959995269775, "learning_rate": 2.0838830249583254e-06, "loss": 0.1002, "step": 10492 }, { "epoch": 1.7000972132209982, "grad_norm": 0.8136616349220276, "learning_rate": 2.0834518274274195e-06, "loss": 0.0992, "step": 10493 }, { "epoch": 1.7002592352559946, "grad_norm": 0.8103066086769104, "learning_rate": 2.0830206426418794e-06, "loss": 0.0894, "step": 10494 }, { "epoch": 1.7004212572909916, "grad_norm": 0.7221809029579163, "learning_rate": 2.0825894706148984e-06, "loss": 0.0907, "step": 10495 }, { "epoch": 1.7005832793259883, "grad_norm": 0.9179300665855408, "learning_rate": 2.0821583113596686e-06, "loss": 0.1152, "step": 10496 }, { "epoch": 1.700745301360985, "grad_norm": 0.8785554766654968, "learning_rate": 2.0817271648893848e-06, "loss": 0.1152, "step": 10497 }, { "epoch": 1.700907323395982, "grad_norm": 0.7163464426994324, "learning_rate": 2.081296031217237e-06, "loss": 0.0873, "step": 10498 }, { "epoch": 1.7010693454309787, "grad_norm": 0.8808443546295166, "learning_rate": 2.0808649103564173e-06, "loss": 0.1108, "step": 10499 }, { "epoch": 1.7012313674659754, "grad_norm": 0.7949004769325256, "learning_rate": 2.080433802320117e-06, "loss": 0.0997, "step": 10500 }, { "epoch": 1.7013933895009723, "grad_norm": 0.7522899508476257, "learning_rate": 2.0800027071215265e-06, "loss": 0.0996, "step": 10501 }, { "epoch": 1.7015554115359688, "grad_norm": 0.7748807072639465, "learning_rate": 2.0795716247738374e-06, "loss": 0.1031, "step": 10502 }, { "epoch": 1.7017174335709657, "grad_norm": 0.7756296396255493, "learning_rate": 2.0791405552902396e-06, "loss": 0.0983, "step": 10503 }, { "epoch": 1.7018794556059624, "grad_norm": 0.8680400848388672, "learning_rate": 2.078709498683922e-06, "loss": 0.1047, "step": 10504 }, { "epoch": 1.7020414776409591, "grad_norm": 0.7261191606521606, "learning_rate": 2.0782784549680744e-06, "loss": 0.091, "step": 10505 }, { "epoch": 1.702203499675956, "grad_norm": 0.7516106963157654, "learning_rate": 2.0778474241558845e-06, "loss": 0.0944, "step": 10506 }, { "epoch": 1.7023655217109526, "grad_norm": 0.8887795805931091, "learning_rate": 2.0774164062605425e-06, "loss": 0.1098, "step": 10507 }, { "epoch": 1.7025275437459495, "grad_norm": 0.7267820835113525, "learning_rate": 2.0769854012952368e-06, "loss": 0.0917, "step": 10508 }, { "epoch": 1.7026895657809462, "grad_norm": 0.6994649767875671, "learning_rate": 2.076554409273153e-06, "loss": 0.0883, "step": 10509 }, { "epoch": 1.702851587815943, "grad_norm": 0.8000863194465637, "learning_rate": 2.0761234302074803e-06, "loss": 0.0984, "step": 10510 }, { "epoch": 1.7030136098509399, "grad_norm": 0.8726096749305725, "learning_rate": 2.075692464111403e-06, "loss": 0.1051, "step": 10511 }, { "epoch": 1.7031756318859363, "grad_norm": 0.8187968134880066, "learning_rate": 2.0752615109981116e-06, "loss": 0.1046, "step": 10512 }, { "epoch": 1.7033376539209333, "grad_norm": 0.753304660320282, "learning_rate": 2.074830570880789e-06, "loss": 0.0933, "step": 10513 }, { "epoch": 1.70349967595593, "grad_norm": 0.8444907665252686, "learning_rate": 2.0743996437726233e-06, "loss": 0.093, "step": 10514 }, { "epoch": 1.7036616979909267, "grad_norm": 0.8190134167671204, "learning_rate": 2.073968729686797e-06, "loss": 0.0956, "step": 10515 }, { "epoch": 1.7038237200259236, "grad_norm": 0.6358878016471863, "learning_rate": 2.073537828636497e-06, "loss": 0.0766, "step": 10516 }, { "epoch": 1.7039857420609201, "grad_norm": 0.7422366738319397, "learning_rate": 2.0731069406349087e-06, "loss": 0.086, "step": 10517 }, { "epoch": 1.704147764095917, "grad_norm": 0.775372326374054, "learning_rate": 2.0726760656952137e-06, "loss": 0.0876, "step": 10518 }, { "epoch": 1.7043097861309138, "grad_norm": 0.8271961808204651, "learning_rate": 2.0722452038305976e-06, "loss": 0.1032, "step": 10519 }, { "epoch": 1.7044718081659105, "grad_norm": 0.6599870920181274, "learning_rate": 2.0718143550542418e-06, "loss": 0.0808, "step": 10520 }, { "epoch": 1.7046338302009074, "grad_norm": 0.7971540689468384, "learning_rate": 2.071383519379332e-06, "loss": 0.1085, "step": 10521 }, { "epoch": 1.7047958522359041, "grad_norm": 0.7135748267173767, "learning_rate": 2.0709526968190483e-06, "loss": 0.0867, "step": 10522 }, { "epoch": 1.7049578742709008, "grad_norm": 0.9244253039360046, "learning_rate": 2.070521887386575e-06, "loss": 0.1061, "step": 10523 }, { "epoch": 1.7051198963058976, "grad_norm": 0.9357718825340271, "learning_rate": 2.070091091095092e-06, "loss": 0.1023, "step": 10524 }, { "epoch": 1.7052819183408943, "grad_norm": 0.9502805471420288, "learning_rate": 2.0696603079577808e-06, "loss": 0.1071, "step": 10525 }, { "epoch": 1.7054439403758912, "grad_norm": 0.8359770178794861, "learning_rate": 2.0692295379878237e-06, "loss": 0.0975, "step": 10526 }, { "epoch": 1.705605962410888, "grad_norm": 0.9007936120033264, "learning_rate": 2.0687987811983994e-06, "loss": 0.1114, "step": 10527 }, { "epoch": 1.7057679844458846, "grad_norm": 0.8316621780395508, "learning_rate": 2.0683680376026897e-06, "loss": 0.1015, "step": 10528 }, { "epoch": 1.7059300064808816, "grad_norm": 0.8183777332305908, "learning_rate": 2.067937307213873e-06, "loss": 0.108, "step": 10529 }, { "epoch": 1.706092028515878, "grad_norm": 0.8549315333366394, "learning_rate": 2.0675065900451287e-06, "loss": 0.1099, "step": 10530 }, { "epoch": 1.706254050550875, "grad_norm": 0.8095566630363464, "learning_rate": 2.0670758861096366e-06, "loss": 0.1013, "step": 10531 }, { "epoch": 1.7064160725858717, "grad_norm": 0.8298711776733398, "learning_rate": 2.066645195420575e-06, "loss": 0.1061, "step": 10532 }, { "epoch": 1.7065780946208684, "grad_norm": 0.8974525928497314, "learning_rate": 2.0662145179911216e-06, "loss": 0.0994, "step": 10533 }, { "epoch": 1.7067401166558653, "grad_norm": 0.8173624873161316, "learning_rate": 2.0657838538344545e-06, "loss": 0.1013, "step": 10534 }, { "epoch": 1.7069021386908618, "grad_norm": 0.8199102878570557, "learning_rate": 2.06535320296375e-06, "loss": 0.1004, "step": 10535 }, { "epoch": 1.7070641607258588, "grad_norm": 0.8255974054336548, "learning_rate": 2.0649225653921855e-06, "loss": 0.1085, "step": 10536 }, { "epoch": 1.7072261827608555, "grad_norm": 0.703346848487854, "learning_rate": 2.064491941132938e-06, "loss": 0.0922, "step": 10537 }, { "epoch": 1.7073882047958522, "grad_norm": 0.7433433532714844, "learning_rate": 2.064061330199184e-06, "loss": 0.0975, "step": 10538 }, { "epoch": 1.7075502268308491, "grad_norm": 0.7282018065452576, "learning_rate": 2.0636307326040972e-06, "loss": 0.0926, "step": 10539 }, { "epoch": 1.7077122488658456, "grad_norm": 0.7705861330032349, "learning_rate": 2.0632001483608544e-06, "loss": 0.1065, "step": 10540 }, { "epoch": 1.7078742709008425, "grad_norm": 0.771189272403717, "learning_rate": 2.0627695774826305e-06, "loss": 0.0907, "step": 10541 }, { "epoch": 1.7080362929358393, "grad_norm": 0.7684487700462341, "learning_rate": 2.062339019982599e-06, "loss": 0.1023, "step": 10542 }, { "epoch": 1.708198314970836, "grad_norm": 0.8463597297668457, "learning_rate": 2.0619084758739348e-06, "loss": 0.1077, "step": 10543 }, { "epoch": 1.708360337005833, "grad_norm": 0.7792380452156067, "learning_rate": 2.06147794516981e-06, "loss": 0.102, "step": 10544 }, { "epoch": 1.7085223590408296, "grad_norm": 0.8330045938491821, "learning_rate": 2.061047427883399e-06, "loss": 0.1016, "step": 10545 }, { "epoch": 1.7086843810758263, "grad_norm": 0.7635950446128845, "learning_rate": 2.0606169240278752e-06, "loss": 0.1005, "step": 10546 }, { "epoch": 1.708846403110823, "grad_norm": 0.8526593446731567, "learning_rate": 2.0601864336164104e-06, "loss": 0.1061, "step": 10547 }, { "epoch": 1.7090084251458197, "grad_norm": 0.8109990954399109, "learning_rate": 2.059755956662176e-06, "loss": 0.1015, "step": 10548 }, { "epoch": 1.7091704471808167, "grad_norm": 0.7520654201507568, "learning_rate": 2.0593254931783436e-06, "loss": 0.0998, "step": 10549 }, { "epoch": 1.7093324692158134, "grad_norm": 0.9032507538795471, "learning_rate": 2.058895043178085e-06, "loss": 0.1102, "step": 10550 }, { "epoch": 1.70949449125081, "grad_norm": 0.7866460084915161, "learning_rate": 2.0584646066745707e-06, "loss": 0.0917, "step": 10551 }, { "epoch": 1.709656513285807, "grad_norm": 0.800690770149231, "learning_rate": 2.0580341836809718e-06, "loss": 0.0962, "step": 10552 }, { "epoch": 1.7098185353208035, "grad_norm": 0.7801680564880371, "learning_rate": 2.0576037742104563e-06, "loss": 0.0926, "step": 10553 }, { "epoch": 1.7099805573558005, "grad_norm": 0.7548884153366089, "learning_rate": 2.0571733782761943e-06, "loss": 0.0968, "step": 10554 }, { "epoch": 1.7101425793907972, "grad_norm": 0.7508038282394409, "learning_rate": 2.056742995891356e-06, "loss": 0.0899, "step": 10555 }, { "epoch": 1.7103046014257939, "grad_norm": 0.8736903667449951, "learning_rate": 2.0563126270691097e-06, "loss": 0.1057, "step": 10556 }, { "epoch": 1.7104666234607908, "grad_norm": 0.8422747254371643, "learning_rate": 2.0558822718226226e-06, "loss": 0.1087, "step": 10557 }, { "epoch": 1.7106286454957873, "grad_norm": 0.8379523754119873, "learning_rate": 2.055451930165063e-06, "loss": 0.1065, "step": 10558 }, { "epoch": 1.7107906675307842, "grad_norm": 0.7081695795059204, "learning_rate": 2.0550216021095993e-06, "loss": 0.0838, "step": 10559 }, { "epoch": 1.710952689565781, "grad_norm": 0.9157091975212097, "learning_rate": 2.054591287669398e-06, "loss": 0.0991, "step": 10560 }, { "epoch": 1.7111147116007777, "grad_norm": 0.8604560494422913, "learning_rate": 2.054160986857625e-06, "loss": 0.0997, "step": 10561 }, { "epoch": 1.7112767336357746, "grad_norm": 0.861049473285675, "learning_rate": 2.053730699687447e-06, "loss": 0.1093, "step": 10562 }, { "epoch": 1.711438755670771, "grad_norm": 0.8312821388244629, "learning_rate": 2.053300426172029e-06, "loss": 0.1004, "step": 10563 }, { "epoch": 1.711600777705768, "grad_norm": 0.8654624223709106, "learning_rate": 2.052870166324537e-06, "loss": 0.1035, "step": 10564 }, { "epoch": 1.7117627997407647, "grad_norm": 0.8277021646499634, "learning_rate": 2.052439920158137e-06, "loss": 0.0949, "step": 10565 }, { "epoch": 1.7119248217757614, "grad_norm": 0.8619728088378906, "learning_rate": 2.0520096876859918e-06, "loss": 0.1026, "step": 10566 }, { "epoch": 1.7120868438107584, "grad_norm": 0.8873988389968872, "learning_rate": 2.051579468921266e-06, "loss": 0.1064, "step": 10567 }, { "epoch": 1.7122488658457549, "grad_norm": 0.8158526420593262, "learning_rate": 2.051149263877123e-06, "loss": 0.1018, "step": 10568 }, { "epoch": 1.7124108878807518, "grad_norm": 0.8675525784492493, "learning_rate": 2.0507190725667263e-06, "loss": 0.1038, "step": 10569 }, { "epoch": 1.7125729099157485, "grad_norm": 0.7022666931152344, "learning_rate": 2.0502888950032396e-06, "loss": 0.0982, "step": 10570 }, { "epoch": 1.7127349319507452, "grad_norm": 1.0205082893371582, "learning_rate": 2.0498587311998235e-06, "loss": 0.1244, "step": 10571 }, { "epoch": 1.7128969539857422, "grad_norm": 0.7745731472969055, "learning_rate": 2.0494285811696417e-06, "loss": 0.1027, "step": 10572 }, { "epoch": 1.7130589760207389, "grad_norm": 0.8017035722732544, "learning_rate": 2.0489984449258537e-06, "loss": 0.0978, "step": 10573 }, { "epoch": 1.7132209980557356, "grad_norm": 0.7118944525718689, "learning_rate": 2.048568322481623e-06, "loss": 0.0959, "step": 10574 }, { "epoch": 1.7133830200907323, "grad_norm": 0.8438950777053833, "learning_rate": 2.048138213850109e-06, "loss": 0.1062, "step": 10575 }, { "epoch": 1.713545042125729, "grad_norm": 0.7368760108947754, "learning_rate": 2.0477081190444724e-06, "loss": 0.0892, "step": 10576 }, { "epoch": 1.713707064160726, "grad_norm": 0.7474925518035889, "learning_rate": 2.0472780380778724e-06, "loss": 0.0932, "step": 10577 }, { "epoch": 1.7138690861957226, "grad_norm": 0.794523298740387, "learning_rate": 2.046847970963468e-06, "loss": 0.0957, "step": 10578 }, { "epoch": 1.7140311082307194, "grad_norm": 0.8395006060600281, "learning_rate": 2.0464179177144207e-06, "loss": 0.1031, "step": 10579 }, { "epoch": 1.7141931302657163, "grad_norm": 0.8234453797340393, "learning_rate": 2.0459878783438867e-06, "loss": 0.105, "step": 10580 }, { "epoch": 1.7143551523007128, "grad_norm": 0.7391911149024963, "learning_rate": 2.045557852865025e-06, "loss": 0.0862, "step": 10581 }, { "epoch": 1.7145171743357097, "grad_norm": 0.7759642601013184, "learning_rate": 2.045127841290993e-06, "loss": 0.1004, "step": 10582 }, { "epoch": 1.7146791963707064, "grad_norm": 0.7772481441497803, "learning_rate": 2.0446978436349486e-06, "loss": 0.0965, "step": 10583 }, { "epoch": 1.7148412184057031, "grad_norm": 0.8068507313728333, "learning_rate": 2.0442678599100484e-06, "loss": 0.0977, "step": 10584 }, { "epoch": 1.7150032404407, "grad_norm": 0.8451778292655945, "learning_rate": 2.0438378901294493e-06, "loss": 0.1098, "step": 10585 }, { "epoch": 1.7151652624756966, "grad_norm": 0.7878326773643494, "learning_rate": 2.043407934306306e-06, "loss": 0.0967, "step": 10586 }, { "epoch": 1.7153272845106935, "grad_norm": 0.8413339853286743, "learning_rate": 2.042977992453775e-06, "loss": 0.0935, "step": 10587 }, { "epoch": 1.7154893065456902, "grad_norm": 0.9668422937393188, "learning_rate": 2.0425480645850124e-06, "loss": 0.1142, "step": 10588 }, { "epoch": 1.715651328580687, "grad_norm": 0.829973578453064, "learning_rate": 2.042118150713171e-06, "loss": 0.1115, "step": 10589 }, { "epoch": 1.7158133506156839, "grad_norm": 0.8030633330345154, "learning_rate": 2.041688250851407e-06, "loss": 0.0899, "step": 10590 }, { "epoch": 1.7159753726506803, "grad_norm": 0.9173603057861328, "learning_rate": 2.041258365012873e-06, "loss": 0.1067, "step": 10591 }, { "epoch": 1.7161373946856773, "grad_norm": 0.9267182946205139, "learning_rate": 2.0408284932107227e-06, "loss": 0.1038, "step": 10592 }, { "epoch": 1.716299416720674, "grad_norm": 0.7872363328933716, "learning_rate": 2.040398635458109e-06, "loss": 0.1063, "step": 10593 }, { "epoch": 1.7164614387556707, "grad_norm": 0.7939862012863159, "learning_rate": 2.039968791768186e-06, "loss": 0.1016, "step": 10594 }, { "epoch": 1.7166234607906676, "grad_norm": 0.7923547625541687, "learning_rate": 2.039538962154104e-06, "loss": 0.0921, "step": 10595 }, { "epoch": 1.7167854828256643, "grad_norm": 0.7851249575614929, "learning_rate": 2.039109146629016e-06, "loss": 0.0993, "step": 10596 }, { "epoch": 1.716947504860661, "grad_norm": 0.7416827082633972, "learning_rate": 2.0386793452060717e-06, "loss": 0.0926, "step": 10597 }, { "epoch": 1.7171095268956578, "grad_norm": 0.8344098329544067, "learning_rate": 2.0382495578984236e-06, "loss": 0.1, "step": 10598 }, { "epoch": 1.7172715489306545, "grad_norm": 0.81064373254776, "learning_rate": 2.0378197847192216e-06, "loss": 0.0975, "step": 10599 }, { "epoch": 1.7174335709656514, "grad_norm": 0.8358397483825684, "learning_rate": 2.0373900256816166e-06, "loss": 0.1013, "step": 10600 }, { "epoch": 1.7175955930006481, "grad_norm": 0.7919216752052307, "learning_rate": 2.0369602807987564e-06, "loss": 0.0989, "step": 10601 }, { "epoch": 1.7177576150356448, "grad_norm": 0.7866799831390381, "learning_rate": 2.0365305500837906e-06, "loss": 0.0988, "step": 10602 }, { "epoch": 1.7179196370706418, "grad_norm": 0.7129443287849426, "learning_rate": 2.0361008335498695e-06, "loss": 0.0871, "step": 10603 }, { "epoch": 1.7180816591056383, "grad_norm": 0.8652666807174683, "learning_rate": 2.0356711312101394e-06, "loss": 0.1064, "step": 10604 }, { "epoch": 1.7182436811406352, "grad_norm": 0.7662056088447571, "learning_rate": 2.03524144307775e-06, "loss": 0.097, "step": 10605 }, { "epoch": 1.718405703175632, "grad_norm": 0.8492453098297119, "learning_rate": 2.0348117691658463e-06, "loss": 0.1026, "step": 10606 }, { "epoch": 1.7185677252106286, "grad_norm": 0.7717750668525696, "learning_rate": 2.0343821094875777e-06, "loss": 0.0952, "step": 10607 }, { "epoch": 1.7187297472456255, "grad_norm": 0.8496338725090027, "learning_rate": 2.03395246405609e-06, "loss": 0.0991, "step": 10608 }, { "epoch": 1.718891769280622, "grad_norm": 0.7787384390830994, "learning_rate": 2.0335228328845293e-06, "loss": 0.0885, "step": 10609 }, { "epoch": 1.719053791315619, "grad_norm": 0.6998451948165894, "learning_rate": 2.03309321598604e-06, "loss": 0.089, "step": 10610 }, { "epoch": 1.7192158133506157, "grad_norm": 0.8012830018997192, "learning_rate": 2.0326636133737686e-06, "loss": 0.0946, "step": 10611 }, { "epoch": 1.7193778353856124, "grad_norm": 0.7706624269485474, "learning_rate": 2.03223402506086e-06, "loss": 0.0964, "step": 10612 }, { "epoch": 1.7195398574206093, "grad_norm": 0.8254458904266357, "learning_rate": 2.0318044510604586e-06, "loss": 0.1, "step": 10613 }, { "epoch": 1.7197018794556058, "grad_norm": 0.7956833243370056, "learning_rate": 2.031374891385708e-06, "loss": 0.0903, "step": 10614 }, { "epoch": 1.7198639014906028, "grad_norm": 0.8892257213592529, "learning_rate": 2.030945346049751e-06, "loss": 0.1094, "step": 10615 }, { "epoch": 1.7200259235255995, "grad_norm": 0.7731269598007202, "learning_rate": 2.0305158150657316e-06, "loss": 0.0916, "step": 10616 }, { "epoch": 1.7201879455605962, "grad_norm": 0.8199908137321472, "learning_rate": 2.030086298446792e-06, "loss": 0.0944, "step": 10617 }, { "epoch": 1.720349967595593, "grad_norm": 0.8723816275596619, "learning_rate": 2.0296567962060753e-06, "loss": 0.1072, "step": 10618 }, { "epoch": 1.7205119896305896, "grad_norm": 0.8999892473220825, "learning_rate": 2.0292273083567215e-06, "loss": 0.1013, "step": 10619 }, { "epoch": 1.7206740116655865, "grad_norm": 0.7920383810997009, "learning_rate": 2.0287978349118737e-06, "loss": 0.0988, "step": 10620 }, { "epoch": 1.7208360337005832, "grad_norm": 0.9639683365821838, "learning_rate": 2.0283683758846705e-06, "loss": 0.1159, "step": 10621 }, { "epoch": 1.72099805573558, "grad_norm": 0.9006828665733337, "learning_rate": 2.0279389312882546e-06, "loss": 0.0991, "step": 10622 }, { "epoch": 1.721160077770577, "grad_norm": 0.8471527099609375, "learning_rate": 2.0275095011357655e-06, "loss": 0.0966, "step": 10623 }, { "epoch": 1.7213220998055736, "grad_norm": 0.9707726836204529, "learning_rate": 2.027080085440341e-06, "loss": 0.1061, "step": 10624 }, { "epoch": 1.7214841218405703, "grad_norm": 0.7146053910255432, "learning_rate": 2.0266506842151216e-06, "loss": 0.0878, "step": 10625 }, { "epoch": 1.721646143875567, "grad_norm": 0.7884452939033508, "learning_rate": 2.0262212974732465e-06, "loss": 0.0955, "step": 10626 }, { "epoch": 1.7218081659105637, "grad_norm": 0.8306124210357666, "learning_rate": 2.0257919252278535e-06, "loss": 0.1006, "step": 10627 }, { "epoch": 1.7219701879455607, "grad_norm": 0.8302614688873291, "learning_rate": 2.0253625674920795e-06, "loss": 0.0927, "step": 10628 }, { "epoch": 1.7221322099805574, "grad_norm": 0.7767832279205322, "learning_rate": 2.0249332242790627e-06, "loss": 0.0956, "step": 10629 }, { "epoch": 1.722294232015554, "grad_norm": 0.8298105001449585, "learning_rate": 2.0245038956019386e-06, "loss": 0.1043, "step": 10630 }, { "epoch": 1.722456254050551, "grad_norm": 0.8026810884475708, "learning_rate": 2.024074581473845e-06, "loss": 0.1006, "step": 10631 }, { "epoch": 1.7226182760855475, "grad_norm": 0.7589970231056213, "learning_rate": 2.0236452819079183e-06, "loss": 0.0942, "step": 10632 }, { "epoch": 1.7227802981205445, "grad_norm": 0.7752396464347839, "learning_rate": 2.0232159969172926e-06, "loss": 0.095, "step": 10633 }, { "epoch": 1.7229423201555412, "grad_norm": 0.7742292284965515, "learning_rate": 2.0227867265151035e-06, "loss": 0.0986, "step": 10634 }, { "epoch": 1.7231043421905379, "grad_norm": 0.9077650904655457, "learning_rate": 2.0223574707144854e-06, "loss": 0.1147, "step": 10635 }, { "epoch": 1.7232663642255348, "grad_norm": 0.7141374349594116, "learning_rate": 2.0219282295285734e-06, "loss": 0.0923, "step": 10636 }, { "epoch": 1.7234283862605313, "grad_norm": 0.7725054621696472, "learning_rate": 2.0214990029705007e-06, "loss": 0.0992, "step": 10637 }, { "epoch": 1.7235904082955282, "grad_norm": 0.8039776086807251, "learning_rate": 2.021069791053401e-06, "loss": 0.0928, "step": 10638 }, { "epoch": 1.723752430330525, "grad_norm": 0.8312842845916748, "learning_rate": 2.0206405937904058e-06, "loss": 0.0994, "step": 10639 }, { "epoch": 1.7239144523655217, "grad_norm": 0.7526655793190002, "learning_rate": 2.0202114111946483e-06, "loss": 0.0902, "step": 10640 }, { "epoch": 1.7240764744005186, "grad_norm": 0.9838579297065735, "learning_rate": 2.0197822432792606e-06, "loss": 0.109, "step": 10641 }, { "epoch": 1.724238496435515, "grad_norm": 0.9125879406929016, "learning_rate": 2.019353090057375e-06, "loss": 0.1185, "step": 10642 }, { "epoch": 1.724400518470512, "grad_norm": 0.7206192016601562, "learning_rate": 2.0189239515421214e-06, "loss": 0.0869, "step": 10643 }, { "epoch": 1.7245625405055087, "grad_norm": 0.8060803413391113, "learning_rate": 2.018494827746631e-06, "loss": 0.0971, "step": 10644 }, { "epoch": 1.7247245625405054, "grad_norm": 0.804918110370636, "learning_rate": 2.0180657186840326e-06, "loss": 0.1081, "step": 10645 }, { "epoch": 1.7248865845755024, "grad_norm": 0.8952481150627136, "learning_rate": 2.0176366243674575e-06, "loss": 0.1074, "step": 10646 }, { "epoch": 1.725048606610499, "grad_norm": 0.7076264023780823, "learning_rate": 2.0172075448100347e-06, "loss": 0.0892, "step": 10647 }, { "epoch": 1.7252106286454958, "grad_norm": 0.7837821245193481, "learning_rate": 2.0167784800248924e-06, "loss": 0.1005, "step": 10648 }, { "epoch": 1.7253726506804925, "grad_norm": 0.7912270426750183, "learning_rate": 2.0163494300251586e-06, "loss": 0.1001, "step": 10649 }, { "epoch": 1.7255346727154892, "grad_norm": 0.8220000863075256, "learning_rate": 2.0159203948239624e-06, "loss": 0.1018, "step": 10650 }, { "epoch": 1.7256966947504861, "grad_norm": 0.8370126485824585, "learning_rate": 2.0154913744344316e-06, "loss": 0.1055, "step": 10651 }, { "epoch": 1.7258587167854829, "grad_norm": 0.8922955989837646, "learning_rate": 2.015062368869691e-06, "loss": 0.1027, "step": 10652 }, { "epoch": 1.7260207388204796, "grad_norm": 0.7914833426475525, "learning_rate": 2.0146333781428694e-06, "loss": 0.0964, "step": 10653 }, { "epoch": 1.7261827608554765, "grad_norm": 0.8470481038093567, "learning_rate": 2.0142044022670905e-06, "loss": 0.1105, "step": 10654 }, { "epoch": 1.726344782890473, "grad_norm": 0.7832110524177551, "learning_rate": 2.0137754412554823e-06, "loss": 0.095, "step": 10655 }, { "epoch": 1.72650680492547, "grad_norm": 0.8746216297149658, "learning_rate": 2.013346495121169e-06, "loss": 0.1111, "step": 10656 }, { "epoch": 1.7266688269604666, "grad_norm": 0.8226485848426819, "learning_rate": 2.012917563877275e-06, "loss": 0.0996, "step": 10657 }, { "epoch": 1.7268308489954634, "grad_norm": 0.7802627682685852, "learning_rate": 2.012488647536925e-06, "loss": 0.1043, "step": 10658 }, { "epoch": 1.7269928710304603, "grad_norm": 0.9438074231147766, "learning_rate": 2.0120597461132416e-06, "loss": 0.1009, "step": 10659 }, { "epoch": 1.7271548930654568, "grad_norm": 0.810778796672821, "learning_rate": 2.0116308596193502e-06, "loss": 0.1009, "step": 10660 }, { "epoch": 1.7273169151004537, "grad_norm": 0.7767440676689148, "learning_rate": 2.0112019880683724e-06, "loss": 0.0921, "step": 10661 }, { "epoch": 1.7274789371354504, "grad_norm": 0.8341066241264343, "learning_rate": 2.0107731314734316e-06, "loss": 0.1, "step": 10662 }, { "epoch": 1.7276409591704471, "grad_norm": 0.8395964503288269, "learning_rate": 2.0103442898476484e-06, "loss": 0.1091, "step": 10663 }, { "epoch": 1.727802981205444, "grad_norm": 0.9881349802017212, "learning_rate": 2.0099154632041446e-06, "loss": 0.1047, "step": 10664 }, { "epoch": 1.7279650032404406, "grad_norm": 0.7424138784408569, "learning_rate": 2.0094866515560424e-06, "loss": 0.0817, "step": 10665 }, { "epoch": 1.7281270252754375, "grad_norm": 1.004927635192871, "learning_rate": 2.0090578549164614e-06, "loss": 0.117, "step": 10666 }, { "epoch": 1.7282890473104342, "grad_norm": 0.8730446100234985, "learning_rate": 2.0086290732985224e-06, "loss": 0.0988, "step": 10667 }, { "epoch": 1.728451069345431, "grad_norm": 0.8061848282814026, "learning_rate": 2.0082003067153436e-06, "loss": 0.1047, "step": 10668 }, { "epoch": 1.7286130913804278, "grad_norm": 0.7493446469306946, "learning_rate": 2.0077715551800457e-06, "loss": 0.0995, "step": 10669 }, { "epoch": 1.7287751134154243, "grad_norm": 0.8697327971458435, "learning_rate": 2.007342818705747e-06, "loss": 0.0994, "step": 10670 }, { "epoch": 1.7289371354504213, "grad_norm": 0.842828631401062, "learning_rate": 2.0069140973055663e-06, "loss": 0.1031, "step": 10671 }, { "epoch": 1.729099157485418, "grad_norm": 0.7752946019172668, "learning_rate": 2.006485390992621e-06, "loss": 0.0983, "step": 10672 }, { "epoch": 1.7292611795204147, "grad_norm": 0.71074378490448, "learning_rate": 2.0060566997800274e-06, "loss": 0.0777, "step": 10673 }, { "epoch": 1.7294232015554116, "grad_norm": 0.7402112483978271, "learning_rate": 2.0056280236809044e-06, "loss": 0.0999, "step": 10674 }, { "epoch": 1.7295852235904083, "grad_norm": 0.7987510561943054, "learning_rate": 2.005199362708367e-06, "loss": 0.0993, "step": 10675 }, { "epoch": 1.729747245625405, "grad_norm": 0.9736399054527283, "learning_rate": 2.004770716875533e-06, "loss": 0.1144, "step": 10676 }, { "epoch": 1.7299092676604018, "grad_norm": 0.7902863621711731, "learning_rate": 2.0043420861955155e-06, "loss": 0.0946, "step": 10677 }, { "epoch": 1.7300712896953985, "grad_norm": 0.8167175650596619, "learning_rate": 2.0039134706814303e-06, "loss": 0.1059, "step": 10678 }, { "epoch": 1.7302333117303954, "grad_norm": 0.8277512788772583, "learning_rate": 2.003484870346393e-06, "loss": 0.1024, "step": 10679 }, { "epoch": 1.7303953337653921, "grad_norm": 0.7947298884391785, "learning_rate": 2.0030562852035175e-06, "loss": 0.1047, "step": 10680 }, { "epoch": 1.7305573558003888, "grad_norm": 0.7207613587379456, "learning_rate": 2.0026277152659166e-06, "loss": 0.0903, "step": 10681 }, { "epoch": 1.7307193778353858, "grad_norm": 0.8288989663124084, "learning_rate": 2.0021991605467043e-06, "loss": 0.0966, "step": 10682 }, { "epoch": 1.7308813998703823, "grad_norm": 0.7648212909698486, "learning_rate": 2.0017706210589925e-06, "loss": 0.0918, "step": 10683 }, { "epoch": 1.7310434219053792, "grad_norm": 0.7417365312576294, "learning_rate": 2.0013420968158944e-06, "loss": 0.0972, "step": 10684 }, { "epoch": 1.731205443940376, "grad_norm": 0.8182329535484314, "learning_rate": 2.000913587830521e-06, "loss": 0.1003, "step": 10685 }, { "epoch": 1.7313674659753726, "grad_norm": 0.8272933959960938, "learning_rate": 2.0004850941159847e-06, "loss": 0.1045, "step": 10686 }, { "epoch": 1.7315294880103695, "grad_norm": 0.8044247031211853, "learning_rate": 2.0000566156853957e-06, "loss": 0.0977, "step": 10687 }, { "epoch": 1.731691510045366, "grad_norm": 0.7936481237411499, "learning_rate": 1.999628152551863e-06, "loss": 0.0906, "step": 10688 }, { "epoch": 1.731853532080363, "grad_norm": 0.8275524973869324, "learning_rate": 1.9991997047285e-06, "loss": 0.0972, "step": 10689 }, { "epoch": 1.7320155541153597, "grad_norm": 0.7702337503433228, "learning_rate": 1.9987712722284132e-06, "loss": 0.0968, "step": 10690 }, { "epoch": 1.7321775761503564, "grad_norm": 0.6895307302474976, "learning_rate": 1.9983428550647126e-06, "loss": 0.0807, "step": 10691 }, { "epoch": 1.7323395981853533, "grad_norm": 0.8178178668022156, "learning_rate": 1.9979144532505064e-06, "loss": 0.0974, "step": 10692 }, { "epoch": 1.7325016202203498, "grad_norm": 0.7914819717407227, "learning_rate": 1.997486066798903e-06, "loss": 0.0964, "step": 10693 }, { "epoch": 1.7326636422553467, "grad_norm": 0.8385084867477417, "learning_rate": 1.9970576957230094e-06, "loss": 0.0965, "step": 10694 }, { "epoch": 1.7328256642903435, "grad_norm": 0.9208899140357971, "learning_rate": 1.9966293400359343e-06, "loss": 0.1082, "step": 10695 }, { "epoch": 1.7329876863253402, "grad_norm": 0.9039163589477539, "learning_rate": 1.996200999750783e-06, "loss": 0.1038, "step": 10696 }, { "epoch": 1.733149708360337, "grad_norm": 0.7650443911552429, "learning_rate": 1.9957726748806608e-06, "loss": 0.0898, "step": 10697 }, { "epoch": 1.7333117303953338, "grad_norm": 0.8120889663696289, "learning_rate": 1.995344365438676e-06, "loss": 0.1004, "step": 10698 }, { "epoch": 1.7334737524303305, "grad_norm": 0.9312616586685181, "learning_rate": 1.9949160714379314e-06, "loss": 0.1158, "step": 10699 }, { "epoch": 1.7336357744653272, "grad_norm": 0.8903511166572571, "learning_rate": 1.994487792891534e-06, "loss": 0.1053, "step": 10700 }, { "epoch": 1.733797796500324, "grad_norm": 0.8750895261764526, "learning_rate": 1.9940595298125855e-06, "loss": 0.1043, "step": 10701 }, { "epoch": 1.7339598185353209, "grad_norm": 0.8411079049110413, "learning_rate": 1.993631282214191e-06, "loss": 0.1051, "step": 10702 }, { "epoch": 1.7341218405703176, "grad_norm": 0.7505423426628113, "learning_rate": 1.993203050109454e-06, "loss": 0.1055, "step": 10703 }, { "epoch": 1.7342838626053143, "grad_norm": 0.7114359736442566, "learning_rate": 1.992774833511478e-06, "loss": 0.0923, "step": 10704 }, { "epoch": 1.7344458846403112, "grad_norm": 1.49692964553833, "learning_rate": 1.9923466324333646e-06, "loss": 0.1233, "step": 10705 }, { "epoch": 1.7346079066753077, "grad_norm": 0.8229801654815674, "learning_rate": 1.991918446888216e-06, "loss": 0.1105, "step": 10706 }, { "epoch": 1.7347699287103047, "grad_norm": 0.7137386202812195, "learning_rate": 1.991490276889132e-06, "loss": 0.0934, "step": 10707 }, { "epoch": 1.7349319507453014, "grad_norm": 0.8458231687545776, "learning_rate": 1.9910621224492154e-06, "loss": 0.1179, "step": 10708 }, { "epoch": 1.735093972780298, "grad_norm": 0.8449655771255493, "learning_rate": 1.990633983581567e-06, "loss": 0.1009, "step": 10709 }, { "epoch": 1.735255994815295, "grad_norm": 0.8100825548171997, "learning_rate": 1.9902058602992856e-06, "loss": 0.0982, "step": 10710 }, { "epoch": 1.7354180168502915, "grad_norm": 0.6856768131256104, "learning_rate": 1.9897777526154717e-06, "loss": 0.0872, "step": 10711 }, { "epoch": 1.7355800388852884, "grad_norm": 0.8536728620529175, "learning_rate": 1.989349660543222e-06, "loss": 0.1053, "step": 10712 }, { "epoch": 1.7357420609202852, "grad_norm": 0.6712336540222168, "learning_rate": 1.988921584095639e-06, "loss": 0.08, "step": 10713 }, { "epoch": 1.7359040829552819, "grad_norm": 0.8158391118049622, "learning_rate": 1.988493523285818e-06, "loss": 0.0878, "step": 10714 }, { "epoch": 1.7360661049902788, "grad_norm": 0.8350120782852173, "learning_rate": 1.988065478126858e-06, "loss": 0.0983, "step": 10715 }, { "epoch": 1.7362281270252753, "grad_norm": 0.8612555265426636, "learning_rate": 1.9876374486318545e-06, "loss": 0.1114, "step": 10716 }, { "epoch": 1.7363901490602722, "grad_norm": 0.8192741274833679, "learning_rate": 1.9872094348139054e-06, "loss": 0.1062, "step": 10717 }, { "epoch": 1.736552171095269, "grad_norm": 0.8786032795906067, "learning_rate": 1.9867814366861075e-06, "loss": 0.1065, "step": 10718 }, { "epoch": 1.7367141931302656, "grad_norm": 0.7366222739219666, "learning_rate": 1.986353454261555e-06, "loss": 0.0918, "step": 10719 }, { "epoch": 1.7368762151652626, "grad_norm": 0.7951706051826477, "learning_rate": 1.9859254875533435e-06, "loss": 0.0903, "step": 10720 }, { "epoch": 1.737038237200259, "grad_norm": 0.8006754517555237, "learning_rate": 1.985497536574568e-06, "loss": 0.0967, "step": 10721 }, { "epoch": 1.737200259235256, "grad_norm": 0.8424306511878967, "learning_rate": 1.9850696013383236e-06, "loss": 0.103, "step": 10722 }, { "epoch": 1.7373622812702527, "grad_norm": 0.7816236615180969, "learning_rate": 1.9846416818577025e-06, "loss": 0.0935, "step": 10723 }, { "epoch": 1.7375243033052494, "grad_norm": 0.9311386942863464, "learning_rate": 1.9842137781458e-06, "loss": 0.1199, "step": 10724 }, { "epoch": 1.7376863253402464, "grad_norm": 0.7541922926902771, "learning_rate": 1.983785890215707e-06, "loss": 0.0901, "step": 10725 }, { "epoch": 1.737848347375243, "grad_norm": 0.842083215713501, "learning_rate": 1.9833580180805155e-06, "loss": 0.1022, "step": 10726 }, { "epoch": 1.7380103694102398, "grad_norm": 0.7754312753677368, "learning_rate": 1.98293016175332e-06, "loss": 0.104, "step": 10727 }, { "epoch": 1.7381723914452365, "grad_norm": 0.7790486216545105, "learning_rate": 1.9825023212472095e-06, "loss": 0.0973, "step": 10728 }, { "epoch": 1.7383344134802332, "grad_norm": 0.9156441688537598, "learning_rate": 1.9820744965752754e-06, "loss": 0.1056, "step": 10729 }, { "epoch": 1.7384964355152301, "grad_norm": 0.7913225293159485, "learning_rate": 1.9816466877506095e-06, "loss": 0.0946, "step": 10730 }, { "epoch": 1.7386584575502269, "grad_norm": 0.7693167328834534, "learning_rate": 1.981218894786299e-06, "loss": 0.0908, "step": 10731 }, { "epoch": 1.7388204795852236, "grad_norm": 0.7898248434066772, "learning_rate": 1.9807911176954357e-06, "loss": 0.0949, "step": 10732 }, { "epoch": 1.7389825016202205, "grad_norm": 0.7249963283538818, "learning_rate": 1.980363356491108e-06, "loss": 0.0939, "step": 10733 }, { "epoch": 1.739144523655217, "grad_norm": 0.8590476512908936, "learning_rate": 1.9799356111864036e-06, "loss": 0.1106, "step": 10734 }, { "epoch": 1.739306545690214, "grad_norm": 0.8085042834281921, "learning_rate": 1.979507881794412e-06, "loss": 0.1028, "step": 10735 }, { "epoch": 1.7394685677252106, "grad_norm": 0.801541805267334, "learning_rate": 1.979080168328218e-06, "loss": 0.0979, "step": 10736 }, { "epoch": 1.7396305897602073, "grad_norm": 0.8037110567092896, "learning_rate": 1.978652470800911e-06, "loss": 0.1054, "step": 10737 }, { "epoch": 1.7397926117952043, "grad_norm": 0.830059826374054, "learning_rate": 1.9782247892255767e-06, "loss": 0.0985, "step": 10738 }, { "epoch": 1.7399546338302008, "grad_norm": 0.8223294019699097, "learning_rate": 1.977797123615302e-06, "loss": 0.0968, "step": 10739 }, { "epoch": 1.7401166558651977, "grad_norm": 0.8012174367904663, "learning_rate": 1.9773694739831702e-06, "loss": 0.0944, "step": 10740 }, { "epoch": 1.7402786779001944, "grad_norm": 0.9736762046813965, "learning_rate": 1.9769418403422685e-06, "loss": 0.1149, "step": 10741 }, { "epoch": 1.7404406999351911, "grad_norm": 0.8288683891296387, "learning_rate": 1.976514222705681e-06, "loss": 0.0896, "step": 10742 }, { "epoch": 1.740602721970188, "grad_norm": 0.908768892288208, "learning_rate": 1.976086621086491e-06, "loss": 0.1, "step": 10743 }, { "epoch": 1.7407647440051845, "grad_norm": 0.8298162221908569, "learning_rate": 1.975659035497783e-06, "loss": 0.0956, "step": 10744 }, { "epoch": 1.7409267660401815, "grad_norm": 0.8755427598953247, "learning_rate": 1.9752314659526383e-06, "loss": 0.1017, "step": 10745 }, { "epoch": 1.7410887880751782, "grad_norm": 0.7467525005340576, "learning_rate": 1.9748039124641426e-06, "loss": 0.0907, "step": 10746 }, { "epoch": 1.741250810110175, "grad_norm": 0.7956831455230713, "learning_rate": 1.974376375045375e-06, "loss": 0.0944, "step": 10747 }, { "epoch": 1.7414128321451718, "grad_norm": 0.7522971034049988, "learning_rate": 1.9739488537094197e-06, "loss": 0.0952, "step": 10748 }, { "epoch": 1.7415748541801686, "grad_norm": 0.8951601982116699, "learning_rate": 1.973521348469355e-06, "loss": 0.1079, "step": 10749 }, { "epoch": 1.7417368762151653, "grad_norm": 0.8221662640571594, "learning_rate": 1.973093859338263e-06, "loss": 0.098, "step": 10750 }, { "epoch": 1.741898898250162, "grad_norm": 0.8099002242088318, "learning_rate": 1.972666386329225e-06, "loss": 0.0924, "step": 10751 }, { "epoch": 1.7420609202851587, "grad_norm": 0.8443790674209595, "learning_rate": 1.9722389294553188e-06, "loss": 0.1002, "step": 10752 }, { "epoch": 1.7422229423201556, "grad_norm": 0.777952253818512, "learning_rate": 1.9718114887296245e-06, "loss": 0.0979, "step": 10753 }, { "epoch": 1.7423849643551523, "grad_norm": 0.7357156276702881, "learning_rate": 1.9713840641652206e-06, "loss": 0.093, "step": 10754 }, { "epoch": 1.742546986390149, "grad_norm": 0.8331161141395569, "learning_rate": 1.970956655775184e-06, "loss": 0.1058, "step": 10755 }, { "epoch": 1.742709008425146, "grad_norm": 1.1018832921981812, "learning_rate": 1.970529263572594e-06, "loss": 0.1138, "step": 10756 }, { "epoch": 1.7428710304601425, "grad_norm": 0.8057063221931458, "learning_rate": 1.9701018875705277e-06, "loss": 0.1083, "step": 10757 }, { "epoch": 1.7430330524951394, "grad_norm": 0.8819854259490967, "learning_rate": 1.9696745277820613e-06, "loss": 0.1038, "step": 10758 }, { "epoch": 1.7431950745301361, "grad_norm": 0.7874314188957214, "learning_rate": 1.969247184220271e-06, "loss": 0.0942, "step": 10759 }, { "epoch": 1.7433570965651328, "grad_norm": 0.8394420742988586, "learning_rate": 1.9688198568982316e-06, "loss": 0.1002, "step": 10760 }, { "epoch": 1.7435191186001298, "grad_norm": 0.8666523694992065, "learning_rate": 1.9683925458290196e-06, "loss": 0.1118, "step": 10761 }, { "epoch": 1.7436811406351262, "grad_norm": 0.8129344582557678, "learning_rate": 1.96796525102571e-06, "loss": 0.1116, "step": 10762 }, { "epoch": 1.7438431626701232, "grad_norm": 0.7237949967384338, "learning_rate": 1.9675379725013752e-06, "loss": 0.0892, "step": 10763 }, { "epoch": 1.74400518470512, "grad_norm": 0.757645845413208, "learning_rate": 1.96711071026909e-06, "loss": 0.0978, "step": 10764 }, { "epoch": 1.7441672067401166, "grad_norm": 0.7295411825180054, "learning_rate": 1.9666834643419276e-06, "loss": 0.087, "step": 10765 }, { "epoch": 1.7443292287751135, "grad_norm": 0.8933332562446594, "learning_rate": 1.9662562347329613e-06, "loss": 0.1023, "step": 10766 }, { "epoch": 1.74449125081011, "grad_norm": 0.8272913098335266, "learning_rate": 1.9658290214552616e-06, "loss": 0.1089, "step": 10767 }, { "epoch": 1.744653272845107, "grad_norm": 0.8127201795578003, "learning_rate": 1.9654018245219024e-06, "loss": 0.0923, "step": 10768 }, { "epoch": 1.7448152948801037, "grad_norm": 0.7642537951469421, "learning_rate": 1.9649746439459523e-06, "loss": 0.0996, "step": 10769 }, { "epoch": 1.7449773169151004, "grad_norm": 0.7745651602745056, "learning_rate": 1.9645474797404838e-06, "loss": 0.0977, "step": 10770 }, { "epoch": 1.7451393389500973, "grad_norm": 0.7775667905807495, "learning_rate": 1.964120331918567e-06, "loss": 0.0981, "step": 10771 }, { "epoch": 1.7453013609850938, "grad_norm": 0.776843786239624, "learning_rate": 1.963693200493271e-06, "loss": 0.1047, "step": 10772 }, { "epoch": 1.7454633830200907, "grad_norm": 0.6424158215522766, "learning_rate": 1.963266085477665e-06, "loss": 0.0803, "step": 10773 }, { "epoch": 1.7456254050550875, "grad_norm": 0.8612257242202759, "learning_rate": 1.962838986884818e-06, "loss": 0.1058, "step": 10774 }, { "epoch": 1.7457874270900842, "grad_norm": 0.8193663358688354, "learning_rate": 1.9624119047277987e-06, "loss": 0.0973, "step": 10775 }, { "epoch": 1.745949449125081, "grad_norm": 0.7953779101371765, "learning_rate": 1.9619848390196734e-06, "loss": 0.1113, "step": 10776 }, { "epoch": 1.7461114711600778, "grad_norm": 0.8136135339736938, "learning_rate": 1.961557789773511e-06, "loss": 0.1051, "step": 10777 }, { "epoch": 1.7462734931950745, "grad_norm": 0.7347664833068848, "learning_rate": 1.9611307570023766e-06, "loss": 0.0897, "step": 10778 }, { "epoch": 1.7464355152300715, "grad_norm": 0.6952412128448486, "learning_rate": 1.9607037407193365e-06, "loss": 0.0851, "step": 10779 }, { "epoch": 1.746597537265068, "grad_norm": 0.8789510726928711, "learning_rate": 1.960276740937458e-06, "loss": 0.1074, "step": 10780 }, { "epoch": 1.7467595593000649, "grad_norm": 0.8486936092376709, "learning_rate": 1.9598497576698043e-06, "loss": 0.0999, "step": 10781 }, { "epoch": 1.7469215813350616, "grad_norm": 0.8231163620948792, "learning_rate": 1.959422790929441e-06, "loss": 0.1075, "step": 10782 }, { "epoch": 1.7470836033700583, "grad_norm": 0.7864092588424683, "learning_rate": 1.9589958407294317e-06, "loss": 0.0967, "step": 10783 }, { "epoch": 1.7472456254050552, "grad_norm": 0.7766144871711731, "learning_rate": 1.9585689070828413e-06, "loss": 0.0872, "step": 10784 }, { "epoch": 1.7474076474400517, "grad_norm": 0.9048768877983093, "learning_rate": 1.9581419900027317e-06, "loss": 0.101, "step": 10785 }, { "epoch": 1.7475696694750487, "grad_norm": 0.7717433571815491, "learning_rate": 1.9577150895021664e-06, "loss": 0.0954, "step": 10786 }, { "epoch": 1.7477316915100454, "grad_norm": 0.8796309232711792, "learning_rate": 1.9572882055942065e-06, "loss": 0.101, "step": 10787 }, { "epoch": 1.747893713545042, "grad_norm": 0.9133003354072571, "learning_rate": 1.9568613382919142e-06, "loss": 0.1146, "step": 10788 }, { "epoch": 1.748055735580039, "grad_norm": 0.7639305591583252, "learning_rate": 1.9564344876083504e-06, "loss": 0.0979, "step": 10789 }, { "epoch": 1.7482177576150355, "grad_norm": 0.7829166650772095, "learning_rate": 1.9560076535565766e-06, "loss": 0.1073, "step": 10790 }, { "epoch": 1.7483797796500324, "grad_norm": 0.7283692359924316, "learning_rate": 1.955580836149652e-06, "loss": 0.0943, "step": 10791 }, { "epoch": 1.7485418016850292, "grad_norm": 0.886214017868042, "learning_rate": 1.9551540354006366e-06, "loss": 0.1067, "step": 10792 }, { "epoch": 1.7487038237200259, "grad_norm": 0.7852003574371338, "learning_rate": 1.954727251322588e-06, "loss": 0.0953, "step": 10793 }, { "epoch": 1.7488658457550228, "grad_norm": 0.8498847484588623, "learning_rate": 1.954300483928567e-06, "loss": 0.1003, "step": 10794 }, { "epoch": 1.7490278677900193, "grad_norm": 0.8580987453460693, "learning_rate": 1.9538737332316304e-06, "loss": 0.1139, "step": 10795 }, { "epoch": 1.7491898898250162, "grad_norm": 0.800059974193573, "learning_rate": 1.953446999244836e-06, "loss": 0.0961, "step": 10796 }, { "epoch": 1.749351911860013, "grad_norm": 0.7808043956756592, "learning_rate": 1.953020281981241e-06, "loss": 0.0931, "step": 10797 }, { "epoch": 1.7495139338950096, "grad_norm": 0.7637292146682739, "learning_rate": 1.9525935814539e-06, "loss": 0.0983, "step": 10798 }, { "epoch": 1.7496759559300066, "grad_norm": 0.7604991793632507, "learning_rate": 1.952166897675873e-06, "loss": 0.0898, "step": 10799 }, { "epoch": 1.7498379779650033, "grad_norm": 0.7365829944610596, "learning_rate": 1.951740230660212e-06, "loss": 0.0953, "step": 10800 }, { "epoch": 1.75, "grad_norm": 0.8294869065284729, "learning_rate": 1.951313580419974e-06, "loss": 0.1092, "step": 10801 }, { "epoch": 1.7501620220349967, "grad_norm": 0.8134753704071045, "learning_rate": 1.950886946968212e-06, "loss": 0.1008, "step": 10802 }, { "epoch": 1.7503240440699934, "grad_norm": 0.967177152633667, "learning_rate": 1.9504603303179805e-06, "loss": 0.1083, "step": 10803 }, { "epoch": 1.7504860661049904, "grad_norm": 0.8222710490226746, "learning_rate": 1.9500337304823333e-06, "loss": 0.0935, "step": 10804 }, { "epoch": 1.750648088139987, "grad_norm": 0.7220136523246765, "learning_rate": 1.949607147474323e-06, "loss": 0.0931, "step": 10805 }, { "epoch": 1.7508101101749838, "grad_norm": 0.6608802676200867, "learning_rate": 1.9491805813070025e-06, "loss": 0.0843, "step": 10806 }, { "epoch": 1.7509721322099807, "grad_norm": 0.816785454750061, "learning_rate": 1.948754031993422e-06, "loss": 0.0995, "step": 10807 }, { "epoch": 1.7511341542449772, "grad_norm": 0.8732054829597473, "learning_rate": 1.948327499546635e-06, "loss": 0.1105, "step": 10808 }, { "epoch": 1.7512961762799741, "grad_norm": 0.9379077553749084, "learning_rate": 1.9479009839796913e-06, "loss": 0.1121, "step": 10809 }, { "epoch": 1.7514581983149708, "grad_norm": 0.8605905771255493, "learning_rate": 1.947474485305642e-06, "loss": 0.0981, "step": 10810 }, { "epoch": 1.7516202203499676, "grad_norm": 0.8368394374847412, "learning_rate": 1.947048003537536e-06, "loss": 0.1033, "step": 10811 }, { "epoch": 1.7517822423849645, "grad_norm": 0.7215535640716553, "learning_rate": 1.9466215386884223e-06, "loss": 0.0826, "step": 10812 }, { "epoch": 1.751944264419961, "grad_norm": 0.8332518339157104, "learning_rate": 1.9461950907713517e-06, "loss": 0.1041, "step": 10813 }, { "epoch": 1.752106286454958, "grad_norm": 0.9338610768318176, "learning_rate": 1.9457686597993704e-06, "loss": 0.1167, "step": 10814 }, { "epoch": 1.7522683084899546, "grad_norm": 0.8345838785171509, "learning_rate": 1.9453422457855274e-06, "loss": 0.1062, "step": 10815 }, { "epoch": 1.7524303305249513, "grad_norm": 0.7421085834503174, "learning_rate": 1.9449158487428688e-06, "loss": 0.0859, "step": 10816 }, { "epoch": 1.7525923525599483, "grad_norm": 0.9578739404678345, "learning_rate": 1.9444894686844417e-06, "loss": 0.1091, "step": 10817 }, { "epoch": 1.7527543745949448, "grad_norm": 0.819701075553894, "learning_rate": 1.9440631056232926e-06, "loss": 0.1051, "step": 10818 }, { "epoch": 1.7529163966299417, "grad_norm": 0.8500429391860962, "learning_rate": 1.943636759572468e-06, "loss": 0.1019, "step": 10819 }, { "epoch": 1.7530784186649384, "grad_norm": 0.8731544017791748, "learning_rate": 1.9432104305450117e-06, "loss": 0.1023, "step": 10820 }, { "epoch": 1.7532404406999351, "grad_norm": 0.8563147783279419, "learning_rate": 1.9427841185539693e-06, "loss": 0.1049, "step": 10821 }, { "epoch": 1.753402462734932, "grad_norm": 0.8676608800888062, "learning_rate": 1.942357823612383e-06, "loss": 0.1079, "step": 10822 }, { "epoch": 1.7535644847699285, "grad_norm": 0.870190441608429, "learning_rate": 1.941931545733299e-06, "loss": 0.1082, "step": 10823 }, { "epoch": 1.7537265068049255, "grad_norm": 0.8860259652137756, "learning_rate": 1.9415052849297585e-06, "loss": 0.1054, "step": 10824 }, { "epoch": 1.7538885288399222, "grad_norm": 1.0227184295654297, "learning_rate": 1.941079041214806e-06, "loss": 0.1208, "step": 10825 }, { "epoch": 1.754050550874919, "grad_norm": 0.787338137626648, "learning_rate": 1.9406528146014815e-06, "loss": 0.1075, "step": 10826 }, { "epoch": 1.7542125729099158, "grad_norm": 0.9310184121131897, "learning_rate": 1.9402266051028263e-06, "loss": 0.1064, "step": 10827 }, { "epoch": 1.7543745949449125, "grad_norm": 0.8110270500183105, "learning_rate": 1.939800412731884e-06, "loss": 0.1018, "step": 10828 }, { "epoch": 1.7545366169799093, "grad_norm": 0.8377658128738403, "learning_rate": 1.9393742375016926e-06, "loss": 0.1095, "step": 10829 }, { "epoch": 1.7546986390149062, "grad_norm": 0.8631830811500549, "learning_rate": 1.9389480794252933e-06, "loss": 0.1097, "step": 10830 }, { "epoch": 1.7548606610499027, "grad_norm": 0.76842200756073, "learning_rate": 1.9385219385157244e-06, "loss": 0.0994, "step": 10831 }, { "epoch": 1.7550226830848996, "grad_norm": 0.9799662828445435, "learning_rate": 1.9380958147860254e-06, "loss": 0.1145, "step": 10832 }, { "epoch": 1.7551847051198963, "grad_norm": 0.7072003483772278, "learning_rate": 1.937669708249235e-06, "loss": 0.0899, "step": 10833 }, { "epoch": 1.755346727154893, "grad_norm": 0.8198095560073853, "learning_rate": 1.937243618918391e-06, "loss": 0.1064, "step": 10834 }, { "epoch": 1.75550874918989, "grad_norm": 0.8468987941741943, "learning_rate": 1.9368175468065305e-06, "loss": 0.1043, "step": 10835 }, { "epoch": 1.7556707712248865, "grad_norm": 0.9012013077735901, "learning_rate": 1.936391491926689e-06, "loss": 0.107, "step": 10836 }, { "epoch": 1.7558327932598834, "grad_norm": 0.7513500452041626, "learning_rate": 1.9359654542919054e-06, "loss": 0.0948, "step": 10837 }, { "epoch": 1.75599481529488, "grad_norm": 0.7412696480751038, "learning_rate": 1.9355394339152133e-06, "loss": 0.0926, "step": 10838 }, { "epoch": 1.7561568373298768, "grad_norm": 0.8077243566513062, "learning_rate": 1.9351134308096493e-06, "loss": 0.0994, "step": 10839 }, { "epoch": 1.7563188593648738, "grad_norm": 0.7305166721343994, "learning_rate": 1.9346874449882465e-06, "loss": 0.0951, "step": 10840 }, { "epoch": 1.7564808813998702, "grad_norm": 0.8272958993911743, "learning_rate": 1.9342614764640392e-06, "loss": 0.1083, "step": 10841 }, { "epoch": 1.7566429034348672, "grad_norm": 0.7705273628234863, "learning_rate": 1.9338355252500624e-06, "loss": 0.095, "step": 10842 }, { "epoch": 1.7568049254698639, "grad_norm": 0.754991352558136, "learning_rate": 1.933409591359349e-06, "loss": 0.1042, "step": 10843 }, { "epoch": 1.7569669475048606, "grad_norm": 0.8502324819564819, "learning_rate": 1.93298367480493e-06, "loss": 0.0999, "step": 10844 }, { "epoch": 1.7571289695398575, "grad_norm": 0.7320595383644104, "learning_rate": 1.9325577755998397e-06, "loss": 0.0893, "step": 10845 }, { "epoch": 1.757290991574854, "grad_norm": 0.7977878451347351, "learning_rate": 1.932131893757107e-06, "loss": 0.0983, "step": 10846 }, { "epoch": 1.757453013609851, "grad_norm": 0.7108481526374817, "learning_rate": 1.9317060292897643e-06, "loss": 0.0906, "step": 10847 }, { "epoch": 1.7576150356448477, "grad_norm": 0.7112817764282227, "learning_rate": 1.9312801822108425e-06, "loss": 0.0883, "step": 10848 }, { "epoch": 1.7577770576798444, "grad_norm": 0.7918272614479065, "learning_rate": 1.9308543525333707e-06, "loss": 0.0957, "step": 10849 }, { "epoch": 1.7579390797148413, "grad_norm": 0.8627058267593384, "learning_rate": 1.9304285402703775e-06, "loss": 0.1079, "step": 10850 }, { "epoch": 1.758101101749838, "grad_norm": 0.7404596209526062, "learning_rate": 1.9300027454348932e-06, "loss": 0.0908, "step": 10851 }, { "epoch": 1.7582631237848347, "grad_norm": 0.879859447479248, "learning_rate": 1.929576968039946e-06, "loss": 0.1072, "step": 10852 }, { "epoch": 1.7584251458198314, "grad_norm": 0.8356106281280518, "learning_rate": 1.9291512080985626e-06, "loss": 0.105, "step": 10853 }, { "epoch": 1.7585871678548282, "grad_norm": 0.8642502427101135, "learning_rate": 1.928725465623772e-06, "loss": 0.1099, "step": 10854 }, { "epoch": 1.758749189889825, "grad_norm": 0.9117339849472046, "learning_rate": 1.928299740628598e-06, "loss": 0.113, "step": 10855 }, { "epoch": 1.7589112119248218, "grad_norm": 0.8020870685577393, "learning_rate": 1.927874033126069e-06, "loss": 0.1042, "step": 10856 }, { "epoch": 1.7590732339598185, "grad_norm": 0.8990389704704285, "learning_rate": 1.9274483431292107e-06, "loss": 0.109, "step": 10857 }, { "epoch": 1.7592352559948155, "grad_norm": 0.9079889059066772, "learning_rate": 1.927022670651047e-06, "loss": 0.1068, "step": 10858 }, { "epoch": 1.759397278029812, "grad_norm": 0.928896427154541, "learning_rate": 1.9265970157046037e-06, "loss": 0.1076, "step": 10859 }, { "epoch": 1.7595593000648089, "grad_norm": 0.936068594455719, "learning_rate": 1.9261713783029024e-06, "loss": 0.1145, "step": 10860 }, { "epoch": 1.7597213220998056, "grad_norm": 0.791902482509613, "learning_rate": 1.9257457584589697e-06, "loss": 0.1006, "step": 10861 }, { "epoch": 1.7598833441348023, "grad_norm": 0.7125447988510132, "learning_rate": 1.9253201561858266e-06, "loss": 0.0929, "step": 10862 }, { "epoch": 1.7600453661697992, "grad_norm": 0.7819201946258545, "learning_rate": 1.9248945714964967e-06, "loss": 0.0972, "step": 10863 }, { "epoch": 1.7602073882047957, "grad_norm": 0.6780325174331665, "learning_rate": 1.924469004404001e-06, "loss": 0.0861, "step": 10864 }, { "epoch": 1.7603694102397927, "grad_norm": 0.755989670753479, "learning_rate": 1.92404345492136e-06, "loss": 0.0954, "step": 10865 }, { "epoch": 1.7605314322747894, "grad_norm": 0.6931602358818054, "learning_rate": 1.9236179230615967e-06, "loss": 0.0822, "step": 10866 }, { "epoch": 1.760693454309786, "grad_norm": 0.7863826155662537, "learning_rate": 1.9231924088377296e-06, "loss": 0.0948, "step": 10867 }, { "epoch": 1.760855476344783, "grad_norm": 0.6952497959136963, "learning_rate": 1.922766912262779e-06, "loss": 0.0819, "step": 10868 }, { "epoch": 1.7610174983797795, "grad_norm": 0.6934300065040588, "learning_rate": 1.922341433349764e-06, "loss": 0.0829, "step": 10869 }, { "epoch": 1.7611795204147764, "grad_norm": 0.7480776906013489, "learning_rate": 1.921915972111703e-06, "loss": 0.0958, "step": 10870 }, { "epoch": 1.7613415424497731, "grad_norm": 0.8322224020957947, "learning_rate": 1.9214905285616147e-06, "loss": 0.1, "step": 10871 }, { "epoch": 1.7615035644847699, "grad_norm": 0.8011393547058105, "learning_rate": 1.9210651027125164e-06, "loss": 0.1024, "step": 10872 }, { "epoch": 1.7616655865197668, "grad_norm": 0.8309357166290283, "learning_rate": 1.9206396945774246e-06, "loss": 0.1033, "step": 10873 }, { "epoch": 1.7618276085547635, "grad_norm": 0.7737796306610107, "learning_rate": 1.9202143041693554e-06, "loss": 0.0988, "step": 10874 }, { "epoch": 1.7619896305897602, "grad_norm": 0.7730358242988586, "learning_rate": 1.919788931501327e-06, "loss": 0.0998, "step": 10875 }, { "epoch": 1.762151652624757, "grad_norm": 0.8785960078239441, "learning_rate": 1.919363576586352e-06, "loss": 0.1073, "step": 10876 }, { "epoch": 1.7623136746597536, "grad_norm": 0.8502126932144165, "learning_rate": 1.918938239437447e-06, "loss": 0.1092, "step": 10877 }, { "epoch": 1.7624756966947506, "grad_norm": 0.7489198446273804, "learning_rate": 1.918512920067626e-06, "loss": 0.0944, "step": 10878 }, { "epoch": 1.7626377187297473, "grad_norm": 0.7126352787017822, "learning_rate": 1.9180876184899015e-06, "loss": 0.0952, "step": 10879 }, { "epoch": 1.762799740764744, "grad_norm": 0.8675928711891174, "learning_rate": 1.9176623347172885e-06, "loss": 0.1067, "step": 10880 }, { "epoch": 1.762961762799741, "grad_norm": 0.820986807346344, "learning_rate": 1.9172370687627987e-06, "loss": 0.0972, "step": 10881 }, { "epoch": 1.7631237848347374, "grad_norm": 0.8926191329956055, "learning_rate": 1.9168118206394443e-06, "loss": 0.1056, "step": 10882 }, { "epoch": 1.7632858068697344, "grad_norm": 0.8152083158493042, "learning_rate": 1.9163865903602374e-06, "loss": 0.1041, "step": 10883 }, { "epoch": 1.763447828904731, "grad_norm": 0.837230920791626, "learning_rate": 1.915961377938187e-06, "loss": 0.1074, "step": 10884 }, { "epoch": 1.7636098509397278, "grad_norm": 0.8547905683517456, "learning_rate": 1.915536183386306e-06, "loss": 0.1056, "step": 10885 }, { "epoch": 1.7637718729747247, "grad_norm": 0.8433102965354919, "learning_rate": 1.9151110067176038e-06, "loss": 0.1013, "step": 10886 }, { "epoch": 1.7639338950097212, "grad_norm": 0.8947946429252625, "learning_rate": 1.9146858479450894e-06, "loss": 0.1166, "step": 10887 }, { "epoch": 1.7640959170447181, "grad_norm": 0.9684112668037415, "learning_rate": 1.914260707081771e-06, "loss": 0.1186, "step": 10888 }, { "epoch": 1.7642579390797148, "grad_norm": 0.7762362360954285, "learning_rate": 1.913835584140657e-06, "loss": 0.0967, "step": 10889 }, { "epoch": 1.7644199611147116, "grad_norm": 0.7626681923866272, "learning_rate": 1.913410479134757e-06, "loss": 0.0881, "step": 10890 }, { "epoch": 1.7645819831497085, "grad_norm": 0.7825966477394104, "learning_rate": 1.9129853920770763e-06, "loss": 0.0941, "step": 10891 }, { "epoch": 1.764744005184705, "grad_norm": 0.8394178748130798, "learning_rate": 1.9125603229806223e-06, "loss": 0.1112, "step": 10892 }, { "epoch": 1.764906027219702, "grad_norm": 0.7935326099395752, "learning_rate": 1.9121352718584006e-06, "loss": 0.1012, "step": 10893 }, { "epoch": 1.7650680492546986, "grad_norm": 0.8410793542861938, "learning_rate": 1.9117102387234165e-06, "loss": 0.1077, "step": 10894 }, { "epoch": 1.7652300712896953, "grad_norm": 0.7813910245895386, "learning_rate": 1.9112852235886757e-06, "loss": 0.0947, "step": 10895 }, { "epoch": 1.7653920933246923, "grad_norm": 0.81944340467453, "learning_rate": 1.910860226467183e-06, "loss": 0.0989, "step": 10896 }, { "epoch": 1.7655541153596888, "grad_norm": 0.7879267930984497, "learning_rate": 1.910435247371941e-06, "loss": 0.1065, "step": 10897 }, { "epoch": 1.7657161373946857, "grad_norm": 0.8651462197303772, "learning_rate": 1.910010286315953e-06, "loss": 0.1011, "step": 10898 }, { "epoch": 1.7658781594296824, "grad_norm": 0.7657998204231262, "learning_rate": 1.909585343312224e-06, "loss": 0.0925, "step": 10899 }, { "epoch": 1.7660401814646791, "grad_norm": 0.652816891670227, "learning_rate": 1.9091604183737546e-06, "loss": 0.0832, "step": 10900 }, { "epoch": 1.766202203499676, "grad_norm": 0.8381374478340149, "learning_rate": 1.9087355115135465e-06, "loss": 0.1008, "step": 10901 }, { "epoch": 1.7663642255346728, "grad_norm": 0.7962815761566162, "learning_rate": 1.9083106227446e-06, "loss": 0.0991, "step": 10902 }, { "epoch": 1.7665262475696695, "grad_norm": 0.7816174030303955, "learning_rate": 1.9078857520799167e-06, "loss": 0.098, "step": 10903 }, { "epoch": 1.7666882696046662, "grad_norm": 0.8635630011558533, "learning_rate": 1.907460899532497e-06, "loss": 0.0986, "step": 10904 }, { "epoch": 1.766850291639663, "grad_norm": 0.8344462513923645, "learning_rate": 1.9070360651153402e-06, "loss": 0.094, "step": 10905 }, { "epoch": 1.7670123136746598, "grad_norm": 0.6713218092918396, "learning_rate": 1.9066112488414445e-06, "loss": 0.0832, "step": 10906 }, { "epoch": 1.7671743357096565, "grad_norm": 0.8062579035758972, "learning_rate": 1.906186450723809e-06, "loss": 0.104, "step": 10907 }, { "epoch": 1.7673363577446533, "grad_norm": 0.8023903965950012, "learning_rate": 1.90576167077543e-06, "loss": 0.091, "step": 10908 }, { "epoch": 1.7674983797796502, "grad_norm": 0.739756166934967, "learning_rate": 1.9053369090093065e-06, "loss": 0.0819, "step": 10909 }, { "epoch": 1.7676604018146467, "grad_norm": 0.9430824518203735, "learning_rate": 1.904912165438435e-06, "loss": 0.1085, "step": 10910 }, { "epoch": 1.7678224238496436, "grad_norm": 1.0915939807891846, "learning_rate": 1.9044874400758106e-06, "loss": 0.12, "step": 10911 }, { "epoch": 1.7679844458846403, "grad_norm": 0.6942973136901855, "learning_rate": 1.9040627329344296e-06, "loss": 0.0896, "step": 10912 }, { "epoch": 1.768146467919637, "grad_norm": 0.8167296648025513, "learning_rate": 1.9036380440272861e-06, "loss": 0.0949, "step": 10913 }, { "epoch": 1.768308489954634, "grad_norm": 0.8740625381469727, "learning_rate": 1.9032133733673764e-06, "loss": 0.0991, "step": 10914 }, { "epoch": 1.7684705119896305, "grad_norm": 0.9350792169570923, "learning_rate": 1.9027887209676925e-06, "loss": 0.1148, "step": 10915 }, { "epoch": 1.7686325340246274, "grad_norm": 0.7898386716842651, "learning_rate": 1.9023640868412297e-06, "loss": 0.0929, "step": 10916 }, { "epoch": 1.768794556059624, "grad_norm": 0.7293332815170288, "learning_rate": 1.901939471000978e-06, "loss": 0.0902, "step": 10917 }, { "epoch": 1.7689565780946208, "grad_norm": 0.8032740950584412, "learning_rate": 1.9015148734599317e-06, "loss": 0.097, "step": 10918 }, { "epoch": 1.7691186001296177, "grad_norm": 0.7515488266944885, "learning_rate": 1.9010902942310827e-06, "loss": 0.1018, "step": 10919 }, { "epoch": 1.7692806221646142, "grad_norm": 0.7839230895042419, "learning_rate": 1.900665733327421e-06, "loss": 0.0935, "step": 10920 }, { "epoch": 1.7694426441996112, "grad_norm": 0.8209660649299622, "learning_rate": 1.9002411907619372e-06, "loss": 0.1026, "step": 10921 }, { "epoch": 1.7696046662346079, "grad_norm": 0.7910726070404053, "learning_rate": 1.899816666547621e-06, "loss": 0.0966, "step": 10922 }, { "epoch": 1.7697666882696046, "grad_norm": 0.8440234065055847, "learning_rate": 1.8993921606974636e-06, "loss": 0.1055, "step": 10923 }, { "epoch": 1.7699287103046015, "grad_norm": 0.9682288765907288, "learning_rate": 1.8989676732244522e-06, "loss": 0.1216, "step": 10924 }, { "epoch": 1.7700907323395982, "grad_norm": 0.8367732763290405, "learning_rate": 1.8985432041415758e-06, "loss": 0.1072, "step": 10925 }, { "epoch": 1.770252754374595, "grad_norm": 1.015958547592163, "learning_rate": 1.8981187534618217e-06, "loss": 0.1244, "step": 10926 }, { "epoch": 1.7704147764095917, "grad_norm": 1.0870450735092163, "learning_rate": 1.8976943211981764e-06, "loss": 0.1068, "step": 10927 }, { "epoch": 1.7705767984445884, "grad_norm": 0.8047299981117249, "learning_rate": 1.8972699073636283e-06, "loss": 0.1014, "step": 10928 }, { "epoch": 1.7707388204795853, "grad_norm": 0.7418354153633118, "learning_rate": 1.896845511971162e-06, "loss": 0.0959, "step": 10929 }, { "epoch": 1.770900842514582, "grad_norm": 0.6954073905944824, "learning_rate": 1.8964211350337637e-06, "loss": 0.0887, "step": 10930 }, { "epoch": 1.7710628645495787, "grad_norm": 0.8665586113929749, "learning_rate": 1.8959967765644182e-06, "loss": 0.1077, "step": 10931 }, { "epoch": 1.7712248865845757, "grad_norm": 0.6979767084121704, "learning_rate": 1.895572436576109e-06, "loss": 0.0904, "step": 10932 }, { "epoch": 1.7713869086195722, "grad_norm": 0.8404514193534851, "learning_rate": 1.8951481150818206e-06, "loss": 0.1054, "step": 10933 }, { "epoch": 1.771548930654569, "grad_norm": 0.7205252647399902, "learning_rate": 1.8947238120945372e-06, "loss": 0.0875, "step": 10934 }, { "epoch": 1.7717109526895658, "grad_norm": 0.8082873821258545, "learning_rate": 1.8942995276272396e-06, "loss": 0.0989, "step": 10935 }, { "epoch": 1.7718729747245625, "grad_norm": 0.9559950828552246, "learning_rate": 1.8938752616929112e-06, "loss": 0.1251, "step": 10936 }, { "epoch": 1.7720349967595594, "grad_norm": 0.7924472093582153, "learning_rate": 1.8934510143045316e-06, "loss": 0.097, "step": 10937 }, { "epoch": 1.772197018794556, "grad_norm": 0.8172940015792847, "learning_rate": 1.8930267854750845e-06, "loss": 0.1057, "step": 10938 }, { "epoch": 1.7723590408295529, "grad_norm": 0.8141445517539978, "learning_rate": 1.8926025752175486e-06, "loss": 0.1122, "step": 10939 }, { "epoch": 1.7725210628645496, "grad_norm": 0.8678747415542603, "learning_rate": 1.8921783835449042e-06, "loss": 0.1103, "step": 10940 }, { "epoch": 1.7726830848995463, "grad_norm": 0.6963508725166321, "learning_rate": 1.8917542104701297e-06, "loss": 0.0906, "step": 10941 }, { "epoch": 1.7728451069345432, "grad_norm": 0.7853334546089172, "learning_rate": 1.8913300560062047e-06, "loss": 0.0972, "step": 10942 }, { "epoch": 1.7730071289695397, "grad_norm": 0.8051917552947998, "learning_rate": 1.8909059201661079e-06, "loss": 0.0975, "step": 10943 }, { "epoch": 1.7731691510045366, "grad_norm": 0.7873483896255493, "learning_rate": 1.890481802962815e-06, "loss": 0.1039, "step": 10944 }, { "epoch": 1.7733311730395334, "grad_norm": 0.7235662937164307, "learning_rate": 1.8900577044093045e-06, "loss": 0.0817, "step": 10945 }, { "epoch": 1.77349319507453, "grad_norm": 0.793947696685791, "learning_rate": 1.889633624518551e-06, "loss": 0.0988, "step": 10946 }, { "epoch": 1.773655217109527, "grad_norm": 0.8600531816482544, "learning_rate": 1.889209563303533e-06, "loss": 0.1138, "step": 10947 }, { "epoch": 1.7738172391445235, "grad_norm": 0.7487325072288513, "learning_rate": 1.8887855207772235e-06, "loss": 0.0858, "step": 10948 }, { "epoch": 1.7739792611795204, "grad_norm": 0.8943690061569214, "learning_rate": 1.8883614969525987e-06, "loss": 0.1172, "step": 10949 }, { "epoch": 1.7741412832145171, "grad_norm": 0.7959859371185303, "learning_rate": 1.8879374918426312e-06, "loss": 0.1, "step": 10950 }, { "epoch": 1.7743033052495139, "grad_norm": 0.8319025635719299, "learning_rate": 1.887513505460295e-06, "loss": 0.1021, "step": 10951 }, { "epoch": 1.7744653272845108, "grad_norm": 0.7502698302268982, "learning_rate": 1.8870895378185643e-06, "loss": 0.092, "step": 10952 }, { "epoch": 1.7746273493195075, "grad_norm": 0.8591358661651611, "learning_rate": 1.88666558893041e-06, "loss": 0.1105, "step": 10953 }, { "epoch": 1.7747893713545042, "grad_norm": 0.7255021333694458, "learning_rate": 1.886241658808805e-06, "loss": 0.0905, "step": 10954 }, { "epoch": 1.774951393389501, "grad_norm": 0.7662566304206848, "learning_rate": 1.8858177474667195e-06, "loss": 0.0917, "step": 10955 }, { "epoch": 1.7751134154244976, "grad_norm": 0.7199138402938843, "learning_rate": 1.8853938549171242e-06, "loss": 0.0868, "step": 10956 }, { "epoch": 1.7752754374594946, "grad_norm": 0.8491278886795044, "learning_rate": 1.88496998117299e-06, "loss": 0.1025, "step": 10957 }, { "epoch": 1.7754374594944913, "grad_norm": 0.9104598164558411, "learning_rate": 1.8845461262472863e-06, "loss": 0.108, "step": 10958 }, { "epoch": 1.775599481529488, "grad_norm": 0.7580164670944214, "learning_rate": 1.8841222901529816e-06, "loss": 0.0952, "step": 10959 }, { "epoch": 1.775761503564485, "grad_norm": 0.8028944730758667, "learning_rate": 1.883698472903045e-06, "loss": 0.1113, "step": 10960 }, { "epoch": 1.7759235255994814, "grad_norm": 0.7645183801651001, "learning_rate": 1.8832746745104425e-06, "loss": 0.1035, "step": 10961 }, { "epoch": 1.7760855476344783, "grad_norm": 0.7198721170425415, "learning_rate": 1.882850894988143e-06, "loss": 0.0931, "step": 10962 }, { "epoch": 1.776247569669475, "grad_norm": 0.7722495794296265, "learning_rate": 1.882427134349113e-06, "loss": 0.0999, "step": 10963 }, { "epoch": 1.7764095917044718, "grad_norm": 0.8241525292396545, "learning_rate": 1.882003392606318e-06, "loss": 0.1011, "step": 10964 }, { "epoch": 1.7765716137394687, "grad_norm": 0.7520769834518433, "learning_rate": 1.881579669772723e-06, "loss": 0.0956, "step": 10965 }, { "epoch": 1.7767336357744652, "grad_norm": 0.7766212224960327, "learning_rate": 1.8811559658612941e-06, "loss": 0.1007, "step": 10966 }, { "epoch": 1.7768956578094621, "grad_norm": 0.7454885244369507, "learning_rate": 1.8807322808849953e-06, "loss": 0.0895, "step": 10967 }, { "epoch": 1.7770576798444588, "grad_norm": 0.7523648142814636, "learning_rate": 1.88030861485679e-06, "loss": 0.0972, "step": 10968 }, { "epoch": 1.7772197018794555, "grad_norm": 0.902167022228241, "learning_rate": 1.879884967789642e-06, "loss": 0.1076, "step": 10969 }, { "epoch": 1.7773817239144525, "grad_norm": 0.9376851320266724, "learning_rate": 1.879461339696512e-06, "loss": 0.1125, "step": 10970 }, { "epoch": 1.777543745949449, "grad_norm": 0.7125734090805054, "learning_rate": 1.879037730590364e-06, "loss": 0.0869, "step": 10971 }, { "epoch": 1.777705767984446, "grad_norm": 0.9217870831489563, "learning_rate": 1.8786141404841587e-06, "loss": 0.1053, "step": 10972 }, { "epoch": 1.7778677900194426, "grad_norm": 0.7570266723632812, "learning_rate": 1.8781905693908575e-06, "loss": 0.0966, "step": 10973 }, { "epoch": 1.7780298120544393, "grad_norm": 0.8740879893302917, "learning_rate": 1.8777670173234198e-06, "loss": 0.1015, "step": 10974 }, { "epoch": 1.7781918340894363, "grad_norm": 0.7980040907859802, "learning_rate": 1.877343484294805e-06, "loss": 0.0967, "step": 10975 }, { "epoch": 1.778353856124433, "grad_norm": 0.8354352116584778, "learning_rate": 1.8769199703179736e-06, "loss": 0.1052, "step": 10976 }, { "epoch": 1.7785158781594297, "grad_norm": 0.7894411683082581, "learning_rate": 1.876496475405883e-06, "loss": 0.0979, "step": 10977 }, { "epoch": 1.7786779001944264, "grad_norm": 0.7108971476554871, "learning_rate": 1.8760729995714916e-06, "loss": 0.0876, "step": 10978 }, { "epoch": 1.778839922229423, "grad_norm": 0.8221232891082764, "learning_rate": 1.8756495428277562e-06, "loss": 0.1034, "step": 10979 }, { "epoch": 1.77900194426442, "grad_norm": 0.7995948195457458, "learning_rate": 1.8752261051876337e-06, "loss": 0.0979, "step": 10980 }, { "epoch": 1.7791639662994168, "grad_norm": 0.8195783495903015, "learning_rate": 1.8748026866640806e-06, "loss": 0.104, "step": 10981 }, { "epoch": 1.7793259883344135, "grad_norm": 0.8994091153144836, "learning_rate": 1.8743792872700529e-06, "loss": 0.1015, "step": 10982 }, { "epoch": 1.7794880103694104, "grad_norm": 0.9404149055480957, "learning_rate": 1.8739559070185045e-06, "loss": 0.1078, "step": 10983 }, { "epoch": 1.779650032404407, "grad_norm": 0.7274839878082275, "learning_rate": 1.873532545922391e-06, "loss": 0.0896, "step": 10984 }, { "epoch": 1.7798120544394038, "grad_norm": 0.7796977758407593, "learning_rate": 1.8731092039946646e-06, "loss": 0.098, "step": 10985 }, { "epoch": 1.7799740764744005, "grad_norm": 0.7981799840927124, "learning_rate": 1.8726858812482798e-06, "loss": 0.0993, "step": 10986 }, { "epoch": 1.7801360985093972, "grad_norm": 0.8132720589637756, "learning_rate": 1.8722625776961894e-06, "loss": 0.1009, "step": 10987 }, { "epoch": 1.7802981205443942, "grad_norm": 0.769990086555481, "learning_rate": 1.871839293351345e-06, "loss": 0.0948, "step": 10988 }, { "epoch": 1.7804601425793907, "grad_norm": 0.7847530245780945, "learning_rate": 1.8714160282266973e-06, "loss": 0.0981, "step": 10989 }, { "epoch": 1.7806221646143876, "grad_norm": 0.8644593954086304, "learning_rate": 1.870992782335198e-06, "loss": 0.0972, "step": 10990 }, { "epoch": 1.7807841866493843, "grad_norm": 0.777860701084137, "learning_rate": 1.8705695556897986e-06, "loss": 0.0979, "step": 10991 }, { "epoch": 1.780946208684381, "grad_norm": 0.9635716676712036, "learning_rate": 1.8701463483034471e-06, "loss": 0.1061, "step": 10992 }, { "epoch": 1.781108230719378, "grad_norm": 0.7884843945503235, "learning_rate": 1.8697231601890933e-06, "loss": 0.0937, "step": 10993 }, { "epoch": 1.7812702527543745, "grad_norm": 0.8972017168998718, "learning_rate": 1.8692999913596846e-06, "loss": 0.1078, "step": 10994 }, { "epoch": 1.7814322747893714, "grad_norm": 0.7947750091552734, "learning_rate": 1.8688768418281705e-06, "loss": 0.0987, "step": 10995 }, { "epoch": 1.781594296824368, "grad_norm": 1.048323154449463, "learning_rate": 1.8684537116074983e-06, "loss": 0.126, "step": 10996 }, { "epoch": 1.7817563188593648, "grad_norm": 0.8322978615760803, "learning_rate": 1.8680306007106136e-06, "loss": 0.1015, "step": 10997 }, { "epoch": 1.7819183408943617, "grad_norm": 0.8976532816886902, "learning_rate": 1.8676075091504637e-06, "loss": 0.1016, "step": 10998 }, { "epoch": 1.7820803629293582, "grad_norm": 0.6236394643783569, "learning_rate": 1.8671844369399922e-06, "loss": 0.0794, "step": 10999 }, { "epoch": 1.7822423849643552, "grad_norm": 0.8208524584770203, "learning_rate": 1.866761384092147e-06, "loss": 0.1006, "step": 11000 }, { "epoch": 1.7824044069993519, "grad_norm": 0.8767183423042297, "learning_rate": 1.8663383506198706e-06, "loss": 0.0964, "step": 11001 }, { "epoch": 1.7825664290343486, "grad_norm": 0.7991448044776917, "learning_rate": 1.8659153365361076e-06, "loss": 0.1013, "step": 11002 }, { "epoch": 1.7827284510693455, "grad_norm": 0.8050661683082581, "learning_rate": 1.8654923418538003e-06, "loss": 0.0933, "step": 11003 }, { "epoch": 1.7828904731043422, "grad_norm": 0.8594214916229248, "learning_rate": 1.8650693665858916e-06, "loss": 0.1113, "step": 11004 }, { "epoch": 1.783052495139339, "grad_norm": 0.8001613616943359, "learning_rate": 1.8646464107453247e-06, "loss": 0.1044, "step": 11005 }, { "epoch": 1.7832145171743357, "grad_norm": 0.865673840045929, "learning_rate": 1.8642234743450394e-06, "loss": 0.1033, "step": 11006 }, { "epoch": 1.7833765392093324, "grad_norm": 0.720534086227417, "learning_rate": 1.8638005573979776e-06, "loss": 0.0874, "step": 11007 }, { "epoch": 1.7835385612443293, "grad_norm": 0.7906041145324707, "learning_rate": 1.8633776599170783e-06, "loss": 0.1006, "step": 11008 }, { "epoch": 1.783700583279326, "grad_norm": 0.8074170351028442, "learning_rate": 1.8629547819152832e-06, "loss": 0.1024, "step": 11009 }, { "epoch": 1.7838626053143227, "grad_norm": 0.7554319500923157, "learning_rate": 1.86253192340553e-06, "loss": 0.0986, "step": 11010 }, { "epoch": 1.7840246273493197, "grad_norm": 0.7377730011940002, "learning_rate": 1.8621090844007572e-06, "loss": 0.0984, "step": 11011 }, { "epoch": 1.7841866493843161, "grad_norm": 0.9073734283447266, "learning_rate": 1.8616862649139024e-06, "loss": 0.1075, "step": 11012 }, { "epoch": 1.784348671419313, "grad_norm": 0.8492515683174133, "learning_rate": 1.861263464957903e-06, "loss": 0.1033, "step": 11013 }, { "epoch": 1.7845106934543098, "grad_norm": 0.7091047763824463, "learning_rate": 1.8608406845456968e-06, "loss": 0.0868, "step": 11014 }, { "epoch": 1.7846727154893065, "grad_norm": 0.7672532796859741, "learning_rate": 1.860417923690218e-06, "loss": 0.0974, "step": 11015 }, { "epoch": 1.7848347375243034, "grad_norm": 0.7299427390098572, "learning_rate": 1.8599951824044033e-06, "loss": 0.0926, "step": 11016 }, { "epoch": 1.7849967595593, "grad_norm": 0.8017887473106384, "learning_rate": 1.8595724607011878e-06, "loss": 0.0925, "step": 11017 }, { "epoch": 1.7851587815942969, "grad_norm": 0.7088046669960022, "learning_rate": 1.8591497585935041e-06, "loss": 0.0911, "step": 11018 }, { "epoch": 1.7853208036292936, "grad_norm": 0.9126763939857483, "learning_rate": 1.8587270760942875e-06, "loss": 0.1143, "step": 11019 }, { "epoch": 1.7854828256642903, "grad_norm": 0.9007970094680786, "learning_rate": 1.858304413216471e-06, "loss": 0.1132, "step": 11020 }, { "epoch": 1.7856448476992872, "grad_norm": 0.7455145120620728, "learning_rate": 1.8578817699729862e-06, "loss": 0.0945, "step": 11021 }, { "epoch": 1.7858068697342837, "grad_norm": 0.8875641226768494, "learning_rate": 1.8574591463767656e-06, "loss": 0.1057, "step": 11022 }, { "epoch": 1.7859688917692806, "grad_norm": 0.8011730909347534, "learning_rate": 1.8570365424407394e-06, "loss": 0.1005, "step": 11023 }, { "epoch": 1.7861309138042774, "grad_norm": 0.843669593334198, "learning_rate": 1.8566139581778392e-06, "loss": 0.0926, "step": 11024 }, { "epoch": 1.786292935839274, "grad_norm": 0.7854774594306946, "learning_rate": 1.856191393600995e-06, "loss": 0.0967, "step": 11025 }, { "epoch": 1.786454957874271, "grad_norm": 0.9077264666557312, "learning_rate": 1.855768848723137e-06, "loss": 0.1092, "step": 11026 }, { "epoch": 1.7866169799092677, "grad_norm": 0.8441132307052612, "learning_rate": 1.8553463235571927e-06, "loss": 0.1036, "step": 11027 }, { "epoch": 1.7867790019442644, "grad_norm": 1.07819664478302, "learning_rate": 1.85492381811609e-06, "loss": 0.0925, "step": 11028 }, { "epoch": 1.7869410239792611, "grad_norm": 0.8448095917701721, "learning_rate": 1.8545013324127587e-06, "loss": 0.1002, "step": 11029 }, { "epoch": 1.7871030460142578, "grad_norm": 0.8025045394897461, "learning_rate": 1.854078866460124e-06, "loss": 0.1058, "step": 11030 }, { "epoch": 1.7872650680492548, "grad_norm": 0.7943110466003418, "learning_rate": 1.8536564202711135e-06, "loss": 0.1004, "step": 11031 }, { "epoch": 1.7874270900842515, "grad_norm": 0.8177294731140137, "learning_rate": 1.8532339938586513e-06, "loss": 0.0892, "step": 11032 }, { "epoch": 1.7875891121192482, "grad_norm": 0.9388481378555298, "learning_rate": 1.8528115872356641e-06, "loss": 0.111, "step": 11033 }, { "epoch": 1.7877511341542451, "grad_norm": 0.8275322914123535, "learning_rate": 1.8523892004150765e-06, "loss": 0.1, "step": 11034 }, { "epoch": 1.7879131561892416, "grad_norm": 0.777814507484436, "learning_rate": 1.8519668334098124e-06, "loss": 0.1026, "step": 11035 }, { "epoch": 1.7880751782242386, "grad_norm": 1.0052603483200073, "learning_rate": 1.8515444862327947e-06, "loss": 0.1034, "step": 11036 }, { "epoch": 1.7882372002592353, "grad_norm": 0.8155965805053711, "learning_rate": 1.8511221588969457e-06, "loss": 0.1, "step": 11037 }, { "epoch": 1.788399222294232, "grad_norm": 0.7355263829231262, "learning_rate": 1.8506998514151896e-06, "loss": 0.091, "step": 11038 }, { "epoch": 1.788561244329229, "grad_norm": 0.7432408332824707, "learning_rate": 1.850277563800446e-06, "loss": 0.092, "step": 11039 }, { "epoch": 1.7887232663642254, "grad_norm": 0.7793557643890381, "learning_rate": 1.8498552960656378e-06, "loss": 0.1009, "step": 11040 }, { "epoch": 1.7888852883992223, "grad_norm": 0.8657031655311584, "learning_rate": 1.8494330482236832e-06, "loss": 0.0957, "step": 11041 }, { "epoch": 1.789047310434219, "grad_norm": 0.752953052520752, "learning_rate": 1.8490108202875023e-06, "loss": 0.0939, "step": 11042 }, { "epoch": 1.7892093324692158, "grad_norm": 0.9895876049995422, "learning_rate": 1.8485886122700158e-06, "loss": 0.1191, "step": 11043 }, { "epoch": 1.7893713545042127, "grad_norm": 0.7776055335998535, "learning_rate": 1.848166424184142e-06, "loss": 0.0981, "step": 11044 }, { "epoch": 1.7895333765392092, "grad_norm": 0.7290987968444824, "learning_rate": 1.8477442560427975e-06, "loss": 0.0947, "step": 11045 }, { "epoch": 1.7896953985742061, "grad_norm": 0.7357193231582642, "learning_rate": 1.8473221078589006e-06, "loss": 0.0906, "step": 11046 }, { "epoch": 1.7898574206092028, "grad_norm": 0.8186837434768677, "learning_rate": 1.8468999796453672e-06, "loss": 0.1071, "step": 11047 }, { "epoch": 1.7900194426441995, "grad_norm": 0.8652331233024597, "learning_rate": 1.846477871415114e-06, "loss": 0.1072, "step": 11048 }, { "epoch": 1.7901814646791965, "grad_norm": 0.8034560084342957, "learning_rate": 1.8460557831810571e-06, "loss": 0.0951, "step": 11049 }, { "epoch": 1.790343486714193, "grad_norm": 0.7994994521141052, "learning_rate": 1.8456337149561105e-06, "loss": 0.0932, "step": 11050 }, { "epoch": 1.79050550874919, "grad_norm": 0.8264526724815369, "learning_rate": 1.8452116667531886e-06, "loss": 0.0989, "step": 11051 }, { "epoch": 1.7906675307841866, "grad_norm": 0.8260409235954285, "learning_rate": 1.8447896385852043e-06, "loss": 0.104, "step": 11052 }, { "epoch": 1.7908295528191833, "grad_norm": 0.8477978706359863, "learning_rate": 1.844367630465073e-06, "loss": 0.1045, "step": 11053 }, { "epoch": 1.7909915748541803, "grad_norm": 0.790149986743927, "learning_rate": 1.8439456424057044e-06, "loss": 0.0985, "step": 11054 }, { "epoch": 1.791153596889177, "grad_norm": 0.7050209045410156, "learning_rate": 1.8435236744200126e-06, "loss": 0.0867, "step": 11055 }, { "epoch": 1.7913156189241737, "grad_norm": 0.9373990297317505, "learning_rate": 1.8431017265209067e-06, "loss": 0.1088, "step": 11056 }, { "epoch": 1.7914776409591704, "grad_norm": 0.8682579398155212, "learning_rate": 1.8426797987212985e-06, "loss": 0.1047, "step": 11057 }, { "epoch": 1.791639662994167, "grad_norm": 0.7753797769546509, "learning_rate": 1.8422578910340985e-06, "loss": 0.0957, "step": 11058 }, { "epoch": 1.791801685029164, "grad_norm": 0.9039705395698547, "learning_rate": 1.8418360034722149e-06, "loss": 0.1037, "step": 11059 }, { "epoch": 1.7919637070641607, "grad_norm": 0.7357102632522583, "learning_rate": 1.8414141360485565e-06, "loss": 0.0893, "step": 11060 }, { "epoch": 1.7921257290991575, "grad_norm": 0.7715552449226379, "learning_rate": 1.8409922887760317e-06, "loss": 0.0987, "step": 11061 }, { "epoch": 1.7922877511341544, "grad_norm": 0.8303348422050476, "learning_rate": 1.840570461667549e-06, "loss": 0.1043, "step": 11062 }, { "epoch": 1.7924497731691509, "grad_norm": 0.7379674315452576, "learning_rate": 1.8401486547360137e-06, "loss": 0.0908, "step": 11063 }, { "epoch": 1.7926117952041478, "grad_norm": 0.8676923513412476, "learning_rate": 1.8397268679943333e-06, "loss": 0.0983, "step": 11064 }, { "epoch": 1.7927738172391445, "grad_norm": 0.8327484130859375, "learning_rate": 1.8393051014554124e-06, "loss": 0.1041, "step": 11065 }, { "epoch": 1.7929358392741412, "grad_norm": 0.8785510659217834, "learning_rate": 1.8388833551321562e-06, "loss": 0.0972, "step": 11066 }, { "epoch": 1.7930978613091382, "grad_norm": 0.8931607007980347, "learning_rate": 1.8384616290374705e-06, "loss": 0.1099, "step": 11067 }, { "epoch": 1.7932598833441347, "grad_norm": 0.7808845639228821, "learning_rate": 1.838039923184257e-06, "loss": 0.0937, "step": 11068 }, { "epoch": 1.7934219053791316, "grad_norm": 0.8837364912033081, "learning_rate": 1.8376182375854207e-06, "loss": 0.1046, "step": 11069 }, { "epoch": 1.7935839274141283, "grad_norm": 0.7206534743309021, "learning_rate": 1.8371965722538636e-06, "loss": 0.0915, "step": 11070 }, { "epoch": 1.793745949449125, "grad_norm": 0.7323435544967651, "learning_rate": 1.8367749272024865e-06, "loss": 0.0969, "step": 11071 }, { "epoch": 1.793907971484122, "grad_norm": 0.7837962508201599, "learning_rate": 1.836353302444192e-06, "loss": 0.1013, "step": 11072 }, { "epoch": 1.7940699935191184, "grad_norm": 0.7067288160324097, "learning_rate": 1.8359316979918808e-06, "loss": 0.0903, "step": 11073 }, { "epoch": 1.7942320155541154, "grad_norm": 0.7567185163497925, "learning_rate": 1.8355101138584524e-06, "loss": 0.0876, "step": 11074 }, { "epoch": 1.794394037589112, "grad_norm": 0.9895825982093811, "learning_rate": 1.835088550056806e-06, "loss": 0.1207, "step": 11075 }, { "epoch": 1.7945560596241088, "grad_norm": 0.8607103824615479, "learning_rate": 1.8346670065998411e-06, "loss": 0.0999, "step": 11076 }, { "epoch": 1.7947180816591057, "grad_norm": 0.7561851143836975, "learning_rate": 1.8342454835004566e-06, "loss": 0.0952, "step": 11077 }, { "epoch": 1.7948801036941024, "grad_norm": 0.756078839302063, "learning_rate": 1.8338239807715486e-06, "loss": 0.0955, "step": 11078 }, { "epoch": 1.7950421257290992, "grad_norm": 0.948203444480896, "learning_rate": 1.833402498426015e-06, "loss": 0.1169, "step": 11079 }, { "epoch": 1.7952041477640959, "grad_norm": 0.809870183467865, "learning_rate": 1.8329810364767511e-06, "loss": 0.0992, "step": 11080 }, { "epoch": 1.7953661697990926, "grad_norm": 0.7944110631942749, "learning_rate": 1.8325595949366537e-06, "loss": 0.0974, "step": 11081 }, { "epoch": 1.7955281918340895, "grad_norm": 0.7319938540458679, "learning_rate": 1.8321381738186178e-06, "loss": 0.0953, "step": 11082 }, { "epoch": 1.7956902138690862, "grad_norm": 0.7145232558250427, "learning_rate": 1.8317167731355373e-06, "loss": 0.0909, "step": 11083 }, { "epoch": 1.795852235904083, "grad_norm": 0.7656394243240356, "learning_rate": 1.8312953929003068e-06, "loss": 0.0937, "step": 11084 }, { "epoch": 1.7960142579390799, "grad_norm": 0.8850581645965576, "learning_rate": 1.8308740331258177e-06, "loss": 0.1003, "step": 11085 }, { "epoch": 1.7961762799740764, "grad_norm": 0.7528477311134338, "learning_rate": 1.8304526938249653e-06, "loss": 0.0894, "step": 11086 }, { "epoch": 1.7963383020090733, "grad_norm": 0.7964629530906677, "learning_rate": 1.8300313750106396e-06, "loss": 0.0934, "step": 11087 }, { "epoch": 1.79650032404407, "grad_norm": 0.6928067207336426, "learning_rate": 1.8296100766957331e-06, "loss": 0.0834, "step": 11088 }, { "epoch": 1.7966623460790667, "grad_norm": 0.9328933954238892, "learning_rate": 1.8291887988931357e-06, "loss": 0.1128, "step": 11089 }, { "epoch": 1.7968243681140637, "grad_norm": 0.7624589800834656, "learning_rate": 1.828767541615737e-06, "loss": 0.0925, "step": 11090 }, { "epoch": 1.7969863901490601, "grad_norm": 0.7582963109016418, "learning_rate": 1.828346304876428e-06, "loss": 0.0924, "step": 11091 }, { "epoch": 1.797148412184057, "grad_norm": 0.8816431760787964, "learning_rate": 1.8279250886880962e-06, "loss": 0.0985, "step": 11092 }, { "epoch": 1.7973104342190538, "grad_norm": 0.9561519026756287, "learning_rate": 1.8275038930636314e-06, "loss": 0.1172, "step": 11093 }, { "epoch": 1.7974724562540505, "grad_norm": 0.845643162727356, "learning_rate": 1.827082718015919e-06, "loss": 0.1133, "step": 11094 }, { "epoch": 1.7976344782890474, "grad_norm": 0.8886216282844543, "learning_rate": 1.8266615635578464e-06, "loss": 0.1089, "step": 11095 }, { "epoch": 1.797796500324044, "grad_norm": 0.8109029531478882, "learning_rate": 1.8262404297023013e-06, "loss": 0.0978, "step": 11096 }, { "epoch": 1.7979585223590409, "grad_norm": 0.7375802993774414, "learning_rate": 1.825819316462169e-06, "loss": 0.0906, "step": 11097 }, { "epoch": 1.7981205443940376, "grad_norm": 0.8273477554321289, "learning_rate": 1.8253982238503338e-06, "loss": 0.1088, "step": 11098 }, { "epoch": 1.7982825664290343, "grad_norm": 0.6932176947593689, "learning_rate": 1.8249771518796794e-06, "loss": 0.0845, "step": 11099 }, { "epoch": 1.7984445884640312, "grad_norm": 0.7964252233505249, "learning_rate": 1.8245561005630921e-06, "loss": 0.0918, "step": 11100 }, { "epoch": 1.7986066104990277, "grad_norm": 0.708993136882782, "learning_rate": 1.824135069913453e-06, "loss": 0.0911, "step": 11101 }, { "epoch": 1.7987686325340246, "grad_norm": 0.8443676233291626, "learning_rate": 1.823714059943646e-06, "loss": 0.1014, "step": 11102 }, { "epoch": 1.7989306545690213, "grad_norm": 0.8077982068061829, "learning_rate": 1.823293070666551e-06, "loss": 0.0996, "step": 11103 }, { "epoch": 1.799092676604018, "grad_norm": 0.8241330981254578, "learning_rate": 1.8228721020950504e-06, "loss": 0.0983, "step": 11104 }, { "epoch": 1.799254698639015, "grad_norm": 0.8449083566665649, "learning_rate": 1.8224511542420254e-06, "loss": 0.1019, "step": 11105 }, { "epoch": 1.7994167206740117, "grad_norm": 0.6822419166564941, "learning_rate": 1.8220302271203557e-06, "loss": 0.0793, "step": 11106 }, { "epoch": 1.7995787427090084, "grad_norm": 0.8199465870857239, "learning_rate": 1.82160932074292e-06, "loss": 0.0983, "step": 11107 }, { "epoch": 1.7997407647440054, "grad_norm": 0.9466975927352905, "learning_rate": 1.8211884351225978e-06, "loss": 0.1151, "step": 11108 }, { "epoch": 1.7999027867790018, "grad_norm": 0.7171126008033752, "learning_rate": 1.8207675702722661e-06, "loss": 0.0904, "step": 11109 }, { "epoch": 1.8000648088139988, "grad_norm": 0.8177133202552795, "learning_rate": 1.8203467262048033e-06, "loss": 0.096, "step": 11110 }, { "epoch": 1.8002268308489955, "grad_norm": 0.9946437478065491, "learning_rate": 1.8199259029330865e-06, "loss": 0.1111, "step": 11111 }, { "epoch": 1.8003888528839922, "grad_norm": 0.8421857357025146, "learning_rate": 1.819505100469991e-06, "loss": 0.1072, "step": 11112 }, { "epoch": 1.8005508749189891, "grad_norm": 0.7888447642326355, "learning_rate": 1.8190843188283925e-06, "loss": 0.0884, "step": 11113 }, { "epoch": 1.8007128969539856, "grad_norm": 0.7778288722038269, "learning_rate": 1.8186635580211654e-06, "loss": 0.0878, "step": 11114 }, { "epoch": 1.8008749189889826, "grad_norm": 0.7060684561729431, "learning_rate": 1.8182428180611855e-06, "loss": 0.0799, "step": 11115 }, { "epoch": 1.8010369410239793, "grad_norm": 0.8897756934165955, "learning_rate": 1.8178220989613255e-06, "loss": 0.1116, "step": 11116 }, { "epoch": 1.801198963058976, "grad_norm": 0.7480401992797852, "learning_rate": 1.8174014007344586e-06, "loss": 0.0954, "step": 11117 }, { "epoch": 1.801360985093973, "grad_norm": 0.7541413307189941, "learning_rate": 1.8169807233934567e-06, "loss": 0.0945, "step": 11118 }, { "epoch": 1.8015230071289694, "grad_norm": 0.7814289927482605, "learning_rate": 1.8165600669511912e-06, "loss": 0.0951, "step": 11119 }, { "epoch": 1.8016850291639663, "grad_norm": 0.8429241180419922, "learning_rate": 1.8161394314205343e-06, "loss": 0.1045, "step": 11120 }, { "epoch": 1.801847051198963, "grad_norm": 0.8381679654121399, "learning_rate": 1.8157188168143564e-06, "loss": 0.1059, "step": 11121 }, { "epoch": 1.8020090732339598, "grad_norm": 0.8170045018196106, "learning_rate": 1.8152982231455262e-06, "loss": 0.0991, "step": 11122 }, { "epoch": 1.8021710952689567, "grad_norm": 0.8006271719932556, "learning_rate": 1.8148776504269129e-06, "loss": 0.0989, "step": 11123 }, { "epoch": 1.8023331173039532, "grad_norm": 0.7720314264297485, "learning_rate": 1.8144570986713867e-06, "loss": 0.0928, "step": 11124 }, { "epoch": 1.8024951393389501, "grad_norm": 0.7991904020309448, "learning_rate": 1.8140365678918138e-06, "loss": 0.0986, "step": 11125 }, { "epoch": 1.8026571613739468, "grad_norm": 0.9030035138130188, "learning_rate": 1.8136160581010624e-06, "loss": 0.1187, "step": 11126 }, { "epoch": 1.8028191834089435, "grad_norm": 0.8388223648071289, "learning_rate": 1.813195569311998e-06, "loss": 0.1017, "step": 11127 }, { "epoch": 1.8029812054439405, "grad_norm": 0.9252680540084839, "learning_rate": 1.8127751015374865e-06, "loss": 0.1104, "step": 11128 }, { "epoch": 1.8031432274789372, "grad_norm": 0.8280481696128845, "learning_rate": 1.8123546547903944e-06, "loss": 0.1046, "step": 11129 }, { "epoch": 1.803305249513934, "grad_norm": 0.8752947449684143, "learning_rate": 1.8119342290835864e-06, "loss": 0.1067, "step": 11130 }, { "epoch": 1.8034672715489306, "grad_norm": 0.753736138343811, "learning_rate": 1.8115138244299254e-06, "loss": 0.0953, "step": 11131 }, { "epoch": 1.8036292935839273, "grad_norm": 0.8405237197875977, "learning_rate": 1.8110934408422758e-06, "loss": 0.0967, "step": 11132 }, { "epoch": 1.8037913156189243, "grad_norm": 0.9108558893203735, "learning_rate": 1.8106730783334985e-06, "loss": 0.1006, "step": 11133 }, { "epoch": 1.803953337653921, "grad_norm": 0.7528913021087646, "learning_rate": 1.810252736916458e-06, "loss": 0.0983, "step": 11134 }, { "epoch": 1.8041153596889177, "grad_norm": 0.8099750280380249, "learning_rate": 1.8098324166040146e-06, "loss": 0.1013, "step": 11135 }, { "epoch": 1.8042773817239146, "grad_norm": 0.9023746848106384, "learning_rate": 1.8094121174090288e-06, "loss": 0.1148, "step": 11136 }, { "epoch": 1.804439403758911, "grad_norm": 0.8526237607002258, "learning_rate": 1.8089918393443611e-06, "loss": 0.1046, "step": 11137 }, { "epoch": 1.804601425793908, "grad_norm": 0.7785555124282837, "learning_rate": 1.80857158242287e-06, "loss": 0.0932, "step": 11138 }, { "epoch": 1.8047634478289047, "grad_norm": 0.7802260518074036, "learning_rate": 1.8081513466574164e-06, "loss": 0.1013, "step": 11139 }, { "epoch": 1.8049254698639015, "grad_norm": 0.8971158862113953, "learning_rate": 1.8077311320608571e-06, "loss": 0.1221, "step": 11140 }, { "epoch": 1.8050874918988984, "grad_norm": 0.833736002445221, "learning_rate": 1.8073109386460502e-06, "loss": 0.1064, "step": 11141 }, { "epoch": 1.8052495139338949, "grad_norm": 0.8659934401512146, "learning_rate": 1.806890766425851e-06, "loss": 0.1017, "step": 11142 }, { "epoch": 1.8054115359688918, "grad_norm": 0.7669563889503479, "learning_rate": 1.8064706154131179e-06, "loss": 0.0973, "step": 11143 }, { "epoch": 1.8055735580038885, "grad_norm": 0.9565890431404114, "learning_rate": 1.8060504856207062e-06, "loss": 0.1079, "step": 11144 }, { "epoch": 1.8057355800388852, "grad_norm": 0.8553591370582581, "learning_rate": 1.8056303770614697e-06, "loss": 0.1065, "step": 11145 }, { "epoch": 1.8058976020738822, "grad_norm": 0.9055613279342651, "learning_rate": 1.8052102897482643e-06, "loss": 0.1114, "step": 11146 }, { "epoch": 1.8060596241088787, "grad_norm": 0.8043663501739502, "learning_rate": 1.8047902236939405e-06, "loss": 0.1029, "step": 11147 }, { "epoch": 1.8062216461438756, "grad_norm": 0.8190831542015076, "learning_rate": 1.8043701789113552e-06, "loss": 0.098, "step": 11148 }, { "epoch": 1.8063836681788723, "grad_norm": 0.8615659475326538, "learning_rate": 1.8039501554133588e-06, "loss": 0.0942, "step": 11149 }, { "epoch": 1.806545690213869, "grad_norm": 0.9080948829650879, "learning_rate": 1.8035301532128032e-06, "loss": 0.1049, "step": 11150 }, { "epoch": 1.806707712248866, "grad_norm": 0.7901988625526428, "learning_rate": 1.8031101723225393e-06, "loss": 0.0924, "step": 11151 }, { "epoch": 1.8068697342838627, "grad_norm": 0.8067225813865662, "learning_rate": 1.8026902127554172e-06, "loss": 0.1016, "step": 11152 }, { "epoch": 1.8070317563188594, "grad_norm": 0.7822276949882507, "learning_rate": 1.8022702745242882e-06, "loss": 0.0953, "step": 11153 }, { "epoch": 1.807193778353856, "grad_norm": 0.8535957336425781, "learning_rate": 1.8018503576419996e-06, "loss": 0.1074, "step": 11154 }, { "epoch": 1.8073558003888528, "grad_norm": 0.7549822926521301, "learning_rate": 1.8014304621214008e-06, "loss": 0.0964, "step": 11155 }, { "epoch": 1.8075178224238497, "grad_norm": 0.816744863986969, "learning_rate": 1.8010105879753398e-06, "loss": 0.105, "step": 11156 }, { "epoch": 1.8076798444588464, "grad_norm": 0.7305224537849426, "learning_rate": 1.800590735216662e-06, "loss": 0.0952, "step": 11157 }, { "epoch": 1.8078418664938432, "grad_norm": 0.8567788600921631, "learning_rate": 1.800170903858216e-06, "loss": 0.1097, "step": 11158 }, { "epoch": 1.80800388852884, "grad_norm": 0.8007251024246216, "learning_rate": 1.799751093912847e-06, "loss": 0.0898, "step": 11159 }, { "epoch": 1.8081659105638366, "grad_norm": 0.7454932928085327, "learning_rate": 1.7993313053933998e-06, "loss": 0.0942, "step": 11160 }, { "epoch": 1.8083279325988335, "grad_norm": 0.8023138046264648, "learning_rate": 1.7989115383127195e-06, "loss": 0.102, "step": 11161 }, { "epoch": 1.8084899546338302, "grad_norm": 0.7055501937866211, "learning_rate": 1.7984917926836484e-06, "loss": 0.0865, "step": 11162 }, { "epoch": 1.808651976668827, "grad_norm": 0.7876911759376526, "learning_rate": 1.7980720685190314e-06, "loss": 0.0841, "step": 11163 }, { "epoch": 1.8088139987038239, "grad_norm": 0.8725854158401489, "learning_rate": 1.7976523658317104e-06, "loss": 0.1082, "step": 11164 }, { "epoch": 1.8089760207388204, "grad_norm": 0.8327855467796326, "learning_rate": 1.7972326846345277e-06, "loss": 0.1103, "step": 11165 }, { "epoch": 1.8091380427738173, "grad_norm": 0.9465359449386597, "learning_rate": 1.7968130249403238e-06, "loss": 0.114, "step": 11166 }, { "epoch": 1.809300064808814, "grad_norm": 0.8367246389389038, "learning_rate": 1.7963933867619396e-06, "loss": 0.1114, "step": 11167 }, { "epoch": 1.8094620868438107, "grad_norm": 0.821707546710968, "learning_rate": 1.7959737701122157e-06, "loss": 0.0933, "step": 11168 }, { "epoch": 1.8096241088788076, "grad_norm": 0.951481282711029, "learning_rate": 1.79555417500399e-06, "loss": 0.1188, "step": 11169 }, { "epoch": 1.8097861309138041, "grad_norm": 0.7933133244514465, "learning_rate": 1.7951346014501027e-06, "loss": 0.1043, "step": 11170 }, { "epoch": 1.809948152948801, "grad_norm": 0.8114445209503174, "learning_rate": 1.7947150494633897e-06, "loss": 0.0908, "step": 11171 }, { "epoch": 1.8101101749837978, "grad_norm": 0.7849157452583313, "learning_rate": 1.7942955190566899e-06, "loss": 0.1009, "step": 11172 }, { "epoch": 1.8102721970187945, "grad_norm": 0.7490556836128235, "learning_rate": 1.7938760102428396e-06, "loss": 0.0816, "step": 11173 }, { "epoch": 1.8104342190537914, "grad_norm": 0.8201428651809692, "learning_rate": 1.7934565230346752e-06, "loss": 0.1019, "step": 11174 }, { "epoch": 1.810596241088788, "grad_norm": 0.7032244801521301, "learning_rate": 1.7930370574450304e-06, "loss": 0.0911, "step": 11175 }, { "epoch": 1.8107582631237849, "grad_norm": 0.8158414959907532, "learning_rate": 1.7926176134867408e-06, "loss": 0.1025, "step": 11176 }, { "epoch": 1.8109202851587816, "grad_norm": 0.8338707685470581, "learning_rate": 1.792198191172641e-06, "loss": 0.1059, "step": 11177 }, { "epoch": 1.8110823071937783, "grad_norm": 0.7679630517959595, "learning_rate": 1.7917787905155634e-06, "loss": 0.0831, "step": 11178 }, { "epoch": 1.8112443292287752, "grad_norm": 0.8508248329162598, "learning_rate": 1.7913594115283414e-06, "loss": 0.1109, "step": 11179 }, { "epoch": 1.811406351263772, "grad_norm": 0.8345566391944885, "learning_rate": 1.790940054223806e-06, "loss": 0.1056, "step": 11180 }, { "epoch": 1.8115683732987686, "grad_norm": 0.8556733131408691, "learning_rate": 1.7905207186147888e-06, "loss": 0.1059, "step": 11181 }, { "epoch": 1.8117303953337653, "grad_norm": 0.7630212903022766, "learning_rate": 1.7901014047141208e-06, "loss": 0.0991, "step": 11182 }, { "epoch": 1.811892417368762, "grad_norm": 0.9459897875785828, "learning_rate": 1.7896821125346325e-06, "loss": 0.11, "step": 11183 }, { "epoch": 1.812054439403759, "grad_norm": 0.7780237793922424, "learning_rate": 1.7892628420891526e-06, "loss": 0.1011, "step": 11184 }, { "epoch": 1.8122164614387557, "grad_norm": 0.7486929297447205, "learning_rate": 1.7888435933905097e-06, "loss": 0.0906, "step": 11185 }, { "epoch": 1.8123784834737524, "grad_norm": 0.8000259399414062, "learning_rate": 1.788424366451531e-06, "loss": 0.0972, "step": 11186 }, { "epoch": 1.8125405055087493, "grad_norm": 0.837604284286499, "learning_rate": 1.7880051612850455e-06, "loss": 0.1022, "step": 11187 }, { "epoch": 1.8127025275437458, "grad_norm": 0.7862725853919983, "learning_rate": 1.7875859779038796e-06, "loss": 0.0986, "step": 11188 }, { "epoch": 1.8128645495787428, "grad_norm": 0.7725867629051208, "learning_rate": 1.7871668163208577e-06, "loss": 0.0968, "step": 11189 }, { "epoch": 1.8130265716137395, "grad_norm": 0.7349421977996826, "learning_rate": 1.7867476765488061e-06, "loss": 0.0899, "step": 11190 }, { "epoch": 1.8131885936487362, "grad_norm": 0.7476187944412231, "learning_rate": 1.78632855860055e-06, "loss": 0.0975, "step": 11191 }, { "epoch": 1.8133506156837331, "grad_norm": 0.8673322200775146, "learning_rate": 1.7859094624889135e-06, "loss": 0.0985, "step": 11192 }, { "epoch": 1.8135126377187296, "grad_norm": 0.77607262134552, "learning_rate": 1.785490388226719e-06, "loss": 0.0923, "step": 11193 }, { "epoch": 1.8136746597537265, "grad_norm": 0.8594310283660889, "learning_rate": 1.7850713358267897e-06, "loss": 0.1137, "step": 11194 }, { "epoch": 1.8138366817887233, "grad_norm": 0.7893880009651184, "learning_rate": 1.7846523053019466e-06, "loss": 0.096, "step": 11195 }, { "epoch": 1.81399870382372, "grad_norm": 0.8915963768959045, "learning_rate": 1.7842332966650122e-06, "loss": 0.1095, "step": 11196 }, { "epoch": 1.814160725858717, "grad_norm": 0.7770095467567444, "learning_rate": 1.7838143099288075e-06, "loss": 0.0921, "step": 11197 }, { "epoch": 1.8143227478937134, "grad_norm": 0.8442981243133545, "learning_rate": 1.7833953451061513e-06, "loss": 0.1042, "step": 11198 }, { "epoch": 1.8144847699287103, "grad_norm": 0.8559162020683289, "learning_rate": 1.7829764022098633e-06, "loss": 0.1041, "step": 11199 }, { "epoch": 1.814646791963707, "grad_norm": 0.7358631491661072, "learning_rate": 1.7825574812527617e-06, "loss": 0.0866, "step": 11200 }, { "epoch": 1.8148088139987038, "grad_norm": 0.8252244591712952, "learning_rate": 1.7821385822476661e-06, "loss": 0.1032, "step": 11201 }, { "epoch": 1.8149708360337007, "grad_norm": 0.725712239742279, "learning_rate": 1.781719705207392e-06, "loss": 0.0829, "step": 11202 }, { "epoch": 1.8151328580686974, "grad_norm": 0.7640675902366638, "learning_rate": 1.7813008501447576e-06, "loss": 0.0867, "step": 11203 }, { "epoch": 1.815294880103694, "grad_norm": 0.8494378924369812, "learning_rate": 1.7808820170725772e-06, "loss": 0.1012, "step": 11204 }, { "epoch": 1.8154569021386908, "grad_norm": 0.7609313130378723, "learning_rate": 1.7804632060036665e-06, "loss": 0.096, "step": 11205 }, { "epoch": 1.8156189241736875, "grad_norm": 0.7626781463623047, "learning_rate": 1.7800444169508414e-06, "loss": 0.0921, "step": 11206 }, { "epoch": 1.8157809462086845, "grad_norm": 0.9517927169799805, "learning_rate": 1.7796256499269141e-06, "loss": 0.1178, "step": 11207 }, { "epoch": 1.8159429682436812, "grad_norm": 0.8355745673179626, "learning_rate": 1.7792069049446987e-06, "loss": 0.1029, "step": 11208 }, { "epoch": 1.8161049902786779, "grad_norm": 0.8401842713356018, "learning_rate": 1.7787881820170073e-06, "loss": 0.1033, "step": 11209 }, { "epoch": 1.8162670123136748, "grad_norm": 0.8746349215507507, "learning_rate": 1.7783694811566534e-06, "loss": 0.104, "step": 11210 }, { "epoch": 1.8164290343486713, "grad_norm": 0.8786240816116333, "learning_rate": 1.7779508023764464e-06, "loss": 0.1073, "step": 11211 }, { "epoch": 1.8165910563836682, "grad_norm": 0.700187087059021, "learning_rate": 1.777532145689198e-06, "loss": 0.0841, "step": 11212 }, { "epoch": 1.816753078418665, "grad_norm": 0.7752106785774231, "learning_rate": 1.7771135111077173e-06, "loss": 0.0987, "step": 11213 }, { "epoch": 1.8169151004536617, "grad_norm": 0.8160271644592285, "learning_rate": 1.7766948986448131e-06, "loss": 0.096, "step": 11214 }, { "epoch": 1.8170771224886586, "grad_norm": 0.8085820078849792, "learning_rate": 1.7762763083132958e-06, "loss": 0.0965, "step": 11215 }, { "epoch": 1.817239144523655, "grad_norm": 0.8033862113952637, "learning_rate": 1.7758577401259716e-06, "loss": 0.0961, "step": 11216 }, { "epoch": 1.817401166558652, "grad_norm": 0.7184333205223083, "learning_rate": 1.775439194095648e-06, "loss": 0.0934, "step": 11217 }, { "epoch": 1.8175631885936487, "grad_norm": 0.8042249083518982, "learning_rate": 1.7750206702351325e-06, "loss": 0.1002, "step": 11218 }, { "epoch": 1.8177252106286454, "grad_norm": 0.779135525226593, "learning_rate": 1.7746021685572284e-06, "loss": 0.0907, "step": 11219 }, { "epoch": 1.8178872326636424, "grad_norm": 0.8143093585968018, "learning_rate": 1.7741836890747438e-06, "loss": 0.1011, "step": 11220 }, { "epoch": 1.8180492546986389, "grad_norm": 0.8787755370140076, "learning_rate": 1.7737652318004818e-06, "loss": 0.1014, "step": 11221 }, { "epoch": 1.8182112767336358, "grad_norm": 0.8515397310256958, "learning_rate": 1.7733467967472459e-06, "loss": 0.1032, "step": 11222 }, { "epoch": 1.8183732987686325, "grad_norm": 0.8510406017303467, "learning_rate": 1.7729283839278403e-06, "loss": 0.0972, "step": 11223 }, { "epoch": 1.8185353208036292, "grad_norm": 0.8091854453086853, "learning_rate": 1.7725099933550649e-06, "loss": 0.0957, "step": 11224 }, { "epoch": 1.8186973428386262, "grad_norm": 0.7440215349197388, "learning_rate": 1.7720916250417248e-06, "loss": 0.0914, "step": 11225 }, { "epoch": 1.8188593648736227, "grad_norm": 0.8673259615898132, "learning_rate": 1.7716732790006188e-06, "loss": 0.101, "step": 11226 }, { "epoch": 1.8190213869086196, "grad_norm": 0.8858534097671509, "learning_rate": 1.7712549552445484e-06, "loss": 0.1069, "step": 11227 }, { "epoch": 1.8191834089436163, "grad_norm": 0.7519997954368591, "learning_rate": 1.7708366537863129e-06, "loss": 0.0896, "step": 11228 }, { "epoch": 1.819345430978613, "grad_norm": 0.7740821242332458, "learning_rate": 1.7704183746387105e-06, "loss": 0.0916, "step": 11229 }, { "epoch": 1.81950745301361, "grad_norm": 0.898308277130127, "learning_rate": 1.7700001178145409e-06, "loss": 0.1155, "step": 11230 }, { "epoch": 1.8196694750486067, "grad_norm": 0.7707512378692627, "learning_rate": 1.7695818833266009e-06, "loss": 0.0876, "step": 11231 }, { "epoch": 1.8198314970836034, "grad_norm": 0.7827706336975098, "learning_rate": 1.7691636711876883e-06, "loss": 0.1, "step": 11232 }, { "epoch": 1.8199935191186, "grad_norm": 0.8508397340774536, "learning_rate": 1.768745481410597e-06, "loss": 0.0975, "step": 11233 }, { "epoch": 1.8201555411535968, "grad_norm": 0.8238134384155273, "learning_rate": 1.768327314008126e-06, "loss": 0.0968, "step": 11234 }, { "epoch": 1.8203175631885937, "grad_norm": 0.7700952291488647, "learning_rate": 1.7679091689930683e-06, "loss": 0.0831, "step": 11235 }, { "epoch": 1.8204795852235904, "grad_norm": 0.7718937993049622, "learning_rate": 1.7674910463782186e-06, "loss": 0.1014, "step": 11236 }, { "epoch": 1.8206416072585871, "grad_norm": 0.9156939387321472, "learning_rate": 1.76707294617637e-06, "loss": 0.1105, "step": 11237 }, { "epoch": 1.820803629293584, "grad_norm": 0.85805743932724, "learning_rate": 1.766654868400315e-06, "loss": 0.105, "step": 11238 }, { "epoch": 1.8209656513285806, "grad_norm": 0.7723628878593445, "learning_rate": 1.766236813062847e-06, "loss": 0.0991, "step": 11239 }, { "epoch": 1.8211276733635775, "grad_norm": 0.7656691670417786, "learning_rate": 1.7658187801767568e-06, "loss": 0.0986, "step": 11240 }, { "epoch": 1.8212896953985742, "grad_norm": 0.8363527655601501, "learning_rate": 1.765400769754836e-06, "loss": 0.0918, "step": 11241 }, { "epoch": 1.821451717433571, "grad_norm": 0.6773914694786072, "learning_rate": 1.7649827818098727e-06, "loss": 0.0836, "step": 11242 }, { "epoch": 1.8216137394685679, "grad_norm": 0.751105546951294, "learning_rate": 1.7645648163546574e-06, "loss": 0.0893, "step": 11243 }, { "epoch": 1.8217757615035644, "grad_norm": 0.8291444182395935, "learning_rate": 1.7641468734019795e-06, "loss": 0.0998, "step": 11244 }, { "epoch": 1.8219377835385613, "grad_norm": 0.735002875328064, "learning_rate": 1.7637289529646273e-06, "loss": 0.0898, "step": 11245 }, { "epoch": 1.822099805573558, "grad_norm": 0.7483386993408203, "learning_rate": 1.7633110550553867e-06, "loss": 0.0863, "step": 11246 }, { "epoch": 1.8222618276085547, "grad_norm": 0.7296152114868164, "learning_rate": 1.7628931796870454e-06, "loss": 0.0961, "step": 11247 }, { "epoch": 1.8224238496435516, "grad_norm": 0.7359948754310608, "learning_rate": 1.7624753268723882e-06, "loss": 0.0927, "step": 11248 }, { "epoch": 1.8225858716785481, "grad_norm": 0.8318410515785217, "learning_rate": 1.7620574966242015e-06, "loss": 0.1004, "step": 11249 }, { "epoch": 1.822747893713545, "grad_norm": 0.9866887331008911, "learning_rate": 1.7616396889552706e-06, "loss": 0.1179, "step": 11250 }, { "epoch": 1.8229099157485418, "grad_norm": 0.913759708404541, "learning_rate": 1.7612219038783775e-06, "loss": 0.105, "step": 11251 }, { "epoch": 1.8230719377835385, "grad_norm": 0.7952281832695007, "learning_rate": 1.7608041414063065e-06, "loss": 0.0996, "step": 11252 }, { "epoch": 1.8232339598185354, "grad_norm": 0.9138891100883484, "learning_rate": 1.7603864015518392e-06, "loss": 0.1103, "step": 11253 }, { "epoch": 1.8233959818535321, "grad_norm": 0.9621608853340149, "learning_rate": 1.7599686843277596e-06, "loss": 0.1116, "step": 11254 }, { "epoch": 1.8235580038885288, "grad_norm": 0.9279875755310059, "learning_rate": 1.7595509897468466e-06, "loss": 0.1177, "step": 11255 }, { "epoch": 1.8237200259235256, "grad_norm": 0.7723210453987122, "learning_rate": 1.7591333178218823e-06, "loss": 0.1005, "step": 11256 }, { "epoch": 1.8238820479585223, "grad_norm": 0.7563832998275757, "learning_rate": 1.7587156685656442e-06, "loss": 0.0964, "step": 11257 }, { "epoch": 1.8240440699935192, "grad_norm": 0.7410457134246826, "learning_rate": 1.7582980419909135e-06, "loss": 0.0911, "step": 11258 }, { "epoch": 1.824206092028516, "grad_norm": 0.8052392601966858, "learning_rate": 1.7578804381104678e-06, "loss": 0.1009, "step": 11259 }, { "epoch": 1.8243681140635126, "grad_norm": 0.8688940405845642, "learning_rate": 1.7574628569370855e-06, "loss": 0.1155, "step": 11260 }, { "epoch": 1.8245301360985096, "grad_norm": 0.8440880179405212, "learning_rate": 1.757045298483542e-06, "loss": 0.1034, "step": 11261 }, { "epoch": 1.824692158133506, "grad_norm": 0.764716386795044, "learning_rate": 1.756627762762614e-06, "loss": 0.0984, "step": 11262 }, { "epoch": 1.824854180168503, "grad_norm": 0.7497915029525757, "learning_rate": 1.7562102497870787e-06, "loss": 0.0923, "step": 11263 }, { "epoch": 1.8250162022034997, "grad_norm": 0.8336397409439087, "learning_rate": 1.7557927595697094e-06, "loss": 0.1071, "step": 11264 }, { "epoch": 1.8251782242384964, "grad_norm": 0.8785907626152039, "learning_rate": 1.7553752921232809e-06, "loss": 0.1079, "step": 11265 }, { "epoch": 1.8253402462734933, "grad_norm": 0.8184788823127747, "learning_rate": 1.7549578474605661e-06, "loss": 0.0985, "step": 11266 }, { "epoch": 1.8255022683084898, "grad_norm": 0.7712414860725403, "learning_rate": 1.754540425594338e-06, "loss": 0.095, "step": 11267 }, { "epoch": 1.8256642903434868, "grad_norm": 0.8796233534812927, "learning_rate": 1.754123026537369e-06, "loss": 0.1065, "step": 11268 }, { "epoch": 1.8258263123784835, "grad_norm": 0.9053205251693726, "learning_rate": 1.7537056503024314e-06, "loss": 0.0946, "step": 11269 }, { "epoch": 1.8259883344134802, "grad_norm": 0.7080380916595459, "learning_rate": 1.7532882969022941e-06, "loss": 0.0892, "step": 11270 }, { "epoch": 1.8261503564484771, "grad_norm": 0.8562909364700317, "learning_rate": 1.7528709663497282e-06, "loss": 0.1094, "step": 11271 }, { "epoch": 1.8263123784834736, "grad_norm": 0.7841035723686218, "learning_rate": 1.752453658657502e-06, "loss": 0.0991, "step": 11272 }, { "epoch": 1.8264744005184705, "grad_norm": 0.7752327919006348, "learning_rate": 1.752036373838385e-06, "loss": 0.0916, "step": 11273 }, { "epoch": 1.8266364225534673, "grad_norm": 0.7416565418243408, "learning_rate": 1.7516191119051456e-06, "loss": 0.0904, "step": 11274 }, { "epoch": 1.826798444588464, "grad_norm": 0.8184176683425903, "learning_rate": 1.7512018728705498e-06, "loss": 0.1011, "step": 11275 }, { "epoch": 1.826960466623461, "grad_norm": 0.7727778553962708, "learning_rate": 1.7507846567473643e-06, "loss": 0.0946, "step": 11276 }, { "epoch": 1.8271224886584574, "grad_norm": 0.7771718502044678, "learning_rate": 1.7503674635483558e-06, "loss": 0.0977, "step": 11277 }, { "epoch": 1.8272845106934543, "grad_norm": 0.958016037940979, "learning_rate": 1.749950293286289e-06, "loss": 0.1002, "step": 11278 }, { "epoch": 1.827446532728451, "grad_norm": 0.7841407060623169, "learning_rate": 1.7495331459739278e-06, "loss": 0.1045, "step": 11279 }, { "epoch": 1.8276085547634477, "grad_norm": 0.7898461818695068, "learning_rate": 1.7491160216240368e-06, "loss": 0.0953, "step": 11280 }, { "epoch": 1.8277705767984447, "grad_norm": 0.8376603126525879, "learning_rate": 1.7486989202493775e-06, "loss": 0.1077, "step": 11281 }, { "epoch": 1.8279325988334414, "grad_norm": 0.7808676958084106, "learning_rate": 1.7482818418627134e-06, "loss": 0.0983, "step": 11282 }, { "epoch": 1.828094620868438, "grad_norm": 0.8467119932174683, "learning_rate": 1.7478647864768067e-06, "loss": 0.1026, "step": 11283 }, { "epoch": 1.8282566429034348, "grad_norm": 0.7723277807235718, "learning_rate": 1.7474477541044165e-06, "loss": 0.0954, "step": 11284 }, { "epoch": 1.8284186649384315, "grad_norm": 0.8611959218978882, "learning_rate": 1.7470307447583047e-06, "loss": 0.0971, "step": 11285 }, { "epoch": 1.8285806869734285, "grad_norm": 0.7999573945999146, "learning_rate": 1.746613758451228e-06, "loss": 0.0988, "step": 11286 }, { "epoch": 1.8287427090084252, "grad_norm": 0.8502542972564697, "learning_rate": 1.746196795195949e-06, "loss": 0.0988, "step": 11287 }, { "epoch": 1.8289047310434219, "grad_norm": 0.6951650977134705, "learning_rate": 1.7457798550052232e-06, "loss": 0.0888, "step": 11288 }, { "epoch": 1.8290667530784188, "grad_norm": 0.7761609554290771, "learning_rate": 1.7453629378918094e-06, "loss": 0.097, "step": 11289 }, { "epoch": 1.8292287751134153, "grad_norm": 0.882725715637207, "learning_rate": 1.744946043868463e-06, "loss": 0.1073, "step": 11290 }, { "epoch": 1.8293907971484122, "grad_norm": 0.8751239776611328, "learning_rate": 1.7445291729479397e-06, "loss": 0.1095, "step": 11291 }, { "epoch": 1.829552819183409, "grad_norm": 0.7810752987861633, "learning_rate": 1.7441123251429968e-06, "loss": 0.0916, "step": 11292 }, { "epoch": 1.8297148412184057, "grad_norm": 0.8557392358779907, "learning_rate": 1.7436955004663868e-06, "loss": 0.0986, "step": 11293 }, { "epoch": 1.8298768632534026, "grad_norm": 0.88213050365448, "learning_rate": 1.7432786989308648e-06, "loss": 0.1046, "step": 11294 }, { "epoch": 1.830038885288399, "grad_norm": 0.8699725270271301, "learning_rate": 1.7428619205491831e-06, "loss": 0.1018, "step": 11295 }, { "epoch": 1.830200907323396, "grad_norm": 0.7817003726959229, "learning_rate": 1.7424451653340934e-06, "loss": 0.0926, "step": 11296 }, { "epoch": 1.8303629293583927, "grad_norm": 0.8012980818748474, "learning_rate": 1.7420284332983495e-06, "loss": 0.1063, "step": 11297 }, { "epoch": 1.8305249513933894, "grad_norm": 0.7765569686889648, "learning_rate": 1.7416117244547014e-06, "loss": 0.0899, "step": 11298 }, { "epoch": 1.8306869734283864, "grad_norm": 0.8100259900093079, "learning_rate": 1.7411950388158987e-06, "loss": 0.1055, "step": 11299 }, { "epoch": 1.8308489954633829, "grad_norm": 0.996043860912323, "learning_rate": 1.7407783763946911e-06, "loss": 0.1143, "step": 11300 }, { "epoch": 1.8310110174983798, "grad_norm": 0.9620465636253357, "learning_rate": 1.7403617372038293e-06, "loss": 0.1167, "step": 11301 }, { "epoch": 1.8311730395333765, "grad_norm": 0.7914530038833618, "learning_rate": 1.7399451212560593e-06, "loss": 0.1029, "step": 11302 }, { "epoch": 1.8313350615683732, "grad_norm": 0.8515611886978149, "learning_rate": 1.7395285285641292e-06, "loss": 0.1059, "step": 11303 }, { "epoch": 1.8314970836033702, "grad_norm": 0.8178187608718872, "learning_rate": 1.7391119591407863e-06, "loss": 0.0974, "step": 11304 }, { "epoch": 1.8316591056383669, "grad_norm": 0.8800912499427795, "learning_rate": 1.7386954129987754e-06, "loss": 0.1066, "step": 11305 }, { "epoch": 1.8318211276733636, "grad_norm": 0.8402878642082214, "learning_rate": 1.7382788901508426e-06, "loss": 0.1038, "step": 11306 }, { "epoch": 1.8319831497083603, "grad_norm": 0.8607720732688904, "learning_rate": 1.7378623906097333e-06, "loss": 0.1116, "step": 11307 }, { "epoch": 1.832145171743357, "grad_norm": 0.8464848399162292, "learning_rate": 1.7374459143881899e-06, "loss": 0.1, "step": 11308 }, { "epoch": 1.832307193778354, "grad_norm": 0.7194477319717407, "learning_rate": 1.737029461498957e-06, "loss": 0.0803, "step": 11309 }, { "epoch": 1.8324692158133506, "grad_norm": 0.9045358300209045, "learning_rate": 1.7366130319547747e-06, "loss": 0.1053, "step": 11310 }, { "epoch": 1.8326312378483474, "grad_norm": 0.8817188143730164, "learning_rate": 1.736196625768387e-06, "loss": 0.1033, "step": 11311 }, { "epoch": 1.8327932598833443, "grad_norm": 0.7992186546325684, "learning_rate": 1.735780242952534e-06, "loss": 0.0999, "step": 11312 }, { "epoch": 1.8329552819183408, "grad_norm": 0.711328387260437, "learning_rate": 1.7353638835199568e-06, "loss": 0.0875, "step": 11313 }, { "epoch": 1.8331173039533377, "grad_norm": 0.8536904454231262, "learning_rate": 1.7349475474833938e-06, "loss": 0.1047, "step": 11314 }, { "epoch": 1.8332793259883344, "grad_norm": 0.7888259887695312, "learning_rate": 1.7345312348555843e-06, "loss": 0.1089, "step": 11315 }, { "epoch": 1.8334413480233311, "grad_norm": 0.8059415221214294, "learning_rate": 1.7341149456492672e-06, "loss": 0.0995, "step": 11316 }, { "epoch": 1.833603370058328, "grad_norm": 0.8481978178024292, "learning_rate": 1.733698679877179e-06, "loss": 0.1035, "step": 11317 }, { "epoch": 1.8337653920933246, "grad_norm": 0.72404545545578, "learning_rate": 1.7332824375520574e-06, "loss": 0.0971, "step": 11318 }, { "epoch": 1.8339274141283215, "grad_norm": 0.8225178718566895, "learning_rate": 1.7328662186866373e-06, "loss": 0.0951, "step": 11319 }, { "epoch": 1.8340894361633182, "grad_norm": 0.7030364871025085, "learning_rate": 1.7324500232936536e-06, "loss": 0.0844, "step": 11320 }, { "epoch": 1.834251458198315, "grad_norm": 0.9015457630157471, "learning_rate": 1.7320338513858425e-06, "loss": 0.1048, "step": 11321 }, { "epoch": 1.8344134802333119, "grad_norm": 0.8932374715805054, "learning_rate": 1.731617702975938e-06, "loss": 0.1184, "step": 11322 }, { "epoch": 1.8345755022683083, "grad_norm": 1.008551836013794, "learning_rate": 1.7312015780766714e-06, "loss": 0.1336, "step": 11323 }, { "epoch": 1.8347375243033053, "grad_norm": 0.7870475053787231, "learning_rate": 1.7307854767007756e-06, "loss": 0.1007, "step": 11324 }, { "epoch": 1.834899546338302, "grad_norm": 0.7753018736839294, "learning_rate": 1.7303693988609837e-06, "loss": 0.0904, "step": 11325 }, { "epoch": 1.8350615683732987, "grad_norm": 0.9246848225593567, "learning_rate": 1.7299533445700253e-06, "loss": 0.1144, "step": 11326 }, { "epoch": 1.8352235904082956, "grad_norm": 0.7962560057640076, "learning_rate": 1.7295373138406318e-06, "loss": 0.1006, "step": 11327 }, { "epoch": 1.8353856124432921, "grad_norm": 0.8699910044670105, "learning_rate": 1.7291213066855312e-06, "loss": 0.1003, "step": 11328 }, { "epoch": 1.835547634478289, "grad_norm": 0.8488613367080688, "learning_rate": 1.7287053231174528e-06, "loss": 0.1104, "step": 11329 }, { "epoch": 1.8357096565132858, "grad_norm": 0.8561351895332336, "learning_rate": 1.7282893631491253e-06, "loss": 0.1013, "step": 11330 }, { "epoch": 1.8358716785482825, "grad_norm": 0.7828042507171631, "learning_rate": 1.7278734267932764e-06, "loss": 0.0941, "step": 11331 }, { "epoch": 1.8360337005832794, "grad_norm": 0.916395366191864, "learning_rate": 1.7274575140626318e-06, "loss": 0.1124, "step": 11332 }, { "epoch": 1.8361957226182761, "grad_norm": 0.8568091988563538, "learning_rate": 1.7270416249699179e-06, "loss": 0.0995, "step": 11333 }, { "epoch": 1.8363577446532728, "grad_norm": 0.7647148966789246, "learning_rate": 1.7266257595278591e-06, "loss": 0.0951, "step": 11334 }, { "epoch": 1.8365197666882696, "grad_norm": 0.8157481551170349, "learning_rate": 1.726209917749181e-06, "loss": 0.0979, "step": 11335 }, { "epoch": 1.8366817887232663, "grad_norm": 0.8966127634048462, "learning_rate": 1.725794099646607e-06, "loss": 0.1024, "step": 11336 }, { "epoch": 1.8368438107582632, "grad_norm": 0.8767910003662109, "learning_rate": 1.72537830523286e-06, "loss": 0.1108, "step": 11337 }, { "epoch": 1.83700583279326, "grad_norm": 0.8303874731063843, "learning_rate": 1.7249625345206623e-06, "loss": 0.0929, "step": 11338 }, { "epoch": 1.8371678548282566, "grad_norm": 0.7946867346763611, "learning_rate": 1.7245467875227345e-06, "loss": 0.1038, "step": 11339 }, { "epoch": 1.8373298768632536, "grad_norm": 0.7958531379699707, "learning_rate": 1.7241310642517998e-06, "loss": 0.0946, "step": 11340 }, { "epoch": 1.83749189889825, "grad_norm": 0.7723376750946045, "learning_rate": 1.7237153647205762e-06, "loss": 0.1002, "step": 11341 }, { "epoch": 1.837653920933247, "grad_norm": 1.005839228630066, "learning_rate": 1.7232996889417846e-06, "loss": 0.109, "step": 11342 }, { "epoch": 1.8378159429682437, "grad_norm": 0.7882110476493835, "learning_rate": 1.7228840369281424e-06, "loss": 0.0978, "step": 11343 }, { "epoch": 1.8379779650032404, "grad_norm": 0.6778146028518677, "learning_rate": 1.7224684086923677e-06, "loss": 0.0867, "step": 11344 }, { "epoch": 1.8381399870382373, "grad_norm": 0.8371578454971313, "learning_rate": 1.722052804247179e-06, "loss": 0.104, "step": 11345 }, { "epoch": 1.8383020090732338, "grad_norm": 0.684536874294281, "learning_rate": 1.7216372236052914e-06, "loss": 0.0873, "step": 11346 }, { "epoch": 1.8384640311082308, "grad_norm": 0.777190089225769, "learning_rate": 1.7212216667794213e-06, "loss": 0.1056, "step": 11347 }, { "epoch": 1.8386260531432275, "grad_norm": 0.7780445218086243, "learning_rate": 1.7208061337822828e-06, "loss": 0.0936, "step": 11348 }, { "epoch": 1.8387880751782242, "grad_norm": 0.8263536095619202, "learning_rate": 1.7203906246265921e-06, "loss": 0.095, "step": 11349 }, { "epoch": 1.8389500972132211, "grad_norm": 0.7346975803375244, "learning_rate": 1.7199751393250614e-06, "loss": 0.0939, "step": 11350 }, { "epoch": 1.8391121192482176, "grad_norm": 0.8008426427841187, "learning_rate": 1.719559677890404e-06, "loss": 0.0932, "step": 11351 }, { "epoch": 1.8392741412832145, "grad_norm": 0.8390111923217773, "learning_rate": 1.7191442403353314e-06, "loss": 0.0961, "step": 11352 }, { "epoch": 1.8394361633182112, "grad_norm": 0.8071438074111938, "learning_rate": 1.7187288266725549e-06, "loss": 0.094, "step": 11353 }, { "epoch": 1.839598185353208, "grad_norm": 0.8307203054428101, "learning_rate": 1.7183134369147866e-06, "loss": 0.1013, "step": 11354 }, { "epoch": 1.839760207388205, "grad_norm": 0.8359047174453735, "learning_rate": 1.717898071074735e-06, "loss": 0.1013, "step": 11355 }, { "epoch": 1.8399222294232016, "grad_norm": 0.925969660282135, "learning_rate": 1.71748272916511e-06, "loss": 0.1027, "step": 11356 }, { "epoch": 1.8400842514581983, "grad_norm": 0.7569020986557007, "learning_rate": 1.7170674111986202e-06, "loss": 0.0964, "step": 11357 }, { "epoch": 1.840246273493195, "grad_norm": 0.8798254728317261, "learning_rate": 1.716652117187972e-06, "loss": 0.105, "step": 11358 }, { "epoch": 1.8404082955281917, "grad_norm": 0.757815957069397, "learning_rate": 1.7162368471458738e-06, "loss": 0.0955, "step": 11359 }, { "epoch": 1.8405703175631887, "grad_norm": 0.8111533522605896, "learning_rate": 1.7158216010850318e-06, "loss": 0.0971, "step": 11360 }, { "epoch": 1.8407323395981854, "grad_norm": 0.793456494808197, "learning_rate": 1.7154063790181507e-06, "loss": 0.1047, "step": 11361 }, { "epoch": 1.840894361633182, "grad_norm": 0.7648953199386597, "learning_rate": 1.7149911809579361e-06, "loss": 0.0951, "step": 11362 }, { "epoch": 1.841056383668179, "grad_norm": 0.7821177244186401, "learning_rate": 1.7145760069170905e-06, "loss": 0.0973, "step": 11363 }, { "epoch": 1.8412184057031755, "grad_norm": 0.8774672150611877, "learning_rate": 1.7141608569083195e-06, "loss": 0.1019, "step": 11364 }, { "epoch": 1.8413804277381725, "grad_norm": 0.815719485282898, "learning_rate": 1.7137457309443245e-06, "loss": 0.0915, "step": 11365 }, { "epoch": 1.8415424497731692, "grad_norm": 0.8151124715805054, "learning_rate": 1.7133306290378077e-06, "loss": 0.094, "step": 11366 }, { "epoch": 1.8417044718081659, "grad_norm": 0.6767386794090271, "learning_rate": 1.7129155512014692e-06, "loss": 0.0797, "step": 11367 }, { "epoch": 1.8418664938431628, "grad_norm": 0.8355568051338196, "learning_rate": 1.7125004974480102e-06, "loss": 0.0956, "step": 11368 }, { "epoch": 1.8420285158781593, "grad_norm": 0.9013076424598694, "learning_rate": 1.7120854677901312e-06, "loss": 0.1056, "step": 11369 }, { "epoch": 1.8421905379131562, "grad_norm": 0.7785685062408447, "learning_rate": 1.7116704622405295e-06, "loss": 0.0867, "step": 11370 }, { "epoch": 1.842352559948153, "grad_norm": 0.7849324345588684, "learning_rate": 1.7112554808119043e-06, "loss": 0.0983, "step": 11371 }, { "epoch": 1.8425145819831497, "grad_norm": 0.9096629023551941, "learning_rate": 1.7108405235169511e-06, "loss": 0.1079, "step": 11372 }, { "epoch": 1.8426766040181466, "grad_norm": 0.7472807765007019, "learning_rate": 1.71042559036837e-06, "loss": 0.0946, "step": 11373 }, { "epoch": 1.842838626053143, "grad_norm": 0.6788498163223267, "learning_rate": 1.7100106813788544e-06, "loss": 0.0785, "step": 11374 }, { "epoch": 1.84300064808814, "grad_norm": 0.8583519458770752, "learning_rate": 1.7095957965611008e-06, "loss": 0.1001, "step": 11375 }, { "epoch": 1.8431626701231367, "grad_norm": 0.9140331149101257, "learning_rate": 1.7091809359278025e-06, "loss": 0.1117, "step": 11376 }, { "epoch": 1.8433246921581334, "grad_norm": 0.8352115154266357, "learning_rate": 1.7087660994916533e-06, "loss": 0.1087, "step": 11377 }, { "epoch": 1.8434867141931304, "grad_norm": 0.8460627198219299, "learning_rate": 1.7083512872653477e-06, "loss": 0.1032, "step": 11378 }, { "epoch": 1.8436487362281269, "grad_norm": 0.8629457950592041, "learning_rate": 1.7079364992615766e-06, "loss": 0.1015, "step": 11379 }, { "epoch": 1.8438107582631238, "grad_norm": 0.8817080855369568, "learning_rate": 1.7075217354930324e-06, "loss": 0.1085, "step": 11380 }, { "epoch": 1.8439727802981205, "grad_norm": 0.9188164472579956, "learning_rate": 1.7071069959724046e-06, "loss": 0.0987, "step": 11381 }, { "epoch": 1.8441348023331172, "grad_norm": 0.7526326179504395, "learning_rate": 1.7066922807123834e-06, "loss": 0.0948, "step": 11382 }, { "epoch": 1.8442968243681142, "grad_norm": 0.7177049517631531, "learning_rate": 1.7062775897256593e-06, "loss": 0.0892, "step": 11383 }, { "epoch": 1.8444588464031109, "grad_norm": 0.8121328353881836, "learning_rate": 1.7058629230249207e-06, "loss": 0.0961, "step": 11384 }, { "epoch": 1.8446208684381076, "grad_norm": 0.7545210123062134, "learning_rate": 1.7054482806228543e-06, "loss": 0.0927, "step": 11385 }, { "epoch": 1.8447828904731045, "grad_norm": 0.8250703811645508, "learning_rate": 1.7050336625321484e-06, "loss": 0.095, "step": 11386 }, { "epoch": 1.844944912508101, "grad_norm": 0.772592306137085, "learning_rate": 1.7046190687654873e-06, "loss": 0.0977, "step": 11387 }, { "epoch": 1.845106934543098, "grad_norm": 0.7452101111412048, "learning_rate": 1.704204499335559e-06, "loss": 0.0897, "step": 11388 }, { "epoch": 1.8452689565780946, "grad_norm": 0.7722578048706055, "learning_rate": 1.703789954255047e-06, "loss": 0.0977, "step": 11389 }, { "epoch": 1.8454309786130914, "grad_norm": 0.8865172863006592, "learning_rate": 1.7033754335366356e-06, "loss": 0.1031, "step": 11390 }, { "epoch": 1.8455930006480883, "grad_norm": 0.8430839776992798, "learning_rate": 1.7029609371930076e-06, "loss": 0.1073, "step": 11391 }, { "epoch": 1.8457550226830848, "grad_norm": 0.8359355330467224, "learning_rate": 1.7025464652368464e-06, "loss": 0.101, "step": 11392 }, { "epoch": 1.8459170447180817, "grad_norm": 0.8882021307945251, "learning_rate": 1.7021320176808343e-06, "loss": 0.1072, "step": 11393 }, { "epoch": 1.8460790667530784, "grad_norm": 0.7827454209327698, "learning_rate": 1.701717594537651e-06, "loss": 0.0927, "step": 11394 }, { "epoch": 1.8462410887880751, "grad_norm": 0.8166926503181458, "learning_rate": 1.7013031958199783e-06, "loss": 0.1026, "step": 11395 }, { "epoch": 1.846403110823072, "grad_norm": 0.7511534690856934, "learning_rate": 1.7008888215404933e-06, "loss": 0.0942, "step": 11396 }, { "epoch": 1.8465651328580686, "grad_norm": 0.8353126645088196, "learning_rate": 1.7004744717118777e-06, "loss": 0.1063, "step": 11397 }, { "epoch": 1.8467271548930655, "grad_norm": 0.6988396644592285, "learning_rate": 1.7000601463468088e-06, "loss": 0.0877, "step": 11398 }, { "epoch": 1.8468891769280622, "grad_norm": 0.8091744184494019, "learning_rate": 1.6996458454579632e-06, "loss": 0.0984, "step": 11399 }, { "epoch": 1.847051198963059, "grad_norm": 0.7793344855308533, "learning_rate": 1.6992315690580178e-06, "loss": 0.093, "step": 11400 }, { "epoch": 1.8472132209980558, "grad_norm": 0.7006025910377502, "learning_rate": 1.6988173171596479e-06, "loss": 0.0805, "step": 11401 }, { "epoch": 1.8473752430330523, "grad_norm": 0.7687976956367493, "learning_rate": 1.6984030897755304e-06, "loss": 0.0942, "step": 11402 }, { "epoch": 1.8475372650680493, "grad_norm": 0.8496354222297668, "learning_rate": 1.697988886918338e-06, "loss": 0.1107, "step": 11403 }, { "epoch": 1.847699287103046, "grad_norm": 0.8020095229148865, "learning_rate": 1.6975747086007454e-06, "loss": 0.0906, "step": 11404 }, { "epoch": 1.8478613091380427, "grad_norm": 0.8590508699417114, "learning_rate": 1.6971605548354244e-06, "loss": 0.0976, "step": 11405 }, { "epoch": 1.8480233311730396, "grad_norm": 0.8174715042114258, "learning_rate": 1.6967464256350468e-06, "loss": 0.0967, "step": 11406 }, { "epoch": 1.8481853532080363, "grad_norm": 0.8774664998054504, "learning_rate": 1.6963323210122856e-06, "loss": 0.0971, "step": 11407 }, { "epoch": 1.848347375243033, "grad_norm": 0.8936482667922974, "learning_rate": 1.6959182409798111e-06, "loss": 0.1196, "step": 11408 }, { "epoch": 1.8485093972780298, "grad_norm": 0.7813578248023987, "learning_rate": 1.6955041855502918e-06, "loss": 0.1045, "step": 11409 }, { "epoch": 1.8486714193130265, "grad_norm": 0.8374517560005188, "learning_rate": 1.695090154736398e-06, "loss": 0.1012, "step": 11410 }, { "epoch": 1.8488334413480234, "grad_norm": 0.7886912822723389, "learning_rate": 1.694676148550797e-06, "loss": 0.0903, "step": 11411 }, { "epoch": 1.8489954633830201, "grad_norm": 0.8474147915840149, "learning_rate": 1.6942621670061574e-06, "loss": 0.1001, "step": 11412 }, { "epoch": 1.8491574854180168, "grad_norm": 0.7622694373130798, "learning_rate": 1.693848210115146e-06, "loss": 0.0996, "step": 11413 }, { "epoch": 1.8493195074530138, "grad_norm": 0.7860474586486816, "learning_rate": 1.693434277890428e-06, "loss": 0.0952, "step": 11414 }, { "epoch": 1.8494815294880103, "grad_norm": 0.8053168058395386, "learning_rate": 1.693020370344669e-06, "loss": 0.0991, "step": 11415 }, { "epoch": 1.8496435515230072, "grad_norm": 0.769270658493042, "learning_rate": 1.692606487490534e-06, "loss": 0.1014, "step": 11416 }, { "epoch": 1.849805573558004, "grad_norm": 0.7316917181015015, "learning_rate": 1.6921926293406874e-06, "loss": 0.0897, "step": 11417 }, { "epoch": 1.8499675955930006, "grad_norm": 0.7723559737205505, "learning_rate": 1.6917787959077907e-06, "loss": 0.0973, "step": 11418 }, { "epoch": 1.8501296176279975, "grad_norm": 0.8190063238143921, "learning_rate": 1.6913649872045076e-06, "loss": 0.1082, "step": 11419 }, { "epoch": 1.850291639662994, "grad_norm": 0.8806280493736267, "learning_rate": 1.6909512032434984e-06, "loss": 0.1103, "step": 11420 }, { "epoch": 1.850453661697991, "grad_norm": 0.7780027985572815, "learning_rate": 1.6905374440374245e-06, "loss": 0.1017, "step": 11421 }, { "epoch": 1.8506156837329877, "grad_norm": 0.9774267077445984, "learning_rate": 1.6901237095989464e-06, "loss": 0.1157, "step": 11422 }, { "epoch": 1.8507777057679844, "grad_norm": 0.7308449745178223, "learning_rate": 1.689709999940723e-06, "loss": 0.0916, "step": 11423 }, { "epoch": 1.8509397278029813, "grad_norm": 0.7397428154945374, "learning_rate": 1.6892963150754128e-06, "loss": 0.0873, "step": 11424 }, { "epoch": 1.8511017498379778, "grad_norm": 0.8941960334777832, "learning_rate": 1.688882655015672e-06, "loss": 0.102, "step": 11425 }, { "epoch": 1.8512637718729748, "grad_norm": 0.77532958984375, "learning_rate": 1.6884690197741608e-06, "loss": 0.0966, "step": 11426 }, { "epoch": 1.8514257939079715, "grad_norm": 0.834740936756134, "learning_rate": 1.6880554093635331e-06, "loss": 0.1077, "step": 11427 }, { "epoch": 1.8515878159429682, "grad_norm": 0.8465557098388672, "learning_rate": 1.6876418237964453e-06, "loss": 0.1014, "step": 11428 }, { "epoch": 1.851749837977965, "grad_norm": 0.8378772139549255, "learning_rate": 1.6872282630855519e-06, "loss": 0.0937, "step": 11429 }, { "epoch": 1.8519118600129616, "grad_norm": 0.9061734676361084, "learning_rate": 1.6868147272435057e-06, "loss": 0.1131, "step": 11430 }, { "epoch": 1.8520738820479585, "grad_norm": 0.8114215135574341, "learning_rate": 1.6864012162829624e-06, "loss": 0.0961, "step": 11431 }, { "epoch": 1.8522359040829552, "grad_norm": 0.8266593217849731, "learning_rate": 1.6859877302165723e-06, "loss": 0.0923, "step": 11432 }, { "epoch": 1.852397926117952, "grad_norm": 0.7605752944946289, "learning_rate": 1.6855742690569881e-06, "loss": 0.098, "step": 11433 }, { "epoch": 1.8525599481529489, "grad_norm": 0.77652508020401, "learning_rate": 1.6851608328168589e-06, "loss": 0.0995, "step": 11434 }, { "epoch": 1.8527219701879456, "grad_norm": 0.7098603248596191, "learning_rate": 1.6847474215088382e-06, "loss": 0.0844, "step": 11435 }, { "epoch": 1.8528839922229423, "grad_norm": 0.9365832209587097, "learning_rate": 1.6843340351455728e-06, "loss": 0.1145, "step": 11436 }, { "epoch": 1.8530460142579392, "grad_norm": 0.8150047659873962, "learning_rate": 1.6839206737397126e-06, "loss": 0.0992, "step": 11437 }, { "epoch": 1.8532080362929357, "grad_norm": 0.7815777659416199, "learning_rate": 1.6835073373039045e-06, "loss": 0.092, "step": 11438 }, { "epoch": 1.8533700583279327, "grad_norm": 0.8175069689750671, "learning_rate": 1.6830940258507955e-06, "loss": 0.0988, "step": 11439 }, { "epoch": 1.8535320803629294, "grad_norm": 0.8436422944068909, "learning_rate": 1.6826807393930334e-06, "loss": 0.0944, "step": 11440 }, { "epoch": 1.853694102397926, "grad_norm": 0.6752289533615112, "learning_rate": 1.682267477943262e-06, "loss": 0.0803, "step": 11441 }, { "epoch": 1.853856124432923, "grad_norm": 0.7708499431610107, "learning_rate": 1.6818542415141273e-06, "loss": 0.0933, "step": 11442 }, { "epoch": 1.8540181464679195, "grad_norm": 0.8612736463546753, "learning_rate": 1.6814410301182732e-06, "loss": 0.0977, "step": 11443 }, { "epoch": 1.8541801685029164, "grad_norm": 0.8443825840950012, "learning_rate": 1.6810278437683419e-06, "loss": 0.1044, "step": 11444 }, { "epoch": 1.8543421905379132, "grad_norm": 0.8466036319732666, "learning_rate": 1.680614682476977e-06, "loss": 0.0982, "step": 11445 }, { "epoch": 1.8545042125729099, "grad_norm": 0.9107270240783691, "learning_rate": 1.6802015462568205e-06, "loss": 0.1083, "step": 11446 }, { "epoch": 1.8546662346079068, "grad_norm": 0.7344313859939575, "learning_rate": 1.6797884351205123e-06, "loss": 0.0926, "step": 11447 }, { "epoch": 1.8548282566429033, "grad_norm": 0.8583900332450867, "learning_rate": 1.6793753490806939e-06, "loss": 0.1143, "step": 11448 }, { "epoch": 1.8549902786779002, "grad_norm": 0.7386242151260376, "learning_rate": 1.678962288150003e-06, "loss": 0.0855, "step": 11449 }, { "epoch": 1.855152300712897, "grad_norm": 0.7551776766777039, "learning_rate": 1.678549252341079e-06, "loss": 0.0893, "step": 11450 }, { "epoch": 1.8553143227478937, "grad_norm": 0.8036359548568726, "learning_rate": 1.6781362416665602e-06, "loss": 0.0918, "step": 11451 }, { "epoch": 1.8554763447828906, "grad_norm": 0.7629950046539307, "learning_rate": 1.6777232561390844e-06, "loss": 0.1067, "step": 11452 }, { "epoch": 1.855638366817887, "grad_norm": 0.7417677044868469, "learning_rate": 1.6773102957712866e-06, "loss": 0.087, "step": 11453 }, { "epoch": 1.855800388852884, "grad_norm": 0.7128724455833435, "learning_rate": 1.6768973605758021e-06, "loss": 0.091, "step": 11454 }, { "epoch": 1.8559624108878807, "grad_norm": 0.721200704574585, "learning_rate": 1.6764844505652677e-06, "loss": 0.0895, "step": 11455 }, { "epoch": 1.8561244329228774, "grad_norm": 0.7439916729927063, "learning_rate": 1.6760715657523158e-06, "loss": 0.0916, "step": 11456 }, { "epoch": 1.8562864549578744, "grad_norm": 0.8428785800933838, "learning_rate": 1.6756587061495805e-06, "loss": 0.1077, "step": 11457 }, { "epoch": 1.856448476992871, "grad_norm": 0.7453662753105164, "learning_rate": 1.6752458717696928e-06, "loss": 0.089, "step": 11458 }, { "epoch": 1.8566104990278678, "grad_norm": 0.8296511173248291, "learning_rate": 1.6748330626252862e-06, "loss": 0.1011, "step": 11459 }, { "epoch": 1.8567725210628645, "grad_norm": 0.8146061301231384, "learning_rate": 1.674420278728991e-06, "loss": 0.0924, "step": 11460 }, { "epoch": 1.8569345430978612, "grad_norm": 0.8605503439903259, "learning_rate": 1.674007520093438e-06, "loss": 0.0933, "step": 11461 }, { "epoch": 1.8570965651328581, "grad_norm": 0.820659875869751, "learning_rate": 1.6735947867312553e-06, "loss": 0.0988, "step": 11462 }, { "epoch": 1.8572585871678549, "grad_norm": 0.8566899299621582, "learning_rate": 1.6731820786550717e-06, "loss": 0.1045, "step": 11463 }, { "epoch": 1.8574206092028516, "grad_norm": 0.8467245101928711, "learning_rate": 1.6727693958775172e-06, "loss": 0.1007, "step": 11464 }, { "epoch": 1.8575826312378485, "grad_norm": 0.8376954197883606, "learning_rate": 1.6723567384112161e-06, "loss": 0.1074, "step": 11465 }, { "epoch": 1.857744653272845, "grad_norm": 0.7786714434623718, "learning_rate": 1.671944106268797e-06, "loss": 0.0937, "step": 11466 }, { "epoch": 1.857906675307842, "grad_norm": 0.9477052688598633, "learning_rate": 1.6715314994628834e-06, "loss": 0.1135, "step": 11467 }, { "epoch": 1.8580686973428386, "grad_norm": 0.9861614108085632, "learning_rate": 1.671118918006101e-06, "loss": 0.1181, "step": 11468 }, { "epoch": 1.8582307193778353, "grad_norm": 0.8557963371276855, "learning_rate": 1.6707063619110742e-06, "loss": 0.1045, "step": 11469 }, { "epoch": 1.8583927414128323, "grad_norm": 0.7471309900283813, "learning_rate": 1.6702938311904262e-06, "loss": 0.0944, "step": 11470 }, { "epoch": 1.8585547634478288, "grad_norm": 0.7850518822669983, "learning_rate": 1.669881325856779e-06, "loss": 0.0951, "step": 11471 }, { "epoch": 1.8587167854828257, "grad_norm": 0.8640868067741394, "learning_rate": 1.6694688459227545e-06, "loss": 0.1049, "step": 11472 }, { "epoch": 1.8588788075178224, "grad_norm": 0.909514844417572, "learning_rate": 1.6690563914009728e-06, "loss": 0.0996, "step": 11473 }, { "epoch": 1.8590408295528191, "grad_norm": 0.8319307565689087, "learning_rate": 1.6686439623040548e-06, "loss": 0.1022, "step": 11474 }, { "epoch": 1.859202851587816, "grad_norm": 0.797558605670929, "learning_rate": 1.6682315586446205e-06, "loss": 0.0896, "step": 11475 }, { "epoch": 1.8593648736228126, "grad_norm": 0.7331224083900452, "learning_rate": 1.6678191804352873e-06, "loss": 0.084, "step": 11476 }, { "epoch": 1.8595268956578095, "grad_norm": 0.8644435405731201, "learning_rate": 1.6674068276886734e-06, "loss": 0.1117, "step": 11477 }, { "epoch": 1.8596889176928062, "grad_norm": 0.7379981279373169, "learning_rate": 1.6669945004173944e-06, "loss": 0.0877, "step": 11478 }, { "epoch": 1.859850939727803, "grad_norm": 0.7866598963737488, "learning_rate": 1.6665821986340695e-06, "loss": 0.0984, "step": 11479 }, { "epoch": 1.8600129617627998, "grad_norm": 0.8214098215103149, "learning_rate": 1.6661699223513118e-06, "loss": 0.1077, "step": 11480 }, { "epoch": 1.8601749837977966, "grad_norm": 0.7403610348701477, "learning_rate": 1.6657576715817372e-06, "loss": 0.0934, "step": 11481 }, { "epoch": 1.8603370058327933, "grad_norm": 0.8874310255050659, "learning_rate": 1.6653454463379582e-06, "loss": 0.1102, "step": 11482 }, { "epoch": 1.86049902786779, "grad_norm": 1.0349223613739014, "learning_rate": 1.664933246632589e-06, "loss": 0.115, "step": 11483 }, { "epoch": 1.8606610499027867, "grad_norm": 0.7765905857086182, "learning_rate": 1.6645210724782423e-06, "loss": 0.1052, "step": 11484 }, { "epoch": 1.8608230719377836, "grad_norm": 0.8592008948326111, "learning_rate": 1.6641089238875283e-06, "loss": 0.1013, "step": 11485 }, { "epoch": 1.8609850939727803, "grad_norm": 0.8249764442443848, "learning_rate": 1.6636968008730586e-06, "loss": 0.1116, "step": 11486 }, { "epoch": 1.861147116007777, "grad_norm": 0.7741264700889587, "learning_rate": 1.6632847034474423e-06, "loss": 0.0926, "step": 11487 }, { "epoch": 1.861309138042774, "grad_norm": 0.6054975390434265, "learning_rate": 1.6628726316232902e-06, "loss": 0.0725, "step": 11488 }, { "epoch": 1.8614711600777705, "grad_norm": 0.7995757460594177, "learning_rate": 1.6624605854132094e-06, "loss": 0.0928, "step": 11489 }, { "epoch": 1.8616331821127674, "grad_norm": 0.6884124279022217, "learning_rate": 1.6620485648298084e-06, "loss": 0.0815, "step": 11490 }, { "epoch": 1.8617952041477641, "grad_norm": 0.8780040740966797, "learning_rate": 1.661636569885693e-06, "loss": 0.1026, "step": 11491 }, { "epoch": 1.8619572261827608, "grad_norm": 0.8116469383239746, "learning_rate": 1.6612246005934694e-06, "loss": 0.0942, "step": 11492 }, { "epoch": 1.8621192482177578, "grad_norm": 0.8133668899536133, "learning_rate": 1.660812656965744e-06, "loss": 0.0935, "step": 11493 }, { "epoch": 1.8622812702527543, "grad_norm": 0.6727808713912964, "learning_rate": 1.66040073901512e-06, "loss": 0.0841, "step": 11494 }, { "epoch": 1.8624432922877512, "grad_norm": 0.8516779541969299, "learning_rate": 1.6599888467542017e-06, "loss": 0.114, "step": 11495 }, { "epoch": 1.862605314322748, "grad_norm": 0.7665314078330994, "learning_rate": 1.6595769801955925e-06, "loss": 0.0951, "step": 11496 }, { "epoch": 1.8627673363577446, "grad_norm": 0.7693126797676086, "learning_rate": 1.6591651393518926e-06, "loss": 0.0922, "step": 11497 }, { "epoch": 1.8629293583927415, "grad_norm": 0.7686997652053833, "learning_rate": 1.6587533242357053e-06, "loss": 0.0905, "step": 11498 }, { "epoch": 1.863091380427738, "grad_norm": 0.7887589335441589, "learning_rate": 1.658341534859631e-06, "loss": 0.0993, "step": 11499 }, { "epoch": 1.863253402462735, "grad_norm": 0.8120632171630859, "learning_rate": 1.6579297712362686e-06, "loss": 0.0902, "step": 11500 }, { "epoch": 1.8634154244977317, "grad_norm": 0.9442248940467834, "learning_rate": 1.657518033378217e-06, "loss": 0.1086, "step": 11501 }, { "epoch": 1.8635774465327284, "grad_norm": 0.9302598237991333, "learning_rate": 1.6571063212980753e-06, "loss": 0.1051, "step": 11502 }, { "epoch": 1.8637394685677253, "grad_norm": 0.7944306135177612, "learning_rate": 1.6566946350084405e-06, "loss": 0.0994, "step": 11503 }, { "epoch": 1.8639014906027218, "grad_norm": 0.8751698732376099, "learning_rate": 1.6562829745219089e-06, "loss": 0.1055, "step": 11504 }, { "epoch": 1.8640635126377187, "grad_norm": 0.8934447169303894, "learning_rate": 1.6558713398510767e-06, "loss": 0.1028, "step": 11505 }, { "epoch": 1.8642255346727155, "grad_norm": 0.8261483907699585, "learning_rate": 1.6554597310085383e-06, "loss": 0.0983, "step": 11506 }, { "epoch": 1.8643875567077122, "grad_norm": 0.7163456082344055, "learning_rate": 1.6550481480068887e-06, "loss": 0.0841, "step": 11507 }, { "epoch": 1.864549578742709, "grad_norm": 0.7598121166229248, "learning_rate": 1.6546365908587213e-06, "loss": 0.0926, "step": 11508 }, { "epoch": 1.8647116007777058, "grad_norm": 0.7900456786155701, "learning_rate": 1.654225059576628e-06, "loss": 0.1034, "step": 11509 }, { "epoch": 1.8648736228127025, "grad_norm": 0.792512834072113, "learning_rate": 1.653813554173202e-06, "loss": 0.092, "step": 11510 }, { "epoch": 1.8650356448476992, "grad_norm": 0.8468700051307678, "learning_rate": 1.6534020746610315e-06, "loss": 0.1061, "step": 11511 }, { "epoch": 1.865197666882696, "grad_norm": 0.883598268032074, "learning_rate": 1.6529906210527107e-06, "loss": 0.1075, "step": 11512 }, { "epoch": 1.8653596889176929, "grad_norm": 0.8703530430793762, "learning_rate": 1.6525791933608266e-06, "loss": 0.1078, "step": 11513 }, { "epoch": 1.8655217109526896, "grad_norm": 0.7216241955757141, "learning_rate": 1.6521677915979688e-06, "loss": 0.0921, "step": 11514 }, { "epoch": 1.8656837329876863, "grad_norm": 0.7918997406959534, "learning_rate": 1.6517564157767245e-06, "loss": 0.0948, "step": 11515 }, { "epoch": 1.8658457550226832, "grad_norm": 0.7993650436401367, "learning_rate": 1.6513450659096804e-06, "loss": 0.1015, "step": 11516 }, { "epoch": 1.8660077770576797, "grad_norm": 0.9507842659950256, "learning_rate": 1.650933742009425e-06, "loss": 0.096, "step": 11517 }, { "epoch": 1.8661697990926767, "grad_norm": 0.7698103189468384, "learning_rate": 1.6505224440885414e-06, "loss": 0.0967, "step": 11518 }, { "epoch": 1.8663318211276734, "grad_norm": 0.9620030522346497, "learning_rate": 1.6501111721596163e-06, "loss": 0.1113, "step": 11519 }, { "epoch": 1.86649384316267, "grad_norm": 0.7965664863586426, "learning_rate": 1.649699926235232e-06, "loss": 0.101, "step": 11520 }, { "epoch": 1.866655865197667, "grad_norm": 0.7318791747093201, "learning_rate": 1.6492887063279717e-06, "loss": 0.0812, "step": 11521 }, { "epoch": 1.8668178872326635, "grad_norm": 0.7944748401641846, "learning_rate": 1.6488775124504188e-06, "loss": 0.1008, "step": 11522 }, { "epoch": 1.8669799092676604, "grad_norm": 0.8979195356369019, "learning_rate": 1.648466344615155e-06, "loss": 0.1114, "step": 11523 }, { "epoch": 1.8671419313026572, "grad_norm": 0.8624005317687988, "learning_rate": 1.6480552028347597e-06, "loss": 0.1084, "step": 11524 }, { "epoch": 1.8673039533376539, "grad_norm": 0.9684587717056274, "learning_rate": 1.647644087121813e-06, "loss": 0.1113, "step": 11525 }, { "epoch": 1.8674659753726508, "grad_norm": 0.7721800804138184, "learning_rate": 1.6472329974888956e-06, "loss": 0.0923, "step": 11526 }, { "epoch": 1.8676279974076473, "grad_norm": 0.7591527700424194, "learning_rate": 1.6468219339485845e-06, "loss": 0.0978, "step": 11527 }, { "epoch": 1.8677900194426442, "grad_norm": 0.7007966637611389, "learning_rate": 1.6464108965134578e-06, "loss": 0.0874, "step": 11528 }, { "epoch": 1.867952041477641, "grad_norm": 0.8641650080680847, "learning_rate": 1.6459998851960918e-06, "loss": 0.1124, "step": 11529 }, { "epoch": 1.8681140635126376, "grad_norm": 0.8576594591140747, "learning_rate": 1.645588900009062e-06, "loss": 0.1001, "step": 11530 }, { "epoch": 1.8682760855476346, "grad_norm": 0.8817536234855652, "learning_rate": 1.645177940964945e-06, "loss": 0.1046, "step": 11531 }, { "epoch": 1.8684381075826313, "grad_norm": 0.7928271889686584, "learning_rate": 1.6447670080763146e-06, "loss": 0.0985, "step": 11532 }, { "epoch": 1.868600129617628, "grad_norm": 0.8168180584907532, "learning_rate": 1.6443561013557434e-06, "loss": 0.1076, "step": 11533 }, { "epoch": 1.8687621516526247, "grad_norm": 0.845324695110321, "learning_rate": 1.6439452208158058e-06, "loss": 0.1029, "step": 11534 }, { "epoch": 1.8689241736876214, "grad_norm": 0.9935814142227173, "learning_rate": 1.6435343664690718e-06, "loss": 0.1097, "step": 11535 }, { "epoch": 1.8690861957226184, "grad_norm": 0.796512246131897, "learning_rate": 1.6431235383281135e-06, "loss": 0.0944, "step": 11536 }, { "epoch": 1.869248217757615, "grad_norm": 0.8797528147697449, "learning_rate": 1.6427127364055024e-06, "loss": 0.1073, "step": 11537 }, { "epoch": 1.8694102397926118, "grad_norm": 0.7383131384849548, "learning_rate": 1.6423019607138064e-06, "loss": 0.0979, "step": 11538 }, { "epoch": 1.8695722618276087, "grad_norm": 0.794525146484375, "learning_rate": 1.641891211265595e-06, "loss": 0.1006, "step": 11539 }, { "epoch": 1.8697342838626052, "grad_norm": 0.7232054471969604, "learning_rate": 1.641480488073435e-06, "loss": 0.0898, "step": 11540 }, { "epoch": 1.8698963058976021, "grad_norm": 0.7711024284362793, "learning_rate": 1.6410697911498957e-06, "loss": 0.099, "step": 11541 }, { "epoch": 1.8700583279325989, "grad_norm": 0.7795062065124512, "learning_rate": 1.6406591205075417e-06, "loss": 0.1017, "step": 11542 }, { "epoch": 1.8702203499675956, "grad_norm": 0.7736190557479858, "learning_rate": 1.6402484761589397e-06, "loss": 0.0874, "step": 11543 }, { "epoch": 1.8703823720025925, "grad_norm": 0.7875672578811646, "learning_rate": 1.639837858116653e-06, "loss": 0.1025, "step": 11544 }, { "epoch": 1.870544394037589, "grad_norm": 0.7884228825569153, "learning_rate": 1.639427266393246e-06, "loss": 0.0903, "step": 11545 }, { "epoch": 1.870706416072586, "grad_norm": 0.6791999340057373, "learning_rate": 1.6390167010012824e-06, "loss": 0.0852, "step": 11546 }, { "epoch": 1.8708684381075826, "grad_norm": 0.9688091278076172, "learning_rate": 1.638606161953325e-06, "loss": 0.1071, "step": 11547 }, { "epoch": 1.8710304601425793, "grad_norm": 0.7724692225456238, "learning_rate": 1.638195649261934e-06, "loss": 0.0987, "step": 11548 }, { "epoch": 1.8711924821775763, "grad_norm": 0.685142457485199, "learning_rate": 1.6377851629396695e-06, "loss": 0.0857, "step": 11549 }, { "epoch": 1.8713545042125728, "grad_norm": 0.8404377698898315, "learning_rate": 1.6373747029990943e-06, "loss": 0.1011, "step": 11550 }, { "epoch": 1.8715165262475697, "grad_norm": 0.8327875137329102, "learning_rate": 1.6369642694527648e-06, "loss": 0.1033, "step": 11551 }, { "epoch": 1.8716785482825664, "grad_norm": 0.777384877204895, "learning_rate": 1.6365538623132405e-06, "loss": 0.1067, "step": 11552 }, { "epoch": 1.8718405703175631, "grad_norm": 0.806379497051239, "learning_rate": 1.6361434815930782e-06, "loss": 0.0952, "step": 11553 }, { "epoch": 1.87200259235256, "grad_norm": 0.7385959029197693, "learning_rate": 1.6357331273048343e-06, "loss": 0.0857, "step": 11554 }, { "epoch": 1.8721646143875565, "grad_norm": 0.8200345635414124, "learning_rate": 1.635322799461066e-06, "loss": 0.1041, "step": 11555 }, { "epoch": 1.8723266364225535, "grad_norm": 0.8210601806640625, "learning_rate": 1.6349124980743278e-06, "loss": 0.101, "step": 11556 }, { "epoch": 1.8724886584575502, "grad_norm": 0.7943647503852844, "learning_rate": 1.6345022231571734e-06, "loss": 0.0912, "step": 11557 }, { "epoch": 1.872650680492547, "grad_norm": 0.8460164666175842, "learning_rate": 1.6340919747221568e-06, "loss": 0.0955, "step": 11558 }, { "epoch": 1.8728127025275438, "grad_norm": 0.7457050085067749, "learning_rate": 1.6336817527818292e-06, "loss": 0.0884, "step": 11559 }, { "epoch": 1.8729747245625405, "grad_norm": 1.021488070487976, "learning_rate": 1.633271557348744e-06, "loss": 0.1097, "step": 11560 }, { "epoch": 1.8731367465975373, "grad_norm": 0.8276289701461792, "learning_rate": 1.6328613884354524e-06, "loss": 0.0963, "step": 11561 }, { "epoch": 1.873298768632534, "grad_norm": 0.6760364770889282, "learning_rate": 1.6324512460545034e-06, "loss": 0.0817, "step": 11562 }, { "epoch": 1.8734607906675307, "grad_norm": 0.7393625378608704, "learning_rate": 1.6320411302184474e-06, "loss": 0.0919, "step": 11563 }, { "epoch": 1.8736228127025276, "grad_norm": 0.7952508926391602, "learning_rate": 1.6316310409398306e-06, "loss": 0.0932, "step": 11564 }, { "epoch": 1.8737848347375243, "grad_norm": 0.8211168050765991, "learning_rate": 1.6312209782312044e-06, "loss": 0.1028, "step": 11565 }, { "epoch": 1.873946856772521, "grad_norm": 0.8613480925559998, "learning_rate": 1.6308109421051132e-06, "loss": 0.106, "step": 11566 }, { "epoch": 1.874108878807518, "grad_norm": 0.6946225166320801, "learning_rate": 1.6304009325741044e-06, "loss": 0.0865, "step": 11567 }, { "epoch": 1.8742709008425145, "grad_norm": 0.8030040264129639, "learning_rate": 1.6299909496507214e-06, "loss": 0.1029, "step": 11568 }, { "epoch": 1.8744329228775114, "grad_norm": 0.840975284576416, "learning_rate": 1.6295809933475103e-06, "loss": 0.1061, "step": 11569 }, { "epoch": 1.874594944912508, "grad_norm": 0.893821120262146, "learning_rate": 1.6291710636770152e-06, "loss": 0.1042, "step": 11570 }, { "epoch": 1.8747569669475048, "grad_norm": 0.8448208570480347, "learning_rate": 1.6287611606517778e-06, "loss": 0.0991, "step": 11571 }, { "epoch": 1.8749189889825018, "grad_norm": 0.7172380089759827, "learning_rate": 1.628351284284341e-06, "loss": 0.0892, "step": 11572 }, { "epoch": 1.8750810110174982, "grad_norm": 0.8485018610954285, "learning_rate": 1.627941434587244e-06, "loss": 0.0992, "step": 11573 }, { "epoch": 1.8752430330524952, "grad_norm": 0.8565303087234497, "learning_rate": 1.6275316115730302e-06, "loss": 0.0989, "step": 11574 }, { "epoch": 1.875405055087492, "grad_norm": 0.8015487194061279, "learning_rate": 1.6271218152542373e-06, "loss": 0.0978, "step": 11575 }, { "epoch": 1.8755670771224886, "grad_norm": 0.962988555431366, "learning_rate": 1.626712045643405e-06, "loss": 0.1113, "step": 11576 }, { "epoch": 1.8757290991574855, "grad_norm": 0.8654093742370605, "learning_rate": 1.6263023027530706e-06, "loss": 0.0986, "step": 11577 }, { "epoch": 1.875891121192482, "grad_norm": 0.8079492449760437, "learning_rate": 1.6258925865957703e-06, "loss": 0.1023, "step": 11578 }, { "epoch": 1.876053143227479, "grad_norm": 0.7464910745620728, "learning_rate": 1.6254828971840432e-06, "loss": 0.0936, "step": 11579 }, { "epoch": 1.8762151652624757, "grad_norm": 0.8631336688995361, "learning_rate": 1.625073234530422e-06, "loss": 0.1021, "step": 11580 }, { "epoch": 1.8763771872974724, "grad_norm": 0.7467739582061768, "learning_rate": 1.6246635986474436e-06, "loss": 0.0909, "step": 11581 }, { "epoch": 1.8765392093324693, "grad_norm": 0.8743352293968201, "learning_rate": 1.62425398954764e-06, "loss": 0.1001, "step": 11582 }, { "epoch": 1.876701231367466, "grad_norm": 0.70003342628479, "learning_rate": 1.6238444072435447e-06, "loss": 0.0814, "step": 11583 }, { "epoch": 1.8768632534024627, "grad_norm": 0.7987949848175049, "learning_rate": 1.6234348517476905e-06, "loss": 0.1061, "step": 11584 }, { "epoch": 1.8770252754374595, "grad_norm": 0.8156781196594238, "learning_rate": 1.6230253230726096e-06, "loss": 0.1068, "step": 11585 }, { "epoch": 1.8771872974724562, "grad_norm": 0.8381296396255493, "learning_rate": 1.6226158212308307e-06, "loss": 0.1087, "step": 11586 }, { "epoch": 1.877349319507453, "grad_norm": 0.794044017791748, "learning_rate": 1.622206346234885e-06, "loss": 0.0952, "step": 11587 }, { "epoch": 1.8775113415424498, "grad_norm": 0.829454243183136, "learning_rate": 1.6217968980972998e-06, "loss": 0.1039, "step": 11588 }, { "epoch": 1.8776733635774465, "grad_norm": 0.7840944528579712, "learning_rate": 1.621387476830605e-06, "loss": 0.0937, "step": 11589 }, { "epoch": 1.8778353856124435, "grad_norm": 0.6949406862258911, "learning_rate": 1.620978082447327e-06, "loss": 0.0796, "step": 11590 }, { "epoch": 1.87799740764744, "grad_norm": 0.8238313794136047, "learning_rate": 1.6205687149599933e-06, "loss": 0.1038, "step": 11591 }, { "epoch": 1.8781594296824369, "grad_norm": 0.7915367484092712, "learning_rate": 1.6201593743811275e-06, "loss": 0.1021, "step": 11592 }, { "epoch": 1.8783214517174336, "grad_norm": 0.8443301320075989, "learning_rate": 1.6197500607232563e-06, "loss": 0.0991, "step": 11593 }, { "epoch": 1.8784834737524303, "grad_norm": 0.7770126461982727, "learning_rate": 1.6193407739989037e-06, "loss": 0.0969, "step": 11594 }, { "epoch": 1.8786454957874272, "grad_norm": 0.8288414478302002, "learning_rate": 1.6189315142205914e-06, "loss": 0.1031, "step": 11595 }, { "epoch": 1.8788075178224237, "grad_norm": 0.7647958993911743, "learning_rate": 1.6185222814008434e-06, "loss": 0.0886, "step": 11596 }, { "epoch": 1.8789695398574207, "grad_norm": 0.7341493368148804, "learning_rate": 1.6181130755521792e-06, "loss": 0.0968, "step": 11597 }, { "epoch": 1.8791315618924174, "grad_norm": 0.7972742319107056, "learning_rate": 1.6177038966871213e-06, "loss": 0.1008, "step": 11598 }, { "epoch": 1.879293583927414, "grad_norm": 0.8043832182884216, "learning_rate": 1.617294744818189e-06, "loss": 0.0959, "step": 11599 }, { "epoch": 1.879455605962411, "grad_norm": 0.8683483600616455, "learning_rate": 1.6168856199579025e-06, "loss": 0.1057, "step": 11600 }, { "epoch": 1.8796176279974075, "grad_norm": 0.8487406373023987, "learning_rate": 1.6164765221187778e-06, "loss": 0.0997, "step": 11601 }, { "epoch": 1.8797796500324044, "grad_norm": 0.7058298587799072, "learning_rate": 1.6160674513133332e-06, "loss": 0.0892, "step": 11602 }, { "epoch": 1.8799416720674011, "grad_norm": 1.010647177696228, "learning_rate": 1.6156584075540864e-06, "loss": 0.1148, "step": 11603 }, { "epoch": 1.8801036941023979, "grad_norm": 0.839988648891449, "learning_rate": 1.615249390853552e-06, "loss": 0.0978, "step": 11604 }, { "epoch": 1.8802657161373948, "grad_norm": 0.8383321762084961, "learning_rate": 1.6148404012242453e-06, "loss": 0.0976, "step": 11605 }, { "epoch": 1.8804277381723913, "grad_norm": 0.7959474325180054, "learning_rate": 1.61443143867868e-06, "loss": 0.0901, "step": 11606 }, { "epoch": 1.8805897602073882, "grad_norm": 0.854000985622406, "learning_rate": 1.614022503229369e-06, "loss": 0.1036, "step": 11607 }, { "epoch": 1.880751782242385, "grad_norm": 0.8434250950813293, "learning_rate": 1.613613594888826e-06, "loss": 0.0996, "step": 11608 }, { "epoch": 1.8809138042773816, "grad_norm": 0.7653728127479553, "learning_rate": 1.6132047136695625e-06, "loss": 0.101, "step": 11609 }, { "epoch": 1.8810758263123786, "grad_norm": 0.7736960649490356, "learning_rate": 1.612795859584088e-06, "loss": 0.0956, "step": 11610 }, { "epoch": 1.8812378483473753, "grad_norm": 0.7493591904640198, "learning_rate": 1.6123870326449144e-06, "loss": 0.0897, "step": 11611 }, { "epoch": 1.881399870382372, "grad_norm": 0.7444273829460144, "learning_rate": 1.611978232864548e-06, "loss": 0.0943, "step": 11612 }, { "epoch": 1.8815618924173687, "grad_norm": 0.7813089489936829, "learning_rate": 1.6115694602554994e-06, "loss": 0.0952, "step": 11613 }, { "epoch": 1.8817239144523654, "grad_norm": 0.8079869151115417, "learning_rate": 1.6111607148302758e-06, "loss": 0.1069, "step": 11614 }, { "epoch": 1.8818859364873624, "grad_norm": 0.8186015486717224, "learning_rate": 1.6107519966013828e-06, "loss": 0.1025, "step": 11615 }, { "epoch": 1.882047958522359, "grad_norm": 0.7281806468963623, "learning_rate": 1.6103433055813265e-06, "loss": 0.0893, "step": 11616 }, { "epoch": 1.8822099805573558, "grad_norm": 0.7015278935432434, "learning_rate": 1.6099346417826123e-06, "loss": 0.0844, "step": 11617 }, { "epoch": 1.8823720025923527, "grad_norm": 0.825904905796051, "learning_rate": 1.6095260052177446e-06, "loss": 0.1027, "step": 11618 }, { "epoch": 1.8825340246273492, "grad_norm": 0.8017039895057678, "learning_rate": 1.6091173958992261e-06, "loss": 0.0955, "step": 11619 }, { "epoch": 1.8826960466623461, "grad_norm": 0.7551977038383484, "learning_rate": 1.6087088138395598e-06, "loss": 0.0907, "step": 11620 }, { "epoch": 1.8828580686973428, "grad_norm": 0.9527973532676697, "learning_rate": 1.6083002590512458e-06, "loss": 0.1074, "step": 11621 }, { "epoch": 1.8830200907323396, "grad_norm": 0.8547660112380981, "learning_rate": 1.6078917315467867e-06, "loss": 0.106, "step": 11622 }, { "epoch": 1.8831821127673365, "grad_norm": 0.7676581144332886, "learning_rate": 1.607483231338682e-06, "loss": 0.0914, "step": 11623 }, { "epoch": 1.883344134802333, "grad_norm": 0.7763146162033081, "learning_rate": 1.6070747584394303e-06, "loss": 0.0927, "step": 11624 }, { "epoch": 1.88350615683733, "grad_norm": 0.8205904364585876, "learning_rate": 1.6066663128615301e-06, "loss": 0.1028, "step": 11625 }, { "epoch": 1.8836681788723266, "grad_norm": 0.7976201176643372, "learning_rate": 1.6062578946174785e-06, "loss": 0.0999, "step": 11626 }, { "epoch": 1.8838302009073233, "grad_norm": 0.8827847242355347, "learning_rate": 1.605849503719773e-06, "loss": 0.109, "step": 11627 }, { "epoch": 1.8839922229423203, "grad_norm": 0.8832690715789795, "learning_rate": 1.605441140180909e-06, "loss": 0.11, "step": 11628 }, { "epoch": 1.8841542449773168, "grad_norm": 0.7509612441062927, "learning_rate": 1.605032804013381e-06, "loss": 0.091, "step": 11629 }, { "epoch": 1.8843162670123137, "grad_norm": 0.7290849685668945, "learning_rate": 1.6046244952296839e-06, "loss": 0.0928, "step": 11630 }, { "epoch": 1.8844782890473104, "grad_norm": 0.7547253966331482, "learning_rate": 1.6042162138423095e-06, "loss": 0.0907, "step": 11631 }, { "epoch": 1.8846403110823071, "grad_norm": 0.7190500497817993, "learning_rate": 1.6038079598637523e-06, "loss": 0.0858, "step": 11632 }, { "epoch": 1.884802333117304, "grad_norm": 0.9019315242767334, "learning_rate": 1.6033997333065022e-06, "loss": 0.0975, "step": 11633 }, { "epoch": 1.8849643551523008, "grad_norm": 0.8361161947250366, "learning_rate": 1.6029915341830503e-06, "loss": 0.102, "step": 11634 }, { "epoch": 1.8851263771872975, "grad_norm": 1.2841830253601074, "learning_rate": 1.6025833625058878e-06, "loss": 0.1219, "step": 11635 }, { "epoch": 1.8852883992222942, "grad_norm": 0.7469258308410645, "learning_rate": 1.6021752182875012e-06, "loss": 0.0904, "step": 11636 }, { "epoch": 1.885450421257291, "grad_norm": 0.8182607293128967, "learning_rate": 1.601767101540381e-06, "loss": 0.1008, "step": 11637 }, { "epoch": 1.8856124432922878, "grad_norm": 0.6999529004096985, "learning_rate": 1.6013590122770143e-06, "loss": 0.0842, "step": 11638 }, { "epoch": 1.8857744653272845, "grad_norm": 0.7251670956611633, "learning_rate": 1.6009509505098863e-06, "loss": 0.0931, "step": 11639 }, { "epoch": 1.8859364873622813, "grad_norm": 0.825188934803009, "learning_rate": 1.6005429162514834e-06, "loss": 0.1004, "step": 11640 }, { "epoch": 1.8860985093972782, "grad_norm": 0.72586590051651, "learning_rate": 1.6001349095142918e-06, "loss": 0.0852, "step": 11641 }, { "epoch": 1.8862605314322747, "grad_norm": 0.8619117140769958, "learning_rate": 1.5997269303107937e-06, "loss": 0.0906, "step": 11642 }, { "epoch": 1.8864225534672716, "grad_norm": 0.8306232690811157, "learning_rate": 1.5993189786534727e-06, "loss": 0.1013, "step": 11643 }, { "epoch": 1.8865845755022683, "grad_norm": 0.7801927924156189, "learning_rate": 1.598911054554812e-06, "loss": 0.0915, "step": 11644 }, { "epoch": 1.886746597537265, "grad_norm": 0.7791352272033691, "learning_rate": 1.5985031580272914e-06, "loss": 0.0875, "step": 11645 }, { "epoch": 1.886908619572262, "grad_norm": 0.8080638647079468, "learning_rate": 1.5980952890833929e-06, "loss": 0.0956, "step": 11646 }, { "epoch": 1.8870706416072585, "grad_norm": 0.7200146913528442, "learning_rate": 1.597687447735597e-06, "loss": 0.0807, "step": 11647 }, { "epoch": 1.8872326636422554, "grad_norm": 0.8803225755691528, "learning_rate": 1.5972796339963806e-06, "loss": 0.1038, "step": 11648 }, { "epoch": 1.887394685677252, "grad_norm": 0.8330278992652893, "learning_rate": 1.5968718478782236e-06, "loss": 0.1015, "step": 11649 }, { "epoch": 1.8875567077122488, "grad_norm": 0.7998868823051453, "learning_rate": 1.5964640893936015e-06, "loss": 0.0993, "step": 11650 }, { "epoch": 1.8877187297472457, "grad_norm": 0.9711896777153015, "learning_rate": 1.596056358554992e-06, "loss": 0.1076, "step": 11651 }, { "epoch": 1.8878807517822422, "grad_norm": 0.8102350831031799, "learning_rate": 1.595648655374871e-06, "loss": 0.1031, "step": 11652 }, { "epoch": 1.8880427738172392, "grad_norm": 0.879172146320343, "learning_rate": 1.5952409798657127e-06, "loss": 0.1006, "step": 11653 }, { "epoch": 1.8882047958522359, "grad_norm": 0.8931054472923279, "learning_rate": 1.5948333320399905e-06, "loss": 0.1153, "step": 11654 }, { "epoch": 1.8883668178872326, "grad_norm": 0.7753148078918457, "learning_rate": 1.5944257119101775e-06, "loss": 0.0959, "step": 11655 }, { "epoch": 1.8885288399222295, "grad_norm": 0.8476399183273315, "learning_rate": 1.5940181194887472e-06, "loss": 0.1019, "step": 11656 }, { "epoch": 1.888690861957226, "grad_norm": 0.7567358613014221, "learning_rate": 1.5936105547881697e-06, "loss": 0.0865, "step": 11657 }, { "epoch": 1.888852883992223, "grad_norm": 0.7247362732887268, "learning_rate": 1.5932030178209163e-06, "loss": 0.0863, "step": 11658 }, { "epoch": 1.8890149060272197, "grad_norm": 0.7682359218597412, "learning_rate": 1.5927955085994544e-06, "loss": 0.0938, "step": 11659 }, { "epoch": 1.8891769280622164, "grad_norm": 0.750938355922699, "learning_rate": 1.592388027136256e-06, "loss": 0.0947, "step": 11660 }, { "epoch": 1.8893389500972133, "grad_norm": 0.8155219554901123, "learning_rate": 1.5919805734437871e-06, "loss": 0.1027, "step": 11661 }, { "epoch": 1.88950097213221, "grad_norm": 0.8531356453895569, "learning_rate": 1.591573147534516e-06, "loss": 0.1002, "step": 11662 }, { "epoch": 1.8896629941672067, "grad_norm": 0.8241977095603943, "learning_rate": 1.5911657494209077e-06, "loss": 0.0958, "step": 11663 }, { "epoch": 1.8898250162022034, "grad_norm": 0.8130655884742737, "learning_rate": 1.5907583791154275e-06, "loss": 0.1017, "step": 11664 }, { "epoch": 1.8899870382372002, "grad_norm": 0.7908192873001099, "learning_rate": 1.5903510366305416e-06, "loss": 0.1012, "step": 11665 }, { "epoch": 1.890149060272197, "grad_norm": 0.7589678168296814, "learning_rate": 1.5899437219787124e-06, "loss": 0.0919, "step": 11666 }, { "epoch": 1.8903110823071938, "grad_norm": 0.7555517554283142, "learning_rate": 1.5895364351724033e-06, "loss": 0.0946, "step": 11667 }, { "epoch": 1.8904731043421905, "grad_norm": 0.7375280857086182, "learning_rate": 1.5891291762240757e-06, "loss": 0.09, "step": 11668 }, { "epoch": 1.8906351263771874, "grad_norm": 0.8817306756973267, "learning_rate": 1.5887219451461903e-06, "loss": 0.1076, "step": 11669 }, { "epoch": 1.890797148412184, "grad_norm": 0.7232506275177002, "learning_rate": 1.5883147419512086e-06, "loss": 0.0868, "step": 11670 }, { "epoch": 1.8909591704471809, "grad_norm": 0.8023999333381653, "learning_rate": 1.5879075666515903e-06, "loss": 0.1019, "step": 11671 }, { "epoch": 1.8911211924821776, "grad_norm": 0.8410249948501587, "learning_rate": 1.5875004192597926e-06, "loss": 0.0945, "step": 11672 }, { "epoch": 1.8912832145171743, "grad_norm": 0.8737829327583313, "learning_rate": 1.5870932997882742e-06, "loss": 0.1045, "step": 11673 }, { "epoch": 1.8914452365521712, "grad_norm": 0.8467708230018616, "learning_rate": 1.5866862082494907e-06, "loss": 0.094, "step": 11674 }, { "epoch": 1.8916072585871677, "grad_norm": 0.6961696743965149, "learning_rate": 1.5862791446558999e-06, "loss": 0.0826, "step": 11675 }, { "epoch": 1.8917692806221647, "grad_norm": 0.8878881931304932, "learning_rate": 1.5858721090199564e-06, "loss": 0.097, "step": 11676 }, { "epoch": 1.8919313026571614, "grad_norm": 0.9326772093772888, "learning_rate": 1.5854651013541134e-06, "loss": 0.1038, "step": 11677 }, { "epoch": 1.892093324692158, "grad_norm": 0.769069492816925, "learning_rate": 1.5850581216708254e-06, "loss": 0.0946, "step": 11678 }, { "epoch": 1.892255346727155, "grad_norm": 0.8138481378555298, "learning_rate": 1.5846511699825445e-06, "loss": 0.099, "step": 11679 }, { "epoch": 1.8924173687621515, "grad_norm": 0.9026851058006287, "learning_rate": 1.5842442463017235e-06, "loss": 0.113, "step": 11680 }, { "epoch": 1.8925793907971484, "grad_norm": 0.796542763710022, "learning_rate": 1.583837350640812e-06, "loss": 0.0943, "step": 11681 }, { "epoch": 1.8927414128321451, "grad_norm": 0.7942173480987549, "learning_rate": 1.583430483012261e-06, "loss": 0.0911, "step": 11682 }, { "epoch": 1.8929034348671419, "grad_norm": 0.748563289642334, "learning_rate": 1.5830236434285182e-06, "loss": 0.0934, "step": 11683 }, { "epoch": 1.8930654569021388, "grad_norm": 0.8524467349052429, "learning_rate": 1.5826168319020332e-06, "loss": 0.105, "step": 11684 }, { "epoch": 1.8932274789371355, "grad_norm": 0.8268295526504517, "learning_rate": 1.5822100484452538e-06, "loss": 0.0841, "step": 11685 }, { "epoch": 1.8933895009721322, "grad_norm": 0.7947415709495544, "learning_rate": 1.5818032930706254e-06, "loss": 0.1011, "step": 11686 }, { "epoch": 1.893551523007129, "grad_norm": 0.8413145542144775, "learning_rate": 1.5813965657905942e-06, "loss": 0.1021, "step": 11687 }, { "epoch": 1.8937135450421256, "grad_norm": 0.7836431860923767, "learning_rate": 1.5809898666176044e-06, "loss": 0.0948, "step": 11688 }, { "epoch": 1.8938755670771226, "grad_norm": 0.8638646602630615, "learning_rate": 1.5805831955641022e-06, "loss": 0.1104, "step": 11689 }, { "epoch": 1.8940375891121193, "grad_norm": 0.7815718650817871, "learning_rate": 1.5801765526425283e-06, "loss": 0.0857, "step": 11690 }, { "epoch": 1.894199611147116, "grad_norm": 0.8989560008049011, "learning_rate": 1.5797699378653267e-06, "loss": 0.1111, "step": 11691 }, { "epoch": 1.894361633182113, "grad_norm": 0.8104327321052551, "learning_rate": 1.5793633512449374e-06, "loss": 0.0974, "step": 11692 }, { "epoch": 1.8945236552171094, "grad_norm": 0.7694452404975891, "learning_rate": 1.5789567927938016e-06, "loss": 0.0981, "step": 11693 }, { "epoch": 1.8946856772521063, "grad_norm": 0.8841857314109802, "learning_rate": 1.578550262524359e-06, "loss": 0.101, "step": 11694 }, { "epoch": 1.894847699287103, "grad_norm": 0.7945596575737, "learning_rate": 1.5781437604490493e-06, "loss": 0.0931, "step": 11695 }, { "epoch": 1.8950097213220998, "grad_norm": 0.8388197422027588, "learning_rate": 1.5777372865803091e-06, "loss": 0.0982, "step": 11696 }, { "epoch": 1.8951717433570967, "grad_norm": 0.8069587349891663, "learning_rate": 1.5773308409305766e-06, "loss": 0.0995, "step": 11697 }, { "epoch": 1.8953337653920932, "grad_norm": 0.7525715231895447, "learning_rate": 1.5769244235122867e-06, "loss": 0.0856, "step": 11698 }, { "epoch": 1.8954957874270901, "grad_norm": 0.9167400598526001, "learning_rate": 1.576518034337876e-06, "loss": 0.1076, "step": 11699 }, { "epoch": 1.8956578094620868, "grad_norm": 0.7757900357246399, "learning_rate": 1.576111673419779e-06, "loss": 0.0995, "step": 11700 }, { "epoch": 1.8958198314970836, "grad_norm": 0.8285884857177734, "learning_rate": 1.5757053407704287e-06, "loss": 0.106, "step": 11701 }, { "epoch": 1.8959818535320805, "grad_norm": 0.7494697570800781, "learning_rate": 1.5752990364022588e-06, "loss": 0.0959, "step": 11702 }, { "epoch": 1.896143875567077, "grad_norm": 0.7144713401794434, "learning_rate": 1.5748927603276992e-06, "loss": 0.0885, "step": 11703 }, { "epoch": 1.896305897602074, "grad_norm": 0.7766308784484863, "learning_rate": 1.5744865125591837e-06, "loss": 0.0969, "step": 11704 }, { "epoch": 1.8964679196370706, "grad_norm": 0.818439245223999, "learning_rate": 1.574080293109141e-06, "loss": 0.1071, "step": 11705 }, { "epoch": 1.8966299416720673, "grad_norm": 0.8813479542732239, "learning_rate": 1.573674101990001e-06, "loss": 0.1012, "step": 11706 }, { "epoch": 1.8967919637070643, "grad_norm": 0.8053517937660217, "learning_rate": 1.5732679392141906e-06, "loss": 0.1002, "step": 11707 }, { "epoch": 1.8969539857420608, "grad_norm": 0.8213833570480347, "learning_rate": 1.5728618047941393e-06, "loss": 0.1046, "step": 11708 }, { "epoch": 1.8971160077770577, "grad_norm": 0.9207565784454346, "learning_rate": 1.5724556987422738e-06, "loss": 0.1037, "step": 11709 }, { "epoch": 1.8972780298120544, "grad_norm": 0.7941535115242004, "learning_rate": 1.5720496210710185e-06, "loss": 0.0841, "step": 11710 }, { "epoch": 1.8974400518470511, "grad_norm": 0.9520290493965149, "learning_rate": 1.5716435717927996e-06, "loss": 0.1096, "step": 11711 }, { "epoch": 1.897602073882048, "grad_norm": 0.720905065536499, "learning_rate": 1.5712375509200397e-06, "loss": 0.0864, "step": 11712 }, { "epoch": 1.8977640959170448, "grad_norm": 0.8394162654876709, "learning_rate": 1.5708315584651646e-06, "loss": 0.1007, "step": 11713 }, { "epoch": 1.8979261179520415, "grad_norm": 0.9244471192359924, "learning_rate": 1.5704255944405947e-06, "loss": 0.1049, "step": 11714 }, { "epoch": 1.8980881399870384, "grad_norm": 0.9457299709320068, "learning_rate": 1.5700196588587527e-06, "loss": 0.1148, "step": 11715 }, { "epoch": 1.898250162022035, "grad_norm": 0.8000479340553284, "learning_rate": 1.5696137517320582e-06, "loss": 0.0928, "step": 11716 }, { "epoch": 1.8984121840570318, "grad_norm": 0.8423092365264893, "learning_rate": 1.5692078730729304e-06, "loss": 0.1003, "step": 11717 }, { "epoch": 1.8985742060920285, "grad_norm": 0.8390052914619446, "learning_rate": 1.5688020228937905e-06, "loss": 0.1021, "step": 11718 }, { "epoch": 1.8987362281270252, "grad_norm": 0.8341346979141235, "learning_rate": 1.5683962012070546e-06, "loss": 0.1038, "step": 11719 }, { "epoch": 1.8988982501620222, "grad_norm": 0.7826020121574402, "learning_rate": 1.5679904080251414e-06, "loss": 0.1, "step": 11720 }, { "epoch": 1.8990602721970187, "grad_norm": 0.9335909485816956, "learning_rate": 1.5675846433604658e-06, "loss": 0.1173, "step": 11721 }, { "epoch": 1.8992222942320156, "grad_norm": 0.8499523401260376, "learning_rate": 1.567178907225443e-06, "loss": 0.1009, "step": 11722 }, { "epoch": 1.8993843162670123, "grad_norm": 0.8055524826049805, "learning_rate": 1.5667731996324887e-06, "loss": 0.104, "step": 11723 }, { "epoch": 1.899546338302009, "grad_norm": 0.8389691710472107, "learning_rate": 1.5663675205940164e-06, "loss": 0.1059, "step": 11724 }, { "epoch": 1.899708360337006, "grad_norm": 0.7978635430335999, "learning_rate": 1.5659618701224385e-06, "loss": 0.0991, "step": 11725 }, { "epoch": 1.8998703823720025, "grad_norm": 0.8267003297805786, "learning_rate": 1.5655562482301664e-06, "loss": 0.1018, "step": 11726 }, { "epoch": 1.9000324044069994, "grad_norm": 0.8983197808265686, "learning_rate": 1.565150654929613e-06, "loss": 0.1094, "step": 11727 }, { "epoch": 1.900194426441996, "grad_norm": 0.7554136514663696, "learning_rate": 1.5647450902331866e-06, "loss": 0.0945, "step": 11728 }, { "epoch": 1.9003564484769928, "grad_norm": 0.8314508199691772, "learning_rate": 1.5643395541532972e-06, "loss": 0.1002, "step": 11729 }, { "epoch": 1.9005184705119897, "grad_norm": 0.8851383924484253, "learning_rate": 1.5639340467023534e-06, "loss": 0.1073, "step": 11730 }, { "epoch": 1.9006804925469862, "grad_norm": 0.7740817070007324, "learning_rate": 1.563528567892762e-06, "loss": 0.0942, "step": 11731 }, { "epoch": 1.9008425145819832, "grad_norm": 0.7232766151428223, "learning_rate": 1.5631231177369305e-06, "loss": 0.0925, "step": 11732 }, { "epoch": 1.9010045366169799, "grad_norm": 0.7759274840354919, "learning_rate": 1.562717696247265e-06, "loss": 0.1035, "step": 11733 }, { "epoch": 1.9011665586519766, "grad_norm": 0.7025986313819885, "learning_rate": 1.562312303436169e-06, "loss": 0.0873, "step": 11734 }, { "epoch": 1.9013285806869735, "grad_norm": 0.8542094230651855, "learning_rate": 1.561906939316048e-06, "loss": 0.106, "step": 11735 }, { "epoch": 1.9014906027219702, "grad_norm": 0.7692488431930542, "learning_rate": 1.5615016038993036e-06, "loss": 0.0989, "step": 11736 }, { "epoch": 1.901652624756967, "grad_norm": 0.7880541682243347, "learning_rate": 1.5610962971983395e-06, "loss": 0.1038, "step": 11737 }, { "epoch": 1.9018146467919637, "grad_norm": 0.8704632520675659, "learning_rate": 1.5606910192255565e-06, "loss": 0.0971, "step": 11738 }, { "epoch": 1.9019766688269604, "grad_norm": 0.8276471495628357, "learning_rate": 1.560285769993356e-06, "loss": 0.1017, "step": 11739 }, { "epoch": 1.9021386908619573, "grad_norm": 0.9419677257537842, "learning_rate": 1.5598805495141362e-06, "loss": 0.1009, "step": 11740 }, { "epoch": 1.902300712896954, "grad_norm": 0.8188934326171875, "learning_rate": 1.5594753578002957e-06, "loss": 0.1005, "step": 11741 }, { "epoch": 1.9024627349319507, "grad_norm": 0.7875339984893799, "learning_rate": 1.5590701948642348e-06, "loss": 0.0983, "step": 11742 }, { "epoch": 1.9026247569669477, "grad_norm": 0.756129264831543, "learning_rate": 1.5586650607183482e-06, "loss": 0.0916, "step": 11743 }, { "epoch": 1.9027867790019442, "grad_norm": 0.8351160287857056, "learning_rate": 1.5582599553750332e-06, "loss": 0.106, "step": 11744 }, { "epoch": 1.902948801036941, "grad_norm": 0.8231625556945801, "learning_rate": 1.5578548788466841e-06, "loss": 0.1069, "step": 11745 }, { "epoch": 1.9031108230719378, "grad_norm": 0.7880319952964783, "learning_rate": 1.5574498311456953e-06, "loss": 0.0998, "step": 11746 }, { "epoch": 1.9032728451069345, "grad_norm": 0.8101006150245667, "learning_rate": 1.5570448122844612e-06, "loss": 0.0968, "step": 11747 }, { "epoch": 1.9034348671419314, "grad_norm": 0.7617364525794983, "learning_rate": 1.5566398222753745e-06, "loss": 0.0917, "step": 11748 }, { "epoch": 1.903596889176928, "grad_norm": 0.8054034113883972, "learning_rate": 1.5562348611308259e-06, "loss": 0.0939, "step": 11749 }, { "epoch": 1.9037589112119249, "grad_norm": 0.8272371292114258, "learning_rate": 1.5558299288632061e-06, "loss": 0.0987, "step": 11750 }, { "epoch": 1.9039209332469216, "grad_norm": 0.8034398555755615, "learning_rate": 1.5554250254849065e-06, "loss": 0.0987, "step": 11751 }, { "epoch": 1.9040829552819183, "grad_norm": 0.681370198726654, "learning_rate": 1.555020151008315e-06, "loss": 0.0833, "step": 11752 }, { "epoch": 1.9042449773169152, "grad_norm": 0.9123519062995911, "learning_rate": 1.5546153054458204e-06, "loss": 0.1037, "step": 11753 }, { "epoch": 1.9044069993519117, "grad_norm": 0.8379228115081787, "learning_rate": 1.5542104888098093e-06, "loss": 0.1071, "step": 11754 }, { "epoch": 1.9045690213869086, "grad_norm": 0.6765666007995605, "learning_rate": 1.5538057011126678e-06, "loss": 0.0874, "step": 11755 }, { "epoch": 1.9047310434219054, "grad_norm": 0.7915697693824768, "learning_rate": 1.553400942366783e-06, "loss": 0.0974, "step": 11756 }, { "epoch": 1.904893065456902, "grad_norm": 0.7531107664108276, "learning_rate": 1.5529962125845387e-06, "loss": 0.0915, "step": 11757 }, { "epoch": 1.905055087491899, "grad_norm": 0.8496636152267456, "learning_rate": 1.5525915117783182e-06, "loss": 0.1105, "step": 11758 }, { "epoch": 1.9052171095268955, "grad_norm": 0.9320754408836365, "learning_rate": 1.5521868399605057e-06, "loss": 0.1043, "step": 11759 }, { "epoch": 1.9053791315618924, "grad_norm": 0.80690598487854, "learning_rate": 1.5517821971434804e-06, "loss": 0.105, "step": 11760 }, { "epoch": 1.9055411535968891, "grad_norm": 0.8309117555618286, "learning_rate": 1.5513775833396263e-06, "loss": 0.0969, "step": 11761 }, { "epoch": 1.9057031756318858, "grad_norm": 0.7836124300956726, "learning_rate": 1.5509729985613232e-06, "loss": 0.0968, "step": 11762 }, { "epoch": 1.9058651976668828, "grad_norm": 0.7297149300575256, "learning_rate": 1.5505684428209487e-06, "loss": 0.0931, "step": 11763 }, { "epoch": 1.9060272197018795, "grad_norm": 0.8328400254249573, "learning_rate": 1.5501639161308829e-06, "loss": 0.104, "step": 11764 }, { "epoch": 1.9061892417368762, "grad_norm": 0.8439556956291199, "learning_rate": 1.5497594185035015e-06, "loss": 0.1035, "step": 11765 }, { "epoch": 1.9063512637718731, "grad_norm": 0.7502602934837341, "learning_rate": 1.5493549499511834e-06, "loss": 0.0911, "step": 11766 }, { "epoch": 1.9065132858068696, "grad_norm": 0.9024834632873535, "learning_rate": 1.5489505104863034e-06, "loss": 0.1085, "step": 11767 }, { "epoch": 1.9066753078418666, "grad_norm": 0.8212124705314636, "learning_rate": 1.5485461001212365e-06, "loss": 0.1028, "step": 11768 }, { "epoch": 1.9068373298768633, "grad_norm": 0.8439403176307678, "learning_rate": 1.5481417188683557e-06, "loss": 0.1067, "step": 11769 }, { "epoch": 1.90699935191186, "grad_norm": 0.811410665512085, "learning_rate": 1.5477373667400347e-06, "loss": 0.0983, "step": 11770 }, { "epoch": 1.907161373946857, "grad_norm": 0.8574284315109253, "learning_rate": 1.5473330437486466e-06, "loss": 0.1048, "step": 11771 }, { "epoch": 1.9073233959818534, "grad_norm": 0.7073046565055847, "learning_rate": 1.5469287499065615e-06, "loss": 0.0905, "step": 11772 }, { "epoch": 1.9074854180168503, "grad_norm": 0.7401828169822693, "learning_rate": 1.5465244852261505e-06, "loss": 0.0906, "step": 11773 }, { "epoch": 1.907647440051847, "grad_norm": 0.7841954827308655, "learning_rate": 1.5461202497197821e-06, "loss": 0.0979, "step": 11774 }, { "epoch": 1.9078094620868438, "grad_norm": 0.7952117919921875, "learning_rate": 1.545716043399827e-06, "loss": 0.0935, "step": 11775 }, { "epoch": 1.9079714841218407, "grad_norm": 0.7880851626396179, "learning_rate": 1.5453118662786509e-06, "loss": 0.1009, "step": 11776 }, { "epoch": 1.9081335061568372, "grad_norm": 0.849980354309082, "learning_rate": 1.544907718368622e-06, "loss": 0.1105, "step": 11777 }, { "epoch": 1.9082955281918341, "grad_norm": 0.858780026435852, "learning_rate": 1.544503599682105e-06, "loss": 0.1007, "step": 11778 }, { "epoch": 1.9084575502268308, "grad_norm": 0.7746371030807495, "learning_rate": 1.5440995102314654e-06, "loss": 0.0995, "step": 11779 }, { "epoch": 1.9086195722618275, "grad_norm": 0.8200014233589172, "learning_rate": 1.5436954500290684e-06, "loss": 0.0951, "step": 11780 }, { "epoch": 1.9087815942968245, "grad_norm": 0.9434183835983276, "learning_rate": 1.5432914190872757e-06, "loss": 0.1081, "step": 11781 }, { "epoch": 1.908943616331821, "grad_norm": 0.7809907793998718, "learning_rate": 1.5428874174184509e-06, "loss": 0.0902, "step": 11782 }, { "epoch": 1.909105638366818, "grad_norm": 0.8580632209777832, "learning_rate": 1.5424834450349552e-06, "loss": 0.103, "step": 11783 }, { "epoch": 1.9092676604018146, "grad_norm": 0.8701772689819336, "learning_rate": 1.5420795019491475e-06, "loss": 0.1071, "step": 11784 }, { "epoch": 1.9094296824368113, "grad_norm": 0.784704327583313, "learning_rate": 1.54167558817339e-06, "loss": 0.0969, "step": 11785 }, { "epoch": 1.9095917044718083, "grad_norm": 0.7510059475898743, "learning_rate": 1.5412717037200406e-06, "loss": 0.0939, "step": 11786 }, { "epoch": 1.909753726506805, "grad_norm": 0.7119048237800598, "learning_rate": 1.5408678486014567e-06, "loss": 0.0859, "step": 11787 }, { "epoch": 1.9099157485418017, "grad_norm": 0.7860952615737915, "learning_rate": 1.540464022829996e-06, "loss": 0.095, "step": 11788 }, { "epoch": 1.9100777705767984, "grad_norm": 0.9239879846572876, "learning_rate": 1.540060226418013e-06, "loss": 0.1001, "step": 11789 }, { "epoch": 1.910239792611795, "grad_norm": 0.7606763243675232, "learning_rate": 1.5396564593778646e-06, "loss": 0.0849, "step": 11790 }, { "epoch": 1.910401814646792, "grad_norm": 0.830071747303009, "learning_rate": 1.5392527217219047e-06, "loss": 0.0984, "step": 11791 }, { "epoch": 1.9105638366817888, "grad_norm": 0.8571628332138062, "learning_rate": 1.538849013462487e-06, "loss": 0.103, "step": 11792 }, { "epoch": 1.9107258587167855, "grad_norm": 0.8239210844039917, "learning_rate": 1.5384453346119628e-06, "loss": 0.0916, "step": 11793 }, { "epoch": 1.9108878807517824, "grad_norm": 0.7969984412193298, "learning_rate": 1.5380416851826845e-06, "loss": 0.0952, "step": 11794 }, { "epoch": 1.9110499027867789, "grad_norm": 0.8779683113098145, "learning_rate": 1.5376380651870033e-06, "loss": 0.1102, "step": 11795 }, { "epoch": 1.9112119248217758, "grad_norm": 0.8378877639770508, "learning_rate": 1.537234474637268e-06, "loss": 0.1004, "step": 11796 }, { "epoch": 1.9113739468567725, "grad_norm": 0.8366298675537109, "learning_rate": 1.5368309135458287e-06, "loss": 0.1061, "step": 11797 }, { "epoch": 1.9115359688917692, "grad_norm": 0.8169813752174377, "learning_rate": 1.5364273819250308e-06, "loss": 0.0967, "step": 11798 }, { "epoch": 1.9116979909267662, "grad_norm": 0.880442202091217, "learning_rate": 1.536023879787225e-06, "loss": 0.1037, "step": 11799 }, { "epoch": 1.9118600129617627, "grad_norm": 0.8345661759376526, "learning_rate": 1.535620407144755e-06, "loss": 0.0997, "step": 11800 }, { "epoch": 1.9120220349967596, "grad_norm": 0.9036890268325806, "learning_rate": 1.5352169640099673e-06, "loss": 0.1058, "step": 11801 }, { "epoch": 1.9121840570317563, "grad_norm": 0.7998917102813721, "learning_rate": 1.534813550395205e-06, "loss": 0.1014, "step": 11802 }, { "epoch": 1.912346079066753, "grad_norm": 0.8374770879745483, "learning_rate": 1.5344101663128121e-06, "loss": 0.1047, "step": 11803 }, { "epoch": 1.91250810110175, "grad_norm": 0.8265429735183716, "learning_rate": 1.5340068117751329e-06, "loss": 0.098, "step": 11804 }, { "epoch": 1.9126701231367464, "grad_norm": 0.7321347594261169, "learning_rate": 1.5336034867945065e-06, "loss": 0.0839, "step": 11805 }, { "epoch": 1.9128321451717434, "grad_norm": 0.7928110361099243, "learning_rate": 1.5332001913832754e-06, "loss": 0.0964, "step": 11806 }, { "epoch": 1.91299416720674, "grad_norm": 0.7539445161819458, "learning_rate": 1.5327969255537784e-06, "loss": 0.0951, "step": 11807 }, { "epoch": 1.9131561892417368, "grad_norm": 0.8430777192115784, "learning_rate": 1.5323936893183542e-06, "loss": 0.1087, "step": 11808 }, { "epoch": 1.9133182112767337, "grad_norm": 0.8476277589797974, "learning_rate": 1.5319904826893422e-06, "loss": 0.1013, "step": 11809 }, { "epoch": 1.9134802333117304, "grad_norm": 0.8722755908966064, "learning_rate": 1.5315873056790791e-06, "loss": 0.0985, "step": 11810 }, { "epoch": 1.9136422553467272, "grad_norm": 0.708776593208313, "learning_rate": 1.5311841582999009e-06, "loss": 0.0864, "step": 11811 }, { "epoch": 1.9138042773817239, "grad_norm": 0.8320741057395935, "learning_rate": 1.5307810405641433e-06, "loss": 0.0971, "step": 11812 }, { "epoch": 1.9139662994167206, "grad_norm": 0.79482501745224, "learning_rate": 1.5303779524841395e-06, "loss": 0.1019, "step": 11813 }, { "epoch": 1.9141283214517175, "grad_norm": 0.8025671243667603, "learning_rate": 1.5299748940722241e-06, "loss": 0.0934, "step": 11814 }, { "epoch": 1.9142903434867142, "grad_norm": 0.9434292316436768, "learning_rate": 1.5295718653407305e-06, "loss": 0.1017, "step": 11815 }, { "epoch": 1.914452365521711, "grad_norm": 0.734461784362793, "learning_rate": 1.5291688663019885e-06, "loss": 0.0876, "step": 11816 }, { "epoch": 1.9146143875567079, "grad_norm": 0.858860194683075, "learning_rate": 1.5287658969683294e-06, "loss": 0.105, "step": 11817 }, { "epoch": 1.9147764095917044, "grad_norm": 0.7746593356132507, "learning_rate": 1.5283629573520841e-06, "loss": 0.0921, "step": 11818 }, { "epoch": 1.9149384316267013, "grad_norm": 0.7499451637268066, "learning_rate": 1.5279600474655814e-06, "loss": 0.0905, "step": 11819 }, { "epoch": 1.915100453661698, "grad_norm": 0.7706909775733948, "learning_rate": 1.5275571673211487e-06, "loss": 0.0979, "step": 11820 }, { "epoch": 1.9152624756966947, "grad_norm": 0.9270902872085571, "learning_rate": 1.5271543169311137e-06, "loss": 0.1123, "step": 11821 }, { "epoch": 1.9154244977316917, "grad_norm": 0.9533674120903015, "learning_rate": 1.5267514963078014e-06, "loss": 0.1095, "step": 11822 }, { "epoch": 1.9155865197666881, "grad_norm": 0.6820551156997681, "learning_rate": 1.5263487054635386e-06, "loss": 0.0837, "step": 11823 }, { "epoch": 1.915748541801685, "grad_norm": 0.7506440281867981, "learning_rate": 1.5259459444106497e-06, "loss": 0.0904, "step": 11824 }, { "epoch": 1.9159105638366818, "grad_norm": 0.7518582344055176, "learning_rate": 1.5255432131614572e-06, "loss": 0.0924, "step": 11825 }, { "epoch": 1.9160725858716785, "grad_norm": 0.7959820032119751, "learning_rate": 1.5251405117282843e-06, "loss": 0.0992, "step": 11826 }, { "epoch": 1.9162346079066754, "grad_norm": 0.8564347624778748, "learning_rate": 1.5247378401234521e-06, "loss": 0.1062, "step": 11827 }, { "epoch": 1.916396629941672, "grad_norm": 0.8178135752677917, "learning_rate": 1.524335198359283e-06, "loss": 0.0948, "step": 11828 }, { "epoch": 1.9165586519766689, "grad_norm": 0.8334271907806396, "learning_rate": 1.5239325864480952e-06, "loss": 0.1052, "step": 11829 }, { "epoch": 1.9167206740116656, "grad_norm": 0.7920629978179932, "learning_rate": 1.5235300044022088e-06, "loss": 0.0976, "step": 11830 }, { "epoch": 1.9168826960466623, "grad_norm": 0.7598364353179932, "learning_rate": 1.5231274522339407e-06, "loss": 0.091, "step": 11831 }, { "epoch": 1.9170447180816592, "grad_norm": 0.8865482211112976, "learning_rate": 1.522724929955608e-06, "loss": 0.1087, "step": 11832 }, { "epoch": 1.9172067401166557, "grad_norm": 0.7840036153793335, "learning_rate": 1.5223224375795287e-06, "loss": 0.0972, "step": 11833 }, { "epoch": 1.9173687621516526, "grad_norm": 0.7893587350845337, "learning_rate": 1.5219199751180162e-06, "loss": 0.0954, "step": 11834 }, { "epoch": 1.9175307841866494, "grad_norm": 0.7772044539451599, "learning_rate": 1.5215175425833855e-06, "loss": 0.0893, "step": 11835 }, { "epoch": 1.917692806221646, "grad_norm": 0.8612580299377441, "learning_rate": 1.5211151399879505e-06, "loss": 0.0945, "step": 11836 }, { "epoch": 1.917854828256643, "grad_norm": 0.8095962405204773, "learning_rate": 1.5207127673440226e-06, "loss": 0.1019, "step": 11837 }, { "epoch": 1.9180168502916397, "grad_norm": 0.794070839881897, "learning_rate": 1.5203104246639144e-06, "loss": 0.0995, "step": 11838 }, { "epoch": 1.9181788723266364, "grad_norm": 0.8148672580718994, "learning_rate": 1.5199081119599373e-06, "loss": 0.1067, "step": 11839 }, { "epoch": 1.9183408943616331, "grad_norm": 0.8468709588050842, "learning_rate": 1.5195058292443996e-06, "loss": 0.1072, "step": 11840 }, { "epoch": 1.9185029163966298, "grad_norm": 0.8336006999015808, "learning_rate": 1.5191035765296104e-06, "loss": 0.1073, "step": 11841 }, { "epoch": 1.9186649384316268, "grad_norm": 0.7484931945800781, "learning_rate": 1.518701353827878e-06, "loss": 0.0902, "step": 11842 }, { "epoch": 1.9188269604666235, "grad_norm": 0.8069817423820496, "learning_rate": 1.5182991611515102e-06, "loss": 0.0964, "step": 11843 }, { "epoch": 1.9189889825016202, "grad_norm": 0.7606700658798218, "learning_rate": 1.5178969985128122e-06, "loss": 0.0955, "step": 11844 }, { "epoch": 1.9191510045366171, "grad_norm": 0.936642050743103, "learning_rate": 1.5174948659240896e-06, "loss": 0.1097, "step": 11845 }, { "epoch": 1.9193130265716136, "grad_norm": 0.827983021736145, "learning_rate": 1.5170927633976457e-06, "loss": 0.1045, "step": 11846 }, { "epoch": 1.9194750486066106, "grad_norm": 0.8345808386802673, "learning_rate": 1.516690690945785e-06, "loss": 0.0996, "step": 11847 }, { "epoch": 1.9196370706416073, "grad_norm": 0.8033564686775208, "learning_rate": 1.5162886485808102e-06, "loss": 0.1027, "step": 11848 }, { "epoch": 1.919799092676604, "grad_norm": 0.8006458878517151, "learning_rate": 1.5158866363150215e-06, "loss": 0.1001, "step": 11849 }, { "epoch": 1.919961114711601, "grad_norm": 0.8043130040168762, "learning_rate": 1.515484654160721e-06, "loss": 0.1028, "step": 11850 }, { "epoch": 1.9201231367465974, "grad_norm": 0.8533475995063782, "learning_rate": 1.515082702130206e-06, "loss": 0.1065, "step": 11851 }, { "epoch": 1.9202851587815943, "grad_norm": 0.9136732816696167, "learning_rate": 1.5146807802357782e-06, "loss": 0.1156, "step": 11852 }, { "epoch": 1.920447180816591, "grad_norm": 0.8210397958755493, "learning_rate": 1.5142788884897336e-06, "loss": 0.1002, "step": 11853 }, { "epoch": 1.9206092028515878, "grad_norm": 0.906101405620575, "learning_rate": 1.5138770269043704e-06, "loss": 0.1062, "step": 11854 }, { "epoch": 1.9207712248865847, "grad_norm": 0.8309211730957031, "learning_rate": 1.5134751954919833e-06, "loss": 0.0985, "step": 11855 }, { "epoch": 1.9209332469215812, "grad_norm": 0.9222484230995178, "learning_rate": 1.513073394264867e-06, "loss": 0.1017, "step": 11856 }, { "epoch": 1.9210952689565781, "grad_norm": 0.7615657448768616, "learning_rate": 1.5126716232353178e-06, "loss": 0.0931, "step": 11857 }, { "epoch": 1.9212572909915748, "grad_norm": 0.823805570602417, "learning_rate": 1.5122698824156271e-06, "loss": 0.0979, "step": 11858 }, { "epoch": 1.9214193130265715, "grad_norm": 0.801001787185669, "learning_rate": 1.5118681718180883e-06, "loss": 0.1006, "step": 11859 }, { "epoch": 1.9215813350615685, "grad_norm": 0.7479082345962524, "learning_rate": 1.5114664914549903e-06, "loss": 0.0909, "step": 11860 }, { "epoch": 1.9217433570965652, "grad_norm": 0.662217915058136, "learning_rate": 1.5110648413386275e-06, "loss": 0.0826, "step": 11861 }, { "epoch": 1.921905379131562, "grad_norm": 0.837660014629364, "learning_rate": 1.5106632214812865e-06, "loss": 0.1019, "step": 11862 }, { "epoch": 1.9220674011665586, "grad_norm": 0.8495209813117981, "learning_rate": 1.5102616318952575e-06, "loss": 0.105, "step": 11863 }, { "epoch": 1.9222294232015553, "grad_norm": 0.723486065864563, "learning_rate": 1.5098600725928269e-06, "loss": 0.0883, "step": 11864 }, { "epoch": 1.9223914452365523, "grad_norm": 0.7528011798858643, "learning_rate": 1.5094585435862817e-06, "loss": 0.0919, "step": 11865 }, { "epoch": 1.922553467271549, "grad_norm": 0.9446701407432556, "learning_rate": 1.5090570448879088e-06, "loss": 0.115, "step": 11866 }, { "epoch": 1.9227154893065457, "grad_norm": 0.8201817274093628, "learning_rate": 1.5086555765099916e-06, "loss": 0.1131, "step": 11867 }, { "epoch": 1.9228775113415426, "grad_norm": 0.6670837998390198, "learning_rate": 1.5082541384648154e-06, "loss": 0.0816, "step": 11868 }, { "epoch": 1.923039533376539, "grad_norm": 0.6985659003257751, "learning_rate": 1.5078527307646622e-06, "loss": 0.0847, "step": 11869 }, { "epoch": 1.923201555411536, "grad_norm": 0.8145870566368103, "learning_rate": 1.5074513534218137e-06, "loss": 0.1065, "step": 11870 }, { "epoch": 1.9233635774465327, "grad_norm": 0.7868598699569702, "learning_rate": 1.5070500064485527e-06, "loss": 0.0941, "step": 11871 }, { "epoch": 1.9235255994815295, "grad_norm": 0.8277310729026794, "learning_rate": 1.5066486898571588e-06, "loss": 0.0937, "step": 11872 }, { "epoch": 1.9236876215165264, "grad_norm": 0.752480685710907, "learning_rate": 1.5062474036599109e-06, "loss": 0.1008, "step": 11873 }, { "epoch": 1.9238496435515229, "grad_norm": 0.7744075655937195, "learning_rate": 1.5058461478690878e-06, "loss": 0.0879, "step": 11874 }, { "epoch": 1.9240116655865198, "grad_norm": 0.8470804691314697, "learning_rate": 1.505444922496966e-06, "loss": 0.0971, "step": 11875 }, { "epoch": 1.9241736876215165, "grad_norm": 0.6907162666320801, "learning_rate": 1.5050437275558233e-06, "loss": 0.0813, "step": 11876 }, { "epoch": 1.9243357096565132, "grad_norm": 0.8127276301383972, "learning_rate": 1.5046425630579348e-06, "loss": 0.1044, "step": 11877 }, { "epoch": 1.9244977316915102, "grad_norm": 0.8600353598594666, "learning_rate": 1.5042414290155754e-06, "loss": 0.1021, "step": 11878 }, { "epoch": 1.9246597537265067, "grad_norm": 0.7428255081176758, "learning_rate": 1.5038403254410183e-06, "loss": 0.0966, "step": 11879 }, { "epoch": 1.9248217757615036, "grad_norm": 0.8952332139015198, "learning_rate": 1.5034392523465364e-06, "loss": 0.097, "step": 11880 }, { "epoch": 1.9249837977965003, "grad_norm": 0.7860379815101624, "learning_rate": 1.5030382097444025e-06, "loss": 0.0909, "step": 11881 }, { "epoch": 1.925145819831497, "grad_norm": 0.8410727977752686, "learning_rate": 1.502637197646886e-06, "loss": 0.0759, "step": 11882 }, { "epoch": 1.925307841866494, "grad_norm": 0.8401789665222168, "learning_rate": 1.5022362160662584e-06, "loss": 0.1057, "step": 11883 }, { "epoch": 1.9254698639014904, "grad_norm": 0.7923102378845215, "learning_rate": 1.5018352650147872e-06, "loss": 0.0931, "step": 11884 }, { "epoch": 1.9256318859364874, "grad_norm": 0.8386868834495544, "learning_rate": 1.5014343445047414e-06, "loss": 0.1025, "step": 11885 }, { "epoch": 1.925793907971484, "grad_norm": 0.723055362701416, "learning_rate": 1.5010334545483885e-06, "loss": 0.0889, "step": 11886 }, { "epoch": 1.9259559300064808, "grad_norm": 0.8790740966796875, "learning_rate": 1.5006325951579948e-06, "loss": 0.1046, "step": 11887 }, { "epoch": 1.9261179520414777, "grad_norm": 0.8958154320716858, "learning_rate": 1.500231766345825e-06, "loss": 0.1142, "step": 11888 }, { "epoch": 1.9262799740764744, "grad_norm": 0.7713485360145569, "learning_rate": 1.499830968124143e-06, "loss": 0.1023, "step": 11889 }, { "epoch": 1.9264419961114712, "grad_norm": 0.8863093852996826, "learning_rate": 1.4994302005052141e-06, "loss": 0.1149, "step": 11890 }, { "epoch": 1.9266040181464679, "grad_norm": 0.7300902009010315, "learning_rate": 1.4990294635012995e-06, "loss": 0.0852, "step": 11891 }, { "epoch": 1.9267660401814646, "grad_norm": 0.7891831398010254, "learning_rate": 1.4986287571246614e-06, "loss": 0.1016, "step": 11892 }, { "epoch": 1.9269280622164615, "grad_norm": 0.8113868236541748, "learning_rate": 1.4982280813875593e-06, "loss": 0.1071, "step": 11893 }, { "epoch": 1.9270900842514582, "grad_norm": 0.8107513189315796, "learning_rate": 1.4978274363022532e-06, "loss": 0.0996, "step": 11894 }, { "epoch": 1.927252106286455, "grad_norm": 0.7865254878997803, "learning_rate": 1.497426821881003e-06, "loss": 0.0956, "step": 11895 }, { "epoch": 1.9274141283214519, "grad_norm": 0.8345635533332825, "learning_rate": 1.4970262381360664e-06, "loss": 0.1072, "step": 11896 }, { "epoch": 1.9275761503564484, "grad_norm": 0.7102372050285339, "learning_rate": 1.4966256850796993e-06, "loss": 0.0904, "step": 11897 }, { "epoch": 1.9277381723914453, "grad_norm": 0.8325737118721008, "learning_rate": 1.4962251627241583e-06, "loss": 0.0958, "step": 11898 }, { "epoch": 1.927900194426442, "grad_norm": 0.8160897493362427, "learning_rate": 1.4958246710816976e-06, "loss": 0.101, "step": 11899 }, { "epoch": 1.9280622164614387, "grad_norm": 0.794330358505249, "learning_rate": 1.4954242101645722e-06, "loss": 0.0976, "step": 11900 }, { "epoch": 1.9282242384964356, "grad_norm": 0.8225795030593872, "learning_rate": 1.4950237799850354e-06, "loss": 0.104, "step": 11901 }, { "epoch": 1.9283862605314321, "grad_norm": 0.7986380457878113, "learning_rate": 1.4946233805553387e-06, "loss": 0.0807, "step": 11902 }, { "epoch": 1.928548282566429, "grad_norm": 0.7666733860969543, "learning_rate": 1.4942230118877337e-06, "loss": 0.0926, "step": 11903 }, { "epoch": 1.9287103046014258, "grad_norm": 0.8928430676460266, "learning_rate": 1.4938226739944694e-06, "loss": 0.1056, "step": 11904 }, { "epoch": 1.9288723266364225, "grad_norm": 0.7845842838287354, "learning_rate": 1.4934223668877979e-06, "loss": 0.0987, "step": 11905 }, { "epoch": 1.9290343486714194, "grad_norm": 0.8057361245155334, "learning_rate": 1.4930220905799652e-06, "loss": 0.0966, "step": 11906 }, { "epoch": 1.929196370706416, "grad_norm": 0.7778323888778687, "learning_rate": 1.4926218450832208e-06, "loss": 0.0946, "step": 11907 }, { "epoch": 1.9293583927414129, "grad_norm": 0.884856641292572, "learning_rate": 1.4922216304098085e-06, "loss": 0.1084, "step": 11908 }, { "epoch": 1.9295204147764096, "grad_norm": 0.9439958333969116, "learning_rate": 1.4918214465719765e-06, "loss": 0.1146, "step": 11909 }, { "epoch": 1.9296824368114063, "grad_norm": 0.7851779460906982, "learning_rate": 1.4914212935819689e-06, "loss": 0.0973, "step": 11910 }, { "epoch": 1.9298444588464032, "grad_norm": 0.8911312818527222, "learning_rate": 1.4910211714520285e-06, "loss": 0.1109, "step": 11911 }, { "epoch": 1.9300064808814, "grad_norm": 0.8925055265426636, "learning_rate": 1.4906210801943985e-06, "loss": 0.1051, "step": 11912 }, { "epoch": 1.9301685029163966, "grad_norm": 0.8144211769104004, "learning_rate": 1.4902210198213203e-06, "loss": 0.1023, "step": 11913 }, { "epoch": 1.9303305249513933, "grad_norm": 0.8194781541824341, "learning_rate": 1.4898209903450361e-06, "loss": 0.0997, "step": 11914 }, { "epoch": 1.93049254698639, "grad_norm": 0.8512380123138428, "learning_rate": 1.489420991777785e-06, "loss": 0.1035, "step": 11915 }, { "epoch": 1.930654569021387, "grad_norm": 0.8246690630912781, "learning_rate": 1.489021024131806e-06, "loss": 0.107, "step": 11916 }, { "epoch": 1.9308165910563837, "grad_norm": 0.8465759754180908, "learning_rate": 1.4886210874193368e-06, "loss": 0.0924, "step": 11917 }, { "epoch": 1.9309786130913804, "grad_norm": 0.757154643535614, "learning_rate": 1.4882211816526144e-06, "loss": 0.0958, "step": 11918 }, { "epoch": 1.9311406351263773, "grad_norm": 0.8065274357795715, "learning_rate": 1.4878213068438762e-06, "loss": 0.0939, "step": 11919 }, { "epoch": 1.9313026571613738, "grad_norm": 0.8495281934738159, "learning_rate": 1.4874214630053562e-06, "loss": 0.0914, "step": 11920 }, { "epoch": 1.9314646791963708, "grad_norm": 0.8558754920959473, "learning_rate": 1.4870216501492892e-06, "loss": 0.1046, "step": 11921 }, { "epoch": 1.9316267012313675, "grad_norm": 0.7717517018318176, "learning_rate": 1.4866218682879088e-06, "loss": 0.0917, "step": 11922 }, { "epoch": 1.9317887232663642, "grad_norm": 0.7911754250526428, "learning_rate": 1.4862221174334457e-06, "loss": 0.0999, "step": 11923 }, { "epoch": 1.9319507453013611, "grad_norm": 0.7910676598548889, "learning_rate": 1.4858223975981334e-06, "loss": 0.1002, "step": 11924 }, { "epoch": 1.9321127673363576, "grad_norm": 0.816967785358429, "learning_rate": 1.4854227087942016e-06, "loss": 0.0959, "step": 11925 }, { "epoch": 1.9322747893713546, "grad_norm": 0.828427255153656, "learning_rate": 1.4850230510338792e-06, "loss": 0.1038, "step": 11926 }, { "epoch": 1.9324368114063513, "grad_norm": 0.7696671485900879, "learning_rate": 1.484623424329395e-06, "loss": 0.0931, "step": 11927 }, { "epoch": 1.932598833441348, "grad_norm": 0.8365792036056519, "learning_rate": 1.4842238286929777e-06, "loss": 0.1076, "step": 11928 }, { "epoch": 1.932760855476345, "grad_norm": 0.7698932886123657, "learning_rate": 1.4838242641368526e-06, "loss": 0.089, "step": 11929 }, { "epoch": 1.9329228775113414, "grad_norm": 0.8664256930351257, "learning_rate": 1.4834247306732457e-06, "loss": 0.1108, "step": 11930 }, { "epoch": 1.9330848995463383, "grad_norm": 0.9179291129112244, "learning_rate": 1.4830252283143825e-06, "loss": 0.107, "step": 11931 }, { "epoch": 1.933246921581335, "grad_norm": 0.798081636428833, "learning_rate": 1.4826257570724856e-06, "loss": 0.0952, "step": 11932 }, { "epoch": 1.9334089436163318, "grad_norm": 0.7657841444015503, "learning_rate": 1.4822263169597789e-06, "loss": 0.0892, "step": 11933 }, { "epoch": 1.9335709656513287, "grad_norm": 0.7410942316055298, "learning_rate": 1.4818269079884845e-06, "loss": 0.091, "step": 11934 }, { "epoch": 1.9337329876863252, "grad_norm": 0.8873445987701416, "learning_rate": 1.4814275301708222e-06, "loss": 0.1068, "step": 11935 }, { "epoch": 1.9338950097213221, "grad_norm": 0.835503101348877, "learning_rate": 1.4810281835190132e-06, "loss": 0.0989, "step": 11936 }, { "epoch": 1.9340570317563188, "grad_norm": 0.831396758556366, "learning_rate": 1.4806288680452747e-06, "loss": 0.1001, "step": 11937 }, { "epoch": 1.9342190537913155, "grad_norm": 0.7926315665245056, "learning_rate": 1.4802295837618268e-06, "loss": 0.0978, "step": 11938 }, { "epoch": 1.9343810758263125, "grad_norm": 0.9609958529472351, "learning_rate": 1.4798303306808857e-06, "loss": 0.1113, "step": 11939 }, { "epoch": 1.9345430978613092, "grad_norm": 0.8151983618736267, "learning_rate": 1.479431108814668e-06, "loss": 0.0924, "step": 11940 }, { "epoch": 1.934705119896306, "grad_norm": 0.763329803943634, "learning_rate": 1.4790319181753884e-06, "loss": 0.0871, "step": 11941 }, { "epoch": 1.9348671419313026, "grad_norm": 0.827495813369751, "learning_rate": 1.4786327587752608e-06, "loss": 0.0946, "step": 11942 }, { "epoch": 1.9350291639662993, "grad_norm": 0.7745943665504456, "learning_rate": 1.4782336306265002e-06, "loss": 0.0914, "step": 11943 }, { "epoch": 1.9351911860012962, "grad_norm": 0.8348422646522522, "learning_rate": 1.4778345337413174e-06, "loss": 0.1025, "step": 11944 }, { "epoch": 1.935353208036293, "grad_norm": 0.9461818933486938, "learning_rate": 1.4774354681319247e-06, "loss": 0.1125, "step": 11945 }, { "epoch": 1.9355152300712897, "grad_norm": 8.034222602844238, "learning_rate": 1.4770364338105315e-06, "loss": 0.0958, "step": 11946 }, { "epoch": 1.9356772521062866, "grad_norm": 0.747870147228241, "learning_rate": 1.4766374307893477e-06, "loss": 0.0903, "step": 11947 }, { "epoch": 1.935839274141283, "grad_norm": 0.8536267876625061, "learning_rate": 1.4762384590805823e-06, "loss": 0.1048, "step": 11948 }, { "epoch": 1.93600129617628, "grad_norm": 0.8837407827377319, "learning_rate": 1.475839518696443e-06, "loss": 0.1007, "step": 11949 }, { "epoch": 1.9361633182112767, "grad_norm": 0.8272178173065186, "learning_rate": 1.475440609649136e-06, "loss": 0.1017, "step": 11950 }, { "epoch": 1.9363253402462735, "grad_norm": 0.8612387180328369, "learning_rate": 1.4750417319508658e-06, "loss": 0.1037, "step": 11951 }, { "epoch": 1.9364873622812704, "grad_norm": 0.8727052211761475, "learning_rate": 1.4746428856138395e-06, "loss": 0.1062, "step": 11952 }, { "epoch": 1.9366493843162669, "grad_norm": 0.7352006435394287, "learning_rate": 1.4742440706502591e-06, "loss": 0.0934, "step": 11953 }, { "epoch": 1.9368114063512638, "grad_norm": 0.8387872576713562, "learning_rate": 1.4738452870723286e-06, "loss": 0.1012, "step": 11954 }, { "epoch": 1.9369734283862605, "grad_norm": 0.8393522500991821, "learning_rate": 1.4734465348922484e-06, "loss": 0.0972, "step": 11955 }, { "epoch": 1.9371354504212572, "grad_norm": 0.8170238733291626, "learning_rate": 1.4730478141222194e-06, "loss": 0.0957, "step": 11956 }, { "epoch": 1.9372974724562542, "grad_norm": 0.908352792263031, "learning_rate": 1.4726491247744429e-06, "loss": 0.1151, "step": 11957 }, { "epoch": 1.9374594944912507, "grad_norm": 0.8904739022254944, "learning_rate": 1.4722504668611172e-06, "loss": 0.1008, "step": 11958 }, { "epoch": 1.9376215165262476, "grad_norm": 0.7439394593238831, "learning_rate": 1.4718518403944398e-06, "loss": 0.0903, "step": 11959 }, { "epoch": 1.9377835385612443, "grad_norm": 0.8450915217399597, "learning_rate": 1.4714532453866084e-06, "loss": 0.0941, "step": 11960 }, { "epoch": 1.937945560596241, "grad_norm": 0.8702685236930847, "learning_rate": 1.4710546818498178e-06, "loss": 0.095, "step": 11961 }, { "epoch": 1.938107582631238, "grad_norm": 0.807630717754364, "learning_rate": 1.4706561497962644e-06, "loss": 0.1006, "step": 11962 }, { "epoch": 1.9382696046662347, "grad_norm": 0.8445601463317871, "learning_rate": 1.470257649238142e-06, "loss": 0.0985, "step": 11963 }, { "epoch": 1.9384316267012314, "grad_norm": 0.7522417902946472, "learning_rate": 1.4698591801876435e-06, "loss": 0.0966, "step": 11964 }, { "epoch": 1.938593648736228, "grad_norm": 0.9122772812843323, "learning_rate": 1.4694607426569613e-06, "loss": 0.106, "step": 11965 }, { "epoch": 1.9387556707712248, "grad_norm": 0.8337773084640503, "learning_rate": 1.4690623366582856e-06, "loss": 0.1084, "step": 11966 }, { "epoch": 1.9389176928062217, "grad_norm": 0.8264670968055725, "learning_rate": 1.468663962203809e-06, "loss": 0.108, "step": 11967 }, { "epoch": 1.9390797148412184, "grad_norm": 0.7818570733070374, "learning_rate": 1.4682656193057189e-06, "loss": 0.0925, "step": 11968 }, { "epoch": 1.9392417368762151, "grad_norm": 0.7806037664413452, "learning_rate": 1.467867307976204e-06, "loss": 0.0911, "step": 11969 }, { "epoch": 1.939403758911212, "grad_norm": 0.8423177599906921, "learning_rate": 1.4674690282274517e-06, "loss": 0.0982, "step": 11970 }, { "epoch": 1.9395657809462086, "grad_norm": 0.963801920413971, "learning_rate": 1.4670707800716478e-06, "loss": 0.1058, "step": 11971 }, { "epoch": 1.9397278029812055, "grad_norm": 0.8713017106056213, "learning_rate": 1.4666725635209794e-06, "loss": 0.1015, "step": 11972 }, { "epoch": 1.9398898250162022, "grad_norm": 0.7701468467712402, "learning_rate": 1.4662743785876298e-06, "loss": 0.0996, "step": 11973 }, { "epoch": 1.940051847051199, "grad_norm": 0.7816067934036255, "learning_rate": 1.4658762252837821e-06, "loss": 0.0938, "step": 11974 }, { "epoch": 1.9402138690861959, "grad_norm": 1.0052039623260498, "learning_rate": 1.4654781036216193e-06, "loss": 0.1145, "step": 11975 }, { "epoch": 1.9403758911211924, "grad_norm": 0.8187944889068604, "learning_rate": 1.4650800136133238e-06, "loss": 0.094, "step": 11976 }, { "epoch": 1.9405379131561893, "grad_norm": 0.7152525186538696, "learning_rate": 1.4646819552710751e-06, "loss": 0.0915, "step": 11977 }, { "epoch": 1.940699935191186, "grad_norm": 0.8357136845588684, "learning_rate": 1.4642839286070537e-06, "loss": 0.0951, "step": 11978 }, { "epoch": 1.9408619572261827, "grad_norm": 0.8096917271614075, "learning_rate": 1.463885933633437e-06, "loss": 0.1029, "step": 11979 }, { "epoch": 1.9410239792611796, "grad_norm": 0.8772410154342651, "learning_rate": 1.4634879703624027e-06, "loss": 0.108, "step": 11980 }, { "epoch": 1.9411860012961761, "grad_norm": 0.7658387422561646, "learning_rate": 1.4630900388061292e-06, "loss": 0.0968, "step": 11981 }, { "epoch": 1.941348023331173, "grad_norm": 0.7496578693389893, "learning_rate": 1.4626921389767915e-06, "loss": 0.0883, "step": 11982 }, { "epoch": 1.9415100453661698, "grad_norm": 0.852181613445282, "learning_rate": 1.4622942708865635e-06, "loss": 0.0962, "step": 11983 }, { "epoch": 1.9416720674011665, "grad_norm": 0.8182169198989868, "learning_rate": 1.4618964345476203e-06, "loss": 0.1018, "step": 11984 }, { "epoch": 1.9418340894361634, "grad_norm": 0.781848669052124, "learning_rate": 1.4614986299721328e-06, "loss": 0.0939, "step": 11985 }, { "epoch": 1.94199611147116, "grad_norm": 0.7615278363227844, "learning_rate": 1.4611008571722748e-06, "loss": 0.0952, "step": 11986 }, { "epoch": 1.9421581335061568, "grad_norm": 0.8403747081756592, "learning_rate": 1.460703116160217e-06, "loss": 0.1036, "step": 11987 }, { "epoch": 1.9423201555411536, "grad_norm": 0.7197666168212891, "learning_rate": 1.4603054069481282e-06, "loss": 0.0873, "step": 11988 }, { "epoch": 1.9424821775761503, "grad_norm": 0.7320033311843872, "learning_rate": 1.4599077295481783e-06, "loss": 0.0831, "step": 11989 }, { "epoch": 1.9426441996111472, "grad_norm": 0.7511314749717712, "learning_rate": 1.4595100839725338e-06, "loss": 0.0869, "step": 11990 }, { "epoch": 1.942806221646144, "grad_norm": 0.809007465839386, "learning_rate": 1.4591124702333636e-06, "loss": 0.0954, "step": 11991 }, { "epoch": 1.9429682436811406, "grad_norm": 0.8283510804176331, "learning_rate": 1.4587148883428337e-06, "loss": 0.1011, "step": 11992 }, { "epoch": 1.9431302657161373, "grad_norm": 0.7291625738143921, "learning_rate": 1.4583173383131077e-06, "loss": 0.0918, "step": 11993 }, { "epoch": 1.943292287751134, "grad_norm": 0.7869513034820557, "learning_rate": 1.45791982015635e-06, "loss": 0.0993, "step": 11994 }, { "epoch": 1.943454309786131, "grad_norm": 0.7561447620391846, "learning_rate": 1.457522333884724e-06, "loss": 0.0897, "step": 11995 }, { "epoch": 1.9436163318211277, "grad_norm": 0.8446448445320129, "learning_rate": 1.4571248795103921e-06, "loss": 0.1032, "step": 11996 }, { "epoch": 1.9437783538561244, "grad_norm": 0.8000361919403076, "learning_rate": 1.456727457045515e-06, "loss": 0.096, "step": 11997 }, { "epoch": 1.9439403758911213, "grad_norm": 0.7143806219100952, "learning_rate": 1.4563300665022534e-06, "loss": 0.0851, "step": 11998 }, { "epoch": 1.9441023979261178, "grad_norm": 0.7861617207527161, "learning_rate": 1.4559327078927656e-06, "loss": 0.098, "step": 11999 }, { "epoch": 1.9442644199611148, "grad_norm": 0.8464603424072266, "learning_rate": 1.4555353812292105e-06, "loss": 0.0929, "step": 12000 }, { "epoch": 1.9444264419961115, "grad_norm": 0.817179262638092, "learning_rate": 1.4551380865237456e-06, "loss": 0.09, "step": 12001 }, { "epoch": 1.9445884640311082, "grad_norm": 0.9822414517402649, "learning_rate": 1.4547408237885262e-06, "loss": 0.1172, "step": 12002 }, { "epoch": 1.9447504860661051, "grad_norm": 0.6776533722877502, "learning_rate": 1.454343593035709e-06, "loss": 0.078, "step": 12003 }, { "epoch": 1.9449125081011016, "grad_norm": 0.7004638314247131, "learning_rate": 1.4539463942774462e-06, "loss": 0.0857, "step": 12004 }, { "epoch": 1.9450745301360985, "grad_norm": 0.8583202362060547, "learning_rate": 1.4535492275258928e-06, "loss": 0.0998, "step": 12005 }, { "epoch": 1.9452365521710953, "grad_norm": 0.9123525619506836, "learning_rate": 1.4531520927932017e-06, "loss": 0.107, "step": 12006 }, { "epoch": 1.945398574206092, "grad_norm": 0.8352869153022766, "learning_rate": 1.4527549900915222e-06, "loss": 0.1025, "step": 12007 }, { "epoch": 1.945560596241089, "grad_norm": 0.7704386115074158, "learning_rate": 1.452357919433006e-06, "loss": 0.0921, "step": 12008 }, { "epoch": 1.9457226182760854, "grad_norm": 0.7887938618659973, "learning_rate": 1.4519608808298007e-06, "loss": 0.1008, "step": 12009 }, { "epoch": 1.9458846403110823, "grad_norm": 0.7636969685554504, "learning_rate": 1.4515638742940585e-06, "loss": 0.0949, "step": 12010 }, { "epoch": 1.946046662346079, "grad_norm": 0.7282156348228455, "learning_rate": 1.4511668998379238e-06, "loss": 0.082, "step": 12011 }, { "epoch": 1.9462086843810757, "grad_norm": 0.8383048176765442, "learning_rate": 1.4507699574735436e-06, "loss": 0.1019, "step": 12012 }, { "epoch": 1.9463707064160727, "grad_norm": 0.8490771055221558, "learning_rate": 1.450373047213064e-06, "loss": 0.0947, "step": 12013 }, { "epoch": 1.9465327284510694, "grad_norm": 0.7578926086425781, "learning_rate": 1.4499761690686287e-06, "loss": 0.0871, "step": 12014 }, { "epoch": 1.946694750486066, "grad_norm": 0.847022533416748, "learning_rate": 1.4495793230523817e-06, "loss": 0.1013, "step": 12015 }, { "epoch": 1.9468567725210628, "grad_norm": 0.7949681282043457, "learning_rate": 1.4491825091764656e-06, "loss": 0.094, "step": 12016 }, { "epoch": 1.9470187945560595, "grad_norm": 0.8004774451255798, "learning_rate": 1.4487857274530214e-06, "loss": 0.0959, "step": 12017 }, { "epoch": 1.9471808165910565, "grad_norm": 0.8467872142791748, "learning_rate": 1.4483889778941904e-06, "loss": 0.1049, "step": 12018 }, { "epoch": 1.9473428386260532, "grad_norm": 0.767632246017456, "learning_rate": 1.4479922605121117e-06, "loss": 0.0965, "step": 12019 }, { "epoch": 1.9475048606610499, "grad_norm": 0.7307461500167847, "learning_rate": 1.447595575318924e-06, "loss": 0.0858, "step": 12020 }, { "epoch": 1.9476668826960468, "grad_norm": 0.7664514780044556, "learning_rate": 1.447198922326766e-06, "loss": 0.1005, "step": 12021 }, { "epoch": 1.9478289047310433, "grad_norm": 0.9641528129577637, "learning_rate": 1.4468023015477722e-06, "loss": 0.1177, "step": 12022 }, { "epoch": 1.9479909267660402, "grad_norm": 0.8184154629707336, "learning_rate": 1.4464057129940783e-06, "loss": 0.0997, "step": 12023 }, { "epoch": 1.948152948801037, "grad_norm": 0.7754473090171814, "learning_rate": 1.446009156677822e-06, "loss": 0.0928, "step": 12024 }, { "epoch": 1.9483149708360337, "grad_norm": 0.8045803308486938, "learning_rate": 1.4456126326111337e-06, "loss": 0.0991, "step": 12025 }, { "epoch": 1.9484769928710306, "grad_norm": 0.7364550828933716, "learning_rate": 1.4452161408061478e-06, "loss": 0.0943, "step": 12026 }, { "epoch": 1.948639014906027, "grad_norm": 0.8381508588790894, "learning_rate": 1.4448196812749948e-06, "loss": 0.1039, "step": 12027 }, { "epoch": 1.948801036941024, "grad_norm": 0.7855244278907776, "learning_rate": 1.4444232540298064e-06, "loss": 0.106, "step": 12028 }, { "epoch": 1.9489630589760207, "grad_norm": 0.8092867136001587, "learning_rate": 1.4440268590827117e-06, "loss": 0.1001, "step": 12029 }, { "epoch": 1.9491250810110174, "grad_norm": 0.8405110239982605, "learning_rate": 1.44363049644584e-06, "loss": 0.1049, "step": 12030 }, { "epoch": 1.9492871030460144, "grad_norm": 0.821543276309967, "learning_rate": 1.4432341661313188e-06, "loss": 0.1045, "step": 12031 }, { "epoch": 1.9494491250810109, "grad_norm": 0.7550950646400452, "learning_rate": 1.4428378681512755e-06, "loss": 0.0893, "step": 12032 }, { "epoch": 1.9496111471160078, "grad_norm": 0.733284056186676, "learning_rate": 1.4424416025178335e-06, "loss": 0.0866, "step": 12033 }, { "epoch": 1.9497731691510045, "grad_norm": 0.8085827231407166, "learning_rate": 1.4420453692431197e-06, "loss": 0.0921, "step": 12034 }, { "epoch": 1.9499351911860012, "grad_norm": 0.7645505666732788, "learning_rate": 1.441649168339258e-06, "loss": 0.0936, "step": 12035 }, { "epoch": 1.9500972132209982, "grad_norm": 0.781045138835907, "learning_rate": 1.441252999818371e-06, "loss": 0.0983, "step": 12036 }, { "epoch": 1.9502592352559946, "grad_norm": 0.7690817713737488, "learning_rate": 1.4408568636925796e-06, "loss": 0.0885, "step": 12037 }, { "epoch": 1.9504212572909916, "grad_norm": 0.8504323363304138, "learning_rate": 1.440460759974004e-06, "loss": 0.099, "step": 12038 }, { "epoch": 1.9505832793259883, "grad_norm": 0.788281261920929, "learning_rate": 1.4400646886747672e-06, "loss": 0.0929, "step": 12039 }, { "epoch": 1.950745301360985, "grad_norm": 0.7282304167747498, "learning_rate": 1.4396686498069844e-06, "loss": 0.0861, "step": 12040 }, { "epoch": 1.950907323395982, "grad_norm": 0.8458184003829956, "learning_rate": 1.4392726433827754e-06, "loss": 0.1047, "step": 12041 }, { "epoch": 1.9510693454309787, "grad_norm": 0.9088663458824158, "learning_rate": 1.4388766694142553e-06, "loss": 0.1065, "step": 12042 }, { "epoch": 1.9512313674659754, "grad_norm": 0.827163815498352, "learning_rate": 1.4384807279135438e-06, "loss": 0.095, "step": 12043 }, { "epoch": 1.9513933895009723, "grad_norm": 0.7869982123374939, "learning_rate": 1.4380848188927516e-06, "loss": 0.0893, "step": 12044 }, { "epoch": 1.9515554115359688, "grad_norm": 0.940324604511261, "learning_rate": 1.4376889423639945e-06, "loss": 0.1045, "step": 12045 }, { "epoch": 1.9517174335709657, "grad_norm": 0.8107567429542542, "learning_rate": 1.4372930983393849e-06, "loss": 0.0969, "step": 12046 }, { "epoch": 1.9518794556059624, "grad_norm": 0.8711578845977783, "learning_rate": 1.4368972868310349e-06, "loss": 0.0977, "step": 12047 }, { "epoch": 1.9520414776409591, "grad_norm": 0.7904216647148132, "learning_rate": 1.4365015078510553e-06, "loss": 0.102, "step": 12048 }, { "epoch": 1.952203499675956, "grad_norm": 0.7687509655952454, "learning_rate": 1.4361057614115557e-06, "loss": 0.0939, "step": 12049 }, { "epoch": 1.9523655217109526, "grad_norm": 0.7089811563491821, "learning_rate": 1.4357100475246463e-06, "loss": 0.0847, "step": 12050 }, { "epoch": 1.9525275437459495, "grad_norm": 0.8005346059799194, "learning_rate": 1.435314366202433e-06, "loss": 0.0934, "step": 12051 }, { "epoch": 1.9526895657809462, "grad_norm": 0.8521600365638733, "learning_rate": 1.4349187174570226e-06, "loss": 0.0972, "step": 12052 }, { "epoch": 1.952851587815943, "grad_norm": 0.7018685936927795, "learning_rate": 1.4345231013005229e-06, "loss": 0.0859, "step": 12053 }, { "epoch": 1.9530136098509399, "grad_norm": 0.8380179405212402, "learning_rate": 1.4341275177450389e-06, "loss": 0.0971, "step": 12054 }, { "epoch": 1.9531756318859363, "grad_norm": 0.8714627623558044, "learning_rate": 1.4337319668026726e-06, "loss": 0.1073, "step": 12055 }, { "epoch": 1.9533376539209333, "grad_norm": 0.7577527165412903, "learning_rate": 1.4333364484855277e-06, "loss": 0.0881, "step": 12056 }, { "epoch": 1.95349967595593, "grad_norm": 0.8456433415412903, "learning_rate": 1.4329409628057062e-06, "loss": 0.0975, "step": 12057 }, { "epoch": 1.9536616979909267, "grad_norm": 0.7880712151527405, "learning_rate": 1.432545509775309e-06, "loss": 0.0929, "step": 12058 }, { "epoch": 1.9538237200259236, "grad_norm": 0.7699628472328186, "learning_rate": 1.432150089406436e-06, "loss": 0.0881, "step": 12059 }, { "epoch": 1.9539857420609201, "grad_norm": 0.7075265049934387, "learning_rate": 1.4317547017111865e-06, "loss": 0.0902, "step": 12060 }, { "epoch": 1.954147764095917, "grad_norm": 0.8275308609008789, "learning_rate": 1.4313593467016576e-06, "loss": 0.1006, "step": 12061 }, { "epoch": 1.9543097861309138, "grad_norm": 0.876686155796051, "learning_rate": 1.4309640243899467e-06, "loss": 0.0997, "step": 12062 }, { "epoch": 1.9544718081659105, "grad_norm": 0.9776948690414429, "learning_rate": 1.4305687347881497e-06, "loss": 0.102, "step": 12063 }, { "epoch": 1.9546338302009074, "grad_norm": 0.8320091962814331, "learning_rate": 1.4301734779083614e-06, "loss": 0.0979, "step": 12064 }, { "epoch": 1.9547958522359041, "grad_norm": 0.9126972556114197, "learning_rate": 1.429778253762677e-06, "loss": 0.1078, "step": 12065 }, { "epoch": 1.9549578742709008, "grad_norm": 0.7437605857849121, "learning_rate": 1.4293830623631857e-06, "loss": 0.0844, "step": 12066 }, { "epoch": 1.9551198963058976, "grad_norm": 0.8160969018936157, "learning_rate": 1.4289879037219832e-06, "loss": 0.0981, "step": 12067 }, { "epoch": 1.9552819183408943, "grad_norm": 0.7898657917976379, "learning_rate": 1.4285927778511598e-06, "loss": 0.0951, "step": 12068 }, { "epoch": 1.9554439403758912, "grad_norm": 0.87503582239151, "learning_rate": 1.4281976847628038e-06, "loss": 0.1059, "step": 12069 }, { "epoch": 1.955605962410888, "grad_norm": 0.8932657241821289, "learning_rate": 1.4278026244690046e-06, "loss": 0.1041, "step": 12070 }, { "epoch": 1.9557679844458846, "grad_norm": 0.878448486328125, "learning_rate": 1.4274075969818498e-06, "loss": 0.092, "step": 12071 }, { "epoch": 1.9559300064808816, "grad_norm": 0.7802169322967529, "learning_rate": 1.427012602313429e-06, "loss": 0.0938, "step": 12072 }, { "epoch": 1.956092028515878, "grad_norm": 0.9439387321472168, "learning_rate": 1.4266176404758246e-06, "loss": 0.1151, "step": 12073 }, { "epoch": 1.956254050550875, "grad_norm": 0.7736334800720215, "learning_rate": 1.4262227114811233e-06, "loss": 0.0914, "step": 12074 }, { "epoch": 1.9564160725858717, "grad_norm": 0.8040494322776794, "learning_rate": 1.4258278153414082e-06, "loss": 0.0988, "step": 12075 }, { "epoch": 1.9565780946208684, "grad_norm": 0.8778871297836304, "learning_rate": 1.4254329520687626e-06, "loss": 0.1024, "step": 12076 }, { "epoch": 1.9567401166558653, "grad_norm": 0.7685941457748413, "learning_rate": 1.4250381216752685e-06, "loss": 0.0928, "step": 12077 }, { "epoch": 1.9569021386908618, "grad_norm": 0.8489927053451538, "learning_rate": 1.4246433241730062e-06, "loss": 0.1049, "step": 12078 }, { "epoch": 1.9570641607258588, "grad_norm": 0.7054093480110168, "learning_rate": 1.4242485595740557e-06, "loss": 0.0874, "step": 12079 }, { "epoch": 1.9572261827608555, "grad_norm": 0.8379080295562744, "learning_rate": 1.4238538278904973e-06, "loss": 0.1119, "step": 12080 }, { "epoch": 1.9573882047958522, "grad_norm": 0.7417575716972351, "learning_rate": 1.4234591291344058e-06, "loss": 0.0906, "step": 12081 }, { "epoch": 1.9575502268308491, "grad_norm": 0.7702432870864868, "learning_rate": 1.4230644633178603e-06, "loss": 0.0917, "step": 12082 }, { "epoch": 1.9577122488658456, "grad_norm": 0.7377634644508362, "learning_rate": 1.4226698304529373e-06, "loss": 0.0904, "step": 12083 }, { "epoch": 1.9578742709008425, "grad_norm": 0.7292881608009338, "learning_rate": 1.4222752305517093e-06, "loss": 0.0947, "step": 12084 }, { "epoch": 1.9580362929358393, "grad_norm": 0.8291403651237488, "learning_rate": 1.4218806636262504e-06, "loss": 0.0972, "step": 12085 }, { "epoch": 1.958198314970836, "grad_norm": 0.8070185780525208, "learning_rate": 1.421486129688635e-06, "loss": 0.0977, "step": 12086 }, { "epoch": 1.958360337005833, "grad_norm": 0.8218309283256531, "learning_rate": 1.421091628750935e-06, "loss": 0.1092, "step": 12087 }, { "epoch": 1.9585223590408296, "grad_norm": 0.911953330039978, "learning_rate": 1.4206971608252196e-06, "loss": 0.1041, "step": 12088 }, { "epoch": 1.9586843810758263, "grad_norm": 0.8383828997612, "learning_rate": 1.4203027259235592e-06, "loss": 0.0998, "step": 12089 }, { "epoch": 1.958846403110823, "grad_norm": 0.7073849439620972, "learning_rate": 1.4199083240580218e-06, "loss": 0.0869, "step": 12090 }, { "epoch": 1.9590084251458197, "grad_norm": 0.8118810653686523, "learning_rate": 1.4195139552406766e-06, "loss": 0.095, "step": 12091 }, { "epoch": 1.9591704471808167, "grad_norm": 0.8553704619407654, "learning_rate": 1.41911961948359e-06, "loss": 0.1069, "step": 12092 }, { "epoch": 1.9593324692158134, "grad_norm": 0.8816598653793335, "learning_rate": 1.4187253167988266e-06, "loss": 0.1, "step": 12093 }, { "epoch": 1.95949449125081, "grad_norm": 0.7908227443695068, "learning_rate": 1.4183310471984532e-06, "loss": 0.0944, "step": 12094 }, { "epoch": 1.959656513285807, "grad_norm": 0.8013996481895447, "learning_rate": 1.41793681069453e-06, "loss": 0.1013, "step": 12095 }, { "epoch": 1.9598185353208035, "grad_norm": 0.8322909474372864, "learning_rate": 1.4175426072991234e-06, "loss": 0.0962, "step": 12096 }, { "epoch": 1.9599805573558005, "grad_norm": 0.8282430768013, "learning_rate": 1.4171484370242927e-06, "loss": 0.1035, "step": 12097 }, { "epoch": 1.9601425793907972, "grad_norm": 0.8286914229393005, "learning_rate": 1.416754299882101e-06, "loss": 0.0942, "step": 12098 }, { "epoch": 1.9603046014257939, "grad_norm": 0.7469590902328491, "learning_rate": 1.4163601958846052e-06, "loss": 0.0882, "step": 12099 }, { "epoch": 1.9604666234607908, "grad_norm": 0.8161764144897461, "learning_rate": 1.415966125043864e-06, "loss": 0.1035, "step": 12100 }, { "epoch": 1.9606286454957873, "grad_norm": 0.8531409502029419, "learning_rate": 1.4155720873719378e-06, "loss": 0.0952, "step": 12101 }, { "epoch": 1.9607906675307842, "grad_norm": 0.8322476148605347, "learning_rate": 1.415178082880881e-06, "loss": 0.0941, "step": 12102 }, { "epoch": 1.960952689565781, "grad_norm": 0.7497605681419373, "learning_rate": 1.414784111582749e-06, "loss": 0.087, "step": 12103 }, { "epoch": 1.9611147116007777, "grad_norm": 0.7348429560661316, "learning_rate": 1.4143901734895973e-06, "loss": 0.0873, "step": 12104 }, { "epoch": 1.9612767336357746, "grad_norm": 0.8899442553520203, "learning_rate": 1.4139962686134792e-06, "loss": 0.1037, "step": 12105 }, { "epoch": 1.961438755670771, "grad_norm": 0.7474508881568909, "learning_rate": 1.4136023969664471e-06, "loss": 0.0878, "step": 12106 }, { "epoch": 1.961600777705768, "grad_norm": 0.8204247951507568, "learning_rate": 1.4132085585605528e-06, "loss": 0.1014, "step": 12107 }, { "epoch": 1.9617627997407647, "grad_norm": 0.906720757484436, "learning_rate": 1.4128147534078469e-06, "loss": 0.0965, "step": 12108 }, { "epoch": 1.9619248217757614, "grad_norm": 0.8094850778579712, "learning_rate": 1.4124209815203779e-06, "loss": 0.0905, "step": 12109 }, { "epoch": 1.9620868438107584, "grad_norm": 0.8408055901527405, "learning_rate": 1.4120272429101955e-06, "loss": 0.1028, "step": 12110 }, { "epoch": 1.9622488658457549, "grad_norm": 0.852473258972168, "learning_rate": 1.4116335375893464e-06, "loss": 0.0992, "step": 12111 }, { "epoch": 1.9624108878807518, "grad_norm": 0.7675461173057556, "learning_rate": 1.4112398655698772e-06, "loss": 0.0989, "step": 12112 }, { "epoch": 1.9625729099157485, "grad_norm": 0.7929125428199768, "learning_rate": 1.4108462268638346e-06, "loss": 0.0971, "step": 12113 }, { "epoch": 1.9627349319507452, "grad_norm": 0.7752059102058411, "learning_rate": 1.4104526214832595e-06, "loss": 0.0955, "step": 12114 }, { "epoch": 1.9628969539857422, "grad_norm": 0.7863343358039856, "learning_rate": 1.4100590494401988e-06, "loss": 0.0925, "step": 12115 }, { "epoch": 1.9630589760207389, "grad_norm": 0.8089168071746826, "learning_rate": 1.4096655107466943e-06, "loss": 0.0934, "step": 12116 }, { "epoch": 1.9632209980557356, "grad_norm": 0.8375380635261536, "learning_rate": 1.4092720054147857e-06, "loss": 0.0978, "step": 12117 }, { "epoch": 1.9633830200907323, "grad_norm": 0.7996324896812439, "learning_rate": 1.4088785334565145e-06, "loss": 0.0971, "step": 12118 }, { "epoch": 1.963545042125729, "grad_norm": 0.7718796730041504, "learning_rate": 1.4084850948839194e-06, "loss": 0.1003, "step": 12119 }, { "epoch": 1.963707064160726, "grad_norm": 0.8828782439231873, "learning_rate": 1.4080916897090391e-06, "loss": 0.1085, "step": 12120 }, { "epoch": 1.9638690861957226, "grad_norm": 0.8628697395324707, "learning_rate": 1.4076983179439107e-06, "loss": 0.1042, "step": 12121 }, { "epoch": 1.9640311082307194, "grad_norm": 0.802811861038208, "learning_rate": 1.4073049796005705e-06, "loss": 0.1005, "step": 12122 }, { "epoch": 1.9641931302657163, "grad_norm": 0.7491890788078308, "learning_rate": 1.4069116746910536e-06, "loss": 0.0984, "step": 12123 }, { "epoch": 1.9643551523007128, "grad_norm": 0.9189904928207397, "learning_rate": 1.4065184032273942e-06, "loss": 0.1173, "step": 12124 }, { "epoch": 1.9645171743357097, "grad_norm": 0.6986983418464661, "learning_rate": 1.4061251652216254e-06, "loss": 0.0886, "step": 12125 }, { "epoch": 1.9646791963707064, "grad_norm": 0.9891625642776489, "learning_rate": 1.4057319606857795e-06, "loss": 0.1135, "step": 12126 }, { "epoch": 1.9648412184057031, "grad_norm": 0.8146089911460876, "learning_rate": 1.4053387896318888e-06, "loss": 0.1027, "step": 12127 }, { "epoch": 1.9650032404407, "grad_norm": 0.8153820633888245, "learning_rate": 1.4049456520719805e-06, "loss": 0.0987, "step": 12128 }, { "epoch": 1.9651652624756966, "grad_norm": 0.6867256164550781, "learning_rate": 1.4045525480180849e-06, "loss": 0.0889, "step": 12129 }, { "epoch": 1.9653272845106935, "grad_norm": 0.8280233144760132, "learning_rate": 1.404159477482231e-06, "loss": 0.1033, "step": 12130 }, { "epoch": 1.9654893065456902, "grad_norm": 0.7022069096565247, "learning_rate": 1.4037664404764465e-06, "loss": 0.083, "step": 12131 }, { "epoch": 1.965651328580687, "grad_norm": 0.8097946643829346, "learning_rate": 1.403373437012755e-06, "loss": 0.0949, "step": 12132 }, { "epoch": 1.9658133506156839, "grad_norm": 0.7029479742050171, "learning_rate": 1.4029804671031812e-06, "loss": 0.0877, "step": 12133 }, { "epoch": 1.9659753726506803, "grad_norm": 0.7599433064460754, "learning_rate": 1.4025875307597528e-06, "loss": 0.0892, "step": 12134 }, { "epoch": 1.9661373946856773, "grad_norm": 0.754822313785553, "learning_rate": 1.4021946279944893e-06, "loss": 0.0842, "step": 12135 }, { "epoch": 1.966299416720674, "grad_norm": 0.8841560482978821, "learning_rate": 1.4018017588194132e-06, "loss": 0.1086, "step": 12136 }, { "epoch": 1.9664614387556707, "grad_norm": 0.7273808121681213, "learning_rate": 1.4014089232465458e-06, "loss": 0.0901, "step": 12137 }, { "epoch": 1.9666234607906676, "grad_norm": 0.7973136305809021, "learning_rate": 1.401016121287907e-06, "loss": 0.085, "step": 12138 }, { "epoch": 1.9667854828256643, "grad_norm": 0.7728840708732605, "learning_rate": 1.4006233529555152e-06, "loss": 0.0985, "step": 12139 }, { "epoch": 1.966947504860661, "grad_norm": 0.807805597782135, "learning_rate": 1.4002306182613885e-06, "loss": 0.1028, "step": 12140 }, { "epoch": 1.9671095268956578, "grad_norm": 0.8040981888771057, "learning_rate": 1.399837917217543e-06, "loss": 0.105, "step": 12141 }, { "epoch": 1.9672715489306545, "grad_norm": 0.8386695384979248, "learning_rate": 1.3994452498359963e-06, "loss": 0.1036, "step": 12142 }, { "epoch": 1.9674335709656514, "grad_norm": 0.7889523506164551, "learning_rate": 1.39905261612876e-06, "loss": 0.091, "step": 12143 }, { "epoch": 1.9675955930006481, "grad_norm": 0.792294979095459, "learning_rate": 1.39866001610785e-06, "loss": 0.0958, "step": 12144 }, { "epoch": 1.9677576150356448, "grad_norm": 0.7585206627845764, "learning_rate": 1.3982674497852794e-06, "loss": 0.0992, "step": 12145 }, { "epoch": 1.9679196370706418, "grad_norm": 0.8587349057197571, "learning_rate": 1.3978749171730577e-06, "loss": 0.1085, "step": 12146 }, { "epoch": 1.9680816591056383, "grad_norm": 0.742311418056488, "learning_rate": 1.3974824182831965e-06, "loss": 0.0892, "step": 12147 }, { "epoch": 1.9682436811406352, "grad_norm": 0.805219829082489, "learning_rate": 1.397089953127704e-06, "loss": 0.0963, "step": 12148 }, { "epoch": 1.968405703175632, "grad_norm": 0.8392459154129028, "learning_rate": 1.3966975217185922e-06, "loss": 0.0924, "step": 12149 }, { "epoch": 1.9685677252106286, "grad_norm": 0.7114721536636353, "learning_rate": 1.3963051240678652e-06, "loss": 0.0909, "step": 12150 }, { "epoch": 1.9687297472456255, "grad_norm": 0.8719701766967773, "learning_rate": 1.3959127601875305e-06, "loss": 0.1132, "step": 12151 }, { "epoch": 1.968891769280622, "grad_norm": 0.7276889085769653, "learning_rate": 1.3955204300895937e-06, "loss": 0.0862, "step": 12152 }, { "epoch": 1.969053791315619, "grad_norm": 0.7744901180267334, "learning_rate": 1.3951281337860583e-06, "loss": 0.0942, "step": 12153 }, { "epoch": 1.9692158133506157, "grad_norm": 0.8694067001342773, "learning_rate": 1.3947358712889292e-06, "loss": 0.0981, "step": 12154 }, { "epoch": 1.9693778353856124, "grad_norm": 0.8138403296470642, "learning_rate": 1.394343642610207e-06, "loss": 0.1028, "step": 12155 }, { "epoch": 1.9695398574206093, "grad_norm": 0.8411658406257629, "learning_rate": 1.3939514477618944e-06, "loss": 0.0972, "step": 12156 }, { "epoch": 1.9697018794556058, "grad_norm": 0.7511509656906128, "learning_rate": 1.3935592867559902e-06, "loss": 0.0868, "step": 12157 }, { "epoch": 1.9698639014906028, "grad_norm": 0.7799199819564819, "learning_rate": 1.3931671596044946e-06, "loss": 0.103, "step": 12158 }, { "epoch": 1.9700259235255995, "grad_norm": 0.7285469770431519, "learning_rate": 1.3927750663194055e-06, "loss": 0.0917, "step": 12159 }, { "epoch": 1.9701879455605962, "grad_norm": 0.8250842690467834, "learning_rate": 1.392383006912721e-06, "loss": 0.0929, "step": 12160 }, { "epoch": 1.970349967595593, "grad_norm": 0.7213485836982727, "learning_rate": 1.391990981396435e-06, "loss": 0.0876, "step": 12161 }, { "epoch": 1.9705119896305896, "grad_norm": 0.7967683672904968, "learning_rate": 1.3915989897825424e-06, "loss": 0.0975, "step": 12162 }, { "epoch": 1.9706740116655865, "grad_norm": 0.7662811875343323, "learning_rate": 1.3912070320830406e-06, "loss": 0.0975, "step": 12163 }, { "epoch": 1.9708360337005832, "grad_norm": 0.7548596262931824, "learning_rate": 1.3908151083099195e-06, "loss": 0.0958, "step": 12164 }, { "epoch": 1.97099805573558, "grad_norm": 0.7877540588378906, "learning_rate": 1.390423218475172e-06, "loss": 0.0953, "step": 12165 }, { "epoch": 1.971160077770577, "grad_norm": 0.7539090514183044, "learning_rate": 1.3900313625907886e-06, "loss": 0.0947, "step": 12166 }, { "epoch": 1.9713220998055736, "grad_norm": 0.7667350769042969, "learning_rate": 1.3896395406687597e-06, "loss": 0.091, "step": 12167 }, { "epoch": 1.9714841218405703, "grad_norm": 0.9332156777381897, "learning_rate": 1.3892477527210734e-06, "loss": 0.1106, "step": 12168 }, { "epoch": 1.971646143875567, "grad_norm": 0.8445827960968018, "learning_rate": 1.3888559987597182e-06, "loss": 0.1082, "step": 12169 }, { "epoch": 1.9718081659105637, "grad_norm": 0.8100192546844482, "learning_rate": 1.3884642787966806e-06, "loss": 0.0948, "step": 12170 }, { "epoch": 1.9719701879455607, "grad_norm": 1.1084216833114624, "learning_rate": 1.3880725928439472e-06, "loss": 0.1227, "step": 12171 }, { "epoch": 1.9721322099805574, "grad_norm": 0.7511377930641174, "learning_rate": 1.3876809409134994e-06, "loss": 0.0881, "step": 12172 }, { "epoch": 1.972294232015554, "grad_norm": 0.8674477338790894, "learning_rate": 1.3872893230173245e-06, "loss": 0.1032, "step": 12173 }, { "epoch": 1.972456254050551, "grad_norm": 0.8338192701339722, "learning_rate": 1.3868977391674033e-06, "loss": 0.1097, "step": 12174 }, { "epoch": 1.9726182760855475, "grad_norm": 0.7292324304580688, "learning_rate": 1.3865061893757187e-06, "loss": 0.0908, "step": 12175 }, { "epoch": 1.9727802981205445, "grad_norm": 0.8517992496490479, "learning_rate": 1.386114673654248e-06, "loss": 0.0971, "step": 12176 }, { "epoch": 1.9729423201555412, "grad_norm": 0.6976124048233032, "learning_rate": 1.3857231920149738e-06, "loss": 0.0792, "step": 12177 }, { "epoch": 1.9731043421905379, "grad_norm": 0.8370488286018372, "learning_rate": 1.3853317444698744e-06, "loss": 0.1007, "step": 12178 }, { "epoch": 1.9732663642255348, "grad_norm": 1.0718648433685303, "learning_rate": 1.3849403310309251e-06, "loss": 0.1228, "step": 12179 }, { "epoch": 1.9734283862605313, "grad_norm": 0.7935207486152649, "learning_rate": 1.3845489517101036e-06, "loss": 0.1041, "step": 12180 }, { "epoch": 1.9735904082955282, "grad_norm": 0.8802226185798645, "learning_rate": 1.3841576065193834e-06, "loss": 0.1127, "step": 12181 }, { "epoch": 1.973752430330525, "grad_norm": 0.8075469136238098, "learning_rate": 1.3837662954707426e-06, "loss": 0.0987, "step": 12182 }, { "epoch": 1.9739144523655217, "grad_norm": 0.7968436479568481, "learning_rate": 1.3833750185761507e-06, "loss": 0.0968, "step": 12183 }, { "epoch": 1.9740764744005186, "grad_norm": 0.8242005109786987, "learning_rate": 1.3829837758475808e-06, "loss": 0.0978, "step": 12184 }, { "epoch": 1.974238496435515, "grad_norm": 0.7901813983917236, "learning_rate": 1.3825925672970048e-06, "loss": 0.1014, "step": 12185 }, { "epoch": 1.974400518470512, "grad_norm": 0.8069804310798645, "learning_rate": 1.3822013929363914e-06, "loss": 0.0971, "step": 12186 }, { "epoch": 1.9745625405055087, "grad_norm": 0.8082523345947266, "learning_rate": 1.3818102527777111e-06, "loss": 0.0949, "step": 12187 }, { "epoch": 1.9747245625405054, "grad_norm": 0.7194974422454834, "learning_rate": 1.3814191468329307e-06, "loss": 0.0868, "step": 12188 }, { "epoch": 1.9748865845755024, "grad_norm": 0.8970668911933899, "learning_rate": 1.3810280751140188e-06, "loss": 0.1057, "step": 12189 }, { "epoch": 1.975048606610499, "grad_norm": 0.7686210870742798, "learning_rate": 1.3806370376329388e-06, "loss": 0.0913, "step": 12190 }, { "epoch": 1.9752106286454958, "grad_norm": 0.8302438259124756, "learning_rate": 1.3802460344016552e-06, "loss": 0.1052, "step": 12191 }, { "epoch": 1.9753726506804925, "grad_norm": 0.7827860116958618, "learning_rate": 1.3798550654321347e-06, "loss": 0.091, "step": 12192 }, { "epoch": 1.9755346727154892, "grad_norm": 0.8492727875709534, "learning_rate": 1.3794641307363393e-06, "loss": 0.1065, "step": 12193 }, { "epoch": 1.9756966947504861, "grad_norm": 0.8518586158752441, "learning_rate": 1.379073230326229e-06, "loss": 0.1037, "step": 12194 }, { "epoch": 1.9758587167854829, "grad_norm": 0.8506153225898743, "learning_rate": 1.378682364213765e-06, "loss": 0.1091, "step": 12195 }, { "epoch": 1.9760207388204796, "grad_norm": 0.895915687084198, "learning_rate": 1.3782915324109075e-06, "loss": 0.1101, "step": 12196 }, { "epoch": 1.9761827608554765, "grad_norm": 0.8467504978179932, "learning_rate": 1.377900734929614e-06, "loss": 0.0992, "step": 12197 }, { "epoch": 1.976344782890473, "grad_norm": 0.7702296376228333, "learning_rate": 1.3775099717818432e-06, "loss": 0.0924, "step": 12198 }, { "epoch": 1.97650680492547, "grad_norm": 0.8202542066574097, "learning_rate": 1.377119242979551e-06, "loss": 0.0916, "step": 12199 }, { "epoch": 1.9766688269604666, "grad_norm": 0.9093190431594849, "learning_rate": 1.376728548534692e-06, "loss": 0.1095, "step": 12200 }, { "epoch": 1.9768308489954634, "grad_norm": 0.8310642242431641, "learning_rate": 1.3763378884592215e-06, "loss": 0.0967, "step": 12201 }, { "epoch": 1.9769928710304603, "grad_norm": 0.774259090423584, "learning_rate": 1.3759472627650926e-06, "loss": 0.0962, "step": 12202 }, { "epoch": 1.9771548930654568, "grad_norm": 0.7820743918418884, "learning_rate": 1.3755566714642571e-06, "loss": 0.0974, "step": 12203 }, { "epoch": 1.9773169151004537, "grad_norm": 0.8799723386764526, "learning_rate": 1.3751661145686673e-06, "loss": 0.1064, "step": 12204 }, { "epoch": 1.9774789371354504, "grad_norm": 0.7707881927490234, "learning_rate": 1.3747755920902706e-06, "loss": 0.094, "step": 12205 }, { "epoch": 1.9776409591704471, "grad_norm": 0.9051207900047302, "learning_rate": 1.3743851040410183e-06, "loss": 0.1111, "step": 12206 }, { "epoch": 1.977802981205444, "grad_norm": 0.8284469246864319, "learning_rate": 1.3739946504328594e-06, "loss": 0.1007, "step": 12207 }, { "epoch": 1.9779650032404406, "grad_norm": 0.7556292414665222, "learning_rate": 1.3736042312777381e-06, "loss": 0.0925, "step": 12208 }, { "epoch": 1.9781270252754375, "grad_norm": 0.798641562461853, "learning_rate": 1.3732138465876012e-06, "loss": 0.0906, "step": 12209 }, { "epoch": 1.9782890473104342, "grad_norm": 0.7734487652778625, "learning_rate": 1.3728234963743931e-06, "loss": 0.0985, "step": 12210 }, { "epoch": 1.978451069345431, "grad_norm": 0.7930302619934082, "learning_rate": 1.3724331806500604e-06, "loss": 0.0921, "step": 12211 }, { "epoch": 1.9786130913804278, "grad_norm": 0.8522673845291138, "learning_rate": 1.3720428994265427e-06, "loss": 0.0993, "step": 12212 }, { "epoch": 1.9787751134154243, "grad_norm": 0.7778444290161133, "learning_rate": 1.3716526527157826e-06, "loss": 0.0865, "step": 12213 }, { "epoch": 1.9789371354504213, "grad_norm": 0.8435918092727661, "learning_rate": 1.3712624405297209e-06, "loss": 0.0991, "step": 12214 }, { "epoch": 1.979099157485418, "grad_norm": 0.9310854077339172, "learning_rate": 1.3708722628802968e-06, "loss": 0.1127, "step": 12215 }, { "epoch": 1.9792611795204147, "grad_norm": 0.7594826221466064, "learning_rate": 1.3704821197794491e-06, "loss": 0.093, "step": 12216 }, { "epoch": 1.9794232015554116, "grad_norm": 0.6724945902824402, "learning_rate": 1.3700920112391152e-06, "loss": 0.0864, "step": 12217 }, { "epoch": 1.9795852235904083, "grad_norm": 0.7259680032730103, "learning_rate": 1.369701937271231e-06, "loss": 0.0876, "step": 12218 }, { "epoch": 1.979747245625405, "grad_norm": 1.109673261642456, "learning_rate": 1.369311897887733e-06, "loss": 0.107, "step": 12219 }, { "epoch": 1.9799092676604018, "grad_norm": 0.7597100138664246, "learning_rate": 1.3689218931005543e-06, "loss": 0.09, "step": 12220 }, { "epoch": 1.9800712896953985, "grad_norm": 0.8943421244621277, "learning_rate": 1.3685319229216287e-06, "loss": 0.1104, "step": 12221 }, { "epoch": 1.9802333117303954, "grad_norm": 0.8570875525474548, "learning_rate": 1.368141987362889e-06, "loss": 0.0965, "step": 12222 }, { "epoch": 1.9803953337653921, "grad_norm": 0.8116633296012878, "learning_rate": 1.3677520864362644e-06, "loss": 0.0989, "step": 12223 }, { "epoch": 1.9805573558003888, "grad_norm": 0.8231261968612671, "learning_rate": 1.3673622201536852e-06, "loss": 0.0923, "step": 12224 }, { "epoch": 1.9807193778353858, "grad_norm": 0.7679911851882935, "learning_rate": 1.366972388527082e-06, "loss": 0.0988, "step": 12225 }, { "epoch": 1.9808813998703823, "grad_norm": 0.7934950590133667, "learning_rate": 1.3665825915683829e-06, "loss": 0.0949, "step": 12226 }, { "epoch": 1.9810434219053792, "grad_norm": 0.8774157762527466, "learning_rate": 1.3661928292895123e-06, "loss": 0.1021, "step": 12227 }, { "epoch": 1.981205443940376, "grad_norm": 0.8431322574615479, "learning_rate": 1.3658031017023977e-06, "loss": 0.1071, "step": 12228 }, { "epoch": 1.9813674659753726, "grad_norm": 0.7929509878158569, "learning_rate": 1.3654134088189636e-06, "loss": 0.0955, "step": 12229 }, { "epoch": 1.9815294880103695, "grad_norm": 0.8263244032859802, "learning_rate": 1.3650237506511333e-06, "loss": 0.0889, "step": 12230 }, { "epoch": 1.981691510045366, "grad_norm": 0.8262062668800354, "learning_rate": 1.36463412721083e-06, "loss": 0.1029, "step": 12231 }, { "epoch": 1.981853532080363, "grad_norm": 0.8104032874107361, "learning_rate": 1.3642445385099746e-06, "loss": 0.093, "step": 12232 }, { "epoch": 1.9820155541153597, "grad_norm": 0.7863628268241882, "learning_rate": 1.3638549845604886e-06, "loss": 0.0944, "step": 12233 }, { "epoch": 1.9821775761503564, "grad_norm": 0.7456783652305603, "learning_rate": 1.363465465374289e-06, "loss": 0.093, "step": 12234 }, { "epoch": 1.9823395981853533, "grad_norm": 0.7725854516029358, "learning_rate": 1.3630759809632965e-06, "loss": 0.0886, "step": 12235 }, { "epoch": 1.9825016202203498, "grad_norm": 0.7280664443969727, "learning_rate": 1.362686531339428e-06, "loss": 0.0885, "step": 12236 }, { "epoch": 1.9826636422553467, "grad_norm": 0.9137564301490784, "learning_rate": 1.3622971165146005e-06, "loss": 0.1132, "step": 12237 }, { "epoch": 1.9828256642903435, "grad_norm": 0.8117855787277222, "learning_rate": 1.3619077365007266e-06, "loss": 0.0978, "step": 12238 }, { "epoch": 1.9829876863253402, "grad_norm": 0.7241969108581543, "learning_rate": 1.3615183913097211e-06, "loss": 0.0911, "step": 12239 }, { "epoch": 1.983149708360337, "grad_norm": 0.9119266867637634, "learning_rate": 1.3611290809534997e-06, "loss": 0.1046, "step": 12240 }, { "epoch": 1.9833117303953338, "grad_norm": 0.7932629585266113, "learning_rate": 1.3607398054439713e-06, "loss": 0.1004, "step": 12241 }, { "epoch": 1.9834737524303305, "grad_norm": 0.79780513048172, "learning_rate": 1.3603505647930481e-06, "loss": 0.0932, "step": 12242 }, { "epoch": 1.9836357744653272, "grad_norm": 0.7685292959213257, "learning_rate": 1.3599613590126388e-06, "loss": 0.0969, "step": 12243 }, { "epoch": 1.983797796500324, "grad_norm": 0.7224975228309631, "learning_rate": 1.3595721881146548e-06, "loss": 0.0876, "step": 12244 }, { "epoch": 1.9839598185353209, "grad_norm": 0.7350090742111206, "learning_rate": 1.359183052111001e-06, "loss": 0.0943, "step": 12245 }, { "epoch": 1.9841218405703176, "grad_norm": 0.7922118306159973, "learning_rate": 1.3587939510135856e-06, "loss": 0.0939, "step": 12246 }, { "epoch": 1.9842838626053143, "grad_norm": 0.7330470085144043, "learning_rate": 1.358404884834313e-06, "loss": 0.0913, "step": 12247 }, { "epoch": 1.9844458846403112, "grad_norm": 0.8037322759628296, "learning_rate": 1.3580158535850884e-06, "loss": 0.1021, "step": 12248 }, { "epoch": 1.9846079066753077, "grad_norm": 0.7963745594024658, "learning_rate": 1.3576268572778156e-06, "loss": 0.1056, "step": 12249 }, { "epoch": 1.9847699287103047, "grad_norm": 0.8091640472412109, "learning_rate": 1.357237895924396e-06, "loss": 0.0959, "step": 12250 }, { "epoch": 1.9849319507453014, "grad_norm": 0.8512594103813171, "learning_rate": 1.3568489695367325e-06, "loss": 0.0948, "step": 12251 }, { "epoch": 1.985093972780298, "grad_norm": 0.8013456463813782, "learning_rate": 1.3564600781267234e-06, "loss": 0.0988, "step": 12252 }, { "epoch": 1.985255994815295, "grad_norm": 0.6600555181503296, "learning_rate": 1.3560712217062676e-06, "loss": 0.0808, "step": 12253 }, { "epoch": 1.9854180168502915, "grad_norm": 0.8468796014785767, "learning_rate": 1.3556824002872648e-06, "loss": 0.0989, "step": 12254 }, { "epoch": 1.9855800388852884, "grad_norm": 0.8054071664810181, "learning_rate": 1.3552936138816124e-06, "loss": 0.1016, "step": 12255 }, { "epoch": 1.9857420609202852, "grad_norm": 0.7995196580886841, "learning_rate": 1.3549048625012046e-06, "loss": 0.0946, "step": 12256 }, { "epoch": 1.9859040829552819, "grad_norm": 0.9262045621871948, "learning_rate": 1.3545161461579367e-06, "loss": 0.1046, "step": 12257 }, { "epoch": 1.9860661049902788, "grad_norm": 0.9522650837898254, "learning_rate": 1.354127464863703e-06, "loss": 0.1038, "step": 12258 }, { "epoch": 1.9862281270252753, "grad_norm": 0.8085470199584961, "learning_rate": 1.3537388186303956e-06, "loss": 0.0969, "step": 12259 }, { "epoch": 1.9863901490602722, "grad_norm": 0.7567479014396667, "learning_rate": 1.3533502074699065e-06, "loss": 0.0935, "step": 12260 }, { "epoch": 1.986552171095269, "grad_norm": 0.7655736804008484, "learning_rate": 1.3529616313941264e-06, "loss": 0.0944, "step": 12261 }, { "epoch": 1.9867141931302656, "grad_norm": 0.8020748496055603, "learning_rate": 1.3525730904149443e-06, "loss": 0.0902, "step": 12262 }, { "epoch": 1.9868762151652626, "grad_norm": 0.8509409427642822, "learning_rate": 1.3521845845442489e-06, "loss": 0.1011, "step": 12263 }, { "epoch": 1.987038237200259, "grad_norm": 0.8280978798866272, "learning_rate": 1.351796113793928e-06, "loss": 0.0972, "step": 12264 }, { "epoch": 1.987200259235256, "grad_norm": 0.8213714957237244, "learning_rate": 1.351407678175867e-06, "loss": 0.0884, "step": 12265 }, { "epoch": 1.9873622812702527, "grad_norm": 0.8149861097335815, "learning_rate": 1.3510192777019527e-06, "loss": 0.0976, "step": 12266 }, { "epoch": 1.9875243033052494, "grad_norm": 0.7219667434692383, "learning_rate": 1.3506309123840659e-06, "loss": 0.0856, "step": 12267 }, { "epoch": 1.9876863253402464, "grad_norm": 0.7291889786720276, "learning_rate": 1.3502425822340925e-06, "loss": 0.0941, "step": 12268 }, { "epoch": 1.987848347375243, "grad_norm": 0.7790191769599915, "learning_rate": 1.3498542872639142e-06, "loss": 0.0993, "step": 12269 }, { "epoch": 1.9880103694102398, "grad_norm": 0.7414619326591492, "learning_rate": 1.3494660274854122e-06, "loss": 0.0905, "step": 12270 }, { "epoch": 1.9881723914452365, "grad_norm": 0.7423388957977295, "learning_rate": 1.3490778029104646e-06, "loss": 0.0886, "step": 12271 }, { "epoch": 1.9883344134802332, "grad_norm": 0.8346954584121704, "learning_rate": 1.3486896135509503e-06, "loss": 0.1073, "step": 12272 }, { "epoch": 1.9884964355152301, "grad_norm": 0.7810722589492798, "learning_rate": 1.3483014594187493e-06, "loss": 0.0918, "step": 12273 }, { "epoch": 1.9886584575502269, "grad_norm": 0.7663103938102722, "learning_rate": 1.3479133405257355e-06, "loss": 0.0959, "step": 12274 }, { "epoch": 1.9888204795852236, "grad_norm": 0.7423315048217773, "learning_rate": 1.347525256883786e-06, "loss": 0.0895, "step": 12275 }, { "epoch": 1.9889825016202205, "grad_norm": 0.7934430241584778, "learning_rate": 1.3471372085047743e-06, "loss": 0.0964, "step": 12276 }, { "epoch": 1.989144523655217, "grad_norm": 0.8418073654174805, "learning_rate": 1.346749195400574e-06, "loss": 0.1047, "step": 12277 }, { "epoch": 1.989306545690214, "grad_norm": 0.7353390455245972, "learning_rate": 1.3463612175830578e-06, "loss": 0.0857, "step": 12278 }, { "epoch": 1.9894685677252106, "grad_norm": 0.8936602473258972, "learning_rate": 1.3459732750640967e-06, "loss": 0.1083, "step": 12279 }, { "epoch": 1.9896305897602073, "grad_norm": 0.7523042559623718, "learning_rate": 1.3455853678555605e-06, "loss": 0.086, "step": 12280 }, { "epoch": 1.9897926117952043, "grad_norm": 0.7889019846916199, "learning_rate": 1.3451974959693193e-06, "loss": 0.0887, "step": 12281 }, { "epoch": 1.9899546338302008, "grad_norm": 0.8025287389755249, "learning_rate": 1.3448096594172383e-06, "loss": 0.0981, "step": 12282 }, { "epoch": 1.9901166558651977, "grad_norm": 0.7614328861236572, "learning_rate": 1.3444218582111872e-06, "loss": 0.0908, "step": 12283 }, { "epoch": 1.9902786779001944, "grad_norm": 0.7384836673736572, "learning_rate": 1.344034092363032e-06, "loss": 0.0878, "step": 12284 }, { "epoch": 1.9904406999351911, "grad_norm": 0.7912430167198181, "learning_rate": 1.3436463618846351e-06, "loss": 0.0902, "step": 12285 }, { "epoch": 1.990602721970188, "grad_norm": 0.8135248422622681, "learning_rate": 1.343258666787861e-06, "loss": 0.0986, "step": 12286 }, { "epoch": 1.9907647440051845, "grad_norm": 0.7323644757270813, "learning_rate": 1.3428710070845716e-06, "loss": 0.0917, "step": 12287 }, { "epoch": 1.9909267660401815, "grad_norm": 0.7355465292930603, "learning_rate": 1.3424833827866312e-06, "loss": 0.0901, "step": 12288 }, { "epoch": 1.9910887880751782, "grad_norm": 0.7884531021118164, "learning_rate": 1.342095793905897e-06, "loss": 0.0937, "step": 12289 }, { "epoch": 1.991250810110175, "grad_norm": 0.7865089178085327, "learning_rate": 1.3417082404542295e-06, "loss": 0.0997, "step": 12290 }, { "epoch": 1.9914128321451718, "grad_norm": 0.711365818977356, "learning_rate": 1.3413207224434867e-06, "loss": 0.0833, "step": 12291 }, { "epoch": 1.9915748541801686, "grad_norm": 0.9250528812408447, "learning_rate": 1.3409332398855263e-06, "loss": 0.1068, "step": 12292 }, { "epoch": 1.9917368762151653, "grad_norm": 0.9538305401802063, "learning_rate": 1.3405457927922032e-06, "loss": 0.0967, "step": 12293 }, { "epoch": 1.991898898250162, "grad_norm": 0.7699099183082581, "learning_rate": 1.3401583811753735e-06, "loss": 0.0936, "step": 12294 }, { "epoch": 1.9920609202851587, "grad_norm": 0.7329535484313965, "learning_rate": 1.3397710050468903e-06, "loss": 0.0914, "step": 12295 }, { "epoch": 1.9922229423201556, "grad_norm": 0.7969571352005005, "learning_rate": 1.339383664418607e-06, "loss": 0.0971, "step": 12296 }, { "epoch": 1.9923849643551523, "grad_norm": 0.7721100449562073, "learning_rate": 1.3389963593023747e-06, "loss": 0.0894, "step": 12297 }, { "epoch": 1.992546986390149, "grad_norm": 0.8299007415771484, "learning_rate": 1.3386090897100442e-06, "loss": 0.0902, "step": 12298 }, { "epoch": 1.992709008425146, "grad_norm": 0.8381181955337524, "learning_rate": 1.338221855653466e-06, "loss": 0.0899, "step": 12299 }, { "epoch": 1.9928710304601425, "grad_norm": 0.7096841931343079, "learning_rate": 1.3378346571444866e-06, "loss": 0.0798, "step": 12300 }, { "epoch": 1.9930330524951394, "grad_norm": 0.7077094912528992, "learning_rate": 1.3374474941949535e-06, "loss": 0.0878, "step": 12301 }, { "epoch": 1.9931950745301361, "grad_norm": 0.8370009660720825, "learning_rate": 1.3370603668167156e-06, "loss": 0.1005, "step": 12302 }, { "epoch": 1.9933570965651328, "grad_norm": 0.7673652768135071, "learning_rate": 1.3366732750216154e-06, "loss": 0.0939, "step": 12303 }, { "epoch": 1.9935191186001298, "grad_norm": 0.7367184162139893, "learning_rate": 1.3362862188214977e-06, "loss": 0.0913, "step": 12304 }, { "epoch": 1.9936811406351262, "grad_norm": 0.8323283195495605, "learning_rate": 1.3358991982282055e-06, "loss": 0.1035, "step": 12305 }, { "epoch": 1.9938431626701232, "grad_norm": 0.7226582765579224, "learning_rate": 1.3355122132535806e-06, "loss": 0.0931, "step": 12306 }, { "epoch": 1.99400518470512, "grad_norm": 0.8198063969612122, "learning_rate": 1.3351252639094641e-06, "loss": 0.1003, "step": 12307 }, { "epoch": 1.9941672067401166, "grad_norm": 0.6655625104904175, "learning_rate": 1.3347383502076955e-06, "loss": 0.0806, "step": 12308 }, { "epoch": 1.9943292287751135, "grad_norm": 0.8288505673408508, "learning_rate": 1.3343514721601136e-06, "loss": 0.0963, "step": 12309 }, { "epoch": 1.99449125081011, "grad_norm": 0.7732831835746765, "learning_rate": 1.333964629778556e-06, "loss": 0.1015, "step": 12310 }, { "epoch": 1.994653272845107, "grad_norm": 0.821479320526123, "learning_rate": 1.3335778230748588e-06, "loss": 0.1061, "step": 12311 }, { "epoch": 1.9948152948801037, "grad_norm": 0.8334558010101318, "learning_rate": 1.3331910520608576e-06, "loss": 0.095, "step": 12312 }, { "epoch": 1.9949773169151004, "grad_norm": 0.8023782968521118, "learning_rate": 1.3328043167483868e-06, "loss": 0.1001, "step": 12313 }, { "epoch": 1.9951393389500973, "grad_norm": 0.8799611330032349, "learning_rate": 1.3324176171492798e-06, "loss": 0.1116, "step": 12314 }, { "epoch": 1.9953013609850938, "grad_norm": 0.8682718276977539, "learning_rate": 1.3320309532753667e-06, "loss": 0.1067, "step": 12315 }, { "epoch": 1.9954633830200907, "grad_norm": 0.7998306751251221, "learning_rate": 1.3316443251384808e-06, "loss": 0.0978, "step": 12316 }, { "epoch": 1.9956254050550875, "grad_norm": 1.0022040605545044, "learning_rate": 1.3312577327504522e-06, "loss": 0.1222, "step": 12317 }, { "epoch": 1.9957874270900842, "grad_norm": 0.7971022129058838, "learning_rate": 1.3308711761231074e-06, "loss": 0.0886, "step": 12318 }, { "epoch": 1.995949449125081, "grad_norm": 0.7757930755615234, "learning_rate": 1.3304846552682756e-06, "loss": 0.0984, "step": 12319 }, { "epoch": 1.9961114711600778, "grad_norm": 0.8637643456459045, "learning_rate": 1.3300981701977834e-06, "loss": 0.1074, "step": 12320 }, { "epoch": 1.9962734931950745, "grad_norm": 0.7365697622299194, "learning_rate": 1.3297117209234558e-06, "loss": 0.0892, "step": 12321 }, { "epoch": 1.9964355152300715, "grad_norm": 0.861108660697937, "learning_rate": 1.3293253074571178e-06, "loss": 0.0967, "step": 12322 }, { "epoch": 1.996597537265068, "grad_norm": 0.7773643732070923, "learning_rate": 1.328938929810592e-06, "loss": 0.0964, "step": 12323 }, { "epoch": 1.9967595593000649, "grad_norm": 0.7831589579582214, "learning_rate": 1.3285525879957011e-06, "loss": 0.0884, "step": 12324 }, { "epoch": 1.9969215813350616, "grad_norm": 0.7906063795089722, "learning_rate": 1.3281662820242664e-06, "loss": 0.0979, "step": 12325 }, { "epoch": 1.9970836033700583, "grad_norm": 0.9140375256538391, "learning_rate": 1.3277800119081077e-06, "loss": 0.1065, "step": 12326 }, { "epoch": 1.9972456254050552, "grad_norm": 0.8144938349723816, "learning_rate": 1.327393777659044e-06, "loss": 0.1046, "step": 12327 }, { "epoch": 1.9974076474400517, "grad_norm": 0.795957624912262, "learning_rate": 1.3270075792888937e-06, "loss": 0.0933, "step": 12328 }, { "epoch": 1.9975696694750487, "grad_norm": 0.7858502268791199, "learning_rate": 1.326621416809472e-06, "loss": 0.093, "step": 12329 }, { "epoch": 1.9977316915100454, "grad_norm": 0.7812694907188416, "learning_rate": 1.3262352902325944e-06, "loss": 0.1023, "step": 12330 }, { "epoch": 1.997893713545042, "grad_norm": 0.8662413954734802, "learning_rate": 1.3258491995700777e-06, "loss": 0.1051, "step": 12331 }, { "epoch": 1.998055735580039, "grad_norm": 0.8278566598892212, "learning_rate": 1.325463144833735e-06, "loss": 0.0963, "step": 12332 }, { "epoch": 1.9982177576150355, "grad_norm": 0.7443529963493347, "learning_rate": 1.3250771260353764e-06, "loss": 0.0903, "step": 12333 }, { "epoch": 1.9983797796500324, "grad_norm": 0.7548669576644897, "learning_rate": 1.324691143186814e-06, "loss": 0.0901, "step": 12334 }, { "epoch": 1.9985418016850292, "grad_norm": 0.839152455329895, "learning_rate": 1.3243051962998598e-06, "loss": 0.0943, "step": 12335 }, { "epoch": 1.9987038237200259, "grad_norm": 0.7400187849998474, "learning_rate": 1.323919285386321e-06, "loss": 0.0864, "step": 12336 }, { "epoch": 1.9988658457550228, "grad_norm": 0.7706547379493713, "learning_rate": 1.3235334104580061e-06, "loss": 0.0983, "step": 12337 }, { "epoch": 1.9990278677900193, "grad_norm": 0.8732326626777649, "learning_rate": 1.3231475715267217e-06, "loss": 0.1046, "step": 12338 }, { "epoch": 1.9991898898250162, "grad_norm": 0.7628592848777771, "learning_rate": 1.3227617686042734e-06, "loss": 0.0925, "step": 12339 }, { "epoch": 1.999351911860013, "grad_norm": 0.7879310846328735, "learning_rate": 1.3223760017024661e-06, "loss": 0.1, "step": 12340 }, { "epoch": 1.9995139338950096, "grad_norm": 0.7694489359855652, "learning_rate": 1.321990270833104e-06, "loss": 0.0851, "step": 12341 }, { "epoch": 1.9996759559300066, "grad_norm": 0.7563639283180237, "learning_rate": 1.3216045760079882e-06, "loss": 0.0893, "step": 12342 }, { "epoch": 1.9998379779650033, "grad_norm": 0.8943942189216614, "learning_rate": 1.321218917238922e-06, "loss": 0.1126, "step": 12343 }, { "epoch": 2.0, "grad_norm": 0.822969377040863, "learning_rate": 1.3208332945377022e-06, "loss": 0.1075, "step": 12344 }, { "epoch": 2.000162022034997, "grad_norm": 0.6357743740081787, "learning_rate": 1.3204477079161312e-06, "loss": 0.0723, "step": 12345 }, { "epoch": 2.0003240440699934, "grad_norm": 0.5724396109580994, "learning_rate": 1.3200621573860068e-06, "loss": 0.0654, "step": 12346 }, { "epoch": 2.0004860661049904, "grad_norm": 0.6248465776443481, "learning_rate": 1.319676642959124e-06, "loss": 0.0719, "step": 12347 }, { "epoch": 2.000648088139987, "grad_norm": 0.5666736960411072, "learning_rate": 1.3192911646472796e-06, "loss": 0.0628, "step": 12348 }, { "epoch": 2.000810110174984, "grad_norm": 0.5950966477394104, "learning_rate": 1.3189057224622676e-06, "loss": 0.0634, "step": 12349 }, { "epoch": 2.0009721322099807, "grad_norm": 0.624751091003418, "learning_rate": 1.3185203164158838e-06, "loss": 0.0675, "step": 12350 }, { "epoch": 2.001134154244977, "grad_norm": 0.7292012572288513, "learning_rate": 1.3181349465199184e-06, "loss": 0.0717, "step": 12351 }, { "epoch": 2.001296176279974, "grad_norm": 0.5715608596801758, "learning_rate": 1.3177496127861635e-06, "loss": 0.0591, "step": 12352 }, { "epoch": 2.0014581983149706, "grad_norm": 0.6922410130500793, "learning_rate": 1.31736431522641e-06, "loss": 0.0729, "step": 12353 }, { "epoch": 2.0016202203499676, "grad_norm": 0.5971651077270508, "learning_rate": 1.3169790538524457e-06, "loss": 0.0648, "step": 12354 }, { "epoch": 2.0017822423849645, "grad_norm": 0.6563034057617188, "learning_rate": 1.3165938286760599e-06, "loss": 0.0687, "step": 12355 }, { "epoch": 2.001944264419961, "grad_norm": 0.6092911958694458, "learning_rate": 1.316208639709039e-06, "loss": 0.0641, "step": 12356 }, { "epoch": 2.002106286454958, "grad_norm": 0.5998396873474121, "learning_rate": 1.3158234869631692e-06, "loss": 0.0602, "step": 12357 }, { "epoch": 2.002268308489955, "grad_norm": 0.6295647025108337, "learning_rate": 1.3154383704502349e-06, "loss": 0.0667, "step": 12358 }, { "epoch": 2.0024303305249513, "grad_norm": 0.6843347549438477, "learning_rate": 1.31505329018202e-06, "loss": 0.0644, "step": 12359 }, { "epoch": 2.0025923525599483, "grad_norm": 0.8760754466056824, "learning_rate": 1.3146682461703069e-06, "loss": 0.0674, "step": 12360 }, { "epoch": 2.0027543745949448, "grad_norm": 0.617087721824646, "learning_rate": 1.314283238426878e-06, "loss": 0.0603, "step": 12361 }, { "epoch": 2.0029163966299417, "grad_norm": 0.7366885542869568, "learning_rate": 1.3138982669635117e-06, "loss": 0.0619, "step": 12362 }, { "epoch": 2.0030784186649386, "grad_norm": 0.6823921799659729, "learning_rate": 1.3135133317919868e-06, "loss": 0.0606, "step": 12363 }, { "epoch": 2.003240440699935, "grad_norm": 0.6802733540534973, "learning_rate": 1.313128432924084e-06, "loss": 0.0667, "step": 12364 }, { "epoch": 2.003402462734932, "grad_norm": 0.6357323527336121, "learning_rate": 1.3127435703715802e-06, "loss": 0.0608, "step": 12365 }, { "epoch": 2.0035644847699285, "grad_norm": 0.6992286443710327, "learning_rate": 1.3123587441462487e-06, "loss": 0.0601, "step": 12366 }, { "epoch": 2.0037265068049255, "grad_norm": 0.7034692168235779, "learning_rate": 1.3119739542598655e-06, "loss": 0.0617, "step": 12367 }, { "epoch": 2.0038885288399224, "grad_norm": 0.7643810510635376, "learning_rate": 1.3115892007242046e-06, "loss": 0.0653, "step": 12368 }, { "epoch": 2.004050550874919, "grad_norm": 0.6599617004394531, "learning_rate": 1.3112044835510378e-06, "loss": 0.0582, "step": 12369 }, { "epoch": 2.004212572909916, "grad_norm": 0.7879641652107239, "learning_rate": 1.3108198027521374e-06, "loss": 0.0667, "step": 12370 }, { "epoch": 2.0043745949449123, "grad_norm": 0.7727499008178711, "learning_rate": 1.3104351583392732e-06, "loss": 0.0622, "step": 12371 }, { "epoch": 2.0045366169799093, "grad_norm": 0.7340289354324341, "learning_rate": 1.3100505503242156e-06, "loss": 0.0605, "step": 12372 }, { "epoch": 2.004698639014906, "grad_norm": 0.7155749797821045, "learning_rate": 1.3096659787187294e-06, "loss": 0.062, "step": 12373 }, { "epoch": 2.0048606610499027, "grad_norm": 0.7992011308670044, "learning_rate": 1.3092814435345845e-06, "loss": 0.0655, "step": 12374 }, { "epoch": 2.0050226830848996, "grad_norm": 0.6771997809410095, "learning_rate": 1.3088969447835464e-06, "loss": 0.0567, "step": 12375 }, { "epoch": 2.005184705119896, "grad_norm": 0.7328808307647705, "learning_rate": 1.3085124824773797e-06, "loss": 0.0597, "step": 12376 }, { "epoch": 2.005346727154893, "grad_norm": 0.7776128649711609, "learning_rate": 1.3081280566278464e-06, "loss": 0.0617, "step": 12377 }, { "epoch": 2.00550874918989, "grad_norm": 0.8074184656143188, "learning_rate": 1.307743667246711e-06, "loss": 0.0738, "step": 12378 }, { "epoch": 2.0056707712248865, "grad_norm": 0.7445822358131409, "learning_rate": 1.3073593143457353e-06, "loss": 0.0609, "step": 12379 }, { "epoch": 2.0058327932598834, "grad_norm": 0.811712920665741, "learning_rate": 1.306974997936677e-06, "loss": 0.0675, "step": 12380 }, { "epoch": 2.00599481529488, "grad_norm": 0.8170737624168396, "learning_rate": 1.306590718031297e-06, "loss": 0.0713, "step": 12381 }, { "epoch": 2.006156837329877, "grad_norm": 0.6781127452850342, "learning_rate": 1.3062064746413522e-06, "loss": 0.0607, "step": 12382 }, { "epoch": 2.0063188593648738, "grad_norm": 0.8460241556167603, "learning_rate": 1.305822267778602e-06, "loss": 0.0651, "step": 12383 }, { "epoch": 2.0064808813998702, "grad_norm": 0.7995967864990234, "learning_rate": 1.3054380974547998e-06, "loss": 0.0606, "step": 12384 }, { "epoch": 2.006642903434867, "grad_norm": 0.8841012120246887, "learning_rate": 1.3050539636817012e-06, "loss": 0.0734, "step": 12385 }, { "epoch": 2.006804925469864, "grad_norm": 0.8261283040046692, "learning_rate": 1.3046698664710595e-06, "loss": 0.0605, "step": 12386 }, { "epoch": 2.0069669475048606, "grad_norm": 0.6940838098526001, "learning_rate": 1.304285805834627e-06, "loss": 0.0601, "step": 12387 }, { "epoch": 2.0071289695398575, "grad_norm": 0.8337867856025696, "learning_rate": 1.3039017817841553e-06, "loss": 0.0617, "step": 12388 }, { "epoch": 2.007290991574854, "grad_norm": 0.7418117523193359, "learning_rate": 1.3035177943313947e-06, "loss": 0.0594, "step": 12389 }, { "epoch": 2.007453013609851, "grad_norm": 0.853374183177948, "learning_rate": 1.3031338434880952e-06, "loss": 0.0681, "step": 12390 }, { "epoch": 2.007615035644848, "grad_norm": 0.8610039949417114, "learning_rate": 1.3027499292660022e-06, "loss": 0.0707, "step": 12391 }, { "epoch": 2.0077770576798444, "grad_norm": 0.8662420511245728, "learning_rate": 1.3023660516768638e-06, "loss": 0.0693, "step": 12392 }, { "epoch": 2.0079390797148413, "grad_norm": 0.8422592282295227, "learning_rate": 1.3019822107324267e-06, "loss": 0.0759, "step": 12393 }, { "epoch": 2.008101101749838, "grad_norm": 0.8292291760444641, "learning_rate": 1.301598406444436e-06, "loss": 0.0707, "step": 12394 }, { "epoch": 2.0082631237848347, "grad_norm": 0.7821356058120728, "learning_rate": 1.3012146388246328e-06, "loss": 0.0616, "step": 12395 }, { "epoch": 2.0084251458198317, "grad_norm": 0.8509025573730469, "learning_rate": 1.3008309078847605e-06, "loss": 0.0696, "step": 12396 }, { "epoch": 2.008587167854828, "grad_norm": 0.8189096450805664, "learning_rate": 1.3004472136365609e-06, "loss": 0.0658, "step": 12397 }, { "epoch": 2.008749189889825, "grad_norm": 0.7005947828292847, "learning_rate": 1.3000635560917735e-06, "loss": 0.0585, "step": 12398 }, { "epoch": 2.0089112119248216, "grad_norm": 0.749654233455658, "learning_rate": 1.2996799352621372e-06, "loss": 0.0564, "step": 12399 }, { "epoch": 2.0090732339598185, "grad_norm": 0.8583326935768127, "learning_rate": 1.2992963511593904e-06, "loss": 0.0669, "step": 12400 }, { "epoch": 2.0092352559948155, "grad_norm": 0.834697425365448, "learning_rate": 1.2989128037952698e-06, "loss": 0.0709, "step": 12401 }, { "epoch": 2.009397278029812, "grad_norm": 0.7957351207733154, "learning_rate": 1.2985292931815105e-06, "loss": 0.063, "step": 12402 }, { "epoch": 2.009559300064809, "grad_norm": 0.7907761335372925, "learning_rate": 1.2981458193298473e-06, "loss": 0.0607, "step": 12403 }, { "epoch": 2.0097213220998054, "grad_norm": 0.813335120677948, "learning_rate": 1.2977623822520141e-06, "loss": 0.0622, "step": 12404 }, { "epoch": 2.0098833441348023, "grad_norm": 0.9492120146751404, "learning_rate": 1.2973789819597431e-06, "loss": 0.0669, "step": 12405 }, { "epoch": 2.0100453661697992, "grad_norm": 0.8153406381607056, "learning_rate": 1.296995618464763e-06, "loss": 0.0651, "step": 12406 }, { "epoch": 2.0102073882047957, "grad_norm": 0.7369259595870972, "learning_rate": 1.296612291778807e-06, "loss": 0.058, "step": 12407 }, { "epoch": 2.0103694102397927, "grad_norm": 0.7126782536506653, "learning_rate": 1.2962290019136028e-06, "loss": 0.0606, "step": 12408 }, { "epoch": 2.0105314322747896, "grad_norm": 0.7159781455993652, "learning_rate": 1.295845748880879e-06, "loss": 0.0635, "step": 12409 }, { "epoch": 2.010693454309786, "grad_norm": 0.7070374488830566, "learning_rate": 1.2954625326923602e-06, "loss": 0.0552, "step": 12410 }, { "epoch": 2.010855476344783, "grad_norm": 0.7887537479400635, "learning_rate": 1.2950793533597722e-06, "loss": 0.0673, "step": 12411 }, { "epoch": 2.0110174983797795, "grad_norm": 0.7388550639152527, "learning_rate": 1.294696210894842e-06, "loss": 0.0542, "step": 12412 }, { "epoch": 2.0111795204147764, "grad_norm": 0.8547154068946838, "learning_rate": 1.2943131053092895e-06, "loss": 0.0693, "step": 12413 }, { "epoch": 2.0113415424497734, "grad_norm": 0.7168740034103394, "learning_rate": 1.2939300366148389e-06, "loss": 0.0591, "step": 12414 }, { "epoch": 2.01150356448477, "grad_norm": 0.717652440071106, "learning_rate": 1.2935470048232102e-06, "loss": 0.0568, "step": 12415 }, { "epoch": 2.011665586519767, "grad_norm": 0.8575769066810608, "learning_rate": 1.2931640099461237e-06, "loss": 0.0617, "step": 12416 }, { "epoch": 2.0118276085547633, "grad_norm": 0.862146258354187, "learning_rate": 1.292781051995298e-06, "loss": 0.0645, "step": 12417 }, { "epoch": 2.01198963058976, "grad_norm": 0.8514499068260193, "learning_rate": 1.2923981309824507e-06, "loss": 0.0685, "step": 12418 }, { "epoch": 2.012151652624757, "grad_norm": 0.8388655781745911, "learning_rate": 1.292015246919298e-06, "loss": 0.0724, "step": 12419 }, { "epoch": 2.0123136746597536, "grad_norm": 0.8496232032775879, "learning_rate": 1.291632399817557e-06, "loss": 0.0593, "step": 12420 }, { "epoch": 2.0124756966947506, "grad_norm": 0.7816452383995056, "learning_rate": 1.2912495896889383e-06, "loss": 0.0612, "step": 12421 }, { "epoch": 2.012637718729747, "grad_norm": 0.808066189289093, "learning_rate": 1.2908668165451577e-06, "loss": 0.0599, "step": 12422 }, { "epoch": 2.012799740764744, "grad_norm": 0.8434270024299622, "learning_rate": 1.2904840803979276e-06, "loss": 0.0663, "step": 12423 }, { "epoch": 2.012961762799741, "grad_norm": 0.8326642513275146, "learning_rate": 1.290101381258957e-06, "loss": 0.0616, "step": 12424 }, { "epoch": 2.0131237848347374, "grad_norm": 0.971601128578186, "learning_rate": 1.2897187191399546e-06, "loss": 0.0676, "step": 12425 }, { "epoch": 2.0132858068697344, "grad_norm": 0.8341547250747681, "learning_rate": 1.289336094052632e-06, "loss": 0.0643, "step": 12426 }, { "epoch": 2.013447828904731, "grad_norm": 0.917320728302002, "learning_rate": 1.288953506008696e-06, "loss": 0.0688, "step": 12427 }, { "epoch": 2.0136098509397278, "grad_norm": 0.8390260934829712, "learning_rate": 1.288570955019851e-06, "loss": 0.0646, "step": 12428 }, { "epoch": 2.0137718729747247, "grad_norm": 0.9914124608039856, "learning_rate": 1.2881884410978034e-06, "loss": 0.0705, "step": 12429 }, { "epoch": 2.013933895009721, "grad_norm": 0.8766891956329346, "learning_rate": 1.2878059642542566e-06, "loss": 0.0725, "step": 12430 }, { "epoch": 2.014095917044718, "grad_norm": 0.7540597915649414, "learning_rate": 1.2874235245009143e-06, "loss": 0.0608, "step": 12431 }, { "epoch": 2.0142579390797146, "grad_norm": 0.8450373411178589, "learning_rate": 1.2870411218494778e-06, "loss": 0.0668, "step": 12432 }, { "epoch": 2.0144199611147116, "grad_norm": 0.743740975856781, "learning_rate": 1.2866587563116473e-06, "loss": 0.058, "step": 12433 }, { "epoch": 2.0145819831497085, "grad_norm": 0.7940791845321655, "learning_rate": 1.2862764278991236e-06, "loss": 0.0618, "step": 12434 }, { "epoch": 2.014744005184705, "grad_norm": 0.8542442321777344, "learning_rate": 1.2858941366236021e-06, "loss": 0.0641, "step": 12435 }, { "epoch": 2.014906027219702, "grad_norm": 0.8491990566253662, "learning_rate": 1.2855118824967833e-06, "loss": 0.0603, "step": 12436 }, { "epoch": 2.015068049254699, "grad_norm": 1.0003999471664429, "learning_rate": 1.2851296655303616e-06, "loss": 0.064, "step": 12437 }, { "epoch": 2.0152300712896953, "grad_norm": 0.8259167671203613, "learning_rate": 1.2847474857360332e-06, "loss": 0.0616, "step": 12438 }, { "epoch": 2.0153920933246923, "grad_norm": 0.9689138531684875, "learning_rate": 1.28436534312549e-06, "loss": 0.0662, "step": 12439 }, { "epoch": 2.0155541153596888, "grad_norm": 0.8320235013961792, "learning_rate": 1.2839832377104245e-06, "loss": 0.0579, "step": 12440 }, { "epoch": 2.0157161373946857, "grad_norm": 0.8748543858528137, "learning_rate": 1.283601169502531e-06, "loss": 0.0762, "step": 12441 }, { "epoch": 2.0158781594296826, "grad_norm": 0.920680820941925, "learning_rate": 1.2832191385134972e-06, "loss": 0.0721, "step": 12442 }, { "epoch": 2.016040181464679, "grad_norm": 0.6789097785949707, "learning_rate": 1.2828371447550133e-06, "loss": 0.0546, "step": 12443 }, { "epoch": 2.016202203499676, "grad_norm": 0.8041635155677795, "learning_rate": 1.2824551882387664e-06, "loss": 0.0615, "step": 12444 }, { "epoch": 2.0163642255346725, "grad_norm": 0.8807735443115234, "learning_rate": 1.2820732689764462e-06, "loss": 0.0688, "step": 12445 }, { "epoch": 2.0165262475696695, "grad_norm": 0.7847744822502136, "learning_rate": 1.2816913869797353e-06, "loss": 0.0639, "step": 12446 }, { "epoch": 2.0166882696046664, "grad_norm": 0.9476509094238281, "learning_rate": 1.2813095422603203e-06, "loss": 0.0652, "step": 12447 }, { "epoch": 2.016850291639663, "grad_norm": 0.7772141695022583, "learning_rate": 1.2809277348298838e-06, "loss": 0.0583, "step": 12448 }, { "epoch": 2.01701231367466, "grad_norm": 0.8144988417625427, "learning_rate": 1.2805459647001087e-06, "loss": 0.0634, "step": 12449 }, { "epoch": 2.0171743357096563, "grad_norm": 0.9111658334732056, "learning_rate": 1.2801642318826759e-06, "loss": 0.0599, "step": 12450 }, { "epoch": 2.0173363577446533, "grad_norm": 0.9419159889221191, "learning_rate": 1.2797825363892658e-06, "loss": 0.0759, "step": 12451 }, { "epoch": 2.01749837977965, "grad_norm": 0.9087123870849609, "learning_rate": 1.279400878231557e-06, "loss": 0.0676, "step": 12452 }, { "epoch": 2.0176604018146467, "grad_norm": 0.7883154153823853, "learning_rate": 1.2790192574212287e-06, "loss": 0.0585, "step": 12453 }, { "epoch": 2.0178224238496436, "grad_norm": 0.7743737101554871, "learning_rate": 1.2786376739699547e-06, "loss": 0.0643, "step": 12454 }, { "epoch": 2.01798444588464, "grad_norm": 0.8277117013931274, "learning_rate": 1.2782561278894126e-06, "loss": 0.069, "step": 12455 }, { "epoch": 2.018146467919637, "grad_norm": 0.7758954763412476, "learning_rate": 1.2778746191912778e-06, "loss": 0.0629, "step": 12456 }, { "epoch": 2.018308489954634, "grad_norm": 0.7998802065849304, "learning_rate": 1.277493147887221e-06, "loss": 0.0572, "step": 12457 }, { "epoch": 2.0184705119896305, "grad_norm": 0.7002156972885132, "learning_rate": 1.2771117139889155e-06, "loss": 0.0612, "step": 12458 }, { "epoch": 2.0186325340246274, "grad_norm": 0.7348694801330566, "learning_rate": 1.2767303175080325e-06, "loss": 0.0593, "step": 12459 }, { "epoch": 2.0187945560596243, "grad_norm": 0.7863420248031616, "learning_rate": 1.276348958456241e-06, "loss": 0.0638, "step": 12460 }, { "epoch": 2.018956578094621, "grad_norm": 0.8472959995269775, "learning_rate": 1.2759676368452106e-06, "loss": 0.0664, "step": 12461 }, { "epoch": 2.0191186001296177, "grad_norm": 0.7812498807907104, "learning_rate": 1.2755863526866087e-06, "loss": 0.0568, "step": 12462 }, { "epoch": 2.0192806221646142, "grad_norm": 0.85575932264328, "learning_rate": 1.2752051059921005e-06, "loss": 0.0662, "step": 12463 }, { "epoch": 2.019442644199611, "grad_norm": 0.9001664519309998, "learning_rate": 1.2748238967733529e-06, "loss": 0.0682, "step": 12464 }, { "epoch": 2.019604666234608, "grad_norm": 0.948502779006958, "learning_rate": 1.2744427250420288e-06, "loss": 0.0695, "step": 12465 }, { "epoch": 2.0197666882696046, "grad_norm": 0.8993748426437378, "learning_rate": 1.2740615908097915e-06, "loss": 0.0641, "step": 12466 }, { "epoch": 2.0199287103046015, "grad_norm": 0.8371912837028503, "learning_rate": 1.273680494088304e-06, "loss": 0.064, "step": 12467 }, { "epoch": 2.020090732339598, "grad_norm": 1.5723109245300293, "learning_rate": 1.2732994348892237e-06, "loss": 0.0644, "step": 12468 }, { "epoch": 2.020252754374595, "grad_norm": 0.9138685464859009, "learning_rate": 1.2729184132242131e-06, "loss": 0.0678, "step": 12469 }, { "epoch": 2.020414776409592, "grad_norm": 0.8680717349052429, "learning_rate": 1.2725374291049296e-06, "loss": 0.0615, "step": 12470 }, { "epoch": 2.0205767984445884, "grad_norm": 1.0072784423828125, "learning_rate": 1.2721564825430313e-06, "loss": 0.0675, "step": 12471 }, { "epoch": 2.0207388204795853, "grad_norm": 0.8381701707839966, "learning_rate": 1.2717755735501725e-06, "loss": 0.0593, "step": 12472 }, { "epoch": 2.020900842514582, "grad_norm": 0.8638261556625366, "learning_rate": 1.2713947021380078e-06, "loss": 0.0674, "step": 12473 }, { "epoch": 2.0210628645495787, "grad_norm": 0.8350870609283447, "learning_rate": 1.2710138683181937e-06, "loss": 0.0686, "step": 12474 }, { "epoch": 2.0212248865845757, "grad_norm": 0.8235458731651306, "learning_rate": 1.2706330721023807e-06, "loss": 0.0662, "step": 12475 }, { "epoch": 2.021386908619572, "grad_norm": 0.7439950704574585, "learning_rate": 1.2702523135022205e-06, "loss": 0.0597, "step": 12476 }, { "epoch": 2.021548930654569, "grad_norm": 0.7728898525238037, "learning_rate": 1.2698715925293634e-06, "loss": 0.0563, "step": 12477 }, { "epoch": 2.0217109526895656, "grad_norm": 0.7948071956634521, "learning_rate": 1.2694909091954588e-06, "loss": 0.0595, "step": 12478 }, { "epoch": 2.0218729747245625, "grad_norm": 0.8696475625038147, "learning_rate": 1.2691102635121544e-06, "loss": 0.0629, "step": 12479 }, { "epoch": 2.0220349967595594, "grad_norm": 0.869878888130188, "learning_rate": 1.2687296554910978e-06, "loss": 0.0731, "step": 12480 }, { "epoch": 2.022197018794556, "grad_norm": 0.812859833240509, "learning_rate": 1.2683490851439334e-06, "loss": 0.0624, "step": 12481 }, { "epoch": 2.022359040829553, "grad_norm": 0.8346046209335327, "learning_rate": 1.2679685524823082e-06, "loss": 0.0628, "step": 12482 }, { "epoch": 2.0225210628645494, "grad_norm": 0.808914065361023, "learning_rate": 1.2675880575178613e-06, "loss": 0.0587, "step": 12483 }, { "epoch": 2.0226830848995463, "grad_norm": 0.7890689969062805, "learning_rate": 1.2672076002622386e-06, "loss": 0.0623, "step": 12484 }, { "epoch": 2.0228451069345432, "grad_norm": 0.7109611630439758, "learning_rate": 1.266827180727081e-06, "loss": 0.053, "step": 12485 }, { "epoch": 2.0230071289695397, "grad_norm": 0.9208827018737793, "learning_rate": 1.2664467989240265e-06, "loss": 0.0608, "step": 12486 }, { "epoch": 2.0231691510045366, "grad_norm": 1.064787745475769, "learning_rate": 1.266066454864715e-06, "loss": 0.0668, "step": 12487 }, { "epoch": 2.0233311730395336, "grad_norm": 0.8241201639175415, "learning_rate": 1.2656861485607828e-06, "loss": 0.06, "step": 12488 }, { "epoch": 2.02349319507453, "grad_norm": 0.8332204222679138, "learning_rate": 1.2653058800238693e-06, "loss": 0.0669, "step": 12489 }, { "epoch": 2.023655217109527, "grad_norm": 0.8376948833465576, "learning_rate": 1.264925649265607e-06, "loss": 0.0604, "step": 12490 }, { "epoch": 2.0238172391445235, "grad_norm": 0.971579372882843, "learning_rate": 1.2645454562976311e-06, "loss": 0.0593, "step": 12491 }, { "epoch": 2.0239792611795204, "grad_norm": 1.004150390625, "learning_rate": 1.2641653011315746e-06, "loss": 0.0687, "step": 12492 }, { "epoch": 2.0241412832145174, "grad_norm": 0.7998213768005371, "learning_rate": 1.2637851837790694e-06, "loss": 0.0622, "step": 12493 }, { "epoch": 2.024303305249514, "grad_norm": 1.0076199769973755, "learning_rate": 1.2634051042517453e-06, "loss": 0.064, "step": 12494 }, { "epoch": 2.024465327284511, "grad_norm": 0.8546391725540161, "learning_rate": 1.2630250625612331e-06, "loss": 0.0648, "step": 12495 }, { "epoch": 2.0246273493195073, "grad_norm": 0.9467459917068481, "learning_rate": 1.2626450587191602e-06, "loss": 0.0631, "step": 12496 }, { "epoch": 2.024789371354504, "grad_norm": 0.8554684519767761, "learning_rate": 1.2622650927371543e-06, "loss": 0.0655, "step": 12497 }, { "epoch": 2.024951393389501, "grad_norm": 0.8624327182769775, "learning_rate": 1.2618851646268416e-06, "loss": 0.0617, "step": 12498 }, { "epoch": 2.0251134154244976, "grad_norm": 0.9098758101463318, "learning_rate": 1.2615052743998463e-06, "loss": 0.066, "step": 12499 }, { "epoch": 2.0252754374594946, "grad_norm": 0.8350497484207153, "learning_rate": 1.2611254220677937e-06, "loss": 0.064, "step": 12500 }, { "epoch": 2.025437459494491, "grad_norm": 0.877099871635437, "learning_rate": 1.260745607642304e-06, "loss": 0.0547, "step": 12501 }, { "epoch": 2.025599481529488, "grad_norm": 0.7435240149497986, "learning_rate": 1.260365831134999e-06, "loss": 0.052, "step": 12502 }, { "epoch": 2.025761503564485, "grad_norm": 0.7691801190376282, "learning_rate": 1.2599860925575014e-06, "loss": 0.0535, "step": 12503 }, { "epoch": 2.0259235255994814, "grad_norm": 0.7755981087684631, "learning_rate": 1.259606391921428e-06, "loss": 0.0574, "step": 12504 }, { "epoch": 2.0260855476344783, "grad_norm": 1.0379624366760254, "learning_rate": 1.259226729238397e-06, "loss": 0.0631, "step": 12505 }, { "epoch": 2.026247569669475, "grad_norm": 0.8337475657463074, "learning_rate": 1.2588471045200256e-06, "loss": 0.0557, "step": 12506 }, { "epoch": 2.0264095917044718, "grad_norm": 0.8900526165962219, "learning_rate": 1.2584675177779294e-06, "loss": 0.0599, "step": 12507 }, { "epoch": 2.0265716137394687, "grad_norm": 0.7923566102981567, "learning_rate": 1.2580879690237224e-06, "loss": 0.0552, "step": 12508 }, { "epoch": 2.026733635774465, "grad_norm": 0.8455385565757751, "learning_rate": 1.257708458269018e-06, "loss": 0.0609, "step": 12509 }, { "epoch": 2.026895657809462, "grad_norm": 0.8646577000617981, "learning_rate": 1.257328985525429e-06, "loss": 0.068, "step": 12510 }, { "epoch": 2.027057679844459, "grad_norm": 0.8488696217536926, "learning_rate": 1.2569495508045656e-06, "loss": 0.0622, "step": 12511 }, { "epoch": 2.0272197018794555, "grad_norm": 0.9858669638633728, "learning_rate": 1.256570154118038e-06, "loss": 0.0687, "step": 12512 }, { "epoch": 2.0273817239144525, "grad_norm": 0.9625844359397888, "learning_rate": 1.2561907954774544e-06, "loss": 0.0651, "step": 12513 }, { "epoch": 2.027543745949449, "grad_norm": 0.9064575433731079, "learning_rate": 1.2558114748944226e-06, "loss": 0.0618, "step": 12514 }, { "epoch": 2.027705767984446, "grad_norm": 0.8604201674461365, "learning_rate": 1.2554321923805496e-06, "loss": 0.0648, "step": 12515 }, { "epoch": 2.027867790019443, "grad_norm": 1.1641322374343872, "learning_rate": 1.2550529479474383e-06, "loss": 0.0645, "step": 12516 }, { "epoch": 2.0280298120544393, "grad_norm": 0.8054379224777222, "learning_rate": 1.2546737416066945e-06, "loss": 0.0605, "step": 12517 }, { "epoch": 2.0281918340894363, "grad_norm": 0.9893051385879517, "learning_rate": 1.2542945733699216e-06, "loss": 0.0739, "step": 12518 }, { "epoch": 2.0283538561244328, "grad_norm": 0.8926251530647278, "learning_rate": 1.2539154432487193e-06, "loss": 0.0588, "step": 12519 }, { "epoch": 2.0285158781594297, "grad_norm": 0.8101766705513, "learning_rate": 1.2535363512546892e-06, "loss": 0.0602, "step": 12520 }, { "epoch": 2.0286779001944266, "grad_norm": 0.9098559021949768, "learning_rate": 1.2531572973994293e-06, "loss": 0.0676, "step": 12521 }, { "epoch": 2.028839922229423, "grad_norm": 1.1210435628890991, "learning_rate": 1.2527782816945405e-06, "loss": 0.0665, "step": 12522 }, { "epoch": 2.02900194426442, "grad_norm": 0.73442542552948, "learning_rate": 1.2523993041516175e-06, "loss": 0.045, "step": 12523 }, { "epoch": 2.0291639662994165, "grad_norm": 0.8347865343093872, "learning_rate": 1.2520203647822563e-06, "loss": 0.0643, "step": 12524 }, { "epoch": 2.0293259883344135, "grad_norm": 0.9335924983024597, "learning_rate": 1.2516414635980518e-06, "loss": 0.0657, "step": 12525 }, { "epoch": 2.0294880103694104, "grad_norm": 0.845848023891449, "learning_rate": 1.2512626006105977e-06, "loss": 0.0618, "step": 12526 }, { "epoch": 2.029650032404407, "grad_norm": 0.8612715601921082, "learning_rate": 1.2508837758314862e-06, "loss": 0.0725, "step": 12527 }, { "epoch": 2.029812054439404, "grad_norm": 0.936465322971344, "learning_rate": 1.2505049892723083e-06, "loss": 0.0711, "step": 12528 }, { "epoch": 2.0299740764744003, "grad_norm": 0.8308136463165283, "learning_rate": 1.2501262409446552e-06, "loss": 0.0601, "step": 12529 }, { "epoch": 2.0301360985093972, "grad_norm": 0.8098605871200562, "learning_rate": 1.2497475308601134e-06, "loss": 0.0643, "step": 12530 }, { "epoch": 2.030298120544394, "grad_norm": 0.932350754737854, "learning_rate": 1.2493688590302705e-06, "loss": 0.0697, "step": 12531 }, { "epoch": 2.0304601425793907, "grad_norm": 0.8417340517044067, "learning_rate": 1.248990225466715e-06, "loss": 0.0661, "step": 12532 }, { "epoch": 2.0306221646143876, "grad_norm": 0.7914611101150513, "learning_rate": 1.2486116301810322e-06, "loss": 0.0647, "step": 12533 }, { "epoch": 2.0307841866493845, "grad_norm": 1.3596220016479492, "learning_rate": 1.2482330731848044e-06, "loss": 0.0616, "step": 12534 }, { "epoch": 2.030946208684381, "grad_norm": 1.0075905323028564, "learning_rate": 1.2478545544896143e-06, "loss": 0.0648, "step": 12535 }, { "epoch": 2.031108230719378, "grad_norm": 0.8666545748710632, "learning_rate": 1.2474760741070465e-06, "loss": 0.0621, "step": 12536 }, { "epoch": 2.0312702527543745, "grad_norm": 0.8424669504165649, "learning_rate": 1.2470976320486792e-06, "loss": 0.0616, "step": 12537 }, { "epoch": 2.0314322747893714, "grad_norm": 0.858910083770752, "learning_rate": 1.246719228326092e-06, "loss": 0.0631, "step": 12538 }, { "epoch": 2.0315942968243683, "grad_norm": 0.9575031995773315, "learning_rate": 1.2463408629508635e-06, "loss": 0.0641, "step": 12539 }, { "epoch": 2.031756318859365, "grad_norm": 1.0462077856063843, "learning_rate": 1.2459625359345712e-06, "loss": 0.0718, "step": 12540 }, { "epoch": 2.0319183408943617, "grad_norm": 0.9476367831230164, "learning_rate": 1.2455842472887903e-06, "loss": 0.0599, "step": 12541 }, { "epoch": 2.0320803629293582, "grad_norm": 1.0100579261779785, "learning_rate": 1.2452059970250957e-06, "loss": 0.0597, "step": 12542 }, { "epoch": 2.032242384964355, "grad_norm": 0.8134729266166687, "learning_rate": 1.2448277851550613e-06, "loss": 0.0586, "step": 12543 }, { "epoch": 2.032404406999352, "grad_norm": 0.9285221695899963, "learning_rate": 1.2444496116902602e-06, "loss": 0.0693, "step": 12544 }, { "epoch": 2.0325664290343486, "grad_norm": 1.1232776641845703, "learning_rate": 1.2440714766422604e-06, "loss": 0.0791, "step": 12545 }, { "epoch": 2.0327284510693455, "grad_norm": 0.7684057950973511, "learning_rate": 1.2436933800226352e-06, "loss": 0.0565, "step": 12546 }, { "epoch": 2.032890473104342, "grad_norm": 0.7600722908973694, "learning_rate": 1.2433153218429526e-06, "loss": 0.0612, "step": 12547 }, { "epoch": 2.033052495139339, "grad_norm": 0.8198429942131042, "learning_rate": 1.2429373021147808e-06, "loss": 0.0616, "step": 12548 }, { "epoch": 2.033214517174336, "grad_norm": 0.9691295027732849, "learning_rate": 1.2425593208496844e-06, "loss": 0.0707, "step": 12549 }, { "epoch": 2.0333765392093324, "grad_norm": 0.7660728693008423, "learning_rate": 1.2421813780592294e-06, "loss": 0.0643, "step": 12550 }, { "epoch": 2.0335385612443293, "grad_norm": 0.9578198194503784, "learning_rate": 1.2418034737549818e-06, "loss": 0.0708, "step": 12551 }, { "epoch": 2.033700583279326, "grad_norm": 0.7433199882507324, "learning_rate": 1.2414256079485021e-06, "loss": 0.0557, "step": 12552 }, { "epoch": 2.0338626053143227, "grad_norm": 0.862591028213501, "learning_rate": 1.2410477806513535e-06, "loss": 0.0666, "step": 12553 }, { "epoch": 2.0340246273493197, "grad_norm": 0.8760819435119629, "learning_rate": 1.240669991875096e-06, "loss": 0.0661, "step": 12554 }, { "epoch": 2.034186649384316, "grad_norm": 0.8833930492401123, "learning_rate": 1.2402922416312891e-06, "loss": 0.0626, "step": 12555 }, { "epoch": 2.034348671419313, "grad_norm": 0.8621806502342224, "learning_rate": 1.2399145299314913e-06, "loss": 0.0646, "step": 12556 }, { "epoch": 2.0345106934543096, "grad_norm": 0.8710420727729797, "learning_rate": 1.2395368567872596e-06, "loss": 0.0686, "step": 12557 }, { "epoch": 2.0346727154893065, "grad_norm": 0.935194194316864, "learning_rate": 1.2391592222101497e-06, "loss": 0.0659, "step": 12558 }, { "epoch": 2.0348347375243034, "grad_norm": 0.9124763011932373, "learning_rate": 1.2387816262117167e-06, "loss": 0.0611, "step": 12559 }, { "epoch": 2.0349967595593, "grad_norm": 0.861625075340271, "learning_rate": 1.2384040688035135e-06, "loss": 0.0576, "step": 12560 }, { "epoch": 2.035158781594297, "grad_norm": 0.9553015232086182, "learning_rate": 1.2380265499970932e-06, "loss": 0.0698, "step": 12561 }, { "epoch": 2.035320803629294, "grad_norm": 0.7892826795578003, "learning_rate": 1.2376490698040069e-06, "loss": 0.0561, "step": 12562 }, { "epoch": 2.0354828256642903, "grad_norm": 0.7279547452926636, "learning_rate": 1.2372716282358038e-06, "loss": 0.0527, "step": 12563 }, { "epoch": 2.035644847699287, "grad_norm": 0.7964885234832764, "learning_rate": 1.236894225304032e-06, "loss": 0.0593, "step": 12564 }, { "epoch": 2.0358068697342837, "grad_norm": 0.7441739439964294, "learning_rate": 1.2365168610202411e-06, "loss": 0.0557, "step": 12565 }, { "epoch": 2.0359688917692806, "grad_norm": 1.0166600942611694, "learning_rate": 1.2361395353959776e-06, "loss": 0.064, "step": 12566 }, { "epoch": 2.0361309138042776, "grad_norm": 0.8261235356330872, "learning_rate": 1.2357622484427854e-06, "loss": 0.0591, "step": 12567 }, { "epoch": 2.036292935839274, "grad_norm": 0.8217520117759705, "learning_rate": 1.2353850001722084e-06, "loss": 0.06, "step": 12568 }, { "epoch": 2.036454957874271, "grad_norm": 0.887876570224762, "learning_rate": 1.2350077905957902e-06, "loss": 0.0678, "step": 12569 }, { "epoch": 2.0366169799092675, "grad_norm": 0.8636623024940491, "learning_rate": 1.2346306197250727e-06, "loss": 0.0623, "step": 12570 }, { "epoch": 2.0367790019442644, "grad_norm": 0.9496206045150757, "learning_rate": 1.2342534875715958e-06, "loss": 0.0559, "step": 12571 }, { "epoch": 2.0369410239792614, "grad_norm": 0.827489972114563, "learning_rate": 1.2338763941468993e-06, "loss": 0.0609, "step": 12572 }, { "epoch": 2.037103046014258, "grad_norm": 0.8658286929130554, "learning_rate": 1.2334993394625219e-06, "loss": 0.0712, "step": 12573 }, { "epoch": 2.037265068049255, "grad_norm": 0.8474152088165283, "learning_rate": 1.2331223235299983e-06, "loss": 0.0627, "step": 12574 }, { "epoch": 2.0374270900842513, "grad_norm": 0.8493207693099976, "learning_rate": 1.2327453463608663e-06, "loss": 0.0655, "step": 12575 }, { "epoch": 2.037589112119248, "grad_norm": 0.9206482172012329, "learning_rate": 1.2323684079666604e-06, "loss": 0.0604, "step": 12576 }, { "epoch": 2.037751134154245, "grad_norm": 1.0142006874084473, "learning_rate": 1.2319915083589143e-06, "loss": 0.0613, "step": 12577 }, { "epoch": 2.0379131561892416, "grad_norm": 0.7615647315979004, "learning_rate": 1.2316146475491578e-06, "loss": 0.059, "step": 12578 }, { "epoch": 2.0380751782242386, "grad_norm": 0.7730655670166016, "learning_rate": 1.2312378255489246e-06, "loss": 0.0621, "step": 12579 }, { "epoch": 2.038237200259235, "grad_norm": 0.788704514503479, "learning_rate": 1.2308610423697446e-06, "loss": 0.0553, "step": 12580 }, { "epoch": 2.038399222294232, "grad_norm": 0.8466129302978516, "learning_rate": 1.2304842980231442e-06, "loss": 0.0635, "step": 12581 }, { "epoch": 2.038561244329229, "grad_norm": 0.8458765745162964, "learning_rate": 1.2301075925206524e-06, "loss": 0.0605, "step": 12582 }, { "epoch": 2.0387232663642254, "grad_norm": 0.8162645101547241, "learning_rate": 1.229730925873794e-06, "loss": 0.0538, "step": 12583 }, { "epoch": 2.0388852883992223, "grad_norm": 0.9227370023727417, "learning_rate": 1.2293542980940974e-06, "loss": 0.0655, "step": 12584 }, { "epoch": 2.039047310434219, "grad_norm": 0.759735643863678, "learning_rate": 1.2289777091930832e-06, "loss": 0.0537, "step": 12585 }, { "epoch": 2.0392093324692158, "grad_norm": 0.8518356680870056, "learning_rate": 1.2286011591822756e-06, "loss": 0.0646, "step": 12586 }, { "epoch": 2.0393713545042127, "grad_norm": 0.9240495562553406, "learning_rate": 1.2282246480731955e-06, "loss": 0.0714, "step": 12587 }, { "epoch": 2.039533376539209, "grad_norm": 0.8255005478858948, "learning_rate": 1.2278481758773636e-06, "loss": 0.0656, "step": 12588 }, { "epoch": 2.039695398574206, "grad_norm": 0.8890777826309204, "learning_rate": 1.227471742606299e-06, "loss": 0.0655, "step": 12589 }, { "epoch": 2.039857420609203, "grad_norm": 0.9725179076194763, "learning_rate": 1.2270953482715197e-06, "loss": 0.0681, "step": 12590 }, { "epoch": 2.0400194426441995, "grad_norm": 0.9976947903633118, "learning_rate": 1.2267189928845424e-06, "loss": 0.0639, "step": 12591 }, { "epoch": 2.0401814646791965, "grad_norm": 0.8435698747634888, "learning_rate": 1.2263426764568835e-06, "loss": 0.0615, "step": 12592 }, { "epoch": 2.040343486714193, "grad_norm": 1.729702353477478, "learning_rate": 1.2259663990000544e-06, "loss": 0.0608, "step": 12593 }, { "epoch": 2.04050550874919, "grad_norm": 0.8571469783782959, "learning_rate": 1.2255901605255715e-06, "loss": 0.0673, "step": 12594 }, { "epoch": 2.040667530784187, "grad_norm": 1.0115898847579956, "learning_rate": 1.2252139610449468e-06, "loss": 0.0618, "step": 12595 }, { "epoch": 2.0408295528191833, "grad_norm": 0.7657039165496826, "learning_rate": 1.224837800569689e-06, "loss": 0.0583, "step": 12596 }, { "epoch": 2.0409915748541803, "grad_norm": 0.8274670243263245, "learning_rate": 1.2244616791113085e-06, "loss": 0.0617, "step": 12597 }, { "epoch": 2.0411535968891767, "grad_norm": 0.8327546119689941, "learning_rate": 1.224085596681314e-06, "loss": 0.0532, "step": 12598 }, { "epoch": 2.0413156189241737, "grad_norm": 0.8720483779907227, "learning_rate": 1.2237095532912125e-06, "loss": 0.0622, "step": 12599 }, { "epoch": 2.0414776409591706, "grad_norm": 0.9215652942657471, "learning_rate": 1.22333354895251e-06, "loss": 0.0658, "step": 12600 }, { "epoch": 2.041639662994167, "grad_norm": 0.792291522026062, "learning_rate": 1.2229575836767115e-06, "loss": 0.0579, "step": 12601 }, { "epoch": 2.041801685029164, "grad_norm": 0.8133555054664612, "learning_rate": 1.2225816574753208e-06, "loss": 0.0622, "step": 12602 }, { "epoch": 2.0419637070641605, "grad_norm": 0.7037177681922913, "learning_rate": 1.2222057703598398e-06, "loss": 0.0573, "step": 12603 }, { "epoch": 2.0421257290991575, "grad_norm": 0.9011344909667969, "learning_rate": 1.2218299223417702e-06, "loss": 0.0612, "step": 12604 }, { "epoch": 2.0422877511341544, "grad_norm": 0.8436145782470703, "learning_rate": 1.2214541134326117e-06, "loss": 0.0604, "step": 12605 }, { "epoch": 2.042449773169151, "grad_norm": 0.7856259346008301, "learning_rate": 1.2210783436438644e-06, "loss": 0.0538, "step": 12606 }, { "epoch": 2.042611795204148, "grad_norm": 1.0986276865005493, "learning_rate": 1.2207026129870229e-06, "loss": 0.0682, "step": 12607 }, { "epoch": 2.0427738172391443, "grad_norm": 0.8682820796966553, "learning_rate": 1.2203269214735866e-06, "loss": 0.0579, "step": 12608 }, { "epoch": 2.0429358392741412, "grad_norm": 0.8327391147613525, "learning_rate": 1.2199512691150496e-06, "loss": 0.0624, "step": 12609 }, { "epoch": 2.043097861309138, "grad_norm": 0.8508796691894531, "learning_rate": 1.2195756559229072e-06, "loss": 0.0579, "step": 12610 }, { "epoch": 2.0432598833441347, "grad_norm": 0.8715740442276001, "learning_rate": 1.2192000819086502e-06, "loss": 0.0582, "step": 12611 }, { "epoch": 2.0434219053791316, "grad_norm": 0.8794846534729004, "learning_rate": 1.2188245470837702e-06, "loss": 0.0638, "step": 12612 }, { "epoch": 2.0435839274141285, "grad_norm": 0.8919373750686646, "learning_rate": 1.2184490514597606e-06, "loss": 0.068, "step": 12613 }, { "epoch": 2.043745949449125, "grad_norm": 1.1181578636169434, "learning_rate": 1.218073595048108e-06, "loss": 0.074, "step": 12614 }, { "epoch": 2.043907971484122, "grad_norm": 0.9722860455513, "learning_rate": 1.217698177860301e-06, "loss": 0.0629, "step": 12615 }, { "epoch": 2.0440699935191184, "grad_norm": 0.8910360932350159, "learning_rate": 1.2173227999078264e-06, "loss": 0.0586, "step": 12616 }, { "epoch": 2.0442320155541154, "grad_norm": 1.0762535333633423, "learning_rate": 1.2169474612021703e-06, "loss": 0.066, "step": 12617 }, { "epoch": 2.0443940375891123, "grad_norm": 0.9633141756057739, "learning_rate": 1.2165721617548172e-06, "loss": 0.0638, "step": 12618 }, { "epoch": 2.044556059624109, "grad_norm": 0.9522542357444763, "learning_rate": 1.2161969015772498e-06, "loss": 0.0645, "step": 12619 }, { "epoch": 2.0447180816591057, "grad_norm": 0.907698392868042, "learning_rate": 1.2158216806809505e-06, "loss": 0.0629, "step": 12620 }, { "epoch": 2.0448801036941022, "grad_norm": 0.9610335230827332, "learning_rate": 1.2154464990774013e-06, "loss": 0.0621, "step": 12621 }, { "epoch": 2.045042125729099, "grad_norm": 0.8890466690063477, "learning_rate": 1.2150713567780786e-06, "loss": 0.0585, "step": 12622 }, { "epoch": 2.045204147764096, "grad_norm": 0.8295314908027649, "learning_rate": 1.2146962537944638e-06, "loss": 0.0649, "step": 12623 }, { "epoch": 2.0453661697990926, "grad_norm": 0.883902907371521, "learning_rate": 1.2143211901380341e-06, "loss": 0.055, "step": 12624 }, { "epoch": 2.0455281918340895, "grad_norm": 0.8097559213638306, "learning_rate": 1.2139461658202642e-06, "loss": 0.0591, "step": 12625 }, { "epoch": 2.045690213869086, "grad_norm": 0.8150593638420105, "learning_rate": 1.2135711808526282e-06, "loss": 0.0614, "step": 12626 }, { "epoch": 2.045852235904083, "grad_norm": 0.8336517810821533, "learning_rate": 1.213196235246602e-06, "loss": 0.0626, "step": 12627 }, { "epoch": 2.04601425793908, "grad_norm": 0.7647026777267456, "learning_rate": 1.2128213290136578e-06, "loss": 0.0603, "step": 12628 }, { "epoch": 2.0461762799740764, "grad_norm": 0.7869901657104492, "learning_rate": 1.212446462165265e-06, "loss": 0.0578, "step": 12629 }, { "epoch": 2.0463383020090733, "grad_norm": 0.957288920879364, "learning_rate": 1.212071634712895e-06, "loss": 0.0735, "step": 12630 }, { "epoch": 2.04650032404407, "grad_norm": 0.9335697889328003, "learning_rate": 1.2116968466680159e-06, "loss": 0.062, "step": 12631 }, { "epoch": 2.0466623460790667, "grad_norm": 0.85823655128479, "learning_rate": 1.211322098042096e-06, "loss": 0.0543, "step": 12632 }, { "epoch": 2.0468243681140637, "grad_norm": 0.9083124995231628, "learning_rate": 1.210947388846601e-06, "loss": 0.0638, "step": 12633 }, { "epoch": 2.04698639014906, "grad_norm": 0.773314893245697, "learning_rate": 1.2105727190929967e-06, "loss": 0.0619, "step": 12634 }, { "epoch": 2.047148412184057, "grad_norm": 0.8823757767677307, "learning_rate": 1.2101980887927467e-06, "loss": 0.0663, "step": 12635 }, { "epoch": 2.047310434219054, "grad_norm": 1.0058071613311768, "learning_rate": 1.209823497957314e-06, "loss": 0.0649, "step": 12636 }, { "epoch": 2.0474724562540505, "grad_norm": 0.9731652736663818, "learning_rate": 1.2094489465981602e-06, "loss": 0.0596, "step": 12637 }, { "epoch": 2.0476344782890474, "grad_norm": 0.9225329160690308, "learning_rate": 1.2090744347267452e-06, "loss": 0.074, "step": 12638 }, { "epoch": 2.047796500324044, "grad_norm": 0.7908154129981995, "learning_rate": 1.2086999623545297e-06, "loss": 0.0611, "step": 12639 }, { "epoch": 2.047958522359041, "grad_norm": 0.7675386667251587, "learning_rate": 1.2083255294929697e-06, "loss": 0.0613, "step": 12640 }, { "epoch": 2.048120544394038, "grad_norm": 0.7376556396484375, "learning_rate": 1.2079511361535214e-06, "loss": 0.0549, "step": 12641 }, { "epoch": 2.0482825664290343, "grad_norm": 1.0447229146957397, "learning_rate": 1.2075767823476439e-06, "loss": 0.0698, "step": 12642 }, { "epoch": 2.048444588464031, "grad_norm": 0.8112842440605164, "learning_rate": 1.207202468086788e-06, "loss": 0.0597, "step": 12643 }, { "epoch": 2.0486066104990277, "grad_norm": 0.9694043397903442, "learning_rate": 1.2068281933824084e-06, "loss": 0.0697, "step": 12644 }, { "epoch": 2.0487686325340246, "grad_norm": 1.0804142951965332, "learning_rate": 1.2064539582459564e-06, "loss": 0.0691, "step": 12645 }, { "epoch": 2.0489306545690216, "grad_norm": 0.8556890487670898, "learning_rate": 1.2060797626888828e-06, "loss": 0.0708, "step": 12646 }, { "epoch": 2.049092676604018, "grad_norm": 0.839736819267273, "learning_rate": 1.2057056067226374e-06, "loss": 0.0549, "step": 12647 }, { "epoch": 2.049254698639015, "grad_norm": 0.7325344681739807, "learning_rate": 1.2053314903586685e-06, "loss": 0.0569, "step": 12648 }, { "epoch": 2.0494167206740115, "grad_norm": 0.9087137579917908, "learning_rate": 1.2049574136084228e-06, "loss": 0.066, "step": 12649 }, { "epoch": 2.0495787427090084, "grad_norm": 0.8081238865852356, "learning_rate": 1.2045833764833461e-06, "loss": 0.064, "step": 12650 }, { "epoch": 2.0497407647440054, "grad_norm": 1.0425145626068115, "learning_rate": 1.2042093789948836e-06, "loss": 0.0651, "step": 12651 }, { "epoch": 2.049902786779002, "grad_norm": 0.8078098893165588, "learning_rate": 1.2038354211544781e-06, "loss": 0.0593, "step": 12652 }, { "epoch": 2.0500648088139988, "grad_norm": 0.8982751369476318, "learning_rate": 1.2034615029735722e-06, "loss": 0.0703, "step": 12653 }, { "epoch": 2.0502268308489953, "grad_norm": 1.2443002462387085, "learning_rate": 1.2030876244636078e-06, "loss": 0.0627, "step": 12654 }, { "epoch": 2.050388852883992, "grad_norm": 0.8545292019844055, "learning_rate": 1.2027137856360212e-06, "loss": 0.0665, "step": 12655 }, { "epoch": 2.050550874918989, "grad_norm": 0.7731471061706543, "learning_rate": 1.202339986502255e-06, "loss": 0.058, "step": 12656 }, { "epoch": 2.0507128969539856, "grad_norm": 0.8670762777328491, "learning_rate": 1.2019662270737455e-06, "loss": 0.0616, "step": 12657 }, { "epoch": 2.0508749189889826, "grad_norm": 0.7677872776985168, "learning_rate": 1.2015925073619275e-06, "loss": 0.0561, "step": 12658 }, { "epoch": 2.051036941023979, "grad_norm": 0.8861536979675293, "learning_rate": 1.2012188273782367e-06, "loss": 0.0666, "step": 12659 }, { "epoch": 2.051198963058976, "grad_norm": 0.8551197052001953, "learning_rate": 1.2008451871341056e-06, "loss": 0.0547, "step": 12660 }, { "epoch": 2.051360985093973, "grad_norm": 0.7268028855323792, "learning_rate": 1.20047158664097e-06, "loss": 0.0543, "step": 12661 }, { "epoch": 2.0515230071289694, "grad_norm": 1.0221055746078491, "learning_rate": 1.200098025910258e-06, "loss": 0.0664, "step": 12662 }, { "epoch": 2.0516850291639663, "grad_norm": 0.7850842475891113, "learning_rate": 1.1997245049534007e-06, "loss": 0.0598, "step": 12663 }, { "epoch": 2.0518470511989633, "grad_norm": 0.9158909320831299, "learning_rate": 1.1993510237818269e-06, "loss": 0.0676, "step": 12664 }, { "epoch": 2.0520090732339598, "grad_norm": 0.9576240181922913, "learning_rate": 1.1989775824069645e-06, "loss": 0.0613, "step": 12665 }, { "epoch": 2.0521710952689567, "grad_norm": 0.7837166786193848, "learning_rate": 1.1986041808402393e-06, "loss": 0.0554, "step": 12666 }, { "epoch": 2.052333117303953, "grad_norm": 0.9599198698997498, "learning_rate": 1.198230819093077e-06, "loss": 0.0716, "step": 12667 }, { "epoch": 2.05249513933895, "grad_norm": 1.0300556421279907, "learning_rate": 1.1978574971769025e-06, "loss": 0.0639, "step": 12668 }, { "epoch": 2.052657161373947, "grad_norm": 0.9385240077972412, "learning_rate": 1.1974842151031354e-06, "loss": 0.0651, "step": 12669 }, { "epoch": 2.0528191834089435, "grad_norm": 0.8229407072067261, "learning_rate": 1.1971109728832003e-06, "loss": 0.0599, "step": 12670 }, { "epoch": 2.0529812054439405, "grad_norm": 0.8111064434051514, "learning_rate": 1.1967377705285163e-06, "loss": 0.0614, "step": 12671 }, { "epoch": 2.053143227478937, "grad_norm": 0.716200053691864, "learning_rate": 1.196364608050504e-06, "loss": 0.061, "step": 12672 }, { "epoch": 2.053305249513934, "grad_norm": 0.8643745183944702, "learning_rate": 1.1959914854605788e-06, "loss": 0.0606, "step": 12673 }, { "epoch": 2.053467271548931, "grad_norm": 0.9790539741516113, "learning_rate": 1.1956184027701576e-06, "loss": 0.0629, "step": 12674 }, { "epoch": 2.0536292935839273, "grad_norm": 0.9859734177589417, "learning_rate": 1.1952453599906585e-06, "loss": 0.0655, "step": 12675 }, { "epoch": 2.0537913156189243, "grad_norm": 0.784511148929596, "learning_rate": 1.1948723571334932e-06, "loss": 0.0544, "step": 12676 }, { "epoch": 2.0539533376539207, "grad_norm": 0.7424229383468628, "learning_rate": 1.1944993942100755e-06, "loss": 0.0563, "step": 12677 }, { "epoch": 2.0541153596889177, "grad_norm": 0.9328092932701111, "learning_rate": 1.1941264712318167e-06, "loss": 0.0677, "step": 12678 }, { "epoch": 2.0542773817239146, "grad_norm": 0.8559496998786926, "learning_rate": 1.193753588210128e-06, "loss": 0.068, "step": 12679 }, { "epoch": 2.054439403758911, "grad_norm": 1.0441864728927612, "learning_rate": 1.1933807451564186e-06, "loss": 0.0597, "step": 12680 }, { "epoch": 2.054601425793908, "grad_norm": 0.8556796908378601, "learning_rate": 1.1930079420820962e-06, "loss": 0.0575, "step": 12681 }, { "epoch": 2.0547634478289045, "grad_norm": 0.9726545214653015, "learning_rate": 1.192635178998568e-06, "loss": 0.0698, "step": 12682 }, { "epoch": 2.0549254698639015, "grad_norm": 0.7726395726203918, "learning_rate": 1.1922624559172404e-06, "loss": 0.053, "step": 12683 }, { "epoch": 2.0550874918988984, "grad_norm": 1.3257524967193604, "learning_rate": 1.191889772849515e-06, "loss": 0.057, "step": 12684 }, { "epoch": 2.055249513933895, "grad_norm": 0.8716280460357666, "learning_rate": 1.1915171298067982e-06, "loss": 0.0626, "step": 12685 }, { "epoch": 2.055411535968892, "grad_norm": 0.7775639891624451, "learning_rate": 1.1911445268004917e-06, "loss": 0.0586, "step": 12686 }, { "epoch": 2.0555735580038887, "grad_norm": 0.7742239236831665, "learning_rate": 1.1907719638419943e-06, "loss": 0.0577, "step": 12687 }, { "epoch": 2.0557355800388852, "grad_norm": 0.9106540679931641, "learning_rate": 1.1903994409427063e-06, "loss": 0.0641, "step": 12688 }, { "epoch": 2.055897602073882, "grad_norm": 0.8494895100593567, "learning_rate": 1.1900269581140257e-06, "loss": 0.0622, "step": 12689 }, { "epoch": 2.0560596241088787, "grad_norm": 0.9407444596290588, "learning_rate": 1.1896545153673517e-06, "loss": 0.067, "step": 12690 }, { "epoch": 2.0562216461438756, "grad_norm": 0.734492301940918, "learning_rate": 1.1892821127140777e-06, "loss": 0.0553, "step": 12691 }, { "epoch": 2.0563836681788725, "grad_norm": 0.8618312478065491, "learning_rate": 1.1889097501655991e-06, "loss": 0.0624, "step": 12692 }, { "epoch": 2.056545690213869, "grad_norm": 0.8262837529182434, "learning_rate": 1.1885374277333095e-06, "loss": 0.0579, "step": 12693 }, { "epoch": 2.056707712248866, "grad_norm": 0.832383930683136, "learning_rate": 1.1881651454286008e-06, "loss": 0.0627, "step": 12694 }, { "epoch": 2.0568697342838624, "grad_norm": 0.8754958510398865, "learning_rate": 1.187792903262864e-06, "loss": 0.0592, "step": 12695 }, { "epoch": 2.0570317563188594, "grad_norm": 0.9576855301856995, "learning_rate": 1.1874207012474891e-06, "loss": 0.0672, "step": 12696 }, { "epoch": 2.0571937783538563, "grad_norm": 0.8752747774124146, "learning_rate": 1.1870485393938644e-06, "loss": 0.0658, "step": 12697 }, { "epoch": 2.057355800388853, "grad_norm": 0.9489736557006836, "learning_rate": 1.186676417713377e-06, "loss": 0.062, "step": 12698 }, { "epoch": 2.0575178224238497, "grad_norm": 0.791542112827301, "learning_rate": 1.1863043362174129e-06, "loss": 0.056, "step": 12699 }, { "epoch": 2.057679844458846, "grad_norm": 0.8600035309791565, "learning_rate": 1.1859322949173572e-06, "loss": 0.0646, "step": 12700 }, { "epoch": 2.057841866493843, "grad_norm": 0.8470669984817505, "learning_rate": 1.1855602938245942e-06, "loss": 0.0658, "step": 12701 }, { "epoch": 2.05800388852884, "grad_norm": 0.8929564952850342, "learning_rate": 1.1851883329505043e-06, "loss": 0.0589, "step": 12702 }, { "epoch": 2.0581659105638366, "grad_norm": 0.9186873435974121, "learning_rate": 1.1848164123064687e-06, "loss": 0.073, "step": 12703 }, { "epoch": 2.0583279325988335, "grad_norm": 0.803128719329834, "learning_rate": 1.1844445319038694e-06, "loss": 0.059, "step": 12704 }, { "epoch": 2.05848995463383, "grad_norm": 0.7790929675102234, "learning_rate": 1.1840726917540846e-06, "loss": 0.0562, "step": 12705 }, { "epoch": 2.058651976668827, "grad_norm": 0.9201391935348511, "learning_rate": 1.18370089186849e-06, "loss": 0.0644, "step": 12706 }, { "epoch": 2.058813998703824, "grad_norm": 1.1075505018234253, "learning_rate": 1.1833291322584625e-06, "loss": 0.0671, "step": 12707 }, { "epoch": 2.0589760207388204, "grad_norm": 0.869609534740448, "learning_rate": 1.1829574129353777e-06, "loss": 0.0663, "step": 12708 }, { "epoch": 2.0591380427738173, "grad_norm": 1.0262631177902222, "learning_rate": 1.1825857339106086e-06, "loss": 0.059, "step": 12709 }, { "epoch": 2.059300064808814, "grad_norm": 0.8231914043426514, "learning_rate": 1.182214095195528e-06, "loss": 0.0576, "step": 12710 }, { "epoch": 2.0594620868438107, "grad_norm": 0.8270055651664734, "learning_rate": 1.181842496801507e-06, "loss": 0.0574, "step": 12711 }, { "epoch": 2.0596241088788076, "grad_norm": 1.0092082023620605, "learning_rate": 1.181470938739917e-06, "loss": 0.0619, "step": 12712 }, { "epoch": 2.059786130913804, "grad_norm": 0.7911940217018127, "learning_rate": 1.1810994210221234e-06, "loss": 0.0587, "step": 12713 }, { "epoch": 2.059948152948801, "grad_norm": 0.9792639017105103, "learning_rate": 1.1807279436594967e-06, "loss": 0.0734, "step": 12714 }, { "epoch": 2.060110174983798, "grad_norm": 1.0325294733047485, "learning_rate": 1.1803565066634027e-06, "loss": 0.0708, "step": 12715 }, { "epoch": 2.0602721970187945, "grad_norm": 0.8511001467704773, "learning_rate": 1.1799851100452067e-06, "loss": 0.0583, "step": 12716 }, { "epoch": 2.0604342190537914, "grad_norm": 0.9019601345062256, "learning_rate": 1.17961375381627e-06, "loss": 0.0669, "step": 12717 }, { "epoch": 2.060596241088788, "grad_norm": 0.9868301153182983, "learning_rate": 1.1792424379879582e-06, "loss": 0.0608, "step": 12718 }, { "epoch": 2.060758263123785, "grad_norm": 0.954416036605835, "learning_rate": 1.178871162571633e-06, "loss": 0.0735, "step": 12719 }, { "epoch": 2.060920285158782, "grad_norm": 1.1071062088012695, "learning_rate": 1.1784999275786515e-06, "loss": 0.0564, "step": 12720 }, { "epoch": 2.0610823071937783, "grad_norm": 0.8846614956855774, "learning_rate": 1.1781287330203747e-06, "loss": 0.0631, "step": 12721 }, { "epoch": 2.061244329228775, "grad_norm": 0.894943118095398, "learning_rate": 1.177757578908159e-06, "loss": 0.0604, "step": 12722 }, { "epoch": 2.0614063512637717, "grad_norm": 1.3799413442611694, "learning_rate": 1.177386465253363e-06, "loss": 0.0675, "step": 12723 }, { "epoch": 2.0615683732987686, "grad_norm": 0.8426232933998108, "learning_rate": 1.17701539206734e-06, "loss": 0.0613, "step": 12724 }, { "epoch": 2.0617303953337656, "grad_norm": 0.9407797455787659, "learning_rate": 1.176644359361444e-06, "loss": 0.0729, "step": 12725 }, { "epoch": 2.061892417368762, "grad_norm": 0.9105675220489502, "learning_rate": 1.1762733671470285e-06, "loss": 0.0679, "step": 12726 }, { "epoch": 2.062054439403759, "grad_norm": 0.748227596282959, "learning_rate": 1.1759024154354446e-06, "loss": 0.0594, "step": 12727 }, { "epoch": 2.0622164614387555, "grad_norm": 0.8079039454460144, "learning_rate": 1.1755315042380425e-06, "loss": 0.0629, "step": 12728 }, { "epoch": 2.0623784834737524, "grad_norm": 0.9434335231781006, "learning_rate": 1.175160633566171e-06, "loss": 0.0646, "step": 12729 }, { "epoch": 2.0625405055087493, "grad_norm": 0.8539111018180847, "learning_rate": 1.1747898034311782e-06, "loss": 0.0605, "step": 12730 }, { "epoch": 2.062702527543746, "grad_norm": 0.8666152954101562, "learning_rate": 1.1744190138444118e-06, "loss": 0.071, "step": 12731 }, { "epoch": 2.0628645495787428, "grad_norm": 1.0582692623138428, "learning_rate": 1.1740482648172132e-06, "loss": 0.0718, "step": 12732 }, { "epoch": 2.0630265716137393, "grad_norm": 0.9556350111961365, "learning_rate": 1.1736775563609305e-06, "loss": 0.0716, "step": 12733 }, { "epoch": 2.063188593648736, "grad_norm": 0.829980731010437, "learning_rate": 1.1733068884869053e-06, "loss": 0.0654, "step": 12734 }, { "epoch": 2.063350615683733, "grad_norm": 0.7806819081306458, "learning_rate": 1.1729362612064782e-06, "loss": 0.0574, "step": 12735 }, { "epoch": 2.0635126377187296, "grad_norm": 1.1992825269699097, "learning_rate": 1.172565674530989e-06, "loss": 0.0556, "step": 12736 }, { "epoch": 2.0636746597537265, "grad_norm": 0.9343496561050415, "learning_rate": 1.1721951284717797e-06, "loss": 0.0697, "step": 12737 }, { "epoch": 2.0638366817887235, "grad_norm": 0.8090648055076599, "learning_rate": 1.1718246230401856e-06, "loss": 0.0644, "step": 12738 }, { "epoch": 2.06399870382372, "grad_norm": 0.9163773059844971, "learning_rate": 1.1714541582475435e-06, "loss": 0.0707, "step": 12739 }, { "epoch": 2.064160725858717, "grad_norm": 0.9538021683692932, "learning_rate": 1.1710837341051892e-06, "loss": 0.0655, "step": 12740 }, { "epoch": 2.0643227478937134, "grad_norm": 0.894012451171875, "learning_rate": 1.170713350624457e-06, "loss": 0.067, "step": 12741 }, { "epoch": 2.0644847699287103, "grad_norm": 0.896256148815155, "learning_rate": 1.1703430078166792e-06, "loss": 0.0683, "step": 12742 }, { "epoch": 2.0646467919637073, "grad_norm": 0.8130720257759094, "learning_rate": 1.1699727056931878e-06, "loss": 0.0666, "step": 12743 }, { "epoch": 2.0648088139987038, "grad_norm": 0.7944749593734741, "learning_rate": 1.169602444265313e-06, "loss": 0.0639, "step": 12744 }, { "epoch": 2.0649708360337007, "grad_norm": 0.9021603465080261, "learning_rate": 1.1692322235443845e-06, "loss": 0.069, "step": 12745 }, { "epoch": 2.065132858068697, "grad_norm": 0.8242917060852051, "learning_rate": 1.168862043541728e-06, "loss": 0.0675, "step": 12746 }, { "epoch": 2.065294880103694, "grad_norm": 0.806065559387207, "learning_rate": 1.1684919042686727e-06, "loss": 0.0595, "step": 12747 }, { "epoch": 2.065456902138691, "grad_norm": 0.8553471565246582, "learning_rate": 1.1681218057365429e-06, "loss": 0.0588, "step": 12748 }, { "epoch": 2.0656189241736875, "grad_norm": 0.8592274785041809, "learning_rate": 1.1677517479566636e-06, "loss": 0.0677, "step": 12749 }, { "epoch": 2.0657809462086845, "grad_norm": 0.8480504155158997, "learning_rate": 1.167381730940356e-06, "loss": 0.0636, "step": 12750 }, { "epoch": 2.065942968243681, "grad_norm": 0.768464207649231, "learning_rate": 1.1670117546989416e-06, "loss": 0.0603, "step": 12751 }, { "epoch": 2.066104990278678, "grad_norm": 0.8079333305358887, "learning_rate": 1.1666418192437434e-06, "loss": 0.0615, "step": 12752 }, { "epoch": 2.066267012313675, "grad_norm": 0.8408043384552002, "learning_rate": 1.1662719245860782e-06, "loss": 0.0604, "step": 12753 }, { "epoch": 2.0664290343486713, "grad_norm": 0.8342251181602478, "learning_rate": 1.1659020707372643e-06, "loss": 0.061, "step": 12754 }, { "epoch": 2.0665910563836682, "grad_norm": 0.868290364742279, "learning_rate": 1.1655322577086186e-06, "loss": 0.0628, "step": 12755 }, { "epoch": 2.0667530784186647, "grad_norm": 0.7598575353622437, "learning_rate": 1.1651624855114565e-06, "loss": 0.0547, "step": 12756 }, { "epoch": 2.0669151004536617, "grad_norm": 0.7536554932594299, "learning_rate": 1.1647927541570922e-06, "loss": 0.0583, "step": 12757 }, { "epoch": 2.0670771224886586, "grad_norm": 0.8469033241271973, "learning_rate": 1.1644230636568384e-06, "loss": 0.0659, "step": 12758 }, { "epoch": 2.067239144523655, "grad_norm": 0.796055257320404, "learning_rate": 1.164053414022007e-06, "loss": 0.0568, "step": 12759 }, { "epoch": 2.067401166558652, "grad_norm": 0.922766387462616, "learning_rate": 1.1636838052639081e-06, "loss": 0.0627, "step": 12760 }, { "epoch": 2.067563188593649, "grad_norm": 0.876051664352417, "learning_rate": 1.163314237393851e-06, "loss": 0.0594, "step": 12761 }, { "epoch": 2.0677252106286454, "grad_norm": 0.8684565424919128, "learning_rate": 1.1629447104231435e-06, "loss": 0.0642, "step": 12762 }, { "epoch": 2.0678872326636424, "grad_norm": 0.8998528122901917, "learning_rate": 1.162575224363093e-06, "loss": 0.0714, "step": 12763 }, { "epoch": 2.068049254698639, "grad_norm": 0.874921977519989, "learning_rate": 1.1622057792250033e-06, "loss": 0.0613, "step": 12764 }, { "epoch": 2.068211276733636, "grad_norm": 0.9340734481811523, "learning_rate": 1.1618363750201784e-06, "loss": 0.0604, "step": 12765 }, { "epoch": 2.0683732987686327, "grad_norm": 1.0002615451812744, "learning_rate": 1.1614670117599231e-06, "loss": 0.0656, "step": 12766 }, { "epoch": 2.0685353208036292, "grad_norm": 0.8985110521316528, "learning_rate": 1.161097689455539e-06, "loss": 0.0595, "step": 12767 }, { "epoch": 2.068697342838626, "grad_norm": 0.829571008682251, "learning_rate": 1.1607284081183245e-06, "loss": 0.0554, "step": 12768 }, { "epoch": 2.0688593648736227, "grad_norm": 0.9190569519996643, "learning_rate": 1.16035916775958e-06, "loss": 0.0607, "step": 12769 }, { "epoch": 2.0690213869086196, "grad_norm": 0.9735108017921448, "learning_rate": 1.1599899683906026e-06, "loss": 0.0649, "step": 12770 }, { "epoch": 2.0691834089436165, "grad_norm": 0.8792665600776672, "learning_rate": 1.1596208100226899e-06, "loss": 0.0554, "step": 12771 }, { "epoch": 2.069345430978613, "grad_norm": 0.8092507719993591, "learning_rate": 1.1592516926671367e-06, "loss": 0.0611, "step": 12772 }, { "epoch": 2.06950745301361, "grad_norm": 0.9477719068527222, "learning_rate": 1.1588826163352369e-06, "loss": 0.0648, "step": 12773 }, { "epoch": 2.0696694750486064, "grad_norm": 0.806533694267273, "learning_rate": 1.1585135810382836e-06, "loss": 0.0603, "step": 12774 }, { "epoch": 2.0698314970836034, "grad_norm": 0.8330726027488708, "learning_rate": 1.1581445867875684e-06, "loss": 0.0618, "step": 12775 }, { "epoch": 2.0699935191186003, "grad_norm": 0.9184839129447937, "learning_rate": 1.1577756335943818e-06, "loss": 0.0624, "step": 12776 }, { "epoch": 2.070155541153597, "grad_norm": 0.8566496968269348, "learning_rate": 1.1574067214700127e-06, "loss": 0.0613, "step": 12777 }, { "epoch": 2.0703175631885937, "grad_norm": 0.7928066849708557, "learning_rate": 1.1570378504257499e-06, "loss": 0.0562, "step": 12778 }, { "epoch": 2.07047958522359, "grad_norm": 0.8417590856552124, "learning_rate": 1.1566690204728779e-06, "loss": 0.0549, "step": 12779 }, { "epoch": 2.070641607258587, "grad_norm": 0.9719398021697998, "learning_rate": 1.156300231622682e-06, "loss": 0.0687, "step": 12780 }, { "epoch": 2.070803629293584, "grad_norm": 0.9284224510192871, "learning_rate": 1.1559314838864494e-06, "loss": 0.0637, "step": 12781 }, { "epoch": 2.0709656513285806, "grad_norm": 1.0845189094543457, "learning_rate": 1.1555627772754595e-06, "loss": 0.0689, "step": 12782 }, { "epoch": 2.0711276733635775, "grad_norm": 0.9417441487312317, "learning_rate": 1.1551941118009957e-06, "loss": 0.0633, "step": 12783 }, { "epoch": 2.071289695398574, "grad_norm": 0.8231827020645142, "learning_rate": 1.1548254874743365e-06, "loss": 0.06, "step": 12784 }, { "epoch": 2.071451717433571, "grad_norm": 0.8818009495735168, "learning_rate": 1.154456904306764e-06, "loss": 0.0611, "step": 12785 }, { "epoch": 2.071613739468568, "grad_norm": 0.8181620240211487, "learning_rate": 1.154088362309553e-06, "loss": 0.0578, "step": 12786 }, { "epoch": 2.0717757615035644, "grad_norm": 0.9999836087226868, "learning_rate": 1.1537198614939812e-06, "loss": 0.0684, "step": 12787 }, { "epoch": 2.0719377835385613, "grad_norm": 0.8913933634757996, "learning_rate": 1.1533514018713238e-06, "loss": 0.0593, "step": 12788 }, { "epoch": 2.072099805573558, "grad_norm": 0.8349035978317261, "learning_rate": 1.1529829834528547e-06, "loss": 0.0594, "step": 12789 }, { "epoch": 2.0722618276085547, "grad_norm": 1.0070099830627441, "learning_rate": 1.1526146062498464e-06, "loss": 0.067, "step": 12790 }, { "epoch": 2.0724238496435516, "grad_norm": 0.8604516386985779, "learning_rate": 1.1522462702735708e-06, "loss": 0.0622, "step": 12791 }, { "epoch": 2.072585871678548, "grad_norm": 0.8990740776062012, "learning_rate": 1.1518779755352977e-06, "loss": 0.0691, "step": 12792 }, { "epoch": 2.072747893713545, "grad_norm": 0.9744791388511658, "learning_rate": 1.151509722046297e-06, "loss": 0.0604, "step": 12793 }, { "epoch": 2.072909915748542, "grad_norm": 0.8340404629707336, "learning_rate": 1.1511415098178336e-06, "loss": 0.0611, "step": 12794 }, { "epoch": 2.0730719377835385, "grad_norm": 0.9593438506126404, "learning_rate": 1.1507733388611768e-06, "loss": 0.0699, "step": 12795 }, { "epoch": 2.0732339598185354, "grad_norm": 0.8299152255058289, "learning_rate": 1.1504052091875917e-06, "loss": 0.0567, "step": 12796 }, { "epoch": 2.073395981853532, "grad_norm": 0.7919155359268188, "learning_rate": 1.1500371208083405e-06, "loss": 0.0604, "step": 12797 }, { "epoch": 2.073558003888529, "grad_norm": 0.9348534345626831, "learning_rate": 1.1496690737346864e-06, "loss": 0.0608, "step": 12798 }, { "epoch": 2.073720025923526, "grad_norm": 0.8300426006317139, "learning_rate": 1.14930106797789e-06, "loss": 0.0642, "step": 12799 }, { "epoch": 2.0738820479585223, "grad_norm": 0.8624827861785889, "learning_rate": 1.148933103549214e-06, "loss": 0.0623, "step": 12800 }, { "epoch": 2.074044069993519, "grad_norm": 0.9354908466339111, "learning_rate": 1.148565180459915e-06, "loss": 0.0667, "step": 12801 }, { "epoch": 2.0742060920285157, "grad_norm": 0.9018098711967468, "learning_rate": 1.1481972987212505e-06, "loss": 0.0565, "step": 12802 }, { "epoch": 2.0743681140635126, "grad_norm": 0.9622835516929626, "learning_rate": 1.1478294583444779e-06, "loss": 0.0651, "step": 12803 }, { "epoch": 2.0745301360985096, "grad_norm": 0.8161885142326355, "learning_rate": 1.1474616593408513e-06, "loss": 0.0615, "step": 12804 }, { "epoch": 2.074692158133506, "grad_norm": 0.909511148929596, "learning_rate": 1.147093901721625e-06, "loss": 0.062, "step": 12805 }, { "epoch": 2.074854180168503, "grad_norm": 0.7995123267173767, "learning_rate": 1.1467261854980513e-06, "loss": 0.063, "step": 12806 }, { "epoch": 2.0750162022034995, "grad_norm": 0.8639039993286133, "learning_rate": 1.1463585106813823e-06, "loss": 0.0635, "step": 12807 }, { "epoch": 2.0751782242384964, "grad_norm": 0.8831427693367004, "learning_rate": 1.1459908772828658e-06, "loss": 0.0637, "step": 12808 }, { "epoch": 2.0753402462734933, "grad_norm": 0.9369395971298218, "learning_rate": 1.1456232853137522e-06, "loss": 0.065, "step": 12809 }, { "epoch": 2.07550226830849, "grad_norm": 0.8888958096504211, "learning_rate": 1.1452557347852885e-06, "loss": 0.062, "step": 12810 }, { "epoch": 2.0756642903434868, "grad_norm": 0.9641885757446289, "learning_rate": 1.1448882257087222e-06, "loss": 0.0675, "step": 12811 }, { "epoch": 2.0758263123784833, "grad_norm": 1.0952035188674927, "learning_rate": 1.1445207580952956e-06, "loss": 0.0677, "step": 12812 }, { "epoch": 2.07598833441348, "grad_norm": 1.0690641403198242, "learning_rate": 1.1441533319562528e-06, "loss": 0.0685, "step": 12813 }, { "epoch": 2.076150356448477, "grad_norm": 0.9642135500907898, "learning_rate": 1.143785947302839e-06, "loss": 0.0711, "step": 12814 }, { "epoch": 2.0763123784834736, "grad_norm": 1.0191136598587036, "learning_rate": 1.143418604146292e-06, "loss": 0.0728, "step": 12815 }, { "epoch": 2.0764744005184705, "grad_norm": 0.8897978663444519, "learning_rate": 1.143051302497853e-06, "loss": 0.0671, "step": 12816 }, { "epoch": 2.0766364225534675, "grad_norm": 1.1707680225372314, "learning_rate": 1.1426840423687605e-06, "loss": 0.0633, "step": 12817 }, { "epoch": 2.076798444588464, "grad_norm": 0.7090242505073547, "learning_rate": 1.1423168237702515e-06, "loss": 0.0524, "step": 12818 }, { "epoch": 2.076960466623461, "grad_norm": 0.8160051703453064, "learning_rate": 1.141949646713562e-06, "loss": 0.0589, "step": 12819 }, { "epoch": 2.0771224886584574, "grad_norm": 0.8117794394493103, "learning_rate": 1.1415825112099274e-06, "loss": 0.0533, "step": 12820 }, { "epoch": 2.0772845106934543, "grad_norm": 0.9710679054260254, "learning_rate": 1.1412154172705803e-06, "loss": 0.0625, "step": 12821 }, { "epoch": 2.0774465327284513, "grad_norm": 0.8708137273788452, "learning_rate": 1.1408483649067541e-06, "loss": 0.0557, "step": 12822 }, { "epoch": 2.0776085547634477, "grad_norm": 0.8636717200279236, "learning_rate": 1.1404813541296772e-06, "loss": 0.0605, "step": 12823 }, { "epoch": 2.0777705767984447, "grad_norm": 0.763903021812439, "learning_rate": 1.1401143849505816e-06, "loss": 0.0597, "step": 12824 }, { "epoch": 2.077932598833441, "grad_norm": 0.9749400019645691, "learning_rate": 1.139747457380696e-06, "loss": 0.0633, "step": 12825 }, { "epoch": 2.078094620868438, "grad_norm": 0.9545486569404602, "learning_rate": 1.1393805714312456e-06, "loss": 0.0734, "step": 12826 }, { "epoch": 2.078256642903435, "grad_norm": 0.8538177013397217, "learning_rate": 1.1390137271134564e-06, "loss": 0.064, "step": 12827 }, { "epoch": 2.0784186649384315, "grad_norm": 1.0204607248306274, "learning_rate": 1.138646924438554e-06, "loss": 0.0705, "step": 12828 }, { "epoch": 2.0785806869734285, "grad_norm": 0.9345843195915222, "learning_rate": 1.1382801634177627e-06, "loss": 0.0648, "step": 12829 }, { "epoch": 2.078742709008425, "grad_norm": 0.9654140472412109, "learning_rate": 1.1379134440623018e-06, "loss": 0.0674, "step": 12830 }, { "epoch": 2.078904731043422, "grad_norm": 0.8916672468185425, "learning_rate": 1.1375467663833935e-06, "loss": 0.0664, "step": 12831 }, { "epoch": 2.079066753078419, "grad_norm": 0.9074366092681885, "learning_rate": 1.137180130392257e-06, "loss": 0.0525, "step": 12832 }, { "epoch": 2.0792287751134153, "grad_norm": 0.9792858362197876, "learning_rate": 1.1368135361001107e-06, "loss": 0.0692, "step": 12833 }, { "epoch": 2.0793907971484122, "grad_norm": 0.9119909405708313, "learning_rate": 1.1364469835181712e-06, "loss": 0.0681, "step": 12834 }, { "epoch": 2.0795528191834087, "grad_norm": 0.876586377620697, "learning_rate": 1.1360804726576543e-06, "loss": 0.0603, "step": 12835 }, { "epoch": 2.0797148412184057, "grad_norm": 0.8376288414001465, "learning_rate": 1.1357140035297745e-06, "loss": 0.0638, "step": 12836 }, { "epoch": 2.0798768632534026, "grad_norm": 0.8386695981025696, "learning_rate": 1.1353475761457445e-06, "loss": 0.057, "step": 12837 }, { "epoch": 2.080038885288399, "grad_norm": 0.8405042886734009, "learning_rate": 1.1349811905167762e-06, "loss": 0.066, "step": 12838 }, { "epoch": 2.080200907323396, "grad_norm": 0.9115575551986694, "learning_rate": 1.13461484665408e-06, "loss": 0.0623, "step": 12839 }, { "epoch": 2.080362929358393, "grad_norm": 0.7548916339874268, "learning_rate": 1.134248544568867e-06, "loss": 0.0554, "step": 12840 }, { "epoch": 2.0805249513933894, "grad_norm": 0.873468816280365, "learning_rate": 1.1338822842723418e-06, "loss": 0.0674, "step": 12841 }, { "epoch": 2.0806869734283864, "grad_norm": 0.9145466685295105, "learning_rate": 1.1335160657757121e-06, "loss": 0.0608, "step": 12842 }, { "epoch": 2.080848995463383, "grad_norm": 0.8636155724525452, "learning_rate": 1.1331498890901851e-06, "loss": 0.064, "step": 12843 }, { "epoch": 2.08101101749838, "grad_norm": 0.9404274821281433, "learning_rate": 1.1327837542269645e-06, "loss": 0.0603, "step": 12844 }, { "epoch": 2.0811730395333767, "grad_norm": 0.867577850818634, "learning_rate": 1.1324176611972515e-06, "loss": 0.0659, "step": 12845 }, { "epoch": 2.0813350615683732, "grad_norm": 0.7574745416641235, "learning_rate": 1.1320516100122487e-06, "loss": 0.0572, "step": 12846 }, { "epoch": 2.08149708360337, "grad_norm": 0.8237707614898682, "learning_rate": 1.1316856006831562e-06, "loss": 0.0643, "step": 12847 }, { "epoch": 2.0816591056383666, "grad_norm": 0.950802743434906, "learning_rate": 1.1313196332211728e-06, "loss": 0.0632, "step": 12848 }, { "epoch": 2.0818211276733636, "grad_norm": 0.9620135426521301, "learning_rate": 1.1309537076374968e-06, "loss": 0.0682, "step": 12849 }, { "epoch": 2.0819831497083605, "grad_norm": 0.9418061375617981, "learning_rate": 1.130587823943324e-06, "loss": 0.0629, "step": 12850 }, { "epoch": 2.082145171743357, "grad_norm": 0.8530777096748352, "learning_rate": 1.1302219821498502e-06, "loss": 0.0565, "step": 12851 }, { "epoch": 2.082307193778354, "grad_norm": 0.8891876339912415, "learning_rate": 1.1298561822682687e-06, "loss": 0.0709, "step": 12852 }, { "epoch": 2.0824692158133504, "grad_norm": 0.9837715029716492, "learning_rate": 1.1294904243097726e-06, "loss": 0.0619, "step": 12853 }, { "epoch": 2.0826312378483474, "grad_norm": 0.8900513648986816, "learning_rate": 1.1291247082855528e-06, "loss": 0.0661, "step": 12854 }, { "epoch": 2.0827932598833443, "grad_norm": 0.8813618421554565, "learning_rate": 1.1287590342068005e-06, "loss": 0.0588, "step": 12855 }, { "epoch": 2.082955281918341, "grad_norm": 0.7814889550209045, "learning_rate": 1.1283934020847015e-06, "loss": 0.0516, "step": 12856 }, { "epoch": 2.0831173039533377, "grad_norm": 0.7788052558898926, "learning_rate": 1.128027811930446e-06, "loss": 0.0581, "step": 12857 }, { "epoch": 2.083279325988334, "grad_norm": 0.8594955205917358, "learning_rate": 1.1276622637552203e-06, "loss": 0.0575, "step": 12858 }, { "epoch": 2.083441348023331, "grad_norm": 0.8879890441894531, "learning_rate": 1.1272967575702075e-06, "loss": 0.0587, "step": 12859 }, { "epoch": 2.083603370058328, "grad_norm": 1.0055869817733765, "learning_rate": 1.126931293386592e-06, "loss": 0.0642, "step": 12860 }, { "epoch": 2.0837653920933246, "grad_norm": 1.0345205068588257, "learning_rate": 1.1265658712155552e-06, "loss": 0.0691, "step": 12861 }, { "epoch": 2.0839274141283215, "grad_norm": 0.8441528081893921, "learning_rate": 1.1262004910682811e-06, "loss": 0.0597, "step": 12862 }, { "epoch": 2.0840894361633184, "grad_norm": 0.9225409030914307, "learning_rate": 1.1258351529559463e-06, "loss": 0.0598, "step": 12863 }, { "epoch": 2.084251458198315, "grad_norm": 0.9410536885261536, "learning_rate": 1.1254698568897308e-06, "loss": 0.0637, "step": 12864 }, { "epoch": 2.084413480233312, "grad_norm": 0.9590262174606323, "learning_rate": 1.1251046028808107e-06, "loss": 0.0622, "step": 12865 }, { "epoch": 2.0845755022683083, "grad_norm": 0.944026529788971, "learning_rate": 1.124739390940363e-06, "loss": 0.0729, "step": 12866 }, { "epoch": 2.0847375243033053, "grad_norm": 0.8297381401062012, "learning_rate": 1.124374221079562e-06, "loss": 0.0582, "step": 12867 }, { "epoch": 2.084899546338302, "grad_norm": 0.8802173137664795, "learning_rate": 1.1240090933095806e-06, "loss": 0.0617, "step": 12868 }, { "epoch": 2.0850615683732987, "grad_norm": 0.8409082889556885, "learning_rate": 1.1236440076415923e-06, "loss": 0.0575, "step": 12869 }, { "epoch": 2.0852235904082956, "grad_norm": 0.8129790425300598, "learning_rate": 1.1232789640867644e-06, "loss": 0.0586, "step": 12870 }, { "epoch": 2.085385612443292, "grad_norm": 0.9125465750694275, "learning_rate": 1.1229139626562698e-06, "loss": 0.0634, "step": 12871 }, { "epoch": 2.085547634478289, "grad_norm": 0.8047336935997009, "learning_rate": 1.1225490033612755e-06, "loss": 0.0589, "step": 12872 }, { "epoch": 2.085709656513286, "grad_norm": 0.9558115005493164, "learning_rate": 1.1221840862129493e-06, "loss": 0.0637, "step": 12873 }, { "epoch": 2.0858716785482825, "grad_norm": 0.873754620552063, "learning_rate": 1.1218192112224547e-06, "loss": 0.0675, "step": 12874 }, { "epoch": 2.0860337005832794, "grad_norm": 0.857243001461029, "learning_rate": 1.1214543784009563e-06, "loss": 0.065, "step": 12875 }, { "epoch": 2.086195722618276, "grad_norm": 0.8386802673339844, "learning_rate": 1.1210895877596195e-06, "loss": 0.0564, "step": 12876 }, { "epoch": 2.086357744653273, "grad_norm": 0.958071231842041, "learning_rate": 1.1207248393096038e-06, "loss": 0.0732, "step": 12877 }, { "epoch": 2.0865197666882698, "grad_norm": 0.8746595978736877, "learning_rate": 1.12036013306207e-06, "loss": 0.0591, "step": 12878 }, { "epoch": 2.0866817887232663, "grad_norm": 0.854904294013977, "learning_rate": 1.1199954690281779e-06, "loss": 0.0653, "step": 12879 }, { "epoch": 2.086843810758263, "grad_norm": 0.8066260814666748, "learning_rate": 1.1196308472190845e-06, "loss": 0.0576, "step": 12880 }, { "epoch": 2.0870058327932597, "grad_norm": 0.894486129283905, "learning_rate": 1.1192662676459468e-06, "loss": 0.0684, "step": 12881 }, { "epoch": 2.0871678548282566, "grad_norm": 0.8854237198829651, "learning_rate": 1.1189017303199198e-06, "loss": 0.0656, "step": 12882 }, { "epoch": 2.0873298768632536, "grad_norm": 0.8725504875183105, "learning_rate": 1.1185372352521581e-06, "loss": 0.0578, "step": 12883 }, { "epoch": 2.08749189889825, "grad_norm": 0.8695107102394104, "learning_rate": 1.1181727824538147e-06, "loss": 0.0676, "step": 12884 }, { "epoch": 2.087653920933247, "grad_norm": 0.9441255331039429, "learning_rate": 1.117808371936038e-06, "loss": 0.065, "step": 12885 }, { "epoch": 2.087815942968244, "grad_norm": 0.9579770565032959, "learning_rate": 1.1174440037099815e-06, "loss": 0.0623, "step": 12886 }, { "epoch": 2.0879779650032404, "grad_norm": 0.9786186218261719, "learning_rate": 1.117079677786793e-06, "loss": 0.0702, "step": 12887 }, { "epoch": 2.0881399870382373, "grad_norm": 0.8708941340446472, "learning_rate": 1.1167153941776205e-06, "loss": 0.056, "step": 12888 }, { "epoch": 2.088302009073234, "grad_norm": 0.8261350989341736, "learning_rate": 1.1163511528936084e-06, "loss": 0.0617, "step": 12889 }, { "epoch": 2.0884640311082308, "grad_norm": 0.9200928211212158, "learning_rate": 1.1159869539459018e-06, "loss": 0.0622, "step": 12890 }, { "epoch": 2.0886260531432277, "grad_norm": 1.0103826522827148, "learning_rate": 1.1156227973456468e-06, "loss": 0.0651, "step": 12891 }, { "epoch": 2.088788075178224, "grad_norm": 1.0453431606292725, "learning_rate": 1.1152586831039835e-06, "loss": 0.0617, "step": 12892 }, { "epoch": 2.088950097213221, "grad_norm": 0.8877750039100647, "learning_rate": 1.1148946112320533e-06, "loss": 0.06, "step": 12893 }, { "epoch": 2.0891121192482176, "grad_norm": 0.9256067872047424, "learning_rate": 1.1145305817409962e-06, "loss": 0.0679, "step": 12894 }, { "epoch": 2.0892741412832145, "grad_norm": 0.7883033752441406, "learning_rate": 1.1141665946419506e-06, "loss": 0.0532, "step": 12895 }, { "epoch": 2.0894361633182115, "grad_norm": 0.9102799892425537, "learning_rate": 1.1138026499460532e-06, "loss": 0.0666, "step": 12896 }, { "epoch": 2.089598185353208, "grad_norm": 0.9670751690864563, "learning_rate": 1.1134387476644407e-06, "loss": 0.0625, "step": 12897 }, { "epoch": 2.089760207388205, "grad_norm": 0.8531269431114197, "learning_rate": 1.1130748878082467e-06, "loss": 0.0646, "step": 12898 }, { "epoch": 2.0899222294232014, "grad_norm": 0.749323844909668, "learning_rate": 1.1127110703886048e-06, "loss": 0.0549, "step": 12899 }, { "epoch": 2.0900842514581983, "grad_norm": 0.8951242566108704, "learning_rate": 1.1123472954166473e-06, "loss": 0.0562, "step": 12900 }, { "epoch": 2.0902462734931953, "grad_norm": 0.8564949035644531, "learning_rate": 1.1119835629035042e-06, "loss": 0.0605, "step": 12901 }, { "epoch": 2.0904082955281917, "grad_norm": 0.7760194540023804, "learning_rate": 1.1116198728603061e-06, "loss": 0.06, "step": 12902 }, { "epoch": 2.0905703175631887, "grad_norm": 0.9799525141716003, "learning_rate": 1.1112562252981793e-06, "loss": 0.0622, "step": 12903 }, { "epoch": 2.090732339598185, "grad_norm": 0.8512018918991089, "learning_rate": 1.1108926202282505e-06, "loss": 0.0646, "step": 12904 }, { "epoch": 2.090894361633182, "grad_norm": 0.9717202186584473, "learning_rate": 1.1105290576616467e-06, "loss": 0.0642, "step": 12905 }, { "epoch": 2.091056383668179, "grad_norm": 0.8135266304016113, "learning_rate": 1.110165537609492e-06, "loss": 0.0635, "step": 12906 }, { "epoch": 2.0912184057031755, "grad_norm": 0.8754247426986694, "learning_rate": 1.109802060082908e-06, "loss": 0.0609, "step": 12907 }, { "epoch": 2.0913804277381725, "grad_norm": 0.9074878692626953, "learning_rate": 1.1094386250930164e-06, "loss": 0.0601, "step": 12908 }, { "epoch": 2.091542449773169, "grad_norm": 0.8210632801055908, "learning_rate": 1.1090752326509379e-06, "loss": 0.059, "step": 12909 }, { "epoch": 2.091704471808166, "grad_norm": 0.9320157766342163, "learning_rate": 1.1087118827677915e-06, "loss": 0.0734, "step": 12910 }, { "epoch": 2.091866493843163, "grad_norm": 0.8074702024459839, "learning_rate": 1.1083485754546944e-06, "loss": 0.0599, "step": 12911 }, { "epoch": 2.0920285158781593, "grad_norm": 0.81490558385849, "learning_rate": 1.1079853107227634e-06, "loss": 0.0622, "step": 12912 }, { "epoch": 2.0921905379131562, "grad_norm": 0.9869770407676697, "learning_rate": 1.107622088583113e-06, "loss": 0.0634, "step": 12913 }, { "epoch": 2.0923525599481527, "grad_norm": 1.0530625581741333, "learning_rate": 1.1072589090468571e-06, "loss": 0.067, "step": 12914 }, { "epoch": 2.0925145819831497, "grad_norm": 0.9498258233070374, "learning_rate": 1.1068957721251085e-06, "loss": 0.0615, "step": 12915 }, { "epoch": 2.0926766040181466, "grad_norm": 0.9322092533111572, "learning_rate": 1.1065326778289782e-06, "loss": 0.0678, "step": 12916 }, { "epoch": 2.092838626053143, "grad_norm": 1.2187130451202393, "learning_rate": 1.1061696261695765e-06, "loss": 0.0707, "step": 12917 }, { "epoch": 2.09300064808814, "grad_norm": 0.8521288633346558, "learning_rate": 1.1058066171580092e-06, "loss": 0.0667, "step": 12918 }, { "epoch": 2.093162670123137, "grad_norm": 0.9618637561798096, "learning_rate": 1.1054436508053866e-06, "loss": 0.0642, "step": 12919 }, { "epoch": 2.0933246921581334, "grad_norm": 0.7557185888290405, "learning_rate": 1.1050807271228146e-06, "loss": 0.0558, "step": 12920 }, { "epoch": 2.0934867141931304, "grad_norm": 0.9032410383224487, "learning_rate": 1.1047178461213956e-06, "loss": 0.0654, "step": 12921 }, { "epoch": 2.093648736228127, "grad_norm": 0.813040018081665, "learning_rate": 1.1043550078122342e-06, "loss": 0.0575, "step": 12922 }, { "epoch": 2.093810758263124, "grad_norm": 0.8633285164833069, "learning_rate": 1.103992212206431e-06, "loss": 0.061, "step": 12923 }, { "epoch": 2.0939727802981207, "grad_norm": 0.9251546263694763, "learning_rate": 1.1036294593150898e-06, "loss": 0.0627, "step": 12924 }, { "epoch": 2.094134802333117, "grad_norm": 0.7767398953437805, "learning_rate": 1.103266749149307e-06, "loss": 0.0583, "step": 12925 }, { "epoch": 2.094296824368114, "grad_norm": 0.823419988155365, "learning_rate": 1.1029040817201819e-06, "loss": 0.0631, "step": 12926 }, { "epoch": 2.0944588464031106, "grad_norm": 0.9466148018836975, "learning_rate": 1.1025414570388108e-06, "loss": 0.0655, "step": 12927 }, { "epoch": 2.0946208684381076, "grad_norm": 0.8676579594612122, "learning_rate": 1.1021788751162893e-06, "loss": 0.0575, "step": 12928 }, { "epoch": 2.0947828904731045, "grad_norm": 0.8906098008155823, "learning_rate": 1.1018163359637116e-06, "loss": 0.0659, "step": 12929 }, { "epoch": 2.094944912508101, "grad_norm": 0.9030002951622009, "learning_rate": 1.1014538395921704e-06, "loss": 0.0685, "step": 12930 }, { "epoch": 2.095106934543098, "grad_norm": 0.8031485676765442, "learning_rate": 1.1010913860127572e-06, "loss": 0.0627, "step": 12931 }, { "epoch": 2.0952689565780944, "grad_norm": 0.8047374486923218, "learning_rate": 1.1007289752365635e-06, "loss": 0.0581, "step": 12932 }, { "epoch": 2.0954309786130914, "grad_norm": 0.9250509738922119, "learning_rate": 1.100366607274675e-06, "loss": 0.0613, "step": 12933 }, { "epoch": 2.0955930006480883, "grad_norm": 0.7186943292617798, "learning_rate": 1.1000042821381823e-06, "loss": 0.0544, "step": 12934 }, { "epoch": 2.095755022683085, "grad_norm": 0.7666640281677246, "learning_rate": 1.0996419998381713e-06, "loss": 0.0594, "step": 12935 }, { "epoch": 2.0959170447180817, "grad_norm": 0.913008987903595, "learning_rate": 1.0992797603857257e-06, "loss": 0.0621, "step": 12936 }, { "epoch": 2.096079066753078, "grad_norm": 0.9413930177688599, "learning_rate": 1.0989175637919297e-06, "loss": 0.067, "step": 12937 }, { "epoch": 2.096241088788075, "grad_norm": 0.8046421408653259, "learning_rate": 1.0985554100678647e-06, "loss": 0.057, "step": 12938 }, { "epoch": 2.096403110823072, "grad_norm": 0.99111407995224, "learning_rate": 1.0981932992246144e-06, "loss": 0.0691, "step": 12939 }, { "epoch": 2.0965651328580686, "grad_norm": 0.8477975130081177, "learning_rate": 1.0978312312732562e-06, "loss": 0.0584, "step": 12940 }, { "epoch": 2.0967271548930655, "grad_norm": 0.8106995820999146, "learning_rate": 1.097469206224869e-06, "loss": 0.0588, "step": 12941 }, { "epoch": 2.0968891769280624, "grad_norm": 0.8158612847328186, "learning_rate": 1.09710722409053e-06, "loss": 0.0622, "step": 12942 }, { "epoch": 2.097051198963059, "grad_norm": 0.8949916362762451, "learning_rate": 1.096745284881315e-06, "loss": 0.0689, "step": 12943 }, { "epoch": 2.097213220998056, "grad_norm": 0.8782987594604492, "learning_rate": 1.0963833886082987e-06, "loss": 0.0649, "step": 12944 }, { "epoch": 2.0973752430330523, "grad_norm": 0.9232782125473022, "learning_rate": 1.0960215352825537e-06, "loss": 0.0711, "step": 12945 }, { "epoch": 2.0975372650680493, "grad_norm": 0.8627330660820007, "learning_rate": 1.0956597249151532e-06, "loss": 0.0611, "step": 12946 }, { "epoch": 2.097699287103046, "grad_norm": 0.779122531414032, "learning_rate": 1.0952979575171649e-06, "loss": 0.0556, "step": 12947 }, { "epoch": 2.0978613091380427, "grad_norm": 1.0145422220230103, "learning_rate": 1.0949362330996605e-06, "loss": 0.0594, "step": 12948 }, { "epoch": 2.0980233311730396, "grad_norm": 0.8492004871368408, "learning_rate": 1.0945745516737075e-06, "loss": 0.0625, "step": 12949 }, { "epoch": 2.098185353208036, "grad_norm": 0.8178960680961609, "learning_rate": 1.094212913250373e-06, "loss": 0.053, "step": 12950 }, { "epoch": 2.098347375243033, "grad_norm": 0.9389135837554932, "learning_rate": 1.0938513178407201e-06, "loss": 0.0631, "step": 12951 }, { "epoch": 2.09850939727803, "grad_norm": 0.9091375470161438, "learning_rate": 1.0934897654558134e-06, "loss": 0.0636, "step": 12952 }, { "epoch": 2.0986714193130265, "grad_norm": 0.8559199571609497, "learning_rate": 1.0931282561067183e-06, "loss": 0.0569, "step": 12953 }, { "epoch": 2.0988334413480234, "grad_norm": 0.8254791498184204, "learning_rate": 1.0927667898044927e-06, "loss": 0.0629, "step": 12954 }, { "epoch": 2.09899546338302, "grad_norm": 0.7848954796791077, "learning_rate": 1.092405366560198e-06, "loss": 0.0606, "step": 12955 }, { "epoch": 2.099157485418017, "grad_norm": 0.9098137021064758, "learning_rate": 1.092043986384893e-06, "loss": 0.0613, "step": 12956 }, { "epoch": 2.0993195074530138, "grad_norm": 0.7770287990570068, "learning_rate": 1.0916826492896346e-06, "loss": 0.0554, "step": 12957 }, { "epoch": 2.0994815294880103, "grad_norm": 0.8795650005340576, "learning_rate": 1.091321355285479e-06, "loss": 0.0574, "step": 12958 }, { "epoch": 2.099643551523007, "grad_norm": 1.0702177286148071, "learning_rate": 1.0909601043834812e-06, "loss": 0.0708, "step": 12959 }, { "epoch": 2.0998055735580037, "grad_norm": 0.8428245782852173, "learning_rate": 1.0905988965946942e-06, "loss": 0.0629, "step": 12960 }, { "epoch": 2.0999675955930006, "grad_norm": 0.8084932565689087, "learning_rate": 1.0902377319301704e-06, "loss": 0.0526, "step": 12961 }, { "epoch": 2.1001296176279975, "grad_norm": 0.9167864918708801, "learning_rate": 1.0898766104009606e-06, "loss": 0.0617, "step": 12962 }, { "epoch": 2.100291639662994, "grad_norm": 0.9969832301139832, "learning_rate": 1.089515532018114e-06, "loss": 0.0737, "step": 12963 }, { "epoch": 2.100453661697991, "grad_norm": 1.0059758424758911, "learning_rate": 1.0891544967926795e-06, "loss": 0.068, "step": 12964 }, { "epoch": 2.100615683732988, "grad_norm": 0.7887366414070129, "learning_rate": 1.0887935047357023e-06, "loss": 0.0574, "step": 12965 }, { "epoch": 2.1007777057679844, "grad_norm": 0.8655834794044495, "learning_rate": 1.0884325558582283e-06, "loss": 0.0611, "step": 12966 }, { "epoch": 2.1009397278029813, "grad_norm": 0.8879144191741943, "learning_rate": 1.0880716501713025e-06, "loss": 0.0605, "step": 12967 }, { "epoch": 2.101101749837978, "grad_norm": 0.7682301998138428, "learning_rate": 1.0877107876859688e-06, "loss": 0.0619, "step": 12968 }, { "epoch": 2.1012637718729748, "grad_norm": 1.0318995714187622, "learning_rate": 1.0873499684132663e-06, "loss": 0.0671, "step": 12969 }, { "epoch": 2.1014257939079717, "grad_norm": 0.8464838862419128, "learning_rate": 1.086989192364236e-06, "loss": 0.0651, "step": 12970 }, { "epoch": 2.101587815942968, "grad_norm": 1.0440428256988525, "learning_rate": 1.0866284595499172e-06, "loss": 0.068, "step": 12971 }, { "epoch": 2.101749837977965, "grad_norm": 0.8565576076507568, "learning_rate": 1.0862677699813471e-06, "loss": 0.0539, "step": 12972 }, { "epoch": 2.1019118600129616, "grad_norm": 0.86493980884552, "learning_rate": 1.0859071236695623e-06, "loss": 0.0623, "step": 12973 }, { "epoch": 2.1020738820479585, "grad_norm": 0.8555430173873901, "learning_rate": 1.0855465206255972e-06, "loss": 0.0614, "step": 12974 }, { "epoch": 2.1022359040829555, "grad_norm": 1.089228630065918, "learning_rate": 1.0851859608604858e-06, "loss": 0.0658, "step": 12975 }, { "epoch": 2.102397926117952, "grad_norm": 0.8358057141304016, "learning_rate": 1.0848254443852602e-06, "loss": 0.0618, "step": 12976 }, { "epoch": 2.102559948152949, "grad_norm": 0.9118403792381287, "learning_rate": 1.0844649712109515e-06, "loss": 0.0661, "step": 12977 }, { "epoch": 2.1027219701879454, "grad_norm": 0.7774009704589844, "learning_rate": 1.084104541348589e-06, "loss": 0.06, "step": 12978 }, { "epoch": 2.1028839922229423, "grad_norm": 0.921910285949707, "learning_rate": 1.083744154809202e-06, "loss": 0.0647, "step": 12979 }, { "epoch": 2.1030460142579392, "grad_norm": 0.8106131553649902, "learning_rate": 1.0833838116038156e-06, "loss": 0.0586, "step": 12980 }, { "epoch": 2.1032080362929357, "grad_norm": 0.8339465856552124, "learning_rate": 1.0830235117434557e-06, "loss": 0.062, "step": 12981 }, { "epoch": 2.1033700583279327, "grad_norm": 0.9177747368812561, "learning_rate": 1.0826632552391484e-06, "loss": 0.0655, "step": 12982 }, { "epoch": 2.103532080362929, "grad_norm": 0.8722216486930847, "learning_rate": 1.0823030421019163e-06, "loss": 0.062, "step": 12983 }, { "epoch": 2.103694102397926, "grad_norm": 0.8818717002868652, "learning_rate": 1.081942872342779e-06, "loss": 0.0585, "step": 12984 }, { "epoch": 2.103856124432923, "grad_norm": 0.8914388418197632, "learning_rate": 1.081582745972758e-06, "loss": 0.066, "step": 12985 }, { "epoch": 2.1040181464679195, "grad_norm": 0.9045519828796387, "learning_rate": 1.0812226630028738e-06, "loss": 0.0622, "step": 12986 }, { "epoch": 2.1041801685029164, "grad_norm": 1.0446083545684814, "learning_rate": 1.080862623444142e-06, "loss": 0.0668, "step": 12987 }, { "epoch": 2.1043421905379134, "grad_norm": 0.7879253029823303, "learning_rate": 1.0805026273075797e-06, "loss": 0.0604, "step": 12988 }, { "epoch": 2.10450421257291, "grad_norm": 0.8485068082809448, "learning_rate": 1.0801426746042018e-06, "loss": 0.0651, "step": 12989 }, { "epoch": 2.104666234607907, "grad_norm": 0.8506676554679871, "learning_rate": 1.0797827653450222e-06, "loss": 0.0647, "step": 12990 }, { "epoch": 2.1048282566429033, "grad_norm": 0.8726806640625, "learning_rate": 1.079422899541053e-06, "loss": 0.0626, "step": 12991 }, { "epoch": 2.1049902786779002, "grad_norm": 0.9030711650848389, "learning_rate": 1.0790630772033057e-06, "loss": 0.0698, "step": 12992 }, { "epoch": 2.105152300712897, "grad_norm": 0.8381808400154114, "learning_rate": 1.0787032983427892e-06, "loss": 0.0691, "step": 12993 }, { "epoch": 2.1053143227478937, "grad_norm": 0.9773826003074646, "learning_rate": 1.0783435629705134e-06, "loss": 0.0644, "step": 12994 }, { "epoch": 2.1054763447828906, "grad_norm": 0.8287825584411621, "learning_rate": 1.0779838710974822e-06, "loss": 0.0559, "step": 12995 }, { "epoch": 2.105638366817887, "grad_norm": 0.862917423248291, "learning_rate": 1.0776242227347044e-06, "loss": 0.0587, "step": 12996 }, { "epoch": 2.105800388852884, "grad_norm": 0.8506877422332764, "learning_rate": 1.0772646178931843e-06, "loss": 0.0565, "step": 12997 }, { "epoch": 2.105962410887881, "grad_norm": 0.8896209001541138, "learning_rate": 1.0769050565839228e-06, "loss": 0.0658, "step": 12998 }, { "epoch": 2.1061244329228774, "grad_norm": 0.9213668704032898, "learning_rate": 1.076545538817923e-06, "loss": 0.0604, "step": 12999 }, { "epoch": 2.1062864549578744, "grad_norm": 0.9148227572441101, "learning_rate": 1.0761860646061838e-06, "loss": 0.0712, "step": 13000 }, { "epoch": 2.106448476992871, "grad_norm": 0.8725471496582031, "learning_rate": 1.0758266339597077e-06, "loss": 0.0651, "step": 13001 }, { "epoch": 2.106610499027868, "grad_norm": 0.9468567967414856, "learning_rate": 1.0754672468894889e-06, "loss": 0.0638, "step": 13002 }, { "epoch": 2.1067725210628647, "grad_norm": 0.9501492977142334, "learning_rate": 1.075107903406525e-06, "loss": 0.0691, "step": 13003 }, { "epoch": 2.106934543097861, "grad_norm": 0.8555724024772644, "learning_rate": 1.0747486035218116e-06, "loss": 0.0641, "step": 13004 }, { "epoch": 2.107096565132858, "grad_norm": 1.0084507465362549, "learning_rate": 1.0743893472463416e-06, "loss": 0.0585, "step": 13005 }, { "epoch": 2.1072585871678546, "grad_norm": 0.8593863844871521, "learning_rate": 1.0740301345911075e-06, "loss": 0.0604, "step": 13006 }, { "epoch": 2.1074206092028516, "grad_norm": 0.9241552948951721, "learning_rate": 1.0736709655671004e-06, "loss": 0.0619, "step": 13007 }, { "epoch": 2.1075826312378485, "grad_norm": 0.9510383009910583, "learning_rate": 1.0733118401853112e-06, "loss": 0.0679, "step": 13008 }, { "epoch": 2.107744653272845, "grad_norm": 0.8677833676338196, "learning_rate": 1.0729527584567255e-06, "loss": 0.0654, "step": 13009 }, { "epoch": 2.107906675307842, "grad_norm": 0.8571850061416626, "learning_rate": 1.0725937203923327e-06, "loss": 0.0565, "step": 13010 }, { "epoch": 2.1080686973428384, "grad_norm": 0.7107164263725281, "learning_rate": 1.072234726003118e-06, "loss": 0.053, "step": 13011 }, { "epoch": 2.1082307193778353, "grad_norm": 0.9328707456588745, "learning_rate": 1.0718757753000665e-06, "loss": 0.0591, "step": 13012 }, { "epoch": 2.1083927414128323, "grad_norm": 0.8621041178703308, "learning_rate": 1.0715168682941591e-06, "loss": 0.0615, "step": 13013 }, { "epoch": 2.1085547634478288, "grad_norm": 0.8346017599105835, "learning_rate": 1.071158004996378e-06, "loss": 0.063, "step": 13014 }, { "epoch": 2.1087167854828257, "grad_norm": 0.8426311016082764, "learning_rate": 1.0707991854177057e-06, "loss": 0.06, "step": 13015 }, { "epoch": 2.108878807517822, "grad_norm": 0.9094135761260986, "learning_rate": 1.070440409569119e-06, "loss": 0.0647, "step": 13016 }, { "epoch": 2.109040829552819, "grad_norm": 1.048732042312622, "learning_rate": 1.0700816774615964e-06, "loss": 0.0622, "step": 13017 }, { "epoch": 2.109202851587816, "grad_norm": 0.9794455170631409, "learning_rate": 1.0697229891061141e-06, "loss": 0.0608, "step": 13018 }, { "epoch": 2.1093648736228126, "grad_norm": 0.8779764771461487, "learning_rate": 1.069364344513647e-06, "loss": 0.0625, "step": 13019 }, { "epoch": 2.1095268956578095, "grad_norm": 0.8880550861358643, "learning_rate": 1.0690057436951689e-06, "loss": 0.0581, "step": 13020 }, { "epoch": 2.1096889176928064, "grad_norm": 0.8012394309043884, "learning_rate": 1.0686471866616525e-06, "loss": 0.0571, "step": 13021 }, { "epoch": 2.109850939727803, "grad_norm": 0.9028552770614624, "learning_rate": 1.068288673424068e-06, "loss": 0.0609, "step": 13022 }, { "epoch": 2.1100129617628, "grad_norm": 0.8535613417625427, "learning_rate": 1.0679302039933864e-06, "loss": 0.0554, "step": 13023 }, { "epoch": 2.1101749837977963, "grad_norm": 0.8417321443557739, "learning_rate": 1.067571778380573e-06, "loss": 0.0549, "step": 13024 }, { "epoch": 2.1103370058327933, "grad_norm": 0.7988325357437134, "learning_rate": 1.0672133965965981e-06, "loss": 0.0567, "step": 13025 }, { "epoch": 2.11049902786779, "grad_norm": 1.0051651000976562, "learning_rate": 1.0668550586524256e-06, "loss": 0.0633, "step": 13026 }, { "epoch": 2.1106610499027867, "grad_norm": 0.9682597517967224, "learning_rate": 1.0664967645590212e-06, "loss": 0.0544, "step": 13027 }, { "epoch": 2.1108230719377836, "grad_norm": 0.841816782951355, "learning_rate": 1.066138514327345e-06, "loss": 0.0603, "step": 13028 }, { "epoch": 2.11098509397278, "grad_norm": 0.9751487970352173, "learning_rate": 1.0657803079683616e-06, "loss": 0.0612, "step": 13029 }, { "epoch": 2.111147116007777, "grad_norm": 0.8450110554695129, "learning_rate": 1.0654221454930305e-06, "loss": 0.0551, "step": 13030 }, { "epoch": 2.111309138042774, "grad_norm": 0.9710997343063354, "learning_rate": 1.0650640269123095e-06, "loss": 0.0593, "step": 13031 }, { "epoch": 2.1114711600777705, "grad_norm": 0.8456966280937195, "learning_rate": 1.0647059522371565e-06, "loss": 0.0608, "step": 13032 }, { "epoch": 2.1116331821127674, "grad_norm": 1.07785964012146, "learning_rate": 1.0643479214785283e-06, "loss": 0.0738, "step": 13033 }, { "epoch": 2.111795204147764, "grad_norm": 1.0498229265213013, "learning_rate": 1.0639899346473792e-06, "loss": 0.0668, "step": 13034 }, { "epoch": 2.111957226182761, "grad_norm": 0.8354172706604004, "learning_rate": 1.0636319917546631e-06, "loss": 0.0569, "step": 13035 }, { "epoch": 2.1121192482177578, "grad_norm": 0.8369887471199036, "learning_rate": 1.0632740928113323e-06, "loss": 0.0584, "step": 13036 }, { "epoch": 2.1122812702527543, "grad_norm": 0.9104105830192566, "learning_rate": 1.0629162378283372e-06, "loss": 0.0655, "step": 13037 }, { "epoch": 2.112443292287751, "grad_norm": 0.890618622303009, "learning_rate": 1.0625584268166278e-06, "loss": 0.0622, "step": 13038 }, { "epoch": 2.1126053143227477, "grad_norm": 0.9149554967880249, "learning_rate": 1.062200659787152e-06, "loss": 0.067, "step": 13039 }, { "epoch": 2.1127673363577446, "grad_norm": 0.8385174870491028, "learning_rate": 1.0618429367508564e-06, "loss": 0.0558, "step": 13040 }, { "epoch": 2.1129293583927415, "grad_norm": 0.7837886214256287, "learning_rate": 1.0614852577186877e-06, "loss": 0.0578, "step": 13041 }, { "epoch": 2.113091380427738, "grad_norm": 0.9414006471633911, "learning_rate": 1.061127622701588e-06, "loss": 0.0681, "step": 13042 }, { "epoch": 2.113253402462735, "grad_norm": 0.8170459270477295, "learning_rate": 1.0607700317105002e-06, "loss": 0.0576, "step": 13043 }, { "epoch": 2.113415424497732, "grad_norm": 0.7887612581253052, "learning_rate": 1.0604124847563674e-06, "loss": 0.0565, "step": 13044 }, { "epoch": 2.1135774465327284, "grad_norm": 0.9711698293685913, "learning_rate": 1.0600549818501298e-06, "loss": 0.0656, "step": 13045 }, { "epoch": 2.1137394685677253, "grad_norm": 1.077919363975525, "learning_rate": 1.0596975230027243e-06, "loss": 0.0674, "step": 13046 }, { "epoch": 2.113901490602722, "grad_norm": 0.9038639068603516, "learning_rate": 1.059340108225089e-06, "loss": 0.0589, "step": 13047 }, { "epoch": 2.1140635126377187, "grad_norm": 0.9091435074806213, "learning_rate": 1.05898273752816e-06, "loss": 0.068, "step": 13048 }, { "epoch": 2.1142255346727157, "grad_norm": 0.8895291686058044, "learning_rate": 1.0586254109228722e-06, "loss": 0.0593, "step": 13049 }, { "epoch": 2.114387556707712, "grad_norm": 0.8643745183944702, "learning_rate": 1.0582681284201587e-06, "loss": 0.0627, "step": 13050 }, { "epoch": 2.114549578742709, "grad_norm": 0.9382444024085999, "learning_rate": 1.0579108900309518e-06, "loss": 0.0609, "step": 13051 }, { "epoch": 2.1147116007777056, "grad_norm": 0.8059027791023254, "learning_rate": 1.0575536957661814e-06, "loss": 0.061, "step": 13052 }, { "epoch": 2.1148736228127025, "grad_norm": 0.8786547780036926, "learning_rate": 1.0571965456367774e-06, "loss": 0.0579, "step": 13053 }, { "epoch": 2.1150356448476995, "grad_norm": 0.7798833847045898, "learning_rate": 1.056839439653668e-06, "loss": 0.0528, "step": 13054 }, { "epoch": 2.115197666882696, "grad_norm": 0.9558907151222229, "learning_rate": 1.056482377827779e-06, "loss": 0.0664, "step": 13055 }, { "epoch": 2.115359688917693, "grad_norm": 0.9504494667053223, "learning_rate": 1.056125360170037e-06, "loss": 0.0659, "step": 13056 }, { "epoch": 2.1155217109526894, "grad_norm": 1.01918363571167, "learning_rate": 1.055768386691363e-06, "loss": 0.0646, "step": 13057 }, { "epoch": 2.1156837329876863, "grad_norm": 0.8410986065864563, "learning_rate": 1.0554114574026823e-06, "loss": 0.0581, "step": 13058 }, { "epoch": 2.1158457550226832, "grad_norm": 1.0521918535232544, "learning_rate": 1.055054572314916e-06, "loss": 0.0752, "step": 13059 }, { "epoch": 2.1160077770576797, "grad_norm": 0.847416877746582, "learning_rate": 1.0546977314389822e-06, "loss": 0.062, "step": 13060 }, { "epoch": 2.1161697990926767, "grad_norm": 1.0600688457489014, "learning_rate": 1.0543409347858001e-06, "loss": 0.069, "step": 13061 }, { "epoch": 2.116331821127673, "grad_norm": 0.8803973197937012, "learning_rate": 1.0539841823662867e-06, "loss": 0.0616, "step": 13062 }, { "epoch": 2.11649384316267, "grad_norm": 0.8875924348831177, "learning_rate": 1.053627474191359e-06, "loss": 0.0583, "step": 13063 }, { "epoch": 2.116655865197667, "grad_norm": 0.8933229446411133, "learning_rate": 1.0532708102719303e-06, "loss": 0.07, "step": 13064 }, { "epoch": 2.1168178872326635, "grad_norm": 0.9175959825515747, "learning_rate": 1.0529141906189133e-06, "loss": 0.0598, "step": 13065 }, { "epoch": 2.1169799092676604, "grad_norm": 0.8931822180747986, "learning_rate": 1.0525576152432204e-06, "loss": 0.0656, "step": 13066 }, { "epoch": 2.1171419313026574, "grad_norm": 0.9046509265899658, "learning_rate": 1.0522010841557615e-06, "loss": 0.0662, "step": 13067 }, { "epoch": 2.117303953337654, "grad_norm": 1.0218912363052368, "learning_rate": 1.051844597367446e-06, "loss": 0.0644, "step": 13068 }, { "epoch": 2.117465975372651, "grad_norm": 0.8441329598426819, "learning_rate": 1.0514881548891813e-06, "loss": 0.062, "step": 13069 }, { "epoch": 2.1176279974076473, "grad_norm": 0.9138961434364319, "learning_rate": 1.0511317567318737e-06, "loss": 0.0687, "step": 13070 }, { "epoch": 2.1177900194426442, "grad_norm": 0.9639347791671753, "learning_rate": 1.0507754029064293e-06, "loss": 0.0589, "step": 13071 }, { "epoch": 2.117952041477641, "grad_norm": 0.9006848931312561, "learning_rate": 1.0504190934237484e-06, "loss": 0.0641, "step": 13072 }, { "epoch": 2.1181140635126376, "grad_norm": 1.6156253814697266, "learning_rate": 1.0500628282947365e-06, "loss": 0.0539, "step": 13073 }, { "epoch": 2.1182760855476346, "grad_norm": 0.8980904817581177, "learning_rate": 1.0497066075302939e-06, "loss": 0.0691, "step": 13074 }, { "epoch": 2.118438107582631, "grad_norm": 1.003623604774475, "learning_rate": 1.0493504311413185e-06, "loss": 0.0757, "step": 13075 }, { "epoch": 2.118600129617628, "grad_norm": 0.8838923573493958, "learning_rate": 1.0489942991387088e-06, "loss": 0.0564, "step": 13076 }, { "epoch": 2.118762151652625, "grad_norm": 0.8242804408073425, "learning_rate": 1.0486382115333638e-06, "loss": 0.0575, "step": 13077 }, { "epoch": 2.1189241736876214, "grad_norm": 0.7853438258171082, "learning_rate": 1.0482821683361767e-06, "loss": 0.0624, "step": 13078 }, { "epoch": 2.1190861957226184, "grad_norm": 0.8636956810951233, "learning_rate": 1.0479261695580417e-06, "loss": 0.062, "step": 13079 }, { "epoch": 2.119248217757615, "grad_norm": 0.9305495023727417, "learning_rate": 1.0475702152098522e-06, "loss": 0.0606, "step": 13080 }, { "epoch": 2.119410239792612, "grad_norm": 0.8130801320075989, "learning_rate": 1.0472143053024994e-06, "loss": 0.0592, "step": 13081 }, { "epoch": 2.1195722618276087, "grad_norm": 0.9285720586776733, "learning_rate": 1.0468584398468729e-06, "loss": 0.0645, "step": 13082 }, { "epoch": 2.119734283862605, "grad_norm": 0.7963539958000183, "learning_rate": 1.0465026188538618e-06, "loss": 0.0576, "step": 13083 }, { "epoch": 2.119896305897602, "grad_norm": 0.8605098724365234, "learning_rate": 1.0461468423343532e-06, "loss": 0.0569, "step": 13084 }, { "epoch": 2.1200583279325986, "grad_norm": 0.8885794878005981, "learning_rate": 1.0457911102992338e-06, "loss": 0.0636, "step": 13085 }, { "epoch": 2.1202203499675956, "grad_norm": 0.8622367978096008, "learning_rate": 1.0454354227593855e-06, "loss": 0.0614, "step": 13086 }, { "epoch": 2.1203823720025925, "grad_norm": 0.8698747754096985, "learning_rate": 1.0450797797256942e-06, "loss": 0.063, "step": 13087 }, { "epoch": 2.120544394037589, "grad_norm": 0.8544244766235352, "learning_rate": 1.0447241812090408e-06, "loss": 0.0548, "step": 13088 }, { "epoch": 2.120706416072586, "grad_norm": 0.7378759980201721, "learning_rate": 1.0443686272203066e-06, "loss": 0.056, "step": 13089 }, { "epoch": 2.120868438107583, "grad_norm": 0.8694587349891663, "learning_rate": 1.0440131177703692e-06, "loss": 0.068, "step": 13090 }, { "epoch": 2.1210304601425793, "grad_norm": 0.8376104831695557, "learning_rate": 1.0436576528701057e-06, "loss": 0.0592, "step": 13091 }, { "epoch": 2.1211924821775763, "grad_norm": 0.9313507080078125, "learning_rate": 1.0433022325303956e-06, "loss": 0.0627, "step": 13092 }, { "epoch": 2.1213545042125728, "grad_norm": 0.9477971792221069, "learning_rate": 1.0429468567621115e-06, "loss": 0.0663, "step": 13093 }, { "epoch": 2.1215165262475697, "grad_norm": 0.8192235827445984, "learning_rate": 1.042591525576127e-06, "loss": 0.0603, "step": 13094 }, { "epoch": 2.1216785482825666, "grad_norm": 0.8660534620285034, "learning_rate": 1.0422362389833145e-06, "loss": 0.0631, "step": 13095 }, { "epoch": 2.121840570317563, "grad_norm": 0.8360748887062073, "learning_rate": 1.041880996994547e-06, "loss": 0.0584, "step": 13096 }, { "epoch": 2.12200259235256, "grad_norm": 0.8174402713775635, "learning_rate": 1.0415257996206918e-06, "loss": 0.0593, "step": 13097 }, { "epoch": 2.1221646143875565, "grad_norm": 0.9432098269462585, "learning_rate": 1.0411706468726173e-06, "loss": 0.0633, "step": 13098 }, { "epoch": 2.1223266364225535, "grad_norm": 1.0108842849731445, "learning_rate": 1.040815538761191e-06, "loss": 0.0607, "step": 13099 }, { "epoch": 2.1224886584575504, "grad_norm": 0.98541259765625, "learning_rate": 1.040460475297278e-06, "loss": 0.069, "step": 13100 }, { "epoch": 2.122650680492547, "grad_norm": 0.8681195974349976, "learning_rate": 1.0401054564917423e-06, "loss": 0.0654, "step": 13101 }, { "epoch": 2.122812702527544, "grad_norm": 0.9383473992347717, "learning_rate": 1.039750482355447e-06, "loss": 0.0651, "step": 13102 }, { "epoch": 2.1229747245625403, "grad_norm": 1.2092524766921997, "learning_rate": 1.0393955528992535e-06, "loss": 0.0658, "step": 13103 }, { "epoch": 2.1231367465975373, "grad_norm": 0.8957673907279968, "learning_rate": 1.0390406681340212e-06, "loss": 0.0572, "step": 13104 }, { "epoch": 2.123298768632534, "grad_norm": 0.9200911521911621, "learning_rate": 1.038685828070608e-06, "loss": 0.0673, "step": 13105 }, { "epoch": 2.1234607906675307, "grad_norm": 0.8420102000236511, "learning_rate": 1.0383310327198728e-06, "loss": 0.0562, "step": 13106 }, { "epoch": 2.1236228127025276, "grad_norm": 0.9516891837120056, "learning_rate": 1.037976282092672e-06, "loss": 0.0657, "step": 13107 }, { "epoch": 2.123784834737524, "grad_norm": 0.9453445076942444, "learning_rate": 1.0376215761998578e-06, "loss": 0.0617, "step": 13108 }, { "epoch": 2.123946856772521, "grad_norm": 0.8707835674285889, "learning_rate": 1.0372669150522845e-06, "loss": 0.0593, "step": 13109 }, { "epoch": 2.124108878807518, "grad_norm": 0.9308366775512695, "learning_rate": 1.0369122986608044e-06, "loss": 0.0588, "step": 13110 }, { "epoch": 2.1242709008425145, "grad_norm": 0.8552700877189636, "learning_rate": 1.0365577270362668e-06, "loss": 0.0617, "step": 13111 }, { "epoch": 2.1244329228775114, "grad_norm": 0.8702750205993652, "learning_rate": 1.0362032001895214e-06, "loss": 0.0667, "step": 13112 }, { "epoch": 2.124594944912508, "grad_norm": 0.8597316145896912, "learning_rate": 1.0358487181314155e-06, "loss": 0.0642, "step": 13113 }, { "epoch": 2.124756966947505, "grad_norm": 0.7736619114875793, "learning_rate": 1.0354942808727962e-06, "loss": 0.0589, "step": 13114 }, { "epoch": 2.1249189889825018, "grad_norm": 1.05555260181427, "learning_rate": 1.0351398884245076e-06, "loss": 0.0707, "step": 13115 }, { "epoch": 2.1250810110174982, "grad_norm": 0.8743281364440918, "learning_rate": 1.0347855407973933e-06, "loss": 0.0598, "step": 13116 }, { "epoch": 2.125243033052495, "grad_norm": 0.9425612092018127, "learning_rate": 1.0344312380022961e-06, "loss": 0.0646, "step": 13117 }, { "epoch": 2.1254050550874917, "grad_norm": 0.8367472290992737, "learning_rate": 1.034076980050057e-06, "loss": 0.0601, "step": 13118 }, { "epoch": 2.1255670771224886, "grad_norm": 0.9993242621421814, "learning_rate": 1.0337227669515131e-06, "loss": 0.0639, "step": 13119 }, { "epoch": 2.1257290991574855, "grad_norm": 0.7848953008651733, "learning_rate": 1.0333685987175052e-06, "loss": 0.0534, "step": 13120 }, { "epoch": 2.125891121192482, "grad_norm": 0.973812460899353, "learning_rate": 1.0330144753588688e-06, "loss": 0.0685, "step": 13121 }, { "epoch": 2.126053143227479, "grad_norm": 0.8284931182861328, "learning_rate": 1.0326603968864407e-06, "loss": 0.0557, "step": 13122 }, { "epoch": 2.126215165262476, "grad_norm": 0.995773196220398, "learning_rate": 1.0323063633110525e-06, "loss": 0.0593, "step": 13123 }, { "epoch": 2.1263771872974724, "grad_norm": 0.8277838230133057, "learning_rate": 1.0319523746435367e-06, "loss": 0.063, "step": 13124 }, { "epoch": 2.1265392093324693, "grad_norm": 0.8277966380119324, "learning_rate": 1.0315984308947277e-06, "loss": 0.0618, "step": 13125 }, { "epoch": 2.126701231367466, "grad_norm": 1.1131237745285034, "learning_rate": 1.0312445320754522e-06, "loss": 0.0729, "step": 13126 }, { "epoch": 2.1268632534024627, "grad_norm": 0.846409797668457, "learning_rate": 1.0308906781965396e-06, "loss": 0.0642, "step": 13127 }, { "epoch": 2.1270252754374597, "grad_norm": 0.8704565763473511, "learning_rate": 1.0305368692688175e-06, "loss": 0.07, "step": 13128 }, { "epoch": 2.127187297472456, "grad_norm": 0.8594701886177063, "learning_rate": 1.0301831053031109e-06, "loss": 0.0583, "step": 13129 }, { "epoch": 2.127349319507453, "grad_norm": 0.9101399183273315, "learning_rate": 1.0298293863102444e-06, "loss": 0.064, "step": 13130 }, { "epoch": 2.1275113415424496, "grad_norm": 0.896522045135498, "learning_rate": 1.0294757123010406e-06, "loss": 0.0645, "step": 13131 }, { "epoch": 2.1276733635774465, "grad_norm": 0.8539774417877197, "learning_rate": 1.0291220832863219e-06, "loss": 0.0617, "step": 13132 }, { "epoch": 2.1278353856124435, "grad_norm": 0.8655493259429932, "learning_rate": 1.0287684992769086e-06, "loss": 0.065, "step": 13133 }, { "epoch": 2.12799740764744, "grad_norm": 0.8757855296134949, "learning_rate": 1.0284149602836174e-06, "loss": 0.0602, "step": 13134 }, { "epoch": 2.128159429682437, "grad_norm": 0.9795240759849548, "learning_rate": 1.028061466317268e-06, "loss": 0.0667, "step": 13135 }, { "epoch": 2.1283214517174334, "grad_norm": 0.8074472546577454, "learning_rate": 1.0277080173886766e-06, "loss": 0.0633, "step": 13136 }, { "epoch": 2.1284834737524303, "grad_norm": 0.8354179263114929, "learning_rate": 1.0273546135086559e-06, "loss": 0.0551, "step": 13137 }, { "epoch": 2.1286454957874272, "grad_norm": 0.8012445569038391, "learning_rate": 1.0270012546880207e-06, "loss": 0.0567, "step": 13138 }, { "epoch": 2.1288075178224237, "grad_norm": 0.8529260158538818, "learning_rate": 1.0266479409375813e-06, "loss": 0.0646, "step": 13139 }, { "epoch": 2.1289695398574207, "grad_norm": 0.8222674131393433, "learning_rate": 1.0262946722681513e-06, "loss": 0.0624, "step": 13140 }, { "epoch": 2.129131561892417, "grad_norm": 0.757540762424469, "learning_rate": 1.0259414486905373e-06, "loss": 0.0591, "step": 13141 }, { "epoch": 2.129293583927414, "grad_norm": 0.9325169920921326, "learning_rate": 1.0255882702155476e-06, "loss": 0.0637, "step": 13142 }, { "epoch": 2.129455605962411, "grad_norm": 0.7963911294937134, "learning_rate": 1.025235136853989e-06, "loss": 0.0561, "step": 13143 }, { "epoch": 2.1296176279974075, "grad_norm": 0.8913578391075134, "learning_rate": 1.024882048616666e-06, "loss": 0.058, "step": 13144 }, { "epoch": 2.1297796500324044, "grad_norm": 0.9068898558616638, "learning_rate": 1.024529005514383e-06, "loss": 0.0654, "step": 13145 }, { "epoch": 2.1299416720674014, "grad_norm": 0.9450251460075378, "learning_rate": 1.0241760075579418e-06, "loss": 0.0663, "step": 13146 }, { "epoch": 2.130103694102398, "grad_norm": 0.8904002904891968, "learning_rate": 1.023823054758144e-06, "loss": 0.0605, "step": 13147 }, { "epoch": 2.130265716137395, "grad_norm": 0.7318344712257385, "learning_rate": 1.0234701471257868e-06, "loss": 0.0577, "step": 13148 }, { "epoch": 2.1304277381723913, "grad_norm": 0.8698272705078125, "learning_rate": 1.023117284671671e-06, "loss": 0.062, "step": 13149 }, { "epoch": 2.130589760207388, "grad_norm": 0.868567168712616, "learning_rate": 1.0227644674065923e-06, "loss": 0.0588, "step": 13150 }, { "epoch": 2.130751782242385, "grad_norm": 0.8194864988327026, "learning_rate": 1.0224116953413468e-06, "loss": 0.0544, "step": 13151 }, { "epoch": 2.1309138042773816, "grad_norm": 0.8874611258506775, "learning_rate": 1.0220589684867269e-06, "loss": 0.0614, "step": 13152 }, { "epoch": 2.1310758263123786, "grad_norm": 0.9767943620681763, "learning_rate": 1.0217062868535249e-06, "loss": 0.0582, "step": 13153 }, { "epoch": 2.131237848347375, "grad_norm": 0.8773373365402222, "learning_rate": 1.021353650452535e-06, "loss": 0.0601, "step": 13154 }, { "epoch": 2.131399870382372, "grad_norm": 0.8418240547180176, "learning_rate": 1.0210010592945442e-06, "loss": 0.0585, "step": 13155 }, { "epoch": 2.131561892417369, "grad_norm": 0.8298531174659729, "learning_rate": 1.0206485133903424e-06, "loss": 0.0618, "step": 13156 }, { "epoch": 2.1317239144523654, "grad_norm": 0.81301349401474, "learning_rate": 1.0202960127507155e-06, "loss": 0.0536, "step": 13157 }, { "epoch": 2.1318859364873624, "grad_norm": 0.8824573755264282, "learning_rate": 1.0199435573864502e-06, "loss": 0.0617, "step": 13158 }, { "epoch": 2.132047958522359, "grad_norm": 0.9553408622741699, "learning_rate": 1.01959114730833e-06, "loss": 0.0646, "step": 13159 }, { "epoch": 2.1322099805573558, "grad_norm": 0.8561825156211853, "learning_rate": 1.0192387825271384e-06, "loss": 0.0622, "step": 13160 }, { "epoch": 2.1323720025923527, "grad_norm": 0.9502358436584473, "learning_rate": 1.0188864630536568e-06, "loss": 0.0666, "step": 13161 }, { "epoch": 2.132534024627349, "grad_norm": 0.973298966884613, "learning_rate": 1.018534188898665e-06, "loss": 0.0671, "step": 13162 }, { "epoch": 2.132696046662346, "grad_norm": 0.9729242920875549, "learning_rate": 1.018181960072942e-06, "loss": 0.0594, "step": 13163 }, { "epoch": 2.1328580686973426, "grad_norm": 0.9466162919998169, "learning_rate": 1.0178297765872651e-06, "loss": 0.0709, "step": 13164 }, { "epoch": 2.1330200907323396, "grad_norm": 0.8360689878463745, "learning_rate": 1.0174776384524104e-06, "loss": 0.0593, "step": 13165 }, { "epoch": 2.1331821127673365, "grad_norm": 0.8890894055366516, "learning_rate": 1.0171255456791531e-06, "loss": 0.0664, "step": 13166 }, { "epoch": 2.133344134802333, "grad_norm": 0.9430520534515381, "learning_rate": 1.0167734982782636e-06, "loss": 0.0605, "step": 13167 }, { "epoch": 2.13350615683733, "grad_norm": 1.001865029335022, "learning_rate": 1.016421496260517e-06, "loss": 0.0626, "step": 13168 }, { "epoch": 2.133668178872327, "grad_norm": 1.0136559009552002, "learning_rate": 1.0160695396366832e-06, "loss": 0.0638, "step": 13169 }, { "epoch": 2.1338302009073233, "grad_norm": 1.062779188156128, "learning_rate": 1.0157176284175293e-06, "loss": 0.0654, "step": 13170 }, { "epoch": 2.1339922229423203, "grad_norm": 0.7638817429542542, "learning_rate": 1.0153657626138244e-06, "loss": 0.0505, "step": 13171 }, { "epoch": 2.1341542449773168, "grad_norm": 0.8198952078819275, "learning_rate": 1.0150139422363342e-06, "loss": 0.0552, "step": 13172 }, { "epoch": 2.1343162670123137, "grad_norm": 0.9854885935783386, "learning_rate": 1.0146621672958238e-06, "loss": 0.061, "step": 13173 }, { "epoch": 2.1344782890473106, "grad_norm": 0.9766724705696106, "learning_rate": 1.0143104378030565e-06, "loss": 0.0619, "step": 13174 }, { "epoch": 2.134640311082307, "grad_norm": 1.0882524251937866, "learning_rate": 1.013958753768795e-06, "loss": 0.0698, "step": 13175 }, { "epoch": 2.134802333117304, "grad_norm": 0.8586061596870422, "learning_rate": 1.013607115203799e-06, "loss": 0.0645, "step": 13176 }, { "epoch": 2.1349643551523005, "grad_norm": 0.8522896766662598, "learning_rate": 1.0132555221188285e-06, "loss": 0.0626, "step": 13177 }, { "epoch": 2.1351263771872975, "grad_norm": 0.9053431153297424, "learning_rate": 1.012903974524641e-06, "loss": 0.0658, "step": 13178 }, { "epoch": 2.1352883992222944, "grad_norm": 0.8362330794334412, "learning_rate": 1.0125524724319936e-06, "loss": 0.0619, "step": 13179 }, { "epoch": 2.135450421257291, "grad_norm": 0.8866271376609802, "learning_rate": 1.0122010158516412e-06, "loss": 0.0626, "step": 13180 }, { "epoch": 2.135612443292288, "grad_norm": 0.9750482439994812, "learning_rate": 1.0118496047943368e-06, "loss": 0.0746, "step": 13181 }, { "epoch": 2.1357744653272843, "grad_norm": 0.9252564311027527, "learning_rate": 1.0114982392708325e-06, "loss": 0.0663, "step": 13182 }, { "epoch": 2.1359364873622813, "grad_norm": 1.186126947402954, "learning_rate": 1.0111469192918808e-06, "loss": 0.0598, "step": 13183 }, { "epoch": 2.136098509397278, "grad_norm": 0.952793300151825, "learning_rate": 1.010795644868231e-06, "loss": 0.0554, "step": 13184 }, { "epoch": 2.1362605314322747, "grad_norm": 1.0087121725082397, "learning_rate": 1.01044441601063e-06, "loss": 0.0665, "step": 13185 }, { "epoch": 2.1364225534672716, "grad_norm": 0.8914116024971008, "learning_rate": 1.0100932327298244e-06, "loss": 0.064, "step": 13186 }, { "epoch": 2.136584575502268, "grad_norm": 0.8840557336807251, "learning_rate": 1.009742095036562e-06, "loss": 0.0569, "step": 13187 }, { "epoch": 2.136746597537265, "grad_norm": 0.9341459274291992, "learning_rate": 1.0093910029415843e-06, "loss": 0.0543, "step": 13188 }, { "epoch": 2.136908619572262, "grad_norm": 0.9242621064186096, "learning_rate": 1.0090399564556348e-06, "loss": 0.0626, "step": 13189 }, { "epoch": 2.1370706416072585, "grad_norm": 0.9278979897499084, "learning_rate": 1.0086889555894545e-06, "loss": 0.0592, "step": 13190 }, { "epoch": 2.1372326636422554, "grad_norm": 0.8471470475196838, "learning_rate": 1.0083380003537831e-06, "loss": 0.0644, "step": 13191 }, { "epoch": 2.1373946856772523, "grad_norm": 0.8700275421142578, "learning_rate": 1.0079870907593592e-06, "loss": 0.063, "step": 13192 }, { "epoch": 2.137556707712249, "grad_norm": 0.7276986241340637, "learning_rate": 1.0076362268169199e-06, "loss": 0.0525, "step": 13193 }, { "epoch": 2.1377187297472457, "grad_norm": 1.0328106880187988, "learning_rate": 1.0072854085372005e-06, "loss": 0.0698, "step": 13194 }, { "epoch": 2.1378807517822422, "grad_norm": 0.8020308017730713, "learning_rate": 1.0069346359309362e-06, "loss": 0.0555, "step": 13195 }, { "epoch": 2.138042773817239, "grad_norm": 0.9141387343406677, "learning_rate": 1.0065839090088572e-06, "loss": 0.0637, "step": 13196 }, { "epoch": 2.138204795852236, "grad_norm": 0.8611671328544617, "learning_rate": 1.0062332277816972e-06, "loss": 0.0624, "step": 13197 }, { "epoch": 2.1383668178872326, "grad_norm": 0.8084477186203003, "learning_rate": 1.0058825922601866e-06, "loss": 0.0594, "step": 13198 }, { "epoch": 2.1385288399222295, "grad_norm": 0.9061813950538635, "learning_rate": 1.0055320024550521e-06, "loss": 0.0678, "step": 13199 }, { "epoch": 2.138690861957226, "grad_norm": 0.9596869349479675, "learning_rate": 1.005181458377022e-06, "loss": 0.0662, "step": 13200 }, { "epoch": 2.138852883992223, "grad_norm": 0.8806290626525879, "learning_rate": 1.004830960036821e-06, "loss": 0.0613, "step": 13201 }, { "epoch": 2.13901490602722, "grad_norm": 0.7227916121482849, "learning_rate": 1.0044805074451757e-06, "loss": 0.0509, "step": 13202 }, { "epoch": 2.1391769280622164, "grad_norm": 0.7963190674781799, "learning_rate": 1.0041301006128073e-06, "loss": 0.0637, "step": 13203 }, { "epoch": 2.1393389500972133, "grad_norm": 0.936798632144928, "learning_rate": 1.003779739550438e-06, "loss": 0.0689, "step": 13204 }, { "epoch": 2.13950097213221, "grad_norm": 0.857064962387085, "learning_rate": 1.0034294242687875e-06, "loss": 0.0585, "step": 13205 }, { "epoch": 2.1396629941672067, "grad_norm": 0.9607579112052917, "learning_rate": 1.003079154778575e-06, "loss": 0.0691, "step": 13206 }, { "epoch": 2.1398250162022037, "grad_norm": 0.9430453181266785, "learning_rate": 1.0027289310905181e-06, "loss": 0.059, "step": 13207 }, { "epoch": 2.1399870382372, "grad_norm": 0.8209136724472046, "learning_rate": 1.0023787532153325e-06, "loss": 0.0648, "step": 13208 }, { "epoch": 2.140149060272197, "grad_norm": 0.9626993536949158, "learning_rate": 1.0020286211637328e-06, "loss": 0.0641, "step": 13209 }, { "epoch": 2.1403110823071936, "grad_norm": 0.9732939004898071, "learning_rate": 1.0016785349464326e-06, "loss": 0.0647, "step": 13210 }, { "epoch": 2.1404731043421905, "grad_norm": 0.8415422439575195, "learning_rate": 1.0013284945741431e-06, "loss": 0.0608, "step": 13211 }, { "epoch": 2.1406351263771874, "grad_norm": 0.9403479099273682, "learning_rate": 1.0009785000575747e-06, "loss": 0.0601, "step": 13212 }, { "epoch": 2.140797148412184, "grad_norm": 0.9788222312927246, "learning_rate": 1.000628551407438e-06, "loss": 0.0685, "step": 13213 }, { "epoch": 2.140959170447181, "grad_norm": 0.8836336135864258, "learning_rate": 1.0002786486344379e-06, "loss": 0.0625, "step": 13214 }, { "epoch": 2.141121192482178, "grad_norm": 0.8284887671470642, "learning_rate": 9.999287917492814e-07, "loss": 0.057, "step": 13215 }, { "epoch": 2.1412832145171743, "grad_norm": 0.9652777314186096, "learning_rate": 9.995789807626754e-07, "loss": 0.0552, "step": 13216 }, { "epoch": 2.1414452365521712, "grad_norm": 0.9502488970756531, "learning_rate": 9.992292156853207e-07, "loss": 0.0636, "step": 13217 }, { "epoch": 2.1416072585871677, "grad_norm": 0.9706515073776245, "learning_rate": 9.988794965279203e-07, "loss": 0.0636, "step": 13218 }, { "epoch": 2.1417692806221647, "grad_norm": 0.9208532571792603, "learning_rate": 9.985298233011746e-07, "loss": 0.072, "step": 13219 }, { "epoch": 2.141931302657161, "grad_norm": 0.8984748125076294, "learning_rate": 9.981801960157827e-07, "loss": 0.0574, "step": 13220 }, { "epoch": 2.142093324692158, "grad_norm": 0.8832286596298218, "learning_rate": 9.978306146824427e-07, "loss": 0.0625, "step": 13221 }, { "epoch": 2.142255346727155, "grad_norm": 1.0187299251556396, "learning_rate": 9.974810793118505e-07, "loss": 0.0616, "step": 13222 }, { "epoch": 2.1424173687621515, "grad_norm": 0.8346063494682312, "learning_rate": 9.971315899147012e-07, "loss": 0.0624, "step": 13223 }, { "epoch": 2.1425793907971484, "grad_norm": 1.0231930017471313, "learning_rate": 9.967821465016893e-07, "loss": 0.0675, "step": 13224 }, { "epoch": 2.1427414128321454, "grad_norm": 0.936922550201416, "learning_rate": 9.964327490835045e-07, "loss": 0.0647, "step": 13225 }, { "epoch": 2.142903434867142, "grad_norm": 0.8798637390136719, "learning_rate": 9.960833976708398e-07, "loss": 0.052, "step": 13226 }, { "epoch": 2.143065456902139, "grad_norm": 0.7966178059577942, "learning_rate": 9.957340922743835e-07, "loss": 0.0607, "step": 13227 }, { "epoch": 2.1432274789371353, "grad_norm": 0.7437213659286499, "learning_rate": 9.953848329048248e-07, "loss": 0.0558, "step": 13228 }, { "epoch": 2.143389500972132, "grad_norm": 1.0451050996780396, "learning_rate": 9.950356195728483e-07, "loss": 0.0761, "step": 13229 }, { "epoch": 2.143551523007129, "grad_norm": 0.8481780290603638, "learning_rate": 9.94686452289139e-07, "loss": 0.0546, "step": 13230 }, { "epoch": 2.1437135450421256, "grad_norm": 0.8842169046401978, "learning_rate": 9.943373310643831e-07, "loss": 0.0643, "step": 13231 }, { "epoch": 2.1438755670771226, "grad_norm": 0.9944992065429688, "learning_rate": 9.939882559092604e-07, "loss": 0.0705, "step": 13232 }, { "epoch": 2.144037589112119, "grad_norm": 0.9027183651924133, "learning_rate": 9.936392268344525e-07, "loss": 0.0682, "step": 13233 }, { "epoch": 2.144199611147116, "grad_norm": 0.7881889939308167, "learning_rate": 9.93290243850638e-07, "loss": 0.0604, "step": 13234 }, { "epoch": 2.144361633182113, "grad_norm": 0.8811602592468262, "learning_rate": 9.929413069684979e-07, "loss": 0.0649, "step": 13235 }, { "epoch": 2.1445236552171094, "grad_norm": 0.8543482422828674, "learning_rate": 9.925924161987057e-07, "loss": 0.0558, "step": 13236 }, { "epoch": 2.1446856772521063, "grad_norm": 1.0113856792449951, "learning_rate": 9.922435715519379e-07, "loss": 0.0649, "step": 13237 }, { "epoch": 2.144847699287103, "grad_norm": 0.8757432103157043, "learning_rate": 9.918947730388682e-07, "loss": 0.0651, "step": 13238 }, { "epoch": 2.1450097213220998, "grad_norm": 1.0214018821716309, "learning_rate": 9.915460206701685e-07, "loss": 0.0677, "step": 13239 }, { "epoch": 2.1451717433570967, "grad_norm": 0.9439998865127563, "learning_rate": 9.911973144565105e-07, "loss": 0.066, "step": 13240 }, { "epoch": 2.145333765392093, "grad_norm": 1.0487086772918701, "learning_rate": 9.908486544085632e-07, "loss": 0.0734, "step": 13241 }, { "epoch": 2.14549578742709, "grad_norm": 0.9988869428634644, "learning_rate": 9.90500040536996e-07, "loss": 0.0692, "step": 13242 }, { "epoch": 2.1456578094620866, "grad_norm": 0.87641841173172, "learning_rate": 9.901514728524739e-07, "loss": 0.056, "step": 13243 }, { "epoch": 2.1458198314970836, "grad_norm": 0.9582691192626953, "learning_rate": 9.898029513656618e-07, "loss": 0.0598, "step": 13244 }, { "epoch": 2.1459818535320805, "grad_norm": 1.0479621887207031, "learning_rate": 9.89454476087226e-07, "loss": 0.0682, "step": 13245 }, { "epoch": 2.146143875567077, "grad_norm": 0.893338143825531, "learning_rate": 9.891060470278286e-07, "loss": 0.0585, "step": 13246 }, { "epoch": 2.146305897602074, "grad_norm": 0.880436360836029, "learning_rate": 9.887576641981285e-07, "loss": 0.0668, "step": 13247 }, { "epoch": 2.146467919637071, "grad_norm": 0.9374086856842041, "learning_rate": 9.884093276087871e-07, "loss": 0.0614, "step": 13248 }, { "epoch": 2.1466299416720673, "grad_norm": 0.9596349596977234, "learning_rate": 9.880610372704624e-07, "loss": 0.0688, "step": 13249 }, { "epoch": 2.1467919637070643, "grad_norm": 1.77931809425354, "learning_rate": 9.877127931938111e-07, "loss": 0.0633, "step": 13250 }, { "epoch": 2.1469539857420608, "grad_norm": 0.8775843977928162, "learning_rate": 9.873645953894887e-07, "loss": 0.0577, "step": 13251 }, { "epoch": 2.1471160077770577, "grad_norm": 0.9120129942893982, "learning_rate": 9.87016443868149e-07, "loss": 0.0629, "step": 13252 }, { "epoch": 2.1472780298120546, "grad_norm": 0.838457465171814, "learning_rate": 9.86668338640445e-07, "loss": 0.0577, "step": 13253 }, { "epoch": 2.147440051847051, "grad_norm": 0.864747941493988, "learning_rate": 9.863202797170273e-07, "loss": 0.0634, "step": 13254 }, { "epoch": 2.147602073882048, "grad_norm": 0.7873530387878418, "learning_rate": 9.85972267108546e-07, "loss": 0.0582, "step": 13255 }, { "epoch": 2.1477640959170445, "grad_norm": 0.9956313371658325, "learning_rate": 9.8562430082565e-07, "loss": 0.0662, "step": 13256 }, { "epoch": 2.1479261179520415, "grad_norm": 0.8575721383094788, "learning_rate": 9.852763808789862e-07, "loss": 0.06, "step": 13257 }, { "epoch": 2.1480881399870384, "grad_norm": 0.9134724140167236, "learning_rate": 9.849285072791978e-07, "loss": 0.0657, "step": 13258 }, { "epoch": 2.148250162022035, "grad_norm": 0.8377612829208374, "learning_rate": 9.845806800369316e-07, "loss": 0.0613, "step": 13259 }, { "epoch": 2.148412184057032, "grad_norm": 0.7430257201194763, "learning_rate": 9.8423289916283e-07, "loss": 0.0535, "step": 13260 }, { "epoch": 2.1485742060920283, "grad_norm": 0.9967155456542969, "learning_rate": 9.838851646675329e-07, "loss": 0.0691, "step": 13261 }, { "epoch": 2.1487362281270252, "grad_norm": 0.809754490852356, "learning_rate": 9.835374765616809e-07, "loss": 0.052, "step": 13262 }, { "epoch": 2.148898250162022, "grad_norm": 0.816158652305603, "learning_rate": 9.831898348559115e-07, "loss": 0.0594, "step": 13263 }, { "epoch": 2.1490602721970187, "grad_norm": 0.8675599098205566, "learning_rate": 9.82842239560864e-07, "loss": 0.0603, "step": 13264 }, { "epoch": 2.1492222942320156, "grad_norm": 0.8979247808456421, "learning_rate": 9.824946906871721e-07, "loss": 0.0641, "step": 13265 }, { "epoch": 2.149384316267012, "grad_norm": 0.7833936214447021, "learning_rate": 9.821471882454703e-07, "loss": 0.0568, "step": 13266 }, { "epoch": 2.149546338302009, "grad_norm": 0.8592799305915833, "learning_rate": 9.817997322463912e-07, "loss": 0.0624, "step": 13267 }, { "epoch": 2.149708360337006, "grad_norm": 0.9396870732307434, "learning_rate": 9.814523227005662e-07, "loss": 0.0621, "step": 13268 }, { "epoch": 2.1498703823720025, "grad_norm": 0.876347005367279, "learning_rate": 9.811049596186255e-07, "loss": 0.0709, "step": 13269 }, { "epoch": 2.1500324044069994, "grad_norm": 0.9403771162033081, "learning_rate": 9.807576430111975e-07, "loss": 0.0612, "step": 13270 }, { "epoch": 2.1501944264419963, "grad_norm": 0.8575648665428162, "learning_rate": 9.804103728889089e-07, "loss": 0.0569, "step": 13271 }, { "epoch": 2.150356448476993, "grad_norm": 0.7617552876472473, "learning_rate": 9.800631492623867e-07, "loss": 0.0533, "step": 13272 }, { "epoch": 2.1505184705119897, "grad_norm": 0.7716460824012756, "learning_rate": 9.79715972142252e-07, "loss": 0.0536, "step": 13273 }, { "epoch": 2.1506804925469862, "grad_norm": 0.911914050579071, "learning_rate": 9.793688415391304e-07, "loss": 0.0623, "step": 13274 }, { "epoch": 2.150842514581983, "grad_norm": 0.832737386226654, "learning_rate": 9.790217574636433e-07, "loss": 0.0606, "step": 13275 }, { "epoch": 2.15100453661698, "grad_norm": 1.0580108165740967, "learning_rate": 9.786747199264088e-07, "loss": 0.0632, "step": 13276 }, { "epoch": 2.1511665586519766, "grad_norm": 0.8876791000366211, "learning_rate": 9.783277289380456e-07, "loss": 0.0628, "step": 13277 }, { "epoch": 2.1513285806869735, "grad_norm": 0.9212145209312439, "learning_rate": 9.779807845091722e-07, "loss": 0.0622, "step": 13278 }, { "epoch": 2.15149060272197, "grad_norm": 0.7946116924285889, "learning_rate": 9.776338866504045e-07, "loss": 0.0588, "step": 13279 }, { "epoch": 2.151652624756967, "grad_norm": 0.796489417552948, "learning_rate": 9.77287035372355e-07, "loss": 0.0572, "step": 13280 }, { "epoch": 2.151814646791964, "grad_norm": 0.8203312754631042, "learning_rate": 9.769402306856373e-07, "loss": 0.0578, "step": 13281 }, { "epoch": 2.1519766688269604, "grad_norm": 1.2622346878051758, "learning_rate": 9.76593472600863e-07, "loss": 0.0665, "step": 13282 }, { "epoch": 2.1521386908619573, "grad_norm": 0.9482854604721069, "learning_rate": 9.762467611286416e-07, "loss": 0.0666, "step": 13283 }, { "epoch": 2.152300712896954, "grad_norm": 0.8730493783950806, "learning_rate": 9.75900096279582e-07, "loss": 0.0604, "step": 13284 }, { "epoch": 2.1524627349319507, "grad_norm": 0.9414070248603821, "learning_rate": 9.755534780642911e-07, "loss": 0.0669, "step": 13285 }, { "epoch": 2.1526247569669477, "grad_norm": 0.9391159415245056, "learning_rate": 9.752069064933758e-07, "loss": 0.0634, "step": 13286 }, { "epoch": 2.152786779001944, "grad_norm": 0.8309518098831177, "learning_rate": 9.748603815774371e-07, "loss": 0.0635, "step": 13287 }, { "epoch": 2.152948801036941, "grad_norm": 0.7785058617591858, "learning_rate": 9.745139033270812e-07, "loss": 0.06, "step": 13288 }, { "epoch": 2.1531108230719376, "grad_norm": 0.9786415696144104, "learning_rate": 9.74167471752908e-07, "loss": 0.0626, "step": 13289 }, { "epoch": 2.1532728451069345, "grad_norm": 1.009415626525879, "learning_rate": 9.738210868655187e-07, "loss": 0.0712, "step": 13290 }, { "epoch": 2.1534348671419314, "grad_norm": 0.8282557725906372, "learning_rate": 9.7347474867551e-07, "loss": 0.0582, "step": 13291 }, { "epoch": 2.153596889176928, "grad_norm": 1.0145543813705444, "learning_rate": 9.73128457193479e-07, "loss": 0.0637, "step": 13292 }, { "epoch": 2.153758911211925, "grad_norm": 0.950143039226532, "learning_rate": 9.72782212430024e-07, "loss": 0.0676, "step": 13293 }, { "epoch": 2.153920933246922, "grad_norm": 0.8464543223381042, "learning_rate": 9.724360143957367e-07, "loss": 0.0622, "step": 13294 }, { "epoch": 2.1540829552819183, "grad_norm": 0.9932398200035095, "learning_rate": 9.720898631012106e-07, "loss": 0.0596, "step": 13295 }, { "epoch": 2.154244977316915, "grad_norm": 0.8855057954788208, "learning_rate": 9.717437585570375e-07, "loss": 0.0592, "step": 13296 }, { "epoch": 2.1544069993519117, "grad_norm": 0.9049186110496521, "learning_rate": 9.713977007738068e-07, "loss": 0.0685, "step": 13297 }, { "epoch": 2.1545690213869086, "grad_norm": 1.360369324684143, "learning_rate": 9.710516897621072e-07, "loss": 0.0636, "step": 13298 }, { "epoch": 2.1547310434219056, "grad_norm": 0.8323555588722229, "learning_rate": 9.707057255325262e-07, "loss": 0.0633, "step": 13299 }, { "epoch": 2.154893065456902, "grad_norm": 0.8513553738594055, "learning_rate": 9.703598080956488e-07, "loss": 0.0532, "step": 13300 }, { "epoch": 2.155055087491899, "grad_norm": 0.8135280609130859, "learning_rate": 9.700139374620602e-07, "loss": 0.0648, "step": 13301 }, { "epoch": 2.1552171095268955, "grad_norm": 0.821293830871582, "learning_rate": 9.696681136423422e-07, "loss": 0.0607, "step": 13302 }, { "epoch": 2.1553791315618924, "grad_norm": 0.984480082988739, "learning_rate": 9.693223366470767e-07, "loss": 0.0664, "step": 13303 }, { "epoch": 2.1555411535968894, "grad_norm": 0.8220646381378174, "learning_rate": 9.689766064868434e-07, "loss": 0.0597, "step": 13304 }, { "epoch": 2.155703175631886, "grad_norm": 0.9679084420204163, "learning_rate": 9.686309231722219e-07, "loss": 0.0619, "step": 13305 }, { "epoch": 2.155865197666883, "grad_norm": 1.0595329999923706, "learning_rate": 9.682852867137865e-07, "loss": 0.0668, "step": 13306 }, { "epoch": 2.1560272197018793, "grad_norm": 0.989339292049408, "learning_rate": 9.679396971221155e-07, "loss": 0.0646, "step": 13307 }, { "epoch": 2.156189241736876, "grad_norm": 1.0220208168029785, "learning_rate": 9.675941544077833e-07, "loss": 0.0686, "step": 13308 }, { "epoch": 2.156351263771873, "grad_norm": 0.9169559478759766, "learning_rate": 9.672486585813606e-07, "loss": 0.0612, "step": 13309 }, { "epoch": 2.1565132858068696, "grad_norm": 0.8436880707740784, "learning_rate": 9.6690320965342e-07, "loss": 0.0566, "step": 13310 }, { "epoch": 2.1566753078418666, "grad_norm": 0.835830569267273, "learning_rate": 9.665578076345307e-07, "loss": 0.0622, "step": 13311 }, { "epoch": 2.156837329876863, "grad_norm": 0.8049341440200806, "learning_rate": 9.66212452535262e-07, "loss": 0.0625, "step": 13312 }, { "epoch": 2.15699935191186, "grad_norm": 0.9086779952049255, "learning_rate": 9.658671443661804e-07, "loss": 0.0625, "step": 13313 }, { "epoch": 2.157161373946857, "grad_norm": 0.9593300223350525, "learning_rate": 9.655218831378518e-07, "loss": 0.0657, "step": 13314 }, { "epoch": 2.1573233959818534, "grad_norm": 0.790436863899231, "learning_rate": 9.651766688608402e-07, "loss": 0.0626, "step": 13315 }, { "epoch": 2.1574854180168503, "grad_norm": 0.783841073513031, "learning_rate": 9.648315015457083e-07, "loss": 0.0611, "step": 13316 }, { "epoch": 2.1576474400518473, "grad_norm": 0.861705482006073, "learning_rate": 9.644863812030176e-07, "loss": 0.0595, "step": 13317 }, { "epoch": 2.1578094620868438, "grad_norm": 0.8327277302742004, "learning_rate": 9.641413078433274e-07, "loss": 0.0562, "step": 13318 }, { "epoch": 2.1579714841218407, "grad_norm": 0.9547343850135803, "learning_rate": 9.637962814771976e-07, "loss": 0.0606, "step": 13319 }, { "epoch": 2.158133506156837, "grad_norm": 0.9020182490348816, "learning_rate": 9.63451302115182e-07, "loss": 0.0591, "step": 13320 }, { "epoch": 2.158295528191834, "grad_norm": 0.896525502204895, "learning_rate": 9.631063697678392e-07, "loss": 0.0574, "step": 13321 }, { "epoch": 2.158457550226831, "grad_norm": 0.833091139793396, "learning_rate": 9.627614844457222e-07, "loss": 0.0551, "step": 13322 }, { "epoch": 2.1586195722618275, "grad_norm": 0.8933079838752747, "learning_rate": 9.624166461593848e-07, "loss": 0.0587, "step": 13323 }, { "epoch": 2.1587815942968245, "grad_norm": 0.9775946736335754, "learning_rate": 9.620718549193764e-07, "loss": 0.0632, "step": 13324 }, { "epoch": 2.158943616331821, "grad_norm": 0.898419976234436, "learning_rate": 9.617271107362465e-07, "loss": 0.0688, "step": 13325 }, { "epoch": 2.159105638366818, "grad_norm": 0.8033084869384766, "learning_rate": 9.61382413620546e-07, "loss": 0.0575, "step": 13326 }, { "epoch": 2.159267660401815, "grad_norm": 0.9245520830154419, "learning_rate": 9.610377635828197e-07, "loss": 0.0599, "step": 13327 }, { "epoch": 2.1594296824368113, "grad_norm": 0.9904624819755554, "learning_rate": 9.606931606336134e-07, "loss": 0.0731, "step": 13328 }, { "epoch": 2.1595917044718083, "grad_norm": 1.0654817819595337, "learning_rate": 9.603486047834712e-07, "loss": 0.0672, "step": 13329 }, { "epoch": 2.1597537265068047, "grad_norm": 0.7820091843605042, "learning_rate": 9.60004096042936e-07, "loss": 0.0544, "step": 13330 }, { "epoch": 2.1599157485418017, "grad_norm": 0.8341110348701477, "learning_rate": 9.59659634422549e-07, "loss": 0.054, "step": 13331 }, { "epoch": 2.1600777705767986, "grad_norm": 0.8635512590408325, "learning_rate": 9.593152199328494e-07, "loss": 0.0546, "step": 13332 }, { "epoch": 2.160239792611795, "grad_norm": 0.8399879932403564, "learning_rate": 9.589708525843754e-07, "loss": 0.0633, "step": 13333 }, { "epoch": 2.160401814646792, "grad_norm": 0.8963909149169922, "learning_rate": 9.586265323876653e-07, "loss": 0.0589, "step": 13334 }, { "epoch": 2.1605638366817885, "grad_norm": 0.9390884637832642, "learning_rate": 9.582822593532514e-07, "loss": 0.0639, "step": 13335 }, { "epoch": 2.1607258587167855, "grad_norm": 1.0376230478286743, "learning_rate": 9.579380334916704e-07, "loss": 0.0714, "step": 13336 }, { "epoch": 2.1608878807517824, "grad_norm": 0.9008492827415466, "learning_rate": 9.575938548134548e-07, "loss": 0.0586, "step": 13337 }, { "epoch": 2.161049902786779, "grad_norm": 0.8083958625793457, "learning_rate": 9.572497233291337e-07, "loss": 0.0612, "step": 13338 }, { "epoch": 2.161211924821776, "grad_norm": 0.9335343837738037, "learning_rate": 9.56905639049238e-07, "loss": 0.0678, "step": 13339 }, { "epoch": 2.1613739468567728, "grad_norm": 0.9797310829162598, "learning_rate": 9.56561601984294e-07, "loss": 0.0699, "step": 13340 }, { "epoch": 2.1615359688917692, "grad_norm": 0.8286275267601013, "learning_rate": 9.562176121448322e-07, "loss": 0.059, "step": 13341 }, { "epoch": 2.161697990926766, "grad_norm": 0.887620210647583, "learning_rate": 9.558736695413745e-07, "loss": 0.0608, "step": 13342 }, { "epoch": 2.1618600129617627, "grad_norm": 0.888256847858429, "learning_rate": 9.55529774184446e-07, "loss": 0.0678, "step": 13343 }, { "epoch": 2.1620220349967596, "grad_norm": 0.908109724521637, "learning_rate": 9.551859260845686e-07, "loss": 0.0631, "step": 13344 }, { "epoch": 2.162184057031756, "grad_norm": 1.2315661907196045, "learning_rate": 9.548421252522635e-07, "loss": 0.0606, "step": 13345 }, { "epoch": 2.162346079066753, "grad_norm": 0.8499714136123657, "learning_rate": 9.544983716980505e-07, "loss": 0.06, "step": 13346 }, { "epoch": 2.16250810110175, "grad_norm": 0.9646509885787964, "learning_rate": 9.54154665432447e-07, "loss": 0.0673, "step": 13347 }, { "epoch": 2.1626701231367464, "grad_norm": 0.8329025506973267, "learning_rate": 9.5381100646597e-07, "loss": 0.0613, "step": 13348 }, { "epoch": 2.1628321451717434, "grad_norm": 0.9725831151008606, "learning_rate": 9.534673948091344e-07, "loss": 0.0676, "step": 13349 }, { "epoch": 2.1629941672067403, "grad_norm": 0.8599954843521118, "learning_rate": 9.531238304724538e-07, "loss": 0.06, "step": 13350 }, { "epoch": 2.163156189241737, "grad_norm": 0.8382558822631836, "learning_rate": 9.52780313466441e-07, "loss": 0.0622, "step": 13351 }, { "epoch": 2.1633182112767337, "grad_norm": 0.8793473839759827, "learning_rate": 9.524368438016071e-07, "loss": 0.0563, "step": 13352 }, { "epoch": 2.1634802333117302, "grad_norm": 0.9254469275474548, "learning_rate": 9.520934214884598e-07, "loss": 0.0632, "step": 13353 }, { "epoch": 2.163642255346727, "grad_norm": 0.8985252976417542, "learning_rate": 9.517500465375071e-07, "loss": 0.0598, "step": 13354 }, { "epoch": 2.163804277381724, "grad_norm": 0.7945815920829773, "learning_rate": 9.514067189592583e-07, "loss": 0.0595, "step": 13355 }, { "epoch": 2.1639662994167206, "grad_norm": 0.8715730309486389, "learning_rate": 9.510634387642151e-07, "loss": 0.0598, "step": 13356 }, { "epoch": 2.1641283214517175, "grad_norm": 0.8382450342178345, "learning_rate": 9.507202059628826e-07, "loss": 0.0691, "step": 13357 }, { "epoch": 2.164290343486714, "grad_norm": 1.0573970079421997, "learning_rate": 9.503770205657625e-07, "loss": 0.0636, "step": 13358 }, { "epoch": 2.164452365521711, "grad_norm": 0.8974131345748901, "learning_rate": 9.500338825833555e-07, "loss": 0.0545, "step": 13359 }, { "epoch": 2.164614387556708, "grad_norm": 0.8579263687133789, "learning_rate": 9.496907920261609e-07, "loss": 0.063, "step": 13360 }, { "epoch": 2.1647764095917044, "grad_norm": 0.8979440331459045, "learning_rate": 9.493477489046762e-07, "loss": 0.0669, "step": 13361 }, { "epoch": 2.1649384316267013, "grad_norm": 0.8528677225112915, "learning_rate": 9.490047532293984e-07, "loss": 0.0587, "step": 13362 }, { "epoch": 2.165100453661698, "grad_norm": 0.8880028128623962, "learning_rate": 9.486618050108223e-07, "loss": 0.0628, "step": 13363 }, { "epoch": 2.1652624756966947, "grad_norm": 0.9306471347808838, "learning_rate": 9.48318904259439e-07, "loss": 0.0645, "step": 13364 }, { "epoch": 2.1654244977316917, "grad_norm": 0.8764260411262512, "learning_rate": 9.479760509857433e-07, "loss": 0.0621, "step": 13365 }, { "epoch": 2.165586519766688, "grad_norm": 0.8734313249588013, "learning_rate": 9.476332452002245e-07, "loss": 0.0585, "step": 13366 }, { "epoch": 2.165748541801685, "grad_norm": 0.9644814729690552, "learning_rate": 9.472904869133726e-07, "loss": 0.07, "step": 13367 }, { "epoch": 2.1659105638366816, "grad_norm": 0.8495195508003235, "learning_rate": 9.469477761356727e-07, "loss": 0.0591, "step": 13368 }, { "epoch": 2.1660725858716785, "grad_norm": 0.8992612361907959, "learning_rate": 9.466051128776133e-07, "loss": 0.0637, "step": 13369 }, { "epoch": 2.1662346079066754, "grad_norm": 0.9654873013496399, "learning_rate": 9.462624971496793e-07, "loss": 0.0631, "step": 13370 }, { "epoch": 2.166396629941672, "grad_norm": 1.062732219696045, "learning_rate": 9.459199289623519e-07, "loss": 0.0635, "step": 13371 }, { "epoch": 2.166558651976669, "grad_norm": 0.927365243434906, "learning_rate": 9.455774083261138e-07, "loss": 0.0664, "step": 13372 }, { "epoch": 2.166720674011666, "grad_norm": 0.9588886499404907, "learning_rate": 9.452349352514448e-07, "loss": 0.0605, "step": 13373 }, { "epoch": 2.1668826960466623, "grad_norm": 0.8306574821472168, "learning_rate": 9.448925097488257e-07, "loss": 0.0587, "step": 13374 }, { "epoch": 2.167044718081659, "grad_norm": 0.8604657649993896, "learning_rate": 9.445501318287317e-07, "loss": 0.0605, "step": 13375 }, { "epoch": 2.1672067401166557, "grad_norm": 0.8701779842376709, "learning_rate": 9.442078015016398e-07, "loss": 0.0555, "step": 13376 }, { "epoch": 2.1673687621516526, "grad_norm": 1.0996429920196533, "learning_rate": 9.43865518778024e-07, "loss": 0.068, "step": 13377 }, { "epoch": 2.1675307841866496, "grad_norm": 0.9881284236907959, "learning_rate": 9.435232836683577e-07, "loss": 0.0707, "step": 13378 }, { "epoch": 2.167692806221646, "grad_norm": 1.0332772731781006, "learning_rate": 9.431810961831123e-07, "loss": 0.0676, "step": 13379 }, { "epoch": 2.167854828256643, "grad_norm": 0.9715332984924316, "learning_rate": 9.42838956332758e-07, "loss": 0.0713, "step": 13380 }, { "epoch": 2.1680168502916395, "grad_norm": 1.1476157903671265, "learning_rate": 9.42496864127764e-07, "loss": 0.0618, "step": 13381 }, { "epoch": 2.1681788723266364, "grad_norm": 0.9035125970840454, "learning_rate": 9.421548195785962e-07, "loss": 0.0641, "step": 13382 }, { "epoch": 2.1683408943616334, "grad_norm": 1.018491268157959, "learning_rate": 9.418128226957202e-07, "loss": 0.0682, "step": 13383 }, { "epoch": 2.16850291639663, "grad_norm": 0.8716436624526978, "learning_rate": 9.414708734896019e-07, "loss": 0.0631, "step": 13384 }, { "epoch": 2.1686649384316268, "grad_norm": 0.9046812057495117, "learning_rate": 9.411289719707039e-07, "loss": 0.0658, "step": 13385 }, { "epoch": 2.1688269604666233, "grad_norm": 0.9490453004837036, "learning_rate": 9.407871181494865e-07, "loss": 0.0601, "step": 13386 }, { "epoch": 2.16898898250162, "grad_norm": 0.8839377164840698, "learning_rate": 9.40445312036409e-07, "loss": 0.0684, "step": 13387 }, { "epoch": 2.169151004536617, "grad_norm": 1.0054982900619507, "learning_rate": 9.401035536419326e-07, "loss": 0.0635, "step": 13388 }, { "epoch": 2.1693130265716136, "grad_norm": 0.8407875299453735, "learning_rate": 9.397618429765118e-07, "loss": 0.0653, "step": 13389 }, { "epoch": 2.1694750486066106, "grad_norm": 0.8645152449607849, "learning_rate": 9.394201800506028e-07, "loss": 0.0612, "step": 13390 }, { "epoch": 2.169637070641607, "grad_norm": 1.003045678138733, "learning_rate": 9.390785648746598e-07, "loss": 0.0665, "step": 13391 }, { "epoch": 2.169799092676604, "grad_norm": 0.8517448306083679, "learning_rate": 9.387369974591353e-07, "loss": 0.065, "step": 13392 }, { "epoch": 2.169961114711601, "grad_norm": 0.9961963891983032, "learning_rate": 9.383954778144807e-07, "loss": 0.0635, "step": 13393 }, { "epoch": 2.1701231367465974, "grad_norm": 0.8124943375587463, "learning_rate": 9.380540059511453e-07, "loss": 0.0552, "step": 13394 }, { "epoch": 2.1702851587815943, "grad_norm": 0.801011323928833, "learning_rate": 9.377125818795777e-07, "loss": 0.0616, "step": 13395 }, { "epoch": 2.1704471808165913, "grad_norm": 0.938008725643158, "learning_rate": 9.373712056102249e-07, "loss": 0.0663, "step": 13396 }, { "epoch": 2.1706092028515878, "grad_norm": 0.9394778609275818, "learning_rate": 9.370298771535302e-07, "loss": 0.0658, "step": 13397 }, { "epoch": 2.1707712248865847, "grad_norm": 0.9980944991111755, "learning_rate": 9.366885965199398e-07, "loss": 0.0664, "step": 13398 }, { "epoch": 2.170933246921581, "grad_norm": 0.8614987730979919, "learning_rate": 9.363473637198964e-07, "loss": 0.0605, "step": 13399 }, { "epoch": 2.171095268956578, "grad_norm": 0.7758714556694031, "learning_rate": 9.360061787638383e-07, "loss": 0.0641, "step": 13400 }, { "epoch": 2.171257290991575, "grad_norm": 0.9587964415550232, "learning_rate": 9.356650416622065e-07, "loss": 0.0654, "step": 13401 }, { "epoch": 2.1714193130265715, "grad_norm": 0.970716118812561, "learning_rate": 9.353239524254382e-07, "loss": 0.0568, "step": 13402 }, { "epoch": 2.1715813350615685, "grad_norm": 0.8484756946563721, "learning_rate": 9.349829110639718e-07, "loss": 0.0628, "step": 13403 }, { "epoch": 2.171743357096565, "grad_norm": 0.8023274540901184, "learning_rate": 9.346419175882407e-07, "loss": 0.0593, "step": 13404 }, { "epoch": 2.171905379131562, "grad_norm": 0.8955522179603577, "learning_rate": 9.343009720086785e-07, "loss": 0.0591, "step": 13405 }, { "epoch": 2.172067401166559, "grad_norm": 0.9078856706619263, "learning_rate": 9.339600743357177e-07, "loss": 0.0688, "step": 13406 }, { "epoch": 2.1722294232015553, "grad_norm": 1.0572879314422607, "learning_rate": 9.33619224579789e-07, "loss": 0.0575, "step": 13407 }, { "epoch": 2.1723914452365523, "grad_norm": 0.8648132681846619, "learning_rate": 9.332784227513212e-07, "loss": 0.0621, "step": 13408 }, { "epoch": 2.1725534672715487, "grad_norm": 0.8254750967025757, "learning_rate": 9.329376688607425e-07, "loss": 0.06, "step": 13409 }, { "epoch": 2.1727154893065457, "grad_norm": 0.8656960725784302, "learning_rate": 9.325969629184789e-07, "loss": 0.0655, "step": 13410 }, { "epoch": 2.1728775113415426, "grad_norm": 0.8903995156288147, "learning_rate": 9.32256304934955e-07, "loss": 0.0603, "step": 13411 }, { "epoch": 2.173039533376539, "grad_norm": 0.9863763451576233, "learning_rate": 9.319156949205943e-07, "loss": 0.0615, "step": 13412 }, { "epoch": 2.173201555411536, "grad_norm": 0.7889726161956787, "learning_rate": 9.315751328858189e-07, "loss": 0.0544, "step": 13413 }, { "epoch": 2.1733635774465325, "grad_norm": 0.8542593717575073, "learning_rate": 9.312346188410496e-07, "loss": 0.0636, "step": 13414 }, { "epoch": 2.1735255994815295, "grad_norm": 0.8379946947097778, "learning_rate": 9.308941527967039e-07, "loss": 0.0606, "step": 13415 }, { "epoch": 2.1736876215165264, "grad_norm": 0.9521237015724182, "learning_rate": 9.30553734763199e-07, "loss": 0.0582, "step": 13416 }, { "epoch": 2.173849643551523, "grad_norm": 0.8582279682159424, "learning_rate": 9.302133647509526e-07, "loss": 0.0613, "step": 13417 }, { "epoch": 2.17401166558652, "grad_norm": 0.9010220766067505, "learning_rate": 9.298730427703795e-07, "loss": 0.0658, "step": 13418 }, { "epoch": 2.1741736876215167, "grad_norm": 1.0892761945724487, "learning_rate": 9.295327688318906e-07, "loss": 0.0687, "step": 13419 }, { "epoch": 2.1743357096565132, "grad_norm": 0.9256922006607056, "learning_rate": 9.291925429458987e-07, "loss": 0.0606, "step": 13420 }, { "epoch": 2.17449773169151, "grad_norm": 1.116697907447815, "learning_rate": 9.288523651228134e-07, "loss": 0.0613, "step": 13421 }, { "epoch": 2.1746597537265067, "grad_norm": 0.9603244066238403, "learning_rate": 9.285122353730439e-07, "loss": 0.0636, "step": 13422 }, { "epoch": 2.1748217757615036, "grad_norm": 0.980385959148407, "learning_rate": 9.281721537069971e-07, "loss": 0.0623, "step": 13423 }, { "epoch": 2.1749837977965005, "grad_norm": 0.8736245036125183, "learning_rate": 9.278321201350784e-07, "loss": 0.0626, "step": 13424 }, { "epoch": 2.175145819831497, "grad_norm": 0.8763680458068848, "learning_rate": 9.274921346676935e-07, "loss": 0.0618, "step": 13425 }, { "epoch": 2.175307841866494, "grad_norm": 0.9780583381652832, "learning_rate": 9.271521973152418e-07, "loss": 0.0612, "step": 13426 }, { "epoch": 2.1754698639014904, "grad_norm": 1.1210222244262695, "learning_rate": 9.268123080881275e-07, "loss": 0.0623, "step": 13427 }, { "epoch": 2.1756318859364874, "grad_norm": 0.9153855443000793, "learning_rate": 9.264724669967498e-07, "loss": 0.0649, "step": 13428 }, { "epoch": 2.1757939079714843, "grad_norm": 0.8484050631523132, "learning_rate": 9.261326740515075e-07, "loss": 0.0588, "step": 13429 }, { "epoch": 2.175955930006481, "grad_norm": 1.0825287103652954, "learning_rate": 9.257929292627956e-07, "loss": 0.0637, "step": 13430 }, { "epoch": 2.1761179520414777, "grad_norm": 0.7929718494415283, "learning_rate": 9.254532326410101e-07, "loss": 0.0594, "step": 13431 }, { "epoch": 2.176279974076474, "grad_norm": 0.9638476967811584, "learning_rate": 9.251135841965467e-07, "loss": 0.0708, "step": 13432 }, { "epoch": 2.176441996111471, "grad_norm": 0.856880247592926, "learning_rate": 9.24773983939796e-07, "loss": 0.0646, "step": 13433 }, { "epoch": 2.176604018146468, "grad_norm": 0.894572377204895, "learning_rate": 9.244344318811491e-07, "loss": 0.0658, "step": 13434 }, { "epoch": 2.1767660401814646, "grad_norm": 0.8938385248184204, "learning_rate": 9.240949280309949e-07, "loss": 0.0606, "step": 13435 }, { "epoch": 2.1769280622164615, "grad_norm": 0.8438916206359863, "learning_rate": 9.237554723997242e-07, "loss": 0.0586, "step": 13436 }, { "epoch": 2.177090084251458, "grad_norm": 0.9637516140937805, "learning_rate": 9.234160649977206e-07, "loss": 0.0687, "step": 13437 }, { "epoch": 2.177252106286455, "grad_norm": 0.8094197511672974, "learning_rate": 9.230767058353701e-07, "loss": 0.0559, "step": 13438 }, { "epoch": 2.177414128321452, "grad_norm": 0.8560066223144531, "learning_rate": 9.227373949230567e-07, "loss": 0.0559, "step": 13439 }, { "epoch": 2.1775761503564484, "grad_norm": 1.0146995782852173, "learning_rate": 9.223981322711617e-07, "loss": 0.0702, "step": 13440 }, { "epoch": 2.1777381723914453, "grad_norm": 0.8843867182731628, "learning_rate": 9.220589178900663e-07, "loss": 0.0625, "step": 13441 }, { "epoch": 2.1779001944264422, "grad_norm": 0.7497606873512268, "learning_rate": 9.217197517901494e-07, "loss": 0.0572, "step": 13442 }, { "epoch": 2.1780622164614387, "grad_norm": 0.8896629810333252, "learning_rate": 9.213806339817897e-07, "loss": 0.0626, "step": 13443 }, { "epoch": 2.1782242384964356, "grad_norm": 0.8931178450584412, "learning_rate": 9.210415644753615e-07, "loss": 0.0585, "step": 13444 }, { "epoch": 2.178386260531432, "grad_norm": 0.8438544869422913, "learning_rate": 9.207025432812397e-07, "loss": 0.0594, "step": 13445 }, { "epoch": 2.178548282566429, "grad_norm": 0.9344618320465088, "learning_rate": 9.203635704097988e-07, "loss": 0.0618, "step": 13446 }, { "epoch": 2.1787103046014256, "grad_norm": 0.863814115524292, "learning_rate": 9.20024645871411e-07, "loss": 0.0653, "step": 13447 }, { "epoch": 2.1788723266364225, "grad_norm": 0.8747982382774353, "learning_rate": 9.196857696764446e-07, "loss": 0.0585, "step": 13448 }, { "epoch": 2.1790343486714194, "grad_norm": 0.9165652394294739, "learning_rate": 9.193469418352696e-07, "loss": 0.069, "step": 13449 }, { "epoch": 2.179196370706416, "grad_norm": 0.9408144950866699, "learning_rate": 9.190081623582531e-07, "loss": 0.0711, "step": 13450 }, { "epoch": 2.179358392741413, "grad_norm": 0.9184764623641968, "learning_rate": 9.186694312557606e-07, "loss": 0.0666, "step": 13451 }, { "epoch": 2.17952041477641, "grad_norm": 0.8569600582122803, "learning_rate": 9.183307485381571e-07, "loss": 0.0613, "step": 13452 }, { "epoch": 2.1796824368114063, "grad_norm": 0.8777344226837158, "learning_rate": 9.17992114215805e-07, "loss": 0.0628, "step": 13453 }, { "epoch": 2.179844458846403, "grad_norm": 0.9402377605438232, "learning_rate": 9.176535282990656e-07, "loss": 0.0669, "step": 13454 }, { "epoch": 2.1800064808813997, "grad_norm": 0.9360073208808899, "learning_rate": 9.173149907982993e-07, "loss": 0.0617, "step": 13455 }, { "epoch": 2.1801685029163966, "grad_norm": 0.9287109971046448, "learning_rate": 9.169765017238641e-07, "loss": 0.0681, "step": 13456 }, { "epoch": 2.1803305249513936, "grad_norm": 0.8160260915756226, "learning_rate": 9.166380610861172e-07, "loss": 0.0637, "step": 13457 }, { "epoch": 2.18049254698639, "grad_norm": 0.8967964053153992, "learning_rate": 9.162996688954148e-07, "loss": 0.0634, "step": 13458 }, { "epoch": 2.180654569021387, "grad_norm": 0.968971848487854, "learning_rate": 9.159613251621083e-07, "loss": 0.065, "step": 13459 }, { "epoch": 2.1808165910563835, "grad_norm": 0.8118306994438171, "learning_rate": 9.156230298965529e-07, "loss": 0.0613, "step": 13460 }, { "epoch": 2.1809786130913804, "grad_norm": 0.9221633672714233, "learning_rate": 9.152847831090986e-07, "loss": 0.0642, "step": 13461 }, { "epoch": 2.1811406351263773, "grad_norm": 0.8746361136436462, "learning_rate": 9.149465848100958e-07, "loss": 0.0629, "step": 13462 }, { "epoch": 2.181302657161374, "grad_norm": 0.9616087079048157, "learning_rate": 9.14608435009891e-07, "loss": 0.0727, "step": 13463 }, { "epoch": 2.1814646791963708, "grad_norm": 0.8658609390258789, "learning_rate": 9.142703337188305e-07, "loss": 0.06, "step": 13464 }, { "epoch": 2.1816267012313673, "grad_norm": 0.833065390586853, "learning_rate": 9.139322809472623e-07, "loss": 0.0552, "step": 13465 }, { "epoch": 2.181788723266364, "grad_norm": 0.8363578915596008, "learning_rate": 9.135942767055272e-07, "loss": 0.0673, "step": 13466 }, { "epoch": 2.181950745301361, "grad_norm": 0.9329082369804382, "learning_rate": 9.132563210039683e-07, "loss": 0.0571, "step": 13467 }, { "epoch": 2.1821127673363576, "grad_norm": 0.8257620334625244, "learning_rate": 9.129184138529259e-07, "loss": 0.0663, "step": 13468 }, { "epoch": 2.1822747893713546, "grad_norm": 1.1378121376037598, "learning_rate": 9.125805552627395e-07, "loss": 0.0648, "step": 13469 }, { "epoch": 2.182436811406351, "grad_norm": 0.8082774877548218, "learning_rate": 9.122427452437465e-07, "loss": 0.0596, "step": 13470 }, { "epoch": 2.182598833441348, "grad_norm": 1.0287437438964844, "learning_rate": 9.119049838062832e-07, "loss": 0.067, "step": 13471 }, { "epoch": 2.182760855476345, "grad_norm": 0.9428204894065857, "learning_rate": 9.115672709606846e-07, "loss": 0.0679, "step": 13472 }, { "epoch": 2.1829228775113414, "grad_norm": 0.9620710611343384, "learning_rate": 9.11229606717284e-07, "loss": 0.0726, "step": 13473 }, { "epoch": 2.1830848995463383, "grad_norm": 0.8988024592399597, "learning_rate": 9.108919910864111e-07, "loss": 0.063, "step": 13474 }, { "epoch": 2.1832469215813353, "grad_norm": 0.978762686252594, "learning_rate": 9.105544240783987e-07, "loss": 0.0629, "step": 13475 }, { "epoch": 2.1834089436163318, "grad_norm": 0.9975540041923523, "learning_rate": 9.102169057035753e-07, "loss": 0.0659, "step": 13476 }, { "epoch": 2.1835709656513287, "grad_norm": 0.9281214475631714, "learning_rate": 9.098794359722668e-07, "loss": 0.0587, "step": 13477 }, { "epoch": 2.183732987686325, "grad_norm": 0.8466314673423767, "learning_rate": 9.095420148947984e-07, "loss": 0.0631, "step": 13478 }, { "epoch": 2.183895009721322, "grad_norm": 0.8057236671447754, "learning_rate": 9.092046424814962e-07, "loss": 0.0631, "step": 13479 }, { "epoch": 2.184057031756319, "grad_norm": 0.8913443088531494, "learning_rate": 9.088673187426836e-07, "loss": 0.0579, "step": 13480 }, { "epoch": 2.1842190537913155, "grad_norm": 0.8665725588798523, "learning_rate": 9.085300436886793e-07, "loss": 0.0602, "step": 13481 }, { "epoch": 2.1843810758263125, "grad_norm": 0.8465587496757507, "learning_rate": 9.081928173298046e-07, "loss": 0.0576, "step": 13482 }, { "epoch": 2.184543097861309, "grad_norm": 0.9389297962188721, "learning_rate": 9.078556396763777e-07, "loss": 0.0637, "step": 13483 }, { "epoch": 2.184705119896306, "grad_norm": 0.9527860879898071, "learning_rate": 9.075185107387149e-07, "loss": 0.0648, "step": 13484 }, { "epoch": 2.184867141931303, "grad_norm": 0.8697720170021057, "learning_rate": 9.071814305271323e-07, "loss": 0.0654, "step": 13485 }, { "epoch": 2.1850291639662993, "grad_norm": 0.963236391544342, "learning_rate": 9.068443990519432e-07, "loss": 0.066, "step": 13486 }, { "epoch": 2.1851911860012962, "grad_norm": 0.8899818658828735, "learning_rate": 9.065074163234602e-07, "loss": 0.0604, "step": 13487 }, { "epoch": 2.1853532080362927, "grad_norm": 0.8355003595352173, "learning_rate": 9.061704823519943e-07, "loss": 0.067, "step": 13488 }, { "epoch": 2.1855152300712897, "grad_norm": 0.9532532691955566, "learning_rate": 9.058335971478543e-07, "loss": 0.0583, "step": 13489 }, { "epoch": 2.1856772521062866, "grad_norm": 0.9321505427360535, "learning_rate": 9.054967607213486e-07, "loss": 0.0689, "step": 13490 }, { "epoch": 2.185839274141283, "grad_norm": 0.9366322755813599, "learning_rate": 9.051599730827842e-07, "loss": 0.0692, "step": 13491 }, { "epoch": 2.18600129617628, "grad_norm": 0.8937353491783142, "learning_rate": 9.048232342424642e-07, "loss": 0.0551, "step": 13492 }, { "epoch": 2.1861633182112765, "grad_norm": 0.9362406730651855, "learning_rate": 9.044865442106923e-07, "loss": 0.0698, "step": 13493 }, { "epoch": 2.1863253402462735, "grad_norm": 0.7960699200630188, "learning_rate": 9.04149902997773e-07, "loss": 0.0627, "step": 13494 }, { "epoch": 2.1864873622812704, "grad_norm": 0.9175137281417847, "learning_rate": 9.038133106140034e-07, "loss": 0.0695, "step": 13495 }, { "epoch": 2.186649384316267, "grad_norm": 1.0003280639648438, "learning_rate": 9.034767670696842e-07, "loss": 0.0628, "step": 13496 }, { "epoch": 2.186811406351264, "grad_norm": 1.0133966207504272, "learning_rate": 9.031402723751123e-07, "loss": 0.068, "step": 13497 }, { "epoch": 2.1869734283862607, "grad_norm": 1.0157874822616577, "learning_rate": 9.028038265405836e-07, "loss": 0.0642, "step": 13498 }, { "epoch": 2.1871354504212572, "grad_norm": 0.7945261001586914, "learning_rate": 9.02467429576393e-07, "loss": 0.0581, "step": 13499 }, { "epoch": 2.187297472456254, "grad_norm": 1.0244712829589844, "learning_rate": 9.021310814928328e-07, "loss": 0.0721, "step": 13500 }, { "epoch": 2.1874594944912507, "grad_norm": 0.8607012033462524, "learning_rate": 9.01794782300195e-07, "loss": 0.0603, "step": 13501 }, { "epoch": 2.1876215165262476, "grad_norm": 1.0268734693527222, "learning_rate": 9.01458532008769e-07, "loss": 0.0597, "step": 13502 }, { "epoch": 2.1877835385612445, "grad_norm": 0.8796284198760986, "learning_rate": 9.011223306288436e-07, "loss": 0.0594, "step": 13503 }, { "epoch": 2.187945560596241, "grad_norm": 0.8240768313407898, "learning_rate": 9.007861781707056e-07, "loss": 0.0583, "step": 13504 }, { "epoch": 2.188107582631238, "grad_norm": 0.7857369780540466, "learning_rate": 9.004500746446407e-07, "loss": 0.0549, "step": 13505 }, { "epoch": 2.1882696046662344, "grad_norm": 0.9636929631233215, "learning_rate": 9.001140200609334e-07, "loss": 0.0667, "step": 13506 }, { "epoch": 2.1884316267012314, "grad_norm": 0.9375138878822327, "learning_rate": 8.997780144298641e-07, "loss": 0.0653, "step": 13507 }, { "epoch": 2.1885936487362283, "grad_norm": 0.9468481540679932, "learning_rate": 8.994420577617155e-07, "loss": 0.0652, "step": 13508 }, { "epoch": 2.188755670771225, "grad_norm": 0.946871280670166, "learning_rate": 8.991061500667674e-07, "loss": 0.0666, "step": 13509 }, { "epoch": 2.1889176928062217, "grad_norm": 0.8926617503166199, "learning_rate": 8.987702913552964e-07, "loss": 0.066, "step": 13510 }, { "epoch": 2.189079714841218, "grad_norm": 0.8519583940505981, "learning_rate": 8.984344816375798e-07, "loss": 0.0596, "step": 13511 }, { "epoch": 2.189241736876215, "grad_norm": 0.8857722878456116, "learning_rate": 8.980987209238922e-07, "loss": 0.0557, "step": 13512 }, { "epoch": 2.189403758911212, "grad_norm": 0.9426934719085693, "learning_rate": 8.977630092245071e-07, "loss": 0.0582, "step": 13513 }, { "epoch": 2.1895657809462086, "grad_norm": 0.86836177110672, "learning_rate": 8.974273465496966e-07, "loss": 0.062, "step": 13514 }, { "epoch": 2.1897278029812055, "grad_norm": 0.8490039110183716, "learning_rate": 8.970917329097312e-07, "loss": 0.0581, "step": 13515 }, { "epoch": 2.189889825016202, "grad_norm": 0.9485715627670288, "learning_rate": 8.967561683148798e-07, "loss": 0.0639, "step": 13516 }, { "epoch": 2.190051847051199, "grad_norm": 0.7836689949035645, "learning_rate": 8.964206527754099e-07, "loss": 0.0571, "step": 13517 }, { "epoch": 2.190213869086196, "grad_norm": 0.9445635080337524, "learning_rate": 8.960851863015874e-07, "loss": 0.0558, "step": 13518 }, { "epoch": 2.1903758911211924, "grad_norm": 0.8634076118469238, "learning_rate": 8.957497689036768e-07, "loss": 0.0585, "step": 13519 }, { "epoch": 2.1905379131561893, "grad_norm": 1.2281583547592163, "learning_rate": 8.954144005919422e-07, "loss": 0.0657, "step": 13520 }, { "epoch": 2.190699935191186, "grad_norm": 0.9403387308120728, "learning_rate": 8.950790813766416e-07, "loss": 0.0543, "step": 13521 }, { "epoch": 2.1908619572261827, "grad_norm": 1.0141313076019287, "learning_rate": 8.947438112680387e-07, "loss": 0.0712, "step": 13522 }, { "epoch": 2.1910239792611796, "grad_norm": 0.9815289378166199, "learning_rate": 8.944085902763902e-07, "loss": 0.0637, "step": 13523 }, { "epoch": 2.191186001296176, "grad_norm": 0.9215153455734253, "learning_rate": 8.940734184119542e-07, "loss": 0.0563, "step": 13524 }, { "epoch": 2.191348023331173, "grad_norm": 0.9290637969970703, "learning_rate": 8.937382956849847e-07, "loss": 0.0601, "step": 13525 }, { "epoch": 2.19151004536617, "grad_norm": 0.8032289743423462, "learning_rate": 8.934032221057354e-07, "loss": 0.0625, "step": 13526 }, { "epoch": 2.1916720674011665, "grad_norm": 0.9063378572463989, "learning_rate": 8.930681976844613e-07, "loss": 0.059, "step": 13527 }, { "epoch": 2.1918340894361634, "grad_norm": 1.034857988357544, "learning_rate": 8.927332224314106e-07, "loss": 0.0695, "step": 13528 }, { "epoch": 2.19199611147116, "grad_norm": 1.0457537174224854, "learning_rate": 8.92398296356834e-07, "loss": 0.0632, "step": 13529 }, { "epoch": 2.192158133506157, "grad_norm": 0.8522835969924927, "learning_rate": 8.92063419470979e-07, "loss": 0.059, "step": 13530 }, { "epoch": 2.192320155541154, "grad_norm": 0.9645273685455322, "learning_rate": 8.917285917840926e-07, "loss": 0.0624, "step": 13531 }, { "epoch": 2.1924821775761503, "grad_norm": 1.049136996269226, "learning_rate": 8.91393813306419e-07, "loss": 0.0591, "step": 13532 }, { "epoch": 2.192644199611147, "grad_norm": 0.846191942691803, "learning_rate": 8.910590840482023e-07, "loss": 0.0586, "step": 13533 }, { "epoch": 2.1928062216461437, "grad_norm": 0.8312897682189941, "learning_rate": 8.907244040196836e-07, "loss": 0.0588, "step": 13534 }, { "epoch": 2.1929682436811406, "grad_norm": 0.8862128257751465, "learning_rate": 8.903897732311048e-07, "loss": 0.0608, "step": 13535 }, { "epoch": 2.1931302657161376, "grad_norm": 0.8992831110954285, "learning_rate": 8.900551916927022e-07, "loss": 0.0632, "step": 13536 }, { "epoch": 2.193292287751134, "grad_norm": 1.0759751796722412, "learning_rate": 8.897206594147156e-07, "loss": 0.0659, "step": 13537 }, { "epoch": 2.193454309786131, "grad_norm": 1.051218867301941, "learning_rate": 8.893861764073808e-07, "loss": 0.0672, "step": 13538 }, { "epoch": 2.1936163318211275, "grad_norm": 0.8192399144172668, "learning_rate": 8.890517426809306e-07, "loss": 0.0631, "step": 13539 }, { "epoch": 2.1937783538561244, "grad_norm": 0.7525157332420349, "learning_rate": 8.887173582455985e-07, "loss": 0.0506, "step": 13540 }, { "epoch": 2.1939403758911213, "grad_norm": 0.8876081109046936, "learning_rate": 8.883830231116153e-07, "loss": 0.0565, "step": 13541 }, { "epoch": 2.194102397926118, "grad_norm": 0.7892178893089294, "learning_rate": 8.88048737289213e-07, "loss": 0.0581, "step": 13542 }, { "epoch": 2.1942644199611148, "grad_norm": 0.8460841178894043, "learning_rate": 8.877145007886179e-07, "loss": 0.0665, "step": 13543 }, { "epoch": 2.1944264419961117, "grad_norm": 1.0782990455627441, "learning_rate": 8.873803136200574e-07, "loss": 0.0677, "step": 13544 }, { "epoch": 2.194588464031108, "grad_norm": 0.9548028707504272, "learning_rate": 8.870461757937568e-07, "loss": 0.0645, "step": 13545 }, { "epoch": 2.194750486066105, "grad_norm": 0.8092406392097473, "learning_rate": 8.8671208731994e-07, "loss": 0.0627, "step": 13546 }, { "epoch": 2.1949125081011016, "grad_norm": 0.8001680970191956, "learning_rate": 8.863780482088291e-07, "loss": 0.0547, "step": 13547 }, { "epoch": 2.1950745301360985, "grad_norm": 0.9187164306640625, "learning_rate": 8.860440584706451e-07, "loss": 0.0666, "step": 13548 }, { "epoch": 2.195236552171095, "grad_norm": 0.9525156617164612, "learning_rate": 8.857101181156072e-07, "loss": 0.0621, "step": 13549 }, { "epoch": 2.195398574206092, "grad_norm": 0.9552687406539917, "learning_rate": 8.853762271539332e-07, "loss": 0.0632, "step": 13550 }, { "epoch": 2.195560596241089, "grad_norm": 0.9133409857749939, "learning_rate": 8.850423855958393e-07, "loss": 0.0629, "step": 13551 }, { "epoch": 2.1957226182760854, "grad_norm": 0.7785288691520691, "learning_rate": 8.847085934515404e-07, "loss": 0.0539, "step": 13552 }, { "epoch": 2.1958846403110823, "grad_norm": 0.856397271156311, "learning_rate": 8.843748507312505e-07, "loss": 0.061, "step": 13553 }, { "epoch": 2.1960466623460793, "grad_norm": 0.9479225873947144, "learning_rate": 8.840411574451793e-07, "loss": 0.0606, "step": 13554 }, { "epoch": 2.1962086843810757, "grad_norm": 0.817360520362854, "learning_rate": 8.837075136035375e-07, "loss": 0.0542, "step": 13555 }, { "epoch": 2.1963707064160727, "grad_norm": 0.8960983157157898, "learning_rate": 8.833739192165352e-07, "loss": 0.0653, "step": 13556 }, { "epoch": 2.196532728451069, "grad_norm": 0.8757344484329224, "learning_rate": 8.830403742943797e-07, "loss": 0.0521, "step": 13557 }, { "epoch": 2.196694750486066, "grad_norm": 0.868101954460144, "learning_rate": 8.827068788472751e-07, "loss": 0.0589, "step": 13558 }, { "epoch": 2.196856772521063, "grad_norm": 0.8532265424728394, "learning_rate": 8.823734328854259e-07, "loss": 0.0614, "step": 13559 }, { "epoch": 2.1970187945560595, "grad_norm": 0.8555817008018494, "learning_rate": 8.820400364190351e-07, "loss": 0.0593, "step": 13560 }, { "epoch": 2.1971808165910565, "grad_norm": 0.8289614915847778, "learning_rate": 8.81706689458304e-07, "loss": 0.0605, "step": 13561 }, { "epoch": 2.197342838626053, "grad_norm": 0.767209529876709, "learning_rate": 8.813733920134321e-07, "loss": 0.0496, "step": 13562 }, { "epoch": 2.19750486066105, "grad_norm": 0.9439572691917419, "learning_rate": 8.81040144094617e-07, "loss": 0.0634, "step": 13563 }, { "epoch": 2.197666882696047, "grad_norm": 0.8966138958930969, "learning_rate": 8.807069457120571e-07, "loss": 0.0612, "step": 13564 }, { "epoch": 2.1978289047310433, "grad_norm": 1.0695065259933472, "learning_rate": 8.803737968759438e-07, "loss": 0.0759, "step": 13565 }, { "epoch": 2.1979909267660402, "grad_norm": 0.8874850273132324, "learning_rate": 8.80040697596474e-07, "loss": 0.0636, "step": 13566 }, { "epoch": 2.198152948801037, "grad_norm": 0.8661726713180542, "learning_rate": 8.797076478838388e-07, "loss": 0.0588, "step": 13567 }, { "epoch": 2.1983149708360337, "grad_norm": 0.8928310871124268, "learning_rate": 8.79374647748229e-07, "loss": 0.0601, "step": 13568 }, { "epoch": 2.1984769928710306, "grad_norm": 0.8463062644004822, "learning_rate": 8.790416971998317e-07, "loss": 0.059, "step": 13569 }, { "epoch": 2.198639014906027, "grad_norm": 0.9622822999954224, "learning_rate": 8.787087962488367e-07, "loss": 0.0595, "step": 13570 }, { "epoch": 2.198801036941024, "grad_norm": 0.9242643117904663, "learning_rate": 8.783759449054296e-07, "loss": 0.0645, "step": 13571 }, { "epoch": 2.1989630589760205, "grad_norm": 0.957097589969635, "learning_rate": 8.780431431797937e-07, "loss": 0.0685, "step": 13572 }, { "epoch": 2.1991250810110174, "grad_norm": 0.9083278775215149, "learning_rate": 8.777103910821127e-07, "loss": 0.0633, "step": 13573 }, { "epoch": 2.1992871030460144, "grad_norm": 0.9371110796928406, "learning_rate": 8.773776886225668e-07, "loss": 0.0639, "step": 13574 }, { "epoch": 2.199449125081011, "grad_norm": 0.9061893820762634, "learning_rate": 8.770450358113389e-07, "loss": 0.0606, "step": 13575 }, { "epoch": 2.199611147116008, "grad_norm": 0.9202576875686646, "learning_rate": 8.767124326586043e-07, "loss": 0.0632, "step": 13576 }, { "epoch": 2.1997731691510047, "grad_norm": 0.9155436754226685, "learning_rate": 8.763798791745413e-07, "loss": 0.0637, "step": 13577 }, { "epoch": 2.1999351911860012, "grad_norm": 1.0268107652664185, "learning_rate": 8.760473753693243e-07, "loss": 0.0673, "step": 13578 }, { "epoch": 2.200097213220998, "grad_norm": 1.1556326150894165, "learning_rate": 8.757149212531282e-07, "loss": 0.0645, "step": 13579 }, { "epoch": 2.2002592352559946, "grad_norm": 0.7752742171287537, "learning_rate": 8.753825168361249e-07, "loss": 0.0561, "step": 13580 }, { "epoch": 2.2004212572909916, "grad_norm": 0.9289119839668274, "learning_rate": 8.750501621284849e-07, "loss": 0.0638, "step": 13581 }, { "epoch": 2.2005832793259885, "grad_norm": 0.8528614640235901, "learning_rate": 8.747178571403786e-07, "loss": 0.0601, "step": 13582 }, { "epoch": 2.200745301360985, "grad_norm": 0.8271398544311523, "learning_rate": 8.743856018819719e-07, "loss": 0.0596, "step": 13583 }, { "epoch": 2.200907323395982, "grad_norm": 0.8116289377212524, "learning_rate": 8.74053396363431e-07, "loss": 0.0615, "step": 13584 }, { "epoch": 2.2010693454309784, "grad_norm": 0.7971740961074829, "learning_rate": 8.737212405949222e-07, "loss": 0.0615, "step": 13585 }, { "epoch": 2.2012313674659754, "grad_norm": 1.0324573516845703, "learning_rate": 8.733891345866088e-07, "loss": 0.0666, "step": 13586 }, { "epoch": 2.2013933895009723, "grad_norm": 0.8397610783576965, "learning_rate": 8.730570783486508e-07, "loss": 0.0664, "step": 13587 }, { "epoch": 2.201555411535969, "grad_norm": 0.9985054135322571, "learning_rate": 8.727250718912089e-07, "loss": 0.0602, "step": 13588 }, { "epoch": 2.2017174335709657, "grad_norm": 1.0729403495788574, "learning_rate": 8.723931152244421e-07, "loss": 0.0624, "step": 13589 }, { "epoch": 2.201879455605962, "grad_norm": 1.0045422315597534, "learning_rate": 8.72061208358507e-07, "loss": 0.0627, "step": 13590 }, { "epoch": 2.202041477640959, "grad_norm": 0.8267765045166016, "learning_rate": 8.717293513035596e-07, "loss": 0.0644, "step": 13591 }, { "epoch": 2.202203499675956, "grad_norm": 0.8496410250663757, "learning_rate": 8.713975440697536e-07, "loss": 0.0568, "step": 13592 }, { "epoch": 2.2023655217109526, "grad_norm": 0.9486085772514343, "learning_rate": 8.710657866672417e-07, "loss": 0.0668, "step": 13593 }, { "epoch": 2.2025275437459495, "grad_norm": 0.867942750453949, "learning_rate": 8.707340791061747e-07, "loss": 0.057, "step": 13594 }, { "epoch": 2.202689565780946, "grad_norm": 0.8752855062484741, "learning_rate": 8.704024213967021e-07, "loss": 0.0651, "step": 13595 }, { "epoch": 2.202851587815943, "grad_norm": 1.1905330419540405, "learning_rate": 8.700708135489722e-07, "loss": 0.0653, "step": 13596 }, { "epoch": 2.20301360985094, "grad_norm": 0.9045699834823608, "learning_rate": 8.697392555731315e-07, "loss": 0.0604, "step": 13597 }, { "epoch": 2.2031756318859363, "grad_norm": 0.8511764407157898, "learning_rate": 8.694077474793227e-07, "loss": 0.0662, "step": 13598 }, { "epoch": 2.2033376539209333, "grad_norm": 0.8251369595527649, "learning_rate": 8.690762892776918e-07, "loss": 0.0641, "step": 13599 }, { "epoch": 2.20349967595593, "grad_norm": 0.9330815076828003, "learning_rate": 8.687448809783799e-07, "loss": 0.0646, "step": 13600 }, { "epoch": 2.2036616979909267, "grad_norm": 0.8293221592903137, "learning_rate": 8.684135225915277e-07, "loss": 0.0595, "step": 13601 }, { "epoch": 2.2038237200259236, "grad_norm": 1.0434253215789795, "learning_rate": 8.680822141272727e-07, "loss": 0.0698, "step": 13602 }, { "epoch": 2.20398574206092, "grad_norm": 0.8310006260871887, "learning_rate": 8.677509555957517e-07, "loss": 0.0568, "step": 13603 }, { "epoch": 2.204147764095917, "grad_norm": 0.8653905391693115, "learning_rate": 8.674197470071033e-07, "loss": 0.0583, "step": 13604 }, { "epoch": 2.204309786130914, "grad_norm": 0.8970719575881958, "learning_rate": 8.670885883714591e-07, "loss": 0.0587, "step": 13605 }, { "epoch": 2.2044718081659105, "grad_norm": 1.2163879871368408, "learning_rate": 8.667574796989526e-07, "loss": 0.0714, "step": 13606 }, { "epoch": 2.2046338302009074, "grad_norm": 0.865898609161377, "learning_rate": 8.664264209997144e-07, "loss": 0.0627, "step": 13607 }, { "epoch": 2.204795852235904, "grad_norm": 0.900995671749115, "learning_rate": 8.66095412283875e-07, "loss": 0.0608, "step": 13608 }, { "epoch": 2.204957874270901, "grad_norm": 0.8361657857894897, "learning_rate": 8.657644535615617e-07, "loss": 0.0561, "step": 13609 }, { "epoch": 2.2051198963058978, "grad_norm": 0.9006054401397705, "learning_rate": 8.654335448429016e-07, "loss": 0.0639, "step": 13610 }, { "epoch": 2.2052819183408943, "grad_norm": 0.8334879875183105, "learning_rate": 8.651026861380193e-07, "loss": 0.0558, "step": 13611 }, { "epoch": 2.205443940375891, "grad_norm": 0.9533306360244751, "learning_rate": 8.647718774570385e-07, "loss": 0.0637, "step": 13612 }, { "epoch": 2.2056059624108877, "grad_norm": 0.8768250346183777, "learning_rate": 8.644411188100812e-07, "loss": 0.0609, "step": 13613 }, { "epoch": 2.2057679844458846, "grad_norm": 0.8379773497581482, "learning_rate": 8.641104102072676e-07, "loss": 0.0561, "step": 13614 }, { "epoch": 2.2059300064808816, "grad_norm": 1.2272884845733643, "learning_rate": 8.637797516587173e-07, "loss": 0.0692, "step": 13615 }, { "epoch": 2.206092028515878, "grad_norm": 0.8420472145080566, "learning_rate": 8.634491431745465e-07, "loss": 0.057, "step": 13616 }, { "epoch": 2.206254050550875, "grad_norm": 0.835096001625061, "learning_rate": 8.631185847648704e-07, "loss": 0.0571, "step": 13617 }, { "epoch": 2.2064160725858715, "grad_norm": 0.9500763416290283, "learning_rate": 8.627880764398055e-07, "loss": 0.0538, "step": 13618 }, { "epoch": 2.2065780946208684, "grad_norm": 0.8583593368530273, "learning_rate": 8.62457618209464e-07, "loss": 0.0588, "step": 13619 }, { "epoch": 2.2067401166558653, "grad_norm": 0.816092848777771, "learning_rate": 8.621272100839562e-07, "loss": 0.056, "step": 13620 }, { "epoch": 2.206902138690862, "grad_norm": 0.8541872501373291, "learning_rate": 8.617968520733919e-07, "loss": 0.0554, "step": 13621 }, { "epoch": 2.2070641607258588, "grad_norm": 0.8463016152381897, "learning_rate": 8.614665441878798e-07, "loss": 0.0608, "step": 13622 }, { "epoch": 2.2072261827608557, "grad_norm": 0.942939281463623, "learning_rate": 8.611362864375261e-07, "loss": 0.0682, "step": 13623 }, { "epoch": 2.207388204795852, "grad_norm": 1.0515944957733154, "learning_rate": 8.60806078832436e-07, "loss": 0.0771, "step": 13624 }, { "epoch": 2.207550226830849, "grad_norm": 0.9806361794471741, "learning_rate": 8.604759213827133e-07, "loss": 0.0698, "step": 13625 }, { "epoch": 2.2077122488658456, "grad_norm": 0.9298086762428284, "learning_rate": 8.601458140984606e-07, "loss": 0.0579, "step": 13626 }, { "epoch": 2.2078742709008425, "grad_norm": 0.8107653856277466, "learning_rate": 8.598157569897758e-07, "loss": 0.0553, "step": 13627 }, { "epoch": 2.2080362929358395, "grad_norm": 0.874210774898529, "learning_rate": 8.594857500667606e-07, "loss": 0.0583, "step": 13628 }, { "epoch": 2.208198314970836, "grad_norm": 0.8932999968528748, "learning_rate": 8.591557933395115e-07, "loss": 0.0645, "step": 13629 }, { "epoch": 2.208360337005833, "grad_norm": 0.8853711485862732, "learning_rate": 8.588258868181251e-07, "loss": 0.0611, "step": 13630 }, { "epoch": 2.2085223590408294, "grad_norm": 0.9822854995727539, "learning_rate": 8.584960305126943e-07, "loss": 0.0742, "step": 13631 }, { "epoch": 2.2086843810758263, "grad_norm": 0.930212676525116, "learning_rate": 8.581662244333116e-07, "loss": 0.0657, "step": 13632 }, { "epoch": 2.2088464031108233, "grad_norm": 0.8969360589981079, "learning_rate": 8.578364685900711e-07, "loss": 0.0596, "step": 13633 }, { "epoch": 2.2090084251458197, "grad_norm": 0.8433337211608887, "learning_rate": 8.575067629930601e-07, "loss": 0.0564, "step": 13634 }, { "epoch": 2.2091704471808167, "grad_norm": 0.8767102360725403, "learning_rate": 8.571771076523669e-07, "loss": 0.0608, "step": 13635 }, { "epoch": 2.209332469215813, "grad_norm": 0.8478491306304932, "learning_rate": 8.568475025780781e-07, "loss": 0.061, "step": 13636 }, { "epoch": 2.20949449125081, "grad_norm": 1.1657311916351318, "learning_rate": 8.56517947780281e-07, "loss": 0.0705, "step": 13637 }, { "epoch": 2.209656513285807, "grad_norm": 0.980461835861206, "learning_rate": 8.561884432690568e-07, "loss": 0.0583, "step": 13638 }, { "epoch": 2.2098185353208035, "grad_norm": 0.9112688302993774, "learning_rate": 8.55858989054488e-07, "loss": 0.0618, "step": 13639 }, { "epoch": 2.2099805573558005, "grad_norm": 1.0051236152648926, "learning_rate": 8.555295851466556e-07, "loss": 0.066, "step": 13640 }, { "epoch": 2.210142579390797, "grad_norm": 0.8563796281814575, "learning_rate": 8.552002315556382e-07, "loss": 0.0613, "step": 13641 }, { "epoch": 2.210304601425794, "grad_norm": 0.7843127846717834, "learning_rate": 8.548709282915135e-07, "loss": 0.0638, "step": 13642 }, { "epoch": 2.210466623460791, "grad_norm": 0.9892216324806213, "learning_rate": 8.545416753643574e-07, "loss": 0.0652, "step": 13643 }, { "epoch": 2.2106286454957873, "grad_norm": 0.9359889030456543, "learning_rate": 8.542124727842438e-07, "loss": 0.0596, "step": 13644 }, { "epoch": 2.2107906675307842, "grad_norm": 0.7579911947250366, "learning_rate": 8.538833205612468e-07, "loss": 0.0536, "step": 13645 }, { "epoch": 2.210952689565781, "grad_norm": 0.7758036255836487, "learning_rate": 8.535542187054352e-07, "loss": 0.059, "step": 13646 }, { "epoch": 2.2111147116007777, "grad_norm": 0.8697202801704407, "learning_rate": 8.532251672268807e-07, "loss": 0.0619, "step": 13647 }, { "epoch": 2.2112767336357746, "grad_norm": 0.9321410059928894, "learning_rate": 8.528961661356519e-07, "loss": 0.0647, "step": 13648 }, { "epoch": 2.211438755670771, "grad_norm": 1.245924711227417, "learning_rate": 8.525672154418138e-07, "loss": 0.0676, "step": 13649 }, { "epoch": 2.211600777705768, "grad_norm": 0.8289289474487305, "learning_rate": 8.52238315155432e-07, "loss": 0.0551, "step": 13650 }, { "epoch": 2.211762799740765, "grad_norm": 0.8378967642784119, "learning_rate": 8.519094652865703e-07, "loss": 0.0534, "step": 13651 }, { "epoch": 2.2119248217757614, "grad_norm": 0.9754065871238708, "learning_rate": 8.515806658452908e-07, "loss": 0.0669, "step": 13652 }, { "epoch": 2.2120868438107584, "grad_norm": 0.8882692456245422, "learning_rate": 8.512519168416536e-07, "loss": 0.0619, "step": 13653 }, { "epoch": 2.212248865845755, "grad_norm": 0.9585002660751343, "learning_rate": 8.50923218285718e-07, "loss": 0.0655, "step": 13654 }, { "epoch": 2.212410887880752, "grad_norm": 0.9813425540924072, "learning_rate": 8.505945701875412e-07, "loss": 0.0657, "step": 13655 }, { "epoch": 2.2125729099157487, "grad_norm": 0.9413607716560364, "learning_rate": 8.502659725571791e-07, "loss": 0.0606, "step": 13656 }, { "epoch": 2.212734931950745, "grad_norm": 0.8411443829536438, "learning_rate": 8.499374254046858e-07, "loss": 0.0617, "step": 13657 }, { "epoch": 2.212896953985742, "grad_norm": 0.9197219610214233, "learning_rate": 8.496089287401144e-07, "loss": 0.0619, "step": 13658 }, { "epoch": 2.2130589760207386, "grad_norm": 1.0152454376220703, "learning_rate": 8.492804825735166e-07, "loss": 0.0579, "step": 13659 }, { "epoch": 2.2132209980557356, "grad_norm": 0.9079082012176514, "learning_rate": 8.489520869149398e-07, "loss": 0.0711, "step": 13660 }, { "epoch": 2.2133830200907325, "grad_norm": 0.9641551375389099, "learning_rate": 8.486237417744344e-07, "loss": 0.0587, "step": 13661 }, { "epoch": 2.213545042125729, "grad_norm": 0.8907647132873535, "learning_rate": 8.482954471620464e-07, "loss": 0.0601, "step": 13662 }, { "epoch": 2.213707064160726, "grad_norm": 0.7322314381599426, "learning_rate": 8.479672030878213e-07, "loss": 0.054, "step": 13663 }, { "epoch": 2.2138690861957224, "grad_norm": 0.8832927346229553, "learning_rate": 8.476390095618015e-07, "loss": 0.0571, "step": 13664 }, { "epoch": 2.2140311082307194, "grad_norm": 0.8694517016410828, "learning_rate": 8.47310866594028e-07, "loss": 0.06, "step": 13665 }, { "epoch": 2.2141931302657163, "grad_norm": 0.8959830403327942, "learning_rate": 8.469827741945447e-07, "loss": 0.0642, "step": 13666 }, { "epoch": 2.214355152300713, "grad_norm": 0.9911413192749023, "learning_rate": 8.466547323733873e-07, "loss": 0.0602, "step": 13667 }, { "epoch": 2.2145171743357097, "grad_norm": 0.8468191027641296, "learning_rate": 8.46326741140594e-07, "loss": 0.0601, "step": 13668 }, { "epoch": 2.2146791963707066, "grad_norm": 1.0290790796279907, "learning_rate": 8.459988005062006e-07, "loss": 0.06, "step": 13669 }, { "epoch": 2.214841218405703, "grad_norm": 0.9934695363044739, "learning_rate": 8.456709104802413e-07, "loss": 0.0616, "step": 13670 }, { "epoch": 2.2150032404407, "grad_norm": 0.8425288200378418, "learning_rate": 8.453430710727486e-07, "loss": 0.0595, "step": 13671 }, { "epoch": 2.2151652624756966, "grad_norm": 0.862170398235321, "learning_rate": 8.450152822937541e-07, "loss": 0.0619, "step": 13672 }, { "epoch": 2.2153272845106935, "grad_norm": 0.8608142137527466, "learning_rate": 8.446875441532868e-07, "loss": 0.0645, "step": 13673 }, { "epoch": 2.21548930654569, "grad_norm": 0.8314158916473389, "learning_rate": 8.443598566613756e-07, "loss": 0.058, "step": 13674 }, { "epoch": 2.215651328580687, "grad_norm": 0.9272691607475281, "learning_rate": 8.440322198280446e-07, "loss": 0.0604, "step": 13675 }, { "epoch": 2.215813350615684, "grad_norm": 1.0183619260787964, "learning_rate": 8.437046336633212e-07, "loss": 0.0642, "step": 13676 }, { "epoch": 2.2159753726506803, "grad_norm": 0.8743815422058105, "learning_rate": 8.433770981772285e-07, "loss": 0.0625, "step": 13677 }, { "epoch": 2.2161373946856773, "grad_norm": 0.8977541327476501, "learning_rate": 8.430496133797872e-07, "loss": 0.067, "step": 13678 }, { "epoch": 2.216299416720674, "grad_norm": 0.9570757150650024, "learning_rate": 8.427221792810169e-07, "loss": 0.0597, "step": 13679 }, { "epoch": 2.2164614387556707, "grad_norm": 0.8252487778663635, "learning_rate": 8.423947958909381e-07, "loss": 0.0618, "step": 13680 }, { "epoch": 2.2166234607906676, "grad_norm": 0.801275372505188, "learning_rate": 8.420674632195683e-07, "loss": 0.0581, "step": 13681 }, { "epoch": 2.216785482825664, "grad_norm": 0.8917983770370483, "learning_rate": 8.41740181276921e-07, "loss": 0.0685, "step": 13682 }, { "epoch": 2.216947504860661, "grad_norm": 0.9908973574638367, "learning_rate": 8.414129500730115e-07, "loss": 0.0666, "step": 13683 }, { "epoch": 2.217109526895658, "grad_norm": 1.1001694202423096, "learning_rate": 8.410857696178518e-07, "loss": 0.0664, "step": 13684 }, { "epoch": 2.2172715489306545, "grad_norm": 0.8610407710075378, "learning_rate": 8.407586399214529e-07, "loss": 0.0634, "step": 13685 }, { "epoch": 2.2174335709656514, "grad_norm": 0.8666363954544067, "learning_rate": 8.404315609938246e-07, "loss": 0.0623, "step": 13686 }, { "epoch": 2.217595593000648, "grad_norm": 0.8178418278694153, "learning_rate": 8.401045328449742e-07, "loss": 0.0607, "step": 13687 }, { "epoch": 2.217757615035645, "grad_norm": 0.8369463086128235, "learning_rate": 8.397775554849086e-07, "loss": 0.0577, "step": 13688 }, { "epoch": 2.2179196370706418, "grad_norm": 1.0143942832946777, "learning_rate": 8.394506289236317e-07, "loss": 0.0673, "step": 13689 }, { "epoch": 2.2180816591056383, "grad_norm": 1.006434679031372, "learning_rate": 8.391237531711474e-07, "loss": 0.0585, "step": 13690 }, { "epoch": 2.218243681140635, "grad_norm": 0.9059908390045166, "learning_rate": 8.38796928237457e-07, "loss": 0.064, "step": 13691 }, { "epoch": 2.2184057031756317, "grad_norm": 0.9959692358970642, "learning_rate": 8.384701541325612e-07, "loss": 0.066, "step": 13692 }, { "epoch": 2.2185677252106286, "grad_norm": 0.9817578792572021, "learning_rate": 8.381434308664574e-07, "loss": 0.0707, "step": 13693 }, { "epoch": 2.2187297472456255, "grad_norm": 0.9044135808944702, "learning_rate": 8.378167584491417e-07, "loss": 0.0646, "step": 13694 }, { "epoch": 2.218891769280622, "grad_norm": 0.751171886920929, "learning_rate": 8.374901368906127e-07, "loss": 0.0566, "step": 13695 }, { "epoch": 2.219053791315619, "grad_norm": 0.8422271609306335, "learning_rate": 8.371635662008615e-07, "loss": 0.0642, "step": 13696 }, { "epoch": 2.2192158133506155, "grad_norm": 0.8549221754074097, "learning_rate": 8.368370463898812e-07, "loss": 0.0518, "step": 13697 }, { "epoch": 2.2193778353856124, "grad_norm": 0.8980638384819031, "learning_rate": 8.365105774676624e-07, "loss": 0.0633, "step": 13698 }, { "epoch": 2.2195398574206093, "grad_norm": 0.9105513691902161, "learning_rate": 8.361841594441944e-07, "loss": 0.06, "step": 13699 }, { "epoch": 2.219701879455606, "grad_norm": 0.8758683204650879, "learning_rate": 8.358577923294647e-07, "loss": 0.0646, "step": 13700 }, { "epoch": 2.2198639014906028, "grad_norm": 1.3330185413360596, "learning_rate": 8.355314761334596e-07, "loss": 0.0658, "step": 13701 }, { "epoch": 2.2200259235255997, "grad_norm": 0.9840524792671204, "learning_rate": 8.352052108661634e-07, "loss": 0.0674, "step": 13702 }, { "epoch": 2.220187945560596, "grad_norm": 0.8081815242767334, "learning_rate": 8.34878996537559e-07, "loss": 0.06, "step": 13703 }, { "epoch": 2.220349967595593, "grad_norm": 0.8371722102165222, "learning_rate": 8.345528331576275e-07, "loss": 0.0601, "step": 13704 }, { "epoch": 2.2205119896305896, "grad_norm": 1.0199873447418213, "learning_rate": 8.342267207363492e-07, "loss": 0.0612, "step": 13705 }, { "epoch": 2.2206740116655865, "grad_norm": 1.0433920621871948, "learning_rate": 8.339006592837021e-07, "loss": 0.0671, "step": 13706 }, { "epoch": 2.2208360337005835, "grad_norm": 0.8671245574951172, "learning_rate": 8.335746488096639e-07, "loss": 0.0677, "step": 13707 }, { "epoch": 2.22099805573558, "grad_norm": 0.9357213377952576, "learning_rate": 8.33248689324207e-07, "loss": 0.0716, "step": 13708 }, { "epoch": 2.221160077770577, "grad_norm": 0.8133964538574219, "learning_rate": 8.329227808373078e-07, "loss": 0.0578, "step": 13709 }, { "epoch": 2.2213220998055734, "grad_norm": 0.8895506858825684, "learning_rate": 8.325969233589376e-07, "loss": 0.0653, "step": 13710 }, { "epoch": 2.2214841218405703, "grad_norm": 0.8264877200126648, "learning_rate": 8.322711168990661e-07, "loss": 0.057, "step": 13711 }, { "epoch": 2.2216461438755672, "grad_norm": 0.8504375219345093, "learning_rate": 8.319453614676626e-07, "loss": 0.0644, "step": 13712 }, { "epoch": 2.2218081659105637, "grad_norm": 0.824181318283081, "learning_rate": 8.316196570746934e-07, "loss": 0.0557, "step": 13713 }, { "epoch": 2.2219701879455607, "grad_norm": 0.832217812538147, "learning_rate": 8.31294003730127e-07, "loss": 0.0613, "step": 13714 }, { "epoch": 2.222132209980557, "grad_norm": 0.8441714644432068, "learning_rate": 8.309684014439251e-07, "loss": 0.0639, "step": 13715 }, { "epoch": 2.222294232015554, "grad_norm": 0.998146653175354, "learning_rate": 8.306428502260511e-07, "loss": 0.0665, "step": 13716 }, { "epoch": 2.222456254050551, "grad_norm": 0.8211577534675598, "learning_rate": 8.303173500864661e-07, "loss": 0.0599, "step": 13717 }, { "epoch": 2.2226182760855475, "grad_norm": 0.9386523962020874, "learning_rate": 8.299919010351296e-07, "loss": 0.0625, "step": 13718 }, { "epoch": 2.2227802981205445, "grad_norm": 0.9545656442642212, "learning_rate": 8.296665030819998e-07, "loss": 0.0648, "step": 13719 }, { "epoch": 2.222942320155541, "grad_norm": 0.8156535029411316, "learning_rate": 8.293411562370327e-07, "loss": 0.0536, "step": 13720 }, { "epoch": 2.223104342190538, "grad_norm": 0.9009618163108826, "learning_rate": 8.290158605101842e-07, "loss": 0.0621, "step": 13721 }, { "epoch": 2.223266364225535, "grad_norm": 1.074135184288025, "learning_rate": 8.286906159114058e-07, "loss": 0.061, "step": 13722 }, { "epoch": 2.2234283862605313, "grad_norm": 0.9844740033149719, "learning_rate": 8.283654224506491e-07, "loss": 0.0656, "step": 13723 }, { "epoch": 2.2235904082955282, "grad_norm": 0.8761224746704102, "learning_rate": 8.280402801378662e-07, "loss": 0.0586, "step": 13724 }, { "epoch": 2.223752430330525, "grad_norm": 0.88485187292099, "learning_rate": 8.277151889830054e-07, "loss": 0.063, "step": 13725 }, { "epoch": 2.2239144523655217, "grad_norm": 1.0839747190475464, "learning_rate": 8.27390148996012e-07, "loss": 0.069, "step": 13726 }, { "epoch": 2.2240764744005186, "grad_norm": 0.9806898832321167, "learning_rate": 8.270651601868321e-07, "loss": 0.0658, "step": 13727 }, { "epoch": 2.224238496435515, "grad_norm": 0.8707731366157532, "learning_rate": 8.267402225654112e-07, "loss": 0.0614, "step": 13728 }, { "epoch": 2.224400518470512, "grad_norm": 0.7389007210731506, "learning_rate": 8.264153361416893e-07, "loss": 0.0543, "step": 13729 }, { "epoch": 2.224562540505509, "grad_norm": 0.907781183719635, "learning_rate": 8.260905009256081e-07, "loss": 0.061, "step": 13730 }, { "epoch": 2.2247245625405054, "grad_norm": 0.976334273815155, "learning_rate": 8.257657169271071e-07, "loss": 0.0654, "step": 13731 }, { "epoch": 2.2248865845755024, "grad_norm": 0.9755533337593079, "learning_rate": 8.254409841561234e-07, "loss": 0.0697, "step": 13732 }, { "epoch": 2.225048606610499, "grad_norm": 0.8844923377037048, "learning_rate": 8.251163026225934e-07, "loss": 0.0638, "step": 13733 }, { "epoch": 2.225210628645496, "grad_norm": 0.8760762810707092, "learning_rate": 8.24791672336451e-07, "loss": 0.059, "step": 13734 }, { "epoch": 2.2253726506804927, "grad_norm": 0.8645013570785522, "learning_rate": 8.244670933076298e-07, "loss": 0.0717, "step": 13735 }, { "epoch": 2.225534672715489, "grad_norm": 0.8673563003540039, "learning_rate": 8.241425655460616e-07, "loss": 0.0601, "step": 13736 }, { "epoch": 2.225696694750486, "grad_norm": 0.8699643015861511, "learning_rate": 8.23818089061674e-07, "loss": 0.0619, "step": 13737 }, { "epoch": 2.2258587167854826, "grad_norm": 0.8396235108375549, "learning_rate": 8.23493663864397e-07, "loss": 0.0601, "step": 13738 }, { "epoch": 2.2260207388204796, "grad_norm": 0.9061728715896606, "learning_rate": 8.231692899641572e-07, "loss": 0.0656, "step": 13739 }, { "epoch": 2.2261827608554765, "grad_norm": 0.9282206892967224, "learning_rate": 8.228449673708797e-07, "loss": 0.0617, "step": 13740 }, { "epoch": 2.226344782890473, "grad_norm": 0.8784255385398865, "learning_rate": 8.22520696094487e-07, "loss": 0.0644, "step": 13741 }, { "epoch": 2.22650680492547, "grad_norm": 1.012131690979004, "learning_rate": 8.221964761449008e-07, "loss": 0.0589, "step": 13742 }, { "epoch": 2.2266688269604664, "grad_norm": 0.80937260389328, "learning_rate": 8.218723075320437e-07, "loss": 0.0556, "step": 13743 }, { "epoch": 2.2268308489954634, "grad_norm": 0.9021971821784973, "learning_rate": 8.215481902658323e-07, "loss": 0.0644, "step": 13744 }, { "epoch": 2.2269928710304603, "grad_norm": 0.916597306728363, "learning_rate": 8.212241243561845e-07, "loss": 0.0588, "step": 13745 }, { "epoch": 2.2271548930654568, "grad_norm": 0.8220802545547485, "learning_rate": 8.209001098130157e-07, "loss": 0.0563, "step": 13746 }, { "epoch": 2.2273169151004537, "grad_norm": 0.8931221961975098, "learning_rate": 8.205761466462403e-07, "loss": 0.0614, "step": 13747 }, { "epoch": 2.2274789371354506, "grad_norm": 0.755279004573822, "learning_rate": 8.20252234865771e-07, "loss": 0.0602, "step": 13748 }, { "epoch": 2.227640959170447, "grad_norm": 0.8481705188751221, "learning_rate": 8.199283744815181e-07, "loss": 0.057, "step": 13749 }, { "epoch": 2.227802981205444, "grad_norm": 0.9167376160621643, "learning_rate": 8.196045655033913e-07, "loss": 0.0679, "step": 13750 }, { "epoch": 2.2279650032404406, "grad_norm": 0.9409997463226318, "learning_rate": 8.192808079412984e-07, "loss": 0.0669, "step": 13751 }, { "epoch": 2.2281270252754375, "grad_norm": 0.7802384495735168, "learning_rate": 8.189571018051454e-07, "loss": 0.0577, "step": 13752 }, { "epoch": 2.2282890473104344, "grad_norm": 1.0504868030548096, "learning_rate": 8.186334471048371e-07, "loss": 0.0615, "step": 13753 }, { "epoch": 2.228451069345431, "grad_norm": 0.762758731842041, "learning_rate": 8.183098438502771e-07, "loss": 0.0565, "step": 13754 }, { "epoch": 2.228613091380428, "grad_norm": 0.9312151074409485, "learning_rate": 8.179862920513656e-07, "loss": 0.063, "step": 13755 }, { "epoch": 2.2287751134154243, "grad_norm": 1.0615299940109253, "learning_rate": 8.176627917180025e-07, "loss": 0.0693, "step": 13756 }, { "epoch": 2.2289371354504213, "grad_norm": 1.0079830884933472, "learning_rate": 8.173393428600876e-07, "loss": 0.0621, "step": 13757 }, { "epoch": 2.229099157485418, "grad_norm": 1.1644383668899536, "learning_rate": 8.170159454875173e-07, "loss": 0.0672, "step": 13758 }, { "epoch": 2.2292611795204147, "grad_norm": 0.910068929195404, "learning_rate": 8.16692599610186e-07, "loss": 0.0656, "step": 13759 }, { "epoch": 2.2294232015554116, "grad_norm": 0.8690840005874634, "learning_rate": 8.163693052379873e-07, "loss": 0.0583, "step": 13760 }, { "epoch": 2.229585223590408, "grad_norm": 0.8714174628257751, "learning_rate": 8.160460623808136e-07, "loss": 0.067, "step": 13761 }, { "epoch": 2.229747245625405, "grad_norm": 0.9453072547912598, "learning_rate": 8.157228710485554e-07, "loss": 0.0694, "step": 13762 }, { "epoch": 2.229909267660402, "grad_norm": 0.8522040843963623, "learning_rate": 8.153997312511014e-07, "loss": 0.06, "step": 13763 }, { "epoch": 2.2300712896953985, "grad_norm": 1.055083155632019, "learning_rate": 8.15076642998339e-07, "loss": 0.0612, "step": 13764 }, { "epoch": 2.2302333117303954, "grad_norm": 0.7458101511001587, "learning_rate": 8.147536063001549e-07, "loss": 0.0586, "step": 13765 }, { "epoch": 2.230395333765392, "grad_norm": 0.8568379282951355, "learning_rate": 8.144306211664302e-07, "loss": 0.0622, "step": 13766 }, { "epoch": 2.230557355800389, "grad_norm": 0.8845552802085876, "learning_rate": 8.141076876070505e-07, "loss": 0.0635, "step": 13767 }, { "epoch": 2.2307193778353858, "grad_norm": 0.8834683299064636, "learning_rate": 8.137848056318959e-07, "loss": 0.0577, "step": 13768 }, { "epoch": 2.2308813998703823, "grad_norm": 0.8642720580101013, "learning_rate": 8.134619752508463e-07, "loss": 0.0618, "step": 13769 }, { "epoch": 2.231043421905379, "grad_norm": 0.991050660610199, "learning_rate": 8.131391964737773e-07, "loss": 0.0673, "step": 13770 }, { "epoch": 2.231205443940376, "grad_norm": 0.8958488702774048, "learning_rate": 8.128164693105678e-07, "loss": 0.064, "step": 13771 }, { "epoch": 2.2313674659753726, "grad_norm": 1.0806387662887573, "learning_rate": 8.12493793771092e-07, "loss": 0.0648, "step": 13772 }, { "epoch": 2.2315294880103695, "grad_norm": 0.922431468963623, "learning_rate": 8.121711698652219e-07, "loss": 0.0572, "step": 13773 }, { "epoch": 2.231691510045366, "grad_norm": 0.9419412016868591, "learning_rate": 8.118485976028292e-07, "loss": 0.0612, "step": 13774 }, { "epoch": 2.231853532080363, "grad_norm": 1.0189546346664429, "learning_rate": 8.115260769937835e-07, "loss": 0.065, "step": 13775 }, { "epoch": 2.2320155541153595, "grad_norm": 0.8037611246109009, "learning_rate": 8.112036080479554e-07, "loss": 0.0612, "step": 13776 }, { "epoch": 2.2321775761503564, "grad_norm": 0.9175558090209961, "learning_rate": 8.108811907752093e-07, "loss": 0.065, "step": 13777 }, { "epoch": 2.2323395981853533, "grad_norm": 0.9038329124450684, "learning_rate": 8.10558825185411e-07, "loss": 0.0585, "step": 13778 }, { "epoch": 2.23250162022035, "grad_norm": 0.8255992531776428, "learning_rate": 8.102365112884248e-07, "loss": 0.0649, "step": 13779 }, { "epoch": 2.2326636422553467, "grad_norm": 0.8868559002876282, "learning_rate": 8.099142490941117e-07, "loss": 0.0623, "step": 13780 }, { "epoch": 2.2328256642903437, "grad_norm": 0.8174579739570618, "learning_rate": 8.095920386123327e-07, "loss": 0.059, "step": 13781 }, { "epoch": 2.23298768632534, "grad_norm": 0.9110251069068909, "learning_rate": 8.09269879852947e-07, "loss": 0.064, "step": 13782 }, { "epoch": 2.233149708360337, "grad_norm": 1.100704550743103, "learning_rate": 8.08947772825811e-07, "loss": 0.0637, "step": 13783 }, { "epoch": 2.2333117303953336, "grad_norm": 0.9022136330604553, "learning_rate": 8.086257175407819e-07, "loss": 0.0534, "step": 13784 }, { "epoch": 2.2334737524303305, "grad_norm": 1.0603289604187012, "learning_rate": 8.083037140077113e-07, "loss": 0.0619, "step": 13785 }, { "epoch": 2.2336357744653275, "grad_norm": 0.9479257464408875, "learning_rate": 8.079817622364539e-07, "loss": 0.0647, "step": 13786 }, { "epoch": 2.233797796500324, "grad_norm": 0.8262972235679626, "learning_rate": 8.076598622368606e-07, "loss": 0.0535, "step": 13787 }, { "epoch": 2.233959818535321, "grad_norm": 1.2946476936340332, "learning_rate": 8.073380140187795e-07, "loss": 0.0728, "step": 13788 }, { "epoch": 2.2341218405703174, "grad_norm": 0.8733221292495728, "learning_rate": 8.07016217592059e-07, "loss": 0.0654, "step": 13789 }, { "epoch": 2.2342838626053143, "grad_norm": 0.9510716795921326, "learning_rate": 8.066944729665455e-07, "loss": 0.0665, "step": 13790 }, { "epoch": 2.2344458846403112, "grad_norm": 0.9815870523452759, "learning_rate": 8.063727801520832e-07, "loss": 0.0639, "step": 13791 }, { "epoch": 2.2346079066753077, "grad_norm": 0.7917437553405762, "learning_rate": 8.060511391585152e-07, "loss": 0.0597, "step": 13792 }, { "epoch": 2.2347699287103047, "grad_norm": 1.0030494928359985, "learning_rate": 8.057295499956832e-07, "loss": 0.0701, "step": 13793 }, { "epoch": 2.234931950745301, "grad_norm": 0.933049201965332, "learning_rate": 8.054080126734271e-07, "loss": 0.0682, "step": 13794 }, { "epoch": 2.235093972780298, "grad_norm": 0.9111407399177551, "learning_rate": 8.050865272015848e-07, "loss": 0.0632, "step": 13795 }, { "epoch": 2.235255994815295, "grad_norm": 0.9394775032997131, "learning_rate": 8.047650935899931e-07, "loss": 0.0664, "step": 13796 }, { "epoch": 2.2354180168502915, "grad_norm": 0.9155844449996948, "learning_rate": 8.044437118484874e-07, "loss": 0.0626, "step": 13797 }, { "epoch": 2.2355800388852884, "grad_norm": 0.9947577118873596, "learning_rate": 8.041223819869015e-07, "loss": 0.0598, "step": 13798 }, { "epoch": 2.235742060920285, "grad_norm": 0.9133854508399963, "learning_rate": 8.03801104015065e-07, "loss": 0.0639, "step": 13799 }, { "epoch": 2.235904082955282, "grad_norm": 0.981562077999115, "learning_rate": 8.034798779428113e-07, "loss": 0.0565, "step": 13800 }, { "epoch": 2.236066104990279, "grad_norm": 0.8836338520050049, "learning_rate": 8.031587037799673e-07, "loss": 0.0606, "step": 13801 }, { "epoch": 2.2362281270252753, "grad_norm": 0.9972084760665894, "learning_rate": 8.02837581536362e-07, "loss": 0.0707, "step": 13802 }, { "epoch": 2.2363901490602722, "grad_norm": 0.8902301788330078, "learning_rate": 8.025165112218186e-07, "loss": 0.0634, "step": 13803 }, { "epoch": 2.236552171095269, "grad_norm": 0.9194254875183105, "learning_rate": 8.021954928461611e-07, "loss": 0.0632, "step": 13804 }, { "epoch": 2.2367141931302656, "grad_norm": 1.0688180923461914, "learning_rate": 8.018745264192148e-07, "loss": 0.0654, "step": 13805 }, { "epoch": 2.2368762151652626, "grad_norm": 0.9109787940979004, "learning_rate": 8.015536119507977e-07, "loss": 0.0604, "step": 13806 }, { "epoch": 2.237038237200259, "grad_norm": 0.9395532011985779, "learning_rate": 8.012327494507302e-07, "loss": 0.0617, "step": 13807 }, { "epoch": 2.237200259235256, "grad_norm": 0.8452723622322083, "learning_rate": 8.009119389288292e-07, "loss": 0.0591, "step": 13808 }, { "epoch": 2.237362281270253, "grad_norm": 0.9636961221694946, "learning_rate": 8.005911803949115e-07, "loss": 0.0644, "step": 13809 }, { "epoch": 2.2375243033052494, "grad_norm": 0.8222397565841675, "learning_rate": 8.002704738587911e-07, "loss": 0.0579, "step": 13810 }, { "epoch": 2.2376863253402464, "grad_norm": 0.8121089935302734, "learning_rate": 7.999498193302807e-07, "loss": 0.059, "step": 13811 }, { "epoch": 2.237848347375243, "grad_norm": 0.8787469863891602, "learning_rate": 7.996292168191919e-07, "loss": 0.0537, "step": 13812 }, { "epoch": 2.23801036941024, "grad_norm": 0.7887811064720154, "learning_rate": 7.993086663353344e-07, "loss": 0.06, "step": 13813 }, { "epoch": 2.2381723914452367, "grad_norm": 0.9501045942306519, "learning_rate": 7.989881678885158e-07, "loss": 0.061, "step": 13814 }, { "epoch": 2.238334413480233, "grad_norm": 0.8966881036758423, "learning_rate": 7.986677214885433e-07, "loss": 0.0625, "step": 13815 }, { "epoch": 2.23849643551523, "grad_norm": 0.8494555354118347, "learning_rate": 7.983473271452219e-07, "loss": 0.0558, "step": 13816 }, { "epoch": 2.2386584575502266, "grad_norm": 0.8177490234375, "learning_rate": 7.980269848683536e-07, "loss": 0.0542, "step": 13817 }, { "epoch": 2.2388204795852236, "grad_norm": 0.9580766558647156, "learning_rate": 7.977066946677404e-07, "loss": 0.0619, "step": 13818 }, { "epoch": 2.2389825016202205, "grad_norm": 1.0076230764389038, "learning_rate": 7.973864565531833e-07, "loss": 0.0616, "step": 13819 }, { "epoch": 2.239144523655217, "grad_norm": 1.0264263153076172, "learning_rate": 7.970662705344812e-07, "loss": 0.058, "step": 13820 }, { "epoch": 2.239306545690214, "grad_norm": 0.7906507253646851, "learning_rate": 7.967461366214293e-07, "loss": 0.0569, "step": 13821 }, { "epoch": 2.2394685677252104, "grad_norm": 0.7899366617202759, "learning_rate": 7.964260548238242e-07, "loss": 0.0547, "step": 13822 }, { "epoch": 2.2396305897602073, "grad_norm": 0.9811822772026062, "learning_rate": 7.961060251514591e-07, "loss": 0.0609, "step": 13823 }, { "epoch": 2.2397926117952043, "grad_norm": 0.7992488145828247, "learning_rate": 7.957860476141261e-07, "loss": 0.0589, "step": 13824 }, { "epoch": 2.2399546338302008, "grad_norm": 0.9526104927062988, "learning_rate": 7.954661222216162e-07, "loss": 0.067, "step": 13825 }, { "epoch": 2.2401166558651977, "grad_norm": 0.8279509544372559, "learning_rate": 7.951462489837178e-07, "loss": 0.0608, "step": 13826 }, { "epoch": 2.2402786779001946, "grad_norm": 0.9480863213539124, "learning_rate": 7.948264279102186e-07, "loss": 0.0639, "step": 13827 }, { "epoch": 2.240440699935191, "grad_norm": 1.0198253393173218, "learning_rate": 7.945066590109044e-07, "loss": 0.0667, "step": 13828 }, { "epoch": 2.240602721970188, "grad_norm": 0.9767336845397949, "learning_rate": 7.941869422955592e-07, "loss": 0.0663, "step": 13829 }, { "epoch": 2.2407647440051845, "grad_norm": 0.8995802402496338, "learning_rate": 7.938672777739654e-07, "loss": 0.0588, "step": 13830 }, { "epoch": 2.2409267660401815, "grad_norm": 1.019019603729248, "learning_rate": 7.935476654559052e-07, "loss": 0.0574, "step": 13831 }, { "epoch": 2.2410887880751784, "grad_norm": 0.8723592758178711, "learning_rate": 7.932281053511559e-07, "loss": 0.0649, "step": 13832 }, { "epoch": 2.241250810110175, "grad_norm": 1.199785590171814, "learning_rate": 7.929085974694956e-07, "loss": 0.0671, "step": 13833 }, { "epoch": 2.241412832145172, "grad_norm": 0.9804271459579468, "learning_rate": 7.925891418207024e-07, "loss": 0.0613, "step": 13834 }, { "epoch": 2.2415748541801683, "grad_norm": 0.7946493625640869, "learning_rate": 7.922697384145492e-07, "loss": 0.0595, "step": 13835 }, { "epoch": 2.2417368762151653, "grad_norm": 0.9132854342460632, "learning_rate": 7.919503872608092e-07, "loss": 0.0628, "step": 13836 }, { "epoch": 2.241898898250162, "grad_norm": 0.9299383163452148, "learning_rate": 7.916310883692532e-07, "loss": 0.07, "step": 13837 }, { "epoch": 2.2420609202851587, "grad_norm": 0.9028305411338806, "learning_rate": 7.913118417496532e-07, "loss": 0.069, "step": 13838 }, { "epoch": 2.2422229423201556, "grad_norm": 0.9909502267837524, "learning_rate": 7.909926474117752e-07, "loss": 0.0608, "step": 13839 }, { "epoch": 2.242384964355152, "grad_norm": 1.0152370929718018, "learning_rate": 7.906735053653866e-07, "loss": 0.0648, "step": 13840 }, { "epoch": 2.242546986390149, "grad_norm": 0.8076937794685364, "learning_rate": 7.90354415620252e-07, "loss": 0.0589, "step": 13841 }, { "epoch": 2.242709008425146, "grad_norm": 0.9617068767547607, "learning_rate": 7.900353781861353e-07, "loss": 0.0603, "step": 13842 }, { "epoch": 2.2428710304601425, "grad_norm": 1.0339735746383667, "learning_rate": 7.897163930727977e-07, "loss": 0.0601, "step": 13843 }, { "epoch": 2.2430330524951394, "grad_norm": 1.0885800123214722, "learning_rate": 7.893974602899998e-07, "loss": 0.0664, "step": 13844 }, { "epoch": 2.243195074530136, "grad_norm": 0.8064873218536377, "learning_rate": 7.890785798475001e-07, "loss": 0.0589, "step": 13845 }, { "epoch": 2.243357096565133, "grad_norm": 0.8364428281784058, "learning_rate": 7.887597517550564e-07, "loss": 0.0587, "step": 13846 }, { "epoch": 2.2435191186001298, "grad_norm": 1.0745041370391846, "learning_rate": 7.884409760224212e-07, "loss": 0.074, "step": 13847 }, { "epoch": 2.2436811406351262, "grad_norm": 0.9691507816314697, "learning_rate": 7.881222526593513e-07, "loss": 0.0607, "step": 13848 }, { "epoch": 2.243843162670123, "grad_norm": 0.9089518189430237, "learning_rate": 7.878035816755985e-07, "loss": 0.0575, "step": 13849 }, { "epoch": 2.24400518470512, "grad_norm": 0.8140989542007446, "learning_rate": 7.87484963080912e-07, "loss": 0.0586, "step": 13850 }, { "epoch": 2.2441672067401166, "grad_norm": 0.6760884523391724, "learning_rate": 7.871663968850413e-07, "loss": 0.0508, "step": 13851 }, { "epoch": 2.2443292287751135, "grad_norm": 0.9919477105140686, "learning_rate": 7.868478830977331e-07, "loss": 0.0655, "step": 13852 }, { "epoch": 2.24449125081011, "grad_norm": 0.8631847500801086, "learning_rate": 7.865294217287356e-07, "loss": 0.0672, "step": 13853 }, { "epoch": 2.244653272845107, "grad_norm": 0.964144229888916, "learning_rate": 7.862110127877903e-07, "loss": 0.0652, "step": 13854 }, { "epoch": 2.244815294880104, "grad_norm": 0.8476447463035583, "learning_rate": 7.858926562846409e-07, "loss": 0.0572, "step": 13855 }, { "epoch": 2.2449773169151004, "grad_norm": 0.7753357887268066, "learning_rate": 7.855743522290283e-07, "loss": 0.056, "step": 13856 }, { "epoch": 2.2451393389500973, "grad_norm": 0.9003604650497437, "learning_rate": 7.852561006306913e-07, "loss": 0.0638, "step": 13857 }, { "epoch": 2.245301360985094, "grad_norm": 0.8805102109909058, "learning_rate": 7.849379014993683e-07, "loss": 0.0593, "step": 13858 }, { "epoch": 2.2454633830200907, "grad_norm": 1.0749431848526, "learning_rate": 7.84619754844795e-07, "loss": 0.07, "step": 13859 }, { "epoch": 2.2456254050550877, "grad_norm": 0.8165993094444275, "learning_rate": 7.84301660676707e-07, "loss": 0.0574, "step": 13860 }, { "epoch": 2.245787427090084, "grad_norm": 0.9074599742889404, "learning_rate": 7.839836190048344e-07, "loss": 0.0617, "step": 13861 }, { "epoch": 2.245949449125081, "grad_norm": 0.8614346981048584, "learning_rate": 7.836656298389114e-07, "loss": 0.0585, "step": 13862 }, { "epoch": 2.2461114711600776, "grad_norm": 0.908950924873352, "learning_rate": 7.833476931886666e-07, "loss": 0.0617, "step": 13863 }, { "epoch": 2.2462734931950745, "grad_norm": 0.9891899228096008, "learning_rate": 7.830298090638291e-07, "loss": 0.0613, "step": 13864 }, { "epoch": 2.2464355152300715, "grad_norm": 0.8221472501754761, "learning_rate": 7.827119774741238e-07, "loss": 0.0609, "step": 13865 }, { "epoch": 2.246597537265068, "grad_norm": 0.8985829949378967, "learning_rate": 7.823941984292752e-07, "loss": 0.0644, "step": 13866 }, { "epoch": 2.246759559300065, "grad_norm": 1.03141450881958, "learning_rate": 7.82076471939009e-07, "loss": 0.0713, "step": 13867 }, { "epoch": 2.2469215813350614, "grad_norm": 0.8261798620223999, "learning_rate": 7.817587980130451e-07, "loss": 0.0625, "step": 13868 }, { "epoch": 2.2470836033700583, "grad_norm": 0.9878301620483398, "learning_rate": 7.814411766611035e-07, "loss": 0.0581, "step": 13869 }, { "epoch": 2.2472456254050552, "grad_norm": 0.9584047794342041, "learning_rate": 7.811236078929033e-07, "loss": 0.0579, "step": 13870 }, { "epoch": 2.2474076474400517, "grad_norm": 1.0649832487106323, "learning_rate": 7.808060917181609e-07, "loss": 0.0656, "step": 13871 }, { "epoch": 2.2475696694750487, "grad_norm": 0.9474183917045593, "learning_rate": 7.80488628146592e-07, "loss": 0.0678, "step": 13872 }, { "epoch": 2.2477316915100456, "grad_norm": 1.1049025058746338, "learning_rate": 7.801712171879098e-07, "loss": 0.0667, "step": 13873 }, { "epoch": 2.247893713545042, "grad_norm": 0.8262870907783508, "learning_rate": 7.798538588518265e-07, "loss": 0.0596, "step": 13874 }, { "epoch": 2.248055735580039, "grad_norm": 0.9269388914108276, "learning_rate": 7.795365531480531e-07, "loss": 0.0747, "step": 13875 }, { "epoch": 2.2482177576150355, "grad_norm": 0.8992902040481567, "learning_rate": 7.792193000862964e-07, "loss": 0.0583, "step": 13876 }, { "epoch": 2.2483797796500324, "grad_norm": 0.9247242212295532, "learning_rate": 7.789020996762656e-07, "loss": 0.0561, "step": 13877 }, { "epoch": 2.248541801685029, "grad_norm": 0.9147598147392273, "learning_rate": 7.785849519276661e-07, "loss": 0.0595, "step": 13878 }, { "epoch": 2.248703823720026, "grad_norm": 0.9816052913665771, "learning_rate": 7.782678568502008e-07, "loss": 0.0665, "step": 13879 }, { "epoch": 2.248865845755023, "grad_norm": 0.8535481691360474, "learning_rate": 7.779508144535725e-07, "loss": 0.0621, "step": 13880 }, { "epoch": 2.2490278677900193, "grad_norm": 0.8713977336883545, "learning_rate": 7.776338247474812e-07, "loss": 0.0641, "step": 13881 }, { "epoch": 2.249189889825016, "grad_norm": 0.8746277093887329, "learning_rate": 7.773168877416285e-07, "loss": 0.058, "step": 13882 }, { "epoch": 2.249351911860013, "grad_norm": 0.837213397026062, "learning_rate": 7.770000034457092e-07, "loss": 0.0616, "step": 13883 }, { "epoch": 2.2495139338950096, "grad_norm": 0.8210061192512512, "learning_rate": 7.766831718694204e-07, "loss": 0.0613, "step": 13884 }, { "epoch": 2.2496759559300066, "grad_norm": 0.8450611233711243, "learning_rate": 7.763663930224563e-07, "loss": 0.0617, "step": 13885 }, { "epoch": 2.249837977965003, "grad_norm": 1.0194191932678223, "learning_rate": 7.760496669145093e-07, "loss": 0.0662, "step": 13886 }, { "epoch": 2.25, "grad_norm": 0.9273967146873474, "learning_rate": 7.757329935552707e-07, "loss": 0.0602, "step": 13887 }, { "epoch": 2.250162022034997, "grad_norm": 0.8759946227073669, "learning_rate": 7.754163729544297e-07, "loss": 0.059, "step": 13888 }, { "epoch": 2.2503240440699934, "grad_norm": 0.870991051197052, "learning_rate": 7.750998051216743e-07, "loss": 0.0674, "step": 13889 }, { "epoch": 2.2504860661049904, "grad_norm": 0.8487216830253601, "learning_rate": 7.747832900666907e-07, "loss": 0.0642, "step": 13890 }, { "epoch": 2.250648088139987, "grad_norm": 1.057746171951294, "learning_rate": 7.744668277991635e-07, "loss": 0.0703, "step": 13891 }, { "epoch": 2.250810110174984, "grad_norm": 0.8206788301467896, "learning_rate": 7.741504183287757e-07, "loss": 0.0604, "step": 13892 }, { "epoch": 2.2509721322099807, "grad_norm": 0.9289180040359497, "learning_rate": 7.738340616652096e-07, "loss": 0.0557, "step": 13893 }, { "epoch": 2.251134154244977, "grad_norm": 0.8246344923973083, "learning_rate": 7.73517757818143e-07, "loss": 0.0589, "step": 13894 }, { "epoch": 2.251296176279974, "grad_norm": 0.9617959856987, "learning_rate": 7.73201506797254e-07, "loss": 0.0698, "step": 13895 }, { "epoch": 2.251458198314971, "grad_norm": 0.9463308453559875, "learning_rate": 7.728853086122212e-07, "loss": 0.062, "step": 13896 }, { "epoch": 2.2516202203499676, "grad_norm": 0.9526486396789551, "learning_rate": 7.725691632727192e-07, "loss": 0.0671, "step": 13897 }, { "epoch": 2.2517822423849645, "grad_norm": 1.0356853008270264, "learning_rate": 7.722530707884196e-07, "loss": 0.0744, "step": 13898 }, { "epoch": 2.251944264419961, "grad_norm": 0.8983619213104248, "learning_rate": 7.71937031168995e-07, "loss": 0.0556, "step": 13899 }, { "epoch": 2.252106286454958, "grad_norm": 0.8707557916641235, "learning_rate": 7.716210444241154e-07, "loss": 0.0623, "step": 13900 }, { "epoch": 2.2522683084899544, "grad_norm": 0.9758105874061584, "learning_rate": 7.713051105634492e-07, "loss": 0.0633, "step": 13901 }, { "epoch": 2.2524303305249513, "grad_norm": 0.8898715972900391, "learning_rate": 7.709892295966634e-07, "loss": 0.056, "step": 13902 }, { "epoch": 2.2525923525599483, "grad_norm": 0.957287609577179, "learning_rate": 7.706734015334228e-07, "loss": 0.0673, "step": 13903 }, { "epoch": 2.2527543745949448, "grad_norm": 0.8354018330574036, "learning_rate": 7.703576263833915e-07, "loss": 0.0635, "step": 13904 }, { "epoch": 2.2529163966299417, "grad_norm": 0.9206665754318237, "learning_rate": 7.70041904156231e-07, "loss": 0.058, "step": 13905 }, { "epoch": 2.2530784186649386, "grad_norm": 0.9412673711776733, "learning_rate": 7.697262348616019e-07, "loss": 0.0661, "step": 13906 }, { "epoch": 2.253240440699935, "grad_norm": 0.8687724471092224, "learning_rate": 7.694106185091627e-07, "loss": 0.062, "step": 13907 }, { "epoch": 2.253402462734932, "grad_norm": 1.0838840007781982, "learning_rate": 7.690950551085716e-07, "loss": 0.0682, "step": 13908 }, { "epoch": 2.2535644847699285, "grad_norm": 0.8716097474098206, "learning_rate": 7.687795446694815e-07, "loss": 0.0592, "step": 13909 }, { "epoch": 2.2537265068049255, "grad_norm": 0.7702820897102356, "learning_rate": 7.684640872015484e-07, "loss": 0.0537, "step": 13910 }, { "epoch": 2.2538885288399224, "grad_norm": 0.8733231425285339, "learning_rate": 7.68148682714425e-07, "loss": 0.0612, "step": 13911 }, { "epoch": 2.254050550874919, "grad_norm": 0.8055692315101624, "learning_rate": 7.678333312177602e-07, "loss": 0.0562, "step": 13912 }, { "epoch": 2.254212572909916, "grad_norm": 0.948461651802063, "learning_rate": 7.675180327212037e-07, "loss": 0.0644, "step": 13913 }, { "epoch": 2.2543745949449123, "grad_norm": 0.8782212138175964, "learning_rate": 7.672027872344017e-07, "loss": 0.063, "step": 13914 }, { "epoch": 2.2545366169799093, "grad_norm": 0.9695743322372437, "learning_rate": 7.668875947670032e-07, "loss": 0.0607, "step": 13915 }, { "epoch": 2.254698639014906, "grad_norm": 0.8532809615135193, "learning_rate": 7.665724553286491e-07, "loss": 0.0584, "step": 13916 }, { "epoch": 2.2548606610499027, "grad_norm": 0.8807068467140198, "learning_rate": 7.662573689289832e-07, "loss": 0.0658, "step": 13917 }, { "epoch": 2.2550226830848996, "grad_norm": 0.8487615585327148, "learning_rate": 7.659423355776463e-07, "loss": 0.0602, "step": 13918 }, { "epoch": 2.2551847051198965, "grad_norm": 0.9274862408638, "learning_rate": 7.656273552842774e-07, "loss": 0.0589, "step": 13919 }, { "epoch": 2.255346727154893, "grad_norm": 0.8932498693466187, "learning_rate": 7.653124280585145e-07, "loss": 0.0658, "step": 13920 }, { "epoch": 2.25550874918989, "grad_norm": 0.9409620761871338, "learning_rate": 7.649975539099935e-07, "loss": 0.0634, "step": 13921 }, { "epoch": 2.2556707712248865, "grad_norm": 0.9468281269073486, "learning_rate": 7.646827328483486e-07, "loss": 0.0615, "step": 13922 }, { "epoch": 2.2558327932598834, "grad_norm": 1.0393003225326538, "learning_rate": 7.643679648832133e-07, "loss": 0.0657, "step": 13923 }, { "epoch": 2.25599481529488, "grad_norm": 0.9196584820747375, "learning_rate": 7.64053250024217e-07, "loss": 0.0633, "step": 13924 }, { "epoch": 2.256156837329877, "grad_norm": 1.1040884256362915, "learning_rate": 7.637385882809909e-07, "loss": 0.0673, "step": 13925 }, { "epoch": 2.2563188593648738, "grad_norm": 1.06027352809906, "learning_rate": 7.634239796631629e-07, "loss": 0.0616, "step": 13926 }, { "epoch": 2.2564808813998702, "grad_norm": 1.0992965698242188, "learning_rate": 7.631094241803582e-07, "loss": 0.078, "step": 13927 }, { "epoch": 2.256642903434867, "grad_norm": 0.9714069366455078, "learning_rate": 7.62794921842201e-07, "loss": 0.0667, "step": 13928 }, { "epoch": 2.256804925469864, "grad_norm": 1.0589038133621216, "learning_rate": 7.624804726583169e-07, "loss": 0.0668, "step": 13929 }, { "epoch": 2.2569669475048606, "grad_norm": 0.8686789870262146, "learning_rate": 7.621660766383246e-07, "loss": 0.0573, "step": 13930 }, { "epoch": 2.2571289695398575, "grad_norm": 0.8950777649879456, "learning_rate": 7.618517337918451e-07, "loss": 0.0524, "step": 13931 }, { "epoch": 2.257290991574854, "grad_norm": 0.8135221600532532, "learning_rate": 7.615374441284962e-07, "loss": 0.0596, "step": 13932 }, { "epoch": 2.257453013609851, "grad_norm": 0.8295992612838745, "learning_rate": 7.612232076578946e-07, "loss": 0.0618, "step": 13933 }, { "epoch": 2.257615035644848, "grad_norm": 0.9673225283622742, "learning_rate": 7.60909024389655e-07, "loss": 0.0575, "step": 13934 }, { "epoch": 2.2577770576798444, "grad_norm": 0.8375781178474426, "learning_rate": 7.605948943333908e-07, "loss": 0.0577, "step": 13935 }, { "epoch": 2.2579390797148413, "grad_norm": 1.1369965076446533, "learning_rate": 7.602808174987137e-07, "loss": 0.0696, "step": 13936 }, { "epoch": 2.258101101749838, "grad_norm": 0.774905800819397, "learning_rate": 7.599667938952341e-07, "loss": 0.0567, "step": 13937 }, { "epoch": 2.2582631237848347, "grad_norm": 0.8955923914909363, "learning_rate": 7.596528235325582e-07, "loss": 0.0651, "step": 13938 }, { "epoch": 2.2584251458198317, "grad_norm": 0.830094039440155, "learning_rate": 7.593389064202952e-07, "loss": 0.0658, "step": 13939 }, { "epoch": 2.258587167854828, "grad_norm": 0.7732976675033569, "learning_rate": 7.590250425680496e-07, "loss": 0.0535, "step": 13940 }, { "epoch": 2.258749189889825, "grad_norm": 1.0507856607437134, "learning_rate": 7.58711231985425e-07, "loss": 0.0673, "step": 13941 }, { "epoch": 2.2589112119248216, "grad_norm": 0.9185804128646851, "learning_rate": 7.583974746820222e-07, "loss": 0.0662, "step": 13942 }, { "epoch": 2.2590732339598185, "grad_norm": 0.9794391393661499, "learning_rate": 7.580837706674415e-07, "loss": 0.0692, "step": 13943 }, { "epoch": 2.2592352559948155, "grad_norm": 0.8743561506271362, "learning_rate": 7.577701199512835e-07, "loss": 0.0611, "step": 13944 }, { "epoch": 2.259397278029812, "grad_norm": 0.845887303352356, "learning_rate": 7.574565225431427e-07, "loss": 0.0609, "step": 13945 }, { "epoch": 2.259559300064809, "grad_norm": 1.0307512283325195, "learning_rate": 7.571429784526157e-07, "loss": 0.0634, "step": 13946 }, { "epoch": 2.2597213220998054, "grad_norm": 0.8212060332298279, "learning_rate": 7.56829487689296e-07, "loss": 0.0584, "step": 13947 }, { "epoch": 2.2598833441348023, "grad_norm": 0.892123281955719, "learning_rate": 7.565160502627752e-07, "loss": 0.0619, "step": 13948 }, { "epoch": 2.2600453661697992, "grad_norm": 0.8927651643753052, "learning_rate": 7.562026661826446e-07, "loss": 0.063, "step": 13949 }, { "epoch": 2.2602073882047957, "grad_norm": 0.908979594707489, "learning_rate": 7.558893354584923e-07, "loss": 0.06, "step": 13950 }, { "epoch": 2.2603694102397927, "grad_norm": 0.8717660903930664, "learning_rate": 7.555760580999055e-07, "loss": 0.0631, "step": 13951 }, { "epoch": 2.2605314322747896, "grad_norm": 1.0289645195007324, "learning_rate": 7.5526283411647e-07, "loss": 0.0634, "step": 13952 }, { "epoch": 2.260693454309786, "grad_norm": 0.8531572222709656, "learning_rate": 7.549496635177698e-07, "loss": 0.0628, "step": 13953 }, { "epoch": 2.260855476344783, "grad_norm": 0.8821384310722351, "learning_rate": 7.546365463133867e-07, "loss": 0.0641, "step": 13954 }, { "epoch": 2.2610174983797795, "grad_norm": 0.923827052116394, "learning_rate": 7.543234825129026e-07, "loss": 0.0629, "step": 13955 }, { "epoch": 2.2611795204147764, "grad_norm": 0.8377568125724792, "learning_rate": 7.540104721258945e-07, "loss": 0.0635, "step": 13956 }, { "epoch": 2.261341542449773, "grad_norm": 0.8890910148620605, "learning_rate": 7.5369751516194e-07, "loss": 0.0614, "step": 13957 }, { "epoch": 2.26150356448477, "grad_norm": 1.1075289249420166, "learning_rate": 7.533846116306162e-07, "loss": 0.0598, "step": 13958 }, { "epoch": 2.261665586519767, "grad_norm": 0.8395311236381531, "learning_rate": 7.530717615414976e-07, "loss": 0.0583, "step": 13959 }, { "epoch": 2.2618276085547633, "grad_norm": 0.872234046459198, "learning_rate": 7.527589649041548e-07, "loss": 0.0643, "step": 13960 }, { "epoch": 2.26198963058976, "grad_norm": 0.8673476576805115, "learning_rate": 7.524462217281592e-07, "loss": 0.0624, "step": 13961 }, { "epoch": 2.262151652624757, "grad_norm": 0.9594100117683411, "learning_rate": 7.521335320230804e-07, "loss": 0.0614, "step": 13962 }, { "epoch": 2.2623136746597536, "grad_norm": 0.8554080128669739, "learning_rate": 7.518208957984857e-07, "loss": 0.0596, "step": 13963 }, { "epoch": 2.2624756966947506, "grad_norm": 0.8185822367668152, "learning_rate": 7.515083130639411e-07, "loss": 0.0617, "step": 13964 }, { "epoch": 2.262637718729747, "grad_norm": 0.7946313619613647, "learning_rate": 7.51195783829011e-07, "loss": 0.0602, "step": 13965 }, { "epoch": 2.262799740764744, "grad_norm": 0.8937302827835083, "learning_rate": 7.508833081032577e-07, "loss": 0.065, "step": 13966 }, { "epoch": 2.262961762799741, "grad_norm": 0.9195818901062012, "learning_rate": 7.505708858962424e-07, "loss": 0.0651, "step": 13967 }, { "epoch": 2.2631237848347374, "grad_norm": 0.9528251886367798, "learning_rate": 7.502585172175244e-07, "loss": 0.0698, "step": 13968 }, { "epoch": 2.2632858068697344, "grad_norm": 0.9592039585113525, "learning_rate": 7.499462020766618e-07, "loss": 0.0562, "step": 13969 }, { "epoch": 2.263447828904731, "grad_norm": 0.8582433462142944, "learning_rate": 7.496339404832109e-07, "loss": 0.0619, "step": 13970 }, { "epoch": 2.2636098509397278, "grad_norm": 0.9389162063598633, "learning_rate": 7.493217324467239e-07, "loss": 0.071, "step": 13971 }, { "epoch": 2.2637718729747247, "grad_norm": 0.9363165497779846, "learning_rate": 7.490095779767564e-07, "loss": 0.0612, "step": 13972 }, { "epoch": 2.263933895009721, "grad_norm": 0.8165293335914612, "learning_rate": 7.486974770828592e-07, "loss": 0.0586, "step": 13973 }, { "epoch": 2.264095917044718, "grad_norm": 0.822784423828125, "learning_rate": 7.483854297745805e-07, "loss": 0.0595, "step": 13974 }, { "epoch": 2.264257939079715, "grad_norm": 1.0148231983184814, "learning_rate": 7.480734360614686e-07, "loss": 0.0658, "step": 13975 }, { "epoch": 2.2644199611147116, "grad_norm": 0.8952381610870361, "learning_rate": 7.47761495953069e-07, "loss": 0.0551, "step": 13976 }, { "epoch": 2.2645819831497085, "grad_norm": 1.0946059226989746, "learning_rate": 7.474496094589292e-07, "loss": 0.069, "step": 13977 }, { "epoch": 2.264744005184705, "grad_norm": 0.7693538069725037, "learning_rate": 7.471377765885893e-07, "loss": 0.0551, "step": 13978 }, { "epoch": 2.264906027219702, "grad_norm": 0.9450730681419373, "learning_rate": 7.468259973515918e-07, "loss": 0.0671, "step": 13979 }, { "epoch": 2.2650680492546984, "grad_norm": 0.9570711255073547, "learning_rate": 7.465142717574761e-07, "loss": 0.0681, "step": 13980 }, { "epoch": 2.2652300712896953, "grad_norm": 1.0384302139282227, "learning_rate": 7.462025998157801e-07, "loss": 0.0637, "step": 13981 }, { "epoch": 2.2653920933246923, "grad_norm": 0.9101808071136475, "learning_rate": 7.458909815360407e-07, "loss": 0.0672, "step": 13982 }, { "epoch": 2.2655541153596888, "grad_norm": 0.8050899505615234, "learning_rate": 7.455794169277922e-07, "loss": 0.0546, "step": 13983 }, { "epoch": 2.2657161373946857, "grad_norm": 0.863530695438385, "learning_rate": 7.45267906000568e-07, "loss": 0.0597, "step": 13984 }, { "epoch": 2.2658781594296826, "grad_norm": 0.8293231129646301, "learning_rate": 7.449564487639005e-07, "loss": 0.0586, "step": 13985 }, { "epoch": 2.266040181464679, "grad_norm": 0.9019443988800049, "learning_rate": 7.446450452273168e-07, "loss": 0.0652, "step": 13986 }, { "epoch": 2.266202203499676, "grad_norm": 1.059104084968567, "learning_rate": 7.443336954003474e-07, "loss": 0.0654, "step": 13987 }, { "epoch": 2.2663642255346725, "grad_norm": 0.8297203779220581, "learning_rate": 7.440223992925194e-07, "loss": 0.054, "step": 13988 }, { "epoch": 2.2665262475696695, "grad_norm": 1.012291669845581, "learning_rate": 7.437111569133556e-07, "loss": 0.0667, "step": 13989 }, { "epoch": 2.2666882696046664, "grad_norm": 0.8839371800422668, "learning_rate": 7.433999682723805e-07, "loss": 0.0632, "step": 13990 }, { "epoch": 2.266850291639663, "grad_norm": 0.8371898531913757, "learning_rate": 7.430888333791144e-07, "loss": 0.0566, "step": 13991 }, { "epoch": 2.26701231367466, "grad_norm": 0.8507769703865051, "learning_rate": 7.427777522430804e-07, "loss": 0.0567, "step": 13992 }, { "epoch": 2.2671743357096563, "grad_norm": 0.8428038358688354, "learning_rate": 7.424667248737936e-07, "loss": 0.0611, "step": 13993 }, { "epoch": 2.2673363577446533, "grad_norm": 0.8883320093154907, "learning_rate": 7.42155751280772e-07, "loss": 0.0616, "step": 13994 }, { "epoch": 2.26749837977965, "grad_norm": 0.839263916015625, "learning_rate": 7.418448314735305e-07, "loss": 0.0583, "step": 13995 }, { "epoch": 2.2676604018146467, "grad_norm": 0.9638897180557251, "learning_rate": 7.415339654615824e-07, "loss": 0.0617, "step": 13996 }, { "epoch": 2.2678224238496436, "grad_norm": 0.8128506541252136, "learning_rate": 7.412231532544398e-07, "loss": 0.0549, "step": 13997 }, { "epoch": 2.2679844458846405, "grad_norm": 0.9353517889976501, "learning_rate": 7.409123948616123e-07, "loss": 0.0626, "step": 13998 }, { "epoch": 2.268146467919637, "grad_norm": 0.7773940563201904, "learning_rate": 7.406016902926094e-07, "loss": 0.0569, "step": 13999 }, { "epoch": 2.268308489954634, "grad_norm": 0.9288927912712097, "learning_rate": 7.402910395569357e-07, "loss": 0.0608, "step": 14000 }, { "epoch": 2.2684705119896305, "grad_norm": 0.931483268737793, "learning_rate": 7.399804426640983e-07, "loss": 0.0536, "step": 14001 }, { "epoch": 2.2686325340246274, "grad_norm": 0.8563708662986755, "learning_rate": 7.396698996236004e-07, "loss": 0.0656, "step": 14002 }, { "epoch": 2.268794556059624, "grad_norm": 0.9786216020584106, "learning_rate": 7.393594104449445e-07, "loss": 0.0657, "step": 14003 }, { "epoch": 2.268956578094621, "grad_norm": 0.9174492955207825, "learning_rate": 7.39048975137629e-07, "loss": 0.0639, "step": 14004 }, { "epoch": 2.2691186001296177, "grad_norm": 0.8567657470703125, "learning_rate": 7.387385937111527e-07, "loss": 0.0573, "step": 14005 }, { "epoch": 2.2692806221646142, "grad_norm": 0.9229654669761658, "learning_rate": 7.38428266175015e-07, "loss": 0.0627, "step": 14006 }, { "epoch": 2.269442644199611, "grad_norm": 0.8565087914466858, "learning_rate": 7.381179925387086e-07, "loss": 0.0587, "step": 14007 }, { "epoch": 2.269604666234608, "grad_norm": 0.9805523753166199, "learning_rate": 7.378077728117277e-07, "loss": 0.0606, "step": 14008 }, { "epoch": 2.2697666882696046, "grad_norm": 0.7847617268562317, "learning_rate": 7.374976070035647e-07, "loss": 0.0519, "step": 14009 }, { "epoch": 2.2699287103046015, "grad_norm": 0.8503203392028809, "learning_rate": 7.371874951237099e-07, "loss": 0.058, "step": 14010 }, { "epoch": 2.270090732339598, "grad_norm": 0.955046534538269, "learning_rate": 7.368774371816517e-07, "loss": 0.0654, "step": 14011 }, { "epoch": 2.270252754374595, "grad_norm": 0.9594247937202454, "learning_rate": 7.365674331868772e-07, "loss": 0.0609, "step": 14012 }, { "epoch": 2.270414776409592, "grad_norm": 0.8345913887023926, "learning_rate": 7.36257483148872e-07, "loss": 0.0617, "step": 14013 }, { "epoch": 2.2705767984445884, "grad_norm": 0.8707557916641235, "learning_rate": 7.359475870771202e-07, "loss": 0.0613, "step": 14014 }, { "epoch": 2.2707388204795853, "grad_norm": 0.9228283166885376, "learning_rate": 7.356377449811017e-07, "loss": 0.0598, "step": 14015 }, { "epoch": 2.270900842514582, "grad_norm": 1.0070610046386719, "learning_rate": 7.353279568702995e-07, "loss": 0.0629, "step": 14016 }, { "epoch": 2.2710628645495787, "grad_norm": 0.901984453201294, "learning_rate": 7.350182227541922e-07, "loss": 0.0562, "step": 14017 }, { "epoch": 2.2712248865845757, "grad_norm": 0.9663466215133667, "learning_rate": 7.347085426422551e-07, "loss": 0.0641, "step": 14018 }, { "epoch": 2.271386908619572, "grad_norm": 0.9153159856796265, "learning_rate": 7.343989165439641e-07, "loss": 0.0643, "step": 14019 }, { "epoch": 2.271548930654569, "grad_norm": 0.9758960008621216, "learning_rate": 7.340893444687944e-07, "loss": 0.0615, "step": 14020 }, { "epoch": 2.271710952689566, "grad_norm": 0.8344132900238037, "learning_rate": 7.33779826426218e-07, "loss": 0.0622, "step": 14021 }, { "epoch": 2.2718729747245625, "grad_norm": 0.9549560546875, "learning_rate": 7.334703624257039e-07, "loss": 0.0642, "step": 14022 }, { "epoch": 2.2720349967595594, "grad_norm": 0.8046680688858032, "learning_rate": 7.331609524767219e-07, "loss": 0.0568, "step": 14023 }, { "epoch": 2.272197018794556, "grad_norm": 0.9434444904327393, "learning_rate": 7.328515965887389e-07, "loss": 0.0654, "step": 14024 }, { "epoch": 2.272359040829553, "grad_norm": 0.8848881721496582, "learning_rate": 7.32542294771221e-07, "loss": 0.0582, "step": 14025 }, { "epoch": 2.2725210628645494, "grad_norm": 0.8448486328125, "learning_rate": 7.322330470336314e-07, "loss": 0.0562, "step": 14026 }, { "epoch": 2.2726830848995463, "grad_norm": 0.896988034248352, "learning_rate": 7.319238533854328e-07, "loss": 0.0596, "step": 14027 }, { "epoch": 2.2728451069345432, "grad_norm": 0.843999981880188, "learning_rate": 7.316147138360855e-07, "loss": 0.0583, "step": 14028 }, { "epoch": 2.2730071289695397, "grad_norm": 0.8346846699714661, "learning_rate": 7.313056283950487e-07, "loss": 0.0557, "step": 14029 }, { "epoch": 2.2731691510045366, "grad_norm": 0.9750611186027527, "learning_rate": 7.309965970717795e-07, "loss": 0.0644, "step": 14030 }, { "epoch": 2.2733311730395336, "grad_norm": 0.939724862575531, "learning_rate": 7.306876198757332e-07, "loss": 0.0625, "step": 14031 }, { "epoch": 2.27349319507453, "grad_norm": 1.0688453912734985, "learning_rate": 7.303786968163651e-07, "loss": 0.073, "step": 14032 }, { "epoch": 2.273655217109527, "grad_norm": 0.954890251159668, "learning_rate": 7.300698279031257e-07, "loss": 0.0579, "step": 14033 }, { "epoch": 2.2738172391445235, "grad_norm": 1.0080914497375488, "learning_rate": 7.297610131454657e-07, "loss": 0.0592, "step": 14034 }, { "epoch": 2.2739792611795204, "grad_norm": 0.9062652587890625, "learning_rate": 7.294522525528355e-07, "loss": 0.0622, "step": 14035 }, { "epoch": 2.2741412832145174, "grad_norm": 1.0314079523086548, "learning_rate": 7.291435461346827e-07, "loss": 0.0665, "step": 14036 }, { "epoch": 2.274303305249514, "grad_norm": 0.8445022702217102, "learning_rate": 7.288348939004508e-07, "loss": 0.063, "step": 14037 }, { "epoch": 2.274465327284511, "grad_norm": 0.9952003955841064, "learning_rate": 7.285262958595846e-07, "loss": 0.0579, "step": 14038 }, { "epoch": 2.2746273493195073, "grad_norm": 0.773461639881134, "learning_rate": 7.282177520215283e-07, "loss": 0.0542, "step": 14039 }, { "epoch": 2.274789371354504, "grad_norm": 0.8747705221176147, "learning_rate": 7.279092623957204e-07, "loss": 0.06, "step": 14040 }, { "epoch": 2.274951393389501, "grad_norm": 0.9074735045433044, "learning_rate": 7.276008269916008e-07, "loss": 0.0685, "step": 14041 }, { "epoch": 2.2751134154244976, "grad_norm": 0.9272582530975342, "learning_rate": 7.272924458186064e-07, "loss": 0.0637, "step": 14042 }, { "epoch": 2.2752754374594946, "grad_norm": 0.9657379984855652, "learning_rate": 7.269841188861737e-07, "loss": 0.0644, "step": 14043 }, { "epoch": 2.2754374594944915, "grad_norm": 1.0053846836090088, "learning_rate": 7.26675846203736e-07, "loss": 0.0702, "step": 14044 }, { "epoch": 2.275599481529488, "grad_norm": 0.7461309432983398, "learning_rate": 7.263676277807263e-07, "loss": 0.0544, "step": 14045 }, { "epoch": 2.275761503564485, "grad_norm": 0.8807432055473328, "learning_rate": 7.26059463626575e-07, "loss": 0.063, "step": 14046 }, { "epoch": 2.2759235255994814, "grad_norm": 0.9560192823410034, "learning_rate": 7.257513537507121e-07, "loss": 0.0615, "step": 14047 }, { "epoch": 2.2760855476344783, "grad_norm": 0.7775177359580994, "learning_rate": 7.254432981625626e-07, "loss": 0.0573, "step": 14048 }, { "epoch": 2.276247569669475, "grad_norm": 0.8764267563819885, "learning_rate": 7.251352968715544e-07, "loss": 0.0547, "step": 14049 }, { "epoch": 2.2764095917044718, "grad_norm": 0.8146836161613464, "learning_rate": 7.248273498871119e-07, "loss": 0.0563, "step": 14050 }, { "epoch": 2.2765716137394687, "grad_norm": 0.8144499659538269, "learning_rate": 7.245194572186562e-07, "loss": 0.0548, "step": 14051 }, { "epoch": 2.276733635774465, "grad_norm": 0.8982124328613281, "learning_rate": 7.242116188756082e-07, "loss": 0.0632, "step": 14052 }, { "epoch": 2.276895657809462, "grad_norm": 1.0810835361480713, "learning_rate": 7.239038348673866e-07, "loss": 0.0683, "step": 14053 }, { "epoch": 2.277057679844459, "grad_norm": 0.9049067497253418, "learning_rate": 7.235961052034113e-07, "loss": 0.0659, "step": 14054 }, { "epoch": 2.2772197018794555, "grad_norm": 0.8906067609786987, "learning_rate": 7.232884298930953e-07, "loss": 0.062, "step": 14055 }, { "epoch": 2.2773817239144525, "grad_norm": 0.9106201529502869, "learning_rate": 7.22980808945854e-07, "loss": 0.063, "step": 14056 }, { "epoch": 2.277543745949449, "grad_norm": 0.9221261739730835, "learning_rate": 7.226732423710998e-07, "loss": 0.0688, "step": 14057 }, { "epoch": 2.277705767984446, "grad_norm": 0.8350453972816467, "learning_rate": 7.22365730178243e-07, "loss": 0.0599, "step": 14058 }, { "epoch": 2.2778677900194424, "grad_norm": 0.9052740335464478, "learning_rate": 7.220582723766931e-07, "loss": 0.0645, "step": 14059 }, { "epoch": 2.2780298120544393, "grad_norm": 0.8463963866233826, "learning_rate": 7.217508689758576e-07, "loss": 0.0665, "step": 14060 }, { "epoch": 2.2781918340894363, "grad_norm": 0.822176992893219, "learning_rate": 7.214435199851432e-07, "loss": 0.0555, "step": 14061 }, { "epoch": 2.2783538561244328, "grad_norm": 0.9079717993736267, "learning_rate": 7.211362254139512e-07, "loss": 0.0625, "step": 14062 }, { "epoch": 2.2785158781594297, "grad_norm": 0.9785674214363098, "learning_rate": 7.208289852716868e-07, "loss": 0.0684, "step": 14063 }, { "epoch": 2.2786779001944266, "grad_norm": 0.8325532078742981, "learning_rate": 7.205217995677502e-07, "loss": 0.0595, "step": 14064 }, { "epoch": 2.278839922229423, "grad_norm": 0.9397990107536316, "learning_rate": 7.202146683115408e-07, "loss": 0.0605, "step": 14065 }, { "epoch": 2.27900194426442, "grad_norm": 0.7694252729415894, "learning_rate": 7.199075915124548e-07, "loss": 0.0561, "step": 14066 }, { "epoch": 2.2791639662994165, "grad_norm": 0.9818680286407471, "learning_rate": 7.19600569179888e-07, "loss": 0.069, "step": 14067 }, { "epoch": 2.2793259883344135, "grad_norm": 0.8381089568138123, "learning_rate": 7.192936013232368e-07, "loss": 0.0555, "step": 14068 }, { "epoch": 2.2794880103694104, "grad_norm": 1.1629765033721924, "learning_rate": 7.189866879518914e-07, "loss": 0.0749, "step": 14069 }, { "epoch": 2.279650032404407, "grad_norm": 0.8009434342384338, "learning_rate": 7.186798290752436e-07, "loss": 0.0594, "step": 14070 }, { "epoch": 2.279812054439404, "grad_norm": 0.85289067029953, "learning_rate": 7.183730247026821e-07, "loss": 0.0626, "step": 14071 }, { "epoch": 2.2799740764744003, "grad_norm": 0.7887542247772217, "learning_rate": 7.180662748435946e-07, "loss": 0.0601, "step": 14072 }, { "epoch": 2.2801360985093972, "grad_norm": 0.8790128827095032, "learning_rate": 7.17759579507367e-07, "loss": 0.0633, "step": 14073 }, { "epoch": 2.280298120544394, "grad_norm": 0.8336759209632874, "learning_rate": 7.174529387033832e-07, "loss": 0.058, "step": 14074 }, { "epoch": 2.2804601425793907, "grad_norm": 0.8878383040428162, "learning_rate": 7.171463524410258e-07, "loss": 0.0615, "step": 14075 }, { "epoch": 2.2806221646143876, "grad_norm": 0.8392332196235657, "learning_rate": 7.168398207296764e-07, "loss": 0.0626, "step": 14076 }, { "epoch": 2.2807841866493845, "grad_norm": 0.8218026161193848, "learning_rate": 7.165333435787119e-07, "loss": 0.0639, "step": 14077 }, { "epoch": 2.280946208684381, "grad_norm": 1.1141018867492676, "learning_rate": 7.162269209975117e-07, "loss": 0.0645, "step": 14078 }, { "epoch": 2.281108230719378, "grad_norm": 0.8250923156738281, "learning_rate": 7.159205529954513e-07, "loss": 0.0575, "step": 14079 }, { "epoch": 2.2812702527543745, "grad_norm": 0.8710193037986755, "learning_rate": 7.156142395819055e-07, "loss": 0.0597, "step": 14080 }, { "epoch": 2.2814322747893714, "grad_norm": 0.8891174793243408, "learning_rate": 7.153079807662447e-07, "loss": 0.0643, "step": 14081 }, { "epoch": 2.281594296824368, "grad_norm": 1.0327494144439697, "learning_rate": 7.150017765578401e-07, "loss": 0.0676, "step": 14082 }, { "epoch": 2.281756318859365, "grad_norm": 0.9421011805534363, "learning_rate": 7.146956269660632e-07, "loss": 0.0664, "step": 14083 }, { "epoch": 2.2819183408943617, "grad_norm": 0.9506751894950867, "learning_rate": 7.143895320002789e-07, "loss": 0.0699, "step": 14084 }, { "epoch": 2.2820803629293582, "grad_norm": 0.8076117038726807, "learning_rate": 7.140834916698539e-07, "loss": 0.0615, "step": 14085 }, { "epoch": 2.282242384964355, "grad_norm": 0.993713915348053, "learning_rate": 7.137775059841523e-07, "loss": 0.0682, "step": 14086 }, { "epoch": 2.282404406999352, "grad_norm": 0.8705652952194214, "learning_rate": 7.134715749525364e-07, "loss": 0.0666, "step": 14087 }, { "epoch": 2.2825664290343486, "grad_norm": 0.9547131657600403, "learning_rate": 7.131656985843669e-07, "loss": 0.0683, "step": 14088 }, { "epoch": 2.2827284510693455, "grad_norm": 1.0205283164978027, "learning_rate": 7.12859876889003e-07, "loss": 0.0634, "step": 14089 }, { "epoch": 2.282890473104342, "grad_norm": 0.892135739326477, "learning_rate": 7.125541098758021e-07, "loss": 0.062, "step": 14090 }, { "epoch": 2.283052495139339, "grad_norm": 0.9227086305618286, "learning_rate": 7.122483975541197e-07, "loss": 0.0658, "step": 14091 }, { "epoch": 2.283214517174336, "grad_norm": 0.8980578184127808, "learning_rate": 7.119427399333104e-07, "loss": 0.0683, "step": 14092 }, { "epoch": 2.2833765392093324, "grad_norm": 0.7832695245742798, "learning_rate": 7.116371370227259e-07, "loss": 0.054, "step": 14093 }, { "epoch": 2.2835385612443293, "grad_norm": 1.094404935836792, "learning_rate": 7.113315888317182e-07, "loss": 0.0658, "step": 14094 }, { "epoch": 2.283700583279326, "grad_norm": 1.046769380569458, "learning_rate": 7.110260953696346e-07, "loss": 0.0687, "step": 14095 }, { "epoch": 2.2838626053143227, "grad_norm": 0.9280028343200684, "learning_rate": 7.107206566458225e-07, "loss": 0.0613, "step": 14096 }, { "epoch": 2.2840246273493197, "grad_norm": 0.8256856203079224, "learning_rate": 7.104152726696292e-07, "loss": 0.0597, "step": 14097 }, { "epoch": 2.284186649384316, "grad_norm": 0.9466334581375122, "learning_rate": 7.101099434503986e-07, "loss": 0.0654, "step": 14098 }, { "epoch": 2.284348671419313, "grad_norm": 0.8240459561347961, "learning_rate": 7.098046689974714e-07, "loss": 0.0608, "step": 14099 }, { "epoch": 2.28451069345431, "grad_norm": 0.8247748613357544, "learning_rate": 7.09499449320189e-07, "loss": 0.0593, "step": 14100 }, { "epoch": 2.2846727154893065, "grad_norm": 0.9899136424064636, "learning_rate": 7.091942844278907e-07, "loss": 0.0675, "step": 14101 }, { "epoch": 2.2848347375243034, "grad_norm": 1.1560351848602295, "learning_rate": 7.088891743299136e-07, "loss": 0.068, "step": 14102 }, { "epoch": 2.2849967595593, "grad_norm": 0.8623514771461487, "learning_rate": 7.085841190355932e-07, "loss": 0.0599, "step": 14103 }, { "epoch": 2.285158781594297, "grad_norm": 0.869429886341095, "learning_rate": 7.08279118554264e-07, "loss": 0.0593, "step": 14104 }, { "epoch": 2.2853208036292934, "grad_norm": 1.031187891960144, "learning_rate": 7.079741728952578e-07, "loss": 0.0635, "step": 14105 }, { "epoch": 2.2854828256642903, "grad_norm": 1.0560411214828491, "learning_rate": 7.076692820679051e-07, "loss": 0.0651, "step": 14106 }, { "epoch": 2.285644847699287, "grad_norm": 0.8320545554161072, "learning_rate": 7.073644460815348e-07, "loss": 0.06, "step": 14107 }, { "epoch": 2.2858068697342837, "grad_norm": 0.963650107383728, "learning_rate": 7.070596649454748e-07, "loss": 0.0651, "step": 14108 }, { "epoch": 2.2859688917692806, "grad_norm": 0.8947854042053223, "learning_rate": 7.067549386690509e-07, "loss": 0.057, "step": 14109 }, { "epoch": 2.2861309138042776, "grad_norm": 0.8852594494819641, "learning_rate": 7.064502672615847e-07, "loss": 0.0603, "step": 14110 }, { "epoch": 2.286292935839274, "grad_norm": 0.9978240728378296, "learning_rate": 7.061456507324008e-07, "loss": 0.0671, "step": 14111 }, { "epoch": 2.286454957874271, "grad_norm": 0.8848221302032471, "learning_rate": 7.058410890908196e-07, "loss": 0.0624, "step": 14112 }, { "epoch": 2.2866169799092675, "grad_norm": 0.8844780921936035, "learning_rate": 7.055365823461585e-07, "loss": 0.0554, "step": 14113 }, { "epoch": 2.2867790019442644, "grad_norm": 0.9852705001831055, "learning_rate": 7.052321305077356e-07, "loss": 0.0651, "step": 14114 }, { "epoch": 2.2869410239792614, "grad_norm": 0.9480606913566589, "learning_rate": 7.049277335848656e-07, "loss": 0.0659, "step": 14115 }, { "epoch": 2.287103046014258, "grad_norm": 0.9317473769187927, "learning_rate": 7.046233915868642e-07, "loss": 0.0626, "step": 14116 }, { "epoch": 2.287265068049255, "grad_norm": 0.8471056222915649, "learning_rate": 7.043191045230418e-07, "loss": 0.0551, "step": 14117 }, { "epoch": 2.2874270900842513, "grad_norm": 0.8562641143798828, "learning_rate": 7.04014872402709e-07, "loss": 0.0583, "step": 14118 }, { "epoch": 2.287589112119248, "grad_norm": 0.9273170232772827, "learning_rate": 7.037106952351752e-07, "loss": 0.0621, "step": 14119 }, { "epoch": 2.287751134154245, "grad_norm": 0.8602285981178284, "learning_rate": 7.034065730297471e-07, "loss": 0.0566, "step": 14120 }, { "epoch": 2.2879131561892416, "grad_norm": 0.9283899664878845, "learning_rate": 7.031025057957302e-07, "loss": 0.0624, "step": 14121 }, { "epoch": 2.2880751782242386, "grad_norm": 0.8553148508071899, "learning_rate": 7.027984935424284e-07, "loss": 0.064, "step": 14122 }, { "epoch": 2.2882372002592355, "grad_norm": 0.964326798915863, "learning_rate": 7.024945362791432e-07, "loss": 0.0618, "step": 14123 }, { "epoch": 2.288399222294232, "grad_norm": 0.9071498513221741, "learning_rate": 7.021906340151763e-07, "loss": 0.0718, "step": 14124 }, { "epoch": 2.288561244329229, "grad_norm": 0.9609620571136475, "learning_rate": 7.01886786759824e-07, "loss": 0.0663, "step": 14125 }, { "epoch": 2.2887232663642254, "grad_norm": 0.9779571890830994, "learning_rate": 7.015829945223851e-07, "loss": 0.065, "step": 14126 }, { "epoch": 2.2888852883992223, "grad_norm": 1.0127842426300049, "learning_rate": 7.012792573121551e-07, "loss": 0.0688, "step": 14127 }, { "epoch": 2.289047310434219, "grad_norm": 0.9270831942558289, "learning_rate": 7.009755751384267e-07, "loss": 0.0569, "step": 14128 }, { "epoch": 2.2892093324692158, "grad_norm": 1.1184446811676025, "learning_rate": 7.006719480104913e-07, "loss": 0.0677, "step": 14129 }, { "epoch": 2.2893713545042127, "grad_norm": 0.8415164947509766, "learning_rate": 7.003683759376415e-07, "loss": 0.0582, "step": 14130 }, { "epoch": 2.289533376539209, "grad_norm": 1.0956602096557617, "learning_rate": 7.000648589291634e-07, "loss": 0.0626, "step": 14131 }, { "epoch": 2.289695398574206, "grad_norm": 0.8971714973449707, "learning_rate": 6.997613969943451e-07, "loss": 0.0578, "step": 14132 }, { "epoch": 2.289857420609203, "grad_norm": 0.8495174050331116, "learning_rate": 6.994579901424714e-07, "loss": 0.0575, "step": 14133 }, { "epoch": 2.2900194426441995, "grad_norm": 1.226990818977356, "learning_rate": 6.99154638382826e-07, "loss": 0.0649, "step": 14134 }, { "epoch": 2.2901814646791965, "grad_norm": 1.0273133516311646, "learning_rate": 6.988513417246906e-07, "loss": 0.0696, "step": 14135 }, { "epoch": 2.290343486714193, "grad_norm": 0.996566116809845, "learning_rate": 6.985481001773456e-07, "loss": 0.0684, "step": 14136 }, { "epoch": 2.29050550874919, "grad_norm": 0.8226109743118286, "learning_rate": 6.982449137500694e-07, "loss": 0.0552, "step": 14137 }, { "epoch": 2.290667530784187, "grad_norm": 0.8552298545837402, "learning_rate": 6.979417824521393e-07, "loss": 0.0626, "step": 14138 }, { "epoch": 2.2908295528191833, "grad_norm": 0.8617782592773438, "learning_rate": 6.976387062928283e-07, "loss": 0.0569, "step": 14139 }, { "epoch": 2.2909915748541803, "grad_norm": 0.887869656085968, "learning_rate": 6.97335685281412e-07, "loss": 0.0597, "step": 14140 }, { "epoch": 2.2911535968891767, "grad_norm": 0.8527069687843323, "learning_rate": 6.970327194271612e-07, "loss": 0.0682, "step": 14141 }, { "epoch": 2.2913156189241737, "grad_norm": 0.9207058548927307, "learning_rate": 6.967298087393471e-07, "loss": 0.0674, "step": 14142 }, { "epoch": 2.2914776409591706, "grad_norm": 0.8777424097061157, "learning_rate": 6.964269532272361e-07, "loss": 0.062, "step": 14143 }, { "epoch": 2.291639662994167, "grad_norm": 0.9275683164596558, "learning_rate": 6.96124152900095e-07, "loss": 0.0591, "step": 14144 }, { "epoch": 2.291801685029164, "grad_norm": 0.9526832699775696, "learning_rate": 6.958214077671912e-07, "loss": 0.0673, "step": 14145 }, { "epoch": 2.291963707064161, "grad_norm": 1.0193464756011963, "learning_rate": 6.955187178377853e-07, "loss": 0.0628, "step": 14146 }, { "epoch": 2.2921257290991575, "grad_norm": 0.9001911282539368, "learning_rate": 6.952160831211401e-07, "loss": 0.0649, "step": 14147 }, { "epoch": 2.2922877511341544, "grad_norm": 0.7867611646652222, "learning_rate": 6.949135036265153e-07, "loss": 0.0577, "step": 14148 }, { "epoch": 2.292449773169151, "grad_norm": 0.8737292885780334, "learning_rate": 6.946109793631689e-07, "loss": 0.0549, "step": 14149 }, { "epoch": 2.292611795204148, "grad_norm": 0.9339378476142883, "learning_rate": 6.943085103403577e-07, "loss": 0.0673, "step": 14150 }, { "epoch": 2.2927738172391443, "grad_norm": 0.8713966012001038, "learning_rate": 6.940060965673362e-07, "loss": 0.0591, "step": 14151 }, { "epoch": 2.2929358392741412, "grad_norm": 0.8593830466270447, "learning_rate": 6.937037380533579e-07, "loss": 0.062, "step": 14152 }, { "epoch": 2.293097861309138, "grad_norm": 0.8531447649002075, "learning_rate": 6.93401434807674e-07, "loss": 0.057, "step": 14153 }, { "epoch": 2.2932598833441347, "grad_norm": 0.8757892847061157, "learning_rate": 6.930991868395343e-07, "loss": 0.0614, "step": 14154 }, { "epoch": 2.2934219053791316, "grad_norm": 0.868497371673584, "learning_rate": 6.92796994158187e-07, "loss": 0.0633, "step": 14155 }, { "epoch": 2.2935839274141285, "grad_norm": 0.9063879251480103, "learning_rate": 6.924948567728787e-07, "loss": 0.0596, "step": 14156 }, { "epoch": 2.293745949449125, "grad_norm": 1.0460374355316162, "learning_rate": 6.921927746928533e-07, "loss": 0.0672, "step": 14157 }, { "epoch": 2.293907971484122, "grad_norm": 0.8834719657897949, "learning_rate": 6.918907479273535e-07, "loss": 0.0637, "step": 14158 }, { "epoch": 2.2940699935191184, "grad_norm": 0.8647652864456177, "learning_rate": 6.915887764856216e-07, "loss": 0.0618, "step": 14159 }, { "epoch": 2.2942320155541154, "grad_norm": 0.8388234376907349, "learning_rate": 6.912868603768979e-07, "loss": 0.0574, "step": 14160 }, { "epoch": 2.2943940375891123, "grad_norm": 0.8639540076255798, "learning_rate": 6.909849996104187e-07, "loss": 0.0632, "step": 14161 }, { "epoch": 2.294556059624109, "grad_norm": 0.9691962599754333, "learning_rate": 6.906831941954206e-07, "loss": 0.0672, "step": 14162 }, { "epoch": 2.2947180816591057, "grad_norm": 0.8707907795906067, "learning_rate": 6.903814441411383e-07, "loss": 0.0623, "step": 14163 }, { "epoch": 2.2948801036941022, "grad_norm": 0.8201960325241089, "learning_rate": 6.900797494568045e-07, "loss": 0.0648, "step": 14164 }, { "epoch": 2.295042125729099, "grad_norm": 0.9355173110961914, "learning_rate": 6.897781101516504e-07, "loss": 0.0678, "step": 14165 }, { "epoch": 2.295204147764096, "grad_norm": 0.7753675580024719, "learning_rate": 6.894765262349056e-07, "loss": 0.0546, "step": 14166 }, { "epoch": 2.2953661697990926, "grad_norm": 1.1092512607574463, "learning_rate": 6.891749977157979e-07, "loss": 0.0609, "step": 14167 }, { "epoch": 2.2955281918340895, "grad_norm": 0.8517485857009888, "learning_rate": 6.88873524603553e-07, "loss": 0.0617, "step": 14168 }, { "epoch": 2.295690213869086, "grad_norm": 0.8324517011642456, "learning_rate": 6.885721069073953e-07, "loss": 0.0565, "step": 14169 }, { "epoch": 2.295852235904083, "grad_norm": 0.9597250819206238, "learning_rate": 6.882707446365477e-07, "loss": 0.0662, "step": 14170 }, { "epoch": 2.29601425793908, "grad_norm": 0.9508599042892456, "learning_rate": 6.879694378002316e-07, "loss": 0.0623, "step": 14171 }, { "epoch": 2.2961762799740764, "grad_norm": 1.0893688201904297, "learning_rate": 6.876681864076646e-07, "loss": 0.0757, "step": 14172 }, { "epoch": 2.2963383020090733, "grad_norm": 0.9242887496948242, "learning_rate": 6.873669904680655e-07, "loss": 0.063, "step": 14173 }, { "epoch": 2.29650032404407, "grad_norm": 0.8941336870193481, "learning_rate": 6.870658499906505e-07, "loss": 0.0595, "step": 14174 }, { "epoch": 2.2966623460790667, "grad_norm": 0.8984914422035217, "learning_rate": 6.867647649846338e-07, "loss": 0.0615, "step": 14175 }, { "epoch": 2.2968243681140637, "grad_norm": 0.9848359823226929, "learning_rate": 6.864637354592266e-07, "loss": 0.0634, "step": 14176 }, { "epoch": 2.29698639014906, "grad_norm": 0.9562865495681763, "learning_rate": 6.861627614236396e-07, "loss": 0.0661, "step": 14177 }, { "epoch": 2.297148412184057, "grad_norm": 0.8696370124816895, "learning_rate": 6.858618428870842e-07, "loss": 0.0591, "step": 14178 }, { "epoch": 2.297310434219054, "grad_norm": 0.9466931819915771, "learning_rate": 6.855609798587656e-07, "loss": 0.0605, "step": 14179 }, { "epoch": 2.2974724562540505, "grad_norm": 0.826383650302887, "learning_rate": 6.852601723478902e-07, "loss": 0.0515, "step": 14180 }, { "epoch": 2.2976344782890474, "grad_norm": 0.9467977285385132, "learning_rate": 6.849594203636619e-07, "loss": 0.0621, "step": 14181 }, { "epoch": 2.297796500324044, "grad_norm": 0.9184082746505737, "learning_rate": 6.84658723915283e-07, "loss": 0.0634, "step": 14182 }, { "epoch": 2.297958522359041, "grad_norm": 0.8161764740943909, "learning_rate": 6.843580830119542e-07, "loss": 0.0573, "step": 14183 }, { "epoch": 2.2981205443940373, "grad_norm": 0.8612385392189026, "learning_rate": 6.840574976628741e-07, "loss": 0.0674, "step": 14184 }, { "epoch": 2.2982825664290343, "grad_norm": 0.8990957140922546, "learning_rate": 6.837569678772401e-07, "loss": 0.0606, "step": 14185 }, { "epoch": 2.298444588464031, "grad_norm": 0.809965193271637, "learning_rate": 6.834564936642488e-07, "loss": 0.0644, "step": 14186 }, { "epoch": 2.2986066104990277, "grad_norm": 0.9587274193763733, "learning_rate": 6.831560750330909e-07, "loss": 0.0612, "step": 14187 }, { "epoch": 2.2987686325340246, "grad_norm": 0.9014949202537537, "learning_rate": 6.828557119929613e-07, "loss": 0.0591, "step": 14188 }, { "epoch": 2.2989306545690216, "grad_norm": 1.0473854541778564, "learning_rate": 6.825554045530502e-07, "loss": 0.0749, "step": 14189 }, { "epoch": 2.299092676604018, "grad_norm": 0.9291993379592896, "learning_rate": 6.822551527225452e-07, "loss": 0.0603, "step": 14190 }, { "epoch": 2.299254698639015, "grad_norm": 0.8897577524185181, "learning_rate": 6.819549565106331e-07, "loss": 0.056, "step": 14191 }, { "epoch": 2.2994167206740115, "grad_norm": 0.8365473747253418, "learning_rate": 6.816548159264993e-07, "loss": 0.0583, "step": 14192 }, { "epoch": 2.2995787427090084, "grad_norm": 0.8146736025810242, "learning_rate": 6.813547309793295e-07, "loss": 0.0569, "step": 14193 }, { "epoch": 2.2997407647440054, "grad_norm": 1.0734623670578003, "learning_rate": 6.810547016783029e-07, "loss": 0.0584, "step": 14194 }, { "epoch": 2.299902786779002, "grad_norm": 0.953218936920166, "learning_rate": 6.807547280326007e-07, "loss": 0.0679, "step": 14195 }, { "epoch": 2.3000648088139988, "grad_norm": 1.0048823356628418, "learning_rate": 6.804548100514013e-07, "loss": 0.0612, "step": 14196 }, { "epoch": 2.3002268308489953, "grad_norm": 1.0320632457733154, "learning_rate": 6.801549477438815e-07, "loss": 0.0585, "step": 14197 }, { "epoch": 2.300388852883992, "grad_norm": 0.9690525531768799, "learning_rate": 6.798551411192165e-07, "loss": 0.0676, "step": 14198 }, { "epoch": 2.300550874918989, "grad_norm": 0.8694241046905518, "learning_rate": 6.795553901865795e-07, "loss": 0.0623, "step": 14199 }, { "epoch": 2.3007128969539856, "grad_norm": 0.9456620216369629, "learning_rate": 6.792556949551426e-07, "loss": 0.0662, "step": 14200 }, { "epoch": 2.3008749189889826, "grad_norm": 0.9079062342643738, "learning_rate": 6.789560554340743e-07, "loss": 0.0629, "step": 14201 }, { "epoch": 2.3010369410239795, "grad_norm": 0.9178680181503296, "learning_rate": 6.786564716325441e-07, "loss": 0.0721, "step": 14202 }, { "epoch": 2.301198963058976, "grad_norm": 0.7588401436805725, "learning_rate": 6.783569435597188e-07, "loss": 0.0523, "step": 14203 }, { "epoch": 2.301360985093973, "grad_norm": 0.9008545875549316, "learning_rate": 6.780574712247632e-07, "loss": 0.0673, "step": 14204 }, { "epoch": 2.3015230071289694, "grad_norm": 0.960690438747406, "learning_rate": 6.777580546368393e-07, "loss": 0.0612, "step": 14205 }, { "epoch": 2.3016850291639663, "grad_norm": 0.9084741473197937, "learning_rate": 6.774586938051084e-07, "loss": 0.0618, "step": 14206 }, { "epoch": 2.301847051198963, "grad_norm": 1.0799715518951416, "learning_rate": 6.771593887387326e-07, "loss": 0.0649, "step": 14207 }, { "epoch": 2.3020090732339598, "grad_norm": 0.854361355304718, "learning_rate": 6.768601394468674e-07, "loss": 0.06, "step": 14208 }, { "epoch": 2.3021710952689567, "grad_norm": 0.9102272987365723, "learning_rate": 6.765609459386702e-07, "loss": 0.0681, "step": 14209 }, { "epoch": 2.302333117303953, "grad_norm": 0.9026054739952087, "learning_rate": 6.762618082232952e-07, "loss": 0.0594, "step": 14210 }, { "epoch": 2.30249513933895, "grad_norm": 0.9153203964233398, "learning_rate": 6.759627263098955e-07, "loss": 0.0695, "step": 14211 }, { "epoch": 2.302657161373947, "grad_norm": 0.8652878403663635, "learning_rate": 6.756637002076225e-07, "loss": 0.0624, "step": 14212 }, { "epoch": 2.3028191834089435, "grad_norm": 0.8761528730392456, "learning_rate": 6.753647299256255e-07, "loss": 0.0601, "step": 14213 }, { "epoch": 2.3029812054439405, "grad_norm": 0.8554459810256958, "learning_rate": 6.750658154730522e-07, "loss": 0.0597, "step": 14214 }, { "epoch": 2.303143227478937, "grad_norm": 0.8388009071350098, "learning_rate": 6.747669568590492e-07, "loss": 0.062, "step": 14215 }, { "epoch": 2.303305249513934, "grad_norm": 0.928914487361908, "learning_rate": 6.744681540927588e-07, "loss": 0.0618, "step": 14216 }, { "epoch": 2.303467271548931, "grad_norm": 0.8588629961013794, "learning_rate": 6.741694071833263e-07, "loss": 0.0574, "step": 14217 }, { "epoch": 2.3036292935839273, "grad_norm": 0.9985714554786682, "learning_rate": 6.738707161398914e-07, "loss": 0.0705, "step": 14218 }, { "epoch": 2.3037913156189243, "grad_norm": 0.9504481554031372, "learning_rate": 6.73572080971594e-07, "loss": 0.0678, "step": 14219 }, { "epoch": 2.3039533376539207, "grad_norm": 0.9666747450828552, "learning_rate": 6.732735016875697e-07, "loss": 0.0676, "step": 14220 }, { "epoch": 2.3041153596889177, "grad_norm": 0.8083941340446472, "learning_rate": 6.729749782969563e-07, "loss": 0.0612, "step": 14221 }, { "epoch": 2.3042773817239146, "grad_norm": 0.9107043743133545, "learning_rate": 6.726765108088881e-07, "loss": 0.0617, "step": 14222 }, { "epoch": 2.304439403758911, "grad_norm": 0.8032419681549072, "learning_rate": 6.72378099232496e-07, "loss": 0.0553, "step": 14223 }, { "epoch": 2.304601425793908, "grad_norm": 0.9696297645568848, "learning_rate": 6.720797435769111e-07, "loss": 0.0601, "step": 14224 }, { "epoch": 2.304763447828905, "grad_norm": 0.9108163118362427, "learning_rate": 6.717814438512626e-07, "loss": 0.0624, "step": 14225 }, { "epoch": 2.3049254698639015, "grad_norm": 0.8767720460891724, "learning_rate": 6.714832000646778e-07, "loss": 0.0662, "step": 14226 }, { "epoch": 2.3050874918988984, "grad_norm": 0.9097752571105957, "learning_rate": 6.711850122262823e-07, "loss": 0.0645, "step": 14227 }, { "epoch": 2.305249513933895, "grad_norm": 0.8309717178344727, "learning_rate": 6.708868803451992e-07, "loss": 0.052, "step": 14228 }, { "epoch": 2.305411535968892, "grad_norm": 0.8942591547966003, "learning_rate": 6.705888044305516e-07, "loss": 0.0665, "step": 14229 }, { "epoch": 2.3055735580038883, "grad_norm": 0.7907509207725525, "learning_rate": 6.702907844914597e-07, "loss": 0.0545, "step": 14230 }, { "epoch": 2.3057355800388852, "grad_norm": 0.913838267326355, "learning_rate": 6.699928205370418e-07, "loss": 0.0648, "step": 14231 }, { "epoch": 2.305897602073882, "grad_norm": 0.7393428087234497, "learning_rate": 6.696949125764149e-07, "loss": 0.0531, "step": 14232 }, { "epoch": 2.3060596241088787, "grad_norm": 1.152111291885376, "learning_rate": 6.693970606186953e-07, "loss": 0.0706, "step": 14233 }, { "epoch": 2.3062216461438756, "grad_norm": 0.8411812782287598, "learning_rate": 6.690992646729949e-07, "loss": 0.0603, "step": 14234 }, { "epoch": 2.3063836681788725, "grad_norm": 0.9782255291938782, "learning_rate": 6.688015247484256e-07, "loss": 0.0644, "step": 14235 }, { "epoch": 2.306545690213869, "grad_norm": 0.9612037539482117, "learning_rate": 6.685038408540989e-07, "loss": 0.0636, "step": 14236 }, { "epoch": 2.306707712248866, "grad_norm": 1.0477268695831299, "learning_rate": 6.682062129991232e-07, "loss": 0.07, "step": 14237 }, { "epoch": 2.3068697342838624, "grad_norm": 0.8569392561912537, "learning_rate": 6.679086411926039e-07, "loss": 0.0622, "step": 14238 }, { "epoch": 2.3070317563188594, "grad_norm": 0.8216943144798279, "learning_rate": 6.676111254436465e-07, "loss": 0.0564, "step": 14239 }, { "epoch": 2.3071937783538563, "grad_norm": 0.8884442448616028, "learning_rate": 6.673136657613547e-07, "loss": 0.0602, "step": 14240 }, { "epoch": 2.307355800388853, "grad_norm": 0.9046189785003662, "learning_rate": 6.670162621548293e-07, "loss": 0.0612, "step": 14241 }, { "epoch": 2.3075178224238497, "grad_norm": 0.9811939597129822, "learning_rate": 6.667189146331707e-07, "loss": 0.0644, "step": 14242 }, { "epoch": 2.307679844458846, "grad_norm": 0.9112708568572998, "learning_rate": 6.664216232054771e-07, "loss": 0.0598, "step": 14243 }, { "epoch": 2.307841866493843, "grad_norm": 1.0917285680770874, "learning_rate": 6.661243878808443e-07, "loss": 0.0707, "step": 14244 }, { "epoch": 2.30800388852884, "grad_norm": 0.8850948810577393, "learning_rate": 6.658272086683676e-07, "loss": 0.0545, "step": 14245 }, { "epoch": 2.3081659105638366, "grad_norm": 0.8096259832382202, "learning_rate": 6.655300855771393e-07, "loss": 0.0557, "step": 14246 }, { "epoch": 2.3083279325988335, "grad_norm": 0.9947896599769592, "learning_rate": 6.652330186162514e-07, "loss": 0.0618, "step": 14247 }, { "epoch": 2.3084899546338304, "grad_norm": 0.7889495491981506, "learning_rate": 6.649360077947939e-07, "loss": 0.0539, "step": 14248 }, { "epoch": 2.308651976668827, "grad_norm": 0.9984772801399231, "learning_rate": 6.646390531218522e-07, "loss": 0.0655, "step": 14249 }, { "epoch": 2.308813998703824, "grad_norm": 0.9736108183860779, "learning_rate": 6.643421546065146e-07, "loss": 0.0626, "step": 14250 }, { "epoch": 2.3089760207388204, "grad_norm": 0.9100701808929443, "learning_rate": 6.640453122578655e-07, "loss": 0.0652, "step": 14251 }, { "epoch": 2.3091380427738173, "grad_norm": 1.022388219833374, "learning_rate": 6.637485260849866e-07, "loss": 0.0645, "step": 14252 }, { "epoch": 2.309300064808814, "grad_norm": 0.9333613514900208, "learning_rate": 6.634517960969588e-07, "loss": 0.0595, "step": 14253 }, { "epoch": 2.3094620868438107, "grad_norm": 0.8503563404083252, "learning_rate": 6.63155122302861e-07, "loss": 0.053, "step": 14254 }, { "epoch": 2.3096241088788076, "grad_norm": 1.0957640409469604, "learning_rate": 6.628585047117731e-07, "loss": 0.063, "step": 14255 }, { "epoch": 2.309786130913804, "grad_norm": 1.1101269721984863, "learning_rate": 6.625619433327681e-07, "loss": 0.0574, "step": 14256 }, { "epoch": 2.309948152948801, "grad_norm": 1.0195109844207764, "learning_rate": 6.622654381749213e-07, "loss": 0.0655, "step": 14257 }, { "epoch": 2.310110174983798, "grad_norm": 0.9084102511405945, "learning_rate": 6.619689892473046e-07, "loss": 0.0594, "step": 14258 }, { "epoch": 2.3102721970187945, "grad_norm": 0.9905080795288086, "learning_rate": 6.616725965589893e-07, "loss": 0.0613, "step": 14259 }, { "epoch": 2.3104342190537914, "grad_norm": 0.8357704281806946, "learning_rate": 6.613762601190435e-07, "loss": 0.0619, "step": 14260 }, { "epoch": 2.310596241088788, "grad_norm": 0.9966726899147034, "learning_rate": 6.61079979936535e-07, "loss": 0.0677, "step": 14261 }, { "epoch": 2.310758263123785, "grad_norm": 1.088291049003601, "learning_rate": 6.60783756020529e-07, "loss": 0.0691, "step": 14262 }, { "epoch": 2.310920285158782, "grad_norm": 0.8392730951309204, "learning_rate": 6.60487588380089e-07, "loss": 0.0599, "step": 14263 }, { "epoch": 2.3110823071937783, "grad_norm": 0.935095489025116, "learning_rate": 6.601914770242776e-07, "loss": 0.0603, "step": 14264 }, { "epoch": 2.311244329228775, "grad_norm": 0.8228254318237305, "learning_rate": 6.598954219621545e-07, "loss": 0.0618, "step": 14265 }, { "epoch": 2.3114063512637717, "grad_norm": 0.9754266142845154, "learning_rate": 6.595994232027794e-07, "loss": 0.0648, "step": 14266 }, { "epoch": 2.3115683732987686, "grad_norm": 0.8244330286979675, "learning_rate": 6.593034807552076e-07, "loss": 0.051, "step": 14267 }, { "epoch": 2.3117303953337656, "grad_norm": 0.829617440700531, "learning_rate": 6.590075946284941e-07, "loss": 0.0569, "step": 14268 }, { "epoch": 2.311892417368762, "grad_norm": 1.0251657962799072, "learning_rate": 6.587117648316943e-07, "loss": 0.0588, "step": 14269 }, { "epoch": 2.312054439403759, "grad_norm": 0.8598863482475281, "learning_rate": 6.584159913738583e-07, "loss": 0.0531, "step": 14270 }, { "epoch": 2.3122164614387555, "grad_norm": 1.17449951171875, "learning_rate": 6.581202742640361e-07, "loss": 0.0737, "step": 14271 }, { "epoch": 2.3123784834737524, "grad_norm": 0.9494041800498962, "learning_rate": 6.578246135112765e-07, "loss": 0.0646, "step": 14272 }, { "epoch": 2.3125405055087493, "grad_norm": 0.9004831910133362, "learning_rate": 6.575290091246256e-07, "loss": 0.0553, "step": 14273 }, { "epoch": 2.312702527543746, "grad_norm": 0.8012595772743225, "learning_rate": 6.572334611131284e-07, "loss": 0.0556, "step": 14274 }, { "epoch": 2.3128645495787428, "grad_norm": 0.8960158228874207, "learning_rate": 6.569379694858277e-07, "loss": 0.0645, "step": 14275 }, { "epoch": 2.3130265716137393, "grad_norm": 1.01628577709198, "learning_rate": 6.566425342517652e-07, "loss": 0.0653, "step": 14276 }, { "epoch": 2.313188593648736, "grad_norm": 0.8243879079818726, "learning_rate": 6.56347155419981e-07, "loss": 0.0575, "step": 14277 }, { "epoch": 2.313350615683733, "grad_norm": 0.9484763741493225, "learning_rate": 6.560518329995108e-07, "loss": 0.0646, "step": 14278 }, { "epoch": 2.3135126377187296, "grad_norm": 1.1005399227142334, "learning_rate": 6.557565669993931e-07, "loss": 0.068, "step": 14279 }, { "epoch": 2.3136746597537265, "grad_norm": 0.9087778925895691, "learning_rate": 6.554613574286614e-07, "loss": 0.0637, "step": 14280 }, { "epoch": 2.3138366817887235, "grad_norm": 0.8552685976028442, "learning_rate": 6.551662042963491e-07, "loss": 0.0602, "step": 14281 }, { "epoch": 2.31399870382372, "grad_norm": 0.8670865297317505, "learning_rate": 6.548711076114858e-07, "loss": 0.0605, "step": 14282 }, { "epoch": 2.314160725858717, "grad_norm": 1.124015212059021, "learning_rate": 6.545760673831009e-07, "loss": 0.0686, "step": 14283 }, { "epoch": 2.3143227478937134, "grad_norm": 1.0868921279907227, "learning_rate": 6.542810836202237e-07, "loss": 0.0644, "step": 14284 }, { "epoch": 2.3144847699287103, "grad_norm": 0.8501294255256653, "learning_rate": 6.539861563318784e-07, "loss": 0.0641, "step": 14285 }, { "epoch": 2.314646791963707, "grad_norm": 0.9735018610954285, "learning_rate": 6.536912855270894e-07, "loss": 0.0572, "step": 14286 }, { "epoch": 2.3148088139987038, "grad_norm": 1.222994089126587, "learning_rate": 6.533964712148779e-07, "loss": 0.0736, "step": 14287 }, { "epoch": 2.3149708360337007, "grad_norm": 0.9061808586120605, "learning_rate": 6.531017134042678e-07, "loss": 0.0617, "step": 14288 }, { "epoch": 2.315132858068697, "grad_norm": 0.8229312896728516, "learning_rate": 6.528070121042746e-07, "loss": 0.0584, "step": 14289 }, { "epoch": 2.315294880103694, "grad_norm": 0.8917800784111023, "learning_rate": 6.52512367323917e-07, "loss": 0.0568, "step": 14290 }, { "epoch": 2.315456902138691, "grad_norm": 0.9813393950462341, "learning_rate": 6.522177790722101e-07, "loss": 0.0597, "step": 14291 }, { "epoch": 2.3156189241736875, "grad_norm": 0.951609194278717, "learning_rate": 6.519232473581675e-07, "loss": 0.054, "step": 14292 }, { "epoch": 2.3157809462086845, "grad_norm": 0.9440679550170898, "learning_rate": 6.516287721908013e-07, "loss": 0.0684, "step": 14293 }, { "epoch": 2.315942968243681, "grad_norm": 0.8250246644020081, "learning_rate": 6.513343535791216e-07, "loss": 0.0576, "step": 14294 }, { "epoch": 2.316104990278678, "grad_norm": 0.9275084733963013, "learning_rate": 6.510399915321381e-07, "loss": 0.0622, "step": 14295 }, { "epoch": 2.316267012313675, "grad_norm": 1.0191025733947754, "learning_rate": 6.507456860588554e-07, "loss": 0.0587, "step": 14296 }, { "epoch": 2.3164290343486713, "grad_norm": 1.1428616046905518, "learning_rate": 6.504514371682788e-07, "loss": 0.0684, "step": 14297 }, { "epoch": 2.3165910563836682, "grad_norm": 0.9954843521118164, "learning_rate": 6.501572448694135e-07, "loss": 0.0539, "step": 14298 }, { "epoch": 2.3167530784186647, "grad_norm": 0.9105631113052368, "learning_rate": 6.498631091712603e-07, "loss": 0.0609, "step": 14299 }, { "epoch": 2.3169151004536617, "grad_norm": 0.7692684531211853, "learning_rate": 6.495690300828183e-07, "loss": 0.056, "step": 14300 }, { "epoch": 2.3170771224886586, "grad_norm": 0.9106401205062866, "learning_rate": 6.492750076130858e-07, "loss": 0.0609, "step": 14301 }, { "epoch": 2.317239144523655, "grad_norm": 1.094725489616394, "learning_rate": 6.489810417710596e-07, "loss": 0.0642, "step": 14302 }, { "epoch": 2.317401166558652, "grad_norm": 0.7931134104728699, "learning_rate": 6.48687132565734e-07, "loss": 0.0578, "step": 14303 }, { "epoch": 2.317563188593649, "grad_norm": 0.9079036116600037, "learning_rate": 6.483932800061021e-07, "loss": 0.0586, "step": 14304 }, { "epoch": 2.3177252106286454, "grad_norm": 0.8913233280181885, "learning_rate": 6.480994841011551e-07, "loss": 0.0562, "step": 14305 }, { "epoch": 2.3178872326636424, "grad_norm": 0.9209250211715698, "learning_rate": 6.478057448598821e-07, "loss": 0.0622, "step": 14306 }, { "epoch": 2.318049254698639, "grad_norm": 0.994839608669281, "learning_rate": 6.475120622912714e-07, "loss": 0.0689, "step": 14307 }, { "epoch": 2.318211276733636, "grad_norm": 0.9868736863136292, "learning_rate": 6.472184364043085e-07, "loss": 0.0632, "step": 14308 }, { "epoch": 2.3183732987686323, "grad_norm": 0.9076941013336182, "learning_rate": 6.469248672079778e-07, "loss": 0.0614, "step": 14309 }, { "epoch": 2.3185353208036292, "grad_norm": 0.9764859080314636, "learning_rate": 6.466313547112627e-07, "loss": 0.0699, "step": 14310 }, { "epoch": 2.318697342838626, "grad_norm": 1.0119270086288452, "learning_rate": 6.463378989231414e-07, "loss": 0.0575, "step": 14311 }, { "epoch": 2.3188593648736227, "grad_norm": 0.8330714106559753, "learning_rate": 6.460444998525953e-07, "loss": 0.0593, "step": 14312 }, { "epoch": 2.3190213869086196, "grad_norm": 0.9974405765533447, "learning_rate": 6.45751157508602e-07, "loss": 0.0687, "step": 14313 }, { "epoch": 2.3191834089436165, "grad_norm": 0.9587564468383789, "learning_rate": 6.454578719001353e-07, "loss": 0.0632, "step": 14314 }, { "epoch": 2.319345430978613, "grad_norm": 0.8796777129173279, "learning_rate": 6.451646430361696e-07, "loss": 0.0626, "step": 14315 }, { "epoch": 2.31950745301361, "grad_norm": 0.9801618456840515, "learning_rate": 6.448714709256768e-07, "loss": 0.0687, "step": 14316 }, { "epoch": 2.3196694750486064, "grad_norm": 1.0900496244430542, "learning_rate": 6.445783555776289e-07, "loss": 0.0667, "step": 14317 }, { "epoch": 2.3198314970836034, "grad_norm": 0.8728010654449463, "learning_rate": 6.442852970009925e-07, "loss": 0.0648, "step": 14318 }, { "epoch": 2.3199935191186003, "grad_norm": 1.05231511592865, "learning_rate": 6.439922952047354e-07, "loss": 0.0703, "step": 14319 }, { "epoch": 2.320155541153597, "grad_norm": 1.0301355123519897, "learning_rate": 6.436993501978226e-07, "loss": 0.0609, "step": 14320 }, { "epoch": 2.3203175631885937, "grad_norm": 1.086633563041687, "learning_rate": 6.434064619892175e-07, "loss": 0.0576, "step": 14321 }, { "epoch": 2.32047958522359, "grad_norm": 0.8054091930389404, "learning_rate": 6.431136305878819e-07, "loss": 0.061, "step": 14322 }, { "epoch": 2.320641607258587, "grad_norm": 0.8511539697647095, "learning_rate": 6.428208560027755e-07, "loss": 0.0574, "step": 14323 }, { "epoch": 2.320803629293584, "grad_norm": 1.0268864631652832, "learning_rate": 6.425281382428566e-07, "loss": 0.0602, "step": 14324 }, { "epoch": 2.3209656513285806, "grad_norm": 0.941626787185669, "learning_rate": 6.422354773170825e-07, "loss": 0.0707, "step": 14325 }, { "epoch": 2.3211276733635775, "grad_norm": 0.8575886487960815, "learning_rate": 6.419428732344055e-07, "loss": 0.0642, "step": 14326 }, { "epoch": 2.3212896953985744, "grad_norm": 0.7996649146080017, "learning_rate": 6.41650326003781e-07, "loss": 0.0594, "step": 14327 }, { "epoch": 2.321451717433571, "grad_norm": 0.8261630535125732, "learning_rate": 6.413578356341602e-07, "loss": 0.061, "step": 14328 }, { "epoch": 2.321613739468568, "grad_norm": 0.9302612543106079, "learning_rate": 6.410654021344909e-07, "loss": 0.0629, "step": 14329 }, { "epoch": 2.3217757615035644, "grad_norm": 0.91783207654953, "learning_rate": 6.407730255137212e-07, "loss": 0.0618, "step": 14330 }, { "epoch": 2.3219377835385613, "grad_norm": 0.9778722524642944, "learning_rate": 6.404807057807982e-07, "loss": 0.0656, "step": 14331 }, { "epoch": 2.3220998055735578, "grad_norm": 0.9770204424858093, "learning_rate": 6.401884429446667e-07, "loss": 0.0747, "step": 14332 }, { "epoch": 2.3222618276085547, "grad_norm": 0.974606990814209, "learning_rate": 6.398962370142672e-07, "loss": 0.0708, "step": 14333 }, { "epoch": 2.3224238496435516, "grad_norm": 0.8390527963638306, "learning_rate": 6.396040879985416e-07, "loss": 0.0654, "step": 14334 }, { "epoch": 2.322585871678548, "grad_norm": 0.8269057869911194, "learning_rate": 6.393119959064287e-07, "loss": 0.0601, "step": 14335 }, { "epoch": 2.322747893713545, "grad_norm": 0.9386374354362488, "learning_rate": 6.390199607468661e-07, "loss": 0.0661, "step": 14336 }, { "epoch": 2.322909915748542, "grad_norm": 0.795021116733551, "learning_rate": 6.387279825287892e-07, "loss": 0.0568, "step": 14337 }, { "epoch": 2.3230719377835385, "grad_norm": 0.8094752430915833, "learning_rate": 6.384360612611317e-07, "loss": 0.0615, "step": 14338 }, { "epoch": 2.3232339598185354, "grad_norm": 0.8009088039398193, "learning_rate": 6.381441969528268e-07, "loss": 0.0567, "step": 14339 }, { "epoch": 2.323395981853532, "grad_norm": 1.1142011880874634, "learning_rate": 6.378523896128022e-07, "loss": 0.0741, "step": 14340 }, { "epoch": 2.323558003888529, "grad_norm": 0.8346922397613525, "learning_rate": 6.37560639249989e-07, "loss": 0.0549, "step": 14341 }, { "epoch": 2.323720025923526, "grad_norm": 0.9480348825454712, "learning_rate": 6.37268945873313e-07, "loss": 0.0684, "step": 14342 }, { "epoch": 2.3238820479585223, "grad_norm": 0.8845011591911316, "learning_rate": 6.369773094917006e-07, "loss": 0.0602, "step": 14343 }, { "epoch": 2.324044069993519, "grad_norm": 0.7797037363052368, "learning_rate": 6.36685730114073e-07, "loss": 0.0554, "step": 14344 }, { "epoch": 2.3242060920285157, "grad_norm": 0.9391816854476929, "learning_rate": 6.363942077493526e-07, "loss": 0.0599, "step": 14345 }, { "epoch": 2.3243681140635126, "grad_norm": 0.895332396030426, "learning_rate": 6.361027424064609e-07, "loss": 0.0646, "step": 14346 }, { "epoch": 2.3245301360985096, "grad_norm": 1.2685229778289795, "learning_rate": 6.35811334094314e-07, "loss": 0.0661, "step": 14347 }, { "epoch": 2.324692158133506, "grad_norm": 0.9253360033035278, "learning_rate": 6.355199828218289e-07, "loss": 0.0673, "step": 14348 }, { "epoch": 2.324854180168503, "grad_norm": 0.8045247197151184, "learning_rate": 6.352286885979206e-07, "loss": 0.0547, "step": 14349 }, { "epoch": 2.3250162022035, "grad_norm": 0.9229259490966797, "learning_rate": 6.349374514315015e-07, "loss": 0.0606, "step": 14350 }, { "epoch": 2.3251782242384964, "grad_norm": 0.9514033794403076, "learning_rate": 6.346462713314832e-07, "loss": 0.0599, "step": 14351 }, { "epoch": 2.3253402462734933, "grad_norm": 0.9198815822601318, "learning_rate": 6.343551483067751e-07, "loss": 0.0639, "step": 14352 }, { "epoch": 2.32550226830849, "grad_norm": 0.8940281271934509, "learning_rate": 6.340640823662842e-07, "loss": 0.059, "step": 14353 }, { "epoch": 2.3256642903434868, "grad_norm": 0.9115906953811646, "learning_rate": 6.337730735189174e-07, "loss": 0.0633, "step": 14354 }, { "epoch": 2.3258263123784833, "grad_norm": 0.9600539803504944, "learning_rate": 6.334821217735778e-07, "loss": 0.0585, "step": 14355 }, { "epoch": 2.32598833441348, "grad_norm": 0.9461213946342468, "learning_rate": 6.331912271391688e-07, "loss": 0.0607, "step": 14356 }, { "epoch": 2.326150356448477, "grad_norm": 0.9921082258224487, "learning_rate": 6.329003896245908e-07, "loss": 0.0614, "step": 14357 }, { "epoch": 2.3263123784834736, "grad_norm": 0.8889320492744446, "learning_rate": 6.326096092387429e-07, "loss": 0.0627, "step": 14358 }, { "epoch": 2.3264744005184705, "grad_norm": 0.9381389021873474, "learning_rate": 6.323188859905207e-07, "loss": 0.0633, "step": 14359 }, { "epoch": 2.3266364225534675, "grad_norm": 0.850811779499054, "learning_rate": 6.320282198888217e-07, "loss": 0.0619, "step": 14360 }, { "epoch": 2.326798444588464, "grad_norm": 0.831915557384491, "learning_rate": 6.317376109425397e-07, "loss": 0.0546, "step": 14361 }, { "epoch": 2.326960466623461, "grad_norm": 0.8602516651153564, "learning_rate": 6.314470591605646e-07, "loss": 0.0624, "step": 14362 }, { "epoch": 2.3271224886584574, "grad_norm": 0.9104455709457397, "learning_rate": 6.311565645517878e-07, "loss": 0.0652, "step": 14363 }, { "epoch": 2.3272845106934543, "grad_norm": 1.1242001056671143, "learning_rate": 6.308661271250974e-07, "loss": 0.0677, "step": 14364 }, { "epoch": 2.3274465327284513, "grad_norm": 0.8856641054153442, "learning_rate": 6.305757468893805e-07, "loss": 0.0649, "step": 14365 }, { "epoch": 2.3276085547634477, "grad_norm": 0.8960265517234802, "learning_rate": 6.302854238535219e-07, "loss": 0.0605, "step": 14366 }, { "epoch": 2.3277705767984447, "grad_norm": 0.9673840999603271, "learning_rate": 6.299951580264047e-07, "loss": 0.0579, "step": 14367 }, { "epoch": 2.327932598833441, "grad_norm": 1.0338740348815918, "learning_rate": 6.2970494941691e-07, "loss": 0.0713, "step": 14368 }, { "epoch": 2.328094620868438, "grad_norm": 1.090754508972168, "learning_rate": 6.294147980339182e-07, "loss": 0.0671, "step": 14369 }, { "epoch": 2.328256642903435, "grad_norm": 0.877884566783905, "learning_rate": 6.291247038863066e-07, "loss": 0.0598, "step": 14370 }, { "epoch": 2.3284186649384315, "grad_norm": 0.8679099678993225, "learning_rate": 6.288346669829518e-07, "loss": 0.0611, "step": 14371 }, { "epoch": 2.3285806869734285, "grad_norm": 0.972556471824646, "learning_rate": 6.285446873327289e-07, "loss": 0.0667, "step": 14372 }, { "epoch": 2.3287427090084254, "grad_norm": 0.8393288254737854, "learning_rate": 6.282547649445087e-07, "loss": 0.0602, "step": 14373 }, { "epoch": 2.328904731043422, "grad_norm": 0.8273356556892395, "learning_rate": 6.279648998271626e-07, "loss": 0.0599, "step": 14374 }, { "epoch": 2.329066753078419, "grad_norm": 0.8478513360023499, "learning_rate": 6.276750919895611e-07, "loss": 0.0557, "step": 14375 }, { "epoch": 2.3292287751134153, "grad_norm": 0.9557338953018188, "learning_rate": 6.273853414405715e-07, "loss": 0.0706, "step": 14376 }, { "epoch": 2.3293907971484122, "grad_norm": 0.9526261687278748, "learning_rate": 6.270956481890581e-07, "loss": 0.0676, "step": 14377 }, { "epoch": 2.3295528191834087, "grad_norm": 0.8927558660507202, "learning_rate": 6.268060122438846e-07, "loss": 0.0581, "step": 14378 }, { "epoch": 2.3297148412184057, "grad_norm": 0.9341113567352295, "learning_rate": 6.265164336139157e-07, "loss": 0.0619, "step": 14379 }, { "epoch": 2.3298768632534026, "grad_norm": 0.8526856303215027, "learning_rate": 6.262269123080095e-07, "loss": 0.06, "step": 14380 }, { "epoch": 2.330038885288399, "grad_norm": 1.0245894193649292, "learning_rate": 6.259374483350253e-07, "loss": 0.0641, "step": 14381 }, { "epoch": 2.330200907323396, "grad_norm": 0.8189849853515625, "learning_rate": 6.256480417038202e-07, "loss": 0.0572, "step": 14382 }, { "epoch": 2.330362929358393, "grad_norm": 0.9356175661087036, "learning_rate": 6.253586924232489e-07, "loss": 0.0652, "step": 14383 }, { "epoch": 2.3305249513933894, "grad_norm": 0.9660431742668152, "learning_rate": 6.250694005021651e-07, "loss": 0.0656, "step": 14384 }, { "epoch": 2.3306869734283864, "grad_norm": 0.7970089316368103, "learning_rate": 6.247801659494207e-07, "loss": 0.0563, "step": 14385 }, { "epoch": 2.330848995463383, "grad_norm": 0.9310674071311951, "learning_rate": 6.244909887738651e-07, "loss": 0.069, "step": 14386 }, { "epoch": 2.33101101749838, "grad_norm": 1.0296223163604736, "learning_rate": 6.242018689843471e-07, "loss": 0.07, "step": 14387 }, { "epoch": 2.3311730395333763, "grad_norm": 0.875980019569397, "learning_rate": 6.239128065897113e-07, "loss": 0.0623, "step": 14388 }, { "epoch": 2.3313350615683732, "grad_norm": 0.9242268204689026, "learning_rate": 6.236238015988044e-07, "loss": 0.0655, "step": 14389 }, { "epoch": 2.33149708360337, "grad_norm": 0.8916738629341125, "learning_rate": 6.233348540204689e-07, "loss": 0.0632, "step": 14390 }, { "epoch": 2.3316591056383666, "grad_norm": 0.9092872142791748, "learning_rate": 6.23045963863545e-07, "loss": 0.0652, "step": 14391 }, { "epoch": 2.3318211276733636, "grad_norm": 0.8567991852760315, "learning_rate": 6.227571311368724e-07, "loss": 0.0694, "step": 14392 }, { "epoch": 2.3319831497083605, "grad_norm": 0.921323299407959, "learning_rate": 6.22468355849288e-07, "loss": 0.0666, "step": 14393 }, { "epoch": 2.332145171743357, "grad_norm": 0.9615211486816406, "learning_rate": 6.221796380096298e-07, "loss": 0.0626, "step": 14394 }, { "epoch": 2.332307193778354, "grad_norm": 0.9106801748275757, "learning_rate": 6.218909776267295e-07, "loss": 0.0597, "step": 14395 }, { "epoch": 2.3324692158133504, "grad_norm": 0.896317183971405, "learning_rate": 6.216023747094207e-07, "loss": 0.06, "step": 14396 }, { "epoch": 2.3326312378483474, "grad_norm": 0.9247400760650635, "learning_rate": 6.213138292665333e-07, "loss": 0.0679, "step": 14397 }, { "epoch": 2.3327932598833443, "grad_norm": 0.9041847586631775, "learning_rate": 6.210253413068964e-07, "loss": 0.0586, "step": 14398 }, { "epoch": 2.332955281918341, "grad_norm": 0.840080201625824, "learning_rate": 6.207369108393374e-07, "loss": 0.0549, "step": 14399 }, { "epoch": 2.3331173039533377, "grad_norm": 0.8104625344276428, "learning_rate": 6.20448537872681e-07, "loss": 0.0578, "step": 14400 }, { "epoch": 2.333279325988334, "grad_norm": 0.9511587023735046, "learning_rate": 6.201602224157508e-07, "loss": 0.0697, "step": 14401 }, { "epoch": 2.333441348023331, "grad_norm": 1.060603380203247, "learning_rate": 6.198719644773687e-07, "loss": 0.0735, "step": 14402 }, { "epoch": 2.333603370058328, "grad_norm": 0.9461984634399414, "learning_rate": 6.195837640663546e-07, "loss": 0.06, "step": 14403 }, { "epoch": 2.3337653920933246, "grad_norm": 0.7724205851554871, "learning_rate": 6.192956211915269e-07, "loss": 0.0539, "step": 14404 }, { "epoch": 2.3339274141283215, "grad_norm": 0.7702530026435852, "learning_rate": 6.190075358617029e-07, "loss": 0.0553, "step": 14405 }, { "epoch": 2.3340894361633184, "grad_norm": 1.1382404565811157, "learning_rate": 6.187195080856953e-07, "loss": 0.0647, "step": 14406 }, { "epoch": 2.334251458198315, "grad_norm": 0.8377068042755127, "learning_rate": 6.184315378723177e-07, "loss": 0.0581, "step": 14407 }, { "epoch": 2.334413480233312, "grad_norm": 1.097277283668518, "learning_rate": 6.181436252303829e-07, "loss": 0.0576, "step": 14408 }, { "epoch": 2.3345755022683083, "grad_norm": 1.051430106163025, "learning_rate": 6.178557701686985e-07, "loss": 0.0641, "step": 14409 }, { "epoch": 2.3347375243033053, "grad_norm": 0.9613043665885925, "learning_rate": 6.175679726960731e-07, "loss": 0.0686, "step": 14410 }, { "epoch": 2.3348995463383018, "grad_norm": 0.791201114654541, "learning_rate": 6.17280232821312e-07, "loss": 0.0547, "step": 14411 }, { "epoch": 2.3350615683732987, "grad_norm": 1.0079299211502075, "learning_rate": 6.169925505532201e-07, "loss": 0.0659, "step": 14412 }, { "epoch": 2.3352235904082956, "grad_norm": 1.0087671279907227, "learning_rate": 6.167049259005989e-07, "loss": 0.0701, "step": 14413 }, { "epoch": 2.335385612443292, "grad_norm": 0.9747778177261353, "learning_rate": 6.164173588722497e-07, "loss": 0.0582, "step": 14414 }, { "epoch": 2.335547634478289, "grad_norm": 0.862362265586853, "learning_rate": 6.161298494769713e-07, "loss": 0.0548, "step": 14415 }, { "epoch": 2.335709656513286, "grad_norm": 1.0373525619506836, "learning_rate": 6.158423977235611e-07, "loss": 0.0639, "step": 14416 }, { "epoch": 2.3358716785482825, "grad_norm": 0.9192182421684265, "learning_rate": 6.155550036208125e-07, "loss": 0.066, "step": 14417 }, { "epoch": 2.3360337005832794, "grad_norm": 0.8472986221313477, "learning_rate": 6.152676671775215e-07, "loss": 0.056, "step": 14418 }, { "epoch": 2.336195722618276, "grad_norm": 1.020595669746399, "learning_rate": 6.149803884024786e-07, "loss": 0.0643, "step": 14419 }, { "epoch": 2.336357744653273, "grad_norm": 0.877204954624176, "learning_rate": 6.146931673044751e-07, "loss": 0.0563, "step": 14420 }, { "epoch": 2.3365197666882698, "grad_norm": 0.8729059100151062, "learning_rate": 6.144060038922967e-07, "loss": 0.0586, "step": 14421 }, { "epoch": 2.3366817887232663, "grad_norm": 1.002275824546814, "learning_rate": 6.141188981747323e-07, "loss": 0.0644, "step": 14422 }, { "epoch": 2.336843810758263, "grad_norm": 0.8643121123313904, "learning_rate": 6.138318501605667e-07, "loss": 0.0591, "step": 14423 }, { "epoch": 2.3370058327932597, "grad_norm": 0.9158600568771362, "learning_rate": 6.135448598585814e-07, "loss": 0.0647, "step": 14424 }, { "epoch": 2.3371678548282566, "grad_norm": 0.9756930470466614, "learning_rate": 6.132579272775583e-07, "loss": 0.0646, "step": 14425 }, { "epoch": 2.3373298768632536, "grad_norm": 0.9086965322494507, "learning_rate": 6.129710524262758e-07, "loss": 0.0622, "step": 14426 }, { "epoch": 2.33749189889825, "grad_norm": 0.8095213174819946, "learning_rate": 6.12684235313514e-07, "loss": 0.0585, "step": 14427 }, { "epoch": 2.337653920933247, "grad_norm": 0.8328830599784851, "learning_rate": 6.123974759480469e-07, "loss": 0.0553, "step": 14428 }, { "epoch": 2.337815942968244, "grad_norm": 0.9773808121681213, "learning_rate": 6.12110774338649e-07, "loss": 0.0597, "step": 14429 }, { "epoch": 2.3379779650032404, "grad_norm": 1.039530634880066, "learning_rate": 6.118241304940928e-07, "loss": 0.0648, "step": 14430 }, { "epoch": 2.3381399870382373, "grad_norm": 0.8209445476531982, "learning_rate": 6.115375444231489e-07, "loss": 0.0472, "step": 14431 }, { "epoch": 2.338302009073234, "grad_norm": 0.8805145025253296, "learning_rate": 6.112510161345861e-07, "loss": 0.0629, "step": 14432 }, { "epoch": 2.3384640311082308, "grad_norm": 1.0358737707138062, "learning_rate": 6.109645456371715e-07, "loss": 0.0585, "step": 14433 }, { "epoch": 2.3386260531432272, "grad_norm": 0.8988499641418457, "learning_rate": 6.106781329396714e-07, "loss": 0.0542, "step": 14434 }, { "epoch": 2.338788075178224, "grad_norm": 0.8143104910850525, "learning_rate": 6.103917780508475e-07, "loss": 0.0535, "step": 14435 }, { "epoch": 2.338950097213221, "grad_norm": 0.8741886019706726, "learning_rate": 6.101054809794615e-07, "loss": 0.0623, "step": 14436 }, { "epoch": 2.3391121192482176, "grad_norm": 0.9815524816513062, "learning_rate": 6.098192417342755e-07, "loss": 0.0693, "step": 14437 }, { "epoch": 2.3392741412832145, "grad_norm": 0.9720104336738586, "learning_rate": 6.095330603240468e-07, "loss": 0.0597, "step": 14438 }, { "epoch": 2.3394361633182115, "grad_norm": 0.9975051879882812, "learning_rate": 6.092469367575312e-07, "loss": 0.0565, "step": 14439 }, { "epoch": 2.339598185353208, "grad_norm": 1.0102384090423584, "learning_rate": 6.089608710434836e-07, "loss": 0.0639, "step": 14440 }, { "epoch": 2.339760207388205, "grad_norm": 1.2839295864105225, "learning_rate": 6.086748631906572e-07, "loss": 0.0529, "step": 14441 }, { "epoch": 2.3399222294232014, "grad_norm": 0.770176112651825, "learning_rate": 6.083889132078033e-07, "loss": 0.0553, "step": 14442 }, { "epoch": 2.3400842514581983, "grad_norm": 0.8635346293449402, "learning_rate": 6.081030211036707e-07, "loss": 0.0545, "step": 14443 }, { "epoch": 2.3402462734931953, "grad_norm": 0.951804518699646, "learning_rate": 6.078171868870075e-07, "loss": 0.0615, "step": 14444 }, { "epoch": 2.3404082955281917, "grad_norm": 0.8703580498695374, "learning_rate": 6.075314105665595e-07, "loss": 0.0609, "step": 14445 }, { "epoch": 2.3405703175631887, "grad_norm": 0.9203516840934753, "learning_rate": 6.072456921510703e-07, "loss": 0.0607, "step": 14446 }, { "epoch": 2.340732339598185, "grad_norm": 0.8416757583618164, "learning_rate": 6.06960031649283e-07, "loss": 0.0551, "step": 14447 }, { "epoch": 2.340894361633182, "grad_norm": 1.4582421779632568, "learning_rate": 6.066744290699372e-07, "loss": 0.0577, "step": 14448 }, { "epoch": 2.341056383668179, "grad_norm": 1.0675171613693237, "learning_rate": 6.06388884421773e-07, "loss": 0.0636, "step": 14449 }, { "epoch": 2.3412184057031755, "grad_norm": 1.3599189519882202, "learning_rate": 6.061033977135253e-07, "loss": 0.0655, "step": 14450 }, { "epoch": 2.3413804277381725, "grad_norm": 0.8678320050239563, "learning_rate": 6.058179689539309e-07, "loss": 0.0632, "step": 14451 }, { "epoch": 2.3415424497731694, "grad_norm": 0.7486401200294495, "learning_rate": 6.055325981517238e-07, "loss": 0.0604, "step": 14452 }, { "epoch": 2.341704471808166, "grad_norm": 0.9437369108200073, "learning_rate": 6.052472853156339e-07, "loss": 0.066, "step": 14453 }, { "epoch": 2.341866493843163, "grad_norm": 0.8997758030891418, "learning_rate": 6.049620304543916e-07, "loss": 0.0664, "step": 14454 }, { "epoch": 2.3420285158781593, "grad_norm": 0.826973557472229, "learning_rate": 6.046768335767248e-07, "loss": 0.0587, "step": 14455 }, { "epoch": 2.3421905379131562, "grad_norm": 0.9194096326828003, "learning_rate": 6.043916946913613e-07, "loss": 0.061, "step": 14456 }, { "epoch": 2.3423525599481527, "grad_norm": 1.0393024682998657, "learning_rate": 6.041066138070245e-07, "loss": 0.0642, "step": 14457 }, { "epoch": 2.3425145819831497, "grad_norm": 1.180245041847229, "learning_rate": 6.038215909324372e-07, "loss": 0.0696, "step": 14458 }, { "epoch": 2.3426766040181466, "grad_norm": 0.9270623326301575, "learning_rate": 6.035366260763203e-07, "loss": 0.0644, "step": 14459 }, { "epoch": 2.342838626053143, "grad_norm": 0.8007288575172424, "learning_rate": 6.032517192473935e-07, "loss": 0.0534, "step": 14460 }, { "epoch": 2.34300064808814, "grad_norm": 0.85854172706604, "learning_rate": 6.02966870454374e-07, "loss": 0.0591, "step": 14461 }, { "epoch": 2.343162670123137, "grad_norm": 0.8734108805656433, "learning_rate": 6.026820797059777e-07, "loss": 0.0645, "step": 14462 }, { "epoch": 2.3433246921581334, "grad_norm": 0.9076907634735107, "learning_rate": 6.023973470109182e-07, "loss": 0.0626, "step": 14463 }, { "epoch": 2.3434867141931304, "grad_norm": 0.7974627614021301, "learning_rate": 6.021126723779075e-07, "loss": 0.0583, "step": 14464 }, { "epoch": 2.343648736228127, "grad_norm": 0.9349268078804016, "learning_rate": 6.018280558156566e-07, "loss": 0.0679, "step": 14465 }, { "epoch": 2.343810758263124, "grad_norm": 0.9849607348442078, "learning_rate": 6.015434973328735e-07, "loss": 0.0693, "step": 14466 }, { "epoch": 2.3439727802981207, "grad_norm": 0.8764670491218567, "learning_rate": 6.012589969382659e-07, "loss": 0.0605, "step": 14467 }, { "epoch": 2.344134802333117, "grad_norm": 0.8385621905326843, "learning_rate": 6.009745546405377e-07, "loss": 0.0605, "step": 14468 }, { "epoch": 2.344296824368114, "grad_norm": 0.8304133415222168, "learning_rate": 6.006901704483917e-07, "loss": 0.0562, "step": 14469 }, { "epoch": 2.3444588464031106, "grad_norm": 0.8149062395095825, "learning_rate": 6.00405844370531e-07, "loss": 0.0544, "step": 14470 }, { "epoch": 2.3446208684381076, "grad_norm": 0.8023223280906677, "learning_rate": 6.001215764156551e-07, "loss": 0.057, "step": 14471 }, { "epoch": 2.3447828904731045, "grad_norm": 0.7750385403633118, "learning_rate": 5.998373665924606e-07, "loss": 0.0537, "step": 14472 }, { "epoch": 2.344944912508101, "grad_norm": 0.9637815952301025, "learning_rate": 5.995532149096447e-07, "loss": 0.066, "step": 14473 }, { "epoch": 2.345106934543098, "grad_norm": 0.8537014722824097, "learning_rate": 5.992691213759011e-07, "loss": 0.0598, "step": 14474 }, { "epoch": 2.345268956578095, "grad_norm": 0.8019862174987793, "learning_rate": 5.989850859999227e-07, "loss": 0.0572, "step": 14475 }, { "epoch": 2.3454309786130914, "grad_norm": 0.8479922413825989, "learning_rate": 5.987011087904007e-07, "loss": 0.0562, "step": 14476 }, { "epoch": 2.3455930006480883, "grad_norm": 0.8056397438049316, "learning_rate": 5.984171897560234e-07, "loss": 0.0596, "step": 14477 }, { "epoch": 2.345755022683085, "grad_norm": 0.8855328559875488, "learning_rate": 5.981333289054792e-07, "loss": 0.0617, "step": 14478 }, { "epoch": 2.3459170447180817, "grad_norm": 0.8664817810058594, "learning_rate": 5.978495262474509e-07, "loss": 0.0606, "step": 14479 }, { "epoch": 2.346079066753078, "grad_norm": 0.8697784543037415, "learning_rate": 5.975657817906253e-07, "loss": 0.0604, "step": 14480 }, { "epoch": 2.346241088788075, "grad_norm": 0.860231339931488, "learning_rate": 5.972820955436825e-07, "loss": 0.062, "step": 14481 }, { "epoch": 2.346403110823072, "grad_norm": 0.8649452328681946, "learning_rate": 5.96998467515304e-07, "loss": 0.0594, "step": 14482 }, { "epoch": 2.3465651328580686, "grad_norm": 0.9872585535049438, "learning_rate": 5.967148977141665e-07, "loss": 0.0711, "step": 14483 }, { "epoch": 2.3467271548930655, "grad_norm": 0.8951234817504883, "learning_rate": 5.964313861489466e-07, "loss": 0.0692, "step": 14484 }, { "epoch": 2.3468891769280624, "grad_norm": 0.8037145137786865, "learning_rate": 5.96147932828321e-07, "loss": 0.0561, "step": 14485 }, { "epoch": 2.347051198963059, "grad_norm": 0.9256864786148071, "learning_rate": 5.958645377609606e-07, "loss": 0.0604, "step": 14486 }, { "epoch": 2.347213220998056, "grad_norm": 0.9966773390769958, "learning_rate": 5.955812009555378e-07, "loss": 0.0661, "step": 14487 }, { "epoch": 2.3473752430330523, "grad_norm": 0.930876612663269, "learning_rate": 5.952979224207205e-07, "loss": 0.0658, "step": 14488 }, { "epoch": 2.3475372650680493, "grad_norm": 0.9100360870361328, "learning_rate": 5.950147021651792e-07, "loss": 0.0675, "step": 14489 }, { "epoch": 2.347699287103046, "grad_norm": 0.8395872116088867, "learning_rate": 5.947315401975773e-07, "loss": 0.0549, "step": 14490 }, { "epoch": 2.3478613091380427, "grad_norm": 0.807367205619812, "learning_rate": 5.944484365265795e-07, "loss": 0.0558, "step": 14491 }, { "epoch": 2.3480233311730396, "grad_norm": 0.902279257774353, "learning_rate": 5.941653911608486e-07, "loss": 0.0609, "step": 14492 }, { "epoch": 2.348185353208036, "grad_norm": 0.7829273343086243, "learning_rate": 5.938824041090443e-07, "loss": 0.0568, "step": 14493 }, { "epoch": 2.348347375243033, "grad_norm": 1.0976831912994385, "learning_rate": 5.935994753798258e-07, "loss": 0.0659, "step": 14494 }, { "epoch": 2.34850939727803, "grad_norm": 0.8173691630363464, "learning_rate": 5.933166049818501e-07, "loss": 0.0597, "step": 14495 }, { "epoch": 2.3486714193130265, "grad_norm": 0.7735475897789001, "learning_rate": 5.930337929237726e-07, "loss": 0.0606, "step": 14496 }, { "epoch": 2.3488334413480234, "grad_norm": 0.9407158493995667, "learning_rate": 5.927510392142458e-07, "loss": 0.0622, "step": 14497 }, { "epoch": 2.34899546338302, "grad_norm": 0.8489283323287964, "learning_rate": 5.924683438619208e-07, "loss": 0.0552, "step": 14498 }, { "epoch": 2.349157485418017, "grad_norm": 0.9036144614219666, "learning_rate": 5.921857068754494e-07, "loss": 0.0573, "step": 14499 }, { "epoch": 2.3493195074530138, "grad_norm": 0.8492865562438965, "learning_rate": 5.91903128263479e-07, "loss": 0.0608, "step": 14500 }, { "epoch": 2.3494815294880103, "grad_norm": 0.8074772953987122, "learning_rate": 5.916206080346549e-07, "loss": 0.0574, "step": 14501 }, { "epoch": 2.349643551523007, "grad_norm": 0.9476245641708374, "learning_rate": 5.913381461976217e-07, "loss": 0.0591, "step": 14502 }, { "epoch": 2.3498055735580037, "grad_norm": 0.938563346862793, "learning_rate": 5.910557427610225e-07, "loss": 0.0571, "step": 14503 }, { "epoch": 2.3499675955930006, "grad_norm": 0.9736203551292419, "learning_rate": 5.907733977334978e-07, "loss": 0.0621, "step": 14504 }, { "epoch": 2.3501296176279975, "grad_norm": 0.8642393350601196, "learning_rate": 5.904911111236872e-07, "loss": 0.0633, "step": 14505 }, { "epoch": 2.350291639662994, "grad_norm": 0.9992730617523193, "learning_rate": 5.902088829402274e-07, "loss": 0.0643, "step": 14506 }, { "epoch": 2.350453661697991, "grad_norm": 0.8306195735931396, "learning_rate": 5.899267131917547e-07, "loss": 0.0538, "step": 14507 }, { "epoch": 2.350615683732988, "grad_norm": 0.9145315885543823, "learning_rate": 5.896446018869018e-07, "loss": 0.0614, "step": 14508 }, { "epoch": 2.3507777057679844, "grad_norm": 0.948464035987854, "learning_rate": 5.893625490343014e-07, "loss": 0.0604, "step": 14509 }, { "epoch": 2.3509397278029813, "grad_norm": 0.7787171006202698, "learning_rate": 5.890805546425832e-07, "loss": 0.0572, "step": 14510 }, { "epoch": 2.351101749837978, "grad_norm": 0.9339138269424438, "learning_rate": 5.887986187203762e-07, "loss": 0.0636, "step": 14511 }, { "epoch": 2.3512637718729748, "grad_norm": 1.0364383459091187, "learning_rate": 5.885167412763051e-07, "loss": 0.067, "step": 14512 }, { "epoch": 2.3514257939079712, "grad_norm": 0.8669085502624512, "learning_rate": 5.88234922318997e-07, "loss": 0.0552, "step": 14513 }, { "epoch": 2.351587815942968, "grad_norm": 0.9558306932449341, "learning_rate": 5.879531618570738e-07, "loss": 0.0647, "step": 14514 }, { "epoch": 2.351749837977965, "grad_norm": 0.9057703614234924, "learning_rate": 5.876714598991573e-07, "loss": 0.0636, "step": 14515 }, { "epoch": 2.3519118600129616, "grad_norm": 0.9369593262672424, "learning_rate": 5.873898164538658e-07, "loss": 0.0662, "step": 14516 }, { "epoch": 2.3520738820479585, "grad_norm": 0.9057307839393616, "learning_rate": 5.871082315298168e-07, "loss": 0.0591, "step": 14517 }, { "epoch": 2.3522359040829555, "grad_norm": 0.8824877738952637, "learning_rate": 5.868267051356283e-07, "loss": 0.0576, "step": 14518 }, { "epoch": 2.352397926117952, "grad_norm": 0.895741879940033, "learning_rate": 5.865452372799121e-07, "loss": 0.0594, "step": 14519 }, { "epoch": 2.352559948152949, "grad_norm": 0.8106254935264587, "learning_rate": 5.86263827971281e-07, "loss": 0.0609, "step": 14520 }, { "epoch": 2.3527219701879454, "grad_norm": 0.7932037115097046, "learning_rate": 5.859824772183459e-07, "loss": 0.0541, "step": 14521 }, { "epoch": 2.3528839922229423, "grad_norm": 1.2299085855484009, "learning_rate": 5.857011850297148e-07, "loss": 0.0635, "step": 14522 }, { "epoch": 2.3530460142579392, "grad_norm": 0.9761585593223572, "learning_rate": 5.854199514139952e-07, "loss": 0.0563, "step": 14523 }, { "epoch": 2.3532080362929357, "grad_norm": 0.9831428527832031, "learning_rate": 5.851387763797916e-07, "loss": 0.0723, "step": 14524 }, { "epoch": 2.3533700583279327, "grad_norm": 0.9172781705856323, "learning_rate": 5.848576599357078e-07, "loss": 0.0666, "step": 14525 }, { "epoch": 2.353532080362929, "grad_norm": 0.9665817022323608, "learning_rate": 5.845766020903459e-07, "loss": 0.0599, "step": 14526 }, { "epoch": 2.353694102397926, "grad_norm": 0.8048878908157349, "learning_rate": 5.842956028523031e-07, "loss": 0.0518, "step": 14527 }, { "epoch": 2.353856124432923, "grad_norm": 1.1949489116668701, "learning_rate": 5.840146622301796e-07, "loss": 0.0697, "step": 14528 }, { "epoch": 2.3540181464679195, "grad_norm": 1.0436763763427734, "learning_rate": 5.837337802325718e-07, "loss": 0.0735, "step": 14529 }, { "epoch": 2.3541801685029164, "grad_norm": 0.9715738892555237, "learning_rate": 5.834529568680722e-07, "loss": 0.067, "step": 14530 }, { "epoch": 2.3543421905379134, "grad_norm": 0.9725156426429749, "learning_rate": 5.83172192145274e-07, "loss": 0.065, "step": 14531 }, { "epoch": 2.35450421257291, "grad_norm": 1.021435022354126, "learning_rate": 5.828914860727674e-07, "loss": 0.0635, "step": 14532 }, { "epoch": 2.354666234607907, "grad_norm": 0.9970274567604065, "learning_rate": 5.826108386591436e-07, "loss": 0.0587, "step": 14533 }, { "epoch": 2.3548282566429033, "grad_norm": 1.0088868141174316, "learning_rate": 5.823302499129873e-07, "loss": 0.0653, "step": 14534 }, { "epoch": 2.3549902786779002, "grad_norm": 0.9348923563957214, "learning_rate": 5.820497198428849e-07, "loss": 0.0649, "step": 14535 }, { "epoch": 2.3551523007128967, "grad_norm": 0.8347666263580322, "learning_rate": 5.817692484574197e-07, "loss": 0.0563, "step": 14536 }, { "epoch": 2.3553143227478937, "grad_norm": 0.8557009696960449, "learning_rate": 5.814888357651733e-07, "loss": 0.0638, "step": 14537 }, { "epoch": 2.3554763447828906, "grad_norm": 0.8046319484710693, "learning_rate": 5.81208481774726e-07, "loss": 0.0533, "step": 14538 }, { "epoch": 2.355638366817887, "grad_norm": 0.9410936832427979, "learning_rate": 5.809281864946556e-07, "loss": 0.0613, "step": 14539 }, { "epoch": 2.355800388852884, "grad_norm": 0.840984582901001, "learning_rate": 5.806479499335385e-07, "loss": 0.0563, "step": 14540 }, { "epoch": 2.355962410887881, "grad_norm": 0.9175610542297363, "learning_rate": 5.803677720999495e-07, "loss": 0.059, "step": 14541 }, { "epoch": 2.3561244329228774, "grad_norm": 0.8809838891029358, "learning_rate": 5.800876530024615e-07, "loss": 0.0598, "step": 14542 }, { "epoch": 2.3562864549578744, "grad_norm": 0.9057488441467285, "learning_rate": 5.798075926496449e-07, "loss": 0.0637, "step": 14543 }, { "epoch": 2.356448476992871, "grad_norm": 0.8945654630661011, "learning_rate": 5.795275910500703e-07, "loss": 0.067, "step": 14544 }, { "epoch": 2.356610499027868, "grad_norm": 1.0310019254684448, "learning_rate": 5.792476482123027e-07, "loss": 0.0654, "step": 14545 }, { "epoch": 2.3567725210628647, "grad_norm": 0.8311236500740051, "learning_rate": 5.789677641449087e-07, "loss": 0.0557, "step": 14546 }, { "epoch": 2.356934543097861, "grad_norm": 0.9365234971046448, "learning_rate": 5.786879388564534e-07, "loss": 0.0632, "step": 14547 }, { "epoch": 2.357096565132858, "grad_norm": 0.7686519026756287, "learning_rate": 5.784081723554971e-07, "loss": 0.0577, "step": 14548 }, { "epoch": 2.3572585871678546, "grad_norm": 0.86738520860672, "learning_rate": 5.781284646506008e-07, "loss": 0.0601, "step": 14549 }, { "epoch": 2.3574206092028516, "grad_norm": 0.8879827857017517, "learning_rate": 5.778488157503223e-07, "loss": 0.0595, "step": 14550 }, { "epoch": 2.3575826312378485, "grad_norm": 0.8974021077156067, "learning_rate": 5.775692256632187e-07, "loss": 0.0587, "step": 14551 }, { "epoch": 2.357744653272845, "grad_norm": 0.9620395302772522, "learning_rate": 5.772896943978446e-07, "loss": 0.0681, "step": 14552 }, { "epoch": 2.357906675307842, "grad_norm": 0.8157269954681396, "learning_rate": 5.770102219627526e-07, "loss": 0.058, "step": 14553 }, { "epoch": 2.358068697342839, "grad_norm": 0.857354998588562, "learning_rate": 5.767308083664949e-07, "loss": 0.0626, "step": 14554 }, { "epoch": 2.3582307193778353, "grad_norm": 1.1273916959762573, "learning_rate": 5.7645145361762e-07, "loss": 0.0657, "step": 14555 }, { "epoch": 2.3583927414128323, "grad_norm": 0.9196612238883972, "learning_rate": 5.761721577246754e-07, "loss": 0.0593, "step": 14556 }, { "epoch": 2.3585547634478288, "grad_norm": 0.9983531832695007, "learning_rate": 5.758929206962074e-07, "loss": 0.0676, "step": 14557 }, { "epoch": 2.3587167854828257, "grad_norm": 1.0570601224899292, "learning_rate": 5.756137425407598e-07, "loss": 0.0627, "step": 14558 }, { "epoch": 2.358878807517822, "grad_norm": 1.1186915636062622, "learning_rate": 5.753346232668758e-07, "loss": 0.0683, "step": 14559 }, { "epoch": 2.359040829552819, "grad_norm": 0.9919911623001099, "learning_rate": 5.750555628830928e-07, "loss": 0.0628, "step": 14560 }, { "epoch": 2.359202851587816, "grad_norm": 0.9504989385604858, "learning_rate": 5.747765613979523e-07, "loss": 0.0648, "step": 14561 }, { "epoch": 2.3593648736228126, "grad_norm": 0.9681611061096191, "learning_rate": 5.744976188199905e-07, "loss": 0.0653, "step": 14562 }, { "epoch": 2.3595268956578095, "grad_norm": 0.9179617166519165, "learning_rate": 5.742187351577416e-07, "loss": 0.064, "step": 14563 }, { "epoch": 2.3596889176928064, "grad_norm": 0.8961821794509888, "learning_rate": 5.739399104197388e-07, "loss": 0.0585, "step": 14564 }, { "epoch": 2.359850939727803, "grad_norm": 0.9559543132781982, "learning_rate": 5.73661144614514e-07, "loss": 0.0606, "step": 14565 }, { "epoch": 2.3600129617628, "grad_norm": 0.9991036653518677, "learning_rate": 5.733824377505965e-07, "loss": 0.0674, "step": 14566 }, { "epoch": 2.3601749837977963, "grad_norm": 0.9018924832344055, "learning_rate": 5.731037898365138e-07, "loss": 0.0603, "step": 14567 }, { "epoch": 2.3603370058327933, "grad_norm": 0.8355051279067993, "learning_rate": 5.728252008807925e-07, "loss": 0.0548, "step": 14568 }, { "epoch": 2.36049902786779, "grad_norm": 0.8183480501174927, "learning_rate": 5.725466708919561e-07, "loss": 0.0614, "step": 14569 }, { "epoch": 2.3606610499027867, "grad_norm": 0.9316161274909973, "learning_rate": 5.722681998785273e-07, "loss": 0.0618, "step": 14570 }, { "epoch": 2.3608230719377836, "grad_norm": 0.8802493810653687, "learning_rate": 5.719897878490265e-07, "loss": 0.0595, "step": 14571 }, { "epoch": 2.36098509397278, "grad_norm": 0.8617424368858337, "learning_rate": 5.717114348119726e-07, "loss": 0.0638, "step": 14572 }, { "epoch": 2.361147116007777, "grad_norm": 0.9052625894546509, "learning_rate": 5.71433140775883e-07, "loss": 0.0619, "step": 14573 }, { "epoch": 2.361309138042774, "grad_norm": 1.0340335369110107, "learning_rate": 5.711549057492718e-07, "loss": 0.0687, "step": 14574 }, { "epoch": 2.3614711600777705, "grad_norm": 0.8974990844726562, "learning_rate": 5.70876729740652e-07, "loss": 0.056, "step": 14575 }, { "epoch": 2.3616331821127674, "grad_norm": 0.8803661465644836, "learning_rate": 5.705986127585364e-07, "loss": 0.0533, "step": 14576 }, { "epoch": 2.3617952041477643, "grad_norm": 0.917258620262146, "learning_rate": 5.703205548114352e-07, "loss": 0.0671, "step": 14577 }, { "epoch": 2.361957226182761, "grad_norm": 0.8356657028198242, "learning_rate": 5.700425559078543e-07, "loss": 0.0538, "step": 14578 }, { "epoch": 2.3621192482177578, "grad_norm": 0.7526644468307495, "learning_rate": 5.697646160563001e-07, "loss": 0.0544, "step": 14579 }, { "epoch": 2.3622812702527543, "grad_norm": 0.8724152445793152, "learning_rate": 5.694867352652791e-07, "loss": 0.0571, "step": 14580 }, { "epoch": 2.362443292287751, "grad_norm": 0.988396167755127, "learning_rate": 5.692089135432913e-07, "loss": 0.0603, "step": 14581 }, { "epoch": 2.3626053143227477, "grad_norm": 0.8566252589225769, "learning_rate": 5.689311508988385e-07, "loss": 0.0614, "step": 14582 }, { "epoch": 2.3627673363577446, "grad_norm": 0.9325825572013855, "learning_rate": 5.686534473404195e-07, "loss": 0.062, "step": 14583 }, { "epoch": 2.3629293583927415, "grad_norm": 0.9369713664054871, "learning_rate": 5.68375802876531e-07, "loss": 0.0612, "step": 14584 }, { "epoch": 2.363091380427738, "grad_norm": 0.9119452834129333, "learning_rate": 5.680982175156688e-07, "loss": 0.0552, "step": 14585 }, { "epoch": 2.363253402462735, "grad_norm": 0.9514526128768921, "learning_rate": 5.678206912663259e-07, "loss": 0.0559, "step": 14586 }, { "epoch": 2.363415424497732, "grad_norm": 0.9722010493278503, "learning_rate": 5.675432241369938e-07, "loss": 0.0602, "step": 14587 }, { "epoch": 2.3635774465327284, "grad_norm": 0.9492889046669006, "learning_rate": 5.672658161361636e-07, "loss": 0.0601, "step": 14588 }, { "epoch": 2.3637394685677253, "grad_norm": 0.9314302206039429, "learning_rate": 5.669884672723208e-07, "loss": 0.0622, "step": 14589 }, { "epoch": 2.363901490602722, "grad_norm": 0.9510951638221741, "learning_rate": 5.667111775539538e-07, "loss": 0.0754, "step": 14590 }, { "epoch": 2.3640635126377187, "grad_norm": 1.04404616355896, "learning_rate": 5.664339469895472e-07, "loss": 0.0693, "step": 14591 }, { "epoch": 2.3642255346727157, "grad_norm": 0.877565860748291, "learning_rate": 5.661567755875816e-07, "loss": 0.061, "step": 14592 }, { "epoch": 2.364387556707712, "grad_norm": 0.8133190274238586, "learning_rate": 5.658796633565391e-07, "loss": 0.0578, "step": 14593 }, { "epoch": 2.364549578742709, "grad_norm": 0.9318547248840332, "learning_rate": 5.656026103048975e-07, "loss": 0.0612, "step": 14594 }, { "epoch": 2.3647116007777056, "grad_norm": 1.0102546215057373, "learning_rate": 5.653256164411366e-07, "loss": 0.066, "step": 14595 }, { "epoch": 2.3648736228127025, "grad_norm": 0.8324956297874451, "learning_rate": 5.650486817737291e-07, "loss": 0.0536, "step": 14596 }, { "epoch": 2.3650356448476995, "grad_norm": 0.9316480755805969, "learning_rate": 5.647718063111496e-07, "loss": 0.0612, "step": 14597 }, { "epoch": 2.365197666882696, "grad_norm": 1.2550431489944458, "learning_rate": 5.644949900618696e-07, "loss": 0.06, "step": 14598 }, { "epoch": 2.365359688917693, "grad_norm": 0.8270633816719055, "learning_rate": 5.64218233034359e-07, "loss": 0.0493, "step": 14599 }, { "epoch": 2.3655217109526894, "grad_norm": 0.9431858062744141, "learning_rate": 5.639415352370858e-07, "loss": 0.066, "step": 14600 }, { "epoch": 2.3656837329876863, "grad_norm": 0.8048388361930847, "learning_rate": 5.636648966785168e-07, "loss": 0.0605, "step": 14601 }, { "epoch": 2.3658457550226832, "grad_norm": 0.8069517016410828, "learning_rate": 5.633883173671159e-07, "loss": 0.0572, "step": 14602 }, { "epoch": 2.3660077770576797, "grad_norm": 0.9145830273628235, "learning_rate": 5.631117973113462e-07, "loss": 0.0575, "step": 14603 }, { "epoch": 2.3661697990926767, "grad_norm": 0.9402312636375427, "learning_rate": 5.628353365196682e-07, "loss": 0.064, "step": 14604 }, { "epoch": 2.366331821127673, "grad_norm": 0.8871921896934509, "learning_rate": 5.625589350005409e-07, "loss": 0.0612, "step": 14605 }, { "epoch": 2.36649384316267, "grad_norm": 0.9529529213905334, "learning_rate": 5.622825927624226e-07, "loss": 0.0685, "step": 14606 }, { "epoch": 2.366655865197667, "grad_norm": 0.8908757567405701, "learning_rate": 5.620063098137668e-07, "loss": 0.0539, "step": 14607 }, { "epoch": 2.3668178872326635, "grad_norm": 0.897894561290741, "learning_rate": 5.617300861630276e-07, "loss": 0.0623, "step": 14608 }, { "epoch": 2.3669799092676604, "grad_norm": 1.1129932403564453, "learning_rate": 5.61453921818658e-07, "loss": 0.0598, "step": 14609 }, { "epoch": 2.3671419313026574, "grad_norm": 0.9506595730781555, "learning_rate": 5.611778167891077e-07, "loss": 0.0658, "step": 14610 }, { "epoch": 2.367303953337654, "grad_norm": 1.14960777759552, "learning_rate": 5.609017710828238e-07, "loss": 0.0709, "step": 14611 }, { "epoch": 2.367465975372651, "grad_norm": 0.8241292834281921, "learning_rate": 5.60625784708253e-07, "loss": 0.0601, "step": 14612 }, { "epoch": 2.3676279974076473, "grad_norm": 0.987915575504303, "learning_rate": 5.6034985767384e-07, "loss": 0.0651, "step": 14613 }, { "epoch": 2.3677900194426442, "grad_norm": 0.8549913167953491, "learning_rate": 5.600739899880275e-07, "loss": 0.0588, "step": 14614 }, { "epoch": 2.3679520414776407, "grad_norm": 0.9886636137962341, "learning_rate": 5.597981816592565e-07, "loss": 0.0615, "step": 14615 }, { "epoch": 2.3681140635126376, "grad_norm": 0.8965216279029846, "learning_rate": 5.595224326959662e-07, "loss": 0.059, "step": 14616 }, { "epoch": 2.3682760855476346, "grad_norm": 0.8499079346656799, "learning_rate": 5.592467431065937e-07, "loss": 0.0602, "step": 14617 }, { "epoch": 2.368438107582631, "grad_norm": 0.9334196448326111, "learning_rate": 5.589711128995734e-07, "loss": 0.0587, "step": 14618 }, { "epoch": 2.368600129617628, "grad_norm": 0.8634200692176819, "learning_rate": 5.586955420833404e-07, "loss": 0.0553, "step": 14619 }, { "epoch": 2.368762151652625, "grad_norm": 0.9879969954490662, "learning_rate": 5.584200306663259e-07, "loss": 0.0591, "step": 14620 }, { "epoch": 2.3689241736876214, "grad_norm": 0.8652029037475586, "learning_rate": 5.581445786569606e-07, "loss": 0.0591, "step": 14621 }, { "epoch": 2.3690861957226184, "grad_norm": 1.0487457513809204, "learning_rate": 5.578691860636706e-07, "loss": 0.0652, "step": 14622 }, { "epoch": 2.369248217757615, "grad_norm": 0.9185906648635864, "learning_rate": 5.575938528948843e-07, "loss": 0.0608, "step": 14623 }, { "epoch": 2.369410239792612, "grad_norm": 0.8492238521575928, "learning_rate": 5.573185791590266e-07, "loss": 0.0612, "step": 14624 }, { "epoch": 2.3695722618276087, "grad_norm": 0.8446729779243469, "learning_rate": 5.570433648645182e-07, "loss": 0.0578, "step": 14625 }, { "epoch": 2.369734283862605, "grad_norm": 0.8767587542533875, "learning_rate": 5.567682100197808e-07, "loss": 0.0626, "step": 14626 }, { "epoch": 2.369896305897602, "grad_norm": 0.8166986703872681, "learning_rate": 5.564931146332334e-07, "loss": 0.0594, "step": 14627 }, { "epoch": 2.3700583279325986, "grad_norm": 0.84421306848526, "learning_rate": 5.562180787132945e-07, "loss": 0.06, "step": 14628 }, { "epoch": 2.3702203499675956, "grad_norm": 0.8665793538093567, "learning_rate": 5.559431022683779e-07, "loss": 0.046, "step": 14629 }, { "epoch": 2.3703823720025925, "grad_norm": 0.7502251267433167, "learning_rate": 5.55668185306898e-07, "loss": 0.0541, "step": 14630 }, { "epoch": 2.370544394037589, "grad_norm": 0.9786513447761536, "learning_rate": 5.553933278372664e-07, "loss": 0.0676, "step": 14631 }, { "epoch": 2.370706416072586, "grad_norm": 0.8539317846298218, "learning_rate": 5.551185298678929e-07, "loss": 0.059, "step": 14632 }, { "epoch": 2.370868438107583, "grad_norm": 0.9358149766921997, "learning_rate": 5.548437914071861e-07, "loss": 0.0671, "step": 14633 }, { "epoch": 2.3710304601425793, "grad_norm": 0.7582257986068726, "learning_rate": 5.545691124635518e-07, "loss": 0.0528, "step": 14634 }, { "epoch": 2.3711924821775763, "grad_norm": 0.9727949500083923, "learning_rate": 5.542944930453958e-07, "loss": 0.0642, "step": 14635 }, { "epoch": 2.3713545042125728, "grad_norm": 0.9191507697105408, "learning_rate": 5.54019933161119e-07, "loss": 0.0564, "step": 14636 }, { "epoch": 2.3715165262475697, "grad_norm": 0.9941038489341736, "learning_rate": 5.537454328191225e-07, "loss": 0.0654, "step": 14637 }, { "epoch": 2.371678548282566, "grad_norm": 1.0261110067367554, "learning_rate": 5.534709920278064e-07, "loss": 0.0663, "step": 14638 }, { "epoch": 2.371840570317563, "grad_norm": 1.0194072723388672, "learning_rate": 5.531966107955683e-07, "loss": 0.058, "step": 14639 }, { "epoch": 2.37200259235256, "grad_norm": 0.9304824471473694, "learning_rate": 5.52922289130802e-07, "loss": 0.0633, "step": 14640 }, { "epoch": 2.3721646143875565, "grad_norm": 0.9960220456123352, "learning_rate": 5.526480270419018e-07, "loss": 0.0677, "step": 14641 }, { "epoch": 2.3723266364225535, "grad_norm": 0.8424884080886841, "learning_rate": 5.523738245372596e-07, "loss": 0.06, "step": 14642 }, { "epoch": 2.3724886584575504, "grad_norm": 0.851523220539093, "learning_rate": 5.52099681625265e-07, "loss": 0.0618, "step": 14643 }, { "epoch": 2.372650680492547, "grad_norm": 0.8086889386177063, "learning_rate": 5.518255983143061e-07, "loss": 0.0611, "step": 14644 }, { "epoch": 2.372812702527544, "grad_norm": 0.8051342368125916, "learning_rate": 5.515515746127697e-07, "loss": 0.0589, "step": 14645 }, { "epoch": 2.3729747245625403, "grad_norm": 0.9982341527938843, "learning_rate": 5.512776105290402e-07, "loss": 0.0722, "step": 14646 }, { "epoch": 2.3731367465975373, "grad_norm": 0.933042049407959, "learning_rate": 5.510037060714995e-07, "loss": 0.0621, "step": 14647 }, { "epoch": 2.373298768632534, "grad_norm": 0.872920572757721, "learning_rate": 5.507298612485293e-07, "loss": 0.0582, "step": 14648 }, { "epoch": 2.3734607906675307, "grad_norm": 0.8864262104034424, "learning_rate": 5.504560760685079e-07, "loss": 0.0587, "step": 14649 }, { "epoch": 2.3736228127025276, "grad_norm": 0.8857452869415283, "learning_rate": 5.501823505398137e-07, "loss": 0.0643, "step": 14650 }, { "epoch": 2.373784834737524, "grad_norm": 0.9040178060531616, "learning_rate": 5.499086846708196e-07, "loss": 0.06, "step": 14651 }, { "epoch": 2.373946856772521, "grad_norm": 0.796747624874115, "learning_rate": 5.496350784699015e-07, "loss": 0.0553, "step": 14652 }, { "epoch": 2.374108878807518, "grad_norm": 1.0335019826889038, "learning_rate": 5.493615319454299e-07, "loss": 0.0642, "step": 14653 }, { "epoch": 2.3742709008425145, "grad_norm": 0.760349690914154, "learning_rate": 5.490880451057759e-07, "loss": 0.0557, "step": 14654 }, { "epoch": 2.3744329228775114, "grad_norm": 0.7830966711044312, "learning_rate": 5.488146179593057e-07, "loss": 0.0559, "step": 14655 }, { "epoch": 2.3745949449125083, "grad_norm": 1.081561803817749, "learning_rate": 5.485412505143858e-07, "loss": 0.0734, "step": 14656 }, { "epoch": 2.374756966947505, "grad_norm": 0.9587316513061523, "learning_rate": 5.482679427793827e-07, "loss": 0.0659, "step": 14657 }, { "epoch": 2.3749189889825018, "grad_norm": 0.8291387557983398, "learning_rate": 5.479946947626566e-07, "loss": 0.0584, "step": 14658 }, { "epoch": 2.3750810110174982, "grad_norm": 0.8340387940406799, "learning_rate": 5.477215064725692e-07, "loss": 0.0572, "step": 14659 }, { "epoch": 2.375243033052495, "grad_norm": 1.00160551071167, "learning_rate": 5.474483779174791e-07, "loss": 0.0646, "step": 14660 }, { "epoch": 2.3754050550874917, "grad_norm": 0.9756659269332886, "learning_rate": 5.471753091057438e-07, "loss": 0.0626, "step": 14661 }, { "epoch": 2.3755670771224886, "grad_norm": 0.8894532322883606, "learning_rate": 5.469023000457183e-07, "loss": 0.0557, "step": 14662 }, { "epoch": 2.3757290991574855, "grad_norm": 1.0041909217834473, "learning_rate": 5.466293507457557e-07, "loss": 0.0645, "step": 14663 }, { "epoch": 2.375891121192482, "grad_norm": 0.9752393364906311, "learning_rate": 5.463564612142083e-07, "loss": 0.0614, "step": 14664 }, { "epoch": 2.376053143227479, "grad_norm": 0.8114173412322998, "learning_rate": 5.460836314594259e-07, "loss": 0.0578, "step": 14665 }, { "epoch": 2.376215165262476, "grad_norm": 0.8527717590332031, "learning_rate": 5.458108614897545e-07, "loss": 0.0601, "step": 14666 }, { "epoch": 2.3763771872974724, "grad_norm": 1.0959402322769165, "learning_rate": 5.455381513135427e-07, "loss": 0.0673, "step": 14667 }, { "epoch": 2.3765392093324693, "grad_norm": 1.0152456760406494, "learning_rate": 5.452655009391341e-07, "loss": 0.0676, "step": 14668 }, { "epoch": 2.376701231367466, "grad_norm": 0.9547176957130432, "learning_rate": 5.449929103748705e-07, "loss": 0.0598, "step": 14669 }, { "epoch": 2.3768632534024627, "grad_norm": 0.8178269863128662, "learning_rate": 5.447203796290918e-07, "loss": 0.0586, "step": 14670 }, { "epoch": 2.3770252754374597, "grad_norm": 1.0196658372879028, "learning_rate": 5.444479087101387e-07, "loss": 0.0674, "step": 14671 }, { "epoch": 2.377187297472456, "grad_norm": 0.8403282761573792, "learning_rate": 5.441754976263478e-07, "loss": 0.0641, "step": 14672 }, { "epoch": 2.377349319507453, "grad_norm": 1.0634124279022217, "learning_rate": 5.43903146386053e-07, "loss": 0.0671, "step": 14673 }, { "epoch": 2.3775113415424496, "grad_norm": 0.9557974338531494, "learning_rate": 5.436308549975883e-07, "loss": 0.0666, "step": 14674 }, { "epoch": 2.3776733635774465, "grad_norm": 0.9498515725135803, "learning_rate": 5.433586234692853e-07, "loss": 0.0594, "step": 14675 }, { "epoch": 2.3778353856124435, "grad_norm": 0.8803273439407349, "learning_rate": 5.430864518094731e-07, "loss": 0.0602, "step": 14676 }, { "epoch": 2.37799740764744, "grad_norm": 0.8781242966651917, "learning_rate": 5.428143400264799e-07, "loss": 0.0583, "step": 14677 }, { "epoch": 2.378159429682437, "grad_norm": 0.9101912975311279, "learning_rate": 5.425422881286319e-07, "loss": 0.0566, "step": 14678 }, { "epoch": 2.378321451717434, "grad_norm": 0.9364922046661377, "learning_rate": 5.422702961242532e-07, "loss": 0.0705, "step": 14679 }, { "epoch": 2.3784834737524303, "grad_norm": 0.9377305507659912, "learning_rate": 5.419983640216647e-07, "loss": 0.0618, "step": 14680 }, { "epoch": 2.3786454957874272, "grad_norm": 0.8949405550956726, "learning_rate": 5.417264918291887e-07, "loss": 0.059, "step": 14681 }, { "epoch": 2.3788075178224237, "grad_norm": 1.1497116088867188, "learning_rate": 5.414546795551429e-07, "loss": 0.0691, "step": 14682 }, { "epoch": 2.3789695398574207, "grad_norm": 0.9060092568397522, "learning_rate": 5.411829272078453e-07, "loss": 0.0627, "step": 14683 }, { "epoch": 2.379131561892417, "grad_norm": 1.0954656600952148, "learning_rate": 5.409112347956089e-07, "loss": 0.0628, "step": 14684 }, { "epoch": 2.379293583927414, "grad_norm": 0.9496614336967468, "learning_rate": 5.406396023267473e-07, "loss": 0.0644, "step": 14685 }, { "epoch": 2.379455605962411, "grad_norm": 0.8147675395011902, "learning_rate": 5.403680298095737e-07, "loss": 0.0566, "step": 14686 }, { "epoch": 2.3796176279974075, "grad_norm": 0.8436890244483948, "learning_rate": 5.400965172523953e-07, "loss": 0.0596, "step": 14687 }, { "epoch": 2.3797796500324044, "grad_norm": 1.066830039024353, "learning_rate": 5.398250646635209e-07, "loss": 0.059, "step": 14688 }, { "epoch": 2.3799416720674014, "grad_norm": 1.006394386291504, "learning_rate": 5.395536720512551e-07, "loss": 0.0612, "step": 14689 }, { "epoch": 2.380103694102398, "grad_norm": 0.9849836230278015, "learning_rate": 5.392823394239042e-07, "loss": 0.0676, "step": 14690 }, { "epoch": 2.380265716137395, "grad_norm": 1.1965252161026, "learning_rate": 5.39011066789768e-07, "loss": 0.0702, "step": 14691 }, { "epoch": 2.3804277381723913, "grad_norm": 0.9115785360336304, "learning_rate": 5.387398541571479e-07, "loss": 0.0634, "step": 14692 }, { "epoch": 2.380589760207388, "grad_norm": 1.0645346641540527, "learning_rate": 5.384687015343418e-07, "loss": 0.0629, "step": 14693 }, { "epoch": 2.380751782242385, "grad_norm": 0.9696865677833557, "learning_rate": 5.381976089296467e-07, "loss": 0.0637, "step": 14694 }, { "epoch": 2.3809138042773816, "grad_norm": 0.9118956327438354, "learning_rate": 5.379265763513574e-07, "loss": 0.0642, "step": 14695 }, { "epoch": 2.3810758263123786, "grad_norm": 0.9417039155960083, "learning_rate": 5.376556038077668e-07, "loss": 0.0617, "step": 14696 }, { "epoch": 2.381237848347375, "grad_norm": 0.9443559050559998, "learning_rate": 5.373846913071659e-07, "loss": 0.0653, "step": 14697 }, { "epoch": 2.381399870382372, "grad_norm": 0.8240593671798706, "learning_rate": 5.371138388578448e-07, "loss": 0.0534, "step": 14698 }, { "epoch": 2.381561892417369, "grad_norm": 1.074074625968933, "learning_rate": 5.368430464680885e-07, "loss": 0.0641, "step": 14699 }, { "epoch": 2.3817239144523654, "grad_norm": 1.0060458183288574, "learning_rate": 5.365723141461851e-07, "loss": 0.0625, "step": 14700 }, { "epoch": 2.3818859364873624, "grad_norm": 0.9213908314704895, "learning_rate": 5.36301641900418e-07, "loss": 0.0614, "step": 14701 }, { "epoch": 2.3820479585223593, "grad_norm": 1.0687966346740723, "learning_rate": 5.360310297390681e-07, "loss": 0.0705, "step": 14702 }, { "epoch": 2.3822099805573558, "grad_norm": 0.8176394104957581, "learning_rate": 5.357604776704159e-07, "loss": 0.0566, "step": 14703 }, { "epoch": 2.3823720025923527, "grad_norm": 0.7344977259635925, "learning_rate": 5.354899857027398e-07, "loss": 0.052, "step": 14704 }, { "epoch": 2.382534024627349, "grad_norm": 0.8186675310134888, "learning_rate": 5.352195538443162e-07, "loss": 0.0598, "step": 14705 }, { "epoch": 2.382696046662346, "grad_norm": 0.8668035864830017, "learning_rate": 5.349491821034192e-07, "loss": 0.0565, "step": 14706 }, { "epoch": 2.3828580686973426, "grad_norm": 0.8794363737106323, "learning_rate": 5.346788704883222e-07, "loss": 0.0653, "step": 14707 }, { "epoch": 2.3830200907323396, "grad_norm": 0.8817334771156311, "learning_rate": 5.344086190072955e-07, "loss": 0.0602, "step": 14708 }, { "epoch": 2.3831821127673365, "grad_norm": 1.023415446281433, "learning_rate": 5.341384276686087e-07, "loss": 0.0543, "step": 14709 }, { "epoch": 2.383344134802333, "grad_norm": 0.9122980237007141, "learning_rate": 5.338682964805286e-07, "loss": 0.0592, "step": 14710 }, { "epoch": 2.38350615683733, "grad_norm": 1.0508270263671875, "learning_rate": 5.335982254513208e-07, "loss": 0.0671, "step": 14711 }, { "epoch": 2.383668178872327, "grad_norm": 0.8803588151931763, "learning_rate": 5.333282145892493e-07, "loss": 0.063, "step": 14712 }, { "epoch": 2.3838302009073233, "grad_norm": 0.9654552340507507, "learning_rate": 5.330582639025739e-07, "loss": 0.0598, "step": 14713 }, { "epoch": 2.3839922229423203, "grad_norm": 0.9189082384109497, "learning_rate": 5.327883733995562e-07, "loss": 0.0534, "step": 14714 }, { "epoch": 2.3841542449773168, "grad_norm": 0.9922522306442261, "learning_rate": 5.325185430884539e-07, "loss": 0.0617, "step": 14715 }, { "epoch": 2.3843162670123137, "grad_norm": 0.873923122882843, "learning_rate": 5.322487729775233e-07, "loss": 0.0668, "step": 14716 }, { "epoch": 2.3844782890473106, "grad_norm": 1.0077275037765503, "learning_rate": 5.319790630750182e-07, "loss": 0.0597, "step": 14717 }, { "epoch": 2.384640311082307, "grad_norm": 0.9447020888328552, "learning_rate": 5.317094133891903e-07, "loss": 0.0616, "step": 14718 }, { "epoch": 2.384802333117304, "grad_norm": 1.0282572507858276, "learning_rate": 5.314398239282926e-07, "loss": 0.072, "step": 14719 }, { "epoch": 2.3849643551523005, "grad_norm": 0.8626378178596497, "learning_rate": 5.311702947005718e-07, "loss": 0.0574, "step": 14720 }, { "epoch": 2.3851263771872975, "grad_norm": 0.8774121403694153, "learning_rate": 5.309008257142754e-07, "loss": 0.0563, "step": 14721 }, { "epoch": 2.3852883992222944, "grad_norm": 0.9489586353302002, "learning_rate": 5.306314169776486e-07, "loss": 0.0613, "step": 14722 }, { "epoch": 2.385450421257291, "grad_norm": 1.0716079473495483, "learning_rate": 5.303620684989347e-07, "loss": 0.0602, "step": 14723 }, { "epoch": 2.385612443292288, "grad_norm": 0.8634665012359619, "learning_rate": 5.30092780286375e-07, "loss": 0.0546, "step": 14724 }, { "epoch": 2.3857744653272843, "grad_norm": 1.048065423965454, "learning_rate": 5.298235523482093e-07, "loss": 0.0685, "step": 14725 }, { "epoch": 2.3859364873622813, "grad_norm": 0.8572198152542114, "learning_rate": 5.295543846926752e-07, "loss": 0.0559, "step": 14726 }, { "epoch": 2.386098509397278, "grad_norm": 0.8316371440887451, "learning_rate": 5.292852773280091e-07, "loss": 0.0603, "step": 14727 }, { "epoch": 2.3862605314322747, "grad_norm": 1.1684722900390625, "learning_rate": 5.290162302624433e-07, "loss": 0.0619, "step": 14728 }, { "epoch": 2.3864225534672716, "grad_norm": 0.960731029510498, "learning_rate": 5.287472435042116e-07, "loss": 0.0612, "step": 14729 }, { "epoch": 2.386584575502268, "grad_norm": 1.0198336839675903, "learning_rate": 5.284783170615446e-07, "loss": 0.0638, "step": 14730 }, { "epoch": 2.386746597537265, "grad_norm": 0.9215663075447083, "learning_rate": 5.282094509426694e-07, "loss": 0.0627, "step": 14731 }, { "epoch": 2.386908619572262, "grad_norm": 0.9803484678268433, "learning_rate": 5.279406451558136e-07, "loss": 0.0699, "step": 14732 }, { "epoch": 2.3870706416072585, "grad_norm": 0.8979873061180115, "learning_rate": 5.27671899709201e-07, "loss": 0.053, "step": 14733 }, { "epoch": 2.3872326636422554, "grad_norm": 1.023978352546692, "learning_rate": 5.274032146110567e-07, "loss": 0.0607, "step": 14734 }, { "epoch": 2.3873946856772523, "grad_norm": 0.7679246664047241, "learning_rate": 5.271345898695995e-07, "loss": 0.053, "step": 14735 }, { "epoch": 2.387556707712249, "grad_norm": 1.0310170650482178, "learning_rate": 5.268660254930499e-07, "loss": 0.062, "step": 14736 }, { "epoch": 2.3877187297472457, "grad_norm": 0.8446293473243713, "learning_rate": 5.265975214896249e-07, "loss": 0.0596, "step": 14737 }, { "epoch": 2.3878807517822422, "grad_norm": 1.072784662246704, "learning_rate": 5.263290778675401e-07, "loss": 0.0659, "step": 14738 }, { "epoch": 2.388042773817239, "grad_norm": 0.93727707862854, "learning_rate": 5.260606946350094e-07, "loss": 0.0634, "step": 14739 }, { "epoch": 2.3882047958522357, "grad_norm": 1.1457421779632568, "learning_rate": 5.257923718002447e-07, "loss": 0.0604, "step": 14740 }, { "epoch": 2.3883668178872326, "grad_norm": 1.0058701038360596, "learning_rate": 5.255241093714561e-07, "loss": 0.0608, "step": 14741 }, { "epoch": 2.3885288399222295, "grad_norm": 0.922507107257843, "learning_rate": 5.252559073568514e-07, "loss": 0.0557, "step": 14742 }, { "epoch": 2.388690861957226, "grad_norm": 0.9533309936523438, "learning_rate": 5.249877657646371e-07, "loss": 0.0567, "step": 14743 }, { "epoch": 2.388852883992223, "grad_norm": 0.8887631297111511, "learning_rate": 5.247196846030178e-07, "loss": 0.061, "step": 14744 }, { "epoch": 2.38901490602722, "grad_norm": 1.0275040864944458, "learning_rate": 5.244516638801966e-07, "loss": 0.0604, "step": 14745 }, { "epoch": 2.3891769280622164, "grad_norm": 0.7640145421028137, "learning_rate": 5.241837036043731e-07, "loss": 0.0586, "step": 14746 }, { "epoch": 2.3893389500972133, "grad_norm": 1.0371057987213135, "learning_rate": 5.239158037837464e-07, "loss": 0.0581, "step": 14747 }, { "epoch": 2.38950097213221, "grad_norm": 0.8980159163475037, "learning_rate": 5.236479644265153e-07, "loss": 0.0616, "step": 14748 }, { "epoch": 2.3896629941672067, "grad_norm": 0.923527181148529, "learning_rate": 5.233801855408733e-07, "loss": 0.0555, "step": 14749 }, { "epoch": 2.3898250162022037, "grad_norm": 0.9246334433555603, "learning_rate": 5.231124671350141e-07, "loss": 0.0641, "step": 14750 }, { "epoch": 2.3899870382372, "grad_norm": 0.7615161538124084, "learning_rate": 5.228448092171295e-07, "loss": 0.0538, "step": 14751 }, { "epoch": 2.390149060272197, "grad_norm": 0.9077004194259644, "learning_rate": 5.225772117954089e-07, "loss": 0.0647, "step": 14752 }, { "epoch": 2.3903110823071936, "grad_norm": 0.9212455749511719, "learning_rate": 5.223096748780407e-07, "loss": 0.0559, "step": 14753 }, { "epoch": 2.3904731043421905, "grad_norm": 0.8712791800498962, "learning_rate": 5.220421984732104e-07, "loss": 0.0621, "step": 14754 }, { "epoch": 2.3906351263771874, "grad_norm": 0.9051830172538757, "learning_rate": 5.217747825891023e-07, "loss": 0.0596, "step": 14755 }, { "epoch": 2.390797148412184, "grad_norm": 0.8532323241233826, "learning_rate": 5.215074272338986e-07, "loss": 0.0616, "step": 14756 }, { "epoch": 2.390959170447181, "grad_norm": 0.9374991059303284, "learning_rate": 5.212401324157795e-07, "loss": 0.0571, "step": 14757 }, { "epoch": 2.391121192482178, "grad_norm": 0.9487468600273132, "learning_rate": 5.20972898142924e-07, "loss": 0.0622, "step": 14758 }, { "epoch": 2.3912832145171743, "grad_norm": 0.8730701208114624, "learning_rate": 5.207057244235089e-07, "loss": 0.0594, "step": 14759 }, { "epoch": 2.3914452365521712, "grad_norm": 0.9014290571212769, "learning_rate": 5.204386112657095e-07, "loss": 0.0632, "step": 14760 }, { "epoch": 2.3916072585871677, "grad_norm": 0.9186999797821045, "learning_rate": 5.201715586776965e-07, "loss": 0.0588, "step": 14761 }, { "epoch": 2.3917692806221647, "grad_norm": 0.8001679182052612, "learning_rate": 5.199045666676436e-07, "loss": 0.0606, "step": 14762 }, { "epoch": 2.391931302657161, "grad_norm": 0.8610774874687195, "learning_rate": 5.196376352437199e-07, "loss": 0.0522, "step": 14763 }, { "epoch": 2.392093324692158, "grad_norm": 0.8187136650085449, "learning_rate": 5.193707644140913e-07, "loss": 0.0517, "step": 14764 }, { "epoch": 2.392255346727155, "grad_norm": 1.040032148361206, "learning_rate": 5.191039541869245e-07, "loss": 0.0672, "step": 14765 }, { "epoch": 2.3924173687621515, "grad_norm": 0.8961792588233948, "learning_rate": 5.188372045703824e-07, "loss": 0.0614, "step": 14766 }, { "epoch": 2.3925793907971484, "grad_norm": 0.7686487436294556, "learning_rate": 5.185705155726287e-07, "loss": 0.0552, "step": 14767 }, { "epoch": 2.3927414128321454, "grad_norm": 0.987948477268219, "learning_rate": 5.183038872018215e-07, "loss": 0.0584, "step": 14768 }, { "epoch": 2.392903434867142, "grad_norm": 0.9744378328323364, "learning_rate": 5.180373194661198e-07, "loss": 0.0644, "step": 14769 }, { "epoch": 2.393065456902139, "grad_norm": 0.9378809928894043, "learning_rate": 5.1777081237368e-07, "loss": 0.0642, "step": 14770 }, { "epoch": 2.3932274789371353, "grad_norm": 1.1186909675598145, "learning_rate": 5.175043659326564e-07, "loss": 0.0673, "step": 14771 }, { "epoch": 2.393389500972132, "grad_norm": 0.9698737263679504, "learning_rate": 5.172379801512014e-07, "loss": 0.0614, "step": 14772 }, { "epoch": 2.393551523007129, "grad_norm": 0.985581636428833, "learning_rate": 5.169716550374665e-07, "loss": 0.0609, "step": 14773 }, { "epoch": 2.3937135450421256, "grad_norm": 0.8977107405662537, "learning_rate": 5.167053905996003e-07, "loss": 0.0643, "step": 14774 }, { "epoch": 2.3938755670771226, "grad_norm": 0.8801769018173218, "learning_rate": 5.164391868457494e-07, "loss": 0.0647, "step": 14775 }, { "epoch": 2.394037589112119, "grad_norm": 1.0398766994476318, "learning_rate": 5.161730437840585e-07, "loss": 0.0658, "step": 14776 }, { "epoch": 2.394199611147116, "grad_norm": 0.9638035297393799, "learning_rate": 5.159069614226723e-07, "loss": 0.0655, "step": 14777 }, { "epoch": 2.394361633182113, "grad_norm": 1.0413271188735962, "learning_rate": 5.15640939769732e-07, "loss": 0.0587, "step": 14778 }, { "epoch": 2.3945236552171094, "grad_norm": 0.9833051562309265, "learning_rate": 5.153749788333767e-07, "loss": 0.0633, "step": 14779 }, { "epoch": 2.3946856772521063, "grad_norm": 0.9395384192466736, "learning_rate": 5.151090786217433e-07, "loss": 0.0647, "step": 14780 }, { "epoch": 2.3948476992871033, "grad_norm": 0.9286221265792847, "learning_rate": 5.148432391429703e-07, "loss": 0.0617, "step": 14781 }, { "epoch": 2.3950097213220998, "grad_norm": 0.8092193603515625, "learning_rate": 5.145774604051895e-07, "loss": 0.0587, "step": 14782 }, { "epoch": 2.3951717433570967, "grad_norm": 0.9903756380081177, "learning_rate": 5.143117424165339e-07, "loss": 0.0611, "step": 14783 }, { "epoch": 2.395333765392093, "grad_norm": 0.8078405261039734, "learning_rate": 5.140460851851336e-07, "loss": 0.0596, "step": 14784 }, { "epoch": 2.39549578742709, "grad_norm": 1.076858639717102, "learning_rate": 5.137804887191172e-07, "loss": 0.0657, "step": 14785 }, { "epoch": 2.3956578094620866, "grad_norm": 1.0425070524215698, "learning_rate": 5.135149530266112e-07, "loss": 0.0613, "step": 14786 }, { "epoch": 2.3958198314970836, "grad_norm": 0.9560631513595581, "learning_rate": 5.132494781157407e-07, "loss": 0.064, "step": 14787 }, { "epoch": 2.3959818535320805, "grad_norm": 0.765007495880127, "learning_rate": 5.129840639946279e-07, "loss": 0.0555, "step": 14788 }, { "epoch": 2.396143875567077, "grad_norm": 0.9489015936851501, "learning_rate": 5.127187106713951e-07, "loss": 0.0601, "step": 14789 }, { "epoch": 2.396305897602074, "grad_norm": 0.8428517580032349, "learning_rate": 5.124534181541596e-07, "loss": 0.0542, "step": 14790 }, { "epoch": 2.396467919637071, "grad_norm": 0.8759004473686218, "learning_rate": 5.1218818645104e-07, "loss": 0.0604, "step": 14791 }, { "epoch": 2.3966299416720673, "grad_norm": 0.9745606184005737, "learning_rate": 5.119230155701515e-07, "loss": 0.0633, "step": 14792 }, { "epoch": 2.3967919637070643, "grad_norm": 0.8945474624633789, "learning_rate": 5.116579055196085e-07, "loss": 0.0614, "step": 14793 }, { "epoch": 2.3969539857420608, "grad_norm": 0.9640982747077942, "learning_rate": 5.113928563075213e-07, "loss": 0.0664, "step": 14794 }, { "epoch": 2.3971160077770577, "grad_norm": 0.8617674112319946, "learning_rate": 5.111278679419996e-07, "loss": 0.0607, "step": 14795 }, { "epoch": 2.3972780298120546, "grad_norm": 1.115388035774231, "learning_rate": 5.108629404311535e-07, "loss": 0.0675, "step": 14796 }, { "epoch": 2.397440051847051, "grad_norm": 0.9158341288566589, "learning_rate": 5.105980737830871e-07, "loss": 0.0631, "step": 14797 }, { "epoch": 2.397602073882048, "grad_norm": 0.93878173828125, "learning_rate": 5.103332680059053e-07, "loss": 0.0581, "step": 14798 }, { "epoch": 2.3977640959170445, "grad_norm": 0.8788214921951294, "learning_rate": 5.100685231077107e-07, "loss": 0.061, "step": 14799 }, { "epoch": 2.3979261179520415, "grad_norm": 0.8120740056037903, "learning_rate": 5.098038390966039e-07, "loss": 0.0494, "step": 14800 }, { "epoch": 2.3980881399870384, "grad_norm": 0.9346016645431519, "learning_rate": 5.095392159806833e-07, "loss": 0.0663, "step": 14801 }, { "epoch": 2.398250162022035, "grad_norm": 1.0751339197158813, "learning_rate": 5.09274653768046e-07, "loss": 0.0646, "step": 14802 }, { "epoch": 2.398412184057032, "grad_norm": 0.9000687003135681, "learning_rate": 5.090101524667865e-07, "loss": 0.0632, "step": 14803 }, { "epoch": 2.3985742060920288, "grad_norm": 0.83973628282547, "learning_rate": 5.087457120849984e-07, "loss": 0.0593, "step": 14804 }, { "epoch": 2.3987362281270252, "grad_norm": 0.8891993165016174, "learning_rate": 5.084813326307728e-07, "loss": 0.0607, "step": 14805 }, { "epoch": 2.398898250162022, "grad_norm": 0.8002360463142395, "learning_rate": 5.082170141121992e-07, "loss": 0.0532, "step": 14806 }, { "epoch": 2.3990602721970187, "grad_norm": 0.980747401714325, "learning_rate": 5.079527565373654e-07, "loss": 0.0631, "step": 14807 }, { "epoch": 2.3992222942320156, "grad_norm": 0.9085080027580261, "learning_rate": 5.076885599143558e-07, "loss": 0.0628, "step": 14808 }, { "epoch": 2.399384316267012, "grad_norm": 0.9033792614936829, "learning_rate": 5.074244242512546e-07, "loss": 0.0585, "step": 14809 }, { "epoch": 2.399546338302009, "grad_norm": 0.9455671310424805, "learning_rate": 5.071603495561444e-07, "loss": 0.0657, "step": 14810 }, { "epoch": 2.399708360337006, "grad_norm": 1.0358432531356812, "learning_rate": 5.068963358371059e-07, "loss": 0.064, "step": 14811 }, { "epoch": 2.3998703823720025, "grad_norm": 0.968040943145752, "learning_rate": 5.066323831022155e-07, "loss": 0.0686, "step": 14812 }, { "epoch": 2.4000324044069994, "grad_norm": 0.8224153518676758, "learning_rate": 5.063684913595504e-07, "loss": 0.057, "step": 14813 }, { "epoch": 2.4001944264419963, "grad_norm": 0.9839720726013184, "learning_rate": 5.061046606171849e-07, "loss": 0.0606, "step": 14814 }, { "epoch": 2.400356448476993, "grad_norm": 0.8918019533157349, "learning_rate": 5.058408908831919e-07, "loss": 0.0628, "step": 14815 }, { "epoch": 2.4005184705119897, "grad_norm": 0.9705418348312378, "learning_rate": 5.055771821656416e-07, "loss": 0.0527, "step": 14816 }, { "epoch": 2.4006804925469862, "grad_norm": 1.0187605619430542, "learning_rate": 5.053135344726032e-07, "loss": 0.0579, "step": 14817 }, { "epoch": 2.400842514581983, "grad_norm": 0.8986850380897522, "learning_rate": 5.05049947812144e-07, "loss": 0.0617, "step": 14818 }, { "epoch": 2.40100453661698, "grad_norm": 0.9489282965660095, "learning_rate": 5.047864221923276e-07, "loss": 0.0595, "step": 14819 }, { "epoch": 2.4011665586519766, "grad_norm": 0.8312699794769287, "learning_rate": 5.045229576212191e-07, "loss": 0.0587, "step": 14820 }, { "epoch": 2.4013285806869735, "grad_norm": 0.8826655149459839, "learning_rate": 5.04259554106879e-07, "loss": 0.062, "step": 14821 }, { "epoch": 2.40149060272197, "grad_norm": 1.0575923919677734, "learning_rate": 5.039962116573676e-07, "loss": 0.0738, "step": 14822 }, { "epoch": 2.401652624756967, "grad_norm": 0.8363692164421082, "learning_rate": 5.037329302807409e-07, "loss": 0.0583, "step": 14823 }, { "epoch": 2.401814646791964, "grad_norm": 0.9132293462753296, "learning_rate": 5.034697099850557e-07, "loss": 0.066, "step": 14824 }, { "epoch": 2.4019766688269604, "grad_norm": 0.8294099569320679, "learning_rate": 5.032065507783671e-07, "loss": 0.0549, "step": 14825 }, { "epoch": 2.4021386908619573, "grad_norm": 0.8338202834129333, "learning_rate": 5.029434526687249e-07, "loss": 0.0594, "step": 14826 }, { "epoch": 2.402300712896954, "grad_norm": 0.9756371378898621, "learning_rate": 5.026804156641804e-07, "loss": 0.0635, "step": 14827 }, { "epoch": 2.4024627349319507, "grad_norm": 0.9361462593078613, "learning_rate": 5.02417439772781e-07, "loss": 0.0661, "step": 14828 }, { "epoch": 2.4026247569669477, "grad_norm": 0.8787729144096375, "learning_rate": 5.021545250025755e-07, "loss": 0.0638, "step": 14829 }, { "epoch": 2.402786779001944, "grad_norm": 0.8789749145507812, "learning_rate": 5.01891671361606e-07, "loss": 0.052, "step": 14830 }, { "epoch": 2.402948801036941, "grad_norm": 0.8396589756011963, "learning_rate": 5.01628878857916e-07, "loss": 0.0594, "step": 14831 }, { "epoch": 2.4031108230719376, "grad_norm": 0.9573028683662415, "learning_rate": 5.013661474995463e-07, "loss": 0.0675, "step": 14832 }, { "epoch": 2.4032728451069345, "grad_norm": 0.9700629115104675, "learning_rate": 5.011034772945359e-07, "loss": 0.0628, "step": 14833 }, { "epoch": 2.4034348671419314, "grad_norm": 1.1622406244277954, "learning_rate": 5.008408682509219e-07, "loss": 0.0643, "step": 14834 }, { "epoch": 2.403596889176928, "grad_norm": 0.8823803067207336, "learning_rate": 5.005783203767394e-07, "loss": 0.0606, "step": 14835 }, { "epoch": 2.403758911211925, "grad_norm": 0.8871647715568542, "learning_rate": 5.003158336800218e-07, "loss": 0.0567, "step": 14836 }, { "epoch": 2.403920933246922, "grad_norm": 0.9583814740180969, "learning_rate": 5.000534081688013e-07, "loss": 0.0589, "step": 14837 }, { "epoch": 2.4040829552819183, "grad_norm": 1.0450133085250854, "learning_rate": 4.997910438511052e-07, "loss": 0.0643, "step": 14838 }, { "epoch": 2.404244977316915, "grad_norm": 1.106645107269287, "learning_rate": 4.995287407349636e-07, "loss": 0.069, "step": 14839 }, { "epoch": 2.4044069993519117, "grad_norm": 0.9293127059936523, "learning_rate": 4.992664988284021e-07, "loss": 0.0619, "step": 14840 }, { "epoch": 2.4045690213869086, "grad_norm": 0.8363195061683655, "learning_rate": 4.990043181394433e-07, "loss": 0.0611, "step": 14841 }, { "epoch": 2.404731043421905, "grad_norm": 0.9512073993682861, "learning_rate": 4.987421986761101e-07, "loss": 0.0591, "step": 14842 }, { "epoch": 2.404893065456902, "grad_norm": 0.8879616260528564, "learning_rate": 4.984801404464229e-07, "loss": 0.0593, "step": 14843 }, { "epoch": 2.405055087491899, "grad_norm": 0.8494080901145935, "learning_rate": 4.982181434583996e-07, "loss": 0.0583, "step": 14844 }, { "epoch": 2.4052171095268955, "grad_norm": 0.8918368816375732, "learning_rate": 4.97956207720057e-07, "loss": 0.0637, "step": 14845 }, { "epoch": 2.4053791315618924, "grad_norm": 0.982917070388794, "learning_rate": 4.976943332394093e-07, "loss": 0.0662, "step": 14846 }, { "epoch": 2.4055411535968894, "grad_norm": 0.9451669454574585, "learning_rate": 4.974325200244698e-07, "loss": 0.0609, "step": 14847 }, { "epoch": 2.405703175631886, "grad_norm": 0.956188976764679, "learning_rate": 4.971707680832491e-07, "loss": 0.0558, "step": 14848 }, { "epoch": 2.405865197666883, "grad_norm": 0.9561905860900879, "learning_rate": 4.969090774237559e-07, "loss": 0.0601, "step": 14849 }, { "epoch": 2.4060272197018793, "grad_norm": 1.0489095449447632, "learning_rate": 4.966474480539976e-07, "loss": 0.064, "step": 14850 }, { "epoch": 2.406189241736876, "grad_norm": 0.8939489126205444, "learning_rate": 4.963858799819802e-07, "loss": 0.0586, "step": 14851 }, { "epoch": 2.406351263771873, "grad_norm": 0.8735487461090088, "learning_rate": 4.961243732157048e-07, "loss": 0.065, "step": 14852 }, { "epoch": 2.4065132858068696, "grad_norm": 0.900790810585022, "learning_rate": 4.958629277631749e-07, "loss": 0.0603, "step": 14853 }, { "epoch": 2.4066753078418666, "grad_norm": 0.991608738899231, "learning_rate": 4.956015436323897e-07, "loss": 0.0592, "step": 14854 }, { "epoch": 2.406837329876863, "grad_norm": 0.8543813228607178, "learning_rate": 4.953402208313471e-07, "loss": 0.0651, "step": 14855 }, { "epoch": 2.40699935191186, "grad_norm": 0.8871439695358276, "learning_rate": 4.950789593680422e-07, "loss": 0.0639, "step": 14856 }, { "epoch": 2.407161373946857, "grad_norm": 0.9490445852279663, "learning_rate": 4.948177592504682e-07, "loss": 0.0669, "step": 14857 }, { "epoch": 2.4073233959818534, "grad_norm": 0.8415353894233704, "learning_rate": 4.945566204866201e-07, "loss": 0.0585, "step": 14858 }, { "epoch": 2.4074854180168503, "grad_norm": 1.0602666139602661, "learning_rate": 4.942955430844856e-07, "loss": 0.0623, "step": 14859 }, { "epoch": 2.4076474400518473, "grad_norm": 0.8117191791534424, "learning_rate": 4.940345270520536e-07, "loss": 0.0591, "step": 14860 }, { "epoch": 2.4078094620868438, "grad_norm": 0.9509507417678833, "learning_rate": 4.937735723973109e-07, "loss": 0.065, "step": 14861 }, { "epoch": 2.4079714841218407, "grad_norm": 0.9605535268783569, "learning_rate": 4.935126791282419e-07, "loss": 0.0678, "step": 14862 }, { "epoch": 2.408133506156837, "grad_norm": 0.9226017594337463, "learning_rate": 4.932518472528292e-07, "loss": 0.0626, "step": 14863 }, { "epoch": 2.408295528191834, "grad_norm": 0.8513705730438232, "learning_rate": 4.929910767790536e-07, "loss": 0.0593, "step": 14864 }, { "epoch": 2.4084575502268306, "grad_norm": 0.7933249473571777, "learning_rate": 4.927303677148942e-07, "loss": 0.054, "step": 14865 }, { "epoch": 2.4086195722618275, "grad_norm": 0.8183956146240234, "learning_rate": 4.92469720068329e-07, "loss": 0.0534, "step": 14866 }, { "epoch": 2.4087815942968245, "grad_norm": 1.0017709732055664, "learning_rate": 4.922091338473309e-07, "loss": 0.0691, "step": 14867 }, { "epoch": 2.408943616331821, "grad_norm": 1.0862891674041748, "learning_rate": 4.919486090598749e-07, "loss": 0.0652, "step": 14868 }, { "epoch": 2.409105638366818, "grad_norm": 0.8221504092216492, "learning_rate": 4.91688145713933e-07, "loss": 0.0558, "step": 14869 }, { "epoch": 2.409267660401815, "grad_norm": 1.0607209205627441, "learning_rate": 4.91427743817473e-07, "loss": 0.0589, "step": 14870 }, { "epoch": 2.4094296824368113, "grad_norm": 0.8373808264732361, "learning_rate": 4.911674033784628e-07, "loss": 0.0584, "step": 14871 }, { "epoch": 2.4095917044718083, "grad_norm": 0.8616558909416199, "learning_rate": 4.909071244048694e-07, "loss": 0.0557, "step": 14872 }, { "epoch": 2.4097537265068047, "grad_norm": 0.8790077567100525, "learning_rate": 4.906469069046568e-07, "loss": 0.0644, "step": 14873 }, { "epoch": 2.4099157485418017, "grad_norm": 0.9295965433120728, "learning_rate": 4.903867508857857e-07, "loss": 0.0584, "step": 14874 }, { "epoch": 2.4100777705767986, "grad_norm": 0.8102318048477173, "learning_rate": 4.901266563562168e-07, "loss": 0.0586, "step": 14875 }, { "epoch": 2.410239792611795, "grad_norm": 0.8939180970191956, "learning_rate": 4.898666233239083e-07, "loss": 0.0602, "step": 14876 }, { "epoch": 2.410401814646792, "grad_norm": 0.8693578839302063, "learning_rate": 4.896066517968167e-07, "loss": 0.0623, "step": 14877 }, { "epoch": 2.4105638366817885, "grad_norm": 0.9918479323387146, "learning_rate": 4.893467417828967e-07, "loss": 0.0659, "step": 14878 }, { "epoch": 2.4107258587167855, "grad_norm": 0.8719764351844788, "learning_rate": 4.890868932901005e-07, "loss": 0.0604, "step": 14879 }, { "epoch": 2.4108878807517824, "grad_norm": 0.8317460417747498, "learning_rate": 4.888271063263791e-07, "loss": 0.0602, "step": 14880 }, { "epoch": 2.411049902786779, "grad_norm": 1.0084984302520752, "learning_rate": 4.885673808996816e-07, "loss": 0.0651, "step": 14881 }, { "epoch": 2.411211924821776, "grad_norm": 0.9814823269844055, "learning_rate": 4.883077170179542e-07, "loss": 0.0678, "step": 14882 }, { "epoch": 2.4113739468567728, "grad_norm": 0.9129720330238342, "learning_rate": 4.880481146891428e-07, "loss": 0.0585, "step": 14883 }, { "epoch": 2.4115359688917692, "grad_norm": 0.827803909778595, "learning_rate": 4.877885739211907e-07, "loss": 0.0594, "step": 14884 }, { "epoch": 2.411697990926766, "grad_norm": 1.2343494892120361, "learning_rate": 4.875290947220382e-07, "loss": 0.0581, "step": 14885 }, { "epoch": 2.4118600129617627, "grad_norm": 0.9479061961174011, "learning_rate": 4.872696770996246e-07, "loss": 0.0645, "step": 14886 }, { "epoch": 2.4120220349967596, "grad_norm": 0.9281307458877563, "learning_rate": 4.870103210618895e-07, "loss": 0.0629, "step": 14887 }, { "epoch": 2.412184057031756, "grad_norm": 0.9015679359436035, "learning_rate": 4.867510266167669e-07, "loss": 0.0598, "step": 14888 }, { "epoch": 2.412346079066753, "grad_norm": 1.0281862020492554, "learning_rate": 4.864917937721905e-07, "loss": 0.0627, "step": 14889 }, { "epoch": 2.41250810110175, "grad_norm": 0.7588191032409668, "learning_rate": 4.862326225360927e-07, "loss": 0.0536, "step": 14890 }, { "epoch": 2.4126701231367464, "grad_norm": 0.9489138722419739, "learning_rate": 4.859735129164036e-07, "loss": 0.0613, "step": 14891 }, { "epoch": 2.4128321451717434, "grad_norm": 0.9236519932746887, "learning_rate": 4.85714464921051e-07, "loss": 0.0605, "step": 14892 }, { "epoch": 2.4129941672067403, "grad_norm": 0.9763721823692322, "learning_rate": 4.854554785579613e-07, "loss": 0.0568, "step": 14893 }, { "epoch": 2.413156189241737, "grad_norm": 0.9076926112174988, "learning_rate": 4.851965538350589e-07, "loss": 0.0613, "step": 14894 }, { "epoch": 2.4133182112767337, "grad_norm": 0.8788256645202637, "learning_rate": 4.849376907602662e-07, "loss": 0.0606, "step": 14895 }, { "epoch": 2.4134802333117302, "grad_norm": 0.9264197945594788, "learning_rate": 4.846788893415038e-07, "loss": 0.0653, "step": 14896 }, { "epoch": 2.413642255346727, "grad_norm": 0.8280066847801208, "learning_rate": 4.844201495866904e-07, "loss": 0.0616, "step": 14897 }, { "epoch": 2.413804277381724, "grad_norm": 0.9356870055198669, "learning_rate": 4.841614715037429e-07, "loss": 0.0621, "step": 14898 }, { "epoch": 2.4139662994167206, "grad_norm": 0.9848315715789795, "learning_rate": 4.839028551005767e-07, "loss": 0.0644, "step": 14899 }, { "epoch": 2.4141283214517175, "grad_norm": 0.9987033605575562, "learning_rate": 4.83644300385103e-07, "loss": 0.0686, "step": 14900 }, { "epoch": 2.414290343486714, "grad_norm": 0.8517066836357117, "learning_rate": 4.833858073652351e-07, "loss": 0.0565, "step": 14901 }, { "epoch": 2.414452365521711, "grad_norm": 0.9564343094825745, "learning_rate": 4.831273760488816e-07, "loss": 0.0659, "step": 14902 }, { "epoch": 2.414614387556708, "grad_norm": 0.9652625322341919, "learning_rate": 4.828690064439492e-07, "loss": 0.0632, "step": 14903 }, { "epoch": 2.4147764095917044, "grad_norm": 0.9745001792907715, "learning_rate": 4.82610698558344e-07, "loss": 0.0628, "step": 14904 }, { "epoch": 2.4149384316267013, "grad_norm": 0.9366600513458252, "learning_rate": 4.823524523999685e-07, "loss": 0.0614, "step": 14905 }, { "epoch": 2.4151004536616982, "grad_norm": 0.8678472638130188, "learning_rate": 4.820942679767268e-07, "loss": 0.0553, "step": 14906 }, { "epoch": 2.4152624756966947, "grad_norm": 1.0045222043991089, "learning_rate": 4.818361452965165e-07, "loss": 0.0626, "step": 14907 }, { "epoch": 2.4154244977316917, "grad_norm": 0.8525367379188538, "learning_rate": 4.815780843672366e-07, "loss": 0.0577, "step": 14908 }, { "epoch": 2.415586519766688, "grad_norm": 0.8319639563560486, "learning_rate": 4.813200851967826e-07, "loss": 0.052, "step": 14909 }, { "epoch": 2.415748541801685, "grad_norm": 0.8100563287734985, "learning_rate": 4.810621477930488e-07, "loss": 0.0592, "step": 14910 }, { "epoch": 2.4159105638366816, "grad_norm": 0.8080427050590515, "learning_rate": 4.808042721639275e-07, "loss": 0.0556, "step": 14911 }, { "epoch": 2.4160725858716785, "grad_norm": 1.0545318126678467, "learning_rate": 4.805464583173094e-07, "loss": 0.0604, "step": 14912 }, { "epoch": 2.4162346079066754, "grad_norm": 1.0038424730300903, "learning_rate": 4.802887062610831e-07, "loss": 0.064, "step": 14913 }, { "epoch": 2.416396629941672, "grad_norm": 0.9150903224945068, "learning_rate": 4.800310160031335e-07, "loss": 0.0612, "step": 14914 }, { "epoch": 2.416558651976669, "grad_norm": 1.0144057273864746, "learning_rate": 4.797733875513475e-07, "loss": 0.0604, "step": 14915 }, { "epoch": 2.416720674011666, "grad_norm": 0.8084055781364441, "learning_rate": 4.795158209136067e-07, "loss": 0.0516, "step": 14916 }, { "epoch": 2.4168826960466623, "grad_norm": 1.1354771852493286, "learning_rate": 4.792583160977929e-07, "loss": 0.0603, "step": 14917 }, { "epoch": 2.417044718081659, "grad_norm": 0.9197603464126587, "learning_rate": 4.79000873111784e-07, "loss": 0.0604, "step": 14918 }, { "epoch": 2.4172067401166557, "grad_norm": 0.8190104961395264, "learning_rate": 4.787434919634571e-07, "loss": 0.0541, "step": 14919 }, { "epoch": 2.4173687621516526, "grad_norm": 0.9333441853523254, "learning_rate": 4.784861726606893e-07, "loss": 0.0566, "step": 14920 }, { "epoch": 2.4175307841866496, "grad_norm": 0.7886890172958374, "learning_rate": 4.782289152113518e-07, "loss": 0.0567, "step": 14921 }, { "epoch": 2.417692806221646, "grad_norm": 0.9538326859474182, "learning_rate": 4.779717196233169e-07, "loss": 0.0607, "step": 14922 }, { "epoch": 2.417854828256643, "grad_norm": 0.8618488311767578, "learning_rate": 4.777145859044543e-07, "loss": 0.0589, "step": 14923 }, { "epoch": 2.4180168502916395, "grad_norm": 0.9344967007637024, "learning_rate": 4.774575140626317e-07, "loss": 0.0666, "step": 14924 }, { "epoch": 2.4181788723266364, "grad_norm": 1.032467007637024, "learning_rate": 4.772005041057146e-07, "loss": 0.0656, "step": 14925 }, { "epoch": 2.4183408943616334, "grad_norm": 0.7819418907165527, "learning_rate": 4.769435560415666e-07, "loss": 0.0565, "step": 14926 }, { "epoch": 2.41850291639663, "grad_norm": 0.8860273361206055, "learning_rate": 4.766866698780506e-07, "loss": 0.062, "step": 14927 }, { "epoch": 2.4186649384316268, "grad_norm": 1.2574406862258911, "learning_rate": 4.764298456230265e-07, "loss": 0.059, "step": 14928 }, { "epoch": 2.4188269604666237, "grad_norm": 0.9362139105796814, "learning_rate": 4.7617308328435115e-07, "loss": 0.0599, "step": 14929 }, { "epoch": 2.41898898250162, "grad_norm": 1.0488052368164062, "learning_rate": 4.7591638286988234e-07, "loss": 0.0639, "step": 14930 }, { "epoch": 2.419151004536617, "grad_norm": 0.9460102915763855, "learning_rate": 4.756597443874747e-07, "loss": 0.0642, "step": 14931 }, { "epoch": 2.4193130265716136, "grad_norm": 0.952669084072113, "learning_rate": 4.754031678449794e-07, "loss": 0.0558, "step": 14932 }, { "epoch": 2.4194750486066106, "grad_norm": 0.9806849360466003, "learning_rate": 4.75146653250248e-07, "loss": 0.066, "step": 14933 }, { "epoch": 2.419637070641607, "grad_norm": 0.9641299247741699, "learning_rate": 4.7489020061112805e-07, "loss": 0.0566, "step": 14934 }, { "epoch": 2.419799092676604, "grad_norm": 0.7902235984802246, "learning_rate": 4.746338099354686e-07, "loss": 0.0521, "step": 14935 }, { "epoch": 2.419961114711601, "grad_norm": 0.8686903715133667, "learning_rate": 4.743774812311125e-07, "loss": 0.059, "step": 14936 }, { "epoch": 2.4201231367465974, "grad_norm": 0.8258570432662964, "learning_rate": 4.7412121450590374e-07, "loss": 0.0544, "step": 14937 }, { "epoch": 2.4202851587815943, "grad_norm": 0.9893523454666138, "learning_rate": 4.7386500976768337e-07, "loss": 0.0602, "step": 14938 }, { "epoch": 2.4204471808165913, "grad_norm": 0.9651594758033752, "learning_rate": 4.7360886702429056e-07, "loss": 0.0649, "step": 14939 }, { "epoch": 2.4206092028515878, "grad_norm": 0.8819870352745056, "learning_rate": 4.733527862835624e-07, "loss": 0.0593, "step": 14940 }, { "epoch": 2.4207712248865847, "grad_norm": 0.9650877714157104, "learning_rate": 4.730967675533346e-07, "loss": 0.0641, "step": 14941 }, { "epoch": 2.420933246921581, "grad_norm": 0.8652887344360352, "learning_rate": 4.728408108414409e-07, "loss": 0.0628, "step": 14942 }, { "epoch": 2.421095268956578, "grad_norm": 1.117307186126709, "learning_rate": 4.7258491615571277e-07, "loss": 0.0695, "step": 14943 }, { "epoch": 2.4212572909915746, "grad_norm": 0.9765766859054565, "learning_rate": 4.7232908350397984e-07, "loss": 0.0574, "step": 14944 }, { "epoch": 2.4214193130265715, "grad_norm": 0.8039732575416565, "learning_rate": 4.720733128940699e-07, "loss": 0.0507, "step": 14945 }, { "epoch": 2.4215813350615685, "grad_norm": 0.8668411374092102, "learning_rate": 4.7181760433381017e-07, "loss": 0.0637, "step": 14946 }, { "epoch": 2.421743357096565, "grad_norm": 0.9708417654037476, "learning_rate": 4.715619578310227e-07, "loss": 0.0674, "step": 14947 }, { "epoch": 2.421905379131562, "grad_norm": 0.8565574884414673, "learning_rate": 4.7130637339352995e-07, "loss": 0.0617, "step": 14948 }, { "epoch": 2.422067401166559, "grad_norm": 0.8651789426803589, "learning_rate": 4.7105085102915365e-07, "loss": 0.0635, "step": 14949 }, { "epoch": 2.4222294232015553, "grad_norm": 0.9995070695877075, "learning_rate": 4.707953907457119e-07, "loss": 0.0622, "step": 14950 }, { "epoch": 2.4223914452365523, "grad_norm": 0.948974072933197, "learning_rate": 4.7053999255101987e-07, "loss": 0.0637, "step": 14951 }, { "epoch": 2.4225534672715487, "grad_norm": 0.8724423050880432, "learning_rate": 4.702846564528929e-07, "loss": 0.0648, "step": 14952 }, { "epoch": 2.4227154893065457, "grad_norm": 0.8870061039924622, "learning_rate": 4.700293824591437e-07, "loss": 0.0611, "step": 14953 }, { "epoch": 2.4228775113415426, "grad_norm": 0.7316666841506958, "learning_rate": 4.6977417057758297e-07, "loss": 0.0513, "step": 14954 }, { "epoch": 2.423039533376539, "grad_norm": 0.8627529740333557, "learning_rate": 4.695190208160197e-07, "loss": 0.0617, "step": 14955 }, { "epoch": 2.423201555411536, "grad_norm": 0.8494905829429626, "learning_rate": 4.6926393318226045e-07, "loss": 0.057, "step": 14956 }, { "epoch": 2.4233635774465325, "grad_norm": 1.1695611476898193, "learning_rate": 4.6900890768411145e-07, "loss": 0.0733, "step": 14957 }, { "epoch": 2.4235255994815295, "grad_norm": 0.8258056640625, "learning_rate": 4.6875394432937345e-07, "loss": 0.0581, "step": 14958 }, { "epoch": 2.4236876215165264, "grad_norm": 0.8477557897567749, "learning_rate": 4.684990431258499e-07, "loss": 0.0589, "step": 14959 }, { "epoch": 2.423849643551523, "grad_norm": 0.9261993765830994, "learning_rate": 4.6824420408133953e-07, "loss": 0.0632, "step": 14960 }, { "epoch": 2.42401166558652, "grad_norm": 1.014751672744751, "learning_rate": 4.6798942720364063e-07, "loss": 0.0599, "step": 14961 }, { "epoch": 2.4241736876215167, "grad_norm": 1.0292463302612305, "learning_rate": 4.677347125005463e-07, "loss": 0.0717, "step": 14962 }, { "epoch": 2.4243357096565132, "grad_norm": 0.988088846206665, "learning_rate": 4.6748005997985264e-07, "loss": 0.0617, "step": 14963 }, { "epoch": 2.42449773169151, "grad_norm": 1.0865875482559204, "learning_rate": 4.6722546964935114e-07, "loss": 0.0694, "step": 14964 }, { "epoch": 2.4246597537265067, "grad_norm": 0.8739269375801086, "learning_rate": 4.6697094151683026e-07, "loss": 0.0573, "step": 14965 }, { "epoch": 2.4248217757615036, "grad_norm": 0.983854353427887, "learning_rate": 4.6671647559007884e-07, "loss": 0.0637, "step": 14966 }, { "epoch": 2.4249837977965, "grad_norm": 0.8353926539421082, "learning_rate": 4.6646207187688197e-07, "loss": 0.0606, "step": 14967 }, { "epoch": 2.425145819831497, "grad_norm": 0.9040305018424988, "learning_rate": 4.6620773038502625e-07, "loss": 0.0676, "step": 14968 }, { "epoch": 2.425307841866494, "grad_norm": 0.89899080991745, "learning_rate": 4.659534511222916e-07, "loss": 0.0592, "step": 14969 }, { "epoch": 2.4254698639014904, "grad_norm": 1.2018378973007202, "learning_rate": 4.656992340964589e-07, "loss": 0.0634, "step": 14970 }, { "epoch": 2.4256318859364874, "grad_norm": 0.8837653398513794, "learning_rate": 4.6544507931530676e-07, "loss": 0.0666, "step": 14971 }, { "epoch": 2.4257939079714843, "grad_norm": 0.8115970492362976, "learning_rate": 4.651909867866117e-07, "loss": 0.0603, "step": 14972 }, { "epoch": 2.425955930006481, "grad_norm": 0.8570439219474792, "learning_rate": 4.649369565181483e-07, "loss": 0.0545, "step": 14973 }, { "epoch": 2.4261179520414777, "grad_norm": 0.8621937036514282, "learning_rate": 4.64682988517689e-07, "loss": 0.0586, "step": 14974 }, { "epoch": 2.426279974076474, "grad_norm": 0.8469997048377991, "learning_rate": 4.6442908279300536e-07, "loss": 0.0578, "step": 14975 }, { "epoch": 2.426441996111471, "grad_norm": 0.898225724697113, "learning_rate": 4.641752393518661e-07, "loss": 0.0597, "step": 14976 }, { "epoch": 2.426604018146468, "grad_norm": 0.8789613842964172, "learning_rate": 4.639214582020368e-07, "loss": 0.0572, "step": 14977 }, { "epoch": 2.4267660401814646, "grad_norm": 0.9776742458343506, "learning_rate": 4.6366773935128423e-07, "loss": 0.062, "step": 14978 }, { "epoch": 2.4269280622164615, "grad_norm": 0.9235649704933167, "learning_rate": 4.634140828073716e-07, "loss": 0.0606, "step": 14979 }, { "epoch": 2.427090084251458, "grad_norm": 0.954810619354248, "learning_rate": 4.631604885780591e-07, "loss": 0.064, "step": 14980 }, { "epoch": 2.427252106286455, "grad_norm": 0.9652833938598633, "learning_rate": 4.629069566711059e-07, "loss": 0.061, "step": 14981 }, { "epoch": 2.427414128321452, "grad_norm": 0.9159372448921204, "learning_rate": 4.6265348709427146e-07, "loss": 0.0648, "step": 14982 }, { "epoch": 2.4275761503564484, "grad_norm": 0.8039860725402832, "learning_rate": 4.6240007985530913e-07, "loss": 0.0529, "step": 14983 }, { "epoch": 2.4277381723914453, "grad_norm": 0.8820045590400696, "learning_rate": 4.621467349619738e-07, "loss": 0.0562, "step": 14984 }, { "epoch": 2.4279001944264422, "grad_norm": 0.8682768940925598, "learning_rate": 4.6189345242201674e-07, "loss": 0.0572, "step": 14985 }, { "epoch": 2.4280622164614387, "grad_norm": 0.830511748790741, "learning_rate": 4.6164023224318786e-07, "loss": 0.0606, "step": 14986 }, { "epoch": 2.4282242384964356, "grad_norm": 0.9582686424255371, "learning_rate": 4.6138707443323523e-07, "loss": 0.064, "step": 14987 }, { "epoch": 2.428386260531432, "grad_norm": 0.7856261134147644, "learning_rate": 4.6113397899990474e-07, "loss": 0.0495, "step": 14988 }, { "epoch": 2.428548282566429, "grad_norm": 1.0667169094085693, "learning_rate": 4.6088094595094057e-07, "loss": 0.0712, "step": 14989 }, { "epoch": 2.4287103046014256, "grad_norm": 0.9162574410438538, "learning_rate": 4.6062797529408537e-07, "loss": 0.0666, "step": 14990 }, { "epoch": 2.4288723266364225, "grad_norm": 0.8367456197738647, "learning_rate": 4.603750670370777e-07, "loss": 0.0615, "step": 14991 }, { "epoch": 2.4290343486714194, "grad_norm": 1.0230121612548828, "learning_rate": 4.6012222118765796e-07, "loss": 0.063, "step": 14992 }, { "epoch": 2.429196370706416, "grad_norm": 0.9114834666252136, "learning_rate": 4.598694377535617e-07, "loss": 0.0676, "step": 14993 }, { "epoch": 2.429358392741413, "grad_norm": 1.0044584274291992, "learning_rate": 4.5961671674252447e-07, "loss": 0.0695, "step": 14994 }, { "epoch": 2.42952041477641, "grad_norm": 1.0129789113998413, "learning_rate": 4.593640581622771e-07, "loss": 0.0668, "step": 14995 }, { "epoch": 2.4296824368114063, "grad_norm": 1.0267726182937622, "learning_rate": 4.5911146202055113e-07, "loss": 0.0645, "step": 14996 }, { "epoch": 2.429844458846403, "grad_norm": 0.8808178901672363, "learning_rate": 4.588589283250763e-07, "loss": 0.063, "step": 14997 }, { "epoch": 2.4300064808813997, "grad_norm": 1.2315572500228882, "learning_rate": 4.5860645708357855e-07, "loss": 0.0587, "step": 14998 }, { "epoch": 2.4301685029163966, "grad_norm": 0.8882023096084595, "learning_rate": 4.5835404830378296e-07, "loss": 0.0604, "step": 14999 }, { "epoch": 2.4303305249513936, "grad_norm": 0.8703988790512085, "learning_rate": 4.581017019934131e-07, "loss": 0.0572, "step": 15000 }, { "epoch": 2.43049254698639, "grad_norm": 0.9442451596260071, "learning_rate": 4.578494181601895e-07, "loss": 0.0641, "step": 15001 }, { "epoch": 2.430654569021387, "grad_norm": 0.8746116757392883, "learning_rate": 4.57597196811832e-07, "loss": 0.0652, "step": 15002 }, { "epoch": 2.4308165910563835, "grad_norm": 0.8572196364402771, "learning_rate": 4.5734503795605763e-07, "loss": 0.058, "step": 15003 }, { "epoch": 2.4309786130913804, "grad_norm": 0.8255354166030884, "learning_rate": 4.5709294160058204e-07, "loss": 0.0576, "step": 15004 }, { "epoch": 2.4311406351263773, "grad_norm": 0.918545126914978, "learning_rate": 4.5684090775311855e-07, "loss": 0.0653, "step": 15005 }, { "epoch": 2.431302657161374, "grad_norm": 0.9600496292114258, "learning_rate": 4.565889364213791e-07, "loss": 0.0665, "step": 15006 }, { "epoch": 2.4314646791963708, "grad_norm": 1.181902527809143, "learning_rate": 4.5633702761307327e-07, "loss": 0.0618, "step": 15007 }, { "epoch": 2.4316267012313677, "grad_norm": 1.0709116458892822, "learning_rate": 4.5608518133590933e-07, "loss": 0.0652, "step": 15008 }, { "epoch": 2.431788723266364, "grad_norm": 0.9790709018707275, "learning_rate": 4.5583339759759203e-07, "loss": 0.0615, "step": 15009 }, { "epoch": 2.431950745301361, "grad_norm": 0.8114708662033081, "learning_rate": 4.5558167640582545e-07, "loss": 0.0577, "step": 15010 }, { "epoch": 2.4321127673363576, "grad_norm": 0.8501421213150024, "learning_rate": 4.553300177683129e-07, "loss": 0.0592, "step": 15011 }, { "epoch": 2.4322747893713546, "grad_norm": 0.9928768277168274, "learning_rate": 4.550784216927542e-07, "loss": 0.0652, "step": 15012 }, { "epoch": 2.432436811406351, "grad_norm": 1.0321820974349976, "learning_rate": 4.5482688818684646e-07, "loss": 0.0617, "step": 15013 }, { "epoch": 2.432598833441348, "grad_norm": 0.8856379985809326, "learning_rate": 4.5457541725828696e-07, "loss": 0.0677, "step": 15014 }, { "epoch": 2.432760855476345, "grad_norm": 0.8795359134674072, "learning_rate": 4.543240089147699e-07, "loss": 0.0588, "step": 15015 }, { "epoch": 2.4329228775113414, "grad_norm": 0.8717951774597168, "learning_rate": 4.5407266316398745e-07, "loss": 0.0664, "step": 15016 }, { "epoch": 2.4330848995463383, "grad_norm": 0.9385889768600464, "learning_rate": 4.5382138001363067e-07, "loss": 0.0656, "step": 15017 }, { "epoch": 2.4332469215813353, "grad_norm": 0.8398104906082153, "learning_rate": 4.5357015947138786e-07, "loss": 0.0579, "step": 15018 }, { "epoch": 2.4334089436163318, "grad_norm": 0.9150863289833069, "learning_rate": 4.5331900154494623e-07, "loss": 0.0595, "step": 15019 }, { "epoch": 2.4335709656513287, "grad_norm": 0.9102035164833069, "learning_rate": 4.530679062419899e-07, "loss": 0.059, "step": 15020 }, { "epoch": 2.433732987686325, "grad_norm": 0.9387508034706116, "learning_rate": 4.528168735702024e-07, "loss": 0.0654, "step": 15021 }, { "epoch": 2.433895009721322, "grad_norm": 0.9315344095230103, "learning_rate": 4.5256590353726426e-07, "loss": 0.0664, "step": 15022 }, { "epoch": 2.434057031756319, "grad_norm": 1.0084915161132812, "learning_rate": 4.523149961508558e-07, "loss": 0.0587, "step": 15023 }, { "epoch": 2.4342190537913155, "grad_norm": 0.8739719390869141, "learning_rate": 4.520641514186522e-07, "loss": 0.0623, "step": 15024 }, { "epoch": 2.4343810758263125, "grad_norm": 1.0345163345336914, "learning_rate": 4.5181336934832897e-07, "loss": 0.0693, "step": 15025 }, { "epoch": 2.434543097861309, "grad_norm": 0.8258783221244812, "learning_rate": 4.5156264994756144e-07, "loss": 0.065, "step": 15026 }, { "epoch": 2.434705119896306, "grad_norm": 0.8560571074485779, "learning_rate": 4.5131199322401926e-07, "loss": 0.0637, "step": 15027 }, { "epoch": 2.434867141931303, "grad_norm": 0.8309101462364197, "learning_rate": 4.510613991853721e-07, "loss": 0.0608, "step": 15028 }, { "epoch": 2.4350291639662993, "grad_norm": 0.91250079870224, "learning_rate": 4.5081086783928754e-07, "loss": 0.0579, "step": 15029 }, { "epoch": 2.4351911860012962, "grad_norm": 0.9563912749290466, "learning_rate": 4.5056039919343236e-07, "loss": 0.063, "step": 15030 }, { "epoch": 2.435353208036293, "grad_norm": 1.026058554649353, "learning_rate": 4.503099932554689e-07, "loss": 0.0614, "step": 15031 }, { "epoch": 2.4355152300712897, "grad_norm": 0.9105499386787415, "learning_rate": 4.5005965003305953e-07, "loss": 0.064, "step": 15032 }, { "epoch": 2.4356772521062866, "grad_norm": 0.8013800978660583, "learning_rate": 4.49809369533864e-07, "loss": 0.0565, "step": 15033 }, { "epoch": 2.435839274141283, "grad_norm": 1.1963187456130981, "learning_rate": 4.4955915176554065e-07, "loss": 0.0628, "step": 15034 }, { "epoch": 2.43600129617628, "grad_norm": 0.8717741966247559, "learning_rate": 4.49308996735745e-07, "loss": 0.0616, "step": 15035 }, { "epoch": 2.4361633182112765, "grad_norm": 0.8936408758163452, "learning_rate": 4.490589044521315e-07, "loss": 0.0577, "step": 15036 }, { "epoch": 2.4363253402462735, "grad_norm": 0.8526644706726074, "learning_rate": 4.4880887492235265e-07, "loss": 0.0613, "step": 15037 }, { "epoch": 2.4364873622812704, "grad_norm": 0.8698762059211731, "learning_rate": 4.4855890815405867e-07, "loss": 0.0637, "step": 15038 }, { "epoch": 2.436649384316267, "grad_norm": 0.8664577007293701, "learning_rate": 4.483090041548968e-07, "loss": 0.0591, "step": 15039 }, { "epoch": 2.436811406351264, "grad_norm": 0.8795555830001831, "learning_rate": 4.4805916293251486e-07, "loss": 0.0665, "step": 15040 }, { "epoch": 2.4369734283862607, "grad_norm": 0.8652831315994263, "learning_rate": 4.4780938449455747e-07, "loss": 0.0586, "step": 15041 }, { "epoch": 2.4371354504212572, "grad_norm": 0.8803925514221191, "learning_rate": 4.4755966884866606e-07, "loss": 0.0581, "step": 15042 }, { "epoch": 2.437297472456254, "grad_norm": 0.9763633608818054, "learning_rate": 4.4731001600248234e-07, "loss": 0.0573, "step": 15043 }, { "epoch": 2.4374594944912507, "grad_norm": 0.9532651305198669, "learning_rate": 4.470604259636438e-07, "loss": 0.0591, "step": 15044 }, { "epoch": 2.4376215165262476, "grad_norm": 0.8046735525131226, "learning_rate": 4.4681089873978957e-07, "loss": 0.0601, "step": 15045 }, { "epoch": 2.4377835385612445, "grad_norm": 0.9399983882904053, "learning_rate": 4.465614343385524e-07, "loss": 0.0584, "step": 15046 }, { "epoch": 2.437945560596241, "grad_norm": 0.8739177584648132, "learning_rate": 4.463120327675663e-07, "loss": 0.0596, "step": 15047 }, { "epoch": 2.438107582631238, "grad_norm": 0.8265982270240784, "learning_rate": 4.46062694034462e-07, "loss": 0.0565, "step": 15048 }, { "epoch": 2.4382696046662344, "grad_norm": 0.8940845727920532, "learning_rate": 4.45813418146869e-07, "loss": 0.0627, "step": 15049 }, { "epoch": 2.4384316267012314, "grad_norm": 0.8442987203598022, "learning_rate": 4.455642051124143e-07, "loss": 0.0598, "step": 15050 }, { "epoch": 2.4385936487362283, "grad_norm": 1.1562384366989136, "learning_rate": 4.4531505493872334e-07, "loss": 0.0673, "step": 15051 }, { "epoch": 2.438755670771225, "grad_norm": 0.9282644391059875, "learning_rate": 4.4506596763341985e-07, "loss": 0.059, "step": 15052 }, { "epoch": 2.4389176928062217, "grad_norm": 0.8264767527580261, "learning_rate": 4.4481694320412383e-07, "loss": 0.0552, "step": 15053 }, { "epoch": 2.439079714841218, "grad_norm": 0.8514079451560974, "learning_rate": 4.445679816584567e-07, "loss": 0.0586, "step": 15054 }, { "epoch": 2.439241736876215, "grad_norm": 0.8398727178573608, "learning_rate": 4.4431908300403506e-07, "loss": 0.061, "step": 15055 }, { "epoch": 2.439403758911212, "grad_norm": 0.8882549405097961, "learning_rate": 4.4407024724847534e-07, "loss": 0.0603, "step": 15056 }, { "epoch": 2.4395657809462086, "grad_norm": 1.0399445295333862, "learning_rate": 4.4382147439939045e-07, "loss": 0.0656, "step": 15057 }, { "epoch": 2.4397278029812055, "grad_norm": 0.9227023124694824, "learning_rate": 4.4357276446439197e-07, "loss": 0.064, "step": 15058 }, { "epoch": 2.439889825016202, "grad_norm": 0.911526620388031, "learning_rate": 4.4332411745109135e-07, "loss": 0.0632, "step": 15059 }, { "epoch": 2.440051847051199, "grad_norm": 0.8623772263526917, "learning_rate": 4.4307553336709525e-07, "loss": 0.0604, "step": 15060 }, { "epoch": 2.440213869086196, "grad_norm": 0.9016939401626587, "learning_rate": 4.428270122200104e-07, "loss": 0.0576, "step": 15061 }, { "epoch": 2.4403758911211924, "grad_norm": 0.9120055437088013, "learning_rate": 4.4257855401744044e-07, "loss": 0.0614, "step": 15062 }, { "epoch": 2.4405379131561893, "grad_norm": 1.0015937089920044, "learning_rate": 4.4233015876698787e-07, "loss": 0.0607, "step": 15063 }, { "epoch": 2.440699935191186, "grad_norm": 0.8989893794059753, "learning_rate": 4.42081826476253e-07, "loss": 0.0566, "step": 15064 }, { "epoch": 2.4408619572261827, "grad_norm": 0.9549097418785095, "learning_rate": 4.4183355715283425e-07, "loss": 0.0642, "step": 15065 }, { "epoch": 2.4410239792611796, "grad_norm": 0.9465216398239136, "learning_rate": 4.4158535080432803e-07, "loss": 0.0662, "step": 15066 }, { "epoch": 2.441186001296176, "grad_norm": 0.9384377002716064, "learning_rate": 4.413372074383293e-07, "loss": 0.0565, "step": 15067 }, { "epoch": 2.441348023331173, "grad_norm": 0.8981598019599915, "learning_rate": 4.4108912706242876e-07, "loss": 0.0571, "step": 15068 }, { "epoch": 2.4415100453661696, "grad_norm": 0.9155302047729492, "learning_rate": 4.408411096842194e-07, "loss": 0.0575, "step": 15069 }, { "epoch": 2.4416720674011665, "grad_norm": 0.8059252500534058, "learning_rate": 4.405931553112894e-07, "loss": 0.0568, "step": 15070 }, { "epoch": 2.4418340894361634, "grad_norm": 0.778841495513916, "learning_rate": 4.4034526395122485e-07, "loss": 0.0521, "step": 15071 }, { "epoch": 2.44199611147116, "grad_norm": 0.8791018724441528, "learning_rate": 4.4009743561161e-07, "loss": 0.0576, "step": 15072 }, { "epoch": 2.442158133506157, "grad_norm": 1.0327953100204468, "learning_rate": 4.3984967030002964e-07, "loss": 0.0668, "step": 15073 }, { "epoch": 2.442320155541154, "grad_norm": 0.899375319480896, "learning_rate": 4.396019680240643e-07, "loss": 0.0592, "step": 15074 }, { "epoch": 2.4424821775761503, "grad_norm": 0.8751654028892517, "learning_rate": 4.3935432879129215e-07, "loss": 0.0572, "step": 15075 }, { "epoch": 2.442644199611147, "grad_norm": 0.8992448449134827, "learning_rate": 4.3910675260929096e-07, "loss": 0.0603, "step": 15076 }, { "epoch": 2.4428062216461437, "grad_norm": 0.8739854693412781, "learning_rate": 4.3885923948563585e-07, "loss": 0.0588, "step": 15077 }, { "epoch": 2.4429682436811406, "grad_norm": 0.8920753598213196, "learning_rate": 4.386117894278999e-07, "loss": 0.0579, "step": 15078 }, { "epoch": 2.4431302657161376, "grad_norm": 0.9651167988777161, "learning_rate": 4.383644024436551e-07, "loss": 0.0569, "step": 15079 }, { "epoch": 2.443292287751134, "grad_norm": 0.9695246815681458, "learning_rate": 4.381170785404704e-07, "loss": 0.0606, "step": 15080 }, { "epoch": 2.443454309786131, "grad_norm": 0.8437383770942688, "learning_rate": 4.378698177259133e-07, "loss": 0.0604, "step": 15081 }, { "epoch": 2.4436163318211275, "grad_norm": 0.9582122564315796, "learning_rate": 4.376226200075495e-07, "loss": 0.0653, "step": 15082 }, { "epoch": 2.4437783538561244, "grad_norm": 1.188623070716858, "learning_rate": 4.3737548539294266e-07, "loss": 0.0702, "step": 15083 }, { "epoch": 2.4439403758911213, "grad_norm": 0.975605309009552, "learning_rate": 4.3712841388965476e-07, "loss": 0.0653, "step": 15084 }, { "epoch": 2.444102397926118, "grad_norm": 0.9400991201400757, "learning_rate": 4.368814055052459e-07, "loss": 0.0586, "step": 15085 }, { "epoch": 2.4442644199611148, "grad_norm": 0.957561194896698, "learning_rate": 4.3663446024727247e-07, "loss": 0.0627, "step": 15086 }, { "epoch": 2.4444264419961117, "grad_norm": 0.8637754917144775, "learning_rate": 4.3638757812329095e-07, "loss": 0.0598, "step": 15087 }, { "epoch": 2.444588464031108, "grad_norm": 0.7822928428649902, "learning_rate": 4.3614075914085617e-07, "loss": 0.0555, "step": 15088 }, { "epoch": 2.444750486066105, "grad_norm": 0.9488714337348938, "learning_rate": 4.358940033075207e-07, "loss": 0.0554, "step": 15089 }, { "epoch": 2.4449125081011016, "grad_norm": 1.0039563179016113, "learning_rate": 4.356473106308326e-07, "loss": 0.0636, "step": 15090 }, { "epoch": 2.4450745301360985, "grad_norm": 1.1325535774230957, "learning_rate": 4.3540068111834144e-07, "loss": 0.0678, "step": 15091 }, { "epoch": 2.445236552171095, "grad_norm": 0.962975263595581, "learning_rate": 4.351541147775931e-07, "loss": 0.0666, "step": 15092 }, { "epoch": 2.445398574206092, "grad_norm": 1.0033221244812012, "learning_rate": 4.3490761161613186e-07, "loss": 0.0651, "step": 15093 }, { "epoch": 2.445560596241089, "grad_norm": 1.0614312887191772, "learning_rate": 4.346611716415006e-07, "loss": 0.0623, "step": 15094 }, { "epoch": 2.4457226182760854, "grad_norm": 0.8363829255104065, "learning_rate": 4.344147948612393e-07, "loss": 0.0593, "step": 15095 }, { "epoch": 2.4458846403110823, "grad_norm": 0.7662982940673828, "learning_rate": 4.341684812828867e-07, "loss": 0.0506, "step": 15096 }, { "epoch": 2.4460466623460793, "grad_norm": 0.8307511806488037, "learning_rate": 4.3392223091397925e-07, "loss": 0.059, "step": 15097 }, { "epoch": 2.4462086843810757, "grad_norm": 0.9444789290428162, "learning_rate": 4.336760437620519e-07, "loss": 0.0681, "step": 15098 }, { "epoch": 2.4463707064160727, "grad_norm": 0.9271620512008667, "learning_rate": 4.33429919834637e-07, "loss": 0.0625, "step": 15099 }, { "epoch": 2.446532728451069, "grad_norm": 0.7332361936569214, "learning_rate": 4.331838591392662e-07, "loss": 0.0545, "step": 15100 }, { "epoch": 2.446694750486066, "grad_norm": 0.8970035314559937, "learning_rate": 4.3293786168346674e-07, "loss": 0.0612, "step": 15101 }, { "epoch": 2.446856772521063, "grad_norm": 0.9213421940803528, "learning_rate": 4.326919274747668e-07, "loss": 0.0575, "step": 15102 }, { "epoch": 2.4470187945560595, "grad_norm": 0.8858630657196045, "learning_rate": 4.32446056520692e-07, "loss": 0.0578, "step": 15103 }, { "epoch": 2.4471808165910565, "grad_norm": 1.067657709121704, "learning_rate": 4.322002488287635e-07, "loss": 0.0658, "step": 15104 }, { "epoch": 2.447342838626053, "grad_norm": 0.9456945061683655, "learning_rate": 4.319545044065038e-07, "loss": 0.0584, "step": 15105 }, { "epoch": 2.44750486066105, "grad_norm": 0.8441051244735718, "learning_rate": 4.317088232614308e-07, "loss": 0.0632, "step": 15106 }, { "epoch": 2.447666882696047, "grad_norm": 0.9701780080795288, "learning_rate": 4.3146320540106397e-07, "loss": 0.0627, "step": 15107 }, { "epoch": 2.4478289047310433, "grad_norm": 0.8207990527153015, "learning_rate": 4.3121765083291663e-07, "loss": 0.0608, "step": 15108 }, { "epoch": 2.4479909267660402, "grad_norm": 0.8845500946044922, "learning_rate": 4.3097215956450304e-07, "loss": 0.0592, "step": 15109 }, { "epoch": 2.448152948801037, "grad_norm": 0.8777700662612915, "learning_rate": 4.307267316033342e-07, "loss": 0.0663, "step": 15110 }, { "epoch": 2.4483149708360337, "grad_norm": 0.9499847888946533, "learning_rate": 4.3048136695691965e-07, "loss": 0.0657, "step": 15111 }, { "epoch": 2.4484769928710306, "grad_norm": 0.8840598464012146, "learning_rate": 4.3023606563276753e-07, "loss": 0.0586, "step": 15112 }, { "epoch": 2.448639014906027, "grad_norm": 0.9703836441040039, "learning_rate": 4.2999082763838293e-07, "loss": 0.06, "step": 15113 }, { "epoch": 2.448801036941024, "grad_norm": 1.0197018384933472, "learning_rate": 4.297456529812702e-07, "loss": 0.0734, "step": 15114 }, { "epoch": 2.4489630589760205, "grad_norm": 0.9363842606544495, "learning_rate": 4.2950054166892937e-07, "loss": 0.0602, "step": 15115 }, { "epoch": 2.4491250810110174, "grad_norm": 0.8683654069900513, "learning_rate": 4.292554937088622e-07, "loss": 0.0586, "step": 15116 }, { "epoch": 2.4492871030460144, "grad_norm": 0.9922870397567749, "learning_rate": 4.290105091085656e-07, "loss": 0.0618, "step": 15117 }, { "epoch": 2.449449125081011, "grad_norm": 0.9936143755912781, "learning_rate": 4.287655878755365e-07, "loss": 0.0596, "step": 15118 }, { "epoch": 2.449611147116008, "grad_norm": 0.7621217370033264, "learning_rate": 4.2852073001726754e-07, "loss": 0.0545, "step": 15119 }, { "epoch": 2.4497731691510047, "grad_norm": 1.023511290550232, "learning_rate": 4.282759355412505e-07, "loss": 0.0672, "step": 15120 }, { "epoch": 2.4499351911860012, "grad_norm": 0.9315782189369202, "learning_rate": 4.280312044549778e-07, "loss": 0.0593, "step": 15121 }, { "epoch": 2.450097213220998, "grad_norm": 0.8633824586868286, "learning_rate": 4.2778653676593534e-07, "loss": 0.0586, "step": 15122 }, { "epoch": 2.4502592352559946, "grad_norm": 0.99221271276474, "learning_rate": 4.275419324816105e-07, "loss": 0.0601, "step": 15123 }, { "epoch": 2.4504212572909916, "grad_norm": 0.7723276615142822, "learning_rate": 4.272973916094872e-07, "loss": 0.0519, "step": 15124 }, { "epoch": 2.4505832793259885, "grad_norm": 0.8727956414222717, "learning_rate": 4.2705291415704757e-07, "loss": 0.0611, "step": 15125 }, { "epoch": 2.450745301360985, "grad_norm": 1.0098901987075806, "learning_rate": 4.268085001317726e-07, "loss": 0.0693, "step": 15126 }, { "epoch": 2.450907323395982, "grad_norm": 0.9148622751235962, "learning_rate": 4.2656414954114044e-07, "loss": 0.0559, "step": 15127 }, { "epoch": 2.4510693454309784, "grad_norm": 1.0653668642044067, "learning_rate": 4.263198623926279e-07, "loss": 0.0658, "step": 15128 }, { "epoch": 2.4512313674659754, "grad_norm": 1.062276005744934, "learning_rate": 4.260756386937095e-07, "loss": 0.0647, "step": 15129 }, { "epoch": 2.4513933895009723, "grad_norm": 0.9811892509460449, "learning_rate": 4.258314784518569e-07, "loss": 0.0649, "step": 15130 }, { "epoch": 2.451555411535969, "grad_norm": 0.8770061135292053, "learning_rate": 4.2558738167454233e-07, "loss": 0.0617, "step": 15131 }, { "epoch": 2.4517174335709657, "grad_norm": 0.860925018787384, "learning_rate": 4.253433483692337e-07, "loss": 0.0617, "step": 15132 }, { "epoch": 2.4518794556059627, "grad_norm": 0.9953809976577759, "learning_rate": 4.250993785433988e-07, "loss": 0.0604, "step": 15133 }, { "epoch": 2.452041477640959, "grad_norm": 0.9298536777496338, "learning_rate": 4.248554722045009e-07, "loss": 0.0598, "step": 15134 }, { "epoch": 2.452203499675956, "grad_norm": 0.8232978582382202, "learning_rate": 4.246116293600033e-07, "loss": 0.0578, "step": 15135 }, { "epoch": 2.4523655217109526, "grad_norm": 0.9326602220535278, "learning_rate": 4.2436785001736896e-07, "loss": 0.0566, "step": 15136 }, { "epoch": 2.4525275437459495, "grad_norm": 1.011151671409607, "learning_rate": 4.241241341840546e-07, "loss": 0.0606, "step": 15137 }, { "epoch": 2.452689565780946, "grad_norm": 0.8477997183799744, "learning_rate": 4.2388048186751823e-07, "loss": 0.0556, "step": 15138 }, { "epoch": 2.452851587815943, "grad_norm": 0.9269173741340637, "learning_rate": 4.2363689307521494e-07, "loss": 0.0573, "step": 15139 }, { "epoch": 2.45301360985094, "grad_norm": 0.9142125844955444, "learning_rate": 4.233933678145982e-07, "loss": 0.0639, "step": 15140 }, { "epoch": 2.4531756318859363, "grad_norm": 1.0160726308822632, "learning_rate": 4.2314990609311905e-07, "loss": 0.0579, "step": 15141 }, { "epoch": 2.4533376539209333, "grad_norm": 0.950394332408905, "learning_rate": 4.229065079182268e-07, "loss": 0.0654, "step": 15142 }, { "epoch": 2.45349967595593, "grad_norm": 0.9993029236793518, "learning_rate": 4.2266317329736904e-07, "loss": 0.0583, "step": 15143 }, { "epoch": 2.4536616979909267, "grad_norm": 0.9236908555030823, "learning_rate": 4.224199022379913e-07, "loss": 0.0595, "step": 15144 }, { "epoch": 2.4538237200259236, "grad_norm": 0.7894659042358398, "learning_rate": 4.2217669474753644e-07, "loss": 0.0562, "step": 15145 }, { "epoch": 2.45398574206092, "grad_norm": 0.8922528028488159, "learning_rate": 4.2193355083344684e-07, "loss": 0.0649, "step": 15146 }, { "epoch": 2.454147764095917, "grad_norm": 0.8606491088867188, "learning_rate": 4.216904705031624e-07, "loss": 0.0582, "step": 15147 }, { "epoch": 2.454309786130914, "grad_norm": 0.8970129489898682, "learning_rate": 4.2144745376411946e-07, "loss": 0.0609, "step": 15148 }, { "epoch": 2.4544718081659105, "grad_norm": 0.8637439012527466, "learning_rate": 4.2120450062375364e-07, "loss": 0.0556, "step": 15149 }, { "epoch": 2.4546338302009074, "grad_norm": 0.9069430828094482, "learning_rate": 4.2096161108950015e-07, "loss": 0.0568, "step": 15150 }, { "epoch": 2.454795852235904, "grad_norm": 1.070995807647705, "learning_rate": 4.2071878516879107e-07, "loss": 0.0597, "step": 15151 }, { "epoch": 2.454957874270901, "grad_norm": 0.8751211762428284, "learning_rate": 4.204760228690546e-07, "loss": 0.0612, "step": 15152 }, { "epoch": 2.4551198963058978, "grad_norm": 0.9568556547164917, "learning_rate": 4.202333241977194e-07, "loss": 0.0578, "step": 15153 }, { "epoch": 2.4552819183408943, "grad_norm": 0.8110483288764954, "learning_rate": 4.1999068916221184e-07, "loss": 0.0536, "step": 15154 }, { "epoch": 2.455443940375891, "grad_norm": 1.0597234964370728, "learning_rate": 4.1974811776995526e-07, "loss": 0.0536, "step": 15155 }, { "epoch": 2.4556059624108877, "grad_norm": 0.8247944712638855, "learning_rate": 4.1950561002837257e-07, "loss": 0.0552, "step": 15156 }, { "epoch": 2.4557679844458846, "grad_norm": 0.8759192228317261, "learning_rate": 4.1926316594488315e-07, "loss": 0.0639, "step": 15157 }, { "epoch": 2.4559300064808816, "grad_norm": 0.8859750032424927, "learning_rate": 4.1902078552690573e-07, "loss": 0.055, "step": 15158 }, { "epoch": 2.456092028515878, "grad_norm": 0.9089791774749756, "learning_rate": 4.1877846878185635e-07, "loss": 0.0567, "step": 15159 }, { "epoch": 2.456254050550875, "grad_norm": 0.9586995244026184, "learning_rate": 4.185362157171496e-07, "loss": 0.0611, "step": 15160 }, { "epoch": 2.4564160725858715, "grad_norm": 0.9053599238395691, "learning_rate": 4.1829402634019746e-07, "loss": 0.0628, "step": 15161 }, { "epoch": 2.4565780946208684, "grad_norm": 0.8209922313690186, "learning_rate": 4.1805190065841107e-07, "loss": 0.0568, "step": 15162 }, { "epoch": 2.4567401166558653, "grad_norm": 1.2450376749038696, "learning_rate": 4.178098386791971e-07, "loss": 0.0639, "step": 15163 }, { "epoch": 2.456902138690862, "grad_norm": 0.9918776750564575, "learning_rate": 4.175678404099637e-07, "loss": 0.0621, "step": 15164 }, { "epoch": 2.4570641607258588, "grad_norm": 0.916986346244812, "learning_rate": 4.1732590585811586e-07, "loss": 0.0672, "step": 15165 }, { "epoch": 2.4572261827608557, "grad_norm": 1.3180897235870361, "learning_rate": 4.1708403503105456e-07, "loss": 0.0618, "step": 15166 }, { "epoch": 2.457388204795852, "grad_norm": 0.946431577205658, "learning_rate": 4.168422279361811e-07, "loss": 0.059, "step": 15167 }, { "epoch": 2.457550226830849, "grad_norm": 0.9107024669647217, "learning_rate": 4.166004845808941e-07, "loss": 0.0579, "step": 15168 }, { "epoch": 2.4577122488658456, "grad_norm": 0.9876469969749451, "learning_rate": 4.163588049725914e-07, "loss": 0.0622, "step": 15169 }, { "epoch": 2.4578742709008425, "grad_norm": 0.9634394645690918, "learning_rate": 4.1611718911866663e-07, "loss": 0.0572, "step": 15170 }, { "epoch": 2.458036292935839, "grad_norm": 0.9489405155181885, "learning_rate": 4.158756370265127e-07, "loss": 0.0577, "step": 15171 }, { "epoch": 2.458198314970836, "grad_norm": 0.8651463389396667, "learning_rate": 4.1563414870352093e-07, "loss": 0.0559, "step": 15172 }, { "epoch": 2.458360337005833, "grad_norm": 0.8804042935371399, "learning_rate": 4.1539272415708014e-07, "loss": 0.0612, "step": 15173 }, { "epoch": 2.4585223590408294, "grad_norm": 0.8089525103569031, "learning_rate": 4.1515136339457725e-07, "loss": 0.0591, "step": 15174 }, { "epoch": 2.4586843810758263, "grad_norm": 0.8935924768447876, "learning_rate": 4.1491006642339765e-07, "loss": 0.0638, "step": 15175 }, { "epoch": 2.4588464031108233, "grad_norm": 0.8347843289375305, "learning_rate": 4.146688332509241e-07, "loss": 0.0616, "step": 15176 }, { "epoch": 2.4590084251458197, "grad_norm": 0.9167835712432861, "learning_rate": 4.144276638845382e-07, "loss": 0.0613, "step": 15177 }, { "epoch": 2.4591704471808167, "grad_norm": 1.0505998134613037, "learning_rate": 4.1418655833161794e-07, "loss": 0.0596, "step": 15178 }, { "epoch": 2.459332469215813, "grad_norm": 0.8533574938774109, "learning_rate": 4.139455165995418e-07, "loss": 0.0528, "step": 15179 }, { "epoch": 2.45949449125081, "grad_norm": 0.8060251474380493, "learning_rate": 4.137045386956853e-07, "loss": 0.0542, "step": 15180 }, { "epoch": 2.459656513285807, "grad_norm": 0.7060052752494812, "learning_rate": 4.1346362462742067e-07, "loss": 0.0521, "step": 15181 }, { "epoch": 2.4598185353208035, "grad_norm": 1.0977946519851685, "learning_rate": 4.1322277440211973e-07, "loss": 0.062, "step": 15182 }, { "epoch": 2.4599805573558005, "grad_norm": 0.7807490229606628, "learning_rate": 4.129819880271516e-07, "loss": 0.0566, "step": 15183 }, { "epoch": 2.460142579390797, "grad_norm": 1.1565881967544556, "learning_rate": 4.1274126550988505e-07, "loss": 0.0633, "step": 15184 }, { "epoch": 2.460304601425794, "grad_norm": 0.9105919003486633, "learning_rate": 4.125006068576842e-07, "loss": 0.0642, "step": 15185 }, { "epoch": 2.460466623460791, "grad_norm": 0.8822482824325562, "learning_rate": 4.1226001207791327e-07, "loss": 0.0622, "step": 15186 }, { "epoch": 2.4606286454957873, "grad_norm": 0.873638927936554, "learning_rate": 4.120194811779335e-07, "loss": 0.0659, "step": 15187 }, { "epoch": 2.4607906675307842, "grad_norm": 0.9651090502738953, "learning_rate": 4.1177901416510485e-07, "loss": 0.0645, "step": 15188 }, { "epoch": 2.460952689565781, "grad_norm": 0.8976024389266968, "learning_rate": 4.1153861104678505e-07, "loss": 0.0582, "step": 15189 }, { "epoch": 2.4611147116007777, "grad_norm": 1.0942816734313965, "learning_rate": 4.112982718303299e-07, "loss": 0.0669, "step": 15190 }, { "epoch": 2.4612767336357746, "grad_norm": 1.016480803489685, "learning_rate": 4.1105799652309347e-07, "loss": 0.0662, "step": 15191 }, { "epoch": 2.461438755670771, "grad_norm": 0.9741160869598389, "learning_rate": 4.1081778513242606e-07, "loss": 0.0628, "step": 15192 }, { "epoch": 2.461600777705768, "grad_norm": 0.8383246064186096, "learning_rate": 4.105776376656795e-07, "loss": 0.054, "step": 15193 }, { "epoch": 2.4617627997407645, "grad_norm": 0.8395657539367676, "learning_rate": 4.103375541302007e-07, "loss": 0.0606, "step": 15194 }, { "epoch": 2.4619248217757614, "grad_norm": 0.8531553745269775, "learning_rate": 4.1009753453333636e-07, "loss": 0.0621, "step": 15195 }, { "epoch": 2.4620868438107584, "grad_norm": 0.9702677130699158, "learning_rate": 4.0985757888242965e-07, "loss": 0.0658, "step": 15196 }, { "epoch": 2.462248865845755, "grad_norm": 0.9990732073783875, "learning_rate": 4.09617687184822e-07, "loss": 0.0656, "step": 15197 }, { "epoch": 2.462410887880752, "grad_norm": 0.888516902923584, "learning_rate": 4.0937785944785617e-07, "loss": 0.0645, "step": 15198 }, { "epoch": 2.4625729099157487, "grad_norm": 1.0981239080429077, "learning_rate": 4.091380956788676e-07, "loss": 0.0567, "step": 15199 }, { "epoch": 2.462734931950745, "grad_norm": 0.9011585712432861, "learning_rate": 4.0889839588519386e-07, "loss": 0.0584, "step": 15200 }, { "epoch": 2.462896953985742, "grad_norm": 0.8724988698959351, "learning_rate": 4.086587600741687e-07, "loss": 0.0592, "step": 15201 }, { "epoch": 2.4630589760207386, "grad_norm": 1.0473746061325073, "learning_rate": 4.0841918825312465e-07, "loss": 0.0654, "step": 15202 }, { "epoch": 2.4632209980557356, "grad_norm": 1.0286039113998413, "learning_rate": 4.0817968042939165e-07, "loss": 0.066, "step": 15203 }, { "epoch": 2.4633830200907325, "grad_norm": 0.9677504301071167, "learning_rate": 4.0794023661029856e-07, "loss": 0.0646, "step": 15204 }, { "epoch": 2.463545042125729, "grad_norm": 0.9991774559020996, "learning_rate": 4.0770085680317153e-07, "loss": 0.0683, "step": 15205 }, { "epoch": 2.463707064160726, "grad_norm": 0.8651935458183289, "learning_rate": 4.0746154101533485e-07, "loss": 0.064, "step": 15206 }, { "epoch": 2.4638690861957224, "grad_norm": 0.9634072184562683, "learning_rate": 4.072222892541111e-07, "loss": 0.068, "step": 15207 }, { "epoch": 2.4640311082307194, "grad_norm": 0.7955970764160156, "learning_rate": 4.0698310152682107e-07, "loss": 0.0583, "step": 15208 }, { "epoch": 2.4641931302657163, "grad_norm": 0.8827134966850281, "learning_rate": 4.067439778407839e-07, "loss": 0.058, "step": 15209 }, { "epoch": 2.464355152300713, "grad_norm": 0.932346522808075, "learning_rate": 4.065049182033146e-07, "loss": 0.0603, "step": 15210 }, { "epoch": 2.4645171743357097, "grad_norm": 0.8273137807846069, "learning_rate": 4.0626592262172803e-07, "loss": 0.0568, "step": 15211 }, { "epoch": 2.4646791963707066, "grad_norm": 0.8896094560623169, "learning_rate": 4.0602699110333795e-07, "loss": 0.0552, "step": 15212 }, { "epoch": 2.464841218405703, "grad_norm": 0.8498284220695496, "learning_rate": 4.0578812365545533e-07, "loss": 0.0577, "step": 15213 }, { "epoch": 2.4650032404407, "grad_norm": 0.8612973690032959, "learning_rate": 4.0554932028538774e-07, "loss": 0.0596, "step": 15214 }, { "epoch": 2.4651652624756966, "grad_norm": 0.9590808153152466, "learning_rate": 4.0531058100044264e-07, "loss": 0.0692, "step": 15215 }, { "epoch": 2.4653272845106935, "grad_norm": 0.9362857341766357, "learning_rate": 4.050719058079244e-07, "loss": 0.0629, "step": 15216 }, { "epoch": 2.46548930654569, "grad_norm": 1.062246322631836, "learning_rate": 4.048332947151362e-07, "loss": 0.0645, "step": 15217 }, { "epoch": 2.465651328580687, "grad_norm": 0.8582957983016968, "learning_rate": 4.045947477293791e-07, "loss": 0.0562, "step": 15218 }, { "epoch": 2.465813350615684, "grad_norm": 0.8503392934799194, "learning_rate": 4.043562648579519e-07, "loss": 0.0538, "step": 15219 }, { "epoch": 2.4659753726506803, "grad_norm": 0.7822324633598328, "learning_rate": 4.041178461081519e-07, "loss": 0.0569, "step": 15220 }, { "epoch": 2.4661373946856773, "grad_norm": 0.8472219705581665, "learning_rate": 4.0387949148727343e-07, "loss": 0.0584, "step": 15221 }, { "epoch": 2.466299416720674, "grad_norm": 0.8778401613235474, "learning_rate": 4.036412010026103e-07, "loss": 0.0649, "step": 15222 }, { "epoch": 2.4664614387556707, "grad_norm": 0.8573176860809326, "learning_rate": 4.034029746614532e-07, "loss": 0.0615, "step": 15223 }, { "epoch": 2.4666234607906676, "grad_norm": 0.8038144707679749, "learning_rate": 4.0316481247109215e-07, "loss": 0.0562, "step": 15224 }, { "epoch": 2.466785482825664, "grad_norm": 0.9153627753257751, "learning_rate": 4.029267144388127e-07, "loss": 0.0603, "step": 15225 }, { "epoch": 2.466947504860661, "grad_norm": 0.8227776885032654, "learning_rate": 4.0268868057190075e-07, "loss": 0.0585, "step": 15226 }, { "epoch": 2.467109526895658, "grad_norm": 0.880547285079956, "learning_rate": 4.0245071087764015e-07, "loss": 0.0633, "step": 15227 }, { "epoch": 2.4672715489306545, "grad_norm": 0.9440403580665588, "learning_rate": 4.022128053633123e-07, "loss": 0.062, "step": 15228 }, { "epoch": 2.4674335709656514, "grad_norm": 0.8492519855499268, "learning_rate": 4.0197496403619557e-07, "loss": 0.0534, "step": 15229 }, { "epoch": 2.467595593000648, "grad_norm": 0.7454891800880432, "learning_rate": 4.017371869035674e-07, "loss": 0.051, "step": 15230 }, { "epoch": 2.467757615035645, "grad_norm": 0.8574808239936829, "learning_rate": 4.014994739727046e-07, "loss": 0.0585, "step": 15231 }, { "epoch": 2.4679196370706418, "grad_norm": 0.9869388937950134, "learning_rate": 4.01261825250879e-07, "loss": 0.063, "step": 15232 }, { "epoch": 2.4680816591056383, "grad_norm": 0.9070796966552734, "learning_rate": 4.0102424074536295e-07, "loss": 0.0593, "step": 15233 }, { "epoch": 2.468243681140635, "grad_norm": 1.0267338752746582, "learning_rate": 4.0078672046342553e-07, "loss": 0.0566, "step": 15234 }, { "epoch": 2.468405703175632, "grad_norm": 1.009340763092041, "learning_rate": 4.005492644123346e-07, "loss": 0.0684, "step": 15235 }, { "epoch": 2.4685677252106286, "grad_norm": 0.7549983263015747, "learning_rate": 4.0031187259935546e-07, "loss": 0.0585, "step": 15236 }, { "epoch": 2.4687297472456255, "grad_norm": 1.0285526514053345, "learning_rate": 4.0007454503175196e-07, "loss": 0.0649, "step": 15237 }, { "epoch": 2.468891769280622, "grad_norm": 0.9466545581817627, "learning_rate": 3.998372817167856e-07, "loss": 0.0668, "step": 15238 }, { "epoch": 2.469053791315619, "grad_norm": 0.8367640376091003, "learning_rate": 3.9960008266171663e-07, "loss": 0.055, "step": 15239 }, { "epoch": 2.4692158133506155, "grad_norm": 0.9086434841156006, "learning_rate": 3.993629478738012e-07, "loss": 0.0613, "step": 15240 }, { "epoch": 2.4693778353856124, "grad_norm": 0.9851820468902588, "learning_rate": 3.9912587736029656e-07, "loss": 0.0644, "step": 15241 }, { "epoch": 2.4695398574206093, "grad_norm": 0.8282864093780518, "learning_rate": 3.988888711284569e-07, "loss": 0.0498, "step": 15242 }, { "epoch": 2.469701879455606, "grad_norm": 1.0456504821777344, "learning_rate": 3.9865192918553256e-07, "loss": 0.0621, "step": 15243 }, { "epoch": 2.4698639014906028, "grad_norm": 1.0236272811889648, "learning_rate": 3.9841505153877387e-07, "loss": 0.0589, "step": 15244 }, { "epoch": 2.4700259235255997, "grad_norm": 0.982083797454834, "learning_rate": 3.981782381954283e-07, "loss": 0.0691, "step": 15245 }, { "epoch": 2.470187945560596, "grad_norm": 0.8207711577415466, "learning_rate": 3.9794148916274365e-07, "loss": 0.0525, "step": 15246 }, { "epoch": 2.470349967595593, "grad_norm": 0.7450169324874878, "learning_rate": 3.977048044479617e-07, "loss": 0.0538, "step": 15247 }, { "epoch": 2.4705119896305896, "grad_norm": 0.9385082125663757, "learning_rate": 3.974681840583255e-07, "loss": 0.0647, "step": 15248 }, { "epoch": 2.4706740116655865, "grad_norm": 0.9451351165771484, "learning_rate": 3.972316280010749e-07, "loss": 0.0596, "step": 15249 }, { "epoch": 2.4708360337005835, "grad_norm": 1.1026972532272339, "learning_rate": 3.969951362834476e-07, "loss": 0.0632, "step": 15250 }, { "epoch": 2.47099805573558, "grad_norm": 0.82200026512146, "learning_rate": 3.967587089126801e-07, "loss": 0.0561, "step": 15251 }, { "epoch": 2.471160077770577, "grad_norm": 0.9545662999153137, "learning_rate": 3.965223458960063e-07, "loss": 0.0698, "step": 15252 }, { "epoch": 2.4713220998055734, "grad_norm": 1.0327435731887817, "learning_rate": 3.9628604724065907e-07, "loss": 0.0596, "step": 15253 }, { "epoch": 2.4714841218405703, "grad_norm": 0.9295939207077026, "learning_rate": 3.9604981295386673e-07, "loss": 0.0572, "step": 15254 }, { "epoch": 2.4716461438755672, "grad_norm": 0.9229394793510437, "learning_rate": 3.958136430428594e-07, "loss": 0.0621, "step": 15255 }, { "epoch": 2.4718081659105637, "grad_norm": 0.9629893898963928, "learning_rate": 3.9557753751486237e-07, "loss": 0.0589, "step": 15256 }, { "epoch": 2.4719701879455607, "grad_norm": 0.985286295413971, "learning_rate": 3.9534149637710073e-07, "loss": 0.0636, "step": 15257 }, { "epoch": 2.4721322099805576, "grad_norm": 0.8284005522727966, "learning_rate": 3.9510551963679534e-07, "loss": 0.0588, "step": 15258 }, { "epoch": 2.472294232015554, "grad_norm": 1.0502867698669434, "learning_rate": 3.948696073011668e-07, "loss": 0.0616, "step": 15259 }, { "epoch": 2.472456254050551, "grad_norm": 0.9685097932815552, "learning_rate": 3.9463375937743546e-07, "loss": 0.0636, "step": 15260 }, { "epoch": 2.4726182760855475, "grad_norm": 0.9071670174598694, "learning_rate": 3.943979758728153e-07, "loss": 0.0646, "step": 15261 }, { "epoch": 2.4727802981205445, "grad_norm": 0.93815678358078, "learning_rate": 3.941622567945216e-07, "loss": 0.0576, "step": 15262 }, { "epoch": 2.472942320155541, "grad_norm": 0.8597636222839355, "learning_rate": 3.93926602149767e-07, "loss": 0.0634, "step": 15263 }, { "epoch": 2.473104342190538, "grad_norm": 0.9428747892379761, "learning_rate": 3.9369101194576156e-07, "loss": 0.058, "step": 15264 }, { "epoch": 2.473266364225535, "grad_norm": 0.8766636252403259, "learning_rate": 3.934554861897141e-07, "loss": 0.0541, "step": 15265 }, { "epoch": 2.4734283862605313, "grad_norm": 0.9514452219009399, "learning_rate": 3.93220024888831e-07, "loss": 0.0595, "step": 15266 }, { "epoch": 2.4735904082955282, "grad_norm": 1.0236924886703491, "learning_rate": 3.929846280503169e-07, "loss": 0.0666, "step": 15267 }, { "epoch": 2.473752430330525, "grad_norm": 0.8765333294868469, "learning_rate": 3.927492956813747e-07, "loss": 0.0601, "step": 15268 }, { "epoch": 2.4739144523655217, "grad_norm": 0.871446967124939, "learning_rate": 3.925140277892037e-07, "loss": 0.0566, "step": 15269 }, { "epoch": 2.4740764744005186, "grad_norm": 0.8625292181968689, "learning_rate": 3.922788243810038e-07, "loss": 0.0563, "step": 15270 }, { "epoch": 2.474238496435515, "grad_norm": 0.8439027070999146, "learning_rate": 3.9204368546397144e-07, "loss": 0.0631, "step": 15271 }, { "epoch": 2.474400518470512, "grad_norm": 0.9690973162651062, "learning_rate": 3.918086110453015e-07, "loss": 0.0647, "step": 15272 }, { "epoch": 2.4745625405055085, "grad_norm": 0.9805171489715576, "learning_rate": 3.915736011321855e-07, "loss": 0.0608, "step": 15273 }, { "epoch": 2.4747245625405054, "grad_norm": 0.8432701230049133, "learning_rate": 3.9133865573181524e-07, "loss": 0.0525, "step": 15274 }, { "epoch": 2.4748865845755024, "grad_norm": 1.0578525066375732, "learning_rate": 3.9110377485138017e-07, "loss": 0.0603, "step": 15275 }, { "epoch": 2.475048606610499, "grad_norm": 0.9432248473167419, "learning_rate": 3.9086895849806547e-07, "loss": 0.0623, "step": 15276 }, { "epoch": 2.475210628645496, "grad_norm": 0.8474089503288269, "learning_rate": 3.9063420667905637e-07, "loss": 0.0598, "step": 15277 }, { "epoch": 2.4753726506804927, "grad_norm": 0.940773069858551, "learning_rate": 3.903995194015364e-07, "loss": 0.053, "step": 15278 }, { "epoch": 2.475534672715489, "grad_norm": 1.0299676656723022, "learning_rate": 3.9016489667268563e-07, "loss": 0.0628, "step": 15279 }, { "epoch": 2.475696694750486, "grad_norm": 0.8737006187438965, "learning_rate": 3.899303384996836e-07, "loss": 0.0612, "step": 15280 }, { "epoch": 2.4758587167854826, "grad_norm": 0.9336732029914856, "learning_rate": 3.8969584488970675e-07, "loss": 0.0607, "step": 15281 }, { "epoch": 2.4760207388204796, "grad_norm": 0.8692206740379333, "learning_rate": 3.894614158499302e-07, "loss": 0.0597, "step": 15282 }, { "epoch": 2.4761827608554765, "grad_norm": 1.1092135906219482, "learning_rate": 3.892270513875271e-07, "loss": 0.0599, "step": 15283 }, { "epoch": 2.476344782890473, "grad_norm": 1.1713637113571167, "learning_rate": 3.889927515096681e-07, "loss": 0.0662, "step": 15284 }, { "epoch": 2.47650680492547, "grad_norm": 1.050701379776001, "learning_rate": 3.887585162235225e-07, "loss": 0.0573, "step": 15285 }, { "epoch": 2.4766688269604664, "grad_norm": 0.8351500630378723, "learning_rate": 3.885243455362578e-07, "loss": 0.0554, "step": 15286 }, { "epoch": 2.4768308489954634, "grad_norm": 0.9191629886627197, "learning_rate": 3.882902394550378e-07, "loss": 0.0612, "step": 15287 }, { "epoch": 2.4769928710304603, "grad_norm": 0.9809505939483643, "learning_rate": 3.8805619798702565e-07, "loss": 0.069, "step": 15288 }, { "epoch": 2.4771548930654568, "grad_norm": 0.9045706391334534, "learning_rate": 3.878222211393834e-07, "loss": 0.0568, "step": 15289 }, { "epoch": 2.4773169151004537, "grad_norm": 0.94025719165802, "learning_rate": 3.8758830891927056e-07, "loss": 0.0574, "step": 15290 }, { "epoch": 2.4774789371354506, "grad_norm": 0.948002278804779, "learning_rate": 3.8735446133384313e-07, "loss": 0.0584, "step": 15291 }, { "epoch": 2.477640959170447, "grad_norm": 1.0768437385559082, "learning_rate": 3.8712067839025647e-07, "loss": 0.0703, "step": 15292 }, { "epoch": 2.477802981205444, "grad_norm": 1.0241899490356445, "learning_rate": 3.8688696009566404e-07, "loss": 0.07, "step": 15293 }, { "epoch": 2.4779650032404406, "grad_norm": 0.8172342777252197, "learning_rate": 3.86653306457217e-07, "loss": 0.0562, "step": 15294 }, { "epoch": 2.4781270252754375, "grad_norm": 0.9391677379608154, "learning_rate": 3.864197174820647e-07, "loss": 0.0619, "step": 15295 }, { "epoch": 2.478289047310434, "grad_norm": 0.8953998684883118, "learning_rate": 3.861861931773542e-07, "loss": 0.0588, "step": 15296 }, { "epoch": 2.478451069345431, "grad_norm": 0.8568360209465027, "learning_rate": 3.8595273355023054e-07, "loss": 0.0573, "step": 15297 }, { "epoch": 2.478613091380428, "grad_norm": 1.2389211654663086, "learning_rate": 3.8571933860783785e-07, "loss": 0.0682, "step": 15298 }, { "epoch": 2.4787751134154243, "grad_norm": 0.9053958058357239, "learning_rate": 3.854860083573167e-07, "loss": 0.0544, "step": 15299 }, { "epoch": 2.4789371354504213, "grad_norm": 0.989845335483551, "learning_rate": 3.8525274280580646e-07, "loss": 0.0537, "step": 15300 }, { "epoch": 2.479099157485418, "grad_norm": 0.8106503486633301, "learning_rate": 3.850195419604455e-07, "loss": 0.0611, "step": 15301 }, { "epoch": 2.4792611795204147, "grad_norm": 0.9021776914596558, "learning_rate": 3.8478640582836733e-07, "loss": 0.0567, "step": 15302 }, { "epoch": 2.4794232015554116, "grad_norm": 0.9458431601524353, "learning_rate": 3.845533344167068e-07, "loss": 0.0572, "step": 15303 }, { "epoch": 2.479585223590408, "grad_norm": 1.0524413585662842, "learning_rate": 3.8432032773259574e-07, "loss": 0.0643, "step": 15304 }, { "epoch": 2.479747245625405, "grad_norm": 0.8422672152519226, "learning_rate": 3.84087385783162e-07, "loss": 0.0622, "step": 15305 }, { "epoch": 2.479909267660402, "grad_norm": 1.0135761499404907, "learning_rate": 3.838545085755341e-07, "loss": 0.0624, "step": 15306 }, { "epoch": 2.4800712896953985, "grad_norm": 0.9610772728919983, "learning_rate": 3.8362169611683655e-07, "loss": 0.0637, "step": 15307 }, { "epoch": 2.4802333117303954, "grad_norm": 0.903256356716156, "learning_rate": 3.8338894841419476e-07, "loss": 0.0601, "step": 15308 }, { "epoch": 2.480395333765392, "grad_norm": 1.0476837158203125, "learning_rate": 3.831562654747284e-07, "loss": 0.0646, "step": 15309 }, { "epoch": 2.480557355800389, "grad_norm": 0.9361180067062378, "learning_rate": 3.8292364730555754e-07, "loss": 0.057, "step": 15310 }, { "epoch": 2.4807193778353858, "grad_norm": 0.9486135244369507, "learning_rate": 3.826910939138001e-07, "loss": 0.0601, "step": 15311 }, { "epoch": 2.4808813998703823, "grad_norm": 0.8238722085952759, "learning_rate": 3.8245860530657126e-07, "loss": 0.0562, "step": 15312 }, { "epoch": 2.481043421905379, "grad_norm": 0.9989494681358337, "learning_rate": 3.8222618149098473e-07, "loss": 0.0571, "step": 15313 }, { "epoch": 2.481205443940376, "grad_norm": 0.9721057415008545, "learning_rate": 3.8199382247415236e-07, "loss": 0.0678, "step": 15314 }, { "epoch": 2.4813674659753726, "grad_norm": 0.9013247489929199, "learning_rate": 3.817615282631831e-07, "loss": 0.0597, "step": 15315 }, { "epoch": 2.4815294880103695, "grad_norm": 0.9429559707641602, "learning_rate": 3.8152929886518587e-07, "loss": 0.0692, "step": 15316 }, { "epoch": 2.481691510045366, "grad_norm": 0.803132176399231, "learning_rate": 3.8129713428726454e-07, "loss": 0.0536, "step": 15317 }, { "epoch": 2.481853532080363, "grad_norm": 0.7906658053398132, "learning_rate": 3.810650345365241e-07, "loss": 0.0508, "step": 15318 }, { "epoch": 2.4820155541153595, "grad_norm": 0.9531766772270203, "learning_rate": 3.808329996200663e-07, "loss": 0.0648, "step": 15319 }, { "epoch": 2.4821775761503564, "grad_norm": 0.8146340847015381, "learning_rate": 3.8060102954499024e-07, "loss": 0.0545, "step": 15320 }, { "epoch": 2.4823395981853533, "grad_norm": 0.9485729932785034, "learning_rate": 3.8036912431839297e-07, "loss": 0.055, "step": 15321 }, { "epoch": 2.48250162022035, "grad_norm": 0.8682623505592346, "learning_rate": 3.8013728394737216e-07, "loss": 0.0593, "step": 15322 }, { "epoch": 2.4826636422553467, "grad_norm": 1.042887806892395, "learning_rate": 3.7990550843902017e-07, "loss": 0.0639, "step": 15323 }, { "epoch": 2.4828256642903437, "grad_norm": 0.9474160075187683, "learning_rate": 3.796737978004289e-07, "loss": 0.0667, "step": 15324 }, { "epoch": 2.48298768632534, "grad_norm": 0.9614787101745605, "learning_rate": 3.7944215203868843e-07, "loss": 0.0681, "step": 15325 }, { "epoch": 2.483149708360337, "grad_norm": 0.7932602763175964, "learning_rate": 3.792105711608865e-07, "loss": 0.0587, "step": 15326 }, { "epoch": 2.4833117303953336, "grad_norm": 0.8903249502182007, "learning_rate": 3.7897905517410877e-07, "loss": 0.0531, "step": 15327 }, { "epoch": 2.4834737524303305, "grad_norm": 0.9789133667945862, "learning_rate": 3.7874760408543933e-07, "loss": 0.0701, "step": 15328 }, { "epoch": 2.4836357744653275, "grad_norm": 0.8971397280693054, "learning_rate": 3.785162179019597e-07, "loss": 0.0692, "step": 15329 }, { "epoch": 2.483797796500324, "grad_norm": 0.9187726974487305, "learning_rate": 3.7828489663075065e-07, "loss": 0.0591, "step": 15330 }, { "epoch": 2.483959818535321, "grad_norm": 1.0862635374069214, "learning_rate": 3.7805364027888787e-07, "loss": 0.0704, "step": 15331 }, { "epoch": 2.4841218405703174, "grad_norm": 0.8576655387878418, "learning_rate": 3.778224488534496e-07, "loss": 0.062, "step": 15332 }, { "epoch": 2.4842838626053143, "grad_norm": 1.0012966394424438, "learning_rate": 3.7759132236150854e-07, "loss": 0.068, "step": 15333 }, { "epoch": 2.4844458846403112, "grad_norm": 0.8457930088043213, "learning_rate": 3.773602608101376e-07, "loss": 0.0632, "step": 15334 }, { "epoch": 2.4846079066753077, "grad_norm": 1.0266876220703125, "learning_rate": 3.771292642064056e-07, "loss": 0.0679, "step": 15335 }, { "epoch": 2.4847699287103047, "grad_norm": 0.8954113721847534, "learning_rate": 3.7689833255737995e-07, "loss": 0.0574, "step": 15336 }, { "epoch": 2.4849319507453016, "grad_norm": 0.9594757556915283, "learning_rate": 3.7666746587012885e-07, "loss": 0.0611, "step": 15337 }, { "epoch": 2.485093972780298, "grad_norm": 0.8226574063301086, "learning_rate": 3.764366641517145e-07, "loss": 0.0606, "step": 15338 }, { "epoch": 2.485255994815295, "grad_norm": 0.9588454365730286, "learning_rate": 3.762059274091989e-07, "loss": 0.0639, "step": 15339 }, { "epoch": 2.4854180168502915, "grad_norm": 0.913917064666748, "learning_rate": 3.759752556496421e-07, "loss": 0.0661, "step": 15340 }, { "epoch": 2.4855800388852884, "grad_norm": 0.7962075471878052, "learning_rate": 3.7574464888010363e-07, "loss": 0.0535, "step": 15341 }, { "epoch": 2.485742060920285, "grad_norm": 0.8324793577194214, "learning_rate": 3.7551410710763764e-07, "loss": 0.0583, "step": 15342 }, { "epoch": 2.485904082955282, "grad_norm": 1.2726354598999023, "learning_rate": 3.75283630339299e-07, "loss": 0.0629, "step": 15343 }, { "epoch": 2.486066104990279, "grad_norm": 0.8221186995506287, "learning_rate": 3.7505321858213926e-07, "loss": 0.055, "step": 15344 }, { "epoch": 2.4862281270252753, "grad_norm": 1.0081355571746826, "learning_rate": 3.7482287184320897e-07, "loss": 0.0632, "step": 15345 }, { "epoch": 2.4863901490602722, "grad_norm": 0.920180082321167, "learning_rate": 3.7459259012955606e-07, "loss": 0.0622, "step": 15346 }, { "epoch": 2.486552171095269, "grad_norm": 0.9343588948249817, "learning_rate": 3.743623734482263e-07, "loss": 0.0634, "step": 15347 }, { "epoch": 2.4867141931302656, "grad_norm": 1.170159935951233, "learning_rate": 3.7413222180626455e-07, "loss": 0.0627, "step": 15348 }, { "epoch": 2.4868762151652626, "grad_norm": 0.9837419390678406, "learning_rate": 3.7390213521071193e-07, "loss": 0.0612, "step": 15349 }, { "epoch": 2.487038237200259, "grad_norm": 0.8149502873420715, "learning_rate": 3.736721136686081e-07, "loss": 0.0558, "step": 15350 }, { "epoch": 2.487200259235256, "grad_norm": 0.8310353755950928, "learning_rate": 3.7344215718699256e-07, "loss": 0.0522, "step": 15351 }, { "epoch": 2.487362281270253, "grad_norm": 0.8602123260498047, "learning_rate": 3.7321226577290147e-07, "loss": 0.0598, "step": 15352 }, { "epoch": 2.4875243033052494, "grad_norm": 0.8709070682525635, "learning_rate": 3.7298243943336784e-07, "loss": 0.0583, "step": 15353 }, { "epoch": 2.4876863253402464, "grad_norm": 0.9925591945648193, "learning_rate": 3.7275267817542425e-07, "loss": 0.0599, "step": 15354 }, { "epoch": 2.487848347375243, "grad_norm": 0.8003130555152893, "learning_rate": 3.725229820061008e-07, "loss": 0.0569, "step": 15355 }, { "epoch": 2.48801036941024, "grad_norm": 0.8514525890350342, "learning_rate": 3.7229335093242587e-07, "loss": 0.0528, "step": 15356 }, { "epoch": 2.4881723914452367, "grad_norm": 0.8922498226165771, "learning_rate": 3.720637849614253e-07, "loss": 0.0614, "step": 15357 }, { "epoch": 2.488334413480233, "grad_norm": 1.7786387205123901, "learning_rate": 3.7183428410012326e-07, "loss": 0.0647, "step": 15358 }, { "epoch": 2.48849643551523, "grad_norm": 0.9049399495124817, "learning_rate": 3.716048483555423e-07, "loss": 0.063, "step": 15359 }, { "epoch": 2.488658457550227, "grad_norm": 0.9355145692825317, "learning_rate": 3.713754777347023e-07, "loss": 0.0621, "step": 15360 }, { "epoch": 2.4888204795852236, "grad_norm": 1.0823627710342407, "learning_rate": 3.711461722446216e-07, "loss": 0.0676, "step": 15361 }, { "epoch": 2.4889825016202205, "grad_norm": 0.8411454558372498, "learning_rate": 3.7091693189231615e-07, "loss": 0.0586, "step": 15362 }, { "epoch": 2.489144523655217, "grad_norm": 0.8815544843673706, "learning_rate": 3.706877566848008e-07, "loss": 0.0552, "step": 15363 }, { "epoch": 2.489306545690214, "grad_norm": 0.96858811378479, "learning_rate": 3.704586466290863e-07, "loss": 0.0595, "step": 15364 }, { "epoch": 2.4894685677252104, "grad_norm": 0.8106810450553894, "learning_rate": 3.7022960173218437e-07, "loss": 0.0564, "step": 15365 }, { "epoch": 2.4896305897602073, "grad_norm": 0.8244809508323669, "learning_rate": 3.7000062200110266e-07, "loss": 0.0668, "step": 15366 }, { "epoch": 2.4897926117952043, "grad_norm": 0.9649903178215027, "learning_rate": 3.6977170744284805e-07, "loss": 0.0636, "step": 15367 }, { "epoch": 2.4899546338302008, "grad_norm": 1.087392807006836, "learning_rate": 3.6954285806442337e-07, "loss": 0.0681, "step": 15368 }, { "epoch": 2.4901166558651977, "grad_norm": 1.129231333732605, "learning_rate": 3.6931407387283126e-07, "loss": 0.0688, "step": 15369 }, { "epoch": 2.4902786779001946, "grad_norm": 0.8219298720359802, "learning_rate": 3.6908535487507335e-07, "loss": 0.0577, "step": 15370 }, { "epoch": 2.490440699935191, "grad_norm": 0.8305365443229675, "learning_rate": 3.688567010781463e-07, "loss": 0.0571, "step": 15371 }, { "epoch": 2.490602721970188, "grad_norm": 0.9187493324279785, "learning_rate": 3.68628112489047e-07, "loss": 0.0593, "step": 15372 }, { "epoch": 2.4907647440051845, "grad_norm": 0.8878763914108276, "learning_rate": 3.683995891147696e-07, "loss": 0.0579, "step": 15373 }, { "epoch": 2.4909267660401815, "grad_norm": 0.9653205275535583, "learning_rate": 3.681711309623065e-07, "loss": 0.0578, "step": 15374 }, { "epoch": 2.4910887880751784, "grad_norm": 0.8415849208831787, "learning_rate": 3.679427380386477e-07, "loss": 0.0582, "step": 15375 }, { "epoch": 2.491250810110175, "grad_norm": 0.9508287310600281, "learning_rate": 3.677144103507818e-07, "loss": 0.0608, "step": 15376 }, { "epoch": 2.491412832145172, "grad_norm": 0.9753308892250061, "learning_rate": 3.674861479056946e-07, "loss": 0.0614, "step": 15377 }, { "epoch": 2.4915748541801683, "grad_norm": 0.8694376349449158, "learning_rate": 3.672579507103716e-07, "loss": 0.0622, "step": 15378 }, { "epoch": 2.4917368762151653, "grad_norm": 0.854770839214325, "learning_rate": 3.67029818771793e-07, "loss": 0.0606, "step": 15379 }, { "epoch": 2.491898898250162, "grad_norm": 0.8874652981758118, "learning_rate": 3.668017520969405e-07, "loss": 0.0647, "step": 15380 }, { "epoch": 2.4920609202851587, "grad_norm": 0.9091949462890625, "learning_rate": 3.66573750692793e-07, "loss": 0.0686, "step": 15381 }, { "epoch": 2.4922229423201556, "grad_norm": 0.9150681495666504, "learning_rate": 3.663458145663254e-07, "loss": 0.0611, "step": 15382 }, { "epoch": 2.492384964355152, "grad_norm": 1.12820565700531, "learning_rate": 3.6611794372451244e-07, "loss": 0.0716, "step": 15383 }, { "epoch": 2.492546986390149, "grad_norm": 0.825421154499054, "learning_rate": 3.65890138174326e-07, "loss": 0.0642, "step": 15384 }, { "epoch": 2.492709008425146, "grad_norm": 0.9262431263923645, "learning_rate": 3.6566239792273775e-07, "loss": 0.0597, "step": 15385 }, { "epoch": 2.4928710304601425, "grad_norm": 0.9032964110374451, "learning_rate": 3.6543472297671495e-07, "loss": 0.0627, "step": 15386 }, { "epoch": 2.4930330524951394, "grad_norm": 0.8417792320251465, "learning_rate": 3.6520711334322387e-07, "loss": 0.0637, "step": 15387 }, { "epoch": 2.493195074530136, "grad_norm": 0.9112814664840698, "learning_rate": 3.6497956902922904e-07, "loss": 0.0601, "step": 15388 }, { "epoch": 2.493357096565133, "grad_norm": 1.061842918395996, "learning_rate": 3.6475209004169286e-07, "loss": 0.0566, "step": 15389 }, { "epoch": 2.4935191186001298, "grad_norm": 0.9109911918640137, "learning_rate": 3.645246763875754e-07, "loss": 0.0605, "step": 15390 }, { "epoch": 2.4936811406351262, "grad_norm": 0.8097963929176331, "learning_rate": 3.6429732807383517e-07, "loss": 0.0591, "step": 15391 }, { "epoch": 2.493843162670123, "grad_norm": 0.7953127026557922, "learning_rate": 3.640700451074289e-07, "loss": 0.0537, "step": 15392 }, { "epoch": 2.49400518470512, "grad_norm": 1.0355387926101685, "learning_rate": 3.63842827495309e-07, "loss": 0.0673, "step": 15393 }, { "epoch": 2.4941672067401166, "grad_norm": 0.8306580781936646, "learning_rate": 3.636156752444303e-07, "loss": 0.0578, "step": 15394 }, { "epoch": 2.4943292287751135, "grad_norm": 1.0002905130386353, "learning_rate": 3.633885883617416e-07, "loss": 0.0626, "step": 15395 }, { "epoch": 2.49449125081011, "grad_norm": 0.9213991165161133, "learning_rate": 3.631615668541921e-07, "loss": 0.0528, "step": 15396 }, { "epoch": 2.494653272845107, "grad_norm": 1.1550705432891846, "learning_rate": 3.6293461072872735e-07, "loss": 0.066, "step": 15397 }, { "epoch": 2.4948152948801035, "grad_norm": 0.9875110387802124, "learning_rate": 3.6270771999229124e-07, "loss": 0.0553, "step": 15398 }, { "epoch": 2.4949773169151004, "grad_norm": 0.9158915281295776, "learning_rate": 3.6248089465182797e-07, "loss": 0.0621, "step": 15399 }, { "epoch": 2.4951393389500973, "grad_norm": 0.9057533740997314, "learning_rate": 3.622541347142758e-07, "loss": 0.0537, "step": 15400 }, { "epoch": 2.495301360985094, "grad_norm": 0.8839161396026611, "learning_rate": 3.6202744018657393e-07, "loss": 0.0623, "step": 15401 }, { "epoch": 2.4954633830200907, "grad_norm": 0.9387931227684021, "learning_rate": 3.618008110756588e-07, "loss": 0.0662, "step": 15402 }, { "epoch": 2.4956254050550877, "grad_norm": 1.0209976434707642, "learning_rate": 3.6157424738846427e-07, "loss": 0.0676, "step": 15403 }, { "epoch": 2.495787427090084, "grad_norm": 0.9517818689346313, "learning_rate": 3.6134774913192314e-07, "loss": 0.0619, "step": 15404 }, { "epoch": 2.495949449125081, "grad_norm": 0.9246603846549988, "learning_rate": 3.6112131631296507e-07, "loss": 0.0608, "step": 15405 }, { "epoch": 2.4961114711600776, "grad_norm": 0.9202209711074829, "learning_rate": 3.608949489385191e-07, "loss": 0.0601, "step": 15406 }, { "epoch": 2.4962734931950745, "grad_norm": 0.8076236248016357, "learning_rate": 3.60668647015511e-07, "loss": 0.0593, "step": 15407 }, { "epoch": 2.4964355152300715, "grad_norm": 0.9734016060829163, "learning_rate": 3.6044241055086525e-07, "loss": 0.0606, "step": 15408 }, { "epoch": 2.496597537265068, "grad_norm": 1.0602586269378662, "learning_rate": 3.602162395515041e-07, "loss": 0.066, "step": 15409 }, { "epoch": 2.496759559300065, "grad_norm": 1.0129172801971436, "learning_rate": 3.599901340243478e-07, "loss": 0.0573, "step": 15410 }, { "epoch": 2.4969215813350614, "grad_norm": 0.8970673084259033, "learning_rate": 3.597640939763153e-07, "loss": 0.0616, "step": 15411 }, { "epoch": 2.4970836033700583, "grad_norm": 0.8650174140930176, "learning_rate": 3.5953811941432104e-07, "loss": 0.0544, "step": 15412 }, { "epoch": 2.4972456254050552, "grad_norm": 0.920050323009491, "learning_rate": 3.593122103452812e-07, "loss": 0.0581, "step": 15413 }, { "epoch": 2.4974076474400517, "grad_norm": 0.9447347521781921, "learning_rate": 3.590863667761077e-07, "loss": 0.0596, "step": 15414 }, { "epoch": 2.4975696694750487, "grad_norm": 0.8913710713386536, "learning_rate": 3.5886058871371005e-07, "loss": 0.0622, "step": 15415 }, { "epoch": 2.4977316915100456, "grad_norm": 1.1236344575881958, "learning_rate": 3.5863487616499713e-07, "loss": 0.064, "step": 15416 }, { "epoch": 2.497893713545042, "grad_norm": 0.8553385138511658, "learning_rate": 3.584092291368746e-07, "loss": 0.0516, "step": 15417 }, { "epoch": 2.498055735580039, "grad_norm": 0.8064867854118347, "learning_rate": 3.581836476362474e-07, "loss": 0.055, "step": 15418 }, { "epoch": 2.4982177576150355, "grad_norm": 1.035109043121338, "learning_rate": 3.579581316700173e-07, "loss": 0.0661, "step": 15419 }, { "epoch": 2.4983797796500324, "grad_norm": 0.9095247983932495, "learning_rate": 3.5773268124508485e-07, "loss": 0.0644, "step": 15420 }, { "epoch": 2.498541801685029, "grad_norm": 0.7617493867874146, "learning_rate": 3.575072963683482e-07, "loss": 0.055, "step": 15421 }, { "epoch": 2.498703823720026, "grad_norm": 0.863577663898468, "learning_rate": 3.5728197704670344e-07, "loss": 0.0569, "step": 15422 }, { "epoch": 2.498865845755023, "grad_norm": 0.8674677610397339, "learning_rate": 3.5705672328704503e-07, "loss": 0.0629, "step": 15423 }, { "epoch": 2.4990278677900193, "grad_norm": 0.8979498744010925, "learning_rate": 3.5683153509626504e-07, "loss": 0.0641, "step": 15424 }, { "epoch": 2.499189889825016, "grad_norm": 1.075107216835022, "learning_rate": 3.566064124812541e-07, "loss": 0.0667, "step": 15425 }, { "epoch": 2.499351911860013, "grad_norm": 0.913252055644989, "learning_rate": 3.563813554488996e-07, "loss": 0.0589, "step": 15426 }, { "epoch": 2.4995139338950096, "grad_norm": 0.9128844141960144, "learning_rate": 3.561563640060875e-07, "loss": 0.0632, "step": 15427 }, { "epoch": 2.4996759559300066, "grad_norm": 0.9388177394866943, "learning_rate": 3.559314381597034e-07, "loss": 0.0677, "step": 15428 }, { "epoch": 2.499837977965003, "grad_norm": 0.8632997870445251, "learning_rate": 3.557065779166291e-07, "loss": 0.0533, "step": 15429 }, { "epoch": 2.5, "grad_norm": 0.9118877649307251, "learning_rate": 3.55481783283744e-07, "loss": 0.0619, "step": 15430 }, { "epoch": 2.5001620220349965, "grad_norm": 1.0225169658660889, "learning_rate": 3.5525705426792624e-07, "loss": 0.0709, "step": 15431 }, { "epoch": 2.5003240440699934, "grad_norm": 0.8662908673286438, "learning_rate": 3.5503239087605337e-07, "loss": 0.0596, "step": 15432 }, { "epoch": 2.5004860661049904, "grad_norm": 0.7899366617202759, "learning_rate": 3.548077931149982e-07, "loss": 0.0581, "step": 15433 }, { "epoch": 2.500648088139987, "grad_norm": 0.9061411023139954, "learning_rate": 3.54583260991633e-07, "loss": 0.0606, "step": 15434 }, { "epoch": 2.500810110174984, "grad_norm": 0.8774332404136658, "learning_rate": 3.543587945128285e-07, "loss": 0.0537, "step": 15435 }, { "epoch": 2.5009721322099807, "grad_norm": 1.0649468898773193, "learning_rate": 3.541343936854524e-07, "loss": 0.0656, "step": 15436 }, { "epoch": 2.501134154244977, "grad_norm": 0.9664431810379028, "learning_rate": 3.53910058516371e-07, "loss": 0.0612, "step": 15437 }, { "epoch": 2.501296176279974, "grad_norm": 1.0078061819076538, "learning_rate": 3.5368578901244843e-07, "loss": 0.0584, "step": 15438 }, { "epoch": 2.501458198314971, "grad_norm": 1.0571532249450684, "learning_rate": 3.5346158518054674e-07, "loss": 0.0649, "step": 15439 }, { "epoch": 2.5016202203499676, "grad_norm": 0.8381620049476624, "learning_rate": 3.5323744702752657e-07, "loss": 0.0511, "step": 15440 }, { "epoch": 2.5017822423849645, "grad_norm": 0.9569916129112244, "learning_rate": 3.5301337456024434e-07, "loss": 0.0622, "step": 15441 }, { "epoch": 2.501944264419961, "grad_norm": 0.8359852433204651, "learning_rate": 3.5278936778555763e-07, "loss": 0.0558, "step": 15442 }, { "epoch": 2.502106286454958, "grad_norm": 0.9020277857780457, "learning_rate": 3.525654267103207e-07, "loss": 0.0602, "step": 15443 }, { "epoch": 2.5022683084899544, "grad_norm": 0.7572386860847473, "learning_rate": 3.523415513413847e-07, "loss": 0.0566, "step": 15444 }, { "epoch": 2.5024303305249513, "grad_norm": 0.9130625128746033, "learning_rate": 3.5211774168559976e-07, "loss": 0.0552, "step": 15445 }, { "epoch": 2.5025923525599483, "grad_norm": 1.0692723989486694, "learning_rate": 3.518939977498137e-07, "loss": 0.0597, "step": 15446 }, { "epoch": 2.5027543745949448, "grad_norm": 0.9212605357170105, "learning_rate": 3.516703195408741e-07, "loss": 0.0638, "step": 15447 }, { "epoch": 2.5029163966299417, "grad_norm": 0.9287461042404175, "learning_rate": 3.514467070656233e-07, "loss": 0.0586, "step": 15448 }, { "epoch": 2.5030784186649386, "grad_norm": 0.8843840956687927, "learning_rate": 3.512231603309038e-07, "loss": 0.063, "step": 15449 }, { "epoch": 2.503240440699935, "grad_norm": 0.8561710119247437, "learning_rate": 3.509996793435558e-07, "loss": 0.0593, "step": 15450 }, { "epoch": 2.503402462734932, "grad_norm": 1.0155240297317505, "learning_rate": 3.5077626411041707e-07, "loss": 0.0709, "step": 15451 }, { "epoch": 2.5035644847699285, "grad_norm": 0.9709542393684387, "learning_rate": 3.505529146383235e-07, "loss": 0.0626, "step": 15452 }, { "epoch": 2.5037265068049255, "grad_norm": 0.9691731929779053, "learning_rate": 3.503296309341095e-07, "loss": 0.0617, "step": 15453 }, { "epoch": 2.503888528839922, "grad_norm": 0.9437772035598755, "learning_rate": 3.501064130046064e-07, "loss": 0.0593, "step": 15454 }, { "epoch": 2.504050550874919, "grad_norm": 0.8508868217468262, "learning_rate": 3.4988326085664463e-07, "loss": 0.0586, "step": 15455 }, { "epoch": 2.504212572909916, "grad_norm": 0.8837978839874268, "learning_rate": 3.496601744970518e-07, "loss": 0.0541, "step": 15456 }, { "epoch": 2.5043745949449123, "grad_norm": 0.975632905960083, "learning_rate": 3.494371539326538e-07, "loss": 0.0556, "step": 15457 }, { "epoch": 2.5045366169799093, "grad_norm": 1.0804563760757446, "learning_rate": 3.492141991702752e-07, "loss": 0.0585, "step": 15458 }, { "epoch": 2.504698639014906, "grad_norm": 0.832747757434845, "learning_rate": 3.4899131021673693e-07, "loss": 0.0584, "step": 15459 }, { "epoch": 2.5048606610499027, "grad_norm": 0.9667251110076904, "learning_rate": 3.4876848707885854e-07, "loss": 0.0595, "step": 15460 }, { "epoch": 2.5050226830848996, "grad_norm": 0.9770776629447937, "learning_rate": 3.4854572976345954e-07, "loss": 0.0636, "step": 15461 }, { "epoch": 2.5051847051198965, "grad_norm": 0.8421487212181091, "learning_rate": 3.483230382773545e-07, "loss": 0.0574, "step": 15462 }, { "epoch": 2.505346727154893, "grad_norm": 1.0473026037216187, "learning_rate": 3.481004126273574e-07, "loss": 0.0623, "step": 15463 }, { "epoch": 2.50550874918989, "grad_norm": 0.7978666424751282, "learning_rate": 3.478778528202803e-07, "loss": 0.0544, "step": 15464 }, { "epoch": 2.5056707712248865, "grad_norm": 1.039564609527588, "learning_rate": 3.476553588629328e-07, "loss": 0.0655, "step": 15465 }, { "epoch": 2.5058327932598834, "grad_norm": 0.7758963108062744, "learning_rate": 3.474329307621227e-07, "loss": 0.053, "step": 15466 }, { "epoch": 2.50599481529488, "grad_norm": 0.9446272850036621, "learning_rate": 3.4721056852465575e-07, "loss": 0.0561, "step": 15467 }, { "epoch": 2.506156837329877, "grad_norm": 0.9161103963851929, "learning_rate": 3.469882721573356e-07, "loss": 0.0607, "step": 15468 }, { "epoch": 2.5063188593648738, "grad_norm": 0.8289987444877625, "learning_rate": 3.467660416669649e-07, "loss": 0.0606, "step": 15469 }, { "epoch": 2.5064808813998702, "grad_norm": 0.985321581363678, "learning_rate": 3.465438770603416e-07, "loss": 0.0652, "step": 15470 }, { "epoch": 2.506642903434867, "grad_norm": 0.8236386179924011, "learning_rate": 3.463217783442649e-07, "loss": 0.0561, "step": 15471 }, { "epoch": 2.506804925469864, "grad_norm": 1.0456392765045166, "learning_rate": 3.4609974552552993e-07, "loss": 0.0686, "step": 15472 }, { "epoch": 2.5069669475048606, "grad_norm": 1.1649935245513916, "learning_rate": 3.4587777861093105e-07, "loss": 0.0645, "step": 15473 }, { "epoch": 2.5071289695398575, "grad_norm": 0.954638659954071, "learning_rate": 3.456558776072585e-07, "loss": 0.0642, "step": 15474 }, { "epoch": 2.507290991574854, "grad_norm": 0.8497850298881531, "learning_rate": 3.4543404252130234e-07, "loss": 0.0587, "step": 15475 }, { "epoch": 2.507453013609851, "grad_norm": 0.8479697108268738, "learning_rate": 3.4521227335985146e-07, "loss": 0.0582, "step": 15476 }, { "epoch": 2.5076150356448474, "grad_norm": 0.8747241497039795, "learning_rate": 3.449905701296902e-07, "loss": 0.0555, "step": 15477 }, { "epoch": 2.5077770576798444, "grad_norm": 0.9284306764602661, "learning_rate": 3.447689328376022e-07, "loss": 0.0651, "step": 15478 }, { "epoch": 2.5079390797148413, "grad_norm": 0.9648377299308777, "learning_rate": 3.445473614903688e-07, "loss": 0.069, "step": 15479 }, { "epoch": 2.508101101749838, "grad_norm": 0.9850466251373291, "learning_rate": 3.4432585609477125e-07, "loss": 0.0611, "step": 15480 }, { "epoch": 2.5082631237848347, "grad_norm": 0.8666247725486755, "learning_rate": 3.441044166575855e-07, "loss": 0.0637, "step": 15481 }, { "epoch": 2.5084251458198317, "grad_norm": 0.8115009069442749, "learning_rate": 3.438830431855872e-07, "loss": 0.052, "step": 15482 }, { "epoch": 2.508587167854828, "grad_norm": 1.0275858640670776, "learning_rate": 3.4366173568555013e-07, "loss": 0.0657, "step": 15483 }, { "epoch": 2.508749189889825, "grad_norm": 0.9150252342224121, "learning_rate": 3.434404941642455e-07, "loss": 0.0587, "step": 15484 }, { "epoch": 2.508911211924822, "grad_norm": 1.8770604133605957, "learning_rate": 3.4321931862844327e-07, "loss": 0.0569, "step": 15485 }, { "epoch": 2.5090732339598185, "grad_norm": 1.0579850673675537, "learning_rate": 3.4299820908491045e-07, "loss": 0.0671, "step": 15486 }, { "epoch": 2.5092352559948155, "grad_norm": 0.8055444955825806, "learning_rate": 3.427771655404133e-07, "loss": 0.0595, "step": 15487 }, { "epoch": 2.509397278029812, "grad_norm": 0.8405749797821045, "learning_rate": 3.4255618800171366e-07, "loss": 0.0574, "step": 15488 }, { "epoch": 2.509559300064809, "grad_norm": 0.9373750686645508, "learning_rate": 3.423352764755733e-07, "loss": 0.0611, "step": 15489 }, { "epoch": 2.5097213220998054, "grad_norm": 0.957573652267456, "learning_rate": 3.421144309687527e-07, "loss": 0.0701, "step": 15490 }, { "epoch": 2.5098833441348023, "grad_norm": 0.9812716245651245, "learning_rate": 3.418936514880092e-07, "loss": 0.0618, "step": 15491 }, { "epoch": 2.5100453661697992, "grad_norm": 0.9846394658088684, "learning_rate": 3.4167293804009656e-07, "loss": 0.0691, "step": 15492 }, { "epoch": 2.5102073882047957, "grad_norm": 1.0045005083084106, "learning_rate": 3.414522906317691e-07, "loss": 0.0551, "step": 15493 }, { "epoch": 2.5103694102397927, "grad_norm": 0.8848669528961182, "learning_rate": 3.412317092697781e-07, "loss": 0.0637, "step": 15494 }, { "epoch": 2.5105314322747896, "grad_norm": 0.9748433232307434, "learning_rate": 3.4101119396087237e-07, "loss": 0.0644, "step": 15495 }, { "epoch": 2.510693454309786, "grad_norm": 0.8080410957336426, "learning_rate": 3.407907447117997e-07, "loss": 0.0569, "step": 15496 }, { "epoch": 2.510855476344783, "grad_norm": 0.9153392314910889, "learning_rate": 3.405703615293052e-07, "loss": 0.0655, "step": 15497 }, { "epoch": 2.5110174983797795, "grad_norm": 0.8493557572364807, "learning_rate": 3.4035004442013157e-07, "loss": 0.0581, "step": 15498 }, { "epoch": 2.5111795204147764, "grad_norm": 0.969572126865387, "learning_rate": 3.4012979339102054e-07, "loss": 0.0614, "step": 15499 }, { "epoch": 2.511341542449773, "grad_norm": 1.0106775760650635, "learning_rate": 3.399096084487108e-07, "loss": 0.0603, "step": 15500 }, { "epoch": 2.51150356448477, "grad_norm": 0.7856919765472412, "learning_rate": 3.3968948959994004e-07, "loss": 0.0533, "step": 15501 }, { "epoch": 2.511665586519767, "grad_norm": 0.8515869975090027, "learning_rate": 3.394694368514434e-07, "loss": 0.0634, "step": 15502 }, { "epoch": 2.5118276085547633, "grad_norm": 0.7295697927474976, "learning_rate": 3.3924945020995277e-07, "loss": 0.0557, "step": 15503 }, { "epoch": 2.51198963058976, "grad_norm": 0.8382651805877686, "learning_rate": 3.390295296822002e-07, "loss": 0.055, "step": 15504 }, { "epoch": 2.512151652624757, "grad_norm": 0.8761834502220154, "learning_rate": 3.388096752749154e-07, "loss": 0.0607, "step": 15505 }, { "epoch": 2.5123136746597536, "grad_norm": 0.9966699481010437, "learning_rate": 3.3858988699482397e-07, "loss": 0.0619, "step": 15506 }, { "epoch": 2.5124756966947506, "grad_norm": 0.8567604422569275, "learning_rate": 3.3837016484865146e-07, "loss": 0.0573, "step": 15507 }, { "epoch": 2.5126377187297475, "grad_norm": 1.0470939874649048, "learning_rate": 3.381505088431203e-07, "loss": 0.0621, "step": 15508 }, { "epoch": 2.512799740764744, "grad_norm": 0.8962857127189636, "learning_rate": 3.3793091898495305e-07, "loss": 0.0627, "step": 15509 }, { "epoch": 2.512961762799741, "grad_norm": 1.0544072389602661, "learning_rate": 3.377113952808669e-07, "loss": 0.0638, "step": 15510 }, { "epoch": 2.5131237848347374, "grad_norm": 0.7999473214149475, "learning_rate": 3.374919377375796e-07, "loss": 0.062, "step": 15511 }, { "epoch": 2.5132858068697344, "grad_norm": 0.9653675556182861, "learning_rate": 3.3727254636180597e-07, "loss": 0.0689, "step": 15512 }, { "epoch": 2.513447828904731, "grad_norm": 0.9286097288131714, "learning_rate": 3.370532211602587e-07, "loss": 0.0689, "step": 15513 }, { "epoch": 2.5136098509397278, "grad_norm": 0.9723500609397888, "learning_rate": 3.3683396213964826e-07, "loss": 0.0592, "step": 15514 }, { "epoch": 2.5137718729747247, "grad_norm": 0.8343885540962219, "learning_rate": 3.3661476930668404e-07, "loss": 0.0516, "step": 15515 }, { "epoch": 2.513933895009721, "grad_norm": 0.8575674295425415, "learning_rate": 3.363956426680728e-07, "loss": 0.0581, "step": 15516 }, { "epoch": 2.514095917044718, "grad_norm": 0.7917022109031677, "learning_rate": 3.3617658223051935e-07, "loss": 0.058, "step": 15517 }, { "epoch": 2.514257939079715, "grad_norm": 0.8156291246414185, "learning_rate": 3.3595758800072515e-07, "loss": 0.0549, "step": 15518 }, { "epoch": 2.5144199611147116, "grad_norm": 0.7907753586769104, "learning_rate": 3.3573865998539236e-07, "loss": 0.056, "step": 15519 }, { "epoch": 2.5145819831497085, "grad_norm": 0.9458636045455933, "learning_rate": 3.355197981912198e-07, "loss": 0.0628, "step": 15520 }, { "epoch": 2.514744005184705, "grad_norm": 0.9401422739028931, "learning_rate": 3.3530100262490287e-07, "loss": 0.0609, "step": 15521 }, { "epoch": 2.514906027219702, "grad_norm": 0.8505311608314514, "learning_rate": 3.350822732931361e-07, "loss": 0.0601, "step": 15522 }, { "epoch": 2.5150680492546984, "grad_norm": 0.9184473752975464, "learning_rate": 3.3486361020261345e-07, "loss": 0.0595, "step": 15523 }, { "epoch": 2.5152300712896953, "grad_norm": 0.870324432849884, "learning_rate": 3.3464501336002544e-07, "loss": 0.0557, "step": 15524 }, { "epoch": 2.5153920933246923, "grad_norm": 0.8141091465950012, "learning_rate": 3.344264827720592e-07, "loss": 0.0593, "step": 15525 }, { "epoch": 2.5155541153596888, "grad_norm": 0.8419532775878906, "learning_rate": 3.342080184454022e-07, "loss": 0.0568, "step": 15526 }, { "epoch": 2.5157161373946857, "grad_norm": 0.961286723613739, "learning_rate": 3.339896203867385e-07, "loss": 0.0676, "step": 15527 }, { "epoch": 2.5158781594296826, "grad_norm": 0.8865793347358704, "learning_rate": 3.337712886027511e-07, "loss": 0.0601, "step": 15528 }, { "epoch": 2.516040181464679, "grad_norm": 0.8175432682037354, "learning_rate": 3.3355302310011996e-07, "loss": 0.0587, "step": 15529 }, { "epoch": 2.516202203499676, "grad_norm": 0.8112668991088867, "learning_rate": 3.3333482388552356e-07, "loss": 0.0549, "step": 15530 }, { "epoch": 2.516364225534673, "grad_norm": 0.8561041951179504, "learning_rate": 3.3311669096563886e-07, "loss": 0.0598, "step": 15531 }, { "epoch": 2.5165262475696695, "grad_norm": 0.9206536412239075, "learning_rate": 3.3289862434713857e-07, "loss": 0.0605, "step": 15532 }, { "epoch": 2.516688269604666, "grad_norm": 1.009718418121338, "learning_rate": 3.3268062403669646e-07, "loss": 0.0722, "step": 15533 }, { "epoch": 2.516850291639663, "grad_norm": 0.945552408695221, "learning_rate": 3.3246269004098275e-07, "loss": 0.0655, "step": 15534 }, { "epoch": 2.51701231367466, "grad_norm": 0.9071747660636902, "learning_rate": 3.322448223666658e-07, "loss": 0.059, "step": 15535 }, { "epoch": 2.5171743357096563, "grad_norm": 0.9418054223060608, "learning_rate": 3.320270210204107e-07, "loss": 0.0657, "step": 15536 }, { "epoch": 2.5173363577446533, "grad_norm": 0.7941263914108276, "learning_rate": 3.31809286008882e-07, "loss": 0.0506, "step": 15537 }, { "epoch": 2.51749837977965, "grad_norm": 0.8504292964935303, "learning_rate": 3.3159161733874347e-07, "loss": 0.0592, "step": 15538 }, { "epoch": 2.5176604018146467, "grad_norm": 0.9112208485603333, "learning_rate": 3.3137401501665334e-07, "loss": 0.061, "step": 15539 }, { "epoch": 2.5178224238496436, "grad_norm": 0.8395227789878845, "learning_rate": 3.311564790492702e-07, "loss": 0.0598, "step": 15540 }, { "epoch": 2.5179844458846405, "grad_norm": 1.0928508043289185, "learning_rate": 3.3093900944325046e-07, "loss": 0.0652, "step": 15541 }, { "epoch": 2.518146467919637, "grad_norm": 0.8456630110740662, "learning_rate": 3.307216062052479e-07, "loss": 0.0569, "step": 15542 }, { "epoch": 2.518308489954634, "grad_norm": 0.9061066508293152, "learning_rate": 3.305042693419147e-07, "loss": 0.0579, "step": 15543 }, { "epoch": 2.5184705119896305, "grad_norm": 0.9693790674209595, "learning_rate": 3.3028699885990085e-07, "loss": 0.0609, "step": 15544 }, { "epoch": 2.5186325340246274, "grad_norm": 0.8443101644515991, "learning_rate": 3.300697947658543e-07, "loss": 0.0571, "step": 15545 }, { "epoch": 2.518794556059624, "grad_norm": 0.828754723072052, "learning_rate": 3.298526570664207e-07, "loss": 0.0597, "step": 15546 }, { "epoch": 2.518956578094621, "grad_norm": 0.961614727973938, "learning_rate": 3.296355857682443e-07, "loss": 0.0658, "step": 15547 }, { "epoch": 2.5191186001296177, "grad_norm": 1.3140058517456055, "learning_rate": 3.294185808779665e-07, "loss": 0.0645, "step": 15548 }, { "epoch": 2.5192806221646142, "grad_norm": 0.8643244504928589, "learning_rate": 3.292016424022276e-07, "loss": 0.0578, "step": 15549 }, { "epoch": 2.519442644199611, "grad_norm": 0.9640627503395081, "learning_rate": 3.289847703476659e-07, "loss": 0.0605, "step": 15550 }, { "epoch": 2.519604666234608, "grad_norm": 1.0605357885360718, "learning_rate": 3.287679647209152e-07, "loss": 0.0665, "step": 15551 }, { "epoch": 2.5197666882696046, "grad_norm": 0.8427574038505554, "learning_rate": 3.285512255286111e-07, "loss": 0.0604, "step": 15552 }, { "epoch": 2.5199287103046015, "grad_norm": 0.8638863563537598, "learning_rate": 3.283345527773854e-07, "loss": 0.0562, "step": 15553 }, { "epoch": 2.5200907323395985, "grad_norm": 1.155843734741211, "learning_rate": 3.2811794647386625e-07, "loss": 0.0649, "step": 15554 }, { "epoch": 2.520252754374595, "grad_norm": 0.9180019497871399, "learning_rate": 3.279014066246822e-07, "loss": 0.0602, "step": 15555 }, { "epoch": 2.5204147764095914, "grad_norm": 0.9564993977546692, "learning_rate": 3.276849332364587e-07, "loss": 0.0619, "step": 15556 }, { "epoch": 2.5205767984445884, "grad_norm": 0.8472095727920532, "learning_rate": 3.2746852631581947e-07, "loss": 0.0545, "step": 15557 }, { "epoch": 2.5207388204795853, "grad_norm": 0.9872234463691711, "learning_rate": 3.2725218586938584e-07, "loss": 0.0583, "step": 15558 }, { "epoch": 2.520900842514582, "grad_norm": 0.9279904961585999, "learning_rate": 3.270359119037772e-07, "loss": 0.0557, "step": 15559 }, { "epoch": 2.5210628645495787, "grad_norm": 0.9269344806671143, "learning_rate": 3.2681970442561134e-07, "loss": 0.06, "step": 15560 }, { "epoch": 2.5212248865845757, "grad_norm": 0.9857649803161621, "learning_rate": 3.266035634415035e-07, "loss": 0.0634, "step": 15561 }, { "epoch": 2.521386908619572, "grad_norm": 0.7839047312736511, "learning_rate": 3.2638748895806705e-07, "loss": 0.0513, "step": 15562 }, { "epoch": 2.521548930654569, "grad_norm": 0.9197927713394165, "learning_rate": 3.261714809819136e-07, "loss": 0.0632, "step": 15563 }, { "epoch": 2.521710952689566, "grad_norm": 0.8434021472930908, "learning_rate": 3.259555395196526e-07, "loss": 0.0637, "step": 15564 }, { "epoch": 2.5218729747245625, "grad_norm": 0.9010359048843384, "learning_rate": 3.2573966457789014e-07, "loss": 0.0669, "step": 15565 }, { "epoch": 2.5220349967595594, "grad_norm": 1.0379987955093384, "learning_rate": 3.255238561632326e-07, "loss": 0.0633, "step": 15566 }, { "epoch": 2.522197018794556, "grad_norm": 0.9836294651031494, "learning_rate": 3.253081142822831e-07, "loss": 0.0634, "step": 15567 }, { "epoch": 2.522359040829553, "grad_norm": 1.0170096158981323, "learning_rate": 3.250924389416432e-07, "loss": 0.062, "step": 15568 }, { "epoch": 2.5225210628645494, "grad_norm": 1.039481282234192, "learning_rate": 3.2487683014791077e-07, "loss": 0.0666, "step": 15569 }, { "epoch": 2.5226830848995463, "grad_norm": 0.9465806484222412, "learning_rate": 3.2466128790768327e-07, "loss": 0.0606, "step": 15570 }, { "epoch": 2.5228451069345432, "grad_norm": 1.00591242313385, "learning_rate": 3.2444581222755733e-07, "loss": 0.065, "step": 15571 }, { "epoch": 2.5230071289695397, "grad_norm": 0.8654181957244873, "learning_rate": 3.2423040311412384e-07, "loss": 0.0602, "step": 15572 }, { "epoch": 2.5231691510045366, "grad_norm": 0.86398845911026, "learning_rate": 3.2401506057397503e-07, "loss": 0.0563, "step": 15573 }, { "epoch": 2.5233311730395336, "grad_norm": 0.8598949313163757, "learning_rate": 3.2379978461369976e-07, "loss": 0.0625, "step": 15574 }, { "epoch": 2.52349319507453, "grad_norm": 0.9612644910812378, "learning_rate": 3.2358457523988446e-07, "loss": 0.0576, "step": 15575 }, { "epoch": 2.523655217109527, "grad_norm": 0.9757301211357117, "learning_rate": 3.233694324591144e-07, "loss": 0.0639, "step": 15576 }, { "epoch": 2.5238172391445235, "grad_norm": 1.0324627161026, "learning_rate": 3.2315435627797266e-07, "loss": 0.0676, "step": 15577 }, { "epoch": 2.5239792611795204, "grad_norm": 0.9274498224258423, "learning_rate": 3.229393467030395e-07, "loss": 0.0603, "step": 15578 }, { "epoch": 2.524141283214517, "grad_norm": 0.9407957196235657, "learning_rate": 3.2272440374089443e-07, "loss": 0.0647, "step": 15579 }, { "epoch": 2.524303305249514, "grad_norm": 0.7908738255500793, "learning_rate": 3.225095273981127e-07, "loss": 0.0517, "step": 15580 }, { "epoch": 2.524465327284511, "grad_norm": 0.8206102848052979, "learning_rate": 3.2229471768127047e-07, "loss": 0.0551, "step": 15581 }, { "epoch": 2.5246273493195073, "grad_norm": 0.9348705410957336, "learning_rate": 3.2207997459694053e-07, "loss": 0.065, "step": 15582 }, { "epoch": 2.524789371354504, "grad_norm": 0.9329501986503601, "learning_rate": 3.218652981516923e-07, "loss": 0.0654, "step": 15583 }, { "epoch": 2.524951393389501, "grad_norm": 0.928276002407074, "learning_rate": 3.2165068835209506e-07, "loss": 0.0653, "step": 15584 }, { "epoch": 2.5251134154244976, "grad_norm": 0.9858642220497131, "learning_rate": 3.214361452047149e-07, "loss": 0.0642, "step": 15585 }, { "epoch": 2.5252754374594946, "grad_norm": 1.175571322441101, "learning_rate": 3.2122166871611736e-07, "loss": 0.0692, "step": 15586 }, { "epoch": 2.5254374594944915, "grad_norm": 0.8295290470123291, "learning_rate": 3.2100725889286393e-07, "loss": 0.0581, "step": 15587 }, { "epoch": 2.525599481529488, "grad_norm": 0.8364087343215942, "learning_rate": 3.207929157415152e-07, "loss": 0.0551, "step": 15588 }, { "epoch": 2.525761503564485, "grad_norm": 0.9872838854789734, "learning_rate": 3.205786392686297e-07, "loss": 0.0602, "step": 15589 }, { "epoch": 2.5259235255994814, "grad_norm": 0.8154653310775757, "learning_rate": 3.2036442948076395e-07, "loss": 0.0598, "step": 15590 }, { "epoch": 2.5260855476344783, "grad_norm": 0.900958776473999, "learning_rate": 3.201502863844716e-07, "loss": 0.0608, "step": 15591 }, { "epoch": 2.526247569669475, "grad_norm": 0.9498975276947021, "learning_rate": 3.199362099863057e-07, "loss": 0.0633, "step": 15592 }, { "epoch": 2.5264095917044718, "grad_norm": 0.947784423828125, "learning_rate": 3.1972220029281605e-07, "loss": 0.0554, "step": 15593 }, { "epoch": 2.5265716137394687, "grad_norm": 0.9299330115318298, "learning_rate": 3.19508257310551e-07, "loss": 0.0585, "step": 15594 }, { "epoch": 2.526733635774465, "grad_norm": 0.9528511166572571, "learning_rate": 3.1929438104605636e-07, "loss": 0.06, "step": 15595 }, { "epoch": 2.526895657809462, "grad_norm": 0.8668416142463684, "learning_rate": 3.190805715058765e-07, "loss": 0.0589, "step": 15596 }, { "epoch": 2.527057679844459, "grad_norm": 0.8737120032310486, "learning_rate": 3.1886682869655403e-07, "loss": 0.0571, "step": 15597 }, { "epoch": 2.5272197018794555, "grad_norm": 0.9954075813293457, "learning_rate": 3.1865315262462783e-07, "loss": 0.0658, "step": 15598 }, { "epoch": 2.5273817239144525, "grad_norm": 0.8705222010612488, "learning_rate": 3.1843954329663583e-07, "loss": 0.0588, "step": 15599 }, { "epoch": 2.527543745949449, "grad_norm": 0.8527966737747192, "learning_rate": 3.182260007191157e-07, "loss": 0.059, "step": 15600 }, { "epoch": 2.527705767984446, "grad_norm": 1.4021953344345093, "learning_rate": 3.1801252489859933e-07, "loss": 0.0547, "step": 15601 }, { "epoch": 2.5278677900194424, "grad_norm": 0.9866907596588135, "learning_rate": 3.1779911584161963e-07, "loss": 0.0631, "step": 15602 }, { "epoch": 2.5280298120544393, "grad_norm": 0.8450327515602112, "learning_rate": 3.175857735547061e-07, "loss": 0.06, "step": 15603 }, { "epoch": 2.5281918340894363, "grad_norm": 0.7806409001350403, "learning_rate": 3.173724980443868e-07, "loss": 0.0496, "step": 15604 }, { "epoch": 2.5283538561244328, "grad_norm": 0.8109617233276367, "learning_rate": 3.171592893171868e-07, "loss": 0.0517, "step": 15605 }, { "epoch": 2.5285158781594297, "grad_norm": 0.9940459132194519, "learning_rate": 3.1694614737963036e-07, "loss": 0.0662, "step": 15606 }, { "epoch": 2.5286779001944266, "grad_norm": 0.9137442111968994, "learning_rate": 3.167330722382389e-07, "loss": 0.0588, "step": 15607 }, { "epoch": 2.528839922229423, "grad_norm": 0.7500993013381958, "learning_rate": 3.165200638995328e-07, "loss": 0.0513, "step": 15608 }, { "epoch": 2.52900194426442, "grad_norm": 0.8935854434967041, "learning_rate": 3.163071223700273e-07, "loss": 0.0569, "step": 15609 }, { "epoch": 2.529163966299417, "grad_norm": 0.8556219935417175, "learning_rate": 3.160942476562404e-07, "loss": 0.0588, "step": 15610 }, { "epoch": 2.5293259883344135, "grad_norm": 1.0256495475769043, "learning_rate": 3.158814397646842e-07, "loss": 0.0636, "step": 15611 }, { "epoch": 2.5294880103694104, "grad_norm": 0.8018587827682495, "learning_rate": 3.1566869870187115e-07, "loss": 0.0522, "step": 15612 }, { "epoch": 2.529650032404407, "grad_norm": 0.794672966003418, "learning_rate": 3.1545602447430904e-07, "loss": 0.0521, "step": 15613 }, { "epoch": 2.529812054439404, "grad_norm": 0.8398905992507935, "learning_rate": 3.1524341708850633e-07, "loss": 0.0643, "step": 15614 }, { "epoch": 2.5299740764744003, "grad_norm": 0.9044195413589478, "learning_rate": 3.150308765509688e-07, "loss": 0.0608, "step": 15615 }, { "epoch": 2.5301360985093972, "grad_norm": 0.9135866761207581, "learning_rate": 3.148184028681983e-07, "loss": 0.0634, "step": 15616 }, { "epoch": 2.530298120544394, "grad_norm": 0.8611244559288025, "learning_rate": 3.1460599604669684e-07, "loss": 0.0589, "step": 15617 }, { "epoch": 2.5304601425793907, "grad_norm": 0.9294557571411133, "learning_rate": 3.1439365609296253e-07, "loss": 0.0573, "step": 15618 }, { "epoch": 2.5306221646143876, "grad_norm": 0.9217930436134338, "learning_rate": 3.141813830134943e-07, "loss": 0.0631, "step": 15619 }, { "epoch": 2.5307841866493845, "grad_norm": 0.8782767653465271, "learning_rate": 3.1396917681478595e-07, "loss": 0.0633, "step": 15620 }, { "epoch": 2.530946208684381, "grad_norm": 0.9176865220069885, "learning_rate": 3.137570375033305e-07, "loss": 0.0532, "step": 15621 }, { "epoch": 2.531108230719378, "grad_norm": 0.9333579540252686, "learning_rate": 3.13544965085619e-07, "loss": 0.0611, "step": 15622 }, { "epoch": 2.5312702527543745, "grad_norm": 0.8954581618309021, "learning_rate": 3.133329595681406e-07, "loss": 0.0563, "step": 15623 }, { "epoch": 2.5314322747893714, "grad_norm": 0.8141576051712036, "learning_rate": 3.1312102095738205e-07, "loss": 0.0513, "step": 15624 }, { "epoch": 2.531594296824368, "grad_norm": 0.9502742886543274, "learning_rate": 3.1290914925982794e-07, "loss": 0.0651, "step": 15625 }, { "epoch": 2.531756318859365, "grad_norm": 0.951263964176178, "learning_rate": 3.12697344481962e-07, "loss": 0.0565, "step": 15626 }, { "epoch": 2.5319183408943617, "grad_norm": 0.9662578701972961, "learning_rate": 3.124856066302634e-07, "loss": 0.0623, "step": 15627 }, { "epoch": 2.5320803629293582, "grad_norm": 1.0653865337371826, "learning_rate": 3.1227393571121117e-07, "loss": 0.0634, "step": 15628 }, { "epoch": 2.532242384964355, "grad_norm": 0.9926478266716003, "learning_rate": 3.1206233173128265e-07, "loss": 0.0583, "step": 15629 }, { "epoch": 2.532404406999352, "grad_norm": 0.9750377535820007, "learning_rate": 3.1185079469695263e-07, "loss": 0.0629, "step": 15630 }, { "epoch": 2.5325664290343486, "grad_norm": 0.9622008800506592, "learning_rate": 3.116393246146926e-07, "loss": 0.0643, "step": 15631 }, { "epoch": 2.5327284510693455, "grad_norm": 1.4896754026412964, "learning_rate": 3.1142792149097297e-07, "loss": 0.0577, "step": 15632 }, { "epoch": 2.5328904731043425, "grad_norm": 1.024353265762329, "learning_rate": 3.1121658533226376e-07, "loss": 0.0706, "step": 15633 }, { "epoch": 2.533052495139339, "grad_norm": 0.9080899953842163, "learning_rate": 3.110053161450299e-07, "loss": 0.0589, "step": 15634 }, { "epoch": 2.5332145171743354, "grad_norm": 0.8674499988555908, "learning_rate": 3.1079411393573597e-07, "loss": 0.0575, "step": 15635 }, { "epoch": 2.5333765392093324, "grad_norm": 0.9596990346908569, "learning_rate": 3.105829787108444e-07, "loss": 0.0649, "step": 15636 }, { "epoch": 2.5335385612443293, "grad_norm": 0.9714479446411133, "learning_rate": 3.103719104768155e-07, "loss": 0.0638, "step": 15637 }, { "epoch": 2.533700583279326, "grad_norm": 0.8182390332221985, "learning_rate": 3.10160909240107e-07, "loss": 0.0556, "step": 15638 }, { "epoch": 2.5338626053143227, "grad_norm": 0.8322840332984924, "learning_rate": 3.0994997500717575e-07, "loss": 0.0517, "step": 15639 }, { "epoch": 2.5340246273493197, "grad_norm": 0.9775702357292175, "learning_rate": 3.0973910778447523e-07, "loss": 0.0633, "step": 15640 }, { "epoch": 2.534186649384316, "grad_norm": 0.8347146511077881, "learning_rate": 3.0952830757845833e-07, "loss": 0.0552, "step": 15641 }, { "epoch": 2.534348671419313, "grad_norm": 0.7945565581321716, "learning_rate": 3.0931757439557313e-07, "loss": 0.0533, "step": 15642 }, { "epoch": 2.53451069345431, "grad_norm": 0.970628023147583, "learning_rate": 3.091069082422696e-07, "loss": 0.0589, "step": 15643 }, { "epoch": 2.5346727154893065, "grad_norm": 0.7898392677307129, "learning_rate": 3.08896309124993e-07, "loss": 0.0513, "step": 15644 }, { "epoch": 2.5348347375243034, "grad_norm": 0.951751172542572, "learning_rate": 3.086857770501867e-07, "loss": 0.0617, "step": 15645 }, { "epoch": 2.5349967595593, "grad_norm": 0.9525232911109924, "learning_rate": 3.084753120242928e-07, "loss": 0.0635, "step": 15646 }, { "epoch": 2.535158781594297, "grad_norm": 0.8588886857032776, "learning_rate": 3.0826491405375024e-07, "loss": 0.0586, "step": 15647 }, { "epoch": 2.5353208036292934, "grad_norm": 0.9159846901893616, "learning_rate": 3.0805458314499855e-07, "loss": 0.063, "step": 15648 }, { "epoch": 2.5354828256642903, "grad_norm": 0.870168149471283, "learning_rate": 3.078443193044717e-07, "loss": 0.0574, "step": 15649 }, { "epoch": 2.535644847699287, "grad_norm": 0.8437957763671875, "learning_rate": 3.076341225386037e-07, "loss": 0.0585, "step": 15650 }, { "epoch": 2.5358068697342837, "grad_norm": 0.9445669054985046, "learning_rate": 3.074239928538261e-07, "loss": 0.0694, "step": 15651 }, { "epoch": 2.5359688917692806, "grad_norm": 0.81999272108078, "learning_rate": 3.0721393025656853e-07, "loss": 0.0524, "step": 15652 }, { "epoch": 2.5361309138042776, "grad_norm": 1.2289856672286987, "learning_rate": 3.070039347532583e-07, "loss": 0.0617, "step": 15653 }, { "epoch": 2.536292935839274, "grad_norm": 1.0222786664962769, "learning_rate": 3.0679400635032053e-07, "loss": 0.0564, "step": 15654 }, { "epoch": 2.536454957874271, "grad_norm": 0.8596547842025757, "learning_rate": 3.065841450541787e-07, "loss": 0.0636, "step": 15655 }, { "epoch": 2.536616979909268, "grad_norm": 1.1208213567733765, "learning_rate": 3.063743508712544e-07, "loss": 0.0638, "step": 15656 }, { "epoch": 2.5367790019442644, "grad_norm": 0.8171042799949646, "learning_rate": 3.06164623807966e-07, "loss": 0.058, "step": 15657 }, { "epoch": 2.536941023979261, "grad_norm": 0.8748427033424377, "learning_rate": 3.059549638707315e-07, "loss": 0.0573, "step": 15658 }, { "epoch": 2.537103046014258, "grad_norm": 1.00801420211792, "learning_rate": 3.0574537106596606e-07, "loss": 0.0647, "step": 15659 }, { "epoch": 2.537265068049255, "grad_norm": 0.9508548974990845, "learning_rate": 3.0553584540008176e-07, "loss": 0.0621, "step": 15660 }, { "epoch": 2.5374270900842513, "grad_norm": 0.862837553024292, "learning_rate": 3.053263868794895e-07, "loss": 0.0647, "step": 15661 }, { "epoch": 2.537589112119248, "grad_norm": 0.9013253450393677, "learning_rate": 3.0511699551059927e-07, "loss": 0.0671, "step": 15662 }, { "epoch": 2.537751134154245, "grad_norm": 1.0130845308303833, "learning_rate": 3.049076712998181e-07, "loss": 0.0585, "step": 15663 }, { "epoch": 2.5379131561892416, "grad_norm": 1.0517815351486206, "learning_rate": 3.0469841425354945e-07, "loss": 0.0628, "step": 15664 }, { "epoch": 2.5380751782242386, "grad_norm": 0.8955190777778625, "learning_rate": 3.044892243781969e-07, "loss": 0.0606, "step": 15665 }, { "epoch": 2.5382372002592355, "grad_norm": 0.9051300287246704, "learning_rate": 3.0428010168016107e-07, "loss": 0.0605, "step": 15666 }, { "epoch": 2.538399222294232, "grad_norm": 0.8366383910179138, "learning_rate": 3.040710461658408e-07, "loss": 0.0598, "step": 15667 }, { "epoch": 2.538561244329229, "grad_norm": 0.8393315672874451, "learning_rate": 3.0386205784163207e-07, "loss": 0.0633, "step": 15668 }, { "epoch": 2.5387232663642254, "grad_norm": 1.1416642665863037, "learning_rate": 3.0365313671393e-07, "loss": 0.0619, "step": 15669 }, { "epoch": 2.5388852883992223, "grad_norm": 0.8364261984825134, "learning_rate": 3.0344428278912765e-07, "loss": 0.0606, "step": 15670 }, { "epoch": 2.539047310434219, "grad_norm": 0.9973102807998657, "learning_rate": 3.0323549607361333e-07, "loss": 0.0586, "step": 15671 }, { "epoch": 2.5392093324692158, "grad_norm": 0.8072327971458435, "learning_rate": 3.030267765737774e-07, "loss": 0.0571, "step": 15672 }, { "epoch": 2.5393713545042127, "grad_norm": 1.0454763174057007, "learning_rate": 3.0281812429600544e-07, "loss": 0.0569, "step": 15673 }, { "epoch": 2.539533376539209, "grad_norm": 1.0612105131149292, "learning_rate": 3.026095392466824e-07, "loss": 0.0655, "step": 15674 }, { "epoch": 2.539695398574206, "grad_norm": 1.0224926471710205, "learning_rate": 3.024010214321893e-07, "loss": 0.0707, "step": 15675 }, { "epoch": 2.539857420609203, "grad_norm": 0.8485469222068787, "learning_rate": 3.021925708589066e-07, "loss": 0.0567, "step": 15676 }, { "epoch": 2.5400194426441995, "grad_norm": 1.0038886070251465, "learning_rate": 3.019841875332133e-07, "loss": 0.0614, "step": 15677 }, { "epoch": 2.5401814646791965, "grad_norm": 0.8578863739967346, "learning_rate": 3.0177587146148435e-07, "loss": 0.061, "step": 15678 }, { "epoch": 2.540343486714193, "grad_norm": 0.7758865356445312, "learning_rate": 3.0156762265009437e-07, "loss": 0.0567, "step": 15679 }, { "epoch": 2.54050550874919, "grad_norm": 1.1475505828857422, "learning_rate": 3.013594411054144e-07, "loss": 0.0682, "step": 15680 }, { "epoch": 2.5406675307841864, "grad_norm": 0.9241853952407837, "learning_rate": 3.0115132683381565e-07, "loss": 0.0679, "step": 15681 }, { "epoch": 2.5408295528191833, "grad_norm": 0.9604292511940002, "learning_rate": 3.0094327984166506e-07, "loss": 0.0564, "step": 15682 }, { "epoch": 2.5409915748541803, "grad_norm": 0.8330355882644653, "learning_rate": 3.007353001353283e-07, "loss": 0.0574, "step": 15683 }, { "epoch": 2.5411535968891767, "grad_norm": 0.9024338126182556, "learning_rate": 3.0052738772116925e-07, "loss": 0.069, "step": 15684 }, { "epoch": 2.5413156189241737, "grad_norm": 0.9222663640975952, "learning_rate": 3.003195426055497e-07, "loss": 0.0632, "step": 15685 }, { "epoch": 2.5414776409591706, "grad_norm": 0.9716166257858276, "learning_rate": 3.001117647948287e-07, "loss": 0.0651, "step": 15686 }, { "epoch": 2.541639662994167, "grad_norm": 0.904063880443573, "learning_rate": 2.9990405429536433e-07, "loss": 0.0624, "step": 15687 }, { "epoch": 2.541801685029164, "grad_norm": 0.8916031122207642, "learning_rate": 2.996964111135123e-07, "loss": 0.0565, "step": 15688 }, { "epoch": 2.541963707064161, "grad_norm": 0.8588840961456299, "learning_rate": 2.9948883525562464e-07, "loss": 0.0574, "step": 15689 }, { "epoch": 2.5421257290991575, "grad_norm": 0.8580512404441833, "learning_rate": 2.992813267280531e-07, "loss": 0.0594, "step": 15690 }, { "epoch": 2.5422877511341544, "grad_norm": 0.8236280083656311, "learning_rate": 2.9907388553714806e-07, "loss": 0.0582, "step": 15691 }, { "epoch": 2.542449773169151, "grad_norm": 0.8864600658416748, "learning_rate": 2.988665116892564e-07, "loss": 0.0582, "step": 15692 }, { "epoch": 2.542611795204148, "grad_norm": 0.934640109539032, "learning_rate": 2.986592051907222e-07, "loss": 0.066, "step": 15693 }, { "epoch": 2.5427738172391443, "grad_norm": 0.8519915342330933, "learning_rate": 2.9845196604788935e-07, "loss": 0.0627, "step": 15694 }, { "epoch": 2.5429358392741412, "grad_norm": 0.9214040637016296, "learning_rate": 2.9824479426709853e-07, "loss": 0.0572, "step": 15695 }, { "epoch": 2.543097861309138, "grad_norm": 1.0581270456314087, "learning_rate": 2.980376898546888e-07, "loss": 0.0634, "step": 15696 }, { "epoch": 2.5432598833441347, "grad_norm": 1.086905598640442, "learning_rate": 2.9783065281699747e-07, "loss": 0.0629, "step": 15697 }, { "epoch": 2.5434219053791316, "grad_norm": 1.031447172164917, "learning_rate": 2.976236831603588e-07, "loss": 0.0629, "step": 15698 }, { "epoch": 2.5435839274141285, "grad_norm": 0.9910357594490051, "learning_rate": 2.9741678089110576e-07, "loss": 0.0699, "step": 15699 }, { "epoch": 2.543745949449125, "grad_norm": 0.838286817073822, "learning_rate": 2.972099460155689e-07, "loss": 0.0555, "step": 15700 }, { "epoch": 2.543907971484122, "grad_norm": 0.939771294593811, "learning_rate": 2.970031785400773e-07, "loss": 0.0593, "step": 15701 }, { "epoch": 2.5440699935191184, "grad_norm": 1.2176207304000854, "learning_rate": 2.9679647847095735e-07, "loss": 0.0637, "step": 15702 }, { "epoch": 2.5442320155541154, "grad_norm": 0.8864500522613525, "learning_rate": 2.965898458145339e-07, "loss": 0.0614, "step": 15703 }, { "epoch": 2.544394037589112, "grad_norm": 1.0630028247833252, "learning_rate": 2.9638328057712775e-07, "loss": 0.0688, "step": 15704 }, { "epoch": 2.544556059624109, "grad_norm": 0.9076703786849976, "learning_rate": 2.9617678276506136e-07, "loss": 0.0578, "step": 15705 }, { "epoch": 2.5447180816591057, "grad_norm": 0.9207355976104736, "learning_rate": 2.9597035238465214e-07, "loss": 0.0546, "step": 15706 }, { "epoch": 2.5448801036941022, "grad_norm": 0.9597820043563843, "learning_rate": 2.9576398944221707e-07, "loss": 0.0627, "step": 15707 }, { "epoch": 2.545042125729099, "grad_norm": 0.8808280229568481, "learning_rate": 2.9555769394406934e-07, "loss": 0.0615, "step": 15708 }, { "epoch": 2.545204147764096, "grad_norm": 0.9642734527587891, "learning_rate": 2.9535146589652093e-07, "loss": 0.0668, "step": 15709 }, { "epoch": 2.5453661697990926, "grad_norm": 0.897223711013794, "learning_rate": 2.9514530530588367e-07, "loss": 0.0609, "step": 15710 }, { "epoch": 2.5455281918340895, "grad_norm": 0.9457272887229919, "learning_rate": 2.949392121784636e-07, "loss": 0.0596, "step": 15711 }, { "epoch": 2.5456902138690864, "grad_norm": 0.9758214950561523, "learning_rate": 2.947331865205677e-07, "loss": 0.0564, "step": 15712 }, { "epoch": 2.545852235904083, "grad_norm": 1.0195071697235107, "learning_rate": 2.9452722833849976e-07, "loss": 0.0663, "step": 15713 }, { "epoch": 2.54601425793908, "grad_norm": 0.8964647054672241, "learning_rate": 2.943213376385612e-07, "loss": 0.0587, "step": 15714 }, { "epoch": 2.5461762799740764, "grad_norm": 1.0028923749923706, "learning_rate": 2.9411551442705243e-07, "loss": 0.0637, "step": 15715 }, { "epoch": 2.5463383020090733, "grad_norm": 0.8939936757087708, "learning_rate": 2.9390975871027046e-07, "loss": 0.0589, "step": 15716 }, { "epoch": 2.54650032404407, "grad_norm": 0.9271273016929626, "learning_rate": 2.9370407049451126e-07, "loss": 0.0572, "step": 15717 }, { "epoch": 2.5466623460790667, "grad_norm": 0.9877331256866455, "learning_rate": 2.934984497860691e-07, "loss": 0.0683, "step": 15718 }, { "epoch": 2.5468243681140637, "grad_norm": 0.8610256910324097, "learning_rate": 2.932928965912335e-07, "loss": 0.0636, "step": 15719 }, { "epoch": 2.54698639014906, "grad_norm": 0.9936156868934631, "learning_rate": 2.9308741091629596e-07, "loss": 0.059, "step": 15720 }, { "epoch": 2.547148412184057, "grad_norm": 0.9047034382820129, "learning_rate": 2.928819927675433e-07, "loss": 0.064, "step": 15721 }, { "epoch": 2.547310434219054, "grad_norm": 0.9128992557525635, "learning_rate": 2.9267664215126e-07, "loss": 0.0647, "step": 15722 }, { "epoch": 2.5474724562540505, "grad_norm": 0.9131163358688354, "learning_rate": 2.924713590737294e-07, "loss": 0.0631, "step": 15723 }, { "epoch": 2.5476344782890474, "grad_norm": 0.8168431520462036, "learning_rate": 2.9226614354123356e-07, "loss": 0.0582, "step": 15724 }, { "epoch": 2.547796500324044, "grad_norm": 0.9571110606193542, "learning_rate": 2.9206099556005145e-07, "loss": 0.0636, "step": 15725 }, { "epoch": 2.547958522359041, "grad_norm": 0.9737008213996887, "learning_rate": 2.9185591513645947e-07, "loss": 0.0674, "step": 15726 }, { "epoch": 2.5481205443940373, "grad_norm": 0.8390480875968933, "learning_rate": 2.9165090227673306e-07, "loss": 0.0563, "step": 15727 }, { "epoch": 2.5482825664290343, "grad_norm": 1.0809296369552612, "learning_rate": 2.914459569871447e-07, "loss": 0.0703, "step": 15728 }, { "epoch": 2.548444588464031, "grad_norm": 0.7961978912353516, "learning_rate": 2.9124107927396534e-07, "loss": 0.0548, "step": 15729 }, { "epoch": 2.5486066104990277, "grad_norm": 0.9523206949234009, "learning_rate": 2.91036269143464e-07, "loss": 0.0619, "step": 15730 }, { "epoch": 2.5487686325340246, "grad_norm": 0.756062924861908, "learning_rate": 2.908315266019074e-07, "loss": 0.0545, "step": 15731 }, { "epoch": 2.5489306545690216, "grad_norm": 0.991388201713562, "learning_rate": 2.9062685165555963e-07, "loss": 0.0585, "step": 15732 }, { "epoch": 2.549092676604018, "grad_norm": 0.9171768426895142, "learning_rate": 2.904222443106838e-07, "loss": 0.0607, "step": 15733 }, { "epoch": 2.549254698639015, "grad_norm": 1.0466676950454712, "learning_rate": 2.9021770457354046e-07, "loss": 0.0611, "step": 15734 }, { "epoch": 2.549416720674012, "grad_norm": 1.0103275775909424, "learning_rate": 2.9001323245038765e-07, "loss": 0.0688, "step": 15735 }, { "epoch": 2.5495787427090084, "grad_norm": 0.8275580406188965, "learning_rate": 2.8980882794748227e-07, "loss": 0.0535, "step": 15736 }, { "epoch": 2.5497407647440054, "grad_norm": 0.885045051574707, "learning_rate": 2.896044910710777e-07, "loss": 0.0677, "step": 15737 }, { "epoch": 2.549902786779002, "grad_norm": 0.9406101703643799, "learning_rate": 2.894002218274261e-07, "loss": 0.0581, "step": 15738 }, { "epoch": 2.5500648088139988, "grad_norm": 0.9260234236717224, "learning_rate": 2.8919602022277923e-07, "loss": 0.0661, "step": 15739 }, { "epoch": 2.5502268308489953, "grad_norm": 1.0107648372650146, "learning_rate": 2.8899188626338363e-07, "loss": 0.0613, "step": 15740 }, { "epoch": 2.550388852883992, "grad_norm": 1.016796350479126, "learning_rate": 2.8878781995548584e-07, "loss": 0.0708, "step": 15741 }, { "epoch": 2.550550874918989, "grad_norm": 0.8699153661727905, "learning_rate": 2.8858382130532965e-07, "loss": 0.0604, "step": 15742 }, { "epoch": 2.5507128969539856, "grad_norm": 0.9409704804420471, "learning_rate": 2.8837989031915674e-07, "loss": 0.0604, "step": 15743 }, { "epoch": 2.5508749189889826, "grad_norm": 0.884575605392456, "learning_rate": 2.8817602700320747e-07, "loss": 0.0657, "step": 15744 }, { "epoch": 2.5510369410239795, "grad_norm": 0.9905060529708862, "learning_rate": 2.879722313637193e-07, "loss": 0.0693, "step": 15745 }, { "epoch": 2.551198963058976, "grad_norm": 0.9149320125579834, "learning_rate": 2.8776850340692777e-07, "loss": 0.0633, "step": 15746 }, { "epoch": 2.551360985093973, "grad_norm": 1.0935709476470947, "learning_rate": 2.875648431390665e-07, "loss": 0.0625, "step": 15747 }, { "epoch": 2.5515230071289694, "grad_norm": 0.9420832991600037, "learning_rate": 2.87361250566367e-07, "loss": 0.0557, "step": 15748 }, { "epoch": 2.5516850291639663, "grad_norm": 1.622456431388855, "learning_rate": 2.871577256950589e-07, "loss": 0.0592, "step": 15749 }, { "epoch": 2.551847051198963, "grad_norm": 0.8848714232444763, "learning_rate": 2.869542685313692e-07, "loss": 0.0572, "step": 15750 }, { "epoch": 2.5520090732339598, "grad_norm": 1.0072789192199707, "learning_rate": 2.8675087908152407e-07, "loss": 0.0661, "step": 15751 }, { "epoch": 2.5521710952689567, "grad_norm": 0.8592365384101868, "learning_rate": 2.865475573517451e-07, "loss": 0.0597, "step": 15752 }, { "epoch": 2.552333117303953, "grad_norm": 0.9718042612075806, "learning_rate": 2.863443033482549e-07, "loss": 0.0634, "step": 15753 }, { "epoch": 2.55249513933895, "grad_norm": 1.0915923118591309, "learning_rate": 2.8614111707727267e-07, "loss": 0.0533, "step": 15754 }, { "epoch": 2.552657161373947, "grad_norm": 0.9023035764694214, "learning_rate": 2.859379985450142e-07, "loss": 0.0615, "step": 15755 }, { "epoch": 2.5528191834089435, "grad_norm": 0.9342001676559448, "learning_rate": 2.8573494775769485e-07, "loss": 0.0639, "step": 15756 }, { "epoch": 2.5529812054439405, "grad_norm": 1.2013731002807617, "learning_rate": 2.8553196472152794e-07, "loss": 0.0641, "step": 15757 }, { "epoch": 2.5531432274789374, "grad_norm": 0.8660479784011841, "learning_rate": 2.853290494427238e-07, "loss": 0.0601, "step": 15758 }, { "epoch": 2.553305249513934, "grad_norm": 0.8665417432785034, "learning_rate": 2.851262019274917e-07, "loss": 0.066, "step": 15759 }, { "epoch": 2.5534672715489304, "grad_norm": 0.9686926007270813, "learning_rate": 2.8492342218203766e-07, "loss": 0.061, "step": 15760 }, { "epoch": 2.5536292935839273, "grad_norm": 0.931259274482727, "learning_rate": 2.847207102125665e-07, "loss": 0.0667, "step": 15761 }, { "epoch": 2.5537913156189243, "grad_norm": 0.8071960210800171, "learning_rate": 2.845180660252808e-07, "loss": 0.0659, "step": 15762 }, { "epoch": 2.5539533376539207, "grad_norm": 0.8331485986709595, "learning_rate": 2.843154896263811e-07, "loss": 0.0538, "step": 15763 }, { "epoch": 2.5541153596889177, "grad_norm": 0.8786145448684692, "learning_rate": 2.8411298102206524e-07, "loss": 0.0623, "step": 15764 }, { "epoch": 2.5542773817239146, "grad_norm": 0.9104515314102173, "learning_rate": 2.839105402185305e-07, "loss": 0.065, "step": 15765 }, { "epoch": 2.554439403758911, "grad_norm": 0.935090959072113, "learning_rate": 2.837081672219694e-07, "loss": 0.0581, "step": 15766 }, { "epoch": 2.554601425793908, "grad_norm": 0.8648780584335327, "learning_rate": 2.835058620385756e-07, "loss": 0.061, "step": 15767 }, { "epoch": 2.554763447828905, "grad_norm": 0.8187403678894043, "learning_rate": 2.833036246745385e-07, "loss": 0.0566, "step": 15768 }, { "epoch": 2.5549254698639015, "grad_norm": 0.8875253796577454, "learning_rate": 2.831014551360467e-07, "loss": 0.0608, "step": 15769 }, { "epoch": 2.5550874918988984, "grad_norm": 0.8889281153678894, "learning_rate": 2.828993534292851e-07, "loss": 0.0608, "step": 15770 }, { "epoch": 2.555249513933895, "grad_norm": 0.9261566400527954, "learning_rate": 2.8269731956043736e-07, "loss": 0.0626, "step": 15771 }, { "epoch": 2.555411535968892, "grad_norm": 0.8873212337493896, "learning_rate": 2.824953535356872e-07, "loss": 0.0524, "step": 15772 }, { "epoch": 2.5555735580038883, "grad_norm": 0.919251561164856, "learning_rate": 2.8229345536121223e-07, "loss": 0.0607, "step": 15773 }, { "epoch": 2.5557355800388852, "grad_norm": 0.8338304162025452, "learning_rate": 2.820916250431907e-07, "loss": 0.0569, "step": 15774 }, { "epoch": 2.555897602073882, "grad_norm": 0.7551842331886292, "learning_rate": 2.818898625877983e-07, "loss": 0.0512, "step": 15775 }, { "epoch": 2.5560596241088787, "grad_norm": 0.929349422454834, "learning_rate": 2.8168816800120845e-07, "loss": 0.0586, "step": 15776 }, { "epoch": 2.5562216461438756, "grad_norm": 0.8814112544059753, "learning_rate": 2.814865412895926e-07, "loss": 0.0554, "step": 15777 }, { "epoch": 2.5563836681788725, "grad_norm": 0.9311420917510986, "learning_rate": 2.812849824591196e-07, "loss": 0.0615, "step": 15778 }, { "epoch": 2.556545690213869, "grad_norm": 0.8256794810295105, "learning_rate": 2.8108349151595713e-07, "loss": 0.0577, "step": 15779 }, { "epoch": 2.556707712248866, "grad_norm": 0.8068723082542419, "learning_rate": 2.808820684662705e-07, "loss": 0.0626, "step": 15780 }, { "epoch": 2.556869734283863, "grad_norm": 0.75613933801651, "learning_rate": 2.806807133162215e-07, "loss": 0.0509, "step": 15781 }, { "epoch": 2.5570317563188594, "grad_norm": 1.0102713108062744, "learning_rate": 2.804794260719726e-07, "loss": 0.0697, "step": 15782 }, { "epoch": 2.557193778353856, "grad_norm": 0.8469998836517334, "learning_rate": 2.8027820673968256e-07, "loss": 0.0618, "step": 15783 }, { "epoch": 2.557355800388853, "grad_norm": 1.0581094026565552, "learning_rate": 2.800770553255072e-07, "loss": 0.0644, "step": 15784 }, { "epoch": 2.5575178224238497, "grad_norm": 0.8770332932472229, "learning_rate": 2.7987597183560175e-07, "loss": 0.0615, "step": 15785 }, { "epoch": 2.557679844458846, "grad_norm": 0.9421771764755249, "learning_rate": 2.796749562761186e-07, "loss": 0.0575, "step": 15786 }, { "epoch": 2.557841866493843, "grad_norm": 1.0569273233413696, "learning_rate": 2.7947400865320966e-07, "loss": 0.0666, "step": 15787 }, { "epoch": 2.55800388852884, "grad_norm": 1.0118812322616577, "learning_rate": 2.7927312897302217e-07, "loss": 0.0615, "step": 15788 }, { "epoch": 2.5581659105638366, "grad_norm": 1.168845772743225, "learning_rate": 2.790723172417026e-07, "loss": 0.065, "step": 15789 }, { "epoch": 2.5583279325988335, "grad_norm": 0.9244213700294495, "learning_rate": 2.7887157346539574e-07, "loss": 0.0632, "step": 15790 }, { "epoch": 2.5584899546338304, "grad_norm": 0.9718979001045227, "learning_rate": 2.7867089765024365e-07, "loss": 0.0678, "step": 15791 }, { "epoch": 2.558651976668827, "grad_norm": 1.047033667564392, "learning_rate": 2.7847028980238666e-07, "loss": 0.0602, "step": 15792 }, { "epoch": 2.558813998703824, "grad_norm": 1.0640023946762085, "learning_rate": 2.7826974992796266e-07, "loss": 0.0594, "step": 15793 }, { "epoch": 2.5589760207388204, "grad_norm": 0.8724460005760193, "learning_rate": 2.780692780331079e-07, "loss": 0.059, "step": 15794 }, { "epoch": 2.5591380427738173, "grad_norm": 0.7924855947494507, "learning_rate": 2.778688741239563e-07, "loss": 0.0529, "step": 15795 }, { "epoch": 2.559300064808814, "grad_norm": 0.8275933861732483, "learning_rate": 2.7766853820663963e-07, "loss": 0.0577, "step": 15796 }, { "epoch": 2.5594620868438107, "grad_norm": 1.010675072669983, "learning_rate": 2.774682702872877e-07, "loss": 0.0616, "step": 15797 }, { "epoch": 2.5596241088788076, "grad_norm": 0.9357376098632812, "learning_rate": 2.7726807037202903e-07, "loss": 0.06, "step": 15798 }, { "epoch": 2.559786130913804, "grad_norm": 0.8635546565055847, "learning_rate": 2.7706793846698776e-07, "loss": 0.0589, "step": 15799 }, { "epoch": 2.559948152948801, "grad_norm": 0.8578700423240662, "learning_rate": 2.7686787457828796e-07, "loss": 0.0534, "step": 15800 }, { "epoch": 2.560110174983798, "grad_norm": 1.073947787284851, "learning_rate": 2.7666787871205135e-07, "loss": 0.063, "step": 15801 }, { "epoch": 2.5602721970187945, "grad_norm": 1.1108258962631226, "learning_rate": 2.764679508743981e-07, "loss": 0.0571, "step": 15802 }, { "epoch": 2.5604342190537914, "grad_norm": 0.8796466588973999, "learning_rate": 2.7626809107144435e-07, "loss": 0.0547, "step": 15803 }, { "epoch": 2.560596241088788, "grad_norm": 1.0041248798370361, "learning_rate": 2.7606829930930555e-07, "loss": 0.0646, "step": 15804 }, { "epoch": 2.560758263123785, "grad_norm": 0.9209561944007874, "learning_rate": 2.7586857559409484e-07, "loss": 0.0549, "step": 15805 }, { "epoch": 2.5609202851587813, "grad_norm": 0.8668680787086487, "learning_rate": 2.7566891993192347e-07, "loss": 0.0578, "step": 15806 }, { "epoch": 2.5610823071937783, "grad_norm": 0.8909144401550293, "learning_rate": 2.754693323289004e-07, "loss": 0.0564, "step": 15807 }, { "epoch": 2.561244329228775, "grad_norm": 0.8772615790367126, "learning_rate": 2.752698127911327e-07, "loss": 0.0613, "step": 15808 }, { "epoch": 2.5614063512637717, "grad_norm": 0.8881198167800903, "learning_rate": 2.750703613247252e-07, "loss": 0.052, "step": 15809 }, { "epoch": 2.5615683732987686, "grad_norm": 0.8577731847763062, "learning_rate": 2.748709779357794e-07, "loss": 0.0573, "step": 15810 }, { "epoch": 2.5617303953337656, "grad_norm": 0.8215259313583374, "learning_rate": 2.7467166263039776e-07, "loss": 0.0586, "step": 15811 }, { "epoch": 2.561892417368762, "grad_norm": 0.8955356478691101, "learning_rate": 2.744724154146777e-07, "loss": 0.0568, "step": 15812 }, { "epoch": 2.562054439403759, "grad_norm": 0.9561681747436523, "learning_rate": 2.7427323629471677e-07, "loss": 0.0685, "step": 15813 }, { "epoch": 2.562216461438756, "grad_norm": 0.9938089847564697, "learning_rate": 2.740741252766077e-07, "loss": 0.0621, "step": 15814 }, { "epoch": 2.5623784834737524, "grad_norm": 0.9976528882980347, "learning_rate": 2.7387508236644404e-07, "loss": 0.0609, "step": 15815 }, { "epoch": 2.5625405055087493, "grad_norm": 0.8575246334075928, "learning_rate": 2.736761075703165e-07, "loss": 0.0586, "step": 15816 }, { "epoch": 2.562702527543746, "grad_norm": 0.9577282071113586, "learning_rate": 2.734772008943118e-07, "loss": 0.0626, "step": 15817 }, { "epoch": 2.5628645495787428, "grad_norm": 0.8331069946289062, "learning_rate": 2.732783623445168e-07, "loss": 0.0563, "step": 15818 }, { "epoch": 2.5630265716137393, "grad_norm": 0.8915289640426636, "learning_rate": 2.730795919270149e-07, "loss": 0.0608, "step": 15819 }, { "epoch": 2.563188593648736, "grad_norm": 0.9112197160720825, "learning_rate": 2.728808896478891e-07, "loss": 0.0571, "step": 15820 }, { "epoch": 2.563350615683733, "grad_norm": 1.0440930128097534, "learning_rate": 2.726822555132183e-07, "loss": 0.0612, "step": 15821 }, { "epoch": 2.5635126377187296, "grad_norm": 1.0141581296920776, "learning_rate": 2.7248368952908055e-07, "loss": 0.0702, "step": 15822 }, { "epoch": 2.5636746597537265, "grad_norm": 1.0160237550735474, "learning_rate": 2.722851917015512e-07, "loss": 0.0653, "step": 15823 }, { "epoch": 2.5638366817887235, "grad_norm": 0.8820620775222778, "learning_rate": 2.7208676203670406e-07, "loss": 0.0551, "step": 15824 }, { "epoch": 2.56399870382372, "grad_norm": 1.0316556692123413, "learning_rate": 2.7188840054061084e-07, "loss": 0.0625, "step": 15825 }, { "epoch": 2.564160725858717, "grad_norm": 1.0389971733093262, "learning_rate": 2.716901072193404e-07, "loss": 0.0646, "step": 15826 }, { "epoch": 2.5643227478937134, "grad_norm": 0.8673059940338135, "learning_rate": 2.7149188207896084e-07, "loss": 0.0631, "step": 15827 }, { "epoch": 2.5644847699287103, "grad_norm": 0.8147962689399719, "learning_rate": 2.71293725125536e-07, "loss": 0.0547, "step": 15828 }, { "epoch": 2.564646791963707, "grad_norm": 1.1430022716522217, "learning_rate": 2.710956363651296e-07, "loss": 0.0643, "step": 15829 }, { "epoch": 2.5648088139987038, "grad_norm": 1.0345301628112793, "learning_rate": 2.7089761580380346e-07, "loss": 0.0694, "step": 15830 }, { "epoch": 2.5649708360337007, "grad_norm": 1.059556245803833, "learning_rate": 2.7069966344761636e-07, "loss": 0.0555, "step": 15831 }, { "epoch": 2.565132858068697, "grad_norm": 0.8952658176422119, "learning_rate": 2.7050177930262406e-07, "loss": 0.062, "step": 15832 }, { "epoch": 2.565294880103694, "grad_norm": 0.9212254285812378, "learning_rate": 2.703039633748822e-07, "loss": 0.0525, "step": 15833 }, { "epoch": 2.565456902138691, "grad_norm": 0.8648931980133057, "learning_rate": 2.701062156704434e-07, "loss": 0.0598, "step": 15834 }, { "epoch": 2.5656189241736875, "grad_norm": 0.8723458647727966, "learning_rate": 2.6990853619535793e-07, "loss": 0.06, "step": 15835 }, { "epoch": 2.5657809462086845, "grad_norm": 0.8057177662849426, "learning_rate": 2.697109249556748e-07, "loss": 0.0555, "step": 15836 }, { "epoch": 2.5659429682436814, "grad_norm": 1.0397346019744873, "learning_rate": 2.6951338195744e-07, "loss": 0.0679, "step": 15837 }, { "epoch": 2.566104990278678, "grad_norm": 0.824124276638031, "learning_rate": 2.6931590720669807e-07, "loss": 0.0582, "step": 15838 }, { "epoch": 2.566267012313675, "grad_norm": 0.807733952999115, "learning_rate": 2.6911850070949124e-07, "loss": 0.0591, "step": 15839 }, { "epoch": 2.5664290343486713, "grad_norm": 1.1735118627548218, "learning_rate": 2.6892116247185964e-07, "loss": 0.0717, "step": 15840 }, { "epoch": 2.5665910563836682, "grad_norm": 0.878514289855957, "learning_rate": 2.687238924998414e-07, "loss": 0.0493, "step": 15841 }, { "epoch": 2.5667530784186647, "grad_norm": 0.9837947487831116, "learning_rate": 2.6852669079947294e-07, "loss": 0.0616, "step": 15842 }, { "epoch": 2.5669151004536617, "grad_norm": 0.9340665340423584, "learning_rate": 2.683295573767866e-07, "loss": 0.0597, "step": 15843 }, { "epoch": 2.5670771224886586, "grad_norm": 0.9732357263565063, "learning_rate": 2.681324922378159e-07, "loss": 0.058, "step": 15844 }, { "epoch": 2.567239144523655, "grad_norm": 0.8672887682914734, "learning_rate": 2.679354953885899e-07, "loss": 0.0621, "step": 15845 }, { "epoch": 2.567401166558652, "grad_norm": 0.9062552452087402, "learning_rate": 2.6773856683513677e-07, "loss": 0.0574, "step": 15846 }, { "epoch": 2.567563188593649, "grad_norm": 0.9597328305244446, "learning_rate": 2.6754170658348094e-07, "loss": 0.056, "step": 15847 }, { "epoch": 2.5677252106286454, "grad_norm": 0.8402513265609741, "learning_rate": 2.673449146396459e-07, "loss": 0.0555, "step": 15848 }, { "epoch": 2.5678872326636424, "grad_norm": 0.9047306776046753, "learning_rate": 2.671481910096546e-07, "loss": 0.0586, "step": 15849 }, { "epoch": 2.568049254698639, "grad_norm": 0.9194369912147522, "learning_rate": 2.6695153569952475e-07, "loss": 0.0625, "step": 15850 }, { "epoch": 2.568211276733636, "grad_norm": 0.9545932412147522, "learning_rate": 2.6675494871527404e-07, "loss": 0.0646, "step": 15851 }, { "epoch": 2.5683732987686323, "grad_norm": 0.9350480437278748, "learning_rate": 2.665584300629176e-07, "loss": 0.0616, "step": 15852 }, { "epoch": 2.5685353208036292, "grad_norm": 0.8764953017234802, "learning_rate": 2.663619797484684e-07, "loss": 0.0584, "step": 15853 }, { "epoch": 2.568697342838626, "grad_norm": 0.904297947883606, "learning_rate": 2.661655977779373e-07, "loss": 0.0539, "step": 15854 }, { "epoch": 2.5688593648736227, "grad_norm": 0.9557779431343079, "learning_rate": 2.65969284157333e-07, "loss": 0.0549, "step": 15855 }, { "epoch": 2.5690213869086196, "grad_norm": 0.7922280430793762, "learning_rate": 2.6577303889266244e-07, "loss": 0.0576, "step": 15856 }, { "epoch": 2.5691834089436165, "grad_norm": 1.1768953800201416, "learning_rate": 2.655768619899302e-07, "loss": 0.0574, "step": 15857 }, { "epoch": 2.569345430978613, "grad_norm": 0.9072592854499817, "learning_rate": 2.6538075345513864e-07, "loss": 0.062, "step": 15858 }, { "epoch": 2.56950745301361, "grad_norm": 0.9729146361351013, "learning_rate": 2.651847132942886e-07, "loss": 0.0647, "step": 15859 }, { "epoch": 2.569669475048607, "grad_norm": 0.8979154825210571, "learning_rate": 2.6498874151337865e-07, "loss": 0.0649, "step": 15860 }, { "epoch": 2.5698314970836034, "grad_norm": 1.0150434970855713, "learning_rate": 2.6479283811840393e-07, "loss": 0.0602, "step": 15861 }, { "epoch": 2.5699935191186, "grad_norm": 0.9745667576789856, "learning_rate": 2.6459700311535885e-07, "loss": 0.0662, "step": 15862 }, { "epoch": 2.570155541153597, "grad_norm": 0.8473979830741882, "learning_rate": 2.6440123651023634e-07, "loss": 0.0554, "step": 15863 }, { "epoch": 2.5703175631885937, "grad_norm": 1.0695520639419556, "learning_rate": 2.642055383090264e-07, "loss": 0.0609, "step": 15864 }, { "epoch": 2.57047958522359, "grad_norm": 0.8144644498825073, "learning_rate": 2.6400990851771615e-07, "loss": 0.0502, "step": 15865 }, { "epoch": 2.570641607258587, "grad_norm": 0.8602494597434998, "learning_rate": 2.638143471422916e-07, "loss": 0.0585, "step": 15866 }, { "epoch": 2.570803629293584, "grad_norm": 0.8381325006484985, "learning_rate": 2.636188541887366e-07, "loss": 0.0611, "step": 15867 }, { "epoch": 2.5709656513285806, "grad_norm": 0.8610426783561707, "learning_rate": 2.634234296630328e-07, "loss": 0.0578, "step": 15868 }, { "epoch": 2.5711276733635775, "grad_norm": 0.8060207366943359, "learning_rate": 2.632280735711595e-07, "loss": 0.0534, "step": 15869 }, { "epoch": 2.5712896953985744, "grad_norm": 1.0370417833328247, "learning_rate": 2.6303278591909426e-07, "loss": 0.061, "step": 15870 }, { "epoch": 2.571451717433571, "grad_norm": 0.9766287803649902, "learning_rate": 2.62837566712813e-07, "loss": 0.0637, "step": 15871 }, { "epoch": 2.571613739468568, "grad_norm": 1.0253368616104126, "learning_rate": 2.626424159582872e-07, "loss": 0.0585, "step": 15872 }, { "epoch": 2.5717757615035644, "grad_norm": 1.0771368741989136, "learning_rate": 2.6244733366148994e-07, "loss": 0.0607, "step": 15873 }, { "epoch": 2.5719377835385613, "grad_norm": 0.9325494170188904, "learning_rate": 2.622523198283894e-07, "loss": 0.0618, "step": 15874 }, { "epoch": 2.5720998055735578, "grad_norm": 0.887843906879425, "learning_rate": 2.6205737446495296e-07, "loss": 0.068, "step": 15875 }, { "epoch": 2.5722618276085547, "grad_norm": 0.909279465675354, "learning_rate": 2.6186249757714474e-07, "loss": 0.0615, "step": 15876 }, { "epoch": 2.5724238496435516, "grad_norm": 1.0336390733718872, "learning_rate": 2.6166768917092746e-07, "loss": 0.0653, "step": 15877 }, { "epoch": 2.572585871678548, "grad_norm": 0.9292657375335693, "learning_rate": 2.614729492522633e-07, "loss": 0.0644, "step": 15878 }, { "epoch": 2.572747893713545, "grad_norm": 0.995266854763031, "learning_rate": 2.6127827782710916e-07, "loss": 0.0614, "step": 15879 }, { "epoch": 2.572909915748542, "grad_norm": 0.8683896660804749, "learning_rate": 2.61083674901422e-07, "loss": 0.0525, "step": 15880 }, { "epoch": 2.5730719377835385, "grad_norm": 0.9227241277694702, "learning_rate": 2.6088914048115585e-07, "loss": 0.0609, "step": 15881 }, { "epoch": 2.5732339598185354, "grad_norm": 0.9650192856788635, "learning_rate": 2.6069467457226467e-07, "loss": 0.0655, "step": 15882 }, { "epoch": 2.5733959818535324, "grad_norm": 0.9700379371643066, "learning_rate": 2.6050027718069694e-07, "loss": 0.0588, "step": 15883 }, { "epoch": 2.573558003888529, "grad_norm": 0.901326596736908, "learning_rate": 2.6030594831240094e-07, "loss": 0.064, "step": 15884 }, { "epoch": 2.5737200259235253, "grad_norm": 0.947603166103363, "learning_rate": 2.601116879733231e-07, "loss": 0.0545, "step": 15885 }, { "epoch": 2.5738820479585223, "grad_norm": 1.0234147310256958, "learning_rate": 2.599174961694073e-07, "loss": 0.0712, "step": 15886 }, { "epoch": 2.574044069993519, "grad_norm": 0.9118129014968872, "learning_rate": 2.597233729065951e-07, "loss": 0.066, "step": 15887 }, { "epoch": 2.5742060920285157, "grad_norm": 0.8596914410591125, "learning_rate": 2.595293181908265e-07, "loss": 0.0624, "step": 15888 }, { "epoch": 2.5743681140635126, "grad_norm": 0.8012384176254272, "learning_rate": 2.593353320280387e-07, "loss": 0.0588, "step": 15889 }, { "epoch": 2.5745301360985096, "grad_norm": 0.8562291860580444, "learning_rate": 2.59141414424168e-07, "loss": 0.0572, "step": 15890 }, { "epoch": 2.574692158133506, "grad_norm": 1.0421948432922363, "learning_rate": 2.5894756538514644e-07, "loss": 0.0693, "step": 15891 }, { "epoch": 2.574854180168503, "grad_norm": 0.880355715751648, "learning_rate": 2.587537849169064e-07, "loss": 0.0567, "step": 15892 }, { "epoch": 2.5750162022035, "grad_norm": 0.8059505224227905, "learning_rate": 2.585600730253773e-07, "loss": 0.051, "step": 15893 }, { "epoch": 2.5751782242384964, "grad_norm": 0.8571881651878357, "learning_rate": 2.5836642971648534e-07, "loss": 0.068, "step": 15894 }, { "epoch": 2.5753402462734933, "grad_norm": 0.9805305600166321, "learning_rate": 2.5817285499615624e-07, "loss": 0.0608, "step": 15895 }, { "epoch": 2.57550226830849, "grad_norm": 0.9158777594566345, "learning_rate": 2.579793488703122e-07, "loss": 0.0623, "step": 15896 }, { "epoch": 2.5756642903434868, "grad_norm": 0.8620784878730774, "learning_rate": 2.5778591134487494e-07, "loss": 0.0629, "step": 15897 }, { "epoch": 2.5758263123784833, "grad_norm": 0.8118161559104919, "learning_rate": 2.5759254242576246e-07, "loss": 0.0536, "step": 15898 }, { "epoch": 2.57598833441348, "grad_norm": 0.9540441036224365, "learning_rate": 2.5739924211889173e-07, "loss": 0.0572, "step": 15899 }, { "epoch": 2.576150356448477, "grad_norm": 0.7988532185554504, "learning_rate": 2.572060104301771e-07, "loss": 0.0562, "step": 15900 }, { "epoch": 2.5763123784834736, "grad_norm": 0.9321579933166504, "learning_rate": 2.5701284736553146e-07, "loss": 0.0652, "step": 15901 }, { "epoch": 2.5764744005184705, "grad_norm": 0.7826856970787048, "learning_rate": 2.5681975293086443e-07, "loss": 0.055, "step": 15902 }, { "epoch": 2.5766364225534675, "grad_norm": 0.9522474408149719, "learning_rate": 2.5662672713208465e-07, "loss": 0.0601, "step": 15903 }, { "epoch": 2.576798444588464, "grad_norm": 0.9039526581764221, "learning_rate": 2.564337699750985e-07, "loss": 0.0628, "step": 15904 }, { "epoch": 2.576960466623461, "grad_norm": 0.9911723136901855, "learning_rate": 2.5624088146580903e-07, "loss": 0.07, "step": 15905 }, { "epoch": 2.5771224886584574, "grad_norm": 0.9902855753898621, "learning_rate": 2.560480616101191e-07, "loss": 0.0602, "step": 15906 }, { "epoch": 2.5772845106934543, "grad_norm": 1.157789707183838, "learning_rate": 2.55855310413928e-07, "loss": 0.0609, "step": 15907 }, { "epoch": 2.577446532728451, "grad_norm": 0.9956571459770203, "learning_rate": 2.556626278831345e-07, "loss": 0.0597, "step": 15908 }, { "epoch": 2.5776085547634477, "grad_norm": 0.8518067598342896, "learning_rate": 2.554700140236327e-07, "loss": 0.0552, "step": 15909 }, { "epoch": 2.5777705767984447, "grad_norm": 0.8812946081161499, "learning_rate": 2.552774688413165e-07, "loss": 0.0525, "step": 15910 }, { "epoch": 2.577932598833441, "grad_norm": 1.0784142017364502, "learning_rate": 2.550849923420787e-07, "loss": 0.0615, "step": 15911 }, { "epoch": 2.578094620868438, "grad_norm": 1.0830011367797852, "learning_rate": 2.5489258453180676e-07, "loss": 0.0558, "step": 15912 }, { "epoch": 2.578256642903435, "grad_norm": 0.8007397055625916, "learning_rate": 2.547002454163888e-07, "loss": 0.0524, "step": 15913 }, { "epoch": 2.5784186649384315, "grad_norm": 0.9391987323760986, "learning_rate": 2.545079750017099e-07, "loss": 0.0546, "step": 15914 }, { "epoch": 2.5785806869734285, "grad_norm": 0.9835785627365112, "learning_rate": 2.543157732936527e-07, "loss": 0.061, "step": 15915 }, { "epoch": 2.5787427090084254, "grad_norm": 0.9381945729255676, "learning_rate": 2.541236402980987e-07, "loss": 0.0547, "step": 15916 }, { "epoch": 2.578904731043422, "grad_norm": 1.088310956954956, "learning_rate": 2.5393157602092626e-07, "loss": 0.0656, "step": 15917 }, { "epoch": 2.579066753078419, "grad_norm": 0.9402004480361938, "learning_rate": 2.5373958046801207e-07, "loss": 0.0596, "step": 15918 }, { "epoch": 2.5792287751134153, "grad_norm": 0.8306174874305725, "learning_rate": 2.5354765364523164e-07, "loss": 0.0564, "step": 15919 }, { "epoch": 2.5793907971484122, "grad_norm": 0.8160369396209717, "learning_rate": 2.5335579555845563e-07, "loss": 0.0584, "step": 15920 }, { "epoch": 2.5795528191834087, "grad_norm": 1.0433648824691772, "learning_rate": 2.531640062135557e-07, "loss": 0.0592, "step": 15921 }, { "epoch": 2.5797148412184057, "grad_norm": 1.0717437267303467, "learning_rate": 2.5297228561640075e-07, "loss": 0.0689, "step": 15922 }, { "epoch": 2.5798768632534026, "grad_norm": 0.8935438394546509, "learning_rate": 2.5278063377285556e-07, "loss": 0.0655, "step": 15923 }, { "epoch": 2.580038885288399, "grad_norm": 0.8454123735427856, "learning_rate": 2.5258905068878433e-07, "loss": 0.0582, "step": 15924 }, { "epoch": 2.580200907323396, "grad_norm": 0.9273526072502136, "learning_rate": 2.523975363700501e-07, "loss": 0.0555, "step": 15925 }, { "epoch": 2.580362929358393, "grad_norm": 1.308284878730774, "learning_rate": 2.522060908225127e-07, "loss": 0.0684, "step": 15926 }, { "epoch": 2.5805249513933894, "grad_norm": 0.8910729885101318, "learning_rate": 2.520147140520288e-07, "loss": 0.0531, "step": 15927 }, { "epoch": 2.5806869734283864, "grad_norm": 0.9772580862045288, "learning_rate": 2.518234060644545e-07, "loss": 0.0598, "step": 15928 }, { "epoch": 2.580848995463383, "grad_norm": 0.845855712890625, "learning_rate": 2.5163216686564354e-07, "loss": 0.0567, "step": 15929 }, { "epoch": 2.58101101749838, "grad_norm": 0.9278295040130615, "learning_rate": 2.5144099646144724e-07, "loss": 0.0579, "step": 15930 }, { "epoch": 2.5811730395333763, "grad_norm": 1.0706762075424194, "learning_rate": 2.512498948577152e-07, "loss": 0.0644, "step": 15931 }, { "epoch": 2.5813350615683732, "grad_norm": 0.9492532014846802, "learning_rate": 2.510588620602947e-07, "loss": 0.0567, "step": 15932 }, { "epoch": 2.58149708360337, "grad_norm": 0.8904463648796082, "learning_rate": 2.5086789807503036e-07, "loss": 0.0576, "step": 15933 }, { "epoch": 2.5816591056383666, "grad_norm": 0.8406658172607422, "learning_rate": 2.506770029077657e-07, "loss": 0.0558, "step": 15934 }, { "epoch": 2.5818211276733636, "grad_norm": 0.919401228427887, "learning_rate": 2.5048617656434127e-07, "loss": 0.0607, "step": 15935 }, { "epoch": 2.5819831497083605, "grad_norm": 0.9194125533103943, "learning_rate": 2.502954190505963e-07, "loss": 0.0582, "step": 15936 }, { "epoch": 2.582145171743357, "grad_norm": 0.8466059565544128, "learning_rate": 2.5010473037236776e-07, "loss": 0.0589, "step": 15937 }, { "epoch": 2.582307193778354, "grad_norm": 0.8999043107032776, "learning_rate": 2.499141105354894e-07, "loss": 0.0631, "step": 15938 }, { "epoch": 2.582469215813351, "grad_norm": 0.8571875095367432, "learning_rate": 2.4972355954579366e-07, "loss": 0.0622, "step": 15939 }, { "epoch": 2.5826312378483474, "grad_norm": 0.9047456383705139, "learning_rate": 2.495330774091126e-07, "loss": 0.0596, "step": 15940 }, { "epoch": 2.5827932598833443, "grad_norm": 0.9654316902160645, "learning_rate": 2.493426641312724e-07, "loss": 0.0587, "step": 15941 }, { "epoch": 2.582955281918341, "grad_norm": 0.8947680592536926, "learning_rate": 2.4915231971810064e-07, "loss": 0.0586, "step": 15942 }, { "epoch": 2.5831173039533377, "grad_norm": 0.7643946409225464, "learning_rate": 2.4896204417542066e-07, "loss": 0.0537, "step": 15943 }, { "epoch": 2.583279325988334, "grad_norm": 0.9256800413131714, "learning_rate": 2.4877183750905475e-07, "loss": 0.0596, "step": 15944 }, { "epoch": 2.583441348023331, "grad_norm": 1.0002453327178955, "learning_rate": 2.4858169972482276e-07, "loss": 0.0602, "step": 15945 }, { "epoch": 2.583603370058328, "grad_norm": 0.8987006545066833, "learning_rate": 2.483916308285425e-07, "loss": 0.0584, "step": 15946 }, { "epoch": 2.5837653920933246, "grad_norm": 0.8947418332099915, "learning_rate": 2.482016308260296e-07, "loss": 0.0596, "step": 15947 }, { "epoch": 2.5839274141283215, "grad_norm": 0.8880223631858826, "learning_rate": 2.4801169972309745e-07, "loss": 0.0575, "step": 15948 }, { "epoch": 2.5840894361633184, "grad_norm": 0.8866935968399048, "learning_rate": 2.4782183752555784e-07, "loss": 0.0536, "step": 15949 }, { "epoch": 2.584251458198315, "grad_norm": 0.8493706583976746, "learning_rate": 2.4763204423921937e-07, "loss": 0.0541, "step": 15950 }, { "epoch": 2.584413480233312, "grad_norm": 0.8858264684677124, "learning_rate": 2.4744231986988996e-07, "loss": 0.0645, "step": 15951 }, { "epoch": 2.5845755022683083, "grad_norm": 1.0235286951065063, "learning_rate": 2.47252664423375e-07, "loss": 0.0737, "step": 15952 }, { "epoch": 2.5847375243033053, "grad_norm": 1.0129871368408203, "learning_rate": 2.4706307790547614e-07, "loss": 0.0563, "step": 15953 }, { "epoch": 2.5848995463383018, "grad_norm": 0.9243896007537842, "learning_rate": 2.4687356032199516e-07, "loss": 0.0617, "step": 15954 }, { "epoch": 2.5850615683732987, "grad_norm": 0.7274792790412903, "learning_rate": 2.4668411167873165e-07, "loss": 0.0506, "step": 15955 }, { "epoch": 2.5852235904082956, "grad_norm": 0.7890907526016235, "learning_rate": 2.464947319814806e-07, "loss": 0.0544, "step": 15956 }, { "epoch": 2.585385612443292, "grad_norm": 0.9411144852638245, "learning_rate": 2.4630542123603775e-07, "loss": 0.0586, "step": 15957 }, { "epoch": 2.585547634478289, "grad_norm": 0.8673883080482483, "learning_rate": 2.461161794481945e-07, "loss": 0.0541, "step": 15958 }, { "epoch": 2.585709656513286, "grad_norm": 1.1016530990600586, "learning_rate": 2.4592700662374265e-07, "loss": 0.0667, "step": 15959 }, { "epoch": 2.5858716785482825, "grad_norm": 0.9959900975227356, "learning_rate": 2.4573790276846947e-07, "loss": 0.0651, "step": 15960 }, { "epoch": 2.5860337005832794, "grad_norm": 0.895260751247406, "learning_rate": 2.4554886788816094e-07, "loss": 0.0672, "step": 15961 }, { "epoch": 2.5861957226182763, "grad_norm": 1.0384845733642578, "learning_rate": 2.453599019886016e-07, "loss": 0.059, "step": 15962 }, { "epoch": 2.586357744653273, "grad_norm": 0.8022738099098206, "learning_rate": 2.451710050755732e-07, "loss": 0.0506, "step": 15963 }, { "epoch": 2.5865197666882693, "grad_norm": 0.8844985365867615, "learning_rate": 2.449821771548552e-07, "loss": 0.0591, "step": 15964 }, { "epoch": 2.5866817887232663, "grad_norm": 0.8309991955757141, "learning_rate": 2.4479341823222564e-07, "loss": 0.0558, "step": 15965 }, { "epoch": 2.586843810758263, "grad_norm": 0.9121332168579102, "learning_rate": 2.446047283134606e-07, "loss": 0.0673, "step": 15966 }, { "epoch": 2.5870058327932597, "grad_norm": 0.8563621640205383, "learning_rate": 2.444161074043325e-07, "loss": 0.0605, "step": 15967 }, { "epoch": 2.5871678548282566, "grad_norm": 0.834702730178833, "learning_rate": 2.4422755551061246e-07, "loss": 0.0561, "step": 15968 }, { "epoch": 2.5873298768632536, "grad_norm": 0.9449539184570312, "learning_rate": 2.4403907263807064e-07, "loss": 0.0555, "step": 15969 }, { "epoch": 2.58749189889825, "grad_norm": 1.0673609972000122, "learning_rate": 2.4385065879247466e-07, "loss": 0.0612, "step": 15970 }, { "epoch": 2.587653920933247, "grad_norm": 0.8567180633544922, "learning_rate": 2.4366231397958823e-07, "loss": 0.0603, "step": 15971 }, { "epoch": 2.587815942968244, "grad_norm": 0.8513876795768738, "learning_rate": 2.4347403820517423e-07, "loss": 0.058, "step": 15972 }, { "epoch": 2.5879779650032404, "grad_norm": 0.8742343187332153, "learning_rate": 2.4328583147499503e-07, "loss": 0.0598, "step": 15973 }, { "epoch": 2.5881399870382373, "grad_norm": 1.0191162824630737, "learning_rate": 2.4309769379480764e-07, "loss": 0.0576, "step": 15974 }, { "epoch": 2.588302009073234, "grad_norm": 0.8935777544975281, "learning_rate": 2.4290962517036915e-07, "loss": 0.0568, "step": 15975 }, { "epoch": 2.5884640311082308, "grad_norm": 0.8707433342933655, "learning_rate": 2.427216256074341e-07, "loss": 0.0537, "step": 15976 }, { "epoch": 2.5886260531432272, "grad_norm": 1.0060011148452759, "learning_rate": 2.425336951117549e-07, "loss": 0.0622, "step": 15977 }, { "epoch": 2.588788075178224, "grad_norm": 0.9074399471282959, "learning_rate": 2.423458336890816e-07, "loss": 0.0621, "step": 15978 }, { "epoch": 2.588950097213221, "grad_norm": 0.931549072265625, "learning_rate": 2.421580413451624e-07, "loss": 0.0688, "step": 15979 }, { "epoch": 2.5891121192482176, "grad_norm": 0.9901089072227478, "learning_rate": 2.4197031808574327e-07, "loss": 0.0644, "step": 15980 }, { "epoch": 2.5892741412832145, "grad_norm": 0.8553241491317749, "learning_rate": 2.417826639165688e-07, "loss": 0.0572, "step": 15981 }, { "epoch": 2.5894361633182115, "grad_norm": 0.7999852895736694, "learning_rate": 2.4159507884337877e-07, "loss": 0.0553, "step": 15982 }, { "epoch": 2.589598185353208, "grad_norm": 0.7739843726158142, "learning_rate": 2.414075628719145e-07, "loss": 0.0565, "step": 15983 }, { "epoch": 2.589760207388205, "grad_norm": 0.8175353407859802, "learning_rate": 2.4122011600791334e-07, "loss": 0.0576, "step": 15984 }, { "epoch": 2.589922229423202, "grad_norm": 0.9324160218238831, "learning_rate": 2.4103273825711094e-07, "loss": 0.0601, "step": 15985 }, { "epoch": 2.5900842514581983, "grad_norm": 1.0186549425125122, "learning_rate": 2.408454296252397e-07, "loss": 0.0605, "step": 15986 }, { "epoch": 2.590246273493195, "grad_norm": 1.0774922370910645, "learning_rate": 2.406581901180305e-07, "loss": 0.0632, "step": 15987 }, { "epoch": 2.5904082955281917, "grad_norm": 1.0663115978240967, "learning_rate": 2.404710197412144e-07, "loss": 0.0681, "step": 15988 }, { "epoch": 2.5905703175631887, "grad_norm": 0.9276178479194641, "learning_rate": 2.4028391850051654e-07, "loss": 0.0622, "step": 15989 }, { "epoch": 2.590732339598185, "grad_norm": 0.9000306725502014, "learning_rate": 2.4009688640166257e-07, "loss": 0.0572, "step": 15990 }, { "epoch": 2.590894361633182, "grad_norm": 1.0842214822769165, "learning_rate": 2.399099234503749e-07, "loss": 0.0616, "step": 15991 }, { "epoch": 2.591056383668179, "grad_norm": 0.9430128335952759, "learning_rate": 2.397230296523742e-07, "loss": 0.0669, "step": 15992 }, { "epoch": 2.5912184057031755, "grad_norm": 1.052301049232483, "learning_rate": 2.39536205013379e-07, "loss": 0.07, "step": 15993 }, { "epoch": 2.5913804277381725, "grad_norm": 0.9034858345985413, "learning_rate": 2.3934944953910576e-07, "loss": 0.0519, "step": 15994 }, { "epoch": 2.5915424497731694, "grad_norm": 0.8782376050949097, "learning_rate": 2.391627632352686e-07, "loss": 0.0527, "step": 15995 }, { "epoch": 2.591704471808166, "grad_norm": 0.8798543214797974, "learning_rate": 2.3897614610757984e-07, "loss": 0.0616, "step": 15996 }, { "epoch": 2.591866493843163, "grad_norm": 0.9117831587791443, "learning_rate": 2.387895981617497e-07, "loss": 0.061, "step": 15997 }, { "epoch": 2.5920285158781593, "grad_norm": 0.8840068578720093, "learning_rate": 2.386031194034855e-07, "loss": 0.0605, "step": 15998 }, { "epoch": 2.5921905379131562, "grad_norm": 0.9171715378761292, "learning_rate": 2.3841670983849402e-07, "loss": 0.0562, "step": 15999 }, { "epoch": 2.5923525599481527, "grad_norm": 0.9331315755844116, "learning_rate": 2.3823036947247773e-07, "loss": 0.0573, "step": 16000 }, { "epoch": 2.5925145819831497, "grad_norm": 0.8709453344345093, "learning_rate": 2.3804409831113817e-07, "loss": 0.0554, "step": 16001 }, { "epoch": 2.5926766040181466, "grad_norm": 0.8639891147613525, "learning_rate": 2.3785789636017604e-07, "loss": 0.0603, "step": 16002 }, { "epoch": 2.592838626053143, "grad_norm": 0.9981393814086914, "learning_rate": 2.3767176362528843e-07, "loss": 0.0577, "step": 16003 }, { "epoch": 2.59300064808814, "grad_norm": 0.8646852374076843, "learning_rate": 2.374857001121697e-07, "loss": 0.0618, "step": 16004 }, { "epoch": 2.593162670123137, "grad_norm": 0.9664946794509888, "learning_rate": 2.3729970582651307e-07, "loss": 0.0676, "step": 16005 }, { "epoch": 2.5933246921581334, "grad_norm": 0.8121821284294128, "learning_rate": 2.371137807740101e-07, "loss": 0.0553, "step": 16006 }, { "epoch": 2.5934867141931304, "grad_norm": 1.0388915538787842, "learning_rate": 2.3692792496034928e-07, "loss": 0.0603, "step": 16007 }, { "epoch": 2.593648736228127, "grad_norm": 0.9599438905715942, "learning_rate": 2.3674213839121745e-07, "loss": 0.0629, "step": 16008 }, { "epoch": 2.593810758263124, "grad_norm": 0.8781747817993164, "learning_rate": 2.3655642107229925e-07, "loss": 0.0596, "step": 16009 }, { "epoch": 2.5939727802981203, "grad_norm": 0.9688482284545898, "learning_rate": 2.3637077300927762e-07, "loss": 0.0646, "step": 16010 }, { "epoch": 2.594134802333117, "grad_norm": 0.9513368606567383, "learning_rate": 2.3618519420783137e-07, "loss": 0.0647, "step": 16011 }, { "epoch": 2.594296824368114, "grad_norm": 1.025209903717041, "learning_rate": 2.3599968467364037e-07, "loss": 0.0603, "step": 16012 }, { "epoch": 2.5944588464031106, "grad_norm": 0.7293410897254944, "learning_rate": 2.3581424441238038e-07, "loss": 0.0476, "step": 16013 }, { "epoch": 2.5946208684381076, "grad_norm": 0.8842470645904541, "learning_rate": 2.3562887342972574e-07, "loss": 0.0616, "step": 16014 }, { "epoch": 2.5947828904731045, "grad_norm": 0.8669375777244568, "learning_rate": 2.3544357173134691e-07, "loss": 0.0557, "step": 16015 }, { "epoch": 2.594944912508101, "grad_norm": 1.0717798471450806, "learning_rate": 2.3525833932291491e-07, "loss": 0.0648, "step": 16016 }, { "epoch": 2.595106934543098, "grad_norm": 0.8745622038841248, "learning_rate": 2.35073176210098e-07, "loss": 0.063, "step": 16017 }, { "epoch": 2.595268956578095, "grad_norm": 1.0365678071975708, "learning_rate": 2.3488808239855998e-07, "loss": 0.063, "step": 16018 }, { "epoch": 2.5954309786130914, "grad_norm": 0.9520593285560608, "learning_rate": 2.3470305789396546e-07, "loss": 0.06, "step": 16019 }, { "epoch": 2.5955930006480883, "grad_norm": 0.9252042174339294, "learning_rate": 2.3451810270197494e-07, "loss": 0.062, "step": 16020 }, { "epoch": 2.595755022683085, "grad_norm": 0.9404069781303406, "learning_rate": 2.3433321682824917e-07, "loss": 0.0542, "step": 16021 }, { "epoch": 2.5959170447180817, "grad_norm": 0.8857480883598328, "learning_rate": 2.341484002784436e-07, "loss": 0.0554, "step": 16022 }, { "epoch": 2.596079066753078, "grad_norm": 0.9531388878822327, "learning_rate": 2.3396365305821372e-07, "loss": 0.0647, "step": 16023 }, { "epoch": 2.596241088788075, "grad_norm": 0.8874648809432983, "learning_rate": 2.3377897517321224e-07, "loss": 0.0634, "step": 16024 }, { "epoch": 2.596403110823072, "grad_norm": 0.8631939888000488, "learning_rate": 2.3359436662909018e-07, "loss": 0.0571, "step": 16025 }, { "epoch": 2.5965651328580686, "grad_norm": 0.7769051194190979, "learning_rate": 2.3340982743149582e-07, "loss": 0.0585, "step": 16026 }, { "epoch": 2.5967271548930655, "grad_norm": 0.8491709232330322, "learning_rate": 2.3322535758607573e-07, "loss": 0.0531, "step": 16027 }, { "epoch": 2.5968891769280624, "grad_norm": 0.8236337304115295, "learning_rate": 2.3304095709847402e-07, "loss": 0.0529, "step": 16028 }, { "epoch": 2.597051198963059, "grad_norm": 0.9366066455841064, "learning_rate": 2.3285662597433368e-07, "loss": 0.0658, "step": 16029 }, { "epoch": 2.597213220998056, "grad_norm": 1.007311463356018, "learning_rate": 2.3267236421929323e-07, "loss": 0.0583, "step": 16030 }, { "epoch": 2.5973752430330523, "grad_norm": 0.8656664490699768, "learning_rate": 2.3248817183899209e-07, "loss": 0.0572, "step": 16031 }, { "epoch": 2.5975372650680493, "grad_norm": 0.909775972366333, "learning_rate": 2.3230404883906626e-07, "loss": 0.0573, "step": 16032 }, { "epoch": 2.5976992871030458, "grad_norm": 0.9270328283309937, "learning_rate": 2.321199952251482e-07, "loss": 0.0578, "step": 16033 }, { "epoch": 2.5978613091380427, "grad_norm": 1.056470274925232, "learning_rate": 2.319360110028701e-07, "loss": 0.0651, "step": 16034 }, { "epoch": 2.5980233311730396, "grad_norm": 0.8853468894958496, "learning_rate": 2.3175209617786133e-07, "loss": 0.063, "step": 16035 }, { "epoch": 2.598185353208036, "grad_norm": 0.9667817950248718, "learning_rate": 2.3156825075574956e-07, "loss": 0.06, "step": 16036 }, { "epoch": 2.598347375243033, "grad_norm": 0.7612243890762329, "learning_rate": 2.3138447474215981e-07, "loss": 0.0556, "step": 16037 }, { "epoch": 2.59850939727803, "grad_norm": 1.081939935684204, "learning_rate": 2.312007681427153e-07, "loss": 0.0573, "step": 16038 }, { "epoch": 2.5986714193130265, "grad_norm": 0.9254032969474792, "learning_rate": 2.3101713096303658e-07, "loss": 0.0651, "step": 16039 }, { "epoch": 2.5988334413480234, "grad_norm": 0.9094540476799011, "learning_rate": 2.30833563208743e-07, "loss": 0.0645, "step": 16040 }, { "epoch": 2.5989954633830203, "grad_norm": 0.8876714706420898, "learning_rate": 2.3065006488545122e-07, "loss": 0.0662, "step": 16041 }, { "epoch": 2.599157485418017, "grad_norm": 0.9631155729293823, "learning_rate": 2.304666359987756e-07, "loss": 0.0585, "step": 16042 }, { "epoch": 2.5993195074530138, "grad_norm": 0.8992942571640015, "learning_rate": 2.302832765543292e-07, "loss": 0.0564, "step": 16043 }, { "epoch": 2.5994815294880103, "grad_norm": 0.8657216429710388, "learning_rate": 2.300999865577211e-07, "loss": 0.0597, "step": 16044 }, { "epoch": 2.599643551523007, "grad_norm": 0.8959026336669922, "learning_rate": 2.2991676601456069e-07, "loss": 0.0652, "step": 16045 }, { "epoch": 2.5998055735580037, "grad_norm": 0.8713762760162354, "learning_rate": 2.2973361493045382e-07, "loss": 0.0621, "step": 16046 }, { "epoch": 2.5999675955930006, "grad_norm": 0.9186781048774719, "learning_rate": 2.2955053331100486e-07, "loss": 0.0584, "step": 16047 }, { "epoch": 2.6001296176279975, "grad_norm": 1.0051521062850952, "learning_rate": 2.293675211618146e-07, "loss": 0.065, "step": 16048 }, { "epoch": 2.600291639662994, "grad_norm": 0.8411121964454651, "learning_rate": 2.2918457848848303e-07, "loss": 0.0585, "step": 16049 }, { "epoch": 2.600453661697991, "grad_norm": 0.9174489974975586, "learning_rate": 2.2900170529660898e-07, "loss": 0.0634, "step": 16050 }, { "epoch": 2.600615683732988, "grad_norm": 0.8582833409309387, "learning_rate": 2.288189015917866e-07, "loss": 0.0589, "step": 16051 }, { "epoch": 2.6007777057679844, "grad_norm": 0.7952532768249512, "learning_rate": 2.2863616737960976e-07, "loss": 0.0533, "step": 16052 }, { "epoch": 2.6009397278029813, "grad_norm": 0.9317299723625183, "learning_rate": 2.2845350266566952e-07, "loss": 0.0659, "step": 16053 }, { "epoch": 2.601101749837978, "grad_norm": 0.8274461627006531, "learning_rate": 2.2827090745555502e-07, "loss": 0.0573, "step": 16054 }, { "epoch": 2.6012637718729748, "grad_norm": 0.8277769088745117, "learning_rate": 2.2808838175485321e-07, "loss": 0.054, "step": 16055 }, { "epoch": 2.6014257939079712, "grad_norm": 0.8775892853736877, "learning_rate": 2.279059255691493e-07, "loss": 0.061, "step": 16056 }, { "epoch": 2.601587815942968, "grad_norm": 1.035484790802002, "learning_rate": 2.2772353890402527e-07, "loss": 0.0613, "step": 16057 }, { "epoch": 2.601749837977965, "grad_norm": 0.9841363430023193, "learning_rate": 2.2754122176506244e-07, "loss": 0.0717, "step": 16058 }, { "epoch": 2.6019118600129616, "grad_norm": 0.8622410297393799, "learning_rate": 2.2735897415783888e-07, "loss": 0.0613, "step": 16059 }, { "epoch": 2.6020738820479585, "grad_norm": 0.7690012454986572, "learning_rate": 2.271767960879312e-07, "loss": 0.0528, "step": 16060 }, { "epoch": 2.6022359040829555, "grad_norm": 0.8313643336296082, "learning_rate": 2.2699468756091385e-07, "loss": 0.0507, "step": 16061 }, { "epoch": 2.602397926117952, "grad_norm": 0.8808419704437256, "learning_rate": 2.2681264858235797e-07, "loss": 0.0611, "step": 16062 }, { "epoch": 2.602559948152949, "grad_norm": 0.8801603317260742, "learning_rate": 2.2663067915783349e-07, "loss": 0.0557, "step": 16063 }, { "epoch": 2.602721970187946, "grad_norm": 0.8919458985328674, "learning_rate": 2.2644877929290932e-07, "loss": 0.0587, "step": 16064 }, { "epoch": 2.6028839922229423, "grad_norm": 0.8611831665039062, "learning_rate": 2.26266948993151e-07, "loss": 0.0564, "step": 16065 }, { "epoch": 2.6030460142579392, "grad_norm": 0.9475187659263611, "learning_rate": 2.2608518826412128e-07, "loss": 0.0631, "step": 16066 }, { "epoch": 2.6032080362929357, "grad_norm": 0.8380742073059082, "learning_rate": 2.2590349711138214e-07, "loss": 0.0553, "step": 16067 }, { "epoch": 2.6033700583279327, "grad_norm": 0.780228316783905, "learning_rate": 2.2572187554049274e-07, "loss": 0.0566, "step": 16068 }, { "epoch": 2.603532080362929, "grad_norm": 0.9787698984146118, "learning_rate": 2.2554032355701027e-07, "loss": 0.0641, "step": 16069 }, { "epoch": 2.603694102397926, "grad_norm": 0.9883422255516052, "learning_rate": 2.2535884116648976e-07, "loss": 0.0625, "step": 16070 }, { "epoch": 2.603856124432923, "grad_norm": 1.032846212387085, "learning_rate": 2.2517742837448425e-07, "loss": 0.0589, "step": 16071 }, { "epoch": 2.6040181464679195, "grad_norm": 0.8250254988670349, "learning_rate": 2.2499608518654432e-07, "loss": 0.0569, "step": 16072 }, { "epoch": 2.6041801685029164, "grad_norm": 0.9005188345909119, "learning_rate": 2.2481481160821883e-07, "loss": 0.0606, "step": 16073 }, { "epoch": 2.6043421905379134, "grad_norm": 0.9215274453163147, "learning_rate": 2.2463360764505448e-07, "loss": 0.0597, "step": 16074 }, { "epoch": 2.60450421257291, "grad_norm": 0.7813541889190674, "learning_rate": 2.244524733025952e-07, "loss": 0.0521, "step": 16075 }, { "epoch": 2.604666234607907, "grad_norm": 1.026735782623291, "learning_rate": 2.2427140858638424e-07, "loss": 0.0677, "step": 16076 }, { "epoch": 2.6048282566429033, "grad_norm": 0.8096170425415039, "learning_rate": 2.240904135019603e-07, "loss": 0.0549, "step": 16077 }, { "epoch": 2.6049902786779002, "grad_norm": 1.0487642288208008, "learning_rate": 2.2390948805486174e-07, "loss": 0.0671, "step": 16078 }, { "epoch": 2.6051523007128967, "grad_norm": 0.9688891768455505, "learning_rate": 2.2372863225062574e-07, "loss": 0.058, "step": 16079 }, { "epoch": 2.6053143227478937, "grad_norm": 0.9506645202636719, "learning_rate": 2.2354784609478485e-07, "loss": 0.0602, "step": 16080 }, { "epoch": 2.6054763447828906, "grad_norm": 0.7836284041404724, "learning_rate": 2.2336712959287077e-07, "loss": 0.0532, "step": 16081 }, { "epoch": 2.605638366817887, "grad_norm": 0.8392066359519958, "learning_rate": 2.2318648275041267e-07, "loss": 0.0583, "step": 16082 }, { "epoch": 2.605800388852884, "grad_norm": 0.7897651195526123, "learning_rate": 2.2300590557293944e-07, "loss": 0.0497, "step": 16083 }, { "epoch": 2.605962410887881, "grad_norm": 0.9836229085922241, "learning_rate": 2.2282539806597476e-07, "loss": 0.0629, "step": 16084 }, { "epoch": 2.6061244329228774, "grad_norm": 1.0149391889572144, "learning_rate": 2.2264496023504223e-07, "loss": 0.0598, "step": 16085 }, { "epoch": 2.6062864549578744, "grad_norm": 0.824090301990509, "learning_rate": 2.22464592085663e-07, "loss": 0.055, "step": 16086 }, { "epoch": 2.6064484769928713, "grad_norm": 0.874244213104248, "learning_rate": 2.2228429362335546e-07, "loss": 0.0538, "step": 16087 }, { "epoch": 2.606610499027868, "grad_norm": 0.8717989325523376, "learning_rate": 2.2210406485363656e-07, "loss": 0.063, "step": 16088 }, { "epoch": 2.6067725210628643, "grad_norm": 1.0721566677093506, "learning_rate": 2.2192390578202105e-07, "loss": 0.0644, "step": 16089 }, { "epoch": 2.606934543097861, "grad_norm": 0.8851465582847595, "learning_rate": 2.217438164140212e-07, "loss": 0.0536, "step": 16090 }, { "epoch": 2.607096565132858, "grad_norm": 1.2500100135803223, "learning_rate": 2.2156379675514762e-07, "loss": 0.0662, "step": 16091 }, { "epoch": 2.6072585871678546, "grad_norm": 0.930827260017395, "learning_rate": 2.213838468109075e-07, "loss": 0.0613, "step": 16092 }, { "epoch": 2.6074206092028516, "grad_norm": 0.7674300074577332, "learning_rate": 2.2120396658680765e-07, "loss": 0.0532, "step": 16093 }, { "epoch": 2.6075826312378485, "grad_norm": 0.9664437174797058, "learning_rate": 2.210241560883525e-07, "loss": 0.0676, "step": 16094 }, { "epoch": 2.607744653272845, "grad_norm": 0.9895860552787781, "learning_rate": 2.2084441532104262e-07, "loss": 0.0644, "step": 16095 }, { "epoch": 2.607906675307842, "grad_norm": 0.9111254811286926, "learning_rate": 2.206647442903781e-07, "loss": 0.0614, "step": 16096 }, { "epoch": 2.608068697342839, "grad_norm": 1.104712724685669, "learning_rate": 2.204851430018562e-07, "loss": 0.069, "step": 16097 }, { "epoch": 2.6082307193778353, "grad_norm": 1.0142390727996826, "learning_rate": 2.2030561146097363e-07, "loss": 0.0548, "step": 16098 }, { "epoch": 2.6083927414128323, "grad_norm": 0.9152945876121521, "learning_rate": 2.2012614967322182e-07, "loss": 0.0608, "step": 16099 }, { "epoch": 2.6085547634478288, "grad_norm": 0.9016642570495605, "learning_rate": 2.199467576440928e-07, "loss": 0.0629, "step": 16100 }, { "epoch": 2.6087167854828257, "grad_norm": 0.7842551469802856, "learning_rate": 2.1976743537907546e-07, "loss": 0.055, "step": 16101 }, { "epoch": 2.608878807517822, "grad_norm": 0.8693322539329529, "learning_rate": 2.195881828836563e-07, "loss": 0.0594, "step": 16102 }, { "epoch": 2.609040829552819, "grad_norm": 1.128956913948059, "learning_rate": 2.194090001633206e-07, "loss": 0.0671, "step": 16103 }, { "epoch": 2.609202851587816, "grad_norm": 0.8959473371505737, "learning_rate": 2.1922988722355044e-07, "loss": 0.0583, "step": 16104 }, { "epoch": 2.6093648736228126, "grad_norm": 0.9709823131561279, "learning_rate": 2.1905084406982663e-07, "loss": 0.0631, "step": 16105 }, { "epoch": 2.6095268956578095, "grad_norm": 1.0241434574127197, "learning_rate": 2.188718707076265e-07, "loss": 0.0605, "step": 16106 }, { "epoch": 2.6096889176928064, "grad_norm": 1.0129493474960327, "learning_rate": 2.1869296714242732e-07, "loss": 0.0609, "step": 16107 }, { "epoch": 2.609850939727803, "grad_norm": 0.9752632975578308, "learning_rate": 2.185141333797025e-07, "loss": 0.0576, "step": 16108 }, { "epoch": 2.6100129617628, "grad_norm": 0.8120836019515991, "learning_rate": 2.183353694249249e-07, "loss": 0.048, "step": 16109 }, { "epoch": 2.6101749837977968, "grad_norm": 0.946182370185852, "learning_rate": 2.181566752835626e-07, "loss": 0.0621, "step": 16110 }, { "epoch": 2.6103370058327933, "grad_norm": 0.913953959941864, "learning_rate": 2.1797805096108405e-07, "loss": 0.0609, "step": 16111 }, { "epoch": 2.6104990278677898, "grad_norm": 0.9735072255134583, "learning_rate": 2.177994964629554e-07, "loss": 0.0589, "step": 16112 }, { "epoch": 2.6106610499027867, "grad_norm": 0.8608875274658203, "learning_rate": 2.1762101179463896e-07, "loss": 0.064, "step": 16113 }, { "epoch": 2.6108230719377836, "grad_norm": 0.9533604979515076, "learning_rate": 2.174425969615962e-07, "loss": 0.0635, "step": 16114 }, { "epoch": 2.61098509397278, "grad_norm": 0.88776695728302, "learning_rate": 2.1726425196928663e-07, "loss": 0.0543, "step": 16115 }, { "epoch": 2.611147116007777, "grad_norm": 1.0538303852081299, "learning_rate": 2.1708597682316645e-07, "loss": 0.0626, "step": 16116 }, { "epoch": 2.611309138042774, "grad_norm": 0.8502702713012695, "learning_rate": 2.1690777152869103e-07, "loss": 0.0554, "step": 16117 }, { "epoch": 2.6114711600777705, "grad_norm": 0.9557256102561951, "learning_rate": 2.1672963609131292e-07, "loss": 0.0582, "step": 16118 }, { "epoch": 2.6116331821127674, "grad_norm": 1.0968202352523804, "learning_rate": 2.1655157051648223e-07, "loss": 0.0612, "step": 16119 }, { "epoch": 2.6117952041477643, "grad_norm": 0.9737009406089783, "learning_rate": 2.1637357480964821e-07, "loss": 0.0624, "step": 16120 }, { "epoch": 2.611957226182761, "grad_norm": 0.9180959463119507, "learning_rate": 2.1619564897625566e-07, "loss": 0.0625, "step": 16121 }, { "epoch": 2.6121192482177578, "grad_norm": 0.9389775395393372, "learning_rate": 2.1601779302175026e-07, "loss": 0.0632, "step": 16122 }, { "epoch": 2.6122812702527543, "grad_norm": 0.9112357497215271, "learning_rate": 2.158400069515734e-07, "loss": 0.0631, "step": 16123 }, { "epoch": 2.612443292287751, "grad_norm": 0.9230049848556519, "learning_rate": 2.1566229077116445e-07, "loss": 0.058, "step": 16124 }, { "epoch": 2.6126053143227477, "grad_norm": 0.9439942240715027, "learning_rate": 2.1548464448596123e-07, "loss": 0.0643, "step": 16125 }, { "epoch": 2.6127673363577446, "grad_norm": 0.8843783140182495, "learning_rate": 2.1530706810139913e-07, "loss": 0.064, "step": 16126 }, { "epoch": 2.6129293583927415, "grad_norm": 0.980243980884552, "learning_rate": 2.1512956162291294e-07, "loss": 0.0633, "step": 16127 }, { "epoch": 2.613091380427738, "grad_norm": 0.9535460472106934, "learning_rate": 2.1495212505593221e-07, "loss": 0.0688, "step": 16128 }, { "epoch": 2.613253402462735, "grad_norm": 0.9649513959884644, "learning_rate": 2.147747584058868e-07, "loss": 0.0587, "step": 16129 }, { "epoch": 2.613415424497732, "grad_norm": 0.9310510158538818, "learning_rate": 2.1459746167820372e-07, "loss": 0.0603, "step": 16130 }, { "epoch": 2.6135774465327284, "grad_norm": 1.098154902458191, "learning_rate": 2.1442023487830782e-07, "loss": 0.0632, "step": 16131 }, { "epoch": 2.6137394685677253, "grad_norm": 0.9631187915802002, "learning_rate": 2.142430780116214e-07, "loss": 0.0624, "step": 16132 }, { "epoch": 2.613901490602722, "grad_norm": 1.261637568473816, "learning_rate": 2.1406599108356573e-07, "loss": 0.067, "step": 16133 }, { "epoch": 2.6140635126377187, "grad_norm": 0.8249843120574951, "learning_rate": 2.1388897409955867e-07, "loss": 0.0528, "step": 16134 }, { "epoch": 2.6142255346727152, "grad_norm": 0.8405267596244812, "learning_rate": 2.1371202706501697e-07, "loss": 0.0559, "step": 16135 }, { "epoch": 2.614387556707712, "grad_norm": 0.8323154449462891, "learning_rate": 2.1353514998535414e-07, "loss": 0.0581, "step": 16136 }, { "epoch": 2.614549578742709, "grad_norm": 0.9908446669578552, "learning_rate": 2.13358342865983e-07, "loss": 0.0634, "step": 16137 }, { "epoch": 2.6147116007777056, "grad_norm": 0.9334249496459961, "learning_rate": 2.1318160571231316e-07, "loss": 0.0594, "step": 16138 }, { "epoch": 2.6148736228127025, "grad_norm": 0.9564669728279114, "learning_rate": 2.1300493852975167e-07, "loss": 0.0613, "step": 16139 }, { "epoch": 2.6150356448476995, "grad_norm": 1.0061742067337036, "learning_rate": 2.128283413237045e-07, "loss": 0.0643, "step": 16140 }, { "epoch": 2.615197666882696, "grad_norm": 0.8926098942756653, "learning_rate": 2.1265181409957537e-07, "loss": 0.0605, "step": 16141 }, { "epoch": 2.615359688917693, "grad_norm": 0.8778125047683716, "learning_rate": 2.1247535686276632e-07, "loss": 0.0568, "step": 16142 }, { "epoch": 2.61552171095269, "grad_norm": 0.8341763615608215, "learning_rate": 2.1229896961867475e-07, "loss": 0.0596, "step": 16143 }, { "epoch": 2.6156837329876863, "grad_norm": 1.0330524444580078, "learning_rate": 2.121226523726988e-07, "loss": 0.07, "step": 16144 }, { "epoch": 2.6158457550226832, "grad_norm": 0.8376036286354065, "learning_rate": 2.1194640513023306e-07, "loss": 0.0545, "step": 16145 }, { "epoch": 2.6160077770576797, "grad_norm": 0.8829439282417297, "learning_rate": 2.1177022789667045e-07, "loss": 0.059, "step": 16146 }, { "epoch": 2.6161697990926767, "grad_norm": 0.9374154806137085, "learning_rate": 2.1159412067740136e-07, "loss": 0.0578, "step": 16147 }, { "epoch": 2.616331821127673, "grad_norm": 0.9516973495483398, "learning_rate": 2.1141808347781428e-07, "loss": 0.0641, "step": 16148 }, { "epoch": 2.61649384316267, "grad_norm": 0.885984480381012, "learning_rate": 2.1124211630329571e-07, "loss": 0.057, "step": 16149 }, { "epoch": 2.616655865197667, "grad_norm": 0.9362042546272278, "learning_rate": 2.110662191592297e-07, "loss": 0.0587, "step": 16150 }, { "epoch": 2.6168178872326635, "grad_norm": 1.1420210599899292, "learning_rate": 2.1089039205099832e-07, "loss": 0.0599, "step": 16151 }, { "epoch": 2.6169799092676604, "grad_norm": 0.8978714346885681, "learning_rate": 2.1071463498398114e-07, "loss": 0.0628, "step": 16152 }, { "epoch": 2.6171419313026574, "grad_norm": 0.8524075746536255, "learning_rate": 2.1053894796355694e-07, "loss": 0.0582, "step": 16153 }, { "epoch": 2.617303953337654, "grad_norm": 0.8688123822212219, "learning_rate": 2.103633309950995e-07, "loss": 0.0616, "step": 16154 }, { "epoch": 2.617465975372651, "grad_norm": 0.8266528844833374, "learning_rate": 2.101877840839836e-07, "loss": 0.0588, "step": 16155 }, { "epoch": 2.6176279974076473, "grad_norm": 0.9983370900154114, "learning_rate": 2.1001230723558087e-07, "loss": 0.0625, "step": 16156 }, { "epoch": 2.6177900194426442, "grad_norm": 0.835728645324707, "learning_rate": 2.0983690045525944e-07, "loss": 0.0593, "step": 16157 }, { "epoch": 2.6179520414776407, "grad_norm": 0.8325420618057251, "learning_rate": 2.0966156374838677e-07, "loss": 0.0582, "step": 16158 }, { "epoch": 2.6181140635126376, "grad_norm": 0.8607774376869202, "learning_rate": 2.0948629712032738e-07, "loss": 0.0552, "step": 16159 }, { "epoch": 2.6182760855476346, "grad_norm": 1.0366528034210205, "learning_rate": 2.0931110057644505e-07, "loss": 0.0654, "step": 16160 }, { "epoch": 2.618438107582631, "grad_norm": 1.09884512424469, "learning_rate": 2.0913597412209941e-07, "loss": 0.0653, "step": 16161 }, { "epoch": 2.618600129617628, "grad_norm": 1.109409213066101, "learning_rate": 2.089609177626492e-07, "loss": 0.0675, "step": 16162 }, { "epoch": 2.618762151652625, "grad_norm": 0.9305920004844666, "learning_rate": 2.0878593150345043e-07, "loss": 0.057, "step": 16163 }, { "epoch": 2.6189241736876214, "grad_norm": 0.9153008460998535, "learning_rate": 2.0861101534985774e-07, "loss": 0.0565, "step": 16164 }, { "epoch": 2.6190861957226184, "grad_norm": 0.9763879179954529, "learning_rate": 2.0843616930722288e-07, "loss": 0.0676, "step": 16165 }, { "epoch": 2.6192482177576153, "grad_norm": 0.9485346078872681, "learning_rate": 2.082613933808958e-07, "loss": 0.0615, "step": 16166 }, { "epoch": 2.619410239792612, "grad_norm": 0.7745640277862549, "learning_rate": 2.0808668757622413e-07, "loss": 0.0542, "step": 16167 }, { "epoch": 2.6195722618276087, "grad_norm": 0.9337866902351379, "learning_rate": 2.079120518985539e-07, "loss": 0.0631, "step": 16168 }, { "epoch": 2.619734283862605, "grad_norm": 0.7666480541229248, "learning_rate": 2.077374863532275e-07, "loss": 0.0533, "step": 16169 }, { "epoch": 2.619896305897602, "grad_norm": 0.8683062791824341, "learning_rate": 2.07562990945587e-07, "loss": 0.0573, "step": 16170 }, { "epoch": 2.6200583279325986, "grad_norm": 0.9523324370384216, "learning_rate": 2.073885656809718e-07, "loss": 0.061, "step": 16171 }, { "epoch": 2.6202203499675956, "grad_norm": 0.855353832244873, "learning_rate": 2.0721421056471818e-07, "loss": 0.0556, "step": 16172 }, { "epoch": 2.6203823720025925, "grad_norm": 0.924647331237793, "learning_rate": 2.0703992560216075e-07, "loss": 0.0608, "step": 16173 }, { "epoch": 2.620544394037589, "grad_norm": 0.9063007831573486, "learning_rate": 2.0686571079863383e-07, "loss": 0.0636, "step": 16174 }, { "epoch": 2.620706416072586, "grad_norm": 0.9852257370948792, "learning_rate": 2.0669156615946623e-07, "loss": 0.0637, "step": 16175 }, { "epoch": 2.620868438107583, "grad_norm": 0.8728942275047302, "learning_rate": 2.0651749168998703e-07, "loss": 0.0603, "step": 16176 }, { "epoch": 2.6210304601425793, "grad_norm": 0.9853529334068298, "learning_rate": 2.0634348739552251e-07, "loss": 0.065, "step": 16177 }, { "epoch": 2.6211924821775763, "grad_norm": 0.9242932200431824, "learning_rate": 2.0616955328139675e-07, "loss": 0.0658, "step": 16178 }, { "epoch": 2.6213545042125728, "grad_norm": 0.8449025750160217, "learning_rate": 2.059956893529319e-07, "loss": 0.0517, "step": 16179 }, { "epoch": 2.6215165262475697, "grad_norm": 0.9864807724952698, "learning_rate": 2.058218956154473e-07, "loss": 0.063, "step": 16180 }, { "epoch": 2.621678548282566, "grad_norm": 1.0352917909622192, "learning_rate": 2.0564817207426092e-07, "loss": 0.0568, "step": 16181 }, { "epoch": 2.621840570317563, "grad_norm": 1.0253640413284302, "learning_rate": 2.0547451873468877e-07, "loss": 0.0694, "step": 16182 }, { "epoch": 2.62200259235256, "grad_norm": 0.8204119205474854, "learning_rate": 2.0530093560204272e-07, "loss": 0.0532, "step": 16183 }, { "epoch": 2.6221646143875565, "grad_norm": 0.9108116030693054, "learning_rate": 2.051274226816355e-07, "loss": 0.0631, "step": 16184 }, { "epoch": 2.6223266364225535, "grad_norm": 0.8796073198318481, "learning_rate": 2.0495397997877558e-07, "loss": 0.0594, "step": 16185 }, { "epoch": 2.6224886584575504, "grad_norm": 0.931476891040802, "learning_rate": 2.0478060749877044e-07, "loss": 0.0647, "step": 16186 }, { "epoch": 2.622650680492547, "grad_norm": 0.9860643148422241, "learning_rate": 2.0460730524692384e-07, "loss": 0.061, "step": 16187 }, { "epoch": 2.622812702527544, "grad_norm": 0.9249363541603088, "learning_rate": 2.0443407322853882e-07, "loss": 0.058, "step": 16188 }, { "epoch": 2.6229747245625408, "grad_norm": 0.9313977956771851, "learning_rate": 2.0426091144891664e-07, "loss": 0.0585, "step": 16189 }, { "epoch": 2.6231367465975373, "grad_norm": 0.8045008182525635, "learning_rate": 2.0408781991335446e-07, "loss": 0.0542, "step": 16190 }, { "epoch": 2.6232987686325338, "grad_norm": 0.8684973120689392, "learning_rate": 2.039147986271492e-07, "loss": 0.0604, "step": 16191 }, { "epoch": 2.6234607906675307, "grad_norm": 0.8771528005599976, "learning_rate": 2.0374184759559463e-07, "loss": 0.053, "step": 16192 }, { "epoch": 2.6236228127025276, "grad_norm": 0.9225574731826782, "learning_rate": 2.0356896682398264e-07, "loss": 0.0605, "step": 16193 }, { "epoch": 2.623784834737524, "grad_norm": 0.8193483948707581, "learning_rate": 2.033961563176029e-07, "loss": 0.0596, "step": 16194 }, { "epoch": 2.623946856772521, "grad_norm": 1.0594878196716309, "learning_rate": 2.0322341608174338e-07, "loss": 0.0619, "step": 16195 }, { "epoch": 2.624108878807518, "grad_norm": 0.7767667770385742, "learning_rate": 2.0305074612168906e-07, "loss": 0.0532, "step": 16196 }, { "epoch": 2.6242709008425145, "grad_norm": 0.8942044377326965, "learning_rate": 2.0287814644272347e-07, "loss": 0.0574, "step": 16197 }, { "epoch": 2.6244329228775114, "grad_norm": 0.8722692728042603, "learning_rate": 2.0270561705012765e-07, "loss": 0.0565, "step": 16198 }, { "epoch": 2.6245949449125083, "grad_norm": 0.9796530604362488, "learning_rate": 2.0253315794918043e-07, "loss": 0.0698, "step": 16199 }, { "epoch": 2.624756966947505, "grad_norm": 0.9763656854629517, "learning_rate": 2.0236076914515956e-07, "loss": 0.0721, "step": 16200 }, { "epoch": 2.6249189889825018, "grad_norm": 1.099333643913269, "learning_rate": 2.021884506433383e-07, "loss": 0.0629, "step": 16201 }, { "epoch": 2.6250810110174982, "grad_norm": 0.8187487721443176, "learning_rate": 2.020162024489894e-07, "loss": 0.059, "step": 16202 }, { "epoch": 2.625243033052495, "grad_norm": 0.874085009098053, "learning_rate": 2.0184402456738444e-07, "loss": 0.0601, "step": 16203 }, { "epoch": 2.6254050550874917, "grad_norm": 0.9488238096237183, "learning_rate": 2.0167191700379092e-07, "loss": 0.0593, "step": 16204 }, { "epoch": 2.6255670771224886, "grad_norm": 1.0066373348236084, "learning_rate": 2.0149987976347485e-07, "loss": 0.0596, "step": 16205 }, { "epoch": 2.6257290991574855, "grad_norm": 0.8716149926185608, "learning_rate": 2.0132791285169985e-07, "loss": 0.0615, "step": 16206 }, { "epoch": 2.625891121192482, "grad_norm": 0.8857298493385315, "learning_rate": 2.0115601627372832e-07, "loss": 0.0551, "step": 16207 }, { "epoch": 2.626053143227479, "grad_norm": 0.8469251394271851, "learning_rate": 2.0098419003481946e-07, "loss": 0.0573, "step": 16208 }, { "epoch": 2.626215165262476, "grad_norm": 0.8539047837257385, "learning_rate": 2.0081243414023067e-07, "loss": 0.0561, "step": 16209 }, { "epoch": 2.6263771872974724, "grad_norm": 0.9203428626060486, "learning_rate": 2.0064074859521777e-07, "loss": 0.0624, "step": 16210 }, { "epoch": 2.6265392093324693, "grad_norm": 0.9050666093826294, "learning_rate": 2.004691334050335e-07, "loss": 0.0579, "step": 16211 }, { "epoch": 2.6267012313674662, "grad_norm": 0.9167254567146301, "learning_rate": 2.0029758857492893e-07, "loss": 0.0663, "step": 16212 }, { "epoch": 2.6268632534024627, "grad_norm": 0.9239572882652283, "learning_rate": 2.001261141101532e-07, "loss": 0.0646, "step": 16213 }, { "epoch": 2.6270252754374592, "grad_norm": 0.7515432238578796, "learning_rate": 1.9995471001595267e-07, "loss": 0.0568, "step": 16214 }, { "epoch": 2.627187297472456, "grad_norm": 0.834296703338623, "learning_rate": 1.9978337629757233e-07, "loss": 0.0564, "step": 16215 }, { "epoch": 2.627349319507453, "grad_norm": 0.78092360496521, "learning_rate": 1.9961211296025352e-07, "loss": 0.0565, "step": 16216 }, { "epoch": 2.6275113415424496, "grad_norm": 0.9915261268615723, "learning_rate": 1.994409200092376e-07, "loss": 0.0632, "step": 16217 }, { "epoch": 2.6276733635774465, "grad_norm": 1.0370954275131226, "learning_rate": 1.992697974497629e-07, "loss": 0.066, "step": 16218 }, { "epoch": 2.6278353856124435, "grad_norm": 0.905182957649231, "learning_rate": 1.9909874528706407e-07, "loss": 0.0663, "step": 16219 }, { "epoch": 2.62799740764744, "grad_norm": 0.8637315034866333, "learning_rate": 1.989277635263756e-07, "loss": 0.0556, "step": 16220 }, { "epoch": 2.628159429682437, "grad_norm": 0.8456125855445862, "learning_rate": 1.9875685217292856e-07, "loss": 0.0563, "step": 16221 }, { "epoch": 2.628321451717434, "grad_norm": 0.8584468960762024, "learning_rate": 1.9858601123195403e-07, "loss": 0.0578, "step": 16222 }, { "epoch": 2.6284834737524303, "grad_norm": 0.9558644890785217, "learning_rate": 1.9841524070867784e-07, "loss": 0.0656, "step": 16223 }, { "epoch": 2.6286454957874272, "grad_norm": 1.1154943704605103, "learning_rate": 1.9824454060832526e-07, "loss": 0.0623, "step": 16224 }, { "epoch": 2.6288075178224237, "grad_norm": 0.9311351776123047, "learning_rate": 1.980739109361199e-07, "loss": 0.0581, "step": 16225 }, { "epoch": 2.6289695398574207, "grad_norm": 1.0172123908996582, "learning_rate": 1.9790335169728197e-07, "loss": 0.0644, "step": 16226 }, { "epoch": 2.629131561892417, "grad_norm": 1.0837866067886353, "learning_rate": 1.977328628970307e-07, "loss": 0.0607, "step": 16227 }, { "epoch": 2.629293583927414, "grad_norm": 0.8728600144386292, "learning_rate": 1.9756244454058244e-07, "loss": 0.06, "step": 16228 }, { "epoch": 2.629455605962411, "grad_norm": 0.9363974332809448, "learning_rate": 1.9739209663315162e-07, "loss": 0.0597, "step": 16229 }, { "epoch": 2.6296176279974075, "grad_norm": 0.9871296882629395, "learning_rate": 1.9722181917995103e-07, "loss": 0.0646, "step": 16230 }, { "epoch": 2.6297796500324044, "grad_norm": 0.8740096092224121, "learning_rate": 1.9705161218618902e-07, "loss": 0.0625, "step": 16231 }, { "epoch": 2.6299416720674014, "grad_norm": 1.0138083696365356, "learning_rate": 1.9688147565707528e-07, "loss": 0.0668, "step": 16232 }, { "epoch": 2.630103694102398, "grad_norm": 0.7797446846961975, "learning_rate": 1.967114095978151e-07, "loss": 0.0515, "step": 16233 }, { "epoch": 2.630265716137395, "grad_norm": 0.9009860754013062, "learning_rate": 1.9654141401361183e-07, "loss": 0.0583, "step": 16234 }, { "epoch": 2.6304277381723913, "grad_norm": 0.9890360236167908, "learning_rate": 1.9637148890966685e-07, "loss": 0.0623, "step": 16235 }, { "epoch": 2.630589760207388, "grad_norm": 0.7815765738487244, "learning_rate": 1.9620163429117906e-07, "loss": 0.0524, "step": 16236 }, { "epoch": 2.6307517822423847, "grad_norm": 0.9130102396011353, "learning_rate": 1.9603185016334737e-07, "loss": 0.0572, "step": 16237 }, { "epoch": 2.6309138042773816, "grad_norm": 0.9095372557640076, "learning_rate": 1.958621365313648e-07, "loss": 0.0609, "step": 16238 }, { "epoch": 2.6310758263123786, "grad_norm": 0.7958774566650391, "learning_rate": 1.9569249340042534e-07, "loss": 0.0591, "step": 16239 }, { "epoch": 2.631237848347375, "grad_norm": 0.8964948654174805, "learning_rate": 1.9552292077571894e-07, "loss": 0.0678, "step": 16240 }, { "epoch": 2.631399870382372, "grad_norm": 0.9855856895446777, "learning_rate": 1.953534186624345e-07, "loss": 0.065, "step": 16241 }, { "epoch": 2.631561892417369, "grad_norm": 1.1027717590332031, "learning_rate": 1.9518398706575846e-07, "loss": 0.064, "step": 16242 }, { "epoch": 2.6317239144523654, "grad_norm": 0.8115701079368591, "learning_rate": 1.9501462599087472e-07, "loss": 0.0496, "step": 16243 }, { "epoch": 2.6318859364873624, "grad_norm": 0.9368529915809631, "learning_rate": 1.948453354429661e-07, "loss": 0.0613, "step": 16244 }, { "epoch": 2.6320479585223593, "grad_norm": 0.8521546721458435, "learning_rate": 1.946761154272106e-07, "loss": 0.059, "step": 16245 }, { "epoch": 2.6322099805573558, "grad_norm": 0.9154667258262634, "learning_rate": 1.9450696594878804e-07, "loss": 0.0632, "step": 16246 }, { "epoch": 2.6323720025923527, "grad_norm": 0.8996468782424927, "learning_rate": 1.9433788701287288e-07, "loss": 0.0639, "step": 16247 }, { "epoch": 2.632534024627349, "grad_norm": 0.8377342820167542, "learning_rate": 1.941688786246393e-07, "loss": 0.0593, "step": 16248 }, { "epoch": 2.632696046662346, "grad_norm": 0.7824660539627075, "learning_rate": 1.939999407892576e-07, "loss": 0.052, "step": 16249 }, { "epoch": 2.6328580686973426, "grad_norm": 0.8129716515541077, "learning_rate": 1.9383107351189672e-07, "loss": 0.0569, "step": 16250 }, { "epoch": 2.6330200907323396, "grad_norm": 0.8622326254844666, "learning_rate": 1.936622767977253e-07, "loss": 0.0588, "step": 16251 }, { "epoch": 2.6331821127673365, "grad_norm": 0.9913507103919983, "learning_rate": 1.9349355065190618e-07, "loss": 0.0662, "step": 16252 }, { "epoch": 2.633344134802333, "grad_norm": 0.8238366842269897, "learning_rate": 1.9332489507960324e-07, "loss": 0.0566, "step": 16253 }, { "epoch": 2.63350615683733, "grad_norm": 0.9328113794326782, "learning_rate": 1.9315631008597596e-07, "loss": 0.0648, "step": 16254 }, { "epoch": 2.633668178872327, "grad_norm": 0.9620225429534912, "learning_rate": 1.9298779567618357e-07, "loss": 0.0593, "step": 16255 }, { "epoch": 2.6338302009073233, "grad_norm": 0.7656645774841309, "learning_rate": 1.9281935185538141e-07, "loss": 0.0511, "step": 16256 }, { "epoch": 2.6339922229423203, "grad_norm": 0.8839242458343506, "learning_rate": 1.9265097862872423e-07, "loss": 0.0611, "step": 16257 }, { "epoch": 2.6341542449773168, "grad_norm": 0.8976559638977051, "learning_rate": 1.9248267600136317e-07, "loss": 0.0574, "step": 16258 }, { "epoch": 2.6343162670123137, "grad_norm": 1.1227093935012817, "learning_rate": 1.9231444397844855e-07, "loss": 0.0671, "step": 16259 }, { "epoch": 2.63447828904731, "grad_norm": 0.8992955088615417, "learning_rate": 1.9214628256512656e-07, "loss": 0.0623, "step": 16260 }, { "epoch": 2.634640311082307, "grad_norm": 1.0141760110855103, "learning_rate": 1.919781917665439e-07, "loss": 0.0653, "step": 16261 }, { "epoch": 2.634802333117304, "grad_norm": 1.1703956127166748, "learning_rate": 1.918101715878437e-07, "loss": 0.0751, "step": 16262 }, { "epoch": 2.6349643551523005, "grad_norm": 0.9227219820022583, "learning_rate": 1.9164222203416627e-07, "loss": 0.0644, "step": 16263 }, { "epoch": 2.6351263771872975, "grad_norm": 0.804478645324707, "learning_rate": 1.9147434311065028e-07, "loss": 0.0484, "step": 16264 }, { "epoch": 2.6352883992222944, "grad_norm": 0.8943800926208496, "learning_rate": 1.91306534822433e-07, "loss": 0.0589, "step": 16265 }, { "epoch": 2.635450421257291, "grad_norm": 0.9125702977180481, "learning_rate": 1.911387971746495e-07, "loss": 0.0503, "step": 16266 }, { "epoch": 2.635612443292288, "grad_norm": 0.9437099099159241, "learning_rate": 1.9097113017243097e-07, "loss": 0.0515, "step": 16267 }, { "epoch": 2.6357744653272848, "grad_norm": 0.9175340533256531, "learning_rate": 1.9080353382090798e-07, "loss": 0.0571, "step": 16268 }, { "epoch": 2.6359364873622813, "grad_norm": 1.21630859375, "learning_rate": 1.9063600812520898e-07, "loss": 0.0573, "step": 16269 }, { "epoch": 2.636098509397278, "grad_norm": 1.0302997827529907, "learning_rate": 1.9046855309045957e-07, "loss": 0.0666, "step": 16270 }, { "epoch": 2.6362605314322747, "grad_norm": 0.8015779256820679, "learning_rate": 1.9030116872178317e-07, "loss": 0.0546, "step": 16271 }, { "epoch": 2.6364225534672716, "grad_norm": 1.0317645072937012, "learning_rate": 1.9013385502430175e-07, "loss": 0.0664, "step": 16272 }, { "epoch": 2.636584575502268, "grad_norm": 0.9456698894500732, "learning_rate": 1.899666120031349e-07, "loss": 0.0589, "step": 16273 }, { "epoch": 2.636746597537265, "grad_norm": 0.9725162386894226, "learning_rate": 1.8979943966339924e-07, "loss": 0.065, "step": 16274 }, { "epoch": 2.636908619572262, "grad_norm": 0.8339110016822815, "learning_rate": 1.8963233801021024e-07, "loss": 0.0542, "step": 16275 }, { "epoch": 2.6370706416072585, "grad_norm": 0.991547167301178, "learning_rate": 1.8946530704868072e-07, "loss": 0.0624, "step": 16276 }, { "epoch": 2.6372326636422554, "grad_norm": 0.8456936478614807, "learning_rate": 1.8929834678392184e-07, "loss": 0.0566, "step": 16277 }, { "epoch": 2.6373946856772523, "grad_norm": 0.9428996443748474, "learning_rate": 1.891314572210412e-07, "loss": 0.0575, "step": 16278 }, { "epoch": 2.637556707712249, "grad_norm": 0.9978534579277039, "learning_rate": 1.8896463836514556e-07, "loss": 0.0676, "step": 16279 }, { "epoch": 2.6377187297472457, "grad_norm": 0.837712824344635, "learning_rate": 1.887978902213397e-07, "loss": 0.0553, "step": 16280 }, { "epoch": 2.6378807517822422, "grad_norm": 1.049025058746338, "learning_rate": 1.8863121279472595e-07, "loss": 0.0626, "step": 16281 }, { "epoch": 2.638042773817239, "grad_norm": 1.0920323133468628, "learning_rate": 1.8846460609040302e-07, "loss": 0.0624, "step": 16282 }, { "epoch": 2.6382047958522357, "grad_norm": 0.900133490562439, "learning_rate": 1.88298070113469e-07, "loss": 0.0582, "step": 16283 }, { "epoch": 2.6383668178872326, "grad_norm": 0.9234185814857483, "learning_rate": 1.881316048690207e-07, "loss": 0.0614, "step": 16284 }, { "epoch": 2.6385288399222295, "grad_norm": 0.9673925042152405, "learning_rate": 1.8796521036215044e-07, "loss": 0.0552, "step": 16285 }, { "epoch": 2.638690861957226, "grad_norm": 0.9933484792709351, "learning_rate": 1.8779888659794937e-07, "loss": 0.0658, "step": 16286 }, { "epoch": 2.638852883992223, "grad_norm": 0.8367605209350586, "learning_rate": 1.8763263358150735e-07, "loss": 0.0614, "step": 16287 }, { "epoch": 2.63901490602722, "grad_norm": 0.9234703779220581, "learning_rate": 1.874664513179106e-07, "loss": 0.0558, "step": 16288 }, { "epoch": 2.6391769280622164, "grad_norm": 1.0613017082214355, "learning_rate": 1.8730033981224443e-07, "loss": 0.0688, "step": 16289 }, { "epoch": 2.6393389500972133, "grad_norm": 1.0138506889343262, "learning_rate": 1.8713429906959097e-07, "loss": 0.0574, "step": 16290 }, { "epoch": 2.6395009721322102, "grad_norm": 0.9312925338745117, "learning_rate": 1.869683290950311e-07, "loss": 0.0671, "step": 16291 }, { "epoch": 2.6396629941672067, "grad_norm": 0.90425705909729, "learning_rate": 1.8680242989364327e-07, "loss": 0.0607, "step": 16292 }, { "epoch": 2.6398250162022032, "grad_norm": 1.0604413747787476, "learning_rate": 1.8663660147050262e-07, "loss": 0.0629, "step": 16293 }, { "epoch": 2.6399870382372, "grad_norm": 0.7951275706291199, "learning_rate": 1.8647084383068393e-07, "loss": 0.051, "step": 16294 }, { "epoch": 2.640149060272197, "grad_norm": 0.8711312413215637, "learning_rate": 1.8630515697925927e-07, "loss": 0.0563, "step": 16295 }, { "epoch": 2.6403110823071936, "grad_norm": 1.0285757780075073, "learning_rate": 1.8613954092129738e-07, "loss": 0.0657, "step": 16296 }, { "epoch": 2.6404731043421905, "grad_norm": 0.9499953985214233, "learning_rate": 1.8597399566186615e-07, "loss": 0.06, "step": 16297 }, { "epoch": 2.6406351263771874, "grad_norm": 0.8965511918067932, "learning_rate": 1.858085212060304e-07, "loss": 0.0599, "step": 16298 }, { "epoch": 2.640797148412184, "grad_norm": 1.0099302530288696, "learning_rate": 1.8564311755885438e-07, "loss": 0.0557, "step": 16299 }, { "epoch": 2.640959170447181, "grad_norm": 0.9318955540657043, "learning_rate": 1.85477784725398e-07, "loss": 0.0622, "step": 16300 }, { "epoch": 2.641121192482178, "grad_norm": 0.9583910703659058, "learning_rate": 1.8531252271072025e-07, "loss": 0.0625, "step": 16301 }, { "epoch": 2.6412832145171743, "grad_norm": 0.8921169638633728, "learning_rate": 1.851473315198782e-07, "loss": 0.0591, "step": 16302 }, { "epoch": 2.6414452365521712, "grad_norm": 0.9584546089172363, "learning_rate": 1.8498221115792554e-07, "loss": 0.0664, "step": 16303 }, { "epoch": 2.6416072585871677, "grad_norm": 0.9304138422012329, "learning_rate": 1.848171616299152e-07, "loss": 0.06, "step": 16304 }, { "epoch": 2.6417692806221647, "grad_norm": 0.9597314596176147, "learning_rate": 1.8465218294089704e-07, "loss": 0.0605, "step": 16305 }, { "epoch": 2.641931302657161, "grad_norm": 0.9333338737487793, "learning_rate": 1.8448727509591951e-07, "loss": 0.0571, "step": 16306 }, { "epoch": 2.642093324692158, "grad_norm": 0.9065783619880676, "learning_rate": 1.843224381000272e-07, "loss": 0.0621, "step": 16307 }, { "epoch": 2.642255346727155, "grad_norm": 0.7040373086929321, "learning_rate": 1.8415767195826468e-07, "loss": 0.0506, "step": 16308 }, { "epoch": 2.6424173687621515, "grad_norm": 0.825442373752594, "learning_rate": 1.8399297667567317e-07, "loss": 0.0603, "step": 16309 }, { "epoch": 2.6425793907971484, "grad_norm": 0.9128686189651489, "learning_rate": 1.8382835225729256e-07, "loss": 0.0604, "step": 16310 }, { "epoch": 2.6427414128321454, "grad_norm": 0.8846583962440491, "learning_rate": 1.836637987081588e-07, "loss": 0.06, "step": 16311 }, { "epoch": 2.642903434867142, "grad_norm": 0.8874152302742004, "learning_rate": 1.834993160333068e-07, "loss": 0.0601, "step": 16312 }, { "epoch": 2.643065456902139, "grad_norm": 1.2292134761810303, "learning_rate": 1.8333490423777106e-07, "loss": 0.0675, "step": 16313 }, { "epoch": 2.6432274789371357, "grad_norm": 0.841103732585907, "learning_rate": 1.831705633265804e-07, "loss": 0.0571, "step": 16314 }, { "epoch": 2.643389500972132, "grad_norm": 0.9429198503494263, "learning_rate": 1.8300629330476383e-07, "loss": 0.0633, "step": 16315 }, { "epoch": 2.6435515230071287, "grad_norm": 1.0920491218566895, "learning_rate": 1.8284209417734762e-07, "loss": 0.0608, "step": 16316 }, { "epoch": 2.6437135450421256, "grad_norm": 0.9739688634872437, "learning_rate": 1.8267796594935606e-07, "loss": 0.0637, "step": 16317 }, { "epoch": 2.6438755670771226, "grad_norm": 1.1021599769592285, "learning_rate": 1.8251390862581097e-07, "loss": 0.0693, "step": 16318 }, { "epoch": 2.644037589112119, "grad_norm": 0.9459995627403259, "learning_rate": 1.8234992221173198e-07, "loss": 0.0639, "step": 16319 }, { "epoch": 2.644199611147116, "grad_norm": 0.9659364223480225, "learning_rate": 1.8218600671213698e-07, "loss": 0.06, "step": 16320 }, { "epoch": 2.644361633182113, "grad_norm": 0.954434871673584, "learning_rate": 1.8202216213204144e-07, "loss": 0.0613, "step": 16321 }, { "epoch": 2.6445236552171094, "grad_norm": 0.8224755525588989, "learning_rate": 1.8185838847645743e-07, "loss": 0.055, "step": 16322 }, { "epoch": 2.6446856772521063, "grad_norm": 0.9007136821746826, "learning_rate": 1.8169468575039735e-07, "loss": 0.0645, "step": 16323 }, { "epoch": 2.6448476992871033, "grad_norm": 1.1272404193878174, "learning_rate": 1.8153105395886967e-07, "loss": 0.0585, "step": 16324 }, { "epoch": 2.6450097213220998, "grad_norm": 0.8485245108604431, "learning_rate": 1.813674931068818e-07, "loss": 0.0603, "step": 16325 }, { "epoch": 2.6451717433570967, "grad_norm": 0.8735572099685669, "learning_rate": 1.8120400319943692e-07, "loss": 0.0557, "step": 16326 }, { "epoch": 2.645333765392093, "grad_norm": 0.8615409731864929, "learning_rate": 1.8104058424153802e-07, "loss": 0.0594, "step": 16327 }, { "epoch": 2.64549578742709, "grad_norm": 0.9410812258720398, "learning_rate": 1.8087723623818608e-07, "loss": 0.0596, "step": 16328 }, { "epoch": 2.6456578094620866, "grad_norm": 0.7836385369300842, "learning_rate": 1.8071395919437823e-07, "loss": 0.0522, "step": 16329 }, { "epoch": 2.6458198314970836, "grad_norm": 0.9764214158058167, "learning_rate": 1.805507531151107e-07, "loss": 0.067, "step": 16330 }, { "epoch": 2.6459818535320805, "grad_norm": 0.8474244475364685, "learning_rate": 1.8038761800537708e-07, "loss": 0.0555, "step": 16331 }, { "epoch": 2.646143875567077, "grad_norm": 0.8792239427566528, "learning_rate": 1.8022455387016913e-07, "loss": 0.0558, "step": 16332 }, { "epoch": 2.646305897602074, "grad_norm": 0.8611176609992981, "learning_rate": 1.8006156071447595e-07, "loss": 0.0568, "step": 16333 }, { "epoch": 2.646467919637071, "grad_norm": 0.9201486706733704, "learning_rate": 1.7989863854328492e-07, "loss": 0.0564, "step": 16334 }, { "epoch": 2.6466299416720673, "grad_norm": 0.8337270021438599, "learning_rate": 1.7973578736158098e-07, "loss": 0.0522, "step": 16335 }, { "epoch": 2.6467919637070643, "grad_norm": 0.9367399215698242, "learning_rate": 1.7957300717434706e-07, "loss": 0.0577, "step": 16336 }, { "epoch": 2.6469539857420608, "grad_norm": 0.913486123085022, "learning_rate": 1.794102979865639e-07, "loss": 0.0639, "step": 16337 }, { "epoch": 2.6471160077770577, "grad_norm": 0.9401398301124573, "learning_rate": 1.7924765980320974e-07, "loss": 0.0588, "step": 16338 }, { "epoch": 2.647278029812054, "grad_norm": 0.8506883382797241, "learning_rate": 1.790850926292617e-07, "loss": 0.0583, "step": 16339 }, { "epoch": 2.647440051847051, "grad_norm": 0.9261347055435181, "learning_rate": 1.7892259646969278e-07, "loss": 0.0591, "step": 16340 }, { "epoch": 2.647602073882048, "grad_norm": 1.0030386447906494, "learning_rate": 1.7876017132947483e-07, "loss": 0.061, "step": 16341 }, { "epoch": 2.6477640959170445, "grad_norm": 0.9110730290412903, "learning_rate": 1.785978172135791e-07, "loss": 0.0569, "step": 16342 }, { "epoch": 2.6479261179520415, "grad_norm": 0.8250390887260437, "learning_rate": 1.7843553412697278e-07, "loss": 0.0486, "step": 16343 }, { "epoch": 2.6480881399870384, "grad_norm": 1.0518966913223267, "learning_rate": 1.782733220746205e-07, "loss": 0.0639, "step": 16344 }, { "epoch": 2.648250162022035, "grad_norm": 0.9728784561157227, "learning_rate": 1.7811118106148633e-07, "loss": 0.0679, "step": 16345 }, { "epoch": 2.648412184057032, "grad_norm": 0.9852555990219116, "learning_rate": 1.7794911109253105e-07, "loss": 0.0634, "step": 16346 }, { "epoch": 2.6485742060920288, "grad_norm": 0.9500349164009094, "learning_rate": 1.777871121727137e-07, "loss": 0.0619, "step": 16347 }, { "epoch": 2.6487362281270252, "grad_norm": 0.9328777194023132, "learning_rate": 1.7762518430699122e-07, "loss": 0.0637, "step": 16348 }, { "epoch": 2.648898250162022, "grad_norm": 1.0019419193267822, "learning_rate": 1.774633275003179e-07, "loss": 0.0678, "step": 16349 }, { "epoch": 2.6490602721970187, "grad_norm": 1.070502758026123, "learning_rate": 1.7730154175764623e-07, "loss": 0.0578, "step": 16350 }, { "epoch": 2.6492222942320156, "grad_norm": 0.82248455286026, "learning_rate": 1.771398270839267e-07, "loss": 0.054, "step": 16351 }, { "epoch": 2.649384316267012, "grad_norm": 1.011540412902832, "learning_rate": 1.7697818348410722e-07, "loss": 0.066, "step": 16352 }, { "epoch": 2.649546338302009, "grad_norm": 0.9603102207183838, "learning_rate": 1.7681661096313364e-07, "loss": 0.0643, "step": 16353 }, { "epoch": 2.649708360337006, "grad_norm": 0.9704177379608154, "learning_rate": 1.7665510952595027e-07, "loss": 0.0703, "step": 16354 }, { "epoch": 2.6498703823720025, "grad_norm": 0.9658411741256714, "learning_rate": 1.764936791774974e-07, "loss": 0.0621, "step": 16355 }, { "epoch": 2.6500324044069994, "grad_norm": 0.9105367660522461, "learning_rate": 1.7633231992271572e-07, "loss": 0.0545, "step": 16356 }, { "epoch": 2.6501944264419963, "grad_norm": 0.8852334022521973, "learning_rate": 1.7617103176654187e-07, "loss": 0.0669, "step": 16357 }, { "epoch": 2.650356448476993, "grad_norm": 0.9062175154685974, "learning_rate": 1.7600981471391083e-07, "loss": 0.0594, "step": 16358 }, { "epoch": 2.6505184705119897, "grad_norm": 0.8429781794548035, "learning_rate": 1.7584866876975526e-07, "loss": 0.0531, "step": 16359 }, { "epoch": 2.6506804925469862, "grad_norm": 0.9941640496253967, "learning_rate": 1.7568759393900597e-07, "loss": 0.0622, "step": 16360 }, { "epoch": 2.650842514581983, "grad_norm": 1.0209076404571533, "learning_rate": 1.7552659022659206e-07, "loss": 0.0636, "step": 16361 }, { "epoch": 2.6510045366169797, "grad_norm": 1.1594321727752686, "learning_rate": 1.7536565763743934e-07, "loss": 0.0646, "step": 16362 }, { "epoch": 2.6511665586519766, "grad_norm": 0.9234576225280762, "learning_rate": 1.7520479617647163e-07, "loss": 0.0573, "step": 16363 }, { "epoch": 2.6513285806869735, "grad_norm": 0.9568017721176147, "learning_rate": 1.7504400584861137e-07, "loss": 0.065, "step": 16364 }, { "epoch": 2.65149060272197, "grad_norm": 0.9261468052864075, "learning_rate": 1.7488328665877823e-07, "loss": 0.0632, "step": 16365 }, { "epoch": 2.651652624756967, "grad_norm": 0.8637058734893799, "learning_rate": 1.7472263861189e-07, "loss": 0.0605, "step": 16366 }, { "epoch": 2.651814646791964, "grad_norm": 0.9233385324478149, "learning_rate": 1.7456206171286182e-07, "loss": 0.061, "step": 16367 }, { "epoch": 2.6519766688269604, "grad_norm": 0.8062421679496765, "learning_rate": 1.7440155596660735e-07, "loss": 0.0519, "step": 16368 }, { "epoch": 2.6521386908619573, "grad_norm": 0.8380430340766907, "learning_rate": 1.7424112137803763e-07, "loss": 0.0575, "step": 16369 }, { "epoch": 2.6523007128969542, "grad_norm": 0.9855707287788391, "learning_rate": 1.7408075795206037e-07, "loss": 0.0603, "step": 16370 }, { "epoch": 2.6524627349319507, "grad_norm": 1.0595271587371826, "learning_rate": 1.739204656935839e-07, "loss": 0.0718, "step": 16371 }, { "epoch": 2.6526247569669477, "grad_norm": 1.0735374689102173, "learning_rate": 1.7376024460751262e-07, "loss": 0.0748, "step": 16372 }, { "epoch": 2.652786779001944, "grad_norm": 0.929942786693573, "learning_rate": 1.7360009469874788e-07, "loss": 0.0564, "step": 16373 }, { "epoch": 2.652948801036941, "grad_norm": 0.8766565918922424, "learning_rate": 1.7344001597219024e-07, "loss": 0.0616, "step": 16374 }, { "epoch": 2.6531108230719376, "grad_norm": 0.9398424029350281, "learning_rate": 1.7328000843273879e-07, "loss": 0.0622, "step": 16375 }, { "epoch": 2.6532728451069345, "grad_norm": 0.9084738492965698, "learning_rate": 1.7312007208528796e-07, "loss": 0.0656, "step": 16376 }, { "epoch": 2.6534348671419314, "grad_norm": 0.9480365514755249, "learning_rate": 1.729602069347322e-07, "loss": 0.061, "step": 16377 }, { "epoch": 2.653596889176928, "grad_norm": 0.875299870967865, "learning_rate": 1.7280041298596257e-07, "loss": 0.0631, "step": 16378 }, { "epoch": 2.653758911211925, "grad_norm": 0.9430073499679565, "learning_rate": 1.7264069024386876e-07, "loss": 0.066, "step": 16379 }, { "epoch": 2.653920933246922, "grad_norm": 1.0011956691741943, "learning_rate": 1.7248103871333743e-07, "loss": 0.0557, "step": 16380 }, { "epoch": 2.6540829552819183, "grad_norm": 0.783429741859436, "learning_rate": 1.7232145839925413e-07, "loss": 0.0582, "step": 16381 }, { "epoch": 2.654244977316915, "grad_norm": 1.0384507179260254, "learning_rate": 1.7216194930650105e-07, "loss": 0.0629, "step": 16382 }, { "epoch": 2.6544069993519117, "grad_norm": 0.9398157596588135, "learning_rate": 1.7200251143995983e-07, "loss": 0.0562, "step": 16383 }, { "epoch": 2.6545690213869086, "grad_norm": 1.124776005744934, "learning_rate": 1.7184314480450713e-07, "loss": 0.0653, "step": 16384 }, { "epoch": 2.654731043421905, "grad_norm": 0.9212331771850586, "learning_rate": 1.7168384940502048e-07, "loss": 0.0662, "step": 16385 }, { "epoch": 2.654893065456902, "grad_norm": 1.0174168348312378, "learning_rate": 1.715246252463737e-07, "loss": 0.065, "step": 16386 }, { "epoch": 2.655055087491899, "grad_norm": 0.8449579477310181, "learning_rate": 1.7136547233343904e-07, "loss": 0.0596, "step": 16387 }, { "epoch": 2.6552171095268955, "grad_norm": 0.9600573182106018, "learning_rate": 1.7120639067108508e-07, "loss": 0.0591, "step": 16388 }, { "epoch": 2.6553791315618924, "grad_norm": 0.9382137060165405, "learning_rate": 1.7104738026417987e-07, "loss": 0.0599, "step": 16389 }, { "epoch": 2.6555411535968894, "grad_norm": 1.0673377513885498, "learning_rate": 1.7088844111758956e-07, "loss": 0.0638, "step": 16390 }, { "epoch": 2.655703175631886, "grad_norm": 0.9802731871604919, "learning_rate": 1.7072957323617635e-07, "loss": 0.0656, "step": 16391 }, { "epoch": 2.655865197666883, "grad_norm": 0.9394249320030212, "learning_rate": 1.7057077662480131e-07, "loss": 0.0533, "step": 16392 }, { "epoch": 2.6560272197018797, "grad_norm": 0.9298037886619568, "learning_rate": 1.7041205128832338e-07, "loss": 0.059, "step": 16393 }, { "epoch": 2.656189241736876, "grad_norm": 1.0378389358520508, "learning_rate": 1.7025339723159924e-07, "loss": 0.0637, "step": 16394 }, { "epoch": 2.656351263771873, "grad_norm": 0.7979336380958557, "learning_rate": 1.700948144594833e-07, "loss": 0.059, "step": 16395 }, { "epoch": 2.6565132858068696, "grad_norm": 0.8939142823219299, "learning_rate": 1.6993630297682778e-07, "loss": 0.0633, "step": 16396 }, { "epoch": 2.6566753078418666, "grad_norm": 0.8715953230857849, "learning_rate": 1.6977786278848275e-07, "loss": 0.0577, "step": 16397 }, { "epoch": 2.656837329876863, "grad_norm": 0.8175707459449768, "learning_rate": 1.6961949389929593e-07, "loss": 0.0571, "step": 16398 }, { "epoch": 2.65699935191186, "grad_norm": 1.1204349994659424, "learning_rate": 1.6946119631411352e-07, "loss": 0.0584, "step": 16399 }, { "epoch": 2.657161373946857, "grad_norm": 1.0144906044006348, "learning_rate": 1.693029700377785e-07, "loss": 0.0669, "step": 16400 }, { "epoch": 2.6573233959818534, "grad_norm": 0.8621246218681335, "learning_rate": 1.6914481507513263e-07, "loss": 0.0629, "step": 16401 }, { "epoch": 2.6574854180168503, "grad_norm": 0.8595054745674133, "learning_rate": 1.6898673143101479e-07, "loss": 0.0557, "step": 16402 }, { "epoch": 2.6576474400518473, "grad_norm": 0.8768499493598938, "learning_rate": 1.688287191102614e-07, "loss": 0.0645, "step": 16403 }, { "epoch": 2.6578094620868438, "grad_norm": 0.9614449143409729, "learning_rate": 1.6867077811770826e-07, "loss": 0.0583, "step": 16404 }, { "epoch": 2.6579714841218407, "grad_norm": 0.9931366443634033, "learning_rate": 1.6851290845818824e-07, "loss": 0.0712, "step": 16405 }, { "epoch": 2.658133506156837, "grad_norm": 0.8831222653388977, "learning_rate": 1.683551101365305e-07, "loss": 0.055, "step": 16406 }, { "epoch": 2.658295528191834, "grad_norm": 0.9368479251861572, "learning_rate": 1.6819738315756395e-07, "loss": 0.0537, "step": 16407 }, { "epoch": 2.6584575502268306, "grad_norm": 0.9596263766288757, "learning_rate": 1.6803972752611475e-07, "loss": 0.0548, "step": 16408 }, { "epoch": 2.6586195722618275, "grad_norm": 0.9823015928268433, "learning_rate": 1.6788214324700652e-07, "loss": 0.0593, "step": 16409 }, { "epoch": 2.6587815942968245, "grad_norm": 0.9856812357902527, "learning_rate": 1.6772463032506126e-07, "loss": 0.0581, "step": 16410 }, { "epoch": 2.658943616331821, "grad_norm": 1.0176301002502441, "learning_rate": 1.6756718876509815e-07, "loss": 0.06, "step": 16411 }, { "epoch": 2.659105638366818, "grad_norm": 0.8685017228126526, "learning_rate": 1.6740981857193471e-07, "loss": 0.0569, "step": 16412 }, { "epoch": 2.659267660401815, "grad_norm": 0.8308966755867004, "learning_rate": 1.672525197503863e-07, "loss": 0.0605, "step": 16413 }, { "epoch": 2.6594296824368113, "grad_norm": 0.7896578311920166, "learning_rate": 1.6709529230526544e-07, "loss": 0.0544, "step": 16414 }, { "epoch": 2.6595917044718083, "grad_norm": 1.1066980361938477, "learning_rate": 1.66938136241383e-07, "loss": 0.0637, "step": 16415 }, { "epoch": 2.659753726506805, "grad_norm": 1.2486014366149902, "learning_rate": 1.667810515635482e-07, "loss": 0.0629, "step": 16416 }, { "epoch": 2.6599157485418017, "grad_norm": 0.92633056640625, "learning_rate": 1.6662403827656603e-07, "loss": 0.0627, "step": 16417 }, { "epoch": 2.660077770576798, "grad_norm": 0.9085054397583008, "learning_rate": 1.6646709638524216e-07, "loss": 0.0594, "step": 16418 }, { "epoch": 2.660239792611795, "grad_norm": 1.1926897764205933, "learning_rate": 1.6631022589437828e-07, "loss": 0.0662, "step": 16419 }, { "epoch": 2.660401814646792, "grad_norm": 0.877842128276825, "learning_rate": 1.6615342680877417e-07, "loss": 0.0617, "step": 16420 }, { "epoch": 2.6605638366817885, "grad_norm": 1.0418201684951782, "learning_rate": 1.6599669913322708e-07, "loss": 0.061, "step": 16421 }, { "epoch": 2.6607258587167855, "grad_norm": 0.9494113326072693, "learning_rate": 1.6584004287253235e-07, "loss": 0.0586, "step": 16422 }, { "epoch": 2.6608878807517824, "grad_norm": 1.1011654138565063, "learning_rate": 1.6568345803148478e-07, "loss": 0.0642, "step": 16423 }, { "epoch": 2.661049902786779, "grad_norm": 1.0519500970840454, "learning_rate": 1.6552694461487385e-07, "loss": 0.0583, "step": 16424 }, { "epoch": 2.661211924821776, "grad_norm": 0.9799832105636597, "learning_rate": 1.6537050262748905e-07, "loss": 0.0611, "step": 16425 }, { "epoch": 2.6613739468567728, "grad_norm": 0.9446945190429688, "learning_rate": 1.652141320741174e-07, "loss": 0.0623, "step": 16426 }, { "epoch": 2.6615359688917692, "grad_norm": 0.9712892770767212, "learning_rate": 1.6505783295954314e-07, "loss": 0.0589, "step": 16427 }, { "epoch": 2.661697990926766, "grad_norm": 0.9126527905464172, "learning_rate": 1.6490160528854855e-07, "loss": 0.063, "step": 16428 }, { "epoch": 2.6618600129617627, "grad_norm": 0.843527615070343, "learning_rate": 1.6474544906591422e-07, "loss": 0.0636, "step": 16429 }, { "epoch": 2.6620220349967596, "grad_norm": 1.002021312713623, "learning_rate": 1.6458936429641803e-07, "loss": 0.0638, "step": 16430 }, { "epoch": 2.662184057031756, "grad_norm": 0.8258562684059143, "learning_rate": 1.6443335098483586e-07, "loss": 0.0567, "step": 16431 }, { "epoch": 2.662346079066753, "grad_norm": 0.987740159034729, "learning_rate": 1.642774091359406e-07, "loss": 0.0617, "step": 16432 }, { "epoch": 2.66250810110175, "grad_norm": 1.08059823513031, "learning_rate": 1.6412153875450448e-07, "loss": 0.0681, "step": 16433 }, { "epoch": 2.6626701231367464, "grad_norm": 0.9311217069625854, "learning_rate": 1.6396573984529707e-07, "loss": 0.063, "step": 16434 }, { "epoch": 2.6628321451717434, "grad_norm": 0.8519605994224548, "learning_rate": 1.6381001241308452e-07, "loss": 0.0563, "step": 16435 }, { "epoch": 2.6629941672067403, "grad_norm": 0.9323353171348572, "learning_rate": 1.6365435646263223e-07, "loss": 0.0625, "step": 16436 }, { "epoch": 2.663156189241737, "grad_norm": 0.9580549597740173, "learning_rate": 1.6349877199870218e-07, "loss": 0.0612, "step": 16437 }, { "epoch": 2.6633182112767337, "grad_norm": 0.9537264108657837, "learning_rate": 1.6334325902605642e-07, "loss": 0.0611, "step": 16438 }, { "epoch": 2.6634802333117307, "grad_norm": 0.8179647922515869, "learning_rate": 1.6318781754945168e-07, "loss": 0.0574, "step": 16439 }, { "epoch": 2.663642255346727, "grad_norm": 0.8439454436302185, "learning_rate": 1.63032447573645e-07, "loss": 0.0547, "step": 16440 }, { "epoch": 2.6638042773817237, "grad_norm": 0.8650406002998352, "learning_rate": 1.6287714910339008e-07, "loss": 0.0558, "step": 16441 }, { "epoch": 2.6639662994167206, "grad_norm": 1.0165598392486572, "learning_rate": 1.6272192214343868e-07, "loss": 0.069, "step": 16442 }, { "epoch": 2.6641283214517175, "grad_norm": 0.9209080934524536, "learning_rate": 1.6256676669854032e-07, "loss": 0.0659, "step": 16443 }, { "epoch": 2.664290343486714, "grad_norm": 1.0296342372894287, "learning_rate": 1.6241168277344232e-07, "loss": 0.0654, "step": 16444 }, { "epoch": 2.664452365521711, "grad_norm": 0.9982236623764038, "learning_rate": 1.6225667037289034e-07, "loss": 0.0727, "step": 16445 }, { "epoch": 2.664614387556708, "grad_norm": 0.8094567656517029, "learning_rate": 1.6210172950162639e-07, "loss": 0.0574, "step": 16446 }, { "epoch": 2.6647764095917044, "grad_norm": 1.097362995147705, "learning_rate": 1.6194686016439227e-07, "loss": 0.0682, "step": 16447 }, { "epoch": 2.6649384316267013, "grad_norm": 0.9655567407608032, "learning_rate": 1.617920623659261e-07, "loss": 0.0602, "step": 16448 }, { "epoch": 2.6651004536616982, "grad_norm": 2.5268149375915527, "learning_rate": 1.6163733611096495e-07, "loss": 0.0604, "step": 16449 }, { "epoch": 2.6652624756966947, "grad_norm": 0.8302810788154602, "learning_rate": 1.6148268140424224e-07, "loss": 0.0574, "step": 16450 }, { "epoch": 2.6654244977316917, "grad_norm": 1.3241533041000366, "learning_rate": 1.6132809825049e-07, "loss": 0.0606, "step": 16451 }, { "epoch": 2.665586519766688, "grad_norm": 0.991156280040741, "learning_rate": 1.6117358665443922e-07, "loss": 0.0633, "step": 16452 }, { "epoch": 2.665748541801685, "grad_norm": 0.8456504344940186, "learning_rate": 1.6101914662081665e-07, "loss": 0.0609, "step": 16453 }, { "epoch": 2.6659105638366816, "grad_norm": 1.1732730865478516, "learning_rate": 1.6086477815434763e-07, "loss": 0.0678, "step": 16454 }, { "epoch": 2.6660725858716785, "grad_norm": 0.9648259878158569, "learning_rate": 1.6071048125975598e-07, "loss": 0.0613, "step": 16455 }, { "epoch": 2.6662346079066754, "grad_norm": 1.1859503984451294, "learning_rate": 1.6055625594176254e-07, "loss": 0.0761, "step": 16456 }, { "epoch": 2.666396629941672, "grad_norm": 0.9471797347068787, "learning_rate": 1.604021022050864e-07, "loss": 0.0643, "step": 16457 }, { "epoch": 2.666558651976669, "grad_norm": 0.9277358651161194, "learning_rate": 1.602480200544443e-07, "loss": 0.0676, "step": 16458 }, { "epoch": 2.666720674011666, "grad_norm": 0.9459307789802551, "learning_rate": 1.600940094945505e-07, "loss": 0.0658, "step": 16459 }, { "epoch": 2.6668826960466623, "grad_norm": 0.8798080086708069, "learning_rate": 1.5994007053011796e-07, "loss": 0.0642, "step": 16460 }, { "epoch": 2.667044718081659, "grad_norm": 0.9812191128730774, "learning_rate": 1.5978620316585564e-07, "loss": 0.0614, "step": 16461 }, { "epoch": 2.6672067401166557, "grad_norm": 0.898577094078064, "learning_rate": 1.5963240740647285e-07, "loss": 0.0627, "step": 16462 }, { "epoch": 2.6673687621516526, "grad_norm": 0.9194305539131165, "learning_rate": 1.594786832566747e-07, "loss": 0.0637, "step": 16463 }, { "epoch": 2.667530784186649, "grad_norm": 0.895622730255127, "learning_rate": 1.5932503072116524e-07, "loss": 0.0625, "step": 16464 }, { "epoch": 2.667692806221646, "grad_norm": 0.9284375309944153, "learning_rate": 1.5917144980464483e-07, "loss": 0.0645, "step": 16465 }, { "epoch": 2.667854828256643, "grad_norm": 0.8722718358039856, "learning_rate": 1.5901794051181362e-07, "loss": 0.057, "step": 16466 }, { "epoch": 2.6680168502916395, "grad_norm": 0.8681865930557251, "learning_rate": 1.58864502847369e-07, "loss": 0.059, "step": 16467 }, { "epoch": 2.6681788723266364, "grad_norm": 0.9118895530700684, "learning_rate": 1.5871113681600464e-07, "loss": 0.0578, "step": 16468 }, { "epoch": 2.6683408943616334, "grad_norm": 0.9743660092353821, "learning_rate": 1.5855784242241352e-07, "loss": 0.0638, "step": 16469 }, { "epoch": 2.66850291639663, "grad_norm": 0.9498010277748108, "learning_rate": 1.5840461967128628e-07, "loss": 0.0605, "step": 16470 }, { "epoch": 2.6686649384316268, "grad_norm": 0.9760184288024902, "learning_rate": 1.5825146856731144e-07, "loss": 0.0638, "step": 16471 }, { "epoch": 2.6688269604666237, "grad_norm": 0.9990751147270203, "learning_rate": 1.5809838911517438e-07, "loss": 0.0612, "step": 16472 }, { "epoch": 2.66898898250162, "grad_norm": 0.8862509727478027, "learning_rate": 1.5794538131955944e-07, "loss": 0.0579, "step": 16473 }, { "epoch": 2.669151004536617, "grad_norm": 0.9482281804084778, "learning_rate": 1.5779244518514813e-07, "loss": 0.0669, "step": 16474 }, { "epoch": 2.6693130265716136, "grad_norm": 0.9086149334907532, "learning_rate": 1.5763958071662006e-07, "loss": 0.059, "step": 16475 }, { "epoch": 2.6694750486066106, "grad_norm": 0.8625389933586121, "learning_rate": 1.574867879186523e-07, "loss": 0.0598, "step": 16476 }, { "epoch": 2.669637070641607, "grad_norm": 0.9149574041366577, "learning_rate": 1.5733406679592028e-07, "loss": 0.0646, "step": 16477 }, { "epoch": 2.669799092676604, "grad_norm": 0.7897137999534607, "learning_rate": 1.5718141735309695e-07, "loss": 0.0554, "step": 16478 }, { "epoch": 2.669961114711601, "grad_norm": 0.7885839939117432, "learning_rate": 1.5702883959485215e-07, "loss": 0.0547, "step": 16479 }, { "epoch": 2.6701231367465974, "grad_norm": 0.9897783398628235, "learning_rate": 1.5687633352585467e-07, "loss": 0.0536, "step": 16480 }, { "epoch": 2.6702851587815943, "grad_norm": 1.0975406169891357, "learning_rate": 1.5672389915077162e-07, "loss": 0.0666, "step": 16481 }, { "epoch": 2.6704471808165913, "grad_norm": 0.8759128451347351, "learning_rate": 1.5657153647426703e-07, "loss": 0.0639, "step": 16482 }, { "epoch": 2.6706092028515878, "grad_norm": 0.8712435364723206, "learning_rate": 1.5641924550100218e-07, "loss": 0.059, "step": 16483 }, { "epoch": 2.6707712248865847, "grad_norm": 0.7633887529373169, "learning_rate": 1.5626702623563694e-07, "loss": 0.0531, "step": 16484 }, { "epoch": 2.670933246921581, "grad_norm": 1.1077615022659302, "learning_rate": 1.561148786828287e-07, "loss": 0.0654, "step": 16485 }, { "epoch": 2.671095268956578, "grad_norm": 0.95298832654953, "learning_rate": 1.5596280284723348e-07, "loss": 0.0593, "step": 16486 }, { "epoch": 2.6712572909915746, "grad_norm": 1.1085457801818848, "learning_rate": 1.558107987335039e-07, "loss": 0.0647, "step": 16487 }, { "epoch": 2.6714193130265715, "grad_norm": 0.8979312181472778, "learning_rate": 1.5565886634629102e-07, "loss": 0.06, "step": 16488 }, { "epoch": 2.6715813350615685, "grad_norm": 0.8960966467857361, "learning_rate": 1.5550700569024358e-07, "loss": 0.0561, "step": 16489 }, { "epoch": 2.671743357096565, "grad_norm": 0.8627614378929138, "learning_rate": 1.5535521677000813e-07, "loss": 0.0538, "step": 16490 }, { "epoch": 2.671905379131562, "grad_norm": 0.9660282135009766, "learning_rate": 1.5520349959022934e-07, "loss": 0.0582, "step": 16491 }, { "epoch": 2.672067401166559, "grad_norm": 0.9266077876091003, "learning_rate": 1.5505185415554903e-07, "loss": 0.063, "step": 16492 }, { "epoch": 2.6722294232015553, "grad_norm": 0.9693909287452698, "learning_rate": 1.5490028047060762e-07, "loss": 0.0612, "step": 16493 }, { "epoch": 2.6723914452365523, "grad_norm": 0.9419223666191101, "learning_rate": 1.54748778540042e-07, "loss": 0.0568, "step": 16494 }, { "epoch": 2.672553467271549, "grad_norm": 0.9191377758979797, "learning_rate": 1.5459734836848872e-07, "loss": 0.0631, "step": 16495 }, { "epoch": 2.6727154893065457, "grad_norm": 1.1649914979934692, "learning_rate": 1.544459899605813e-07, "loss": 0.063, "step": 16496 }, { "epoch": 2.6728775113415426, "grad_norm": 0.7725280523300171, "learning_rate": 1.5429470332094992e-07, "loss": 0.0554, "step": 16497 }, { "epoch": 2.673039533376539, "grad_norm": 0.9325549006462097, "learning_rate": 1.5414348845422394e-07, "loss": 0.0567, "step": 16498 }, { "epoch": 2.673201555411536, "grad_norm": 1.1128214597702026, "learning_rate": 1.5399234536503023e-07, "loss": 0.0684, "step": 16499 }, { "epoch": 2.6733635774465325, "grad_norm": 0.967528223991394, "learning_rate": 1.538412740579942e-07, "loss": 0.0644, "step": 16500 }, { "epoch": 2.6735255994815295, "grad_norm": 0.893129825592041, "learning_rate": 1.536902745377372e-07, "loss": 0.0641, "step": 16501 }, { "epoch": 2.6736876215165264, "grad_norm": 1.0466177463531494, "learning_rate": 1.5353934680888e-07, "loss": 0.0636, "step": 16502 }, { "epoch": 2.673849643551523, "grad_norm": 1.234455943107605, "learning_rate": 1.5338849087604025e-07, "loss": 0.0587, "step": 16503 }, { "epoch": 2.67401166558652, "grad_norm": 0.9519467353820801, "learning_rate": 1.5323770674383398e-07, "loss": 0.0616, "step": 16504 }, { "epoch": 2.6741736876215167, "grad_norm": 0.8917698860168457, "learning_rate": 1.5308699441687502e-07, "loss": 0.0564, "step": 16505 }, { "epoch": 2.6743357096565132, "grad_norm": 0.8450326919555664, "learning_rate": 1.529363538997744e-07, "loss": 0.0553, "step": 16506 }, { "epoch": 2.67449773169151, "grad_norm": 0.9240355491638184, "learning_rate": 1.5278578519714177e-07, "loss": 0.063, "step": 16507 }, { "epoch": 2.6746597537265067, "grad_norm": 0.9043802618980408, "learning_rate": 1.526352883135837e-07, "loss": 0.0639, "step": 16508 }, { "epoch": 2.6748217757615036, "grad_norm": 0.8774120807647705, "learning_rate": 1.5248486325370544e-07, "loss": 0.0609, "step": 16509 }, { "epoch": 2.6749837977965, "grad_norm": 0.9532600045204163, "learning_rate": 1.5233451002210964e-07, "loss": 0.0594, "step": 16510 }, { "epoch": 2.675145819831497, "grad_norm": 0.8727222681045532, "learning_rate": 1.5218422862339683e-07, "loss": 0.0651, "step": 16511 }, { "epoch": 2.675307841866494, "grad_norm": 0.8903719186782837, "learning_rate": 1.520340190621647e-07, "loss": 0.0586, "step": 16512 }, { "epoch": 2.6754698639014904, "grad_norm": 1.0103119611740112, "learning_rate": 1.5188388134300901e-07, "loss": 0.0592, "step": 16513 }, { "epoch": 2.6756318859364874, "grad_norm": 0.8567571043968201, "learning_rate": 1.5173381547052528e-07, "loss": 0.0577, "step": 16514 }, { "epoch": 2.6757939079714843, "grad_norm": 0.874523401260376, "learning_rate": 1.5158382144930344e-07, "loss": 0.0564, "step": 16515 }, { "epoch": 2.675955930006481, "grad_norm": 0.8860069513320923, "learning_rate": 1.5143389928393398e-07, "loss": 0.0573, "step": 16516 }, { "epoch": 2.6761179520414777, "grad_norm": 0.8461055159568787, "learning_rate": 1.512840489790035e-07, "loss": 0.0578, "step": 16517 }, { "epoch": 2.6762799740764747, "grad_norm": 0.9307635426521301, "learning_rate": 1.5113427053909725e-07, "loss": 0.0589, "step": 16518 }, { "epoch": 2.676441996111471, "grad_norm": 0.9444718360900879, "learning_rate": 1.5098456396879846e-07, "loss": 0.0627, "step": 16519 }, { "epoch": 2.6766040181464676, "grad_norm": 0.8695940375328064, "learning_rate": 1.508349292726874e-07, "loss": 0.0566, "step": 16520 }, { "epoch": 2.6767660401814646, "grad_norm": 0.9241846203804016, "learning_rate": 1.506853664553426e-07, "loss": 0.0509, "step": 16521 }, { "epoch": 2.6769280622164615, "grad_norm": 0.9394335150718689, "learning_rate": 1.505358755213407e-07, "loss": 0.0598, "step": 16522 }, { "epoch": 2.677090084251458, "grad_norm": 0.865931510925293, "learning_rate": 1.503864564752547e-07, "loss": 0.0542, "step": 16523 }, { "epoch": 2.677252106286455, "grad_norm": 0.9105206727981567, "learning_rate": 1.5023710932165758e-07, "loss": 0.051, "step": 16524 }, { "epoch": 2.677414128321452, "grad_norm": 0.8169817328453064, "learning_rate": 1.500878340651185e-07, "loss": 0.0584, "step": 16525 }, { "epoch": 2.6775761503564484, "grad_norm": 0.8245558142662048, "learning_rate": 1.4993863071020548e-07, "loss": 0.0581, "step": 16526 }, { "epoch": 2.6777381723914453, "grad_norm": 0.9422661662101746, "learning_rate": 1.4978949926148288e-07, "loss": 0.0599, "step": 16527 }, { "epoch": 2.6779001944264422, "grad_norm": 1.0110220909118652, "learning_rate": 1.4964043972351377e-07, "loss": 0.0624, "step": 16528 }, { "epoch": 2.6780622164614387, "grad_norm": 1.0897401571273804, "learning_rate": 1.494914521008603e-07, "loss": 0.0539, "step": 16529 }, { "epoch": 2.6782242384964356, "grad_norm": 0.8992266654968262, "learning_rate": 1.4934253639807994e-07, "loss": 0.0606, "step": 16530 }, { "epoch": 2.678386260531432, "grad_norm": 0.9721251130104065, "learning_rate": 1.4919369261972933e-07, "loss": 0.0646, "step": 16531 }, { "epoch": 2.678548282566429, "grad_norm": 0.8682243824005127, "learning_rate": 1.4904492077036286e-07, "loss": 0.0566, "step": 16532 }, { "epoch": 2.6787103046014256, "grad_norm": 0.7980409860610962, "learning_rate": 1.4889622085453304e-07, "loss": 0.0563, "step": 16533 }, { "epoch": 2.6788723266364225, "grad_norm": 0.8528122901916504, "learning_rate": 1.4874759287678898e-07, "loss": 0.0609, "step": 16534 }, { "epoch": 2.6790343486714194, "grad_norm": 0.7880252599716187, "learning_rate": 1.485990368416787e-07, "loss": 0.0584, "step": 16535 }, { "epoch": 2.679196370706416, "grad_norm": 1.048828363418579, "learning_rate": 1.484505527537475e-07, "loss": 0.0558, "step": 16536 }, { "epoch": 2.679358392741413, "grad_norm": 0.9639270305633545, "learning_rate": 1.483021406175389e-07, "loss": 0.0636, "step": 16537 }, { "epoch": 2.67952041477641, "grad_norm": 0.9279593825340271, "learning_rate": 1.4815380043759374e-07, "loss": 0.0578, "step": 16538 }, { "epoch": 2.6796824368114063, "grad_norm": 0.8325791954994202, "learning_rate": 1.4800553221845094e-07, "loss": 0.0565, "step": 16539 }, { "epoch": 2.679844458846403, "grad_norm": 0.8952010273933411, "learning_rate": 1.4785733596464736e-07, "loss": 0.0601, "step": 16540 }, { "epoch": 2.6800064808814, "grad_norm": 0.8639797568321228, "learning_rate": 1.4770921168071717e-07, "loss": 0.0601, "step": 16541 }, { "epoch": 2.6801685029163966, "grad_norm": 1.0438389778137207, "learning_rate": 1.4756115937119202e-07, "loss": 0.0681, "step": 16542 }, { "epoch": 2.680330524951393, "grad_norm": 0.9621492028236389, "learning_rate": 1.4741317904060304e-07, "loss": 0.06, "step": 16543 }, { "epoch": 2.68049254698639, "grad_norm": 0.814268946647644, "learning_rate": 1.4726527069347796e-07, "loss": 0.0555, "step": 16544 }, { "epoch": 2.680654569021387, "grad_norm": 0.8960631489753723, "learning_rate": 1.471174343343418e-07, "loss": 0.0576, "step": 16545 }, { "epoch": 2.6808165910563835, "grad_norm": 0.9540743827819824, "learning_rate": 1.4696966996771838e-07, "loss": 0.0691, "step": 16546 }, { "epoch": 2.6809786130913804, "grad_norm": 1.125815987586975, "learning_rate": 1.468219775981286e-07, "loss": 0.0665, "step": 16547 }, { "epoch": 2.6811406351263773, "grad_norm": 0.8713697195053101, "learning_rate": 1.4667435723009187e-07, "loss": 0.0554, "step": 16548 }, { "epoch": 2.681302657161374, "grad_norm": 0.961420476436615, "learning_rate": 1.4652680886812488e-07, "loss": 0.0615, "step": 16549 }, { "epoch": 2.6814646791963708, "grad_norm": 0.8258307576179504, "learning_rate": 1.463793325167423e-07, "loss": 0.0602, "step": 16550 }, { "epoch": 2.6816267012313677, "grad_norm": 0.946225106716156, "learning_rate": 1.4623192818045638e-07, "loss": 0.0613, "step": 16551 }, { "epoch": 2.681788723266364, "grad_norm": 1.008615255355835, "learning_rate": 1.4608459586377743e-07, "loss": 0.0708, "step": 16552 }, { "epoch": 2.681950745301361, "grad_norm": 1.0377353429794312, "learning_rate": 1.4593733557121347e-07, "loss": 0.0649, "step": 16553 }, { "epoch": 2.6821127673363576, "grad_norm": 0.9528126120567322, "learning_rate": 1.4579014730727037e-07, "loss": 0.0641, "step": 16554 }, { "epoch": 2.6822747893713546, "grad_norm": 0.9190387725830078, "learning_rate": 1.45643031076452e-07, "loss": 0.0629, "step": 16555 }, { "epoch": 2.682436811406351, "grad_norm": 0.855954647064209, "learning_rate": 1.4549598688325896e-07, "loss": 0.0566, "step": 16556 }, { "epoch": 2.682598833441348, "grad_norm": 0.9234669208526611, "learning_rate": 1.4534901473219093e-07, "loss": 0.0671, "step": 16557 }, { "epoch": 2.682760855476345, "grad_norm": 1.030083179473877, "learning_rate": 1.4520211462774548e-07, "loss": 0.0586, "step": 16558 }, { "epoch": 2.6829228775113414, "grad_norm": 1.0558991432189941, "learning_rate": 1.4505528657441648e-07, "loss": 0.0627, "step": 16559 }, { "epoch": 2.6830848995463383, "grad_norm": 1.0315167903900146, "learning_rate": 1.4490853057669675e-07, "loss": 0.061, "step": 16560 }, { "epoch": 2.6832469215813353, "grad_norm": 0.8899813890457153, "learning_rate": 1.4476184663907628e-07, "loss": 0.0682, "step": 16561 }, { "epoch": 2.6834089436163318, "grad_norm": 0.787449300289154, "learning_rate": 1.4461523476604482e-07, "loss": 0.0506, "step": 16562 }, { "epoch": 2.6835709656513287, "grad_norm": 0.8441698551177979, "learning_rate": 1.444686949620866e-07, "loss": 0.0617, "step": 16563 }, { "epoch": 2.683732987686325, "grad_norm": 0.8496869206428528, "learning_rate": 1.4432222723168632e-07, "loss": 0.0619, "step": 16564 }, { "epoch": 2.683895009721322, "grad_norm": 1.025014042854309, "learning_rate": 1.4417583157932485e-07, "loss": 0.0568, "step": 16565 }, { "epoch": 2.6840570317563186, "grad_norm": 0.9680132269859314, "learning_rate": 1.4402950800948223e-07, "loss": 0.0626, "step": 16566 }, { "epoch": 2.6842190537913155, "grad_norm": 0.7993006110191345, "learning_rate": 1.4388325652663542e-07, "loss": 0.0592, "step": 16567 }, { "epoch": 2.6843810758263125, "grad_norm": 0.8684203028678894, "learning_rate": 1.437370771352589e-07, "loss": 0.0611, "step": 16568 }, { "epoch": 2.684543097861309, "grad_norm": 0.9955109357833862, "learning_rate": 1.4359096983982607e-07, "loss": 0.0586, "step": 16569 }, { "epoch": 2.684705119896306, "grad_norm": 0.884691059589386, "learning_rate": 1.4344493464480745e-07, "loss": 0.0633, "step": 16570 }, { "epoch": 2.684867141931303, "grad_norm": 0.8436540961265564, "learning_rate": 1.4329897155467039e-07, "loss": 0.0623, "step": 16571 }, { "epoch": 2.6850291639662993, "grad_norm": 1.0994386672973633, "learning_rate": 1.4315308057388206e-07, "loss": 0.0638, "step": 16572 }, { "epoch": 2.6851911860012962, "grad_norm": 0.9540835022926331, "learning_rate": 1.4300726170690614e-07, "loss": 0.056, "step": 16573 }, { "epoch": 2.685353208036293, "grad_norm": 0.9536152482032776, "learning_rate": 1.428615149582041e-07, "loss": 0.0571, "step": 16574 }, { "epoch": 2.6855152300712897, "grad_norm": 0.7673203349113464, "learning_rate": 1.4271584033223512e-07, "loss": 0.0578, "step": 16575 }, { "epoch": 2.6856772521062866, "grad_norm": 0.899626612663269, "learning_rate": 1.425702378334573e-07, "loss": 0.0572, "step": 16576 }, { "epoch": 2.685839274141283, "grad_norm": 0.9618022441864014, "learning_rate": 1.4242470746632542e-07, "loss": 0.0644, "step": 16577 }, { "epoch": 2.68600129617628, "grad_norm": 0.9150751233100891, "learning_rate": 1.4227924923529228e-07, "loss": 0.0629, "step": 16578 }, { "epoch": 2.6861633182112765, "grad_norm": 1.083203911781311, "learning_rate": 1.4213386314480825e-07, "loss": 0.0701, "step": 16579 }, { "epoch": 2.6863253402462735, "grad_norm": 0.9395116567611694, "learning_rate": 1.4198854919932225e-07, "loss": 0.0624, "step": 16580 }, { "epoch": 2.6864873622812704, "grad_norm": 0.8851693868637085, "learning_rate": 1.4184330740328044e-07, "loss": 0.0657, "step": 16581 }, { "epoch": 2.686649384316267, "grad_norm": 0.8476728200912476, "learning_rate": 1.4169813776112652e-07, "loss": 0.0589, "step": 16582 }, { "epoch": 2.686811406351264, "grad_norm": 1.0356652736663818, "learning_rate": 1.4155304027730271e-07, "loss": 0.0627, "step": 16583 }, { "epoch": 2.6869734283862607, "grad_norm": 0.8928619623184204, "learning_rate": 1.4140801495624913e-07, "loss": 0.0626, "step": 16584 }, { "epoch": 2.6871354504212572, "grad_norm": 1.1158393621444702, "learning_rate": 1.412630618024016e-07, "loss": 0.0641, "step": 16585 }, { "epoch": 2.687297472456254, "grad_norm": 1.0287305116653442, "learning_rate": 1.4111818082019696e-07, "loss": 0.065, "step": 16586 }, { "epoch": 2.6874594944912507, "grad_norm": 0.8559868931770325, "learning_rate": 1.4097337201406742e-07, "loss": 0.0576, "step": 16587 }, { "epoch": 2.6876215165262476, "grad_norm": 0.9814032912254333, "learning_rate": 1.4082863538844444e-07, "loss": 0.0598, "step": 16588 }, { "epoch": 2.687783538561244, "grad_norm": 1.0753200054168701, "learning_rate": 1.406839709477556e-07, "loss": 0.0605, "step": 16589 }, { "epoch": 2.687945560596241, "grad_norm": 0.9408139586448669, "learning_rate": 1.4053937869642737e-07, "loss": 0.0624, "step": 16590 }, { "epoch": 2.688107582631238, "grad_norm": 0.8320403695106506, "learning_rate": 1.4039485863888537e-07, "loss": 0.0557, "step": 16591 }, { "epoch": 2.6882696046662344, "grad_norm": 0.9615926146507263, "learning_rate": 1.402504107795502e-07, "loss": 0.0627, "step": 16592 }, { "epoch": 2.6884316267012314, "grad_norm": 0.8587374091148376, "learning_rate": 1.401060351228417e-07, "loss": 0.0631, "step": 16593 }, { "epoch": 2.6885936487362283, "grad_norm": 0.8640631437301636, "learning_rate": 1.39961731673178e-07, "loss": 0.0605, "step": 16594 }, { "epoch": 2.688755670771225, "grad_norm": 0.8059629797935486, "learning_rate": 1.398175004349739e-07, "loss": 0.0556, "step": 16595 }, { "epoch": 2.6889176928062217, "grad_norm": 0.9349533319473267, "learning_rate": 1.3967334141264277e-07, "loss": 0.0646, "step": 16596 }, { "epoch": 2.6890797148412187, "grad_norm": 0.9726789593696594, "learning_rate": 1.3952925461059558e-07, "loss": 0.0594, "step": 16597 }, { "epoch": 2.689241736876215, "grad_norm": 0.9107815623283386, "learning_rate": 1.39385240033241e-07, "loss": 0.0614, "step": 16598 }, { "epoch": 2.689403758911212, "grad_norm": 0.8884726762771606, "learning_rate": 1.392412976849855e-07, "loss": 0.0643, "step": 16599 }, { "epoch": 2.6895657809462086, "grad_norm": 0.8729420900344849, "learning_rate": 1.3909742757023336e-07, "loss": 0.0585, "step": 16600 }, { "epoch": 2.6897278029812055, "grad_norm": 0.8863007426261902, "learning_rate": 1.3895362969338662e-07, "loss": 0.0613, "step": 16601 }, { "epoch": 2.689889825016202, "grad_norm": 0.9088746309280396, "learning_rate": 1.3880990405884532e-07, "loss": 0.056, "step": 16602 }, { "epoch": 2.690051847051199, "grad_norm": 0.9081185460090637, "learning_rate": 1.3866625067100707e-07, "loss": 0.0599, "step": 16603 }, { "epoch": 2.690213869086196, "grad_norm": 0.9388935565948486, "learning_rate": 1.3852266953426674e-07, "loss": 0.0642, "step": 16604 }, { "epoch": 2.6903758911211924, "grad_norm": 0.7939647436141968, "learning_rate": 1.3837916065301827e-07, "loss": 0.0603, "step": 16605 }, { "epoch": 2.6905379131561893, "grad_norm": 0.8875724077224731, "learning_rate": 1.3823572403165285e-07, "loss": 0.0599, "step": 16606 }, { "epoch": 2.690699935191186, "grad_norm": 0.7508094906806946, "learning_rate": 1.380923596745587e-07, "loss": 0.0548, "step": 16607 }, { "epoch": 2.6908619572261827, "grad_norm": 0.8791196942329407, "learning_rate": 1.3794906758612252e-07, "loss": 0.0589, "step": 16608 }, { "epoch": 2.6910239792611796, "grad_norm": 0.9110643267631531, "learning_rate": 1.3780584777072892e-07, "loss": 0.061, "step": 16609 }, { "epoch": 2.691186001296176, "grad_norm": 0.9413241744041443, "learning_rate": 1.376627002327599e-07, "loss": 0.0639, "step": 16610 }, { "epoch": 2.691348023331173, "grad_norm": 0.9628585577011108, "learning_rate": 1.375196249765956e-07, "loss": 0.0696, "step": 16611 }, { "epoch": 2.6915100453661696, "grad_norm": 0.8341220021247864, "learning_rate": 1.373766220066136e-07, "loss": 0.0596, "step": 16612 }, { "epoch": 2.6916720674011665, "grad_norm": 0.8509212732315063, "learning_rate": 1.372336913271896e-07, "loss": 0.0554, "step": 16613 }, { "epoch": 2.6918340894361634, "grad_norm": 0.8546708822250366, "learning_rate": 1.3709083294269676e-07, "loss": 0.0609, "step": 16614 }, { "epoch": 2.69199611147116, "grad_norm": 0.9867240190505981, "learning_rate": 1.369480468575063e-07, "loss": 0.0511, "step": 16615 }, { "epoch": 2.692158133506157, "grad_norm": 1.0250896215438843, "learning_rate": 1.368053330759872e-07, "loss": 0.0635, "step": 16616 }, { "epoch": 2.692320155541154, "grad_norm": 1.0135303735733032, "learning_rate": 1.366626916025063e-07, "loss": 0.0621, "step": 16617 }, { "epoch": 2.6924821775761503, "grad_norm": 0.852816104888916, "learning_rate": 1.3652012244142754e-07, "loss": 0.0576, "step": 16618 }, { "epoch": 2.692644199611147, "grad_norm": 0.8429510593414307, "learning_rate": 1.363776255971133e-07, "loss": 0.0597, "step": 16619 }, { "epoch": 2.692806221646144, "grad_norm": 0.8124983906745911, "learning_rate": 1.362352010739243e-07, "loss": 0.0522, "step": 16620 }, { "epoch": 2.6929682436811406, "grad_norm": 0.836234450340271, "learning_rate": 1.360928488762181e-07, "loss": 0.0579, "step": 16621 }, { "epoch": 2.693130265716137, "grad_norm": 0.9209657907485962, "learning_rate": 1.3595056900834986e-07, "loss": 0.0608, "step": 16622 }, { "epoch": 2.693292287751134, "grad_norm": 0.9225515127182007, "learning_rate": 1.3580836147467304e-07, "loss": 0.0635, "step": 16623 }, { "epoch": 2.693454309786131, "grad_norm": 0.9024338722229004, "learning_rate": 1.3566622627953968e-07, "loss": 0.0602, "step": 16624 }, { "epoch": 2.6936163318211275, "grad_norm": 0.959028959274292, "learning_rate": 1.3552416342729802e-07, "loss": 0.0628, "step": 16625 }, { "epoch": 2.6937783538561244, "grad_norm": 0.917083203792572, "learning_rate": 1.3538217292229482e-07, "loss": 0.0589, "step": 16626 }, { "epoch": 2.6939403758911213, "grad_norm": 0.9200828671455383, "learning_rate": 1.3524025476887527e-07, "loss": 0.0577, "step": 16627 }, { "epoch": 2.694102397926118, "grad_norm": 1.1384090185165405, "learning_rate": 1.3509840897138083e-07, "loss": 0.0641, "step": 16628 }, { "epoch": 2.6942644199611148, "grad_norm": 0.9467897415161133, "learning_rate": 1.349566355341525e-07, "loss": 0.0635, "step": 16629 }, { "epoch": 2.6944264419961117, "grad_norm": 0.8724448680877686, "learning_rate": 1.3481493446152766e-07, "loss": 0.0598, "step": 16630 }, { "epoch": 2.694588464031108, "grad_norm": 0.9046519994735718, "learning_rate": 1.3467330575784226e-07, "loss": 0.0619, "step": 16631 }, { "epoch": 2.694750486066105, "grad_norm": 0.7974745631217957, "learning_rate": 1.3453174942743008e-07, "loss": 0.0567, "step": 16632 }, { "epoch": 2.6949125081011016, "grad_norm": 0.8849309682846069, "learning_rate": 1.3439026547462126e-07, "loss": 0.0636, "step": 16633 }, { "epoch": 2.6950745301360985, "grad_norm": 1.4565906524658203, "learning_rate": 1.3424885390374593e-07, "loss": 0.0653, "step": 16634 }, { "epoch": 2.695236552171095, "grad_norm": 0.9429144859313965, "learning_rate": 1.341075147191312e-07, "loss": 0.0632, "step": 16635 }, { "epoch": 2.695398574206092, "grad_norm": 0.8843449354171753, "learning_rate": 1.3396624792510082e-07, "loss": 0.0587, "step": 16636 }, { "epoch": 2.695560596241089, "grad_norm": 0.84376460313797, "learning_rate": 1.3382505352597747e-07, "loss": 0.0592, "step": 16637 }, { "epoch": 2.6957226182760854, "grad_norm": 0.9161346554756165, "learning_rate": 1.33683931526081e-07, "loss": 0.0644, "step": 16638 }, { "epoch": 2.6958846403110823, "grad_norm": 0.9942044019699097, "learning_rate": 1.3354288192973074e-07, "loss": 0.0626, "step": 16639 }, { "epoch": 2.6960466623460793, "grad_norm": 0.9702102541923523, "learning_rate": 1.3340190474124104e-07, "loss": 0.0654, "step": 16640 }, { "epoch": 2.6962086843810757, "grad_norm": 0.9594953060150146, "learning_rate": 1.3326099996492618e-07, "loss": 0.0608, "step": 16641 }, { "epoch": 2.6963707064160727, "grad_norm": 0.9994754195213318, "learning_rate": 1.3312016760509722e-07, "loss": 0.0606, "step": 16642 }, { "epoch": 2.6965327284510696, "grad_norm": 0.8460058569908142, "learning_rate": 1.3297940766606344e-07, "loss": 0.0541, "step": 16643 }, { "epoch": 2.696694750486066, "grad_norm": 0.9044547080993652, "learning_rate": 1.3283872015213168e-07, "loss": 0.0607, "step": 16644 }, { "epoch": 2.6968567725210626, "grad_norm": 0.807353675365448, "learning_rate": 1.3269810506760683e-07, "loss": 0.0583, "step": 16645 }, { "epoch": 2.6970187945560595, "grad_norm": 1.019974708557129, "learning_rate": 1.3255756241679102e-07, "loss": 0.0644, "step": 16646 }, { "epoch": 2.6971808165910565, "grad_norm": 0.818080723285675, "learning_rate": 1.3241709220398467e-07, "loss": 0.0579, "step": 16647 }, { "epoch": 2.697342838626053, "grad_norm": 0.959563672542572, "learning_rate": 1.3227669443348578e-07, "loss": 0.0625, "step": 16648 }, { "epoch": 2.69750486066105, "grad_norm": 0.9866206645965576, "learning_rate": 1.321363691095906e-07, "loss": 0.0584, "step": 16649 }, { "epoch": 2.697666882696047, "grad_norm": 1.2002931833267212, "learning_rate": 1.3199611623659235e-07, "loss": 0.0728, "step": 16650 }, { "epoch": 2.6978289047310433, "grad_norm": 0.8130374550819397, "learning_rate": 1.3185593581878238e-07, "loss": 0.0582, "step": 16651 }, { "epoch": 2.6979909267660402, "grad_norm": 0.883966863155365, "learning_rate": 1.3171582786044968e-07, "loss": 0.0638, "step": 16652 }, { "epoch": 2.698152948801037, "grad_norm": 1.0147669315338135, "learning_rate": 1.3157579236588197e-07, "loss": 0.0619, "step": 16653 }, { "epoch": 2.6983149708360337, "grad_norm": 0.9203122854232788, "learning_rate": 1.3143582933936333e-07, "loss": 0.0581, "step": 16654 }, { "epoch": 2.6984769928710306, "grad_norm": 0.9031851887702942, "learning_rate": 1.3129593878517643e-07, "loss": 0.0599, "step": 16655 }, { "epoch": 2.698639014906027, "grad_norm": 0.8836839199066162, "learning_rate": 1.3115612070760174e-07, "loss": 0.0583, "step": 16656 }, { "epoch": 2.698801036941024, "grad_norm": 0.9112008810043335, "learning_rate": 1.3101637511091724e-07, "loss": 0.0549, "step": 16657 }, { "epoch": 2.6989630589760205, "grad_norm": 1.0826163291931152, "learning_rate": 1.3087670199939894e-07, "loss": 0.0597, "step": 16658 }, { "epoch": 2.6991250810110174, "grad_norm": 0.8839635252952576, "learning_rate": 1.3073710137732037e-07, "loss": 0.0545, "step": 16659 }, { "epoch": 2.6992871030460144, "grad_norm": 0.9589625000953674, "learning_rate": 1.3059757324895283e-07, "loss": 0.0638, "step": 16660 }, { "epoch": 2.699449125081011, "grad_norm": 0.862845242023468, "learning_rate": 1.3045811761856597e-07, "loss": 0.0589, "step": 16661 }, { "epoch": 2.699611147116008, "grad_norm": 0.8167102336883545, "learning_rate": 1.303187344904261e-07, "loss": 0.0543, "step": 16662 }, { "epoch": 2.6997731691510047, "grad_norm": 1.040915608406067, "learning_rate": 1.3017942386879867e-07, "loss": 0.067, "step": 16663 }, { "epoch": 2.6999351911860012, "grad_norm": 0.9312401413917542, "learning_rate": 1.3004018575794586e-07, "loss": 0.0596, "step": 16664 }, { "epoch": 2.700097213220998, "grad_norm": 1.038403034210205, "learning_rate": 1.2990102016212868e-07, "loss": 0.0634, "step": 16665 }, { "epoch": 2.7002592352559946, "grad_norm": 0.8530080914497375, "learning_rate": 1.2976192708560432e-07, "loss": 0.0642, "step": 16666 }, { "epoch": 2.7004212572909916, "grad_norm": 1.1165688037872314, "learning_rate": 1.2962290653262903e-07, "loss": 0.0593, "step": 16667 }, { "epoch": 2.700583279325988, "grad_norm": 0.9035430550575256, "learning_rate": 1.2948395850745726e-07, "loss": 0.0613, "step": 16668 }, { "epoch": 2.700745301360985, "grad_norm": 0.8505802154541016, "learning_rate": 1.293450830143392e-07, "loss": 0.0609, "step": 16669 }, { "epoch": 2.700907323395982, "grad_norm": 0.98024582862854, "learning_rate": 1.29206280057525e-07, "loss": 0.0601, "step": 16670 }, { "epoch": 2.7010693454309784, "grad_norm": 0.8178975582122803, "learning_rate": 1.2906754964126078e-07, "loss": 0.0573, "step": 16671 }, { "epoch": 2.7012313674659754, "grad_norm": 0.8594771027565002, "learning_rate": 1.2892889176979284e-07, "loss": 0.0607, "step": 16672 }, { "epoch": 2.7013933895009723, "grad_norm": 0.9803131818771362, "learning_rate": 1.2879030644736252e-07, "loss": 0.0643, "step": 16673 }, { "epoch": 2.701555411535969, "grad_norm": 0.8199018836021423, "learning_rate": 1.2865179367821083e-07, "loss": 0.0536, "step": 16674 }, { "epoch": 2.7017174335709657, "grad_norm": 0.8338936567306519, "learning_rate": 1.2851335346657557e-07, "loss": 0.0597, "step": 16675 }, { "epoch": 2.7018794556059627, "grad_norm": 0.974616527557373, "learning_rate": 1.283749858166927e-07, "loss": 0.0615, "step": 16676 }, { "epoch": 2.702041477640959, "grad_norm": 0.9886922240257263, "learning_rate": 1.2823669073279615e-07, "loss": 0.0644, "step": 16677 }, { "epoch": 2.702203499675956, "grad_norm": 0.7480154037475586, "learning_rate": 1.280984682191172e-07, "loss": 0.0548, "step": 16678 }, { "epoch": 2.7023655217109526, "grad_norm": 0.8866371512413025, "learning_rate": 1.2796031827988582e-07, "loss": 0.0576, "step": 16679 }, { "epoch": 2.7025275437459495, "grad_norm": 1.1099352836608887, "learning_rate": 1.2782224091932775e-07, "loss": 0.0609, "step": 16680 }, { "epoch": 2.702689565780946, "grad_norm": 0.902736246585846, "learning_rate": 1.276842361416686e-07, "loss": 0.0601, "step": 16681 }, { "epoch": 2.702851587815943, "grad_norm": 1.0859075784683228, "learning_rate": 1.2754630395113098e-07, "loss": 0.0672, "step": 16682 }, { "epoch": 2.70301360985094, "grad_norm": 0.9454689025878906, "learning_rate": 1.2740844435193578e-07, "loss": 0.0651, "step": 16683 }, { "epoch": 2.7031756318859363, "grad_norm": 0.8466532230377197, "learning_rate": 1.2727065734830013e-07, "loss": 0.0515, "step": 16684 }, { "epoch": 2.7033376539209333, "grad_norm": 0.9573439359664917, "learning_rate": 1.271329429444404e-07, "loss": 0.0619, "step": 16685 }, { "epoch": 2.70349967595593, "grad_norm": 0.9744999408721924, "learning_rate": 1.269953011445707e-07, "loss": 0.0584, "step": 16686 }, { "epoch": 2.7036616979909267, "grad_norm": 0.9962242841720581, "learning_rate": 1.2685773195290186e-07, "loss": 0.0582, "step": 16687 }, { "epoch": 2.7038237200259236, "grad_norm": 0.9875708818435669, "learning_rate": 1.267202353736438e-07, "loss": 0.0621, "step": 16688 }, { "epoch": 2.70398574206092, "grad_norm": 0.8407350778579712, "learning_rate": 1.265828114110032e-07, "loss": 0.0556, "step": 16689 }, { "epoch": 2.704147764095917, "grad_norm": 0.9471036791801453, "learning_rate": 1.26445460069185e-07, "loss": 0.0601, "step": 16690 }, { "epoch": 2.7043097861309136, "grad_norm": 0.739356279373169, "learning_rate": 1.2630818135239198e-07, "loss": 0.0503, "step": 16691 }, { "epoch": 2.7044718081659105, "grad_norm": 1.1428637504577637, "learning_rate": 1.2617097526482407e-07, "loss": 0.0649, "step": 16692 }, { "epoch": 2.7046338302009074, "grad_norm": 0.9683777689933777, "learning_rate": 1.2603384181068018e-07, "loss": 0.0627, "step": 16693 }, { "epoch": 2.704795852235904, "grad_norm": 1.0176392793655396, "learning_rate": 1.2589678099415582e-07, "loss": 0.0653, "step": 16694 }, { "epoch": 2.704957874270901, "grad_norm": 0.9063898921012878, "learning_rate": 1.2575979281944429e-07, "loss": 0.0545, "step": 16695 }, { "epoch": 2.7051198963058978, "grad_norm": 1.0391916036605835, "learning_rate": 1.256228772907378e-07, "loss": 0.0706, "step": 16696 }, { "epoch": 2.7052819183408943, "grad_norm": 0.9358698129653931, "learning_rate": 1.254860344122255e-07, "loss": 0.0651, "step": 16697 }, { "epoch": 2.705443940375891, "grad_norm": 1.0807019472122192, "learning_rate": 1.2534926418809433e-07, "loss": 0.0649, "step": 16698 }, { "epoch": 2.705605962410888, "grad_norm": 0.9569528102874756, "learning_rate": 1.2521256662252902e-07, "loss": 0.0634, "step": 16699 }, { "epoch": 2.7057679844458846, "grad_norm": 0.9983630180358887, "learning_rate": 1.2507594171971198e-07, "loss": 0.0618, "step": 16700 }, { "epoch": 2.7059300064808816, "grad_norm": 1.1226799488067627, "learning_rate": 1.2493938948382468e-07, "loss": 0.0627, "step": 16701 }, { "epoch": 2.706092028515878, "grad_norm": 0.9132358431816101, "learning_rate": 1.2480290991904398e-07, "loss": 0.064, "step": 16702 }, { "epoch": 2.706254050550875, "grad_norm": 0.8795627951622009, "learning_rate": 1.246665030295463e-07, "loss": 0.0611, "step": 16703 }, { "epoch": 2.7064160725858715, "grad_norm": 0.8634214997291565, "learning_rate": 1.245301688195058e-07, "loss": 0.0563, "step": 16704 }, { "epoch": 2.7065780946208684, "grad_norm": 0.9130164384841919, "learning_rate": 1.243939072930933e-07, "loss": 0.057, "step": 16705 }, { "epoch": 2.7067401166558653, "grad_norm": 1.1923558712005615, "learning_rate": 1.2425771845447853e-07, "loss": 0.0607, "step": 16706 }, { "epoch": 2.706902138690862, "grad_norm": 1.020593523979187, "learning_rate": 1.2412160230782844e-07, "loss": 0.0604, "step": 16707 }, { "epoch": 2.7070641607258588, "grad_norm": 0.8900664448738098, "learning_rate": 1.2398555885730774e-07, "loss": 0.0539, "step": 16708 }, { "epoch": 2.7072261827608557, "grad_norm": 0.9280899167060852, "learning_rate": 1.2384958810707892e-07, "loss": 0.0645, "step": 16709 }, { "epoch": 2.707388204795852, "grad_norm": 0.9784387350082397, "learning_rate": 1.2371369006130256e-07, "loss": 0.063, "step": 16710 }, { "epoch": 2.707550226830849, "grad_norm": 0.920939564704895, "learning_rate": 1.2357786472413702e-07, "loss": 0.0656, "step": 16711 }, { "epoch": 2.7077122488658456, "grad_norm": 0.858536958694458, "learning_rate": 1.2344211209973811e-07, "loss": 0.0582, "step": 16712 }, { "epoch": 2.7078742709008425, "grad_norm": 0.9796631336212158, "learning_rate": 1.2330643219225918e-07, "loss": 0.0593, "step": 16713 }, { "epoch": 2.708036292935839, "grad_norm": 0.9091916084289551, "learning_rate": 1.2317082500585163e-07, "loss": 0.06, "step": 16714 }, { "epoch": 2.708198314970836, "grad_norm": 1.0876365900039673, "learning_rate": 1.2303529054466522e-07, "loss": 0.0648, "step": 16715 }, { "epoch": 2.708360337005833, "grad_norm": 0.9896219968795776, "learning_rate": 1.2289982881284718e-07, "loss": 0.0657, "step": 16716 }, { "epoch": 2.7085223590408294, "grad_norm": 0.9894326329231262, "learning_rate": 1.2276443981454167e-07, "loss": 0.0645, "step": 16717 }, { "epoch": 2.7086843810758263, "grad_norm": 0.9388425946235657, "learning_rate": 1.226291235538915e-07, "loss": 0.0582, "step": 16718 }, { "epoch": 2.7088464031108233, "grad_norm": 0.9230713844299316, "learning_rate": 1.22493880035037e-07, "loss": 0.0647, "step": 16719 }, { "epoch": 2.7090084251458197, "grad_norm": 0.8833425045013428, "learning_rate": 1.223587092621162e-07, "loss": 0.0634, "step": 16720 }, { "epoch": 2.7091704471808167, "grad_norm": 0.9370494484901428, "learning_rate": 1.2222361123926525e-07, "loss": 0.0603, "step": 16721 }, { "epoch": 2.7093324692158136, "grad_norm": 0.8519914150238037, "learning_rate": 1.2208858597061752e-07, "loss": 0.0599, "step": 16722 }, { "epoch": 2.70949449125081, "grad_norm": 0.9887134432792664, "learning_rate": 1.2195363346030497e-07, "loss": 0.0671, "step": 16723 }, { "epoch": 2.709656513285807, "grad_norm": 0.8368325233459473, "learning_rate": 1.21818753712456e-07, "loss": 0.0527, "step": 16724 }, { "epoch": 2.7098185353208035, "grad_norm": 0.8991503119468689, "learning_rate": 1.2168394673119837e-07, "loss": 0.0631, "step": 16725 }, { "epoch": 2.7099805573558005, "grad_norm": 0.9804664850234985, "learning_rate": 1.2154921252065633e-07, "loss": 0.0622, "step": 16726 }, { "epoch": 2.710142579390797, "grad_norm": 0.8143778443336487, "learning_rate": 1.2141455108495321e-07, "loss": 0.0536, "step": 16727 }, { "epoch": 2.710304601425794, "grad_norm": 0.934648334980011, "learning_rate": 1.2127996242820822e-07, "loss": 0.0581, "step": 16728 }, { "epoch": 2.710466623460791, "grad_norm": 1.0055309534072876, "learning_rate": 1.2114544655454002e-07, "loss": 0.0675, "step": 16729 }, { "epoch": 2.7106286454957873, "grad_norm": 0.9444065690040588, "learning_rate": 1.2101100346806478e-07, "loss": 0.0661, "step": 16730 }, { "epoch": 2.7107906675307842, "grad_norm": 0.7594142556190491, "learning_rate": 1.2087663317289554e-07, "loss": 0.0543, "step": 16731 }, { "epoch": 2.710952689565781, "grad_norm": 0.9627096056938171, "learning_rate": 1.2074233567314408e-07, "loss": 0.0609, "step": 16732 }, { "epoch": 2.7111147116007777, "grad_norm": 0.8099537491798401, "learning_rate": 1.2060811097291874e-07, "loss": 0.0559, "step": 16733 }, { "epoch": 2.7112767336357746, "grad_norm": 1.0347872972488403, "learning_rate": 1.2047395907632818e-07, "loss": 0.06, "step": 16734 }, { "epoch": 2.711438755670771, "grad_norm": 0.824565052986145, "learning_rate": 1.2033987998747582e-07, "loss": 0.062, "step": 16735 }, { "epoch": 2.711600777705768, "grad_norm": 1.082157015800476, "learning_rate": 1.2020587371046445e-07, "loss": 0.0715, "step": 16736 }, { "epoch": 2.7117627997407645, "grad_norm": 0.8374156355857849, "learning_rate": 1.2007194024939412e-07, "loss": 0.0589, "step": 16737 }, { "epoch": 2.7119248217757614, "grad_norm": 1.058831810951233, "learning_rate": 1.1993807960836322e-07, "loss": 0.0694, "step": 16738 }, { "epoch": 2.7120868438107584, "grad_norm": 1.0584897994995117, "learning_rate": 1.198042917914677e-07, "loss": 0.0624, "step": 16739 }, { "epoch": 2.712248865845755, "grad_norm": 0.9175471067428589, "learning_rate": 1.1967057680280058e-07, "loss": 0.0571, "step": 16740 }, { "epoch": 2.712410887880752, "grad_norm": 0.9468705058097839, "learning_rate": 1.1953693464645395e-07, "loss": 0.0601, "step": 16741 }, { "epoch": 2.7125729099157487, "grad_norm": 0.8563498258590698, "learning_rate": 1.1940336532651614e-07, "loss": 0.0611, "step": 16742 }, { "epoch": 2.712734931950745, "grad_norm": 1.0424282550811768, "learning_rate": 1.192698688470742e-07, "loss": 0.058, "step": 16743 }, { "epoch": 2.712896953985742, "grad_norm": 0.9378633499145508, "learning_rate": 1.1913644521221345e-07, "loss": 0.0613, "step": 16744 }, { "epoch": 2.713058976020739, "grad_norm": 0.874260425567627, "learning_rate": 1.1900309442601593e-07, "loss": 0.0591, "step": 16745 }, { "epoch": 2.7132209980557356, "grad_norm": 1.0240492820739746, "learning_rate": 1.1886981649256169e-07, "loss": 0.0673, "step": 16746 }, { "epoch": 2.713383020090732, "grad_norm": 0.8745399117469788, "learning_rate": 1.1873661141592857e-07, "loss": 0.0605, "step": 16747 }, { "epoch": 2.713545042125729, "grad_norm": 0.8494243025779724, "learning_rate": 1.1860347920019304e-07, "loss": 0.0592, "step": 16748 }, { "epoch": 2.713707064160726, "grad_norm": 0.9611318111419678, "learning_rate": 1.184704198494277e-07, "loss": 0.0642, "step": 16749 }, { "epoch": 2.7138690861957224, "grad_norm": 0.9604257941246033, "learning_rate": 1.1833743336770482e-07, "loss": 0.0631, "step": 16750 }, { "epoch": 2.7140311082307194, "grad_norm": 0.8594368696212769, "learning_rate": 1.1820451975909253e-07, "loss": 0.0571, "step": 16751 }, { "epoch": 2.7141931302657163, "grad_norm": 0.8387914299964905, "learning_rate": 1.1807167902765843e-07, "loss": 0.0564, "step": 16752 }, { "epoch": 2.714355152300713, "grad_norm": 1.1130517721176147, "learning_rate": 1.1793891117746648e-07, "loss": 0.0618, "step": 16753 }, { "epoch": 2.7145171743357097, "grad_norm": 0.8822175860404968, "learning_rate": 1.1780621621257953e-07, "loss": 0.057, "step": 16754 }, { "epoch": 2.7146791963707066, "grad_norm": 0.8818928599357605, "learning_rate": 1.176735941370577e-07, "loss": 0.0605, "step": 16755 }, { "epoch": 2.714841218405703, "grad_norm": 0.932831883430481, "learning_rate": 1.1754104495495882e-07, "loss": 0.064, "step": 16756 }, { "epoch": 2.7150032404407, "grad_norm": 0.8247097134590149, "learning_rate": 1.1740856867033801e-07, "loss": 0.0576, "step": 16757 }, { "epoch": 2.7151652624756966, "grad_norm": 0.8611258864402771, "learning_rate": 1.1727616528724949e-07, "loss": 0.0581, "step": 16758 }, { "epoch": 2.7153272845106935, "grad_norm": 0.8132169842720032, "learning_rate": 1.171438348097445e-07, "loss": 0.0533, "step": 16759 }, { "epoch": 2.71548930654569, "grad_norm": 0.8210513591766357, "learning_rate": 1.1701157724187173e-07, "loss": 0.0576, "step": 16760 }, { "epoch": 2.715651328580687, "grad_norm": 0.9274697303771973, "learning_rate": 1.1687939258767795e-07, "loss": 0.0642, "step": 16761 }, { "epoch": 2.715813350615684, "grad_norm": 0.9596958160400391, "learning_rate": 1.1674728085120713e-07, "loss": 0.063, "step": 16762 }, { "epoch": 2.7159753726506803, "grad_norm": 0.9137746691703796, "learning_rate": 1.1661524203650298e-07, "loss": 0.0604, "step": 16763 }, { "epoch": 2.7161373946856773, "grad_norm": 0.9403409361839294, "learning_rate": 1.1648327614760452e-07, "loss": 0.0622, "step": 16764 }, { "epoch": 2.716299416720674, "grad_norm": 0.9043160676956177, "learning_rate": 1.1635138318854961e-07, "loss": 0.0668, "step": 16765 }, { "epoch": 2.7164614387556707, "grad_norm": 0.8880981802940369, "learning_rate": 1.1621956316337391e-07, "loss": 0.061, "step": 16766 }, { "epoch": 2.7166234607906676, "grad_norm": 0.938401460647583, "learning_rate": 1.1608781607611113e-07, "loss": 0.0631, "step": 16767 }, { "epoch": 2.7167854828256646, "grad_norm": 0.9028334021568298, "learning_rate": 1.1595614193079224e-07, "loss": 0.0553, "step": 16768 }, { "epoch": 2.716947504860661, "grad_norm": 0.8787040710449219, "learning_rate": 1.1582454073144623e-07, "loss": 0.0611, "step": 16769 }, { "epoch": 2.7171095268956575, "grad_norm": 0.8604116439819336, "learning_rate": 1.1569301248209958e-07, "loss": 0.0499, "step": 16770 }, { "epoch": 2.7172715489306545, "grad_norm": 0.9343183636665344, "learning_rate": 1.1556155718677714e-07, "loss": 0.0587, "step": 16771 }, { "epoch": 2.7174335709656514, "grad_norm": 0.7739042639732361, "learning_rate": 1.1543017484950015e-07, "loss": 0.0545, "step": 16772 }, { "epoch": 2.717595593000648, "grad_norm": 1.046132206916809, "learning_rate": 1.1529886547428954e-07, "loss": 0.0585, "step": 16773 }, { "epoch": 2.717757615035645, "grad_norm": 0.8855156302452087, "learning_rate": 1.1516762906516322e-07, "loss": 0.0581, "step": 16774 }, { "epoch": 2.7179196370706418, "grad_norm": 0.8061611652374268, "learning_rate": 1.1503646562613602e-07, "loss": 0.0588, "step": 16775 }, { "epoch": 2.7180816591056383, "grad_norm": 0.9897039532661438, "learning_rate": 1.1490537516122141e-07, "loss": 0.0622, "step": 16776 }, { "epoch": 2.718243681140635, "grad_norm": 0.9542493224143982, "learning_rate": 1.1477435767443007e-07, "loss": 0.0619, "step": 16777 }, { "epoch": 2.718405703175632, "grad_norm": 0.8548462986946106, "learning_rate": 1.1464341316977184e-07, "loss": 0.0585, "step": 16778 }, { "epoch": 2.7185677252106286, "grad_norm": 0.810540497303009, "learning_rate": 1.145125416512527e-07, "loss": 0.053, "step": 16779 }, { "epoch": 2.7187297472456255, "grad_norm": 0.8522582054138184, "learning_rate": 1.1438174312287664e-07, "loss": 0.0563, "step": 16780 }, { "epoch": 2.718891769280622, "grad_norm": 0.9812653064727783, "learning_rate": 1.1425101758864632e-07, "loss": 0.0679, "step": 16781 }, { "epoch": 2.719053791315619, "grad_norm": 1.0658005475997925, "learning_rate": 1.1412036505256158e-07, "loss": 0.057, "step": 16782 }, { "epoch": 2.7192158133506155, "grad_norm": 0.930414080619812, "learning_rate": 1.1398978551861978e-07, "loss": 0.0626, "step": 16783 }, { "epoch": 2.7193778353856124, "grad_norm": 0.9118586182594299, "learning_rate": 1.1385927899081661e-07, "loss": 0.0641, "step": 16784 }, { "epoch": 2.7195398574206093, "grad_norm": 0.9204965829849243, "learning_rate": 1.13728845473145e-07, "loss": 0.0619, "step": 16785 }, { "epoch": 2.719701879455606, "grad_norm": 0.9930737614631653, "learning_rate": 1.1359848496959618e-07, "loss": 0.0617, "step": 16786 }, { "epoch": 2.7198639014906028, "grad_norm": 0.8213454484939575, "learning_rate": 1.1346819748415893e-07, "loss": 0.0568, "step": 16787 }, { "epoch": 2.7200259235255997, "grad_norm": 0.8408246636390686, "learning_rate": 1.1333798302081922e-07, "loss": 0.0554, "step": 16788 }, { "epoch": 2.720187945560596, "grad_norm": 0.8678555488586426, "learning_rate": 1.1320784158356218e-07, "loss": 0.0564, "step": 16789 }, { "epoch": 2.720349967595593, "grad_norm": 0.8617439866065979, "learning_rate": 1.1307777317636882e-07, "loss": 0.0587, "step": 16790 }, { "epoch": 2.7205119896305896, "grad_norm": 0.8767697811126709, "learning_rate": 1.1294777780321898e-07, "loss": 0.0613, "step": 16791 }, { "epoch": 2.7206740116655865, "grad_norm": 1.116636872291565, "learning_rate": 1.1281785546809115e-07, "loss": 0.0709, "step": 16792 }, { "epoch": 2.720836033700583, "grad_norm": 0.9495435357093811, "learning_rate": 1.1268800617495995e-07, "loss": 0.0596, "step": 16793 }, { "epoch": 2.72099805573558, "grad_norm": 0.8045752644538879, "learning_rate": 1.1255822992779858e-07, "loss": 0.0519, "step": 16794 }, { "epoch": 2.721160077770577, "grad_norm": 0.7897116541862488, "learning_rate": 1.1242852673057774e-07, "loss": 0.0559, "step": 16795 }, { "epoch": 2.7213220998055734, "grad_norm": 0.9391621947288513, "learning_rate": 1.1229889658726623e-07, "loss": 0.0606, "step": 16796 }, { "epoch": 2.7214841218405703, "grad_norm": 0.8378191590309143, "learning_rate": 1.1216933950183028e-07, "loss": 0.0553, "step": 16797 }, { "epoch": 2.7216461438755672, "grad_norm": 0.8450056314468384, "learning_rate": 1.1203985547823427e-07, "loss": 0.0608, "step": 16798 }, { "epoch": 2.7218081659105637, "grad_norm": 0.9059349894523621, "learning_rate": 1.1191044452043998e-07, "loss": 0.0581, "step": 16799 }, { "epoch": 2.7219701879455607, "grad_norm": 0.9473249316215515, "learning_rate": 1.1178110663240676e-07, "loss": 0.0646, "step": 16800 }, { "epoch": 2.7221322099805576, "grad_norm": 1.025634527206421, "learning_rate": 1.1165184181809258e-07, "loss": 0.0586, "step": 16801 }, { "epoch": 2.722294232015554, "grad_norm": 0.9218292832374573, "learning_rate": 1.1152265008145202e-07, "loss": 0.0651, "step": 16802 }, { "epoch": 2.722456254050551, "grad_norm": 0.8640052080154419, "learning_rate": 1.1139353142643861e-07, "loss": 0.0558, "step": 16803 }, { "epoch": 2.7226182760855475, "grad_norm": 1.0886274576187134, "learning_rate": 1.1126448585700306e-07, "loss": 0.06, "step": 16804 }, { "epoch": 2.7227802981205445, "grad_norm": 0.8292529582977295, "learning_rate": 1.1113551337709305e-07, "loss": 0.0591, "step": 16805 }, { "epoch": 2.722942320155541, "grad_norm": 0.85284024477005, "learning_rate": 1.110066139906557e-07, "loss": 0.0607, "step": 16806 }, { "epoch": 2.723104342190538, "grad_norm": 0.8384661674499512, "learning_rate": 1.1087778770163482e-07, "loss": 0.0543, "step": 16807 }, { "epoch": 2.723266364225535, "grad_norm": 0.9519869089126587, "learning_rate": 1.1074903451397195e-07, "loss": 0.0633, "step": 16808 }, { "epoch": 2.7234283862605313, "grad_norm": 0.8796502947807312, "learning_rate": 1.1062035443160673e-07, "loss": 0.0599, "step": 16809 }, { "epoch": 2.7235904082955282, "grad_norm": 0.9636805057525635, "learning_rate": 1.1049174745847657e-07, "loss": 0.065, "step": 16810 }, { "epoch": 2.723752430330525, "grad_norm": 0.8485105037689209, "learning_rate": 1.1036321359851638e-07, "loss": 0.0555, "step": 16811 }, { "epoch": 2.7239144523655217, "grad_norm": 1.940087080001831, "learning_rate": 1.1023475285565882e-07, "loss": 0.0602, "step": 16812 }, { "epoch": 2.7240764744005186, "grad_norm": 0.9121723175048828, "learning_rate": 1.1010636523383494e-07, "loss": 0.0591, "step": 16813 }, { "epoch": 2.724238496435515, "grad_norm": 0.9082077145576477, "learning_rate": 1.099780507369727e-07, "loss": 0.062, "step": 16814 }, { "epoch": 2.724400518470512, "grad_norm": 0.9480251669883728, "learning_rate": 1.0984980936899842e-07, "loss": 0.0608, "step": 16815 }, { "epoch": 2.7245625405055085, "grad_norm": 1.0009106397628784, "learning_rate": 1.0972164113383616e-07, "loss": 0.0651, "step": 16816 }, { "epoch": 2.7247245625405054, "grad_norm": 1.0686898231506348, "learning_rate": 1.0959354603540695e-07, "loss": 0.069, "step": 16817 }, { "epoch": 2.7248865845755024, "grad_norm": 0.8673676252365112, "learning_rate": 1.09465524077631e-07, "loss": 0.0545, "step": 16818 }, { "epoch": 2.725048606610499, "grad_norm": 0.9199877977371216, "learning_rate": 1.093375752644249e-07, "loss": 0.053, "step": 16819 }, { "epoch": 2.725210628645496, "grad_norm": 0.8862485885620117, "learning_rate": 1.0920969959970301e-07, "loss": 0.0579, "step": 16820 }, { "epoch": 2.7253726506804927, "grad_norm": 0.8255491256713867, "learning_rate": 1.0908189708737942e-07, "loss": 0.0614, "step": 16821 }, { "epoch": 2.725534672715489, "grad_norm": 0.9784971475601196, "learning_rate": 1.0895416773136408e-07, "loss": 0.0637, "step": 16822 }, { "epoch": 2.725696694750486, "grad_norm": 1.1451879739761353, "learning_rate": 1.088265115355644e-07, "loss": 0.0695, "step": 16823 }, { "epoch": 2.725858716785483, "grad_norm": 0.827608585357666, "learning_rate": 1.0869892850388697e-07, "loss": 0.0603, "step": 16824 }, { "epoch": 2.7260207388204796, "grad_norm": 0.9534388184547424, "learning_rate": 1.0857141864023591e-07, "loss": 0.0635, "step": 16825 }, { "epoch": 2.7261827608554765, "grad_norm": 0.9384109973907471, "learning_rate": 1.0844398194851197e-07, "loss": 0.0566, "step": 16826 }, { "epoch": 2.726344782890473, "grad_norm": 1.1413118839263916, "learning_rate": 1.0831661843261482e-07, "loss": 0.0673, "step": 16827 }, { "epoch": 2.72650680492547, "grad_norm": 0.878052830696106, "learning_rate": 1.0818932809644161e-07, "loss": 0.0537, "step": 16828 }, { "epoch": 2.7266688269604664, "grad_norm": 0.9519762992858887, "learning_rate": 1.0806211094388647e-07, "loss": 0.0624, "step": 16829 }, { "epoch": 2.7268308489954634, "grad_norm": 0.9057498574256897, "learning_rate": 1.0793496697884265e-07, "loss": 0.0527, "step": 16830 }, { "epoch": 2.7269928710304603, "grad_norm": 0.9844750761985779, "learning_rate": 1.078078962052001e-07, "loss": 0.0589, "step": 16831 }, { "epoch": 2.7271548930654568, "grad_norm": 1.1606096029281616, "learning_rate": 1.0768089862684684e-07, "loss": 0.0655, "step": 16832 }, { "epoch": 2.7273169151004537, "grad_norm": 0.8622628450393677, "learning_rate": 1.0755397424766917e-07, "loss": 0.0558, "step": 16833 }, { "epoch": 2.7274789371354506, "grad_norm": 0.8796065449714661, "learning_rate": 1.0742712307154957e-07, "loss": 0.0685, "step": 16834 }, { "epoch": 2.727640959170447, "grad_norm": 0.8955232501029968, "learning_rate": 1.0730034510237048e-07, "loss": 0.0654, "step": 16835 }, { "epoch": 2.727802981205444, "grad_norm": 0.9430897235870361, "learning_rate": 1.0717364034401073e-07, "loss": 0.0662, "step": 16836 }, { "epoch": 2.7279650032404406, "grad_norm": 0.849699079990387, "learning_rate": 1.0704700880034696e-07, "loss": 0.0577, "step": 16837 }, { "epoch": 2.7281270252754375, "grad_norm": 0.9427576065063477, "learning_rate": 1.0692045047525384e-07, "loss": 0.0603, "step": 16838 }, { "epoch": 2.728289047310434, "grad_norm": 0.8516416549682617, "learning_rate": 1.0679396537260356e-07, "loss": 0.0539, "step": 16839 }, { "epoch": 2.728451069345431, "grad_norm": 0.9089876413345337, "learning_rate": 1.066675534962669e-07, "loss": 0.0621, "step": 16840 }, { "epoch": 2.728613091380428, "grad_norm": 1.2640206813812256, "learning_rate": 1.0654121485011131e-07, "loss": 0.0632, "step": 16841 }, { "epoch": 2.7287751134154243, "grad_norm": 0.8047393560409546, "learning_rate": 1.0641494943800234e-07, "loss": 0.0581, "step": 16842 }, { "epoch": 2.7289371354504213, "grad_norm": 0.8546563982963562, "learning_rate": 1.0628875726380355e-07, "loss": 0.057, "step": 16843 }, { "epoch": 2.729099157485418, "grad_norm": 1.0105202198028564, "learning_rate": 1.0616263833137602e-07, "loss": 0.0678, "step": 16844 }, { "epoch": 2.7292611795204147, "grad_norm": 0.9958057999610901, "learning_rate": 1.0603659264457888e-07, "loss": 0.0581, "step": 16845 }, { "epoch": 2.7294232015554116, "grad_norm": 0.9695627093315125, "learning_rate": 1.0591062020726878e-07, "loss": 0.059, "step": 16846 }, { "epoch": 2.7295852235904086, "grad_norm": 0.8972888588905334, "learning_rate": 1.0578472102330011e-07, "loss": 0.0588, "step": 16847 }, { "epoch": 2.729747245625405, "grad_norm": 1.0485844612121582, "learning_rate": 1.0565889509652483e-07, "loss": 0.0638, "step": 16848 }, { "epoch": 2.7299092676604015, "grad_norm": 0.9550689458847046, "learning_rate": 1.0553314243079343e-07, "loss": 0.0621, "step": 16849 }, { "epoch": 2.7300712896953985, "grad_norm": 0.8764699101448059, "learning_rate": 1.0540746302995341e-07, "loss": 0.0568, "step": 16850 }, { "epoch": 2.7302333117303954, "grad_norm": 0.8220252394676208, "learning_rate": 1.0528185689785031e-07, "loss": 0.056, "step": 16851 }, { "epoch": 2.730395333765392, "grad_norm": 1.0028624534606934, "learning_rate": 1.0515632403832715e-07, "loss": 0.0509, "step": 16852 }, { "epoch": 2.730557355800389, "grad_norm": 0.84007328748703, "learning_rate": 1.0503086445522476e-07, "loss": 0.0505, "step": 16853 }, { "epoch": 2.7307193778353858, "grad_norm": 0.9844392538070679, "learning_rate": 1.0490547815238228e-07, "loss": 0.0602, "step": 16854 }, { "epoch": 2.7308813998703823, "grad_norm": 1.050789475440979, "learning_rate": 1.0478016513363665e-07, "loss": 0.0632, "step": 16855 }, { "epoch": 2.731043421905379, "grad_norm": 1.0053508281707764, "learning_rate": 1.0465492540282146e-07, "loss": 0.0655, "step": 16856 }, { "epoch": 2.731205443940376, "grad_norm": 0.9643328785896301, "learning_rate": 1.0452975896376865e-07, "loss": 0.0642, "step": 16857 }, { "epoch": 2.7313674659753726, "grad_norm": 0.9741927981376648, "learning_rate": 1.044046658203085e-07, "loss": 0.0602, "step": 16858 }, { "epoch": 2.7315294880103695, "grad_norm": 0.9322402477264404, "learning_rate": 1.0427964597626822e-07, "loss": 0.0567, "step": 16859 }, { "epoch": 2.731691510045366, "grad_norm": 0.8425524234771729, "learning_rate": 1.0415469943547335e-07, "loss": 0.0608, "step": 16860 }, { "epoch": 2.731853532080363, "grad_norm": 1.0030529499053955, "learning_rate": 1.0402982620174696e-07, "loss": 0.0638, "step": 16861 }, { "epoch": 2.7320155541153595, "grad_norm": 0.8651495575904846, "learning_rate": 1.0390502627890986e-07, "loss": 0.0586, "step": 16862 }, { "epoch": 2.7321775761503564, "grad_norm": 0.9395628571510315, "learning_rate": 1.0378029967077985e-07, "loss": 0.0558, "step": 16863 }, { "epoch": 2.7323395981853533, "grad_norm": 0.8924857378005981, "learning_rate": 1.0365564638117442e-07, "loss": 0.0659, "step": 16864 }, { "epoch": 2.73250162022035, "grad_norm": 0.860127329826355, "learning_rate": 1.0353106641390693e-07, "loss": 0.0651, "step": 16865 }, { "epoch": 2.7326636422553467, "grad_norm": 0.846647322177887, "learning_rate": 1.0340655977279012e-07, "loss": 0.0592, "step": 16866 }, { "epoch": 2.7328256642903437, "grad_norm": 0.871707022190094, "learning_rate": 1.032821264616321e-07, "loss": 0.0618, "step": 16867 }, { "epoch": 2.73298768632534, "grad_norm": 0.89871746301651, "learning_rate": 1.0315776648424119e-07, "loss": 0.0655, "step": 16868 }, { "epoch": 2.733149708360337, "grad_norm": 0.8250013589859009, "learning_rate": 1.0303347984442297e-07, "loss": 0.0585, "step": 16869 }, { "epoch": 2.733311730395334, "grad_norm": 0.8901594281196594, "learning_rate": 1.0290926654597938e-07, "loss": 0.0568, "step": 16870 }, { "epoch": 2.7334737524303305, "grad_norm": 0.9799373745918274, "learning_rate": 1.0278512659271128e-07, "loss": 0.0617, "step": 16871 }, { "epoch": 2.733635774465327, "grad_norm": 0.9966387748718262, "learning_rate": 1.0266105998841702e-07, "loss": 0.058, "step": 16872 }, { "epoch": 2.733797796500324, "grad_norm": 0.8289188742637634, "learning_rate": 1.0253706673689328e-07, "loss": 0.0542, "step": 16873 }, { "epoch": 2.733959818535321, "grad_norm": 0.9346227645874023, "learning_rate": 1.0241314684193343e-07, "loss": 0.0611, "step": 16874 }, { "epoch": 2.7341218405703174, "grad_norm": 0.9059098958969116, "learning_rate": 1.0228930030732914e-07, "loss": 0.0619, "step": 16875 }, { "epoch": 2.7342838626053143, "grad_norm": 0.9077804684638977, "learning_rate": 1.0216552713686989e-07, "loss": 0.0595, "step": 16876 }, { "epoch": 2.7344458846403112, "grad_norm": 0.9058060646057129, "learning_rate": 1.0204182733434293e-07, "loss": 0.0612, "step": 16877 }, { "epoch": 2.7346079066753077, "grad_norm": 0.8321201205253601, "learning_rate": 1.01918200903533e-07, "loss": 0.0592, "step": 16878 }, { "epoch": 2.7347699287103047, "grad_norm": 0.9400634765625, "learning_rate": 1.017946478482229e-07, "loss": 0.068, "step": 16879 }, { "epoch": 2.7349319507453016, "grad_norm": 0.822931170463562, "learning_rate": 1.0167116817219325e-07, "loss": 0.0585, "step": 16880 }, { "epoch": 2.735093972780298, "grad_norm": 1.0861873626708984, "learning_rate": 1.0154776187922182e-07, "loss": 0.068, "step": 16881 }, { "epoch": 2.735255994815295, "grad_norm": 1.1060582399368286, "learning_rate": 1.0142442897308453e-07, "loss": 0.0665, "step": 16882 }, { "epoch": 2.7354180168502915, "grad_norm": 0.9769384860992432, "learning_rate": 1.0130116945755553e-07, "loss": 0.0658, "step": 16883 }, { "epoch": 2.7355800388852884, "grad_norm": 0.9157807230949402, "learning_rate": 1.0117798333640627e-07, "loss": 0.0583, "step": 16884 }, { "epoch": 2.735742060920285, "grad_norm": 0.98633873462677, "learning_rate": 1.0105487061340541e-07, "loss": 0.0652, "step": 16885 }, { "epoch": 2.735904082955282, "grad_norm": 0.9951062202453613, "learning_rate": 1.0093183129231993e-07, "loss": 0.0606, "step": 16886 }, { "epoch": 2.736066104990279, "grad_norm": 0.8598393201828003, "learning_rate": 1.0080886537691514e-07, "loss": 0.0607, "step": 16887 }, { "epoch": 2.7362281270252753, "grad_norm": 0.911261260509491, "learning_rate": 1.0068597287095305e-07, "loss": 0.0566, "step": 16888 }, { "epoch": 2.7363901490602722, "grad_norm": 0.9654666185379028, "learning_rate": 1.005631537781937e-07, "loss": 0.0631, "step": 16889 }, { "epoch": 2.736552171095269, "grad_norm": 1.0117939710617065, "learning_rate": 1.0044040810239547e-07, "loss": 0.063, "step": 16890 }, { "epoch": 2.7367141931302656, "grad_norm": 0.8387407064437866, "learning_rate": 1.0031773584731397e-07, "loss": 0.0552, "step": 16891 }, { "epoch": 2.7368762151652626, "grad_norm": 0.8349279165267944, "learning_rate": 1.0019513701670285e-07, "loss": 0.06, "step": 16892 }, { "epoch": 2.737038237200259, "grad_norm": 0.9240767955780029, "learning_rate": 1.0007261161431275e-07, "loss": 0.0591, "step": 16893 }, { "epoch": 2.737200259235256, "grad_norm": 0.9168483018875122, "learning_rate": 9.995015964389315e-08, "loss": 0.0643, "step": 16894 }, { "epoch": 2.7373622812702525, "grad_norm": 1.0924078226089478, "learning_rate": 9.982778110919106e-08, "loss": 0.0587, "step": 16895 }, { "epoch": 2.7375243033052494, "grad_norm": 0.9266462326049805, "learning_rate": 9.970547601394986e-08, "loss": 0.0645, "step": 16896 }, { "epoch": 2.7376863253402464, "grad_norm": 1.2055165767669678, "learning_rate": 9.958324436191297e-08, "loss": 0.0617, "step": 16897 }, { "epoch": 2.737848347375243, "grad_norm": 0.8799644708633423, "learning_rate": 9.94610861568196e-08, "loss": 0.0596, "step": 16898 }, { "epoch": 2.73801036941024, "grad_norm": 0.9457053542137146, "learning_rate": 9.933900140240843e-08, "loss": 0.057, "step": 16899 }, { "epoch": 2.7381723914452367, "grad_norm": 0.8883636593818665, "learning_rate": 9.92169901024137e-08, "loss": 0.0646, "step": 16900 }, { "epoch": 2.738334413480233, "grad_norm": 0.9511093497276306, "learning_rate": 9.90950522605691e-08, "loss": 0.0616, "step": 16901 }, { "epoch": 2.73849643551523, "grad_norm": 0.9124867916107178, "learning_rate": 9.897318788060662e-08, "loss": 0.0563, "step": 16902 }, { "epoch": 2.738658457550227, "grad_norm": 1.006246566772461, "learning_rate": 9.885139696625356e-08, "loss": 0.0661, "step": 16903 }, { "epoch": 2.7388204795852236, "grad_norm": 0.8624682426452637, "learning_rate": 9.872967952123752e-08, "loss": 0.0538, "step": 16904 }, { "epoch": 2.7389825016202205, "grad_norm": 1.1840333938598633, "learning_rate": 9.860803554928189e-08, "loss": 0.0687, "step": 16905 }, { "epoch": 2.739144523655217, "grad_norm": 0.8678603172302246, "learning_rate": 9.848646505410953e-08, "loss": 0.0524, "step": 16906 }, { "epoch": 2.739306545690214, "grad_norm": 0.8984764218330383, "learning_rate": 9.83649680394394e-08, "loss": 0.0554, "step": 16907 }, { "epoch": 2.7394685677252104, "grad_norm": 0.9465093016624451, "learning_rate": 9.824354450898966e-08, "loss": 0.0618, "step": 16908 }, { "epoch": 2.7396305897602073, "grad_norm": 0.8658840656280518, "learning_rate": 9.812219446647509e-08, "loss": 0.0571, "step": 16909 }, { "epoch": 2.7397926117952043, "grad_norm": 0.9028376340866089, "learning_rate": 9.800091791560939e-08, "loss": 0.0542, "step": 16910 }, { "epoch": 2.7399546338302008, "grad_norm": 1.0068341493606567, "learning_rate": 9.78797148601024e-08, "loss": 0.0694, "step": 16911 }, { "epoch": 2.7401166558651977, "grad_norm": 0.8635134696960449, "learning_rate": 9.775858530366334e-08, "loss": 0.06, "step": 16912 }, { "epoch": 2.7402786779001946, "grad_norm": 0.8920632600784302, "learning_rate": 9.763752924999842e-08, "loss": 0.0559, "step": 16913 }, { "epoch": 2.740440699935191, "grad_norm": 0.8138787746429443, "learning_rate": 9.751654670281135e-08, "loss": 0.0555, "step": 16914 }, { "epoch": 2.740602721970188, "grad_norm": 0.9727160334587097, "learning_rate": 9.739563766580362e-08, "loss": 0.0632, "step": 16915 }, { "epoch": 2.7407647440051845, "grad_norm": 0.830783486366272, "learning_rate": 9.727480214267559e-08, "loss": 0.0599, "step": 16916 }, { "epoch": 2.7409267660401815, "grad_norm": 0.8595759272575378, "learning_rate": 9.715404013712432e-08, "loss": 0.0564, "step": 16917 }, { "epoch": 2.741088788075178, "grad_norm": 1.019550085067749, "learning_rate": 9.70333516528446e-08, "loss": 0.0684, "step": 16918 }, { "epoch": 2.741250810110175, "grad_norm": 1.0129154920578003, "learning_rate": 9.691273669352908e-08, "loss": 0.066, "step": 16919 }, { "epoch": 2.741412832145172, "grad_norm": 1.0200157165527344, "learning_rate": 9.679219526286837e-08, "loss": 0.066, "step": 16920 }, { "epoch": 2.7415748541801683, "grad_norm": 1.0694841146469116, "learning_rate": 9.667172736455093e-08, "loss": 0.0631, "step": 16921 }, { "epoch": 2.7417368762151653, "grad_norm": 0.9911954402923584, "learning_rate": 9.655133300226271e-08, "loss": 0.0644, "step": 16922 }, { "epoch": 2.741898898250162, "grad_norm": 1.1050474643707275, "learning_rate": 9.643101217968743e-08, "loss": 0.0572, "step": 16923 }, { "epoch": 2.7420609202851587, "grad_norm": 0.8301832675933838, "learning_rate": 9.631076490050684e-08, "loss": 0.0577, "step": 16924 }, { "epoch": 2.7422229423201556, "grad_norm": 1.2750616073608398, "learning_rate": 9.619059116839968e-08, "loss": 0.07, "step": 16925 }, { "epoch": 2.7423849643551526, "grad_norm": 0.9567539691925049, "learning_rate": 9.60704909870433e-08, "loss": 0.0601, "step": 16926 }, { "epoch": 2.742546986390149, "grad_norm": 1.1425048112869263, "learning_rate": 9.59504643601128e-08, "loss": 0.0629, "step": 16927 }, { "epoch": 2.742709008425146, "grad_norm": 0.8750780820846558, "learning_rate": 9.583051129128051e-08, "loss": 0.0594, "step": 16928 }, { "epoch": 2.7428710304601425, "grad_norm": 0.8770819306373596, "learning_rate": 9.57106317842163e-08, "loss": 0.0646, "step": 16929 }, { "epoch": 2.7430330524951394, "grad_norm": 1.0766657590866089, "learning_rate": 9.559082584258833e-08, "loss": 0.0548, "step": 16930 }, { "epoch": 2.743195074530136, "grad_norm": 0.9907754063606262, "learning_rate": 9.547109347006312e-08, "loss": 0.0658, "step": 16931 }, { "epoch": 2.743357096565133, "grad_norm": 0.9115135669708252, "learning_rate": 9.535143467030327e-08, "loss": 0.0584, "step": 16932 }, { "epoch": 2.7435191186001298, "grad_norm": 0.9572370052337646, "learning_rate": 9.523184944697034e-08, "loss": 0.0609, "step": 16933 }, { "epoch": 2.7436811406351262, "grad_norm": 0.8873291015625, "learning_rate": 9.511233780372303e-08, "loss": 0.0609, "step": 16934 }, { "epoch": 2.743843162670123, "grad_norm": 0.8288209438323975, "learning_rate": 9.499289974421927e-08, "loss": 0.0567, "step": 16935 }, { "epoch": 2.74400518470512, "grad_norm": 0.8421540856361389, "learning_rate": 9.487353527211223e-08, "loss": 0.0597, "step": 16936 }, { "epoch": 2.7441672067401166, "grad_norm": 0.884370744228363, "learning_rate": 9.475424439105485e-08, "loss": 0.0656, "step": 16937 }, { "epoch": 2.7443292287751135, "grad_norm": 0.9282497763633728, "learning_rate": 9.463502710469697e-08, "loss": 0.0596, "step": 16938 }, { "epoch": 2.74449125081011, "grad_norm": 0.9526280760765076, "learning_rate": 9.45158834166865e-08, "loss": 0.0618, "step": 16939 }, { "epoch": 2.744653272845107, "grad_norm": 1.0486241579055786, "learning_rate": 9.439681333066858e-08, "loss": 0.0664, "step": 16940 }, { "epoch": 2.7448152948801035, "grad_norm": 0.9274911284446716, "learning_rate": 9.427781685028697e-08, "loss": 0.0546, "step": 16941 }, { "epoch": 2.7449773169151004, "grad_norm": 0.9596142768859863, "learning_rate": 9.415889397918238e-08, "loss": 0.0612, "step": 16942 }, { "epoch": 2.7451393389500973, "grad_norm": 0.8194335699081421, "learning_rate": 9.404004472099382e-08, "loss": 0.0555, "step": 16943 }, { "epoch": 2.745301360985094, "grad_norm": 0.9087086319923401, "learning_rate": 9.3921269079357e-08, "loss": 0.0567, "step": 16944 }, { "epoch": 2.7454633830200907, "grad_norm": 0.8666279911994934, "learning_rate": 9.380256705790708e-08, "loss": 0.0553, "step": 16945 }, { "epoch": 2.7456254050550877, "grad_norm": 0.9419772624969482, "learning_rate": 9.368393866027614e-08, "loss": 0.0666, "step": 16946 }, { "epoch": 2.745787427090084, "grad_norm": 0.8581135869026184, "learning_rate": 9.356538389009296e-08, "loss": 0.0524, "step": 16947 }, { "epoch": 2.745949449125081, "grad_norm": 0.846437394618988, "learning_rate": 9.344690275098573e-08, "loss": 0.0499, "step": 16948 }, { "epoch": 2.746111471160078, "grad_norm": 0.8755781054496765, "learning_rate": 9.332849524657961e-08, "loss": 0.0586, "step": 16949 }, { "epoch": 2.7462734931950745, "grad_norm": 0.9014527201652527, "learning_rate": 9.321016138049727e-08, "loss": 0.0562, "step": 16950 }, { "epoch": 2.7464355152300715, "grad_norm": 0.9872837662696838, "learning_rate": 9.309190115635996e-08, "loss": 0.057, "step": 16951 }, { "epoch": 2.746597537265068, "grad_norm": 0.9479248523712158, "learning_rate": 9.297371457778565e-08, "loss": 0.0581, "step": 16952 }, { "epoch": 2.746759559300065, "grad_norm": 0.9341238737106323, "learning_rate": 9.285560164839086e-08, "loss": 0.0633, "step": 16953 }, { "epoch": 2.7469215813350614, "grad_norm": 0.9351992011070251, "learning_rate": 9.273756237178938e-08, "loss": 0.0639, "step": 16954 }, { "epoch": 2.7470836033700583, "grad_norm": 0.8418577313423157, "learning_rate": 9.261959675159304e-08, "loss": 0.0612, "step": 16955 }, { "epoch": 2.7472456254050552, "grad_norm": 1.0418609380722046, "learning_rate": 9.250170479141146e-08, "loss": 0.0661, "step": 16956 }, { "epoch": 2.7474076474400517, "grad_norm": 1.1158641576766968, "learning_rate": 9.238388649485175e-08, "loss": 0.065, "step": 16957 }, { "epoch": 2.7475696694750487, "grad_norm": 0.9515754580497742, "learning_rate": 9.226614186551852e-08, "loss": 0.0578, "step": 16958 }, { "epoch": 2.7477316915100456, "grad_norm": 0.8326787352561951, "learning_rate": 9.214847090701474e-08, "loss": 0.0537, "step": 16959 }, { "epoch": 2.747893713545042, "grad_norm": 0.9199459552764893, "learning_rate": 9.20308736229411e-08, "loss": 0.0591, "step": 16960 }, { "epoch": 2.748055735580039, "grad_norm": 0.9134218692779541, "learning_rate": 9.19133500168956e-08, "loss": 0.0604, "step": 16961 }, { "epoch": 2.7482177576150355, "grad_norm": 0.8909247517585754, "learning_rate": 9.179590009247397e-08, "loss": 0.0587, "step": 16962 }, { "epoch": 2.7483797796500324, "grad_norm": 0.8900055885314941, "learning_rate": 9.167852385326969e-08, "loss": 0.0579, "step": 16963 }, { "epoch": 2.748541801685029, "grad_norm": 1.0126845836639404, "learning_rate": 9.15612213028752e-08, "loss": 0.0661, "step": 16964 }, { "epoch": 2.748703823720026, "grad_norm": 0.9734671711921692, "learning_rate": 9.144399244487873e-08, "loss": 0.06, "step": 16965 }, { "epoch": 2.748865845755023, "grad_norm": 0.8340158462524414, "learning_rate": 9.132683728286767e-08, "loss": 0.0552, "step": 16966 }, { "epoch": 2.7490278677900193, "grad_norm": 0.81150883436203, "learning_rate": 9.120975582042613e-08, "loss": 0.057, "step": 16967 }, { "epoch": 2.749189889825016, "grad_norm": 0.9296124577522278, "learning_rate": 9.109274806113732e-08, "loss": 0.0647, "step": 16968 }, { "epoch": 2.749351911860013, "grad_norm": 0.8688836097717285, "learning_rate": 9.097581400858064e-08, "loss": 0.0588, "step": 16969 }, { "epoch": 2.7495139338950096, "grad_norm": 1.00139319896698, "learning_rate": 9.085895366633457e-08, "loss": 0.0626, "step": 16970 }, { "epoch": 2.7496759559300066, "grad_norm": 0.9233237504959106, "learning_rate": 9.074216703797434e-08, "loss": 0.0646, "step": 16971 }, { "epoch": 2.7498379779650035, "grad_norm": 0.954022228717804, "learning_rate": 9.062545412707375e-08, "loss": 0.0637, "step": 16972 }, { "epoch": 2.75, "grad_norm": 0.7831946611404419, "learning_rate": 9.050881493720326e-08, "loss": 0.0533, "step": 16973 }, { "epoch": 2.7501620220349965, "grad_norm": 0.8610273599624634, "learning_rate": 9.039224947193254e-08, "loss": 0.0567, "step": 16974 }, { "epoch": 2.7503240440699934, "grad_norm": 1.0053431987762451, "learning_rate": 9.027575773482788e-08, "loss": 0.063, "step": 16975 }, { "epoch": 2.7504860661049904, "grad_norm": 1.0265311002731323, "learning_rate": 9.01593397294534e-08, "loss": 0.068, "step": 16976 }, { "epoch": 2.750648088139987, "grad_norm": 1.0385494232177734, "learning_rate": 9.004299545937151e-08, "loss": 0.0615, "step": 16977 }, { "epoch": 2.750810110174984, "grad_norm": 1.0085492134094238, "learning_rate": 8.992672492814158e-08, "loss": 0.0618, "step": 16978 }, { "epoch": 2.7509721322099807, "grad_norm": 0.9866387248039246, "learning_rate": 8.981052813932245e-08, "loss": 0.0675, "step": 16979 }, { "epoch": 2.751134154244977, "grad_norm": 1.0213267803192139, "learning_rate": 8.969440509646821e-08, "loss": 0.0586, "step": 16980 }, { "epoch": 2.751296176279974, "grad_norm": 1.007440447807312, "learning_rate": 8.957835580313212e-08, "loss": 0.0656, "step": 16981 }, { "epoch": 2.751458198314971, "grad_norm": 0.9071718454360962, "learning_rate": 8.946238026286552e-08, "loss": 0.0619, "step": 16982 }, { "epoch": 2.7516202203499676, "grad_norm": 1.0188510417938232, "learning_rate": 8.93464784792164e-08, "loss": 0.0566, "step": 16983 }, { "epoch": 2.7517822423849645, "grad_norm": 0.8976929187774658, "learning_rate": 8.923065045573165e-08, "loss": 0.0603, "step": 16984 }, { "epoch": 2.751944264419961, "grad_norm": 0.7960435748100281, "learning_rate": 8.911489619595482e-08, "loss": 0.0537, "step": 16985 }, { "epoch": 2.752106286454958, "grad_norm": 1.094245195388794, "learning_rate": 8.899921570342807e-08, "loss": 0.0682, "step": 16986 }, { "epoch": 2.7522683084899544, "grad_norm": 0.8297873139381409, "learning_rate": 8.888360898169079e-08, "loss": 0.0515, "step": 16987 }, { "epoch": 2.7524303305249513, "grad_norm": 0.8277057409286499, "learning_rate": 8.876807603428017e-08, "loss": 0.0569, "step": 16988 }, { "epoch": 2.7525923525599483, "grad_norm": 0.8910994529724121, "learning_rate": 8.865261686473143e-08, "loss": 0.0529, "step": 16989 }, { "epoch": 2.7527543745949448, "grad_norm": 0.798820436000824, "learning_rate": 8.853723147657755e-08, "loss": 0.0544, "step": 16990 }, { "epoch": 2.7529163966299417, "grad_norm": 0.994156539440155, "learning_rate": 8.842191987334853e-08, "loss": 0.0661, "step": 16991 }, { "epoch": 2.7530784186649386, "grad_norm": 0.8740126490592957, "learning_rate": 8.830668205857263e-08, "loss": 0.0599, "step": 16992 }, { "epoch": 2.753240440699935, "grad_norm": 0.9140499830245972, "learning_rate": 8.819151803577647e-08, "loss": 0.0605, "step": 16993 }, { "epoch": 2.753402462734932, "grad_norm": 0.9925417304039001, "learning_rate": 8.807642780848335e-08, "loss": 0.0623, "step": 16994 }, { "epoch": 2.7535644847699285, "grad_norm": 0.8395823836326599, "learning_rate": 8.796141138021464e-08, "loss": 0.0575, "step": 16995 }, { "epoch": 2.7537265068049255, "grad_norm": 1.012656331062317, "learning_rate": 8.784646875448971e-08, "loss": 0.0669, "step": 16996 }, { "epoch": 2.753888528839922, "grad_norm": 0.8465946316719055, "learning_rate": 8.77315999348255e-08, "loss": 0.0572, "step": 16997 }, { "epoch": 2.754050550874919, "grad_norm": 1.1134271621704102, "learning_rate": 8.761680492473668e-08, "loss": 0.058, "step": 16998 }, { "epoch": 2.754212572909916, "grad_norm": 0.852190375328064, "learning_rate": 8.7502083727736e-08, "loss": 0.0576, "step": 16999 }, { "epoch": 2.7543745949449123, "grad_norm": 0.9086182713508606, "learning_rate": 8.738743634733316e-08, "loss": 0.0631, "step": 17000 }, { "epoch": 2.7545366169799093, "grad_norm": 0.9035629034042358, "learning_rate": 8.727286278703672e-08, "loss": 0.0589, "step": 17001 }, { "epoch": 2.754698639014906, "grad_norm": 0.8812063932418823, "learning_rate": 8.715836305035169e-08, "loss": 0.0516, "step": 17002 }, { "epoch": 2.7548606610499027, "grad_norm": 0.9071505069732666, "learning_rate": 8.704393714078191e-08, "loss": 0.0585, "step": 17003 }, { "epoch": 2.7550226830848996, "grad_norm": 0.9533473253250122, "learning_rate": 8.692958506182847e-08, "loss": 0.0562, "step": 17004 }, { "epoch": 2.7551847051198965, "grad_norm": 0.8007147312164307, "learning_rate": 8.681530681699024e-08, "loss": 0.0611, "step": 17005 }, { "epoch": 2.755346727154893, "grad_norm": 0.9169260263442993, "learning_rate": 8.67011024097636e-08, "loss": 0.065, "step": 17006 }, { "epoch": 2.75550874918989, "grad_norm": 0.8631471395492554, "learning_rate": 8.658697184364323e-08, "loss": 0.0504, "step": 17007 }, { "epoch": 2.7556707712248865, "grad_norm": 0.9157608151435852, "learning_rate": 8.647291512212136e-08, "loss": 0.0572, "step": 17008 }, { "epoch": 2.7558327932598834, "grad_norm": 0.9917179942131042, "learning_rate": 8.635893224868769e-08, "loss": 0.064, "step": 17009 }, { "epoch": 2.75599481529488, "grad_norm": 0.9392203092575073, "learning_rate": 8.624502322682942e-08, "loss": 0.0655, "step": 17010 }, { "epoch": 2.756156837329877, "grad_norm": 0.8594088554382324, "learning_rate": 8.61311880600324e-08, "loss": 0.0567, "step": 17011 }, { "epoch": 2.7563188593648738, "grad_norm": 0.7704145312309265, "learning_rate": 8.601742675177993e-08, "loss": 0.0494, "step": 17012 }, { "epoch": 2.7564808813998702, "grad_norm": 0.9152031540870667, "learning_rate": 8.590373930555201e-08, "loss": 0.0579, "step": 17013 }, { "epoch": 2.756642903434867, "grad_norm": 0.8692240715026855, "learning_rate": 8.57901257248278e-08, "loss": 0.0578, "step": 17014 }, { "epoch": 2.756804925469864, "grad_norm": 0.8714682459831238, "learning_rate": 8.567658601308371e-08, "loss": 0.0523, "step": 17015 }, { "epoch": 2.7569669475048606, "grad_norm": 0.9357670545578003, "learning_rate": 8.556312017379332e-08, "loss": 0.0581, "step": 17016 }, { "epoch": 2.7571289695398575, "grad_norm": 0.9461519718170166, "learning_rate": 8.544972821042857e-08, "loss": 0.0652, "step": 17017 }, { "epoch": 2.757290991574854, "grad_norm": 0.7985438108444214, "learning_rate": 8.533641012645921e-08, "loss": 0.0567, "step": 17018 }, { "epoch": 2.757453013609851, "grad_norm": 0.8582321405410767, "learning_rate": 8.522316592535246e-08, "loss": 0.0545, "step": 17019 }, { "epoch": 2.7576150356448474, "grad_norm": 0.9717732071876526, "learning_rate": 8.510999561057276e-08, "loss": 0.0655, "step": 17020 }, { "epoch": 2.7577770576798444, "grad_norm": 0.8554613590240479, "learning_rate": 8.499689918558318e-08, "loss": 0.0555, "step": 17021 }, { "epoch": 2.7579390797148413, "grad_norm": 0.9778949618339539, "learning_rate": 8.488387665384457e-08, "loss": 0.0646, "step": 17022 }, { "epoch": 2.758101101749838, "grad_norm": 1.0577203035354614, "learning_rate": 8.477092801881525e-08, "loss": 0.0594, "step": 17023 }, { "epoch": 2.7582631237848347, "grad_norm": 0.9223877787590027, "learning_rate": 8.465805328395055e-08, "loss": 0.0574, "step": 17024 }, { "epoch": 2.7584251458198317, "grad_norm": 0.7924177050590515, "learning_rate": 8.454525245270378e-08, "loss": 0.0518, "step": 17025 }, { "epoch": 2.758587167854828, "grad_norm": 0.8869810700416565, "learning_rate": 8.443252552852776e-08, "loss": 0.0636, "step": 17026 }, { "epoch": 2.758749189889825, "grad_norm": 0.927229106426239, "learning_rate": 8.431987251487083e-08, "loss": 0.0608, "step": 17027 }, { "epoch": 2.758911211924822, "grad_norm": 1.047018051147461, "learning_rate": 8.42072934151797e-08, "loss": 0.0618, "step": 17028 }, { "epoch": 2.7590732339598185, "grad_norm": 0.8683516979217529, "learning_rate": 8.409478823289934e-08, "loss": 0.0614, "step": 17029 }, { "epoch": 2.7592352559948155, "grad_norm": 0.9671280384063721, "learning_rate": 8.398235697147205e-08, "loss": 0.0533, "step": 17030 }, { "epoch": 2.759397278029812, "grad_norm": 0.8958953619003296, "learning_rate": 8.386999963433812e-08, "loss": 0.0573, "step": 17031 }, { "epoch": 2.759559300064809, "grad_norm": 0.8084527850151062, "learning_rate": 8.375771622493506e-08, "loss": 0.0498, "step": 17032 }, { "epoch": 2.7597213220998054, "grad_norm": 0.838610827922821, "learning_rate": 8.364550674669875e-08, "loss": 0.0635, "step": 17033 }, { "epoch": 2.7598833441348023, "grad_norm": 0.9222990274429321, "learning_rate": 8.353337120306282e-08, "loss": 0.0647, "step": 17034 }, { "epoch": 2.7600453661697992, "grad_norm": 1.0761842727661133, "learning_rate": 8.342130959745731e-08, "loss": 0.0665, "step": 17035 }, { "epoch": 2.7602073882047957, "grad_norm": 0.8044500350952148, "learning_rate": 8.330932193331226e-08, "loss": 0.0568, "step": 17036 }, { "epoch": 2.7603694102397927, "grad_norm": 0.8677234649658203, "learning_rate": 8.319740821405354e-08, "loss": 0.0588, "step": 17037 }, { "epoch": 2.7605314322747896, "grad_norm": 1.054807424545288, "learning_rate": 8.308556844310589e-08, "loss": 0.0701, "step": 17038 }, { "epoch": 2.760693454309786, "grad_norm": 1.1043615341186523, "learning_rate": 8.297380262389077e-08, "loss": 0.0624, "step": 17039 }, { "epoch": 2.760855476344783, "grad_norm": 0.7604734301567078, "learning_rate": 8.286211075982764e-08, "loss": 0.0498, "step": 17040 }, { "epoch": 2.7610174983797795, "grad_norm": 0.8906594514846802, "learning_rate": 8.275049285433545e-08, "loss": 0.0596, "step": 17041 }, { "epoch": 2.7611795204147764, "grad_norm": 0.9391190409660339, "learning_rate": 8.263894891082813e-08, "loss": 0.0644, "step": 17042 }, { "epoch": 2.761341542449773, "grad_norm": 0.9467473030090332, "learning_rate": 8.252747893271906e-08, "loss": 0.0614, "step": 17043 }, { "epoch": 2.76150356448477, "grad_norm": 0.8915271162986755, "learning_rate": 8.241608292341913e-08, "loss": 0.0602, "step": 17044 }, { "epoch": 2.761665586519767, "grad_norm": 0.9345802068710327, "learning_rate": 8.230476088633644e-08, "loss": 0.0603, "step": 17045 }, { "epoch": 2.7618276085547633, "grad_norm": 0.8942722082138062, "learning_rate": 8.219351282487742e-08, "loss": 0.0602, "step": 17046 }, { "epoch": 2.76198963058976, "grad_norm": 0.9697921276092529, "learning_rate": 8.208233874244575e-08, "loss": 0.0628, "step": 17047 }, { "epoch": 2.762151652624757, "grad_norm": 0.877307116985321, "learning_rate": 8.197123864244344e-08, "loss": 0.0578, "step": 17048 }, { "epoch": 2.7623136746597536, "grad_norm": 0.9268743395805359, "learning_rate": 8.18602125282697e-08, "loss": 0.0567, "step": 17049 }, { "epoch": 2.7624756966947506, "grad_norm": 0.8506268262863159, "learning_rate": 8.174926040332182e-08, "loss": 0.0533, "step": 17050 }, { "epoch": 2.7626377187297475, "grad_norm": 0.9653171896934509, "learning_rate": 8.16383822709943e-08, "loss": 0.0601, "step": 17051 }, { "epoch": 2.762799740764744, "grad_norm": 0.9039610028266907, "learning_rate": 8.152757813468027e-08, "loss": 0.0585, "step": 17052 }, { "epoch": 2.762961762799741, "grad_norm": 0.8565463423728943, "learning_rate": 8.14168479977695e-08, "loss": 0.0521, "step": 17053 }, { "epoch": 2.7631237848347374, "grad_norm": 0.8485569357872009, "learning_rate": 8.130619186365012e-08, "loss": 0.0554, "step": 17054 }, { "epoch": 2.7632858068697344, "grad_norm": 0.872444212436676, "learning_rate": 8.119560973570834e-08, "loss": 0.0548, "step": 17055 }, { "epoch": 2.763447828904731, "grad_norm": 0.9443418979644775, "learning_rate": 8.10851016173278e-08, "loss": 0.0645, "step": 17056 }, { "epoch": 2.7636098509397278, "grad_norm": 0.9589036703109741, "learning_rate": 8.097466751188915e-08, "loss": 0.064, "step": 17057 }, { "epoch": 2.7637718729747247, "grad_norm": 0.9315710663795471, "learning_rate": 8.086430742277191e-08, "loss": 0.0634, "step": 17058 }, { "epoch": 2.763933895009721, "grad_norm": 1.0759693384170532, "learning_rate": 8.075402135335253e-08, "loss": 0.0581, "step": 17059 }, { "epoch": 2.764095917044718, "grad_norm": 0.9197863340377808, "learning_rate": 8.064380930700556e-08, "loss": 0.0607, "step": 17060 }, { "epoch": 2.764257939079715, "grad_norm": 0.8800620436668396, "learning_rate": 8.053367128710355e-08, "loss": 0.0556, "step": 17061 }, { "epoch": 2.7644199611147116, "grad_norm": 1.0651123523712158, "learning_rate": 8.042360729701604e-08, "loss": 0.0606, "step": 17062 }, { "epoch": 2.7645819831497085, "grad_norm": 0.8733367323875427, "learning_rate": 8.031361734011118e-08, "loss": 0.0663, "step": 17063 }, { "epoch": 2.764744005184705, "grad_norm": 1.1324882507324219, "learning_rate": 8.020370141975347e-08, "loss": 0.0627, "step": 17064 }, { "epoch": 2.764906027219702, "grad_norm": 0.9091924428939819, "learning_rate": 8.009385953930721e-08, "loss": 0.0635, "step": 17065 }, { "epoch": 2.7650680492546984, "grad_norm": 0.9367949962615967, "learning_rate": 7.998409170213245e-08, "loss": 0.0591, "step": 17066 }, { "epoch": 2.7652300712896953, "grad_norm": 0.7516220211982727, "learning_rate": 7.987439791158874e-08, "loss": 0.0515, "step": 17067 }, { "epoch": 2.7653920933246923, "grad_norm": 1.0401772260665894, "learning_rate": 7.976477817103117e-08, "loss": 0.0612, "step": 17068 }, { "epoch": 2.7655541153596888, "grad_norm": 0.9251458644866943, "learning_rate": 7.965523248381485e-08, "loss": 0.0598, "step": 17069 }, { "epoch": 2.7657161373946857, "grad_norm": 1.0257396697998047, "learning_rate": 7.954576085329152e-08, "loss": 0.0613, "step": 17070 }, { "epoch": 2.7658781594296826, "grad_norm": 1.0854121446609497, "learning_rate": 7.943636328281018e-08, "loss": 0.0627, "step": 17071 }, { "epoch": 2.766040181464679, "grad_norm": 0.9566676616668701, "learning_rate": 7.93270397757187e-08, "loss": 0.0564, "step": 17072 }, { "epoch": 2.766202203499676, "grad_norm": 0.9054695963859558, "learning_rate": 7.921779033536137e-08, "loss": 0.0537, "step": 17073 }, { "epoch": 2.766364225534673, "grad_norm": 1.10857093334198, "learning_rate": 7.910861496508216e-08, "loss": 0.0631, "step": 17074 }, { "epoch": 2.7665262475696695, "grad_norm": 1.0174477100372314, "learning_rate": 7.899951366822061e-08, "loss": 0.0657, "step": 17075 }, { "epoch": 2.766688269604666, "grad_norm": 0.9663045406341553, "learning_rate": 7.88904864481152e-08, "loss": 0.061, "step": 17076 }, { "epoch": 2.766850291639663, "grad_norm": 0.9540015459060669, "learning_rate": 7.878153330810184e-08, "loss": 0.0563, "step": 17077 }, { "epoch": 2.76701231367466, "grad_norm": 0.9780965447425842, "learning_rate": 7.867265425151454e-08, "loss": 0.0641, "step": 17078 }, { "epoch": 2.7671743357096563, "grad_norm": 0.9400720596313477, "learning_rate": 7.856384928168426e-08, "loss": 0.0663, "step": 17079 }, { "epoch": 2.7673363577446533, "grad_norm": 1.059554934501648, "learning_rate": 7.845511840194081e-08, "loss": 0.0611, "step": 17080 }, { "epoch": 2.76749837977965, "grad_norm": 0.911750078201294, "learning_rate": 7.834646161561044e-08, "loss": 0.0582, "step": 17081 }, { "epoch": 2.7676604018146467, "grad_norm": 1.0574133396148682, "learning_rate": 7.823787892601825e-08, "loss": 0.0648, "step": 17082 }, { "epoch": 2.7678224238496436, "grad_norm": 0.8240780234336853, "learning_rate": 7.812937033648604e-08, "loss": 0.0583, "step": 17083 }, { "epoch": 2.7679844458846405, "grad_norm": 1.0235298871994019, "learning_rate": 7.802093585033449e-08, "loss": 0.0694, "step": 17084 }, { "epoch": 2.768146467919637, "grad_norm": 1.218329668045044, "learning_rate": 7.79125754708815e-08, "loss": 0.0731, "step": 17085 }, { "epoch": 2.768308489954634, "grad_norm": 0.8903065919876099, "learning_rate": 7.780428920144217e-08, "loss": 0.0582, "step": 17086 }, { "epoch": 2.7684705119896305, "grad_norm": 0.8183029294013977, "learning_rate": 7.769607704532972e-08, "loss": 0.0587, "step": 17087 }, { "epoch": 2.7686325340246274, "grad_norm": 0.8426903486251831, "learning_rate": 7.758793900585565e-08, "loss": 0.0606, "step": 17088 }, { "epoch": 2.768794556059624, "grad_norm": 1.1522109508514404, "learning_rate": 7.747987508632871e-08, "loss": 0.0576, "step": 17089 }, { "epoch": 2.768956578094621, "grad_norm": 0.7906349897384644, "learning_rate": 7.737188529005484e-08, "loss": 0.0545, "step": 17090 }, { "epoch": 2.7691186001296177, "grad_norm": 0.8786861896514893, "learning_rate": 7.726396962033894e-08, "loss": 0.0612, "step": 17091 }, { "epoch": 2.7692806221646142, "grad_norm": 1.079187273979187, "learning_rate": 7.715612808048251e-08, "loss": 0.0612, "step": 17092 }, { "epoch": 2.769442644199611, "grad_norm": 1.0963796377182007, "learning_rate": 7.70483606737854e-08, "loss": 0.0613, "step": 17093 }, { "epoch": 2.769604666234608, "grad_norm": 0.9393141269683838, "learning_rate": 7.6940667403545e-08, "loss": 0.0641, "step": 17094 }, { "epoch": 2.7697666882696046, "grad_norm": 0.9409939050674438, "learning_rate": 7.683304827305644e-08, "loss": 0.0603, "step": 17095 }, { "epoch": 2.7699287103046015, "grad_norm": 0.8602232933044434, "learning_rate": 7.672550328561318e-08, "loss": 0.0574, "step": 17096 }, { "epoch": 2.7700907323395985, "grad_norm": 1.0245064496994019, "learning_rate": 7.661803244450455e-08, "loss": 0.0628, "step": 17097 }, { "epoch": 2.770252754374595, "grad_norm": 0.8060587048530579, "learning_rate": 7.651063575301986e-08, "loss": 0.0561, "step": 17098 }, { "epoch": 2.7704147764095914, "grad_norm": 1.021560549736023, "learning_rate": 7.64033132144451e-08, "loss": 0.0612, "step": 17099 }, { "epoch": 2.7705767984445884, "grad_norm": 1.0078121423721313, "learning_rate": 7.62960648320643e-08, "loss": 0.0581, "step": 17100 }, { "epoch": 2.7707388204795853, "grad_norm": 0.9183746576309204, "learning_rate": 7.618889060915819e-08, "loss": 0.0544, "step": 17101 }, { "epoch": 2.770900842514582, "grad_norm": 0.9616214632987976, "learning_rate": 7.608179054900634e-08, "loss": 0.0645, "step": 17102 }, { "epoch": 2.7710628645495787, "grad_norm": 1.02703058719635, "learning_rate": 7.597476465488668e-08, "loss": 0.0657, "step": 17103 }, { "epoch": 2.7712248865845757, "grad_norm": 0.9865126609802246, "learning_rate": 7.586781293007273e-08, "loss": 0.0653, "step": 17104 }, { "epoch": 2.771386908619572, "grad_norm": 0.8494060039520264, "learning_rate": 7.57609353778374e-08, "loss": 0.0586, "step": 17105 }, { "epoch": 2.771548930654569, "grad_norm": 0.9772338271141052, "learning_rate": 7.565413200145089e-08, "loss": 0.0611, "step": 17106 }, { "epoch": 2.771710952689566, "grad_norm": 1.0353899002075195, "learning_rate": 7.55474028041811e-08, "loss": 0.0631, "step": 17107 }, { "epoch": 2.7718729747245625, "grad_norm": 0.9081644415855408, "learning_rate": 7.544074778929378e-08, "loss": 0.0616, "step": 17108 }, { "epoch": 2.7720349967595594, "grad_norm": 1.0636510848999023, "learning_rate": 7.533416696005242e-08, "loss": 0.0683, "step": 17109 }, { "epoch": 2.772197018794556, "grad_norm": 0.981203556060791, "learning_rate": 7.522766031971774e-08, "loss": 0.0553, "step": 17110 }, { "epoch": 2.772359040829553, "grad_norm": 0.9812530279159546, "learning_rate": 7.512122787154908e-08, "loss": 0.0583, "step": 17111 }, { "epoch": 2.7725210628645494, "grad_norm": 0.9434771537780762, "learning_rate": 7.501486961880245e-08, "loss": 0.0635, "step": 17112 }, { "epoch": 2.7726830848995463, "grad_norm": 0.9104921817779541, "learning_rate": 7.490858556473246e-08, "loss": 0.056, "step": 17113 }, { "epoch": 2.7728451069345432, "grad_norm": 0.996190071105957, "learning_rate": 7.480237571259153e-08, "loss": 0.0633, "step": 17114 }, { "epoch": 2.7730071289695397, "grad_norm": 0.8751932978630066, "learning_rate": 7.469624006562898e-08, "loss": 0.0574, "step": 17115 }, { "epoch": 2.7731691510045366, "grad_norm": 0.8272261023521423, "learning_rate": 7.459017862709194e-08, "loss": 0.0515, "step": 17116 }, { "epoch": 2.7733311730395336, "grad_norm": 0.916673481464386, "learning_rate": 7.448419140022616e-08, "loss": 0.0609, "step": 17117 }, { "epoch": 2.77349319507453, "grad_norm": 1.0777217149734497, "learning_rate": 7.437827838827488e-08, "loss": 0.0615, "step": 17118 }, { "epoch": 2.773655217109527, "grad_norm": 1.0199631452560425, "learning_rate": 7.4272439594478e-08, "loss": 0.0547, "step": 17119 }, { "epoch": 2.7738172391445235, "grad_norm": 0.8210328817367554, "learning_rate": 7.416667502207458e-08, "loss": 0.0541, "step": 17120 }, { "epoch": 2.7739792611795204, "grad_norm": 0.8746510148048401, "learning_rate": 7.40609846743004e-08, "loss": 0.0565, "step": 17121 }, { "epoch": 2.774141283214517, "grad_norm": 0.8559312224388123, "learning_rate": 7.395536855438923e-08, "loss": 0.0582, "step": 17122 }, { "epoch": 2.774303305249514, "grad_norm": 1.033912181854248, "learning_rate": 7.384982666557322e-08, "loss": 0.0567, "step": 17123 }, { "epoch": 2.774465327284511, "grad_norm": 0.8947577476501465, "learning_rate": 7.37443590110809e-08, "loss": 0.0601, "step": 17124 }, { "epoch": 2.7746273493195073, "grad_norm": 0.901314377784729, "learning_rate": 7.363896559414024e-08, "loss": 0.0593, "step": 17125 }, { "epoch": 2.774789371354504, "grad_norm": 0.8654842972755432, "learning_rate": 7.353364641797533e-08, "loss": 0.0636, "step": 17126 }, { "epoch": 2.774951393389501, "grad_norm": 0.9468207955360413, "learning_rate": 7.342840148580888e-08, "loss": 0.0626, "step": 17127 }, { "epoch": 2.7751134154244976, "grad_norm": 0.8542966246604919, "learning_rate": 7.332323080086106e-08, "loss": 0.0542, "step": 17128 }, { "epoch": 2.7752754374594946, "grad_norm": 0.9125382900238037, "learning_rate": 7.321813436635044e-08, "loss": 0.0621, "step": 17129 }, { "epoch": 2.7754374594944915, "grad_norm": 0.9692714810371399, "learning_rate": 7.311311218549166e-08, "loss": 0.0604, "step": 17130 }, { "epoch": 2.775599481529488, "grad_norm": 0.8632164597511292, "learning_rate": 7.300816426149854e-08, "loss": 0.0619, "step": 17131 }, { "epoch": 2.775761503564485, "grad_norm": 0.9177026748657227, "learning_rate": 7.290329059758294e-08, "loss": 0.0648, "step": 17132 }, { "epoch": 2.7759235255994814, "grad_norm": 0.9254752993583679, "learning_rate": 7.279849119695314e-08, "loss": 0.0649, "step": 17133 }, { "epoch": 2.7760855476344783, "grad_norm": 1.0560814142227173, "learning_rate": 7.269376606281547e-08, "loss": 0.0711, "step": 17134 }, { "epoch": 2.776247569669475, "grad_norm": 0.8399720788002014, "learning_rate": 7.258911519837486e-08, "loss": 0.0619, "step": 17135 }, { "epoch": 2.7764095917044718, "grad_norm": 0.8254349827766418, "learning_rate": 7.248453860683291e-08, "loss": 0.0566, "step": 17136 }, { "epoch": 2.7765716137394687, "grad_norm": 0.8527083992958069, "learning_rate": 7.238003629138957e-08, "loss": 0.0586, "step": 17137 }, { "epoch": 2.776733635774465, "grad_norm": 1.0240825414657593, "learning_rate": 7.227560825524255e-08, "loss": 0.0664, "step": 17138 }, { "epoch": 2.776895657809462, "grad_norm": 1.000952124595642, "learning_rate": 7.21712545015868e-08, "loss": 0.0619, "step": 17139 }, { "epoch": 2.777057679844459, "grad_norm": 0.9190289378166199, "learning_rate": 7.20669750336156e-08, "loss": 0.064, "step": 17140 }, { "epoch": 2.7772197018794555, "grad_norm": 0.967212975025177, "learning_rate": 7.196276985451916e-08, "loss": 0.0608, "step": 17141 }, { "epoch": 2.7773817239144525, "grad_norm": 0.7784137725830078, "learning_rate": 7.185863896748662e-08, "loss": 0.0556, "step": 17142 }, { "epoch": 2.777543745949449, "grad_norm": 0.9892643094062805, "learning_rate": 7.175458237570349e-08, "loss": 0.0673, "step": 17143 }, { "epoch": 2.777705767984446, "grad_norm": 0.872578501701355, "learning_rate": 7.165060008235414e-08, "loss": 0.0567, "step": 17144 }, { "epoch": 2.7778677900194424, "grad_norm": 0.8793370723724365, "learning_rate": 7.154669209061965e-08, "loss": 0.0538, "step": 17145 }, { "epoch": 2.7780298120544393, "grad_norm": 1.0323148965835571, "learning_rate": 7.14428584036797e-08, "loss": 0.058, "step": 17146 }, { "epoch": 2.7781918340894363, "grad_norm": 0.8175366520881653, "learning_rate": 7.133909902471147e-08, "loss": 0.0573, "step": 17147 }, { "epoch": 2.7783538561244328, "grad_norm": 0.9729697108268738, "learning_rate": 7.123541395688966e-08, "loss": 0.0602, "step": 17148 }, { "epoch": 2.7785158781594297, "grad_norm": 0.9307907223701477, "learning_rate": 7.113180320338642e-08, "loss": 0.0591, "step": 17149 }, { "epoch": 2.7786779001944266, "grad_norm": 0.8893921375274658, "learning_rate": 7.102826676737202e-08, "loss": 0.0552, "step": 17150 }, { "epoch": 2.778839922229423, "grad_norm": 0.8809131979942322, "learning_rate": 7.0924804652015e-08, "loss": 0.0638, "step": 17151 }, { "epoch": 2.77900194426442, "grad_norm": 0.9402658939361572, "learning_rate": 7.082141686048066e-08, "loss": 0.0621, "step": 17152 }, { "epoch": 2.779163966299417, "grad_norm": 1.108227014541626, "learning_rate": 7.071810339593254e-08, "loss": 0.0662, "step": 17153 }, { "epoch": 2.7793259883344135, "grad_norm": 0.9297798871994019, "learning_rate": 7.061486426153146e-08, "loss": 0.0619, "step": 17154 }, { "epoch": 2.7794880103694104, "grad_norm": 0.8598953485488892, "learning_rate": 7.051169946043685e-08, "loss": 0.065, "step": 17155 }, { "epoch": 2.779650032404407, "grad_norm": 1.2274415493011475, "learning_rate": 7.040860899580475e-08, "loss": 0.0584, "step": 17156 }, { "epoch": 2.779812054439404, "grad_norm": 0.8859155774116516, "learning_rate": 7.030559287078992e-08, "loss": 0.0601, "step": 17157 }, { "epoch": 2.7799740764744003, "grad_norm": 0.8546262979507446, "learning_rate": 7.020265108854423e-08, "loss": 0.0584, "step": 17158 }, { "epoch": 2.7801360985093972, "grad_norm": 0.8823912739753723, "learning_rate": 7.009978365221687e-08, "loss": 0.0592, "step": 17159 }, { "epoch": 2.780298120544394, "grad_norm": 0.9547003507614136, "learning_rate": 6.99969905649564e-08, "loss": 0.0606, "step": 17160 }, { "epoch": 2.7804601425793907, "grad_norm": 0.7352187633514404, "learning_rate": 6.989427182990727e-08, "loss": 0.0517, "step": 17161 }, { "epoch": 2.7806221646143876, "grad_norm": 0.7668613791465759, "learning_rate": 6.979162745021306e-08, "loss": 0.0512, "step": 17162 }, { "epoch": 2.7807841866493845, "grad_norm": 0.9652883410453796, "learning_rate": 6.968905742901405e-08, "loss": 0.0578, "step": 17163 }, { "epoch": 2.780946208684381, "grad_norm": 0.8641253709793091, "learning_rate": 6.958656176944801e-08, "loss": 0.0599, "step": 17164 }, { "epoch": 2.781108230719378, "grad_norm": 0.9378582835197449, "learning_rate": 6.94841404746524e-08, "loss": 0.0599, "step": 17165 }, { "epoch": 2.7812702527543745, "grad_norm": 0.902350664138794, "learning_rate": 6.938179354776003e-08, "loss": 0.0588, "step": 17166 }, { "epoch": 2.7814322747893714, "grad_norm": 0.9747065305709839, "learning_rate": 6.927952099190282e-08, "loss": 0.0606, "step": 17167 }, { "epoch": 2.781594296824368, "grad_norm": 0.9471100568771362, "learning_rate": 6.917732281020995e-08, "loss": 0.0641, "step": 17168 }, { "epoch": 2.781756318859365, "grad_norm": 0.9175786972045898, "learning_rate": 6.907519900580862e-08, "loss": 0.0576, "step": 17169 }, { "epoch": 2.7819183408943617, "grad_norm": 0.8128597736358643, "learning_rate": 6.897314958182327e-08, "loss": 0.0569, "step": 17170 }, { "epoch": 2.7820803629293582, "grad_norm": 1.0002559423446655, "learning_rate": 6.887117454137698e-08, "loss": 0.0681, "step": 17171 }, { "epoch": 2.782242384964355, "grad_norm": 0.9145398736000061, "learning_rate": 6.87692738875892e-08, "loss": 0.0587, "step": 17172 }, { "epoch": 2.782404406999352, "grad_norm": 0.9303719997406006, "learning_rate": 6.866744762357852e-08, "loss": 0.06, "step": 17173 }, { "epoch": 2.7825664290343486, "grad_norm": 0.9759852886199951, "learning_rate": 6.856569575245969e-08, "loss": 0.062, "step": 17174 }, { "epoch": 2.7827284510693455, "grad_norm": 0.9545977711677551, "learning_rate": 6.846401827734689e-08, "loss": 0.0615, "step": 17175 }, { "epoch": 2.7828904731043425, "grad_norm": 0.8938077092170715, "learning_rate": 6.836241520135123e-08, "loss": 0.0616, "step": 17176 }, { "epoch": 2.783052495139339, "grad_norm": 1.1049113273620605, "learning_rate": 6.826088652758106e-08, "loss": 0.0655, "step": 17177 }, { "epoch": 2.7832145171743354, "grad_norm": 0.866930365562439, "learning_rate": 6.815943225914278e-08, "loss": 0.0569, "step": 17178 }, { "epoch": 2.7833765392093324, "grad_norm": 0.9813682436943054, "learning_rate": 6.805805239914087e-08, "loss": 0.0615, "step": 17179 }, { "epoch": 2.7835385612443293, "grad_norm": 1.0694559812545776, "learning_rate": 6.795674695067783e-08, "loss": 0.0656, "step": 17180 }, { "epoch": 2.783700583279326, "grad_norm": 0.8712436556816101, "learning_rate": 6.785551591685257e-08, "loss": 0.0597, "step": 17181 }, { "epoch": 2.7838626053143227, "grad_norm": 0.7749736905097961, "learning_rate": 6.77543593007629e-08, "loss": 0.0492, "step": 17182 }, { "epoch": 2.7840246273493197, "grad_norm": 0.8556262850761414, "learning_rate": 6.765327710550412e-08, "loss": 0.062, "step": 17183 }, { "epoch": 2.784186649384316, "grad_norm": 0.953547477722168, "learning_rate": 6.755226933416876e-08, "loss": 0.0589, "step": 17184 }, { "epoch": 2.784348671419313, "grad_norm": 0.8861910104751587, "learning_rate": 6.745133598984737e-08, "loss": 0.0543, "step": 17185 }, { "epoch": 2.78451069345431, "grad_norm": 1.0571638345718384, "learning_rate": 6.735047707562863e-08, "loss": 0.0595, "step": 17186 }, { "epoch": 2.7846727154893065, "grad_norm": 1.2609553337097168, "learning_rate": 6.72496925945984e-08, "loss": 0.0701, "step": 17187 }, { "epoch": 2.7848347375243034, "grad_norm": 0.9145808815956116, "learning_rate": 6.714898254984031e-08, "loss": 0.062, "step": 17188 }, { "epoch": 2.7849967595593, "grad_norm": 0.9036248326301575, "learning_rate": 6.704834694443608e-08, "loss": 0.0625, "step": 17189 }, { "epoch": 2.785158781594297, "grad_norm": 0.834865391254425, "learning_rate": 6.69477857814646e-08, "loss": 0.0523, "step": 17190 }, { "epoch": 2.7853208036292934, "grad_norm": 0.823307991027832, "learning_rate": 6.684729906400344e-08, "loss": 0.0572, "step": 17191 }, { "epoch": 2.7854828256642903, "grad_norm": 1.1076858043670654, "learning_rate": 6.674688679512654e-08, "loss": 0.0676, "step": 17192 }, { "epoch": 2.785644847699287, "grad_norm": 0.8785462975502014, "learning_rate": 6.66465489779064e-08, "loss": 0.0596, "step": 17193 }, { "epoch": 2.7858068697342837, "grad_norm": 0.8788356781005859, "learning_rate": 6.654628561541337e-08, "loss": 0.0625, "step": 17194 }, { "epoch": 2.7859688917692806, "grad_norm": 0.8812581896781921, "learning_rate": 6.644609671071556e-08, "loss": 0.0609, "step": 17195 }, { "epoch": 2.7861309138042776, "grad_norm": 0.9713631272315979, "learning_rate": 6.634598226687772e-08, "loss": 0.0644, "step": 17196 }, { "epoch": 2.786292935839274, "grad_norm": 0.881287693977356, "learning_rate": 6.624594228696323e-08, "loss": 0.0618, "step": 17197 }, { "epoch": 2.786454957874271, "grad_norm": 0.8775894641876221, "learning_rate": 6.614597677403384e-08, "loss": 0.0579, "step": 17198 }, { "epoch": 2.786616979909268, "grad_norm": 0.877691388130188, "learning_rate": 6.604608573114735e-08, "loss": 0.0617, "step": 17199 }, { "epoch": 2.7867790019442644, "grad_norm": 0.8632456064224243, "learning_rate": 6.594626916136077e-08, "loss": 0.0617, "step": 17200 }, { "epoch": 2.786941023979261, "grad_norm": 1.1622703075408936, "learning_rate": 6.584652706772804e-08, "loss": 0.0661, "step": 17201 }, { "epoch": 2.787103046014258, "grad_norm": 0.9076300263404846, "learning_rate": 6.574685945330145e-08, "loss": 0.0588, "step": 17202 }, { "epoch": 2.787265068049255, "grad_norm": 0.931695282459259, "learning_rate": 6.564726632112939e-08, "loss": 0.0568, "step": 17203 }, { "epoch": 2.7874270900842513, "grad_norm": 1.018306016921997, "learning_rate": 6.554774767426026e-08, "loss": 0.0644, "step": 17204 }, { "epoch": 2.787589112119248, "grad_norm": 0.9979287981987, "learning_rate": 6.544830351573883e-08, "loss": 0.0625, "step": 17205 }, { "epoch": 2.787751134154245, "grad_norm": 0.8496156334877014, "learning_rate": 6.534893384860824e-08, "loss": 0.0638, "step": 17206 }, { "epoch": 2.7879131561892416, "grad_norm": 0.8916325569152832, "learning_rate": 6.524963867590772e-08, "loss": 0.0595, "step": 17207 }, { "epoch": 2.7880751782242386, "grad_norm": 0.9312731027603149, "learning_rate": 6.515041800067678e-08, "loss": 0.0627, "step": 17208 }, { "epoch": 2.7882372002592355, "grad_norm": 0.8041099309921265, "learning_rate": 6.505127182595106e-08, "loss": 0.0594, "step": 17209 }, { "epoch": 2.788399222294232, "grad_norm": 0.9212023019790649, "learning_rate": 6.495220015476366e-08, "loss": 0.0539, "step": 17210 }, { "epoch": 2.788561244329229, "grad_norm": 0.9335891008377075, "learning_rate": 6.485320299014608e-08, "loss": 0.0572, "step": 17211 }, { "epoch": 2.7887232663642254, "grad_norm": 0.9345332384109497, "learning_rate": 6.475428033512754e-08, "loss": 0.0667, "step": 17212 }, { "epoch": 2.7888852883992223, "grad_norm": 0.938679575920105, "learning_rate": 6.465543219273507e-08, "loss": 0.0602, "step": 17213 }, { "epoch": 2.789047310434219, "grad_norm": 0.8519788980484009, "learning_rate": 6.455665856599291e-08, "loss": 0.05, "step": 17214 }, { "epoch": 2.7892093324692158, "grad_norm": 1.0343631505966187, "learning_rate": 6.445795945792338e-08, "loss": 0.0649, "step": 17215 }, { "epoch": 2.7893713545042127, "grad_norm": 0.935124933719635, "learning_rate": 6.435933487154627e-08, "loss": 0.0668, "step": 17216 }, { "epoch": 2.789533376539209, "grad_norm": 0.9516078233718872, "learning_rate": 6.426078480987947e-08, "loss": 0.0628, "step": 17217 }, { "epoch": 2.789695398574206, "grad_norm": 1.1208375692367554, "learning_rate": 6.416230927593803e-08, "loss": 0.0603, "step": 17218 }, { "epoch": 2.789857420609203, "grad_norm": 0.8511807918548584, "learning_rate": 6.406390827273567e-08, "loss": 0.0597, "step": 17219 }, { "epoch": 2.7900194426441995, "grad_norm": 0.8629888296127319, "learning_rate": 6.39655818032825e-08, "loss": 0.0586, "step": 17220 }, { "epoch": 2.7901814646791965, "grad_norm": 1.064953088760376, "learning_rate": 6.386732987058802e-08, "loss": 0.0625, "step": 17221 }, { "epoch": 2.790343486714193, "grad_norm": 0.9076513648033142, "learning_rate": 6.376915247765735e-08, "loss": 0.0551, "step": 17222 }, { "epoch": 2.79050550874919, "grad_norm": 0.8662562966346741, "learning_rate": 6.367104962749504e-08, "loss": 0.0561, "step": 17223 }, { "epoch": 2.7906675307841864, "grad_norm": 0.8259896636009216, "learning_rate": 6.357302132310338e-08, "loss": 0.054, "step": 17224 }, { "epoch": 2.7908295528191833, "grad_norm": 0.8116698265075684, "learning_rate": 6.347506756748084e-08, "loss": 0.0535, "step": 17225 }, { "epoch": 2.7909915748541803, "grad_norm": 0.8382045030593872, "learning_rate": 6.337718836362473e-08, "loss": 0.056, "step": 17226 }, { "epoch": 2.7911535968891767, "grad_norm": 0.8277113437652588, "learning_rate": 6.327938371453069e-08, "loss": 0.0618, "step": 17227 }, { "epoch": 2.7913156189241737, "grad_norm": 1.0887233018875122, "learning_rate": 6.318165362319023e-08, "loss": 0.0643, "step": 17228 }, { "epoch": 2.7914776409591706, "grad_norm": 0.9287629723548889, "learning_rate": 6.308399809259457e-08, "loss": 0.0593, "step": 17229 }, { "epoch": 2.791639662994167, "grad_norm": 0.8305715918540955, "learning_rate": 6.298641712573105e-08, "loss": 0.057, "step": 17230 }, { "epoch": 2.791801685029164, "grad_norm": 0.8574482202529907, "learning_rate": 6.28889107255859e-08, "loss": 0.0548, "step": 17231 }, { "epoch": 2.791963707064161, "grad_norm": 0.9703893661499023, "learning_rate": 6.279147889514226e-08, "loss": 0.0562, "step": 17232 }, { "epoch": 2.7921257290991575, "grad_norm": 0.9155253767967224, "learning_rate": 6.269412163738137e-08, "loss": 0.0623, "step": 17233 }, { "epoch": 2.7922877511341544, "grad_norm": 0.8451345562934875, "learning_rate": 6.259683895528251e-08, "loss": 0.0535, "step": 17234 }, { "epoch": 2.792449773169151, "grad_norm": 0.997577965259552, "learning_rate": 6.249963085182192e-08, "loss": 0.0666, "step": 17235 }, { "epoch": 2.792611795204148, "grad_norm": 1.17750883102417, "learning_rate": 6.24024973299736e-08, "loss": 0.0615, "step": 17236 }, { "epoch": 2.7927738172391443, "grad_norm": 0.9555923938751221, "learning_rate": 6.230543839271019e-08, "loss": 0.0606, "step": 17237 }, { "epoch": 2.7929358392741412, "grad_norm": 1.071610689163208, "learning_rate": 6.220845404300124e-08, "loss": 0.0687, "step": 17238 }, { "epoch": 2.793097861309138, "grad_norm": 0.7882108092308044, "learning_rate": 6.211154428381466e-08, "loss": 0.0511, "step": 17239 }, { "epoch": 2.7932598833441347, "grad_norm": 0.882218599319458, "learning_rate": 6.201470911811474e-08, "loss": 0.0581, "step": 17240 }, { "epoch": 2.7934219053791316, "grad_norm": 0.8951191902160645, "learning_rate": 6.191794854886496e-08, "loss": 0.0643, "step": 17241 }, { "epoch": 2.7935839274141285, "grad_norm": 0.8501166105270386, "learning_rate": 6.182126257902626e-08, "loss": 0.0549, "step": 17242 }, { "epoch": 2.793745949449125, "grad_norm": 1.0171254873275757, "learning_rate": 6.17246512115563e-08, "loss": 0.0557, "step": 17243 }, { "epoch": 2.793907971484122, "grad_norm": 0.9592586159706116, "learning_rate": 6.162811444941159e-08, "loss": 0.0647, "step": 17244 }, { "epoch": 2.7940699935191184, "grad_norm": 0.7187346816062927, "learning_rate": 6.153165229554587e-08, "loss": 0.043, "step": 17245 }, { "epoch": 2.7942320155541154, "grad_norm": 0.9293650984764099, "learning_rate": 6.143526475291067e-08, "loss": 0.0611, "step": 17246 }, { "epoch": 2.794394037589112, "grad_norm": 0.919105052947998, "learning_rate": 6.133895182445504e-08, "loss": 0.0585, "step": 17247 }, { "epoch": 2.794556059624109, "grad_norm": 0.9237651228904724, "learning_rate": 6.124271351312605e-08, "loss": 0.0615, "step": 17248 }, { "epoch": 2.7947180816591057, "grad_norm": 0.9139578938484192, "learning_rate": 6.114654982186829e-08, "loss": 0.0635, "step": 17249 }, { "epoch": 2.7948801036941022, "grad_norm": 1.0389397144317627, "learning_rate": 6.105046075362441e-08, "loss": 0.0565, "step": 17250 }, { "epoch": 2.795042125729099, "grad_norm": 0.9643591046333313, "learning_rate": 6.095444631133401e-08, "loss": 0.0637, "step": 17251 }, { "epoch": 2.795204147764096, "grad_norm": 0.9869605898857117, "learning_rate": 6.085850649793529e-08, "loss": 0.0612, "step": 17252 }, { "epoch": 2.7953661697990926, "grad_norm": 1.068179965019226, "learning_rate": 6.076264131636394e-08, "loss": 0.0603, "step": 17253 }, { "epoch": 2.7955281918340895, "grad_norm": 0.8888418674468994, "learning_rate": 6.066685076955264e-08, "loss": 0.0567, "step": 17254 }, { "epoch": 2.7956902138690864, "grad_norm": 0.759746253490448, "learning_rate": 6.057113486043236e-08, "loss": 0.0525, "step": 17255 }, { "epoch": 2.795852235904083, "grad_norm": 1.0633058547973633, "learning_rate": 6.047549359193245e-08, "loss": 0.0631, "step": 17256 }, { "epoch": 2.79601425793908, "grad_norm": 0.906599223613739, "learning_rate": 6.037992696697914e-08, "loss": 0.0616, "step": 17257 }, { "epoch": 2.7961762799740764, "grad_norm": 0.8136864900588989, "learning_rate": 6.028443498849596e-08, "loss": 0.0528, "step": 17258 }, { "epoch": 2.7963383020090733, "grad_norm": 0.992445170879364, "learning_rate": 6.018901765940499e-08, "loss": 0.0618, "step": 17259 }, { "epoch": 2.79650032404407, "grad_norm": 0.8325508236885071, "learning_rate": 6.009367498262587e-08, "loss": 0.05, "step": 17260 }, { "epoch": 2.7966623460790667, "grad_norm": 0.7613760232925415, "learning_rate": 5.999840696107595e-08, "loss": 0.0488, "step": 17261 }, { "epoch": 2.7968243681140637, "grad_norm": 1.0377253293991089, "learning_rate": 5.990321359767015e-08, "loss": 0.068, "step": 17262 }, { "epoch": 2.79698639014906, "grad_norm": 1.0658155679702759, "learning_rate": 5.980809489532085e-08, "loss": 0.0616, "step": 17263 }, { "epoch": 2.797148412184057, "grad_norm": 0.9770277142524719, "learning_rate": 5.97130508569388e-08, "loss": 0.0582, "step": 17264 }, { "epoch": 2.797310434219054, "grad_norm": 0.9497054219245911, "learning_rate": 5.961808148543219e-08, "loss": 0.0561, "step": 17265 }, { "epoch": 2.7974724562540505, "grad_norm": 0.9469033479690552, "learning_rate": 5.95231867837065e-08, "loss": 0.0573, "step": 17266 }, { "epoch": 2.7976344782890474, "grad_norm": 0.9465834498405457, "learning_rate": 5.9428366754665234e-08, "loss": 0.061, "step": 17267 }, { "epoch": 2.797796500324044, "grad_norm": 0.92009437084198, "learning_rate": 5.933362140121052e-08, "loss": 0.0604, "step": 17268 }, { "epoch": 2.797958522359041, "grad_norm": 0.8519408702850342, "learning_rate": 5.923895072624031e-08, "loss": 0.0618, "step": 17269 }, { "epoch": 2.7981205443940373, "grad_norm": 1.1452556848526, "learning_rate": 5.9144354732651455e-08, "loss": 0.0634, "step": 17270 }, { "epoch": 2.7982825664290343, "grad_norm": 0.9965704679489136, "learning_rate": 5.904983342333887e-08, "loss": 0.0651, "step": 17271 }, { "epoch": 2.798444588464031, "grad_norm": 0.9546822905540466, "learning_rate": 5.8955386801194394e-08, "loss": 0.0615, "step": 17272 }, { "epoch": 2.7986066104990277, "grad_norm": 0.8720921277999878, "learning_rate": 5.886101486910767e-08, "loss": 0.058, "step": 17273 }, { "epoch": 2.7987686325340246, "grad_norm": 0.9746670722961426, "learning_rate": 5.8766717629966387e-08, "loss": 0.0621, "step": 17274 }, { "epoch": 2.7989306545690216, "grad_norm": 0.9597407579421997, "learning_rate": 5.867249508665629e-08, "loss": 0.062, "step": 17275 }, { "epoch": 2.799092676604018, "grad_norm": 1.0109037160873413, "learning_rate": 5.857834724205979e-08, "loss": 0.0593, "step": 17276 }, { "epoch": 2.799254698639015, "grad_norm": 0.8651391863822937, "learning_rate": 5.848427409905766e-08, "loss": 0.0586, "step": 17277 }, { "epoch": 2.799416720674012, "grad_norm": 0.9493432641029358, "learning_rate": 5.839027566052841e-08, "loss": 0.0629, "step": 17278 }, { "epoch": 2.7995787427090084, "grad_norm": 0.9215166568756104, "learning_rate": 5.829635192934807e-08, "loss": 0.0638, "step": 17279 }, { "epoch": 2.7997407647440054, "grad_norm": 0.8210483193397522, "learning_rate": 5.820250290839047e-08, "loss": 0.0543, "step": 17280 }, { "epoch": 2.799902786779002, "grad_norm": 0.8877785801887512, "learning_rate": 5.810872860052747e-08, "loss": 0.0562, "step": 17281 }, { "epoch": 2.8000648088139988, "grad_norm": 0.9447657465934753, "learning_rate": 5.801502900862788e-08, "loss": 0.0624, "step": 17282 }, { "epoch": 2.8002268308489953, "grad_norm": 1.0124446153640747, "learning_rate": 5.7921404135559414e-08, "loss": 0.0577, "step": 17283 }, { "epoch": 2.800388852883992, "grad_norm": 1.1428158283233643, "learning_rate": 5.782785398418561e-08, "loss": 0.0618, "step": 17284 }, { "epoch": 2.800550874918989, "grad_norm": 1.0584673881530762, "learning_rate": 5.773437855736974e-08, "loss": 0.0651, "step": 17285 }, { "epoch": 2.8007128969539856, "grad_norm": 0.9388712048530579, "learning_rate": 5.7640977857972016e-08, "loss": 0.0651, "step": 17286 }, { "epoch": 2.8008749189889826, "grad_norm": 0.9077460765838623, "learning_rate": 5.7547651888849864e-08, "loss": 0.0635, "step": 17287 }, { "epoch": 2.8010369410239795, "grad_norm": 0.8643128275871277, "learning_rate": 5.745440065285879e-08, "loss": 0.0568, "step": 17288 }, { "epoch": 2.801198963058976, "grad_norm": 0.9081558585166931, "learning_rate": 5.736122415285206e-08, "loss": 0.0654, "step": 17289 }, { "epoch": 2.801360985093973, "grad_norm": 0.9258993864059448, "learning_rate": 5.726812239168128e-08, "loss": 0.0616, "step": 17290 }, { "epoch": 2.8015230071289694, "grad_norm": 0.9604719281196594, "learning_rate": 5.717509537219418e-08, "loss": 0.0643, "step": 17291 }, { "epoch": 2.8016850291639663, "grad_norm": 0.8643040060997009, "learning_rate": 5.708214309723792e-08, "loss": 0.0642, "step": 17292 }, { "epoch": 2.801847051198963, "grad_norm": 0.8756396770477295, "learning_rate": 5.6989265569656335e-08, "loss": 0.0583, "step": 17293 }, { "epoch": 2.8020090732339598, "grad_norm": 0.7664240598678589, "learning_rate": 5.689646279229105e-08, "loss": 0.0532, "step": 17294 }, { "epoch": 2.8021710952689567, "grad_norm": 0.7147080898284912, "learning_rate": 5.680373476798201e-08, "loss": 0.0479, "step": 17295 }, { "epoch": 2.802333117303953, "grad_norm": 0.7867782711982727, "learning_rate": 5.671108149956611e-08, "loss": 0.0516, "step": 17296 }, { "epoch": 2.80249513933895, "grad_norm": 0.9071274399757385, "learning_rate": 5.661850298987859e-08, "loss": 0.0572, "step": 17297 }, { "epoch": 2.802657161373947, "grad_norm": 0.8463209867477417, "learning_rate": 5.6525999241751894e-08, "loss": 0.056, "step": 17298 }, { "epoch": 2.8028191834089435, "grad_norm": 1.1977580785751343, "learning_rate": 5.643357025801655e-08, "loss": 0.0617, "step": 17299 }, { "epoch": 2.8029812054439405, "grad_norm": 0.9107047319412231, "learning_rate": 5.6341216041500555e-08, "loss": 0.061, "step": 17300 }, { "epoch": 2.8031432274789374, "grad_norm": 0.907247006893158, "learning_rate": 5.624893659503028e-08, "loss": 0.0526, "step": 17301 }, { "epoch": 2.803305249513934, "grad_norm": 0.8431805968284607, "learning_rate": 5.6156731921428455e-08, "loss": 0.0576, "step": 17302 }, { "epoch": 2.8034672715489304, "grad_norm": 1.0197104215621948, "learning_rate": 5.6064602023516154e-08, "loss": 0.0609, "step": 17303 }, { "epoch": 2.8036292935839273, "grad_norm": 0.908807098865509, "learning_rate": 5.597254690411363e-08, "loss": 0.061, "step": 17304 }, { "epoch": 2.8037913156189243, "grad_norm": 0.8455592393875122, "learning_rate": 5.588056656603641e-08, "loss": 0.0588, "step": 17305 }, { "epoch": 2.8039533376539207, "grad_norm": 0.9535689949989319, "learning_rate": 5.5788661012099176e-08, "loss": 0.0598, "step": 17306 }, { "epoch": 2.8041153596889177, "grad_norm": 0.8808077573776245, "learning_rate": 5.5696830245114134e-08, "loss": 0.0658, "step": 17307 }, { "epoch": 2.8042773817239146, "grad_norm": 0.8899673819541931, "learning_rate": 5.560507426789069e-08, "loss": 0.0557, "step": 17308 }, { "epoch": 2.804439403758911, "grad_norm": 0.8185904622077942, "learning_rate": 5.551339308323689e-08, "loss": 0.0569, "step": 17309 }, { "epoch": 2.804601425793908, "grad_norm": 0.8157827258110046, "learning_rate": 5.5421786693957705e-08, "loss": 0.0508, "step": 17310 }, { "epoch": 2.804763447828905, "grad_norm": 0.9755039811134338, "learning_rate": 5.533025510285617e-08, "loss": 0.0592, "step": 17311 }, { "epoch": 2.8049254698639015, "grad_norm": 0.9864017367362976, "learning_rate": 5.523879831273282e-08, "loss": 0.0625, "step": 17312 }, { "epoch": 2.8050874918988984, "grad_norm": 0.8920227289199829, "learning_rate": 5.514741632638571e-08, "loss": 0.0634, "step": 17313 }, { "epoch": 2.805249513933895, "grad_norm": 0.9825411438941956, "learning_rate": 5.505610914661147e-08, "loss": 0.0567, "step": 17314 }, { "epoch": 2.805411535968892, "grad_norm": 1.1181998252868652, "learning_rate": 5.496487677620399e-08, "loss": 0.0629, "step": 17315 }, { "epoch": 2.8055735580038883, "grad_norm": 1.0909833908081055, "learning_rate": 5.487371921795381e-08, "loss": 0.0687, "step": 17316 }, { "epoch": 2.8057355800388852, "grad_norm": 0.9423796534538269, "learning_rate": 5.478263647465093e-08, "loss": 0.0582, "step": 17317 }, { "epoch": 2.805897602073882, "grad_norm": 0.8542878031730652, "learning_rate": 5.4691628549082e-08, "loss": 0.0655, "step": 17318 }, { "epoch": 2.8060596241088787, "grad_norm": 0.9813777208328247, "learning_rate": 5.4600695444032014e-08, "loss": 0.0615, "step": 17319 }, { "epoch": 2.8062216461438756, "grad_norm": 0.9087584018707275, "learning_rate": 5.450983716228292e-08, "loss": 0.0576, "step": 17320 }, { "epoch": 2.8063836681788725, "grad_norm": 0.9132459759712219, "learning_rate": 5.441905370661471e-08, "loss": 0.0615, "step": 17321 }, { "epoch": 2.806545690213869, "grad_norm": 0.7650594115257263, "learning_rate": 5.4328345079805164e-08, "loss": 0.0522, "step": 17322 }, { "epoch": 2.806707712248866, "grad_norm": 0.9708273410797119, "learning_rate": 5.423771128462985e-08, "loss": 0.0676, "step": 17323 }, { "epoch": 2.806869734283863, "grad_norm": 0.949398934841156, "learning_rate": 5.4147152323862085e-08, "loss": 0.0591, "step": 17324 }, { "epoch": 2.8070317563188594, "grad_norm": 0.8356457352638245, "learning_rate": 5.405666820027272e-08, "loss": 0.0577, "step": 17325 }, { "epoch": 2.807193778353856, "grad_norm": 0.9790476560592651, "learning_rate": 5.3966258916629824e-08, "loss": 0.0585, "step": 17326 }, { "epoch": 2.807355800388853, "grad_norm": 0.979407548904419, "learning_rate": 5.387592447570061e-08, "loss": 0.0618, "step": 17327 }, { "epoch": 2.8075178224238497, "grad_norm": 0.8905614614486694, "learning_rate": 5.378566488024817e-08, "loss": 0.0569, "step": 17328 }, { "epoch": 2.807679844458846, "grad_norm": 0.8756917715072632, "learning_rate": 5.3695480133034994e-08, "loss": 0.0593, "step": 17329 }, { "epoch": 2.807841866493843, "grad_norm": 0.8476364016532898, "learning_rate": 5.3605370236820276e-08, "loss": 0.0571, "step": 17330 }, { "epoch": 2.80800388852884, "grad_norm": 1.1375116109848022, "learning_rate": 5.3515335194360694e-08, "loss": 0.0673, "step": 17331 }, { "epoch": 2.8081659105638366, "grad_norm": 0.9701889157295227, "learning_rate": 5.3425375008411276e-08, "loss": 0.0694, "step": 17332 }, { "epoch": 2.8083279325988335, "grad_norm": 0.8260276913642883, "learning_rate": 5.333548968172536e-08, "loss": 0.0599, "step": 17333 }, { "epoch": 2.8084899546338304, "grad_norm": 0.8484598994255066, "learning_rate": 5.3245679217052424e-08, "loss": 0.0548, "step": 17334 }, { "epoch": 2.808651976668827, "grad_norm": 0.9745419025421143, "learning_rate": 5.315594361714083e-08, "loss": 0.062, "step": 17335 }, { "epoch": 2.808813998703824, "grad_norm": 0.868465006351471, "learning_rate": 5.3066282884735863e-08, "loss": 0.0614, "step": 17336 }, { "epoch": 2.8089760207388204, "grad_norm": 0.9748360514640808, "learning_rate": 5.297669702258118e-08, "loss": 0.0632, "step": 17337 }, { "epoch": 2.8091380427738173, "grad_norm": 1.0200855731964111, "learning_rate": 5.2887186033417914e-08, "loss": 0.0625, "step": 17338 }, { "epoch": 2.809300064808814, "grad_norm": 1.0314770936965942, "learning_rate": 5.279774991998499e-08, "loss": 0.0611, "step": 17339 }, { "epoch": 2.8094620868438107, "grad_norm": 1.0516095161437988, "learning_rate": 5.270838868501854e-08, "loss": 0.0644, "step": 17340 }, { "epoch": 2.8096241088788076, "grad_norm": 0.8740168809890747, "learning_rate": 5.261910233125333e-08, "loss": 0.0567, "step": 17341 }, { "epoch": 2.809786130913804, "grad_norm": 0.9460188746452332, "learning_rate": 5.252989086142107e-08, "loss": 0.0538, "step": 17342 }, { "epoch": 2.809948152948801, "grad_norm": 0.8696618676185608, "learning_rate": 5.244075427825124e-08, "loss": 0.0557, "step": 17343 }, { "epoch": 2.810110174983798, "grad_norm": 1.0212994813919067, "learning_rate": 5.235169258447137e-08, "loss": 0.0699, "step": 17344 }, { "epoch": 2.8102721970187945, "grad_norm": 1.036773443222046, "learning_rate": 5.2262705782806513e-08, "loss": 0.0664, "step": 17345 }, { "epoch": 2.8104342190537914, "grad_norm": 0.875264585018158, "learning_rate": 5.2173793875979204e-08, "loss": 0.0647, "step": 17346 }, { "epoch": 2.810596241088788, "grad_norm": 1.0210704803466797, "learning_rate": 5.208495686671061e-08, "loss": 0.0674, "step": 17347 }, { "epoch": 2.810758263123785, "grad_norm": 1.0680155754089355, "learning_rate": 5.199619475771856e-08, "loss": 0.0707, "step": 17348 }, { "epoch": 2.8109202851587813, "grad_norm": 0.8734906315803528, "learning_rate": 5.190750755171864e-08, "loss": 0.0533, "step": 17349 }, { "epoch": 2.8110823071937783, "grad_norm": 0.8121834397315979, "learning_rate": 5.181889525142453e-08, "loss": 0.0516, "step": 17350 }, { "epoch": 2.811244329228775, "grad_norm": 1.0527007579803467, "learning_rate": 5.1730357859547666e-08, "loss": 0.0645, "step": 17351 }, { "epoch": 2.8114063512637717, "grad_norm": 0.9521415829658508, "learning_rate": 5.164189537879782e-08, "loss": 0.0601, "step": 17352 }, { "epoch": 2.8115683732987686, "grad_norm": 0.9354329705238342, "learning_rate": 5.155350781188062e-08, "loss": 0.0609, "step": 17353 }, { "epoch": 2.8117303953337656, "grad_norm": 0.8666455149650574, "learning_rate": 5.146519516150084e-08, "loss": 0.0575, "step": 17354 }, { "epoch": 2.811892417368762, "grad_norm": 0.874804675579071, "learning_rate": 5.137695743036103e-08, "loss": 0.0606, "step": 17355 }, { "epoch": 2.812054439403759, "grad_norm": 0.96415114402771, "learning_rate": 5.128879462116071e-08, "loss": 0.0546, "step": 17356 }, { "epoch": 2.812216461438756, "grad_norm": 1.104056477546692, "learning_rate": 5.1200706736597435e-08, "loss": 0.0641, "step": 17357 }, { "epoch": 2.8123784834737524, "grad_norm": 0.9852107763290405, "learning_rate": 5.111269377936656e-08, "loss": 0.0684, "step": 17358 }, { "epoch": 2.8125405055087493, "grad_norm": 0.9444692730903625, "learning_rate": 5.10247557521612e-08, "loss": 0.0621, "step": 17359 }, { "epoch": 2.812702527543746, "grad_norm": 0.8795128464698792, "learning_rate": 5.093689265767143e-08, "loss": 0.0592, "step": 17360 }, { "epoch": 2.8128645495787428, "grad_norm": 0.9272993206977844, "learning_rate": 5.084910449858649e-08, "loss": 0.0582, "step": 17361 }, { "epoch": 2.8130265716137393, "grad_norm": 1.0397619009017944, "learning_rate": 5.0761391277591996e-08, "loss": 0.0598, "step": 17362 }, { "epoch": 2.813188593648736, "grad_norm": 0.9312973022460938, "learning_rate": 5.0673752997372204e-08, "loss": 0.0581, "step": 17363 }, { "epoch": 2.813350615683733, "grad_norm": 0.9002386331558228, "learning_rate": 5.05861896606083e-08, "loss": 0.0614, "step": 17364 }, { "epoch": 2.8135126377187296, "grad_norm": 0.9482325911521912, "learning_rate": 5.049870126997897e-08, "loss": 0.0642, "step": 17365 }, { "epoch": 2.8136746597537265, "grad_norm": 0.8579296469688416, "learning_rate": 5.0411287828162346e-08, "loss": 0.0602, "step": 17366 }, { "epoch": 2.8138366817887235, "grad_norm": 0.9267897605895996, "learning_rate": 5.032394933783213e-08, "loss": 0.0552, "step": 17367 }, { "epoch": 2.81399870382372, "grad_norm": 0.8635824918746948, "learning_rate": 5.023668580166091e-08, "loss": 0.057, "step": 17368 }, { "epoch": 2.814160725858717, "grad_norm": 1.0104823112487793, "learning_rate": 5.014949722231876e-08, "loss": 0.0652, "step": 17369 }, { "epoch": 2.8143227478937134, "grad_norm": 0.9497948884963989, "learning_rate": 5.0062383602473566e-08, "loss": 0.0546, "step": 17370 }, { "epoch": 2.8144847699287103, "grad_norm": 0.9637094140052795, "learning_rate": 4.9975344944790674e-08, "loss": 0.0576, "step": 17371 }, { "epoch": 2.814646791963707, "grad_norm": 0.904549241065979, "learning_rate": 4.9888381251933237e-08, "loss": 0.0664, "step": 17372 }, { "epoch": 2.8148088139987038, "grad_norm": 0.9141635894775391, "learning_rate": 4.980149252656219e-08, "loss": 0.0575, "step": 17373 }, { "epoch": 2.8149708360337007, "grad_norm": 0.8708399534225464, "learning_rate": 4.971467877133651e-08, "loss": 0.0565, "step": 17374 }, { "epoch": 2.815132858068697, "grad_norm": 0.9253936409950256, "learning_rate": 4.962793998891158e-08, "loss": 0.0568, "step": 17375 }, { "epoch": 2.815294880103694, "grad_norm": 1.0665290355682373, "learning_rate": 4.954127618194193e-08, "loss": 0.0641, "step": 17376 }, { "epoch": 2.815456902138691, "grad_norm": 0.9630182385444641, "learning_rate": 4.945468735307934e-08, "loss": 0.0655, "step": 17377 }, { "epoch": 2.8156189241736875, "grad_norm": 0.8089599609375, "learning_rate": 4.936817350497336e-08, "loss": 0.057, "step": 17378 }, { "epoch": 2.8157809462086845, "grad_norm": 0.85359787940979, "learning_rate": 4.9281734640270476e-08, "loss": 0.0535, "step": 17379 }, { "epoch": 2.8159429682436814, "grad_norm": 0.9590152502059937, "learning_rate": 4.919537076161579e-08, "loss": 0.0589, "step": 17380 }, { "epoch": 2.816104990278678, "grad_norm": 0.8613442182540894, "learning_rate": 4.910908187165248e-08, "loss": 0.0625, "step": 17381 }, { "epoch": 2.816267012313675, "grad_norm": 0.9294003248214722, "learning_rate": 4.90228679730198e-08, "loss": 0.0623, "step": 17382 }, { "epoch": 2.8164290343486713, "grad_norm": 0.906424343585968, "learning_rate": 4.893672906835623e-08, "loss": 0.058, "step": 17383 }, { "epoch": 2.8165910563836682, "grad_norm": 0.8201852440834045, "learning_rate": 4.8850665160297406e-08, "loss": 0.0538, "step": 17384 }, { "epoch": 2.8167530784186647, "grad_norm": 0.8930208086967468, "learning_rate": 4.8764676251476237e-08, "loss": 0.0556, "step": 17385 }, { "epoch": 2.8169151004536617, "grad_norm": 0.8145630955696106, "learning_rate": 4.867876234452423e-08, "loss": 0.0543, "step": 17386 }, { "epoch": 2.8170771224886586, "grad_norm": 1.011269211769104, "learning_rate": 4.859292344207012e-08, "loss": 0.0641, "step": 17387 }, { "epoch": 2.817239144523655, "grad_norm": 0.9331755638122559, "learning_rate": 4.85071595467404e-08, "loss": 0.0623, "step": 17388 }, { "epoch": 2.817401166558652, "grad_norm": 1.006993055343628, "learning_rate": 4.842147066115882e-08, "loss": 0.0664, "step": 17389 }, { "epoch": 2.817563188593649, "grad_norm": 0.8741045594215393, "learning_rate": 4.8335856787947447e-08, "loss": 0.0569, "step": 17390 }, { "epoch": 2.8177252106286454, "grad_norm": 0.9738301634788513, "learning_rate": 4.825031792972612e-08, "loss": 0.0626, "step": 17391 }, { "epoch": 2.8178872326636424, "grad_norm": 0.9677324295043945, "learning_rate": 4.81648540891122e-08, "loss": 0.0639, "step": 17392 }, { "epoch": 2.818049254698639, "grad_norm": 0.8875753283500671, "learning_rate": 4.807946526872026e-08, "loss": 0.0516, "step": 17393 }, { "epoch": 2.818211276733636, "grad_norm": 0.8754310607910156, "learning_rate": 4.799415147116265e-08, "loss": 0.0591, "step": 17394 }, { "epoch": 2.8183732987686323, "grad_norm": 0.9936445355415344, "learning_rate": 4.7908912699050906e-08, "loss": 0.0666, "step": 17395 }, { "epoch": 2.8185353208036292, "grad_norm": 0.9375000596046448, "learning_rate": 4.782374895499236e-08, "loss": 0.0646, "step": 17396 }, { "epoch": 2.818697342838626, "grad_norm": 0.9706060886383057, "learning_rate": 4.773866024159274e-08, "loss": 0.0558, "step": 17397 }, { "epoch": 2.8188593648736227, "grad_norm": 0.8691913485527039, "learning_rate": 4.7653646561455767e-08, "loss": 0.063, "step": 17398 }, { "epoch": 2.8190213869086196, "grad_norm": 1.1515320539474487, "learning_rate": 4.756870791718271e-08, "loss": 0.0665, "step": 17399 }, { "epoch": 2.8191834089436165, "grad_norm": 1.0879017114639282, "learning_rate": 4.7483844311372594e-08, "loss": 0.0659, "step": 17400 }, { "epoch": 2.819345430978613, "grad_norm": 0.8411441445350647, "learning_rate": 4.739905574662168e-08, "loss": 0.0525, "step": 17401 }, { "epoch": 2.81950745301361, "grad_norm": 0.8254294395446777, "learning_rate": 4.731434222552456e-08, "loss": 0.0584, "step": 17402 }, { "epoch": 2.819669475048607, "grad_norm": 0.8175220489501953, "learning_rate": 4.722970375067304e-08, "loss": 0.0581, "step": 17403 }, { "epoch": 2.8198314970836034, "grad_norm": 0.9433904886245728, "learning_rate": 4.7145140324657e-08, "loss": 0.0595, "step": 17404 }, { "epoch": 2.8199935191186, "grad_norm": 0.9785334467887878, "learning_rate": 4.7060651950064094e-08, "loss": 0.0633, "step": 17405 }, { "epoch": 2.820155541153597, "grad_norm": 0.7298676371574402, "learning_rate": 4.697623862947892e-08, "loss": 0.0528, "step": 17406 }, { "epoch": 2.8203175631885937, "grad_norm": 0.8374927639961243, "learning_rate": 4.689190036548524e-08, "loss": 0.0558, "step": 17407 }, { "epoch": 2.82047958522359, "grad_norm": 0.8497073650360107, "learning_rate": 4.680763716066239e-08, "loss": 0.0592, "step": 17408 }, { "epoch": 2.820641607258587, "grad_norm": 1.0084335803985596, "learning_rate": 4.672344901758941e-08, "loss": 0.0642, "step": 17409 }, { "epoch": 2.820803629293584, "grad_norm": 0.8744716644287109, "learning_rate": 4.663933593884229e-08, "loss": 0.0627, "step": 17410 }, { "epoch": 2.8209656513285806, "grad_norm": 0.8989715576171875, "learning_rate": 4.655529792699426e-08, "loss": 0.0603, "step": 17411 }, { "epoch": 2.8211276733635775, "grad_norm": 1.0203579664230347, "learning_rate": 4.6471334984616866e-08, "loss": 0.0635, "step": 17412 }, { "epoch": 2.8212896953985744, "grad_norm": 0.8238543272018433, "learning_rate": 4.6387447114278897e-08, "loss": 0.0526, "step": 17413 }, { "epoch": 2.821451717433571, "grad_norm": 0.872983455657959, "learning_rate": 4.6303634318548006e-08, "loss": 0.0611, "step": 17414 }, { "epoch": 2.821613739468568, "grad_norm": 0.9443795084953308, "learning_rate": 4.6219896599987714e-08, "loss": 0.0602, "step": 17415 }, { "epoch": 2.8217757615035644, "grad_norm": 0.8985307812690735, "learning_rate": 4.613623396116068e-08, "loss": 0.0577, "step": 17416 }, { "epoch": 2.8219377835385613, "grad_norm": 0.9005011916160583, "learning_rate": 4.6052646404626814e-08, "loss": 0.0627, "step": 17417 }, { "epoch": 2.8220998055735578, "grad_norm": 1.0429044961929321, "learning_rate": 4.596913393294322e-08, "loss": 0.0734, "step": 17418 }, { "epoch": 2.8222618276085547, "grad_norm": 0.9500260949134827, "learning_rate": 4.5885696548665645e-08, "loss": 0.0641, "step": 17419 }, { "epoch": 2.8224238496435516, "grad_norm": 0.828524112701416, "learning_rate": 4.580233425434677e-08, "loss": 0.0574, "step": 17420 }, { "epoch": 2.822585871678548, "grad_norm": 1.0398857593536377, "learning_rate": 4.57190470525376e-08, "loss": 0.0606, "step": 17421 }, { "epoch": 2.822747893713545, "grad_norm": 0.9271135926246643, "learning_rate": 4.563583494578638e-08, "loss": 0.0619, "step": 17422 }, { "epoch": 2.822909915748542, "grad_norm": 0.8910772800445557, "learning_rate": 4.555269793663886e-08, "loss": 0.0572, "step": 17423 }, { "epoch": 2.8230719377835385, "grad_norm": 0.9423297047615051, "learning_rate": 4.546963602763937e-08, "loss": 0.0671, "step": 17424 }, { "epoch": 2.8232339598185354, "grad_norm": 0.9918851256370544, "learning_rate": 4.5386649221329516e-08, "loss": 0.0641, "step": 17425 }, { "epoch": 2.8233959818535324, "grad_norm": 0.9373489022254944, "learning_rate": 4.530373752024753e-08, "loss": 0.0565, "step": 17426 }, { "epoch": 2.823558003888529, "grad_norm": 0.9793664216995239, "learning_rate": 4.5220900926931374e-08, "loss": 0.0552, "step": 17427 }, { "epoch": 2.8237200259235253, "grad_norm": 0.9216907024383545, "learning_rate": 4.51381394439146e-08, "loss": 0.0623, "step": 17428 }, { "epoch": 2.8238820479585223, "grad_norm": 0.9568885564804077, "learning_rate": 4.5055453073730715e-08, "loss": 0.0557, "step": 17429 }, { "epoch": 2.824044069993519, "grad_norm": 0.8096397519111633, "learning_rate": 4.497284181890882e-08, "loss": 0.0505, "step": 17430 }, { "epoch": 2.8242060920285157, "grad_norm": 0.8063415884971619, "learning_rate": 4.4890305681977164e-08, "loss": 0.0506, "step": 17431 }, { "epoch": 2.8243681140635126, "grad_norm": 0.9902795553207397, "learning_rate": 4.480784466546068e-08, "loss": 0.0676, "step": 17432 }, { "epoch": 2.8245301360985096, "grad_norm": 0.9459345936775208, "learning_rate": 4.4725458771882615e-08, "loss": 0.0634, "step": 17433 }, { "epoch": 2.824692158133506, "grad_norm": 0.8988990783691406, "learning_rate": 4.4643148003764015e-08, "loss": 0.0587, "step": 17434 }, { "epoch": 2.824854180168503, "grad_norm": 0.9229409694671631, "learning_rate": 4.456091236362314e-08, "loss": 0.0612, "step": 17435 }, { "epoch": 2.8250162022035, "grad_norm": 1.0969082117080688, "learning_rate": 4.44787518539766e-08, "loss": 0.0609, "step": 17436 }, { "epoch": 2.8251782242384964, "grad_norm": 0.863645076751709, "learning_rate": 4.4396666477337645e-08, "loss": 0.055, "step": 17437 }, { "epoch": 2.8253402462734933, "grad_norm": 0.9219480156898499, "learning_rate": 4.4314656236218444e-08, "loss": 0.061, "step": 17438 }, { "epoch": 2.82550226830849, "grad_norm": 1.2461076974868774, "learning_rate": 4.423272113312782e-08, "loss": 0.0572, "step": 17439 }, { "epoch": 2.8256642903434868, "grad_norm": 0.7902315258979797, "learning_rate": 4.415086117057377e-08, "loss": 0.0539, "step": 17440 }, { "epoch": 2.8258263123784833, "grad_norm": 0.904567301273346, "learning_rate": 4.406907635105984e-08, "loss": 0.0638, "step": 17441 }, { "epoch": 2.82598833441348, "grad_norm": 0.8270232081413269, "learning_rate": 4.398736667708875e-08, "loss": 0.0571, "step": 17442 }, { "epoch": 2.826150356448477, "grad_norm": 1.1331998109817505, "learning_rate": 4.390573215116101e-08, "loss": 0.0772, "step": 17443 }, { "epoch": 2.8263123784834736, "grad_norm": 0.8062843680381775, "learning_rate": 4.382417277577433e-08, "loss": 0.0547, "step": 17444 }, { "epoch": 2.8264744005184705, "grad_norm": 1.1029092073440552, "learning_rate": 4.374268855342395e-08, "loss": 0.065, "step": 17445 }, { "epoch": 2.8266364225534675, "grad_norm": 0.9821694493293762, "learning_rate": 4.3661279486603424e-08, "loss": 0.0602, "step": 17446 }, { "epoch": 2.826798444588464, "grad_norm": 0.8740617036819458, "learning_rate": 4.357994557780354e-08, "loss": 0.0604, "step": 17447 }, { "epoch": 2.826960466623461, "grad_norm": 1.0357604026794434, "learning_rate": 4.349868682951286e-08, "loss": 0.0618, "step": 17448 }, { "epoch": 2.8271224886584574, "grad_norm": 0.8919463753700256, "learning_rate": 4.3417503244217726e-08, "loss": 0.0608, "step": 17449 }, { "epoch": 2.8272845106934543, "grad_norm": 0.9080489873886108, "learning_rate": 4.333639482440199e-08, "loss": 0.0629, "step": 17450 }, { "epoch": 2.827446532728451, "grad_norm": 0.9267844557762146, "learning_rate": 4.3255361572547836e-08, "loss": 0.0615, "step": 17451 }, { "epoch": 2.8276085547634477, "grad_norm": 0.9971922039985657, "learning_rate": 4.3174403491134385e-08, "loss": 0.0687, "step": 17452 }, { "epoch": 2.8277705767984447, "grad_norm": 0.7694254517555237, "learning_rate": 4.309352058263855e-08, "loss": 0.0511, "step": 17453 }, { "epoch": 2.827932598833441, "grad_norm": 0.826542854309082, "learning_rate": 4.301271284953584e-08, "loss": 0.0516, "step": 17454 }, { "epoch": 2.828094620868438, "grad_norm": 0.7882339358329773, "learning_rate": 4.29319802942979e-08, "loss": 0.0527, "step": 17455 }, { "epoch": 2.828256642903435, "grad_norm": 0.8655419945716858, "learning_rate": 4.285132291939526e-08, "loss": 0.059, "step": 17456 }, { "epoch": 2.8284186649384315, "grad_norm": 0.8806518316268921, "learning_rate": 4.27707407272962e-08, "loss": 0.0605, "step": 17457 }, { "epoch": 2.8285806869734285, "grad_norm": 0.9758058786392212, "learning_rate": 4.2690233720466265e-08, "loss": 0.0649, "step": 17458 }, { "epoch": 2.8287427090084254, "grad_norm": 0.8468053340911865, "learning_rate": 4.2609801901368485e-08, "loss": 0.0556, "step": 17459 }, { "epoch": 2.828904731043422, "grad_norm": 0.8715428709983826, "learning_rate": 4.2529445272463946e-08, "loss": 0.0595, "step": 17460 }, { "epoch": 2.829066753078419, "grad_norm": 1.172911524772644, "learning_rate": 4.2449163836211507e-08, "loss": 0.0675, "step": 17461 }, { "epoch": 2.8292287751134153, "grad_norm": 0.836900532245636, "learning_rate": 4.2368957595067264e-08, "loss": 0.0587, "step": 17462 }, { "epoch": 2.8293907971484122, "grad_norm": 0.9022508263587952, "learning_rate": 4.228882655148564e-08, "loss": 0.0594, "step": 17463 }, { "epoch": 2.8295528191834087, "grad_norm": 0.8521215915679932, "learning_rate": 4.220877070791857e-08, "loss": 0.0583, "step": 17464 }, { "epoch": 2.8297148412184057, "grad_norm": 0.9187961220741272, "learning_rate": 4.2128790066815195e-08, "loss": 0.0621, "step": 17465 }, { "epoch": 2.8298768632534026, "grad_norm": 0.8813655972480774, "learning_rate": 4.204888463062273e-08, "loss": 0.0555, "step": 17466 }, { "epoch": 2.830038885288399, "grad_norm": 0.7997599840164185, "learning_rate": 4.1969054401786724e-08, "loss": 0.0553, "step": 17467 }, { "epoch": 2.830200907323396, "grad_norm": 0.8015317916870117, "learning_rate": 4.188929938274911e-08, "loss": 0.0537, "step": 17468 }, { "epoch": 2.830362929358393, "grad_norm": 0.8224532604217529, "learning_rate": 4.1809619575950425e-08, "loss": 0.0552, "step": 17469 }, { "epoch": 2.8305249513933894, "grad_norm": 1.1566404104232788, "learning_rate": 4.1730014983828724e-08, "loss": 0.0624, "step": 17470 }, { "epoch": 2.8306869734283864, "grad_norm": 0.8228906989097595, "learning_rate": 4.165048560881929e-08, "loss": 0.0598, "step": 17471 }, { "epoch": 2.830848995463383, "grad_norm": 1.1967852115631104, "learning_rate": 4.157103145335628e-08, "loss": 0.0753, "step": 17472 }, { "epoch": 2.83101101749838, "grad_norm": 0.9739968776702881, "learning_rate": 4.149165251987053e-08, "loss": 0.0575, "step": 17473 }, { "epoch": 2.8311730395333763, "grad_norm": 0.9316312074661255, "learning_rate": 4.141234881079065e-08, "loss": 0.0653, "step": 17474 }, { "epoch": 2.8313350615683732, "grad_norm": 1.3163434267044067, "learning_rate": 4.1333120328542754e-08, "loss": 0.0668, "step": 17475 }, { "epoch": 2.83149708360337, "grad_norm": 0.9953809976577759, "learning_rate": 4.125396707555213e-08, "loss": 0.0615, "step": 17476 }, { "epoch": 2.8316591056383666, "grad_norm": 1.2429362535476685, "learning_rate": 4.1174889054239885e-08, "loss": 0.0626, "step": 17477 }, { "epoch": 2.8318211276733636, "grad_norm": 1.1035979986190796, "learning_rate": 4.109588626702576e-08, "loss": 0.0654, "step": 17478 }, { "epoch": 2.8319831497083605, "grad_norm": 0.7999586462974548, "learning_rate": 4.101695871632699e-08, "loss": 0.0552, "step": 17479 }, { "epoch": 2.832145171743357, "grad_norm": 1.007940649986267, "learning_rate": 4.0938106404558864e-08, "loss": 0.0631, "step": 17480 }, { "epoch": 2.832307193778354, "grad_norm": 0.8191199898719788, "learning_rate": 4.085932933413361e-08, "loss": 0.0567, "step": 17481 }, { "epoch": 2.832469215813351, "grad_norm": 1.0429675579071045, "learning_rate": 4.078062750746209e-08, "loss": 0.0633, "step": 17482 }, { "epoch": 2.8326312378483474, "grad_norm": 0.9091582298278809, "learning_rate": 4.070200092695209e-08, "loss": 0.0591, "step": 17483 }, { "epoch": 2.8327932598833443, "grad_norm": 0.9163491725921631, "learning_rate": 4.062344959500947e-08, "loss": 0.0672, "step": 17484 }, { "epoch": 2.832955281918341, "grad_norm": 1.2758320569992065, "learning_rate": 4.054497351403758e-08, "loss": 0.0597, "step": 17485 }, { "epoch": 2.8331173039533377, "grad_norm": 1.0452107191085815, "learning_rate": 4.0466572686437833e-08, "loss": 0.0654, "step": 17486 }, { "epoch": 2.833279325988334, "grad_norm": 1.031578540802002, "learning_rate": 4.038824711460943e-08, "loss": 0.0656, "step": 17487 }, { "epoch": 2.833441348023331, "grad_norm": 0.8796032071113586, "learning_rate": 4.0309996800947936e-08, "loss": 0.06, "step": 17488 }, { "epoch": 2.833603370058328, "grad_norm": 0.7887482643127441, "learning_rate": 4.02318217478484e-08, "loss": 0.0564, "step": 17489 }, { "epoch": 2.8337653920933246, "grad_norm": 0.9122143387794495, "learning_rate": 4.0153721957702504e-08, "loss": 0.0655, "step": 17490 }, { "epoch": 2.8339274141283215, "grad_norm": 0.9661285877227783, "learning_rate": 4.0075697432900295e-08, "loss": 0.065, "step": 17491 }, { "epoch": 2.8340894361633184, "grad_norm": 0.8843967318534851, "learning_rate": 3.9997748175828467e-08, "loss": 0.0567, "step": 17492 }, { "epoch": 2.834251458198315, "grad_norm": 0.9385737180709839, "learning_rate": 3.9919874188872607e-08, "loss": 0.0606, "step": 17493 }, { "epoch": 2.834413480233312, "grad_norm": 0.9699385166168213, "learning_rate": 3.9842075474415545e-08, "loss": 0.0575, "step": 17494 }, { "epoch": 2.8345755022683083, "grad_norm": 0.8735253214836121, "learning_rate": 3.976435203483703e-08, "loss": 0.0592, "step": 17495 }, { "epoch": 2.8347375243033053, "grad_norm": 0.9157827496528625, "learning_rate": 3.9686703872516e-08, "loss": 0.0653, "step": 17496 }, { "epoch": 2.8348995463383018, "grad_norm": 0.9803964495658875, "learning_rate": 3.960913098982805e-08, "loss": 0.0605, "step": 17497 }, { "epoch": 2.8350615683732987, "grad_norm": 0.8232876658439636, "learning_rate": 3.953163338914656e-08, "loss": 0.0562, "step": 17498 }, { "epoch": 2.8352235904082956, "grad_norm": 0.7949116826057434, "learning_rate": 3.94542110728427e-08, "loss": 0.0577, "step": 17499 }, { "epoch": 2.835385612443292, "grad_norm": 0.94376540184021, "learning_rate": 3.9376864043285943e-08, "loss": 0.0646, "step": 17500 }, { "epoch": 2.835547634478289, "grad_norm": 1.0657342672348022, "learning_rate": 3.9299592302842195e-08, "loss": 0.061, "step": 17501 }, { "epoch": 2.835709656513286, "grad_norm": 0.8984468579292297, "learning_rate": 3.922239585387649e-08, "loss": 0.0647, "step": 17502 }, { "epoch": 2.8358716785482825, "grad_norm": 1.0442514419555664, "learning_rate": 3.914527469875029e-08, "loss": 0.06, "step": 17503 }, { "epoch": 2.8360337005832794, "grad_norm": 0.9332334995269775, "learning_rate": 3.906822883982336e-08, "loss": 0.063, "step": 17504 }, { "epoch": 2.8361957226182763, "grad_norm": 1.0261024236679077, "learning_rate": 3.8991258279453544e-08, "loss": 0.0672, "step": 17505 }, { "epoch": 2.836357744653273, "grad_norm": 1.0123735666275024, "learning_rate": 3.891436301999563e-08, "loss": 0.0578, "step": 17506 }, { "epoch": 2.8365197666882693, "grad_norm": 0.8809978365898132, "learning_rate": 3.883754306380244e-08, "loss": 0.0535, "step": 17507 }, { "epoch": 2.8366817887232663, "grad_norm": 0.9029288291931152, "learning_rate": 3.876079841322461e-08, "loss": 0.0615, "step": 17508 }, { "epoch": 2.836843810758263, "grad_norm": 0.9749417304992676, "learning_rate": 3.868412907061026e-08, "loss": 0.0564, "step": 17509 }, { "epoch": 2.8370058327932597, "grad_norm": 0.9761818051338196, "learning_rate": 3.8607535038305276e-08, "loss": 0.0627, "step": 17510 }, { "epoch": 2.8371678548282566, "grad_norm": 0.8667659759521484, "learning_rate": 3.853101631865336e-08, "loss": 0.0568, "step": 17511 }, { "epoch": 2.8373298768632536, "grad_norm": 1.0688188076019287, "learning_rate": 3.84545729139954e-08, "loss": 0.0594, "step": 17512 }, { "epoch": 2.83749189889825, "grad_norm": 0.9591261744499207, "learning_rate": 3.837820482667121e-08, "loss": 0.0625, "step": 17513 }, { "epoch": 2.837653920933247, "grad_norm": 0.90581876039505, "learning_rate": 3.83019120590164e-08, "loss": 0.0537, "step": 17514 }, { "epoch": 2.837815942968244, "grad_norm": 0.8603760004043579, "learning_rate": 3.822569461336606e-08, "loss": 0.0517, "step": 17515 }, { "epoch": 2.8379779650032404, "grad_norm": 0.9439842104911804, "learning_rate": 3.814955249205221e-08, "loss": 0.0637, "step": 17516 }, { "epoch": 2.8381399870382373, "grad_norm": 0.9294354915618896, "learning_rate": 3.8073485697404655e-08, "loss": 0.0601, "step": 17517 }, { "epoch": 2.838302009073234, "grad_norm": 0.8488750457763672, "learning_rate": 3.7997494231750145e-08, "loss": 0.0617, "step": 17518 }, { "epoch": 2.8384640311082308, "grad_norm": 0.9641651511192322, "learning_rate": 3.792157809741459e-08, "loss": 0.0648, "step": 17519 }, { "epoch": 2.8386260531432272, "grad_norm": 1.0396709442138672, "learning_rate": 3.784573729672086e-08, "loss": 0.0551, "step": 17520 }, { "epoch": 2.838788075178224, "grad_norm": 0.9486382603645325, "learning_rate": 3.7769971831989325e-08, "loss": 0.0659, "step": 17521 }, { "epoch": 2.838950097213221, "grad_norm": 0.972588300704956, "learning_rate": 3.769428170553785e-08, "loss": 0.0649, "step": 17522 }, { "epoch": 2.8391121192482176, "grad_norm": 1.0495613813400269, "learning_rate": 3.761866691968291e-08, "loss": 0.0679, "step": 17523 }, { "epoch": 2.8392741412832145, "grad_norm": 0.8821991682052612, "learning_rate": 3.754312747673766e-08, "loss": 0.056, "step": 17524 }, { "epoch": 2.8394361633182115, "grad_norm": 1.0493261814117432, "learning_rate": 3.746766337901386e-08, "loss": 0.0586, "step": 17525 }, { "epoch": 2.839598185353208, "grad_norm": 0.923512876033783, "learning_rate": 3.739227462882022e-08, "loss": 0.0601, "step": 17526 }, { "epoch": 2.839760207388205, "grad_norm": 0.8735133409500122, "learning_rate": 3.731696122846379e-08, "loss": 0.0589, "step": 17527 }, { "epoch": 2.839922229423202, "grad_norm": 1.056152105331421, "learning_rate": 3.724172318024854e-08, "loss": 0.0684, "step": 17528 }, { "epoch": 2.8400842514581983, "grad_norm": 0.8753458857536316, "learning_rate": 3.71665604864771e-08, "loss": 0.0581, "step": 17529 }, { "epoch": 2.840246273493195, "grad_norm": 1.102301836013794, "learning_rate": 3.709147314944872e-08, "loss": 0.07, "step": 17530 }, { "epoch": 2.8404082955281917, "grad_norm": 0.8725458383560181, "learning_rate": 3.7016461171461296e-08, "loss": 0.0613, "step": 17531 }, { "epoch": 2.8405703175631887, "grad_norm": 0.938378095626831, "learning_rate": 3.6941524554809924e-08, "loss": 0.0622, "step": 17532 }, { "epoch": 2.840732339598185, "grad_norm": 0.9458240270614624, "learning_rate": 3.686666330178695e-08, "loss": 0.0578, "step": 17533 }, { "epoch": 2.840894361633182, "grad_norm": 0.9221711754798889, "learning_rate": 3.6791877414683594e-08, "loss": 0.0605, "step": 17534 }, { "epoch": 2.841056383668179, "grad_norm": 0.9185110926628113, "learning_rate": 3.6717166895788306e-08, "loss": 0.0638, "step": 17535 }, { "epoch": 2.8412184057031755, "grad_norm": 0.8876226544380188, "learning_rate": 3.664253174738647e-08, "loss": 0.0588, "step": 17536 }, { "epoch": 2.8413804277381725, "grad_norm": 0.9121412634849548, "learning_rate": 3.656797197176182e-08, "loss": 0.0593, "step": 17537 }, { "epoch": 2.8415424497731694, "grad_norm": 0.8094960451126099, "learning_rate": 3.649348757119614e-08, "loss": 0.0577, "step": 17538 }, { "epoch": 2.841704471808166, "grad_norm": 0.8800841569900513, "learning_rate": 3.641907854796789e-08, "loss": 0.0586, "step": 17539 }, { "epoch": 2.841866493843163, "grad_norm": 0.8749896883964539, "learning_rate": 3.634474490435413e-08, "loss": 0.0591, "step": 17540 }, { "epoch": 2.8420285158781593, "grad_norm": 0.9184433221817017, "learning_rate": 3.627048664262916e-08, "loss": 0.0525, "step": 17541 }, { "epoch": 2.8421905379131562, "grad_norm": 0.9051634669303894, "learning_rate": 3.6196303765065333e-08, "loss": 0.0676, "step": 17542 }, { "epoch": 2.8423525599481527, "grad_norm": 0.8812767863273621, "learning_rate": 3.612219627393221e-08, "loss": 0.0601, "step": 17543 }, { "epoch": 2.8425145819831497, "grad_norm": 0.9540053606033325, "learning_rate": 3.60481641714977e-08, "loss": 0.0591, "step": 17544 }, { "epoch": 2.8426766040181466, "grad_norm": 1.0543007850646973, "learning_rate": 3.597420746002639e-08, "loss": 0.0655, "step": 17545 }, { "epoch": 2.842838626053143, "grad_norm": 0.8972881436347961, "learning_rate": 3.590032614178174e-08, "loss": 0.06, "step": 17546 }, { "epoch": 2.84300064808814, "grad_norm": 0.8661593794822693, "learning_rate": 3.5826520219023887e-08, "loss": 0.0607, "step": 17547 }, { "epoch": 2.843162670123137, "grad_norm": 0.896032452583313, "learning_rate": 3.57527896940113e-08, "loss": 0.0592, "step": 17548 }, { "epoch": 2.8433246921581334, "grad_norm": 0.822556734085083, "learning_rate": 3.567913456900024e-08, "loss": 0.0561, "step": 17549 }, { "epoch": 2.8434867141931304, "grad_norm": 0.9163435101509094, "learning_rate": 3.560555484624417e-08, "loss": 0.0679, "step": 17550 }, { "epoch": 2.843648736228127, "grad_norm": 0.8780238032341003, "learning_rate": 3.5532050527994076e-08, "loss": 0.0533, "step": 17551 }, { "epoch": 2.843810758263124, "grad_norm": 0.9988797903060913, "learning_rate": 3.545862161649927e-08, "loss": 0.057, "step": 17552 }, { "epoch": 2.8439727802981203, "grad_norm": 0.8687535524368286, "learning_rate": 3.538526811400711e-08, "loss": 0.0607, "step": 17553 }, { "epoch": 2.844134802333117, "grad_norm": 1.180427074432373, "learning_rate": 3.531199002276109e-08, "loss": 0.0727, "step": 17554 }, { "epoch": 2.844296824368114, "grad_norm": 0.8522838354110718, "learning_rate": 3.5238787345003855e-08, "loss": 0.0549, "step": 17555 }, { "epoch": 2.8444588464031106, "grad_norm": 0.9009879231452942, "learning_rate": 3.5165660082975006e-08, "loss": 0.0628, "step": 17556 }, { "epoch": 2.8446208684381076, "grad_norm": 0.8708153963088989, "learning_rate": 3.50926082389122e-08, "loss": 0.0574, "step": 17557 }, { "epoch": 2.8447828904731045, "grad_norm": 0.8726041316986084, "learning_rate": 3.501963181505058e-08, "loss": 0.063, "step": 17558 }, { "epoch": 2.844944912508101, "grad_norm": 0.9148244261741638, "learning_rate": 3.4946730813623376e-08, "loss": 0.0561, "step": 17559 }, { "epoch": 2.845106934543098, "grad_norm": 0.9740058779716492, "learning_rate": 3.487390523686074e-08, "loss": 0.0645, "step": 17560 }, { "epoch": 2.845268956578095, "grad_norm": 0.8795120716094971, "learning_rate": 3.4801155086991165e-08, "loss": 0.0604, "step": 17561 }, { "epoch": 2.8454309786130914, "grad_norm": 0.8973474502563477, "learning_rate": 3.472848036624038e-08, "loss": 0.0601, "step": 17562 }, { "epoch": 2.8455930006480883, "grad_norm": 0.9123368859291077, "learning_rate": 3.4655881076832156e-08, "loss": 0.0622, "step": 17563 }, { "epoch": 2.845755022683085, "grad_norm": 0.8565255403518677, "learning_rate": 3.4583357220988326e-08, "loss": 0.0551, "step": 17564 }, { "epoch": 2.8459170447180817, "grad_norm": 0.815396249294281, "learning_rate": 3.451090880092739e-08, "loss": 0.0529, "step": 17565 }, { "epoch": 2.846079066753078, "grad_norm": 0.7915904521942139, "learning_rate": 3.443853581886619e-08, "loss": 0.0512, "step": 17566 }, { "epoch": 2.846241088788075, "grad_norm": 1.000328779220581, "learning_rate": 3.4366238277019625e-08, "loss": 0.0591, "step": 17567 }, { "epoch": 2.846403110823072, "grad_norm": 0.8379427194595337, "learning_rate": 3.4294016177598974e-08, "loss": 0.0601, "step": 17568 }, { "epoch": 2.8465651328580686, "grad_norm": 0.8759309649467468, "learning_rate": 3.42218695228147e-08, "loss": 0.0594, "step": 17569 }, { "epoch": 2.8467271548930655, "grad_norm": 0.9378208518028259, "learning_rate": 3.4149798314874195e-08, "loss": 0.0637, "step": 17570 }, { "epoch": 2.8468891769280624, "grad_norm": 0.9910644888877869, "learning_rate": 3.4077802555982645e-08, "loss": 0.0633, "step": 17571 }, { "epoch": 2.847051198963059, "grad_norm": 0.8594523668289185, "learning_rate": 3.4005882248343e-08, "loss": 0.057, "step": 17572 }, { "epoch": 2.847213220998056, "grad_norm": 0.9198819994926453, "learning_rate": 3.393403739415546e-08, "loss": 0.0638, "step": 17573 }, { "epoch": 2.8473752430330523, "grad_norm": 0.8322975039482117, "learning_rate": 3.3862267995618817e-08, "loss": 0.0572, "step": 17574 }, { "epoch": 2.8475372650680493, "grad_norm": 0.9040446281433105, "learning_rate": 3.379057405492908e-08, "loss": 0.0641, "step": 17575 }, { "epoch": 2.8476992871030458, "grad_norm": 0.8539929986000061, "learning_rate": 3.3718955574279234e-08, "loss": 0.0602, "step": 17576 }, { "epoch": 2.8478613091380427, "grad_norm": 0.7834108471870422, "learning_rate": 3.3647412555861126e-08, "loss": 0.051, "step": 17577 }, { "epoch": 2.8480233311730396, "grad_norm": 0.8313713073730469, "learning_rate": 3.357594500186384e-08, "loss": 0.0548, "step": 17578 }, { "epoch": 2.848185353208036, "grad_norm": 0.8187208771705627, "learning_rate": 3.3504552914474244e-08, "loss": 0.0554, "step": 17579 }, { "epoch": 2.848347375243033, "grad_norm": 0.8749431371688843, "learning_rate": 3.3433236295876134e-08, "loss": 0.054, "step": 17580 }, { "epoch": 2.84850939727803, "grad_norm": 0.9242835640907288, "learning_rate": 3.336199514825195e-08, "loss": 0.0567, "step": 17581 }, { "epoch": 2.8486714193130265, "grad_norm": 0.9290145635604858, "learning_rate": 3.329082947378215e-08, "loss": 0.0639, "step": 17582 }, { "epoch": 2.8488334413480234, "grad_norm": 0.908678412437439, "learning_rate": 3.3219739274643057e-08, "loss": 0.0579, "step": 17583 }, { "epoch": 2.8489954633830203, "grad_norm": 0.9778639078140259, "learning_rate": 3.314872455301071e-08, "loss": 0.0648, "step": 17584 }, { "epoch": 2.849157485418017, "grad_norm": 0.6851035952568054, "learning_rate": 3.3077785311057545e-08, "loss": 0.0508, "step": 17585 }, { "epoch": 2.8493195074530138, "grad_norm": 0.9486784338951111, "learning_rate": 3.300692155095458e-08, "loss": 0.0606, "step": 17586 }, { "epoch": 2.8494815294880103, "grad_norm": 1.097536325454712, "learning_rate": 3.293613327486983e-08, "loss": 0.0656, "step": 17587 }, { "epoch": 2.849643551523007, "grad_norm": 0.9391690492630005, "learning_rate": 3.286542048496904e-08, "loss": 0.0613, "step": 17588 }, { "epoch": 2.8498055735580037, "grad_norm": 0.9068102836608887, "learning_rate": 3.2794783183416055e-08, "loss": 0.059, "step": 17589 }, { "epoch": 2.8499675955930006, "grad_norm": 0.851341962814331, "learning_rate": 3.272422137237219e-08, "loss": 0.0532, "step": 17590 }, { "epoch": 2.8501296176279975, "grad_norm": 0.9030383229255676, "learning_rate": 3.265373505399627e-08, "loss": 0.0536, "step": 17591 }, { "epoch": 2.850291639662994, "grad_norm": 0.8289960026741028, "learning_rate": 3.258332423044547e-08, "loss": 0.0602, "step": 17592 }, { "epoch": 2.850453661697991, "grad_norm": 0.8833999633789062, "learning_rate": 3.2512988903873885e-08, "loss": 0.0623, "step": 17593 }, { "epoch": 2.850615683732988, "grad_norm": 1.016706943511963, "learning_rate": 3.2442729076433697e-08, "loss": 0.0629, "step": 17594 }, { "epoch": 2.8507777057679844, "grad_norm": 0.903662383556366, "learning_rate": 3.237254475027429e-08, "loss": 0.0596, "step": 17595 }, { "epoch": 2.8509397278029813, "grad_norm": 0.8987908959388733, "learning_rate": 3.230243592754368e-08, "loss": 0.0649, "step": 17596 }, { "epoch": 2.851101749837978, "grad_norm": 0.9659387469291687, "learning_rate": 3.223240261038707e-08, "loss": 0.0661, "step": 17597 }, { "epoch": 2.8512637718729748, "grad_norm": 0.9991119503974915, "learning_rate": 3.2162444800946655e-08, "loss": 0.0595, "step": 17598 }, { "epoch": 2.8514257939079712, "grad_norm": 0.8747795224189758, "learning_rate": 3.209256250136378e-08, "loss": 0.0505, "step": 17599 }, { "epoch": 2.851587815942968, "grad_norm": 0.9156426191329956, "learning_rate": 3.202275571377589e-08, "loss": 0.0553, "step": 17600 }, { "epoch": 2.851749837977965, "grad_norm": 0.9192603826522827, "learning_rate": 3.1953024440319334e-08, "loss": 0.064, "step": 17601 }, { "epoch": 2.8519118600129616, "grad_norm": 0.9150480031967163, "learning_rate": 3.188336868312769e-08, "loss": 0.0652, "step": 17602 }, { "epoch": 2.8520738820479585, "grad_norm": 0.8279345035552979, "learning_rate": 3.18137884443323e-08, "loss": 0.0577, "step": 17603 }, { "epoch": 2.8522359040829555, "grad_norm": 0.9174199104309082, "learning_rate": 3.1744283726062306e-08, "loss": 0.0614, "step": 17604 }, { "epoch": 2.852397926117952, "grad_norm": 0.9263173937797546, "learning_rate": 3.167485453044378e-08, "loss": 0.0642, "step": 17605 }, { "epoch": 2.852559948152949, "grad_norm": 0.8777797222137451, "learning_rate": 3.160550085960168e-08, "loss": 0.0556, "step": 17606 }, { "epoch": 2.852721970187946, "grad_norm": 0.8794631958007812, "learning_rate": 3.153622271565793e-08, "loss": 0.0583, "step": 17607 }, { "epoch": 2.8528839922229423, "grad_norm": 0.8909582495689392, "learning_rate": 3.1467020100732215e-08, "loss": 0.0647, "step": 17608 }, { "epoch": 2.8530460142579392, "grad_norm": 0.9028645753860474, "learning_rate": 3.139789301694146e-08, "loss": 0.0583, "step": 17609 }, { "epoch": 2.8532080362929357, "grad_norm": 0.9116567969322205, "learning_rate": 3.1328841466401746e-08, "loss": 0.0605, "step": 17610 }, { "epoch": 2.8533700583279327, "grad_norm": 1.222704529762268, "learning_rate": 3.125986545122528e-08, "loss": 0.0621, "step": 17611 }, { "epoch": 2.853532080362929, "grad_norm": 0.8335679769515991, "learning_rate": 3.1190964973522865e-08, "loss": 0.0562, "step": 17612 }, { "epoch": 2.853694102397926, "grad_norm": 0.9687299728393555, "learning_rate": 3.112214003540254e-08, "loss": 0.057, "step": 17613 }, { "epoch": 2.853856124432923, "grad_norm": 0.9810713529586792, "learning_rate": 3.105339063896956e-08, "loss": 0.07, "step": 17614 }, { "epoch": 2.8540181464679195, "grad_norm": 1.0105944871902466, "learning_rate": 3.098471678632892e-08, "loss": 0.0691, "step": 17615 }, { "epoch": 2.8541801685029164, "grad_norm": 0.9086531400680542, "learning_rate": 3.0916118479580593e-08, "loss": 0.0627, "step": 17616 }, { "epoch": 2.8543421905379134, "grad_norm": 0.8142098188400269, "learning_rate": 3.084759572082402e-08, "loss": 0.0532, "step": 17617 }, { "epoch": 2.85450421257291, "grad_norm": 0.9001931548118591, "learning_rate": 3.077914851215585e-08, "loss": 0.0685, "step": 17618 }, { "epoch": 2.854666234607907, "grad_norm": 0.7494874000549316, "learning_rate": 3.071077685567025e-08, "loss": 0.0548, "step": 17619 }, { "epoch": 2.8548282566429033, "grad_norm": 0.956156849861145, "learning_rate": 3.064248075345916e-08, "loss": 0.06, "step": 17620 }, { "epoch": 2.8549902786779002, "grad_norm": 0.939167857170105, "learning_rate": 3.057426020761256e-08, "loss": 0.066, "step": 17621 }, { "epoch": 2.8551523007128967, "grad_norm": 0.9294570088386536, "learning_rate": 3.050611522021796e-08, "loss": 0.0586, "step": 17622 }, { "epoch": 2.8553143227478937, "grad_norm": 0.9607594609260559, "learning_rate": 3.043804579336007e-08, "loss": 0.0625, "step": 17623 }, { "epoch": 2.8554763447828906, "grad_norm": 1.0955085754394531, "learning_rate": 3.0370051929121405e-08, "loss": 0.0652, "step": 17624 }, { "epoch": 2.855638366817887, "grad_norm": 0.9362739324569702, "learning_rate": 3.030213362958306e-08, "loss": 0.0622, "step": 17625 }, { "epoch": 2.855800388852884, "grad_norm": 0.8863419890403748, "learning_rate": 3.023429089682284e-08, "loss": 0.0599, "step": 17626 }, { "epoch": 2.855962410887881, "grad_norm": 0.8718972206115723, "learning_rate": 3.0166523732916564e-08, "loss": 0.0588, "step": 17627 }, { "epoch": 2.8561244329228774, "grad_norm": 1.004003643989563, "learning_rate": 3.009883213993786e-08, "loss": 0.0603, "step": 17628 }, { "epoch": 2.8562864549578744, "grad_norm": 0.9552223682403564, "learning_rate": 3.0031216119957576e-08, "loss": 0.053, "step": 17629 }, { "epoch": 2.8564484769928713, "grad_norm": 0.9536174535751343, "learning_rate": 2.996367567504544e-08, "loss": 0.0656, "step": 17630 }, { "epoch": 2.856610499027868, "grad_norm": 1.1152795553207397, "learning_rate": 2.989621080726701e-08, "loss": 0.0635, "step": 17631 }, { "epoch": 2.8567725210628643, "grad_norm": 0.9013298749923706, "learning_rate": 2.9828821518687045e-08, "loss": 0.0566, "step": 17632 }, { "epoch": 2.856934543097861, "grad_norm": 0.9204021096229553, "learning_rate": 2.9761507811367497e-08, "loss": 0.0631, "step": 17633 }, { "epoch": 2.857096565132858, "grad_norm": 0.9465929865837097, "learning_rate": 2.9694269687367826e-08, "loss": 0.0542, "step": 17634 }, { "epoch": 2.8572585871678546, "grad_norm": 0.8179449439048767, "learning_rate": 2.962710714874556e-08, "loss": 0.0581, "step": 17635 }, { "epoch": 2.8574206092028516, "grad_norm": 1.1688090562820435, "learning_rate": 2.9560020197555716e-08, "loss": 0.0635, "step": 17636 }, { "epoch": 2.8575826312378485, "grad_norm": 1.0356324911117554, "learning_rate": 2.9493008835850823e-08, "loss": 0.0592, "step": 17637 }, { "epoch": 2.857744653272845, "grad_norm": 0.9192071557044983, "learning_rate": 2.9426073065681183e-08, "loss": 0.0646, "step": 17638 }, { "epoch": 2.857906675307842, "grad_norm": 0.8982980251312256, "learning_rate": 2.9359212889095157e-08, "loss": 0.0616, "step": 17639 }, { "epoch": 2.858068697342839, "grad_norm": 0.8842819333076477, "learning_rate": 2.929242830813861e-08, "loss": 0.0607, "step": 17640 }, { "epoch": 2.8582307193778353, "grad_norm": 0.8390337824821472, "learning_rate": 2.9225719324854628e-08, "loss": 0.0564, "step": 17641 }, { "epoch": 2.8583927414128323, "grad_norm": 0.9694913029670715, "learning_rate": 2.915908594128436e-08, "loss": 0.0596, "step": 17642 }, { "epoch": 2.8585547634478288, "grad_norm": 0.9143932461738586, "learning_rate": 2.9092528159466727e-08, "loss": 0.0585, "step": 17643 }, { "epoch": 2.8587167854828257, "grad_norm": 0.9615548849105835, "learning_rate": 2.9026045981438434e-08, "loss": 0.0646, "step": 17644 }, { "epoch": 2.858878807517822, "grad_norm": 0.8890393972396851, "learning_rate": 2.895963940923341e-08, "loss": 0.0605, "step": 17645 }, { "epoch": 2.859040829552819, "grad_norm": 0.9828749299049377, "learning_rate": 2.889330844488364e-08, "loss": 0.0568, "step": 17646 }, { "epoch": 2.859202851587816, "grad_norm": 0.8218222856521606, "learning_rate": 2.882705309041861e-08, "loss": 0.0532, "step": 17647 }, { "epoch": 2.8593648736228126, "grad_norm": 1.0544509887695312, "learning_rate": 2.8760873347865593e-08, "loss": 0.0649, "step": 17648 }, { "epoch": 2.8595268956578095, "grad_norm": 1.0283457040786743, "learning_rate": 2.869476921924963e-08, "loss": 0.0588, "step": 17649 }, { "epoch": 2.8596889176928064, "grad_norm": 0.9277712106704712, "learning_rate": 2.862874070659327e-08, "loss": 0.0557, "step": 17650 }, { "epoch": 2.859850939727803, "grad_norm": 0.8709115982055664, "learning_rate": 2.8562787811916848e-08, "loss": 0.0558, "step": 17651 }, { "epoch": 2.8600129617628, "grad_norm": 0.9610093235969543, "learning_rate": 2.8496910537238185e-08, "loss": 0.0622, "step": 17652 }, { "epoch": 2.8601749837977968, "grad_norm": 1.003127098083496, "learning_rate": 2.8431108884573454e-08, "loss": 0.0611, "step": 17653 }, { "epoch": 2.8603370058327933, "grad_norm": 0.879693865776062, "learning_rate": 2.8365382855935487e-08, "loss": 0.0526, "step": 17654 }, { "epoch": 2.8604990278677898, "grad_norm": 0.8258382678031921, "learning_rate": 2.8299732453335725e-08, "loss": 0.0576, "step": 17655 }, { "epoch": 2.8606610499027867, "grad_norm": 0.9305282831192017, "learning_rate": 2.8234157678782846e-08, "loss": 0.0603, "step": 17656 }, { "epoch": 2.8608230719377836, "grad_norm": 0.825541615486145, "learning_rate": 2.8168658534282743e-08, "loss": 0.0553, "step": 17657 }, { "epoch": 2.86098509397278, "grad_norm": 0.8502270579338074, "learning_rate": 2.8103235021840204e-08, "loss": 0.0581, "step": 17658 }, { "epoch": 2.861147116007777, "grad_norm": 0.9126089215278625, "learning_rate": 2.8037887143456954e-08, "loss": 0.0582, "step": 17659 }, { "epoch": 2.861309138042774, "grad_norm": 0.8462595343589783, "learning_rate": 2.7972614901132235e-08, "loss": 0.0621, "step": 17660 }, { "epoch": 2.8614711600777705, "grad_norm": 0.8996595144271851, "learning_rate": 2.790741829686333e-08, "loss": 0.0602, "step": 17661 }, { "epoch": 2.8616331821127674, "grad_norm": 1.2317053079605103, "learning_rate": 2.784229733264504e-08, "loss": 0.0688, "step": 17662 }, { "epoch": 2.8617952041477643, "grad_norm": 1.1419668197631836, "learning_rate": 2.7777252010469657e-08, "loss": 0.0643, "step": 17663 }, { "epoch": 2.861957226182761, "grad_norm": 1.055248737335205, "learning_rate": 2.771228233232809e-08, "loss": 0.0607, "step": 17664 }, { "epoch": 2.8621192482177578, "grad_norm": 0.8915876746177673, "learning_rate": 2.7647388300207635e-08, "loss": 0.0603, "step": 17665 }, { "epoch": 2.8622812702527543, "grad_norm": 1.019177794456482, "learning_rate": 2.7582569916094205e-08, "loss": 0.062, "step": 17666 }, { "epoch": 2.862443292287751, "grad_norm": 0.932235598564148, "learning_rate": 2.7517827181970937e-08, "loss": 0.0606, "step": 17667 }, { "epoch": 2.8626053143227477, "grad_norm": 0.764743983745575, "learning_rate": 2.745316009981902e-08, "loss": 0.0513, "step": 17668 }, { "epoch": 2.8627673363577446, "grad_norm": 0.9902213215827942, "learning_rate": 2.7388568671616877e-08, "loss": 0.0629, "step": 17669 }, { "epoch": 2.8629293583927415, "grad_norm": 1.0022236108779907, "learning_rate": 2.732405289934098e-08, "loss": 0.0586, "step": 17670 }, { "epoch": 2.863091380427738, "grad_norm": 1.2449359893798828, "learning_rate": 2.7259612784965307e-08, "loss": 0.0579, "step": 17671 }, { "epoch": 2.863253402462735, "grad_norm": 0.9988813400268555, "learning_rate": 2.719524833046133e-08, "loss": 0.0626, "step": 17672 }, { "epoch": 2.863415424497732, "grad_norm": 0.9619942307472229, "learning_rate": 2.7130959537798874e-08, "loss": 0.0637, "step": 17673 }, { "epoch": 2.8635774465327284, "grad_norm": 0.9914158582687378, "learning_rate": 2.7066746408944968e-08, "loss": 0.0621, "step": 17674 }, { "epoch": 2.8637394685677253, "grad_norm": 0.9497270584106445, "learning_rate": 2.700260894586415e-08, "loss": 0.0555, "step": 17675 }, { "epoch": 2.863901490602722, "grad_norm": 0.8510124087333679, "learning_rate": 2.6938547150518746e-08, "loss": 0.0556, "step": 17676 }, { "epoch": 2.8640635126377187, "grad_norm": 0.8168439269065857, "learning_rate": 2.6874561024869407e-08, "loss": 0.0514, "step": 17677 }, { "epoch": 2.8642255346727152, "grad_norm": 0.8079927563667297, "learning_rate": 2.6810650570873454e-08, "loss": 0.0501, "step": 17678 }, { "epoch": 2.864387556707712, "grad_norm": 0.9214690923690796, "learning_rate": 2.6746815790486548e-08, "loss": 0.061, "step": 17679 }, { "epoch": 2.864549578742709, "grad_norm": 1.2173523902893066, "learning_rate": 2.6683056685662122e-08, "loss": 0.0676, "step": 17680 }, { "epoch": 2.8647116007777056, "grad_norm": 0.9455307722091675, "learning_rate": 2.6619373258350566e-08, "loss": 0.0689, "step": 17681 }, { "epoch": 2.8648736228127025, "grad_norm": 0.84743332862854, "learning_rate": 2.6555765510500875e-08, "loss": 0.0549, "step": 17682 }, { "epoch": 2.8650356448476995, "grad_norm": 0.8518211245536804, "learning_rate": 2.6492233444059267e-08, "loss": 0.0595, "step": 17683 }, { "epoch": 2.865197666882696, "grad_norm": 0.8848883509635925, "learning_rate": 2.6428777060969468e-08, "loss": 0.0615, "step": 17684 }, { "epoch": 2.865359688917693, "grad_norm": 0.9524630308151245, "learning_rate": 2.6365396363173256e-08, "loss": 0.0634, "step": 17685 }, { "epoch": 2.86552171095269, "grad_norm": 0.9205766916275024, "learning_rate": 2.6302091352609637e-08, "loss": 0.0672, "step": 17686 }, { "epoch": 2.8656837329876863, "grad_norm": 0.821868360042572, "learning_rate": 2.6238862031215672e-08, "loss": 0.0562, "step": 17687 }, { "epoch": 2.8658457550226832, "grad_norm": 0.9511677026748657, "learning_rate": 2.617570840092648e-08, "loss": 0.0646, "step": 17688 }, { "epoch": 2.8660077770576797, "grad_norm": 0.9630586504936218, "learning_rate": 2.611263046367385e-08, "loss": 0.0589, "step": 17689 }, { "epoch": 2.8661697990926767, "grad_norm": 0.8870193362236023, "learning_rate": 2.604962822138818e-08, "loss": 0.0544, "step": 17690 }, { "epoch": 2.866331821127673, "grad_norm": 1.0679670572280884, "learning_rate": 2.5986701675996816e-08, "loss": 0.0634, "step": 17691 }, { "epoch": 2.86649384316267, "grad_norm": 0.9083020091056824, "learning_rate": 2.5923850829425723e-08, "loss": 0.0639, "step": 17692 }, { "epoch": 2.866655865197667, "grad_norm": 0.8310949802398682, "learning_rate": 2.5861075683597526e-08, "loss": 0.0558, "step": 17693 }, { "epoch": 2.8668178872326635, "grad_norm": 0.956186830997467, "learning_rate": 2.579837624043291e-08, "loss": 0.0709, "step": 17694 }, { "epoch": 2.8669799092676604, "grad_norm": 1.0341099500656128, "learning_rate": 2.573575250185062e-08, "loss": 0.0563, "step": 17695 }, { "epoch": 2.8671419313026574, "grad_norm": 0.8617078065872192, "learning_rate": 2.5673204469766898e-08, "loss": 0.0621, "step": 17696 }, { "epoch": 2.867303953337654, "grad_norm": 0.9676273465156555, "learning_rate": 2.561073214609494e-08, "loss": 0.0636, "step": 17697 }, { "epoch": 2.867465975372651, "grad_norm": 0.9023331999778748, "learning_rate": 2.5548335532747105e-08, "loss": 0.0571, "step": 17698 }, { "epoch": 2.8676279974076473, "grad_norm": 0.9330205321311951, "learning_rate": 2.5486014631631862e-08, "loss": 0.0592, "step": 17699 }, { "epoch": 2.8677900194426442, "grad_norm": 1.1070448160171509, "learning_rate": 2.5423769444656575e-08, "loss": 0.0669, "step": 17700 }, { "epoch": 2.8679520414776407, "grad_norm": 0.8804136514663696, "learning_rate": 2.536159997372528e-08, "loss": 0.0567, "step": 17701 }, { "epoch": 2.8681140635126376, "grad_norm": 0.9040849208831787, "learning_rate": 2.52995062207409e-08, "loss": 0.0598, "step": 17702 }, { "epoch": 2.8682760855476346, "grad_norm": 0.999211311340332, "learning_rate": 2.5237488187602743e-08, "loss": 0.0586, "step": 17703 }, { "epoch": 2.868438107582631, "grad_norm": 0.9211670160293579, "learning_rate": 2.517554587620874e-08, "loss": 0.0648, "step": 17704 }, { "epoch": 2.868600129617628, "grad_norm": 0.820000410079956, "learning_rate": 2.511367928845404e-08, "loss": 0.055, "step": 17705 }, { "epoch": 2.868762151652625, "grad_norm": 0.8488945960998535, "learning_rate": 2.5051888426231574e-08, "loss": 0.0563, "step": 17706 }, { "epoch": 2.8689241736876214, "grad_norm": 0.9143860936164856, "learning_rate": 2.499017329143205e-08, "loss": 0.0565, "step": 17707 }, { "epoch": 2.8690861957226184, "grad_norm": 0.9244613647460938, "learning_rate": 2.492853388594396e-08, "loss": 0.0627, "step": 17708 }, { "epoch": 2.8692482177576153, "grad_norm": 0.924808144569397, "learning_rate": 2.486697021165302e-08, "loss": 0.0617, "step": 17709 }, { "epoch": 2.869410239792612, "grad_norm": 1.042020320892334, "learning_rate": 2.480548227044327e-08, "loss": 0.0593, "step": 17710 }, { "epoch": 2.8695722618276087, "grad_norm": 1.0010639429092407, "learning_rate": 2.4744070064195713e-08, "loss": 0.0599, "step": 17711 }, { "epoch": 2.869734283862605, "grad_norm": 1.0316102504730225, "learning_rate": 2.4682733594789677e-08, "loss": 0.0673, "step": 17712 }, { "epoch": 2.869896305897602, "grad_norm": 1.0180681943893433, "learning_rate": 2.4621472864101992e-08, "loss": 0.0638, "step": 17713 }, { "epoch": 2.8700583279325986, "grad_norm": 1.028444766998291, "learning_rate": 2.4560287874006716e-08, "loss": 0.0638, "step": 17714 }, { "epoch": 2.8702203499675956, "grad_norm": 1.0102734565734863, "learning_rate": 2.4499178626376243e-08, "loss": 0.0612, "step": 17715 }, { "epoch": 2.8703823720025925, "grad_norm": 0.8836684226989746, "learning_rate": 2.443814512308018e-08, "loss": 0.0563, "step": 17716 }, { "epoch": 2.870544394037589, "grad_norm": 0.9856815934181213, "learning_rate": 2.437718736598621e-08, "loss": 0.0579, "step": 17717 }, { "epoch": 2.870706416072586, "grad_norm": 0.9209667444229126, "learning_rate": 2.431630535695978e-08, "loss": 0.056, "step": 17718 }, { "epoch": 2.870868438107583, "grad_norm": 0.9791855812072754, "learning_rate": 2.4255499097863012e-08, "loss": 0.0622, "step": 17719 }, { "epoch": 2.8710304601425793, "grad_norm": 0.9314326047897339, "learning_rate": 2.419476859055664e-08, "loss": 0.0617, "step": 17720 }, { "epoch": 2.8711924821775763, "grad_norm": 0.8641183376312256, "learning_rate": 2.4134113836899455e-08, "loss": 0.0557, "step": 17721 }, { "epoch": 2.8713545042125728, "grad_norm": 0.8541434407234192, "learning_rate": 2.4073534838746637e-08, "loss": 0.0541, "step": 17722 }, { "epoch": 2.8715165262475697, "grad_norm": 1.0766733884811401, "learning_rate": 2.4013031597951985e-08, "loss": 0.0597, "step": 17723 }, { "epoch": 2.871678548282566, "grad_norm": 0.9844111204147339, "learning_rate": 2.3952604116366795e-08, "loss": 0.0637, "step": 17724 }, { "epoch": 2.871840570317563, "grad_norm": 0.8357641696929932, "learning_rate": 2.3892252395840143e-08, "loss": 0.0598, "step": 17725 }, { "epoch": 2.87200259235256, "grad_norm": 1.0578702688217163, "learning_rate": 2.383197643821833e-08, "loss": 0.0574, "step": 17726 }, { "epoch": 2.8721646143875565, "grad_norm": 0.8286598920822144, "learning_rate": 2.377177624534599e-08, "loss": 0.0589, "step": 17727 }, { "epoch": 2.8723266364225535, "grad_norm": 0.890817403793335, "learning_rate": 2.3711651819064984e-08, "loss": 0.0612, "step": 17728 }, { "epoch": 2.8724886584575504, "grad_norm": 0.8068243265151978, "learning_rate": 2.3651603161214677e-08, "loss": 0.0547, "step": 17729 }, { "epoch": 2.872650680492547, "grad_norm": 0.8931058049201965, "learning_rate": 2.359163027363276e-08, "loss": 0.0582, "step": 17730 }, { "epoch": 2.872812702527544, "grad_norm": 0.9487268328666687, "learning_rate": 2.3531733158154157e-08, "loss": 0.0684, "step": 17731 }, { "epoch": 2.8729747245625408, "grad_norm": 0.8844464421272278, "learning_rate": 2.3471911816611846e-08, "loss": 0.0576, "step": 17732 }, { "epoch": 2.8731367465975373, "grad_norm": 0.7697759866714478, "learning_rate": 2.3412166250835756e-08, "loss": 0.0475, "step": 17733 }, { "epoch": 2.8732987686325338, "grad_norm": 1.038557767868042, "learning_rate": 2.335249646265414e-08, "loss": 0.0675, "step": 17734 }, { "epoch": 2.8734607906675307, "grad_norm": 0.8407655954360962, "learning_rate": 2.3292902453892485e-08, "loss": 0.0579, "step": 17735 }, { "epoch": 2.8736228127025276, "grad_norm": 0.940645694732666, "learning_rate": 2.3233384226375167e-08, "loss": 0.0677, "step": 17736 }, { "epoch": 2.873784834737524, "grad_norm": 0.9915009140968323, "learning_rate": 2.3173941781922114e-08, "loss": 0.0645, "step": 17737 }, { "epoch": 2.873946856772521, "grad_norm": 1.1203358173370361, "learning_rate": 2.311457512235271e-08, "loss": 0.0607, "step": 17738 }, { "epoch": 2.874108878807518, "grad_norm": 0.9407758116722107, "learning_rate": 2.305528424948328e-08, "loss": 0.0555, "step": 17739 }, { "epoch": 2.8742709008425145, "grad_norm": 1.1099528074264526, "learning_rate": 2.2996069165128198e-08, "loss": 0.0638, "step": 17740 }, { "epoch": 2.8744329228775114, "grad_norm": 0.8714383244514465, "learning_rate": 2.2936929871099356e-08, "loss": 0.0595, "step": 17741 }, { "epoch": 2.8745949449125083, "grad_norm": 0.834579586982727, "learning_rate": 2.2877866369205858e-08, "loss": 0.0633, "step": 17742 }, { "epoch": 2.874756966947505, "grad_norm": 0.9249007701873779, "learning_rate": 2.281887866125515e-08, "loss": 0.054, "step": 17743 }, { "epoch": 2.8749189889825018, "grad_norm": 0.8720956444740295, "learning_rate": 2.2759966749051897e-08, "loss": 0.0561, "step": 17744 }, { "epoch": 2.8750810110174982, "grad_norm": 1.0232727527618408, "learning_rate": 2.2701130634399104e-08, "loss": 0.063, "step": 17745 }, { "epoch": 2.875243033052495, "grad_norm": 0.9553148746490479, "learning_rate": 2.2642370319096718e-08, "loss": 0.0691, "step": 17746 }, { "epoch": 2.8754050550874917, "grad_norm": 0.820517361164093, "learning_rate": 2.2583685804942746e-08, "loss": 0.0578, "step": 17747 }, { "epoch": 2.8755670771224886, "grad_norm": 0.9615774154663086, "learning_rate": 2.2525077093732695e-08, "loss": 0.0674, "step": 17748 }, { "epoch": 2.8757290991574855, "grad_norm": 0.9988585114479065, "learning_rate": 2.2466544187259852e-08, "loss": 0.0686, "step": 17749 }, { "epoch": 2.875891121192482, "grad_norm": 0.8221516013145447, "learning_rate": 2.2408087087315567e-08, "loss": 0.0566, "step": 17750 }, { "epoch": 2.876053143227479, "grad_norm": 0.852265477180481, "learning_rate": 2.234970579568785e-08, "loss": 0.06, "step": 17751 }, { "epoch": 2.876215165262476, "grad_norm": 0.9517872929573059, "learning_rate": 2.2291400314163325e-08, "loss": 0.0628, "step": 17752 }, { "epoch": 2.8763771872974724, "grad_norm": 1.0439865589141846, "learning_rate": 2.2233170644526126e-08, "loss": 0.0619, "step": 17753 }, { "epoch": 2.8765392093324693, "grad_norm": 0.8319445848464966, "learning_rate": 2.217501678855788e-08, "loss": 0.0548, "step": 17754 }, { "epoch": 2.8767012313674662, "grad_norm": 0.9762924909591675, "learning_rate": 2.211693874803772e-08, "loss": 0.0599, "step": 17755 }, { "epoch": 2.8768632534024627, "grad_norm": 0.831531822681427, "learning_rate": 2.2058936524742835e-08, "loss": 0.0537, "step": 17756 }, { "epoch": 2.8770252754374592, "grad_norm": 0.8122191429138184, "learning_rate": 2.2001010120448197e-08, "loss": 0.0555, "step": 17757 }, { "epoch": 2.877187297472456, "grad_norm": 1.2487119436264038, "learning_rate": 2.1943159536925994e-08, "loss": 0.0614, "step": 17758 }, { "epoch": 2.877349319507453, "grad_norm": 1.0974977016448975, "learning_rate": 2.1885384775946207e-08, "loss": 0.0593, "step": 17759 }, { "epoch": 2.8775113415424496, "grad_norm": 0.9182059168815613, "learning_rate": 2.1827685839276856e-08, "loss": 0.0623, "step": 17760 }, { "epoch": 2.8776733635774465, "grad_norm": 1.0060678720474243, "learning_rate": 2.177006272868293e-08, "loss": 0.0623, "step": 17761 }, { "epoch": 2.8778353856124435, "grad_norm": 0.8593238592147827, "learning_rate": 2.1712515445928285e-08, "loss": 0.0612, "step": 17762 }, { "epoch": 2.87799740764744, "grad_norm": 0.8475682139396667, "learning_rate": 2.1655043992773183e-08, "loss": 0.0538, "step": 17763 }, { "epoch": 2.878159429682437, "grad_norm": 1.3795852661132812, "learning_rate": 2.159764837097622e-08, "loss": 0.0681, "step": 17764 }, { "epoch": 2.878321451717434, "grad_norm": 1.0932248830795288, "learning_rate": 2.1540328582293767e-08, "loss": 0.0699, "step": 17765 }, { "epoch": 2.8784834737524303, "grad_norm": 0.8664324879646301, "learning_rate": 2.1483084628479145e-08, "loss": 0.0559, "step": 17766 }, { "epoch": 2.8786454957874272, "grad_norm": 0.92646723985672, "learning_rate": 2.142591651128456e-08, "loss": 0.0559, "step": 17767 }, { "epoch": 2.8788075178224237, "grad_norm": 0.7677503228187561, "learning_rate": 2.1368824232458618e-08, "loss": 0.0519, "step": 17768 }, { "epoch": 2.8789695398574207, "grad_norm": 0.8314093351364136, "learning_rate": 2.1311807793748805e-08, "loss": 0.0546, "step": 17769 }, { "epoch": 2.879131561892417, "grad_norm": 0.971030056476593, "learning_rate": 2.125486719689929e-08, "loss": 0.0632, "step": 17770 }, { "epoch": 2.879293583927414, "grad_norm": 0.8812588453292847, "learning_rate": 2.1198002443652276e-08, "loss": 0.0607, "step": 17771 }, { "epoch": 2.879455605962411, "grad_norm": 1.1112418174743652, "learning_rate": 2.1141213535747772e-08, "loss": 0.0659, "step": 17772 }, { "epoch": 2.8796176279974075, "grad_norm": 0.976514995098114, "learning_rate": 2.1084500474923554e-08, "loss": 0.0672, "step": 17773 }, { "epoch": 2.8797796500324044, "grad_norm": 0.9304304122924805, "learning_rate": 2.1027863262914617e-08, "loss": 0.0598, "step": 17774 }, { "epoch": 2.8799416720674014, "grad_norm": 1.0370122194290161, "learning_rate": 2.0971301901454023e-08, "loss": 0.0622, "step": 17775 }, { "epoch": 2.880103694102398, "grad_norm": 0.7911035418510437, "learning_rate": 2.0914816392272608e-08, "loss": 0.057, "step": 17776 }, { "epoch": 2.880265716137395, "grad_norm": 0.8377315998077393, "learning_rate": 2.0858406737098435e-08, "loss": 0.0523, "step": 17777 }, { "epoch": 2.8804277381723913, "grad_norm": 0.895128071308136, "learning_rate": 2.0802072937657624e-08, "loss": 0.0597, "step": 17778 }, { "epoch": 2.880589760207388, "grad_norm": 0.9647876620292664, "learning_rate": 2.0745814995673796e-08, "loss": 0.0653, "step": 17779 }, { "epoch": 2.8807517822423847, "grad_norm": 0.8965823650360107, "learning_rate": 2.068963291286863e-08, "loss": 0.0554, "step": 17780 }, { "epoch": 2.8809138042773816, "grad_norm": 1.0152802467346191, "learning_rate": 2.0633526690960747e-08, "loss": 0.0599, "step": 17781 }, { "epoch": 2.8810758263123786, "grad_norm": 0.886622428894043, "learning_rate": 2.0577496331666837e-08, "loss": 0.0631, "step": 17782 }, { "epoch": 2.881237848347375, "grad_norm": 0.943684995174408, "learning_rate": 2.052154183670163e-08, "loss": 0.06, "step": 17783 }, { "epoch": 2.881399870382372, "grad_norm": 0.9235215783119202, "learning_rate": 2.046566320777682e-08, "loss": 0.0594, "step": 17784 }, { "epoch": 2.881561892417369, "grad_norm": 0.9773284196853638, "learning_rate": 2.04098604466027e-08, "loss": 0.0646, "step": 17785 }, { "epoch": 2.8817239144523654, "grad_norm": 0.921341061592102, "learning_rate": 2.0354133554885967e-08, "loss": 0.059, "step": 17786 }, { "epoch": 2.8818859364873624, "grad_norm": 0.9354360699653625, "learning_rate": 2.0298482534332198e-08, "loss": 0.0553, "step": 17787 }, { "epoch": 2.8820479585223593, "grad_norm": 0.8430424332618713, "learning_rate": 2.0242907386644195e-08, "loss": 0.0548, "step": 17788 }, { "epoch": 2.8822099805573558, "grad_norm": 0.8397956490516663, "learning_rate": 2.0187408113522266e-08, "loss": 0.054, "step": 17789 }, { "epoch": 2.8823720025923527, "grad_norm": 0.967339813709259, "learning_rate": 2.0131984716664776e-08, "loss": 0.0617, "step": 17790 }, { "epoch": 2.882534024627349, "grad_norm": 1.0861202478408813, "learning_rate": 2.0076637197767312e-08, "loss": 0.0653, "step": 17791 }, { "epoch": 2.882696046662346, "grad_norm": 0.9953954219818115, "learning_rate": 2.002136555852352e-08, "loss": 0.0634, "step": 17792 }, { "epoch": 2.8828580686973426, "grad_norm": 0.7926887273788452, "learning_rate": 1.996616980062427e-08, "loss": 0.0497, "step": 17793 }, { "epoch": 2.8830200907323396, "grad_norm": 1.0522444248199463, "learning_rate": 1.9911049925758765e-08, "loss": 0.0647, "step": 17794 }, { "epoch": 2.8831821127673365, "grad_norm": 0.9192836284637451, "learning_rate": 1.9856005935613708e-08, "loss": 0.0607, "step": 17795 }, { "epoch": 2.883344134802333, "grad_norm": 0.9567740559577942, "learning_rate": 1.9801037831872482e-08, "loss": 0.0569, "step": 17796 }, { "epoch": 2.88350615683733, "grad_norm": 0.8763161897659302, "learning_rate": 1.9746145616217905e-08, "loss": 0.0535, "step": 17797 }, { "epoch": 2.883668178872327, "grad_norm": 0.9974742531776428, "learning_rate": 1.9691329290329185e-08, "loss": 0.0576, "step": 17798 }, { "epoch": 2.8838302009073233, "grad_norm": 0.915104866027832, "learning_rate": 1.9636588855883598e-08, "loss": 0.062, "step": 17799 }, { "epoch": 2.8839922229423203, "grad_norm": 0.8486490249633789, "learning_rate": 1.958192431455591e-08, "loss": 0.059, "step": 17800 }, { "epoch": 2.8841542449773168, "grad_norm": 0.944681704044342, "learning_rate": 1.9527335668018954e-08, "loss": 0.0648, "step": 17801 }, { "epoch": 2.8843162670123137, "grad_norm": 0.9253272414207458, "learning_rate": 1.9472822917942778e-08, "loss": 0.0609, "step": 17802 }, { "epoch": 2.88447828904731, "grad_norm": 0.9688064455986023, "learning_rate": 1.9418386065995222e-08, "loss": 0.0623, "step": 17803 }, { "epoch": 2.884640311082307, "grad_norm": 1.0113117694854736, "learning_rate": 1.9364025113842444e-08, "loss": 0.0597, "step": 17804 }, { "epoch": 2.884802333117304, "grad_norm": 0.8749908804893494, "learning_rate": 1.9309740063147566e-08, "loss": 0.0587, "step": 17805 }, { "epoch": 2.8849643551523005, "grad_norm": 1.0198384523391724, "learning_rate": 1.9255530915571197e-08, "loss": 0.0611, "step": 17806 }, { "epoch": 2.8851263771872975, "grad_norm": 0.9575091600418091, "learning_rate": 1.920139767277257e-08, "loss": 0.061, "step": 17807 }, { "epoch": 2.8852883992222944, "grad_norm": 0.9491891264915466, "learning_rate": 1.9147340336407584e-08, "loss": 0.0618, "step": 17808 }, { "epoch": 2.885450421257291, "grad_norm": 0.9675207734107971, "learning_rate": 1.9093358908130743e-08, "loss": 0.0675, "step": 17809 }, { "epoch": 2.885612443292288, "grad_norm": 0.8479520082473755, "learning_rate": 1.9039453389592954e-08, "loss": 0.0612, "step": 17810 }, { "epoch": 2.8857744653272848, "grad_norm": 1.0039427280426025, "learning_rate": 1.8985623782444284e-08, "loss": 0.064, "step": 17811 }, { "epoch": 2.8859364873622813, "grad_norm": 0.9611213207244873, "learning_rate": 1.893187008833175e-08, "loss": 0.0603, "step": 17812 }, { "epoch": 2.886098509397278, "grad_norm": 0.8589776158332825, "learning_rate": 1.8878192308899867e-08, "loss": 0.0586, "step": 17813 }, { "epoch": 2.8862605314322747, "grad_norm": 0.8623048067092896, "learning_rate": 1.8824590445790935e-08, "loss": 0.0626, "step": 17814 }, { "epoch": 2.8864225534672716, "grad_norm": 0.9948340654373169, "learning_rate": 1.877106450064531e-08, "loss": 0.0649, "step": 17815 }, { "epoch": 2.886584575502268, "grad_norm": 0.9719632863998413, "learning_rate": 1.871761447510084e-08, "loss": 0.0546, "step": 17816 }, { "epoch": 2.886746597537265, "grad_norm": 0.8713729977607727, "learning_rate": 1.866424037079234e-08, "loss": 0.0602, "step": 17817 }, { "epoch": 2.886908619572262, "grad_norm": 0.8655098676681519, "learning_rate": 1.8610942189353777e-08, "loss": 0.0556, "step": 17818 }, { "epoch": 2.8870706416072585, "grad_norm": 0.830318808555603, "learning_rate": 1.855771993241523e-08, "loss": 0.0535, "step": 17819 }, { "epoch": 2.8872326636422554, "grad_norm": 0.8426806926727295, "learning_rate": 1.850457360160568e-08, "loss": 0.0574, "step": 17820 }, { "epoch": 2.8873946856772523, "grad_norm": 0.8310089111328125, "learning_rate": 1.8451503198551047e-08, "loss": 0.0546, "step": 17821 }, { "epoch": 2.887556707712249, "grad_norm": 0.8731127381324768, "learning_rate": 1.839850872487503e-08, "loss": 0.0574, "step": 17822 }, { "epoch": 2.8877187297472457, "grad_norm": 0.9887226819992065, "learning_rate": 1.834559018219939e-08, "loss": 0.0567, "step": 17823 }, { "epoch": 2.8878807517822422, "grad_norm": 0.9463251829147339, "learning_rate": 1.829274757214339e-08, "loss": 0.0628, "step": 17824 }, { "epoch": 2.888042773817239, "grad_norm": 0.8988006114959717, "learning_rate": 1.8239980896323505e-08, "loss": 0.0581, "step": 17825 }, { "epoch": 2.8882047958522357, "grad_norm": 0.9556702375411987, "learning_rate": 1.8187290156354565e-08, "loss": 0.0587, "step": 17826 }, { "epoch": 2.8883668178872326, "grad_norm": 0.8816114068031311, "learning_rate": 1.8134675353848608e-08, "loss": 0.0593, "step": 17827 }, { "epoch": 2.8885288399222295, "grad_norm": 0.856730043888092, "learning_rate": 1.808213649041546e-08, "loss": 0.0554, "step": 17828 }, { "epoch": 2.888690861957226, "grad_norm": 0.8481936454772949, "learning_rate": 1.8029673567662997e-08, "loss": 0.0543, "step": 17829 }, { "epoch": 2.888852883992223, "grad_norm": 1.0299345254898071, "learning_rate": 1.7977286587196053e-08, "loss": 0.0651, "step": 17830 }, { "epoch": 2.88901490602722, "grad_norm": 0.9097673296928406, "learning_rate": 1.792497555061806e-08, "loss": 0.0626, "step": 17831 }, { "epoch": 2.8891769280622164, "grad_norm": 0.9130776524543762, "learning_rate": 1.7872740459529135e-08, "loss": 0.0627, "step": 17832 }, { "epoch": 2.8893389500972133, "grad_norm": 0.9484906792640686, "learning_rate": 1.7820581315527717e-08, "loss": 0.0658, "step": 17833 }, { "epoch": 2.8895009721322102, "grad_norm": 0.8442859649658203, "learning_rate": 1.7768498120209755e-08, "loss": 0.0559, "step": 17834 }, { "epoch": 2.8896629941672067, "grad_norm": 0.9843007326126099, "learning_rate": 1.771649087516897e-08, "loss": 0.0648, "step": 17835 }, { "epoch": 2.8898250162022032, "grad_norm": 0.8426430225372314, "learning_rate": 1.766455958199631e-08, "loss": 0.0564, "step": 17836 }, { "epoch": 2.8899870382372, "grad_norm": 0.8461860418319702, "learning_rate": 1.7612704242281342e-08, "loss": 0.0614, "step": 17837 }, { "epoch": 2.890149060272197, "grad_norm": 1.05631422996521, "learning_rate": 1.7560924857610016e-08, "loss": 0.064, "step": 17838 }, { "epoch": 2.8903110823071936, "grad_norm": 0.8737570643424988, "learning_rate": 1.750922142956718e-08, "loss": 0.0607, "step": 17839 }, { "epoch": 2.8904731043421905, "grad_norm": 0.8831677436828613, "learning_rate": 1.745759395973462e-08, "loss": 0.0525, "step": 17840 }, { "epoch": 2.8906351263771874, "grad_norm": 0.9918739199638367, "learning_rate": 1.7406042449691907e-08, "loss": 0.0665, "step": 17841 }, { "epoch": 2.890797148412184, "grad_norm": 0.8606711030006409, "learning_rate": 1.7354566901016944e-08, "loss": 0.0596, "step": 17842 }, { "epoch": 2.890959170447181, "grad_norm": 0.8856733441352844, "learning_rate": 1.730316731528403e-08, "loss": 0.0629, "step": 17843 }, { "epoch": 2.891121192482178, "grad_norm": 1.0403659343719482, "learning_rate": 1.7251843694066074e-08, "loss": 0.0601, "step": 17844 }, { "epoch": 2.8912832145171743, "grad_norm": 0.8173930048942566, "learning_rate": 1.7200596038934038e-08, "loss": 0.0585, "step": 17845 }, { "epoch": 2.8914452365521712, "grad_norm": 0.8533201813697815, "learning_rate": 1.7149424351455003e-08, "loss": 0.0593, "step": 17846 }, { "epoch": 2.8916072585871677, "grad_norm": 0.941921591758728, "learning_rate": 1.7098328633195493e-08, "loss": 0.0658, "step": 17847 }, { "epoch": 2.8917692806221647, "grad_norm": 1.0077810287475586, "learning_rate": 1.7047308885718427e-08, "loss": 0.0636, "step": 17848 }, { "epoch": 2.891931302657161, "grad_norm": 1.1122187376022339, "learning_rate": 1.6996365110585332e-08, "loss": 0.0593, "step": 17849 }, { "epoch": 2.892093324692158, "grad_norm": 1.093993902206421, "learning_rate": 1.694549730935441e-08, "loss": 0.073, "step": 17850 }, { "epoch": 2.892255346727155, "grad_norm": 0.9843876957893372, "learning_rate": 1.6894705483582464e-08, "loss": 0.0657, "step": 17851 }, { "epoch": 2.8924173687621515, "grad_norm": 0.9816562533378601, "learning_rate": 1.684398963482381e-08, "loss": 0.0609, "step": 17852 }, { "epoch": 2.8925793907971484, "grad_norm": 1.04456627368927, "learning_rate": 1.6793349764629707e-08, "loss": 0.0619, "step": 17853 }, { "epoch": 2.8927414128321454, "grad_norm": 0.8714144825935364, "learning_rate": 1.674278587454975e-08, "loss": 0.0546, "step": 17854 }, { "epoch": 2.892903434867142, "grad_norm": 0.9265003204345703, "learning_rate": 1.669229796613131e-08, "loss": 0.0642, "step": 17855 }, { "epoch": 2.893065456902139, "grad_norm": 0.916529655456543, "learning_rate": 1.6641886040919263e-08, "loss": 0.0613, "step": 17856 }, { "epoch": 2.8932274789371357, "grad_norm": 0.8931515216827393, "learning_rate": 1.659155010045571e-08, "loss": 0.0584, "step": 17857 }, { "epoch": 2.893389500972132, "grad_norm": 0.9384124279022217, "learning_rate": 1.654129014628081e-08, "loss": 0.0612, "step": 17858 }, { "epoch": 2.8935515230071287, "grad_norm": 1.1038861274719238, "learning_rate": 1.649110617993277e-08, "loss": 0.0643, "step": 17859 }, { "epoch": 2.8937135450421256, "grad_norm": 0.8668294548988342, "learning_rate": 1.6440998202947034e-08, "loss": 0.0565, "step": 17860 }, { "epoch": 2.8938755670771226, "grad_norm": 0.8724477291107178, "learning_rate": 1.639096621685654e-08, "loss": 0.0577, "step": 17861 }, { "epoch": 2.894037589112119, "grad_norm": 0.786655068397522, "learning_rate": 1.634101022319229e-08, "loss": 0.0581, "step": 17862 }, { "epoch": 2.894199611147116, "grad_norm": 0.9810567498207092, "learning_rate": 1.6291130223482498e-08, "loss": 0.0551, "step": 17863 }, { "epoch": 2.894361633182113, "grad_norm": 1.0155911445617676, "learning_rate": 1.6241326219254006e-08, "loss": 0.0584, "step": 17864 }, { "epoch": 2.8945236552171094, "grad_norm": 1.0140700340270996, "learning_rate": 1.6191598212030314e-08, "loss": 0.0612, "step": 17865 }, { "epoch": 2.8946856772521063, "grad_norm": 0.9874489903450012, "learning_rate": 1.6141946203332703e-08, "loss": 0.0627, "step": 17866 }, { "epoch": 2.8948476992871033, "grad_norm": 0.938338041305542, "learning_rate": 1.609237019468107e-08, "loss": 0.0587, "step": 17867 }, { "epoch": 2.8950097213220998, "grad_norm": 0.9551253318786621, "learning_rate": 1.6042870187591985e-08, "loss": 0.066, "step": 17868 }, { "epoch": 2.8951717433570967, "grad_norm": 1.030510663986206, "learning_rate": 1.5993446183579786e-08, "loss": 0.0592, "step": 17869 }, { "epoch": 2.895333765392093, "grad_norm": 0.9057535529136658, "learning_rate": 1.5944098184156876e-08, "loss": 0.0584, "step": 17870 }, { "epoch": 2.89549578742709, "grad_norm": 0.8612263798713684, "learning_rate": 1.5894826190833712e-08, "loss": 0.0557, "step": 17871 }, { "epoch": 2.8956578094620866, "grad_norm": 0.784995973110199, "learning_rate": 1.5845630205117147e-08, "loss": 0.0542, "step": 17872 }, { "epoch": 2.8958198314970836, "grad_norm": 0.9127455353736877, "learning_rate": 1.579651022851264e-08, "loss": 0.0654, "step": 17873 }, { "epoch": 2.8959818535320805, "grad_norm": 0.9822579026222229, "learning_rate": 1.5747466262523438e-08, "loss": 0.0597, "step": 17874 }, { "epoch": 2.896143875567077, "grad_norm": 0.833354651927948, "learning_rate": 1.569849830865e-08, "loss": 0.0518, "step": 17875 }, { "epoch": 2.896305897602074, "grad_norm": 0.9307559132575989, "learning_rate": 1.5649606368390578e-08, "loss": 0.0571, "step": 17876 }, { "epoch": 2.896467919637071, "grad_norm": 0.8814070820808411, "learning_rate": 1.56007904432412e-08, "loss": 0.0577, "step": 17877 }, { "epoch": 2.8966299416720673, "grad_norm": 0.9398168325424194, "learning_rate": 1.5552050534695383e-08, "loss": 0.0579, "step": 17878 }, { "epoch": 2.8967919637070643, "grad_norm": 0.8523743152618408, "learning_rate": 1.5503386644244724e-08, "loss": 0.0567, "step": 17879 }, { "epoch": 2.8969539857420608, "grad_norm": 0.9002468585968018, "learning_rate": 1.5454798773378023e-08, "loss": 0.0628, "step": 17880 }, { "epoch": 2.8971160077770577, "grad_norm": 1.3708131313323975, "learning_rate": 1.5406286923582148e-08, "loss": 0.0594, "step": 17881 }, { "epoch": 2.897278029812054, "grad_norm": 0.9118489027023315, "learning_rate": 1.5357851096340915e-08, "loss": 0.0566, "step": 17882 }, { "epoch": 2.897440051847051, "grad_norm": 0.9221370816230774, "learning_rate": 1.5309491293137026e-08, "loss": 0.0616, "step": 17883 }, { "epoch": 2.897602073882048, "grad_norm": 0.9659370183944702, "learning_rate": 1.526120751544985e-08, "loss": 0.0584, "step": 17884 }, { "epoch": 2.8977640959170445, "grad_norm": 1.0934197902679443, "learning_rate": 1.5212999764756543e-08, "loss": 0.0669, "step": 17885 }, { "epoch": 2.8979261179520415, "grad_norm": 0.854312539100647, "learning_rate": 1.5164868042532864e-08, "loss": 0.0521, "step": 17886 }, { "epoch": 2.8980881399870384, "grad_norm": 1.0536237955093384, "learning_rate": 1.5116812350250422e-08, "loss": 0.0606, "step": 17887 }, { "epoch": 2.898250162022035, "grad_norm": 0.9276726245880127, "learning_rate": 1.506883268938053e-08, "loss": 0.0588, "step": 17888 }, { "epoch": 2.898412184057032, "grad_norm": 0.8733401894569397, "learning_rate": 1.5020929061391188e-08, "loss": 0.0603, "step": 17889 }, { "epoch": 2.8985742060920288, "grad_norm": 0.8288245797157288, "learning_rate": 1.4973101467747608e-08, "loss": 0.0547, "step": 17890 }, { "epoch": 2.8987362281270252, "grad_norm": 0.9966906905174255, "learning_rate": 1.4925349909913346e-08, "loss": 0.0615, "step": 17891 }, { "epoch": 2.898898250162022, "grad_norm": 0.9769363403320312, "learning_rate": 1.4877674389349728e-08, "loss": 0.0607, "step": 17892 }, { "epoch": 2.8990602721970187, "grad_norm": 0.8618332743644714, "learning_rate": 1.4830074907515313e-08, "loss": 0.0596, "step": 17893 }, { "epoch": 2.8992222942320156, "grad_norm": 0.819103479385376, "learning_rate": 1.4782551465866713e-08, "loss": 0.0527, "step": 17894 }, { "epoch": 2.899384316267012, "grad_norm": 0.8623654246330261, "learning_rate": 1.4735104065858042e-08, "loss": 0.0623, "step": 17895 }, { "epoch": 2.899546338302009, "grad_norm": 0.8684125542640686, "learning_rate": 1.4687732708940916e-08, "loss": 0.0539, "step": 17896 }, { "epoch": 2.899708360337006, "grad_norm": 0.8661196827888489, "learning_rate": 1.4640437396564733e-08, "loss": 0.056, "step": 17897 }, { "epoch": 2.8998703823720025, "grad_norm": 0.9082359671592712, "learning_rate": 1.4593218130176668e-08, "loss": 0.0535, "step": 17898 }, { "epoch": 2.9000324044069994, "grad_norm": 0.8829771876335144, "learning_rate": 1.4546074911221675e-08, "loss": 0.0582, "step": 17899 }, { "epoch": 2.9001944264419963, "grad_norm": 1.0028576850891113, "learning_rate": 1.4499007741141934e-08, "loss": 0.063, "step": 17900 }, { "epoch": 2.900356448476993, "grad_norm": 0.8748372793197632, "learning_rate": 1.4452016621377961e-08, "loss": 0.0625, "step": 17901 }, { "epoch": 2.9005184705119897, "grad_norm": 0.986389696598053, "learning_rate": 1.4405101553367218e-08, "loss": 0.0616, "step": 17902 }, { "epoch": 2.9006804925469862, "grad_norm": 1.0490912199020386, "learning_rate": 1.4358262538545498e-08, "loss": 0.058, "step": 17903 }, { "epoch": 2.900842514581983, "grad_norm": 0.8131743669509888, "learning_rate": 1.4311499578345821e-08, "loss": 0.0559, "step": 17904 }, { "epoch": 2.9010045366169797, "grad_norm": 0.8198305368423462, "learning_rate": 1.4264812674198714e-08, "loss": 0.0563, "step": 17905 }, { "epoch": 2.9011665586519766, "grad_norm": 0.8010324239730835, "learning_rate": 1.421820182753303e-08, "loss": 0.0547, "step": 17906 }, { "epoch": 2.9013285806869735, "grad_norm": 1.2426363229751587, "learning_rate": 1.4171667039775128e-08, "loss": 0.0607, "step": 17907 }, { "epoch": 2.90149060272197, "grad_norm": 0.8623930811882019, "learning_rate": 1.4125208312348593e-08, "loss": 0.0633, "step": 17908 }, { "epoch": 2.901652624756967, "grad_norm": 1.1092482805252075, "learning_rate": 1.4078825646675065e-08, "loss": 0.069, "step": 17909 }, { "epoch": 2.901814646791964, "grad_norm": 0.8913895487785339, "learning_rate": 1.403251904417341e-08, "loss": 0.0588, "step": 17910 }, { "epoch": 2.9019766688269604, "grad_norm": 1.0606086254119873, "learning_rate": 1.3986288506260825e-08, "loss": 0.0616, "step": 17911 }, { "epoch": 2.9021386908619573, "grad_norm": 0.8608266115188599, "learning_rate": 1.3940134034351738e-08, "loss": 0.0547, "step": 17912 }, { "epoch": 2.9023007128969542, "grad_norm": 1.2168540954589844, "learning_rate": 1.3894055629858627e-08, "loss": 0.072, "step": 17913 }, { "epoch": 2.9024627349319507, "grad_norm": 0.862879753112793, "learning_rate": 1.3848053294190922e-08, "loss": 0.054, "step": 17914 }, { "epoch": 2.9026247569669477, "grad_norm": 0.9422655701637268, "learning_rate": 1.3802127028756662e-08, "loss": 0.0596, "step": 17915 }, { "epoch": 2.902786779001944, "grad_norm": 0.8769212961196899, "learning_rate": 1.3756276834960558e-08, "loss": 0.0569, "step": 17916 }, { "epoch": 2.902948801036941, "grad_norm": 0.8366473913192749, "learning_rate": 1.3710502714205654e-08, "loss": 0.0578, "step": 17917 }, { "epoch": 2.9031108230719376, "grad_norm": 0.9338627457618713, "learning_rate": 1.366480466789305e-08, "loss": 0.0608, "step": 17918 }, { "epoch": 2.9032728451069345, "grad_norm": 0.9875946640968323, "learning_rate": 1.3619182697420518e-08, "loss": 0.0588, "step": 17919 }, { "epoch": 2.9034348671419314, "grad_norm": 0.8782922029495239, "learning_rate": 1.3573636804183887e-08, "loss": 0.0565, "step": 17920 }, { "epoch": 2.903596889176928, "grad_norm": 0.8396292924880981, "learning_rate": 1.352816698957704e-08, "loss": 0.0578, "step": 17921 }, { "epoch": 2.903758911211925, "grad_norm": 1.0632270574569702, "learning_rate": 1.3482773254991365e-08, "loss": 0.0586, "step": 17922 }, { "epoch": 2.903920933246922, "grad_norm": 0.9017321467399597, "learning_rate": 1.3437455601815198e-08, "loss": 0.0591, "step": 17923 }, { "epoch": 2.9040829552819183, "grad_norm": 0.8652000427246094, "learning_rate": 1.3392214031435757e-08, "loss": 0.0583, "step": 17924 }, { "epoch": 2.904244977316915, "grad_norm": 0.9219072461128235, "learning_rate": 1.334704854523694e-08, "loss": 0.0661, "step": 17925 }, { "epoch": 2.9044069993519117, "grad_norm": 1.0006201267242432, "learning_rate": 1.3301959144600974e-08, "loss": 0.0635, "step": 17926 }, { "epoch": 2.9045690213869086, "grad_norm": 0.8481730222702026, "learning_rate": 1.325694583090731e-08, "loss": 0.0576, "step": 17927 }, { "epoch": 2.904731043421905, "grad_norm": 0.8712880611419678, "learning_rate": 1.3212008605533177e-08, "loss": 0.0556, "step": 17928 }, { "epoch": 2.904893065456902, "grad_norm": 0.8364845514297485, "learning_rate": 1.316714746985387e-08, "loss": 0.058, "step": 17929 }, { "epoch": 2.905055087491899, "grad_norm": 0.8850131630897522, "learning_rate": 1.312236242524162e-08, "loss": 0.0563, "step": 17930 }, { "epoch": 2.9052171095268955, "grad_norm": 0.8984468579292297, "learning_rate": 1.3077653473067276e-08, "loss": 0.0587, "step": 17931 }, { "epoch": 2.9053791315618924, "grad_norm": 0.9292992949485779, "learning_rate": 1.3033020614698078e-08, "loss": 0.0633, "step": 17932 }, { "epoch": 2.9055411535968894, "grad_norm": 1.0178935527801514, "learning_rate": 1.298846385150071e-08, "loss": 0.0657, "step": 17933 }, { "epoch": 2.905703175631886, "grad_norm": 0.9548495411872864, "learning_rate": 1.2943983184837417e-08, "loss": 0.0589, "step": 17934 }, { "epoch": 2.905865197666883, "grad_norm": 0.9326539039611816, "learning_rate": 1.2899578616069607e-08, "loss": 0.0588, "step": 17935 }, { "epoch": 2.9060272197018797, "grad_norm": 0.9059630036354065, "learning_rate": 1.2855250146556197e-08, "loss": 0.0605, "step": 17936 }, { "epoch": 2.906189241736876, "grad_norm": 0.8523368835449219, "learning_rate": 1.28109977776536e-08, "loss": 0.051, "step": 17937 }, { "epoch": 2.906351263771873, "grad_norm": 0.7979968786239624, "learning_rate": 1.2766821510715177e-08, "loss": 0.0501, "step": 17938 }, { "epoch": 2.9065132858068696, "grad_norm": 0.8871493339538574, "learning_rate": 1.272272134709318e-08, "loss": 0.0613, "step": 17939 }, { "epoch": 2.9066753078418666, "grad_norm": 0.8767561316490173, "learning_rate": 1.2678697288136809e-08, "loss": 0.0619, "step": 17940 }, { "epoch": 2.906837329876863, "grad_norm": 0.8473535180091858, "learning_rate": 1.2634749335193319e-08, "loss": 0.0568, "step": 17941 }, { "epoch": 2.90699935191186, "grad_norm": 0.9051182866096497, "learning_rate": 1.2590877489606911e-08, "loss": 0.0559, "step": 17942 }, { "epoch": 2.907161373946857, "grad_norm": 0.928796112537384, "learning_rate": 1.2547081752720402e-08, "loss": 0.0609, "step": 17943 }, { "epoch": 2.9073233959818534, "grad_norm": 0.9684448838233948, "learning_rate": 1.2503362125873552e-08, "loss": 0.0574, "step": 17944 }, { "epoch": 2.9074854180168503, "grad_norm": 1.0342750549316406, "learning_rate": 1.2459718610404182e-08, "loss": 0.0629, "step": 17945 }, { "epoch": 2.9076474400518473, "grad_norm": 0.8830204606056213, "learning_rate": 1.241615120764761e-08, "loss": 0.0574, "step": 17946 }, { "epoch": 2.9078094620868438, "grad_norm": 0.8388495445251465, "learning_rate": 1.2372659918937213e-08, "loss": 0.0511, "step": 17947 }, { "epoch": 2.9079714841218407, "grad_norm": 1.0486900806427002, "learning_rate": 1.2329244745603596e-08, "loss": 0.0685, "step": 17948 }, { "epoch": 2.908133506156837, "grad_norm": 0.8908421993255615, "learning_rate": 1.2285905688974587e-08, "loss": 0.0535, "step": 17949 }, { "epoch": 2.908295528191834, "grad_norm": 0.8851662278175354, "learning_rate": 1.2242642750376899e-08, "loss": 0.0572, "step": 17950 }, { "epoch": 2.9084575502268306, "grad_norm": 0.9874460697174072, "learning_rate": 1.2199455931134197e-08, "loss": 0.0566, "step": 17951 }, { "epoch": 2.9086195722618275, "grad_norm": 1.0689846277236938, "learning_rate": 1.2156345232567923e-08, "loss": 0.0702, "step": 17952 }, { "epoch": 2.9087815942968245, "grad_norm": 1.057041049003601, "learning_rate": 1.2113310655996746e-08, "loss": 0.056, "step": 17953 }, { "epoch": 2.908943616331821, "grad_norm": 0.8022787570953369, "learning_rate": 1.2070352202737668e-08, "loss": 0.0512, "step": 17954 }, { "epoch": 2.909105638366818, "grad_norm": 0.9817745685577393, "learning_rate": 1.202746987410519e-08, "loss": 0.0612, "step": 17955 }, { "epoch": 2.909267660401815, "grad_norm": 1.1243666410446167, "learning_rate": 1.19846636714116e-08, "loss": 0.0682, "step": 17956 }, { "epoch": 2.9094296824368113, "grad_norm": 0.9882938861846924, "learning_rate": 1.1941933595966127e-08, "loss": 0.0599, "step": 17957 }, { "epoch": 2.9095917044718083, "grad_norm": 0.9126664996147156, "learning_rate": 1.1899279649076612e-08, "loss": 0.0604, "step": 17958 }, { "epoch": 2.909753726506805, "grad_norm": 0.8586945533752441, "learning_rate": 1.1856701832047845e-08, "loss": 0.0615, "step": 17959 }, { "epoch": 2.9099157485418017, "grad_norm": 0.8310577273368835, "learning_rate": 1.181420014618323e-08, "loss": 0.0592, "step": 17960 }, { "epoch": 2.910077770576798, "grad_norm": 0.9139492511749268, "learning_rate": 1.1771774592782558e-08, "loss": 0.0584, "step": 17961 }, { "epoch": 2.910239792611795, "grad_norm": 1.0066767930984497, "learning_rate": 1.172942517314396e-08, "loss": 0.0588, "step": 17962 }, { "epoch": 2.910401814646792, "grad_norm": 0.7591646909713745, "learning_rate": 1.1687151888563897e-08, "loss": 0.0507, "step": 17963 }, { "epoch": 2.9105638366817885, "grad_norm": 0.8789999485015869, "learning_rate": 1.1644954740334946e-08, "loss": 0.0631, "step": 17964 }, { "epoch": 2.9107258587167855, "grad_norm": 0.9494521617889404, "learning_rate": 1.160283372974913e-08, "loss": 0.0592, "step": 17965 }, { "epoch": 2.9108878807517824, "grad_norm": 0.9379144310951233, "learning_rate": 1.1560788858094584e-08, "loss": 0.0607, "step": 17966 }, { "epoch": 2.911049902786779, "grad_norm": 1.2142386436462402, "learning_rate": 1.1518820126658058e-08, "loss": 0.0623, "step": 17967 }, { "epoch": 2.911211924821776, "grad_norm": 0.8130635619163513, "learning_rate": 1.1476927536723248e-08, "loss": 0.0544, "step": 17968 }, { "epoch": 2.9113739468567728, "grad_norm": 0.9625205397605896, "learning_rate": 1.1435111089572737e-08, "loss": 0.0612, "step": 17969 }, { "epoch": 2.9115359688917692, "grad_norm": 1.122702956199646, "learning_rate": 1.1393370786485502e-08, "loss": 0.0628, "step": 17970 }, { "epoch": 2.911697990926766, "grad_norm": 0.9404149651527405, "learning_rate": 1.1351706628738857e-08, "loss": 0.0646, "step": 17971 }, { "epoch": 2.9118600129617627, "grad_norm": 0.8580222725868225, "learning_rate": 1.1310118617607613e-08, "loss": 0.0536, "step": 17972 }, { "epoch": 2.9120220349967596, "grad_norm": 0.8675228953361511, "learning_rate": 1.1268606754364087e-08, "loss": 0.056, "step": 17973 }, { "epoch": 2.912184057031756, "grad_norm": 0.8910603523254395, "learning_rate": 1.122717104027865e-08, "loss": 0.0547, "step": 17974 }, { "epoch": 2.912346079066753, "grad_norm": 0.8188005685806274, "learning_rate": 1.1185811476619179e-08, "loss": 0.0561, "step": 17975 }, { "epoch": 2.91250810110175, "grad_norm": 0.8997900485992432, "learning_rate": 1.1144528064650772e-08, "loss": 0.0615, "step": 17976 }, { "epoch": 2.9126701231367464, "grad_norm": 0.9417411684989929, "learning_rate": 1.1103320805637141e-08, "loss": 0.0616, "step": 17977 }, { "epoch": 2.9128321451717434, "grad_norm": 0.7963204383850098, "learning_rate": 1.1062189700838944e-08, "loss": 0.0538, "step": 17978 }, { "epoch": 2.9129941672067403, "grad_norm": 0.8257819414138794, "learning_rate": 1.102113475151434e-08, "loss": 0.0554, "step": 17979 }, { "epoch": 2.913156189241737, "grad_norm": 1.1627333164215088, "learning_rate": 1.0980155958920103e-08, "loss": 0.0594, "step": 17980 }, { "epoch": 2.9133182112767337, "grad_norm": 0.9248695373535156, "learning_rate": 1.0939253324309673e-08, "loss": 0.0618, "step": 17981 }, { "epoch": 2.9134802333117307, "grad_norm": 0.9199352264404297, "learning_rate": 1.089842684893455e-08, "loss": 0.0585, "step": 17982 }, { "epoch": 2.913642255346727, "grad_norm": 0.9159268140792847, "learning_rate": 1.0857676534044014e-08, "loss": 0.0591, "step": 17983 }, { "epoch": 2.9138042773817237, "grad_norm": 1.0434057712554932, "learning_rate": 1.0817002380885123e-08, "loss": 0.0663, "step": 17984 }, { "epoch": 2.9139662994167206, "grad_norm": 0.8765219449996948, "learning_rate": 1.0776404390702434e-08, "loss": 0.0646, "step": 17985 }, { "epoch": 2.9141283214517175, "grad_norm": 0.9191032648086548, "learning_rate": 1.0735882564737732e-08, "loss": 0.0617, "step": 17986 }, { "epoch": 2.914290343486714, "grad_norm": 1.1906532049179077, "learning_rate": 1.0695436904231137e-08, "loss": 0.0587, "step": 17987 }, { "epoch": 2.914452365521711, "grad_norm": 0.9076985716819763, "learning_rate": 1.0655067410419994e-08, "loss": 0.0602, "step": 17988 }, { "epoch": 2.914614387556708, "grad_norm": 0.9002047777175903, "learning_rate": 1.061477408453998e-08, "loss": 0.0568, "step": 17989 }, { "epoch": 2.9147764095917044, "grad_norm": 0.9402999877929688, "learning_rate": 1.057455692782372e-08, "loss": 0.062, "step": 17990 }, { "epoch": 2.9149384316267013, "grad_norm": 0.9988169074058533, "learning_rate": 1.0534415941501341e-08, "loss": 0.0629, "step": 17991 }, { "epoch": 2.9151004536616982, "grad_norm": 0.8821046352386475, "learning_rate": 1.049435112680186e-08, "loss": 0.0601, "step": 17992 }, { "epoch": 2.9152624756966947, "grad_norm": 0.8363478183746338, "learning_rate": 1.0454362484950409e-08, "loss": 0.0605, "step": 17993 }, { "epoch": 2.9154244977316917, "grad_norm": 0.9162735939025879, "learning_rate": 1.0414450017171007e-08, "loss": 0.0612, "step": 17994 }, { "epoch": 2.915586519766688, "grad_norm": 0.9638509750366211, "learning_rate": 1.03746137246849e-08, "loss": 0.0622, "step": 17995 }, { "epoch": 2.915748541801685, "grad_norm": 0.8784467577934265, "learning_rate": 1.0334853608710838e-08, "loss": 0.0626, "step": 17996 }, { "epoch": 2.9159105638366816, "grad_norm": 0.9395826458930969, "learning_rate": 1.0295169670465066e-08, "loss": 0.0605, "step": 17997 }, { "epoch": 2.9160725858716785, "grad_norm": 1.00931978225708, "learning_rate": 1.0255561911162449e-08, "loss": 0.0597, "step": 17998 }, { "epoch": 2.9162346079066754, "grad_norm": 0.8709773421287537, "learning_rate": 1.0216030332014515e-08, "loss": 0.0579, "step": 17999 }, { "epoch": 2.916396629941672, "grad_norm": 0.8078761100769043, "learning_rate": 1.0176574934230854e-08, "loss": 0.0583, "step": 18000 }, { "epoch": 2.916558651976669, "grad_norm": 0.8328400254249573, "learning_rate": 1.0137195719018556e-08, "loss": 0.0519, "step": 18001 }, { "epoch": 2.916720674011666, "grad_norm": 0.9479650259017944, "learning_rate": 1.0097892687583044e-08, "loss": 0.0587, "step": 18002 }, { "epoch": 2.9168826960466623, "grad_norm": 0.9351217150688171, "learning_rate": 1.0058665841126414e-08, "loss": 0.0608, "step": 18003 }, { "epoch": 2.917044718081659, "grad_norm": 0.7649050354957581, "learning_rate": 1.0019515180849094e-08, "loss": 0.0538, "step": 18004 }, { "epoch": 2.9172067401166557, "grad_norm": 0.9090438485145569, "learning_rate": 9.980440707948735e-09, "loss": 0.0559, "step": 18005 }, { "epoch": 2.9173687621516526, "grad_norm": 0.8982040286064148, "learning_rate": 9.941442423621606e-09, "loss": 0.056, "step": 18006 }, { "epoch": 2.917530784186649, "grad_norm": 0.9106990694999695, "learning_rate": 9.902520329060083e-09, "loss": 0.0535, "step": 18007 }, { "epoch": 2.917692806221646, "grad_norm": 1.0003997087478638, "learning_rate": 9.863674425455716e-09, "loss": 0.0647, "step": 18008 }, { "epoch": 2.917854828256643, "grad_norm": 1.027595043182373, "learning_rate": 9.824904713996996e-09, "loss": 0.0557, "step": 18009 }, { "epoch": 2.9180168502916395, "grad_norm": 0.9183197021484375, "learning_rate": 9.78621119586992e-09, "loss": 0.0558, "step": 18010 }, { "epoch": 2.9181788723266364, "grad_norm": 1.1512269973754883, "learning_rate": 9.747593872258543e-09, "loss": 0.0643, "step": 18011 }, { "epoch": 2.9183408943616334, "grad_norm": 0.859645664691925, "learning_rate": 9.709052744344694e-09, "loss": 0.0649, "step": 18012 }, { "epoch": 2.91850291639663, "grad_norm": 0.85866379737854, "learning_rate": 9.670587813307153e-09, "loss": 0.0537, "step": 18013 }, { "epoch": 2.9186649384316268, "grad_norm": 0.8549041152000427, "learning_rate": 9.63219908032359e-09, "loss": 0.0596, "step": 18014 }, { "epoch": 2.9188269604666237, "grad_norm": 0.8607486486434937, "learning_rate": 9.593886546567787e-09, "loss": 0.06, "step": 18015 }, { "epoch": 2.91898898250162, "grad_norm": 0.9240609407424927, "learning_rate": 9.55565021321242e-09, "loss": 0.0589, "step": 18016 }, { "epoch": 2.919151004536617, "grad_norm": 0.9318268895149231, "learning_rate": 9.51749008142766e-09, "loss": 0.0504, "step": 18017 }, { "epoch": 2.9193130265716136, "grad_norm": 0.9120360016822815, "learning_rate": 9.479406152380632e-09, "loss": 0.0582, "step": 18018 }, { "epoch": 2.9194750486066106, "grad_norm": 0.8662173748016357, "learning_rate": 9.441398427236792e-09, "loss": 0.0557, "step": 18019 }, { "epoch": 2.919637070641607, "grad_norm": 0.9860486388206482, "learning_rate": 9.403466907159375e-09, "loss": 0.0609, "step": 18020 }, { "epoch": 2.919799092676604, "grad_norm": 1.0033873319625854, "learning_rate": 9.365611593308565e-09, "loss": 0.0586, "step": 18021 }, { "epoch": 2.919961114711601, "grad_norm": 0.8788847327232361, "learning_rate": 9.327832486842603e-09, "loss": 0.0595, "step": 18022 }, { "epoch": 2.9201231367465974, "grad_norm": 1.0497198104858398, "learning_rate": 9.290129588918062e-09, "loss": 0.0669, "step": 18023 }, { "epoch": 2.9202851587815943, "grad_norm": 0.9634641408920288, "learning_rate": 9.25250290068791e-09, "loss": 0.0572, "step": 18024 }, { "epoch": 2.9204471808165913, "grad_norm": 0.8264623284339905, "learning_rate": 9.214952423303724e-09, "loss": 0.0575, "step": 18025 }, { "epoch": 2.9206092028515878, "grad_norm": 0.8819963335990906, "learning_rate": 9.17747815791431e-09, "loss": 0.0554, "step": 18026 }, { "epoch": 2.9207712248865847, "grad_norm": 0.8962211012840271, "learning_rate": 9.140080105666527e-09, "loss": 0.0608, "step": 18027 }, { "epoch": 2.920933246921581, "grad_norm": 1.065934181213379, "learning_rate": 9.102758267704736e-09, "loss": 0.0623, "step": 18028 }, { "epoch": 2.921095268956578, "grad_norm": 0.9114288687705994, "learning_rate": 9.065512645170249e-09, "loss": 0.0578, "step": 18029 }, { "epoch": 2.9212572909915746, "grad_norm": 1.0408483743667603, "learning_rate": 9.02834323920354e-09, "loss": 0.0679, "step": 18030 }, { "epoch": 2.9214193130265715, "grad_norm": 0.9269614815711975, "learning_rate": 8.991250050941203e-09, "loss": 0.0599, "step": 18031 }, { "epoch": 2.9215813350615685, "grad_norm": 0.850156843662262, "learning_rate": 8.954233081518438e-09, "loss": 0.0532, "step": 18032 }, { "epoch": 2.921743357096565, "grad_norm": 0.8286687135696411, "learning_rate": 8.917292332068228e-09, "loss": 0.0546, "step": 18033 }, { "epoch": 2.921905379131562, "grad_norm": 0.8290266394615173, "learning_rate": 8.880427803720226e-09, "loss": 0.0591, "step": 18034 }, { "epoch": 2.922067401166559, "grad_norm": 0.8320590853691101, "learning_rate": 8.843639497602973e-09, "loss": 0.0516, "step": 18035 }, { "epoch": 2.9222294232015553, "grad_norm": 0.9579795002937317, "learning_rate": 8.806927414841959e-09, "loss": 0.0585, "step": 18036 }, { "epoch": 2.9223914452365523, "grad_norm": 0.8604581356048584, "learning_rate": 8.770291556560174e-09, "loss": 0.0567, "step": 18037 }, { "epoch": 2.922553467271549, "grad_norm": 0.8759823441505432, "learning_rate": 8.73373192387894e-09, "loss": 0.0628, "step": 18038 }, { "epoch": 2.9227154893065457, "grad_norm": 0.9367085099220276, "learning_rate": 8.697248517916535e-09, "loss": 0.0609, "step": 18039 }, { "epoch": 2.9228775113415426, "grad_norm": 0.9392814040184021, "learning_rate": 8.660841339789561e-09, "loss": 0.068, "step": 18040 }, { "epoch": 2.923039533376539, "grad_norm": 0.8410159349441528, "learning_rate": 8.62451039061213e-09, "loss": 0.0539, "step": 18041 }, { "epoch": 2.923201555411536, "grad_norm": 0.8481424450874329, "learning_rate": 8.58825567149557e-09, "loss": 0.0567, "step": 18042 }, { "epoch": 2.9233635774465325, "grad_norm": 1.1205499172210693, "learning_rate": 8.55207718354928e-09, "loss": 0.0688, "step": 18043 }, { "epoch": 2.9235255994815295, "grad_norm": 0.819627046585083, "learning_rate": 8.515974927880144e-09, "loss": 0.055, "step": 18044 }, { "epoch": 2.9236876215165264, "grad_norm": 0.8424476981163025, "learning_rate": 8.479948905592839e-09, "loss": 0.0556, "step": 18045 }, { "epoch": 2.923849643551523, "grad_norm": 0.9404410719871521, "learning_rate": 8.443999117790091e-09, "loss": 0.0572, "step": 18046 }, { "epoch": 2.92401166558652, "grad_norm": 1.1736658811569214, "learning_rate": 8.4081255655713e-09, "loss": 0.0574, "step": 18047 }, { "epoch": 2.9241736876215167, "grad_norm": 1.16831374168396, "learning_rate": 8.3723282500342e-09, "loss": 0.0615, "step": 18048 }, { "epoch": 2.9243357096565132, "grad_norm": 0.8560138940811157, "learning_rate": 8.33660717227458e-09, "loss": 0.0581, "step": 18049 }, { "epoch": 2.92449773169151, "grad_norm": 0.9814584255218506, "learning_rate": 8.3009623333849e-09, "loss": 0.0648, "step": 18050 }, { "epoch": 2.9246597537265067, "grad_norm": 0.8727818131446838, "learning_rate": 8.265393734455674e-09, "loss": 0.0601, "step": 18051 }, { "epoch": 2.9248217757615036, "grad_norm": 1.0411512851715088, "learning_rate": 8.229901376575755e-09, "loss": 0.0601, "step": 18052 }, { "epoch": 2.9249837977965, "grad_norm": 1.0159389972686768, "learning_rate": 8.194485260830943e-09, "loss": 0.0582, "step": 18053 }, { "epoch": 2.925145819831497, "grad_norm": 0.8708806037902832, "learning_rate": 8.15914538830509e-09, "loss": 0.0551, "step": 18054 }, { "epoch": 2.925307841866494, "grad_norm": 0.8334715962409973, "learning_rate": 8.123881760078723e-09, "loss": 0.0575, "step": 18055 }, { "epoch": 2.9254698639014904, "grad_norm": 0.9106010794639587, "learning_rate": 8.088694377231532e-09, "loss": 0.0626, "step": 18056 }, { "epoch": 2.9256318859364874, "grad_norm": 0.9865289926528931, "learning_rate": 8.053583240840157e-09, "loss": 0.0614, "step": 18057 }, { "epoch": 2.9257939079714843, "grad_norm": 0.9036328196525574, "learning_rate": 8.018548351978738e-09, "loss": 0.0612, "step": 18058 }, { "epoch": 2.925955930006481, "grad_norm": 0.9329188466072083, "learning_rate": 7.98358971171892e-09, "loss": 0.0589, "step": 18059 }, { "epoch": 2.9261179520414777, "grad_norm": 1.0107616186141968, "learning_rate": 7.948707321130956e-09, "loss": 0.0607, "step": 18060 }, { "epoch": 2.9262799740764747, "grad_norm": 1.3307433128356934, "learning_rate": 7.913901181281768e-09, "loss": 0.0597, "step": 18061 }, { "epoch": 2.926441996111471, "grad_norm": 0.8141980171203613, "learning_rate": 7.879171293236621e-09, "loss": 0.0541, "step": 18062 }, { "epoch": 2.9266040181464676, "grad_norm": 0.894360363483429, "learning_rate": 7.844517658057993e-09, "loss": 0.0631, "step": 18063 }, { "epoch": 2.9267660401814646, "grad_norm": 0.9647737145423889, "learning_rate": 7.80994027680615e-09, "loss": 0.0555, "step": 18064 }, { "epoch": 2.9269280622164615, "grad_norm": 0.874281644821167, "learning_rate": 7.775439150539132e-09, "loss": 0.0584, "step": 18065 }, { "epoch": 2.927090084251458, "grad_norm": 0.9083302617073059, "learning_rate": 7.741014280312765e-09, "loss": 0.0563, "step": 18066 }, { "epoch": 2.927252106286455, "grad_norm": 0.9287460446357727, "learning_rate": 7.70666566718009e-09, "loss": 0.06, "step": 18067 }, { "epoch": 2.927414128321452, "grad_norm": 0.9412106275558472, "learning_rate": 7.672393312192218e-09, "loss": 0.0596, "step": 18068 }, { "epoch": 2.9275761503564484, "grad_norm": 0.7685844302177429, "learning_rate": 7.638197216397748e-09, "loss": 0.0508, "step": 18069 }, { "epoch": 2.9277381723914453, "grad_norm": 0.9476762413978577, "learning_rate": 7.604077380843067e-09, "loss": 0.0544, "step": 18070 }, { "epoch": 2.9279001944264422, "grad_norm": 0.9895046353340149, "learning_rate": 7.57003380657234e-09, "loss": 0.0626, "step": 18071 }, { "epoch": 2.9280622164614387, "grad_norm": 0.825796365737915, "learning_rate": 7.536066494626681e-09, "loss": 0.0536, "step": 18072 }, { "epoch": 2.9282242384964356, "grad_norm": 0.8940446972846985, "learning_rate": 7.502175446046089e-09, "loss": 0.0637, "step": 18073 }, { "epoch": 2.928386260531432, "grad_norm": 1.003281831741333, "learning_rate": 7.468360661866957e-09, "loss": 0.0616, "step": 18074 }, { "epoch": 2.928548282566429, "grad_norm": 0.901324987411499, "learning_rate": 7.434622143124015e-09, "loss": 0.0595, "step": 18075 }, { "epoch": 2.9287103046014256, "grad_norm": 0.998007595539093, "learning_rate": 7.400959890850046e-09, "loss": 0.0623, "step": 18076 }, { "epoch": 2.9288723266364225, "grad_norm": 0.9806587100028992, "learning_rate": 7.367373906074782e-09, "loss": 0.0575, "step": 18077 }, { "epoch": 2.9290343486714194, "grad_norm": 0.963106095790863, "learning_rate": 7.333864189825735e-09, "loss": 0.0635, "step": 18078 }, { "epoch": 2.929196370706416, "grad_norm": 0.9748607277870178, "learning_rate": 7.3004307431281954e-09, "loss": 0.0627, "step": 18079 }, { "epoch": 2.929358392741413, "grad_norm": 0.8219755291938782, "learning_rate": 7.267073567005234e-09, "loss": 0.0534, "step": 18080 }, { "epoch": 2.92952041477641, "grad_norm": 0.8908148407936096, "learning_rate": 7.233792662477701e-09, "loss": 0.0533, "step": 18081 }, { "epoch": 2.9296824368114063, "grad_norm": 0.8585811853408813, "learning_rate": 7.2005880305636714e-09, "loss": 0.0563, "step": 18082 }, { "epoch": 2.929844458846403, "grad_norm": 0.9972938895225525, "learning_rate": 7.167459672278998e-09, "loss": 0.0621, "step": 18083 }, { "epoch": 2.9300064808814, "grad_norm": 0.8972851634025574, "learning_rate": 7.134407588637871e-09, "loss": 0.0567, "step": 18084 }, { "epoch": 2.9301685029163966, "grad_norm": 0.903139591217041, "learning_rate": 7.1014317806508696e-09, "loss": 0.0603, "step": 18085 }, { "epoch": 2.930330524951393, "grad_norm": 0.8544906377792358, "learning_rate": 7.068532249327742e-09, "loss": 0.06, "step": 18086 }, { "epoch": 2.93049254698639, "grad_norm": 0.9023297429084778, "learning_rate": 7.035708995674628e-09, "loss": 0.0622, "step": 18087 }, { "epoch": 2.930654569021387, "grad_norm": 0.9524878859519958, "learning_rate": 7.002962020695725e-09, "loss": 0.0581, "step": 18088 }, { "epoch": 2.9308165910563835, "grad_norm": 1.0636718273162842, "learning_rate": 6.970291325393286e-09, "loss": 0.0624, "step": 18089 }, { "epoch": 2.9309786130913804, "grad_norm": 0.9945453405380249, "learning_rate": 6.937696910767067e-09, "loss": 0.0608, "step": 18090 }, { "epoch": 2.9311406351263773, "grad_norm": 0.8092207312583923, "learning_rate": 6.905178777814326e-09, "loss": 0.0559, "step": 18091 }, { "epoch": 2.931302657161374, "grad_norm": 1.0313624143600464, "learning_rate": 6.872736927529822e-09, "loss": 0.0619, "step": 18092 }, { "epoch": 2.9314646791963708, "grad_norm": 1.039193034172058, "learning_rate": 6.840371360906095e-09, "loss": 0.0611, "step": 18093 }, { "epoch": 2.9316267012313677, "grad_norm": 0.9258902668952942, "learning_rate": 6.8080820789340195e-09, "loss": 0.0572, "step": 18094 }, { "epoch": 2.931788723266364, "grad_norm": 0.9824669361114502, "learning_rate": 6.775869082601139e-09, "loss": 0.0573, "step": 18095 }, { "epoch": 2.931950745301361, "grad_norm": 0.8684791922569275, "learning_rate": 6.743732372893053e-09, "loss": 0.0595, "step": 18096 }, { "epoch": 2.9321127673363576, "grad_norm": 0.8853773474693298, "learning_rate": 6.711671950793419e-09, "loss": 0.0513, "step": 18097 }, { "epoch": 2.9322747893713546, "grad_norm": 0.944572389125824, "learning_rate": 6.679687817282843e-09, "loss": 0.0517, "step": 18098 }, { "epoch": 2.932436811406351, "grad_norm": 0.9606372117996216, "learning_rate": 6.647779973339985e-09, "loss": 0.0594, "step": 18099 }, { "epoch": 2.932598833441348, "grad_norm": 0.8097171187400818, "learning_rate": 6.615948419941565e-09, "loss": 0.0531, "step": 18100 }, { "epoch": 2.932760855476345, "grad_norm": 0.8059049844741821, "learning_rate": 6.584193158060969e-09, "loss": 0.0524, "step": 18101 }, { "epoch": 2.9329228775113414, "grad_norm": 0.8145474791526794, "learning_rate": 6.5525141886702005e-09, "loss": 0.0572, "step": 18102 }, { "epoch": 2.9330848995463383, "grad_norm": 1.0327907800674438, "learning_rate": 6.520911512738481e-09, "loss": 0.0654, "step": 18103 }, { "epoch": 2.9332469215813353, "grad_norm": 0.9246883392333984, "learning_rate": 6.489385131232817e-09, "loss": 0.0663, "step": 18104 }, { "epoch": 2.9334089436163318, "grad_norm": 0.9235041737556458, "learning_rate": 6.4579350451177135e-09, "loss": 0.0673, "step": 18105 }, { "epoch": 2.9335709656513287, "grad_norm": 0.8101209998130798, "learning_rate": 6.426561255355457e-09, "loss": 0.0568, "step": 18106 }, { "epoch": 2.933732987686325, "grad_norm": 0.8323474526405334, "learning_rate": 6.395263762906112e-09, "loss": 0.0514, "step": 18107 }, { "epoch": 2.933895009721322, "grad_norm": 0.8345054984092712, "learning_rate": 6.364042568727524e-09, "loss": 0.05, "step": 18108 }, { "epoch": 2.9340570317563186, "grad_norm": 0.903107225894928, "learning_rate": 6.332897673774485e-09, "loss": 0.0614, "step": 18109 }, { "epoch": 2.9342190537913155, "grad_norm": 1.5786057710647583, "learning_rate": 6.301829079000399e-09, "loss": 0.0695, "step": 18110 }, { "epoch": 2.9343810758263125, "grad_norm": 0.8032664060592651, "learning_rate": 6.270836785355616e-09, "loss": 0.05, "step": 18111 }, { "epoch": 2.934543097861309, "grad_norm": 1.0438019037246704, "learning_rate": 6.239920793788546e-09, "loss": 0.0631, "step": 18112 }, { "epoch": 2.934705119896306, "grad_norm": 0.9662144184112549, "learning_rate": 6.209081105245096e-09, "loss": 0.0647, "step": 18113 }, { "epoch": 2.934867141931303, "grad_norm": 1.402650237083435, "learning_rate": 6.178317720668958e-09, "loss": 0.0615, "step": 18114 }, { "epoch": 2.9350291639662993, "grad_norm": 0.9588105082511902, "learning_rate": 6.147630641001323e-09, "loss": 0.0585, "step": 18115 }, { "epoch": 2.9351911860012962, "grad_norm": 0.9412093162536621, "learning_rate": 6.117019867181162e-09, "loss": 0.0613, "step": 18116 }, { "epoch": 2.935353208036293, "grad_norm": 1.0334348678588867, "learning_rate": 6.0864854001452255e-09, "loss": 0.0649, "step": 18117 }, { "epoch": 2.9355152300712897, "grad_norm": 0.9597827196121216, "learning_rate": 6.056027240827489e-09, "loss": 0.0642, "step": 18118 }, { "epoch": 2.9356772521062866, "grad_norm": 0.927130401134491, "learning_rate": 6.025645390160262e-09, "loss": 0.0632, "step": 18119 }, { "epoch": 2.935839274141283, "grad_norm": 0.8240948915481567, "learning_rate": 5.995339849073079e-09, "loss": 0.0607, "step": 18120 }, { "epoch": 2.93600129617628, "grad_norm": 0.919998824596405, "learning_rate": 5.965110618492698e-09, "loss": 0.0637, "step": 18121 }, { "epoch": 2.9361633182112765, "grad_norm": 0.9138144254684448, "learning_rate": 5.9349576993447675e-09, "loss": 0.0608, "step": 18122 }, { "epoch": 2.9363253402462735, "grad_norm": 1.1568125486373901, "learning_rate": 5.904881092551607e-09, "loss": 0.0606, "step": 18123 }, { "epoch": 2.9364873622812704, "grad_norm": 0.8754058480262756, "learning_rate": 5.874880799033589e-09, "loss": 0.0555, "step": 18124 }, { "epoch": 2.936649384316267, "grad_norm": 0.8889382481575012, "learning_rate": 5.844956819708314e-09, "loss": 0.0585, "step": 18125 }, { "epoch": 2.936811406351264, "grad_norm": 0.9085453748703003, "learning_rate": 5.815109155491716e-09, "loss": 0.0605, "step": 18126 }, { "epoch": 2.9369734283862607, "grad_norm": 0.9168490767478943, "learning_rate": 5.785337807297231e-09, "loss": 0.0616, "step": 18127 }, { "epoch": 2.9371354504212572, "grad_norm": 1.0325088500976562, "learning_rate": 5.755642776035242e-09, "loss": 0.0636, "step": 18128 }, { "epoch": 2.937297472456254, "grad_norm": 0.9850202202796936, "learning_rate": 5.726024062614466e-09, "loss": 0.0607, "step": 18129 }, { "epoch": 2.9374594944912507, "grad_norm": 1.0262207984924316, "learning_rate": 5.696481667941678e-09, "loss": 0.0581, "step": 18130 }, { "epoch": 2.9376215165262476, "grad_norm": 0.9356762766838074, "learning_rate": 5.667015592920322e-09, "loss": 0.064, "step": 18131 }, { "epoch": 2.937783538561244, "grad_norm": 0.9865776896476746, "learning_rate": 5.637625838452176e-09, "loss": 0.0695, "step": 18132 }, { "epoch": 2.937945560596241, "grad_norm": 1.0751196146011353, "learning_rate": 5.608312405436245e-09, "loss": 0.0706, "step": 18133 }, { "epoch": 2.938107582631238, "grad_norm": 0.8332173824310303, "learning_rate": 5.579075294769864e-09, "loss": 0.0523, "step": 18134 }, { "epoch": 2.9382696046662344, "grad_norm": 0.9454176425933838, "learning_rate": 5.54991450734732e-09, "loss": 0.0566, "step": 18135 }, { "epoch": 2.9384316267012314, "grad_norm": 0.8918482661247253, "learning_rate": 5.520830044060677e-09, "loss": 0.06, "step": 18136 }, { "epoch": 2.9385936487362283, "grad_norm": 0.9533538818359375, "learning_rate": 5.491821905800332e-09, "loss": 0.0536, "step": 18137 }, { "epoch": 2.938755670771225, "grad_norm": 0.8268304467201233, "learning_rate": 5.46289009345391e-09, "loss": 0.0584, "step": 18138 }, { "epoch": 2.9389176928062217, "grad_norm": 0.7862445116043091, "learning_rate": 5.43403460790598e-09, "loss": 0.0561, "step": 18139 }, { "epoch": 2.9390797148412187, "grad_norm": 1.113399624824524, "learning_rate": 5.405255450040003e-09, "loss": 0.0603, "step": 18140 }, { "epoch": 2.939241736876215, "grad_norm": 1.0670251846313477, "learning_rate": 5.376552620736664e-09, "loss": 0.0631, "step": 18141 }, { "epoch": 2.939403758911212, "grad_norm": 0.9453678727149963, "learning_rate": 5.347926120873592e-09, "loss": 0.0585, "step": 18142 }, { "epoch": 2.9395657809462086, "grad_norm": 0.9201284050941467, "learning_rate": 5.319375951327033e-09, "loss": 0.0643, "step": 18143 }, { "epoch": 2.9397278029812055, "grad_norm": 0.929400622844696, "learning_rate": 5.290902112970731e-09, "loss": 0.0587, "step": 18144 }, { "epoch": 2.939889825016202, "grad_norm": 1.096695065498352, "learning_rate": 5.262504606675656e-09, "loss": 0.0624, "step": 18145 }, { "epoch": 2.940051847051199, "grad_norm": 0.8255508542060852, "learning_rate": 5.234183433310835e-09, "loss": 0.0513, "step": 18146 }, { "epoch": 2.940213869086196, "grad_norm": 1.058481216430664, "learning_rate": 5.205938593742799e-09, "loss": 0.0652, "step": 18147 }, { "epoch": 2.9403758911211924, "grad_norm": 0.99068284034729, "learning_rate": 5.177770088835854e-09, "loss": 0.0587, "step": 18148 }, { "epoch": 2.9405379131561893, "grad_norm": 0.9016160368919373, "learning_rate": 5.149677919451535e-09, "loss": 0.0575, "step": 18149 }, { "epoch": 2.940699935191186, "grad_norm": 1.0625081062316895, "learning_rate": 5.121662086449708e-09, "loss": 0.071, "step": 18150 }, { "epoch": 2.9408619572261827, "grad_norm": 1.1517467498779297, "learning_rate": 5.093722590687744e-09, "loss": 0.0692, "step": 18151 }, { "epoch": 2.9410239792611796, "grad_norm": 0.8428022861480713, "learning_rate": 5.065859433019959e-09, "loss": 0.051, "step": 18152 }, { "epoch": 2.941186001296176, "grad_norm": 0.9025364518165588, "learning_rate": 5.038072614299561e-09, "loss": 0.0583, "step": 18153 }, { "epoch": 2.941348023331173, "grad_norm": 1.0714325904846191, "learning_rate": 5.010362135376423e-09, "loss": 0.0552, "step": 18154 }, { "epoch": 2.9415100453661696, "grad_norm": 1.128408432006836, "learning_rate": 4.9827279970982024e-09, "loss": 0.0682, "step": 18155 }, { "epoch": 2.9416720674011665, "grad_norm": 0.9572305083274841, "learning_rate": 4.955170200310888e-09, "loss": 0.0565, "step": 18156 }, { "epoch": 2.9418340894361634, "grad_norm": 0.8969508409500122, "learning_rate": 4.927688745857417e-09, "loss": 0.0601, "step": 18157 }, { "epoch": 2.94199611147116, "grad_norm": 1.0484877824783325, "learning_rate": 4.9002836345787845e-09, "loss": 0.0628, "step": 18158 }, { "epoch": 2.942158133506157, "grad_norm": 0.9466218948364258, "learning_rate": 4.872954867313484e-09, "loss": 0.0631, "step": 18159 }, { "epoch": 2.942320155541154, "grad_norm": 1.0803978443145752, "learning_rate": 4.845702444897515e-09, "loss": 0.0597, "step": 18160 }, { "epoch": 2.9424821775761503, "grad_norm": 0.8907843232154846, "learning_rate": 4.818526368164933e-09, "loss": 0.0571, "step": 18161 }, { "epoch": 2.942644199611147, "grad_norm": 1.0460045337677002, "learning_rate": 4.791426637947294e-09, "loss": 0.0617, "step": 18162 }, { "epoch": 2.942806221646144, "grad_norm": 0.893657922744751, "learning_rate": 4.764403255073657e-09, "loss": 0.0601, "step": 18163 }, { "epoch": 2.9429682436811406, "grad_norm": 1.0240740776062012, "learning_rate": 4.7374562203708615e-09, "loss": 0.0714, "step": 18164 }, { "epoch": 2.943130265716137, "grad_norm": 0.8384248614311218, "learning_rate": 4.710585534663526e-09, "loss": 0.0593, "step": 18165 }, { "epoch": 2.943292287751134, "grad_norm": 0.8110920786857605, "learning_rate": 4.683791198773768e-09, "loss": 0.0551, "step": 18166 }, { "epoch": 2.943454309786131, "grad_norm": 0.8798488974571228, "learning_rate": 4.657073213521768e-09, "loss": 0.0547, "step": 18167 }, { "epoch": 2.9436163318211275, "grad_norm": 0.8243097066879272, "learning_rate": 4.630431579724371e-09, "loss": 0.0606, "step": 18168 }, { "epoch": 2.9437783538561244, "grad_norm": 1.1285996437072754, "learning_rate": 4.603866298197035e-09, "loss": 0.0602, "step": 18169 }, { "epoch": 2.9439403758911213, "grad_norm": 0.9858548641204834, "learning_rate": 4.577377369752722e-09, "loss": 0.0655, "step": 18170 }, { "epoch": 2.944102397926118, "grad_norm": 1.0158785581588745, "learning_rate": 4.550964795202173e-09, "loss": 0.0605, "step": 18171 }, { "epoch": 2.9442644199611148, "grad_norm": 0.9173128604888916, "learning_rate": 4.524628575352796e-09, "loss": 0.057, "step": 18172 }, { "epoch": 2.9444264419961117, "grad_norm": 1.0110288858413696, "learning_rate": 4.4983687110111695e-09, "loss": 0.0644, "step": 18173 }, { "epoch": 2.944588464031108, "grad_norm": 0.8735485076904297, "learning_rate": 4.472185202980261e-09, "loss": 0.0587, "step": 18174 }, { "epoch": 2.944750486066105, "grad_norm": 0.8773369193077087, "learning_rate": 4.446078052061375e-09, "loss": 0.0586, "step": 18175 }, { "epoch": 2.9449125081011016, "grad_norm": 1.5746303796768188, "learning_rate": 4.420047259053595e-09, "loss": 0.0564, "step": 18176 }, { "epoch": 2.9450745301360985, "grad_norm": 0.9835705161094666, "learning_rate": 4.394092824752949e-09, "loss": 0.0608, "step": 18177 }, { "epoch": 2.945236552171095, "grad_norm": 0.9602859020233154, "learning_rate": 4.36821474995408e-09, "loss": 0.06, "step": 18178 }, { "epoch": 2.945398574206092, "grad_norm": 0.9771770238876343, "learning_rate": 4.342413035448301e-09, "loss": 0.0584, "step": 18179 }, { "epoch": 2.945560596241089, "grad_norm": 0.8505083322525024, "learning_rate": 4.316687682025256e-09, "loss": 0.0548, "step": 18180 }, { "epoch": 2.9457226182760854, "grad_norm": 0.8177760243415833, "learning_rate": 4.291038690472371e-09, "loss": 0.0592, "step": 18181 }, { "epoch": 2.9458846403110823, "grad_norm": 0.9233116507530212, "learning_rate": 4.26546606157402e-09, "loss": 0.0594, "step": 18182 }, { "epoch": 2.9460466623460793, "grad_norm": 0.8462489247322083, "learning_rate": 4.2399697961129084e-09, "loss": 0.0502, "step": 18183 }, { "epoch": 2.9462086843810757, "grad_norm": 0.8417542576789856, "learning_rate": 4.2145498948692465e-09, "loss": 0.0603, "step": 18184 }, { "epoch": 2.9463707064160727, "grad_norm": 0.844294011592865, "learning_rate": 4.189206358620468e-09, "loss": 0.0534, "step": 18185 }, { "epoch": 2.9465327284510696, "grad_norm": 0.9389594197273254, "learning_rate": 4.163939188142341e-09, "loss": 0.0582, "step": 18186 }, { "epoch": 2.946694750486066, "grad_norm": 0.9046799540519714, "learning_rate": 4.138748384207858e-09, "loss": 0.0595, "step": 18187 }, { "epoch": 2.9468567725210626, "grad_norm": 0.9170434474945068, "learning_rate": 4.113633947587792e-09, "loss": 0.059, "step": 18188 }, { "epoch": 2.9470187945560595, "grad_norm": 0.7925693392753601, "learning_rate": 4.0885958790504165e-09, "loss": 0.0539, "step": 18189 }, { "epoch": 2.9471808165910565, "grad_norm": 0.9091478586196899, "learning_rate": 4.063634179362341e-09, "loss": 0.0521, "step": 18190 }, { "epoch": 2.947342838626053, "grad_norm": 0.9072052836418152, "learning_rate": 4.0387488492868446e-09, "loss": 0.0637, "step": 18191 }, { "epoch": 2.94750486066105, "grad_norm": 1.005933165550232, "learning_rate": 4.013939889585538e-09, "loss": 0.0623, "step": 18192 }, { "epoch": 2.947666882696047, "grad_norm": 0.9582807421684265, "learning_rate": 3.989207301017262e-09, "loss": 0.0613, "step": 18193 }, { "epoch": 2.9478289047310433, "grad_norm": 1.0312175750732422, "learning_rate": 3.964551084339463e-09, "loss": 0.0714, "step": 18194 }, { "epoch": 2.9479909267660402, "grad_norm": 0.9463022947311401, "learning_rate": 3.939971240305707e-09, "loss": 0.0649, "step": 18195 }, { "epoch": 2.948152948801037, "grad_norm": 0.9024858474731445, "learning_rate": 3.915467769668724e-09, "loss": 0.0549, "step": 18196 }, { "epoch": 2.9483149708360337, "grad_norm": 0.8559155464172363, "learning_rate": 3.891040673177915e-09, "loss": 0.0564, "step": 18197 }, { "epoch": 2.9484769928710306, "grad_norm": 0.804895281791687, "learning_rate": 3.866689951580738e-09, "loss": 0.0563, "step": 18198 }, { "epoch": 2.948639014906027, "grad_norm": 0.9484009742736816, "learning_rate": 3.842415605622429e-09, "loss": 0.056, "step": 18199 }, { "epoch": 2.948801036941024, "grad_norm": 0.813424289226532, "learning_rate": 3.818217636045729e-09, "loss": 0.0606, "step": 18200 }, { "epoch": 2.9489630589760205, "grad_norm": 0.9334542751312256, "learning_rate": 3.794096043590878e-09, "loss": 0.0611, "step": 18201 }, { "epoch": 2.9491250810110174, "grad_norm": 0.9005117416381836, "learning_rate": 3.770050828995897e-09, "loss": 0.0569, "step": 18202 }, { "epoch": 2.9492871030460144, "grad_norm": 0.8461850881576538, "learning_rate": 3.746081992996587e-09, "loss": 0.0572, "step": 18203 }, { "epoch": 2.949449125081011, "grad_norm": 0.8409425616264343, "learning_rate": 3.7221895363262485e-09, "loss": 0.0623, "step": 18204 }, { "epoch": 2.949611147116008, "grad_norm": 1.0652631521224976, "learning_rate": 3.6983734597162423e-09, "loss": 0.0653, "step": 18205 }, { "epoch": 2.9497731691510047, "grad_norm": 0.9219419956207275, "learning_rate": 3.674633763894875e-09, "loss": 0.0573, "step": 18206 }, { "epoch": 2.9499351911860012, "grad_norm": 1.0178706645965576, "learning_rate": 3.6509704495887866e-09, "loss": 0.0608, "step": 18207 }, { "epoch": 2.950097213220998, "grad_norm": 0.853714644908905, "learning_rate": 3.6273835175221204e-09, "loss": 0.0556, "step": 18208 }, { "epoch": 2.9502592352559946, "grad_norm": 0.9474278688430786, "learning_rate": 3.6038729684162445e-09, "loss": 0.0633, "step": 18209 }, { "epoch": 2.9504212572909916, "grad_norm": 0.962617039680481, "learning_rate": 3.58043880299086e-09, "loss": 0.0638, "step": 18210 }, { "epoch": 2.950583279325988, "grad_norm": 0.8814941048622131, "learning_rate": 3.557081021962616e-09, "loss": 0.0634, "step": 18211 }, { "epoch": 2.950745301360985, "grad_norm": 0.8792970776557922, "learning_rate": 3.533799626046497e-09, "loss": 0.0634, "step": 18212 }, { "epoch": 2.950907323395982, "grad_norm": 0.8841227889060974, "learning_rate": 3.5105946159549874e-09, "loss": 0.0564, "step": 18213 }, { "epoch": 2.9510693454309784, "grad_norm": 1.0366162061691284, "learning_rate": 3.487465992397521e-09, "loss": 0.0656, "step": 18214 }, { "epoch": 2.9512313674659754, "grad_norm": 0.873475968837738, "learning_rate": 3.4644137560824187e-09, "loss": 0.0582, "step": 18215 }, { "epoch": 2.9513933895009723, "grad_norm": 0.8530957698822021, "learning_rate": 3.4414379077146733e-09, "loss": 0.0568, "step": 18216 }, { "epoch": 2.951555411535969, "grad_norm": 0.9540935158729553, "learning_rate": 3.418538447997333e-09, "loss": 0.0599, "step": 18217 }, { "epoch": 2.9517174335709657, "grad_norm": 0.8702645301818848, "learning_rate": 3.3957153776312257e-09, "loss": 0.0599, "step": 18218 }, { "epoch": 2.9518794556059627, "grad_norm": 0.9750285744667053, "learning_rate": 3.372968697314405e-09, "loss": 0.0584, "step": 18219 }, { "epoch": 2.952041477640959, "grad_norm": 1.2663254737854004, "learning_rate": 3.3502984077429803e-09, "loss": 0.0627, "step": 18220 }, { "epoch": 2.952203499675956, "grad_norm": 0.9789478778839111, "learning_rate": 3.3277045096108405e-09, "loss": 0.0574, "step": 18221 }, { "epoch": 2.9523655217109526, "grad_norm": 1.000509262084961, "learning_rate": 3.3051870036091004e-09, "loss": 0.0558, "step": 18222 }, { "epoch": 2.9525275437459495, "grad_norm": 0.9567583799362183, "learning_rate": 3.282745890426653e-09, "loss": 0.0611, "step": 18223 }, { "epoch": 2.952689565780946, "grad_norm": 0.9223018288612366, "learning_rate": 3.260381170750171e-09, "loss": 0.0581, "step": 18224 }, { "epoch": 2.952851587815943, "grad_norm": 0.8357469439506531, "learning_rate": 3.238092845264107e-09, "loss": 0.0543, "step": 18225 }, { "epoch": 2.95301360985094, "grad_norm": 0.7842499017715454, "learning_rate": 3.215880914650693e-09, "loss": 0.0563, "step": 18226 }, { "epoch": 2.9531756318859363, "grad_norm": 1.091303825378418, "learning_rate": 3.1937453795888306e-09, "loss": 0.0625, "step": 18227 }, { "epoch": 2.9533376539209333, "grad_norm": 0.9732781052589417, "learning_rate": 3.171686240756033e-09, "loss": 0.0619, "step": 18228 }, { "epoch": 2.95349967595593, "grad_norm": 0.8895050883293152, "learning_rate": 3.1497034988278717e-09, "loss": 0.0539, "step": 18229 }, { "epoch": 2.9536616979909267, "grad_norm": 0.9890386462211609, "learning_rate": 3.1277971544763088e-09, "loss": 0.0618, "step": 18230 }, { "epoch": 2.9538237200259236, "grad_norm": 0.9173012375831604, "learning_rate": 3.1059672083719183e-09, "loss": 0.056, "step": 18231 }, { "epoch": 2.95398574206092, "grad_norm": 0.9382359385490417, "learning_rate": 3.0842136611825004e-09, "loss": 0.0637, "step": 18232 }, { "epoch": 2.954147764095917, "grad_norm": 0.8800296187400818, "learning_rate": 3.062536513573633e-09, "loss": 0.0596, "step": 18233 }, { "epoch": 2.9543097861309136, "grad_norm": 1.0513750314712524, "learning_rate": 3.0409357662086748e-09, "loss": 0.0661, "step": 18234 }, { "epoch": 2.9544718081659105, "grad_norm": 0.8666124939918518, "learning_rate": 3.0194114197487635e-09, "loss": 0.0535, "step": 18235 }, { "epoch": 2.9546338302009074, "grad_norm": 0.9290871024131775, "learning_rate": 2.997963474852261e-09, "loss": 0.0626, "step": 18236 }, { "epoch": 2.954795852235904, "grad_norm": 0.9381429553031921, "learning_rate": 2.97659193217531e-09, "loss": 0.0614, "step": 18237 }, { "epoch": 2.954957874270901, "grad_norm": 0.8524567484855652, "learning_rate": 2.9552967923721086e-09, "loss": 0.0595, "step": 18238 }, { "epoch": 2.9551198963058978, "grad_norm": 0.7893286943435669, "learning_rate": 2.9340780560938032e-09, "loss": 0.0515, "step": 18239 }, { "epoch": 2.9552819183408943, "grad_norm": 0.8815685510635376, "learning_rate": 2.9129357239901514e-09, "loss": 0.0554, "step": 18240 }, { "epoch": 2.955443940375891, "grad_norm": 0.7954118251800537, "learning_rate": 2.8918697967078578e-09, "loss": 0.0561, "step": 18241 }, { "epoch": 2.955605962410888, "grad_norm": 0.8579531908035278, "learning_rate": 2.8708802748914077e-09, "loss": 0.0591, "step": 18242 }, { "epoch": 2.9557679844458846, "grad_norm": 0.9822569489479065, "learning_rate": 2.849967159183065e-09, "loss": 0.0587, "step": 18243 }, { "epoch": 2.9559300064808816, "grad_norm": 0.9963696002960205, "learning_rate": 2.829130450222872e-09, "loss": 0.0605, "step": 18244 }, { "epoch": 2.956092028515878, "grad_norm": 1.0176767110824585, "learning_rate": 2.8083701486480985e-09, "loss": 0.0586, "step": 18245 }, { "epoch": 2.956254050550875, "grad_norm": 0.9688636064529419, "learning_rate": 2.7876862550940685e-09, "loss": 0.0648, "step": 18246 }, { "epoch": 2.9564160725858715, "grad_norm": 1.1327593326568604, "learning_rate": 2.7670787701938874e-09, "loss": 0.0649, "step": 18247 }, { "epoch": 2.9565780946208684, "grad_norm": 0.8865358829498291, "learning_rate": 2.7465476945778835e-09, "loss": 0.0517, "step": 18248 }, { "epoch": 2.9567401166558653, "grad_norm": 0.9270232319831848, "learning_rate": 2.726093028874166e-09, "loss": 0.063, "step": 18249 }, { "epoch": 2.956902138690862, "grad_norm": 0.8303735852241516, "learning_rate": 2.705714773708623e-09, "loss": 0.0583, "step": 18250 }, { "epoch": 2.9570641607258588, "grad_norm": 0.8890751004219055, "learning_rate": 2.6854129297049225e-09, "loss": 0.0557, "step": 18251 }, { "epoch": 2.9572261827608557, "grad_norm": 0.9399266242980957, "learning_rate": 2.6651874974845115e-09, "loss": 0.0563, "step": 18252 }, { "epoch": 2.957388204795852, "grad_norm": 0.7995545268058777, "learning_rate": 2.645038477665507e-09, "loss": 0.0526, "step": 18253 }, { "epoch": 2.957550226830849, "grad_norm": 0.9148885011672974, "learning_rate": 2.6249658708651928e-09, "loss": 0.0664, "step": 18254 }, { "epoch": 2.9577122488658456, "grad_norm": 0.847109317779541, "learning_rate": 2.6049696776972443e-09, "loss": 0.0579, "step": 18255 }, { "epoch": 2.9578742709008425, "grad_norm": 0.858539342880249, "learning_rate": 2.5850498987733952e-09, "loss": 0.0644, "step": 18256 }, { "epoch": 2.958036292935839, "grad_norm": 1.0084835290908813, "learning_rate": 2.5652065347037126e-09, "loss": 0.0673, "step": 18257 }, { "epoch": 2.958198314970836, "grad_norm": 0.8510595560073853, "learning_rate": 2.545439586094933e-09, "loss": 0.0584, "step": 18258 }, { "epoch": 2.958360337005833, "grad_norm": 0.9628735780715942, "learning_rate": 2.525749053552129e-09, "loss": 0.0601, "step": 18259 }, { "epoch": 2.9585223590408294, "grad_norm": 0.8464257121086121, "learning_rate": 2.506134937677318e-09, "loss": 0.0567, "step": 18260 }, { "epoch": 2.9586843810758263, "grad_norm": 0.838660478591919, "learning_rate": 2.4865972390711314e-09, "loss": 0.0583, "step": 18261 }, { "epoch": 2.9588464031108233, "grad_norm": 0.9245243668556213, "learning_rate": 2.4671359583314237e-09, "loss": 0.0618, "step": 18262 }, { "epoch": 2.9590084251458197, "grad_norm": 0.8043138980865479, "learning_rate": 2.4477510960532747e-09, "loss": 0.0579, "step": 18263 }, { "epoch": 2.9591704471808167, "grad_norm": 0.8946799039840698, "learning_rate": 2.4284426528298212e-09, "loss": 0.0584, "step": 18264 }, { "epoch": 2.9593324692158136, "grad_norm": 1.0274814367294312, "learning_rate": 2.409210629251979e-09, "loss": 0.063, "step": 18265 }, { "epoch": 2.95949449125081, "grad_norm": 0.8745455145835876, "learning_rate": 2.3900550259084445e-09, "loss": 0.0599, "step": 18266 }, { "epoch": 2.959656513285807, "grad_norm": 0.9556804895401001, "learning_rate": 2.370975843385137e-09, "loss": 0.0605, "step": 18267 }, { "epoch": 2.9598185353208035, "grad_norm": 0.9654064774513245, "learning_rate": 2.351973082265757e-09, "loss": 0.0604, "step": 18268 }, { "epoch": 2.9599805573558005, "grad_norm": 0.9703561067581177, "learning_rate": 2.333046743131784e-09, "loss": 0.0632, "step": 18269 }, { "epoch": 2.960142579390797, "grad_norm": 1.048510193824768, "learning_rate": 2.314196826562476e-09, "loss": 0.0593, "step": 18270 }, { "epoch": 2.960304601425794, "grad_norm": 0.8689060211181641, "learning_rate": 2.295423333134317e-09, "loss": 0.0659, "step": 18271 }, { "epoch": 2.960466623460791, "grad_norm": 0.9351786971092224, "learning_rate": 2.2767262634218466e-09, "loss": 0.0653, "step": 18272 }, { "epoch": 2.9606286454957873, "grad_norm": 0.7980687022209167, "learning_rate": 2.2581056179971084e-09, "loss": 0.0542, "step": 18273 }, { "epoch": 2.9607906675307842, "grad_norm": 0.8763124346733093, "learning_rate": 2.239561397430201e-09, "loss": 0.0586, "step": 18274 }, { "epoch": 2.960952689565781, "grad_norm": 0.9670480489730835, "learning_rate": 2.221093602288171e-09, "loss": 0.0565, "step": 18275 }, { "epoch": 2.9611147116007777, "grad_norm": 0.8312298655509949, "learning_rate": 2.2027022331361226e-09, "loss": 0.0575, "step": 18276 }, { "epoch": 2.9612767336357746, "grad_norm": 0.9964354038238525, "learning_rate": 2.18438729053666e-09, "loss": 0.06, "step": 18277 }, { "epoch": 2.961438755670771, "grad_norm": 0.9789435863494873, "learning_rate": 2.1661487750504473e-09, "loss": 0.0577, "step": 18278 }, { "epoch": 2.961600777705768, "grad_norm": 0.977944016456604, "learning_rate": 2.147986687235648e-09, "loss": 0.0589, "step": 18279 }, { "epoch": 2.9617627997407645, "grad_norm": 0.9870699644088745, "learning_rate": 2.129901027647652e-09, "loss": 0.0603, "step": 18280 }, { "epoch": 2.9619248217757614, "grad_norm": 0.8196133375167847, "learning_rate": 2.1118917968399045e-09, "loss": 0.0516, "step": 18281 }, { "epoch": 2.9620868438107584, "grad_norm": 1.1633763313293457, "learning_rate": 2.0939589953633542e-09, "loss": 0.0574, "step": 18282 }, { "epoch": 2.962248865845755, "grad_norm": 1.174547791481018, "learning_rate": 2.076102623767007e-09, "loss": 0.0649, "step": 18283 }, { "epoch": 2.962410887880752, "grad_norm": 0.8780858516693115, "learning_rate": 2.0583226825970915e-09, "loss": 0.0636, "step": 18284 }, { "epoch": 2.9625729099157487, "grad_norm": 0.8388274908065796, "learning_rate": 2.0406191723976175e-09, "loss": 0.0627, "step": 18285 }, { "epoch": 2.962734931950745, "grad_norm": 0.9338237047195435, "learning_rate": 2.022992093710097e-09, "loss": 0.0605, "step": 18286 }, { "epoch": 2.962896953985742, "grad_norm": 0.8622428774833679, "learning_rate": 2.005441447074097e-09, "loss": 0.0602, "step": 18287 }, { "epoch": 2.963058976020739, "grad_norm": 0.9662334322929382, "learning_rate": 1.9879672330266886e-09, "loss": 0.0603, "step": 18288 }, { "epoch": 2.9632209980557356, "grad_norm": 1.1020525693893433, "learning_rate": 1.9705694521021666e-09, "loss": 0.0663, "step": 18289 }, { "epoch": 2.963383020090732, "grad_norm": 0.82396000623703, "learning_rate": 1.9532481048334383e-09, "loss": 0.0535, "step": 18290 }, { "epoch": 2.963545042125729, "grad_norm": 0.8906350135803223, "learning_rate": 1.9360031917498024e-09, "loss": 0.06, "step": 18291 }, { "epoch": 2.963707064160726, "grad_norm": 0.845971405506134, "learning_rate": 1.918834713379447e-09, "loss": 0.0633, "step": 18292 }, { "epoch": 2.9638690861957224, "grad_norm": 0.9976989030838013, "learning_rate": 1.9017426702475084e-09, "loss": 0.0593, "step": 18293 }, { "epoch": 2.9640311082307194, "grad_norm": 0.9086126685142517, "learning_rate": 1.884727062876901e-09, "loss": 0.0592, "step": 18294 }, { "epoch": 2.9641931302657163, "grad_norm": 1.0157880783081055, "learning_rate": 1.867787891788597e-09, "loss": 0.0634, "step": 18295 }, { "epoch": 2.964355152300713, "grad_norm": 1.0585784912109375, "learning_rate": 1.8509251575002386e-09, "loss": 0.0652, "step": 18296 }, { "epoch": 2.9645171743357097, "grad_norm": 0.8272886276245117, "learning_rate": 1.8341388605283562e-09, "loss": 0.0594, "step": 18297 }, { "epoch": 2.9646791963707066, "grad_norm": 0.8589320182800293, "learning_rate": 1.8174290013864282e-09, "loss": 0.0577, "step": 18298 }, { "epoch": 2.964841218405703, "grad_norm": 0.8368940949440002, "learning_rate": 1.800795580585435e-09, "loss": 0.0585, "step": 18299 }, { "epoch": 2.9650032404407, "grad_norm": 0.8517768383026123, "learning_rate": 1.784238598634691e-09, "loss": 0.0569, "step": 18300 }, { "epoch": 2.9651652624756966, "grad_norm": 1.1536654233932495, "learning_rate": 1.767758056040736e-09, "loss": 0.0648, "step": 18301 }, { "epoch": 2.9653272845106935, "grad_norm": 1.0494126081466675, "learning_rate": 1.7513539533078882e-09, "loss": 0.0679, "step": 18302 }, { "epoch": 2.96548930654569, "grad_norm": 0.8454523682594299, "learning_rate": 1.735026290937969e-09, "loss": 0.0558, "step": 18303 }, { "epoch": 2.965651328580687, "grad_norm": 0.9818223714828491, "learning_rate": 1.7187750694303007e-09, "loss": 0.0593, "step": 18304 }, { "epoch": 2.965813350615684, "grad_norm": 0.9108536839485168, "learning_rate": 1.7026002892825411e-09, "loss": 0.0591, "step": 18305 }, { "epoch": 2.9659753726506803, "grad_norm": 0.8628289699554443, "learning_rate": 1.686501950989572e-09, "loss": 0.0569, "step": 18306 }, { "epoch": 2.9661373946856773, "grad_norm": 0.9580399990081787, "learning_rate": 1.6704800550434997e-09, "loss": 0.0551, "step": 18307 }, { "epoch": 2.966299416720674, "grad_norm": 0.9089958667755127, "learning_rate": 1.6545346019350427e-09, "loss": 0.0599, "step": 18308 }, { "epoch": 2.9664614387556707, "grad_norm": 0.875546932220459, "learning_rate": 1.6386655921521443e-09, "loss": 0.061, "step": 18309 }, { "epoch": 2.9666234607906676, "grad_norm": 1.0294671058654785, "learning_rate": 1.6228730261799718e-09, "loss": 0.0663, "step": 18310 }, { "epoch": 2.9667854828256646, "grad_norm": 0.8314631581306458, "learning_rate": 1.6071569045020274e-09, "loss": 0.0512, "step": 18311 }, { "epoch": 2.966947504860661, "grad_norm": 1.001203179359436, "learning_rate": 1.5915172275990375e-09, "loss": 0.0555, "step": 18312 }, { "epoch": 2.9671095268956575, "grad_norm": 0.8834933638572693, "learning_rate": 1.5759539959495085e-09, "loss": 0.0627, "step": 18313 }, { "epoch": 2.9672715489306545, "grad_norm": 1.0192748308181763, "learning_rate": 1.5604672100297258e-09, "loss": 0.064, "step": 18314 }, { "epoch": 2.9674335709656514, "grad_norm": 0.9845191240310669, "learning_rate": 1.5450568703137547e-09, "loss": 0.0635, "step": 18315 }, { "epoch": 2.967595593000648, "grad_norm": 0.7708606123924255, "learning_rate": 1.5297229772726075e-09, "loss": 0.0567, "step": 18316 }, { "epoch": 2.967757615035645, "grad_norm": 0.8803342580795288, "learning_rate": 1.5144655313759082e-09, "loss": 0.0596, "step": 18317 }, { "epoch": 2.9679196370706418, "grad_norm": 0.9847882986068726, "learning_rate": 1.499284533090506e-09, "loss": 0.0609, "step": 18318 }, { "epoch": 2.9680816591056383, "grad_norm": 0.9394886493682861, "learning_rate": 1.4841799828804737e-09, "loss": 0.0655, "step": 18319 }, { "epoch": 2.968243681140635, "grad_norm": 0.8597686886787415, "learning_rate": 1.469151881208497e-09, "loss": 0.0533, "step": 18320 }, { "epoch": 2.968405703175632, "grad_norm": 0.9775875806808472, "learning_rate": 1.4542002285339307e-09, "loss": 0.0652, "step": 18321 }, { "epoch": 2.9685677252106286, "grad_norm": 0.9064924716949463, "learning_rate": 1.4393250253144642e-09, "loss": 0.0605, "step": 18322 }, { "epoch": 2.9687297472456255, "grad_norm": 0.788995087146759, "learning_rate": 1.4245262720052888e-09, "loss": 0.055, "step": 18323 }, { "epoch": 2.968891769280622, "grad_norm": 0.86928391456604, "learning_rate": 1.4098039690593756e-09, "loss": 0.0589, "step": 18324 }, { "epoch": 2.969053791315619, "grad_norm": 0.894902765750885, "learning_rate": 1.3951581169266426e-09, "loss": 0.0581, "step": 18325 }, { "epoch": 2.9692158133506155, "grad_norm": 0.9499050974845886, "learning_rate": 1.3805887160558973e-09, "loss": 0.0611, "step": 18326 }, { "epoch": 2.9693778353856124, "grad_norm": 0.896775484085083, "learning_rate": 1.3660957668923391e-09, "loss": 0.0545, "step": 18327 }, { "epoch": 2.9695398574206093, "grad_norm": 1.0583899021148682, "learning_rate": 1.3516792698797797e-09, "loss": 0.0639, "step": 18328 }, { "epoch": 2.969701879455606, "grad_norm": 0.9759207963943481, "learning_rate": 1.3373392254592554e-09, "loss": 0.0665, "step": 18329 }, { "epoch": 2.9698639014906028, "grad_norm": 0.8481022715568542, "learning_rate": 1.323075634069304e-09, "loss": 0.0598, "step": 18330 }, { "epoch": 2.9700259235255997, "grad_norm": 1.0313864946365356, "learning_rate": 1.3088884961467985e-09, "loss": 0.0618, "step": 18331 }, { "epoch": 2.970187945560596, "grad_norm": 0.9152479767799377, "learning_rate": 1.2947778121255584e-09, "loss": 0.0561, "step": 18332 }, { "epoch": 2.970349967595593, "grad_norm": 0.9564821720123291, "learning_rate": 1.2807435824371827e-09, "loss": 0.0579, "step": 18333 }, { "epoch": 2.9705119896305896, "grad_norm": 0.9317049384117126, "learning_rate": 1.2667858075113281e-09, "loss": 0.0563, "step": 18334 }, { "epoch": 2.9706740116655865, "grad_norm": 1.058647632598877, "learning_rate": 1.2529044877751528e-09, "loss": 0.0618, "step": 18335 }, { "epoch": 2.970836033700583, "grad_norm": 0.8454841375350952, "learning_rate": 1.239099623653317e-09, "loss": 0.0543, "step": 18336 }, { "epoch": 2.97099805573558, "grad_norm": 1.0585004091262817, "learning_rate": 1.2253712155679832e-09, "loss": 0.0597, "step": 18337 }, { "epoch": 2.971160077770577, "grad_norm": 0.9348391890525818, "learning_rate": 1.2117192639393704e-09, "loss": 0.0573, "step": 18338 }, { "epoch": 2.9713220998055734, "grad_norm": 0.8744611740112305, "learning_rate": 1.1981437691852004e-09, "loss": 0.0567, "step": 18339 }, { "epoch": 2.9714841218405703, "grad_norm": 0.9730286598205566, "learning_rate": 1.1846447317206967e-09, "loss": 0.0593, "step": 18340 }, { "epoch": 2.9716461438755672, "grad_norm": 1.0506782531738281, "learning_rate": 1.1712221519594168e-09, "loss": 0.0634, "step": 18341 }, { "epoch": 2.9718081659105637, "grad_norm": 0.9589892625808716, "learning_rate": 1.1578760303113113e-09, "loss": 0.0554, "step": 18342 }, { "epoch": 2.9719701879455607, "grad_norm": 0.8297640085220337, "learning_rate": 1.1446063671854969e-09, "loss": 0.0528, "step": 18343 }, { "epoch": 2.9721322099805576, "grad_norm": 0.8789884448051453, "learning_rate": 1.131413162987205e-09, "loss": 0.0607, "step": 18344 }, { "epoch": 2.972294232015554, "grad_norm": 1.033374309539795, "learning_rate": 1.1182964181208344e-09, "loss": 0.0625, "step": 18345 }, { "epoch": 2.972456254050551, "grad_norm": 0.8781412839889526, "learning_rate": 1.1052561329871757e-09, "loss": 0.0567, "step": 18346 }, { "epoch": 2.9726182760855475, "grad_norm": 1.0838392972946167, "learning_rate": 1.0922923079856319e-09, "loss": 0.0611, "step": 18347 }, { "epoch": 2.9727802981205445, "grad_norm": 0.891605019569397, "learning_rate": 1.0794049435128296e-09, "loss": 0.0582, "step": 18348 }, { "epoch": 2.972942320155541, "grad_norm": 0.9720555543899536, "learning_rate": 1.0665940399626207e-09, "loss": 0.0635, "step": 18349 }, { "epoch": 2.973104342190538, "grad_norm": 0.8500351309776306, "learning_rate": 1.0538595977277466e-09, "loss": 0.0587, "step": 18350 }, { "epoch": 2.973266364225535, "grad_norm": 1.0722614526748657, "learning_rate": 1.041201617197063e-09, "loss": 0.0624, "step": 18351 }, { "epoch": 2.9734283862605313, "grad_norm": 0.9611464738845825, "learning_rate": 1.028620098758315e-09, "loss": 0.0588, "step": 18352 }, { "epoch": 2.9735904082955282, "grad_norm": 1.0209434032440186, "learning_rate": 1.0161150427964727e-09, "loss": 0.0619, "step": 18353 }, { "epoch": 2.973752430330525, "grad_norm": 0.9883117079734802, "learning_rate": 1.0036864496942856e-09, "loss": 0.0645, "step": 18354 }, { "epoch": 2.9739144523655217, "grad_norm": 0.890546977519989, "learning_rate": 9.913343198314495e-10, "loss": 0.0606, "step": 18355 }, { "epoch": 2.9740764744005186, "grad_norm": 0.9300897121429443, "learning_rate": 9.79058653586551e-10, "loss": 0.0638, "step": 18356 }, { "epoch": 2.974238496435515, "grad_norm": 0.7656763792037964, "learning_rate": 9.66859451334845e-10, "loss": 0.0535, "step": 18357 }, { "epoch": 2.974400518470512, "grad_norm": 0.9918314814567566, "learning_rate": 9.54736713449922e-10, "loss": 0.0602, "step": 18358 }, { "epoch": 2.9745625405055085, "grad_norm": 0.8427473306655884, "learning_rate": 9.426904403023185e-10, "loss": 0.061, "step": 18359 }, { "epoch": 2.9747245625405054, "grad_norm": 0.7689868211746216, "learning_rate": 9.307206322606288e-10, "loss": 0.0537, "step": 18360 }, { "epoch": 2.9748865845755024, "grad_norm": 0.8835668563842773, "learning_rate": 9.188272896915041e-10, "loss": 0.0591, "step": 18361 }, { "epoch": 2.975048606610499, "grad_norm": 0.8549810647964478, "learning_rate": 9.070104129582647e-10, "loss": 0.0575, "step": 18362 }, { "epoch": 2.975210628645496, "grad_norm": 0.9912297129631042, "learning_rate": 8.952700024231209e-10, "loss": 0.063, "step": 18363 }, { "epoch": 2.9753726506804927, "grad_norm": 0.8764829635620117, "learning_rate": 8.836060584449524e-10, "loss": 0.0609, "step": 18364 }, { "epoch": 2.975534672715489, "grad_norm": 0.7048518061637878, "learning_rate": 8.720185813806958e-10, "loss": 0.0481, "step": 18365 }, { "epoch": 2.975696694750486, "grad_norm": 0.97881680727005, "learning_rate": 8.6050757158479e-10, "loss": 0.0604, "step": 18366 }, { "epoch": 2.975858716785483, "grad_norm": 0.9841058850288391, "learning_rate": 8.490730294097304e-10, "loss": 0.0615, "step": 18367 }, { "epoch": 2.9760207388204796, "grad_norm": 0.8247631788253784, "learning_rate": 8.377149552049602e-10, "loss": 0.0558, "step": 18368 }, { "epoch": 2.9761827608554765, "grad_norm": 0.9710003137588501, "learning_rate": 8.264333493185339e-10, "loss": 0.0596, "step": 18369 }, { "epoch": 2.976344782890473, "grad_norm": 1.0644299983978271, "learning_rate": 8.15228212095176e-10, "loss": 0.0599, "step": 18370 }, { "epoch": 2.97650680492547, "grad_norm": 0.9960538148880005, "learning_rate": 8.040995438779453e-10, "loss": 0.07, "step": 18371 }, { "epoch": 2.9766688269604664, "grad_norm": 0.9092425107955933, "learning_rate": 7.930473450074028e-10, "loss": 0.0665, "step": 18372 }, { "epoch": 2.9768308489954634, "grad_norm": 0.7984481453895569, "learning_rate": 7.820716158216113e-10, "loss": 0.0556, "step": 18373 }, { "epoch": 2.9769928710304603, "grad_norm": 0.8725264668464661, "learning_rate": 7.711723566564133e-10, "loss": 0.0587, "step": 18374 }, { "epoch": 2.9771548930654568, "grad_norm": 0.873121976852417, "learning_rate": 7.603495678451534e-10, "loss": 0.0517, "step": 18375 }, { "epoch": 2.9773169151004537, "grad_norm": 1.0072768926620483, "learning_rate": 7.496032497195105e-10, "loss": 0.0708, "step": 18376 }, { "epoch": 2.9774789371354506, "grad_norm": 0.875882625579834, "learning_rate": 7.389334026078332e-10, "loss": 0.052, "step": 18377 }, { "epoch": 2.977640959170447, "grad_norm": 0.9897551536560059, "learning_rate": 7.283400268365271e-10, "loss": 0.057, "step": 18378 }, { "epoch": 2.977802981205444, "grad_norm": 0.9639883637428284, "learning_rate": 7.178231227300548e-10, "loss": 0.0682, "step": 18379 }, { "epoch": 2.9779650032404406, "grad_norm": 0.8938714861869812, "learning_rate": 7.073826906098258e-10, "loss": 0.0534, "step": 18380 }, { "epoch": 2.9781270252754375, "grad_norm": 0.8408438563346863, "learning_rate": 6.97018730795862e-10, "loss": 0.0569, "step": 18381 }, { "epoch": 2.978289047310434, "grad_norm": 0.9069212079048157, "learning_rate": 6.867312436045769e-10, "loss": 0.0674, "step": 18382 }, { "epoch": 2.978451069345431, "grad_norm": 0.9260680675506592, "learning_rate": 6.76520229351274e-10, "loss": 0.0562, "step": 18383 }, { "epoch": 2.978613091380428, "grad_norm": 1.0838451385498047, "learning_rate": 6.663856883482034e-10, "loss": 0.0593, "step": 18384 }, { "epoch": 2.9787751134154243, "grad_norm": 1.0117061138153076, "learning_rate": 6.563276209053948e-10, "loss": 0.0628, "step": 18385 }, { "epoch": 2.9789371354504213, "grad_norm": 1.174195647239685, "learning_rate": 6.463460273306577e-10, "loss": 0.0687, "step": 18386 }, { "epoch": 2.979099157485418, "grad_norm": 0.8610493540763855, "learning_rate": 6.364409079295808e-10, "loss": 0.0569, "step": 18387 }, { "epoch": 2.9792611795204147, "grad_norm": 0.8971181511878967, "learning_rate": 6.266122630049776e-10, "loss": 0.061, "step": 18388 }, { "epoch": 2.9794232015554116, "grad_norm": 1.0571962594985962, "learning_rate": 6.168600928577184e-10, "loss": 0.0602, "step": 18389 }, { "epoch": 2.9795852235904086, "grad_norm": 0.8279181718826294, "learning_rate": 6.071843977861758e-10, "loss": 0.0559, "step": 18390 }, { "epoch": 2.979747245625405, "grad_norm": 0.9802617430686951, "learning_rate": 5.975851780862241e-10, "loss": 0.0636, "step": 18391 }, { "epoch": 2.9799092676604015, "grad_norm": 1.0149646997451782, "learning_rate": 5.880624340517948e-10, "loss": 0.0606, "step": 18392 }, { "epoch": 2.9800712896953985, "grad_norm": 0.8134593367576599, "learning_rate": 5.786161659740441e-10, "loss": 0.0535, "step": 18393 }, { "epoch": 2.9802333117303954, "grad_norm": 0.8985582590103149, "learning_rate": 5.692463741424625e-10, "loss": 0.0575, "step": 18394 }, { "epoch": 2.980395333765392, "grad_norm": 1.0262095928192139, "learning_rate": 5.5995305884321e-10, "loss": 0.0618, "step": 18395 }, { "epoch": 2.980557355800389, "grad_norm": 0.9016473293304443, "learning_rate": 5.507362203607814e-10, "loss": 0.0611, "step": 18396 }, { "epoch": 2.9807193778353858, "grad_norm": 1.423275113105774, "learning_rate": 5.415958589774506e-10, "loss": 0.0656, "step": 18397 }, { "epoch": 2.9808813998703823, "grad_norm": 1.001585602760315, "learning_rate": 5.325319749727165e-10, "loss": 0.059, "step": 18398 }, { "epoch": 2.981043421905379, "grad_norm": 1.0050073862075806, "learning_rate": 5.235445686238572e-10, "loss": 0.0532, "step": 18399 }, { "epoch": 2.981205443940376, "grad_norm": 1.142298698425293, "learning_rate": 5.146336402059304e-10, "loss": 0.0697, "step": 18400 }, { "epoch": 2.9813674659753726, "grad_norm": 0.8341407775878906, "learning_rate": 5.057991899917735e-10, "loss": 0.0491, "step": 18401 }, { "epoch": 2.9815294880103695, "grad_norm": 1.106696367263794, "learning_rate": 4.970412182511708e-10, "loss": 0.065, "step": 18402 }, { "epoch": 2.981691510045366, "grad_norm": 0.9795176982879639, "learning_rate": 4.883597252525185e-10, "loss": 0.0658, "step": 18403 }, { "epoch": 2.981853532080363, "grad_norm": 0.879523515701294, "learning_rate": 4.797547112614376e-10, "loss": 0.0624, "step": 18404 }, { "epoch": 2.9820155541153595, "grad_norm": 0.9020334482192993, "learning_rate": 4.712261765410509e-10, "loss": 0.0638, "step": 18405 }, { "epoch": 2.9821775761503564, "grad_norm": 0.8660544753074646, "learning_rate": 4.627741213525383e-10, "loss": 0.0582, "step": 18406 }, { "epoch": 2.9823395981853533, "grad_norm": 0.871384859085083, "learning_rate": 4.5439854595430435e-10, "loss": 0.0613, "step": 18407 }, { "epoch": 2.98250162022035, "grad_norm": 0.9486377835273743, "learning_rate": 4.460994506028105e-10, "loss": 0.0588, "step": 18408 }, { "epoch": 2.9826636422553467, "grad_norm": 0.9920843839645386, "learning_rate": 4.378768355514651e-10, "loss": 0.0621, "step": 18409 }, { "epoch": 2.9828256642903437, "grad_norm": 1.2648646831512451, "learning_rate": 4.2973070105256643e-10, "loss": 0.066, "step": 18410 }, { "epoch": 2.98298768632534, "grad_norm": 0.9957455396652222, "learning_rate": 4.2166104735508197e-10, "loss": 0.0688, "step": 18411 }, { "epoch": 2.983149708360337, "grad_norm": 0.8266428709030151, "learning_rate": 4.136678747060363e-10, "loss": 0.0555, "step": 18412 }, { "epoch": 2.983311730395334, "grad_norm": 0.8543840050697327, "learning_rate": 4.0575118334967854e-10, "loss": 0.0543, "step": 18413 }, { "epoch": 2.9834737524303305, "grad_norm": 0.8616589307785034, "learning_rate": 3.9791097352831487e-10, "loss": 0.0617, "step": 18414 }, { "epoch": 2.983635774465327, "grad_norm": 1.0665658712387085, "learning_rate": 3.90147245482031e-10, "loss": 0.0605, "step": 18415 }, { "epoch": 2.983797796500324, "grad_norm": 0.7606042623519897, "learning_rate": 3.824599994484146e-10, "loss": 0.0514, "step": 18416 }, { "epoch": 2.983959818535321, "grad_norm": 1.0913400650024414, "learning_rate": 3.748492356625555e-10, "loss": 0.066, "step": 18417 }, { "epoch": 2.9841218405703174, "grad_norm": 0.9275001883506775, "learning_rate": 3.673149543573229e-10, "loss": 0.0614, "step": 18418 }, { "epoch": 2.9842838626053143, "grad_norm": 0.902944803237915, "learning_rate": 3.598571557630881e-10, "loss": 0.0639, "step": 18419 }, { "epoch": 2.9844458846403112, "grad_norm": 0.9159849286079407, "learning_rate": 3.5247584010827953e-10, "loss": 0.0619, "step": 18420 }, { "epoch": 2.9846079066753077, "grad_norm": 0.9164811372756958, "learning_rate": 3.451710076188275e-10, "loss": 0.059, "step": 18421 }, { "epoch": 2.9847699287103047, "grad_norm": 0.8712190389633179, "learning_rate": 3.3794265851816444e-10, "loss": 0.0598, "step": 18422 }, { "epoch": 2.9849319507453016, "grad_norm": 0.9287796020507812, "learning_rate": 3.307907930272247e-10, "loss": 0.0547, "step": 18423 }, { "epoch": 2.985093972780298, "grad_norm": 1.1647449731826782, "learning_rate": 3.237154113649998e-10, "loss": 0.0669, "step": 18424 }, { "epoch": 2.985255994815295, "grad_norm": 1.0105154514312744, "learning_rate": 3.167165137479833e-10, "loss": 0.055, "step": 18425 }, { "epoch": 2.9854180168502915, "grad_norm": 0.9725024700164795, "learning_rate": 3.0979410039017053e-10, "loss": 0.0611, "step": 18426 }, { "epoch": 2.9855800388852884, "grad_norm": 1.0524919033050537, "learning_rate": 3.029481715038918e-10, "loss": 0.0653, "step": 18427 }, { "epoch": 2.985742060920285, "grad_norm": 1.088059425354004, "learning_rate": 2.961787272978689e-10, "loss": 0.0651, "step": 18428 }, { "epoch": 2.985904082955282, "grad_norm": 0.8906368613243103, "learning_rate": 2.8948576797971364e-10, "loss": 0.0578, "step": 18429 }, { "epoch": 2.986066104990279, "grad_norm": 0.9600349068641663, "learning_rate": 2.828692937542621e-10, "loss": 0.0644, "step": 18430 }, { "epoch": 2.9862281270252753, "grad_norm": 0.9611825346946716, "learning_rate": 2.7632930482385243e-10, "loss": 0.0671, "step": 18431 }, { "epoch": 2.9863901490602722, "grad_norm": 0.9350132346153259, "learning_rate": 2.6986580138832487e-10, "loss": 0.0597, "step": 18432 }, { "epoch": 2.986552171095269, "grad_norm": 0.851214587688446, "learning_rate": 2.634787836458541e-10, "loss": 0.0572, "step": 18433 }, { "epoch": 2.9867141931302656, "grad_norm": 1.1418606042861938, "learning_rate": 2.571682517915619e-10, "loss": 0.0617, "step": 18434 }, { "epoch": 2.9868762151652626, "grad_norm": 0.9702183604240417, "learning_rate": 2.5093420601862706e-10, "loss": 0.0533, "step": 18435 }, { "epoch": 2.987038237200259, "grad_norm": 0.8244792819023132, "learning_rate": 2.447766465180079e-10, "loss": 0.054, "step": 18436 }, { "epoch": 2.987200259235256, "grad_norm": 1.0218993425369263, "learning_rate": 2.386955734778873e-10, "loss": 0.0629, "step": 18437 }, { "epoch": 2.9873622812702525, "grad_norm": 0.794620156288147, "learning_rate": 2.3269098708422754e-10, "loss": 0.048, "step": 18438 }, { "epoch": 2.9875243033052494, "grad_norm": 0.7882254719734192, "learning_rate": 2.2676288752104814e-10, "loss": 0.0506, "step": 18439 }, { "epoch": 2.9876863253402464, "grad_norm": 1.0353506803512573, "learning_rate": 2.2091127496959298e-10, "loss": 0.0663, "step": 18440 }, { "epoch": 2.987848347375243, "grad_norm": 0.8752200603485107, "learning_rate": 2.1513614960888552e-10, "loss": 0.0578, "step": 18441 }, { "epoch": 2.98801036941024, "grad_norm": 0.8921242952346802, "learning_rate": 2.0943751161545122e-10, "loss": 0.0593, "step": 18442 }, { "epoch": 2.9881723914452367, "grad_norm": 0.8822087049484253, "learning_rate": 2.0381536116415025e-10, "loss": 0.0559, "step": 18443 }, { "epoch": 2.988334413480233, "grad_norm": 1.00393545627594, "learning_rate": 1.98269698426512e-10, "loss": 0.0679, "step": 18444 }, { "epoch": 2.98849643551523, "grad_norm": 0.8842718005180359, "learning_rate": 1.9280052357240065e-10, "loss": 0.0635, "step": 18445 }, { "epoch": 2.988658457550227, "grad_norm": 0.875261664390564, "learning_rate": 1.8740783676945984e-10, "loss": 0.0622, "step": 18446 }, { "epoch": 2.9888204795852236, "grad_norm": 0.9737663865089417, "learning_rate": 1.820916381820026e-10, "loss": 0.0655, "step": 18447 }, { "epoch": 2.9889825016202205, "grad_norm": 0.8469840884208679, "learning_rate": 1.768519279732317e-10, "loss": 0.059, "step": 18448 }, { "epoch": 2.989144523655217, "grad_norm": 0.9026631116867065, "learning_rate": 1.7168870630357437e-10, "loss": 0.0613, "step": 18449 }, { "epoch": 2.989306545690214, "grad_norm": 1.0350512266159058, "learning_rate": 1.666019733306823e-10, "loss": 0.0625, "step": 18450 }, { "epoch": 2.9894685677252104, "grad_norm": 0.9230927228927612, "learning_rate": 1.6159172920998667e-10, "loss": 0.0618, "step": 18451 }, { "epoch": 2.9896305897602073, "grad_norm": 0.933576226234436, "learning_rate": 1.5665797409553097e-10, "loss": 0.0561, "step": 18452 }, { "epoch": 2.9897926117952043, "grad_norm": 0.8842147588729858, "learning_rate": 1.5180070813747282e-10, "loss": 0.0591, "step": 18453 }, { "epoch": 2.9899546338302008, "grad_norm": 0.9423680305480957, "learning_rate": 1.4701993148485972e-10, "loss": 0.0578, "step": 18454 }, { "epoch": 2.9901166558651977, "grad_norm": 0.9645872712135315, "learning_rate": 1.4231564428424105e-10, "loss": 0.0643, "step": 18455 }, { "epoch": 2.9902786779001946, "grad_norm": 1.0968506336212158, "learning_rate": 1.3768784667883562e-10, "loss": 0.0557, "step": 18456 }, { "epoch": 2.990440699935191, "grad_norm": 0.8200769424438477, "learning_rate": 1.3313653881075195e-10, "loss": 0.0579, "step": 18457 }, { "epoch": 2.990602721970188, "grad_norm": 0.8951736688613892, "learning_rate": 1.2866172081904548e-10, "loss": 0.059, "step": 18458 }, { "epoch": 2.9907647440051845, "grad_norm": 0.9866411685943604, "learning_rate": 1.2426339284082877e-10, "loss": 0.0585, "step": 18459 }, { "epoch": 2.9909267660401815, "grad_norm": 0.8763075470924377, "learning_rate": 1.1994155501071636e-10, "loss": 0.0578, "step": 18460 }, { "epoch": 2.991088788075178, "grad_norm": 0.9515385627746582, "learning_rate": 1.1569620746054722e-10, "loss": 0.0622, "step": 18461 }, { "epoch": 2.991250810110175, "grad_norm": 0.9050562977790833, "learning_rate": 1.1152735032077255e-10, "loss": 0.0639, "step": 18462 }, { "epoch": 2.991412832145172, "grad_norm": 0.997776985168457, "learning_rate": 1.0743498371823535e-10, "loss": 0.0671, "step": 18463 }, { "epoch": 2.9915748541801683, "grad_norm": 0.8363643288612366, "learning_rate": 1.0341910777894593e-10, "loss": 0.0567, "step": 18464 }, { "epoch": 2.9917368762151653, "grad_norm": 0.9793019890785217, "learning_rate": 9.947972262502881e-11, "loss": 0.0637, "step": 18465 }, { "epoch": 2.991898898250162, "grad_norm": 0.83316969871521, "learning_rate": 9.561682837777586e-11, "loss": 0.0526, "step": 18466 }, { "epoch": 2.9920609202851587, "grad_norm": 0.846834659576416, "learning_rate": 9.183042515459317e-11, "loss": 0.0572, "step": 18467 }, { "epoch": 2.9922229423201556, "grad_norm": 0.9713281393051147, "learning_rate": 8.812051307205416e-11, "loss": 0.0651, "step": 18468 }, { "epoch": 2.9923849643551526, "grad_norm": 0.8826894164085388, "learning_rate": 8.448709224312402e-11, "loss": 0.067, "step": 18469 }, { "epoch": 2.992546986390149, "grad_norm": 1.1817312240600586, "learning_rate": 8.093016277938015e-11, "loss": 0.0597, "step": 18470 }, { "epoch": 2.992709008425146, "grad_norm": 0.9918498992919922, "learning_rate": 7.744972478962443e-11, "loss": 0.0696, "step": 18471 }, { "epoch": 2.9928710304601425, "grad_norm": 0.9315574765205383, "learning_rate": 7.404577837988313e-11, "loss": 0.0574, "step": 18472 }, { "epoch": 2.9930330524951394, "grad_norm": 0.8410326838493347, "learning_rate": 7.071832365479481e-11, "loss": 0.0578, "step": 18473 }, { "epoch": 2.993195074530136, "grad_norm": 1.0091389417648315, "learning_rate": 6.746736071594484e-11, "loss": 0.0696, "step": 18474 }, { "epoch": 2.993357096565133, "grad_norm": 0.9494967460632324, "learning_rate": 6.429288966297576e-11, "loss": 0.0644, "step": 18475 }, { "epoch": 2.9935191186001298, "grad_norm": 0.8485308885574341, "learning_rate": 6.119491059303206e-11, "loss": 0.0614, "step": 18476 }, { "epoch": 2.9936811406351262, "grad_norm": 0.9347163438796997, "learning_rate": 5.817342360048273e-11, "loss": 0.0584, "step": 18477 }, { "epoch": 2.993843162670123, "grad_norm": 0.8836044669151306, "learning_rate": 5.522842877830892e-11, "loss": 0.0605, "step": 18478 }, { "epoch": 2.99400518470512, "grad_norm": 0.8401952981948853, "learning_rate": 5.235992621616115e-11, "loss": 0.0585, "step": 18479 }, { "epoch": 2.9941672067401166, "grad_norm": 1.1539223194122314, "learning_rate": 4.956791600230215e-11, "loss": 0.0589, "step": 18480 }, { "epoch": 2.9943292287751135, "grad_norm": 0.8261008262634277, "learning_rate": 4.685239822166398e-11, "loss": 0.0574, "step": 18481 }, { "epoch": 2.99449125081011, "grad_norm": 0.8534218072891235, "learning_rate": 4.4213372957790935e-11, "loss": 0.0544, "step": 18482 }, { "epoch": 2.994653272845107, "grad_norm": 0.8698385953903198, "learning_rate": 4.165084029117417e-11, "loss": 0.0586, "step": 18483 }, { "epoch": 2.9948152948801035, "grad_norm": 1.0100245475769043, "learning_rate": 3.9164800300084404e-11, "loss": 0.0542, "step": 18484 }, { "epoch": 2.9949773169151004, "grad_norm": 0.8979434967041016, "learning_rate": 3.6755253060849484e-11, "loss": 0.0589, "step": 18485 }, { "epoch": 2.9951393389500973, "grad_norm": 0.9884781837463379, "learning_rate": 3.442219864729923e-11, "loss": 0.065, "step": 18486 }, { "epoch": 2.995301360985094, "grad_norm": 1.027869701385498, "learning_rate": 3.216563713048793e-11, "loss": 0.0565, "step": 18487 }, { "epoch": 2.9954633830200907, "grad_norm": 0.9127057790756226, "learning_rate": 2.998556857952695e-11, "loss": 0.06, "step": 18488 }, { "epoch": 2.9956254050550877, "grad_norm": 0.9373353123664856, "learning_rate": 2.7881993061307233e-11, "loss": 0.0585, "step": 18489 }, { "epoch": 2.995787427090084, "grad_norm": 1.021518588066101, "learning_rate": 2.5854910639944165e-11, "loss": 0.0629, "step": 18490 }, { "epoch": 2.995949449125081, "grad_norm": 0.8520172238349915, "learning_rate": 2.390432137761023e-11, "loss": 0.0592, "step": 18491 }, { "epoch": 2.996111471160078, "grad_norm": 0.7974404096603394, "learning_rate": 2.203022533425747e-11, "loss": 0.0554, "step": 18492 }, { "epoch": 2.9962734931950745, "grad_norm": 0.8519773483276367, "learning_rate": 2.023262256678482e-11, "loss": 0.0582, "step": 18493 }, { "epoch": 2.9964355152300715, "grad_norm": 0.904719352722168, "learning_rate": 1.8511513130148317e-11, "loss": 0.0618, "step": 18494 }, { "epoch": 2.996597537265068, "grad_norm": 0.9865505695343018, "learning_rate": 1.686689707736111e-11, "loss": 0.0664, "step": 18495 }, { "epoch": 2.996759559300065, "grad_norm": 0.965429425239563, "learning_rate": 1.529877445866079e-11, "loss": 0.0616, "step": 18496 }, { "epoch": 2.9969215813350614, "grad_norm": 0.8916358351707458, "learning_rate": 1.3807145322064508e-11, "loss": 0.0618, "step": 18497 }, { "epoch": 2.9970836033700583, "grad_norm": 0.8863734602928162, "learning_rate": 1.2392009713091402e-11, "loss": 0.0537, "step": 18498 }, { "epoch": 2.9972456254050552, "grad_norm": 0.9996553659439087, "learning_rate": 1.1053367674762617e-11, "loss": 0.0576, "step": 18499 }, { "epoch": 2.9974076474400517, "grad_norm": 0.8960807919502258, "learning_rate": 9.791219248711515e-12, "loss": 0.0567, "step": 18500 }, { "epoch": 2.9975696694750487, "grad_norm": 0.9851888418197632, "learning_rate": 8.605564472963235e-12, "loss": 0.0613, "step": 18501 }, { "epoch": 2.9977316915100456, "grad_norm": 1.055681824684143, "learning_rate": 7.496403384155137e-12, "loss": 0.0598, "step": 18502 }, { "epoch": 2.997893713545042, "grad_norm": 0.9974924921989441, "learning_rate": 6.463736015871469e-12, "loss": 0.0644, "step": 18503 }, { "epoch": 2.998055735580039, "grad_norm": 0.9039761424064636, "learning_rate": 5.507562400308697e-12, "loss": 0.0685, "step": 18504 }, { "epoch": 2.9982177576150355, "grad_norm": 0.9217861890792847, "learning_rate": 4.6278825660550645e-12, "loss": 0.0597, "step": 18505 }, { "epoch": 2.9983797796500324, "grad_norm": 0.8809846043586731, "learning_rate": 3.8246965403110344e-12, "loss": 0.0548, "step": 18506 }, { "epoch": 2.998541801685029, "grad_norm": 0.8097039461135864, "learning_rate": 3.098004347779071e-12, "loss": 0.0532, "step": 18507 }, { "epoch": 2.998703823720026, "grad_norm": 0.847151517868042, "learning_rate": 2.4478060103860777e-12, "loss": 0.0547, "step": 18508 }, { "epoch": 2.998865845755023, "grad_norm": 0.8286563754081726, "learning_rate": 1.8741015483936253e-12, "loss": 0.0566, "step": 18509 }, { "epoch": 2.9990278677900193, "grad_norm": 0.9556106925010681, "learning_rate": 1.376890979287726e-12, "loss": 0.0555, "step": 18510 }, { "epoch": 2.999189889825016, "grad_norm": 0.7847691774368286, "learning_rate": 9.56174318056391e-13, "loss": 0.0503, "step": 18511 }, { "epoch": 2.999351911860013, "grad_norm": 1.074299931526184, "learning_rate": 6.119515774671847e-13, "loss": 0.0612, "step": 18512 }, { "epoch": 2.9995139338950096, "grad_norm": 0.9344704747200012, "learning_rate": 3.442227686223376e-13, "loss": 0.0638, "step": 18513 }, { "epoch": 2.9996759559300066, "grad_norm": 1.0170155763626099, "learning_rate": 1.529878990158551e-13, "loss": 0.0624, "step": 18514 }, { "epoch": 2.9998379779650035, "grad_norm": 0.8656219840049744, "learning_rate": 3.824697503151953e-14, "loss": 0.0574, "step": 18515 }, { "epoch": 3.0, "grad_norm": 0.9637875556945801, "learning_rate": 0.0, "loss": 0.0613, "step": 18516 } ], "logging_steps": 1.0, "max_steps": 18516, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8941809923882746e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }